## load libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
## Load FAO-specific user-defined functions
source('../../../src/R/common.R')
Mariculture production in tonnes.
# Read the mariculture production table (path points at the mar/v2018
# output folder).
mar <- read.csv(file = "../../mar/v2018/output/MAR_FP_data.csv")
Fisheries data.
# Read the fisheries catch table, keeping only region, year and catch;
# the catch column is renamed to fis_t.
fis <- select(
  read.csv(file = "../../fis/v2017/data/FP_fis_catch.csv"),
  rgn_id, year, fis_t = fis_catch
)
# Collapse the mariculture table to one row per region and year, summing
# `value` (NAs are dropped from the sum).
mar <- mar %>%
  group_by(rgn_id, year) %>%
  summarise(mar_t = sum(value, na.rm = TRUE)) %>%
  ungroup() %>%
  select(rgn_id, year, mar_t)

# this one is turning to NA in FP
mar %>%
  filter(rgn_id == 95) # ok, this makes sense
## # A tibble: 13 x 3
## rgn_id year mar_t
## <int> <int> <dbl>
## 1 95 2004 21.0
## 2 95 2005 2.00
## 3 95 2006 2.00
## 4 95 2007 2.00
## 5 95 2008 0
## 6 95 2009 0
## 7 95 2010 0
## 8 95 2011 0
## 9 95 2012 0
## 10 95 2013 0
## 11 95 2014 0
## 12 95 2015 0
## 13 95 2016 0
A few regions report a catch of exactly 0 in the final year (2014), which looks like missing data rather than a true zero:
# Print the catch series for a handful of regions
data.frame(filter(fis, rgn_id %in% c(3, 89, 4, 95, 105)))
## rgn_id year fis_t
## 1 3 2005 1.141704e+03
## 2 3 2006 7.804059e+02
## 3 3 2007 7.873630e+02
## 4 3 2008 6.899306e+02
## 5 3 2009 6.092612e+02
## 6 3 2010 8.164415e+02
## 7 3 2011 6.798308e+02
## 8 3 2012 7.235042e+02
## 9 3 2013 6.722628e+02
## 10 3 2014 7.155040e+02
## 11 4 2005 2.665577e+02
## 12 4 2006 3.078826e+02
## 13 4 2007 3.552030e+02
## 14 4 2008 4.759524e+02
## 15 4 2009 4.225761e+02
## 16 4 2010 3.200856e+02
## 17 4 2011 3.403972e+02
## 18 4 2012 3.098069e+02
## 19 4 2013 2.753163e+02
## 20 4 2014 0.000000e+00
## 21 89 2005 5.144460e+04
## 22 89 2006 2.045820e+04
## 23 89 2007 2.836660e+04
## 24 89 2008 6.618369e+04
## 25 89 2009 5.459445e+03
## 26 89 2010 1.139715e+04
## 27 89 2011 6.240408e+04
## 28 89 2012 6.409555e+04
## 29 89 2013 3.842687e+04
## 30 89 2014 0.000000e+00
## 31 95 2005 2.645793e+05
## 32 95 2006 4.417701e+05
## 33 95 2007 6.655655e+05
## 34 95 2008 5.494411e+05
## 35 95 2009 3.090001e+05
## 36 95 2010 3.539112e+05
## 37 95 2011 3.792507e+05
## 38 95 2012 4.558968e+05
## 39 95 2013 3.526110e+05
## 40 95 2014 0.000000e+00
## 41 105 2005 1.713401e+01
## 42 105 2006 5.444830e+01
## 43 105 2007 3.929344e+01
## 44 105 2008 7.956632e+00
## 45 105 2009 9.470502e+01
## 46 105 2010 1.313912e+02
## 47 105 2011 1.312402e+02
## 48 105 2012 1.287774e+02
## 49 105 2013 1.026724e+02
## 50 105 2014 0.000000e+00
# Treat a reported catch of exactly 0 as missing, then, within each region,
# fill the missing values downward from the last observed catch (rows are
# ordered by year first).
fis <- fis %>%
  mutate(fis_t = ifelse(fis_t == 0, NA, fis_t)) %>% # 8 NA values is correct
  group_by(rgn_id) %>%
  arrange(year) %>%
  fill(fis_t) %>%
  # Drop the grouping so it does not leak into the join and later steps.
  ungroup()
# Check: print the same regions again after the fill step
data.frame(filter(fis, rgn_id %in% c(3, 89, 4, 95, 105)))
## rgn_id year fis_t
## 1 3 2005 1.141704e+03
## 2 3 2006 7.804059e+02
## 3 3 2007 7.873630e+02
## 4 3 2008 6.899306e+02
## 5 3 2009 6.092612e+02
## 6 3 2010 8.164415e+02
## 7 3 2011 6.798308e+02
## 8 3 2012 7.235042e+02
## 9 3 2013 6.722628e+02
## 10 3 2014 7.155040e+02
## 11 4 2005 2.665577e+02
## 12 4 2006 3.078826e+02
## 13 4 2007 3.552030e+02
## 14 4 2008 4.759524e+02
## 15 4 2009 4.225761e+02
## 16 4 2010 3.200856e+02
## 17 4 2011 3.403972e+02
## 18 4 2012 3.098069e+02
## 19 4 2013 2.753163e+02
## 20 4 2014 2.753163e+02
## 21 89 2005 5.144460e+04
## 22 89 2006 2.045820e+04
## 23 89 2007 2.836660e+04
## 24 89 2008 6.618369e+04
## 25 89 2009 5.459445e+03
## 26 89 2010 1.139715e+04
## 27 89 2011 6.240408e+04
## 28 89 2012 6.409555e+04
## 29 89 2013 3.842687e+04
## 30 89 2014 3.842687e+04
## 31 95 2005 2.645793e+05
## 32 95 2006 4.417701e+05
## 33 95 2007 6.655655e+05
## 34 95 2008 5.494411e+05
## 35 95 2009 3.090001e+05
## 36 95 2010 3.539112e+05
## 37 95 2011 3.792507e+05
## 38 95 2012 4.558968e+05
## 39 95 2013 3.526110e+05
## 40 95 2014 3.526110e+05
## 41 105 2005 1.713401e+01
## 42 105 2006 5.444830e+01
## 43 105 2007 3.929344e+01
## 44 105 2008 7.956632e+00
## 45 105 2009 9.470502e+01
## 46 105 2010 1.313912e+02
## 47 105 2011 1.312402e+02
## 48 105 2012 1.287774e+02
## 49 105 2013 1.026724e+02
## 50 105 2014 1.026724e+02
Shift the mariculture years so that the most recent year in each dataset lines up.
# Offset between the latest mariculture year and the latest fisheries year;
# subtracting it from the mariculture years makes the two latest years match.
adjust <- max(mar$year) - max(fis$year)
mar <- mutate(mar, year = year - adjust)
# Combine the fisheries and mariculture series, keeping every region/year
# that appears in either table. (`all = TRUE` is an argument of base
# merge(), not of full_join(), which already keeps all rows from both
# sides, so it has been dropped.)
tmp <- full_join(fis, mar, by = c("rgn_id", "year"))

# If NA, turn it into a 0 before weighting
tmp <- tmp %>%
  mutate(fis_t = ifelse(is.na(fis_t), 0, fis_t)) %>%
  mutate(mar_t = ifelse(is.na(mar_t), 0, mar_t)) %>%
  # Fisheries share of the combined fisheries + mariculture tonnage.
  mutate(w_fis = fis_t / (fis_t + mar_t)) %>%
  # Both tonnages are 0: the ratio is undefined, so record NA instead.
  mutate(w_fis = ifelse(mar_t == 0 & fis_t == 0, NA, w_fis)) %>%
  filter(year >= 2005) %>%
  select(rgn_id, year, w_fis)

# Quick look at the distribution of the weights
hist(tmp$w_fis)
Compare to the previous year's data (there is a big jump in the fisheries data, so the two are not strictly comparable, but they should at least be correlated).
# Load the v2017 weights (renamed to w_fis_old) and attach the new weights
# for the same region/year.
compare <- read.csv(file = "../../fp/v2017/output/wildcaught_weight.csv")
compare <- rename(compare, w_fis_old = w_fis)
compare <- left_join(compare, tmp, by = c("rgn_id", "year"))

# Old vs. new weights, with a red 1:1 reference line
plot(compare$w_fis_old, compare$w_fis)
abline(a = 0, b = 1, col = "red")
# Save the weights
write.csv(tmp, file = "output/wildcaught_weight.csv", row.names = FALSE)

## add gf file (no gapfilling)
# Same region/year rows, with the gapfilled flag set to 0 everywhere
tmp_gf <- select(mutate(tmp, w_fis = 0), rgn_id, year, gapfilled = w_fis)
write.csv(tmp_gf, file = "output/wildcaught_weight_gf.csv", row.names = FALSE)