ohi logo
OHI Science | Citation policy

1 Methods

## load libraries
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)

## Load FAO-specific user-defined functions
source('../../../src/R/common.R')

2 Import Output Data: Mariculture data

Mariculture production in tonnes.

## Read the mariculture production table from the mar/v2018 output folder
mar <- read.csv(file = '../../mar/v2018/output/MAR_FP_data.csv')

Fisheries data.

## Read the fisheries catch table, then keep region, year, and catch
## (the catch column is renamed from fis_catch to fis_t)
fis <- read.csv("../../fis/v2017/data/FP_fis_catch.csv")
fis <- select(fis, rgn_id, year, fis_t = fis_catch)

3 Wrangle

3.1 Tidy MAR data

## Collapse mariculture production to one total (mar_t) per region and year;
## missing values are ignored in the sum
mar <- summarize(group_by(mar, rgn_id, year),
                 mar_t = sum(value, na.rm = TRUE)) %>%
  ungroup() %>%
  select(rgn_id, year, mar_t)

# Spot check: region 95 ends up as NA in FP, so print its mariculture totals
mar %>%
  filter(rgn_id == 95)  # values look reasonable
## # A tibble: 13 x 3
##    rgn_id  year mar_t
##     <int> <int> <dbl>
##  1     95  2004 21.0 
##  2     95  2005  2.00
##  3     95  2006  2.00
##  4     95  2007  2.00
##  5     95  2008  0   
##  6     95  2009  0   
##  7     95  2010  0   
##  8     95  2011  0   
##  9     95  2012  0   
## 10     95  2013  0   
## 11     95  2014  0   
## 12     95  2015  0   
## 13     95  2016  0

3.2 Tidy FIS data

Weirdness with a few regions:

  • It appears that some regions have 0 catch in the 2014 catch data, despite having catch in previous years.
  • These cases are gapfilled with the previous year's data.
## print the catch series for a handful of regions
data.frame(filter(fis, rgn_id %in% c(3, 89, 4, 95, 105)))
##    rgn_id year        fis_t
## 1       3 2005 1.141704e+03
## 2       3 2006 7.804059e+02
## 3       3 2007 7.873630e+02
## 4       3 2008 6.899306e+02
## 5       3 2009 6.092612e+02
## 6       3 2010 8.164415e+02
## 7       3 2011 6.798308e+02
## 8       3 2012 7.235042e+02
## 9       3 2013 6.722628e+02
## 10      3 2014 7.155040e+02
## 11      4 2005 2.665577e+02
## 12      4 2006 3.078826e+02
## 13      4 2007 3.552030e+02
## 14      4 2008 4.759524e+02
## 15      4 2009 4.225761e+02
## 16      4 2010 3.200856e+02
## 17      4 2011 3.403972e+02
## 18      4 2012 3.098069e+02
## 19      4 2013 2.753163e+02
## 20      4 2014 0.000000e+00
## 21     89 2005 5.144460e+04
## 22     89 2006 2.045820e+04
## 23     89 2007 2.836660e+04
## 24     89 2008 6.618369e+04
## 25     89 2009 5.459445e+03
## 26     89 2010 1.139715e+04
## 27     89 2011 6.240408e+04
## 28     89 2012 6.409555e+04
## 29     89 2013 3.842687e+04
## 30     89 2014 0.000000e+00
## 31     95 2005 2.645793e+05
## 32     95 2006 4.417701e+05
## 33     95 2007 6.655655e+05
## 34     95 2008 5.494411e+05
## 35     95 2009 3.090001e+05
## 36     95 2010 3.539112e+05
## 37     95 2011 3.792507e+05
## 38     95 2012 4.558968e+05
## 39     95 2013 3.526110e+05
## 40     95 2014 0.000000e+00
## 41    105 2005 1.713401e+01
## 42    105 2006 5.444830e+01
## 43    105 2007 3.929344e+01
## 44    105 2008 7.956632e+00
## 45    105 2009 9.470502e+01
## 46    105 2010 1.313912e+02
## 47    105 2011 1.312402e+02
## 48    105 2012 1.287774e+02
## 49    105 2013 1.026724e+02
## 50    105 2014 0.000000e+00
## Gapfill zero catch: recode 0 as NA, then carry each region's most recent
## earlier value forward (fill() fills downward by default, so rows must be
## in year order within each region).
fis <- fis %>%
  mutate(fis_t = ifelse(fis_t == 0, NA, fis_t)) %>%  # 8 NA values is correct
  group_by(rgn_id) %>%
  arrange(year) %>%
  fill(fis_t) %>%
  ungroup()  # drop the rgn_id grouping so it does not leak into later steps

# Re-print the same regions to confirm the zero-catch years were filled
data.frame(filter(fis, rgn_id %in% c(3, 89, 4, 95, 105)))
##    rgn_id year        fis_t
## 1       3 2005 1.141704e+03
## 2       3 2006 7.804059e+02
## 3       3 2007 7.873630e+02
## 4       3 2008 6.899306e+02
## 5       3 2009 6.092612e+02
## 6       3 2010 8.164415e+02
## 7       3 2011 6.798308e+02
## 8       3 2012 7.235042e+02
## 9       3 2013 6.722628e+02
## 10      3 2014 7.155040e+02
## 11      4 2005 2.665577e+02
## 12      4 2006 3.078826e+02
## 13      4 2007 3.552030e+02
## 14      4 2008 4.759524e+02
## 15      4 2009 4.225761e+02
## 16      4 2010 3.200856e+02
## 17      4 2011 3.403972e+02
## 18      4 2012 3.098069e+02
## 19      4 2013 2.753163e+02
## 20      4 2014 2.753163e+02
## 21     89 2005 5.144460e+04
## 22     89 2006 2.045820e+04
## 23     89 2007 2.836660e+04
## 24     89 2008 6.618369e+04
## 25     89 2009 5.459445e+03
## 26     89 2010 1.139715e+04
## 27     89 2011 6.240408e+04
## 28     89 2012 6.409555e+04
## 29     89 2013 3.842687e+04
## 30     89 2014 3.842687e+04
## 31     95 2005 2.645793e+05
## 32     95 2006 4.417701e+05
## 33     95 2007 6.655655e+05
## 34     95 2008 5.494411e+05
## 35     95 2009 3.090001e+05
## 36     95 2010 3.539112e+05
## 37     95 2011 3.792507e+05
## 38     95 2012 4.558968e+05
## 39     95 2013 3.526110e+05
## 40     95 2014 3.526110e+05
## 41    105 2005 1.713401e+01
## 42    105 2006 5.444830e+01
## 43    105 2007 3.929344e+01
## 44    105 2008 7.956632e+00
## 45    105 2009 9.470502e+01
## 46    105 2010 1.313912e+02
## 47    105 2011 1.312402e+02
## 48    105 2012 1.287774e+02
## 49    105 2013 1.026724e+02
## 50    105 2014 1.026724e+02

3.3 Combine MAR and FIS

Shift the mariculture years so that the most recent mariculture year lines up with the most recent fisheries year.

## Offset between the latest mariculture year and the latest fisheries year
adjust <- max(mar$year) - max(fis$year)

## Move the mariculture years back by that offset
mar <- mutate(mar, year = year - adjust)

## Keep every region/year found in either table. full_join() already retains
## unmatched rows from both sides; `all = TRUE` is a base merge() argument
## that dplyr joins do not accept, so it is not passed here.
tmp <- full_join(fis, mar, by = c('rgn_id', 'year'))

## If NA, turn it into a 0 before weighting.
## w_fis is the fisheries share of total (fisheries + mariculture) tonnes;
## it is NA when both are 0, where the ratio would otherwise be 0/0.
tmp <- tmp %>%
  ungroup() %>%  # drop any grouping carried over from fis
  mutate(
    fis_t = ifelse(is.na(fis_t), 0, fis_t),
    mar_t = ifelse(is.na(mar_t), 0, mar_t),
    w_fis = ifelse(mar_t == 0 & fis_t == 0, NA, fis_t / (fis_t + mar_t))
  ) %>%
  filter(year >= 2005) %>%
  select(rgn_id, year, w_fis)

## Plot the distribution of the fisheries weights as a visual check
hist(tmp$w_fis)

4 Data check

Compare to the previous year's data. There was a big jump in the fisheries data, so the two are not directly comparable, but they should at least be correlated.

## Load the v2017 weights, label them as the old values, and attach the new
## weights by region and year
compare <- read.csv("../../fp/v2017/output/wildcaught_weight.csv")
compare <- rename(compare, w_fis_old = w_fis)
compare <- left_join(compare, tmp, by = c('rgn_id', 'year'))

## Old vs. new weights, with a red 1:1 reference line
plot(compare$w_fis_old, compare$w_fis)
abline(a = 0, b = 1, col = "red")

5 Save data

## Write the weights
write.csv(tmp, 'output/wildcaught_weight.csv', row.names=FALSE)

## Write the companion gapfilling file: one row per weight, flagged 0
## (i.e., not gapfilled).
## NOTE(review): fis_t zeros were forward-filled earlier in this script;
## confirm that flagging every row as not gapfilled is intended.
tmp_gf <- tmp %>%
  mutate(gapfilled = 0) %>%
  select(rgn_id, year, gapfilled)

write.csv(tmp_gf, 'output/wildcaught_weight_gf.csv', row.names=FALSE)