This script takes the SAUP 2021 catch data, a list of fish oil/fish meal (FOFM) species, and B/Bmsy estimates to calculate a score for FOFM species per OHI region. The FIS data prep will need to be completed prior to this data prep.
No methods updates. Just updated fisheries and RAM data.
Reference: Pauly D., Zeller D., Palomares M.L.D. (Editors), 2020. Sea Around Us Concepts, Design and Data (seaaroundus.org).
Downloaded: August 24, 2021
Description: Tons per year and SAUP region with information on sector type, industry type, fishing entitity, reporting status and taxonomic information.
Time range: 1950 - 2018
Format: CSV
Additional Information: Methods
Reference: Froehlich, H.E., Jacobsen, N.S., Essington, T.E., Clavelle, T., and Halpern, B.S. (2018). Avoiding the ecological limits of forage fish for fed aquaculture. Nature Sustainability 1, 298.
Downloaded: July 7, 2020. Obtained from Melanie Frazier (NCEAS).
Description: List of FOFM species from Watson v3 data.
Native data resolution:
Format: CSV format
Reference: RAM Legacy Stock Assessment Database v4.495
Downloaded: 06/03/2021
Description: B/Bmsy value by stock and year (other data, which we do not use, are also available in the database)
Native data resolution: stock (fish stock, species and region specific)
Time range: 1950 - 2016
Format: CSV format
Additional Information: We use the finalized b/bmsy layer from OHI-global for this data prep. We do not actually read in the raw RAM data here.
::opts_chunk$set(eval=FALSE)
knitr
library(here)
library(tidyverse)
source(here('workflow/R/common.R'))
## This file makes it easier to process data for the OHI global assessment
## by creating the following objects:
##
## * dir_M = identifies correct file path to Mazu (internal server) based on your operating system
## * mollCRS = the crs code for the mollweide coordinate reference system we use in the global assessment
## * regions_shape() = function to load global shapefile for land/eez/high seas/antarctica regions
## * ohi_rasters() = function to load two rasters: global eez regions and ocean region
## * region_data() = function to load 2 dataframes describing global regions
## * rgn_syns() = function to load dataframe of region synonyms (used to convert country names to OHI regions)
## * low_pop() = function to load dataframe of regions with low and no human population
## * UNgeorgn = function to load dataframe of UN geopolitical designations used to gapfill missing data
setwd(here())
Steps to this
This forage fish list is based on the older Watson fisheries data. Basically, the code takes the list from Froehlich et al. (2018) and then cross references it with the SAUP taxa data and identifies some extra SAUP taxa that appeared to be fish oil/fish meal (FOFM) fish.
The following is a list from: Froehlich, H.E., Jacobsen, N.S., Essington, T.E., Clavelle, T., and Halpern, B.S. (2018). Avoiding the ecological limits of forage fish for fed aquaculture. Nature Sustainability 1, 298.
They identify 238 forage fish species which account for >99% of forage fish catches in 2012.
31 million tons per year of captured forage fish (since 1980).
Get the list of forage fish used for FOFM:
## Read in Froehlich list of forage fish species
<- read_csv(file.path(here(), "globalprep/np/v2021/raw/msleckman.61.1-CatchMSY_Nis_FAOAreas.csv"))
forage sort(unique(forage$Species)) #238 forage fish groups listed
<- read.csv(file.path(here(), "globalprep/fis/v2021/int/taxon_key_v2021.csv"))
saup_taxonkey
## Combined list:
<- data.frame(forage_fish = sort(unique(c(unique(forage$Species), saup_taxonkey$TaxonName[saup_taxonkey$forage_fish %in% 1]))))
foragefish_list
<- data.frame(forage_fish = sort(unique(forage$Species)))
foragefish_list
<- foragefish_list %>%
foragefish_list mutate(forage_fish = case_when(
== "Ammodytes spp" ~ "Ammodytes",
forage_fish == "Clupea pallasii" ~ "Clupea pallasii pallasii",
forage_fish == "Clupeoidei" ~ "Clupeoids",
forage_fish == "Decapterus spp" ~ "Decapterus",
forage_fish == "Diplodus argenteus" ~ "Diplodus argenteus argenteus",
forage_fish == "Etrumeus teres" ~ "Etrumeus sadina",
forage_fish == "Gadiculus argenteus" ~ "Gadiculus argenteus thori",
forage_fish == "Gymnocephalus cernuus" ~ "Gymnocephalus cernua",
forage_fish == "Mullus barbatus" ~ "Mullus barbatus barbatus",
forage_fish == "Patagonotothen brevicauda" ~ "Patagonotothen brevicauda brevicauda",
forage_fish == "Rioraja agassizi" ~ "Rioraja agassizii",
forage_fish == "Sardinella spp" ~ "Sardinella",
forage_fish == "Trachurus spp" ~ "Trachurus",
forage_fish TRUE ~ forage_fish
%>% # fix some name issues in the forage fish list to match the names in SAUP
)) left_join(saup_taxonkey, by = c("forage_fish" = "TaxonName")) %>%
mutate(inSAUP = ifelse(is.na(TaxonKey), NA, "yes")) ## Now only 9 missing spp.. this is good enough! Note: some of the species that are listed as in the SAUP data, are actually not in the SAUP data. This is because this forage fish list was originally derived from the Watson data, and there are some Watson species which are not in the SAUP data. This is not a problem.
write.csv(foragefish_list, here("globalprep/np/v2021/int/master_taxa_list_SAUP.csv"), row.names=FALSE)
<- file.path(dir_M,'git-annex/globalprep/fis/v2021/int/stock_catch_by_rgn_taxa.csv')
file
<- read.csv(file) %>%
catch rename(common = CommonName, fao_id = fao_rgn, species=TaxonName)
summary(catch)
## filter out non ohi eez regions if there are any
<- catch %>%
catch filter(!is.na(rgn_id)) %>%
filter(!is.na(fao_id)) %>%
filter(rgn_id <= 250) %>%
filter(rgn_id != 213)
## need to get TaxonKey's for each species to join with catch
<- read.csv(here("globalprep/np/v2021/int/master_taxa_list_SAUP.csv"))
foragefish_list
<- foragefish_list %>%
missing_spp filter(is.na(TaxonKey))
# test <- catch %>%
# filter(str_detect(paste(missing_spp$forage_fish, collapse = "|"), species)) # just make sure the NA species are actually not in the SAUP data
# sort(unique(test$species)) # no full matches..
<- foragefish_list %>%
forage_fish_taxa_list left_join(catch, by = c("forage_fish" = "species", "TaxonKey")) %>%
::select(forage_fish, inSAUP, TaxonKey) %>%
dplyrunique() %>%
mutate(inSAUP = ifelse(
is.na(TaxonKey), NA, inSAUP))
## save a list of species not in SAUP data for bookkeeping
<- forage_fish_taxa_list %>%
forage_fish_no_saup filter(is.na(TaxonKey))
write.csv(forage_fish_no_saup, file.path(here(), "globalprep/np/v2021/int/forage_fish_not_in_SAUP.csv"), row.names = FALSE)
## now join with catch data set
<- catch %>%
catch_fishfeed left_join(forage_fish_taxa_list, by = c("TaxonKey")) %>%
::filter(!is.na(forage_fish))
dplyr
write.csv(catch_fishfeed, file.path(here(), "globalprep/np/v2021/int/saup_catch_forage_fish.csv"), row.names = FALSE)
<- catch_fishfeed %>%
test group_by(year) %>%
summarise(sum = sum(tons)) ## looks good.. ~30 million per year
Justification from the Froelich et al. 2018: “Currently, it is estimated about 10% of forage fish enter the human diet directly, but the notoriously tiny-boned fish are labour intensive (thus expensive) to process for human consumption, are the foundation of several industries and thus jobs (creating inertia to change) and are not the preferred fish type for most people.”
<- read_csv(file.path(here(),"globalprep/np/v2021/int/saup_catch_forage_fish.csv"))
catch_fishfeed
<- catch_fishfeed %>%
catch_non_human mutate(tons_non_human = tons*0.9)
<- catch_non_human %>%
catch_non_human ::select(year, rgn_id, fao_id, stock_id, TaxonKey, tons_non_human) %>%
dplyrgroup_by(rgn_id, fao_id, TaxonKey, stock_id, year) %>%
summarize(catch_non_human = sum(tons_non_human)) %>%
ungroup()
<- catch_non_human %>%
test group_by(year) %>%
summarise(sum = sum(catch_non_human)) # looks good ~30 mil per year
## these data have no zero catch values, so add years with no reported catch to data table:
<- catch_non_human %>%
catch_zeros spread(year, catch_non_human) %>%
data.frame() %>%
gather("year", "catch_non_human", num_range("X", min(catch_non_human$year):max(catch_non_human$year))) %>%
mutate(year = as.numeric(gsub("X", "", year))) %>%
mutate(catch_non_human = ifelse(is.na(catch_non_human), 0, catch_non_human))
## this part eliminates the zero catch values prior to the first reported non-zero catch
<- catch_zeros %>%
catch_zeros group_by(fao_id, TaxonKey, stock_id, rgn_id) %>%
arrange(year) %>%
mutate(cum_catch = cumsum(catch_non_human)) %>%
filter(cum_catch > 0) %>%
::select(-cum_catch) %>%
dplyrungroup()
# Calculate mean catch for ohi regions (using data from 1980 onward). These data are used to weight the RAM b/bmsy values
<- catch_zeros %>%
mean_catch filter(year >= 1980) %>%
group_by(rgn_id, fao_id, TaxonKey, stock_id) %>%
mutate(mean_catch_non_human = mean(catch_non_human, na.rm=TRUE)) %>% # mean catch for each stock (in a specific ohi-fao region)
filter(mean_catch_non_human != 0) %>% ## some stocks have no reported catch for time period
ungroup()
options(scipen = 999) # to prevent taxonkey from turning into scientific notation
<- mean_catch %>%
mean_catch_FOFM mutate(stock_id_taxonkey = paste(stock_id, TaxonKey, sep="_")) %>%
::select(rgn_id, stock_id_taxonkey, year, catch_non_human) %>%
dplyrfilter(year >= 2001) %>% # filter to include only analysis years
data.frame() %>%
rename(catch = catch_non_human)
write.csv(mean_catch_FOFM, file.path(here(), "globalprep/np/v2021/int/mean_catch_FOFM.csv"), row.names = FALSE)
<- read_csv(file.path(here(), "globalprep/fis/v2021/output/fis_bbmsy.csv"))
fis_bbmsy
<- read.csv(file.path(here(), "globalprep/fis/v2021/output/fis_bbmsy_gf.csv"))
fis_bbmsy_gf
<- read_csv(file.path(here(), "globalprep/np/v2021/int/mean_catch_FOFM.csv"))
catch_FOFM
## ignore this... we deleted underharvest penalty in v2021
# The following stocks are fished in multiple regions and often have high b/bmsy values
# Due to the underfishing penalty, this actually penalizes the regions that have the highest
# proportion of catch of these stocks.
# high_bmsy_filter <- dplyr::filter(fis_bbmsy, bbmsy>1.5 & year == 2015) %>%
# dplyr::group_by(stock_id) %>%
# dplyr::summarise(n = dplyr::n()) %>%
# data.frame() %>%
# dplyr::filter(n>3)
#
# high_bmsy <- high_bmsy_filter$stock_id
# b <- fis_bbmsy %>%
# dplyr::mutate(bbmsy = ifelse(stock_id %in% high_bmsy &
# bbmsy > 1, 1, bbmsy))
<- fis_bbmsy %>%
b ::mutate(bbmsy = ifelse(bbmsy > 1 , 1, bbmsy))
dplyr
<- catch_FOFM %>%
c ::mutate(stock_id_taxonkey = as.character(stock_id_taxonkey)) %>%
dplyr::mutate(taxon_key = sub('.*_', '', stock_id_taxonkey)) %>%
dplyr::mutate(stock_id = sub('_[^_]*$', '', stock_id_taxonkey)) %>%
dplyr::mutate(catch = as.numeric(catch)) %>%
dplyr::mutate(year = as.numeric(as.character(year))) %>%
dplyr::mutate(region_id = as.numeric(as.character(rgn_id))) %>%
dplyr::mutate(taxon_key = as.numeric(as.character(taxon_key))) %>%
dplyr::select(region_id, year, stock_id, taxon_key, catch)
dplyr
<- b %>%
b ::mutate(bbmsy = as.numeric(bbmsy)) %>%
dplyr::mutate(region_id = as.numeric(as.character(rgn_id))) %>%
dplyr::mutate(year = as.numeric(as.character(year))) %>%
dplyr::mutate(stock_id = as.character(stock_id))
dplyr
####
# Calculate scores for Bbmsy values
####
# *************NOTE *****************************
# These values can be altered
# ***********************************************
<- 0.5
alpha <- 0.25
beta <- 0.95
lowerBuffer <- 1.05
upperBuffer
$score = ifelse(
b$bbmsy < lowerBuffer,
b$bbmsy,
bifelse (b$bbmsy >= lowerBuffer &
$bbmsy <= upperBuffer, 1, NA)
b
)$score = ifelse(!is.na(b$score),
b$score,
bifelse(
1 - alpha * (b$bbmsy - upperBuffer) > beta,
1 - alpha * (b$bbmsy - upperBuffer),
beta
))
####
# Merge the b/bmsy data with catch data
####
<- c %>%
data_fis ::left_join(b, by = c('region_id' = 'rgn_id', 'stock_id', 'year')) %>%
dplyr::select(rgn_id = region_id, stock_id, year, taxon_key, catch, bbmsy, score)
dplyr
###
# Estimate scores for taxa without b/bmsy values
# Mean score of other fish in the region is the starting point
# Then a penalty is applied based on the level the taxa are reported at
###
## this takes the mean score within each region and year
<- data_fis %>%
data_fis_gf ::group_by(rgn_id, year) %>%
dplyr::mutate(mean_score = mean(score, na.rm = TRUE)) %>%
dplyr::ungroup()
dplyr
## this takes the mean score across all regions within a year
# (when no stocks have scores within a region)
<- data_fis_gf %>%
data_fis_gf ::group_by(year) %>%
dplyr::mutate(mean_score_global = mean(score, na.rm = TRUE)) %>%
dplyr::ungroup() %>%
dplyr::mutate(mean_score = ifelse(is.na(mean_score), mean_score_global, mean_score)) %>%
dplyr::select(-mean_score_global)
dplyr
# *************NOTE *****************************
# In some cases, it may make sense to alter the
# penalty for not identifying fisheries catch data to
# species level.
# ***********************************************
<- data.frame(TaxonPenaltyCode = 1:6,
penaltyTable penalty = c(0.1, 0.25, 0.5, 0.8, 0.9, 1))
<- data_fis_gf %>%
data_fis_gf ::mutate(TaxonPenaltyCode = as.numeric(substring(taxon_key, 1, 1))) %>%
dplyr::left_join(penaltyTable, by = 'TaxonPenaltyCode') %>%
dplyr::mutate(score_gf = mean_score * penalty) %>%
dplyr::mutate(method = ifelse(is.na(score), "Mean gapfilled", NA)) %>%
dplyr::mutate(gapfilled = ifelse(is.na(score), 1, 0)) %>%
dplyr::mutate(score = ifelse(is.na(score), score_gf, score))
dplyr
<- data_fis_gf %>%
gap_fill_data ::select(rgn_id,
dplyr
stock_id,
taxon_key,
year,
catch,
score,
gapfilled,
method)
write.csv(gap_fill_data, here('globalprep/np/v2021/output/NP_bbmsy_summary_gf.csv'), row.names = FALSE)
<- data_fis_gf %>%
score_data ::select(rgn_id, stock_id, year, catch, score)
dplyr
###
# Calculate status for each region
###
## Take a catch weighted average of B/Bmsy scores for each region/year.
<- score_data %>%
score_data ::group_by(year, rgn_id) %>%
dplyr::mutate(SumCatch = sum(catch)) %>%
dplyr::ungroup()
dplyr
<- score_data %>%
score_data ::group_by(rgn_id, year) %>%
dplyr::summarize(score = weighted.mean(score, catch)) %>%
dplyr::ungroup()
dplyr
summary(score_data)
write.csv(score_data, here('globalprep/np/v2021/output/np_fofm_scores.csv'), row.names = FALSE)
<- read_csv(here('globalprep/np/v2020/output/np_fofm_scores.csv'))
old summary(old)
<- read_csv(here('globalprep/np/v2021/output/np_fofm_scores.csv'))
new
# old_prod_weights <- read_csv("../v2020/output/np_product_weights.csv")
# prod_weights <- read_csv("output/np_product_weights.csv")
<- new %>%
check rename("new_score" = "score") %>%
left_join(old, by = c("rgn_id", "year")) %>%
mutate(diff = new_score - score) %>%
filter(year == 2017)
plot(check$new_score, check$score)
abline(0,1, col="red")