[REFERENCE RMD FILE: http://ohi-science.org/ohiprep_v2021/globalprep/np/v2021/STEP1c_np_fishfeed_prep.html]
This script takes the SAUP 2023 catch data, a list of fish oil/fish meal (FOFM) species, and B/Bmsy estimates to calculate a score for FOFM species per OHI region. The FIS data prep will need to be completed prior to this data prep.
catch_zeros
zerofill method to use
pivot_wider()
and pivot_longer()
instead of
spread()
and gather()
rgn_id
fill for improved
utility of data visualization, flipped axes so positive change is more
easily and more intuitively visualized ()Reference: Pauly D., Zeller D., Palomares M.L.D. (Editors), 2020. Sea Around Us Concepts, Design and Data (seaaroundus.org).
Downloaded: September 27, 2022
Description: Tons per year and SAUP region with information on sector type, industry type, fishing entitity, reporting status and taxonomic information.
Time range: 1950 - 2019
Format: CSV
Additional Information: Methods
Reference: Froehlich, H.E., Jacobsen, N.S., Essington, T.E., Clavelle, T., and Halpern, B.S. (2018). Avoiding the ecological limits of forage fish for fed aquaculture. Nature Sustainability 1, 298.
Downloaded: July 7, 2020. Obtained from Melanie Frazier (NCEAS).
Description: List of FOFM species from Watson v3 data.
Native data resolution:
Format: CSV format
Reference: RAM Legacy Stock Assessment Database v4.65, 06/17/2024
Downloaded: August 7, 2024
Description: B/Bmsy value by stock and year (other data, which we do not use, are also available in the database)
Native data resolution: stock (fish stock, species and region specific)
Time range: 1800 - 2023
Format: CSV format
Additional Information: We use the finalized b/bmsy layer from OHI-global for this data prep. We do not actually read in the raw RAM data here.
knitr::opts_chunk$set(eval = FALSE)
# ======= Load packages ============
if (!require(librarian)) {
install.packages("librarian")
library(librarian)
}
librarian::shelf(
ohicore, #devtools::install_github('ohi-science/ohicore@dev') #if relevant: restart session after reinstalling
here,
tidyverse,
#zoo,
plotly,
tictoc,
RColorBrewer
)
# ======= Set directories ===========
# Update scenario year, set up programmatic scenario year updates
scen_year_number <- 2024
scenario_year <- as.character(scen_year_number)
#prev_scen_year <- as.character(scen_year_number - 1)
data_dir_year <- paste0("d", scenario_year)
#prev_data_dir_year <- paste0("d", prev_scen_year)
v_scen_year <- paste0("v", scenario_year)
current_np_dir <- here::here("globalprep", "np", v_scen_year)
scen_year_fis_dir <- here::here("globalprep", "fis", v_scen_year)
# Load FAO-specific user-defined functions
source(here::here("workflow", "R", "fao_fxn.R")) # function for cleaning FAO files
source(here::here("workflow", "R", "common.R")) # directory locations
## This file makes it easier to process data for the OHI global assessment
## by creating the following objects:
##
## * dir_M = identifies correct file path to Mazu (internal server) based on your operating system
## * mollCRS = the crs code for the mollweide coordinate reference system we use in the global assessment
## * regions_shape() = function to load global shapefile for land/eez/high seas/antarctica regions
## * ohi_rasters() = function to load two rasters: global eez regions and ocean region
## * region_data() = function to load 2 dataframes describing global regions
## * rgn_syns() = function to load dataframe of region synonyms (used to convert country names to OHI regions)
## * low_pop() = function to load dataframe of regions with low and no human population
## * UNgeorgn = function to load dataframe of UN geopolitical designations used to gapfill missing data
source(here::here(current_np_dir, "R", "np_fxn.R")) # function for handling FAO commodity data specific to NP
Steps to this
This forage fish list is based on the older Watson fisheries data. Basically, the code takes the list from Froehlich et al. (2018) and then cross references it with the SAUP taxa data and identifies some extra SAUP taxa that appeared to be fish oil/fish meal (FOFM) fish.
The following is a list from: Froehlich, H.E., Jacobsen, N.S., Essington, T.E., Clavelle, T., and Halpern, B.S. (2018). Avoiding the ecological limits of forage fish for fed aquaculture. Nature Sustainability 1, 298.
They identify 238 forage fish species which account for >99% of forage fish catches in 2012.
~31 million tons per year of captured forage fish (since 1980).
Get the list of forage fish used for FOFM:
## Read in Froehlich list of forage fish species
forage <- read_csv(here(current_np_dir, "raw", "msleckman.61.1-CatchMSY_Nis_FAOAreas.csv"))
sort(unique(forage$Species)) #238 forage fish groups listed
# This was not updated in v2023 so we continue to use the 2022 version.
saup_taxonkey <- readr::read_csv(here(scen_year_fis_dir, "int", "taxon_key_v2022.csv"))
## Combined list:
foragefish_list <- data.frame(forage_fish = sort(unique(c(unique(forage$Species), saup_taxonkey$TaxonName[saup_taxonkey$forage_fish %in% 1]))))
foragefish_list <- data.frame(forage_fish = sort(unique(forage$Species)))
# Update column names
foragefish_list <- foragefish_list %>%
mutate(forage_fish = case_when(
forage_fish == "Ammodytes spp" ~ "Ammodytes",
forage_fish == "Clupea pallasii" ~ "Clupea pallasii pallasii",
forage_fish == "Clupeoidei" ~ "Clupeoids",
forage_fish == "Decapterus spp" ~ "Decapterus",
forage_fish == "Diplodus argenteus" ~ "Diplodus argenteus argenteus",
forage_fish == "Etrumeus teres" ~ "Etrumeus sadina",
forage_fish == "Gadiculus argenteus" ~ "Gadiculus argenteus thori",
forage_fish == "Gymnocephalus cernuus" ~ "Gymnocephalus cernua",
forage_fish == "Mullus barbatus" ~ "Mullus barbatus barbatus",
forage_fish == "Patagonotothen brevicauda" ~ "Patagonotothen brevicauda brevicauda",
forage_fish == "Rioraja agassizi" ~ "Rioraja agassizii",
forage_fish == "Sardinella spp" ~ "Sardinella",
forage_fish == "Trachurus spp" ~ "Trachurus",
TRUE ~ forage_fish
)) %>% # fix some name issues in the forage fish list to match the names in SAUP
left_join(saup_taxonkey, by = c("forage_fish" = "TaxonName")) %>%
mutate(inSAUP = ifelse(is.na(TaxonKey), NA, "yes")) ## Now only 9 missing spp.. this is good enough! Note: some of the species that are listed as in the SAUP data, are actually not in the SAUP data. This is because this forage fish list was originally derived from the Watson data, and there are some Watson species which are not in the SAUP data. This is not a problem.
# sum(is.na(foragefish_list$inSAUP))
## 10 missing spp. in v2024
readr::write_csv(foragefish_list, here(current_np_dir, "int", "master_taxa_list_SAUP.csv"))
# This was not updated in v2023 or 2024 so we call out to the v2022 year on Mazu
file <- file.path(dir_M, 'git-annex/globalprep/fis/v2022/int/stock_catch_by_rgn_taxa.csv')
# read in Sea Around Us catch data
catch_raw <- readr::read_csv(file)
# update column names
catch <- catch_raw %>%
rename(common = CommonName, fao_id = fao_rgn, species = TaxonName)
# view summary
summary(catch)
## filter out non ohi eez regions if there are any
catch <- catch %>%
filter(!is.na(rgn_id)) %>%
filter(!is.na(fao_id)) %>%
filter(rgn_id <= 250) %>%
filter(rgn_id != 213) # drop Antarctica
## need to get TaxonKey's for each species to join with catch
# read in list we wrote out earlier
foragefish_list <- readr::read_csv(here::here(current_np_dir, "int", "master_taxa_list_SAUP.csv"))
# find missing species
missing_spp <- foragefish_list %>%
filter(is.na(TaxonKey))
# v2024 missing species:
# [1] "Ariomma indica" "Blicca bjoerkna" "Centrolabrus exoletus"
# [4] "Citharidae" "Hemiramphus balao" "Liza saliens"
# [7] "Percarina demidoffi" "Sardinops caeruleus" "Sardinops melanostictus"
# [10] "Sardinops ocellatus
# test <- catch %>%
# filter(str_detect(paste(missing_spp$forage_fish, collapse = "|"), species)) # just make sure the NA species are actually not in the SAUP data
# sort(unique(test$species)) # no full matches..
# # v2024: [1] "Hemiramphus" "Liza"
# ---- Join forage fish species with catch data ----
forage_fish_taxa_list <- foragefish_list %>%
# subset to only keep catch data for which we have forage species data
left_join(catch, by = c("forage_fish" = "species", "TaxonKey")) %>%
dplyr::select(forage_fish, inSAUP, TaxonKey) %>% # drop stock, common name, and extra catch df columns
unique() %>% # return unique combinations of grouping variables in the data frame
mutate(inSAUP = ifelse(
# if TaxonKey is NA, set inSAUP to NA
is.na(TaxonKey), NA, inSAUP))
# save a list of species not in SAUP data for bookkeeping
forage_fish_no_saup <- forage_fish_taxa_list %>%
filter(is.na(TaxonKey))
# write out missing species list to int
readr::write_csv(forage_fish_no_saup, here(current_np_dir, "int", "forage_fish_not_in_SAUP.csv"))
# join catch data set with forage fish taxa list
catch_fishfeed <- catch %>%
left_join(forage_fish_taxa_list, by = c("TaxonKey")) %>%
# subset to only include forage fish species
dplyr::filter(!is.na(forage_fish))
#nrow(catch_fishfeed)
#[1] 600864 -- v2024
# write output to int folder in np/scenario year directory
# write.csv(catch_fishfeed, file.path(here(), paste0("globalprep/np/v", scenario_year, "/int/saup_catch_forage_fish.csv")), row.names = FALSE)
#write.csv(catch_fishfeed, here(current_np_dir, "int", "saup_catch_forage_fish.csv"), row.names = F)
tic()
readr::write_csv(catch_fishfeed, here::here(current_np_dir, "int", "saup_catch_forage_fish.csv"))
toc()
# switching to write_csv reduced file size from 74.9 MB to 68.2 MB and significantly reduced writing time (from ~2 minutes to ~1 second)
test <- catch_fishfeed %>%
group_by(year) %>%
summarise(sum = sum(tons))
#summary(test)[4, 2]
## looks good.. ~30 million per year in v2023 and v2024
Justification from the Froelich et al. 2018: “Currently, it is estimated about 10% of forage fish enter the human diet directly, but the notoriously tiny-boned fish are labour intensive (thus expensive) to process for human consumption, are the foundation of several industries and thus jobs (creating inertia to change) and are not the preferred fish type for most people.”
#library(tidyverse)
#library(here)
# catch_fishfeed <- read_csv(file.path(here(), paste0("globalprep/np/v", scenario_year, "/int/saup_catch_forage_fish.csv"))) # leaving v2023 code in case future fellows run into file path issues when using `here()`
# read in SAUP catch data subsetted to forage fish species
catch_fishfeed <- readr::read_csv(here::here(current_np_dir, "int", "saup_catch_forage_fish.csv"))
#lobstr::obj_size(catch_fishfeed)
#> 72.20 MB
# Multiply by 0.90 to reflect the amount going to feed/oils, not human consumption
catch_non_human <- catch_fishfeed %>%
mutate(tons_non_human = tons * 0.9)
# for each region, FAO ID, Taxon Key, stock ID, and year,
# find sum of non-human-consumption fish harvest
catch_non_human <- catch_non_human %>%
dplyr::select(year, rgn_id, fao_id, stock_id, TaxonKey, tons_non_human) %>%
group_by(rgn_id, fao_id, TaxonKey, stock_id, year) %>%
summarize(catch_non_human = sum(tons_non_human)) %>%
ungroup()
# lobstr::obj_size(catch_non_human)
# 7.95 MB
test <- catch_non_human %>%
group_by(year) %>%
summarise(sum = sum(catch_non_human))
# looks good ~30 mil per year -- v2021 (this copied comment is in v2021-v2023)
#summary(test)[4, 2]
# ~ 26.6 mil per year -- v2024
# --------- Zero-fill -----------------------
## these data have no zero catch values, so add years with no reported catch to data table:
catch_zeros <- catch_non_human %>%
spread(year, catch_non_human) %>%
data.frame() %>%
gather("year", "catch_non_human", num_range("X", min(catch_non_human$year):max(catch_non_human$year))) %>%
mutate(year = as.numeric(gsub("X", "", year))) %>%
mutate(catch_non_human = ifelse(is.na(catch_non_human), 0, catch_non_human))
# v2024 update to not use superseded functions `spread()` and `gather()`:
catch_year_zeros <- catch_non_human %>%
pivot_wider(names_from = year, values_from = catch_non_human,
names_prefix = "x_",
values_fill = 0) %>%
pivot_longer(cols = starts_with("x_"),
names_to = "year",
values_to = "catch_non_human") %>%
mutate(year = as.numeric(gsub("x_", "", year))) %>%
arrange(year)
# these methods yield functionally identical outputs:
# > sum(catch_zeros == catch_year_zeros)
# [1] 1691340
# > length(catch_zeros) * nrow(catch_zeros)
# [1] 1691340
# the primary difference is that the newer method yields an object of class `tbl_df`, `tbl`, and `data.frame`, while the old method yields an object of class `data.frame`.
#all.equal(catch_year_zeros, catch_zeros)
#attributes(catch_year_zeros); attributes(catch_zeros)
# lobstr::obj_size(catch_zeros)
# lobstr::obj_size(catch_year_zeros)
# lobstr::obj_size(catch_non_human)
# lobstr::obj_size(catch_year_zeros)
## this part eliminates the zero catch values prior to the first reported non-zero catch
catch_year_zeros <- catch_year_zeros %>%
group_by(fao_id, TaxonKey, stock_id, rgn_id) %>%
arrange(year) %>%
mutate(cum_catch = cumsum(catch_non_human)) %>%
filter(cum_catch > 0) %>%
dplyr::select(-cum_catch) %>%
ungroup()
# Calculate mean catch for ohi regions (using data from 1980 onward). These data are used to weight the RAM b/bmsy values
mean_catch <- catch_year_zeros %>%
filter(year >= 1980) %>%
group_by(rgn_id, fao_id, TaxonKey, stock_id) %>%
mutate(mean_catch_non_human = mean(catch_non_human, na.rm=TRUE)) %>% # mean catch for each stock (in a specific ohi-fao region)
filter(mean_catch_non_human != 0) %>% ## some stocks have no reported catch for time period
ungroup()
options(scipen = 999) # to prevent taxonkey from turning into scientific notation
mean_catch_FOFM <- mean_catch %>%
mutate(stock_id_taxonkey = paste(stock_id, TaxonKey, sep="_")) %>%
dplyr::select(rgn_id, stock_id_taxonkey, year, catch_non_human) %>%
filter(year >= 2001) %>% # filter to include only analysis years
data.frame() %>%
rename(catch = catch_non_human)
# write.csv(mean_catch_FOFM, file.path(here(), paste0("globalprep/np/v", scenario_year, "/int/mean_catch_FOFM.csv")), row.names = FALSE)
readr::write_csv(mean_catch_FOFM, here(current_np_dir, "int", "mean_catch_FOFM.csv"))
# fis_bbmsy <- read_csv(file.path(here(), paste0("globalprep/fis/v", scenario_year, "/output/fis_bbmsy.csv")))
fis_bbmsy <- read_csv(here(scen_year_fis_dir, "output", "fis_bbmsy.csv"))
fis_bbmsy_gf <- read_csv(here(scen_year_fis_dir, "output", "fis_bbmsy_gf.csv"))
catch_FOFM <- read_csv(here(current_np_dir, "int", "mean_catch_FOFM.csv"))
b <- fis_bbmsy %>%
dplyr::mutate(bbmsy = ifelse(bbmsy > 1 , 1, bbmsy)) # no underharvest penalty anymore!
c <- catch_FOFM %>%
dplyr::mutate(stock_id_taxonkey = as.character(stock_id_taxonkey)) %>%
dplyr::mutate(taxon_key = sub('.*_', '', stock_id_taxonkey)) %>%
dplyr::mutate(stock_id = sub('_[^_]*$', '', stock_id_taxonkey)) %>%
dplyr::mutate(catch = as.numeric(catch)) %>%
dplyr::mutate(year = as.numeric(as.character(year))) %>%
dplyr::mutate(region_id = as.numeric(as.character(rgn_id))) %>%
dplyr::mutate(taxon_key = as.numeric(as.character(taxon_key))) %>%
dplyr::select(region_id, year, stock_id, taxon_key, catch)
b <- b %>%
dplyr::mutate(bbmsy = as.numeric(bbmsy),
region_id = as.numeric(as.character(rgn_id)),
year = as.numeric(as.character(year)),
stock_id = as.character(stock_id))
####
# ==== Calculate scores for Bbmsy values ==== ####
# *************NOTE *****************************
# These values can be altered
# ***********************************************
## The upper limit can be deleted, since we already cap pedscores at 1 anyways. We still want to keep the lower buffer.
alpha <- 0.5
beta <- 0.25
lowerBuffer <- 0.95
upperBuffer <- 1.05
b$score = ifelse(
b$bbmsy < lowerBuffer,
b$bbmsy,
ifelse (b$bbmsy >= lowerBuffer &
b$bbmsy <= upperBuffer, 1, NA)
)
b$score = ifelse(!is.na(b$score),
b$score,
ifelse(
1 - alpha * (b$bbmsy - upperBuffer) > beta,
1 - alpha * (b$bbmsy - upperBuffer),
beta
))
####
# Merge the b/bmsy data with catch data ------------------------------
####
data_fis <- c %>%
dplyr::left_join(b, by = c('region_id' = 'rgn_id', 'stock_id', 'year')) %>%
dplyr::select(rgn_id = region_id, stock_id, year, taxon_key, catch, bbmsy, score)
###
# Estimate scores for taxa without b/bmsy values --------------------
# Mean score of other fish in the region is the starting point
# Then a penalty is applied based on the level the taxa are reported at
###
## this takes the mean score within each region and year
data_fis_gf <- data_fis %>%
dplyr::group_by(rgn_id, year) %>%
dplyr::mutate(mean_score = mean(score, na.rm = TRUE)) %>%
dplyr::ungroup()
## this takes the mean score across all regions within a year
# (when no stocks have scores within a region)
data_fis_gf <- data_fis_gf %>%
dplyr::group_by(year) %>%
dplyr::mutate(mean_score_global = mean(score, na.rm = TRUE)) %>%
dplyr::ungroup() %>%
dplyr::mutate(mean_score = ifelse(is.na(mean_score), mean_score_global, mean_score)) %>%
dplyr::select(-mean_score_global)
# *************NOTE *****************************
# In some cases, it may make sense to alter the
# penalty for not identifying fisheries catch data to
# species level.
# ***********************************************
penaltyTable <- data.frame(TaxonPenaltyCode = 1:6,
penalty = c(0.1, 0.25, 0.5, 0.8, 0.9, 1))
data_fis_gf <- data_fis_gf %>%
dplyr::mutate(TaxonPenaltyCode = as.numeric(substring(taxon_key, 1, 1))) %>%
dplyr::left_join(penaltyTable, by = 'TaxonPenaltyCode') %>%
dplyr::mutate(score_gf = mean_score * penalty) %>%
dplyr::mutate(method = ifelse(is.na(score), "Mean gapfilled", NA)) %>%
dplyr::mutate(gapfilled = ifelse(is.na(score), 1, 0)) %>%
dplyr::mutate(score = ifelse(is.na(score), score_gf, score))
gap_fill_data <- data_fis_gf %>%
dplyr::select(rgn_id,
stock_id,
taxon_key,
year,
catch,
score,
gapfilled,
method)
# write.csv(gap_fill_data, here(paste0("globalprep/np/v", scenario_year, "/output/NP_bbmsy_summary_gf.csv")), row.names = FALSE)
readr::write_csv(gap_fill_data, here::here(current_np_dir, "output", "NP_bbmsy_summary_gf.csv"))
score_data <- data_fis_gf %>%
dplyr::select(rgn_id, stock_id, year, catch, score)
###
# Calculate status for each region
###
## Take a catch weighted average of B/Bmsy scores for each region/year.
score_data <- score_data %>%
dplyr::group_by(year, rgn_id) %>%
dplyr::mutate(SumCatch = sum(catch)) %>%
dplyr::ungroup()
score_data <- score_data %>%
dplyr::group_by(rgn_id, year) %>%
dplyr::summarize(score = weighted.mean(score, catch)) %>%
dplyr::ungroup()
summary(score_data)
# write.csv(score_data, here(paste0("globalprep/np/v", scenario_year, "/output/np_fofm_scores.csv")), row.names = FALSE)
readr::write_csv(score_data, here(current_np_dir, "output", "np_fofm_scores.csv"))
old <- readr::read_csv(here::here("globalprep", "np",
# previous year in NP
paste0("v", as.character(as.numeric(scenario_year) - 1)),
"output", "np_fofm_scores.csv"))
summary(old)
new <- readr::read_csv(here::here(current_np_dir, "output", "np_fofm_scores.csv"))
summary(new)
# load ohi regions
region_data()
# Investigating changes in data for same year in different data/scenario years
check <- new %>%
rename("new_score" = "score") %>%
left_join(old, by = c("rgn_id", "year")) %>%
left_join(rgns_all, by = "rgn_id") %>%
select(rgn_id, year, new_score, score, rgn_name) %>%
mutate(diff = new_score - score) %>%
filter(year == 2019)
ab_scores <- ggplot(check, aes(x = score, y = new_score, fill = rgn_id)) +
geom_abline(intercept = 0, slope = 1, color = "#3498DB", linewidth = 1.5) + # Change color and size of line
geom_point(size = 3) + # Increase point size
theme_minimal() +
labs(x = "Score", y = "New Score",
title = "New Score vs. Score",
subtitle = "Year: 2017") +
theme(legend.position = "none")
ggplotly(ab_scores)