This script takes the SAUP 2022 catch data and creates 1 data layer:
IMPORTANT NOTE: the fisheries subgoal data preps will need to be run first before this can be completed. That is where the RAM stock status dataprep and the fisheries catch download occurs.
The SAUP data was not updated in v2023 or v2024, so this script uses the 2019 SAUP data for catch. Raw and intermediate data stored on Mazu is pulled from v2022.
Reference: Pauly D., Zeller D., Palomares M.L.D. (Editors), 2020. Sea Around Us Concepts, Design and Data (seaaroundus.org)
Downloaded: September 27, 2022
Description: Tons per year and SAUP region with information on sector type, industry type, fishing entitity, reporting status and taxonomic information.
Time range: 1950 - 2019
Format: CSV
Additional Information: Methods
Note: the same data was used to prepare fisheries subgoal scores. We will be using the prepped annual catch csv file prepared in the mazu fis/ folder. This means that fis will need to be run before this.
knitr::opts_chunk$set(fig.width = 6, fig.height = 4, fig.path = 'figs/',message = FALSE, warning = FALSE, echo = TRUE, eval=FALSE)
## Libraries
library(readr)
library(dplyr)
library(parallel)
library(purrr)
library(stringr)
library(tidyr)
library(foreach)
library(here)
library(sf)
library(tidyverse)
library(readxl)
source(here("workflow", "R", "common.R"))
#update as needed
version_year <- "v2024"
raw_data_version_year <- "v2022"
## Paths for data in Mazu
fis_path <- here::here(dir_M,"git-annex","globalprep","fis","v2022","int") # filepath to fisheries path
## Intermediate output directory
int_dir <- here::here("globalprep","ao", version_year,"intermediate")
The SAUP data is separated into commercial, recreational, subsistence, and artisanal fishing. We will only grab the artisanal and subsistence catch for Artisanal Opportunities.
Look at catch data
Aggregate catch per OHI region and FAO area. This catch will be used twice.
The catch is used to weight scores per region. For this we need to use catch records, including those not reported at the species level. See note below.
The catch data at species level is used to calculate stock status (BBmsy) per stock (remember that our definition of a stock is a species caught within a single FAO area).
Note: Save IUU and Reported only (CatchTotal
) as the
catch sum.
NonIndustrial Catch
output_df <- catch %>%
dplyr::filter(discard_id != 1) %>% # filter out discards
dplyr::filter(other_id != 1) %>% # filter out catch not used for humans
dplyr::select(year, rgn_id, fao_rgn, TaxonName, CommonName, human_use_id, fishing_sector, tons, stock_id, TaxonKey) %>%
dplyr::filter(fishing_sector %in% c("Subsistence", "Artisanal")) %>%
dplyr::group_by(year, rgn_id, fao_rgn, TaxonName, CommonName, stock_id, TaxonKey) %>%
dplyr::summarise(tons = sum(tons)) %>%
dplyr::ungroup()
write_csv(output_df, file = here(dir_M,"git-annex","globalprep","ao","v2024","int","ao_stock_catch_by_rgn_taxa.csv"))
test <- output_df %>%
group_by(year) %>%
summarise(sum = sum(tons)) # looks good
Take a look at catch data with missing ohi and fao regions in stock_catch_by_rgn_taxa. These have taxon key matches, but no ohi or fao regions assigned to them.
region_data()
df <- read_csv(here::here(dir_M,'git-annex', 'globalprep', 'ao','v2024','int','ao_stock_catch_by_rgn_taxa.csv')) %>%
left_join(rgns_eez)
# they all have ohi or fao regions; however there are only 197 regions with artisanal or subsistence catch in the SAUP data.
length(unique(df$rgn_id))
unique(df$year)
Mean catch data is used to weight the B/Bmsy values in the fishery subgoal.
file <- here::here(dir_M,'git-annex', 'globalprep', 'ao','v2024','int','ao_stock_catch_by_rgn_taxa.csv')
catch <- read_csv(file) %>%
rename(common = CommonName, fao_id = fao_rgn, species=TaxonName)
summary(catch)
## filter out non ohi eez regions
catch <- catch %>%
filter(!is.na(rgn_id)) %>%
filter(!is.na(fao_id)) %>%
filter(rgn_id <= 250) %>%
filter(rgn_id != 213)
## calculate total annual catch for each stock
catch <- catch %>%
dplyr::select(year, rgn_id, fao_id, stock_id, TaxonKey, tons) %>%
group_by(rgn_id, fao_id, TaxonKey, stock_id, year) %>%
summarize(catch = sum(tons)) %>%
ungroup()
# missing <- c("Belgium", "Latvia", "Estonia", "Kerguelen Islands", "Faeroe Islands", "Iceland", "Finland", "Poland", "Lithuania", "Sweden")
#
# test <- catch %>%
# left_join(rgns_eez) %>%
# filter(rgn_name %in% missing)
# sort(unique(test$rgn_name))
# sort(missing)
Take a look at a few stocks.
For years with no reported catch, add zero values (after first reported catch)
## these data have no zero catch values, so add years with no reported catch to data table:
catch_zeros <- catch %>%
spread(year, catch) %>%
data.frame() %>%
gather("year", "catch", num_range("X", min(catch$year):max(catch$year))) %>%
mutate(year = as.numeric(gsub("X", "", year))) %>%
mutate(catch = ifelse(is.na(catch), 0, catch))
## this part eliminates the zero catch values prior to the first reported non-zero catch
catch_zeros <- catch_zeros %>%
group_by(fao_id, TaxonKey, stock_id, rgn_id) %>%
arrange(year) %>%
mutate(cum_catch = cumsum(catch)) %>%
filter(cum_catch > 0) %>%
dplyr::select(-cum_catch) %>%
ungroup()
Calculate mean catch for ohi regions (using data from 1980 onward). These data are used to weight the RAM b/bmsy values.
mean_catch <- catch_zeros %>%
filter(year >= 1980) %>%
group_by(rgn_id, fao_id, TaxonKey, stock_id) %>%
mutate(mean_catch = mean(catch, na.rm=TRUE)) %>% # mean catch for each stock (in a specific ohi-fao region)
filter(mean_catch != 0) %>% ## some stocks have no reported catch for time period
ungroup()
Check out the data
options(scipen = 999) # to prevent taxonkey from turning into scientific notation
mean_catch_toolbox <- mean_catch %>%
mutate(stock_id_taxonkey = paste(stock_id, TaxonKey, sep="_")) %>%
dplyr::select(rgn_id, stock_id_taxonkey, year, mean_catch) %>%
filter(year >= 2001) %>% # filter to include only analysis years
data.frame()
write_csv(mean_catch_toolbox, here::here(int_dir, "mean_catch.csv")) ## save the total mean catch csv for reference if needed
length(unique(mean_catch_toolbox$rgn_id)) # only 196 regions... We will gapfill the missing regions...
old <- read_csv(here("globalprep","ao","v2021","intermediate","mean_catch.csv"))
length(unique(old$rgn_id)) # only 196 regions