ohi logo
OHI Science | Citation policy

Summary

This script prepares the final B/Bmsy data: 1. Calculates the 5 year running average of B/Bmsy data generated using the CMSY method 2. Obtains a B/Bmsy value for each catch record (each FAO/OHI/year/species combination), prioritizing RAM data

Updates from previous assessment

  • New data (from steps 4a/4b) added in v2024

Data

B/Bmsy values from the RAM Legacy Stock Assessment data are generated in RAM_data_prep.Rmd

B/Bmsy values from the CMSY method are generated in calculate_bbmsy.Rmd

Mean catch data created in catch_data_prep.Rmd


Setup

knitr::opts_chunk$set(fig.width = 6, fig.height = 4, fig.path = 'figs/',message = FALSE, warning = FALSE, eval=FALSE)
library(dplyr)
library(tidyr)
library(zoo)
library(stringr)
library(readr)
library(here)
library(plotly)

source(here("workflow", "R", "common.R"))

# ---- set year and file path info ----
current_year <- 2024 # Update this in the future!!
version_year <- paste0("v",current_year)
data_dir_version_year <- paste0("d", current_year)

# intermediate directory
int_dir <- here("globalprep", "fis", version_year, "int")

# output directory
output_dir <- here("globalprep", "fis", version_year, "output")

Prepare B/Bmsy values from CMSY model

For the CMSY generated B/Bmsy values we use the five year running mean of the values to smooth the data and to account for model uncertainty.

cmsy <- read_csv(here::here(output_dir, "cmsy_bbmsy.csv")) %>%
  filter(!is.na(bbmsy_mean)) %>%
    dplyr::select(stock_id, year, bbmsy_q2.5,bbmsy_q97.5,bbmsy_sd, bbmsy_mean, model) %>%
    arrange(stock_id, year) %>%
    group_by(stock_id) %>%
    mutate(mean_5year = rollmean(bbmsy_mean, 5, align="right", fill=NA))

write_csv(cmsy, here(int_dir, "cmsy_b_bmsy_mean5yrs.csv"))

Combine RAM and CMSY B/Bmsy values and Watson catch data

A few regions have multiple RAM stocks for the same species (see scientific name). In these cases, we will average the B/Bmsy values of the species, weighted by the area of the RAM stock.

Read in the three data tables:

cmsy <- read_csv(here(int_dir, "cmsy_b_bmsy_mean5yrs.csv")) %>%
  dplyr::select(stock_id, year, cmsy_bbmsy=mean_5year)

ram <- read_csv(here(int_dir, "ram_bmsy.csv")) %>% # final output from RAM_data_prep
  rename(stock_id = stockid) # to match other two data tables

mean_catch <- read_csv(here(output_dir, "mean_catch_minus_feed.csv")) %>% # final output from Watson catch
  mutate(taxon_key = str_extract(stock_id_taxonkey, "(\\d)+$")) %>% # extract ending consecutive digits
  mutate(stock_id = str_extract(stock_id_taxonkey, "^(\\w+).(\\d){1,2}")) 

Check number of Watson stocks that have CMSY or RAM B/Bmsy values:

## SAUP v RAM BBMSY
length(unique(setdiff(ram$stock_id, mean_catch$stock_id))) # v2024: 77 stocks
length(unique(setdiff(mean_catch$stock_id, ram$stock_id))) # v2024: 8479 stocks
length(unique(intersect(ram$stock_id, mean_catch$stock_id))) 
#332 stocks with RAM-B/Bmsy data (although RAM is matched by fao and rgn ids) - v2020
#367 stocks with RAM-B/Bmsy data (although RAM is matched by fao and rgns ids) - v2021
# 374 stocks with RAM-B/Bmsy data (although RAM is matched by fao and rgns ids) - v2022
# 398 stocks with RAM-B/Bmsy data (although RAM is matched by fao and rgns ids) - v2023
# 396 stocks with RAM-B/Bmsy data (although RAM is matched by fao and rgn ids) - v2024

## SAUP v CMSY
setdiff(cmsy$stock_id, mean_catch$stock_id) # v2024: 7 stocks in cmsy not in mean_catch
length(unique(setdiff(mean_catch$stock_id, cmsy$stock_id))) # v2024: 7853 in mean_catch not in cmsy
length(intersect(mean_catch$stock_id, cmsy$stock_id)) # v2024: 1022 stocks with CMSY-B/Bmsy data
#703 stocks with CMSY-B/Bmsy data - v2020
#1014 stocks with CMSY-B/Bmsy data; WOW! - v2021
#1022 stocks with CMSY-B/Bmsy data; WOW! - v2022
#1022 stocks with CMSY-B/Bmsy data; WOW! - v2023 (didn't change because none of these were updated this year)
# 1022 stocks with CMSY-B/Bmsy data; WOW! - v2024 (didn't change because none of these were updated this year)

Combine Watson to RAM-B/Bmsy:

data <- mean_catch %>%
  left_join(ram, by=c('rgn_id', 'stock_id', "year"))

# 608006 catch records (catch from specific fao and ohi regions) when joined with ram increases to 612599 because there are multiple stocks for some species - v2019
# 704173 catch records (catch from specific fao and ohi regions) when joined with ram increases to 709274 because there are multiple stocks for some species - v2020
# 609501 catch records (catch from specific fao and ohi regions) when joined with ram increases to 615043 because there are multiple stocks for some species; we see this decrease in catch records from last year because we are using new fisheries data (SAUP) this year - v2021
# 648270 catch records (catch from specific fao and ohi regions) when joined with ram increases to 653878 because there are multiple stocks for some species; we see this increase in catch records from last year because we are using an extra year of catch data (SAUP) this year - v2022
# 654524 catch records (catch from specific fao and ohi regions) when joined with ram increases to 653878 because there are multiple stocks for some species; we see this increase in catch records from last year because we are using an extra year of RAM data - v2023
# 648270 catch records (catch from specific fao and ohi regions) when joined with ram increases to 653783 because there are multiple stocks for some species; we see this decrease in catch records from last year because there were more unused stocks than new stocks - v2024


sum(!is.na(data$ram_bmsy))/nrow(data) # about 7.32% of catch records have RAM data - v2024
perc_ram <- sum(data$mean_catch[!is.na(data$ram_bmsy)])/sum(data$mean_catch) *100 # about 46.3% of tons of catch have RAM data (this is less than last year, which was 47%!) - v2024
# to see how much data is sourced from RAM vs CMSY 
compare_ram_cmsy <- data.frame(
  category = c("RAM", "CMSY"),
  percentage = c(perc_ram, 100 - perc_ram)
)

# visualize the distribution of RAM vs CMSY data used
ggplot(compare_ram_cmsy, aes(x = category, y = percentage)) +
  geom_bar(stat = "identity", width = 0.5) +
  coord_flip() +
  scale_y_continuous(limits = c(0, 100), 
                     breaks = seq(0, 100, by = 20)) +
  labs(title = "RAM vs CMSY Data Usage for B/Bmsy (v2024)",
       x = "",
       y = "Percent of total catch") +
  theme_minimal()

Save & view duplicate stocks:

sum(duplicated(paste(data$rgn_id, data$stock_id, data$year, sep="_"))) 
# 7773 regions with multiple RAM stocks (stockid_ram) for the same species (see scientific name in stockid) - v2019
# 8028 regions with multiple RAM stocks (stockid_ram) for the same species - v2020 
# 5542 regions with multiple RAM stocks (stockid_ram) for the same species - v2021
# 5608 regions with multiple RAM stocks (stockid_ram) for the same species - v2022
# 6254 regions with multiple RAM stocks (stockid_ram) for the same species - v2023
# 5513 regions with multiple RAM stocks (stockid_ram) for the same species - v2024


## save the duplicate stock values to take a look at an example
tmp <- data[duplicated(paste(data$rgn_id, data$stock_id, data$year, sep="_")), ]

## Examples of a region with multiple RAM stocks of the same species
filter(data, rgn_id == 9, year == 2001, stock_id == "Thunnus_alalunga-71") # stocks ALBANPAC and ALBASPAC are the same species; mean catch 1350.19 tonnes

Regions with multiple stocks of the same species will have B/Bmsy values averaged, weighted by the area of the RAM stock within the region

## Group by location, year, and species before taking a weighted mean of the catch
data <- data %>%
  group_by(rgn_id, taxon_key, stock_id, year, mean_catch) %>%   
  summarize(ram_bmsy = ifelse(all(!is.na(RAM_area_m2)), weighted.mean(ram_bmsy, RAM_area_m2, na.rm=TRUE), mean(ram_bmsy, na.rm = TRUE)),
            gapfilled = ifelse(all(is.na(gapfilled)), NA, max(gapfilled, na.rm=TRUE)),
            method = paste(method, collapse = ", ")) %>%
  ungroup()

## check that averaging went ok - compare with mean catch values earlier (1350.19)
filter1 <- filter(data, rgn_id == 9, year == 2001, stock_id == "Thunnus_alalunga-71") # v2024: all good, still 1350.19


## check example of duplicate stock catch with ram_bmsy but no RAM_area_m2 value, ram_bmsy should not be NA
filter2 <- filter(data, rgn_id == 224, year == 2019, stock_id ==  "Heterocarpus_reedi-87") # v2024: all good


# add in the B/Bmsy values from the CMSY approach
data <- data %>%
  left_join(cmsy, by=c("stock_id", "year"))

summary(data)
# v2024: 
#        rgn_id       taxon_key           stock_id              year        mean_catch       
#  Min.   :  1.0   Length:648270      Length:648270      Min.   :2001   Min.   :      0.0  
#  1st Qu.: 60.0   Class :character   Class :character   1st Qu.:2006   1st Qu.:      0.3  
#  Median :137.0   Mode  :character   Mode  :character   Median :2011   Median :      9.6  
#  Mean   :124.8                                         Mean   :2010   Mean   :   1800.8  
#  3rd Qu.:183.0                                         3rd Qu.:2015   3rd Qu.:    162.8  
#  Max.   :250.0                                         Max.   :2019   Max.   :2960647.0  
#                                                                                          
#     ram_bmsy        gapfilled         method            cmsy_bbmsy    
#  Min.   : 0.0     Min.   :0.0      Length:648270      Min.   :0.0     
#  1st Qu.: 0.7     1st Qu.:0.0      Class :character   1st Qu.:0.5     
#  Median : 1.1     Median :0.0      Mode  :character   Median :1.1     
#  Mean   : 1.2     Mean   :0.1                         Mean   :1.0     
#  3rd Qu.: 1.6     3rd Qu.:0.0                         3rd Qu.:1.3     
#  Max.   :21.7     Max.   :1.0                         Max.   :1.9     
#  NA's   :605897   NA's   :605897                      NA's   :503545  

A lot of the NAs for both RAM-bmsy and CMSY are due to unmatched SAUP-RAM stocks.

Formatting and saving final data

B/Bmsy values for each catch record are generated (for the species where this is possible) and saved. A corresponding gapfilling dataset is also saved.

 data_gf <- data %>%
   mutate(bmsy_data_source = ifelse(!is.na(ram_bmsy), "RAM", NA)) %>%
   mutate(bmsy_data_source = ifelse(is.na(bmsy_data_source) & !is.na(cmsy_bbmsy), "CMSY", bmsy_data_source)) %>%
   mutate(bbmsy = ifelse(is.na(ram_bmsy), cmsy_bbmsy, ram_bmsy)) %>%
   dplyr::select(rgn_id, stock_id, taxon_key, year, bbmsy, bmsy_data_source, RAM_gapfilled=method, mean_catch) %>%
   filter(year >= 2001)

old <- read_csv(here::here("globalprep","fis","v2023","output","fis_bbmsy_gf.csv"))

summary(old)
summary(data_gf)
length(unique(data_gf$rgn_id)) # v2024: 220

write_csv(data_gf, here(output_dir,"fis_bbmsy_gf.csv")) 

data_gf <- read_csv(here(output_dir, "fis_bbmsy_gf.csv"))

bbmsy <- data_gf %>%
  dplyr::select(rgn_id, stock_id, year, bbmsy) %>%
  dplyr::filter(!is.na(bbmsy))
  
bbmsy_dups_fixed <- bbmsy %>%
  group_by(rgn_id, stock_id, year) %>%
  summarise(bbmsy = mean(bbmsy)) %>% ## account for the duplicated TaxonName/CommonName noted in "catch_data_prep.Rmd" (if there were any)...
  ungroup()
#old <- read_csv(file.path("../v2020/output/fis_bbmsy.csv"))

write_csv(bbmsy_dups_fixed, here(output_dir, "fis_bbmsy.csv")) 


## check against old 
data_gf_old<- read_csv(here("globalprep","fis","v2023","output", "fis_bbmsy_gf.csv"))

bbmsy <- data_gf_old %>%
  dplyr::select(rgn_id, stock_id, year, bbmsy) %>%
  dplyr::filter(!is.na(bbmsy))
  
bbmsy_dups_fixed_old <- bbmsy %>%
  group_by(rgn_id, stock_id, year) %>%
  summarise(bbmsy = mean(bbmsy)) %>% ## account for the duplicated TaxonName/CommonName noted in "catch_data_prep.Rmd" (if there were any)...
  ungroup()
#old <- read_csv(file.path("../v2020/output/fis_bbmsy.csv"))

Data check

## check regions (pitcairn and usa) for changes
old <- read_csv(here("globalprep","fis","v2023","output","fis_bbmsy_gf.csv"))

new <- read_csv(here("globalprep","fis","v2024","output","fis_bbmsy_gf.csv"))

old_pit <- old %>%
  filter(rgn_id == 146)
mean(old_pit$bbmsy, na.rm = TRUE) # 1.135239

new_pit <- new %>%
  filter(rgn_id == 146)
mean(new_pit$bbmsy, na.rm = TRUE) # 1.135239 -- the exact same in v2024

old_usa <- old %>%
  filter(rgn_id == 163)
mean(old_usa$bbmsy, na.rm = TRUE) # 1.136894

new_usa <- new %>%
  filter(rgn_id == 163)
mean(new_usa$bbmsy, na.rm = TRUE) # 1.150863 -- not the exact same, but close! v2024

# v2024: plot differences for better understanding
compare <- new %>%
  left_join(old, by = c("rgn_id", "stock_id", "taxon_key", "year")) %>%
  mutate(diff = bbmsy.x - bbmsy.y) %>%
  filter(year %in% c(2016:2019))

# v2024: points above the red line indicate an increase in B/Bmsy from 2023 to 2024, while points below indicate a decrease
comparison_plot <- ggplot(compare, aes(x = bbmsy.x, y = bbmsy.y)) +
  geom_point(alpha = 0.5) + 
  geom_abline(color = "darkred", linetype = "dashed") +  # reference line
  labs(
    title = "B/Bmsy Values: v2023 vs v2024",
    x = "v2023 B/Bmsy",
    y = "v2024 B/Bmsy"
  ) +
  theme_minimal() +
  coord_fixed(ratio = 1)  +
  geom_smooth(method = "lm", se = FALSE, color = "lightblue") # the trend is slightly downward, meaning there is a slight decrease in B/Bmsy values in 2024 compared to 2023.
comparison_plot

# v2024: check for year 2019 only
compare_2019 <- compare %>%
  filter(year %in% 2019)
  
comparison_plot_2019 <- ggplot(compare_2019, aes(x = bbmsy.x, y = bbmsy.y)) +
  geom_point(alpha = 0.5) + 
  geom_abline(color = "darkred", linetype = "dashed") +  # reference line
  labs(
    title = "B/Bmsy Values in 2019: v2023 vs v2024",
    x = "v2023 B/Bmsy",
    y = "v2024 B/Bmsy"
  ) +
  theme_minimal() +
  coord_fixed(ratio = 1)  +
  xlim(0, 12) +
  geom_smooth(method = "lm", se = FALSE, color = "lightblue") # the trend is slightly downward, meaning there is a slight decrease in B/Bmsy values in 2024 compared to 2023.
comparison_plot_2019

For understanding results:

When B/BMSY = 1, then biomass equals BMSY. If B/BMSY falls below 1, biomass is too low to provide maximum sustainable yield.. This is also seen in Christopher Costello’s 2016 paper, “Global fishery prospects under contrasting management regimes.”

# read in our final data frame saved earlier
final_b_bmsy <- read_csv(here(output_dir, "fis_bbmsy.csv"))

# interactive plotly of b/bmsy for each stock
fis_plot <- final_b_bmsy %>%
  plot_ly(x = ~year, y = ~bbmsy, color = ~stock_id, 
          type = "scatter", mode = "lines") %>%
  layout(title = "B/Bmsy by Stock ID",
         xaxis = list(title = "Year"),
         yaxis = list(title = "B/Bmsy (overfished is < 1)"))
fis_plot

# comparison of how many stocks are over/underfished in total
fish_status <-final_b_bmsy %>% 
  mutate(fish_status = case_when(
    bbmsy < 1 ~ "overfished",
    bbmsy >= 1 ~ "sustainable",
    .default = NA
  ))

fish_status_summary_2019 <- fish_status %>%
  filter(year %in% 2019) %>% 
  group_by(rgn_id, fish_status) %>%
  summarise(count = n()) %>%
  filter(!is.na(fish_status))

ggplot(fish_status_summary_2019, aes(x = fish_status, y = count, fill = fish_status)) +
  geom_bar(position = "dodge", stat = "identity") +
  labs(title = "Comparison of Overfished and Sustainable Stocks in 2019",
       x = "B/Bmsy Stock Status",
       y = "Number of stocks") +
  theme_minimal()

fish_status_summary_all_time <- fish_status %>%
  group_by(rgn_id, fish_status) %>%
summarise(count = n()) %>%
  filter(!is.na(fish_status))

ggplot(fish_status_summary_all_time, aes(x = fish_status, y = count, fill = fish_status)) +
  geom_bar(stat = "identity") +
  labs(title = "Comparison of Overfished and Sustainable Stocks 2012 - 2019",
       x = "B/Bmsy Stock Status",
       y = "Number of stocks") +
  theme_minimal()

# ---- 10 most overfished stocks in 2019 (B/Bmsy < 1) ----
overfished_2019 <- fish_status %>% 
  filter(fish_status %in% "overfished") 

# interactive plotly of b/bmsy for each stock
over_fis_plot <- overfished_2019 %>%
  plot_ly(x = ~year, y = ~bbmsy, color = ~stock_id, 
          type = "scatter", mode = "lines") %>%
  layout(title = "B/Bmsy by Stock ID",
         xaxis = list(title = "Year"),
         yaxis = list(title = "B/Bmsy < 1)"))
over_fis_plot

# ---- all sustainable stocks in 2019 (B/Bmsy > 1) ----


# comparison of how many stocks are over/underfished by region