
Summary

This script prepares the final B/Bmsy data:

1. Calculates the 5-year running average of the B/Bmsy values generated using the CMSY method.
2. Obtains a B/Bmsy value for each catch record (each FAO region/OHI region/year/species combination), prioritizing RAM data over CMSY data.

Updates from previous assessment

None.


Data

B/Bmsy values from the RAM Legacy Stock Assessment data are generated in RAM_data_prep.Rmd

B/Bmsy values from the CMSY method are generated in calculate_bbmsy.Rmd

Mean catch data are generated in catch_data_prep.Rmd


Setup

knitr::opts_chunk$set(fig.width = 6, fig.height = 4, fig.path = 'figs/', message = FALSE, warning = FALSE, eval = FALSE)
library(dplyr)
library(tidyr)
library(zoo)
library(stringr)
library(readr) # for read_csv(), used in the data checks below

source('../../../workflow/R/common.R')

Prepare B/Bmsy values from CMSY model

For the CMSY-generated B/Bmsy values, we use the five-year running mean of the values to smooth the data and to account for model uncertainty.
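
As a minimal illustration (with made-up values), the right-aligned rolling mean used below returns NA until a full five-year window is available:

## toy example (made-up values): right-aligned 5-year rolling mean
zoo::rollmean(c(1.0, 1.2, 0.9, 1.1, 1.3, 1.2), 5, align = "right", fill = NA)
## returns: NA NA NA NA 1.10 1.14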

cmsy <- read.csv('output/cmsy_bbmsy.csv') %>%
  filter(!is.na(bbmsy_mean)) %>%
  dplyr::select(stock_id, year, bbmsy_q2.5, bbmsy_q97.5, bbmsy_sd, bbmsy_mean, model) %>%
  arrange(stock_id, year) %>%
  group_by(stock_id) %>%
  mutate(mean_5year = rollmean(bbmsy_mean, 5, align = "right", fill = NA))

write.csv(cmsy, "int/cmsy_b_bmsy_mean5yrs.csv", row.names=FALSE)

Combine RAM and CMSY B/Bmsy values and Watson catch data

A few regions have multiple RAM stocks for the same species (i.e., the same scientific name). In these cases, we average the B/Bmsy values of those stocks, weighted by the area of each RAM stock within the region.
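
As a minimal sketch of that weighted average (made-up numbers, not actual stock values):

## toy example: two RAM stocks of the same species in one region,
## with B/Bmsy values of 0.8 and 1.2 and in-region areas of 3e9 and 1e9 m2
weighted.mean(c(0.8, 1.2), w = c(3e9, 1e9)) # 0.9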

Read in the three data tables:

cmsy <- read.csv('int/cmsy_b_bmsy_mean5yrs.csv') %>%
  dplyr::select(stock_id, year, cmsy_bbmsy = mean_5year)

ram <- read.csv("int/ram_bmsy.csv") %>% # final output from RAM_data_prep
  rename(stock_id = stockid) # to match other two data tables

mean_catch <- read.csv("output/mean_catch_minus_feed.csv", stringsAsFactors = FALSE) %>% # final output from Watson catch
  mutate(taxon_key = str_extract(stock_id_taxonkey, "(\\d)+$")) %>% # extract ending consecutive digits (the taxon key)
  mutate(stock_id = str_extract(stock_id_taxonkey, "^(\\w+).(\\d){1,2}")) # extract scientific name plus FAO region number
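
For reference, here is how the two regular expressions above split a hypothetical stock_id_taxonkey value (the identifier and taxon key shown are illustrative):

## hypothetical identifier: scientific name, FAO region, and trailing taxon key
example_id <- "Thunnus_alalunga-71_600143"
str_extract(example_id, "(\\d)+$")            # taxon_key: "600143"
str_extract(example_id, "^(\\w+).(\\d){1,2}") # stock_id: "Thunnus_alalunga-71"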

Check number of Watson stocks that have CMSY or RAM B/Bmsy values:

## SAUP v RAM BBMSY
setdiff(ram$stock_id, mean_catch$stock_id)
setdiff(mean_catch$stock_id, ram$stock_id)
intersect(ram$stock_id, mean_catch$stock_id) #342 stocks with RAM-B/Bmsy data (although RAM is matched by fao and rgn ids) - v2019
#332 stocks with RAM-B/Bmsy data (although RAM is matched by fao and rgn ids) - v2020
#365 stocks with RAM-B/Bmsy data (although RAM is matched by fao and rgns ids) - v2021

## SAUP v CMSY
setdiff(cmsy$stock_id, mean_catch$stock_id)
setdiff(mean_catch$stock_id, cmsy$stock_id)
length(intersect(mean_catch$stock_id, cmsy$stock_id)) #738 stocks with CMSY-B/Bmsy data - v2019
#703 stocks with CMSY-B/Bmsy data - v2020
#1014 stocks with CMSY-B/Bmsy data; WOW! - v2021

Combine Watson to RAM-B/Bmsy:

data <- mean_catch %>%
  left_join(ram, by = c("rgn_id", "stock_id", "year"))
# 608006 catch records (catch from specific fao and ohi regions) when joined with ram increases to 612599 because there are multiple stocks for some species - v2019
# 704173 catch records (catch from specific fao and ohi regions) when joined with ram increases to 709274 because there are multiple stocks for some species - v2020
# 609501 catch records (catch from specific fao and ohi regions) when joined with ram increases to 615151 because there are multiple stocks for some species; we see this decrease in catch records because we are using new fisheries data (SAUP) this year - v2021

sum(!is.na(data$ram_bmsy))/nrow(data) # about 7% of catch records have RAM data (this is more than previous years!) - v2021
sum(data$mean_catch[!is.na(data$ram_bmsy)])/sum(data$mean_catch) # about 50% of tons of catch have RAM data - v2021

Save & view duplicate stocks:

sum(duplicated(paste(data$rgn_id, data$stock_id, data$year, sep="_")))
# 7773 regions with multiple RAM stocks (stockid_ram) for the same species (see scientific name in stockid)
# 8028 regions with multiple RAM stocks (stockid_ram) for the same species 
# 5650 regions with multiple RAM stocks (stockid_ram) for the same species 

## save the duplicate stock values to take a look at an example
tmp <- data[duplicated(paste(data$rgn_id, data$stock_id, data$year, sep="_")), ]

## Examples of a region with multiple RAM stocks of the same species
filter(data, rgn_id == 9, year == 2001, stock_id == "Thunnus_alalunga-71") # stocks ALBANPAC and ALBASPAC are the same species; mean catch 1051.853 tonnes

Regions with multiple stocks of the same species will have their B/Bmsy values averaged, weighted by the area of each RAM stock within the region.

## Group by region, taxon, stock, and year before taking an area-weighted mean of B/Bmsy
data <- data %>%
  group_by(rgn_id, taxon_key, stock_id, year, mean_catch) %>%
  summarize(ram_bmsy = ifelse(all(!is.na(RAM_area_m2)), weighted.mean(ram_bmsy, RAM_area_m2, na.rm = TRUE), mean(ram_bmsy, na.rm = TRUE)), # area-weighted mean when stock areas are available, otherwise a simple mean
            gapfilled = ifelse(all(is.na(gapfilled)), NA, max(gapfilled, na.rm = TRUE)),
            method = paste(method, collapse = ", ")) %>%
  ungroup()

## check that averaging went ok - compare with mean catch values earlier (1051)
filter(data, rgn_id == 9, year == 2001, stock_id == "Thunnus_alalunga-71")


## check an example of a duplicated stock that has ram_bmsy but no RAM_area_m2 value: ram_bmsy should not be NA, and mean_catch should still be 29256 tonnes (checked)
filter(data, rgn_id == 224, year == 2018, stock_id ==  "Heterocarpus_reedi-87")


# add in the B/Bmsy values from the CMSY approach
data <- data %>%
  left_join(cmsy, by=c("stock_id", "year"))

summary(data)

Many of the NAs for both RAM and CMSY B/Bmsy are due to SAUP stocks that do not match a RAM or CMSY stock.
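
A quick way to quantify the remaining gaps (a sketch; column names follow the joined table above):

## proportion of catch records with no B/Bmsy estimate from either source
sum(is.na(data$ram_bmsy) & is.na(data$cmsy_bbmsy)) / nrow(data)

## proportion of catch records covered by each source
sum(!is.na(data$ram_bmsy)) / nrow(data) # RAM
sum(is.na(data$ram_bmsy) & !is.na(data$cmsy_bbmsy)) / nrow(data) # CMSY only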

Formatting and saving final data

B/Bmsy values for each catch record are generated (for the species where this is possible) and saved, prioritizing RAM values over CMSY values. A corresponding gapfilling dataset is also saved.

data_gf <- data %>%
  mutate(bmsy_data_source = ifelse(!is.na(ram_bmsy), "RAM", NA)) %>%
  mutate(bmsy_data_source = ifelse(is.na(bmsy_data_source) & !is.na(cmsy_bbmsy), "CMSY", bmsy_data_source)) %>%
  mutate(bbmsy = ifelse(is.na(ram_bmsy), cmsy_bbmsy, ram_bmsy)) %>%
  dplyr::select(rgn_id, stock_id, taxon_key, year, bbmsy, bmsy_data_source, RAM_gapfilled = method, mean_catch) %>%
  filter(year >= 2001)

old <- read_csv(file.path("../v2021_old/output/fis_bbmsy_gf.csv")) # previous output, for comparison

summary(old)
summary(data_gf)
length(unique(data_gf$rgn_id)) # 220

write.csv(data_gf, "output/fis_bbmsy_gf.csv", row.names=FALSE) 

data_gf <- read.csv("output/fis_bbmsy_gf.csv") 

bbmsy <- data_gf %>%
  dplyr::select(rgn_id, stock_id, year, bbmsy) %>%
  dplyr::filter(!is.na(bbmsy))
  
bbmsy_dups_fixed <- bbmsy %>%
  group_by(rgn_id, stock_id, year) %>%
  summarise(bbmsy = mean(bbmsy)) %>% ## account for the duplicated TaxonName/CommonName noted in "catch_data_prep.Rmd" (if there were any)...
  ungroup()
#old <- read_csv(file.path("../v2020/output/fis_bbmsy.csv"))

write.csv(bbmsy_dups_fixed, "output/fis_bbmsy.csv", row.names=FALSE) 

Data check

## check pitcairn
old <- read_csv(file.path("../v2021_old/output/fis_bbmsy_gf.csv"))

new <- read_csv(file.path("../v2021/output/fis_bbmsy_gf.csv"))

old_pit <- old %>%
  filter(rgn_id == 146)
mean(old_pit$bbmsy, na.rm = TRUE) # 1.221473

new_pit <- new %>%
  filter(rgn_id == 146)
mean(new_pit$bbmsy, na.rm = TRUE) # 1.140603


## split each into bbmsy ranges by 0.2

## helper: bin B/Bmsy into 0.2-wide ranges and summarize mean catch per range and year
bbmsy_range_summary <- function(df) {
  df %>%
    mutate(bbmsy_range = case_when(
      bbmsy < 0.2 ~ "[0,0.2)",
      bbmsy >= 0.2 & bbmsy < 0.4 ~ "[0.2,0.4)",
      bbmsy >= 0.4 & bbmsy < 0.6 ~ "[0.4,0.6)",
      bbmsy >= 0.6 & bbmsy < 0.8 ~ "[0.6,0.8)",
      bbmsy >= 0.8 & bbmsy < 1 ~ "[0.8,1)",
      bbmsy >= 1 & bbmsy < 1.2 ~ "[1,1.2)",
      bbmsy >= 1.2 & bbmsy < 1.4 ~ "[1.2,1.4)",
      bbmsy >= 1.4 & bbmsy < 1.6 ~ "[1.4,1.6)",
      bbmsy >= 1.6 & bbmsy < 1.8 ~ "[1.6,1.8)",
      bbmsy >= 1.8 & bbmsy < 2 ~ "[1.8,2)",
      bbmsy >= 2 & bbmsy < 2.2 ~ "[2,2.2)",
      bbmsy >= 2.2 ~ "[2.2, 3)"
    )) %>%
    group_by(year, bbmsy_range) %>%
    summarise(mean_catch = mean(mean_catch, na.rm = TRUE)) %>%
    ungroup()
}

new_pit_range <- bbmsy_range_summary(new_pit)
old_pit_range <- bbmsy_range_summary(old_pit)


## ok, this is making more sense. It seems that in the Watson data, the majority of catch in Pitcairn came from stocks that were doing well (B/Bmsy > 0.8 and < 1.2), whereas in the SAUP data, the majority of catch came from stocks that were doing poorly (B/Bmsy > 1.2; and there is a lot more catch in SAUP overall). This is why we see such a dramatic change in scores for Pitcairn FIS (-85 points).