This analysis converts FAO capture production data into the OHI 2021 targeted harvest pressure data.
2022 - One more year of data
http://www.fao.org/fishery/statistics/software/fishstatj/en#downlApp
Release date: March 2021
FAO Global Capture Production Quantity 1950_2019
Information: http://www.fao.org/fishery/statistics/global-capture-production/en
Reference: United Nations, 2021. FAO Fisheries & Aquaculture - Fishery Statistical Collections - Global Capture Production [WWW Document]. URL http://www.fao.org/fishery/statistics/global-capture-production/en (accessed 4.29.21).
Downloaded: April 25, 2022
Description: Quantity (tonnes) of fisheries capture for each county, species, year.
Time range: 1950-2020
# load libraries, set directories
library(ohicore) #devtools::install_github('ohi-science/ohicore@dev')
library(tidyverse)
library(plotly)
library(here)
library(janitor)
### Load FAO-specific user-defined functions
source(here('workflow/R/fao_fxn.R')) # function for cleaning FAO files (not combined into common.R like most other functions have been at this point)
source(here('workflow/R/common.R')) # directory locations
<- 2022
version_year <- version_year - 2 latest_data_yr
This includes the FAO capture production data and a list of the “target” species.
## FAO capture production data - all columns being parsed as characters and producing error in one column, but not sure which? (read.csv might help avoid this error?)
<- read_csv(
fis_fao_raw file.path(dir_M, 'git-annex/globalprep/_raw_data/FAO_capture',
paste0('d', version_year),
paste0('Global_capture_production_Quantity_1950-', latest_data_yr,'.csv'))
# , na = "..."
)
# List of species included as cetaceans or marine turtles (this probably won't change at all)
<- read_csv(here('globalprep', 'prs_targetedharvest', paste0('v', version_year), 'raw', 'species2group.csv')) %>%
sp2grp ::filter(incl_excl == 'include') %>%
dplyr::select(target, species); head(sp2grp) dplyr
# Rename columns and remove unit column
<- fis_fao_raw %>%
fao_clean ::rename(country = "Country (Name)",
dplyrspecies = "ASFIS species (Name)",
area = "FAO major fishing area (Name)") %>%
::select(-"Unit (Name)") %>%
dplyr::rename_with(~ base::gsub("\\[", "", .)) %>%
dplyr::rename_with(~ base::gsub("\\]", "", .))
dplyr# rename_at(vars(matches("\\[")), ~ str_remove(., "\\[")) %>%
# rename_at(vars(matches("\\]")), ~ str_remove(., "\\]"))
# Gather by year and value to expand and make each line a single observation for country, species and year (tidy data!)
# Pivot_longer by year and value to expand and make each line a single observation for country, species and year (tidy data!)
<- fao_clean %>%
fao_clean ::pivot_longer(cols = -(1:3), names_to = 'year', values_to = 'value', values_drop_na = T) %>%
tidyr# tidyr::gather("year", "value", -(1:3)) %>%
::mutate(year = gsub("X", "", year)) %>%
dplyrfao_clean_data_new()
<- fao_clean %>%
fao_clean ::mutate(
dplyrspecies = as.character(species),
species = stringr::str_replace_all(
string = species,
pattern = "Henslow.*s swimming crab",
replacement = "Henslow's swimming crab")
)# dplyr::mutate(species = ifelse(stringr::str_detect(species, "Henslow.*s swimming crab"), "Henslow's swimming crab", species))
This analysis only includes target species. The warning messages need to be checked and, if necessary, changes should be made to the raw/species2group.csv
# check for discrepancies in species list
<- sort(as.character(unique(fao_clean$species))) # species groups in FAO data
spgroups <- c('turtle', 'whale', 'dolphin', 'porpoise') # seals and sea lions removed from vector (pinnipeds no longer included)
groups
# Going through FAO data species and seeing if they're in our master list of species
## Looking to see if we need to add species that have changed name
for (group in groups) {# group='dolphin'
<- spgroups[grep(group, spgroups)]
possibles <- setdiff(possibles, sp2grp$species)
d_missing_l if (length(d_missing_l) > 0){
cat(sprintf("\nMISSING in the lookup the following species in target='%s'.\n %s\n",
paste(d_missing_l, collapse='\n ')))
group,
}
}
# check for species in lookup not found in data
<- setdiff(sp2grp$species, spgroups)
l_missing_d if (length(l_missing_d) > 0){
cat(sprintf('\nMISSING: These species in the lookup are not found in the FAO data \n'))
print(l_missing_d)
}
#### v2022
# MISSING in the lookup the following species in target='turtle'.
# Chinese softshell turtle - not a marine turtle
# River and lake turtles nei - not a marine turtle
#
# MISSING in the lookup the following species in target='whale'.
# Creek whaler - shark, not a whale
# Velvet whalefish - fish, not a whale
#
# MISSING in the lookup the following species in target='dolphin'.
# Common dolphinfish - fish not a cetacean
# Pompano dolphinfish - fish not a cetacean
## filter data to include only target species ----
<- fao_clean %>%
target_spp ::filter(species %in% sp2grp$species) # this goes from 2384 spp in FAO list to just 72
dplyr
unique(target_spp$area) # confirm these are all marine regions
unique(fao_clean$species) # 2384 species
# pivot wider to expand years
<- target_spp %>%
wide ::pivot_wider(names_from = year, values_from = value) %>%
tidyr::select(-`NA`) %>%
dplyr# tidyr::spread(year, value) %>%
::left_join(sp2grp, by='species'); head(wide)
dplyr
# pivot longer long by target
<- wide %>%
long ::select(-area) %>%
dplyr::pivot_longer(cols = c(-country, -species, -target),
tidyrnames_to = 'year',
values_to = 'value',
values_drop_na = T) %>%
# tidyr::gather(year, value, -country, -species, -target, na.rm=T) %>%
::mutate(year = as.integer(as.character(year))) %>%
dplyr::arrange(country, target, year); head(long)
dplyr
# explore Japan[210] as an example
<- long %>%
japan ::group_by(country, target, year) %>%
dplyr::summarize(value = sum(value)) %>%
dplyr::filter(country == 'Japan', target == 'cetacean', year >= 2000)
dplyr
# summarize totals per region per year - number of individual animals from each spp group?
<- long %>%
sum ::group_by(country, year) %>%
dplyr::summarize(value = sum(value, na.rm=TRUE)) %>%
dplyr::filter(value != 0) %>%
dplyr::ungroup(); head(sum) dplyr
<- sum %>%
sum ::mutate(country = as.character(country)) %>%
dplyr::mutate(country = ifelse(stringr::str_detect(country, "C.*te d'Ivoire"), "Ivory Coast", country))
dplyr
### Function to convert to OHI region ID
<- name_2_rgn(df_in = sum,
m_sum_rgn fld_name='country',
flds_unique=c('year'))
# Check out duplicates based on error message from previous step
::filter(m_sum_rgn, country %in% c("Guadeloupe", "Martinique"))
dplyr# this is ok, we report these two together, so this will be fixed with the summarize in the next step
# They will be summed:
<- m_sum_rgn %>%
m_sum_rgn ::group_by(rgn_id, rgn_name, year) %>%
dplyr::summarize(value = sum(value)) %>%
dplyr::ungroup() dplyr
Data is rescaled by dividing by the 95th quantile of values across all regions from 2011 to 2020 (most recent year of FAO data).
<- m_sum_rgn %>%
target_harvest ::mutate(
dplyrquant_95 = quantile(value[year %in% 2011:latest_data_yr], 0.95, na.rm = TRUE),
score = value / quant_95,
score = ifelse(score > 1, 1, score)) %>%
::select(rgn_id, year, pressure_score = score) %>%
dplyr::arrange(rgn_id, year); head(target_harvest); summary(target_harvest)
dplyr
# v2021 quant_95 = 3409.4
# v2022 quant_95 = 3450.05
# any regions that did not have a catch should have score = 0
<- rgn_master %>%
rgns ::filter(rgn_typ == "eez") %>%
dplyr::select(rgn_id = rgn_id_2013) %>%
dplyr::filter(rgn_id < 255) %>%
dplyr::unique() %>%
base::arrange(rgn_id)
dplyr
# This is just a list of rgn IDS - do we want to update it to a rgn list more recent than 2013?
# Add year; for v2022, min year is 1950, and max year is 2020
<- expand.grid(rgn_id = rgns$rgn_id, year = min(target_harvest$year):max(target_harvest$year))
rgns
# Change NAs in pressure_score column to 0s
<- rgns %>%
target_harvest ::left_join(target_harvest) %>%
dplyr::mutate(pressure_score = ifelse(is.na(pressure_score), 0, pressure_score)) %>%
dplyr::arrange(rgn_id); head(target_harvest); summary(target_harvest)
dplyr
# Write target_harvest to "fao_targeted.csv" in output folder
write_csv(
target_harvest, here('globalprep', 'prs_targetedharvest', paste0('v', version_year), 'output', 'fao_targeted.csv')
)
# Create gapfill dataframe
<- target_harvest %>%
target_harvest_gf ::mutate(gapfill = 0) %>%
dplyr::select(rgn_id, year, gapfill)
dplyr# all zeroes for gapfill column; nothing being gapfilled but need to have a record
# Write target_harvest_gf to "fao_targeted_gf.csv" in output folder
write_csv(
target_harvest_gf,here('globalprep', 'prs_targetedharvest', paste0('v', version_year), 'output', 'fao_targeted_gf.csv')
)
The data from last year and this year should be the same unless there were changes to underlying FAO data or the master species list.
In this case, all of the regions looked very similar.
# pull just 2019 data from target_harvest df, since its the most recent year in common
<- 2019
common_year
<- readr::read_csv(here("globalprep", "prs_targetedharvest", paste0('v', version_year), "output", "fao_targeted.csv")) %>%
new ::filter(year == common_year)
dplyr
<- readr::read_csv(here("globalprep", "prs_targetedharvest", paste0('v', version_year - 1), "output", "fao_targeted.csv")) %>%
old ::filter(year == common_year) %>%
dplyr::select(rgn_id, year, pressure_score_old = pressure_score) %>%
dplyr::left_join(new, by = c("rgn_id", "year"))
dplyr
# Compare pressure_score between last year and this year's assessments
<- ggplot(data = old, aes(x = pressure_score_old, y = pressure_score, label = rgn_id)) +
compare_plot geom_point() +
geom_abline(color = "red")
plot(compare_plot)
ggplotly(compare_plot)
### v2022: Explore outliers
### explore United States [163]
# out_country <- "United States of America"
### explore Indonesia [216]
<- "Indonesia"
out_country
<- long %>%
outlier_country ::group_by(country, target, year) %>%
dplyr::summarize(value = sum(value)) %>%
dplyr::filter(country == out_country, year >= 2000)
dplyr
region_data() ## read in regions
<- readr::read_csv(here("globalprep", "prs_targetedharvest", paste0('v', version_year), "output", "fao_targeted.csv")) %>%
new ::filter(year == 2020) %>%
dplyr::select(-year)
dplyr
<- readr::read_csv(here("globalprep", "prs_targetedharvest", paste0('v', version_year - 1), "output", "fao_targeted.csv")) %>%
old ::filter(year == common_year) %>%
dplyr::select(-year) %>%
dplyr::select(rgn_id, pressure_score_2019 = pressure_score) %>%
dplyr::left_join(new, by = c("rgn_id")) %>%
dplyr::rename(pressure_score_2020 = pressure_score) %>%
dplyrleft_join(rgns_eez)
# Compare pressure_score between last year and this year's assessments
<- ggplot(data = old, aes(x = pressure_score_2019, y = pressure_score_2020, label = rgn_name)) +
compare_plot geom_point() +
geom_abline(color = "red")
plot(compare_plot)
ggplotly(compare_plot)