[REFERENCE RMD FILE: http://ohi-science.org/ohiprep_v2022/globalprep/np/v2021/STEP1b_np_seaweeds_prep.Rmd]
This analysis converts FAO mariculture data into one of the data layers used to calculate OHI 2022 global natural products (NP) scores. We will conduct the overall NP data prep on seaweeds, fish oil/fish meal (FOFM), and ornamentals, however, our final layer from this data prep will only consist of seaweeds.
New year of FAO mariculture data (1950-2020). Replaced deprecated
functions (replace_at()
, spread()
,
gather()
)
Reference:
http://www.fao.org/fishery/statistics/software/fishstatj/en#downlApp
Release date: March 2021 FAO Global Aquaculture Production Quantity
1950_2020 FAO metadata found here
Downloaded: April 25, 2022
Description: Quantity (tonnes) of mariculture for each country, species, year.
Time range: 1950-2020
Reference: https://www.seafoodwatch.org/globalassets/sfw/pdf/whats-new/seafood-watch-complete-recommendation-list.pdf Release date: March 4, 2022
Downloaded: June 22, 2022
Description: Monterey Bay Aquarium Seafood Watch aquaculture recommendations. Sustainability scored from 0-10. Rescaled to 0-1. There is only one value for seaweeds in the data… 0.67
::opts_chunk$set(eval=FALSE)
knitr## load libraries, set directories
library(ohicore) #devtools::install_github('ohi-science/ohicore@dev')
library(zoo)
library(here)
library(tidyverse)
library(plotly)
<- "2022"
version_year
## Load FAO-specific user-defined functions
source(here('workflow/R/fao_fxn.R')) # function for cleaning FAO files
source(here('workflow/R/common.R')) # directory locations
source(here(paste0('globalprep/np/v', version_year, '/R/np_fxn.R')))
source(here(paste0('globalprep/mar/v', version_year, '/mar_fxs.R'))) # functions specific to mariculture dealing with compound countries
Mariculture production in tonnes.
<- read.csv(paste0(dir_M, '/git-annex/globalprep/_raw_data/FAO_mariculture/d', version_year, '/FAO_GlobalAquacultureProduction_Quantity_1950_2020.csv'),
mar check.names=FALSE, stringsAsFactors=FALSE)
head(mar)
Filter freshwater mariculture, make long format, and clean FAO codes.
<- mar %>%
mar ::select(-`Unit (Name)`) %>%
dplyrrename(country = `Country (Name)`,
FAO_name = `ASFIS species (Name)`,
fao = `FAO major fishing area (Name)`,
environment = `Environment (Name)`) %>%
rename_with(~ gsub("\\[", "", .)) %>%
rename_with(~ gsub("\\]", "", .))
table(mar$environment)
## Include only marine and brackishwater environments
<- mar %>%
mar filter(environment %in% c("Brackishwater", "Marine"))
# Change to latest *DATA* year
<- "2020"
latest_year
## Convert to long format and clean FAO codes:
<- mar %>%
mar select(-Unit) %>%
pivot_longer(cols = "1950":latest_year, names_to = "year", values_to = "value") %>%
fao_clean_data_new()
Filter out seaweed species from ‘raw/species_list.csv’ (from ‘globalprep/mar/v2020/raw’), rename columns, and assign proportions to “include” column determined by research of non-human food vs human food seaweed species cultivated in mariculture. For NP, we are only including non-human food seaweed species. Since some species are used for both non-human food and human food purposes, a proportion is assigned based on research and best guess.
## Read in 'species_list.csv' (originally from 'globalprep/mar/v2021/raw'). Filter for 'Taxon_code = AL' (only seaweed species). Rename 'exclude' columns to 'include' since we're now including seaweed species that were excluded in the MAR dataprep (not primarily used as human food). Therefore, "0" means exclude completely (0%), and "1" means include completely (100%).
<- read.csv(here('globalprep/mar/v2021/raw/species_list.csv'), stringsAsFactors=FALSE) %>%
seaweed_sp filter(Taxon_code == 'AL') %>%
rename(include = exclude)
## Save in 'globalprep/np/v2022/raw' as 'species_list_np_seaweeds.csv'.
write.csv(seaweed_sp, "raw/species_list_np_seaweeds.csv", row.names = FALSE)
Update species name in the
raw/species_list_np_seaweeds_edited.csv
file with names in
the mar
dataset. Simplified the species list and cut the
“species” name columns because it wasn’t clear what this was trying to
accomplish and created potential error.
## Read in edited 'species_list_np_seaweeds_edited.csv'.
<- read.csv(here(paste0("globalprep/np/v", version_year, "/raw/species_list_np_seaweeds.csv")), stringsAsFactors = FALSE)
seaweeds
<- seaweeds %>%
seaweeds_sp select(FAO_name, include, alias, Taxon_code, family)
## REMOVE SPECIES not relevant to natural products goal (i.e., human food species)
<- seaweeds_sp %>%
seaweed_np left_join(mar, by="FAO_name") %>%
filter(include > 0)
<- unique(sort(seaweed_np$FAO_name))
spp_1 <- unique(seaweed_np$country)
rgn_1 # went from 45 species to 40 species (5 completely human food species removed) - v2021
## Change names using species alias or FAO species name (global changes)
$species <- ifelse(!is.na(seaweed_np$alias), seaweed_np$alias, seaweed_np$FAO_name)
seaweed_np## Sum production values for each group to account for duplicate rows after name change (remove NA values)
<- seaweed_np %>%
seaweed_np filter(!is.na(value)) %>%
group_by(country, fao, environment, species, year, Taxon_code, family, include) %>%
summarize(value = sum(value)) %>%
ungroup()
<- unique(sort(seaweed_np$species))
spp_2 <- unique(seaweed_np$country)
rgn_2
setdiff(spp_1, spp_2)
# went from 40 species to 36 species (lost "Bright green nori", "Kelp nei", "Giant kelps nei", "Mozuku") due to no production values for those species - v2022
setdiff(rgn_1, rgn_2)
# went from 53 to 52 countries - this is ok... because the one we "lost" was an NA - v2022
## Eliminate country-species data with zero production throughout the time-series (1950-recent)
<- seaweed_np %>%
seaweed_np group_by(country, species) %>%
mutate(total_value = sum(value)) %>%
filter(total_value > 0) %>%
select(-total_value) %>%
ungroup()
## Divide mariculture from countries that we report as separate regions (assume equal production in all regions)
# Netherlands Antilles: Conch restoration among Aruba, Bonaire, Curacao
# Channel Islands: Jersey and Guernsey
# Bonaire/S.Eustatius/Saba
# Yugoslavia SFR: no longer a country after 1992
<- seaweed_np %>%
seaweed_np mutate(country = ifelse(country=="Réunion", "Reunion", country)) %>% # this one is hard to get right; v2020: last year it was "R\xe9union", but this year it was "Réunion" - not present in v2021 data
mar_split() # function in mar_fxs.R
<- name_2_rgn(df_in = seaweed_np,
mar_rgn_all fld_name='country',
flds_unique=c('species', 'fao', 'environment', 'Taxon_code', 'year', 'include'))
## Sum values of regions with multiple subregions
<- mar_rgn_all %>%
mar_rgn group_by(fao, environment, species, year, Taxon_code, family, rgn_id, include) %>%
summarize(value = sum(value)) %>%
ungroup()
# went from 3150 to 3121 obs - v2021 - This is correct; Un. Sov. Soc. Rep. and Russian Federation were combined.
# went from 3233 to 3173 obs - v2022 - Same as 2021 and Zanzibar / Tanzania were combined
setdiff(mar_rgn$rgn_id, mar_rgn_all$rgn_id)
Take a look at the tidied data for a single year and region
data.frame(filter(mar_rgn, rgn_id==182) %>%
filter(year==2016) %>%
arrange(species))
Checked to make sure that there weren’t instances in which it made more sense to carry the previous year’s data forward as a method of gapfilling. This didn’t seem to be the case.
## Pivot wider mar_rgn to create a value for every year-species-region (if missing it will be given a NA)
<- mar_rgn %>%
mar_rgn_wide pivot_wider(names_from = year, values_from = value)
dim(mar_rgn_wide)
## Turn data frame back into long format
<- mar_rgn_wide %>%
mar_rgn_gf pivot_longer(cols = -(fao:include), names_to = "year", values_to = "value") %>%
arrange(rgn_id, species, year, Taxon_code, fao, environment)
## NA values are converted to zero
<- mar_rgn_gf %>%
mar_rgn_gf mutate(year = as.numeric(as.character(year))) %>%
mutate(value_w_0 = ifelse(is.na(value), 0, value)) %>%
group_by(fao, environment, species, Taxon_code, rgn_id) %>%
mutate(cum_value = cumsum(value_w_0)) %>%
ungroup() %>%
filter(cum_value > 0) %>% # eliminates years before mariculture began
mutate(gap_0_fill = ifelse(is.na(value), "NA_to_zero", "0")) %>% # record gapfill
mutate(value = ifelse(is.na(value), 0, value)) %>% # finally, convert all NAs in original column to 0
select(-cum_value, -value_w_0)
See how may NA values were converted to 0
table(mar_rgn_gf$gap_0_fill)
## 422 of these out of 2448+442 cases had NA converted to 0 - v2021 - seems reasonable given the added year of production
## 460 out of 2500+460 had NA converted to 0 - v2022
Remove species-region-environment time series with less than four years of seaweeed mariculture production > 0 tonnes (assume these are not established seaweed mariculture programs).
= mar_rgn_gf %>%
mar_rgn_gf group_by(rgn_id, species, fao, environment) %>%
mutate (not_0 = length(value[value>0])) %>% # length of vector of years greater than 0
filter (not_0>3) %>% # filter for groups that have at least four years of seaweed mariculture production
ungroup() %>%
select(rgn_id, species, fao, environment, year, include, value, Taxon_code, gap_0_fill)
Add a unique identifier per cultivated stock that describes each species, fao region, and environment grouping.
## Add a unique identifier per cultivated stock
= mar_rgn_gf %>%
identifier select(rgn_id, species, fao, environment) %>%
unique() %>%
mutate(species_code = 1:n())
# 93 unique identifiers - v2021
= left_join(mar_rgn_gf, identifier)
mar_rgn_gf <- mar_rgn_gf maric
Find the tonnes per each region/year per each seaweed type (multiplied by “include” proportions).
Used to estimate total seaweed mariculture yield per country.
## Multiply "include" column by "value" column to find tonnes per region/year for each seaweed species
<- maric %>%
maric mutate(tonnes = include*value)
## Save for weighting purposes later on
write.csv(maric,"int/np_seaweeds_tonnes_weighting.csv", row.names = FALSE)
These data describe the sustainability country/species combinations. In cases where these data were not available for a specific county/species, we just used the seafood watch seaweed sustainability score (6.72) (this was all of the seaweed species listed).
## Load in Seafood Watch sustainability scores data from mazu:
<- read.csv(file.path(dir_M, 'git-annex/globalprep/_raw_data/seafood_watch_mar_sustainability/d2022/SFW_Aquaculture_ratings_062222.csv'), check.names = FALSE, stringsAsFactors = FALSE, na.strings = c("NA", ""))
sw_sus head(sw_sus)
Rename columns to match with MAR data and fill in species column
## Rename columns
<- sw_sus %>%
sw_sus rename(report_title = 'ReportTitle',
published_date = 'PublishedDate',
sw_species = 'CommonNames',
genus = 'Genus',
spp = 'Species',
fao_species = 'FAOCommonName',
fda_species = 'FDACommonName',
water_body = 'BOWs',
country = 'Countries',
state_territory = 'CountrySubs',
method = 'Methods',
score = 'AssessmentScore',
escapes_score = 'C6Score',
# Column ↓ formerly called "Overall Recommendation" - chose to stick with the same naming convention - v2022
rec = 'AssessmentColor'
%>%
) ::select(report_title, published_date, sw_species, genus, spp, fao_species, fda_species, country, state_territory, water_body, method, escapes_score, score, rec)
dplyr
## Change species names using FAO species name (fao_species); if NA, use common name (sw_species)
$species <- ifelse(!is.na(sw_sus$fao_species), sw_sus$fao_species, sw_sus$sw_species) sw_sus
## These need to be re-added later (get cut when associated with region ids)
<- filter(sw_sus, is.na(country)|country == "Worldwide")
sw_sus_no_rgn # 118 entries with no country
## Change country names to match OHI region names
<- sw_sus %>%
sw_sus_multiple filter(str_detect(country, "\\|")) %>%
separate_rows(country, sep = " \\| ")
<- sw_sus %>%
sw_sus_df filter(!str_detect(country, "\\|")) %>%
rbind(sw_sus_multiple) %>%
filter(!is.na(country), country!="Worldwide")
## Convert country names to OHI region IDs. (ohicore/R/name_2_rgn.R)
<- name_2_rgn(df_in = sw_sus_df,
sw_sus_rgn fld_name='country',
flds_unique=c('fao_species', 'fda_species', 'sw_species', 'score'),
keep_fld_name = TRUE) # 'country' now shows the original Seafood Watch data name; 'rgn_name' is what we want to use from now on
## Re-add NA countries
<- bind_rows(sw_sus_rgn, sw_sus_no_rgn) %>%
sw_sus_rgn unique()
# 254 obs - v2021
# 273 obs - v2022 - new obs for Norway, China, Taiwan, Japan, etc.
Join the seaweed sustainability data with the mariculture data
<- read_csv("int/np_seaweeds_tonnes_weighting.csv")
maric
<- maric %>%
mar_sw_sus left_join(sw_sus_rgn, by = c("species", "rgn_id")) %>%
::select(rgn_id, year, species, Taxon_code, species_code, score, tonnes, gap_0_fill ) ## none of the specific species match dplyr
Since there are no sustainability scores for any of the species listed, we will gapfill with the seafood watch “Seaweed (Global)” score, which is 6.72.
NOTE FOR v2022: Scale this score the the max in the seafood watch data, like we did for mariculture.
<- mar_sw_sus %>%
mar_sw_sus mutate(Sust = round(6.72/10,2)) %>% ## since none of the species match, we will give the general worldwide seaweed score from seafood watch (6.72)
::select(-score) dplyr
Since some regions have multiple sustainability scores for the same species due to multiple aquaculture methods, but we don’t know what proportions of which methods are used, we take the average of the sustainability scores in these instances.
Average sustainability scores within regions with more than score (due to more than one aquaculture method):
<- mar_sw_sus %>%
mar_sw_sus ::group_by(rgn_id, species) %>%
dplyr::mutate(Sust_avg = mean(Sust, na.rm=TRUE)) %>%
dplyr::ungroup() dplyr
Get rid of duplicates for region/species/year:
<- mar_sw_sus %>%
mar_sw_sus ::distinct(rgn_id, species, year, .keep_all = TRUE) %>%
dplyr::select(-Sust, sust_coeff = Sust_avg, taxon_group = Taxon_code) %>%
dplyrmutate(taxa_code = paste(species, species_code, sep="_"))
Now look at a summary after appending all the Seafood Watch data
summary(mar_sw_sus)
# No NAs in Sust!
## save seaweed mariculture sustainability dataset
<- mar_sw_sus %>%
seaweed_sust ::select(rgn_id, taxa_code, year, sust_coeff)
dplyrwrite_csv(seaweed_sust, "output/np_seaweed_sust.csv")
## Save seaweed mariculture harvest tonnes data ("tonnes" column already incorporated include proportions)
<- mar_sw_sus %>%
seaweed_harvest_tonnes ::select(rgn_id, taxa_code, year, tonnes)
dplyranyDuplicated(seaweed_harvest_tonnes) # check for duplication
write.csv(seaweed_harvest_tonnes, 'output/np_seaweed_harvest_tonnes.csv', row.names=F)
## save a gapfill dataset for FAO tonnes data:
<- mar_sw_sus %>%
mar_FAO_gf rename("gapfill_fao" = "gap_0_fill") %>%
mutate(method = ifelse(gapfill_fao == 0, "none", gapfill_fao),
gapfilled = ifelse(gapfill_fao == 0, 0, 1)) %>%
::select(rgn_id, taxa_code, year, gapfilled, method)
dplyr
write.csv(mar_FAO_gf, "output/np_seaweed_harvest_tonnes_gf.csv", row.names = FALSE)
## save a gapfill dataset for sustainability dataset
<- mar_sw_sus %>%
mar_sust_gf mutate(method = "sfw_seaweed_score",
gapfilled = 1) %>%
::select(rgn_id, year, taxa_code, gapfilled, method)
dplyr
write.csv(mar_sust_gf, "output/np_seaweed_sust_gf.csv", row.names = FALSE)
Datacheck:
## compare the harvest
## in particular, saint lucia's score increased a lot in 2021
## Less drastic increase in 2022 ~22 tons
## Compare yield data for Saint Lucia
<- read.csv("../v2021/output/np_seaweed_harvest_tonnes.csv") %>%
np_old filter(rgn_id == 122, year == 2019) %>%
select(rgn_id, taxa_code, tonnes)
<- read.csv("output/np_seaweed_harvest_tonnes.csv") %>%
np_new filter(rgn_id == 122, year == 2020) %>%
select(rgn_id, taxa_code, tonnes)
<- np_old %>%
yield full_join(np_new, by = c("rgn_id","taxa_code")); View(yield) ## the production increased by ~22 tonnes
## Compare yield data for Vietnam - Vietnam score decreased
<- read.csv("../v2021/output/np_seaweed_harvest_tonnes.csv") %>%
np_old filter(rgn_id == 207, year == 2019) %>%
select(rgn_id, taxa_code, year, tonnes)
<- read.csv("output/np_seaweed_harvest_tonnes.csv") %>%
np_new filter(rgn_id == 207, year == 2020) %>%
select(rgn_id, taxa_code, year, tonnes)
<- np_old %>%
yield full_join(np_new, by = c("rgn_id","taxa_code")); View(yield)