[REFERENCE RMD FILE: http://ohi-science.org/ohiprep_v2021/globalprep/np/v2021/np_dataprep.html]
This analysis converts FAO commodities tonnes data for ornamentals, SAUP fisheries tonnes data for FOFM, and FAO mariculture tonnes data for seaweeds into a weighting scheme per each ohi region, based off of 5 year averages of tonnes and values (in usd) for each product. This weighting scheme will be applied to the scores when updating in ohi global.
Reference:
http://www.fao.org/fishery/statistics/software/fishstatj/en#downlApp App release date: July 2019 FAO raw commodities quantity 1976_2018 FAO raw commodities value 1976_2018 FAO metadata found here
Downloaded: April 29, 2021
Description: Quantity (tonnes) and value (USD) of raw commodities (Exports only) for each country, taxa, year. The FAO data is subset to include commodities in these categories: shells, corals, ornamental fish, fish oil, seaweed and plants, sponges (see: raw/commodities2products.csv for details).
Time range: 1976-2018
Reference: Pauly D., Zeller D., Palomares M.L.D. (Editors), 2020. Sea Around Us Concepts, Design and Data (seaaroundus.org).
Downloaded: August 24, 2021
Description: Tons per year and SAUP region with information on sector type, industry type, fishing entitity, reporting status and taxonomic information.
Time range: 1950 - 2018
Format: CSV
Additional Information: Methods
Reference: Froehlich, H.E., Jacobsen, N.S., Essington, T.E., Clavelle, T., and Halpern, B.S. (2018). Avoiding the ecological limits of forage fish for fed aquaculture. Nature Sustainability 1, 298.
Downloaded: July 7, 2020. Obtained from Melanie Frazier (NCEAS).
Description: List of FOFM species from Watson v3 data.
Native data resolution:
Format: CSV format
Reference: RAM Legacy Stock Assessment Database v4.491
Downloaded: 06/03/2021
Description: B/Bmsy value by stock and year (other data, which we do not use, are also available in the database)
Native data resolution: stock (fish stock, species and region specific)
Time range: 1950 - 2024
Format: CSV format
Additional Information: We use the finalized b/bmsy layer from OHI-global for this data prep. We do not actually read in the raw RAM data here.
Reference: FAO Mariculture data Release date: March 2020 FAO Global Aquaculture Production Quantity 1950_2018 FAO metadata found here
Downloaded: April 29, 2021
Description: Quantity (tonnes) of mariculture for each country, species, year.
Time range: 1950-2019 ***
::opts_chunk$set(eval=FALSE)
knitr
library(here)
library(tidyverse)
source(here('workflow/R/common.R'))
## This file makes it easier to process data for the OHI global assessment
## by creating the following objects:
##
## * dir_M = identifies correct file path to Mazu (internal server) based on your operating system
## * mollCRS = the crs code for the mollweide coordinate reference system we use in the global assessment
## * regions_shape() = function to load global shapefile for land/eez/high seas/antarctica regions
## * ohi_rasters() = function to load two rasters: global eez regions and ocean region
## * region_data() = function to load 2 dataframes describing global regions
## * rgn_syns() = function to load dataframe of region synonyms (used to convert country names to OHI regions)
## * low_pop() = function to load dataframe of regions with low and no human population
## * UNgeorgn = function to load dataframe of UN geopolitical designations used to gapfill missing data
source(here('globalprep/np/v2021/R/np_fxn.R'))
Import prepped FAO commodities data set and find 5 year aaverage $USD / Tonne average value per product per region. Gapfill these based on the UN geopolitical region/global values.
## read in the prepped FAO commodities data set
<- read_csv("int/np_harvest_tonnes_usd.csv") %>%
harvest_tonnes_usd filter(rgn_id != 213)
= max(harvest_tonnes_usd$year)
max_year = max_year - 4
min_year = c(min_year:max_year)
years = data.frame(year = min_year:max_year) ## so that we can get 5 year average
years_df
region_data()
<- rgns_eez %>%
rgns_eez ::select(rgn_id) %>%
dplyrfilter(rgn_id != 213)
## make a data frame with every region and every product
<- data.frame(product = c("seaweeds", "ornamentals", "fish_oil"))
products
<- full_join(rgns_eez, products, by = character()) %>%
region_product full_join(years_df, by = character())
## calculate $/tonne for each ohi region and product and gapfill based on UN geopolitical region
<- harvest_tonnes_usd %>%
harvest_tonnes_usd_geo_rgn full_join(region_product, by = c("rgn_id", "product", "year")) %>%
add_georegion_id() %>%
::filter(year %in% years) %>%
dplyrarrange(rgn_id)
<- harvest_tonnes_usd_geo_rgn %>%
geo_rgn_values_df ::group_by(georgn_id, year, product) %>%
dplyr::mutate(values = usd/tonnes) %>%
dplyrmutate(values = ifelse(values == "Inf", NA, values)) %>% ## assign Inf as NA
summarise(georgn_values = mean(values, na.rm = TRUE)) %>%
ungroup() ## now we have a 5 year average value for geo region
<- harvest_tonnes_usd_geo_rgn %>%
global_values_df ::group_by(product, year) %>%
dplyrmutate(values = usd/tonnes) %>%
mutate(values = ifelse(values == "Inf", NA, values)) %>% ## assign Inf as NA
summarise(global_values = mean(values, na.rm = TRUE)) ## now we have a 5 year average global value
<- harvest_tonnes_usd_geo_rgn %>%
harvest_tonnes_usd_values_gf left_join(geo_rgn_values_df, by = c("georgn_id", "product", "year")) %>%
left_join(global_values_df, by = c("product", "year")) %>%
mutate(values = usd/tonnes) %>%
mutate(values = ifelse(values == "Inf", NA,
ifelse(values == "NaN", NA, values))) %>% ## assign Inf and NaNs as NA
mutate(values_final = ifelse(is.na(values) & georgn_values != "NaN", georgn_values,
ifelse(is.na(values) & georgn_values == "NaN", global_values, values))) %>%## gapfill so that when there is no regional value, take the geo regional value, and if there is no geo regional value, take the global value.
mutate(values_gf_description = ifelse(is.na(values) & georgn_values != "NaN", "geo_region",
ifelse(is.na(values) & georgn_values == "NaN", "global", "none"))) %>%
mutate(values_gf = ifelse(is.na(values) & georgn_values != "NaN", 1,
ifelse(is.na(values) & georgn_values == "NaN", 1, 0))) ## now we have a dataset with gapfilled average values and calculated yearly values... now we will save this, and then calculate the 5 year averages
## save this data frame to int
write.csv(harvest_tonnes_usd_values_gf, "int/harvest_tonnes_usd_weighting_gf.csv", row.names = FALSE)
## now calculate 5 year averages and assign year = 2020
<- harvest_tonnes_usd_values_gf %>%
weighting_usd_values_final ::select(rgn_id, product, year, values_final) %>%
dplyrgroup_by(rgn_id, product) %>%
summarise(value_per_tonne = mean(values_final)) %>%
ungroup()
#save this df
write.csv(weighting_usd_values_final, "int/harvest_weighting_values.csv", row.names = FALSE)
Now prep the tonnes data from each product to be per each region for the 6 year average (2013-2018).
## Read in FOFM tonnes data
<- read_csv("int/mean_catch_FOFM.csv") %>%
fofm_tonnes ::filter(year %in% years) %>%
dplyrmutate(product = "fish_oil") %>%
group_by(rgn_id, year, product) %>%
summarise(tonnes = sum(catch)*0.3) %>% ## Note: multiply by 0.3 to account for water loss when converting fish to fish oil.. about 30% of fish are water and 70% are fish oil
ungroup() %>%
group_by(rgn_id, product) %>%
summarise(tonnes = mean(tonnes)) %>% ##calculate 5 year mean of catch
ungroup() %>%
full_join(rgns_eez) %>%
::select(1:3) %>%
dplyrmutate(product = "fish_oil") %>%
mutate(tonnes = ifelse(is.na(tonnes), 0, tonnes))
## Read in ornamentals tonnes data
<- read_csv("int/np_harvest_tonnes_usd.csv") %>%
np_tonnes filter(rgn_id != 213)
<- region_product %>%
orn_fill_df filter(product == "ornamentals")
<- np_tonnes %>%
orn_tonnes ::filter(year %in% years, product == "ornamentals") %>%
dplyrfull_join(orn_fill_df, by = c("rgn_id", "year", "product")) %>% ## full join to fill in the rest of the regions with 0
mutate(tonnes = ifelse(is.na(tonnes), 0, tonnes),
usd = ifelse(is.na(usd), 0, usd)) %>% ## gapfill the NAs to be 0
select(rgn_id, year, product, tonnes) %>%
group_by(rgn_id, product) %>%
summarise(tonnes = mean(tonnes)) %>% ## calculate 5 year mean of catch
ungroup()
## Read in seaweed tonnes data
<- region_product %>%
sw_fill_df filter(product == "seaweeds")
<- read_csv("int/np_seaweeds_tonnes_weighting.csv") %>%
sw_tonnes mutate(product = "seaweeds") %>%
group_by(rgn_id, year, product) %>%
summarise(tonnes = sum(tonnes, na.rm = TRUE)) %>%
::filter(year %in% years) %>%
dplyrfull_join(sw_fill_df, by = c("rgn_id", "year", "product")) %>%
mutate(tonnes = ifelse(is.na(tonnes), 0, tonnes)) %>% ## gapfill the NAs to be 0
::select(rgn_id, year, product, tonnes) %>%
dplyrungroup() %>%
group_by(rgn_id, product) %>%
summarise(tonnes = mean(tonnes)) %>% ## calculate 5 year average
ungroup()
Now we will calculate the weights per each product. To do this we need to multiply our average $ value for each product * tonnes of each product, and then divide by the total per each region. We will also assign year = 2021 so that this can be read into OHI-global (2021 corresponds to the year that these weights were calculated).
<- read_csv("int/harvest_weighting_values.csv") %>%
harvest_weighting_values ::select(rgn_id, product, value_per_tonne)
dplyr
<- orn_tonnes %>%
prod_weights bind_rows(sw_tonnes) %>%
bind_rows(fofm_tonnes) %>%
left_join(harvest_weighting_values, by = c("rgn_id", "product")) %>%
arrange(rgn_id) %>%
mutate(usd_product = tonnes*value_per_tonne) %>%
group_by(rgn_id) %>%
mutate(total_usd = sum(usd_product)) %>%
ungroup() %>%
mutate(weight = usd_product/total_usd) %>%
::select(-3, -4, -5, -6) %>%
dplyrmutate(year = 2021) %>%
::select(rgn_id, year, product, weight) %>%
dplyrfilter(rgn_id != 213)
write.csv(prod_weights, "output/np_product_weights.csv", row.names = FALSE)
##datacheck
<- read_csv("../v2020/output/np_product_weights.csv")
old_prod_weights
<- read_csv("output/np_product_weights.csv")
prod_weights
<- prod_weights %>%
check rename("new_weight" = "weight") %>%
left_join(old_prod_weights, by = c("rgn_id", "product")) %>%
mutate(diff = new_weight - weight) %>%
left_join(rgns_eez)
plot(check$new_weight, check$weight)
abline(0,1, col="red")
<- check %>%
check_sw filter(product == "seaweeds")
plot(check_sw$new_weight, check_sw$weight)
abline(0,1, col="red")
<- check %>%
check_orn filter(product == "ornamentals") %>%
mutate(difference = new_weight - weight)
plot(check_orn$new_weight, check_orn$weight)
abline(0,1, col="red")
## check saint martin, Niue, Bonaire, Sint Eustasius - should decrease in production
## check djibouti - should increase in production
<- harvest_tonnes_usd %>%
test filter(rgn_id == 46)
<- read_csv("../v2020/int/np_harvest_tonnes_usd.csv") %>%
harvest_tonnes_usd_old filter(rgn_id != 213)
<- harvest_tonnes_usd_old %>%
test2 filter(rgn_id == 46)
<- read_csv("output/np_ornamentals_harvest_tonnes_gf.csv")
gf_orn
<- check %>%
check_fofm filter(product == "fish_oil")
plot(check_fofm$new_weight, check_fofm$weight)
abline(0,1, col="red") ## these change because of new SAUP fisheries data...