
[REFERENCE RMD FILE: http://ohi-science.org/ohiprep_v2021/globalprep/np/v2021/STEP1a_np_ornamentals_prep.html]

Summary

This analysis converts FAO commodities data into data layers used to calculate OHI 2024 global natural products scores. We prep the seaweed, fish oil, and ornamental fish commodities so that we can produce USD values for weighting later on; however, the final layer from this data prep (saved to the output folder) consists only of ornamental fish. We also calculate a sustainability layer for ornamental fishing based on risk and exposure.

Updates from previous assessment

v2024

  • New year of FAO data (2022)
  • Updated links to datasets
  • Updated file paths to be more reproducible and cross-platform (not dependent on the direction of the directory separator)
  • Updated data cleaning to follow tidyverse style, use of janitor::clean_names()
  • Updated read.csv and write.csv to read_csv and write_csv
  • Updated fao_fxn.R to use str_detect("...") instead of value == "..." to properly replace FAO “missing value” codes with NAs.
  • Added extra 0 --> NA correction for specific conditions where this is appropriate (before gapfilling). See section: Add FAO-zero-correction

v2023

New year of FAO data (2021). Replaced deprecated functions (replace_at(), spread(), gather())


Data Source

Reference:
https://www.fao.org/fishery/en/statistics/software/fishstatj (app release date: March 2024)

  • FAO raw commodities quantity 1976_2021
  • FAO raw commodities value 1976_2022
  • FAO metadata

  • Global aquatic trade - All partners aggregated - Quantities and Values - 1976-2022 (Release date: July 2024)

Downloaded: 2024-08-07

Description: Quantity (tonnes) and value (USD) of raw commodities (Exports only) for each country, taxa, year. The FAO data is subset to include commodities in these categories: ornamental fish, fish oil, seaweed and plants (see: raw/commodities2products_weighting.csv for details).

Time range: 1976-2022

Files can be found on Mazu: home/shares/ohi/git-annex/globalprep/_raw_data/FAO_commodities/d2024 accompanied by README.md with detailed download instructions.


Methods

knitr::opts_chunk$set(eval = FALSE)

# ======= Load packages ============
if (!require(librarian)) {
  install.packages("librarian")
  library(librarian)
}
librarian::shelf(
  ohicore, #devtools::install_github('ohi-science/ohicore@dev') #if relevant: restart session after reinstalling
  dplyr,
  stringr,
  tidyr,
  here,
  tidyverse,
  zoo,
  ggplot2,
  plotly,
  tictoc,
  RColorBrewer
)

# ======= Set directories ===========
# Update scenario year, set up programmatic scenario year updates
scen_year_number <- 2024
scen_year <- as.character(scen_year_number)
prev_scen_year <- as.character(scen_year_number - 1)
data_dir_year <- paste0("d", scen_year)
prev_data_dir_year <- paste0("d", prev_scen_year)
v_scen_year <- paste0("v", scen_year)

current_np_dir <- here::here("globalprep", "np", v_scen_year)

# Load FAO-specific user-defined functions
source(here::here("workflow", "R", "fao_fxn.R")) # function for cleaning FAO files
source(here::here("workflow", "R", "common.R")) # directory locations
## This file makes it easier to process data for the OHI global assessment
##  by creating the following objects:
## 
##  * dir_M = identifies correct file path to Mazu (internal server) based on your operating system
##  * mollCRS = the crs code for the mollweide coordinate reference system we use in the global assessment
##  * regions_shape() = function to load global shapefile for land/eez/high seas/antarctica regions
##  * ohi_rasters() = function to load two rasters: global eez regions and ocean region
##  * region_data() = function to load 2 dataframes describing global regions 
##  * rgn_syns() = function to load dataframe of region synonyms (used to convert country names to OHI regions)
##  * low_pop() = function to load dataframe of regions with low and no human population
##  * UNgeorgn = function to load dataframe of UN geopolitical designations used to gapfill missing data
source(here::here(current_np_dir, "R", "np_fxn.R")) # function for handling FAO commodity data specific to NP

Import Raw Data: FAO Commodities

Simultaneously read and process FAO commodities value and quantity data.

## NOTE: This can be run as a loop, but the "value" and "quant" datasets need to be run individually to make sure
##  there are no problems (after this check, they can be looped for efficiency)


## list files included in d2020 folder (value and quant datasets)
#dir_fao_data <- file.path(dir_M, paste0('git-annex/globalprep/_raw_data/FAO_commodities/d', scen_year))
dir_fao_data <- here::here(dir_M, "git-annex", "globalprep", "_raw_data", "FAO_commodities", data_dir_year)

files <- list.files(dir_fao_data, pattern = glob2rx('*.csv'), full.names = TRUE)

## To compare to old data:
#dir_fao_data_old <- file.path(dir_M, paste0('git-annex/globalprep/_raw_data/FAO_commodities/d', prev_scen_year))
dir_fao_data_old <- here::here(dir_M, "git-annex", "globalprep", "_raw_data", "FAO_commodities", prev_data_dir_year)

files_old <- list.files(dir_fao_data_old, pattern = glob2rx('*.csv'), full.names = TRUE)

# ===== loop ================
for (f in files) {
  #f <- files[2] # un-comment and update to test individual files # 1 = quant, 2 = usd
  cat(sprintf('\n\n\n====\nfile: %s\n', basename(f)))
  
  #d <- read.csv(f, check.names = FALSE, strip.white = TRUE, stringsAsFactors = FALSE) # stringsAsFactors=T
  # checks names syntactically, strips leading and trailing whitespace, prevents conversion of characters to factors 
  
  d <- readr::read_csv(f) # makes this a tibble, not a dataframe (check with is_tibble(d))
  # ignore the warning here -- it's just letting you know about an empty final row (happened quietly in previous read.csv code)
  
  # Specifies that units are 'tonnes' if we are reading in the Commodities Quantity data csv, and 'usd' if we are reading in the Commodities Value data csv
  units <- c("tonnes", "usd")[str_detect(f, c("quant", "value"))] # detect unit name using lowercase American English

  # ---- Preliminary cleaning & tidying ----
  ## gather into long format and clean up FAO-specific data foibles
  ## warning: attributes are not identical across measure variables; they will be dropped: this is fine (didn't get this warning in v2024 once code was updated)
  m <- d %>% 
    janitor::clean_names() %>% 
    dplyr::select(-c(unit_name)) %>% # "Tonnes – net product weight" == TPW
    rename(country = reporting_country_name,
           commodity = commodity_name,
           trade = trade_flow_name) %>%
    rename_with(~ gsub("^x", "", .)) %>% # tidy up year column names (clean_names() prefixed them with "x"); anchored to the start of the name so any other "x"s are untouched
    pivot_longer(cols = -c(country, commodity, trade, unit),
                   names_to = "year", values_to = "value") 
  
  # ---- Include only the "Exports" data ----
  m <- m %>%
    filter(trade == "Exports")

  # ---- Run fao data cleaning function ----
  # cleans up flags, swaps out FAO-specific codes for analysis
  m <- m %>%
    fao_clean_data_new() %>%  # swaps out FAO-specific codes. NOTE: optional 
    # parameter 'sub_N' can be passed to control how an 'N' code is interpreted.
    dplyr::select(-c(trade, unit)) %>% # drop 'trade' and 'unit' columns
    arrange(country, commodity, is.na(value), year)
  # warning: NAs introduced by coercion

  
  # ---- Products join ----
  ## attach product categories from com2prod, and filter out all entries that 
  ## do not match a product category.
  ## Note: commodity_lookup is user-defined function to compare commodities
  ##  in data vs commodities in lookup table
  
  # Load lookup for converting commodities to products
  #com2prod <- read.csv(here::here(current_np_dir, "raw", "commodities2products_weighting.csv"), na.strings = '')
  com2prod <- readr::read_csv(here::here(current_np_dir, "raw", "commodities2products_weighting.csv"), na = '')
  
  ## version used in 2019:
  ##    read.csv(here('globalprep/np/v2019/raw/commodities2products.csv'), na.strings='')
  ## version used in 2018:
  ##    com2prod <- read.csv('raw/commodities2products.csv', na.strings='')
  ## version used in 2015: use when testing....
  ##    com2prod <- read.csv('../v2014_test/commodities2products.csv', na.strings='')
    
  ## Check the current commodity-to-product lookup table. If necessary, make changes to "raw/commodities2products_weighting.csv"
  ## v2021: Missing a couple in each category. 
  # Examine the fish oil category thoroughly: some of the matches caught here are NOT fish oil. For example, "Whole lobsters Homarus spp, in shell or not, dried, salted/in brine, smoked, cooked by steaming/boiling in water" is included because "boiling" contains "oil". Add any appropriate missing commodities to commodities2products_weighting.csv.
  # Don't worry about any that show up under corals, shells, or sponges, since we don't include them in the assessment anyway. We keep them for now because we use them as an example in the methods document (see below).
  np_commodity_lookup(m, com2prod) 
    
  ## inner_join will attach product names to matching commodities according to
  ##    lookup table 'com2prod', and eliminate all commodities that do not appear in the lookup table.
  m <- m %>%
      inner_join(com2prod, by = 'commodity')
  # many-to-many relationship is expected
    
    
  ## Special case: user-defined function deals with 
  ##   breaking up Antilles into separate reported rgns
  m <- ohicore::split_regions(m)
    
  ## Some changes to region names that aren't working in name_2_rgn()
  m <- m  %>%
    filter(country != "Azerbaijan") 

  m_rgn <- ohicore::name_2_rgn(df_in = m,
                      fld_name = 'country', 
                      flds_unique = c('commodity', 'product', 'year'))
  
  # check duplicates
  # m_duplicates <- m_rgn[duplicated(m_rgn[, c("rgn_name", "year", "commodity", "product")]),]
  # unique(m_duplicates$country)
  # unique(m_duplicates$rgn_name)

  
  # v2024 removed: 
  # Ascension, Saint Helena and Tristan da Cunha -- new syntax from previous years, will update ohicore::split_regions() to account for this change
  # Yugoslavia SFR  -- this is fine, since it broke up into the six modern countries: Croatia, Montenegro, Serbia, Slovenia, Bosnia and Herzegovina, and Macedonia in 1990-1992, which means the relevant data is still captured.
  
  # v2021 duplicates:  [1] "China" "China, Hong Kong SAR" "China, Macao SAR"  "Guadeloupe"           
  # [5] "Martinique" "Montenegro"  "Russian Federation" "Serbia and Montenegro"
  # [9] "Sudan" "Sudan (former)" "Un. Sov. Soc. Rep."   - these are all fixed in the group by and summarise below  
  
  # v2024 duplicates for quantity and value: 
  # [1] "China, Hong Kong SAR"  "China, Macao SAR"      "Martinique"    
  # [4] "Serbia and Montenegro" "Sudan (former)"        "Un. Sov. Soc. Rep." 
  
  ## combine composite regions
  ## When summarizing the dataset, this function provides a modified way to sum the value column while maintaining NA values when both variables are NA (rather than turning to zero values). The function will sum non-NA values normally.
  sum_function <- function(x) {
    if (sum(is.na(x)) == length(x)) 
      return(NA)
    return(sum(x, na.rm = T))}
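  # e.g., sum_function(c(NA, NA)) returns NA, whereas sum(c(NA, NA), na.rm = TRUE) returns 0;
  # sum_function(c(1, NA, 2)) returns 3 (non-NA values are summed as usual)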
  
  m_rgn <- m_rgn %>%
    group_by(rgn_id, rgn_name, commodity, product, year) %>%
    summarize(value = sum_function(value)) %>%
    ungroup()

  ## units: rename value field to units based on filename
  names(m_rgn)[names(m_rgn) == 'value'] <- units  
  
  ## output to .csv - should create two csvs (tonnes.csv and usd.csv)
  harvest_out <- here::here(current_np_dir, "int", sprintf("%s.csv", units))
    #(paste0('globalprep/np/v', scen_year, '/int/%s.csv')), units)
  readr::write_csv(m_rgn, harvest_out, na = '')
}

Data Wrangle

Combine the quantity and value data, with a bit of cleaning to remove data prior to the first reporting year for each commodity and region.

# Read in quant dataset from intermediate folder
h_tonnes <- read_csv(here(current_np_dir, "int", "tonnes.csv"))

# Read in value dataset from intermediate folder
h_usd <- read_csv(here(current_np_dir, "int", "usd.csv"))

# concatenate h_tonnes and h_usd data
## h includes rgn_name, rgn_id, commodity, product, year, tonnes, usd.
h <- h_usd %>%
    full_join(h_tonnes, by = c('rgn_name', 'rgn_id', 'commodity', 'product', 'year')) %>%
    mutate(commodity = as.character(commodity)) %>%
    arrange(rgn_id, product, commodity, year) 

# clip out years prior to first reporting year, for each commodity per region
h <- h %>% np_harvest_preclip()
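
# For reference, a conceptual sketch (NOT the actual np_harvest_preclip() source, which
# lives in np_fxn.R) of what the pre-clip step above does: within each region-commodity
# group, drop rows before the first year with any reported value, and drop groups that
# never report anything
# h <- h %>%
#   group_by(rgn_id, commodity) %>%
#   filter(any(!is.na(tonnes) | !is.na(usd))) %>%
#   filter(year >= min(year[!is.na(tonnes) | !is.na(usd)])) %>%
#   ungroup()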


# save a file to use in our methods document 
readr::write_csv(h, here(current_np_dir, "int", "h_methods_sum.csv"))

# test <- h %>%
#   group_by(product) %>%
#   summarise(sum_tonnes = sum(tonnes, na.rm = TRUE),
#             sum_usd = sum(usd, na.rm = TRUE)) ## we will show this table in the methods doc

# ---- filter for our commodities of interest ----
h <- h %>%
    dplyr::filter(product %in% c("fish_oil", "ornamentals", "seaweeds")) 

Add FAO-zero-correction

Some regions had 0 E or 0.00 (with no “…” missing-data flag) recorded for tonnes while simultaneously reporting non-zero USD data (or the reverse). Before gapfilling, we need to replace zeros with NAs in two cases:

1.    When `tonnes` is equal to 0 and `usd` is not `NA` and `usd` is greater than 0

2.    When `usd` is equal to 0 and `tonnes` is not `NA` and `tonnes` is greater than 0
# replace certain zeros with NA to flag for gapfilling
h_zero_na <- h %>% 
  mutate(tonnes = ifelse(tonnes == 0 & !is.na(usd) & usd > 0, NA, tonnes)) %>% 
  mutate(usd = ifelse(usd == 0 & !is.na(tonnes) & tonnes > 0, NA, usd))

check_start <- max(h$year) - 5
check_stop <- max(h$year)
# check Singapore
h %>% filter(rgn_id == 208,
             year %in% (check_start:check_stop), # feel free to update dates
             product %in% c("ornamentals")) %>% 
  arrange(desc(year))

h_zero_na %>% filter(rgn_id == 208,
                     year %in% (check_start:check_stop),
                     product %in% c("ornamentals")) %>% 
  arrange(desc(year))

# nice!

Gapfilling

Summary of gapfilling that is performed:

  • Zero-fill: for observations with NAs for both values (tonnes & usd), fill both as zero. Also cross-fills zeros where one value is zero, other is NA.
  • Regression fill, first pass: Where enough non-zero paired observations exist at the country level, use country-level data to create regression models (tonnes ~ usd and vice versa) for gapfilling (a conceptual sketch follows this list). About 25% success.
  • Regression fill, second pass: Where pass 1 failed, and enough non-zero paired observations exist at georegional level, use georegional-level data to create regression models (tonnes ~ usd + year, and vice versa) for gapfilling. About 90% success.
  • Regression fill third pass: Where passes 1 and 2 failed, use global-scale data to create regression models (tonnes ~ usd + year, and vice versa) for gapfilling. 100% success.
  • End-fill: For years where NAs still exist in final year, carry forward data from prior year (after other gapfilling techniques).
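
To make the regression passes concrete, here is a minimal sketch of the first-pass idea, assuming columns rgn_id, commodity, tonnes, and usd. This is illustration only: np_regr_fill() (defined in np_fxn.R) is the actual implementation, which additionally restricts to a years_back window, filters to non-zero pairs, and fills usd ~ tonnes as well.

# Conceptual sketch only -- NOT the np_regr_fill() source
regr_fill_sketch <- function(h, min_paired_obs = 4) {
  h %>%
    dplyr::group_by(rgn_id, commodity) %>%
    dplyr::mutate(
      n_paired = sum(!is.na(tonnes) & !is.na(usd) & tonnes > 0 & usd > 0),
      tonnes = if (dplyr::first(n_paired) >= min_paired_obs) {
        mdl <- lm(tonnes ~ usd) # lm() drops NA rows when fitting
        ifelse(is.na(tonnes) & !is.na(usd), # fill tonnes only where usd is known
               predict(mdl, newdata = data.frame(usd = usd)),
               tonnes)
      } else tonnes
    ) %>%
    dplyr::ungroup() %>%
    dplyr::select(-n_paired)
}
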
h <- h_zero_na %>% np_harvest_gapflag()
## Adds flag for required gap-filling, based upon NAs in data. 
## NOTE: Does not perform any gap-filling.
## At this point, h includes: 
##    rgn_name   rgn_id   commodity   product   year   tonnes   usd   gapfill
## 'gapfill' will be in (zerofill, endfill, tbd, none)

data_check <- h %>% np_datacheck()
## for each commodity within each region, creates (but doesn't save...) summary info:
##   num_years:        the length of the data series for this commodity in this region
##   usd_unique_nz:    (or 'tns') number of unique non-zero values for usd or tonnes 
##   usd_na & tns_na:  number of NA occurrences
##   paired_obs:       number of non-zero paired observations
##   usd_unique_pairs: (or 'tns') within set of paired observations, count of unique usd and tonnes
##   unique_pairs:     lesser of usd_unique_pairs and tns_unique_pairs
##   count_no_data:    number of paired NAs - years with no value reported

h <- h %>% np_zerofill()
## for post-reporting years with NA for both tonnes and USD, fill zero - 
##    assumes that non-reporting indicates zero harvest to report.
## Also cross-fills zeros where one side is 0, other is NA (not flagged as gapfill)

h <- h %>% np_lowdata_filter()
## Exclude commodities (within a region) that have few non-zero data points.
## Optional parameter with default: nonzero_h_yr_min = 4
## NOTE: This filter has consequences for the regression, but also has meaning in terms of 
##    not inflicting a penalty on regions trying, and then stopping, an experimental harvest.

## Melanie's script to add a georegional ID tag based on country keys and IDs.
h <- h %>%
  add_georegion_id()

h <- h %>% np_regr_fill(years_back = 10, vars = 'td', scope = 'rgn_id')
h <- h %>% np_regr_fill(vars = 'tdy', scope = 'georgn_id')
h <- h %>% np_regr_fill(vars = 'tdy', scope = 'global')
## np_regr_fill() is a generalized regression gapfill function. Parameters (with defaults):
## * years_back=50 (int):     This determines how far back in the time series to include within the regression.
## * min_paired_obs=4 (int):  This determines how many paired observations are required to attempt a regression.
## * scope = 'rgn_id' (str):  ('rgn_id', 'georgn_id', 'global') Determines grouping scale for regression.
## * vars = 'tdy' (str):      ('td', 'tdy') Determines model: (tonnes ~ usd) or (tonnes ~ usd + year) [and vice versa]

h <- h %>% np_end_fill()
## For final year of data, if both usd and tonnes originally reported as NA, pull forward
##    values for usd and tonnes from the previous year.  This should happen after regression fill.

h_comm <- h
## Store commodity-level data, before moving on to the product-level smoothing.

## Output gapfilling report to .csv files.
## Very little usd gapfilling occurred, and none in recent years (usd data is used to weight contributions), so we will ignore it: gapfill == "r2_u_gr"
h_gap <- h %>%
  mutate(gapfill = ifelse(gapfill == "r2_u_gr", "none", gapfill)) %>%   # focusing only on tonnes gapfilling
  dplyr::select(rgn_id, commodity, product, year, gapfill)

h_gap_ornamentals <- h_gap %>%
  filter(product == "ornamentals")


readr::write_csv(h_gap, here(current_np_dir, "output", "np_ornamentals_harvest_tonnes_gf.csv"), na = '')

Final Data Wrangling

Summarize values

Summarize each product per country per year, e.g., all corals in Albania in 2011, and do some error checking.

# summarize product values per country per year
h_prod <- h_comm %>% # copy-on-modify triggered for h_comm (defined earlier)
  group_by(rgn_name, rgn_id, product, year) %>%
  summarize(tonnes = sum(tonnes, na.rm = TRUE), 
            usd = sum(usd, na.rm = TRUE))
          
## Error-checking and table exports to see if there are duplicates
stopifnot(sum(duplicated(h_prod[ , c("rgn_id", "product", "year")])) == 0)

Quick Data Check

Look at the wide-format data, with all commodities plus a product subtotal row (where the commodity column value is “Z_TOTAL”), compared with the input data prior to summing.

h_x_tonnes <- h_comm %>% 
  bind_rows(mutate(h_prod, commodity = "Z_TOTAL")) %>%
  dplyr::select(rgn_name, rgn_id, commodity, product, year, tonnes) %>%
  arrange(rgn_name, product, commodity, year) %>%
  pivot_wider(names_from = year, values_from = tonnes)

h_x_usd <- h_comm %>% 
  bind_rows(mutate(h_prod, commodity = "Z_TOTAL")) %>%
  dplyr::select(rgn_name, rgn_id, commodity, product, year, usd) %>%
  arrange(rgn_name, product, commodity, year) %>%
  pivot_wider(names_from = year, values_from = usd)

## Check a random country and commodity
australia <- h_x_usd %>% filter(product == "ornamentals", rgn_name == "Australia") 
australia

## Can open up in Excel to compare subtotals per country-product-year
readr::write_csv(h_x_tonnes, here(current_np_dir, "int", "np_harvest_tonnes_wide.csv"), na = 'NA')

readr::write_csv(h_x_usd, here(current_np_dir, "int", "np_harvest_usd_wide.csv"), na = 'NA')

Calculate Rolling Averages

Calculate rolling averages for tonnes and USD in order to identify peak values. This is based upon total harvests by product group, not individual commodity.
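
As a quick illustration of the partial = TRUE behavior used below (toy numbers, not OHI data): with align = "right", zoo::rollapply() averages up to width = 4 trailing values, and simply uses however many values are available at the start of the series rather than returning NA.

x <- c(2, 4, 6, 8, 10)
zoo::rollapply(x, width = 4, FUN = mean, align = "right", partial = TRUE)
## [1] 2 3 4 5 7   (e.g., the final value is mean(c(4, 6, 8, 10)) = 7)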

# Find max year in the summarized data table
year_max <- max(h_prod$year)

roll_prod <- h_prod %>%
  arrange(rgn_id, product, year) %>%
  group_by(rgn_id, product) %>%
  mutate(
      tonnes_rollmean = rollapply(tonnes, width = 4, FUN = mean, align = 'right', partial = TRUE, na.rm = FALSE),
      usd_rollmean    = rollapply(   usd, width = 4, FUN = mean, align = 'right', partial = TRUE, na.rm = FALSE)) %>%
  rename(
      tonnes_orig = tonnes, # prevent overwriting of reported and gapfilled values
      usd_orig    = usd) %>% # prevent overwriting of reported and gapfilled values
  mutate(
      tonnes = ifelse(!is.na(tonnes_rollmean), tonnes_rollmean, tonnes_orig),
      usd    = ifelse(!is.na(usd_rollmean),    usd_rollmean,    usd_orig)) %>%
  dplyr::select(rgn_id, rgn_name, product, year, tonnes, usd, tonnes_orig, usd_orig)

Score Harvest Relative to Peaks

Score harvest (tonnes and usd) relative to peaks, and output values as .csvs.

# Find peak harvest per region-product
## Find max tonnes value across all years for each region-product
peak_prod <- roll_prod %>%
    group_by(rgn_id, product) %>%
    mutate(tonnes_peak = max(tonnes, na.rm = T)) %>%
    ungroup() 

## Determine relative status:
smooth_prod <- peak_prod %>% 
    mutate(tonnes_rel = ifelse(tonnes >= tonnes_peak, 1, tonnes / tonnes_peak))

Save data layer

## Write entire data frame to .csv:
readr::write_csv(smooth_prod, here(current_np_dir, "int", "np_harvest_smoothed_data.csv"), na = '')

## Save tonnes/usd data for weighting purposes 
tonnes <- smooth_prod %>%
  dplyr::select(rgn_id, product, year, tonnes, usd) 

readr::write_csv(tonnes, here(current_np_dir, "int", "np_harvest_tonnes_usd.csv"), na = '')

## Save relative tonnes data for the ornamentals layer 
tonnes_ornamentals_rel <- smooth_prod %>%
  dplyr::filter(product == "ornamentals") %>%
  dplyr::select(rgn_id, product, year, tonnes_rel)

readr::write_csv(tonnes_ornamentals_rel, here(current_np_dir, "output", "np_ornamentals_harvest_tonnes_rel.csv"), na = '')

Save exposure and risk layers for ornamentals (so that we can calculate sustainability in ohi-global functions.R)

# ==== calculates NP exposure based on habitats (ornamentals) =============
    ### Returns the first input data frame with a new column for exposure:
    ### [rgn_id rgn_name product year tonnes tonnes_rel prod_weight exposure]
    #########################################.
    
np_harvest_orn <- read_csv(here(current_np_dir, "int", "np_harvest_tonnes_usd.csv")) %>%
  dplyr::select(year, rgn_id, product, tonnes) %>%
  filter(product == "ornamentals") 

### Determine Habitat Areas for Exposure

hab_rocky <- read_csv(here("globalprep", "hab_rockyreef", "v2012", "data", "habitat_extent_rocky_reef_updated.csv")) %>%
  dplyr::select(rgn_id, km2) %>%
  dplyr::filter(km2 > 0)


hab_coral <- read_csv(here("globalprep", "hab_coral", "v2021", "data", "habitat_extent_coral_updated.csv")) %>%
  dplyr::select(rgn_id, km2) %>%
  dplyr::filter(km2 > 0)

### area for products in both coral and rocky reef habitats: shells, ornamentals, sponges
area_dual_hab <- np_harvest_orn %>%
  dplyr::left_join(hab_coral %>%
                     dplyr::rename(coral_km2 = km2),
                   by = c('rgn_id')) %>%
  left_join(hab_rocky %>%
              dplyr::rename(rocky_km2 = km2),
            by = c('rgn_id')) %>%
  dplyr::rowwise() %>%
  dplyr::mutate(km2 = sum(c(rocky_km2, coral_km2), na.rm = TRUE)) %>%
  dplyr::filter(km2 > 0) %>%
  dplyr::select(-coral_km2, -rocky_km2)

### Determine Exposure
### exposure: combine areas, get tonnes / area, and rescale with log transform
np_exp_orn <- area_dual_hab %>%
  dplyr::mutate(expos_raw = ifelse(tonnes > 0 &
                                     km2 > 0, (tonnes / km2), 0)) %>%
  dplyr::group_by(product) %>%
  dplyr::mutate(expos_prod_max = max(expos_raw, na.rm = TRUE)) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(exposure = (log(expos_raw + 1) / log(expos_prod_max + 1)),
                exposure = ifelse(exposure > 1, 1, exposure)) %>%
  dplyr::select(-km2, -expos_raw, -expos_prod_max)
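
### note: exposure = log(expos_raw + 1) / log(expos_prod_max + 1) is at most 1 by
### construction (expos_raw <= expos_prod_max within each product), so the cap above
### is just a defensive guard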

### clean up columns
gap_fill <- np_exp_orn %>%
  dplyr::mutate(gapfilled = ifelse(is.na(exposure), 1, 0)) %>%
  dplyr::mutate(method = ifelse(is.na(exposure), "prod_average", NA)) %>%
  dplyr::select(rgn_id, product, year, gapfilled, method)

readr::write_csv(gap_fill, here(current_np_dir, "output", "np_exposure_ornamentals_gf.csv"))

### add exposure for countries with (habitat extent == NA)
np_exp_orn <- np_exp_orn %>%
  dplyr::group_by(product) %>%
  dplyr::mutate(mean_exp = mean(exposure, na.rm = TRUE)) %>%
  dplyr::mutate(exposure = ifelse(is.na(exposure), mean_exp, exposure)) %>%
  dplyr::select(-mean_exp) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(product = as.character(product)) %>%
  dplyr::select(rgn_id, year, product, exposure)

##save exposure as a layer
readr::write_csv(np_exp_orn, here(current_np_dir, "output", "np_exposure_ornamentals.csv"))


### calculates NP risk based on:
###   ornamentals:      risk = 1 if blast or cyanide fishing
### Returns a data frame of risk, by product, by region:
###
#########################################.


### Determine Risk

r_cyanide <- 
  read_csv(here("globalprep", "np_prs_poison_blast_fishing", "v2013", "data", "gl_thr_poison_3nm_rgn2013.csv")) %>%
  #AlignDataYears(layer_nm = "np_cyanide", layers_obj = layers) %>%
  dplyr::filter(!is.na(score) & score > 0) %>%
  dplyr::select(rgn_id,
                #year = scenario_year,
                cyanide = score)

r_blast <-
  read_csv(here("globalprep", "np_prs_poison_blast_fishing", "v2013", "data", "gl_thr_blast_3nm_rgn2013.csv")) %>%
  #AlignDataYears(layer_nm = "np_blast", layers_obj = layers)  %>%
  filter(!is.na(score) & score > 0) %>%
  dplyr::select(rgn_id,
         #year = scenario_year,
         blast = score)


### risk for ornamentals set to 1 if blast or cyanide fishing present, based on Nature 2012 code
###  despite Nature 2012 Suppl saying Risk for ornamental fish is set to the "relative intensity of cyanide fishing"
risk_orn <- r_cyanide %>%
  full_join(r_blast, by = c("rgn_id")) %>%
  mutate(ornamentals = 1) %>%
  dplyr::select(rgn_id, ornamentals)

### risk as binary
np_risk_orn <-
  expand.grid(
    rgn_id  = unique(np_harvest_orn$rgn_id),
    year = unique(np_harvest_orn$year)
  ) %>%
  ### ornamentals
  left_join(risk_orn, by = c("rgn_id"))  %>%
  mutate(ornamentals = ifelse(is.na(ornamentals), 0, ornamentals)) %>%
  pivot_longer(cols = -c(rgn_id, year), names_to = "product", values_to = "risk") %>%
  mutate(product = as.character(product))

np_risk_gf <- np_risk_orn %>%
  mutate(gapfilled = 0, method = NA) %>%
  dplyr::select(-risk)

readr::write_csv(np_risk_gf, here(current_np_dir, "output", "np_risk_ornamentals_gf.csv"))

## write this as a risk layer 
readr::write_csv(np_risk_orn, here(current_np_dir, "output", "np_risk_ornamentals.csv"))