1 Summary

This document outlines the process for developing linear models to gapfill fisheries management index scores to use as a resilience layer for the 2019 global assessment.

2 Data Sources

The following data are used:

2.1 Fisheries Management Index Data

Fisheries Management Index data were first introduced in a 2017 paper by Melnychuk et al., Fisheries Management Impacts on Target Species Status. Scores range from 0-1 and rate management effectiveness of fisheries in distinct regions on a stock-by-stock basis. The scores are determined by expert surveys that characterize attributes of research, management, enforcement, and socioeconomic factors. The first survey was conducted in 2016 in 28 major fishing countries that collectively account for >80% of global catch. Another survey was performed in 2018 on 40 countries, the scores from which we are using for this OHI resilience layer.

Date retreived: 12 July 2019

Method: Data are not accessible in csv format from website, so points were manually entered into excel and saved as a csv (found in v2019/raw). Because FMI scores only exist for 40 out of the 220 OHI regions, we trained linear models using scores from the AO need layer (rescaled GDP per capita per person purchasing power), GDP per capita, World Governance Index (WGI), Social Progress Indicator (SPI), and UN georegion labels to determine which would be most effective for gapfilling the missing data. After comparing the models, we determined SPI and UN georegions to be the best predictors of FMI, and used these models to gapfill FMI scores for the remaining 80 regions. 20 of these regions remain NA as they are uninhabited.

2.2 SPI

Note: These data were not updated for v2019 at the time of completing the FMI resilience layer.

Citation: http://www.socialprogress.org/

Stern, S., A. Wares and T. Epner. 2018. Social Progress Index: 2018 Methodology Report.

Source information: http://www.socialprogress.org/ –> Download Data

Date Downloaded: 9/21/2018

Time range: 2014-2018

Native data resolution: country scores

Format: Excel file

Description: Social Progress Index scores and components for countries.

2.3 UN georegions

UNgeorgn() loads a dataframe from common.R with UN geopolitical designations, and is commonly used in OHI to gapfill missing data. The distinct regions are derived from the United Nations Statistics Division. Each region is assigned four labels with increasing granularity/specificity: r0_label = World (1 level), r1_label = continental regions (7 levels: Oceania, Asia, Africa, Europe, Southern Islands, Latin America and the Caribbean, and Americas), r2_label = georegions (22 levels for subregions and intermediary regions).

3 Updates from previous assessment

These data have not been updated since 2013, so this is an entirely new method for establishing resilience values.

4 Initial set-up code

#library(devtools)
#devtools::install_github("ohi-science/ohicore@dev")
library(ohicore)
library(tidyverse)
library(stringr)
library(WDI) # for accessing World Bank data 
library(here) 
library(plotly)
library(psych) # for correlation testing

source('https://raw.githubusercontent.com/OHI-Science/ohiprep_v2019/gh-pages/workflow/R/common.R')

5 Load and wrangle FMI data

fmi_raw <- read_csv(here("globalprep/res_fmi/v2019/raw/FMI_data_raw.csv")) %>%
  rename("2016" = fmi_2016) %>% 
  rename("2018" = fmi_2018) %>% 
  gather(key = "year", value = "fmi", -country)

# Add region ID
fmi_rgn <- name_2_rgn(df_in = fmi_raw, 
                       fld_name='country', 
                       flds_unique=c('fmi', 'year')) %>% 
  select(rgn_id, rgn_name, year, fmi) %>% 
  filter(year == 2018) # remove 2016 points so that they don't skew the model

fmi_rgn$year <- as.numeric(fmi_rgn$year)

5.1 Load data from Social Progress Index (SPI) + UN Georegions, create linear models for gapfilling

## Load SPI data

spi <- read_csv(here("globalprep/prs_res_spi/v2018/output/spi_res.csv"))

fmi_spi <- fmi_rgn %>% 
  left_join(spi, by=c("rgn_id","year")) %>% 
  select(-rgn_name)

# Load UN georegion data 
UNgeorgn()

georegions <- UNgeorgn %>%
  select(rgn_id, rgn_name = rgn_label, r0_label, r1_label, r2_label)

fmi_georegions <- fmi_rgn %>% 
  left_join(georegions, by=c("rgn_id","rgn_name"))

all_layers <- fmi_spi %>% 
  rename(spi = resilience_score) %>% 
  left_join(fmi_georegions, by = c("rgn_id", "year", "fmi")) %>% 
  select(rgn_id, rgn_name, fmi, spi, r0_label, r2_label, r1_label)
  
mod6 <- lm(fmi ~ spi + r2_label, data=all_layers)
summary(mod6)  # r^2 = 0.6583
plot(predict(mod6), all_layers$fmi)
abline(0,1, col="red")

mod6.5 <- lm(fmi ~ spi + r1_label, data=all_layers)
summary(mod6.5)  # r^2 = 0.5776
plot(predict(mod6.5), all_layers$fmi)
abline(0,1, col="red")

6 Gapfilling regions without FMI scores using best fit model

More information about the linear model process development can be found in fmi_model_compare.Rmd and the Github issue from v2019.

fmi_gf <- UNgeorgn %>% 
  merge(spi) %>%
  filter(year==2018) %>% 
  left_join(fmi_rgn, by=c("rgn_id", "year")) %>%
  mutate(rgn_label = as.character(rgn_label)) %>% 
  mutate(rgn_label = ifelse(str_detect(rgn_label, "R_union"), "Reunion", rgn_label)) %>% 
  select(-rgn_name, -r0_label, spi=resilience_score)


# Create array of predicted FMI values using fmi ~ spi + r2_label

mod_r2 <- lm(fmi ~ r2_label + spi, data=fmi_gf)
## have to do the predict in a more complicated fashion because some r2 categories have no data, this returns an NA for these
fmi_gf$fmi_pred_r2 <- 
  sapply(1:nrow(fmi_gf), 
           function(i) 
               tryCatch(predict(mod_r2, fmi_gf[i,]), 
                           error=function(e) NA))

# get predictions for the regions not represented by r2 regions:
mod_r1 <- lm(fmi ~ r1_label + spi, data=fmi_gf)
## have to do the predict in a more complicated fashion because some r2 categories have no data, this returns an NA for these
fmi_gf$fmi_pred_r1 <- 
  sapply(1:nrow(fmi_gf), 
           function(i) 
               tryCatch(predict(mod_r1, fmi_gf[i,]), 
                           error=function(e) NA))

# final data and gapfilling recordkeeping

fmi_gf_all <- fmi_gf %>%
  dplyr::mutate(gapfilled = ifelse(is.na(fmi), "1", 0)) %>%
  dplyr::mutate(method = ifelse(is.na(fmi) & !is.na(fmi_pred_r2), "SPI + UN_geopolitical region r2", NA)) %>%
  dplyr::mutate(method = ifelse(is.na(fmi) & is.na(fmi_pred_r2), "SPI + UN_geopolitical region r1" , method)) %>%
  dplyr::mutate(fmi2 = ifelse(is.na(fmi), fmi_pred_r2, fmi)) %>% 
  dplyr::mutate(fmi2 = ifelse(is.na(fmi2), fmi_pred_r1, fmi2)) %>%
  dplyr::mutate(fmi = fmi2) %>%
  dplyr::select(-fmi2)


# make sure all low/no population regions are NA 
low_pop()
low_pop <- low_pop %>%
  filter(est_population < 3000 | is.na(est_population)) #filter out regions that have populations > 3000 and keep NA values 

fmi_gf_low_pop <- fmi_gf_all %>%
  dplyr::filter(rgn_id %in% low_pop$rgn_id)
summary(fmi_gf_low_pop)
  

## Make sure all other regions have an fmi value
fmi_gf_with_pop <- fmi_gf_all %>%
  dplyr::filter(!(rgn_id %in% low_pop$rgn_id))
summary(fmi_gf_with_pop)


# Correct gapfilling info to low pop regions
fmi_gf_all <- fmi_gf_all %>% 
  mutate(gapfilled = ifelse(rgn_id %in% low_pop$rgn_id, 0, gapfilled)) %>% 
  mutate(method = ifelse(rgn_id %in% low_pop$rgn_id, NA, method))

## format final data
fmi_final <- fmi_gf_all %>%
  select(rgn_id, year, value=fmi)
dim(fmi_final) # all regions represented
summary(fmi_final) # 20 NA values


# Save the data 
write_csv(fmi_final, here("globalprep/res_fmi/v2019/output/fmi_res.csv"))


# Save dataframe with gapfilled method and status information
final_fmi_gf <- fmi_gf_all %>%
  select(rgn_id, year, gapfilled, method)
write_csv(final_fmi_gf, here("globalprep/res_fmi/v2019/output/fmi_res_gf.csv"))

OHI 2019 - Fisheries Management Index (Resilience)

Compiled on Thu Aug 22 11:55:39 2019 by mwilliams