4 Methods

## load necessary packages and set up path directories
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyr)
# library(devtools)
# devtools::install_github("ohi-science/ohicore@dev") 
library(ohicore)

# setwd('globalprep/cw_trend_trash/v2016') #comment out when knitting

source('../../../src/R/common.R')

Raw data is on Mazu (NCEAS private server):

## get and format data:
# Read the raw table from the Mazu path, keep the country name plus the 2010
# and 2025 mismanaged-plastic-waste columns (renamed to mpw_2010 / mpw_2025),
# strip the comma separators so the values parse as numbers, and reshape to
# long format with one row per country and mpw column.
trash <- read.csv(file.path(dir_M, 'git-annex/globalprep/_raw_data/MarinePlastics_Jambeck/d2016/1260352_SupportingFile_Suppl_modified.csv')) %>%
  select(Country,
         mpw_2010 = Mismanaged_plastic_waste_in_2010,
         mpw_2025 = Mismanaged_plastic_waste_in_2025) %>%
  mutate(mpw_2010 = as.numeric(gsub(",", "", as.character(mpw_2010))),
         mpw_2025 = as.numeric(gsub(",", "", as.character(mpw_2025)))) %>%
  gather("mpw", "value", starts_with("mpw"))

Convert country names to OHI regions:

## fix a few regions that are reported as one (these will be given the same trend)
# Dhekelia: small British overseas territory on Cyprus
#   (no remapping is applied to it in this chunk)

# Channel Islands: Jersey and Guernsey are also reported individually, so the
# combined "Channel Islands" row is dropped.
trash <- trash %>%
  filter(!(Country %in% "Channel Islands"))

# Lookup that expands "Netherlands Antilles" into the islands not already
# listed on their own (already included: Curacao, Sint Maarten).
antilles <- data.frame(Country = "Netherlands Antilles",
                       country2 = c("Bonaire", "Sint Eustatius", "Saba"),
                       stringsAsFactors = FALSE)

# Rows that match the lookup are repeated once per island and take the island
# name; every other row keeps its original country name.
trash_country_mod <- trash %>%
  left_join(antilles, by = "Country") %>%
  transmute(Country = ifelse(is.na(country2), Country, country2),
            mpw,
            value)

## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## character vector and factor, coercing into character vector

# Map each country name to an OHI region with name_2_rgn(), which is not
# defined in this file (presumably provided by ohicore or common.R -- confirm).
# Per the console output that follows this call, names with no match in the
# lookup tables are dropped and countries sharing a region are reported as
# duplicates.
# NOTE(review): flds_unique = 'mpw' presumably names the column that, together
# with the country, identifies a unique row -- confirm against the ohicore docs.
trash_rgn <- name_2_rgn(df_in = trash_country_mod, 
                       fld_name = 'Country',
                       flds_unique = 'mpw')

## 
## These data were removed for not having any match in the lookup tables:
## 
##                                         Dhekelia 
##                                                1 
##                                      Isle of Man 
##                                                1 
## Palestine (Gaza Strip is only part on the coast) 
##                                                1 
##                                         Svalbard 
##                                                1 
## 
## These data were removed for not being of the proper rgn_type (eez,ohi_region) or mismatching region names in the lookup tables:
## < table of extent 0 x 0 >

## 
## DUPLICATES found. Consider using collapse2rgn to collapse duplicates (function in progress).

## [1] "China"                    "Guadeloupe"              
## [3] "Guam"                     "Hong Kong"               
## [5] "Macao"                    "Martinique"              
## [7] "Northern Mariana Islands" "Puerto Rico"             
## [9] "USVI"

### For duplicate regions, weight by region area
# One row per country that was reported as a duplicate, paired with the OHI
# region it collapses into and the weight used when averaging.
weights <- data.frame(
  Country = c("Puerto Rico", "USVI",
              "Northern Mariana Islands", "Guam",
              "China", "Hong Kong", "Macao",
              "Guadeloupe", "Martinique"),
  rgn_name = rep(c("Puerto Rico and Virgin Islands of the United States",
                   "Northern Mariana Islands and Guam",
                   "China",
                   "Guadeloupe and Martinique"),
                 times = c(2, 2, 3, 2)),
  weight = c(3515, 134, 179, 210, 3705000, 426, 11, 629, 436))

# Countries absent from `weights` get a weight of 1, so a region made of a
# single country keeps its value unchanged; duplicated regions are collapsed
# to one weighted mean per region and mpw column.
trash_rgn <- trash_rgn %>%
  left_join(weights, by = c("Country", "rgn_name")) %>%
  mutate(weight = replace(weight, is.na(weight), 1)) %>%
  group_by(rgn_id, rgn_name, mpw) %>%
  summarise(value = weighted.mean(value, weight)) %>%
  data.frame()

## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factor and character vector, coercing into character vector

## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factor and character vector, coercing into character vector

Gapfilling missing data using a linear model with population data as a predictor:

# Region-level predictor table: keep only the region id and the count column
# from the population file.
population <- read.csv('../../le_world_bank/v2016/output/population_2016.csv')
population <- select(population, rgn_id, count)

# Spread the trash data back to one row per region (mpw_2010 / mpw_2025 as
# columns) and attach it to every region in the population table; regions
# without trash data get NA in both mpw columns.
trash_gf <- left_join(population,
                      spread(trash_rgn, mpw, value),
                      by = "rgn_id")

### Predicting the 2010 data:
# Log-log linear model of 2010 mismanaged plastic waste on the population
# count; 1 is added inside each log so zero values stay defined.
plot(log(mpw_2010+1) ~ log(count+1), data=trash_gf)

mod_2010 <- lm(log(mpw_2010+1) ~ log(count+1), data=trash_gf)
summary(mod_2010)

## 
## Call:
## lm(formula = log(mpw_2010 + 1) ~ log(count + 1), data = trash_gf)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2835 -1.1762  0.1051  1.1552  3.2547 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -2.79059    0.51102  -5.461 1.54e-07 ***
## log(count + 1)  0.81952    0.03418  23.979  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.428 on 182 degrees of freedom
##   (36 observations deleted due to missingness)
## Multiple R-squared:  0.7596, Adjusted R-squared:  0.7583 
## F-statistic:   575 on 1 and 182 DF,  p-value: < 2.2e-16

# Predictions come back on the model's log(mpw + 1) scale.
trash_gf$predict_ln_2010 <- predict(mod_2010, newdata = trash_gf)
# Back-transform with expm1(), the exact inverse of log(x + 1). A plain exp()
# returns mpw + 1 and overstates every gapfilled estimate by 1.
trash_gf$predict_2010 <- expm1(trash_gf$predict_ln_2010)


### Predicting the 2025 data:
# Same model form as for 2010, fitted to the 2025 values.
plot(log(mpw_2025+1) ~ log(count+1), data=trash_gf)

mod_2025 <- lm(log(mpw_2025+1) ~ log(count+1), data=trash_gf)
summary(mod_2025)

## 
## Call:
## lm(formula = log(mpw_2025 + 1) ~ log(count + 1), data = trash_gf)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.8501 -1.3522  0.0712  1.2582  3.4442 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -2.80305    0.52952  -5.294 3.43e-07 ***
## log(count + 1)  0.86168    0.03541  24.332  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.479 on 182 degrees of freedom
##   (36 observations deleted due to missingness)
## Multiple R-squared:  0.7649, Adjusted R-squared:  0.7636 
## F-statistic:   592 on 1 and 182 DF,  p-value: < 2.2e-16

# Log-scale prediction first, then the matching expm1() back-transform so the
# 2010 and 2025 estimates are on the same footing.
trash_gf$predict_ln_2025 <- predict(mod_2025, newdata = trash_gf)
trash_gf$predict_2025 <- expm1(trash_gf$predict_ln_2025)


## save corresponding gapfilling dataset:
# Record which regions rely on model estimates. The 2010 and 2025 values are
# each filled independently when missing, so a region is flagged when either
# year is NA rather than 2010 alone.
#   gapfilled: 1 if any value was estimated, 0 otherwise
#   method:    "lm_population" for gapfilled regions, NA otherwise
#   error:     placeholder column, always NA
gf_trash_data <- trash_gf %>%
  mutate(gapfilled = ifelse(is.na(mpw_2010) | is.na(mpw_2025), 1, 0)) %>%
  mutate(method = ifelse(gapfilled == 1, "lm_population", NA)) %>%
  mutate(error = NA) %>%
  select(rgn_id, gapfilled, method, error)

write.csv(gf_trash_data, 'output/cw_trash_trend_gf.csv', row.names=FALSE)


## get gapfilled values for 2010 and 2025 estimates:
# Step 1: where a reported value is missing, substitute the model prediction
#         for that year.
# Step 2: treat anything below 1 as zero.
trash_trend_data <- trash_gf %>%
  mutate(mpw_2010 = ifelse(is.na(mpw_2010), predict_2010, mpw_2010),
         mpw_2025 = ifelse(is.na(mpw_2025), predict_2025, mpw_2025)) %>%
  mutate(mpw_2010 = ifelse(mpw_2010 < 1, 0, mpw_2010),
         mpw_2025 = ifelse(mpw_2025 < 1, 0, mpw_2025)) %>%
  select(rgn_id, mpw_2010, mpw_2025)

Final calculations. The data are presented for 2010 and 2025. We calculate the proportional change over this 15 year period as:

Pchange_15year = (trash_2025 - trash_2010)/trash_2010

To determine the change per year, we divide Pchange_15year by 15 years. To calculate the trend over a 5-year period, this value is multiplied by 5. The resulting trend is then capped to the range -1 to 1.

## Final calculations:
# Proportional change from 2010 to 2025, rescaled from 15 years to a 5-year
# trend and capped to [-1, 1].
trash_trend <-  trash_trend_data %>%
  # A region with zero waste in both years would give 0/0 = NaN and end up as
  # a missing trend; zero in both years means no change, so report 0 instead.
  # Rows with a missing value in either year still yield NA.
  mutate(trend_15yr = ifelse(mpw_2010 == 0 & mpw_2025 == 0,
                             0,
                             (mpw_2025 - mpw_2010)/mpw_2010)) %>%
  mutate(trend_5yr = trend_15yr/15*5) %>%
  # Cap at +/-1; growth from a zero 2010 baseline is Inf and is capped to 1.
  mutate(trend_5yr = ifelse(trend_5yr > 1, 1, trend_5yr)) %>%
  mutate(trend_5yr = ifelse(trend_5yr < -1, -1, trend_5yr)) %>%
  select(rgn_id, trend = trend_5yr)
write.csv(trash_trend, 'output/cw_trash_trend.csv', row.names=FALSE)

OHI: Clean Waters/Trash trend

Compiled on Tue Dec 13 13:27:18 2016 by frazier

1 Summary

2 Updates from previous assessment

3 Data Source

4 Methods