ohi logo
OHI Science | Citation policy

[REFERENCE RMD FILE: https://cdn.rawgit.com/OHI-Science/ohiprep/master/globalprep/CW_trend_trash/v2016/cw_trend_trash_dataprep.html]

1 Summary

We describe how we calculated global trash trends for the clean water goal of the OHI 2016 assessment.

2 Updates from previous assessment

Previously, we used trends in coastal population as a proxy for trash trends. These new data describe mismanaged plastic waste in 2010 and projected mismanaged plastic waste in 2025, from which we estimate the trend.


3 Data Source

Reference: Jambeck et al. 2015. Plastic waste inputs from land into the ocean. Science 347:768-771.

Description: Describes (Supplementary data) mismanaged plastic waste in 2010 and estimates mismanaged plastic waste in 2025. Using these data, we estimate trash trends during a 5 year period.

Native data resolution: Country scale

Format: Excel table in Supplementary data.


4 Methods

## load necessary packages and set up path directories
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
# library(devtools)
# devtools::install_github("ohi-science/ohicore@dev") 
library(ohicore)

# setwd('globalprep/cw_trend_trash/v2016') #comment out when knitting

source('../../../src/R/common.R')

Raw data is on Mazu (NCEAS private server):

## Read the supplementary table, keep the country name and the two
## mismanaged-plastic-waste columns, strip thousands separators so the values
## parse as numbers, then reshape to long format (one row per country x year).
trash <- read.csv(file.path(dir_M, 'git-annex/globalprep/_raw_data/MarinePlastics_Jambeck/d2016/1260352_SupportingFile_Suppl_modified.csv')) %>%
  select(Country,
         mpw_2010 = Mismanaged_plastic_waste_in_2010,
         mpw_2025 = Mismanaged_plastic_waste_in_2025) %>%
  mutate(mpw_2010 = as.numeric(gsub(",", "", as.character(mpw_2010))),
         mpw_2025 = as.numeric(gsub(",", "", as.character(mpw_2025)))) %>%
  gather(key = "mpw", value = "value", starts_with("mpw"))

Convert country names to OHI regions:

## Handle source entries that do not map one-to-one onto OHI regions
## (entries reported as one unit are given the same trend).
# Dhekelia: small British overseas territory on Cyprus.
# Channel Islands: Jersey and Guernsey are also reported individually in the
# source table, so the combined "Channel Islands" entry is dropped.
trash <- filter(trash, !(Country %in% "Channel Islands"))

## Lookup that expands the single "Netherlands Antilles" entry into its
## member islands (Curacao and Sint Maarten are already listed separately in
## the source data, so they are not repeated here).
antilles <- data.frame(Country = "Netherlands Antilles",
                       country2 = c("Bonaire", "Sint Eustatius", "Saba"),
                       stringsAsFactors = FALSE)

## Join the lookup; countries without an expansion keep their own name.
trash_country_mod <- trash %>%
  left_join(antilles, by = "Country") %>%
  transmute(Country = ifelse(is.na(country2), Country, country2),
            mpw,
            value)
## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## character vector and factor, coercing into character vector
## Map country names to OHI regions with ohicore's name_2_rgn(); 'mpw'
## identifies the year column so each country x year row is treated as unique.
trash_rgn <- name_2_rgn(
  df_in       = trash_country_mod,
  fld_name    = 'Country',
  flds_unique = 'mpw'
)
## 
## These data were removed for not having any match in the lookup tables:
## 
##                                         Dhekelia 
##                                                1 
##                                      Isle of Man 
##                                                1 
## Palestine (Gaza Strip is only part on the coast) 
##                                                1 
##                                         Svalbard 
##                                                1 
## 
## These data were removed for not being of the proper rgn_type (eez,ohi_region) or mismatching region names in the lookup tables:
## < table of extent 0 x 0 >
## 
## DUPLICATES found. Consider using collapse2rgn to collapse duplicates (function in progress).
## [1] "China"                    "Guadeloupe"              
## [3] "Guam"                     "Hong Kong"               
## [5] "Macao"                    "Martinique"              
## [7] "Northern Mariana Islands" "Puerto Rico"             
## [9] "USVI"
### Several source countries map to the same OHI region. Collapse them to one
### value per region using an area-weighted mean.
weights <- data.frame(
  Country  = c("Puerto Rico", "USVI",
               "Northern Mariana Islands", "Guam",
               "China", "Hong Kong", "Macao",
               "Guadeloupe", "Martinique"),
  rgn_name = c(rep("Puerto Rico and Virgin Islands of the United States", 2),
               rep("Northern Mariana Islands and Guam", 2),
               rep("China", 3),
               rep("Guadeloupe and Martinique", 2)),
  weight   = c(3515, 134,
               179, 210,
               3705000, 426, 11,
               629, 436))

### Regions that are not duplicated get weight 1, so their weighted mean is
### simply their single reported value.
trash_rgn <- trash_rgn %>%
  left_join(weights, by = c("Country", "rgn_name")) %>%
  mutate(weight = ifelse(is.na(weight), 1, weight)) %>%
  group_by(rgn_id, rgn_name, mpw) %>%
  summarize(value = weighted.mean(value, weight)) %>%
  data.frame()
## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factor and character vector, coercing into character vector
## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factor and character vector, coercing into character vector

Gapfilling missing data using a linear model with population data as a predictor:

## Region population counts (from the le_world_bank prep output); used as the
## predictor for gapfilling.
population <- select(
  read.csv('../../le_world_bank/v2016/output/population_2016.csv'),
  rgn_id, count)

## Spread the trash data to one row per region (mpw_2010 / mpw_2025 columns)
## and attach it to the population table, so regions without trash data are
## kept with NA values.
trash_gf <- left_join(population, spread(trash_rgn, mpw, value), by = "rgn_id")

### Predicting the 2010 data:
## Visual check of the relationship between 2010 mismanaged plastic waste and
## population on the log(x + 1) scale (the +1 keeps zero values finite).
plot(log(mpw_2010+1) ~ log(count+1), data=trash_gf)

## Linear model on the log(x + 1) scale. Rows with missing values are dropped
## by lm() when fitting (see "observations deleted due to missingness" in the
## summary output).
mod_2010 <- lm(log(mpw_2010+1) ~ log(count+1), data=trash_gf)
summary(mod_2010)
## 
## Call:
## lm(formula = log(mpw_2010 + 1) ~ log(count + 1), data = trash_gf)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2835 -1.1762  0.1051  1.1552  3.2547 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -2.79059    0.51102  -5.461 1.54e-07 ***
## log(count + 1)  0.81952    0.03418  23.979  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.428 on 182 degrees of freedom
##   (36 observations deleted due to missingness)
## Multiple R-squared:  0.7596, Adjusted R-squared:  0.7583 
## F-statistic:   575 on 1 and 182 DF,  p-value: < 2.2e-16
## Model predictions for every region, on the log(x + 1) scale.
trash_gf$predict_ln_2010 <- predict(mod_2010, newdata = trash_gf)
## Back-transform to the original units. The response was log(mpw_2010 + 1),
## so the inverse is exp(x) - 1 (expm1), not exp(x); using exp() alone
## overstates every prediction by 1. Results can be slightly negative for very
## small populations; values below 1 are set to 0 when the gapfilled values
## are assembled.
trash_gf$predict_2010 <- expm1(trash_gf$predict_ln_2010)


### Predicting the 2025 data:
## Visual check of the relationship between projected 2025 mismanaged plastic
## waste and population on the log(x + 1) scale (the +1 keeps zeros finite).
plot(log(mpw_2025+1) ~ log(count+1), data=trash_gf)

## Linear model on the log(x + 1) scale. Rows with missing values are dropped
## by lm() when fitting (see "observations deleted due to missingness" in the
## summary output).
mod_2025 <- lm(log(mpw_2025+1) ~ log(count+1), data=trash_gf)
summary(mod_2025)
## 
## Call:
## lm(formula = log(mpw_2025 + 1) ~ log(count + 1), data = trash_gf)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.8501 -1.3522  0.0712  1.2582  3.4442 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -2.80305    0.52952  -5.294 3.43e-07 ***
## log(count + 1)  0.86168    0.03541  24.332  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.479 on 182 degrees of freedom
##   (36 observations deleted due to missingness)
## Multiple R-squared:  0.7649, Adjusted R-squared:  0.7636 
## F-statistic:   592 on 1 and 182 DF,  p-value: < 2.2e-16
## Model predictions for every region, on the log(x + 1) scale.
trash_gf$predict_ln_2025 <- predict(mod_2025, newdata = trash_gf)
## Back-transform to the original units. The response was log(mpw_2025 + 1),
## so the inverse is exp(x) - 1 (expm1), not exp(x); using exp() alone
## overstates every prediction by 1. Results can be slightly negative for very
## small populations; values below 1 are set to 0 when the gapfilled values
## are assembled.
trash_gf$predict_2025 <- expm1(trash_gf$predict_ln_2025)


## Gapfilling record: flag regions with no observed 2010 value (1 = gapfilled,
## 0 = observed) and note the method used for the flagged ones.
gf_trash_data <- trash_gf %>%
  transmute(rgn_id,
            gapfilled = ifelse(is.na(mpw_2010), 1, 0),
            method = ifelse(gapfilled == 1, "lm_population", NA),
            error = NA)

write.csv(gf_trash_data, 'output/cw_trash_trend_gf.csv', row.names=FALSE)


## Assemble the 2010 and 2025 values used for the trend:
##  1. where a value is missing, substitute the model prediction;
##  2. treat anything below 1 as zero.
trash_trend_data <- trash_gf %>%
  mutate(mpw_2010 = ifelse(is.na(mpw_2010), predict_2010, mpw_2010),
         mpw_2025 = ifelse(is.na(mpw_2025), predict_2025, mpw_2025)) %>%
  mutate(mpw_2010 = ifelse(mpw_2010 < 1, 0, mpw_2010),
         mpw_2025 = ifelse(mpw_2025 < 1, 0, mpw_2025)) %>%
  select(rgn_id, mpw_2010, mpw_2025)

Final calculations. The data are presented for 2010 and 2025. We calculate the proportional change over this 15 year period as:

Pchange_15year = (trash_2025 - trash_2010)/trash_2010

To determine the change per year we divide the Pchange_15year by 15 years. To calculate trend over a 5 year period, this value is multiplied by 5.

## Trend: proportional change over the 15 years from 2010 to 2025, rescaled to
## a 5-year window and bounded to [-1, 1].
## NOTE(review): mpw_2010 can be 0 after the "< 1 becomes 0" step upstream. In
## that case the ratio is Inf (ends up capped at 1) or NaN when both years are
## 0 (ends up NA in the output) -- confirm this is the intended handling.
trash_trend <- trash_trend_data %>%
  mutate(trend_15yr = (mpw_2025 - mpw_2010) / mpw_2010,
         trend_5yr = trend_15yr / 15 * 5) %>%
  mutate(trend_5yr = ifelse(trend_5yr > 1, 1, trend_5yr)) %>%
  mutate(trend_5yr = ifelse(trend_5yr < -1, -1, trend_5yr)) %>%
  select(rgn_id, trend = trend_5yr)

write.csv(trash_trend, 'output/cw_trash_trend.csv', row.names=FALSE)