ohi logo
OHI Science | Citation policy

[REFERENCE RMD FILE: https://cdn.rawgit.com/OHI-Science/ohiprep_v2018/master/globalprep/np/v2018/np_dataprep.html]

1 Summary

This analysis converts FAO commodities data into data layers used to calculate OHI 2018 global natural products scores.

2 Updates from previous assessment

New year of FAO data (1976-2015), but no changes to general methods.


3 Data Source

Reference:
http://www.fao.org/fishery/statistics/software/fishstatj/en#downlApp Release date: November 2017 FAO raw commodities quantity 1976_2015 FAO raw commodities value 1976_2015 FAO metadata found here

Downloaded: May 12 2018

Description: Quantity (tonnes) and value (USD) of raw commodities (Exports only) for each country, taxa, year. The FAO data is subset to include commodities in these categories: shells, corals, ornamental fish, fish oil, seaweed and plants, sponges (see: raw/commodities2products.csv for details).

Time range: 1976-2015


4 Methods

knitr::opts_chunk$set(eval=FALSE)

## load libraries, set directories
library(ohicore)  #devtools::install_github('ohi-science/ohicore@dev')
library(dplyr)
library(stringr)
library(tidyr)
library(zoo)  
library(ggplot2)
setwd(here::here("globalprep","np","v2018"))


## Load FAO-specific user-defined functions
source('../../../src/R/fao_fxn.R') # function for cleaning FAO files
source('../../../src/R/common.R') # directory locations
source('R/np_fxn.R')

5 Import Raw Data: FAO Commodities

Simultaneously read and process FAO commodities value and quantity data.

## NOTE: This can be run as a loop, but the "value" and "quant" datasets need to be run individually to make sure
## there are no problems (after this check, they can be looped for efficiency)

dir_fao_data <- file.path(dir_M, 'git-annex/globalprep/_raw_data/FAO_commodities/d2018')

files <- list.files(dir_fao_data, pattern=glob2rx('*.csv'), full.names=T)

## To compare to old data:
# dir_fao_data <- file.path(dir_M, 'git-annex/globalprep/_raw_data/FAO_commodities/d2016')
 
# files <- list.files(dir_fao_data, pattern=glob2rx('*.csv'), full.names=T)

for (f in files){ # f = files[1]
  cat(sprintf('\n\n\n====\nfile: %s\n', basename(f)))
  
  
  d <- read.csv(f, check.names=FALSE, strip.white=TRUE, stringsAsFactors = FALSE) #          stringsAsFactors=T

  
  ## Specifies that units are tonnes if we are reading in the Commodities Quantity data      csv, and usd if we are reading in the Commodities Value data csv
  units <- c('tonnes','usd')[str_detect(f, c('quant','value'))] # using American English,    lowercase

  ## gather into long format and clean up FAO-specific data foibles
  ## warning: attributes are not identical across measure variables; they will be dropped:   this is fine
  m <- d %>% 
    rename(country   = `Country (Country)`,
           commodity = `Commodity (Commodity)`,
           trade     = `Trade flow (Trade flow)`) %>%
    gather(year, value, -country, -commodity, -trade, -Unit)
  
  ## Include only the "Exports" data:
  m <- m %>%
    filter(trade == "Exports")

  m <- m %>%
    fao_clean_data() %>%  # swaps out FAO-specific codes. NOTE: optional parameter 'sub_0_0' can be passed to control how a '0 0' code is interpreted.
    select(-trade, -Unit) %>% # eliminate 'trade' column
  arrange(country, commodity, is.na(value), year)

  
  ## Products join: attach product categories from com2prod, and
  ##   filter out all entries that do not match a product category.
  ## Note: commodity_lookup is user-defined function to compare 
  ##   commodities in data vs commodities in lookup table
  
  ## load lookup for converting commodities to products
  com2prod <- read.csv('raw/commodities2products.csv', na.strings='')
    
  ## version used in 2015: use when testing....
  ## com2prod <- read.csv('../v2014_test/commodities2products.csv', na.strings='')
    
  ## Check the current commodity-to-product lookup table.  If necessary, make changes to     "raw/commodities2products.csv"
  np_commodity_lookup(m, com2prod)
    
  ## inner_join will attach product names to matching commodities according to
  ## lookup table 'com2prod', and eliminate all commodities that do not appear in the lookup table.
  m <- m %>%
      inner_join(com2prod, by='commodity')
    
    
  ## Special case: user-defined function deals with 
  ##   breaking up Antilles into separate reported rgns
  m <- np_split_antilles(m)
    
  ## Some changes to region names that aren't working
  m <- m %>%
    mutate(country = ifelse(country == "C\xf4te d'Ivoire", "Ivory Coast", country)) %>%
    mutate(country = ifelse(country == "Cura\xe7ao","Curacao", country)) %>%
    mutate(country = ifelse(country == "R\xe9union", "Reunion", country))
               
    
  m_rgn <- name_2_rgn(df_in = m,
                      fld_name='country', 
                      flds_unique=c('commodity', 'product', 'year'))
    
    
  
  ## combine composite regions
  ## When summarizing the dataset, this function provides a modified way to sum the value column while maintaining NA values when both variables are NA (rather than turning to zero values). The function will sum non-NA values normally.
  sum_function <- function(x) {
    if (sum(is.na(x)) == length(x)) 
      return(NA)
    return(sum(x, na.rm = T))}
  
  m_rgn <- m_rgn %>%
    group_by(rgn_id, rgn_name, commodity, product, year) %>%
    summarize(value = sum_function(value)) %>%
    ungroup()

  ## units: rename value field to units based on filename
  names(m_rgn)[names(m_rgn) == 'value'] <- units  
  
  ## output to .csv
  harvest_out <- sprintf('int/%s.csv', units)
  write.csv(m_rgn, harvest_out, row.names = FALSE, na = '')
}

6 Data Wrangle

Combining the quantity and value data and a bit of cleaning to remove data prior to first reporting year for each commodity and region.

h_tonnes <- read.csv('int/tonnes.csv')
# h_tonnes_old <- read.csv('../v2014_test/intermediate/tonnes.csv')

h_usd <- read.csv('int/usd.csv')

## concatenates h_tonnes and h_usd data
## h includes rgn_name, rgn_id, commodity, product, year, tonnes, usd.
h <- h_usd %>%
    full_join(h_tonnes, by=c('rgn_name', 'rgn_id', 'commodity', 'product', 'year')) %>%
    mutate(commodity = as.character(commodity)) %>%
    arrange(rgn_id, product, commodity, year)

## clips out years prior to first reporting year, for each commodity per region
h <- h %>% np_harvest_preclip

7 Gapfilling

See issue #397 for details and debate and pretty graphs. Summary of gapfilling that is performed:

h <- h %>% np_harvest_gapflag  
## Adds flag for required gap-filling, based upon NAs in data. 
## NOTE: Does not perform any gap-filling.
## At this point, h includes: 
## rgn_name   rgn_id   commodity   product   year   tonnes   usd   gapfill
## 'gapfill' will be in (zerofill, endfill, tbd, none)

data_check <- h %>% np_datacheck()
## for each commodity within each region, creates (but doesn't save...) summary info:
##   num_years:        the length of the data series for this commodity in this region
##   usd_unique_nz:    (or 'tns') number of unique non-zero values for usd or tonnes 
##   usd_na & tns_na:  number of NA occurrences
##   paired_obs:       number of non-zero paired observations
##   usd_unique_pairs: (or 'tns') within set of paired observations, count of unique usd and tonnes
##   unique_pairs:     lesser of usd_unique_pairs and tns_unique_pairs
##   count_no_data:    number of paired NAs - years with no value reported

h <- h %>% np_zerofill
## for post-reporting years with NA for both tonnes and USD, fill zero - 
## assumes that non-reporting indicates zero harvest to report.
## Also cross-fills zeros where one side is 0, other is NA (not flagged as gapfill)

h <- h %>% np_lowdata_filter()
## Exclude commodities (within a region) that have few non-zero data points.
## Optional parameter with default: nonzero_h_yr_min = 4
## NOTE: This filter has consequences for the regression, but also has meaning in terms of 
## not inflicting a penalty on regions trying, and then stopping, an experimental harvest.

h <- h %>% add_georegion_id()
## Melanie's script to add a georegional ID tag based on country keys and IDs.


h <- h %>% np_regr_fill(years_back = 10, vars = 'td', scope = 'rgn_id')
h <- h %>% np_regr_fill(vars = 'tdy', scope = 'georgn_id')
h <- h %>% np_regr_fill(vars = 'tdy', scope = 'global')
## np_regr_fill() is a generalized regression gapfill function. Parameters (with defaults):
## * years_back=50 (int):     This determines how far back in the time series to include within the regression.
## * min_paired_obs=4 (int):  This determines how many paired observations are required to attempt a regression.
## * scope = 'rgn_id' (str):  ('rgn_id', 'georgn_id', 'global') Determines grouping scale for regression.
## * vars = 'tdy' (str):      ('td', 'tdy') Determines model: (tonnes ~ usd) or (tonnes ~ usd + year) [and vice versa]


h <- h %>% np_end_fill()
## For final year of data, if both usd and tonnes originally reported as NA, pull forward
## values for usd and tonnes from the previous year.  This should happen after regression fill.

h_comm <- h
## Store commodity-level data, before moving on to the product-level smoothing.


## Output gapfilling report to .csv files.
## Very few usd gapfilling, and none in recent years (data used to weight contributions), so will ignore this: gapfill=="r2_u_gr"
h_gap <- h %>%
  mutate(gapfill = ifelse(gapfill == "r2_u_gr", "none", gapfill)) %>%   # focusing only on tonnes gapfilling
  select(rgn_id, commodity, product, year, gapfill)

write.csv(h_gap, 'output/np_harvest_tonnes_gf.csv', row.names = FALSE, na = '')

8 Final Data Wranglng

8.1 Summarize values

Summarize each product per country per year, e.g., all corals in Albania in 2011. And, do some error checking.

h_prod <- h_comm %>%
  group_by(rgn_name, rgn_id, product, year) %>%
  summarize(tonnes = sum(tonnes, na.rm = TRUE), 
            usd = sum(usd, na.rm = TRUE))
          
## Error-checking and table exports to see if there are duplicates
stopifnot(sum(duplicated(h_prod[ , c('rgn_id', 'product', 'year')])) == 0)

8.2 Quick Data Check

Look at wide format with all commmodities and product subtotal (where commodity column value is “Z_TOTAL”), compared with the input data prior to summing.

h_x_tonnes <- h_comm %>% 
  bind_rows(mutate(h_prod, commodity='Z_TOTAL')) %>%
  select(rgn_name, rgn_id, commodity, product, year, tonnes) %>%
  arrange(rgn_name, product, commodity, year) %>%
  spread(year, tonnes)

h_x_usd <- h_comm %>% 
  bind_rows(mutate(h_prod, commodity='Z_TOTAL')) %>%
  select(rgn_name, rgn_id, commodity, product, year, usd) %>%
  arrange(rgn_name, product, commodity, year) %>%
  spread(year, usd)

## Check a random country and commodity
australia <- h_x_usd %>% filter(product == "ornamentals", rgn_name == "Australia") 
australia

## Can open up in Excel to compare subtotals per country-product-year
write.csv(h_x_tonnes, 'int/np_harvest_tonnes_wide.csv', row.names = FALSE, na = 'NA')
write.csv(h_x_usd,    'int/np_harvest_usd_wide.csv',    row.names = FALSE, na = 'NA')

8.3 Calculate Rolling Averages

Determine rolling averages for tonnes and USD in order to determine peak values. This is based upon total harvests by product group, not individual commodity.

# Find max year in the summarized data table
year_max <- max(h_prod$year)

roll_prod <- h_prod %>%
  arrange(rgn_id, product, year) %>%
  group_by(rgn_id, product) %>%
  mutate(
      tonnes_rollmean = rollapply(tonnes, width=4, FUN=mean, align='right', partial=TRUE, na.rm=FALSE),
      usd_rollmean    = rollapply(   usd, width=4, FUN=mean, align='right', partial=TRUE, na.rm=FALSE)) %>%
  rename(
      tonnes_orig = tonnes, # prevent overwriting of reported and gapfilled values
      usd_orig    = usd) %>% # prevent overwriting of reported and gapfilled values
  mutate(
      tonnes = ifelse(!is.na(tonnes_rollmean), tonnes_rollmean, tonnes_orig),
      usd    = ifelse(!is.na(usd_rollmean),    usd_rollmean,    usd_orig)) %>%
  select(rgn_id, rgn_name, product, year, tonnes, usd, tonnes_orig, usd_orig)

8.4 Score Harvest Relative to Peaks

Score harvest (tonnes and usd) relative to peaks. Output values as .csvs. Perform this for all given scenarios, using a for loop.

buffer  <-  0.35 # 35% buffer (from OHI Methods)
recent_years  <-  10

## Find peak harvest per region-product and apply conservative buffer (scale down)
## Find max USD value over the last 10 years 
peak_prod <- roll_prod %>%
    group_by(rgn_id, product) %>%
    mutate(tonnes_peak = max(tonnes, na.rm=T)  * (1 - buffer)) %>%
    mutate(usd_peak = max(usd[year >= (year_max - recent_years)], na.rm=T)) %>%
    ungroup() 

## for each product, all years (within a region) have the same usd_peak values, but some years don't have all the products. Use the most recent year as this is considered the most current product list. 
prod_weights <- peak_prod %>%
    filter(year==year_max) %>% 
    group_by(rgn_id) %>%
    mutate(
      usd_peak_allproducts = sum(usd_peak, na.rm=T),
      prod_weight = usd_peak / usd_peak_allproducts) %>%
    ungroup() %>%
  mutate(year = year_max) %>% 
  select(rgn_id, year, product, weight = prod_weight)

## Determine relative status:
  smooth_prod <- peak_prod %>% 
    mutate(tonnes_rel = ifelse(tonnes >= tonnes_peak, 1, tonnes / tonnes_peak))

8.5 Save data layer

## Write entire data frame to .csv:
write.csv(smooth_prod, 'int/np_harvest_smoothed_data.csv', row.names = FALSE, na = '')

## Write individual data layers:
## Write NP weights layer also used to calculate pressures and resilience:
write.csv(prod_weights, 'output/np_harvest_weights_from_usd.csv', row.names = FALSE, na = '')

## Save tonnes data
tonnes <- smooth_prod %>%
  select(rgn_id, product, year, tonnes) 
write.csv(tonnes, 'output/np_harvest_tonnes.csv', row.names = FALSE, na = '')

## Save relative tonnes data
tonnes_rel <- smooth_prod %>%
  select(rgn_id, product, year, tonnes_rel) 
write.csv(tonnes_rel, 'output/np_harvest_tonnes_rel.csv', row.names = FALSE, na = '')

8.6 Final data check

Comparing against last year’s data. For example, compare the data for 1976-2013 for the 2017 and 2018 assessment year. Do not need to look at new years of data (2014-2015), which is only available in the 2018 assessment year, since those did not exist in the previous year’s assessment.

Note: Republique du Congo, due to revision of 2011 data for ornamentals (and only one product that isn’t a big producer)

## Look at Commodities TONNES for a few countries
## Look at a few regions that previously had no scores: Malta, Cayman Islands, Curacao
## Previous assessment only went up to 2013??

## Malta
new <- read.csv("int/tonnes.csv") %>% 
  filter(rgn_id==68, year %in% c(1990:2015))
old <- read.csv("../v2016/int/tonnes.csv") %>%  # CHANGE YEAR
  filter(rgn_id==68, year %in% c(1990:2015)) %>% 
  rename(tonnes_old = tonnes)

compare <- old %>% 
full_join(new, by = c("rgn_id","rgn_name","commodity","product","year"))

## Cayman Islands
new <- read.csv("int/tonnes.csv") %>% 
  filter(rgn_id==113, year %in% c(1976:2015))
old <- read.csv("../v2016/int/tonnes.csv") %>%  # CHANGE YEAR
  filter(rgn_id==113, year %in% c(1976:2015)) %>% 
  rename(tonnes_old = tonnes)

compare <- old %>% 
full_join(new, by = c("rgn_id","rgn_name","commodity","product","year"))

## Curacao Islands
new <- read.csv("int/tonnes.csv") %>% 
  filter(rgn_id==244, year %in% c(1976:2015))
old <- read.csv("../v2016/int/tonnes.csv") %>%  # CHANGE YEAR
  filter(rgn_id==244, year %in% c(1976:2015)) %>% 
  rename(tonnes_old = tonnes)

compare <- old %>% 
full_join(new, by = c("rgn_id","rgn_name","commodity","product","year"))



## Look at a few regions that had large differences in status values: Vanuatu, Tunisia, Cuba, North Korea
## Take a look at Vanuatu: no diff in int tonnes, but significant diff in np_harvest_tonnes, checked gapfilling - gapfilled all '0' and NA values for tonnes using regression with georegion
new <- read.csv("int/tonnes.csv")
old <- read.csv("../v2016/int/tonnes.csv") # CHANGE YEAR
new_c <- filter(new, rgn_id==6) %>%
  arrange(commodity, year) %>% 
  rename(tonnes_new = "tonnes")
old_c <- filter(old, rgn_id==6) %>%
  arrange(commodity, year)

compare <- old_c %>% 
left_join(new_c, by = c("commodity","year","rgn_id","rgn_name","product"))

plot(compare$tonnes_new, compare$tonnes)
abline(0,1, col="red")

## Look at Vanuatu final output: decreases in ornamental values from old assessment year to new assessment year
new_tonnes <- read.csv("output/np_harvest_tonnes.csv") %>% 
  filter(rgn_id==6, year %in% c(2010:2013))
old_tonnes <- read.csv("../v2016/output/np_harvest_tonnes.csv") %>% 
  filter(rgn_id==6, year %in% c(2010:2013)) %>% 
  rename(tonnes_old = tonnes)

compare <- old_tonnes %>% 
full_join(new_tonnes, by = c("rgn_id","product","year"))



## Take a look at Tunisia harvest tonnes:
new <- read.csv("output/np_harvest_tonnes.csv") %>% 
  filter(rgn_id==61, year %in% c(2010:2013))
old <- read.csv("../v2016/output/np_harvest_tonnes.csv") %>% 
  filter(rgn_id==61, year %in% c(2010:2013)) %>% 
  rename(tonnes_old = tonnes)

compare <- old %>% 
full_join(new, by = c("rgn_id","product","year"))

## Tunisia harvest smoothed data to see rel tonnes
smooth_new <- read.csv("int/np_harvest_smoothed_data.csv") %>% 
  filter(rgn_id==61, year %in% c(2010:2013)) %>% 
  select(rgn_id, product, year, tonnes, tonnes_rel)
smooth_old <- read.csv("../v2016/int/np_harvest_smoothed_data.csv") %>% 
  filter(rgn_id==61, year %in% c(2010:2013)) %>%
  select(rgn_id, product, year, tonnes, tonnes_rel) %>% 
  rename(tonnes_old = tonnes, tonnes_rel_old = tonnes_rel)

smoothed <- smooth_old %>% 
  full_join(smooth_new, by = c("rgn_id","product","year"))




## Take a look at Cuba: no diff in intermediate tonnes file
new <- read.csv("int/tonnes.csv")
old <- read.csv("../v2016/int/tonnes.csv") # CHANGE YEAR
new_c <- new %>% 
  filter(rgn_id==112, year %in% c(2007:2013)) %>%
  arrange(commodity, year) %>% 
  rename(tonnes_new = "tonnes")
old_c <- old %>% 
  filter(rgn_id==112, year %in% c(2007:2013)) %>%
  arrange(commodity, year)

compare <- old_c %>% 
  left_join(new_c, by = c("commodity","year","rgn_id","rgn_name","product"))
compare_full <- old_c %>% 
  full_join(new_c, by = c("commodity","year","rgn_id","rgn_name","product"))

plot(compare$tonnes_new, compare$tonnes)
abline(0,1, col="red")

## Just look at Cuba sponges, all years
new <- read.csv("int/tonnes.csv")
old <- read.csv("../v2016/int/tonnes.csv") # CHANGE YEAR
new_c <- new %>% 
  filter(rgn_id==112, product == "sponges") %>%
  arrange(commodity, year) %>% 
  rename(tonnes_new = "tonnes")
old_c <- old %>% 
  filter(rgn_id==112, product == "sponges") %>%
  arrange(commodity, year)

compare <- old_c %>% 
left_join(new_c, by = c("rgn_id","rgn_name","commodity","product","year"))


## Look at Cuba final tonnes output: minor diff in final output
new_tonnes <- read.csv("output/np_harvest_tonnes.csv") %>% 
  filter(rgn_id==112, year %in% c(2007:2013))
old_tonnes <- read.csv("../v2016/output/np_harvest_tonnes.csv") %>% 
  filter(rgn_id==112, year %in% c(2007:2013)) %>% 
  rename(tonnes_old = tonnes)

compare2 <- old_tonnes %>% 
  left_join(new_tonnes, by = c("rgn_id","product","year"))





## Take a look at Cuba smoothed data to find tonnes rel
smooth_new <- read.csv("int/np_harvest_smoothed_data.csv") %>% 
  filter(rgn_id==112, year %in% c(2010:2013)) %>% 
  select(rgn_id, product, year, tonnes, tonnes_rel)
smooth_old <- read.csv("../v2016/int/np_harvest_smoothed_data.csv") %>% 
  filter(rgn_id==112, year %in% c(2010:2013)) %>% 
  select(rgn_id, product, year, tonnes_old = tonnes, tonnes_rel_old = tonnes_rel)

compare <- smooth_old %>% 
left_join(smooth_new, by = c("rgn_id","product","year"))



## Look at North Korea final tonnes output:
new_tonnes <- read.csv("output/np_harvest_tonnes.csv") %>% 
  filter(rgn_id==21)
old_tonnes <- read.csv("../v2016/output/np_harvest_tonnes.csv") %>% 
  filter(rgn_id==21) %>% 
  rename(tonnes_old = tonnes)

compare <- old_tonnes %>% 
left_join(new_tonnes, by = c("rgn_id","product","year"))
plot(compare$tonnes_old, compare$tonnes)
abline(0,1, col="red")

## Take a look at North Korea smoothed data to find tonnes rel
smooth_new <- read.csv("int/np_harvest_smoothed_data.csv") %>% 
  filter(rgn_id==21, year %in% c(2010:2013)) %>% 
  select(rgn_id, product, year, tonnes, tonnes_rel)
smooth_old <- read.csv("../v2016/int/np_harvest_smoothed_data.csv") %>% 
  filter(rgn_id==21, year %in% c(2010:2013)) %>% 
  select(rgn_id, product, year, tonnes_old = tonnes, tonnes_rel_old = tonnes_rel)

compare <- smooth_old %>% 
left_join(smooth_new, by = c("rgn_id","product","year"))






## Look at a few random regions
## Take a look at Dominican Republic
new_c <- filter(new, rgn_id==115) %>%
  arrange(commodity, year) %>% 
  rename(tonnes_new = "tonnes")
old_c <- filter(old, rgn_id==115) %>%
  arrange(commodity, year)

compare <- old_c %>% 
left_join(new_c, by = c("commodity","year","rgn_id","rgn_name","product"))

plot(compare$tonnes_new, compare$tonnes)
abline(0,1, col="red")


## Look at tonnes for Senegal
new_c <- filter(new, rgn_id==66) %>%
  arrange(commodity, year) %>% 
  rename(tonnes_new = "tonnes")
old_c <- filter(old, rgn_id==66) %>%
  arrange(commodity, year)

compare <- old_c %>% 
left_join(new_c, by = c("commodity","year","rgn_id","rgn_name","product"))

plot(compare$tonnes_new, compare$tonnes)
abline(0,1, col="red")


## Look at tonnes for Greenland
new_c <- filter(new, rgn_id==145) %>%
  arrange(commodity, year) %>% 
  rename(tonnes_new = "tonnes")
old_c <- filter(old, rgn_id==145) %>%
  arrange(commodity, year)

compare <- old_c %>% 
left_join(new_c, by = c("commodity","year","rgn_id","rgn_name","product"))

plot(compare$tonnes_new, compare$tonnes)
abline(0,1, col="red")


## Look at tonnes for Indonesia
new_c <- filter(new, rgn_id==216) %>%
  arrange(commodity, year) %>% 
  rename(tonnes_new = "tonnes")
old_c <- filter(old, rgn_id==216) %>%
  arrange(commodity, year)

compare <- old_c %>% 
left_join(new_c, by = c("commodity","year","rgn_id","rgn_name","product"))

plot(compare$tonnes_new, compare$tonnes)
abline(0,1, col="red")





## Look at Commodities USD value for a few countries
new <- read.csv("int/usd.csv")
old <- read.csv("../v2016/int/usd.csv") # CHANGE YEAR

## Look at Republique du Congo
new_c <- filter(new, rgn_id==100) %>%
  arrange(commodity, year) %>%
  rename(usd_new = "usd")
old_c <- filter(old, rgn_id==100) %>%
  arrange(commodity, year)

compare <- old_c %>%
  left_join(new_c, by = c("commodity","year","rgn_id","rgn_name","product"))

plot(compare$usd_new,compare$usd)
abline(0,1, col="red")

## Look at Bahamas
new_c <- filter(new, rgn_id==110) %>%
  arrange(commodity, year) %>%
  rename(usd_new = "usd")
old_c <- filter(old, rgn_id==110) %>%
  arrange(commodity, year)

compare <- old_c %>%
  left_join(new_c, by = c("commodity","year","rgn_id","rgn_name","product"))

plot(compare$usd_new,compare$usd)
abline(0,1, col="red")


## Look at final smoothed tonnes data for Republique du Congo
## Rolling average changes values slightly - make sure new data is not too different from previous year
new <- read.csv('output/np_harvest_tonnes.csv')
old <- read.csv('../v2016/output/np_harvest_tonnes.csv') # CHANGE YEAR


## Look at  Republique du Congo
new_c <- new %>% 
  filter(rgn_id==100) %>% 
  rename(tonnes_new = tonnes)
old_c <- old %>% 
  filter(rgn_id==100)

compare <- old_c %>%
  left_join(new_c, by = c("year","rgn_id","product"))

plot(compare$tonnes_new,compare$tonnes)
abline(0,1, col="red")


## Look at Bahamas
new_c <- new %>% 
  filter(rgn_id==110) %>% 
  rename(tonnes_new = tonnes)
old_c <- old %>% 
  filter(rgn_id==110)

compare <- old_c %>%
  left_join(new_c, by = c("year","rgn_id","product"))

plot(compare$tonnes_new,compare$tonnes)
abline(0,1, col="red")