This layer preparation script processes newly available sea level rise (SLR) data into the SLR pressure layer.
The entire process is completed within this script. The raw data are downloaded and held on a server at NCEAS; although the raw data are not provided with this repository, the script can be run on data downloaded from Aviso (see the source link below). You will need to register with Aviso in order to get a username and password for data access.
The source data are monthly mean sea level anomalies, in meters. Each anomaly is calculated by subtracting the 1993 - 2015 average sea level for that month from the absolute sea level observed in that month.
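As a quick worked example of that anomaly arithmetic (the numbers below are made up for illustration, not taken from the source data):
# illustrative only: how a monthly anomaly relates to the climatological baseline
obs_mar_2022  <- 1.532  # hypothetical absolute sea level for March 2022 in one cell (m)
clim_mar_mean <- 1.497  # hypothetical 1993-2015 mean sea level for March in that cell (m)
msla_mar_2022 <- obs_mar_2022 - clim_mar_mean  # anomaly = 0.035 m above the baseline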
Reference: The altimeter products were produced and distributed by Aviso (http://www.aviso.altimetry.fr/), as part of the Ssalto ground processing segment. AVISO MSLA heights, monthly means.
Downloaded: June 14, 2024
Description: Monthly mean sea level anomaly (meters above mean sea level) (msla_h)
Native data resolution: 0.25 degree grid cells
Time range: January 1993 - December 2022
Format: NetCDF
required_packages <- c("httr", "R.utils", "raster", "tidyverse", "sf", "RColorBrewer", "googleVis", "maps", "parallel", "foreach", "doParallel", "fasterize", "rasterVis", "terra", "tictoc", "here", "plotly") # create list of required packages (add any new packages here)
install.packages(setdiff(required_packages, installed.packages()[,"Package"])) # install any packages that are not already installed
lapply(required_packages, require, character.only = TRUE) # load all required packages (require() returns TRUE for each package that loads successfully)
# load packages
library(httr)
library(R.utils)
library(raster)
library(tidyverse)
library(sf)
library(RColorBrewer)
library(googleVis)
library(maps)
library(parallel)
library(foreach)
library(doParallel)
library(fasterize)
library(rasterVis)
library(terra)
library(tictoc)
library(plotly)
library(ggplot2)
setGDALconfig("GDAL_PAM_ENABLED", "FALSE")
#source('../../../workflow/R/common.R')
library(here) # sets working directory to project (ohiprep_v202X) base directory
source(here("workflow", "R", "common.R"))
## define paths and variables to use throughout data prep
scen_year <- 2024 # change to reflect assessment year!
dir_anx_aviso <- file.path(dir_M, "git-annex/globalprep/_raw_data/AVISO_slr")
dir_prs_slr <- sprintf("%s/git-annex/globalprep/prs_slr/v%s", dir_M, scen_year)
cols <- rev(colorRampPalette(brewer.pal(9, "Spectral"))(255)) # rainbow color scheme for maps
# define the Mollweide projection; assigning this CRS object only labels a projection and does not transform any data (to reproject we use terra::project() after creating a raster)
mollCRS <- CRS("+proj=moll")
## read in ocean raster with cells at 1km -- template for resampling (held on an NCEAS server)
ocean <- terra::rast(file.path(dir_M, "git-annex/globalprep/spatial/v2017/ocean.tif"))
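To make the mollCRS comment above concrete (the distinction between labelling a CRS and actually transforming the data), here is a minimal sketch using a small dummy raster; the object names are illustrative only:
# illustrative only: crs()<- relabels the CRS without touching cell values,
# while project() actually warps (resamples) the raster into the new projection
demo_rast <- terra::rast(nrows = 10, ncols = 20, xmin = -180, xmax = 180, ymin = -90, ymax = 90)
terra::values(demo_rast) <- runif(terra::ncell(demo_rast))
terra::crs(demo_rast) <- "+proj=longlat +datum=WGS84"  # assign the CRS label
demo_moll <- terra::project(demo_rast, "+proj=moll")   # reproject to Mollweide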
These chunks can be used to download the data from AVISO. You will need an AVISO account to do this; it might take a few days for the account to become active, or you can use the account of a prior fellow.
If you want to view the files and check whether a full previous year of data is available to download, go here: https://tds.aviso.altimetry.fr/thredds/catalog/dataset-duacs-climatology-global/delayed-time/monthly_mean/msla_h/catalog.html
or here: ftp://yourlogin@ftp-access.aviso.altimetry.fr/climatology
You will be prompted either to open an FTP application, such as Cyberduck, or to enter your username and password.
If you don't want to run the download process in R, you can do it manually by clicking and dragging files between two Cyberduck browser windows.
# This layer lags by 2 years, since we need a full year of data.
## an AVISO username and password are required; run the following line in the console (as is), then enter the username and password string when prompted. Do not save the login information in this script.
## if either the username or password contains the @ symbol, replace it with '%40' (percent-encoding for reserved characters)
userpasswd <- readline("Type AVISO username and password, separated by colon no space:")
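If you want to apply the "@" substitution mentioned above programmatically rather than typing it by hand, one option (not part of the original workflow) is:
userpasswd <- gsub("@", "%40", userpasswd, fixed = TRUE) # percent-encode any "@" in the credentials before they are spliced into the FTP URL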
Define file paths for data download:
year <- 2022 # data year to download; lags by 2 years. 2023 goes to May as of now, and we need a full year's worth of data for v2024
months <- str_pad(1:12, 2, pad = "0") # if the source uploads additional months later, update this sequence to include them
## download data from FTP and collect each month/year in raster stack
## if you are connecting to the data source through cyberduck rather than this script, the following string is what you enter for the server
url <- "ftp://ftp-access.aviso.altimetry.fr/climatology/global/delayed-time/monthly_mean/msla_h"
ub <- sprintf("ftp://%s@%s", userpasswd, substr(url, 7, 87)) # here, substr extracts the url string characters 7-87, which encompasses everything in `url` after "ftp://"
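# for reference: with placeholder credentials "jdoe:hunter2", `ub` would look like
# "ftp://jdoe:hunter2@ftp-access.aviso.altimetry.fr/climatology/global/delayed-time/monthly_mean/msla_h"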
Pull monthly files from the source into the data directory:
# within dir_anx_aviso, create a new folder for this scenario year of data, named d20?? (based on scen_year), so the download loop below has somewhere to write
folder_name_anx_aviso <- paste0("d", scen_year)
folder_path_anx_aviso <- file.path(dir_anx_aviso, folder_name_anx_aviso)
dir.create(folder_path_anx_aviso)
## download is quite fast, takes just a minute for 12 files
for (mo in months){
#mo="01" # for testing
## retrieve the data (compressed, gzipped files prior to 2021 data, and just .nc files starting with 2021 data)
# sprintf() contains %s in two places, which represent the two spots that will get filled with the two strings specified in the following arguments `ub` and `mo`
u <- sprintf("%s/dt_global_allsat_msla_h_y%s_m%s.nc", ub, year, mo)
u_filename <- file.path(dir_anx_aviso, paste0("d", scen_year), basename(u)) # write the file into the correct data folder (d202?) and name it after the source file, e.g. dt_global_allsat_msla_h_y2022_m01.nc
res <- httr::GET(u, write_disk(u_filename))
}
closeAllConnections()
Unzip the files you've just downloaded if they came down as gzipped archives (older years of the source data):
# zipfiles <- list.files(file.path(dir_anx_aviso, paste0("d", scen_year)),
# full.names = TRUE, pattern = "*nc.gz")
# for(zipfile in zipfiles){
# message("Unzipping file: ", zipfile)
# R.utils::gunzip(zipfile, remove = TRUE, skip = TRUE)
# }
The 2020 data are stored in the source as uncompressed .nc files, so the above code chunk was not necessary. This is likely the case for all years (past and future) since the source URL was updated.
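If you would rather leave the unzip step active and let it decide for itself whether anything needs unzipping, a small guard like the following works (a sketch, not part of the original script):
# only attempt to unzip if any gzipped NetCDFs were downloaded (older source layout)
gz_files <- list.files(file.path(dir_anx_aviso, paste0("d", scen_year)),
                       full.names = TRUE, pattern = "\\.nc\\.gz$")
if (length(gz_files) > 0) {
  purrr::walk(gz_files, ~ R.utils::gunzip(.x, remove = TRUE, skip = TRUE))
} else {
  message("No .nc.gz files found; source data are already uncompressed")
}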
All NetCDF files for each month are rasterized.
## d2016/msla_monthly_mean has data for 1993-2015
## also include list.files() for each data folder from d2017 through the folder for the current scenario year
nc_files <- c(list.files(file.path(dir_anx_aviso, "d2024"),
full.names = TRUE, pattern = ".nc"),
list.files(file.path(dir_anx_aviso, "d2023"),
full.names = TRUE, pattern = ".nc"),
list.files(file.path(dir_anx_aviso, "d2022"),
full.names = TRUE, pattern = ".nc"),
list.files(file.path(dir_anx_aviso, "d2021"),
full.names = TRUE, pattern = ".nc"),
list.files(file.path(dir_anx_aviso, "d2019"),
full.names = TRUE, pattern = ".nc"),
list.files(file.path(dir_anx_aviso, "d2018"),
full.names = TRUE, pattern = ".nc"),
list.files(file.path(dir_anx_aviso, "d2017"),
full.names = TRUE, pattern = ".nc"),
list.files(file.path(dir_anx_aviso, "d2016/msla_monthly_mean"),
full.names = TRUE, pattern = ".nc"))
The raw monthly data looks like this:
# removed axes = F argument
plot(terra::rast(nc_files[3]), col = cols,
main = paste("Year", str_sub(nc_files[3], -11, -8), "Month", str_sub(nc_files[3], -5, -4)))
The following code rotates each monthly raster from 0-360 to -180-180 degrees longitude, assigns the long/lat coordinate reference system, and writes each month out as a GeoTIFF. The output is saved in the folder int/msla_monthly.
# create the folders for the below loop
dir.create(dir_prs_slr)
folder_path_prs_slr_int <- file.path(dir_prs_slr, "int")
dir.create(folder_path_prs_slr_int)
folder_path_prs_slr_int_msla_monthly <- file.path(folder_path_prs_slr_int, "msla_monthly")
dir.create(folder_path_prs_slr_int_msla_monthly)
tic()
# do not run in parallel because it is not compatible with terra::project() (package might be updated in the future to accommodate parallelization)
# rotates each monthly file from 0-360 to -180-180 longitude, sets the long/lat projection, and writes each month to int/msla_monthly on the server
# takes ~201-370 seconds to run (~6 minutes)
for (i in seq_along(nc_files)){
m_yr <- substr(nc_files[i], nchar(nc_files[i])-10, nchar(nc_files[i])-3) # m_yr value example: "2015_m12"
# suppress creation of auxiliary files
setGDALconfig("GDAL_PAM_ENABLED", "FALSE")
## read in month raster
r <- terra::rast(nc_files[i]) %>%
rotate()
## define the projection of the raster (assign the CRS label; the data are already in long/lat, so no transformation is needed here)
terra::crs(r) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
## write raster to int folder in prs_slr
fp <- sprintf("%s/int/msla_monthly/msla_monthly_%s.tif", dir_prs_slr, m_yr)
terra::writeRaster(r, filename = fp, overwrite = TRUE)
# remove all extraneous auxiliary files that end in .aux.json
unlink(sprintf("%s/int/msla_monthly/msla_monthly_%s.tif.aux.json", dir_prs_slr, m_yr))
}
toc()
# v2024 - 410.867 sec elapsed
Annual mean sea level anomaly rasters are calculated from the monthly data.
## create 'msla_annual_mean' folder in 'int'
folder_path_prs_slr_int_msla_annual_mean <- file.path(folder_path_prs_slr_int, "msla_annual_mean")
dir.create(folder_path_prs_slr_int_msla_annual_mean)
tic()
msla_files <- list.files(sprintf("%s/int/msla_monthly", dir_prs_slr),
full.names = TRUE)
maxyr <- str_sub(msla_files, -12, -9) %>% as.numeric() %>% max() # pull the four-digit year out of file names like msla_monthly_2015_m12.tif
## stack all rasters for this year, and calc annual mean, then write as raster
registerDoParallel(6)
foreach(yr = c(1993:maxyr)) %dopar% {
files <- msla_files[str_detect(msla_files, as.character(yr))]
rast_annual_mean <- stack(files) %>%
calc(mean, na.rm = TRUE) %>%
raster::writeRaster(filename = sprintf("%s/int/msla_annual_mean/msla_annual_%s.tif", dir_prs_slr, yr),
overwrite = TRUE) # raster::writeRaster, since the objects in this parallel loop are raster (not terra) objects
}
toc()
# v2024 - 10.983 sec elapsed
Since we are only interested in the increase in sea level near the coasts, we apply a mask to the raster layers that removes all cells farther than 3nm offshore. This mask was created previously for a former OHI global assessment.
## 3nm offshore raster to select only nearshore cells
#ocean_mask_prev <- sprintf("%s/int/ocean_mask.tif", dir_prs_slr, scen_year-1)
ocean_mask_prev <- file.path(dir_M, "git-annex/globalprep/prs_slr/v2019/int/ocean_mask.tif")
if(file.exists(ocean_mask_prev)){
file.copy(ocean_mask_prev, file.path(dir_prs_slr, "int"))
} else {
poly_3nm <- read_sf(file.path(dir_M, "git-annex/Global/NCEAS-Regions_v2014/data"), "rgn_offshore3nm_mol")
poly_3nm[duplicated(poly_3nm$rgn_id), ] # check to make sure there are no duplicated regions
## create the rasterized 3 nautical mile offshore raster if it cannot be copied from a previous assessment folder
s <- terra::rasterize(terra::vect(poly_3nm), ocean, field = "rgn_id")
s <- terra::app(s, fun = function(x) {ifelse(x > 0, 1, NA)}) # convert region IDs to a 1/NA mask
terra::writeRaster(s, sprintf("%s/int/ocean_mask.tif", dir_prs_slr))
}
s <- terra::rast(sprintf("%s/int/ocean_mask.tif", dir_prs_slr, scen_year))
plot(s, col = "red")
annual_means <- list.files(file.path(dir_prs_slr, "int/msla_annual_mean"), full = TRUE)
# v2022 - tried to use foreach with terra: Error in x@ptr$nrow() : external pointer is not valid
## create 'msla_annual_mol' folder in 'int'
folder_path_prs_slr_int_msla_annual_mol <- file.path(folder_path_prs_slr_int, "msla_annual_mol")
dir.create(folder_path_prs_slr_int_msla_annual_mol)
tic()
# this loop is not parallelized because combining `terra::project()` with `foreach()` threw the error "external pointer is not valid"
# total time for loop: 34.996 minutes, v2024
for(i in seq_along(annual_means)) {
yr <- str_sub(annual_means[i], -8, -5)
msla_int <- file.path(dir_prs_slr, "int")
rast_data <- terra::rast(annual_means[i]) %>%
terra::project(y = mollCRS) %>% # terra::project() lacks the 'over' argument that raster::projectRaster() had (used to avoid mapping the same area twice for global data), but this does not appear to be an issue here
terra::resample(ocean, method = "near", # nearest-neighbor resampling ("near" is terra's equivalent of raster's "ngb")
filename = sprintf("%s/msla_annual_mol/msla_annual_mol_%s.tif",
msla_int, yr), overwrite = TRUE)
}
toc()
# v2024 - 2099.743 sec elapsed
## create 'msla_annual_mol_coastal' folder in 'int'
folder_path_prs_slr_int_msla_annual_mol_coastal <- file.path(folder_path_prs_slr_int, "msla_annual_mol_coastal")
dir.create(folder_path_prs_slr_int_msla_annual_mol_coastal)
tic()
annual_mol <- list.files(file.path(dir_prs_slr, "int/msla_annual_mol"), full = TRUE)
foreach(file = annual_mol) %dopar% { # file = annual_mol[2]
yr <- str_sub(file,-8,-5)
rast_annual_mol <- terra::rast(file) # avoid naming this object `rast`, which would shadow terra::rast()
terra::mask(rast_annual_mol, s, filename = sprintf("%s/int/msla_annual_mol_coastal/msla_annual_mol_coastal_%s.tif",
dir_prs_slr, yr), overwrite = TRUE)
}
toc()
# v2024 - 1800.464 sec elapsed
plot(terra::rast(file.path(dir_prs_slr, "int/msla_annual_mol_coastal/msla_annual_mol_coastal_2010.tif")))
The reference point is the 99.99th quantile of the entire data distribution from 1993 - 2015. (This value has been updated due to changes in the source data, previously was 0.246225 m, currently is 0.3359385 m).
coastal_rasts <- list.files(file.path(dir_prs_slr, "int/msla_annual_mol_coastal"), pattern = "tif", full.names = TRUE)
## get data across all years to 2015
## takes a really long time; added foreach %dopar% to speed it up...
## doesn't really need to be recalculated each year unless there's reason to believe the source updated past years' data
## the script was updated in 2022 to convert from raster to terra, so terra functions were added to the following loop but not run because there was no reason to believe the source had updated past years' data
# tic()
# registerDoParallel(8)
#
# # vals <- foreach(i = 1993:2015, .combine = c) %dopar% { # i = 1993
# # coastal_rasts[which(str_sub(coastal_rasts, -8, -5) == i)] %>%
# # #raster() %>%
# # terra::rast() %>%
# # #getValues() %>%
# # terra::values() #%>%
# # #na.omit()
# # }
#
# # possibly better code, used by Gage in v2023:
# vals <- foreach(i = 1993:2015, .combine = c) %dopar% { # i = 1993
# coastal_rasts[which(str_sub(coastal_rasts, -8, -5) == i)] %>%
# terra::rast() %>%
# terra::values() %>%
# as.data.frame() %>%
# filter(!is.na(layer))
#
# # na.omit()
# }
# toc()
# for first code option:
# ref_point_slr <- quantile(vals, 0.9999)
# for second code option:
# ref_point_slr <- quantile(unlist(vals), 0.9999)
# v2023 - we recalculated ref_point_slr and were getting varying numbers, but a lot similar to the v2019 reference point below and the source does not seem to have been updated, so we will continue with the ref_point_slr below
# v2024 - even with parallel computing and utilizing multiple cores, my R session would abnormally terminate every time it was run (three times, each taking about 20 minutes). I will assume the reference point is correct as the source data has no indication of change. I will continue to use the v2019 reference point (0.335938483476639). -SL
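# A possible workaround (an approximation, not the method used here): if the reference point ever does need to be
# recalculated and loading every 1993-2015 coastal cell value keeps crashing the session, the 99.99th quantile
# could be estimated from a large random sample of coastal cells instead, e.g.:
# set.seed(42)
# sampled_vals <- purrr::map(1993:2015, function(yr){
#   f <- coastal_rasts[str_sub(coastal_rasts, -8, -5) == yr]
#   terra::spatSample(terra::rast(f), size = 5e5, method = "random", na.rm = TRUE)[, 1]
# }) %>% unlist()
# ref_point_slr_est <- quantile(sampled_vals, 0.9999) # approximation; compare against the v2019 value below before adopting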
## If not rerunning the above, use this (taken from v2019 reference point csv)
ref_point_slr <- 0.335938483476639
dir_refpt <- "../../supplementary_information"
if(file.exists(sprintf("%s/v%s/reference_points_pressures.csv", dir_refpt, scen_year))){
## if already created and been partially updated for this assessment, don't want to overwrite with v2016 csv...
ref_tab <- read_csv(sprintf("%s/v%s/reference_points_pressures.csv", dir_refpt, scen_year))
} else {
## grab ref file from v2016 if doesn't exist yet in current assessment 'supplementary information' folder
ref_tab <- read_csv(file.path(dir_refpt, "v2016/reference_points_pressures.csv"))
}
ref_tab$ref_point[ref_tab$pressure == "Sea Level Rise"] <- ref_point_slr # set sea level rise reference to the 99.99 percentile
# create v20?? folder in supplementary_information (only use if needed)
# used to create v2024
# folder_name_supplementary_information_v_scen_year <- paste0("v", scen_year)
# folder_path_supplementary_information_v_scen_year <- file.path(dir_refpt, folder_name_supplementary_information_v_scen_year)
# dir.create(folder_path_supplementary_information_v_scen_year)
# write to reference_points_pressures.csv
write.csv(ref_tab, sprintf("%s/v%s/reference_points_pressures.csv", dir_refpt, scen_year), row.names = FALSE)
## grab the reference value from the supplementary information csv for this scenario year
ref <- read_csv(sprintf("%s/v%s/reference_points_pressures.csv", dir_refpt, scen_year)) %>%
filter(pressure == "Sea Level Rise") %>%
.$ref_point %>%
as.numeric()
Each annual raster is rescaled from 0 to 1 using the reference point. If a value is greater than the reference point, it is automatically given a value of 1.
# create output folder in prs_slr v20??
folder_path_prs_slr_output <- file.path(dir_prs_slr, "output")
dir.create(folder_path_prs_slr_output)
# foreach() & dopar did not work well with terra in this context
# first define functions outside of app()
neg_vals <- function(x){ifelse(x < 0, 0, x)}
greater_vals <- function(x){ifelse(x > ref, 1, x/ref)}
tic()
for(i in seq_along(coastal_rasts)) { # for testing: i = 26
yr <- str_sub(coastal_rasts[i], -8,-5)
if(file.exists(sprintf("%s/output/slr_%s.tif", dir_prs_slr, yr))){
message("skipping")
}else{
terra::rast(coastal_rasts[i]) %>%
terra::app(fun = neg_vals) %>% # set all negative values to 0
terra::app(fun = greater_vals, # set equal to one if greater than ref, otherwise scale
filename = sprintf("%s/output/slr_%s.tif", dir_prs_slr, yr), overwrite = TRUE)
}
}
toc()
# v2024 - 4259.81 sec elapsed
r <- terra::rast(sprintf("%s/output/slr_%s.tif", dir_prs_slr, scen_year - 2))
plot(ocean, col = "cornsilk2", axes = FALSE, box = FALSE, main = sprintf("Sea Level Rise Pressure %s", scen_year - 2), legend = FALSE)
plot(r, col = cols, axes = FALSE, box = FALSE, add = TRUE)
# # get current scenario year most recent year of data
# r_new_raster <- raster::raster(sprintf("%s/output/slr_%s.tif", dir_prs_slr, scen_year - 2))
# # get current scenario year most recent year of data IN COMMON (ie. will likely be one more year back)
# r_new_old_raster <- raster::raster(sprintf("%s/output/slr_%s.tif", dir_prs_slr, scen_year - 3))
# # get previous scenario year most recent year of data IN COMMON (ie. will likely be one more year back)
# r_old_raster <- raster::raster(sprintf("%s/git-annex/globalprep/prs_slr/v%s/output/slr_%s.tif", dir_M, scen_year - 1, scen_year - 3))
# # changed above lines back to raster package because terra::rast was causing the histograms not to work
# # Error in UseMethod("histogram") :
# # no applicable method for 'histogram' applied to an object of class "data.frame"
#
# rasterVis::histogram(r_new_raster, main = sprintf("Sea Level Pressure %s current version (v%s) data", scen_year - 2, scen_year)) # v2023 - plots v2023 2021 data
# rasterVis::histogram(r_new_old_raster, main = sprintf("Sea Level Pressure %s current version (v%s) data", scen_year - 3, scen_year)) # v2023 - plots v2023 2020 data
# rasterVis::histogram(r_old_raster, main = sprintf("Sea Level Pressure %s previous version (v%s) data", scen_year - 3, scen_year - 1)) # v2023 - plots v2022 2020 data
# updated viz route for histograms if wanting to use terra
# get current scenario year most recent year of data
r_new_terra <- terra::rast(sprintf("%s/output/slr_%s.tif", dir_prs_slr, scen_year - 2))
# get current scenario year's most recent year of data IN COMMON with the previous version (i.e., likely one more year back)
r_new_old_terra <- terra::rast(sprintf("%s/output/slr_%s.tif", dir_prs_slr, scen_year - 3))
# get previous scenario year's most recent year of data IN COMMON (i.e., likely one more year back)
r_old_terra <- terra::rast(sprintf("%s/git-annex/globalprep/prs_slr/v%s/output/slr_%s.tif", dir_M, scen_year - 1, scen_year - 3))
terra::hist(r_new_terra, main = sprintf("Sea Level Pressure %s current version (v%s) data", scen_year - 2, scen_year), breaks = 70) # v2024 - plots v2024 2022 data
terra::hist(r_new_old_terra, main = sprintf("Sea Level Pressure %s current version (v%s) data", scen_year - 3, scen_year), breaks = 70) # v2024 - plots v2024 2021 data
terra::hist(r_old_terra, main = sprintf("Sea Level Pressure %s previous version (v%s) data", scen_year - 3, scen_year - 1), breaks = 70) # v2024 - plots v2023 2021 data
## raster/zonal data, zones tifs created & spatial rgn updated in 2017
slr_loc <- file.path(dir_prs_slr, "output")
rasts <- list.files(slr_loc, full.names = TRUE) %>% str_subset(pattern = ".tif$")
#stack_slr <- terra::rast(rasts) # read in raster files
stack_slr <- stack(rasts)
zones <- raster(file.path(dir_M, "git-annex/globalprep/spatial/v2017/regions_eez_with_fao_ant.tif"))
rgn_data <- read_sf(file.path(dir_M, "git-annex/globalprep/spatial/v2017"), "regions_2017_update") %>%
st_set_geometry(NULL) %>%
dplyr::filter(rgn_type == "eez") %>%
dplyr::select(rgn_id = rgn_ant_id, rgn_name)
## extract data for each region
## fyi takes awhile... about 2 hours for v2019....
#terra::extent(stack_slr) <- terra::extent(zones)
tic()
regions_stats <- zonal(stack_slr, zones, fun = "mean", na.rm = TRUE, progress = "text") %>% data.frame()
toc()
# v2024 - 3188.695 sec elapsed
setdiff(regions_stats$zone, rgn_data$rgn_id) # High Seas regions are in there, makes sense....no land
#[1] 260 261 262 263 264 266 267 269 270 272 273 274 275 276 277
setdiff(rgn_data$rgn_id, regions_stats$zone) #integer(0)
regions_stats_clean <- regions_stats %>%
rename(rgn_id = zone) %>%
filter(rgn_id <= 250) %>%
gather("year", "pressure_score", -1) %>%
mutate(year = as.numeric(as.character(substring(year, 7, 8)))) %>%
mutate(year = 1993 + year - 1) # convert from numbers to actual years
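# for reference, assuming the stacked layer columns come through as "layer.1" ... "layer.N"
# (which is what the substring(year, 7, 8) indexing implies):
#   "layer.1"  -> substring gives "1"  -> 1993 + 1  - 1 = 1993
#   "layer.30" -> substring gives "30" -> 1993 + 30 - 1 = 2022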
write.csv(regions_stats_clean, "output/slr.csv", row.names = FALSE)
regions_stats_clean <- read_csv("output/slr.csv")
## visualize data
plotData <- regions_stats_clean %>%
left_join(rgn_data, by = "rgn_id") %>%
dplyr::select(rgn_name, year, pressure_score) %>%
dplyr::arrange(rgn_name, year) %>%
data.frame()
# this does not work at the moment, probably due to lack of Flash support
# Motion <- gvisMotionChart(plotData, idvar = "rgn_name", timevar = "year")
# plot(Motion)
# print(Motion, file = "slr.html")
# line charts for all regions (hover for names; useful for seeing general trend info)
line_plot <- plot_ly(plotData, x = ~year, y = ~pressure_score, color = ~rgn_name, type = "scatter", mode = "lines") %>%
layout(title = "All Regions: Pressure Score Line Charts",
xaxis = list(title = "Year"),
yaxis = list(title = "Pressure Score"))
# display plot
line_plot
# save plot
htmlwidgets::saveWidget(line_plot, file = "slr.html")
new_data <- read.csv("output/slr.csv") %>%
dplyr::select(rgn_id, year, new_pressure_score = pressure_score)
old_data <- read.csv(sprintf("../v%s/output/slr.csv", scen_year - 1)) %>%
left_join(new_data, by = c("year", "rgn_id"))
plot(old_data$pressure_score, old_data$new_pressure_score, ylab = "new score", xlab = "old score")
abline(0, 1, col = "red")
# create data frame with scores that changed by more than 0.05
old_data_pscore <- old_data %>%
filter(year == "2021") %>%
select(rgn_id, year, pressure_score)
new_data_pscore <- new_data %>%
filter(year == "2022")
slr_new_old <- new_data_pscore %>%
left_join(old_data_pscore, by = 'rgn_id')
mjr_score_change <- slr_new_old %>%
mutate(diff = new_pressure_score - pressure_score) %>%
filter(abs(diff) > 0.05) %>%
mutate(abs_diff = abs(diff))
# save major score changes to output folder as csv
write.csv(mjr_score_change, "output/major_changes_2024.csv", row.names = FALSE)
slr_status_plot <- ggplot(slr_new_old,
aes(x = pressure_score, y = new_pressure_score, key = rgn_id)) +
geom_point(alpha = .6) +
theme(legend.position = 'none') +
geom_abline(slope = 1, intercept = 0, color = 'red') +
labs(x = 'SLR status v2023 (data through Dec 2021)',
y = 'SLR status v2024 (data through Dec 2022)',
title = 'Comparing SLR status: 2024 vs 2023')
slr_status_plot
library(plotly)
interactive_plot <- ggplotly(slr_status_plot)
interactive_plot
bar_plot <- ggplot(mjr_score_change, aes(x = factor(rgn_id), y = diff)) +
geom_bar(stat = 'identity', fill = 'cornflowerblue', alpha = 0.8) +
labs(x = 'Region ID',
y = 'Difference in Pressure Score',
title = 'Major Score changes between New (v2024) and Old (v2023) Pressure Scores') +
theme_minimal(base_size = 8) +
theme(axis.text.x = element_text(angle = 90, size = 5, margin = margin(t = 0, r = -3, b = 0, l = -3)),
plot.title = element_text(hjust = 0.5, face = "bold"),
axis.title.x = element_text(face = "bold"),
axis.title.y = element_text(face = "bold"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
bar_plot # bar plot of all 118 major score changes
boxplot_diff <- ggplot(mjr_score_change, aes(y = diff)) +
geom_boxplot(fill = 'cornflowerblue', alpha = 0.7) +
labs(y = 'Difference in Pressure Score (v2024 vs v2023)',
title = 'Distribution of Differences between New and Old Pressure Scores') +
theme_minimal(base_size = 11) +
theme(axis.title.y = element_text(size = 9))
boxplot_diff