This script creates the Sea Surface Temperature (SST) layer for the 2021 global Ocean Health Index assessment.
For the 2021 assessment, the most recent data year is 2020; data years lag assessment years by one, so the 2020 assessment used data through 2019, and so on.
Data come from CoRTAD version 6 (Selig et al. 2010). See prs_sst/v2015/dataprep.R for preparation of the "annual_pos_anomalies" data.
Native Data Resolution: ~4 km
Description: Cortadv6_SSTA.nc = SST anomalies (weekly SST minus the weekly climatological SST), weekly data for all years, degrees Kelvin; Cortadv6_weeklySST.nc = SST, weekly data for all years, degrees Kelvin
Time Range: 1982 - 2020 (weekly averages across all years)
Format: NetCDF
Downloaded: August 10, 2021
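For reference, the anomaly stored in Cortadv6_SSTA.nc is simply the weekly SST minus the weekly climatological SST; in notation (mine, not from the data provider):

$$\mathrm{SSTA}_{w,y} = \mathrm{SST}_{w,y} - \overline{\mathrm{SST}}_{w}$$

where $w$ is the week of the year, $y$ is the year, and $\overline{\mathrm{SST}}_{w}$ is the climatological mean SST for week $w$ across years.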
knitr::opts_chunk$set(fig.width = 6, fig.height = 4, fig.path = 'figs/', message = FALSE, warning = FALSE)
library(raster)
library(RColorBrewer)
library(tidyverse)
library(rgdal)
library(doParallel)
library(foreach)
library(sf)
library(ncdf4)
library(httr)
library(lubridate)
library(animation)
library(ggplot2)
library(plotly)
library(here)
# spatial files, directories, etc
source("~/github/ohiprep_v2021/workflow/R/common.R")
<- file.path(dir_M, "git-annex/globalprep/_raw_data/CoRTAD_sst/d2021")
dir_data <- file.path(dir_M, "git-annex/globalprep/prs_sst/v2021/int")
dir_int <- file.path(dir_M, "git-annex/globalprep/prs_sst/v2021/output")
dir_output
ohi_rasters()
regions_shape()
yrs <- 1982:2020

cols <- rev(colorRampPalette(brewer.pal(11, 'Spectral'))(255)) # rainbow color scheme

land <- regions %>% subset(rgn_type %in% c("land", "land-disputed", "land-noeez"))
## download URL
url <- "https://data.nodc.noaa.gov/cortad/Version6"

## retrieve the netcdf data, SSTA (~98 GB) and WeeklySST (~28 GB)
## these downloads take roughly 2 hours; it's a lot of data!
ssta <- sprintf("%s/cortadv6_SSTA.nc", url)
ssta_filename <- file.path(dir_M, "git-annex/globalprep/_raw_data/CoRTAD_sst/d2021/cortadv6_SSTA.nc")
ssta_res <- httr::GET(ssta, write_disk(ssta_filename))

weekly_sst <- sprintf("%s/cortadv6_WeeklySST.nc", url)
weekly_sst_filename <- file.path(dir_M, "git-annex/globalprep/_raw_data/CoRTAD_sst/d2021/cortadv6_WeeklySST.nc")
weekly_sst_res <- httr::GET(weekly_sst, write_disk(weekly_sst_filename))

closeAllConnections()
We define an 'extreme event' as a week in which the SST anomaly exceeds one standard deviation of that week's SST across years. Because the anomaly data downloaded from CoRTAD are only the deviations from the climatological mean, we first calculate the weekly standard deviation of SST across all years and then count the cases where the anomaly exceeds that standard deviation.
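To make that concrete, here is a minimal sketch (with made-up numbers; ssta_example and sd_example are hypothetical, not from the data) of the per-pixel logic applied in the overlay step further below:

## illustrative only: flag weeks where the SST anomaly exceeds the weekly standard deviation
ssta_example <- c(0.2, 1.5, NA, 0.9)  # hypothetical weekly SST anomalies (deg K) for one pixel
sd_example   <- rep(0.8, 4)           # hypothetical weekly SDs of SST across years
flag <- ifelse(is.na(ssta_example) | is.na(sd_example), 0,
               ifelse(ssta_example > sd_example, 1, 0))
flag      # 0 1 0 1
sum(flag) # summing these 0/1 flags over all weeks of a year gives the annual count of positive anomalies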
## load netcdf sst data
ssta <- stack(list.files(dir_data, pattern = "SSTA.nc",
                         full.names = TRUE), varname = "SSTA")
weekly_sst <- stack(list.files(dir_data, pattern = "WeeklySST.nc",
                               full.names = TRUE), varname = "WeeklySST")

names_ssta <- names(ssta)
names_weekly <- names(weekly_sst)

ssta_df <- names_ssta %>% # View(ssta_df)
  data.frame() %>%
  rename(name = ".") %>%
  mutate(year = substr(name, 2, 5),
         month = substr(name, 7, 8),
         day = substr(name, 10, 11)) %>%
  mutate(week = week(as.Date(sprintf("%s-%s-%s", year, month, day))))
## the next for-loop takes a long time, ~22 min for each of the 53 weekly layers
## create weekly standard deviations across all years
for(i in 5:53){
  # i = 3
  t0 = Sys.time()
  print(paste("calculating sd for week", i, "-- started at", t0))

  s = stack()
  for (j in yrs){ # for the approach using a reference period to calculate extreme events: change 'yrs' here to include just the reference years
    w = which(substr(names_weekly, 2, 5) == j)[i]
    if(is.na(w)) next() # most years don't have 53 weeks; 'next' works in a for loop but not in foreach + %dopar%
    w_week = weekly_sst[[w]]
    s = stack(s, w_week)
  }

  rasterOptions(todisk = FALSE)

  # raster::calc(s, fun = function(x){sd(x, na.rm = TRUE)},
  #              progress = "text",
  #              filename = file.path(dir_int, sprintf("sd_sst_week_%s.tif", i)))

  ## try parallel processing with 10 cores; this speeds it up significantly. 10 cores is a lot though,
  ## so if you are NOT running it overnight, consider decreasing to 6-8 cores (to avoid hogging cores during the workday)
  raster::beginCluster(n = 10)
  parallel_sd <- raster::clusterR(s, fun = calc,
                                  args = list(fun = sd, na.rm = TRUE))
  endCluster()
  # parallel_sd
  # plot(parallel_sd)

  writeRaster(parallel_sd, filename = file.path(dir_int, sprintf("sd_sst_week_%s.tif", i)))

  # test <- raster::raster(file.path(dir_int, sprintf("sd_sst_week_%s.tif", "2")))
  # plot(test)
  # test
}
registerDoParallel(5)

combine_fun = function(x){sum(x, na.rm = TRUE)} # takes a raster stack object as the x arg

# yrs <- yrs[12:39]

## calculate annual positive anomalies; ~17 minutes per year with 5 cores
for(j in yrs){
  t0 = Sys.time()
  print(paste("calculating anomaly for", j, "-- started at", t0))

  s = stack()
  wks = ssta_df %>%
    filter(year == j) %>%
    select(week)

  s <- foreach(i = wks$week, .packages = c("raster", "ncdf4", "rgdal"), .combine = "stack") %dopar% {
    sd_sst = raster::raster(file.path(dir_int, sprintf("sd_sst_week_%s.tif", i)))
    w = which(substr(names_ssta, 2, 5) == j)[i]
    w_ssta = ssta[[w]]
    raster::overlay(w_ssta, sd_sst, # flag a week as 1 if the anomaly exceeds the weekly sd, else 0
                    fun = function(x, y){ifelse(is.na(x) | is.na(y), 0, ifelse(x > y, 1, 0))})
  }

  yr = combine_fun(s)
  raster::writeRaster(yr, filename = file.path(dir_int, sprintf("annual_pos_anomalies_sd_%s.tif", j)))
}
anom_files <- list.files(dir_int, pattern = "annual_pos_anomalies", full.names = TRUE)

## time period for historical comparison (1985-1989)
ref_years <- c()
for(i in 1985:1989){ref_years = c(ref_years, grep(i, anom_files))}
ref <- stack(anom_files[ref_years]) %>% sum(.)

## create land mask
anom_proj = "+proj=longlat +ellps=WGS84 +no_defs"
land_mask <- land %>% st_geometry %>% st_transform(anom_proj) %>% as("Spatial") # sf multipolygon not working for mask...
## calculate the difference between the recent 5-year cumulative sum and the historical 1985-1989 reference;
## writes in chunks of 5 files, and every 5 files takes about 2 hrs 15 min, so 31 files total should take ~14 hours
registerDoParallel(5)
t0 = Sys.time()

foreach(i = seq(1986, max(yrs) - 4)) %dopar% {
  years = i:(i + 4)

  overlay(stack(anom_files[substr(anom_files, 81, 84) %in% years]) %>% sum(.),
          ref, fun = function(x, y){x - y}) %>%
    mask(land_mask, inverse = TRUE) %>%
    writeRaster(filename = sprintf("%s/sst_diff_ocean_%s-%s.tif",
                                   dir_int, years[1], years[5]), overwrite = TRUE)
}
Sys.time() - t0
## get data across all years
diffs <- list.files(dir_int, pattern = "diff", full.names = TRUE)

vals <- c()
for(i in 1:length(diffs)){
  m = diffs[i] %>% raster() %>% getValues()
  vals = c(vals, m)
}

## get min, max, and 99.99th quantile
min_val <- min(vals, na.rm = TRUE) # -142 in v2018; -159 in v2021
max_val <- max(vals, na.rm = TRUE) # 182 in v2018; 228 in v2021
resc_num <- quantile(vals, prob = 0.9999, na.rm = TRUE) # 128 for v2018; 148 for v2021
## write the reference point; only if changed since the last assessment
sup_info <- "~/github/ohiprep_v2021/globalprep/supplementary_information/v2021" # should reflect assessment year

rescale <- read.csv(file.path(sup_info, "reference_points_pressures.csv"))
rescale$ref_point[rescale$pressure == "Sea Surface Temperature"] <- resc_num
write.csv(rescale, file.path(sup_info, "reference_points_pressures.csv"), row.names = FALSE)
The minimum value is min_val, the maximum value is max_val, and the reference point used for rescaling is resc_num.
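The reference point caps the rescaled pressure: anomaly-count differences at or above resc_num map to 1, non-positive differences map to 0, and everything in between is divided by resc_num (this mirrors the calc() call in the projection loop below). A minimal sketch with made-up values (diff_example and resc_num_example are hypothetical):

## illustrative only: the rescaling rule applied to the difference rasters below
resc_num_example <- 148                  # e.g., the 99.99th-quantile reference point noted above
diff_example <- c(-20, 0, 74, 148, 200)  # hypothetical 5-year minus 1985-1989 anomaly-count differences
rescaled <- ifelse(diff_example > 0,
                   ifelse(diff_example > resc_num_example, 1, diff_example / resc_num_example),
                   0)
rescaled # 0.0 0.0 0.5 1.0 1.0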
## get diff files to rescale
diffs <- list.files(dir_int, pattern = "diff.*tif", full.names = TRUE)

## read the rescaling number from the pressures reference points file
resc_num <- read.csv(file.path(sup_info, "reference_points_pressures.csv")) %>%
  filter(pressure == "Sea Surface Temperature") %>%
  .$ref_point

resc_num <- as.numeric(as.character(resc_num))
registerDoParallel(6)

if(!file.exists(dir_output)){dir.create(path = dir_output)} # create the output directory if it doesn't exist

foreach(i = 1:length(diffs)) %dopar% {
  r = raster(diffs[i])
  y = substr(diffs[i], 72, 80)

  if(file.exists(sprintf("%s/sst_%s_1985-1989.tif", dir_output, y))){
    print("skipping")
  } else {
    projection(r) = "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"

    out = projectRaster(r, crs = mollCRS, over = TRUE) %>%
      # rescale: 0 for values <= 0, x/resc_num for values between 0 and resc_num, capped at 1
      calc(., fun = function(x){ifelse(x > 0, ifelse(x > resc_num, 1, x/resc_num), 0)}) %>%
      resample(., ocean, method = "ngb",
               filename = sprintf("%s/sst_%s_1985-1989.tif", dir_output, y),
               overwrite = TRUE)
  }
}
## compare 2008-2012 extremes between the v2021 and v2018 assessments
sst_v2021_test <- raster(file.path(dir_output, "sst_2008-2012_1985-1989.tif"))
sst_v2018_test <- raster(sprintf("%s/git-annex/globalprep/prs_sst/v2018/output/%s",
                                 dir_M, "sst_2008-2012_1985-1989.tif"))

df_tmp <- data.frame(v2021 = getValues(sst_v2021_test),
                     v2018 = getValues(sst_v2018_test))

df_samp <- df_tmp %>% mutate(rowcol = rownames(.)) %>%
  filter(rowcol %in% sample(1:length(rowcol), 70000))

plot(df_samp$v2018, df_samp$v2021, type = "n")
points(cbind(df_samp$v2018, df_samp$v2021),
       col = rgb(0, 0, 1, alpha = 0.01), pch = 16, cex = 2)
## load zonal data, load and check relevant rasters
rast_loc <- file.path(dir_M, "git-annex/Global/NCEAS-Regions_v2014/data/sp_mol_raster_1km")
rgn_data <- read.csv(file.path(rast_loc, "regionData.csv"))
# rgns <- zones %>% getValues %>% unique
# length(rgns[rgns < 300 & is.na(rgns) == FALSE])
# zones <- raster(file.path(rast_loc, "sp_mol_raster_1km.tif")) # v2018 used zones=regions_eez_with_fao_ant.tif sourced from spatial.common instead
## sst rasters
sst_rasters <- list.files(dir_output, pattern = "sst_.*1985-1989.tif", full.names = TRUE)
plot(raster(sst_rasters[[length(sst_rasters)]])) # plot the most recent raster to check

## apply ice mask
ice_mask <- raster(file.path(dir_M, "git-annex/Global/NCEAS-Pressures-Summaries_frazier2013/ice_mask_resampled"))
registerDoParallel(3)

foreach(rast = sst_rasters) %dopar% {
  overlay(raster(rast), ice_mask,
          fun = function(x, y) {x * y},
          filename = file.path(sprintf("%s/sst_%s_rescaled_icemask.tif", dir_output,
                                       substr(rast, 64, 82))),
          overwrite = TRUE)
}
## rescaled and masked sst data
sst_res_mask <- list.files(dir_output, "sst.*_rescaled_icemask.tif", full.names = TRUE)

## create a gif visualizing the rescaled and masked sst
saveGIF({
  for(i in 1:length(sst_res_mask)){
    n = sprintf("SST Pressure %s",
                substr(sst_res_mask[i], 64, 72))
    plot(raster(sst_res_mask[i]),
         zlim = c(0, 1), # fix z-limits
         axes = FALSE, box = FALSE,
         main = n)}},
  ani.width = 750,
  ani.height = 400,
  movie.name = sprintf("%s/sst.gif", dir_output)) ## couldn't get this to work - v2021
## sst pressure, stack rasters
sst_stack <- stack(list.files(dir_output,
                              pattern = "sst_.*_rescaled_icemask.tif",
                              full.names = TRUE))
## some exploring
plot(sst_stack[[nlayers(sst_stack)]])
click(sst_stack[[nlayers(sst_stack)]])
plot(sst_stack[[31]])
## extract data by region
regions_stats <- zonal(sst_stack, zones, fun = "mean", na.rm = TRUE,
                       progress = "text") %>% data.frame()

setdiff(regions_stats$zone, rgn_data$rgn_id) # check; antarctica high seas (268, 271, 278), 265 NA high seas region, conflict areas 255...
setdiff(rgn_data$rgn_id, regions_stats$zone)
## wrangle and save
data <- merge(rgn_data, regions_stats, all.y = TRUE, by.x = "rgn_id", by.y = "zone")
write.csv(data, file.path(dir_output, "rgn_sst_prs.csv"), row.names = FALSE)

data <- read.csv(file.path(dir_output, "rgn_sst_prs.csv"), stringsAsFactors = FALSE)
## save data for the toolbox
for(years in c(2012:max(yrs))){
  scenario = sprintf("sst_%s.%s_1985.1989_rescaled_icemask", years - 4, years)

  eez = data %>%
    filter(sp_type == "eez") %>%
    select(rgn_id, contains(scenario)) %>%
    rename(pressure_score = contains(scenario))

  write.csv(eez, sprintf("output/sst_eez_%s.csv", years), row.names = FALSE)

  # fao = filter(data, sp_type == "fao")
  # fao = fao[, c("rgn_id", scenario)]
  # names(fao)[names(fao) == scenario] = "pressure_score"
  # write.csv(fao, sprintf("output/sst_fao_%s.csv", years), row.names = FALSE)
}
## sst has no gapfilling
sst <- read.csv("output/sst_eez_2020.csv")
sst <- mutate(sst, pressure_score_gf = 0)
write.csv(sst, "output/sst_eez_2020_gf.csv", row.names = FALSE)
sst_final <- data.frame()

for (year in 2012:2020){ # year = 2012
  prs <- read.csv(sprintf("output/sst_eez_%s.csv", year))

  prs <- prs %>%
    mutate(year = year) %>%
    select(rgn_id, year, pressure_score)

  sst_final <- rbind(sst_final, prs)
}

write.csv(sst_final, "output/sst_updated.csv", row.names = FALSE)
## plot results to check, for the most recent year
res <- list.files(dir_output, pattern = "sst.*icemask.tif", full.names = TRUE)

plot(raster(res[[length(res)]]), axes = FALSE,
     main = "Sea Surface Temperature Pressure Layer \n OHI 2021")
## compare with last year's data
old_sst <- read.csv("../v2018/output/sst_updated.csv")

compare <- old_sst %>%
  filter(year == 2017) %>%
  dplyr::select(rgn_id, old_pressure_score = pressure_score) %>%
  left_join(data, by = "rgn_id") %>%
  filter(!(is.na(rgn_name))) %>%
  filter(sp_type == "eez") %>%
  dplyr::select(rgn_id, rgn_name, old_pressure_score,
                matches("sst_.*2012_1985.1989.*"),
                matches("sst_.*2013_1985.1989.*"),
                matches("sst_.*2014_1985.1989.*"),
                matches("sst_.*2015_1985.1989.*"),
                matches("sst_.*2016_1985.1989.*"),
                matches("sst_.*2017_1985.1989.*"),
                matches("sst_.*2018_1985.1989.*"),
                matches("sst_.*2019_1985.1989.*"),
                matches("sst_.*2020_1985.1989.*")) %>%
  rename(sst_2012 = matches("sst_.*2012_1985.1989.*"),
         sst_2013 = matches("sst_.*2013_1985.1989.*"),
         sst_2014 = matches("sst_.*2014_1985.1989.*"),
         sst_2015 = matches("sst_.*2015_1985.1989.*"),
         sst_2016 = matches("sst_.*2016_1985.1989.*"),
         sst_2017 = matches("sst_.*2017_1985.1989.*"),
         sst_2018 = matches("sst_.*2018_1985.1989.*"),
         sst_2019 = matches("sst_.*2019_1985.1989.*"),
         sst_2020 = matches("sst_.*2020_1985.1989.*"))
p <- ggplot(compare, aes(x = old_pressure_score, y = sst_2017, label = rgn_name)) +
  geom_point(shape = 19) +
  theme_bw() +
  geom_abline(intercept = 0, slope = 1) +
  labs(title = "SST comparison")

ggplotly(p)
ggplot(compare, aes(x = sst_2020)) +
  geom_histogram(fill = "gray", color = "black") +
  theme_bw() +
  labs(title = "SST 2021")

quantile(compare$sst_2017)
After combining the SST data into the output table sst_updated.csv, compare to last year's assessment. We compare scenario years that used data year 2017, the most recent data year shared between v2021 and v2018.
<- read.csv("../v2018/output/sst_updated.csv") %>%
old_sst filter(year == 2017) %>%
select(rgn_id, old_prs_score = pressure_score)
<- read.csv("output/sst_updated.csv") %>%
sst filter(year == 2017) %>%
select(rgn_id, prs_score = pressure_score)
<- sst %>%
combine left_join(old_sst, by = "rgn_id")
plot(combine$old_prs_score, combine$prs_score)
<- ggplot(combine, aes(x = old_prs_score, y = prs_score, label = rgn_id)) +
p geom_point(shape = 19) +
theme_bw() +
geom_abline(intercept = 0, slope = 1) +
labs(title = "SST comparison v2021 and v2018 \n (data year 2017)")
ggplotly(p)
Selig, E.R., K.S. Casey, and J.F. Bruno (2010), New insights into global patterns of ocean temperature anomalies: implications for coral reef health and management, Global Ecology and Biogeography, DOI: 10.1111/j.1466-8238.2009.00522.x.