ohi logo
OHI Science | Citation policy

Summary

This script creates the Sea Surface Temperature (SST) layer for the 2022 global Ocean Health Index assessment.


Updates from previous assessment

Used updated 2021 data from NOAA


Data Source

Data come from NOAA’s NCEI: CoRTAD version 6

See prs_sst/v2015/dataprep.R for preparation of the “annual_pos_anomalies” data.

Native Data Resolution: ~4km
Description: Cortadv6_SSTA.nc = SST anomalies (weekly SST minus weekly climatological SST), weekly data for all years, degrees Kelvin Cortadv6_weeklySST.nc = SST, weekly data for all years, degrees Kelvin
Time Range: 1982 - 2021 (weekly averages across all years)
Format: NetCDF Downloaded: June 22, 2022


Methods

  1. Extreme events per year based calculated as number of times SST anomaly exceeds SST Standard Deviation based on weekly values (annual_pos_anomalies data, see v2015/dataprep.R for analysis).
  2. Sum extreme events for five year periods to control for yearly variation.
  3. Change in extreme events: Subtract number of extreme events for each five year period from control period (1985-1989).
  4. Rescale “Change in extreme events” data to values between 0 and 1 by dividing by the 99.99th quantile among all years of data.

Setup

knitr::opts_chunk$set(fig.width = 6, fig.height = 4, fig.path = 'figs/', message = FALSE, warning = FALSE)


library(raster)
library(RColorBrewer)
library(tidyverse)
library(rgdal)
library(doParallel)
library(foreach)
library(sf)
library(ncdf4)
library(httr)
library(lubridate)
library(animation)
library(plotly)
library(here)


# spatial files, directories, etc
source(here("workflow/R/common.R"))

scen_year <- "2022"

dir_data <- paste0(dir_M, "/git-annex/globalprep/_raw_data/CoRTAD_sst/d", scen_year)
dir_int  <- paste0(dir_M, "/git-annex/globalprep/prs_sst/v", scen_year, "/int")
dir_output  <- paste0(dir_M, "/git-annex/globalprep/prs_sst/v", scen_year, "/output")


ohi_rasters()
regions_shape()

yrs <- 1982:2021

cols <- rev(colorRampPalette(brewer.pal(11, 'Spectral'))(255)) # rainbow color scheme
land <- regions %>% subset(rgn_type %in% c("land", "land-disputed", "land-noeez"))

Get new data if available

## download URL
url <- "https://data.nodc.noaa.gov/cortad/Version6"

## retrieve the netcdf data, SSTA (~98GB) and WeeklySST (~28GB)
## these take like 2 hours, it's a lot of data!!!
ssta <- sprintf("%s/cortadv6_SSTA.nc", url)
ssta_filename <- paste0(dir_M, "/git-annex/globalprep/_raw_data/CoRTAD_sst/d", scen_year, "/cortadv6_SSTA.nc")
ssta_res <- httr::GET(ssta, write_disk(ssta_filename))

weekly_sst <- sprintf("%s/cortadv6_WeeklySST.nc", url)
weekly_sst_filename <- paste0(dir_M, "/git-annex/globalprep/_raw_data/CoRTAD_sst/d", scen_year, "/cortadv6_WeeklySST.nc")
weekly_sst_res <- httr::GET(weekly_sst, write_disk(weekly_sst_filename))

closeAllConnections()

Generate annual positive anomalies

We consider anomalies the mean plus one standard deviation; these are the thresholds used to identify ‘extreme events’. Since the sea surface temperature anomaly data downloaded from CoRTAD is just the mean, we calculate standard deviation and count cases where the anomaly data exceeds the standard deviation (?)

## load netcdf uv radiation data
ssta         <- stack(list.files(dir_data, pattern = "SSTA.nc",
                                 full.names = TRUE), varname = "SSTA")
weekly_sst   <- stack(list.files(dir_data, pattern = "WeeklySST.nc",
                                 full.names = TRUE), varname = "WeeklySST")

names_ssta   <- names(ssta)
names_weekly <- names(weekly_sst)

ssta_df <- names_ssta %>% # View(ssta_df)
  data.frame() %>% 
  rename(name = ".") %>% 
  mutate(year = substr(name, 2, 5), 
         month = substr(name, 7, 8), 
         day = substr(name, 10, 11)) %>% 
  mutate(week = week(as.Date(sprintf("%s-%s-%s", year, month, day))))

## the next for-loop takes a long time, ~22min for each of 53 layers (2021)
## 2022 - upped to 20 cores and ran over night - averaged ~14 min per layer

## create weekly standard deviations across all years

for(i in 1:53){
  
 # i = 3
  t0 = Sys.time()
  print(paste("calculating sd for week", i, "-- started at", t0))
  s = stack()
  
  for (j in yrs){ # FOR APPROACH OF USING REF PERIOD TO CALC EXTREME EVENTS: CHANGE 'YRS' HERE TO INCLUDE JUST REFERENCE YEARS
    w = which(substr(names_weekly, 2, 5) == j)[i]
    if(is.na(w)) next() # most yrs don't have 53 weeks; 'next' works in for loop but not foreach+dopar
    w_week = weekly_sst[[w]]
    s = stack(s, w_week)
  }
  

  rasterOptions(todisk = FALSE) 
  
  # raster::calc(s, fun = function(x){sd(x, na.rm = TRUE)},
  #                progress ="text",
  #                filename = file.path(dir_int, sprintf("sd_sst_week_%s.tif", i)))
  
  
  ## try with parallel processing 10 cores.. this will speed it up significantly. 10 cores is a lot though, so if you NOT are running it overnight, maybe decrease to 6-8 cores (don't wanna hog cores during the workday). 
  
  raster::beginCluster(n = 10)
  
  parallel_sd <- raster::clusterR(s, fun = calc,
                            args = list(fun = sd, na.rm = TRUE))

  endCluster()
  
  # parallel_sd
  # plot(parallel_sd)
  
  writeRaster(parallel_sd, filename = file.path(dir_int, sprintf("sd_sst_week_%s.tif", i)))


# test <- raster::raster(file.path(dir_int, sprintf("sd_sst_week_%s.tif", "2")))
# plot(test)
# test
#   

}

# when running this overnight I tried bumping up the number of cores to greater than 5 and ran into errors several times adjusting the number until I returned the number of cores to 5 and then it ran error free... 
registerDoParallel(5)
combine_fun = function(x){sum(x, na.rm = TRUE)} # takes raster stack object as x arg

yrs <- 1999:2021

## calculate annual positive anomalies; ~17 minutes per year with 5 cores
for(j in yrs){
  t0 = Sys.time()
  print(paste("calculating anomaly for", j, "-- started at", t0))
  s = stack()
  
  wks = ssta_df %>% 
    filter(year == j) %>% 
    select(week)
  
  s <- foreach(i = wks$week, .packages = c("raster", "ncdf4", "rgdal"), .combine = "stack") %dopar% {
    sd_sst = raster::raster(file.path(dir_int, sprintf("sd_sst_week_%s.tif", i))) 
    w = which(substr(names_ssta, 2, 5) == j)[i]
    w_ssta = ssta[[w]]
    raster::overlay(w_ssta, sd_sst, 
                    fun = function(x, y){ifelse(is.na(x) | is.na(y), 0, ifelse(x > y, 1, 0))})
  }
  
  yr = combine_fun(s)
  raster::writeRaster(yr, filename = file.path(dir_int, sprintf("annual_pos_anomalies_sd_%s.tif", j)))
}

Create 5 year cumulative sum of extreme events and calculate difference from historical

anom_files <- list.files(dir_int, pattern = "annual_pos_anomalies", full.names = TRUE)

## time period for historical comparison (1985-1989)
ref_years <- c()
for(i in 1985:1989){ref_years = c(ref_years, grep(i, anom_files))}
ref <- stack(anom_files[ref_years]) %>% sum(.)

## create land mask
anom_proj = "+proj=longlat +ellps=WGS84 +no_defs"
land_mask <- land %>% st_geometry %>% st_transform(anom_proj) %>% as("Spatial") # sf multipoly not working for mask...

## 2021 took ~14 hours to complete
## 2022 ***took over 72 hours to complete*** for efficiency's sake I created these rasters in chunks overnight and during the weekend. Need to diagnose the drastic time increase.

registerDoParallel(5)
t0 = Sys.time()
# foreach(i = seq(2013, max(yrs)-4)) %dopar% {
foreach(i = seq(1986, 2021)) %dopar% {
  years = i:(i + 4)
  
  overlay(stack(anom_files[substr(anom_files, 81, 84) %in% years]) %>% sum(.), 
          ref, 
          fun = function(x, y){x - y}) %>%
    mask(land_mask, inverse = TRUE) %>% 
    writeRaster(filename = sprintf("%s/sst_diff_ocean_%s-%s.tif", 
                                   dir_int, years[1], years[5]), overwrite = TRUE)
}
Sys.time() - t0

Reference Point

## get data across all years
diffs <- list.files(dir_int, pattern = "diff", full.names = TRUE)
vals <- c()

for(i in 1:length(diffs)){
  m = diffs[i] %>% 
    raster() %>% 
    getValues()
  vals = c(vals, m)
}

## get min, max, and 99.99th quantile
min_val   <- min(vals, na.rm = TRUE) # -142 in v2018; -159 in v2021; 157 in v2022
max_val   <- max(vals, na.rm = TRUE) # 182 in v2018; 228 in v2021; 247 in v2022
resc_num  <- quantile(vals, prob = 0.9999, na.rm = TRUE) # 128 for v2018; 148 for v2021; 148 for v2022

## write the reference point; only if changed since the last assessment
sup_info <- here(paste0("globalprep/supplementary_information/v", scen_year)) # should reflect assessment year

rescale <- read.csv(file.path(sup_info, "reference_points_pressures.csv"))
 rescale$ref_point[rescale$pressure == "Sea Surface Temperature"] <- resc_num 
 write.csv(rescale, file.path(sup_info, "reference_points_pressures.csv"), row.names = FALSE)

The minimum value is min_v, maximum value is max_v and the reference point is resc_num.

Rescaling

## get diff files to rescale
diffs <- list.files(dir_int, pattern = "diff.*tif", full.names = TRUE)

## read rescaling number from pressures reference points files
resc_num <- read.csv(file.path(sup_info, "reference_points_pressures.csv")) %>%
  filter(pressure == "Sea Surface Temperature") %>%
  .$ref_point
resc_num <- as.numeric(as.character(resc_num))

registerDoParallel(6)
if(!file.exists(dir_output)){dir.create(path = dir_output)} # create directory if doesn't exist

foreach(i = 1:length(diffs)) %dopar% {
  

  r = raster(diffs[i])
  y = substr(diffs[i], 72, 80)
  
  if(file.exists(sprintf("%s/sst_%s_1985-1989.tif", dir_output, y))){
    
    print(paste("skipping"))
  }else{

  projection(r) = "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
  
  out = projectRaster(r, crs = mollCRS, over = TRUE) %>%
        calc(., fun = function(x){ifelse(x > 0, ifelse(x > resc_num, 1, x/resc_num), 0)}) %>%
        resample(., ocean, method = "ngb", 
                 filename = sprintf("%s/sst_%s_1985-1989.tif", dir_output, y), 
                 overwrite = TRUE)
  
  }
}
## compare 2008-2012 extremes between v2021 and v2018 assessement 
sst_v2022_test <- raster(file.path(dir_output, "sst_2008-2012_1985-1989.tif"))
sst_v2021_test <- raster(sprintf("%s/git-annex/globalprep/prs_sst/v2021/output/%s", 
                                      dir_M, "sst_2008-2012_1985-1989.tif"))
df_tmp <- data.frame(v2022 = getValues(sst_v2022_test), 
                     v2021 = getValues(sst_v2021_test))
df_samp <- df_tmp %>% mutate(rowcol = rownames(.)) %>% 
  filter(rowcol %in% sample(1:length(rowcol), 70000))
plot(df_samp$v2021, df_samp$v2022, "n")
points(cbind(df_samp$v2021, df_samp$v2022), 
       col = rgb(0, 0, 1, alpha = 0.01), pch = 16, cex = 2)

Zonal extraction of SST data

## load zonal data, load and check relevant rasters
rast_loc <- file.path(dir_M, "git-annex/Global/NCEAS-Regions_v2014/data/sp_mol_raster_1km")
rgn_data <- read.csv(file.path(rast_loc, "regionData.csv")) 

# rgns <- zones %>% getValues %>% unique
# length(rgns[rgns < 300 & is.na(rgns) == FALSE])
# zones <- raster(file.path(rast_loc, "sp_mol_raster_1km.tif")) # v2018 used zones=regions_eez_with_fao_ant.tif sourced from spatial.common instead
## sst rasters
sst_rasters <- list.files(dir_output, pattern = "sst_.*1985-1989.tif", full.names = TRUE)
plot(raster(sst_rasters[[length(sst_rasters)]])) # length(sst_rasters) # plot to check

## apply ice mask
ice_mask <- raster(file.path(dir_M, "git-annex/Global/NCEAS-Pressures-Summaries_frazier2013/ice_mask_resampled"))


## v2022 - took a little over an hour
registerDoParallel(3)
foreach(rast = sst_rasters) %dopar% {
  overlay(raster(rast), ice_mask, 
          fun = function(x, y) {x * y}, 
          filename = file.path(sprintf("%s/sst_%s_rescaled_icemask.tif", dir_output, 
                                       substr(rast, 64, 82))),
          overwrite = TRUE)
}
## rescaled and masked sst data
sst_res_mask <- list.files(dir_output, "sst.*_rescaled_icemask.tif", full.names = TRUE)

## create gif visualizing the rescaled and masked sst
saveGIF({
  for(i in 1:length(sst_res_mask)){
    n = sprintf("SST Pressure %s", 
                substr(sst_res_mask[i], 64, 72))
    plot(raster(sst_res_mask[i]), 
         zlim = c(0, 1), # fix zlimits
         axes = FALSE, box = FALSE, 
         main = n)}}, 
  ani.width = 750,
  ani.height = 400,
  movie.name = sprintf("%s/sst.gif", dir_output)) # v2022 - output weird gibberish below but successfully created the gif
## sst pressure, stack rasters
sst_stack <- stack(list.files(dir_output, 
                              pattern = "sst_.*_rescaled_icemask.tif", 
                              full.names = TRUE))

## some exploring; run these two lines together, potentially in console - only worked when rendered in the "Plots" pane not as an inline output in the document
plot(sst_stack[[nlayers(sst_stack)]])
click(sst_stack[[nlayers(sst_stack)]])

plot(sst_stack[[32]])

## extract data by region
## v2022 - ~45 min to run
regions_stats <- zonal(sst_stack, zones, fun = "mean", na.rm = TRUE,
                       progress = "text") %>% data.frame()

setdiff(regions_stats$zone, rgn_data$rgn_id) # check; antarctica high seas (268, 271, 278), 265 NA high seas region, conflict areas 255...  
setdiff(rgn_data$rgn_id, regions_stats$zone)

## wrangle and save
data <- merge(rgn_data, regions_stats, all.y = TRUE, by.x = "rgn_id", by.y = "zone") %>% 
  write.csv(file.path(dir_output, "rgn_sst_prs.csv"), row.names = FALSE)
data <- read.csv(file.path(dir_output, "rgn_sst_prs.csv"), stringsAsFactors = FALSE)

Write final pressure layer and gapfilling record

## save data for the toolbox
for(years in c(2012:max(yrs))){
  
  scenario = sprintf("sst_%s.%s_1985.1989_rescaled_icemask", years-4, years)

  eez = data %>% 
    filter(sp_type == "eez") %>% 
    select(rgn_id, contains(scenario)) %>% 
    rename(pressure_score = contains(scenario))
  
  write.csv(eez, sprintf("output/sst_eez_%s.csv", years), row.names = FALSE)

  # fao = filter(data, sp_type == "fao")
  # fao = fao[, c("rgn_id", scenario)]
  # names(fao)[names(fao) == scenario] = "pressure_score"
  # write.csv(fao, sprintf("output/sst_fao_%s.csv", years), row.names = FALSE)
}
## sst has no gapfilling...
sst <- read.csv("output/sst_eez_2021.csv")
sst <- mutate(sst, pressure_score_gf = 0)
write.csv(sst, "output/sst_eez_2021_gf.csv", row.names = FALSE)

Save altogether

sst_final <- data.frame()

for (year in 2012:2021){ # year = 2012

    prs <- read.csv(sprintf("output/sst_eez_%s.csv", year))
  
  prs <- prs %>%
    mutate(year = year) %>%
    select(rgn_id, year, pressure_score)
  
  sst_final <- rbind(sst_final, prs)
  
}


write.csv(sst_final, "output/sst_updated.csv", row.names=FALSE)

Results

## plot results to check, for most recent year
res <- list.files(dir_output, pattern = "sst.*icemask.tif", full.names = TRUE)

plot(raster(res[[length(res)]]), axes = FALSE, 
     main = paste0("Sea Surface Temperature Pressure Layer \n OHI ", scen_year))
## compare with last year's data
old_sst <- read.csv("../v2021/output/sst_updated.csv")

compare <- old_sst %>%
  filter(year == 2020) %>% 
  dplyr::select(rgn_id, old_pressure_score = pressure_score) %>%
  left_join(data, by = "rgn_id") %>%
  filter(!(is.na(rgn_name))) %>%
  filter(sp_type == "eez") %>%
  dplyr::select(rgn_id, rgn_name, old_pressure_score,
                # regex to select columns 2012 up to latest data year
                matches("sst_.*20(1[2-9])|(2[0-9])_1985.1989.*"))

# Renaming columns "sst_" followed by data year
names(compare)[4:ncol(compare)] <- paste0("sst_", 2012:as.numeric(scen_year) - 1)


p <- ggplot(compare, aes(x = old_pressure_score, y = sst_2020, label = rgn_name)) +
  geom_point(shape = 19) + 
  theme_bw() + 
  geom_abline(intercept = 0, slope = 1) + 
  labs(title = "SST comparison")
ggplotly(p)

ggplot(compare, aes(x = sst_2021)) +
  geom_histogram(fill = "gray", color = "black") + 
  theme_bw() + 
  labs(title = "SST 2022")
quantile(compare$sst_2020)

Compare final outputs of v2022 and v2021

After combining sst data into the output table sst_updated.csv, compare to last year. Compare scenario years that used data year 2020, the most recent shared year between v2022 and v2021.

old_sst <- read.csv("../v2021/output/sst_updated.csv") %>% 
  filter(year == 2020) %>% 
  select(rgn_id, old_prs_score = pressure_score)

sst <- read.csv("output/sst_updated.csv") %>%
  filter(year == 2020) %>% 
  select(rgn_id, prs_score = pressure_score)

combine <- sst %>% 
  left_join(old_sst, by = "rgn_id")

plot(combine$old_prs_score, combine$prs_score)

p <- ggplot(combine, aes(x = old_prs_score, y = prs_score, label = rgn_id)) +
  geom_point(shape = 19) + 
  theme_bw() + 
  geom_abline(intercept = 0, slope = 1) + 
  labs(title = "SST comparison v2022 and v2021 \n (data year 2021)")
ggplotly(p)

Citation information

Selig, E.R., K.S. Casey, and J.F. Bruno (2010), New insights into global patterns of ocean temperature anomalies: implications for coral reef health and management, Global Ecology and Biogeography, DOI: 10.1111/j.1466-8238.2009.00522.x.