The Ultraviolet Radiation pressure layer is generated from daily data on Local Noon Erythemal UV Irradiance (mW/m2) derived from satellite observations.
One additional year of data was added to mazu.
Reference: The Ultraviolet Radiation pressure layer uses the Aura OMI Global Surface UVB Data Product.
https://disc.gsfc.nasa.gov/datasets/OMUVBd_V003/summary
https://acdisc.gesdisc.eosdis.nasa.gov/data/Aura_OMI_Level3/OMUVBd.003/
Native Data Resolution: 1 degree
Values: Level-3 OMI Surface UV Irradiance and Erythemal Dose- OMUVBd
Time Range: Daily data from 2005 - 2020 (10/1/2004 through 08/10/2021, but only full years of data are used)
Format: NetCDF HDF5 (.he5.nc)
Downloaded: August 10, 2021
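Each daily file holds a single 1-degree global grid. As a quick sanity check after download, one file can be opened directly with the raster package. This is a minimal sketch only: the file name shown is hypothetical, and the variable name matches the "ErythemalDailyDose" subset requested later in this script.

```r
library(raster) # reads the netCDF subsets via ncdf4

## hypothetical example; substitute any downloaded OMUVBd daily file
f <- "OMI-Aura_L3-OMUVBd_2020m0101_v003-2020m0103t093521.he5.nc"

uv_day <- raster(f, varname = "ErythemalDailyDose") # one day, 1-degree global grid
uv_day       # check extent, resolution, and CRS
plot(uv_day) # quick look at the daily erythemal UV field
```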
knitr::opts_chunk$set(message = FALSE, warning = FALSE)
## rhdf5 package for working with HDF5 files, from bioconductor: http://bioconductor.org/packages/release/bioc/html/rhdf5.html
#if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#BiocManager::install()
#BiocManager::install("rhdf5")
library(raster)
source('http://ohi-science.org/ohiprep_v2021/workflow/R/common.R')
library(ncdf4)
library(rgdal)
library(sf) # use simple features rather than sp
library(rhdf5)
library(ggplot2)
library(RColorBrewer)
library(foreach)
library(doParallel)
library(dplyr)
library(readr)
library(stringr)
library(httr)
library(lubridate)
library(googleVis)
library(animation)
library(plotly)
## update these 3 to reflect current assessment year, or whichever year data is being used!!!
data_yr <- "d2021"
raw_data_dir <- file.path(dir_M, "git-annex/globalprep/_raw_data/NASA_OMI_AURA_UV", data_yr)
int_sp_data <- file.path(dir_M, "git-annex/globalprep/prs_uv/v2021/int") # intermediate spatial data location
out_dir <- file.path(dir_M, sprintf("git-annex/globalprep/prs_uv/v2021/output"))

## years of data we are using for this data layer
yrs <- c(2005:2020)
mths <- str_pad(1:12, 2, "left", pad = "0")
days_full <- seq(1, 358, 7)

## global ocean raster at 1km for resampling/projecting purposes
ocean <- raster(file.path(dir_M, "model/GL-NCEAS-Halpern2008/tmp/ocean.tif"))
ocean_shp <- st_read(file.path(dir_M, "git-annex/globalprep/spatial/d2014/data"), layer = "regions_gcs")
land <- ocean_shp %>% filter(rgn_typ %in% c("land", "land-disputed", "land-noeez")) %>% st_geometry()

## define mollweide projection CRS
mollCRS <- crs("+proj=moll +lon_0=0 +x_0=0 +y_0=0 +ellps=WGS84 +units=m +no_defs")

## define colors in spectral format for plotting rasters -- rainbow color scheme
cols <- rev(colorRampPalette(brewer.pal(9, "Spectral"))(255))
Data files can be found in the GES DISC EARTHDATA Data Archive. An EarthData account is required and must be linked to the GES DISC data archive; see instructions here. Download a links list for the variable “ErythemalDailyDose” in NetCDF format. To get this list, go here and click on “Subset/Get Data”. For the download method, select “Get file subsets using OPeNDAP”; you don’t need to change the date range (that happens in the code below) or the region. From the variables drop-down, select just the “Erythemal Daily Dose” option; the file format should be netCDF. Note that the links list is valid for only 2 days, so a new one must be generated if data are downloaded after that window.

Once the links list .txt file has been downloaded, the code below uses the list and EarthData login info (username and password) to download the new data files into the raw data directory. Naming convention of the downloaded files: ‘OMI-Aura’ refers to the instrument, ‘L3’ means it is a level-3 data product, ‘OMUVBd’ is the measurement, the first date is when the data were recorded, and the second date and time correspond to modification/upload of the data.
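For reference, the observation date can also be pulled out of a file name with a regular expression rather than the fixed character positions used on the full mazu paths later in this script. This is a minimal sketch only; the file name below is hypothetical.

```r
library(stringr)
library(lubridate)

## hypothetical file name following the convention described above
fn <- "OMI-Aura_L3-OMUVBd_2020m0615_v003-2020m0617t093521.he5.nc"

## first date in the name = observation date; second = modification/upload date
date_parts <- str_match(fn, "OMUVBd_(\\d{4})m(\\d{2})(\\d{2})")
obs_date <- ymd(paste0(date_parts[2], date_parts[3], date_parts[4]))
obs_date # "2020-06-15"
```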
## need username and password, define in console when prompted (or read from secure file), don't save here!!
usrname <- readline("Type earthdata username:")
pass <- readline("Type earthdata password:")
## This took 103.5 minutes in 2019...
## read in file list .text, downloaded from earthdata & saved in destination folder (same as raw_data_dir)
file_list_raw <- read_delim(file.path("/home/shares/ohi/git-annex/globalprep/_raw_data/NASA_OMI_AURA_UV/d2021", "file_list.txt"), delim = "\\", col_names = FALSE)

file_list <- file_list_raw %>%
  mutate(url_str = as.character(X1)) %>%
  mutate(check_netcdf = str_match(url_str, pattern = "http.*OMI-Aura_L3-OMUVBd.*nc")) %>%
  filter(!is.na(check_netcdf)) %>%
  select(url_str)

## set up timekeeping for data download
t0 = Sys.time()
n.pym = length(file_list$url_str)
i.pym = 0
## download the data
for(i in 1:nrow(file_list)){
  # i = 1
  url = as.character(file_list[i, ])
  name_raw_file = substr(url, 88, 144)

  if(file.exists(file.path(raw_data_dir, "data", name_raw_file)) != TRUE){
    x = httr::GET(url, authenticate(usrname, pass, type = "basic"), verbose(info = TRUE, ssl = TRUE))
    bin = content(x, "raw")
    writeBin(bin, file.path(raw_data_dir, "data", name_raw_file)) # gnutls_handshake() failed: Handshake failed

    i.pym <- i.pym + 1
    min.done <- as.numeric(difftime(Sys.time(), t0, units = "mins"))
    min.togo <- (n.pym - i.pym) * min.done/i.pym # approx time remaining for data download
    print(sprintf("Retrieving %s of %s. Minutes done=%0.1f, to go=%0.1f",
                  i.pym, n.pym, min.done, min.togo))
  } else {
    print(sprintf("Skipping %s of %s. Already done.",
                  i.pym, n.pym))
  }
}
## tip: after downloading, check all .he5.nc files are about the same size i.e. they all downloaded properly/fully
# v2021: all look good
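One way to run that size check (a sketch, not part of the original workflow): list the downloaded files with their sizes and flag anything much smaller than the typical file, which usually indicates a truncated download.

```r
## list downloaded netCDF files and their sizes in bytes
dl_files <- list.files(file.path(raw_data_dir, "data"),
                       pattern = "OMI-Aura_L3-OMUVBd.*.he5.nc$", full.names = TRUE)
dl_sizes <- file.size(dl_files)

summary(dl_sizes) # sizes should cluster tightly around a common value

## flag files less than half the median size as suspect downloads
suspect <- dl_files[dl_sizes < 0.5 * median(dl_sizes, na.rm = TRUE)]
basename(suspect) # character(0) if everything downloaded fully
```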
## list and check missing dates in NetCDF data files
## list all files from raw data folder
files <- list.files(file.path(raw_data_dir, "data"), pattern = "OMI-Aura_L3-OMUVBd.*.he5.nc$", full.names = TRUE) # netcdf not hdf
files <- files[substr(files, 96, 99) %in% yrs] # select only files for yrs we want
## check all days are there; should go from Oct 2004 through Dec 31 of last full year of data
files_df <- files %>%
  data.frame() %>%
  rename(fullname = ".") %>% # View(files_df)
  mutate(post_modify_date = substr(fullname, 111, 119),
         year = substr(fullname, 96, 99),
         mo = substr(fullname, 101, 102),
         dy = substr(fullname, 103, 104)) %>%
  mutate(date = paste(year, mo, dy, sep = "-"),
         wk_of_yr = lubridate::week(as_date(date))) %>%
  group_by(year, wk_of_yr) %>%
  mutate(nday_in_wk = n()) %>%
  ungroup()

check_ndays <- files_df %>%
  group_by(year, mo) %>% # group_by(year) %>%
  summarize(ndays = n()) # %>% # View(check_ndays)
  # filter(ndays < 28)
## For some reason June 2016 is missing 14 days (June 1-14, 2016). I checked the website and those files don't exist; it is just a gap in the data. The rest seem ok. - v2021
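To list exactly which dates are missing, one option (a sketch using the files_df built above; not part of the original workflow) is to compare the parsed dates against a full daily sequence for the years being used.

```r
## all dates expected for the years used vs. dates actually present in the files
expected_dates <- seq(as.Date("2005-01-01"), as.Date("2020-12-31"), by = "day")
present_dates  <- as.Date(files_df$date)

missing_dates <- as.Date(setdiff(expected_dates, present_dates), origin = "1970-01-01")
missing_dates # v2021: expect the June 1-14, 2016 gap noted above
```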
Calculate weekly means and standard deviations across all years:
## for every week in each year of the time series, calculate weekly mean and standard deviation
registerDoParallel(3)
## note: 2016 wk 22 has only 4 layers (4th layer all NAs) and length(days)=52; missing all week 23
## note: so far some 2005 files appear to have been downloaded incorrectly... replace them manually or just retry download??
foreach (yr = yrs) %dopar% { # j = 22; yr = 2016
  # yr = 2005; j = 1
  l <- files[substr(files, 96, 99) == yr]

  days_df <- files_df %>%
    filter(year == yr) %>%
    select(wk_of_yr, nday_in_wk) %>%
    distinct() %>% # select just distinct weeks with number of data days they contain
    tidyr::complete(wk_of_yr = seq(1:53)) %>% # possible max 53 weeks
    mutate(nday_in_wk = replace(nday_in_wk, is.na(nday_in_wk), 0)) %>% # zeros if no data
    mutate(lag_nday = lag(nday_in_wk),
           lag_nday = replace(lag_nday, is.na(lag_nday), 1),
           doy = cumsum(lag_nday)) # day-of-year for start of each week of data

  days <- days_df$doy
  weeks <- days_df$wk_of_yr

  for (j in weeks[-length(weeks)]) { # print(days[j]:(days[j+1]-1)) # checking without foreach+dopar
    ## gapfill for weeks with 1 or fewer days using prev + subseq. weeks
    if(days_df$nday_in_wk[j] < 2){
      wk_files <- l[days[j-1]:(days[j+2]-1)] # gapfilling
    } else {
      wk_files <- l[days[j]:(days[j+1]-1)]
    }

    rasters <- raster(wk_files[1], varname = "ErythemalDailyDose")
    for(i in wk_files[-1]){
      r <- raster(i, varname = "ErythemalDailyDose")
      rasters <- stack(rasters, r)
    }
    uv_week <- rasters
    week = str_pad(weeks[j], 2, "left", pad = "0")

    week_mean <- calc(uv_week, fun = function(x) {mean(x, na.rm = TRUE)},
                      filename = sprintf("%s/weekly_means/weekly_means_%s_%s.tif",
                                         int_sp_data, yr, week),
                      overwrite = TRUE)

    week_sd <- calc(uv_week, fun = function(x) {sd(x, na.rm = TRUE)},
                    filename = sprintf("%s/weekly_sd/weekly_sd_%s_%s.tif",
                                       int_sp_data, yr, week),
                    overwrite = TRUE)

    week_mean_sd <- overlay(week_mean, week_sd, fun = function(x, y) {x + y},
                            filename = sprintf("%s/weekly_mean_sd/weekly_mean_sd_%s_%s.tif",
                                               int_sp_data, yr, week),
                            overwrite = TRUE)
  }
}
## get weekly climatologies across all years in the time series
names_weekly <- list.files(file.path(int_sp_data, "weekly_means"), full.names = TRUE)
match_weeks <- substr(names_weekly, 87, 92) %>% unique()

## check all weeks expected to be there are there
names_weekly_df <- names_weekly %>%
  data.frame() %>%
  rename(fullname = ".") %>%
  mutate(yr = substr(fullname, 82, 85),
         wk = substr(fullname, 87, 88)) # View(names_weekly_df)

tmp <- names_weekly_df %>% # View(tmp)
  select(yr, wk) %>%
  group_by(yr) %>%
  summarize(maxwk = max(wk))

foreach(i = match_weeks) %dopar% {
  w = names_weekly[(substr(names_weekly, 87, 92) == i)] %>% stack()

  m <- calc(w, fun = function(x){mean(x, na.rm = TRUE)},
            filename = sprintf("%s/weekly_climatologies/mean_week_%s", int_sp_data, i),
            overwrite = TRUE)

  sd <- calc(w, fun = function(x){sd(x, na.rm = TRUE)},
             filename = sprintf("%s/weekly_climatologies/sd_week_%s", int_sp_data, i),
             overwrite = TRUE)

  m_sd <- overlay(m, sd, fun = function(x, y){x + y},
                  filename = sprintf("%s/weekly_climatologies/mean_sd_week_%s", int_sp_data, i),
                  overwrite = TRUE) ## climatologies based on mean & sd of all years, additional year each assessment...
}
Compare each week in each year to the climatology for that week. The climatology is equal to the mean plus one standard deviation.
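As a toy illustration of that comparison for a single raster cell (hypothetical values): a week is flagged as a positive anomaly when its mean exceeds the climatological mean plus one standard deviation for that week of the year.

```r
## hypothetical values for one cell in one week of the year
week_mean_value <- 6.1 # mean erythemal dose for this week in a given year
clim_mean       <- 5.2 # climatological mean for this week across all years
clim_sd         <- 0.6 # climatological sd for this week across all years

as.integer(week_mean_value > (clim_mean + clim_sd)) # 1 = positive anomaly, 0 = not
```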
## loop to calculate annual positive anomalies
foreach (i = yrs) %dopar% {
  match_weeks <- names_weekly_df %>% filter(yr == i)
  s = stack()

  for(j in match_weeks$wk) {
    w_mean = raster(sprintf("%s/weekly_means/weekly_means_%s_%s.tif", int_sp_data, i, j)) # mean UV for week j, year i
    w_anom = raster(sprintf("%s/weekly_climatologies/mean_sd_week_%s.tif", int_sp_data, j)) # week j climatology
    count = overlay(w_mean, w_anom, fun = function(x, y){ifelse(x > y, 1, 0)}) # compare to average anomaly for that week
    s = stack(s, count)
  }

  year = calc(s, fun = function(x){sum(x, na.rm = TRUE)},
              filename = sprintf("%s/annual_anomalies_diff/annual_pos_anomalies_%s.tif", int_sp_data, i),
              overwrite = TRUE) ## each assessment gets a new year of data, another layer in this calculation...
}
Calculate the difference in the total number of anomalies over each 5-year period compared to the first 5 years (2005-2009).
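For orientation, these are the rolling 5-year windows produced below, each compared against the 2005-2009 reference period (a sketch only, using yrs from the setup chunk).

```r
## rolling 5-year windows given yrs = 2005:2020
window_starts <- 2005:(max(yrs) - 4)
data.frame(start = window_starts,
           end   = window_starts + 4) # 2005-2009, 2006-2010, ..., 2016-2020
```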
l <- list.files(file.path(int_sp_data, "annual_anomalies_diff"),
                pattern = "anomalies", full.names = TRUE)

## reference period is 2005-2009
ref <- l[1:5] %>% stack() %>%
  calc(., fun = function(x){sum(x, na.rm = TRUE)})

plot(ref, col = cols, axes = FALSE, main = "Total Number of Anomalies 2005-2009")
plot(land, add = TRUE)

registerDoParallel(4)
foreach(i = 2005:(max(yrs) - 4)) %dopar% {
  ## calculate difference between total number of anomalies in recent and historical (2005-2009) time periods
  y <- i:(i + 4)
  s <- stack(l[str_sub(l, -8, -5) %in% y]) %>% sum(., na.rm = TRUE)
  diff <- s - ref
  projection(diff) <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
  writeRaster(diff,
              filename = sprintf("%s/annual_anomalies_diff/annual_diff_%s_%s.tif",
                                 int_sp_data, y[1], y[5]),
              overwrite = TRUE)
}
ref_files <- list.files(file.path(int_sp_data, "annual_anomalies_diff"),
                        full.names = TRUE, pattern = "diff")

## this says reference point is 41
read.csv("../../supplementary_information/v2016/reference_points_pressures.csv", stringsAsFactors = FALSE) %>%
  filter(pressure == "Ultraviolet Radiation Anomalies") %>%
  dplyr::select(ref_point) %>%
  as.numeric(.$ref_point)

## get the reference point (resc_num = rescale number, excludes baseline 2005_2009 tif)
vals <- c()
for(i in 2006:(max(yrs) - 4)){
  max_yr <- i + 4
  m <- raster(ref_files[str_sub(ref_files, -13, -10) == i]) %>%
    mask(as(land, "Spatial"), inverse = TRUE) %>% getValues()
  vals <- c(vals, m)
}
resc_num <- stats::quantile(vals, prob = 0.9999, na.rm = TRUE) # 42 for v2018; 46 for v2019, 49 for v2020, 47 for v2021
## rescale using reference point
for(file in ref_files){
  print(file)
  the_name <- gsub(".tif", "", basename(file))
  m <- raster(file) %>%
    calc(fun = function(x){ifelse(x > 0, ifelse(x > resc_num, 1, x/resc_num), 0)},
         filename = file.path(int_sp_data, sprintf("rescaled/%s_rescaled.tif", the_name)),
         overwrite = TRUE)
}
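To make the rescaling concrete (hypothetical cell values; the reference point was 47 in v2021): differences at or above the reference point map to 1, positive differences below it scale linearly, and zero or negative differences map to 0.

```r
## toy example of the rescaling function applied above
resc_num_example <- 47            # v2021 reference point (99.99th quantile)
diff_vals <- c(-3, 0, 10, 47, 60) # hypothetical anomaly-count differences
ifelse(diff_vals > 0,
       ifelse(diff_vals > resc_num_example, 1, diff_vals / resc_num_example),
       0)
## 0.00 0.00 0.21 1.00 1.00 (10/47 is approximately 0.21)
```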
resc_files <- list.files(file.path(int_sp_data, "rescaled"),
                         full.names = TRUE, pattern = "rescaled.tif")

## resample to ocean raster scale (~1km) and then mask
registerDoParallel(4)
foreach(i = 2005:(max(yrs) - 4)) %dopar% {
  max_yr <- i + 4

  if(file.exists(file.path("/home/shares/ohi/git-annex/globalprep/prs_uv/v2021/output",
                           sprintf("uv_%s_%s-2005_2009_mol_1km.tif", i, max_yr))) != TRUE){
    mol1km_masked <- resc_files[str_sub(resc_files, -22, -19) == i] %>%
      raster() %>%
      projectRaster(crs = mollCRS, over = TRUE, method = "ngb") %>%
      resample(ocean, method = "ngb") %>%
      mask(ocean,
           filename = file.path("/home/shares/ohi/git-annex/globalprep/prs_uv/v2021/output",
                                sprintf("uv_%s_%s-2005_2009_mol_1km.tif", i, max_yr)),
           overwrite = TRUE)
  } else {
    print(sprintf("Skipping %s to %s. Already done.",
                  i, max_yr))
  }
}
mol1km_masked <- list.files(out_dir, pattern = "uv_.*_mol_1km.tif", full.names = TRUE)
out <- raster(mol1km_masked[length(mol1km_masked)])
plot(out, box = FALSE, col = cols, axes = FALSE, main = "UV Pressure Layer \n OHI 2021")
## load raster/zonal data
zones <- raster(file.path(dir_M, "git-annex/globalprep/spatial/v2017/regions_eez_with_fao_ant.tif"))
rgn_data <- read.csv("~/github/ohi-global/eez/spatial/regions_list.csv") %>%
  filter(rgn_id <= 250)

## get raster data
rasts <- list.files(out_dir, full = TRUE, pattern = "uv_.*_mol_1km.tif")
pressure_stack <- stack(rasts)
saveGIF({
  for(i in 1:nlayers(pressure_stack)){
    n = sprintf("UV Pressure %s",
                substr(names(pressure_stack[[i]]), 4, 12))
    plot(pressure_stack[[i]],
         zlim = c(0, 1), # don't forget to fix the zlimits
         axes = FALSE, box = FALSE, col = cols,
         main = n)
  }
},
ani.width = 750,
ani.height = 400,
movie.name = "uv.gif")
## extract data for each region
regions_stats <- zonal(pressure_stack, zones, fun = "mean", na.rm = TRUE,
                       progress = "text") %>% data.frame()
write.csv(regions_stats, "int/uv_mean_rgn.csv", row.names = FALSE)
## check regions are all present or missing as expected
setdiff(regions_stats$zone, rgn_data$rgn_id) # high seas and Antarctica
# 260 261 262 263 264 266 267 269 270 272 273 274 275 276 277 248100 248200 248300 248400
# 248500 248600 258410 258420 258431 258432 258441 258442 258510 258520 258600 258700 288100 288200 288300
setdiff(rgn_data$rgn_id, regions_stats$zone) # Antarctica is 213
data <- merge(rgn_data, regions_stats, all.y = TRUE, by.x = "rgn_id", by.y = "zone") %>%
  tidyr::gather("year", "pressure_score", starts_with("uv")) %>%
  filter(rgn_id <= 250) # filter out non OHI global regions

uv_data <- data %>%
  mutate(year = substring(year, 9, 12)) %>%
  mutate(year = as.numeric(year)) %>%
  dplyr::select(rgn_id, rgn_name, year, pressure_score)
## visualize data using googleVis plot
plotData <- uv_data %>%
  dplyr::select(rgn_name, year, pressure_score)

motion = gvisMotionChart(plotData,
                         idvar = "rgn_name",
                         timevar = "year")
plot(motion)
print(motion, file = "uv.html")
## save data layer
uv_data <- uv_data %>%
  dplyr::select(rgn_id, year, pressure_score)
write.csv(uv_data, "output/uv.csv", row.names = FALSE)

uv_data <- read.csv("output/uv.csv")

old <- uv_data %>%
  filter(year == 2020) %>% # data years lag assessment yrs by 1
  select(-year, new_pressure = pressure_score) %>%
  left_join(read.csv("../v2020/output/uv.csv") %>% filter(year == 2019),
            by = c("rgn_id")) %>%
  rename(old_pressure = pressure_score)

plot_diff <- ggplot(old, aes(new_pressure, old_pressure, label = rgn_id)) +
  geom_point() +
  geom_abline()
ggplotly(plot_diff)
# ggplot(old %>% mutate(difference = new_pressure - old_pressure), aes(difference)) + geom_histogram(binwidth = 0.002)
Niilo Kalakoski, Panu Lahtinen, Jari Hovila (May 2016) OMI/Aura Surface UVB Irradiance and Erythemal Dose Daily L3 Global Gridded 1.0 degree x 1.0 degree V3, NASA Goddard Space Flight Center.