The Ultraviolet Radiation pressure layer is generated from daily data on Local Noon Erythemal UV Irradiance (mW/m2) derived from satellite observations.
Note: this script uses the terra package instead of the raster package.
Data Summary: https://disc.gsfc.nasa.gov/datasets/OMUVBd_V003/summary
Download Link: https://acdisc.gesdisc.eosdis.nasa.gov/data/Aura_OMI_Level3/OMUVBd.003/
Reference: The Ultraviolet Radiation pressure layer uses the Aura OMI Global Surface UVB Data Product.
Native Data Resolution: 1 degree
Values: Level-3 OMI Surface UV Irradiance and Erythemal Dose- OMUVBd
Time Range: Daily data from 2005-2023 (source data span 10/1/2004 through 07/14/2024, but only full years of data are used)
Format: NetCDF-4 (HDF5-EOS source; files end in .he5.nc4)
Downloaded: file list: July 16, 2024; data files: July 17, 2024
README
- General: <https://acdisc.gesdisc.eosdis.nasa.gov/data/Aura_OMI_Level3/OMUVBd.003/doc/OMUVB_L3d_readme.pdf>
- Data User's Guide: <https://docserver.gesdisc.eosdis.nasa.gov/repository/Mission/OMI/3.3_ScienceDataProductDocumentation/3.3.2_ProductRequirements_Designs/README.OMI_DUG.pdf>
Jari Hovila, Antti Arola, and Johanna Tamminen (2013), OMI/Aura Surface UVB Irradiance and Erythemal Dose Daily L3 Global Gridded 1.0 degree x 1.0 degree V3, NASA Goddard Space Flight Center, Goddard Earth Sciences Data and Information Services Center (GES DISC), Accessed: July 17, 2024, doi:10.5067/Aura/OMI/DATA3009
knitr::opts_chunk$set(message = FALSE, warning = FALSE)
current_yr <- 2024 ############## UPDATE ME
data_yr <- paste0("d", current_yr)
version_yr <- paste0("v", current_yr)
## v2022: replaced the BiocManager installation below with librarian::shelf(), which also searches Bioconductor
## rhdf5 package for working with HDF5 files, from bioconductor: http://bioconductor.org/packages/release/bioc/html/rhdf5.html
# if (!requireNamespace("BiocManager", quietly = TRUE)){
# install.packages("BiocManager")
# }
# BiocManager::install()
# BiocManager::install("rhdf5")
if (!require(librarian)){
install.packages("librarian")
library(librarian)
}
librarian::shelf(
Biobase,
rhdf5, ## From bioconductor
raster,
terra,
ncdf4,
rgdal,
sf,
ggplot2,
RColorBrewer,
foreach,
doParallel,
dplyr,
readr,
stringr,
httr,
lubridate,
# googleVis, # not used anymore
animation,
plotly,
tictoc
)
source(paste0('http://ohi-science.org/ohiprep_', version_yr, '/workflow/R/common.R'))
## File paths
raw_data_dir <- file.path(dir_M, "git-annex/globalprep/_raw_data/NASA_OMI_AURA_UV", data_yr)
int_sp_data <- file.path(dir_M, "git-annex/globalprep/prs_uv", version_yr, "int") # intermediate spatial data location
out_dir <- file.path(dir_M, "git-annex/globalprep/prs_uv", version_yr, "output")
## years of data we are using for this data layer
yrs <- c(2005:(current_yr-1))
mths <- str_pad(1:12, 2, "left", pad = "0")
days_full <- seq(1, 358, 7)
## global ocean raster at 1km for resampling/projecting purposes
ocean <- terra::rast(file.path(dir_M, "model/GL-NCEAS-Halpern2008/tmp/ocean.tif"))
ocean_shp <- st_read(file.path(dir_M, "git-annex/globalprep/spatial/d2014/data"), layer = "regions_gcs")
land <- ocean_shp %>%
filter(rgn_typ %in% c("land", "land-disputed", "land-noeez")) %>%
st_geometry()
## define mollweide projection CRS
mollCRS <- crs("+proj=moll +lon_0=0 +x_0=0 +y_0=0 +ellps=WGS84 +units=m +no_defs")
## define colors in spectral format for plotting rasters -- rainbow color scheme
cols <- rev(colorRampPalette(brewer.pal(9, "Spectral"))(255))
Data files can be found in the GES DISC EARTHDATA Data Archive.
Steps:
- Navigate to the data summary page (linked above) and click on "Subset / Get Data".
- For the download method, select "Get File Subsets using OPeNDAP".
- From the variables drop-down, select only the "ErythemalDailyDose: Erythemal Daily Dose" option.
- For the file format, choose netCDF.
- Download the generated list of file links. Note: this list is valid for only 2 days; a new list must be generated if data will be downloaded after that window.
- Rename the file to file_list.txt.
- Make 2 new folders: /home/shares/ohi/git-annex/globalprep/_raw_data/NASA_OMI_AURA_UV/<version_year> and a /data subfolder within it (see below).
- Place file_list.txt in /home/shares/ohi/git-annex/globalprep/_raw_data/NASA_OMI_AURA_UV/<version_year>.
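Before launching the long download below, it's cheap to sanity check the list itself. A minimal sketch (assumption: the renamed file_list.txt sits directly in raw_data_dir; the download chunk below actually reads a dated file name):
## count the netCDF subset URLs in the downloaded link list
file_list_check <- readr::read_lines(file.path(raw_data_dir, "file_list.txt"))
sum(stringr::str_detect(file_list_check, "OMI-Aura_L3-OMUVBd.*nc4")) # expect roughly one per day of the record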
Create new folders for the raw data, intermediate files, and output files
## Added v2023
YEAR=2024 ###### UPDATE ME
# make raw data folder for new year
cd /home/shares/ohi/git-annex/globalprep/_raw_data/NASA_OMI_AURA_UV/
mkdir -p ./d$YEAR/data
# make new intermediate and output file folders
cd /home/shares/ohi/git-annex/globalprep/prs_uv
mkdir -p ./v$YEAR/{output,int}
cd v$YEAR/int
mkdir {annual_anomalies_diff,rescaled,weekly_climatologies,weekly_mean_sd,weekly_means,weekly_sd} # v2023: add weekly_mean_sd
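If you would rather stay in R, a rough equivalent of the shell setup above (same paths; dir.create() with showWarnings = FALSE is a no-op for folders that already exist):
## hedged R equivalent of the shell folder setup
int_subdirs <- c("annual_anomalies_diff", "rescaled", "weekly_climatologies",
                 "weekly_mean_sd", "weekly_means", "weekly_sd")
for (d in c(file.path(raw_data_dir, "data"), file.path(int_sp_data, int_subdirs), out_dir)) {
  dir.create(d, recursive = TRUE, showWarnings = FALSE)
}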
## need username and password, define in console when prompted (or read from secure file), don't save here!!
# OHI has a shared account for fellows (v2023)
usrname <- readline("Type earthdata username: ")
pass <- readline("Type earthdata password: ")
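An alternative sketch that keeps credentials out of the console entirely (assumption: a ~/.netrc file containing a line like `machine urs.earthdata.nasa.gov login USER password PASS`, which libcurl can read when its netrc option is enabled):
## hedged alternative to authenticate(): let curl read credentials from ~/.netrc
# x <- httr::GET(url, httr::config(netrc = TRUE), verbose(info = TRUE, ssl = TRUE))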
## This took 103.5 minutes in 2019...
## This took like 6 hours in 2022 for some reason... You can leave this running after hours if need be, and it should keep going
## v2023: 8261.674 sec elapsed (137.7 minutes)
tic()
## read in file list .text, downloaded from earthdata & saved in destination folder (same as raw_data_dir)
file_list_raw <- read_delim(
file.path("/home/shares/ohi/git-annex/globalprep/_raw_data/NASA_OMI_AURA_UV", data_yr, "file_list_07-16.txt"),
delim = "\\", col_names = FALSE)
file_list <- file_list_raw %>%
mutate(url_str = as.character(X1)) %>%
mutate(check_netcdf = str_match(url_str, pattern = "http.*OMI-Aura_L3-OMUVBd.*nc4")) %>%
filter(!is.na(check_netcdf)) %>%
select(url_str)
## set up timekeeping for data download
t0 = Sys.time()
n.pym = length(file_list$url_str)
i.pym = 0
## download the data
for(i in 1:nrow(file_list)){
#i = 1
url = as.character(file_list[i,])
name_raw_file = substr(url, 88, 146) # v2023: check that this is grabbing the full name (should end with .he5.nc4)
if(file.exists(file.path(raw_data_dir, "data", name_raw_file)) != TRUE){
x = httr::GET(url, authenticate(usrname, pass, type = "basic"), verbose(info = TRUE, ssl = TRUE))
bin = content(x, "raw")
writeBin(bin, file.path(raw_data_dir, "data", name_raw_file)) # gnutls_handshake() failed: Handshake failed
i.pym <- i.pym + 1
min.done <- as.numeric(difftime(Sys.time(), t0, units="mins"))
min.togo <- (n.pym - i.pym) * min.done/i.pym
print(sprintf("Retrieving %s of %s. Minutes done=%0.1f, to go=%0.1f",
i.pym, n.pym, min.done, min.togo)) # approx time remaining for data download
} else {
print(sprintf("Skipping %s of %s. Already done.",
i.pym, n.pym))
}
}
## tip: after downloading, check all .he5.nc4 files are about the same size i.e. they all downloaded properly/fully
toc()
# v2023: all look good; later, 3 cannot be read in, and I manually added those to Mazu using Cyberduck on 6/28/2023
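A minimal sketch of that size check (assumption: all daily files should be roughly the same size, so anything far below the median is a likely truncated download):
## flag suspiciously small .he5.nc4 files -- candidates for manual re-download
dl_files <- list.files(file.path(raw_data_dir, "data"), pattern = "he5\\.nc4$", full.names = TRUE)
dl_sizes <- file.info(dl_files)$size
basename(dl_files[dl_sizes < 0.9 * median(dl_sizes, na.rm = TRUE)])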
## list and check missing dates in NetCDF data files
## list all files from raw data folder
files <- list.files(file.path(raw_data_dir, "data"), pattern = "OMI-Aura_L3-OMUVBd.*.he5.nc4$", full.names = TRUE) # netcdf not hdf
files <- files[substr(files, 96, 99) %in% yrs] # select only files for yrs we want
## check all days are there; should go from Jan 01 2005 through Dec 31 of last full year of data
files_df <- files %>%
data.frame() %>%
rename(fullname = ".") %>% # View(files_df)
mutate(post_modify_date = substr(fullname, 111, 119),
year = substr(fullname, 96, 99),
mo = substr(fullname, 101, 102),
dy = substr(fullname, 103, 104)) %>%
mutate(date = paste(year, mo, dy, sep = "-"),
wk_of_yr = lubridate::week(as_date(date))) %>%
group_by(year, wk_of_yr) %>%
mutate(nday_in_wk = n()) %>%
ungroup()
check_ndays <- files_df %>%
group_by(year, mo) %>% # group_by(year) %>%
summarize(ndays = n()) # %>% # View(check_ndays)
# filter(ndays < 28)
# View(check_ndays)
# gap_08 <- files_df %>% filter(year == "2008" & mo == "09") # missing 27th-30
## For some reason June 2016 is missing 14 days (June 1-14, 2016). I checked the website and those files don't seem to exist. It is just a gap in the data. Days are also missing from November 2004 (missing November 20-30, 2004; 11 days), September 2008 (missing September 27-30, 2008; 4 days), and March 2017 (missing March 12-16, 2017; 5 days). The rest seem ok. - v2023
#v2024: I found the exact same data gaps as v2023. September 2008 (2008-09) only has 26 days (missing 27th-30th). Similar issue to v2023 2016 -- also only have 16 days of data in June 2016 (2016-06); only have 26 days in March 2017 (2017-03).
After checking the Data Calendar on the NASA GES DISC OMI/Aura Surface UVB Irradiance and Erythemal Dose Daily L3 Global Gridded 1.0 degree x 1.0 degree V3 (OMUVBd) summary page, I also found that there are 15 dates without data in 2016 (missing 1 in May and 14 in June).
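A hedged sketch to enumerate those gaps programmatically, differencing the files actually on disk (via files_df from the chunk above) against a full daily calendar:
## list calendar dates that have no corresponding data file
all_dates <- seq(as.Date(paste0(min(yrs), "-01-01")), as.Date(paste0(max(yrs), "-12-31")), by = "day")
as.Date(setdiff(all_dates, as.Date(files_df$date)), origin = "1970-01-01")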
Calculate weekly means and standard deviations across all years:
tic()
## for every week in each year of the time series, calculate weekly mean and standard deviation
registerDoParallel(30)
## note: 2016 wk 22 has only 4 layers (4th layer all NAs) and length(days)=52; missing all week 23
## note: so far some 2005 files appear to have been downloaded incorrectly... replace them manually or just retry download??
## v2023 note: 3 files listed in skip_files downloaded incorrectly and could not be read in; those were originally excluded but then manually downloaded and added in
foreach(yr = yrs) %dopar% { # for (yr in yrs) {
# yr = 2005 ## for testing
print(paste("Calculating weekly mean and SD for", yr)) # this doesn't seem to show up
l <- files[substr(files, 96, 99) == yr]
days_df <- files_df %>%
filter(year == yr) %>%
select(wk_of_yr, nday_in_wk) %>%
distinct() %>% # select just distinct weeks with number of data days they contain
tidyr::complete(wk_of_yr = 1:53) %>% # a year spans at most 53 weeks
mutate(nday_in_wk = replace(nday_in_wk, is.na(nday_in_wk), 0)) %>% # zeros if no data
mutate(lag_nday = lag(nday_in_wk),
lag_nday = replace(lag_nday, is.na(lag_nday), 1),
doy = cumsum(lag_nday)) # day-of-year for start of each week of data
days <- days_df$doy
weeks <- days_df$wk_of_yr
for(j in weeks[-length(weeks)]) { # print(days[j]:(days[j+1]-1)) # checking without foreach+dopar
# j = 1 ## for testing
## gapfill for weeks with 1 or fewer days using prev + subseq. weeks
if(days_df$nday_in_wk[j] < 2){
wk_files <- l[days[j-1]:(days[j+2]-1)] # gapfilling
} else {
wk_files <- l[days[j]:(days[j+1]-1)]
}
#skip_files <- c(
#"/home/shares/ohi/git-annex/globalprep/_raw_data/NASA_OMI_AURA_UV/d2023/data/OMI-Aura_L3-OMUVBd_2012m0817_v003-2016m0715t150819.he5.nc",
#"/home/shares/ohi/git-annex/globalprep/_raw_data/NASA_OMI_AURA_UV/d2023/data/OMI-Aura_L3-OMUVBd_2013m0902_v003-2016m0715t173913.he5.nc",
#"/home/shares/ohi/git-annex/globalprep/_raw_data/NASA_OMI_AURA_UV/d2023/data/OMI-Aura_L3-OMUVBd_2018m1213_v003-2018m1217t093002.he5.nc"
#) # v2023: originally used this to skip these files, but later manually downloaded them and they now work
rasters <- NULL
for (i in wk_files) {
#if (!(i %in% skip_files)) { # these files were originally giving issues in v2023; this can be used in the future for similar circumstances
r <- terra::rast(i)
if (is.null(rasters)) {
rasters <- r
}
else {
rasters <- c(rasters, r)
}
#} # this is the closing bracket for the skip_files if statement
}
uv_week <- rasters
week = str_pad(weeks[j], 2, "left", pad = "0")
week_mean <- terra::app(uv_week,
fun = "mean",
na.rm = TRUE,
filename = sprintf("%s/weekly_means/weekly_means_%s_%s.tif", int_sp_data, yr, week),
overwrite = TRUE)
week_sd <- terra::app(uv_week,
fun = "sd",
na.rm = TRUE,
filename = sprintf("%s/weekly_sd/weekly_sd_%s_%s.tif", int_sp_data, yr, week),
overwrite = TRUE)
week_mean_sd <- week_mean + week_sd
terra::writeRaster(week_mean_sd,
filename = sprintf("%s/weekly_mean_sd/weekly_mean_sd_%s_%s.tif",
int_sp_data, yr, week),
overwrite = TRUE)
# week_mean_sd <- terra::lapp(week_mean, week_sd, fun = sum,
# filename = sprintf("%s/weekly_mean_sd/weekly_mean_sd_%s_%s.tif",
# int_sp_data, yr, week), overwrite = TRUE)
# this isn't working but lapp should be the true equivalent of the old raster::overlay()
}
}
toc()
# v2023: 28.403 sec elapsed
# v2024: 14.204 sec elapsed
v2024: had to manually re-download the following files:
- OMI-Aura_L3-OMUVBd_2021m1123_v003-2021m1127t093001.he5.nc4
Downloaded by navigating to the summary page and:
- selecting "Subset / Get Data"
- choosing "Get File Subsets using OPeNDAP"
- Date Range: setting the date of the problematic file as both the start and end date
- selecting "ErythemalDailyDose: Erythemal Daily Dose"
- File format: netCDF
Then download the individual file, upload it to the data folder on Mazu, and replace the old file.
tic()
## get weekly climatologies across all years in the time series
names_weekly <- list.files(file.path(int_sp_data, "weekly_means"), full.names = TRUE)
match_weeks <- substr(names_weekly, 87, 92) %>% unique()
## check all weeks expected to be there are there
names_weekly_df <- names_weekly %>%
data.frame() %>%
rename(fullname = ".") %>%
mutate(yr = substr(fullname, 82, 85),
wk = substr(fullname, 87, 88)) # View(names_weekly_df)
tmp <- names_weekly_df %>% # View(tmp)
select(yr, wk) %>%
group_by(yr) %>%
summarize(maxwk = max(wk))
foreach(i = match_weeks) %dopar% {
w <- names_weekly[(substr(names_weekly, 87, 92) == i)] %>% terra::rast()
m <- terra::app(w,
fun = "mean",
na.rm = TRUE,
filename = sprintf("%s/weekly_climatologies/mean_week_%s", int_sp_data, i),
overwrite = TRUE)
sd <- terra::app(w,
fun = "sd",
na.rm = TRUE,
filename = sprintf("%s/weekly_climatologies/sd_week_%s", int_sp_data, i),
overwrite = TRUE)
# m_sd <- overlay(m, sd, fun = function(x, y){x + y},
# filename = sprintf("%s/weekly_climatologies/mean_sd_week_%s", int_sp_data, i),
# overwrite = TRUE)
m_sd <- m + sd
terra::writeRaster(m_sd,
filename = sprintf("%s/weekly_climatologies/mean_sd_week_%s", int_sp_data, i),
overwrite = TRUE) ## climatologies based on mean & sd of all years, additional year each assessment...
# like mentioned before, lapp should be the true equivalent of the old raster::overlay() but this also seems to work
}
toc()
# v2023: get Error in x@ptr$nrow() : external pointer is not valid. does not seem to affect anything. however, the results of tic/toc() don't show.
# v2024: got same error. Again, the files are being written out to int/weekly_climatologies appropriately. 41.013 sec elapsed
Compare each week in each year to the climatology for that week. The climatology is equal to the mean plus one standard deviation.
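For a single grid cell, the comparison reduces to a 0/1 flag (toy numbers, purely illustrative):
## toy example of the weekly anomaly flag computed by terra::lapp() below
wk_mean <- 4200 # hypothetical weekly mean erythemal dose for one cell
wk_clim <- 4000 # hypothetical climatology for that week: mean + 1 SD across years
as.integer(wk_mean > wk_clim) # 1 = anomalous week; flags are summed per year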
#doParallel::registerDoParallel(30)
## loop to calculate annual positive anomalies
tic()
foreach (i = yrs) %dopar% {
match_weeks <- names_weekly_df %>% filter(yr == i)
s <- NULL
for(j in match_weeks$wk) {
w_mean <- terra::rast(sprintf("%s/weekly_means/weekly_means_%s_%s.tif", int_sp_data, i, j)) # mean UV for week j, year i
w_anom <- terra::rast(sprintf("%s/weekly_climatologies/mean_sd_week_%s.tif", int_sp_data, j)) # week j climatology
count <- terra::lapp(c(w_mean, w_anom), fun = function(x, y){ifelse(x > y, 1, 0)}) # compare to average anomaly for that week
if (is.null(s)) {
s <- count
}
else {
s <- c(s, count)
}
}
year <- terra::app(s,
fun = "sum",
na.rm = TRUE,
filename = sprintf("%s/annual_anomalies_diff/annual_pos_anomalies_%s.tif", int_sp_data, i),
overwrite = TRUE) ## each assessment get new year of data, another layer in this calculation...
}
toc()
# v2023: get Error in x@ptr$nrow() : external pointer is not valid. does not seem to affect anything. however, the results of tic/toc() don't show.
# v2024: 40.229 sec elapsed
Calculate the difference in the total number of anomalies in each 5-year period compared to the first 5 years (2005-2009).
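In scalar form, each output raster is sum(5-year window) minus sum(2005-2009); a toy sketch with hypothetical annual anomaly counts:
## toy example of the rolling 5-year difference computed below
annual_counts <- c(10, 12, 9, 14, 11, 13, 15) # hypothetical anomaly totals for 2005-2011
ref_total <- sum(annual_counts[1:5]) # 2005-2009 baseline
sapply(1:3, function(k) sum(annual_counts[k:(k + 4)]) - ref_total) # 0 3 6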
l <- list.files(file.path(int_sp_data, "annual_anomalies_diff"),
pattern = "anomalies", full.names = TRUE)
## reference period is 2005-2009
ref <- l[1:5] %>% terra::rast() %>%
terra::app(., fun = "sum", na.rm = TRUE)
plot(ref, col = cols, axes = FALSE, main = "Total Number of Anomalies 2005-2009")
plot(land, add = TRUE)
tic()
registerDoParallel(4)
foreach(i = 2005:(max(yrs) - 4)) %dopar% {
## calculate difference between total number of anomalies in recent and historical (2005-2009) time periods
y <- i:(i + 4)
s <- terra::rast(l[str_sub(l, -8, -5) %in% y]) %>% sum(., na.rm = TRUE)
diff <- s - ref
crs(diff) <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
writeRaster(diff,
filename = sprintf("%s/annual_anomalies_diff/annual_diff_%s_%s.tif",
int_sp_data, y[1], y[5]),
overwrite = TRUE)
}
toc()
# v2023: get Error in x@ptr$nrow() : external pointer is not valid. does not seem to affect anything. however, the results of tic/toc() don't show.
# v2024: got Error: external pointer is not valid
#registerDoParallel(30)
ref_files <- list.files(file.path(int_sp_data, "annual_anomalies_diff"),
full.names = TRUE, pattern = "diff")
## this says the reference point is 41
read.csv("../../supplementary_information/v2016/reference_points_pressures.csv", stringsAsFactors = FALSE) %>%
filter(pressure == "Ultraviolet Radiation Anomalies") %>%
dplyr::pull(ref_point) %>% # pull() extracts the column as a vector; select() + as.numeric(.$ref_point) was fragile
as.numeric()
## get the reference point (resc_num = rescale number, excludes baseline 2005_2009 tif)
vals <- c()
tic()
for(i in 2006:(max(yrs) - 4)){
t <- as.numeric(format(Sys.time(), "%s"))
max_yr <- i + 4
m <- ref_files[str_sub(ref_files, -13, -10) == i] %>%
terra::rast() %>%
terra::mask(mask = terra::vect(land), inverse = TRUE) %>%
terra::values()
vals <- c(vals, m)
print(paste(i, "took", round((as.numeric(format(Sys.time(), "%s")) - t) / 60, 2), "minutes"))
}
toc()
# v2023: 280.28 sec elapsed
# v2024: 209.313 sec elapsed
## 42 for v2018; 46 for v2019; 49 for v2020; 47 for v2021; X for v2022; 55 for v2023; 41 for v2024
resc_num <- stats::quantile(vals, prob = 0.9999, na.rm = TRUE)
## rescale using reference point
tic()
for(file in ref_files){
print(file)
the_name <- gsub(".tif", "", basename(file))
m <- file %>%
terra::rast() %>%
terra::app(fun = function(x){ifelse(x > 0, ifelse(x > resc_num, 1, x/resc_num), 0)},
filename = file.path(int_sp_data, sprintf("rescaled/%s_rescaled.tif", the_name)),
overwrite = TRUE)
}
toc()
# v2023: 0.583 sec elapsed
# v2024: 0.498 sec elapsed
resc_files <- list.files(file.path(int_sp_data, "rescaled"),
full.names = TRUE, pattern = "rescaled.tif")
## resample to ocean raster scale (~1km) and then mask
tic()
registerDoParallel(30)
foreach(i = 2005:(max(yrs) - 4)) %dopar% {
t <- as.numeric(format(Sys.time(), "%s"))
max_yr <- i + 4
if(file.exists(file.path("/home/shares/ohi/git-annex/globalprep/prs_uv", version_yr, "output",
sprintf("uv_%s_%s-2005_2009_mol_1km.tif", i, max_yr))) != TRUE){
mol1km_masked <- resc_files[str_sub(resc_files, -22, -19) == i] %>%
terra::rast() %>%
terra::project(y = mollCRS, over = TRUE, method = "near") %>%
terra::resample(ocean, method = "near") %>%
terra::mask(ocean,
filename = file.path("/home/shares/ohi/git-annex/globalprep/prs_uv", version_yr, "output",
sprintf("uv_%s_%s-2005_2009_mol_1km.tif", i, max_yr)),
overwrite = TRUE)
print(paste(i, "to", max_yr, "took", round((as.numeric(format(Sys.time(), "%s")) - t) / 60, 2), "minutes"))
} else {
print(sprintf("Skipping %s to %s. Already done.", i, max_yr))
}
}
toc()
# v2023: 413.808 sec elapsed
# v2024: 1.023 sec elapsed (switched from 4 cores to 30 cores); individual year groups took ~1.5-2 min each
mol1km_masked <- list.files(out_dir, pattern = "uv_.*_mol_1km.tif", full.names = TRUE)
out <- terra::rast(mol1km_masked[length(mol1km_masked)])
plot(out, box = FALSE, col = cols, axes = FALSE, main = paste("UV Pressure Layer\nOHI", current_yr))
## load raster/zonal data
zones <- terra::rast(file.path(dir_M, "git-annex/globalprep/spatial/v2017/regions_eez_with_fao_ant.tif"))
rgn_data <- read_csv("~/github/ohi/ohi-global/eez/spatial/regions_list.csv") %>% # update if your path to ohi-global is different
filter(rgn_id <= 250)
## get raster data
rasts <- list.files(out_dir, full = TRUE, pattern = "uv_.*_mol_1km.tif")
extracted_names <- str_extract(rasts, "uv_\\d{4}_\\d{4}-\\d{4}_\\d{4}_mol_1km") %>%
str_replace("-", ".") # this part may not be necessary; kept for consistency with v2022 names
pressure_stack <- terra::rast(rasts)
names(pressure_stack) <- extracted_names # ensure these are correct! #pressure_stack@ptr[["names"]]
saveGIF({
for(i in 1:terra::nlyr(pressure_stack)){
n <- sprintf("UV Pressure %s",
substr(names(pressure_stack[[i]]), 4, 12))
plot(pressure_stack[[i]],
zlim = c(0, 1), # don't forget to fix the zlimits
axes = FALSE, box = FALSE, col = cols,
main = n)}},
ani.width = 750,
ani.height = 400,
movie.name = "uv.gif")
# v2024: Output at: uv.gif display-im6.q16: unable to open X server `:0' @ error/display.c/DisplayImageCommand/412.
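If that ImageMagick display error recurs on a headless server, one possible workaround (an assumption, not verified here) is to stop the animation package from trying to open the GIF after saving:
## assumption: autobrowse triggers the display call after saveGIF() finishes
animation::ani.options(autobrowse = FALSE)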
## make pressure_stack and zones same projection
tic()
pressure_stack_same_crs <- terra::project(pressure_stack, crs(zones), method = "near") # v2023: ensure CRSs are exactly the same for zonal (got Warning: [zonal] SRS do not match originally)
toc()
# v2023: 1452.395 sec elapsed
# v2024: 1316.3 sec elapsed
## extract data for each region
tic()
regions_stats <- terra::zonal(pressure_stack_same_crs, zones, fun = "mean", na.rm = TRUE) %>%
data.frame()
toc()
# v2023: 529.873 sec elapsed
# v2024: 538.123 sec elapsed
# v2023: make column names correct (match v2022)
regions_stats <- rename(regions_stats, "zone" = "regions_eez_with_fao_ant")
write.csv(regions_stats, "int/uv_mean_rgn.csv", row.names = FALSE)
## check regions are all present or missing as expected
setdiff(regions_stats$zone, rgn_data$rgn_id) # high seas and Antarctica
# 260 261 262 263 264 266 267 269 270 272 273 274 275 276 277 248100 248200 248300 248400
# 248500 248600 258410 258420 258431 258432 258441 258442 258510 258520 258600 258700 288100 288200 288300
setdiff(rgn_data$rgn_id, regions_stats$zone) # Antarctica is 213
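A hedged guard that makes that expectation explicit, so a silently missing region stops the run instead of slipping through:
## assumption: only Antarctica (rgn_id 213) should be absent from the zonal stats
missing_rgns <- setdiff(rgn_data$rgn_id, regions_stats$zone)
stopifnot(all(missing_rgns %in% 213))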
data <- terra::merge(rgn_data, regions_stats, all.y = TRUE, by.x = "rgn_id", by.y = "zone") %>%
tidyr::gather("year", "pressure_score", starts_with("uv")) %>%
filter(rgn_id <= 250) # filter out non OHI global regions
uv_data <- data %>%
mutate(year = substring(year, 9, 12)) %>%
mutate(year = as.numeric(year)) %>%
dplyr::select(rgn_id, rgn_name, year, pressure_score)
# ## visualize data using googleVis plot
# plotData <- uv_data %>%
# dplyr::select(rgn_name, year, pressure_score)
#
# motion = gvisMotionChart(plotData,
# idvar = "rgn_name",
# timevar = "year")
#
# plot(motion)
# print(motion, file = "uv.html")
# # This code above is mostly no longer supported due to use of Flash
## visualize data using plotly::ggplotly
# grab relevant variables
plotData <- uv_data %>%
dplyr::select(rgn_name, year, pressure_score)
# create graph
uv_graph <- ggplot(plotData, aes(x = year, y = pressure_score, color = rgn_name)) +
geom_line() +
labs(x = "Year",
y = "Pressure Score",
color = "Region Name")
# convert to ggplotly graph
ggplotly_graph <- ggplotly(uv_graph)
ggplotly_graph
# save plot
htmlwidgets::saveWidget(ggplotly_graph, file = "uv.html")
## save data layer
uv_data_selected <- uv_data %>%
dplyr::select(rgn_id, year, pressure_score)
write.csv(uv_data_selected, "output/uv.csv", row.names = FALSE)
## This top part was added in 2022 to help with data checks: it keeps track of the
## data year and the version year the plot is being generated from. See the conversation in
## https://github.com/OHI-Science/globalfellows-issues/issues/210 to understand why... maybe.
## Mostly the point is to get nice plot axes.
data_year_1 <- current_yr - 1
data_year_2 <- current_yr - 2
version_year_1 <- paste0("v", current_yr - 0)
version_year_2 <- paste0("v", current_yr - 1)
uv_data_1 <- read.csv(paste0("../", version_year_1, "/output/uv.csv")) %>%
filter(year == data_year_1)
uv_data_2 <- read.csv(paste0("../",version_year_2, "/output/uv.csv")) %>%
filter(year == data_year_2)
combined <- uv_data_1 %>% # data years lag assessment yrs by 1
select(-year, new_pressure = pressure_score) %>%
left_join(uv_data_2, by = c("rgn_id")) %>%
rename(old_pressure = pressure_score)
plot_diff <- ggplot(combined, aes(new_pressure, old_pressure, label = rgn_id)) +
geom_point() +
geom_abline() +
labs(title = "UV Pressure",
x = paste("Data year", data_year_1, "-", version_year_1),
y = paste("Data year", data_year_2, "-", version_year_2)) +
theme_minimal()
ggplotly(plot_diff)
## check also the most recent year in common
uv_data_3 <- read.csv(paste0("../", version_year_1, "/output/uv.csv")) %>%
filter(year == data_year_2)
combined_2 <- uv_data_2 %>% # data years lag assessment yrs by 1
select(-year, new_pressure = pressure_score) %>%
left_join(uv_data_3, by = c("rgn_id")) %>%
rename(old_pressure = pressure_score)
plot_diff_2 <- ggplot(combined_2, aes(new_pressure, old_pressure, label = rgn_id)) +
geom_point() +
geom_abline() +
labs(title = "UV Pressure",
x = paste("Data year", data_year_2, "-", version_year_1),
y = paste("Data year", data_year_2, "-", version_year_2)) +
theme_minimal()
ggplotly(plot_diff_2)
## check also the two most recent years for the latest version
uv_data_4 <- read.csv(paste0("../", version_year_1, "/output/uv.csv")) %>%
filter(year == data_year_1)
combined_3 <- uv_data_4 %>% # data years lag assessment yrs by 1
select(-year, new_pressure = pressure_score) %>%
left_join(uv_data_3, by = c("rgn_id")) %>%
rename(old_pressure = pressure_score)
plot_diff_3 <- ggplot(combined_3, aes(new_pressure, old_pressure, label = rgn_id)) +
geom_point() +
geom_abline() +
labs(title = "UV Pressure",
x = paste("Data year", data_year_1, "-", version_year_1),
y = paste("Data year", data_year_2, "-", version_year_1)) +
theme_minimal()
ggplotly(plot_diff_3)
# ggplot(combined %>% mutate(difference = new_pressure - old_pressure), aes(difference)) + geom_histogram(binwidth = 0.002)