diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..82d7124626f4fd4248e450af4206e19f07bee7bf --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +temp/* +*.rds \ No newline at end of file diff --git a/00_population/population.R b/00_population/population.R new file mode 100644 index 0000000000000000000000000000000000000000..132f9473141d8cc65114db4bf67c1a9a1dbb8c5b --- /dev/null +++ b/00_population/population.R @@ -0,0 +1,72 @@ +#!/usr/bin/env Rscript +## --------------------------- +## +## Script name: process NASA GPW Data +## +## Purpose of script: process NASA GPW Data +## Author: Daniela Lührsen & Emily Ball +## Date Created: 2025-03-07 +## Email: daniela.luhrsen@bsc.es +## +## --------------------------- + +# Process NASA GPW Data +# https://cmr.earthdata.nasa.gov/search/concepts/C1597158029-SEDAC.html + +# Load packages +library(raster) +library(dplyr) +library(purrr) +library(readr) + +# Define bounding box filter early to reduce memory usage +bbox <- extent(-34, 45, 25, 75) + +# File names and corresponding years +files <- list.files("data", full.names = TRUE) +files <- files[grepl("Global_\\d{4}", files)] +years <- c(2000, 2005, 2010, 2015, 2020) + +# Preallocate list to store extracted data +extracted_data <- list() +coords <- NULL + +for (i in seq_along(files)) { + r <- raster(files[i]) + year <- years[i] + + r_crop <- crop(r, bbox) + + rm(r) + # Convert to data frame with coordinates + df <- as.data.frame(r_crop, xy = TRUE) + rm(r_crop) + + # Only keep values within the bounding box + df <- df %>% filter(x > -34 & x < 45 & y > 25 & y < 75) + + if (is.null(coords)) { + coords <- df[, c("x", "y")] + } + + # Store the values + write_rds(df[[3]], paste0("data/gpw_", year, ".rds")) + rm(df) +} + +for (i in seq_along(files)) { + year <- years[i] + # Read the data back in + df <- read_rds(paste0("data/gpw_", year, ".rds")) + # Store in the list + extracted_data[[i]] <- df + rm(df) +} + +# Combine into final data frame +final_df <- bind_cols(coords, setNames(as.data.frame(extracted_data), years)) +rm(extracted_data) + +final_df$population <- rowMeans(final_df[, 3:ncol(final_df)], na.rm = TRUE) +# Save the result +write_rds(final_df, "data/population.rds") \ No newline at end of file diff --git a/01_shapefiles/create_shapefiles.R b/01_shapefiles/create_shapefiles.R new file mode 100644 index 0000000000000000000000000000000000000000..98a30d51e945929477ef8bd8044a82fa3de0b6ea --- /dev/null +++ b/01_shapefiles/create_shapefiles.R @@ -0,0 +1,172 @@ +#!/usr/bin/env Rscript +## --------------------------- +## +## Script name: process shapefiles +## +## Purpose of script: process shapefiles +## Author: Daniela Lührsen & Emily Ball +## Date Created: 2025-03-07 +## Email: daniela.luhrsen@bsc.es +## +## --------------------------- + +library(sf) +library(dplyr) +library(readr) +library(gridExtra) + +# select parameters: +region <- "europe" +level <- "NUTS3" + +get_shapes <- function(level) { + if (level == "europe") { + shp <- sf::st_read("data/WHO_Europe/new_who_europe.shp") + shp <- shp[which(shp$country != "Russian Federation" & + shp$country != "Moldova" & + shp$country != "Ukraine" & + shp$country != "Andorra" & + shp$country != "Belarus" & + shp$erp_sb_ != "Central Asia"), ] + shp <- shp[which(shp$country != "Armenia" & + shp$country != "Azerbaijan" & + shp$country != "Georgia" & + shp$country != "Israel"), ] + + shp <- shp %>% summarise(geometry = sf::st_union(geometry)) + shp$adm1_id <- "europe" + shp$adm1_name <- "europe" + 
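# save shp +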
sf::st_write(shp, dsn = "data/outputs", layer = "europe", + driver = "ESRI Shapefile", delete_layer = TRUE) + + } else if (level == "IS03") { + adm3 <- sf::st_read("data/WHO_Europe/new_who_europe.shp") + ISO2 <- adm3$ISO2 + ISO3 <- adm3$ISO3 + + adm1 <- sf::st_read("data/NUTS_2024/NUTS_RG_01M_2024_3857.shp") + adm1 <- adm1[which(adm1$CNTR_CODE %in% adm3$ISO2), ] + # remove peripheral regions + adm1 <- adm1[which(adm1$NUTS_ID != "FRY" & + adm1$NUTS_ID != "FRY1" & + adm1$NUTS_ID != "FRY2" & + adm1$NUTS_ID != "FRY3" & + adm1$NUTS_ID != "FRY4" & + adm1$NUTS_ID != "FRY5" & + adm1$NUTS_ID != "FRY10" & + adm1$NUTS_ID != "FRY20" & + adm1$NUTS_ID != "FRY30" & + adm1$NUTS_ID != "FRY40" & + adm1$NUTS_ID != "FRY50" & + adm1$NUTS_ID != "NO0B" & + adm1$NUTS_ID != "NO0B1" & + adm1$NUTS_ID != "NO0B2"), ] + adm1 <- adm1[adm1$LEVL_CODE == 3, ] + adm1 <- adm1 %>% + group_by(CNTR_CODE) %>% + summarise(geometry = sf::st_union(geometry), .groups = "drop") + + adm2 <- sf::st_read(paste0("data/NUTS_2021/", + "NUTS_RG_01M_2021_3857_LEVL_3.shp")) + adm2 <- adm2[which(adm2$CNTR_CODE == "UK"), ] + adm2 <- sf::st_transform(adm2, sf::st_crs(adm1)) + adm2 <- adm2 %>% + group_by(CNTR_CODE) %>% + summarise(geometry = sf::st_union(geometry), .groups = "drop") + + adm3 <- adm3[which(adm3$ISO3 == "BIH"), ] + adm3 <- sf::st_transform(adm3, sf::st_crs(adm1)) + + colnames(adm1)[which(colnames(adm1) == "CNTR_CODE")] <- "adm1_id" + colnames(adm2)[which(colnames(adm2) == "CNTR_CODE")] <- "adm1_id" + + adm3 <- adm3[, which(colnames(adm3) %in% c("ISO2", + "geometry"))] + colnames(adm3)[which(colnames(adm3) == "ISO2")] <- "adm1_id" + + # merge all dataframes + shp <- rbind(adm1, adm2, adm3) + ISO3 <- ISO3[which(ISO2 %in% shp$adm1_id)] + + + sf::st_write(shp, dsn = "data/outputs", layer = "ISO3", + driver = "ESRI Shapefile", delete_layer = TRUE) + + } else if (level == "european_region") { + shp <- sf::st_read("data/WHO_Europe/new_who_europe.shp") + shp <- shp[which(shp$country != "Russian Federation" & + shp$country != "Moldova" & + shp$country != "Ukraine" & + shp$country != "Andorra" & + shp$country != "Belarus" & + shp$erp_sb_ != "Central Asia"), ] + shp <- shp[which(shp$country != "Armenia" & + shp$country != "Azerbaijan" & + shp$country != "Georgia" & + shp$country != "Israel"), ] + # group by European region + shp <- shp %>% + group_by(erp_sb_) %>% + summarise(geometry = sf::st_union(geometry), .groups = "drop") + + # rename columns + colnames(shp)[which(colnames(shp) == "erp_sb_")] <- "adm1_id" + shp$adm1_name <- shp$adm1_id + # save shp + sf::st_write(shp, dsn = "data/outputs", layer = "european_region", + driver = "ESRI Shapefile", delete_layer = TRUE) + + } else if (level == "NUTS3") { + adm1 <- sf::st_read("data/NUTS_2024/NUTS_RG_01M_2024_3857.shp") + + # remove peripheral regions + adm1 <- adm1[which(adm1$NUTS_ID != "FRY" & + adm1$NUTS_ID != "FRY1" & + adm1$NUTS_ID != "FRY2" & + adm1$NUTS_ID != "FRY3" & + adm1$NUTS_ID != "FRY4" & + adm1$NUTS_ID != "FRY5" & + adm1$NUTS_ID != "FRY10" & + adm1$NUTS_ID != "FRY20" & + adm1$NUTS_ID != "FRY30" & + adm1$NUTS_ID != "FRY40" & + adm1$NUTS_ID != "FRY50" & + adm1$NUTS_ID != "NO0B" & + adm1$NUTS_ID != "NO0B1" & + adm1$NUTS_ID != "NO0B2"), ] + adm1 <- adm1[adm1$LEVL_CODE == 3, ] + + adm2 <- sf::st_read(paste0("data/NUTS_2021/", + "NUTS_RG_01M_2021_3857_LEVL_3.shp")) + adm2 <- adm2[which(adm2$CNTR_CODE == "UK"), ] + adm2 <- sf::st_transform(adm2, sf::st_crs(adm1)) + adm3 <- sf::st_read("data/WHO_Europe/new_who_europe.shp") + adm3 <- adm3[which(adm3$ISO3 == "BIH"), ] + adm3 <- 
sf::st_transform(adm3, sf::st_crs(adm1)) + + adm1 <- adm1[, which(colnames(adm1) %in% c("NUTS_ID", + "NUTS_NAME", + "geometry"))] + colnames(adm1)[which(colnames(adm1) == "NUTS_ID")] <- "adm1_id" + colnames(adm1)[which(colnames(adm1) == "NUTS_NAME")] <- "adm1_name" + + adm2 <- adm2[, which(colnames(adm2) %in% c("NUTS_ID", + "NUTS_NAME", + "geometry"))] + colnames(adm2)[which(colnames(adm2) == "NUTS_ID")] <- "adm1_id" + colnames(adm2)[which(colnames(adm2) == "NUTS_NAME")] <- "adm1_name" + + adm3 <- adm3[, which(colnames(adm3) %in% c("ISO3", + "country", + "geometry"))] + colnames(adm3)[which(colnames(adm3) == "ISO3")] <- "adm1_id" + colnames(adm3)[which(colnames(adm3) == "country")] <- "adm1_name" + + # merge all dataframes + shp <- rbind(adm1, adm2, adm3) + + # save shp + sf::st_write(shp, dsn = "data/outputs", layer = "NUTS3", + driver = "ESRI Shapefile", delete_layer = TRUE) + } +} \ No newline at end of file diff --git a/02_calculate_spei/calculate_spei.R b/02_calculate_spei/calculate_spei.R new file mode 100755 index 0000000000000000000000000000000000000000..4de6fb4e6872d2a24107939eafd04cf2cf2f7cc8 --- /dev/null +++ b/02_calculate_spei/calculate_spei.R @@ -0,0 +1,92 @@ +#!/usr/bin/env Rscript +## --------------------------- +## +## Script name: calculate gridded SPEI +## +## Purpose of script: Calculate gridded SPEI +## Author: Daniela Lührsen & Emily Ball +## Date Created: 2025-03-07 +## Email: daniela.luhrsen@bsc.es +## +## --------------------------- + +## load up the packages +packages <- c("startR", "s2dv", "CSTools", "multiApply", "ClimProjDiags", + "SPEI", "zoo", "TLMoments", "lmomco", "lmom", "lubridate", "dplyr", + "raster", "sf") +install.packages(setdiff(packages, rownames(installed.packages()))) +lapply(packages, require, character.only = TRUE) + +## load local functions +source("support_functions/load_data.R") +source("support_functions/save_SPEI.R") +source("support_functions/convert_raster.R") + +## --------------------------- +# select parameters: +region <- "europe" +accum <- c(6, 12) # c(1, 3, 6, 12) +calculate_spi <- FALSE +calculate_spei <- TRUE +dataset <- "era5land" +start_year <- 1980 +end_year <- 2024 +start_month <- 1 +end_month <- 12 +ref_period <- c(1981, 2010) + +# flags to load input variables, calculate SPEI and convert to raster +load_data <- TRUE +save_spei <- TRUE +convert_spei_raster <- TRUE +pop_raster <- FALSE + +# set coordinates +lats_min <- 25 +lats_max <- 75 +lons_min <- -34 +lons_max <- 45 + +vars <- c("spi"[calculate_spi], "spei"[calculate_spei]) + + +if (load_data == TRUE) { + # obtain dates for startR call: + for (mm in start_month:end_month){ + if (mm == start_month) { + dates <- as.Date(paste0(start_year:end_year, "-", start_month, "-1")) + } else { + dates <- append(dates, as.Date(paste0(start_year:end_year, "-", mm, "-1"))) + } + } + dates <- dates[order(dates)] + dates <- array(substr(gsub("-", "", as.character(dates)), 1, 6), + dim = c(time = (end_month - start_month + 1), + syear = (end_year - start_year + 1))) + + load_data_era5land(dates, region, + lats_min, lats_max, + lons_min, lons_max, + calculate_spei) +} + +# SPEI ---- +if (save_spei == TRUE) { + for (i in accum) { + save_SPEI_era5land(region, i, + calculate_spi, calculate_spei, + ref_period) + } +} + +if (convert_spei_raster == TRUE) { + # convert to raster ------------------------------------------------------- + for (i in accum) { + convert_to_raster_era5land(i, vars, region) + } +} + +if (pop_raster == TRUE) { + # create population raster 
----------------------------------------------- + out <- convert_to_raster_population_era5land(region, vars, accum) +} diff --git a/02_calculate_spei/save_PET.R b/02_calculate_spei/save_PET.R new file mode 100644 index 0000000000000000000000000000000000000000..813e8e30bab654af51009a994d289bfa4f49e255 --- /dev/null +++ b/02_calculate_spei/save_PET.R @@ -0,0 +1,36 @@ +#!/usr/bin/env Rscript +## --------------------------- +## +## Script name: save components of SPEI +## +## Purpose of script: save PET and precipitation +## Author: Daniela Lührsen & Emily Ball +## Date Created: 2025-03-07 +## Email: daniela.luhrsen@bsc.es +## +## --------------------------- + +# load packages +packages <- c("sf", "lubridate", "zoo", "raster", "dplyr", "SPEI", + "multiApply", "tidyr", "ggplot2", "CSIndicators", "CSTools") +lapply(packages, require, character.only = TRUE) + +## load local functions +source("support_functions/load_data.R") +source("support_functions/save_SPEI.R") +source("support_functions/convert_raster.R") + +region <- "europe" + +# calculate PET: load climate +tasmax <- readRDS("data/europe_tasmax.rds") +tasmin <- readRDS("data/europe_tasmin.rds") +data <- list('tmax' = tasmax, 'tmin' = tasmin) +pet <- CST_PeriodPET(data = data, pet_method = "hargreaves") +saveRDS(pet, "data/europe_PET.rds") +rm(tasmax, tasmin, data, pet) + + +# convert to raster +convert_to_raster_input_variables("prlr", region) +convert_to_raster_input_variables("PET", region) \ No newline at end of file diff --git a/02_calculate_spei/save_masked_indicators.R b/02_calculate_spei/save_masked_indicators.R new file mode 100755 index 0000000000000000000000000000000000000000..fd8cdc40cf5a1b2f5133ee0cad75df567bd18ae4 --- /dev/null +++ b/02_calculate_spei/save_masked_indicators.R @@ -0,0 +1,41 @@ +#!/usr/bin/env Rscript +## --------------------------- +## +## Script name: mask indicators by land +## +## Purpose of script: mask indicators by land +## Author: Daniela Lührsen & Emily Ball +## Date Created: 2025-03-07 +## Email: daniela.luhrsen@bsc.es +## +## --------------------------- + +# load packages +packages <- c("sf", "lubridate", "zoo", "raster", "dplyr", + "tidyr", "ggplot2", "terra") +lapply(packages, require, character.only = TRUE) + +region <- "europe" + +rasterOptions(tmpdir = "/scratch/eball") +terraOptions( tmpdir = "/scratch/eball") + +# load shapefile +adm1 <- sf::st_read("../01_shapefiles/data/outputs/europe.shp") + +adm1_proj <- st_transform(adm1, crs = 4326) +adm1_vect <- vect(adm1_proj) +rm(adm1_proj, adm1) + +for (var in c("spei_12")) {#, "spei_6", "prlr", "PET")) { + data_stack <- readRDS(paste0("data/europe_", var, "_stack.rds")) + data_stack_rast <- rast(data_stack) + rm(data_stack) + data_masked <- NULL + for (i in 1:nlyr(data_stack_rast)) { + data_masked[[i]] <- mask(crop(data_stack_rast[[i]], adm1_vect), adm1_vect) + print(i) + } + saveRDS(rast(data_masked), paste0("data/temp/europe-masked_", var, ".rds")) + rm(data_masked, data_stack_rast) +} \ No newline at end of file diff --git a/02_calculate_spei/support_functions/PeriodPET.R b/02_calculate_spei/support_functions/PeriodPET.R new file mode 100644 index 0000000000000000000000000000000000000000..f365d39b444bca4aca8108e7b4f33523c896ba4c --- /dev/null +++ b/02_calculate_spei/support_functions/PeriodPET.R @@ -0,0 +1,409 @@ +#'Compute the Potential Evapotranspiration +#' +#'Compute the Potential evapotranspiration (PET) that is the amount of +#'evaporation and transpiration that would occur if a sufficient water source +#'were available. 
This function calculate PET according to the Thornthwaite, +#'Hargreaves or Hargreaves-modified equations. +#' +#'This function is build to work be compatible with other tools in +#'that work with 's2dv_cube' object class. The input data must be this object +#'class. If you don't work with 's2dv_cube', see PeriodPET. For more information +#'on the SPEI calculation, see functions CST_PeriodStandardization and +#'CST_PeriodAccumulation. +#' +#'@param data A named list with the needed \code{s2dv_cube} objects containing +#' the seasonal forecast experiment in the data element for each variable. +#' Specific variables are needed for each method used in computing the +#' Potential Evapotranspiration. See parameter 'pet_method'. The accepted +#' variables for each method are: for 'hargreaves', 'tmin' and 'tmax'; for +#' 'hargreaves_modified' are 'tmin', 'tmax' and 'pr'; for method 'thornthwaite' +#' 'tmean' is required. The units for temperature variables +#' ('tmin', 'tmax' and 'tmean') need to be in Celcius degrees; the units for +#' precipitation ('pr') need to be in mm/month. +#'@param dates An array of temporal dimensions containing the Dates of +#' 'data'. It must be of class 'Date' or 'POSIXct'. +#'@param lat A numeric vector containing the latitude values of 'data'. +#'@param pet_method A character string indicating the method used to compute +#' the potential evapotranspiration. The accepted methods are: +#' 'hargreaves' and 'hargreaves_modified', that require the data to have +#' variables tmin and tmax; and 'thornthwaite', that requires variable +#' 'tmean'. +#'@param time_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'syear'. +#'@param leadtime_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'time'. +#'@param lat_dim A character string indicating the name of the latitudinal +#' dimension. By default it is set by 'latitude'. +#'@param na.rm A logical value indicating whether NA values should be removed +#' from data. It is FALSE by default. +#'@param ncores An integer value indicating the number of cores to use in +#' parallel computation. 
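+#' +#'@return An 's2dv_cube' object containing the computed PET in element +#' \code{data}, with the same dimensions as the input variables in 'data'.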
+#' +#'@examples +#'dims <- c(time = 3, syear = 3, ensemble = 1, latitude = 1) +#' +#'exp_tasmax <- array(rnorm(360, 27.73, 5.26), dim = dims) +#'exp_tasmin <- array(rnorm(360, 14.83, 3.86), dim = dims) +#'exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims) +#'end_year <- 2012 +#'dates_exp <- as.POSIXct(c(paste0(2010:end_year, "-08-16"), +#' paste0(2010:end_year, "-09-15"), +#' paste0(2010:end_year, "-10-16")), "UTC") +#'dim(dates_exp) <- c(syear = 3, time = 3) +#' +#'lat <- c(40) +#' +#'exp1 <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) +#' +#'res <- PeriodPET(data = exp1, lat = lat, dates = dates_exp) +#' +#'@import SPEI +#'@import lubridate +#'@import multiApply +#'@export +CST_PeriodPET <- function(data, pet_method = 'hargreaves', + time_dim = 'syear', leadtime_dim = 'time', + lat_dim = 'latitude', na.rm = FALSE, + ncores = NULL) { + # Check 's2dv_cube' + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!all(sapply(data, function(x) inherits(x, 's2dv_cube')))) { + stop("Parameter 'data' must be a list of 's2dv_cube' class.") + } + # latitude + if (!any(names(data[[1]]$coords) %in% .KnownLatNames())) { + stop("Spatial coordinate names of parameter 'data' do not match any ", + "of the names accepted by the package.") + } + # Dates + dates_exp <- data[[1]]$attrs$Dates + if (!'Dates' %in% names(data[[1]]$attrs)) { + stop("Element 'Dates' is not found in 'attrs' list of 'data'. ", + "See 's2dv_cube' object description in README file for more ", + "information.") + } + lat_dim <- names(data[[1]]$coords)[[which(names(data[[1]]$coords) %in% .KnownLatNames())]] + + res <- PeriodPET(data = lapply(data, function(x) x$data), + dates = data[[1]]$attrs$Dates, + lat = data[[1]]$coords[[lat_dim]], + pet_method = pet_method, time_dim = time_dim, + leadtime_dim = leadtime_dim, lat_dim = lat_dim, + na.rm = na.rm, ncores = ncores) + # Add metadata + source_files <- lapply(data, function(x) {x$attrs$source_files}) + coords <- data[[1]]$coords + Dates <- data[[1]]$attrs$Dates + metadata <- data[[1]]$attrs$Variable$metadata + metadata_names <- intersect(names(dim(res)), names(metadata)) + suppressWarnings( + res <- CSTools::s2dv_cube(data = res, coords = coords, + varName = paste0('PET'), + metadata = metadata[metadata_names], + Dates = Dates, + source_files = source_files, + when = Sys.time()) + ) + return(res) +} + +#'Compute the Potential Evapotranspiration +#' +#'Compute the Potential evapotranspiration (PET) that is the amount of +#'evaporation and transpiration that would occur if a sufficient water source +#'were available. This function calculate PET according to the Thornthwaite, +#'Hargreaves or Hargreaves-modified equations. +#' +#'For more information on the SPEI calculation, see functions +#'PeriodStandardization and PeriodAccumulation. +#' +#'@param data A named list with the needed \code{s2dv_cube} objects containing +#' the seasonal forecast experiment in the data element for each variable. +#' Specific variables are needed for each method used in computing the +#' Potential Evapotranspiration. See parameter 'pet_method'. The accepted +#' variables for each method are: for 'hargreaves', 'tmin' and 'tmax'; for +#' 'hargreaves_modified' are 'tmin', 'tmax' and 'pr'; for method 'thornthwaite' +#' 'tmean' is required. The units for temperature variables +#' ('tmin', 'tmax' and 'tmean') need to be in Celcius degrees; the units for +#' precipitation ('pr') need to be in mm/month. 
+#'@param dates An array of temporal dimensions containing the Dates of +#' 'data'. It must be of class 'Date' or 'POSIXct'. +#'@param lat A numeric vector containing the latitude values of 'data'. +#'@param pet_method A character string indicating the method used to compute +#' the potential evapotranspiration. The accepted methods are: +#' 'hargreaves' and 'hargreaves_modified', that require the data to have +#' variables tmin and tmax; and 'thornthwaite', that requires variable +#' 'tmean'. +#'@param time_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'syear'. +#'@param leadtime_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'time'. +#'@param lat_dim A character string indicating the name of the latitudinal +#' dimension. By default it is set by 'latitude'. +#'@param na.rm A logical value indicating whether NA values should be removed +#' from data. It is FALSE by default. +#'@param ncores An integer value indicating the number of cores to use in +#' parallel computation. +#' +#'@examples +#'dims <- c(time = 3, syear = 3, ensemble = 1, latitude = 1) +#' +#'exp_tasmax <- array(rnorm(360, 27.73, 5.26), dim = dims) +#'exp_tasmin <- array(rnorm(360, 14.83, 3.86), dim = dims) +#'exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims) +#'end_year <- 2012 +#'dates_exp <- as.POSIXct(c(paste0(2010:end_year, "-08-16"), +#' paste0(2010:end_year, "-09-15"), +#' paste0(2010:end_year, "-10-16")), "UTC") +#'dim(dates_exp) <- c(syear = 3, time = 3) +#' +#'lat <- c(40) +#' +#'exp1 <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) +#' +#'res <- PeriodPET(data = exp1, lat = lat, dates = dates_exp) +#' +#'@import SPEI +#'@import lubridate +#'@import multiApply +#'@export +PeriodPET <- function(data, dates, lat, pet_method = 'hargreaves', + time_dim = 'syear', leadtime_dim = 'time', + lat_dim = 'latitude', na.rm = FALSE, + ncores = NULL) { + + # Initial checks + ## data + if (!inherits(data, 'list')) { + stop("Parameter 'data' needs to be a named list with the needed variables.") + } + if (is.null(names(data))) { + stop("Parameter 'data' needs to be a named list with the variable names.") + } + if (any(sapply(data, function(x) is.null(names(dim(x)))))) { + stop("Parameter 'data' needs to be a list of arrays with dimension names.") + } + dims <- lapply(data, function(x) dim(x)) + first_dims <- dims[[1]] + all_equal <- all(sapply(dims[-1], function(x) identical(first_dims, x))) + if (!all_equal) { + stop("Parameter 'data' variables need to have the same dimensions.") + } + # lat + if (!is.numeric(lat)) { + stop("Parameter 'lat' must be numeric.") + } + if (!lat_dim %in% names(dims[[1]])) { + stop("Parameter 'data' must have 'lat_dim' dimension.") + } + if (any(sapply(dims, FUN = function(x) x[lat_dim] != length(lat)))) { + stop("Parameter 'lat' needs to have the same length of latitudinal", + "dimension of all the variables arrays in 'data'.") + } + + # data (2) + if (all(c('tmin', 'tmax', 'pr') %in% names(data))) { + # hargreaves modified: 'tmin', 'tmax', 'pr' and 'lat' + if (!(pet_method %in% c('hargreaves_modified', 'hargreaves'))) { + warning("Parameter 'pet_method' needs to be 'hargreaves' or ", + "'hargreaves_modified'. 
It is set to 'hargreaves_modified'.") + pet_method <- 'hargreaves_modified' + } + } else if (all(c('tmin', 'tmax') %in% names(data))) { + if (!(pet_method %in% c('hargreaves'))) { + warning("Parameter 'pet_method' will be set as 'hargreaves'.") + pet_method <- 'hargreaves' + } + } else if (c('tmean') %in% names(data)) { + # thornthwaite: 'tmean' (mean), 'lat' + if (!(pet_method == 'thornthwaite')) { + warning("Parameter 'pet_method' it is set to be 'thornthwaite'.") + pet_method <- 'thornthwaite' + } + } else { + stop("Parameter 'data' needs to be a named list with accepted ", + "variable names. See documentation.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!all(sapply(data, function(x) time_dim %in% names(dim(x))))) { + stop("Parameter 'time_dim' is not found in 'data' dimension.") + } + ## leadtime_dim + if (!is.character(leadtime_dim) | length(leadtime_dim) != 1) { + stop("Parameter 'leadtime_dim' must be a character string.") + } + if (!all(sapply(data, function(x) leadtime_dim %in% names(dim(x))))) { + stop("Parameter 'leadtime_dim' is not found in 'data' dimension.") + } + ## lat_dim + if (!is.character(lat_dim) | length(lat_dim) != 1) { + stop("Parameter 'lat_dim' must be a character string.") + } + if (!all(sapply(data, function(x) lat_dim %in% names(dim(x))))) { + stop("Parameter 'lat_dim' is not found in 'data' dimension.") + } + # dates + if (is.null(dates)) { + stop("Parameter 'dates' is missing, dates must be provided.") + } + if (!(is.Date(dates)) & !(is.POSIXct(dates))) { + stop("Parameter 'dates' is not of the correct class, ", + "only 'Date' and 'POSIXct' classes are accepted.") + } + if (!time_dim %in% names(dim(dates)) | !leadtime_dim %in% names(dim(dates))) { + stop("Parameter 'dates' must have 'time_dim' and 'leadtime_dim' ", + "dimension.") + } + if (!all(dim(data[[1]])[c(time_dim, leadtime_dim)] == + dim(dates)[c(time_dim, leadtime_dim)])) { + stop("Parameter 'dates' needs to have the same length as 'time_dim' ", + "and 'leadtime_dim' as 'data'.") + } + ## na.rm + if (!is.logical(na.rm) | length(na.rm) > 1) { + stop("Parameter 'na.rm' must be one logical value.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + # complete dates + dates_monthly <- .datesmask(dates) + + lat_mask <- array(lat, dim = c(1, length(lat))) + names(dim(lat_mask)) <- c('dat', lat_dim) + + # extract mask of NA locations to return to NA the final result + mask_na <- array(1, dim = dim(data[[1]])) + if (pet_method == 'hargreaves') { + varnames <- c('tmax', 'tmin') + mask_na[which(is.na(data$tmax))] <- 0 + mask_na[which(is.na(data$tmin))] <- 0 + } else if (pet_method == 'hargreaves_modified') { + varnames <- c('tmax', 'tmin', 'pr') + mask_na[which(is.na(data$tmax))] <- 0 + mask_na[which(is.na(data$tmin))] <- 0 + mask_na[which(is.na(data$pr))] <- 0 + } else if (pet_method == 'thornthwaite') { + varnames <- c('tmean') + mask_na[which(is.na(data$tmean))] <- 0 + } + + # replace NA with 0 + for (dd in 1:length(data)) { + data[[dd]][which(is.na(data[[dd]]))] <- 0 + } + + # prepare data + target_dims_data <- lapply(data[varnames], function(x) rep(c(leadtime_dim, time_dim), 1)) + pet <- Apply(data = c(list(lat_mask = lat_mask), data[varnames]), + target_dims = c(list(lat_mask = 'dat'), target_dims_data), + fun = .pet, + dates_monthly = dates_monthly, 
pet_method = pet_method, + leadtime_dim = leadtime_dim, time_dim = time_dim, + output_dims = c(leadtime_dim, time_dim), + ncores = ncores)$output1 + # reorder dims in pet_estimated + pos <- match(names(dim(data[[1]])), names(dim(pet))) + pet <- aperm(pet, pos) + + # restore original NAs from mask_na + pet[which(mask_na == 0)] <- NA + + return(pet) +} + +.pet <- function(lat_mask, data2, data3 = NULL, data4 = NULL, + dates_monthly, pet_method = 'hargreaves', + leadtime_dim = 'time', time_dim = 'syear') { + + dims <- dim(data2) + + # create a vector from data but adding 0 to achive complete time series + # of the considered period + # (starting in January of the first year) so that the solar radiation + # estimation is computed in each case for the correct month + + if (!is.null(data2)) { + data_tmp <- as.vector(data2) + data2 <- array(0, dim = length(dates_monthly)) + count <- 1 + for (dd in 1:length(dates_monthly)) { + if (dates_monthly[dd] == 1) { + data2[dd] <- data_tmp[count] + count <- count + 1 + } + } + rm(data_tmp) + } + if (!is.null(data3)) { + data_tmp <- as.vector(data3) + data3 <- array(0, dim = length(dates_monthly)) + count <- 1 + for (dd in 1:length(dates_monthly)) { + if (dates_monthly[dd] == 1) { + data3[dd] <- data_tmp[count] + count <- count + 1 + } + } + rm(data_tmp) + } + if (!is.null(data4)) { + data_tmp <- as.vector(data4) + data4 <- array(0, dim = length(dates_monthly)) + count <- 1 + for (dd in 1:length(dates_monthly)) { + if (dates_monthly[dd] == 1) { + data4[dd] <- data_tmp[count] + count <- count + 1 + } + } + rm(data_tmp) + } + if (pet_method == 'hargreaves') { + pet <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), + lat = lat_mask, na.rm = FALSE, verbose = FALSE) + # line to return the vector to the size of the actual original data + pet <- array(pet[which(dates_monthly == 1)], dim = dims) + } + + if (pet_method == 'hargreaves_modified') { + pet <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), + lat = lat_mask, Pre = as.vector(data4), na.rm = FALSE, + verbose = FALSE) + pet <- array(pet[which(dates_monthly == 1)], dim = dims) + } + + if (pet_method == 'thornthwaite') { + pet <- thornthwaite(as.vector(data2), lat = lat_mask, na.rm = TRUE, + verbose = FALSE) + # line to return the vector to the size of the actual original data + pet <- array(pet[which(dates_monthly == 1)], dim = dims) + } + return(pet) +} + + +.datesmask <- function(dates) { + ini <- as.Date(paste(lubridate::year(min(dates)), 01, 01, sep = '-')) + end <- as.Date(paste(lubridate::year(max(dates)), 12, 31, sep = '-')) + daily <- as.Date(ini:end) + monthly <- daily[which(lubridate::day(daily) == 1)] + dates_mask <- array(0, dim = length(monthly)) + for (dd in 1:length(dates)) { + ii <- which(monthly == as.Date(paste(lubridate::year(dates[dd]), + lubridate::month(dates[dd]), + 01, sep = '-'))) + dates_mask[ii] <- 1 + } + return(dates_mask) +} \ No newline at end of file diff --git a/02_calculate_spei/support_functions/PeriodSPEI.R b/02_calculate_spei/support_functions/PeriodSPEI.R new file mode 100644 index 0000000000000000000000000000000000000000..7903cada2ab64e9aea750aba77254ef452af4e1b --- /dev/null +++ b/02_calculate_spei/support_functions/PeriodSPEI.R @@ -0,0 +1,1130 @@ +#'Compute the Standardised Precipitation-Evapotranspiration Index +#' +#'Calculation of the Standardised Precipitation-Evapotranspiration Index (SPEI) +#'that is a multiscalar drought index based on climatic data. 
It can be used for +#'determining the onset, duration and magnitude of drought conditions with +#'respect to normal conditions in a variety of natural and managed systems such +#'as crops, ecosystems, rivers, water resources, etc. The idea behind the SPEI +#'is to compare the highest possible evapotranspiration with the current water +#'availability. The SPEI uses the monthly (or weekly) difference between +#'precipitation and potential evapotranspiration. This represents a simple +#'climatic water balance which is calculated at different time scales to obtain +#'the SPEI. This function is build to work be compatible with other tools in +#'that work with 's2dv_cube' object class. The input data must be this object +#'class. If you don't work with 's2dv_cube', see PeriodSPEI. +#' +#'Next, some specifications for the calculation of this indicator will be +#'discussed. On the one hand, the model to be used to calculate potential +#'evapotranspiration is specified with the pet_method parameter (hargreaves, +#'hargraves modified or thornwhite). On the other hand, to choose the time scale +#'in which you want to accumulate the SPEI (SPEI3, SPEI6...) is done using the +#'accum parameter, where you must indicate the number of time steps you want to +#'accumulate throughout leadtime_dim. Since the accumulation is done for the +#'elapsed time steps, there will be no complete accumulations until reaching the +#'time instant equal to the value of the parameter. For this reason, in the +#'result, we will find that for the dimension where the accumulation has been +#'carried out, the values of the array will be NA since they do not include +#'complete accumulations. Also, there is a parameter to specify if the +#'standardization that can be TRUE or FALSE. If it is TRUE, the data is fit to a +#'probability distribution to transform the original values to standardized +#'units that are comparable in space and time and at different SPEI time scales. +#'The na.rm parameter is a logical parameter used to decide whether to remove +#'the NA values from the data before doing the calculation. It must be taken +#'into account that if na.rm == FALSE and there is some NA value in the specific +#'coordinates which the SPEI is computed, standardization cannot be carried out +#'for those coordinates and therefore, the result will be filled with NA for the +#'specific coordinates. However, when na.rm == TRUE, if the amount of data for +#'those specific coordinates is smaller than 4, it will not be possible to carry +#'out because we will not have enough data and the result will be also filled +#'with NAs for that coordinates. +#' +#'@param exp A named list with the needed \code{s2dv_cube} objects containing +#' the seasonal forecast experiment in the data element for each variable. +#' Specific variables are needed for each method used in computing the +#' Potential Evapotranspiration. See parameter 'pet_method'. The accepted +#' variables are: 'tmin' and 'tmax', required for methods 'hargreaves' and +#' 'hargreaves_modified' and 'tmean', required for method 'thornthwaite'. +#' Variable 'pr' is always needed. The units for temperature variables +#' ('tmin', 'tmax' and 'tmean') need to be in Celcius degrees; the units for +#' precipitation ('pr') need to be in mm/month. +#'@param exp_cor A named list with the needed \code{s2dv_cube} objects for each +#' variable in which the quantile PeriodSPEI should be applied. If it is not +#' specified, the PeriodSPEI is calculated from object 'exp'. 
+#'@param time_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'syear'. +#'@param leadtime_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'time'. +#'@param memb_dim A character string indicating the name of the dimension in +#' which the ensemble members are stored. When set it to NULL, threshold is +#' computed for individual members. +#'@param lat_dim A character string indicating the name of the latitudinal +#' dimension. By default it is set by 'latitude'. +#'@param accum An integer value indicating the number of months for the +#' accumulation for each variable. When it is greater than 1, the result will +#' be filled with NA until the accum time_dim dimension number due to the +#' accumulation to previous months. +#'@param ref_period A list with two numeric values with the starting and end +#' points of the reference period used for computing the index. The default +#' value is NULL indicating that the first and end values in data will be +#' used as starting and end points. +#'@param params A multidimensional array with named dimensions for computing the +#' SPEI. This option overrides computation of fitting parameters. It needs +#' to have the following dimensions: same leadtime dimension of 'exp' +#' (specified in 'leadtime_dim'); time dimension of length 1 (specified in +#' 'time_dim'); and a dimension named 'coef' with the length of the +#' coefficients needed for the used distribution (for 'Gamma' coef dimension is +#' of lenght 2, for 'log-Logistic' or 'PearsonIII' is of length 3). It can't +#' have member dimension (specified in 'memb_dim'). +#'@param pet_exp A multidimensional array containing the Potential +#' EvapoTranspiration data of 'exp'. It must have the same dimensions of the +#' variable 'pr' of 'exp'. If it is NULL it is calculated using the provided +#' variables with specified 'pet_method'. It is NULL by default. +#'@param pet_expcor A multidimensional array containing the Potential +#' EvapoTranspiration data of 'exp_cor'. It must have the same dimensions of +#' the variable 'pr' of 'exp_cor'. If it is NULL it is calculated using the +#' provided variables with specified 'pet_method'. It is NULL by default. +#'@param standardization A logical value indicating wether the standardization +#' is computed. +#'@param cross_validation A logical value indicating if cross validation is +#' done (TRUE), or not (FALSE). It only will be used when 'exp_cor' and +#' is not provided. It is FALSE by default. +#'@param pet_method A character string indicating the method used to compute +#' the potential evapotranspiration. The accepted methods are: +#' 'hargreaves' and 'hargreaves_modified', that require the data to have +#' variables tmin and tmax; and 'thornthwaite', that requires variable +#' 'tmean'. +#'@param method A character string indicating the standardization method used. +#' If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by +#' default. +#'@param distribution A character string indicating the name of the distribution +#' function to be used for computing the SPEI. The accepted names are: +#' 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The +#' 'Gamma' method only works when only precipitation is provided and other +#' variables are 0 because it is positive defined (SPI indicator). +#'@param param_error A numeric value with the error accepted. 
+#'@param handle_infinity A logical value wether to return Infinite values (TRUE) +#' or not (FALSE). +#'@param na.rm A logical value indicating whether NA values should be removed +#' from data. It is FALSE by default. If it is FALSE and there are NA values, +#' (if standardization is TRUE) all values of other dimensions except time_dim +#' and leadtime_dim will be set to NA directly. On the other hand, if it is +#' TRUE, if the data from other dimensions except time_dim and leadtime_dim is +#' not reaching 4 values, it is not enough values to estimate the parameters +#' and the result will include NA. +#'@param ncores An integer value indicating the number of cores to use in +#' parallel computation. +#' +#'@return An 's2dv_cube' object containing the SPEI multidimensional array in +#'element \code{data}. If 'exp_cor' is provided, only results from 'exp_cor' +#'will be provided. The parameters of the standardization will only be returned +#'if 'return_params' is TRUE. The SPEI will only be computed if +#''standardization' is TRUE. If 'standardization' is FALSE, only the climatic +#'water balance (precipitation minus evapotranspiration) will be returned. The +#'resultant arrays will have the same dimensions as the initial input data. The +#'other elements in the 's2dv_cube' will be updated with the combined +#'information of the input data arrays. +#' +#'@examples +#'dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, +#' latitude = 2, longitude = 1, ensemble = 25) +#'dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 1, time = 3, +#' latitude = 2, longitude = 1, ensemble = 15) +#' +#'dates_exp <- as.POSIXct(c(paste0(2010:2015, "-08-16"), +#' paste0(2010:2015, "-09-15"), +#' paste0(2010:2015, "-10-16")), 'UTC') +#'dim(dates_exp) <- c(sday = 1, sweek = 1, syear = 6, time = 3) +#'dates_expcor <- as.POSIXct(c(paste0(2020, "-08-16"), paste0(2020, "-09-15"), +#' paste0(2020, "-10-16")), 'UTC') +#'dim(dates_expcor) <- c(sday = 1, sweek = 1, syear = 1, time = 3) +#'lat <- c(40,40.1) +#' +#'exp_tmax <- array(rnorm(900, 27.73, 5.26), dim = dims) +#'exp_tmin <- array(rnorm(900, 14.83, 3.86), dim = dims) +#'exp_pr <- array(rnorm(900, 21.19, 25.64), dim = dims) +#' +#'expcor_tmax <- array(rnorm(540, 29.03, 5.67), dim = dimscor) +#'expcor_tmin <- array(rnorm(540, 15.70, 4.40), dim = dimscor) +#'expcor_pr <- array(rnorm(540, 15.62, 21.38), dim = dimscor) +#' +#'exp <- list('tmax' = exp_tmax, 'tmin' = exp_tmin, 'pr' = exp_pr) +#'exp_cor <- list('tmax' = expcor_tmax, 'tmin' = expcor_tmin, +#' 'pr' = expcor_pr) +#' +#'exp <- lapply(exp, CSTools::s2dv_cube, coords = list(latitude = lat), +#' Dates = dates_exp) +#'exp_cor <- lapply(exp_cor, CSTools::s2dv_cube, coords = list(latitude = lat), +#' Dates = dates_expcor) +#' +#'res <- CST_PeriodSPEI(exp = exp, exp_cor = exp_cor) +#' +#'@import multiApply +#'@import ClimProjDiags +#'@import SPEI +#'@import zoo +#'@import TLMoments +#'@import lmomco +#'@import lmom +#'@import lubridate +#'@import CSTools +#'@export +CST_PeriodSPEI <- function(exp, exp_cor = NULL, + time_dim = 'syear', leadtime_dim = 'time', + memb_dim = 'ensemble', lat_dim = 'latitude', + accum = 1, ref_period = NULL, params = NULL, + pet_exp = NULL, pet_expcor = NULL, + standardization = TRUE, cross_validation = FALSE, + pet_method = 'hargreaves', method = 'parametric', + distribution = 'log-Logistic', + param_error = -9999, handle_infinity = FALSE, + return_params = FALSE, na.rm = FALSE, + ncores = NULL) { + + # Check 's2dv_cube' + if (is.null(exp)) { + stop("Parameter 'exp' cannot be 
NULL.") + } + if (!all(sapply(exp, function(x) inherits(x, 's2dv_cube')))) { + stop("Parameter 'exp' must be a list of 's2dv_cube' class.") + } + if (!is.null(exp_cor)) { + if (!all(sapply(exp_cor, function(x) inherits(x, 's2dv_cube')))) { + stop("Parameter 'exp_cor' must be a list of 's2dv_cube' class.") + } + } + # latitude + if (!any(names(exp[[1]]$coords) %in% .KnownLatNames())) { + stop("Spatial coordinate names of parameter 'exp' do not match any ", + "of the names accepted by the package.") + } + # Dates + dates_exp <- exp[[1]]$attrs$Dates + if (!'Dates' %in% names(exp[[1]]$attrs)) { + stop("Element 'Dates' is not found in 'attrs' list of 'exp'. ", + "See 's2dv_cube' object description in README file for more ", + "information.") + } + + if (!is.null(exp_cor)) { + if (!'Dates' %in% names(exp_cor[[1]]$attrs)) { + stop("Element 'Dates' is not found in 'attrs' list of 'exp_cor'. ", + "See 's2dv_cube' object description in README file for more ", + "information.") + } + } + + lat_dim <- names(exp[[1]]$coords)[[which(names(exp[[1]]$coords) %in% .KnownLatNames())]] + + res <- PeriodSPEI(exp = lapply(exp, function(x) x$data), + dates_exp = exp[[1]]$attrs$Dates, + lat = exp[[1]]$coords[[lat_dim]], + exp_cor = if (is.null(exp_cor)) {NULL} else { + lapply(exp_cor, function(x) x$data)}, + dates_expcor = exp_cor[[1]]$attrs$Dates, + time_dim = time_dim, leadtime_dim = leadtime_dim, + memb_dim = memb_dim, lat_dim = lat_dim, + accum = accum, ref_period = ref_period, params = params, + pet_exp = pet_exp, pet_expcor = pet_expcor, + standardization = standardization, + cross_validation = cross_validation, + pet_method = pet_method, method = method, + distribution = distribution, + param_error = param_error, handle_infinity = handle_infinity, + return_params = return_params, na.rm = na.rm, + ncores = ncores) + + if (!is.null(exp_cor)) { + source_files_expcor <- lapply(exp_cor, function(x) {x$attrs$source_files}) + source_files <- lapply(exp, function(x) {x$attrs$source_files}) + source_files <- c(exp = source_files, exp_cor = source_files_expcor) + coords <- exp_cor[[1]]$coords + Dates <- exp_cor[[1]]$attrs$Dates + metadata <- exp_cor[[1]]$attrs$Variable$metadata + metadata_names <- names(metadata) + } else { + source_files <- lapply(exp, function(x) {x$attrs$source_files}) + coords <- exp[[1]]$coords + Dates <- exp[[1]]$attrs$Dates + metadata <- exp[[1]]$attrs$Variable$metadata + metadata_names <- names(metadata) + } + + if (standardization) { + varname <- 'SPEI' + } else { + varname <- 'Precipitation minus accumulated PET' + } + + if (return_params & standardization) { + metadata_names <- intersect(names(dim(res[[1]])), metadata_names) + suppressWarnings( + res[[1]] <- CSTools::s2dv_cube(data = res[[1]], coords = coords, + varName = varname, + metadata = metadata[metadata_names], + Dates = Dates, + source_files = source_files, + when = Sys.time()) + ) + return(list(spei = res[[1]], params = res[[2]])) + } else { + metadata_names <- intersect(names(dim(res)), metadata_names) + suppressWarnings( + res <- CSTools::s2dv_cube(data = res, coords = coords, + varName = varname, + metadata = metadata[metadata_names], + Dates = Dates, + source_files = source_files, + when = Sys.time()) + ) + return(res) + } +} + + +#'Compute the Standardised Precipitation-Evapotranspiration Index +#' +#'Calculation of the Standardised Precipitation-Evapotranspiration Index (SPEI) +#'that is a multiscalar drought index based on climatic data. 
It can be used for +#'determining the onset, duration and magnitude of drought conditions with +#'respect to normal conditions in a variety of natural and managed systems such +#'as crops, ecosystems, rivers, water resources, etc. The idea behind the SPEI +#'is to compare the highest possible evapotranspiration with the current water +#'availability. The SPEI uses the monthly (or weekly) difference between +#'precipitation and potential evapotranspiration. This represents a simple +#'climatic water balance which is calculated at different time scales to obtain +#'the SPEI. +#' +#'Next, some specifications for the calculation of this indicator will be +#'discussed. On the one hand, the model to be used to calculate potential +#'evapotranspiration is specified with the pet_method parameter (hargreaves, +#'hargraves modified or thornwhite). On the other hand, to choose the time scale +#'in which you want to accumulate the SPEI (SPEI3, SPEI6...) is done using the +#'accum parameter, where you must indicate the number of time steps you want to +#'accumulate throughout leadtime_dim. Since the accumulation is done for the +#'elapsed time steps, there will be no complete accumulations until reaching the +#'time instant equal to the value of the parameter. For this reason, in the +#'result, we will find that for the dimension where the accumulation has been +#'carried out, the values of the array will be NA since they do not include +#'complete accumulations. Also, there is a parameter to specify if the +#'standardization that can be TRUE or FALSE. If it is TRUE, the data is fit to a +#'probability distribution to transform the original values to standardized +#'units that are comparable in space and time and at different SPEI time scales. +#'The na.rm parameter is a logical parameter used to decide whether to remove +#'the NA values from the data before doing the calculation. It must be taken +#'into account that if na.rm == FALSE and there is some NA value in the specific +#'coordinates which the SPEI is computed, standardization cannot be carried out +#'for those coordinates and therefore, the result will be filled with NA for the +#'specific coordinates. However, when na.rm == TRUE, if the amount of data for +#'those specific coordinates is smaller than 4, it will not be possible to carry +#'out because we will not have enough data and the result will be also filled +#'with NAs for that coordinates. +#' +#'@param exp A named list with multidimensional array objects containing +#' the seasonal forecast experiment in the data element for each variable. +#' Specific variables are needed for each method used in computing the +#' Potential Evapotranspiration. See parameter 'pet_method'. The accepted +#' variables are: 'tmin' and 'tmax', required for methods 'hargreaves' and +#' 'hargreaves_modified' and 'tmean', required for method 'thornthwaite'. +#' Variable 'pr' is always needed. The units for temperature variables +#' ('tmin', 'tmax' and 'tmean') need to be in Celcius degrees; the units for +#' precipitation ('pr') need to be in mm/month. +#'@param dates_exp An array of temporal dimensions containing the Dates of +#' 'exp'. It must be of class 'Date' or 'POSIXct'. +#'@param lat A numeric vector containing the latitude values of 'exp'. +#'@param exp_cor A named list with multidimensional array objects for each +#' variable in which the quantile PeriodSPEI should be applied. If it is not +#' specified, the PeriodSPEI is calculated from object 'exp'. 
+#'@param dates_expcor An array of temporal dimensions containing the Dates of +#' 'exp_cor'. It must be of class 'Date' or 'POSIXct'. +#'@param time_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'syear'. +#'@param leadtime_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'time'. +#'@param memb_dim A character string indicating the name of the dimension in +#' which the ensemble members are stored. When set it to NULL, threshold is +#' computed for individual members. +#'@param lat_dim A character string indicating the name of the latitudinal +#' dimension. By default it is set by 'latitude'. +#'@param accum accum An integer value indicating the number of months for the +#' accumulation for each variable. When it is greater than 1, the result will +#' be filled with NA until the accum time_dim dimension number due to the +#' accumulation to previous months. +#'@param ref_period A list with two numeric values with the starting and end +#' points of the reference period used for computing the index. The default +#' value is NULL indicating that the first and end values in data will be +#' used as starting and end points. +#'@param params A multidimensional array with named dimensions for computing the +#' SPEI. This option overrides computation of fitting parameters. It needs +#' to be of same time dimension (specified in 'time_dim') of 'exp' and a +#' dimension named 'coef' with the length of the coefficients needed for the +#' used distribution (for 'Gamma' coef dimension is of lenght 2, for +#' 'log-Logistic' or 'PearsonIII' is of length 3). It also needs to have a +#' leadtime dimension (specified in 'leadtime_dim') of length 1. +#'@param pet_exp A multidimensional array containing the Potential +#' EvapoTranspiration data of 'exp'. It must have the same dimensions of the +#' variable 'pr' of 'exp'. If it is NULL it is calculated using the provided +#' variables with specified 'pet_method'. It is NULL by default. +#'@param pet_expcor A multidimensional array containing the Potential +#' EvapoTranspiration data of 'exp_cor'. It must have the same dimensions of +#' the variable 'pr' of 'exp_cor'. If it is NULL it is calculated using the +#' provided variables with specified 'pet_method'. It is NULL by default. +#'@param standardization A logical value indicating wether the standardization +#' is computed. +#'@param cross_validation A logical value indicating if cross validation is +#' done (TRUE), or not (FALSE). It only will be used when 'exp_cor' and +#' is not provided. It is FALSE by default. +#'@param pet_method A character string indicating the method used to compute +#' the potential evapotranspiration. The accepted methods are: +#' 'hargreaves' and 'hargreaves_modified', that require the data to have +#' variables tmin and tmax; and 'thornthwaite', that requires variable +#' 'tmean'. +#'@param method A character string indicating the standardization method used. +#' If can be: 'parametric' or 'non-parametric'. +#'@param distribution A character string indicating the name of the distribution +#' function to be used for computing the SPEI. The accepted names are: +#' 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The +#' 'Gamma' method only works when only precipitation is provided and other +#' variables are 0 because it is positive defined (SPI indicator). +#'@param param_error A numeric value with the error accepted. 
+#'@param handle_infinity A logical value wether to return Infinite values (TRUE) +#' or not (FALSE). +#'@param return_params A logical value indicating wether to return parameters +#' array (TRUE) or not (FALSE). It is FALSE by default. +#'@param na.rm A logical value indicating whether NA values should be removed +#' from data. It is FALSE by default. If it is FALSE and there are NA values, +#' (if standardization is TRUE) all values of other dimensions except time_dim +#' and leadtime_dim will be set to NA directly. On the other hand, if it is +#' TRUE, if the data from other dimensions except time_dim and leadtime_dim is +#' not reaching 4 values, it is not enough values to estimate the parameters +#' and the result will include NA. +#'@param ncores An integer value indicating the number of cores to use in +#' parallel computation. +#' +#'@return An 's2dv_cube' object containing the SPEI multidimensional array in +#'element \code{data}. If 'exp_cor' is provided, only results from 'exp_cor' +#'will be provided. The parameters of the standardization will only be returned +#'if 'return_params' is TRUE. The SPEI will only be computed if +#''standardization' is TRUE. If 'standardization' is FALSE, only the climatic +#'water balance (precipitation minus evapotranspiration) will be returned. The +#'resultant arrays will have the same dimensions as the initial input data. +#' +#'@examples +#'dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, +#' latitude = 2, longitude = 1, ensemble = 25) +#'dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 1, time = 3, +#' latitude = 2, longitude = 1, ensemble = 15) +#'exp_tmax <- array(rnorm(900, 27.73, 5.26), dim = dims) +#'exp_tmin <- array(rnorm(900, 14.83, 3.86), dim = dims) +#'exp_pr <- array(rnorm(900, 21.19, 25.64), dim = dims) +#' +#'expcor_tmax <- array(rnorm(540, 29.03, 5.67), dim = dimscor) +#'expcor_tmin <- array(rnorm(540, 15.70, 4.40), dim = dimscor) +#'expcor_pr <- array(rnorm(540, 15.62, 21.38), dim = dimscor) +#' +#'dates_exp <- as.Date(c(paste0(2010:2015, "-08-16"), +#' paste0(2010:2015, "-09-15"), +#' paste0(2010:2015, "-10-16"))) +#'dim(dates_exp) <- c(sday = 1, sweek = 1, syear = 6, time = 3) +#'dates_expcor <- as.POSIXct(c(paste0(2020, "-08-16"), +#' paste0(2020, "-09-15"), +#' paste0(2020, "-10-16")), 'UTC') +#'dim(dates_expcor) <- c(sday = 1, sweek = 1, syear = 1, time = 3) +#'lat <- c(40,40.1) +#' +#'exp <- list('tmax' = exp_tmax, 'tmin' = exp_tmin, 'pr' = exp_pr) +#'exp_cor <- list('tmax' = expcor_tmax, 'tmin' = expcor_tmin, +#' 'pr' = expcor_pr) +#'res <- PeriodSPEI(exp = exp, exp_cor = exp_cor, lat = lat, +#' dates_exp = dates_exp, dates_expcor = dates_expcor) +#' +#'@import multiApply +#'@import ClimProjDiags +#'@import SPEI +#'@import zoo +#'@import TLMoments +#'@import lmomco +#'@import lmom +#'@import lubridate +#'@export +PeriodSPEI <- function(exp, dates_exp, lat, + exp_cor = NULL, dates_expcor = NULL, + time_dim = 'syear', leadtime_dim = 'time', + memb_dim = 'ensemble', lat_dim = 'latitude', + accum = 1, ref_period = NULL, params = NULL, + pet_exp = NULL, pet_expcor = NULL, + standardization = TRUE, cross_validation = FALSE, + pet_method = 'hargreaves', method = 'parametric', + distribution = 'log-Logistic', + param_error = -9999, handle_infinity = FALSE, + return_params = FALSE, na.rm = FALSE, ncores = NULL) { + + # Initial checks + ## exp + if (!inherits(exp, 'list')) { + stop("Parameter 'exp' needs to be a named list with the needed variables.") + } + if (is.null(names(exp))) { + stop("Parameter 'exp' needs to 
be a named list with the variable names.") + } + if (any(sapply(exp, function(x) is.null(names(dim(x)))))) { + stop("Parameter 'exp' needs to be a list of arrays with dimension names.") + } + dims <- lapply(exp, function(x) dim(x)) + first_dims <- dims[[1]] + all_equal <- all(sapply(dims[-1], function(x) identical(first_dims, x))) + if (!all_equal) { + stop("Parameter 'exp' variables need to have the same dimensions.") + } + + ## exp_cor + if (!is.null(exp_cor)) { + if (!inherits(exp_cor, 'list')) { + stop("Parameter 'exp_cor' needs to be a named list with the needed ", + "variables if it is not NULL.") + } + if (is.null(names(exp_cor))) { + stop("Parameter 'exp_cor' needs to be a named list with the variable names.") + } + if (any(sapply(exp_cor, function(x) is.null(names(dim(x)))))) { + stop("Parameter 'exp_cor' needs to be a list of arrays with dimension names.") + } + dimscor <- lapply(exp_cor, function(x) dim(x)) + first_dimscor <- dimscor[[1]] + all_equal <- all(sapply(dimscor[-1], function(x) identical(first_dimscor, x))) + if (!all_equal) { + stop("Parameter 'exp_cor' variables need to have the same dimensions.") + } + } + # Variable checks + ## exp (2) + pet <- vector("list", 2) + if (!('pr' %in% names(exp))) { + stop("Variable 'pr' is not included in 'exp'.") + } + ## exp_cor (2) + if (!is.null(exp_cor)) { + if (!('pr' %in% names(exp_cor))) { + stop("Variable 'pr' is not included in 'exp_cor'.") + } + if (length(pet_method) == 1) { + pet_method <- rep(pet_method, 2) + } + } + + ## pet_exp + if (!is.null(pet_exp)) { + if (length(dim(exp[['pr']])) != length(dim(pet_exp))) { + stop("Parameter 'pet_exp' must have the same length of all the ", + "dimensions as variable 'pr' in 'exp'.") + } + if (!all(dim(exp[['pr']]) %in% dim(pet_exp))) { + stop("Parameter 'pet_exp' must have the same length of all the ", + "dimensions as variable 'pr' in 'exp'.") + } + if (any(names(dim(exp[['pr']])) != names(dim(pet_exp)))) { + pos <- match(names(dim(exp[['pr']])), names(dim(pet_exp))) + pet_exp <- aperm(pet_exp, pos) + } + pet[[1]] <- pet_exp + } else if (is.null(dates_exp)) { + stop("Parameter 'dates_exp' must be provided.") + } + ## pet_expcor + if (!is.null(exp_cor)) { + if (!is.null(pet_expcor)) { + if (length(dim(exp_cor[['pr']])) != length(dim(pet_expcor))) { + stop("Parameter 'pet_expcor' must have the same length of all the ", + "dimensions as variable 'pr' in 'exp_cor'.") + } + if (!all(dim(exp_cor[['pr']]) %in% dim(pet_expcor))) { + stop("Parameter 'pet_expcor' must have the same length of all the ", + "dimensions as variable 'pr' in 'exp_cor'.") + } + if (any(names(dim(exp_cor[['pr']])) != names(dim(pet_expcor)))) { + pos <- match(names(dim(exp_cor[['pr']])), names(dim(pet_expcor))) + pet_expcor <- aperm(pet_expcor, pos) + } + pet[[2]] <- pet_expcor + } else if (is.null(dates_expcor)) { + stop("Parameter 'dates_expcor' must be provided.") + } + } + + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!all(sapply(exp, function(x) time_dim %in% names(dim(x))))) { + stop("Parameter 'time_dim' is not found in 'exp' dimension.") + } + if (!is.null(exp_cor)) { + if (!all(sapply(exp_cor, function(x) time_dim %in% names(dim(x))))) { + stop("Parameter 'time_dim' is not found in 'exp_cor' dimension.") + } + } + ## leadtime_dim + if (!is.character(leadtime_dim) | length(leadtime_dim) != 1) { + stop("Parameter 'leadtime_dim' must be a character string.") + } + if (!all(sapply(exp, function(x) leadtime_dim %in% 
names(dim(x))))) { + stop("Parameter 'leadtime_dim' is not found in 'exp' dimension.") + } + if (!is.null(exp_cor)) { + if (!all(sapply(exp_cor, function(x) leadtime_dim %in% names(dim(x))))) { + stop("Parameter 'leadtime_dim' is not found in 'exp_cor' dimension.") + } + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) != 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!all(sapply(exp, function(x) memb_dim %in% names(dim(x))))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (!is.null(exp_cor)) { + if (!all(sapply(exp_cor, function(x) memb_dim %in% names(dim(x))))) { + stop("Parameter 'memb_dim' is not found in 'exp_cor' dimension.") + } + } + + ## lat_dim + if (!is.character(lat_dim) | length(lat_dim) != 1) { + stop("Parameter 'lat_dim' must be a character string.") + } + if (!all(sapply(exp, function(x) lat_dim %in% names(dim(x))))) { + stop("Parameter 'lat_dim' is not found in 'exp' dimension.") + } + if (!is.null(exp_cor)) { + if (!all(sapply(exp_cor, function(x) lat_dim %in% names(dim(x))))) { + stop("Parameter 'lat_dim' is not found in 'exp_cor' dimension.") + } + } + + ## dates + if (is.null(dates_exp)) { + stop("Parameter 'dates_exp' is missing, dates must be provided.") + } + if (!(is.Date(dates_exp)) & !(lubridate::is.POSIXct(dates_exp))) { + stop("Parameter 'dates_exp' is not of the correct class, ", + "only 'Date' and 'POSIXct' classes are accepted.") + } + if (!time_dim %in% names(dim(dates_exp)) | !leadtime_dim %in% names(dim(dates_exp))) { + stop("Parameter 'dates_exp' must have 'time_dim' and 'leadtime_dim' ", + "dimension.") + } + if (!all(dim(exp[[1]])[c(time_dim, leadtime_dim)] == + dim(dates_exp)[c(time_dim, leadtime_dim)])) { + stop("Parameter 'dates_exp' needs to have the same length as 'time_dim' ", + "and 'leadtime_dim' as 'exp'.") + } + + if (!is.null(exp_cor)) { + if (is.null(dates_expcor)) { + stop("Parameter 'dates_expcor' is missing, dates for 'exp_cor' must be ", + "provided if exp_cor is not NULL.") + } + if (!(is.Date(dates_expcor)) & !(lubridate::is.POSIXct(dates_expcor))) { + stop("Element 'Dates' in 'exp_cor' is not of the correct class, ", + "only 'Date' and 'POSIXct' classes are accepted.") + } + if (!time_dim %in% names(dim(dates_expcor)) | !leadtime_dim %in% names(dim(dates_expcor))) { + stop("Parameter 'dates_expcor' must have 'time_dim' and 'leadtime_dim' ", + "dimension.") + } + if (!all(dim(exp_cor[[1]])[c(time_dim, leadtime_dim)] == + dim(dates_expcor)[c(time_dim, leadtime_dim)])) { + stop("Parameter 'dates_expcor' needs to have the same length as ", + "'time_dim' and 'leadtime_dim' as 'exp_cor'.") + } + } + + ## accum + if (accum > dim(exp[[1]])[leadtime_dim]) { + stop(paste0("Cannot compute accumulation of ", accum, " months because ", + "loaded data has only ", dim(exp[[1]])[leadtime_dim], " months.")) + } + + ## ref_period + if (!is.null(ref_period)) { + if (length(ref_period) != 2) { + warning("Parameter 'ref_period' must be of length two indicating the ", + "first and end years of the reference period. It will not ", + "be used.") + ref_period <- NULL + } else if (!all(sapply(ref_period, is.numeric))) { + warning("Parameter 'ref_period' must be a numeric vector indicating the ", + "'start' and 'end' years of the reference period. It will not ", + "be used.") + ref_period <- NULL + } else if (ref_period[[1]] > ref_period[[2]]) { + warning("In parameter 'ref_period' 'start' cannot be after 'end'. 
It ", + "will not be used.") + ref_period <- NULL + } else if (!all(unlist(ref_period) %in% year(dates_exp))) { + warning("Parameter 'ref_period' contain years outside the dates. ", + "It will not be used.") + ref_period <- NULL + } else { + years <- year(ClimProjDiags::Subset(dates_exp, along = leadtime_dim, + indices = 1)) + ref_period[[1]] <- which(ref_period[[1]] == years) + ref_period[[2]] <- which(ref_period[[2]] == years) + } + } + + ## standardization + if (!is.logical(standardization)) { + stop("Parameter 'standardization' must be a logical value.") + } + + ## param_error + if (!is.numeric(param_error)) { + stop("Parameter 'param_error' must be a numeric value.") + } + + ## handle_infinity + if (!is.logical(handle_infinity)) { + stop("Parameter 'handle_infinity' must be a logical value.") + } + + ## cross_validation + if (!is.logical(cross_validation)) { + stop("Parameter 'cross_validation' must be a logical value.") + } + if (!is.null(exp_cor)) { + if (cross_validation & standardization) { + warning("Detected 'cross_validation' = TRUE. It will be set as FALSE ", + "since 'exp_cor' is provided.") + cross_validation <- FALSE + } + } + + ## method + if (!(method %in% c('parametric', 'non-parametric'))) { + stop("Parameter 'method' must be a character string containing one of ", + "the following methods: 'parametric' or 'non-parametric'.") + } + + ## distribution + if (!(distribution %in% c('log-Logistic', 'Gamma', 'PearsonIII'))) { + stop("Parameter 'distribution' must be a character string containing one ", + "of the following distributions: 'log-Logistic', 'Gamma' or ", + "'PearsonIII'.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + ## na.rm + if (!is.logical(na.rm)) { + stop("Parameter 'na.rm' must be logical.") + } + + ## params + if (!is.null(params)) { + if (!is.numeric(params)) { + stop("Parameter 'params' must be numeric.") + } + if (!all(c(time_dim, leadtime_dim, 'coef') %in% names(dim(params)))) { + stop("Parameter 'params' must be a multidimensional array with named ", + "dimensions: '", time_dim, "', '", leadtime_dim, "' and 'coef'.") + } + if (distribution == "Gamma") { + if (dim(params)['coef'] != 2) { + stop("For '", distribution, "' distribution, params array should have ", + "'coef' dimension of length 2.") + } + } else { + if (dim(params)['coef'] != 3) { + stop("For '", distribution, "' distribution, params array should have ", + "'coef' dimension of length 3.") + } + } + } + + ## return_params + if (!is.logical(return_params)) { + stop("Parameter 'return_params' must be logical.") + } + + # Complete dates + dates <- .return2list(dates_exp, dates_expcor) + + # Compute PeriodSPEI + k = 0 + spei_res <- NULL + computed_pet <- FALSE + + for (data in .return2list(exp, exp_cor)) { + k = k + 1 + # Evapotranspiration estimation (unless pet is already provided) + if (is.null(pet[[k]]) | computed_pet) { + pet[[k]] <- PeriodPET(data = data, dates = dates[[k]], + lat = lat, pet_method = pet_method[k], + time_dim = time_dim, leadtime_dim = leadtime_dim, + lat_dim = lat_dim, na.rm = na.rm, ncores = ncores) + computed_pet <- TRUE + } + if (any(is.na(pet[[k]]))) { + mask_na <- which(is.na(pet[[k]])) + warning("There are NAs in PET.") + } + + # Accumulation + diff_p_pet <- data$pr - pet[[k]] + dates_monthly <- .datesmask(dates[[k]]) + accumulated <- .Accumulation(data = diff_p_pet, + dates_monthly = dates_monthly, accum = 
accum, + time_dim = time_dim, leadtime_dim = leadtime_dim, + ncores = ncores) + + # Standardization + if (standardization) { + spei <- .Standardization(data = accumulated, params = params, + accum = accum, time_dim = time_dim, + leadtime_dim = leadtime_dim, + memb_dim = memb_dim, ref_period = ref_period, + cross_validation = cross_validation, + handle_infinity = handle_infinity, + param_error = param_error, + method = method, distribution = distribution, + na.rm = TRUE, ncores = ncores) + + ref_period <- NULL + params <- spei$params + spei_res <- spei[[1]] + } else { + spei_res <- accumulated + } + pos <- match(names(dim(data[[1]])), names(dim(spei_res))) + spei_res <- aperm(spei_res, pos) + } + + if (standardization) { + if (return_params) { + return(list(spei = spei_res, params = params)) + } else { + return(spei_res) + } + } else { + return(spei_res) + } +} + +.Standardization <- function(data, params = NULL, accum = 1, time_dim = 'syear', + leadtime_dim = 'time', memb_dim = 'ensemble', + ref_period = NULL, cross_validation = FALSE, + handle_infinity = FALSE, param_error = -9999, + method = 'parametric', distribution = 'log-Logistic', + na.rm = FALSE, ncores = NULL) { + + nleadtime <- dim(data)[leadtime_dim] + ntime <- dim(data)[time_dim] + + if (!cross_validation) { + ntime <- 1 + } + + coef = switch(distribution, + "Gamma" = array(NA, dim = 2, dimnames = list(c('alpha', 'beta'))), + "log-Logistic" = array(NA, dim = 3, dimnames = list(c('xi', 'alpha', 'kappa'))), + "PearsonIII" = array(NA, dim = 3, dimnames = list(c('mu', 'sigma', 'gamma')))) + + if (is.null(params)) { + params <- array(NA, dim = c(ntime, nleadtime, length(coef))) + names(dim(params)) <- c(time_dim, leadtime_dim, 'coef') + } else if (length(dim(params)) < 2) { + params <- array(params, dim = c(length(params), ntime, nleadtime)) + # dim(params): [time_dim, leadtime_dim, coef] + # with the values repeated each time_dim and leadtime_dim + params <- aperm(params, c(2,3,1)) + names(dim(params)) <- c(time_dim, leadtime_dim, 'coef') + } + + spei <- Apply(data = list(data = data, params = params), + target_dims = list(data = c(leadtime_dim, time_dim, memb_dim), + params = c(time_dim, leadtime_dim, 'coef')), + fun = .standardization, + coef = coef, + leadtime_dim = leadtime_dim, + time_dim = time_dim, memb_dim = memb_dim, + ref_period = ref_period, handle_infinity = handle_infinity, + cross_validation = cross_validation, param_error = param_error, + method = method, distribution = distribution, + na.rm = na.rm, + output_dims = list(spei = c(leadtime_dim, time_dim, memb_dim), + params = c(time_dim, leadtime_dim, 'coef')), + ncores = ncores) + return(spei) +} + +.standardization <- function(data, params, coef, leadtime_dim = 'time', + time_dim = 'syear', memb_dim = 'ensemble', + ref_period = NULL, handle_infinity = FALSE, + cross_validation = FALSE, param_error = -9999, + method = 'parametric', distribution = 'log-Logistic', + na.rm = FALSE) { + # data: [leadtime_dim, time_dim, memb_dim] + # params: [time_dim, leadtime_dim, 'coef'] + + # maximum number of parameters needed to define any of the considered distributions + ncoef <- length(coef) + nleadtime <- as.numeric(dim(data)[leadtime_dim]) + ntime <- as.numeric(dim(data)[time_dim]) + nmemb <- as.numeric(dim(data)[memb_dim]) + + if (cross_validation) { + params_result <- array(data = NA, dim = c(ntime, nleadtime, ncoef)) + } else { + params_result <- array(data = NA, dim = c(1, nleadtime, ncoef)) + } + names(dim(params_result)) <- c(time_dim, leadtime_dim, 'coef') + + if 
(all(is.na(data))) { + spei_mod <- array(NA, dim(data)) + # if the data [time, sdate, memb] has no variability it will raise an error + # further down, so we assign a value to the result and skip the step + } else if (anyNA(data) && !na.rm) { + spei_mod <- array(NA, dim(data)) + } else if (var(data, na.rm = T) == 0) { + spei_mod <- array(param_error, dim(data)) # Add this? + } else { + if (is.null(ref_period)) { + ref.start <- NULL + ref.end <- NULL + } else { + ref.start <- ref_period[[1]] + ref.end <- ref_period[[2]] + } + + spei_mod <- array(data = NA, dim = c(nleadtime, ntime, nmemb)) + names(dim(spei_mod)) <- c(leadtime_dim, time_dim, memb_dim) + + for (ff in 1:nleadtime) { + data_subset <- ClimProjDiags::Subset(data, along = leadtime_dim, + indices = ff, drop = 'selected') + params_tmp <- if (all(is.na(params))) {NULL} else {params[, ff, ]} + + spei_data <- .std(data = data_subset, coef = coef, ntime = ntime, + nmemb = nmemb, method = method, + distribution = distribution, na.rm = na.rm, + ref.start = ref.start, ref.end = ref.end, + params = params_tmp, handle_infinity = handle_infinity, + cross_validation = cross_validation) + + spei_mod[ff, , ] <- spei_data[[1]] + params_ff <- spei_data[[2]] + # lengthen dimension coef of params_ff in case it doesn't match the + # corresponding dimension of parms_months + if (!is.null(params_ff)) { + if (length(params_ff) < ncoef) { + params_ff <- append(params_ff, array(NA, dim = ncoef - length(params_ff))) + } + params_result[, ff, ] <- params_ff + } + } + } + return(list(spei = spei_mod, params = params_result)) +} + +.std <- function(data, coef, ntime, nmemb, method = 'parametric', + distribution = 'log-Logistic', na.rm = FALSE, + ref.start = NULL, ref.end = NULL, params = NULL, + handle_infinity = FALSE, cross_validation = FALSE) { + + # data: [time_dim, memb_dim] + # params: NULL or [(ntime), coef] + + fit = 'ub-pwm' # hard-coded + + if (method == 'non-parametric') { + bp <- matrix(0, length(data), 1) + for (i in 1:length(data)) { + bp[i,1] = sum(data[] <= data[i], na.rm = na.rm); # Writes the rank of the data + } + std_index <- qnorm((bp - 0.44)/(length(data) + 0.12)) + dim(std_index) <- c(ntime, nmemb) + # it won't return params to be used in exp_cor; also it is not using + # handle_infinity nor cross_validation + params_result <- NULL + } else { + std_index <- array(NA, c(ntime, nmemb)) + # Select window if necessary + if (!is.null(ref.start) && !is.null(ref.end)) { + data.fit <- window(data, ref.start, ref.end) + } else { + data.fit <- data + } + + if (cross_validation) { + loop_years <- ntime + } else { + loop_years <- 1 + } + + params_result <- array(NA, dim = c(loop_years, length(coef))) + colnames(params_result) <- names(coef) + + for (nsd in 1:loop_years) { # loop over years (in case of cross_validation) + # Cumulative series (acu) + if (cross_validation) { + acu <- as.vector(data.fit[-nsd, ]) + } else { + acu <- as.vector(data.fit) + } + + acu.sorted <- sort.default(acu, method = "quick") + # remove NAs (no need if(na.rm) because if there are NA and na.rm = F + # we don't get to this point) + acu.sorted <- acu.sorted[!is.na(acu.sorted)] + # else all acu was NA and we don't need to continue with this case + + if (length(acu.sorted) != 0) { + acu_sd <- sd(acu.sorted) + if (!is.na(acu_sd)) { + if (acu_sd != 0) { + if (distribution != "log-Logistic") { + pze <- sum(acu == 0) / length(acu) + acu.sorted = acu.sorted[acu.sorted > 0] + } + if (!is.null(params)) { + f_params <- as.vector(params) + params_result[nsd, ] <- f_params + } else { 
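+          # When no pre-computed 'params' are supplied, the distribution
+          # parameters are estimated from the accumulated series itself:
+          # probability weighted moments are computed, converted to L-moments
+          # and used to fit the requested distribution; the standardized index
+          # is then obtained as qnorm of the fitted cumulative distribution.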
+ # else coef will be NA + if (length(acu.sorted) >= 4) { + # Calculate probability weighted moments based on fit with lmomco or TLMoments + pwm = switch(fit, + "pp-pwm" = pwm.pp(acu.sorted, -0.35, 0, nmom = 3), + pwm.ub(acu.sorted, nmom = 3) + # TLMoments::PWM(acu.sorted, order = 0:2) + ) + + # Check L-moments validity + lmom <- pwm2lmom(pwm) + if (!(!are.lmom.valid(lmom) || anyNA(lmom[[1]]) || any(is.nan(lmom[[1]])))) { + # lmom fortran functions need specific inputs L1, L2, T3 + # this is handled by lmomco internally with lmorph + fortran_vec = c(lmom$lambdas[1:2], lmom$ratios[3]) + # Calculate parameters based on distribution with lmom then lmomco + f_params = switch(distribution, + "log-Logistic" = tryCatch(lmom::pelglo(fortran_vec), + error = function(e){parglo(lmom)$para}), + "Gamma" = tryCatch(lmom::pelgam(fortran_vec), + error = function(e){pargam(lmom)$para}), + "PearsonIII" = tryCatch(lmom::pelpe3(fortran_vec), + error = function(e){parpe3(lmom)$para})) + # Adjust if user chose log-Logistic and max-lik + if (distribution == 'log-Logistic' && fit == 'max-lik') { + f_params = parglo.maxlik(acu.sorted, f_params)$para + } + params_result[nsd, ] <- f_params + } # end for the case the L-moments are not valid (std_index will be NA) + } # end for case there are not enough values to estimate the parameters (std_index will be NA) + } # end estimation of f_param + # Calculate cdf based on distribution with lmom + if (all(is.na(params_result[nsd,]))) { + cdf_res <- NA + } else { + f_params <- params_result[nsd,] + f_params <- f_params[which(!is.na(f_params))] + cdf_res = switch(distribution, + "log-Logistic" = lmom::cdfglo(data, f_params), + "Gamma" = lmom::cdfgam(data, f_params), + "PearsonIII" = lmom::cdfpe3(data, f_params)) + } + + std_index_cv <- array(qnorm(cdf_res), dim = c(ntime, nmemb)) + + # Adjust if user chose Gamma or PearsonIII - Not tested: For future development + # if (distribution != 'log-Logistic') { + # std_index[ff,s] = qnorm(pze + (1-pze)*pnorm(std_index[ff,s])) # ff doesn't exist at this point + # } + if (cross_validation) { + std_index[nsd, ] <- std_index_cv[nsd, ] + } else { + std_index <- std_index_cv + } + } + } # end if for the case there is no variability + } # end if for the case all NA in acu + } # next year (in case of cross_validation or all done if cross_validation == F) + + if (handle_infinity) { # could also use "param_error" ?; we are giving it the min/max value of the grid point + std_index[is.infinite(std_index) & std_index < 0] <- min(std_index[!is.infinite(std_index)]) + std_index[is.infinite(std_index) & std_index > 0] <- max(std_index[!is.infinite(std_index)]) + } + # f_params will be params only if cross_validation is FALSE + # (otherwise there will be one f_params per year; + # but the output params will be read only in the case that + # it is called with cross_validation FALSE) + } + return(list(std_index, params_result)) +} + +.Accumulation <- function(data, dates_monthly, accum = 2, + time_dim = 'syear', leadtime_dim = 'time', + ncores = NULL) { + + accumulated <- Apply(data = list(data), + target_dims = list(data = c(leadtime_dim, time_dim)), + dates_monthly = dates_monthly, + accum = accum, + output_dims = c(leadtime_dim, time_dim), + leadtime_dim = leadtime_dim, time_dim = time_dim, + fun = .accumulation, + ncores = ncores)$output1 + + pos <- match(names(dim(accumulated)), names(dim(data))) + accumulated <- aperm(accumulated, pos) + + return(accumulated) + +} + +.accumulation <- function(data, dates_monthly, accum = 1, + time_dim = 'syear', 
                          leadtime_dim = 'time') {
+
+  # data: [time, syear]
+  dims <- dim(data)
+
+  data_vector <- array(NA, dim = length(dates_monthly))
+  count <- 1
+  for (dd in 1:length(dates_monthly)) {
+    if (dates_monthly[dd] == 1) {
+      data_vector[dd] <- as.vector(data)[count]
+      count <- count + 1
+    }
+  }
+
+  data_sum_x <- rollapply(data_vector, accum, sum)
+  # adds as many NAs as needed at the beginning to account for the months that
+  # cannot be accumulated (depends on accum), so that each position in the
+  # vector corresponds to the accumulation over the previous months (instead
+  # of the accumulation over the next months)
+  data_sum_x <- c(rep(NA, accum-1), data_sum_x)
+  # discard the months that don't appear in the original data
+  data_sum_x <- data_sum_x[which(dates_monthly == 1)]
+
+  accum_result <- array(data_sum_x, dim = c(dims))
+  return(accum_result)
+}
+
+.datesmask <- function(dates) {
+  ini <- as.Date(paste(lubridate::year(min(dates)), 01, 01, sep = '-'))
+  end <- as.Date(paste(lubridate::year(max(dates)), 12, 31, sep = '-'))
+  daily <- seq(from = ini, to = end, by = "day")
+  monthly <- daily[which(lubridate::day(daily) == 1)]
+  dates_mask <- array(0, dim = length(monthly))
+  for (dd in 1:length(dates)) {
+    ii <- which(monthly == as.Date(paste(lubridate::year(dates[dd]),
+                                         lubridate::month(dates[dd]),
+                                         01, sep = '-')))
+    dates_mask[ii] <- 1
+  }
+  return(dates_mask)
+}
\ No newline at end of file
diff --git a/02_calculate_spei/support_functions/convert_raster.R b/02_calculate_spei/support_functions/convert_raster.R
new file mode 100644
index 0000000000000000000000000000000000000000..f47c624ea04d7547be41ba1c21f19fea4589220d
--- /dev/null
+++ b/02_calculate_spei/support_functions/convert_raster.R
@@ -0,0 +1,118 @@
+convert_to_raster_era5land <- function(accum, vars, region) {
+  for (i in accum){
+    for (var in vars){
+
+      data <- readRDS(paste0("data/", region, "_", var, "_", i, ".rds"))
+
+      # Assign latitude and longitude coordinates from data
+      lon <- data$coords$longitude
+      lat <- data$coords$latitude
+
+      # Set time variables
+      nmonths <- data$dims[["time"]]
+      nyears <- data$dims[["syear"]]
+      ntimes <- nyears * nmonths
+
+      # Transform multidimensional array into list of rasters
+      out <- NULL
+      for (x in 
1:ntimes){ + + # Calculate i and j from x + m <- ((x - 1) %% nmonths) + 1 + y <- (((x - 1) %/% nmonths)) %% nyears + 1 + + # Extract date + b <- substr(data$attrs$Dates[x], 1, 10) + + if (is.na(b)) { + print(paste(x, b)) + next + } + # Extract array of values for year-month-day combination + a <- data$data[1, 1, m, y, 1, , ] #month, year, day + + # Convert to a raster + out[[b]] <- raster::raster(a, + xmn = min(lon), xmx = max(lon), + ymn = min(lat), ymx = max(lat)) + # Assign CRS + raster::crs(out[[b]]) <- "+init=epsg:4326" + print(x) + } + + # Convert list of rasters into stack + stack_name <- paste0(var, "_raster") + assign(stack_name, NULL) + assign(stack_name, raster::stack(out)) + print(paste0("calculated ", var)) + + saveRDS(get(stack_name), paste0("data/", region, "_", var, "_stack.rds")) + } + +} + +convert_to_raster_population_era5land <- function(region, vars, accum) { + population <- readRDS("../00_population/data/population.rds") + rr <- raster::rasterFromXYZ(population[, c("x", "y", "population")]) + rm(population) + + spei6_raster <- readRDS(paste0("data/", region, "_", vars[1], "_", accum[1], "_stack.rds")) + spei6_raster <- spei6_raster[[1]] + + extent(rr) <- extent(spei6_raster) + crs(rr) <- crs(spei6_raster) + rr_regrid <- raster::resample(rr, spei6_raster, method = "bilinear") + rm(rr, spei6_raster) + saveRDS(rr_regrid, paste0("data/", region, "_population_stack.rds")) +} \ No newline at end of file diff --git a/02_calculate_spei/support_functions/load_data.R b/02_calculate_spei/support_functions/load_data.R new file mode 100644 index 0000000000000000000000000000000000000000..314f4713bc8c73c81063008a7c6c8e931cda5931 --- /dev/null +++ b/02_calculate_spei/support_functions/load_data.R @@ -0,0 +1,90 @@ +load_data_era5land <- function(dates, region, + lats_min, lats_max, + lons_min, lons_max, + calculate_spei) { + + # precipitation ---- + # obtain path for startR call: + path_dataset <- "/esarchive/recon/ecmwf/era5land/monthly_mean/$var$_f1h/$var$_$sdate$.nc" + + # startR call: + data_prlr <- startR::Start(dat = path_dataset, + var = "prlr", + sdate = dates, + split_multiselected_dims = TRUE, + latitude = startR::values(list(lats_min, lats_max)), + latitude_reorder = startR::Sort(decreasing = TRUE), + longitude = startR::values(list(lons_min, lons_max)), + longitude_reorder = startR::CircularSort(-180, 180), + synonims = list(latitude = c("lat", "latitude"), + longitude = c("lon", "longitude")), + return_vars = list(latitude = "dat", + longitude = "dat", + time = c("sdate")), + retrieve = TRUE) + data_prlr <- data_prlr * 3600 * 24 * 30.44 * 1000 + attr(data_prlr, "Variables")$common$prlr$units <- "mm" + data_prlr <- CSTools::as.s2dv_cube(data_prlr) + if (!("ensemble" %in% names(data_prlr$data))) { + data_prlr$data <- s2dv::InsertDim(data_prlr$data, pos = 5, len = 1, name = "ensemble") + } + saveRDS(data_prlr, paste0("data/", region, "_prlr.rds")) + + # tasmax ---- + if (calculate_spei) { + path_dataset <- "/esarchive/recon/ecmwf/era5land/monthly_mean/$var$_f24h/$var$_$sdate$.nc" + + # startR call: + data_tasmax <- startR::Start(dat = path_dataset, + var = "tasmax", + sdate = dates, + split_multiselected_dims = TRUE, + latitude = startR::values(list(lats_min, lats_max)), + latitude_reorder = startR::Sort(decreasing = TRUE), + longitude = startR::values(list(lons_min, lons_max)), + longitude_reorder = startR::CircularSort(-180, 180), + synonims = list(latitude = c("lat", "latitude"), + longitude = c("lon", "longitude")), + return_vars = list(latitude = "dat", + longitude = 
"dat", + time = c("sdate")), + retrieve = TRUE) + data_tasmax <- data_tasmax - 273.15 + attr(data_tasmax, "Variables")$common$tasmax$units <- "C" + data_tasmax <- CSTools::as.s2dv_cube(data_tasmax) + if (!("ensemble" %in% names(data_tasmax$data))) { + data_tasmax$data <- s2dv::InsertDim(data_tasmax$data, pos = 5, + len = 1, name = "ensemble") + } + saveRDS(data_tasmax, paste0("data/", region, "_tasmax.rds")) + } + + # tasmin ---- + if (calculate_spei) { + path_dataset <- "/esarchive/recon/ecmwf/era5land/monthly_mean/$var$_f24h/$var$_$sdate$.nc" + + # startR call: + data_tasmin <- startR::Start(dat = path_dataset, + var = "tasmin", + sdate = dates, + split_multiselected_dims = TRUE, + latitude = startR::values(list(lats_min, lats_max)), + latitude_reorder = startR::Sort(decreasing = TRUE), + longitude = startR::values(list(lons_min, lons_max)), + longitude_reorder = startR::CircularSort(-180, 180), + synonims = list(latitude = c("lat", "latitude"), + longitude = c("lon", "longitude")), + return_vars = list(latitude = "dat", + longitude = "dat", + time = c("sdate")), + retrieve = TRUE) + data_tasmin <- data_tasmin - 273.15 + attr(data_tasmin, "Variables")$common$tasmin$units <- "C" + data_tasmin <- CSTools::as.s2dv_cube(data_tasmin) + if (!("ensemble" %in% names(data_tasmin$data))) { + data_tasmin$data <- s2dv::InsertDim(data_tasmin$data, pos = 5, + len = 1, name = "ensemble") + } + saveRDS(data_tasmin, paste0("data/", region, "_tasmin.rds")) + } +} \ No newline at end of file diff --git a/02_calculate_spei/support_functions/save_SPEI.R b/02_calculate_spei/support_functions/save_SPEI.R new file mode 100644 index 0000000000000000000000000000000000000000..88bef0a6770cac3de9a6a3d670d7837437539a4a --- /dev/null +++ b/02_calculate_spei/support_functions/save_SPEI.R @@ -0,0 +1,40 @@ +source("support_functions/zzz.R") # source("https://earth.bsc.es/gitlab/es/csindicators/-/raw/develop-SPEI/R/zzz.R") +source("support_functions/PeriodPET.R") # source("https://earth.bsc.es/gitlab/es/csindicators/-/raw/develop-SPEI/R/PeriodPET.R") +source("support_functions/PeriodSPEI.R") # source("https://earth.bsc.es/gitlab/es/csindicators/-/raw/develop-SPEI/R/PeriodSPEI.R") + +save_SPEI_era5land <- function(region, accum, + calculate_spi, calculate_spei, + ref_period) { + # SPI ---- + + # load data + data_prlr <- readRDS(paste0("data/", region, "_prlr.rds")) + data_tasmax <- readRDS(paste0("data/", region, "_tasmax.rds")) + data_tasmin <- readRDS(paste0("data/", region, "_tasmin.rds")) + + + for (i in accum) { + print(paste0("Calculating accum ", i)) + if (calculate_spi) { + spi <- CST_PeriodSPEI(exp = list(pr = data_prlr), + pet_exp = array(0, dim = dim(data_prlr$data)), + accum = i, + standardization = TRUE, + ref_period = ref_period, + handle_infinity = TRUE) + # save as RDS + saveRDS(spi, paste0("data/", region, "_spi_", i, ".rds")) + } + if (calculate_spei) { + spei <- CST_PeriodSPEI(exp = list(tmax = data_tasmax, + tmin = data_tasmin, + pr = data_prlr), + accum = i, + standardization = TRUE, + ref_period = ref_period, + handle_infinity = TRUE) + # save as rds + saveRDS(spei, paste0("data/", region, "_spei_", i, ".rds")) + } + } +} \ No newline at end of file diff --git a/02_calculate_spei/support_functions/zzz.R b/02_calculate_spei/support_functions/zzz.R new file mode 100644 index 0000000000000000000000000000000000000000..47d871d4d8d4f2a3d1dfe01b9a79c5aea8674e70 --- /dev/null +++ b/02_calculate_spei/support_functions/zzz.R @@ -0,0 +1,109 @@ +.position <- function(dates, ini_day, ini_month, end_day, end_month) 
{ + days <- as.numeric(format(dates, "%d")) + months <- as.numeric(format(dates, "%m")) + pos <- 1:length(dates) + position <- logical(length(dates)) + if (ini_month != end_month) { + pos <- sort(unique(c(pos[months == ini_month & days >= ini_day], + pos[months < end_month & months > ini_month], + pos[months == end_month & days <= end_day]))) + position[pos] <- TRUE + position[-pos] <- FALSE + } else { + pos <- sort(unique(c(pos[months == ini_month & + days >= ini_day & days <= end_day]))) + position[pos] <- TRUE + position[-pos] <- FALSE + } + if (!is.null(dim(dates))) { + dim(position) <- length(position) + if(!is.null(names(dim(dates)))) { + names(dim(position)) <- names(dim(dates)) + } + } + return(position) +} + +# Function to subset dimension indices of an array +.arraysubset <- function(x, dim, value, drop = FALSE) { + indices <- rep(list(bquote()), length(dim(x))) + if (is.character(dim)) { + dim <- which(names(dim(x)) %in% dim) + } + indices[dim] <- value + call <- as.call(c(list(as.name("["), quote(x)), indices, drop = drop)) + eval(call) +} + +# Function to insert a dimension in an array +.insertdim <- function(data, posdim, lendim, name = NULL) { + names(lendim) <- name + data <- array(data, dim = c(dim(data), lendim)) + ## Reorder dimension + if (posdim == 1) { + order <- c(length(dim(data)), 1:(length(dim(data)) - 1)) + data <- aperm(data, order) + } else if (posdim == length(dim(data))) { # last dim + + } else { # middle dim + order <- c(1:(posdim - 1), length(dim(data)), posdim:(length(dim(data)) - 1)) + data <- aperm(data, order) + } + return(data) +} + + +#======================= +# Read a powercurve file +# Create the approximation function +#======================= +read_pc <- function(file) { + pc <- list() + + # Read pc points + pc$points <- rbind(c(0, 0), read.delim(file, comment.char = "#")) + + # Create an approximating function + pc$fun <- approxfun(pc$points$WindSpeed, pc$points$Power, method = "linear", + yleft = NA, yright = 0) + + # Get the rated power from the power values + pc$attr$RatedPower <- max(pc$points$Power) + + return(pc) +} + +#======================= +# Evaluate the linear piecewise approximation function with the wind speed inputs to get wind power +#======================= +wind2power <- function(wind, pc) { + power <- pc$fun(wind) + return(power) +} + +#======================= +# Convert wind to power, and divide by rated power to obtain Capacity Factor values +#======================= +wind2CF <- function(wind, pc) { + power <- wind2power(wind, pc) + CF <- power / pc$attr$RatedPower + return(CF) +} + +.KnownLonNames <- function() { + known_lon_names <- c('lon', 'lons', 'longitude', 'x', 'i', 'nav_lon') +} + +.KnownLatNames <- function() { + known_lat_names <- c('lat', 'lats', 'latitude', 'y', 'j', 'nav_lat') +} + +.return2list <- function(data1, data2 = NULL) { + if (is.null(data1) & is.null(data2)) { + return(NULL) + } else if (is.null(data2)) { + return(list(data1)) + } else { + return(list(data1, data2)) + } +} \ No newline at end of file diff --git a/03_aggregate_spatial/aggregate_spei.R b/03_aggregate_spatial/aggregate_spei.R new file mode 100755 index 0000000000000000000000000000000000000000..6b9db327c1faa9b5ca7c2ba1daaaf45963bf1023 --- /dev/null +++ b/03_aggregate_spatial/aggregate_spei.R @@ -0,0 +1,91 @@ +#!/usr/bin/env Rscript +## --------------------------- +## +## Script name: aggregate SPEI spatially +## +## Purpose of script: Aggregate SPEI to spatial regions +## Author: Daniela Lührsen & Emily Ball +## Date Created: 2025-03-07 
+## Email: daniela.luhrsen@bsc.es +## +## --------------------------- + +## load up the packages +packages <- c("startR", "s2dv", "CSTools", "multiApply", "ClimProjDiags", + "SPEI", "zoo", "TLMoments", "lmomco", "lmom", "lubridate", "dplyr", + "raster", "sf") +install.packages(setdiff(packages, rownames(installed.packages()))) +lapply(packages, require, character.only = TRUE) + +## --------------------------- +# select parameters: +region <- "europe" +levels <- c("european_region", "europe", "NUTS3", "ISO3") +accum <- c(6, 12) +start_year <- 1980 +end_year <- 2024 +start_month <- 1 +end_month <- 12 + +# load SPEI raster +spei6_raster <- readRDS(paste0("../02_calculate_spei/data/", region, "_", + "spei_6_stack.rds")) +spei12_raster <- readRDS(paste0("../02_calculate_spei/data/", region, "_", + "spei_12_stack.rds")) + +# load population raster +pop_density <- readRDS(paste0("../02_calculate_spei/data/", region, + "_population_stack.rds")) +cell_areas <- area(pop_density) +pop_era5land <- pop_density * cell_areas + +for (level in levels) { + # load shapefile + adm1 <- sf::st_read(paste0("../01_shapefiles/data/outputs/", level, ".shp")) + adm1_id <- adm1$adm1_id + adm1_name <- adm1$adm1_name + + ntimestep <- raster::nlayers(spei6_raster) + monthly_adm1 <- data.frame() + for (l in 1:ntimestep){ + name <- names(spei6_raster[[l]]) + name <- substr(name, 2, 11) + spei6 <- exactextractr::exact_extract(spei6_raster[[l]], adm1, "mean") + spei12 <- exactextractr::exact_extract(spei12_raster[[l]], adm1, "mean") + if (level == "NUTS3") { + spei6_pw <- exactextractr::exact_extract(spei6_raster[[l]], adm1, "weighted_mean", + weights = pop_era5land) + spei12_pw <- exactextractr::exact_extract(spei12_raster[[l]], adm1, "weighted_mean", + weights = pop_era5land) + + new_df <- data.frame(month = rep(substr(name, 6, 7), times = length(adm1_id)), + year = rep(substr(name, 1, 4), times = length(adm1_id)), + adm1_id = adm1_id, + #adm1_name = adm1_name, + spei6 = spei6, + spei12 = spei12, + spei6_pw = spei6_pw, + spei12_pw = spei12_pw) + } else { + spei6_pw <- NULL + spei12_pw <- NULL + new_df <- data.frame(month = rep(substr(name, 6, 7), times = length(adm1_id)), + year = rep(substr(name, 1, 4), times = length(adm1_id)), + adm1_id = adm1_id, + #adm1_name = adm1_name, + spei6 = spei6, + spei12 = spei12) + } + + monthly_adm1 <- rbind(monthly_adm1, new_df) + print(l) + } + + write.csv(monthly_adm1, paste0("data/", level, "_spei_", + paste0(accum, collapse = "-"), + "_monthly_drought_indices.csv"), + row.names = FALSE) +} +### END + + diff --git a/04_analysis/figure_01.R b/04_analysis/figure_01.R new file mode 100644 index 0000000000000000000000000000000000000000..291ee32dd41296de7935973fd35564122050dcf0 --- /dev/null +++ b/04_analysis/figure_01.R @@ -0,0 +1,493 @@ +#!/usr/bin/env Rscript +## --------------------------- +## +## Script name: plot main figure +## +## Purpose of script: plot main figure +## Author: Daniela Lührsen & Emily Ball +## Date Created: 2025-03-07 +## Email: daniela.luhrsen@bsc.es +## +## --------------------------- + +library(sf) +library(ggplot2) +library(dplyr) +library(readr) +library(gridExtra) +library(cowplot) +library(patchwork) +library(rnaturalearth) +library(rnaturalearthdata) +library(RColorBrewer) +library(maps) + + +# select parameters: +level <- "NUTS3" +accum <- 12 # 6, 12 +month <- 9 +population_weight <- FALSE +# end parameter selection + +region <- "europe" +weights <- c("_pw"[population_weight], ""[!population_weight]) + +# load shapefile +adm <- 
sf::st_read(paste0("../01_shapefiles/data/outputs/", level, ".shp"))
+adm_id <- adm$adm1_id
+adm_name <- adm$adm1_name
+
+# transform to Eckert III to plot
+eckert_crs <- "+proj=eck3 +datum=WGS84 +units=m +no_defs"
+adm <- sf::st_transform(adm, crs = eckert_crs)
+
+# remove ultraperipheral regions and islands
+adm1 <- adm[which(
+  adm$adm1_id != "FRY" &
+    adm$adm1_id != "FRY1" &
+    adm$adm1_id != "FRY2" &
+    adm$adm1_id != "FRY3" &
+    adm$adm1_id != "FRY4" &
+    adm$adm1_id != "FRY5" &
+    adm$adm1_id != "FRY10" &
+    adm$adm1_id != "FRY20" &
+    adm$adm1_id != "FRY30" &
+    adm$adm1_id != "FRY40" &
+    adm$adm1_id != "FRY50" &
+    adm$adm1_id != "ES7" &
+    adm$adm1_id != "ES70" &
+    adm$adm1_id != "ES703" &
+    adm$adm1_id != "ES704" &
+    adm$adm1_id != "ES705" &
+    adm$adm1_id != "ES706" &
+    adm$adm1_id != "ES707" &
+    adm$adm1_id != "ES708" &
+    adm$adm1_id != "ES709" &
+    adm$adm1_id != "PT2" &
+    adm$adm1_id != "PT20" &
+    adm$adm1_id != "PT200" &
+    adm$adm1_id != "PT3" &
+    adm$adm1_id != "PT30" &
+    adm$adm1_id != "PT300" &
+    adm$adm1_id != "NO0B" &
+    adm$adm1_id != "NO0B1" &
+    adm$adm1_id != "NO0B2"
+  ),]
+
+
+# open SPEI file
+data_csv <- read.csv(paste0("../03_aggregate_spatial/data/", level, "_",
+                            "spei_6-12_monthly_drought_indices.csv"))
+
+# select relevant SPEI accumulation and weighting
+data_csv <- data_csv[, c("month", "year", "adm1_id", paste0("spei", accum, weights))]
+names(data_csv)[which(names(data_csv) == paste0("spei", accum, weights))] <- "spei"
+
+# define baseline and comparator periods
+ref_years <- 1981:2010
+comp_years <- 2015:2024
+
+# select month to plot (month == 9 plots April-September SPEI6)
+data_csv <- data_csv[which(data_csv$month == month), ]
+# flag years in which SPEI falls below -1.6 (extreme drought)
+data_csv$spei <- data_csv$spei < -1.6
+data_csv$spei[which(is.na(data_csv$spei))] <- FALSE
+
+# separate into reference and comparator periods
+ref_data <- data_csv[which(data_csv$year %in% ref_years), ]
+comp_data <- data_csv[which(data_csv$year %in% comp_years), ]
+
+# percentage of years in each period in which SPEI < -1.6
+ref_data <- ref_data %>% group_by(adm1_id) %>% summarise(spei_perc_ref = 100 * mean(spei, na.rm = TRUE))
+comp_data <- comp_data %>% group_by(adm1_id) %>% summarise(spei_perc_comp = 100 * mean(spei, na.rm = TRUE))
+
+# combine reference and comparator
+combined_df <- full_join(ref_data, comp_data, by = "adm1_id")
+
+# merge shapefile
+adm_merged <- merge(adm1, combined_df, by = "adm1_id")
+adm_merged$spei_perc_diff <- adm_merged$spei_perc_comp - adm_merged$spei_perc_ref
+
+############### plotting ###############
+# Get land polygons and transform to same CRS
+land <- ne_countries(scale = "medium", returnclass = "sf")
+land <- sf::st_transform(land, sf::st_crs(adm_merged))
+land <- sf::st_crop(land, adm_merged)
+
+if (level == "country") {
+  breaks <- c(0,1,2,3,4,5,6,7,8,9,10)
+  lw <- 0.2
+} else if (level == "NUTS3") {
+  breaks <- c(0,4,8,12,16,20,24,28,32,36,40)
+  lw <- 0.05
+}
+
+if (accum == 6) {
+  if (level == "country") {
+    breaks <- c(0,2,4,6,8,10,12,14,16,18,20)
+    lw <- 0.2
+  } else if (level == "NUTS3") {
+    breaks <- c(0,4,8,12,16,20,24,28,32,36,40)
+    lw <- 0.05
+  }
+}
+
+reds_reversed <- brewer.pal(n = 9, name = "Reds")
+
+# Left: percentage of years in reference period
+p1 <- ggplot() +
+  geom_sf(data = land, fill = "gray90", color = NA) + # Add land
+  geom_sf(data = adm_merged, aes(fill = spei_perc_ref), color = "black", linewidth = lw) +
+  scale_fill_stepsn(
+    breaks = breaks,
+    limits = range(breaks),
+    name = 'Percentage\nof years (%)',
+    colours = 
reds_reversed, + guide = guide_colorbar(barheight = 12) + ) + + theme_minimal() + + geom_sf(data = land, fill = NA, color = "black", linewidth = 0.15) + # Add borders + ggtitle("(a) Percentage of years (1981-2010) experiencing extreme drought") + +# Right: percentage of years in comparator period +p2 <- ggplot() + + geom_sf(data = land, fill = "gray90", fill = NA) + # Add land + geom_sf(data = adm_merged, aes(fill = spei_perc_comp), color = "black", linewidth = lw) + + scale_fill_stepsn(breaks = breaks, + name='Percentage\nof years (%)', + limits = range(breaks), + colours = reds_reversed, + guide = guide_colorbar(barheight = 12) + ) + + theme_minimal() + + geom_sf(data = land, fill = NA, color = "black", linewidth = 0.15) + # Add borders + ggtitle("(b) Percentage of years (2015-2024) experiencing extreme drought") + +# plot islands in inset panels +canarias <- adm[which( + !(adm$adm1_id != "ES7" & + adm$adm1_id != "ES70" & + adm$adm1_id != "ES703" & + adm$adm1_id != "ES704" & + adm$adm1_id != "ES705" & + adm$adm1_id != "ES706" & + adm$adm1_id != "ES707" & + adm$adm1_id != "ES708" & + adm$adm1_id != "ES709") + ),] + +acores <- adm[which( + !(adm$adm1_id != "PT2" & + adm$adm1_id != "PT20" & + adm$adm1_id != "PT200") + ),] + +madeira <- adm[which( + !(adm$adm1_id != "PT3" & + adm$adm1_id != "PT30" & + adm$adm1_id != "PT300") + ),] + + +# select spatial levels +if (level == "country") { + canarias <- canarias %>% group_by(CNTR_CODE) %>% summarise(geometry = sf::st_union(geometry), .groups = "drop") + acores <- acores %>% group_by(CNTR_CODE) %>% summarise(geometry = sf::st_union(geometry), .groups = "drop") + madeira <- madeira %>% group_by(CNTR_CODE) %>% summarise(geometry = sf::st_union(geometry), .groups = "drop") +} + +aco_poly <- sf::st_cast(acores, "POLYGON") +aco_poly <- aco_poly[-c(1),] +aco_poly <- aco_poly %>% summarise(geometry = sf::st_union(geometry), .groups = "drop") +acores$geometry <- aco_poly$geometry + +mad_poly <- sf::st_cast(madeira, "POLYGON") +mad_poly <- mad_poly[-c(1,2),] +mad_poly <- mad_poly %>% summarise(geometry = sf::st_union(geometry), .groups = "drop") +madeira$geometry <- mad_poly$geometry + + +# merge shapefile and calculate percentage of events occurring in the comparator +canarias_merged <- merge(canarias, combined_df, by = "adm1_id") +acores_merged <- merge(acores, combined_df, by = "adm1_id") +madeira_merged <- merge(madeira, combined_df, by = "adm1_id") + +canarias_merged$spei_perc_diff <- canarias_merged$spei_perc_comp - canarias_merged$spei_perc_ref +acores_merged$spei_perc_diff <- acores_merged$spei_perc_comp - acores_merged$spei_perc_ref +madeira_merged$spei_perc_diff <- madeira_merged$spei_perc_comp - madeira_merged$spei_perc_ref + +land_can <- sf::st_crop(land, canarias_merged) +land_aco <- sf::st_crop(land, acores_merged) +land_mad <- sf::st_crop(land, madeira_merged) + +land_aco <- sf::st_transform(land_aco, 5634) +acores_merged <- sf::st_transform(acores_merged, 5634) + +land_mad <- sf::st_transform(land_mad, 5634) +madeira_merged <- sf::st_transform(madeira_merged, 5634) + +land_can <- sf::st_transform(land_can, 5634) +canarias_merged <- sf::st_transform(canarias_merged, 5634) + +lw <- 0.075 + +# Left: percentage of years in reference period +p_can1 <- ggplot() + + geom_sf(data = land_can, fill = "gray90", color = NA) + # Add land + geom_sf(data = canarias_merged, aes(fill = spei_perc_ref), color = "black", linewidth = lw) + + scale_fill_stepsn(breaks = breaks, + limits = range(breaks), + colours = reds_reversed, + guide = 
guide_colorbar(barheight = 12) + ) + + theme_void() + + theme( + panel.border = element_rect(fill = NA, colour = "black"), + panel.background = element_rect(fill = alpha("lightblue", 0.35)), + legend.position = "none", + plot.title = element_text(size=7) + ) + + ggtitle("Canarias") + +# Left: percentage of years in reference period +p_aco1 <- ggplot() + + geom_sf(data = land_aco, fill = "gray90", color = NA) + # Add land + geom_sf(data = acores_merged, aes(fill = spei_perc_ref), color = "black", linewidth = lw) + + scale_fill_stepsn(breaks = breaks, + limits = range(breaks), + colours = reds_reversed, + guide = guide_colorbar(barheight = 12) + ) + + theme_void() + + theme( + panel.border = element_rect(fill = NA, colour = "black"), + panel.background = element_rect(fill = alpha("lightblue", 0.35)), + legend.position = "none", + plot.title = element_text(size=7) + ) + + ggtitle("Açores") + +# Left: percentage of years in reference period +p_mad1 <- ggplot() + + geom_sf(data = land_mad, fill = "gray90", color = NA) + # Add land + geom_sf(data = madeira_merged, aes(fill = spei_perc_ref), color = "black", linewidth = lw) + + scale_fill_stepsn(breaks = breaks, + limits = range(breaks), + colours = reds_reversed, + guide = guide_colorbar(barheight = 12) + ) + + theme_void() + + theme( + panel.border = element_rect(fill = NA, colour = "black"), + panel.background = element_rect(fill = alpha("lightblue", 0.35)), + legend.position = "none", + plot.title = element_text(size=7) + ) + + ggtitle("Madeira") + +# add insets +p_out1 <- ggdraw() + + draw_plot(p1) + + draw_plot(p_can1, width = 0.07, x = 0.12, y = -0.275) + + draw_plot(p_aco1, width = 0.07, x = 0.12, y = 0.025) + + draw_plot(p_mad1, width = 0.07, x = 0.12, y = -0.15) + +# Right: percentage of years in comparator period +p_can2 <- ggplot() + + geom_sf(data = land_can, fill = "gray90", color = NA) + # Add land + geom_sf(data = canarias_merged, aes(fill = spei_perc_comp), color = "black", linewidth = lw) + + scale_fill_stepsn(breaks = breaks, + limits = range(breaks), + colours = reds_reversed, + guide = guide_colorbar(barheight = 12) + ) + + theme_void() + + theme( + panel.border = element_rect(fill = NA, colour = "black"), + panel.background = element_rect(fill = alpha("lightblue", 0.35)), + legend.position = "none", + plot.title = element_text(size=7) + ) + + ggtitle("Canarias") + +# Right: percentage of years in comparator period +p_aco2 <- ggplot() + + geom_sf(data = land_aco, fill = "gray90", color = NA) + # Add land + geom_sf(data = acores_merged, aes(fill = spei_perc_comp), color = "black", linewidth = lw) + + scale_fill_stepsn(breaks = breaks, + limits = range(breaks), + colours = reds_reversed, + guide = guide_colorbar(barheight = 12) + ) + + theme_void() + + theme( + panel.border = element_rect(fill = NA, colour = "black"), + panel.background = element_rect(fill = alpha("lightblue", 0.35)), + legend.position = "none", + plot.title = element_text(size=7) + ) + + ggtitle("Açores") + +# Right: percentage of years in comparator period +p_mad2 <- ggplot() + + geom_sf(data = land_mad, fill = "gray90", color = NA) + # Add land + geom_sf(data = madeira_merged, aes(fill = spei_perc_comp), color = "black", linewidth = lw) + + scale_fill_stepsn(breaks = breaks, + limits = range(breaks), + colours = reds_reversed, + guide = guide_colorbar(barheight = 12) + ) + + theme_void() + + theme( + panel.border = element_rect(fill = NA, colour = "black"), + panel.background = element_rect(fill = alpha("lightblue", 0.35)), + legend.position = "none", + 
plot.title = element_text(size=7) + ) + + ggtitle("Madeira") + +p_out2 <- ggdraw() + + draw_plot(p2) + + draw_plot(p_can2, width = 0.07, x = 0.12, y = -0.275) + + draw_plot(p_aco2, width = 0.07, x = 0.12, y = 0.025) + + draw_plot(p_mad2, width = 0.07, x = 0.12, y = -0.15) + +# Combine the two maps +final_plot <- p_out1 / p_out2 + +# Save the plot as a PNG file +ggsave(paste0("figures/figure_main_", region, "_", level, "_spei_", accum, "_map", weights, "_", month, ".png"), + plot = final_plot, width = 7.25, height = 8, dpi = 300) + +print(paste0("figures/figure_main_", region, "_", level, "_spei_", accum, "_map", weights, "_", month, ".png")) + +# Save the plot as a pdf file +ggsave(paste0("figures/figure_main_", region, "_", level, "_spei_", accum, "_map", weights, "_", month, ".pdf"), + plot = final_plot, width = 7.25, height = 8, dpi = 300) + +print(paste0("figures/figure_main_", region, "_", level, "_spei_", accum, "_map", weights, "_", month, ".pdf")) + + + + +################# difference plot ################### +if (level == "country") { + breaks <- c(-10,-7,-4,-1,1,4,7,10) + lw <- 0.2 +} else if (level == "NUTS3") { + breaks <- c(-35,-25,-15,-5,5,15,25,35) + lw <- 0.05 +} +if (accum == 6) { + if (level == "country") { + breaks <- c(-20,-15,-10,-5,0,5,10,15,20) + lw <- 0.2 + } else if (level == "NUTS3") { + breaks <- c(-35,-25,-15,-5,5,15,25,35) + lw <- 0.05 + } +} + +reds_reversed <- rev(brewer.pal(n = 9, name = "RdBu")) + +p_diff <- ggplot() + + geom_sf(data = land, fill = "gray90", fill = NA) + # Add land + geom_sf(data = adm_merged, aes(fill = spei_perc_diff), color = "black", linewidth = lw) + + scale_fill_stepsn( + breaks = breaks, + limits = range(breaks), + name='Change in\npercentage', + colours = reds_reversed, + guide = guide_colorbar(barheight = 12) + ) + + theme( + panel.background = element_rect(fill = "white", color = NA), + panel.grid.major = element_line(color = "gray90") + ) + + geom_sf(data = land, fill = NA, color = "black", linewidth = 0.15) + # Add borders + ggtitle("Change in percentage of years experiencing extreme drought") + +# plot islands insets +p_can_diff <- ggplot() + + geom_sf(data = land_can, fill = "gray90", color = NA) + # Add land + geom_sf(data = canarias_merged, aes(fill = spei_perc_diff), color = "black", linewidth = lw) + + scale_fill_stepsn(breaks = breaks, + limits = range(breaks), + colours = reds_reversed, + guide = guide_colorbar(barheight = 12) + ) + + theme_void() + + theme( + panel.border = element_rect(fill = NA, colour = "black"), + panel.background = element_rect(fill = alpha("lightblue", 0.35)), + legend.position = "none", + plot.title = element_text(size=7) + ) + + ggtitle("Canarias") + +p_aco_diff <- ggplot() + + geom_sf(data = land_aco, fill = "gray90", color = NA) + # Add land + geom_sf(data = acores_merged, aes(fill = spei_perc_diff), color = "black", linewidth = lw) + + scale_fill_stepsn(breaks = breaks, + limits = range(breaks), + colours = reds_reversed, + guide = guide_colorbar(barheight = 12) + ) + + theme_void() + + theme( + panel.border = element_rect(fill = NA, colour = "black"), + panel.background = element_rect(fill = alpha("lightblue", 0.35)), + legend.position = "none", + plot.title = element_text(size=7) + ) + + ggtitle("Açores") + +p_mad_diff <- ggplot() + + geom_sf(data = land_mad, fill = "gray90", color = NA) + # Add land + geom_sf(data = madeira_merged, aes(fill = spei_perc_diff), color = "black", linewidth = lw) + + scale_fill_stepsn(breaks = breaks, + limits = range(breaks), + colours = reds_reversed, + guide 
= guide_colorbar(barheight = 12) + ) + + theme_void() + + theme( + panel.border = element_rect(fill = NA, colour = "black"), + panel.background = element_rect(fill = alpha("lightblue", 0.35)), + legend.position = "none", + plot.title = element_text(size=7) + ) + + ggtitle("Madeira") + +p_out_diff <- ggdraw() + + draw_plot(p_diff) + + draw_plot(p_can_diff, + width = 0.07, + x = 0.12, + y = -0.275 + ) + + draw_plot(p_aco_diff, + width = 0.07, + x = 0.12, + y = 0.025 + ) + + draw_plot(p_mad_diff, + width = 0.07, + x = 0.12, + y = -0.15 + ) + +final_plot <- p_out_diff + +# Save the plot as a PNG file +ggsave(paste0("figures/figure_diff_", region, "_", level, "_spei_", accum, "_map", weights, "_", month, ".png"), + plot = final_plot, width = 7.25, height = 4, dpi = 300) + +print(paste0("figures/figure_diff_", region, "_", level, "_spei_", accum, "_map", weights, "_", month, ".png")) + +# Save the plot as a pdf file +ggsave(paste0("figures/figure_diff_", region, "_", level, "_spei_", accum, "_map", weights, "_", month, ".pdf"), + plot = final_plot, width = 7.25, height = 4, dpi = 300) + +print(paste0("figures/figure_diff_", region, "_", level, "_spei_", accum, "_map", weights, "_", month, ".pdf")) diff --git a/04_analysis/figure_02.R b/04_analysis/figure_02.R new file mode 100644 index 0000000000000000000000000000000000000000..9830fef47da4948622def14c910b5c70b9108dd7 --- /dev/null +++ b/04_analysis/figure_02.R @@ -0,0 +1,138 @@ +#!/usr/bin/env Rscript +## --------------------------- +## +## Script name: plot timeseries SPEI, PET and precipitation +## +## Purpose of script: plot timeseries SPEI, PET and precipitation +## Author: Daniela Lührsen & Emily Ball +## Date Created: 2025-03-07 +## Email: daniela.luhrsen@bsc.es +## +## --------------------------- + +# load packages +packages <- c("raster", "dplyr", "tidyr", "ggplot2", "stringr", "terra", "patchwork") +lapply(packages, require, character.only = TRUE) + +# select parameters +accum <- 6 +region <- "europe" +month <- 3 + +# load the masked data +spei_masked <- unwrap(readRDS(paste0("../02_calculate_spei/data/temp/europe-masked_spei_", accum, ".rds"))) + +# extract spei_masked layers to dates +layer_names <- names(spei_masked) +time_full <- str_extract(layer_names, "\\d{4}\\.\\d{2}.\\d{2}") +time_full <- str_replace(time_full, "\\.", "-") +time_full <- str_replace(time_full, "\\.", "-") +names(spei_masked) <- as.Date(time_full) + +# define a function to classify SPEI into categories +classify_spei <- function(x) { + ifelse(is.na(x), NA, + ifelse(x <= -2, "Exceptional drought", + ifelse(x <= -1.6, "Extreme drought", + ifelse(x <= -1.3, "Severe drought", + ifelse(x <= -0.8, "Moderate drought", + ifelse(x <= -0.5, "Abnormally dry", NA)))))) +} + +# initialize dataframe to store results +spei_stats <- data.frame() + +# Loop through each time step (month) +for (i in 1:nlyr(spei_masked)) { + if (lubridate::month(names(spei_masked)[i]) == month) { + cat("Processing layer:", names(spei_masked)[i], "\n") + + # Get raster layer + layer <- spei_masked[[i]] + + # Extract values as vector + values <- terra::values(layer) + + # Classify values + class_vals <- classify_spei(values) + + # Calculate proportions + # sum(!is.na(values)) - total land area + prop_table <- table(class_vals, useNA = "no") / sum(!is.na(values)) + + if (length(prop_table) != 0){ + # Store results + temp_df <- as.data.frame(prop_table) %>% + rename(category = class_vals, proportion = Freq) %>% + mutate(month = lubridate::month(names(spei_masked)[i]), + year = 
lubridate::year(names(spei_masked)[i])) + + spei_stats <- bind_rows(spei_stats, temp_df) + } + } +} + +# View result +print(head(spei_stats)) +spei_stats$date <- as.Date(paste0(spei_stats$year,"-", spei_stats$month,"-01")) +drought_colors <- c( + "Abnormally dry" = "#ffff44", # light yellow + "Moderate drought" = "#ffd487", # beige-orange + "Severe drought" = "#ffab32", # orange + "Extreme drought" = "#f1061b", # red + "Exceptional drought" = "#790108", # dark red + "bla" = "grey90" # for NA or uncategorized +) +spei_stats$category <- factor(spei_stats$category, levels = c( + "", "Abnormally dry", "Moderate drought", "Severe drought", + "Extreme drought", "Exceptional drought" +)) +# plot time series +plot_out <- ggplot(spei_stats, aes(x = lubridate::year(date), y = proportion, fill = category)) + + geom_area(position = "stack", color = NA) + + scale_fill_manual(values = drought_colors) + + scale_y_continuous(labels = scales::percent_format(accuracy = 1))+#, + #sec.axis = sec_axis(~ ., name = "", labels = NULL)) + + theme_minimal(base_size = 14) + + theme( + panel.grid.minor = element_blank(), + # panel.grid.major.x = element_blank(), + legend.position = "bottom", + legend.title = element_blank() + ) + + labs( + # title = "(a)", + x = NULL, + y = "Percentage land area affected" + ) + + +# Save the plot as a PNG file +ggsave(paste0("figures/drought_class_", region, "_spei_", accum, "_", month, ".png"), + plot = plot_out, width = 12, height = 4, dpi = 300) + +print(paste0("figures/drought_class_", region, "_spei_", accum, "_", month, ".png")) + +ggsave(paste0("figures/drought_class_", region, "_spei_", accum, "_", month, ".pdf"), + plot = plot_out, width = 12, height = 4, dpi = 300) + +print(paste0("figures/drought_class_", region, "_spei_", accum, "_", month, ".pdf")) + + +### combine panels +source("figure_03.R") + +final_plot <- ( plot_out / p_1 ) + plot_layout(heights = c(4,3)) + plot_annotation(tag_levels = "a", + tag_prefix = "(", + tag_suffix = ")") + +# Save the plot as a PNG file +ggsave(paste0("figures/timeseries_", region, "_spei_", accum, "_", month, ".png"), + plot = final_plot, width = 12, height = 7, dpi = 300) + +print(paste0("figures/timeseries_", region, "_spei_", accum, "_", month, ".png")) + +ggsave(paste0("figures/timeseries_", region, "_spei_", accum, "_", month, ".pdf"), + plot = final_plot, width = 12, height = 7, dpi = 300) + +print(paste0("figures/timeseries_", region, "_spei_", accum, "_", month, ".pdf")) diff --git a/04_analysis/figure_03.R b/04_analysis/figure_03.R new file mode 100644 index 0000000000000000000000000000000000000000..a1b474e0747b2d602f21223939c3b4bf56bc993f --- /dev/null +++ b/04_analysis/figure_03.R @@ -0,0 +1,98 @@ +#!/usr/bin/env Rscript +## --------------------------- +## +## Script name: plot PET and precipitation +## +## Purpose of script: plot PET and precipitation, to be called by figure_02.R +## +## Author: Daniela Lührsen & Emily Ball +## Date Created: 2025-03-07 +## Email: daniela.luhrsen@bsc.es +## +## --------------------------- + +# month, region, accum variables imported from figure_02.R call + +# load packages +packages <- c("sf", "zoo", "raster", "terra", "dplyr", + "tidyr", "ggplot2", "stringr", "data.table") +lapply(packages, require, character.only = TRUE) + +# load the data +prlr_masked <- unwrap(readRDS("../02_calculate_spei/data/temp/europe-masked_prlr.rds")) +pet_masked <- unwrap(readRDS("../02_calculate_spei/data/temp/europe-masked_PET.rds")) + +# Get mean across all pixels for each time step (monthly) +prlr_ts <- 
global(prlr_masked, fun = "mean", na.rm = TRUE)[, 1] +pet_ts <- global(pet_masked, fun = "mean", na.rm = TRUE)[, 1] + +layer_names <- names(prlr_masked) +time_full <- str_extract(layer_names, "\\d{4}\\.\\d{2}.\\d{2}") +time_full <- str_replace(time_full, "\\.", "-") +time_full <- str_replace(time_full, "\\.", "-") + +# get accumulation of 6 month +prlr_sum <- rollsum(prlr_ts, accum, align = "right") +pet_sum <- rollsum(pet_ts, accum, align = "right") +time <- time_full[accum:length(time_full)] + +## as in 2022 report, select month now to normalize timeseries of filtered data +idxs <- which(lubridate::month(time) == month) +prlr_sum <- prlr_sum[idxs] +pet_sum <- pet_sum[idxs] +time <- time[idxs] + +normalize <- function(x) { + (x - mean(x, na.rm = TRUE)) / sd(x, na.rm = TRUE) +} + +prlr_norm <- normalize(prlr_sum) +pet_norm <- normalize(pet_sum) +pet_norm_rev <- -1 * pet_norm + +# Build data frame +df <- data.frame( + Date = as.Date(paste0(time)), + Precipitation = prlr_norm, + PET = pet_norm_rev +) + +# select month at this point if whole timeseries to be normalized +#df <- df[which(lubridate::month(df$Date) == 9), ] + +# Reshape to long format +df_long <- melt(df, id.vars = "Date", variable.name = "Variable", value.name = "Value") +p_1 <- ggplot(df_long, aes(x = lubridate::year(Date), y = Value, color = Variable)) + + geom_line(linewidth = 0.8) + + geom_smooth(method = "loess", span = 0.2, se = FALSE, linetype = "dashed") + + scale_color_manual( + values = c("Precipitation" = "blue", "PET" = "red"), + labels = c("Precipitation", "Potential Evapotranspiration") + ) + + scale_y_continuous( + name = "Normalized Precipitation", + sec.axis = sec_axis(~ -., name = "Normalized PET (reversed)") + ) + + geom_hline(yintercept = 0, linetype = "dotted") + + labs(x = NULL, y = NULL) + + theme_minimal(base_size = 14) + + theme( + legend.position = c(0.8,0.9), + legend.background = element_rect(fill = alpha("white", 0.7), color = NA), + legend.title = element_blank(), + panel.grid.minor = element_blank(), + plot.title = element_blank() + ) + +final_plot <- p_1 + +# Save the plot as a PNG file +ggsave(paste0("figures/PET_pr_", region, "_spei_", accum, "_", month, ".png"), + plot = final_plot, width = 12, height = 3, dpi = 300) + +print(paste0("figures/PET_pr_", region, "_spei_", accum, "_", month, ".png")) + +ggsave(paste0("figures/PET_pr_", region, "_spei_", accum, "_", month, ".pdf"), + plot = final_plot, width = 12, height = 3, dpi = 300) + +print(paste0("figures/PET_pr_", region, "_spei_", accum, "_", month, ".pdf")) diff --git a/04_analysis/figure_04.R b/04_analysis/figure_04.R new file mode 100644 index 0000000000000000000000000000000000000000..771ffd740c949e191cd763d25b5b3b1ede538196 --- /dev/null +++ b/04_analysis/figure_04.R @@ -0,0 +1,253 @@ +#!/usr/bin/env Rscript +## --------------------------- +## +## Script name: plot regions/population exposed to drought +## +## Purpose of script: plot percentage of population/regions and calculate +## changes for reference and comparator periods. 
+## +## Author: Daniela Lührsen & Emily Ball +## Date Created: 2025-03-07 +## Email: daniela.luhrsen@bsc.es +## +## --------------------------- + +# load packages +library(terra) +library(sf) +library(dplyr) +library(readr) +library(data.table) +library(ggplot2) + +# select parameters: +region <- "europe" +level <- "NUTS3" +population_weight <- FALSE +accum <- 12 # 6, 12 +month <- 3 + +# define baseline and comparator periods +ref_years <- 1981:2010 +comp_years <- 2015:2024 + +weights <- c("_pw"[population_weight], ""[!population_weight]) + +# open SPEI file +data_csv <- read.csv(paste0("../03_aggregate_spatial/data/", level, "_spei_6-12_monthly_drought_indices.csv")) + +data_csv <- data_csv[, c("month", "year", "adm1_id", paste0("spei", accum, weights))] +names(data_csv)[which(names(data_csv) == paste0("spei", accum, weights))] <- "spei" + +data_csv <- data_csv[which(data_csv$month == month), ] + +# select years in the comparator period +comp_data <- data_csv[which(data_csv$year %in% comp_years), ] +comp_data$spei <- comp_data$spei < -1.6 +comp_data$spei[which(is.na(comp_data$spei))] <- FALSE +comp_all <- comp_data %>% group_by(adm1_id) %>% summarise(spei_sum_comp = any(spei, na.rm = TRUE)) + +#### percentage of regions experiencing at least one year of extreme-exceptional drought in 2015-2024 +print(100*mean(comp_all$spei_sum_comp)) + +####################################################################### + +# select years in reference period +ref_data <- data_csv[which(data_csv$year %in% ref_years), ] +ref_data$spei <- ref_data$spei < -1.6 +ref_data$spei[which(is.na(ref_data$spei))] <- FALSE +ref_all <- ref_data %>% group_by(adm1_id) %>% summarise(spei_sum_ref = 100*mean(spei, na.rm = TRUE)) + +comp_data <- data_csv[which(data_csv$year %in% comp_years), ] +comp_data$spei <- comp_data$spei < -1.6 +comp_data$spei[which(is.na(comp_data$spei))] <- FALSE +comp_all <- comp_data %>% group_by(adm1_id) %>% summarise(spei_sum_comp = 100*mean(spei, na.rm = TRUE)) + +combined_df <- full_join(ref_all, comp_all, by = "adm1_id") +combined_df$spei_diff <- combined_df$spei_sum_comp - combined_df$spei_sum_ref + + +#### increase in the percentage of years experiencing extreme-exceptional drought +combined_df$increase <- combined_df$spei_diff > 0 +print(mean(combined_df$increase)*100) + +####################################################################################### +# comparing 1981-2024 and 2015-2024 + +# define baseline and comparator periods +ref_years <- 1981:2024 +comp_years <- 2015:2024 + + +ref_data <- data_csv[which(data_csv$year %in% ref_years), ] +ref_data$spei <- ref_data$spei < -1.6 +ref_data$spei[which(is.na(ref_data$spei))] <- FALSE + +comp_data <- ref_data[which(ref_data$year %in% comp_years), ] +ref_all <- ref_data %>% group_by(adm1_id) %>% summarise(spei_sum_ref = sum(spei, na.rm = TRUE)) +comp_all <- comp_data %>% group_by(adm1_id) %>% summarise(spei_sum_comp = sum(spei, na.rm = TRUE)) + + +combined_df <- full_join(ref_all, comp_all, by = "adm1_id") +combined_df$spei_perc <- combined_df$spei_sum_comp / combined_df$spei_sum_ref * 100 + +combined_df$spei_perc[which(is.na(combined_df$spei_perc))] <- 0 + +combined_df$perc_30 <- combined_df$spei_perc > 30 + +### percentage of regions where at least 30% of drought events occurred in comparator +print(mean(combined_df$perc_30)* 100) + + + +######## plot figure ######### + + +# open SPEI file +data_csv <- read.csv(paste0("../03_aggregate_spatial/data/", level, "_spei_6-12_monthly_drought_indices.csv")) + +data_csv <- 
data_csv[which(data_csv$month == month), ] +data_csv <- data_csv[which(data_csv$year > 1980), ] + +data_csv$spei6_pw <- data_csv$spei6_pw < -1.6 +data_csv$spei12_pw <- data_csv$spei12_pw < -1.6 +data_csv$spei6 <- data_csv$spei6 < -1.6 +data_csv$spei12 <- data_csv$spei12 < -1.6 + +data_csv_tmp <- data_csv %>% + group_by(year, month) %>% summarise(spei6 = 100 * mean(spei6, na.rm = TRUE), + spei12 = 100 * mean(spei12, na.rm = TRUE), + spei6_pw = 100 * mean(spei6_pw, na.rm = TRUE), + spei12_pw = 100 * mean(spei12_pw, na.rm = TRUE),) + + +df <- data.frame( + Date = data_csv_tmp$year, + SPEI6 = data_csv_tmp$spei6, + SPEI12 = data_csv_tmp$spei12, + SPEI6_pw = data_csv_tmp$spei6_pw, + SPEI12_pw = data_csv_tmp$spei12_pw +) + +spei6 <- data_csv_tmp$spei6 / 100 +spei12 <- data_csv_tmp$spei12 / 100 + +df_long <- melt(df, id.vars = "Date", variable.name = "Variable", value.name = "Value") + +p_1 <- ggplot(df_long, aes(x = Date, y = Value, color = Variable, linetype = Variable)) + + geom_line(linewidth = 0.8) + + scale_color_manual( + values = c("SPEI6" = "blue", "SPEI12" = "red", + "SPEI6_pw" = "blue", "SPEI12_pw" = "red"), + labels = c("SPEI6", "SPEI12", "SPEI6 weighted", "SPEI12 weighted") + ) + + scale_linetype_manual( + values = c("SPEI6" = "solid", "SPEI12" = "solid", + "SPEI6_pw" = "dashed", "SPEI12_pw" = "dashed"), + labels = c("SPEI6", "SPEI12", "SPEI6 weighted", "SPEI12 weighted") + ) + + scale_y_continuous( + name = "Percentage of regions experiencing\nextreme to exceptional drought" + ) + + labs(x = NULL, y = NULL) + + theme_minimal(base_size = 14) + + theme( + legend.position = c(0.8,0.9), + legend.background = element_rect(fill = alpha("white", 0.7), color = NA), + legend.title = element_blank(), + panel.grid.minor = element_blank(), + plot.title = element_blank() + ) + +final_plot <- p_1 + + +# open population density (pop per km2) +pop_density <- readRDS(paste0("../02_calculate_spei/data/", region, + "_population_stack.rds")) +cell_areas <- terra::area(pop_density) +pop_era5land <- pop_density * cell_areas +adm1 <- sf::st_read("../01_shapefiles/data/outputs/NUTS3.shp") + +# extract to NUTS3 level +pop_shp <- exactextractr::exact_extract(pop_era5land, adm1, "sum") +eur_pop <- sum(pop_shp) +pop_shp <- rep(pop_shp, max(data_csv$year) - min(data_csv$year) + 1) + +data_csv_tmp <- cbind(data_csv, pop_shp) + +data_csv_tmp$spei6_exp <- data_csv_tmp$spei6 * data_csv_tmp$pop_shp +data_csv_tmp$spei12_exp <- data_csv_tmp$spei12 * data_csv_tmp$pop_shp +data_csv_tmp$spei6_pw_exp <- data_csv_tmp$spei6_pw * data_csv_tmp$pop_shp +data_csv_tmp$spei12_pw_exp <- data_csv_tmp$spei12_pw * data_csv_tmp$pop_shp + +data_csv_tmp <- data_csv_tmp %>% + group_by(year, month) %>% summarise( spei6 = sum( spei6_exp, na.rm = TRUE)/eur_pop, + spei12 = sum( spei12_exp, na.rm = TRUE)/eur_pop, + spei6_pw = sum( spei6_pw_exp, na.rm = TRUE)/eur_pop, + spei12_pw = sum(spei12_pw_exp, na.rm = TRUE)/eur_pop,) + + + +df <- data.frame( + Date = data_csv_tmp$year, + SPEI6_pop = data_csv_tmp$spei6, + SPEI12_pop = data_csv_tmp$spei12, + SPEI6_reg = spei6, + SPEI12_reg = spei12 +) + + +df_long <- melt(df, id.vars = "Date", variable.name = "Variable", value.name = "Value") + +p_1 <- ggplot(df_long, aes(x = Date)) + + geom_line( + data = filter(df_long, Variable %in% c("SPEI6_pop", "SPEI12_pop")), + aes(y = Value, color = Variable, linetype = Variable), + linewidth = 0.8 + ) + + geom_line( + data = filter(df_long, Variable %in% c("SPEI6_reg", "SPEI12_reg")), + aes(y = Value, color = Variable, linetype = Variable), + linewidth = 0.8 + ) + 
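+  # colour encodes the SPEI accumulation period (blue = 6 months, red = 12 months),
+  # while linetype separates the exposure metric (solid = share of population,
+  # dashed = share of regions)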
+ scale_color_manual( + values = c("SPEI6_pop" = "blue", "SPEI12_pop" = "red", + "SPEI6_reg" = "blue", "SPEI12_reg" = "red"), + labels = c("SPEI6 (by population)", "SPEI12 (by population)", + "SPEI6 (by region)", "SPEI12 (by region)") + ) + + scale_linetype_manual( + values = c("SPEI6_pop" = "solid", "SPEI12_pop" = "solid", + "SPEI6_reg" = "dashed", "SPEI12_reg" = "dashed"), + labels = c("SPEI6 (by population)", "SPEI12 (by population)", + "SPEI6 (by region)", "SPEI12 (by region)") + ) + + scale_y_continuous( + name = "Percentage of regions experiencing\nextreme to exceptional drought", + labels = scales::percent_format(accuracy = 1), + sec.axis = sec_axis(~ ., name = "Percentage of population experiencing\nextreme to exceptional drought") + ) + + labs(x = NULL, y = NULL) + + theme_minimal(base_size = 14) + + theme( + legend.position = c(0.7,0.85), + legend.background = element_rect(fill = alpha("white", 0.7), color = NA), + legend.title = element_blank(), + panel.grid.minor = element_blank(), + plot.title = element_blank() + ) + +final_plot <- p_1 + +# Save the plot as a PNG file +ggsave(paste0("figures/percentage_pop_", region, "_", month, ".png"), + plot = final_plot, width = 12, height = 4, dpi = 300) + +print(paste0("figures/percentage_pop_", region, "_", month, ".png")) + +ggsave(paste0("figures/percentage_pop_", region, "_", month, ".pdf"), + plot = final_plot, width = 12, height = 4, dpi = 300) + +print(paste0("figures/percentage_pop_", region, "_", month, ".pdf")) + diff --git a/README.md b/README.md index 4cb63db2866a173a3ce44ad8f1219643933af339..82e839d05a11a7bfc065ef309ad9c4bfc7d5c0fc 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ This project is licensed under GNU General Public License version 3 ([GPL-3](htt Droughts are a major type of natural disaster, triggering a crisis with food security and public health. It is widely defined as a prolonged period of dry weather caused by a lack of rainfall. Such an extreme climate event can last from a few weeks to many decades and can affect from a few hundred to millions of square kilometres. We have assessed the drought condition using the Standardized Precipitation Evapotranspiration Index (SPEI). SPEI helps to detect, monitor and analyse the drought severity, duration, and extent globally. It is a standardised index with no units, where positive values of SPEI6 correspond to conditions of wet periods whereas negative values correspond to dry periods. -The **Drought** indicator, published in the [Lancet Countdown in Europe](https://www.lancetcountdown.org/europe/) report, tracks the vulnerability of a country (and/or its administrative units) to extreme drought conditions, starting in 1950 until near present time. +The **Drought** indicator, published in the [Lancet Countdown in Europe](https://www.lancetcountdown.org/europe/) report, tracks the vulnerability of a country (and/or its administrative units) to extreme drought conditions, starting in 1981 until 2024. ## Content This repository is organised into branches, with each branch corresponding to the respective year of report release. 
@@ -23,18 +23,14 @@ Access to a specific year report release by selecting the desired branch instead
-**Find [here](https://earth.bsc.es/gitlab/ghr/lcde-drought/-/blob/2024/) the branch of this repository corresponding to the latest report (2024).**
+**Find [here](https://earth.bsc.es/gitlab/ghr/lcde-drought/-/blob/2026/) the branch of this repository corresponding to the latest report (2026).**
 ## Authors
-Balakrishnan Solaraju-Murali, PhD
+Emily Ball, PhD
-Kim van Daalen, PhD
-
-Nube Gonzalez-Reviriego, PhD
-
-Alba Llabrés-Brustenga, PhD
+Daniela Lührsen
 Rachel Lowe, PhD
-Contact: kim.vandaalen@bsc.es
+Contact: emily.ball@bsc.es | daniela.luhrsen@bsc.es
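+
+## Example: interpreting SPEI values
+
+As a rough guide to how SPEI values relate to the drought classes shown in the figures, the minimal sketch below bins a vector of SPEI values with `cut()`. Only the extreme-to-exceptional cut-off (SPEI < -1.6) is taken from `04_analysis/figure_04.R`; the remaining breaks and the `classify_spei()` helper are illustrative assumptions and may differ from the exact thresholds used in `04_analysis/figure_02.R`.
+
+```r
+# Illustrative sketch only: the -1.6 cut-off comes from figure_04.R,
+# the other breaks are assumed and not taken from this repository.
+classify_spei <- function(spei) {
+  cut(spei,
+      breaks = c(-Inf, -2.0, -1.6, -1.3, -0.8, -0.5, Inf),
+      labels = c("Exceptional drought", "Extreme drought", "Severe drought",
+                 "Moderate drought", "Abnormally dry", ""),  # "" mirrors the uncategorised level
+      right = TRUE)
+}
+
+classify_spei(c(0.4, -0.9, -1.7, -2.3))
+# returns a factor: "", "Moderate drought", "Extreme drought", "Exceptional drought"
+```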