diff --git a/example_scripts/example_indicators.R b/example_scripts/example_indicators.R new file mode 100644 index 0000000000000000000000000000000000000000..aa8ec7bb5714e7b72612f10454862a61f5b333ec --- /dev/null +++ b/example_scripts/example_indicators.R @@ -0,0 +1,30 @@ +############################################################################### +## Author: A. Llabrés-Brustenga +## Description: Example script for use of module indicators. +############################################################################### + +# Load modules +source("modules/Loading/Loading.R") +source("modules/Units/Units.R") +source("modules/Indicators/Indicators.R") + +# Read recipe +recipe_file <- "recipes/examples/recipe_spei_spi.yml" +recipe <- prepare_outputs(recipe_file) + +# Load datasets +data_raw <- Loading(recipe) + +# Change units: very important to estimate evapotranspiration! +data_units <- Units(recipe, data_raw) + +# Obtain SPEI and/or SPI according to recipe +result <- Indicators(recipe, data_units) + +# the result is a list with the requested indicators, e.g.: +# > summary(result$SPEI$obs$data) +# Min. 1st Qu. Median Mean 3rd Qu. Max. NA's +# -Inf -0.782 -0.026 NaN 0.827 Inf 13200 +# > summary(result$SPEI$hcst$data) +# Min. 1st Qu. Median Mean 3rd Qu. Max. NA's +# -3.2 -0.8 0.0 0.0 0.8 3.3 330000 \ No newline at end of file diff --git a/modules/Indicators/Indicators.R b/modules/Indicators/Indicators.R new file mode 100644 index 0000000000000000000000000000000000000000..8ddd265595d7a3def77a36899d5d1075b36c1523 --- /dev/null +++ b/modules/Indicators/Indicators.R @@ -0,0 +1,109 @@ +# Load functions +source("modules/Indicators/R/data_format_csindicators.R") +source("modules/Indicators/R/spei_spi.R") +source("modules/Indicators/R/threshold.R") +source("modules/Indicators/R/data_transformation.R") +source("modules/Indicators/R/malaria_or_ticks.R") + +source("modules/Indicators/R/tmp/CSIndicators_CST_PeriodStandardization.R") #### tmp!!! + +Indicators <- function(recipe, data){ + + var.list <- strsplit(recipe$Analysis$Variables$name, ', ')[[1]] + ncores <- recipe$Analysis$ncores + + # SPEI + if (!is.null(recipe$Analysis$Workflow$Indicators$SPEI$return_spei)){ + if (recipe$Analysis$Workflow$Indicators$SPEI$return_spei){ + + # read recipe parameters + pet_method <- recipe$Analysis$Workflow$Indicators$SPEI$PET_method + if (pet_method == 'none'){pet_method <- NULL} + spei_accum <- recipe$Analysis$Workflow$Indicators$SPEI$Nmonths_accum + spei_standardization <- recipe$Analysis$Workflow$Indicators$SPEI$standardization + spei_stand_refperiod <- recipe$Analysis$Workflow$Indicators$SPEI$standardization_ref_period + if (is.null(spei_stand_refperiod)){ + spei_stand_refperiod <- c(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$hcst_end) + } + spei_stand_handleinf <- recipe$Analysis$Workflow$Indicators$SPEI$standardization_handle_infinity + + # call spei_spi function from modules/Indicators/R + result_spei <- spei_spi(data = data, indicator = 'spei', + var.list = var.list, + pet_method = pet_method, + accum = spei_accum, + standardization = spei_standardization, + stand_refperiod = spei_stand_refperiod, + stand_handleinf = spei_stand_handleinf, + ncores = ncores) + + } + } + + # SPI + if (!is.null(recipe$Analysis$Workflow$Indicators$SPI$return_spi)){ + if (recipe$Analysis$Workflow$Indicators$SPI$return_spi){ + + # read recipe parameters + spi_accum <- recipe$Analysis$Workflow$Indicators$SPI$Nmonths_accum + spi_standardization <- recipe$Analysis$Workflow$Indicators$SPI$standardization + spi_stand_refperiod <- recipe$Analysis$Workflow$Indicators$SPI$standardization_ref_period + if (is.null(spi_stand_refperiod)){ + spi_stand_refperiod <- c(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$hcst_end) + } + spi_stand_handleinf <- recipe$Analysis$Workflow$Indicators$SPI$standardization_handle_infinity + + # call spei_spi function from modules/Indicators/R + result_spi <- spei_spi(data = data, indicator = 'spi', + var.list = var.list, + pet_method = NULL, + accum = spi_accum, + standardization = spi_standardization, + stand_refperiod = spi_stand_refperiod, + stand_handleinf = spi_stand_handleinf, + ncores = ncores) + + } + } + + # Threshold based: + if (!is.null(recipe$Analysis$Workflow$Indicators$Threshold_based$return_thresholdbased)){ + if (recipe$Analysis$Workflow$Indicators$Threshold_based$return_thresholdbased){ + thrs <- recipe$Analysis$Workflow$Indicators$Threshold_based$threshold + return.values <- recipe$Analysis$Workflow$Indicators$Threshold_based$returnValues + result_threshold <- threshold(data, thrs, var.list, return.values) + } + } + + # Malaria indicator: + if (!is.null(recipe$Analysis$Workflow$Indicators$Malaria$return_climate_suitability)){ + if (recipe$Analysis$Workflow$Indicators$Malaria$return_climate_suitability){ + result_malaria <- list() + for (ssp in recipe$Analysis$Workflow$Indicators$Malaria$ssp){ + result_malaria[[ssp]] <- malaria_or_ticks(data, var.list, ssp) + } + } + } + # Tick-borne disease indicator: + if (!is.null(recipe$Analysis$Workflow$Indicators$Ticks$return_climate_suitability)){ + if (recipe$Analysis$Workflow$Indicators$Ticks$return_climate_suitability){ + result_ticks <- list() + for (ssp in recipe$Analysis$Workflow$Indicators$Ticks$ssp){ + result_ticks[[ssp]] <- malaria_or_ticks(data, var.list, ssp) + } + } + } + + indicators.list <- c('SPEI' = 'result_spei', 'SPI' = 'result_spi', + 'ThresholdBased' = 'result_threshold', + 'Malaria' = 'result_malaria', + 'Ticks' = 'result_ticks') + result <- list() + for (ind in indicators.list){ + if (exists(ind)){ + result[[names(indicators.list)[which(indicators.list == ind)]]] <- eval(parse(text = ind)) + } + } + + return(result) +} diff --git a/modules/Indicators/R/data_format_csindicators.R b/modules/Indicators/R/data_format_csindicators.R new file mode 100644 index 0000000000000000000000000000000000000000..13b7dcb0307b208bcd555f88d55d48f9a51d9533 --- /dev/null +++ b/modules/Indicators/R/data_format_csindicators.R @@ -0,0 +1,49 @@ +# data format for input in CSIndicators SPEI functions +data_format_csindicators <- function(data, vars, var.list, lat, lon, dates){ + dict <- c('tas' = 'tmean', + 'tasmax' = 'tmax', + 'tasmin' = 'tmin', + 'prlr' = 'pr', + 'pet' = 'pet', + 'hurs' = 'hurs', + 'tdps' = 'tdps') + dim.names <- names(dim(data$data)) + + result <- list() + for (var in vars){ + if (var %in% var.list){ + data.var <- list(data = data$data[,which(var.list == var),,,,,,,], + coords = list(latitude = lat, longitude = lon)) + + # read original units + var.metadata.pos <- which(names(data$attrs$Variable$metadata) == var) + units <- data$attrs$Variable$metadata[[var.metadata.pos]]$units + # to keep original dims + for(nn in dim.names){ + if(!(nn %in% names(dim(data.var$data)))){ + data.var$data <- s2dv::InsertDim(data.var$data, + pos = 1, + len = 1, + name = nn) + } + } + dim.order <- match(dim.names, names(dim(data.var$data))) + data.var$data <- aperm(data.var$data, dim.order) + + # transform to s2dv_cube with metadata + attr(data.var, 'class') <- 's2dv_cube' + data.var$attrs$Dates <- dates + data.var$attrs$Units <- units + } else { + data.var <- NULL + } + + # append all variables in a list of s2dv_cubes + result <- append(result, list(data.var)) + + } + + names(result) <- dict[vars] + + return(result) +} diff --git a/modules/Indicators/R/data_transformation.R b/modules/Indicators/R/data_transformation.R new file mode 100644 index 0000000000000000000000000000000000000000..2be32e6d62608bbb3fe3e07dd9e99fa58b1c2558 --- /dev/null +++ b/modules/Indicators/R/data_transformation.R @@ -0,0 +1,15 @@ +data_transform_hurs <- function(tas, tdps){ + # obtain relative humidity (in %; tas and tdps need to be in C) + # ref: https://www.osti.gov/servlets/purl/548871&lang=en + # Alduchov OA, Eskridge RE. + # Improved Magnus form approximation of saturation vapor pressure. + # Journal of Applied Meteorology and Climatology. 1996 Apr;35(4):601-9 + + b = 17.625 + c = 243.04 + hurs <- tas # for metadata (lon, lat, and attrs$Dates) + hurs$data <- 100*exp(b * c * (tdps$data - tas$data) / ((c + tdps$data) * (c + tas$data))) + hurs$attrs$Units <- '%' + + return(hurs) # metadata: coords and dates +} \ No newline at end of file diff --git a/modules/Indicators/R/malaria_or_ticks.R b/modules/Indicators/R/malaria_or_ticks.R new file mode 100644 index 0000000000000000000000000000000000000000..233675bcd39baadf6223dfd9d21d143f3b2c4568 --- /dev/null +++ b/modules/Indicators/R/malaria_or_ticks.R @@ -0,0 +1,79 @@ +malaria_or_ticks <- function(data, var.list, ssp){ + + # define thresholds: + if (tolower(ssp) == 'p.falciparum'){ + thrs_tas <- c(18,32) + thrs_prlr <- c(80, Inf) + thrs_hurs <- c(60, Inf) + var.thrs.list <- c('tas' = 'thrs_tas', 'prlr' = 'thrs_prlr', 'hurs' = 'thrs_hurs') + # check in prepare_outputs that variables in names(var.list) are requested + } else if (tolower(ssp) == 'p.vivax'){ + thrs_tas <- c(14.5,33) + thrs_prlr <- c(80, Inf) + thrs_hurs <- c(60, Inf) + var.thrs.list <- c('tas' = 'thrs_tas', 'prlr' = 'thrs_prlr', 'hurs' = 'thrs_hurs') + # check in prepare_outputs that variables in names(var.list) are requested + } else if (tolower(ssp) == 'i.ricinus'){ + thrs_tas <- c(10,26) + thrs_hurs <- c(45, Inf) + var.thrs.list <- c('tas' = 'thrs_tas', 'hurs' = 'thrs_hurs') + # check in prepare_outputs that variables in names(var.list) are requested + } else { + var.thrs.list <- c() # no variables to comply thresholds + } + + # obtain climatic suitability for obs, hcst and fcst (or elements of "data") + result <- data + for (ll in names(result)){ + data_element <- data[[ll]] + if (!is.null(data_element)){ + # prepare data + tas <- data_format_csindicators(data_element, vars = c('tas'), var.list =var.list, + lat = data_element$coords$latitude, + lon = data_element$coords$longitude, + dates = data_element$attrs$Dates)[[1]] + + if ('hurs' %in% var.list){ + hurs <- data_format_csindicators(data_element, vars = c('hurs'), var.list = var.list, + lat = data_element$coords$latitude, + lon = data_element$coords$longitude, + dates = data_element$attrs$Dates)[[1]] + } else { + tdps <- data_format_csindicators(data_element, vars = c('tdps'), var.list = var.list, + lat = data_element$coords$latitude, + lon = data_element$coords$longitude, + dates = data_element$attrs$Dates)[[1]] + hurs <- data_transform_hurs(tas, tdps) + } + if ('prlr' %in% var.list){ + prlr <- data_format_csindicators(data_element, vars = c('prlr'), var.list = var.list, + lat = data_element$coords$latitude, + lon = data_element$coords$longitude, + dates = data_element$attrs$Dates)[[1]] + } + + # create result object + if (length(var.thrs.list > 0)){ + result_element <- eval(parse(text = names(var.thrs.list)[1])) # tas + result_element$attrs$Units <- NULL + result_element$data[which(!is.na(result_element$data))] <- 1 + } else { + result_element <- NULL + } + + # apply threholds + if (length(var.thrs.list > 0)){ + for (var in names(var.thrs.list)){ + thrs <- eval(parse(text = var.thrs.list[which(names(var.thrs.list) == var)])) + var.data <- eval(parse(text = var)) + new.data <- array(0, dim = dim(result_element$data)) + new.data[which(var.data$data >= thrs[1] & var.data$data <= thrs[2])] <- 1 + result_element$data <- result_element$data * new.data + result_element$metadata <- append(result_element$metadata, paste0(var, ' suitability threshold: ', thrs[1], ' to ', thrs[2], ' (', var.data$attrs$Units, ')')) + } + } + } # end "if (!is.null...)" + result[[ll]] <- result_element + } # end "for" + return(result) +} \ No newline at end of file diff --git a/modules/Indicators/R/spei_spi.R b/modules/Indicators/R/spei_spi.R new file mode 100644 index 0000000000000000000000000000000000000000..3bc61aed69e83f78ed8bcde4f68f92d184527729 --- /dev/null +++ b/modules/Indicators/R/spei_spi.R @@ -0,0 +1,209 @@ +spei_spi <- function(data, indicator, + var.list, + pet_method = NULL, + accum, + standardization, + stand_refperiod, + stand_handleinf, + ncores = NULL){ + + lat_obs <- data$obs$coords$latitude + lon_obs <- data$obs$coords$longitude + dates_obs <- data$obs$attrs$Dates + lat_hcst <- data$hcst$coords$latitude + lon_hcst <- data$hcst$coords$longitude + dates_hcst <- data$hcst$attrs$Dates + lat_fcst <- data$fcst$coords$latitude + lon_fcst <- data$fcst$coords$longitude + dates_fcst <- data$fcst$attrs$Dates + + if (indicator == 'spei'){ + + # obtain PET + if (is.null(pet_method)){ # alredy checked (prepare_outputs) that PET exists + # in the data when SPEI is requested without PET method + + data_obs <- data_format_csindicators(data$obs, + vars = c('pet', 'prlr'), + var.list = var.list, + lat = lat_obs, + lon = lon_obs, + dates = dates_obs) + if (!is.null(data$hcst)){ + data_hcst <- data_format_csindicators(data$hcst, + vars = c('pet', 'prlr'), + var.list = var.list, + lat = lat_hcst, + lon = lon_hcst, + dates = dates_hcst) + } else { + data_hcst <- NULL + } + if (!is.null(data$fcst)){ + data_fcst <- data_format_csindicators(data$fcst, + vars = c('pet', 'prlr'), + var.list = var.list, + lat = lat_fcst, + lon = lon_fcst, + dates = dates_fcst) + } else { + data_fcst <- NULL + } + + } else { + + if (pet_method == 'hargreaves'){ + vars <- c('tasmax', 'tasmin') + } else if (pet_method == 'hargreaves_modified'){ + vars <- c('tasmax', 'tasmin', 'prlr') + } else if (pet_method == 'thornthwaite'){ + vars <- c('tas') + } + + # add prlr to the data for prlr-pet + if (!('prlr' %in% vars)){vars <- c(vars, 'prlr')} + + # call CST_PeriodPET from CSIndicators + data_obs <- data_format_csindicators(data$obs, + vars = vars, + var.list = var.list, + lat = lat_obs, + lon = lon_obs, + dates = dates_obs) + data_obs$pet <- CST_PeriodPET(data = data_obs, + pet_method = pet_method, + ncores = ncores) + + if (!is.null(data$hcst)){ + data_hcst <- data_format_csindicators(data$hcst, + vars = vars, + var.list = var.list, + lat = lat_hcst, + lon = lon_hcst, + dates = dates_hcst) + data_hcst$pet <- CST_PeriodPET(data = data_hcst, + pet_method = pet_method, + ncores = ncores) + } + + if (!is.null(data$fcst)){ + data_fcst <- data_format_csindicators(data$fcst, + vars = vars, + var.list = var.list, + lat = lat_fcst, + lon = lon_fcst, + dates = dates_fcst) + data_fcst$pet <- CST_PeriodPET(data = data_fcst, + pet_method = pet_method, + ncores = ncores) + } + } + + # Obtain difference Precipitation - PET + data_obs_diff <- data_obs$pr + data_obs_diff$data <- data_obs$pr$data - data_obs$pet$data + + if (!is.null(data$hcst)){ + data_hcst_diff <- data_hcst$pr + data_hcst_diff$data <- data_hcst$pr$data - data_hcst$pet$data + } + + if (!is.null(data$fcst)){ + data_fcst_diff <- data_fcst$pr + data_fcst_diff$data <- data_fcst$pr$data - data_fcst$pet$data + } + + } else { # spi (no PET calculation and use of precipitation directly instead of Precipitation - PET) + + data_obs <- data_format_csindicators(data$obs, + vars = 'prlr', + var.list = var.list, + lat = lat_obs, + lon = lon_obs, + dates = dates_obs) + data_obs_diff <- data_obs$pr + + if (!is.null(data$hcst)){ + data_hcst <- data_format_csindicators(data$hcst, + vars = 'prlr', + var.list = var.list, + lat = lat_hcst, + lon = lon_hcst, + dates = dates_hcst) + data_hcst_diff <- data_hcst$pr + } + + if (!is.null(data$fcst)){ + data_fcst <- data_format_csindicators(data$fcst, + vars = 'prlr', + var.list = var.list, + lat = lat_fcst, + lon = lon_fcst, + dates = dates_fcst) + data_fcst_diff <- data_fcst$pr + } + } + + #### same workflow for SPEI and SPI starting here + + # call CST_PeriodAccumulation function from CSIndicators + data_obs_accum <- CST_PeriodAccumulation(data = data_obs_diff, + rollwidth = accum, + sdate_dim = 'syear', + ncores = ncores) + if (!is.null(data$hcst)){ + data_hcst_accum <- CST_PeriodAccumulation(data = data_hcst_diff, + rollwidth = accum, + sdate_dim = 'syear', + ncores = ncores) + } else { + data_hcst_accum <- NULL + } + if (!is.null(data$fcst)){ + data_fcst_accum <- CST_PeriodAccumulation(data = data_fcst_diff, + rollwidth = accum, + sdate_dim = 'syear', + ncores = ncores) + } else { + data_fcst_accum <- NULL + } + + # call CST_PeriodStandardization function from CSIndicators + if (standardization){ + data_obs_ind <- CST_PeriodStandardization (data = data_obs_accum, + data_cor = NULL, + ref_period = stand_refperiod, + handle_infinity = stand_handleinf, + ncores = ncores) + + if (!is.null(data$hcst)){ + data_hcst_ind <- CST_PeriodStandardization (data = data_hcst_accum, + data_cor = NULL, + ref_period = stand_refperiod, + handle_infinity = stand_handleinf, + ncores = ncores) + } else { + data_hcst_ind <- NULL + } + + if (!is.null(data$fcst)){ + data_fcst_ind <- CST_PeriodStandardization (data = data_hcst_accum, + data_cor = data_fcst_accum, + ref_period = stand_refperiod, + handle_infinity = stand_handleinf, + ncores = ncores) + } else { + data_fcst_ind <- NULL + } + } else { + data_obs_ind <- data_obs_accum + data_hcst_ind <- data_hcst_accum + data_fcst_ind <- data_fcst_accum + } + + # result: spi or spei (create list of previous data s2dv_cubes) + result <- list(data_hcst_ind, data_fcst_ind, data_obs_ind) + names(result) <- c('hcst', 'fcst', 'obs') + + return(result) +} \ No newline at end of file diff --git a/modules/Indicators/R/threshold.R b/modules/Indicators/R/threshold.R new file mode 100644 index 0000000000000000000000000000000000000000..5c2b100af601b78d60034a1b1b89851134bc4f4b --- /dev/null +++ b/modules/Indicators/R/threshold.R @@ -0,0 +1,61 @@ +threshold <- function(data, thrs, var.list, return.values = TRUE){ + if (!is.null(data$obs)){ + result_obs <- c() + dim_var <- which(names(dim(data$obs$data)) == 'var') + for(var in var.list){ + data_tmp <- Subset(data$obs$data, along = 'var', indices = which(var.list == var), drop = FALSE) + data_threshold <- data_tmp + data_threshold[which(data_tmp >= thrs[[which(var.list == var)]][[1]] & data_tmp <= thrs[[which(var.list == var)]][[2]])] <- TRUE + data_threshold[which(data_tmp < thrs[[which(var.list == var)]][[1]] | data_tmp > thrs[[which(var.list == var)]][[2]])] <- FALSE + data_tmp[which(!data_threshold)] <- NA + if (return.values){ + result_obs <- abind(result_obs, data_tmp, along = dim_var) + } else { + result_obs <- abind(result_obs, data_threshold, along = dim_var) + } + } + names(dim(result_obs)) <- names(dim(data$obs$data)) + data$obs$data <- result_obs + } + + if (!is.null(data$hcst)){ + result_hcst <- c() + dim_var <- which(names(dim(data$hcst$data)) == 'var') + for(var in var.list){ + data_tmp <- Subset(data$hcst$data, along = 'var', indices = which(var.list == var), drop = FALSE) + data_threshold <- data_tmp + data_threshold[which(data_tmp >= thrs[[which(var.list == var)]][[1]] & data_tmp <= thrs[[which(var.list == var)]][[2]])] <- TRUE + data_threshold[which(data_tmp < thrs[[which(var.list == var)]][[1]] | data_tmp > thrs[[which(var.list == var)]][[2]])] <- FALSE + data_tmp[which(!data_threshold)] <- NA + if (return.values){ + result_hcst <- abind(result_hcst, data_tmp, along = dim_var) + } else { + result_hcst <- abind(result_hcst, data_threshold, along = dim_var) + } + } + names(dim(result_hcst)) <- names(dim(data$hcst$data)) + data$hcst$data <- result_hcst + } + + if (!is.null(data$fcst)){ + result_fcst <- c() + dim_var <- which(names(dim(data$fcst$data)) == 'var') + for(var in var.list){ + data_tmp <- Subset(data$fcst$data, along = 'var', indices = which(var.list == var), drop = FALSE) + data_threshold <- data_tmp + data_threshold[which(data_tmp >= thrs[[which(var.list == var)]][[1]] & data_tmp <= thrs[[which(var.list == var)]][[2]])] <- TRUE + data_threshold[which(data_tmp < thrs[[which(var.list == var)]][[1]] | data_tmp > thrs[[which(var.list == var)]][[2]])] <- FALSE + data_tmp[which(!data_threshold)] <- NA + if (return.values){ + result_fcst <- abind(result_fcst, data_tmp, along = dim_var) + } else { + result_fcst <- abind(result_fcst, data_threshold, along = dim_var) + } + } + names(dim(result_fcst)) <- names(dim(data$fcst$data)) + data$fcst$data <- result_fcst + } + + #result <- list(obs = result_obs, hcst = result_hcst, fcst = result_fcst) + return(data) +} \ No newline at end of file diff --git a/modules/Indicators/R/tmp/CSIndicators_CST_PeriodStandardization.R b/modules/Indicators/R/tmp/CSIndicators_CST_PeriodStandardization.R new file mode 100644 index 0000000000000000000000000000000000000000..b71270f14086d2db804f4a046b0766b6f1952edc --- /dev/null +++ b/modules/Indicators/R/tmp/CSIndicators_CST_PeriodStandardization.R @@ -0,0 +1,647 @@ +#'Compute the Standardization of Precipitation-Evapotranspiration Index +#' +#'The Standardization of the data is the last step of computing the SPEI +#'(Standarized Precipitation-Evapotranspiration Index). With this function the +#'data is fit to a probability distribution to transform the original values to +#'standardized units that are comparable in space and time and at different SPEI +#'time scales. +#' +#'Next, some specifications for the calculation of the standardization will be +#'discussed. If there are NAs in the data and they are not removed with the +#'parameter 'na.rm', the standardization cannot be carried out for those +#'coordinates and therefore, the result will be filled with NA for the +#'specific coordinates. When NAs are not removed, if the length of the data for +#'a computational step is smaller than 4, there will not be enough data for +#'standardization and the result will be also filled with NAs for those coordinates. +#'About the distribution used to fit the data, there are only two possibilities: +#''log-logistic' and 'Gamma'. The 'Gamma' method works only when precipitation +#'is the sole variable provided, and all other variables are 0 because it is positive +#'defined (SPI indicator). When only 'data' is provided ('data_cor' is NULL) the +#'standardization is computed with cross validation. This function is built to +#'be compatible with other tools in that work with 's2dv_cube' object +#'class. The input data must be this object class. If you don't work with +#''s2dv_cube', see PeriodStandardization. For more information on the SPEI +#'indicator calculation, see CST_PeriodPET and CST_PeriodAccumulation. +#' +#'@param data An 's2dv_cube' that element 'data' stores a multidimensional +#' array containing the data to be standardized. +#'@param data_cor An 's2dv_cube' that element 'data' stores a multidimensional +#' array containing the data in which the standardization should be applied +#' using the fitting parameters from 'data'. +#'@param time_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'syear'. +#'@param leadtime_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'time'. +#'@param memb_dim A character string indicating the name of the dimension in +#' which the ensemble members are stored. When set it to NULL, threshold is +#' computed for individual members. +#'@param ref_period A list with two numeric values with the starting and end +#' points of the reference period used for computing the index. The default +#' value is NULL indicating that the first and end values in data will be +#' used as starting and end points. +#'@param params An optional parameter that needs to be a multidimensional array +#' with named dimensions. This option overrides computation of fitting +#' parameters. It needs to be of same time dimensions (specified in 'time_dim' +#' and 'leadtime_dim') of 'data' and a dimension named 'coef' with the length +#' of the coefficients needed for the used distribution (for 'Gamma' coef +#' dimension is of lenght 2, for 'log-Logistic' is of length 3). It also needs +#' to have a leadtime dimension (specified in 'leadtime_dim') of length 1. It +#' will only be used if 'data_cor' is not provided. +#'@param handle_infinity A logical value wether to return infinite values (TRUE) +#' or not (FALSE). When it is TRUE, the positive infinite values (negative +#' infinite) are substituted by the maximum (minimum) values of each +#' computation step, a subset of the array of dimensions time_dim, leadtime_dim +#' and memb_dim. +#'@param method A character string indicating the standardization method used. +#' If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by +#' default. +#'@param distribution A character string indicating the name of the distribution +#' function to be used for computing the SPEI. The accepted names are: +#' 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The +#' 'Gamma' method only works when only precipitation is provided and other +#' variables are 0 because it is positive defined (SPI indicator). +#'@param return_params A logical value indicating wether to return parameters +#' array (TRUE) or not (FALSE). It is FALSE by default. +#'@param na.rm A logical value indicating whether NA values should be removed +#' from data. It is FALSE by default. If it is FALSE and there are NA values, +#' standardization cannot be carried out for those coordinates and therefore, +#' the result will be filled with NA for the specific coordinates. If it is +#' TRUE, if the data from other dimensions except time_dim and leadtime_dim is +#' not reaching 4 values, it is not enough values to estimate the parameters +#' and the result will include NA. +#'@param ncores An integer value indicating the number of cores to use in +#' parallel computation. +#' +#'@return An object of class \code{s2dv_cube} containing the standardized data. +#'If 'data_cor' is provided the array stored in element data will be of the same +#'dimensions as 'data_cor'. If 'data_cor' is not provided, the array stored in +#'element data will be of the same dimensions as 'data'. The parameters of the +#'standardization will only be returned if 'return_params' is TRUE, in this +#'case, the output will be a list of two objects one for the standardized data +#'and one for the parameters. +#' +#'@examples +#'dims <- c(syear = 6, time = 3, latitude = 2, ensemble = 25) +#'data <- NULL +#'data$data <- array(rnorm(600, -204.1, 78.1), dim = dims) +#'class(data) <- 's2dv_cube' +#'SPEI <- CST_PeriodStandardization(data = data) +#'@export +CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', + leadtime_dim = 'time', memb_dim = 'ensemble', + ref_period = NULL, + handle_infinity = FALSE, + method = 'parametric', + distribution = 'log-Logistic', + params = NULL, return_params = FALSE, + na.rm = FALSE, ncores = NULL) { + # Check 's2dv_cube' + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!inherits(data, 's2dv_cube')) { + stop("Parameter 'data' must be of 's2dv_cube' class.") + } + if (!is.null(data_cor)) { + if (!inherits(data_cor, 's2dv_cube')) { + stop("Parameter 'data_cor' must be of 's2dv_cube' class.") + } + } + res <- PeriodStandardization(data = data$data, data_cor = data_cor$data, + dates = data$attrs$Dates, + time_dim = time_dim, leadtime_dim = leadtime_dim, + memb_dim = memb_dim, + ref_period = ref_period, + handle_infinity = handle_infinity, method = method, + distribution = distribution, + params = params, return_params = return_params, + na.rm = na.rm, ncores = ncores) + if (return_params) { + std <- res$spei + params <- res$params + } else { + std <- res + } + + if (is.null(data_cor)) { + data$data <- std + data$attrs$Variable$varName <- paste0(data$attrs$Variable$varName, ' standardized') + if (return_params) { + return(list(spei = data, params = params)) + } else { + return(data) + } + } else { + data_cor$data <- std + data_cor$attrs$Variable$varName <- paste0(data_cor$attrs$Variable$varName, ' standardized') + data_cor$attrs$Datasets <- c(data_cor$attrs$Datasets, data$attrs$Datasets) + data_cor$attrs$source_files <- c(data_cor$attrs$source_files, data$attrs$source_files) + return(data_cor) + } +} + +#'Compute the Standardization of Precipitation-Evapotranspiration Index +#' +#'The Standardization of the data is the last step of computing the SPEI +#'indicator. With this function the data is fit to a probability distribution to +#'transform the original values to standardized units that are comparable in +#'space and time and at different SPEI time scales. +#' +#'Next, some specifications for the calculation of the standardization will be +#'discussed. If there are NAs in the data and they are not removed with the +#'parameter 'na.rm', the standardization cannot be carried out for those +#'coordinates and therefore, the result will be filled with NA for the +#'specific coordinates. When NAs are not removed, if the length of the data for +#'a computational step is smaller than 4, there will not be enough data for +#'standarize and the result will be also filled with NAs for that coordinates. +#'About the distribution used to fit the data, there are only two possibilities: +#''log-logistic' and 'Gamma'. The 'Gamma' method only works when only +#'precipitation is provided and other variables are 0 because it is positive +#'defined (SPI indicator). When only 'data' is provided ('data_cor' is NULL) the +#'standardization is computed with cross validation. For more information about +#'SPEI, see functions PeriodPET and PeriodAccumulation. +#' +#'@param data A multidimensional array containing the data to be standardized. +#'@param data_cor A multidimensional array containing the data in which the +#' standardization should be applied using the fitting parameters from 'data'. +#'@param dates An array containing the dates of the data with the same time +#' dimensions as the data. It is optional and only necessary for using the +#' parameter 'ref_period' to select a reference period directly from dates. +#'@param time_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'syear'. +#'@param leadtime_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'time'. +#'@param memb_dim A character string indicating the name of the dimension in +#' which the ensemble members are stored. When set it to NULL, threshold is +#' computed for individual members. +#'@param ref_period A list with two numeric values with the starting and end +#' points of the reference period used for computing the index. The default +#' value is NULL indicating that the first and end values in data will be +#' used as starting and end points. +#'@param params An optional parameter that needs to be a multidimensional array +#' with named dimensions. This option overrides computation of fitting +#' parameters. It needs to be of same time dimensions (specified in 'time_dim' +#' and 'leadtime_dim') of 'data' and a dimension named 'coef' with the length +#' of the coefficients needed for the used distribution (for 'Gamma' coef +#' dimension is of lenght 2, for 'log-Logistic' is of length 3). It also needs +#' to have a leadtime dimension (specified in 'leadtime_dim') of length 1. It +#' will only be used if 'data_cor' is not provided. +#'@param handle_infinity A logical value wether to return infinite values (TRUE) +#' or not (FALSE). When it is TRUE, the positive infinite values (negative +#' infinite) are substituted by the maximum (minimum) values of each +#' computation step, a subset of the array of dimensions time_dim, leadtime_dim +#' and memb_dim. +#'@param method A character string indicating the standardization method used. +#' If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by +#' default. +#'@param distribution A character string indicating the name of the distribution +#' function to be used for computing the SPEI. The accepted names are: +#' 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The +#' 'Gamma' method only works when only precipitation is provided and other +#' variables are 0 because it is positive defined (SPI indicator). +#'@param return_params A logical value indicating wether to return parameters +#' array (TRUE) or not (FALSE). It is FALSE by default. +#'@param na.rm A logical value indicating whether NA values should be removed +#' from data. It is FALSE by default. If it is FALSE and there are NA values, +#' standardization cannot be carried out for those coordinates and therefore, +#' the result will be filled with NA for the specific coordinates. If it is +#' TRUE, if the data from other dimensions except time_dim and leadtime_dim is +#' not reaching 4 values, it is not enough values to estimate the parameters +#' and the result will include NA. +#'@param ncores An integer value indicating the number of cores to use in +#' parallel computation. +#' +#'@return A multidimensional array containing the standardized data. +#'If 'data_cor' is provided the array will be of the same dimensions as +#''data_cor'. If 'data_cor' is not provided, the array will be of the same +#'dimensions as 'data'. The parameters of the standardization will only be +#'returned if 'return_params' is TRUE, in this case, the output will be a list +#'of two objects one for the standardized data and one for the parameters. +#' +#'@examples +#'dims <- c(syear = 6, time = 2, latitude = 2, ensemble = 25) +#'dimscor <- c(syear = 1, time = 2, latitude = 2, ensemble = 25) +#'data <- array(rnorm(600, -194.5, 64.8), dim = dims) +#'datacor <- array(rnorm(100, -217.8, 68.29), dim = dimscor) +#' +#'SPEI <- PeriodStandardization(data = data) +#'SPEIcor <- PeriodStandardization(data = data, data_cor = datacor) +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@importFrom lmomco pwm.pp pwm.ub pwm2lmom are.lmom.valid parglo pargam parpe3 +#'@importFrom lmom cdfglo cdfgam cdfpe3 pelglo pelgam pelpe3 +#'@importFrom SPEI parglo.maxlik +#'@importFrom stats qnorm sd window +#'@export +PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, + time_dim = 'syear', leadtime_dim = 'time', + memb_dim = 'ensemble', + ref_period = NULL, handle_infinity = FALSE, + method = 'parametric', + distribution = 'log-Logistic', + params = NULL, return_params = FALSE, + na.rm = FALSE, ncores = NULL) { + # Check inputs + ## data + if (!is.array(data)) { + stop("Parameter 'data' must be a numeric array.") + } + if (is.null(names(dim(data)))) { + stop("Parameter 'data' must have dimension names.") + } + ## data_cor + if (!is.null(data_cor)) { + if (!is.array(data_cor)) { + stop("Parameter 'data_cor' must be a numeric array.") + } + if (is.null(names(dim(data_cor)))) { + stop("Parameter 'data_cor' must have dimension names.") + } + } + ## dates + if (!is.null(dates)) { + if (!any(inherits(dates, 'Date'), inherits(dates, 'POSIXct'))) { + stop("Parameter 'dates' is not of the correct class, ", + "only 'Date' and 'POSIXct' classes are accepted.") + } + if (!time_dim %in% names(dim(dates)) | !leadtime_dim %in% names(dim(dates))) { + stop("Parameter 'dates' must have 'time_dim' and 'leadtime_dim' ", + "dimension.") + } + if (dim(data)[c(time_dim)] != dim(dates)[c(time_dim)]) { + stop("Parameter 'dates' needs to have the same length of 'time_dim' ", + "as 'data'.") + } + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(data))) { + stop("Parameter 'time_dim' is not found in 'data' dimension.") + } + if (!is.null(data_cor)) { + if (!time_dim %in% names(dim(data_cor))) { + stop("Parameter 'time_dim' is not found in 'data_cor' dimension.") + } + } + ## leadtime_dim + if (!is.character(leadtime_dim) | length(leadtime_dim) != 1) { + stop("Parameter 'leadtime_dim' must be a character string.") + } + if (!leadtime_dim %in% names(dim(data))) { + stop("Parameter 'leadtime_dim' is not found in 'data' dimension.") + } + if (!is.null(data_cor)) { + if (!leadtime_dim %in% names(dim(data_cor))) { + stop("Parameter 'leadtime_dim' is not found in 'data_cor' dimension.") + } + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) != 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(data))) { + stop("Parameter 'memb_dim' is not found in 'data' dimension.") + } + if (!is.null(data_cor)) { + if (!memb_dim %in% names(dim(data_cor))) { + stop("Parameter 'memb_dim' is not found in 'data_cor' dimension.") + } + } + ## data_cor (2) + if (!is.null(data_cor)) { + if (dim(data)[leadtime_dim] != dim(data_cor)[leadtime_dim]) { + stop("Parameter 'data' and 'data_cor' have dimension 'leadtime_dim' ", + "of different length.") + } + } + ## ref_period + if (!is.null(ref_period)) { + years_dates <- format(dates, "%Y") + if (is.null(dates)) { + warning("Parameter 'dates' is not provided so 'ref_period' can't be ", + "used.") + ref_period <- NULL + } else if (length(ref_period) != 2) { + warning("Parameter 'ref_period' must be of length two indicating the ", + "first and end years of the reference period. It will not ", + "be used.") + ref_period <- NULL + } else if (!all(sapply(ref_period, is.numeric))) { + warning("Parameter 'ref_period' must be a numeric vector indicating the ", + "'start' and 'end' years of the reference period. It will not ", + "be used.") + ref_period <- NULL + } else if (ref_period[[1]] > ref_period[[2]]) { + warning("In parameter 'ref_period' 'start' cannot be after 'end'. It ", + "will not be used.") + ref_period <- NULL + } else if (!all(unlist(ref_period) %in% years_dates)) { + warning("Parameter 'ref_period' contains years outside the dates. ", + "It will not be used.") + ref_period <- NULL + } else { + years <- format(Subset(dates, along = leadtime_dim, indices = 1), "%Y") + ref_period[[1]] <- which(ref_period[[1]] == years) + ref_period[[2]] <- which(ref_period[[2]] == years) + } + } + ## handle_infinity + if (!is.logical(handle_infinity)) { + stop("Parameter 'handle_infinity' must be a logical value.") + } + ## method + if (!(method %in% c('parametric', 'non-parametric'))) { + stop("Parameter 'method' must be a character string containing one of ", + "the following methods: 'parametric' or 'non-parametric'.") + } + ## distribution + if (!(distribution %in% c('log-Logistic', 'Gamma', 'PearsonIII'))) { + stop("Parameter 'distribution' must be a character string containing one ", + "of the following distributions: 'log-Logistic', 'Gamma' or ", + "'PearsonIII'.") + } + ## params + if (!is.null(params)) { + if (!is.numeric(params)) { + stop("Parameter 'params' must be numeric.") + } + if (!all(c(time_dim, leadtime_dim, 'coef') %in% names(dim(params)))) { + stop("Parameter 'params' must be a multidimensional array with named ", + "dimensions: '", time_dim, "', '", leadtime_dim, "' and 'coef'.") + } + dims_data <- dim(data)[-which(names(dim(data)) == memb_dim)] + dims_params <- dim(params)[-which(names(dim(params)) == 'coef')] + if (!all(dims_data == dims_params)) { + stop("Parameter 'data' and 'params' must have same common dimensions ", + "except 'memb_dim' and 'coef'.") + } + + if (distribution == "Gamma") { + if (dim(params)['coef'] != 2) { + stop("For '", distribution, "' distribution, params array should have ", + "'coef' dimension of length 2.") + } + } else { + if (dim(params)['coef'] != 3) { + stop("For '", distribution, "' distribution, params array should have ", + "'coef' dimension of length 3.") + } + } + } + ## return_params + if (!is.logical(return_params)) { + stop("Parameter 'return_params' must be logical.") + } + ## na.rm + if (!is.logical(na.rm)) { + stop("Parameter 'na.rm' must be logical.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + if (is.null(ref_period)) { + ref_start <- NULL + ref_end <- NULL + } else { + ref_start <- ref_period[[1]] + ref_end <- ref_period[[2]] + } + + # Standardization + if (is.null(data_cor)) { + if (is.null(params)) { + res <- Apply(data = list(data), + target_dims = c(leadtime_dim, time_dim, memb_dim), + fun = .standardization, data_cor = NULL, params = NULL, + leadtime_dim = leadtime_dim, time_dim = time_dim, + ref_start = ref_start, ref_end = ref_end, + handle_infinity = handle_infinity, + method = method, distribution = distribution, + return_params = return_params, + na.rm = na.rm, ncores = ncores) + } else { + res <- Apply(data = list(data = data, params = params), + target_dims = list(data = c(leadtime_dim, time_dim, memb_dim), + params = c(leadtime_dim, time_dim, 'coef')), + fun = .standardization, data_cor = NULL, + leadtime_dim = leadtime_dim, time_dim = time_dim, + ref_start = ref_start, ref_end = ref_end, + handle_infinity = handle_infinity, + method = method, distribution = distribution, + return_params = return_params, + na.rm = na.rm, ncores = ncores) + } + } else { + res <- Apply(data = list(data = data, data_cor = data_cor), + target_dims = c(leadtime_dim, time_dim, memb_dim), + fun = .standardization, params = NULL, + leadtime_dim = leadtime_dim, time_dim = time_dim, + ref_start = ref_start, ref_end = ref_end, + handle_infinity = handle_infinity, + method = method, distribution = distribution, + return_params = return_params, + na.rm = na.rm, ncores = ncores) + } + if (return_params) { + spei <- res$spei + params <- res$params + } else { + spei <- res$output1 + } + + if (is.null(data_cor)) { + pos <- match(names(dim(data)), names(dim(spei))) + spei <- aperm(spei, pos) + } else { + pos <- match(names(dim(data_cor)), names(dim(spei))) + spei <- aperm(spei, pos) + } + + if (return_params) { + pos <- match(c(names(dim(spei))[-which(names(dim(spei)) == memb_dim)], 'coef'), + names(dim(params))) + params <- aperm(params, pos) + return(list('spei' = spei, 'params' = params)) + } else { + return(spei) + } +} + +.standardization <- function(data, data_cor = NULL, params = NULL, + leadtime_dim = 'time', time_dim = 'syear', + ref_start = NULL, ref_end = NULL, handle_infinity = FALSE, + method = 'parametric', distribution = 'log-Logistic', + return_params = FALSE, na.rm = FALSE) { + # data (data_cor): [leadtime_dim, time_dim, memb_dim] + dims <- dim(data)[-1] + fit = 'ub-pwm' + + coef = switch(distribution, + "Gamma" = array(NA, dim = 2, dimnames = list(c('alpha', 'beta'))), + "log-Logistic" = array(NA, dim = 3, dimnames = list(c('xi', 'alpha', 'kappa'))), + "PearsonIII" = array(NA, dim = 3, dimnames = list(c('mu', 'sigma', 'gamma')))) + + if (is.null(data_cor)) { + # cross_val = TRUE + spei_mod <- data*NA + if (return_params) { + params_result <- array(dim = c(dim(data)[-length(dim(data))], coef = length(coef))) + } + for (ff in 1:dim(data)[leadtime_dim]) { + data2 <- data[ff, , ] + dim(data2) <- dims + if (method == 'non-parametric') { + bp <- matrix(0, length(data2), 1) + for (i in 1:length(data2)) { + bp[i,1] = sum(data2[] <= data2[i], na.rm = na.rm); # Writes the rank of the data + } + std_index <- qnorm((bp - 0.44)/(length(data2) + 0.12)) + dim(std_index) <- dims + spei_mod[ff, , ] <- std_index + } else { + if (!is.null(ref_start) && !is.null(ref_end)) { + data_fit <- window(data2, ref_start, ref_end) + } else { + data_fit <- data2 + } + for (nsd in 1:dim(data)[time_dim]) { + if (is.null(params)) { + acu <- as.vector(data_fit[-nsd, ]) + if (na.rm) { + acu_sorted <- sort.default(acu, method = "quick") + } else { + acu_sorted <- sort.default(acu, method = "quick", na.last = TRUE) + } + f_params <- NA + if (!any(is.na(acu_sorted)) & length(acu_sorted) != 0) { + acu_sd <- sd(acu_sorted) + if (!is.na(acu_sd) & acu_sd != 0) { + if (distribution != "log-Logistic") { + acu_sorted <- acu_sorted[acu_sorted > 0] + } + if (length(acu_sorted) >= 4) { + f_params <- .std(data = acu_sorted, fit = fit, + distribution = distribution) + } + } + } + } else { + f_params <- params[ff, nsd, ] + } + if (all(is.na(f_params))) { + cdf_res <- NA + } else { + f_params <- f_params[which(!is.na(f_params))] + cdf_res = switch(distribution, + "log-Logistic" = lmom::cdfglo(data2, f_params), + "Gamma" = lmom::cdfgam(data2, f_params), + "PearsonIII" = lmom::cdfpe3(data2, f_params)) + } + std_index_cv <- array(qnorm(cdf_res), dim = dims) + spei_mod[ff, nsd, ] <- std_index_cv[nsd, ] + if (return_params) params_result[ff, nsd, ] <- f_params + } + } + } + } else { + # cross_val = FALSE + spei_mod <- data_cor*NA + dimscor <- dim(data_cor)[-1] + if (return_params) { + params_result <- array(dim = c(dim(data_cor)[-length(dim(data_cor))], coef = length(coef))) + } + for (ff in 1:dim(data)[leadtime_dim]) { + data_cor2 <- data_cor[ff, , ] + dim(data_cor2) <- dimscor + if (method == 'non-parametric') { + bp <- matrix(0, length(data_cor2), 1) + for (i in 1:length(data_cor2)) { + bp[i,1] = sum(data_cor2[] <= data_cor2[i], na.rm = na.rm); # Writes the rank of the data + } + std_index <- qnorm((bp - 0.44)/(length(data_cor2) + 0.12)) + dim(std_index) <- dimscor + spei_mod[ff, , ] <- std_index + } else { + data2 <- data[ff, , ] + dim(data2) <- dims + if (!is.null(ref_start) && !is.null(ref_end)) { + data_fit <- window(data2, ref_start, ref_end) + } else { + data_fit <- data2 + } + acu <- as.vector(data_fit) + if (na.rm) { + acu_sorted <- sort.default(acu, method = "quick") + } else { + acu_sorted <- sort.default(acu, method = "quick", na.last = TRUE) + } + if (!any(is.na(acu_sorted)) & length(acu_sorted) != 0) { + acu_sd <- sd(acu_sorted) + if (!is.na(acu_sd) & acu_sd != 0) { + if (distribution != "log-Logistic") { + acu_sorted <- acu_sorted[acu_sorted > 0] + } + if (length(acu_sorted) >= 4) { + f_params <- .std(data = acu_sorted, fit = fit, + distribution = distribution) + } + if (all(is.na(f_params))) { + cdf_res <- NA + } else { + f_params <- f_params[which(!is.na(f_params))] + cdf_res = switch(distribution, + "log-Logistic" = lmom::cdfglo(data_cor2, f_params), + "Gamma" = lmom::cdfgam(data_cor2, f_params), + "PearsonIII" = lmom::cdfpe3(data_cor2, f_params)) + } + std_index_cv <- array(qnorm(cdf_res), dim = dimscor) + spei_mod[ff, , ] <- std_index_cv + if (return_params) params_result[ff, , ] <- f_params + } + } + } + } + } + if (handle_infinity) { + # could also use "param_error" ?; we are giving it the min/max value of the grid point + spei_mod[is.infinite(spei_mod) & spei_mod < 0] <- min(spei_mod[!is.infinite(spei_mod)]) + spei_mod[is.infinite(spei_mod) & spei_mod > 0] <- max(spei_mod[!is.infinite(spei_mod)]) + } + if (return_params) { + return(list(spei = spei_mod, params = params_result)) + } else { + return(spei_mod) + } +} + +.std <- function(data, fit = 'pp-pwm', distribution = 'log-Logistic') { + pwm = switch(fit, + 'pp-pwm' = lmomco::pwm.pp(data, -0.35, 0, nmom = 3), + lmomco::pwm.ub(data, nmom = 3) + # TLMoments::PWM(data, order = 0:2) + ) + lmom <- lmomco::pwm2lmom(pwm) + if (!any(!lmomco::are.lmom.valid(lmom), anyNA(lmom[[1]]), any(is.nan(lmom[[1]])))) { + fortran_vec = c(lmom$lambdas[1:2], lmom$ratios[3]) + params_result = switch(distribution, + 'log-Logistic' = tryCatch(lmom::pelglo(fortran_vec), + error = function(e){lmomco::parglo(lmom)$para}), + 'Gamma' = tryCatch(lmom::pelgam(fortran_vec), + error = function(e){lmomco::pargam(lmom)$para}), + 'PearsonIII' = tryCatch(lmom::pelpe3(fortran_vec), + error = function(e){lmomco::parpe3(lmom)$para})) + if (distribution == 'log-Logistic' && fit == 'max-lik') { + params_result = SPEI::parglo.maxlik(data, params_result)$para + } + return(params_result) + } else { + return(NA) + } +} \ No newline at end of file diff --git a/recipe_template.yml b/recipe_template.yml index 0db9e8fbf84a10bddae0e8a0fe9cf365e30529b0..aa3a09f50cb70a0a10304cdb0c712ef90b10aa6f 100644 --- a/recipe_template.yml +++ b/recipe_template.yml @@ -124,6 +124,26 @@ Analysis: Nino4: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05} # Also available if variable is psl and/or z500: # NAO: {obsproj: yes, save: 'all', plot_ts: yes, plot_sp: yes} + Indicators: + SPEI: + return_spei: yes # yes/no + PET_method: hargreaves # options: none, hargreaves, hargreaves_modified, thornthwaite + Nmonths_accum: 3 # any integer covered by (ftime_max - ftime_min + 1) + standardization: yes # yes/no + standardization_ref_period: # if null will use whole period, otherwise select a period inside the data requested period e.g. [1993,1999] + standardization_handle_infinity: no # yes/no, if yes will replace by Inf/-Inf results by max/min value of the timeseries in the same grid cell + SPI: + return_spi: no # yes/no + Nmonths_accum: 3 # any integer covered by (ftime_max - ftime_min + 1) + standardization: yes # yes/no + standardization_ref_period: # if null will use whole period, otherwise select a period inside the data requested period e.g. [1993,1999] + standardization_handle_infinity: no # yes/no, if yes will replace by Inf/-Inf results by max/min value of the timeseries in the same grid cell + Malaria: + return_climate_suitability: no # yes/no + ssp: ['P.falciparum', 'P.vivax'] # select one or several, in the example the options that are deveolped so far + Ticks: + return_climate_suitability: no # yes/no + ssp: ['I.ricinus'] # select one or several, in the example the options that are deveolped so far Skill: metric: mean_bias enscorr rpss crpss enssprerr # List of skill metrics separated by spaces or commas. (Mandatory, str) save: 'all' # Options: 'all', 'none' (Mandatory, str) @@ -143,6 +163,7 @@ Analysis: dots_terciles: yes # Whether to dot the non-significant by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) mask_ens: no # Whether to mask the non-significant points by rpss in the forecast ensemble mean plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) file_format: 'PNG' # Final file format of the plots. Formats available: PNG, JPG, JPEG, EPS. Defaults to PDF. + Scorecards: execute: yes # yes/no regions: @@ -160,6 +181,7 @@ Analysis: col1_width: NULL # Optional, int: to adjust width of first column in scorecards table col2_width: NULL # Optional, int: to adjust width of second column in scorecards table calculate_diff: False # Mandatory, bool: True/False + ncores: 10 # Number of cores to be used in parallel computation. # If left empty, defaults to 1. (Optional, int) remove_NAs: yes # Whether to remove NAs. diff --git a/recipes/examples/recipe_spei_spi.yml b/recipes/examples/recipe_spei_spi.yml new file mode 100644 index 0000000000000000000000000000000000000000..271f3bea97ad5888baeda0cff068b6903cb81e8e --- /dev/null +++ b/recipes/examples/recipe_spei_spi.yml @@ -0,0 +1,57 @@ +Description: + Author: Alba + Info: recipe template for the use of the module indicators to calculate SPEI or SPI + +Analysis: + Horizon: seasonal + Variables: + name: tasmin, tasmax, prlr + freq: monthly_mean + units: {tasmin: C, tasmax: C, prlr: mm} + Datasets: + System: + - {name: ECMWF-SEAS5.1} + Multimodel: no + Reference: + - {name: ERA5} + Time: + sdate: '0101' + fcst_year: '2024' + hcst_start: '1993' + hcst_end: '2003' + ftime_min: 1 + ftime_max: 3 + Region: + latmin: 35 + latmax: 50 + lonmin: 0 + lonmax: 40 + Regrid: + method: bilinear + type: to_system + Workflow: + Indicators: + SPEI: + return_spei: yes + PET_method: hargreaves # options: none, hargreaves, hargreaves_modified, thornthwaite + Nmonths_accum: 3 + standardization: yes + standardization_ref_period: # if null will use whole period, otherwise select a period inside the data requested period e.g. [1993,1999] + standardization_handle_infinity: no # option: yes, no + SPI: + return_spi: no + Nmonths_accum: 3 + standardization: yes + standardization_ref_period: # if null will use whole period, otherwise select a period inside the data requested period e.g. [1993,1999] + standardization_handle_infinity: no # option: yes, no + ncores: 6 + remove_NAs: yes + Output_format: S2S4E + logo: TRUE +Run: + Loglevel: INFO + Terminal: yes + output_dir: # ______ + code_dir: # _____ + autosubmit: no + filesystem: esarchive diff --git a/tools/check_recipe.R b/tools/check_recipe.R index c9641787e99beec2f5e399c03a4c445967fca57e..43c0c99a258936e52bd1a9d7f19c8ac3dfc1922e 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -675,6 +675,200 @@ check_recipe <- function(recipe) { } } + # Indicators + if ("Indicators" %in% names(recipe$Analysis$Workflow)) { + # list of variables requested to be loaded: + var.list <- strsplit(recipe$Analysis$Variables$name, ', ')[[1]] + # SPEI/SPI checks + # Check that precipiation is a requested variable + # when drought indices (SPEI or SPI) are requested + if (isTRUE(recipe$Analysis$Workflow$Indicators$SPEI$return_spei)) { + if (!('prlr' %in% var.list)) { + error(recipe$Run$logger, + paste0("Precipitation is necessary to calculate ", + "SPEI and it is not a variable in the recipe")) + error_status <- TRUE + } + } + if (isTRUE(recipe$Analysis$Workflow$Indicators$SPI$return_spi)) { + if (!('prlr' %in% var.list)) { + error(recipe$Run$logger, + paste0("Precipitation is necessary to calculate ", + "SPI and it is not a variable in the recipe")) + error_status <- TRUE + } + } + # SPEI: check that necessary variables for the selected PET method are in the recipe + if (isTRUE(recipe$Analysis$Workflow$Indicators$SPEI$return_spei)) { + pet_method <- recipe$Analysis$Workflow$Indicators$SPEI$PET_method + if (!is.null(pet_method)) { + if (pet_method == 'none') { + # check that "pet" is in the variable list + ## NOTE: "pet" is not the correct abbr but no examples exist in esarchive now + if (!('pet' %in% var.list)) { + error(recipe$Run$logger, + paste0("a PET method is necessary to estimate potential ", + "evapotranspiration in the calculation of SPEI")) + error_status <- TRUE + } + } else { + if (pet_method == 'hargreaves') { + var.list.method <- c('tasmax', 'tasmin') + known_pet_method <- TRUE + } else if (pet_method == 'hargreaves_modified') { + var.list.method <- c('tasmax', 'tasmin', 'prlr') + known_pet_method <- TRUE + } else if (pet_method == 'thornthwaite') { + var.list.method <- c('tas') + known_pet_method <- TRUE + } else { + known_pet_method <- FALSE + error(recipe$Run$logger, + paste0("PET method ", pet_method, " unknown")) + error_status <- TRUE + } + if (known_pet_method) { + # check that the necessary variables are requested + missing.vars <- c() + for (var in var.list.method) { + if (identical(which(var.list == var), integer(0))) { + missing.vars <- c(missing.vars, var) + } + } + if (length(missing.vars) > 0) { + error(recipe$Run$logger, + paste0(missing.vars, " are necessary for ", pet_method, + " method and they are NOT selected in the recipe")) + error_status <- TRUE + } + } + } + } else { # same as not NULL but pet_method == 'none' + # check that "pet" is in the variable list + ## NOTE: "pet" is not the correct abbr but no examples exist in esarchive now + if (!('pet' %in% var.list)) { + error(recipe$Run$logger, + paste0("a PET method is necessary to estimate potential ", + "evapotranspiration in the calculation of SPEI")) + error_status <- TRUE + } + } + + } + + # SPEI/SPI check accum number + if (isTRUE(recipe$Analysis$Workflow$Indicators$SPEI$return_spei)) { + accum <- recipe$Analysis$Workflow$Indicators$SPEI$Nmonths_accum + ftime_interval <- recipe$Analysis$Time$ftime_max - recipe$Analysis$Time$ftime_min + 1 + if ((accum > 12 & ftime_interval < 12) || (accum > ftime_interval)) { + error(recipe$Run$logger, + paste0("not possible to accumulate ", accum, " months with the specified ftime ", + "in the calculation of SPEI")) + error_status <- TRUE + } + + } + if (isTRUE(recipe$Analysis$Workflow$Indicators$SPI$return_spi)) { + accum <- recipe$Analysis$Workflow$Indicators$SPI$Nmonths_accum + ftime_interval <- recipe$Analysis$Time$ftime_max - recipe$Analysis$Time$ftime_min + 1 + if ((accum > 12 & ftime_interval < 12) || (accum > ftime_interval)) { + error(recipe$Run$logger, + paste0("not possible to accumulate ", accum, " months with the specified ftime ", + "in the calculation of SPI")) + error_status <- TRUE + } + } + + # Check standardization reference period + # SPEI + if (isTRUE(recipe$Analysis$Workflow$Indicators$SPEI$return_spei)) { + stand_refperiod <- recipe$Analysis$Workflow$Indicators$SPEI$standardization_ref_period + year_start <- recipe$Analysis$Time$hcst_start + year_end <- recipe$Analysis$Time$hcst_end + if (!is.null(stand_refperiod)) { + if (!(stand_refperiod[1] >= year_start & stand_refperiod[2] <= year_end)){ + error(recipe$Run$logger, + paste0("the standardization_ref_period needs to be contained ", + "in hcst_start and hcst_end period for the calculation of SPEI")) + error_status <- TRUE + } + } + } + # SPI + if (isTRUE(recipe$Analysis$Workflow$Indicators$SPI$return_spi)) { + stand_refperiod <- recipe$Analysis$Workflow$Indicators$SPI$standardization_ref_period + year_start <- recipe$Analysis$Time$hcst_start + year_end <- recipe$Analysis$Time$hcst_end + if (!is.null(stand_refperiod)){ + if (!(stand_refperiod[1] >= year_start & stand_refperiod[2] <= year_end)){ + error(recipe$Run$logger, + paste0("the standardization_ref_period needs to be contained ", + "in hcst_start and hcst_end period for the calculation of SPI")) + error_status <- TRUE + } + } + } + + # Threshold indicator: check that length of requested thresholds matches length variables + if (isTRUE(recipe$Analysis$Workflow$Indicators$Threshold_based$return_thresholdbased)) { + thrs <- recipe$Analysis$Workflow$Indicators$Threshold_based$threshold + if (is.null(thrs)) { + error(recipe$Run$logger, + paste0("Threshold based indicator is requested but no threshold ", + "has been indicated")) + error_status <- TRUE + } else { + if (length(thrs) != length(var.list)){ + error(recipe$Run$logger, + paste0("Threshold based indicators is requested but thresholds ", + "do NOT match the number of requested variables")) + error_status <- TRUE + } + } + } + + # Threshold-based predifined indicators (Malaria and Ticks) + if (isTRUE(recipe$Analysis$Workflow$Indicators$Malaria$return_climate_suitability)) { + # check that necessary variables are requested + if ((!all(c("tas", "tdps", "prlr") %in% var.list)) | + (!all(c("tas", "hurs", "prlr") %in% var.list))) { + error(recipe$Run$logger, + paste0("Necessary variables for Malaria indicator are ", + " tas, tdps or hurs, and prlr, NOT included in requested ", + "variables: ", var.list)) + error_status <- TRUE + } + # check that ssp is known + for (ssp in recipe$Analysis$Workflow$Indicators$Malaria$ssp) { + if (ssp != 'p.falciparum' & ssp != 'p.vivax'){ + error(recipe$Run$logger, + paste0("Unknown requested ssp ", ssp)) + error_status <- TRUE + } + } + } + # Tick-borne disease indicator: + if (isTRUE(recipe$Analysis$Workflow$Indicators$Ticks$return_climate_suitability)) { + # check that necessary variables are requested + if ((!all(c("tas", "tdps") %in% var.list)) | + (!all(c("tas", "hurs") %in% var.list))) { + error(recipe$Run$logger, + paste0("Necessary variables for Tick indicator are ", + " tas, and tdps or hurs, NOT included in requested ", + "variables: ", var.list)) + error_status <- TRUE + } + # check that ssp is known + for (ssp in recipe$Analysis$Workflow$Indicators$Malaria$ssp) { + if (ssp != 'i.ricinus') { + error(recipe$Run$logger, + paste0("Unknown requested ssp ", ssp)) + error_status <- TRUE + } + } + } + } # end checks Indicators + # Visualization if ("Visualization" %in% names(recipe$Analysis$Workflow)) { PLOT_OPTIONS <- c("skill_metrics", "forecast_ensemble_mean", diff --git a/tools/libs.R b/tools/libs.R index 401467860ba602c0bd459439e973e3637adb7a4f..102aeecd3a92ec258fe8e02b51279719799c0cd6 100644 --- a/tools/libs.R +++ b/tools/libs.R @@ -24,6 +24,7 @@ library(ncdf4) library(formattable) ## to plot horizontal color bars - used ?? library(kableExtra) library(memuse) # To check mem usage. +library(CSIndicators) # to use module Indicators # Functions ## To be removed when new package is done by library(CSOperational)