#'Compute the Standardization of Precipitation-Evapotranspiration Index #' #'@param data An 's2dv_cube' that element 'data' stores a multidimensional #' array containing the data to be standardized. #'@param data_cor An 's2dv_cube' that element 'data' stores a multidimensional #' array containing the data in which the standardization should be applied #' using the fitting parameters from 'data'. #'@param time_dim A character string indicating the name of the temporal #' dimension. By default, it is set to 'syear'. #'@param leadtime_dim A character string indicating the name of the temporal #' dimension. By default, it is set to 'time'. #'@param memb_dim A character string indicating the name of the dimension in #' which the ensemble members are stored. When set it to NULL, threshold is #' computed for individual members. #'@param accum An integer value indicating the number of months for the #' accumulation for each variable. When it is greater than 1, the result will #' be filled with NA until the accum time_dim dimension number due to the #' accumulation to previous months. #'@param ref_period A list with two numeric values with the starting and end #' points of the reference period used for computing the index. The default #' value is NULL indicating that the first and end values in data will be #' used as starting and end points. #'@param param_error A numeric value with the error accepted. #'@param handle_infinity A logical value wether to return Infinite values (TRUE) #' or not (FALSE). #'@param method A character string indicating the standardization method used. #' If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by #' default. #'@param distribution A character string indicating the name of the distribution #' function to be used for computing the SPEI. The accepted names are: #' 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The #' 'Gamma' method only works when only precipitation is provided and other #' variables are 0 because it is positive defined (SPI indicator). #'@param na.rm A logical value indicating whether NA values should be removed #' from data. It is FALSE by default. If it is FALSE and there are NA values, #' (if standardization is TRUE) all values of other dimensions except time_dim #' and leadtime_dim will be set to NA directly. On the other hand, if it is #' TRUE, if the data from other dimensions except time_dim and leadtime_dim is #' not reaching 4 values, it is not enough values to estimate the parameters #' and the result will include NA. #'@param ncores An integer value indicating the number of cores to use in #' parallel computation. #' #'@return An object of class \code{s2dv_cube} containing the standardized data. #'If 'data_cor' is provided the standardizaton is applied to it using 'data' #'to adjust it. #' #'@examples #'dims <- c(syear = 6, time = 3, latitude = 2, ensemble = 25) #'data <- NULL #'data$data <- array(rnorm(600, -204.1, 78.1), dim = dims) #'class(data) <- 's2dv_cube' #'SPEI <- CST_PeriodStandardization(data = data, accum = 2) #'@export CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', accum = NULL, ref_period = NULL, param_error = -9999, handle_infinity = FALSE, method = 'parametric', distribution = 'log-Logistic', na.rm = FALSE, ncores = NULL) { # Check 's2dv_cube' if (is.null(data)) { stop("Parameter 'exp' cannot be NULL.") } if (!inherits(data, 's2dv_cube')) { stop("Parameter 'data' must be of 's2dv_cube' class.") } if (!is.null(data_cor)) { if (!inherits(data_cor, 's2dv_cube')) { stop("Parameter 'data_cor' must be of 's2dv_cube' class.") } } std <- PeriodStandardization(data = data$data, data_cor = data_cor$data, time_dim = time_dim, leadtime_dim = leadtime_dim, memb_dim = memb_dim, accum = accum, ref_period = ref_period, param_error = param_error, handle_infinity = handle_infinity, method = method, distribution = distribution, na.rm = na.rm, ncores = ncores) if (is.null(data_cor)) { data$data <- std data$attrs$Variable$varName <- paste0(data$attrs$Variable$varName, ' standardized') return(data) } else { data_cor$data <- std data_cor$attrs$Variable$varName <- paste0(data_cor$attrs$Variable$varName, ' standardized') data_cor$attrs$Datasets <- c(data_cor$attrs$Datasets, data$attrs$Datasets) data_cor$attrs$source_files <- c(data_cor$attrs$source_files, data$attrs$source_files) return(data_cor) } } #'Compute the Standardization of Precipitation-Evapotranspiration Index #' #'@param data A multidimensional array containing the data to be standardized. #'@param data_cor A multidimensional array containing the data in which the #' standardization should be applied using the fitting parameters from 'data'. #'@param time_dim A character string indicating the name of the temporal #' dimension. By default, it is set to 'syear'. #'@param leadtime_dim A character string indicating the name of the temporal #' dimension. By default, it is set to 'time'. #'@param memb_dim A character string indicating the name of the dimension in #' which the ensemble members are stored. When set it to NULL, threshold is #' computed for individual members. #'@param accum An integer value indicating the number of months for the #' accumulation for each variable. When it is greater than 1, the result will #' be filled with NA until the accum time_dim dimension number due to the #' accumulation to previous months. #'@param ref_period A list with two numeric values with the starting and end #' points of the reference period used for computing the index. The default #' value is NULL indicating that the first and end values in data will be #' used as starting and end points. #'@param param_error A numeric value with the error accepted. #'@param handle_infinity A logical value wether to return Infinite values (TRUE) #' or not (FALSE). #'@param method A character string indicating the standardization method used. #' If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by #' default. #'@param distribution A character string indicating the name of the distribution #' function to be used for computing the SPEI. The accepted names are: #' 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The #' 'Gamma' method only works when only precipitation is provided and other #' variables are 0 because it is positive defined (SPI indicator). #'@param na.rm A logical value indicating whether NA values should be removed #' from data. It is FALSE by default. If it is FALSE and there are NA values, #' (if standardization is TRUE) all values of other dimensions except time_dim #' and leadtime_dim will be set to NA directly. On the other hand, if it is #' TRUE, if the data from other dimensions except time_dim and leadtime_dim is #' not reaching 4 values, it is not enough values to estimate the parameters #' and the result will include NA. #'@param ncores An integer value indicating the number of cores to use in #' parallel computation. #' #'@return A multidimensional array containing the standardized data. #'If 'data_cor' is provided the standardizaton is applied to it using 'data' #'to adjust it. #' #'@examples #'dims <- c(syear = 6, time = 2, latitude = 2, ensemble = 25) #'dimscor <- c(syear = 1, time = 2, latitude = 2, ensemble = 25) #'data <- array(rnorm(600, -194.5, 64.8), dim = dims) #'datacor <- array(rnorm(100, -217.8, 68.29), dim = dimscor) #' #'SPEI <- PeriodStandardization(data = data, accum = 2) #'SPEIcor <- PeriodStandardization(data = data, data_cor = datacor, accum = 2) #'@import multiApply #'@import ClimProjDiags #'@import TLMoments #'@import lmomco #'@import lmom #'@export PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', accum = NULL, ref_period = NULL, param_error = -9999, handle_infinity = FALSE, method = 'parametric', distribution = 'log-Logistic', na.rm = FALSE, ncores = NULL) { # Check inputs ## data if (!is.array(data)) { stop("Parameter 'data' must be a numeric array.") } if (is.null(names(dim(data)))) { stop("Parameter 'data' must have dimension names.") } ## data_cor if (!is.null(data_cor)) { if (!is.array(data_cor)) { stop("Parameter 'data_cor' must be a numeric array.") } if (is.null(names(dim(data_cor)))) { stop("Parameter 'data_cor' must have dimension names.") } } ## time_dim if (!is.character(time_dim) | length(time_dim) != 1) { stop("Parameter 'time_dim' must be a character string.") } if (!time_dim %in% names(dim(data))) { stop("Parameter 'time_dim' is not found in 'data' dimension.") } if (!is.null(data_cor)) { if (!time_dim %in% names(dim(data_cor))) { stop("Parameter 'time_dim' is not found in 'data_cor' dimension.") } } ## leadtime_dim if (!is.character(leadtime_dim) | length(leadtime_dim) != 1) { stop("Parameter 'leadtime_dim' must be a character string.") } if (!leadtime_dim %in% names(dim(data))) { stop("Parameter 'leadtime_dim' is not found in 'data' dimension.") } if (!is.null(data_cor)) { if (!leadtime_dim %in% names(dim(data_cor))) { stop("Parameter 'leadtime_dim' is not found in 'data_cor' dimension.") } } ## memb_dim if (!is.character(memb_dim) | length(memb_dim) != 1) { stop("Parameter 'memb_dim' must be a character string.") } if (!memb_dim %in% names(dim(data))) { stop("Parameter 'memb_dim' is not found in 'data' dimension.") } if (!is.null(data_cor)) { if (!memb_dim %in% names(dim(data_cor))) { stop("Parameter 'memb_dim' is not found in 'data_cor' dimension.") } } ## accum if (accum > dim(data)[leadtime_dim]) { stop(paste0("Cannot compute accumulation of ", accum, " months because ", "loaded data has only ", dim(data)[leadtime_dim], " months.")) } ## ref_period if (!is.null(ref_period)) { if (length(ref_period) != 2) { warning("Parameter 'ref_period' must be of length two indicating the ", "first and end years of the reference period. It will not ", "be used.") ref_period <- NULL } else if (!all(sapply(ref_period, is.numeric))) { warning("Parameter 'ref_period' must be a numeric vector indicating the ", "'start' and 'end' years of the reference period. It will not ", "be used.") ref_period <- NULL } else if (ref_period[[1]] > ref_period[[2]]) { warning("In parameter 'ref_period' 'start' cannot be after 'end'. It ", "will not be used.") ref_period <- NULL } else if (!all(unlist(ref_period) %in% year(dates_exp))) { warning("Parameter 'ref_period' contain years outside the dates. ", "It will not be used.") ref_period <- NULL } else { years <- year(ClimProjDiags::Subset(dates_exp, along = leadtime_dim, indices = 1)) ref_period[[1]] <- which(ref_period[[1]] == years) ref_period[[2]] <- which(ref_period[[2]] == years) } } ## handle_infinity if (!is.logical(handle_infinity)) { stop("Parameter 'handle_infinity' must be a logical value.") } ## method if (!(method %in% c('parametric', 'non-parametric'))) { stop("Parameter 'method' must be a character string containing one of ", "the following methods: 'parametric' or 'non-parametric'.") } ## distribution if (!(distribution %in% c('log-Logistic', 'Gamma', 'PearsonIII'))) { stop("Parameter 'distribution' must be a character string containing one ", "of the following distributions: 'log-Logistic', 'Gamma' or ", "'PearsonIII'.") } ## na.rm if (!is.logical(na.rm)) { stop("Parameter 'na.rm' must be logical.") } ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | length(ncores) > 1) { stop("Parameter 'ncores' must be a positive integer.") } } target_dims <- c(leadtime_dim, time_dim, memb_dim) if (is.null(ref_period)) { ref_start <- NULL ref_end <- NULL } else { ref_start <- ref_period[[1]] ref_end <- ref_period[[2]] } # Standardization if (is.null(data_cor)) { spei <- Apply(data = list(data), target_dims = target_dims, fun = .standardization, leadtime_dim = leadtime_dim, ref_period = ref_period, handle_infinity = handle_infinity, param_error = param_error, method = method, distribution = distribution, na.rm = na.rm, ncores = ncores)$output1 } else { spei <- Apply(data = list(data, data_cor), target_dims = target_dims, fun = .standardization, leadtime_dim = leadtime_dim, ref_period = ref_period, handle_infinity = handle_infinity, param_error = param_error, method = method, distribution = distribution, na.rm = na.rm, ncores = ncores)$output1 } # add NA if (!is.null(accum)) { spei <- Apply(data = list(spei), target_dims = leadtime_dim, output_dims = leadtime_dim, fun = function(x, accum, leadtime_dim) { res <- c(rep(NA, accum-1), x) return(res) }, accum = accum, leadtime_dim = leadtime_dim)$output1 } if (is.null(data_cor)) { pos <- match(names(dim(data)), names(dim(spei))) spei <- aperm(spei, pos) } else { pos <- match(names(dim(data_cor)), names(dim(spei))) spei <- aperm(spei, pos) } return(spei) } .standardization <- function(data, data_cor = NULL, leadtime_dim = 'time', ref_period = NULL, handle_infinity = FALSE, param_error = -9999, method = 'parametric', distribution = 'log-Logistic', na.rm = FALSE) { # data: [leadtime_dim, time_dim, memb_dim] dims <- dim(data)[-1] fit = 'ub-pwm' coef = switch(distribution, "Gamma" = array(NA, dim = 2, dimnames = list(c('alpha', 'beta'))), "log-Logistic" = array(NA, dim = 3, dimnames = list(c('xi', 'alpha', 'kappa'))), "PearsonIII" = array(NA, dim = 3, dimnames = list(c('mu', 'sigma', 'gamma')))) if (is.null(data_cor)) { # cross_val = TRUE spei_mod <- data*NA for (ff in 1:dim(data)[leadtime_dim]) { data2 <- data[ff, , ] dim(data2) <- dims if (method == 'non-parametric') { bp <- matrix(0, length(data2), 1) for (i in 1:length(data2)) { bp[i,1] = sum(data2[] <= data2[i], na.rm = na.rm); # Writes the rank of the data } std_index <- qnorm((bp - 0.44)/(length(data2) + 0.12)) dim(std_index) <- dims spei_mod[ff, , ] <- std_index } else { if (!is.null(ref_start) && !is.null(ref_end)) { data_fit <- window(data2, ref_start, ref_end) } else { data_fit <- data2 } for (nsd in 1:dim(data)[time_dim]) { acu <- as.vector(data_fit[-nsd, ]) acu_sorted <- sort.default(acu, method = "quick") if (na.rm) { acu_sorted <- acu_sorted[!is.na(acu_sorted)] } if (!any(is.na(acu_sorted)) & length(acu_sorted) != 0) { acu_sd <- sd(acu_sorted) if (!is.na(acu_sd) & acu_sd != 0) { if (distribution != "log-Logistic") { acu_sorted <- acu_sorted[acu_sorted > 0] } if (length(acu_sorted) >= 4) { f_params <- .std(acu_sorted, fit, distribution) } if (all(is.na(f_params))) { cdf_res <- NA } else { f_params <- f_params[which(!is.na(f_params))] cdf_res = switch(distribution, "log-Logistic" = lmom::cdfglo(data2, f_params), "Gamma" = lmom::cdfgam(data2, f_params), "PearsonIII" = lmom::cdfpe3(data2, f_params)) } std_index_cv <- array(qnorm(cdf_res), dim = dims) spei_mod[ff, nsd, ] <- std_index_cv[nsd, ] } } } } } } else { # cross_val = FALSE spei_mod <- data_cor*NA dimscor <- dim(data_cor)[-1] for (ff in 1:dim(data)[leadtime_dim]) { data_cor2 <- data_cor[ff, , ] dim(data_cor2) <- dimscor if (method == 'non-parametric') { bp <- matrix(0, length(data_cor2), 1) for (i in 1:length(data_cor2)) { bp[i,1] = sum(data_cor2[] <= data_cor2[i], na.rm = na.rm); # Writes the rank of the data } std_index <- qnorm((bp - 0.44)/(length(data_cor2) + 0.12)) dim(std_index) <- dimscor spei_mod[ff, , ] <- std_index } else { data2 <- data[ff, , ] dim(data2) <- dims if (!is.null(ref_start) && !is.null(ref_end)) { data_fit <- window(data2, ref_start, ref_end) } else { data_fit <- data2 } acu <- as.vector(data_fit) acu_sorted <- sort.default(acu, method = "quick") if (na.rm) { acu_sorted <- acu_sorted[!is.na(acu_sorted)] } if (!any(is.na(acu_sorted)) & length(acu_sorted) != 0) { acu_sd <- sd(acu_sorted) if (!is.na(acu_sd) & acu_sd != 0) { if (distribution != "log-Logistic") { acu_sorted <- acu_sorted[acu_sorted > 0] } if (length(acu_sorted) >= 4) { f_params <- .std(data = acu_sorted, fit = fit, distribution = distribution) } if (all(is.na(f_params))) { cdf_res <- NA } else { f_params <- f_params[which(!is.na(f_params))] cdf_res = switch(distribution, "log-Logistic" = lmom::cdfglo(data_cor2, f_params), "Gamma" = lmom::cdfgam(data_cor2, f_params), "PearsonIII" = lmom::cdfpe3(data_cor2, f_params)) } std_index_cv <- array(qnorm(cdf_res), dim = dimscor) spei_mod[ff, , ] <- std_index_cv } } } } } if (handle_infinity) { # could also use "param_error" ?; we are giving it the min/max value of the grid point spei_mod[is.infinite(spei_mod) & spei_mod < 0] <- min(spei_mod[!is.infinite(spei_mod)]) spei_mod[is.infinite(spei_mod) & spei_mod > 0] <- max(spei_mod[!is.infinite(spei_mod)]) } return(spei_mod) } .std <- function(data, fit, distribution) { pwm = switch(fit, "pp-pwm" = pwm.pp(data, -0.35, 0, nmom = 3), pwm.ub(data, nmom = 3) # TLMoments::PWM(data, order = 0:2) ) lmom <- pwm2lmom(pwm) if (!(!are.lmom.valid(lmom) || anyNA(lmom[[1]]) || any(is.nan(lmom[[1]])))) { fortran_vec = c(lmom$lambdas[1:2], lmom$ratios[3]) params = switch(distribution, "log-Logistic" = tryCatch(lmom::pelglo(fortran_vec), error = function(e){parglo(lmom)$para}), "Gamma" = tryCatch(lmom::pelgam(fortran_vec), error = function(e){pargam(lmom)$para}), "PearsonIII" = tryCatch(lmom::pelpe3(fortran_vec), error = function(e){parpe3(lmom)$para})) if (distribution == 'log-Logistic' && fit == 'max-lik') { params = parglo.maxlik(data, params)$para } return(params) } else { return(NA) } }