#'Compute the Standardization of Precipitation-Evapotranspiration Index #' #'The Standardization of the data is the last step of computing the SPEI #'(Standarized Precipitation-Evapotranspiration Index). With this function the #'data is fit to a probability distribution to transform the original values to #'standardized units that are comparable in space and time and at different SPEI #'time scales. #' #'Next, some specifications for the calculation of this indicator will be #'discussed. To choose the time scale in which you want to accumulate the SPEI #'(SPEI3, SPEI6...) is done using the accum parameter. The accumulation needs to #'be performed in the previous step. However, since the accumulation is done for #'the elapsed time steps, there will be no complete accumulations until reaching #'the time instant equal to the value of the parameter. For this reason, in the #'result, we will find that for the dimension where the accumulation has been #'carried out, the values of the array will be NA since they do not include #'complete accumulations. If there are NAs in the data and they are not removed with the #'parameter 'na.rm', the standardization cannot be carried out for those #'coordinates and therefore, the result will be filled with NA for the #'specific coordinates. When NAs are not removed, if the length of the data for #'a computational step is smaller than 4, there will not be enough data for #'standarize and the result will be also filled with NAs for that coordinates. #'About the distribution used to fit the data, there are only two possibilities: #''log-logistic' and 'Gamma'. The 'Gamma' method only works when only #'precipitation is provided and other variables are 0 because it is positive #'defined (SPI indicator). For more information about SPEI, see functions #'PeriodPET and PeriodAccumulation. This function is build to work be compatible #'with other tools in that work with 's2dv_cube' object class. The input data #'must be this object class. If you don't work with 's2dv_cube', see #'PeriodStandardization. For more information on the SPEI indicator calculation, #'see CST_PeriodPET and CST_PeriodAccumulation. #' #'@param data An 's2dv_cube' that element 'data' stores a multidimensional #' array containing the data to be standardized. #'@param data_cor An 's2dv_cube' that element 'data' stores a multidimensional #' array containing the data in which the standardization should be applied #' using the fitting parameters from 'data'. #'@param time_dim A character string indicating the name of the temporal #' dimension. By default, it is set to 'syear'. #'@param leadtime_dim A character string indicating the name of the temporal #' dimension. By default, it is set to 'time'. #'@param memb_dim A character string indicating the name of the dimension in #' which the ensemble members are stored. When set it to NULL, threshold is #' computed for individual members. #'@param accum An integer value indicating the number of #' time steps (leadtime_dim dimension) that have been accumulated in the #' previous step. When it is greater than 1, the result will be filled with #' NA until the accum leadtime_dim dimension number due to the #' accumulation to previous months. If it is 1, no accumulation is done. #'@param ref_period A list with two numeric values with the starting and end #' points of the reference period used for computing the index. The default #' value is NULL indicating that the first and end values in data will be #' used as starting and end points. #'@param handle_infinity A logical value wether to return infinite values (TRUE) #' or not (FALSE). When it is TRUE, the positive infinite values (negative #' infinite) are substituted by the maximum (minimum) values of each #' computation step, a subset of the array of dimensions time_dim, leadtime_dim #' and memb_dim. #'@param method A character string indicating the standardization method used. #' If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by #' default. #'@param distribution A character string indicating the name of the distribution #' function to be used for computing the SPEI. The accepted names are: #' 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The #' 'Gamma' method only works when only precipitation is provided and other #' variables are 0 because it is positive defined (SPI indicator). #'@param na.rm A logical value indicating whether NA values should be removed #' from data. It is FALSE by default. If it is FALSE and there are NA values, #' standardization cannot be carried out for those coordinates and therefore, #' the result will be filled with NA for the specific coordinates. If it is #' TRUE, if the data from other dimensions except time_dim and leadtime_dim is #' not reaching 4 values, it is not enough values to estimate the parameters #' and the result will include NA. #'@param ncores An integer value indicating the number of cores to use in #' parallel computation. #' #'@return An object of class \code{s2dv_cube} containing the standardized data. #'If 'data_cor' is provided the array stored in element data will be of the same #'dimensions as 'data_cor'. If 'data_cor' is not provided, the array stored in #'element data will be of the same dimensions as 'data'. #' #'@examples #'dims <- c(syear = 6, time = 3, latitude = 2, ensemble = 25) #'data <- NULL #'data$data <- array(rnorm(600, -204.1, 78.1), dim = dims) #'class(data) <- 's2dv_cube' #'SPEI <- CST_PeriodStandardization(data = data, accum = 2) #'@export CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', accum = 1, ref_period = NULL, handle_infinity = FALSE, method = 'parametric', distribution = 'log-Logistic', na.rm = FALSE, ncores = NULL) { # Check 's2dv_cube' if (is.null(data)) { stop("Parameter 'data' cannot be NULL.") } if (!inherits(data, 's2dv_cube')) { stop("Parameter 'data' must be of 's2dv_cube' class.") } if (!is.null(data_cor)) { if (!inherits(data_cor, 's2dv_cube')) { stop("Parameter 'data_cor' must be of 's2dv_cube' class.") } } std <- PeriodStandardization(data = data$data, data_cor = data_cor$data, time_dim = time_dim, leadtime_dim = leadtime_dim, memb_dim = memb_dim, accum = accum, ref_period = ref_period, handle_infinity = handle_infinity, method = method, distribution = distribution, na.rm = na.rm, ncores = ncores) if (is.null(data_cor)) { data$data <- std data$attrs$Variable$varName <- paste0(data$attrs$Variable$varName, ' standardized') return(data) } else { data_cor$data <- std data_cor$attrs$Variable$varName <- paste0(data_cor$attrs$Variable$varName, ' standardized') data_cor$attrs$Datasets <- c(data_cor$attrs$Datasets, data$attrs$Datasets) data_cor$attrs$source_files <- c(data_cor$attrs$source_files, data$attrs$source_files) return(data_cor) } } #'Compute the Standardization of Precipitation-Evapotranspiration Index #' #'The Standardization of the data is the last step of computing the SPEI #'indicator. With this function the data is fit to a probability distribution to #'transform the original values to standardized units that are comparable in #'space and time and at different SPEI time scales. #' #'Next, some specifications for the calculation of this indicator will be #'discussed. To choose the time scale in which you want to accumulate the SPEI #'(SPEI3, SPEI6...) is done using the accum parameter. The accumulation needs to #'be performed in the previous step. However, since the accumulation is done for #'the elapsed time steps, there will be no complete accumulations until reaching #'the time instant equal to the value of the parameter. For this reason, in the #'result, we will find that for the dimension where the accumulation has been #'carried out, the values of the array will be NA since they do not include #'complete accumulations. If there are NAs in the data and they are not removed with the #'parameter 'na.rm', the standardization cannot be carried out for those #'coordinates and therefore, the result will be filled with NA for the #'specific coordinates. When NAs are not removed, if the length of the data for #'a computational step is smaller than 4, there will not be enough data for #'standarize and the result will be also filled with NAs for that coordinates. #'About the distribution used to fit the data, there are only two possibilities: #''log-logistic' and 'Gamma'. The 'Gamma' method only works when only #'precipitation is provided and other variables are 0 because it is positive #'defined (SPI indicator). For more information about SPEI, see functions #'PeriodPET and PeriodAccumulation. #' #'@param data A multidimensional array containing the data to be standardized. #'@param data_cor A multidimensional array containing the data in which the #' standardization should be applied using the fitting parameters from 'data'. #'@param dates An array containing the dates of the data with the same time #' dimensions as the data. It is optional and only necessary for using the #' parameter 'ref_period' to select a reference period directly from dates. #'@param time_dim A character string indicating the name of the temporal #' dimension. By default, it is set to 'syear'. #'@param leadtime_dim A character string indicating the name of the temporal #' dimension. By default, it is set to 'time'. #'@param memb_dim A character string indicating the name of the dimension in #' which the ensemble members are stored. When set it to NULL, threshold is #' computed for individual members. #'@param accum An integer value indicating the number of #' time steps (leadtime_dim dimension) that have been accumulated in the #' previous step. When it is greater than 1, the result will be filled with #' NA until the accum leadtime_dim dimension number due to the #' accumulation to previous months. If it is 1, no accumulation is done. #'@param ref_period A list with two numeric values with the starting and end #' points of the reference period used for computing the index. The default #' value is NULL indicating that the first and end values in data will be #' used as starting and end points. #'@param handle_infinity A logical value wether to return infinite values (TRUE) #' or not (FALSE). When it is TRUE, the positive infinite values (negative #' infinite) are substituted by the maximum (minimum) values of each #' computation step, a subset of the array of dimensions time_dim, leadtime_dim #' and memb_dim. #'@param method A character string indicating the standardization method used. #' If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by #' default. #'@param distribution A character string indicating the name of the distribution #' function to be used for computing the SPEI. The accepted names are: #' 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The #' 'Gamma' method only works when only precipitation is provided and other #' variables are 0 because it is positive defined (SPI indicator). #'@param na.rm A logical value indicating whether NA values should be removed #' from data. It is FALSE by default. If it is FALSE and there are NA values, #' standardization cannot be carried out for those coordinates and therefore, #' the result will be filled with NA for the specific coordinates. If it is #' TRUE, if the data from other dimensions except time_dim and leadtime_dim is #' not reaching 4 values, it is not enough values to estimate the parameters #' and the result will include NA. #'@param ncores An integer value indicating the number of cores to use in #' parallel computation. #' #'@return A multidimensional array containing the standardized data. #'If 'data_cor' is provided the array will be of the same dimensions as #''data_cor'. If 'data_cor' is not provided, the array will be of the same #'dimensions as 'data'. #' #'@examples #'dims <- c(syear = 6, time = 2, latitude = 2, ensemble = 25) #'dimscor <- c(syear = 1, time = 2, latitude = 2, ensemble = 25) #'data <- array(rnorm(600, -194.5, 64.8), dim = dims) #'datacor <- array(rnorm(100, -217.8, 68.29), dim = dimscor) #' #'SPEI <- PeriodStandardization(data = data, accum = 2) #'SPEIcor <- PeriodStandardization(data = data, data_cor = datacor, accum = 2) #'@import multiApply #'@import ClimProjDiags #'@import TLMoments #'@import lmomco #'@import lmom #'@export PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', accum = 1, ref_period = NULL, handle_infinity = FALSE, method = 'parametric', distribution = 'log-Logistic', na.rm = FALSE, ncores = NULL) { # Check inputs ## data if (!is.array(data)) { stop("Parameter 'data' must be a numeric array.") } if (is.null(names(dim(data)))) { stop("Parameter 'data' must have dimension names.") } ## data_cor if (!is.null(data_cor)) { if (!is.array(data_cor)) { stop("Parameter 'data_cor' must be a numeric array.") } if (is.null(names(dim(data_cor)))) { stop("Parameter 'data_cor' must have dimension names.") } } ## dates if (!is.null(dates)) { if (!(is.Date(dates)) & !(is.POSIXct(dates))) { stop("Parameter 'dates' is not of the correct class, ", "only 'Date' and 'POSIXct' classes are accepted.") } if (!time_dim %in% names(dim(dates)) | !leadtime_dim %in% names(dim(dates))) { stop("Parameter 'dates' must have 'time_dim' and 'leadtime_dim' ", "dimension.") } if (dim(data)[c(time_dim)] != dim(dates)[c(time_dim)]) { stop("Parameter 'dates' needs to have the same length of 'time_dim' ", "as 'data'.") } } ## time_dim if (!is.character(time_dim) | length(time_dim) != 1) { stop("Parameter 'time_dim' must be a character string.") } if (!time_dim %in% names(dim(data))) { stop("Parameter 'time_dim' is not found in 'data' dimension.") } if (!is.null(data_cor)) { if (!time_dim %in% names(dim(data_cor))) { stop("Parameter 'time_dim' is not found in 'data_cor' dimension.") } } ## leadtime_dim if (!is.character(leadtime_dim) | length(leadtime_dim) != 1) { stop("Parameter 'leadtime_dim' must be a character string.") } if (!leadtime_dim %in% names(dim(data))) { stop("Parameter 'leadtime_dim' is not found in 'data' dimension.") } if (!is.null(data_cor)) { if (!leadtime_dim %in% names(dim(data_cor))) { stop("Parameter 'leadtime_dim' is not found in 'data_cor' dimension.") } } ## memb_dim if (!is.character(memb_dim) | length(memb_dim) != 1) { stop("Parameter 'memb_dim' must be a character string.") } if (!memb_dim %in% names(dim(data))) { stop("Parameter 'memb_dim' is not found in 'data' dimension.") } if (!is.null(data_cor)) { if (!memb_dim %in% names(dim(data_cor))) { stop("Parameter 'memb_dim' is not found in 'data_cor' dimension.") } } ## data_cor (2) if (!is.null(data_cor)) { if (dim(data)[leadtime_dim] != dim(data_cor)[leadtime_dim]) { stop("Parameter 'data' and 'data_cor' have dimension 'leadtime_dim' ", "of different length.") } } ## accum if (accum > dim(data)[leadtime_dim]) { stop(paste0("Cannot compute accumulation of ", accum, " months because ", "loaded data has only ", dim(data)[leadtime_dim], " months.")) } ## ref_period if (!is.null(ref_period)) { if (is.null(dates)) { warning("Parameter 'dates' is not provided so 'ref_period' can't be ", "used.") ref_period <- NULL } else if (length(ref_period) != 2) { warning("Parameter 'ref_period' must be of length two indicating the ", "first and end years of the reference period. It will not ", "be used.") ref_period <- NULL } else if (!all(sapply(ref_period, is.numeric))) { warning("Parameter 'ref_period' must be a numeric vector indicating the ", "'start' and 'end' years of the reference period. It will not ", "be used.") ref_period <- NULL } else if (ref_period[[1]] > ref_period[[2]]) { warning("In parameter 'ref_period' 'start' cannot be after 'end'. It ", "will not be used.") ref_period <- NULL } else if (!all(unlist(ref_period) %in% year(dates))) { warning("Parameter 'ref_period' contain years outside the dates. ", "It will not be used.") ref_period <- NULL } else { years <- year(ClimProjDiags::Subset(dates, along = leadtime_dim, indices = 1)) ref_period[[1]] <- which(ref_period[[1]] == years) ref_period[[2]] <- which(ref_period[[2]] == years) } } ## handle_infinity if (!is.logical(handle_infinity)) { stop("Parameter 'handle_infinity' must be a logical value.") } ## method if (!(method %in% c('parametric', 'non-parametric'))) { stop("Parameter 'method' must be a character string containing one of ", "the following methods: 'parametric' or 'non-parametric'.") } ## distribution if (!(distribution %in% c('log-Logistic', 'Gamma', 'PearsonIII'))) { stop("Parameter 'distribution' must be a character string containing one ", "of the following distributions: 'log-Logistic', 'Gamma' or ", "'PearsonIII'.") } ## na.rm if (!is.logical(na.rm)) { stop("Parameter 'na.rm' must be logical.") } ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | length(ncores) > 1) { stop("Parameter 'ncores' must be a positive integer.") } } target_dims <- c(leadtime_dim, time_dim, memb_dim) if (is.null(ref_period)) { ref_start <- NULL ref_end <- NULL } else { ref_start <- ref_period[[1]] ref_end <- ref_period[[2]] } # Standardization if (is.null(data_cor)) { spei <- Apply(data = list(data), target_dims = target_dims, fun = .standardization, leadtime_dim = leadtime_dim, ref_period = ref_period, handle_infinity = handle_infinity, method = method, distribution = distribution, na.rm = na.rm, ncores = ncores)$output1 } else { spei <- Apply(data = list(data, data_cor), target_dims = target_dims, fun = .standardization, leadtime_dim = leadtime_dim, ref_period = ref_period, handle_infinity = handle_infinity, method = method, distribution = distribution, na.rm = na.rm, ncores = ncores)$output1 } # add NA if (!is.null(accum)) { spei <- Apply(data = list(spei), target_dims = leadtime_dim, output_dims = leadtime_dim, fun = function(x, accum, leadtime_dim) { res <- c(rep(NA, accum-1), x) return(res) }, accum = accum, leadtime_dim = leadtime_dim, ncores = ncores)$output1 } if (is.null(data_cor)) { pos <- match(names(dim(data)), names(dim(spei))) spei <- aperm(spei, pos) } else { pos <- match(names(dim(data_cor)), names(dim(spei))) spei <- aperm(spei, pos) } return(spei) } .standardization <- function(data, data_cor = NULL, leadtime_dim = 'time', ref_period = NULL, handle_infinity = FALSE, method = 'parametric', distribution = 'log-Logistic', na.rm = FALSE) { # data: [leadtime_dim, time_dim, memb_dim] dims <- dim(data)[-1] fit = 'ub-pwm' coef = switch(distribution, "Gamma" = array(NA, dim = 2, dimnames = list(c('alpha', 'beta'))), "log-Logistic" = array(NA, dim = 3, dimnames = list(c('xi', 'alpha', 'kappa'))), "PearsonIII" = array(NA, dim = 3, dimnames = list(c('mu', 'sigma', 'gamma')))) if (is.null(data_cor)) { # cross_val = TRUE spei_mod <- data*NA for (ff in 1:dim(data)[leadtime_dim]) { data2 <- data[ff, , ] dim(data2) <- dims if (method == 'non-parametric') { bp <- matrix(0, length(data2), 1) for (i in 1:length(data2)) { bp[i,1] = sum(data2[] <= data2[i], na.rm = na.rm); # Writes the rank of the data } std_index <- qnorm((bp - 0.44)/(length(data2) + 0.12)) dim(std_index) <- dims spei_mod[ff, , ] <- std_index } else { if (!is.null(ref_start) && !is.null(ref_end)) { data_fit <- window(data2, ref_start, ref_end) } else { data_fit <- data2 } for (nsd in 1:dim(data)[time_dim]) { acu <- as.vector(data_fit[-nsd, ]) if (na.rm) { acu_sorted <- sort.default(acu, method = "quick") } else { acu_sorted <- sort.default(acu, method = "quick", na.last = TRUE) } if (!any(is.na(acu_sorted)) & length(acu_sorted) != 0) { acu_sd <- sd(acu_sorted) if (!is.na(acu_sd) & acu_sd != 0) { if (distribution != "log-Logistic") { acu_sorted <- acu_sorted[acu_sorted > 0] } if (length(acu_sorted) >= 4) { f_params <- .std(data = acu_sorted, fit = fit, distribution = distribution) } else { f_params <- NA } if (all(is.na(f_params))) { cdf_res <- NA } else { f_params <- f_params[which(!is.na(f_params))] cdf_res = switch(distribution, "log-Logistic" = lmom::cdfglo(data2, f_params), "Gamma" = lmom::cdfgam(data2, f_params), "PearsonIII" = lmom::cdfpe3(data2, f_params)) } std_index_cv <- array(qnorm(cdf_res), dim = dims) spei_mod[ff, nsd, ] <- std_index_cv[nsd, ] } } } } } } else { # cross_val = FALSE spei_mod <- data_cor*NA dimscor <- dim(data_cor)[-1] for (ff in 1:dim(data)[leadtime_dim]) { data_cor2 <- data_cor[ff, , ] dim(data_cor2) <- dimscor if (method == 'non-parametric') { bp <- matrix(0, length(data_cor2), 1) for (i in 1:length(data_cor2)) { bp[i,1] = sum(data_cor2[] <= data_cor2[i], na.rm = na.rm); # Writes the rank of the data } std_index <- qnorm((bp - 0.44)/(length(data_cor2) + 0.12)) dim(std_index) <- dimscor spei_mod[ff, , ] <- std_index } else { data2 <- data[ff, , ] dim(data2) <- dims if (!is.null(ref_start) && !is.null(ref_end)) { data_fit <- window(data2, ref_start, ref_end) } else { data_fit <- data2 } acu <- as.vector(data_fit) if (na.rm) { acu_sorted <- sort.default(acu, method = "quick") } else { acu_sorted <- sort.default(acu, method = "quick", na.last = TRUE) } if (!any(is.na(acu_sorted)) & length(acu_sorted) != 0) { acu_sd <- sd(acu_sorted) if (!is.na(acu_sd) & acu_sd != 0) { if (distribution != "log-Logistic") { acu_sorted <- acu_sorted[acu_sorted > 0] } if (length(acu_sorted) >= 4) { f_params <- .std(data = acu_sorted, fit = fit, distribution = distribution) } if (all(is.na(f_params))) { cdf_res <- NA } else { f_params <- f_params[which(!is.na(f_params))] cdf_res = switch(distribution, "log-Logistic" = lmom::cdfglo(data_cor2, f_params), "Gamma" = lmom::cdfgam(data_cor2, f_params), "PearsonIII" = lmom::cdfpe3(data_cor2, f_params)) } std_index_cv <- array(qnorm(cdf_res), dim = dimscor) spei_mod[ff, , ] <- std_index_cv } } } } } if (handle_infinity) { # could also use "param_error" ?; we are giving it the min/max value of the grid point spei_mod[is.infinite(spei_mod) & spei_mod < 0] <- min(spei_mod[!is.infinite(spei_mod)]) spei_mod[is.infinite(spei_mod) & spei_mod > 0] <- max(spei_mod[!is.infinite(spei_mod)]) } return(spei_mod) } .std <- function(data, fit = 'pp-pwm', distribution = 'log-Logistic') { pwm = switch(fit, 'pp-pwm' = pwm.pp(data, -0.35, 0, nmom = 3), pwm.ub(data, nmom = 3) # TLMoments::PWM(data, order = 0:2) ) lmom <- pwm2lmom(pwm) if (!(!are.lmom.valid(lmom) || anyNA(lmom[[1]]) || any(is.nan(lmom[[1]])))) { fortran_vec = c(lmom$lambdas[1:2], lmom$ratios[3]) params = switch(distribution, 'log-Logistic' = tryCatch(lmom::pelglo(fortran_vec), error = function(e){parglo(lmom)$para}), 'Gamma' = tryCatch(lmom::pelgam(fortran_vec), error = function(e){pargam(lmom)$para}), 'PearsonIII' = tryCatch(lmom::pelpe3(fortran_vec), error = function(e){parpe3(lmom)$para})) if (distribution == 'log-Logistic' && fit == 'max-lik') { params = parglo.maxlik(data, params)$para } return(params) } else { return(NA) } }