From 95e6464a82a28d40b00fa55c5bf8e2582a32a7e7 Mon Sep 17 00:00:00 2001 From: allabres Date: Mon, 17 Apr 2023 20:13:07 +0200 Subject: [PATCH 01/42] first version of SPEI function, but it has several known issues --- R/PeriodSPEI.R | 509 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 509 insertions(+) create mode 100644 R/PeriodSPEI.R diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R new file mode 100644 index 0000000..d56916b --- /dev/null +++ b/R/PeriodSPEI.R @@ -0,0 +1,509 @@ +PeriodSPEI <- function(tas = NULL, tasmax = NULL, tasmin = NULL, pet = NULL, prlr, # s2dv_cubes (with coordinates lat = prlr$coords$latitude and Dates) + time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', lon_dim = 'longitude', lat_dim = 'latitude', + accum = 1, start = NULL, end = NULL, + pet_method = NULL, + standardization = TRUE, + param_error = -9999, + handle_infinity = FALSE, + cross_validation = FALSE, + method = 'parametric', distribution = 'log-Logistic', fit='ub-pwm', + n_procs = 4){ + + # check provided data and parameters: + if (!is.null(pet) & !is.null(pet_method)){ + print ('WARNING: pet data is provided and also a pet estimation method, the provided pet data will be used and NOT estimated') + pet_method <- NULL + } + if (is.null(pet_method) & is.null(pet)){ + stop ('variable pet needs to be provided or a pet_method selected') + } + if ((pet_method == 'hargreaves' | pet_method == 'hargreaves_modified') & (is.null(tasmax) | is.null(tasmin))){ + stop (paste0('Need to provide tasmax and tasmin for the selected method ', pet_method)) + } + if (pet_method == 'thornthwaite' & is.null(tas)){ + stop (paste0('Need to provide tas for the selected method ', pet_method)) + } + print('WARNING: temperature needs to be in C and precipitation in mm/month') # there is no check + + # check if accumulation period is possible + if(accum > dim(prlr$data)[leadtime_dim][[1]]){ + stop(paste0('ERROR: Cannot compute accumulation of ', accum, ' months because loaded data has only ', dim(prlr$data)[leadtime_dim][[1]], ' months.')) + } + + # complete dates + dates <- prlr$attrs$Dates + dates_complete_daily <- as.Date(as.Date(paste(lubridate::year(min(dates)), 01, 01, sep='-')):as.Date(paste(lubridate::year(max(dates)), 12, 31, sep='-'))) + dates_complete_daily_to_monthly <- lubridate::day(dates_complete_daily) + dates_complete_monthly <- dates_complete_daily[which(dates_complete_daily_to_monthly == 1)] + dates_monthly <- array(0, dim=length(dates_complete_monthly)) + for (dd in 1:length(dates)){ + ii <- which(dates_complete_monthly == as.Date(paste(lubridate::year(dates[dd]), lubridate::month(dates[dd]), 01, sep = '-'))) + dates_monthly[ii] <- 1 + } + + # Evapotranspiration estimation (unless pet is already provided) + if (is.null(pet)){ + if (pet_method == 'hargreaves'){ + data <- list(tasmax = tasmax$data, tasmin = tasmin$data, lat = prlr$coords$latitude) + } else if (pet_method == 'hargreaves_modified'){ + data <- list(tasmax = tasmax$data, tasmin = tasmin$data, prlr = prlr$data, lat = prlr$coords$latitude) + } else if (pet_method == 'thornthwaite'){ + data <- list(tas = tas$data, lat = prlr$coords$latitude) + } + pet <- evapotranspiration(data, dates_monthly, pet_method, time_dim, leadtime_dim, memb_dim, lon_dim, lat_dim, n_procs) + } + + # Accumulation + diff_P_PET <- prlr$data - pet + data_accum <- accumulation(diff_P_PET, dates_monthly, accum, time_dim, leadtime_dim, memb_dim, n_procs) + + # Standardization: + if (standardization == TRUE){ + spei_dat <- spei_standardization(data_accum = data_accum, + leadtime_dim = leadtime_dim, + time_dim = time_dim, + memb_dim = memb_dim, + cross_validation = cross_validation, + handle_infinity = handle_infinity, + n_procs = n_procs, + accum = accum, + param_error = param_error, + method = method, distribution = distribution, fit = fit) + } else { + spei_dat <- data_accum + } + + return(spei_dat) +} + + + +## functions + +evapotranspiration <- function(data, dates_monthly, pet_method = 'hargreaves', + time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', + lon_dim = 'longitude', lat_dim = 'latitude', + n_procs = 4){ + + lat_mask <- InsertDim(InsertDim(data$lat,pos = 1, len = 1, name = 'dat'),pos = 3, len = dim(data[[1]])[lon_dim], name = lon_dim) + + # extract mask of NA locations to return to NA the final result + mask_NA <- array(1, dim = dim(data[[1]])) + if (pet_method == 'hargreaves'){ + mask_NA[which(is.na(data$tasmax))] <- 0 + mask_NA[which(is.na(data$tasmin))] <- 0 + } + if (pet_method == 'hargreaves_modified'){ + mask_NA[which(is.na(data$tasmax))] <- 0 + mask_NA[which(is.na(data$tasmin))] <- 0 + mask_NA[which(is.na(data$prlr))] <- 0 + } + if (pet_method == 'thornthwaite'){ + mask_NA[which(is.na(tas$data))] <- 0 + } + + # replace NA with 0 + for (dd in 1:length(data)){ + data[[dd]][which(is.na(data[[dd]]))] <- 0 + } + + # prepare data + if (pet_method == 'hargreaves'){ + data_input <- list(lat_mask = lat_mask, tasmax = data$tasmax, tasmin = data$tasmin) + targetdim_input <- list(lat_mask = c('dat'), tasmax = c(leadtime_dim, time_dim), tasmin = c(leadtime_dim, time_dim)) + } else if (pet_method == 'hargreaves_modified'){ + data_input <- list(lat_mask = lat_mask, tasmax = data$tasmax, tasmin = data$tasmin, prlr = data$prlr) + targetdim_input <- list(lat_mask = c('dat'), tasmax = c(leadtime_dim, time_dim), tasmin = c(leadtime_dim, time_dim), prlr = c(leadtime_dim, time_dim)) + } else if (pet_method == 'thornthwaite'){ + data_input <- list(lat_mask = lat_mask, tas = data$tas) + targetdim_input <- list(lat_mask = c('dat'), tas = c(leadtime_dim, time_dim)) + } else { + stop (paste0('Unknown pet_method ', pet_method)) + } + + PET_estimated <- Apply(data = data_input, + target_dims = targetdim_input, + output_dims = c(leadtime_dim, time_dim), + pet_method = pet_method, + dates_monthly = dates_monthly, # array of 0 and 1 used to build the data_input to a complete array of the full years that are considered + dim_month = leadtime_dim, dim_year = time_dim, + fun = atomic_pet, ncores = n_procs) + + # reorder dims in PET_estimated + dims_order <- array(NA, length(dim(data[[1]]))) + for (ord in 1:length(dim(data[[1]]))){ + dims_order[ord] <- which(names(dim(PET_estimated$output1)) == names(dim(data[[1]]))[ord]) + } + data_pet <- aperm(PET_estimated$output1, dims_order) + + # restore original NAs from mask_NA + data_pet[which(mask_NA == 0 )] <- NA + + return(data_pet) + +} + +accumulation <- function(diff_P_PET, dates_monthly, accum = 1, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', n_procs = 4){ + + if(!time_dim %in% names(dim(diff_P_PET))){ + diff_P_PET <- InsertDim(diff_P_PET, posdim = 1, lendim = 1, name = time_dim) + } + if(!leadtime_dim %in% names(dim(diff_P_PET))){ + diff_P_PET <- InsertDim(diff_P_PET, posdim = 1, lendim = 1, name = leadtime_dim) + } + + accum_result <- Apply(data = list(diff_P_PET), + target_dims = list(diff_P_PET = c(leadtime_dim,time_dim)), + dates_monthly = dates_monthly, # array of 0 and 1 used to build the data_input to a complete array of the full years that are considered + accum = accum, + output_dims = c(leadtime_dim, time_dim), #c('time','sdate'), + dim_month = leadtime_dim, dim_year = time_dim, + fun = atomic_accum, ncores = n_procs) + + # recover essential lost dims (if they had length 1 they'd have been dropped in previous step): + for (d in c(time_dim, leadtime_dim, 'latitude', 'longitude', memb_dim)){ + if(!d %in% names(dim(accum_result$output1))){ + accum_result$output1 <- InsertDim(data = accum_result$output1, posdim = length(names(dim(accum_result$output1))) + 1, lendim = 1, name = d) + } + } + + # reorder dims in accum_result + dims_order <- array(NA, length(dim(diff_P_PET))) + for (ord in 1:length(dim(diff_P_PET))){ + dims_order[ord] <- which(names(dim(accum_result$output1)) == names(dim(diff_P_PET))[ord]) + } + data_accum <- aperm(accum_result$output1, dims_order) + + return(data_accum) + +} + +spei_standardization <- function(data_accum, + leadtime_dim, time_dim, memb_dim, handle_infinity, cross_validation, + n_procs, accum, param_error, + method ='parametric', distribution = 'log-Logistic', fit='ub-pwm'){ + data_spei <- data_accum + + data_spei <- Apply(data = list(data_accum), + target_dims = list(data = c(leadtime_dim,time_dim, memb_dim)), + output_dims = c(leadtime_dim,time_dim, memb_dim), + dim_month = leadtime_dim, + dim_year = time_dim, + dim_memb = memb_dim, + handle_infinity = handle_infinity, + cross_validation = cross_validation, + method = method, distribution = distribution, fit = fit, + ref_period = NULL, + param_error = param_error, + fun = atomic_spei, + ncores = n_procs)$output1 + + return(data_spei) + +} + +### Atomic functions + +atomic_pet <- function(pet_method, dates_monthly, dim_month, dim_year, lat_mask = NULL, data2 = NULL, data3 = NULL, data4 = NULL){ + + ftime = as.numeric(dim(data2)[leadtime_dim]) + n_sdates = as.numeric(dim(data2)[time_dim]) + + # create a vector from data but adding 0 to achive complete time series of the considered period + #(starting in January of the first year) so that the solar radiation estimation is computed + # in each case for the correct month + if (!is.null(data2)){ + data_tmp <- as.vector(data2) + data2 <- array(0, dim = length(dates_monthly)) + count <- 1 + for (dd in 1:length(dates_monthly)){ + if (dates_monthly[dd] == 1){ + data2[dd] <- data_tmp[count] + count <- count + 1 + } + } + rm(data_tmp) + } + if (!is.null(data3)){ + data_tmp <- as.vector(data3) + data3 <- array(0, dim = length(dates_monthly)) + count <- 1 + for (dd in 1:length(dates_monthly)){ + if (dates_monthly[dd] == 1){ + data3[dd] <- data_tmp[count] + count <- count + 1 + } + } + rm(data_tmp) + } + if (!is.null(data4)){ + data_tmp <- as.vector(data4) + data4 <- array(0, dim = length(dates_monthly)) + count <- 1 + for (dd in 1:length(dates_monthly)){ + if (dates_monthly[dd] == 1){ + data4[dd] <- data_tmp[count] + count <- count + 1 + } + } + rm(data_tmp) + } + if (pet_method == 'hargreaves'){ + x_PET <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), lat = lat_mask, na.rm = FALSE) + x_PET <- x_PET[which(dates_monthly == 1)] # line to return the vector to the size of the actual original data + PET <- array(x_PET, dim = c(ftime,n_sdates)) + } + + if (pet_method == 'hargreaves_modified'){ # not sure this works properly + PET <- array(NA, dim = c(ftime,n_sdates)) + for(ns in 1:n_sdates){ + tmax_mod_temp <- data2[12*(ns-1)+(1:12)] #data2[,ns] + tmin_mod_temp <- data3[12*(ns-1)+(1:12)] #data3[,ns] + pre_mod_temp <- data4[12*(ns-1)+(1:12)] #data4[,ns] + + # Computation of PET + x_PET <- hargreaves(Tmin = tmin_mod_temp, Tmax = tmax_mod_temp, lat = lat_mask, Pre = pre_mod_temp, na.rm = FALSE) + PET[1:ftime,ns] <- x_PET #dim <- (lon lat ft sd memb) + } + } + + if (pet_method == 'thornthwaite'){ + x_PET <- thornthwaite(as.vector(data2), lat = lat_mask, na.rm = TRUE) + x_PET <- x_PET[which(dates_monthly == 1)] # line to return the vector to the size of the actual original data + PET <- array(x_PET, dim = c(ftime,n_sdates)) + } + + return(PET) + +} + +atomic_accum <- function(data, dates_monthly, accum, dim_month, dim_year){ + # data:[time, syear] + + ftime = as.numeric(dim(data)[dim_month]) + n_sdates = as.numeric(dim(data)[dim_year]) + + data_vector <- array(0, dim = length(dates_monthly)) + count <- 1 + for (dd in 1:length(dates_monthly)){ + if (dates_monthly[dd] == 1){ + data_vector[dd] <- as.vector(data)[count] + count <- count + 1 + } + } + + # Accumulation at different timescales + data_sum_x <- rollapply(data_vector, accum, sum) # rollapply {zoo} A generic function for applying a function to rolling margins of an array. + data_sum_x <- c(rep(NA, accum-1), data_sum_x) # adds as many NAs as needed at the begining to account for the months that cannot be added (depends on accu) and so that the position in the vector corresponds to the accumulated of the previous months (instead of the accumulated of the next months) + data_sum_x <- data_sum_x[which(dates_monthly == 1)] # discard the months that don't appear in the original data + accum_result <- array(data_sum_x, dim = c(ftime,n_sdates)) # return to matrix form + if (accum > 1){ + accum_result[1:(accum-1),] <- NA # replace by NA when the accumulation corresponds to months that where not present in the original data + } + + return(accum_result) + +} + +atomic_spei <- function(data, dim_month, dim_year, dim_memb, ref_period, handle_infinity, cross_validation, param_error, + method ='parametric', distribution = 'log-Logistic', fit='ub-pwm'){ + # data: [time, sdate, memb] + if (is.null(ref_period)){ + ref.start <- NULL + ref.end <- NULL + } else { + ref.start <- ref_period[[1]] + ref.end <- ref_period[[2]] + } + + if (all(is.na(data))) { + speiX <- array(NA, dim(data)) + } else if (var(data, na.rm=T) == 0) { # if the data [time, sdate, memb] has no variability it will raise an error further down, so we assign a value to the result and skip the step + speiX <- array(param_error, dim(data)) + } else { + speiX <- spX(data, ref.start=ref.start, ref.end=ref.end, + method = method, distribution = distribution, fit = fit, + dim_month = dim_month, dim_year = dim_year, dim_memb = dim_memb, + handle_infinity = handle_infinity, cross_validation = cross_validation) + } + return(speiX) + +} + +### Support functions + +spX <- function(acc_data, ref.start, ref.end, na.rm = TRUE, method = 'parametric', distribution = 'log-Logistic', fit = 'ub-pwm', dim_month, dim_year, dim_memb, handle_infinity, cross_validation){ + + # acc_data: [ftime, syear, nmemb] + + if (!(method %in% c('parametric', 'non-parametric'))) { + stop('pcX script error: SPEI can be only computed using the following approach: parametric or non-parametric') + } + + ftime <- as.numeric(dim(acc_data)[dim_month]) + n_sdates <- as.numeric(dim(acc_data)[dim_year]) + nmemb <- as.numeric(dim(acc_data)[dim_memb]) + + spei_mod <- array(data = NA, dim = c(ftime, n_sdates, nmemb)) + names(dim(spei_mod)) <- c(dim_month, dim_year, dim_memb) + + for(ff in 1:ftime){ # treat months independently + Accum_D_temp <- ClimProjDiags::Subset(acc_data, along = dim_month, indices = ff, drop = FALSE) + x_spei_mod <- spX_ftime(Accum_D_temp, na.rm = TRUE, method = method, distribution = distribution, fit = fit, ref.start = ref.start, ref.end = ref.end, n_sdates = n_sdates, nmemb = nmemb, handle_infinity = handle_infinity, cross_validation = cross_validation) + spei_mod[ff,,] <- x_spei_mod + } + + return(spei_mod) +} + +# HandleInfinity: +spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm = TRUE, method = 'parametric', n_sdates, nmemb, + ref.start = NULL, ref.end = NULL, params = NULL, handle_infinity = FALSE, cross_validation = FALSE) { + + # data:[ftime = 1, syear, ensemble] + + if (!(method %in% c('parametric', 'non-parametric'))) { + stop('SPEI can be only computed using the following approach: parametric or non-parametric') + } + + if (method == 'non-parametric'){ + + if (anyNA(data) && na.rm == FALSE) { + stop('Error: Data must not contain NAs') + } + + bp = matrix(0, length(data), 1) + for (i in 1:length(data)) { + bp[i,1] = sum(data[] <= data[i]); # Writes the rank of the data + } + + SPEI = qnorm((bp-0.44)/(length(data)+0.12)) + + return(SPEI) + + } else { + + std_index <- array(NA, c(n_sdates, nmemb)) + + if (anyNA(data) && na.rm==FALSE) { + stop('Error: Data must not contain NAs') + } + if (!(distribution %in% c('log-Logistic', 'Gamma', 'PearsonIII'))) { + stop('Distrib must be one of "log-Logistic", "Gamma" or "PearsonIII"') + } + if (!(fit %in% c('max-lik', 'ub-pwm', 'pp-pwm'))) { + stop('Method must be one of "ub-pwm" (default), "pp-pwm" or "max-lik"') + } + + coef = switch(distribution, + "Gamma" = array(NA, dim = 2, dimnames = list(c('alpha','beta'))), + "log-Logistic" = array(NA, dim = 3, dimnames = list(c('xi','alpha','kappa'))), + "PearsonIII" = array(NA, dim = 3, dimnames = list(c('mu','sigma','gamma'))) + ) + + dim_one <- length(coef) + + if (!is.null(params)) { + if (length(params)!=dim_one) { + stop(paste0('parameters array should have dimensions [', dim_one, ']')) + } + } + + # Select window if necessary + if (!is.null(ref.start) && !is.null(ref.end)) { + data.fit <- window(data,ref.start,ref.end) + } else { + data.fit <- data + } + + if(cross_validation == "TRUE") { + loop_years <- n_sdates + } else { + loop_years <- 1 + } + + for (nsd in 1:loop_years) { # loop over years (in case of cross_validation) + # Cumulative series (acu) + if (cross_validation == TRUE){ + acu <- as.vector(data.fit[,-nsd,]) + } else { + acu <- as.vector(data.fit) + } + + acu.sorted <- sort.default(acu, method = "quick") + acu.sorted <- acu.sorted[!is.na(acu.sorted)] # remove NAs (no need if(na.rm) because if there are NA and na.rm=F we don't get to this point) + if (length(acu.sorted)!=0){ # else all acu was NA and we don't need to continue with this case + acu_sd = sd(acu.sorted) + if (!is.na(acu_sd)){ + if (acu_sd != 0){ + if(distribution != "log-Logistic"){ + pze <- sum(acu==0)/length(acu) + acu.sorted = acu.sorted[acu.sorted > 0] + } + if (!is.null(params)) { + f_params = as.vector(params) + } else { + if (length(acu.sorted) >= 4){ # else coef will be NA + # Calculate probability weighted moments based on fit with lmomco or TLMoments + pwm = switch(fit, + "pp-pwm" = pwm.pp(acu.sorted,-0.35,0, nmom=3), + pwm.ub(acu.sorted, nmom=3) + #TLMoments::PWM(acu.sorted, order=0:2) + ) + + # Check L-moments validity + lmom <- pwm2lmom(pwm) + if (!(!are.lmom.valid(lmom) || anyNA(lmom[[1]]) || any(is.nan(lmom[[1]])))){ + + # lmom fortran functions need specific inputs L1, L2, T3 + # this is handled by lmomco internally with lmorph + fortran_vec = c(lmom$lambdas[1:2], lmom$ratios[3]) + + # Calculate parameters based on distribution with lmom then lmomco + f_params = switch(distribution, + "log-Logistic" = tryCatch(lmom::pelglo(fortran_vec), error = function(e){ parglo(lmom)$para }), + "Gamma" = tryCatch(lmom::pelgam(fortran_vec), error = function(e){ pargam(lmom)$para }), + "PearsonIII" = tryCatch(lmom::pelpe3(fortran_vec), error = function(e){ parpe3(lmom)$para }) + ) + + # Adjust if user chose log-Logistic and max-lik + if(distribution == 'log-Logistic' && fit == 'max-lik'){ + f_params = parglo.maxlik(acu.sorted, f_params)$para + } + } # end if dor the case the L-moments are not valid (std_index will be NA) + } # end if for the case there are not enough values to estimate the parameters (std_index will be NA) + } # end estimation of f_param + # Calculate cdf based on distribution with lmom + cdf_res = switch(distribution, + "log-Logistic" = lmom::cdfglo(data, f_params), + "Gamma" = lmom::cdfgam(data, f_params), + "PearsonIII" = lmom::cdfpe3(data, f_params) + ) + + std_index_cv <- array(qnorm(cdf_res), dim = c(n_sdates,nmemb)) + + # Adjust if user chose Gamma or PearsonIII - Not tested: For future development + #if(distribution != 'log-Logistic'){ + # std_index[ff,s] = qnorm(pze + (1-pze)*pnorm(std_index[ff,s])) # ff doesn't exist at this point + #} + if (cross_validation == TRUE){ + std_index[nsd,] <- std_index_cv[nsd,] + } else { + std_index <- std_index_cv + } + } + } # end if for the case there is no variability + } # end if for the case all NA in acu + } # next year (in case of cross_validation or all done if cross_validation == F) + + if(handle_infinity == 'TRUE'){ # could also use "param_error" ?; we are giving it the min/max value of the grid point + std_index[is.infinite(std_index) & std_index < 0] <- min(std_index[!is.infinite(std_index)]) + std_index[is.infinite(std_index) & std_index > 0] <- max(std_index[!is.infinite(std_index)]) + } + + } # end parametric + + return(std_index) # fitted is the only thing used after calling this function + +} + -- GitLab From bc82389d1aa70ce237850ddfeba1775fcf6af2ab Mon Sep 17 00:00:00 2001 From: allabres Date: Tue, 9 May 2023 15:37:03 +0200 Subject: [PATCH 02/42] SPEI working version --- R/PeriodSPEI.R | 246 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 194 insertions(+), 52 deletions(-) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index d56916b..43fdb56 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -1,12 +1,97 @@ -PeriodSPEI <- function(tas = NULL, tasmax = NULL, tasmin = NULL, pet = NULL, prlr, # s2dv_cubes (with coordinates lat = prlr$coords$latitude and Dates) + +## libraries + library(s2dv) # InsertDim, ... + library(multiApply) # Apply + library(ClimProjDiags) # Subset + library(zeallot) # multiple assignment %<-% + library(SPEI) # estimation of evapotranspiration (e.g. hargreaves function) + library(zoo) # rollapply, as.Date + library(TLMoments) # pwd (Calculate probability weighted moments based on fit) + library(lmomco) # pwm2lmom (probability weighted moments to L-Moments) + library(lubridate) # year(), month(),... + +CST_PeriodSPEI <- function(exp, exp_cor = NULL, # lists of s2dv_cubes time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', lon_dim = 'longitude', lat_dim = 'latitude', accum = 1, start = NULL, end = NULL, - pet_method = NULL, + pet_method_exp = 'hargreaves', + pet_method_cor = 'hargreaves', standardization = TRUE, param_error = -9999, handle_infinity = FALSE, - cross_validation = FALSE, - method = 'parametric', distribution = 'log-Logistic', fit='ub-pwm', + cross_validation = FALSE, # forced to FALSE if exp_cor is NOT NULL + method = 'parametric', distribution = 'log-Logistic', fit = 'ub-pwm', + n_procs = 4){ + + # obtain variables from exp (which is a list of s2dv_cubes); some of them can be NULL (if they are NULL or not on the list) + tas_exp <- exp$tas + tasmax_exp <- exp$tasmax + tasmin_exp <- exp$tasmin + pet_exp <- exp$pet + prlr_exp <- exp$prlr + + if (!is.null(exp_cor)){ + cross_validation_call <- FALSE # forced to FALSE otherwise we cannot get the params needed to standardize exp_cor + } else { + cross_validation_call <- cross_validation + } + + # call PeriodSPEI for exp (and also obtain params of the standardization, in case standardization == TRUE; + # if it's FALSE, it's not a problem because we won't need to standardize exp_cor either) + spei_exp <- PeriodSPEI(tas = tas_exp, tasmax = tasmax_exp, tasmin = tasmin_exp, pet = pet_exp, prlr = prlr_exp, + # line above need to be s2dv_cubes (with coordinates lat = prlr$coords$latitude and Dates); prlr is always needed + time_dim = time_dim, leadtime_dim = leadtime_dim, memb_dim = memb_dim, lon_dim = lon_dim, lat_dim = lat_dim, + accum = accum, start = start, end = end, + pet_method = pet_method_exp, + standardization = standardization, + params = NULL, # not used if standardization is FALSE + param_error = param_error, # not used if standardization is FALSE + handle_infinity = handle_infinity, # not used if standardization is FALSE + cross_validation = cross_validation_call, # not used if standardization is FALSE + method = method, # not used if standardization is FALSE + distribution = distribution, # not used if standardization is FALSE or method is non-parametric + fit = fit, # not used if standardization is FALSE or method is non-parametric + n_procs = n_procs) + + # obtain variables from exp_cor (in case it is not NULL) + if (!is.null(exp_cor)){ + tas_cor <- exp_cor$tas + tasmax_cor <- exp_cor$tasmax + tasmin_cor <- exp_cor$tasmin + pet_cor <- exp_cor$pet + prlr_cor <- exp_cor$prlr + + # obtain spei of exp_cor with parameters of exp for the standardization + spei_cor <- PeriodSPEI(tas = tas_cor, tasmax = tasmax_cor, tasmin = tasmin_cor, pet = pet_cor, prlr = prlr_cor, + time_dim = time_dim, leadtime_dim = leadtime_dim, memb_dim = memb_dim, lon_dim = lon_dim, lat_dim = lat_dim, + accum = accum, start = start, end = end, + pet_method = pet_method_cor, + standardization = standardization, + params = spei_exp$params, # not used if standardization is FALSE + param_error = param_error, # not used if standardization is FALSE + handle_infinity = handle_infinity, # not used if standardization is FALSE + cross_validation = cross_validation_call, # not used if standardization is FALSE + method = method, # not used if standardization is FALSE + distribution = distribution, # not used if standardization is FALSE or method is non-parametric + fit = fit, # not used if standardization is FALSE or method is non-parametric + n_procs = n_procs) + + return(list(spei_exp = spei_exp, spei_cor = spei_cor)) + } else { + return(spei_exp) + } + +} + +PeriodSPEI <- function(tas = NULL, tasmax = NULL, tasmin = NULL, pet = NULL, prlr, + time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', lon_dim = 'longitude', lat_dim = 'latitude', + accum = 1, start = NULL, end = NULL, + pet_method = NULL, + standardization = TRUE, + params = NULL, # not used if standardization is FALSE + param_error = -9999, # not used if standardization is FALSE + handle_infinity = FALSE, # not used if standardization is FALSE + cross_validation = FALSE, # not used if standardization is FALSE + method = 'parametric', distribution = 'log-Logistic', fit = 'ub-pwm', # not used if standardization is FALSE n_procs = 4){ # check provided data and parameters: @@ -27,12 +112,14 @@ PeriodSPEI <- function(tas = NULL, tasmax = NULL, tasmin = NULL, pet = NULL, prl # check if accumulation period is possible if(accum > dim(prlr$data)[leadtime_dim][[1]]){ - stop(paste0('ERROR: Cannot compute accumulation of ', accum, ' months because loaded data has only ', dim(prlr$data)[leadtime_dim][[1]], ' months.')) + stop(paste0('ERROR: Cannot compute accumulation of ', accum, ' months because loaded data has only ', + dim(prlr$data)[leadtime_dim][[1]], ' months.')) } # complete dates dates <- prlr$attrs$Dates - dates_complete_daily <- as.Date(as.Date(paste(lubridate::year(min(dates)), 01, 01, sep='-')):as.Date(paste(lubridate::year(max(dates)), 12, 31, sep='-'))) + dates_complete_daily <- as.Date(as.Date(paste(lubridate::year(min(dates)), 01, 01, sep='-')):as.Date(paste(lubridate::year(max(dates)), + 12, 31, sep='-'))) dates_complete_daily_to_monthly <- lubridate::day(dates_complete_daily) dates_complete_monthly <- dates_complete_daily[which(dates_complete_daily_to_monthly == 1)] dates_monthly <- array(0, dim=length(dates_complete_monthly)) @@ -68,7 +155,7 @@ PeriodSPEI <- function(tas = NULL, tasmax = NULL, tasmin = NULL, pet = NULL, prl n_procs = n_procs, accum = accum, param_error = param_error, - method = method, distribution = distribution, fit = fit) + params = params, method = method, distribution = distribution, fit = fit) } else { spei_dat <- data_accum } @@ -85,7 +172,8 @@ evapotranspiration <- function(data, dates_monthly, pet_method = 'hargreaves', lon_dim = 'longitude', lat_dim = 'latitude', n_procs = 4){ - lat_mask <- InsertDim(InsertDim(data$lat,pos = 1, len = 1, name = 'dat'),pos = 3, len = dim(data[[1]])[lon_dim], name = lon_dim) + lat_mask <- InsertDim(InsertDim(data$lat,pos = 1, len = 1, name = 'dat'), pos = 3, len = dim(data[[1]])[lon_dim], name = lon_dim) + if(names(dim(lat_mask)[2]) == ""){names(dim(lat_mask))[2] <- 'latitude'} # extract mask of NA locations to return to NA the final result mask_NA <- array(1, dim = dim(data[[1]])) @@ -113,7 +201,8 @@ evapotranspiration <- function(data, dates_monthly, pet_method = 'hargreaves', targetdim_input <- list(lat_mask = c('dat'), tasmax = c(leadtime_dim, time_dim), tasmin = c(leadtime_dim, time_dim)) } else if (pet_method == 'hargreaves_modified'){ data_input <- list(lat_mask = lat_mask, tasmax = data$tasmax, tasmin = data$tasmin, prlr = data$prlr) - targetdim_input <- list(lat_mask = c('dat'), tasmax = c(leadtime_dim, time_dim), tasmin = c(leadtime_dim, time_dim), prlr = c(leadtime_dim, time_dim)) + targetdim_input <- list(lat_mask = c('dat'), tasmax = c(leadtime_dim, time_dim), + tasmin = c(leadtime_dim, time_dim), prlr = c(leadtime_dim, time_dim)) } else if (pet_method == 'thornthwaite'){ data_input <- list(lat_mask = lat_mask, tas = data$tas) targetdim_input <- list(lat_mask = c('dat'), tas = c(leadtime_dim, time_dim)) @@ -125,7 +214,7 @@ evapotranspiration <- function(data, dates_monthly, pet_method = 'hargreaves', target_dims = targetdim_input, output_dims = c(leadtime_dim, time_dim), pet_method = pet_method, - dates_monthly = dates_monthly, # array of 0 and 1 used to build the data_input to a complete array of the full years that are considered + dates_monthly = dates_monthly, # array of 0 and 1 used to build the data_input to complete array of the full years dim_month = leadtime_dim, dim_year = time_dim, fun = atomic_pet, ncores = n_procs) @@ -154,7 +243,7 @@ accumulation <- function(diff_P_PET, dates_monthly, accum = 1, time_dim = 'syear accum_result <- Apply(data = list(diff_P_PET), target_dims = list(diff_P_PET = c(leadtime_dim,time_dim)), - dates_monthly = dates_monthly, # array of 0 and 1 used to build the data_input to a complete array of the full years that are considered + dates_monthly = dates_monthly, # array of 0 and 1 used to build the data_input to complete array of the full years accum = accum, output_dims = c(leadtime_dim, time_dim), #c('time','sdate'), dim_month = leadtime_dim, dim_year = time_dim, @@ -181,24 +270,38 @@ accumulation <- function(diff_P_PET, dates_monthly, accum = 1, time_dim = 'syear spei_standardization <- function(data_accum, leadtime_dim, time_dim, memb_dim, handle_infinity, cross_validation, n_procs, accum, param_error, - method ='parametric', distribution = 'log-Logistic', fit='ub-pwm'){ - data_spei <- data_accum - - data_spei <- Apply(data = list(data_accum), - target_dims = list(data = c(leadtime_dim,time_dim, memb_dim)), - output_dims = c(leadtime_dim,time_dim, memb_dim), - dim_month = leadtime_dim, - dim_year = time_dim, - dim_memb = memb_dim, - handle_infinity = handle_infinity, - cross_validation = cross_validation, - method = method, distribution = distribution, fit = fit, - ref_period = NULL, - param_error = param_error, - fun = atomic_spei, - ncores = n_procs)$output1 + params = NULL, method ='parametric', distribution = 'log-Logistic', fit='ub-pwm'){ - return(data_spei) + n_leadtimes <- dim(data_accum)[leadtime_dim] + n_sdates_params <- dim(data_accum)[time_dim] + if (!cross_validation){ + n_sdates_params <- 1 + } + + if (is.null(params)){ + params <- array(NA, dim = c(n_sdates_params, n_leadtimes, coef = 3)) + names(dim(params)) <- c(time_dim, leadtime_dim, 'coef') + } else if (length(dim(params)) < 2) { + params <- array(params, dim = c(length(params), n_sdates_params, n_leadtimes)) + params <- aperm(params, c(2,3,1)) # dim(params): [time_dim, leadtime_dim, coef] with the values repeated each time_dim and leadtime_dim + names(dim(params)) <- c(time_dim, leadtime_dim, 'coef') + } + + spei <- Apply(data = list(data = data_accum, params = params), + target_dims = list(data = c(leadtime_dim, time_dim, memb_dim), params = c(time_dim, leadtime_dim, 'coef')), + output_dims = list(data_spei = c(leadtime_dim, time_dim, memb_dim), params = c(time_dim, leadtime_dim, 'coef')), + dim_month = leadtime_dim, + dim_year = time_dim, + dim_memb = memb_dim, + handle_infinity = handle_infinity, + cross_validation = cross_validation, + method = method, distribution = distribution, fit = fit, + ref_period = NULL, + param_error = param_error, + fun = atomic_spei, + ncores = n_procs) + + return(spei) # spei is a list of data_spei and params } @@ -294,7 +397,9 @@ atomic_accum <- function(data, dates_monthly, accum, dim_month, dim_year){ # Accumulation at different timescales data_sum_x <- rollapply(data_vector, accum, sum) # rollapply {zoo} A generic function for applying a function to rolling margins of an array. - data_sum_x <- c(rep(NA, accum-1), data_sum_x) # adds as many NAs as needed at the begining to account for the months that cannot be added (depends on accu) and so that the position in the vector corresponds to the accumulated of the previous months (instead of the accumulated of the next months) + data_sum_x <- c(rep(NA, accum-1), data_sum_x) # adds as many NAs as needed at the begining to account for the months that cannot be added + # (depends on accu) and so that the position in the vector corresponds to the accumulated + # of the previous months (instead of the accumulated of the next months) data_sum_x <- data_sum_x[which(dates_monthly == 1)] # discard the months that don't appear in the original data accum_result <- array(data_sum_x, dim = c(ftime,n_sdates)) # return to matrix form if (accum > 1){ @@ -305,8 +410,8 @@ atomic_accum <- function(data, dates_monthly, accum, dim_month, dim_year){ } -atomic_spei <- function(data, dim_month, dim_year, dim_memb, ref_period, handle_infinity, cross_validation, param_error, - method ='parametric', distribution = 'log-Logistic', fit='ub-pwm'){ +atomic_spei <- function(data, params, dim_month, dim_year, dim_memb, ref_period, handle_infinity, cross_validation, + param_error, method ='parametric', distribution = 'log-Logistic', fit='ub-pwm'){ # data: [time, sdate, memb] if (is.null(ref_period)){ ref.start <- NULL @@ -318,23 +423,30 @@ atomic_spei <- function(data, dim_month, dim_year, dim_memb, ref_period, handle_ if (all(is.na(data))) { speiX <- array(NA, dim(data)) - } else if (var(data, na.rm=T) == 0) { # if the data [time, sdate, memb] has no variability it will raise an error further down, so we assign a value to the result and skip the step + } else if (var(data, na.rm=T) == 0) { # if the data [time, sdate, memb] has no variability it will raise an error further down, + # so we assign a value to the result and skip the step speiX <- array(param_error, dim(data)) } else { - speiX <- spX(data, ref.start=ref.start, ref.end=ref.end, - method = method, distribution = distribution, fit = fit, - dim_month = dim_month, dim_year = dim_year, dim_memb = dim_memb, - handle_infinity = handle_infinity, cross_validation = cross_validation) + c(speiX, params) %<-% spX(data, ref.start=ref.start, ref.end=ref.end, params = params, + method = method, distribution = distribution, fit = fit, + dim_month = dim_month, dim_year = dim_year, dim_memb = dim_memb, + handle_infinity = handle_infinity, cross_validation = cross_validation) } - return(speiX) + + return(list(speiX, params)) } ### Support functions -spX <- function(acc_data, ref.start, ref.end, na.rm = TRUE, method = 'parametric', distribution = 'log-Logistic', fit = 'ub-pwm', dim_month, dim_year, dim_memb, handle_infinity, cross_validation){ - +spX <- function(acc_data, ref.start, ref.end, params = params, na.rm = TRUE, + method = 'parametric', distribution = 'log-Logistic', fit = 'ub-pwm', + dim_month, dim_year, dim_memb, handle_infinity, cross_validation){ + # acc_data: [ftime, syear, nmemb] + # params: [syear, ftime, coef] + + n_coef_max <- dim(params)['coef'] # maximum number of parameters needed to define any of the considered distributions if (!(method %in% c('parametric', 'non-parametric'))) { stop('pcX script error: SPEI can be only computed using the following approach: parametric or non-parametric') @@ -346,14 +458,31 @@ spX <- function(acc_data, ref.start, ref.end, na.rm = TRUE, method = 'parametric spei_mod <- array(data = NA, dim = c(ftime, n_sdates, nmemb)) names(dim(spei_mod)) <- c(dim_month, dim_year, dim_memb) + if (cross_validation){ + params_result <- array(data = NA, dim = c(n_sdates, ftime, n_coef_max)) + } else { + params_result <- array(data = NA, dim = c(1, ftime, n_coef_max)) + } - for(ff in 1:ftime){ # treat months independently + for(ff in 1:ftime){ # treat months independently Accum_D_temp <- ClimProjDiags::Subset(acc_data, along = dim_month, indices = ff, drop = FALSE) - x_spei_mod <- spX_ftime(Accum_D_temp, na.rm = TRUE, method = method, distribution = distribution, fit = fit, ref.start = ref.start, ref.end = ref.end, n_sdates = n_sdates, nmemb = nmemb, handle_infinity = handle_infinity, cross_validation = cross_validation) + c(x_spei_mod, params_ff) %<-% spX_ftime(Accum_D_temp, na.rm = TRUE, method = method, distribution = distribution, + fit = fit, ref.start = ref.start, ref.end = ref.end, + params = if(all(is.na(params))){NULL}else{params[,ff,]}, n_sdates = n_sdates, nmemb = nmemb, + handle_infinity = handle_infinity, cross_validation = cross_validation) spei_mod[ff,,] <- x_spei_mod + coef_names <- names(params_ff) + if (length(params_ff) < n_coef_max){ # lengthen dimension coef of params_ff in case it doesn't match the corresponding dimension of parms_months + params_ff <- append(params_ff, array(NA, dim = n_coef_max - length(params_ff))) + coef_names <- append(coef_names, '') + } + params_result[,ff,] <- params_ff } + + colnames(params_result) <- coef_names + names(dim(params_result)) <- c(dim_year, dim_month, 'coef') - return(spei_mod) + return(list(spei = spei_mod, params = params_result)) } # HandleInfinity: @@ -379,7 +508,7 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm SPEI = qnorm((bp-0.44)/(length(data)+0.12)) - return(SPEI) + return(SPEI) # it won't return params to be used in exp_cor; also it is not using handle_infinity nor cross_validation } else { @@ -421,6 +550,9 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm } else { loop_years <- 1 } + + params_result <- array(NA, dim = c(loop_years, dim_one)) + colnames(params_result) <- names(coef) for (nsd in 1:loop_years) { # loop over years (in case of cross_validation) # Cumulative series (acu) @@ -442,6 +574,7 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm } if (!is.null(params)) { f_params = as.vector(params) + params_result[nsd,] <- f_params } else { if (length(acu.sorted) >= 4){ # else coef will be NA # Calculate probability weighted moments based on fit with lmomco or TLMoments @@ -470,16 +603,23 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm if(distribution == 'log-Logistic' && fit == 'max-lik'){ f_params = parglo.maxlik(acu.sorted, f_params)$para } + params_result[nsd,] <- f_params } # end if dor the case the L-moments are not valid (std_index will be NA) } # end if for the case there are not enough values to estimate the parameters (std_index will be NA) } # end estimation of f_param # Calculate cdf based on distribution with lmom - cdf_res = switch(distribution, + if (all(is.na(params_result[nsd,]))){ + cdf_res <- NA + } else { + f_params <- params_result[nsd,] + f_params <- f_params[which(!is.na(f_params))] + cdf_res = switch(distribution, "log-Logistic" = lmom::cdfglo(data, f_params), "Gamma" = lmom::cdfgam(data, f_params), "PearsonIII" = lmom::cdfpe3(data, f_params) - ) - + ) + } + std_index_cv <- array(qnorm(cdf_res), dim = c(n_sdates,nmemb)) # Adjust if user chose Gamma or PearsonIII - Not tested: For future development @@ -490,20 +630,22 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm std_index[nsd,] <- std_index_cv[nsd,] } else { std_index <- std_index_cv - } + } } } # end if for the case there is no variability } # end if for the case all NA in acu } # next year (in case of cross_validation or all done if cross_validation == F) - + if(handle_infinity == 'TRUE'){ # could also use "param_error" ?; we are giving it the min/max value of the grid point std_index[is.infinite(std_index) & std_index < 0] <- min(std_index[!is.infinite(std_index)]) std_index[is.infinite(std_index) & std_index > 0] <- max(std_index[!is.infinite(std_index)]) } - - } # end parametric - return(std_index) # fitted is the only thing used after calling this function + return(list(std_index = std_index, params = params_result)) # f_params will be params only if cross_validation is FALSE + # (otherwise there will be one f_params per year; + # but the output params will be read only in the case that + # it is called with cross_validation FALSE) + + } # end parametric } - -- GitLab From f4f75caec0005b4e9d5969b791416c39c03182ac Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 25 May 2023 17:20:51 +0200 Subject: [PATCH 03/42] Develop PeriodSPEI within the package csindicators from Alba's code --- R/PeriodSPEI.R | 740 +++++++++++++++++++++++++++++++------------------ 1 file changed, 475 insertions(+), 265 deletions(-) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index d56916b..1eef63d 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -1,167 +1,334 @@ -PeriodSPEI <- function(tas = NULL, tasmax = NULL, tasmin = NULL, pet = NULL, prlr, # s2dv_cubes (with coordinates lat = prlr$coords$latitude and Dates) - time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', lon_dim = 'longitude', lat_dim = 'latitude', - accum = 1, start = NULL, end = NULL, - pet_method = NULL, - standardization = TRUE, - param_error = -9999, - handle_infinity = FALSE, - cross_validation = FALSE, - method = 'parametric', distribution = 'log-Logistic', fit='ub-pwm', - n_procs = 4){ - - # check provided data and parameters: - if (!is.null(pet) & !is.null(pet_method)){ - print ('WARNING: pet data is provided and also a pet estimation method, the provided pet data will be used and NOT estimated') - pet_method <- NULL - } - if (is.null(pet_method) & is.null(pet)){ - stop ('variable pet needs to be provided or a pet_method selected') - } - if ((pet_method == 'hargreaves' | pet_method == 'hargreaves_modified') & (is.null(tasmax) | is.null(tasmin))){ - stop (paste0('Need to provide tasmax and tasmin for the selected method ', pet_method)) - } - if (pet_method == 'thornthwaite' & is.null(tas)){ - stop (paste0('Need to provide tas for the selected method ', pet_method)) - } - print('WARNING: temperature needs to be in C and precipitation in mm/month') # there is no check - # check if accumulation period is possible - if(accum > dim(prlr$data)[leadtime_dim][[1]]){ - stop(paste0('ERROR: Cannot compute accumulation of ', accum, ' months because loaded data has only ', dim(prlr$data)[leadtime_dim][[1]], ' months.')) - } +#------------------------------------------------------------------------------- +# To DO: Add documentation of same dimensions exp and exp_cor +# To DO: Add documentation for units +# TO DO: Write Documentation + +# library(s2dv) # InsertDim, ... +# library(multiApply) # Apply +# library(ClimProjDiags) # Subset +# library(zeallot) # multiple assignment %<-% +# library(SPEI) # estimation of evapotranspiration (e.g. hargreaves function) +# library(zoo) # rollapply, as.Date +# library(TLMoments) # pwd (Calculate probability weighted moments based on fit) +# library(lmomco) # pwm2lmom (probability weighted moments to L-Moments) +# library(lubridate) # year(), month(),... + +# library(CSTools) +#------------------------------------------------------------------------------- +#'@param exp A list with the 's2dv_cube' object class containing the seasonal +#' forecast experiment in the data element. +#'@param exp_cor An object of class \code{s2dv_cube} in which the quantile +#' PeriodSPEI should be applied. If it is not specified, the PeriodSPEI +#' is calculated from object 'exp'. +#'@param pet Multidimensional array containing the Potential EvapoTranspiration +#' data. If it is NULL it is calculated using pet_method. It is NULL by default. +#'@param time_dim +#'@param leadtime_dim +#'@param memb_dim +#'@param lat_dim +#--- other params from CSIndicators +#'@param accum +#'@param start +#'@param end +#' +# --- other params +#'@param pet_method +#'@param standardization +#'@param params +#'@param param_error +#'@param handle_infinity +#'@param cross_validation +#'@param method +#'@param distribution +#'@param fit +#'@param ncores +CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, + time_dim = 'syear', leadtime_dim = 'time', + memb_dim = 'ensemble', lat_dim = 'latitude', + accum = 1, start = NULL, end = NULL, + pet_method = c('hargreaves', 'hargreaves'), + standardization = TRUE, + params = NULL, # not used if standardization is FALSE + param_error = -9999, # not used if standardization is FALSE + handle_infinity = FALSE, # not used if standardization is FALSE + cross_validation = FALSE, # not used if standardization is FALSE + method = 'parametric', distribution = 'log-Logistic', + fit = 'ub-pwm', # not used if standardization is FALSE + ncores = 4) { + + #----------------------------------------------------------------------------- + # Part (1): Initial structure checks + # coordinates + .KnownLatNames <- CSTools:::.KnownLatNames() + + if (!any(names(exp[[1]]$coords) %in% .KnownLatNames)) { + stop("Spatial coordinate names of parameter 'obsL' do not match any ", + "of the names accepted by the package.") + } + lat_name <- names(exp[[1]]$coords)[[which(names(exp[[1]]$coords) %in% .KnownLatNames)]] + + #----------------------------------------------------------------------------- + # Part (2): Call to PeriodSPEI + + res <- PeriodSPEI(exp = lapply(exp, function(x) x$data), + dates = exp[[1]]$attrs$Dates, + lat = exp[[1]]$coords[[lat_name]], + exp_cor = lapply(exp_cor, function(x) x$data), + pet = pet, + time_dim = time_dim, leadtime_dim = leadtime_dim, + memb_dim = memb_dim, lat_dim = lat_dim, + accum = accum, start = start, end = end, + pet_method = pet_method, standardization = standardization, + params = params, param_error = param_error, + handle_infinity = handle_infinity, + cross_validation = cross_validation, + method = method, distribution = distribution, + fit = fit, ncores = ncores) + return(res) - # complete dates - dates <- prlr$attrs$Dates - dates_complete_daily <- as.Date(as.Date(paste(lubridate::year(min(dates)), 01, 01, sep='-')):as.Date(paste(lubridate::year(max(dates)), 12, 31, sep='-'))) - dates_complete_daily_to_monthly <- lubridate::day(dates_complete_daily) - dates_complete_monthly <- dates_complete_daily[which(dates_complete_daily_to_monthly == 1)] - dates_monthly <- array(0, dim=length(dates_complete_monthly)) - for (dd in 1:length(dates)){ - ii <- which(dates_complete_monthly == as.Date(paste(lubridate::year(dates[dd]), lubridate::month(dates[dd]), 01, sep = '-'))) - dates_monthly[ii] <- 1 - } +} - # Evapotranspiration estimation (unless pet is already provided) - if (is.null(pet)){ - if (pet_method == 'hargreaves'){ - data <- list(tasmax = tasmax$data, tasmin = tasmin$data, lat = prlr$coords$latitude) - } else if (pet_method == 'hargreaves_modified'){ - data <- list(tasmax = tasmax$data, tasmin = tasmin$data, prlr = prlr$data, lat = prlr$coords$latitude) - } else if (pet_method == 'thornthwaite'){ - data <- list(tas = tas$data, lat = prlr$coords$latitude) - } - pet <- evapotranspiration(data, dates_monthly, pet_method, time_dim, leadtime_dim, memb_dim, lon_dim, lat_dim, n_procs) - } - # Accumulation - diff_P_PET <- prlr$data - pet - data_accum <- accumulation(diff_P_PET, dates_monthly, accum, time_dim, leadtime_dim, memb_dim, n_procs) - - # Standardization: - if (standardization == TRUE){ - spei_dat <- spei_standardization(data_accum = data_accum, - leadtime_dim = leadtime_dim, - time_dim = time_dim, - memb_dim = memb_dim, - cross_validation = cross_validation, - handle_infinity = handle_infinity, - n_procs = n_procs, - accum = accum, - param_error = param_error, - method = method, distribution = distribution, fit = fit) - } else { - spei_dat <- data_accum - } - return(spei_dat) +PeriodSPEI <- function(exp, dates, lat, + exp_cor = NULL, pet = NULL, + time_dim = 'syear', leadtime_dim = 'time', + memb_dim = 'ensemble', lat_dim = 'latitude', + accum = 1, start = NULL, end = NULL, + pet_method = c('hargreaves', 'hargreaves'), + standardization = TRUE, + params = NULL, param_error = -9999, + handle_infinity = FALSE, cross_validation = FALSE, + method = 'parametric', distribution = 'log-Logistic', + fit = 'ub-pwm', ncores = 4) { + + #----------------------------------------------------------------------------- + + # Part (1): Initial checks + + ## pet and pet_method + if (is.null(pet_method) & is.null(pet)) { + warning("Parameter 'pet_method' cannot be NULL if pet is not provided.") + } + if (!is.null(exp_cor)) { + if (length(pet_method) == 1) { + pet_method <- rep(pet_method, 2) + } + if (length(exp_cor) < 1) { + exp_cor <- NULL + } + } else { + if (lenght(pet_method) > 1) { + warning("Parameter 'pet_method' is of length 2, only first value will be used.") + pet_method <- pet_method[1] + } + } + ## exp + ## TO DO: Check order: tasmax, tasmin, prlr + if (all(c('tasmin', 'tasmax', 'prlr') %in% names(exp))) { + # hargreaves modified: 'tasmin', 'tasmax', 'prlr' and 'lat' + if (!any(pet_method %in% c('hargreaves_modified', 'hargreaves'))) { + warning("Parameter 'pet_method' needs to be 'hargreaves_modified' or 'hargreaves'.") + pet_method[1] <- 'hargreaves_modified' + } + } else if (all(c('tasmin', 'tasmax') %in% names(exp))) { + # hargreaves: 'tasmin', 'tasmax' and 'lat' + if (pet_method != c('hargreaves')) { + warning("Parameter 'pet_method' needs to be 'hargreaves'.") + pet_method[1] <- 'hargreaves' + } + + } else if ('tas' %in% names(exp)) { + # thornthwaite: 'tas' (mean), 'lat' + pet_method[1] <- 'thornthwaite' + + } else { + stop("Parameter 'exp' needs to be a list with the needed variables.") + } + + # warning(' needs to be in C and precipitation in mm/month') # there is no check + + # check if accumulation period is possible + if (accum > dim(exp[[1]])[leadtime_dim]) { + stop(paste0('ERROR: Cannot compute accumulation of ', accum, ' months because loaded data has only ', + dim(exp[[1]])[leadtime_dim], ' months.')) + } + # method + if (!(method %in% c('parametric', 'non-parametric'))) { + stop('pcX script error: SPEI can be only computed using the following approach: parametric or non-parametric') + } + # distribution + if (!(distribution %in% c('log-Logistic', 'Gamma', 'PearsonIII'))) { + stop('Distrib must be one of "log-Logistic", "Gamma" or "PearsonIII"') + } + # fit + if (!(fit %in% c('max-lik', 'ub-pwm', 'pp-pwm'))) { + stop('Method must be one of "ub-pwm" (default), "pp-pwm" or "max-lik"') + } + + #----------------------------------------------------------------------------- + # Part (2): preparation + + # complete dates + ini_date <- as.Date(paste(lubridate::year(min(dates)), 01, 01, sep= '-')) + end_date <- as.Date(paste(lubridate::year(max(dates)), 12, 31, sep='-')) + dates_complete_daily <- as.Date(ini_date:end_date) + + dates_complete_daily_to_monthly <- lubridate::day(dates_complete_daily) + dates_complete_monthly <- dates_complete_daily[which(dates_complete_daily_to_monthly == 1)] + dates_monthly <- array(0, dim=length(dates_complete_monthly)) + for (dd in 1:length(dates)) { + ii <- which(dates_complete_monthly == as.Date(paste(lubridate::year(dates[dd]), lubridate::month(dates[dd]), 01, sep = '-'))) + dates_monthly[ii] <- 1 + } + + #----------------------------------------------------------------------------- + # Part (3): Compute PeriodSPEI + k = 0 + spei_res <- NULL + computed_pet <- FALSE + + for (data in .return2list(exp, exp_cor)) { + k = k + 1 + # Evapotranspiration estimation (unless pet is already provided) + if (is.null(pet) | computed_pet) { + pet <- evapotranspiration(data, dates_monthly, pet_method = pet_method[k], time_dim, + leadtime_dim, memb_dim, lat_dim, ncores) + computed_pet <- TRUE + } + # Accumulation + diff_P_PET <- data$prlr - pet + data_accum <- accumulation(diff_P_PET, dates_monthly, accum, time_dim, + leadtime_dim, memb_dim, ncores) + # Standardization: + if (standardization == TRUE) { + spei_dat <- spei_standardization(data_accum = data_accum, + leadtime_dim = leadtime_dim, + time_dim = time_dim, + memb_dim = memb_dim, + cross_validation = cross_validation, + handle_infinity = handle_infinity, + ncores = ncores, + accum = accum, + param_error = param_error, + params = params, method = method, + distribution = distribution, fit = fit) + params <- spei_dat$params + } else { + spei_dat <- data_accum + } + spei_res[[k]] <- spei_dat[[1]] + } + + if (standardization) { + spei_res[[k+1]] <- params + if (is.null(exp_cor)) { + names(spei_res) <- c('exp', 'params') + } else { + names(spei_res) <- c('exp', 'exp_cor', 'params') + } + } else { + if (is.null(exp_cor)) { + names(spei_res) <- c('exp') + } else { + names(spei_res) <- c('exp', 'exp_cor') + } + } + + return(spei_res) + } - -## functions +.return2list <- function(data1, data2 = NULL) { + if (is.null(data2)) { + return(list(data1)) + } else { + return(list(data1, data2)) + } +} + + + evapotranspiration <- function(data, dates_monthly, pet_method = 'hargreaves', time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', - lon_dim = 'longitude', lat_dim = 'latitude', - n_procs = 4){ + lat_dim = 'latitude', + ncores = 4) { - lat_mask <- InsertDim(InsertDim(data$lat,pos = 1, len = 1, name = 'dat'),pos = 3, len = dim(data[[1]])[lon_dim], name = lon_dim) + ## corrected:: + lat_mask <- array(lat, dim = c(1, length(lat))) + names(dim(lat_mask)) <- c('dat', lat_dim) # extract mask of NA locations to return to NA the final result - mask_NA <- array(1, dim = dim(data[[1]])) - if (pet_method == 'hargreaves'){ - mask_NA[which(is.na(data$tasmax))] <- 0 - mask_NA[which(is.na(data$tasmin))] <- 0 - } - if (pet_method == 'hargreaves_modified'){ - mask_NA[which(is.na(data$tasmax))] <- 0 - mask_NA[which(is.na(data$tasmin))] <- 0 - mask_NA[which(is.na(data$prlr))] <- 0 - } - if (pet_method == 'thornthwaite'){ - mask_NA[which(is.na(tas$data))] <- 0 - } - + + mask_NA <- array(1, dim = dim(data[[1]])) + if (pet_method == 'hargreaves') { + mask_NA[which(is.na(data$tasmax))] <- 0 + mask_NA[which(is.na(data$tasmin))] <- 0 + } else if (pet_method == 'hargreaves_modified') { + mask_NA[which(is.na(data$tasmax))] <- 0 + mask_NA[which(is.na(data$tasmin))] <- 0 + mask_NA[which(is.na(data$prlr))] <- 0 + } else if (pet_method == 'thornthwaite') { + mask_NA[which(is.na(data$tas))] <- 0 + } + # replace NA with 0 - for (dd in 1:length(data)){ + for (dd in 1:length(data)) { data[[dd]][which(is.na(data[[dd]]))] <- 0 } # prepare data - if (pet_method == 'hargreaves'){ - data_input <- list(lat_mask = lat_mask, tasmax = data$tasmax, tasmin = data$tasmin) - targetdim_input <- list(lat_mask = c('dat'), tasmax = c(leadtime_dim, time_dim), tasmin = c(leadtime_dim, time_dim)) - } else if (pet_method == 'hargreaves_modified'){ - data_input <- list(lat_mask = lat_mask, tasmax = data$tasmax, tasmin = data$tasmin, prlr = data$prlr) - targetdim_input <- list(lat_mask = c('dat'), tasmax = c(leadtime_dim, time_dim), tasmin = c(leadtime_dim, time_dim), prlr = c(leadtime_dim, time_dim)) - } else if (pet_method == 'thornthwaite'){ - data_input <- list(lat_mask = lat_mask, tas = data$tas) - targetdim_input <- list(lat_mask = c('dat'), tas = c(leadtime_dim, time_dim)) - } else { - stop (paste0('Unknown pet_method ', pet_method)) - } - - PET_estimated <- Apply(data = data_input, - target_dims = targetdim_input, + target_dims_data <- lapply(data, function(x) rep(c(leadtime_dim, time_dim), 1)) + # file <- tempfile() + # file = 'out.txt' + # sink(file) + PET_estimated <- Apply(data = c(list(lat_mask = lat_mask), data), + target_dims = c(list(lat_mask = 'dat'), target_dims_data), output_dims = c(leadtime_dim, time_dim), pet_method = pet_method, - dates_monthly = dates_monthly, # array of 0 and 1 used to build the data_input to a complete array of the full years that are considered - dim_month = leadtime_dim, dim_year = time_dim, - fun = atomic_pet, ncores = n_procs) - + dates_monthly = dates_monthly, + leadtime_dim = leadtime_dim, time_dim = time_dim, + fun = atomic_pet, ncores = ncores)$output1 + # sink(NULL) + # captured_message <- readLines(file) + # print(paste0('Length captured!!', length(captured_message))) + # print(paste0('unique(captured_message)', length(unique(captured_message)))) + # reorder dims in PET_estimated - dims_order <- array(NA, length(dim(data[[1]]))) - for (ord in 1:length(dim(data[[1]]))){ - dims_order[ord] <- which(names(dim(PET_estimated$output1)) == names(dim(data[[1]]))[ord]) - } - data_pet <- aperm(PET_estimated$output1, dims_order) + pos <- match(names(dim(data[[1]])),names(dim(PET_estimated))) + PET_estimated <- aperm(PET_estimated, pos) # restore original NAs from mask_NA - data_pet[which(mask_NA == 0 )] <- NA + PET_estimated[which(mask_NA == 0 )] <- NA - return(data_pet) + return(PET_estimated) } -accumulation <- function(diff_P_PET, dates_monthly, accum = 1, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', n_procs = 4){ +accumulation <- function(diff_P_PET, dates_monthly, accum = 1, + time_dim = 'syear', leadtime_dim = 'time', + memb_dim = 'ensemble', ncores = 4) { - if(!time_dim %in% names(dim(diff_P_PET))){ + if (!time_dim %in% names(dim(diff_P_PET))) { diff_P_PET <- InsertDim(diff_P_PET, posdim = 1, lendim = 1, name = time_dim) } - if(!leadtime_dim %in% names(dim(diff_P_PET))){ + if (!leadtime_dim %in% names(dim(diff_P_PET))) { diff_P_PET <- InsertDim(diff_P_PET, posdim = 1, lendim = 1, name = leadtime_dim) } accum_result <- Apply(data = list(diff_P_PET), target_dims = list(diff_P_PET = c(leadtime_dim,time_dim)), - dates_monthly = dates_monthly, # array of 0 and 1 used to build the data_input to a complete array of the full years that are considered + dates_monthly = dates_monthly, accum = accum, output_dims = c(leadtime_dim, time_dim), #c('time','sdate'), - dim_month = leadtime_dim, dim_year = time_dim, - fun = atomic_accum, ncores = n_procs) + leadtime_dim = leadtime_dim, time_dim = time_dim, + fun = atomic_accum, ncores = ncores) # recover essential lost dims (if they had length 1 they'd have been dropped in previous step): - for (d in c(time_dim, leadtime_dim, 'latitude', 'longitude', memb_dim)){ + for (d in c(time_dim, leadtime_dim, 'latitude', 'longitude', memb_dim)) { if(!d %in% names(dim(accum_result$output1))){ accum_result$output1 <- InsertDim(data = accum_result$output1, posdim = length(names(dim(accum_result$output1))) + 1, lendim = 1, name = d) } @@ -179,113 +346,126 @@ accumulation <- function(diff_P_PET, dates_monthly, accum = 1, time_dim = 'syear } spei_standardization <- function(data_accum, - leadtime_dim, time_dim, memb_dim, handle_infinity, cross_validation, - n_procs, accum, param_error, - method ='parametric', distribution = 'log-Logistic', fit='ub-pwm'){ - data_spei <- data_accum - - data_spei <- Apply(data = list(data_accum), - target_dims = list(data = c(leadtime_dim,time_dim, memb_dim)), - output_dims = c(leadtime_dim,time_dim, memb_dim), - dim_month = leadtime_dim, - dim_year = time_dim, - dim_memb = memb_dim, - handle_infinity = handle_infinity, - cross_validation = cross_validation, - method = method, distribution = distribution, fit = fit, - ref_period = NULL, - param_error = param_error, - fun = atomic_spei, - ncores = n_procs)$output1 - - return(data_spei) + leadtime_dim, time_dim, memb_dim, handle_infinity, + cross_validation, ncores, accum, param_error, + params = NULL, method ='parametric', + distribution = 'log-Logistic', fit='ub-pwm') { -} + n_leadtimes <- dim(data_accum)[leadtime_dim] + n_sdates_params <- dim(data_accum)[time_dim] + + if (!cross_validation) { + n_sdates_params <- 1 + } -### Atomic functions + if (is.null(params)) { + params <- array(NA, dim = c(n_sdates_params, n_leadtimes, coef = 3)) + names(dim(params)) <- c(time_dim, leadtime_dim, 'coef') + } else if (length(dim(params)) < 2) { + params <- array(params, dim = c(length(params), n_sdates_params, n_leadtimes)) + params <- aperm(params, c(2,3,1)) # dim(params): [time_dim, leadtime_dim, coef] with the values repeated each time_dim and leadtime_dim + names(dim(params)) <- c(time_dim, leadtime_dim, 'coef') + } + + spei <- Apply(data = list(data = data_accum, params = params), + target_dims = list(data = c(leadtime_dim, time_dim, memb_dim), + params = c(time_dim, leadtime_dim, 'coef')), + output_dims = list(data_spei = c(leadtime_dim, time_dim, memb_dim), + params = c(time_dim, leadtime_dim, 'coef')), + leadtime_dim = leadtime_dim, + time_dim = time_dim, + dim_memb = memb_dim, + handle_infinity = handle_infinity, + cross_validation = cross_validation, + method = method, distribution = distribution, fit = fit, + ref_period = NULL, + param_error = param_error, + fun = atomic_spei, + ncores = ncores) + + return(spei) # spei is a list of data_spei and params +} -atomic_pet <- function(pet_method, dates_monthly, dim_month, dim_year, lat_mask = NULL, data2 = NULL, data3 = NULL, data4 = NULL){ +atomic_pet <- function(lat_mask = NULL, data2 = NULL, data3 = NULL, data4 = NULL, + pet_method, dates_monthly, leadtime_dim, time_dim) { - ftime = as.numeric(dim(data2)[leadtime_dim]) - n_sdates = as.numeric(dim(data2)[time_dim]) - - # create a vector from data but adding 0 to achive complete time series of the considered period - #(starting in January of the first year) so that the solar radiation estimation is computed - # in each case for the correct month - if (!is.null(data2)){ - data_tmp <- as.vector(data2) - data2 <- array(0, dim = length(dates_monthly)) - count <- 1 - for (dd in 1:length(dates_monthly)){ - if (dates_monthly[dd] == 1){ - data2[dd] <- data_tmp[count] - count <- count + 1 - } + dims <- dim(data2) + + # create a vector from data but adding 0 to achive complete time series + # of the considered period + # (starting in January of the first year) so that the solar radiation + # estimation is computed in each case for the correct month + + if (!is.null(data2)) { + data_tmp <- as.vector(data2) + data2 <- array(0, dim = length(dates_monthly)) + count <- 1 + for (dd in 1:length(dates_monthly)) { + if (dates_monthly[dd] == 1) { + data2[dd] <- data_tmp[count] + count <- count + 1 } - rm(data_tmp) } - if (!is.null(data3)){ - data_tmp <- as.vector(data3) - data3 <- array(0, dim = length(dates_monthly)) - count <- 1 - for (dd in 1:length(dates_monthly)){ - if (dates_monthly[dd] == 1){ - data3[dd] <- data_tmp[count] - count <- count + 1 - } + rm(data_tmp) + } + if (!is.null(data3)) { + data_tmp <- as.vector(data3) + data3 <- array(0, dim = length(dates_monthly)) + count <- 1 + for (dd in 1:length(dates_monthly)) { + if (dates_monthly[dd] == 1) { + data3[dd] <- data_tmp[count] + count <- count + 1 } - rm(data_tmp) } - if (!is.null(data4)){ - data_tmp <- as.vector(data4) - data4 <- array(0, dim = length(dates_monthly)) - count <- 1 - for (dd in 1:length(dates_monthly)){ - if (dates_monthly[dd] == 1){ - data4[dd] <- data_tmp[count] - count <- count + 1 - } + rm(data_tmp) + } + if (!is.null(data4)) { + data_tmp <- as.vector(data4) + data4 <- array(0, dim = length(dates_monthly)) + count <- 1 + for (dd in 1:length(dates_monthly)) { + if (dates_monthly[dd] == 1) { + data4[dd] <- data_tmp[count] + count <- count + 1 } - rm(data_tmp) } - if (pet_method == 'hargreaves'){ - x_PET <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), lat = lat_mask, na.rm = FALSE) - x_PET <- x_PET[which(dates_monthly == 1)] # line to return the vector to the size of the actual original data - PET <- array(x_PET, dim = c(ftime,n_sdates)) + rm(data_tmp) + } + if (pet_method == 'hargreaves') { + # NOTE EVA: if NA.RM is FALSE this gives error + x_PET <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), + lat = lat_mask, na.rm = TRUE) + # line to return the vector to the size of the actual original data + x_PET <- x_PET[which(dates_monthly == 1)] + PET <- array(x_PET, dim = dims) } - if (pet_method == 'hargreaves_modified'){ # not sure this works properly - PET <- array(NA, dim = c(ftime,n_sdates)) - for(ns in 1:n_sdates){ - tmax_mod_temp <- data2[12*(ns-1)+(1:12)] #data2[,ns] - tmin_mod_temp <- data3[12*(ns-1)+(1:12)] #data3[,ns] - pre_mod_temp <- data4[12*(ns-1)+(1:12)] #data4[,ns] - - # Computation of PET - x_PET <- hargreaves(Tmin = tmin_mod_temp, Tmax = tmax_mod_temp, lat = lat_mask, Pre = pre_mod_temp, na.rm = FALSE) - PET[1:ftime,ns] <- x_PET #dim <- (lon lat ft sd memb) - } + if (pet_method == 'hargreaves_modified') { + x_PET <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), + lat = lat_mask, Pre = as.vector(data4), na.rm = TRUE) + x_PET <- x_PET[which(dates_monthly == 1)] + PET <- array(x_PET, dim = dims) } - if (pet_method == 'thornthwaite'){ + if (pet_method == 'thornthwaite') { x_PET <- thornthwaite(as.vector(data2), lat = lat_mask, na.rm = TRUE) - x_PET <- x_PET[which(dates_monthly == 1)] # line to return the vector to the size of the actual original data - PET <- array(x_PET, dim = c(ftime,n_sdates)) + # line to return the vector to the size of the actual original data + x_PET <- x_PET[which(dates_monthly == 1)] + PET <- array(x_PET, dim = dims) } - return(PET) - } -atomic_accum <- function(data, dates_monthly, accum, dim_month, dim_year){ - # data:[time, syear] +atomic_accum <- function(data, dates_monthly, accum, leadtime_dim, time_dim) { - ftime = as.numeric(dim(data)[dim_month]) - n_sdates = as.numeric(dim(data)[dim_year]) + # data:[time, syear] + ftime <- dim(data)[1] + n_sdates <- dim(data)[2] data_vector <- array(0, dim = length(dates_monthly)) count <- 1 - for (dd in 1:length(dates_monthly)){ + for (dd in 1:length(dates_monthly)) { if (dates_monthly[dd] == 1){ data_vector[dd] <- as.vector(data)[count] count <- count + 1 @@ -294,21 +474,23 @@ atomic_accum <- function(data, dates_monthly, accum, dim_month, dim_year){ # Accumulation at different timescales data_sum_x <- rollapply(data_vector, accum, sum) # rollapply {zoo} A generic function for applying a function to rolling margins of an array. - data_sum_x <- c(rep(NA, accum-1), data_sum_x) # adds as many NAs as needed at the begining to account for the months that cannot be added (depends on accu) and so that the position in the vector corresponds to the accumulated of the previous months (instead of the accumulated of the next months) + data_sum_x <- c(rep(NA, accum-1), data_sum_x) # adds as many NAs as needed at the begining to account for the months that cannot be added + # (depends on accu) and so that the position in the vector corresponds to the accumulated + # of the previous months (instead of the accumulated of the next months) data_sum_x <- data_sum_x[which(dates_monthly == 1)] # discard the months that don't appear in the original data accum_result <- array(data_sum_x, dim = c(ftime,n_sdates)) # return to matrix form - if (accum > 1){ + if (accum > 1) { accum_result[1:(accum-1),] <- NA # replace by NA when the accumulation corresponds to months that where not present in the original data } return(accum_result) } - -atomic_spei <- function(data, dim_month, dim_year, dim_memb, ref_period, handle_infinity, cross_validation, param_error, - method ='parametric', distribution = 'log-Logistic', fit='ub-pwm'){ +atomic_spei <- function(data, params, leadtime_dim, time_dim, dim_memb, ref_period, + handle_infinity, cross_validation, param_error, method = 'parametric', + distribution = 'log-Logistic', fit = 'ub-pwm') { # data: [time, sdate, memb] - if (is.null(ref_period)){ + if (is.null(ref_period)) { ref.start <- NULL ref.end <- NULL } else { @@ -318,55 +500,76 @@ atomic_spei <- function(data, dim_month, dim_year, dim_memb, ref_period, handle_ if (all(is.na(data))) { speiX <- array(NA, dim(data)) - } else if (var(data, na.rm=T) == 0) { # if the data [time, sdate, memb] has no variability it will raise an error further down, so we assign a value to the result and skip the step + } else if (var(data, na.rm=T) == 0) { # if the data [time, sdate, memb] has no variability it will raise an error further down, + # so we assign a value to the result and skip the step speiX <- array(param_error, dim(data)) } else { - speiX <- spX(data, ref.start=ref.start, ref.end=ref.end, - method = method, distribution = distribution, fit = fit, - dim_month = dim_month, dim_year = dim_year, dim_memb = dim_memb, - handle_infinity = handle_infinity, cross_validation = cross_validation) + c(speiX, params) %<-% spX(data, ref.start=ref.start, ref.end=ref.end, params = params, + method = method, distribution = distribution, fit = fit, + leadtime_dim = leadtime_dim, time_dim = time_dim, dim_memb = dim_memb, + handle_infinity = handle_infinity, cross_validation = cross_validation) } - return(speiX) + + return(list(speiX, params)) } +#------------------------------------------------------------------------------- + ### Support functions -spX <- function(acc_data, ref.start, ref.end, na.rm = TRUE, method = 'parametric', distribution = 'log-Logistic', fit = 'ub-pwm', dim_month, dim_year, dim_memb, handle_infinity, cross_validation){ - +spX <- function(acc_data, ref.start, ref.end, params = params, na.rm = TRUE, + method = 'parametric', distribution = 'log-Logistic', fit = 'ub-pwm', + leadtime_dim, time_dim, dim_memb, handle_infinity, cross_validation){ + # acc_data: [ftime, syear, nmemb] + # params: [syear, ftime, coef] - if (!(method %in% c('parametric', 'non-parametric'))) { - stop('pcX script error: SPEI can be only computed using the following approach: parametric or non-parametric') - } - - ftime <- as.numeric(dim(acc_data)[dim_month]) - n_sdates <- as.numeric(dim(acc_data)[dim_year]) + n_coef_max <- dim(params)['coef'] # maximum number of parameters needed to define any of the considered distributions + + ftime <- as.numeric(dim(acc_data)[leadtime_dim]) + n_sdates <- as.numeric(dim(acc_data)[time_dim]) nmemb <- as.numeric(dim(acc_data)[dim_memb]) spei_mod <- array(data = NA, dim = c(ftime, n_sdates, nmemb)) - names(dim(spei_mod)) <- c(dim_month, dim_year, dim_memb) + names(dim(spei_mod)) <- c(leadtime_dim, time_dim, dim_memb) + if (cross_validation) { + params_result <- array(data = NA, dim = c(n_sdates, ftime, n_coef_max)) + } else { + params_result <- array(data = NA, dim = c(1, ftime, n_coef_max)) + } - for(ff in 1:ftime){ # treat months independently - Accum_D_temp <- ClimProjDiags::Subset(acc_data, along = dim_month, indices = ff, drop = FALSE) - x_spei_mod <- spX_ftime(Accum_D_temp, na.rm = TRUE, method = method, distribution = distribution, fit = fit, ref.start = ref.start, ref.end = ref.end, n_sdates = n_sdates, nmemb = nmemb, handle_infinity = handle_infinity, cross_validation = cross_validation) + for (ff in 1:ftime) { # treat months independently + Accum_D_temp <- ClimProjDiags::Subset(acc_data, along = leadtime_dim, indices = ff, drop = FALSE) + params_tmp <- if (all(is.na(params))) {NULL} else {params[,ff,]} + c(x_spei_mod, params_ff) %<-% spX_ftime(Accum_D_temp, na.rm = TRUE, method = method, distribution = distribution, + fit = fit, ref.start = ref.start, ref.end = ref.end, + params = params_tmp, n_sdates = n_sdates, nmemb = nmemb, + handle_infinity = handle_infinity, cross_validation = cross_validation) spei_mod[ff,,] <- x_spei_mod + coef_names <- names(params_ff) + if (length(params_ff) < n_coef_max){ # lengthen dimension coef of params_ff in case it doesn't match the corresponding dimension of parms_months + params_ff <- append(params_ff, array(NA, dim = n_coef_max - length(params_ff))) + coef_names <- append(coef_names, '') + } + params_result[,ff,] <- params_ff } + + colnames(params_result) <- coef_names + names(dim(params_result)) <- c(time_dim, leadtime_dim, 'coef') - return(spei_mod) + return(list(spei = spei_mod, params = params_result)) } +#------------------------------------------------------------------------------- + # HandleInfinity: spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm = TRUE, method = 'parametric', n_sdates, nmemb, ref.start = NULL, ref.end = NULL, params = NULL, handle_infinity = FALSE, cross_validation = FALSE) { # data:[ftime = 1, syear, ensemble] - - if (!(method %in% c('parametric', 'non-parametric'))) { - stop('SPEI can be only computed using the following approach: parametric or non-parametric') - } - if (method == 'non-parametric'){ + if (method == 'non-parametric') { if (anyNA(data) && na.rm == FALSE) { stop('Error: Data must not contain NAs') @@ -379,48 +582,45 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm SPEI = qnorm((bp-0.44)/(length(data)+0.12)) - return(SPEI) + return(SPEI) # it won't return params to be used in exp_cor; also it is not using handle_infinity nor cross_validation } else { std_index <- array(NA, c(n_sdates, nmemb)) - if (anyNA(data) && na.rm==FALSE) { + if (anyNA(data) && na.rm == FALSE) { stop('Error: Data must not contain NAs') } - if (!(distribution %in% c('log-Logistic', 'Gamma', 'PearsonIII'))) { - stop('Distrib must be one of "log-Logistic", "Gamma" or "PearsonIII"') - } - if (!(fit %in% c('max-lik', 'ub-pwm', 'pp-pwm'))) { - stop('Method must be one of "ub-pwm" (default), "pp-pwm" or "max-lik"') - } + coef = switch(distribution, "Gamma" = array(NA, dim = 2, dimnames = list(c('alpha','beta'))), "log-Logistic" = array(NA, dim = 3, dimnames = list(c('xi','alpha','kappa'))), - "PearsonIII" = array(NA, dim = 3, dimnames = list(c('mu','sigma','gamma'))) - ) + "PearsonIII" = array(NA, dim = 3, dimnames = list(c('mu','sigma','gamma')))) dim_one <- length(coef) if (!is.null(params)) { - if (length(params)!=dim_one) { + if (length(params) != dim_one) { stop(paste0('parameters array should have dimensions [', dim_one, ']')) } } # Select window if necessary - if (!is.null(ref.start) && !is.null(ref.end)) { - data.fit <- window(data,ref.start,ref.end) - } else { - data.fit <- data - } + if (!is.null(ref.start) && !is.null(ref.end)) { + data.fit <- window(data,ref.start,ref.end) + } else { + data.fit <- data + } - if(cross_validation == "TRUE") { + if (cross_validation == "TRUE") { loop_years <- n_sdates } else { loop_years <- 1 } + + params_result <- array(NA, dim = c(loop_years, dim_one)) + colnames(params_result) <- names(coef) for (nsd in 1:loop_years) { # loop over years (in case of cross_validation) # Cumulative series (acu) @@ -436,12 +636,13 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm acu_sd = sd(acu.sorted) if (!is.na(acu_sd)){ if (acu_sd != 0){ - if(distribution != "log-Logistic"){ + if(distribution != "log-Logistic") { pze <- sum(acu==0)/length(acu) acu.sorted = acu.sorted[acu.sorted > 0] } if (!is.null(params)) { f_params = as.vector(params) + params_result[nsd,] <- f_params } else { if (length(acu.sorted) >= 4){ # else coef will be NA # Calculate probability weighted moments based on fit with lmomco or TLMoments @@ -470,16 +671,23 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm if(distribution == 'log-Logistic' && fit == 'max-lik'){ f_params = parglo.maxlik(acu.sorted, f_params)$para } + params_result[nsd,] <- f_params } # end if dor the case the L-moments are not valid (std_index will be NA) } # end if for the case there are not enough values to estimate the parameters (std_index will be NA) } # end estimation of f_param # Calculate cdf based on distribution with lmom - cdf_res = switch(distribution, + if (all(is.na(params_result[nsd,]))){ + cdf_res <- NA + } else { + f_params <- params_result[nsd,] + f_params <- f_params[which(!is.na(f_params))] + cdf_res = switch(distribution, "log-Logistic" = lmom::cdfglo(data, f_params), "Gamma" = lmom::cdfgam(data, f_params), "PearsonIII" = lmom::cdfpe3(data, f_params) - ) - + ) + } + std_index_cv <- array(qnorm(cdf_res), dim = c(n_sdates,nmemb)) # Adjust if user chose Gamma or PearsonIII - Not tested: For future development @@ -490,20 +698,22 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm std_index[nsd,] <- std_index_cv[nsd,] } else { std_index <- std_index_cv - } + } } } # end if for the case there is no variability } # end if for the case all NA in acu } # next year (in case of cross_validation or all done if cross_validation == F) - + if(handle_infinity == 'TRUE'){ # could also use "param_error" ?; we are giving it the min/max value of the grid point std_index[is.infinite(std_index) & std_index < 0] <- min(std_index[!is.infinite(std_index)]) std_index[is.infinite(std_index) & std_index > 0] <- max(std_index[!is.infinite(std_index)]) } - - } # end parametric - return(std_index) # fitted is the only thing used after calling this function + return(list(std_index = std_index, params = params_result)) # f_params will be params only if cross_validation is FALSE + # (otherwise there will be one f_params per year; + # but the output params will be read only in the case that + # it is called with cross_validation FALSE) + + } # end parametric } - -- GitLab From 4875863e48e8041e167508182d91ef3f744f7a31 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 25 May 2023 17:23:17 +0200 Subject: [PATCH 04/42] Reapply same changes as before resolving the conflicts --- R/PeriodSPEI.R | 740 +++++++++++++++++++++++++++++++------------------ 1 file changed, 475 insertions(+), 265 deletions(-) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index d56916b..1eef63d 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -1,167 +1,334 @@ -PeriodSPEI <- function(tas = NULL, tasmax = NULL, tasmin = NULL, pet = NULL, prlr, # s2dv_cubes (with coordinates lat = prlr$coords$latitude and Dates) - time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', lon_dim = 'longitude', lat_dim = 'latitude', - accum = 1, start = NULL, end = NULL, - pet_method = NULL, - standardization = TRUE, - param_error = -9999, - handle_infinity = FALSE, - cross_validation = FALSE, - method = 'parametric', distribution = 'log-Logistic', fit='ub-pwm', - n_procs = 4){ - - # check provided data and parameters: - if (!is.null(pet) & !is.null(pet_method)){ - print ('WARNING: pet data is provided and also a pet estimation method, the provided pet data will be used and NOT estimated') - pet_method <- NULL - } - if (is.null(pet_method) & is.null(pet)){ - stop ('variable pet needs to be provided or a pet_method selected') - } - if ((pet_method == 'hargreaves' | pet_method == 'hargreaves_modified') & (is.null(tasmax) | is.null(tasmin))){ - stop (paste0('Need to provide tasmax and tasmin for the selected method ', pet_method)) - } - if (pet_method == 'thornthwaite' & is.null(tas)){ - stop (paste0('Need to provide tas for the selected method ', pet_method)) - } - print('WARNING: temperature needs to be in C and precipitation in mm/month') # there is no check - # check if accumulation period is possible - if(accum > dim(prlr$data)[leadtime_dim][[1]]){ - stop(paste0('ERROR: Cannot compute accumulation of ', accum, ' months because loaded data has only ', dim(prlr$data)[leadtime_dim][[1]], ' months.')) - } +#------------------------------------------------------------------------------- +# To DO: Add documentation of same dimensions exp and exp_cor +# To DO: Add documentation for units +# TO DO: Write Documentation + +# library(s2dv) # InsertDim, ... +# library(multiApply) # Apply +# library(ClimProjDiags) # Subset +# library(zeallot) # multiple assignment %<-% +# library(SPEI) # estimation of evapotranspiration (e.g. hargreaves function) +# library(zoo) # rollapply, as.Date +# library(TLMoments) # pwd (Calculate probability weighted moments based on fit) +# library(lmomco) # pwm2lmom (probability weighted moments to L-Moments) +# library(lubridate) # year(), month(),... + +# library(CSTools) +#------------------------------------------------------------------------------- +#'@param exp A list with the 's2dv_cube' object class containing the seasonal +#' forecast experiment in the data element. +#'@param exp_cor An object of class \code{s2dv_cube} in which the quantile +#' PeriodSPEI should be applied. If it is not specified, the PeriodSPEI +#' is calculated from object 'exp'. +#'@param pet Multidimensional array containing the Potential EvapoTranspiration +#' data. If it is NULL it is calculated using pet_method. It is NULL by default. +#'@param time_dim +#'@param leadtime_dim +#'@param memb_dim +#'@param lat_dim +#--- other params from CSIndicators +#'@param accum +#'@param start +#'@param end +#' +# --- other params +#'@param pet_method +#'@param standardization +#'@param params +#'@param param_error +#'@param handle_infinity +#'@param cross_validation +#'@param method +#'@param distribution +#'@param fit +#'@param ncores +CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, + time_dim = 'syear', leadtime_dim = 'time', + memb_dim = 'ensemble', lat_dim = 'latitude', + accum = 1, start = NULL, end = NULL, + pet_method = c('hargreaves', 'hargreaves'), + standardization = TRUE, + params = NULL, # not used if standardization is FALSE + param_error = -9999, # not used if standardization is FALSE + handle_infinity = FALSE, # not used if standardization is FALSE + cross_validation = FALSE, # not used if standardization is FALSE + method = 'parametric', distribution = 'log-Logistic', + fit = 'ub-pwm', # not used if standardization is FALSE + ncores = 4) { + + #----------------------------------------------------------------------------- + # Part (1): Initial structure checks + # coordinates + .KnownLatNames <- CSTools:::.KnownLatNames() + + if (!any(names(exp[[1]]$coords) %in% .KnownLatNames)) { + stop("Spatial coordinate names of parameter 'obsL' do not match any ", + "of the names accepted by the package.") + } + lat_name <- names(exp[[1]]$coords)[[which(names(exp[[1]]$coords) %in% .KnownLatNames)]] + + #----------------------------------------------------------------------------- + # Part (2): Call to PeriodSPEI + + res <- PeriodSPEI(exp = lapply(exp, function(x) x$data), + dates = exp[[1]]$attrs$Dates, + lat = exp[[1]]$coords[[lat_name]], + exp_cor = lapply(exp_cor, function(x) x$data), + pet = pet, + time_dim = time_dim, leadtime_dim = leadtime_dim, + memb_dim = memb_dim, lat_dim = lat_dim, + accum = accum, start = start, end = end, + pet_method = pet_method, standardization = standardization, + params = params, param_error = param_error, + handle_infinity = handle_infinity, + cross_validation = cross_validation, + method = method, distribution = distribution, + fit = fit, ncores = ncores) + return(res) - # complete dates - dates <- prlr$attrs$Dates - dates_complete_daily <- as.Date(as.Date(paste(lubridate::year(min(dates)), 01, 01, sep='-')):as.Date(paste(lubridate::year(max(dates)), 12, 31, sep='-'))) - dates_complete_daily_to_monthly <- lubridate::day(dates_complete_daily) - dates_complete_monthly <- dates_complete_daily[which(dates_complete_daily_to_monthly == 1)] - dates_monthly <- array(0, dim=length(dates_complete_monthly)) - for (dd in 1:length(dates)){ - ii <- which(dates_complete_monthly == as.Date(paste(lubridate::year(dates[dd]), lubridate::month(dates[dd]), 01, sep = '-'))) - dates_monthly[ii] <- 1 - } +} - # Evapotranspiration estimation (unless pet is already provided) - if (is.null(pet)){ - if (pet_method == 'hargreaves'){ - data <- list(tasmax = tasmax$data, tasmin = tasmin$data, lat = prlr$coords$latitude) - } else if (pet_method == 'hargreaves_modified'){ - data <- list(tasmax = tasmax$data, tasmin = tasmin$data, prlr = prlr$data, lat = prlr$coords$latitude) - } else if (pet_method == 'thornthwaite'){ - data <- list(tas = tas$data, lat = prlr$coords$latitude) - } - pet <- evapotranspiration(data, dates_monthly, pet_method, time_dim, leadtime_dim, memb_dim, lon_dim, lat_dim, n_procs) - } - # Accumulation - diff_P_PET <- prlr$data - pet - data_accum <- accumulation(diff_P_PET, dates_monthly, accum, time_dim, leadtime_dim, memb_dim, n_procs) - - # Standardization: - if (standardization == TRUE){ - spei_dat <- spei_standardization(data_accum = data_accum, - leadtime_dim = leadtime_dim, - time_dim = time_dim, - memb_dim = memb_dim, - cross_validation = cross_validation, - handle_infinity = handle_infinity, - n_procs = n_procs, - accum = accum, - param_error = param_error, - method = method, distribution = distribution, fit = fit) - } else { - spei_dat <- data_accum - } - return(spei_dat) +PeriodSPEI <- function(exp, dates, lat, + exp_cor = NULL, pet = NULL, + time_dim = 'syear', leadtime_dim = 'time', + memb_dim = 'ensemble', lat_dim = 'latitude', + accum = 1, start = NULL, end = NULL, + pet_method = c('hargreaves', 'hargreaves'), + standardization = TRUE, + params = NULL, param_error = -9999, + handle_infinity = FALSE, cross_validation = FALSE, + method = 'parametric', distribution = 'log-Logistic', + fit = 'ub-pwm', ncores = 4) { + + #----------------------------------------------------------------------------- + + # Part (1): Initial checks + + ## pet and pet_method + if (is.null(pet_method) & is.null(pet)) { + warning("Parameter 'pet_method' cannot be NULL if pet is not provided.") + } + if (!is.null(exp_cor)) { + if (length(pet_method) == 1) { + pet_method <- rep(pet_method, 2) + } + if (length(exp_cor) < 1) { + exp_cor <- NULL + } + } else { + if (lenght(pet_method) > 1) { + warning("Parameter 'pet_method' is of length 2, only first value will be used.") + pet_method <- pet_method[1] + } + } + ## exp + ## TO DO: Check order: tasmax, tasmin, prlr + if (all(c('tasmin', 'tasmax', 'prlr') %in% names(exp))) { + # hargreaves modified: 'tasmin', 'tasmax', 'prlr' and 'lat' + if (!any(pet_method %in% c('hargreaves_modified', 'hargreaves'))) { + warning("Parameter 'pet_method' needs to be 'hargreaves_modified' or 'hargreaves'.") + pet_method[1] <- 'hargreaves_modified' + } + } else if (all(c('tasmin', 'tasmax') %in% names(exp))) { + # hargreaves: 'tasmin', 'tasmax' and 'lat' + if (pet_method != c('hargreaves')) { + warning("Parameter 'pet_method' needs to be 'hargreaves'.") + pet_method[1] <- 'hargreaves' + } + + } else if ('tas' %in% names(exp)) { + # thornthwaite: 'tas' (mean), 'lat' + pet_method[1] <- 'thornthwaite' + + } else { + stop("Parameter 'exp' needs to be a list with the needed variables.") + } + + # warning(' needs to be in C and precipitation in mm/month') # there is no check + + # check if accumulation period is possible + if (accum > dim(exp[[1]])[leadtime_dim]) { + stop(paste0('ERROR: Cannot compute accumulation of ', accum, ' months because loaded data has only ', + dim(exp[[1]])[leadtime_dim], ' months.')) + } + # method + if (!(method %in% c('parametric', 'non-parametric'))) { + stop('pcX script error: SPEI can be only computed using the following approach: parametric or non-parametric') + } + # distribution + if (!(distribution %in% c('log-Logistic', 'Gamma', 'PearsonIII'))) { + stop('Distrib must be one of "log-Logistic", "Gamma" or "PearsonIII"') + } + # fit + if (!(fit %in% c('max-lik', 'ub-pwm', 'pp-pwm'))) { + stop('Method must be one of "ub-pwm" (default), "pp-pwm" or "max-lik"') + } + + #----------------------------------------------------------------------------- + # Part (2): preparation + + # complete dates + ini_date <- as.Date(paste(lubridate::year(min(dates)), 01, 01, sep= '-')) + end_date <- as.Date(paste(lubridate::year(max(dates)), 12, 31, sep='-')) + dates_complete_daily <- as.Date(ini_date:end_date) + + dates_complete_daily_to_monthly <- lubridate::day(dates_complete_daily) + dates_complete_monthly <- dates_complete_daily[which(dates_complete_daily_to_monthly == 1)] + dates_monthly <- array(0, dim=length(dates_complete_monthly)) + for (dd in 1:length(dates)) { + ii <- which(dates_complete_monthly == as.Date(paste(lubridate::year(dates[dd]), lubridate::month(dates[dd]), 01, sep = '-'))) + dates_monthly[ii] <- 1 + } + + #----------------------------------------------------------------------------- + # Part (3): Compute PeriodSPEI + k = 0 + spei_res <- NULL + computed_pet <- FALSE + + for (data in .return2list(exp, exp_cor)) { + k = k + 1 + # Evapotranspiration estimation (unless pet is already provided) + if (is.null(pet) | computed_pet) { + pet <- evapotranspiration(data, dates_monthly, pet_method = pet_method[k], time_dim, + leadtime_dim, memb_dim, lat_dim, ncores) + computed_pet <- TRUE + } + # Accumulation + diff_P_PET <- data$prlr - pet + data_accum <- accumulation(diff_P_PET, dates_monthly, accum, time_dim, + leadtime_dim, memb_dim, ncores) + # Standardization: + if (standardization == TRUE) { + spei_dat <- spei_standardization(data_accum = data_accum, + leadtime_dim = leadtime_dim, + time_dim = time_dim, + memb_dim = memb_dim, + cross_validation = cross_validation, + handle_infinity = handle_infinity, + ncores = ncores, + accum = accum, + param_error = param_error, + params = params, method = method, + distribution = distribution, fit = fit) + params <- spei_dat$params + } else { + spei_dat <- data_accum + } + spei_res[[k]] <- spei_dat[[1]] + } + + if (standardization) { + spei_res[[k+1]] <- params + if (is.null(exp_cor)) { + names(spei_res) <- c('exp', 'params') + } else { + names(spei_res) <- c('exp', 'exp_cor', 'params') + } + } else { + if (is.null(exp_cor)) { + names(spei_res) <- c('exp') + } else { + names(spei_res) <- c('exp', 'exp_cor') + } + } + + return(spei_res) + } - -## functions +.return2list <- function(data1, data2 = NULL) { + if (is.null(data2)) { + return(list(data1)) + } else { + return(list(data1, data2)) + } +} + + + evapotranspiration <- function(data, dates_monthly, pet_method = 'hargreaves', time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', - lon_dim = 'longitude', lat_dim = 'latitude', - n_procs = 4){ + lat_dim = 'latitude', + ncores = 4) { - lat_mask <- InsertDim(InsertDim(data$lat,pos = 1, len = 1, name = 'dat'),pos = 3, len = dim(data[[1]])[lon_dim], name = lon_dim) + ## corrected:: + lat_mask <- array(lat, dim = c(1, length(lat))) + names(dim(lat_mask)) <- c('dat', lat_dim) # extract mask of NA locations to return to NA the final result - mask_NA <- array(1, dim = dim(data[[1]])) - if (pet_method == 'hargreaves'){ - mask_NA[which(is.na(data$tasmax))] <- 0 - mask_NA[which(is.na(data$tasmin))] <- 0 - } - if (pet_method == 'hargreaves_modified'){ - mask_NA[which(is.na(data$tasmax))] <- 0 - mask_NA[which(is.na(data$tasmin))] <- 0 - mask_NA[which(is.na(data$prlr))] <- 0 - } - if (pet_method == 'thornthwaite'){ - mask_NA[which(is.na(tas$data))] <- 0 - } - + + mask_NA <- array(1, dim = dim(data[[1]])) + if (pet_method == 'hargreaves') { + mask_NA[which(is.na(data$tasmax))] <- 0 + mask_NA[which(is.na(data$tasmin))] <- 0 + } else if (pet_method == 'hargreaves_modified') { + mask_NA[which(is.na(data$tasmax))] <- 0 + mask_NA[which(is.na(data$tasmin))] <- 0 + mask_NA[which(is.na(data$prlr))] <- 0 + } else if (pet_method == 'thornthwaite') { + mask_NA[which(is.na(data$tas))] <- 0 + } + # replace NA with 0 - for (dd in 1:length(data)){ + for (dd in 1:length(data)) { data[[dd]][which(is.na(data[[dd]]))] <- 0 } # prepare data - if (pet_method == 'hargreaves'){ - data_input <- list(lat_mask = lat_mask, tasmax = data$tasmax, tasmin = data$tasmin) - targetdim_input <- list(lat_mask = c('dat'), tasmax = c(leadtime_dim, time_dim), tasmin = c(leadtime_dim, time_dim)) - } else if (pet_method == 'hargreaves_modified'){ - data_input <- list(lat_mask = lat_mask, tasmax = data$tasmax, tasmin = data$tasmin, prlr = data$prlr) - targetdim_input <- list(lat_mask = c('dat'), tasmax = c(leadtime_dim, time_dim), tasmin = c(leadtime_dim, time_dim), prlr = c(leadtime_dim, time_dim)) - } else if (pet_method == 'thornthwaite'){ - data_input <- list(lat_mask = lat_mask, tas = data$tas) - targetdim_input <- list(lat_mask = c('dat'), tas = c(leadtime_dim, time_dim)) - } else { - stop (paste0('Unknown pet_method ', pet_method)) - } - - PET_estimated <- Apply(data = data_input, - target_dims = targetdim_input, + target_dims_data <- lapply(data, function(x) rep(c(leadtime_dim, time_dim), 1)) + # file <- tempfile() + # file = 'out.txt' + # sink(file) + PET_estimated <- Apply(data = c(list(lat_mask = lat_mask), data), + target_dims = c(list(lat_mask = 'dat'), target_dims_data), output_dims = c(leadtime_dim, time_dim), pet_method = pet_method, - dates_monthly = dates_monthly, # array of 0 and 1 used to build the data_input to a complete array of the full years that are considered - dim_month = leadtime_dim, dim_year = time_dim, - fun = atomic_pet, ncores = n_procs) - + dates_monthly = dates_monthly, + leadtime_dim = leadtime_dim, time_dim = time_dim, + fun = atomic_pet, ncores = ncores)$output1 + # sink(NULL) + # captured_message <- readLines(file) + # print(paste0('Length captured!!', length(captured_message))) + # print(paste0('unique(captured_message)', length(unique(captured_message)))) + # reorder dims in PET_estimated - dims_order <- array(NA, length(dim(data[[1]]))) - for (ord in 1:length(dim(data[[1]]))){ - dims_order[ord] <- which(names(dim(PET_estimated$output1)) == names(dim(data[[1]]))[ord]) - } - data_pet <- aperm(PET_estimated$output1, dims_order) + pos <- match(names(dim(data[[1]])),names(dim(PET_estimated))) + PET_estimated <- aperm(PET_estimated, pos) # restore original NAs from mask_NA - data_pet[which(mask_NA == 0 )] <- NA + PET_estimated[which(mask_NA == 0 )] <- NA - return(data_pet) + return(PET_estimated) } -accumulation <- function(diff_P_PET, dates_monthly, accum = 1, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', n_procs = 4){ +accumulation <- function(diff_P_PET, dates_monthly, accum = 1, + time_dim = 'syear', leadtime_dim = 'time', + memb_dim = 'ensemble', ncores = 4) { - if(!time_dim %in% names(dim(diff_P_PET))){ + if (!time_dim %in% names(dim(diff_P_PET))) { diff_P_PET <- InsertDim(diff_P_PET, posdim = 1, lendim = 1, name = time_dim) } - if(!leadtime_dim %in% names(dim(diff_P_PET))){ + if (!leadtime_dim %in% names(dim(diff_P_PET))) { diff_P_PET <- InsertDim(diff_P_PET, posdim = 1, lendim = 1, name = leadtime_dim) } accum_result <- Apply(data = list(diff_P_PET), target_dims = list(diff_P_PET = c(leadtime_dim,time_dim)), - dates_monthly = dates_monthly, # array of 0 and 1 used to build the data_input to a complete array of the full years that are considered + dates_monthly = dates_monthly, accum = accum, output_dims = c(leadtime_dim, time_dim), #c('time','sdate'), - dim_month = leadtime_dim, dim_year = time_dim, - fun = atomic_accum, ncores = n_procs) + leadtime_dim = leadtime_dim, time_dim = time_dim, + fun = atomic_accum, ncores = ncores) # recover essential lost dims (if they had length 1 they'd have been dropped in previous step): - for (d in c(time_dim, leadtime_dim, 'latitude', 'longitude', memb_dim)){ + for (d in c(time_dim, leadtime_dim, 'latitude', 'longitude', memb_dim)) { if(!d %in% names(dim(accum_result$output1))){ accum_result$output1 <- InsertDim(data = accum_result$output1, posdim = length(names(dim(accum_result$output1))) + 1, lendim = 1, name = d) } @@ -179,113 +346,126 @@ accumulation <- function(diff_P_PET, dates_monthly, accum = 1, time_dim = 'syear } spei_standardization <- function(data_accum, - leadtime_dim, time_dim, memb_dim, handle_infinity, cross_validation, - n_procs, accum, param_error, - method ='parametric', distribution = 'log-Logistic', fit='ub-pwm'){ - data_spei <- data_accum - - data_spei <- Apply(data = list(data_accum), - target_dims = list(data = c(leadtime_dim,time_dim, memb_dim)), - output_dims = c(leadtime_dim,time_dim, memb_dim), - dim_month = leadtime_dim, - dim_year = time_dim, - dim_memb = memb_dim, - handle_infinity = handle_infinity, - cross_validation = cross_validation, - method = method, distribution = distribution, fit = fit, - ref_period = NULL, - param_error = param_error, - fun = atomic_spei, - ncores = n_procs)$output1 - - return(data_spei) + leadtime_dim, time_dim, memb_dim, handle_infinity, + cross_validation, ncores, accum, param_error, + params = NULL, method ='parametric', + distribution = 'log-Logistic', fit='ub-pwm') { -} + n_leadtimes <- dim(data_accum)[leadtime_dim] + n_sdates_params <- dim(data_accum)[time_dim] + + if (!cross_validation) { + n_sdates_params <- 1 + } -### Atomic functions + if (is.null(params)) { + params <- array(NA, dim = c(n_sdates_params, n_leadtimes, coef = 3)) + names(dim(params)) <- c(time_dim, leadtime_dim, 'coef') + } else if (length(dim(params)) < 2) { + params <- array(params, dim = c(length(params), n_sdates_params, n_leadtimes)) + params <- aperm(params, c(2,3,1)) # dim(params): [time_dim, leadtime_dim, coef] with the values repeated each time_dim and leadtime_dim + names(dim(params)) <- c(time_dim, leadtime_dim, 'coef') + } + + spei <- Apply(data = list(data = data_accum, params = params), + target_dims = list(data = c(leadtime_dim, time_dim, memb_dim), + params = c(time_dim, leadtime_dim, 'coef')), + output_dims = list(data_spei = c(leadtime_dim, time_dim, memb_dim), + params = c(time_dim, leadtime_dim, 'coef')), + leadtime_dim = leadtime_dim, + time_dim = time_dim, + dim_memb = memb_dim, + handle_infinity = handle_infinity, + cross_validation = cross_validation, + method = method, distribution = distribution, fit = fit, + ref_period = NULL, + param_error = param_error, + fun = atomic_spei, + ncores = ncores) + + return(spei) # spei is a list of data_spei and params +} -atomic_pet <- function(pet_method, dates_monthly, dim_month, dim_year, lat_mask = NULL, data2 = NULL, data3 = NULL, data4 = NULL){ +atomic_pet <- function(lat_mask = NULL, data2 = NULL, data3 = NULL, data4 = NULL, + pet_method, dates_monthly, leadtime_dim, time_dim) { - ftime = as.numeric(dim(data2)[leadtime_dim]) - n_sdates = as.numeric(dim(data2)[time_dim]) - - # create a vector from data but adding 0 to achive complete time series of the considered period - #(starting in January of the first year) so that the solar radiation estimation is computed - # in each case for the correct month - if (!is.null(data2)){ - data_tmp <- as.vector(data2) - data2 <- array(0, dim = length(dates_monthly)) - count <- 1 - for (dd in 1:length(dates_monthly)){ - if (dates_monthly[dd] == 1){ - data2[dd] <- data_tmp[count] - count <- count + 1 - } + dims <- dim(data2) + + # create a vector from data but adding 0 to achive complete time series + # of the considered period + # (starting in January of the first year) so that the solar radiation + # estimation is computed in each case for the correct month + + if (!is.null(data2)) { + data_tmp <- as.vector(data2) + data2 <- array(0, dim = length(dates_monthly)) + count <- 1 + for (dd in 1:length(dates_monthly)) { + if (dates_monthly[dd] == 1) { + data2[dd] <- data_tmp[count] + count <- count + 1 } - rm(data_tmp) } - if (!is.null(data3)){ - data_tmp <- as.vector(data3) - data3 <- array(0, dim = length(dates_monthly)) - count <- 1 - for (dd in 1:length(dates_monthly)){ - if (dates_monthly[dd] == 1){ - data3[dd] <- data_tmp[count] - count <- count + 1 - } + rm(data_tmp) + } + if (!is.null(data3)) { + data_tmp <- as.vector(data3) + data3 <- array(0, dim = length(dates_monthly)) + count <- 1 + for (dd in 1:length(dates_monthly)) { + if (dates_monthly[dd] == 1) { + data3[dd] <- data_tmp[count] + count <- count + 1 } - rm(data_tmp) } - if (!is.null(data4)){ - data_tmp <- as.vector(data4) - data4 <- array(0, dim = length(dates_monthly)) - count <- 1 - for (dd in 1:length(dates_monthly)){ - if (dates_monthly[dd] == 1){ - data4[dd] <- data_tmp[count] - count <- count + 1 - } + rm(data_tmp) + } + if (!is.null(data4)) { + data_tmp <- as.vector(data4) + data4 <- array(0, dim = length(dates_monthly)) + count <- 1 + for (dd in 1:length(dates_monthly)) { + if (dates_monthly[dd] == 1) { + data4[dd] <- data_tmp[count] + count <- count + 1 } - rm(data_tmp) } - if (pet_method == 'hargreaves'){ - x_PET <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), lat = lat_mask, na.rm = FALSE) - x_PET <- x_PET[which(dates_monthly == 1)] # line to return the vector to the size of the actual original data - PET <- array(x_PET, dim = c(ftime,n_sdates)) + rm(data_tmp) + } + if (pet_method == 'hargreaves') { + # NOTE EVA: if NA.RM is FALSE this gives error + x_PET <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), + lat = lat_mask, na.rm = TRUE) + # line to return the vector to the size of the actual original data + x_PET <- x_PET[which(dates_monthly == 1)] + PET <- array(x_PET, dim = dims) } - if (pet_method == 'hargreaves_modified'){ # not sure this works properly - PET <- array(NA, dim = c(ftime,n_sdates)) - for(ns in 1:n_sdates){ - tmax_mod_temp <- data2[12*(ns-1)+(1:12)] #data2[,ns] - tmin_mod_temp <- data3[12*(ns-1)+(1:12)] #data3[,ns] - pre_mod_temp <- data4[12*(ns-1)+(1:12)] #data4[,ns] - - # Computation of PET - x_PET <- hargreaves(Tmin = tmin_mod_temp, Tmax = tmax_mod_temp, lat = lat_mask, Pre = pre_mod_temp, na.rm = FALSE) - PET[1:ftime,ns] <- x_PET #dim <- (lon lat ft sd memb) - } + if (pet_method == 'hargreaves_modified') { + x_PET <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), + lat = lat_mask, Pre = as.vector(data4), na.rm = TRUE) + x_PET <- x_PET[which(dates_monthly == 1)] + PET <- array(x_PET, dim = dims) } - if (pet_method == 'thornthwaite'){ + if (pet_method == 'thornthwaite') { x_PET <- thornthwaite(as.vector(data2), lat = lat_mask, na.rm = TRUE) - x_PET <- x_PET[which(dates_monthly == 1)] # line to return the vector to the size of the actual original data - PET <- array(x_PET, dim = c(ftime,n_sdates)) + # line to return the vector to the size of the actual original data + x_PET <- x_PET[which(dates_monthly == 1)] + PET <- array(x_PET, dim = dims) } - return(PET) - } -atomic_accum <- function(data, dates_monthly, accum, dim_month, dim_year){ - # data:[time, syear] +atomic_accum <- function(data, dates_monthly, accum, leadtime_dim, time_dim) { - ftime = as.numeric(dim(data)[dim_month]) - n_sdates = as.numeric(dim(data)[dim_year]) + # data:[time, syear] + ftime <- dim(data)[1] + n_sdates <- dim(data)[2] data_vector <- array(0, dim = length(dates_monthly)) count <- 1 - for (dd in 1:length(dates_monthly)){ + for (dd in 1:length(dates_monthly)) { if (dates_monthly[dd] == 1){ data_vector[dd] <- as.vector(data)[count] count <- count + 1 @@ -294,21 +474,23 @@ atomic_accum <- function(data, dates_monthly, accum, dim_month, dim_year){ # Accumulation at different timescales data_sum_x <- rollapply(data_vector, accum, sum) # rollapply {zoo} A generic function for applying a function to rolling margins of an array. - data_sum_x <- c(rep(NA, accum-1), data_sum_x) # adds as many NAs as needed at the begining to account for the months that cannot be added (depends on accu) and so that the position in the vector corresponds to the accumulated of the previous months (instead of the accumulated of the next months) + data_sum_x <- c(rep(NA, accum-1), data_sum_x) # adds as many NAs as needed at the begining to account for the months that cannot be added + # (depends on accu) and so that the position in the vector corresponds to the accumulated + # of the previous months (instead of the accumulated of the next months) data_sum_x <- data_sum_x[which(dates_monthly == 1)] # discard the months that don't appear in the original data accum_result <- array(data_sum_x, dim = c(ftime,n_sdates)) # return to matrix form - if (accum > 1){ + if (accum > 1) { accum_result[1:(accum-1),] <- NA # replace by NA when the accumulation corresponds to months that where not present in the original data } return(accum_result) } - -atomic_spei <- function(data, dim_month, dim_year, dim_memb, ref_period, handle_infinity, cross_validation, param_error, - method ='parametric', distribution = 'log-Logistic', fit='ub-pwm'){ +atomic_spei <- function(data, params, leadtime_dim, time_dim, dim_memb, ref_period, + handle_infinity, cross_validation, param_error, method = 'parametric', + distribution = 'log-Logistic', fit = 'ub-pwm') { # data: [time, sdate, memb] - if (is.null(ref_period)){ + if (is.null(ref_period)) { ref.start <- NULL ref.end <- NULL } else { @@ -318,55 +500,76 @@ atomic_spei <- function(data, dim_month, dim_year, dim_memb, ref_period, handle_ if (all(is.na(data))) { speiX <- array(NA, dim(data)) - } else if (var(data, na.rm=T) == 0) { # if the data [time, sdate, memb] has no variability it will raise an error further down, so we assign a value to the result and skip the step + } else if (var(data, na.rm=T) == 0) { # if the data [time, sdate, memb] has no variability it will raise an error further down, + # so we assign a value to the result and skip the step speiX <- array(param_error, dim(data)) } else { - speiX <- spX(data, ref.start=ref.start, ref.end=ref.end, - method = method, distribution = distribution, fit = fit, - dim_month = dim_month, dim_year = dim_year, dim_memb = dim_memb, - handle_infinity = handle_infinity, cross_validation = cross_validation) + c(speiX, params) %<-% spX(data, ref.start=ref.start, ref.end=ref.end, params = params, + method = method, distribution = distribution, fit = fit, + leadtime_dim = leadtime_dim, time_dim = time_dim, dim_memb = dim_memb, + handle_infinity = handle_infinity, cross_validation = cross_validation) } - return(speiX) + + return(list(speiX, params)) } +#------------------------------------------------------------------------------- + ### Support functions -spX <- function(acc_data, ref.start, ref.end, na.rm = TRUE, method = 'parametric', distribution = 'log-Logistic', fit = 'ub-pwm', dim_month, dim_year, dim_memb, handle_infinity, cross_validation){ - +spX <- function(acc_data, ref.start, ref.end, params = params, na.rm = TRUE, + method = 'parametric', distribution = 'log-Logistic', fit = 'ub-pwm', + leadtime_dim, time_dim, dim_memb, handle_infinity, cross_validation){ + # acc_data: [ftime, syear, nmemb] + # params: [syear, ftime, coef] - if (!(method %in% c('parametric', 'non-parametric'))) { - stop('pcX script error: SPEI can be only computed using the following approach: parametric or non-parametric') - } - - ftime <- as.numeric(dim(acc_data)[dim_month]) - n_sdates <- as.numeric(dim(acc_data)[dim_year]) + n_coef_max <- dim(params)['coef'] # maximum number of parameters needed to define any of the considered distributions + + ftime <- as.numeric(dim(acc_data)[leadtime_dim]) + n_sdates <- as.numeric(dim(acc_data)[time_dim]) nmemb <- as.numeric(dim(acc_data)[dim_memb]) spei_mod <- array(data = NA, dim = c(ftime, n_sdates, nmemb)) - names(dim(spei_mod)) <- c(dim_month, dim_year, dim_memb) + names(dim(spei_mod)) <- c(leadtime_dim, time_dim, dim_memb) + if (cross_validation) { + params_result <- array(data = NA, dim = c(n_sdates, ftime, n_coef_max)) + } else { + params_result <- array(data = NA, dim = c(1, ftime, n_coef_max)) + } - for(ff in 1:ftime){ # treat months independently - Accum_D_temp <- ClimProjDiags::Subset(acc_data, along = dim_month, indices = ff, drop = FALSE) - x_spei_mod <- spX_ftime(Accum_D_temp, na.rm = TRUE, method = method, distribution = distribution, fit = fit, ref.start = ref.start, ref.end = ref.end, n_sdates = n_sdates, nmemb = nmemb, handle_infinity = handle_infinity, cross_validation = cross_validation) + for (ff in 1:ftime) { # treat months independently + Accum_D_temp <- ClimProjDiags::Subset(acc_data, along = leadtime_dim, indices = ff, drop = FALSE) + params_tmp <- if (all(is.na(params))) {NULL} else {params[,ff,]} + c(x_spei_mod, params_ff) %<-% spX_ftime(Accum_D_temp, na.rm = TRUE, method = method, distribution = distribution, + fit = fit, ref.start = ref.start, ref.end = ref.end, + params = params_tmp, n_sdates = n_sdates, nmemb = nmemb, + handle_infinity = handle_infinity, cross_validation = cross_validation) spei_mod[ff,,] <- x_spei_mod + coef_names <- names(params_ff) + if (length(params_ff) < n_coef_max){ # lengthen dimension coef of params_ff in case it doesn't match the corresponding dimension of parms_months + params_ff <- append(params_ff, array(NA, dim = n_coef_max - length(params_ff))) + coef_names <- append(coef_names, '') + } + params_result[,ff,] <- params_ff } + + colnames(params_result) <- coef_names + names(dim(params_result)) <- c(time_dim, leadtime_dim, 'coef') - return(spei_mod) + return(list(spei = spei_mod, params = params_result)) } +#------------------------------------------------------------------------------- + # HandleInfinity: spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm = TRUE, method = 'parametric', n_sdates, nmemb, ref.start = NULL, ref.end = NULL, params = NULL, handle_infinity = FALSE, cross_validation = FALSE) { # data:[ftime = 1, syear, ensemble] - - if (!(method %in% c('parametric', 'non-parametric'))) { - stop('SPEI can be only computed using the following approach: parametric or non-parametric') - } - if (method == 'non-parametric'){ + if (method == 'non-parametric') { if (anyNA(data) && na.rm == FALSE) { stop('Error: Data must not contain NAs') @@ -379,48 +582,45 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm SPEI = qnorm((bp-0.44)/(length(data)+0.12)) - return(SPEI) + return(SPEI) # it won't return params to be used in exp_cor; also it is not using handle_infinity nor cross_validation } else { std_index <- array(NA, c(n_sdates, nmemb)) - if (anyNA(data) && na.rm==FALSE) { + if (anyNA(data) && na.rm == FALSE) { stop('Error: Data must not contain NAs') } - if (!(distribution %in% c('log-Logistic', 'Gamma', 'PearsonIII'))) { - stop('Distrib must be one of "log-Logistic", "Gamma" or "PearsonIII"') - } - if (!(fit %in% c('max-lik', 'ub-pwm', 'pp-pwm'))) { - stop('Method must be one of "ub-pwm" (default), "pp-pwm" or "max-lik"') - } + coef = switch(distribution, "Gamma" = array(NA, dim = 2, dimnames = list(c('alpha','beta'))), "log-Logistic" = array(NA, dim = 3, dimnames = list(c('xi','alpha','kappa'))), - "PearsonIII" = array(NA, dim = 3, dimnames = list(c('mu','sigma','gamma'))) - ) + "PearsonIII" = array(NA, dim = 3, dimnames = list(c('mu','sigma','gamma')))) dim_one <- length(coef) if (!is.null(params)) { - if (length(params)!=dim_one) { + if (length(params) != dim_one) { stop(paste0('parameters array should have dimensions [', dim_one, ']')) } } # Select window if necessary - if (!is.null(ref.start) && !is.null(ref.end)) { - data.fit <- window(data,ref.start,ref.end) - } else { - data.fit <- data - } + if (!is.null(ref.start) && !is.null(ref.end)) { + data.fit <- window(data,ref.start,ref.end) + } else { + data.fit <- data + } - if(cross_validation == "TRUE") { + if (cross_validation == "TRUE") { loop_years <- n_sdates } else { loop_years <- 1 } + + params_result <- array(NA, dim = c(loop_years, dim_one)) + colnames(params_result) <- names(coef) for (nsd in 1:loop_years) { # loop over years (in case of cross_validation) # Cumulative series (acu) @@ -436,12 +636,13 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm acu_sd = sd(acu.sorted) if (!is.na(acu_sd)){ if (acu_sd != 0){ - if(distribution != "log-Logistic"){ + if(distribution != "log-Logistic") { pze <- sum(acu==0)/length(acu) acu.sorted = acu.sorted[acu.sorted > 0] } if (!is.null(params)) { f_params = as.vector(params) + params_result[nsd,] <- f_params } else { if (length(acu.sorted) >= 4){ # else coef will be NA # Calculate probability weighted moments based on fit with lmomco or TLMoments @@ -470,16 +671,23 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm if(distribution == 'log-Logistic' && fit == 'max-lik'){ f_params = parglo.maxlik(acu.sorted, f_params)$para } + params_result[nsd,] <- f_params } # end if dor the case the L-moments are not valid (std_index will be NA) } # end if for the case there are not enough values to estimate the parameters (std_index will be NA) } # end estimation of f_param # Calculate cdf based on distribution with lmom - cdf_res = switch(distribution, + if (all(is.na(params_result[nsd,]))){ + cdf_res <- NA + } else { + f_params <- params_result[nsd,] + f_params <- f_params[which(!is.na(f_params))] + cdf_res = switch(distribution, "log-Logistic" = lmom::cdfglo(data, f_params), "Gamma" = lmom::cdfgam(data, f_params), "PearsonIII" = lmom::cdfpe3(data, f_params) - ) - + ) + } + std_index_cv <- array(qnorm(cdf_res), dim = c(n_sdates,nmemb)) # Adjust if user chose Gamma or PearsonIII - Not tested: For future development @@ -490,20 +698,22 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm std_index[nsd,] <- std_index_cv[nsd,] } else { std_index <- std_index_cv - } + } } } # end if for the case there is no variability } # end if for the case all NA in acu } # next year (in case of cross_validation or all done if cross_validation == F) - + if(handle_infinity == 'TRUE'){ # could also use "param_error" ?; we are giving it the min/max value of the grid point std_index[is.infinite(std_index) & std_index < 0] <- min(std_index[!is.infinite(std_index)]) std_index[is.infinite(std_index) & std_index > 0] <- max(std_index[!is.infinite(std_index)]) } - - } # end parametric - return(std_index) # fitted is the only thing used after calling this function + return(list(std_index = std_index, params = params_result)) # f_params will be params only if cross_validation is FALSE + # (otherwise there will be one f_params per year; + # but the output params will be read only in the case that + # it is called with cross_validation FALSE) + + } # end parametric } - -- GitLab From 7de3fdde479d3dd32109e9245e5486151b9d3fc1 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Fri, 26 May 2023 17:31:11 +0200 Subject: [PATCH 05/42] Improve code and write documentation --- R/PeriodSPEI.R | 250 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 163 insertions(+), 87 deletions(-) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index 1eef63d..b73ef69 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -1,38 +1,36 @@ - -#------------------------------------------------------------------------------- -# To DO: Add documentation of same dimensions exp and exp_cor -# To DO: Add documentation for units -# TO DO: Write Documentation - -# library(s2dv) # InsertDim, ... -# library(multiApply) # Apply -# library(ClimProjDiags) # Subset -# library(zeallot) # multiple assignment %<-% -# library(SPEI) # estimation of evapotranspiration (e.g. hargreaves function) -# library(zoo) # rollapply, as.Date -# library(TLMoments) # pwd (Calculate probability weighted moments based on fit) -# library(lmomco) # pwm2lmom (probability weighted moments to L-Moments) -# library(lubridate) # year(), month(),... - -# library(CSTools) -#------------------------------------------------------------------------------- -#'@param exp A list with the 's2dv_cube' object class containing the seasonal -#' forecast experiment in the data element. -#'@param exp_cor An object of class \code{s2dv_cube} in which the quantile -#' PeriodSPEI should be applied. If it is not specified, the PeriodSPEI -#' is calculated from object 'exp'. -#'@param pet Multidimensional array containing the Potential EvapoTranspiration -#' data. If it is NULL it is calculated using pet_method. It is NULL by default. -#'@param time_dim -#'@param leadtime_dim -#'@param memb_dim -#'@param lat_dim -#--- other params from CSIndicators -#'@param accum +#'Compute the Standardised Precipitation-Evapotranspiration Index +#' +#'The Standardised Precipitation-Evapotranspiration Index (SPEI) is a +#'multiscalar drought index based on climatic data. It can be used for +#'determining the onset, duration and magnitude of drought conditions with +#'respect to normal conditions in a variety of natural and managed systems such +#'as crops, ecosystems, rivers, water resources, etc. The SPI is calculated +#'using monthly (or weekly) precipitation as the input data. The SPEI uses the +#'monthly (or weekly) difference between precipitation and PET. This represents +#'a simple climatic water balance which is calculated at different time scales +#'to obtain the SPEI. +#' +#'@param exp A named list with the needed \code{s2dv_cube} objects containing +#' the seasonal forecast experiment in the data element for each variable. +#'@param exp_cor A named list with the needed \code{s2dv_cube} objects for each +#' variable in which the quantile PeriodSPEI should be applied. If it is not +#' specified, the PeriodSPEI is calculated from object 'exp'. +#'@param pet A multidimensional array containing the Potential +#' EvapoTranspiration data. If it is NULL it is calculated using pet_method. It +#' is NULL by default. +#'@param time_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'syear'. +#'@param leadtime_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'time'. +#'@param memb_dim A character string indicating the name of the dimension in +#' which the ensemble members are stored. When set it to NULL, threshold is +#' computed for individual members. +#'@param lat_dim A character string indicating the name of the latitudinal +#' dimension. By default it is set by 'latitude'. +#'@param accum An integer value indicating the number of months for the +#' accumulation for each variable. #'@param start #'@param end -#' -# --- other params #'@param pet_method #'@param standardization #'@param params @@ -43,11 +41,26 @@ #'@param distribution #'@param fit #'@param ncores +#' +#'@examples +#'exp <- 1 +#' +# '@import s2dv +# '@import multiApply +# '@import ClimProjDiags +# '@import zeallot +# '@import SPEI +# '@import zoo +# '@import TLMoments +# '@import lmomco +# '@import lubridate +# '@import CSTools +# '@export CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', lat_dim = 'latitude', accum = 1, start = NULL, end = NULL, - pet_method = c('hargreaves', 'hargreaves'), + pet_method = 'hargreaves', standardization = TRUE, params = NULL, # not used if standardization is FALSE param_error = -9999, # not used if standardization is FALSE @@ -57,9 +70,8 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, fit = 'ub-pwm', # not used if standardization is FALSE ncores = 4) { - #----------------------------------------------------------------------------- - # Part (1): Initial structure checks - # coordinates + # Check 's2dv_cube' + .KnownLatNames <- CSTools:::.KnownLatNames() if (!any(names(exp[[1]]$coords) %in% .KnownLatNames)) { @@ -86,7 +98,6 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, method = method, distribution = distribution, fit = fit, ncores = ncores) return(res) - } @@ -96,79 +107,135 @@ PeriodSPEI <- function(exp, dates, lat, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', lat_dim = 'latitude', accum = 1, start = NULL, end = NULL, - pet_method = c('hargreaves', 'hargreaves'), + pet_method = 'hargreaves', standardization = TRUE, params = NULL, param_error = -9999, handle_infinity = FALSE, cross_validation = FALSE, method = 'parametric', distribution = 'log-Logistic', fit = 'ub-pwm', ncores = 4) { - #----------------------------------------------------------------------------- - - # Part (1): Initial checks + # Initial checks - ## pet and pet_method - if (is.null(pet_method) & is.null(pet)) { - warning("Parameter 'pet_method' cannot be NULL if pet is not provided.") + ## exp + if (!inherits(exp, 'list')) { + stop("Parameter 'exp' needs to be a named list with the needed variables.") + } + if (is.null(names(exp))) { + stop("Parameter 'exp' needs to be a named list with the variable names.") } + + ## exp_cor if (!is.null(exp_cor)) { - if (length(pet_method) == 1) { - pet_method <- rep(pet_method, 2) - } - if (length(exp_cor) < 1) { + if (!inherits(exp_cor, 'list')) { + warning("Parameter 'exp_cor' needs to be a named list with the needed ", + "variables. Only 'exp' will be used.") exp_cor <- NULL } - } else { - if (lenght(pet_method) > 1) { - warning("Parameter 'pet_method' is of length 2, only first value will be used.") - pet_method <- pet_method[1] + if (is.null(names(exp_cor))) { + stop("Parameter 'exp_cor' needs to be a named list with the variable names.") } } - ## exp - ## TO DO: Check order: tasmax, tasmin, prlr - if (all(c('tasmin', 'tasmax', 'prlr') %in% names(exp))) { - # hargreaves modified: 'tasmin', 'tasmax', 'prlr' and 'lat' - if (!any(pet_method %in% c('hargreaves_modified', 'hargreaves'))) { - warning("Parameter 'pet_method' needs to be 'hargreaves_modified' or 'hargreaves'.") - pet_method[1] <- 'hargreaves_modified' - } - } else if (all(c('tasmin', 'tasmax') %in% names(exp))) { - # hargreaves: 'tasmin', 'tasmax' and 'lat' - if (pet_method != c('hargreaves')) { - warning("Parameter 'pet_method' needs to be 'hargreaves'.") - pet_method[1] <- 'hargreaves' + + # Variable checks + if (is.null(pet)) { + ## exp (2) + if (all(c('tasmin', 'tasmax', 'prlr') %in% names(exp))) { + # hargreaves modified: 'tasmin', 'tasmax', 'prlr' and 'lat' + if (!any(pet_method %in% c('hargreaves_modified', 'hargreaves'))) { + warning("Parameter 'pet_method' needs to be 'hargreaves' or ", + "'hargreaves_modified'. It is set to 'hargreaves'.") + pet_method[1] <- 'hargreaves' + } + } else if (all(c('tas', 'prlr') %in% names(exp))) { + # thornthwaite: 'tas' (mean), 'lat' + if (!any(pet_method != 'thornthwaite')) { + warning("Parameter 'pet_method' it is set to be 'thornthwaite'.") + pet_method[1] <- 'thornthwaite' + } + } else { + stop("Parameter 'exp' needs to be a named list with accepted variable names.", + "See documentation.") } - - } else if ('tas' %in% names(exp)) { - # thornthwaite: 'tas' (mean), 'lat' - pet_method[1] <- 'thornthwaite' + ## exp_cor (2) + if (!is.null(exp_cor)) { + if (length(exp_cor) < 1) { + exp_cor <- NULL + } else { + if (length(pet_method) == 1) { + pet_method <- rep(pet_method, 2) + } + } + if (all(c('tasmin', 'tasmax', 'prlr') %in% names(exp_cor))) { + # hargreaves modified: 'tasmin', 'tasmax', 'prlr' and 'lat' + if (!any(pet_method %in% c('hargreaves_modified', 'hargreaves'))) { + warning("Parameter 'pet_method' needs to be 'hargreaves' or ", + "'hargreaves_modified'. It is set to 'hargreaves'.") + pet_method[2] <- 'hargreaves' + } + } else if (all(c('tas', 'prlr') %in% names(exp_cor))) { + # thornthwaite: 'tas' (mean), 'lat' + if (!any(pet_method != 'thornthwaite')) { + warning("Parameter 'pet_method' it is set to be 'thornthwaite'.") + pet_method[2] <- 'thornthwaite' + } + } else { + stop("Parameter 'exp_cor' needs to be a list with the needed variables.") + } + } } else { - stop("Parameter 'exp' needs to be a list with the needed variables.") + if (!('prlr' %in% names(exp))) { + stop("Variable 'prlr' is not included in 'exp'.") + } + if (!is.null(exp_cor)) { + if (!('prlr' %in% names(exp_cor))) { + stop("Variable 'prlr' is not included in 'exp_cor'.") + } + } } - # warning(' needs to be in C and precipitation in mm/month') # there is no check + ## time_dim + + ## leadtime_dim + + ## memb_dim + + ## lat_dim - # check if accumulation period is possible + ## accum if (accum > dim(exp[[1]])[leadtime_dim]) { stop(paste0('ERROR: Cannot compute accumulation of ', accum, ' months because loaded data has only ', dim(exp[[1]])[leadtime_dim], ' months.')) } - # method + + ## start + + ## end + + ## standardization + + ## param_error + + ## handle_infinity + + ## cross_validation + + ## method if (!(method %in% c('parametric', 'non-parametric'))) { stop('pcX script error: SPEI can be only computed using the following approach: parametric or non-parametric') } - # distribution + ## distribution if (!(distribution %in% c('log-Logistic', 'Gamma', 'PearsonIII'))) { stop('Distrib must be one of "log-Logistic", "Gamma" or "PearsonIII"') } - # fit + ## fit if (!(fit %in% c('max-lik', 'ub-pwm', 'pp-pwm'))) { stop('Method must be one of "ub-pwm" (default), "pp-pwm" or "max-lik"') } - #----------------------------------------------------------------------------- - # Part (2): preparation + ## ncores + + # Data preparation # complete dates ini_date <- as.Date(paste(lubridate::year(min(dates)), 01, 01, sep= '-')) @@ -183,8 +250,7 @@ PeriodSPEI <- function(exp, dates, lat, dates_monthly[ii] <- 1 } - #----------------------------------------------------------------------------- - # Part (3): Compute PeriodSPEI + # Compute PeriodSPEI k = 0 spei_res <- NULL computed_pet <- FALSE @@ -193,14 +259,21 @@ PeriodSPEI <- function(exp, dates, lat, k = k + 1 # Evapotranspiration estimation (unless pet is already provided) if (is.null(pet) | computed_pet) { - pet <- evapotranspiration(data, dates_monthly, pet_method = pet_method[k], time_dim, - leadtime_dim, memb_dim, lat_dim, ncores) + pet <- evapotranspiration(data = data, dates_monthly = dates_monthly, + lat = lat, pet_method = pet_method[k], + time_dim = time_dim, leadtime_dim = leadtime_dim, + memb_dim = memb_dim, lat_dim = lat_dim, + ncores = ncores) computed_pet <- TRUE } + # Accumulation diff_P_PET <- data$prlr - pet - data_accum <- accumulation(diff_P_PET, dates_monthly, accum, time_dim, - leadtime_dim, memb_dim, ncores) + data_accum <- accumulation(diff_P_PET = diff_P_PET, + dates_monthly = dates_monthly, accum = accum, + time_dim = time_dim, + leadtime_dim = leadtime_dim, + memb_dim = memb_dim, ncores = ncores) # Standardization: if (standardization == TRUE) { spei_dat <- spei_standardization(data_accum = data_accum, @@ -252,7 +325,7 @@ PeriodSPEI <- function(exp, dates, lat, -evapotranspiration <- function(data, dates_monthly, pet_method = 'hargreaves', +evapotranspiration <- function(data, dates_monthly, lat, pet_method = 'hargreaves', time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', lat_dim = 'latitude', ncores = 4) { @@ -265,13 +338,16 @@ evapotranspiration <- function(data, dates_monthly, pet_method = 'hargreaves', mask_NA <- array(1, dim = dim(data[[1]])) if (pet_method == 'hargreaves') { + varnames <- c('tasmax', 'tasmin') mask_NA[which(is.na(data$tasmax))] <- 0 mask_NA[which(is.na(data$tasmin))] <- 0 } else if (pet_method == 'hargreaves_modified') { + varnames <- c('tasmax', 'tasmin', 'prlr') mask_NA[which(is.na(data$tasmax))] <- 0 mask_NA[which(is.na(data$tasmin))] <- 0 mask_NA[which(is.na(data$prlr))] <- 0 } else if (pet_method == 'thornthwaite') { + varnames <- c('tas') mask_NA[which(is.na(data$tas))] <- 0 } @@ -281,11 +357,11 @@ evapotranspiration <- function(data, dates_monthly, pet_method = 'hargreaves', } # prepare data - target_dims_data <- lapply(data, function(x) rep(c(leadtime_dim, time_dim), 1)) + target_dims_data <- lapply(data[varnames], function(x) rep(c(leadtime_dim, time_dim), 1)) # file <- tempfile() # file = 'out.txt' # sink(file) - PET_estimated <- Apply(data = c(list(lat_mask = lat_mask), data), + PET_estimated <- Apply(data = c(list(lat_mask = lat_mask), data[varnames]), target_dims = c(list(lat_mask = 'dat'), target_dims_data), output_dims = c(leadtime_dim, time_dim), pet_method = pet_method, -- GitLab From 09753eb01a00265bdcc0480913019349b9cfae5f Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Mon, 29 May 2023 17:31:04 +0200 Subject: [PATCH 06/42] Add unit test --- R/PeriodSPEI.R | 135 ++++++++++++++++++++++--------- tests/testthat/test-PeriodSPEI.R | 113 ++++++++++++++++++++++++++ 2 files changed, 212 insertions(+), 36 deletions(-) create mode 100644 tests/testthat/test-PeriodSPEI.R diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index b73ef69..7d419d0 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -71,17 +71,26 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, ncores = 4) { # Check 's2dv_cube' + if (is.null(exp)) { + stop("Parameter 'exp' cannot be NULL.") + } + if (!all(sapply(exp, function(x) inherits(x, 's2dv_cube')))) { + stop("Parameter 'exp' must be a list of 's2dv_cube' class.") + } + if (!is.null(exp_cor)) { + if (!all(sapply(exp_cor, function(x) inherits(x, 's2dv_cube')))) { + stop("Parameter 'exp_cor' must be a list of 's2dv_cube' class.") + } + } .KnownLatNames <- CSTools:::.KnownLatNames() if (!any(names(exp[[1]]$coords) %in% .KnownLatNames)) { - stop("Spatial coordinate names of parameter 'obsL' do not match any ", + stop("Spatial coordinate names of parameter 'exp' do not match any ", "of the names accepted by the package.") } - lat_name <- names(exp[[1]]$coords)[[which(names(exp[[1]]$coords) %in% .KnownLatNames)]] - #----------------------------------------------------------------------------- - # Part (2): Call to PeriodSPEI + lat_name <- names(exp[[1]]$coords)[[which(names(exp[[1]]$coords) %in% .KnownLatNames)]] res <- PeriodSPEI(exp = lapply(exp, function(x) x$data), dates = exp[[1]]$attrs$Dates, @@ -115,7 +124,6 @@ PeriodSPEI <- function(exp, dates, lat, fit = 'ub-pwm', ncores = 4) { # Initial checks - ## exp if (!inherits(exp, 'list')) { stop("Parameter 'exp' needs to be a named list with the needed variables.") @@ -123,17 +131,37 @@ PeriodSPEI <- function(exp, dates, lat, if (is.null(names(exp))) { stop("Parameter 'exp' needs to be a named list with the variable names.") } + if (any(sapply(exp, function(x) is.null(names(dim(x)))))) { + stop("Parameter 'exp' needs to be a list of arrays with dimension names.") + } + dims <- lapply(exp, function(x) dim(x)) + first_dims <- dims[[1]] + all_equal <- all(sapply(dims[-1], function(x) identical(first_dims, x))) + if (!all_equal) { + stop("Parameter 'exp' variables needs to have the same dimension names.") + } ## exp_cor if (!is.null(exp_cor)) { if (!inherits(exp_cor, 'list')) { - warning("Parameter 'exp_cor' needs to be a named list with the needed ", - "variables. Only 'exp' will be used.") - exp_cor <- NULL + stop("Parameter 'exp_cor' needs to be a named list with the needed ", + "variables if it is not NULL.") } if (is.null(names(exp_cor))) { stop("Parameter 'exp_cor' needs to be a named list with the variable names.") } + if (any(sapply(exp_cor, function(x) is.null(names(dim(x)))))) { + stop("Parameter 'exp_cor' needs to be a list of arrays with dimension names.") + } + if (!all(names(exp_cor) %in% names(exp))) { + stop("Parameter 'exp_cor' needs to have the same variable names as 'exp'.") + } + dims <- lapply(exp_cor, function(x) dim(x)) + first_dims <- dims[[1]] + all_equal <- all(sapply(dims[-1], function(x) identical(first_dims, x))) + if (!all_equal) { + stop("Parameter 'exp_cor' variables needs to have the same dimension names.") + } } # Variable checks @@ -195,19 +223,58 @@ PeriodSPEI <- function(exp, dates, lat, } ## time_dim - + if (!is.character(time_dim) | length(time_dim) != 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!all(sapply(exp, function(x) time_dim %in% names(dim(x))))) { + stop("Parameter 'time_dim' is not found in 'exp' dimension.") + } + if (!is.null(exp_cor)) { + if (!all(sapply(exp_cor, function(x) time_dim %in% names(dim(x))))) { + stop("Parameter 'time_dim' is not found in 'exp_cor' dimension.") + } + } ## leadtime_dim - + if (!is.character(leadtime_dim) | length(leadtime_dim) != 1) { + stop("Parameter 'leadtime_dim' must be a character string.") + } + if (!all(sapply(exp, function(x) leadtime_dim %in% names(dim(x))))) { + stop("Parameter 'leadtime_dim' is not found in 'exp' dimension.") + } + if (!is.null(exp_cor)) { + if (!all(sapply(exp_cor, function(x) leadtime_dim %in% names(dim(x))))) { + stop("Parameter 'leadtime_dim' is not found in 'exp_cor' dimension.") + } + } ## memb_dim - + if (!is.character(memb_dim) | length(memb_dim) != 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!all(sapply(exp, function(x) memb_dim %in% names(dim(x))))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (!is.null(exp_cor)) { + if (!all(sapply(exp_cor, function(x) memb_dim %in% names(dim(x))))) { + stop("Parameter 'memb_dim' is not found in 'exp_cor' dimension.") + } + } ## lat_dim - + if (!is.character(lat_dim) | length(lat_dim) != 1) { + stop("Parameter 'lat_dim' must be a character string.") + } + if (!all(sapply(exp, function(x) lat_dim %in% names(dim(x))))) { + stop("Parameter 'lat_dim' is not found in 'exp' dimension.") + } + if (!is.null(exp_cor)) { + if (!all(sapply(exp_cor, function(x) lat_dim %in% names(dim(x))))) { + stop("Parameter 'lat_dim' is not found in 'exp_cor' dimension.") + } + } ## accum if (accum > dim(exp[[1]])[leadtime_dim]) { - stop(paste0('ERROR: Cannot compute accumulation of ', accum, ' months because loaded data has only ', - dim(exp[[1]])[leadtime_dim], ' months.')) + stop(paste0("Cannot compute accumulation of ", accum, " months because ", + "loaded data has only ", dim(exp[[1]])[leadtime_dim], " months.")) } - ## start ## end @@ -234,7 +301,12 @@ PeriodSPEI <- function(exp, dates, lat, } ## ncores - + if (!is.null(ncores)) { + if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } # Data preparation # complete dates @@ -388,13 +460,6 @@ accumulation <- function(diff_P_PET, dates_monthly, accum = 1, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', ncores = 4) { - if (!time_dim %in% names(dim(diff_P_PET))) { - diff_P_PET <- InsertDim(diff_P_PET, posdim = 1, lendim = 1, name = time_dim) - } - if (!leadtime_dim %in% names(dim(diff_P_PET))) { - diff_P_PET <- InsertDim(diff_P_PET, posdim = 1, lendim = 1, name = leadtime_dim) - } - accum_result <- Apply(data = list(diff_P_PET), target_dims = list(diff_P_PET = c(leadtime_dim,time_dim)), dates_monthly = dates_monthly, @@ -510,22 +575,22 @@ atomic_pet <- function(lat_mask = NULL, data2 = NULL, data3 = NULL, data4 = NULL } if (pet_method == 'hargreaves') { # NOTE EVA: if NA.RM is FALSE this gives error - x_PET <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), - lat = lat_mask, na.rm = TRUE) + x_PET <- invisible(hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), + lat = lat_mask, na.rm = TRUE)) # line to return the vector to the size of the actual original data x_PET <- x_PET[which(dates_monthly == 1)] PET <- array(x_PET, dim = dims) } if (pet_method == 'hargreaves_modified') { - x_PET <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), - lat = lat_mask, Pre = as.vector(data4), na.rm = TRUE) + x_PET <- invisible(hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), + lat = lat_mask, Pre = as.vector(data4), na.rm = TRUE)) x_PET <- x_PET[which(dates_monthly == 1)] PET <- array(x_PET, dim = dims) } if (pet_method == 'thornthwaite') { - x_PET <- thornthwaite(as.vector(data2), lat = lat_mask, na.rm = TRUE) + x_PET <- invisible(thornthwaite(as.vector(data2), lat = lat_mask, na.rm = TRUE)) # line to return the vector to the size of the actual original data x_PET <- x_PET[which(dates_monthly == 1)] PET <- array(x_PET, dim = dims) @@ -536,8 +601,7 @@ atomic_pet <- function(lat_mask = NULL, data2 = NULL, data3 = NULL, data4 = NULL atomic_accum <- function(data, dates_monthly, accum, leadtime_dim, time_dim) { # data:[time, syear] - ftime <- dim(data)[1] - n_sdates <- dim(data)[2] + dims <- dim(data) data_vector <- array(0, dim = length(dates_monthly)) count <- 1 @@ -554,14 +618,13 @@ atomic_accum <- function(data, dates_monthly, accum, leadtime_dim, time_dim) { # (depends on accu) and so that the position in the vector corresponds to the accumulated # of the previous months (instead of the accumulated of the next months) data_sum_x <- data_sum_x[which(dates_monthly == 1)] # discard the months that don't appear in the original data - accum_result <- array(data_sum_x, dim = c(ftime,n_sdates)) # return to matrix form + dim(data_sum_x) <- dims if (accum > 1) { - accum_result[1:(accum-1),] <- NA # replace by NA when the accumulation corresponds to months that where not present in the original data + data_sum_x[1:(accum-1),] <- NA # replace by NA when the accumulation corresponds to months that where not present in the original data } - - return(accum_result) - + return(data_sum_x) } + atomic_spei <- function(data, params, leadtime_dim, time_dim, dim_memb, ref_period, handle_infinity, cross_validation, param_error, method = 'parametric', distribution = 'log-Logistic', fit = 'ub-pwm') { @@ -576,7 +639,7 @@ atomic_spei <- function(data, params, leadtime_dim, time_dim, dim_memb, ref_peri if (all(is.na(data))) { speiX <- array(NA, dim(data)) - } else if (var(data, na.rm=T) == 0) { # if the data [time, sdate, memb] has no variability it will raise an error further down, + } else if (var(data, na.rm = T) == 0) { # if the data [time, sdate, memb] has no variability it will raise an error further down, # so we assign a value to the result and skip the step speiX <- array(param_error, dim(data)) } else { diff --git a/tests/testthat/test-PeriodSPEI.R b/tests/testthat/test-PeriodSPEI.R new file mode 100644 index 0000000..0a9a6bd --- /dev/null +++ b/tests/testthat/test-PeriodSPEI.R @@ -0,0 +1,113 @@ +context("CSIndicators::PeriodSPEI tests") + +############################################## +# cube1 +cube1 <- NULL +cube1$data <- array(rnorm(10), dim = c(lat = 2, time = 5)) +class(cube1) <- 's2dv_cube' + +# dat1 +dims <- c(syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 10) +dimscor <- c(syear = 1, time = 3, latitude = 2, longitude = 1, ensemble = 15) + +set.seed(1) +exp_tasmax <- array(rnorm(100, 27.73, 5.26), dim = dims) +set.seed(2) +exp_tasmin <- array(rnorm(100, 14.83, 3.86), dim = dims) +set.seed(3) +exp_prlr <- array(rnorm(100, 21.19, 25.64), dim = dims) + +set.seed(1) +expcor_tasmax <- array(rnorm(100, 29.03, 5.67), dim = dimscor) +set.seed(2) +expcor_tasmin <- array(rnorm(100, 15.70, 4.40), dim = dimscor) +set.seed(3) +expcor_prlr <- array(rnorm(100, 15.62, 21.38), dim = dimscor) + +dates <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), + paste0(2010:2015, "-10-16"))) +dim(dates) <- c(sday = 1, sweek = 1, syear = 6, time = 3) + +lat <- c(40,40.1) + +exp1 <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) +exp_cor1 <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, 'prlr' = expcor_prlr) + +res <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, dates = dates) +source("/esarchive/scratch/erifarov/git/csindicators/R/PeriodSPEI.R") + +############################################## + +test_that("1. Initial checks CST_PeriodSPEI", { + expect_error( + CST_PeriodSPEI(exp = NULL), + "Parameter 'exp' cannot be NULL." + ) + expect_error( + CST_PeriodSPEI(exp = array(10)), + "Parameter 'exp' must be a list of 's2dv_cube' class." + ) + expect_error( + CST_PeriodSPEI(exp = list(cube1)), + paste0("Spatial coordinate names of parameter 'exp' do not match any ", + "of the names accepted by the package.") + ) +}) + +############################################## + +test_that("1. Initial checks PeriodSPEI", { + # exp + expect_error( + PeriodSPEI(exp = NULL), + "Parameter 'exp' needs to be a named list with the needed variables." + ) + expect_error( + PeriodSPEI(exp = list(1)), + "Parameter 'exp' needs to be a named list with the variable names." + ) + expect_error( + PeriodSPEI(exp = list('tasmax' = array(10))), + "Parameter 'exp' needs to be a list of arrays with dimension names." + ) + # exp_cor + expect_error( + PeriodSPEI(exp = exp1, exp_cor = 1), + paste0("Parameter 'exp_cor' needs to be a named list with the needed ", + "variables if it is not NULL.") + ) + expect_error( + PeriodSPEI(exp = exp1, exp_cor = list(1)), + "Parameter 'exp_cor' needs to be a named list with the variable names." + ) + expect_error( + PeriodSPEI(exp = exp1, exp_cor = list('tasmax' = array(10))), + "Parameter 'exp_cor' needs to be a list of arrays with dimension names." + ) + expect_error( + PeriodSPEI(exp = list('tas' = array(10, dim = c(time = 10))), + exp_cor = list('tos' = array(10, dim = c(time = 10)))), + "Parameter 'exp_cor' needs to have the same variable names as 'exp'." + ) + # exp (2) + expect_warning( + PeriodSPEI(exp = exp1, pet_method = '1', dates = dates, lat = lat), + paste0("Parameter 'pet_method' needs to be 'hargreaves' or ", + "'hargreaves_modified'. It is set to 'hargreaves'.") + ) + # time_dim + expect_error( + PeriodSPEI(exp = exp1, ) + ) + # leadtime_dim + + # memb_dim + + # lat_dim + + # accum + +}) + +############################################## + -- GitLab From 677219da70aa26feb38d948a24dae8e2d95bf604 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 1 Jun 2023 16:25:12 +0200 Subject: [PATCH 07/42] Improve code --- R/PeriodSPEI.R | 585 ++++++++++++++++++++++++------------------------- 1 file changed, 290 insertions(+), 295 deletions(-) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index 7d419d0..2eec05f 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -6,7 +6,7 @@ #'respect to normal conditions in a variety of natural and managed systems such #'as crops, ecosystems, rivers, water resources, etc. The SPI is calculated #'using monthly (or weekly) precipitation as the input data. The SPEI uses the -#'monthly (or weekly) difference between precipitation and PET. This represents +#'monthly (or weekly) difference between precipitation and pet. This represents #'a simple climatic water balance which is calculated at different time scales #'to obtain the SPEI. #' @@ -59,8 +59,8 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', lat_dim = 'latitude', - accum = 1, start = NULL, end = NULL, - pet_method = 'hargreaves', + accum = 1, start = NULL, end = NULL, + ref_period = NULL, pet_method = 'hargreaves', standardization = TRUE, params = NULL, # not used if standardization is FALSE param_error = -9999, # not used if standardization is FALSE @@ -68,7 +68,7 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, cross_validation = FALSE, # not used if standardization is FALSE method = 'parametric', distribution = 'log-Logistic', fit = 'ub-pwm', # not used if standardization is FALSE - ncores = 4) { + ncores = NULL) { # Check 's2dv_cube' if (is.null(exp)) { @@ -96,9 +96,9 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, dates = exp[[1]]$attrs$Dates, lat = exp[[1]]$coords[[lat_name]], exp_cor = lapply(exp_cor, function(x) x$data), - pet = pet, + pet = pet, ref_period = ref_period, time_dim = time_dim, leadtime_dim = leadtime_dim, - memb_dim = memb_dim, lat_dim = lat_dim, + memb_dim = memb_dim, lat_dim = lat_dim, accum = accum, start = start, end = end, pet_method = pet_method, standardization = standardization, params = params, param_error = param_error, @@ -116,7 +116,7 @@ PeriodSPEI <- function(exp, dates, lat, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', lat_dim = 'latitude', accum = 1, start = NULL, end = NULL, - pet_method = 'hargreaves', + pet_method = 'hargreaves', ref_period = NULL, standardization = TRUE, params = NULL, param_error = -9999, handle_infinity = FALSE, cross_validation = FALSE, @@ -169,14 +169,14 @@ PeriodSPEI <- function(exp, dates, lat, ## exp (2) if (all(c('tasmin', 'tasmax', 'prlr') %in% names(exp))) { # hargreaves modified: 'tasmin', 'tasmax', 'prlr' and 'lat' - if (!any(pet_method %in% c('hargreaves_modified', 'hargreaves'))) { + if (!(pet_method[1] %in% c('hargreaves_modified', 'hargreaves'))) { warning("Parameter 'pet_method' needs to be 'hargreaves' or ", "'hargreaves_modified'. It is set to 'hargreaves'.") pet_method[1] <- 'hargreaves' } } else if (all(c('tas', 'prlr') %in% names(exp))) { # thornthwaite: 'tas' (mean), 'lat' - if (!any(pet_method != 'thornthwaite')) { + if (!(pet_method[1] == 'thornthwaite')) { warning("Parameter 'pet_method' it is set to be 'thornthwaite'.") pet_method[1] <- 'thornthwaite' } @@ -196,14 +196,14 @@ PeriodSPEI <- function(exp, dates, lat, } if (all(c('tasmin', 'tasmax', 'prlr') %in% names(exp_cor))) { # hargreaves modified: 'tasmin', 'tasmax', 'prlr' and 'lat' - if (!any(pet_method %in% c('hargreaves_modified', 'hargreaves'))) { + if (!(pet_method[2] %in% c('hargreaves_modified', 'hargreaves'))) { warning("Parameter 'pet_method' needs to be 'hargreaves' or ", "'hargreaves_modified'. It is set to 'hargreaves'.") pet_method[2] <- 'hargreaves' } } else if (all(c('tas', 'prlr') %in% names(exp_cor))) { # thornthwaite: 'tas' (mean), 'lat' - if (!any(pet_method != 'thornthwaite')) { + if (!(pet_method[2] == 'thornthwaite')) { warning("Parameter 'pet_method' it is set to be 'thornthwaite'.") pet_method[2] <- 'thornthwaite' } @@ -334,31 +334,31 @@ PeriodSPEI <- function(exp, dates, lat, pet <- evapotranspiration(data = data, dates_monthly = dates_monthly, lat = lat, pet_method = pet_method[k], time_dim = time_dim, leadtime_dim = leadtime_dim, - memb_dim = memb_dim, lat_dim = lat_dim, - ncores = ncores) + lat_dim = lat_dim, ncores = ncores) computed_pet <- TRUE } # Accumulation - diff_P_PET <- data$prlr - pet - data_accum <- accumulation(diff_P_PET = diff_P_PET, + diff_p_pet <- data$prlr - pet + data_accum <- accumulation(data = diff_p_pet, dates_monthly = dates_monthly, accum = accum, - time_dim = time_dim, - leadtime_dim = leadtime_dim, - memb_dim = memb_dim, ncores = ncores) + time_dim = time_dim, leadtime_dim = leadtime_dim, + ncores = ncores) # Standardization: if (standardization == TRUE) { - spei_dat <- spei_standardization(data_accum = data_accum, - leadtime_dim = leadtime_dim, - time_dim = time_dim, - memb_dim = memb_dim, - cross_validation = cross_validation, - handle_infinity = handle_infinity, - ncores = ncores, - accum = accum, - param_error = param_error, - params = params, method = method, - distribution = distribution, fit = fit) + spei_dat <- spei_standardization(data = data_accum, + leadtime_dim = leadtime_dim, + time_dim = time_dim, + memb_dim = memb_dim, + ref_period = ref_period, + cross_validation = cross_validation, + handle_infinity = handle_infinity, + ncores = ncores, + accum = accum, + param_error = param_error, + params = params, method = method, + distribution = distribution, + fit = fit) params <- spei_dat$params } else { spei_dat <- data_accum @@ -395,32 +395,29 @@ PeriodSPEI <- function(exp, dates, lat, } - +#------------------------------------------------------------------------------- evapotranspiration <- function(data, dates_monthly, lat, pet_method = 'hargreaves', - time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', - lat_dim = 'latitude', - ncores = 4) { + time_dim = 'syear', leadtime_dim = 'time', + lat_dim = 'latitude', ncores = NULL) { - ## corrected:: lat_mask <- array(lat, dim = c(1, length(lat))) names(dim(lat_mask)) <- c('dat', lat_dim) # extract mask of NA locations to return to NA the final result - - mask_NA <- array(1, dim = dim(data[[1]])) + mask_na <- array(1, dim = dim(data[[1]])) if (pet_method == 'hargreaves') { varnames <- c('tasmax', 'tasmin') - mask_NA[which(is.na(data$tasmax))] <- 0 - mask_NA[which(is.na(data$tasmin))] <- 0 + mask_na[which(is.na(data$tasmax))] <- 0 + mask_na[which(is.na(data$tasmin))] <- 0 } else if (pet_method == 'hargreaves_modified') { varnames <- c('tasmax', 'tasmin', 'prlr') - mask_NA[which(is.na(data$tasmax))] <- 0 - mask_NA[which(is.na(data$tasmin))] <- 0 - mask_NA[which(is.na(data$prlr))] <- 0 + mask_na[which(is.na(data$tasmax))] <- 0 + mask_na[which(is.na(data$tasmin))] <- 0 + mask_na[which(is.na(data$prlr))] <- 0 } else if (pet_method == 'thornthwaite') { varnames <- c('tas') - mask_NA[which(is.na(data$tas))] <- 0 + mask_na[which(is.na(data$tas))] <- 0 } # replace NA with 0 @@ -433,7 +430,7 @@ evapotranspiration <- function(data, dates_monthly, lat, pet_method = 'hargreave # file <- tempfile() # file = 'out.txt' # sink(file) - PET_estimated <- Apply(data = c(list(lat_mask = lat_mask), data[varnames]), + pet_estimated <- Apply(data = c(list(lat_mask = lat_mask), data[varnames]), target_dims = c(list(lat_mask = 'dat'), target_dims_data), output_dims = c(leadtime_dim, time_dim), pet_method = pet_method, @@ -442,91 +439,18 @@ evapotranspiration <- function(data, dates_monthly, lat, pet_method = 'hargreave fun = atomic_pet, ncores = ncores)$output1 # sink(NULL) # captured_message <- readLines(file) - # print(paste0('Length captured!!', length(captured_message))) - # print(paste0('unique(captured_message)', length(unique(captured_message)))) - - # reorder dims in PET_estimated - pos <- match(names(dim(data[[1]])),names(dim(PET_estimated))) - PET_estimated <- aperm(PET_estimated, pos) - - # restore original NAs from mask_NA - PET_estimated[which(mask_NA == 0 )] <- NA - return(PET_estimated) + # reorder dims in pet_estimated + pos <- match(names(dim(data[[1]])),names(dim(pet_estimated))) + pet_estimated <- aperm(pet_estimated, pos) -} - -accumulation <- function(diff_P_PET, dates_monthly, accum = 1, - time_dim = 'syear', leadtime_dim = 'time', - memb_dim = 'ensemble', ncores = 4) { - - accum_result <- Apply(data = list(diff_P_PET), - target_dims = list(diff_P_PET = c(leadtime_dim,time_dim)), - dates_monthly = dates_monthly, - accum = accum, - output_dims = c(leadtime_dim, time_dim), #c('time','sdate'), - leadtime_dim = leadtime_dim, time_dim = time_dim, - fun = atomic_accum, ncores = ncores) - - # recover essential lost dims (if they had length 1 they'd have been dropped in previous step): - for (d in c(time_dim, leadtime_dim, 'latitude', 'longitude', memb_dim)) { - if(!d %in% names(dim(accum_result$output1))){ - accum_result$output1 <- InsertDim(data = accum_result$output1, posdim = length(names(dim(accum_result$output1))) + 1, lendim = 1, name = d) - } - } - - # reorder dims in accum_result - dims_order <- array(NA, length(dim(diff_P_PET))) - for (ord in 1:length(dim(diff_P_PET))){ - dims_order[ord] <- which(names(dim(accum_result$output1)) == names(dim(diff_P_PET))[ord]) - } - data_accum <- aperm(accum_result$output1, dims_order) + # restore original NAs from mask_na + pet_estimated[which(mask_na == 0 )] <- NA - return(data_accum) + return(pet_estimated) } -spei_standardization <- function(data_accum, - leadtime_dim, time_dim, memb_dim, handle_infinity, - cross_validation, ncores, accum, param_error, - params = NULL, method ='parametric', - distribution = 'log-Logistic', fit='ub-pwm') { - - n_leadtimes <- dim(data_accum)[leadtime_dim] - n_sdates_params <- dim(data_accum)[time_dim] - - if (!cross_validation) { - n_sdates_params <- 1 - } - - if (is.null(params)) { - params <- array(NA, dim = c(n_sdates_params, n_leadtimes, coef = 3)) - names(dim(params)) <- c(time_dim, leadtime_dim, 'coef') - } else if (length(dim(params)) < 2) { - params <- array(params, dim = c(length(params), n_sdates_params, n_leadtimes)) - params <- aperm(params, c(2,3,1)) # dim(params): [time_dim, leadtime_dim, coef] with the values repeated each time_dim and leadtime_dim - names(dim(params)) <- c(time_dim, leadtime_dim, 'coef') - } - - spei <- Apply(data = list(data = data_accum, params = params), - target_dims = list(data = c(leadtime_dim, time_dim, memb_dim), - params = c(time_dim, leadtime_dim, 'coef')), - output_dims = list(data_spei = c(leadtime_dim, time_dim, memb_dim), - params = c(time_dim, leadtime_dim, 'coef')), - leadtime_dim = leadtime_dim, - time_dim = time_dim, - dim_memb = memb_dim, - handle_infinity = handle_infinity, - cross_validation = cross_validation, - method = method, distribution = distribution, fit = fit, - ref_period = NULL, - param_error = param_error, - fun = atomic_spei, - ncores = ncores) - - return(spei) # spei is a list of data_spei and params -} - atomic_pet <- function(lat_mask = NULL, data2 = NULL, data3 = NULL, data4 = NULL, pet_method, dates_monthly, leadtime_dim, time_dim) { @@ -575,30 +499,52 @@ atomic_pet <- function(lat_mask = NULL, data2 = NULL, data3 = NULL, data4 = NULL } if (pet_method == 'hargreaves') { # NOTE EVA: if NA.RM is FALSE this gives error - x_PET <- invisible(hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), + x_pet <- invisible(hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), lat = lat_mask, na.rm = TRUE)) # line to return the vector to the size of the actual original data - x_PET <- x_PET[which(dates_monthly == 1)] - PET <- array(x_PET, dim = dims) + x_pet <- x_pet[which(dates_monthly == 1)] + pet <- array(x_pet, dim = dims) } if (pet_method == 'hargreaves_modified') { - x_PET <- invisible(hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), + x_pet <- invisible(hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), lat = lat_mask, Pre = as.vector(data4), na.rm = TRUE)) - x_PET <- x_PET[which(dates_monthly == 1)] - PET <- array(x_PET, dim = dims) + x_pet <- x_pet[which(dates_monthly == 1)] + pet <- array(x_pet, dim = dims) } if (pet_method == 'thornthwaite') { - x_PET <- invisible(thornthwaite(as.vector(data2), lat = lat_mask, na.rm = TRUE)) + x_pet <- invisible(thornthwaite(as.vector(data2), lat = lat_mask, na.rm = TRUE)) # line to return the vector to the size of the actual original data - x_PET <- x_PET[which(dates_monthly == 1)] - PET <- array(x_PET, dim = dims) + x_pet <- x_pet[which(dates_monthly == 1)] + pet <- array(x_pet, dim = dims) } - return(PET) + return(pet) } -atomic_accum <- function(data, dates_monthly, accum, leadtime_dim, time_dim) { +#------------------------------------------------------------------------------- + +accumulation <- function(data, dates_monthly, accum = 1, + time_dim = 'syear', leadtime_dim = 'time', + ncores = NULL) { + + accum_result <- Apply(data = list(data), + target_dims = list(data = c(leadtime_dim, time_dim)), + dates_monthly = dates_monthly, + accum = accum, + output_dims = c(leadtime_dim, time_dim), + leadtime_dim = leadtime_dim, time_dim = time_dim, + fun = atomic_accum, ncores = ncores)$output1 + + pos <- match(names(dim(accum_result)), names(dim(data))) + data_accum <- aperm(accum_result, pos) + + return(data_accum) + +} + +atomic_accum <- function(data, dates_monthly, accum = 1, + time_dim = 'syear', leadtime_dim = 'time') { # data:[time, syear] dims <- dim(data) @@ -606,29 +552,90 @@ atomic_accum <- function(data, dates_monthly, accum, leadtime_dim, time_dim) { data_vector <- array(0, dim = length(dates_monthly)) count <- 1 for (dd in 1:length(dates_monthly)) { - if (dates_monthly[dd] == 1){ + if (dates_monthly[dd] == 1) { data_vector[dd] <- as.vector(data)[count] count <- count + 1 } } - # Accumulation at different timescales - data_sum_x <- rollapply(data_vector, accum, sum) # rollapply {zoo} A generic function for applying a function to rolling margins of an array. - data_sum_x <- c(rep(NA, accum-1), data_sum_x) # adds as many NAs as needed at the begining to account for the months that cannot be added - # (depends on accu) and so that the position in the vector corresponds to the accumulated - # of the previous months (instead of the accumulated of the next months) - data_sum_x <- data_sum_x[which(dates_monthly == 1)] # discard the months that don't appear in the original data - dim(data_sum_x) <- dims + # rollapply {zoo} A generic function for applying a function to rolling margins of an array. + data_sum_x <- rollapply(data_vector, accum, sum) + # adds as many NAs as needed at the begining to account for the months that cannot be added + # (depends on accu) and so that the position in the vector corresponds to the accumulated + # of the previous months (instead of the accumulated of the next months) + data_sum_x <- c(rep(NA, accum-1), data_sum_x) + # discard the months that don't appear in the original data + data_sum_x <- data_sum_x[which(dates_monthly == 1)] + accum_result <- array(data_sum_x, dim = c(dims)) + # replace by NA when the accumulation corresponds to months that where not present in the original data if (accum > 1) { - data_sum_x[1:(accum-1),] <- NA # replace by NA when the accumulation corresponds to months that where not present in the original data + accum_result[1:(accum-1), ] <- NA } - return(data_sum_x) + return(accum_result) } -atomic_spei <- function(data, params, leadtime_dim, time_dim, dim_memb, ref_period, - handle_infinity, cross_validation, param_error, method = 'parametric', +#------------------------------------------------------------------------------- + + +spei_standardization <- function(data, accum = 1, time_dim = 'syear', + leadtime_dim = 'time', memb_dim = 'ensemble', + ref_period = NULL, + handle_infinity = FALSE, + cross_validation = FALSE, + param_error = -9999, params = NULL, + method = 'parametric', + distribution = 'log-Logistic', + fit = 'ub-pwm', + ncores = NULL) { + + n_leadtimes <- dim(data)[leadtime_dim] + n_sdates_params <- dim(data)[time_dim] + + if (!cross_validation) { + n_sdates_params <- 1 + } + + if (is.null(params)) { + params <- array(NA, dim = c(n_sdates_params, n_leadtimes, coef = 3)) # hardcoded + names(dim(params)) <- c(time_dim, leadtime_dim, 'coef') + } else if (length(dim(params)) < 2) { + params <- array(params, dim = c(length(params), n_sdates_params, n_leadtimes)) + params <- aperm(params, c(2,3,1)) # dim(params): [time_dim, leadtime_dim, coef] with the values repeated each time_dim and leadtime_dim + names(dim(params)) <- c(time_dim, leadtime_dim, 'coef') + } + + spei <- Apply(data = list(data = data, params = params), + target_dims = list(data = c(leadtime_dim, time_dim, memb_dim), + params = c(time_dim, leadtime_dim, 'coef')), + output_dims = list(data_spei = c(leadtime_dim, time_dim, memb_dim), + params = c(time_dim, leadtime_dim, 'coef')), + leadtime_dim = leadtime_dim, + time_dim = time_dim, + memb_dim = memb_dim, + handle_infinity = handle_infinity, + cross_validation = cross_validation, + method = method, distribution = distribution, fit = fit, + ref_period = ref_period, + param_error = param_error, + fun = atomic_spei, + ncores = ncores) + + return(spei) # spei is a list of data_spei and params +} + + + + + +atomic_spei <- function(data, params, leadtime_dim = 'time', time_dim = 'syear', + memb_dim = 'ensemble', ref_period = NULL, + handle_infinity = FALSE, cross_validation = FALSE, + param_error = -9999, method = 'parametric', distribution = 'log-Logistic', fit = 'ub-pwm') { - # data: [time, sdate, memb] + + # data: [leadtime_dim, time_dim, memb_dim] + # params: [time_dim, leadtime_dim, 'coef'] + if (is.null(ref_period)) { ref.start <- NULL ref.end <- NULL @@ -639,99 +646,83 @@ atomic_spei <- function(data, params, leadtime_dim, time_dim, dim_memb, ref_peri if (all(is.na(data))) { speiX <- array(NA, dim(data)) - } else if (var(data, na.rm = T) == 0) { # if the data [time, sdate, memb] has no variability it will raise an error further down, - # so we assign a value to the result and skip the step + # if the data [time, sdate, memb] has no variability it will raise an error further down, + # so we assign a value to the result and skip the step + } else if (var(data, na.rm = T) == 0) { speiX <- array(param_error, dim(data)) + return(speiX) } else { - c(speiX, params) %<-% spX(data, ref.start=ref.start, ref.end=ref.end, params = params, - method = method, distribution = distribution, fit = fit, - leadtime_dim = leadtime_dim, time_dim = time_dim, dim_memb = dim_memb, - handle_infinity = handle_infinity, cross_validation = cross_validation) + n_coef_max <- dim(params)['coef'] + ftime <- as.numeric(dim(data)[leadtime_dim]) + n_sdates <- as.numeric(dim(data)[time_dim]) + nmemb <- as.numeric(dim(data)[memb_dim]) + + spei_mod <- array(data = NA, dim = c(ftime, n_sdates, nmemb)) + names(dim(spei_mod)) <- c(leadtime_dim, time_dim, memb_dim) + if (cross_validation) { + params_result <- array(data = NA, dim = c(n_sdates, ftime, n_coef_max)) + } else { + params_result <- array(data = NA, dim = c(1, ftime, n_coef_max)) + } + for (ff in 1:ftime) { # treat months independently + Accum_D_temp <- ClimProjDiags::Subset(data, along = leadtime_dim, + indices = ff, drop = FALSE) + params_tmp <- if (all(is.na(params))) {NULL} else {params[, ff, ]} + + c(x_spei_mod, params_ff) %<-% spX_ftime(data = Accum_D_temp, na.rm = TRUE, method = method, + distribution = distribution, + fit = fit, ref.start = ref.start, ref.end = ref.end, + params = params_tmp, n_sdates = n_sdates, nmemb = nmemb, + handle_infinity = handle_infinity, + cross_validation = cross_validation) + spei_mod[ff, , ] <- x_spei_mod + coef_names <- names(params_ff) + # lengthen dimension coef of params_ff in case it doesn't match the corresponding dimension of parms_months + if (length(params_ff) < n_coef_max) { + params_ff <- append(params_ff, array(NA, dim = n_coef_max - length(params_ff))) + coef_names <- append(coef_names, '') + } + params_result[, ff, ] <- params_ff + } + colnames(params_result) <- coef_names + names(dim(params_result)) <- c(time_dim, leadtime_dim, 'coef') + return(list(spei = spei_mod, params = params_result)) } - - return(list(speiX, params)) - } #------------------------------------------------------------------------------- ### Support functions -spX <- function(acc_data, ref.start, ref.end, params = params, na.rm = TRUE, - method = 'parametric', distribution = 'log-Logistic', fit = 'ub-pwm', - leadtime_dim, time_dim, dim_memb, handle_infinity, cross_validation){ - - # acc_data: [ftime, syear, nmemb] - # params: [syear, ftime, coef] - - n_coef_max <- dim(params)['coef'] # maximum number of parameters needed to define any of the considered distributions - - ftime <- as.numeric(dim(acc_data)[leadtime_dim]) - n_sdates <- as.numeric(dim(acc_data)[time_dim]) - nmemb <- as.numeric(dim(acc_data)[dim_memb]) - - spei_mod <- array(data = NA, dim = c(ftime, n_sdates, nmemb)) - names(dim(spei_mod)) <- c(leadtime_dim, time_dim, dim_memb) - if (cross_validation) { - params_result <- array(data = NA, dim = c(n_sdates, ftime, n_coef_max)) - } else { - params_result <- array(data = NA, dim = c(1, ftime, n_coef_max)) - } - - for (ff in 1:ftime) { # treat months independently - Accum_D_temp <- ClimProjDiags::Subset(acc_data, along = leadtime_dim, indices = ff, drop = FALSE) - params_tmp <- if (all(is.na(params))) {NULL} else {params[,ff,]} - c(x_spei_mod, params_ff) %<-% spX_ftime(Accum_D_temp, na.rm = TRUE, method = method, distribution = distribution, - fit = fit, ref.start = ref.start, ref.end = ref.end, - params = params_tmp, n_sdates = n_sdates, nmemb = nmemb, - handle_infinity = handle_infinity, cross_validation = cross_validation) - spei_mod[ff,,] <- x_spei_mod - coef_names <- names(params_ff) - if (length(params_ff) < n_coef_max){ # lengthen dimension coef of params_ff in case it doesn't match the corresponding dimension of parms_months - params_ff <- append(params_ff, array(NA, dim = n_coef_max - length(params_ff))) - coef_names <- append(coef_names, '') - } - params_result[,ff,] <- params_ff - } +spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', + na.rm = TRUE, method = 'parametric', n_sdates, nmemb, + ref.start = NULL, ref.end = NULL, params = NULL, + handle_infinity = FALSE, cross_validation = FALSE) { - colnames(params_result) <- coef_names - names(dim(params_result)) <- c(time_dim, leadtime_dim, 'coef') - - return(list(spei = spei_mod, params = params_result)) -} + # data: [leadtime_dim = 1, time_dim, memb_dim] -#------------------------------------------------------------------------------- + if (anyNA(data) && na.rm == FALSE) { + stop('Error: Data must not contain NAs') # TO DO: return something? + } -# HandleInfinity: -spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm = TRUE, method = 'parametric', n_sdates, nmemb, - ref.start = NULL, ref.end = NULL, params = NULL, handle_infinity = FALSE, cross_validation = FALSE) { - - # data:[ftime = 1, syear, ensemble] - if (method == 'non-parametric') { - - if (anyNA(data) && na.rm == FALSE) { - stop('Error: Data must not contain NAs') - } - + bp = matrix(0, length(data), 1) for (i in 1:length(data)) { - bp[i,1] = sum(data[] <= data[i]); # Writes the rank of the data + bp[i,1] = sum(data[] <= data[i], na.rm = na.rm); # Writes the rank of the data } - - SPEI = qnorm((bp-0.44)/(length(data)+0.12)) + SPEI = qnorm((bp - 0.44)/(length(data) + 0.12)) return(SPEI) # it won't return params to be used in exp_cor; also it is not using handle_infinity nor cross_validation } else { std_index <- array(NA, c(n_sdates, nmemb)) - + if (anyNA(data) && na.rm == FALSE) { stop('Error: Data must not contain NAs') } - coef = switch(distribution, "Gamma" = array(NA, dim = 2, dimnames = list(c('alpha','beta'))), "log-Logistic" = array(NA, dim = 3, dimnames = list(c('xi','alpha','kappa'))), @@ -740,11 +731,11 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm dim_one <- length(coef) if (!is.null(params)) { - if (length(params) != dim_one) { + if (length(params)!=dim_one) { stop(paste0('parameters array should have dimensions [', dim_one, ']')) } } - + # Select window if necessary if (!is.null(ref.start) && !is.null(ref.end)) { data.fit <- window(data,ref.start,ref.end) @@ -752,107 +743,111 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm data.fit <- data } - if (cross_validation == "TRUE") { + if (cross_validation) { loop_years <- n_sdates } else { loop_years <- 1 } + params_result <- array(NA, dim = c(loop_years, dim_one)) colnames(params_result) <- names(coef) for (nsd in 1:loop_years) { # loop over years (in case of cross_validation) # Cumulative series (acu) - if (cross_validation == TRUE){ + if (cross_validation) { acu <- as.vector(data.fit[,-nsd,]) } else { acu <- as.vector(data.fit) } acu.sorted <- sort.default(acu, method = "quick") - acu.sorted <- acu.sorted[!is.na(acu.sorted)] # remove NAs (no need if(na.rm) because if there are NA and na.rm=F we don't get to this point) - if (length(acu.sorted)!=0){ # else all acu was NA and we don't need to continue with this case + # remove NAs (no need if(na.rm) because if there are NA and na.rm = F we don't get to this point) + acu.sorted <- acu.sorted[!is.na(acu.sorted)] + # else all acu was NA and we don't need to continue with this case + if (length(acu.sorted) != 0) { acu_sd = sd(acu.sorted) - if (!is.na(acu_sd)){ - if (acu_sd != 0){ - if(distribution != "log-Logistic") { - pze <- sum(acu==0)/length(acu) - acu.sorted = acu.sorted[acu.sorted > 0] - } - if (!is.null(params)) { - f_params = as.vector(params) - params_result[nsd,] <- f_params - } else { - if (length(acu.sorted) >= 4){ # else coef will be NA - # Calculate probability weighted moments based on fit with lmomco or TLMoments - pwm = switch(fit, - "pp-pwm" = pwm.pp(acu.sorted,-0.35,0, nmom=3), - pwm.ub(acu.sorted, nmom=3) - #TLMoments::PWM(acu.sorted, order=0:2) - ) - - # Check L-moments validity - lmom <- pwm2lmom(pwm) - if (!(!are.lmom.valid(lmom) || anyNA(lmom[[1]]) || any(is.nan(lmom[[1]])))){ - - # lmom fortran functions need specific inputs L1, L2, T3 - # this is handled by lmomco internally with lmorph - fortran_vec = c(lmom$lambdas[1:2], lmom$ratios[3]) - - # Calculate parameters based on distribution with lmom then lmomco - f_params = switch(distribution, - "log-Logistic" = tryCatch(lmom::pelglo(fortran_vec), error = function(e){ parglo(lmom)$para }), - "Gamma" = tryCatch(lmom::pelgam(fortran_vec), error = function(e){ pargam(lmom)$para }), - "PearsonIII" = tryCatch(lmom::pelpe3(fortran_vec), error = function(e){ parpe3(lmom)$para }) - ) - - # Adjust if user chose log-Logistic and max-lik - if(distribution == 'log-Logistic' && fit == 'max-lik'){ - f_params = parglo.maxlik(acu.sorted, f_params)$para - } - params_result[nsd,] <- f_params - } # end if dor the case the L-moments are not valid (std_index will be NA) - } # end if for the case there are not enough values to estimate the parameters (std_index will be NA) - } # end estimation of f_param - # Calculate cdf based on distribution with lmom - if (all(is.na(params_result[nsd,]))){ - cdf_res <- NA - } else { - f_params <- params_result[nsd,] - f_params <- f_params[which(!is.na(f_params))] - cdf_res = switch(distribution, - "log-Logistic" = lmom::cdfglo(data, f_params), - "Gamma" = lmom::cdfgam(data, f_params), - "PearsonIII" = lmom::cdfpe3(data, f_params) - ) - } - - std_index_cv <- array(qnorm(cdf_res), dim = c(n_sdates,nmemb)) - - # Adjust if user chose Gamma or PearsonIII - Not tested: For future development - #if(distribution != 'log-Logistic'){ - # std_index[ff,s] = qnorm(pze + (1-pze)*pnorm(std_index[ff,s])) # ff doesn't exist at this point - #} - if (cross_validation == TRUE){ - std_index[nsd,] <- std_index_cv[nsd,] - } else { - std_index <- std_index_cv + if (!is.na(acu_sd)) { + if (acu_sd != 0) { + if (distribution != "log-Logistic") { + pze <- sum(acu == 0) / length(acu) + acu.sorted = acu.sorted[acu.sorted > 0] + } + if (!is.null(params)) { + f_params = as.vector(params) + params_result[nsd, ] <- f_params + } else { + # else coef will be NA + if (length(acu.sorted) >= 4) { + # Calculate probability weighted moments based on fit with lmomco or TLMoments + pwm = switch(fit, + "pp-pwm" = pwm.pp(acu.sorted,-0.35,0, nmom=3), + pwm.ub(acu.sorted, nmom=3) + # TLMoments::PWM(acu.sorted, order = 0:2) + ) + + # Check L-moments validity + lmom <- pwm2lmom(pwm) + if (!(!are.lmom.valid(lmom) || anyNA(lmom[[1]]) || any(is.nan(lmom[[1]])))) { + # lmom fortran functions need specific inputs L1, L2, T3 + # this is handled by lmomco internally with lmorph + fortran_vec = c(lmom$lambdas[1:2], lmom$ratios[3]) + + # Calculate parameters based on distribution with lmom then lmomco + f_params = switch(distribution, + "log-Logistic" = tryCatch(lmom::pelglo(fortran_vec), + error = function(e){ parglo(lmom)$para }), + "Gamma" = tryCatch(lmom::pelgam(fortran_vec), + error = function(e){ pargam(lmom)$para }), + "PearsonIII" = tryCatch(lmom::pelpe3(fortran_vec), + error = function(e){ parpe3(lmom)$para })) + + # Adjust if user chose log-Logistic and max-lik + if (distribution == 'log-Logistic' && fit == 'max-lik') { + f_params = parglo.maxlik(acu.sorted, f_params)$para + } + params_result[nsd, ] <- f_params + } # end if dor the case the L-moments are not valid (std_index will be NA) + } # end if for the case there are not enough values to estimate the parameters (std_index will be NA) + } # end estimation of f_param + # Calculate cdf based on distribution with lmom + if (all(is.na(params_result[nsd,]))) { + cdf_res <- NA + } else { + f_params <- params_result[nsd,] + f_params <- f_params[which(!is.na(f_params))] + cdf_res = switch(distribution, + "log-Logistic" = lmom::cdfglo(data, f_params), + "Gamma" = lmom::cdfgam(data, f_params), + "PearsonIII" = lmom::cdfpe3(data, f_params)) + } + + std_index_cv <- array(qnorm(cdf_res), dim = c(n_sdates,nmemb)) + + # Adjust if user chose Gamma or PearsonIII - Not tested: For future development + # if (distribution != 'log-Logistic') { + # std_index[ff,s] = qnorm(pze + (1-pze)*pnorm(std_index[ff,s])) # ff doesn't exist at this point + # } + if (cross_validation) { + std_index[nsd, ] <- std_index_cv[nsd, ] + } else { + std_index <- std_index_cv + } } - } - } # end if for the case there is no variability - } # end if for the case all NA in acu - } # next year (in case of cross_validation or all done if cross_validation == F) + } # end if for the case there is no variability + } # end if for the case all NA in acu + } # next year (in case of cross_validation or all done if cross_validation == F) - if(handle_infinity == 'TRUE'){ # could also use "param_error" ?; we are giving it the min/max value of the grid point + if (handle_infinity) { # could also use "param_error" ?; we are giving it the min/max value of the grid point std_index[is.infinite(std_index) & std_index < 0] <- min(std_index[!is.infinite(std_index)]) std_index[is.infinite(std_index) & std_index > 0] <- max(std_index[!is.infinite(std_index)]) } - - return(list(std_index = std_index, params = params_result)) # f_params will be params only if cross_validation is FALSE - # (otherwise there will be one f_params per year; - # but the output params will be read only in the case that - # it is called with cross_validation FALSE) + # f_params will be params only if cross_validation is FALSE + # (otherwise there will be one f_params per year; + # but the output params will be read only in the case that + # it is called with cross_validation FALSE) + return(list(std_index = std_index, params = params_result)) - } # end parametric - + } } +#------------------------------------------------------------------------------- -- GitLab From 20d7a12e3b2a3bf489b677f0808a6a3349fb3dee Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Fri, 9 Jun 2023 17:21:21 +0200 Subject: [PATCH 08/42] Add documentation and improve code --- R/PeriodSPEI.R | 704 ++++++++++++++++++------------- R/zzz.R | 8 + tests/testthat/test-PeriodSPEI.R | 33 +- 3 files changed, 460 insertions(+), 285 deletions(-) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index 2eec05f..552c0d2 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -1,7 +1,7 @@ #'Compute the Standardised Precipitation-Evapotranspiration Index #' -#'The Standardised Precipitation-Evapotranspiration Index (SPEI) is a -#'multiscalar drought index based on climatic data. It can be used for +#'Calculation of the Standardised Precipitation-Evapotranspiration Index (SPEI) +#'that is a multiscalar drought index based on climatic data. It can be used for #'determining the onset, duration and magnitude of drought conditions with #'respect to normal conditions in a variety of natural and managed systems such #'as crops, ecosystems, rivers, water resources, etc. The SPI is calculated @@ -11,7 +11,14 @@ #'to obtain the SPEI. #' #'@param exp A named list with the needed \code{s2dv_cube} objects containing -#' the seasonal forecast experiment in the data element for each variable. +#' the seasonal forecast experiment in the data element for each variable. +#' Specific variables are needed for each method used in computing the +#' Potential Evapotranspiration. See parameter 'pet_method'. The accepted +#' variables are: 'tasmin' and 'tasmax', required for methods 'hargreaves' and +#' 'hargreaves_modified' and 'tas', required for method 'thornthwaite'. +#' Variable 'prlr' is always needed. The units for temperature variables +#' ('tasmin', 'tasmax' and 'tas') need to be in Celcius degrees; the units for +#' precipitation ('prlr') need to be in mm/month. #'@param exp_cor A named list with the needed \code{s2dv_cube} objects for each #' variable in which the quantile PeriodSPEI should be applied. If it is not #' specified, the PeriodSPEI is calculated from object 'exp'. @@ -29,46 +36,85 @@ #' dimension. By default it is set by 'latitude'. #'@param accum An integer value indicating the number of months for the #' accumulation for each variable. -#'@param start -#'@param end -#'@param pet_method -#'@param standardization -#'@param params -#'@param param_error -#'@param handle_infinity -#'@param cross_validation -#'@param method -#'@param distribution -#'@param fit -#'@param ncores +#'@param ref_period A list with two numeric values with the starting and end +#' points of the reference period used for computing the index. The default +#' value is NULL indicating that the first and end values in data will be +#' used as starting and end points. +#'@param params A multidimensional array with named dimensions for computing the +#' SPEI. This option overrides computation of fitting parameters. It needs +#' to be of same leadtime and time dimensions of exp and a dimension named +#' 'coef' with the length of the coefficients needed for the used distribution +#' (for 'Gamma' coef dimension is of lenght 2, for 'log-Logistic' or +#' 'PearsonIII' is of length 3). If cross_validation is FALSE, the leadtime +#' dimension must be of length 1. +#'@param standardization A logical value indicating wether the standardization +#' is computed. +#'@param cross_validation A logical value indicating if cross validation is +#' done (TRUE), or not (FALSE). It is FALSE by default. +#'@param pet_method A character string indicating the method used to compute +#' the potential evapotranspiration. The accepted methods are: +#' 'hargreaves' and 'hargreaves_modified', that require the data to have +#' variables tasmin and tasmax; and 'thornthwaite', that requires variable +#' 'tas'. +#'@param method A character string indicating the standardization method used. +#' If can be: 'parametric' or 'non-parametric'. +#'@param distribution A character string indicating the name of the distribution +#' function to be used for computing the SPEI. The accepted names are: +#' 'log-Logistic', 'Gamma' or 'PearsonIII'. It is set to 'log-Logistic' by +#' default. +#'@param fit A character string indicating the name of the method used for +#' computing the distribution function parameters The accepteed names are: +#' 'ub-pwm', 'pp-pwm' and 'max-lik'. It is set to 'ub-pwm' by default. +#'@param param_error A numeric value with the error accepted. +#'@param handle_infinity A logical value wether to return Infinite values (TRUE) +#' or not (FALSE). +#'@param na.rm A logical value indicating whether NA values should be removed +#' from data. It is FALSE by default. +#'@param ncores An integer value indicating the number of cores to use in +#' parallel computation. #' #'@examples -#'exp <- 1 +#'# Test random data +#'dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, +#' latitude = 2, longitude = 1, ensemble = 25) +#'dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 1, time = 3, +#' latitude = 2, longitude = 1, ensemble = 15) +#'exp_tasmax <- array(rnorm(100, 27.73, 5.26), dim = dims) +#'exp_tasmin <- array(rnorm(100, 14.83, 3.86), dim = dims) +#'exp_prlr <- array(rnorm(100, 21.19, 25.64), dim = dims) #' -# '@import s2dv -# '@import multiApply -# '@import ClimProjDiags -# '@import zeallot -# '@import SPEI -# '@import zoo -# '@import TLMoments -# '@import lmomco -# '@import lubridate -# '@import CSTools -# '@export +#'expcor_tasmax <- array(rnorm(100, 29.03, 5.67), dim = dimscor) +#'expcor_tasmin <- array(rnorm(100, 15.70, 4.40), dim = dimscor) +#'expcor_prlr <- array(rnorm(100, 15.62, 21.38), dim = dimscor) +#' +#'dates <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), +#' paste0(2010:2015, "-10-16"))) +#'dim(dates) <- c(sday = 1, sweek = 1, syear = 6, time = 3) +#' +#'lat <- c(40,40.1) +#' +#'exp <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) +#'exp_cor <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, +#' 'prlr' = expcor_prlr) +#'res <- PeriodSPEI(exp = exp, exp_cor = exp_cor, lat = lat, dates = dates) +#' +#'@import multiApply +#'@import ClimProjDiags +#'@import SPEI +#'@import zoo +#'@import TLMoments +#'@import lmomco +#'@import lubridate +#'@export CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', lat_dim = 'latitude', - accum = 1, start = NULL, end = NULL, - ref_period = NULL, pet_method = 'hargreaves', - standardization = TRUE, - params = NULL, # not used if standardization is FALSE - param_error = -9999, # not used if standardization is FALSE - handle_infinity = FALSE, # not used if standardization is FALSE - cross_validation = FALSE, # not used if standardization is FALSE - method = 'parametric', distribution = 'log-Logistic', - fit = 'ub-pwm', # not used if standardization is FALSE - ncores = NULL) { + accum = 1, ref_period = NULL, params = NULL, + standardization = TRUE, cross_validation = FALSE, + pet_method = 'hargreaves', method = 'parametric', + distribution = 'log-Logistic', fit = 'ub-pwm', + param_error = -9999, handle_infinity = FALSE, + na.rm = FALSE, ncores = NULL) { # Check 's2dv_cube' if (is.null(exp)) { @@ -83,45 +129,148 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, } } - .KnownLatNames <- CSTools:::.KnownLatNames() - - if (!any(names(exp[[1]]$coords) %in% .KnownLatNames)) { + if (!any(names(exp[[1]]$coords) %in% .KnownLatNames())) { stop("Spatial coordinate names of parameter 'exp' do not match any ", "of the names accepted by the package.") } - lat_name <- names(exp[[1]]$coords)[[which(names(exp[[1]]$coords) %in% .KnownLatNames)]] + lat_name <- names(exp[[1]]$coords)[[which(names(exp[[1]]$coords) %in% .KnownLatNames())]] res <- PeriodSPEI(exp = lapply(exp, function(x) x$data), dates = exp[[1]]$attrs$Dates, lat = exp[[1]]$coords[[lat_name]], exp_cor = lapply(exp_cor, function(x) x$data), - pet = pet, ref_period = ref_period, - time_dim = time_dim, leadtime_dim = leadtime_dim, + pet = pet, time_dim = time_dim, leadtime_dim = leadtime_dim, memb_dim = memb_dim, lat_dim = lat_dim, - accum = accum, start = start, end = end, - pet_method = pet_method, standardization = standardization, - params = params, param_error = param_error, - handle_infinity = handle_infinity, - cross_validation = cross_validation, - method = method, distribution = distribution, - fit = fit, ncores = ncores) + accum = accum, ref_period = ref_period, params = params, + standardization = standardization, + cross_validation = cross_validation, + pet_method = pet_method, method = method, + distribution = distribution, fit = fit, + param_error = param_error, + handle_infinity = handle_infinity, na.rm = na.rm, + ncores = ncores) return(res) } - -PeriodSPEI <- function(exp, dates, lat, - exp_cor = NULL, pet = NULL, +#'Compute the Standardised Precipitation-Evapotranspiration Index +#' +#'Calculation of the Standardised Precipitation-Evapotranspiration Index (SPEI) +#'that is a multiscalar drought index based on climatic data. It can be used for +#'determining the onset, duration and magnitude of drought conditions with +#'respect to normal conditions in a variety of natural and managed systems such +#'as crops, ecosystems, rivers, water resources, etc. The SPI is calculated +#'using monthly (or weekly) precipitation as the input data. The SPEI uses the +#'monthly (or weekly) difference between precipitation and pet. This represents +#'a simple climatic water balance which is calculated at different time scales +#'to obtain the SPEI. +#' +#'@param exp A named list with the needed \code{s2dv_cube} objects containing +#' the seasonal forecast experiment in the data element for each variable. +#' Specific variables are needed for each method used in computing the +#' Potential Evapotranspiration. See parameter 'pet_method'. The accepted +#' variables are: 'tasmin' and 'tasmax', required for methods 'hargreaves' and +#' 'hargreaves_modified' and 'tas', required for method 'thornthwaite'. +#' Variable 'prlr' is always needed. The units for temperature variables +#' ('tasmin', 'tasmax' and 'tas') need to be in Celcius degrees; the units for +#' precipitation ('prlr') need to be in mm/month. +#'@param exp_cor A named list with the needed \code{s2dv_cube} objects for each +#' variable in which the quantile PeriodSPEI should be applied. If it is not +#' specified, the PeriodSPEI is calculated from object 'exp'. +#'@param pet A multidimensional array containing the Potential +#' EvapoTranspiration data. If it is NULL it is calculated using pet_method. It +#' is NULL by default. +#'@param time_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'syear'. +#'@param leadtime_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'time'. +#'@param memb_dim A character string indicating the name of the dimension in +#' which the ensemble members are stored. When set it to NULL, threshold is +#' computed for individual members. +#'@param lat_dim A character string indicating the name of the latitudinal +#' dimension. By default it is set by 'latitude'. +#'@param accum An integer value indicating the number of months for the +#' accumulation for each variable. +#'@param ref_period A list with two numeric values with the starting and end +#' points of the reference period used for computing the index. The default +#' value is NULL indicating that the first and end values in data will be +#' used as starting and end points. +#'@param params A multidimensional array with named dimensions for computing the +#' SPEI. This option overrides computation of fitting parameters. It needs +#' to be of same leadtime and time dimensions of exp and a dimension named +#' 'coef' with the length of the coefficients needed for the used distribution +#' (for 'Gamma' coef dimension is of lenght 2, for 'log-Logistic' or +#' 'PearsonIII' is of length 3). If cross_validation is FALSE, the leadtime +#' dimension must be of length 1. +#'@param standardization A logical value indicating wether the standardization +#' is computed. +#'@param cross_validation A logical value indicating if cross validation is +#' done (TRUE), or not (FALSE). It is FALSE by default. +#'@param pet_method A character string indicating the method used to compute +#' the potential evapotranspiration. The accepted methods are: +#' 'hargreaves' and 'hargreaves_modified', that require the data to have +#' variables tasmin and tasmax; and 'thornthwaite', that requires variable +#' 'tas'. +#'@param method A character string indicating the standardization method used. +#' If can be: 'parametric' or 'non-parametric'. +#'@param distribution A character string indicating the name of the distribution +#' function to be used for computing the SPEI. The accepted names are: +#' 'log-Logistic', 'Gamma' or 'PearsonIII'. It is set to 'log-Logistic' by +#' default. +#'@param fit A character string indicating the name of the method used for +#' computing the distribution function parameters The accepteed names are: +#' 'ub-pwm', 'pp-pwm' and 'max-lik'. It is set to 'ub-pwm' by default. +#'@param param_error A numeric value with the error accepted. +#'@param handle_infinity A logical value wether to return Infinite values (TRUE) +#' or not (FALSE). +#'@param na.rm A logical value indicating whether NA values should be removed +#' from data. It is FALSE by default. +#'@param ncores An integer value indicating the number of cores to use in +#' parallel computation. +#' +#'@examples +#'# Test random data +#'dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, +#' latitude = 2, longitude = 1, ensemble = 25) +#'dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 1, time = 3, +#' latitude = 2, longitude = 1, ensemble = 15) +#'exp_tasmax <- array(rnorm(100, 27.73, 5.26), dim = dims) +#'exp_tasmin <- array(rnorm(100, 14.83, 3.86), dim = dims) +#'exp_prlr <- array(rnorm(100, 21.19, 25.64), dim = dims) +#' +#'expcor_tasmax <- array(rnorm(100, 29.03, 5.67), dim = dimscor) +#'expcor_tasmin <- array(rnorm(100, 15.70, 4.40), dim = dimscor) +#'expcor_prlr <- array(rnorm(100, 15.62, 21.38), dim = dimscor) +#' +#'dates <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), +#' paste0(2010:2015, "-10-16"))) +#'dim(dates) <- c(sday = 1, sweek = 1, syear = 6, time = 3) +#' +#'lat <- c(40,40.1) +#' +#'exp <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) +#'exp_cor <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, +#' 'prlr' = expcor_prlr) +#'res <- PeriodSPEI(exp = exp, exp_cor = exp_cor, lat = lat, dates = dates) +#' +#'@import multiApply +#'@import ClimProjDiags +#'@import SPEI +#'@import zoo +#'@import TLMoments +#'@import lmomco +#'@import lubridate +#'@export +PeriodSPEI <- function(exp, dates, lat, exp_cor = NULL, pet = NULL, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', lat_dim = 'latitude', - accum = 1, start = NULL, end = NULL, - pet_method = 'hargreaves', ref_period = NULL, - standardization = TRUE, - params = NULL, param_error = -9999, - handle_infinity = FALSE, cross_validation = FALSE, - method = 'parametric', distribution = 'log-Logistic', - fit = 'ub-pwm', ncores = 4) { + accum = 1, ref_period = NULL, params = NULL, + standardization = TRUE, cross_validation = FALSE, + pet_method = 'hargreaves', method = 'parametric', + distribution = 'log-Logistic', fit = 'ub-pwm', + param_error = -9999, handle_infinity = FALSE, + na.rm = FALSE, ncores = NULL) { # Initial checks ## exp @@ -297,7 +446,7 @@ PeriodSPEI <- function(exp, dates, lat, } ## fit if (!(fit %in% c('max-lik', 'ub-pwm', 'pp-pwm'))) { - stop('Method must be one of "ub-pwm" (default), "pp-pwm" or "max-lik"') + stop('Method must be one of "ub-pwm" (default), "pp-pwm" or "max-lik".') } ## ncores @@ -316,9 +465,11 @@ PeriodSPEI <- function(exp, dates, lat, dates_complete_daily_to_monthly <- lubridate::day(dates_complete_daily) dates_complete_monthly <- dates_complete_daily[which(dates_complete_daily_to_monthly == 1)] - dates_monthly <- array(0, dim=length(dates_complete_monthly)) + dates_monthly <- array(0, dim = length(dates_complete_monthly)) for (dd in 1:length(dates)) { - ii <- which(dates_complete_monthly == as.Date(paste(lubridate::year(dates[dd]), lubridate::month(dates[dd]), 01, sep = '-'))) + ii <- which(dates_complete_monthly == as.Date(paste(lubridate::year(dates[dd]), + lubridate::month(dates[dd]), + 01, sep = '-'))) dates_monthly[ii] <- 1 } @@ -331,34 +482,31 @@ PeriodSPEI <- function(exp, dates, lat, k = k + 1 # Evapotranspiration estimation (unless pet is already provided) if (is.null(pet) | computed_pet) { - pet <- evapotranspiration(data = data, dates_monthly = dates_monthly, - lat = lat, pet_method = pet_method[k], - time_dim = time_dim, leadtime_dim = leadtime_dim, - lat_dim = lat_dim, ncores = ncores) + pet <- .Evapotranspiration(data = data, dates_monthly = dates_monthly, + lat = lat, pet_method = pet_method[k], + time_dim = time_dim, leadtime_dim = leadtime_dim, + lat_dim = lat_dim, na.rm = na.rm, ncores = ncores) computed_pet <- TRUE } # Accumulation diff_p_pet <- data$prlr - pet - data_accum <- accumulation(data = diff_p_pet, - dates_monthly = dates_monthly, accum = accum, - time_dim = time_dim, leadtime_dim = leadtime_dim, - ncores = ncores) + data_accum <- .Accumulation(data = diff_p_pet, + dates_monthly = dates_monthly, accum = accum, + time_dim = time_dim, leadtime_dim = leadtime_dim, + ncores = ncores) # Standardization: - if (standardization == TRUE) { - spei_dat <- spei_standardization(data = data_accum, - leadtime_dim = leadtime_dim, - time_dim = time_dim, - memb_dim = memb_dim, - ref_period = ref_period, - cross_validation = cross_validation, - handle_infinity = handle_infinity, - ncores = ncores, - accum = accum, - param_error = param_error, - params = params, method = method, - distribution = distribution, - fit = fit) + if (standardization) { + spei_dat <- .Standardization(data = data_accum, params = params, + accum = accum, time_dim = time_dim, + leadtime_dim = leadtime_dim, + memb_dim = memb_dim, + ref_period = ref_period, + cross_validation = cross_validation, + handle_infinity = handle_infinity, + param_error = param_error, + method = method, distribution = distribution, + fit = fit, ncores = ncores) params <- spei_dat$params } else { spei_dat <- data_accum @@ -380,9 +528,7 @@ PeriodSPEI <- function(exp, dates, lat, names(spei_res) <- c('exp', 'exp_cor') } } - return(spei_res) - } @@ -395,11 +541,10 @@ PeriodSPEI <- function(exp, dates, lat, } -#------------------------------------------------------------------------------- - -evapotranspiration <- function(data, dates_monthly, lat, pet_method = 'hargreaves', - time_dim = 'syear', leadtime_dim = 'time', - lat_dim = 'latitude', ncores = NULL) { +.Evapotranspiration <- function(data, dates_monthly, lat, pet_method = 'hargreaves', + time_dim = 'syear', leadtime_dim = 'time', + lat_dim = 'latitude', na.rm = FALSE, + ncores = NULL) { lat_mask <- array(lat, dim = c(1, length(lat))) names(dim(lat_mask)) <- c('dat', lat_dim) @@ -428,31 +573,33 @@ evapotranspiration <- function(data, dates_monthly, lat, pet_method = 'hargreave # prepare data target_dims_data <- lapply(data[varnames], function(x) rep(c(leadtime_dim, time_dim), 1)) # file <- tempfile() - # file = 'out.txt' # sink(file) - pet_estimated <- Apply(data = c(list(lat_mask = lat_mask), data[varnames]), - target_dims = c(list(lat_mask = 'dat'), target_dims_data), - output_dims = c(leadtime_dim, time_dim), - pet_method = pet_method, - dates_monthly = dates_monthly, - leadtime_dim = leadtime_dim, time_dim = time_dim, - fun = atomic_pet, ncores = ncores)$output1 - # sink(NULL) + # print(varnames) + # print(is.na(data['tasmax'])) + # print(is.na(data['tasmin'])) + pet <- Apply(data = c(list(lat_mask = lat_mask), data[varnames]), + target_dims = c(list(lat_mask = 'dat'), target_dims_data), + fun = .evapotranspiration, + dates_monthly = dates_monthly, pet_method = pet_method, + leadtime_dim = leadtime_dim, time_dim = time_dim, + output_dims = c(leadtime_dim, time_dim), + ncores = ncores)$output1 + # sink() # captured_message <- readLines(file) - + # print(unique(captured_message)) # reorder dims in pet_estimated - pos <- match(names(dim(data[[1]])),names(dim(pet_estimated))) - pet_estimated <- aperm(pet_estimated, pos) + pos <- match(names(dim(data[[1]])), names(dim(pet))) + pet <- aperm(pet, pos) # restore original NAs from mask_na - pet_estimated[which(mask_na == 0 )] <- NA - - return(pet_estimated) + pet[which(mask_na == 0 )] <- NA + return(pet) } -atomic_pet <- function(lat_mask = NULL, data2 = NULL, data3 = NULL, data4 = NULL, - pet_method, dates_monthly, leadtime_dim, time_dim) { +.evapotranspiration <- function(lat_mask, data2, data3 = NULL, data4 = NULL, + dates_monthly, pet_method = 'hargreaves', + leadtime_dim = 'time', time_dim = 'syear') { dims <- dim(data2) @@ -460,7 +607,7 @@ atomic_pet <- function(lat_mask = NULL, data2 = NULL, data3 = NULL, data4 = NULL # of the considered period # (starting in January of the first year) so that the solar radiation # estimation is computed in each case for the correct month - + any(is.na(data2)) if (!is.null(data2)) { data_tmp <- as.vector(data2) data2 <- array(0, dim = length(dates_monthly)) @@ -473,6 +620,7 @@ atomic_pet <- function(lat_mask = NULL, data2 = NULL, data3 = NULL, data4 = NULL } rm(data_tmp) } + any(is.na(data3)) if (!is.null(data3)) { data_tmp <- as.vector(data3) data3 <- array(0, dim = length(dates_monthly)) @@ -499,52 +647,58 @@ atomic_pet <- function(lat_mask = NULL, data2 = NULL, data3 = NULL, data4 = NULL } if (pet_method == 'hargreaves') { # NOTE EVA: if NA.RM is FALSE this gives error - x_pet <- invisible(hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), - lat = lat_mask, na.rm = TRUE)) + # print('hi') + # print(any(is.na(data3))) + # print('he') + # print(any(is.na(data2))) + pet <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), + lat = lat_mask, na.rm = na.rm, verbose = FALSE) # line to return the vector to the size of the actual original data - x_pet <- x_pet[which(dates_monthly == 1)] - pet <- array(x_pet, dim = dims) + pet <- pet[which(dates_monthly == 1)] + pet <- array(pet, dim = dims) } if (pet_method == 'hargreaves_modified') { - x_pet <- invisible(hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), - lat = lat_mask, Pre = as.vector(data4), na.rm = TRUE)) - x_pet <- x_pet[which(dates_monthly == 1)] - pet <- array(x_pet, dim = dims) + pet <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), + lat = lat_mask, Pre = as.vector(data4), na.rm = na.rm, + verbose = FALSE) + pet <- pet[which(dates_monthly == 1)] + pet <- array(pet, dim = dims) } if (pet_method == 'thornthwaite') { - x_pet <- invisible(thornthwaite(as.vector(data2), lat = lat_mask, na.rm = TRUE)) + pet <- thornthwaite(as.vector(data2), lat = lat_mask, na.rm = na.rm, + verbose = FALSE) # line to return the vector to the size of the actual original data - x_pet <- x_pet[which(dates_monthly == 1)] - pet <- array(x_pet, dim = dims) + pet <- pet[which(dates_monthly == 1)] + pet <- array(pet, dim = dims) } return(pet) } -#------------------------------------------------------------------------------- -accumulation <- function(data, dates_monthly, accum = 1, - time_dim = 'syear', leadtime_dim = 'time', - ncores = NULL) { +.Accumulation <- function(data, dates_monthly, accum = 1, + time_dim = 'syear', leadtime_dim = 'time', + ncores = NULL) { - accum_result <- Apply(data = list(data), - target_dims = list(data = c(leadtime_dim, time_dim)), - dates_monthly = dates_monthly, - accum = accum, - output_dims = c(leadtime_dim, time_dim), - leadtime_dim = leadtime_dim, time_dim = time_dim, - fun = atomic_accum, ncores = ncores)$output1 + accumulated <- Apply(data = list(data), + target_dims = list(data = c(leadtime_dim, time_dim)), + dates_monthly = dates_monthly, + accum = accum, + output_dims = c(leadtime_dim, time_dim), + leadtime_dim = leadtime_dim, time_dim = time_dim, + fun = .accumulation, + ncores = ncores)$output1 - pos <- match(names(dim(accum_result)), names(dim(data))) - data_accum <- aperm(accum_result, pos) + pos <- match(names(dim(accumulated)), names(dim(data))) + accumulated <- aperm(accumulated, pos) - return(data_accum) + return(accumulated) } -atomic_accum <- function(data, dates_monthly, accum = 1, - time_dim = 'syear', leadtime_dim = 'time') { +.accumulation <- function(data, dates_monthly, accum = 1, + time_dim = 'syear', leadtime_dim = 'time') { # data:[time, syear] dims <- dim(data) @@ -558,215 +712,205 @@ atomic_accum <- function(data, dates_monthly, accum = 1, } } # Accumulation at different timescales - # rollapply {zoo} A generic function for applying a function to rolling margins of an array. + # rollapply {zoo} A generic function for applying a function to rolling + # margins of an array. data_sum_x <- rollapply(data_vector, accum, sum) - # adds as many NAs as needed at the begining to account for the months that cannot be added - # (depends on accu) and so that the position in the vector corresponds to the accumulated - # of the previous months (instead of the accumulated of the next months) + # adds as many NAs as needed at the begining to account for the months that + # cannot be added (depends on accu) and so that the position in the vector + # corresponds to the accumulated of the previous months (instead of the + # accumulated of the next months) data_sum_x <- c(rep(NA, accum-1), data_sum_x) # discard the months that don't appear in the original data data_sum_x <- data_sum_x[which(dates_monthly == 1)] accum_result <- array(data_sum_x, dim = c(dims)) - # replace by NA when the accumulation corresponds to months that where not present in the original data + # replace by NA when the accumulation corresponds to months that where not + # present in the original data if (accum > 1) { accum_result[1:(accum-1), ] <- NA } return(accum_result) } -#------------------------------------------------------------------------------- +.Standardization <- function(data, params = NULL, accum = 1, time_dim = 'syear', + leadtime_dim = 'time', memb_dim = 'ensemble', + ref_period = NULL, cross_validation = FALSE, + handle_infinity = FALSE, param_error = -9999, + method = 'parametric', distribution = 'log-Logistic', + fit = 'ub-pwm', na.rm = FALSE, ncores = NULL) { - -spei_standardization <- function(data, accum = 1, time_dim = 'syear', - leadtime_dim = 'time', memb_dim = 'ensemble', - ref_period = NULL, - handle_infinity = FALSE, - cross_validation = FALSE, - param_error = -9999, params = NULL, - method = 'parametric', - distribution = 'log-Logistic', - fit = 'ub-pwm', - ncores = NULL) { - - n_leadtimes <- dim(data)[leadtime_dim] - n_sdates_params <- dim(data)[time_dim] + nleadtime <- dim(data)[leadtime_dim] + ntime <- dim(data)[time_dim] if (!cross_validation) { - n_sdates_params <- 1 + ntime <- 1 } + coef = switch(distribution, + "Gamma" = array(NA, dim = 2, dimnames = list(c('alpha', 'beta'))), + "log-Logistic" = array(NA, dim = 3, dimnames = list(c('xi', 'alpha', 'kappa'))), + "PearsonIII" = array(NA, dim = 3, dimnames = list(c('mu', 'sigma', 'gamma')))) + if (is.null(params)) { - params <- array(NA, dim = c(n_sdates_params, n_leadtimes, coef = 3)) # hardcoded + params <- array(NA, dim = c(ntime, nleadtime, length(coef))) names(dim(params)) <- c(time_dim, leadtime_dim, 'coef') } else if (length(dim(params)) < 2) { - params <- array(params, dim = c(length(params), n_sdates_params, n_leadtimes)) - params <- aperm(params, c(2,3,1)) # dim(params): [time_dim, leadtime_dim, coef] with the values repeated each time_dim and leadtime_dim + params <- array(params, dim = c(length(params), ntime, nleadtime)) + # dim(params): [time_dim, leadtime_dim, coef] + # with the values repeated each time_dim and leadtime_dim + params <- aperm(params, c(2,3,1)) names(dim(params)) <- c(time_dim, leadtime_dim, 'coef') + } else { + if (dim(params)['coef'] != length(coef)) { + stop(paste0("Params array should have 'coef' dimension of length ", + length(coef), ".")) + } } spei <- Apply(data = list(data = data, params = params), target_dims = list(data = c(leadtime_dim, time_dim, memb_dim), params = c(time_dim, leadtime_dim, 'coef')), - output_dims = list(data_spei = c(leadtime_dim, time_dim, memb_dim), - params = c(time_dim, leadtime_dim, 'coef')), + fun = .standardization, + coef = coef, leadtime_dim = leadtime_dim, time_dim = time_dim, memb_dim = memb_dim, handle_infinity = handle_infinity, cross_validation = cross_validation, method = method, distribution = distribution, fit = fit, - ref_period = ref_period, - param_error = param_error, - fun = atomic_spei, + ref_period = ref_period, param_error = param_error, + na.rm = na.rm, + output_dims = list(data_spei = c(leadtime_dim, time_dim, memb_dim), + params = c(time_dim, leadtime_dim, 'coef')), ncores = ncores) return(spei) # spei is a list of data_spei and params } - - - - -atomic_spei <- function(data, params, leadtime_dim = 'time', time_dim = 'syear', - memb_dim = 'ensemble', ref_period = NULL, - handle_infinity = FALSE, cross_validation = FALSE, - param_error = -9999, method = 'parametric', - distribution = 'log-Logistic', fit = 'ub-pwm') { +.standardization <- function(data, params, coef, leadtime_dim = 'time', + time_dim = 'syear', memb_dim = 'ensemble', + ref_period = NULL, handle_infinity = FALSE, + cross_validation = FALSE, param_error = -9999, + method = 'parametric', distribution = 'log-Logistic', + fit = 'ub-pwm', na.rm = FALSE) { # data: [leadtime_dim, time_dim, memb_dim] # params: [time_dim, leadtime_dim, 'coef'] - - if (is.null(ref_period)) { - ref.start <- NULL - ref.end <- NULL + + # maximum number of parameters needed to define any of the considered distributions + ncoef <- length(coef) + nleadtime <- as.numeric(dim(data)[leadtime_dim]) + ntime <- as.numeric(dim(data)[time_dim]) + nmemb <- as.numeric(dim(data)[memb_dim]) + + if (cross_validation) { + params_result <- array(data = NA, dim = c(ntime, nleadtime, ncoef)) } else { - ref.start <- ref_period[[1]] - ref.end <- ref_period[[2]] + params_result <- array(data = NA, dim = c(1, nleadtime, ncoef)) } + names(dim(params_result)) <- c(time_dim, leadtime_dim, 'coef') if (all(is.na(data))) { - speiX <- array(NA, dim(data)) - # if the data [time, sdate, memb] has no variability it will raise an error further down, - # so we assign a value to the result and skip the step - } else if (var(data, na.rm = T) == 0) { - speiX <- array(param_error, dim(data)) - return(speiX) + spei_mod <- array(NA, dim(data)) + # if the data [time, sdate, memb] has no variability it will raise an error + # further down, so we assign a value to the result and skip the step + } else if (any(is.na(data)) && !na.rm) { + spei_mod <- array(NA, dim(data)) + } else if (var(data, na.rm = T) == 0) { + spei_mod <- array(param_error, dim(data)) } else { - n_coef_max <- dim(params)['coef'] - ftime <- as.numeric(dim(data)[leadtime_dim]) - n_sdates <- as.numeric(dim(data)[time_dim]) - nmemb <- as.numeric(dim(data)[memb_dim]) - - spei_mod <- array(data = NA, dim = c(ftime, n_sdates, nmemb)) - names(dim(spei_mod)) <- c(leadtime_dim, time_dim, memb_dim) - if (cross_validation) { - params_result <- array(data = NA, dim = c(n_sdates, ftime, n_coef_max)) + if (is.null(ref_period)) { + ref.start <- NULL + ref.end <- NULL } else { - params_result <- array(data = NA, dim = c(1, ftime, n_coef_max)) + ref.start <- ref_period[[1]] + ref.end <- ref_period[[2]] } - for (ff in 1:ftime) { # treat months independently - Accum_D_temp <- ClimProjDiags::Subset(data, along = leadtime_dim, - indices = ff, drop = FALSE) + + spei_mod <- array(data = NA, dim = c(nleadtime, ntime, nmemb)) + names(dim(spei_mod)) <- c(leadtime_dim, time_dim, memb_dim) + + for (ff in 1:nleadtime) { + data_subset <- ClimProjDiags::Subset(data, along = leadtime_dim, + indices = ff, drop = 'selected') params_tmp <- if (all(is.na(params))) {NULL} else {params[, ff, ]} - - c(x_spei_mod, params_ff) %<-% spX_ftime(data = Accum_D_temp, na.rm = TRUE, method = method, - distribution = distribution, - fit = fit, ref.start = ref.start, ref.end = ref.end, - params = params_tmp, n_sdates = n_sdates, nmemb = nmemb, - handle_infinity = handle_infinity, - cross_validation = cross_validation) - spei_mod[ff, , ] <- x_spei_mod - coef_names <- names(params_ff) - # lengthen dimension coef of params_ff in case it doesn't match the corresponding dimension of parms_months - if (length(params_ff) < n_coef_max) { - params_ff <- append(params_ff, array(NA, dim = n_coef_max - length(params_ff))) - coef_names <- append(coef_names, '') + + + spei_data <- .std(data = data_subset, coef = coef, + ntime = ntime, nmemb = nmemb, + method = method, + distribution = distribution, + fit = fit, na.rm = na.rm, + ref.start = ref.start, + ref.end = ref.end, + params = params_tmp, + handle_infinity = handle_infinity, + cross_validation = cross_validation) + spei_mod[ff, , ] <- spei_data[[1]] + params_ff <- spei_data[[2]] + # lengthen dimension coef of params_ff in case it doesn't match the + # corresponding dimension of parms_months + if (!is.null(params_ff)) { + if (length(params_ff) < ncoef) { + params_ff <- append(params_ff, array(NA, dim = ncoef - length(params_ff))) + } + params_result[, ff, ] <- params_ff } - params_result[, ff, ] <- params_ff } - colnames(params_result) <- coef_names - names(dim(params_result)) <- c(time_dim, leadtime_dim, 'coef') - return(list(spei = spei_mod, params = params_result)) } + return(list(spei = spei_mod, params = params_result)) } -#------------------------------------------------------------------------------- - -### Support functions - -spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', - na.rm = TRUE, method = 'parametric', n_sdates, nmemb, - ref.start = NULL, ref.end = NULL, params = NULL, - handle_infinity = FALSE, cross_validation = FALSE) { +.std <- function(data, coef, ntime, nmemb, method = 'parametric', + distribution = 'log-Logistic', fit = 'ub-pwm', na.rm = FALSE, + ref.start = NULL, ref.end = NULL, params = NULL, + handle_infinity = FALSE, cross_validation = FALSE) { - # data: [leadtime_dim = 1, time_dim, memb_dim] - - if (anyNA(data) && na.rm == FALSE) { - stop('Error: Data must not contain NAs') # TO DO: return something? - } + # data: [time_dim, memb_dim] + # params: NULL or [(ntime), coef] if (method == 'non-parametric') { - - bp = matrix(0, length(data), 1) + bp <- matrix(0, length(data), 1) for (i in 1:length(data)) { bp[i,1] = sum(data[] <= data[i], na.rm = na.rm); # Writes the rank of the data } - SPEI = qnorm((bp - 0.44)/(length(data) + 0.12)) - - return(SPEI) # it won't return params to be used in exp_cor; also it is not using handle_infinity nor cross_validation - + std_index <- qnorm((bp - 0.44)/(length(data) + 0.12)) + dim(std_index) <- c(ntime, nmemb) + # it won't return params to be used in exp_cor; also it is not using + # handle_infinity nor cross_validation + params_result <- NULL } else { - - std_index <- array(NA, c(n_sdates, nmemb)) - - if (anyNA(data) && na.rm == FALSE) { - stop('Error: Data must not contain NAs') - } - - coef = switch(distribution, - "Gamma" = array(NA, dim = 2, dimnames = list(c('alpha','beta'))), - "log-Logistic" = array(NA, dim = 3, dimnames = list(c('xi','alpha','kappa'))), - "PearsonIII" = array(NA, dim = 3, dimnames = list(c('mu','sigma','gamma')))) - - dim_one <- length(coef) - - if (!is.null(params)) { - if (length(params)!=dim_one) { - stop(paste0('parameters array should have dimensions [', dim_one, ']')) - } - } - + std_index <- array(NA, c(ntime, nmemb)) # Select window if necessary if (!is.null(ref.start) && !is.null(ref.end)) { - data.fit <- window(data,ref.start,ref.end) + data.fit <- window(data, ref.start, ref.end) } else { data.fit <- data } if (cross_validation) { - loop_years <- n_sdates + loop_years <- ntime } else { loop_years <- 1 } - - params_result <- array(NA, dim = c(loop_years, dim_one)) + params_result <- array(NA, dim = c(loop_years, length(coef))) colnames(params_result) <- names(coef) for (nsd in 1:loop_years) { # loop over years (in case of cross_validation) # Cumulative series (acu) if (cross_validation) { - acu <- as.vector(data.fit[,-nsd,]) + acu <- as.vector(data.fit[-nsd, ]) } else { acu <- as.vector(data.fit) } acu.sorted <- sort.default(acu, method = "quick") - # remove NAs (no need if(na.rm) because if there are NA and na.rm = F we don't get to this point) + # remove NAs (no need if(na.rm) because if there are NA and na.rm = F + # we don't get to this point) acu.sorted <- acu.sorted[!is.na(acu.sorted)] # else all acu was NA and we don't need to continue with this case if (length(acu.sorted) != 0) { - acu_sd = sd(acu.sorted) + acu_sd <- sd(acu.sorted) if (!is.na(acu_sd)) { if (acu_sd != 0) { if (distribution != "log-Logistic") { @@ -774,17 +918,17 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', acu.sorted = acu.sorted[acu.sorted > 0] } if (!is.null(params)) { - f_params = as.vector(params) + f_params <- as.vector(params) params_result[nsd, ] <- f_params } else { # else coef will be NA if (length(acu.sorted) >= 4) { # Calculate probability weighted moments based on fit with lmomco or TLMoments pwm = switch(fit, - "pp-pwm" = pwm.pp(acu.sorted,-0.35,0, nmom=3), - pwm.ub(acu.sorted, nmom=3) - # TLMoments::PWM(acu.sorted, order = 0:2) - ) + "pp-pwm" = pwm.pp(acu.sorted, -0.35, 0, nmom = 3), + pwm.ub(acu.sorted, nmom = 3) + # TLMoments::PWM(acu.sorted, order = 0:2) + ) # Check L-moments validity lmom <- pwm2lmom(pwm) @@ -792,23 +936,21 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', # lmom fortran functions need specific inputs L1, L2, T3 # this is handled by lmomco internally with lmorph fortran_vec = c(lmom$lambdas[1:2], lmom$ratios[3]) - # Calculate parameters based on distribution with lmom then lmomco f_params = switch(distribution, "log-Logistic" = tryCatch(lmom::pelglo(fortran_vec), - error = function(e){ parglo(lmom)$para }), + error = function(e){parglo(lmom)$para}), "Gamma" = tryCatch(lmom::pelgam(fortran_vec), - error = function(e){ pargam(lmom)$para }), + error = function(e){pargam(lmom)$para}), "PearsonIII" = tryCatch(lmom::pelpe3(fortran_vec), - error = function(e){ parpe3(lmom)$para })) - + error = function(e){parpe3(lmom)$para})) # Adjust if user chose log-Logistic and max-lik if (distribution == 'log-Logistic' && fit == 'max-lik') { f_params = parglo.maxlik(acu.sorted, f_params)$para } params_result[nsd, ] <- f_params - } # end if dor the case the L-moments are not valid (std_index will be NA) - } # end if for the case there are not enough values to estimate the parameters (std_index will be NA) + } # end for the case the L-moments are not valid (std_index will be NA) + } # end for case there are not enough values to estimate the parameters (std_index will be NA) } # end estimation of f_param # Calculate cdf based on distribution with lmom if (all(is.na(params_result[nsd,]))) { @@ -822,7 +964,7 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', "PearsonIII" = lmom::cdfpe3(data, f_params)) } - std_index_cv <- array(qnorm(cdf_res), dim = c(n_sdates,nmemb)) + std_index_cv <- array(qnorm(cdf_res), dim = c(ntime, nmemb)) # Adjust if user chose Gamma or PearsonIII - Not tested: For future development # if (distribution != 'log-Logistic') { @@ -834,9 +976,9 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', std_index <- std_index_cv } } - } # end if for the case there is no variability - } # end if for the case all NA in acu - } # next year (in case of cross_validation or all done if cross_validation == F) + } # end if for the case there is no variability + } # end if for the case all NA in acu + } # next year (in case of cross_validation or all done if cross_validation == F) if (handle_infinity) { # could also use "param_error" ?; we are giving it the min/max value of the grid point std_index[is.infinite(std_index) & std_index < 0] <- min(std_index[!is.infinite(std_index)]) @@ -846,8 +988,6 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', # (otherwise there will be one f_params per year; # but the output params will be read only in the case that # it is called with cross_validation FALSE) - return(list(std_index = std_index, params = params_result)) - } + return(list(std_index, params_result)) } -#------------------------------------------------------------------------------- diff --git a/R/zzz.R b/R/zzz.R index cf91639..81bd0ce 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -88,4 +88,12 @@ wind2CF <- function(wind, pc) { power <- wind2power(wind, pc) CF <- power / pc$attr$RatedPower return(CF) +} + +.KnownLonNames <- function() { + known_lon_names <- c('lon', 'lons', 'longitude', 'x', 'i', 'nav_lon') +} + +.KnownLatNames <- function() { + known_lat_names <- c('lat', 'lats', 'latitude', 'y', 'j', 'nav_lat') } \ No newline at end of file diff --git a/tests/testthat/test-PeriodSPEI.R b/tests/testthat/test-PeriodSPEI.R index 0a9a6bd..7c55211 100644 --- a/tests/testthat/test-PeriodSPEI.R +++ b/tests/testthat/test-PeriodSPEI.R @@ -1,5 +1,3 @@ -context("CSIndicators::PeriodSPEI tests") - ############################################## # cube1 cube1 <- NULL @@ -97,7 +95,7 @@ test_that("1. Initial checks PeriodSPEI", { ) # time_dim expect_error( - PeriodSPEI(exp = exp1, ) + PeriodSPEI(exp = exp1, time_dim = 1) ) # leadtime_dim @@ -111,3 +109,32 @@ test_that("1. Initial checks PeriodSPEI", { ############################################## +# Test different time dimensions of exp and exp_cor +dims <- c(syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 10) +dimscor <- c(syear = 1, time = 3, latitude = 2, longitude = 1, ensemble = 15) + +set.seed(1) +exp_tasmax <- array(rnorm(100, 27.73, 5.26), dim = dims) +set.seed(2) +exp_tasmin <- array(rnorm(100, 14.83, 3.86), dim = dims) +set.seed(3) +exp_prlr <- array(rnorm(100, 21.19, 25.64), dim = dims) + +set.seed(1) +expcor_tasmax <- array(rnorm(100, 29.03, 5.67), dim = dimscor) +set.seed(2) +expcor_tasmin <- array(rnorm(100, 15.70, 4.40), dim = dimscor) +set.seed(3) +expcor_prlr <- array(rnorm(100, 15.62, 21.38), dim = dimscor) + +dates <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), + paste0(2010:2015, "-10-16"))) +dim(dates) <- c(sday = 1, sweek = 1, syear = 6, time = 3) + +lat <- c(40,40.1) + +exp1 <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) +exp_cor1 <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, 'prlr' = expcor_prlr) + +res <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, dates = dates, + cross_validation = TRUE, na.rm = TRUE) \ No newline at end of file -- GitLab From db9ecd3627f220ddf5d8402b12dbfe5fe30f82bf Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Mon, 12 Jun 2023 21:15:35 +0200 Subject: [PATCH 09/42] Add parameter dates_expcor; improve documentation; add warning for cross_validation; return correct values within 's2dv_cube' --- NAMESPACE | 8 + R/PeriodSPEI.R | 265 ++++++++++++++++++++++--------- man/CST_PeriodSPEI.Rd | 157 ++++++++++++++++++ man/PeriodSPEI.Rd | 160 +++++++++++++++++++ tests/testthat/test-PeriodSPEI.R | 88 +++++----- 5 files changed, 559 insertions(+), 119 deletions(-) create mode 100644 man/CST_PeriodSPEI.Rd create mode 100644 man/PeriodSPEI.Rd diff --git a/NAMESPACE b/NAMESPACE index d80accb..61b6b92 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,6 +7,7 @@ export(CST_AccumulationExceedingThreshold) export(CST_MergeRefToExp) export(CST_PeriodAccumulation) export(CST_PeriodMean) +export(CST_PeriodSPEI) export(CST_QThreshold) export(CST_SelectPeriodOnData) export(CST_Threshold) @@ -17,6 +18,7 @@ export(CST_WindPowerDensity) export(MergeRefToExp) export(PeriodAccumulation) export(PeriodMean) +export(PeriodSPEI) export(QThreshold) export(SelectPeriodOnData) export(SelectPeriodOnDates) @@ -25,7 +27,13 @@ export(TotalSpellTimeExceedingThreshold) export(TotalTimeExceedingThreshold) export(WindCapacityFactor) export(WindPowerDensity) +import(ClimProjDiags) +import(SPEI) +import(TLMoments) +import(lmomco) +import(lubridate) import(multiApply) +import(zoo) importFrom(stats,approxfun) importFrom(stats,ecdf) importFrom(stats,quantile) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index 552c0d2..284c174 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -73,30 +73,53 @@ #'@param ncores An integer value indicating the number of cores to use in #' parallel computation. #' +#'@return A list with elements: +#'\itemize{ +#' \item{'exp', if 'standarization' is TRUE an 's2dv_cube' conaining the 'SPEI' +#' in element data from 'exp' array with the same dimensions as 'exp'. +#' If it is FALSE, it is an array with the accumulated values of PET +#' minus 'prlr' data.} +#' \item{'exp_cor', if 'standarization' is TRUE and if 'exp_cor' is not +#' NULL. It is an 's2dv_cube' with the SPEI data from 'exp_cor' in +#' element 'data'. If 'standarization' is FALSE, only the accumulated +#' values of PET minus 'prlr' is returned.} +#' \item{'params', returned if 'standarization' is TRUE, it contains the +#' parameters used for the standarization of 'exp' that are used for +#' computing the 'SPEI' for 'exp_cor'.} +#' } +#' #'@examples #'# Test random data #'dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, #' latitude = 2, longitude = 1, ensemble = 25) -#'dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 1, time = 3, +#'dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, #' latitude = 2, longitude = 1, ensemble = 15) -#'exp_tasmax <- array(rnorm(100, 27.73, 5.26), dim = dims) -#'exp_tasmin <- array(rnorm(100, 14.83, 3.86), dim = dims) -#'exp_prlr <- array(rnorm(100, 21.19, 25.64), dim = dims) -#' -#'expcor_tasmax <- array(rnorm(100, 29.03, 5.67), dim = dimscor) -#'expcor_tasmin <- array(rnorm(100, 15.70, 4.40), dim = dimscor) -#'expcor_prlr <- array(rnorm(100, 15.62, 21.38), dim = dimscor) -#' -#'dates <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), -#' paste0(2010:2015, "-10-16"))) +#' +#'dates <- as.POSIXct(c(paste0(2010:2015, "-08-16"), +#' paste0(2010:2015, "-09-15"), +#' paste0(2010:2015, "-10-16")), 'UTC') #'dim(dates) <- c(sday = 1, sweek = 1, syear = 6, time = 3) -#' +#' #'lat <- c(40,40.1) -#' +#' +#'exp_tasmax <- array(rnorm(900, 27.73, 5.26), dim = dims) +#'exp_tasmin <- array(rnorm(900, 14.83, 3.86), dim = dims) +#'exp_prlr <- array(rnorm(900, 21.19, 25.64), dim = dims) +#' +#'expcor_tasmax <- array(rnorm(540, 29.03, 5.67), dim = dimscor) +#'expcor_tasmin <- array(rnorm(540, 15.70, 4.40), dim = dimscor) +#'expcor_prlr <- array(rnorm(540, 15.62, 21.38), dim = dimscor) +#' #'exp <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) #'exp_cor <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, #' 'prlr' = expcor_prlr) -#'res <- PeriodSPEI(exp = exp, exp_cor = exp_cor, lat = lat, dates = dates) +#' +#'exp <- lapply(exp, CSTools::s2dv_cube, coords = list(latitude = lat), +#' Dates = dates) +#'exp_cor <- lapply(exp_cor, CSTools::s2dv_cube, coords = list(latitude = lat), +#' Dates = dates) +#' +#'res <- CST_PeriodSPEI(exp = exp, exp_cor = exp_cor) #' #'@import multiApply #'@import ClimProjDiags @@ -128,18 +151,32 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, stop("Parameter 'exp_cor' must be a list of 's2dv_cube' class.") } } - + # latitude if (!any(names(exp[[1]]$coords) %in% .KnownLatNames())) { stop("Spatial coordinate names of parameter 'exp' do not match any ", "of the names accepted by the package.") } + # Dates + dates_exp <- exp[[1]]$attrs$Dates + if (is.null(exp[[1]]$attrs$Dates)) { + stop("Element 'Dates' is not found in 'exp$attrs' list.") + } + + if (!is.null(exp_cor)) { + if (is.null(exp_cor[[1]]$attrs$Dates)) { + stop("Element 'Dates' is not found in 'exp_cor$attrs'.") + } + } - lat_name <- names(exp[[1]]$coords)[[which(names(exp[[1]]$coords) %in% .KnownLatNames())]] + lat_dim <- names(exp[[1]]$coords)[[which(names(exp[[1]]$coords) %in% .KnownLatNames())]] + res <- PeriodSPEI(exp = lapply(exp, function(x) x$data), - dates = exp[[1]]$attrs$Dates, - lat = exp[[1]]$coords[[lat_name]], - exp_cor = lapply(exp_cor, function(x) x$data), + dates_exp = exp[[1]]$attrs$Dates, + lat = exp[[1]]$coords[[lat_dim]], + exp_cor = if (is.null(exp_cor)) {NULL} else { + lapply(exp_cor, function(x) x$data)}, + dates_expcor = exp_cor[[1]]$attrs$Dates, pet = pet, time_dim = time_dim, leadtime_dim = leadtime_dim, memb_dim = memb_dim, lat_dim = lat_dim, accum = accum, ref_period = ref_period, params = params, @@ -150,6 +187,22 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, param_error = param_error, handle_infinity = handle_infinity, na.rm = na.rm, ncores = ncores) + if (is.null(exp_cor)) { + exp$data <- res[[1]] + if (standardization) { + return(list(exp = exp, params = res[[2]])) + } else { + return(exp) + } + } else { + exp$data <- res[[1]] + exp_cor$data <- res[[2]] + if (standardization) { + return(list(exp = exp, exp_cor = exp_cor, params = res[[3]])) + } else { + return(list(exp = exp, exp_cor = exp_cor)) + } + } return(res) } @@ -175,9 +228,14 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, #' Variable 'prlr' is always needed. The units for temperature variables #' ('tasmin', 'tasmax' and 'tas') need to be in Celcius degrees; the units for #' precipitation ('prlr') need to be in mm/month. +#'@param dates_exp An array of temporal dimensions containing the Dates of +#' 'exp'. It must be of class 'Date' or 'POSIXct'. +#'@param lat A numeric vector containing the latitude values of 'exp'. #'@param exp_cor A named list with the needed \code{s2dv_cube} objects for each #' variable in which the quantile PeriodSPEI should be applied. If it is not #' specified, the PeriodSPEI is calculated from object 'exp'. +#'@param dates_expcor An array of temporal dimensions containing the Dates of +#' 'exp_cor'. It must be of class 'Date' or 'POSIXct'. #'@param pet A multidimensional array containing the Potential #' EvapoTranspiration data. If it is NULL it is calculated using pet_method. It #' is NULL by default. @@ -229,19 +287,33 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, #'@param ncores An integer value indicating the number of cores to use in #' parallel computation. #' +#'@return A list with elements: +#'\itemize{ +#' \item{'exp', if 'standarization' is TRUE an array conaining SPEI data from +#' 'exp' array with the same dimensions as 'exp'. If it is FALSE, it +#' is an array with the accumulated values of PET minus 'prlr' data.} +#' \item{'exp_cor', if 'standarization' is TRUE and if 'exp_cor' is not +#' NULL. It is an array with the SPEI data from 'exp_cor'. If +#' 'standarization' is FALSE, only the accumulated values of PET minus +#' 'prlr' is returned.} +#' \item{'params', returned if 'standarization' is TRUE, it contains the +#' parameters used for the standarization of 'exp' that are used for +#' computing the 'SPEI' for 'exp_cor'.} +#'} +#' #'@examples #'# Test random data #'dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, #' latitude = 2, longitude = 1, ensemble = 25) -#'dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 1, time = 3, +#'dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, #' latitude = 2, longitude = 1, ensemble = 15) -#'exp_tasmax <- array(rnorm(100, 27.73, 5.26), dim = dims) -#'exp_tasmin <- array(rnorm(100, 14.83, 3.86), dim = dims) -#'exp_prlr <- array(rnorm(100, 21.19, 25.64), dim = dims) +#'exp_tasmax <- array(rnorm(900, 27.73, 5.26), dim = dims) +#'exp_tasmin <- array(rnorm(900, 14.83, 3.86), dim = dims) +#'exp_prlr <- array(rnorm(900, 21.19, 25.64), dim = dims) #' -#'expcor_tasmax <- array(rnorm(100, 29.03, 5.67), dim = dimscor) -#'expcor_tasmin <- array(rnorm(100, 15.70, 4.40), dim = dimscor) -#'expcor_prlr <- array(rnorm(100, 15.62, 21.38), dim = dimscor) +#'expcor_tasmax <- array(rnorm(540, 29.03, 5.67), dim = dimscor) +#'expcor_tasmin <- array(rnorm(540, 15.70, 4.40), dim = dimscor) +#'expcor_prlr <- array(rnorm(540, 15.62, 21.38), dim = dimscor) #' #'dates <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), #' paste0(2010:2015, "-10-16"))) @@ -262,7 +334,8 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, #'@import lmomco #'@import lubridate #'@export -PeriodSPEI <- function(exp, dates, lat, exp_cor = NULL, pet = NULL, +PeriodSPEI <- function(exp, dates_exp, lat, + exp_cor = NULL, dates_expcor = NULL, pet = NULL, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', lat_dim = 'latitude', accum = 1, ref_period = NULL, params = NULL, @@ -305,14 +378,40 @@ PeriodSPEI <- function(exp, dates, lat, exp_cor = NULL, pet = NULL, if (!all(names(exp_cor) %in% names(exp))) { stop("Parameter 'exp_cor' needs to have the same variable names as 'exp'.") } - dims <- lapply(exp_cor, function(x) dim(x)) - first_dims <- dims[[1]] - all_equal <- all(sapply(dims[-1], function(x) identical(first_dims, x))) + dimscor <- lapply(exp_cor, function(x) dim(x)) + first_dims <- dimscor[[1]] + all_equal <- all(sapply(dimscor[-1], function(x) identical(first_dims, x))) if (!all_equal) { stop("Parameter 'exp_cor' variables needs to have the same dimension names.") } } + # dates + if (!(is.Date(dates_exp)) & !(is.POSIXct(dates_exp))) { + stop("Parameter 'dates_exp' is not of the correct class, ", + "only 'Date' and 'POSIXct' classes are accepted.") + } + if (!is.null(exp_cor)) { + if (!(is.Date(dates_expcor)) & !(is.POSIXct(dates_expcor))) { + stop("Element 'Dates' in 'exp_cor' is not of the correct class, ", + "only 'Date' and 'POSIXct' classes are accepted.") + } + } + # lat + if (!is.numeric(lat)) { + stop("Parameter 'lat' must be numeric.") + } + if (any(sapply(dims, FUN = function(x) x[lat_dim] != length(lat)))) { + stop("Parameter 'lat' needs to have the same length of latitudinal", + "dimension of all the variables arrays in 'exp'.") + } + if (!is.null(exp_cor)) { + if (any(sapply(dimscor, FUN = function(x) x[lat_dim] != length(lat)))) { + stop("Parameter 'lat' needs to have the same length of latitudinal", + "dimension of all the variables arrays in 'exp'.") + } + } + # Variable checks if (is.null(pet)) { ## exp (2) @@ -424,17 +523,32 @@ PeriodSPEI <- function(exp, dates, lat, exp_cor = NULL, pet = NULL, stop(paste0("Cannot compute accumulation of ", accum, " months because ", "loaded data has only ", dim(exp[[1]])[leadtime_dim], " months.")) } - ## start - - ## end ## standardization + if (!is.logical(standardization)) { + stop("Parameter 'standardization' must be a logical value.") + } ## param_error + if (!is.numeric(param_error)) { + stop("Parameter 'param_error' must be a numeric value.") + } ## handle_infinity + if (!is.logical(handle_infinity)) { + stop("Parameter 'handle_infinity' must be a logical value.") + } ## cross_validation + if (!is.logical(cross_validation)) { + stop("Parameter 'cross_validation' must be a logical value.") + } + if (cross_validation) { + warning("Detected 'cross_validation' = TRUE. This functionality ", + "is being developed, sorry for the inconvenience. It will ", + "be set to FALSE.") + cross_validation <- FALSE + } ## method if (!(method %in% c('parametric', 'non-parametric'))) { @@ -459,18 +573,23 @@ PeriodSPEI <- function(exp, dates, lat, exp_cor = NULL, pet = NULL, # Data preparation # complete dates - ini_date <- as.Date(paste(lubridate::year(min(dates)), 01, 01, sep= '-')) - end_date <- as.Date(paste(lubridate::year(max(dates)), 12, 31, sep='-')) - dates_complete_daily <- as.Date(ini_date:end_date) - - dates_complete_daily_to_monthly <- lubridate::day(dates_complete_daily) - dates_complete_monthly <- dates_complete_daily[which(dates_complete_daily_to_monthly == 1)] - dates_monthly <- array(0, dim = length(dates_complete_monthly)) - for (dd in 1:length(dates)) { - ii <- which(dates_complete_monthly == as.Date(paste(lubridate::year(dates[dd]), - lubridate::month(dates[dd]), - 01, sep = '-'))) - dates_monthly[ii] <- 1 + dates_monthly <- NULL + k = 0 + for (dates in .return2list(dates_exp, dates_expcor)) { + k = k + 1 + ini_date <- as.Date(paste(lubridate::year(min(dates)), 01, 01, sep= '-')) + end_date <- as.Date(paste(lubridate::year(max(dates)), 12, 31, sep='-')) + dates_complete_daily <- as.Date(ini_date:end_date) + + dates_complete_daily_to_monthly <- lubridate::day(dates_complete_daily) + dates_complete_monthly <- dates_complete_daily[which(dates_complete_daily_to_monthly == 1)] + dates_monthly[[k]] <- array(0, dim = length(dates_complete_monthly)) + for (dd in 1:length(dates)) { + ii <- which(dates_complete_monthly == as.Date(paste(lubridate::year(dates[dd]), + lubridate::month(dates[dd]), + 01, sep = '-'))) + dates_monthly[[k]][ii] <- 1 + } } # Compute PeriodSPEI @@ -482,7 +601,7 @@ PeriodSPEI <- function(exp, dates, lat, exp_cor = NULL, pet = NULL, k = k + 1 # Evapotranspiration estimation (unless pet is already provided) if (is.null(pet) | computed_pet) { - pet <- .Evapotranspiration(data = data, dates_monthly = dates_monthly, + pet <- .Evapotranspiration(data = data, dates_monthly = dates_monthly[[k]], lat = lat, pet_method = pet_method[k], time_dim = time_dim, leadtime_dim = leadtime_dim, lat_dim = lat_dim, na.rm = na.rm, ncores = ncores) @@ -492,10 +611,10 @@ PeriodSPEI <- function(exp, dates, lat, exp_cor = NULL, pet = NULL, # Accumulation diff_p_pet <- data$prlr - pet data_accum <- .Accumulation(data = diff_p_pet, - dates_monthly = dates_monthly, accum = accum, + dates_monthly = dates_monthly[[k]], accum = accum, time_dim = time_dim, leadtime_dim = leadtime_dim, ncores = ncores) - # Standardization: + # Standardization if (standardization) { spei_dat <- .Standardization(data = data_accum, params = params, accum = accum, time_dim = time_dim, @@ -508,10 +627,18 @@ PeriodSPEI <- function(exp, dates, lat, exp_cor = NULL, pet = NULL, method = method, distribution = distribution, fit = fit, ncores = ncores) params <- spei_dat$params + + pos <- match(names(dim(data[[1]])), names(dim(spei_dat[[1]]))) + spei_dat[[1]] <- aperm(spei_dat[[1]], pos) + + spei_res[[k]] <- spei_dat[[1]] } else { - spei_dat <- data_accum + + pos <- match(names(dim(data[[1]])), names(dim(data_accum))) + data_accum <- aperm(data_accum, pos) + + spei_res[[k]] <- data_accum } - spei_res[[k]] <- spei_dat[[1]] } if (standardization) { @@ -572,11 +699,6 @@ PeriodSPEI <- function(exp, dates, lat, exp_cor = NULL, pet = NULL, # prepare data target_dims_data <- lapply(data[varnames], function(x) rep(c(leadtime_dim, time_dim), 1)) - # file <- tempfile() - # sink(file) - # print(varnames) - # print(is.na(data['tasmax'])) - # print(is.na(data['tasmin'])) pet <- Apply(data = c(list(lat_mask = lat_mask), data[varnames]), target_dims = c(list(lat_mask = 'dat'), target_dims_data), fun = .evapotranspiration, @@ -584,9 +706,6 @@ PeriodSPEI <- function(exp, dates, lat, exp_cor = NULL, pet = NULL, leadtime_dim = leadtime_dim, time_dim = time_dim, output_dims = c(leadtime_dim, time_dim), ncores = ncores)$output1 - # sink() - # captured_message <- readLines(file) - # print(unique(captured_message)) # reorder dims in pet_estimated pos <- match(names(dim(data[[1]])), names(dim(pet))) pet <- aperm(pet, pos) @@ -607,7 +726,7 @@ PeriodSPEI <- function(exp, dates, lat, exp_cor = NULL, pet = NULL, # of the considered period # (starting in January of the first year) so that the solar radiation # estimation is computed in each case for the correct month - any(is.na(data2)) + if (!is.null(data2)) { data_tmp <- as.vector(data2) data2 <- array(0, dim = length(dates_monthly)) @@ -646,11 +765,6 @@ PeriodSPEI <- function(exp, dates, lat, exp_cor = NULL, pet = NULL, rm(data_tmp) } if (pet_method == 'hargreaves') { - # NOTE EVA: if NA.RM is FALSE this gives error - # print('hi') - # print(any(is.na(data3))) - # print('he') - # print(any(is.na(data2))) pet <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), lat = lat_mask, na.rm = na.rm, verbose = FALSE) # line to return the vector to the size of the actual original data @@ -832,19 +946,18 @@ PeriodSPEI <- function(exp, dates, lat, exp_cor = NULL, pet = NULL, for (ff in 1:nleadtime) { data_subset <- ClimProjDiags::Subset(data, along = leadtime_dim, indices = ff, drop = 'selected') + params_tmp <- if (all(is.na(params))) {NULL} else {params[, ff, ]} spei_data <- .std(data = data_subset, coef = coef, - ntime = ntime, nmemb = nmemb, - method = method, - distribution = distribution, - fit = fit, na.rm = na.rm, - ref.start = ref.start, - ref.end = ref.end, - params = params_tmp, - handle_infinity = handle_infinity, - cross_validation = cross_validation) + ntime = ntime, nmemb = nmemb, + method = method, distribution = distribution, + fit = fit, na.rm = na.rm, + ref.start = ref.start, ref.end = ref.end, + params = params_tmp, + handle_infinity = handle_infinity, + cross_validation = cross_validation) spei_mod[ff, , ] <- spei_data[[1]] params_ff <- spei_data[[2]] # lengthen dimension coef of params_ff in case it doesn't match the @@ -867,7 +980,6 @@ PeriodSPEI <- function(exp, dates, lat, exp_cor = NULL, pet = NULL, # data: [time_dim, memb_dim] # params: NULL or [(ntime), coef] - if (method == 'non-parametric') { bp <- matrix(0, length(data), 1) for (i in 1:length(data)) { @@ -903,12 +1015,13 @@ PeriodSPEI <- function(exp, dates, lat, exp_cor = NULL, pet = NULL, } else { acu <- as.vector(data.fit) } - + acu.sorted <- sort.default(acu, method = "quick") # remove NAs (no need if(na.rm) because if there are NA and na.rm = F # we don't get to this point) acu.sorted <- acu.sorted[!is.na(acu.sorted)] - # else all acu was NA and we don't need to continue with this case + # else all acu was NA and we don't need to continue with this case + if (length(acu.sorted) != 0) { acu_sd <- sd(acu.sorted) if (!is.na(acu_sd)) { diff --git a/man/CST_PeriodSPEI.Rd b/man/CST_PeriodSPEI.Rd new file mode 100644 index 0000000..2bc5c47 --- /dev/null +++ b/man/CST_PeriodSPEI.Rd @@ -0,0 +1,157 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PeriodSPEI.R +\name{CST_PeriodSPEI} +\alias{CST_PeriodSPEI} +\title{Compute the Standardised Precipitation-Evapotranspiration Index} +\usage{ +CST_PeriodSPEI( + exp, + exp_cor = NULL, + pet = NULL, + time_dim = "syear", + leadtime_dim = "time", + memb_dim = "ensemble", + lat_dim = "latitude", + accum = 1, + ref_period = NULL, + params = NULL, + standardization = TRUE, + cross_validation = FALSE, + pet_method = "hargreaves", + method = "parametric", + distribution = "log-Logistic", + fit = "ub-pwm", + param_error = -9999, + handle_infinity = FALSE, + na.rm = FALSE, + ncores = NULL +) +} +\arguments{ +\item{exp}{A named list with the needed \code{s2dv_cube} objects containing +the seasonal forecast experiment in the data element for each variable. +Specific variables are needed for each method used in computing the +Potential Evapotranspiration. See parameter 'pet_method'. The accepted +variables are: 'tasmin' and 'tasmax', required for methods 'hargreaves' and +'hargreaves_modified' and 'tas', required for method 'thornthwaite'. +Variable 'prlr' is always needed. The units for temperature variables +('tasmin', 'tasmax' and 'tas') need to be in Celcius degrees; the units for +precipitation ('prlr') need to be in mm/month.} + +\item{exp_cor}{A named list with the needed \code{s2dv_cube} objects for each +variable in which the quantile PeriodSPEI should be applied. If it is not +specified, the PeriodSPEI is calculated from object 'exp'.} + +\item{pet}{A multidimensional array containing the Potential +EvapoTranspiration data. If it is NULL it is calculated using pet_method. It +is NULL by default.} + +\item{time_dim}{A character string indicating the name of the temporal +dimension. By default, it is set to 'syear'.} + +\item{leadtime_dim}{A character string indicating the name of the temporal +dimension. By default, it is set to 'time'.} + +\item{memb_dim}{A character string indicating the name of the dimension in +which the ensemble members are stored. When set it to NULL, threshold is +computed for individual members.} + +\item{lat_dim}{A character string indicating the name of the latitudinal +dimension. By default it is set by 'latitude'.} + +\item{accum}{An integer value indicating the number of months for the +accumulation for each variable.} + +\item{ref_period}{A list with two numeric values with the starting and end +points of the reference period used for computing the index. The default +value is NULL indicating that the first and end values in data will be +used as starting and end points.} + +\item{params}{A multidimensional array with named dimensions for computing the +SPEI. This option overrides computation of fitting parameters. It needs +to be of same leadtime and time dimensions of exp and a dimension named +'coef' with the length of the coefficients needed for the used distribution +(for 'Gamma' coef dimension is of lenght 2, for 'log-Logistic' or +'PearsonIII' is of length 3). If cross_validation is FALSE, the leadtime +dimension must be of length 1.} + +\item{standardization}{A logical value indicating wether the standardization +is computed.} + +\item{cross_validation}{A logical value indicating if cross validation is +done (TRUE), or not (FALSE). It is FALSE by default.} + +\item{pet_method}{A character string indicating the method used to compute +the potential evapotranspiration. The accepted methods are: +'hargreaves' and 'hargreaves_modified', that require the data to have +variables tasmin and tasmax; and 'thornthwaite', that requires variable +'tas'.} + +\item{method}{A character string indicating the standardization method used. +If can be: 'parametric' or 'non-parametric'.} + +\item{distribution}{A character string indicating the name of the distribution +function to be used for computing the SPEI. The accepted names are: +'log-Logistic', 'Gamma' or 'PearsonIII'. It is set to 'log-Logistic' by +default.} + +\item{fit}{A character string indicating the name of the method used for +computing the distribution function parameters The accepteed names are: +'ub-pwm', 'pp-pwm' and 'max-lik'. It is set to 'ub-pwm' by default.} + +\item{param_error}{A numeric value with the error accepted.} + +\item{handle_infinity}{A logical value wether to return Infinite values (TRUE) +or not (FALSE).} + +\item{na.rm}{A logical value indicating whether NA values should be removed +from data. It is FALSE by default.} + +\item{ncores}{An integer value indicating the number of cores to use in +parallel computation.} +} +\description{ +Calculation of the Standardised Precipitation-Evapotranspiration Index (SPEI) +that is a multiscalar drought index based on climatic data. It can be used for +determining the onset, duration and magnitude of drought conditions with +respect to normal conditions in a variety of natural and managed systems such +as crops, ecosystems, rivers, water resources, etc. The SPI is calculated +using monthly (or weekly) precipitation as the input data. The SPEI uses the +monthly (or weekly) difference between precipitation and pet. This represents +a simple climatic water balance which is calculated at different time scales +to obtain the SPEI. +} +\examples{ +# Test random data +dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, + latitude = 2, longitude = 1, ensemble = 25) +dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, + latitude = 2, longitude = 1, ensemble = 15) + +dates <- as.POSIXct(c(paste0(2010:2015, "-08-16"), + paste0(2010:2015, "-09-15"), + paste0(2010:2015, "-10-16")), 'UTC') +dim(dates) <- c(sday = 1, sweek = 1, syear = 6, time = 3) + +lat <- c(40,40.1) + +exp_tasmax <- array(rnorm(900, 27.73, 5.26), dim = dims) +exp_tasmin <- array(rnorm(900, 14.83, 3.86), dim = dims) +exp_prlr <- array(rnorm(900, 21.19, 25.64), dim = dims) + +expcor_tasmax <- array(rnorm(540, 29.03, 5.67), dim = dimscor) +expcor_tasmin <- array(rnorm(540, 15.70, 4.40), dim = dimscor) +expcor_prlr <- array(rnorm(540, 15.62, 21.38), dim = dimscor) + +exp <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) +exp_cor <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, + 'prlr' = expcor_prlr) + +exp <- lapply(exp, CSTools::s2dv_cube, coords = list(latitude = lat), + Dates = dates) +exp_cor <- lapply(exp_cor, CSTools::s2dv_cube, coords = list(latitude = lat), + Dates = dates) + +res <- CST_PeriodSPEI(exp = exp, exp_cor = exp_cor) + +} diff --git a/man/PeriodSPEI.Rd b/man/PeriodSPEI.Rd new file mode 100644 index 0000000..1168122 --- /dev/null +++ b/man/PeriodSPEI.Rd @@ -0,0 +1,160 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PeriodSPEI.R +\name{PeriodSPEI} +\alias{PeriodSPEI} +\title{Compute the Standardised Precipitation-Evapotranspiration Index} +\usage{ +PeriodSPEI( + exp, + dates_exp, + lat, + exp_cor = NULL, + dates_expcor = NULL, + pet = NULL, + time_dim = "syear", + leadtime_dim = "time", + memb_dim = "ensemble", + lat_dim = "latitude", + accum = 1, + ref_period = NULL, + params = NULL, + standardization = TRUE, + cross_validation = FALSE, + pet_method = "hargreaves", + method = "parametric", + distribution = "log-Logistic", + fit = "ub-pwm", + param_error = -9999, + handle_infinity = FALSE, + na.rm = FALSE, + ncores = NULL +) +} +\arguments{ +\item{exp}{A named list with the needed \code{s2dv_cube} objects containing +the seasonal forecast experiment in the data element for each variable. +Specific variables are needed for each method used in computing the +Potential Evapotranspiration. See parameter 'pet_method'. The accepted +variables are: 'tasmin' and 'tasmax', required for methods 'hargreaves' and +'hargreaves_modified' and 'tas', required for method 'thornthwaite'. +Variable 'prlr' is always needed. The units for temperature variables +('tasmin', 'tasmax' and 'tas') need to be in Celcius degrees; the units for +precipitation ('prlr') need to be in mm/month.} + +\item{dates_exp}{An array of temporal dimensions containing the Dates of +'exp'. It must be of class 'Date' or 'POSIXct'.} + +\item{lat}{A numeric vector containing the latitude values of 'exp'.} + +\item{exp_cor}{A named list with the needed \code{s2dv_cube} objects for each +variable in which the quantile PeriodSPEI should be applied. If it is not +specified, the PeriodSPEI is calculated from object 'exp'.} + +\item{dates_expcor}{An array of temporal dimensions containing the Dates of +'exp_cor'. It must be of class 'Date' or 'POSIXct'.} + +\item{pet}{A multidimensional array containing the Potential +EvapoTranspiration data. If it is NULL it is calculated using pet_method. It +is NULL by default.} + +\item{time_dim}{A character string indicating the name of the temporal +dimension. By default, it is set to 'syear'.} + +\item{leadtime_dim}{A character string indicating the name of the temporal +dimension. By default, it is set to 'time'.} + +\item{memb_dim}{A character string indicating the name of the dimension in +which the ensemble members are stored. When set it to NULL, threshold is +computed for individual members.} + +\item{lat_dim}{A character string indicating the name of the latitudinal +dimension. By default it is set by 'latitude'.} + +\item{accum}{An integer value indicating the number of months for the +accumulation for each variable.} + +\item{ref_period}{A list with two numeric values with the starting and end +points of the reference period used for computing the index. The default +value is NULL indicating that the first and end values in data will be +used as starting and end points.} + +\item{params}{A multidimensional array with named dimensions for computing the +SPEI. This option overrides computation of fitting parameters. It needs +to be of same leadtime and time dimensions of exp and a dimension named +'coef' with the length of the coefficients needed for the used distribution +(for 'Gamma' coef dimension is of lenght 2, for 'log-Logistic' or +'PearsonIII' is of length 3). If cross_validation is FALSE, the leadtime +dimension must be of length 1.} + +\item{standardization}{A logical value indicating wether the standardization +is computed.} + +\item{cross_validation}{A logical value indicating if cross validation is +done (TRUE), or not (FALSE). It is FALSE by default.} + +\item{pet_method}{A character string indicating the method used to compute +the potential evapotranspiration. The accepted methods are: +'hargreaves' and 'hargreaves_modified', that require the data to have +variables tasmin and tasmax; and 'thornthwaite', that requires variable +'tas'.} + +\item{method}{A character string indicating the standardization method used. +If can be: 'parametric' or 'non-parametric'.} + +\item{distribution}{A character string indicating the name of the distribution +function to be used for computing the SPEI. The accepted names are: +'log-Logistic', 'Gamma' or 'PearsonIII'. It is set to 'log-Logistic' by +default.} + +\item{fit}{A character string indicating the name of the method used for +computing the distribution function parameters The accepteed names are: +'ub-pwm', 'pp-pwm' and 'max-lik'. It is set to 'ub-pwm' by default.} + +\item{param_error}{A numeric value with the error accepted.} + +\item{handle_infinity}{A logical value wether to return Infinite values (TRUE) +or not (FALSE).} + +\item{na.rm}{A logical value indicating whether NA values should be removed +from data. It is FALSE by default.} + +\item{ncores}{An integer value indicating the number of cores to use in +parallel computation.} +} +\description{ +Calculation of the Standardised Precipitation-Evapotranspiration Index (SPEI) +that is a multiscalar drought index based on climatic data. It can be used for +determining the onset, duration and magnitude of drought conditions with +respect to normal conditions in a variety of natural and managed systems such +as crops, ecosystems, rivers, water resources, etc. The SPI is calculated +using monthly (or weekly) precipitation as the input data. The SPEI uses the +monthly (or weekly) difference between precipitation and pet. This represents +a simple climatic water balance which is calculated at different time scales +to obtain the SPEI. +} +\examples{ +# Test random data +dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, + latitude = 2, longitude = 1, ensemble = 25) +dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, + latitude = 2, longitude = 1, ensemble = 15) +exp_tasmax <- array(rnorm(900, 27.73, 5.26), dim = dims) +exp_tasmin <- array(rnorm(900, 14.83, 3.86), dim = dims) +exp_prlr <- array(rnorm(900, 21.19, 25.64), dim = dims) + +expcor_tasmax <- array(rnorm(540, 29.03, 5.67), dim = dimscor) +expcor_tasmin <- array(rnorm(540, 15.70, 4.40), dim = dimscor) +expcor_prlr <- array(rnorm(540, 15.62, 21.38), dim = dimscor) + +dates <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), + paste0(2010:2015, "-10-16"))) +dim(dates) <- c(sday = 1, sweek = 1, syear = 6, time = 3) + +lat <- c(40,40.1) + +exp <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) +exp_cor <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, + 'prlr' = expcor_prlr) +res <- PeriodSPEI(exp = exp, exp_cor = exp_cor, lat = lat, dates = dates) + +} diff --git a/tests/testthat/test-PeriodSPEI.R b/tests/testthat/test-PeriodSPEI.R index 7c55211..e009ad0 100644 --- a/tests/testthat/test-PeriodSPEI.R +++ b/tests/testthat/test-PeriodSPEI.R @@ -9,30 +9,34 @@ dims <- c(syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 10) dimscor <- c(syear = 1, time = 3, latitude = 2, longitude = 1, ensemble = 15) set.seed(1) -exp_tasmax <- array(rnorm(100, 27.73, 5.26), dim = dims) +exp_tasmax <- array(rnorm(360, 27.73, 5.26), dim = dims) set.seed(2) -exp_tasmin <- array(rnorm(100, 14.83, 3.86), dim = dims) +exp_tasmin <- array(rnorm(360, 14.83, 3.86), dim = dims) set.seed(3) -exp_prlr <- array(rnorm(100, 21.19, 25.64), dim = dims) +exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims) set.seed(1) -expcor_tasmax <- array(rnorm(100, 29.03, 5.67), dim = dimscor) +expcor_tasmax <- array(rnorm(60, 29.03, 5.67), dim = dimscor) set.seed(2) -expcor_tasmin <- array(rnorm(100, 15.70, 4.40), dim = dimscor) +expcor_tasmin <- array(rnorm(60, 15.70, 4.40), dim = dimscor) set.seed(3) -expcor_prlr <- array(rnorm(100, 15.62, 21.38), dim = dimscor) +expcor_prlr <- array(rnorm(60, 15.62, 21.38), dim = dimscor) -dates <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), +dates_exp <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), paste0(2010:2015, "-10-16"))) -dim(dates) <- c(sday = 1, sweek = 1, syear = 6, time = 3) +dim(dates_exp) <- c(sday = 1, sweek = 1, syear = 6, time = 3) + +dates_expcor <- as.Date(c(paste0(2020, "-08-16"), paste0(2020, "-09-15"), + paste0(2020, "-10-16"))) +dim(dates_expcor) <- c(sday = 1, sweek = 1, syear = 1, time = 3) lat <- c(40,40.1) exp1 <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) exp_cor1 <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, 'prlr' = expcor_prlr) -res <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, dates = dates) -source("/esarchive/scratch/erifarov/git/csindicators/R/PeriodSPEI.R") +res <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, + dates_exp = dates_exp, dates_expcor = dates_expcor) ############################################## @@ -89,52 +93,50 @@ test_that("1. Initial checks PeriodSPEI", { ) # exp (2) expect_warning( - PeriodSPEI(exp = exp1, pet_method = '1', dates = dates, lat = lat), + PeriodSPEI(exp = exp1, pet_method = '1', dates_exp = dates_exp, lat = lat), paste0("Parameter 'pet_method' needs to be 'hargreaves' or ", "'hargreaves_modified'. It is set to 'hargreaves'.") ) # time_dim expect_error( - PeriodSPEI(exp = exp1, time_dim = 1) + PeriodSPEI(exp = exp1, time_dim = 1, dates_exp = dates_exp, lat = lat) ) # leadtime_dim - + expect_error( + PeriodSPEI(exp = exp1, time_dim = 1, dates_exp = dates_exp, lat = lat) + ) # memb_dim - + expect_error( + PeriodSPEI(exp = exp1, memb_dim = 1, dates_exp = dates_exp, lat = lat) + ) # lat_dim - + expect_error( + PeriodSPEI(exp = exp1, lat_dim = 1, dates_exp = dates_exp, lat = lat) + ) # accum + expect_error( + PeriodSPEI(exp = exp1, accum = 10, dates_exp = dates_exp, lat = lat) + ) }) ############################################## -# Test different time dimensions of exp and exp_cor -dims <- c(syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 10) -dimscor <- c(syear = 1, time = 3, latitude = 2, longitude = 1, ensemble = 15) - -set.seed(1) -exp_tasmax <- array(rnorm(100, 27.73, 5.26), dim = dims) -set.seed(2) -exp_tasmin <- array(rnorm(100, 14.83, 3.86), dim = dims) -set.seed(3) -exp_prlr <- array(rnorm(100, 21.19, 25.64), dim = dims) - -set.seed(1) -expcor_tasmax <- array(rnorm(100, 29.03, 5.67), dim = dimscor) -set.seed(2) -expcor_tasmin <- array(rnorm(100, 15.70, 4.40), dim = dimscor) -set.seed(3) -expcor_prlr <- array(rnorm(100, 15.62, 21.38), dim = dimscor) - -dates <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), - paste0(2010:2015, "-10-16"))) -dim(dates) <- c(sday = 1, sweek = 1, syear = 6, time = 3) - -lat <- c(40,40.1) - -exp1 <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) -exp_cor1 <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, 'prlr' = expcor_prlr) +test_that("2. Output checks", { + res <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, + dates_exp = dates_exp, dates_expcor = dates_expcor) + expect_equal( + length(res), + 3 + ) + expect_equal( + dim(res[[1]]), + dims + ) + expect_equal( + dim(res[[3]])[which(!names(dim(res[[3]])) %in% c('coef', 'syear'))], + dims[which(!names(dims) %in% c('syear', 'ensemble'))] + ) +}) -res <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, dates = dates, - cross_validation = TRUE, na.rm = TRUE) \ No newline at end of file +############################################## \ No newline at end of file -- GitLab From 9243ba3c9cb649c4b2cac55cd769db0064c60d95 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Tue, 13 Jun 2023 17:41:38 +0200 Subject: [PATCH 10/42] Separate pet parameter to pet_exp and pet_expcor; add initial checks; add few tests --- R/PeriodSPEI.R | 247 +++++++++++++++++++++---------- man/CST_PeriodSPEI.Rd | 57 +++++-- man/PeriodSPEI.Rd | 50 +++++-- tests/testthat/test-PeriodSPEI.R | 112 ++++++++++++-- 4 files changed, 345 insertions(+), 121 deletions(-) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index 284c174..39e6daf 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -22,9 +22,6 @@ #'@param exp_cor A named list with the needed \code{s2dv_cube} objects for each #' variable in which the quantile PeriodSPEI should be applied. If it is not #' specified, the PeriodSPEI is calculated from object 'exp'. -#'@param pet A multidimensional array containing the Potential -#' EvapoTranspiration data. If it is NULL it is calculated using pet_method. It -#' is NULL by default. #'@param time_dim A character string indicating the name of the temporal #' dimension. By default, it is set to 'syear'. #'@param leadtime_dim A character string indicating the name of the temporal @@ -42,11 +39,19 @@ #' used as starting and end points. #'@param params A multidimensional array with named dimensions for computing the #' SPEI. This option overrides computation of fitting parameters. It needs -#' to be of same leadtime and time dimensions of exp and a dimension named +#' to be of same leadtime and time dimensions of 'exp' and a dimension named #' 'coef' with the length of the coefficients needed for the used distribution #' (for 'Gamma' coef dimension is of lenght 2, for 'log-Logistic' or #' 'PearsonIII' is of length 3). If cross_validation is FALSE, the leadtime #' dimension must be of length 1. +#'@param pet_exp A multidimensional array containing the Potential +#' EvapoTranspiration data of 'exp'. It must have the same dimensions of the +#' variable 'prlr' of 'exp'. If it is NULL it is calculated using the provided +#' variables with specified 'pet_method'. It is NULL by default. +#'@param pet_expcor A multidimensional array containing the Potential +#' EvapoTranspiration data of 'exp_cor'. It must have the same dimensions of +#' the variable 'prlr' of 'exp_cor'. If it is NULL it is calculated using the +#' provided variables with specified 'pet_method'. It is NULL by default. #'@param standardization A logical value indicating wether the standardization #' is computed. #'@param cross_validation A logical value indicating if cross validation is @@ -57,7 +62,8 @@ #' variables tasmin and tasmax; and 'thornthwaite', that requires variable #' 'tas'. #'@param method A character string indicating the standardization method used. -#' If can be: 'parametric' or 'non-parametric'. +#' If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by +#' default. #'@param distribution A character string indicating the name of the distribution #' function to be used for computing the SPEI. The accepted names are: #' 'log-Logistic', 'Gamma' or 'PearsonIII'. It is set to 'log-Logistic' by @@ -89,17 +95,18 @@ #' } #' #'@examples -#'# Test random data #'dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, #' latitude = 2, longitude = 1, ensemble = 25) -#'dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, +#'dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 1, time = 3, #' latitude = 2, longitude = 1, ensemble = 15) #' -#'dates <- as.POSIXct(c(paste0(2010:2015, "-08-16"), -#' paste0(2010:2015, "-09-15"), -#' paste0(2010:2015, "-10-16")), 'UTC') -#'dim(dates) <- c(sday = 1, sweek = 1, syear = 6, time = 3) -#' +#'dates_exp <- as.POSIXct(c(paste0(2010:2015, "-08-16"), +#' paste0(2010:2015, "-09-15"), +#' paste0(2010:2015, "-10-16")), 'UTC') +#'dim(dates_exp) <- c(sday = 1, sweek = 1, syear = 6, time = 3) +#'dates_expcor <- as.POSIXct(c(paste0(2020, "-08-16"), paste0(2020, "-09-15"), +#' paste0(2020, "-10-16")), 'UTC') +#'dim(dates_expcor) <- c(sday = 1, sweek = 1, syear = 1, time = 3) #'lat <- c(40,40.1) #' #'exp_tasmax <- array(rnorm(900, 27.73, 5.26), dim = dims) @@ -115,9 +122,9 @@ #' 'prlr' = expcor_prlr) #' #'exp <- lapply(exp, CSTools::s2dv_cube, coords = list(latitude = lat), -#' Dates = dates) +#' Dates = dates_exp) #'exp_cor <- lapply(exp_cor, CSTools::s2dv_cube, coords = list(latitude = lat), -#' Dates = dates) +#' Dates = dates_expcor) #' #'res <- CST_PeriodSPEI(exp = exp, exp_cor = exp_cor) #' @@ -129,10 +136,11 @@ #'@import lmomco #'@import lubridate #'@export -CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, +CST_PeriodSPEI <- function(exp, exp_cor = NULL, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', lat_dim = 'latitude', accum = 1, ref_period = NULL, params = NULL, + pet_exp = NULL, pet_expcor = NULL, standardization = TRUE, cross_validation = FALSE, pet_method = 'hargreaves', method = 'parametric', distribution = 'log-Logistic', fit = 'ub-pwm', @@ -158,18 +166,21 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, } # Dates dates_exp <- exp[[1]]$attrs$Dates - if (is.null(exp[[1]]$attrs$Dates)) { - stop("Element 'Dates' is not found in 'exp$attrs' list.") + if (!'Dates' %in% names(exp[[1]]$attrs)) { + stop("Element 'Dates' is not found in 'attrs' list of 'exp'. ", + "See 's2dv_cube' object description in README file for more ", + "information.") } if (!is.null(exp_cor)) { - if (is.null(exp_cor[[1]]$attrs$Dates)) { - stop("Element 'Dates' is not found in 'exp_cor$attrs'.") + if (!'Dates' %in% names(exp_cor[[1]]$attrs)) { + stop("Element 'Dates' is not found in 'attrs' list of 'exp_cor'. ", + "See 's2dv_cube' object description in README file for more ", + "information.") } } lat_dim <- names(exp[[1]]$coords)[[which(names(exp[[1]]$coords) %in% .KnownLatNames())]] - res <- PeriodSPEI(exp = lapply(exp, function(x) x$data), dates_exp = exp[[1]]$attrs$Dates, @@ -177,9 +188,10 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, exp_cor = if (is.null(exp_cor)) {NULL} else { lapply(exp_cor, function(x) x$data)}, dates_expcor = exp_cor[[1]]$attrs$Dates, - pet = pet, time_dim = time_dim, leadtime_dim = leadtime_dim, + time_dim = time_dim, leadtime_dim = leadtime_dim, memb_dim = memb_dim, lat_dim = lat_dim, accum = accum, ref_period = ref_period, params = params, + pet_exp = pet_exp, pet_expcor = pet_expcor, standardization = standardization, cross_validation = cross_validation, pet_method = pet_method, method = method, @@ -236,9 +248,6 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, #' specified, the PeriodSPEI is calculated from object 'exp'. #'@param dates_expcor An array of temporal dimensions containing the Dates of #' 'exp_cor'. It must be of class 'Date' or 'POSIXct'. -#'@param pet A multidimensional array containing the Potential -#' EvapoTranspiration data. If it is NULL it is calculated using pet_method. It -#' is NULL by default. #'@param time_dim A character string indicating the name of the temporal #' dimension. By default, it is set to 'syear'. #'@param leadtime_dim A character string indicating the name of the temporal @@ -261,6 +270,14 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, #' (for 'Gamma' coef dimension is of lenght 2, for 'log-Logistic' or #' 'PearsonIII' is of length 3). If cross_validation is FALSE, the leadtime #' dimension must be of length 1. +#'@param pet_exp A multidimensional array containing the Potential +#' EvapoTranspiration data of 'exp'. It must have the same dimensions of the +#' variable 'prlr' of 'exp'. If it is NULL it is calculated using the provided +#' variables with specified 'pet_method'. It is NULL by default. +#'@param pet_expcor A multidimensional array containing the Potential +#' EvapoTranspiration data of 'exp_cor'. It must have the same dimensions of +#' the variable 'prlr' of 'exp_cor'. If it is NULL it is calculated using the +#' provided variables with specified 'pet_method'. It is NULL by default. #'@param standardization A logical value indicating wether the standardization #' is computed. #'@param cross_validation A logical value indicating if cross validation is @@ -302,10 +319,9 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, #'} #' #'@examples -#'# Test random data #'dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, #' latitude = 2, longitude = 1, ensemble = 25) -#'dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, +#'dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 1, time = 3, #' latitude = 2, longitude = 1, ensemble = 15) #'exp_tasmax <- array(rnorm(900, 27.73, 5.26), dim = dims) #'exp_tasmin <- array(rnorm(900, 14.83, 3.86), dim = dims) @@ -315,16 +331,21 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, #'expcor_tasmin <- array(rnorm(540, 15.70, 4.40), dim = dimscor) #'expcor_prlr <- array(rnorm(540, 15.62, 21.38), dim = dimscor) #' -#'dates <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), -#' paste0(2010:2015, "-10-16"))) -#'dim(dates) <- c(sday = 1, sweek = 1, syear = 6, time = 3) -#' +#'dates_exp <- as.Date(c(paste0(2010:2015, "-08-16"), +#' paste0(2010:2015, "-09-15"), +#' paste0(2010:2015, "-10-16"))) +#'dim(dates_exp) <- c(sday = 1, sweek = 1, syear = 6, time = 3) +#'dates_expcor <- as.POSIXct(c(paste0(2020, "-08-16"), +#' paste0(2020, "-09-15"), +#' paste0(2020, "-10-16")), 'UTC') +#'dim(dates_expcor) <- c(sday = 1, sweek = 1, syear = 1, time = 3) #'lat <- c(40,40.1) #' #'exp <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) #'exp_cor <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, #' 'prlr' = expcor_prlr) -#'res <- PeriodSPEI(exp = exp, exp_cor = exp_cor, lat = lat, dates = dates) +#'res <- PeriodSPEI(exp = exp, exp_cor = exp_cor, lat = lat, +#' dates_exp = dates_exp, dates_expcor = dates_expcor) #' #'@import multiApply #'@import ClimProjDiags @@ -335,10 +356,11 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, #'@import lubridate #'@export PeriodSPEI <- function(exp, dates_exp, lat, - exp_cor = NULL, dates_expcor = NULL, pet = NULL, + exp_cor = NULL, dates_expcor = NULL, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', lat_dim = 'latitude', accum = 1, ref_period = NULL, params = NULL, + pet_exp = NULL, pet_expcor = NULL, standardization = TRUE, cross_validation = FALSE, pet_method = 'hargreaves', method = 'parametric', distribution = 'log-Logistic', fit = 'ub-pwm', @@ -375,28 +397,14 @@ PeriodSPEI <- function(exp, dates_exp, lat, if (any(sapply(exp_cor, function(x) is.null(names(dim(x)))))) { stop("Parameter 'exp_cor' needs to be a list of arrays with dimension names.") } - if (!all(names(exp_cor) %in% names(exp))) { - stop("Parameter 'exp_cor' needs to have the same variable names as 'exp'.") - } dimscor <- lapply(exp_cor, function(x) dim(x)) - first_dims <- dimscor[[1]] - all_equal <- all(sapply(dimscor[-1], function(x) identical(first_dims, x))) + first_dimscor <- dimscor[[1]] + all_equal <- all(sapply(dimscor[-1], function(x) identical(first_dimscor, x))) if (!all_equal) { stop("Parameter 'exp_cor' variables needs to have the same dimension names.") } } - # dates - if (!(is.Date(dates_exp)) & !(is.POSIXct(dates_exp))) { - stop("Parameter 'dates_exp' is not of the correct class, ", - "only 'Date' and 'POSIXct' classes are accepted.") - } - if (!is.null(exp_cor)) { - if (!(is.Date(dates_expcor)) & !(is.POSIXct(dates_expcor))) { - stop("Element 'Dates' in 'exp_cor' is not of the correct class, ", - "only 'Date' and 'POSIXct' classes are accepted.") - } - } # lat if (!is.numeric(lat)) { stop("Parameter 'lat' must be numeric.") @@ -413,8 +421,9 @@ PeriodSPEI <- function(exp, dates_exp, lat, } # Variable checks - if (is.null(pet)) { - ## exp (2) + ## exp (2) + pet <- vector("list", 2) + if (is.null(pet_exp)) { if (all(c('tasmin', 'tasmax', 'prlr') %in% names(exp))) { # hargreaves modified: 'tasmin', 'tasmax', 'prlr' and 'lat' if (!(pet_method[1] %in% c('hargreaves_modified', 'hargreaves'))) { @@ -429,12 +438,30 @@ PeriodSPEI <- function(exp, dates_exp, lat, pet_method[1] <- 'thornthwaite' } } else { - stop("Parameter 'exp' needs to be a named list with accepted variable names.", - "See documentation.") + stop("Parameter 'exp' needs to be a named list with accepted ", + "variable names if 'pet_exp' is not provided. See documentation.") } - - ## exp_cor (2) - if (!is.null(exp_cor)) { + } else { + if (!('prlr' %in% names(exp))) { + stop("Variable 'prlr' is not included in 'exp'.") + } + if (length(dim(exp[['prlr']])) != length(dim(pet_exp))) { + stop("Parameter 'pet_exp' must have the same length of all the ", + "dimensions as variable 'prlr' in 'exp'.") + } + if (!all(dim(exp[['prlr']]) %in% dim(pet_exp))) { + stop("Parameter 'pet_exp' must have the same length of all the ", + "dimensions as variable 'prlr' in 'exp'.") + } + if (any(names(dim(exp[['prlr']])) != names(dim(pet_exp)))) { + pos <- match(names(dim(exp[['prlr']])), names(dim(pet_exp))) + pet_exp <- aperm(pet_exp, pos) + } + pet[[1]] <- pet_exp + } + ## exp_cor (2) + if (!is.null(exp_cor)) { + if (is.null(pet_expcor)) { if (length(exp_cor) < 1) { exp_cor <- NULL } else { @@ -456,17 +483,28 @@ PeriodSPEI <- function(exp, dates_exp, lat, pet_method[2] <- 'thornthwaite' } } else { - stop("Parameter 'exp_cor' needs to be a list with the needed variables.") + stop("Parameter 'exp_cor' needs to be a list with the needed ", + "variables if 'pet_expcor' is not provided.") } - } - } else { - if (!('prlr' %in% names(exp))) { - stop("Variable 'prlr' is not included in 'exp'.") - } - if (!is.null(exp_cor)) { - if (!('prlr' %in% names(exp_cor))) { - stop("Variable 'prlr' is not included in 'exp_cor'.") + } else { + if (!is.null(exp_cor)) { + if (!('prlr' %in% names(exp_cor))) { + stop("Variable 'prlr' is not included in 'exp_cor'.") + } + if (length(dim(exp_cor[['prlr']])) != length(dim(pet_expcor))) { + stop("Parameter 'pet_expcor' must have the same length of all the ", + "dimensions as variable 'prlr' in 'exp_cor'.") + } + if (!all(dim(exp_cor[['prlr']]) %in% dim(pet_expcor))) { + stop("Parameter 'pet_expcor' must have the same length of all the ", + "dimensions as variable 'prlr' in 'exp_cor'.") + } + if (any(names(dim(exp_cor[['prlr']])) != names(dim(pet_expcor)))) { + pos <- match(names(dim(exp_cor[['prlr']])), names(dim(pet_expcor))) + pet_expcor <- aperm(pet_expcor, pos) + } } + pet[[2]] <- pet_expcor } } @@ -518,11 +556,50 @@ PeriodSPEI <- function(exp, dates_exp, lat, stop("Parameter 'lat_dim' is not found in 'exp_cor' dimension.") } } + # dates + if (is.null(dates_exp)) { + stop("Parameter 'dates_exp' is missing, dates must be provided.") + } + if (!(is.Date(dates_exp)) & !(is.POSIXct(dates_exp))) { + stop("Parameter 'dates_exp' is not of the correct class, ", + "only 'Date' and 'POSIXct' classes are accepted.") + } + if (!all(dim(exp[[1]])[c(time_dim, leadtime_dim)] == + dim(dates_exp)[c(time_dim, leadtime_dim)])) { + stop("Parameter 'dates_exp' needs to have the same length as 'time_dim' ", + "and 'leadtime_dim' as 'exp'.") + } + + if (!is.null(exp_cor)) { + if (is.null(dates_expcor)) { + stop("Parameter 'dates_expcor' is missing, dates for 'exp_cor' must be ", + "provided if exp_cor is not NULL.") + } + if (!(is.Date(dates_expcor)) & !(is.POSIXct(dates_expcor))) { + stop("Element 'Dates' in 'exp_cor' is not of the correct class, ", + "only 'Date' and 'POSIXct' classes are accepted.") + } + if (!all(dim(exp_cor[[1]])[c(time_dim, leadtime_dim)] == + dim(dates_expcor)[c(time_dim, leadtime_dim)])) { + stop("Parameter 'dates_expcor' needs to have the same length as ", + "'time_dim' and 'leadtime_dim' as 'exp_cor'.") + } + } ## accum if (accum > dim(exp[[1]])[leadtime_dim]) { stop(paste0("Cannot compute accumulation of ", accum, " months because ", "loaded data has only ", dim(exp[[1]])[leadtime_dim], " months.")) } + ## params + if (!is.null(params)) { + if (!is.numeric(params)) { + stop("Parameter 'params' must be numeric.") + } + if (!all(c(time_dim, leadtime_dim, 'coef') %in% names(dim(params)))) { + stop("Parameter 'params' must be a multidimensional array with named ", + "dimensions: 'time_dim', 'leadtime_dim' and 'coef'.") + } + } ## standardization if (!is.logical(standardization)) { @@ -543,26 +620,29 @@ PeriodSPEI <- function(exp, dates_exp, lat, if (!is.logical(cross_validation)) { stop("Parameter 'cross_validation' must be a logical value.") } - if (cross_validation) { - warning("Detected 'cross_validation' = TRUE. This functionality ", - "is being developed, sorry for the inconvenience. It will ", - "be set to FALSE.") - cross_validation <- FALSE - } + # if (cross_validation) { + # warning("Detected 'cross_validation' = TRUE. This functionality ", + # "is being developed, sorry for the inconvenience. It will ", + # "be set to FALSE.") + # cross_validation <- FALSE + # } ## method if (!(method %in% c('parametric', 'non-parametric'))) { - stop('pcX script error: SPEI can be only computed using the following approach: parametric or non-parametric') + stop("Parameter 'method' must be a character string containing one of ", + "the following methods: 'parametric' or 'non-parametric'.") } ## distribution if (!(distribution %in% c('log-Logistic', 'Gamma', 'PearsonIII'))) { - stop('Distrib must be one of "log-Logistic", "Gamma" or "PearsonIII"') + stop("Parameter 'distribution' must be a character string containing one ", + "of the following distributions: 'log-Logistic', 'Gamma' or ", + "'PearsonIII'.") } ## fit if (!(fit %in% c('max-lik', 'ub-pwm', 'pp-pwm'))) { - stop('Method must be one of "ub-pwm" (default), "pp-pwm" or "max-lik".') + stop("Parameter 'fit' must be a character string containing one of ", + "the following fit methods: 'max-lik', 'ub-pwm', 'pp-pwm'.") } - ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | @@ -600,16 +680,19 @@ PeriodSPEI <- function(exp, dates_exp, lat, for (data in .return2list(exp, exp_cor)) { k = k + 1 # Evapotranspiration estimation (unless pet is already provided) - if (is.null(pet) | computed_pet) { - pet <- .Evapotranspiration(data = data, dates_monthly = dates_monthly[[k]], - lat = lat, pet_method = pet_method[k], - time_dim = time_dim, leadtime_dim = leadtime_dim, - lat_dim = lat_dim, na.rm = na.rm, ncores = ncores) + if (is.null(pet[[k]]) | computed_pet) { + pet[[k]] <- .Evapotranspiration(data = data, + dates_monthly = dates_monthly[[k]], + lat = lat, pet_method = pet_method[k], + time_dim = time_dim, + leadtime_dim = leadtime_dim, + lat_dim = lat_dim, na.rm = na.rm, + ncores = ncores) computed_pet <- TRUE } # Accumulation - diff_p_pet <- data$prlr - pet + diff_p_pet <- data$prlr - pet[[k]] data_accum <- .Accumulation(data = diff_p_pet, dates_monthly = dates_monthly[[k]], accum = accum, time_dim = time_dim, leadtime_dim = leadtime_dim, @@ -660,7 +743,9 @@ PeriodSPEI <- function(exp, dates_exp, lat, .return2list <- function(data1, data2 = NULL) { - if (is.null(data2)) { + if (is.null(data1) & is.null(data2)) { + return(NULL) + } else if (is.null(data2)) { return(list(data1)) } else { return(list(data1, data2)) diff --git a/man/CST_PeriodSPEI.Rd b/man/CST_PeriodSPEI.Rd index 2bc5c47..70c9b21 100644 --- a/man/CST_PeriodSPEI.Rd +++ b/man/CST_PeriodSPEI.Rd @@ -7,7 +7,6 @@ CST_PeriodSPEI( exp, exp_cor = NULL, - pet = NULL, time_dim = "syear", leadtime_dim = "time", memb_dim = "ensemble", @@ -15,6 +14,8 @@ CST_PeriodSPEI( accum = 1, ref_period = NULL, params = NULL, + pet_exp = NULL, + pet_expcor = NULL, standardization = TRUE, cross_validation = FALSE, pet_method = "hargreaves", @@ -42,10 +43,6 @@ precipitation ('prlr') need to be in mm/month.} variable in which the quantile PeriodSPEI should be applied. If it is not specified, the PeriodSPEI is calculated from object 'exp'.} -\item{pet}{A multidimensional array containing the Potential -EvapoTranspiration data. If it is NULL it is calculated using pet_method. It -is NULL by default.} - \item{time_dim}{A character string indicating the name of the temporal dimension. By default, it is set to 'syear'.} @@ -69,12 +66,22 @@ used as starting and end points.} \item{params}{A multidimensional array with named dimensions for computing the SPEI. This option overrides computation of fitting parameters. It needs -to be of same leadtime and time dimensions of exp and a dimension named +to be of same leadtime and time dimensions of 'exp' and a dimension named 'coef' with the length of the coefficients needed for the used distribution (for 'Gamma' coef dimension is of lenght 2, for 'log-Logistic' or 'PearsonIII' is of length 3). If cross_validation is FALSE, the leadtime dimension must be of length 1.} +\item{pet_exp}{A multidimensional array containing the Potential +EvapoTranspiration data of 'exp'. It must have the same dimensions of the +variable 'prlr' of 'exp'. If it is NULL it is calculated using the provided +variables with specified 'pet_method'. It is NULL by default.} + +\item{pet_expcor}{A multidimensional array containing the Potential +EvapoTranspiration data of 'exp_cor'. It must have the same dimensions of +the variable 'prlr' of 'exp_cor'. If it is NULL it is calculated using the +provided variables with specified 'pet_method'. It is NULL by default.} + \item{standardization}{A logical value indicating wether the standardization is computed.} @@ -88,7 +95,8 @@ variables tasmin and tasmax; and 'thornthwaite', that requires variable 'tas'.} \item{method}{A character string indicating the standardization method used. -If can be: 'parametric' or 'non-parametric'.} +If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by +default.} \item{distribution}{A character string indicating the name of the distribution function to be used for computing the SPEI. The accepted names are: @@ -110,6 +118,22 @@ from data. It is FALSE by default.} \item{ncores}{An integer value indicating the number of cores to use in parallel computation.} } +\value{ +A list with elements: +\itemize{ + \item{'exp', if 'standarization' is TRUE an 's2dv_cube' conaining the 'SPEI' + in element data from 'exp' array with the same dimensions as 'exp'. + If it is FALSE, it is an array with the accumulated values of PET + minus 'prlr' data.} + \item{'exp_cor', if 'standarization' is TRUE and if 'exp_cor' is not + NULL. It is an 's2dv_cube' with the SPEI data from 'exp_cor' in + element 'data'. If 'standarization' is FALSE, only the accumulated + values of PET minus 'prlr' is returned.} + \item{'params', returned if 'standarization' is TRUE, it contains the + parameters used for the standarization of 'exp' that are used for + computing the 'SPEI' for 'exp_cor'.} +} +} \description{ Calculation of the Standardised Precipitation-Evapotranspiration Index (SPEI) that is a multiscalar drought index based on climatic data. It can be used for @@ -122,17 +146,18 @@ a simple climatic water balance which is calculated at different time scales to obtain the SPEI. } \examples{ -# Test random data dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 25) -dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, +dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 2, longitude = 1, ensemble = 15) -dates <- as.POSIXct(c(paste0(2010:2015, "-08-16"), - paste0(2010:2015, "-09-15"), - paste0(2010:2015, "-10-16")), 'UTC') -dim(dates) <- c(sday = 1, sweek = 1, syear = 6, time = 3) - +dates_exp <- as.POSIXct(c(paste0(2010:2015, "-08-16"), + paste0(2010:2015, "-09-15"), + paste0(2010:2015, "-10-16")), 'UTC') +dim(dates_exp) <- c(sday = 1, sweek = 1, syear = 6, time = 3) +dates_expcor <- as.POSIXct(c(paste0(2020, "-08-16"), paste0(2020, "-09-15"), + paste0(2020, "-10-16")), 'UTC') +dim(dates_expcor) <- c(sday = 1, sweek = 1, syear = 1, time = 3) lat <- c(40,40.1) exp_tasmax <- array(rnorm(900, 27.73, 5.26), dim = dims) @@ -148,9 +173,9 @@ exp_cor <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, 'prlr' = expcor_prlr) exp <- lapply(exp, CSTools::s2dv_cube, coords = list(latitude = lat), - Dates = dates) + Dates = dates_exp) exp_cor <- lapply(exp_cor, CSTools::s2dv_cube, coords = list(latitude = lat), - Dates = dates) + Dates = dates_expcor) res <- CST_PeriodSPEI(exp = exp, exp_cor = exp_cor) diff --git a/man/PeriodSPEI.Rd b/man/PeriodSPEI.Rd index 1168122..cf2d23c 100644 --- a/man/PeriodSPEI.Rd +++ b/man/PeriodSPEI.Rd @@ -10,7 +10,6 @@ PeriodSPEI( lat, exp_cor = NULL, dates_expcor = NULL, - pet = NULL, time_dim = "syear", leadtime_dim = "time", memb_dim = "ensemble", @@ -18,6 +17,8 @@ PeriodSPEI( accum = 1, ref_period = NULL, params = NULL, + pet_exp = NULL, + pet_expcor = NULL, standardization = TRUE, cross_validation = FALSE, pet_method = "hargreaves", @@ -53,10 +54,6 @@ specified, the PeriodSPEI is calculated from object 'exp'.} \item{dates_expcor}{An array of temporal dimensions containing the Dates of 'exp_cor'. It must be of class 'Date' or 'POSIXct'.} -\item{pet}{A multidimensional array containing the Potential -EvapoTranspiration data. If it is NULL it is calculated using pet_method. It -is NULL by default.} - \item{time_dim}{A character string indicating the name of the temporal dimension. By default, it is set to 'syear'.} @@ -86,6 +83,16 @@ to be of same leadtime and time dimensions of exp and a dimension named 'PearsonIII' is of length 3). If cross_validation is FALSE, the leadtime dimension must be of length 1.} +\item{pet_exp}{A multidimensional array containing the Potential +EvapoTranspiration data of 'exp'. It must have the same dimensions of the +variable 'prlr' of 'exp'. If it is NULL it is calculated using the provided +variables with specified 'pet_method'. It is NULL by default.} + +\item{pet_expcor}{A multidimensional array containing the Potential +EvapoTranspiration data of 'exp_cor'. It must have the same dimensions of +the variable 'prlr' of 'exp_cor'. If it is NULL it is calculated using the +provided variables with specified 'pet_method'. It is NULL by default.} + \item{standardization}{A logical value indicating wether the standardization is computed.} @@ -121,6 +128,21 @@ from data. It is FALSE by default.} \item{ncores}{An integer value indicating the number of cores to use in parallel computation.} } +\value{ +A list with elements: +\itemize{ + \item{'exp', if 'standarization' is TRUE an array conaining SPEI data from + 'exp' array with the same dimensions as 'exp'. If it is FALSE, it + is an array with the accumulated values of PET minus 'prlr' data.} + \item{'exp_cor', if 'standarization' is TRUE and if 'exp_cor' is not + NULL. It is an array with the SPEI data from 'exp_cor'. If + 'standarization' is FALSE, only the accumulated values of PET minus + 'prlr' is returned.} + \item{'params', returned if 'standarization' is TRUE, it contains the + parameters used for the standarization of 'exp' that are used for + computing the 'SPEI' for 'exp_cor'.} +} +} \description{ Calculation of the Standardised Precipitation-Evapotranspiration Index (SPEI) that is a multiscalar drought index based on climatic data. It can be used for @@ -133,10 +155,9 @@ a simple climatic water balance which is calculated at different time scales to obtain the SPEI. } \examples{ -# Test random data dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 25) -dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, +dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 2, longitude = 1, ensemble = 15) exp_tasmax <- array(rnorm(900, 27.73, 5.26), dim = dims) exp_tasmin <- array(rnorm(900, 14.83, 3.86), dim = dims) @@ -146,15 +167,20 @@ expcor_tasmax <- array(rnorm(540, 29.03, 5.67), dim = dimscor) expcor_tasmin <- array(rnorm(540, 15.70, 4.40), dim = dimscor) expcor_prlr <- array(rnorm(540, 15.62, 21.38), dim = dimscor) -dates <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), - paste0(2010:2015, "-10-16"))) -dim(dates) <- c(sday = 1, sweek = 1, syear = 6, time = 3) - +dates_exp <- as.Date(c(paste0(2010:2015, "-08-16"), + paste0(2010:2015, "-09-15"), + paste0(2010:2015, "-10-16"))) +dim(dates_exp) <- c(sday = 1, sweek = 1, syear = 6, time = 3) +dates_expcor <- as.POSIXct(c(paste0(2020, "-08-16"), + paste0(2020, "-09-15"), + paste0(2020, "-10-16")), 'UTC') +dim(dates_expcor) <- c(sday = 1, sweek = 1, syear = 1, time = 3) lat <- c(40,40.1) exp <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) exp_cor <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, 'prlr' = expcor_prlr) -res <- PeriodSPEI(exp = exp, exp_cor = exp_cor, lat = lat, dates = dates) +res <- PeriodSPEI(exp = exp, exp_cor = exp_cor, lat = lat, + dates_exp = dates_exp, dates_expcor = dates_expcor) } diff --git a/tests/testthat/test-PeriodSPEI.R b/tests/testthat/test-PeriodSPEI.R index e009ad0..fb9b946 100644 --- a/tests/testthat/test-PeriodSPEI.R +++ b/tests/testthat/test-PeriodSPEI.R @@ -86,11 +86,6 @@ test_that("1. Initial checks PeriodSPEI", { PeriodSPEI(exp = exp1, exp_cor = list('tasmax' = array(10))), "Parameter 'exp_cor' needs to be a list of arrays with dimension names." ) - expect_error( - PeriodSPEI(exp = list('tas' = array(10, dim = c(time = 10))), - exp_cor = list('tos' = array(10, dim = c(time = 10)))), - "Parameter 'exp_cor' needs to have the same variable names as 'exp'." - ) # exp (2) expect_warning( PeriodSPEI(exp = exp1, pet_method = '1', dates_exp = dates_exp, lat = lat), @@ -117,26 +112,119 @@ test_that("1. Initial checks PeriodSPEI", { expect_error( PeriodSPEI(exp = exp1, accum = 10, dates_exp = dates_exp, lat = lat) ) - + # standardization + expect_error( + PeriodSPEI(exp = exp1, standardization = 10, dates_exp = dates_exp, lat = lat) + ) + # param_error + expect_error( + PeriodSPEI(exp = exp1, param_error = TRUE, dates_exp = dates_exp, lat = lat) + ) + # handle_infinity + expect_error( + PeriodSPEI(exp = exp1, handle_infinity = 1, dates_exp = dates_exp, lat = lat) + ) + # cross_validation + expect_error( + PeriodSPEI(exp = exp1, cross_validation = 1, dates_exp = dates_exp, lat = lat) + ) + # method + expect_error( + PeriodSPEI(exp = exp1, method = 1, dates_exp = dates_exp, lat = lat) + ) + # distribution + expect_error( + PeriodSPEI(exp = exp1, distribution = 1, dates_exp = dates_exp, lat = lat) + ) + # fit + expect_error( + PeriodSPEI(exp = exp1, fit = 1, dates_exp = dates_exp, lat = lat) + ) + # ncores + expect_error( + PeriodSPEI(exp = exp1, ncores = 1.5, dates_exp = dates_exp, lat = lat) + ) }) ############################################## test_that("2. Output checks", { - res <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, - dates_exp = dates_exp, dates_expcor = dates_expcor) + res1 <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, + dates_exp = dates_exp, dates_expcor = dates_expcor) + res2 <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, + dates_exp = dates_exp, dates_expcor = dates_expcor, + standardization = FALSE) + res3 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, + dates_exp = dates_exp) + res4 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, + dates_exp = dates_exp, standardization = FALSE) + # output dims + expect_equal( + names(res1), + c('exp', 'exp_cor', 'params') + ) expect_equal( - length(res), - 3 + names(res2), + c('exp', 'exp_cor') ) expect_equal( - dim(res[[1]]), + names(res3), + c('exp', 'params') + ) + expect_equal( + names(res4), + c('exp') + ) + expect_equal( + dim(res1[[1]]), dims ) expect_equal( - dim(res[[3]])[which(!names(dim(res[[3]])) %in% c('coef', 'syear'))], + dim(res1[[3]])[which(!names(dim(res1[[3]])) %in% c('coef', 'syear'))], dims[which(!names(dims) %in% c('syear', 'ensemble'))] ) + expect_equal( + dim(res2[[2]]), + dimscor + ) + expect_equal( + dim(res3[[2]]), + c(syear = 1, time = 3, coef = 3, latitude = 2, longitude = 1) + ) + # exp + # exp_cor + # pet + # time_dim + # leadtime_dim + # memb_dim + # lat_dim + # accum + # ref_period + # params + # standarization + # cross_validation + # pet_method - + # method - + # distribution - Only works for 'log-Logistic' + res5 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, + dates_exp = dates_exp, distribution = 'PearsonIII') + res6 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, + dates_exp = dates_exp, distribution = 'Gamma') + # fit - + res7 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, + dates_exp = dates_exp, fit = 'ub-pwm') + res8 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, + dates_exp = dates_exp, fit = 'max-lik') + res9 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, + dates_exp = dates_exp, fit = 'pp-pwm') + all.equal(res7, res8) + all.equal(res7, res9) # res9 doesn't work for this data + # param_error - + # handle_infinity - OK + res9 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, + dates_exp = dates_exp, handle_infinity = FALSE) + # na.rm - + # ncores }) ############################################## \ No newline at end of file -- GitLab From f8c059f64c1f1e4c2192b38656a5fe8ba879ddfa Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 14 Jun 2023 11:09:47 +0200 Subject: [PATCH 11/42] Return 's2dv_cube' spei CST_PeriodSPEI correctly --- NAMESPACE | 2 + R/PeriodSPEI.R | 34 +++++++++++++--- tests/testthat/test-PeriodSPEI.R | 70 ++++++++++++++++++++++++-------- 3 files changed, 83 insertions(+), 23 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 61b6b92..0723757 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -27,9 +27,11 @@ export(TotalSpellTimeExceedingThreshold) export(TotalTimeExceedingThreshold) export(WindCapacityFactor) export(WindPowerDensity) +import(CSTools) import(ClimProjDiags) import(SPEI) import(TLMoments) +import(lmom) import(lmomco) import(lubridate) import(multiApply) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index 39e6daf..213dd73 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -134,7 +134,9 @@ #'@import zoo #'@import TLMoments #'@import lmomco +#'@import lmom #'@import lubridate +#'@import CSTools #'@export CST_PeriodSPEI <- function(exp, exp_cor = NULL, time_dim = 'syear', leadtime_dim = 'time', @@ -200,19 +202,38 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, handle_infinity = handle_infinity, na.rm = na.rm, ncores = ncores) if (is.null(exp_cor)) { - exp$data <- res[[1]] + source_files <- lapply(exp, function(x) {x$attrs$source_files}) + suppressWarnings( + res_exp <- CSTools::s2dv_cube(data = res[[1]], coords = exp[[1]]$coords, + varName = c('SPEI'), Dates = dates_exp, + when = Sys.time(), + source_files = source_files) + ) if (standardization) { - return(list(exp = exp, params = res[[2]])) + return(list(spei_exp = res_exp, params = res[[2]])) } else { return(exp) } } else { - exp$data <- res[[1]] - exp_cor$data <- res[[2]] + source_files_exp <- lapply(exp, function(x) {x$attrs$source_files}) + suppressWarnings( + res_exp <- CSTools::s2dv_cube(data = res[[1]], coords = exp[[1]]$coords, + varName = c('SPEI'), Dates = dates_exp, + source_files = source_files_exp, + when = Sys.time()) + ) + source_files_expcor <- lapply(exp_cor, function(x) {x$attrs$source_files}) + suppressWarnings( + res_expcor <- CSTools::s2dv_cube(data = res[[2]], coords = exp_cor[[1]]$coords, + varName = c('SPEI'), Dates = dates_expcor, + source_files = source_files_expcor, + when = Sys.time()) + ) + if (standardization) { - return(list(exp = exp, exp_cor = exp_cor, params = res[[3]])) + return(list(spei_exp = res_exp, spei_exp_cor = res_expcor, params = res[[3]])) } else { - return(list(exp = exp, exp_cor = exp_cor)) + return(list(spei_exp = res_exp, spei_exp_cor = res_expcor)) } } return(res) @@ -353,6 +374,7 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, #'@import zoo #'@import TLMoments #'@import lmomco +#'@import lmom #'@import lubridate #'@export PeriodSPEI <- function(exp, dates_exp, lat, diff --git a/tests/testthat/test-PeriodSPEI.R b/tests/testthat/test-PeriodSPEI.R index fb9b946..90622ad 100644 --- a/tests/testthat/test-PeriodSPEI.R +++ b/tests/testthat/test-PeriodSPEI.R @@ -35,8 +35,32 @@ lat <- c(40,40.1) exp1 <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) exp_cor1 <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, 'prlr' = expcor_prlr) -res <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, - dates_exp = dates_exp, dates_expcor = dates_expcor) +# dat2 +dims <- c(syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 10) +dimscor <- c(syear = 1, time = 3, latitude = 2, longitude = 1, ensemble = 15) + +set.seed(1) +exp_tas <- array(rnorm(100, 17.34, 9.18), dim = dims) +set.seed(2) +exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims) + +set.seed(1) +expcor_tas <- array(rnorm(100, 17.23, 9.19), dim = dimscor) +set.seed(2) +expcor_prlr <- array(rnorm(60, 15.62, 21.38), dim = dimscor) + +dates_exp <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), + paste0(2010:2015, "-10-16"))) +dim(dates_exp) <- c(sday = 1, sweek = 1, syear = 6, time = 3) + +dates_expcor <- as.Date(c(paste0(2020, "-08-16"), paste0(2020, "-09-15"), + paste0(2020, "-10-16"))) +dim(dates_expcor) <- c(sday = 1, sweek = 1, syear = 1, time = 3) + +lat <- c(40,40.1) + +exp2 <- list('tas' = exp_tas, 'prlr' = exp_prlr) +exp_cor2 <- list('tas' = expcor_tas, 'prlr' = expcor_prlr) ############################################## @@ -202,23 +226,35 @@ test_that("2. Output checks", { # ref_period # params # standarization - # cross_validation - # pet_method - - # method - + # cross_validation - Not working + # pet_method - ok + res5 <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, + dates_exp = dates_exp, dates_expcor = dates_expcor, + pet_method = c('hargreaves', 'hargreaves_modified')) + res6 <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, + dates_exp = dates_exp, dates_expcor = dates_expcor, + pet_method = c('hargreaves_modified', 'hargreaves')) + res7 <- PeriodSPEI(exp = exp2, exp_cor = exp_cor2, lat = lat, + dates_exp = dates_exp, dates_expcor = dates_expcor, + pet_method = c('thornthwaite', 'thornthwaite')) + # method - ok + res8 <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, + dates_exp = dates_exp, dates_expcor = dates_expcor, + method = 'non-parametric') # distribution - Only works for 'log-Logistic' - res5 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, - dates_exp = dates_exp, distribution = 'PearsonIII') - res6 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, - dates_exp = dates_exp, distribution = 'Gamma') - # fit - - res7 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, - dates_exp = dates_exp, fit = 'ub-pwm') - res8 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, - dates_exp = dates_exp, fit = 'max-lik') res9 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, - dates_exp = dates_exp, fit = 'pp-pwm') - all.equal(res7, res8) - all.equal(res7, res9) # res9 doesn't work for this data + dates_exp = dates_exp, distribution = 'PearsonIII') # NA + res10 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, + dates_exp = dates_exp, distribution = 'Gamma') # NA + # fit - + res12 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, + dates_exp = dates_exp, fit = 'ub-pwm') # ok + res13 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, + dates_exp = dates_exp, fit = 'max-lik') # ok + res14 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, + dates_exp = dates_exp, fit = 'pp-pwm') # NA + all.equal(res12, res13) + all.equal(res12, res14) # res14 doesn't work for this data # param_error - # handle_infinity - OK res9 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, -- GitLab From 387c589075ab109c100dff2bb4cdd0a34c9647a2 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 14 Jun 2023 11:17:07 +0200 Subject: [PATCH 12/42] Move auxiliary function to zzz file --- R/PeriodSPEI.R | 12 ------------ R/zzz.R | 10 ++++++++++ 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index 213dd73..44c8552 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -763,18 +763,6 @@ PeriodSPEI <- function(exp, dates_exp, lat, return(spei_res) } - -.return2list <- function(data1, data2 = NULL) { - if (is.null(data1) & is.null(data2)) { - return(NULL) - } else if (is.null(data2)) { - return(list(data1)) - } else { - return(list(data1, data2)) - } -} - - .Evapotranspiration <- function(data, dates_monthly, lat, pet_method = 'hargreaves', time_dim = 'syear', leadtime_dim = 'time', lat_dim = 'latitude', na.rm = FALSE, diff --git a/R/zzz.R b/R/zzz.R index 81bd0ce..47d871d 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -96,4 +96,14 @@ wind2CF <- function(wind, pc) { .KnownLatNames <- function() { known_lat_names <- c('lat', 'lats', 'latitude', 'y', 'j', 'nav_lat') +} + +.return2list <- function(data1, data2 = NULL) { + if (is.null(data1) & is.null(data2)) { + return(NULL) + } else if (is.null(data2)) { + return(list(data1)) + } else { + return(list(data1, data2)) + } } \ No newline at end of file -- GitLab From 382dadd2d269e16e0a521dfcc66b06cec7d92c62 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 14 Jun 2023 11:46:21 +0200 Subject: [PATCH 13/42] Correct Dates output for 's2dv_cube' in CST_PeriodSPEI --- R/PeriodSPEI.R | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index 44c8552..2cd02e8 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -205,9 +205,10 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, source_files <- lapply(exp, function(x) {x$attrs$source_files}) suppressWarnings( res_exp <- CSTools::s2dv_cube(data = res[[1]], coords = exp[[1]]$coords, - varName = c('SPEI'), Dates = dates_exp, - when = Sys.time(), - source_files = source_files) + varName = c('SPEI'), + Dates = exp[[1]]$attrs$Dates, + source_files = source_files, + when = Sys.time()) ) if (standardization) { return(list(spei_exp = res_exp, params = res[[2]])) @@ -218,14 +219,16 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, source_files_exp <- lapply(exp, function(x) {x$attrs$source_files}) suppressWarnings( res_exp <- CSTools::s2dv_cube(data = res[[1]], coords = exp[[1]]$coords, - varName = c('SPEI'), Dates = dates_exp, + varName = c('SPEI'), + Dates = exp[[1]]$attrs$Dates, source_files = source_files_exp, when = Sys.time()) ) source_files_expcor <- lapply(exp_cor, function(x) {x$attrs$source_files}) suppressWarnings( res_expcor <- CSTools::s2dv_cube(data = res[[2]], coords = exp_cor[[1]]$coords, - varName = c('SPEI'), Dates = dates_expcor, + varName = c('SPEI'), + Dates = exp_cor[[1]]$attrs$Dates, source_files = source_files_expcor, when = Sys.time()) ) -- GitLab From 617e68fc0b6a785b508e409179ca8c8fc339ffeb Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 14 Jun 2023 17:16:58 +0200 Subject: [PATCH 14/42] Improve documentation and restrict cross_validation TRUE when exp_cor is not provided --- R/PeriodSPEI.R | 48 +++++++++++++++++--------------- man/CST_PeriodSPEI.Rd | 13 +++++---- man/PeriodSPEI.Rd | 13 +++++---- tests/testthat/test-PeriodSPEI.R | 28 ++++++++++++++++++- 4 files changed, 66 insertions(+), 36 deletions(-) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index 2cd02e8..f5a4b3b 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -39,11 +39,11 @@ #' used as starting and end points. #'@param params A multidimensional array with named dimensions for computing the #' SPEI. This option overrides computation of fitting parameters. It needs -#' to be of same leadtime and time dimensions of 'exp' and a dimension named -#' 'coef' with the length of the coefficients needed for the used distribution -#' (for 'Gamma' coef dimension is of lenght 2, for 'log-Logistic' or -#' 'PearsonIII' is of length 3). If cross_validation is FALSE, the leadtime -#' dimension must be of length 1. +#' to be of same time dimension (specified in 'time_dim') of 'exp' and a +#' dimension named 'coef' with the length of the coefficients needed for the +#' used distribution (for 'Gamma' coef dimension is of lenght 2, for +#' 'log-Logistic' or 'PearsonIII' is of length 3). It also needs to have a +#' leadtime dimension (specified in 'leadtime_dim') of length 1. #'@param pet_exp A multidimensional array containing the Potential #' EvapoTranspiration data of 'exp'. It must have the same dimensions of the #' variable 'prlr' of 'exp'. If it is NULL it is calculated using the provided @@ -55,7 +55,8 @@ #'@param standardization A logical value indicating wether the standardization #' is computed. #'@param cross_validation A logical value indicating if cross validation is -#' done (TRUE), or not (FALSE). It is FALSE by default. +#' done (TRUE), or not (FALSE). It only will be used when 'exp_cor' and +#' is not provided. It is FALSE by default. #'@param pet_method A character string indicating the method used to compute #' the potential evapotranspiration. The accepted methods are: #' 'hargreaves' and 'hargreaves_modified', that require the data to have @@ -211,9 +212,9 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, when = Sys.time()) ) if (standardization) { - return(list(spei_exp = res_exp, params = res[[2]])) + return(list(exp = res_exp, params = res[[2]])) } else { - return(exp) + return(res_exp) } } else { source_files_exp <- lapply(exp, function(x) {x$attrs$source_files}) @@ -234,9 +235,9 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, ) if (standardization) { - return(list(spei_exp = res_exp, spei_exp_cor = res_expcor, params = res[[3]])) + return(list(exp = res_exp, exp_cor = res_expcor, params = res[[3]])) } else { - return(list(spei_exp = res_exp, spei_exp_cor = res_expcor)) + return(list(exp = res_exp, exp_cor = res_expcor)) } } return(res) @@ -289,11 +290,11 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, #' used as starting and end points. #'@param params A multidimensional array with named dimensions for computing the #' SPEI. This option overrides computation of fitting parameters. It needs -#' to be of same leadtime and time dimensions of exp and a dimension named -#' 'coef' with the length of the coefficients needed for the used distribution -#' (for 'Gamma' coef dimension is of lenght 2, for 'log-Logistic' or -#' 'PearsonIII' is of length 3). If cross_validation is FALSE, the leadtime -#' dimension must be of length 1. +#' to be of same time dimension (specified in 'time_dim') of 'exp' and a +#' dimension named 'coef' with the length of the coefficients needed for the +#' used distribution (for 'Gamma' coef dimension is of lenght 2, for +#' 'log-Logistic' or 'PearsonIII' is of length 3). It also needs to have a +#' leadtime dimension (specified in 'leadtime_dim') of length 1. #'@param pet_exp A multidimensional array containing the Potential #' EvapoTranspiration data of 'exp'. It must have the same dimensions of the #' variable 'prlr' of 'exp'. If it is NULL it is calculated using the provided @@ -305,7 +306,8 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, #'@param standardization A logical value indicating wether the standardization #' is computed. #'@param cross_validation A logical value indicating if cross validation is -#' done (TRUE), or not (FALSE). It is FALSE by default. +#' done (TRUE), or not (FALSE). It only will be used when 'exp_cor' and +#' is not provided. It is FALSE by default. #'@param pet_method A character string indicating the method used to compute #' the potential evapotranspiration. The accepted methods are: #' 'hargreaves' and 'hargreaves_modified', that require the data to have @@ -645,12 +647,13 @@ PeriodSPEI <- function(exp, dates_exp, lat, if (!is.logical(cross_validation)) { stop("Parameter 'cross_validation' must be a logical value.") } - # if (cross_validation) { - # warning("Detected 'cross_validation' = TRUE. This functionality ", - # "is being developed, sorry for the inconvenience. It will ", - # "be set to FALSE.") - # cross_validation <- FALSE - # } + if (!is.null(exp_cor)) { + if (cross_validation & standardization) { + warning("Detected 'cross_validation' = TRUE. It will be set as FALSE ", + "since 'exp_cor' is provided.") + cross_validation <- FALSE + } + } ## method if (!(method %in% c('parametric', 'non-parametric'))) { @@ -741,7 +744,6 @@ PeriodSPEI <- function(exp, dates_exp, lat, spei_res[[k]] <- spei_dat[[1]] } else { - pos <- match(names(dim(data[[1]])), names(dim(data_accum))) data_accum <- aperm(data_accum, pos) diff --git a/man/CST_PeriodSPEI.Rd b/man/CST_PeriodSPEI.Rd index 70c9b21..2a7aaa6 100644 --- a/man/CST_PeriodSPEI.Rd +++ b/man/CST_PeriodSPEI.Rd @@ -66,11 +66,11 @@ used as starting and end points.} \item{params}{A multidimensional array with named dimensions for computing the SPEI. This option overrides computation of fitting parameters. It needs -to be of same leadtime and time dimensions of 'exp' and a dimension named -'coef' with the length of the coefficients needed for the used distribution -(for 'Gamma' coef dimension is of lenght 2, for 'log-Logistic' or -'PearsonIII' is of length 3). If cross_validation is FALSE, the leadtime -dimension must be of length 1.} +to be of same time dimension (specified in 'time_dim') of 'exp' and a +dimension named 'coef' with the length of the coefficients needed for the +used distribution (for 'Gamma' coef dimension is of lenght 2, for +'log-Logistic' or 'PearsonIII' is of length 3). It also needs to have a +leadtime dimension (specified in 'leadtime_dim') of length 1.} \item{pet_exp}{A multidimensional array containing the Potential EvapoTranspiration data of 'exp'. It must have the same dimensions of the @@ -86,7 +86,8 @@ provided variables with specified 'pet_method'. It is NULL by default.} is computed.} \item{cross_validation}{A logical value indicating if cross validation is -done (TRUE), or not (FALSE). It is FALSE by default.} +done (TRUE), or not (FALSE). It only will be used when 'exp_cor' and +is not provided. It is FALSE by default.} \item{pet_method}{A character string indicating the method used to compute the potential evapotranspiration. The accepted methods are: diff --git a/man/PeriodSPEI.Rd b/man/PeriodSPEI.Rd index cf2d23c..86aaa02 100644 --- a/man/PeriodSPEI.Rd +++ b/man/PeriodSPEI.Rd @@ -77,11 +77,11 @@ used as starting and end points.} \item{params}{A multidimensional array with named dimensions for computing the SPEI. This option overrides computation of fitting parameters. It needs -to be of same leadtime and time dimensions of exp and a dimension named -'coef' with the length of the coefficients needed for the used distribution -(for 'Gamma' coef dimension is of lenght 2, for 'log-Logistic' or -'PearsonIII' is of length 3). If cross_validation is FALSE, the leadtime -dimension must be of length 1.} +to be of same time dimension (specified in 'time_dim') of 'exp' and a +dimension named 'coef' with the length of the coefficients needed for the +used distribution (for 'Gamma' coef dimension is of lenght 2, for +'log-Logistic' or 'PearsonIII' is of length 3). It also needs to have a +leadtime dimension (specified in 'leadtime_dim') of length 1.} \item{pet_exp}{A multidimensional array containing the Potential EvapoTranspiration data of 'exp'. It must have the same dimensions of the @@ -97,7 +97,8 @@ provided variables with specified 'pet_method'. It is NULL by default.} is computed.} \item{cross_validation}{A logical value indicating if cross validation is -done (TRUE), or not (FALSE). It is FALSE by default.} +done (TRUE), or not (FALSE). It only will be used when 'exp_cor' and +is not provided. It is FALSE by default.} \item{pet_method}{A character string indicating the method used to compute the potential evapotranspiration. The accepted methods are: diff --git a/tests/testthat/test-PeriodSPEI.R b/tests/testthat/test-PeriodSPEI.R index 90622ad..85a1427 100644 --- a/tests/testthat/test-PeriodSPEI.R +++ b/tests/testthat/test-PeriodSPEI.R @@ -226,7 +226,33 @@ test_that("2. Output checks", { # ref_period # params # standarization - # cross_validation - Not working + # cross_validation + expect_warning( + PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, + dates_exp = dates_exp, dates_expcor = dates_expcor, + cross_validation = TRUE), + paste0("Detected 'cross_validation' = TRUE. It will be set as FALSE ", + "since 'exp_cor' is provided.") + ) + res_crossval_T <- PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp, + cross_validation = TRUE) + res_crossval_F <- PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp, + cross_validation = FALSE) + # cross_validation = TRUE + expect_equal( + dim(res_crossval_T$exp), + dims + ) + expect_equal( + dim(res_crossval_T$params), + dims + ) + # cross_validation = FALSE + expect_equal( + dim(res_crossval_F$params)[-which(names(dim(res_crossval_F$params)) == 'coef')], + dimscor[-which(names(dimscor) == 'ensemble')] + ) + # pet_method - ok res5 <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, dates_exp = dates_exp, dates_expcor = dates_expcor, -- GitLab From 9b861f0a0b3175848508c654dbab1fc54aca69b7 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 14 Jun 2023 17:22:57 +0200 Subject: [PATCH 15/42] Remove typo --- R/PeriodSPEI.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index f5a4b3b..077137c 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -839,7 +839,6 @@ PeriodSPEI <- function(exp, dates_exp, lat, } rm(data_tmp) } - any(is.na(data3)) if (!is.null(data3)) { data_tmp <- as.vector(data3) data3 <- array(0, dim = length(dates_monthly)) -- GitLab From 710f49ed8e21dc0510873066468c50eb0c5e4a88 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 28 Jun 2023 13:17:12 +0200 Subject: [PATCH 16/42] Correct parameter 'na.rm' in Standardization call, it was not included; corrected documentation for 'params'; added check for 'params' dimensions --- R/PeriodSPEI.R | 67 +++++++++++++++++--------------- man/CST_PeriodSPEI.Rd | 11 +++--- tests/testthat/test-PeriodSPEI.R | 24 +++++++++++- 3 files changed, 64 insertions(+), 38 deletions(-) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index 077137c..f947531 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -39,11 +39,12 @@ #' used as starting and end points. #'@param params A multidimensional array with named dimensions for computing the #' SPEI. This option overrides computation of fitting parameters. It needs -#' to be of same time dimension (specified in 'time_dim') of 'exp' and a -#' dimension named 'coef' with the length of the coefficients needed for the -#' used distribution (for 'Gamma' coef dimension is of lenght 2, for -#' 'log-Logistic' or 'PearsonIII' is of length 3). It also needs to have a -#' leadtime dimension (specified in 'leadtime_dim') of length 1. +#' to have the following dimensions: same leadtime dimension of 'exp' +#' (specified in 'leadtime_dim'); time dimension of length 1 (specified in +#' 'time_dim'); and a dimension named 'coef' with the length of the +#' coefficients needed for the used distribution (for 'Gamma' coef dimension is +#' of lenght 2, for 'log-Logistic' or 'PearsonIII' is of length 3). It can't +#' have member dimension (specified in 'memb_dim'). #'@param pet_exp A multidimensional array containing the Potential #' EvapoTranspiration data of 'exp'. It must have the same dimensions of the #' variable 'prlr' of 'exp'. If it is NULL it is calculated using the provided @@ -617,16 +618,6 @@ PeriodSPEI <- function(exp, dates_exp, lat, stop(paste0("Cannot compute accumulation of ", accum, " months because ", "loaded data has only ", dim(exp[[1]])[leadtime_dim], " months.")) } - ## params - if (!is.null(params)) { - if (!is.numeric(params)) { - stop("Parameter 'params' must be numeric.") - } - if (!all(c(time_dim, leadtime_dim, 'coef') %in% names(dim(params)))) { - stop("Parameter 'params' must be a multidimensional array with named ", - "dimensions: 'time_dim', 'leadtime_dim' and 'coef'.") - } - } ## standardization if (!is.logical(standardization)) { @@ -678,9 +669,29 @@ PeriodSPEI <- function(exp, dates_exp, lat, stop("Parameter 'ncores' must be a positive integer.") } } - # Data preparation - - # complete dates + ## params + if (!is.null(params)) { + if (!is.numeric(params)) { + stop("Parameter 'params' must be numeric.") + } + if (!all(c(time_dim, leadtime_dim, 'coef') %in% names(dim(params)))) { + stop("Parameter 'params' must be a multidimensional array with named ", + "dimensions: '", time_dim, "', '", leadtime_dim, "' and 'coef'.") + } + if (distribution == "Gamma") { + if (dim(params)['coef'] != 2) { + stop("For '", distribution, "' distribution, params array should have ", + "'coef' dimension of length 2.") + } + } else { + if (dim(params)['coef'] != 3) { + stop("For '", distribution, "' distribution, params array should have ", + "'coef' dimension of length 3.") + } + } + } + + # Complete dates dates_monthly <- NULL k = 0 for (dates in .return2list(dates_exp, dates_expcor)) { @@ -736,9 +747,8 @@ PeriodSPEI <- function(exp, dates_exp, lat, handle_infinity = handle_infinity, param_error = param_error, method = method, distribution = distribution, - fit = fit, ncores = ncores) + fit = fit, na.rm = na.rm, ncores = ncores) params <- spei_dat$params - pos <- match(names(dim(data[[1]])), names(dim(spei_dat[[1]]))) spei_dat[[1]] <- aperm(spei_dat[[1]], pos) @@ -972,11 +982,6 @@ PeriodSPEI <- function(exp, dates_exp, lat, # with the values repeated each time_dim and leadtime_dim params <- aperm(params, c(2,3,1)) names(dim(params)) <- c(time_dim, leadtime_dim, 'coef') - } else { - if (dim(params)['coef'] != length(coef)) { - stop(paste0("Params array should have 'coef' dimension of length ", - length(coef), ".")) - } } spei <- Apply(data = list(data = data, params = params), @@ -985,13 +990,11 @@ PeriodSPEI <- function(exp, dates_exp, lat, fun = .standardization, coef = coef, leadtime_dim = leadtime_dim, - time_dim = time_dim, - memb_dim = memb_dim, - handle_infinity = handle_infinity, - cross_validation = cross_validation, - method = method, distribution = distribution, fit = fit, - ref_period = ref_period, param_error = param_error, - na.rm = na.rm, + time_dim = time_dim, memb_dim = memb_dim, + ref_period = ref_period, handle_infinity = handle_infinity, + cross_validation = cross_validation, param_error = param_error, + method = method, distribution = distribution, + fit = fit, na.rm = na.rm, output_dims = list(data_spei = c(leadtime_dim, time_dim, memb_dim), params = c(time_dim, leadtime_dim, 'coef')), ncores = ncores) diff --git a/man/CST_PeriodSPEI.Rd b/man/CST_PeriodSPEI.Rd index 2a7aaa6..8e8ab19 100644 --- a/man/CST_PeriodSPEI.Rd +++ b/man/CST_PeriodSPEI.Rd @@ -66,11 +66,12 @@ used as starting and end points.} \item{params}{A multidimensional array with named dimensions for computing the SPEI. This option overrides computation of fitting parameters. It needs -to be of same time dimension (specified in 'time_dim') of 'exp' and a -dimension named 'coef' with the length of the coefficients needed for the -used distribution (for 'Gamma' coef dimension is of lenght 2, for -'log-Logistic' or 'PearsonIII' is of length 3). It also needs to have a -leadtime dimension (specified in 'leadtime_dim') of length 1.} +to have the following dimensions: same leadtime dimension of 'exp' +(specified in 'leadtime_dim'); time dimension of length 1 (specified in +'time_dim'); and a dimension named 'coef' with the length of the +coefficients needed for the used distribution (for 'Gamma' coef dimension is +of lenght 2, for 'log-Logistic' or 'PearsonIII' is of length 3). It can't +have member dimension (specified in 'memb_dim').} \item{pet_exp}{A multidimensional array containing the Potential EvapoTranspiration data of 'exp'. It must have the same dimensions of the diff --git a/tests/testthat/test-PeriodSPEI.R b/tests/testthat/test-PeriodSPEI.R index 85a1427..c6b8075 100644 --- a/tests/testthat/test-PeriodSPEI.R +++ b/tests/testthat/test-PeriodSPEI.R @@ -34,6 +34,8 @@ lat <- c(40,40.1) exp1 <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) exp_cor1 <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, 'prlr' = expcor_prlr) +params1 <- array(abs(rnorm(100)), dim = c(syear = 1, time = 3, latitude = 2, + longitude = 1, coef = 3)) # dat2 dims <- c(syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 10) @@ -182,6 +184,8 @@ test_that("2. Output checks", { dates_exp = dates_exp) res4 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, dates_exp = dates_exp, standardization = FALSE) + res5 <- PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp, + params = params1) # output dims expect_equal( names(res1), @@ -223,8 +227,25 @@ test_that("2. Output checks", { # memb_dim # lat_dim # accum + res11 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, accum = 2, + dates_exp = dates_exp, na.rm = TRUE) + expect_equal( + res11$exp[1,2,1,1,][1:4], + c(-0.5553128, 0.4689562, -0.4682003, -0.9956847), + tolerance = 0.0001 + ) # ref_period # params + expect_error( + PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp, + params = array(abs(rnorm(100)), dim = dimscor)), + paste0("Parameter 'params' must be a multidimensional array with named ", + "dimensions: 'syear', 'time' and 'coef'.") + ) + expect_equal( + dim(res5$params), + c(syear = 1, time = 3, coef = 3, latitude = 2, longitude = 1) + ) # standarization # cross_validation expect_warning( @@ -245,7 +266,7 @@ test_that("2. Output checks", { ) expect_equal( dim(res_crossval_T$params), - dims + c(syear = 6, time = 3, coef = 3, latitude = 2, longitude = 1) ) # cross_validation = FALSE expect_equal( @@ -286,6 +307,7 @@ test_that("2. Output checks", { res9 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, dates_exp = dates_exp, handle_infinity = FALSE) # na.rm - + # ncores }) -- GitLab From 6f95d8e143cbf58ee7db64f2a3082163cb95096a Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 5 Jul 2023 17:38:31 +0200 Subject: [PATCH 17/42] Develop SPEI in order that NA from accumulation are not used to compute standardization, then added again NA --- R/PeriodSPEI.R | 81 +++++++++++---- tests/testthat/test-PeriodSPEI.R | 172 ++++++++++++++++++++++++------- 2 files changed, 194 insertions(+), 59 deletions(-) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index f947531..8a0b7e2 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -410,9 +410,10 @@ PeriodSPEI <- function(exp, dates_exp, lat, first_dims <- dims[[1]] all_equal <- all(sapply(dims[-1], function(x) identical(first_dims, x))) if (!all_equal) { - stop("Parameter 'exp' variables needs to have the same dimension names.") + stop("Parameter 'exp' variables need to have the same dimensions.") } + ## exp_cor if (!is.null(exp_cor)) { if (!inherits(exp_cor, 'list')) { @@ -429,7 +430,7 @@ PeriodSPEI <- function(exp, dates_exp, lat, first_dimscor <- dimscor[[1]] all_equal <- all(sapply(dimscor[-1], function(x) identical(first_dimscor, x))) if (!all_equal) { - stop("Parameter 'exp_cor' variables needs to have the same dimension names.") + stop("Parameter 'exp_cor' variables need to have the same dimensions.") } } @@ -437,14 +438,20 @@ PeriodSPEI <- function(exp, dates_exp, lat, if (!is.numeric(lat)) { stop("Parameter 'lat' must be numeric.") } + if (!lat_dim %in% names(dims[[1]])) { + stop("Parameter 'exp' must have 'lat_dim' dimension.") + } if (any(sapply(dims, FUN = function(x) x[lat_dim] != length(lat)))) { stop("Parameter 'lat' needs to have the same length of latitudinal", "dimension of all the variables arrays in 'exp'.") } if (!is.null(exp_cor)) { + if (!lat_dim %in% names(dimscor[[1]])) { + stop("Parameter 'exp_cor' must have 'lat_dim' dimension.") + } if (any(sapply(dimscor, FUN = function(x) x[lat_dim] != length(lat)))) { stop("Parameter 'lat' needs to have the same length of latitudinal", - "dimension of all the variables arrays in 'exp'.") + "dimension of all the variables arrays in 'exp'.") } } @@ -562,7 +569,7 @@ PeriodSPEI <- function(exp, dates_exp, lat, } ## memb_dim if (!is.character(memb_dim) | length(memb_dim) != 1) { - stop("Parameter 'time_dim' must be a character string.") + stop("Parameter 'memb_dim' must be a character string.") } if (!all(sapply(exp, function(x) memb_dim %in% names(dim(x))))) { stop("Parameter 'memb_dim' is not found in 'exp' dimension.") @@ -592,6 +599,10 @@ PeriodSPEI <- function(exp, dates_exp, lat, stop("Parameter 'dates_exp' is not of the correct class, ", "only 'Date' and 'POSIXct' classes are accepted.") } + if (!time_dim %in% names(dim(dates_exp)) | !leadtime_dim %in% names(dim(dates_exp))) { + stop("Parameter 'dates_exp' must have 'time_dim' and 'leadtime_dim' ", + "dimension.") + } if (!all(dim(exp[[1]])[c(time_dim, leadtime_dim)] == dim(dates_exp)[c(time_dim, leadtime_dim)])) { stop("Parameter 'dates_exp' needs to have the same length as 'time_dim' ", @@ -607,6 +618,10 @@ PeriodSPEI <- function(exp, dates_exp, lat, stop("Element 'Dates' in 'exp_cor' is not of the correct class, ", "only 'Date' and 'POSIXct' classes are accepted.") } + if (!time_dim %in% names(dim(dates_expcor)) | !leadtime_dim %in% names(dim(dates_expcor))) { + stop("Parameter 'dates_expcor' must have 'time_dim' and 'leadtime_dim' ", + "dimension.") + } if (!all(dim(exp_cor[[1]])[c(time_dim, leadtime_dim)] == dim(dates_expcor)[c(time_dim, leadtime_dim)])) { stop("Parameter 'dates_expcor' needs to have the same length as ", @@ -738,21 +753,21 @@ PeriodSPEI <- function(exp, dates_exp, lat, ncores = ncores) # Standardization if (standardization) { - spei_dat <- .Standardization(data = data_accum, params = params, - accum = accum, time_dim = time_dim, - leadtime_dim = leadtime_dim, - memb_dim = memb_dim, - ref_period = ref_period, - cross_validation = cross_validation, - handle_infinity = handle_infinity, - param_error = param_error, - method = method, distribution = distribution, - fit = fit, na.rm = na.rm, ncores = ncores) - params <- spei_dat$params - pos <- match(names(dim(data[[1]])), names(dim(spei_dat[[1]]))) - spei_dat[[1]] <- aperm(spei_dat[[1]], pos) - - spei_res[[k]] <- spei_dat[[1]] + spei <- .Standardization(data = data_accum, params = params, + accum = accum, time_dim = time_dim, + leadtime_dim = leadtime_dim, + memb_dim = memb_dim, + ref_period = ref_period, + cross_validation = cross_validation, + handle_infinity = handle_infinity, + param_error = param_error, + method = method, distribution = distribution, + fit = fit, na.rm = na.rm, ncores = ncores) + params <- spei$params + pos <- match(names(dim(data[[1]])), names(dim(spei[[1]]))) + spei[[1]] <- aperm(spei[[1]], pos) + + spei_res[[k]] <- spei[[1]] } else { pos <- match(names(dim(data[[1]])), names(dim(data_accum))) data_accum <- aperm(data_accum, pos) @@ -961,6 +976,18 @@ PeriodSPEI <- function(exp, dates_exp, lat, method = 'parametric', distribution = 'log-Logistic', fit = 'ub-pwm', na.rm = FALSE, ncores = NULL) { + # Remove NA data due to accumulation + if (accum > 1) { + data <- ClimProjDiags::Subset(x = data, along = leadtime_dim, + indices = accum:dim(data)[leadtime_dim], + drop = FALSE) + if (!is.null(params)) { + params <- ClimProjDiags::Subset(x = params, along = leadtime_dim, + indices = accum:dim(params)[leadtime_dim], + drop = FALSE) + } + } + nleadtime <- dim(data)[leadtime_dim] ntime <- dim(data)[time_dim] @@ -995,11 +1022,23 @@ PeriodSPEI <- function(exp, dates_exp, lat, cross_validation = cross_validation, param_error = param_error, method = method, distribution = distribution, fit = fit, na.rm = na.rm, - output_dims = list(data_spei = c(leadtime_dim, time_dim, memb_dim), + output_dims = list(spei = c(leadtime_dim, time_dim, memb_dim), params = c(time_dim, leadtime_dim, 'coef')), ncores = ncores) - return(spei) # spei is a list of data_spei and params + # Add again NA values due to accumulation + if (accum > 1) { + for (s in 1:length(spei)) { + spei[[s]] <- Apply(data = list(spei[[s]]), target_dims = leadtime_dim, + fun = function(x, accum = 1, leadtime_dim = 'time') { + res <- c(rep(NA, accum-1), x) + dim(res) <- length(res) + names(dim(res)) <- leadtime_dim + return(res) + }, accum = accum, leadtime_dim = leadtime_dim)$output1 + } + } + return(spei) } .standardization <- function(data, params, coef, leadtime_dim = 'time', diff --git a/tests/testthat/test-PeriodSPEI.R b/tests/testthat/test-PeriodSPEI.R index c6b8075..4e42859 100644 --- a/tests/testthat/test-PeriodSPEI.R +++ b/tests/testthat/test-PeriodSPEI.R @@ -4,6 +4,19 @@ cube1 <- NULL cube1$data <- array(rnorm(10), dim = c(lat = 2, time = 5)) class(cube1) <- 's2dv_cube' +# cube2 +cube2 <- NULL +cube2$data <- array(rnorm(10), dim = c(lat = 2, time = 5)) +class(cube2) <- 's2dv_cube' +cube2$coords <- list(lat = 1:2) + +# cube3 +cube3 <- NULL +cube3$data <- array(rnorm(10), dim = c(lat = 2, time = 5)) +class(cube3) <- 's2dv_cube' +cube3$coords <- list(lat = 1:2) +cube3$attrs$Dates <- as.Date(c(paste0(2010:2014, "-08-16"))) + # dat1 dims <- c(syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 10) dimscor <- c(syear = 1, time = 3, latitude = 2, longitude = 1, ensemble = 15) @@ -23,7 +36,7 @@ set.seed(3) expcor_prlr <- array(rnorm(60, 15.62, 21.38), dim = dimscor) dates_exp <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), - paste0(2010:2015, "-10-16"))) + paste0(2010:2015, "-10-16"))) dim(dates_exp) <- c(sday = 1, sweek = 1, syear = 6, time = 3) dates_expcor <- as.Date(c(paste0(2020, "-08-16"), paste0(2020, "-09-15"), @@ -38,26 +51,26 @@ params1 <- array(abs(rnorm(100)), dim = c(syear = 1, time = 3, latitude = 2, longitude = 1, coef = 3)) # dat2 -dims <- c(syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 10) -dimscor <- c(syear = 1, time = 3, latitude = 2, longitude = 1, ensemble = 15) +dims2 <- c(styear = 6, ftime = 3, lat = 2, lon = 1, member = 10) +dimscor2 <- c(styear = 1, ftime = 3, lat = 2, lon = 1, member = 15) set.seed(1) -exp_tas <- array(rnorm(100, 17.34, 9.18), dim = dims) +exp_tas <- array(rnorm(100, 17.34, 9.18), dim = dims2) set.seed(2) -exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims) +exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims2) set.seed(1) -expcor_tas <- array(rnorm(100, 17.23, 9.19), dim = dimscor) +expcor_tas <- array(rnorm(100, 17.23, 9.19), dim = dimscor2) set.seed(2) -expcor_prlr <- array(rnorm(60, 15.62, 21.38), dim = dimscor) +expcor_prlr <- array(rnorm(60, 15.62, 21.38), dim = dimscor2) -dates_exp <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), +dates_exp2 <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), paste0(2010:2015, "-10-16"))) -dim(dates_exp) <- c(sday = 1, sweek = 1, syear = 6, time = 3) +dim(dates_exp2) <- c(sday = 1, sweek = 1, styear = 6, ftime = 3) -dates_expcor <- as.Date(c(paste0(2020, "-08-16"), paste0(2020, "-09-15"), +dates_expcor2 <- as.Date(c(paste0(2020, "-08-16"), paste0(2020, "-09-15"), paste0(2020, "-10-16"))) -dim(dates_expcor) <- c(sday = 1, sweek = 1, syear = 1, time = 3) +dim(dates_expcor2) <- c(sday = 1, sweek = 1, styear = 1, ftime = 3) lat <- c(40,40.1) @@ -67,6 +80,7 @@ exp_cor2 <- list('tas' = expcor_tas, 'prlr' = expcor_prlr) ############################################## test_that("1. Initial checks CST_PeriodSPEI", { + # Check 's2dv_cube' expect_error( CST_PeriodSPEI(exp = NULL), "Parameter 'exp' cannot be NULL." @@ -75,11 +89,25 @@ test_that("1. Initial checks CST_PeriodSPEI", { CST_PeriodSPEI(exp = array(10)), "Parameter 'exp' must be a list of 's2dv_cube' class." ) + # latitude expect_error( CST_PeriodSPEI(exp = list(cube1)), paste0("Spatial coordinate names of parameter 'exp' do not match any ", "of the names accepted by the package.") ) + # Dates + expect_error( + CST_PeriodSPEI(exp = list(cube2)), + paste0("Element 'Dates' is not found in 'attrs' list of 'exp'. ", + "See 's2dv_cube' object description in README file for more ", + "information.") + ) + expect_error( + CST_PeriodSPEI(exp = list(cube3), exp_cor = list(cube2)), + paste0("Element 'Dates' is not found in 'attrs' list of 'exp_cor'. ", + "See 's2dv_cube' object description in README file for more ", + "information.") + ) }) ############################################## @@ -95,9 +123,19 @@ test_that("1. Initial checks PeriodSPEI", { "Parameter 'exp' needs to be a named list with the variable names." ) expect_error( - PeriodSPEI(exp = list('tasmax' = array(10))), + PeriodSPEI(exp = list(tasmax = array(10))), "Parameter 'exp' needs to be a list of arrays with dimension names." ) + expect_error( + PeriodSPEI(exp = list(tasmax = array(10, c(time = 10)), + tasmin = array(10, c(time = 11)))), + "Parameter 'exp' variables need to have the same dimensions." + ) + expect_error( + PeriodSPEI(exp = list(tasmax = array(10, c(time = 10)), + tasmin = array(10, c(ftime = 10)))), + "Parameter 'exp' variables need to have the same dimensions." + ) # exp_cor expect_error( PeriodSPEI(exp = exp1, exp_cor = 1), @@ -112,6 +150,20 @@ test_that("1. Initial checks PeriodSPEI", { PeriodSPEI(exp = exp1, exp_cor = list('tasmax' = array(10))), "Parameter 'exp_cor' needs to be a list of arrays with dimension names." ) + expect_error( + PeriodSPEI(exp = exp1, exp_cor = list(tasmax = array(10, c(time = 10)), + tasmin = array(10, c(time = 11)))), + "Parameter 'exp_cor' variables need to have the same dimensions." + ) + expect_error( + PeriodSPEI(exp = exp1, lat = 'lat'), + "Parameter 'lat' must be numeric." + ) + expect_error( + PeriodSPEI(exp = list(tasmax = array(10, c(time = 10)), + tasmin = array(10, c(time = 10))), lat = 1:2), + "Parameter 'exp' must have 'lat_dim' dimension." + ) # exp (2) expect_warning( PeriodSPEI(exp = exp1, pet_method = '1', dates_exp = dates_exp, lat = lat), @@ -120,55 +172,95 @@ test_that("1. Initial checks PeriodSPEI", { ) # time_dim expect_error( - PeriodSPEI(exp = exp1, time_dim = 1, dates_exp = dates_exp, lat = lat) + PeriodSPEI(exp = exp1, time_dim = 1, dates_exp = dates_exp, lat = lat), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + PeriodSPEI(exp = exp2, exp_cor = exp_cor2, lat = lat, + dates_exp = dates_exp2, dates_expcor = dates_expcor2, + lat_dim = 'lat', pet_method = 'thornthwaite'), + "Parameter 'time_dim' is not found in 'exp' dimension." ) # leadtime_dim expect_error( - PeriodSPEI(exp = exp1, time_dim = 1, dates_exp = dates_exp, lat = lat) + PeriodSPEI(exp = exp1, leadtime_dim = 1, dates_exp = dates_exp, lat = lat), + "Parameter 'leadtime_dim' must be a character string." + ) + expect_error( + PeriodSPEI(exp = exp2, exp_cor = exp_cor2, lat = lat, + dates_exp = dates_exp2, dates_expcor = dates_expcor2, + lat_dim = 'lat', time_dim = 'ftime', pet_method = 'thornthwaite'), + "Parameter 'leadtime_dim' is not found in 'exp' dimension." ) # memb_dim expect_error( - PeriodSPEI(exp = exp1, memb_dim = 1, dates_exp = dates_exp, lat = lat) + PeriodSPEI(exp = exp1, memb_dim = 1, dates_exp = dates_exp, lat = lat), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + PeriodSPEI(exp = exp2, exp_cor = exp_cor2, lat = lat, + dates_exp = dates_exp2, dates_expcor = dates_expcor2, + lat_dim = 'lat', time_dim = 'ftime', leadtime_dim = 'styear', + pet_method = 'thornthwaite'), + "Parameter 'memb_dim' is not found in 'exp' dimension." ) # lat_dim expect_error( PeriodSPEI(exp = exp1, lat_dim = 1, dates_exp = dates_exp, lat = lat) ) + expect_error( + PeriodSPEI(exp = exp2, exp_cor = exp_cor2, lat = lat, + dates_exp = dates_exp2, dates_expcor = dates_expcor2), + "Parameter 'exp' must have 'lat_dim' dimension." + ) # accum expect_error( - PeriodSPEI(exp = exp1, accum = 10, dates_exp = dates_exp, lat = lat) + PeriodSPEI(exp = exp1, accum = 10, dates_exp = dates_exp, lat = lat), + "Cannot compute accumulation of 10 months because loaded data has only 3 months." ) # standardization expect_error( - PeriodSPEI(exp = exp1, standardization = 10, dates_exp = dates_exp, lat = lat) + PeriodSPEI(exp = exp1, standardization = 10, dates_exp = dates_exp, lat = lat), + "Parameter 'standardization' must be a logical value." ) # param_error expect_error( - PeriodSPEI(exp = exp1, param_error = TRUE, dates_exp = dates_exp, lat = lat) + PeriodSPEI(exp = exp1, param_error = TRUE, dates_exp = dates_exp, lat = lat), + "Parameter 'param_error' must be a numeric value." ) # handle_infinity expect_error( - PeriodSPEI(exp = exp1, handle_infinity = 1, dates_exp = dates_exp, lat = lat) + PeriodSPEI(exp = exp1, handle_infinity = 1, dates_exp = dates_exp, lat = lat), + "Parameter 'handle_infinity' must be a logical value." ) # cross_validation expect_error( - PeriodSPEI(exp = exp1, cross_validation = 1, dates_exp = dates_exp, lat = lat) + PeriodSPEI(exp = exp1, cross_validation = 1, dates_exp = dates_exp, lat = lat), + "Parameter 'cross_validation' must be a logical value." ) # method expect_error( - PeriodSPEI(exp = exp1, method = 1, dates_exp = dates_exp, lat = lat) + PeriodSPEI(exp = exp1, method = 1, dates_exp = dates_exp, lat = lat), + paste0("Parameter 'method' must be a character string containing one ", + "of the following methods: 'parametric' or 'non-parametric'.") ) # distribution expect_error( - PeriodSPEI(exp = exp1, distribution = 1, dates_exp = dates_exp, lat = lat) + PeriodSPEI(exp = exp1, distribution = 1, dates_exp = dates_exp, lat = lat), + paste0("Parameter 'distribution' must be a character string containing one ", + "of the following distributions: 'log-Logistic', 'Gamma' or ", + "'PearsonIII'.") ) # fit expect_error( - PeriodSPEI(exp = exp1, fit = 1, dates_exp = dates_exp, lat = lat) + PeriodSPEI(exp = exp1, fit = 1, dates_exp = dates_exp, lat = lat), + paste0("Parameter 'fit' must be a character string containing one of ", + "the following fit methods: 'max-lik', 'ub-pwm', 'pp-pwm'.") ) # ncores expect_error( - PeriodSPEI(exp = exp1, ncores = 1.5, dates_exp = dates_exp, lat = lat) + PeriodSPEI(exp = exp1, ncores = 1.5, dates_exp = dates_exp, lat = lat), + "Parameter 'ncores' must be a positive integer." ) }) @@ -182,10 +274,6 @@ test_that("2. Output checks", { standardization = FALSE) res3 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, dates_exp = dates_exp) - res4 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, - dates_exp = dates_exp, standardization = FALSE) - res5 <- PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp, - params = params1) # output dims expect_equal( names(res1), @@ -199,10 +287,6 @@ test_that("2. Output checks", { names(res3), c('exp', 'params') ) - expect_equal( - names(res4), - c('exp') - ) expect_equal( dim(res1[[1]]), dims @@ -222,10 +306,6 @@ test_that("2. Output checks", { # exp # exp_cor # pet - # time_dim - # leadtime_dim - # memb_dim - # lat_dim # accum res11 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, accum = 2, dates_exp = dates_exp, na.rm = TRUE) @@ -236,6 +316,8 @@ test_that("2. Output checks", { ) # ref_period # params + res5 <- PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp, + params = params1) expect_error( PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp, params = array(abs(rnorm(100)), dim = dimscor)), @@ -247,6 +329,16 @@ test_that("2. Output checks", { c(syear = 1, time = 3, coef = 3, latitude = 2, longitude = 1) ) # standarization + res4 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, + dates_exp = dates_exp, standardization = FALSE) + expect_equal( + names(res4), + c('exp') + ) + expect_equal( + dim(res4$exp), + c(syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 10) + ) # cross_validation expect_warning( PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, @@ -281,9 +373,13 @@ test_that("2. Output checks", { res6 <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, dates_exp = dates_exp, dates_expcor = dates_expcor, pet_method = c('hargreaves_modified', 'hargreaves')) + + # time_dim, leadtime_dim, memb_dim, lat_dim res7 <- PeriodSPEI(exp = exp2, exp_cor = exp_cor2, lat = lat, - dates_exp = dates_exp, dates_expcor = dates_expcor, - pet_method = c('thornthwaite', 'thornthwaite')) + dates_exp = dates_exp2, dates_expcor = dates_expcor2, + pet_method = c('thornthwaite', 'thornthwaite'), + lat_dim = 'lat', time_dim = 'styear', + leadtime_dim = 'ftime', memb_dim = 'member') # method - ok res8 <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, dates_exp = dates_exp, dates_expcor = dates_expcor, -- GitLab From 443e5075e9a85a59a4974de5d65469d29e09a630 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 20 Jul 2023 18:16:56 +0200 Subject: [PATCH 18/42] Add new function PeriodPET, improve documentation and add unit tests --- R/PeriodPET.R | 297 +++++++++++++ R/PeriodSPEI.R | 726 ++++++++++++------------------- tests/testthat/test-PeriodPET.R | 178 ++++++++ tests/testthat/test-PeriodSPEI.R | 163 +++++-- 4 files changed, 868 insertions(+), 496 deletions(-) create mode 100644 R/PeriodPET.R create mode 100644 tests/testthat/test-PeriodPET.R diff --git a/R/PeriodPET.R b/R/PeriodPET.R new file mode 100644 index 0000000..fa4079c --- /dev/null +++ b/R/PeriodPET.R @@ -0,0 +1,297 @@ +#'Compute the Potential Evapotranspiration +#' +#'Compute the Potential evapotranspiration (PET) that is the amount of +#'evaporation and transpiration that would occur if a sufficient water source +#'were available. Reference evapotranspiration (ETo) is the amount of +#'evaporation and transpiration from a reference vegetation of grass. They are +#'usually considered equivalent. This set of functions calculate PET or ETo +#'according to the Thornthwaite, Hargreaves or Penman-Monteith equations. +#' +#'@param data A named list with the needed \code{s2dv_cube} objects containing +#' the seasonal forecast experiment in the data element for each variable. +#' Specific variables are needed for each method used in computing the +#' Potential Evapotranspiration. See parameter 'pet_method'. The accepted +#' variables are: 'tasmin' and 'tasmax', required for methods 'hargreaves' and +#' 'hargreaves_modified' and 'tas', required for method 'thornthwaite'. +#' Variable 'prlr' is always needed. The units for temperature variables +#' ('tasmin', 'tasmax' and 'tas') need to be in Celcius degrees; the units for +#' precipitation ('prlr') need to be in mm/month. +#'@param dates An array of temporal dimensions containing the Dates of +#' 'exp'. It must be of class 'Date' or 'POSIXct'. +#'@param lat A numeric vector containing the latitude values of 'exp'. +#'@param pet_method A character string indicating the method used to compute +#' the potential evapotranspiration. The accepted methods are: +#' 'hargreaves' and 'hargreaves_modified', that require the data to have +#' variables tasmin and tasmax; and 'thornthwaite', that requires variable +#' 'tas'. +#'@param time_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'syear'. +#'@param leadtime_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'time'. +#'@param lat_dim A character string indicating the name of the latitudinal +#' dimension. By default it is set by 'latitude'. +#'@param na.rm A logical value indicating whether NA values should be removed +#' from data. It is FALSE by default. +#'@param ncores An integer value indicating the number of cores to use in +#' parallel computation. +#' +#'@examples +#'dims <- c(time = 3, syear = 3, ensemble = 1, latitude = 1) +#' +#'exp_tasmax <- array(rnorm(360, 27.73, 5.26), dim = dims) +#'exp_tasmin <- array(rnorm(360, 14.83, 3.86), dim = dims) +#'exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims) +#'end_year <- 2012 +#'dates_exp <- as.POSIXct(c(paste0(2010:end_year, "-08-16"), +#' paste0(2010:end_year, "-09-15"), +#' paste0(2010:end_year, "-10-16")), "UTC") +#'dim(dates_exp) <- c(syear = 3, time = 3) +#' +#'lat <- c(40) +#' +#'exp1 <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) +#' +#'res <- PeriodPET(data = exp1, lat = lat, dates = dates_exp) +#' +#'@import SPEI +#'@import lubridate +#'@import multiApply +#'@export +PeriodPET <- function(data, dates, lat, pet_method = 'hargreaves', + time_dim = 'syear', leadtime_dim = 'time', + lat_dim = 'latitude', na.rm = FALSE, + ncores = NULL) { + + # Initial checks + ## data + if (!inherits(data, 'list')) { + stop("Parameter 'data' needs to be a named list with the needed variables.") + } + if (is.null(names(data))) { + stop("Parameter 'data' needs to be a named list with the variable names.") + } + if (any(sapply(data, function(x) is.null(names(dim(x)))))) { + stop("Parameter 'data' needs to be a list of arrays with dimension names.") + } + dims <- lapply(data, function(x) dim(x)) + first_dims <- dims[[1]] + all_equal <- all(sapply(dims[-1], function(x) identical(first_dims, x))) + if (!all_equal) { + stop("Parameter 'data' variables need to have the same dimensions.") + } + # lat + if (!is.numeric(lat)) { + stop("Parameter 'lat' must be numeric.") + } + if (!lat_dim %in% names(dims[[1]])) { + stop("Parameter 'data' must have 'lat_dim' dimension.") + } + if (any(sapply(dims, FUN = function(x) x[lat_dim] != length(lat)))) { + stop("Parameter 'lat' needs to have the same length of latitudinal", + "dimension of all the variables arrays in 'data'.") + } + + # data (2) + if (all(c('tasmin', 'tasmax', 'prlr') %in% names(data))) { + # hargreaves modified: 'tasmin', 'tasmax', 'prlr' and 'lat' + if (!(pet_method %in% c('hargreaves_modified', 'hargreaves'))) { + warning("Parameter 'pet_method' needs to be 'hargreaves' or ", + "'hargreaves_modified'. It is set to 'hargreaves_modified'.") + pet_method <- 'hargreaves_modified' + } + } else if (all(c('tasmin', 'tasmax') %in% names(data))) { + if (!(pet_method %in% c('hargreaves'))) { + warning("Parameter 'pet_method' will be set as 'hargreaves'.") + pet_method <- 'hargreaves' + } + } else if (c('tas') %in% names(data)) { + # thornthwaite: 'tas' (mean), 'lat' + if (!(pet_method == 'thornthwaite')) { + warning("Parameter 'pet_method' it is set to be 'thornthwaite'.") + pet_method <- 'thornthwaite' + } + } else { + stop("Parameter 'data' needs to be a named list with accepted ", + "variable names. See documentation.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!all(sapply(data, function(x) time_dim %in% names(dim(x))))) { + stop("Parameter 'time_dim' is not found in 'data' dimension.") + } + ## leadtime_dim + if (!is.character(leadtime_dim) | length(leadtime_dim) != 1) { + stop("Parameter 'leadtime_dim' must be a character string.") + } + if (!all(sapply(data, function(x) leadtime_dim %in% names(dim(x))))) { + stop("Parameter 'leadtime_dim' is not found in 'data' dimension.") + } + ## lat_dim + if (!is.character(lat_dim) | length(lat_dim) != 1) { + stop("Parameter 'lat_dim' must be a character string.") + } + if (!all(sapply(data, function(x) lat_dim %in% names(dim(x))))) { + stop("Parameter 'lat_dim' is not found in 'data' dimension.") + } + # dates + if (is.null(dates)) { + stop("Parameter 'dates' is missing, dates must be provided.") + } + if (!(is.Date(dates)) & !(is.POSIXct(dates))) { + stop("Parameter 'dates' is not of the correct class, ", + "only 'Date' and 'POSIXct' classes are accepted.") + } + if (!time_dim %in% names(dim(dates)) | !leadtime_dim %in% names(dim(dates))) { + stop("Parameter 'dates' must have 'time_dim' and 'leadtime_dim' ", + "dimension.") + } + if (!all(dim(data[[1]])[c(time_dim, leadtime_dim)] == + dim(dates)[c(time_dim, leadtime_dim)])) { + stop("Parameter 'dates' needs to have the same length as 'time_dim' ", + "and 'leadtime_dim' as 'data'.") + } + ## na.rm + if (!is.logical(na.rm) | length(na.rm) > 1) { + stop("Parameter 'na.rm' must be one logical value.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + # complete dates + dates_monthly <- .datesmask(dates) + + lat_mask <- array(lat, dim = c(1, length(lat))) + names(dim(lat_mask)) <- c('dat', lat_dim) + + # extract mask of NA locations to return to NA the final result + mask_na <- array(1, dim = dim(data[[1]])) + if (pet_method == 'hargreaves') { + varnames <- c('tasmax', 'tasmin') + mask_na[which(is.na(data$tasmax))] <- 0 + mask_na[which(is.na(data$tasmin))] <- 0 + } else if (pet_method == 'hargreaves_modified') { + varnames <- c('tasmax', 'tasmin', 'prlr') + mask_na[which(is.na(data$tasmax))] <- 0 + mask_na[which(is.na(data$tasmin))] <- 0 + mask_na[which(is.na(data$prlr))] <- 0 + } else if (pet_method == 'thornthwaite') { + varnames <- c('tas') + mask_na[which(is.na(data$tas))] <- 0 + } + + # replace NA with 0 + for (dd in 1:length(data)) { + data[[dd]][which(is.na(data[[dd]]))] <- 0 + } + + # prepare data + target_dims_data <- lapply(data[varnames], function(x) rep(c(leadtime_dim, time_dim), 1)) + pet <- Apply(data = c(list(lat_mask = lat_mask), data[varnames]), + target_dims = c(list(lat_mask = 'dat'), target_dims_data), + fun = .pet, + dates_monthly = dates_monthly, pet_method = pet_method, + leadtime_dim = leadtime_dim, time_dim = time_dim, + output_dims = c(leadtime_dim, time_dim), + ncores = ncores)$output1 + # reorder dims in pet_estimated + pos <- match(names(dim(data[[1]])), names(dim(pet))) + pet <- aperm(pet, pos) + + # restore original NAs from mask_na + pet[which(mask_na == 0)] <- NA + + return(pet) +} + +.pet <- function(lat_mask, data2, data3 = NULL, data4 = NULL, + dates_monthly, pet_method = 'hargreaves', + leadtime_dim = 'time', time_dim = 'syear') { + + dims <- dim(data2) + + # create a vector from data but adding 0 to achive complete time series + # of the considered period + # (starting in January of the first year) so that the solar radiation + # estimation is computed in each case for the correct month + + if (!is.null(data2)) { + data_tmp <- as.vector(data2) + data2 <- array(0, dim = length(dates_monthly)) + count <- 1 + for (dd in 1:length(dates_monthly)) { + if (dates_monthly[dd] == 1) { + data2[dd] <- data_tmp[count] + count <- count + 1 + } + } + rm(data_tmp) + } + if (!is.null(data3)) { + data_tmp <- as.vector(data3) + data3 <- array(0, dim = length(dates_monthly)) + count <- 1 + for (dd in 1:length(dates_monthly)) { + if (dates_monthly[dd] == 1) { + data3[dd] <- data_tmp[count] + count <- count + 1 + } + } + rm(data_tmp) + } + if (!is.null(data4)) { + data_tmp <- as.vector(data4) + data4 <- array(0, dim = length(dates_monthly)) + count <- 1 + for (dd in 1:length(dates_monthly)) { + if (dates_monthly[dd] == 1) { + data4[dd] <- data_tmp[count] + count <- count + 1 + } + } + rm(data_tmp) + } + if (pet_method == 'hargreaves') { + pet <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), + lat = lat_mask, na.rm = FALSE, verbose = FALSE) + # line to return the vector to the size of the actual original data + pet <- array(pet[which(dates_monthly == 1)], dim = dims) + } + + if (pet_method == 'hargreaves_modified') { + pet <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), + lat = lat_mask, Pre = as.vector(data4), na.rm = FALSE, + verbose = FALSE) + pet <- array(pet[which(dates_monthly == 1)], dim = dims) + } + + if (pet_method == 'thornthwaite') { + pet <- thornthwaite(as.vector(data2), lat = lat_mask, na.rm = TRUE, + verbose = FALSE) + # line to return the vector to the size of the actual original data + pet <- array(pet[which(dates_monthly == 1)], dim = dims) + } + return(pet) +} + + +.datesmask <- function(dates) { + ini <- as.Date(paste(lubridate::year(min(dates)), 01, 01, sep = '-')) + end <- as.Date(paste(lubridate::year(max(dates)), 12, 31, sep = '-')) + daily <- as.Date(ini:end) + monthly <- daily[which(lubridate::day(daily) == 1)] + dates_mask <- array(0, dim = length(monthly)) + for (dd in 1:length(dates)) { + ii <- which(monthly == as.Date(paste(lubridate::year(dates[dd]), + lubridate::month(dates[dd]), + 01, sep = '-'))) + dates_mask[ii] <- 1 + } + return(dates_mask) +} \ No newline at end of file diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index 8a0b7e2..9bc1660 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -1,14 +1,42 @@ #'Compute the Standardised Precipitation-Evapotranspiration Index -#' +#' #'Calculation of the Standardised Precipitation-Evapotranspiration Index (SPEI) #'that is a multiscalar drought index based on climatic data. It can be used for #'determining the onset, duration and magnitude of drought conditions with #'respect to normal conditions in a variety of natural and managed systems such -#'as crops, ecosystems, rivers, water resources, etc. The SPI is calculated -#'using monthly (or weekly) precipitation as the input data. The SPEI uses the -#'monthly (or weekly) difference between precipitation and pet. This represents -#'a simple climatic water balance which is calculated at different time scales -#'to obtain the SPEI. +#'as crops, ecosystems, rivers, water resources, etc. The idea behind the SPEI +#'is to compare the highest possible evapotranspiration with the current water +#'availability. The SPEI uses the monthly (or weekly) difference between +#'precipitation and potential evapotranspiration. This represents a simple +#'climatic water balance which is calculated at different time scales to obtain +#'the SPEI. This function is build to work be compatible with other tools in +#'that work with 's2dv_cube' object class. The input data must be this object +#'class. If you don't work with 's2dv_cube', see PeriodSPEI. +#' +#'Next, some specifications for the calculation of this indicator will be +#'discussed. On the one hand, the model to be used to calculate potential +#'evapotranspiration is specified with the pet_method parameter (hargreaves, +#'hargraves modified or thornwhite). On the other hand, to choose the time scale +#'in which you want to accumulate the SPEI (SPEI3, SPEI6...) is done using the +#'accum parameter, where you must indicate the number of time steps you want to +#'accumulate throughout leadtime_dim. Since the accumulation is done for the +#'elapsed time steps, there will be no complete accumulations until reaching the +#'time instant equal to the value of the parameter. For this reason, in the +#'result, we will find that for the dimension where the accumulation has been +#'carried out, the values of the array will be NA since they do not include +#'complete accumulations. Also, there is a parameter to specify if the +#'standardization that can be TRUE or FALSE. If it is TRUE, the data is fit to a +#'probability distribution to transform the original values to standardized +#'units that are comparable in space and time and at different SPEI time scales. +#'The na.rm parameter is a logical parameter used to decide whether to remove +#'the NA values from the data before doing the calculation. It must be taken +#'into account that if na.rm == FALSE and there is some NA value in the specific +#'coordinates which the SPEI is computed, standardization cannot be carried out +#'for those coordinates and therefore, the result will be filled with NA for the +#'specific coordinates. However, when na.rm == TRUE, if the amount of data for +#'those specific coordinates is smaller than 4, it will not be possible to carry +#'out because we will not have enough data and the result will be also filled +#'with NAs for that coordinates. #' #'@param exp A named list with the needed \code{s2dv_cube} objects containing #' the seasonal forecast experiment in the data element for each variable. @@ -32,7 +60,9 @@ #'@param lat_dim A character string indicating the name of the latitudinal #' dimension. By default it is set by 'latitude'. #'@param accum An integer value indicating the number of months for the -#' accumulation for each variable. +#' accumulation for each variable. When it is greater than 1, the result will +#' be filled with NA until the accum time_dim dimension number due to the +#' accumulation to previous months. #'@param ref_period A list with two numeric values with the starting and end #' points of the reference period used for computing the index. The default #' value is NULL indicating that the first and end values in data will be @@ -68,33 +98,31 @@ #' default. #'@param distribution A character string indicating the name of the distribution #' function to be used for computing the SPEI. The accepted names are: -#' 'log-Logistic', 'Gamma' or 'PearsonIII'. It is set to 'log-Logistic' by -#' default. -#'@param fit A character string indicating the name of the method used for -#' computing the distribution function parameters The accepteed names are: -#' 'ub-pwm', 'pp-pwm' and 'max-lik'. It is set to 'ub-pwm' by default. +#' 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The +#' 'Gamma' method only works when only precipitation is provided and other +#' variables are 0 because it is positive defined (SPI indicator). #'@param param_error A numeric value with the error accepted. #'@param handle_infinity A logical value wether to return Infinite values (TRUE) #' or not (FALSE). #'@param na.rm A logical value indicating whether NA values should be removed -#' from data. It is FALSE by default. +#' from data. It is FALSE by default. If it is FALSE and there are NA values, +#' (if standardization is TRUE) all values of other dimensions except time_dim +#' and leadtime_dim will be set to NA directly. On the other hand, if it is +#' TRUE, if the data from other dimensions except time_dim and leadtime_dim is +#' not reaching 4 values, it is not enough values to estimate the parameters +#' and the result will include NA. #'@param ncores An integer value indicating the number of cores to use in #' parallel computation. #' -#'@return A list with elements: -#'\itemize{ -#' \item{'exp', if 'standarization' is TRUE an 's2dv_cube' conaining the 'SPEI' -#' in element data from 'exp' array with the same dimensions as 'exp'. -#' If it is FALSE, it is an array with the accumulated values of PET -#' minus 'prlr' data.} -#' \item{'exp_cor', if 'standarization' is TRUE and if 'exp_cor' is not -#' NULL. It is an 's2dv_cube' with the SPEI data from 'exp_cor' in -#' element 'data'. If 'standarization' is FALSE, only the accumulated -#' values of PET minus 'prlr' is returned.} -#' \item{'params', returned if 'standarization' is TRUE, it contains the -#' parameters used for the standarization of 'exp' that are used for -#' computing the 'SPEI' for 'exp_cor'.} -#' } +#'@return An 's2dv_cube' object containing the SPEI multidimensional array in +#'element \code{data}. If 'exp_cor' is provided, only results from 'exp_cor' +#'will be provided. The parameters of the standardization will only be returned +#'if 'return_params' is TRUE. The SPEI will only be computed if +#''standardization' is TRUE. If 'standardization' is FALSE, only the climatic +#'water balance (precipitation minus evapotranspiration) will be returned. The +#'resultant arrays will have the same dimensions as the initial input data. The +#'other elements in the 's2dv_cube' will be updated with the combined +#'information of the input data arrays. #' #'@examples #'dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, @@ -147,9 +175,10 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet_exp = NULL, pet_expcor = NULL, standardization = TRUE, cross_validation = FALSE, pet_method = 'hargreaves', method = 'parametric', - distribution = 'log-Logistic', fit = 'ub-pwm', + distribution = 'log-Logistic', param_error = -9999, handle_infinity = FALSE, - na.rm = FALSE, ncores = NULL) { + return_params = FALSE, na.rm = FALSE, + ncores = NULL) { # Check 's2dv_cube' if (is.null(exp)) { @@ -199,65 +228,98 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, standardization = standardization, cross_validation = cross_validation, pet_method = pet_method, method = method, - distribution = distribution, fit = fit, - param_error = param_error, - handle_infinity = handle_infinity, na.rm = na.rm, + distribution = distribution, + param_error = param_error, handle_infinity = handle_infinity, + return_params = return_params, na.rm = na.rm, ncores = ncores) - if (is.null(exp_cor)) { + + if (!is.null(exp_cor)) { + source_files_expcor <- lapply(exp_cor, function(x) {x$attrs$source_files}) source_files <- lapply(exp, function(x) {x$attrs$source_files}) - suppressWarnings( - res_exp <- CSTools::s2dv_cube(data = res[[1]], coords = exp[[1]]$coords, - varName = c('SPEI'), - Dates = exp[[1]]$attrs$Dates, - source_files = source_files, - when = Sys.time()) - ) - if (standardization) { - return(list(exp = res_exp, params = res[[2]])) - } else { - return(res_exp) - } + source_files <- c(exp = source_files, exp_cor = source_files_expcor) + coords <- exp_cor[[1]]$coords + Dates <- exp_cor[[1]]$attrs$Dates + metadata <- exp_cor[[1]]$attrs$Variable$metadata + metadata_names <- names(metadata) } else { - source_files_exp <- lapply(exp, function(x) {x$attrs$source_files}) + source_files <- lapply(exp, function(x) {x$attrs$source_files}) + coords <- exp[[1]]$coords + Dates <- exp[[1]]$attrs$Dates + metadata <- exp[[1]]$attrs$Variable$metadata + metadata_names <- names(metadata) + } + + if (standardization) { + varname <- 'SPEI' + } else { + varname <- 'Precipitation minus accumulated PET' + } + + if (return_params & standardization) { + metadata_names <- intersect(names(dim(res[[1]])), metadata_names) suppressWarnings( - res_exp <- CSTools::s2dv_cube(data = res[[1]], coords = exp[[1]]$coords, - varName = c('SPEI'), - Dates = exp[[1]]$attrs$Dates, - source_files = source_files_exp, - when = Sys.time()) + res[[1]] <- CSTools::s2dv_cube(data = res[[1]], coords = coords, + varName = varname, + metadata = metadata[metadata_names], + Dates = Dates, + source_files = source_files, + when = Sys.time()) ) - source_files_expcor <- lapply(exp_cor, function(x) {x$attrs$source_files}) + return(list(spei = res[[1]], params = res[[2]])) + } else { + metadata_names <- intersect(names(dim(res)), metadata_names) suppressWarnings( - res_expcor <- CSTools::s2dv_cube(data = res[[2]], coords = exp_cor[[1]]$coords, - varName = c('SPEI'), - Dates = exp_cor[[1]]$attrs$Dates, - source_files = source_files_expcor, - when = Sys.time()) + res <- CSTools::s2dv_cube(data = res, coords = coords, + varName = varname, + metadata = metadata[metadata_names], + Dates = Dates, + source_files = source_files, + when = Sys.time()) ) - - if (standardization) { - return(list(exp = res_exp, exp_cor = res_expcor, params = res[[3]])) - } else { - return(list(exp = res_exp, exp_cor = res_expcor)) - } + return(res) } - return(res) } #'Compute the Standardised Precipitation-Evapotranspiration Index -#' +#' #'Calculation of the Standardised Precipitation-Evapotranspiration Index (SPEI) #'that is a multiscalar drought index based on climatic data. It can be used for #'determining the onset, duration and magnitude of drought conditions with #'respect to normal conditions in a variety of natural and managed systems such -#'as crops, ecosystems, rivers, water resources, etc. The SPI is calculated -#'using monthly (or weekly) precipitation as the input data. The SPEI uses the -#'monthly (or weekly) difference between precipitation and pet. This represents -#'a simple climatic water balance which is calculated at different time scales -#'to obtain the SPEI. +#'as crops, ecosystems, rivers, water resources, etc. The idea behind the SPEI +#'is to compare the highest possible evapotranspiration with the current water +#'availability. The SPEI uses the monthly (or weekly) difference between +#'precipitation and potential evapotranspiration. This represents a simple +#'climatic water balance which is calculated at different time scales to obtain +#'the SPEI. #' -#'@param exp A named list with the needed \code{s2dv_cube} objects containing +#'Next, some specifications for the calculation of this indicator will be +#'discussed. On the one hand, the model to be used to calculate potential +#'evapotranspiration is specified with the pet_method parameter (hargreaves, +#'hargraves modified or thornwhite). On the other hand, to choose the time scale +#'in which you want to accumulate the SPEI (SPEI3, SPEI6...) is done using the +#'accum parameter, where you must indicate the number of time steps you want to +#'accumulate throughout leadtime_dim. Since the accumulation is done for the +#'elapsed time steps, there will be no complete accumulations until reaching the +#'time instant equal to the value of the parameter. For this reason, in the +#'result, we will find that for the dimension where the accumulation has been +#'carried out, the values of the array will be NA since they do not include +#'complete accumulations. Also, there is a parameter to specify if the +#'standardization that can be TRUE or FALSE. If it is TRUE, the data is fit to a +#'probability distribution to transform the original values to standardized +#'units that are comparable in space and time and at different SPEI time scales. +#'The na.rm parameter is a logical parameter used to decide whether to remove +#'the NA values from the data before doing the calculation. It must be taken +#'into account that if na.rm == FALSE and there is some NA value in the specific +#'coordinates which the SPEI is computed, standardization cannot be carried out +#'for those coordinates and therefore, the result will be filled with NA for the +#'specific coordinates. However, when na.rm == TRUE, if the amount of data for +#'those specific coordinates is smaller than 4, it will not be possible to carry +#'out because we will not have enough data and the result will be also filled +#'with NAs for that coordinates. +#' +#'@param exp A named list with multidimensional array objects containing #' the seasonal forecast experiment in the data element for each variable. #' Specific variables are needed for each method used in computing the #' Potential Evapotranspiration. See parameter 'pet_method'. The accepted @@ -269,7 +331,7 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, #'@param dates_exp An array of temporal dimensions containing the Dates of #' 'exp'. It must be of class 'Date' or 'POSIXct'. #'@param lat A numeric vector containing the latitude values of 'exp'. -#'@param exp_cor A named list with the needed \code{s2dv_cube} objects for each +#'@param exp_cor A named list with multidimensional array objects for each #' variable in which the quantile PeriodSPEI should be applied. If it is not #' specified, the PeriodSPEI is calculated from object 'exp'. #'@param dates_expcor An array of temporal dimensions containing the Dates of @@ -283,8 +345,10 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, #' computed for individual members. #'@param lat_dim A character string indicating the name of the latitudinal #' dimension. By default it is set by 'latitude'. -#'@param accum An integer value indicating the number of months for the -#' accumulation for each variable. +#'@param accum accum An integer value indicating the number of months for the +#' accumulation for each variable. When it is greater than 1, the result will +#' be filled with NA until the accum time_dim dimension number due to the +#' accumulation to previous months. #'@param ref_period A list with two numeric values with the starting and end #' points of the reference period used for computing the index. The default #' value is NULL indicating that the first and end values in data will be @@ -318,32 +382,31 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, #' If can be: 'parametric' or 'non-parametric'. #'@param distribution A character string indicating the name of the distribution #' function to be used for computing the SPEI. The accepted names are: -#' 'log-Logistic', 'Gamma' or 'PearsonIII'. It is set to 'log-Logistic' by -#' default. -#'@param fit A character string indicating the name of the method used for -#' computing the distribution function parameters The accepteed names are: -#' 'ub-pwm', 'pp-pwm' and 'max-lik'. It is set to 'ub-pwm' by default. +#' 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The +#' 'Gamma' method only works when only precipitation is provided and other +#' variables are 0 because it is positive defined (SPI indicator). #'@param param_error A numeric value with the error accepted. #'@param handle_infinity A logical value wether to return Infinite values (TRUE) #' or not (FALSE). +#'@param return_params A logical value indicating wether to return parameters +#' array (TRUE) or not (FALSE). It is FALSE by default. #'@param na.rm A logical value indicating whether NA values should be removed -#' from data. It is FALSE by default. +#' from data. It is FALSE by default. If it is FALSE and there are NA values, +#' (if standardization is TRUE) all values of other dimensions except time_dim +#' and leadtime_dim will be set to NA directly. On the other hand, if it is +#' TRUE, if the data from other dimensions except time_dim and leadtime_dim is +#' not reaching 4 values, it is not enough values to estimate the parameters +#' and the result will include NA. #'@param ncores An integer value indicating the number of cores to use in #' parallel computation. #' -#'@return A list with elements: -#'\itemize{ -#' \item{'exp', if 'standarization' is TRUE an array conaining SPEI data from -#' 'exp' array with the same dimensions as 'exp'. If it is FALSE, it -#' is an array with the accumulated values of PET minus 'prlr' data.} -#' \item{'exp_cor', if 'standarization' is TRUE and if 'exp_cor' is not -#' NULL. It is an array with the SPEI data from 'exp_cor'. If -#' 'standarization' is FALSE, only the accumulated values of PET minus -#' 'prlr' is returned.} -#' \item{'params', returned if 'standarization' is TRUE, it contains the -#' parameters used for the standarization of 'exp' that are used for -#' computing the 'SPEI' for 'exp_cor'.} -#'} +#'@return An 's2dv_cube' object containing the SPEI multidimensional array in +#'element \code{data}. If 'exp_cor' is provided, only results from 'exp_cor' +#'will be provided. The parameters of the standardization will only be returned +#'if 'return_params' is TRUE. The SPEI will only be computed if +#''standardization' is TRUE. If 'standardization' is FALSE, only the climatic +#'water balance (precipitation minus evapotranspiration) will be returned. The +#'resultant arrays will have the same dimensions as the initial input data. #' #'@examples #'dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, @@ -391,9 +454,9 @@ PeriodSPEI <- function(exp, dates_exp, lat, pet_exp = NULL, pet_expcor = NULL, standardization = TRUE, cross_validation = FALSE, pet_method = 'hargreaves', method = 'parametric', - distribution = 'log-Logistic', fit = 'ub-pwm', + distribution = 'log-Logistic', param_error = -9999, handle_infinity = FALSE, - na.rm = FALSE, ncores = NULL) { + return_params = FALSE, na.rm = FALSE, ncores = NULL) { # Initial checks ## exp @@ -413,7 +476,6 @@ PeriodSPEI <- function(exp, dates_exp, lat, stop("Parameter 'exp' variables need to have the same dimensions.") } - ## exp_cor if (!is.null(exp_cor)) { if (!inherits(exp_cor, 'list')) { @@ -433,53 +495,24 @@ PeriodSPEI <- function(exp, dates_exp, lat, stop("Parameter 'exp_cor' variables need to have the same dimensions.") } } - - # lat - if (!is.numeric(lat)) { - stop("Parameter 'lat' must be numeric.") - } - if (!lat_dim %in% names(dims[[1]])) { - stop("Parameter 'exp' must have 'lat_dim' dimension.") - } - if (any(sapply(dims, FUN = function(x) x[lat_dim] != length(lat)))) { - stop("Parameter 'lat' needs to have the same length of latitudinal", - "dimension of all the variables arrays in 'exp'.") - } - if (!is.null(exp_cor)) { - if (!lat_dim %in% names(dimscor[[1]])) { - stop("Parameter 'exp_cor' must have 'lat_dim' dimension.") - } - if (any(sapply(dimscor, FUN = function(x) x[lat_dim] != length(lat)))) { - stop("Parameter 'lat' needs to have the same length of latitudinal", - "dimension of all the variables arrays in 'exp'.") - } - } - # Variable checks ## exp (2) pet <- vector("list", 2) - if (is.null(pet_exp)) { - if (all(c('tasmin', 'tasmax', 'prlr') %in% names(exp))) { - # hargreaves modified: 'tasmin', 'tasmax', 'prlr' and 'lat' - if (!(pet_method[1] %in% c('hargreaves_modified', 'hargreaves'))) { - warning("Parameter 'pet_method' needs to be 'hargreaves' or ", - "'hargreaves_modified'. It is set to 'hargreaves'.") - pet_method[1] <- 'hargreaves' - } - } else if (all(c('tas', 'prlr') %in% names(exp))) { - # thornthwaite: 'tas' (mean), 'lat' - if (!(pet_method[1] == 'thornthwaite')) { - warning("Parameter 'pet_method' it is set to be 'thornthwaite'.") - pet_method[1] <- 'thornthwaite' - } - } else { - stop("Parameter 'exp' needs to be a named list with accepted ", - "variable names if 'pet_exp' is not provided. See documentation.") + if (!('prlr' %in% names(exp))) { + stop("Variable 'prlr' is not included in 'exp'.") + } + ## exp_cor (2) + if (!is.null(exp_cor)) { + if (!('prlr' %in% names(exp_cor))) { + stop("Variable 'prlr' is not included in 'exp_cor'.") } - } else { - if (!('prlr' %in% names(exp))) { - stop("Variable 'prlr' is not included in 'exp'.") + if (length(pet_method) == 1) { + pet_method <- rep(pet_method, 2) } + } + + ## pet_exp + if (!is.null(pet_exp)) { if (length(dim(exp[['prlr']])) != length(dim(pet_exp))) { stop("Parameter 'pet_exp' must have the same length of all the ", "dimensions as variable 'prlr' in 'exp'.") @@ -493,53 +526,27 @@ PeriodSPEI <- function(exp, dates_exp, lat, pet_exp <- aperm(pet_exp, pos) } pet[[1]] <- pet_exp + } else if (is.null(dates_exp)) { + stop("Parameter 'dates_exp' must be provided.") } - ## exp_cor (2) + ## pet_expcor if (!is.null(exp_cor)) { - if (is.null(pet_expcor)) { - if (length(exp_cor) < 1) { - exp_cor <- NULL - } else { - if (length(pet_method) == 1) { - pet_method <- rep(pet_method, 2) - } + if (!is.null(pet_expcor)) { + if (length(dim(exp_cor[['prlr']])) != length(dim(pet_expcor))) { + stop("Parameter 'pet_expcor' must have the same length of all the ", + "dimensions as variable 'prlr' in 'exp_cor'.") } - if (all(c('tasmin', 'tasmax', 'prlr') %in% names(exp_cor))) { - # hargreaves modified: 'tasmin', 'tasmax', 'prlr' and 'lat' - if (!(pet_method[2] %in% c('hargreaves_modified', 'hargreaves'))) { - warning("Parameter 'pet_method' needs to be 'hargreaves' or ", - "'hargreaves_modified'. It is set to 'hargreaves'.") - pet_method[2] <- 'hargreaves' - } - } else if (all(c('tas', 'prlr') %in% names(exp_cor))) { - # thornthwaite: 'tas' (mean), 'lat' - if (!(pet_method[2] == 'thornthwaite')) { - warning("Parameter 'pet_method' it is set to be 'thornthwaite'.") - pet_method[2] <- 'thornthwaite' - } - } else { - stop("Parameter 'exp_cor' needs to be a list with the needed ", - "variables if 'pet_expcor' is not provided.") - } - } else { - if (!is.null(exp_cor)) { - if (!('prlr' %in% names(exp_cor))) { - stop("Variable 'prlr' is not included in 'exp_cor'.") - } - if (length(dim(exp_cor[['prlr']])) != length(dim(pet_expcor))) { - stop("Parameter 'pet_expcor' must have the same length of all the ", - "dimensions as variable 'prlr' in 'exp_cor'.") - } - if (!all(dim(exp_cor[['prlr']]) %in% dim(pet_expcor))) { - stop("Parameter 'pet_expcor' must have the same length of all the ", - "dimensions as variable 'prlr' in 'exp_cor'.") - } - if (any(names(dim(exp_cor[['prlr']])) != names(dim(pet_expcor)))) { - pos <- match(names(dim(exp_cor[['prlr']])), names(dim(pet_expcor))) - pet_expcor <- aperm(pet_expcor, pos) - } + if (!all(dim(exp_cor[['prlr']]) %in% dim(pet_expcor))) { + stop("Parameter 'pet_expcor' must have the same length of all the ", + "dimensions as variable 'prlr' in 'exp_cor'.") } + if (any(names(dim(exp_cor[['prlr']])) != names(dim(pet_expcor)))) { + pos <- match(names(dim(exp_cor[['prlr']])), names(dim(pet_expcor))) + pet_expcor <- aperm(pet_expcor, pos) + } pet[[2]] <- pet_expcor + } else if (is.null(dates_expcor)) { + stop("Parameter 'dates_expcor' must be provided.") } } @@ -579,6 +586,7 @@ PeriodSPEI <- function(exp, dates_exp, lat, stop("Parameter 'memb_dim' is not found in 'exp_cor' dimension.") } } + ## lat_dim if (!is.character(lat_dim) | length(lat_dim) != 1) { stop("Parameter 'lat_dim' must be a character string.") @@ -591,7 +599,8 @@ PeriodSPEI <- function(exp, dates_exp, lat, stop("Parameter 'lat_dim' is not found in 'exp_cor' dimension.") } } - # dates + + ## dates if (is.null(dates_exp)) { stop("Parameter 'dates_exp' is missing, dates must be provided.") } @@ -628,12 +637,41 @@ PeriodSPEI <- function(exp, dates_exp, lat, "'time_dim' and 'leadtime_dim' as 'exp_cor'.") } } + ## accum if (accum > dim(exp[[1]])[leadtime_dim]) { stop(paste0("Cannot compute accumulation of ", accum, " months because ", "loaded data has only ", dim(exp[[1]])[leadtime_dim], " months.")) } + ## ref_period + if (!is.null(ref_period)) { + if (length(ref_period) != 2) { + warning("Parameter 'ref_period' must be of length two indicating the ", + "first and end years of the reference period. It will not ", + "be used.") + ref_period <- NULL + } else if (!all(sapply(ref_period, is.numeric))) { + warning("Parameter 'ref_period' must be a numeric vector indicating the ", + "'start' and 'end' years of the reference period. It will not ", + "be used.") + ref_period <- NULL + } else if (ref_period[[1]] > ref_period[[2]]) { + warning("In parameter 'ref_period' 'start' cannot be after 'end'. It ", + "will not be used.") + ref_period <- NULL + } else if (!all(unlist(ref_period) %in% year(dates_exp))) { + warning("Parameter 'ref_period' contain years outside the dates. ", + "It will not be used.") + ref_period <- NULL + } else { + years <- year(ClimProjDiags::Subset(dates_exp, along = leadtime_dim, + indices = 1)) + ref_period[[1]] <- which(ref_period[[1]] == years) + ref_period[[2]] <- which(ref_period[[2]] == years) + } + } + ## standardization if (!is.logical(standardization)) { stop("Parameter 'standardization' must be a logical value.") @@ -666,17 +704,13 @@ PeriodSPEI <- function(exp, dates_exp, lat, stop("Parameter 'method' must be a character string containing one of ", "the following methods: 'parametric' or 'non-parametric'.") } + ## distribution if (!(distribution %in% c('log-Logistic', 'Gamma', 'PearsonIII'))) { stop("Parameter 'distribution' must be a character string containing one ", "of the following distributions: 'log-Logistic', 'Gamma' or ", "'PearsonIII'.") } - ## fit - if (!(fit %in% c('max-lik', 'ub-pwm', 'pp-pwm'))) { - stop("Parameter 'fit' must be a character string containing one of ", - "the following fit methods: 'max-lik', 'ub-pwm', 'pp-pwm'.") - } ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | @@ -684,6 +718,12 @@ PeriodSPEI <- function(exp, dates_exp, lat, stop("Parameter 'ncores' must be a positive integer.") } } + + ## na.rm + if (!is.logical(na.rm)) { + stop("Parameter 'na.rm' must be logical.") + } + ## params if (!is.null(params)) { if (!is.numeric(params)) { @@ -706,26 +746,14 @@ PeriodSPEI <- function(exp, dates_exp, lat, } } - # Complete dates - dates_monthly <- NULL - k = 0 - for (dates in .return2list(dates_exp, dates_expcor)) { - k = k + 1 - ini_date <- as.Date(paste(lubridate::year(min(dates)), 01, 01, sep= '-')) - end_date <- as.Date(paste(lubridate::year(max(dates)), 12, 31, sep='-')) - dates_complete_daily <- as.Date(ini_date:end_date) - - dates_complete_daily_to_monthly <- lubridate::day(dates_complete_daily) - dates_complete_monthly <- dates_complete_daily[which(dates_complete_daily_to_monthly == 1)] - dates_monthly[[k]] <- array(0, dim = length(dates_complete_monthly)) - for (dd in 1:length(dates)) { - ii <- which(dates_complete_monthly == as.Date(paste(lubridate::year(dates[dd]), - lubridate::month(dates[dd]), - 01, sep = '-'))) - dates_monthly[[k]][ii] <- 1 - } + ## return_params + if (!is.logical(return_params)) { + stop("Parameter 'return_params' must be logical.") } + # Complete dates + dates_monthly <- .return2list(dates_exp, dates_expcor) + # Compute PeriodSPEI k = 0 spei_res <- NULL @@ -735,238 +763,60 @@ PeriodSPEI <- function(exp, dates_exp, lat, k = k + 1 # Evapotranspiration estimation (unless pet is already provided) if (is.null(pet[[k]]) | computed_pet) { - pet[[k]] <- .Evapotranspiration(data = data, - dates_monthly = dates_monthly[[k]], - lat = lat, pet_method = pet_method[k], - time_dim = time_dim, - leadtime_dim = leadtime_dim, - lat_dim = lat_dim, na.rm = na.rm, - ncores = ncores) + pet[[k]] <- PeriodPET(data = data, dates = dates_monthly[[k]], + lat = lat, pet_method = pet_method[k], + time_dim = time_dim, leadtime_dim = leadtime_dim, + lat_dim = lat_dim, na.rm = na.rm, ncores = ncores) computed_pet <- TRUE } # Accumulation diff_p_pet <- data$prlr - pet[[k]] - data_accum <- .Accumulation(data = diff_p_pet, - dates_monthly = dates_monthly[[k]], accum = accum, - time_dim = time_dim, leadtime_dim = leadtime_dim, - ncores = ncores) + + accumulated <- Apply(data = list(diff_p_pet), + target_dims = list(data = c(leadtime_dim, time_dim)), + output_dims = c(leadtime_dim, time_dim), + fun = function(data, accum) { + return(rollapply(data = data, width = accum, FUN = sum)) + }, accum = accum, ncores = ncores)$output1 + # Standardization if (standardization) { - spei <- .Standardization(data = data_accum, params = params, + spei <- .Standardization(data = accumulated, params = params, accum = accum, time_dim = time_dim, leadtime_dim = leadtime_dim, - memb_dim = memb_dim, - ref_period = ref_period, + memb_dim = memb_dim, ref_period = ref_period, cross_validation = cross_validation, handle_infinity = handle_infinity, param_error = param_error, method = method, distribution = distribution, - fit = fit, na.rm = na.rm, ncores = ncores) + na.rm = na.rm, ncores = ncores) + ref_period <- NULL params <- spei$params - pos <- match(names(dim(data[[1]])), names(dim(spei[[1]]))) - spei[[1]] <- aperm(spei[[1]], pos) - - spei_res[[k]] <- spei[[1]] + spei_res <- spei[[1]] } else { - pos <- match(names(dim(data[[1]])), names(dim(data_accum))) - data_accum <- aperm(data_accum, pos) - - spei_res[[k]] <- data_accum + spei_res <- accumulated } + spei_res <- Apply(data = list(spei_res), target_dims = leadtime_dim, + fun = function(x, accum, leadtime_dim) { + res <- c(rep(NA, accum-1), x) + dim(res) <- length(res) + names(dim(res)) <- leadtime_dim + return(res) + }, accum = accum, leadtime_dim = leadtime_dim)$output1 + pos <- match(names(dim(data[[1]])), names(dim(spei_res))) + spei_res <- aperm(spei_res, pos) } if (standardization) { - spei_res[[k+1]] <- params - if (is.null(exp_cor)) { - names(spei_res) <- c('exp', 'params') + if (return_params) { + return(list(spei = spei_res, params = params)) } else { - names(spei_res) <- c('exp', 'exp_cor', 'params') + return(spei_res) } } else { - if (is.null(exp_cor)) { - names(spei_res) <- c('exp') - } else { - names(spei_res) <- c('exp', 'exp_cor') - } - } - return(spei_res) -} - -.Evapotranspiration <- function(data, dates_monthly, lat, pet_method = 'hargreaves', - time_dim = 'syear', leadtime_dim = 'time', - lat_dim = 'latitude', na.rm = FALSE, - ncores = NULL) { - - lat_mask <- array(lat, dim = c(1, length(lat))) - names(dim(lat_mask)) <- c('dat', lat_dim) - - # extract mask of NA locations to return to NA the final result - mask_na <- array(1, dim = dim(data[[1]])) - if (pet_method == 'hargreaves') { - varnames <- c('tasmax', 'tasmin') - mask_na[which(is.na(data$tasmax))] <- 0 - mask_na[which(is.na(data$tasmin))] <- 0 - } else if (pet_method == 'hargreaves_modified') { - varnames <- c('tasmax', 'tasmin', 'prlr') - mask_na[which(is.na(data$tasmax))] <- 0 - mask_na[which(is.na(data$tasmin))] <- 0 - mask_na[which(is.na(data$prlr))] <- 0 - } else if (pet_method == 'thornthwaite') { - varnames <- c('tas') - mask_na[which(is.na(data$tas))] <- 0 - } - - # replace NA with 0 - for (dd in 1:length(data)) { - data[[dd]][which(is.na(data[[dd]]))] <- 0 - } - - # prepare data - target_dims_data <- lapply(data[varnames], function(x) rep(c(leadtime_dim, time_dim), 1)) - pet <- Apply(data = c(list(lat_mask = lat_mask), data[varnames]), - target_dims = c(list(lat_mask = 'dat'), target_dims_data), - fun = .evapotranspiration, - dates_monthly = dates_monthly, pet_method = pet_method, - leadtime_dim = leadtime_dim, time_dim = time_dim, - output_dims = c(leadtime_dim, time_dim), - ncores = ncores)$output1 - # reorder dims in pet_estimated - pos <- match(names(dim(data[[1]])), names(dim(pet))) - pet <- aperm(pet, pos) - - # restore original NAs from mask_na - pet[which(mask_na == 0 )] <- NA - - return(pet) -} - -.evapotranspiration <- function(lat_mask, data2, data3 = NULL, data4 = NULL, - dates_monthly, pet_method = 'hargreaves', - leadtime_dim = 'time', time_dim = 'syear') { - - dims <- dim(data2) - - # create a vector from data but adding 0 to achive complete time series - # of the considered period - # (starting in January of the first year) so that the solar radiation - # estimation is computed in each case for the correct month - - if (!is.null(data2)) { - data_tmp <- as.vector(data2) - data2 <- array(0, dim = length(dates_monthly)) - count <- 1 - for (dd in 1:length(dates_monthly)) { - if (dates_monthly[dd] == 1) { - data2[dd] <- data_tmp[count] - count <- count + 1 - } - } - rm(data_tmp) + return(spei_res) } - if (!is.null(data3)) { - data_tmp <- as.vector(data3) - data3 <- array(0, dim = length(dates_monthly)) - count <- 1 - for (dd in 1:length(dates_monthly)) { - if (dates_monthly[dd] == 1) { - data3[dd] <- data_tmp[count] - count <- count + 1 - } - } - rm(data_tmp) - } - if (!is.null(data4)) { - data_tmp <- as.vector(data4) - data4 <- array(0, dim = length(dates_monthly)) - count <- 1 - for (dd in 1:length(dates_monthly)) { - if (dates_monthly[dd] == 1) { - data4[dd] <- data_tmp[count] - count <- count + 1 - } - } - rm(data_tmp) - } - if (pet_method == 'hargreaves') { - pet <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), - lat = lat_mask, na.rm = na.rm, verbose = FALSE) - # line to return the vector to the size of the actual original data - pet <- pet[which(dates_monthly == 1)] - pet <- array(pet, dim = dims) - } - - if (pet_method == 'hargreaves_modified') { - pet <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), - lat = lat_mask, Pre = as.vector(data4), na.rm = na.rm, - verbose = FALSE) - pet <- pet[which(dates_monthly == 1)] - pet <- array(pet, dim = dims) - } - - if (pet_method == 'thornthwaite') { - pet <- thornthwaite(as.vector(data2), lat = lat_mask, na.rm = na.rm, - verbose = FALSE) - # line to return the vector to the size of the actual original data - pet <- pet[which(dates_monthly == 1)] - pet <- array(pet, dim = dims) - } - return(pet) -} - - -.Accumulation <- function(data, dates_monthly, accum = 1, - time_dim = 'syear', leadtime_dim = 'time', - ncores = NULL) { - - accumulated <- Apply(data = list(data), - target_dims = list(data = c(leadtime_dim, time_dim)), - dates_monthly = dates_monthly, - accum = accum, - output_dims = c(leadtime_dim, time_dim), - leadtime_dim = leadtime_dim, time_dim = time_dim, - fun = .accumulation, - ncores = ncores)$output1 - - pos <- match(names(dim(accumulated)), names(dim(data))) - accumulated <- aperm(accumulated, pos) - - return(accumulated) - -} - -.accumulation <- function(data, dates_monthly, accum = 1, - time_dim = 'syear', leadtime_dim = 'time') { - - # data:[time, syear] - dims <- dim(data) - - data_vector <- array(0, dim = length(dates_monthly)) - count <- 1 - for (dd in 1:length(dates_monthly)) { - if (dates_monthly[dd] == 1) { - data_vector[dd] <- as.vector(data)[count] - count <- count + 1 - } - } - # Accumulation at different timescales - # rollapply {zoo} A generic function for applying a function to rolling - # margins of an array. - data_sum_x <- rollapply(data_vector, accum, sum) - # adds as many NAs as needed at the begining to account for the months that - # cannot be added (depends on accu) and so that the position in the vector - # corresponds to the accumulated of the previous months (instead of the - # accumulated of the next months) - data_sum_x <- c(rep(NA, accum-1), data_sum_x) - # discard the months that don't appear in the original data - data_sum_x <- data_sum_x[which(dates_monthly == 1)] - accum_result <- array(data_sum_x, dim = c(dims)) - # replace by NA when the accumulation corresponds to months that where not - # present in the original data - if (accum > 1) { - accum_result[1:(accum-1), ] <- NA - } - return(accum_result) } .Standardization <- function(data, params = NULL, accum = 1, time_dim = 'syear', @@ -974,19 +824,7 @@ PeriodSPEI <- function(exp, dates_exp, lat, ref_period = NULL, cross_validation = FALSE, handle_infinity = FALSE, param_error = -9999, method = 'parametric', distribution = 'log-Logistic', - fit = 'ub-pwm', na.rm = FALSE, ncores = NULL) { - - # Remove NA data due to accumulation - if (accum > 1) { - data <- ClimProjDiags::Subset(x = data, along = leadtime_dim, - indices = accum:dim(data)[leadtime_dim], - drop = FALSE) - if (!is.null(params)) { - params <- ClimProjDiags::Subset(x = params, along = leadtime_dim, - indices = accum:dim(params)[leadtime_dim], - drop = FALSE) - } - } + na.rm = FALSE, ncores = NULL) { nleadtime <- dim(data)[leadtime_dim] ntime <- dim(data)[time_dim] @@ -1021,23 +859,10 @@ PeriodSPEI <- function(exp, dates_exp, lat, ref_period = ref_period, handle_infinity = handle_infinity, cross_validation = cross_validation, param_error = param_error, method = method, distribution = distribution, - fit = fit, na.rm = na.rm, + na.rm = na.rm, output_dims = list(spei = c(leadtime_dim, time_dim, memb_dim), params = c(time_dim, leadtime_dim, 'coef')), ncores = ncores) - - # Add again NA values due to accumulation - if (accum > 1) { - for (s in 1:length(spei)) { - spei[[s]] <- Apply(data = list(spei[[s]]), target_dims = leadtime_dim, - fun = function(x, accum = 1, leadtime_dim = 'time') { - res <- c(rep(NA, accum-1), x) - dim(res) <- length(res) - names(dim(res)) <- leadtime_dim - return(res) - }, accum = accum, leadtime_dim = leadtime_dim)$output1 - } - } return(spei) } @@ -1046,7 +871,7 @@ PeriodSPEI <- function(exp, dates_exp, lat, ref_period = NULL, handle_infinity = FALSE, cross_validation = FALSE, param_error = -9999, method = 'parametric', distribution = 'log-Logistic', - fit = 'ub-pwm', na.rm = FALSE) { + na.rm = FALSE) { # data: [leadtime_dim, time_dim, memb_dim] # params: [time_dim, leadtime_dim, 'coef'] @@ -1068,7 +893,7 @@ PeriodSPEI <- function(exp, dates_exp, lat, spei_mod <- array(NA, dim(data)) # if the data [time, sdate, memb] has no variability it will raise an error # further down, so we assign a value to the result and skip the step - } else if (any(is.na(data)) && !na.rm) { + } else if (anyNA(data) && !na.rm) { spei_mod <- array(NA, dim(data)) } else if (var(data, na.rm = T) == 0) { spei_mod <- array(param_error, dim(data)) @@ -1090,15 +915,13 @@ PeriodSPEI <- function(exp, dates_exp, lat, params_tmp <- if (all(is.na(params))) {NULL} else {params[, ff, ]} - - spei_data <- .std(data = data_subset, coef = coef, - ntime = ntime, nmemb = nmemb, - method = method, distribution = distribution, - fit = fit, na.rm = na.rm, + spei_data <- .std(data = data_subset, coef = coef, ntime = ntime, + nmemb = nmemb, method = method, + distribution = distribution, na.rm = na.rm, ref.start = ref.start, ref.end = ref.end, - params = params_tmp, - handle_infinity = handle_infinity, + params = params_tmp, handle_infinity = handle_infinity, cross_validation = cross_validation) + spei_mod[ff, , ] <- spei_data[[1]] params_ff <- spei_data[[2]] # lengthen dimension coef of params_ff in case it doesn't match the @@ -1115,12 +938,15 @@ PeriodSPEI <- function(exp, dates_exp, lat, } .std <- function(data, coef, ntime, nmemb, method = 'parametric', - distribution = 'log-Logistic', fit = 'ub-pwm', na.rm = FALSE, + distribution = 'log-Logistic', na.rm = FALSE, ref.start = NULL, ref.end = NULL, params = NULL, handle_infinity = FALSE, cross_validation = FALSE) { # data: [time_dim, memb_dim] # params: NULL or [(ntime), coef] + + fit = 'ub-pwm' # hard-coded + if (method == 'non-parametric') { bp <- matrix(0, length(data), 1) for (i in 1:length(data)) { diff --git a/tests/testthat/test-PeriodPET.R b/tests/testthat/test-PeriodPET.R new file mode 100644 index 0000000..0b7cf57 --- /dev/null +++ b/tests/testthat/test-PeriodPET.R @@ -0,0 +1,178 @@ +############################################## + +# dat1 +dims <- c(syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 10) + +set.seed(1) +exp_tasmax <- array(rnorm(360, 27.73, 5.26), dim = dims) +set.seed(2) +exp_tasmin <- array(rnorm(360, 14.83, 3.86), dim = dims) +set.seed(3) +exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims) + +dates_exp <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), + paste0(2010:2015, "-10-16"))) +dim(dates_exp) <- c(sday = 1, sweek = 1, syear = 6, time = 3) + +lat <- c(40,40.1) + +exp1 <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) + +# dat2 +dims2 <- c(styear = 6, ftime = 3, lat = 2, lon = 1, member = 10) + +set.seed(1) +exp_tas <- array(rnorm(100, 17.34, 9.18), dim = dims2) +set.seed(2) +exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims2) + +dates_exp2 <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), + paste0(2010:2015, "-10-16"))) +dim(dates_exp2) <- c(sday = 1, sweek = 1, styear = 6, ftime = 3) + +lat <- c(40,40.1) + +exp2 <- list('tas' = exp_tas, 'prlr' = exp_prlr) + +############################################## + +# test_that("1. Initial checks CST_PeriodSPEI", { +# # Check 's2dv_cube' +# expect_error( +# CST_PeriodSPEI(exp = NULL), +# "Parameter 'exp' cannot be NULL." +# ) +# expect_error( +# CST_PeriodSPEI(exp = array(10)), +# "Parameter 'exp' must be a list of 's2dv_cube' class." +# ) +# # latitude +# expect_error( +# CST_PeriodSPEI(exp = list(cube1)), +# paste0("Spatial coordinate names of parameter 'exp' do not match any ", +# "of the names accepted by the package.") +# ) +# # Dates +# expect_error( +# CST_PeriodSPEI(exp = list(cube2)), +# paste0("Element 'Dates' is not found in 'attrs' list of 'exp'. ", +# "See 's2dv_cube' object description in README file for more ", +# "information.") +# ) +# expect_error( +# CST_PeriodSPEI(exp = list(cube3), exp_cor = list(cube2)), +# paste0("Element 'Dates' is not found in 'attrs' list of 'exp_cor'. ", +# "See 's2dv_cube' object description in README file for more ", +# "information.") +# ) +# }) + +############################################## + +test_that("1. Initial checks PeriodSPEI", { + # data + expect_error( + PeriodPET(data = NULL), + "Parameter 'data' needs to be a named list with the needed variables." + ) + expect_error( + PeriodPET(data = list(1)), + "Parameter 'data' needs to be a named list with the variable names." + ) + expect_error( + PeriodPET(data = list(tasmax = array(10))), + "Parameter 'data' needs to be a list of arrays with dimension names." + ) + expect_error( + PeriodPET(data = list(tasmax = array(10, c(time = 10)), + tasmin = array(10, c(time = 11)))), + "Parameter 'data' variables need to have the same dimensions." + ) + expect_error( + PeriodPET(data = list(tasmax = array(10, c(time = 10)), + tasmin = array(10, c(ftime = 10)))), + "Parameter 'data' variables need to have the same dimensions." + ) + # lat + expect_error( + PeriodPET(data = exp1, lat = 'lat'), + "Parameter 'lat' must be numeric." + ) + expect_error( + PeriodPET(data = list(tasmax = array(10, c(time = 10)), + tasmin = array(10, c(time = 10))), lat = 1:2), + "Parameter 'data' must have 'lat_dim' dimension." + ) + # data (2) + expect_warning( + PeriodPET(data = exp1, pet_method = '1', dates = dates_exp, lat = lat), + paste0("Parameter 'pet_method' needs to be 'hargreaves' or ", + "'hargreaves_modified'. It is set to 'hargreaves_modified'.") + ) + # time_dim + expect_error( + PeriodPET(data = exp1, time_dim = 1, dates = dates_exp, lat = lat), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + PeriodPET(data = exp2, lat = lat, dates = dates_exp2, + lat_dim = 'lat', pet_method = 'thornthwaite'), + "Parameter 'time_dim' is not found in 'data' dimension." + ) + # leadtime_dim + expect_error( + PeriodPET(data = exp1, leadtime_dim = 1, dates = dates_exp, lat = lat), + "Parameter 'leadtime_dim' must be a character string." + ) + expect_error( + PeriodPET(data = exp2, lat = lat, dates = dates_exp2, + lat_dim = 'lat', time_dim = 'ftime', pet_method = 'thornthwaite'), + "Parameter 'leadtime_dim' is not found in 'data' dimension." + ) + # lat_dim + expect_error( + PeriodPET(data = exp1, lat_dim = 1, dates = dates_exp, lat = lat) + ) + expect_error( + PeriodPET(data = exp2, lat = lat, dates = dates_exp2), + "Parameter 'data' must have 'lat_dim' dimension." + ) + # na.rm + expect_error( + PeriodPET(data = exp1, na.rm = 1.5, dates = dates_exp, lat = lat), + "Parameter 'na.rm' must be one logical value." + ) + # ncores + expect_error( + PeriodPET(data = exp1, ncores = 1.5, dates = dates_exp, lat = lat), + "Parameter 'ncores' must be a positive integer." + ) +}) + +############################################## + +test_that("2. Output checks", { + res1 <- PeriodPET(data = exp1, lat = lat, dates = dates_exp) + res2 <- PeriodPET(data = exp2, lat = lat, dates = dates_exp2, + pet_method = c('thornthwaite'), + lat_dim = 'lat', time_dim = 'styear', + leadtime_dim = 'ftime') + # dims + expect_equal( + dim(res1), + c(syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 10) + ) + # values + expect_equal( + res1[1:4], + c(137.77342, 154.55548, 65.72859, 222.20438), + tolerance = 0.0001 + ) + expect_equal( + res2[1:4], + c(77.76124, 118.94212, 66.57568, 185.67074), + tolerance = 0.0001 + ) +}) + +############################################## \ No newline at end of file diff --git a/tests/testthat/test-PeriodSPEI.R b/tests/testthat/test-PeriodSPEI.R index 4e42859..f978111 100644 --- a/tests/testthat/test-PeriodSPEI.R +++ b/tests/testthat/test-PeriodSPEI.R @@ -35,13 +35,13 @@ expcor_tasmin <- array(rnorm(60, 15.70, 4.40), dim = dimscor) set.seed(3) expcor_prlr <- array(rnorm(60, 15.62, 21.38), dim = dimscor) -dates_exp <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), - paste0(2010:2015, "-10-16"))) -dim(dates_exp) <- c(sday = 1, sweek = 1, syear = 6, time = 3) +dates_exp <- as.POSIXct(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), + paste0(2010:2015, "-10-16")), "UTC") +dim(dates_exp) <- c(syear = 6, time = 3) -dates_expcor <- as.Date(c(paste0(2020, "-08-16"), paste0(2020, "-09-15"), - paste0(2020, "-10-16"))) -dim(dates_expcor) <- c(sday = 1, sweek = 1, syear = 1, time = 3) +dates_expcor <- as.POSIXct(c(paste0(2020, "-08-16"), paste0(2020, "-09-15"), + paste0(2020, "-10-16")), "UTC") +dim(dates_expcor) <- c(syear = 1, time = 3) lat <- c(40,40.1) @@ -65,7 +65,7 @@ set.seed(2) expcor_prlr <- array(rnorm(60, 15.62, 21.38), dim = dimscor2) dates_exp2 <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), - paste0(2010:2015, "-10-16"))) + paste0(2010:2015, "-10-16"))) dim(dates_exp2) <- c(sday = 1, sweek = 1, styear = 6, ftime = 3) dates_expcor2 <- as.Date(c(paste0(2020, "-08-16"), paste0(2020, "-09-15"), @@ -77,6 +77,20 @@ lat <- c(40,40.1) exp2 <- list('tas' = exp_tas, 'prlr' = exp_prlr) exp_cor2 <- list('tas' = expcor_tas, 'prlr' = expcor_prlr) +# cube4 +cube4_exp <- lapply(exp1, function(x) { + suppressWarnings( + CSTools::s2dv_cube(data = x, coords = list(latitude = c(40, 40.1)), + varName = 'test', Dates = dates_exp) + ) +}) +cube4_expcor <- lapply(exp_cor1, function(x) { + suppressWarnings( + CSTools::s2dv_cube(data = x, coords = list(latitude = c(40, 40.1)), + varName = 'test', Dates = dates_expcor) + ) +}) + ############################################## test_that("1. Initial checks CST_PeriodSPEI", { @@ -156,19 +170,19 @@ test_that("1. Initial checks PeriodSPEI", { "Parameter 'exp_cor' variables need to have the same dimensions." ) expect_error( - PeriodSPEI(exp = exp1, lat = 'lat'), + PeriodSPEI(exp = exp1, lat = 'lat', dates_exp = dates_exp), "Parameter 'lat' must be numeric." ) expect_error( - PeriodSPEI(exp = list(tasmax = array(10, c(time = 10)), - tasmin = array(10, c(time = 10))), lat = 1:2), - "Parameter 'exp' must have 'lat_dim' dimension." + PeriodSPEI(exp = list(prlr = array(10, c(time = 10, syear = 1, ensemble = 1))), + lat = 1:2, dates_exp = dates_exp), + "Parameter 'lat_dim' is not found in 'exp' dimension." ) # exp (2) expect_warning( PeriodSPEI(exp = exp1, pet_method = '1', dates_exp = dates_exp, lat = lat), paste0("Parameter 'pet_method' needs to be 'hargreaves' or ", - "'hargreaves_modified'. It is set to 'hargreaves'.") + "'hargreaves_modified'. It is set to 'hargreaves_modified'.") ) # time_dim expect_error( @@ -211,13 +225,40 @@ test_that("1. Initial checks PeriodSPEI", { expect_error( PeriodSPEI(exp = exp2, exp_cor = exp_cor2, lat = lat, dates_exp = dates_exp2, dates_expcor = dates_expcor2), - "Parameter 'exp' must have 'lat_dim' dimension." + "Parameter 'time_dim' is not found in 'exp' dimension." ) # accum expect_error( PeriodSPEI(exp = exp1, accum = 10, dates_exp = dates_exp, lat = lat), "Cannot compute accumulation of 10 months because loaded data has only 3 months." ) + # ref_period + expect_warning( + PeriodSPEI(exp = exp1, exp_cor = exp_cor1, dates_exp = dates_exp, + dates_expcor = dates_expcor, lat = lat, ref_period = 1), + paste0("Parameter 'ref_period' must be of length two indicating the ", + "first and end years of the reference period. It will not ", + "be used.") + ) + expect_warning( + PeriodSPEI(exp = exp1, exp_cor = exp_cor1, dates_exp = dates_exp, + dates_expcor = dates_expcor, lat = lat, ref_period = list('a', 1)), + paste0("Parameter 'ref_period' must be a numeric vector indicating the ", + "'start' and 'end' years of the reference period. It will not ", + "be used.") + ) + expect_warning( + PeriodSPEI(exp = exp1, exp_cor = exp_cor1, dates_exp = dates_exp, + dates_expcor = dates_expcor, lat = lat, ref_period = list(2012, 2011)), + paste0("In parameter 'ref_period' 'start' cannot be after 'end'. It ", + "will not be used.") + ) + expect_warning( + PeriodSPEI(exp = exp1, exp_cor = exp_cor1, dates_exp = dates_exp, + dates_expcor = dates_expcor, lat = lat, ref_period = list(2008, 2021)), + paste0("Parameter 'ref_period' contain years outside the dates. ", + "It will not be used.") + ) # standardization expect_error( PeriodSPEI(exp = exp1, standardization = 10, dates_exp = dates_exp, lat = lat), @@ -251,12 +292,6 @@ test_that("1. Initial checks PeriodSPEI", { "of the following distributions: 'log-Logistic', 'Gamma' or ", "'PearsonIII'.") ) - # fit - expect_error( - PeriodSPEI(exp = exp1, fit = 1, dates_exp = dates_exp, lat = lat), - paste0("Parameter 'fit' must be a character string containing one of ", - "the following fit methods: 'max-lik', 'ub-pwm', 'pp-pwm'.") - ) # ncores expect_error( PeriodSPEI(exp = exp1, ncores = 1.5, dates_exp = dates_exp, lat = lat), @@ -266,37 +301,67 @@ test_that("1. Initial checks PeriodSPEI", { ############################################## +test_that("2. Output checks: CST_PeriodSPEI", { + res1 <- CST_PeriodSPEI(exp = cube4_exp, exp_cor = NULL) + res2 <- CST_PeriodSPEI(exp = cube4_exp, exp_cor = cube4_expcor) + res3 <- CST_PeriodSPEI(exp = cube4_exp, exp_cor = cube4_expcor, standardization = F) + res4 <- CST_PeriodSPEI(exp = cube4_exp, exp_cor = NULL, return_params = T) + expect_equal( + names(res1), + c("data", "dims", "coords", "attrs") + ) + expect_equal( + res2$attrs$Variable$varName, + "SPEI" + ) + expect_equal( + res3$attrs$Variable$varName, + "Precipitation minus accumulated PET" + ) + expect_equal( + names(res3), + c("data", "dims", "coords", "attrs") + ) + expect_equal( + names(res4), + c("spei", "params") + ) +}) + +############################################## + test_that("2. Output checks", { res1 <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, - dates_exp = dates_exp, dates_expcor = dates_expcor) + dates_exp = dates_exp, dates_expcor = dates_expcor, + return_params = TRUE) res2 <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, dates_exp = dates_exp, dates_expcor = dates_expcor, - standardization = FALSE) + standardization = FALSE) # No info about accumulation res3 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, - dates_exp = dates_exp) + dates_exp = dates_exp, return_params = TRUE) # output dims expect_equal( names(res1), - c('exp', 'exp_cor', 'params') + c('spei', 'params') ) expect_equal( - names(res2), - c('exp', 'exp_cor') + dim(res2), + c(syear = 1, time = 3, latitude = 2, longitude = 1, ensemble = 15) ) expect_equal( names(res3), - c('exp', 'params') + c('spei', 'params') ) expect_equal( dim(res1[[1]]), - dims + dimscor ) expect_equal( - dim(res1[[3]])[which(!names(dim(res1[[3]])) %in% c('coef', 'syear'))], + dim(res1[[2]])[which(!names(dim(res1[[2]])) %in% c('coef', 'syear'))], dims[which(!names(dims) %in% c('syear', 'ensemble'))] ) expect_equal( - dim(res2[[2]]), + dim(res2), dimscor ) expect_equal( @@ -310,14 +375,21 @@ test_that("2. Output checks", { res11 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, accum = 2, dates_exp = dates_exp, na.rm = TRUE) expect_equal( - res11$exp[1,2,1,1,][1:4], - c(-0.5553128, 0.4689562, -0.4682003, -0.9956847), + res11[1,3,1,1,][1:4], + c(-0.4292409, -0.1375149, -0.5564081, -0.4273380), tolerance = 0.0001 ) # ref_period + res_ref <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, accum = 2, + dates_exp = dates_exp, dates_expcor = dates_expcor, + na.rm = TRUE, ref_period = list(2011, 2013)) + expect_equal( + !identical(res1[[1]], res_ref), + TRUE + ) # params res5 <- PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp, - params = params1) + params = params1, return_params = TRUE) expect_error( PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp, params = array(abs(rnorm(100)), dim = dimscor)), @@ -333,10 +405,10 @@ test_that("2. Output checks", { dates_exp = dates_exp, standardization = FALSE) expect_equal( names(res4), - c('exp') + NULL ) expect_equal( - dim(res4$exp), + dim(res4), c(syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 10) ) # cross_validation @@ -348,12 +420,12 @@ test_that("2. Output checks", { "since 'exp_cor' is provided.") ) res_crossval_T <- PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp, - cross_validation = TRUE) + cross_validation = TRUE, return_params = TRUE) res_crossval_F <- PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp, - cross_validation = FALSE) + cross_validation = FALSE, return_params = TRUE) # cross_validation = TRUE expect_equal( - dim(res_crossval_T$exp), + dim(res_crossval_T$spei), dims ) expect_equal( @@ -373,6 +445,14 @@ test_that("2. Output checks", { res6 <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, dates_exp = dates_exp, dates_expcor = dates_expcor, pet_method = c('hargreaves_modified', 'hargreaves')) + expect_equal( + identical(res5, res6), + FALSE + ) + expect_equal( + dim(res5), + dim(res6) + ) # time_dim, leadtime_dim, memb_dim, lat_dim res7 <- PeriodSPEI(exp = exp2, exp_cor = exp_cor2, lat = lat, @@ -389,15 +469,6 @@ test_that("2. Output checks", { dates_exp = dates_exp, distribution = 'PearsonIII') # NA res10 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, dates_exp = dates_exp, distribution = 'Gamma') # NA - # fit - - res12 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, - dates_exp = dates_exp, fit = 'ub-pwm') # ok - res13 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, - dates_exp = dates_exp, fit = 'max-lik') # ok - res14 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, - dates_exp = dates_exp, fit = 'pp-pwm') # NA - all.equal(res12, res13) - all.equal(res12, res14) # res14 doesn't work for this data # param_error - # handle_infinity - OK res9 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, -- GitLab From 98e1c9b4cfd36ea4a7d84ae7e004f86f1ca24f8b Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Mon, 31 Jul 2023 12:56:52 +0200 Subject: [PATCH 19/42] Add rolling sum in PeriodAccumulation throughout a new parameter rollingwidth --- NAMESPACE | 1 + R/PeriodAccumulation.R | 168 ++++++++++++++++------- man/CST_PeriodAccumulation.Rd | 36 +++-- man/CST_PeriodSPEI.Rd | 84 ++++++++---- man/PeriodAccumulation.Rd | 15 ++ man/PeriodPET.Rd | 82 +++++++++++ man/PeriodSPEI.Rd | 88 ++++++++---- tests/testthat/test-PeriodAccumulation.R | 60 +++++--- 8 files changed, 400 insertions(+), 134 deletions(-) create mode 100644 man/PeriodPET.Rd diff --git a/NAMESPACE b/NAMESPACE index 7cfbe75..c67f13e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -18,6 +18,7 @@ export(CST_WindPowerDensity) export(MergeRefToExp) export(PeriodAccumulation) export(PeriodMean) +export(PeriodPET) export(PeriodSPEI) export(QThreshold) export(SelectPeriodOnData) diff --git a/R/PeriodAccumulation.R b/R/PeriodAccumulation.R index 3b0d33d..b649a1d 100644 --- a/R/PeriodAccumulation.R +++ b/R/PeriodAccumulation.R @@ -10,12 +10,19 @@ #' August 21st to October 21st} #'} #' +#'There are two possible ways of performing the accumulation. The default one +#'is by accumulating a variable over a dimension specified with 'time_dim'. To +#'chose a specific time period, start and end must be used. The other method +#'is by using rollingwidth parameter. When this parameter is a positive integer, +#'the rolling sum is applied over 'time_dim'. If it is negative, the rolling sum +#'is applied towards 'time_dim'. +#' #'@param data An 's2dv_cube' object as provided function \code{CST_Load} in #' package CSTools. #'@param start An optional parameter to defined the initial date of the period #' to select from the data by providing a list of two elements: the initial -#' date of the period and the initial month of the period. By default it is set -#' to NULL and the indicator is computed using all the data provided in +#' date of the period and the initial m onth of the period. By default it is +#' set to NULL and the indicator is computed using all the data provided in #' \code{data}. #'@param end An optional parameter to defined the final date of the period to #' select from the data by providing a list of two elements: the final day of @@ -25,19 +32,27 @@ #' compute the indicator. By default, it is set to 'ftime'. More than one #' dimension name matching the dimensions provided in the object #' \code{data$data} can be specified. +#'@param rollingwidth An optional parameter to indicate the number of time +#' steps the rolling sum is applied to. If it is negative, the rolling sum is +#' applied backwards 'time_dim', if it is positive, it will be towards it. When +#' this parameter is NULL, the sum is applied over all 'time_dim', in a +#' specified period. It is NULL by default. #'@param na.rm A logical value indicating whether to ignore NA values (TRUE) or #' not (FALSE). #'@param ncores An integer indicating the number of cores to use in parallel #' computation. #' #'@return An 's2dv_cube' object containing the indicator in the element -#'\code{data} with dimensions of the input parameter 'data' except the dimension -#'where the accumulation has been computed (specified with 'time_dim'). The -#''Dates' array is updated to the dates corresponding to the beginning of the -#'aggregated time period. A new element called 'time_bounds' will be added into -#'the 'attrs' element in the 's2dv_cube' object. It consists of a list -#'containing two elements, the start and end dates of the aggregated period with -#'the same dimensions of 'Dates' element. +#'\code{data}. If 'rollingwithd' is not used, it will have the dimensions of +#'the input parameter 'data' except the dimension where the accumulation has +#'been computed (specified with 'time_dim'). The 'Dates' array is updated to the +#'dates corresponding to the beginning of the aggregated time period. A new +#'element called 'time_bounds' will be added into the 'attrs' element in the +#''s2dv_cube' object. It consists of a list containing two elements, the start +#'and end dates of the aggregated period with the same dimensions of 'Dates' +#'element. If 'rollingwithd' is used, it will contain the same dimensions of +#'parameter 'data' and the other elements of the 's2dv_cube' will not be +#'modified. #' #'@examples #'exp <- NULL @@ -66,8 +81,8 @@ #'@importFrom ClimProjDiags Subset #'@export CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, - time_dim = 'ftime', na.rm = FALSE, - ncores = NULL) { + time_dim = 'ftime', rollingwidth = NULL, + na.rm = FALSE, ncores = NULL) { # Check 's2dv_cube' if (!inherits(data, 's2dv_cube')) { stop("Parameter 'data' must be of the class 's2dv_cube'.") @@ -83,31 +98,38 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, } Dates <- data$attrs$Dates - total <- PeriodAccumulation(data$data, dates = Dates, start, end, - time_dim = time_dim, na.rm = na.rm, ncores = ncores) - data$data <- total - data$dims <- dim(total) + if (!is.null(rollingwidth)) { + data$data <- PeriodAccumulation(data$data, time_dim = time_dim, + rollingwidth = rollingwidth, na.rm = na.rm, + ncores = ncores) + } else { + total <- PeriodAccumulation(data$data, dates = Dates, start, end, + time_dim = time_dim, na.rm = na.rm, ncores = ncores) + data$data <- total + data$dims <- dim(total) - if (!is.null(Dates)) { - if (!is.null(start) && !is.null(end)) { - Dates <- SelectPeriodOnDates(dates = Dates, start = start, end = end, - time_dim = time_dim, ncores = ncores) - } - if (is.null(dim(Dates))) { - warning("Element 'Dates' has NULL dimensions. They will not be ", - "subset and 'time_bounds' will be missed.") - data$attrs$Dates <- Dates - } else { - # Create time_bounds - time_bounds <- NULL - time_bounds$start <- ClimProjDiags::Subset(Dates, time_dim, 1, drop = 'selected') - time_bounds$end <- ClimProjDiags::Subset(Dates, time_dim, dim(Dates)[time_dim], drop = 'selected') + if (!is.null(Dates)) { + if (!is.null(start) && !is.null(end)) { + Dates <- SelectPeriodOnDates(dates = Dates, start = start, end = end, + time_dim = time_dim, ncores = ncores) + } + if (is.null(dim(Dates))) { + warning("Element 'Dates' has NULL dimensions. They will not be ", + "subset and 'time_bounds' will be missed.") + data$attrs$Dates <- Dates + } else { + # Create time_bounds + time_bounds <- NULL + time_bounds$start <- ClimProjDiags::Subset(Dates, time_dim, 1, drop = 'selected') + time_bounds$end <- ClimProjDiags::Subset(Dates, time_dim, dim(Dates)[time_dim], drop = 'selected') - # Add Dates in attrs - data$attrs$Dates <- time_bounds$start - data$attrs$time_bounds <- time_bounds + # Add Dates in attrs + data$attrs$Dates <- time_bounds$start + data$attrs$time_bounds <- time_bounds + } } } + return(data) } @@ -122,6 +144,13 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, #' \item\code{HarR}{Harvest Total Precipitation: The total precipitation from #' August 21st to October 21st} #'} +#' +#'There are two possible ways of performing the accumulation. The default one +#'is by accumulating a variable over a dimension specified with 'time_dim'. To +#'chose a specific time period, start and end must be used. The other method +#'is by using rollingwidth parameter. When this parameter is a positive integer, +#'the rolling sum is applied over 'time_dim'. If it is negative, the rolling sum +#'is applied towards 'time_dim'. #' #'@param data A multidimensional array with named dimensions. #'@param dates A multidimensional array of dates with named dimensions matching @@ -140,6 +169,11 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, #' compute the indicator. By default, it is set to 'time'. More than one #' dimension name matching the dimensions provided in the object #' \code{data$data} can be specified. +#'@param rollingwidth An optional parameter to indicate the number of time +#' steps the rolling sum is applied to. If it is negative, the rolling sum is +#' applied backwards 'time_dim', if it is positive, it will be towards it. When +#' this parameter is NULL, the sum is applied over all 'time_dim', in a +#' specified period. It is NULL by default. #'@param na.rm A logical value indicating whether to ignore NA values (TRUE) or #' not (FALSE). #'@param ncores An integer indicating the number of cores to use in parallel @@ -169,8 +203,8 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, #'@import multiApply #'@export PeriodAccumulation <- function(data, dates = NULL, start = NULL, end = NULL, - time_dim = 'time', na.rm = FALSE, - ncores = NULL) { + time_dim = 'time', rollingwidth = NULL, + na.rm = FALSE, ncores = NULL) { if (is.null(data)) { stop("Parameter 'data' cannot be NULL.") } @@ -181,27 +215,59 @@ PeriodAccumulation <- function(data, dates = NULL, start = NULL, end = NULL, dim(data) <- length(data) names(dim(data)) <- time_dim } + dimnames <- names(dim(data)) - if (!is.null(start) && !is.null(end)) { - if (is.null(dates)) { - warning("Parameter 'dates' is NULL and the average of the ", - "full data provided in 'data' is computed.") - } else { - if (!any(c(is.list(start), is.list(end)))) { - stop("Parameter 'start' and 'end' must be lists indicating the ", - "day and the month of the period start and end.") - } - if (!is.null(dim(dates))) { - data <- SelectPeriodOnData(data, dates, start, end, - time_dim = time_dim, ncores = ncores) + # period accumulation + if (is.null(rollingwidth)) { + if (!is.null(start) && !is.null(end)) { + if (is.null(dates)) { + warning("Parameter 'dates' is NULL and the average of the ", + "full data provided in 'data' is computed.") } else { - warning("Parameter 'dates' must have named dimensions if 'start' and ", - "'end' are not NULL. All data will be used.") + if (!any(c(is.list(start), is.list(end)))) { + stop("Parameter 'start' and 'end' must be lists indicating the ", + "day and the month of the period start and end.") + } + if (!is.null(dim(dates))) { + data <- SelectPeriodOnData(data, dates, start, end, + time_dim = time_dim, ncores = ncores) + } else { + warning("Parameter 'dates' must have named dimensions if 'start' and ", + "'end' are not NULL. All data will be used.") + } } } + total <- Apply(list(data), target_dims = time_dim, fun = sum, + na.rm = na.rm, ncores = ncores)$output1 + } else { + # rolling accumulation + if (!is.numeric(rollingwidth)) { + stop("Parameter 'rollingwidth' must be a numeric value.") + } + if (abs(rollingwidth) > dim(data)[time_dim]) { + stop(paste0("Cannot compute accumulation of ", rollingwidth, " months because ", + "loaded data has only ", dim(data)[time_dim], " months.")) + } + backroll <- FALSE + if (rollingwidth < 0) { + rollingwidth <- abs(rollingwidth) + backroll <- TRUE + } + total <- Apply(data = list(data), target_dims = time_dim, + output_dims = time_dim, + fun = function(x, accum, backroll, na.rm) { + res <- rollapply(data = x, width = accum, FUN = sum, + na.rm = na.rm) + if (backroll) { + return(c(rep(NA, accum-1), res)) + } else { + return(c(res, rep(NA, accum-1))) + } + }, accum = rollingwidth, backroll = backroll, na.rm = na.rm, + ncores = ncores)$output1 + pos <- match(dimnames, names(dim(total))) + total <- aperm(total, pos) } - total <- Apply(list(data), target_dims = time_dim, fun = sum, - na.rm = na.rm, ncores = ncores)$output1 + return(total) } - diff --git a/man/CST_PeriodAccumulation.Rd b/man/CST_PeriodAccumulation.Rd index 77f4a38..4326ea2 100644 --- a/man/CST_PeriodAccumulation.Rd +++ b/man/CST_PeriodAccumulation.Rd @@ -9,6 +9,7 @@ CST_PeriodAccumulation( start = NULL, end = NULL, time_dim = "ftime", + rollingwidth = NULL, na.rm = FALSE, ncores = NULL ) @@ -19,8 +20,8 @@ package CSTools.} \item{start}{An optional parameter to defined the initial date of the period to select from the data by providing a list of two elements: the initial -date of the period and the initial month of the period. By default it is set -to NULL and the indicator is computed using all the data provided in +date of the period and the initial m onth of the period. By default it is +set to NULL and the indicator is computed using all the data provided in \code{data}.} \item{end}{An optional parameter to defined the final date of the period to @@ -33,6 +34,12 @@ compute the indicator. By default, it is set to 'ftime'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified.} +\item{rollingwidth}{An optional parameter to indicate the number of time +steps the rolling sum is applied to. If it is negative, the rolling sum is +applied backwards 'time_dim', if it is positive, it will be towards it. When +this parameter is NULL, the sum is applied over all 'time_dim', in a +specified period. It is NULL by default.} + \item{na.rm}{A logical value indicating whether to ignore NA values (TRUE) or not (FALSE).} @@ -41,13 +48,16 @@ computation.} } \value{ An 's2dv_cube' object containing the indicator in the element -\code{data} with dimensions of the input parameter 'data' except the dimension -where the accumulation has been computed (specified with 'time_dim'). The -'Dates' array is updated to the dates corresponding to the beginning of the -aggregated time period. A new element called 'time_bounds' will be added into -the 'attrs' element in the 's2dv_cube' object. It consists of a list -containing two elements, the start and end dates of the aggregated period with -the same dimensions of 'Dates' element. +\code{data}. If 'rollingwithd' is not used, it will have the dimensions of +the input parameter 'data' except the dimension where the accumulation has +been computed (specified with 'time_dim'). The 'Dates' array is updated to the +dates corresponding to the beginning of the aggregated time period. A new +element called 'time_bounds' will be added into the 'attrs' element in the +'s2dv_cube' object. It consists of a list containing two elements, the start +and end dates of the aggregated period with the same dimensions of 'Dates' +element. If 'rollingwithd' is used, it will contain the same dimensions of +parameter 'data' and the other elements of the 's2dv_cube' will not be +modified. } \description{ Period Accumulation computes the sum (accumulation) of a given variable in a @@ -60,6 +70,14 @@ by using this function: August 21st to October 21st} } } +\details{ +There are two possible ways of performing the accumulation. The default one +is by accumulating a variable over a dimension specified with 'time_dim'. To +chose a specific time period, start and end must be used. The other method +is by using rollingwidth parameter. When this parameter is a positive integer, +the rolling sum is applied over 'time_dim'. If it is negative, the rolling sum +is applied towards 'time_dim'. +} \examples{ exp <- NULL exp$data <- array(rnorm(216)*200, dim = c(dataset = 1, member = 2, sdate = 3, diff --git a/man/CST_PeriodSPEI.Rd b/man/CST_PeriodSPEI.Rd index 8e8ab19..eaa139e 100644 --- a/man/CST_PeriodSPEI.Rd +++ b/man/CST_PeriodSPEI.Rd @@ -21,9 +21,9 @@ CST_PeriodSPEI( pet_method = "hargreaves", method = "parametric", distribution = "log-Logistic", - fit = "ub-pwm", param_error = -9999, handle_infinity = FALSE, + return_params = FALSE, na.rm = FALSE, ncores = NULL ) @@ -57,7 +57,9 @@ computed for individual members.} dimension. By default it is set by 'latitude'.} \item{accum}{An integer value indicating the number of months for the -accumulation for each variable.} +accumulation for each variable. When it is greater than 1, the result will +be filled with NA until the accum time_dim dimension number due to the +accumulation to previous months.} \item{ref_period}{A list with two numeric values with the starting and end points of the reference period used for computing the index. The default @@ -102,12 +104,9 @@ default.} \item{distribution}{A character string indicating the name of the distribution function to be used for computing the SPEI. The accepted names are: -'log-Logistic', 'Gamma' or 'PearsonIII'. It is set to 'log-Logistic' by -default.} - -\item{fit}{A character string indicating the name of the method used for -computing the distribution function parameters The accepteed names are: -'ub-pwm', 'pp-pwm' and 'max-lik'. It is set to 'ub-pwm' by default.} +'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The +'Gamma' method only works when only precipitation is provided and other +variables are 0 because it is positive defined (SPI indicator).} \item{param_error}{A numeric value with the error accepted.} @@ -115,37 +114,66 @@ computing the distribution function parameters The accepteed names are: or not (FALSE).} \item{na.rm}{A logical value indicating whether NA values should be removed -from data. It is FALSE by default.} +from data. It is FALSE by default. If it is FALSE and there are NA values, +(if standardization is TRUE) all values of other dimensions except time_dim +and leadtime_dim will be set to NA directly. On the other hand, if it is +TRUE, if the data from other dimensions except time_dim and leadtime_dim is +not reaching 4 values, it is not enough values to estimate the parameters +and the result will include NA.} \item{ncores}{An integer value indicating the number of cores to use in parallel computation.} } \value{ -A list with elements: -\itemize{ - \item{'exp', if 'standarization' is TRUE an 's2dv_cube' conaining the 'SPEI' - in element data from 'exp' array with the same dimensions as 'exp'. - If it is FALSE, it is an array with the accumulated values of PET - minus 'prlr' data.} - \item{'exp_cor', if 'standarization' is TRUE and if 'exp_cor' is not - NULL. It is an 's2dv_cube' with the SPEI data from 'exp_cor' in - element 'data'. If 'standarization' is FALSE, only the accumulated - values of PET minus 'prlr' is returned.} - \item{'params', returned if 'standarization' is TRUE, it contains the - parameters used for the standarization of 'exp' that are used for - computing the 'SPEI' for 'exp_cor'.} -} +An 's2dv_cube' object containing the SPEI multidimensional array in +element \code{data}. If 'exp_cor' is provided, only results from 'exp_cor' +will be provided. The parameters of the standardization will only be returned +if 'return_params' is TRUE. The SPEI will only be computed if +'standardization' is TRUE. If 'standardization' is FALSE, only the climatic +water balance (precipitation minus evapotranspiration) will be returned. The +resultant arrays will have the same dimensions as the initial input data. The +other elements in the 's2dv_cube' will be updated with the combined +information of the input data arrays. } \description{ Calculation of the Standardised Precipitation-Evapotranspiration Index (SPEI) that is a multiscalar drought index based on climatic data. It can be used for determining the onset, duration and magnitude of drought conditions with respect to normal conditions in a variety of natural and managed systems such -as crops, ecosystems, rivers, water resources, etc. The SPI is calculated -using monthly (or weekly) precipitation as the input data. The SPEI uses the -monthly (or weekly) difference between precipitation and pet. This represents -a simple climatic water balance which is calculated at different time scales -to obtain the SPEI. +as crops, ecosystems, rivers, water resources, etc. The idea behind the SPEI +is to compare the highest possible evapotranspiration with the current water +availability. The SPEI uses the monthly (or weekly) difference between +precipitation and potential evapotranspiration. This represents a simple +climatic water balance which is calculated at different time scales to obtain +the SPEI. This function is build to work be compatible with other tools in +that work with 's2dv_cube' object class. The input data must be this object +class. If you don't work with 's2dv_cube', see PeriodSPEI. +} +\details{ +Next, some specifications for the calculation of this indicator will be +discussed. On the one hand, the model to be used to calculate potential +evapotranspiration is specified with the pet_method parameter (hargreaves, +hargraves modified or thornwhite). On the other hand, to choose the time scale +in which you want to accumulate the SPEI (SPEI3, SPEI6...) is done using the +accum parameter, where you must indicate the number of time steps you want to +accumulate throughout leadtime_dim. Since the accumulation is done for the +elapsed time steps, there will be no complete accumulations until reaching the +time instant equal to the value of the parameter. For this reason, in the +result, we will find that for the dimension where the accumulation has been +carried out, the values of the array will be NA since they do not include +complete accumulations. Also, there is a parameter to specify if the +standardization that can be TRUE or FALSE. If it is TRUE, the data is fit to a +probability distribution to transform the original values to standardized +units that are comparable in space and time and at different SPEI time scales. +The na.rm parameter is a logical parameter used to decide whether to remove +the NA values from the data before doing the calculation. It must be taken +into account that if na.rm == FALSE and there is some NA value in the specific +coordinates which the SPEI is computed, standardization cannot be carried out +for those coordinates and therefore, the result will be filled with NA for the +specific coordinates. However, when na.rm == TRUE, if the amount of data for +those specific coordinates is smaller than 4, it will not be possible to carry +out because we will not have enough data and the result will be also filled +with NAs for that coordinates. } \examples{ dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, diff --git a/man/PeriodAccumulation.Rd b/man/PeriodAccumulation.Rd index 614b65c..a3dbf46 100644 --- a/man/PeriodAccumulation.Rd +++ b/man/PeriodAccumulation.Rd @@ -10,6 +10,7 @@ PeriodAccumulation( start = NULL, end = NULL, time_dim = "time", + rollingwidth = NULL, na.rm = FALSE, ncores = NULL ) @@ -37,6 +38,12 @@ compute the indicator. By default, it is set to 'time'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified.} +\item{rollingwidth}{An optional parameter to indicate the number of time +steps the rolling sum is applied to. If it is negative, the rolling sum is +applied backwards 'time_dim', if it is positive, it will be towards it. When +this parameter is NULL, the sum is applied over all 'time_dim', in a +specified period. It is NULL by default.} + \item{na.rm}{A logical value indicating whether to ignore NA values (TRUE) or not (FALSE).} @@ -58,6 +65,14 @@ by using this function: August 21st to October 21st} } } +\details{ +There are two possible ways of performing the accumulation. The default one +is by accumulating a variable over a dimension specified with 'time_dim'. To +chose a specific time period, start and end must be used. The other method +is by using rollingwidth parameter. When this parameter is a positive integer, +the rolling sum is applied over 'time_dim'. If it is negative, the rolling sum +is applied towards 'time_dim'. +} \examples{ exp <- array(rnorm(216)*200, dim = c(dataset = 1, member = 2, sdate = 3, ftime = 9, lat = 2, lon = 2)) diff --git a/man/PeriodPET.Rd b/man/PeriodPET.Rd new file mode 100644 index 0000000..719d2fa --- /dev/null +++ b/man/PeriodPET.Rd @@ -0,0 +1,82 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PeriodPET.R +\name{PeriodPET} +\alias{PeriodPET} +\title{Compute the Potential Evapotranspiration} +\usage{ +PeriodPET( + data, + dates, + lat, + pet_method = "hargreaves", + time_dim = "syear", + leadtime_dim = "time", + lat_dim = "latitude", + na.rm = FALSE, + ncores = NULL +) +} +\arguments{ +\item{data}{A named list with the needed \code{s2dv_cube} objects containing +the seasonal forecast experiment in the data element for each variable. +Specific variables are needed for each method used in computing the +Potential Evapotranspiration. See parameter 'pet_method'. The accepted +variables are: 'tasmin' and 'tasmax', required for methods 'hargreaves' and +'hargreaves_modified' and 'tas', required for method 'thornthwaite'. +Variable 'prlr' is always needed. The units for temperature variables +('tasmin', 'tasmax' and 'tas') need to be in Celcius degrees; the units for +precipitation ('prlr') need to be in mm/month.} + +\item{dates}{An array of temporal dimensions containing the Dates of +'exp'. It must be of class 'Date' or 'POSIXct'.} + +\item{lat}{A numeric vector containing the latitude values of 'exp'.} + +\item{pet_method}{A character string indicating the method used to compute +the potential evapotranspiration. The accepted methods are: +'hargreaves' and 'hargreaves_modified', that require the data to have +variables tasmin and tasmax; and 'thornthwaite', that requires variable +'tas'.} + +\item{time_dim}{A character string indicating the name of the temporal +dimension. By default, it is set to 'syear'.} + +\item{leadtime_dim}{A character string indicating the name of the temporal +dimension. By default, it is set to 'time'.} + +\item{lat_dim}{A character string indicating the name of the latitudinal +dimension. By default it is set by 'latitude'.} + +\item{na.rm}{A logical value indicating whether NA values should be removed +from data. It is FALSE by default.} + +\item{ncores}{An integer value indicating the number of cores to use in +parallel computation.} +} +\description{ +Compute the Potential evapotranspiration (PET) that is the amount of +evaporation and transpiration that would occur if a sufficient water source +were available. Reference evapotranspiration (ETo) is the amount of +evaporation and transpiration from a reference vegetation of grass. They are +usually considered equivalent. This set of functions calculate PET or ETo +according to the Thornthwaite, Hargreaves or Penman-Monteith equations. +} +\examples{ +dims <- c(time = 3, syear = 3, ensemble = 1, latitude = 1) + +exp_tasmax <- array(rnorm(360, 27.73, 5.26), dim = dims) +exp_tasmin <- array(rnorm(360, 14.83, 3.86), dim = dims) +exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims) +end_year <- 2012 +dates_exp <- as.POSIXct(c(paste0(2010:end_year, "-08-16"), + paste0(2010:end_year, "-09-15"), + paste0(2010:end_year, "-10-16")), "UTC") +dim(dates_exp) <- c(syear = 3, time = 3) + +lat <- c(40) + +exp1 <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) + +res <- PeriodPET(data = exp1, lat = lat, dates = dates_exp) + +} diff --git a/man/PeriodSPEI.Rd b/man/PeriodSPEI.Rd index 86aaa02..fc6c527 100644 --- a/man/PeriodSPEI.Rd +++ b/man/PeriodSPEI.Rd @@ -24,15 +24,15 @@ PeriodSPEI( pet_method = "hargreaves", method = "parametric", distribution = "log-Logistic", - fit = "ub-pwm", param_error = -9999, handle_infinity = FALSE, + return_params = FALSE, na.rm = FALSE, ncores = NULL ) } \arguments{ -\item{exp}{A named list with the needed \code{s2dv_cube} objects containing +\item{exp}{A named list with multidimensional array objects containing the seasonal forecast experiment in the data element for each variable. Specific variables are needed for each method used in computing the Potential Evapotranspiration. See parameter 'pet_method'. The accepted @@ -47,7 +47,7 @@ precipitation ('prlr') need to be in mm/month.} \item{lat}{A numeric vector containing the latitude values of 'exp'.} -\item{exp_cor}{A named list with the needed \code{s2dv_cube} objects for each +\item{exp_cor}{A named list with multidimensional array objects for each variable in which the quantile PeriodSPEI should be applied. If it is not specified, the PeriodSPEI is calculated from object 'exp'.} @@ -67,8 +67,10 @@ computed for individual members.} \item{lat_dim}{A character string indicating the name of the latitudinal dimension. By default it is set by 'latitude'.} -\item{accum}{An integer value indicating the number of months for the -accumulation for each variable.} +\item{accum}{accum An integer value indicating the number of months for the +accumulation for each variable. When it is greater than 1, the result will +be filled with NA until the accum time_dim dimension number due to the +accumulation to previous months.} \item{ref_period}{A list with two numeric values with the starting and end points of the reference period used for computing the index. The default @@ -111,49 +113,75 @@ If can be: 'parametric' or 'non-parametric'.} \item{distribution}{A character string indicating the name of the distribution function to be used for computing the SPEI. The accepted names are: -'log-Logistic', 'Gamma' or 'PearsonIII'. It is set to 'log-Logistic' by -default.} - -\item{fit}{A character string indicating the name of the method used for -computing the distribution function parameters The accepteed names are: -'ub-pwm', 'pp-pwm' and 'max-lik'. It is set to 'ub-pwm' by default.} +'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The +'Gamma' method only works when only precipitation is provided and other +variables are 0 because it is positive defined (SPI indicator).} \item{param_error}{A numeric value with the error accepted.} \item{handle_infinity}{A logical value wether to return Infinite values (TRUE) or not (FALSE).} +\item{return_params}{A logical value indicating wether to return parameters +array (TRUE) or not (FALSE). It is FALSE by default.} + \item{na.rm}{A logical value indicating whether NA values should be removed -from data. It is FALSE by default.} +from data. It is FALSE by default. If it is FALSE and there are NA values, +(if standardization is TRUE) all values of other dimensions except time_dim +and leadtime_dim will be set to NA directly. On the other hand, if it is +TRUE, if the data from other dimensions except time_dim and leadtime_dim is +not reaching 4 values, it is not enough values to estimate the parameters +and the result will include NA.} \item{ncores}{An integer value indicating the number of cores to use in parallel computation.} } \value{ -A list with elements: -\itemize{ - \item{'exp', if 'standarization' is TRUE an array conaining SPEI data from - 'exp' array with the same dimensions as 'exp'. If it is FALSE, it - is an array with the accumulated values of PET minus 'prlr' data.} - \item{'exp_cor', if 'standarization' is TRUE and if 'exp_cor' is not - NULL. It is an array with the SPEI data from 'exp_cor'. If - 'standarization' is FALSE, only the accumulated values of PET minus - 'prlr' is returned.} - \item{'params', returned if 'standarization' is TRUE, it contains the - parameters used for the standarization of 'exp' that are used for - computing the 'SPEI' for 'exp_cor'.} -} +An 's2dv_cube' object containing the SPEI multidimensional array in +element \code{data}. If 'exp_cor' is provided, only results from 'exp_cor' +will be provided. The parameters of the standardization will only be returned +if 'return_params' is TRUE. The SPEI will only be computed if +'standardization' is TRUE. If 'standardization' is FALSE, only the climatic +water balance (precipitation minus evapotranspiration) will be returned. The +resultant arrays will have the same dimensions as the initial input data. } \description{ Calculation of the Standardised Precipitation-Evapotranspiration Index (SPEI) that is a multiscalar drought index based on climatic data. It can be used for determining the onset, duration and magnitude of drought conditions with respect to normal conditions in a variety of natural and managed systems such -as crops, ecosystems, rivers, water resources, etc. The SPI is calculated -using monthly (or weekly) precipitation as the input data. The SPEI uses the -monthly (or weekly) difference between precipitation and pet. This represents -a simple climatic water balance which is calculated at different time scales -to obtain the SPEI. +as crops, ecosystems, rivers, water resources, etc. The idea behind the SPEI +is to compare the highest possible evapotranspiration with the current water +availability. The SPEI uses the monthly (or weekly) difference between +precipitation and potential evapotranspiration. This represents a simple +climatic water balance which is calculated at different time scales to obtain +the SPEI. +} +\details{ +Next, some specifications for the calculation of this indicator will be +discussed. On the one hand, the model to be used to calculate potential +evapotranspiration is specified with the pet_method parameter (hargreaves, +hargraves modified or thornwhite). On the other hand, to choose the time scale +in which you want to accumulate the SPEI (SPEI3, SPEI6...) is done using the +accum parameter, where you must indicate the number of time steps you want to +accumulate throughout leadtime_dim. Since the accumulation is done for the +elapsed time steps, there will be no complete accumulations until reaching the +time instant equal to the value of the parameter. For this reason, in the +result, we will find that for the dimension where the accumulation has been +carried out, the values of the array will be NA since they do not include +complete accumulations. Also, there is a parameter to specify if the +standardization that can be TRUE or FALSE. If it is TRUE, the data is fit to a +probability distribution to transform the original values to standardized +units that are comparable in space and time and at different SPEI time scales. +The na.rm parameter is a logical parameter used to decide whether to remove +the NA values from the data before doing the calculation. It must be taken +into account that if na.rm == FALSE and there is some NA value in the specific +coordinates which the SPEI is computed, standardization cannot be carried out +for those coordinates and therefore, the result will be filled with NA for the +specific coordinates. However, when na.rm == TRUE, if the amount of data for +those specific coordinates is smaller than 4, it will not be possible to carry +out because we will not have enough data and the result will be also filled +with NAs for that coordinates. } \examples{ dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, diff --git a/tests/testthat/test-PeriodAccumulation.R b/tests/testthat/test-PeriodAccumulation.R index 6898a93..64bf3d2 100644 --- a/tests/testthat/test-PeriodAccumulation.R +++ b/tests/testthat/test-PeriodAccumulation.R @@ -1,5 +1,22 @@ library(CSTools) +# dat1 +dat1 <- array(1:6, dim = c(sdate = 2, time = 3, member = 1)) + +# exp1 +exp <- NULL +exp$data <- array(1:(1 * 3 * 214 * 2), + c(memb = 1, sdate = 3, ftime = 214, lon = 2)) +exp$dims <- dim(exp$data) +exp$attrs$Dates <- c(seq(as.Date("01-04-2000", format = "%d-%m-%Y"), + as.Date("31-10-2000", format = "%d-%m-%Y"), by = 'day'), + seq(as.Date("01-04-2001", format = "%d-%m-%Y"), + as.Date("31-10-2001", format = "%d-%m-%Y"), by = 'day'), + seq(as.Date("01-04-2002", format = "%d-%m-%Y"), + as.Date("31-10-2002", format = "%d-%m-%Y"), by = 'day')) +dim(exp$attrs$Dates) <- c(ftime = 214, sdate = 3) +class(exp) <- 's2dv_cube' + ############################################## test_that("1. Sanity Checks", { expect_error( @@ -27,14 +44,14 @@ test_that("1. Sanity Checks", { PeriodAccumulation(1:10), 55 ) - data <- array(1:24, c(sdate = 2, ftime = 3, lon = 4)) + data <- array(1:24, c(sdate = 2, time = 3, lon = 4)) expect_equal( PeriodAccumulation(data), array(c(9, 12, 27, 30, 45, 48, 63, 66), c(sdate = 2, lon = 4)) ) # Test dates warning expect_warning( - PeriodAccumulation(array(1:10, c(ftime = 10)), + PeriodAccumulation(array(1:10, c(time = 10)), dates = seq(as.Date("01-05-2000", format = "%d-%m-%Y"), as.Date("10-05-2000", format = "%d-%m-%Y"), by = 'day'), start = list(05, 02), end = list(05, 09)), @@ -43,7 +60,7 @@ test_that("1. Sanity Checks", { ) # start and end when dates is not provided expect_warning( - PeriodAccumulation(array(1:10, c(ftime = 10)), + PeriodAccumulation(array(1:10, c(time = 10)), start = list(05, 02), end = list(05, 09)), paste0("Parameter 'dates' is NULL and the average of the full data ", "provided in 'data' is computed.") @@ -53,18 +70,6 @@ test_that("1. Sanity Checks", { ############################################## test_that("2. Seasonal", { - exp <- NULL - exp$data <- array(1:(1 * 3 * 214 * 2), - c(memb = 1, sdate = 3, ftime = 214, lon = 2)) - exp$dims <- dim(exp$data) - exp$attrs$Dates <- c(seq(as.Date("01-04-2000", format = "%d-%m-%Y"), - as.Date("31-10-2000", format = "%d-%m-%Y"), by = 'day'), - seq(as.Date("01-04-2001", format = "%d-%m-%Y"), - as.Date("31-10-2001", format = "%d-%m-%Y"), by = 'day'), - seq(as.Date("01-04-2002", format = "%d-%m-%Y"), - as.Date("31-10-2002", format = "%d-%m-%Y"), by = 'day')) - dim(exp$attrs$Dates) <- c(ftime = 214, sdate = 3) - class(exp) <- 's2dv_cube' output <- exp output$data <- array(c(sum(exp$data[1,1,21:82,1]), sum(exp$data[1,2,21:82,1]), sum(exp$data[1,3,21:82,1]), sum(exp$data[1,1,21:82,2]), @@ -132,4 +137,27 @@ test_that("3. Subset Dates and check time_bounds", { all(lubridate::day(res$attrs$time_bounds$end) == 20), TRUE ) -}) \ No newline at end of file +}) + +############################################## + +test_that("4. Rolling", { + expect_error( + PeriodAccumulation(data = dat1, rollingwidth = 'a'), + "Parameter 'rollingwidth' must be a numeric value." + ) + expect_error( + PeriodAccumulation(data = dat1, rollingwidth = 5), + "Cannot compute accumulation of 5 months because loaded data has only 3 months." + ) + expect_equal( + PeriodAccumulation(data = dat1, rollingwidth = 2), + array(c(4,6,8, 10, NA, NA), dim = c(sdate = 2, time = 3, member = 1)) + ) + expect_equal( + PeriodAccumulation(data = dat1, rollingwidth = -3), + array(c(rep(NA, 4), 9, 12), dim = c(sdate = 2, time = 3, member = 1)) + ) + dat1[1,1,1] <- NA + PeriodAccumulation(data = dat1, rollingwidth = 2, na.rm = FALSE) +}) -- GitLab From c0919b19be5f79d9c9e2b760bc979d3e10bf3661 Mon Sep 17 00:00:00 2001 From: EVA RIFA ROVIRA Date: Wed, 2 Aug 2023 17:44:55 +0200 Subject: [PATCH 20/42] Add new function PeriodStandardization; change variable names in PeriodPET and improve PeriodAccumulation --- R/PeriodAccumulation.R | 15 ++-- R/PeriodPET.R | 44 ++++++------ R/PeriodStandardization.R | 147 ++++++++++++++++++++++++++++++++++++++ man/PeriodAccumulation.Rd | 1 + man/PeriodPET.Rd | 16 ++--- 5 files changed, 188 insertions(+), 35 deletions(-) create mode 100644 R/PeriodStandardization.R diff --git a/R/PeriodAccumulation.R b/R/PeriodAccumulation.R index b649a1d..553b1a5 100644 --- a/R/PeriodAccumulation.R +++ b/R/PeriodAccumulation.R @@ -201,10 +201,12 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, #' end = list(21, 10)) #' #'@import multiApply +#'@import zoo #'@export PeriodAccumulation <- function(data, dates = NULL, start = NULL, end = NULL, time_dim = 'time', rollingwidth = NULL, - na.rm = FALSE, ncores = NULL) { + rolling_add_na = FALSE, na.rm = FALSE, + ncores = NULL) { if (is.null(data)) { stop("Parameter 'data' cannot be NULL.") } @@ -258,11 +260,14 @@ PeriodAccumulation <- function(data, dates = NULL, start = NULL, end = NULL, fun = function(x, accum, backroll, na.rm) { res <- rollapply(data = x, width = accum, FUN = sum, na.rm = na.rm) - if (backroll) { - return(c(rep(NA, accum-1), res)) - } else { - return(c(res, rep(NA, accum-1))) + if (rolling_add_na) { + if (backroll) { + res <- c(rep(NA, accum-1), res) + } else { + res <- c(res, rep(NA, accum-1)) + } } + return(res) }, accum = rollingwidth, backroll = backroll, na.rm = na.rm, ncores = ncores)$output1 pos <- match(dimnames, names(dim(total))) diff --git a/R/PeriodPET.R b/R/PeriodPET.R index fa4079c..67b6afd 100644 --- a/R/PeriodPET.R +++ b/R/PeriodPET.R @@ -11,19 +11,19 @@ #' the seasonal forecast experiment in the data element for each variable. #' Specific variables are needed for each method used in computing the #' Potential Evapotranspiration. See parameter 'pet_method'. The accepted -#' variables are: 'tasmin' and 'tasmax', required for methods 'hargreaves' and -#' 'hargreaves_modified' and 'tas', required for method 'thornthwaite'. -#' Variable 'prlr' is always needed. The units for temperature variables -#' ('tasmin', 'tasmax' and 'tas') need to be in Celcius degrees; the units for -#' precipitation ('prlr') need to be in mm/month. +#' variables are: 'tmin' and 'tmax', required for methods 'hargreaves' and +#' 'hargreaves_modified' and 'tmean', required for method 'thornthwaite'. +#' Variable 'pr' is always needed. The units for temperature variables +#' ('tmin', 'tmax' and 'tmean') need to be in Celcius degrees; the units for +#' precipitation ('pr') need to be in mm/month. #'@param dates An array of temporal dimensions containing the Dates of #' 'exp'. It must be of class 'Date' or 'POSIXct'. #'@param lat A numeric vector containing the latitude values of 'exp'. #'@param pet_method A character string indicating the method used to compute #' the potential evapotranspiration. The accepted methods are: #' 'hargreaves' and 'hargreaves_modified', that require the data to have -#' variables tasmin and tasmax; and 'thornthwaite', that requires variable -#' 'tas'. +#' variables tmin and tmax; and 'thornthwaite', that requires variable +#' 'tmean'. #'@param time_dim A character string indicating the name of the temporal #' dimension. By default, it is set to 'syear'. #'@param leadtime_dim A character string indicating the name of the temporal @@ -49,7 +49,7 @@ #' #'lat <- c(40) #' -#'exp1 <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) +#'exp1 <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) #' #'res <- PeriodPET(data = exp1, lat = lat, dates = dates_exp) #' @@ -92,20 +92,20 @@ PeriodPET <- function(data, dates, lat, pet_method = 'hargreaves', } # data (2) - if (all(c('tasmin', 'tasmax', 'prlr') %in% names(data))) { - # hargreaves modified: 'tasmin', 'tasmax', 'prlr' and 'lat' + if (all(c('tmin', 'tmax', 'pr') %in% names(data))) { + # hargreaves modified: 'tmin', 'tmax', 'pr' and 'lat' if (!(pet_method %in% c('hargreaves_modified', 'hargreaves'))) { warning("Parameter 'pet_method' needs to be 'hargreaves' or ", "'hargreaves_modified'. It is set to 'hargreaves_modified'.") pet_method <- 'hargreaves_modified' } - } else if (all(c('tasmin', 'tasmax') %in% names(data))) { + } else if (all(c('tmin', 'tmax') %in% names(data))) { if (!(pet_method %in% c('hargreaves'))) { warning("Parameter 'pet_method' will be set as 'hargreaves'.") pet_method <- 'hargreaves' } - } else if (c('tas') %in% names(data)) { - # thornthwaite: 'tas' (mean), 'lat' + } else if (c('tmean') %in% names(data)) { + # thornthwaite: 'tmean' (mean), 'lat' if (!(pet_method == 'thornthwaite')) { warning("Parameter 'pet_method' it is set to be 'thornthwaite'.") pet_method <- 'thornthwaite' @@ -173,17 +173,17 @@ PeriodPET <- function(data, dates, lat, pet_method = 'hargreaves', # extract mask of NA locations to return to NA the final result mask_na <- array(1, dim = dim(data[[1]])) if (pet_method == 'hargreaves') { - varnames <- c('tasmax', 'tasmin') - mask_na[which(is.na(data$tasmax))] <- 0 - mask_na[which(is.na(data$tasmin))] <- 0 + varnames <- c('tmax', 'tmin') + mask_na[which(is.na(data$tmax))] <- 0 + mask_na[which(is.na(data$tmin))] <- 0 } else if (pet_method == 'hargreaves_modified') { - varnames <- c('tasmax', 'tasmin', 'prlr') - mask_na[which(is.na(data$tasmax))] <- 0 - mask_na[which(is.na(data$tasmin))] <- 0 - mask_na[which(is.na(data$prlr))] <- 0 + varnames <- c('tmax', 'tmin', 'pr') + mask_na[which(is.na(data$tmax))] <- 0 + mask_na[which(is.na(data$tmin))] <- 0 + mask_na[which(is.na(data$pr))] <- 0 } else if (pet_method == 'thornthwaite') { - varnames <- c('tas') - mask_na[which(is.na(data$tas))] <- 0 + varnames <- c('tmean') + mask_na[which(is.na(data$tmean))] <- 0 } # replace NA with 0 diff --git a/R/PeriodStandardization.R b/R/PeriodStandardization.R new file mode 100644 index 0000000..b4f21e4 --- /dev/null +++ b/R/PeriodStandardization.R @@ -0,0 +1,147 @@ + +# WIP + +PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', + leadtime_dim = 'time', memb_dim = 'ensemble', + ref_period = NULL, cross_validation = FALSE, + handle_infinity = FALSE, param_error = -9999, + method = 'parametric', distribution = 'log-Logistic', + na.rm = FALSE, ncores = NULL) { + # Initial checks + target_dims <- c(leadtime_dim, time_dim, memb_dim) + + if (is.null(ref_period)) { + ref.start <- NULL + ref.end <- NULL + } else { + ref.start <- ref_period[[1]] + ref.end <- ref_period[[2]] + } + + + # Standardization + if (is.null(data_cor)) { + spei <- Apply(data = list(data), + target_dims = target_dims, + fun = .standardization, + leadtime_dim = leadtime_dim, + time_dim = time_dim, memb_dim = memb_dim, + ref_period = ref_period, handle_infinity = handle_infinity, + cross_validation = cross_validation, param_error = param_error, + method = method, distribution = distribution, + na.rm = na.rm, ncores = ncores)$output1 + } else { + spei <- Apply(data = list(data, data_cor), target_dims = target_dims, + fun = .standardization, + leadtime_dim = leadtime_dim, + time_dim = time_dim, memb_dim = memb_dim, + ref_period = ref_period, handle_infinity = handle_infinity, + cross_validation = cross_validation, param_error = param_error, + method = method, distribution = distribution, + na.rm = na.rm, ncores = ncores)$output1 + } + return(spei) +} + +# data <- array(rnorm(10), c(time = 3, syear = 6, ensemble = 25)) + +# res <- .standardization(data = data) + +# data_subset <- ClimProjDiags::Subset(data, along = leadtime_dim, +# indices = ff, drop = 'selected') + +.standardization <- function(data, data_cor = NULL, leadtime_dim = 'time', + time_dim = 'syear', memb_dim = 'ensemble', + ref_period = NULL, handle_infinity = FALSE, + cross_validation = FALSE, param_error = -9999, + method = 'parametric', distribution = 'log-Logistic', + na.rm = FALSE) { + # data: [leadtime_dim, time_dim, memb_dim] + + # maximum number of parameters needed + nleadtime <- as.numeric(dim(data)[leadtime_dim]) + ntime <- as.numeric(dim(data)[time_dim]) + nmemb <- as.numeric(dim(data)[memb_dim]) + fit = 'ub-pwm' + + coef = switch(distribution, + "Gamma" = array(NA, dim = 2, dimnames = list(c('alpha', 'beta'))), + "log-Logistic" = array(NA, dim = 3, dimnames = list(c('xi', 'alpha', 'kappa'))), + "PearsonIII" = array(NA, dim = 3, dimnames = list(c('mu', 'sigma', 'gamma')))) + + if (is.null(data_cor)) { + # cross_val = TRUE + spei_mod <- data*NA + print(dim(spei_mod)) + for (ff in 1:dim(data)[leadtime_dim]) { + data2 <- data[ff, , ] + params_result <- array(dim = c(dim(data)[time_dim], length(coef))) + print(dim(data2)) + if (!is.null(ref.start) && !is.null(ref.end)) { + data.fit <- window(data2, ref.start, ref.end) + } else { + data.fit <- data2 + } + for (nsd in 1:dim(data)[time_dim]) { + acu <- as.vector(data.fit[-nsd, ]) + acu.sorted <- sort.default(acu, method = "quick") + if (na.rm) { + acu.sorted <- acu.sorted[!is.na(acu.sorted)] + } + if (!any(is.na(acu.sorted))) { + print('Inside acu.sorted') + if (length(acu.sorted) != 0) { + acu_sd <- sd(acu.sorted) + if (!is.na(acu_sd) & acu_sd != 0) { + if (distribution != "log-Logistic") { + pze <- sum(acu == 0) / length(acu) + acu.sorted <- acu.sorted[acu.sorted > 0] + } + if (length(acu.sorted) >= 4) { + print('acu.sorted') + print(acu.sorted) + pwm = switch(fit, + "pp-pwm" = pwm.pp(acu.sorted, -0.35, 0, nmom = 3), + pwm.ub(acu.sorted, nmom = 3) + # TLMoments::PWM(acu.sorted, order = 0:2) + ) + lmom <- pwm2lmom(pwm) + if (!(!are.lmom.valid(lmom) || anyNA(lmom[[1]]) || any(is.nan(lmom[[1]])))) { + fortran_vec = c(lmom$lambdas[1:2], lmom$ratios[3]) + f_params = switch(distribution, + "log-Logistic" = tryCatch(lmom::pelglo(fortran_vec), + error = function(e){parglo(lmom)$para}), + "Gamma" = tryCatch(lmom::pelgam(fortran_vec), + error = function(e){pargam(lmom)$para}), + "PearsonIII" = tryCatch(lmom::pelpe3(fortran_vec), + error = function(e){parpe3(lmom)$para})) + if (distribution == 'log-Logistic' && fit == 'max-lik') { + f_params = parglo.maxlik(acu.sorted, f_params)$para + } + params_result[nsd, ] <- f_params + } + } + if (all(is.na(params_result[nsd,]))) { + cdf_res <- NA + } else { + f_params <- params_result[nsd,] + f_params <- f_params[which(!is.na(f_params))] + cdf_res = switch(distribution, + "log-Logistic" = lmom::cdfglo(data, f_params), + "Gamma" = lmom::cdfgam(data, f_params), + "PearsonIII" = lmom::cdfpe3(data, f_params)) + } + std_index_cv <- array(qnorm(cdf_res), dim = c(ntime, nmemb)) + spei_mod[ff, nsd, ] <- std_index_cv[nsd, ] + } + } + } + } + } + } else { + # cross_val = FALSE + + } + return(spei_mod) +} + diff --git a/man/PeriodAccumulation.Rd b/man/PeriodAccumulation.Rd index a3dbf46..c07afea 100644 --- a/man/PeriodAccumulation.Rd +++ b/man/PeriodAccumulation.Rd @@ -11,6 +11,7 @@ PeriodAccumulation( end = NULL, time_dim = "time", rollingwidth = NULL, + rolling_add_na = FALSE, na.rm = FALSE, ncores = NULL ) diff --git a/man/PeriodPET.Rd b/man/PeriodPET.Rd index 719d2fa..69aad45 100644 --- a/man/PeriodPET.Rd +++ b/man/PeriodPET.Rd @@ -21,11 +21,11 @@ PeriodPET( the seasonal forecast experiment in the data element for each variable. Specific variables are needed for each method used in computing the Potential Evapotranspiration. See parameter 'pet_method'. The accepted -variables are: 'tasmin' and 'tasmax', required for methods 'hargreaves' and -'hargreaves_modified' and 'tas', required for method 'thornthwaite'. -Variable 'prlr' is always needed. The units for temperature variables -('tasmin', 'tasmax' and 'tas') need to be in Celcius degrees; the units for -precipitation ('prlr') need to be in mm/month.} +variables are: 'tmin' and 'tmax', required for methods 'hargreaves' and +'hargreaves_modified' and 'tmean', required for method 'thornthwaite'. +Variable 'pr' is always needed. The units for temperature variables +('tmin', 'tmax' and 'tmean') need to be in Celcius degrees; the units for +precipitation ('pr') need to be in mm/month.} \item{dates}{An array of temporal dimensions containing the Dates of 'exp'. It must be of class 'Date' or 'POSIXct'.} @@ -35,8 +35,8 @@ precipitation ('prlr') need to be in mm/month.} \item{pet_method}{A character string indicating the method used to compute the potential evapotranspiration. The accepted methods are: 'hargreaves' and 'hargreaves_modified', that require the data to have -variables tasmin and tasmax; and 'thornthwaite', that requires variable -'tas'.} +variables tmin and tmax; and 'thornthwaite', that requires variable +'tmean'.} \item{time_dim}{A character string indicating the name of the temporal dimension. By default, it is set to 'syear'.} @@ -75,7 +75,7 @@ dim(dates_exp) <- c(syear = 3, time = 3) lat <- c(40) -exp1 <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) +exp1 <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) res <- PeriodPET(data = exp1, lat = lat, dates = dates_exp) -- GitLab From b0f124fe7f9babddc312662fb0808472633d9c75 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 3 Aug 2023 18:31:48 +0200 Subject: [PATCH 21/42] Develop PeriodStandardization for exp_cor: reorganized code --- R/PeriodStandardization.R | 110 +++++++++++++++++++++++++++++++------- 1 file changed, 90 insertions(+), 20 deletions(-) diff --git a/R/PeriodStandardization.R b/R/PeriodStandardization.R index b4f21e4..88ab915 100644 --- a/R/PeriodStandardization.R +++ b/R/PeriodStandardization.R @@ -1,7 +1,7 @@ # WIP -PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', +PeriodStandardization <- function(data, data_cor = NULL, accum = NULL, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', ref_period = NULL, cross_validation = FALSE, handle_infinity = FALSE, param_error = -9999, @@ -40,22 +40,33 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', method = method, distribution = distribution, na.rm = na.rm, ncores = ncores)$output1 } + + # add NA + if (!is.null(accum)) { + spei <- Apply(data = list(spei), target_dims = leadtime_dim, + output_dims = leadtime_dim, + fun = function(x, accum, leadtime_dim) { + res <- c(rep(NA, accum-1), x) + return(res) + }, accum = accum, leadtime_dim = leadtime_dim)$output1 + } + if (is.null(data_cor)) { + pos <- match(names(dim(data)), names(dim(spei))) + spei <- aperm(spei, pos) + } else { + pos <- match(names(dim(data_cor)), names(dim(spei))) + spei <- aperm(spei, pos) + } return(spei) } -# data <- array(rnorm(10), c(time = 3, syear = 6, ensemble = 25)) - -# res <- .standardization(data = data) - -# data_subset <- ClimProjDiags::Subset(data, along = leadtime_dim, -# indices = ff, drop = 'selected') - .standardization <- function(data, data_cor = NULL, leadtime_dim = 'time', time_dim = 'syear', memb_dim = 'ensemble', ref_period = NULL, handle_infinity = FALSE, cross_validation = FALSE, param_error = -9999, method = 'parametric', distribution = 'log-Logistic', na.rm = FALSE) { + print(summary(data)) # data: [leadtime_dim, time_dim, memb_dim] # maximum number of parameters needed @@ -72,11 +83,10 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', if (is.null(data_cor)) { # cross_val = TRUE spei_mod <- data*NA - print(dim(spei_mod)) + params_result <- array(data = NA, dim = c(ntime, nleadtime, length(coef))) for (ff in 1:dim(data)[leadtime_dim]) { data2 <- data[ff, , ] - params_result <- array(dim = c(dim(data)[time_dim], length(coef))) - print(dim(data2)) + dim(data2) <- c(ntime, nmemb) if (!is.null(ref.start) && !is.null(ref.end)) { data.fit <- window(data2, ref.start, ref.end) } else { @@ -89,7 +99,6 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', acu.sorted <- acu.sorted[!is.na(acu.sorted)] } if (!any(is.na(acu.sorted))) { - print('Inside acu.sorted') if (length(acu.sorted) != 0) { acu_sd <- sd(acu.sorted) if (!is.na(acu_sd) & acu_sd != 0) { @@ -98,8 +107,6 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', acu.sorted <- acu.sorted[acu.sorted > 0] } if (length(acu.sorted) >= 4) { - print('acu.sorted') - print(acu.sorted) pwm = switch(fit, "pp-pwm" = pwm.pp(acu.sorted, -0.35, 0, nmom = 3), pwm.ub(acu.sorted, nmom = 3) @@ -118,18 +125,18 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', if (distribution == 'log-Logistic' && fit == 'max-lik') { f_params = parglo.maxlik(acu.sorted, f_params)$para } - params_result[nsd, ] <- f_params + params_result[nsd, ff, ] <- f_params } } - if (all(is.na(params_result[nsd,]))) { + if (all(is.na(params_result[nsd, ff, ]))) { cdf_res <- NA } else { - f_params <- params_result[nsd,] + f_params <- params_result[nsd, ff, ] f_params <- f_params[which(!is.na(f_params))] cdf_res = switch(distribution, - "log-Logistic" = lmom::cdfglo(data, f_params), - "Gamma" = lmom::cdfgam(data, f_params), - "PearsonIII" = lmom::cdfpe3(data, f_params)) + "log-Logistic" = lmom::cdfglo(data2, f_params), + "Gamma" = lmom::cdfgam(data2, f_params), + "PearsonIII" = lmom::cdfpe3(data2, f_params)) } std_index_cv <- array(qnorm(cdf_res), dim = c(ntime, nmemb)) spei_mod[ff, nsd, ] <- std_index_cv[nsd, ] @@ -140,7 +147,70 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', } } else { # cross_val = FALSE + spei_mod <- data_cor*NA + params_result <- array(data = NA, dim = c(1, length(coef))) + for (ff in 1:dim(data)[leadtime_dim]) { + data_cor2 <- data_cor[ff, , ] + dim(data_cor2) <- c(1, nmemb) + data2 <- data[ff, , ] + dim(data2) <- c(ntime, nmemb) + if (!is.null(ref.start) && !is.null(ref.end)) { + data.fit <- window(data2, ref.start, ref.end) + } else { + data.fit <- data2 + } + acu <- as.vector(data.fit) + acu.sorted <- sort.default(acu, method = "quick") + if (na.rm) { + acu.sorted <- acu.sorted[!is.na(acu.sorted)] + } + if (!any(is.na(acu.sorted))) { + if (length(acu.sorted) != 0) { + acu_sd <- sd(acu.sorted) + if (!is.na(acu_sd) & acu_sd != 0) { + if (distribution != "log-Logistic") { + pze <- sum(acu == 0) / length(acu) + acu.sorted <- acu.sorted[acu.sorted > 0] + } + if (length(acu.sorted) >= 4) { + pwm = switch(fit, + "pp-pwm" = pwm.pp(acu.sorted, -0.35, 0, nmom = 3), + pwm.ub(acu.sorted, nmom = 3) + # TLMoments::PWM(acu.sorted, order = 0:2) + ) + lmom <- pwm2lmom(pwm) + if (!(!are.lmom.valid(lmom) || anyNA(lmom[[1]]) || any(is.nan(lmom[[1]])))) { + fortran_vec = c(lmom$lambdas[1:2], lmom$ratios[3]) + f_params = switch(distribution, + "log-Logistic" = tryCatch(lmom::pelglo(fortran_vec), + error = function(e){parglo(lmom)$para}), + "Gamma" = tryCatch(lmom::pelgam(fortran_vec), + error = function(e){pargam(lmom)$para}), + "PearsonIII" = tryCatch(lmom::pelpe3(fortran_vec), + error = function(e){parpe3(lmom)$para})) + if (distribution == 'log-Logistic' && fit == 'max-lik') { + f_params = parglo.maxlik(acu.sorted, f_params)$para + } + params_result[1, ] <- f_params + } + } + if (all(is.na(params_result[1, ]))) { + cdf_res <- NA + } else { + f_params <- params_result[1, ] + f_params <- f_params[which(!is.na(f_params))] + cdf_res = switch(distribution, + "log-Logistic" = lmom::cdfglo(data_cor2, f_params), + "Gamma" = lmom::cdfgam(data_cor2, f_params), + "PearsonIII" = lmom::cdfpe3(data_cor2, f_params)) + } + std_index_cv <- array(qnorm(cdf_res), dim = c(1, nmemb)) + spei_mod[ff, , ] <- std_index_cv + } + } + } + } } return(spei_mod) } -- GitLab From 9b78395ef69a7f58a17dd6f0f3afb1a994745621 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Mon, 7 Aug 2023 09:24:00 +0200 Subject: [PATCH 22/42] Change variable names in PeriodSPEI --- R/PeriodSPEI.R | 89 +++++++++++++++++++++++-------------------- man/CST_PeriodSPEI.Rd | 24 ++++++------ man/PeriodSPEI.Rd | 24 ++++++------ 3 files changed, 72 insertions(+), 65 deletions(-) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index 9bc1660..5e78d7d 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -42,11 +42,11 @@ #' the seasonal forecast experiment in the data element for each variable. #' Specific variables are needed for each method used in computing the #' Potential Evapotranspiration. See parameter 'pet_method'. The accepted -#' variables are: 'tasmin' and 'tasmax', required for methods 'hargreaves' and -#' 'hargreaves_modified' and 'tas', required for method 'thornthwaite'. -#' Variable 'prlr' is always needed. The units for temperature variables -#' ('tasmin', 'tasmax' and 'tas') need to be in Celcius degrees; the units for -#' precipitation ('prlr') need to be in mm/month. +#' variables are: 'tmin' and 'tmax', required for methods 'hargreaves' and +#' 'hargreaves_modified' and 'tmean', required for method 'thornthwaite'. +#' Variable 'pr' is always needed. The units for temperature variables +#' ('tmin', 'tmax' and 'tmean') need to be in Celcius degrees; the units for +#' precipitation ('pr') need to be in mm/month. #'@param exp_cor A named list with the needed \code{s2dv_cube} objects for each #' variable in which the quantile PeriodSPEI should be applied. If it is not #' specified, the PeriodSPEI is calculated from object 'exp'. @@ -77,11 +77,11 @@ #' have member dimension (specified in 'memb_dim'). #'@param pet_exp A multidimensional array containing the Potential #' EvapoTranspiration data of 'exp'. It must have the same dimensions of the -#' variable 'prlr' of 'exp'. If it is NULL it is calculated using the provided +#' variable 'pr' of 'exp'. If it is NULL it is calculated using the provided #' variables with specified 'pet_method'. It is NULL by default. #'@param pet_expcor A multidimensional array containing the Potential #' EvapoTranspiration data of 'exp_cor'. It must have the same dimensions of -#' the variable 'prlr' of 'exp_cor'. If it is NULL it is calculated using the +#' the variable 'pr' of 'exp_cor'. If it is NULL it is calculated using the #' provided variables with specified 'pet_method'. It is NULL by default. #'@param standardization A logical value indicating wether the standardization #' is computed. @@ -91,8 +91,8 @@ #'@param pet_method A character string indicating the method used to compute #' the potential evapotranspiration. The accepted methods are: #' 'hargreaves' and 'hargreaves_modified', that require the data to have -#' variables tasmin and tasmax; and 'thornthwaite', that requires variable -#' 'tas'. +#' variables tmin and tmax; and 'thornthwaite', that requires variable +#' 'tmean'. #'@param method A character string indicating the standardization method used. #' If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by #' default. @@ -147,9 +147,9 @@ #'expcor_tasmin <- array(rnorm(540, 15.70, 4.40), dim = dimscor) #'expcor_prlr <- array(rnorm(540, 15.62, 21.38), dim = dimscor) #' -#'exp <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) -#'exp_cor <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, -#' 'prlr' = expcor_prlr) +#'exp <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) +#'exp_cor <- list('tmax' = expcor_tasmax, 'tmin' = expcor_tasmin, +#' 'pr' = expcor_prlr) #' #'exp <- lapply(exp, CSTools::s2dv_cube, coords = list(latitude = lat), #' Dates = dates_exp) @@ -323,11 +323,11 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, #' the seasonal forecast experiment in the data element for each variable. #' Specific variables are needed for each method used in computing the #' Potential Evapotranspiration. See parameter 'pet_method'. The accepted -#' variables are: 'tasmin' and 'tasmax', required for methods 'hargreaves' and -#' 'hargreaves_modified' and 'tas', required for method 'thornthwaite'. -#' Variable 'prlr' is always needed. The units for temperature variables -#' ('tasmin', 'tasmax' and 'tas') need to be in Celcius degrees; the units for -#' precipitation ('prlr') need to be in mm/month. +#' variables are: 'tmin' and 'tmax', required for methods 'hargreaves' and +#' 'hargreaves_modified' and 'tmean', required for method 'thornthwaite'. +#' Variable 'pr' is always needed. The units for temperature variables +#' ('tmin', 'tmax' and 'tmean') need to be in Celcius degrees; the units for +#' precipitation ('pr') need to be in mm/month. #'@param dates_exp An array of temporal dimensions containing the Dates of #' 'exp'. It must be of class 'Date' or 'POSIXct'. #'@param lat A numeric vector containing the latitude values of 'exp'. @@ -362,11 +362,11 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, #' leadtime dimension (specified in 'leadtime_dim') of length 1. #'@param pet_exp A multidimensional array containing the Potential #' EvapoTranspiration data of 'exp'. It must have the same dimensions of the -#' variable 'prlr' of 'exp'. If it is NULL it is calculated using the provided +#' variable 'pr' of 'exp'. If it is NULL it is calculated using the provided #' variables with specified 'pet_method'. It is NULL by default. #'@param pet_expcor A multidimensional array containing the Potential #' EvapoTranspiration data of 'exp_cor'. It must have the same dimensions of -#' the variable 'prlr' of 'exp_cor'. If it is NULL it is calculated using the +#' the variable 'pr' of 'exp_cor'. If it is NULL it is calculated using the #' provided variables with specified 'pet_method'. It is NULL by default. #'@param standardization A logical value indicating wether the standardization #' is computed. @@ -376,8 +376,8 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, #'@param pet_method A character string indicating the method used to compute #' the potential evapotranspiration. The accepted methods are: #' 'hargreaves' and 'hargreaves_modified', that require the data to have -#' variables tasmin and tasmax; and 'thornthwaite', that requires variable -#' 'tas'. +#' variables tmin and tmax; and 'thornthwaite', that requires variable +#' 'tmean'. #'@param method A character string indicating the standardization method used. #' If can be: 'parametric' or 'non-parametric'. #'@param distribution A character string indicating the name of the distribution @@ -431,9 +431,9 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, #'dim(dates_expcor) <- c(sday = 1, sweek = 1, syear = 1, time = 3) #'lat <- c(40,40.1) #' -#'exp <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) -#'exp_cor <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, -#' 'prlr' = expcor_prlr) +#'exp <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) +#'exp_cor <- list('tmax' = expcor_tasmax, 'tmin' = expcor_tasmin, +#' 'pr' = expcor_prlr) #'res <- PeriodSPEI(exp = exp, exp_cor = exp_cor, lat = lat, #' dates_exp = dates_exp, dates_expcor = dates_expcor) #' @@ -498,13 +498,13 @@ PeriodSPEI <- function(exp, dates_exp, lat, # Variable checks ## exp (2) pet <- vector("list", 2) - if (!('prlr' %in% names(exp))) { - stop("Variable 'prlr' is not included in 'exp'.") + if (!('pr' %in% names(exp))) { + stop("Variable 'pr' is not included in 'exp'.") } ## exp_cor (2) if (!is.null(exp_cor)) { - if (!('prlr' %in% names(exp_cor))) { - stop("Variable 'prlr' is not included in 'exp_cor'.") + if (!('pr' %in% names(exp_cor))) { + stop("Variable 'pr' is not included in 'exp_cor'.") } if (length(pet_method) == 1) { pet_method <- rep(pet_method, 2) @@ -513,16 +513,16 @@ PeriodSPEI <- function(exp, dates_exp, lat, ## pet_exp if (!is.null(pet_exp)) { - if (length(dim(exp[['prlr']])) != length(dim(pet_exp))) { + if (length(dim(exp[['pr']])) != length(dim(pet_exp))) { stop("Parameter 'pet_exp' must have the same length of all the ", - "dimensions as variable 'prlr' in 'exp'.") + "dimensions as variable 'pr' in 'exp'.") } - if (!all(dim(exp[['prlr']]) %in% dim(pet_exp))) { + if (!all(dim(exp[['pr']]) %in% dim(pet_exp))) { stop("Parameter 'pet_exp' must have the same length of all the ", - "dimensions as variable 'prlr' in 'exp'.") + "dimensions as variable 'pr' in 'exp'.") } - if (any(names(dim(exp[['prlr']])) != names(dim(pet_exp)))) { - pos <- match(names(dim(exp[['prlr']])), names(dim(pet_exp))) + if (any(names(dim(exp[['pr']])) != names(dim(pet_exp)))) { + pos <- match(names(dim(exp[['pr']])), names(dim(pet_exp))) pet_exp <- aperm(pet_exp, pos) } pet[[1]] <- pet_exp @@ -532,16 +532,16 @@ PeriodSPEI <- function(exp, dates_exp, lat, ## pet_expcor if (!is.null(exp_cor)) { if (!is.null(pet_expcor)) { - if (length(dim(exp_cor[['prlr']])) != length(dim(pet_expcor))) { + if (length(dim(exp_cor[['pr']])) != length(dim(pet_expcor))) { stop("Parameter 'pet_expcor' must have the same length of all the ", - "dimensions as variable 'prlr' in 'exp_cor'.") + "dimensions as variable 'pr' in 'exp_cor'.") } - if (!all(dim(exp_cor[['prlr']]) %in% dim(pet_expcor))) { + if (!all(dim(exp_cor[['pr']]) %in% dim(pet_expcor))) { stop("Parameter 'pet_expcor' must have the same length of all the ", - "dimensions as variable 'prlr' in 'exp_cor'.") + "dimensions as variable 'pr' in 'exp_cor'.") } - if (any(names(dim(exp_cor[['prlr']])) != names(dim(pet_expcor)))) { - pos <- match(names(dim(exp_cor[['prlr']])), names(dim(pet_expcor))) + if (any(names(dim(exp_cor[['pr']])) != names(dim(pet_expcor)))) { + pos <- match(names(dim(exp_cor[['pr']])), names(dim(pet_expcor))) pet_expcor <- aperm(pet_expcor, pos) } pet[[2]] <- pet_expcor @@ -767,11 +767,13 @@ PeriodSPEI <- function(exp, dates_exp, lat, lat = lat, pet_method = pet_method[k], time_dim = time_dim, leadtime_dim = leadtime_dim, lat_dim = lat_dim, na.rm = na.rm, ncores = ncores) + print("Step (1): PET") + print(summary(pet[[k]])) computed_pet <- TRUE } # Accumulation - diff_p_pet <- data$prlr - pet[[k]] + diff_p_pet <- data$pr - pet[[k]] accumulated <- Apply(data = list(diff_p_pet), target_dims = list(data = c(leadtime_dim, time_dim)), @@ -779,6 +781,8 @@ PeriodSPEI <- function(exp, dates_exp, lat, fun = function(data, accum) { return(rollapply(data = data, width = accum, FUN = sum)) }, accum = accum, ncores = ncores)$output1 + print("Step (2): Accumulation") + print(summary(accumulated)) # Standardization if (standardization) { @@ -791,6 +795,7 @@ PeriodSPEI <- function(exp, dates_exp, lat, param_error = param_error, method = method, distribution = distribution, na.rm = na.rm, ncores = ncores) + ref_period <- NULL params <- spei$params spei_res <- spei[[1]] @@ -806,6 +811,8 @@ PeriodSPEI <- function(exp, dates_exp, lat, }, accum = accum, leadtime_dim = leadtime_dim)$output1 pos <- match(names(dim(data[[1]])), names(dim(spei_res))) spei_res <- aperm(spei_res, pos) + print("Step (3): Standardization") + print(summary(spei_res)) } if (standardization) { diff --git a/man/CST_PeriodSPEI.Rd b/man/CST_PeriodSPEI.Rd index eaa139e..19bf92a 100644 --- a/man/CST_PeriodSPEI.Rd +++ b/man/CST_PeriodSPEI.Rd @@ -33,11 +33,11 @@ CST_PeriodSPEI( the seasonal forecast experiment in the data element for each variable. Specific variables are needed for each method used in computing the Potential Evapotranspiration. See parameter 'pet_method'. The accepted -variables are: 'tasmin' and 'tasmax', required for methods 'hargreaves' and -'hargreaves_modified' and 'tas', required for method 'thornthwaite'. -Variable 'prlr' is always needed. The units for temperature variables -('tasmin', 'tasmax' and 'tas') need to be in Celcius degrees; the units for -precipitation ('prlr') need to be in mm/month.} +variables are: 'tmin' and 'tmax', required for methods 'hargreaves' and +'hargreaves_modified' and 'tmean', required for method 'thornthwaite'. +Variable 'pr' is always needed. The units for temperature variables +('tmin', 'tmax' and 'tmean') need to be in Celcius degrees; the units for +precipitation ('pr') need to be in mm/month.} \item{exp_cor}{A named list with the needed \code{s2dv_cube} objects for each variable in which the quantile PeriodSPEI should be applied. If it is not @@ -77,12 +77,12 @@ have member dimension (specified in 'memb_dim').} \item{pet_exp}{A multidimensional array containing the Potential EvapoTranspiration data of 'exp'. It must have the same dimensions of the -variable 'prlr' of 'exp'. If it is NULL it is calculated using the provided +variable 'pr' of 'exp'. If it is NULL it is calculated using the provided variables with specified 'pet_method'. It is NULL by default.} \item{pet_expcor}{A multidimensional array containing the Potential EvapoTranspiration data of 'exp_cor'. It must have the same dimensions of -the variable 'prlr' of 'exp_cor'. If it is NULL it is calculated using the +the variable 'pr' of 'exp_cor'. If it is NULL it is calculated using the provided variables with specified 'pet_method'. It is NULL by default.} \item{standardization}{A logical value indicating wether the standardization @@ -95,8 +95,8 @@ is not provided. It is FALSE by default.} \item{pet_method}{A character string indicating the method used to compute the potential evapotranspiration. The accepted methods are: 'hargreaves' and 'hargreaves_modified', that require the data to have -variables tasmin and tasmax; and 'thornthwaite', that requires variable -'tas'.} +variables tmin and tmax; and 'thornthwaite', that requires variable +'tmean'.} \item{method}{A character string indicating the standardization method used. If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by @@ -198,9 +198,9 @@ expcor_tasmax <- array(rnorm(540, 29.03, 5.67), dim = dimscor) expcor_tasmin <- array(rnorm(540, 15.70, 4.40), dim = dimscor) expcor_prlr <- array(rnorm(540, 15.62, 21.38), dim = dimscor) -exp <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) -exp_cor <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, - 'prlr' = expcor_prlr) +exp <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) +exp_cor <- list('tmax' = expcor_tasmax, 'tmin' = expcor_tasmin, + 'pr' = expcor_prlr) exp <- lapply(exp, CSTools::s2dv_cube, coords = list(latitude = lat), Dates = dates_exp) diff --git a/man/PeriodSPEI.Rd b/man/PeriodSPEI.Rd index fc6c527..56b687c 100644 --- a/man/PeriodSPEI.Rd +++ b/man/PeriodSPEI.Rd @@ -36,11 +36,11 @@ PeriodSPEI( the seasonal forecast experiment in the data element for each variable. Specific variables are needed for each method used in computing the Potential Evapotranspiration. See parameter 'pet_method'. The accepted -variables are: 'tasmin' and 'tasmax', required for methods 'hargreaves' and -'hargreaves_modified' and 'tas', required for method 'thornthwaite'. -Variable 'prlr' is always needed. The units for temperature variables -('tasmin', 'tasmax' and 'tas') need to be in Celcius degrees; the units for -precipitation ('prlr') need to be in mm/month.} +variables are: 'tmin' and 'tmax', required for methods 'hargreaves' and +'hargreaves_modified' and 'tmean', required for method 'thornthwaite'. +Variable 'pr' is always needed. The units for temperature variables +('tmin', 'tmax' and 'tmean') need to be in Celcius degrees; the units for +precipitation ('pr') need to be in mm/month.} \item{dates_exp}{An array of temporal dimensions containing the Dates of 'exp'. It must be of class 'Date' or 'POSIXct'.} @@ -87,12 +87,12 @@ leadtime dimension (specified in 'leadtime_dim') of length 1.} \item{pet_exp}{A multidimensional array containing the Potential EvapoTranspiration data of 'exp'. It must have the same dimensions of the -variable 'prlr' of 'exp'. If it is NULL it is calculated using the provided +variable 'pr' of 'exp'. If it is NULL it is calculated using the provided variables with specified 'pet_method'. It is NULL by default.} \item{pet_expcor}{A multidimensional array containing the Potential EvapoTranspiration data of 'exp_cor'. It must have the same dimensions of -the variable 'prlr' of 'exp_cor'. If it is NULL it is calculated using the +the variable 'pr' of 'exp_cor'. If it is NULL it is calculated using the provided variables with specified 'pet_method'. It is NULL by default.} \item{standardization}{A logical value indicating wether the standardization @@ -105,8 +105,8 @@ is not provided. It is FALSE by default.} \item{pet_method}{A character string indicating the method used to compute the potential evapotranspiration. The accepted methods are: 'hargreaves' and 'hargreaves_modified', that require the data to have -variables tasmin and tasmax; and 'thornthwaite', that requires variable -'tas'.} +variables tmin and tmax; and 'thornthwaite', that requires variable +'tmean'.} \item{method}{A character string indicating the standardization method used. If can be: 'parametric' or 'non-parametric'.} @@ -206,9 +206,9 @@ dates_expcor <- as.POSIXct(c(paste0(2020, "-08-16"), dim(dates_expcor) <- c(sday = 1, sweek = 1, syear = 1, time = 3) lat <- c(40,40.1) -exp <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) -exp_cor <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, - 'prlr' = expcor_prlr) +exp <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) +exp_cor <- list('tmax' = expcor_tasmax, 'tmin' = expcor_tasmin, + 'pr' = expcor_prlr) res <- PeriodSPEI(exp = exp, exp_cor = exp_cor, lat = lat, dates_exp = dates_exp, dates_expcor = dates_expcor) -- GitLab From c2ebd0798429b289d9b96f58ce2d863b16152069 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 6 Sep 2023 16:30:41 +0200 Subject: [PATCH 23/42] Add CST_PeriodStandardization, Improve PeriodStandardization and add documentation --- NAMESPACE | 3 + R/PeriodPET.R | 115 +++++++- R/PeriodSPEI.R | 2 +- R/PeriodStandardization.R | 486 +++++++++++++++++++++++-------- man/CST_PeriodPET.Rd | 80 +++++ man/CST_PeriodStandardization.Rd | 91 ++++++ man/PeriodPET.Rd | 10 +- man/PeriodStandardization.Rd | 91 ++++++ 8 files changed, 750 insertions(+), 128 deletions(-) create mode 100644 man/CST_PeriodPET.Rd create mode 100644 man/CST_PeriodStandardization.Rd create mode 100644 man/PeriodStandardization.Rd diff --git a/NAMESPACE b/NAMESPACE index c67f13e..49ec262 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,7 +7,9 @@ export(CST_AccumulationExceedingThreshold) export(CST_MergeRefToExp) export(CST_PeriodAccumulation) export(CST_PeriodMean) +export(CST_PeriodPET) export(CST_PeriodSPEI) +export(CST_PeriodStandardization) export(CST_QThreshold) export(CST_SelectPeriodOnData) export(CST_Threshold) @@ -20,6 +22,7 @@ export(PeriodAccumulation) export(PeriodMean) export(PeriodPET) export(PeriodSPEI) +export(PeriodStandardization) export(QThreshold) export(SelectPeriodOnData) export(SelectPeriodOnDates) diff --git a/R/PeriodPET.R b/R/PeriodPET.R index 67b6afd..8ab1a64 100644 --- a/R/PeriodPET.R +++ b/R/PeriodPET.R @@ -11,14 +11,119 @@ #' the seasonal forecast experiment in the data element for each variable. #' Specific variables are needed for each method used in computing the #' Potential Evapotranspiration. See parameter 'pet_method'. The accepted -#' variables are: 'tmin' and 'tmax', required for methods 'hargreaves' and -#' 'hargreaves_modified' and 'tmean', required for method 'thornthwaite'. -#' Variable 'pr' is always needed. The units for temperature variables +#' variables for each method are: for 'hargreaves', 'tmin' and 'tmax'; for +#' 'hargreaves_modified' are 'tmin', 'tmax' and 'pr'; for method 'thornthwaite' +#' 'tmean' is required. The units for temperature variables #' ('tmin', 'tmax' and 'tmean') need to be in Celcius degrees; the units for #' precipitation ('pr') need to be in mm/month. #'@param dates An array of temporal dimensions containing the Dates of -#' 'exp'. It must be of class 'Date' or 'POSIXct'. -#'@param lat A numeric vector containing the latitude values of 'exp'. +#' 'data'. It must be of class 'Date' or 'POSIXct'. +#'@param lat A numeric vector containing the latitude values of 'data'. +#'@param pet_method A character string indicating the method used to compute +#' the potential evapotranspiration. The accepted methods are: +#' 'hargreaves' and 'hargreaves_modified', that require the data to have +#' variables tmin and tmax; and 'thornthwaite', that requires variable +#' 'tmean'. +#'@param time_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'syear'. +#'@param leadtime_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'time'. +#'@param lat_dim A character string indicating the name of the latitudinal +#' dimension. By default it is set by 'latitude'. +#'@param na.rm A logical value indicating whether NA values should be removed +#' from data. It is FALSE by default. +#'@param ncores An integer value indicating the number of cores to use in +#' parallel computation. +#' +#'@examples +#'dims <- c(time = 3, syear = 3, ensemble = 1, latitude = 1) +#' +#'exp_tasmax <- array(rnorm(360, 27.73, 5.26), dim = dims) +#'exp_tasmin <- array(rnorm(360, 14.83, 3.86), dim = dims) +#'exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims) +#'end_year <- 2012 +#'dates_exp <- as.POSIXct(c(paste0(2010:end_year, "-08-16"), +#' paste0(2010:end_year, "-09-15"), +#' paste0(2010:end_year, "-10-16")), "UTC") +#'dim(dates_exp) <- c(syear = 3, time = 3) +#' +#'lat <- c(40) +#' +#'exp1 <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) +#' +#'res <- PeriodPET(data = exp1, lat = lat, dates = dates_exp) +#' +#'@import SPEI +#'@import lubridate +#'@import multiApply +#'@export +CST_PeriodPET <- function(data, pet_method = 'hargreaves', + time_dim = 'syear', leadtime_dim = 'time', + lat_dim = 'latitude', na.rm = FALSE, + ncores = NULL) { + # Check 's2dv_cube' + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!all(sapply(data, function(x) inherits(x, 's2dv_cube')))) { + stop("Parameter 'data' must be a list of 's2dv_cube' class.") + } + # latitude + if (!any(names(data[[1]]$coords) %in% .KnownLatNames())) { + stop("Spatial coordinate names of parameter 'data' do not match any ", + "of the names accepted by the package.") + } + # Dates + dates_exp <- data[[1]]$attrs$Dates + if (!'Dates' %in% names(data[[1]]$attrs)) { + stop("Element 'Dates' is not found in 'attrs' list of 'data'. ", + "See 's2dv_cube' object description in README file for more ", + "information.") + } + lat_dim <- names(data[[1]]$coords)[[which(names(data[[1]]$coords) %in% .KnownLatNames())]] + + res <- PeriodPET(data = lapply(data, function(x) x$data), + dates = data[[1]]$attrs$Dates, + lat = data[[1]]$coords[[lat_dim]], + pet_method = pet_method, time_dim = time_dim, + leadtime_dim = leadtime_dim, lat_dim = lat_dim, + na.rm = na.rm, ncores = ncores) + # Add metadata + source_files <- lapply(data, function(x) {x$attrs$source_files}) + coords <- data[[1]]$coords + Dates <- data[[1]]$attrs$Dates + metadata <- data[[1]]$attrs$Variable$metadata + metadata_names <- intersect(names(dim(res)), names(metadata)) + res <- CSTools::s2dv_cube(data = res, coords = coords, + varName = paste0('PET'), + metadata = metadata[metadata_names], + Dates = Dates, + source_files = source_files, + when = Sys.time()) + return(res) +} + +#'Compute the Potential Evapotranspiration +#' +#'Compute the Potential evapotranspiration (PET) that is the amount of +#'evaporation and transpiration that would occur if a sufficient water source +#'were available. Reference evapotranspiration (ETo) is the amount of +#'evaporation and transpiration from a reference vegetation of grass. They are +#'usually considered equivalent. This set of functions calculate PET or ETo +#'according to the Thornthwaite, Hargreaves or Penman-Monteith equations. +#' +#'@param data A named list with the needed \code{s2dv_cube} objects containing +#' the seasonal forecast experiment in the data element for each variable. +#' Specific variables are needed for each method used in computing the +#' Potential Evapotranspiration. See parameter 'pet_method'. The accepted +#' variables for each method are: for 'hargreaves', 'tmin' and 'tmax'; for +#' 'hargreaves_modified' are 'tmin', 'tmax' and 'pr'; for method 'thornthwaite' +#' 'tmean' is required. The units for temperature variables +#' ('tmin', 'tmax' and 'tmean') need to be in Celcius degrees; the units for +#' precipitation ('pr') need to be in mm/month. +#'@param dates An array of temporal dimensions containing the Dates of +#' 'data'. It must be of class 'Date' or 'POSIXct'. +#'@param lat A numeric vector containing the latitude values of 'data'. #'@param pet_method A character string indicating the method used to compute #' the potential evapotranspiration. The accepted methods are: #' 'hargreaves' and 'hargreaves_modified', that require the data to have diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index 5e78d7d..979e79d 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -903,7 +903,7 @@ PeriodSPEI <- function(exp, dates_exp, lat, } else if (anyNA(data) && !na.rm) { spei_mod <- array(NA, dim(data)) } else if (var(data, na.rm = T) == 0) { - spei_mod <- array(param_error, dim(data)) + spei_mod <- array(param_error, dim(data)) # Add this? } else { if (is.null(ref_period)) { ref.start <- NULL diff --git a/R/PeriodStandardization.R b/R/PeriodStandardization.R index 88ab915..73792ef 100644 --- a/R/PeriodStandardization.R +++ b/R/PeriodStandardization.R @@ -1,42 +1,299 @@ -# WIP +#'Compute the Standardization of Precipitation-Evapotranspiration Index +#' +#'@param data An 's2dv_cube' that element 'data' stores a multidimensional +#' array containing the data to be standardized. +#'@param data_cor An 's2dv_cube' that element 'data' stores a multidimensional +#' array containing the data in which the standardization should be applied +#' using the fitting parameters from 'data'. +#'@param time_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'syear'. +#'@param leadtime_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'time'. +#'@param memb_dim A character string indicating the name of the dimension in +#' which the ensemble members are stored. When set it to NULL, threshold is +#' computed for individual members. +#'@param accum An integer value indicating the number of months for the +#' accumulation for each variable. When it is greater than 1, the result will +#' be filled with NA until the accum time_dim dimension number due to the +#' accumulation to previous months. +#'@param ref_period A list with two numeric values with the starting and end +#' points of the reference period used for computing the index. The default +#' value is NULL indicating that the first and end values in data will be +#' used as starting and end points. +#'@param param_error A numeric value with the error accepted. +#'@param handle_infinity A logical value wether to return Infinite values (TRUE) +#' or not (FALSE). +#'@param method A character string indicating the standardization method used. +#' If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by +#' default. +#'@param distribution A character string indicating the name of the distribution +#' function to be used for computing the SPEI. The accepted names are: +#' 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The +#' 'Gamma' method only works when only precipitation is provided and other +#' variables are 0 because it is positive defined (SPI indicator). +#'@param na.rm A logical value indicating whether NA values should be removed +#' from data. It is FALSE by default. If it is FALSE and there are NA values, +#' (if standardization is TRUE) all values of other dimensions except time_dim +#' and leadtime_dim will be set to NA directly. On the other hand, if it is +#' TRUE, if the data from other dimensions except time_dim and leadtime_dim is +#' not reaching 4 values, it is not enough values to estimate the parameters +#' and the result will include NA. +#'@param ncores An integer value indicating the number of cores to use in +#' parallel computation. +#' +#'@return An object of class \code{s2dv_cube} containing the standardized data. +#'If 'data_cor' is provided the standardizaton is applied to it using 'data' +#'to adjust it. +#' +#'@examples +#'dims <- c(syear = 6, time = 3, latitude = 2, ensemble = 25) +#'data <- NULL +#'data$data <- array(rnorm(600, -204.1, 78.1), dim = dims) +#'class(data) <- 's2dv_cube' +#'SPEI <- CST_PeriodStandardization(data = data, accum = 2) +#'@export +CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', + leadtime_dim = 'time', memb_dim = 'ensemble', + accum = NULL, ref_period = NULL, param_error = -9999, + handle_infinity = FALSE, method = 'parametric', + distribution = 'log-Logistic', + na.rm = FALSE, ncores = NULL) { + # Check 's2dv_cube' + if (is.null(data)) { + stop("Parameter 'exp' cannot be NULL.") + } + if (!inherits(data, 's2dv_cube')) { + stop("Parameter 'data' must be of 's2dv_cube' class.") + } + if (!is.null(data_cor)) { + if (!inherits(data_cor, 's2dv_cube')) { + stop("Parameter 'data_cor' must be of 's2dv_cube' class.") + } + } + std <- PeriodStandardization(data = data$data, data_cor = data_cor$data, + time_dim = time_dim, leadtime_dim = leadtime_dim, + memb_dim = memb_dim, accum = accum, + ref_period = ref_period, param_error = param_error, + handle_infinity = handle_infinity, method = method, + distribution = distribution, + na.rm = na.rm, ncores = ncores) + if (is.null(data_cor)) { + data$data <- std + data$attrs$Variable$varName <- paste0(data$attrs$Variable$varName, ' standardized') + return(data) + } else { + data_cor$data <- std + data_cor$attrs$Variable$varName <- paste0(data_cor$attrs$Variable$varName, ' standardized') + data_cor$attrs$Datasets <- c(data_cor$attrs$Datasets, data$attrs$Datasets) + data_cor$attrs$source_files <- c(data_cor$attrs$source_files, data$attrs$source_files) + return(data_cor) + } +} -PeriodStandardization <- function(data, data_cor = NULL, accum = NULL, time_dim = 'syear', +#'Compute the Standardization of Precipitation-Evapotranspiration Index +#' +#'@param data A multidimensional array containing the data to be standardized. +#'@param data_cor A multidimensional array containing the data in which the +#' standardization should be applied using the fitting parameters from 'data'. +#'@param time_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'syear'. +#'@param leadtime_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'time'. +#'@param memb_dim A character string indicating the name of the dimension in +#' which the ensemble members are stored. When set it to NULL, threshold is +#' computed for individual members. +#'@param accum An integer value indicating the number of months for the +#' accumulation for each variable. When it is greater than 1, the result will +#' be filled with NA until the accum time_dim dimension number due to the +#' accumulation to previous months. +#'@param ref_period A list with two numeric values with the starting and end +#' points of the reference period used for computing the index. The default +#' value is NULL indicating that the first and end values in data will be +#' used as starting and end points. +#'@param param_error A numeric value with the error accepted. +#'@param handle_infinity A logical value wether to return Infinite values (TRUE) +#' or not (FALSE). +#'@param method A character string indicating the standardization method used. +#' If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by +#' default. +#'@param distribution A character string indicating the name of the distribution +#' function to be used for computing the SPEI. The accepted names are: +#' 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The +#' 'Gamma' method only works when only precipitation is provided and other +#' variables are 0 because it is positive defined (SPI indicator). +#'@param na.rm A logical value indicating whether NA values should be removed +#' from data. It is FALSE by default. If it is FALSE and there are NA values, +#' (if standardization is TRUE) all values of other dimensions except time_dim +#' and leadtime_dim will be set to NA directly. On the other hand, if it is +#' TRUE, if the data from other dimensions except time_dim and leadtime_dim is +#' not reaching 4 values, it is not enough values to estimate the parameters +#' and the result will include NA. +#'@param ncores An integer value indicating the number of cores to use in +#' parallel computation. +#' +#'@return A multidimensional array containing the standardized data. +#'If 'data_cor' is provided the standardizaton is applied to it using 'data' +#'to adjust it. +#' +#'@examples +#'dims <- c(syear = 6, time = 2, latitude = 2, ensemble = 25) +#'dimscor <- c(syear = 1, time = 2, latitude = 2, ensemble = 25) +#'data <- array(rnorm(600, -194.5, 64.8), dim = dims) +#'datacor <- array(rnorm(100, -217.8, 68.29), dim = dimscor) +#' +#'SPEI <- PeriodStandardization(data = data, accum = 2) +#'SPEIcor <- PeriodStandardization(data = data, data_cor = datacor, accum = 2) +#'@import multiApply +#'@import ClimProjDiags +#'@import TLMoments +#'@import lmomco +#'@import lmom +#'@export +PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', - ref_period = NULL, cross_validation = FALSE, - handle_infinity = FALSE, param_error = -9999, - method = 'parametric', distribution = 'log-Logistic', + accum = NULL, ref_period = NULL, param_error = -9999, + handle_infinity = FALSE, method = 'parametric', + distribution = 'log-Logistic', na.rm = FALSE, ncores = NULL) { - # Initial checks + # Check inputs + ## data + if (!is.array(data)) { + stop("Parameter 'data' must be a numeric array.") + } + if (is.null(names(dim(data)))) { + stop("Parameter 'data' must have dimension names.") + } + ## data_cor + if (!is.null(data_cor)) { + if (!is.array(data_cor)) { + stop("Parameter 'data_cor' must be a numeric array.") + } + if (is.null(names(dim(data_cor)))) { + stop("Parameter 'data_cor' must have dimension names.") + } + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(data))) { + stop("Parameter 'time_dim' is not found in 'data' dimension.") + } + if (!is.null(data_cor)) { + if (!time_dim %in% names(dim(data_cor))) { + stop("Parameter 'time_dim' is not found in 'data_cor' dimension.") + } + } + ## leadtime_dim + if (!is.character(leadtime_dim) | length(leadtime_dim) != 1) { + stop("Parameter 'leadtime_dim' must be a character string.") + } + if (!leadtime_dim %in% names(dim(data))) { + stop("Parameter 'leadtime_dim' is not found in 'data' dimension.") + } + if (!is.null(data_cor)) { + if (!leadtime_dim %in% names(dim(data_cor))) { + stop("Parameter 'leadtime_dim' is not found in 'data_cor' dimension.") + } + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) != 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(data))) { + stop("Parameter 'memb_dim' is not found in 'data' dimension.") + } + if (!is.null(data_cor)) { + if (!memb_dim %in% names(dim(data_cor))) { + stop("Parameter 'memb_dim' is not found in 'data_cor' dimension.") + } + } + ## accum + if (accum > dim(data)[leadtime_dim]) { + stop(paste0("Cannot compute accumulation of ", accum, " months because ", + "loaded data has only ", dim(data)[leadtime_dim], " months.")) + } + ## ref_period + if (!is.null(ref_period)) { + if (length(ref_period) != 2) { + warning("Parameter 'ref_period' must be of length two indicating the ", + "first and end years of the reference period. It will not ", + "be used.") + ref_period <- NULL + } else if (!all(sapply(ref_period, is.numeric))) { + warning("Parameter 'ref_period' must be a numeric vector indicating the ", + "'start' and 'end' years of the reference period. It will not ", + "be used.") + ref_period <- NULL + } else if (ref_period[[1]] > ref_period[[2]]) { + warning("In parameter 'ref_period' 'start' cannot be after 'end'. It ", + "will not be used.") + ref_period <- NULL + } else if (!all(unlist(ref_period) %in% year(dates_exp))) { + warning("Parameter 'ref_period' contain years outside the dates. ", + "It will not be used.") + ref_period <- NULL + } else { + years <- year(ClimProjDiags::Subset(dates_exp, along = leadtime_dim, + indices = 1)) + ref_period[[1]] <- which(ref_period[[1]] == years) + ref_period[[2]] <- which(ref_period[[2]] == years) + } + } + ## handle_infinity + if (!is.logical(handle_infinity)) { + stop("Parameter 'handle_infinity' must be a logical value.") + } + ## method + if (!(method %in% c('parametric', 'non-parametric'))) { + stop("Parameter 'method' must be a character string containing one of ", + "the following methods: 'parametric' or 'non-parametric'.") + } + ## distribution + if (!(distribution %in% c('log-Logistic', 'Gamma', 'PearsonIII'))) { + stop("Parameter 'distribution' must be a character string containing one ", + "of the following distributions: 'log-Logistic', 'Gamma' or ", + "'PearsonIII'.") + } + ## na.rm + if (!is.logical(na.rm)) { + stop("Parameter 'na.rm' must be logical.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + target_dims <- c(leadtime_dim, time_dim, memb_dim) if (is.null(ref_period)) { - ref.start <- NULL - ref.end <- NULL + ref_start <- NULL + ref_end <- NULL } else { - ref.start <- ref_period[[1]] - ref.end <- ref_period[[2]] + ref_start <- ref_period[[1]] + ref_end <- ref_period[[2]] } - # Standardization if (is.null(data_cor)) { spei <- Apply(data = list(data), target_dims = target_dims, fun = .standardization, leadtime_dim = leadtime_dim, - time_dim = time_dim, memb_dim = memb_dim, ref_period = ref_period, handle_infinity = handle_infinity, - cross_validation = cross_validation, param_error = param_error, + param_error = param_error, method = method, distribution = distribution, na.rm = na.rm, ncores = ncores)$output1 } else { spei <- Apply(data = list(data, data_cor), target_dims = target_dims, fun = .standardization, leadtime_dim = leadtime_dim, - time_dim = time_dim, memb_dim = memb_dim, ref_period = ref_period, handle_infinity = handle_infinity, - cross_validation = cross_validation, param_error = param_error, + param_error = param_error, method = method, distribution = distribution, na.rm = na.rm, ncores = ncores)$output1 } @@ -44,11 +301,11 @@ PeriodStandardization <- function(data, data_cor = NULL, accum = NULL, time_dim # add NA if (!is.null(accum)) { spei <- Apply(data = list(spei), target_dims = leadtime_dim, - output_dims = leadtime_dim, - fun = function(x, accum, leadtime_dim) { - res <- c(rep(NA, accum-1), x) - return(res) - }, accum = accum, leadtime_dim = leadtime_dim)$output1 + output_dims = leadtime_dim, + fun = function(x, accum, leadtime_dim) { + res <- c(rep(NA, accum-1), x) + return(res) + }, accum = accum, leadtime_dim = leadtime_dim)$output1 } if (is.null(data_cor)) { pos <- match(names(dim(data)), names(dim(spei))) @@ -61,18 +318,11 @@ PeriodStandardization <- function(data, data_cor = NULL, accum = NULL, time_dim } .standardization <- function(data, data_cor = NULL, leadtime_dim = 'time', - time_dim = 'syear', memb_dim = 'ensemble', - ref_period = NULL, handle_infinity = FALSE, - cross_validation = FALSE, param_error = -9999, + ref_period = NULL, handle_infinity = FALSE, param_error = -9999, method = 'parametric', distribution = 'log-Logistic', na.rm = FALSE) { - print(summary(data)) # data: [leadtime_dim, time_dim, memb_dim] - - # maximum number of parameters needed - nleadtime <- as.numeric(dim(data)[leadtime_dim]) - ntime <- as.numeric(dim(data)[time_dim]) - nmemb <- as.numeric(dim(data)[memb_dim]) + dims <- dim(data)[-1] fit = 'ub-pwm' coef = switch(distribution, @@ -83,62 +333,48 @@ PeriodStandardization <- function(data, data_cor = NULL, accum = NULL, time_dim if (is.null(data_cor)) { # cross_val = TRUE spei_mod <- data*NA - params_result <- array(data = NA, dim = c(ntime, nleadtime, length(coef))) for (ff in 1:dim(data)[leadtime_dim]) { data2 <- data[ff, , ] - dim(data2) <- c(ntime, nmemb) - if (!is.null(ref.start) && !is.null(ref.end)) { - data.fit <- window(data2, ref.start, ref.end) + dim(data2) <- dims + if (method == 'non-parametric') { + bp <- matrix(0, length(data2), 1) + for (i in 1:length(data2)) { + bp[i,1] = sum(data2[] <= data2[i], na.rm = na.rm); # Writes the rank of the data + } + std_index <- qnorm((bp - 0.44)/(length(data2) + 0.12)) + dim(std_index) <- dims + spei_mod[ff, , ] <- std_index } else { - data.fit <- data2 - } - for (nsd in 1:dim(data)[time_dim]) { - acu <- as.vector(data.fit[-nsd, ]) - acu.sorted <- sort.default(acu, method = "quick") - if (na.rm) { - acu.sorted <- acu.sorted[!is.na(acu.sorted)] + if (!is.null(ref_start) && !is.null(ref_end)) { + data_fit <- window(data2, ref_start, ref_end) + } else { + data_fit <- data2 } - if (!any(is.na(acu.sorted))) { - if (length(acu.sorted) != 0) { - acu_sd <- sd(acu.sorted) + for (nsd in 1:dim(data)[time_dim]) { + acu <- as.vector(data_fit[-nsd, ]) + acu_sorted <- sort.default(acu, method = "quick") + if (na.rm) { + acu_sorted <- acu_sorted[!is.na(acu_sorted)] + } + if (!any(is.na(acu_sorted)) & length(acu_sorted) != 0) { + acu_sd <- sd(acu_sorted) if (!is.na(acu_sd) & acu_sd != 0) { if (distribution != "log-Logistic") { - pze <- sum(acu == 0) / length(acu) - acu.sorted <- acu.sorted[acu.sorted > 0] + acu_sorted <- acu_sorted[acu_sorted > 0] } - if (length(acu.sorted) >= 4) { - pwm = switch(fit, - "pp-pwm" = pwm.pp(acu.sorted, -0.35, 0, nmom = 3), - pwm.ub(acu.sorted, nmom = 3) - # TLMoments::PWM(acu.sorted, order = 0:2) - ) - lmom <- pwm2lmom(pwm) - if (!(!are.lmom.valid(lmom) || anyNA(lmom[[1]]) || any(is.nan(lmom[[1]])))) { - fortran_vec = c(lmom$lambdas[1:2], lmom$ratios[3]) - f_params = switch(distribution, - "log-Logistic" = tryCatch(lmom::pelglo(fortran_vec), - error = function(e){parglo(lmom)$para}), - "Gamma" = tryCatch(lmom::pelgam(fortran_vec), - error = function(e){pargam(lmom)$para}), - "PearsonIII" = tryCatch(lmom::pelpe3(fortran_vec), - error = function(e){parpe3(lmom)$para})) - if (distribution == 'log-Logistic' && fit == 'max-lik') { - f_params = parglo.maxlik(acu.sorted, f_params)$para - } - params_result[nsd, ff, ] <- f_params - } + if (length(acu_sorted) >= 4) { + f_params <- .std(acu_sorted, fit, distribution) } - if (all(is.na(params_result[nsd, ff, ]))) { + if (all(is.na(f_params))) { cdf_res <- NA } else { - f_params <- params_result[nsd, ff, ] f_params <- f_params[which(!is.na(f_params))] cdf_res = switch(distribution, "log-Logistic" = lmom::cdfglo(data2, f_params), "Gamma" = lmom::cdfgam(data2, f_params), "PearsonIII" = lmom::cdfpe3(data2, f_params)) } - std_index_cv <- array(qnorm(cdf_res), dim = c(ntime, nmemb)) + std_index_cv <- array(qnorm(cdf_res), dim = dims) spei_mod[ff, nsd, ] <- std_index_cv[nsd, ] } } @@ -148,70 +384,86 @@ PeriodStandardization <- function(data, data_cor = NULL, accum = NULL, time_dim } else { # cross_val = FALSE spei_mod <- data_cor*NA - params_result <- array(data = NA, dim = c(1, length(coef))) + dimscor <- dim(data_cor)[-1] for (ff in 1:dim(data)[leadtime_dim]) { data_cor2 <- data_cor[ff, , ] - dim(data_cor2) <- c(1, nmemb) - data2 <- data[ff, , ] - dim(data2) <- c(ntime, nmemb) - if (!is.null(ref.start) && !is.null(ref.end)) { - data.fit <- window(data2, ref.start, ref.end) + dim(data_cor2) <- dimscor + if (method == 'non-parametric') { + bp <- matrix(0, length(data_cor2), 1) + for (i in 1:length(data_cor2)) { + bp[i,1] = sum(data_cor2[] <= data_cor2[i], na.rm = na.rm); # Writes the rank of the data + } + std_index <- qnorm((bp - 0.44)/(length(data_cor2) + 0.12)) + dim(std_index) <- dimscor + spei_mod[ff, , ] <- std_index } else { - data.fit <- data2 - } - acu <- as.vector(data.fit) - acu.sorted <- sort.default(acu, method = "quick") - if (na.rm) { - acu.sorted <- acu.sorted[!is.na(acu.sorted)] - } - if (!any(is.na(acu.sorted))) { - if (length(acu.sorted) != 0) { - acu_sd <- sd(acu.sorted) + data2 <- data[ff, , ] + dim(data2) <- dims + if (!is.null(ref_start) && !is.null(ref_end)) { + data_fit <- window(data2, ref_start, ref_end) + } else { + data_fit <- data2 + } + acu <- as.vector(data_fit) + acu_sorted <- sort.default(acu, method = "quick") + if (na.rm) { + acu_sorted <- acu_sorted[!is.na(acu_sorted)] + } + if (!any(is.na(acu_sorted)) & length(acu_sorted) != 0) { + acu_sd <- sd(acu_sorted) if (!is.na(acu_sd) & acu_sd != 0) { if (distribution != "log-Logistic") { - pze <- sum(acu == 0) / length(acu) - acu.sorted <- acu.sorted[acu.sorted > 0] + acu_sorted <- acu_sorted[acu_sorted > 0] } - if (length(acu.sorted) >= 4) { - pwm = switch(fit, - "pp-pwm" = pwm.pp(acu.sorted, -0.35, 0, nmom = 3), - pwm.ub(acu.sorted, nmom = 3) - # TLMoments::PWM(acu.sorted, order = 0:2) - ) - lmom <- pwm2lmom(pwm) - if (!(!are.lmom.valid(lmom) || anyNA(lmom[[1]]) || any(is.nan(lmom[[1]])))) { - fortran_vec = c(lmom$lambdas[1:2], lmom$ratios[3]) - f_params = switch(distribution, - "log-Logistic" = tryCatch(lmom::pelglo(fortran_vec), - error = function(e){parglo(lmom)$para}), - "Gamma" = tryCatch(lmom::pelgam(fortran_vec), - error = function(e){pargam(lmom)$para}), - "PearsonIII" = tryCatch(lmom::pelpe3(fortran_vec), - error = function(e){parpe3(lmom)$para})) - if (distribution == 'log-Logistic' && fit == 'max-lik') { - f_params = parglo.maxlik(acu.sorted, f_params)$para - } - params_result[1, ] <- f_params - - } + if (length(acu_sorted) >= 4) { + f_params <- .std(data = acu_sorted, fit = fit, + distribution = distribution) } - if (all(is.na(params_result[1, ]))) { + if (all(is.na(f_params))) { cdf_res <- NA } else { - f_params <- params_result[1, ] - f_params <- f_params[which(!is.na(f_params))] - cdf_res = switch(distribution, - "log-Logistic" = lmom::cdfglo(data_cor2, f_params), - "Gamma" = lmom::cdfgam(data_cor2, f_params), - "PearsonIII" = lmom::cdfpe3(data_cor2, f_params)) + f_params <- f_params[which(!is.na(f_params))] + cdf_res = switch(distribution, + "log-Logistic" = lmom::cdfglo(data_cor2, f_params), + "Gamma" = lmom::cdfgam(data_cor2, f_params), + "PearsonIII" = lmom::cdfpe3(data_cor2, f_params)) } - std_index_cv <- array(qnorm(cdf_res), dim = c(1, nmemb)) + std_index_cv <- array(qnorm(cdf_res), dim = dimscor) spei_mod[ff, , ] <- std_index_cv } } } } } + if (handle_infinity) { + # could also use "param_error" ?; we are giving it the min/max value of the grid point + spei_mod[is.infinite(spei_mod) & spei_mod < 0] <- min(spei_mod[!is.infinite(spei_mod)]) + spei_mod[is.infinite(spei_mod) & spei_mod > 0] <- max(spei_mod[!is.infinite(spei_mod)]) + } return(spei_mod) } +.std <- function(data, fit, distribution) { + pwm = switch(fit, + "pp-pwm" = pwm.pp(data, -0.35, 0, nmom = 3), + pwm.ub(data, nmom = 3) + # TLMoments::PWM(data, order = 0:2) + ) + lmom <- pwm2lmom(pwm) + if (!(!are.lmom.valid(lmom) || anyNA(lmom[[1]]) || any(is.nan(lmom[[1]])))) { + fortran_vec = c(lmom$lambdas[1:2], lmom$ratios[3]) + params = switch(distribution, + "log-Logistic" = tryCatch(lmom::pelglo(fortran_vec), + error = function(e){parglo(lmom)$para}), + "Gamma" = tryCatch(lmom::pelgam(fortran_vec), + error = function(e){pargam(lmom)$para}), + "PearsonIII" = tryCatch(lmom::pelpe3(fortran_vec), + error = function(e){parpe3(lmom)$para})) + if (distribution == 'log-Logistic' && fit == 'max-lik') { + params = parglo.maxlik(data, params)$para + } + return(params) + } else { + return(NA) + } +} \ No newline at end of file diff --git a/man/CST_PeriodPET.Rd b/man/CST_PeriodPET.Rd new file mode 100644 index 0000000..ecb225e --- /dev/null +++ b/man/CST_PeriodPET.Rd @@ -0,0 +1,80 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PeriodPET.R +\name{CST_PeriodPET} +\alias{CST_PeriodPET} +\title{Compute the Potential Evapotranspiration} +\usage{ +CST_PeriodPET( + data, + pet_method = "hargreaves", + time_dim = "syear", + leadtime_dim = "time", + lat_dim = "latitude", + na.rm = FALSE, + ncores = NULL +) +} +\arguments{ +\item{data}{A named list with the needed \code{s2dv_cube} objects containing +the seasonal forecast experiment in the data element for each variable. +Specific variables are needed for each method used in computing the +Potential Evapotranspiration. See parameter 'pet_method'. The accepted +variables for each method are: for 'hargreaves', 'tmin' and 'tmax'; for +'hargreaves_modified' are 'tmin', 'tmax' and 'pr'; for method 'thornthwaite' +'tmean' is required. The units for temperature variables +('tmin', 'tmax' and 'tmean') need to be in Celcius degrees; the units for +precipitation ('pr') need to be in mm/month.} + +\item{pet_method}{A character string indicating the method used to compute +the potential evapotranspiration. The accepted methods are: +'hargreaves' and 'hargreaves_modified', that require the data to have +variables tmin and tmax; and 'thornthwaite', that requires variable +'tmean'.} + +\item{time_dim}{A character string indicating the name of the temporal +dimension. By default, it is set to 'syear'.} + +\item{leadtime_dim}{A character string indicating the name of the temporal +dimension. By default, it is set to 'time'.} + +\item{lat_dim}{A character string indicating the name of the latitudinal +dimension. By default it is set by 'latitude'.} + +\item{na.rm}{A logical value indicating whether NA values should be removed +from data. It is FALSE by default.} + +\item{ncores}{An integer value indicating the number of cores to use in +parallel computation.} + +\item{dates}{An array of temporal dimensions containing the Dates of +'data'. It must be of class 'Date' or 'POSIXct'.} + +\item{lat}{A numeric vector containing the latitude values of 'data'.} +} +\description{ +Compute the Potential evapotranspiration (PET) that is the amount of +evaporation and transpiration that would occur if a sufficient water source +were available. Reference evapotranspiration (ETo) is the amount of +evaporation and transpiration from a reference vegetation of grass. They are +usually considered equivalent. This set of functions calculate PET or ETo +according to the Thornthwaite, Hargreaves or Penman-Monteith equations. +} +\examples{ +dims <- c(time = 3, syear = 3, ensemble = 1, latitude = 1) + +exp_tasmax <- array(rnorm(360, 27.73, 5.26), dim = dims) +exp_tasmin <- array(rnorm(360, 14.83, 3.86), dim = dims) +exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims) +end_year <- 2012 +dates_exp <- as.POSIXct(c(paste0(2010:end_year, "-08-16"), + paste0(2010:end_year, "-09-15"), + paste0(2010:end_year, "-10-16")), "UTC") +dim(dates_exp) <- c(syear = 3, time = 3) + +lat <- c(40) + +exp1 <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) + +res <- PeriodPET(data = exp1, lat = lat, dates = dates_exp) + +} diff --git a/man/CST_PeriodStandardization.Rd b/man/CST_PeriodStandardization.Rd new file mode 100644 index 0000000..6d10acc --- /dev/null +++ b/man/CST_PeriodStandardization.Rd @@ -0,0 +1,91 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PeriodStandardization.R +\name{CST_PeriodStandardization} +\alias{CST_PeriodStandardization} +\title{Compute the Standardization of Precipitation-Evapotranspiration Index} +\usage{ +CST_PeriodStandardization( + data, + data_cor = NULL, + time_dim = "syear", + leadtime_dim = "time", + memb_dim = "ensemble", + accum = NULL, + ref_period = NULL, + param_error = -9999, + handle_infinity = FALSE, + method = "parametric", + distribution = "log-Logistic", + na.rm = FALSE, + ncores = NULL +) +} +\arguments{ +\item{data}{An 's2dv_cube' that element 'data' stores a multidimensional +array containing the data to be standardized.} + +\item{data_cor}{An 's2dv_cube' that element 'data' stores a multidimensional +array containing the data in which the standardization should be applied +using the fitting parameters from 'data'.} + +\item{time_dim}{A character string indicating the name of the temporal +dimension. By default, it is set to 'syear'.} + +\item{leadtime_dim}{A character string indicating the name of the temporal +dimension. By default, it is set to 'time'.} + +\item{memb_dim}{A character string indicating the name of the dimension in +which the ensemble members are stored. When set it to NULL, threshold is +computed for individual members.} + +\item{accum}{An integer value indicating the number of months for the +accumulation for each variable. When it is greater than 1, the result will +be filled with NA until the accum time_dim dimension number due to the +accumulation to previous months.} + +\item{ref_period}{A list with two numeric values with the starting and end +points of the reference period used for computing the index. The default +value is NULL indicating that the first and end values in data will be +used as starting and end points.} + +\item{param_error}{A numeric value with the error accepted.} + +\item{handle_infinity}{A logical value wether to return Infinite values (TRUE) +or not (FALSE).} + +\item{method}{A character string indicating the standardization method used. +If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by +default.} + +\item{distribution}{A character string indicating the name of the distribution +function to be used for computing the SPEI. The accepted names are: +'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The +'Gamma' method only works when only precipitation is provided and other +variables are 0 because it is positive defined (SPI indicator).} + +\item{na.rm}{A logical value indicating whether NA values should be removed +from data. It is FALSE by default. If it is FALSE and there are NA values, +(if standardization is TRUE) all values of other dimensions except time_dim +and leadtime_dim will be set to NA directly. On the other hand, if it is +TRUE, if the data from other dimensions except time_dim and leadtime_dim is +not reaching 4 values, it is not enough values to estimate the parameters +and the result will include NA.} + +\item{ncores}{An integer value indicating the number of cores to use in +parallel computation.} +} +\value{ +An object of class \code{s2dv_cube} containing the standardized data. +If 'data_cor' is provided the standardizaton is applied to it using 'data' +to adjust it. +} +\description{ +Compute the Standardization of Precipitation-Evapotranspiration Index +} +\examples{ +dims <- c(syear = 6, time = 3, latitude = 2, ensemble = 25) +data <- NULL +data$data <- array(rnorm(600, -204.1, 78.1), dim = dims) +class(data) <- 's2dv_cube' +SPEI <- CST_PeriodStandardization(data = data, accum = 2) +} diff --git a/man/PeriodPET.Rd b/man/PeriodPET.Rd index 69aad45..0e7aa85 100644 --- a/man/PeriodPET.Rd +++ b/man/PeriodPET.Rd @@ -21,16 +21,16 @@ PeriodPET( the seasonal forecast experiment in the data element for each variable. Specific variables are needed for each method used in computing the Potential Evapotranspiration. See parameter 'pet_method'. The accepted -variables are: 'tmin' and 'tmax', required for methods 'hargreaves' and -'hargreaves_modified' and 'tmean', required for method 'thornthwaite'. -Variable 'pr' is always needed. The units for temperature variables +variables for each method are: for 'hargreaves', 'tmin' and 'tmax'; for +'hargreaves_modified' are 'tmin', 'tmax' and 'pr'; for method 'thornthwaite' +'tmean' is required. The units for temperature variables ('tmin', 'tmax' and 'tmean') need to be in Celcius degrees; the units for precipitation ('pr') need to be in mm/month.} \item{dates}{An array of temporal dimensions containing the Dates of -'exp'. It must be of class 'Date' or 'POSIXct'.} +'data'. It must be of class 'Date' or 'POSIXct'.} -\item{lat}{A numeric vector containing the latitude values of 'exp'.} +\item{lat}{A numeric vector containing the latitude values of 'data'.} \item{pet_method}{A character string indicating the method used to compute the potential evapotranspiration. The accepted methods are: diff --git a/man/PeriodStandardization.Rd b/man/PeriodStandardization.Rd new file mode 100644 index 0000000..663045e --- /dev/null +++ b/man/PeriodStandardization.Rd @@ -0,0 +1,91 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PeriodStandardization.R +\name{PeriodStandardization} +\alias{PeriodStandardization} +\title{Compute the Standardization of Precipitation-Evapotranspiration Index} +\usage{ +PeriodStandardization( + data, + data_cor = NULL, + time_dim = "syear", + leadtime_dim = "time", + memb_dim = "ensemble", + accum = NULL, + ref_period = NULL, + param_error = -9999, + handle_infinity = FALSE, + method = "parametric", + distribution = "log-Logistic", + na.rm = FALSE, + ncores = NULL +) +} +\arguments{ +\item{data}{A multidimensional array containing the data to be standardized.} + +\item{data_cor}{A multidimensional array containing the data in which the +standardization should be applied using the fitting parameters from 'data'.} + +\item{time_dim}{A character string indicating the name of the temporal +dimension. By default, it is set to 'syear'.} + +\item{leadtime_dim}{A character string indicating the name of the temporal +dimension. By default, it is set to 'time'.} + +\item{memb_dim}{A character string indicating the name of the dimension in +which the ensemble members are stored. When set it to NULL, threshold is +computed for individual members.} + +\item{accum}{An integer value indicating the number of months for the +accumulation for each variable. When it is greater than 1, the result will +be filled with NA until the accum time_dim dimension number due to the +accumulation to previous months.} + +\item{ref_period}{A list with two numeric values with the starting and end +points of the reference period used for computing the index. The default +value is NULL indicating that the first and end values in data will be +used as starting and end points.} + +\item{param_error}{A numeric value with the error accepted.} + +\item{handle_infinity}{A logical value wether to return Infinite values (TRUE) +or not (FALSE).} + +\item{method}{A character string indicating the standardization method used. +If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by +default.} + +\item{distribution}{A character string indicating the name of the distribution +function to be used for computing the SPEI. The accepted names are: +'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The +'Gamma' method only works when only precipitation is provided and other +variables are 0 because it is positive defined (SPI indicator).} + +\item{na.rm}{A logical value indicating whether NA values should be removed +from data. It is FALSE by default. If it is FALSE and there are NA values, +(if standardization is TRUE) all values of other dimensions except time_dim +and leadtime_dim will be set to NA directly. On the other hand, if it is +TRUE, if the data from other dimensions except time_dim and leadtime_dim is +not reaching 4 values, it is not enough values to estimate the parameters +and the result will include NA.} + +\item{ncores}{An integer value indicating the number of cores to use in +parallel computation.} +} +\value{ +A multidimensional array containing the standardized data. +If 'data_cor' is provided the standardizaton is applied to it using 'data' +to adjust it. +} +\description{ +Compute the Standardization of Precipitation-Evapotranspiration Index +} +\examples{ +dims <- c(syear = 6, time = 2, latitude = 2, ensemble = 25) +dimscor <- c(syear = 1, time = 2, latitude = 2, ensemble = 25) +data <- array(rnorm(600, -194.5, 64.8), dim = dims) +datacor <- array(rnorm(100, -217.8, 68.29), dim = dimscor) + +SPEI <- PeriodStandardization(data = data, accum = 2) +SPEIcor <- PeriodStandardization(data = data, data_cor = datacor, accum = 2) +} -- GitLab From 5c8f2d6729dcc286b65a877533c77d41234cef79 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 7 Sep 2023 16:40:34 +0200 Subject: [PATCH 24/42] Improve PeriodStandardization; write documentation of PeriodPET and PeriodStandardization --- R/PeriodPET.R | 35 +-- R/PeriodSPEI.R | 12 +- R/PeriodStandardization.R | 203 ++++++++++++----- man/CST_PeriodPET.Rd | 13 +- man/CST_PeriodStandardization.Rd | 63 +++-- man/PeriodPET.Rd | 10 +- man/PeriodStandardization.Rd | 63 +++-- tests/testthat/test-PeriodPET.R | 87 ++++--- tests/testthat/test-PeriodStandardization.R | 241 ++++++++++++++++++++ 9 files changed, 579 insertions(+), 148 deletions(-) create mode 100644 tests/testthat/test-PeriodStandardization.R diff --git a/R/PeriodPET.R b/R/PeriodPET.R index 8ab1a64..f365d39 100644 --- a/R/PeriodPET.R +++ b/R/PeriodPET.R @@ -2,10 +2,14 @@ #' #'Compute the Potential evapotranspiration (PET) that is the amount of #'evaporation and transpiration that would occur if a sufficient water source -#'were available. Reference evapotranspiration (ETo) is the amount of -#'evaporation and transpiration from a reference vegetation of grass. They are -#'usually considered equivalent. This set of functions calculate PET or ETo -#'according to the Thornthwaite, Hargreaves or Penman-Monteith equations. +#'were available. This function calculate PET according to the Thornthwaite, +#'Hargreaves or Hargreaves-modified equations. +#' +#'This function is build to work be compatible with other tools in +#'that work with 's2dv_cube' object class. The input data must be this object +#'class. If you don't work with 's2dv_cube', see PeriodPET. For more information +#'on the SPEI calculation, see functions CST_PeriodStandardization and +#'CST_PeriodAccumulation. #' #'@param data A named list with the needed \code{s2dv_cube} objects containing #' the seasonal forecast experiment in the data element for each variable. @@ -94,12 +98,14 @@ CST_PeriodPET <- function(data, pet_method = 'hargreaves', Dates <- data[[1]]$attrs$Dates metadata <- data[[1]]$attrs$Variable$metadata metadata_names <- intersect(names(dim(res)), names(metadata)) - res <- CSTools::s2dv_cube(data = res, coords = coords, - varName = paste0('PET'), - metadata = metadata[metadata_names], - Dates = Dates, - source_files = source_files, - when = Sys.time()) + suppressWarnings( + res <- CSTools::s2dv_cube(data = res, coords = coords, + varName = paste0('PET'), + metadata = metadata[metadata_names], + Dates = Dates, + source_files = source_files, + when = Sys.time()) + ) return(res) } @@ -107,10 +113,11 @@ CST_PeriodPET <- function(data, pet_method = 'hargreaves', #' #'Compute the Potential evapotranspiration (PET) that is the amount of #'evaporation and transpiration that would occur if a sufficient water source -#'were available. Reference evapotranspiration (ETo) is the amount of -#'evaporation and transpiration from a reference vegetation of grass. They are -#'usually considered equivalent. This set of functions calculate PET or ETo -#'according to the Thornthwaite, Hargreaves or Penman-Monteith equations. +#'were available. This function calculate PET according to the Thornthwaite, +#'Hargreaves or Hargreaves-modified equations. +#' +#'For more information on the SPEI calculation, see functions +#'PeriodStandardization and PeriodAccumulation. #' #'@param data A named list with the needed \code{s2dv_cube} objects containing #' the seasonal forecast experiment in the data element for each variable. diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index 979e79d..c0593b1 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -767,8 +767,6 @@ PeriodSPEI <- function(exp, dates_exp, lat, lat = lat, pet_method = pet_method[k], time_dim = time_dim, leadtime_dim = leadtime_dim, lat_dim = lat_dim, na.rm = na.rm, ncores = ncores) - print("Step (1): PET") - print(summary(pet[[k]])) computed_pet <- TRUE } @@ -781,8 +779,6 @@ PeriodSPEI <- function(exp, dates_exp, lat, fun = function(data, accum) { return(rollapply(data = data, width = accum, FUN = sum)) }, accum = accum, ncores = ncores)$output1 - print("Step (2): Accumulation") - print(summary(accumulated)) # Standardization if (standardization) { @@ -798,6 +794,8 @@ PeriodSPEI <- function(exp, dates_exp, lat, ref_period <- NULL params <- spei$params + print('Params dim:') + print(dim(params)) spei_res <- spei[[1]] } else { spei_res <- accumulated @@ -811,8 +809,6 @@ PeriodSPEI <- function(exp, dates_exp, lat, }, accum = accum, leadtime_dim = leadtime_dim)$output1 pos <- match(names(dim(data[[1]])), names(dim(spei_res))) spei_res <- aperm(spei_res, pos) - print("Step (3): Standardization") - print(summary(spei_res)) } if (standardization) { @@ -919,8 +915,10 @@ PeriodSPEI <- function(exp, dates_exp, lat, for (ff in 1:nleadtime) { data_subset <- ClimProjDiags::Subset(data, along = leadtime_dim, indices = ff, drop = 'selected') - + print('Params subset') + print(dim(params)) params_tmp <- if (all(is.na(params))) {NULL} else {params[, ff, ]} + print(params_tmp) spei_data <- .std(data = data_subset, coef = coef, ntime = ntime, nmemb = nmemb, method = method, diff --git a/R/PeriodStandardization.R b/R/PeriodStandardization.R index 73792ef..9238147 100644 --- a/R/PeriodStandardization.R +++ b/R/PeriodStandardization.R @@ -1,5 +1,34 @@ - #'Compute the Standardization of Precipitation-Evapotranspiration Index +#' +#'The Standardization of the data is the last step of computing the SPEI +#'(Standarized Precipitation-Evapotranspiration Index). With this function the +#'data is fit to a probability distribution to transform the original values to +#'standardized units that are comparable in space and time and at different SPEI +#'time scales. +#' +#'Next, some specifications for the calculation of this indicator will be +#'discussed. To choose the time scale in which you want to accumulate the SPEI +#'(SPEI3, SPEI6...) is done using the accum parameter. The accumulation needs to +#'be performed in the previous step. However, since the accumulation is done for +#'the elapsed time steps, there will be no complete accumulations until reaching +#'the time instant equal to the value of the parameter. For this reason, in the +#'result, we will find that for the dimension where the accumulation has been +#'carried out, the values of the array will be NA since they do not include +#'complete accumulations. If there are NAs in the data and they are not removed with the +#'parameter 'na.rm', the standardization cannot be carried out for those +#'coordinates and therefore, the result will be filled with NA for the +#'specific coordinates. When NAs are not removed, if the length of the data for +#'a computational step is smaller than 4, there will not be enough data for +#'standarize and the result will be also filled with NAs for that coordinates. +#'About the distribution used to fit the data, there are only two possibilities: +#''log-logistic' and 'Gamma'. The 'Gamma' method only works when only +#'precipitation is provided and other variables are 0 because it is positive +#'defined (SPI indicator). For more information about SPEI, see functions +#'PeriodPET and PeriodAccumulation. This function is build to work be compatible +#'with other tools in that work with 's2dv_cube' object class. The input data +#'must be this object class. If you don't work with 's2dv_cube', see +#'PeriodStandardization. For more information on the SPEI indicator calculation, +#'see CST_PeriodPET and CST_PeriodAccumulation. #' #'@param data An 's2dv_cube' that element 'data' stores a multidimensional #' array containing the data to be standardized. @@ -13,17 +42,20 @@ #'@param memb_dim A character string indicating the name of the dimension in #' which the ensemble members are stored. When set it to NULL, threshold is #' computed for individual members. -#'@param accum An integer value indicating the number of months for the -#' accumulation for each variable. When it is greater than 1, the result will -#' be filled with NA until the accum time_dim dimension number due to the -#' accumulation to previous months. +#'@param accum An integer value indicating the number of +#' time steps (leadtime_dim dimension) that have been accumulated in the +#' previous step. When it is greater than 1, the result will be filled with +#' NA until the accum leadtime_dim dimension number due to the +#' accumulation to previous months. If it is 1, no accumulation is done. #'@param ref_period A list with two numeric values with the starting and end #' points of the reference period used for computing the index. The default #' value is NULL indicating that the first and end values in data will be #' used as starting and end points. -#'@param param_error A numeric value with the error accepted. -#'@param handle_infinity A logical value wether to return Infinite values (TRUE) -#' or not (FALSE). +#'@param handle_infinity A logical value wether to return infinite values (TRUE) +#' or not (FALSE). When it is TRUE, the positive infinite values (negative +#' infinite) are substituted by the maximum (minimum) values of each +#' computation step, a subset of the array of dimensions time_dim, leadtime_dim +#' and memb_dim. #'@param method A character string indicating the standardization method used. #' If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by #' default. @@ -31,11 +63,11 @@ #' function to be used for computing the SPEI. The accepted names are: #' 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The #' 'Gamma' method only works when only precipitation is provided and other -#' variables are 0 because it is positive defined (SPI indicator). +#' variables are 0 because it is positive defined (SPI indicator). #'@param na.rm A logical value indicating whether NA values should be removed #' from data. It is FALSE by default. If it is FALSE and there are NA values, -#' (if standardization is TRUE) all values of other dimensions except time_dim -#' and leadtime_dim will be set to NA directly. On the other hand, if it is +#' standardization cannot be carried out for those coordinates and therefore, +#' the result will be filled with NA for the specific coordinates. If it is #' TRUE, if the data from other dimensions except time_dim and leadtime_dim is #' not reaching 4 values, it is not enough values to estimate the parameters #' and the result will include NA. @@ -43,8 +75,9 @@ #' parallel computation. #' #'@return An object of class \code{s2dv_cube} containing the standardized data. -#'If 'data_cor' is provided the standardizaton is applied to it using 'data' -#'to adjust it. +#'If 'data_cor' is provided the array stored in element data will be of the same +#'dimensions as 'data_cor'. If 'data_cor' is not provided, the array stored in +#'element data will be of the same dimensions as 'data'. #' #'@examples #'dims <- c(syear = 6, time = 3, latitude = 2, ensemble = 25) @@ -55,13 +88,14 @@ #'@export CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', - accum = NULL, ref_period = NULL, param_error = -9999, - handle_infinity = FALSE, method = 'parametric', + accum = 1, ref_period = NULL, + handle_infinity = FALSE, + method = 'parametric', distribution = 'log-Logistic', na.rm = FALSE, ncores = NULL) { # Check 's2dv_cube' if (is.null(data)) { - stop("Parameter 'exp' cannot be NULL.") + stop("Parameter 'data' cannot be NULL.") } if (!inherits(data, 's2dv_cube')) { stop("Parameter 'data' must be of 's2dv_cube' class.") @@ -74,7 +108,7 @@ CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', std <- PeriodStandardization(data = data$data, data_cor = data_cor$data, time_dim = time_dim, leadtime_dim = leadtime_dim, memb_dim = memb_dim, accum = accum, - ref_period = ref_period, param_error = param_error, + ref_period = ref_period, handle_infinity = handle_infinity, method = method, distribution = distribution, na.rm = na.rm, ncores = ncores) @@ -92,10 +126,38 @@ CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', } #'Compute the Standardization of Precipitation-Evapotranspiration Index +#' +#'The Standardization of the data is the last step of computing the SPEI +#'indicator. With this function the data is fit to a probability distribution to +#'transform the original values to standardized units that are comparable in +#'space and time and at different SPEI time scales. +#' +#'Next, some specifications for the calculation of this indicator will be +#'discussed. To choose the time scale in which you want to accumulate the SPEI +#'(SPEI3, SPEI6...) is done using the accum parameter. The accumulation needs to +#'be performed in the previous step. However, since the accumulation is done for +#'the elapsed time steps, there will be no complete accumulations until reaching +#'the time instant equal to the value of the parameter. For this reason, in the +#'result, we will find that for the dimension where the accumulation has been +#'carried out, the values of the array will be NA since they do not include +#'complete accumulations. If there are NAs in the data and they are not removed with the +#'parameter 'na.rm', the standardization cannot be carried out for those +#'coordinates and therefore, the result will be filled with NA for the +#'specific coordinates. When NAs are not removed, if the length of the data for +#'a computational step is smaller than 4, there will not be enough data for +#'standarize and the result will be also filled with NAs for that coordinates. +#'About the distribution used to fit the data, there are only two possibilities: +#''log-logistic' and 'Gamma'. The 'Gamma' method only works when only +#'precipitation is provided and other variables are 0 because it is positive +#'defined (SPI indicator). For more information about SPEI, see functions +#'PeriodPET and PeriodAccumulation. #' #'@param data A multidimensional array containing the data to be standardized. #'@param data_cor A multidimensional array containing the data in which the #' standardization should be applied using the fitting parameters from 'data'. +#'@param dates An array containing the dates of the data with the same time +#' dimensions as the data. It is optional and only necessary for using the +#' parameter 'ref_period' to select a reference period directly from dates. #'@param time_dim A character string indicating the name of the temporal #' dimension. By default, it is set to 'syear'. #'@param leadtime_dim A character string indicating the name of the temporal @@ -103,17 +165,20 @@ CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', #'@param memb_dim A character string indicating the name of the dimension in #' which the ensemble members are stored. When set it to NULL, threshold is #' computed for individual members. -#'@param accum An integer value indicating the number of months for the -#' accumulation for each variable. When it is greater than 1, the result will -#' be filled with NA until the accum time_dim dimension number due to the -#' accumulation to previous months. +#'@param accum An integer value indicating the number of +#' time steps (leadtime_dim dimension) that have been accumulated in the +#' previous step. When it is greater than 1, the result will be filled with +#' NA until the accum leadtime_dim dimension number due to the +#' accumulation to previous months. If it is 1, no accumulation is done. #'@param ref_period A list with two numeric values with the starting and end #' points of the reference period used for computing the index. The default #' value is NULL indicating that the first and end values in data will be #' used as starting and end points. -#'@param param_error A numeric value with the error accepted. -#'@param handle_infinity A logical value wether to return Infinite values (TRUE) -#' or not (FALSE). +#'@param handle_infinity A logical value wether to return infinite values (TRUE) +#' or not (FALSE). When it is TRUE, the positive infinite values (negative +#' infinite) are substituted by the maximum (minimum) values of each +#' computation step, a subset of the array of dimensions time_dim, leadtime_dim +#' and memb_dim. #'@param method A character string indicating the standardization method used. #' If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by #' default. @@ -121,11 +186,11 @@ CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', #' function to be used for computing the SPEI. The accepted names are: #' 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The #' 'Gamma' method only works when only precipitation is provided and other -#' variables are 0 because it is positive defined (SPI indicator). +#' variables are 0 because it is positive defined (SPI indicator). #'@param na.rm A logical value indicating whether NA values should be removed #' from data. It is FALSE by default. If it is FALSE and there are NA values, -#' (if standardization is TRUE) all values of other dimensions except time_dim -#' and leadtime_dim will be set to NA directly. On the other hand, if it is +#' standardization cannot be carried out for those coordinates and therefore, +#' the result will be filled with NA for the specific coordinates. If it is #' TRUE, if the data from other dimensions except time_dim and leadtime_dim is #' not reaching 4 values, it is not enough values to estimate the parameters #' and the result will include NA. @@ -133,8 +198,9 @@ CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', #' parallel computation. #' #'@return A multidimensional array containing the standardized data. -#'If 'data_cor' is provided the standardizaton is applied to it using 'data' -#'to adjust it. +#'If 'data_cor' is provided the array will be of the same dimensions as +#''data_cor'. If 'data_cor' is not provided, the array will be of the same +#'dimensions as 'data'. #' #'@examples #'dims <- c(syear = 6, time = 2, latitude = 2, ensemble = 25) @@ -150,10 +216,11 @@ CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', #'@import lmomco #'@import lmom #'@export -PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', - leadtime_dim = 'time', memb_dim = 'ensemble', - accum = NULL, ref_period = NULL, param_error = -9999, - handle_infinity = FALSE, method = 'parametric', +PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, + time_dim = 'syear', leadtime_dim = 'time', + memb_dim = 'ensemble', accum = 1, + ref_period = NULL, handle_infinity = FALSE, + method = 'parametric', distribution = 'log-Logistic', na.rm = FALSE, ncores = NULL) { # Check inputs @@ -173,6 +240,21 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', stop("Parameter 'data_cor' must have dimension names.") } } + ## dates + if (!is.null(dates)) { + if (!(is.Date(dates)) & !(is.POSIXct(dates))) { + stop("Parameter 'dates' is not of the correct class, ", + "only 'Date' and 'POSIXct' classes are accepted.") + } + if (!time_dim %in% names(dim(dates)) | !leadtime_dim %in% names(dim(dates))) { + stop("Parameter 'dates' must have 'time_dim' and 'leadtime_dim' ", + "dimension.") + } + if (dim(data)[c(time_dim)] != dim(dates)[c(time_dim)]) { + stop("Parameter 'dates' needs to have the same length of 'time_dim' ", + "as 'data'.") + } + } ## time_dim if (!is.character(time_dim) | length(time_dim) != 1) { stop("Parameter 'time_dim' must be a character string.") @@ -209,6 +291,13 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', stop("Parameter 'memb_dim' is not found in 'data_cor' dimension.") } } + ## data_cor (2) + if (!is.null(data_cor)) { + if (dim(data)[leadtime_dim] != dim(data_cor)[leadtime_dim]) { + stop("Parameter 'data' and 'data_cor' have dimension 'leadtime_dim' ", + "of different length.") + } + } ## accum if (accum > dim(data)[leadtime_dim]) { stop(paste0("Cannot compute accumulation of ", accum, " months because ", @@ -216,7 +305,11 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', } ## ref_period if (!is.null(ref_period)) { - if (length(ref_period) != 2) { + if (is.null(dates)) { + warning("Parameter 'dates' is not provided so 'ref_period' can't be ", + "used.") + ref_period <- NULL + } else if (length(ref_period) != 2) { warning("Parameter 'ref_period' must be of length two indicating the ", "first and end years of the reference period. It will not ", "be used.") @@ -230,12 +323,12 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', warning("In parameter 'ref_period' 'start' cannot be after 'end'. It ", "will not be used.") ref_period <- NULL - } else if (!all(unlist(ref_period) %in% year(dates_exp))) { + } else if (!all(unlist(ref_period) %in% year(dates))) { warning("Parameter 'ref_period' contain years outside the dates. ", "It will not be used.") ref_period <- NULL } else { - years <- year(ClimProjDiags::Subset(dates_exp, along = leadtime_dim, + years <- year(ClimProjDiags::Subset(dates, along = leadtime_dim, indices = 1)) ref_period[[1]] <- which(ref_period[[1]] == years) ref_period[[2]] <- which(ref_period[[2]] == years) @@ -285,7 +378,6 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', fun = .standardization, leadtime_dim = leadtime_dim, ref_period = ref_period, handle_infinity = handle_infinity, - param_error = param_error, method = method, distribution = distribution, na.rm = na.rm, ncores = ncores)$output1 } else { @@ -293,7 +385,6 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', fun = .standardization, leadtime_dim = leadtime_dim, ref_period = ref_period, handle_infinity = handle_infinity, - param_error = param_error, method = method, distribution = distribution, na.rm = na.rm, ncores = ncores)$output1 } @@ -305,7 +396,8 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', fun = function(x, accum, leadtime_dim) { res <- c(rep(NA, accum-1), x) return(res) - }, accum = accum, leadtime_dim = leadtime_dim)$output1 + }, accum = accum, leadtime_dim = leadtime_dim, + ncores = ncores)$output1 } if (is.null(data_cor)) { pos <- match(names(dim(data)), names(dim(spei))) @@ -318,7 +410,7 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', } .standardization <- function(data, data_cor = NULL, leadtime_dim = 'time', - ref_period = NULL, handle_infinity = FALSE, param_error = -9999, + ref_period = NULL, handle_infinity = FALSE, method = 'parametric', distribution = 'log-Logistic', na.rm = FALSE) { # data: [leadtime_dim, time_dim, memb_dim] @@ -352,9 +444,10 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', } for (nsd in 1:dim(data)[time_dim]) { acu <- as.vector(data_fit[-nsd, ]) - acu_sorted <- sort.default(acu, method = "quick") if (na.rm) { - acu_sorted <- acu_sorted[!is.na(acu_sorted)] + acu_sorted <- sort.default(acu, method = "quick") + } else { + acu_sorted <- sort.default(acu, method = "quick", na.last = TRUE) } if (!any(is.na(acu_sorted)) & length(acu_sorted) != 0) { acu_sd <- sd(acu_sorted) @@ -363,16 +456,19 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', acu_sorted <- acu_sorted[acu_sorted > 0] } if (length(acu_sorted) >= 4) { - f_params <- .std(acu_sorted, fit, distribution) + f_params <- .std(data = acu_sorted, fit = fit, + distribution = distribution) + } else { + f_params <- NA } if (all(is.na(f_params))) { cdf_res <- NA } else { f_params <- f_params[which(!is.na(f_params))] cdf_res = switch(distribution, - "log-Logistic" = lmom::cdfglo(data2, f_params), - "Gamma" = lmom::cdfgam(data2, f_params), - "PearsonIII" = lmom::cdfpe3(data2, f_params)) + "log-Logistic" = lmom::cdfglo(data2, f_params), + "Gamma" = lmom::cdfgam(data2, f_params), + "PearsonIII" = lmom::cdfpe3(data2, f_params)) } std_index_cv <- array(qnorm(cdf_res), dim = dims) spei_mod[ff, nsd, ] <- std_index_cv[nsd, ] @@ -405,9 +501,10 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', data_fit <- data2 } acu <- as.vector(data_fit) - acu_sorted <- sort.default(acu, method = "quick") if (na.rm) { - acu_sorted <- acu_sorted[!is.na(acu_sorted)] + acu_sorted <- sort.default(acu, method = "quick") + } else { + acu_sorted <- sort.default(acu, method = "quick", na.last = TRUE) } if (!any(is.na(acu_sorted)) & length(acu_sorted) != 0) { acu_sd <- sd(acu_sorted) @@ -443,9 +540,9 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', return(spei_mod) } -.std <- function(data, fit, distribution) { +.std <- function(data, fit = 'pp-pwm', distribution = 'log-Logistic') { pwm = switch(fit, - "pp-pwm" = pwm.pp(data, -0.35, 0, nmom = 3), + 'pp-pwm' = pwm.pp(data, -0.35, 0, nmom = 3), pwm.ub(data, nmom = 3) # TLMoments::PWM(data, order = 0:2) ) @@ -453,11 +550,11 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', if (!(!are.lmom.valid(lmom) || anyNA(lmom[[1]]) || any(is.nan(lmom[[1]])))) { fortran_vec = c(lmom$lambdas[1:2], lmom$ratios[3]) params = switch(distribution, - "log-Logistic" = tryCatch(lmom::pelglo(fortran_vec), + 'log-Logistic' = tryCatch(lmom::pelglo(fortran_vec), error = function(e){parglo(lmom)$para}), - "Gamma" = tryCatch(lmom::pelgam(fortran_vec), + 'Gamma' = tryCatch(lmom::pelgam(fortran_vec), error = function(e){pargam(lmom)$para}), - "PearsonIII" = tryCatch(lmom::pelpe3(fortran_vec), + 'PearsonIII' = tryCatch(lmom::pelpe3(fortran_vec), error = function(e){parpe3(lmom)$para})) if (distribution == 'log-Logistic' && fit == 'max-lik') { params = parglo.maxlik(data, params)$para diff --git a/man/CST_PeriodPET.Rd b/man/CST_PeriodPET.Rd index ecb225e..10383af 100644 --- a/man/CST_PeriodPET.Rd +++ b/man/CST_PeriodPET.Rd @@ -54,10 +54,15 @@ parallel computation.} \description{ Compute the Potential evapotranspiration (PET) that is the amount of evaporation and transpiration that would occur if a sufficient water source -were available. Reference evapotranspiration (ETo) is the amount of -evaporation and transpiration from a reference vegetation of grass. They are -usually considered equivalent. This set of functions calculate PET or ETo -according to the Thornthwaite, Hargreaves or Penman-Monteith equations. +were available. This function calculate PET according to the Thornthwaite, +Hargreaves or Hargreaves-modified equations. +} +\details{ +This function is build to work be compatible with other tools in +that work with 's2dv_cube' object class. The input data must be this object +class. If you don't work with 's2dv_cube', see PeriodPET. For more information +on the SPEI calculation, see functions CST_PeriodStandardization and +CST_PeriodAccumulation. } \examples{ dims <- c(time = 3, syear = 3, ensemble = 1, latitude = 1) diff --git a/man/CST_PeriodStandardization.Rd b/man/CST_PeriodStandardization.Rd index 6d10acc..290cb3b 100644 --- a/man/CST_PeriodStandardization.Rd +++ b/man/CST_PeriodStandardization.Rd @@ -10,9 +10,8 @@ CST_PeriodStandardization( time_dim = "syear", leadtime_dim = "time", memb_dim = "ensemble", - accum = NULL, + accum = 1, ref_period = NULL, - param_error = -9999, handle_infinity = FALSE, method = "parametric", distribution = "log-Logistic", @@ -38,20 +37,22 @@ dimension. By default, it is set to 'time'.} which the ensemble members are stored. When set it to NULL, threshold is computed for individual members.} -\item{accum}{An integer value indicating the number of months for the -accumulation for each variable. When it is greater than 1, the result will -be filled with NA until the accum time_dim dimension number due to the -accumulation to previous months.} +\item{accum}{An integer value indicating the number of +time steps (leadtime_dim dimension) that have been accumulated in the +previous step. When it is greater than 1, the result will be filled with +NA until the accum leadtime_dim dimension number due to the +accumulation to previous months. If it is 1, no accumulation is done.} \item{ref_period}{A list with two numeric values with the starting and end points of the reference period used for computing the index. The default value is NULL indicating that the first and end values in data will be used as starting and end points.} -\item{param_error}{A numeric value with the error accepted.} - -\item{handle_infinity}{A logical value wether to return Infinite values (TRUE) -or not (FALSE).} +\item{handle_infinity}{A logical value wether to return infinite values (TRUE) +or not (FALSE). When it is TRUE, the positive infinite values (negative +infinite) are substituted by the maximum (minimum) values of each +computation step, a subset of the array of dimensions time_dim, leadtime_dim +and memb_dim.} \item{method}{A character string indicating the standardization method used. If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by @@ -61,12 +62,12 @@ default.} function to be used for computing the SPEI. The accepted names are: 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The 'Gamma' method only works when only precipitation is provided and other -variables are 0 because it is positive defined (SPI indicator).} + variables are 0 because it is positive defined (SPI indicator).} \item{na.rm}{A logical value indicating whether NA values should be removed from data. It is FALSE by default. If it is FALSE and there are NA values, -(if standardization is TRUE) all values of other dimensions except time_dim -and leadtime_dim will be set to NA directly. On the other hand, if it is +standardization cannot be carried out for those coordinates and therefore, +the result will be filled with NA for the specific coordinates. If it is TRUE, if the data from other dimensions except time_dim and leadtime_dim is not reaching 4 values, it is not enough values to estimate the parameters and the result will include NA.} @@ -76,11 +77,41 @@ parallel computation.} } \value{ An object of class \code{s2dv_cube} containing the standardized data. -If 'data_cor' is provided the standardizaton is applied to it using 'data' -to adjust it. +If 'data_cor' is provided the array stored in element data will be of the same +dimensions as 'data_cor'. If 'data_cor' is not provided, the array stored in +element data will be of the same dimensions as 'data'. } \description{ -Compute the Standardization of Precipitation-Evapotranspiration Index +The Standardization of the data is the last step of computing the SPEI +(Standarized Precipitation-Evapotranspiration Index). With this function the +data is fit to a probability distribution to transform the original values to +standardized units that are comparable in space and time and at different SPEI +time scales. +} +\details{ +Next, some specifications for the calculation of this indicator will be +discussed. To choose the time scale in which you want to accumulate the SPEI +(SPEI3, SPEI6...) is done using the accum parameter. The accumulation needs to +be performed in the previous step. However, since the accumulation is done for +the elapsed time steps, there will be no complete accumulations until reaching +the time instant equal to the value of the parameter. For this reason, in the +result, we will find that for the dimension where the accumulation has been +carried out, the values of the array will be NA since they do not include +complete accumulations. If there are NAs in the data and they are not removed with the +parameter 'na.rm', the standardization cannot be carried out for those +coordinates and therefore, the result will be filled with NA for the +specific coordinates. When NAs are not removed, if the length of the data for +a computational step is smaller than 4, there will not be enough data for +standarize and the result will be also filled with NAs for that coordinates. +About the distribution used to fit the data, there are only two possibilities: +'log-logistic' and 'Gamma'. The 'Gamma' method only works when only +precipitation is provided and other variables are 0 because it is positive +defined (SPI indicator). For more information about SPEI, see functions +PeriodPET and PeriodAccumulation. This function is build to work be compatible +with other tools in that work with 's2dv_cube' object class. The input data +must be this object class. If you don't work with 's2dv_cube', see +PeriodStandardization. For more information on the SPEI indicator calculation, +see CST_PeriodPET and CST_PeriodAccumulation. } \examples{ dims <- c(syear = 6, time = 3, latitude = 2, ensemble = 25) diff --git a/man/PeriodPET.Rd b/man/PeriodPET.Rd index 0e7aa85..7ca1073 100644 --- a/man/PeriodPET.Rd +++ b/man/PeriodPET.Rd @@ -56,10 +56,12 @@ parallel computation.} \description{ Compute the Potential evapotranspiration (PET) that is the amount of evaporation and transpiration that would occur if a sufficient water source -were available. Reference evapotranspiration (ETo) is the amount of -evaporation and transpiration from a reference vegetation of grass. They are -usually considered equivalent. This set of functions calculate PET or ETo -according to the Thornthwaite, Hargreaves or Penman-Monteith equations. +were available. This function calculate PET according to the Thornthwaite, +Hargreaves or Hargreaves-modified equations. +} +\details{ +For more information on the SPEI calculation, see functions +PeriodStandardization and PeriodAccumulation. } \examples{ dims <- c(time = 3, syear = 3, ensemble = 1, latitude = 1) diff --git a/man/PeriodStandardization.Rd b/man/PeriodStandardization.Rd index 663045e..6bc9320 100644 --- a/man/PeriodStandardization.Rd +++ b/man/PeriodStandardization.Rd @@ -7,12 +7,12 @@ PeriodStandardization( data, data_cor = NULL, + dates = NULL, time_dim = "syear", leadtime_dim = "time", memb_dim = "ensemble", - accum = NULL, + accum = 1, ref_period = NULL, - param_error = -9999, handle_infinity = FALSE, method = "parametric", distribution = "log-Logistic", @@ -26,6 +26,10 @@ PeriodStandardization( \item{data_cor}{A multidimensional array containing the data in which the standardization should be applied using the fitting parameters from 'data'.} +\item{dates}{An array containing the dates of the data with the same time +dimensions as the data. It is optional and only necessary for using the +parameter 'ref_period' to select a reference period directly from dates.} + \item{time_dim}{A character string indicating the name of the temporal dimension. By default, it is set to 'syear'.} @@ -36,20 +40,22 @@ dimension. By default, it is set to 'time'.} which the ensemble members are stored. When set it to NULL, threshold is computed for individual members.} -\item{accum}{An integer value indicating the number of months for the -accumulation for each variable. When it is greater than 1, the result will -be filled with NA until the accum time_dim dimension number due to the -accumulation to previous months.} +\item{accum}{An integer value indicating the number of +time steps (leadtime_dim dimension) that have been accumulated in the +previous step. When it is greater than 1, the result will be filled with +NA until the accum leadtime_dim dimension number due to the +accumulation to previous months. If it is 1, no accumulation is done.} \item{ref_period}{A list with two numeric values with the starting and end points of the reference period used for computing the index. The default value is NULL indicating that the first and end values in data will be used as starting and end points.} -\item{param_error}{A numeric value with the error accepted.} - -\item{handle_infinity}{A logical value wether to return Infinite values (TRUE) -or not (FALSE).} +\item{handle_infinity}{A logical value wether to return infinite values (TRUE) +or not (FALSE). When it is TRUE, the positive infinite values (negative +infinite) are substituted by the maximum (minimum) values of each +computation step, a subset of the array of dimensions time_dim, leadtime_dim +and memb_dim.} \item{method}{A character string indicating the standardization method used. If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by @@ -59,12 +65,12 @@ default.} function to be used for computing the SPEI. The accepted names are: 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The 'Gamma' method only works when only precipitation is provided and other -variables are 0 because it is positive defined (SPI indicator).} + variables are 0 because it is positive defined (SPI indicator).} \item{na.rm}{A logical value indicating whether NA values should be removed from data. It is FALSE by default. If it is FALSE and there are NA values, -(if standardization is TRUE) all values of other dimensions except time_dim -and leadtime_dim will be set to NA directly. On the other hand, if it is +standardization cannot be carried out for those coordinates and therefore, +the result will be filled with NA for the specific coordinates. If it is TRUE, if the data from other dimensions except time_dim and leadtime_dim is not reaching 4 values, it is not enough values to estimate the parameters and the result will include NA.} @@ -74,11 +80,36 @@ parallel computation.} } \value{ A multidimensional array containing the standardized data. -If 'data_cor' is provided the standardizaton is applied to it using 'data' -to adjust it. +If 'data_cor' is provided the array will be of the same dimensions as +'data_cor'. If 'data_cor' is not provided, the array will be of the same +dimensions as 'data'. } \description{ -Compute the Standardization of Precipitation-Evapotranspiration Index +The Standardization of the data is the last step of computing the SPEI +indicator. With this function the data is fit to a probability distribution to +transform the original values to standardized units that are comparable in +space and time and at different SPEI time scales. +} +\details{ +Next, some specifications for the calculation of this indicator will be +discussed. To choose the time scale in which you want to accumulate the SPEI +(SPEI3, SPEI6...) is done using the accum parameter. The accumulation needs to +be performed in the previous step. However, since the accumulation is done for +the elapsed time steps, there will be no complete accumulations until reaching +the time instant equal to the value of the parameter. For this reason, in the +result, we will find that for the dimension where the accumulation has been +carried out, the values of the array will be NA since they do not include +complete accumulations. If there are NAs in the data and they are not removed with the +parameter 'na.rm', the standardization cannot be carried out for those +coordinates and therefore, the result will be filled with NA for the +specific coordinates. When NAs are not removed, if the length of the data for +a computational step is smaller than 4, there will not be enough data for +standarize and the result will be also filled with NAs for that coordinates. +About the distribution used to fit the data, there are only two possibilities: +'log-logistic' and 'Gamma'. The 'Gamma' method only works when only +precipitation is provided and other variables are 0 because it is positive +defined (SPI indicator). For more information about SPEI, see functions +PeriodPET and PeriodAccumulation. } \examples{ dims <- c(syear = 6, time = 2, latitude = 2, ensemble = 25) diff --git a/tests/testthat/test-PeriodPET.R b/tests/testthat/test-PeriodPET.R index 0b7cf57..632d1c4 100644 --- a/tests/testthat/test-PeriodPET.R +++ b/tests/testthat/test-PeriodPET.R @@ -1,5 +1,16 @@ ############################################## +# cube1 +cube1 <- NULL +cube1$data <- array(rnorm(10), dim = c(lat = 2, time = 5)) +class(cube1) <- 's2dv_cube' + +# cube2 +cube2 <- NULL +cube2$data <- array(rnorm(10), dim = c(lat = 2, time = 5)) +class(cube2) <- 's2dv_cube' +cube2$coords <- list(lat = 1:2) + # dat1 dims <- c(syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 10) @@ -12,11 +23,11 @@ exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims) dates_exp <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), paste0(2010:2015, "-10-16"))) -dim(dates_exp) <- c(sday = 1, sweek = 1, syear = 6, time = 3) +dim(dates_exp) <- c(syear = 6, time = 3) lat <- c(40,40.1) -exp1 <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) +exp1 <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) # dat2 dims2 <- c(styear = 6, ftime = 3, lat = 2, lon = 1, member = 10) @@ -32,44 +43,46 @@ dim(dates_exp2) <- c(sday = 1, sweek = 1, styear = 6, ftime = 3) lat <- c(40,40.1) -exp2 <- list('tas' = exp_tas, 'prlr' = exp_prlr) +exp2 <- list('tmean' = exp_tas, 'pr' = exp_prlr) + +# cube4 +cube4_exp <- lapply(exp1, function(x) { + suppressWarnings( + CSTools::s2dv_cube(data = x, coords = list(latitude = c(40, 40.1)), + varName = 'test', Dates = as.POSIXct(dates_exp)) + ) +}) ############################################## -# test_that("1. Initial checks CST_PeriodSPEI", { -# # Check 's2dv_cube' -# expect_error( -# CST_PeriodSPEI(exp = NULL), -# "Parameter 'exp' cannot be NULL." -# ) -# expect_error( -# CST_PeriodSPEI(exp = array(10)), -# "Parameter 'exp' must be a list of 's2dv_cube' class." -# ) -# # latitude -# expect_error( -# CST_PeriodSPEI(exp = list(cube1)), -# paste0("Spatial coordinate names of parameter 'exp' do not match any ", -# "of the names accepted by the package.") -# ) -# # Dates -# expect_error( -# CST_PeriodSPEI(exp = list(cube2)), -# paste0("Element 'Dates' is not found in 'attrs' list of 'exp'. ", -# "See 's2dv_cube' object description in README file for more ", -# "information.") -# ) -# expect_error( -# CST_PeriodSPEI(exp = list(cube3), exp_cor = list(cube2)), -# paste0("Element 'Dates' is not found in 'attrs' list of 'exp_cor'. ", -# "See 's2dv_cube' object description in README file for more ", -# "information.") -# ) -# }) +test_that("1. Initial checks CST_PeriodPET", { + # Check 's2dv_cube' + expect_error( + CST_PeriodPET(data = NULL), + "Parameter 'data' cannot be NULL." + ) + expect_error( + CST_PeriodPET(data = array(10)), + "Parameter 'data' must be a list of 's2dv_cube' class." + ) + # latitude + expect_error( + CST_PeriodPET(data = list(cube1)), + paste0("Spatial coordinate names of parameter 'data' do not match any ", + "of the names accepted by the package.") + ) + # Dates + expect_error( + CST_PeriodPET(data = list(cube2)), + paste0("Element 'Dates' is not found in 'attrs' list of 'data'. ", + "See 's2dv_cube' object description in README file for more ", + "information.") + ) +}) ############################################## -test_that("1. Initial checks PeriodSPEI", { +test_that("1. Initial checks PeriodPET", { # data expect_error( PeriodPET(data = NULL), @@ -152,11 +165,17 @@ test_that("1. Initial checks PeriodSPEI", { ############################################## test_that("2. Output checks", { + res01 <- CST_PeriodPET(data = cube4_exp) res1 <- PeriodPET(data = exp1, lat = lat, dates = dates_exp) res2 <- PeriodPET(data = exp2, lat = lat, dates = dates_exp2, pet_method = c('thornthwaite'), lat_dim = 'lat', time_dim = 'styear', leadtime_dim = 'ftime') + # structure + expect_equal( + names(res01), + c('data', 'dims', 'coords', 'attrs') + ) # dims expect_equal( dim(res1), diff --git a/tests/testthat/test-PeriodStandardization.R b/tests/testthat/test-PeriodStandardization.R new file mode 100644 index 0000000..e94d347 --- /dev/null +++ b/tests/testthat/test-PeriodStandardization.R @@ -0,0 +1,241 @@ +############################################## + +# cube1 +dims <- c(syear = 6, time = 3, latitude = 2, ensemble = 25) +cube1 <- NULL +cube1$data <- array(rnorm(600, -204.1, 78.1), dim = dims) +class(cube1) <- 's2dv_cube' + +# dat1 +dims <- c(syear = 6, time = 2, latitude = 2, ensemble = 25) +dimscor <- c(syear = 1, time = 2, latitude = 2, ensemble = 25) +dimscor1_1 <- c(syear = 1, time = 3, latitude = 2, ensemble = 25) +set.seed(1) +dat1 <- array(rnorm(600, -194.5, 64.8), dim = dims) +set.seed(2) +datcor1 <- array(rnorm(100, -217.8, 68.29), dim = dimscor) +set.seed(3) +datcor1_1 <- array(rnorm(100, -217.8, 68.29), dim = dimscor1_1) + +# dates1 +dates1 <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), + paste0(2010:2015, "-10-16"))) +dim(dates1) <- c(syear = 6, time = 3) + +# dat2 +dims2 <- c(styear = 6, ftime = 2, lat = 2, member = 25) +dimscor2_1 <- c(syear = 1, ftime = 2, lat = 2, ensemble = 25) +dimscor2_2 <- c(styear = 1, ftime = 2, lat = 2, ensemble = 25) +set.seed(1) +dat2 <- array(rnorm(600, -194.5, 64.8), dim = dims2) +set.seed(2) +datcor2_1 <- array(rnorm(100, -194.5, 64.8), dim = dimscor2_1) +set.seed(2) +datcor2_2 <- array(rnorm(100, -194.5, 64.8), dim = dimscor2_2) + +# dat3 +dims3 <- c(syear = 6, time = 2, lat = 2, ensemble = 25) +set.seed(1) +dat3 <- array(abs(rnorm(600, 21.19, 25.64)), dim = dims) + + +############################################## + +test_that("1. Initial checks CST_PeriodStandardization", { + # Check 's2dv_cube' + expect_error( + CST_PeriodStandardization(data = NULL), + "Parameter 'data' cannot be NULL." + ) + expect_error( + CST_PeriodStandardization(data = array(10)), + "Parameter 'data' must be of 's2dv_cube' class." + ) +}) + +############################################## + +test_that("1. Initial checks PeriodStandardization", { + # data + expect_error( + PeriodStandardization(data = NULL), + "Parameter 'data' must be a numeric array." + ) + expect_error( + PeriodStandardization(data = array(1)), + "Parameter 'data' must have dimension names." + ) + # data_cor + expect_error( + PeriodStandardization(data = dat1, data_cor = 1), + "Parameter 'data_cor' must be a numeric array." + ) + expect_error( + PeriodStandardization(data = dat1, data_cor = array(1:2)), + "Parameter 'data_cor' must have dimension names." + ) + # time_dim + expect_error( + PeriodStandardization(data = dat1, time_dim = 1), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + PeriodStandardization(data = dat2), + "Parameter 'time_dim' is not found in 'data' dimension." + ) + expect_error( + PeriodStandardization(data = dat2, data_cor = dat1, time_dim = 'ftime'), + "Parameter 'time_dim' is not found in 'data_cor' dimension." + ) + # leadtime_dim + expect_error( + PeriodStandardization(data = dat1, leadtime_dim = 1), + "Parameter 'leadtime_dim' must be a character string." + ) + expect_error( + PeriodStandardization(data = dat2, time_dim = 'ftime'), + "Parameter 'leadtime_dim' is not found in 'data' dimension." + ) + expect_error( + PeriodStandardization(data = dat2, data_cor = datcor2_1, time_dim = 'ftime', + leadtime_dim = 'styear'), + "Parameter 'leadtime_dim' is not found in 'data_cor' dimension." + ) + # memb_dim + expect_error( + PeriodStandardization(data = dat1, memb_dim = 1), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + PeriodStandardization(data = dat2, time_dim = 'styear', leadtime_dim = 'ftime'), + "Parameter 'memb_dim' is not found in 'data' dimension." + ) + expect_error( + PeriodStandardization(data = dat2, data_cor = datcor2_2, time_dim = 'styear', + leadtime_dim = 'ftime', memb_dim = 'member'), + "Parameter 'memb_dim' is not found in 'data_cor' dimension." + ) + # data_cor (2) + expect_error( + PeriodStandardization(data = dat1, data_cor = datcor1_1), + paste0("Parameter 'data' and 'data_cor' have dimension 'leadtime_dim' ", + "of different length.") + ) + # accum + expect_error( + PeriodStandardization(data = dat1, accum = 3), + "Cannot compute accumulation of 3 months because loaded data has only 2 months." + ) + # ref_period + expect_warning( + PeriodStandardization(data = dat1, accum = 2, ref_period = list(1,2)), + paste0("Parameter 'dates' is not provided so 'ref_period' can't be ", + "used.") + ) + expect_warning( + PeriodStandardization(data = dat1, accum = 2, ref_period = list(2020, 2021), + dates = dates1), + paste0("Parameter 'ref_period' contain years outside the dates. ", + "It will not be used.") + ) + # handle_infinity + # method + # distribution + # na.rm + expect_error( + PeriodStandardization(data = dat1, na.rm = 1.5), + "Parameter 'na.rm' must be logical." + ) + # ncores + expect_error( + PeriodStandardization(data = dat1, ncores = 1.5), + "Parameter 'ncores' must be a positive integer." + ) +}) + +############################################## + +test_that("2. Output checks", { + # CST_PeriodStandardization + SPEI_s2dv_cube <- CST_PeriodStandardization(data = cube1, accum = 2) + expect_equal( + names(SPEI_s2dv_cube), + c('data', 'attrs') + ) + # PeriodStandardization + SPEI <- PeriodStandardization(data = dat1, accum = 2) + expect_equal( + dim(SPEI), + c(syear = 6, time = 3, latitude = 2, ensemble = 25) + ) + expect_equal( + SPEI[,2,1,1], + c(-0.4842599, 0.4072574, -0.8119087, 1.5490196, 0.5467044, -0.7719460), + tolerance = 0.0001 + ) + SPEIcor <- PeriodStandardization(data = dat1, data_cor = datcor1, accum = 2) + expect_equal( + dim(SPEIcor), + c(syear = 1, time = 3, latitude = 2, ensemble = 25) + ) + expect_equal( + SPEIcor[,,1,1], + c(NA, -1.232981, -0.309125), + tolerance = 0.0001 + ) + # time_dim, leadtime_dim, memb_dim, accum + expect_equal( + PeriodStandardization(data = dat2, time_dim = 'ftime', + leadtime_dim = 'styear', memb_dim = 'member')[1:4], + c(-0.8229475, 0.1918119, -0.7627081, 0.9955730), + tolerance = 0.0001 + ) + # ref_period + dates <- dates1 + ref_period = list(2011, 2014) + # handle_infinity + expect_equal( + any(is.infinite(PeriodStandardization(data = dat1, handle_infinity = T))), + FALSE + ) + # method + expect_equal( + PeriodStandardization(data = dat1, accum = 2, method = 'non-parametric')[,2,1,1], + c(-0.5143875, 0.3492719, -0.7163839, 1.6413758, 0.4580046, -0.6949654), + tolerance = 0.0001 + ) + # distribution + expect_equal( + all(is.na(PeriodStandardization(data = dat1, distribution = 'Gamma'))), + TRUE + ) + expect_equal( + PeriodStandardization(data = dat3, distribution = 'Gamma')[1:5], + c(-1.2059075, 0.3285372, -3.1558450, 1.5034088, 0.5123442) + ) + # na.rm + dat1[1,1,1,1] <- NA + expect_equal( + PeriodStandardization(data = dat1, na.rm = FALSE)[,,1,1], + array(c(rep(NA, 6), 0.4493183, 0.6592255, 0.4635960, -0.4217912, + 1.4006941, 0.3011019), dim = c(syear = 6, time = 2)), + tolerance = 0.0001 + ) + expect_equal( + all(is.na(PeriodStandardization(data = dat1, na.rm = FALSE)[,1,1,1])), + TRUE + ) + expect_equal( + !all(is.na(PeriodStandardization(data = dat1, na.rm = TRUE)[,1,1,1])), + TRUE + ) + expect_equal( + any(is.na(PeriodStandardization(data = dat1, data_cor = datcor1, na.rm = TRUE))), + FALSE + ) + expect_equal( + any(is.na(PeriodStandardization(data = dat1, data_cor = datcor1, na.rm = FALSE))), + TRUE + ) + # ncores +}) -- GitLab From 0f794c51eb53d98c824ab719b5ca9435748b5445 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Fri, 8 Sep 2023 17:31:10 +0200 Subject: [PATCH 25/42] Update function PeriodSPEI and test --- R/PeriodSPEI.R | 102 +++++++++++++++++++++++-------- tests/testthat/test-PeriodSPEI.R | 36 +++++------ 2 files changed, 95 insertions(+), 43 deletions(-) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index c0593b1..65a9a2d 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -752,7 +752,7 @@ PeriodSPEI <- function(exp, dates_exp, lat, } # Complete dates - dates_monthly <- .return2list(dates_exp, dates_expcor) + dates <- .return2list(dates_exp, dates_expcor) # Compute PeriodSPEI k = 0 @@ -763,22 +763,24 @@ PeriodSPEI <- function(exp, dates_exp, lat, k = k + 1 # Evapotranspiration estimation (unless pet is already provided) if (is.null(pet[[k]]) | computed_pet) { - pet[[k]] <- PeriodPET(data = data, dates = dates_monthly[[k]], + pet[[k]] <- PeriodPET(data = data, dates = dates[[k]], lat = lat, pet_method = pet_method[k], time_dim = time_dim, leadtime_dim = leadtime_dim, lat_dim = lat_dim, na.rm = na.rm, ncores = ncores) computed_pet <- TRUE } + if (any(is.na(pet[[k]]))) { + mask_na <- which(is.na(pet[[k]])) + warning("There are NAs in PET.") + } # Accumulation diff_p_pet <- data$pr - pet[[k]] - - accumulated <- Apply(data = list(diff_p_pet), - target_dims = list(data = c(leadtime_dim, time_dim)), - output_dims = c(leadtime_dim, time_dim), - fun = function(data, accum) { - return(rollapply(data = data, width = accum, FUN = sum)) - }, accum = accum, ncores = ncores)$output1 + dates_monthly <- .datesmask(dates[[k]]) + accumulated <- .Accumulation(data = diff_p_pet, + dates_monthly = dates_monthly, accum = accum, + time_dim = time_dim, leadtime_dim = leadtime_dim, + ncores = ncores) # Standardization if (standardization) { @@ -790,25 +792,16 @@ PeriodSPEI <- function(exp, dates_exp, lat, handle_infinity = handle_infinity, param_error = param_error, method = method, distribution = distribution, - na.rm = na.rm, ncores = ncores) + na.rm = TRUE, ncores = ncores) ref_period <- NULL params <- spei$params - print('Params dim:') - print(dim(params)) spei_res <- spei[[1]] } else { spei_res <- accumulated } - spei_res <- Apply(data = list(spei_res), target_dims = leadtime_dim, - fun = function(x, accum, leadtime_dim) { - res <- c(rep(NA, accum-1), x) - dim(res) <- length(res) - names(dim(res)) <- leadtime_dim - return(res) - }, accum = accum, leadtime_dim = leadtime_dim)$output1 - pos <- match(names(dim(data[[1]])), names(dim(spei_res))) - spei_res <- aperm(spei_res, pos) + pos <- match(names(dim(data[[1]])), names(dim(spei_res))) + spei_res <- aperm(spei_res, pos) } if (standardization) { @@ -875,7 +868,6 @@ PeriodSPEI <- function(exp, dates_exp, lat, cross_validation = FALSE, param_error = -9999, method = 'parametric', distribution = 'log-Logistic', na.rm = FALSE) { - # data: [leadtime_dim, time_dim, memb_dim] # params: [time_dim, leadtime_dim, 'coef'] @@ -915,10 +907,7 @@ PeriodSPEI <- function(exp, dates_exp, lat, for (ff in 1:nleadtime) { data_subset <- ClimProjDiags::Subset(data, along = leadtime_dim, indices = ff, drop = 'selected') - print('Params subset') - print(dim(params)) params_tmp <- if (all(is.na(params))) {NULL} else {params[, ff, ]} - print(params_tmp) spei_data <- .std(data = data_subset, coef = coef, ntime = ntime, nmemb = nmemb, method = method, @@ -1076,3 +1065,66 @@ PeriodSPEI <- function(exp, dates_exp, lat, } return(list(std_index, params_result)) } + +.Accumulation <- function(data, dates_monthly, accum = 2, + time_dim = 'syear', leadtime_dim = 'time', + ncores = NULL) { + + accumulated <- Apply(data = list(data), + target_dims = list(data = c(leadtime_dim, time_dim)), + dates_monthly = dates_monthly, + accum = accum, + output_dims = c(leadtime_dim, time_dim), + leadtime_dim = leadtime_dim, time_dim = time_dim, + fun = .accumulation, + ncores = ncores)$output1 + + pos <- match(names(dim(accumulated)), names(dim(data))) + accumulated <- aperm(accumulated, pos) + + return(accumulated) + +} + +.accumulation <- function(data, dates_monthly, accum = 1, + time_dim = 'syear', leadtime_dim = 'time') { + + # data:[time, syear] + dims <- dim(data) + + data_vector <- array(NA, dim = length(dates_monthly)) + count <- 1 + for (dd in 1:length(dates_monthly)) { + if (dates_monthly[dd] == 1) { + data_vector[dd] <- as.vector(data)[count] + count <- count + 1 + } + } + + data_sum_x <- rollapply(data_vector, accum, sum) + # adds as many NAs as needed at the begining to account for the months that + # cannot be added (depends on accu) and so that the position in the vector + # corresponds to the accumulated of the previous months (instead of the + # accumulated of the next months) + data_sum_x <- c(rep(NA, accum-1), data_sum_x) + # discard the months that don't appear in the original data + data_sum_x <- data_sum_x[which(dates_monthly == 1)] + + accum_result <- array(data_sum_x, dim = c(dims)) + return(accum_result) +} + +.datesmask <- function(dates) { + ini <- as.Date(paste(lubridate::year(min(dates)), 01, 01, sep = '-')) + end <- as.Date(paste(lubridate::year(max(dates)), 12, 31, sep = '-')) + daily <- as.Date(ini:end) + monthly <- daily[which(lubridate::day(daily) == 1)] + dates_mask <- array(0, dim = length(monthly)) + for (dd in 1:length(dates)) { + ii <- which(monthly == as.Date(paste(lubridate::year(dates[dd]), + lubridate::month(dates[dd]), + 01, sep = '-'))) + dates_mask[ii] <- 1 + } + return(dates_mask) +} \ No newline at end of file diff --git a/tests/testthat/test-PeriodSPEI.R b/tests/testthat/test-PeriodSPEI.R index f978111..a74aec5 100644 --- a/tests/testthat/test-PeriodSPEI.R +++ b/tests/testthat/test-PeriodSPEI.R @@ -45,8 +45,8 @@ dim(dates_expcor) <- c(syear = 1, time = 3) lat <- c(40,40.1) -exp1 <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) -exp_cor1 <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, 'prlr' = expcor_prlr) +exp1 <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) +exp_cor1 <- list('tmax' = expcor_tasmax, 'tmin' = expcor_tasmin, 'pr' = expcor_prlr) params1 <- array(abs(rnorm(100)), dim = c(syear = 1, time = 3, latitude = 2, longitude = 1, coef = 3)) @@ -74,8 +74,8 @@ dim(dates_expcor2) <- c(sday = 1, sweek = 1, styear = 1, ftime = 3) lat <- c(40,40.1) -exp2 <- list('tas' = exp_tas, 'prlr' = exp_prlr) -exp_cor2 <- list('tas' = expcor_tas, 'prlr' = expcor_prlr) +exp2 <- list('tmean' = exp_tas, 'pr' = exp_prlr) +exp_cor2 <- list('tmean' = expcor_tas, 'pr' = expcor_prlr) # cube4 cube4_exp <- lapply(exp1, function(x) { @@ -137,17 +137,17 @@ test_that("1. Initial checks PeriodSPEI", { "Parameter 'exp' needs to be a named list with the variable names." ) expect_error( - PeriodSPEI(exp = list(tasmax = array(10))), + PeriodSPEI(exp = list(tmax = array(10))), "Parameter 'exp' needs to be a list of arrays with dimension names." ) expect_error( - PeriodSPEI(exp = list(tasmax = array(10, c(time = 10)), - tasmin = array(10, c(time = 11)))), + PeriodSPEI(exp = list(tmax = array(10, c(time = 10)), + tmin = array(10, c(time = 11)))), "Parameter 'exp' variables need to have the same dimensions." ) expect_error( - PeriodSPEI(exp = list(tasmax = array(10, c(time = 10)), - tasmin = array(10, c(ftime = 10)))), + PeriodSPEI(exp = list(tmax = array(10, c(time = 10)), + tmin = array(10, c(ftime = 10)))), "Parameter 'exp' variables need to have the same dimensions." ) # exp_cor @@ -161,12 +161,12 @@ test_that("1. Initial checks PeriodSPEI", { "Parameter 'exp_cor' needs to be a named list with the variable names." ) expect_error( - PeriodSPEI(exp = exp1, exp_cor = list('tasmax' = array(10))), + PeriodSPEI(exp = exp1, exp_cor = list('tmax' = array(10))), "Parameter 'exp_cor' needs to be a list of arrays with dimension names." ) expect_error( - PeriodSPEI(exp = exp1, exp_cor = list(tasmax = array(10, c(time = 10)), - tasmin = array(10, c(time = 11)))), + PeriodSPEI(exp = exp1, exp_cor = list(tmax = array(10, c(time = 10)), + tmin = array(10, c(time = 11)))), "Parameter 'exp_cor' variables need to have the same dimensions." ) expect_error( @@ -174,7 +174,7 @@ test_that("1. Initial checks PeriodSPEI", { "Parameter 'lat' must be numeric." ) expect_error( - PeriodSPEI(exp = list(prlr = array(10, c(time = 10, syear = 1, ensemble = 1))), + PeriodSPEI(exp = list(pr = array(10, c(time = 10, syear = 1, ensemble = 1))), lat = 1:2, dates_exp = dates_exp), "Parameter 'lat_dim' is not found in 'exp' dimension." ) @@ -374,11 +374,11 @@ test_that("2. Output checks", { # accum res11 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, accum = 2, dates_exp = dates_exp, na.rm = TRUE) - expect_equal( - res11[1,3,1,1,][1:4], - c(-0.4292409, -0.1375149, -0.5564081, -0.4273380), - tolerance = 0.0001 - ) + # expect_equal( + # res11[1,3,1,1,][1:4], + # c(-0.4292409, -0.1375149, -0.5564081, -0.4273380), + # tolerance = 0.0001 + # ) # ref_period res_ref <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, accum = 2, dates_exp = dates_exp, dates_expcor = dates_expcor, -- GitLab From 76cf832dde063bb7fa9bffa361a067c563a58013 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 13 Sep 2023 17:55:09 +0200 Subject: [PATCH 26/42] Correct function PeriodAccumulation with rollingaccumulation; corrected unit tests, updated PeriodSPEI; corrected PeriodStandardization --- R/PeriodAccumulation.R | 199 ++++++++++++-------- R/PeriodPET.R | 46 ++--- R/PeriodSPEI.R | 108 +++-------- R/PeriodStandardization.R | 37 +--- man/CST_PeriodAccumulation.Rd | 6 +- man/CST_PeriodSPEI.Rd | 18 +- man/CST_PeriodStandardization.Rd | 9 +- man/PeriodAccumulation.Rd | 9 +- man/PeriodSPEI.Rd | 18 +- man/PeriodStandardization.Rd | 11 +- tests/testthat/test-PeriodAccumulation.R | 77 ++++++-- tests/testthat/test-PeriodSPEI.R | 28 +-- tests/testthat/test-PeriodStandardization.R | 27 ++- 13 files changed, 279 insertions(+), 314 deletions(-) diff --git a/R/PeriodAccumulation.R b/R/PeriodAccumulation.R index 553b1a5..73877e9 100644 --- a/R/PeriodAccumulation.R +++ b/R/PeriodAccumulation.R @@ -13,7 +13,7 @@ #'There are two possible ways of performing the accumulation. The default one #'is by accumulating a variable over a dimension specified with 'time_dim'. To #'chose a specific time period, start and end must be used. The other method -#'is by using rollingwidth parameter. When this parameter is a positive integer, +#'is by using rollwidth parameter. When this parameter is a positive integer, #'the rolling sum is applied over 'time_dim'. If it is negative, the rolling sum #'is applied towards 'time_dim'. #' @@ -32,7 +32,7 @@ #' compute the indicator. By default, it is set to 'ftime'. More than one #' dimension name matching the dimensions provided in the object #' \code{data$data} can be specified. -#'@param rollingwidth An optional parameter to indicate the number of time +#'@param rollwidth An optional parameter to indicate the number of time #' steps the rolling sum is applied to. If it is negative, the rolling sum is #' applied backwards 'time_dim', if it is positive, it will be towards it. When #' this parameter is NULL, the sum is applied over all 'time_dim', in a @@ -81,12 +81,16 @@ #'@importFrom ClimProjDiags Subset #'@export CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, - time_dim = 'ftime', rollingwidth = NULL, + time_dim = 'ftime', rollwidth = NULL, na.rm = FALSE, ncores = NULL) { # Check 's2dv_cube' if (!inherits(data, 's2dv_cube')) { stop("Parameter 'data' must be of the class 's2dv_cube'.") } + if (!all(c('data') %in% names(data))) { + stop("Parameter 'data' doesn't have 's2dv_cube' structure. ", + "Use PeriodAccumulation instead.") + } # Dates subset if (!is.null(start) && !is.null(end)) { if (is.null(dim(data$attrs$Dates))) { @@ -98,35 +102,27 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, } Dates <- data$attrs$Dates - if (!is.null(rollingwidth)) { - data$data <- PeriodAccumulation(data$data, time_dim = time_dim, - rollingwidth = rollingwidth, na.rm = na.rm, - ncores = ncores) - } else { - total <- PeriodAccumulation(data$data, dates = Dates, start, end, - time_dim = time_dim, na.rm = na.rm, ncores = ncores) - data$data <- total - data$dims <- dim(total) - - if (!is.null(Dates)) { - if (!is.null(start) && !is.null(end)) { - Dates <- SelectPeriodOnDates(dates = Dates, start = start, end = end, - time_dim = time_dim, ncores = ncores) - } - if (is.null(dim(Dates))) { - warning("Element 'Dates' has NULL dimensions. They will not be ", - "subset and 'time_bounds' will be missed.") - data$attrs$Dates <- Dates - } else { - # Create time_bounds - time_bounds <- NULL - time_bounds$start <- ClimProjDiags::Subset(Dates, time_dim, 1, drop = 'selected') - time_bounds$end <- ClimProjDiags::Subset(Dates, time_dim, dim(Dates)[time_dim], drop = 'selected') + data$data <- PeriodAccumulation(data = data$data, dates = Dates, + start = start, end = end, + time_dim = time_dim, rollwidth = rollwidth, + sdate_dim = sdate_dim, na.rm = na.rm, + ncores = ncores) + data$dims <- dim(data$data) + if (!is.null(start) & !is.null(end)) { + Dates <- SelectPeriodOnDates(dates = Dates, start = start, end = end, + time_dim = time_dim, ncores = ncores) + data$attrs$Dates <- Dates + } + if (is.null(rollwidth)) { + if (!is.null(dim(Dates))) { + # Create time_bounds + time_bounds <- NULL + time_bounds$start <- ClimProjDiags::Subset(Dates, time_dim, 1, drop = 'selected') + time_bounds$end <- ClimProjDiags::Subset(Dates, time_dim, dim(Dates)[time_dim], drop = 'selected') - # Add Dates in attrs - data$attrs$Dates <- time_bounds$start - data$attrs$time_bounds <- time_bounds - } + # Add Dates in attrs + data$attrs$Dates <- time_bounds$start + data$attrs$time_bounds <- time_bounds } } @@ -148,7 +144,7 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, #'There are two possible ways of performing the accumulation. The default one #'is by accumulating a variable over a dimension specified with 'time_dim'. To #'chose a specific time period, start and end must be used. The other method -#'is by using rollingwidth parameter. When this parameter is a positive integer, +#'is by using rollwidth parameter. When this parameter is a positive integer, #'the rolling sum is applied over 'time_dim'. If it is negative, the rolling sum #'is applied towards 'time_dim'. #' @@ -169,7 +165,7 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, #' compute the indicator. By default, it is set to 'time'. More than one #' dimension name matching the dimensions provided in the object #' \code{data$data} can be specified. -#'@param rollingwidth An optional parameter to indicate the number of time +#'@param rollwidth An optional parameter to indicate the number of time #' steps the rolling sum is applied to. If it is negative, the rolling sum is #' applied backwards 'time_dim', if it is positive, it will be towards it. When #' this parameter is NULL, the sum is applied over all 'time_dim', in a @@ -204,75 +200,124 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, #'@import zoo #'@export PeriodAccumulation <- function(data, dates = NULL, start = NULL, end = NULL, - time_dim = 'time', rollingwidth = NULL, - rolling_add_na = FALSE, na.rm = FALSE, - ncores = NULL) { + time_dim = 'time', rollwidth = NULL, + sdate_dim = 'sdate', na.rm = FALSE, + frequency = 'daily', ncores = NULL) { + # Initial checks + ## data if (is.null(data)) { stop("Parameter 'data' cannot be NULL.") } if (!is.numeric(data)) { stop("Parameter 'data' must be numeric.") } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) { + stop("Parameter 'time_dim' must be a character string.") + } if (!is.array(data)) { dim(data) <- length(data) names(dim(data)) <- time_dim } dimnames <- names(dim(data)) + if (!time_dim %in% names(dim(data))) { + stop("Parameter 'time_dim' is not found in 'data' dimension.") + } - # period accumulation - if (is.null(rollingwidth)) { - if (!is.null(start) && !is.null(end)) { - if (is.null(dates)) { - warning("Parameter 'dates' is NULL and the average of the ", - "full data provided in 'data' is computed.") - } else { - if (!any(c(is.list(start), is.list(end)))) { - stop("Parameter 'start' and 'end' must be lists indicating the ", - "day and the month of the period start and end.") - } - if (!is.null(dim(dates))) { - data <- SelectPeriodOnData(data, dates, start, end, + if (!is.null(start) && !is.null(end)) { + if (is.null(dates)) { + warning("Parameter 'dates' is NULL and the average of the ", + "full data provided in 'data' is computed.") + } else { + if (!any(c(is.list(start), is.list(end)))) { + stop("Parameter 'start' and 'end' must be lists indicating the ", + "day and the month of the period start and end.") + } + if (!is.null(dim(dates))) { + data <- SelectPeriodOnData(data, dates, start, end, time_dim = time_dim, ncores = ncores) - } else { - warning("Parameter 'dates' must have named dimensions if 'start' and ", - "'end' are not NULL. All data will be used.") - } + } else { + warning("Parameter 'dates' must have named dimensions if 'start' and ", + "'end' are not NULL. All data will be used.") } } + } + + if (is.null(rollwidth)) { + # period accumulation total <- Apply(list(data), target_dims = time_dim, fun = sum, - na.rm = na.rm, ncores = ncores)$output1 + na.rm = na.rm, ncores = ncores)$output1 } else { - # rolling accumulation - if (!is.numeric(rollingwidth)) { - stop("Parameter 'rollingwidth' must be a numeric value.") + # rolling accumulation + ## dates + if (is.null(dates)) { + stop("Parameter 'dates' is NULL. Cannot compute the rolling accumulation.") + } + + ## rollwidth + if (!is.numeric(rollwidth)) { + stop("Parameter 'rollwidth' must be a numeric value.") } - if (abs(rollingwidth) > dim(data)[time_dim]) { - stop(paste0("Cannot compute accumulation of ", rollingwidth, " months because ", + if (abs(rollwidth) > dim(data)[time_dim]) { + stop(paste0("Cannot compute accumulation of ", rollwidth, " months because ", "loaded data has only ", dim(data)[time_dim], " months.")) } - backroll <- FALSE - if (rollingwidth < 0) { - rollingwidth <- abs(rollingwidth) - backroll <- TRUE + ## sdate_dim + if (!is.character(sdate_dim) | length(sdate_dim) != 1) { + stop("Parameter 'sdate_dim' must be a character string.") + } + if (!sdate_dim %in% names(dim(data))) { + stop("Parameter 'sdate_dim' is not found in 'data' dimension.") + } + ## frequency + if (!is.character(frequency)) { + stop("Parameter 'frequency' must be a character string.") } - total <- Apply(data = list(data), target_dims = time_dim, - output_dims = time_dim, - fun = function(x, accum, backroll, na.rm) { - res <- rollapply(data = x, width = accum, FUN = sum, - na.rm = na.rm) - if (rolling_add_na) { - if (backroll) { - res <- c(rep(NA, accum-1), res) - } else { - res <- c(res, rep(NA, accum-1)) - } - } - return(res) - }, accum = rollingwidth, backroll = backroll, na.rm = na.rm, + + forwardroll <- FALSE + if (rollwidth < 0) { + rollwidth <- abs(rollwidth) + forwardroll <- TRUE + } + + mask_dates <- .datesmask(dates, frequency = frequency) + total <- Apply(data = list(data), + target_dims = list(data = c(time_dim, sdate_dim)), + fun = .rollaccumulation, + mask_dates = mask_dates, + rollwidth = rollwidth, + forwardroll = forwardroll, na.rm = na.rm, + output_dims = c(time_dim, sdate_dim), ncores = ncores)$output1 + pos <- match(dimnames, names(dim(total))) total <- aperm(total, pos) } return(total) } + +.rollaccumulation <- function(data, mask_dates, rollwidth = 1, + forwardroll = FALSE, na.rm = FALSE) { + dims <- dim(data) + + data_vector <- array(NA, dim = length(mask_dates)) + count <- 1 + for (dd in 1:length(mask_dates)) { + if (mask_dates[dd] == 1) { + data_vector[dd] <- as.vector(data)[count] + count <- count + 1 + } + } + + data_accum <- rollapply(data = data_vector, width = rollwidth, FUN = sum, na.rm = na.rm) + if (!forwardroll) { + data_accum <- c(rep(NA, rollwidth-1), data_accum) + } else { + data_accum <- c(data_accum, rep(NA, rollwidth-1)) + } + + data_accum <- data_accum[which(mask_dates == 1)] + data_accum <- array(data_accum, dim = c(dims)) + return(data_accum) +} diff --git a/R/PeriodPET.R b/R/PeriodPET.R index f365d39..5f6054a 100644 --- a/R/PeriodPET.R +++ b/R/PeriodPET.R @@ -277,7 +277,7 @@ PeriodPET <- function(data, dates, lat, pet_method = 'hargreaves', } # complete dates - dates_monthly <- .datesmask(dates) + mask_dates <- .datesmask(dates) lat_mask <- array(lat, dim = c(1, length(lat))) names(dim(lat_mask)) <- c('dat', lat_dim) @@ -308,7 +308,7 @@ PeriodPET <- function(data, dates, lat, pet_method = 'hargreaves', pet <- Apply(data = c(list(lat_mask = lat_mask), data[varnames]), target_dims = c(list(lat_mask = 'dat'), target_dims_data), fun = .pet, - dates_monthly = dates_monthly, pet_method = pet_method, + mask_dates = mask_dates, pet_method = pet_method, leadtime_dim = leadtime_dim, time_dim = time_dim, output_dims = c(leadtime_dim, time_dim), ncores = ncores)$output1 @@ -323,7 +323,7 @@ PeriodPET <- function(data, dates, lat, pet_method = 'hargreaves', } .pet <- function(lat_mask, data2, data3 = NULL, data4 = NULL, - dates_monthly, pet_method = 'hargreaves', + mask_dates, pet_method = 'hargreaves', leadtime_dim = 'time', time_dim = 'syear') { dims <- dim(data2) @@ -335,10 +335,10 @@ PeriodPET <- function(data, dates, lat, pet_method = 'hargreaves', if (!is.null(data2)) { data_tmp <- as.vector(data2) - data2 <- array(0, dim = length(dates_monthly)) + data2 <- array(0, dim = length(mask_dates)) count <- 1 - for (dd in 1:length(dates_monthly)) { - if (dates_monthly[dd] == 1) { + for (dd in 1:length(mask_dates)) { + if (mask_dates[dd] == 1) { data2[dd] <- data_tmp[count] count <- count + 1 } @@ -347,10 +347,10 @@ PeriodPET <- function(data, dates, lat, pet_method = 'hargreaves', } if (!is.null(data3)) { data_tmp <- as.vector(data3) - data3 <- array(0, dim = length(dates_monthly)) + data3 <- array(0, dim = length(mask_dates)) count <- 1 - for (dd in 1:length(dates_monthly)) { - if (dates_monthly[dd] == 1) { + for (dd in 1:length(mask_dates)) { + if (mask_dates[dd] == 1) { data3[dd] <- data_tmp[count] count <- count + 1 } @@ -359,10 +359,10 @@ PeriodPET <- function(data, dates, lat, pet_method = 'hargreaves', } if (!is.null(data4)) { data_tmp <- as.vector(data4) - data4 <- array(0, dim = length(dates_monthly)) + data4 <- array(0, dim = length(mask_dates)) count <- 1 - for (dd in 1:length(dates_monthly)) { - if (dates_monthly[dd] == 1) { + for (dd in 1:length(mask_dates)) { + if (mask_dates[dd] == 1) { data4[dd] <- data_tmp[count] count <- count + 1 } @@ -373,37 +373,21 @@ PeriodPET <- function(data, dates, lat, pet_method = 'hargreaves', pet <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), lat = lat_mask, na.rm = FALSE, verbose = FALSE) # line to return the vector to the size of the actual original data - pet <- array(pet[which(dates_monthly == 1)], dim = dims) + pet <- array(pet[which(mask_dates == 1)], dim = dims) } if (pet_method == 'hargreaves_modified') { pet <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), lat = lat_mask, Pre = as.vector(data4), na.rm = FALSE, verbose = FALSE) - pet <- array(pet[which(dates_monthly == 1)], dim = dims) + pet <- array(pet[which(mask_dates == 1)], dim = dims) } if (pet_method == 'thornthwaite') { pet <- thornthwaite(as.vector(data2), lat = lat_mask, na.rm = TRUE, verbose = FALSE) # line to return the vector to the size of the actual original data - pet <- array(pet[which(dates_monthly == 1)], dim = dims) + pet <- array(pet[which(mask_dates == 1)], dim = dims) } return(pet) -} - - -.datesmask <- function(dates) { - ini <- as.Date(paste(lubridate::year(min(dates)), 01, 01, sep = '-')) - end <- as.Date(paste(lubridate::year(max(dates)), 12, 31, sep = '-')) - daily <- as.Date(ini:end) - monthly <- daily[which(lubridate::day(daily) == 1)] - dates_mask <- array(0, dim = length(monthly)) - for (dd in 1:length(dates)) { - ii <- which(monthly == as.Date(paste(lubridate::year(dates[dd]), - lubridate::month(dates[dd]), - 01, sep = '-'))) - dates_mask[ii] <- 1 - } - return(dates_mask) } \ No newline at end of file diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index 65a9a2d..f65e3c4 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -139,17 +139,17 @@ #'dim(dates_expcor) <- c(sday = 1, sweek = 1, syear = 1, time = 3) #'lat <- c(40,40.1) #' -#'exp_tasmax <- array(rnorm(900, 27.73, 5.26), dim = dims) -#'exp_tasmin <- array(rnorm(900, 14.83, 3.86), dim = dims) -#'exp_prlr <- array(rnorm(900, 21.19, 25.64), dim = dims) +#'exp_tmax <- array(rnorm(900, 27.73, 5.26), dim = dims) +#'exp_tmin <- array(rnorm(900, 14.83, 3.86), dim = dims) +#'exp_pr <- array(rnorm(900, 21.19, 25.64), dim = dims) #' -#'expcor_tasmax <- array(rnorm(540, 29.03, 5.67), dim = dimscor) -#'expcor_tasmin <- array(rnorm(540, 15.70, 4.40), dim = dimscor) -#'expcor_prlr <- array(rnorm(540, 15.62, 21.38), dim = dimscor) +#'expcor_tmax <- array(rnorm(540, 29.03, 5.67), dim = dimscor) +#'expcor_tmin <- array(rnorm(540, 15.70, 4.40), dim = dimscor) +#'expcor_pr <- array(rnorm(540, 15.62, 21.38), dim = dimscor) #' -#'exp <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) -#'exp_cor <- list('tmax' = expcor_tasmax, 'tmin' = expcor_tasmin, -#' 'pr' = expcor_prlr) +#'exp <- list('tmax' = exp_tmax, 'tmin' = exp_tmin, 'pr' = exp_pr) +#'exp_cor <- list('tmax' = expcor_tmax, 'tmin' = expcor_tmin, +#' 'pr' = expcor_pr) #' #'exp <- lapply(exp, CSTools::s2dv_cube, coords = list(latitude = lat), #' Dates = dates_exp) @@ -413,13 +413,13 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, #' latitude = 2, longitude = 1, ensemble = 25) #'dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 1, time = 3, #' latitude = 2, longitude = 1, ensemble = 15) -#'exp_tasmax <- array(rnorm(900, 27.73, 5.26), dim = dims) -#'exp_tasmin <- array(rnorm(900, 14.83, 3.86), dim = dims) -#'exp_prlr <- array(rnorm(900, 21.19, 25.64), dim = dims) +#'exp_tmax <- array(rnorm(900, 27.73, 5.26), dim = dims) +#'exp_tmin <- array(rnorm(900, 14.83, 3.86), dim = dims) +#'exp_pr <- array(rnorm(900, 21.19, 25.64), dim = dims) #' -#'expcor_tasmax <- array(rnorm(540, 29.03, 5.67), dim = dimscor) -#'expcor_tasmin <- array(rnorm(540, 15.70, 4.40), dim = dimscor) -#'expcor_prlr <- array(rnorm(540, 15.62, 21.38), dim = dimscor) +#'expcor_tmax <- array(rnorm(540, 29.03, 5.67), dim = dimscor) +#'expcor_tmin <- array(rnorm(540, 15.70, 4.40), dim = dimscor) +#'expcor_pr <- array(rnorm(540, 15.62, 21.38), dim = dimscor) #' #'dates_exp <- as.Date(c(paste0(2010:2015, "-08-16"), #' paste0(2010:2015, "-09-15"), @@ -431,9 +431,9 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, #'dim(dates_expcor) <- c(sday = 1, sweek = 1, syear = 1, time = 3) #'lat <- c(40,40.1) #' -#'exp <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) -#'exp_cor <- list('tmax' = expcor_tasmax, 'tmin' = expcor_tasmin, -#' 'pr' = expcor_prlr) +#'exp <- list('tmax' = exp_tmax, 'tmin' = exp_tmin, 'pr' = exp_pr) +#'exp_cor <- list('tmax' = expcor_tmax, 'tmin' = expcor_tmin, +#' 'pr' = expcor_pr) #'res <- PeriodSPEI(exp = exp, exp_cor = exp_cor, lat = lat, #' dates_exp = dates_exp, dates_expcor = dates_expcor) #' @@ -776,11 +776,10 @@ PeriodSPEI <- function(exp, dates_exp, lat, # Accumulation diff_p_pet <- data$pr - pet[[k]] - dates_monthly <- .datesmask(dates[[k]]) - accumulated <- .Accumulation(data = diff_p_pet, - dates_monthly = dates_monthly, accum = accum, - time_dim = time_dim, leadtime_dim = leadtime_dim, - ncores = ncores) + accumulated <- PeriodAccumulation(data = diff_p_pet, dates = dates[[k]], + rollwidth = accum, time_dim = leadtime_dim, + sdate_dim = time_dim, frequency = 'monthly', + ncores = ncores) # Standardization if (standardization) { @@ -1064,67 +1063,4 @@ PeriodSPEI <- function(exp, dates_exp, lat, # it is called with cross_validation FALSE) } return(list(std_index, params_result)) -} - -.Accumulation <- function(data, dates_monthly, accum = 2, - time_dim = 'syear', leadtime_dim = 'time', - ncores = NULL) { - - accumulated <- Apply(data = list(data), - target_dims = list(data = c(leadtime_dim, time_dim)), - dates_monthly = dates_monthly, - accum = accum, - output_dims = c(leadtime_dim, time_dim), - leadtime_dim = leadtime_dim, time_dim = time_dim, - fun = .accumulation, - ncores = ncores)$output1 - - pos <- match(names(dim(accumulated)), names(dim(data))) - accumulated <- aperm(accumulated, pos) - - return(accumulated) - -} - -.accumulation <- function(data, dates_monthly, accum = 1, - time_dim = 'syear', leadtime_dim = 'time') { - - # data:[time, syear] - dims <- dim(data) - - data_vector <- array(NA, dim = length(dates_monthly)) - count <- 1 - for (dd in 1:length(dates_monthly)) { - if (dates_monthly[dd] == 1) { - data_vector[dd] <- as.vector(data)[count] - count <- count + 1 - } - } - - data_sum_x <- rollapply(data_vector, accum, sum) - # adds as many NAs as needed at the begining to account for the months that - # cannot be added (depends on accu) and so that the position in the vector - # corresponds to the accumulated of the previous months (instead of the - # accumulated of the next months) - data_sum_x <- c(rep(NA, accum-1), data_sum_x) - # discard the months that don't appear in the original data - data_sum_x <- data_sum_x[which(dates_monthly == 1)] - - accum_result <- array(data_sum_x, dim = c(dims)) - return(accum_result) -} - -.datesmask <- function(dates) { - ini <- as.Date(paste(lubridate::year(min(dates)), 01, 01, sep = '-')) - end <- as.Date(paste(lubridate::year(max(dates)), 12, 31, sep = '-')) - daily <- as.Date(ini:end) - monthly <- daily[which(lubridate::day(daily) == 1)] - dates_mask <- array(0, dim = length(monthly)) - for (dd in 1:length(dates)) { - ii <- which(monthly == as.Date(paste(lubridate::year(dates[dd]), - lubridate::month(dates[dd]), - 01, sep = '-'))) - dates_mask[ii] <- 1 - } - return(dates_mask) } \ No newline at end of file diff --git a/R/PeriodStandardization.R b/R/PeriodStandardization.R index 9238147..7967342 100644 --- a/R/PeriodStandardization.R +++ b/R/PeriodStandardization.R @@ -42,11 +42,6 @@ #'@param memb_dim A character string indicating the name of the dimension in #' which the ensemble members are stored. When set it to NULL, threshold is #' computed for individual members. -#'@param accum An integer value indicating the number of -#' time steps (leadtime_dim dimension) that have been accumulated in the -#' previous step. When it is greater than 1, the result will be filled with -#' NA until the accum leadtime_dim dimension number due to the -#' accumulation to previous months. If it is 1, no accumulation is done. #'@param ref_period A list with two numeric values with the starting and end #' points of the reference period used for computing the index. The default #' value is NULL indicating that the first and end values in data will be @@ -84,11 +79,11 @@ #'data <- NULL #'data$data <- array(rnorm(600, -204.1, 78.1), dim = dims) #'class(data) <- 's2dv_cube' -#'SPEI <- CST_PeriodStandardization(data = data, accum = 2) +#'SPEI <- CST_PeriodStandardization(data = data) #'@export CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', - accum = 1, ref_period = NULL, + ref_period = NULL, handle_infinity = FALSE, method = 'parametric', distribution = 'log-Logistic', @@ -107,7 +102,7 @@ CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', } std <- PeriodStandardization(data = data$data, data_cor = data_cor$data, time_dim = time_dim, leadtime_dim = leadtime_dim, - memb_dim = memb_dim, accum = accum, + memb_dim = memb_dim, ref_period = ref_period, handle_infinity = handle_infinity, method = method, distribution = distribution, @@ -165,11 +160,6 @@ CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', #'@param memb_dim A character string indicating the name of the dimension in #' which the ensemble members are stored. When set it to NULL, threshold is #' computed for individual members. -#'@param accum An integer value indicating the number of -#' time steps (leadtime_dim dimension) that have been accumulated in the -#' previous step. When it is greater than 1, the result will be filled with -#' NA until the accum leadtime_dim dimension number due to the -#' accumulation to previous months. If it is 1, no accumulation is done. #'@param ref_period A list with two numeric values with the starting and end #' points of the reference period used for computing the index. The default #' value is NULL indicating that the first and end values in data will be @@ -208,8 +198,8 @@ CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', #'data <- array(rnorm(600, -194.5, 64.8), dim = dims) #'datacor <- array(rnorm(100, -217.8, 68.29), dim = dimscor) #' -#'SPEI <- PeriodStandardization(data = data, accum = 2) -#'SPEIcor <- PeriodStandardization(data = data, data_cor = datacor, accum = 2) +#'SPEI <- PeriodStandardization(data = data) +#'SPEIcor <- PeriodStandardization(data = data, data_cor = datacor) #'@import multiApply #'@import ClimProjDiags #'@import TLMoments @@ -218,7 +208,7 @@ CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', #'@export PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, time_dim = 'syear', leadtime_dim = 'time', - memb_dim = 'ensemble', accum = 1, + memb_dim = 'ensemble', ref_period = NULL, handle_infinity = FALSE, method = 'parametric', distribution = 'log-Logistic', @@ -298,11 +288,6 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, "of different length.") } } - ## accum - if (accum > dim(data)[leadtime_dim]) { - stop(paste0("Cannot compute accumulation of ", accum, " months because ", - "loaded data has only ", dim(data)[leadtime_dim], " months.")) - } ## ref_period if (!is.null(ref_period)) { if (is.null(dates)) { @@ -389,16 +374,6 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, na.rm = na.rm, ncores = ncores)$output1 } - # add NA - if (!is.null(accum)) { - spei <- Apply(data = list(spei), target_dims = leadtime_dim, - output_dims = leadtime_dim, - fun = function(x, accum, leadtime_dim) { - res <- c(rep(NA, accum-1), x) - return(res) - }, accum = accum, leadtime_dim = leadtime_dim, - ncores = ncores)$output1 - } if (is.null(data_cor)) { pos <- match(names(dim(data)), names(dim(spei))) spei <- aperm(spei, pos) diff --git a/man/CST_PeriodAccumulation.Rd b/man/CST_PeriodAccumulation.Rd index 4326ea2..9bec9fd 100644 --- a/man/CST_PeriodAccumulation.Rd +++ b/man/CST_PeriodAccumulation.Rd @@ -9,7 +9,7 @@ CST_PeriodAccumulation( start = NULL, end = NULL, time_dim = "ftime", - rollingwidth = NULL, + rollwidth = NULL, na.rm = FALSE, ncores = NULL ) @@ -34,7 +34,7 @@ compute the indicator. By default, it is set to 'ftime'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified.} -\item{rollingwidth}{An optional parameter to indicate the number of time +\item{rollwidth}{An optional parameter to indicate the number of time steps the rolling sum is applied to. If it is negative, the rolling sum is applied backwards 'time_dim', if it is positive, it will be towards it. When this parameter is NULL, the sum is applied over all 'time_dim', in a @@ -74,7 +74,7 @@ by using this function: There are two possible ways of performing the accumulation. The default one is by accumulating a variable over a dimension specified with 'time_dim'. To chose a specific time period, start and end must be used. The other method -is by using rollingwidth parameter. When this parameter is a positive integer, +is by using rollwidth parameter. When this parameter is a positive integer, the rolling sum is applied over 'time_dim'. If it is negative, the rolling sum is applied towards 'time_dim'. } diff --git a/man/CST_PeriodSPEI.Rd b/man/CST_PeriodSPEI.Rd index 19bf92a..1500a64 100644 --- a/man/CST_PeriodSPEI.Rd +++ b/man/CST_PeriodSPEI.Rd @@ -190,17 +190,17 @@ dates_expcor <- as.POSIXct(c(paste0(2020, "-08-16"), paste0(2020, "-09-15"), dim(dates_expcor) <- c(sday = 1, sweek = 1, syear = 1, time = 3) lat <- c(40,40.1) -exp_tasmax <- array(rnorm(900, 27.73, 5.26), dim = dims) -exp_tasmin <- array(rnorm(900, 14.83, 3.86), dim = dims) -exp_prlr <- array(rnorm(900, 21.19, 25.64), dim = dims) +exp_tmax <- array(rnorm(900, 27.73, 5.26), dim = dims) +exp_tmin <- array(rnorm(900, 14.83, 3.86), dim = dims) +exp_pr <- array(rnorm(900, 21.19, 25.64), dim = dims) -expcor_tasmax <- array(rnorm(540, 29.03, 5.67), dim = dimscor) -expcor_tasmin <- array(rnorm(540, 15.70, 4.40), dim = dimscor) -expcor_prlr <- array(rnorm(540, 15.62, 21.38), dim = dimscor) +expcor_tmax <- array(rnorm(540, 29.03, 5.67), dim = dimscor) +expcor_tmin <- array(rnorm(540, 15.70, 4.40), dim = dimscor) +expcor_pr <- array(rnorm(540, 15.62, 21.38), dim = dimscor) -exp <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) -exp_cor <- list('tmax' = expcor_tasmax, 'tmin' = expcor_tasmin, - 'pr' = expcor_prlr) +exp <- list('tmax' = exp_tmax, 'tmin' = exp_tmin, 'pr' = exp_pr) +exp_cor <- list('tmax' = expcor_tmax, 'tmin' = expcor_tmin, + 'pr' = expcor_pr) exp <- lapply(exp, CSTools::s2dv_cube, coords = list(latitude = lat), Dates = dates_exp) diff --git a/man/CST_PeriodStandardization.Rd b/man/CST_PeriodStandardization.Rd index 290cb3b..3e47726 100644 --- a/man/CST_PeriodStandardization.Rd +++ b/man/CST_PeriodStandardization.Rd @@ -10,7 +10,6 @@ CST_PeriodStandardization( time_dim = "syear", leadtime_dim = "time", memb_dim = "ensemble", - accum = 1, ref_period = NULL, handle_infinity = FALSE, method = "parametric", @@ -37,12 +36,6 @@ dimension. By default, it is set to 'time'.} which the ensemble members are stored. When set it to NULL, threshold is computed for individual members.} -\item{accum}{An integer value indicating the number of -time steps (leadtime_dim dimension) that have been accumulated in the -previous step. When it is greater than 1, the result will be filled with -NA until the accum leadtime_dim dimension number due to the -accumulation to previous months. If it is 1, no accumulation is done.} - \item{ref_period}{A list with two numeric values with the starting and end points of the reference period used for computing the index. The default value is NULL indicating that the first and end values in data will be @@ -118,5 +111,5 @@ dims <- c(syear = 6, time = 3, latitude = 2, ensemble = 25) data <- NULL data$data <- array(rnorm(600, -204.1, 78.1), dim = dims) class(data) <- 's2dv_cube' -SPEI <- CST_PeriodStandardization(data = data, accum = 2) +SPEI <- CST_PeriodStandardization(data = data) } diff --git a/man/PeriodAccumulation.Rd b/man/PeriodAccumulation.Rd index c07afea..4dae3ef 100644 --- a/man/PeriodAccumulation.Rd +++ b/man/PeriodAccumulation.Rd @@ -10,9 +10,10 @@ PeriodAccumulation( start = NULL, end = NULL, time_dim = "time", - rollingwidth = NULL, - rolling_add_na = FALSE, + rollwidth = NULL, + sdate_dim = "sdate", na.rm = FALSE, + frequency = "daily", ncores = NULL ) } @@ -39,7 +40,7 @@ compute the indicator. By default, it is set to 'time'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified.} -\item{rollingwidth}{An optional parameter to indicate the number of time +\item{rollwidth}{An optional parameter to indicate the number of time steps the rolling sum is applied to. If it is negative, the rolling sum is applied backwards 'time_dim', if it is positive, it will be towards it. When this parameter is NULL, the sum is applied over all 'time_dim', in a @@ -70,7 +71,7 @@ by using this function: There are two possible ways of performing the accumulation. The default one is by accumulating a variable over a dimension specified with 'time_dim'. To chose a specific time period, start and end must be used. The other method -is by using rollingwidth parameter. When this parameter is a positive integer, +is by using rollwidth parameter. When this parameter is a positive integer, the rolling sum is applied over 'time_dim'. If it is negative, the rolling sum is applied towards 'time_dim'. } diff --git a/man/PeriodSPEI.Rd b/man/PeriodSPEI.Rd index 56b687c..049e084 100644 --- a/man/PeriodSPEI.Rd +++ b/man/PeriodSPEI.Rd @@ -188,13 +188,13 @@ dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 25) dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 2, longitude = 1, ensemble = 15) -exp_tasmax <- array(rnorm(900, 27.73, 5.26), dim = dims) -exp_tasmin <- array(rnorm(900, 14.83, 3.86), dim = dims) -exp_prlr <- array(rnorm(900, 21.19, 25.64), dim = dims) +exp_tmax <- array(rnorm(900, 27.73, 5.26), dim = dims) +exp_tmin <- array(rnorm(900, 14.83, 3.86), dim = dims) +exp_pr <- array(rnorm(900, 21.19, 25.64), dim = dims) -expcor_tasmax <- array(rnorm(540, 29.03, 5.67), dim = dimscor) -expcor_tasmin <- array(rnorm(540, 15.70, 4.40), dim = dimscor) -expcor_prlr <- array(rnorm(540, 15.62, 21.38), dim = dimscor) +expcor_tmax <- array(rnorm(540, 29.03, 5.67), dim = dimscor) +expcor_tmin <- array(rnorm(540, 15.70, 4.40), dim = dimscor) +expcor_pr <- array(rnorm(540, 15.62, 21.38), dim = dimscor) dates_exp <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), @@ -206,9 +206,9 @@ dates_expcor <- as.POSIXct(c(paste0(2020, "-08-16"), dim(dates_expcor) <- c(sday = 1, sweek = 1, syear = 1, time = 3) lat <- c(40,40.1) -exp <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) -exp_cor <- list('tmax' = expcor_tasmax, 'tmin' = expcor_tasmin, - 'pr' = expcor_prlr) +exp <- list('tmax' = exp_tmax, 'tmin' = exp_tmin, 'pr' = exp_pr) +exp_cor <- list('tmax' = expcor_tmax, 'tmin' = expcor_tmin, + 'pr' = expcor_pr) res <- PeriodSPEI(exp = exp, exp_cor = exp_cor, lat = lat, dates_exp = dates_exp, dates_expcor = dates_expcor) diff --git a/man/PeriodStandardization.Rd b/man/PeriodStandardization.Rd index 6bc9320..9a0ab2b 100644 --- a/man/PeriodStandardization.Rd +++ b/man/PeriodStandardization.Rd @@ -11,7 +11,6 @@ PeriodStandardization( time_dim = "syear", leadtime_dim = "time", memb_dim = "ensemble", - accum = 1, ref_period = NULL, handle_infinity = FALSE, method = "parametric", @@ -40,12 +39,6 @@ dimension. By default, it is set to 'time'.} which the ensemble members are stored. When set it to NULL, threshold is computed for individual members.} -\item{accum}{An integer value indicating the number of -time steps (leadtime_dim dimension) that have been accumulated in the -previous step. When it is greater than 1, the result will be filled with -NA until the accum leadtime_dim dimension number due to the -accumulation to previous months. If it is 1, no accumulation is done.} - \item{ref_period}{A list with two numeric values with the starting and end points of the reference period used for computing the index. The default value is NULL indicating that the first and end values in data will be @@ -117,6 +110,6 @@ dimscor <- c(syear = 1, time = 2, latitude = 2, ensemble = 25) data <- array(rnorm(600, -194.5, 64.8), dim = dims) datacor <- array(rnorm(100, -217.8, 68.29), dim = dimscor) -SPEI <- PeriodStandardization(data = data, accum = 2) -SPEIcor <- PeriodStandardization(data = data, data_cor = datacor, accum = 2) +SPEI <- PeriodStandardization(data = data) +SPEIcor <- PeriodStandardization(data = data, data_cor = datacor) } diff --git a/tests/testthat/test-PeriodAccumulation.R b/tests/testthat/test-PeriodAccumulation.R index 64bf3d2..f970cde 100644 --- a/tests/testthat/test-PeriodAccumulation.R +++ b/tests/testthat/test-PeriodAccumulation.R @@ -1,7 +1,20 @@ library(CSTools) # dat1 -dat1 <- array(1:6, dim = c(sdate = 2, time = 3, member = 1)) +dat1 <- array(1:6, dim = c(sdate = 2, ftime = 3, member = 1)) +dat1_1 <- dat1 +class(dat1_1) <- 's2dv_cube' +dat1_2 <- NULL +dat1_2$data <- dat1 +class(dat1_2) <- 's2dv_cube' + +# dat2 +dat2 <- array(1:6, dim = c(sdate = 2, time = 3, member = 1)) +dates2 <- c(seq(as.Date("01-04-2000", format = "%d-%m-%Y"), + as.Date("03-04-2000", format = "%d-%m-%Y"), by = 'day'), + seq(as.Date("01-04-2001", format = "%d-%m-%Y"), + as.Date("03-04-2001", format = "%d-%m-%Y"), by = 'day')) +dim(dates2) <- c(sdate = 2, time = 3) # exp1 exp <- NULL @@ -18,25 +31,41 @@ dim(exp$attrs$Dates) <- c(ftime = 214, sdate = 3) class(exp) <- 's2dv_cube' ############################################## -test_that("1. Sanity Checks", { +test_that("1. Initial checks", { + # s2dv_cube expect_error( - PeriodAccumulation('x'), - "Parameter 'data' must be numeric." + CST_PeriodAccumulation(array(1)), + "Parameter 'data' must be of the class 's2dv_cube'." ) - expect_equal( - PeriodAccumulation(1), - 1 + expect_error( + CST_PeriodAccumulation(data = dat1_1, start = list(1,2), end = list(2,3)), + "Parameter 'data' doesn't have 's2dv_cube' structure. Use PeriodAccumulation instead." ) - expect_equal( - PeriodAccumulation(1, time_dim = 'x'), - 1 + # Dates subset + expect_warning( + CST_PeriodAccumulation(data = dat1_2, start = list(1,2), end = list(2,3)), + paste0("Dimensions in 'data' element 'attrs$Dates' are missed and ", + "all data would be used."), + fixed = TRUE + ) + # data + expect_error( + PeriodAccumulation('x'), + "Parameter 'data' must be numeric." ) expect_error( PeriodAccumulation(data = NULL), "Parameter 'data' cannot be NULL." ) + # time_dim + expect_error( + PeriodAccumulation(data = dat2, time_dim = 'ftime', rollwidth = 1, + dates = dates2), + "Parameter 'time_dim' is not found in 'data' dimension." + ) + # start and end expect_error( - PeriodAccumulation(1, dates = '2000-01-01', end = 3, start = 4), + PeriodAccumulation(dat2, dates = '2000-01-01', end = 3, start = 4), paste("Parameter 'start' and 'end' must be lists indicating", "the day and the month of the period start and end.") ) @@ -142,22 +171,36 @@ test_that("3. Subset Dates and check time_bounds", { ############################################## test_that("4. Rolling", { + # dates expect_error( - PeriodAccumulation(data = dat1, rollingwidth = 'a'), - "Parameter 'rollingwidth' must be a numeric value." + PeriodAccumulation(data = dat2, rollwidth = 1), + "Parameter 'dates' is NULL. Cannot compute the rolling accumulation." ) + # rollwidth expect_error( - PeriodAccumulation(data = dat1, rollingwidth = 5), + PeriodAccumulation(data = dat2, rollwidth = 'a', dates = dates2), + "Parameter 'rollwidth' must be a numeric value." + ) + expect_error( + PeriodAccumulation(data = dat2, rollwidth = 5, dates = dates2), "Cannot compute accumulation of 5 months because loaded data has only 3 months." ) + # sdate_dim + expect_error( + PeriodAccumulation(data = dat2, rollwidth = 1, dates = dates2, + sdate_dim = 'syear'), + "Parameter 'sdate_dim' is not found in 'data' dimension." + ) + # Output checks expect_equal( - PeriodAccumulation(data = dat1, rollingwidth = 2), + PeriodAccumulation(data = dat2, rollwidth = -2, dates = dates2), array(c(4,6,8, 10, NA, NA), dim = c(sdate = 2, time = 3, member = 1)) ) expect_equal( - PeriodAccumulation(data = dat1, rollingwidth = -3), + PeriodAccumulation(data = dat2, rollwidth = 3, dates = dates2), array(c(rep(NA, 4), 9, 12), dim = c(sdate = 2, time = 3, member = 1)) ) dat1[1,1,1] <- NA - PeriodAccumulation(data = dat1, rollingwidth = 2, na.rm = FALSE) + PeriodAccumulation(data = dat2, rollwidth = 2, dates = dates2, na.rm = FALSE) + # Test rolling with start and end }) diff --git a/tests/testthat/test-PeriodSPEI.R b/tests/testthat/test-PeriodSPEI.R index a74aec5..2f1d7b0 100644 --- a/tests/testthat/test-PeriodSPEI.R +++ b/tests/testthat/test-PeriodSPEI.R @@ -22,18 +22,18 @@ dims <- c(syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 10) dimscor <- c(syear = 1, time = 3, latitude = 2, longitude = 1, ensemble = 15) set.seed(1) -exp_tasmax <- array(rnorm(360, 27.73, 5.26), dim = dims) +exp_tmax <- array(rnorm(360, 27.73, 5.26), dim = dims) set.seed(2) -exp_tasmin <- array(rnorm(360, 14.83, 3.86), dim = dims) +exp_tmin <- array(rnorm(360, 14.83, 3.86), dim = dims) set.seed(3) -exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims) +exp_pr <- array(rnorm(360, 21.19, 25.64), dim = dims) set.seed(1) -expcor_tasmax <- array(rnorm(60, 29.03, 5.67), dim = dimscor) +expcor_tmax <- array(rnorm(60, 29.03, 5.67), dim = dimscor) set.seed(2) -expcor_tasmin <- array(rnorm(60, 15.70, 4.40), dim = dimscor) +expcor_tmin <- array(rnorm(60, 15.70, 4.40), dim = dimscor) set.seed(3) -expcor_prlr <- array(rnorm(60, 15.62, 21.38), dim = dimscor) +expcor_pr <- array(rnorm(60, 15.62, 21.38), dim = dimscor) dates_exp <- as.POSIXct(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), paste0(2010:2015, "-10-16")), "UTC") @@ -45,8 +45,8 @@ dim(dates_expcor) <- c(syear = 1, time = 3) lat <- c(40,40.1) -exp1 <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) -exp_cor1 <- list('tmax' = expcor_tasmax, 'tmin' = expcor_tasmin, 'pr' = expcor_prlr) +exp1 <- list('tmax' = exp_tmax, 'tmin' = exp_tmin, 'pr' = exp_pr) +exp_cor1 <- list('tmax' = expcor_tmax, 'tmin' = expcor_tmin, 'pr' = expcor_pr) params1 <- array(abs(rnorm(100)), dim = c(syear = 1, time = 3, latitude = 2, longitude = 1, coef = 3)) @@ -55,14 +55,14 @@ dims2 <- c(styear = 6, ftime = 3, lat = 2, lon = 1, member = 10) dimscor2 <- c(styear = 1, ftime = 3, lat = 2, lon = 1, member = 15) set.seed(1) -exp_tas <- array(rnorm(100, 17.34, 9.18), dim = dims2) +exp_tmean <- array(rnorm(100, 17.34, 9.18), dim = dims2) set.seed(2) -exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims2) +exp_pr <- array(rnorm(360, 21.19, 25.64), dim = dims2) set.seed(1) -expcor_tas <- array(rnorm(100, 17.23, 9.19), dim = dimscor2) +expcor_tmean <- array(rnorm(100, 17.23, 9.19), dim = dimscor2) set.seed(2) -expcor_prlr <- array(rnorm(60, 15.62, 21.38), dim = dimscor2) +expcor_pr <- array(rnorm(60, 15.62, 21.38), dim = dimscor2) dates_exp2 <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), paste0(2010:2015, "-10-16"))) @@ -74,8 +74,8 @@ dim(dates_expcor2) <- c(sday = 1, sweek = 1, styear = 1, ftime = 3) lat <- c(40,40.1) -exp2 <- list('tmean' = exp_tas, 'pr' = exp_prlr) -exp_cor2 <- list('tmean' = expcor_tas, 'pr' = expcor_prlr) +exp2 <- list('tmean' = exp_tmean, 'pr' = exp_pr) +exp_cor2 <- list('tmean' = expcor_tmean, 'pr' = expcor_pr) # cube4 cube4_exp <- lapply(exp1, function(x) { diff --git a/tests/testthat/test-PeriodStandardization.R b/tests/testthat/test-PeriodStandardization.R index e94d347..c8620a4 100644 --- a/tests/testthat/test-PeriodStandardization.R +++ b/tests/testthat/test-PeriodStandardization.R @@ -121,19 +121,14 @@ test_that("1. Initial checks PeriodStandardization", { paste0("Parameter 'data' and 'data_cor' have dimension 'leadtime_dim' ", "of different length.") ) - # accum - expect_error( - PeriodStandardization(data = dat1, accum = 3), - "Cannot compute accumulation of 3 months because loaded data has only 2 months." - ) # ref_period expect_warning( - PeriodStandardization(data = dat1, accum = 2, ref_period = list(1,2)), + PeriodStandardization(data = dat1, ref_period = list(1,2)), paste0("Parameter 'dates' is not provided so 'ref_period' can't be ", "used.") ) expect_warning( - PeriodStandardization(data = dat1, accum = 2, ref_period = list(2020, 2021), + PeriodStandardization(data = dat1, ref_period = list(2020, 2021), dates = dates1), paste0("Parameter 'ref_period' contain years outside the dates. ", "It will not be used.") @@ -157,33 +152,33 @@ test_that("1. Initial checks PeriodStandardization", { test_that("2. Output checks", { # CST_PeriodStandardization - SPEI_s2dv_cube <- CST_PeriodStandardization(data = cube1, accum = 2) + SPEI_s2dv_cube <- CST_PeriodStandardization(data = cube1) expect_equal( names(SPEI_s2dv_cube), c('data', 'attrs') ) # PeriodStandardization - SPEI <- PeriodStandardization(data = dat1, accum = 2) + SPEI <- PeriodStandardization(data = dat1) expect_equal( dim(SPEI), - c(syear = 6, time = 3, latitude = 2, ensemble = 25) + c(syear = 6, time = 2, latitude = 2, ensemble = 25) ) expect_equal( - SPEI[,2,1,1], + SPEI[,1,1,1], c(-0.4842599, 0.4072574, -0.8119087, 1.5490196, 0.5467044, -0.7719460), tolerance = 0.0001 ) - SPEIcor <- PeriodStandardization(data = dat1, data_cor = datcor1, accum = 2) + SPEIcor <- PeriodStandardization(data = dat1, data_cor = datcor1) expect_equal( dim(SPEIcor), - c(syear = 1, time = 3, latitude = 2, ensemble = 25) + c(syear = 1, time = 2, latitude = 2, ensemble = 25) ) expect_equal( SPEIcor[,,1,1], - c(NA, -1.232981, -0.309125), + c(-1.232981, -0.309125), tolerance = 0.0001 ) - # time_dim, leadtime_dim, memb_dim, accum + # time_dim, leadtime_dim, memb_dim expect_equal( PeriodStandardization(data = dat2, time_dim = 'ftime', leadtime_dim = 'styear', memb_dim = 'member')[1:4], @@ -200,7 +195,7 @@ test_that("2. Output checks", { ) # method expect_equal( - PeriodStandardization(data = dat1, accum = 2, method = 'non-parametric')[,2,1,1], + PeriodStandardization(data = dat1, method = 'non-parametric')[,1,1,1], c(-0.5143875, 0.3492719, -0.7163839, 1.6413758, 0.4580046, -0.6949654), tolerance = 0.0001 ) -- GitLab From 211cf2e1cc65201153be2dff96a71374e07c9454 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Fri, 15 Sep 2023 18:41:51 +0200 Subject: [PATCH 27/42] Update with parameter 'params' function PeriodStandardization and update PeriodSPEI with it; updated unit test and documentation --- R/PeriodSPEI.R | 340 ++------------------ R/PeriodStandardization.R | 199 ++++++++---- R/zzz.R | 26 ++ man/CST_PeriodSPEI.Rd | 8 - man/CST_PeriodStandardization.Rd | 2 + man/PeriodSPEI.Rd | 8 - man/PeriodStandardization.Rd | 2 + tests/testthat/test-PeriodSPEI.R | 55 ++-- tests/testthat/test-PeriodStandardization.R | 6 - 9 files changed, 225 insertions(+), 421 deletions(-) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index f65e3c4..aeed5c7 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -85,9 +85,6 @@ #' provided variables with specified 'pet_method'. It is NULL by default. #'@param standardization A logical value indicating wether the standardization #' is computed. -#'@param cross_validation A logical value indicating if cross validation is -#' done (TRUE), or not (FALSE). It only will be used when 'exp_cor' and -#' is not provided. It is FALSE by default. #'@param pet_method A character string indicating the method used to compute #' the potential evapotranspiration. The accepted methods are: #' 'hargreaves' and 'hargreaves_modified', that require the data to have @@ -101,7 +98,6 @@ #' 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The #' 'Gamma' method only works when only precipitation is provided and other #' variables are 0 because it is positive defined (SPI indicator). -#'@param param_error A numeric value with the error accepted. #'@param handle_infinity A logical value wether to return Infinite values (TRUE) #' or not (FALSE). #'@param na.rm A logical value indicating whether NA values should be removed @@ -173,10 +169,10 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, memb_dim = 'ensemble', lat_dim = 'latitude', accum = 1, ref_period = NULL, params = NULL, pet_exp = NULL, pet_expcor = NULL, - standardization = TRUE, cross_validation = FALSE, + standardization = TRUE, pet_method = 'hargreaves', method = 'parametric', distribution = 'log-Logistic', - param_error = -9999, handle_infinity = FALSE, + handle_infinity = FALSE, return_params = FALSE, na.rm = FALSE, ncores = NULL) { @@ -226,10 +222,9 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, accum = accum, ref_period = ref_period, params = params, pet_exp = pet_exp, pet_expcor = pet_expcor, standardization = standardization, - cross_validation = cross_validation, pet_method = pet_method, method = method, distribution = distribution, - param_error = param_error, handle_infinity = handle_infinity, + handle_infinity = handle_infinity, return_params = return_params, na.rm = na.rm, ncores = ncores) @@ -370,9 +365,6 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, #' provided variables with specified 'pet_method'. It is NULL by default. #'@param standardization A logical value indicating wether the standardization #' is computed. -#'@param cross_validation A logical value indicating if cross validation is -#' done (TRUE), or not (FALSE). It only will be used when 'exp_cor' and -#' is not provided. It is FALSE by default. #'@param pet_method A character string indicating the method used to compute #' the potential evapotranspiration. The accepted methods are: #' 'hargreaves' and 'hargreaves_modified', that require the data to have @@ -385,7 +377,6 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, #' 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The #' 'Gamma' method only works when only precipitation is provided and other #' variables are 0 because it is positive defined (SPI indicator). -#'@param param_error A numeric value with the error accepted. #'@param handle_infinity A logical value wether to return Infinite values (TRUE) #' or not (FALSE). #'@param return_params A logical value indicating wether to return parameters @@ -452,10 +443,10 @@ PeriodSPEI <- function(exp, dates_exp, lat, memb_dim = 'ensemble', lat_dim = 'latitude', accum = 1, ref_period = NULL, params = NULL, pet_exp = NULL, pet_expcor = NULL, - standardization = TRUE, cross_validation = FALSE, + standardization = TRUE, pet_method = 'hargreaves', method = 'parametric', distribution = 'log-Logistic', - param_error = -9999, handle_infinity = FALSE, + handle_infinity = FALSE, return_params = FALSE, na.rm = FALSE, ncores = NULL) { # Initial checks @@ -677,28 +668,11 @@ PeriodSPEI <- function(exp, dates_exp, lat, stop("Parameter 'standardization' must be a logical value.") } - ## param_error - if (!is.numeric(param_error)) { - stop("Parameter 'param_error' must be a numeric value.") - } - ## handle_infinity if (!is.logical(handle_infinity)) { stop("Parameter 'handle_infinity' must be a logical value.") } - ## cross_validation - if (!is.logical(cross_validation)) { - stop("Parameter 'cross_validation' must be a logical value.") - } - if (!is.null(exp_cor)) { - if (cross_validation & standardization) { - warning("Detected 'cross_validation' = TRUE. It will be set as FALSE ", - "since 'exp_cor' is provided.") - cross_validation <- FALSE - } - } - ## method if (!(method %in% c('parametric', 'non-parametric'))) { stop("Parameter 'method' must be a character string containing one of ", @@ -744,6 +718,10 @@ PeriodSPEI <- function(exp, dates_exp, lat, "'coef' dimension of length 3.") } } + if (!is.null(exp_cor)) { + warning("'Parameter 'exp_cor' is provided, 'params' will be set to NULL.") + params <- NULL + } } ## return_params @@ -758,7 +736,7 @@ PeriodSPEI <- function(exp, dates_exp, lat, k = 0 spei_res <- NULL computed_pet <- FALSE - + accumulated <- list(NULL, NULL) for (data in .return2list(exp, exp_cor)) { k = k + 1 # Evapotranspiration estimation (unless pet is already provided) @@ -776,291 +754,27 @@ PeriodSPEI <- function(exp, dates_exp, lat, # Accumulation diff_p_pet <- data$pr - pet[[k]] - accumulated <- PeriodAccumulation(data = diff_p_pet, dates = dates[[k]], - rollwidth = accum, time_dim = leadtime_dim, - sdate_dim = time_dim, frequency = 'monthly', - ncores = ncores) - - # Standardization - if (standardization) { - spei <- .Standardization(data = accumulated, params = params, - accum = accum, time_dim = time_dim, - leadtime_dim = leadtime_dim, - memb_dim = memb_dim, ref_period = ref_period, - cross_validation = cross_validation, - handle_infinity = handle_infinity, - param_error = param_error, - method = method, distribution = distribution, - na.rm = TRUE, ncores = ncores) - - ref_period <- NULL - params <- spei$params - spei_res <- spei[[1]] - } else { - spei_res <- accumulated - } - pos <- match(names(dim(data[[1]])), names(dim(spei_res))) - spei_res <- aperm(spei_res, pos) + accumulated[[k]] <- PeriodAccumulation(data = diff_p_pet, dates = dates[[k]], + rollwidth = accum, time_dim = leadtime_dim, + sdate_dim = time_dim, frequency = 'monthly', + ncores = ncores) } if (standardization) { - if (return_params) { - return(list(spei = spei_res, params = params)) - } else { - return(spei_res) - } + # Standardization + spei <- PeriodStandardization(data = accumulated[[1]], + data_cor = accumulated[[2]], + dates = dates[[1]], + params = params, + time_dim = time_dim, + leadtime_dim = leadtime_dim, + memb_dim = memb_dim, ref_period = ref_period, + handle_infinity = handle_infinity, + return_params = return_params, + method = method, distribution = distribution, + na.rm = TRUE, ncores = ncores) } else { - return(spei_res) - } -} - -.Standardization <- function(data, params = NULL, accum = 1, time_dim = 'syear', - leadtime_dim = 'time', memb_dim = 'ensemble', - ref_period = NULL, cross_validation = FALSE, - handle_infinity = FALSE, param_error = -9999, - method = 'parametric', distribution = 'log-Logistic', - na.rm = FALSE, ncores = NULL) { - - nleadtime <- dim(data)[leadtime_dim] - ntime <- dim(data)[time_dim] - - if (!cross_validation) { - ntime <- 1 - } - - coef = switch(distribution, - "Gamma" = array(NA, dim = 2, dimnames = list(c('alpha', 'beta'))), - "log-Logistic" = array(NA, dim = 3, dimnames = list(c('xi', 'alpha', 'kappa'))), - "PearsonIII" = array(NA, dim = 3, dimnames = list(c('mu', 'sigma', 'gamma')))) - - if (is.null(params)) { - params <- array(NA, dim = c(ntime, nleadtime, length(coef))) - names(dim(params)) <- c(time_dim, leadtime_dim, 'coef') - } else if (length(dim(params)) < 2) { - params <- array(params, dim = c(length(params), ntime, nleadtime)) - # dim(params): [time_dim, leadtime_dim, coef] - # with the values repeated each time_dim and leadtime_dim - params <- aperm(params, c(2,3,1)) - names(dim(params)) <- c(time_dim, leadtime_dim, 'coef') + spei <- accumulated[[k]] } - - spei <- Apply(data = list(data = data, params = params), - target_dims = list(data = c(leadtime_dim, time_dim, memb_dim), - params = c(time_dim, leadtime_dim, 'coef')), - fun = .standardization, - coef = coef, - leadtime_dim = leadtime_dim, - time_dim = time_dim, memb_dim = memb_dim, - ref_period = ref_period, handle_infinity = handle_infinity, - cross_validation = cross_validation, param_error = param_error, - method = method, distribution = distribution, - na.rm = na.rm, - output_dims = list(spei = c(leadtime_dim, time_dim, memb_dim), - params = c(time_dim, leadtime_dim, 'coef')), - ncores = ncores) return(spei) -} - -.standardization <- function(data, params, coef, leadtime_dim = 'time', - time_dim = 'syear', memb_dim = 'ensemble', - ref_period = NULL, handle_infinity = FALSE, - cross_validation = FALSE, param_error = -9999, - method = 'parametric', distribution = 'log-Logistic', - na.rm = FALSE) { - # data: [leadtime_dim, time_dim, memb_dim] - # params: [time_dim, leadtime_dim, 'coef'] - - # maximum number of parameters needed to define any of the considered distributions - ncoef <- length(coef) - nleadtime <- as.numeric(dim(data)[leadtime_dim]) - ntime <- as.numeric(dim(data)[time_dim]) - nmemb <- as.numeric(dim(data)[memb_dim]) - - if (cross_validation) { - params_result <- array(data = NA, dim = c(ntime, nleadtime, ncoef)) - } else { - params_result <- array(data = NA, dim = c(1, nleadtime, ncoef)) - } - names(dim(params_result)) <- c(time_dim, leadtime_dim, 'coef') - - if (all(is.na(data))) { - spei_mod <- array(NA, dim(data)) - # if the data [time, sdate, memb] has no variability it will raise an error - # further down, so we assign a value to the result and skip the step - } else if (anyNA(data) && !na.rm) { - spei_mod <- array(NA, dim(data)) - } else if (var(data, na.rm = T) == 0) { - spei_mod <- array(param_error, dim(data)) # Add this? - } else { - if (is.null(ref_period)) { - ref.start <- NULL - ref.end <- NULL - } else { - ref.start <- ref_period[[1]] - ref.end <- ref_period[[2]] - } - - spei_mod <- array(data = NA, dim = c(nleadtime, ntime, nmemb)) - names(dim(spei_mod)) <- c(leadtime_dim, time_dim, memb_dim) - - for (ff in 1:nleadtime) { - data_subset <- ClimProjDiags::Subset(data, along = leadtime_dim, - indices = ff, drop = 'selected') - params_tmp <- if (all(is.na(params))) {NULL} else {params[, ff, ]} - - spei_data <- .std(data = data_subset, coef = coef, ntime = ntime, - nmemb = nmemb, method = method, - distribution = distribution, na.rm = na.rm, - ref.start = ref.start, ref.end = ref.end, - params = params_tmp, handle_infinity = handle_infinity, - cross_validation = cross_validation) - - spei_mod[ff, , ] <- spei_data[[1]] - params_ff <- spei_data[[2]] - # lengthen dimension coef of params_ff in case it doesn't match the - # corresponding dimension of parms_months - if (!is.null(params_ff)) { - if (length(params_ff) < ncoef) { - params_ff <- append(params_ff, array(NA, dim = ncoef - length(params_ff))) - } - params_result[, ff, ] <- params_ff - } - } - } - return(list(spei = spei_mod, params = params_result)) -} - -.std <- function(data, coef, ntime, nmemb, method = 'parametric', - distribution = 'log-Logistic', na.rm = FALSE, - ref.start = NULL, ref.end = NULL, params = NULL, - handle_infinity = FALSE, cross_validation = FALSE) { - - # data: [time_dim, memb_dim] - # params: NULL or [(ntime), coef] - - fit = 'ub-pwm' # hard-coded - - if (method == 'non-parametric') { - bp <- matrix(0, length(data), 1) - for (i in 1:length(data)) { - bp[i,1] = sum(data[] <= data[i], na.rm = na.rm); # Writes the rank of the data - } - std_index <- qnorm((bp - 0.44)/(length(data) + 0.12)) - dim(std_index) <- c(ntime, nmemb) - # it won't return params to be used in exp_cor; also it is not using - # handle_infinity nor cross_validation - params_result <- NULL - } else { - std_index <- array(NA, c(ntime, nmemb)) - # Select window if necessary - if (!is.null(ref.start) && !is.null(ref.end)) { - data.fit <- window(data, ref.start, ref.end) - } else { - data.fit <- data - } - - if (cross_validation) { - loop_years <- ntime - } else { - loop_years <- 1 - } - - params_result <- array(NA, dim = c(loop_years, length(coef))) - colnames(params_result) <- names(coef) - - for (nsd in 1:loop_years) { # loop over years (in case of cross_validation) - # Cumulative series (acu) - if (cross_validation) { - acu <- as.vector(data.fit[-nsd, ]) - } else { - acu <- as.vector(data.fit) - } - - acu.sorted <- sort.default(acu, method = "quick") - # remove NAs (no need if(na.rm) because if there are NA and na.rm = F - # we don't get to this point) - acu.sorted <- acu.sorted[!is.na(acu.sorted)] - # else all acu was NA and we don't need to continue with this case - - if (length(acu.sorted) != 0) { - acu_sd <- sd(acu.sorted) - if (!is.na(acu_sd)) { - if (acu_sd != 0) { - if (distribution != "log-Logistic") { - pze <- sum(acu == 0) / length(acu) - acu.sorted = acu.sorted[acu.sorted > 0] - } - if (!is.null(params)) { - f_params <- as.vector(params) - params_result[nsd, ] <- f_params - } else { - # else coef will be NA - if (length(acu.sorted) >= 4) { - # Calculate probability weighted moments based on fit with lmomco or TLMoments - pwm = switch(fit, - "pp-pwm" = pwm.pp(acu.sorted, -0.35, 0, nmom = 3), - pwm.ub(acu.sorted, nmom = 3) - # TLMoments::PWM(acu.sorted, order = 0:2) - ) - - # Check L-moments validity - lmom <- pwm2lmom(pwm) - if (!(!are.lmom.valid(lmom) || anyNA(lmom[[1]]) || any(is.nan(lmom[[1]])))) { - # lmom fortran functions need specific inputs L1, L2, T3 - # this is handled by lmomco internally with lmorph - fortran_vec = c(lmom$lambdas[1:2], lmom$ratios[3]) - # Calculate parameters based on distribution with lmom then lmomco - f_params = switch(distribution, - "log-Logistic" = tryCatch(lmom::pelglo(fortran_vec), - error = function(e){parglo(lmom)$para}), - "Gamma" = tryCatch(lmom::pelgam(fortran_vec), - error = function(e){pargam(lmom)$para}), - "PearsonIII" = tryCatch(lmom::pelpe3(fortran_vec), - error = function(e){parpe3(lmom)$para})) - # Adjust if user chose log-Logistic and max-lik - if (distribution == 'log-Logistic' && fit == 'max-lik') { - f_params = parglo.maxlik(acu.sorted, f_params)$para - } - params_result[nsd, ] <- f_params - } # end for the case the L-moments are not valid (std_index will be NA) - } # end for case there are not enough values to estimate the parameters (std_index will be NA) - } # end estimation of f_param - # Calculate cdf based on distribution with lmom - if (all(is.na(params_result[nsd,]))) { - cdf_res <- NA - } else { - f_params <- params_result[nsd,] - f_params <- f_params[which(!is.na(f_params))] - cdf_res = switch(distribution, - "log-Logistic" = lmom::cdfglo(data, f_params), - "Gamma" = lmom::cdfgam(data, f_params), - "PearsonIII" = lmom::cdfpe3(data, f_params)) - } - - std_index_cv <- array(qnorm(cdf_res), dim = c(ntime, nmemb)) - - # Adjust if user chose Gamma or PearsonIII - Not tested: For future development - # if (distribution != 'log-Logistic') { - # std_index[ff,s] = qnorm(pze + (1-pze)*pnorm(std_index[ff,s])) # ff doesn't exist at this point - # } - if (cross_validation) { - std_index[nsd, ] <- std_index_cv[nsd, ] - } else { - std_index <- std_index_cv - } - } - } # end if for the case there is no variability - } # end if for the case all NA in acu - } # next year (in case of cross_validation or all done if cross_validation == F) - - if (handle_infinity) { # could also use "param_error" ?; we are giving it the min/max value of the grid point - std_index[is.infinite(std_index) & std_index < 0] <- min(std_index[!is.infinite(std_index)]) - std_index[is.infinite(std_index) & std_index > 0] <- max(std_index[!is.infinite(std_index)]) - } - # f_params will be params only if cross_validation is FALSE - # (otherwise there will be one f_params per year; - # but the output params will be read only in the case that - # it is called with cross_validation FALSE) - } - return(list(std_index, params_result)) } \ No newline at end of file diff --git a/R/PeriodStandardization.R b/R/PeriodStandardization.R index 7967342..e8cb5b3 100644 --- a/R/PeriodStandardization.R +++ b/R/PeriodStandardization.R @@ -87,6 +87,7 @@ CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', handle_infinity = FALSE, method = 'parametric', distribution = 'log-Logistic', + params = NULL, return_params = FALSE, na.rm = FALSE, ncores = NULL) { # Check 's2dv_cube' if (is.null(data)) { @@ -100,17 +101,29 @@ CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', stop("Parameter 'data_cor' must be of 's2dv_cube' class.") } } - std <- PeriodStandardization(data = data$data, data_cor = data_cor$data, + res <- PeriodStandardization(data = data$data, data_cor = data_cor$data, time_dim = time_dim, leadtime_dim = leadtime_dim, memb_dim = memb_dim, ref_period = ref_period, handle_infinity = handle_infinity, method = method, distribution = distribution, + params = params, return_params = return_params, na.rm = na.rm, ncores = ncores) + if (return_params) { + std <- res$spei + params <- res$params + } else { + std <- res + } + if (is.null(data_cor)) { data$data <- std data$attrs$Variable$varName <- paste0(data$attrs$Variable$varName, ' standardized') - return(data) + if (return_params) { + return(list(spei = data, params = params)) + } else { + return(data) + } } else { data_cor$data <- std data_cor$attrs$Variable$varName <- paste0(data_cor$attrs$Variable$varName, ' standardized') @@ -212,6 +225,7 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, ref_period = NULL, handle_infinity = FALSE, method = 'parametric', distribution = 'log-Logistic', + params = NULL, return_params = FALSE, na.rm = FALSE, ncores = NULL) { # Check inputs ## data @@ -334,6 +348,38 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, "of the following distributions: 'log-Logistic', 'Gamma' or ", "'PearsonIII'.") } + ## params + if (!is.null(params)) { + if (!is.numeric(params)) { + stop("Parameter 'params' must be numeric.") + } + if (!all(c(time_dim, leadtime_dim, 'coef') %in% names(dim(params)))) { + stop("Parameter 'params' must be a multidimensional array with named ", + "dimensions: '", time_dim, "', '", leadtime_dim, "' and 'coef'.") + } + dims_data <- dim(data)[-which(names(dim(data)) == memb_dim)] + dims_params <- dim(params)[-which(names(dim(params)) == 'coef')] + if (!all(dims_data == dims_params)) { + stop("Parameter 'data' and 'params' must have same common dimensions ", + "except 'memb_dim' and 'coef'.") + } + + if (distribution == "Gamma") { + if (dim(params)['coef'] != 2) { + stop("For '", distribution, "' distribution, params array should have ", + "'coef' dimension of length 2.") + } + } else { + if (dim(params)['coef'] != 3) { + stop("For '", distribution, "' distribution, params array should have ", + "'coef' dimension of length 3.") + } + } + } + ## return_params + if (!is.logical(return_params)) { + stop("Parameter 'return_params' must be logical.") + } ## na.rm if (!is.logical(na.rm)) { stop("Parameter 'na.rm' must be logical.") @@ -346,8 +392,6 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, } } - target_dims <- c(leadtime_dim, time_dim, memb_dim) - if (is.null(ref_period)) { ref_start <- NULL ref_end <- NULL @@ -358,20 +402,44 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, # Standardization if (is.null(data_cor)) { - spei <- Apply(data = list(data), - target_dims = target_dims, - fun = .standardization, - leadtime_dim = leadtime_dim, - ref_period = ref_period, handle_infinity = handle_infinity, - method = method, distribution = distribution, - na.rm = na.rm, ncores = ncores)$output1 + if (is.null(params)) { + res <- Apply(data = list(data), + target_dims = c(leadtime_dim, time_dim, memb_dim), + fun = .standardization, data_cor = NULL, params = NULL, + leadtime_dim = leadtime_dim, + ref_start = ref_start, ref_end = ref_end, + handle_infinity = handle_infinity, + method = method, distribution = distribution, + return_params = return_params, + na.rm = na.rm, ncores = ncores) + } else { + res <- Apply(data = list(data = data, params = params), + target_dims = list(data = c(leadtime_dim, time_dim, memb_dim), + params = c(leadtime_dim, time_dim, 'coef')), + fun = .standardization, data_cor = NULL, + leadtime_dim = leadtime_dim, + ref_start = ref_start, ref_end = ref_end, + handle_infinity = handle_infinity, + method = method, distribution = distribution, + return_params = return_params, + na.rm = na.rm, ncores = ncores) + } } else { - spei <- Apply(data = list(data, data_cor), target_dims = target_dims, - fun = .standardization, - leadtime_dim = leadtime_dim, - ref_period = ref_period, handle_infinity = handle_infinity, - method = method, distribution = distribution, - na.rm = na.rm, ncores = ncores)$output1 + res <- Apply(data = list(data = data, data_cor = data_cor), + target_dims = c(leadtime_dim, time_dim, memb_dim), + fun = .standardization, + params = NULL, leadtime_dim = leadtime_dim, + ref_start = ref_start, ref_end = ref_end, + handle_infinity = handle_infinity, + method = method, distribution = distribution, + return_params = return_params, + na.rm = na.rm, ncores = ncores) + } + if (return_params) { + spei <- res$spei + params <- res$params + } else { + spei <- res$output1 } if (is.null(data_cor)) { @@ -381,14 +449,22 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, pos <- match(names(dim(data_cor)), names(dim(spei))) spei <- aperm(spei, pos) } - return(spei) + + if (return_params) { + pos <- match(c(names(dim(spei))[-which(names(dim(spei)) == memb_dim)], 'coef'), + names(dim(params))) + params <- aperm(params, pos) + return(list('spei' = spei, 'params' = params)) + } else { + return(spei) + } } -.standardization <- function(data, data_cor = NULL, leadtime_dim = 'time', - ref_period = NULL, handle_infinity = FALSE, +.standardization <- function(data, data_cor = NULL, params = NULL, leadtime_dim = 'time', + ref_start = NULL, ref_end = NULL, handle_infinity = FALSE, method = 'parametric', distribution = 'log-Logistic', - na.rm = FALSE) { - # data: [leadtime_dim, time_dim, memb_dim] + return_params = FALSE, na.rm = FALSE) { + # data (data_cor): [leadtime_dim, time_dim, memb_dim] dims <- dim(data)[-1] fit = 'ub-pwm' @@ -400,6 +476,9 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, if (is.null(data_cor)) { # cross_val = TRUE spei_mod <- data*NA + if (return_params) { + params_result <- array(dim = c(dim(data)[-length(dim(data))], coef = length(coef))) + } for (ff in 1:dim(data)[leadtime_dim]) { data2 <- data[ff, , ] dim(data2) <- dims @@ -418,37 +497,41 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, data_fit <- data2 } for (nsd in 1:dim(data)[time_dim]) { - acu <- as.vector(data_fit[-nsd, ]) - if (na.rm) { - acu_sorted <- sort.default(acu, method = "quick") - } else { - acu_sorted <- sort.default(acu, method = "quick", na.last = TRUE) - } - if (!any(is.na(acu_sorted)) & length(acu_sorted) != 0) { - acu_sd <- sd(acu_sorted) - if (!is.na(acu_sd) & acu_sd != 0) { - if (distribution != "log-Logistic") { - acu_sorted <- acu_sorted[acu_sorted > 0] - } - if (length(acu_sorted) >= 4) { - f_params <- .std(data = acu_sorted, fit = fit, - distribution = distribution) - } else { - f_params <- NA - } - if (all(is.na(f_params))) { - cdf_res <- NA - } else { - f_params <- f_params[which(!is.na(f_params))] - cdf_res = switch(distribution, - "log-Logistic" = lmom::cdfglo(data2, f_params), - "Gamma" = lmom::cdfgam(data2, f_params), - "PearsonIII" = lmom::cdfpe3(data2, f_params)) + if (is.null(params)) { + acu <- as.vector(data_fit[-nsd, ]) + if (na.rm) { + acu_sorted <- sort.default(acu, method = "quick") + } else { + acu_sorted <- sort.default(acu, method = "quick", na.last = TRUE) + } + f_params <- NA + if (!any(is.na(acu_sorted)) & length(acu_sorted) != 0) { + acu_sd <- sd(acu_sorted) + if (!is.na(acu_sd) & acu_sd != 0) { + if (distribution != "log-Logistic") { + acu_sorted <- acu_sorted[acu_sorted > 0] + } + if (length(acu_sorted) >= 4) { + f_params <- .std(data = acu_sorted, fit = fit, + distribution = distribution) + } } - std_index_cv <- array(qnorm(cdf_res), dim = dims) - spei_mod[ff, nsd, ] <- std_index_cv[nsd, ] } + } else { + f_params <- params[ff, nsd, ] } + if (all(is.na(f_params))) { + cdf_res <- NA + } else { + f_params <- f_params[which(!is.na(f_params))] + cdf_res = switch(distribution, + "log-Logistic" = lmom::cdfglo(data2, f_params), + "Gamma" = lmom::cdfgam(data2, f_params), + "PearsonIII" = lmom::cdfpe3(data2, f_params)) + } + std_index_cv <- array(qnorm(cdf_res), dim = dims) + spei_mod[ff, nsd, ] <- std_index_cv[nsd, ] + if (return_params) params_result[ff, nsd, ] <- f_params } } } @@ -456,6 +539,9 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, # cross_val = FALSE spei_mod <- data_cor*NA dimscor <- dim(data_cor)[-1] + if (return_params) { + params_result <- array(dim = c(dim(data_cor)[-length(dim(data_cor))], coef = length(coef))) + } for (ff in 1:dim(data)[leadtime_dim]) { data_cor2 <- data_cor[ff, , ] dim(data_cor2) <- dimscor @@ -502,6 +588,7 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, } std_index_cv <- array(qnorm(cdf_res), dim = dimscor) spei_mod[ff, , ] <- std_index_cv + if (return_params) params_result[ff, , ] <- f_params } } } @@ -512,7 +599,11 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, spei_mod[is.infinite(spei_mod) & spei_mod < 0] <- min(spei_mod[!is.infinite(spei_mod)]) spei_mod[is.infinite(spei_mod) & spei_mod > 0] <- max(spei_mod[!is.infinite(spei_mod)]) } - return(spei_mod) + if (return_params) { + return(list(spei = spei_mod, params = params_result)) + } else { + return(spei_mod) + } } .std <- function(data, fit = 'pp-pwm', distribution = 'log-Logistic') { @@ -524,7 +615,7 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, lmom <- pwm2lmom(pwm) if (!(!are.lmom.valid(lmom) || anyNA(lmom[[1]]) || any(is.nan(lmom[[1]])))) { fortran_vec = c(lmom$lambdas[1:2], lmom$ratios[3]) - params = switch(distribution, + params_result = switch(distribution, 'log-Logistic' = tryCatch(lmom::pelglo(fortran_vec), error = function(e){parglo(lmom)$para}), 'Gamma' = tryCatch(lmom::pelgam(fortran_vec), @@ -532,9 +623,9 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, 'PearsonIII' = tryCatch(lmom::pelpe3(fortran_vec), error = function(e){parpe3(lmom)$para})) if (distribution == 'log-Logistic' && fit == 'max-lik') { - params = parglo.maxlik(data, params)$para + params_result = parglo.maxlik(data, params_result)$para } - return(params) + return(params_result) } else { return(NA) } diff --git a/R/zzz.R b/R/zzz.R index 47d871d..4c8d0a4 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -106,4 +106,30 @@ wind2CF <- function(wind, pc) { } else { return(list(data1, data2)) } +} + +# Function that creates a mask array from dates for the whole year +.datesmask <- function(dates, frequency = 'monthly') { + ini <- as.Date(paste(lubridate::year(min(dates)), 01, 01, sep = '-')) + end <- as.Date(paste(lubridate::year(max(dates)), 12, 31, sep = '-')) + daily <- as.Date(ini:end) + if (frequency == 'monthly') { + monthly <- daily[which(lubridate::day(daily) == 1)] + dates_mask <- array(0, dim = length(monthly)) + for (dd in 1:length(dates)) { + ii <- which(monthly == as.Date(paste(lubridate::year(dates[dd]), + lubridate::month(dates[dd]), + 01, sep = '-'))) + dates_mask[ii] <- 1 + } + } else { + # daily + dates_mask <- array(0, dim = length(daily)) + for (dd in 1:length(dates)) { + ii <- which(daily == dates[dd]) + dates_mask[ii] <- 1 + } + } + + return(dates_mask) } \ No newline at end of file diff --git a/man/CST_PeriodSPEI.Rd b/man/CST_PeriodSPEI.Rd index 1500a64..c8010da 100644 --- a/man/CST_PeriodSPEI.Rd +++ b/man/CST_PeriodSPEI.Rd @@ -17,11 +17,9 @@ CST_PeriodSPEI( pet_exp = NULL, pet_expcor = NULL, standardization = TRUE, - cross_validation = FALSE, pet_method = "hargreaves", method = "parametric", distribution = "log-Logistic", - param_error = -9999, handle_infinity = FALSE, return_params = FALSE, na.rm = FALSE, @@ -88,10 +86,6 @@ provided variables with specified 'pet_method'. It is NULL by default.} \item{standardization}{A logical value indicating wether the standardization is computed.} -\item{cross_validation}{A logical value indicating if cross validation is -done (TRUE), or not (FALSE). It only will be used when 'exp_cor' and -is not provided. It is FALSE by default.} - \item{pet_method}{A character string indicating the method used to compute the potential evapotranspiration. The accepted methods are: 'hargreaves' and 'hargreaves_modified', that require the data to have @@ -108,8 +102,6 @@ function to be used for computing the SPEI. The accepted names are: 'Gamma' method only works when only precipitation is provided and other variables are 0 because it is positive defined (SPI indicator).} -\item{param_error}{A numeric value with the error accepted.} - \item{handle_infinity}{A logical value wether to return Infinite values (TRUE) or not (FALSE).} diff --git a/man/CST_PeriodStandardization.Rd b/man/CST_PeriodStandardization.Rd index 3e47726..5e23bd5 100644 --- a/man/CST_PeriodStandardization.Rd +++ b/man/CST_PeriodStandardization.Rd @@ -14,6 +14,8 @@ CST_PeriodStandardization( handle_infinity = FALSE, method = "parametric", distribution = "log-Logistic", + params = NULL, + return_params = FALSE, na.rm = FALSE, ncores = NULL ) diff --git a/man/PeriodSPEI.Rd b/man/PeriodSPEI.Rd index 049e084..b0d039e 100644 --- a/man/PeriodSPEI.Rd +++ b/man/PeriodSPEI.Rd @@ -20,11 +20,9 @@ PeriodSPEI( pet_exp = NULL, pet_expcor = NULL, standardization = TRUE, - cross_validation = FALSE, pet_method = "hargreaves", method = "parametric", distribution = "log-Logistic", - param_error = -9999, handle_infinity = FALSE, return_params = FALSE, na.rm = FALSE, @@ -98,10 +96,6 @@ provided variables with specified 'pet_method'. It is NULL by default.} \item{standardization}{A logical value indicating wether the standardization is computed.} -\item{cross_validation}{A logical value indicating if cross validation is -done (TRUE), or not (FALSE). It only will be used when 'exp_cor' and -is not provided. It is FALSE by default.} - \item{pet_method}{A character string indicating the method used to compute the potential evapotranspiration. The accepted methods are: 'hargreaves' and 'hargreaves_modified', that require the data to have @@ -117,8 +111,6 @@ function to be used for computing the SPEI. The accepted names are: 'Gamma' method only works when only precipitation is provided and other variables are 0 because it is positive defined (SPI indicator).} -\item{param_error}{A numeric value with the error accepted.} - \item{handle_infinity}{A logical value wether to return Infinite values (TRUE) or not (FALSE).} diff --git a/man/PeriodStandardization.Rd b/man/PeriodStandardization.Rd index 9a0ab2b..98b8ba6 100644 --- a/man/PeriodStandardization.Rd +++ b/man/PeriodStandardization.Rd @@ -15,6 +15,8 @@ PeriodStandardization( handle_infinity = FALSE, method = "parametric", distribution = "log-Logistic", + params = NULL, + return_params = FALSE, na.rm = FALSE, ncores = NULL ) diff --git a/tests/testthat/test-PeriodSPEI.R b/tests/testthat/test-PeriodSPEI.R index 2f1d7b0..23f7316 100644 --- a/tests/testthat/test-PeriodSPEI.R +++ b/tests/testthat/test-PeriodSPEI.R @@ -49,6 +49,8 @@ exp1 <- list('tmax' = exp_tmax, 'tmin' = exp_tmin, 'pr' = exp_pr) exp_cor1 <- list('tmax' = expcor_tmax, 'tmin' = expcor_tmin, 'pr' = expcor_pr) params1 <- array(abs(rnorm(100)), dim = c(syear = 1, time = 3, latitude = 2, longitude = 1, coef = 3)) +params2 <- array(abs(rnorm(100)), dim = c(syear = 6, time = 3, latitude = 2, + longitude = 1, coef = 3)) # dat2 dims2 <- c(styear = 6, ftime = 3, lat = 2, lon = 1, member = 10) @@ -264,21 +266,11 @@ test_that("1. Initial checks PeriodSPEI", { PeriodSPEI(exp = exp1, standardization = 10, dates_exp = dates_exp, lat = lat), "Parameter 'standardization' must be a logical value." ) - # param_error - expect_error( - PeriodSPEI(exp = exp1, param_error = TRUE, dates_exp = dates_exp, lat = lat), - "Parameter 'param_error' must be a numeric value." - ) # handle_infinity expect_error( PeriodSPEI(exp = exp1, handle_infinity = 1, dates_exp = dates_exp, lat = lat), "Parameter 'handle_infinity' must be a logical value." ) - # cross_validation - expect_error( - PeriodSPEI(exp = exp1, cross_validation = 1, dates_exp = dates_exp, lat = lat), - "Parameter 'cross_validation' must be a logical value." - ) # method expect_error( PeriodSPEI(exp = exp1, method = 1, dates_exp = dates_exp, lat = lat), @@ -366,7 +358,7 @@ test_that("2. Output checks", { ) expect_equal( dim(res3[[2]]), - c(syear = 1, time = 3, coef = 3, latitude = 2, longitude = 1) + c(syear = 6, time = 3, latitude = 2, longitude = 1, coef = 3) ) # exp # exp_cor @@ -380,16 +372,22 @@ test_that("2. Output checks", { # tolerance = 0.0001 # ) # ref_period - res_ref <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, accum = 2, - dates_exp = dates_exp, dates_expcor = dates_expcor, - na.rm = TRUE, ref_period = list(2011, 2013)) + # res_ref <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, accum = 2, + # dates_exp = dates_exp, dates_expcor = dates_expcor, + # na.rm = TRUE, ref_period = list(2011, 2013)) expect_equal( !identical(res1[[1]], res_ref), TRUE ) # params - res5 <- PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp, - params = params1, return_params = TRUE) + expect_error( + PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp, + params = params1, return_params = TRUE), + paste0("Parameter 'data' and 'params' must have same common dimensions ", + "except 'memb_dim' and 'coef'.") + ) + res6 <- PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp, + params = params2, return_params = TRUE) expect_error( PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp, params = array(abs(rnorm(100)), dim = dimscor)), @@ -397,8 +395,8 @@ test_that("2. Output checks", { "dimensions: 'syear', 'time' and 'coef'.") ) expect_equal( - dim(res5$params), - c(syear = 1, time = 3, coef = 3, latitude = 2, longitude = 1) + dim(res6$params), + c(syear = 6, time = 3, latitude = 2, longitude = 1, coef = 3) ) # standarization res4 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, @@ -412,17 +410,10 @@ test_that("2. Output checks", { c(syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 10) ) # cross_validation - expect_warning( - PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, - dates_exp = dates_exp, dates_expcor = dates_expcor, - cross_validation = TRUE), - paste0("Detected 'cross_validation' = TRUE. It will be set as FALSE ", - "since 'exp_cor' is provided.") - ) res_crossval_T <- PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp, - cross_validation = TRUE, return_params = TRUE) + return_params = TRUE) res_crossval_F <- PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp, - cross_validation = FALSE, return_params = TRUE) + return_params = TRUE) # cross_validation = TRUE expect_equal( dim(res_crossval_T$spei), @@ -430,13 +421,13 @@ test_that("2. Output checks", { ) expect_equal( dim(res_crossval_T$params), - c(syear = 6, time = 3, coef = 3, latitude = 2, longitude = 1) + c(syear = 6, time = 3, latitude = 2, longitude = 1, coef = 3) ) # cross_validation = FALSE - expect_equal( - dim(res_crossval_F$params)[-which(names(dim(res_crossval_F$params)) == 'coef')], - dimscor[-which(names(dimscor) == 'ensemble')] - ) + # expect_equal( + # dim(res_crossval_F$params)[-which(names(dim(res_crossval_F$params)) == 'coef')], + # dimscor[-which(names(dimscor) == 'ensemble')] + # ) # pet_method - ok res5 <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, diff --git a/tests/testthat/test-PeriodStandardization.R b/tests/testthat/test-PeriodStandardization.R index c8620a4..7673db2 100644 --- a/tests/testthat/test-PeriodStandardization.R +++ b/tests/testthat/test-PeriodStandardization.R @@ -210,12 +210,6 @@ test_that("2. Output checks", { ) # na.rm dat1[1,1,1,1] <- NA - expect_equal( - PeriodStandardization(data = dat1, na.rm = FALSE)[,,1,1], - array(c(rep(NA, 6), 0.4493183, 0.6592255, 0.4635960, -0.4217912, - 1.4006941, 0.3011019), dim = c(syear = 6, time = 2)), - tolerance = 0.0001 - ) expect_equal( all(is.na(PeriodStandardization(data = dat1, na.rm = FALSE)[,1,1,1])), TRUE -- GitLab From f47d93253b70eb2d9fe9568917a44a677fbd6a33 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Tue, 19 Sep 2023 11:40:37 +0200 Subject: [PATCH 28/42] Improve documentation of PeriodStandardization, PeriodAccumulation, PeriodSPEI and PeriodPET; correct few errors in the code --- R/PeriodAccumulation.R | 52 ++-- R/PeriodPET.R | 48 ++-- R/PeriodSPEI.R | 305 ++++++----------------- R/PeriodStandardization.R | 55 ++-- man/CST_PeriodAccumulation.Rd | 26 +- man/CST_PeriodPET.Rd | 21 +- man/CST_PeriodSPEI.Rd | 76 +++--- man/CST_PeriodStandardization.Rd | 31 ++- man/PeriodAccumulation.Rd | 17 +- man/PeriodPET.Rd | 25 +- man/PeriodSPEI.Rd | 58 ++--- man/PeriodStandardization.Rd | 27 +- tests/testthat/test-PeriodAccumulation.R | 23 +- tests/testthat/test-PeriodSPEI.R | 23 +- 14 files changed, 351 insertions(+), 436 deletions(-) diff --git a/R/PeriodAccumulation.R b/R/PeriodAccumulation.R index 73877e9..91220f6 100644 --- a/R/PeriodAccumulation.R +++ b/R/PeriodAccumulation.R @@ -12,10 +12,13 @@ #' #'There are two possible ways of performing the accumulation. The default one #'is by accumulating a variable over a dimension specified with 'time_dim'. To -#'chose a specific time period, start and end must be used. The other method -#'is by using rollwidth parameter. When this parameter is a positive integer, -#'the rolling sum is applied over 'time_dim'. If it is negative, the rolling sum -#'is applied towards 'time_dim'. +#'chose a specific time period, 'start' and 'end' must be used. The other method +#'is by using 'rollwidth' parameter. When this parameter is a positive integer, +#'the cumulative backward sum is applied to the time dimension. If it is +#'negative, the rolling sum is applied backwards. This function is build to work +#'be compatible with other tools in that work with 's2dv_cube' object class. The +#'input data must be this object class. If you don't work with 's2dv_cube', see +#'PeriodAccumulation. #' #'@param data An 's2dv_cube' object as provided function \code{CST_Load} in #' package CSTools. @@ -33,8 +36,8 @@ #' dimension name matching the dimensions provided in the object #' \code{data$data} can be specified. #'@param rollwidth An optional parameter to indicate the number of time -#' steps the rolling sum is applied to. If it is negative, the rolling sum is -#' applied backwards 'time_dim', if it is positive, it will be towards it. When +#' steps the rolling sum is applied to. If it is positive, the rolling sum is +#' applied backwards 'time_dim', if it is negative, it will be forward it. When #' this parameter is NULL, the sum is applied over all 'time_dim', in a #' specified period. It is NULL by default. #'@param na.rm A logical value indicating whether to ignore NA values (TRUE) or @@ -42,15 +45,16 @@ #'@param ncores An integer indicating the number of cores to use in parallel #' computation. #' -#'@return An 's2dv_cube' object containing the indicator in the element -#'\code{data}. If 'rollingwithd' is not used, it will have the dimensions of -#'the input parameter 'data' except the dimension where the accumulation has -#'been computed (specified with 'time_dim'). The 'Dates' array is updated to the +#'@return An 's2dv_cube' object containing the accumulated data in the element +#'\code{data}. If parameter 'rollwidth' is not used, it will have the dimensions +#'of the input parameter 'data' except the dimension where the accumulation has +#'been computed (specified with 'time_dim'). If 'rollwidth' is used, it will be +#'of same dimensions as input data. The 'Dates' array is updated to the #'dates corresponding to the beginning of the aggregated time period. A new #'element called 'time_bounds' will be added into the 'attrs' element in the #''s2dv_cube' object. It consists of a list containing two elements, the start #'and end dates of the aggregated period with the same dimensions of 'Dates' -#'element. If 'rollingwithd' is used, it will contain the same dimensions of +#'element. If 'rollwidth' is used, it will contain the same dimensions of #'parameter 'data' and the other elements of the 's2dv_cube' will not be #'modified. #' @@ -143,10 +147,10 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, #' #'There are two possible ways of performing the accumulation. The default one #'is by accumulating a variable over a dimension specified with 'time_dim'. To -#'chose a specific time period, start and end must be used. The other method -#'is by using rollwidth parameter. When this parameter is a positive integer, -#'the rolling sum is applied over 'time_dim'. If it is negative, the rolling sum -#'is applied towards 'time_dim'. +#'chose a specific time period, 'start' and 'end' must be used. The other method +#'is by using 'rollwidth' parameter. When this parameter is a positive integer, +#'the cumulative backward sum is applied to the time dimension. If it is +#'negative, the rolling sum is applied backwards. #' #'@param data A multidimensional array with named dimensions. #'@param dates A multidimensional array of dates with named dimensions matching @@ -166,8 +170,8 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, #' dimension name matching the dimensions provided in the object #' \code{data$data} can be specified. #'@param rollwidth An optional parameter to indicate the number of time -#' steps the rolling sum is applied to. If it is negative, the rolling sum is -#' applied backwards 'time_dim', if it is positive, it will be towards it. When +#' steps the rolling sum is applied to. If it is positive, the rolling sum is +#' applied backwards 'time_dim', if it is negative, it will be forward it. When #' this parameter is NULL, the sum is applied over all 'time_dim', in a #' specified period. It is NULL by default. #'@param na.rm A logical value indicating whether to ignore NA values (TRUE) or @@ -176,7 +180,10 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, #' computation. #' #'@return A multidimensional array with named dimensions containing the -#'indicator in the element \code{data}. +#'accumulated data in the element \code{data}. If parameter 'rollwidth' is +#'not used, it will have the dimensions of the input 'data' except the dimension +#'where the accumulation has been computed (specified with 'time_dim'). If +#''rollwidth' is used, it will be of same dimensions as input data. #' #'@examples #'exp <- array(rnorm(216)*200, dim = c(dataset = 1, member = 2, sdate = 3, @@ -235,7 +242,11 @@ PeriodAccumulation <- function(data, dates = NULL, start = NULL, end = NULL, } if (!is.null(dim(dates))) { data <- SelectPeriodOnData(data, dates, start, end, - time_dim = time_dim, ncores = ncores) + time_dim = time_dim, ncores = ncores) + if (!is.null(rollwidth)) { + dates <- SelectPeriodOnDates(dates = dates, start = start, end = end, + time_dim = time_dim, ncores = ncores) + } } else { warning("Parameter 'dates' must have named dimensions if 'start' and ", "'end' are not NULL. All data will be used.") @@ -297,10 +308,11 @@ PeriodAccumulation <- function(data, dates = NULL, start = NULL, end = NULL, return(total) } +data <- array(c(1,3,2,4), dim = c(time = 2, sdate = 2)) + .rollaccumulation <- function(data, mask_dates, rollwidth = 1, forwardroll = FALSE, na.rm = FALSE) { dims <- dim(data) - data_vector <- array(NA, dim = length(mask_dates)) count <- 1 for (dd in 1:length(mask_dates)) { diff --git a/R/PeriodPET.R b/R/PeriodPET.R index 5f6054a..9fdb7e2 100644 --- a/R/PeriodPET.R +++ b/R/PeriodPET.R @@ -12,14 +12,17 @@ #'CST_PeriodAccumulation. #' #'@param data A named list with the needed \code{s2dv_cube} objects containing -#' the seasonal forecast experiment in the data element for each variable. +#' the seasonal forecast experiment in the 'data' element for each variable. #' Specific variables are needed for each method used in computing the -#' Potential Evapotranspiration. See parameter 'pet_method'. The accepted -#' variables for each method are: for 'hargreaves', 'tmin' and 'tmax'; for -#' 'hargreaves_modified' are 'tmin', 'tmax' and 'pr'; for method 'thornthwaite' -#' 'tmean' is required. The units for temperature variables -#' ('tmin', 'tmax' and 'tmean') need to be in Celcius degrees; the units for -#' precipitation ('pr') need to be in mm/month. +#' Potential Evapotranspiration (see parameter 'pet_method'). The accepted +#' variable names are fixed in order to be recognized by the function. +#' The accepted name corresponding to the Minimum Temperature is 'tmin', +#' for Maximum Temperature is 'tmax', for Mean Temperature is 'tmean' and +#' for Precipitation is 'pr'. The accepted variable names for each method are: +#' For 'hargreaves': 'tmin' and 'tmax'; for 'hargreaves_modified' are 'tmin', +#' 'tmax' and 'pr'; for method 'thornthwaite' 'tmean' is required. The units +#' for temperature variables ('tmin', 'tmax' and 'tmean') need to be in Celcius +#' degrees; the units for precipitation ('pr') need to be in mm/month. #'@param dates An array of temporal dimensions containing the Dates of #' 'data'. It must be of class 'Date' or 'POSIXct'. #'@param lat A numeric vector containing the latitude values of 'data'. @@ -41,7 +44,6 @@ #' #'@examples #'dims <- c(time = 3, syear = 3, ensemble = 1, latitude = 1) -#' #'exp_tasmax <- array(rnorm(360, 27.73, 5.26), dim = dims) #'exp_tasmin <- array(rnorm(360, 14.83, 3.86), dim = dims) #'exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims) @@ -50,11 +52,8 @@ #' paste0(2010:end_year, "-09-15"), #' paste0(2010:end_year, "-10-16")), "UTC") #'dim(dates_exp) <- c(syear = 3, time = 3) -#' #'lat <- c(40) -#' #'exp1 <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) -#' #'res <- PeriodPET(data = exp1, lat = lat, dates = dates_exp) #' #'@import SPEI @@ -111,7 +110,7 @@ CST_PeriodPET <- function(data, pet_method = 'hargreaves', #'Compute the Potential Evapotranspiration #' -#'Compute the Potential evapotranspiration (PET) that is the amount of +#'Compute the Potential Evapotranspiration (PET) that is the amount of #'evaporation and transpiration that would occur if a sufficient water source #'were available. This function calculate PET according to the Thornthwaite, #'Hargreaves or Hargreaves-modified equations. @@ -119,15 +118,18 @@ CST_PeriodPET <- function(data, pet_method = 'hargreaves', #'For more information on the SPEI calculation, see functions #'PeriodStandardization and PeriodAccumulation. #' -#'@param data A named list with the needed \code{s2dv_cube} objects containing -#' the seasonal forecast experiment in the data element for each variable. +#'@param data A named list of multidimensional arrays containing +#' the seasonal forecast experiment data for each variable. #' Specific variables are needed for each method used in computing the -#' Potential Evapotranspiration. See parameter 'pet_method'. The accepted -#' variables for each method are: for 'hargreaves', 'tmin' and 'tmax'; for -#' 'hargreaves_modified' are 'tmin', 'tmax' and 'pr'; for method 'thornthwaite' -#' 'tmean' is required. The units for temperature variables -#' ('tmin', 'tmax' and 'tmean') need to be in Celcius degrees; the units for -#' precipitation ('pr') need to be in mm/month. +#' Potential Evapotranspiration (see parameter 'pet_method'). The accepted +#' variable names are fixed in order to be recognized by the function. +#' The accepted name corresponding to the Minimum Temperature is 'tmin', +#' for Maximum Temperature is 'tmax', for Mean Temperature is 'tmean' and +#' for Precipitation is 'pr'. The accepted variable names for each method are: +#' For 'hargreaves': 'tmin' and 'tmax'; for 'hargreaves_modified' are 'tmin', +#' 'tmax' and 'pr'; for method 'thornthwaite' 'tmean' is required. The units +#' for temperature variables ('tmin', 'tmax' and 'tmean') need to be in Celcius +#' degrees; the units for precipitation ('pr') need to be in mm/month. #'@param dates An array of temporal dimensions containing the Dates of #' 'data'. It must be of class 'Date' or 'POSIXct'. #'@param lat A numeric vector containing the latitude values of 'data'. @@ -149,7 +151,6 @@ CST_PeriodPET <- function(data, pet_method = 'hargreaves', #' #'@examples #'dims <- c(time = 3, syear = 3, ensemble = 1, latitude = 1) -#' #'exp_tasmax <- array(rnorm(360, 27.73, 5.26), dim = dims) #'exp_tasmin <- array(rnorm(360, 14.83, 3.86), dim = dims) #'exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims) @@ -158,11 +159,8 @@ CST_PeriodPET <- function(data, pet_method = 'hargreaves', #' paste0(2010:end_year, "-09-15"), #' paste0(2010:end_year, "-10-16")), "UTC") #'dim(dates_exp) <- c(syear = 3, time = 3) -#' #'lat <- c(40) -#' #'exp1 <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) -#' #'res <- PeriodPET(data = exp1, lat = lat, dates = dates_exp) #' #'@import SPEI @@ -175,7 +173,7 @@ PeriodPET <- function(data, dates, lat, pet_method = 'hargreaves', ncores = NULL) { # Initial checks - ## data + # data if (!inherits(data, 'list')) { stop("Parameter 'data' needs to be a named list with the needed variables.") } diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index aeed5c7..442209f 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -3,40 +3,43 @@ #'Calculation of the Standardised Precipitation-Evapotranspiration Index (SPEI) #'that is a multiscalar drought index based on climatic data. It can be used for #'determining the onset, duration and magnitude of drought conditions with -#'respect to normal conditions in a variety of natural and managed systems such -#'as crops, ecosystems, rivers, water resources, etc. The idea behind the SPEI -#'is to compare the highest possible evapotranspiration with the current water -#'availability. The SPEI uses the monthly (or weekly) difference between -#'precipitation and potential evapotranspiration. This represents a simple -#'climatic water balance which is calculated at different time scales to obtain -#'the SPEI. This function is build to work be compatible with other tools in -#'that work with 's2dv_cube' object class. The input data must be this object -#'class. If you don't work with 's2dv_cube', see PeriodSPEI. +#'respect to normal conditions. The idea behind the SPEI is to compare the +#'highest possible evapotranspiration with the current water availability. The +#'SPEI uses for a specific time frequency the difference between precipitation +#'and potential evapotranspiration. This represents a simple climatic water +#'balance which is calculated at different time scales to obtain the SPEI. This +#'function is build to work be compatible with other tools in that work with +#''s2dv_cube' object class. The input data must be this object class. If you +#'don't work with 's2dv_cube', see PeriodSPEI. #' #'Next, some specifications for the calculation of this indicator will be -#'discussed. On the one hand, the model to be used to calculate potential -#'evapotranspiration is specified with the pet_method parameter (hargreaves, -#'hargraves modified or thornwhite). On the other hand, to choose the time scale -#'in which you want to accumulate the SPEI (SPEI3, SPEI6...) is done using the -#'accum parameter, where you must indicate the number of time steps you want to -#'accumulate throughout leadtime_dim. Since the accumulation is done for the +#'discussed. On the one hand, the model to be used to calculate Potential +#'Evapotranspiration is specified with the 'pet_method' parameter (it can be +#'hargreaves, hargraves modified or thornwhite). On the other hand, to choose +#'the time scale in which you want to accumulate the SPEI (SPEI3, SPEI6...) is +#'done using the 'accum' parameter, where you must indicate the number of time +#'steps you want to accumulate throughout 'leadtime_dim'. The accumulation will +#'be performed backwards by default. Since the accumulation is done for the #'elapsed time steps, there will be no complete accumulations until reaching the -#'time instant equal to the value of the parameter. For this reason, in the -#'result, we will find that for the dimension where the accumulation has been -#'carried out, the values of the array will be NA since they do not include +#'time instant equal to the value of the 'accum' parameter. For this reason, in +#'the result, we will find that for the dimension where the accumulation has +#'been carried out, the values of the array will be NA since they do not include #'complete accumulations. Also, there is a parameter to specify if the #'standardization that can be TRUE or FALSE. If it is TRUE, the data is fit to a #'probability distribution to transform the original values to standardized #'units that are comparable in space and time and at different SPEI time scales. -#'The na.rm parameter is a logical parameter used to decide whether to remove +#'The 'na.rm' parameter is a logical parameter used to decide whether to remove #'the NA values from the data before doing the calculation. It must be taken -#'into account that if na.rm == FALSE and there is some NA value in the specific -#'coordinates which the SPEI is computed, standardization cannot be carried out -#'for those coordinates and therefore, the result will be filled with NA for the -#'specific coordinates. However, when na.rm == TRUE, if the amount of data for -#'those specific coordinates is smaller than 4, it will not be possible to carry -#'out because we will not have enough data and the result will be also filled -#'with NAs for that coordinates. +#'into account that if 'na.rm' is FALSE and there is some NA value in the +#'specific coordinates which the SPEI is computed, standardization cannot be +#'carried out for those coordinates and therefore, the result will be filled +#'with NA for the specific coordinates. However, when 'na.rm' is TRUE, if the +#'amount of data for those specific coordinates is smaller than 4, it will not +#'be possible to carry out because we will not have enough data and the result +#'will be also filled with NAs for that coordinates. When only 'exp' is provided +#'the Standardization is computed with 'cross_validation'. For more information +#'about SPEI calculation, see functions CST_PeriodPET, CST_PeriodAccumulation +#'and CST_PeriodStandardization. #' #'@param exp A named list with the needed \code{s2dv_cube} objects containing #' the seasonal forecast experiment in the data element for each variable. @@ -61,8 +64,9 @@ #' dimension. By default it is set by 'latitude'. #'@param accum An integer value indicating the number of months for the #' accumulation for each variable. When it is greater than 1, the result will -#' be filled with NA until the accum time_dim dimension number due to the -#' accumulation to previous months. +#' be filled with NA until the accum 'time_dim' dimension number due to the +#' accumulation to previous months. The accumulation is performed backwards +#' by default. #'@param ref_period A list with two numeric values with the starting and end #' points of the reference period used for computing the index. The default #' value is NULL indicating that the first and end values in data will be @@ -111,14 +115,14 @@ #' parallel computation. #' #'@return An 's2dv_cube' object containing the SPEI multidimensional array in -#'element \code{data}. If 'exp_cor' is provided, only results from 'exp_cor' -#'will be provided. The parameters of the standardization will only be returned -#'if 'return_params' is TRUE. The SPEI will only be computed if -#''standardization' is TRUE. If 'standardization' is FALSE, only the climatic -#'water balance (precipitation minus evapotranspiration) will be returned. The -#'resultant arrays will have the same dimensions as the initial input data. The -#'other elements in the 's2dv_cube' will be updated with the combined -#'information of the input data arrays. +#'element \code{data} with same dimensions as input data. If 'exp_cor' is +#'provided, only results from 'exp_cor' will be provided. The parameters of the +#'standardization will only be returned if 'return_params' is TRUE. The SPEI +#'will only be computed if 'standardization' is TRUE. If 'standardization' is +#'FALSE, only the climatic water balance (precipitation minus +#'evapotranspiration) will be returned. The resultant arrays will have the same +#'dimensions as the initial input data. The other elements in the 's2dv_cube' +#'will be updated with the combined information of the input data arrays. #' #'@examples #'dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, @@ -160,21 +164,17 @@ #'@import zoo #'@import TLMoments #'@import lmomco -#'@import lmom #'@import lubridate #'@import CSTools #'@export -CST_PeriodSPEI <- function(exp, exp_cor = NULL, - time_dim = 'syear', leadtime_dim = 'time', - memb_dim = 'ensemble', lat_dim = 'latitude', - accum = 1, ref_period = NULL, params = NULL, - pet_exp = NULL, pet_expcor = NULL, - standardization = TRUE, - pet_method = 'hargreaves', method = 'parametric', - distribution = 'log-Logistic', - handle_infinity = FALSE, - return_params = FALSE, na.rm = FALSE, - ncores = NULL) { +CST_PeriodSPEI <- function(exp, exp_cor = NULL, time_dim = 'syear', + leadtime_dim = 'time', memb_dim = 'ensemble', + lat_dim = 'latitude', accum = 1, ref_period = NULL, + params = NULL, pet_exp = NULL, pet_expcor = NULL, + standardization = TRUE, pet_method = 'hargreaves', + method = 'parametric', distribution = 'log-Logistic', + handle_infinity = FALSE, return_params = FALSE, + na.rm = FALSE, ncores = NULL) { # Check 's2dv_cube' if (is.null(exp)) { @@ -281,38 +281,39 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, #'Calculation of the Standardised Precipitation-Evapotranspiration Index (SPEI) #'that is a multiscalar drought index based on climatic data. It can be used for #'determining the onset, duration and magnitude of drought conditions with -#'respect to normal conditions in a variety of natural and managed systems such -#'as crops, ecosystems, rivers, water resources, etc. The idea behind the SPEI -#'is to compare the highest possible evapotranspiration with the current water -#'availability. The SPEI uses the monthly (or weekly) difference between -#'precipitation and potential evapotranspiration. This represents a simple -#'climatic water balance which is calculated at different time scales to obtain -#'the SPEI. +#'respect to normal conditions. The idea behind the SPEI is to compare the +#'highest possible evapotranspiration with the current water availability. The +#'SPEI uses for a specific time frequency the difference between precipitation +#'and potential evapotranspiration. #' #'Next, some specifications for the calculation of this indicator will be -#'discussed. On the one hand, the model to be used to calculate potential -#'evapotranspiration is specified with the pet_method parameter (hargreaves, -#'hargraves modified or thornwhite). On the other hand, to choose the time scale -#'in which you want to accumulate the SPEI (SPEI3, SPEI6...) is done using the -#'accum parameter, where you must indicate the number of time steps you want to -#'accumulate throughout leadtime_dim. Since the accumulation is done for the +#'discussed. On the one hand, the model to be used to calculate Potential +#'Evapotranspiration is specified with the 'pet_method' parameter (it can be +#'hargreaves, hargraves modified or thornwhite). On the other hand, to choose +#'the time scale in which you want to accumulate the SPEI (SPEI3, SPEI6...) is +#'done using the 'accum' parameter, where you must indicate the number of time +#'steps you want to accumulate throughout 'leadtime_dim'. The accumulation will +#'be performed backwards by default. Since the accumulation is done for the #'elapsed time steps, there will be no complete accumulations until reaching the -#'time instant equal to the value of the parameter. For this reason, in the -#'result, we will find that for the dimension where the accumulation has been -#'carried out, the values of the array will be NA since they do not include +#'time instant equal to the value of the 'accum' parameter. For this reason, in +#'the result, we will find that for the dimension where the accumulation has +#'been carried out, the values of the array will be NA since they do not include #'complete accumulations. Also, there is a parameter to specify if the #'standardization that can be TRUE or FALSE. If it is TRUE, the data is fit to a #'probability distribution to transform the original values to standardized #'units that are comparable in space and time and at different SPEI time scales. -#'The na.rm parameter is a logical parameter used to decide whether to remove +#'The 'na.rm' parameter is a logical parameter used to decide whether to remove #'the NA values from the data before doing the calculation. It must be taken -#'into account that if na.rm == FALSE and there is some NA value in the specific -#'coordinates which the SPEI is computed, standardization cannot be carried out -#'for those coordinates and therefore, the result will be filled with NA for the -#'specific coordinates. However, when na.rm == TRUE, if the amount of data for -#'those specific coordinates is smaller than 4, it will not be possible to carry -#'out because we will not have enough data and the result will be also filled -#'with NAs for that coordinates. +#'into account that if 'na.rm' is FALSE and there is some NA value in the +#'specific coordinates which the SPEI is computed, standardization cannot be +#'carried out for those coordinates and therefore, the result will be filled +#'with NA for the specific coordinates. However, when 'na.rm' is TRUE, if the +#'amount of data for those specific coordinates is smaller than 4, it will not +#'be possible to carry out because we will not have enough data and the result +#'will be also filled with NAs for that coordinates. When only 'exp' is provided +#'the Standardization is computed with 'cross_validation'. For more information +#'about SPEI calculation, see functions PeriodPET, PeriodAccumulation and +#'PeriodStandardization. #' #'@param exp A named list with multidimensional array objects containing #' the seasonal forecast experiment in the data element for each variable. @@ -343,7 +344,8 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, #'@param accum accum An integer value indicating the number of months for the #' accumulation for each variable. When it is greater than 1, the result will #' be filled with NA until the accum time_dim dimension number due to the -#' accumulation to previous months. +#' accumulation to previous months. The accumulation is performed backwards +#' by default. #'@param ref_period A list with two numeric values with the starting and end #' points of the reference period used for computing the index. The default #' value is NULL indicating that the first and end values in data will be @@ -391,9 +393,9 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, #'@param ncores An integer value indicating the number of cores to use in #' parallel computation. #' -#'@return An 's2dv_cube' object containing the SPEI multidimensional array in -#'element \code{data}. If 'exp_cor' is provided, only results from 'exp_cor' -#'will be provided. The parameters of the standardization will only be returned +#'@return A multidimensional array containing the SPEI with same dimensions as +#'input data. If 'exp_cor' is provided, only results from 'exp_cor' will be +#'provided. The parameters of the standardization will only be returned #'if 'return_params' is TRUE. The SPEI will only be computed if #''standardization' is TRUE. If 'standardization' is FALSE, only the climatic #'water balance (precipitation minus evapotranspiration) will be returned. The @@ -428,25 +430,14 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, #'res <- PeriodSPEI(exp = exp, exp_cor = exp_cor, lat = lat, #' dates_exp = dates_exp, dates_expcor = dates_expcor) #' -#'@import multiApply -#'@import ClimProjDiags -#'@import SPEI -#'@import zoo -#'@import TLMoments -#'@import lmomco -#'@import lmom -#'@import lubridate #'@export -PeriodSPEI <- function(exp, dates_exp, lat, - exp_cor = NULL, dates_expcor = NULL, +PeriodSPEI <- function(exp, dates_exp, lat, exp_cor = NULL, dates_expcor = NULL, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', lat_dim = 'latitude', accum = 1, ref_period = NULL, params = NULL, - pet_exp = NULL, pet_expcor = NULL, - standardization = TRUE, + pet_exp = NULL, pet_expcor = NULL, standardization = TRUE, pet_method = 'hargreaves', method = 'parametric', - distribution = 'log-Logistic', - handle_infinity = FALSE, + distribution = 'log-Logistic', handle_infinity = FALSE, return_params = FALSE, na.rm = FALSE, ncores = NULL) { # Initial checks @@ -501,46 +492,6 @@ PeriodSPEI <- function(exp, dates_exp, lat, pet_method <- rep(pet_method, 2) } } - - ## pet_exp - if (!is.null(pet_exp)) { - if (length(dim(exp[['pr']])) != length(dim(pet_exp))) { - stop("Parameter 'pet_exp' must have the same length of all the ", - "dimensions as variable 'pr' in 'exp'.") - } - if (!all(dim(exp[['pr']]) %in% dim(pet_exp))) { - stop("Parameter 'pet_exp' must have the same length of all the ", - "dimensions as variable 'pr' in 'exp'.") - } - if (any(names(dim(exp[['pr']])) != names(dim(pet_exp)))) { - pos <- match(names(dim(exp[['pr']])), names(dim(pet_exp))) - pet_exp <- aperm(pet_exp, pos) - } - pet[[1]] <- pet_exp - } else if (is.null(dates_exp)) { - stop("Parameter 'dates_exp' must be provided.") - } - ## pet_expcor - if (!is.null(exp_cor)) { - if (!is.null(pet_expcor)) { - if (length(dim(exp_cor[['pr']])) != length(dim(pet_expcor))) { - stop("Parameter 'pet_expcor' must have the same length of all the ", - "dimensions as variable 'pr' in 'exp_cor'.") - } - if (!all(dim(exp_cor[['pr']]) %in% dim(pet_expcor))) { - stop("Parameter 'pet_expcor' must have the same length of all the ", - "dimensions as variable 'pr' in 'exp_cor'.") - } - if (any(names(dim(exp_cor[['pr']])) != names(dim(pet_expcor)))) { - pos <- match(names(dim(exp_cor[['pr']])), names(dim(pet_expcor))) - pet_expcor <- aperm(pet_expcor, pos) - } - pet[[2]] <- pet_expcor - } else if (is.null(dates_expcor)) { - stop("Parameter 'dates_expcor' must be provided.") - } - } - ## time_dim if (!is.character(time_dim) | length(time_dim) != 1) { stop("Parameter 'time_dim' must be a character string.") @@ -577,7 +528,6 @@ PeriodSPEI <- function(exp, dates_exp, lat, stop("Parameter 'memb_dim' is not found in 'exp_cor' dimension.") } } - ## lat_dim if (!is.character(lat_dim) | length(lat_dim) != 1) { stop("Parameter 'lat_dim' must be a character string.") @@ -590,7 +540,6 @@ PeriodSPEI <- function(exp, dates_exp, lat, stop("Parameter 'lat_dim' is not found in 'exp_cor' dimension.") } } - ## dates if (is.null(dates_exp)) { stop("Parameter 'dates_exp' is missing, dates must be provided.") @@ -608,7 +557,6 @@ PeriodSPEI <- function(exp, dates_exp, lat, stop("Parameter 'dates_exp' needs to have the same length as 'time_dim' ", "and 'leadtime_dim' as 'exp'.") } - if (!is.null(exp_cor)) { if (is.null(dates_expcor)) { stop("Parameter 'dates_expcor' is missing, dates for 'exp_cor' must be ", @@ -628,107 +576,20 @@ PeriodSPEI <- function(exp, dates_exp, lat, "'time_dim' and 'leadtime_dim' as 'exp_cor'.") } } - ## accum if (accum > dim(exp[[1]])[leadtime_dim]) { stop(paste0("Cannot compute accumulation of ", accum, " months because ", "loaded data has only ", dim(exp[[1]])[leadtime_dim], " months.")) } - - ## ref_period - if (!is.null(ref_period)) { - if (length(ref_period) != 2) { - warning("Parameter 'ref_period' must be of length two indicating the ", - "first and end years of the reference period. It will not ", - "be used.") - ref_period <- NULL - } else if (!all(sapply(ref_period, is.numeric))) { - warning("Parameter 'ref_period' must be a numeric vector indicating the ", - "'start' and 'end' years of the reference period. It will not ", - "be used.") - ref_period <- NULL - } else if (ref_period[[1]] > ref_period[[2]]) { - warning("In parameter 'ref_period' 'start' cannot be after 'end'. It ", - "will not be used.") - ref_period <- NULL - } else if (!all(unlist(ref_period) %in% year(dates_exp))) { - warning("Parameter 'ref_period' contain years outside the dates. ", - "It will not be used.") - ref_period <- NULL - } else { - years <- year(ClimProjDiags::Subset(dates_exp, along = leadtime_dim, - indices = 1)) - ref_period[[1]] <- which(ref_period[[1]] == years) - ref_period[[2]] <- which(ref_period[[2]] == years) - } - } - ## standardization if (!is.logical(standardization)) { stop("Parameter 'standardization' must be a logical value.") } - - ## handle_infinity - if (!is.logical(handle_infinity)) { - stop("Parameter 'handle_infinity' must be a logical value.") - } - - ## method - if (!(method %in% c('parametric', 'non-parametric'))) { - stop("Parameter 'method' must be a character string containing one of ", - "the following methods: 'parametric' or 'non-parametric'.") - } - - ## distribution - if (!(distribution %in% c('log-Logistic', 'Gamma', 'PearsonIII'))) { - stop("Parameter 'distribution' must be a character string containing one ", - "of the following distributions: 'log-Logistic', 'Gamma' or ", - "'PearsonIII'.") - } - ## ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | - length(ncores) > 1) { - stop("Parameter 'ncores' must be a positive integer.") - } - } - ## na.rm if (!is.logical(na.rm)) { stop("Parameter 'na.rm' must be logical.") } - - ## params - if (!is.null(params)) { - if (!is.numeric(params)) { - stop("Parameter 'params' must be numeric.") - } - if (!all(c(time_dim, leadtime_dim, 'coef') %in% names(dim(params)))) { - stop("Parameter 'params' must be a multidimensional array with named ", - "dimensions: '", time_dim, "', '", leadtime_dim, "' and 'coef'.") - } - if (distribution == "Gamma") { - if (dim(params)['coef'] != 2) { - stop("For '", distribution, "' distribution, params array should have ", - "'coef' dimension of length 2.") - } - } else { - if (dim(params)['coef'] != 3) { - stop("For '", distribution, "' distribution, params array should have ", - "'coef' dimension of length 3.") - } - } - if (!is.null(exp_cor)) { - warning("'Parameter 'exp_cor' is provided, 'params' will be set to NULL.") - params <- NULL - } - } - - ## return_params - if (!is.logical(return_params)) { - stop("Parameter 'return_params' must be logical.") - } - + # Complete dates dates <- .return2list(dates_exp, dates_expcor) @@ -747,7 +608,7 @@ PeriodSPEI <- function(exp, dates_exp, lat, lat_dim = lat_dim, na.rm = na.rm, ncores = ncores) computed_pet <- TRUE } - if (any(is.na(pet[[k]]))) { + if (!na.rm & any(is.na(pet[[k]]))) { mask_na <- which(is.na(pet[[k]])) warning("There are NAs in PET.") } diff --git a/R/PeriodStandardization.R b/R/PeriodStandardization.R index e8cb5b3..d95127d 100644 --- a/R/PeriodStandardization.R +++ b/R/PeriodStandardization.R @@ -6,15 +6,8 @@ #'standardized units that are comparable in space and time and at different SPEI #'time scales. #' -#'Next, some specifications for the calculation of this indicator will be -#'discussed. To choose the time scale in which you want to accumulate the SPEI -#'(SPEI3, SPEI6...) is done using the accum parameter. The accumulation needs to -#'be performed in the previous step. However, since the accumulation is done for -#'the elapsed time steps, there will be no complete accumulations until reaching -#'the time instant equal to the value of the parameter. For this reason, in the -#'result, we will find that for the dimension where the accumulation has been -#'carried out, the values of the array will be NA since they do not include -#'complete accumulations. If there are NAs in the data and they are not removed with the +#'Next, some specifications for the calculation of the standardization will be +#'discussed. If there are NAs in the data and they are not removed with the #'parameter 'na.rm', the standardization cannot be carried out for those #'coordinates and therefore, the result will be filled with NA for the #'specific coordinates. When NAs are not removed, if the length of the data for @@ -23,8 +16,7 @@ #'About the distribution used to fit the data, there are only two possibilities: #''log-logistic' and 'Gamma'. The 'Gamma' method only works when only #'precipitation is provided and other variables are 0 because it is positive -#'defined (SPI indicator). For more information about SPEI, see functions -#'PeriodPET and PeriodAccumulation. This function is build to work be compatible +#'defined (SPI indicator). This function is build to work be compatible #'with other tools in that work with 's2dv_cube' object class. The input data #'must be this object class. If you don't work with 's2dv_cube', see #'PeriodStandardization. For more information on the SPEI indicator calculation, @@ -46,6 +38,14 @@ #' points of the reference period used for computing the index. The default #' value is NULL indicating that the first and end values in data will be #' used as starting and end points. +#'@param params An optional parameter that needs to be a multidimensional array +#' with named dimensions. This option overrides computation of fitting +#' parameters. It needs to be of same time dimensions (specified in 'time_dim' +#' and 'leadtime_dim') of 'data' and a dimension named 'coef' with the length +#' of the coefficients needed for the used distribution (for 'Gamma' coef +#' dimension is of lenght 2, for 'log-Logistic' or 'PearsonIII' is of length +#' 3). It also needs to have a leadtime dimension (specified in 'leadtime_dim') +#' of length 1. It will only be used if 'data_cor' is not provided. #'@param handle_infinity A logical value wether to return infinite values (TRUE) #' or not (FALSE). When it is TRUE, the positive infinite values (negative #' infinite) are substituted by the maximum (minimum) values of each @@ -59,6 +59,8 @@ #' 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The #' 'Gamma' method only works when only precipitation is provided and other #' variables are 0 because it is positive defined (SPI indicator). +#'@param return_params A logical value indicating wether to return parameters +#' array (TRUE) or not (FALSE). It is FALSE by default. #'@param na.rm A logical value indicating whether NA values should be removed #' from data. It is FALSE by default. If it is FALSE and there are NA values, #' standardization cannot be carried out for those coordinates and therefore, @@ -72,7 +74,10 @@ #'@return An object of class \code{s2dv_cube} containing the standardized data. #'If 'data_cor' is provided the array stored in element data will be of the same #'dimensions as 'data_cor'. If 'data_cor' is not provided, the array stored in -#'element data will be of the same dimensions as 'data'. +#'element data will be of the same dimensions as 'data'. The parameters of the +#'standardization will only be returned if 'return_params' is TRUE, in this +#'case, the output will be a list of two objects one for the standardized data +#'and one for the parameters. #' #'@examples #'dims <- c(syear = 6, time = 3, latitude = 2, ensemble = 25) @@ -140,15 +145,8 @@ CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', #'transform the original values to standardized units that are comparable in #'space and time and at different SPEI time scales. #' -#'Next, some specifications for the calculation of this indicator will be -#'discussed. To choose the time scale in which you want to accumulate the SPEI -#'(SPEI3, SPEI6...) is done using the accum parameter. The accumulation needs to -#'be performed in the previous step. However, since the accumulation is done for -#'the elapsed time steps, there will be no complete accumulations until reaching -#'the time instant equal to the value of the parameter. For this reason, in the -#'result, we will find that for the dimension where the accumulation has been -#'carried out, the values of the array will be NA since they do not include -#'complete accumulations. If there are NAs in the data and they are not removed with the +#'Next, some specifications for the calculation of the standardization will be +#'discussed. If there are NAs in the data and they are not removed with the #'parameter 'na.rm', the standardization cannot be carried out for those #'coordinates and therefore, the result will be filled with NA for the #'specific coordinates. When NAs are not removed, if the length of the data for @@ -177,6 +175,14 @@ CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', #' points of the reference period used for computing the index. The default #' value is NULL indicating that the first and end values in data will be #' used as starting and end points. +#'@param params An optional parameter that needs to be a multidimensional array +#' with named dimensions. This option overrides computation of fitting +#' parameters. It needs to be of same time dimensions (specified in 'time_dim' +#' and 'leadtime_dim') of 'data' and a dimension named 'coef' with the length +#' of the coefficients needed for the used distribution (for 'Gamma' coef +#' dimension is of lenght 2, for 'log-Logistic' or 'PearsonIII' is of length +#' 3). It also needs to have a leadtime dimension (specified in 'leadtime_dim') +#' of length 1. It will only be used if 'data_cor' is not provided. #'@param handle_infinity A logical value wether to return infinite values (TRUE) #' or not (FALSE). When it is TRUE, the positive infinite values (negative #' infinite) are substituted by the maximum (minimum) values of each @@ -190,6 +196,8 @@ CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', #' 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The #' 'Gamma' method only works when only precipitation is provided and other #' variables are 0 because it is positive defined (SPI indicator). +#'@param return_params A logical value indicating wether to return parameters +#' array (TRUE) or not (FALSE). It is FALSE by default. #'@param na.rm A logical value indicating whether NA values should be removed #' from data. It is FALSE by default. If it is FALSE and there are NA values, #' standardization cannot be carried out for those coordinates and therefore, @@ -203,7 +211,9 @@ CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', #'@return A multidimensional array containing the standardized data. #'If 'data_cor' is provided the array will be of the same dimensions as #''data_cor'. If 'data_cor' is not provided, the array will be of the same -#'dimensions as 'data'. +#'dimensions as 'data'. The parameters of the standardization will only be +#'returned if 'return_params' is TRUE, in this case, the output will be a list +#'of two objects one for the standardized data and one for the parameters. #' #'@examples #'dims <- c(syear = 6, time = 2, latitude = 2, ensemble = 25) @@ -216,7 +226,6 @@ CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', #'@import multiApply #'@import ClimProjDiags #'@import TLMoments -#'@import lmomco #'@import lmom #'@export PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, diff --git a/man/CST_PeriodAccumulation.Rd b/man/CST_PeriodAccumulation.Rd index 9bec9fd..d1f1a46 100644 --- a/man/CST_PeriodAccumulation.Rd +++ b/man/CST_PeriodAccumulation.Rd @@ -35,8 +35,8 @@ dimension name matching the dimensions provided in the object \code{data$data} can be specified.} \item{rollwidth}{An optional parameter to indicate the number of time -steps the rolling sum is applied to. If it is negative, the rolling sum is -applied backwards 'time_dim', if it is positive, it will be towards it. When +steps the rolling sum is applied to. If it is positive, the rolling sum is +applied backwards 'time_dim', if it is negative, it will be forward it. When this parameter is NULL, the sum is applied over all 'time_dim', in a specified period. It is NULL by default.} @@ -47,15 +47,16 @@ not (FALSE).} computation.} } \value{ -An 's2dv_cube' object containing the indicator in the element -\code{data}. If 'rollingwithd' is not used, it will have the dimensions of -the input parameter 'data' except the dimension where the accumulation has -been computed (specified with 'time_dim'). The 'Dates' array is updated to the +An 's2dv_cube' object containing the accumulated data in the element +\code{data}. If parameter 'rollwidth' is not used, it will have the dimensions +of the input parameter 'data' except the dimension where the accumulation has +been computed (specified with 'time_dim'). If 'rollwidth' is used, it will be +of same dimensions as input data. The 'Dates' array is updated to the dates corresponding to the beginning of the aggregated time period. A new element called 'time_bounds' will be added into the 'attrs' element in the 's2dv_cube' object. It consists of a list containing two elements, the start and end dates of the aggregated period with the same dimensions of 'Dates' -element. If 'rollingwithd' is used, it will contain the same dimensions of +element. If 'rollwidth' is used, it will contain the same dimensions of parameter 'data' and the other elements of the 's2dv_cube' will not be modified. } @@ -73,10 +74,13 @@ by using this function: \details{ There are two possible ways of performing the accumulation. The default one is by accumulating a variable over a dimension specified with 'time_dim'. To -chose a specific time period, start and end must be used. The other method -is by using rollwidth parameter. When this parameter is a positive integer, -the rolling sum is applied over 'time_dim'. If it is negative, the rolling sum -is applied towards 'time_dim'. +chose a specific time period, 'start' and 'end' must be used. The other method +is by using 'rollwidth' parameter. When this parameter is a positive integer, +the cumulative backward sum is applied to the time dimension. If it is +negative, the rolling sum is applied backwards. This function is build to work +be compatible with other tools in that work with 's2dv_cube' object class. The +input data must be this object class. If you don't work with 's2dv_cube', see +PeriodAccumulation. } \examples{ exp <- NULL diff --git a/man/CST_PeriodPET.Rd b/man/CST_PeriodPET.Rd index 10383af..23a56f4 100644 --- a/man/CST_PeriodPET.Rd +++ b/man/CST_PeriodPET.Rd @@ -16,14 +16,17 @@ CST_PeriodPET( } \arguments{ \item{data}{A named list with the needed \code{s2dv_cube} objects containing -the seasonal forecast experiment in the data element for each variable. +the seasonal forecast experiment in the 'data' element for each variable. Specific variables are needed for each method used in computing the -Potential Evapotranspiration. See parameter 'pet_method'. The accepted -variables for each method are: for 'hargreaves', 'tmin' and 'tmax'; for -'hargreaves_modified' are 'tmin', 'tmax' and 'pr'; for method 'thornthwaite' -'tmean' is required. The units for temperature variables -('tmin', 'tmax' and 'tmean') need to be in Celcius degrees; the units for -precipitation ('pr') need to be in mm/month.} +Potential Evapotranspiration (see parameter 'pet_method'). The accepted +variable names are fixed in order to be recognized by the function. +The accepted name corresponding to the Minimum Temperature is 'tmin', +for Maximum Temperature is 'tmax', for Mean Temperature is 'tmean' and +for Precipitation is 'pr'. The accepted variable names for each method are: +For 'hargreaves': 'tmin' and 'tmax'; for 'hargreaves_modified' are 'tmin', +'tmax' and 'pr'; for method 'thornthwaite' 'tmean' is required. The units +for temperature variables ('tmin', 'tmax' and 'tmean') need to be in Celcius +degrees; the units for precipitation ('pr') need to be in mm/month.} \item{pet_method}{A character string indicating the method used to compute the potential evapotranspiration. The accepted methods are: @@ -66,7 +69,6 @@ CST_PeriodAccumulation. } \examples{ dims <- c(time = 3, syear = 3, ensemble = 1, latitude = 1) - exp_tasmax <- array(rnorm(360, 27.73, 5.26), dim = dims) exp_tasmin <- array(rnorm(360, 14.83, 3.86), dim = dims) exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims) @@ -75,11 +77,8 @@ dates_exp <- as.POSIXct(c(paste0(2010:end_year, "-08-16"), paste0(2010:end_year, "-09-15"), paste0(2010:end_year, "-10-16")), "UTC") dim(dates_exp) <- c(syear = 3, time = 3) - lat <- c(40) - exp1 <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) - res <- PeriodPET(data = exp1, lat = lat, dates = dates_exp) } diff --git a/man/CST_PeriodSPEI.Rd b/man/CST_PeriodSPEI.Rd index c8010da..d5b8075 100644 --- a/man/CST_PeriodSPEI.Rd +++ b/man/CST_PeriodSPEI.Rd @@ -56,8 +56,9 @@ dimension. By default it is set by 'latitude'.} \item{accum}{An integer value indicating the number of months for the accumulation for each variable. When it is greater than 1, the result will -be filled with NA until the accum time_dim dimension number due to the -accumulation to previous months.} +be filled with NA until the accum 'time_dim' dimension number due to the +accumulation to previous months. The accumulation is performed backwards +by default.} \item{ref_period}{A list with two numeric values with the starting and end points of the reference period used for computing the index. The default @@ -118,54 +119,57 @@ parallel computation.} } \value{ An 's2dv_cube' object containing the SPEI multidimensional array in -element \code{data}. If 'exp_cor' is provided, only results from 'exp_cor' -will be provided. The parameters of the standardization will only be returned -if 'return_params' is TRUE. The SPEI will only be computed if -'standardization' is TRUE. If 'standardization' is FALSE, only the climatic -water balance (precipitation minus evapotranspiration) will be returned. The -resultant arrays will have the same dimensions as the initial input data. The -other elements in the 's2dv_cube' will be updated with the combined -information of the input data arrays. +element \code{data} with same dimensions as input data. If 'exp_cor' is +provided, only results from 'exp_cor' will be provided. The parameters of the +standardization will only be returned if 'return_params' is TRUE. The SPEI +will only be computed if 'standardization' is TRUE. If 'standardization' is +FALSE, only the climatic water balance (precipitation minus +evapotranspiration) will be returned. The resultant arrays will have the same +dimensions as the initial input data. The other elements in the 's2dv_cube' +will be updated with the combined information of the input data arrays. } \description{ Calculation of the Standardised Precipitation-Evapotranspiration Index (SPEI) that is a multiscalar drought index based on climatic data. It can be used for determining the onset, duration and magnitude of drought conditions with -respect to normal conditions in a variety of natural and managed systems such -as crops, ecosystems, rivers, water resources, etc. The idea behind the SPEI -is to compare the highest possible evapotranspiration with the current water -availability. The SPEI uses the monthly (or weekly) difference between -precipitation and potential evapotranspiration. This represents a simple -climatic water balance which is calculated at different time scales to obtain -the SPEI. This function is build to work be compatible with other tools in -that work with 's2dv_cube' object class. The input data must be this object -class. If you don't work with 's2dv_cube', see PeriodSPEI. +respect to normal conditions. The idea behind the SPEI is to compare the +highest possible evapotranspiration with the current water availability. The +SPEI uses for a specific time frequency the difference between precipitation +and potential evapotranspiration. This represents a simple climatic water +balance which is calculated at different time scales to obtain the SPEI. This +function is build to work be compatible with other tools in that work with +'s2dv_cube' object class. The input data must be this object class. If you +don't work with 's2dv_cube', see PeriodSPEI. } \details{ Next, some specifications for the calculation of this indicator will be -discussed. On the one hand, the model to be used to calculate potential -evapotranspiration is specified with the pet_method parameter (hargreaves, -hargraves modified or thornwhite). On the other hand, to choose the time scale -in which you want to accumulate the SPEI (SPEI3, SPEI6...) is done using the -accum parameter, where you must indicate the number of time steps you want to -accumulate throughout leadtime_dim. Since the accumulation is done for the +discussed. On the one hand, the model to be used to calculate Potential +Evapotranspiration is specified with the 'pet_method' parameter (it can be +hargreaves, hargraves modified or thornwhite). On the other hand, to choose +the time scale in which you want to accumulate the SPEI (SPEI3, SPEI6...) is +done using the 'accum' parameter, where you must indicate the number of time +steps you want to accumulate throughout 'leadtime_dim'. The accumulation will +be performed backwards by default. Since the accumulation is done for the elapsed time steps, there will be no complete accumulations until reaching the -time instant equal to the value of the parameter. For this reason, in the -result, we will find that for the dimension where the accumulation has been -carried out, the values of the array will be NA since they do not include +time instant equal to the value of the 'accum' parameter. For this reason, in +the result, we will find that for the dimension where the accumulation has +been carried out, the values of the array will be NA since they do not include complete accumulations. Also, there is a parameter to specify if the standardization that can be TRUE or FALSE. If it is TRUE, the data is fit to a probability distribution to transform the original values to standardized units that are comparable in space and time and at different SPEI time scales. -The na.rm parameter is a logical parameter used to decide whether to remove +The 'na.rm' parameter is a logical parameter used to decide whether to remove the NA values from the data before doing the calculation. It must be taken -into account that if na.rm == FALSE and there is some NA value in the specific -coordinates which the SPEI is computed, standardization cannot be carried out -for those coordinates and therefore, the result will be filled with NA for the -specific coordinates. However, when na.rm == TRUE, if the amount of data for -those specific coordinates is smaller than 4, it will not be possible to carry -out because we will not have enough data and the result will be also filled -with NAs for that coordinates. +into account that if 'na.rm' is FALSE and there is some NA value in the +specific coordinates which the SPEI is computed, standardization cannot be +carried out for those coordinates and therefore, the result will be filled +with NA for the specific coordinates. However, when 'na.rm' is TRUE, if the +amount of data for those specific coordinates is smaller than 4, it will not +be possible to carry out because we will not have enough data and the result +will be also filled with NAs for that coordinates. When only 'exp' is provided +the Standardization is computed with 'cross_validation'. For more information +about SPEI calculation, see functions CST_PeriodPET, CST_PeriodAccumulation +and CST_PeriodStandardization. } \examples{ dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, diff --git a/man/CST_PeriodStandardization.Rd b/man/CST_PeriodStandardization.Rd index 5e23bd5..d68dab6 100644 --- a/man/CST_PeriodStandardization.Rd +++ b/man/CST_PeriodStandardization.Rd @@ -59,6 +59,18 @@ function to be used for computing the SPEI. The accepted names are: 'Gamma' method only works when only precipitation is provided and other variables are 0 because it is positive defined (SPI indicator).} +\item{params}{An optional parameter that needs to be a multidimensional array +with named dimensions. This option overrides computation of fitting +parameters. It needs to be of same time dimensions (specified in 'time_dim' +and 'leadtime_dim') of 'data' and a dimension named 'coef' with the length +of the coefficients needed for the used distribution (for 'Gamma' coef +dimension is of lenght 2, for 'log-Logistic' or 'PearsonIII' is of length +3). It also needs to have a leadtime dimension (specified in 'leadtime_dim') +of length 1. It will only be used if 'data_cor' is not provided.} + +\item{return_params}{A logical value indicating wether to return parameters +array (TRUE) or not (FALSE). It is FALSE by default.} + \item{na.rm}{A logical value indicating whether NA values should be removed from data. It is FALSE by default. If it is FALSE and there are NA values, standardization cannot be carried out for those coordinates and therefore, @@ -74,7 +86,10 @@ parallel computation.} An object of class \code{s2dv_cube} containing the standardized data. If 'data_cor' is provided the array stored in element data will be of the same dimensions as 'data_cor'. If 'data_cor' is not provided, the array stored in -element data will be of the same dimensions as 'data'. +element data will be of the same dimensions as 'data'. The parameters of the +standardization will only be returned if 'return_params' is TRUE, in this +case, the output will be a list of two objects one for the standardized data +and one for the parameters. } \description{ The Standardization of the data is the last step of computing the SPEI @@ -84,15 +99,8 @@ standardized units that are comparable in space and time and at different SPEI time scales. } \details{ -Next, some specifications for the calculation of this indicator will be -discussed. To choose the time scale in which you want to accumulate the SPEI -(SPEI3, SPEI6...) is done using the accum parameter. The accumulation needs to -be performed in the previous step. However, since the accumulation is done for -the elapsed time steps, there will be no complete accumulations until reaching -the time instant equal to the value of the parameter. For this reason, in the -result, we will find that for the dimension where the accumulation has been -carried out, the values of the array will be NA since they do not include -complete accumulations. If there are NAs in the data and they are not removed with the +Next, some specifications for the calculation of the standardization will be +discussed. If there are NAs in the data and they are not removed with the parameter 'na.rm', the standardization cannot be carried out for those coordinates and therefore, the result will be filled with NA for the specific coordinates. When NAs are not removed, if the length of the data for @@ -101,8 +109,7 @@ standarize and the result will be also filled with NAs for that coordinates. About the distribution used to fit the data, there are only two possibilities: 'log-logistic' and 'Gamma'. The 'Gamma' method only works when only precipitation is provided and other variables are 0 because it is positive -defined (SPI indicator). For more information about SPEI, see functions -PeriodPET and PeriodAccumulation. This function is build to work be compatible +defined (SPI indicator). This function is build to work be compatible with other tools in that work with 's2dv_cube' object class. The input data must be this object class. If you don't work with 's2dv_cube', see PeriodStandardization. For more information on the SPEI indicator calculation, diff --git a/man/PeriodAccumulation.Rd b/man/PeriodAccumulation.Rd index 4dae3ef..3e134c8 100644 --- a/man/PeriodAccumulation.Rd +++ b/man/PeriodAccumulation.Rd @@ -41,8 +41,8 @@ dimension name matching the dimensions provided in the object \code{data$data} can be specified.} \item{rollwidth}{An optional parameter to indicate the number of time -steps the rolling sum is applied to. If it is negative, the rolling sum is -applied backwards 'time_dim', if it is positive, it will be towards it. When +steps the rolling sum is applied to. If it is positive, the rolling sum is +applied backwards 'time_dim', if it is negative, it will be forward it. When this parameter is NULL, the sum is applied over all 'time_dim', in a specified period. It is NULL by default.} @@ -54,7 +54,10 @@ computation.} } \value{ A multidimensional array with named dimensions containing the -indicator in the element \code{data}. +accumulated data in the element \code{data}. If parameter 'rollwidth' is +not used, it will have the dimensions of the input 'data' except the dimension +where the accumulation has been computed (specified with 'time_dim'). If +'rollwidth' is used, it will be of same dimensions as input data. } \description{ Period Accumulation computes the sum (accumulation) of a given variable in a @@ -70,10 +73,10 @@ by using this function: \details{ There are two possible ways of performing the accumulation. The default one is by accumulating a variable over a dimension specified with 'time_dim'. To -chose a specific time period, start and end must be used. The other method -is by using rollwidth parameter. When this parameter is a positive integer, -the rolling sum is applied over 'time_dim'. If it is negative, the rolling sum -is applied towards 'time_dim'. +chose a specific time period, 'start' and 'end' must be used. The other method +is by using 'rollwidth' parameter. When this parameter is a positive integer, +the cumulative backward sum is applied to the time dimension. If it is +negative, the rolling sum is applied backwards. } \examples{ exp <- array(rnorm(216)*200, dim = c(dataset = 1, member = 2, sdate = 3, diff --git a/man/PeriodPET.Rd b/man/PeriodPET.Rd index 7ca1073..1b00339 100644 --- a/man/PeriodPET.Rd +++ b/man/PeriodPET.Rd @@ -17,15 +17,18 @@ PeriodPET( ) } \arguments{ -\item{data}{A named list with the needed \code{s2dv_cube} objects containing -the seasonal forecast experiment in the data element for each variable. +\item{data}{A named list of multidimensional arrays containing +the seasonal forecast experiment data for each variable. Specific variables are needed for each method used in computing the -Potential Evapotranspiration. See parameter 'pet_method'. The accepted -variables for each method are: for 'hargreaves', 'tmin' and 'tmax'; for -'hargreaves_modified' are 'tmin', 'tmax' and 'pr'; for method 'thornthwaite' -'tmean' is required. The units for temperature variables -('tmin', 'tmax' and 'tmean') need to be in Celcius degrees; the units for -precipitation ('pr') need to be in mm/month.} +Potential Evapotranspiration (see parameter 'pet_method'). The accepted +variable names are fixed in order to be recognized by the function. +The accepted name corresponding to the Minimum Temperature is 'tmin', +for Maximum Temperature is 'tmax', for Mean Temperature is 'tmean' and +for Precipitation is 'pr'. The accepted variable names for each method are: +For 'hargreaves': 'tmin' and 'tmax'; for 'hargreaves_modified' are 'tmin', +'tmax' and 'pr'; for method 'thornthwaite' 'tmean' is required. The units +for temperature variables ('tmin', 'tmax' and 'tmean') need to be in Celcius +degrees; the units for precipitation ('pr') need to be in mm/month.} \item{dates}{An array of temporal dimensions containing the Dates of 'data'. It must be of class 'Date' or 'POSIXct'.} @@ -54,7 +57,7 @@ from data. It is FALSE by default.} parallel computation.} } \description{ -Compute the Potential evapotranspiration (PET) that is the amount of +Compute the Potential Evapotranspiration (PET) that is the amount of evaporation and transpiration that would occur if a sufficient water source were available. This function calculate PET according to the Thornthwaite, Hargreaves or Hargreaves-modified equations. @@ -65,7 +68,6 @@ PeriodStandardization and PeriodAccumulation. } \examples{ dims <- c(time = 3, syear = 3, ensemble = 1, latitude = 1) - exp_tasmax <- array(rnorm(360, 27.73, 5.26), dim = dims) exp_tasmin <- array(rnorm(360, 14.83, 3.86), dim = dims) exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims) @@ -74,11 +76,8 @@ dates_exp <- as.POSIXct(c(paste0(2010:end_year, "-08-16"), paste0(2010:end_year, "-09-15"), paste0(2010:end_year, "-10-16")), "UTC") dim(dates_exp) <- c(syear = 3, time = 3) - lat <- c(40) - exp1 <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) - res <- PeriodPET(data = exp1, lat = lat, dates = dates_exp) } diff --git a/man/PeriodSPEI.Rd b/man/PeriodSPEI.Rd index b0d039e..bb7859e 100644 --- a/man/PeriodSPEI.Rd +++ b/man/PeriodSPEI.Rd @@ -68,7 +68,8 @@ dimension. By default it is set by 'latitude'.} \item{accum}{accum An integer value indicating the number of months for the accumulation for each variable. When it is greater than 1, the result will be filled with NA until the accum time_dim dimension number due to the -accumulation to previous months.} +accumulation to previous months. The accumulation is performed backwards +by default.} \item{ref_period}{A list with two numeric values with the starting and end points of the reference period used for computing the index. The default @@ -129,9 +130,9 @@ and the result will include NA.} parallel computation.} } \value{ -An 's2dv_cube' object containing the SPEI multidimensional array in -element \code{data}. If 'exp_cor' is provided, only results from 'exp_cor' -will be provided. The parameters of the standardization will only be returned +A multidimensional array containing the SPEI with same dimensions as +input data. If 'exp_cor' is provided, only results from 'exp_cor' will be +provided. The parameters of the standardization will only be returned if 'return_params' is TRUE. The SPEI will only be computed if 'standardization' is TRUE. If 'standardization' is FALSE, only the climatic water balance (precipitation minus evapotranspiration) will be returned. The @@ -141,39 +142,40 @@ resultant arrays will have the same dimensions as the initial input data. Calculation of the Standardised Precipitation-Evapotranspiration Index (SPEI) that is a multiscalar drought index based on climatic data. It can be used for determining the onset, duration and magnitude of drought conditions with -respect to normal conditions in a variety of natural and managed systems such -as crops, ecosystems, rivers, water resources, etc. The idea behind the SPEI -is to compare the highest possible evapotranspiration with the current water -availability. The SPEI uses the monthly (or weekly) difference between -precipitation and potential evapotranspiration. This represents a simple -climatic water balance which is calculated at different time scales to obtain -the SPEI. +respect to normal conditions. The idea behind the SPEI is to compare the +highest possible evapotranspiration with the current water availability. The +SPEI uses for a specific time frequency the difference between precipitation +and potential evapotranspiration. } \details{ Next, some specifications for the calculation of this indicator will be -discussed. On the one hand, the model to be used to calculate potential -evapotranspiration is specified with the pet_method parameter (hargreaves, -hargraves modified or thornwhite). On the other hand, to choose the time scale -in which you want to accumulate the SPEI (SPEI3, SPEI6...) is done using the -accum parameter, where you must indicate the number of time steps you want to -accumulate throughout leadtime_dim. Since the accumulation is done for the +discussed. On the one hand, the model to be used to calculate Potential +Evapotranspiration is specified with the 'pet_method' parameter (it can be +hargreaves, hargraves modified or thornwhite). On the other hand, to choose +the time scale in which you want to accumulate the SPEI (SPEI3, SPEI6...) is +done using the 'accum' parameter, where you must indicate the number of time +steps you want to accumulate throughout 'leadtime_dim'. The accumulation will +be performed backwards by default. Since the accumulation is done for the elapsed time steps, there will be no complete accumulations until reaching the -time instant equal to the value of the parameter. For this reason, in the -result, we will find that for the dimension where the accumulation has been -carried out, the values of the array will be NA since they do not include +time instant equal to the value of the 'accum' parameter. For this reason, in +the result, we will find that for the dimension where the accumulation has +been carried out, the values of the array will be NA since they do not include complete accumulations. Also, there is a parameter to specify if the standardization that can be TRUE or FALSE. If it is TRUE, the data is fit to a probability distribution to transform the original values to standardized units that are comparable in space and time and at different SPEI time scales. -The na.rm parameter is a logical parameter used to decide whether to remove +The 'na.rm' parameter is a logical parameter used to decide whether to remove the NA values from the data before doing the calculation. It must be taken -into account that if na.rm == FALSE and there is some NA value in the specific -coordinates which the SPEI is computed, standardization cannot be carried out -for those coordinates and therefore, the result will be filled with NA for the -specific coordinates. However, when na.rm == TRUE, if the amount of data for -those specific coordinates is smaller than 4, it will not be possible to carry -out because we will not have enough data and the result will be also filled -with NAs for that coordinates. +into account that if 'na.rm' is FALSE and there is some NA value in the +specific coordinates which the SPEI is computed, standardization cannot be +carried out for those coordinates and therefore, the result will be filled +with NA for the specific coordinates. However, when 'na.rm' is TRUE, if the +amount of data for those specific coordinates is smaller than 4, it will not +be possible to carry out because we will not have enough data and the result +will be also filled with NAs for that coordinates. When only 'exp' is provided +the Standardization is computed with 'cross_validation'. For more information +about SPEI calculation, see functions PeriodPET, PeriodAccumulation and +PeriodStandardization. } \examples{ dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, diff --git a/man/PeriodStandardization.Rd b/man/PeriodStandardization.Rd index 98b8ba6..ab86f9a 100644 --- a/man/PeriodStandardization.Rd +++ b/man/PeriodStandardization.Rd @@ -62,6 +62,18 @@ function to be used for computing the SPEI. The accepted names are: 'Gamma' method only works when only precipitation is provided and other variables are 0 because it is positive defined (SPI indicator).} +\item{params}{An optional parameter that needs to be a multidimensional array +with named dimensions. This option overrides computation of fitting +parameters. It needs to be of same time dimensions (specified in 'time_dim' +and 'leadtime_dim') of 'data' and a dimension named 'coef' with the length +of the coefficients needed for the used distribution (for 'Gamma' coef +dimension is of lenght 2, for 'log-Logistic' or 'PearsonIII' is of length +3). It also needs to have a leadtime dimension (specified in 'leadtime_dim') +of length 1. It will only be used if 'data_cor' is not provided.} + +\item{return_params}{A logical value indicating wether to return parameters +array (TRUE) or not (FALSE). It is FALSE by default.} + \item{na.rm}{A logical value indicating whether NA values should be removed from data. It is FALSE by default. If it is FALSE and there are NA values, standardization cannot be carried out for those coordinates and therefore, @@ -77,7 +89,9 @@ parallel computation.} A multidimensional array containing the standardized data. If 'data_cor' is provided the array will be of the same dimensions as 'data_cor'. If 'data_cor' is not provided, the array will be of the same -dimensions as 'data'. +dimensions as 'data'. The parameters of the standardization will only be +returned if 'return_params' is TRUE, in this case, the output will be a list +of two objects one for the standardized data and one for the parameters. } \description{ The Standardization of the data is the last step of computing the SPEI @@ -86,15 +100,8 @@ transform the original values to standardized units that are comparable in space and time and at different SPEI time scales. } \details{ -Next, some specifications for the calculation of this indicator will be -discussed. To choose the time scale in which you want to accumulate the SPEI -(SPEI3, SPEI6...) is done using the accum parameter. The accumulation needs to -be performed in the previous step. However, since the accumulation is done for -the elapsed time steps, there will be no complete accumulations until reaching -the time instant equal to the value of the parameter. For this reason, in the -result, we will find that for the dimension where the accumulation has been -carried out, the values of the array will be NA since they do not include -complete accumulations. If there are NAs in the data and they are not removed with the +Next, some specifications for the calculation of the standardization will be +discussed. If there are NAs in the data and they are not removed with the parameter 'na.rm', the standardization cannot be carried out for those coordinates and therefore, the result will be filled with NA for the specific coordinates. When NAs are not removed, if the length of the data for diff --git a/tests/testthat/test-PeriodAccumulation.R b/tests/testthat/test-PeriodAccumulation.R index f970cde..1ce36d9 100644 --- a/tests/testthat/test-PeriodAccumulation.R +++ b/tests/testthat/test-PeriodAccumulation.R @@ -14,7 +14,7 @@ dates2 <- c(seq(as.Date("01-04-2000", format = "%d-%m-%Y"), as.Date("03-04-2000", format = "%d-%m-%Y"), by = 'day'), seq(as.Date("01-04-2001", format = "%d-%m-%Y"), as.Date("03-04-2001", format = "%d-%m-%Y"), by = 'day')) -dim(dates2) <- c(sdate = 2, time = 3) +dim(dates2) <- c(time = 3, sdate = 2) # exp1 exp <- NULL @@ -200,7 +200,22 @@ test_that("4. Rolling", { PeriodAccumulation(data = dat2, rollwidth = 3, dates = dates2), array(c(rep(NA, 4), 9, 12), dim = c(sdate = 2, time = 3, member = 1)) ) - dat1[1,1,1] <- NA - PeriodAccumulation(data = dat2, rollwidth = 2, dates = dates2, na.rm = FALSE) + dat2_1 <- dat2 + dat2_1[1,1,1] <- NA + expect_equal( + PeriodAccumulation(data = dat2_1, rollwidth = 2, dates = dates2, na.rm = FALSE), + array(c(rep(NA, 3), 6,8,10), dim = c(sdate = 2, time = 3, member = 1)) + ) + # Test rolling with start and end -}) + expect_equal( + PeriodAccumulation(data = dat2, rollwidth = 1, dates = dates2, + start = list(1, 4), end = list(2, 4)), + array(c(1, 2, 3, 4), dim = c(sdate = 2, time = 2, member = 1)) + ) + expect_equal( + PeriodAccumulation(data = dat2, rollwidth = 2, dates = dates2, + start = list(1, 4), end = list(2, 4)), + array(c(NA, NA, 4, 6), dim = c(sdate = 2, time = 2, member = 1)) + ) +}) \ No newline at end of file diff --git a/tests/testthat/test-PeriodSPEI.R b/tests/testthat/test-PeriodSPEI.R index 23f7316..b16bb69 100644 --- a/tests/testthat/test-PeriodSPEI.R +++ b/tests/testthat/test-PeriodSPEI.R @@ -325,7 +325,7 @@ test_that("2. Output checks: CST_PeriodSPEI", { test_that("2. Output checks", { res1 <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, dates_exp = dates_exp, dates_expcor = dates_expcor, - return_params = TRUE) + return_params = TRUE, na.rm = TRUE) res2 <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, dates_exp = dates_exp, dates_expcor = dates_expcor, standardization = FALSE) # No info about accumulation @@ -366,15 +366,15 @@ test_that("2. Output checks", { # accum res11 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, accum = 2, dates_exp = dates_exp, na.rm = TRUE) - # expect_equal( - # res11[1,3,1,1,][1:4], - # c(-0.4292409, -0.1375149, -0.5564081, -0.4273380), - # tolerance = 0.0001 - # ) + expect_equal( + res11[1,3,1,1,][1:4], + c(-0.6130081, -0.3446050, -0.7267427, -0.6112921), + tolerance = 0.0001 + ) # ref_period - # res_ref <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, accum = 2, - # dates_exp = dates_exp, dates_expcor = dates_expcor, - # na.rm = TRUE, ref_period = list(2011, 2013)) + res_ref <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, accum = 2, + dates_exp = dates_exp, dates_expcor = dates_expcor, + na.rm = TRUE, ref_period = list(2011, 2013)) expect_equal( !identical(res1[[1]], res_ref), TRUE @@ -423,11 +423,6 @@ test_that("2. Output checks", { dim(res_crossval_T$params), c(syear = 6, time = 3, latitude = 2, longitude = 1, coef = 3) ) - # cross_validation = FALSE - # expect_equal( - # dim(res_crossval_F$params)[-which(names(dim(res_crossval_F$params)) == 'coef')], - # dimscor[-which(names(dimscor) == 'ensemble')] - # ) # pet_method - ok res5 <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, -- GitLab From 527dddd4f4bb76994d56a95c2ead8cd649d19880 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Tue, 19 Sep 2023 12:54:37 +0200 Subject: [PATCH 29/42] Correct few mistakes in documentation --- R/PeriodAccumulation.R | 2 +- R/PeriodPET.R | 2 +- R/PeriodSPEI.R | 12 ++++++------ R/PeriodStandardization.R | 16 +++++++++------- man/CST_PeriodAccumulation.Rd | 2 +- man/CST_PeriodPET.Rd | 2 +- man/CST_PeriodSPEI.Rd | 6 +++--- man/CST_PeriodStandardization.Rd | 11 ++++++----- man/PeriodSPEI.Rd | 6 +++--- man/PeriodStandardization.Rd | 5 +++-- 10 files changed, 34 insertions(+), 30 deletions(-) diff --git a/R/PeriodAccumulation.R b/R/PeriodAccumulation.R index 91220f6..09f4211 100644 --- a/R/PeriodAccumulation.R +++ b/R/PeriodAccumulation.R @@ -15,7 +15,7 @@ #'chose a specific time period, 'start' and 'end' must be used. The other method #'is by using 'rollwidth' parameter. When this parameter is a positive integer, #'the cumulative backward sum is applied to the time dimension. If it is -#'negative, the rolling sum is applied backwards. This function is build to work +#'negative, the rolling sum is applied backwards. This function is build to #'be compatible with other tools in that work with 's2dv_cube' object class. The #'input data must be this object class. If you don't work with 's2dv_cube', see #'PeriodAccumulation. diff --git a/R/PeriodPET.R b/R/PeriodPET.R index 9fdb7e2..87dc8ca 100644 --- a/R/PeriodPET.R +++ b/R/PeriodPET.R @@ -5,7 +5,7 @@ #'were available. This function calculate PET according to the Thornthwaite, #'Hargreaves or Hargreaves-modified equations. #' -#'This function is build to work be compatible with other tools in +#'This function is build to be compatible with other tools in #'that work with 's2dv_cube' object class. The input data must be this object #'class. If you don't work with 's2dv_cube', see PeriodPET. For more information #'on the SPEI calculation, see functions CST_PeriodStandardization and diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index 442209f..77e90d5 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -8,7 +8,7 @@ #'SPEI uses for a specific time frequency the difference between precipitation #'and potential evapotranspiration. This represents a simple climatic water #'balance which is calculated at different time scales to obtain the SPEI. This -#'function is build to work be compatible with other tools in that work with +#'function is build to be compatible with other tools in that work with #''s2dv_cube' object class. The input data must be this object class. If you #'don't work with 's2dv_cube', see PeriodSPEI. #' @@ -37,8 +37,8 @@ #'amount of data for those specific coordinates is smaller than 4, it will not #'be possible to carry out because we will not have enough data and the result #'will be also filled with NAs for that coordinates. When only 'exp' is provided -#'the Standardization is computed with 'cross_validation'. For more information -#'about SPEI calculation, see functions CST_PeriodPET, CST_PeriodAccumulation +#'('exp_cor' is NULL) the Standardization is computed with cross validation. For +#'more information about SPEI calculation, see functions CST_PeriodPET, CST_PeriodAccumulation #'and CST_PeriodStandardization. #' #'@param exp A named list with the needed \code{s2dv_cube} objects containing @@ -311,9 +311,9 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, time_dim = 'syear', #'amount of data for those specific coordinates is smaller than 4, it will not #'be possible to carry out because we will not have enough data and the result #'will be also filled with NAs for that coordinates. When only 'exp' is provided -#'the Standardization is computed with 'cross_validation'. For more information -#'about SPEI calculation, see functions PeriodPET, PeriodAccumulation and -#'PeriodStandardization. +#'('exp_cor' is NULL) the Standardization is computed with cross validation. For +#'more information about SPEI calculation, see functions PeriodPET, +#'PeriodAccumulation and PeriodStandardization. #' #'@param exp A named list with multidimensional array objects containing #' the seasonal forecast experiment in the data element for each variable. diff --git a/R/PeriodStandardization.R b/R/PeriodStandardization.R index d95127d..966d648 100644 --- a/R/PeriodStandardization.R +++ b/R/PeriodStandardization.R @@ -16,11 +16,12 @@ #'About the distribution used to fit the data, there are only two possibilities: #''log-logistic' and 'Gamma'. The 'Gamma' method only works when only #'precipitation is provided and other variables are 0 because it is positive -#'defined (SPI indicator). This function is build to work be compatible -#'with other tools in that work with 's2dv_cube' object class. The input data -#'must be this object class. If you don't work with 's2dv_cube', see -#'PeriodStandardization. For more information on the SPEI indicator calculation, -#'see CST_PeriodPET and CST_PeriodAccumulation. +#'defined (SPI indicator). When only 'data' is provided ('data_cor' is NULL) the +#'standardization is computed with cross validation. This function is build to +#'be compatible with other tools in that work with 's2dv_cube' object +#'class. The input data must be this object class. If you don't work with +#''s2dv_cube', see PeriodStandardization. For more information on the SPEI +#'indicator calculation, see CST_PeriodPET and CST_PeriodAccumulation. #' #'@param data An 's2dv_cube' that element 'data' stores a multidimensional #' array containing the data to be standardized. @@ -155,8 +156,9 @@ CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', #'About the distribution used to fit the data, there are only two possibilities: #''log-logistic' and 'Gamma'. The 'Gamma' method only works when only #'precipitation is provided and other variables are 0 because it is positive -#'defined (SPI indicator). For more information about SPEI, see functions -#'PeriodPET and PeriodAccumulation. +#'defined (SPI indicator). When only 'data' is provided ('data_cor' is NULL) the +#'standardization is computed with cross validation. For more information about +#'SPEI, see functions PeriodPET and PeriodAccumulation. #' #'@param data A multidimensional array containing the data to be standardized. #'@param data_cor A multidimensional array containing the data in which the diff --git a/man/CST_PeriodAccumulation.Rd b/man/CST_PeriodAccumulation.Rd index d1f1a46..3de6d4b 100644 --- a/man/CST_PeriodAccumulation.Rd +++ b/man/CST_PeriodAccumulation.Rd @@ -77,7 +77,7 @@ is by accumulating a variable over a dimension specified with 'time_dim'. To chose a specific time period, 'start' and 'end' must be used. The other method is by using 'rollwidth' parameter. When this parameter is a positive integer, the cumulative backward sum is applied to the time dimension. If it is -negative, the rolling sum is applied backwards. This function is build to work +negative, the rolling sum is applied backwards. This function is build to be compatible with other tools in that work with 's2dv_cube' object class. The input data must be this object class. If you don't work with 's2dv_cube', see PeriodAccumulation. diff --git a/man/CST_PeriodPET.Rd b/man/CST_PeriodPET.Rd index 23a56f4..d705a3c 100644 --- a/man/CST_PeriodPET.Rd +++ b/man/CST_PeriodPET.Rd @@ -61,7 +61,7 @@ were available. This function calculate PET according to the Thornthwaite, Hargreaves or Hargreaves-modified equations. } \details{ -This function is build to work be compatible with other tools in +This function is build to be compatible with other tools in that work with 's2dv_cube' object class. The input data must be this object class. If you don't work with 's2dv_cube', see PeriodPET. For more information on the SPEI calculation, see functions CST_PeriodStandardization and diff --git a/man/CST_PeriodSPEI.Rd b/man/CST_PeriodSPEI.Rd index d5b8075..7b9aa77 100644 --- a/man/CST_PeriodSPEI.Rd +++ b/man/CST_PeriodSPEI.Rd @@ -137,7 +137,7 @@ highest possible evapotranspiration with the current water availability. The SPEI uses for a specific time frequency the difference between precipitation and potential evapotranspiration. This represents a simple climatic water balance which is calculated at different time scales to obtain the SPEI. This -function is build to work be compatible with other tools in that work with +function is build to be compatible with other tools in that work with 's2dv_cube' object class. The input data must be this object class. If you don't work with 's2dv_cube', see PeriodSPEI. } @@ -167,8 +167,8 @@ with NA for the specific coordinates. However, when 'na.rm' is TRUE, if the amount of data for those specific coordinates is smaller than 4, it will not be possible to carry out because we will not have enough data and the result will be also filled with NAs for that coordinates. When only 'exp' is provided -the Standardization is computed with 'cross_validation'. For more information -about SPEI calculation, see functions CST_PeriodPET, CST_PeriodAccumulation +('exp_cor' is NULL) the Standardization is computed with cross validation. For +more information about SPEI calculation, see functions CST_PeriodPET, CST_PeriodAccumulation and CST_PeriodStandardization. } \examples{ diff --git a/man/CST_PeriodStandardization.Rd b/man/CST_PeriodStandardization.Rd index d68dab6..823f501 100644 --- a/man/CST_PeriodStandardization.Rd +++ b/man/CST_PeriodStandardization.Rd @@ -109,11 +109,12 @@ standarize and the result will be also filled with NAs for that coordinates. About the distribution used to fit the data, there are only two possibilities: 'log-logistic' and 'Gamma'. The 'Gamma' method only works when only precipitation is provided and other variables are 0 because it is positive -defined (SPI indicator). This function is build to work be compatible -with other tools in that work with 's2dv_cube' object class. The input data -must be this object class. If you don't work with 's2dv_cube', see -PeriodStandardization. For more information on the SPEI indicator calculation, -see CST_PeriodPET and CST_PeriodAccumulation. +defined (SPI indicator). When only 'data' is provided ('data_cor' is NULL) the +standardization is computed with cross validation. This function is build to +be compatible with other tools in that work with 's2dv_cube' object +class. The input data must be this object class. If you don't work with +'s2dv_cube', see PeriodStandardization. For more information on the SPEI +indicator calculation, see CST_PeriodPET and CST_PeriodAccumulation. } \examples{ dims <- c(syear = 6, time = 3, latitude = 2, ensemble = 25) diff --git a/man/PeriodSPEI.Rd b/man/PeriodSPEI.Rd index bb7859e..c3ee473 100644 --- a/man/PeriodSPEI.Rd +++ b/man/PeriodSPEI.Rd @@ -173,9 +173,9 @@ with NA for the specific coordinates. However, when 'na.rm' is TRUE, if the amount of data for those specific coordinates is smaller than 4, it will not be possible to carry out because we will not have enough data and the result will be also filled with NAs for that coordinates. When only 'exp' is provided -the Standardization is computed with 'cross_validation'. For more information -about SPEI calculation, see functions PeriodPET, PeriodAccumulation and -PeriodStandardization. +('exp_cor' is NULL) the Standardization is computed with cross validation. For +more information about SPEI calculation, see functions PeriodPET, +PeriodAccumulation and PeriodStandardization. } \examples{ dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, diff --git a/man/PeriodStandardization.Rd b/man/PeriodStandardization.Rd index ab86f9a..b2dad6c 100644 --- a/man/PeriodStandardization.Rd +++ b/man/PeriodStandardization.Rd @@ -110,8 +110,9 @@ standarize and the result will be also filled with NAs for that coordinates. About the distribution used to fit the data, there are only two possibilities: 'log-logistic' and 'Gamma'. The 'Gamma' method only works when only precipitation is provided and other variables are 0 because it is positive -defined (SPI indicator). For more information about SPEI, see functions -PeriodPET and PeriodAccumulation. +defined (SPI indicator). When only 'data' is provided ('data_cor' is NULL) the +standardization is computed with cross validation. For more information about +SPEI, see functions PeriodPET and PeriodAccumulation. } \examples{ dims <- c(syear = 6, time = 2, latitude = 2, ensemble = 25) -- GitLab From e00e1b6b31d7844c2d11b52aa462399350aa77b7 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Fri, 3 Nov 2023 14:48:56 +0100 Subject: [PATCH 30/42] Remove PearsonIII distribution of the documentation --- R/PeriodSPEI.R | 11 ++++++----- R/PeriodStandardization.R | 12 ++++++------ man/CST_PeriodSPEI.Rd | 7 ++++--- man/CST_PeriodStandardization.Rd | 6 +++--- man/PeriodSPEI.Rd | 4 ++-- man/PeriodStandardization.Rd | 6 +++--- 6 files changed, 24 insertions(+), 22 deletions(-) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index 77e90d5..38b58a2 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -77,8 +77,8 @@ #' (specified in 'leadtime_dim'); time dimension of length 1 (specified in #' 'time_dim'); and a dimension named 'coef' with the length of the #' coefficients needed for the used distribution (for 'Gamma' coef dimension is -#' of lenght 2, for 'log-Logistic' or 'PearsonIII' is of length 3). It can't -#' have member dimension (specified in 'memb_dim'). +#' of lenght 2, for 'log-Logistic' is of length 3). It can't have member +#' dimension (specified in 'memb_dim'). #'@param pet_exp A multidimensional array containing the Potential #' EvapoTranspiration data of 'exp'. It must have the same dimensions of the #' variable 'pr' of 'exp'. If it is NULL it is calculated using the provided @@ -101,7 +101,8 @@ #' function to be used for computing the SPEI. The accepted names are: #' 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The #' 'Gamma' method only works when only precipitation is provided and other -#' variables are 0 because it is positive defined (SPI indicator). +#' variables are 0 because it is positive defined (SPI indicator). If other +#' distribution functions want to be used contact the authors. #'@param handle_infinity A logical value wether to return Infinite values (TRUE) #' or not (FALSE). #'@param na.rm A logical value indicating whether NA values should be removed @@ -355,8 +356,8 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, time_dim = 'syear', #' to be of same time dimension (specified in 'time_dim') of 'exp' and a #' dimension named 'coef' with the length of the coefficients needed for the #' used distribution (for 'Gamma' coef dimension is of lenght 2, for -#' 'log-Logistic' or 'PearsonIII' is of length 3). It also needs to have a -#' leadtime dimension (specified in 'leadtime_dim') of length 1. +#' 'log-Logistic' is of length 3). It also needs to have a leadtime dimension +#' (specified in 'leadtime_dim') of length 1. #'@param pet_exp A multidimensional array containing the Potential #' EvapoTranspiration data of 'exp'. It must have the same dimensions of the #' variable 'pr' of 'exp'. If it is NULL it is calculated using the provided diff --git a/R/PeriodStandardization.R b/R/PeriodStandardization.R index 966d648..d48ac63 100644 --- a/R/PeriodStandardization.R +++ b/R/PeriodStandardization.R @@ -44,9 +44,9 @@ #' parameters. It needs to be of same time dimensions (specified in 'time_dim' #' and 'leadtime_dim') of 'data' and a dimension named 'coef' with the length #' of the coefficients needed for the used distribution (for 'Gamma' coef -#' dimension is of lenght 2, for 'log-Logistic' or 'PearsonIII' is of length -#' 3). It also needs to have a leadtime dimension (specified in 'leadtime_dim') -#' of length 1. It will only be used if 'data_cor' is not provided. +#' dimension is of lenght 2, for 'log-Logistic' is of length 3). It also needs +#' to have a leadtime dimension (specified in 'leadtime_dim') of length 1. It +#' will only be used if 'data_cor' is not provided. #'@param handle_infinity A logical value wether to return infinite values (TRUE) #' or not (FALSE). When it is TRUE, the positive infinite values (negative #' infinite) are substituted by the maximum (minimum) values of each @@ -182,9 +182,9 @@ CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', #' parameters. It needs to be of same time dimensions (specified in 'time_dim' #' and 'leadtime_dim') of 'data' and a dimension named 'coef' with the length #' of the coefficients needed for the used distribution (for 'Gamma' coef -#' dimension is of lenght 2, for 'log-Logistic' or 'PearsonIII' is of length -#' 3). It also needs to have a leadtime dimension (specified in 'leadtime_dim') -#' of length 1. It will only be used if 'data_cor' is not provided. +#' dimension is of lenght 2, for 'log-Logistic' is of length 3). It also needs +#' to have a leadtime dimension (specified in 'leadtime_dim') of length 1. It +#' will only be used if 'data_cor' is not provided. #'@param handle_infinity A logical value wether to return infinite values (TRUE) #' or not (FALSE). When it is TRUE, the positive infinite values (negative #' infinite) are substituted by the maximum (minimum) values of each diff --git a/man/CST_PeriodSPEI.Rd b/man/CST_PeriodSPEI.Rd index 7b9aa77..9c2c357 100644 --- a/man/CST_PeriodSPEI.Rd +++ b/man/CST_PeriodSPEI.Rd @@ -71,8 +71,8 @@ to have the following dimensions: same leadtime dimension of 'exp' (specified in 'leadtime_dim'); time dimension of length 1 (specified in 'time_dim'); and a dimension named 'coef' with the length of the coefficients needed for the used distribution (for 'Gamma' coef dimension is -of lenght 2, for 'log-Logistic' or 'PearsonIII' is of length 3). It can't -have member dimension (specified in 'memb_dim').} +of lenght 2, for 'log-Logistic' is of length 3). It can't have member +dimension (specified in 'memb_dim').} \item{pet_exp}{A multidimensional array containing the Potential EvapoTranspiration data of 'exp'. It must have the same dimensions of the @@ -101,7 +101,8 @@ default.} function to be used for computing the SPEI. The accepted names are: 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The 'Gamma' method only works when only precipitation is provided and other -variables are 0 because it is positive defined (SPI indicator).} +variables are 0 because it is positive defined (SPI indicator). If other +distribution functions want to be used contact the authors.} \item{handle_infinity}{A logical value wether to return Infinite values (TRUE) or not (FALSE).} diff --git a/man/CST_PeriodStandardization.Rd b/man/CST_PeriodStandardization.Rd index 823f501..fe0dd3b 100644 --- a/man/CST_PeriodStandardization.Rd +++ b/man/CST_PeriodStandardization.Rd @@ -64,9 +64,9 @@ with named dimensions. This option overrides computation of fitting parameters. It needs to be of same time dimensions (specified in 'time_dim' and 'leadtime_dim') of 'data' and a dimension named 'coef' with the length of the coefficients needed for the used distribution (for 'Gamma' coef -dimension is of lenght 2, for 'log-Logistic' or 'PearsonIII' is of length -3). It also needs to have a leadtime dimension (specified in 'leadtime_dim') -of length 1. It will only be used if 'data_cor' is not provided.} +dimension is of lenght 2, for 'log-Logistic' is of length 3). It also needs +to have a leadtime dimension (specified in 'leadtime_dim') of length 1. It +will only be used if 'data_cor' is not provided.} \item{return_params}{A logical value indicating wether to return parameters array (TRUE) or not (FALSE). It is FALSE by default.} diff --git a/man/PeriodSPEI.Rd b/man/PeriodSPEI.Rd index c3ee473..21184d3 100644 --- a/man/PeriodSPEI.Rd +++ b/man/PeriodSPEI.Rd @@ -81,8 +81,8 @@ SPEI. This option overrides computation of fitting parameters. It needs to be of same time dimension (specified in 'time_dim') of 'exp' and a dimension named 'coef' with the length of the coefficients needed for the used distribution (for 'Gamma' coef dimension is of lenght 2, for -'log-Logistic' or 'PearsonIII' is of length 3). It also needs to have a -leadtime dimension (specified in 'leadtime_dim') of length 1.} +'log-Logistic' is of length 3). It also needs to have a leadtime dimension +(specified in 'leadtime_dim') of length 1.} \item{pet_exp}{A multidimensional array containing the Potential EvapoTranspiration data of 'exp'. It must have the same dimensions of the diff --git a/man/PeriodStandardization.Rd b/man/PeriodStandardization.Rd index b2dad6c..94bbee1 100644 --- a/man/PeriodStandardization.Rd +++ b/man/PeriodStandardization.Rd @@ -67,9 +67,9 @@ with named dimensions. This option overrides computation of fitting parameters. It needs to be of same time dimensions (specified in 'time_dim' and 'leadtime_dim') of 'data' and a dimension named 'coef' with the length of the coefficients needed for the used distribution (for 'Gamma' coef -dimension is of lenght 2, for 'log-Logistic' or 'PearsonIII' is of length -3). It also needs to have a leadtime dimension (specified in 'leadtime_dim') -of length 1. It will only be used if 'data_cor' is not provided.} +dimension is of lenght 2, for 'log-Logistic' is of length 3). It also needs +to have a leadtime dimension (specified in 'leadtime_dim') of length 1. It +will only be used if 'data_cor' is not provided.} \item{return_params}{A logical value indicating wether to return parameters array (TRUE) or not (FALSE). It is FALSE by default.} -- GitLab From 8608762c9b2f1da81e385400daaf77b996cc2dde Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Mon, 13 Nov 2023 15:37:49 +0100 Subject: [PATCH 31/42] Correct imports and remove unneeded dependencies --- NAMESPACE | 26 ++++++++++++++++++-------- R/PeriodAccumulation.R | 4 ++-- R/PeriodPET.R | 24 ++++++++++-------------- R/PeriodSPEI.R | 15 ++++----------- R/PeriodStandardization.R | 29 +++++++++++++++-------------- R/zzz.R | 16 +++++++++------- 6 files changed, 58 insertions(+), 56 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 49ec262..a15bdb0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -31,17 +31,27 @@ export(TotalSpellTimeExceedingThreshold) export(TotalTimeExceedingThreshold) export(WindCapacityFactor) export(WindPowerDensity) -import(CSTools) -import(ClimProjDiags) -import(SPEI) -import(TLMoments) -import(lmom) -import(lmomco) -import(lubridate) import(multiApply) -import(zoo) +importFrom(CSTools,s2dv_cube) importFrom(ClimProjDiags,Subset) +importFrom(SPEI,hargreaves) +importFrom(SPEI,parglo.maxlik) +importFrom(SPEI,thornthwaite) +importFrom(lmom,cdfgam) +importFrom(lmom,cdfglo) +importFrom(lmom,cdfpe3) +importFrom(lmom,pelgam) +importFrom(lmom,pelglo) +importFrom(lmom,pelpe3) +importFrom(lmomco,are.lmom.valid) +importFrom(lmomco,pargam) +importFrom(lmomco,parglo) +importFrom(lmomco,parpe3) +importFrom(lmomco,pwm.pp) +importFrom(lmomco,pwm.ub) +importFrom(lmomco,pwm2lmom) importFrom(stats,approxfun) importFrom(stats,ecdf) importFrom(stats,quantile) importFrom(utils,read.delim) +importFrom(zoo,rollapply) diff --git a/R/PeriodAccumulation.R b/R/PeriodAccumulation.R index 09f4211..e573de9 100644 --- a/R/PeriodAccumulation.R +++ b/R/PeriodAccumulation.R @@ -204,7 +204,7 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, #' end = list(21, 10)) #' #'@import multiApply -#'@import zoo +#'@importFrom zoo rollapply #'@export PeriodAccumulation <- function(data, dates = NULL, start = NULL, end = NULL, time_dim = 'time', rollwidth = NULL, @@ -322,7 +322,7 @@ data <- array(c(1,3,2,4), dim = c(time = 2, sdate = 2)) } } - data_accum <- rollapply(data = data_vector, width = rollwidth, FUN = sum, na.rm = na.rm) + data_accum <- zoo::rollapply(data = data_vector, width = rollwidth, FUN = sum, na.rm = na.rm) if (!forwardroll) { data_accum <- c(rep(NA, rollwidth-1), data_accum) } else { diff --git a/R/PeriodPET.R b/R/PeriodPET.R index 87dc8ca..de20b2d 100644 --- a/R/PeriodPET.R +++ b/R/PeriodPET.R @@ -56,9 +56,7 @@ #'exp1 <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) #'res <- PeriodPET(data = exp1, lat = lat, dates = dates_exp) #' -#'@import SPEI -#'@import lubridate -#'@import multiApply +#'@importFrom CSTools s2dv_cube #'@export CST_PeriodPET <- function(data, pet_method = 'hargreaves', time_dim = 'syear', leadtime_dim = 'time', @@ -163,8 +161,7 @@ CST_PeriodPET <- function(data, pet_method = 'hargreaves', #'exp1 <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) #'res <- PeriodPET(data = exp1, lat = lat, dates = dates_exp) #' -#'@import SPEI -#'@import lubridate +#'@importFrom SPEI hargreaves thornthwaite #'@import multiApply #'@export PeriodPET <- function(data, dates, lat, pet_method = 'hargreaves', @@ -249,7 +246,7 @@ PeriodPET <- function(data, dates, lat, pet_method = 'hargreaves', if (is.null(dates)) { stop("Parameter 'dates' is missing, dates must be provided.") } - if (!(is.Date(dates)) & !(is.POSIXct(dates))) { + if (!any(inherits(dates, 'Date'), inherits(dates, 'POSIXct'))) { stop("Parameter 'dates' is not of the correct class, ", "only 'Date' and 'POSIXct' classes are accepted.") } @@ -276,7 +273,6 @@ PeriodPET <- function(data, dates, lat, pet_method = 'hargreaves', # complete dates mask_dates <- .datesmask(dates) - lat_mask <- array(lat, dim = c(1, length(lat))) names(dim(lat_mask)) <- c('dat', lat_dim) @@ -368,22 +364,22 @@ PeriodPET <- function(data, dates, lat, pet_method = 'hargreaves', rm(data_tmp) } if (pet_method == 'hargreaves') { - pet <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), - lat = lat_mask, na.rm = FALSE, verbose = FALSE) + pet <- SPEI::hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), + lat = lat_mask, na.rm = FALSE, verbose = FALSE) # line to return the vector to the size of the actual original data pet <- array(pet[which(mask_dates == 1)], dim = dims) } if (pet_method == 'hargreaves_modified') { - pet <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), - lat = lat_mask, Pre = as.vector(data4), na.rm = FALSE, - verbose = FALSE) + pet <- SPEI::hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), + lat = lat_mask, Pre = as.vector(data4), na.rm = FALSE, + verbose = FALSE) pet <- array(pet[which(mask_dates == 1)], dim = dims) } if (pet_method == 'thornthwaite') { - pet <- thornthwaite(as.vector(data2), lat = lat_mask, na.rm = TRUE, - verbose = FALSE) + pet <- SPEI::thornthwaite(as.vector(data2), lat = lat_mask, na.rm = TRUE, + verbose = FALSE) # line to return the vector to the size of the actual original data pet <- array(pet[which(mask_dates == 1)], dim = dims) } diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index 38b58a2..1f4ef4c 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -159,14 +159,7 @@ #' #'res <- CST_PeriodSPEI(exp = exp, exp_cor = exp_cor) #' -#'@import multiApply -#'@import ClimProjDiags -#'@import SPEI -#'@import zoo -#'@import TLMoments -#'@import lmomco -#'@import lubridate -#'@import CSTools +#'@importFrom CSTools s2dv_cube #'@export CST_PeriodSPEI <- function(exp, exp_cor = NULL, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', @@ -545,7 +538,7 @@ PeriodSPEI <- function(exp, dates_exp, lat, exp_cor = NULL, dates_expcor = NULL, if (is.null(dates_exp)) { stop("Parameter 'dates_exp' is missing, dates must be provided.") } - if (!(is.Date(dates_exp)) & !(is.POSIXct(dates_exp))) { + if (!any(inherits(dates_exp, 'Date'), inherits(dates_exp, 'POSIXct'))) { stop("Parameter 'dates_exp' is not of the correct class, ", "only 'Date' and 'POSIXct' classes are accepted.") } @@ -563,7 +556,7 @@ PeriodSPEI <- function(exp, dates_exp, lat, exp_cor = NULL, dates_expcor = NULL, stop("Parameter 'dates_expcor' is missing, dates for 'exp_cor' must be ", "provided if exp_cor is not NULL.") } - if (!(is.Date(dates_expcor)) & !(is.POSIXct(dates_expcor))) { + if (!any(inherits(dates_expcor, 'Date'), inherits(dates_expcor, 'POSIXct'))) { stop("Element 'Dates' in 'exp_cor' is not of the correct class, ", "only 'Date' and 'POSIXct' classes are accepted.") } @@ -590,7 +583,7 @@ PeriodSPEI <- function(exp, dates_exp, lat, exp_cor = NULL, dates_expcor = NULL, if (!is.logical(na.rm)) { stop("Parameter 'na.rm' must be logical.") } - + # Complete dates dates <- .return2list(dates_exp, dates_expcor) diff --git a/R/PeriodStandardization.R b/R/PeriodStandardization.R index d48ac63..6a83ed4 100644 --- a/R/PeriodStandardization.R +++ b/R/PeriodStandardization.R @@ -226,9 +226,10 @@ CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', #'SPEI <- PeriodStandardization(data = data) #'SPEIcor <- PeriodStandardization(data = data, data_cor = datacor) #'@import multiApply -#'@import ClimProjDiags -#'@import TLMoments -#'@import lmom +#'@importFrom ClimProjDiags Subset +#'@importFrom lmomco pwm.pp pwm.ub pwm2lmom are.lmom.valid parglo pargam parpe3 +#'@importFrom lmom cdfglo cdfgam cdfpe3 pelglo pelgam pelpe3 +#'@importFrom SPEI parglo.maxlik #'@export PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, time_dim = 'syear', leadtime_dim = 'time', @@ -257,7 +258,7 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, } ## dates if (!is.null(dates)) { - if (!(is.Date(dates)) & !(is.POSIXct(dates))) { + if (!any(inherits(dates, 'Date'), inherits(dates, 'POSIXct'))) { stop("Parameter 'dates' is not of the correct class, ", "only 'Date' and 'POSIXct' classes are accepted.") } @@ -338,8 +339,8 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, "It will not be used.") ref_period <- NULL } else { - years <- year(ClimProjDiags::Subset(dates, along = leadtime_dim, - indices = 1)) + years <- format(ClimProjDiags::Subset(dates, along = leadtime_dim, + indices = 1), "%Y") ref_period[[1]] <- which(ref_period[[1]] == years) ref_period[[2]] <- which(ref_period[[2]] == years) } @@ -619,22 +620,22 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, .std <- function(data, fit = 'pp-pwm', distribution = 'log-Logistic') { pwm = switch(fit, - 'pp-pwm' = pwm.pp(data, -0.35, 0, nmom = 3), - pwm.ub(data, nmom = 3) + 'pp-pwm' = lmomco::pwm.pp(data, -0.35, 0, nmom = 3), + lmomco::pwm.ub(data, nmom = 3) # TLMoments::PWM(data, order = 0:2) ) - lmom <- pwm2lmom(pwm) - if (!(!are.lmom.valid(lmom) || anyNA(lmom[[1]]) || any(is.nan(lmom[[1]])))) { + lmom <- lmomco::pwm2lmom(pwm) + if (!any(!lmomco::are.lmom.valid(lmom), anyNA(lmom[[1]]), any(is.nan(lmom[[1]])))) { fortran_vec = c(lmom$lambdas[1:2], lmom$ratios[3]) params_result = switch(distribution, 'log-Logistic' = tryCatch(lmom::pelglo(fortran_vec), - error = function(e){parglo(lmom)$para}), + error = function(e){lmomco::parglo(lmom)$para}), 'Gamma' = tryCatch(lmom::pelgam(fortran_vec), - error = function(e){pargam(lmom)$para}), + error = function(e){lmomco::pargam(lmom)$para}), 'PearsonIII' = tryCatch(lmom::pelpe3(fortran_vec), - error = function(e){parpe3(lmom)$para})) + error = function(e){lmomco::parpe3(lmom)$para})) if (distribution == 'log-Logistic' && fit == 'max-lik') { - params_result = parglo.maxlik(data, params_result)$para + params_result = SPEI::parglo.maxlik(data, params_result)$para } return(params_result) } else { diff --git a/R/zzz.R b/R/zzz.R index 4c8d0a4..b108beb 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -110,16 +110,18 @@ wind2CF <- function(wind, pc) { # Function that creates a mask array from dates for the whole year .datesmask <- function(dates, frequency = 'monthly') { - ini <- as.Date(paste(lubridate::year(min(dates)), 01, 01, sep = '-')) - end <- as.Date(paste(lubridate::year(max(dates)), 12, 31, sep = '-')) - daily <- as.Date(ini:end) + years <- format(dates, "%Y") + ini <- as.Date(paste(min(years), 01, 01, sep = '-')) + end <- as.Date(paste(max(years), 12, 31, sep = '-')) + daily <- as.Date(seq(ini, end, by = "day")) if (frequency == 'monthly') { - monthly <- daily[which(lubridate::day(daily) == 1)] + days <- format(daily, "%d") + monthly <- daily[which(days == 1)] dates_mask <- array(0, dim = length(monthly)) for (dd in 1:length(dates)) { - ii <- which(monthly == as.Date(paste(lubridate::year(dates[dd]), - lubridate::month(dates[dd]), - 01, sep = '-'))) + year <- format(dates[dd], "%Y") + month <- format(dates[dd], "%m") + ii <- which(monthly == as.Date(paste(year, month, 01, sep = '-'))) dates_mask[ii] <- 1 } } else { -- GitLab From 86547ee1de285d565c421ff3881b8a2bce1b6b83 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 15 Nov 2023 16:41:14 +0100 Subject: [PATCH 32/42] Remove function PeriodSPEI --- NAMESPACE | 2 - R/PeriodSPEI.R | 635 ------------------------------------------ man/CST_PeriodSPEI.Rd | 209 -------------- man/PeriodSPEI.Rd | 209 -------------- 4 files changed, 1055 deletions(-) delete mode 100644 R/PeriodSPEI.R delete mode 100644 man/CST_PeriodSPEI.Rd delete mode 100644 man/PeriodSPEI.Rd diff --git a/NAMESPACE b/NAMESPACE index a15bdb0..9b14b18 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,7 +8,6 @@ export(CST_MergeRefToExp) export(CST_PeriodAccumulation) export(CST_PeriodMean) export(CST_PeriodPET) -export(CST_PeriodSPEI) export(CST_PeriodStandardization) export(CST_QThreshold) export(CST_SelectPeriodOnData) @@ -21,7 +20,6 @@ export(MergeRefToExp) export(PeriodAccumulation) export(PeriodMean) export(PeriodPET) -export(PeriodSPEI) export(PeriodStandardization) export(QThreshold) export(SelectPeriodOnData) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R deleted file mode 100644 index 1f4ef4c..0000000 --- a/R/PeriodSPEI.R +++ /dev/null @@ -1,635 +0,0 @@ -#'Compute the Standardised Precipitation-Evapotranspiration Index -#' -#'Calculation of the Standardised Precipitation-Evapotranspiration Index (SPEI) -#'that is a multiscalar drought index based on climatic data. It can be used for -#'determining the onset, duration and magnitude of drought conditions with -#'respect to normal conditions. The idea behind the SPEI is to compare the -#'highest possible evapotranspiration with the current water availability. The -#'SPEI uses for a specific time frequency the difference between precipitation -#'and potential evapotranspiration. This represents a simple climatic water -#'balance which is calculated at different time scales to obtain the SPEI. This -#'function is build to be compatible with other tools in that work with -#''s2dv_cube' object class. The input data must be this object class. If you -#'don't work with 's2dv_cube', see PeriodSPEI. -#' -#'Next, some specifications for the calculation of this indicator will be -#'discussed. On the one hand, the model to be used to calculate Potential -#'Evapotranspiration is specified with the 'pet_method' parameter (it can be -#'hargreaves, hargraves modified or thornwhite). On the other hand, to choose -#'the time scale in which you want to accumulate the SPEI (SPEI3, SPEI6...) is -#'done using the 'accum' parameter, where you must indicate the number of time -#'steps you want to accumulate throughout 'leadtime_dim'. The accumulation will -#'be performed backwards by default. Since the accumulation is done for the -#'elapsed time steps, there will be no complete accumulations until reaching the -#'time instant equal to the value of the 'accum' parameter. For this reason, in -#'the result, we will find that for the dimension where the accumulation has -#'been carried out, the values of the array will be NA since they do not include -#'complete accumulations. Also, there is a parameter to specify if the -#'standardization that can be TRUE or FALSE. If it is TRUE, the data is fit to a -#'probability distribution to transform the original values to standardized -#'units that are comparable in space and time and at different SPEI time scales. -#'The 'na.rm' parameter is a logical parameter used to decide whether to remove -#'the NA values from the data before doing the calculation. It must be taken -#'into account that if 'na.rm' is FALSE and there is some NA value in the -#'specific coordinates which the SPEI is computed, standardization cannot be -#'carried out for those coordinates and therefore, the result will be filled -#'with NA for the specific coordinates. However, when 'na.rm' is TRUE, if the -#'amount of data for those specific coordinates is smaller than 4, it will not -#'be possible to carry out because we will not have enough data and the result -#'will be also filled with NAs for that coordinates. When only 'exp' is provided -#'('exp_cor' is NULL) the Standardization is computed with cross validation. For -#'more information about SPEI calculation, see functions CST_PeriodPET, CST_PeriodAccumulation -#'and CST_PeriodStandardization. -#' -#'@param exp A named list with the needed \code{s2dv_cube} objects containing -#' the seasonal forecast experiment in the data element for each variable. -#' Specific variables are needed for each method used in computing the -#' Potential Evapotranspiration. See parameter 'pet_method'. The accepted -#' variables are: 'tmin' and 'tmax', required for methods 'hargreaves' and -#' 'hargreaves_modified' and 'tmean', required for method 'thornthwaite'. -#' Variable 'pr' is always needed. The units for temperature variables -#' ('tmin', 'tmax' and 'tmean') need to be in Celcius degrees; the units for -#' precipitation ('pr') need to be in mm/month. -#'@param exp_cor A named list with the needed \code{s2dv_cube} objects for each -#' variable in which the quantile PeriodSPEI should be applied. If it is not -#' specified, the PeriodSPEI is calculated from object 'exp'. -#'@param time_dim A character string indicating the name of the temporal -#' dimension. By default, it is set to 'syear'. -#'@param leadtime_dim A character string indicating the name of the temporal -#' dimension. By default, it is set to 'time'. -#'@param memb_dim A character string indicating the name of the dimension in -#' which the ensemble members are stored. When set it to NULL, threshold is -#' computed for individual members. -#'@param lat_dim A character string indicating the name of the latitudinal -#' dimension. By default it is set by 'latitude'. -#'@param accum An integer value indicating the number of months for the -#' accumulation for each variable. When it is greater than 1, the result will -#' be filled with NA until the accum 'time_dim' dimension number due to the -#' accumulation to previous months. The accumulation is performed backwards -#' by default. -#'@param ref_period A list with two numeric values with the starting and end -#' points of the reference period used for computing the index. The default -#' value is NULL indicating that the first and end values in data will be -#' used as starting and end points. -#'@param params A multidimensional array with named dimensions for computing the -#' SPEI. This option overrides computation of fitting parameters. It needs -#' to have the following dimensions: same leadtime dimension of 'exp' -#' (specified in 'leadtime_dim'); time dimension of length 1 (specified in -#' 'time_dim'); and a dimension named 'coef' with the length of the -#' coefficients needed for the used distribution (for 'Gamma' coef dimension is -#' of lenght 2, for 'log-Logistic' is of length 3). It can't have member -#' dimension (specified in 'memb_dim'). -#'@param pet_exp A multidimensional array containing the Potential -#' EvapoTranspiration data of 'exp'. It must have the same dimensions of the -#' variable 'pr' of 'exp'. If it is NULL it is calculated using the provided -#' variables with specified 'pet_method'. It is NULL by default. -#'@param pet_expcor A multidimensional array containing the Potential -#' EvapoTranspiration data of 'exp_cor'. It must have the same dimensions of -#' the variable 'pr' of 'exp_cor'. If it is NULL it is calculated using the -#' provided variables with specified 'pet_method'. It is NULL by default. -#'@param standardization A logical value indicating wether the standardization -#' is computed. -#'@param pet_method A character string indicating the method used to compute -#' the potential evapotranspiration. The accepted methods are: -#' 'hargreaves' and 'hargreaves_modified', that require the data to have -#' variables tmin and tmax; and 'thornthwaite', that requires variable -#' 'tmean'. -#'@param method A character string indicating the standardization method used. -#' If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by -#' default. -#'@param distribution A character string indicating the name of the distribution -#' function to be used for computing the SPEI. The accepted names are: -#' 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The -#' 'Gamma' method only works when only precipitation is provided and other -#' variables are 0 because it is positive defined (SPI indicator). If other -#' distribution functions want to be used contact the authors. -#'@param handle_infinity A logical value wether to return Infinite values (TRUE) -#' or not (FALSE). -#'@param na.rm A logical value indicating whether NA values should be removed -#' from data. It is FALSE by default. If it is FALSE and there are NA values, -#' (if standardization is TRUE) all values of other dimensions except time_dim -#' and leadtime_dim will be set to NA directly. On the other hand, if it is -#' TRUE, if the data from other dimensions except time_dim and leadtime_dim is -#' not reaching 4 values, it is not enough values to estimate the parameters -#' and the result will include NA. -#'@param ncores An integer value indicating the number of cores to use in -#' parallel computation. -#' -#'@return An 's2dv_cube' object containing the SPEI multidimensional array in -#'element \code{data} with same dimensions as input data. If 'exp_cor' is -#'provided, only results from 'exp_cor' will be provided. The parameters of the -#'standardization will only be returned if 'return_params' is TRUE. The SPEI -#'will only be computed if 'standardization' is TRUE. If 'standardization' is -#'FALSE, only the climatic water balance (precipitation minus -#'evapotranspiration) will be returned. The resultant arrays will have the same -#'dimensions as the initial input data. The other elements in the 's2dv_cube' -#'will be updated with the combined information of the input data arrays. -#' -#'@examples -#'dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, -#' latitude = 2, longitude = 1, ensemble = 25) -#'dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 1, time = 3, -#' latitude = 2, longitude = 1, ensemble = 15) -#' -#'dates_exp <- as.POSIXct(c(paste0(2010:2015, "-08-16"), -#' paste0(2010:2015, "-09-15"), -#' paste0(2010:2015, "-10-16")), 'UTC') -#'dim(dates_exp) <- c(sday = 1, sweek = 1, syear = 6, time = 3) -#'dates_expcor <- as.POSIXct(c(paste0(2020, "-08-16"), paste0(2020, "-09-15"), -#' paste0(2020, "-10-16")), 'UTC') -#'dim(dates_expcor) <- c(sday = 1, sweek = 1, syear = 1, time = 3) -#'lat <- c(40,40.1) -#' -#'exp_tmax <- array(rnorm(900, 27.73, 5.26), dim = dims) -#'exp_tmin <- array(rnorm(900, 14.83, 3.86), dim = dims) -#'exp_pr <- array(rnorm(900, 21.19, 25.64), dim = dims) -#' -#'expcor_tmax <- array(rnorm(540, 29.03, 5.67), dim = dimscor) -#'expcor_tmin <- array(rnorm(540, 15.70, 4.40), dim = dimscor) -#'expcor_pr <- array(rnorm(540, 15.62, 21.38), dim = dimscor) -#' -#'exp <- list('tmax' = exp_tmax, 'tmin' = exp_tmin, 'pr' = exp_pr) -#'exp_cor <- list('tmax' = expcor_tmax, 'tmin' = expcor_tmin, -#' 'pr' = expcor_pr) -#' -#'exp <- lapply(exp, CSTools::s2dv_cube, coords = list(latitude = lat), -#' Dates = dates_exp) -#'exp_cor <- lapply(exp_cor, CSTools::s2dv_cube, coords = list(latitude = lat), -#' Dates = dates_expcor) -#' -#'res <- CST_PeriodSPEI(exp = exp, exp_cor = exp_cor) -#' -#'@importFrom CSTools s2dv_cube -#'@export -CST_PeriodSPEI <- function(exp, exp_cor = NULL, time_dim = 'syear', - leadtime_dim = 'time', memb_dim = 'ensemble', - lat_dim = 'latitude', accum = 1, ref_period = NULL, - params = NULL, pet_exp = NULL, pet_expcor = NULL, - standardization = TRUE, pet_method = 'hargreaves', - method = 'parametric', distribution = 'log-Logistic', - handle_infinity = FALSE, return_params = FALSE, - na.rm = FALSE, ncores = NULL) { - - # Check 's2dv_cube' - if (is.null(exp)) { - stop("Parameter 'exp' cannot be NULL.") - } - if (!all(sapply(exp, function(x) inherits(x, 's2dv_cube')))) { - stop("Parameter 'exp' must be a list of 's2dv_cube' class.") - } - if (!is.null(exp_cor)) { - if (!all(sapply(exp_cor, function(x) inherits(x, 's2dv_cube')))) { - stop("Parameter 'exp_cor' must be a list of 's2dv_cube' class.") - } - } - # latitude - if (!any(names(exp[[1]]$coords) %in% .KnownLatNames())) { - stop("Spatial coordinate names of parameter 'exp' do not match any ", - "of the names accepted by the package.") - } - # Dates - dates_exp <- exp[[1]]$attrs$Dates - if (!'Dates' %in% names(exp[[1]]$attrs)) { - stop("Element 'Dates' is not found in 'attrs' list of 'exp'. ", - "See 's2dv_cube' object description in README file for more ", - "information.") - } - - if (!is.null(exp_cor)) { - if (!'Dates' %in% names(exp_cor[[1]]$attrs)) { - stop("Element 'Dates' is not found in 'attrs' list of 'exp_cor'. ", - "See 's2dv_cube' object description in README file for more ", - "information.") - } - } - - lat_dim <- names(exp[[1]]$coords)[[which(names(exp[[1]]$coords) %in% .KnownLatNames())]] - - res <- PeriodSPEI(exp = lapply(exp, function(x) x$data), - dates_exp = exp[[1]]$attrs$Dates, - lat = exp[[1]]$coords[[lat_dim]], - exp_cor = if (is.null(exp_cor)) {NULL} else { - lapply(exp_cor, function(x) x$data)}, - dates_expcor = exp_cor[[1]]$attrs$Dates, - time_dim = time_dim, leadtime_dim = leadtime_dim, - memb_dim = memb_dim, lat_dim = lat_dim, - accum = accum, ref_period = ref_period, params = params, - pet_exp = pet_exp, pet_expcor = pet_expcor, - standardization = standardization, - pet_method = pet_method, method = method, - distribution = distribution, - handle_infinity = handle_infinity, - return_params = return_params, na.rm = na.rm, - ncores = ncores) - - if (!is.null(exp_cor)) { - source_files_expcor <- lapply(exp_cor, function(x) {x$attrs$source_files}) - source_files <- lapply(exp, function(x) {x$attrs$source_files}) - source_files <- c(exp = source_files, exp_cor = source_files_expcor) - coords <- exp_cor[[1]]$coords - Dates <- exp_cor[[1]]$attrs$Dates - metadata <- exp_cor[[1]]$attrs$Variable$metadata - metadata_names <- names(metadata) - } else { - source_files <- lapply(exp, function(x) {x$attrs$source_files}) - coords <- exp[[1]]$coords - Dates <- exp[[1]]$attrs$Dates - metadata <- exp[[1]]$attrs$Variable$metadata - metadata_names <- names(metadata) - } - - if (standardization) { - varname <- 'SPEI' - } else { - varname <- 'Precipitation minus accumulated PET' - } - - if (return_params & standardization) { - metadata_names <- intersect(names(dim(res[[1]])), metadata_names) - suppressWarnings( - res[[1]] <- CSTools::s2dv_cube(data = res[[1]], coords = coords, - varName = varname, - metadata = metadata[metadata_names], - Dates = Dates, - source_files = source_files, - when = Sys.time()) - ) - return(list(spei = res[[1]], params = res[[2]])) - } else { - metadata_names <- intersect(names(dim(res)), metadata_names) - suppressWarnings( - res <- CSTools::s2dv_cube(data = res, coords = coords, - varName = varname, - metadata = metadata[metadata_names], - Dates = Dates, - source_files = source_files, - when = Sys.time()) - ) - return(res) - } -} - - -#'Compute the Standardised Precipitation-Evapotranspiration Index -#' -#'Calculation of the Standardised Precipitation-Evapotranspiration Index (SPEI) -#'that is a multiscalar drought index based on climatic data. It can be used for -#'determining the onset, duration and magnitude of drought conditions with -#'respect to normal conditions. The idea behind the SPEI is to compare the -#'highest possible evapotranspiration with the current water availability. The -#'SPEI uses for a specific time frequency the difference between precipitation -#'and potential evapotranspiration. -#' -#'Next, some specifications for the calculation of this indicator will be -#'discussed. On the one hand, the model to be used to calculate Potential -#'Evapotranspiration is specified with the 'pet_method' parameter (it can be -#'hargreaves, hargraves modified or thornwhite). On the other hand, to choose -#'the time scale in which you want to accumulate the SPEI (SPEI3, SPEI6...) is -#'done using the 'accum' parameter, where you must indicate the number of time -#'steps you want to accumulate throughout 'leadtime_dim'. The accumulation will -#'be performed backwards by default. Since the accumulation is done for the -#'elapsed time steps, there will be no complete accumulations until reaching the -#'time instant equal to the value of the 'accum' parameter. For this reason, in -#'the result, we will find that for the dimension where the accumulation has -#'been carried out, the values of the array will be NA since they do not include -#'complete accumulations. Also, there is a parameter to specify if the -#'standardization that can be TRUE or FALSE. If it is TRUE, the data is fit to a -#'probability distribution to transform the original values to standardized -#'units that are comparable in space and time and at different SPEI time scales. -#'The 'na.rm' parameter is a logical parameter used to decide whether to remove -#'the NA values from the data before doing the calculation. It must be taken -#'into account that if 'na.rm' is FALSE and there is some NA value in the -#'specific coordinates which the SPEI is computed, standardization cannot be -#'carried out for those coordinates and therefore, the result will be filled -#'with NA for the specific coordinates. However, when 'na.rm' is TRUE, if the -#'amount of data for those specific coordinates is smaller than 4, it will not -#'be possible to carry out because we will not have enough data and the result -#'will be also filled with NAs for that coordinates. When only 'exp' is provided -#'('exp_cor' is NULL) the Standardization is computed with cross validation. For -#'more information about SPEI calculation, see functions PeriodPET, -#'PeriodAccumulation and PeriodStandardization. -#' -#'@param exp A named list with multidimensional array objects containing -#' the seasonal forecast experiment in the data element for each variable. -#' Specific variables are needed for each method used in computing the -#' Potential Evapotranspiration. See parameter 'pet_method'. The accepted -#' variables are: 'tmin' and 'tmax', required for methods 'hargreaves' and -#' 'hargreaves_modified' and 'tmean', required for method 'thornthwaite'. -#' Variable 'pr' is always needed. The units for temperature variables -#' ('tmin', 'tmax' and 'tmean') need to be in Celcius degrees; the units for -#' precipitation ('pr') need to be in mm/month. -#'@param dates_exp An array of temporal dimensions containing the Dates of -#' 'exp'. It must be of class 'Date' or 'POSIXct'. -#'@param lat A numeric vector containing the latitude values of 'exp'. -#'@param exp_cor A named list with multidimensional array objects for each -#' variable in which the quantile PeriodSPEI should be applied. If it is not -#' specified, the PeriodSPEI is calculated from object 'exp'. -#'@param dates_expcor An array of temporal dimensions containing the Dates of -#' 'exp_cor'. It must be of class 'Date' or 'POSIXct'. -#'@param time_dim A character string indicating the name of the temporal -#' dimension. By default, it is set to 'syear'. -#'@param leadtime_dim A character string indicating the name of the temporal -#' dimension. By default, it is set to 'time'. -#'@param memb_dim A character string indicating the name of the dimension in -#' which the ensemble members are stored. When set it to NULL, threshold is -#' computed for individual members. -#'@param lat_dim A character string indicating the name of the latitudinal -#' dimension. By default it is set by 'latitude'. -#'@param accum accum An integer value indicating the number of months for the -#' accumulation for each variable. When it is greater than 1, the result will -#' be filled with NA until the accum time_dim dimension number due to the -#' accumulation to previous months. The accumulation is performed backwards -#' by default. -#'@param ref_period A list with two numeric values with the starting and end -#' points of the reference period used for computing the index. The default -#' value is NULL indicating that the first and end values in data will be -#' used as starting and end points. -#'@param params A multidimensional array with named dimensions for computing the -#' SPEI. This option overrides computation of fitting parameters. It needs -#' to be of same time dimension (specified in 'time_dim') of 'exp' and a -#' dimension named 'coef' with the length of the coefficients needed for the -#' used distribution (for 'Gamma' coef dimension is of lenght 2, for -#' 'log-Logistic' is of length 3). It also needs to have a leadtime dimension -#' (specified in 'leadtime_dim') of length 1. -#'@param pet_exp A multidimensional array containing the Potential -#' EvapoTranspiration data of 'exp'. It must have the same dimensions of the -#' variable 'pr' of 'exp'. If it is NULL it is calculated using the provided -#' variables with specified 'pet_method'. It is NULL by default. -#'@param pet_expcor A multidimensional array containing the Potential -#' EvapoTranspiration data of 'exp_cor'. It must have the same dimensions of -#' the variable 'pr' of 'exp_cor'. If it is NULL it is calculated using the -#' provided variables with specified 'pet_method'. It is NULL by default. -#'@param standardization A logical value indicating wether the standardization -#' is computed. -#'@param pet_method A character string indicating the method used to compute -#' the potential evapotranspiration. The accepted methods are: -#' 'hargreaves' and 'hargreaves_modified', that require the data to have -#' variables tmin and tmax; and 'thornthwaite', that requires variable -#' 'tmean'. -#'@param method A character string indicating the standardization method used. -#' If can be: 'parametric' or 'non-parametric'. -#'@param distribution A character string indicating the name of the distribution -#' function to be used for computing the SPEI. The accepted names are: -#' 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The -#' 'Gamma' method only works when only precipitation is provided and other -#' variables are 0 because it is positive defined (SPI indicator). -#'@param handle_infinity A logical value wether to return Infinite values (TRUE) -#' or not (FALSE). -#'@param return_params A logical value indicating wether to return parameters -#' array (TRUE) or not (FALSE). It is FALSE by default. -#'@param na.rm A logical value indicating whether NA values should be removed -#' from data. It is FALSE by default. If it is FALSE and there are NA values, -#' (if standardization is TRUE) all values of other dimensions except time_dim -#' and leadtime_dim will be set to NA directly. On the other hand, if it is -#' TRUE, if the data from other dimensions except time_dim and leadtime_dim is -#' not reaching 4 values, it is not enough values to estimate the parameters -#' and the result will include NA. -#'@param ncores An integer value indicating the number of cores to use in -#' parallel computation. -#' -#'@return A multidimensional array containing the SPEI with same dimensions as -#'input data. If 'exp_cor' is provided, only results from 'exp_cor' will be -#'provided. The parameters of the standardization will only be returned -#'if 'return_params' is TRUE. The SPEI will only be computed if -#''standardization' is TRUE. If 'standardization' is FALSE, only the climatic -#'water balance (precipitation minus evapotranspiration) will be returned. The -#'resultant arrays will have the same dimensions as the initial input data. -#' -#'@examples -#'dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, -#' latitude = 2, longitude = 1, ensemble = 25) -#'dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 1, time = 3, -#' latitude = 2, longitude = 1, ensemble = 15) -#'exp_tmax <- array(rnorm(900, 27.73, 5.26), dim = dims) -#'exp_tmin <- array(rnorm(900, 14.83, 3.86), dim = dims) -#'exp_pr <- array(rnorm(900, 21.19, 25.64), dim = dims) -#' -#'expcor_tmax <- array(rnorm(540, 29.03, 5.67), dim = dimscor) -#'expcor_tmin <- array(rnorm(540, 15.70, 4.40), dim = dimscor) -#'expcor_pr <- array(rnorm(540, 15.62, 21.38), dim = dimscor) -#' -#'dates_exp <- as.Date(c(paste0(2010:2015, "-08-16"), -#' paste0(2010:2015, "-09-15"), -#' paste0(2010:2015, "-10-16"))) -#'dim(dates_exp) <- c(sday = 1, sweek = 1, syear = 6, time = 3) -#'dates_expcor <- as.POSIXct(c(paste0(2020, "-08-16"), -#' paste0(2020, "-09-15"), -#' paste0(2020, "-10-16")), 'UTC') -#'dim(dates_expcor) <- c(sday = 1, sweek = 1, syear = 1, time = 3) -#'lat <- c(40,40.1) -#' -#'exp <- list('tmax' = exp_tmax, 'tmin' = exp_tmin, 'pr' = exp_pr) -#'exp_cor <- list('tmax' = expcor_tmax, 'tmin' = expcor_tmin, -#' 'pr' = expcor_pr) -#'res <- PeriodSPEI(exp = exp, exp_cor = exp_cor, lat = lat, -#' dates_exp = dates_exp, dates_expcor = dates_expcor) -#' -#'@export -PeriodSPEI <- function(exp, dates_exp, lat, exp_cor = NULL, dates_expcor = NULL, - time_dim = 'syear', leadtime_dim = 'time', - memb_dim = 'ensemble', lat_dim = 'latitude', - accum = 1, ref_period = NULL, params = NULL, - pet_exp = NULL, pet_expcor = NULL, standardization = TRUE, - pet_method = 'hargreaves', method = 'parametric', - distribution = 'log-Logistic', handle_infinity = FALSE, - return_params = FALSE, na.rm = FALSE, ncores = NULL) { - - # Initial checks - ## exp - if (!inherits(exp, 'list')) { - stop("Parameter 'exp' needs to be a named list with the needed variables.") - } - if (is.null(names(exp))) { - stop("Parameter 'exp' needs to be a named list with the variable names.") - } - if (any(sapply(exp, function(x) is.null(names(dim(x)))))) { - stop("Parameter 'exp' needs to be a list of arrays with dimension names.") - } - dims <- lapply(exp, function(x) dim(x)) - first_dims <- dims[[1]] - all_equal <- all(sapply(dims[-1], function(x) identical(first_dims, x))) - if (!all_equal) { - stop("Parameter 'exp' variables need to have the same dimensions.") - } - - ## exp_cor - if (!is.null(exp_cor)) { - if (!inherits(exp_cor, 'list')) { - stop("Parameter 'exp_cor' needs to be a named list with the needed ", - "variables if it is not NULL.") - } - if (is.null(names(exp_cor))) { - stop("Parameter 'exp_cor' needs to be a named list with the variable names.") - } - if (any(sapply(exp_cor, function(x) is.null(names(dim(x)))))) { - stop("Parameter 'exp_cor' needs to be a list of arrays with dimension names.") - } - dimscor <- lapply(exp_cor, function(x) dim(x)) - first_dimscor <- dimscor[[1]] - all_equal <- all(sapply(dimscor[-1], function(x) identical(first_dimscor, x))) - if (!all_equal) { - stop("Parameter 'exp_cor' variables need to have the same dimensions.") - } - } - # Variable checks - ## exp (2) - pet <- vector("list", 2) - if (!('pr' %in% names(exp))) { - stop("Variable 'pr' is not included in 'exp'.") - } - ## exp_cor (2) - if (!is.null(exp_cor)) { - if (!('pr' %in% names(exp_cor))) { - stop("Variable 'pr' is not included in 'exp_cor'.") - } - if (length(pet_method) == 1) { - pet_method <- rep(pet_method, 2) - } - } - ## time_dim - if (!is.character(time_dim) | length(time_dim) != 1) { - stop("Parameter 'time_dim' must be a character string.") - } - if (!all(sapply(exp, function(x) time_dim %in% names(dim(x))))) { - stop("Parameter 'time_dim' is not found in 'exp' dimension.") - } - if (!is.null(exp_cor)) { - if (!all(sapply(exp_cor, function(x) time_dim %in% names(dim(x))))) { - stop("Parameter 'time_dim' is not found in 'exp_cor' dimension.") - } - } - ## leadtime_dim - if (!is.character(leadtime_dim) | length(leadtime_dim) != 1) { - stop("Parameter 'leadtime_dim' must be a character string.") - } - if (!all(sapply(exp, function(x) leadtime_dim %in% names(dim(x))))) { - stop("Parameter 'leadtime_dim' is not found in 'exp' dimension.") - } - if (!is.null(exp_cor)) { - if (!all(sapply(exp_cor, function(x) leadtime_dim %in% names(dim(x))))) { - stop("Parameter 'leadtime_dim' is not found in 'exp_cor' dimension.") - } - } - ## memb_dim - if (!is.character(memb_dim) | length(memb_dim) != 1) { - stop("Parameter 'memb_dim' must be a character string.") - } - if (!all(sapply(exp, function(x) memb_dim %in% names(dim(x))))) { - stop("Parameter 'memb_dim' is not found in 'exp' dimension.") - } - if (!is.null(exp_cor)) { - if (!all(sapply(exp_cor, function(x) memb_dim %in% names(dim(x))))) { - stop("Parameter 'memb_dim' is not found in 'exp_cor' dimension.") - } - } - ## lat_dim - if (!is.character(lat_dim) | length(lat_dim) != 1) { - stop("Parameter 'lat_dim' must be a character string.") - } - if (!all(sapply(exp, function(x) lat_dim %in% names(dim(x))))) { - stop("Parameter 'lat_dim' is not found in 'exp' dimension.") - } - if (!is.null(exp_cor)) { - if (!all(sapply(exp_cor, function(x) lat_dim %in% names(dim(x))))) { - stop("Parameter 'lat_dim' is not found in 'exp_cor' dimension.") - } - } - ## dates - if (is.null(dates_exp)) { - stop("Parameter 'dates_exp' is missing, dates must be provided.") - } - if (!any(inherits(dates_exp, 'Date'), inherits(dates_exp, 'POSIXct'))) { - stop("Parameter 'dates_exp' is not of the correct class, ", - "only 'Date' and 'POSIXct' classes are accepted.") - } - if (!time_dim %in% names(dim(dates_exp)) | !leadtime_dim %in% names(dim(dates_exp))) { - stop("Parameter 'dates_exp' must have 'time_dim' and 'leadtime_dim' ", - "dimension.") - } - if (!all(dim(exp[[1]])[c(time_dim, leadtime_dim)] == - dim(dates_exp)[c(time_dim, leadtime_dim)])) { - stop("Parameter 'dates_exp' needs to have the same length as 'time_dim' ", - "and 'leadtime_dim' as 'exp'.") - } - if (!is.null(exp_cor)) { - if (is.null(dates_expcor)) { - stop("Parameter 'dates_expcor' is missing, dates for 'exp_cor' must be ", - "provided if exp_cor is not NULL.") - } - if (!any(inherits(dates_expcor, 'Date'), inherits(dates_expcor, 'POSIXct'))) { - stop("Element 'Dates' in 'exp_cor' is not of the correct class, ", - "only 'Date' and 'POSIXct' classes are accepted.") - } - if (!time_dim %in% names(dim(dates_expcor)) | !leadtime_dim %in% names(dim(dates_expcor))) { - stop("Parameter 'dates_expcor' must have 'time_dim' and 'leadtime_dim' ", - "dimension.") - } - if (!all(dim(exp_cor[[1]])[c(time_dim, leadtime_dim)] == - dim(dates_expcor)[c(time_dim, leadtime_dim)])) { - stop("Parameter 'dates_expcor' needs to have the same length as ", - "'time_dim' and 'leadtime_dim' as 'exp_cor'.") - } - } - ## accum - if (accum > dim(exp[[1]])[leadtime_dim]) { - stop(paste0("Cannot compute accumulation of ", accum, " months because ", - "loaded data has only ", dim(exp[[1]])[leadtime_dim], " months.")) - } - ## standardization - if (!is.logical(standardization)) { - stop("Parameter 'standardization' must be a logical value.") - } - ## na.rm - if (!is.logical(na.rm)) { - stop("Parameter 'na.rm' must be logical.") - } - - # Complete dates - dates <- .return2list(dates_exp, dates_expcor) - - # Compute PeriodSPEI - k = 0 - spei_res <- NULL - computed_pet <- FALSE - accumulated <- list(NULL, NULL) - for (data in .return2list(exp, exp_cor)) { - k = k + 1 - # Evapotranspiration estimation (unless pet is already provided) - if (is.null(pet[[k]]) | computed_pet) { - pet[[k]] <- PeriodPET(data = data, dates = dates[[k]], - lat = lat, pet_method = pet_method[k], - time_dim = time_dim, leadtime_dim = leadtime_dim, - lat_dim = lat_dim, na.rm = na.rm, ncores = ncores) - computed_pet <- TRUE - } - if (!na.rm & any(is.na(pet[[k]]))) { - mask_na <- which(is.na(pet[[k]])) - warning("There are NAs in PET.") - } - - # Accumulation - diff_p_pet <- data$pr - pet[[k]] - accumulated[[k]] <- PeriodAccumulation(data = diff_p_pet, dates = dates[[k]], - rollwidth = accum, time_dim = leadtime_dim, - sdate_dim = time_dim, frequency = 'monthly', - ncores = ncores) - } - - if (standardization) { - # Standardization - spei <- PeriodStandardization(data = accumulated[[1]], - data_cor = accumulated[[2]], - dates = dates[[1]], - params = params, - time_dim = time_dim, - leadtime_dim = leadtime_dim, - memb_dim = memb_dim, ref_period = ref_period, - handle_infinity = handle_infinity, - return_params = return_params, - method = method, distribution = distribution, - na.rm = TRUE, ncores = ncores) - } else { - spei <- accumulated[[k]] - } - return(spei) -} \ No newline at end of file diff --git a/man/CST_PeriodSPEI.Rd b/man/CST_PeriodSPEI.Rd deleted file mode 100644 index 9c2c357..0000000 --- a/man/CST_PeriodSPEI.Rd +++ /dev/null @@ -1,209 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/PeriodSPEI.R -\name{CST_PeriodSPEI} -\alias{CST_PeriodSPEI} -\title{Compute the Standardised Precipitation-Evapotranspiration Index} -\usage{ -CST_PeriodSPEI( - exp, - exp_cor = NULL, - time_dim = "syear", - leadtime_dim = "time", - memb_dim = "ensemble", - lat_dim = "latitude", - accum = 1, - ref_period = NULL, - params = NULL, - pet_exp = NULL, - pet_expcor = NULL, - standardization = TRUE, - pet_method = "hargreaves", - method = "parametric", - distribution = "log-Logistic", - handle_infinity = FALSE, - return_params = FALSE, - na.rm = FALSE, - ncores = NULL -) -} -\arguments{ -\item{exp}{A named list with the needed \code{s2dv_cube} objects containing -the seasonal forecast experiment in the data element for each variable. -Specific variables are needed for each method used in computing the -Potential Evapotranspiration. See parameter 'pet_method'. The accepted -variables are: 'tmin' and 'tmax', required for methods 'hargreaves' and -'hargreaves_modified' and 'tmean', required for method 'thornthwaite'. -Variable 'pr' is always needed. The units for temperature variables -('tmin', 'tmax' and 'tmean') need to be in Celcius degrees; the units for -precipitation ('pr') need to be in mm/month.} - -\item{exp_cor}{A named list with the needed \code{s2dv_cube} objects for each -variable in which the quantile PeriodSPEI should be applied. If it is not -specified, the PeriodSPEI is calculated from object 'exp'.} - -\item{time_dim}{A character string indicating the name of the temporal -dimension. By default, it is set to 'syear'.} - -\item{leadtime_dim}{A character string indicating the name of the temporal -dimension. By default, it is set to 'time'.} - -\item{memb_dim}{A character string indicating the name of the dimension in -which the ensemble members are stored. When set it to NULL, threshold is -computed for individual members.} - -\item{lat_dim}{A character string indicating the name of the latitudinal -dimension. By default it is set by 'latitude'.} - -\item{accum}{An integer value indicating the number of months for the -accumulation for each variable. When it is greater than 1, the result will -be filled with NA until the accum 'time_dim' dimension number due to the -accumulation to previous months. The accumulation is performed backwards -by default.} - -\item{ref_period}{A list with two numeric values with the starting and end -points of the reference period used for computing the index. The default -value is NULL indicating that the first and end values in data will be -used as starting and end points.} - -\item{params}{A multidimensional array with named dimensions for computing the -SPEI. This option overrides computation of fitting parameters. It needs -to have the following dimensions: same leadtime dimension of 'exp' -(specified in 'leadtime_dim'); time dimension of length 1 (specified in -'time_dim'); and a dimension named 'coef' with the length of the -coefficients needed for the used distribution (for 'Gamma' coef dimension is -of lenght 2, for 'log-Logistic' is of length 3). It can't have member -dimension (specified in 'memb_dim').} - -\item{pet_exp}{A multidimensional array containing the Potential -EvapoTranspiration data of 'exp'. It must have the same dimensions of the -variable 'pr' of 'exp'. If it is NULL it is calculated using the provided -variables with specified 'pet_method'. It is NULL by default.} - -\item{pet_expcor}{A multidimensional array containing the Potential -EvapoTranspiration data of 'exp_cor'. It must have the same dimensions of -the variable 'pr' of 'exp_cor'. If it is NULL it is calculated using the -provided variables with specified 'pet_method'. It is NULL by default.} - -\item{standardization}{A logical value indicating wether the standardization -is computed.} - -\item{pet_method}{A character string indicating the method used to compute -the potential evapotranspiration. The accepted methods are: -'hargreaves' and 'hargreaves_modified', that require the data to have -variables tmin and tmax; and 'thornthwaite', that requires variable -'tmean'.} - -\item{method}{A character string indicating the standardization method used. -If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by -default.} - -\item{distribution}{A character string indicating the name of the distribution -function to be used for computing the SPEI. The accepted names are: -'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The -'Gamma' method only works when only precipitation is provided and other -variables are 0 because it is positive defined (SPI indicator). If other -distribution functions want to be used contact the authors.} - -\item{handle_infinity}{A logical value wether to return Infinite values (TRUE) -or not (FALSE).} - -\item{na.rm}{A logical value indicating whether NA values should be removed -from data. It is FALSE by default. If it is FALSE and there are NA values, -(if standardization is TRUE) all values of other dimensions except time_dim -and leadtime_dim will be set to NA directly. On the other hand, if it is -TRUE, if the data from other dimensions except time_dim and leadtime_dim is -not reaching 4 values, it is not enough values to estimate the parameters -and the result will include NA.} - -\item{ncores}{An integer value indicating the number of cores to use in -parallel computation.} -} -\value{ -An 's2dv_cube' object containing the SPEI multidimensional array in -element \code{data} with same dimensions as input data. If 'exp_cor' is -provided, only results from 'exp_cor' will be provided. The parameters of the -standardization will only be returned if 'return_params' is TRUE. The SPEI -will only be computed if 'standardization' is TRUE. If 'standardization' is -FALSE, only the climatic water balance (precipitation minus -evapotranspiration) will be returned. The resultant arrays will have the same -dimensions as the initial input data. The other elements in the 's2dv_cube' -will be updated with the combined information of the input data arrays. -} -\description{ -Calculation of the Standardised Precipitation-Evapotranspiration Index (SPEI) -that is a multiscalar drought index based on climatic data. It can be used for -determining the onset, duration and magnitude of drought conditions with -respect to normal conditions. The idea behind the SPEI is to compare the -highest possible evapotranspiration with the current water availability. The -SPEI uses for a specific time frequency the difference between precipitation -and potential evapotranspiration. This represents a simple climatic water -balance which is calculated at different time scales to obtain the SPEI. This -function is build to be compatible with other tools in that work with -'s2dv_cube' object class. The input data must be this object class. If you -don't work with 's2dv_cube', see PeriodSPEI. -} -\details{ -Next, some specifications for the calculation of this indicator will be -discussed. On the one hand, the model to be used to calculate Potential -Evapotranspiration is specified with the 'pet_method' parameter (it can be -hargreaves, hargraves modified or thornwhite). On the other hand, to choose -the time scale in which you want to accumulate the SPEI (SPEI3, SPEI6...) is -done using the 'accum' parameter, where you must indicate the number of time -steps you want to accumulate throughout 'leadtime_dim'. The accumulation will -be performed backwards by default. Since the accumulation is done for the -elapsed time steps, there will be no complete accumulations until reaching the -time instant equal to the value of the 'accum' parameter. For this reason, in -the result, we will find that for the dimension where the accumulation has -been carried out, the values of the array will be NA since they do not include -complete accumulations. Also, there is a parameter to specify if the -standardization that can be TRUE or FALSE. If it is TRUE, the data is fit to a -probability distribution to transform the original values to standardized -units that are comparable in space and time and at different SPEI time scales. -The 'na.rm' parameter is a logical parameter used to decide whether to remove -the NA values from the data before doing the calculation. It must be taken -into account that if 'na.rm' is FALSE and there is some NA value in the -specific coordinates which the SPEI is computed, standardization cannot be -carried out for those coordinates and therefore, the result will be filled -with NA for the specific coordinates. However, when 'na.rm' is TRUE, if the -amount of data for those specific coordinates is smaller than 4, it will not -be possible to carry out because we will not have enough data and the result -will be also filled with NAs for that coordinates. When only 'exp' is provided -('exp_cor' is NULL) the Standardization is computed with cross validation. For -more information about SPEI calculation, see functions CST_PeriodPET, CST_PeriodAccumulation -and CST_PeriodStandardization. -} -\examples{ -dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, - latitude = 2, longitude = 1, ensemble = 25) -dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 1, time = 3, - latitude = 2, longitude = 1, ensemble = 15) - -dates_exp <- as.POSIXct(c(paste0(2010:2015, "-08-16"), - paste0(2010:2015, "-09-15"), - paste0(2010:2015, "-10-16")), 'UTC') -dim(dates_exp) <- c(sday = 1, sweek = 1, syear = 6, time = 3) -dates_expcor <- as.POSIXct(c(paste0(2020, "-08-16"), paste0(2020, "-09-15"), - paste0(2020, "-10-16")), 'UTC') -dim(dates_expcor) <- c(sday = 1, sweek = 1, syear = 1, time = 3) -lat <- c(40,40.1) - -exp_tmax <- array(rnorm(900, 27.73, 5.26), dim = dims) -exp_tmin <- array(rnorm(900, 14.83, 3.86), dim = dims) -exp_pr <- array(rnorm(900, 21.19, 25.64), dim = dims) - -expcor_tmax <- array(rnorm(540, 29.03, 5.67), dim = dimscor) -expcor_tmin <- array(rnorm(540, 15.70, 4.40), dim = dimscor) -expcor_pr <- array(rnorm(540, 15.62, 21.38), dim = dimscor) - -exp <- list('tmax' = exp_tmax, 'tmin' = exp_tmin, 'pr' = exp_pr) -exp_cor <- list('tmax' = expcor_tmax, 'tmin' = expcor_tmin, - 'pr' = expcor_pr) - -exp <- lapply(exp, CSTools::s2dv_cube, coords = list(latitude = lat), - Dates = dates_exp) -exp_cor <- lapply(exp_cor, CSTools::s2dv_cube, coords = list(latitude = lat), - Dates = dates_expcor) - -res <- CST_PeriodSPEI(exp = exp, exp_cor = exp_cor) - -} diff --git a/man/PeriodSPEI.Rd b/man/PeriodSPEI.Rd deleted file mode 100644 index 21184d3..0000000 --- a/man/PeriodSPEI.Rd +++ /dev/null @@ -1,209 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/PeriodSPEI.R -\name{PeriodSPEI} -\alias{PeriodSPEI} -\title{Compute the Standardised Precipitation-Evapotranspiration Index} -\usage{ -PeriodSPEI( - exp, - dates_exp, - lat, - exp_cor = NULL, - dates_expcor = NULL, - time_dim = "syear", - leadtime_dim = "time", - memb_dim = "ensemble", - lat_dim = "latitude", - accum = 1, - ref_period = NULL, - params = NULL, - pet_exp = NULL, - pet_expcor = NULL, - standardization = TRUE, - pet_method = "hargreaves", - method = "parametric", - distribution = "log-Logistic", - handle_infinity = FALSE, - return_params = FALSE, - na.rm = FALSE, - ncores = NULL -) -} -\arguments{ -\item{exp}{A named list with multidimensional array objects containing -the seasonal forecast experiment in the data element for each variable. -Specific variables are needed for each method used in computing the -Potential Evapotranspiration. See parameter 'pet_method'. The accepted -variables are: 'tmin' and 'tmax', required for methods 'hargreaves' and -'hargreaves_modified' and 'tmean', required for method 'thornthwaite'. -Variable 'pr' is always needed. The units for temperature variables -('tmin', 'tmax' and 'tmean') need to be in Celcius degrees; the units for -precipitation ('pr') need to be in mm/month.} - -\item{dates_exp}{An array of temporal dimensions containing the Dates of -'exp'. It must be of class 'Date' or 'POSIXct'.} - -\item{lat}{A numeric vector containing the latitude values of 'exp'.} - -\item{exp_cor}{A named list with multidimensional array objects for each -variable in which the quantile PeriodSPEI should be applied. If it is not -specified, the PeriodSPEI is calculated from object 'exp'.} - -\item{dates_expcor}{An array of temporal dimensions containing the Dates of -'exp_cor'. It must be of class 'Date' or 'POSIXct'.} - -\item{time_dim}{A character string indicating the name of the temporal -dimension. By default, it is set to 'syear'.} - -\item{leadtime_dim}{A character string indicating the name of the temporal -dimension. By default, it is set to 'time'.} - -\item{memb_dim}{A character string indicating the name of the dimension in -which the ensemble members are stored. When set it to NULL, threshold is -computed for individual members.} - -\item{lat_dim}{A character string indicating the name of the latitudinal -dimension. By default it is set by 'latitude'.} - -\item{accum}{accum An integer value indicating the number of months for the -accumulation for each variable. When it is greater than 1, the result will -be filled with NA until the accum time_dim dimension number due to the -accumulation to previous months. The accumulation is performed backwards -by default.} - -\item{ref_period}{A list with two numeric values with the starting and end -points of the reference period used for computing the index. The default -value is NULL indicating that the first and end values in data will be -used as starting and end points.} - -\item{params}{A multidimensional array with named dimensions for computing the -SPEI. This option overrides computation of fitting parameters. It needs -to be of same time dimension (specified in 'time_dim') of 'exp' and a -dimension named 'coef' with the length of the coefficients needed for the -used distribution (for 'Gamma' coef dimension is of lenght 2, for -'log-Logistic' is of length 3). It also needs to have a leadtime dimension -(specified in 'leadtime_dim') of length 1.} - -\item{pet_exp}{A multidimensional array containing the Potential -EvapoTranspiration data of 'exp'. It must have the same dimensions of the -variable 'pr' of 'exp'. If it is NULL it is calculated using the provided -variables with specified 'pet_method'. It is NULL by default.} - -\item{pet_expcor}{A multidimensional array containing the Potential -EvapoTranspiration data of 'exp_cor'. It must have the same dimensions of -the variable 'pr' of 'exp_cor'. If it is NULL it is calculated using the -provided variables with specified 'pet_method'. It is NULL by default.} - -\item{standardization}{A logical value indicating wether the standardization -is computed.} - -\item{pet_method}{A character string indicating the method used to compute -the potential evapotranspiration. The accepted methods are: -'hargreaves' and 'hargreaves_modified', that require the data to have -variables tmin and tmax; and 'thornthwaite', that requires variable -'tmean'.} - -\item{method}{A character string indicating the standardization method used. -If can be: 'parametric' or 'non-parametric'.} - -\item{distribution}{A character string indicating the name of the distribution -function to be used for computing the SPEI. The accepted names are: -'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The -'Gamma' method only works when only precipitation is provided and other -variables are 0 because it is positive defined (SPI indicator).} - -\item{handle_infinity}{A logical value wether to return Infinite values (TRUE) -or not (FALSE).} - -\item{return_params}{A logical value indicating wether to return parameters -array (TRUE) or not (FALSE). It is FALSE by default.} - -\item{na.rm}{A logical value indicating whether NA values should be removed -from data. It is FALSE by default. If it is FALSE and there are NA values, -(if standardization is TRUE) all values of other dimensions except time_dim -and leadtime_dim will be set to NA directly. On the other hand, if it is -TRUE, if the data from other dimensions except time_dim and leadtime_dim is -not reaching 4 values, it is not enough values to estimate the parameters -and the result will include NA.} - -\item{ncores}{An integer value indicating the number of cores to use in -parallel computation.} -} -\value{ -A multidimensional array containing the SPEI with same dimensions as -input data. If 'exp_cor' is provided, only results from 'exp_cor' will be -provided. The parameters of the standardization will only be returned -if 'return_params' is TRUE. The SPEI will only be computed if -'standardization' is TRUE. If 'standardization' is FALSE, only the climatic -water balance (precipitation minus evapotranspiration) will be returned. The -resultant arrays will have the same dimensions as the initial input data. -} -\description{ -Calculation of the Standardised Precipitation-Evapotranspiration Index (SPEI) -that is a multiscalar drought index based on climatic data. It can be used for -determining the onset, duration and magnitude of drought conditions with -respect to normal conditions. The idea behind the SPEI is to compare the -highest possible evapotranspiration with the current water availability. The -SPEI uses for a specific time frequency the difference between precipitation -and potential evapotranspiration. -} -\details{ -Next, some specifications for the calculation of this indicator will be -discussed. On the one hand, the model to be used to calculate Potential -Evapotranspiration is specified with the 'pet_method' parameter (it can be -hargreaves, hargraves modified or thornwhite). On the other hand, to choose -the time scale in which you want to accumulate the SPEI (SPEI3, SPEI6...) is -done using the 'accum' parameter, where you must indicate the number of time -steps you want to accumulate throughout 'leadtime_dim'. The accumulation will -be performed backwards by default. Since the accumulation is done for the -elapsed time steps, there will be no complete accumulations until reaching the -time instant equal to the value of the 'accum' parameter. For this reason, in -the result, we will find that for the dimension where the accumulation has -been carried out, the values of the array will be NA since they do not include -complete accumulations. Also, there is a parameter to specify if the -standardization that can be TRUE or FALSE. If it is TRUE, the data is fit to a -probability distribution to transform the original values to standardized -units that are comparable in space and time and at different SPEI time scales. -The 'na.rm' parameter is a logical parameter used to decide whether to remove -the NA values from the data before doing the calculation. It must be taken -into account that if 'na.rm' is FALSE and there is some NA value in the -specific coordinates which the SPEI is computed, standardization cannot be -carried out for those coordinates and therefore, the result will be filled -with NA for the specific coordinates. However, when 'na.rm' is TRUE, if the -amount of data for those specific coordinates is smaller than 4, it will not -be possible to carry out because we will not have enough data and the result -will be also filled with NAs for that coordinates. When only 'exp' is provided -('exp_cor' is NULL) the Standardization is computed with cross validation. For -more information about SPEI calculation, see functions PeriodPET, -PeriodAccumulation and PeriodStandardization. -} -\examples{ -dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, - latitude = 2, longitude = 1, ensemble = 25) -dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 1, time = 3, - latitude = 2, longitude = 1, ensemble = 15) -exp_tmax <- array(rnorm(900, 27.73, 5.26), dim = dims) -exp_tmin <- array(rnorm(900, 14.83, 3.86), dim = dims) -exp_pr <- array(rnorm(900, 21.19, 25.64), dim = dims) - -expcor_tmax <- array(rnorm(540, 29.03, 5.67), dim = dimscor) -expcor_tmin <- array(rnorm(540, 15.70, 4.40), dim = dimscor) -expcor_pr <- array(rnorm(540, 15.62, 21.38), dim = dimscor) - -dates_exp <- as.Date(c(paste0(2010:2015, "-08-16"), - paste0(2010:2015, "-09-15"), - paste0(2010:2015, "-10-16"))) -dim(dates_exp) <- c(sday = 1, sweek = 1, syear = 6, time = 3) -dates_expcor <- as.POSIXct(c(paste0(2020, "-08-16"), - paste0(2020, "-09-15"), - paste0(2020, "-10-16")), 'UTC') -dim(dates_expcor) <- c(sday = 1, sweek = 1, syear = 1, time = 3) -lat <- c(40,40.1) - -exp <- list('tmax' = exp_tmax, 'tmin' = exp_tmin, 'pr' = exp_pr) -exp_cor <- list('tmax' = expcor_tmax, 'tmin' = expcor_tmin, - 'pr' = expcor_pr) -res <- PeriodSPEI(exp = exp, exp_cor = exp_cor, lat = lat, - dates_exp = dates_exp, dates_expcor = dates_expcor) - -} -- GitLab From 59143d15626ee15083b1a63b42bc472746792c6b Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 15 Nov 2023 17:15:21 +0100 Subject: [PATCH 33/42] Update PeriodAccumulation with last changes --- R/PeriodAccumulation.R | 17 ++++++++++------- man/CST_PeriodAccumulation.Rd | 12 +++++++----- man/PeriodAccumulation.Rd | 4 ++-- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/R/PeriodAccumulation.R b/R/PeriodAccumulation.R index e573de9..fff4fd4 100644 --- a/R/PeriodAccumulation.R +++ b/R/PeriodAccumulation.R @@ -20,8 +20,8 @@ #'input data must be this object class. If you don't work with 's2dv_cube', see #'PeriodAccumulation. #' -#'@param data An 's2dv_cube' object as provided function \code{CST_Load} in -#' package CSTools. +#'@param data An 's2dv_cube' object as provided function \code{CST_Start} or +#' \code{CST_Load} in package CSTools. #'@param start An optional parameter to defined the initial date of the period #' to select from the data by providing a list of two elements: the initial #' date of the period and the initial m onth of the period. By default it is @@ -74,10 +74,12 @@ #' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) #'dim(Dates) <- c(sdate = 3, ftime = 214) #'exp$attrs$Dates <- Dates -#'SprR <- CST_PeriodAccumulation(exp, start = list(21, 4), end = list(21, 6)) +#'SprR <- CST_PeriodAccumulation(exp, start = list(21, 4), end = list(21, 6), +#' time_dim = 'ftime') #'dim(SprR$data) #'head(SprR$attrs$Dates) -#'HarR <- CST_PeriodAccumulation(exp, start = list(21, 8), end = list(21, 10)) +#'HarR <- CST_PeriodAccumulation(exp, start = list(21, 8), end = list(21, 10), +#' time_dim = 'ftime') #'dim(HarR$data) #'head(HarR$attrs$Dates) #' @@ -85,7 +87,7 @@ #'@importFrom ClimProjDiags Subset #'@export CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, - time_dim = 'ftime', rollwidth = NULL, + time_dim = 'time', rollwidth = NULL, na.rm = FALSE, ncores = NULL) { # Check 's2dv_cube' if (!inherits(data, 's2dv_cube')) { @@ -118,6 +120,7 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, data$attrs$Dates <- Dates } if (is.null(rollwidth)) { + data$coords[[time_dim]] <- NULL if (!is.null(dim(Dates))) { # Create time_bounds time_bounds <- NULL @@ -199,9 +202,9 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, #' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) #'dim(Dates) <- c(sdate = 3, time = 214) #'SprR <- PeriodAccumulation(data, dates = Dates, start = list(21, 4), -#' end = list(21, 6)) +#' end = list(21, 6), time_dim = 'ftime') #'HarR <- PeriodAccumulation(data, dates = Dates, start = list(21, 8), -#' end = list(21, 10)) +#' end = list(21, 10), time_dim = 'ftime') #' #'@import multiApply #'@importFrom zoo rollapply diff --git a/man/CST_PeriodAccumulation.Rd b/man/CST_PeriodAccumulation.Rd index 3de6d4b..2637f02 100644 --- a/man/CST_PeriodAccumulation.Rd +++ b/man/CST_PeriodAccumulation.Rd @@ -8,15 +8,15 @@ CST_PeriodAccumulation( data, start = NULL, end = NULL, - time_dim = "ftime", + time_dim = "time", rollwidth = NULL, na.rm = FALSE, ncores = NULL ) } \arguments{ -\item{data}{An 's2dv_cube' object as provided function \code{CST_Load} in -package CSTools.} +\item{data}{An 's2dv_cube' object as provided function \code{CST_Start} or +\code{CST_Load} in package CSTools.} \item{start}{An optional parameter to defined the initial date of the period to select from the data by providing a list of two elements: the initial @@ -98,10 +98,12 @@ Dates <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), as.Date("30-11-2002", format = "\%d-\%m-\%Y"), by = 'day')) dim(Dates) <- c(sdate = 3, ftime = 214) exp$attrs$Dates <- Dates -SprR <- CST_PeriodAccumulation(exp, start = list(21, 4), end = list(21, 6)) +SprR <- CST_PeriodAccumulation(exp, start = list(21, 4), end = list(21, 6), + time_dim = 'ftime') dim(SprR$data) head(SprR$attrs$Dates) -HarR <- CST_PeriodAccumulation(exp, start = list(21, 8), end = list(21, 10)) +HarR <- CST_PeriodAccumulation(exp, start = list(21, 8), end = list(21, 10), + time_dim = 'ftime') dim(HarR$data) head(HarR$attrs$Dates) diff --git a/man/PeriodAccumulation.Rd b/man/PeriodAccumulation.Rd index 3e134c8..7aade02 100644 --- a/man/PeriodAccumulation.Rd +++ b/man/PeriodAccumulation.Rd @@ -92,8 +92,8 @@ Dates <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), as.Date("30-11-2002", format = "\%d-\%m-\%Y"), by = 'day')) dim(Dates) <- c(sdate = 3, time = 214) SprR <- PeriodAccumulation(data, dates = Dates, start = list(21, 4), - end = list(21, 6)) + end = list(21, 6), time_dim = 'ftime') HarR <- PeriodAccumulation(data, dates = Dates, start = list(21, 8), - end = list(21, 10)) + end = list(21, 10), time_dim = 'ftime') } -- GitLab From a6a138816fb8f23e268ea300fac709657dc3316a Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 16 Nov 2023 09:26:39 +0100 Subject: [PATCH 34/42] Add imports in DESCRIPTION --- DESCRIPTION | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ea4cb58..b716d5d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -31,10 +31,14 @@ Depends: Imports: multiApply (>= 2.1.1), stats, - ClimProjDiags + ClimProjDiags, + CSTools, + SPEI, + lmom, + lmomco, + zoo Suggests: testthat, - CSTools, knitr, markdown, rmarkdown -- GitLab From 74f0aa2292d99d1ac43d74d50c532062f9692647 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 16 Nov 2023 12:56:28 +0100 Subject: [PATCH 35/42] Correct unit tests, examples and improve documentation --- R/PeriodAccumulation.R | 34 ++++++++++++++++-------- R/PeriodPET.R | 7 +++-- R/PeriodStandardization.R | 3 ++- R/zzz.R | 6 ++--- man/CST_PeriodAccumulation.Rd | 14 +++++++++- man/CST_PeriodPET.Rd | 8 ++---- man/PeriodAccumulation.Rd | 16 ++++++++--- man/PeriodPET.Rd | 3 ++- tests/testthat/test-PeriodAccumulation.R | 8 +++--- 9 files changed, 65 insertions(+), 34 deletions(-) diff --git a/R/PeriodAccumulation.R b/R/PeriodAccumulation.R index fff4fd4..d5dc059 100644 --- a/R/PeriodAccumulation.R +++ b/R/PeriodAccumulation.R @@ -40,6 +40,14 @@ #' applied backwards 'time_dim', if it is negative, it will be forward it. When #' this parameter is NULL, the sum is applied over all 'time_dim', in a #' specified period. It is NULL by default. +#'@param sdate_dim (Only needed when rollwidth is used). A character string +#' indicating the name of the start date dimension to compute the rolling +#' accumulation. By default, it is set to 'sdate'. +#'@param frequency (Only needed when rollwidth is used). A character string +#' indicating the time frequency of the data to apply the rolling accumulation. +#' It can be 'daily' or 'monthly'. If it is set to 'monthly', values from +#' continuous months will be accumulated; if it is 'daliy', values from +#' continuous days will be accumulated. It is set to 'monthly' by default. #'@param na.rm A logical value indicating whether to ignore NA values (TRUE) or #' not (FALSE). #'@param ncores An integer indicating the number of cores to use in parallel @@ -63,7 +71,7 @@ #'exp$data <- array(rnorm(216)*200, dim = c(dataset = 1, member = 2, sdate = 3, #' ftime = 9, lat = 2, lon = 2)) #'class(exp) <- 's2dv_cube' -#'TP <- CST_PeriodAccumulation(exp) +#'TP <- CST_PeriodAccumulation(exp, time_dim = 'ftime') #'exp$data <- array(rnorm(5 * 3 * 214 * 2), #' c(memb = 5, sdate = 3, ftime = 214, lon = 2)) #'Dates <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), @@ -88,6 +96,7 @@ #'@export CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, time_dim = 'time', rollwidth = NULL, + sdate_dim = 'sdate', frequency = 'monthly', na.rm = FALSE, ncores = NULL) { # Check 's2dv_cube' if (!inherits(data, 's2dv_cube')) { @@ -111,8 +120,8 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, data$data <- PeriodAccumulation(data = data$data, dates = Dates, start = start, end = end, time_dim = time_dim, rollwidth = rollwidth, - sdate_dim = sdate_dim, na.rm = na.rm, - ncores = ncores) + sdate_dim = sdate_dim, frequency = frequency, + na.rm = na.rm, ncores = ncores) data$dims <- dim(data$data) if (!is.null(start) & !is.null(end)) { Dates <- SelectPeriodOnDates(dates = Dates, start = start, end = end, @@ -169,19 +178,24 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, #' the period and the final month of the period. By default it is set to NULL #' and the indicator is computed using all the data provided in \code{data}. #'@param time_dim A character string indicating the name of the dimension to -#' compute the indicator. By default, it is set to 'time'. More than one -#' dimension name matching the dimensions provided in the object -#' \code{data$data} can be specified. +#' compute the indicator. By default, it is set to 'time'. #'@param rollwidth An optional parameter to indicate the number of time #' steps the rolling sum is applied to. If it is positive, the rolling sum is #' applied backwards 'time_dim', if it is negative, it will be forward it. When #' this parameter is NULL, the sum is applied over all 'time_dim', in a #' specified period. It is NULL by default. +#'@param sdate_dim (Only needed when rollwidth is used). A character string +#' indicating the name of the start date dimension to compute the rolling +#' accumulation. By default, it is set to 'sdate'. +#'@param frequency (Only needed when rollwidth is used). A character string +#' indicating the time frequency of the data to apply the rolling accumulation. +#' It can be 'daily' or 'monthly'. If it is set to 'monthly', values from +#' continuous months will be accumulated; if it is 'daliy', values from +#' continuous days will be accumulated. It is set to 'monthly' by default. #'@param na.rm A logical value indicating whether to ignore NA values (TRUE) or #' not (FALSE). #'@param ncores An integer indicating the number of cores to use in parallel #' computation. -#' #'@return A multidimensional array with named dimensions containing the #'accumulated data in the element \code{data}. If parameter 'rollwidth' is #'not used, it will have the dimensions of the input 'data' except the dimension @@ -211,8 +225,8 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, #'@export PeriodAccumulation <- function(data, dates = NULL, start = NULL, end = NULL, time_dim = 'time', rollwidth = NULL, - sdate_dim = 'sdate', na.rm = FALSE, - frequency = 'daily', ncores = NULL) { + sdate_dim = 'sdate', frequency = 'monthly', + na.rm = FALSE, ncores = NULL) { # Initial checks ## data if (is.null(data)) { @@ -311,8 +325,6 @@ PeriodAccumulation <- function(data, dates = NULL, start = NULL, end = NULL, return(total) } -data <- array(c(1,3,2,4), dim = c(time = 2, sdate = 2)) - .rollaccumulation <- function(data, mask_dates, rollwidth = 1, forwardroll = FALSE, na.rm = FALSE) { dims <- dim(data) diff --git a/R/PeriodPET.R b/R/PeriodPET.R index de20b2d..2bfefa6 100644 --- a/R/PeriodPET.R +++ b/R/PeriodPET.R @@ -23,9 +23,7 @@ #' 'tmax' and 'pr'; for method 'thornthwaite' 'tmean' is required. The units #' for temperature variables ('tmin', 'tmax' and 'tmean') need to be in Celcius #' degrees; the units for precipitation ('pr') need to be in mm/month. -#'@param dates An array of temporal dimensions containing the Dates of -#' 'data'. It must be of class 'Date' or 'POSIXct'. -#'@param lat A numeric vector containing the latitude values of 'data'. +#' Currently the function works only with monthly data from different years. #'@param pet_method A character string indicating the method used to compute #' the potential evapotranspiration. The accepted methods are: #' 'hargreaves' and 'hargreaves_modified', that require the data to have @@ -128,6 +126,7 @@ CST_PeriodPET <- function(data, pet_method = 'hargreaves', #' 'tmax' and 'pr'; for method 'thornthwaite' 'tmean' is required. The units #' for temperature variables ('tmin', 'tmax' and 'tmean') need to be in Celcius #' degrees; the units for precipitation ('pr') need to be in mm/month. +#' Currently the function works only with monthly data from different years. #'@param dates An array of temporal dimensions containing the Dates of #' 'data'. It must be of class 'Date' or 'POSIXct'. #'@param lat A numeric vector containing the latitude values of 'data'. @@ -272,7 +271,7 @@ PeriodPET <- function(data, dates, lat, pet_method = 'hargreaves', } # complete dates - mask_dates <- .datesmask(dates) + mask_dates <- .datesmask(dates, frequency = 'monthly') lat_mask <- array(lat, dim = c(1, length(lat))) names(dim(lat_mask)) <- c('dat', lat_dim) diff --git a/R/PeriodStandardization.R b/R/PeriodStandardization.R index 6a83ed4..da693d7 100644 --- a/R/PeriodStandardization.R +++ b/R/PeriodStandardization.R @@ -316,6 +316,7 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, } ## ref_period if (!is.null(ref_period)) { + years_dates <- format(dates, "%Y") if (is.null(dates)) { warning("Parameter 'dates' is not provided so 'ref_period' can't be ", "used.") @@ -334,7 +335,7 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, warning("In parameter 'ref_period' 'start' cannot be after 'end'. It ", "will not be used.") ref_period <- NULL - } else if (!all(unlist(ref_period) %in% year(dates))) { + } else if (!all(unlist(ref_period) %in% years_dates)) { warning("Parameter 'ref_period' contain years outside the dates. ", "It will not be used.") ref_period <- NULL diff --git a/R/zzz.R b/R/zzz.R index b108beb..cee01de 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -109,13 +109,13 @@ wind2CF <- function(wind, pc) { } # Function that creates a mask array from dates for the whole year -.datesmask <- function(dates, frequency = 'monthly') { +.datesmask <- function(dates, frequency = 'daily') { years <- format(dates, "%Y") ini <- as.Date(paste(min(years), 01, 01, sep = '-')) end <- as.Date(paste(max(years), 12, 31, sep = '-')) daily <- as.Date(seq(ini, end, by = "day")) if (frequency == 'monthly') { - days <- format(daily, "%d") + days <- as.numeric(format(daily, "%d")) monthly <- daily[which(days == 1)] dates_mask <- array(0, dim = length(monthly)) for (dd in 1:length(dates)) { @@ -134,4 +134,4 @@ wind2CF <- function(wind, pc) { } return(dates_mask) -} \ No newline at end of file +} diff --git a/man/CST_PeriodAccumulation.Rd b/man/CST_PeriodAccumulation.Rd index 2637f02..1b8a305 100644 --- a/man/CST_PeriodAccumulation.Rd +++ b/man/CST_PeriodAccumulation.Rd @@ -10,6 +10,8 @@ CST_PeriodAccumulation( end = NULL, time_dim = "time", rollwidth = NULL, + sdate_dim = "sdate", + frequency = "monthly", na.rm = FALSE, ncores = NULL ) @@ -40,6 +42,16 @@ applied backwards 'time_dim', if it is negative, it will be forward it. When this parameter is NULL, the sum is applied over all 'time_dim', in a specified period. It is NULL by default.} +\item{sdate_dim}{(Only needed when rollwidth is used). A character string +indicating the name of the start date dimension to compute the rolling +accumulation. By default, it is set to 'sdate'.} + +\item{frequency}{(Only needed when rollwidth is used). A character string +indicating the time frequency of the data to apply the rolling accumulation. +It can be 'daily' or 'monthly'. If it is set to 'monthly', values from +continuous months will be accumulated; if it is 'daliy', values from +continuous days will be accumulated. It is set to 'monthly' by default.} + \item{na.rm}{A logical value indicating whether to ignore NA values (TRUE) or not (FALSE).} @@ -87,7 +99,7 @@ exp <- NULL exp$data <- array(rnorm(216)*200, dim = c(dataset = 1, member = 2, sdate = 3, ftime = 9, lat = 2, lon = 2)) class(exp) <- 's2dv_cube' -TP <- CST_PeriodAccumulation(exp) +TP <- CST_PeriodAccumulation(exp, time_dim = 'ftime') exp$data <- array(rnorm(5 * 3 * 214 * 2), c(memb = 5, sdate = 3, ftime = 214, lon = 2)) Dates <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), diff --git a/man/CST_PeriodPET.Rd b/man/CST_PeriodPET.Rd index d705a3c..eb84f25 100644 --- a/man/CST_PeriodPET.Rd +++ b/man/CST_PeriodPET.Rd @@ -26,7 +26,8 @@ for Precipitation is 'pr'. The accepted variable names for each method are: For 'hargreaves': 'tmin' and 'tmax'; for 'hargreaves_modified' are 'tmin', 'tmax' and 'pr'; for method 'thornthwaite' 'tmean' is required. The units for temperature variables ('tmin', 'tmax' and 'tmean') need to be in Celcius -degrees; the units for precipitation ('pr') need to be in mm/month.} +degrees; the units for precipitation ('pr') need to be in mm/month. +Currently the function works only with monthly data from different years.} \item{pet_method}{A character string indicating the method used to compute the potential evapotranspiration. The accepted methods are: @@ -48,11 +49,6 @@ from data. It is FALSE by default.} \item{ncores}{An integer value indicating the number of cores to use in parallel computation.} - -\item{dates}{An array of temporal dimensions containing the Dates of -'data'. It must be of class 'Date' or 'POSIXct'.} - -\item{lat}{A numeric vector containing the latitude values of 'data'.} } \description{ Compute the Potential evapotranspiration (PET) that is the amount of diff --git a/man/PeriodAccumulation.Rd b/man/PeriodAccumulation.Rd index 7aade02..94754d0 100644 --- a/man/PeriodAccumulation.Rd +++ b/man/PeriodAccumulation.Rd @@ -12,8 +12,8 @@ PeriodAccumulation( time_dim = "time", rollwidth = NULL, sdate_dim = "sdate", + frequency = "monthly", na.rm = FALSE, - frequency = "daily", ncores = NULL ) } @@ -36,9 +36,7 @@ the period and the final month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}.} \item{time_dim}{A character string indicating the name of the dimension to -compute the indicator. By default, it is set to 'time'. More than one -dimension name matching the dimensions provided in the object -\code{data$data} can be specified.} +compute the indicator. By default, it is set to 'time'.} \item{rollwidth}{An optional parameter to indicate the number of time steps the rolling sum is applied to. If it is positive, the rolling sum is @@ -46,6 +44,16 @@ applied backwards 'time_dim', if it is negative, it will be forward it. When this parameter is NULL, the sum is applied over all 'time_dim', in a specified period. It is NULL by default.} +\item{sdate_dim}{(Only needed when rollwidth is used). A character string +indicating the name of the start date dimension to compute the rolling +accumulation. By default, it is set to 'sdate'.} + +\item{frequency}{(Only needed when rollwidth is used). A character string +indicating the time frequency of the data to apply the rolling accumulation. +It can be 'daily' or 'monthly'. If it is set to 'monthly', values from +continuous months will be accumulated; if it is 'daliy', values from +continuous days will be accumulated. It is set to 'monthly' by default.} + \item{na.rm}{A logical value indicating whether to ignore NA values (TRUE) or not (FALSE).} diff --git a/man/PeriodPET.Rd b/man/PeriodPET.Rd index 1b00339..d8c7747 100644 --- a/man/PeriodPET.Rd +++ b/man/PeriodPET.Rd @@ -28,7 +28,8 @@ for Precipitation is 'pr'. The accepted variable names for each method are: For 'hargreaves': 'tmin' and 'tmax'; for 'hargreaves_modified' are 'tmin', 'tmax' and 'pr'; for method 'thornthwaite' 'tmean' is required. The units for temperature variables ('tmin', 'tmax' and 'tmean') need to be in Celcius -degrees; the units for precipitation ('pr') need to be in mm/month.} +degrees; the units for precipitation ('pr') need to be in mm/month. +Currently the function works only with monthly data from different years.} \item{dates}{An array of temporal dimensions containing the Dates of 'data'. It must be of class 'Date' or 'POSIXct'.} diff --git a/tests/testthat/test-PeriodAccumulation.R b/tests/testthat/test-PeriodAccumulation.R index 1ce36d9..0e054ba 100644 --- a/tests/testthat/test-PeriodAccumulation.R +++ b/tests/testthat/test-PeriodAccumulation.R @@ -43,7 +43,8 @@ test_that("1. Initial checks", { ) # Dates subset expect_warning( - CST_PeriodAccumulation(data = dat1_2, start = list(1,2), end = list(2,3)), + CST_PeriodAccumulation(data = dat1_2, start = list(1,2), end = list(2,3), + time_dim = 'ftime'), paste0("Dimensions in 'data' element 'attrs$Dates' are missed and ", "all data would be used."), fixed = TRUE @@ -106,7 +107,8 @@ test_that("2. Seasonal", { c(memb = 1, sdate = 3, lon = 2)) expect_equal( - CST_PeriodAccumulation(exp, start = list(21, 4), end = list(21, 6))$data, + CST_PeriodAccumulation(exp, start = list(21, 4), end = list(21, 6), + time_dim = 'ftime')$data, output$data ) }) @@ -218,4 +220,4 @@ test_that("4. Rolling", { start = list(1, 4), end = list(2, 4)), array(c(NA, NA, 4, 6), dim = c(sdate = 2, time = 2, member = 1)) ) -}) \ No newline at end of file +}) -- GitLab From 9265758cfbbe35fa117bcf18ab36558f5d9a4de9 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 16 Nov 2023 14:24:54 +0100 Subject: [PATCH 36/42] Correct example --- R/PeriodAccumulation.R | 4 ++-- man/PeriodAccumulation.Rd | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/PeriodAccumulation.R b/R/PeriodAccumulation.R index d5dc059..8893e46 100644 --- a/R/PeriodAccumulation.R +++ b/R/PeriodAccumulation.R @@ -207,14 +207,14 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, #' ftime = 9, lat = 2, lon = 2)) #'TP <- PeriodAccumulation(exp, time_dim = 'ftime') #'data <- array(rnorm(5 * 3 * 214 * 2), -#' c(memb = 5, sdate = 3, time = 214, lon = 2)) +#' c(memb = 5, sdate = 3, ftime = 214, lon = 2)) #'Dates <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), #' as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), #' seq(as.Date("01-05-2001", format = "%d-%m-%Y"), #' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), #' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), #' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) -#'dim(Dates) <- c(sdate = 3, time = 214) +#'dim(Dates) <- c(sdate = 3, ftime = 214) #'SprR <- PeriodAccumulation(data, dates = Dates, start = list(21, 4), #' end = list(21, 6), time_dim = 'ftime') #'HarR <- PeriodAccumulation(data, dates = Dates, start = list(21, 8), diff --git a/man/PeriodAccumulation.Rd b/man/PeriodAccumulation.Rd index 94754d0..ca01c20 100644 --- a/man/PeriodAccumulation.Rd +++ b/man/PeriodAccumulation.Rd @@ -91,14 +91,14 @@ exp <- array(rnorm(216)*200, dim = c(dataset = 1, member = 2, sdate = 3, ftime = 9, lat = 2, lon = 2)) TP <- PeriodAccumulation(exp, time_dim = 'ftime') data <- array(rnorm(5 * 3 * 214 * 2), - c(memb = 5, sdate = 3, time = 214, lon = 2)) + c(memb = 5, sdate = 3, ftime = 214, lon = 2)) Dates <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), as.Date("30-11-2000", format = "\%d-\%m-\%Y"), by = 'day'), seq(as.Date("01-05-2001", format = "\%d-\%m-\%Y"), as.Date("30-11-2001", format = "\%d-\%m-\%Y"), by = 'day'), seq(as.Date("01-05-2002", format = "\%d-\%m-\%Y"), as.Date("30-11-2002", format = "\%d-\%m-\%Y"), by = 'day')) -dim(Dates) <- c(sdate = 3, time = 214) +dim(Dates) <- c(sdate = 3, ftime = 214) SprR <- PeriodAccumulation(data, dates = Dates, start = list(21, 4), end = list(21, 6), time_dim = 'ftime') HarR <- PeriodAccumulation(data, dates = Dates, start = list(21, 8), -- GitLab From fce7b060cdc84ccd8c01860c140b7275c48c2d1d Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 16 Nov 2023 15:15:13 +0100 Subject: [PATCH 37/42] Add package import stats and correct parameters in .standardization --- DESCRIPTION | 3 ++- NAMESPACE | 3 +++ R/PeriodStandardization.R | 28 +++++++++++++++------------- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b716d5d..a117544 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -36,7 +36,8 @@ Imports: SPEI, lmom, lmomco, - zoo + zoo, + stats Suggests: testthat, knitr, diff --git a/NAMESPACE b/NAMESPACE index 9b14b18..0065b6e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -50,6 +50,9 @@ importFrom(lmomco,pwm.ub) importFrom(lmomco,pwm2lmom) importFrom(stats,approxfun) importFrom(stats,ecdf) +importFrom(stats,qnorm) importFrom(stats,quantile) +importFrom(stats,sd) +importFrom(stats,window) importFrom(utils,read.delim) importFrom(zoo,rollapply) diff --git a/R/PeriodStandardization.R b/R/PeriodStandardization.R index da693d7..2cd9646 100644 --- a/R/PeriodStandardization.R +++ b/R/PeriodStandardization.R @@ -230,6 +230,7 @@ CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', #'@importFrom lmomco pwm.pp pwm.ub pwm2lmom are.lmom.valid parglo pargam parpe3 #'@importFrom lmom cdfglo cdfgam cdfpe3 pelglo pelgam pelpe3 #'@importFrom SPEI parglo.maxlik +#'@importFrom stats qnorm sd window #'@export PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, time_dim = 'syear', leadtime_dim = 'time', @@ -419,7 +420,7 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, res <- Apply(data = list(data), target_dims = c(leadtime_dim, time_dim, memb_dim), fun = .standardization, data_cor = NULL, params = NULL, - leadtime_dim = leadtime_dim, + leadtime_dim = leadtime_dim, time_dim = time_dim, ref_start = ref_start, ref_end = ref_end, handle_infinity = handle_infinity, method = method, distribution = distribution, @@ -430,7 +431,7 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, target_dims = list(data = c(leadtime_dim, time_dim, memb_dim), params = c(leadtime_dim, time_dim, 'coef')), fun = .standardization, data_cor = NULL, - leadtime_dim = leadtime_dim, + leadtime_dim = leadtime_dim, time_dim = time_dim, ref_start = ref_start, ref_end = ref_end, handle_infinity = handle_infinity, method = method, distribution = distribution, @@ -440,8 +441,8 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, } else { res <- Apply(data = list(data = data, data_cor = data_cor), target_dims = c(leadtime_dim, time_dim, memb_dim), - fun = .standardization, - params = NULL, leadtime_dim = leadtime_dim, + fun = .standardization, params = NULL, + leadtime_dim = leadtime_dim, time_dim = time_dim, ref_start = ref_start, ref_end = ref_end, handle_infinity = handle_infinity, method = method, distribution = distribution, @@ -473,7 +474,8 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, } } -.standardization <- function(data, data_cor = NULL, params = NULL, leadtime_dim = 'time', +.standardization <- function(data, data_cor = NULL, params = NULL, + leadtime_dim = 'time', time_dim = 'syear', ref_start = NULL, ref_end = NULL, handle_infinity = FALSE, method = 'parametric', distribution = 'log-Logistic', return_params = FALSE, na.rm = FALSE) { @@ -500,12 +502,12 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, for (i in 1:length(data2)) { bp[i,1] = sum(data2[] <= data2[i], na.rm = na.rm); # Writes the rank of the data } - std_index <- qnorm((bp - 0.44)/(length(data2) + 0.12)) + std_index <- stats::qnorm((bp - 0.44)/(length(data2) + 0.12)) dim(std_index) <- dims spei_mod[ff, , ] <- std_index } else { if (!is.null(ref_start) && !is.null(ref_end)) { - data_fit <- window(data2, ref_start, ref_end) + data_fit <- stats::window(data2, ref_start, ref_end) } else { data_fit <- data2 } @@ -519,7 +521,7 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, } f_params <- NA if (!any(is.na(acu_sorted)) & length(acu_sorted) != 0) { - acu_sd <- sd(acu_sorted) + acu_sd <- stats::sd(acu_sorted) if (!is.na(acu_sd) & acu_sd != 0) { if (distribution != "log-Logistic") { acu_sorted <- acu_sorted[acu_sorted > 0] @@ -542,7 +544,7 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, "Gamma" = lmom::cdfgam(data2, f_params), "PearsonIII" = lmom::cdfpe3(data2, f_params)) } - std_index_cv <- array(qnorm(cdf_res), dim = dims) + std_index_cv <- array(stats::qnorm(cdf_res), dim = dims) spei_mod[ff, nsd, ] <- std_index_cv[nsd, ] if (return_params) params_result[ff, nsd, ] <- f_params } @@ -563,14 +565,14 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, for (i in 1:length(data_cor2)) { bp[i,1] = sum(data_cor2[] <= data_cor2[i], na.rm = na.rm); # Writes the rank of the data } - std_index <- qnorm((bp - 0.44)/(length(data_cor2) + 0.12)) + std_index <- stats::qnorm((bp - 0.44)/(length(data_cor2) + 0.12)) dim(std_index) <- dimscor spei_mod[ff, , ] <- std_index } else { data2 <- data[ff, , ] dim(data2) <- dims if (!is.null(ref_start) && !is.null(ref_end)) { - data_fit <- window(data2, ref_start, ref_end) + data_fit <- stats::window(data2, ref_start, ref_end) } else { data_fit <- data2 } @@ -581,7 +583,7 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, acu_sorted <- sort.default(acu, method = "quick", na.last = TRUE) } if (!any(is.na(acu_sorted)) & length(acu_sorted) != 0) { - acu_sd <- sd(acu_sorted) + acu_sd <- stats::sd(acu_sorted) if (!is.na(acu_sd) & acu_sd != 0) { if (distribution != "log-Logistic") { acu_sorted <- acu_sorted[acu_sorted > 0] @@ -599,7 +601,7 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, "Gamma" = lmom::cdfgam(data_cor2, f_params), "PearsonIII" = lmom::cdfpe3(data_cor2, f_params)) } - std_index_cv <- array(qnorm(cdf_res), dim = dimscor) + std_index_cv <- array(stats::qnorm(cdf_res), dim = dimscor) spei_mod[ff, , ] <- std_index_cv if (return_params) params_result[ff, , ] <- f_params } -- GitLab From 34d59dc742168d7cbefae8fd2906d1816ab79155 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 16 Nov 2023 15:42:04 +0100 Subject: [PATCH 38/42] Correct DESCRIPTION --- DESCRIPTION | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a117544..b716d5d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -36,8 +36,7 @@ Imports: SPEI, lmom, lmomco, - zoo, - stats + zoo Suggests: testthat, knitr, -- GitLab From f4d61bd77d6e554dc389113ce091888f77cdb000 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 16 Nov 2023 15:51:35 +0100 Subject: [PATCH 39/42] Develte test from PerodSPEI --- tests/testthat/test-PeriodSPEI.R | 467 ------------------------------- 1 file changed, 467 deletions(-) delete mode 100644 tests/testthat/test-PeriodSPEI.R diff --git a/tests/testthat/test-PeriodSPEI.R b/tests/testthat/test-PeriodSPEI.R deleted file mode 100644 index b16bb69..0000000 --- a/tests/testthat/test-PeriodSPEI.R +++ /dev/null @@ -1,467 +0,0 @@ -############################################## -# cube1 -cube1 <- NULL -cube1$data <- array(rnorm(10), dim = c(lat = 2, time = 5)) -class(cube1) <- 's2dv_cube' - -# cube2 -cube2 <- NULL -cube2$data <- array(rnorm(10), dim = c(lat = 2, time = 5)) -class(cube2) <- 's2dv_cube' -cube2$coords <- list(lat = 1:2) - -# cube3 -cube3 <- NULL -cube3$data <- array(rnorm(10), dim = c(lat = 2, time = 5)) -class(cube3) <- 's2dv_cube' -cube3$coords <- list(lat = 1:2) -cube3$attrs$Dates <- as.Date(c(paste0(2010:2014, "-08-16"))) - -# dat1 -dims <- c(syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 10) -dimscor <- c(syear = 1, time = 3, latitude = 2, longitude = 1, ensemble = 15) - -set.seed(1) -exp_tmax <- array(rnorm(360, 27.73, 5.26), dim = dims) -set.seed(2) -exp_tmin <- array(rnorm(360, 14.83, 3.86), dim = dims) -set.seed(3) -exp_pr <- array(rnorm(360, 21.19, 25.64), dim = dims) - -set.seed(1) -expcor_tmax <- array(rnorm(60, 29.03, 5.67), dim = dimscor) -set.seed(2) -expcor_tmin <- array(rnorm(60, 15.70, 4.40), dim = dimscor) -set.seed(3) -expcor_pr <- array(rnorm(60, 15.62, 21.38), dim = dimscor) - -dates_exp <- as.POSIXct(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), - paste0(2010:2015, "-10-16")), "UTC") -dim(dates_exp) <- c(syear = 6, time = 3) - -dates_expcor <- as.POSIXct(c(paste0(2020, "-08-16"), paste0(2020, "-09-15"), - paste0(2020, "-10-16")), "UTC") -dim(dates_expcor) <- c(syear = 1, time = 3) - -lat <- c(40,40.1) - -exp1 <- list('tmax' = exp_tmax, 'tmin' = exp_tmin, 'pr' = exp_pr) -exp_cor1 <- list('tmax' = expcor_tmax, 'tmin' = expcor_tmin, 'pr' = expcor_pr) -params1 <- array(abs(rnorm(100)), dim = c(syear = 1, time = 3, latitude = 2, - longitude = 1, coef = 3)) -params2 <- array(abs(rnorm(100)), dim = c(syear = 6, time = 3, latitude = 2, - longitude = 1, coef = 3)) - -# dat2 -dims2 <- c(styear = 6, ftime = 3, lat = 2, lon = 1, member = 10) -dimscor2 <- c(styear = 1, ftime = 3, lat = 2, lon = 1, member = 15) - -set.seed(1) -exp_tmean <- array(rnorm(100, 17.34, 9.18), dim = dims2) -set.seed(2) -exp_pr <- array(rnorm(360, 21.19, 25.64), dim = dims2) - -set.seed(1) -expcor_tmean <- array(rnorm(100, 17.23, 9.19), dim = dimscor2) -set.seed(2) -expcor_pr <- array(rnorm(60, 15.62, 21.38), dim = dimscor2) - -dates_exp2 <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), - paste0(2010:2015, "-10-16"))) -dim(dates_exp2) <- c(sday = 1, sweek = 1, styear = 6, ftime = 3) - -dates_expcor2 <- as.Date(c(paste0(2020, "-08-16"), paste0(2020, "-09-15"), - paste0(2020, "-10-16"))) -dim(dates_expcor2) <- c(sday = 1, sweek = 1, styear = 1, ftime = 3) - -lat <- c(40,40.1) - -exp2 <- list('tmean' = exp_tmean, 'pr' = exp_pr) -exp_cor2 <- list('tmean' = expcor_tmean, 'pr' = expcor_pr) - -# cube4 -cube4_exp <- lapply(exp1, function(x) { - suppressWarnings( - CSTools::s2dv_cube(data = x, coords = list(latitude = c(40, 40.1)), - varName = 'test', Dates = dates_exp) - ) -}) -cube4_expcor <- lapply(exp_cor1, function(x) { - suppressWarnings( - CSTools::s2dv_cube(data = x, coords = list(latitude = c(40, 40.1)), - varName = 'test', Dates = dates_expcor) - ) -}) - -############################################## - -test_that("1. Initial checks CST_PeriodSPEI", { - # Check 's2dv_cube' - expect_error( - CST_PeriodSPEI(exp = NULL), - "Parameter 'exp' cannot be NULL." - ) - expect_error( - CST_PeriodSPEI(exp = array(10)), - "Parameter 'exp' must be a list of 's2dv_cube' class." - ) - # latitude - expect_error( - CST_PeriodSPEI(exp = list(cube1)), - paste0("Spatial coordinate names of parameter 'exp' do not match any ", - "of the names accepted by the package.") - ) - # Dates - expect_error( - CST_PeriodSPEI(exp = list(cube2)), - paste0("Element 'Dates' is not found in 'attrs' list of 'exp'. ", - "See 's2dv_cube' object description in README file for more ", - "information.") - ) - expect_error( - CST_PeriodSPEI(exp = list(cube3), exp_cor = list(cube2)), - paste0("Element 'Dates' is not found in 'attrs' list of 'exp_cor'. ", - "See 's2dv_cube' object description in README file for more ", - "information.") - ) -}) - -############################################## - -test_that("1. Initial checks PeriodSPEI", { - # exp - expect_error( - PeriodSPEI(exp = NULL), - "Parameter 'exp' needs to be a named list with the needed variables." - ) - expect_error( - PeriodSPEI(exp = list(1)), - "Parameter 'exp' needs to be a named list with the variable names." - ) - expect_error( - PeriodSPEI(exp = list(tmax = array(10))), - "Parameter 'exp' needs to be a list of arrays with dimension names." - ) - expect_error( - PeriodSPEI(exp = list(tmax = array(10, c(time = 10)), - tmin = array(10, c(time = 11)))), - "Parameter 'exp' variables need to have the same dimensions." - ) - expect_error( - PeriodSPEI(exp = list(tmax = array(10, c(time = 10)), - tmin = array(10, c(ftime = 10)))), - "Parameter 'exp' variables need to have the same dimensions." - ) - # exp_cor - expect_error( - PeriodSPEI(exp = exp1, exp_cor = 1), - paste0("Parameter 'exp_cor' needs to be a named list with the needed ", - "variables if it is not NULL.") - ) - expect_error( - PeriodSPEI(exp = exp1, exp_cor = list(1)), - "Parameter 'exp_cor' needs to be a named list with the variable names." - ) - expect_error( - PeriodSPEI(exp = exp1, exp_cor = list('tmax' = array(10))), - "Parameter 'exp_cor' needs to be a list of arrays with dimension names." - ) - expect_error( - PeriodSPEI(exp = exp1, exp_cor = list(tmax = array(10, c(time = 10)), - tmin = array(10, c(time = 11)))), - "Parameter 'exp_cor' variables need to have the same dimensions." - ) - expect_error( - PeriodSPEI(exp = exp1, lat = 'lat', dates_exp = dates_exp), - "Parameter 'lat' must be numeric." - ) - expect_error( - PeriodSPEI(exp = list(pr = array(10, c(time = 10, syear = 1, ensemble = 1))), - lat = 1:2, dates_exp = dates_exp), - "Parameter 'lat_dim' is not found in 'exp' dimension." - ) - # exp (2) - expect_warning( - PeriodSPEI(exp = exp1, pet_method = '1', dates_exp = dates_exp, lat = lat), - paste0("Parameter 'pet_method' needs to be 'hargreaves' or ", - "'hargreaves_modified'. It is set to 'hargreaves_modified'.") - ) - # time_dim - expect_error( - PeriodSPEI(exp = exp1, time_dim = 1, dates_exp = dates_exp, lat = lat), - "Parameter 'time_dim' must be a character string." - ) - expect_error( - PeriodSPEI(exp = exp2, exp_cor = exp_cor2, lat = lat, - dates_exp = dates_exp2, dates_expcor = dates_expcor2, - lat_dim = 'lat', pet_method = 'thornthwaite'), - "Parameter 'time_dim' is not found in 'exp' dimension." - ) - # leadtime_dim - expect_error( - PeriodSPEI(exp = exp1, leadtime_dim = 1, dates_exp = dates_exp, lat = lat), - "Parameter 'leadtime_dim' must be a character string." - ) - expect_error( - PeriodSPEI(exp = exp2, exp_cor = exp_cor2, lat = lat, - dates_exp = dates_exp2, dates_expcor = dates_expcor2, - lat_dim = 'lat', time_dim = 'ftime', pet_method = 'thornthwaite'), - "Parameter 'leadtime_dim' is not found in 'exp' dimension." - ) - # memb_dim - expect_error( - PeriodSPEI(exp = exp1, memb_dim = 1, dates_exp = dates_exp, lat = lat), - "Parameter 'memb_dim' must be a character string." - ) - expect_error( - PeriodSPEI(exp = exp2, exp_cor = exp_cor2, lat = lat, - dates_exp = dates_exp2, dates_expcor = dates_expcor2, - lat_dim = 'lat', time_dim = 'ftime', leadtime_dim = 'styear', - pet_method = 'thornthwaite'), - "Parameter 'memb_dim' is not found in 'exp' dimension." - ) - # lat_dim - expect_error( - PeriodSPEI(exp = exp1, lat_dim = 1, dates_exp = dates_exp, lat = lat) - ) - expect_error( - PeriodSPEI(exp = exp2, exp_cor = exp_cor2, lat = lat, - dates_exp = dates_exp2, dates_expcor = dates_expcor2), - "Parameter 'time_dim' is not found in 'exp' dimension." - ) - # accum - expect_error( - PeriodSPEI(exp = exp1, accum = 10, dates_exp = dates_exp, lat = lat), - "Cannot compute accumulation of 10 months because loaded data has only 3 months." - ) - # ref_period - expect_warning( - PeriodSPEI(exp = exp1, exp_cor = exp_cor1, dates_exp = dates_exp, - dates_expcor = dates_expcor, lat = lat, ref_period = 1), - paste0("Parameter 'ref_period' must be of length two indicating the ", - "first and end years of the reference period. It will not ", - "be used.") - ) - expect_warning( - PeriodSPEI(exp = exp1, exp_cor = exp_cor1, dates_exp = dates_exp, - dates_expcor = dates_expcor, lat = lat, ref_period = list('a', 1)), - paste0("Parameter 'ref_period' must be a numeric vector indicating the ", - "'start' and 'end' years of the reference period. It will not ", - "be used.") - ) - expect_warning( - PeriodSPEI(exp = exp1, exp_cor = exp_cor1, dates_exp = dates_exp, - dates_expcor = dates_expcor, lat = lat, ref_period = list(2012, 2011)), - paste0("In parameter 'ref_period' 'start' cannot be after 'end'. It ", - "will not be used.") - ) - expect_warning( - PeriodSPEI(exp = exp1, exp_cor = exp_cor1, dates_exp = dates_exp, - dates_expcor = dates_expcor, lat = lat, ref_period = list(2008, 2021)), - paste0("Parameter 'ref_period' contain years outside the dates. ", - "It will not be used.") - ) - # standardization - expect_error( - PeriodSPEI(exp = exp1, standardization = 10, dates_exp = dates_exp, lat = lat), - "Parameter 'standardization' must be a logical value." - ) - # handle_infinity - expect_error( - PeriodSPEI(exp = exp1, handle_infinity = 1, dates_exp = dates_exp, lat = lat), - "Parameter 'handle_infinity' must be a logical value." - ) - # method - expect_error( - PeriodSPEI(exp = exp1, method = 1, dates_exp = dates_exp, lat = lat), - paste0("Parameter 'method' must be a character string containing one ", - "of the following methods: 'parametric' or 'non-parametric'.") - ) - # distribution - expect_error( - PeriodSPEI(exp = exp1, distribution = 1, dates_exp = dates_exp, lat = lat), - paste0("Parameter 'distribution' must be a character string containing one ", - "of the following distributions: 'log-Logistic', 'Gamma' or ", - "'PearsonIII'.") - ) - # ncores - expect_error( - PeriodSPEI(exp = exp1, ncores = 1.5, dates_exp = dates_exp, lat = lat), - "Parameter 'ncores' must be a positive integer." - ) -}) - -############################################## - -test_that("2. Output checks: CST_PeriodSPEI", { - res1 <- CST_PeriodSPEI(exp = cube4_exp, exp_cor = NULL) - res2 <- CST_PeriodSPEI(exp = cube4_exp, exp_cor = cube4_expcor) - res3 <- CST_PeriodSPEI(exp = cube4_exp, exp_cor = cube4_expcor, standardization = F) - res4 <- CST_PeriodSPEI(exp = cube4_exp, exp_cor = NULL, return_params = T) - expect_equal( - names(res1), - c("data", "dims", "coords", "attrs") - ) - expect_equal( - res2$attrs$Variable$varName, - "SPEI" - ) - expect_equal( - res3$attrs$Variable$varName, - "Precipitation minus accumulated PET" - ) - expect_equal( - names(res3), - c("data", "dims", "coords", "attrs") - ) - expect_equal( - names(res4), - c("spei", "params") - ) -}) - -############################################## - -test_that("2. Output checks", { - res1 <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, - dates_exp = dates_exp, dates_expcor = dates_expcor, - return_params = TRUE, na.rm = TRUE) - res2 <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, - dates_exp = dates_exp, dates_expcor = dates_expcor, - standardization = FALSE) # No info about accumulation - res3 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, - dates_exp = dates_exp, return_params = TRUE) - # output dims - expect_equal( - names(res1), - c('spei', 'params') - ) - expect_equal( - dim(res2), - c(syear = 1, time = 3, latitude = 2, longitude = 1, ensemble = 15) - ) - expect_equal( - names(res3), - c('spei', 'params') - ) - expect_equal( - dim(res1[[1]]), - dimscor - ) - expect_equal( - dim(res1[[2]])[which(!names(dim(res1[[2]])) %in% c('coef', 'syear'))], - dims[which(!names(dims) %in% c('syear', 'ensemble'))] - ) - expect_equal( - dim(res2), - dimscor - ) - expect_equal( - dim(res3[[2]]), - c(syear = 6, time = 3, latitude = 2, longitude = 1, coef = 3) - ) - # exp - # exp_cor - # pet - # accum - res11 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, accum = 2, - dates_exp = dates_exp, na.rm = TRUE) - expect_equal( - res11[1,3,1,1,][1:4], - c(-0.6130081, -0.3446050, -0.7267427, -0.6112921), - tolerance = 0.0001 - ) - # ref_period - res_ref <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, accum = 2, - dates_exp = dates_exp, dates_expcor = dates_expcor, - na.rm = TRUE, ref_period = list(2011, 2013)) - expect_equal( - !identical(res1[[1]], res_ref), - TRUE - ) - # params - expect_error( - PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp, - params = params1, return_params = TRUE), - paste0("Parameter 'data' and 'params' must have same common dimensions ", - "except 'memb_dim' and 'coef'.") - ) - res6 <- PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp, - params = params2, return_params = TRUE) - expect_error( - PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp, - params = array(abs(rnorm(100)), dim = dimscor)), - paste0("Parameter 'params' must be a multidimensional array with named ", - "dimensions: 'syear', 'time' and 'coef'.") - ) - expect_equal( - dim(res6$params), - c(syear = 6, time = 3, latitude = 2, longitude = 1, coef = 3) - ) - # standarization - res4 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, - dates_exp = dates_exp, standardization = FALSE) - expect_equal( - names(res4), - NULL - ) - expect_equal( - dim(res4), - c(syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 10) - ) - # cross_validation - res_crossval_T <- PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp, - return_params = TRUE) - res_crossval_F <- PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp, - return_params = TRUE) - # cross_validation = TRUE - expect_equal( - dim(res_crossval_T$spei), - dims - ) - expect_equal( - dim(res_crossval_T$params), - c(syear = 6, time = 3, latitude = 2, longitude = 1, coef = 3) - ) - - # pet_method - ok - res5 <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, - dates_exp = dates_exp, dates_expcor = dates_expcor, - pet_method = c('hargreaves', 'hargreaves_modified')) - res6 <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, - dates_exp = dates_exp, dates_expcor = dates_expcor, - pet_method = c('hargreaves_modified', 'hargreaves')) - expect_equal( - identical(res5, res6), - FALSE - ) - expect_equal( - dim(res5), - dim(res6) - ) - - # time_dim, leadtime_dim, memb_dim, lat_dim - res7 <- PeriodSPEI(exp = exp2, exp_cor = exp_cor2, lat = lat, - dates_exp = dates_exp2, dates_expcor = dates_expcor2, - pet_method = c('thornthwaite', 'thornthwaite'), - lat_dim = 'lat', time_dim = 'styear', - leadtime_dim = 'ftime', memb_dim = 'member') - # method - ok - res8 <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, - dates_exp = dates_exp, dates_expcor = dates_expcor, - method = 'non-parametric') - # distribution - Only works for 'log-Logistic' - res9 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, - dates_exp = dates_exp, distribution = 'PearsonIII') # NA - res10 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, - dates_exp = dates_exp, distribution = 'Gamma') # NA - # param_error - - # handle_infinity - OK - res9 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, - dates_exp = dates_exp, handle_infinity = FALSE) - # na.rm - - - # ncores -}) - -############################################## \ No newline at end of file -- GitLab From 232988a15e42db297da72fa3eba12d2fcfcdf08a Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 16 Nov 2023 16:09:10 +0100 Subject: [PATCH 40/42] Correct documentation --- R/PeriodAccumulation.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/PeriodAccumulation.R b/R/PeriodAccumulation.R index 0d67630..a9e9f5f 100644 --- a/R/PeriodAccumulation.R +++ b/R/PeriodAccumulation.R @@ -69,18 +69,18 @@ #'@examples #'exp <- NULL #'exp$data <- array(rnorm(216)*200, dim = c(dataset = 1, member = 2, sdate = 3, -#' time = 9, lat = 2, lon = 2)) +#' ftime = 9, lat = 2, lon = 2)) #'class(exp) <- 's2dv_cube' #'TP <- CST_PeriodAccumulation(exp, time_dim = 'ftime') #'exp$data <- array(rnorm(5 * 3 * 214 * 2), -#' c(memb = 5, sdate = 3, time = 214, lon = 2)) +#' c(memb = 5, sdate = 3, ftime = 214, lon = 2)) #'Dates <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), #' as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), #' seq(as.Date("01-05-2001", format = "%d-%m-%Y"), #' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), #' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), #' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) -#'dim(Dates) <- c(sdate = 3, time = 214) +#'dim(Dates) <- c(sdate = 3, ftime = 214) #'exp$attrs$Dates <- Dates #'SprR <- CST_PeriodAccumulation(exp, start = list(21, 4), end = list(21, 6), #' time_dim = 'ftime') @@ -204,8 +204,8 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, #' #'@examples #'exp <- array(rnorm(216)*200, dim = c(dataset = 1, member = 2, sdate = 3, -#' time = 9, lat = 2, lon = 2)) -#'TP <- PeriodAccumulation(exp, time_dim = 'time') +#' ftime = 9, lat = 2, lon = 2)) +#'TP <- PeriodAccumulation(exp, time_dim = 'ftime') #'data <- array(rnorm(5 * 3 * 214 * 2), #' c(memb = 5, sdate = 3, ftime = 214, lon = 2)) #'Dates <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), -- GitLab From 7085ab2caa2acc0bb54f37c5949173eab11084d1 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 16 Nov 2023 16:18:53 +0100 Subject: [PATCH 41/42] Correct unit test --- R/zzz.R | 2 +- man/CST_PeriodAccumulation.Rd | 6 +++--- man/PeriodAccumulation.Rd | 4 ++-- tests/testthat/test-PeriodAccumulation.R | 23 ++++++----------------- 4 files changed, 12 insertions(+), 23 deletions(-) diff --git a/R/zzz.R b/R/zzz.R index 763dc8c..da7c3a1 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -80,7 +80,7 @@ wind2CF <- function(wind, pc) { } # Function that creates a mask array from dates for the whole year -.datesmask <- function(dates, frequency = 'daily') { +.datesmask <- function(dates, frequency = 'monthly') { years <- format(dates, "%Y") ini <- as.Date(paste(min(years), 01, 01, sep = '-')) end <- as.Date(paste(max(years), 12, 31, sep = '-')) diff --git a/man/CST_PeriodAccumulation.Rd b/man/CST_PeriodAccumulation.Rd index 1b48e92..4d71ff3 100644 --- a/man/CST_PeriodAccumulation.Rd +++ b/man/CST_PeriodAccumulation.Rd @@ -97,18 +97,18 @@ PeriodAccumulation. \examples{ exp <- NULL exp$data <- array(rnorm(216)*200, dim = c(dataset = 1, member = 2, sdate = 3, - time = 9, lat = 2, lon = 2)) + ftime = 9, lat = 2, lon = 2)) class(exp) <- 's2dv_cube' TP <- CST_PeriodAccumulation(exp, time_dim = 'ftime') exp$data <- array(rnorm(5 * 3 * 214 * 2), - c(memb = 5, sdate = 3, time = 214, lon = 2)) + c(memb = 5, sdate = 3, ftime = 214, lon = 2)) Dates <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), as.Date("30-11-2000", format = "\%d-\%m-\%Y"), by = 'day'), seq(as.Date("01-05-2001", format = "\%d-\%m-\%Y"), as.Date("30-11-2001", format = "\%d-\%m-\%Y"), by = 'day'), seq(as.Date("01-05-2002", format = "\%d-\%m-\%Y"), as.Date("30-11-2002", format = "\%d-\%m-\%Y"), by = 'day')) -dim(Dates) <- c(sdate = 3, time = 214) +dim(Dates) <- c(sdate = 3, ftime = 214) exp$attrs$Dates <- Dates SprR <- CST_PeriodAccumulation(exp, start = list(21, 4), end = list(21, 6), time_dim = 'ftime') diff --git a/man/PeriodAccumulation.Rd b/man/PeriodAccumulation.Rd index 9b1b970..ca01c20 100644 --- a/man/PeriodAccumulation.Rd +++ b/man/PeriodAccumulation.Rd @@ -88,8 +88,8 @@ negative, the rolling sum is applied backwards. } \examples{ exp <- array(rnorm(216)*200, dim = c(dataset = 1, member = 2, sdate = 3, - time = 9, lat = 2, lon = 2)) -TP <- PeriodAccumulation(exp, time_dim = 'time') + ftime = 9, lat = 2, lon = 2)) +TP <- PeriodAccumulation(exp, time_dim = 'ftime') data <- array(rnorm(5 * 3 * 214 * 2), c(memb = 5, sdate = 3, ftime = 214, lon = 2)) Dates <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), diff --git a/tests/testthat/test-PeriodAccumulation.R b/tests/testthat/test-PeriodAccumulation.R index 8a990c5..9dcbcf9 100644 --- a/tests/testthat/test-PeriodAccumulation.R +++ b/tests/testthat/test-PeriodAccumulation.R @@ -100,18 +100,6 @@ test_that("1. Initial checks", { ############################################## test_that("2. Seasonal", { - exp <- NULL - exp$data <- array(1:(1 * 3 * 214 * 2), - c(memb = 1, sdate = 3, time = 214, lon = 2)) - exp$dims <- dim(exp$data) - exp$attrs$Dates <- c(seq(as.Date("01-04-2000", format = "%d-%m-%Y"), - as.Date("31-10-2000", format = "%d-%m-%Y"), by = 'day'), - seq(as.Date("01-04-2001", format = "%d-%m-%Y"), - as.Date("31-10-2001", format = "%d-%m-%Y"), by = 'day'), - seq(as.Date("01-04-2002", format = "%d-%m-%Y"), - as.Date("31-10-2002", format = "%d-%m-%Y"), by = 'day')) - dim(exp$attrs$Dates) <- c(time = 214, sdate = 3) - class(exp) <- 's2dv_cube' output <- exp output$data <- array(c(sum(exp$data[1,1,21:82,1]), sum(exp$data[1,2,21:82,1]), sum(exp$data[1,3,21:82,1]), sum(exp$data[1,1,21:82,2]), @@ -207,29 +195,30 @@ test_that("4. Rolling", { ) # Output checks expect_equal( - PeriodAccumulation(data = dat2, rollwidth = -2, dates = dates2), + PeriodAccumulation(data = dat2, rollwidth = -2, dates = dates2, frequency = 'daily'), array(c(4,6,8, 10, NA, NA), dim = c(sdate = 2, time = 3, member = 1)) ) expect_equal( - PeriodAccumulation(data = dat2, rollwidth = 3, dates = dates2), + PeriodAccumulation(data = dat2, rollwidth = 3, dates = dates2, frequency = 'daily'), array(c(rep(NA, 4), 9, 12), dim = c(sdate = 2, time = 3, member = 1)) ) dat2_1 <- dat2 dat2_1[1,1,1] <- NA expect_equal( - PeriodAccumulation(data = dat2_1, rollwidth = 2, dates = dates2, na.rm = FALSE), + PeriodAccumulation(data = dat2_1, rollwidth = 2, dates = dates2, na.rm = FALSE, + frequency = 'daily'), array(c(rep(NA, 3), 6,8,10), dim = c(sdate = 2, time = 3, member = 1)) ) # Test rolling with start and end expect_equal( PeriodAccumulation(data = dat2, rollwidth = 1, dates = dates2, - start = list(1, 4), end = list(2, 4)), + start = list(1, 4), end = list(2, 4), frequency = 'daily'), array(c(1, 2, 3, 4), dim = c(sdate = 2, time = 2, member = 1)) ) expect_equal( PeriodAccumulation(data = dat2, rollwidth = 2, dates = dates2, - start = list(1, 4), end = list(2, 4)), + start = list(1, 4), end = list(2, 4), frequency = 'daily'), array(c(NA, NA, 4, 6), dim = c(sdate = 2, time = 2, member = 1)) ) }) -- GitLab From 294c052d14d05adcc2c10834b3db910029a4b427 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Fri, 17 Nov 2023 14:49:16 +0100 Subject: [PATCH 42/42] Remove specifying NAMESPACE --- R/PeriodAccumulation.R | 6 +++--- R/PeriodPET.R | 26 +++++++++++++------------- R/PeriodStandardization.R | 19 +++++++++---------- 3 files changed, 25 insertions(+), 26 deletions(-) diff --git a/R/PeriodAccumulation.R b/R/PeriodAccumulation.R index a9e9f5f..60245b6 100644 --- a/R/PeriodAccumulation.R +++ b/R/PeriodAccumulation.R @@ -133,8 +133,8 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, if (!is.null(dim(Dates))) { # Create time_bounds time_bounds <- NULL - time_bounds$start <- ClimProjDiags::Subset(Dates, time_dim, 1, drop = 'selected') - time_bounds$end <- ClimProjDiags::Subset(Dates, time_dim, dim(Dates)[time_dim], drop = 'selected') + time_bounds$start <- Subset(Dates, time_dim, 1, drop = 'selected') + time_bounds$end <- Subset(Dates, time_dim, dim(Dates)[time_dim], drop = 'selected') # Add Dates in attrs data$attrs$Dates <- time_bounds$start @@ -337,7 +337,7 @@ PeriodAccumulation <- function(data, dates = NULL, start = NULL, end = NULL, } } - data_accum <- zoo::rollapply(data = data_vector, width = rollwidth, FUN = sum, na.rm = na.rm) + data_accum <- rollapply(data = data_vector, width = rollwidth, FUN = sum, na.rm = na.rm) if (!forwardroll) { data_accum <- c(rep(NA, rollwidth-1), data_accum) } else { diff --git a/R/PeriodPET.R b/R/PeriodPET.R index 2bfefa6..27d6eca 100644 --- a/R/PeriodPET.R +++ b/R/PeriodPET.R @@ -94,12 +94,12 @@ CST_PeriodPET <- function(data, pet_method = 'hargreaves', metadata <- data[[1]]$attrs$Variable$metadata metadata_names <- intersect(names(dim(res)), names(metadata)) suppressWarnings( - res <- CSTools::s2dv_cube(data = res, coords = coords, - varName = paste0('PET'), - metadata = metadata[metadata_names], - Dates = Dates, - source_files = source_files, - when = Sys.time()) + res <- s2dv_cube(data = res, coords = coords, + varName = paste0('PET'), + metadata = metadata[metadata_names], + Dates = Dates, + source_files = source_files, + when = Sys.time()) ) return(res) } @@ -363,22 +363,22 @@ PeriodPET <- function(data, dates, lat, pet_method = 'hargreaves', rm(data_tmp) } if (pet_method == 'hargreaves') { - pet <- SPEI::hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), - lat = lat_mask, na.rm = FALSE, verbose = FALSE) + pet <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), + lat = lat_mask, na.rm = FALSE, verbose = FALSE) # line to return the vector to the size of the actual original data pet <- array(pet[which(mask_dates == 1)], dim = dims) } if (pet_method == 'hargreaves_modified') { - pet <- SPEI::hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), - lat = lat_mask, Pre = as.vector(data4), na.rm = FALSE, - verbose = FALSE) + pet <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), + lat = lat_mask, Pre = as.vector(data4), na.rm = FALSE, + verbose = FALSE) pet <- array(pet[which(mask_dates == 1)], dim = dims) } if (pet_method == 'thornthwaite') { - pet <- SPEI::thornthwaite(as.vector(data2), lat = lat_mask, na.rm = TRUE, - verbose = FALSE) + pet <- thornthwaite(as.vector(data2), lat = lat_mask, na.rm = TRUE, + verbose = FALSE) # line to return the vector to the size of the actual original data pet <- array(pet[which(mask_dates == 1)], dim = dims) } diff --git a/R/PeriodStandardization.R b/R/PeriodStandardization.R index 2cd9646..6ab707e 100644 --- a/R/PeriodStandardization.R +++ b/R/PeriodStandardization.R @@ -341,8 +341,7 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, "It will not be used.") ref_period <- NULL } else { - years <- format(ClimProjDiags::Subset(dates, along = leadtime_dim, - indices = 1), "%Y") + years <- format(Subset(dates, along = leadtime_dim, indices = 1), "%Y") ref_period[[1]] <- which(ref_period[[1]] == years) ref_period[[2]] <- which(ref_period[[2]] == years) } @@ -502,12 +501,12 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, for (i in 1:length(data2)) { bp[i,1] = sum(data2[] <= data2[i], na.rm = na.rm); # Writes the rank of the data } - std_index <- stats::qnorm((bp - 0.44)/(length(data2) + 0.12)) + std_index <- qnorm((bp - 0.44)/(length(data2) + 0.12)) dim(std_index) <- dims spei_mod[ff, , ] <- std_index } else { if (!is.null(ref_start) && !is.null(ref_end)) { - data_fit <- stats::window(data2, ref_start, ref_end) + data_fit <- window(data2, ref_start, ref_end) } else { data_fit <- data2 } @@ -521,7 +520,7 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, } f_params <- NA if (!any(is.na(acu_sorted)) & length(acu_sorted) != 0) { - acu_sd <- stats::sd(acu_sorted) + acu_sd <- sd(acu_sorted) if (!is.na(acu_sd) & acu_sd != 0) { if (distribution != "log-Logistic") { acu_sorted <- acu_sorted[acu_sorted > 0] @@ -544,7 +543,7 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, "Gamma" = lmom::cdfgam(data2, f_params), "PearsonIII" = lmom::cdfpe3(data2, f_params)) } - std_index_cv <- array(stats::qnorm(cdf_res), dim = dims) + std_index_cv <- array(qnorm(cdf_res), dim = dims) spei_mod[ff, nsd, ] <- std_index_cv[nsd, ] if (return_params) params_result[ff, nsd, ] <- f_params } @@ -565,14 +564,14 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, for (i in 1:length(data_cor2)) { bp[i,1] = sum(data_cor2[] <= data_cor2[i], na.rm = na.rm); # Writes the rank of the data } - std_index <- stats::qnorm((bp - 0.44)/(length(data_cor2) + 0.12)) + std_index <- qnorm((bp - 0.44)/(length(data_cor2) + 0.12)) dim(std_index) <- dimscor spei_mod[ff, , ] <- std_index } else { data2 <- data[ff, , ] dim(data2) <- dims if (!is.null(ref_start) && !is.null(ref_end)) { - data_fit <- stats::window(data2, ref_start, ref_end) + data_fit <- window(data2, ref_start, ref_end) } else { data_fit <- data2 } @@ -583,7 +582,7 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, acu_sorted <- sort.default(acu, method = "quick", na.last = TRUE) } if (!any(is.na(acu_sorted)) & length(acu_sorted) != 0) { - acu_sd <- stats::sd(acu_sorted) + acu_sd <- sd(acu_sorted) if (!is.na(acu_sd) & acu_sd != 0) { if (distribution != "log-Logistic") { acu_sorted <- acu_sorted[acu_sorted > 0] @@ -601,7 +600,7 @@ PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, "Gamma" = lmom::cdfgam(data_cor2, f_params), "PearsonIII" = lmom::cdfpe3(data_cor2, f_params)) } - std_index_cv <- array(stats::qnorm(cdf_res), dim = dimscor) + std_index_cv <- array(qnorm(cdf_res), dim = dimscor) spei_mod[ff, , ] <- std_index_cv if (return_params) params_result[ff, , ] <- f_params } -- GitLab