From 95e6464a82a28d40b00fa55c5bf8e2582a32a7e7 Mon Sep 17 00:00:00 2001 From: allabres Date: Mon, 17 Apr 2023 20:13:07 +0200 Subject: [PATCH 01/87] first version of SPEI function, but it has several known issues --- R/PeriodSPEI.R | 509 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 509 insertions(+) create mode 100644 R/PeriodSPEI.R diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R new file mode 100644 index 0000000..d56916b --- /dev/null +++ b/R/PeriodSPEI.R @@ -0,0 +1,509 @@ +PeriodSPEI <- function(tas = NULL, tasmax = NULL, tasmin = NULL, pet = NULL, prlr, # s2dv_cubes (with coordinates lat = prlr$coords$latitude and Dates) + time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', lon_dim = 'longitude', lat_dim = 'latitude', + accum = 1, start = NULL, end = NULL, + pet_method = NULL, + standardization = TRUE, + param_error = -9999, + handle_infinity = FALSE, + cross_validation = FALSE, + method = 'parametric', distribution = 'log-Logistic', fit='ub-pwm', + n_procs = 4){ + + # check provided data and parameters: + if (!is.null(pet) & !is.null(pet_method)){ + print ('WARNING: pet data is provided and also a pet estimation method, the provided pet data will be used and NOT estimated') + pet_method <- NULL + } + if (is.null(pet_method) & is.null(pet)){ + stop ('variable pet needs to be provided or a pet_method selected') + } + if ((pet_method == 'hargreaves' | pet_method == 'hargreaves_modified') & (is.null(tasmax) | is.null(tasmin))){ + stop (paste0('Need to provide tasmax and tasmin for the selected method ', pet_method)) + } + if (pet_method == 'thornthwaite' & is.null(tas)){ + stop (paste0('Need to provide tas for the selected method ', pet_method)) + } + print('WARNING: temperature needs to be in C and precipitation in mm/month') # there is no check + + # check if accumulation period is possible + if(accum > dim(prlr$data)[leadtime_dim][[1]]){ + stop(paste0('ERROR: Cannot compute accumulation of ', accum, ' months because loaded data has only ', dim(prlr$data)[leadtime_dim][[1]], ' months.')) + } + + # complete dates + dates <- prlr$attrs$Dates + dates_complete_daily <- as.Date(as.Date(paste(lubridate::year(min(dates)), 01, 01, sep='-')):as.Date(paste(lubridate::year(max(dates)), 12, 31, sep='-'))) + dates_complete_daily_to_monthly <- lubridate::day(dates_complete_daily) + dates_complete_monthly <- dates_complete_daily[which(dates_complete_daily_to_monthly == 1)] + dates_monthly <- array(0, dim=length(dates_complete_monthly)) + for (dd in 1:length(dates)){ + ii <- which(dates_complete_monthly == as.Date(paste(lubridate::year(dates[dd]), lubridate::month(dates[dd]), 01, sep = '-'))) + dates_monthly[ii] <- 1 + } + + # Evapotranspiration estimation (unless pet is already provided) + if (is.null(pet)){ + if (pet_method == 'hargreaves'){ + data <- list(tasmax = tasmax$data, tasmin = tasmin$data, lat = prlr$coords$latitude) + } else if (pet_method == 'hargreaves_modified'){ + data <- list(tasmax = tasmax$data, tasmin = tasmin$data, prlr = prlr$data, lat = prlr$coords$latitude) + } else if (pet_method == 'thornthwaite'){ + data <- list(tas = tas$data, lat = prlr$coords$latitude) + } + pet <- evapotranspiration(data, dates_monthly, pet_method, time_dim, leadtime_dim, memb_dim, lon_dim, lat_dim, n_procs) + } + + # Accumulation + diff_P_PET <- prlr$data - pet + data_accum <- accumulation(diff_P_PET, dates_monthly, accum, time_dim, leadtime_dim, memb_dim, n_procs) + + # Standardization: + if (standardization == TRUE){ + spei_dat <- spei_standardization(data_accum = 
data_accum, + leadtime_dim = leadtime_dim, + time_dim = time_dim, + memb_dim = memb_dim, + cross_validation = cross_validation, + handle_infinity = handle_infinity, + n_procs = n_procs, + accum = accum, + param_error = param_error, + method = method, distribution = distribution, fit = fit) + } else { + spei_dat <- data_accum + } + + return(spei_dat) +} + + + +## functions + +evapotranspiration <- function(data, dates_monthly, pet_method = 'hargreaves', + time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', + lon_dim = 'longitude', lat_dim = 'latitude', + n_procs = 4){ + + lat_mask <- InsertDim(InsertDim(data$lat,pos = 1, len = 1, name = 'dat'),pos = 3, len = dim(data[[1]])[lon_dim], name = lon_dim) + + # extract mask of NA locations to return to NA the final result + mask_NA <- array(1, dim = dim(data[[1]])) + if (pet_method == 'hargreaves'){ + mask_NA[which(is.na(data$tasmax))] <- 0 + mask_NA[which(is.na(data$tasmin))] <- 0 + } + if (pet_method == 'hargreaves_modified'){ + mask_NA[which(is.na(data$tasmax))] <- 0 + mask_NA[which(is.na(data$tasmin))] <- 0 + mask_NA[which(is.na(data$prlr))] <- 0 + } + if (pet_method == 'thornthwaite'){ + mask_NA[which(is.na(tas$data))] <- 0 + } + + # replace NA with 0 + for (dd in 1:length(data)){ + data[[dd]][which(is.na(data[[dd]]))] <- 0 + } + + # prepare data + if (pet_method == 'hargreaves'){ + data_input <- list(lat_mask = lat_mask, tasmax = data$tasmax, tasmin = data$tasmin) + targetdim_input <- list(lat_mask = c('dat'), tasmax = c(leadtime_dim, time_dim), tasmin = c(leadtime_dim, time_dim)) + } else if (pet_method == 'hargreaves_modified'){ + data_input <- list(lat_mask = lat_mask, tasmax = data$tasmax, tasmin = data$tasmin, prlr = data$prlr) + targetdim_input <- list(lat_mask = c('dat'), tasmax = c(leadtime_dim, time_dim), tasmin = c(leadtime_dim, time_dim), prlr = c(leadtime_dim, time_dim)) + } else if (pet_method == 'thornthwaite'){ + data_input <- list(lat_mask = lat_mask, tas = data$tas) + targetdim_input <- list(lat_mask = c('dat'), tas = c(leadtime_dim, time_dim)) + } else { + stop (paste0('Unknown pet_method ', pet_method)) + } + + PET_estimated <- Apply(data = data_input, + target_dims = targetdim_input, + output_dims = c(leadtime_dim, time_dim), + pet_method = pet_method, + dates_monthly = dates_monthly, # array of 0 and 1 used to build the data_input to a complete array of the full years that are considered + dim_month = leadtime_dim, dim_year = time_dim, + fun = atomic_pet, ncores = n_procs) + + # reorder dims in PET_estimated + dims_order <- array(NA, length(dim(data[[1]]))) + for (ord in 1:length(dim(data[[1]]))){ + dims_order[ord] <- which(names(dim(PET_estimated$output1)) == names(dim(data[[1]]))[ord]) + } + data_pet <- aperm(PET_estimated$output1, dims_order) + + # restore original NAs from mask_NA + data_pet[which(mask_NA == 0 )] <- NA + + return(data_pet) + +} + +accumulation <- function(diff_P_PET, dates_monthly, accum = 1, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', n_procs = 4){ + + if(!time_dim %in% names(dim(diff_P_PET))){ + diff_P_PET <- InsertDim(diff_P_PET, posdim = 1, lendim = 1, name = time_dim) + } + if(!leadtime_dim %in% names(dim(diff_P_PET))){ + diff_P_PET <- InsertDim(diff_P_PET, posdim = 1, lendim = 1, name = leadtime_dim) + } + + accum_result <- Apply(data = list(diff_P_PET), + target_dims = list(diff_P_PET = c(leadtime_dim,time_dim)), + dates_monthly = dates_monthly, # array of 0 and 1 used to build the data_input to a complete array of the full years that are considered + accum 
= accum, + output_dims = c(leadtime_dim, time_dim), #c('time','sdate'), + dim_month = leadtime_dim, dim_year = time_dim, + fun = atomic_accum, ncores = n_procs) + + # recover essential lost dims (if they had length 1 they'd have been dropped in previous step): + for (d in c(time_dim, leadtime_dim, 'latitude', 'longitude', memb_dim)){ + if(!d %in% names(dim(accum_result$output1))){ + accum_result$output1 <- InsertDim(data = accum_result$output1, posdim = length(names(dim(accum_result$output1))) + 1, lendim = 1, name = d) + } + } + + # reorder dims in accum_result + dims_order <- array(NA, length(dim(diff_P_PET))) + for (ord in 1:length(dim(diff_P_PET))){ + dims_order[ord] <- which(names(dim(accum_result$output1)) == names(dim(diff_P_PET))[ord]) + } + data_accum <- aperm(accum_result$output1, dims_order) + + return(data_accum) + +} + +spei_standardization <- function(data_accum, + leadtime_dim, time_dim, memb_dim, handle_infinity, cross_validation, + n_procs, accum, param_error, + method ='parametric', distribution = 'log-Logistic', fit='ub-pwm'){ + data_spei <- data_accum + + data_spei <- Apply(data = list(data_accum), + target_dims = list(data = c(leadtime_dim,time_dim, memb_dim)), + output_dims = c(leadtime_dim,time_dim, memb_dim), + dim_month = leadtime_dim, + dim_year = time_dim, + dim_memb = memb_dim, + handle_infinity = handle_infinity, + cross_validation = cross_validation, + method = method, distribution = distribution, fit = fit, + ref_period = NULL, + param_error = param_error, + fun = atomic_spei, + ncores = n_procs)$output1 + + return(data_spei) + +} + +### Atomic functions + +atomic_pet <- function(pet_method, dates_monthly, dim_month, dim_year, lat_mask = NULL, data2 = NULL, data3 = NULL, data4 = NULL){ + + ftime = as.numeric(dim(data2)[leadtime_dim]) + n_sdates = as.numeric(dim(data2)[time_dim]) + + # create a vector from data but adding 0 to achive complete time series of the considered period + #(starting in January of the first year) so that the solar radiation estimation is computed + # in each case for the correct month + if (!is.null(data2)){ + data_tmp <- as.vector(data2) + data2 <- array(0, dim = length(dates_monthly)) + count <- 1 + for (dd in 1:length(dates_monthly)){ + if (dates_monthly[dd] == 1){ + data2[dd] <- data_tmp[count] + count <- count + 1 + } + } + rm(data_tmp) + } + if (!is.null(data3)){ + data_tmp <- as.vector(data3) + data3 <- array(0, dim = length(dates_monthly)) + count <- 1 + for (dd in 1:length(dates_monthly)){ + if (dates_monthly[dd] == 1){ + data3[dd] <- data_tmp[count] + count <- count + 1 + } + } + rm(data_tmp) + } + if (!is.null(data4)){ + data_tmp <- as.vector(data4) + data4 <- array(0, dim = length(dates_monthly)) + count <- 1 + for (dd in 1:length(dates_monthly)){ + if (dates_monthly[dd] == 1){ + data4[dd] <- data_tmp[count] + count <- count + 1 + } + } + rm(data_tmp) + } + if (pet_method == 'hargreaves'){ + x_PET <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), lat = lat_mask, na.rm = FALSE) + x_PET <- x_PET[which(dates_monthly == 1)] # line to return the vector to the size of the actual original data + PET <- array(x_PET, dim = c(ftime,n_sdates)) + } + + if (pet_method == 'hargreaves_modified'){ # not sure this works properly + PET <- array(NA, dim = c(ftime,n_sdates)) + for(ns in 1:n_sdates){ + tmax_mod_temp <- data2[12*(ns-1)+(1:12)] #data2[,ns] + tmin_mod_temp <- data3[12*(ns-1)+(1:12)] #data3[,ns] + pre_mod_temp <- data4[12*(ns-1)+(1:12)] #data4[,ns] + + # Computation of PET + x_PET <- hargreaves(Tmin = tmin_mod_temp, 
Tmax = tmax_mod_temp, lat = lat_mask, Pre = pre_mod_temp, na.rm = FALSE) + PET[1:ftime,ns] <- x_PET #dim <- (lon lat ft sd memb) + } + } + + if (pet_method == 'thornthwaite'){ + x_PET <- thornthwaite(as.vector(data2), lat = lat_mask, na.rm = TRUE) + x_PET <- x_PET[which(dates_monthly == 1)] # line to return the vector to the size of the actual original data + PET <- array(x_PET, dim = c(ftime,n_sdates)) + } + + return(PET) + +} + +atomic_accum <- function(data, dates_monthly, accum, dim_month, dim_year){ + # data:[time, syear] + + ftime = as.numeric(dim(data)[dim_month]) + n_sdates = as.numeric(dim(data)[dim_year]) + + data_vector <- array(0, dim = length(dates_monthly)) + count <- 1 + for (dd in 1:length(dates_monthly)){ + if (dates_monthly[dd] == 1){ + data_vector[dd] <- as.vector(data)[count] + count <- count + 1 + } + } + + # Accumulation at different timescales + data_sum_x <- rollapply(data_vector, accum, sum) # rollapply {zoo} A generic function for applying a function to rolling margins of an array. + data_sum_x <- c(rep(NA, accum-1), data_sum_x) # adds as many NAs as needed at the begining to account for the months that cannot be added (depends on accu) and so that the position in the vector corresponds to the accumulated of the previous months (instead of the accumulated of the next months) + data_sum_x <- data_sum_x[which(dates_monthly == 1)] # discard the months that don't appear in the original data + accum_result <- array(data_sum_x, dim = c(ftime,n_sdates)) # return to matrix form + if (accum > 1){ + accum_result[1:(accum-1),] <- NA # replace by NA when the accumulation corresponds to months that where not present in the original data + } + + return(accum_result) + +} + +atomic_spei <- function(data, dim_month, dim_year, dim_memb, ref_period, handle_infinity, cross_validation, param_error, + method ='parametric', distribution = 'log-Logistic', fit='ub-pwm'){ + # data: [time, sdate, memb] + if (is.null(ref_period)){ + ref.start <- NULL + ref.end <- NULL + } else { + ref.start <- ref_period[[1]] + ref.end <- ref_period[[2]] + } + + if (all(is.na(data))) { + speiX <- array(NA, dim(data)) + } else if (var(data, na.rm=T) == 0) { # if the data [time, sdate, memb] has no variability it will raise an error further down, so we assign a value to the result and skip the step + speiX <- array(param_error, dim(data)) + } else { + speiX <- spX(data, ref.start=ref.start, ref.end=ref.end, + method = method, distribution = distribution, fit = fit, + dim_month = dim_month, dim_year = dim_year, dim_memb = dim_memb, + handle_infinity = handle_infinity, cross_validation = cross_validation) + } + return(speiX) + +} + +### Support functions + +spX <- function(acc_data, ref.start, ref.end, na.rm = TRUE, method = 'parametric', distribution = 'log-Logistic', fit = 'ub-pwm', dim_month, dim_year, dim_memb, handle_infinity, cross_validation){ + + # acc_data: [ftime, syear, nmemb] + + if (!(method %in% c('parametric', 'non-parametric'))) { + stop('pcX script error: SPEI can be only computed using the following approach: parametric or non-parametric') + } + + ftime <- as.numeric(dim(acc_data)[dim_month]) + n_sdates <- as.numeric(dim(acc_data)[dim_year]) + nmemb <- as.numeric(dim(acc_data)[dim_memb]) + + spei_mod <- array(data = NA, dim = c(ftime, n_sdates, nmemb)) + names(dim(spei_mod)) <- c(dim_month, dim_year, dim_memb) + + for(ff in 1:ftime){ # treat months independently + Accum_D_temp <- ClimProjDiags::Subset(acc_data, along = dim_month, indices = ff, drop = FALSE) + x_spei_mod <- 
spX_ftime(Accum_D_temp, na.rm = TRUE, method = method, distribution = distribution, fit = fit, ref.start = ref.start, ref.end = ref.end, n_sdates = n_sdates, nmemb = nmemb, handle_infinity = handle_infinity, cross_validation = cross_validation) + spei_mod[ff,,] <- x_spei_mod + } + + return(spei_mod) +} + +# HandleInfinity: +spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm = TRUE, method = 'parametric', n_sdates, nmemb, + ref.start = NULL, ref.end = NULL, params = NULL, handle_infinity = FALSE, cross_validation = FALSE) { + + # data:[ftime = 1, syear, ensemble] + + if (!(method %in% c('parametric', 'non-parametric'))) { + stop('SPEI can be only computed using the following approach: parametric or non-parametric') + } + + if (method == 'non-parametric'){ + + if (anyNA(data) && na.rm == FALSE) { + stop('Error: Data must not contain NAs') + } + + bp = matrix(0, length(data), 1) + for (i in 1:length(data)) { + bp[i,1] = sum(data[] <= data[i]); # Writes the rank of the data + } + + SPEI = qnorm((bp-0.44)/(length(data)+0.12)) + + return(SPEI) + + } else { + + std_index <- array(NA, c(n_sdates, nmemb)) + + if (anyNA(data) && na.rm==FALSE) { + stop('Error: Data must not contain NAs') + } + if (!(distribution %in% c('log-Logistic', 'Gamma', 'PearsonIII'))) { + stop('Distrib must be one of "log-Logistic", "Gamma" or "PearsonIII"') + } + if (!(fit %in% c('max-lik', 'ub-pwm', 'pp-pwm'))) { + stop('Method must be one of "ub-pwm" (default), "pp-pwm" or "max-lik"') + } + + coef = switch(distribution, + "Gamma" = array(NA, dim = 2, dimnames = list(c('alpha','beta'))), + "log-Logistic" = array(NA, dim = 3, dimnames = list(c('xi','alpha','kappa'))), + "PearsonIII" = array(NA, dim = 3, dimnames = list(c('mu','sigma','gamma'))) + ) + + dim_one <- length(coef) + + if (!is.null(params)) { + if (length(params)!=dim_one) { + stop(paste0('parameters array should have dimensions [', dim_one, ']')) + } + } + + # Select window if necessary + if (!is.null(ref.start) && !is.null(ref.end)) { + data.fit <- window(data,ref.start,ref.end) + } else { + data.fit <- data + } + + if(cross_validation == "TRUE") { + loop_years <- n_sdates + } else { + loop_years <- 1 + } + + for (nsd in 1:loop_years) { # loop over years (in case of cross_validation) + # Cumulative series (acu) + if (cross_validation == TRUE){ + acu <- as.vector(data.fit[,-nsd,]) + } else { + acu <- as.vector(data.fit) + } + + acu.sorted <- sort.default(acu, method = "quick") + acu.sorted <- acu.sorted[!is.na(acu.sorted)] # remove NAs (no need if(na.rm) because if there are NA and na.rm=F we don't get to this point) + if (length(acu.sorted)!=0){ # else all acu was NA and we don't need to continue with this case + acu_sd = sd(acu.sorted) + if (!is.na(acu_sd)){ + if (acu_sd != 0){ + if(distribution != "log-Logistic"){ + pze <- sum(acu==0)/length(acu) + acu.sorted = acu.sorted[acu.sorted > 0] + } + if (!is.null(params)) { + f_params = as.vector(params) + } else { + if (length(acu.sorted) >= 4){ # else coef will be NA + # Calculate probability weighted moments based on fit with lmomco or TLMoments + pwm = switch(fit, + "pp-pwm" = pwm.pp(acu.sorted,-0.35,0, nmom=3), + pwm.ub(acu.sorted, nmom=3) + #TLMoments::PWM(acu.sorted, order=0:2) + ) + + # Check L-moments validity + lmom <- pwm2lmom(pwm) + if (!(!are.lmom.valid(lmom) || anyNA(lmom[[1]]) || any(is.nan(lmom[[1]])))){ + + # lmom fortran functions need specific inputs L1, L2, T3 + # this is handled by lmomco internally with lmorph + fortran_vec = c(lmom$lambdas[1:2], lmom$ratios[3]) + + 
# Calculate parameters based on distribution with lmom then lmomco + f_params = switch(distribution, + "log-Logistic" = tryCatch(lmom::pelglo(fortran_vec), error = function(e){ parglo(lmom)$para }), + "Gamma" = tryCatch(lmom::pelgam(fortran_vec), error = function(e){ pargam(lmom)$para }), + "PearsonIII" = tryCatch(lmom::pelpe3(fortran_vec), error = function(e){ parpe3(lmom)$para }) + ) + + # Adjust if user chose log-Logistic and max-lik + if(distribution == 'log-Logistic' && fit == 'max-lik'){ + f_params = parglo.maxlik(acu.sorted, f_params)$para + } + } # end if dor the case the L-moments are not valid (std_index will be NA) + } # end if for the case there are not enough values to estimate the parameters (std_index will be NA) + } # end estimation of f_param + # Calculate cdf based on distribution with lmom + cdf_res = switch(distribution, + "log-Logistic" = lmom::cdfglo(data, f_params), + "Gamma" = lmom::cdfgam(data, f_params), + "PearsonIII" = lmom::cdfpe3(data, f_params) + ) + + std_index_cv <- array(qnorm(cdf_res), dim = c(n_sdates,nmemb)) + + # Adjust if user chose Gamma or PearsonIII - Not tested: For future development + #if(distribution != 'log-Logistic'){ + # std_index[ff,s] = qnorm(pze + (1-pze)*pnorm(std_index[ff,s])) # ff doesn't exist at this point + #} + if (cross_validation == TRUE){ + std_index[nsd,] <- std_index_cv[nsd,] + } else { + std_index <- std_index_cv + } + } + } # end if for the case there is no variability + } # end if for the case all NA in acu + } # next year (in case of cross_validation or all done if cross_validation == F) + + if(handle_infinity == 'TRUE'){ # could also use "param_error" ?; we are giving it the min/max value of the grid point + std_index[is.infinite(std_index) & std_index < 0] <- min(std_index[!is.infinite(std_index)]) + std_index[is.infinite(std_index) & std_index > 0] <- max(std_index[!is.infinite(std_index)]) + } + + } # end parametric + + return(std_index) # fitted is the only thing used after calling this function + +} + -- GitLab From bc82389d1aa70ce237850ddfeba1775fcf6af2ab Mon Sep 17 00:00:00 2001 From: allabres Date: Tue, 9 May 2023 15:37:03 +0200 Subject: [PATCH 02/87] SPEI working version --- R/PeriodSPEI.R | 246 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 194 insertions(+), 52 deletions(-) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index d56916b..43fdb56 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -1,12 +1,97 @@ -PeriodSPEI <- function(tas = NULL, tasmax = NULL, tasmin = NULL, pet = NULL, prlr, # s2dv_cubes (with coordinates lat = prlr$coords$latitude and Dates) + +## libraries + library(s2dv) # InsertDim, ... + library(multiApply) # Apply + library(ClimProjDiags) # Subset + library(zeallot) # multiple assignment %<-% + library(SPEI) # estimation of evapotranspiration (e.g. hargreaves function) + library(zoo) # rollapply, as.Date + library(TLMoments) # pwd (Calculate probability weighted moments based on fit) + library(lmomco) # pwm2lmom (probability weighted moments to L-Moments) + library(lubridate) # year(), month(),... 
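+
+ # Example call to the CST_PeriodSPEI() wrapper defined below (illustrative only;
+ # 'cube_tasmax', 'cube_tasmin' and 'cube_prlr' stand for monthly 's2dv_cube'
+ # objects with Dates and a latitude coordinate, assumed to be loaded elsewhere):
+ # exp <- list(tasmax = cube_tasmax, tasmin = cube_tasmin, prlr = cube_prlr)
+ # spei <- CST_PeriodSPEI(exp = exp, accum = 3,
+ #                        pet_method_exp = 'hargreaves', standardization = TRUE)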
+ +CST_PeriodSPEI <- function(exp, exp_cor = NULL, # lists of s2dv_cubes time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', lon_dim = 'longitude', lat_dim = 'latitude', accum = 1, start = NULL, end = NULL, - pet_method = NULL, + pet_method_exp = 'hargreaves', + pet_method_cor = 'hargreaves', standardization = TRUE, param_error = -9999, handle_infinity = FALSE, - cross_validation = FALSE, - method = 'parametric', distribution = 'log-Logistic', fit='ub-pwm', + cross_validation = FALSE, # forced to FALSE if exp_cor is NOT NULL + method = 'parametric', distribution = 'log-Logistic', fit = 'ub-pwm', + n_procs = 4){ + + # obtain variables from exp (which is a list of s2dv_cubes); some of them can be NULL (if they are NULL or not on the list) + tas_exp <- exp$tas + tasmax_exp <- exp$tasmax + tasmin_exp <- exp$tasmin + pet_exp <- exp$pet + prlr_exp <- exp$prlr + + if (!is.null(exp_cor)){ + cross_validation_call <- FALSE # forced to FALSE otherwise we cannot get the params needed to standardize exp_cor + } else { + cross_validation_call <- cross_validation + } + + # call PeriodSPEI for exp (and also obtain params of the standardization, in case standardization == TRUE; + # if it's FALSE, it's not a problem because we won't need to standardize exp_cor either) + spei_exp <- PeriodSPEI(tas = tas_exp, tasmax = tasmax_exp, tasmin = tasmin_exp, pet = pet_exp, prlr = prlr_exp, + # line above need to be s2dv_cubes (with coordinates lat = prlr$coords$latitude and Dates); prlr is always needed + time_dim = time_dim, leadtime_dim = leadtime_dim, memb_dim = memb_dim, lon_dim = lon_dim, lat_dim = lat_dim, + accum = accum, start = start, end = end, + pet_method = pet_method_exp, + standardization = standardization, + params = NULL, # not used if standardization is FALSE + param_error = param_error, # not used if standardization is FALSE + handle_infinity = handle_infinity, # not used if standardization is FALSE + cross_validation = cross_validation_call, # not used if standardization is FALSE + method = method, # not used if standardization is FALSE + distribution = distribution, # not used if standardization is FALSE or method is non-parametric + fit = fit, # not used if standardization is FALSE or method is non-parametric + n_procs = n_procs) + + # obtain variables from exp_cor (in case it is not NULL) + if (!is.null(exp_cor)){ + tas_cor <- exp_cor$tas + tasmax_cor <- exp_cor$tasmax + tasmin_cor <- exp_cor$tasmin + pet_cor <- exp_cor$pet + prlr_cor <- exp_cor$prlr + + # obtain spei of exp_cor with parameters of exp for the standardization + spei_cor <- PeriodSPEI(tas = tas_cor, tasmax = tasmax_cor, tasmin = tasmin_cor, pet = pet_cor, prlr = prlr_cor, + time_dim = time_dim, leadtime_dim = leadtime_dim, memb_dim = memb_dim, lon_dim = lon_dim, lat_dim = lat_dim, + accum = accum, start = start, end = end, + pet_method = pet_method_cor, + standardization = standardization, + params = spei_exp$params, # not used if standardization is FALSE + param_error = param_error, # not used if standardization is FALSE + handle_infinity = handle_infinity, # not used if standardization is FALSE + cross_validation = cross_validation_call, # not used if standardization is FALSE + method = method, # not used if standardization is FALSE + distribution = distribution, # not used if standardization is FALSE or method is non-parametric + fit = fit, # not used if standardization is FALSE or method is non-parametric + n_procs = n_procs) + + return(list(spei_exp = spei_exp, spei_cor = spei_cor)) + } else { + return(spei_exp) 
+ } + +} + +PeriodSPEI <- function(tas = NULL, tasmax = NULL, tasmin = NULL, pet = NULL, prlr, + time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', lon_dim = 'longitude', lat_dim = 'latitude', + accum = 1, start = NULL, end = NULL, + pet_method = NULL, + standardization = TRUE, + params = NULL, # not used if standardization is FALSE + param_error = -9999, # not used if standardization is FALSE + handle_infinity = FALSE, # not used if standardization is FALSE + cross_validation = FALSE, # not used if standardization is FALSE + method = 'parametric', distribution = 'log-Logistic', fit = 'ub-pwm', # not used if standardization is FALSE n_procs = 4){ # check provided data and parameters: @@ -27,12 +112,14 @@ PeriodSPEI <- function(tas = NULL, tasmax = NULL, tasmin = NULL, pet = NULL, prl # check if accumulation period is possible if(accum > dim(prlr$data)[leadtime_dim][[1]]){ - stop(paste0('ERROR: Cannot compute accumulation of ', accum, ' months because loaded data has only ', dim(prlr$data)[leadtime_dim][[1]], ' months.')) + stop(paste0('ERROR: Cannot compute accumulation of ', accum, ' months because loaded data has only ', + dim(prlr$data)[leadtime_dim][[1]], ' months.')) } # complete dates dates <- prlr$attrs$Dates - dates_complete_daily <- as.Date(as.Date(paste(lubridate::year(min(dates)), 01, 01, sep='-')):as.Date(paste(lubridate::year(max(dates)), 12, 31, sep='-'))) + dates_complete_daily <- as.Date(as.Date(paste(lubridate::year(min(dates)), 01, 01, sep='-')):as.Date(paste(lubridate::year(max(dates)), + 12, 31, sep='-'))) dates_complete_daily_to_monthly <- lubridate::day(dates_complete_daily) dates_complete_monthly <- dates_complete_daily[which(dates_complete_daily_to_monthly == 1)] dates_monthly <- array(0, dim=length(dates_complete_monthly)) @@ -68,7 +155,7 @@ PeriodSPEI <- function(tas = NULL, tasmax = NULL, tasmin = NULL, pet = NULL, prl n_procs = n_procs, accum = accum, param_error = param_error, - method = method, distribution = distribution, fit = fit) + params = params, method = method, distribution = distribution, fit = fit) } else { spei_dat <- data_accum } @@ -85,7 +172,8 @@ evapotranspiration <- function(data, dates_monthly, pet_method = 'hargreaves', lon_dim = 'longitude', lat_dim = 'latitude', n_procs = 4){ - lat_mask <- InsertDim(InsertDim(data$lat,pos = 1, len = 1, name = 'dat'),pos = 3, len = dim(data[[1]])[lon_dim], name = lon_dim) + lat_mask <- InsertDim(InsertDim(data$lat,pos = 1, len = 1, name = 'dat'), pos = 3, len = dim(data[[1]])[lon_dim], name = lon_dim) + if(names(dim(lat_mask)[2]) == ""){names(dim(lat_mask))[2] <- 'latitude'} # extract mask of NA locations to return to NA the final result mask_NA <- array(1, dim = dim(data[[1]])) @@ -113,7 +201,8 @@ evapotranspiration <- function(data, dates_monthly, pet_method = 'hargreaves', targetdim_input <- list(lat_mask = c('dat'), tasmax = c(leadtime_dim, time_dim), tasmin = c(leadtime_dim, time_dim)) } else if (pet_method == 'hargreaves_modified'){ data_input <- list(lat_mask = lat_mask, tasmax = data$tasmax, tasmin = data$tasmin, prlr = data$prlr) - targetdim_input <- list(lat_mask = c('dat'), tasmax = c(leadtime_dim, time_dim), tasmin = c(leadtime_dim, time_dim), prlr = c(leadtime_dim, time_dim)) + targetdim_input <- list(lat_mask = c('dat'), tasmax = c(leadtime_dim, time_dim), + tasmin = c(leadtime_dim, time_dim), prlr = c(leadtime_dim, time_dim)) } else if (pet_method == 'thornthwaite'){ data_input <- list(lat_mask = lat_mask, tas = data$tas) targetdim_input <- list(lat_mask = c('dat'), tas = 
c(leadtime_dim, time_dim)) @@ -125,7 +214,7 @@ evapotranspiration <- function(data, dates_monthly, pet_method = 'hargreaves', target_dims = targetdim_input, output_dims = c(leadtime_dim, time_dim), pet_method = pet_method, - dates_monthly = dates_monthly, # array of 0 and 1 used to build the data_input to a complete array of the full years that are considered + dates_monthly = dates_monthly, # array of 0 and 1 used to build the data_input to complete array of the full years dim_month = leadtime_dim, dim_year = time_dim, fun = atomic_pet, ncores = n_procs) @@ -154,7 +243,7 @@ accumulation <- function(diff_P_PET, dates_monthly, accum = 1, time_dim = 'syear accum_result <- Apply(data = list(diff_P_PET), target_dims = list(diff_P_PET = c(leadtime_dim,time_dim)), - dates_monthly = dates_monthly, # array of 0 and 1 used to build the data_input to a complete array of the full years that are considered + dates_monthly = dates_monthly, # array of 0 and 1 used to build the data_input to complete array of the full years accum = accum, output_dims = c(leadtime_dim, time_dim), #c('time','sdate'), dim_month = leadtime_dim, dim_year = time_dim, @@ -181,24 +270,38 @@ accumulation <- function(diff_P_PET, dates_monthly, accum = 1, time_dim = 'syear spei_standardization <- function(data_accum, leadtime_dim, time_dim, memb_dim, handle_infinity, cross_validation, n_procs, accum, param_error, - method ='parametric', distribution = 'log-Logistic', fit='ub-pwm'){ - data_spei <- data_accum - - data_spei <- Apply(data = list(data_accum), - target_dims = list(data = c(leadtime_dim,time_dim, memb_dim)), - output_dims = c(leadtime_dim,time_dim, memb_dim), - dim_month = leadtime_dim, - dim_year = time_dim, - dim_memb = memb_dim, - handle_infinity = handle_infinity, - cross_validation = cross_validation, - method = method, distribution = distribution, fit = fit, - ref_period = NULL, - param_error = param_error, - fun = atomic_spei, - ncores = n_procs)$output1 + params = NULL, method ='parametric', distribution = 'log-Logistic', fit='ub-pwm'){ - return(data_spei) + n_leadtimes <- dim(data_accum)[leadtime_dim] + n_sdates_params <- dim(data_accum)[time_dim] + if (!cross_validation){ + n_sdates_params <- 1 + } + + if (is.null(params)){ + params <- array(NA, dim = c(n_sdates_params, n_leadtimes, coef = 3)) + names(dim(params)) <- c(time_dim, leadtime_dim, 'coef') + } else if (length(dim(params)) < 2) { + params <- array(params, dim = c(length(params), n_sdates_params, n_leadtimes)) + params <- aperm(params, c(2,3,1)) # dim(params): [time_dim, leadtime_dim, coef] with the values repeated each time_dim and leadtime_dim + names(dim(params)) <- c(time_dim, leadtime_dim, 'coef') + } + + spei <- Apply(data = list(data = data_accum, params = params), + target_dims = list(data = c(leadtime_dim, time_dim, memb_dim), params = c(time_dim, leadtime_dim, 'coef')), + output_dims = list(data_spei = c(leadtime_dim, time_dim, memb_dim), params = c(time_dim, leadtime_dim, 'coef')), + dim_month = leadtime_dim, + dim_year = time_dim, + dim_memb = memb_dim, + handle_infinity = handle_infinity, + cross_validation = cross_validation, + method = method, distribution = distribution, fit = fit, + ref_period = NULL, + param_error = param_error, + fun = atomic_spei, + ncores = n_procs) + + return(spei) # spei is a list of data_spei and params } @@ -294,7 +397,9 @@ atomic_accum <- function(data, dates_monthly, accum, dim_month, dim_year){ # Accumulation at different timescales data_sum_x <- rollapply(data_vector, accum, sum) # rollapply {zoo} A generic 
function for applying a function to rolling margins of an array. - data_sum_x <- c(rep(NA, accum-1), data_sum_x) # adds as many NAs as needed at the begining to account for the months that cannot be added (depends on accu) and so that the position in the vector corresponds to the accumulated of the previous months (instead of the accumulated of the next months) + data_sum_x <- c(rep(NA, accum-1), data_sum_x) # adds as many NAs as needed at the begining to account for the months that cannot be added + # (depends on accu) and so that the position in the vector corresponds to the accumulated + # of the previous months (instead of the accumulated of the next months) data_sum_x <- data_sum_x[which(dates_monthly == 1)] # discard the months that don't appear in the original data accum_result <- array(data_sum_x, dim = c(ftime,n_sdates)) # return to matrix form if (accum > 1){ @@ -305,8 +410,8 @@ atomic_accum <- function(data, dates_monthly, accum, dim_month, dim_year){ } -atomic_spei <- function(data, dim_month, dim_year, dim_memb, ref_period, handle_infinity, cross_validation, param_error, - method ='parametric', distribution = 'log-Logistic', fit='ub-pwm'){ +atomic_spei <- function(data, params, dim_month, dim_year, dim_memb, ref_period, handle_infinity, cross_validation, + param_error, method ='parametric', distribution = 'log-Logistic', fit='ub-pwm'){ # data: [time, sdate, memb] if (is.null(ref_period)){ ref.start <- NULL @@ -318,23 +423,30 @@ atomic_spei <- function(data, dim_month, dim_year, dim_memb, ref_period, handle_ if (all(is.na(data))) { speiX <- array(NA, dim(data)) - } else if (var(data, na.rm=T) == 0) { # if the data [time, sdate, memb] has no variability it will raise an error further down, so we assign a value to the result and skip the step + } else if (var(data, na.rm=T) == 0) { # if the data [time, sdate, memb] has no variability it will raise an error further down, + # so we assign a value to the result and skip the step speiX <- array(param_error, dim(data)) } else { - speiX <- spX(data, ref.start=ref.start, ref.end=ref.end, - method = method, distribution = distribution, fit = fit, - dim_month = dim_month, dim_year = dim_year, dim_memb = dim_memb, - handle_infinity = handle_infinity, cross_validation = cross_validation) + c(speiX, params) %<-% spX(data, ref.start=ref.start, ref.end=ref.end, params = params, + method = method, distribution = distribution, fit = fit, + dim_month = dim_month, dim_year = dim_year, dim_memb = dim_memb, + handle_infinity = handle_infinity, cross_validation = cross_validation) } - return(speiX) + + return(list(speiX, params)) } ### Support functions -spX <- function(acc_data, ref.start, ref.end, na.rm = TRUE, method = 'parametric', distribution = 'log-Logistic', fit = 'ub-pwm', dim_month, dim_year, dim_memb, handle_infinity, cross_validation){ - +spX <- function(acc_data, ref.start, ref.end, params = params, na.rm = TRUE, + method = 'parametric', distribution = 'log-Logistic', fit = 'ub-pwm', + dim_month, dim_year, dim_memb, handle_infinity, cross_validation){ + # acc_data: [ftime, syear, nmemb] + # params: [syear, ftime, coef] + + n_coef_max <- dim(params)['coef'] # maximum number of parameters needed to define any of the considered distributions if (!(method %in% c('parametric', 'non-parametric'))) { stop('pcX script error: SPEI can be only computed using the following approach: parametric or non-parametric') @@ -346,14 +458,31 @@ spX <- function(acc_data, ref.start, ref.end, na.rm = TRUE, method = 'parametric spei_mod <- array(data = NA, dim = 
c(ftime, n_sdates, nmemb)) names(dim(spei_mod)) <- c(dim_month, dim_year, dim_memb) + if (cross_validation){ + params_result <- array(data = NA, dim = c(n_sdates, ftime, n_coef_max)) + } else { + params_result <- array(data = NA, dim = c(1, ftime, n_coef_max)) + } - for(ff in 1:ftime){ # treat months independently + for(ff in 1:ftime){ # treat months independently Accum_D_temp <- ClimProjDiags::Subset(acc_data, along = dim_month, indices = ff, drop = FALSE) - x_spei_mod <- spX_ftime(Accum_D_temp, na.rm = TRUE, method = method, distribution = distribution, fit = fit, ref.start = ref.start, ref.end = ref.end, n_sdates = n_sdates, nmemb = nmemb, handle_infinity = handle_infinity, cross_validation = cross_validation) + c(x_spei_mod, params_ff) %<-% spX_ftime(Accum_D_temp, na.rm = TRUE, method = method, distribution = distribution, + fit = fit, ref.start = ref.start, ref.end = ref.end, + params = if(all(is.na(params))){NULL}else{params[,ff,]}, n_sdates = n_sdates, nmemb = nmemb, + handle_infinity = handle_infinity, cross_validation = cross_validation) spei_mod[ff,,] <- x_spei_mod + coef_names <- names(params_ff) + if (length(params_ff) < n_coef_max){ # lengthen dimension coef of params_ff in case it doesn't match the corresponding dimension of parms_months + params_ff <- append(params_ff, array(NA, dim = n_coef_max - length(params_ff))) + coef_names <- append(coef_names, '') + } + params_result[,ff,] <- params_ff } + + colnames(params_result) <- coef_names + names(dim(params_result)) <- c(dim_year, dim_month, 'coef') - return(spei_mod) + return(list(spei = spei_mod, params = params_result)) } # HandleInfinity: @@ -379,7 +508,7 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm SPEI = qnorm((bp-0.44)/(length(data)+0.12)) - return(SPEI) + return(SPEI) # it won't return params to be used in exp_cor; also it is not using handle_infinity nor cross_validation } else { @@ -421,6 +550,9 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm } else { loop_years <- 1 } + + params_result <- array(NA, dim = c(loop_years, dim_one)) + colnames(params_result) <- names(coef) for (nsd in 1:loop_years) { # loop over years (in case of cross_validation) # Cumulative series (acu) @@ -442,6 +574,7 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm } if (!is.null(params)) { f_params = as.vector(params) + params_result[nsd,] <- f_params } else { if (length(acu.sorted) >= 4){ # else coef will be NA # Calculate probability weighted moments based on fit with lmomco or TLMoments @@ -470,16 +603,23 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm if(distribution == 'log-Logistic' && fit == 'max-lik'){ f_params = parglo.maxlik(acu.sorted, f_params)$para } + params_result[nsd,] <- f_params } # end if dor the case the L-moments are not valid (std_index will be NA) } # end if for the case there are not enough values to estimate the parameters (std_index will be NA) } # end estimation of f_param # Calculate cdf based on distribution with lmom - cdf_res = switch(distribution, + if (all(is.na(params_result[nsd,]))){ + cdf_res <- NA + } else { + f_params <- params_result[nsd,] + f_params <- f_params[which(!is.na(f_params))] + cdf_res = switch(distribution, "log-Logistic" = lmom::cdfglo(data, f_params), "Gamma" = lmom::cdfgam(data, f_params), "PearsonIII" = lmom::cdfpe3(data, f_params) - ) - + ) + } + std_index_cv <- array(qnorm(cdf_res), dim = c(n_sdates,nmemb)) # Adjust if user chose Gamma or 
PearsonIII - Not tested: For future development @@ -490,20 +630,22 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm std_index[nsd,] <- std_index_cv[nsd,] } else { std_index <- std_index_cv - } + } } } # end if for the case there is no variability } # end if for the case all NA in acu } # next year (in case of cross_validation or all done if cross_validation == F) - + if(handle_infinity == 'TRUE'){ # could also use "param_error" ?; we are giving it the min/max value of the grid point std_index[is.infinite(std_index) & std_index < 0] <- min(std_index[!is.infinite(std_index)]) std_index[is.infinite(std_index) & std_index > 0] <- max(std_index[!is.infinite(std_index)]) } - - } # end parametric - return(std_index) # fitted is the only thing used after calling this function + return(list(std_index = std_index, params = params_result)) # f_params will be params only if cross_validation is FALSE + # (otherwise there will be one f_params per year; + # but the output params will be read only in the case that + # it is called with cross_validation FALSE) + + } # end parametric } - -- GitLab From f4f75caec0005b4e9d5969b791416c39c03182ac Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 25 May 2023 17:20:51 +0200 Subject: [PATCH 03/87] Develop PeriodSPEI within the package csindicators from Alba's code --- R/PeriodSPEI.R | 740 +++++++++++++++++++++++++++++++------------------ 1 file changed, 475 insertions(+), 265 deletions(-) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index d56916b..1eef63d 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -1,167 +1,334 @@ -PeriodSPEI <- function(tas = NULL, tasmax = NULL, tasmin = NULL, pet = NULL, prlr, # s2dv_cubes (with coordinates lat = prlr$coords$latitude and Dates) - time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', lon_dim = 'longitude', lat_dim = 'latitude', - accum = 1, start = NULL, end = NULL, - pet_method = NULL, - standardization = TRUE, - param_error = -9999, - handle_infinity = FALSE, - cross_validation = FALSE, - method = 'parametric', distribution = 'log-Logistic', fit='ub-pwm', - n_procs = 4){ - - # check provided data and parameters: - if (!is.null(pet) & !is.null(pet_method)){ - print ('WARNING: pet data is provided and also a pet estimation method, the provided pet data will be used and NOT estimated') - pet_method <- NULL - } - if (is.null(pet_method) & is.null(pet)){ - stop ('variable pet needs to be provided or a pet_method selected') - } - if ((pet_method == 'hargreaves' | pet_method == 'hargreaves_modified') & (is.null(tasmax) | is.null(tasmin))){ - stop (paste0('Need to provide tasmax and tasmin for the selected method ', pet_method)) - } - if (pet_method == 'thornthwaite' & is.null(tas)){ - stop (paste0('Need to provide tas for the selected method ', pet_method)) - } - print('WARNING: temperature needs to be in C and precipitation in mm/month') # there is no check - # check if accumulation period is possible - if(accum > dim(prlr$data)[leadtime_dim][[1]]){ - stop(paste0('ERROR: Cannot compute accumulation of ', accum, ' months because loaded data has only ', dim(prlr$data)[leadtime_dim][[1]], ' months.')) - } +#------------------------------------------------------------------------------- +# To DO: Add documentation of same dimensions exp and exp_cor +# To DO: Add documentation for units +# TO DO: Write Documentation + +# library(s2dv) # InsertDim, ... 
+# library(multiApply) # Apply +# library(ClimProjDiags) # Subset +# library(zeallot) # multiple assignment %<-% +# library(SPEI) # estimation of evapotranspiration (e.g. hargreaves function) +# library(zoo) # rollapply, as.Date +# library(TLMoments) # pwd (Calculate probability weighted moments based on fit) +# library(lmomco) # pwm2lmom (probability weighted moments to L-Moments) +# library(lubridate) # year(), month(),... + +# library(CSTools) +#------------------------------------------------------------------------------- +#'@param exp A list with the 's2dv_cube' object class containing the seasonal +#' forecast experiment in the data element. +#'@param exp_cor An object of class \code{s2dv_cube} in which the quantile +#' PeriodSPEI should be applied. If it is not specified, the PeriodSPEI +#' is calculated from object 'exp'. +#'@param pet Multidimensional array containing the Potential EvapoTranspiration +#' data. If it is NULL it is calculated using pet_method. It is NULL by default. +#'@param time_dim +#'@param leadtime_dim +#'@param memb_dim +#'@param lat_dim +#--- other params from CSIndicators +#'@param accum +#'@param start +#'@param end +#' +# --- other params +#'@param pet_method +#'@param standardization +#'@param params +#'@param param_error +#'@param handle_infinity +#'@param cross_validation +#'@param method +#'@param distribution +#'@param fit +#'@param ncores +CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, + time_dim = 'syear', leadtime_dim = 'time', + memb_dim = 'ensemble', lat_dim = 'latitude', + accum = 1, start = NULL, end = NULL, + pet_method = c('hargreaves', 'hargreaves'), + standardization = TRUE, + params = NULL, # not used if standardization is FALSE + param_error = -9999, # not used if standardization is FALSE + handle_infinity = FALSE, # not used if standardization is FALSE + cross_validation = FALSE, # not used if standardization is FALSE + method = 'parametric', distribution = 'log-Logistic', + fit = 'ub-pwm', # not used if standardization is FALSE + ncores = 4) { + + #----------------------------------------------------------------------------- + # Part (1): Initial structure checks + # coordinates + .KnownLatNames <- CSTools:::.KnownLatNames() + + if (!any(names(exp[[1]]$coords) %in% .KnownLatNames)) { + stop("Spatial coordinate names of parameter 'obsL' do not match any ", + "of the names accepted by the package.") + } + lat_name <- names(exp[[1]]$coords)[[which(names(exp[[1]]$coords) %in% .KnownLatNames)]] + + #----------------------------------------------------------------------------- + # Part (2): Call to PeriodSPEI + + res <- PeriodSPEI(exp = lapply(exp, function(x) x$data), + dates = exp[[1]]$attrs$Dates, + lat = exp[[1]]$coords[[lat_name]], + exp_cor = lapply(exp_cor, function(x) x$data), + pet = pet, + time_dim = time_dim, leadtime_dim = leadtime_dim, + memb_dim = memb_dim, lat_dim = lat_dim, + accum = accum, start = start, end = end, + pet_method = pet_method, standardization = standardization, + params = params, param_error = param_error, + handle_infinity = handle_infinity, + cross_validation = cross_validation, + method = method, distribution = distribution, + fit = fit, ncores = ncores) + return(res) - # complete dates - dates <- prlr$attrs$Dates - dates_complete_daily <- as.Date(as.Date(paste(lubridate::year(min(dates)), 01, 01, sep='-')):as.Date(paste(lubridate::year(max(dates)), 12, 31, sep='-'))) - dates_complete_daily_to_monthly <- lubridate::day(dates_complete_daily) - dates_complete_monthly <- 
dates_complete_daily[which(dates_complete_daily_to_monthly == 1)] - dates_monthly <- array(0, dim=length(dates_complete_monthly)) - for (dd in 1:length(dates)){ - ii <- which(dates_complete_monthly == as.Date(paste(lubridate::year(dates[dd]), lubridate::month(dates[dd]), 01, sep = '-'))) - dates_monthly[ii] <- 1 - } +} - # Evapotranspiration estimation (unless pet is already provided) - if (is.null(pet)){ - if (pet_method == 'hargreaves'){ - data <- list(tasmax = tasmax$data, tasmin = tasmin$data, lat = prlr$coords$latitude) - } else if (pet_method == 'hargreaves_modified'){ - data <- list(tasmax = tasmax$data, tasmin = tasmin$data, prlr = prlr$data, lat = prlr$coords$latitude) - } else if (pet_method == 'thornthwaite'){ - data <- list(tas = tas$data, lat = prlr$coords$latitude) - } - pet <- evapotranspiration(data, dates_monthly, pet_method, time_dim, leadtime_dim, memb_dim, lon_dim, lat_dim, n_procs) - } - # Accumulation - diff_P_PET <- prlr$data - pet - data_accum <- accumulation(diff_P_PET, dates_monthly, accum, time_dim, leadtime_dim, memb_dim, n_procs) - - # Standardization: - if (standardization == TRUE){ - spei_dat <- spei_standardization(data_accum = data_accum, - leadtime_dim = leadtime_dim, - time_dim = time_dim, - memb_dim = memb_dim, - cross_validation = cross_validation, - handle_infinity = handle_infinity, - n_procs = n_procs, - accum = accum, - param_error = param_error, - method = method, distribution = distribution, fit = fit) - } else { - spei_dat <- data_accum - } - return(spei_dat) +PeriodSPEI <- function(exp, dates, lat, + exp_cor = NULL, pet = NULL, + time_dim = 'syear', leadtime_dim = 'time', + memb_dim = 'ensemble', lat_dim = 'latitude', + accum = 1, start = NULL, end = NULL, + pet_method = c('hargreaves', 'hargreaves'), + standardization = TRUE, + params = NULL, param_error = -9999, + handle_infinity = FALSE, cross_validation = FALSE, + method = 'parametric', distribution = 'log-Logistic', + fit = 'ub-pwm', ncores = 4) { + + #----------------------------------------------------------------------------- + + # Part (1): Initial checks + + ## pet and pet_method + if (is.null(pet_method) & is.null(pet)) { + warning("Parameter 'pet_method' cannot be NULL if pet is not provided.") + } + if (!is.null(exp_cor)) { + if (length(pet_method) == 1) { + pet_method <- rep(pet_method, 2) + } + if (length(exp_cor) < 1) { + exp_cor <- NULL + } + } else { + if (lenght(pet_method) > 1) { + warning("Parameter 'pet_method' is of length 2, only first value will be used.") + pet_method <- pet_method[1] + } + } + ## exp + ## TO DO: Check order: tasmax, tasmin, prlr + if (all(c('tasmin', 'tasmax', 'prlr') %in% names(exp))) { + # hargreaves modified: 'tasmin', 'tasmax', 'prlr' and 'lat' + if (!any(pet_method %in% c('hargreaves_modified', 'hargreaves'))) { + warning("Parameter 'pet_method' needs to be 'hargreaves_modified' or 'hargreaves'.") + pet_method[1] <- 'hargreaves_modified' + } + } else if (all(c('tasmin', 'tasmax') %in% names(exp))) { + # hargreaves: 'tasmin', 'tasmax' and 'lat' + if (pet_method != c('hargreaves')) { + warning("Parameter 'pet_method' needs to be 'hargreaves'.") + pet_method[1] <- 'hargreaves' + } + + } else if ('tas' %in% names(exp)) { + # thornthwaite: 'tas' (mean), 'lat' + pet_method[1] <- 'thornthwaite' + + } else { + stop("Parameter 'exp' needs to be a list with the needed variables.") + } + + # warning(' needs to be in C and precipitation in mm/month') # there is no check + + # check if accumulation period is possible + if (accum > dim(exp[[1]])[leadtime_dim]) 
{ + stop(paste0('ERROR: Cannot compute accumulation of ', accum, ' months because loaded data has only ', + dim(exp[[1]])[leadtime_dim], ' months.')) + } + # method + if (!(method %in% c('parametric', 'non-parametric'))) { + stop('pcX script error: SPEI can be only computed using the following approach: parametric or non-parametric') + } + # distribution + if (!(distribution %in% c('log-Logistic', 'Gamma', 'PearsonIII'))) { + stop('Distrib must be one of "log-Logistic", "Gamma" or "PearsonIII"') + } + # fit + if (!(fit %in% c('max-lik', 'ub-pwm', 'pp-pwm'))) { + stop('Method must be one of "ub-pwm" (default), "pp-pwm" or "max-lik"') + } + + #----------------------------------------------------------------------------- + # Part (2): preparation + + # complete dates + ini_date <- as.Date(paste(lubridate::year(min(dates)), 01, 01, sep= '-')) + end_date <- as.Date(paste(lubridate::year(max(dates)), 12, 31, sep='-')) + dates_complete_daily <- as.Date(ini_date:end_date) + + dates_complete_daily_to_monthly <- lubridate::day(dates_complete_daily) + dates_complete_monthly <- dates_complete_daily[which(dates_complete_daily_to_monthly == 1)] + dates_monthly <- array(0, dim=length(dates_complete_monthly)) + for (dd in 1:length(dates)) { + ii <- which(dates_complete_monthly == as.Date(paste(lubridate::year(dates[dd]), lubridate::month(dates[dd]), 01, sep = '-'))) + dates_monthly[ii] <- 1 + } + + #----------------------------------------------------------------------------- + # Part (3): Compute PeriodSPEI + k = 0 + spei_res <- NULL + computed_pet <- FALSE + + for (data in .return2list(exp, exp_cor)) { + k = k + 1 + # Evapotranspiration estimation (unless pet is already provided) + if (is.null(pet) | computed_pet) { + pet <- evapotranspiration(data, dates_monthly, pet_method = pet_method[k], time_dim, + leadtime_dim, memb_dim, lat_dim, ncores) + computed_pet <- TRUE + } + # Accumulation + diff_P_PET <- data$prlr - pet + data_accum <- accumulation(diff_P_PET, dates_monthly, accum, time_dim, + leadtime_dim, memb_dim, ncores) + # Standardization: + if (standardization == TRUE) { + spei_dat <- spei_standardization(data_accum = data_accum, + leadtime_dim = leadtime_dim, + time_dim = time_dim, + memb_dim = memb_dim, + cross_validation = cross_validation, + handle_infinity = handle_infinity, + ncores = ncores, + accum = accum, + param_error = param_error, + params = params, method = method, + distribution = distribution, fit = fit) + params <- spei_dat$params + } else { + spei_dat <- data_accum + } + spei_res[[k]] <- spei_dat[[1]] + } + + if (standardization) { + spei_res[[k+1]] <- params + if (is.null(exp_cor)) { + names(spei_res) <- c('exp', 'params') + } else { + names(spei_res) <- c('exp', 'exp_cor', 'params') + } + } else { + if (is.null(exp_cor)) { + names(spei_res) <- c('exp') + } else { + names(spei_res) <- c('exp', 'exp_cor') + } + } + + return(spei_res) + } - -## functions +.return2list <- function(data1, data2 = NULL) { + if (is.null(data2)) { + return(list(data1)) + } else { + return(list(data1, data2)) + } +} + + + evapotranspiration <- function(data, dates_monthly, pet_method = 'hargreaves', time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', - lon_dim = 'longitude', lat_dim = 'latitude', - n_procs = 4){ + lat_dim = 'latitude', + ncores = 4) { - lat_mask <- InsertDim(InsertDim(data$lat,pos = 1, len = 1, name = 'dat'),pos = 3, len = dim(data[[1]])[lon_dim], name = lon_dim) + ## corrected:: + lat_mask <- array(lat, dim = c(1, length(lat))) + names(dim(lat_mask)) <- c('dat', lat_dim) # 
extract mask of NA locations to return to NA the final result - mask_NA <- array(1, dim = dim(data[[1]])) - if (pet_method == 'hargreaves'){ - mask_NA[which(is.na(data$tasmax))] <- 0 - mask_NA[which(is.na(data$tasmin))] <- 0 - } - if (pet_method == 'hargreaves_modified'){ - mask_NA[which(is.na(data$tasmax))] <- 0 - mask_NA[which(is.na(data$tasmin))] <- 0 - mask_NA[which(is.na(data$prlr))] <- 0 - } - if (pet_method == 'thornthwaite'){ - mask_NA[which(is.na(tas$data))] <- 0 - } - + + mask_NA <- array(1, dim = dim(data[[1]])) + if (pet_method == 'hargreaves') { + mask_NA[which(is.na(data$tasmax))] <- 0 + mask_NA[which(is.na(data$tasmin))] <- 0 + } else if (pet_method == 'hargreaves_modified') { + mask_NA[which(is.na(data$tasmax))] <- 0 + mask_NA[which(is.na(data$tasmin))] <- 0 + mask_NA[which(is.na(data$prlr))] <- 0 + } else if (pet_method == 'thornthwaite') { + mask_NA[which(is.na(data$tas))] <- 0 + } + # replace NA with 0 - for (dd in 1:length(data)){ + for (dd in 1:length(data)) { data[[dd]][which(is.na(data[[dd]]))] <- 0 } # prepare data - if (pet_method == 'hargreaves'){ - data_input <- list(lat_mask = lat_mask, tasmax = data$tasmax, tasmin = data$tasmin) - targetdim_input <- list(lat_mask = c('dat'), tasmax = c(leadtime_dim, time_dim), tasmin = c(leadtime_dim, time_dim)) - } else if (pet_method == 'hargreaves_modified'){ - data_input <- list(lat_mask = lat_mask, tasmax = data$tasmax, tasmin = data$tasmin, prlr = data$prlr) - targetdim_input <- list(lat_mask = c('dat'), tasmax = c(leadtime_dim, time_dim), tasmin = c(leadtime_dim, time_dim), prlr = c(leadtime_dim, time_dim)) - } else if (pet_method == 'thornthwaite'){ - data_input <- list(lat_mask = lat_mask, tas = data$tas) - targetdim_input <- list(lat_mask = c('dat'), tas = c(leadtime_dim, time_dim)) - } else { - stop (paste0('Unknown pet_method ', pet_method)) - } - - PET_estimated <- Apply(data = data_input, - target_dims = targetdim_input, + target_dims_data <- lapply(data, function(x) rep(c(leadtime_dim, time_dim), 1)) + # file <- tempfile() + # file = 'out.txt' + # sink(file) + PET_estimated <- Apply(data = c(list(lat_mask = lat_mask), data), + target_dims = c(list(lat_mask = 'dat'), target_dims_data), output_dims = c(leadtime_dim, time_dim), pet_method = pet_method, - dates_monthly = dates_monthly, # array of 0 and 1 used to build the data_input to a complete array of the full years that are considered - dim_month = leadtime_dim, dim_year = time_dim, - fun = atomic_pet, ncores = n_procs) - + dates_monthly = dates_monthly, + leadtime_dim = leadtime_dim, time_dim = time_dim, + fun = atomic_pet, ncores = ncores)$output1 + # sink(NULL) + # captured_message <- readLines(file) + # print(paste0('Length captured!!', length(captured_message))) + # print(paste0('unique(captured_message)', length(unique(captured_message)))) + # reorder dims in PET_estimated - dims_order <- array(NA, length(dim(data[[1]]))) - for (ord in 1:length(dim(data[[1]]))){ - dims_order[ord] <- which(names(dim(PET_estimated$output1)) == names(dim(data[[1]]))[ord]) - } - data_pet <- aperm(PET_estimated$output1, dims_order) + pos <- match(names(dim(data[[1]])),names(dim(PET_estimated))) + PET_estimated <- aperm(PET_estimated, pos) # restore original NAs from mask_NA - data_pet[which(mask_NA == 0 )] <- NA + PET_estimated[which(mask_NA == 0 )] <- NA - return(data_pet) + return(PET_estimated) } -accumulation <- function(diff_P_PET, dates_monthly, accum = 1, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', n_procs = 4){ +accumulation <- function(diff_P_PET, 
dates_monthly, accum = 1, + time_dim = 'syear', leadtime_dim = 'time', + memb_dim = 'ensemble', ncores = 4) { - if(!time_dim %in% names(dim(diff_P_PET))){ + if (!time_dim %in% names(dim(diff_P_PET))) { diff_P_PET <- InsertDim(diff_P_PET, posdim = 1, lendim = 1, name = time_dim) } - if(!leadtime_dim %in% names(dim(diff_P_PET))){ + if (!leadtime_dim %in% names(dim(diff_P_PET))) { diff_P_PET <- InsertDim(diff_P_PET, posdim = 1, lendim = 1, name = leadtime_dim) } accum_result <- Apply(data = list(diff_P_PET), target_dims = list(diff_P_PET = c(leadtime_dim,time_dim)), - dates_monthly = dates_monthly, # array of 0 and 1 used to build the data_input to a complete array of the full years that are considered + dates_monthly = dates_monthly, accum = accum, output_dims = c(leadtime_dim, time_dim), #c('time','sdate'), - dim_month = leadtime_dim, dim_year = time_dim, - fun = atomic_accum, ncores = n_procs) + leadtime_dim = leadtime_dim, time_dim = time_dim, + fun = atomic_accum, ncores = ncores) # recover essential lost dims (if they had length 1 they'd have been dropped in previous step): - for (d in c(time_dim, leadtime_dim, 'latitude', 'longitude', memb_dim)){ + for (d in c(time_dim, leadtime_dim, 'latitude', 'longitude', memb_dim)) { if(!d %in% names(dim(accum_result$output1))){ accum_result$output1 <- InsertDim(data = accum_result$output1, posdim = length(names(dim(accum_result$output1))) + 1, lendim = 1, name = d) } @@ -179,113 +346,126 @@ accumulation <- function(diff_P_PET, dates_monthly, accum = 1, time_dim = 'syear } spei_standardization <- function(data_accum, - leadtime_dim, time_dim, memb_dim, handle_infinity, cross_validation, - n_procs, accum, param_error, - method ='parametric', distribution = 'log-Logistic', fit='ub-pwm'){ - data_spei <- data_accum - - data_spei <- Apply(data = list(data_accum), - target_dims = list(data = c(leadtime_dim,time_dim, memb_dim)), - output_dims = c(leadtime_dim,time_dim, memb_dim), - dim_month = leadtime_dim, - dim_year = time_dim, - dim_memb = memb_dim, - handle_infinity = handle_infinity, - cross_validation = cross_validation, - method = method, distribution = distribution, fit = fit, - ref_period = NULL, - param_error = param_error, - fun = atomic_spei, - ncores = n_procs)$output1 - - return(data_spei) + leadtime_dim, time_dim, memb_dim, handle_infinity, + cross_validation, ncores, accum, param_error, + params = NULL, method ='parametric', + distribution = 'log-Logistic', fit='ub-pwm') { -} + n_leadtimes <- dim(data_accum)[leadtime_dim] + n_sdates_params <- dim(data_accum)[time_dim] + + if (!cross_validation) { + n_sdates_params <- 1 + } -### Atomic functions + if (is.null(params)) { + params <- array(NA, dim = c(n_sdates_params, n_leadtimes, coef = 3)) + names(dim(params)) <- c(time_dim, leadtime_dim, 'coef') + } else if (length(dim(params)) < 2) { + params <- array(params, dim = c(length(params), n_sdates_params, n_leadtimes)) + params <- aperm(params, c(2,3,1)) # dim(params): [time_dim, leadtime_dim, coef] with the values repeated each time_dim and leadtime_dim + names(dim(params)) <- c(time_dim, leadtime_dim, 'coef') + } + + spei <- Apply(data = list(data = data_accum, params = params), + target_dims = list(data = c(leadtime_dim, time_dim, memb_dim), + params = c(time_dim, leadtime_dim, 'coef')), + output_dims = list(data_spei = c(leadtime_dim, time_dim, memb_dim), + params = c(time_dim, leadtime_dim, 'coef')), + leadtime_dim = leadtime_dim, + time_dim = time_dim, + dim_memb = memb_dim, + handle_infinity = handle_infinity, + cross_validation = 
cross_validation, + method = method, distribution = distribution, fit = fit, + ref_period = NULL, + param_error = param_error, + fun = atomic_spei, + ncores = ncores) + + return(spei) # spei is a list of data_spei and params +} -atomic_pet <- function(pet_method, dates_monthly, dim_month, dim_year, lat_mask = NULL, data2 = NULL, data3 = NULL, data4 = NULL){ +atomic_pet <- function(lat_mask = NULL, data2 = NULL, data3 = NULL, data4 = NULL, + pet_method, dates_monthly, leadtime_dim, time_dim) { - ftime = as.numeric(dim(data2)[leadtime_dim]) - n_sdates = as.numeric(dim(data2)[time_dim]) - - # create a vector from data but adding 0 to achive complete time series of the considered period - #(starting in January of the first year) so that the solar radiation estimation is computed - # in each case for the correct month - if (!is.null(data2)){ - data_tmp <- as.vector(data2) - data2 <- array(0, dim = length(dates_monthly)) - count <- 1 - for (dd in 1:length(dates_monthly)){ - if (dates_monthly[dd] == 1){ - data2[dd] <- data_tmp[count] - count <- count + 1 - } + dims <- dim(data2) + + # create a vector from data but adding 0 to achive complete time series + # of the considered period + # (starting in January of the first year) so that the solar radiation + # estimation is computed in each case for the correct month + + if (!is.null(data2)) { + data_tmp <- as.vector(data2) + data2 <- array(0, dim = length(dates_monthly)) + count <- 1 + for (dd in 1:length(dates_monthly)) { + if (dates_monthly[dd] == 1) { + data2[dd] <- data_tmp[count] + count <- count + 1 } - rm(data_tmp) } - if (!is.null(data3)){ - data_tmp <- as.vector(data3) - data3 <- array(0, dim = length(dates_monthly)) - count <- 1 - for (dd in 1:length(dates_monthly)){ - if (dates_monthly[dd] == 1){ - data3[dd] <- data_tmp[count] - count <- count + 1 - } + rm(data_tmp) + } + if (!is.null(data3)) { + data_tmp <- as.vector(data3) + data3 <- array(0, dim = length(dates_monthly)) + count <- 1 + for (dd in 1:length(dates_monthly)) { + if (dates_monthly[dd] == 1) { + data3[dd] <- data_tmp[count] + count <- count + 1 } - rm(data_tmp) } - if (!is.null(data4)){ - data_tmp <- as.vector(data4) - data4 <- array(0, dim = length(dates_monthly)) - count <- 1 - for (dd in 1:length(dates_monthly)){ - if (dates_monthly[dd] == 1){ - data4[dd] <- data_tmp[count] - count <- count + 1 - } + rm(data_tmp) + } + if (!is.null(data4)) { + data_tmp <- as.vector(data4) + data4 <- array(0, dim = length(dates_monthly)) + count <- 1 + for (dd in 1:length(dates_monthly)) { + if (dates_monthly[dd] == 1) { + data4[dd] <- data_tmp[count] + count <- count + 1 } - rm(data_tmp) } - if (pet_method == 'hargreaves'){ - x_PET <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), lat = lat_mask, na.rm = FALSE) - x_PET <- x_PET[which(dates_monthly == 1)] # line to return the vector to the size of the actual original data - PET <- array(x_PET, dim = c(ftime,n_sdates)) + rm(data_tmp) + } + if (pet_method == 'hargreaves') { + # NOTE EVA: if NA.RM is FALSE this gives error + x_PET <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), + lat = lat_mask, na.rm = TRUE) + # line to return the vector to the size of the actual original data + x_PET <- x_PET[which(dates_monthly == 1)] + PET <- array(x_PET, dim = dims) } - if (pet_method == 'hargreaves_modified'){ # not sure this works properly - PET <- array(NA, dim = c(ftime,n_sdates)) - for(ns in 1:n_sdates){ - tmax_mod_temp <- data2[12*(ns-1)+(1:12)] #data2[,ns] - tmin_mod_temp <- data3[12*(ns-1)+(1:12)] #data3[,ns] - 
pre_mod_temp <- data4[12*(ns-1)+(1:12)] #data4[,ns] - - # Computation of PET - x_PET <- hargreaves(Tmin = tmin_mod_temp, Tmax = tmax_mod_temp, lat = lat_mask, Pre = pre_mod_temp, na.rm = FALSE) - PET[1:ftime,ns] <- x_PET #dim <- (lon lat ft sd memb) - } + if (pet_method == 'hargreaves_modified') { + x_PET <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), + lat = lat_mask, Pre = as.vector(data4), na.rm = TRUE) + x_PET <- x_PET[which(dates_monthly == 1)] + PET <- array(x_PET, dim = dims) } - if (pet_method == 'thornthwaite'){ + if (pet_method == 'thornthwaite') { x_PET <- thornthwaite(as.vector(data2), lat = lat_mask, na.rm = TRUE) - x_PET <- x_PET[which(dates_monthly == 1)] # line to return the vector to the size of the actual original data - PET <- array(x_PET, dim = c(ftime,n_sdates)) + # line to return the vector to the size of the actual original data + x_PET <- x_PET[which(dates_monthly == 1)] + PET <- array(x_PET, dim = dims) } - return(PET) - } -atomic_accum <- function(data, dates_monthly, accum, dim_month, dim_year){ - # data:[time, syear] +atomic_accum <- function(data, dates_monthly, accum, leadtime_dim, time_dim) { - ftime = as.numeric(dim(data)[dim_month]) - n_sdates = as.numeric(dim(data)[dim_year]) + # data:[time, syear] + ftime <- dim(data)[1] + n_sdates <- dim(data)[2] data_vector <- array(0, dim = length(dates_monthly)) count <- 1 - for (dd in 1:length(dates_monthly)){ + for (dd in 1:length(dates_monthly)) { if (dates_monthly[dd] == 1){ data_vector[dd] <- as.vector(data)[count] count <- count + 1 @@ -294,21 +474,23 @@ atomic_accum <- function(data, dates_monthly, accum, dim_month, dim_year){ # Accumulation at different timescales data_sum_x <- rollapply(data_vector, accum, sum) # rollapply {zoo} A generic function for applying a function to rolling margins of an array. 
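# A minimal standalone sketch of the accumulation step above, assuming a
# complete monthly water-balance series (prlr - pet) with no missing months;
# the object names 'wb' and 'acc' are illustrative only.
library(zoo)
wb    <- rnorm(24)                     # 24 months of precipitation minus PET
accum <- 3                             # 3-month accumulation (e.g. SPEI-3)
acc   <- rollapply(wb, accum, sum)     # sum over each 3-month window
acc   <- c(rep(NA, accum - 1), acc)    # pad so acc[i] covers months (i-accum+1):i
# zoo::rollsum(wb, accum, fill = NA, align = "right") gives the same result.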
- data_sum_x <- c(rep(NA, accum-1), data_sum_x) # adds as many NAs as needed at the begining to account for the months that cannot be added (depends on accu) and so that the position in the vector corresponds to the accumulated of the previous months (instead of the accumulated of the next months) + data_sum_x <- c(rep(NA, accum-1), data_sum_x) # adds as many NAs as needed at the begining to account for the months that cannot be added + # (depends on accu) and so that the position in the vector corresponds to the accumulated + # of the previous months (instead of the accumulated of the next months) data_sum_x <- data_sum_x[which(dates_monthly == 1)] # discard the months that don't appear in the original data accum_result <- array(data_sum_x, dim = c(ftime,n_sdates)) # return to matrix form - if (accum > 1){ + if (accum > 1) { accum_result[1:(accum-1),] <- NA # replace by NA when the accumulation corresponds to months that where not present in the original data } return(accum_result) } - -atomic_spei <- function(data, dim_month, dim_year, dim_memb, ref_period, handle_infinity, cross_validation, param_error, - method ='parametric', distribution = 'log-Logistic', fit='ub-pwm'){ +atomic_spei <- function(data, params, leadtime_dim, time_dim, dim_memb, ref_period, + handle_infinity, cross_validation, param_error, method = 'parametric', + distribution = 'log-Logistic', fit = 'ub-pwm') { # data: [time, sdate, memb] - if (is.null(ref_period)){ + if (is.null(ref_period)) { ref.start <- NULL ref.end <- NULL } else { @@ -318,55 +500,76 @@ atomic_spei <- function(data, dim_month, dim_year, dim_memb, ref_period, handle_ if (all(is.na(data))) { speiX <- array(NA, dim(data)) - } else if (var(data, na.rm=T) == 0) { # if the data [time, sdate, memb] has no variability it will raise an error further down, so we assign a value to the result and skip the step + } else if (var(data, na.rm=T) == 0) { # if the data [time, sdate, memb] has no variability it will raise an error further down, + # so we assign a value to the result and skip the step speiX <- array(param_error, dim(data)) } else { - speiX <- spX(data, ref.start=ref.start, ref.end=ref.end, - method = method, distribution = distribution, fit = fit, - dim_month = dim_month, dim_year = dim_year, dim_memb = dim_memb, - handle_infinity = handle_infinity, cross_validation = cross_validation) + c(speiX, params) %<-% spX(data, ref.start=ref.start, ref.end=ref.end, params = params, + method = method, distribution = distribution, fit = fit, + leadtime_dim = leadtime_dim, time_dim = time_dim, dim_memb = dim_memb, + handle_infinity = handle_infinity, cross_validation = cross_validation) } - return(speiX) + + return(list(speiX, params)) } +#------------------------------------------------------------------------------- + ### Support functions -spX <- function(acc_data, ref.start, ref.end, na.rm = TRUE, method = 'parametric', distribution = 'log-Logistic', fit = 'ub-pwm', dim_month, dim_year, dim_memb, handle_infinity, cross_validation){ - +spX <- function(acc_data, ref.start, ref.end, params = params, na.rm = TRUE, + method = 'parametric', distribution = 'log-Logistic', fit = 'ub-pwm', + leadtime_dim, time_dim, dim_memb, handle_infinity, cross_validation){ + # acc_data: [ftime, syear, nmemb] + # params: [syear, ftime, coef] - if (!(method %in% c('parametric', 'non-parametric'))) { - stop('pcX script error: SPEI can be only computed using the following approach: parametric or non-parametric') - } - - ftime <- as.numeric(dim(acc_data)[dim_month]) - n_sdates <- 
as.numeric(dim(acc_data)[dim_year]) + n_coef_max <- dim(params)['coef'] # maximum number of parameters needed to define any of the considered distributions + + ftime <- as.numeric(dim(acc_data)[leadtime_dim]) + n_sdates <- as.numeric(dim(acc_data)[time_dim]) nmemb <- as.numeric(dim(acc_data)[dim_memb]) spei_mod <- array(data = NA, dim = c(ftime, n_sdates, nmemb)) - names(dim(spei_mod)) <- c(dim_month, dim_year, dim_memb) + names(dim(spei_mod)) <- c(leadtime_dim, time_dim, dim_memb) + if (cross_validation) { + params_result <- array(data = NA, dim = c(n_sdates, ftime, n_coef_max)) + } else { + params_result <- array(data = NA, dim = c(1, ftime, n_coef_max)) + } - for(ff in 1:ftime){ # treat months independently - Accum_D_temp <- ClimProjDiags::Subset(acc_data, along = dim_month, indices = ff, drop = FALSE) - x_spei_mod <- spX_ftime(Accum_D_temp, na.rm = TRUE, method = method, distribution = distribution, fit = fit, ref.start = ref.start, ref.end = ref.end, n_sdates = n_sdates, nmemb = nmemb, handle_infinity = handle_infinity, cross_validation = cross_validation) + for (ff in 1:ftime) { # treat months independently + Accum_D_temp <- ClimProjDiags::Subset(acc_data, along = leadtime_dim, indices = ff, drop = FALSE) + params_tmp <- if (all(is.na(params))) {NULL} else {params[,ff,]} + c(x_spei_mod, params_ff) %<-% spX_ftime(Accum_D_temp, na.rm = TRUE, method = method, distribution = distribution, + fit = fit, ref.start = ref.start, ref.end = ref.end, + params = params_tmp, n_sdates = n_sdates, nmemb = nmemb, + handle_infinity = handle_infinity, cross_validation = cross_validation) spei_mod[ff,,] <- x_spei_mod + coef_names <- names(params_ff) + if (length(params_ff) < n_coef_max){ # lengthen dimension coef of params_ff in case it doesn't match the corresponding dimension of parms_months + params_ff <- append(params_ff, array(NA, dim = n_coef_max - length(params_ff))) + coef_names <- append(coef_names, '') + } + params_result[,ff,] <- params_ff } + + colnames(params_result) <- coef_names + names(dim(params_result)) <- c(time_dim, leadtime_dim, 'coef') - return(spei_mod) + return(list(spei = spei_mod, params = params_result)) } +#------------------------------------------------------------------------------- + # HandleInfinity: spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm = TRUE, method = 'parametric', n_sdates, nmemb, ref.start = NULL, ref.end = NULL, params = NULL, handle_infinity = FALSE, cross_validation = FALSE) { # data:[ftime = 1, syear, ensemble] - - if (!(method %in% c('parametric', 'non-parametric'))) { - stop('SPEI can be only computed using the following approach: parametric or non-parametric') - } - if (method == 'non-parametric'){ + if (method == 'non-parametric') { if (anyNA(data) && na.rm == FALSE) { stop('Error: Data must not contain NAs') @@ -379,48 +582,45 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm SPEI = qnorm((bp-0.44)/(length(data)+0.12)) - return(SPEI) + return(SPEI) # it won't return params to be used in exp_cor; also it is not using handle_infinity nor cross_validation } else { std_index <- array(NA, c(n_sdates, nmemb)) - if (anyNA(data) && na.rm==FALSE) { + if (anyNA(data) && na.rm == FALSE) { stop('Error: Data must not contain NAs') } - if (!(distribution %in% c('log-Logistic', 'Gamma', 'PearsonIII'))) { - stop('Distrib must be one of "log-Logistic", "Gamma" or "PearsonIII"') - } - if (!(fit %in% c('max-lik', 'ub-pwm', 'pp-pwm'))) { - stop('Method must be one of "ub-pwm" (default), "pp-pwm" or 
"max-lik"') - } + coef = switch(distribution, "Gamma" = array(NA, dim = 2, dimnames = list(c('alpha','beta'))), "log-Logistic" = array(NA, dim = 3, dimnames = list(c('xi','alpha','kappa'))), - "PearsonIII" = array(NA, dim = 3, dimnames = list(c('mu','sigma','gamma'))) - ) + "PearsonIII" = array(NA, dim = 3, dimnames = list(c('mu','sigma','gamma')))) dim_one <- length(coef) if (!is.null(params)) { - if (length(params)!=dim_one) { + if (length(params) != dim_one) { stop(paste0('parameters array should have dimensions [', dim_one, ']')) } } # Select window if necessary - if (!is.null(ref.start) && !is.null(ref.end)) { - data.fit <- window(data,ref.start,ref.end) - } else { - data.fit <- data - } + if (!is.null(ref.start) && !is.null(ref.end)) { + data.fit <- window(data,ref.start,ref.end) + } else { + data.fit <- data + } - if(cross_validation == "TRUE") { + if (cross_validation == "TRUE") { loop_years <- n_sdates } else { loop_years <- 1 } + + params_result <- array(NA, dim = c(loop_years, dim_one)) + colnames(params_result) <- names(coef) for (nsd in 1:loop_years) { # loop over years (in case of cross_validation) # Cumulative series (acu) @@ -436,12 +636,13 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm acu_sd = sd(acu.sorted) if (!is.na(acu_sd)){ if (acu_sd != 0){ - if(distribution != "log-Logistic"){ + if(distribution != "log-Logistic") { pze <- sum(acu==0)/length(acu) acu.sorted = acu.sorted[acu.sorted > 0] } if (!is.null(params)) { f_params = as.vector(params) + params_result[nsd,] <- f_params } else { if (length(acu.sorted) >= 4){ # else coef will be NA # Calculate probability weighted moments based on fit with lmomco or TLMoments @@ -470,16 +671,23 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm if(distribution == 'log-Logistic' && fit == 'max-lik'){ f_params = parglo.maxlik(acu.sorted, f_params)$para } + params_result[nsd,] <- f_params } # end if dor the case the L-moments are not valid (std_index will be NA) } # end if for the case there are not enough values to estimate the parameters (std_index will be NA) } # end estimation of f_param # Calculate cdf based on distribution with lmom - cdf_res = switch(distribution, + if (all(is.na(params_result[nsd,]))){ + cdf_res <- NA + } else { + f_params <- params_result[nsd,] + f_params <- f_params[which(!is.na(f_params))] + cdf_res = switch(distribution, "log-Logistic" = lmom::cdfglo(data, f_params), "Gamma" = lmom::cdfgam(data, f_params), "PearsonIII" = lmom::cdfpe3(data, f_params) - ) - + ) + } + std_index_cv <- array(qnorm(cdf_res), dim = c(n_sdates,nmemb)) # Adjust if user chose Gamma or PearsonIII - Not tested: For future development @@ -490,20 +698,22 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm std_index[nsd,] <- std_index_cv[nsd,] } else { std_index <- std_index_cv - } + } } } # end if for the case there is no variability } # end if for the case all NA in acu } # next year (in case of cross_validation or all done if cross_validation == F) - + if(handle_infinity == 'TRUE'){ # could also use "param_error" ?; we are giving it the min/max value of the grid point std_index[is.infinite(std_index) & std_index < 0] <- min(std_index[!is.infinite(std_index)]) std_index[is.infinite(std_index) & std_index > 0] <- max(std_index[!is.infinite(std_index)]) } - - } # end parametric - return(std_index) # fitted is the only thing used after calling this function + return(list(std_index = std_index, params = params_result)) # f_params will be 
params only if cross_validation is FALSE + # (otherwise there will be one f_params per year; + # but the output params will be read only in the case that + # it is called with cross_validation FALSE) + + } # end parametric } - -- GitLab From 4875863e48e8041e167508182d91ef3f744f7a31 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 25 May 2023 17:23:17 +0200 Subject: [PATCH 04/87] Reapply same changes as before resolving the conflicts --- R/PeriodSPEI.R | 740 +++++++++++++++++++++++++++++++------------------ 1 file changed, 475 insertions(+), 265 deletions(-) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index d56916b..1eef63d 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -1,167 +1,334 @@ -PeriodSPEI <- function(tas = NULL, tasmax = NULL, tasmin = NULL, pet = NULL, prlr, # s2dv_cubes (with coordinates lat = prlr$coords$latitude and Dates) - time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', lon_dim = 'longitude', lat_dim = 'latitude', - accum = 1, start = NULL, end = NULL, - pet_method = NULL, - standardization = TRUE, - param_error = -9999, - handle_infinity = FALSE, - cross_validation = FALSE, - method = 'parametric', distribution = 'log-Logistic', fit='ub-pwm', - n_procs = 4){ - - # check provided data and parameters: - if (!is.null(pet) & !is.null(pet_method)){ - print ('WARNING: pet data is provided and also a pet estimation method, the provided pet data will be used and NOT estimated') - pet_method <- NULL - } - if (is.null(pet_method) & is.null(pet)){ - stop ('variable pet needs to be provided or a pet_method selected') - } - if ((pet_method == 'hargreaves' | pet_method == 'hargreaves_modified') & (is.null(tasmax) | is.null(tasmin))){ - stop (paste0('Need to provide tasmax and tasmin for the selected method ', pet_method)) - } - if (pet_method == 'thornthwaite' & is.null(tas)){ - stop (paste0('Need to provide tas for the selected method ', pet_method)) - } - print('WARNING: temperature needs to be in C and precipitation in mm/month') # there is no check - # check if accumulation period is possible - if(accum > dim(prlr$data)[leadtime_dim][[1]]){ - stop(paste0('ERROR: Cannot compute accumulation of ', accum, ' months because loaded data has only ', dim(prlr$data)[leadtime_dim][[1]], ' months.')) - } +#------------------------------------------------------------------------------- +# To DO: Add documentation of same dimensions exp and exp_cor +# To DO: Add documentation for units +# TO DO: Write Documentation + +# library(s2dv) # InsertDim, ... +# library(multiApply) # Apply +# library(ClimProjDiags) # Subset +# library(zeallot) # multiple assignment %<-% +# library(SPEI) # estimation of evapotranspiration (e.g. hargreaves function) +# library(zoo) # rollapply, as.Date +# library(TLMoments) # pwd (Calculate probability weighted moments based on fit) +# library(lmomco) # pwm2lmom (probability weighted moments to L-Moments) +# library(lubridate) # year(), month(),... + +# library(CSTools) +#------------------------------------------------------------------------------- +#'@param exp A list with the 's2dv_cube' object class containing the seasonal +#' forecast experiment in the data element. +#'@param exp_cor An object of class \code{s2dv_cube} in which the quantile +#' PeriodSPEI should be applied. If it is not specified, the PeriodSPEI +#' is calculated from object 'exp'. +#'@param pet Multidimensional array containing the Potential EvapoTranspiration +#' data. If it is NULL it is calculated using pet_method. It is NULL by default. 
+#'@param time_dim +#'@param leadtime_dim +#'@param memb_dim +#'@param lat_dim +#--- other params from CSIndicators +#'@param accum +#'@param start +#'@param end +#' +# --- other params +#'@param pet_method +#'@param standardization +#'@param params +#'@param param_error +#'@param handle_infinity +#'@param cross_validation +#'@param method +#'@param distribution +#'@param fit +#'@param ncores +CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, + time_dim = 'syear', leadtime_dim = 'time', + memb_dim = 'ensemble', lat_dim = 'latitude', + accum = 1, start = NULL, end = NULL, + pet_method = c('hargreaves', 'hargreaves'), + standardization = TRUE, + params = NULL, # not used if standardization is FALSE + param_error = -9999, # not used if standardization is FALSE + handle_infinity = FALSE, # not used if standardization is FALSE + cross_validation = FALSE, # not used if standardization is FALSE + method = 'parametric', distribution = 'log-Logistic', + fit = 'ub-pwm', # not used if standardization is FALSE + ncores = 4) { + + #----------------------------------------------------------------------------- + # Part (1): Initial structure checks + # coordinates + .KnownLatNames <- CSTools:::.KnownLatNames() + + if (!any(names(exp[[1]]$coords) %in% .KnownLatNames)) { + stop("Spatial coordinate names of parameter 'obsL' do not match any ", + "of the names accepted by the package.") + } + lat_name <- names(exp[[1]]$coords)[[which(names(exp[[1]]$coords) %in% .KnownLatNames)]] + + #----------------------------------------------------------------------------- + # Part (2): Call to PeriodSPEI + + res <- PeriodSPEI(exp = lapply(exp, function(x) x$data), + dates = exp[[1]]$attrs$Dates, + lat = exp[[1]]$coords[[lat_name]], + exp_cor = lapply(exp_cor, function(x) x$data), + pet = pet, + time_dim = time_dim, leadtime_dim = leadtime_dim, + memb_dim = memb_dim, lat_dim = lat_dim, + accum = accum, start = start, end = end, + pet_method = pet_method, standardization = standardization, + params = params, param_error = param_error, + handle_infinity = handle_infinity, + cross_validation = cross_validation, + method = method, distribution = distribution, + fit = fit, ncores = ncores) + return(res) - # complete dates - dates <- prlr$attrs$Dates - dates_complete_daily <- as.Date(as.Date(paste(lubridate::year(min(dates)), 01, 01, sep='-')):as.Date(paste(lubridate::year(max(dates)), 12, 31, sep='-'))) - dates_complete_daily_to_monthly <- lubridate::day(dates_complete_daily) - dates_complete_monthly <- dates_complete_daily[which(dates_complete_daily_to_monthly == 1)] - dates_monthly <- array(0, dim=length(dates_complete_monthly)) - for (dd in 1:length(dates)){ - ii <- which(dates_complete_monthly == as.Date(paste(lubridate::year(dates[dd]), lubridate::month(dates[dd]), 01, sep = '-'))) - dates_monthly[ii] <- 1 - } +} - # Evapotranspiration estimation (unless pet is already provided) - if (is.null(pet)){ - if (pet_method == 'hargreaves'){ - data <- list(tasmax = tasmax$data, tasmin = tasmin$data, lat = prlr$coords$latitude) - } else if (pet_method == 'hargreaves_modified'){ - data <- list(tasmax = tasmax$data, tasmin = tasmin$data, prlr = prlr$data, lat = prlr$coords$latitude) - } else if (pet_method == 'thornthwaite'){ - data <- list(tas = tas$data, lat = prlr$coords$latitude) - } - pet <- evapotranspiration(data, dates_monthly, pet_method, time_dim, leadtime_dim, memb_dim, lon_dim, lat_dim, n_procs) - } - # Accumulation - diff_P_PET <- prlr$data - pet - data_accum <- accumulation(diff_P_PET, dates_monthly, accum, 
time_dim, leadtime_dim, memb_dim, n_procs) - - # Standardization: - if (standardization == TRUE){ - spei_dat <- spei_standardization(data_accum = data_accum, - leadtime_dim = leadtime_dim, - time_dim = time_dim, - memb_dim = memb_dim, - cross_validation = cross_validation, - handle_infinity = handle_infinity, - n_procs = n_procs, - accum = accum, - param_error = param_error, - method = method, distribution = distribution, fit = fit) - } else { - spei_dat <- data_accum - } - return(spei_dat) +PeriodSPEI <- function(exp, dates, lat, + exp_cor = NULL, pet = NULL, + time_dim = 'syear', leadtime_dim = 'time', + memb_dim = 'ensemble', lat_dim = 'latitude', + accum = 1, start = NULL, end = NULL, + pet_method = c('hargreaves', 'hargreaves'), + standardization = TRUE, + params = NULL, param_error = -9999, + handle_infinity = FALSE, cross_validation = FALSE, + method = 'parametric', distribution = 'log-Logistic', + fit = 'ub-pwm', ncores = 4) { + + #----------------------------------------------------------------------------- + + # Part (1): Initial checks + + ## pet and pet_method + if (is.null(pet_method) & is.null(pet)) { + warning("Parameter 'pet_method' cannot be NULL if pet is not provided.") + } + if (!is.null(exp_cor)) { + if (length(pet_method) == 1) { + pet_method <- rep(pet_method, 2) + } + if (length(exp_cor) < 1) { + exp_cor <- NULL + } + } else { + if (lenght(pet_method) > 1) { + warning("Parameter 'pet_method' is of length 2, only first value will be used.") + pet_method <- pet_method[1] + } + } + ## exp + ## TO DO: Check order: tasmax, tasmin, prlr + if (all(c('tasmin', 'tasmax', 'prlr') %in% names(exp))) { + # hargreaves modified: 'tasmin', 'tasmax', 'prlr' and 'lat' + if (!any(pet_method %in% c('hargreaves_modified', 'hargreaves'))) { + warning("Parameter 'pet_method' needs to be 'hargreaves_modified' or 'hargreaves'.") + pet_method[1] <- 'hargreaves_modified' + } + } else if (all(c('tasmin', 'tasmax') %in% names(exp))) { + # hargreaves: 'tasmin', 'tasmax' and 'lat' + if (pet_method != c('hargreaves')) { + warning("Parameter 'pet_method' needs to be 'hargreaves'.") + pet_method[1] <- 'hargreaves' + } + + } else if ('tas' %in% names(exp)) { + # thornthwaite: 'tas' (mean), 'lat' + pet_method[1] <- 'thornthwaite' + + } else { + stop("Parameter 'exp' needs to be a list with the needed variables.") + } + + # warning(' needs to be in C and precipitation in mm/month') # there is no check + + # check if accumulation period is possible + if (accum > dim(exp[[1]])[leadtime_dim]) { + stop(paste0('ERROR: Cannot compute accumulation of ', accum, ' months because loaded data has only ', + dim(exp[[1]])[leadtime_dim], ' months.')) + } + # method + if (!(method %in% c('parametric', 'non-parametric'))) { + stop('pcX script error: SPEI can be only computed using the following approach: parametric or non-parametric') + } + # distribution + if (!(distribution %in% c('log-Logistic', 'Gamma', 'PearsonIII'))) { + stop('Distrib must be one of "log-Logistic", "Gamma" or "PearsonIII"') + } + # fit + if (!(fit %in% c('max-lik', 'ub-pwm', 'pp-pwm'))) { + stop('Method must be one of "ub-pwm" (default), "pp-pwm" or "max-lik"') + } + + #----------------------------------------------------------------------------- + # Part (2): preparation + + # complete dates + ini_date <- as.Date(paste(lubridate::year(min(dates)), 01, 01, sep= '-')) + end_date <- as.Date(paste(lubridate::year(max(dates)), 12, 31, sep='-')) + dates_complete_daily <- as.Date(ini_date:end_date) + + dates_complete_daily_to_monthly <- 
lubridate::day(dates_complete_daily) + dates_complete_monthly <- dates_complete_daily[which(dates_complete_daily_to_monthly == 1)] + dates_monthly <- array(0, dim=length(dates_complete_monthly)) + for (dd in 1:length(dates)) { + ii <- which(dates_complete_monthly == as.Date(paste(lubridate::year(dates[dd]), lubridate::month(dates[dd]), 01, sep = '-'))) + dates_monthly[ii] <- 1 + } + + #----------------------------------------------------------------------------- + # Part (3): Compute PeriodSPEI + k = 0 + spei_res <- NULL + computed_pet <- FALSE + + for (data in .return2list(exp, exp_cor)) { + k = k + 1 + # Evapotranspiration estimation (unless pet is already provided) + if (is.null(pet) | computed_pet) { + pet <- evapotranspiration(data, dates_monthly, pet_method = pet_method[k], time_dim, + leadtime_dim, memb_dim, lat_dim, ncores) + computed_pet <- TRUE + } + # Accumulation + diff_P_PET <- data$prlr - pet + data_accum <- accumulation(diff_P_PET, dates_monthly, accum, time_dim, + leadtime_dim, memb_dim, ncores) + # Standardization: + if (standardization == TRUE) { + spei_dat <- spei_standardization(data_accum = data_accum, + leadtime_dim = leadtime_dim, + time_dim = time_dim, + memb_dim = memb_dim, + cross_validation = cross_validation, + handle_infinity = handle_infinity, + ncores = ncores, + accum = accum, + param_error = param_error, + params = params, method = method, + distribution = distribution, fit = fit) + params <- spei_dat$params + } else { + spei_dat <- data_accum + } + spei_res[[k]] <- spei_dat[[1]] + } + + if (standardization) { + spei_res[[k+1]] <- params + if (is.null(exp_cor)) { + names(spei_res) <- c('exp', 'params') + } else { + names(spei_res) <- c('exp', 'exp_cor', 'params') + } + } else { + if (is.null(exp_cor)) { + names(spei_res) <- c('exp') + } else { + names(spei_res) <- c('exp', 'exp_cor') + } + } + + return(spei_res) + } - -## functions +.return2list <- function(data1, data2 = NULL) { + if (is.null(data2)) { + return(list(data1)) + } else { + return(list(data1, data2)) + } +} + + + evapotranspiration <- function(data, dates_monthly, pet_method = 'hargreaves', time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', - lon_dim = 'longitude', lat_dim = 'latitude', - n_procs = 4){ + lat_dim = 'latitude', + ncores = 4) { - lat_mask <- InsertDim(InsertDim(data$lat,pos = 1, len = 1, name = 'dat'),pos = 3, len = dim(data[[1]])[lon_dim], name = lon_dim) + ## corrected:: + lat_mask <- array(lat, dim = c(1, length(lat))) + names(dim(lat_mask)) <- c('dat', lat_dim) # extract mask of NA locations to return to NA the final result - mask_NA <- array(1, dim = dim(data[[1]])) - if (pet_method == 'hargreaves'){ - mask_NA[which(is.na(data$tasmax))] <- 0 - mask_NA[which(is.na(data$tasmin))] <- 0 - } - if (pet_method == 'hargreaves_modified'){ - mask_NA[which(is.na(data$tasmax))] <- 0 - mask_NA[which(is.na(data$tasmin))] <- 0 - mask_NA[which(is.na(data$prlr))] <- 0 - } - if (pet_method == 'thornthwaite'){ - mask_NA[which(is.na(tas$data))] <- 0 - } - + + mask_NA <- array(1, dim = dim(data[[1]])) + if (pet_method == 'hargreaves') { + mask_NA[which(is.na(data$tasmax))] <- 0 + mask_NA[which(is.na(data$tasmin))] <- 0 + } else if (pet_method == 'hargreaves_modified') { + mask_NA[which(is.na(data$tasmax))] <- 0 + mask_NA[which(is.na(data$tasmin))] <- 0 + mask_NA[which(is.na(data$prlr))] <- 0 + } else if (pet_method == 'thornthwaite') { + mask_NA[which(is.na(data$tas))] <- 0 + } + # replace NA with 0 - for (dd in 1:length(data)){ + for (dd in 1:length(data)) { 
data[[dd]][which(is.na(data[[dd]]))] <- 0 } # prepare data - if (pet_method == 'hargreaves'){ - data_input <- list(lat_mask = lat_mask, tasmax = data$tasmax, tasmin = data$tasmin) - targetdim_input <- list(lat_mask = c('dat'), tasmax = c(leadtime_dim, time_dim), tasmin = c(leadtime_dim, time_dim)) - } else if (pet_method == 'hargreaves_modified'){ - data_input <- list(lat_mask = lat_mask, tasmax = data$tasmax, tasmin = data$tasmin, prlr = data$prlr) - targetdim_input <- list(lat_mask = c('dat'), tasmax = c(leadtime_dim, time_dim), tasmin = c(leadtime_dim, time_dim), prlr = c(leadtime_dim, time_dim)) - } else if (pet_method == 'thornthwaite'){ - data_input <- list(lat_mask = lat_mask, tas = data$tas) - targetdim_input <- list(lat_mask = c('dat'), tas = c(leadtime_dim, time_dim)) - } else { - stop (paste0('Unknown pet_method ', pet_method)) - } - - PET_estimated <- Apply(data = data_input, - target_dims = targetdim_input, + target_dims_data <- lapply(data, function(x) rep(c(leadtime_dim, time_dim), 1)) + # file <- tempfile() + # file = 'out.txt' + # sink(file) + PET_estimated <- Apply(data = c(list(lat_mask = lat_mask), data), + target_dims = c(list(lat_mask = 'dat'), target_dims_data), output_dims = c(leadtime_dim, time_dim), pet_method = pet_method, - dates_monthly = dates_monthly, # array of 0 and 1 used to build the data_input to a complete array of the full years that are considered - dim_month = leadtime_dim, dim_year = time_dim, - fun = atomic_pet, ncores = n_procs) - + dates_monthly = dates_monthly, + leadtime_dim = leadtime_dim, time_dim = time_dim, + fun = atomic_pet, ncores = ncores)$output1 + # sink(NULL) + # captured_message <- readLines(file) + # print(paste0('Length captured!!', length(captured_message))) + # print(paste0('unique(captured_message)', length(unique(captured_message)))) + # reorder dims in PET_estimated - dims_order <- array(NA, length(dim(data[[1]]))) - for (ord in 1:length(dim(data[[1]]))){ - dims_order[ord] <- which(names(dim(PET_estimated$output1)) == names(dim(data[[1]]))[ord]) - } - data_pet <- aperm(PET_estimated$output1, dims_order) + pos <- match(names(dim(data[[1]])),names(dim(PET_estimated))) + PET_estimated <- aperm(PET_estimated, pos) # restore original NAs from mask_NA - data_pet[which(mask_NA == 0 )] <- NA + PET_estimated[which(mask_NA == 0 )] <- NA - return(data_pet) + return(PET_estimated) } -accumulation <- function(diff_P_PET, dates_monthly, accum = 1, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', n_procs = 4){ +accumulation <- function(diff_P_PET, dates_monthly, accum = 1, + time_dim = 'syear', leadtime_dim = 'time', + memb_dim = 'ensemble', ncores = 4) { - if(!time_dim %in% names(dim(diff_P_PET))){ + if (!time_dim %in% names(dim(diff_P_PET))) { diff_P_PET <- InsertDim(diff_P_PET, posdim = 1, lendim = 1, name = time_dim) } - if(!leadtime_dim %in% names(dim(diff_P_PET))){ + if (!leadtime_dim %in% names(dim(diff_P_PET))) { diff_P_PET <- InsertDim(diff_P_PET, posdim = 1, lendim = 1, name = leadtime_dim) } accum_result <- Apply(data = list(diff_P_PET), target_dims = list(diff_P_PET = c(leadtime_dim,time_dim)), - dates_monthly = dates_monthly, # array of 0 and 1 used to build the data_input to a complete array of the full years that are considered + dates_monthly = dates_monthly, accum = accum, output_dims = c(leadtime_dim, time_dim), #c('time','sdate'), - dim_month = leadtime_dim, dim_year = time_dim, - fun = atomic_accum, ncores = n_procs) + leadtime_dim = leadtime_dim, time_dim = time_dim, + fun = atomic_accum, ncores = 
ncores) # recover essential lost dims (if they had length 1 they'd have been dropped in previous step): - for (d in c(time_dim, leadtime_dim, 'latitude', 'longitude', memb_dim)){ + for (d in c(time_dim, leadtime_dim, 'latitude', 'longitude', memb_dim)) { if(!d %in% names(dim(accum_result$output1))){ accum_result$output1 <- InsertDim(data = accum_result$output1, posdim = length(names(dim(accum_result$output1))) + 1, lendim = 1, name = d) } @@ -179,113 +346,126 @@ accumulation <- function(diff_P_PET, dates_monthly, accum = 1, time_dim = 'syear } spei_standardization <- function(data_accum, - leadtime_dim, time_dim, memb_dim, handle_infinity, cross_validation, - n_procs, accum, param_error, - method ='parametric', distribution = 'log-Logistic', fit='ub-pwm'){ - data_spei <- data_accum - - data_spei <- Apply(data = list(data_accum), - target_dims = list(data = c(leadtime_dim,time_dim, memb_dim)), - output_dims = c(leadtime_dim,time_dim, memb_dim), - dim_month = leadtime_dim, - dim_year = time_dim, - dim_memb = memb_dim, - handle_infinity = handle_infinity, - cross_validation = cross_validation, - method = method, distribution = distribution, fit = fit, - ref_period = NULL, - param_error = param_error, - fun = atomic_spei, - ncores = n_procs)$output1 - - return(data_spei) + leadtime_dim, time_dim, memb_dim, handle_infinity, + cross_validation, ncores, accum, param_error, + params = NULL, method ='parametric', + distribution = 'log-Logistic', fit='ub-pwm') { -} + n_leadtimes <- dim(data_accum)[leadtime_dim] + n_sdates_params <- dim(data_accum)[time_dim] + + if (!cross_validation) { + n_sdates_params <- 1 + } -### Atomic functions + if (is.null(params)) { + params <- array(NA, dim = c(n_sdates_params, n_leadtimes, coef = 3)) + names(dim(params)) <- c(time_dim, leadtime_dim, 'coef') + } else if (length(dim(params)) < 2) { + params <- array(params, dim = c(length(params), n_sdates_params, n_leadtimes)) + params <- aperm(params, c(2,3,1)) # dim(params): [time_dim, leadtime_dim, coef] with the values repeated each time_dim and leadtime_dim + names(dim(params)) <- c(time_dim, leadtime_dim, 'coef') + } + + spei <- Apply(data = list(data = data_accum, params = params), + target_dims = list(data = c(leadtime_dim, time_dim, memb_dim), + params = c(time_dim, leadtime_dim, 'coef')), + output_dims = list(data_spei = c(leadtime_dim, time_dim, memb_dim), + params = c(time_dim, leadtime_dim, 'coef')), + leadtime_dim = leadtime_dim, + time_dim = time_dim, + dim_memb = memb_dim, + handle_infinity = handle_infinity, + cross_validation = cross_validation, + method = method, distribution = distribution, fit = fit, + ref_period = NULL, + param_error = param_error, + fun = atomic_spei, + ncores = ncores) + + return(spei) # spei is a list of data_spei and params +} -atomic_pet <- function(pet_method, dates_monthly, dim_month, dim_year, lat_mask = NULL, data2 = NULL, data3 = NULL, data4 = NULL){ +atomic_pet <- function(lat_mask = NULL, data2 = NULL, data3 = NULL, data4 = NULL, + pet_method, dates_monthly, leadtime_dim, time_dim) { - ftime = as.numeric(dim(data2)[leadtime_dim]) - n_sdates = as.numeric(dim(data2)[time_dim]) - - # create a vector from data but adding 0 to achive complete time series of the considered period - #(starting in January of the first year) so that the solar radiation estimation is computed - # in each case for the correct month - if (!is.null(data2)){ - data_tmp <- as.vector(data2) - data2 <- array(0, dim = length(dates_monthly)) - count <- 1 - for (dd in 1:length(dates_monthly)){ - if 
(dates_monthly[dd] == 1){ - data2[dd] <- data_tmp[count] - count <- count + 1 - } + dims <- dim(data2) + + # create a vector from data but adding 0 to achive complete time series + # of the considered period + # (starting in January of the first year) so that the solar radiation + # estimation is computed in each case for the correct month + + if (!is.null(data2)) { + data_tmp <- as.vector(data2) + data2 <- array(0, dim = length(dates_monthly)) + count <- 1 + for (dd in 1:length(dates_monthly)) { + if (dates_monthly[dd] == 1) { + data2[dd] <- data_tmp[count] + count <- count + 1 } - rm(data_tmp) } - if (!is.null(data3)){ - data_tmp <- as.vector(data3) - data3 <- array(0, dim = length(dates_monthly)) - count <- 1 - for (dd in 1:length(dates_monthly)){ - if (dates_monthly[dd] == 1){ - data3[dd] <- data_tmp[count] - count <- count + 1 - } + rm(data_tmp) + } + if (!is.null(data3)) { + data_tmp <- as.vector(data3) + data3 <- array(0, dim = length(dates_monthly)) + count <- 1 + for (dd in 1:length(dates_monthly)) { + if (dates_monthly[dd] == 1) { + data3[dd] <- data_tmp[count] + count <- count + 1 } - rm(data_tmp) } - if (!is.null(data4)){ - data_tmp <- as.vector(data4) - data4 <- array(0, dim = length(dates_monthly)) - count <- 1 - for (dd in 1:length(dates_monthly)){ - if (dates_monthly[dd] == 1){ - data4[dd] <- data_tmp[count] - count <- count + 1 - } + rm(data_tmp) + } + if (!is.null(data4)) { + data_tmp <- as.vector(data4) + data4 <- array(0, dim = length(dates_monthly)) + count <- 1 + for (dd in 1:length(dates_monthly)) { + if (dates_monthly[dd] == 1) { + data4[dd] <- data_tmp[count] + count <- count + 1 } - rm(data_tmp) } - if (pet_method == 'hargreaves'){ - x_PET <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), lat = lat_mask, na.rm = FALSE) - x_PET <- x_PET[which(dates_monthly == 1)] # line to return the vector to the size of the actual original data - PET <- array(x_PET, dim = c(ftime,n_sdates)) + rm(data_tmp) + } + if (pet_method == 'hargreaves') { + # NOTE EVA: if NA.RM is FALSE this gives error + x_PET <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), + lat = lat_mask, na.rm = TRUE) + # line to return the vector to the size of the actual original data + x_PET <- x_PET[which(dates_monthly == 1)] + PET <- array(x_PET, dim = dims) } - if (pet_method == 'hargreaves_modified'){ # not sure this works properly - PET <- array(NA, dim = c(ftime,n_sdates)) - for(ns in 1:n_sdates){ - tmax_mod_temp <- data2[12*(ns-1)+(1:12)] #data2[,ns] - tmin_mod_temp <- data3[12*(ns-1)+(1:12)] #data3[,ns] - pre_mod_temp <- data4[12*(ns-1)+(1:12)] #data4[,ns] - - # Computation of PET - x_PET <- hargreaves(Tmin = tmin_mod_temp, Tmax = tmax_mod_temp, lat = lat_mask, Pre = pre_mod_temp, na.rm = FALSE) - PET[1:ftime,ns] <- x_PET #dim <- (lon lat ft sd memb) - } + if (pet_method == 'hargreaves_modified') { + x_PET <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), + lat = lat_mask, Pre = as.vector(data4), na.rm = TRUE) + x_PET <- x_PET[which(dates_monthly == 1)] + PET <- array(x_PET, dim = dims) } - if (pet_method == 'thornthwaite'){ + if (pet_method == 'thornthwaite') { x_PET <- thornthwaite(as.vector(data2), lat = lat_mask, na.rm = TRUE) - x_PET <- x_PET[which(dates_monthly == 1)] # line to return the vector to the size of the actual original data - PET <- array(x_PET, dim = c(ftime,n_sdates)) + # line to return the vector to the size of the actual original data + x_PET <- x_PET[which(dates_monthly == 1)] + PET <- array(x_PET, dim = dims) } - return(PET) - } -atomic_accum 
<- function(data, dates_monthly, accum, dim_month, dim_year){ - # data:[time, syear] +atomic_accum <- function(data, dates_monthly, accum, leadtime_dim, time_dim) { - ftime = as.numeric(dim(data)[dim_month]) - n_sdates = as.numeric(dim(data)[dim_year]) + # data:[time, syear] + ftime <- dim(data)[1] + n_sdates <- dim(data)[2] data_vector <- array(0, dim = length(dates_monthly)) count <- 1 - for (dd in 1:length(dates_monthly)){ + for (dd in 1:length(dates_monthly)) { if (dates_monthly[dd] == 1){ data_vector[dd] <- as.vector(data)[count] count <- count + 1 @@ -294,21 +474,23 @@ atomic_accum <- function(data, dates_monthly, accum, dim_month, dim_year){ # Accumulation at different timescales data_sum_x <- rollapply(data_vector, accum, sum) # rollapply {zoo} A generic function for applying a function to rolling margins of an array. - data_sum_x <- c(rep(NA, accum-1), data_sum_x) # adds as many NAs as needed at the begining to account for the months that cannot be added (depends on accu) and so that the position in the vector corresponds to the accumulated of the previous months (instead of the accumulated of the next months) + data_sum_x <- c(rep(NA, accum-1), data_sum_x) # adds as many NAs as needed at the begining to account for the months that cannot be added + # (depends on accu) and so that the position in the vector corresponds to the accumulated + # of the previous months (instead of the accumulated of the next months) data_sum_x <- data_sum_x[which(dates_monthly == 1)] # discard the months that don't appear in the original data accum_result <- array(data_sum_x, dim = c(ftime,n_sdates)) # return to matrix form - if (accum > 1){ + if (accum > 1) { accum_result[1:(accum-1),] <- NA # replace by NA when the accumulation corresponds to months that where not present in the original data } return(accum_result) } - -atomic_spei <- function(data, dim_month, dim_year, dim_memb, ref_period, handle_infinity, cross_validation, param_error, - method ='parametric', distribution = 'log-Logistic', fit='ub-pwm'){ +atomic_spei <- function(data, params, leadtime_dim, time_dim, dim_memb, ref_period, + handle_infinity, cross_validation, param_error, method = 'parametric', + distribution = 'log-Logistic', fit = 'ub-pwm') { # data: [time, sdate, memb] - if (is.null(ref_period)){ + if (is.null(ref_period)) { ref.start <- NULL ref.end <- NULL } else { @@ -318,55 +500,76 @@ atomic_spei <- function(data, dim_month, dim_year, dim_memb, ref_period, handle_ if (all(is.na(data))) { speiX <- array(NA, dim(data)) - } else if (var(data, na.rm=T) == 0) { # if the data [time, sdate, memb] has no variability it will raise an error further down, so we assign a value to the result and skip the step + } else if (var(data, na.rm=T) == 0) { # if the data [time, sdate, memb] has no variability it will raise an error further down, + # so we assign a value to the result and skip the step speiX <- array(param_error, dim(data)) } else { - speiX <- spX(data, ref.start=ref.start, ref.end=ref.end, - method = method, distribution = distribution, fit = fit, - dim_month = dim_month, dim_year = dim_year, dim_memb = dim_memb, - handle_infinity = handle_infinity, cross_validation = cross_validation) + c(speiX, params) %<-% spX(data, ref.start=ref.start, ref.end=ref.end, params = params, + method = method, distribution = distribution, fit = fit, + leadtime_dim = leadtime_dim, time_dim = time_dim, dim_memb = dim_memb, + handle_infinity = handle_infinity, cross_validation = cross_validation) } - return(speiX) + + return(list(speiX, params)) } 
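# The standardization carried out by spX/spX_ftime below amounts to fitting a
# distribution to the accumulated water balance of one lead time across start
# dates, evaluating its CDF, and mapping the probabilities through qnorm. A
# minimal sketch of that idea for the default log-Logistic case, using the lmom
# package directly rather than the pwm/lmomco route of the patch (the series
# 'acc' is made up for illustration):
library(lmom)
acc     <- rnorm(30, mean = 10, sd = 40)   # accumulated prlr - pet, one lead time
glo_par <- pelglo(samlmu(acc))             # log-Logistic parameters from sample L-moments
spei    <- qnorm(cdfglo(acc, glo_par))     # standardised index, approximately N(0, 1)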
+#------------------------------------------------------------------------------- + ### Support functions -spX <- function(acc_data, ref.start, ref.end, na.rm = TRUE, method = 'parametric', distribution = 'log-Logistic', fit = 'ub-pwm', dim_month, dim_year, dim_memb, handle_infinity, cross_validation){ - +spX <- function(acc_data, ref.start, ref.end, params = params, na.rm = TRUE, + method = 'parametric', distribution = 'log-Logistic', fit = 'ub-pwm', + leadtime_dim, time_dim, dim_memb, handle_infinity, cross_validation){ + # acc_data: [ftime, syear, nmemb] + # params: [syear, ftime, coef] - if (!(method %in% c('parametric', 'non-parametric'))) { - stop('pcX script error: SPEI can be only computed using the following approach: parametric or non-parametric') - } - - ftime <- as.numeric(dim(acc_data)[dim_month]) - n_sdates <- as.numeric(dim(acc_data)[dim_year]) + n_coef_max <- dim(params)['coef'] # maximum number of parameters needed to define any of the considered distributions + + ftime <- as.numeric(dim(acc_data)[leadtime_dim]) + n_sdates <- as.numeric(dim(acc_data)[time_dim]) nmemb <- as.numeric(dim(acc_data)[dim_memb]) spei_mod <- array(data = NA, dim = c(ftime, n_sdates, nmemb)) - names(dim(spei_mod)) <- c(dim_month, dim_year, dim_memb) + names(dim(spei_mod)) <- c(leadtime_dim, time_dim, dim_memb) + if (cross_validation) { + params_result <- array(data = NA, dim = c(n_sdates, ftime, n_coef_max)) + } else { + params_result <- array(data = NA, dim = c(1, ftime, n_coef_max)) + } - for(ff in 1:ftime){ # treat months independently - Accum_D_temp <- ClimProjDiags::Subset(acc_data, along = dim_month, indices = ff, drop = FALSE) - x_spei_mod <- spX_ftime(Accum_D_temp, na.rm = TRUE, method = method, distribution = distribution, fit = fit, ref.start = ref.start, ref.end = ref.end, n_sdates = n_sdates, nmemb = nmemb, handle_infinity = handle_infinity, cross_validation = cross_validation) + for (ff in 1:ftime) { # treat months independently + Accum_D_temp <- ClimProjDiags::Subset(acc_data, along = leadtime_dim, indices = ff, drop = FALSE) + params_tmp <- if (all(is.na(params))) {NULL} else {params[,ff,]} + c(x_spei_mod, params_ff) %<-% spX_ftime(Accum_D_temp, na.rm = TRUE, method = method, distribution = distribution, + fit = fit, ref.start = ref.start, ref.end = ref.end, + params = params_tmp, n_sdates = n_sdates, nmemb = nmemb, + handle_infinity = handle_infinity, cross_validation = cross_validation) spei_mod[ff,,] <- x_spei_mod + coef_names <- names(params_ff) + if (length(params_ff) < n_coef_max){ # lengthen dimension coef of params_ff in case it doesn't match the corresponding dimension of parms_months + params_ff <- append(params_ff, array(NA, dim = n_coef_max - length(params_ff))) + coef_names <- append(coef_names, '') + } + params_result[,ff,] <- params_ff } + + colnames(params_result) <- coef_names + names(dim(params_result)) <- c(time_dim, leadtime_dim, 'coef') - return(spei_mod) + return(list(spei = spei_mod, params = params_result)) } +#------------------------------------------------------------------------------- + # HandleInfinity: spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm = TRUE, method = 'parametric', n_sdates, nmemb, ref.start = NULL, ref.end = NULL, params = NULL, handle_infinity = FALSE, cross_validation = FALSE) { # data:[ftime = 1, syear, ensemble] - - if (!(method %in% c('parametric', 'non-parametric'))) { - stop('SPEI can be only computed using the following approach: parametric or non-parametric') - } - if (method == 
'non-parametric'){ + if (method == 'non-parametric') { if (anyNA(data) && na.rm == FALSE) { stop('Error: Data must not contain NAs') @@ -379,48 +582,45 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm SPEI = qnorm((bp-0.44)/(length(data)+0.12)) - return(SPEI) + return(SPEI) # it won't return params to be used in exp_cor; also it is not using handle_infinity nor cross_validation } else { std_index <- array(NA, c(n_sdates, nmemb)) - if (anyNA(data) && na.rm==FALSE) { + if (anyNA(data) && na.rm == FALSE) { stop('Error: Data must not contain NAs') } - if (!(distribution %in% c('log-Logistic', 'Gamma', 'PearsonIII'))) { - stop('Distrib must be one of "log-Logistic", "Gamma" or "PearsonIII"') - } - if (!(fit %in% c('max-lik', 'ub-pwm', 'pp-pwm'))) { - stop('Method must be one of "ub-pwm" (default), "pp-pwm" or "max-lik"') - } + coef = switch(distribution, "Gamma" = array(NA, dim = 2, dimnames = list(c('alpha','beta'))), "log-Logistic" = array(NA, dim = 3, dimnames = list(c('xi','alpha','kappa'))), - "PearsonIII" = array(NA, dim = 3, dimnames = list(c('mu','sigma','gamma'))) - ) + "PearsonIII" = array(NA, dim = 3, dimnames = list(c('mu','sigma','gamma')))) dim_one <- length(coef) if (!is.null(params)) { - if (length(params)!=dim_one) { + if (length(params) != dim_one) { stop(paste0('parameters array should have dimensions [', dim_one, ']')) } } # Select window if necessary - if (!is.null(ref.start) && !is.null(ref.end)) { - data.fit <- window(data,ref.start,ref.end) - } else { - data.fit <- data - } + if (!is.null(ref.start) && !is.null(ref.end)) { + data.fit <- window(data,ref.start,ref.end) + } else { + data.fit <- data + } - if(cross_validation == "TRUE") { + if (cross_validation == "TRUE") { loop_years <- n_sdates } else { loop_years <- 1 } + + params_result <- array(NA, dim = c(loop_years, dim_one)) + colnames(params_result) <- names(coef) for (nsd in 1:loop_years) { # loop over years (in case of cross_validation) # Cumulative series (acu) @@ -436,12 +636,13 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm acu_sd = sd(acu.sorted) if (!is.na(acu_sd)){ if (acu_sd != 0){ - if(distribution != "log-Logistic"){ + if(distribution != "log-Logistic") { pze <- sum(acu==0)/length(acu) acu.sorted = acu.sorted[acu.sorted > 0] } if (!is.null(params)) { f_params = as.vector(params) + params_result[nsd,] <- f_params } else { if (length(acu.sorted) >= 4){ # else coef will be NA # Calculate probability weighted moments based on fit with lmomco or TLMoments @@ -470,16 +671,23 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm if(distribution == 'log-Logistic' && fit == 'max-lik'){ f_params = parglo.maxlik(acu.sorted, f_params)$para } + params_result[nsd,] <- f_params } # end if dor the case the L-moments are not valid (std_index will be NA) } # end if for the case there are not enough values to estimate the parameters (std_index will be NA) } # end estimation of f_param # Calculate cdf based on distribution with lmom - cdf_res = switch(distribution, + if (all(is.na(params_result[nsd,]))){ + cdf_res <- NA + } else { + f_params <- params_result[nsd,] + f_params <- f_params[which(!is.na(f_params))] + cdf_res = switch(distribution, "log-Logistic" = lmom::cdfglo(data, f_params), "Gamma" = lmom::cdfgam(data, f_params), "PearsonIII" = lmom::cdfpe3(data, f_params) - ) - + ) + } + std_index_cv <- array(qnorm(cdf_res), dim = c(n_sdates,nmemb)) # Adjust if user chose Gamma or PearsonIII - Not tested: For future 
development @@ -490,20 +698,22 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm std_index[nsd,] <- std_index_cv[nsd,] } else { std_index <- std_index_cv - } + } } } # end if for the case there is no variability } # end if for the case all NA in acu } # next year (in case of cross_validation or all done if cross_validation == F) - + if(handle_infinity == 'TRUE'){ # could also use "param_error" ?; we are giving it the min/max value of the grid point std_index[is.infinite(std_index) & std_index < 0] <- min(std_index[!is.infinite(std_index)]) std_index[is.infinite(std_index) & std_index > 0] <- max(std_index[!is.infinite(std_index)]) } - - } # end parametric - return(std_index) # fitted is the only thing used after calling this function + return(list(std_index = std_index, params = params_result)) # f_params will be params only if cross_validation is FALSE + # (otherwise there will be one f_params per year; + # but the output params will be read only in the case that + # it is called with cross_validation FALSE) + + } # end parametric } - -- GitLab From 7de3fdde479d3dd32109e9245e5486151b9d3fc1 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Fri, 26 May 2023 17:31:11 +0200 Subject: [PATCH 05/87] Improve code and write documentation --- R/PeriodSPEI.R | 250 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 163 insertions(+), 87 deletions(-) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index 1eef63d..b73ef69 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -1,38 +1,36 @@ - -#------------------------------------------------------------------------------- -# To DO: Add documentation of same dimensions exp and exp_cor -# To DO: Add documentation for units -# TO DO: Write Documentation - -# library(s2dv) # InsertDim, ... -# library(multiApply) # Apply -# library(ClimProjDiags) # Subset -# library(zeallot) # multiple assignment %<-% -# library(SPEI) # estimation of evapotranspiration (e.g. hargreaves function) -# library(zoo) # rollapply, as.Date -# library(TLMoments) # pwd (Calculate probability weighted moments based on fit) -# library(lmomco) # pwm2lmom (probability weighted moments to L-Moments) -# library(lubridate) # year(), month(),... - -# library(CSTools) -#------------------------------------------------------------------------------- -#'@param exp A list with the 's2dv_cube' object class containing the seasonal -#' forecast experiment in the data element. -#'@param exp_cor An object of class \code{s2dv_cube} in which the quantile -#' PeriodSPEI should be applied. If it is not specified, the PeriodSPEI -#' is calculated from object 'exp'. -#'@param pet Multidimensional array containing the Potential EvapoTranspiration -#' data. If it is NULL it is calculated using pet_method. It is NULL by default. -#'@param time_dim -#'@param leadtime_dim -#'@param memb_dim -#'@param lat_dim -#--- other params from CSIndicators -#'@param accum +#'Compute the Standardised Precipitation-Evapotranspiration Index +#' +#'The Standardised Precipitation-Evapotranspiration Index (SPEI) is a +#'multiscalar drought index based on climatic data. It can be used for +#'determining the onset, duration and magnitude of drought conditions with +#'respect to normal conditions in a variety of natural and managed systems such +#'as crops, ecosystems, rivers, water resources, etc. The SPI is calculated +#'using monthly (or weekly) precipitation as the input data. The SPEI uses the +#'monthly (or weekly) difference between precipitation and PET. 
This represents +#'a simple climatic water balance which is calculated at different time scales +#'to obtain the SPEI. +#' +#'@param exp A named list with the needed \code{s2dv_cube} objects containing +#' the seasonal forecast experiment in the data element for each variable. +#'@param exp_cor A named list with the needed \code{s2dv_cube} objects for each +#' variable in which the quantile PeriodSPEI should be applied. If it is not +#' specified, the PeriodSPEI is calculated from object 'exp'. +#'@param pet A multidimensional array containing the Potential +#' EvapoTranspiration data. If it is NULL it is calculated using pet_method. It +#' is NULL by default. +#'@param time_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'syear'. +#'@param leadtime_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'time'. +#'@param memb_dim A character string indicating the name of the dimension in +#' which the ensemble members are stored. When set it to NULL, threshold is +#' computed for individual members. +#'@param lat_dim A character string indicating the name of the latitudinal +#' dimension. By default it is set by 'latitude'. +#'@param accum An integer value indicating the number of months for the +#' accumulation for each variable. #'@param start #'@param end -#' -# --- other params #'@param pet_method #'@param standardization #'@param params @@ -43,11 +41,26 @@ #'@param distribution #'@param fit #'@param ncores +#' +#'@examples +#'exp <- 1 +#' +# '@import s2dv +# '@import multiApply +# '@import ClimProjDiags +# '@import zeallot +# '@import SPEI +# '@import zoo +# '@import TLMoments +# '@import lmomco +# '@import lubridate +# '@import CSTools +# '@export CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', lat_dim = 'latitude', accum = 1, start = NULL, end = NULL, - pet_method = c('hargreaves', 'hargreaves'), + pet_method = 'hargreaves', standardization = TRUE, params = NULL, # not used if standardization is FALSE param_error = -9999, # not used if standardization is FALSE @@ -57,9 +70,8 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, fit = 'ub-pwm', # not used if standardization is FALSE ncores = 4) { - #----------------------------------------------------------------------------- - # Part (1): Initial structure checks - # coordinates + # Check 's2dv_cube' + .KnownLatNames <- CSTools:::.KnownLatNames() if (!any(names(exp[[1]]$coords) %in% .KnownLatNames)) { @@ -86,7 +98,6 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, method = method, distribution = distribution, fit = fit, ncores = ncores) return(res) - } @@ -96,79 +107,135 @@ PeriodSPEI <- function(exp, dates, lat, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', lat_dim = 'latitude', accum = 1, start = NULL, end = NULL, - pet_method = c('hargreaves', 'hargreaves'), + pet_method = 'hargreaves', standardization = TRUE, params = NULL, param_error = -9999, handle_infinity = FALSE, cross_validation = FALSE, method = 'parametric', distribution = 'log-Logistic', fit = 'ub-pwm', ncores = 4) { - #----------------------------------------------------------------------------- - - # Part (1): Initial checks + # Initial checks - ## pet and pet_method - if (is.null(pet_method) & is.null(pet)) { - warning("Parameter 'pet_method' cannot be NULL if pet is not provided.") + ## exp + if (!inherits(exp, 'list')) { + stop("Parameter 'exp' needs 
to be a named list with the needed variables.") + } + if (is.null(names(exp))) { + stop("Parameter 'exp' needs to be a named list with the variable names.") } + + ## exp_cor if (!is.null(exp_cor)) { - if (length(pet_method) == 1) { - pet_method <- rep(pet_method, 2) - } - if (length(exp_cor) < 1) { + if (!inherits(exp_cor, 'list')) { + warning("Parameter 'exp_cor' needs to be a named list with the needed ", + "variables. Only 'exp' will be used.") exp_cor <- NULL } - } else { - if (lenght(pet_method) > 1) { - warning("Parameter 'pet_method' is of length 2, only first value will be used.") - pet_method <- pet_method[1] + if (is.null(names(exp_cor))) { + stop("Parameter 'exp_cor' needs to be a named list with the variable names.") } } - ## exp - ## TO DO: Check order: tasmax, tasmin, prlr - if (all(c('tasmin', 'tasmax', 'prlr') %in% names(exp))) { - # hargreaves modified: 'tasmin', 'tasmax', 'prlr' and 'lat' - if (!any(pet_method %in% c('hargreaves_modified', 'hargreaves'))) { - warning("Parameter 'pet_method' needs to be 'hargreaves_modified' or 'hargreaves'.") - pet_method[1] <- 'hargreaves_modified' - } - } else if (all(c('tasmin', 'tasmax') %in% names(exp))) { - # hargreaves: 'tasmin', 'tasmax' and 'lat' - if (pet_method != c('hargreaves')) { - warning("Parameter 'pet_method' needs to be 'hargreaves'.") - pet_method[1] <- 'hargreaves' + + # Variable checks + if (is.null(pet)) { + ## exp (2) + if (all(c('tasmin', 'tasmax', 'prlr') %in% names(exp))) { + # hargreaves modified: 'tasmin', 'tasmax', 'prlr' and 'lat' + if (!any(pet_method %in% c('hargreaves_modified', 'hargreaves'))) { + warning("Parameter 'pet_method' needs to be 'hargreaves' or ", + "'hargreaves_modified'. It is set to 'hargreaves'.") + pet_method[1] <- 'hargreaves' + } + } else if (all(c('tas', 'prlr') %in% names(exp))) { + # thornthwaite: 'tas' (mean), 'lat' + if (!any(pet_method != 'thornthwaite')) { + warning("Parameter 'pet_method' it is set to be 'thornthwaite'.") + pet_method[1] <- 'thornthwaite' + } + } else { + stop("Parameter 'exp' needs to be a named list with accepted variable names.", + "See documentation.") } - - } else if ('tas' %in% names(exp)) { - # thornthwaite: 'tas' (mean), 'lat' - pet_method[1] <- 'thornthwaite' + ## exp_cor (2) + if (!is.null(exp_cor)) { + if (length(exp_cor) < 1) { + exp_cor <- NULL + } else { + if (length(pet_method) == 1) { + pet_method <- rep(pet_method, 2) + } + } + if (all(c('tasmin', 'tasmax', 'prlr') %in% names(exp_cor))) { + # hargreaves modified: 'tasmin', 'tasmax', 'prlr' and 'lat' + if (!any(pet_method %in% c('hargreaves_modified', 'hargreaves'))) { + warning("Parameter 'pet_method' needs to be 'hargreaves' or ", + "'hargreaves_modified'. 
It is set to 'hargreaves'.") + pet_method[2] <- 'hargreaves' + } + } else if (all(c('tas', 'prlr') %in% names(exp_cor))) { + # thornthwaite: 'tas' (mean), 'lat' + if (!any(pet_method != 'thornthwaite')) { + warning("Parameter 'pet_method' it is set to be 'thornthwaite'.") + pet_method[2] <- 'thornthwaite' + } + } else { + stop("Parameter 'exp_cor' needs to be a list with the needed variables.") + } + } } else { - stop("Parameter 'exp' needs to be a list with the needed variables.") + if (!('prlr' %in% names(exp))) { + stop("Variable 'prlr' is not included in 'exp'.") + } + if (!is.null(exp_cor)) { + if (!('prlr' %in% names(exp_cor))) { + stop("Variable 'prlr' is not included in 'exp_cor'.") + } + } } - # warning(' needs to be in C and precipitation in mm/month') # there is no check + ## time_dim + + ## leadtime_dim + + ## memb_dim + + ## lat_dim - # check if accumulation period is possible + ## accum if (accum > dim(exp[[1]])[leadtime_dim]) { stop(paste0('ERROR: Cannot compute accumulation of ', accum, ' months because loaded data has only ', dim(exp[[1]])[leadtime_dim], ' months.')) } - # method + + ## start + + ## end + + ## standardization + + ## param_error + + ## handle_infinity + + ## cross_validation + + ## method if (!(method %in% c('parametric', 'non-parametric'))) { stop('pcX script error: SPEI can be only computed using the following approach: parametric or non-parametric') } - # distribution + ## distribution if (!(distribution %in% c('log-Logistic', 'Gamma', 'PearsonIII'))) { stop('Distrib must be one of "log-Logistic", "Gamma" or "PearsonIII"') } - # fit + ## fit if (!(fit %in% c('max-lik', 'ub-pwm', 'pp-pwm'))) { stop('Method must be one of "ub-pwm" (default), "pp-pwm" or "max-lik"') } - #----------------------------------------------------------------------------- - # Part (2): preparation + ## ncores + + # Data preparation # complete dates ini_date <- as.Date(paste(lubridate::year(min(dates)), 01, 01, sep= '-')) @@ -183,8 +250,7 @@ PeriodSPEI <- function(exp, dates, lat, dates_monthly[ii] <- 1 } - #----------------------------------------------------------------------------- - # Part (3): Compute PeriodSPEI + # Compute PeriodSPEI k = 0 spei_res <- NULL computed_pet <- FALSE @@ -193,14 +259,21 @@ PeriodSPEI <- function(exp, dates, lat, k = k + 1 # Evapotranspiration estimation (unless pet is already provided) if (is.null(pet) | computed_pet) { - pet <- evapotranspiration(data, dates_monthly, pet_method = pet_method[k], time_dim, - leadtime_dim, memb_dim, lat_dim, ncores) + pet <- evapotranspiration(data = data, dates_monthly = dates_monthly, + lat = lat, pet_method = pet_method[k], + time_dim = time_dim, leadtime_dim = leadtime_dim, + memb_dim = memb_dim, lat_dim = lat_dim, + ncores = ncores) computed_pet <- TRUE } + # Accumulation diff_P_PET <- data$prlr - pet - data_accum <- accumulation(diff_P_PET, dates_monthly, accum, time_dim, - leadtime_dim, memb_dim, ncores) + data_accum <- accumulation(diff_P_PET = diff_P_PET, + dates_monthly = dates_monthly, accum = accum, + time_dim = time_dim, + leadtime_dim = leadtime_dim, + memb_dim = memb_dim, ncores = ncores) # Standardization: if (standardization == TRUE) { spei_dat <- spei_standardization(data_accum = data_accum, @@ -252,7 +325,7 @@ PeriodSPEI <- function(exp, dates, lat, -evapotranspiration <- function(data, dates_monthly, pet_method = 'hargreaves', +evapotranspiration <- function(data, dates_monthly, lat, pet_method = 'hargreaves', time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', lat_dim = 'latitude', 
ncores = 4) { @@ -265,13 +338,16 @@ evapotranspiration <- function(data, dates_monthly, pet_method = 'hargreaves', mask_NA <- array(1, dim = dim(data[[1]])) if (pet_method == 'hargreaves') { + varnames <- c('tasmax', 'tasmin') mask_NA[which(is.na(data$tasmax))] <- 0 mask_NA[which(is.na(data$tasmin))] <- 0 } else if (pet_method == 'hargreaves_modified') { + varnames <- c('tasmax', 'tasmin', 'prlr') mask_NA[which(is.na(data$tasmax))] <- 0 mask_NA[which(is.na(data$tasmin))] <- 0 mask_NA[which(is.na(data$prlr))] <- 0 } else if (pet_method == 'thornthwaite') { + varnames <- c('tas') mask_NA[which(is.na(data$tas))] <- 0 } @@ -281,11 +357,11 @@ evapotranspiration <- function(data, dates_monthly, pet_method = 'hargreaves', } # prepare data - target_dims_data <- lapply(data, function(x) rep(c(leadtime_dim, time_dim), 1)) + target_dims_data <- lapply(data[varnames], function(x) rep(c(leadtime_dim, time_dim), 1)) # file <- tempfile() # file = 'out.txt' # sink(file) - PET_estimated <- Apply(data = c(list(lat_mask = lat_mask), data), + PET_estimated <- Apply(data = c(list(lat_mask = lat_mask), data[varnames]), target_dims = c(list(lat_mask = 'dat'), target_dims_data), output_dims = c(leadtime_dim, time_dim), pet_method = pet_method, -- GitLab From 09753eb01a00265bdcc0480913019349b9cfae5f Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Mon, 29 May 2023 17:31:04 +0200 Subject: [PATCH 06/87] Add unit test --- R/PeriodSPEI.R | 135 ++++++++++++++++++++++--------- tests/testthat/test-PeriodSPEI.R | 113 ++++++++++++++++++++++++++ 2 files changed, 212 insertions(+), 36 deletions(-) create mode 100644 tests/testthat/test-PeriodSPEI.R diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index b73ef69..7d419d0 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -71,17 +71,26 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, ncores = 4) { # Check 's2dv_cube' + if (is.null(exp)) { + stop("Parameter 'exp' cannot be NULL.") + } + if (!all(sapply(exp, function(x) inherits(x, 's2dv_cube')))) { + stop("Parameter 'exp' must be a list of 's2dv_cube' class.") + } + if (!is.null(exp_cor)) { + if (!all(sapply(exp_cor, function(x) inherits(x, 's2dv_cube')))) { + stop("Parameter 'exp_cor' must be a list of 's2dv_cube' class.") + } + } .KnownLatNames <- CSTools:::.KnownLatNames() if (!any(names(exp[[1]]$coords) %in% .KnownLatNames)) { - stop("Spatial coordinate names of parameter 'obsL' do not match any ", + stop("Spatial coordinate names of parameter 'exp' do not match any ", "of the names accepted by the package.") } - lat_name <- names(exp[[1]]$coords)[[which(names(exp[[1]]$coords) %in% .KnownLatNames)]] - #----------------------------------------------------------------------------- - # Part (2): Call to PeriodSPEI + lat_name <- names(exp[[1]]$coords)[[which(names(exp[[1]]$coords) %in% .KnownLatNames)]] res <- PeriodSPEI(exp = lapply(exp, function(x) x$data), dates = exp[[1]]$attrs$Dates, @@ -115,7 +124,6 @@ PeriodSPEI <- function(exp, dates, lat, fit = 'ub-pwm', ncores = 4) { # Initial checks - ## exp if (!inherits(exp, 'list')) { stop("Parameter 'exp' needs to be a named list with the needed variables.") @@ -123,17 +131,37 @@ PeriodSPEI <- function(exp, dates, lat, if (is.null(names(exp))) { stop("Parameter 'exp' needs to be a named list with the variable names.") } + if (any(sapply(exp, function(x) is.null(names(dim(x)))))) { + stop("Parameter 'exp' needs to be a list of arrays with dimension names.") + } + dims <- lapply(exp, function(x) dim(x)) + first_dims <- dims[[1]] + all_equal <- all(sapply(dims[-1], 
function(x) identical(first_dims, x))) + if (!all_equal) { + stop("Parameter 'exp' variables needs to have the same dimension names.") + } ## exp_cor if (!is.null(exp_cor)) { if (!inherits(exp_cor, 'list')) { - warning("Parameter 'exp_cor' needs to be a named list with the needed ", - "variables. Only 'exp' will be used.") - exp_cor <- NULL + stop("Parameter 'exp_cor' needs to be a named list with the needed ", + "variables if it is not NULL.") } if (is.null(names(exp_cor))) { stop("Parameter 'exp_cor' needs to be a named list with the variable names.") } + if (any(sapply(exp_cor, function(x) is.null(names(dim(x)))))) { + stop("Parameter 'exp_cor' needs to be a list of arrays with dimension names.") + } + if (!all(names(exp_cor) %in% names(exp))) { + stop("Parameter 'exp_cor' needs to have the same variable names as 'exp'.") + } + dims <- lapply(exp_cor, function(x) dim(x)) + first_dims <- dims[[1]] + all_equal <- all(sapply(dims[-1], function(x) identical(first_dims, x))) + if (!all_equal) { + stop("Parameter 'exp_cor' variables needs to have the same dimension names.") + } } # Variable checks @@ -195,19 +223,58 @@ PeriodSPEI <- function(exp, dates, lat, } ## time_dim - + if (!is.character(time_dim) | length(time_dim) != 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!all(sapply(exp, function(x) time_dim %in% names(dim(x))))) { + stop("Parameter 'time_dim' is not found in 'exp' dimension.") + } + if (!is.null(exp_cor)) { + if (!all(sapply(exp_cor, function(x) time_dim %in% names(dim(x))))) { + stop("Parameter 'time_dim' is not found in 'exp_cor' dimension.") + } + } ## leadtime_dim - + if (!is.character(leadtime_dim) | length(leadtime_dim) != 1) { + stop("Parameter 'leadtime_dim' must be a character string.") + } + if (!all(sapply(exp, function(x) leadtime_dim %in% names(dim(x))))) { + stop("Parameter 'leadtime_dim' is not found in 'exp' dimension.") + } + if (!is.null(exp_cor)) { + if (!all(sapply(exp_cor, function(x) leadtime_dim %in% names(dim(x))))) { + stop("Parameter 'leadtime_dim' is not found in 'exp_cor' dimension.") + } + } ## memb_dim - + if (!is.character(memb_dim) | length(memb_dim) != 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!all(sapply(exp, function(x) memb_dim %in% names(dim(x))))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (!is.null(exp_cor)) { + if (!all(sapply(exp_cor, function(x) memb_dim %in% names(dim(x))))) { + stop("Parameter 'memb_dim' is not found in 'exp_cor' dimension.") + } + } ## lat_dim - + if (!is.character(lat_dim) | length(lat_dim) != 1) { + stop("Parameter 'lat_dim' must be a character string.") + } + if (!all(sapply(exp, function(x) lat_dim %in% names(dim(x))))) { + stop("Parameter 'lat_dim' is not found in 'exp' dimension.") + } + if (!is.null(exp_cor)) { + if (!all(sapply(exp_cor, function(x) lat_dim %in% names(dim(x))))) { + stop("Parameter 'lat_dim' is not found in 'exp_cor' dimension.") + } + } ## accum if (accum > dim(exp[[1]])[leadtime_dim]) { - stop(paste0('ERROR: Cannot compute accumulation of ', accum, ' months because loaded data has only ', - dim(exp[[1]])[leadtime_dim], ' months.')) + stop(paste0("Cannot compute accumulation of ", accum, " months because ", + "loaded data has only ", dim(exp[[1]])[leadtime_dim], " months.")) } - ## start ## end @@ -234,7 +301,12 @@ PeriodSPEI <- function(exp, dates, lat, } ## ncores - + if (!is.null(ncores)) { + if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | + length(ncores) > 1) { + 
stop("Parameter 'ncores' must be a positive integer.") + } + } # Data preparation # complete dates @@ -388,13 +460,6 @@ accumulation <- function(diff_P_PET, dates_monthly, accum = 1, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', ncores = 4) { - if (!time_dim %in% names(dim(diff_P_PET))) { - diff_P_PET <- InsertDim(diff_P_PET, posdim = 1, lendim = 1, name = time_dim) - } - if (!leadtime_dim %in% names(dim(diff_P_PET))) { - diff_P_PET <- InsertDim(diff_P_PET, posdim = 1, lendim = 1, name = leadtime_dim) - } - accum_result <- Apply(data = list(diff_P_PET), target_dims = list(diff_P_PET = c(leadtime_dim,time_dim)), dates_monthly = dates_monthly, @@ -510,22 +575,22 @@ atomic_pet <- function(lat_mask = NULL, data2 = NULL, data3 = NULL, data4 = NULL } if (pet_method == 'hargreaves') { # NOTE EVA: if NA.RM is FALSE this gives error - x_PET <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), - lat = lat_mask, na.rm = TRUE) + x_PET <- invisible(hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), + lat = lat_mask, na.rm = TRUE)) # line to return the vector to the size of the actual original data x_PET <- x_PET[which(dates_monthly == 1)] PET <- array(x_PET, dim = dims) } if (pet_method == 'hargreaves_modified') { - x_PET <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), - lat = lat_mask, Pre = as.vector(data4), na.rm = TRUE) + x_PET <- invisible(hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), + lat = lat_mask, Pre = as.vector(data4), na.rm = TRUE)) x_PET <- x_PET[which(dates_monthly == 1)] PET <- array(x_PET, dim = dims) } if (pet_method == 'thornthwaite') { - x_PET <- thornthwaite(as.vector(data2), lat = lat_mask, na.rm = TRUE) + x_PET <- invisible(thornthwaite(as.vector(data2), lat = lat_mask, na.rm = TRUE)) # line to return the vector to the size of the actual original data x_PET <- x_PET[which(dates_monthly == 1)] PET <- array(x_PET, dim = dims) @@ -536,8 +601,7 @@ atomic_pet <- function(lat_mask = NULL, data2 = NULL, data3 = NULL, data4 = NULL atomic_accum <- function(data, dates_monthly, accum, leadtime_dim, time_dim) { # data:[time, syear] - ftime <- dim(data)[1] - n_sdates <- dim(data)[2] + dims <- dim(data) data_vector <- array(0, dim = length(dates_monthly)) count <- 1 @@ -554,14 +618,13 @@ atomic_accum <- function(data, dates_monthly, accum, leadtime_dim, time_dim) { # (depends on accu) and so that the position in the vector corresponds to the accumulated # of the previous months (instead of the accumulated of the next months) data_sum_x <- data_sum_x[which(dates_monthly == 1)] # discard the months that don't appear in the original data - accum_result <- array(data_sum_x, dim = c(ftime,n_sdates)) # return to matrix form + dim(data_sum_x) <- dims if (accum > 1) { - accum_result[1:(accum-1),] <- NA # replace by NA when the accumulation corresponds to months that where not present in the original data + data_sum_x[1:(accum-1),] <- NA # replace by NA when the accumulation corresponds to months that where not present in the original data } - - return(accum_result) - + return(data_sum_x) } + atomic_spei <- function(data, params, leadtime_dim, time_dim, dim_memb, ref_period, handle_infinity, cross_validation, param_error, method = 'parametric', distribution = 'log-Logistic', fit = 'ub-pwm') { @@ -576,7 +639,7 @@ atomic_spei <- function(data, params, leadtime_dim, time_dim, dim_memb, ref_peri if (all(is.na(data))) { speiX <- array(NA, dim(data)) - } else if (var(data, na.rm=T) == 0) { # if the data [time, sdate, memb] has no 
variability it will raise an error further down, + } else if (var(data, na.rm = T) == 0) { # if the data [time, sdate, memb] has no variability it will raise an error further down, # so we assign a value to the result and skip the step speiX <- array(param_error, dim(data)) } else { diff --git a/tests/testthat/test-PeriodSPEI.R b/tests/testthat/test-PeriodSPEI.R new file mode 100644 index 0000000..0a9a6bd --- /dev/null +++ b/tests/testthat/test-PeriodSPEI.R @@ -0,0 +1,113 @@ +context("CSIndicators::PeriodSPEI tests") + +############################################## +# cube1 +cube1 <- NULL +cube1$data <- array(rnorm(10), dim = c(lat = 2, time = 5)) +class(cube1) <- 's2dv_cube' + +# dat1 +dims <- c(syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 10) +dimscor <- c(syear = 1, time = 3, latitude = 2, longitude = 1, ensemble = 15) + +set.seed(1) +exp_tasmax <- array(rnorm(100, 27.73, 5.26), dim = dims) +set.seed(2) +exp_tasmin <- array(rnorm(100, 14.83, 3.86), dim = dims) +set.seed(3) +exp_prlr <- array(rnorm(100, 21.19, 25.64), dim = dims) + +set.seed(1) +expcor_tasmax <- array(rnorm(100, 29.03, 5.67), dim = dimscor) +set.seed(2) +expcor_tasmin <- array(rnorm(100, 15.70, 4.40), dim = dimscor) +set.seed(3) +expcor_prlr <- array(rnorm(100, 15.62, 21.38), dim = dimscor) + +dates <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), + paste0(2010:2015, "-10-16"))) +dim(dates) <- c(sday = 1, sweek = 1, syear = 6, time = 3) + +lat <- c(40,40.1) + +exp1 <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) +exp_cor1 <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, 'prlr' = expcor_prlr) + +res <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, dates = dates) +source("/esarchive/scratch/erifarov/git/csindicators/R/PeriodSPEI.R") + +############################################## + +test_that("1. Initial checks CST_PeriodSPEI", { + expect_error( + CST_PeriodSPEI(exp = NULL), + "Parameter 'exp' cannot be NULL." + ) + expect_error( + CST_PeriodSPEI(exp = array(10)), + "Parameter 'exp' must be a list of 's2dv_cube' class." + ) + expect_error( + CST_PeriodSPEI(exp = list(cube1)), + paste0("Spatial coordinate names of parameter 'exp' do not match any ", + "of the names accepted by the package.") + ) +}) + +############################################## + +test_that("1. Initial checks PeriodSPEI", { + # exp + expect_error( + PeriodSPEI(exp = NULL), + "Parameter 'exp' needs to be a named list with the needed variables." + ) + expect_error( + PeriodSPEI(exp = list(1)), + "Parameter 'exp' needs to be a named list with the variable names." + ) + expect_error( + PeriodSPEI(exp = list('tasmax' = array(10))), + "Parameter 'exp' needs to be a list of arrays with dimension names." + ) + # exp_cor + expect_error( + PeriodSPEI(exp = exp1, exp_cor = 1), + paste0("Parameter 'exp_cor' needs to be a named list with the needed ", + "variables if it is not NULL.") + ) + expect_error( + PeriodSPEI(exp = exp1, exp_cor = list(1)), + "Parameter 'exp_cor' needs to be a named list with the variable names." + ) + expect_error( + PeriodSPEI(exp = exp1, exp_cor = list('tasmax' = array(10))), + "Parameter 'exp_cor' needs to be a list of arrays with dimension names." + ) + expect_error( + PeriodSPEI(exp = list('tas' = array(10, dim = c(time = 10))), + exp_cor = list('tos' = array(10, dim = c(time = 10)))), + "Parameter 'exp_cor' needs to have the same variable names as 'exp'." 
+ ) + # exp (2) + expect_warning( + PeriodSPEI(exp = exp1, pet_method = '1', dates = dates, lat = lat), + paste0("Parameter 'pet_method' needs to be 'hargreaves' or ", + "'hargreaves_modified'. It is set to 'hargreaves'.") + ) + # time_dim + expect_error( + PeriodSPEI(exp = exp1, ) + ) + # leadtime_dim + + # memb_dim + + # lat_dim + + # accum + +}) + +############################################## + -- GitLab From 677219da70aa26feb38d948a24dae8e2d95bf604 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 1 Jun 2023 16:25:12 +0200 Subject: [PATCH 07/87] Improve code --- R/PeriodSPEI.R | 585 ++++++++++++++++++++++++------------------------- 1 file changed, 290 insertions(+), 295 deletions(-) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index 7d419d0..2eec05f 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -6,7 +6,7 @@ #'respect to normal conditions in a variety of natural and managed systems such #'as crops, ecosystems, rivers, water resources, etc. The SPI is calculated #'using monthly (or weekly) precipitation as the input data. The SPEI uses the -#'monthly (or weekly) difference between precipitation and PET. This represents +#'monthly (or weekly) difference between precipitation and pet. This represents #'a simple climatic water balance which is calculated at different time scales #'to obtain the SPEI. #' @@ -59,8 +59,8 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', lat_dim = 'latitude', - accum = 1, start = NULL, end = NULL, - pet_method = 'hargreaves', + accum = 1, start = NULL, end = NULL, + ref_period = NULL, pet_method = 'hargreaves', standardization = TRUE, params = NULL, # not used if standardization is FALSE param_error = -9999, # not used if standardization is FALSE @@ -68,7 +68,7 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, cross_validation = FALSE, # not used if standardization is FALSE method = 'parametric', distribution = 'log-Logistic', fit = 'ub-pwm', # not used if standardization is FALSE - ncores = 4) { + ncores = NULL) { # Check 's2dv_cube' if (is.null(exp)) { @@ -96,9 +96,9 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, dates = exp[[1]]$attrs$Dates, lat = exp[[1]]$coords[[lat_name]], exp_cor = lapply(exp_cor, function(x) x$data), - pet = pet, + pet = pet, ref_period = ref_period, time_dim = time_dim, leadtime_dim = leadtime_dim, - memb_dim = memb_dim, lat_dim = lat_dim, + memb_dim = memb_dim, lat_dim = lat_dim, accum = accum, start = start, end = end, pet_method = pet_method, standardization = standardization, params = params, param_error = param_error, @@ -116,7 +116,7 @@ PeriodSPEI <- function(exp, dates, lat, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', lat_dim = 'latitude', accum = 1, start = NULL, end = NULL, - pet_method = 'hargreaves', + pet_method = 'hargreaves', ref_period = NULL, standardization = TRUE, params = NULL, param_error = -9999, handle_infinity = FALSE, cross_validation = FALSE, @@ -169,14 +169,14 @@ PeriodSPEI <- function(exp, dates, lat, ## exp (2) if (all(c('tasmin', 'tasmax', 'prlr') %in% names(exp))) { # hargreaves modified: 'tasmin', 'tasmax', 'prlr' and 'lat' - if (!any(pet_method %in% c('hargreaves_modified', 'hargreaves'))) { + if (!(pet_method[1] %in% c('hargreaves_modified', 'hargreaves'))) { warning("Parameter 'pet_method' needs to be 'hargreaves' or ", "'hargreaves_modified'. 
It is set to 'hargreaves'.") pet_method[1] <- 'hargreaves' } } else if (all(c('tas', 'prlr') %in% names(exp))) { # thornthwaite: 'tas' (mean), 'lat' - if (!any(pet_method != 'thornthwaite')) { + if (!(pet_method[1] == 'thornthwaite')) { warning("Parameter 'pet_method' it is set to be 'thornthwaite'.") pet_method[1] <- 'thornthwaite' } @@ -196,14 +196,14 @@ PeriodSPEI <- function(exp, dates, lat, } if (all(c('tasmin', 'tasmax', 'prlr') %in% names(exp_cor))) { # hargreaves modified: 'tasmin', 'tasmax', 'prlr' and 'lat' - if (!any(pet_method %in% c('hargreaves_modified', 'hargreaves'))) { + if (!(pet_method[2] %in% c('hargreaves_modified', 'hargreaves'))) { warning("Parameter 'pet_method' needs to be 'hargreaves' or ", "'hargreaves_modified'. It is set to 'hargreaves'.") pet_method[2] <- 'hargreaves' } } else if (all(c('tas', 'prlr') %in% names(exp_cor))) { # thornthwaite: 'tas' (mean), 'lat' - if (!any(pet_method != 'thornthwaite')) { + if (!(pet_method[2] == 'thornthwaite')) { warning("Parameter 'pet_method' it is set to be 'thornthwaite'.") pet_method[2] <- 'thornthwaite' } @@ -334,31 +334,31 @@ PeriodSPEI <- function(exp, dates, lat, pet <- evapotranspiration(data = data, dates_monthly = dates_monthly, lat = lat, pet_method = pet_method[k], time_dim = time_dim, leadtime_dim = leadtime_dim, - memb_dim = memb_dim, lat_dim = lat_dim, - ncores = ncores) + lat_dim = lat_dim, ncores = ncores) computed_pet <- TRUE } # Accumulation - diff_P_PET <- data$prlr - pet - data_accum <- accumulation(diff_P_PET = diff_P_PET, + diff_p_pet <- data$prlr - pet + data_accum <- accumulation(data = diff_p_pet, dates_monthly = dates_monthly, accum = accum, - time_dim = time_dim, - leadtime_dim = leadtime_dim, - memb_dim = memb_dim, ncores = ncores) + time_dim = time_dim, leadtime_dim = leadtime_dim, + ncores = ncores) # Standardization: if (standardization == TRUE) { - spei_dat <- spei_standardization(data_accum = data_accum, - leadtime_dim = leadtime_dim, - time_dim = time_dim, - memb_dim = memb_dim, - cross_validation = cross_validation, - handle_infinity = handle_infinity, - ncores = ncores, - accum = accum, - param_error = param_error, - params = params, method = method, - distribution = distribution, fit = fit) + spei_dat <- spei_standardization(data = data_accum, + leadtime_dim = leadtime_dim, + time_dim = time_dim, + memb_dim = memb_dim, + ref_period = ref_period, + cross_validation = cross_validation, + handle_infinity = handle_infinity, + ncores = ncores, + accum = accum, + param_error = param_error, + params = params, method = method, + distribution = distribution, + fit = fit) params <- spei_dat$params } else { spei_dat <- data_accum @@ -395,32 +395,29 @@ PeriodSPEI <- function(exp, dates, lat, } - +#------------------------------------------------------------------------------- evapotranspiration <- function(data, dates_monthly, lat, pet_method = 'hargreaves', - time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', - lat_dim = 'latitude', - ncores = 4) { + time_dim = 'syear', leadtime_dim = 'time', + lat_dim = 'latitude', ncores = NULL) { - ## corrected:: lat_mask <- array(lat, dim = c(1, length(lat))) names(dim(lat_mask)) <- c('dat', lat_dim) # extract mask of NA locations to return to NA the final result - - mask_NA <- array(1, dim = dim(data[[1]])) + mask_na <- array(1, dim = dim(data[[1]])) if (pet_method == 'hargreaves') { varnames <- c('tasmax', 'tasmin') - mask_NA[which(is.na(data$tasmax))] <- 0 - mask_NA[which(is.na(data$tasmin))] <- 0 + mask_na[which(is.na(data$tasmax))] <- 0 + 
mask_na[which(is.na(data$tasmin))] <- 0 } else if (pet_method == 'hargreaves_modified') { varnames <- c('tasmax', 'tasmin', 'prlr') - mask_NA[which(is.na(data$tasmax))] <- 0 - mask_NA[which(is.na(data$tasmin))] <- 0 - mask_NA[which(is.na(data$prlr))] <- 0 + mask_na[which(is.na(data$tasmax))] <- 0 + mask_na[which(is.na(data$tasmin))] <- 0 + mask_na[which(is.na(data$prlr))] <- 0 } else if (pet_method == 'thornthwaite') { varnames <- c('tas') - mask_NA[which(is.na(data$tas))] <- 0 + mask_na[which(is.na(data$tas))] <- 0 } # replace NA with 0 @@ -433,7 +430,7 @@ evapotranspiration <- function(data, dates_monthly, lat, pet_method = 'hargreave # file <- tempfile() # file = 'out.txt' # sink(file) - PET_estimated <- Apply(data = c(list(lat_mask = lat_mask), data[varnames]), + pet_estimated <- Apply(data = c(list(lat_mask = lat_mask), data[varnames]), target_dims = c(list(lat_mask = 'dat'), target_dims_data), output_dims = c(leadtime_dim, time_dim), pet_method = pet_method, @@ -442,91 +439,18 @@ evapotranspiration <- function(data, dates_monthly, lat, pet_method = 'hargreave fun = atomic_pet, ncores = ncores)$output1 # sink(NULL) # captured_message <- readLines(file) - # print(paste0('Length captured!!', length(captured_message))) - # print(paste0('unique(captured_message)', length(unique(captured_message)))) - - # reorder dims in PET_estimated - pos <- match(names(dim(data[[1]])),names(dim(PET_estimated))) - PET_estimated <- aperm(PET_estimated, pos) - - # restore original NAs from mask_NA - PET_estimated[which(mask_NA == 0 )] <- NA - return(PET_estimated) + # reorder dims in pet_estimated + pos <- match(names(dim(data[[1]])),names(dim(pet_estimated))) + pet_estimated <- aperm(pet_estimated, pos) -} - -accumulation <- function(diff_P_PET, dates_monthly, accum = 1, - time_dim = 'syear', leadtime_dim = 'time', - memb_dim = 'ensemble', ncores = 4) { - - accum_result <- Apply(data = list(diff_P_PET), - target_dims = list(diff_P_PET = c(leadtime_dim,time_dim)), - dates_monthly = dates_monthly, - accum = accum, - output_dims = c(leadtime_dim, time_dim), #c('time','sdate'), - leadtime_dim = leadtime_dim, time_dim = time_dim, - fun = atomic_accum, ncores = ncores) - - # recover essential lost dims (if they had length 1 they'd have been dropped in previous step): - for (d in c(time_dim, leadtime_dim, 'latitude', 'longitude', memb_dim)) { - if(!d %in% names(dim(accum_result$output1))){ - accum_result$output1 <- InsertDim(data = accum_result$output1, posdim = length(names(dim(accum_result$output1))) + 1, lendim = 1, name = d) - } - } - - # reorder dims in accum_result - dims_order <- array(NA, length(dim(diff_P_PET))) - for (ord in 1:length(dim(diff_P_PET))){ - dims_order[ord] <- which(names(dim(accum_result$output1)) == names(dim(diff_P_PET))[ord]) - } - data_accum <- aperm(accum_result$output1, dims_order) + # restore original NAs from mask_na + pet_estimated[which(mask_na == 0 )] <- NA - return(data_accum) + return(pet_estimated) } -spei_standardization <- function(data_accum, - leadtime_dim, time_dim, memb_dim, handle_infinity, - cross_validation, ncores, accum, param_error, - params = NULL, method ='parametric', - distribution = 'log-Logistic', fit='ub-pwm') { - - n_leadtimes <- dim(data_accum)[leadtime_dim] - n_sdates_params <- dim(data_accum)[time_dim] - - if (!cross_validation) { - n_sdates_params <- 1 - } - - if (is.null(params)) { - params <- array(NA, dim = c(n_sdates_params, n_leadtimes, coef = 3)) - names(dim(params)) <- c(time_dim, leadtime_dim, 'coef') - } else if (length(dim(params)) < 2) { - 
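+    # Assumed interpretation: a bare coefficient vector (no named dimensions)
+    # is replicated so the same fitted parameters are reused for every start
+    # date and lead time before being passed to Apply().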
params <- array(params, dim = c(length(params), n_sdates_params, n_leadtimes)) - params <- aperm(params, c(2,3,1)) # dim(params): [time_dim, leadtime_dim, coef] with the values repeated each time_dim and leadtime_dim - names(dim(params)) <- c(time_dim, leadtime_dim, 'coef') - } - - spei <- Apply(data = list(data = data_accum, params = params), - target_dims = list(data = c(leadtime_dim, time_dim, memb_dim), - params = c(time_dim, leadtime_dim, 'coef')), - output_dims = list(data_spei = c(leadtime_dim, time_dim, memb_dim), - params = c(time_dim, leadtime_dim, 'coef')), - leadtime_dim = leadtime_dim, - time_dim = time_dim, - dim_memb = memb_dim, - handle_infinity = handle_infinity, - cross_validation = cross_validation, - method = method, distribution = distribution, fit = fit, - ref_period = NULL, - param_error = param_error, - fun = atomic_spei, - ncores = ncores) - - return(spei) # spei is a list of data_spei and params -} - atomic_pet <- function(lat_mask = NULL, data2 = NULL, data3 = NULL, data4 = NULL, pet_method, dates_monthly, leadtime_dim, time_dim) { @@ -575,30 +499,52 @@ atomic_pet <- function(lat_mask = NULL, data2 = NULL, data3 = NULL, data4 = NULL } if (pet_method == 'hargreaves') { # NOTE EVA: if NA.RM is FALSE this gives error - x_PET <- invisible(hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), + x_pet <- invisible(hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), lat = lat_mask, na.rm = TRUE)) # line to return the vector to the size of the actual original data - x_PET <- x_PET[which(dates_monthly == 1)] - PET <- array(x_PET, dim = dims) + x_pet <- x_pet[which(dates_monthly == 1)] + pet <- array(x_pet, dim = dims) } if (pet_method == 'hargreaves_modified') { - x_PET <- invisible(hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), + x_pet <- invisible(hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), lat = lat_mask, Pre = as.vector(data4), na.rm = TRUE)) - x_PET <- x_PET[which(dates_monthly == 1)] - PET <- array(x_PET, dim = dims) + x_pet <- x_pet[which(dates_monthly == 1)] + pet <- array(x_pet, dim = dims) } if (pet_method == 'thornthwaite') { - x_PET <- invisible(thornthwaite(as.vector(data2), lat = lat_mask, na.rm = TRUE)) + x_pet <- invisible(thornthwaite(as.vector(data2), lat = lat_mask, na.rm = TRUE)) # line to return the vector to the size of the actual original data - x_PET <- x_PET[which(dates_monthly == 1)] - PET <- array(x_PET, dim = dims) + x_pet <- x_pet[which(dates_monthly == 1)] + pet <- array(x_pet, dim = dims) } - return(PET) + return(pet) } -atomic_accum <- function(data, dates_monthly, accum, leadtime_dim, time_dim) { +#------------------------------------------------------------------------------- + +accumulation <- function(data, dates_monthly, accum = 1, + time_dim = 'syear', leadtime_dim = 'time', + ncores = NULL) { + + accum_result <- Apply(data = list(data), + target_dims = list(data = c(leadtime_dim, time_dim)), + dates_monthly = dates_monthly, + accum = accum, + output_dims = c(leadtime_dim, time_dim), + leadtime_dim = leadtime_dim, time_dim = time_dim, + fun = atomic_accum, ncores = ncores)$output1 + + pos <- match(names(dim(accum_result)), names(dim(data))) + data_accum <- aperm(accum_result, pos) + + return(data_accum) + +} + +atomic_accum <- function(data, dates_monthly, accum = 1, + time_dim = 'syear', leadtime_dim = 'time') { # data:[time, syear] dims <- dim(data) @@ -606,29 +552,90 @@ atomic_accum <- function(data, dates_monthly, accum, leadtime_dim, time_dim) { data_vector <- array(0, dim = 
length(dates_monthly)) count <- 1 for (dd in 1:length(dates_monthly)) { - if (dates_monthly[dd] == 1){ + if (dates_monthly[dd] == 1) { data_vector[dd] <- as.vector(data)[count] count <- count + 1 } } - # Accumulation at different timescales - data_sum_x <- rollapply(data_vector, accum, sum) # rollapply {zoo} A generic function for applying a function to rolling margins of an array. - data_sum_x <- c(rep(NA, accum-1), data_sum_x) # adds as many NAs as needed at the begining to account for the months that cannot be added - # (depends on accu) and so that the position in the vector corresponds to the accumulated - # of the previous months (instead of the accumulated of the next months) - data_sum_x <- data_sum_x[which(dates_monthly == 1)] # discard the months that don't appear in the original data - dim(data_sum_x) <- dims + # rollapply {zoo} A generic function for applying a function to rolling margins of an array. + data_sum_x <- rollapply(data_vector, accum, sum) + # adds as many NAs as needed at the begining to account for the months that cannot be added + # (depends on accu) and so that the position in the vector corresponds to the accumulated + # of the previous months (instead of the accumulated of the next months) + data_sum_x <- c(rep(NA, accum-1), data_sum_x) + # discard the months that don't appear in the original data + data_sum_x <- data_sum_x[which(dates_monthly == 1)] + accum_result <- array(data_sum_x, dim = c(dims)) + # replace by NA when the accumulation corresponds to months that where not present in the original data if (accum > 1) { - data_sum_x[1:(accum-1),] <- NA # replace by NA when the accumulation corresponds to months that where not present in the original data + accum_result[1:(accum-1), ] <- NA } - return(data_sum_x) + return(accum_result) } -atomic_spei <- function(data, params, leadtime_dim, time_dim, dim_memb, ref_period, - handle_infinity, cross_validation, param_error, method = 'parametric', +#------------------------------------------------------------------------------- + + +spei_standardization <- function(data, accum = 1, time_dim = 'syear', + leadtime_dim = 'time', memb_dim = 'ensemble', + ref_period = NULL, + handle_infinity = FALSE, + cross_validation = FALSE, + param_error = -9999, params = NULL, + method = 'parametric', + distribution = 'log-Logistic', + fit = 'ub-pwm', + ncores = NULL) { + + n_leadtimes <- dim(data)[leadtime_dim] + n_sdates_params <- dim(data)[time_dim] + + if (!cross_validation) { + n_sdates_params <- 1 + } + + if (is.null(params)) { + params <- array(NA, dim = c(n_sdates_params, n_leadtimes, coef = 3)) # hardcoded + names(dim(params)) <- c(time_dim, leadtime_dim, 'coef') + } else if (length(dim(params)) < 2) { + params <- array(params, dim = c(length(params), n_sdates_params, n_leadtimes)) + params <- aperm(params, c(2,3,1)) # dim(params): [time_dim, leadtime_dim, coef] with the values repeated each time_dim and leadtime_dim + names(dim(params)) <- c(time_dim, leadtime_dim, 'coef') + } + + spei <- Apply(data = list(data = data, params = params), + target_dims = list(data = c(leadtime_dim, time_dim, memb_dim), + params = c(time_dim, leadtime_dim, 'coef')), + output_dims = list(data_spei = c(leadtime_dim, time_dim, memb_dim), + params = c(time_dim, leadtime_dim, 'coef')), + leadtime_dim = leadtime_dim, + time_dim = time_dim, + memb_dim = memb_dim, + handle_infinity = handle_infinity, + cross_validation = cross_validation, + method = method, distribution = distribution, fit = fit, + ref_period = ref_period, + param_error = 
param_error, + fun = atomic_spei, + ncores = ncores) + + return(spei) # spei is a list of data_spei and params +} + + + + + +atomic_spei <- function(data, params, leadtime_dim = 'time', time_dim = 'syear', + memb_dim = 'ensemble', ref_period = NULL, + handle_infinity = FALSE, cross_validation = FALSE, + param_error = -9999, method = 'parametric', distribution = 'log-Logistic', fit = 'ub-pwm') { - # data: [time, sdate, memb] + + # data: [leadtime_dim, time_dim, memb_dim] + # params: [time_dim, leadtime_dim, 'coef'] + if (is.null(ref_period)) { ref.start <- NULL ref.end <- NULL @@ -639,99 +646,83 @@ atomic_spei <- function(data, params, leadtime_dim, time_dim, dim_memb, ref_peri if (all(is.na(data))) { speiX <- array(NA, dim(data)) - } else if (var(data, na.rm = T) == 0) { # if the data [time, sdate, memb] has no variability it will raise an error further down, - # so we assign a value to the result and skip the step + # if the data [time, sdate, memb] has no variability it will raise an error further down, + # so we assign a value to the result and skip the step + } else if (var(data, na.rm = T) == 0) { speiX <- array(param_error, dim(data)) + return(speiX) } else { - c(speiX, params) %<-% spX(data, ref.start=ref.start, ref.end=ref.end, params = params, - method = method, distribution = distribution, fit = fit, - leadtime_dim = leadtime_dim, time_dim = time_dim, dim_memb = dim_memb, - handle_infinity = handle_infinity, cross_validation = cross_validation) + n_coef_max <- dim(params)['coef'] + ftime <- as.numeric(dim(data)[leadtime_dim]) + n_sdates <- as.numeric(dim(data)[time_dim]) + nmemb <- as.numeric(dim(data)[memb_dim]) + + spei_mod <- array(data = NA, dim = c(ftime, n_sdates, nmemb)) + names(dim(spei_mod)) <- c(leadtime_dim, time_dim, memb_dim) + if (cross_validation) { + params_result <- array(data = NA, dim = c(n_sdates, ftime, n_coef_max)) + } else { + params_result <- array(data = NA, dim = c(1, ftime, n_coef_max)) + } + for (ff in 1:ftime) { # treat months independently + Accum_D_temp <- ClimProjDiags::Subset(data, along = leadtime_dim, + indices = ff, drop = FALSE) + params_tmp <- if (all(is.na(params))) {NULL} else {params[, ff, ]} + + c(x_spei_mod, params_ff) %<-% spX_ftime(data = Accum_D_temp, na.rm = TRUE, method = method, + distribution = distribution, + fit = fit, ref.start = ref.start, ref.end = ref.end, + params = params_tmp, n_sdates = n_sdates, nmemb = nmemb, + handle_infinity = handle_infinity, + cross_validation = cross_validation) + spei_mod[ff, , ] <- x_spei_mod + coef_names <- names(params_ff) + # lengthen dimension coef of params_ff in case it doesn't match the corresponding dimension of parms_months + if (length(params_ff) < n_coef_max) { + params_ff <- append(params_ff, array(NA, dim = n_coef_max - length(params_ff))) + coef_names <- append(coef_names, '') + } + params_result[, ff, ] <- params_ff + } + colnames(params_result) <- coef_names + names(dim(params_result)) <- c(time_dim, leadtime_dim, 'coef') + return(list(spei = spei_mod, params = params_result)) } - - return(list(speiX, params)) - } #------------------------------------------------------------------------------- ### Support functions -spX <- function(acc_data, ref.start, ref.end, params = params, na.rm = TRUE, - method = 'parametric', distribution = 'log-Logistic', fit = 'ub-pwm', - leadtime_dim, time_dim, dim_memb, handle_infinity, cross_validation){ - - # acc_data: [ftime, syear, nmemb] - # params: [syear, ftime, coef] - - n_coef_max <- dim(params)['coef'] # maximum number of parameters needed to 
define any of the considered distributions - - ftime <- as.numeric(dim(acc_data)[leadtime_dim]) - n_sdates <- as.numeric(dim(acc_data)[time_dim]) - nmemb <- as.numeric(dim(acc_data)[dim_memb]) - - spei_mod <- array(data = NA, dim = c(ftime, n_sdates, nmemb)) - names(dim(spei_mod)) <- c(leadtime_dim, time_dim, dim_memb) - if (cross_validation) { - params_result <- array(data = NA, dim = c(n_sdates, ftime, n_coef_max)) - } else { - params_result <- array(data = NA, dim = c(1, ftime, n_coef_max)) - } - - for (ff in 1:ftime) { # treat months independently - Accum_D_temp <- ClimProjDiags::Subset(acc_data, along = leadtime_dim, indices = ff, drop = FALSE) - params_tmp <- if (all(is.na(params))) {NULL} else {params[,ff,]} - c(x_spei_mod, params_ff) %<-% spX_ftime(Accum_D_temp, na.rm = TRUE, method = method, distribution = distribution, - fit = fit, ref.start = ref.start, ref.end = ref.end, - params = params_tmp, n_sdates = n_sdates, nmemb = nmemb, - handle_infinity = handle_infinity, cross_validation = cross_validation) - spei_mod[ff,,] <- x_spei_mod - coef_names <- names(params_ff) - if (length(params_ff) < n_coef_max){ # lengthen dimension coef of params_ff in case it doesn't match the corresponding dimension of parms_months - params_ff <- append(params_ff, array(NA, dim = n_coef_max - length(params_ff))) - coef_names <- append(coef_names, '') - } - params_result[,ff,] <- params_ff - } +spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', + na.rm = TRUE, method = 'parametric', n_sdates, nmemb, + ref.start = NULL, ref.end = NULL, params = NULL, + handle_infinity = FALSE, cross_validation = FALSE) { - colnames(params_result) <- coef_names - names(dim(params_result)) <- c(time_dim, leadtime_dim, 'coef') - - return(list(spei = spei_mod, params = params_result)) -} + # data: [leadtime_dim = 1, time_dim, memb_dim] -#------------------------------------------------------------------------------- + if (anyNA(data) && na.rm == FALSE) { + stop('Error: Data must not contain NAs') # TO DO: return something? 
+ } -# HandleInfinity: -spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm = TRUE, method = 'parametric', n_sdates, nmemb, - ref.start = NULL, ref.end = NULL, params = NULL, handle_infinity = FALSE, cross_validation = FALSE) { - - # data:[ftime = 1, syear, ensemble] - if (method == 'non-parametric') { - - if (anyNA(data) && na.rm == FALSE) { - stop('Error: Data must not contain NAs') - } - + bp = matrix(0, length(data), 1) for (i in 1:length(data)) { - bp[i,1] = sum(data[] <= data[i]); # Writes the rank of the data + bp[i,1] = sum(data[] <= data[i], na.rm = na.rm); # Writes the rank of the data } - - SPEI = qnorm((bp-0.44)/(length(data)+0.12)) + SPEI = qnorm((bp - 0.44)/(length(data) + 0.12)) return(SPEI) # it won't return params to be used in exp_cor; also it is not using handle_infinity nor cross_validation } else { std_index <- array(NA, c(n_sdates, nmemb)) - + if (anyNA(data) && na.rm == FALSE) { stop('Error: Data must not contain NAs') } - coef = switch(distribution, "Gamma" = array(NA, dim = 2, dimnames = list(c('alpha','beta'))), "log-Logistic" = array(NA, dim = 3, dimnames = list(c('xi','alpha','kappa'))), @@ -740,11 +731,11 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm dim_one <- length(coef) if (!is.null(params)) { - if (length(params) != dim_one) { + if (length(params)!=dim_one) { stop(paste0('parameters array should have dimensions [', dim_one, ']')) } } - + # Select window if necessary if (!is.null(ref.start) && !is.null(ref.end)) { data.fit <- window(data,ref.start,ref.end) @@ -752,107 +743,111 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', na.rm data.fit <- data } - if (cross_validation == "TRUE") { + if (cross_validation) { loop_years <- n_sdates } else { loop_years <- 1 } + params_result <- array(NA, dim = c(loop_years, dim_one)) colnames(params_result) <- names(coef) for (nsd in 1:loop_years) { # loop over years (in case of cross_validation) # Cumulative series (acu) - if (cross_validation == TRUE){ + if (cross_validation) { acu <- as.vector(data.fit[,-nsd,]) } else { acu <- as.vector(data.fit) } acu.sorted <- sort.default(acu, method = "quick") - acu.sorted <- acu.sorted[!is.na(acu.sorted)] # remove NAs (no need if(na.rm) because if there are NA and na.rm=F we don't get to this point) - if (length(acu.sorted)!=0){ # else all acu was NA and we don't need to continue with this case + # remove NAs (no need if(na.rm) because if there are NA and na.rm = F we don't get to this point) + acu.sorted <- acu.sorted[!is.na(acu.sorted)] + # else all acu was NA and we don't need to continue with this case + if (length(acu.sorted) != 0) { acu_sd = sd(acu.sorted) - if (!is.na(acu_sd)){ - if (acu_sd != 0){ - if(distribution != "log-Logistic") { - pze <- sum(acu==0)/length(acu) - acu.sorted = acu.sorted[acu.sorted > 0] - } - if (!is.null(params)) { - f_params = as.vector(params) - params_result[nsd,] <- f_params - } else { - if (length(acu.sorted) >= 4){ # else coef will be NA - # Calculate probability weighted moments based on fit with lmomco or TLMoments - pwm = switch(fit, - "pp-pwm" = pwm.pp(acu.sorted,-0.35,0, nmom=3), - pwm.ub(acu.sorted, nmom=3) - #TLMoments::PWM(acu.sorted, order=0:2) - ) - - # Check L-moments validity - lmom <- pwm2lmom(pwm) - if (!(!are.lmom.valid(lmom) || anyNA(lmom[[1]]) || any(is.nan(lmom[[1]])))){ - - # lmom fortran functions need specific inputs L1, L2, T3 - # this is handled by lmomco internally with lmorph - fortran_vec = c(lmom$lambdas[1:2], lmom$ratios[3]) 
- - # Calculate parameters based on distribution with lmom then lmomco - f_params = switch(distribution, - "log-Logistic" = tryCatch(lmom::pelglo(fortran_vec), error = function(e){ parglo(lmom)$para }), - "Gamma" = tryCatch(lmom::pelgam(fortran_vec), error = function(e){ pargam(lmom)$para }), - "PearsonIII" = tryCatch(lmom::pelpe3(fortran_vec), error = function(e){ parpe3(lmom)$para }) - ) - - # Adjust if user chose log-Logistic and max-lik - if(distribution == 'log-Logistic' && fit == 'max-lik'){ - f_params = parglo.maxlik(acu.sorted, f_params)$para - } - params_result[nsd,] <- f_params - } # end if dor the case the L-moments are not valid (std_index will be NA) - } # end if for the case there are not enough values to estimate the parameters (std_index will be NA) - } # end estimation of f_param - # Calculate cdf based on distribution with lmom - if (all(is.na(params_result[nsd,]))){ - cdf_res <- NA - } else { - f_params <- params_result[nsd,] - f_params <- f_params[which(!is.na(f_params))] - cdf_res = switch(distribution, - "log-Logistic" = lmom::cdfglo(data, f_params), - "Gamma" = lmom::cdfgam(data, f_params), - "PearsonIII" = lmom::cdfpe3(data, f_params) - ) - } - - std_index_cv <- array(qnorm(cdf_res), dim = c(n_sdates,nmemb)) - - # Adjust if user chose Gamma or PearsonIII - Not tested: For future development - #if(distribution != 'log-Logistic'){ - # std_index[ff,s] = qnorm(pze + (1-pze)*pnorm(std_index[ff,s])) # ff doesn't exist at this point - #} - if (cross_validation == TRUE){ - std_index[nsd,] <- std_index_cv[nsd,] - } else { - std_index <- std_index_cv + if (!is.na(acu_sd)) { + if (acu_sd != 0) { + if (distribution != "log-Logistic") { + pze <- sum(acu == 0) / length(acu) + acu.sorted = acu.sorted[acu.sorted > 0] + } + if (!is.null(params)) { + f_params = as.vector(params) + params_result[nsd, ] <- f_params + } else { + # else coef will be NA + if (length(acu.sorted) >= 4) { + # Calculate probability weighted moments based on fit with lmomco or TLMoments + pwm = switch(fit, + "pp-pwm" = pwm.pp(acu.sorted,-0.35,0, nmom=3), + pwm.ub(acu.sorted, nmom=3) + # TLMoments::PWM(acu.sorted, order = 0:2) + ) + + # Check L-moments validity + lmom <- pwm2lmom(pwm) + if (!(!are.lmom.valid(lmom) || anyNA(lmom[[1]]) || any(is.nan(lmom[[1]])))) { + # lmom fortran functions need specific inputs L1, L2, T3 + # this is handled by lmomco internally with lmorph + fortran_vec = c(lmom$lambdas[1:2], lmom$ratios[3]) + + # Calculate parameters based on distribution with lmom then lmomco + f_params = switch(distribution, + "log-Logistic" = tryCatch(lmom::pelglo(fortran_vec), + error = function(e){ parglo(lmom)$para }), + "Gamma" = tryCatch(lmom::pelgam(fortran_vec), + error = function(e){ pargam(lmom)$para }), + "PearsonIII" = tryCatch(lmom::pelpe3(fortran_vec), + error = function(e){ parpe3(lmom)$para })) + + # Adjust if user chose log-Logistic and max-lik + if (distribution == 'log-Logistic' && fit == 'max-lik') { + f_params = parglo.maxlik(acu.sorted, f_params)$para + } + params_result[nsd, ] <- f_params + } # end if dor the case the L-moments are not valid (std_index will be NA) + } # end if for the case there are not enough values to estimate the parameters (std_index will be NA) + } # end estimation of f_param + # Calculate cdf based on distribution with lmom + if (all(is.na(params_result[nsd,]))) { + cdf_res <- NA + } else { + f_params <- params_result[nsd,] + f_params <- f_params[which(!is.na(f_params))] + cdf_res = switch(distribution, + "log-Logistic" = lmom::cdfglo(data, f_params), + "Gamma" = 
lmom::cdfgam(data, f_params), + "PearsonIII" = lmom::cdfpe3(data, f_params)) + } + + std_index_cv <- array(qnorm(cdf_res), dim = c(n_sdates,nmemb)) + + # Adjust if user chose Gamma or PearsonIII - Not tested: For future development + # if (distribution != 'log-Logistic') { + # std_index[ff,s] = qnorm(pze + (1-pze)*pnorm(std_index[ff,s])) # ff doesn't exist at this point + # } + if (cross_validation) { + std_index[nsd, ] <- std_index_cv[nsd, ] + } else { + std_index <- std_index_cv + } } - } - } # end if for the case there is no variability - } # end if for the case all NA in acu - } # next year (in case of cross_validation or all done if cross_validation == F) + } # end if for the case there is no variability + } # end if for the case all NA in acu + } # next year (in case of cross_validation or all done if cross_validation == F) - if(handle_infinity == 'TRUE'){ # could also use "param_error" ?; we are giving it the min/max value of the grid point + if (handle_infinity) { # could also use "param_error" ?; we are giving it the min/max value of the grid point std_index[is.infinite(std_index) & std_index < 0] <- min(std_index[!is.infinite(std_index)]) std_index[is.infinite(std_index) & std_index > 0] <- max(std_index[!is.infinite(std_index)]) } - - return(list(std_index = std_index, params = params_result)) # f_params will be params only if cross_validation is FALSE - # (otherwise there will be one f_params per year; - # but the output params will be read only in the case that - # it is called with cross_validation FALSE) + # f_params will be params only if cross_validation is FALSE + # (otherwise there will be one f_params per year; + # but the output params will be read only in the case that + # it is called with cross_validation FALSE) + return(list(std_index = std_index, params = params_result)) - } # end parametric - + } } +#------------------------------------------------------------------------------- -- GitLab From b5dde53a71224afbda3c61b5d4fc0eb6e4f4728d Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 1 Jun 2023 17:26:56 +0200 Subject: [PATCH 08/87] Develop original_dates and correct Dates --- R/PeriodMean.R | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/R/PeriodMean.R b/R/PeriodMean.R index 85a12a7..303b38e 100644 --- a/R/PeriodMean.R +++ b/R/PeriodMean.R @@ -65,12 +65,19 @@ CST_PeriodMean <- function(data, start = NULL, end = NULL, total <- PeriodMean(data = data$data, dates = data$attrs$Dates, start, end, time_dim = time_dim, na.rm = na.rm, ncores = ncores) data$data <- total + original_dates <- data$attrs$Dates if (!is.null(start) && !is.null(end)) { data$attrs$Dates <- SelectPeriodOnDates(dates = data$attrs$Dates, start = start, end = end, time_dim = time_dim, ncores = ncores) } + data$attrs$Dates <- ClimProjDiags::Subset(data$attrs$Dates, time_dim, 1, + drop = 'selected') + # Option (1) + # attr(data$attrs$Dates, 'original_dates') <- original_dates + # Option (2) + data$attrs$original_dates <- original_dates return(data) } -- GitLab From 96c3f9a56d23b0716220366a84573840e97de2a9 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Fri, 2 Jun 2023 09:41:57 +0200 Subject: [PATCH 09/87] Use testthat edition 3; remove context() --- DESCRIPTION | 1 + tests/testthat/test-AbsToProbs.R | 2 -- tests/testthat/test-AccumulationExceedingThreshold.R | 3 +-- tests/testthat/test-MergeRefToExp.R | 2 -- tests/testthat/test-PeriodAccumulation.R | 3 +-- tests/testthat/test-PeriodMean.R | 3 +-- tests/testthat/test-QThreshold.R | 3 +-- tests/testthat/test-SelectPeriod.R | 2 +- 
tests/testthat/test-Threshold.R | 3 +-- tests/testthat/test-TotalSpellTimeExceedingThreshold.R | 2 +- tests/testthat/test-TotalTimeExceedingThreshold.R | 2 +- tests/testthat/test-WindCapacityFactor.R | 2 +- tests/testthat/test-WindPowerDensity.R | 2 +- 13 files changed, 11 insertions(+), 19 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 90e0e83..4151ba4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -40,3 +40,4 @@ URL: https://earth.bsc.es/gitlab/es/csindicators/ BugReports: https://earth.bsc.es/gitlab/es/csindicators/-/issues Encoding: UTF-8 RoxygenNote: 7.2.0 +Config/testthat/edition: 3 \ No newline at end of file diff --git a/tests/testthat/test-AbsToProbs.R b/tests/testthat/test-AbsToProbs.R index 902b3f1..c2cdc9f 100644 --- a/tests/testthat/test-AbsToProbs.R +++ b/tests/testthat/test-AbsToProbs.R @@ -1,5 +1,3 @@ -context("CSIndicators::AbsToProbs tests") - ############################################## # dat1 dat1 <- NULL diff --git a/tests/testthat/test-AccumulationExceedingThreshold.R b/tests/testthat/test-AccumulationExceedingThreshold.R index a6b598e..0331e8e 100644 --- a/tests/testthat/test-AccumulationExceedingThreshold.R +++ b/tests/testthat/test-AccumulationExceedingThreshold.R @@ -1,5 +1,4 @@ -context("CSIndicators::AccumulationExceedingThreshold tests") - +############################################## # dat1 dat1 <- 1:20 diff --git a/tests/testthat/test-MergeRefToExp.R b/tests/testthat/test-MergeRefToExp.R index 2c3e8f6..adbdfd6 100644 --- a/tests/testthat/test-MergeRefToExp.R +++ b/tests/testthat/test-MergeRefToExp.R @@ -1,5 +1,3 @@ -context("CSIndicators::MergeRefToExp tests") - ########################################################################### test_that("Sanity checks", { diff --git a/tests/testthat/test-PeriodAccumulation.R b/tests/testthat/test-PeriodAccumulation.R index 777dc30..81718f5 100644 --- a/tests/testthat/test-PeriodAccumulation.R +++ b/tests/testthat/test-PeriodAccumulation.R @@ -1,5 +1,4 @@ -context("CSIndicators::PeriodAccumulation tests") - +############################################## test_that("Sanity Checks", { expect_error( PeriodAccumulation('x'), diff --git a/tests/testthat/test-PeriodMean.R b/tests/testthat/test-PeriodMean.R index 7576b11..4186639 100644 --- a/tests/testthat/test-PeriodMean.R +++ b/tests/testthat/test-PeriodMean.R @@ -1,5 +1,4 @@ -context("CSIndicators::PeriodMean tests") - +############################################## test_that("Sanity Checks", { expect_error( PeriodMean('x'), diff --git a/tests/testthat/test-QThreshold.R b/tests/testthat/test-QThreshold.R index 41cc3e5..deb35df 100644 --- a/tests/testthat/test-QThreshold.R +++ b/tests/testthat/test-QThreshold.R @@ -1,5 +1,4 @@ -context("CSIndicators::QThreshold tests") - +############################################## test_that("Sanity checks", { expect_error( QThreshold(NULL), diff --git a/tests/testthat/test-SelectPeriod.R b/tests/testthat/test-SelectPeriod.R index 1c264c2..deb8656 100644 --- a/tests/testthat/test-SelectPeriod.R +++ b/tests/testthat/test-SelectPeriod.R @@ -1,4 +1,4 @@ -context("CSIndicators::SelectPeriodOnData and SelectPeriodOnDates tests") +############################################## library(s2dv) diff --git a/tests/testthat/test-Threshold.R b/tests/testthat/test-Threshold.R index 24ca601..84e4e0d 100644 --- a/tests/testthat/test-Threshold.R +++ b/tests/testthat/test-Threshold.R @@ -1,5 +1,4 @@ -context("CSIndicators::Threshold tests") - +############################################## test_that("Sanity checks", { expect_error( 
Threshold(NULL), diff --git a/tests/testthat/test-TotalSpellTimeExceedingThreshold.R b/tests/testthat/test-TotalSpellTimeExceedingThreshold.R index d215529..36e46cf 100644 --- a/tests/testthat/test-TotalSpellTimeExceedingThreshold.R +++ b/tests/testthat/test-TotalSpellTimeExceedingThreshold.R @@ -1,4 +1,4 @@ -context("CSIndicators::TotalSpellTimeExceedingThreshold tests") +############################################## # dat1 dat <- array(1:20, dim = c(2, 10)) diff --git a/tests/testthat/test-TotalTimeExceedingThreshold.R b/tests/testthat/test-TotalTimeExceedingThreshold.R index 68c6d77..1c3ca19 100644 --- a/tests/testthat/test-TotalTimeExceedingThreshold.R +++ b/tests/testthat/test-TotalTimeExceedingThreshold.R @@ -1,4 +1,4 @@ -context("CSIndicators::TotalTimeExceedingThreshold tests") +############################################## # dat1 dat <- array(1:20, dim = c(2, 10)) diff --git a/tests/testthat/test-WindCapacityFactor.R b/tests/testthat/test-WindCapacityFactor.R index 1bf9089..bcecdbc 100644 --- a/tests/testthat/test-WindCapacityFactor.R +++ b/tests/testthat/test-WindCapacityFactor.R @@ -1,4 +1,4 @@ -context("CSIndicators::WindCapacityFactor tests") +############################################## # dat1 wind <- NULL diff --git a/tests/testthat/test-WindPowerDensity.R b/tests/testthat/test-WindPowerDensity.R index 249c529..184b062 100644 --- a/tests/testthat/test-WindPowerDensity.R +++ b/tests/testthat/test-WindPowerDensity.R @@ -1,4 +1,4 @@ -context("CSIndicators::WindPowerDensity tests") +########################################################################### # dat1 wind <- NULL -- GitLab From 20d7a12e3b2a3bf489b677f0808a6a3349fb3dee Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Fri, 9 Jun 2023 17:21:21 +0200 Subject: [PATCH 10/87] Add documentation and improve code --- R/PeriodSPEI.R | 704 ++++++++++++++++++------------- R/zzz.R | 8 + tests/testthat/test-PeriodSPEI.R | 33 +- 3 files changed, 460 insertions(+), 285 deletions(-) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index 2eec05f..552c0d2 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -1,7 +1,7 @@ #'Compute the Standardised Precipitation-Evapotranspiration Index #' -#'The Standardised Precipitation-Evapotranspiration Index (SPEI) is a -#'multiscalar drought index based on climatic data. It can be used for +#'Calculation of the Standardised Precipitation-Evapotranspiration Index (SPEI) +#'that is a multiscalar drought index based on climatic data. It can be used for #'determining the onset, duration and magnitude of drought conditions with #'respect to normal conditions in a variety of natural and managed systems such #'as crops, ecosystems, rivers, water resources, etc. The SPI is calculated @@ -11,7 +11,14 @@ #'to obtain the SPEI. #' #'@param exp A named list with the needed \code{s2dv_cube} objects containing -#' the seasonal forecast experiment in the data element for each variable. +#' the seasonal forecast experiment in the data element for each variable. +#' Specific variables are needed for each method used in computing the +#' Potential Evapotranspiration. See parameter 'pet_method'. The accepted +#' variables are: 'tasmin' and 'tasmax', required for methods 'hargreaves' and +#' 'hargreaves_modified' and 'tas', required for method 'thornthwaite'. +#' Variable 'prlr' is always needed. The units for temperature variables +#' ('tasmin', 'tasmax' and 'tas') need to be in Celcius degrees; the units for +#' precipitation ('prlr') need to be in mm/month. 
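The unit requirements above are easy to get wrong when the inputs come straight from model output in SI units. As a minimal illustration (not part of the package, and assuming temperatures in Kelvin, precipitation flux in kg m-2 s-1 and an approximate 30-day month), the conversion would look like:

    tasmax_k <- c(300.2, 298.7)                 # example values in Kelvin
    prlr_flux <- c(2.3e-05, 1.1e-05)            # example precipitation flux in kg m-2 s-1
    tasmax_degc <- tasmax_k - 273.15            # Kelvin -> degrees Celsius
    prlr_mm_month <- prlr_flux * 86400 * 30     # kg m-2 s-1 -> mm/day -> mm/month (approximate)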
#'@param exp_cor A named list with the needed \code{s2dv_cube} objects for each #' variable in which the quantile PeriodSPEI should be applied. If it is not #' specified, the PeriodSPEI is calculated from object 'exp'. @@ -29,46 +36,85 @@ #' dimension. By default it is set by 'latitude'. #'@param accum An integer value indicating the number of months for the #' accumulation for each variable. -#'@param start -#'@param end -#'@param pet_method -#'@param standardization -#'@param params -#'@param param_error -#'@param handle_infinity -#'@param cross_validation -#'@param method -#'@param distribution -#'@param fit -#'@param ncores +#'@param ref_period A list with two numeric values with the starting and end +#' points of the reference period used for computing the index. The default +#' value is NULL indicating that the first and end values in data will be +#' used as starting and end points. +#'@param params A multidimensional array with named dimensions for computing the +#' SPEI. This option overrides computation of fitting parameters. It needs +#' to be of same leadtime and time dimensions of exp and a dimension named +#' 'coef' with the length of the coefficients needed for the used distribution +#' (for 'Gamma' coef dimension is of lenght 2, for 'log-Logistic' or +#' 'PearsonIII' is of length 3). If cross_validation is FALSE, the leadtime +#' dimension must be of length 1. +#'@param standardization A logical value indicating wether the standardization +#' is computed. +#'@param cross_validation A logical value indicating if cross validation is +#' done (TRUE), or not (FALSE). It is FALSE by default. +#'@param pet_method A character string indicating the method used to compute +#' the potential evapotranspiration. The accepted methods are: +#' 'hargreaves' and 'hargreaves_modified', that require the data to have +#' variables tasmin and tasmax; and 'thornthwaite', that requires variable +#' 'tas'. +#'@param method A character string indicating the standardization method used. +#' If can be: 'parametric' or 'non-parametric'. +#'@param distribution A character string indicating the name of the distribution +#' function to be used for computing the SPEI. The accepted names are: +#' 'log-Logistic', 'Gamma' or 'PearsonIII'. It is set to 'log-Logistic' by +#' default. +#'@param fit A character string indicating the name of the method used for +#' computing the distribution function parameters The accepteed names are: +#' 'ub-pwm', 'pp-pwm' and 'max-lik'. It is set to 'ub-pwm' by default. +#'@param param_error A numeric value with the error accepted. +#'@param handle_infinity A logical value wether to return Infinite values (TRUE) +#' or not (FALSE). +#'@param na.rm A logical value indicating whether NA values should be removed +#' from data. It is FALSE by default. +#'@param ncores An integer value indicating the number of cores to use in +#' parallel computation. 
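To make the expected shape of 'params' concrete, the following sketch builds a placeholder array consistent with the description above and with the dimension ordering used later in the code (time dimension, leadtime dimension, 'coef'), assuming the default 'log-Logistic' distribution with three coefficients, three leadtimes and no cross-validation; the coefficient values are invented for illustration only:

    params <- array(NA, dim = c(syear = 1, time = 3, coef = 3))
    # one xi / alpha / kappa triplet per leadtime (placeholder values)
    params[1, , ] <- rep(c(0, 1, 0.5), each = 3)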
#' #'@examples -#'exp <- 1 +#'# Test random data +#'dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, +#' latitude = 2, longitude = 1, ensemble = 25) +#'dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 1, time = 3, +#' latitude = 2, longitude = 1, ensemble = 15) +#'exp_tasmax <- array(rnorm(100, 27.73, 5.26), dim = dims) +#'exp_tasmin <- array(rnorm(100, 14.83, 3.86), dim = dims) +#'exp_prlr <- array(rnorm(100, 21.19, 25.64), dim = dims) #' -# '@import s2dv -# '@import multiApply -# '@import ClimProjDiags -# '@import zeallot -# '@import SPEI -# '@import zoo -# '@import TLMoments -# '@import lmomco -# '@import lubridate -# '@import CSTools -# '@export +#'expcor_tasmax <- array(rnorm(100, 29.03, 5.67), dim = dimscor) +#'expcor_tasmin <- array(rnorm(100, 15.70, 4.40), dim = dimscor) +#'expcor_prlr <- array(rnorm(100, 15.62, 21.38), dim = dimscor) +#' +#'dates <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), +#' paste0(2010:2015, "-10-16"))) +#'dim(dates) <- c(sday = 1, sweek = 1, syear = 6, time = 3) +#' +#'lat <- c(40,40.1) +#' +#'exp <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) +#'exp_cor <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, +#' 'prlr' = expcor_prlr) +#'res <- PeriodSPEI(exp = exp, exp_cor = exp_cor, lat = lat, dates = dates) +#' +#'@import multiApply +#'@import ClimProjDiags +#'@import SPEI +#'@import zoo +#'@import TLMoments +#'@import lmomco +#'@import lubridate +#'@export CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', lat_dim = 'latitude', - accum = 1, start = NULL, end = NULL, - ref_period = NULL, pet_method = 'hargreaves', - standardization = TRUE, - params = NULL, # not used if standardization is FALSE - param_error = -9999, # not used if standardization is FALSE - handle_infinity = FALSE, # not used if standardization is FALSE - cross_validation = FALSE, # not used if standardization is FALSE - method = 'parametric', distribution = 'log-Logistic', - fit = 'ub-pwm', # not used if standardization is FALSE - ncores = NULL) { + accum = 1, ref_period = NULL, params = NULL, + standardization = TRUE, cross_validation = FALSE, + pet_method = 'hargreaves', method = 'parametric', + distribution = 'log-Logistic', fit = 'ub-pwm', + param_error = -9999, handle_infinity = FALSE, + na.rm = FALSE, ncores = NULL) { # Check 's2dv_cube' if (is.null(exp)) { @@ -83,45 +129,148 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, } } - .KnownLatNames <- CSTools:::.KnownLatNames() - - if (!any(names(exp[[1]]$coords) %in% .KnownLatNames)) { + if (!any(names(exp[[1]]$coords) %in% .KnownLatNames())) { stop("Spatial coordinate names of parameter 'exp' do not match any ", "of the names accepted by the package.") } - lat_name <- names(exp[[1]]$coords)[[which(names(exp[[1]]$coords) %in% .KnownLatNames)]] + lat_name <- names(exp[[1]]$coords)[[which(names(exp[[1]]$coords) %in% .KnownLatNames())]] res <- PeriodSPEI(exp = lapply(exp, function(x) x$data), dates = exp[[1]]$attrs$Dates, lat = exp[[1]]$coords[[lat_name]], exp_cor = lapply(exp_cor, function(x) x$data), - pet = pet, ref_period = ref_period, - time_dim = time_dim, leadtime_dim = leadtime_dim, + pet = pet, time_dim = time_dim, leadtime_dim = leadtime_dim, memb_dim = memb_dim, lat_dim = lat_dim, - accum = accum, start = start, end = end, - pet_method = pet_method, standardization = standardization, - params = params, param_error = param_error, - handle_infinity = handle_infinity, - 
cross_validation = cross_validation, - method = method, distribution = distribution, - fit = fit, ncores = ncores) + accum = accum, ref_period = ref_period, params = params, + standardization = standardization, + cross_validation = cross_validation, + pet_method = pet_method, method = method, + distribution = distribution, fit = fit, + param_error = param_error, + handle_infinity = handle_infinity, na.rm = na.rm, + ncores = ncores) return(res) } - -PeriodSPEI <- function(exp, dates, lat, - exp_cor = NULL, pet = NULL, +#'Compute the Standardised Precipitation-Evapotranspiration Index +#' +#'Calculation of the Standardised Precipitation-Evapotranspiration Index (SPEI) +#'that is a multiscalar drought index based on climatic data. It can be used for +#'determining the onset, duration and magnitude of drought conditions with +#'respect to normal conditions in a variety of natural and managed systems such +#'as crops, ecosystems, rivers, water resources, etc. The SPI is calculated +#'using monthly (or weekly) precipitation as the input data. The SPEI uses the +#'monthly (or weekly) difference between precipitation and pet. This represents +#'a simple climatic water balance which is calculated at different time scales +#'to obtain the SPEI. +#' +#'@param exp A named list with the needed \code{s2dv_cube} objects containing +#' the seasonal forecast experiment in the data element for each variable. +#' Specific variables are needed for each method used in computing the +#' Potential Evapotranspiration. See parameter 'pet_method'. The accepted +#' variables are: 'tasmin' and 'tasmax', required for methods 'hargreaves' and +#' 'hargreaves_modified' and 'tas', required for method 'thornthwaite'. +#' Variable 'prlr' is always needed. The units for temperature variables +#' ('tasmin', 'tasmax' and 'tas') need to be in Celcius degrees; the units for +#' precipitation ('prlr') need to be in mm/month. +#'@param exp_cor A named list with the needed \code{s2dv_cube} objects for each +#' variable in which the quantile PeriodSPEI should be applied. If it is not +#' specified, the PeriodSPEI is calculated from object 'exp'. +#'@param pet A multidimensional array containing the Potential +#' EvapoTranspiration data. If it is NULL it is calculated using pet_method. It +#' is NULL by default. +#'@param time_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'syear'. +#'@param leadtime_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'time'. +#'@param memb_dim A character string indicating the name of the dimension in +#' which the ensemble members are stored. When set it to NULL, threshold is +#' computed for individual members. +#'@param lat_dim A character string indicating the name of the latitudinal +#' dimension. By default it is set by 'latitude'. +#'@param accum An integer value indicating the number of months for the +#' accumulation for each variable. +#'@param ref_period A list with two numeric values with the starting and end +#' points of the reference period used for computing the index. The default +#' value is NULL indicating that the first and end values in data will be +#' used as starting and end points. +#'@param params A multidimensional array with named dimensions for computing the +#' SPEI. This option overrides computation of fitting parameters. 
It needs +#' to be of same leadtime and time dimensions of exp and a dimension named +#' 'coef' with the length of the coefficients needed for the used distribution +#' (for 'Gamma' coef dimension is of lenght 2, for 'log-Logistic' or +#' 'PearsonIII' is of length 3). If cross_validation is FALSE, the leadtime +#' dimension must be of length 1. +#'@param standardization A logical value indicating wether the standardization +#' is computed. +#'@param cross_validation A logical value indicating if cross validation is +#' done (TRUE), or not (FALSE). It is FALSE by default. +#'@param pet_method A character string indicating the method used to compute +#' the potential evapotranspiration. The accepted methods are: +#' 'hargreaves' and 'hargreaves_modified', that require the data to have +#' variables tasmin and tasmax; and 'thornthwaite', that requires variable +#' 'tas'. +#'@param method A character string indicating the standardization method used. +#' If can be: 'parametric' or 'non-parametric'. +#'@param distribution A character string indicating the name of the distribution +#' function to be used for computing the SPEI. The accepted names are: +#' 'log-Logistic', 'Gamma' or 'PearsonIII'. It is set to 'log-Logistic' by +#' default. +#'@param fit A character string indicating the name of the method used for +#' computing the distribution function parameters The accepteed names are: +#' 'ub-pwm', 'pp-pwm' and 'max-lik'. It is set to 'ub-pwm' by default. +#'@param param_error A numeric value with the error accepted. +#'@param handle_infinity A logical value wether to return Infinite values (TRUE) +#' or not (FALSE). +#'@param na.rm A logical value indicating whether NA values should be removed +#' from data. It is FALSE by default. +#'@param ncores An integer value indicating the number of cores to use in +#' parallel computation. 
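As a rough sketch of what the default parametric standardization amounts to (fit = 'ub-pwm', distribution = 'log-Logistic'), and not the package code itself: the accumulated climatic water balance is fitted with unbiased probability weighted moments and the fitted cumulative probabilities are mapped to standard normal quantiles. With invented data:

    set.seed(1)
    water_balance <- rnorm(30, mean = 5, sd = 20)        # invented accumulated P - PET series
    pwm <- lmomco::pwm.ub(sort(water_balance), nmom = 3) # unbiased probability weighted moments
    lmom_est <- lmomco::pwm2lmom(pwm)                    # convert PWMs to L-moments
    f_params <- lmom::pelglo(c(lmom_est$lambdas[1:2], lmom_est$ratios[3])) # log-Logistic parameters
    spei_like <- qnorm(lmom::cdfglo(water_balance, f_params))              # standardized index

Values that fall outside the support of the fitted distribution give cumulative probabilities of 0 or 1 and therefore infinite indices after qnorm, which is what the handle_infinity option later clamps to the finite minimum and maximum of the grid point.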
+#' +#'@examples +#'# Test random data +#'dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, +#' latitude = 2, longitude = 1, ensemble = 25) +#'dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 1, time = 3, +#' latitude = 2, longitude = 1, ensemble = 15) +#'exp_tasmax <- array(rnorm(100, 27.73, 5.26), dim = dims) +#'exp_tasmin <- array(rnorm(100, 14.83, 3.86), dim = dims) +#'exp_prlr <- array(rnorm(100, 21.19, 25.64), dim = dims) +#' +#'expcor_tasmax <- array(rnorm(100, 29.03, 5.67), dim = dimscor) +#'expcor_tasmin <- array(rnorm(100, 15.70, 4.40), dim = dimscor) +#'expcor_prlr <- array(rnorm(100, 15.62, 21.38), dim = dimscor) +#' +#'dates <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), +#' paste0(2010:2015, "-10-16"))) +#'dim(dates) <- c(sday = 1, sweek = 1, syear = 6, time = 3) +#' +#'lat <- c(40,40.1) +#' +#'exp <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) +#'exp_cor <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, +#' 'prlr' = expcor_prlr) +#'res <- PeriodSPEI(exp = exp, exp_cor = exp_cor, lat = lat, dates = dates) +#' +#'@import multiApply +#'@import ClimProjDiags +#'@import SPEI +#'@import zoo +#'@import TLMoments +#'@import lmomco +#'@import lubridate +#'@export +PeriodSPEI <- function(exp, dates, lat, exp_cor = NULL, pet = NULL, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', lat_dim = 'latitude', - accum = 1, start = NULL, end = NULL, - pet_method = 'hargreaves', ref_period = NULL, - standardization = TRUE, - params = NULL, param_error = -9999, - handle_infinity = FALSE, cross_validation = FALSE, - method = 'parametric', distribution = 'log-Logistic', - fit = 'ub-pwm', ncores = 4) { + accum = 1, ref_period = NULL, params = NULL, + standardization = TRUE, cross_validation = FALSE, + pet_method = 'hargreaves', method = 'parametric', + distribution = 'log-Logistic', fit = 'ub-pwm', + param_error = -9999, handle_infinity = FALSE, + na.rm = FALSE, ncores = NULL) { # Initial checks ## exp @@ -297,7 +446,7 @@ PeriodSPEI <- function(exp, dates, lat, } ## fit if (!(fit %in% c('max-lik', 'ub-pwm', 'pp-pwm'))) { - stop('Method must be one of "ub-pwm" (default), "pp-pwm" or "max-lik"') + stop('Method must be one of "ub-pwm" (default), "pp-pwm" or "max-lik".') } ## ncores @@ -316,9 +465,11 @@ PeriodSPEI <- function(exp, dates, lat, dates_complete_daily_to_monthly <- lubridate::day(dates_complete_daily) dates_complete_monthly <- dates_complete_daily[which(dates_complete_daily_to_monthly == 1)] - dates_monthly <- array(0, dim=length(dates_complete_monthly)) + dates_monthly <- array(0, dim = length(dates_complete_monthly)) for (dd in 1:length(dates)) { - ii <- which(dates_complete_monthly == as.Date(paste(lubridate::year(dates[dd]), lubridate::month(dates[dd]), 01, sep = '-'))) + ii <- which(dates_complete_monthly == as.Date(paste(lubridate::year(dates[dd]), + lubridate::month(dates[dd]), + 01, sep = '-'))) dates_monthly[ii] <- 1 } @@ -331,34 +482,31 @@ PeriodSPEI <- function(exp, dates, lat, k = k + 1 # Evapotranspiration estimation (unless pet is already provided) if (is.null(pet) | computed_pet) { - pet <- evapotranspiration(data = data, dates_monthly = dates_monthly, - lat = lat, pet_method = pet_method[k], - time_dim = time_dim, leadtime_dim = leadtime_dim, - lat_dim = lat_dim, ncores = ncores) + pet <- .Evapotranspiration(data = data, dates_monthly = dates_monthly, + lat = lat, pet_method = pet_method[k], + time_dim = time_dim, leadtime_dim = leadtime_dim, + lat_dim = lat_dim, na.rm = na.rm, ncores = 
ncores) computed_pet <- TRUE } # Accumulation diff_p_pet <- data$prlr - pet - data_accum <- accumulation(data = diff_p_pet, - dates_monthly = dates_monthly, accum = accum, - time_dim = time_dim, leadtime_dim = leadtime_dim, - ncores = ncores) + data_accum <- .Accumulation(data = diff_p_pet, + dates_monthly = dates_monthly, accum = accum, + time_dim = time_dim, leadtime_dim = leadtime_dim, + ncores = ncores) # Standardization: - if (standardization == TRUE) { - spei_dat <- spei_standardization(data = data_accum, - leadtime_dim = leadtime_dim, - time_dim = time_dim, - memb_dim = memb_dim, - ref_period = ref_period, - cross_validation = cross_validation, - handle_infinity = handle_infinity, - ncores = ncores, - accum = accum, - param_error = param_error, - params = params, method = method, - distribution = distribution, - fit = fit) + if (standardization) { + spei_dat <- .Standardization(data = data_accum, params = params, + accum = accum, time_dim = time_dim, + leadtime_dim = leadtime_dim, + memb_dim = memb_dim, + ref_period = ref_period, + cross_validation = cross_validation, + handle_infinity = handle_infinity, + param_error = param_error, + method = method, distribution = distribution, + fit = fit, ncores = ncores) params <- spei_dat$params } else { spei_dat <- data_accum @@ -380,9 +528,7 @@ PeriodSPEI <- function(exp, dates, lat, names(spei_res) <- c('exp', 'exp_cor') } } - return(spei_res) - } @@ -395,11 +541,10 @@ PeriodSPEI <- function(exp, dates, lat, } -#------------------------------------------------------------------------------- - -evapotranspiration <- function(data, dates_monthly, lat, pet_method = 'hargreaves', - time_dim = 'syear', leadtime_dim = 'time', - lat_dim = 'latitude', ncores = NULL) { +.Evapotranspiration <- function(data, dates_monthly, lat, pet_method = 'hargreaves', + time_dim = 'syear', leadtime_dim = 'time', + lat_dim = 'latitude', na.rm = FALSE, + ncores = NULL) { lat_mask <- array(lat, dim = c(1, length(lat))) names(dim(lat_mask)) <- c('dat', lat_dim) @@ -428,31 +573,33 @@ evapotranspiration <- function(data, dates_monthly, lat, pet_method = 'hargreave # prepare data target_dims_data <- lapply(data[varnames], function(x) rep(c(leadtime_dim, time_dim), 1)) # file <- tempfile() - # file = 'out.txt' # sink(file) - pet_estimated <- Apply(data = c(list(lat_mask = lat_mask), data[varnames]), - target_dims = c(list(lat_mask = 'dat'), target_dims_data), - output_dims = c(leadtime_dim, time_dim), - pet_method = pet_method, - dates_monthly = dates_monthly, - leadtime_dim = leadtime_dim, time_dim = time_dim, - fun = atomic_pet, ncores = ncores)$output1 - # sink(NULL) + # print(varnames) + # print(is.na(data['tasmax'])) + # print(is.na(data['tasmin'])) + pet <- Apply(data = c(list(lat_mask = lat_mask), data[varnames]), + target_dims = c(list(lat_mask = 'dat'), target_dims_data), + fun = .evapotranspiration, + dates_monthly = dates_monthly, pet_method = pet_method, + leadtime_dim = leadtime_dim, time_dim = time_dim, + output_dims = c(leadtime_dim, time_dim), + ncores = ncores)$output1 + # sink() # captured_message <- readLines(file) - + # print(unique(captured_message)) # reorder dims in pet_estimated - pos <- match(names(dim(data[[1]])),names(dim(pet_estimated))) - pet_estimated <- aperm(pet_estimated, pos) + pos <- match(names(dim(data[[1]])), names(dim(pet))) + pet <- aperm(pet, pos) # restore original NAs from mask_na - pet_estimated[which(mask_na == 0 )] <- NA - - return(pet_estimated) + pet[which(mask_na == 0 )] <- NA + return(pet) } -atomic_pet <- 
function(lat_mask = NULL, data2 = NULL, data3 = NULL, data4 = NULL, - pet_method, dates_monthly, leadtime_dim, time_dim) { +.evapotranspiration <- function(lat_mask, data2, data3 = NULL, data4 = NULL, + dates_monthly, pet_method = 'hargreaves', + leadtime_dim = 'time', time_dim = 'syear') { dims <- dim(data2) @@ -460,7 +607,7 @@ atomic_pet <- function(lat_mask = NULL, data2 = NULL, data3 = NULL, data4 = NULL # of the considered period # (starting in January of the first year) so that the solar radiation # estimation is computed in each case for the correct month - + any(is.na(data2)) if (!is.null(data2)) { data_tmp <- as.vector(data2) data2 <- array(0, dim = length(dates_monthly)) @@ -473,6 +620,7 @@ atomic_pet <- function(lat_mask = NULL, data2 = NULL, data3 = NULL, data4 = NULL } rm(data_tmp) } + any(is.na(data3)) if (!is.null(data3)) { data_tmp <- as.vector(data3) data3 <- array(0, dim = length(dates_monthly)) @@ -499,52 +647,58 @@ atomic_pet <- function(lat_mask = NULL, data2 = NULL, data3 = NULL, data4 = NULL } if (pet_method == 'hargreaves') { # NOTE EVA: if NA.RM is FALSE this gives error - x_pet <- invisible(hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), - lat = lat_mask, na.rm = TRUE)) + # print('hi') + # print(any(is.na(data3))) + # print('he') + # print(any(is.na(data2))) + pet <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), + lat = lat_mask, na.rm = na.rm, verbose = FALSE) # line to return the vector to the size of the actual original data - x_pet <- x_pet[which(dates_monthly == 1)] - pet <- array(x_pet, dim = dims) + pet <- pet[which(dates_monthly == 1)] + pet <- array(pet, dim = dims) } if (pet_method == 'hargreaves_modified') { - x_pet <- invisible(hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), - lat = lat_mask, Pre = as.vector(data4), na.rm = TRUE)) - x_pet <- x_pet[which(dates_monthly == 1)] - pet <- array(x_pet, dim = dims) + pet <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), + lat = lat_mask, Pre = as.vector(data4), na.rm = na.rm, + verbose = FALSE) + pet <- pet[which(dates_monthly == 1)] + pet <- array(pet, dim = dims) } if (pet_method == 'thornthwaite') { - x_pet <- invisible(thornthwaite(as.vector(data2), lat = lat_mask, na.rm = TRUE)) + pet <- thornthwaite(as.vector(data2), lat = lat_mask, na.rm = na.rm, + verbose = FALSE) # line to return the vector to the size of the actual original data - x_pet <- x_pet[which(dates_monthly == 1)] - pet <- array(x_pet, dim = dims) + pet <- pet[which(dates_monthly == 1)] + pet <- array(pet, dim = dims) } return(pet) } -#------------------------------------------------------------------------------- -accumulation <- function(data, dates_monthly, accum = 1, - time_dim = 'syear', leadtime_dim = 'time', - ncores = NULL) { +.Accumulation <- function(data, dates_monthly, accum = 1, + time_dim = 'syear', leadtime_dim = 'time', + ncores = NULL) { - accum_result <- Apply(data = list(data), - target_dims = list(data = c(leadtime_dim, time_dim)), - dates_monthly = dates_monthly, - accum = accum, - output_dims = c(leadtime_dim, time_dim), - leadtime_dim = leadtime_dim, time_dim = time_dim, - fun = atomic_accum, ncores = ncores)$output1 + accumulated <- Apply(data = list(data), + target_dims = list(data = c(leadtime_dim, time_dim)), + dates_monthly = dates_monthly, + accum = accum, + output_dims = c(leadtime_dim, time_dim), + leadtime_dim = leadtime_dim, time_dim = time_dim, + fun = .accumulation, + ncores = ncores)$output1 - pos <- match(names(dim(accum_result)), names(dim(data))) - 
data_accum <- aperm(accum_result, pos) + pos <- match(names(dim(accumulated)), names(dim(data))) + accumulated <- aperm(accumulated, pos) - return(data_accum) + return(accumulated) } -atomic_accum <- function(data, dates_monthly, accum = 1, - time_dim = 'syear', leadtime_dim = 'time') { +.accumulation <- function(data, dates_monthly, accum = 1, + time_dim = 'syear', leadtime_dim = 'time') { # data:[time, syear] dims <- dim(data) @@ -558,215 +712,205 @@ atomic_accum <- function(data, dates_monthly, accum = 1, } } # Accumulation at different timescales - # rollapply {zoo} A generic function for applying a function to rolling margins of an array. + # rollapply {zoo} A generic function for applying a function to rolling + # margins of an array. data_sum_x <- rollapply(data_vector, accum, sum) - # adds as many NAs as needed at the begining to account for the months that cannot be added - # (depends on accu) and so that the position in the vector corresponds to the accumulated - # of the previous months (instead of the accumulated of the next months) + # adds as many NAs as needed at the begining to account for the months that + # cannot be added (depends on accu) and so that the position in the vector + # corresponds to the accumulated of the previous months (instead of the + # accumulated of the next months) data_sum_x <- c(rep(NA, accum-1), data_sum_x) # discard the months that don't appear in the original data data_sum_x <- data_sum_x[which(dates_monthly == 1)] accum_result <- array(data_sum_x, dim = c(dims)) - # replace by NA when the accumulation corresponds to months that where not present in the original data + # replace by NA when the accumulation corresponds to months that where not + # present in the original data if (accum > 1) { accum_result[1:(accum-1), ] <- NA } return(accum_result) } -#------------------------------------------------------------------------------- +.Standardization <- function(data, params = NULL, accum = 1, time_dim = 'syear', + leadtime_dim = 'time', memb_dim = 'ensemble', + ref_period = NULL, cross_validation = FALSE, + handle_infinity = FALSE, param_error = -9999, + method = 'parametric', distribution = 'log-Logistic', + fit = 'ub-pwm', na.rm = FALSE, ncores = NULL) { - -spei_standardization <- function(data, accum = 1, time_dim = 'syear', - leadtime_dim = 'time', memb_dim = 'ensemble', - ref_period = NULL, - handle_infinity = FALSE, - cross_validation = FALSE, - param_error = -9999, params = NULL, - method = 'parametric', - distribution = 'log-Logistic', - fit = 'ub-pwm', - ncores = NULL) { - - n_leadtimes <- dim(data)[leadtime_dim] - n_sdates_params <- dim(data)[time_dim] + nleadtime <- dim(data)[leadtime_dim] + ntime <- dim(data)[time_dim] if (!cross_validation) { - n_sdates_params <- 1 + ntime <- 1 } + coef = switch(distribution, + "Gamma" = array(NA, dim = 2, dimnames = list(c('alpha', 'beta'))), + "log-Logistic" = array(NA, dim = 3, dimnames = list(c('xi', 'alpha', 'kappa'))), + "PearsonIII" = array(NA, dim = 3, dimnames = list(c('mu', 'sigma', 'gamma')))) + if (is.null(params)) { - params <- array(NA, dim = c(n_sdates_params, n_leadtimes, coef = 3)) # hardcoded + params <- array(NA, dim = c(ntime, nleadtime, length(coef))) names(dim(params)) <- c(time_dim, leadtime_dim, 'coef') } else if (length(dim(params)) < 2) { - params <- array(params, dim = c(length(params), n_sdates_params, n_leadtimes)) - params <- aperm(params, c(2,3,1)) # dim(params): [time_dim, leadtime_dim, coef] with the values repeated each time_dim and leadtime_dim + params <- 
array(params, dim = c(length(params), ntime, nleadtime)) + # dim(params): [time_dim, leadtime_dim, coef] + # with the values repeated each time_dim and leadtime_dim + params <- aperm(params, c(2,3,1)) names(dim(params)) <- c(time_dim, leadtime_dim, 'coef') + } else { + if (dim(params)['coef'] != length(coef)) { + stop(paste0("Params array should have 'coef' dimension of length ", + length(coef), ".")) + } } spei <- Apply(data = list(data = data, params = params), target_dims = list(data = c(leadtime_dim, time_dim, memb_dim), params = c(time_dim, leadtime_dim, 'coef')), - output_dims = list(data_spei = c(leadtime_dim, time_dim, memb_dim), - params = c(time_dim, leadtime_dim, 'coef')), + fun = .standardization, + coef = coef, leadtime_dim = leadtime_dim, time_dim = time_dim, memb_dim = memb_dim, handle_infinity = handle_infinity, cross_validation = cross_validation, method = method, distribution = distribution, fit = fit, - ref_period = ref_period, - param_error = param_error, - fun = atomic_spei, + ref_period = ref_period, param_error = param_error, + na.rm = na.rm, + output_dims = list(data_spei = c(leadtime_dim, time_dim, memb_dim), + params = c(time_dim, leadtime_dim, 'coef')), ncores = ncores) return(spei) # spei is a list of data_spei and params } - - - - -atomic_spei <- function(data, params, leadtime_dim = 'time', time_dim = 'syear', - memb_dim = 'ensemble', ref_period = NULL, - handle_infinity = FALSE, cross_validation = FALSE, - param_error = -9999, method = 'parametric', - distribution = 'log-Logistic', fit = 'ub-pwm') { +.standardization <- function(data, params, coef, leadtime_dim = 'time', + time_dim = 'syear', memb_dim = 'ensemble', + ref_period = NULL, handle_infinity = FALSE, + cross_validation = FALSE, param_error = -9999, + method = 'parametric', distribution = 'log-Logistic', + fit = 'ub-pwm', na.rm = FALSE) { # data: [leadtime_dim, time_dim, memb_dim] # params: [time_dim, leadtime_dim, 'coef'] - - if (is.null(ref_period)) { - ref.start <- NULL - ref.end <- NULL + + # maximum number of parameters needed to define any of the considered distributions + ncoef <- length(coef) + nleadtime <- as.numeric(dim(data)[leadtime_dim]) + ntime <- as.numeric(dim(data)[time_dim]) + nmemb <- as.numeric(dim(data)[memb_dim]) + + if (cross_validation) { + params_result <- array(data = NA, dim = c(ntime, nleadtime, ncoef)) } else { - ref.start <- ref_period[[1]] - ref.end <- ref_period[[2]] + params_result <- array(data = NA, dim = c(1, nleadtime, ncoef)) } + names(dim(params_result)) <- c(time_dim, leadtime_dim, 'coef') if (all(is.na(data))) { - speiX <- array(NA, dim(data)) - # if the data [time, sdate, memb] has no variability it will raise an error further down, - # so we assign a value to the result and skip the step - } else if (var(data, na.rm = T) == 0) { - speiX <- array(param_error, dim(data)) - return(speiX) + spei_mod <- array(NA, dim(data)) + # if the data [time, sdate, memb] has no variability it will raise an error + # further down, so we assign a value to the result and skip the step + } else if (any(is.na(data)) && !na.rm) { + spei_mod <- array(NA, dim(data)) + } else if (var(data, na.rm = T) == 0) { + spei_mod <- array(param_error, dim(data)) } else { - n_coef_max <- dim(params)['coef'] - ftime <- as.numeric(dim(data)[leadtime_dim]) - n_sdates <- as.numeric(dim(data)[time_dim]) - nmemb <- as.numeric(dim(data)[memb_dim]) - - spei_mod <- array(data = NA, dim = c(ftime, n_sdates, nmemb)) - names(dim(spei_mod)) <- c(leadtime_dim, time_dim, memb_dim) - if (cross_validation) { - 
params_result <- array(data = NA, dim = c(n_sdates, ftime, n_coef_max)) + if (is.null(ref_period)) { + ref.start <- NULL + ref.end <- NULL } else { - params_result <- array(data = NA, dim = c(1, ftime, n_coef_max)) + ref.start <- ref_period[[1]] + ref.end <- ref_period[[2]] } - for (ff in 1:ftime) { # treat months independently - Accum_D_temp <- ClimProjDiags::Subset(data, along = leadtime_dim, - indices = ff, drop = FALSE) + + spei_mod <- array(data = NA, dim = c(nleadtime, ntime, nmemb)) + names(dim(spei_mod)) <- c(leadtime_dim, time_dim, memb_dim) + + for (ff in 1:nleadtime) { + data_subset <- ClimProjDiags::Subset(data, along = leadtime_dim, + indices = ff, drop = 'selected') params_tmp <- if (all(is.na(params))) {NULL} else {params[, ff, ]} - - c(x_spei_mod, params_ff) %<-% spX_ftime(data = Accum_D_temp, na.rm = TRUE, method = method, - distribution = distribution, - fit = fit, ref.start = ref.start, ref.end = ref.end, - params = params_tmp, n_sdates = n_sdates, nmemb = nmemb, - handle_infinity = handle_infinity, - cross_validation = cross_validation) - spei_mod[ff, , ] <- x_spei_mod - coef_names <- names(params_ff) - # lengthen dimension coef of params_ff in case it doesn't match the corresponding dimension of parms_months - if (length(params_ff) < n_coef_max) { - params_ff <- append(params_ff, array(NA, dim = n_coef_max - length(params_ff))) - coef_names <- append(coef_names, '') + + + spei_data <- .std(data = data_subset, coef = coef, + ntime = ntime, nmemb = nmemb, + method = method, + distribution = distribution, + fit = fit, na.rm = na.rm, + ref.start = ref.start, + ref.end = ref.end, + params = params_tmp, + handle_infinity = handle_infinity, + cross_validation = cross_validation) + spei_mod[ff, , ] <- spei_data[[1]] + params_ff <- spei_data[[2]] + # lengthen dimension coef of params_ff in case it doesn't match the + # corresponding dimension of parms_months + if (!is.null(params_ff)) { + if (length(params_ff) < ncoef) { + params_ff <- append(params_ff, array(NA, dim = ncoef - length(params_ff))) + } + params_result[, ff, ] <- params_ff } - params_result[, ff, ] <- params_ff } - colnames(params_result) <- coef_names - names(dim(params_result)) <- c(time_dim, leadtime_dim, 'coef') - return(list(spei = spei_mod, params = params_result)) } + return(list(spei = spei_mod, params = params_result)) } -#------------------------------------------------------------------------------- - -### Support functions - -spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', - na.rm = TRUE, method = 'parametric', n_sdates, nmemb, - ref.start = NULL, ref.end = NULL, params = NULL, - handle_infinity = FALSE, cross_validation = FALSE) { +.std <- function(data, coef, ntime, nmemb, method = 'parametric', + distribution = 'log-Logistic', fit = 'ub-pwm', na.rm = FALSE, + ref.start = NULL, ref.end = NULL, params = NULL, + handle_infinity = FALSE, cross_validation = FALSE) { - # data: [leadtime_dim = 1, time_dim, memb_dim] - - if (anyNA(data) && na.rm == FALSE) { - stop('Error: Data must not contain NAs') # TO DO: return something? 
- } + # data: [time_dim, memb_dim] + # params: NULL or [(ntime), coef] if (method == 'non-parametric') { - - bp = matrix(0, length(data), 1) + bp <- matrix(0, length(data), 1) for (i in 1:length(data)) { bp[i,1] = sum(data[] <= data[i], na.rm = na.rm); # Writes the rank of the data } - SPEI = qnorm((bp - 0.44)/(length(data) + 0.12)) - - return(SPEI) # it won't return params to be used in exp_cor; also it is not using handle_infinity nor cross_validation - + std_index <- qnorm((bp - 0.44)/(length(data) + 0.12)) + dim(std_index) <- c(ntime, nmemb) + # it won't return params to be used in exp_cor; also it is not using + # handle_infinity nor cross_validation + params_result <- NULL } else { - - std_index <- array(NA, c(n_sdates, nmemb)) - - if (anyNA(data) && na.rm == FALSE) { - stop('Error: Data must not contain NAs') - } - - coef = switch(distribution, - "Gamma" = array(NA, dim = 2, dimnames = list(c('alpha','beta'))), - "log-Logistic" = array(NA, dim = 3, dimnames = list(c('xi','alpha','kappa'))), - "PearsonIII" = array(NA, dim = 3, dimnames = list(c('mu','sigma','gamma')))) - - dim_one <- length(coef) - - if (!is.null(params)) { - if (length(params)!=dim_one) { - stop(paste0('parameters array should have dimensions [', dim_one, ']')) - } - } - + std_index <- array(NA, c(ntime, nmemb)) # Select window if necessary if (!is.null(ref.start) && !is.null(ref.end)) { - data.fit <- window(data,ref.start,ref.end) + data.fit <- window(data, ref.start, ref.end) } else { data.fit <- data } if (cross_validation) { - loop_years <- n_sdates + loop_years <- ntime } else { loop_years <- 1 } - - params_result <- array(NA, dim = c(loop_years, dim_one)) + params_result <- array(NA, dim = c(loop_years, length(coef))) colnames(params_result) <- names(coef) for (nsd in 1:loop_years) { # loop over years (in case of cross_validation) # Cumulative series (acu) if (cross_validation) { - acu <- as.vector(data.fit[,-nsd,]) + acu <- as.vector(data.fit[-nsd, ]) } else { acu <- as.vector(data.fit) } acu.sorted <- sort.default(acu, method = "quick") - # remove NAs (no need if(na.rm) because if there are NA and na.rm = F we don't get to this point) + # remove NAs (no need if(na.rm) because if there are NA and na.rm = F + # we don't get to this point) acu.sorted <- acu.sorted[!is.na(acu.sorted)] # else all acu was NA and we don't need to continue with this case if (length(acu.sorted) != 0) { - acu_sd = sd(acu.sorted) + acu_sd <- sd(acu.sorted) if (!is.na(acu_sd)) { if (acu_sd != 0) { if (distribution != "log-Logistic") { @@ -774,17 +918,17 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', acu.sorted = acu.sorted[acu.sorted > 0] } if (!is.null(params)) { - f_params = as.vector(params) + f_params <- as.vector(params) params_result[nsd, ] <- f_params } else { # else coef will be NA if (length(acu.sorted) >= 4) { # Calculate probability weighted moments based on fit with lmomco or TLMoments pwm = switch(fit, - "pp-pwm" = pwm.pp(acu.sorted,-0.35,0, nmom=3), - pwm.ub(acu.sorted, nmom=3) - # TLMoments::PWM(acu.sorted, order = 0:2) - ) + "pp-pwm" = pwm.pp(acu.sorted, -0.35, 0, nmom = 3), + pwm.ub(acu.sorted, nmom = 3) + # TLMoments::PWM(acu.sorted, order = 0:2) + ) # Check L-moments validity lmom <- pwm2lmom(pwm) @@ -792,23 +936,21 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', # lmom fortran functions need specific inputs L1, L2, T3 # this is handled by lmomco internally with lmorph fortran_vec = c(lmom$lambdas[1:2], lmom$ratios[3]) - # Calculate parameters based on 
distribution with lmom then lmomco f_params = switch(distribution, "log-Logistic" = tryCatch(lmom::pelglo(fortran_vec), - error = function(e){ parglo(lmom)$para }), + error = function(e){parglo(lmom)$para}), "Gamma" = tryCatch(lmom::pelgam(fortran_vec), - error = function(e){ pargam(lmom)$para }), + error = function(e){pargam(lmom)$para}), "PearsonIII" = tryCatch(lmom::pelpe3(fortran_vec), - error = function(e){ parpe3(lmom)$para })) - + error = function(e){parpe3(lmom)$para})) # Adjust if user chose log-Logistic and max-lik if (distribution == 'log-Logistic' && fit == 'max-lik') { f_params = parglo.maxlik(acu.sorted, f_params)$para } params_result[nsd, ] <- f_params - } # end if dor the case the L-moments are not valid (std_index will be NA) - } # end if for the case there are not enough values to estimate the parameters (std_index will be NA) + } # end for the case the L-moments are not valid (std_index will be NA) + } # end for case there are not enough values to estimate the parameters (std_index will be NA) } # end estimation of f_param # Calculate cdf based on distribution with lmom if (all(is.na(params_result[nsd,]))) { @@ -822,7 +964,7 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', "PearsonIII" = lmom::cdfpe3(data, f_params)) } - std_index_cv <- array(qnorm(cdf_res), dim = c(n_sdates,nmemb)) + std_index_cv <- array(qnorm(cdf_res), dim = c(ntime, nmemb)) # Adjust if user chose Gamma or PearsonIII - Not tested: For future development # if (distribution != 'log-Logistic') { @@ -834,9 +976,9 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', std_index <- std_index_cv } } - } # end if for the case there is no variability - } # end if for the case all NA in acu - } # next year (in case of cross_validation or all done if cross_validation == F) + } # end if for the case there is no variability + } # end if for the case all NA in acu + } # next year (in case of cross_validation or all done if cross_validation == F) if (handle_infinity) { # could also use "param_error" ?; we are giving it the min/max value of the grid point std_index[is.infinite(std_index) & std_index < 0] <- min(std_index[!is.infinite(std_index)]) @@ -846,8 +988,6 @@ spX_ftime <- function(data, distribution = 'log-Logistic', fit = 'ub-pwm', # (otherwise there will be one f_params per year; # but the output params will be read only in the case that # it is called with cross_validation FALSE) - return(list(std_index = std_index, params = params_result)) - } + return(list(std_index, params_result)) } -#------------------------------------------------------------------------------- diff --git a/R/zzz.R b/R/zzz.R index cf91639..81bd0ce 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -88,4 +88,12 @@ wind2CF <- function(wind, pc) { power <- wind2power(wind, pc) CF <- power / pc$attr$RatedPower return(CF) +} + +.KnownLonNames <- function() { + known_lon_names <- c('lon', 'lons', 'longitude', 'x', 'i', 'nav_lon') +} + +.KnownLatNames <- function() { + known_lat_names <- c('lat', 'lats', 'latitude', 'y', 'j', 'nav_lat') } \ No newline at end of file diff --git a/tests/testthat/test-PeriodSPEI.R b/tests/testthat/test-PeriodSPEI.R index 0a9a6bd..7c55211 100644 --- a/tests/testthat/test-PeriodSPEI.R +++ b/tests/testthat/test-PeriodSPEI.R @@ -1,5 +1,3 @@ -context("CSIndicators::PeriodSPEI tests") - ############################################## # cube1 cube1 <- NULL @@ -97,7 +95,7 @@ test_that("1. 
Initial checks PeriodSPEI", { ) # time_dim expect_error( - PeriodSPEI(exp = exp1, ) + PeriodSPEI(exp = exp1, time_dim = 1) ) # leadtime_dim @@ -111,3 +109,32 @@ test_that("1. Initial checks PeriodSPEI", { ############################################## +# Test different time dimensions of exp and exp_cor +dims <- c(syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 10) +dimscor <- c(syear = 1, time = 3, latitude = 2, longitude = 1, ensemble = 15) + +set.seed(1) +exp_tasmax <- array(rnorm(100, 27.73, 5.26), dim = dims) +set.seed(2) +exp_tasmin <- array(rnorm(100, 14.83, 3.86), dim = dims) +set.seed(3) +exp_prlr <- array(rnorm(100, 21.19, 25.64), dim = dims) + +set.seed(1) +expcor_tasmax <- array(rnorm(100, 29.03, 5.67), dim = dimscor) +set.seed(2) +expcor_tasmin <- array(rnorm(100, 15.70, 4.40), dim = dimscor) +set.seed(3) +expcor_prlr <- array(rnorm(100, 15.62, 21.38), dim = dimscor) + +dates <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), + paste0(2010:2015, "-10-16"))) +dim(dates) <- c(sday = 1, sweek = 1, syear = 6, time = 3) + +lat <- c(40,40.1) + +exp1 <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) +exp_cor1 <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, 'prlr' = expcor_prlr) + +res <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, dates = dates, + cross_validation = TRUE, na.rm = TRUE) \ No newline at end of file -- GitLab From db9ecd3627f220ddf5d8402b12dbfe5fe30f82bf Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Mon, 12 Jun 2023 21:15:35 +0200 Subject: [PATCH 11/87] Add parameter dates_expcor; improve documentation; add warning for cross_validation; return correct values within 's2dv_cube' --- NAMESPACE | 8 + R/PeriodSPEI.R | 265 ++++++++++++++++++++++--------- man/CST_PeriodSPEI.Rd | 157 ++++++++++++++++++ man/PeriodSPEI.Rd | 160 +++++++++++++++++++ tests/testthat/test-PeriodSPEI.R | 88 +++++----- 5 files changed, 559 insertions(+), 119 deletions(-) create mode 100644 man/CST_PeriodSPEI.Rd create mode 100644 man/PeriodSPEI.Rd diff --git a/NAMESPACE b/NAMESPACE index d80accb..61b6b92 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,6 +7,7 @@ export(CST_AccumulationExceedingThreshold) export(CST_MergeRefToExp) export(CST_PeriodAccumulation) export(CST_PeriodMean) +export(CST_PeriodSPEI) export(CST_QThreshold) export(CST_SelectPeriodOnData) export(CST_Threshold) @@ -17,6 +18,7 @@ export(CST_WindPowerDensity) export(MergeRefToExp) export(PeriodAccumulation) export(PeriodMean) +export(PeriodSPEI) export(QThreshold) export(SelectPeriodOnData) export(SelectPeriodOnDates) @@ -25,7 +27,13 @@ export(TotalSpellTimeExceedingThreshold) export(TotalTimeExceedingThreshold) export(WindCapacityFactor) export(WindPowerDensity) +import(ClimProjDiags) +import(SPEI) +import(TLMoments) +import(lmomco) +import(lubridate) import(multiApply) +import(zoo) importFrom(stats,approxfun) importFrom(stats,ecdf) importFrom(stats,quantile) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index 552c0d2..284c174 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -73,30 +73,53 @@ #'@param ncores An integer value indicating the number of cores to use in #' parallel computation. #' +#'@return A list with elements: +#'\itemize{ +#' \item{'exp', if 'standarization' is TRUE an 's2dv_cube' conaining the 'SPEI' +#' in element data from 'exp' array with the same dimensions as 'exp'. +#' If it is FALSE, it is an array with the accumulated values of PET +#' minus 'prlr' data.} +#' \item{'exp_cor', if 'standarization' is TRUE and if 'exp_cor' is not +#' NULL. 
It is an 's2dv_cube' with the SPEI data from 'exp_cor' in +#' element 'data'. If 'standarization' is FALSE, only the accumulated +#' values of PET minus 'prlr' is returned.} +#' \item{'params', returned if 'standarization' is TRUE, it contains the +#' parameters used for the standarization of 'exp' that are used for +#' computing the 'SPEI' for 'exp_cor'.} +#' } +#' #'@examples #'# Test random data #'dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, #' latitude = 2, longitude = 1, ensemble = 25) -#'dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 1, time = 3, +#'dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, #' latitude = 2, longitude = 1, ensemble = 15) -#'exp_tasmax <- array(rnorm(100, 27.73, 5.26), dim = dims) -#'exp_tasmin <- array(rnorm(100, 14.83, 3.86), dim = dims) -#'exp_prlr <- array(rnorm(100, 21.19, 25.64), dim = dims) -#' -#'expcor_tasmax <- array(rnorm(100, 29.03, 5.67), dim = dimscor) -#'expcor_tasmin <- array(rnorm(100, 15.70, 4.40), dim = dimscor) -#'expcor_prlr <- array(rnorm(100, 15.62, 21.38), dim = dimscor) -#' -#'dates <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), -#' paste0(2010:2015, "-10-16"))) +#' +#'dates <- as.POSIXct(c(paste0(2010:2015, "-08-16"), +#' paste0(2010:2015, "-09-15"), +#' paste0(2010:2015, "-10-16")), 'UTC') #'dim(dates) <- c(sday = 1, sweek = 1, syear = 6, time = 3) -#' +#' #'lat <- c(40,40.1) -#' +#' +#'exp_tasmax <- array(rnorm(900, 27.73, 5.26), dim = dims) +#'exp_tasmin <- array(rnorm(900, 14.83, 3.86), dim = dims) +#'exp_prlr <- array(rnorm(900, 21.19, 25.64), dim = dims) +#' +#'expcor_tasmax <- array(rnorm(540, 29.03, 5.67), dim = dimscor) +#'expcor_tasmin <- array(rnorm(540, 15.70, 4.40), dim = dimscor) +#'expcor_prlr <- array(rnorm(540, 15.62, 21.38), dim = dimscor) +#' #'exp <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) #'exp_cor <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, #' 'prlr' = expcor_prlr) -#'res <- PeriodSPEI(exp = exp, exp_cor = exp_cor, lat = lat, dates = dates) +#' +#'exp <- lapply(exp, CSTools::s2dv_cube, coords = list(latitude = lat), +#' Dates = dates) +#'exp_cor <- lapply(exp_cor, CSTools::s2dv_cube, coords = list(latitude = lat), +#' Dates = dates) +#' +#'res <- CST_PeriodSPEI(exp = exp, exp_cor = exp_cor) #' #'@import multiApply #'@import ClimProjDiags @@ -128,18 +151,32 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, stop("Parameter 'exp_cor' must be a list of 's2dv_cube' class.") } } - + # latitude if (!any(names(exp[[1]]$coords) %in% .KnownLatNames())) { stop("Spatial coordinate names of parameter 'exp' do not match any ", "of the names accepted by the package.") } + # Dates + dates_exp <- exp[[1]]$attrs$Dates + if (is.null(exp[[1]]$attrs$Dates)) { + stop("Element 'Dates' is not found in 'exp$attrs' list.") + } + + if (!is.null(exp_cor)) { + if (is.null(exp_cor[[1]]$attrs$Dates)) { + stop("Element 'Dates' is not found in 'exp_cor$attrs'.") + } + } - lat_name <- names(exp[[1]]$coords)[[which(names(exp[[1]]$coords) %in% .KnownLatNames())]] + lat_dim <- names(exp[[1]]$coords)[[which(names(exp[[1]]$coords) %in% .KnownLatNames())]] + res <- PeriodSPEI(exp = lapply(exp, function(x) x$data), - dates = exp[[1]]$attrs$Dates, - lat = exp[[1]]$coords[[lat_name]], - exp_cor = lapply(exp_cor, function(x) x$data), + dates_exp = exp[[1]]$attrs$Dates, + lat = exp[[1]]$coords[[lat_dim]], + exp_cor = if (is.null(exp_cor)) {NULL} else { + lapply(exp_cor, function(x) x$data)}, + dates_expcor = exp_cor[[1]]$attrs$Dates, pet = 
pet, time_dim = time_dim, leadtime_dim = leadtime_dim, memb_dim = memb_dim, lat_dim = lat_dim, accum = accum, ref_period = ref_period, params = params, @@ -150,6 +187,22 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, param_error = param_error, handle_infinity = handle_infinity, na.rm = na.rm, ncores = ncores) + if (is.null(exp_cor)) { + exp$data <- res[[1]] + if (standardization) { + return(list(exp = exp, params = res[[2]])) + } else { + return(exp) + } + } else { + exp$data <- res[[1]] + exp_cor$data <- res[[2]] + if (standardization) { + return(list(exp = exp, exp_cor = exp_cor, params = res[[3]])) + } else { + return(list(exp = exp, exp_cor = exp_cor)) + } + } return(res) } @@ -175,9 +228,14 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, #' Variable 'prlr' is always needed. The units for temperature variables #' ('tasmin', 'tasmax' and 'tas') need to be in Celcius degrees; the units for #' precipitation ('prlr') need to be in mm/month. +#'@param dates_exp An array of temporal dimensions containing the Dates of +#' 'exp'. It must be of class 'Date' or 'POSIXct'. +#'@param lat A numeric vector containing the latitude values of 'exp'. #'@param exp_cor A named list with the needed \code{s2dv_cube} objects for each #' variable in which the quantile PeriodSPEI should be applied. If it is not #' specified, the PeriodSPEI is calculated from object 'exp'. +#'@param dates_expcor An array of temporal dimensions containing the Dates of +#' 'exp_cor'. It must be of class 'Date' or 'POSIXct'. #'@param pet A multidimensional array containing the Potential #' EvapoTranspiration data. If it is NULL it is calculated using pet_method. It #' is NULL by default. @@ -229,19 +287,33 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, #'@param ncores An integer value indicating the number of cores to use in #' parallel computation. #' +#'@return A list with elements: +#'\itemize{ +#' \item{'exp', if 'standarization' is TRUE an array conaining SPEI data from +#' 'exp' array with the same dimensions as 'exp'. If it is FALSE, it +#' is an array with the accumulated values of PET minus 'prlr' data.} +#' \item{'exp_cor', if 'standarization' is TRUE and if 'exp_cor' is not +#' NULL. It is an array with the SPEI data from 'exp_cor'. 
If +#' 'standarization' is FALSE, only the accumulated values of PET minus +#' 'prlr' is returned.} +#' \item{'params', returned if 'standarization' is TRUE, it contains the +#' parameters used for the standarization of 'exp' that are used for +#' computing the 'SPEI' for 'exp_cor'.} +#'} +#' #'@examples #'# Test random data #'dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, #' latitude = 2, longitude = 1, ensemble = 25) -#'dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 1, time = 3, +#'dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, #' latitude = 2, longitude = 1, ensemble = 15) -#'exp_tasmax <- array(rnorm(100, 27.73, 5.26), dim = dims) -#'exp_tasmin <- array(rnorm(100, 14.83, 3.86), dim = dims) -#'exp_prlr <- array(rnorm(100, 21.19, 25.64), dim = dims) +#'exp_tasmax <- array(rnorm(900, 27.73, 5.26), dim = dims) +#'exp_tasmin <- array(rnorm(900, 14.83, 3.86), dim = dims) +#'exp_prlr <- array(rnorm(900, 21.19, 25.64), dim = dims) #' -#'expcor_tasmax <- array(rnorm(100, 29.03, 5.67), dim = dimscor) -#'expcor_tasmin <- array(rnorm(100, 15.70, 4.40), dim = dimscor) -#'expcor_prlr <- array(rnorm(100, 15.62, 21.38), dim = dimscor) +#'expcor_tasmax <- array(rnorm(540, 29.03, 5.67), dim = dimscor) +#'expcor_tasmin <- array(rnorm(540, 15.70, 4.40), dim = dimscor) +#'expcor_prlr <- array(rnorm(540, 15.62, 21.38), dim = dimscor) #' #'dates <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), #' paste0(2010:2015, "-10-16"))) @@ -262,7 +334,8 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, #'@import lmomco #'@import lubridate #'@export -PeriodSPEI <- function(exp, dates, lat, exp_cor = NULL, pet = NULL, +PeriodSPEI <- function(exp, dates_exp, lat, + exp_cor = NULL, dates_expcor = NULL, pet = NULL, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', lat_dim = 'latitude', accum = 1, ref_period = NULL, params = NULL, @@ -305,14 +378,40 @@ PeriodSPEI <- function(exp, dates, lat, exp_cor = NULL, pet = NULL, if (!all(names(exp_cor) %in% names(exp))) { stop("Parameter 'exp_cor' needs to have the same variable names as 'exp'.") } - dims <- lapply(exp_cor, function(x) dim(x)) - first_dims <- dims[[1]] - all_equal <- all(sapply(dims[-1], function(x) identical(first_dims, x))) + dimscor <- lapply(exp_cor, function(x) dim(x)) + first_dims <- dimscor[[1]] + all_equal <- all(sapply(dimscor[-1], function(x) identical(first_dims, x))) if (!all_equal) { stop("Parameter 'exp_cor' variables needs to have the same dimension names.") } } + # dates + if (!(is.Date(dates_exp)) & !(is.POSIXct(dates_exp))) { + stop("Parameter 'dates_exp' is not of the correct class, ", + "only 'Date' and 'POSIXct' classes are accepted.") + } + if (!is.null(exp_cor)) { + if (!(is.Date(dates_expcor)) & !(is.POSIXct(dates_expcor))) { + stop("Element 'Dates' in 'exp_cor' is not of the correct class, ", + "only 'Date' and 'POSIXct' classes are accepted.") + } + } + # lat + if (!is.numeric(lat)) { + stop("Parameter 'lat' must be numeric.") + } + if (any(sapply(dims, FUN = function(x) x[lat_dim] != length(lat)))) { + stop("Parameter 'lat' needs to have the same length of latitudinal", + "dimension of all the variables arrays in 'exp'.") + } + if (!is.null(exp_cor)) { + if (any(sapply(dimscor, FUN = function(x) x[lat_dim] != length(lat)))) { + stop("Parameter 'lat' needs to have the same length of latitudinal", + "dimension of all the variables arrays in 'exp'.") + } + } + # Variable checks if (is.null(pet)) { ## exp (2) @@ -424,17 +523,32 @@ PeriodSPEI <- function(exp, 
dates, lat, exp_cor = NULL, pet = NULL, stop(paste0("Cannot compute accumulation of ", accum, " months because ", "loaded data has only ", dim(exp[[1]])[leadtime_dim], " months.")) } - ## start - - ## end ## standardization + if (!is.logical(standardization)) { + stop("Parameter 'standardization' must be a logical value.") + } ## param_error + if (!is.numeric(param_error)) { + stop("Parameter 'param_error' must be a numeric value.") + } ## handle_infinity + if (!is.logical(handle_infinity)) { + stop("Parameter 'handle_infinity' must be a logical value.") + } ## cross_validation + if (!is.logical(cross_validation)) { + stop("Parameter 'cross_validation' must be a logical value.") + } + if (cross_validation) { + warning("Detected 'cross_validation' = TRUE. This functionality ", + "is being developed, sorry for the inconvenience. It will ", + "be set to FALSE.") + cross_validation <- FALSE + } ## method if (!(method %in% c('parametric', 'non-parametric'))) { @@ -459,18 +573,23 @@ PeriodSPEI <- function(exp, dates, lat, exp_cor = NULL, pet = NULL, # Data preparation # complete dates - ini_date <- as.Date(paste(lubridate::year(min(dates)), 01, 01, sep= '-')) - end_date <- as.Date(paste(lubridate::year(max(dates)), 12, 31, sep='-')) - dates_complete_daily <- as.Date(ini_date:end_date) - - dates_complete_daily_to_monthly <- lubridate::day(dates_complete_daily) - dates_complete_monthly <- dates_complete_daily[which(dates_complete_daily_to_monthly == 1)] - dates_monthly <- array(0, dim = length(dates_complete_monthly)) - for (dd in 1:length(dates)) { - ii <- which(dates_complete_monthly == as.Date(paste(lubridate::year(dates[dd]), - lubridate::month(dates[dd]), - 01, sep = '-'))) - dates_monthly[ii] <- 1 + dates_monthly <- NULL + k = 0 + for (dates in .return2list(dates_exp, dates_expcor)) { + k = k + 1 + ini_date <- as.Date(paste(lubridate::year(min(dates)), 01, 01, sep= '-')) + end_date <- as.Date(paste(lubridate::year(max(dates)), 12, 31, sep='-')) + dates_complete_daily <- as.Date(ini_date:end_date) + + dates_complete_daily_to_monthly <- lubridate::day(dates_complete_daily) + dates_complete_monthly <- dates_complete_daily[which(dates_complete_daily_to_monthly == 1)] + dates_monthly[[k]] <- array(0, dim = length(dates_complete_monthly)) + for (dd in 1:length(dates)) { + ii <- which(dates_complete_monthly == as.Date(paste(lubridate::year(dates[dd]), + lubridate::month(dates[dd]), + 01, sep = '-'))) + dates_monthly[[k]][ii] <- 1 + } } # Compute PeriodSPEI @@ -482,7 +601,7 @@ PeriodSPEI <- function(exp, dates, lat, exp_cor = NULL, pet = NULL, k = k + 1 # Evapotranspiration estimation (unless pet is already provided) if (is.null(pet) | computed_pet) { - pet <- .Evapotranspiration(data = data, dates_monthly = dates_monthly, + pet <- .Evapotranspiration(data = data, dates_monthly = dates_monthly[[k]], lat = lat, pet_method = pet_method[k], time_dim = time_dim, leadtime_dim = leadtime_dim, lat_dim = lat_dim, na.rm = na.rm, ncores = ncores) @@ -492,10 +611,10 @@ PeriodSPEI <- function(exp, dates, lat, exp_cor = NULL, pet = NULL, # Accumulation diff_p_pet <- data$prlr - pet data_accum <- .Accumulation(data = diff_p_pet, - dates_monthly = dates_monthly, accum = accum, + dates_monthly = dates_monthly[[k]], accum = accum, time_dim = time_dim, leadtime_dim = leadtime_dim, ncores = ncores) - # Standardization: + # Standardization if (standardization) { spei_dat <- .Standardization(data = data_accum, params = params, accum = accum, time_dim = time_dim, @@ -508,10 +627,18 @@ PeriodSPEI <- function(exp, dates, 
lat, exp_cor = NULL, pet = NULL, method = method, distribution = distribution, fit = fit, ncores = ncores) params <- spei_dat$params + + pos <- match(names(dim(data[[1]])), names(dim(spei_dat[[1]]))) + spei_dat[[1]] <- aperm(spei_dat[[1]], pos) + + spei_res[[k]] <- spei_dat[[1]] } else { - spei_dat <- data_accum + + pos <- match(names(dim(data[[1]])), names(dim(data_accum))) + data_accum <- aperm(data_accum, pos) + + spei_res[[k]] <- data_accum } - spei_res[[k]] <- spei_dat[[1]] } if (standardization) { @@ -572,11 +699,6 @@ PeriodSPEI <- function(exp, dates, lat, exp_cor = NULL, pet = NULL, # prepare data target_dims_data <- lapply(data[varnames], function(x) rep(c(leadtime_dim, time_dim), 1)) - # file <- tempfile() - # sink(file) - # print(varnames) - # print(is.na(data['tasmax'])) - # print(is.na(data['tasmin'])) pet <- Apply(data = c(list(lat_mask = lat_mask), data[varnames]), target_dims = c(list(lat_mask = 'dat'), target_dims_data), fun = .evapotranspiration, @@ -584,9 +706,6 @@ PeriodSPEI <- function(exp, dates, lat, exp_cor = NULL, pet = NULL, leadtime_dim = leadtime_dim, time_dim = time_dim, output_dims = c(leadtime_dim, time_dim), ncores = ncores)$output1 - # sink() - # captured_message <- readLines(file) - # print(unique(captured_message)) # reorder dims in pet_estimated pos <- match(names(dim(data[[1]])), names(dim(pet))) pet <- aperm(pet, pos) @@ -607,7 +726,7 @@ PeriodSPEI <- function(exp, dates, lat, exp_cor = NULL, pet = NULL, # of the considered period # (starting in January of the first year) so that the solar radiation # estimation is computed in each case for the correct month - any(is.na(data2)) + if (!is.null(data2)) { data_tmp <- as.vector(data2) data2 <- array(0, dim = length(dates_monthly)) @@ -646,11 +765,6 @@ PeriodSPEI <- function(exp, dates, lat, exp_cor = NULL, pet = NULL, rm(data_tmp) } if (pet_method == 'hargreaves') { - # NOTE EVA: if NA.RM is FALSE this gives error - # print('hi') - # print(any(is.na(data3))) - # print('he') - # print(any(is.na(data2))) pet <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), lat = lat_mask, na.rm = na.rm, verbose = FALSE) # line to return the vector to the size of the actual original data @@ -832,19 +946,18 @@ PeriodSPEI <- function(exp, dates, lat, exp_cor = NULL, pet = NULL, for (ff in 1:nleadtime) { data_subset <- ClimProjDiags::Subset(data, along = leadtime_dim, indices = ff, drop = 'selected') + params_tmp <- if (all(is.na(params))) {NULL} else {params[, ff, ]} spei_data <- .std(data = data_subset, coef = coef, - ntime = ntime, nmemb = nmemb, - method = method, - distribution = distribution, - fit = fit, na.rm = na.rm, - ref.start = ref.start, - ref.end = ref.end, - params = params_tmp, - handle_infinity = handle_infinity, - cross_validation = cross_validation) + ntime = ntime, nmemb = nmemb, + method = method, distribution = distribution, + fit = fit, na.rm = na.rm, + ref.start = ref.start, ref.end = ref.end, + params = params_tmp, + handle_infinity = handle_infinity, + cross_validation = cross_validation) spei_mod[ff, , ] <- spei_data[[1]] params_ff <- spei_data[[2]] # lengthen dimension coef of params_ff in case it doesn't match the @@ -867,7 +980,6 @@ PeriodSPEI <- function(exp, dates, lat, exp_cor = NULL, pet = NULL, # data: [time_dim, memb_dim] # params: NULL or [(ntime), coef] - if (method == 'non-parametric') { bp <- matrix(0, length(data), 1) for (i in 1:length(data)) { @@ -903,12 +1015,13 @@ PeriodSPEI <- function(exp, dates, lat, exp_cor = NULL, pet = NULL, } else { acu <- as.vector(data.fit) } - 
+ acu.sorted <- sort.default(acu, method = "quick") # remove NAs (no need if(na.rm) because if there are NA and na.rm = F # we don't get to this point) acu.sorted <- acu.sorted[!is.na(acu.sorted)] - # else all acu was NA and we don't need to continue with this case + # else all acu was NA and we don't need to continue with this case + if (length(acu.sorted) != 0) { acu_sd <- sd(acu.sorted) if (!is.na(acu_sd)) { diff --git a/man/CST_PeriodSPEI.Rd b/man/CST_PeriodSPEI.Rd new file mode 100644 index 0000000..2bc5c47 --- /dev/null +++ b/man/CST_PeriodSPEI.Rd @@ -0,0 +1,157 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PeriodSPEI.R +\name{CST_PeriodSPEI} +\alias{CST_PeriodSPEI} +\title{Compute the Standardised Precipitation-Evapotranspiration Index} +\usage{ +CST_PeriodSPEI( + exp, + exp_cor = NULL, + pet = NULL, + time_dim = "syear", + leadtime_dim = "time", + memb_dim = "ensemble", + lat_dim = "latitude", + accum = 1, + ref_period = NULL, + params = NULL, + standardization = TRUE, + cross_validation = FALSE, + pet_method = "hargreaves", + method = "parametric", + distribution = "log-Logistic", + fit = "ub-pwm", + param_error = -9999, + handle_infinity = FALSE, + na.rm = FALSE, + ncores = NULL +) +} +\arguments{ +\item{exp}{A named list with the needed \code{s2dv_cube} objects containing +the seasonal forecast experiment in the data element for each variable. +Specific variables are needed for each method used in computing the +Potential Evapotranspiration. See parameter 'pet_method'. The accepted +variables are: 'tasmin' and 'tasmax', required for methods 'hargreaves' and +'hargreaves_modified' and 'tas', required for method 'thornthwaite'. +Variable 'prlr' is always needed. The units for temperature variables +('tasmin', 'tasmax' and 'tas') need to be in Celcius degrees; the units for +precipitation ('prlr') need to be in mm/month.} + +\item{exp_cor}{A named list with the needed \code{s2dv_cube} objects for each +variable in which the quantile PeriodSPEI should be applied. If it is not +specified, the PeriodSPEI is calculated from object 'exp'.} + +\item{pet}{A multidimensional array containing the Potential +EvapoTranspiration data. If it is NULL it is calculated using pet_method. It +is NULL by default.} + +\item{time_dim}{A character string indicating the name of the temporal +dimension. By default, it is set to 'syear'.} + +\item{leadtime_dim}{A character string indicating the name of the temporal +dimension. By default, it is set to 'time'.} + +\item{memb_dim}{A character string indicating the name of the dimension in +which the ensemble members are stored. When set it to NULL, threshold is +computed for individual members.} + +\item{lat_dim}{A character string indicating the name of the latitudinal +dimension. By default it is set by 'latitude'.} + +\item{accum}{An integer value indicating the number of months for the +accumulation for each variable.} + +\item{ref_period}{A list with two numeric values with the starting and end +points of the reference period used for computing the index. The default +value is NULL indicating that the first and end values in data will be +used as starting and end points.} + +\item{params}{A multidimensional array with named dimensions for computing the +SPEI. This option overrides computation of fitting parameters. 
It needs +to be of same leadtime and time dimensions of exp and a dimension named +'coef' with the length of the coefficients needed for the used distribution +(for 'Gamma' coef dimension is of lenght 2, for 'log-Logistic' or +'PearsonIII' is of length 3). If cross_validation is FALSE, the leadtime +dimension must be of length 1.} + +\item{standardization}{A logical value indicating wether the standardization +is computed.} + +\item{cross_validation}{A logical value indicating if cross validation is +done (TRUE), or not (FALSE). It is FALSE by default.} + +\item{pet_method}{A character string indicating the method used to compute +the potential evapotranspiration. The accepted methods are: +'hargreaves' and 'hargreaves_modified', that require the data to have +variables tasmin and tasmax; and 'thornthwaite', that requires variable +'tas'.} + +\item{method}{A character string indicating the standardization method used. +If can be: 'parametric' or 'non-parametric'.} + +\item{distribution}{A character string indicating the name of the distribution +function to be used for computing the SPEI. The accepted names are: +'log-Logistic', 'Gamma' or 'PearsonIII'. It is set to 'log-Logistic' by +default.} + +\item{fit}{A character string indicating the name of the method used for +computing the distribution function parameters The accepteed names are: +'ub-pwm', 'pp-pwm' and 'max-lik'. It is set to 'ub-pwm' by default.} + +\item{param_error}{A numeric value with the error accepted.} + +\item{handle_infinity}{A logical value wether to return Infinite values (TRUE) +or not (FALSE).} + +\item{na.rm}{A logical value indicating whether NA values should be removed +from data. It is FALSE by default.} + +\item{ncores}{An integer value indicating the number of cores to use in +parallel computation.} +} +\description{ +Calculation of the Standardised Precipitation-Evapotranspiration Index (SPEI) +that is a multiscalar drought index based on climatic data. It can be used for +determining the onset, duration and magnitude of drought conditions with +respect to normal conditions in a variety of natural and managed systems such +as crops, ecosystems, rivers, water resources, etc. The SPI is calculated +using monthly (or weekly) precipitation as the input data. The SPEI uses the +monthly (or weekly) difference between precipitation and pet. This represents +a simple climatic water balance which is calculated at different time scales +to obtain the SPEI. 
+} +\examples{ +# Test random data +dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, + latitude = 2, longitude = 1, ensemble = 25) +dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, + latitude = 2, longitude = 1, ensemble = 15) + +dates <- as.POSIXct(c(paste0(2010:2015, "-08-16"), + paste0(2010:2015, "-09-15"), + paste0(2010:2015, "-10-16")), 'UTC') +dim(dates) <- c(sday = 1, sweek = 1, syear = 6, time = 3) + +lat <- c(40,40.1) + +exp_tasmax <- array(rnorm(900, 27.73, 5.26), dim = dims) +exp_tasmin <- array(rnorm(900, 14.83, 3.86), dim = dims) +exp_prlr <- array(rnorm(900, 21.19, 25.64), dim = dims) + +expcor_tasmax <- array(rnorm(540, 29.03, 5.67), dim = dimscor) +expcor_tasmin <- array(rnorm(540, 15.70, 4.40), dim = dimscor) +expcor_prlr <- array(rnorm(540, 15.62, 21.38), dim = dimscor) + +exp <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) +exp_cor <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, + 'prlr' = expcor_prlr) + +exp <- lapply(exp, CSTools::s2dv_cube, coords = list(latitude = lat), + Dates = dates) +exp_cor <- lapply(exp_cor, CSTools::s2dv_cube, coords = list(latitude = lat), + Dates = dates) + +res <- CST_PeriodSPEI(exp = exp, exp_cor = exp_cor) + +} diff --git a/man/PeriodSPEI.Rd b/man/PeriodSPEI.Rd new file mode 100644 index 0000000..1168122 --- /dev/null +++ b/man/PeriodSPEI.Rd @@ -0,0 +1,160 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PeriodSPEI.R +\name{PeriodSPEI} +\alias{PeriodSPEI} +\title{Compute the Standardised Precipitation-Evapotranspiration Index} +\usage{ +PeriodSPEI( + exp, + dates_exp, + lat, + exp_cor = NULL, + dates_expcor = NULL, + pet = NULL, + time_dim = "syear", + leadtime_dim = "time", + memb_dim = "ensemble", + lat_dim = "latitude", + accum = 1, + ref_period = NULL, + params = NULL, + standardization = TRUE, + cross_validation = FALSE, + pet_method = "hargreaves", + method = "parametric", + distribution = "log-Logistic", + fit = "ub-pwm", + param_error = -9999, + handle_infinity = FALSE, + na.rm = FALSE, + ncores = NULL +) +} +\arguments{ +\item{exp}{A named list with the needed \code{s2dv_cube} objects containing +the seasonal forecast experiment in the data element for each variable. +Specific variables are needed for each method used in computing the +Potential Evapotranspiration. See parameter 'pet_method'. The accepted +variables are: 'tasmin' and 'tasmax', required for methods 'hargreaves' and +'hargreaves_modified' and 'tas', required for method 'thornthwaite'. +Variable 'prlr' is always needed. The units for temperature variables +('tasmin', 'tasmax' and 'tas') need to be in Celcius degrees; the units for +precipitation ('prlr') need to be in mm/month.} + +\item{dates_exp}{An array of temporal dimensions containing the Dates of +'exp'. It must be of class 'Date' or 'POSIXct'.} + +\item{lat}{A numeric vector containing the latitude values of 'exp'.} + +\item{exp_cor}{A named list with the needed \code{s2dv_cube} objects for each +variable in which the quantile PeriodSPEI should be applied. If it is not +specified, the PeriodSPEI is calculated from object 'exp'.} + +\item{dates_expcor}{An array of temporal dimensions containing the Dates of +'exp_cor'. It must be of class 'Date' or 'POSIXct'.} + +\item{pet}{A multidimensional array containing the Potential +EvapoTranspiration data. If it is NULL it is calculated using pet_method. It +is NULL by default.} + +\item{time_dim}{A character string indicating the name of the temporal +dimension. 
By default, it is set to 'syear'.} + +\item{leadtime_dim}{A character string indicating the name of the temporal +dimension. By default, it is set to 'time'.} + +\item{memb_dim}{A character string indicating the name of the dimension in +which the ensemble members are stored. When set it to NULL, threshold is +computed for individual members.} + +\item{lat_dim}{A character string indicating the name of the latitudinal +dimension. By default it is set by 'latitude'.} + +\item{accum}{An integer value indicating the number of months for the +accumulation for each variable.} + +\item{ref_period}{A list with two numeric values with the starting and end +points of the reference period used for computing the index. The default +value is NULL indicating that the first and end values in data will be +used as starting and end points.} + +\item{params}{A multidimensional array with named dimensions for computing the +SPEI. This option overrides computation of fitting parameters. It needs +to be of same leadtime and time dimensions of exp and a dimension named +'coef' with the length of the coefficients needed for the used distribution +(for 'Gamma' coef dimension is of lenght 2, for 'log-Logistic' or +'PearsonIII' is of length 3). If cross_validation is FALSE, the leadtime +dimension must be of length 1.} + +\item{standardization}{A logical value indicating wether the standardization +is computed.} + +\item{cross_validation}{A logical value indicating if cross validation is +done (TRUE), or not (FALSE). It is FALSE by default.} + +\item{pet_method}{A character string indicating the method used to compute +the potential evapotranspiration. The accepted methods are: +'hargreaves' and 'hargreaves_modified', that require the data to have +variables tasmin and tasmax; and 'thornthwaite', that requires variable +'tas'.} + +\item{method}{A character string indicating the standardization method used. +If can be: 'parametric' or 'non-parametric'.} + +\item{distribution}{A character string indicating the name of the distribution +function to be used for computing the SPEI. The accepted names are: +'log-Logistic', 'Gamma' or 'PearsonIII'. It is set to 'log-Logistic' by +default.} + +\item{fit}{A character string indicating the name of the method used for +computing the distribution function parameters The accepteed names are: +'ub-pwm', 'pp-pwm' and 'max-lik'. It is set to 'ub-pwm' by default.} + +\item{param_error}{A numeric value with the error accepted.} + +\item{handle_infinity}{A logical value wether to return Infinite values (TRUE) +or not (FALSE).} + +\item{na.rm}{A logical value indicating whether NA values should be removed +from data. It is FALSE by default.} + +\item{ncores}{An integer value indicating the number of cores to use in +parallel computation.} +} +\description{ +Calculation of the Standardised Precipitation-Evapotranspiration Index (SPEI) +that is a multiscalar drought index based on climatic data. It can be used for +determining the onset, duration and magnitude of drought conditions with +respect to normal conditions in a variety of natural and managed systems such +as crops, ecosystems, rivers, water resources, etc. The SPI is calculated +using monthly (or weekly) precipitation as the input data. The SPEI uses the +monthly (or weekly) difference between precipitation and pet. This represents +a simple climatic water balance which is calculated at different time scales +to obtain the SPEI. 
+} +\examples{ +# Test random data +dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, + latitude = 2, longitude = 1, ensemble = 25) +dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, + latitude = 2, longitude = 1, ensemble = 15) +exp_tasmax <- array(rnorm(900, 27.73, 5.26), dim = dims) +exp_tasmin <- array(rnorm(900, 14.83, 3.86), dim = dims) +exp_prlr <- array(rnorm(900, 21.19, 25.64), dim = dims) + +expcor_tasmax <- array(rnorm(540, 29.03, 5.67), dim = dimscor) +expcor_tasmin <- array(rnorm(540, 15.70, 4.40), dim = dimscor) +expcor_prlr <- array(rnorm(540, 15.62, 21.38), dim = dimscor) + +dates <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), + paste0(2010:2015, "-10-16"))) +dim(dates) <- c(sday = 1, sweek = 1, syear = 6, time = 3) + +lat <- c(40,40.1) + +exp <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) +exp_cor <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, + 'prlr' = expcor_prlr) +res <- PeriodSPEI(exp = exp, exp_cor = exp_cor, lat = lat, dates = dates) + +} diff --git a/tests/testthat/test-PeriodSPEI.R b/tests/testthat/test-PeriodSPEI.R index 7c55211..e009ad0 100644 --- a/tests/testthat/test-PeriodSPEI.R +++ b/tests/testthat/test-PeriodSPEI.R @@ -9,30 +9,34 @@ dims <- c(syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 10) dimscor <- c(syear = 1, time = 3, latitude = 2, longitude = 1, ensemble = 15) set.seed(1) -exp_tasmax <- array(rnorm(100, 27.73, 5.26), dim = dims) +exp_tasmax <- array(rnorm(360, 27.73, 5.26), dim = dims) set.seed(2) -exp_tasmin <- array(rnorm(100, 14.83, 3.86), dim = dims) +exp_tasmin <- array(rnorm(360, 14.83, 3.86), dim = dims) set.seed(3) -exp_prlr <- array(rnorm(100, 21.19, 25.64), dim = dims) +exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims) set.seed(1) -expcor_tasmax <- array(rnorm(100, 29.03, 5.67), dim = dimscor) +expcor_tasmax <- array(rnorm(60, 29.03, 5.67), dim = dimscor) set.seed(2) -expcor_tasmin <- array(rnorm(100, 15.70, 4.40), dim = dimscor) +expcor_tasmin <- array(rnorm(60, 15.70, 4.40), dim = dimscor) set.seed(3) -expcor_prlr <- array(rnorm(100, 15.62, 21.38), dim = dimscor) +expcor_prlr <- array(rnorm(60, 15.62, 21.38), dim = dimscor) -dates <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), +dates_exp <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), paste0(2010:2015, "-10-16"))) -dim(dates) <- c(sday = 1, sweek = 1, syear = 6, time = 3) +dim(dates_exp) <- c(sday = 1, sweek = 1, syear = 6, time = 3) + +dates_expcor <- as.Date(c(paste0(2020, "-08-16"), paste0(2020, "-09-15"), + paste0(2020, "-10-16"))) +dim(dates_expcor) <- c(sday = 1, sweek = 1, syear = 1, time = 3) lat <- c(40,40.1) exp1 <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) exp_cor1 <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, 'prlr' = expcor_prlr) -res <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, dates = dates) -source("/esarchive/scratch/erifarov/git/csindicators/R/PeriodSPEI.R") +res <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, + dates_exp = dates_exp, dates_expcor = dates_expcor) ############################################## @@ -89,52 +93,50 @@ test_that("1. Initial checks PeriodSPEI", { ) # exp (2) expect_warning( - PeriodSPEI(exp = exp1, pet_method = '1', dates = dates, lat = lat), + PeriodSPEI(exp = exp1, pet_method = '1', dates_exp = dates_exp, lat = lat), paste0("Parameter 'pet_method' needs to be 'hargreaves' or ", "'hargreaves_modified'. 
It is set to 'hargreaves'.") ) # time_dim expect_error( - PeriodSPEI(exp = exp1, time_dim = 1) + PeriodSPEI(exp = exp1, time_dim = 1, dates_exp = dates_exp, lat = lat) ) # leadtime_dim - + expect_error( + PeriodSPEI(exp = exp1, time_dim = 1, dates_exp = dates_exp, lat = lat) + ) # memb_dim - + expect_error( + PeriodSPEI(exp = exp1, memb_dim = 1, dates_exp = dates_exp, lat = lat) + ) # lat_dim - + expect_error( + PeriodSPEI(exp = exp1, lat_dim = 1, dates_exp = dates_exp, lat = lat) + ) # accum + expect_error( + PeriodSPEI(exp = exp1, accum = 10, dates_exp = dates_exp, lat = lat) + ) }) ############################################## -# Test different time dimensions of exp and exp_cor -dims <- c(syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 10) -dimscor <- c(syear = 1, time = 3, latitude = 2, longitude = 1, ensemble = 15) - -set.seed(1) -exp_tasmax <- array(rnorm(100, 27.73, 5.26), dim = dims) -set.seed(2) -exp_tasmin <- array(rnorm(100, 14.83, 3.86), dim = dims) -set.seed(3) -exp_prlr <- array(rnorm(100, 21.19, 25.64), dim = dims) - -set.seed(1) -expcor_tasmax <- array(rnorm(100, 29.03, 5.67), dim = dimscor) -set.seed(2) -expcor_tasmin <- array(rnorm(100, 15.70, 4.40), dim = dimscor) -set.seed(3) -expcor_prlr <- array(rnorm(100, 15.62, 21.38), dim = dimscor) - -dates <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), - paste0(2010:2015, "-10-16"))) -dim(dates) <- c(sday = 1, sweek = 1, syear = 6, time = 3) - -lat <- c(40,40.1) - -exp1 <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) -exp_cor1 <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, 'prlr' = expcor_prlr) +test_that("2. Output checks", { + res <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, + dates_exp = dates_exp, dates_expcor = dates_expcor) + expect_equal( + length(res), + 3 + ) + expect_equal( + dim(res[[1]]), + dims + ) + expect_equal( + dim(res[[3]])[which(!names(dim(res[[3]])) %in% c('coef', 'syear'))], + dims[which(!names(dims) %in% c('syear', 'ensemble'))] + ) +}) -res <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, dates = dates, - cross_validation = TRUE, na.rm = TRUE) \ No newline at end of file +############################################## \ No newline at end of file -- GitLab From 9243ba3c9cb649c4b2cac55cd769db0064c60d95 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Tue, 13 Jun 2023 17:41:38 +0200 Subject: [PATCH 12/87] Separate pet parameter to pet_exp and pet_expcor; add initial checks; add few tests --- R/PeriodSPEI.R | 247 +++++++++++++++++++++---------- man/CST_PeriodSPEI.Rd | 57 +++++-- man/PeriodSPEI.Rd | 50 +++++-- tests/testthat/test-PeriodSPEI.R | 112 ++++++++++++-- 4 files changed, 345 insertions(+), 121 deletions(-) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index 284c174..39e6daf 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -22,9 +22,6 @@ #'@param exp_cor A named list with the needed \code{s2dv_cube} objects for each #' variable in which the quantile PeriodSPEI should be applied. If it is not #' specified, the PeriodSPEI is calculated from object 'exp'. -#'@param pet A multidimensional array containing the Potential -#' EvapoTranspiration data. If it is NULL it is calculated using pet_method. It -#' is NULL by default. #'@param time_dim A character string indicating the name of the temporal #' dimension. By default, it is set to 'syear'. #'@param leadtime_dim A character string indicating the name of the temporal @@ -42,11 +39,19 @@ #' used as starting and end points. 
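For orientation, a minimal hedged sketch of how the 'ref_period' argument documented just above might be supplied, reusing the 'exp1', 'lat' and 'dates_exp' test objects defined earlier in this series. The assumption that the two values are the first and last start years of the reference window is taken from the wording of the description only and is not verified against the implementation in this patch:

# Hypothetical call: fit the standardization only on the 2010-2013 start dates
res_ref <- PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp,
                      ref_period = list(2010, 2013))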
#'@param params A multidimensional array with named dimensions for computing the #' SPEI. This option overrides computation of fitting parameters. It needs -#' to be of same leadtime and time dimensions of exp and a dimension named +#' to be of same leadtime and time dimensions of 'exp' and a dimension named #' 'coef' with the length of the coefficients needed for the used distribution #' (for 'Gamma' coef dimension is of lenght 2, for 'log-Logistic' or #' 'PearsonIII' is of length 3). If cross_validation is FALSE, the leadtime #' dimension must be of length 1. +#'@param pet_exp A multidimensional array containing the Potential +#' EvapoTranspiration data of 'exp'. It must have the same dimensions of the +#' variable 'prlr' of 'exp'. If it is NULL it is calculated using the provided +#' variables with specified 'pet_method'. It is NULL by default. +#'@param pet_expcor A multidimensional array containing the Potential +#' EvapoTranspiration data of 'exp_cor'. It must have the same dimensions of +#' the variable 'prlr' of 'exp_cor'. If it is NULL it is calculated using the +#' provided variables with specified 'pet_method'. It is NULL by default. #'@param standardization A logical value indicating wether the standardization #' is computed. #'@param cross_validation A logical value indicating if cross validation is @@ -57,7 +62,8 @@ #' variables tasmin and tasmax; and 'thornthwaite', that requires variable #' 'tas'. #'@param method A character string indicating the standardization method used. -#' If can be: 'parametric' or 'non-parametric'. +#' If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by +#' default. #'@param distribution A character string indicating the name of the distribution #' function to be used for computing the SPEI. The accepted names are: #' 'log-Logistic', 'Gamma' or 'PearsonIII'. 
It is set to 'log-Logistic' by @@ -89,17 +95,18 @@ #' } #' #'@examples -#'# Test random data #'dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, #' latitude = 2, longitude = 1, ensemble = 25) -#'dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, +#'dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 1, time = 3, #' latitude = 2, longitude = 1, ensemble = 15) #' -#'dates <- as.POSIXct(c(paste0(2010:2015, "-08-16"), -#' paste0(2010:2015, "-09-15"), -#' paste0(2010:2015, "-10-16")), 'UTC') -#'dim(dates) <- c(sday = 1, sweek = 1, syear = 6, time = 3) -#' +#'dates_exp <- as.POSIXct(c(paste0(2010:2015, "-08-16"), +#' paste0(2010:2015, "-09-15"), +#' paste0(2010:2015, "-10-16")), 'UTC') +#'dim(dates_exp) <- c(sday = 1, sweek = 1, syear = 6, time = 3) +#'dates_expcor <- as.POSIXct(c(paste0(2020, "-08-16"), paste0(2020, "-09-15"), +#' paste0(2020, "-10-16")), 'UTC') +#'dim(dates_expcor) <- c(sday = 1, sweek = 1, syear = 1, time = 3) #'lat <- c(40,40.1) #' #'exp_tasmax <- array(rnorm(900, 27.73, 5.26), dim = dims) @@ -115,9 +122,9 @@ #' 'prlr' = expcor_prlr) #' #'exp <- lapply(exp, CSTools::s2dv_cube, coords = list(latitude = lat), -#' Dates = dates) +#' Dates = dates_exp) #'exp_cor <- lapply(exp_cor, CSTools::s2dv_cube, coords = list(latitude = lat), -#' Dates = dates) +#' Dates = dates_expcor) #' #'res <- CST_PeriodSPEI(exp = exp, exp_cor = exp_cor) #' @@ -129,10 +136,11 @@ #'@import lmomco #'@import lubridate #'@export -CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, +CST_PeriodSPEI <- function(exp, exp_cor = NULL, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', lat_dim = 'latitude', accum = 1, ref_period = NULL, params = NULL, + pet_exp = NULL, pet_expcor = NULL, standardization = TRUE, cross_validation = FALSE, pet_method = 'hargreaves', method = 'parametric', distribution = 'log-Logistic', fit = 'ub-pwm', @@ -158,18 +166,21 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, } # Dates dates_exp <- exp[[1]]$attrs$Dates - if (is.null(exp[[1]]$attrs$Dates)) { - stop("Element 'Dates' is not found in 'exp$attrs' list.") + if (!'Dates' %in% names(exp[[1]]$attrs)) { + stop("Element 'Dates' is not found in 'attrs' list of 'exp'. ", + "See 's2dv_cube' object description in README file for more ", + "information.") } if (!is.null(exp_cor)) { - if (is.null(exp_cor[[1]]$attrs$Dates)) { - stop("Element 'Dates' is not found in 'exp_cor$attrs'.") + if (!'Dates' %in% names(exp_cor[[1]]$attrs)) { + stop("Element 'Dates' is not found in 'attrs' list of 'exp_cor'. ", + "See 's2dv_cube' object description in README file for more ", + "information.") } } lat_dim <- names(exp[[1]]$coords)[[which(names(exp[[1]]$coords) %in% .KnownLatNames())]] - res <- PeriodSPEI(exp = lapply(exp, function(x) x$data), dates_exp = exp[[1]]$attrs$Dates, @@ -177,9 +188,10 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, exp_cor = if (is.null(exp_cor)) {NULL} else { lapply(exp_cor, function(x) x$data)}, dates_expcor = exp_cor[[1]]$attrs$Dates, - pet = pet, time_dim = time_dim, leadtime_dim = leadtime_dim, + time_dim = time_dim, leadtime_dim = leadtime_dim, memb_dim = memb_dim, lat_dim = lat_dim, accum = accum, ref_period = ref_period, params = params, + pet_exp = pet_exp, pet_expcor = pet_expcor, standardization = standardization, cross_validation = cross_validation, pet_method = pet_method, method = method, @@ -236,9 +248,6 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, #' specified, the PeriodSPEI is calculated from object 'exp'. 
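As the \examples and test files elsewhere in this series show, 'exp' and 'exp_cor' are plain named lists of arrays, one entry per variable, sharing the same dimension names; a condensed sketch mirroring the test data (array sizes and random values are illustrative only, and the function from this patch is assumed to be loaded):

dims <- c(syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 10)
# temperatures in degrees Celsius, precipitation in mm/month
exp <- list(tasmax = array(rnorm(360, 27.7, 5.3), dim = dims),
            tasmin = array(rnorm(360, 14.8, 3.9), dim = dims),
            prlr   = array(rnorm(360, 21.2, 25.6), dim = dims))
dates_exp <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"),
                       paste0(2010:2015, "-10-16")))
dim(dates_exp) <- c(sday = 1, sweek = 1, syear = 6, time = 3)
res <- PeriodSPEI(exp = exp, dates_exp = dates_exp, lat = c(40, 40.1))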
#'@param dates_expcor An array of temporal dimensions containing the Dates of #' 'exp_cor'. It must be of class 'Date' or 'POSIXct'. -#'@param pet A multidimensional array containing the Potential -#' EvapoTranspiration data. If it is NULL it is calculated using pet_method. It -#' is NULL by default. #'@param time_dim A character string indicating the name of the temporal #' dimension. By default, it is set to 'syear'. #'@param leadtime_dim A character string indicating the name of the temporal @@ -261,6 +270,14 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, #' (for 'Gamma' coef dimension is of lenght 2, for 'log-Logistic' or #' 'PearsonIII' is of length 3). If cross_validation is FALSE, the leadtime #' dimension must be of length 1. +#'@param pet_exp A multidimensional array containing the Potential +#' EvapoTranspiration data of 'exp'. It must have the same dimensions of the +#' variable 'prlr' of 'exp'. If it is NULL it is calculated using the provided +#' variables with specified 'pet_method'. It is NULL by default. +#'@param pet_expcor A multidimensional array containing the Potential +#' EvapoTranspiration data of 'exp_cor'. It must have the same dimensions of +#' the variable 'prlr' of 'exp_cor'. If it is NULL it is calculated using the +#' provided variables with specified 'pet_method'. It is NULL by default. #'@param standardization A logical value indicating wether the standardization #' is computed. #'@param cross_validation A logical value indicating if cross validation is @@ -302,10 +319,9 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, #'} #' #'@examples -#'# Test random data #'dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, #' latitude = 2, longitude = 1, ensemble = 25) -#'dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, +#'dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 1, time = 3, #' latitude = 2, longitude = 1, ensemble = 15) #'exp_tasmax <- array(rnorm(900, 27.73, 5.26), dim = dims) #'exp_tasmin <- array(rnorm(900, 14.83, 3.86), dim = dims) @@ -315,16 +331,21 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, #'expcor_tasmin <- array(rnorm(540, 15.70, 4.40), dim = dimscor) #'expcor_prlr <- array(rnorm(540, 15.62, 21.38), dim = dimscor) #' -#'dates <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), -#' paste0(2010:2015, "-10-16"))) -#'dim(dates) <- c(sday = 1, sweek = 1, syear = 6, time = 3) -#' +#'dates_exp <- as.Date(c(paste0(2010:2015, "-08-16"), +#' paste0(2010:2015, "-09-15"), +#' paste0(2010:2015, "-10-16"))) +#'dim(dates_exp) <- c(sday = 1, sweek = 1, syear = 6, time = 3) +#'dates_expcor <- as.POSIXct(c(paste0(2020, "-08-16"), +#' paste0(2020, "-09-15"), +#' paste0(2020, "-10-16")), 'UTC') +#'dim(dates_expcor) <- c(sday = 1, sweek = 1, syear = 1, time = 3) #'lat <- c(40,40.1) #' #'exp <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) #'exp_cor <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, #' 'prlr' = expcor_prlr) -#'res <- PeriodSPEI(exp = exp, exp_cor = exp_cor, lat = lat, dates = dates) +#'res <- PeriodSPEI(exp = exp, exp_cor = exp_cor, lat = lat, +#' dates_exp = dates_exp, dates_expcor = dates_expcor) #' #'@import multiApply #'@import ClimProjDiags @@ -335,10 +356,11 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet = NULL, #'@import lubridate #'@export PeriodSPEI <- function(exp, dates_exp, lat, - exp_cor = NULL, dates_expcor = NULL, pet = NULL, + exp_cor = NULL, dates_expcor = NULL, time_dim = 'syear', leadtime_dim = 'time', 
memb_dim = 'ensemble', lat_dim = 'latitude', accum = 1, ref_period = NULL, params = NULL, + pet_exp = NULL, pet_expcor = NULL, standardization = TRUE, cross_validation = FALSE, pet_method = 'hargreaves', method = 'parametric', distribution = 'log-Logistic', fit = 'ub-pwm', @@ -375,28 +397,14 @@ PeriodSPEI <- function(exp, dates_exp, lat, if (any(sapply(exp_cor, function(x) is.null(names(dim(x)))))) { stop("Parameter 'exp_cor' needs to be a list of arrays with dimension names.") } - if (!all(names(exp_cor) %in% names(exp))) { - stop("Parameter 'exp_cor' needs to have the same variable names as 'exp'.") - } dimscor <- lapply(exp_cor, function(x) dim(x)) - first_dims <- dimscor[[1]] - all_equal <- all(sapply(dimscor[-1], function(x) identical(first_dims, x))) + first_dimscor <- dimscor[[1]] + all_equal <- all(sapply(dimscor[-1], function(x) identical(first_dimscor, x))) if (!all_equal) { stop("Parameter 'exp_cor' variables needs to have the same dimension names.") } } - # dates - if (!(is.Date(dates_exp)) & !(is.POSIXct(dates_exp))) { - stop("Parameter 'dates_exp' is not of the correct class, ", - "only 'Date' and 'POSIXct' classes are accepted.") - } - if (!is.null(exp_cor)) { - if (!(is.Date(dates_expcor)) & !(is.POSIXct(dates_expcor))) { - stop("Element 'Dates' in 'exp_cor' is not of the correct class, ", - "only 'Date' and 'POSIXct' classes are accepted.") - } - } # lat if (!is.numeric(lat)) { stop("Parameter 'lat' must be numeric.") @@ -413,8 +421,9 @@ PeriodSPEI <- function(exp, dates_exp, lat, } # Variable checks - if (is.null(pet)) { - ## exp (2) + ## exp (2) + pet <- vector("list", 2) + if (is.null(pet_exp)) { if (all(c('tasmin', 'tasmax', 'prlr') %in% names(exp))) { # hargreaves modified: 'tasmin', 'tasmax', 'prlr' and 'lat' if (!(pet_method[1] %in% c('hargreaves_modified', 'hargreaves'))) { @@ -429,12 +438,30 @@ PeriodSPEI <- function(exp, dates_exp, lat, pet_method[1] <- 'thornthwaite' } } else { - stop("Parameter 'exp' needs to be a named list with accepted variable names.", - "See documentation.") + stop("Parameter 'exp' needs to be a named list with accepted ", + "variable names if 'pet_exp' is not provided. 
See documentation.") } - - ## exp_cor (2) - if (!is.null(exp_cor)) { + } else { + if (!('prlr' %in% names(exp))) { + stop("Variable 'prlr' is not included in 'exp'.") + } + if (length(dim(exp[['prlr']])) != length(dim(pet_exp))) { + stop("Parameter 'pet_exp' must have the same length of all the ", + "dimensions as variable 'prlr' in 'exp'.") + } + if (!all(dim(exp[['prlr']]) %in% dim(pet_exp))) { + stop("Parameter 'pet_exp' must have the same length of all the ", + "dimensions as variable 'prlr' in 'exp'.") + } + if (any(names(dim(exp[['prlr']])) != names(dim(pet_exp)))) { + pos <- match(names(dim(exp[['prlr']])), names(dim(pet_exp))) + pet_exp <- aperm(pet_exp, pos) + } + pet[[1]] <- pet_exp + } + ## exp_cor (2) + if (!is.null(exp_cor)) { + if (is.null(pet_expcor)) { if (length(exp_cor) < 1) { exp_cor <- NULL } else { @@ -456,17 +483,28 @@ PeriodSPEI <- function(exp, dates_exp, lat, pet_method[2] <- 'thornthwaite' } } else { - stop("Parameter 'exp_cor' needs to be a list with the needed variables.") + stop("Parameter 'exp_cor' needs to be a list with the needed ", + "variables if 'pet_expcor' is not provided.") } - } - } else { - if (!('prlr' %in% names(exp))) { - stop("Variable 'prlr' is not included in 'exp'.") - } - if (!is.null(exp_cor)) { - if (!('prlr' %in% names(exp_cor))) { - stop("Variable 'prlr' is not included in 'exp_cor'.") + } else { + if (!is.null(exp_cor)) { + if (!('prlr' %in% names(exp_cor))) { + stop("Variable 'prlr' is not included in 'exp_cor'.") + } + if (length(dim(exp_cor[['prlr']])) != length(dim(pet_expcor))) { + stop("Parameter 'pet_expcor' must have the same length of all the ", + "dimensions as variable 'prlr' in 'exp_cor'.") + } + if (!all(dim(exp_cor[['prlr']]) %in% dim(pet_expcor))) { + stop("Parameter 'pet_expcor' must have the same length of all the ", + "dimensions as variable 'prlr' in 'exp_cor'.") + } + if (any(names(dim(exp_cor[['prlr']])) != names(dim(pet_expcor)))) { + pos <- match(names(dim(exp_cor[['prlr']])), names(dim(pet_expcor))) + pet_expcor <- aperm(pet_expcor, pos) + } } + pet[[2]] <- pet_expcor } } @@ -518,11 +556,50 @@ PeriodSPEI <- function(exp, dates_exp, lat, stop("Parameter 'lat_dim' is not found in 'exp_cor' dimension.") } } + # dates + if (is.null(dates_exp)) { + stop("Parameter 'dates_exp' is missing, dates must be provided.") + } + if (!(is.Date(dates_exp)) & !(is.POSIXct(dates_exp))) { + stop("Parameter 'dates_exp' is not of the correct class, ", + "only 'Date' and 'POSIXct' classes are accepted.") + } + if (!all(dim(exp[[1]])[c(time_dim, leadtime_dim)] == + dim(dates_exp)[c(time_dim, leadtime_dim)])) { + stop("Parameter 'dates_exp' needs to have the same length as 'time_dim' ", + "and 'leadtime_dim' as 'exp'.") + } + + if (!is.null(exp_cor)) { + if (is.null(dates_expcor)) { + stop("Parameter 'dates_expcor' is missing, dates for 'exp_cor' must be ", + "provided if exp_cor is not NULL.") + } + if (!(is.Date(dates_expcor)) & !(is.POSIXct(dates_expcor))) { + stop("Element 'Dates' in 'exp_cor' is not of the correct class, ", + "only 'Date' and 'POSIXct' classes are accepted.") + } + if (!all(dim(exp_cor[[1]])[c(time_dim, leadtime_dim)] == + dim(dates_expcor)[c(time_dim, leadtime_dim)])) { + stop("Parameter 'dates_expcor' needs to have the same length as ", + "'time_dim' and 'leadtime_dim' as 'exp_cor'.") + } + } ## accum if (accum > dim(exp[[1]])[leadtime_dim]) { stop(paste0("Cannot compute accumulation of ", accum, " months because ", "loaded data has only ", dim(exp[[1]])[leadtime_dim], " months.")) } + ## params + if 
(!is.null(params)) { + if (!is.numeric(params)) { + stop("Parameter 'params' must be numeric.") + } + if (!all(c(time_dim, leadtime_dim, 'coef') %in% names(dim(params)))) { + stop("Parameter 'params' must be a multidimensional array with named ", + "dimensions: 'time_dim', 'leadtime_dim' and 'coef'.") + } + } ## standardization if (!is.logical(standardization)) { @@ -543,26 +620,29 @@ PeriodSPEI <- function(exp, dates_exp, lat, if (!is.logical(cross_validation)) { stop("Parameter 'cross_validation' must be a logical value.") } - if (cross_validation) { - warning("Detected 'cross_validation' = TRUE. This functionality ", - "is being developed, sorry for the inconvenience. It will ", - "be set to FALSE.") - cross_validation <- FALSE - } + # if (cross_validation) { + # warning("Detected 'cross_validation' = TRUE. This functionality ", + # "is being developed, sorry for the inconvenience. It will ", + # "be set to FALSE.") + # cross_validation <- FALSE + # } ## method if (!(method %in% c('parametric', 'non-parametric'))) { - stop('pcX script error: SPEI can be only computed using the following approach: parametric or non-parametric') + stop("Parameter 'method' must be a character string containing one of ", + "the following methods: 'parametric' or 'non-parametric'.") } ## distribution if (!(distribution %in% c('log-Logistic', 'Gamma', 'PearsonIII'))) { - stop('Distrib must be one of "log-Logistic", "Gamma" or "PearsonIII"') + stop("Parameter 'distribution' must be a character string containing one ", + "of the following distributions: 'log-Logistic', 'Gamma' or ", + "'PearsonIII'.") } ## fit if (!(fit %in% c('max-lik', 'ub-pwm', 'pp-pwm'))) { - stop('Method must be one of "ub-pwm" (default), "pp-pwm" or "max-lik".') + stop("Parameter 'fit' must be a character string containing one of ", + "the following fit methods: 'max-lik', 'ub-pwm', 'pp-pwm'.") } - ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | @@ -600,16 +680,19 @@ PeriodSPEI <- function(exp, dates_exp, lat, for (data in .return2list(exp, exp_cor)) { k = k + 1 # Evapotranspiration estimation (unless pet is already provided) - if (is.null(pet) | computed_pet) { - pet <- .Evapotranspiration(data = data, dates_monthly = dates_monthly[[k]], - lat = lat, pet_method = pet_method[k], - time_dim = time_dim, leadtime_dim = leadtime_dim, - lat_dim = lat_dim, na.rm = na.rm, ncores = ncores) + if (is.null(pet[[k]]) | computed_pet) { + pet[[k]] <- .Evapotranspiration(data = data, + dates_monthly = dates_monthly[[k]], + lat = lat, pet_method = pet_method[k], + time_dim = time_dim, + leadtime_dim = leadtime_dim, + lat_dim = lat_dim, na.rm = na.rm, + ncores = ncores) computed_pet <- TRUE } # Accumulation - diff_p_pet <- data$prlr - pet + diff_p_pet <- data$prlr - pet[[k]] data_accum <- .Accumulation(data = diff_p_pet, dates_monthly = dates_monthly[[k]], accum = accum, time_dim = time_dim, leadtime_dim = leadtime_dim, @@ -660,7 +743,9 @@ PeriodSPEI <- function(exp, dates_exp, lat, .return2list <- function(data1, data2 = NULL) { - if (is.null(data2)) { + if (is.null(data1) & is.null(data2)) { + return(NULL) + } else if (is.null(data2)) { return(list(data1)) } else { return(list(data1, data2)) diff --git a/man/CST_PeriodSPEI.Rd b/man/CST_PeriodSPEI.Rd index 2bc5c47..70c9b21 100644 --- a/man/CST_PeriodSPEI.Rd +++ b/man/CST_PeriodSPEI.Rd @@ -7,7 +7,6 @@ CST_PeriodSPEI( exp, exp_cor = NULL, - pet = NULL, time_dim = "syear", leadtime_dim = "time", memb_dim = "ensemble", @@ -15,6 +14,8 @@ CST_PeriodSPEI( 
accum = 1, ref_period = NULL, params = NULL, + pet_exp = NULL, + pet_expcor = NULL, standardization = TRUE, cross_validation = FALSE, pet_method = "hargreaves", @@ -42,10 +43,6 @@ precipitation ('prlr') need to be in mm/month.} variable in which the quantile PeriodSPEI should be applied. If it is not specified, the PeriodSPEI is calculated from object 'exp'.} -\item{pet}{A multidimensional array containing the Potential -EvapoTranspiration data. If it is NULL it is calculated using pet_method. It -is NULL by default.} - \item{time_dim}{A character string indicating the name of the temporal dimension. By default, it is set to 'syear'.} @@ -69,12 +66,22 @@ used as starting and end points.} \item{params}{A multidimensional array with named dimensions for computing the SPEI. This option overrides computation of fitting parameters. It needs -to be of same leadtime and time dimensions of exp and a dimension named +to be of same leadtime and time dimensions of 'exp' and a dimension named 'coef' with the length of the coefficients needed for the used distribution (for 'Gamma' coef dimension is of lenght 2, for 'log-Logistic' or 'PearsonIII' is of length 3). If cross_validation is FALSE, the leadtime dimension must be of length 1.} +\item{pet_exp}{A multidimensional array containing the Potential +EvapoTranspiration data of 'exp'. It must have the same dimensions of the +variable 'prlr' of 'exp'. If it is NULL it is calculated using the provided +variables with specified 'pet_method'. It is NULL by default.} + +\item{pet_expcor}{A multidimensional array containing the Potential +EvapoTranspiration data of 'exp_cor'. It must have the same dimensions of +the variable 'prlr' of 'exp_cor'. If it is NULL it is calculated using the +provided variables with specified 'pet_method'. It is NULL by default.} + \item{standardization}{A logical value indicating wether the standardization is computed.} @@ -88,7 +95,8 @@ variables tasmin and tasmax; and 'thornthwaite', that requires variable 'tas'.} \item{method}{A character string indicating the standardization method used. -If can be: 'parametric' or 'non-parametric'.} +If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by +default.} \item{distribution}{A character string indicating the name of the distribution function to be used for computing the SPEI. The accepted names are: @@ -110,6 +118,22 @@ from data. It is FALSE by default.} \item{ncores}{An integer value indicating the number of cores to use in parallel computation.} } +\value{ +A list with elements: +\itemize{ + \item{'exp', if 'standarization' is TRUE an 's2dv_cube' conaining the 'SPEI' + in element data from 'exp' array with the same dimensions as 'exp'. + If it is FALSE, it is an array with the accumulated values of PET + minus 'prlr' data.} + \item{'exp_cor', if 'standarization' is TRUE and if 'exp_cor' is not + NULL. It is an 's2dv_cube' with the SPEI data from 'exp_cor' in + element 'data'. If 'standarization' is FALSE, only the accumulated + values of PET minus 'prlr' is returned.} + \item{'params', returned if 'standarization' is TRUE, it contains the + parameters used for the standarization of 'exp' that are used for + computing the 'SPEI' for 'exp_cor'.} +} +} \description{ Calculation of the Standardised Precipitation-Evapotranspiration Index (SPEI) that is a multiscalar drought index based on climatic data. It can be used for @@ -122,17 +146,18 @@ a simple climatic water balance which is calculated at different time scales to obtain the SPEI. 
} \examples{ -# Test random data dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 25) -dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, +dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 2, longitude = 1, ensemble = 15) -dates <- as.POSIXct(c(paste0(2010:2015, "-08-16"), - paste0(2010:2015, "-09-15"), - paste0(2010:2015, "-10-16")), 'UTC') -dim(dates) <- c(sday = 1, sweek = 1, syear = 6, time = 3) - +dates_exp <- as.POSIXct(c(paste0(2010:2015, "-08-16"), + paste0(2010:2015, "-09-15"), + paste0(2010:2015, "-10-16")), 'UTC') +dim(dates_exp) <- c(sday = 1, sweek = 1, syear = 6, time = 3) +dates_expcor <- as.POSIXct(c(paste0(2020, "-08-16"), paste0(2020, "-09-15"), + paste0(2020, "-10-16")), 'UTC') +dim(dates_expcor) <- c(sday = 1, sweek = 1, syear = 1, time = 3) lat <- c(40,40.1) exp_tasmax <- array(rnorm(900, 27.73, 5.26), dim = dims) @@ -148,9 +173,9 @@ exp_cor <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, 'prlr' = expcor_prlr) exp <- lapply(exp, CSTools::s2dv_cube, coords = list(latitude = lat), - Dates = dates) + Dates = dates_exp) exp_cor <- lapply(exp_cor, CSTools::s2dv_cube, coords = list(latitude = lat), - Dates = dates) + Dates = dates_expcor) res <- CST_PeriodSPEI(exp = exp, exp_cor = exp_cor) diff --git a/man/PeriodSPEI.Rd b/man/PeriodSPEI.Rd index 1168122..cf2d23c 100644 --- a/man/PeriodSPEI.Rd +++ b/man/PeriodSPEI.Rd @@ -10,7 +10,6 @@ PeriodSPEI( lat, exp_cor = NULL, dates_expcor = NULL, - pet = NULL, time_dim = "syear", leadtime_dim = "time", memb_dim = "ensemble", @@ -18,6 +17,8 @@ PeriodSPEI( accum = 1, ref_period = NULL, params = NULL, + pet_exp = NULL, + pet_expcor = NULL, standardization = TRUE, cross_validation = FALSE, pet_method = "hargreaves", @@ -53,10 +54,6 @@ specified, the PeriodSPEI is calculated from object 'exp'.} \item{dates_expcor}{An array of temporal dimensions containing the Dates of 'exp_cor'. It must be of class 'Date' or 'POSIXct'.} -\item{pet}{A multidimensional array containing the Potential -EvapoTranspiration data. If it is NULL it is calculated using pet_method. It -is NULL by default.} - \item{time_dim}{A character string indicating the name of the temporal dimension. By default, it is set to 'syear'.} @@ -86,6 +83,16 @@ to be of same leadtime and time dimensions of exp and a dimension named 'PearsonIII' is of length 3). If cross_validation is FALSE, the leadtime dimension must be of length 1.} +\item{pet_exp}{A multidimensional array containing the Potential +EvapoTranspiration data of 'exp'. It must have the same dimensions of the +variable 'prlr' of 'exp'. If it is NULL it is calculated using the provided +variables with specified 'pet_method'. It is NULL by default.} + +\item{pet_expcor}{A multidimensional array containing the Potential +EvapoTranspiration data of 'exp_cor'. It must have the same dimensions of +the variable 'prlr' of 'exp_cor'. If it is NULL it is calculated using the +provided variables with specified 'pet_method'. It is NULL by default.} + \item{standardization}{A logical value indicating wether the standardization is computed.} @@ -121,6 +128,21 @@ from data. It is FALSE by default.} \item{ncores}{An integer value indicating the number of cores to use in parallel computation.} } +\value{ +A list with elements: +\itemize{ + \item{'exp', if 'standarization' is TRUE an array conaining SPEI data from + 'exp' array with the same dimensions as 'exp'. 
If it is FALSE, it + is an array with the accumulated values of PET minus 'prlr' data.} + \item{'exp_cor', if 'standarization' is TRUE and if 'exp_cor' is not + NULL. It is an array with the SPEI data from 'exp_cor'. If + 'standarization' is FALSE, only the accumulated values of PET minus + 'prlr' is returned.} + \item{'params', returned if 'standarization' is TRUE, it contains the + parameters used for the standarization of 'exp' that are used for + computing the 'SPEI' for 'exp_cor'.} +} +} \description{ Calculation of the Standardised Precipitation-Evapotranspiration Index (SPEI) that is a multiscalar drought index based on climatic data. It can be used for @@ -133,10 +155,9 @@ a simple climatic water balance which is calculated at different time scales to obtain the SPEI. } \examples{ -# Test random data dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 25) -dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, +dimscor <- c(var = 1, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 2, longitude = 1, ensemble = 15) exp_tasmax <- array(rnorm(900, 27.73, 5.26), dim = dims) exp_tasmin <- array(rnorm(900, 14.83, 3.86), dim = dims) @@ -146,15 +167,20 @@ expcor_tasmax <- array(rnorm(540, 29.03, 5.67), dim = dimscor) expcor_tasmin <- array(rnorm(540, 15.70, 4.40), dim = dimscor) expcor_prlr <- array(rnorm(540, 15.62, 21.38), dim = dimscor) -dates <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), - paste0(2010:2015, "-10-16"))) -dim(dates) <- c(sday = 1, sweek = 1, syear = 6, time = 3) - +dates_exp <- as.Date(c(paste0(2010:2015, "-08-16"), + paste0(2010:2015, "-09-15"), + paste0(2010:2015, "-10-16"))) +dim(dates_exp) <- c(sday = 1, sweek = 1, syear = 6, time = 3) +dates_expcor <- as.POSIXct(c(paste0(2020, "-08-16"), + paste0(2020, "-09-15"), + paste0(2020, "-10-16")), 'UTC') +dim(dates_expcor) <- c(sday = 1, sweek = 1, syear = 1, time = 3) lat <- c(40,40.1) exp <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) exp_cor <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, 'prlr' = expcor_prlr) -res <- PeriodSPEI(exp = exp, exp_cor = exp_cor, lat = lat, dates = dates) +res <- PeriodSPEI(exp = exp, exp_cor = exp_cor, lat = lat, + dates_exp = dates_exp, dates_expcor = dates_expcor) } diff --git a/tests/testthat/test-PeriodSPEI.R b/tests/testthat/test-PeriodSPEI.R index e009ad0..fb9b946 100644 --- a/tests/testthat/test-PeriodSPEI.R +++ b/tests/testthat/test-PeriodSPEI.R @@ -86,11 +86,6 @@ test_that("1. Initial checks PeriodSPEI", { PeriodSPEI(exp = exp1, exp_cor = list('tasmax' = array(10))), "Parameter 'exp_cor' needs to be a list of arrays with dimension names." ) - expect_error( - PeriodSPEI(exp = list('tas' = array(10, dim = c(time = 10))), - exp_cor = list('tos' = array(10, dim = c(time = 10)))), - "Parameter 'exp_cor' needs to have the same variable names as 'exp'." - ) # exp (2) expect_warning( PeriodSPEI(exp = exp1, pet_method = '1', dates_exp = dates_exp, lat = lat), @@ -117,26 +112,119 @@ test_that("1. 
Initial checks PeriodSPEI", { expect_error( PeriodSPEI(exp = exp1, accum = 10, dates_exp = dates_exp, lat = lat) ) - + # standardization + expect_error( + PeriodSPEI(exp = exp1, standardization = 10, dates_exp = dates_exp, lat = lat) + ) + # param_error + expect_error( + PeriodSPEI(exp = exp1, param_error = TRUE, dates_exp = dates_exp, lat = lat) + ) + # handle_infinity + expect_error( + PeriodSPEI(exp = exp1, handle_infinity = 1, dates_exp = dates_exp, lat = lat) + ) + # cross_validation + expect_error( + PeriodSPEI(exp = exp1, cross_validation = 1, dates_exp = dates_exp, lat = lat) + ) + # method + expect_error( + PeriodSPEI(exp = exp1, method = 1, dates_exp = dates_exp, lat = lat) + ) + # distribution + expect_error( + PeriodSPEI(exp = exp1, distribution = 1, dates_exp = dates_exp, lat = lat) + ) + # fit + expect_error( + PeriodSPEI(exp = exp1, fit = 1, dates_exp = dates_exp, lat = lat) + ) + # ncores + expect_error( + PeriodSPEI(exp = exp1, ncores = 1.5, dates_exp = dates_exp, lat = lat) + ) }) ############################################## test_that("2. Output checks", { - res <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, - dates_exp = dates_exp, dates_expcor = dates_expcor) + res1 <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, + dates_exp = dates_exp, dates_expcor = dates_expcor) + res2 <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, + dates_exp = dates_exp, dates_expcor = dates_expcor, + standardization = FALSE) + res3 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, + dates_exp = dates_exp) + res4 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, + dates_exp = dates_exp, standardization = FALSE) + # output dims + expect_equal( + names(res1), + c('exp', 'exp_cor', 'params') + ) expect_equal( - length(res), - 3 + names(res2), + c('exp', 'exp_cor') ) expect_equal( - dim(res[[1]]), + names(res3), + c('exp', 'params') + ) + expect_equal( + names(res4), + c('exp') + ) + expect_equal( + dim(res1[[1]]), dims ) expect_equal( - dim(res[[3]])[which(!names(dim(res[[3]])) %in% c('coef', 'syear'))], + dim(res1[[3]])[which(!names(dim(res1[[3]])) %in% c('coef', 'syear'))], dims[which(!names(dims) %in% c('syear', 'ensemble'))] ) + expect_equal( + dim(res2[[2]]), + dimscor + ) + expect_equal( + dim(res3[[2]]), + c(syear = 1, time = 3, coef = 3, latitude = 2, longitude = 1) + ) + # exp + # exp_cor + # pet + # time_dim + # leadtime_dim + # memb_dim + # lat_dim + # accum + # ref_period + # params + # standarization + # cross_validation + # pet_method - + # method - + # distribution - Only works for 'log-Logistic' + res5 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, + dates_exp = dates_exp, distribution = 'PearsonIII') + res6 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, + dates_exp = dates_exp, distribution = 'Gamma') + # fit - + res7 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, + dates_exp = dates_exp, fit = 'ub-pwm') + res8 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, + dates_exp = dates_exp, fit = 'max-lik') + res9 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, + dates_exp = dates_exp, fit = 'pp-pwm') + all.equal(res7, res8) + all.equal(res7, res9) # res9 doesn't work for this data + # param_error - + # handle_infinity - OK + res9 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, + dates_exp = dates_exp, handle_infinity = FALSE) + # na.rm - + # ncores }) ############################################## \ No newline at end of file -- GitLab From f8c059f64c1f1e4c2192b38656a5fe8ba879ddfa Mon Sep 17 00:00:00 2001 From: Eva Rifa 
Date: Wed, 14 Jun 2023 11:09:47 +0200 Subject: [PATCH 13/87] Return 's2dv_cube' spei CST_PeriodSPEI correctly --- NAMESPACE | 2 + R/PeriodSPEI.R | 34 +++++++++++++--- tests/testthat/test-PeriodSPEI.R | 70 ++++++++++++++++++++++++-------- 3 files changed, 83 insertions(+), 23 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 61b6b92..0723757 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -27,9 +27,11 @@ export(TotalSpellTimeExceedingThreshold) export(TotalTimeExceedingThreshold) export(WindCapacityFactor) export(WindPowerDensity) +import(CSTools) import(ClimProjDiags) import(SPEI) import(TLMoments) +import(lmom) import(lmomco) import(lubridate) import(multiApply) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index 39e6daf..213dd73 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -134,7 +134,9 @@ #'@import zoo #'@import TLMoments #'@import lmomco +#'@import lmom #'@import lubridate +#'@import CSTools #'@export CST_PeriodSPEI <- function(exp, exp_cor = NULL, time_dim = 'syear', leadtime_dim = 'time', @@ -200,19 +202,38 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, handle_infinity = handle_infinity, na.rm = na.rm, ncores = ncores) if (is.null(exp_cor)) { - exp$data <- res[[1]] + source_files <- lapply(exp, function(x) {x$attrs$source_files}) + suppressWarnings( + res_exp <- CSTools::s2dv_cube(data = res[[1]], coords = exp[[1]]$coords, + varName = c('SPEI'), Dates = dates_exp, + when = Sys.time(), + source_files = source_files) + ) if (standardization) { - return(list(exp = exp, params = res[[2]])) + return(list(spei_exp = res_exp, params = res[[2]])) } else { return(exp) } } else { - exp$data <- res[[1]] - exp_cor$data <- res[[2]] + source_files_exp <- lapply(exp, function(x) {x$attrs$source_files}) + suppressWarnings( + res_exp <- CSTools::s2dv_cube(data = res[[1]], coords = exp[[1]]$coords, + varName = c('SPEI'), Dates = dates_exp, + source_files = source_files_exp, + when = Sys.time()) + ) + source_files_expcor <- lapply(exp_cor, function(x) {x$attrs$source_files}) + suppressWarnings( + res_expcor <- CSTools::s2dv_cube(data = res[[2]], coords = exp_cor[[1]]$coords, + varName = c('SPEI'), Dates = dates_expcor, + source_files = source_files_expcor, + when = Sys.time()) + ) + if (standardization) { - return(list(exp = exp, exp_cor = exp_cor, params = res[[3]])) + return(list(spei_exp = res_exp, spei_exp_cor = res_expcor, params = res[[3]])) } else { - return(list(exp = exp, exp_cor = exp_cor)) + return(list(spei_exp = res_exp, spei_exp_cor = res_expcor)) } } return(res) @@ -353,6 +374,7 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, #'@import zoo #'@import TLMoments #'@import lmomco +#'@import lmom #'@import lubridate #'@export PeriodSPEI <- function(exp, dates_exp, lat, diff --git a/tests/testthat/test-PeriodSPEI.R b/tests/testthat/test-PeriodSPEI.R index fb9b946..90622ad 100644 --- a/tests/testthat/test-PeriodSPEI.R +++ b/tests/testthat/test-PeriodSPEI.R @@ -35,8 +35,32 @@ lat <- c(40,40.1) exp1 <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) exp_cor1 <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, 'prlr' = expcor_prlr) -res <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, - dates_exp = dates_exp, dates_expcor = dates_expcor) +# dat2 +dims <- c(syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 10) +dimscor <- c(syear = 1, time = 3, latitude = 2, longitude = 1, ensemble = 15) + +set.seed(1) +exp_tas <- array(rnorm(100, 17.34, 9.18), dim = dims) +set.seed(2) +exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims) + +set.seed(1) 
+expcor_tas <- array(rnorm(100, 17.23, 9.19), dim = dimscor) +set.seed(2) +expcor_prlr <- array(rnorm(60, 15.62, 21.38), dim = dimscor) + +dates_exp <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), + paste0(2010:2015, "-10-16"))) +dim(dates_exp) <- c(sday = 1, sweek = 1, syear = 6, time = 3) + +dates_expcor <- as.Date(c(paste0(2020, "-08-16"), paste0(2020, "-09-15"), + paste0(2020, "-10-16"))) +dim(dates_expcor) <- c(sday = 1, sweek = 1, syear = 1, time = 3) + +lat <- c(40,40.1) + +exp2 <- list('tas' = exp_tas, 'prlr' = exp_prlr) +exp_cor2 <- list('tas' = expcor_tas, 'prlr' = expcor_prlr) ############################################## @@ -202,23 +226,35 @@ test_that("2. Output checks", { # ref_period # params # standarization - # cross_validation - # pet_method - - # method - + # cross_validation - Not working + # pet_method - ok + res5 <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, + dates_exp = dates_exp, dates_expcor = dates_expcor, + pet_method = c('hargreaves', 'hargreaves_modified')) + res6 <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, + dates_exp = dates_exp, dates_expcor = dates_expcor, + pet_method = c('hargreaves_modified', 'hargreaves')) + res7 <- PeriodSPEI(exp = exp2, exp_cor = exp_cor2, lat = lat, + dates_exp = dates_exp, dates_expcor = dates_expcor, + pet_method = c('thornthwaite', 'thornthwaite')) + # method - ok + res8 <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, + dates_exp = dates_exp, dates_expcor = dates_expcor, + method = 'non-parametric') # distribution - Only works for 'log-Logistic' - res5 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, - dates_exp = dates_exp, distribution = 'PearsonIII') - res6 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, - dates_exp = dates_exp, distribution = 'Gamma') - # fit - - res7 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, - dates_exp = dates_exp, fit = 'ub-pwm') - res8 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, - dates_exp = dates_exp, fit = 'max-lik') res9 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, - dates_exp = dates_exp, fit = 'pp-pwm') - all.equal(res7, res8) - all.equal(res7, res9) # res9 doesn't work for this data + dates_exp = dates_exp, distribution = 'PearsonIII') # NA + res10 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, + dates_exp = dates_exp, distribution = 'Gamma') # NA + # fit - + res12 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, + dates_exp = dates_exp, fit = 'ub-pwm') # ok + res13 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, + dates_exp = dates_exp, fit = 'max-lik') # ok + res14 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, + dates_exp = dates_exp, fit = 'pp-pwm') # NA + all.equal(res12, res13) + all.equal(res12, res14) # res14 doesn't work for this data # param_error - # handle_infinity - OK res9 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, -- GitLab From 387c589075ab109c100dff2bb4cdd0a34c9647a2 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 14 Jun 2023 11:17:07 +0200 Subject: [PATCH 14/87] Move auxiliary function to zzz file --- R/PeriodSPEI.R | 12 ------------ R/zzz.R | 10 ++++++++++ 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index 213dd73..44c8552 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -763,18 +763,6 @@ PeriodSPEI <- function(exp, dates_exp, lat, return(spei_res) } - -.return2list <- function(data1, data2 = NULL) { - if (is.null(data1) & is.null(data2)) { - return(NULL) - } else if (is.null(data2)) { - 
return(list(data1)) - } else { - return(list(data1, data2)) - } -} - - .Evapotranspiration <- function(data, dates_monthly, lat, pet_method = 'hargreaves', time_dim = 'syear', leadtime_dim = 'time', lat_dim = 'latitude', na.rm = FALSE, diff --git a/R/zzz.R b/R/zzz.R index 81bd0ce..47d871d 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -96,4 +96,14 @@ wind2CF <- function(wind, pc) { .KnownLatNames <- function() { known_lat_names <- c('lat', 'lats', 'latitude', 'y', 'j', 'nav_lat') +} + +.return2list <- function(data1, data2 = NULL) { + if (is.null(data1) & is.null(data2)) { + return(NULL) + } else if (is.null(data2)) { + return(list(data1)) + } else { + return(list(data1, data2)) + } } \ No newline at end of file -- GitLab From 382dadd2d269e16e0a521dfcc66b06cec7d92c62 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 14 Jun 2023 11:46:21 +0200 Subject: [PATCH 15/87] Correct Dates output for 's2dv_cube' in CST_PeriodSPEI --- R/PeriodSPEI.R | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index 44c8552..2cd02e8 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -205,9 +205,10 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, source_files <- lapply(exp, function(x) {x$attrs$source_files}) suppressWarnings( res_exp <- CSTools::s2dv_cube(data = res[[1]], coords = exp[[1]]$coords, - varName = c('SPEI'), Dates = dates_exp, - when = Sys.time(), - source_files = source_files) + varName = c('SPEI'), + Dates = exp[[1]]$attrs$Dates, + source_files = source_files, + when = Sys.time()) ) if (standardization) { return(list(spei_exp = res_exp, params = res[[2]])) @@ -218,14 +219,16 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, source_files_exp <- lapply(exp, function(x) {x$attrs$source_files}) suppressWarnings( res_exp <- CSTools::s2dv_cube(data = res[[1]], coords = exp[[1]]$coords, - varName = c('SPEI'), Dates = dates_exp, + varName = c('SPEI'), + Dates = exp[[1]]$attrs$Dates, source_files = source_files_exp, when = Sys.time()) ) source_files_expcor <- lapply(exp_cor, function(x) {x$attrs$source_files}) suppressWarnings( res_expcor <- CSTools::s2dv_cube(data = res[[2]], coords = exp_cor[[1]]$coords, - varName = c('SPEI'), Dates = dates_expcor, + varName = c('SPEI'), + Dates = exp_cor[[1]]$attrs$Dates, source_files = source_files_expcor, when = Sys.time()) ) -- GitLab From 617e68fc0b6a785b508e409179ca8c8fc339ffeb Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 14 Jun 2023 17:16:58 +0200 Subject: [PATCH 16/87] Improve documentation and restrict cross_validation TRUE when exp_cor is not provided --- R/PeriodSPEI.R | 48 +++++++++++++++++--------------- man/CST_PeriodSPEI.Rd | 13 +++++---- man/PeriodSPEI.Rd | 13 +++++---- tests/testthat/test-PeriodSPEI.R | 28 ++++++++++++++++++- 4 files changed, 66 insertions(+), 36 deletions(-) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index 2cd02e8..f5a4b3b 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -39,11 +39,11 @@ #' used as starting and end points. #'@param params A multidimensional array with named dimensions for computing the #' SPEI. This option overrides computation of fitting parameters. It needs -#' to be of same leadtime and time dimensions of 'exp' and a dimension named -#' 'coef' with the length of the coefficients needed for the used distribution -#' (for 'Gamma' coef dimension is of lenght 2, for 'log-Logistic' or -#' 'PearsonIII' is of length 3). If cross_validation is FALSE, the leadtime -#' dimension must be of length 1. 
+#' to be of same time dimension (specified in 'time_dim') of 'exp' and a +#' dimension named 'coef' with the length of the coefficients needed for the +#' used distribution (for 'Gamma' coef dimension is of lenght 2, for +#' 'log-Logistic' or 'PearsonIII' is of length 3). It also needs to have a +#' leadtime dimension (specified in 'leadtime_dim') of length 1. #'@param pet_exp A multidimensional array containing the Potential #' EvapoTranspiration data of 'exp'. It must have the same dimensions of the #' variable 'prlr' of 'exp'. If it is NULL it is calculated using the provided @@ -55,7 +55,8 @@ #'@param standardization A logical value indicating wether the standardization #' is computed. #'@param cross_validation A logical value indicating if cross validation is -#' done (TRUE), or not (FALSE). It is FALSE by default. +#' done (TRUE), or not (FALSE). It only will be used when 'exp_cor' and +#' is not provided. It is FALSE by default. #'@param pet_method A character string indicating the method used to compute #' the potential evapotranspiration. The accepted methods are: #' 'hargreaves' and 'hargreaves_modified', that require the data to have @@ -211,9 +212,9 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, when = Sys.time()) ) if (standardization) { - return(list(spei_exp = res_exp, params = res[[2]])) + return(list(exp = res_exp, params = res[[2]])) } else { - return(exp) + return(res_exp) } } else { source_files_exp <- lapply(exp, function(x) {x$attrs$source_files}) @@ -234,9 +235,9 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, ) if (standardization) { - return(list(spei_exp = res_exp, spei_exp_cor = res_expcor, params = res[[3]])) + return(list(exp = res_exp, exp_cor = res_expcor, params = res[[3]])) } else { - return(list(spei_exp = res_exp, spei_exp_cor = res_expcor)) + return(list(exp = res_exp, exp_cor = res_expcor)) } } return(res) @@ -289,11 +290,11 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, #' used as starting and end points. #'@param params A multidimensional array with named dimensions for computing the #' SPEI. This option overrides computation of fitting parameters. It needs -#' to be of same leadtime and time dimensions of exp and a dimension named -#' 'coef' with the length of the coefficients needed for the used distribution -#' (for 'Gamma' coef dimension is of lenght 2, for 'log-Logistic' or -#' 'PearsonIII' is of length 3). If cross_validation is FALSE, the leadtime -#' dimension must be of length 1. +#' to be of same time dimension (specified in 'time_dim') of 'exp' and a +#' dimension named 'coef' with the length of the coefficients needed for the +#' used distribution (for 'Gamma' coef dimension is of lenght 2, for +#' 'log-Logistic' or 'PearsonIII' is of length 3). It also needs to have a +#' leadtime dimension (specified in 'leadtime_dim') of length 1. #'@param pet_exp A multidimensional array containing the Potential #' EvapoTranspiration data of 'exp'. It must have the same dimensions of the #' variable 'prlr' of 'exp'. If it is NULL it is calculated using the provided @@ -305,7 +306,8 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, #'@param standardization A logical value indicating wether the standardization #' is computed. #'@param cross_validation A logical value indicating if cross validation is -#' done (TRUE), or not (FALSE). It is FALSE by default. +#' done (TRUE), or not (FALSE). It only will be used when 'exp_cor' and +#' is not provided. It is FALSE by default. 
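The new restriction on 'cross_validation' documented above can be illustrated with the toy objects created in tests/testthat/test-PeriodSPEI.R; a hedged usage sketch ('exp1', 'exp_cor1', 'lat', 'dates_exp' and 'dates_expcor' are the names used in that test file):

# Requesting cross-validation while also providing 'exp_cor' now only warns
# and falls back to cross_validation = FALSE:
res <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat,
                  dates_exp = dates_exp, dates_expcor = dates_expcor,
                  cross_validation = TRUE)
# Cross-validated fitting parameters are only produced when 'exp_cor' is NULL:
res_cv <- PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp,
                     cross_validation = TRUE)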
#'@param pet_method A character string indicating the method used to compute #' the potential evapotranspiration. The accepted methods are: #' 'hargreaves' and 'hargreaves_modified', that require the data to have @@ -645,12 +647,13 @@ PeriodSPEI <- function(exp, dates_exp, lat, if (!is.logical(cross_validation)) { stop("Parameter 'cross_validation' must be a logical value.") } - # if (cross_validation) { - # warning("Detected 'cross_validation' = TRUE. This functionality ", - # "is being developed, sorry for the inconvenience. It will ", - # "be set to FALSE.") - # cross_validation <- FALSE - # } + if (!is.null(exp_cor)) { + if (cross_validation & standardization) { + warning("Detected 'cross_validation' = TRUE. It will be set as FALSE ", + "since 'exp_cor' is provided.") + cross_validation <- FALSE + } + } ## method if (!(method %in% c('parametric', 'non-parametric'))) { @@ -741,7 +744,6 @@ PeriodSPEI <- function(exp, dates_exp, lat, spei_res[[k]] <- spei_dat[[1]] } else { - pos <- match(names(dim(data[[1]])), names(dim(data_accum))) data_accum <- aperm(data_accum, pos) diff --git a/man/CST_PeriodSPEI.Rd b/man/CST_PeriodSPEI.Rd index 70c9b21..2a7aaa6 100644 --- a/man/CST_PeriodSPEI.Rd +++ b/man/CST_PeriodSPEI.Rd @@ -66,11 +66,11 @@ used as starting and end points.} \item{params}{A multidimensional array with named dimensions for computing the SPEI. This option overrides computation of fitting parameters. It needs -to be of same leadtime and time dimensions of 'exp' and a dimension named -'coef' with the length of the coefficients needed for the used distribution -(for 'Gamma' coef dimension is of lenght 2, for 'log-Logistic' or -'PearsonIII' is of length 3). If cross_validation is FALSE, the leadtime -dimension must be of length 1.} +to be of same time dimension (specified in 'time_dim') of 'exp' and a +dimension named 'coef' with the length of the coefficients needed for the +used distribution (for 'Gamma' coef dimension is of lenght 2, for +'log-Logistic' or 'PearsonIII' is of length 3). It also needs to have a +leadtime dimension (specified in 'leadtime_dim') of length 1.} \item{pet_exp}{A multidimensional array containing the Potential EvapoTranspiration data of 'exp'. It must have the same dimensions of the @@ -86,7 +86,8 @@ provided variables with specified 'pet_method'. It is NULL by default.} is computed.} \item{cross_validation}{A logical value indicating if cross validation is -done (TRUE), or not (FALSE). It is FALSE by default.} +done (TRUE), or not (FALSE). It only will be used when 'exp_cor' and +is not provided. It is FALSE by default.} \item{pet_method}{A character string indicating the method used to compute the potential evapotranspiration. The accepted methods are: diff --git a/man/PeriodSPEI.Rd b/man/PeriodSPEI.Rd index cf2d23c..86aaa02 100644 --- a/man/PeriodSPEI.Rd +++ b/man/PeriodSPEI.Rd @@ -77,11 +77,11 @@ used as starting and end points.} \item{params}{A multidimensional array with named dimensions for computing the SPEI. This option overrides computation of fitting parameters. It needs -to be of same leadtime and time dimensions of exp and a dimension named -'coef' with the length of the coefficients needed for the used distribution -(for 'Gamma' coef dimension is of lenght 2, for 'log-Logistic' or -'PearsonIII' is of length 3). 
If cross_validation is FALSE, the leadtime -dimension must be of length 1.} +to be of same time dimension (specified in 'time_dim') of 'exp' and a +dimension named 'coef' with the length of the coefficients needed for the +used distribution (for 'Gamma' coef dimension is of lenght 2, for +'log-Logistic' or 'PearsonIII' is of length 3). It also needs to have a +leadtime dimension (specified in 'leadtime_dim') of length 1.} \item{pet_exp}{A multidimensional array containing the Potential EvapoTranspiration data of 'exp'. It must have the same dimensions of the @@ -97,7 +97,8 @@ provided variables with specified 'pet_method'. It is NULL by default.} is computed.} \item{cross_validation}{A logical value indicating if cross validation is -done (TRUE), or not (FALSE). It is FALSE by default.} +done (TRUE), or not (FALSE). It only will be used when 'exp_cor' and +is not provided. It is FALSE by default.} \item{pet_method}{A character string indicating the method used to compute the potential evapotranspiration. The accepted methods are: diff --git a/tests/testthat/test-PeriodSPEI.R b/tests/testthat/test-PeriodSPEI.R index 90622ad..85a1427 100644 --- a/tests/testthat/test-PeriodSPEI.R +++ b/tests/testthat/test-PeriodSPEI.R @@ -226,7 +226,33 @@ test_that("2. Output checks", { # ref_period # params # standarization - # cross_validation - Not working + # cross_validation + expect_warning( + PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, + dates_exp = dates_exp, dates_expcor = dates_expcor, + cross_validation = TRUE), + paste0("Detected 'cross_validation' = TRUE. It will be set as FALSE ", + "since 'exp_cor' is provided.") + ) + res_crossval_T <- PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp, + cross_validation = TRUE) + res_crossval_F <- PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp, + cross_validation = FALSE) + # cross_validation = TRUE + expect_equal( + dim(res_crossval_T$exp), + dims + ) + expect_equal( + dim(res_crossval_T$params), + dims + ) + # cross_validation = FALSE + expect_equal( + dim(res_crossval_F$params)[-which(names(dim(res_crossval_F$params)) == 'coef')], + dimscor[-which(names(dimscor) == 'ensemble')] + ) + # pet_method - ok res5 <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, dates_exp = dates_exp, dates_expcor = dates_expcor, -- GitLab From 9b861f0a0b3175848508c654dbab1fc54aca69b7 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 14 Jun 2023 17:22:57 +0200 Subject: [PATCH 17/87] Remove typo --- R/PeriodSPEI.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index f5a4b3b..077137c 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -839,7 +839,6 @@ PeriodSPEI <- function(exp, dates_exp, lat, } rm(data_tmp) } - any(is.na(data3)) if (!is.null(data3)) { data_tmp <- as.vector(data3) data3 <- array(0, dim = length(dates_monthly)) -- GitLab From e5eff31bca61b2fae01030e0ec709030425e6d2d Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Fri, 16 Jun 2023 18:01:06 +0200 Subject: [PATCH 18/87] Develop new element time_bounds with start and end elements containing the start and end dates of the aggregation for PeriodMean and PeriodAggregation; updated documentation; added tests --- DESCRIPTION | 2 +- NAMESPACE | 1 + R/PeriodAccumulation.R | 74 +++++++++++++++------- R/PeriodMean.R | 69 ++++++++++++++------- man/CST_PeriodAccumulation.Rd | 23 ++++--- man/CST_PeriodMean.Rd | 22 +++++-- man/PeriodAccumulation.Rd | 15 +++-- tests/testthat/test-PeriodAccumulation.R | 78 ++++++++++++++++++++++-- tests/testthat/test-PeriodMean.R | 63 
+++++++++++++++++-- 9 files changed, 270 insertions(+), 77 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4151ba4..4e20983 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -40,4 +40,4 @@ URL: https://earth.bsc.es/gitlab/es/csindicators/ BugReports: https://earth.bsc.es/gitlab/es/csindicators/-/issues Encoding: UTF-8 RoxygenNote: 7.2.0 -Config/testthat/edition: 3 \ No newline at end of file +Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index d80accb..0a16d4d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -26,6 +26,7 @@ export(TotalTimeExceedingThreshold) export(WindCapacityFactor) export(WindPowerDensity) import(multiApply) +importFrom(ClimProjDiags,Subset) importFrom(stats,approxfun) importFrom(stats,ecdf) importFrom(stats,quantile) diff --git a/R/PeriodAccumulation.R b/R/PeriodAccumulation.R index d181d8e..2ef738f 100644 --- a/R/PeriodAccumulation.R +++ b/R/PeriodAccumulation.R @@ -31,7 +31,10 @@ #' computation. #' #'@return A 's2dv_cube' object containing the indicator in the element -#'\code{data}. +#'\code{data}. A new element called 'time_bounds' will be added into +#' the 'attrs' element in the 's2dv_cube' object. It consists of a list +#' containing two elements, the start and end dates of the aggregated period +#' with the same dimensions of 'Dates' element. #' #'@examples #'exp <- NULL @@ -39,14 +42,16 @@ #' ftime = 9, lat = 2, lon = 2)) #'class(exp) <- 's2dv_cube' #'TP <- CST_PeriodAccumulation(exp) -#'exp$data <- array(rnorm(5 * 3 * 214 * 2), -#' c(memb = 5, sdate = 3, ftime = 214, lon = 2)) -#'exp$attrs$Dates <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), -#' as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), -#' seq(as.Date("01-05-2001", format = "%d-%m-%Y"), -#' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), -#' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), -#' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) +#'exp$data <- array(rnorm(5 * 3 * 214 * 2), +#' c(memb = 5, sdate = 3, ftime = 214, lon = 2)) +#'Dates <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), +#' as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2001", format = "%d-%m-%Y"), +#' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), +#' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) +#'dim(Dates) <- c(sdate = 3, ftime = 214) +#'exp$attrs$Dates <- Dates #'SprR <- CST_PeriodAccumulation(exp, start = list(21, 4), end = list(21, 6)) #'dim(SprR$data) #'head(SprR$attrs$Dates) @@ -55,6 +60,7 @@ #'head(HarR$attrs$Dates) #' #'@import multiApply +#'@importFrom ClimProjDiags Subset #'@export CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, time_dim = 'ftime', na.rm = FALSE, @@ -73,15 +79,29 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, } } - total <- PeriodAccumulation(data$data, dates = data$attrs$Dates, start, end, + Dates <- data$attrs$Dates + + total <- PeriodAccumulation(data$data, dates = Dates, start, end, time_dim = time_dim, na.rm = na.rm, ncores = ncores) data$data <- total - if (!is.null(start) && !is.null(end)) { - data$attrs$Dates <- SelectPeriodOnDates(dates = data$attrs$Dates, - start = start, end = end, - time_dim = time_dim, - ncores = ncores) + data$dims <- dim(total) + + if (!is.null(Dates)) { + if (!is.null(start) && !is.null(end)) { + Dates <- SelectPeriodOnDates(dates = Dates, start = start, end = end, + time_dim = time_dim, ncores = ncores) + } + + # Create time_bounds + time_bounds <- NULL + time_bounds$start 
<- ClimProjDiags::Subset(Dates, time_dim, 1, drop = 'selected') + time_bounds$end <- ClimProjDiags::Subset(Dates, time_dim, dim(Dates)[time_dim], drop = 'selected') + + # Add Dates in attrs + data$attrs$Dates <- time_bounds$start + data$attrs$time_bounds <- time_bounds } + return(data) } @@ -98,9 +118,10 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, #'} #' #'@param data A multidimensional array with named dimensions. -#'@param dates A vector of dates or a multidimensional array of dates with named -#' dimensions matching the dimensions on parameter 'data'. By default it is -#' NULL, to select a period this parameter must be provided. +#'@param dates A multidimensional array of datesW with named dimensions matching +#' the dimensions on parameter 'data'. By default it is NULL, to select a +#' period this parameter must be provided and it needs to have temporal +#' dimensions. #'@param start An optional parameter to defined the initial date of the period #' to select from the data by providing a list of two elements: the initial #' date of the period and the initial month of the period. By default it is set @@ -128,15 +149,17 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, #'TP <- PeriodAccumulation(exp, time_dim = 'ftime') #'data <- array(rnorm(5 * 3 * 214 * 2), #' c(memb = 5, sdate = 3, time = 214, lon = 2)) -#'# ftime tested #'Dates <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), #' as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), #' seq(as.Date("01-05-2001", format = "%d-%m-%Y"), #' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), #' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), #' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) -#'SprR <- PeriodAccumulation(data, dates = Dates, start = list(21, 4), end = list(21, 6)) -#'HarR <- PeriodAccumulation(data, dates = Dates, start = list(21, 8), end = list(21, 10)) +#'dim(Dates) <- c(sdate = 3, time = 214) +#'SprR <- PeriodAccumulation(data, dates = Dates, start = list(21, 4), +#' end = list(21, 6)) +#'HarR <- PeriodAccumulation(data, dates = Dates, start = list(21, 8), +#' end = list(21, 10)) #' #'@import multiApply #'@export @@ -159,8 +182,13 @@ PeriodAccumulation <- function(data, dates = NULL, start = NULL, end = NULL, stop("Parameter 'start' and 'end' must be lists indicating the ", "day and the month of the period start and end.") } - data <- SelectPeriodOnData(data, dates, start, end, - time_dim = time_dim, ncores = ncores) + if (!is.null(dim(dates))) { + data <- SelectPeriodOnData(data, dates, start, end, + time_dim = time_dim, ncores = ncores) + } else { + warning("Parameter 'dates' must have named dimensions if 'start' and ", + "'end' are not NULL. All data will be used.") + } } } total <- Apply(list(data), target_dims = time_dim, fun = sum, diff --git a/R/PeriodMean.R b/R/PeriodMean.R index 303b38e..e12cd09 100644 --- a/R/PeriodMean.R +++ b/R/PeriodMean.R @@ -31,19 +31,30 @@ #' computation. #' #'@return An 's2dv_cube' object containing the indicator in the element -#' \code{data}. +#' \code{data}. A new element called 'time_bounds' will be added into +#' the 'attrs' element in the 's2dv_cube' object. It consists of a list +#' containing two elements, the start and end dates of the aggregated period +#' with the same dimensions of 'Dates' element. 
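To make the new 'time_bounds' element described above concrete, a short hedged sketch (assuming 'exp' is an 's2dv_cube' whose 'ftime' dimension carries the monthly Dates shown in the example):

res <- CST_PeriodMean(exp, start = list(1, 12), end = list(1, 1))
res$attrs$time_bounds$start   # first date of each aggregated period
res$attrs$time_bounds$end     # last date of each aggregated period
# 'Dates' now holds the start dates of the aggregation
identical(res$attrs$Dates, res$attrs$time_bounds$start)
# both bounds keep the 'Dates' dimensions except the aggregated time dimension
dim(res$attrs$time_bounds$start)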
#' #'@examples #'exp <- NULL -#'exp$data <- array(rnorm(45), dim = c(member = 7, ftime = 8)) +#'exp$data <- array(rnorm(45), dim = c(member = 7, sdate = 6, ftime = 3)) +#'Dates <- c(seq(as.Date("2000-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2001-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), +#' seq(as.Date("2001-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2002-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), +#' seq(as.Date("2002-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2003-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), +#' seq(as.Date("2003-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2004-01-01", "%Y-%m-%d", tz = "UTC"), by = "month")) +#'dim(Dates) <- c(sdate = 4, ftime = 3) +#'exp$attrs$Dates <- Dates #'class(exp) <- 's2dv_cube' -#'exp$attrs$Dates <- c(seq(as.Date("01-07-1993", "%d-%m-%Y", tz = 'UTC'), -#' as.Date("01-08-1993","%d-%m-%Y", tz = 'UTC'), "day"), -#' seq(as.Date("01-07-1994", "%d-%m-%Y", tz = 'UTC'), -#' as.Date("01-08-1994","%d-%m-%Y", tz = 'UTC'), "day")) +#' #'SA <- CST_PeriodMean(exp) #' #'@import multiApply +#'@importFrom ClimProjDiags Subset #'@export CST_PeriodMean <- function(data, start = NULL, end = NULL, time_dim = 'ftime', na.rm = FALSE, @@ -62,22 +73,29 @@ CST_PeriodMean <- function(data, start = NULL, end = NULL, } } - total <- PeriodMean(data = data$data, dates = data$attrs$Dates, start, end, + Dates <- data$attrs$Dates + + total <- PeriodMean(data = data$data, dates = Dates, start, end, time_dim = time_dim, na.rm = na.rm, ncores = ncores) data$data <- total - original_dates <- data$attrs$Dates - if (!is.null(start) && !is.null(end)) { - data$attrs$Dates <- SelectPeriodOnDates(dates = data$attrs$Dates, - start = start, end = end, - time_dim = time_dim, - ncores = ncores) + data$dims <- dim(total) + + if (!is.null(Dates)) { + if (!is.null(start) && !is.null(end)) { + Dates <- SelectPeriodOnDates(dates = Dates, start = start, end = end, + time_dim = time_dim, ncores = ncores) + } + + # Create time_bounds + time_bounds <- NULL + time_bounds$start <- ClimProjDiags::Subset(Dates, time_dim, 1, drop = 'selected') + time_bounds$end <- ClimProjDiags::Subset(Dates, time_dim, dim(Dates)[time_dim], drop = 'selected') + + # Add Dates in attrs + data$attrs$Dates <- time_bounds$start + data$attrs$time_bounds <- time_bounds } - data$attrs$Dates <- ClimProjDiags::Subset(data$attrs$Dates, time_dim, 1, - drop = 'selected') - # Option (1) - # attr(data$attrs$Dates, 'original_dates') <- original_dates - # Option (2) - data$attrs$original_dates <- original_dates + return(data) } @@ -139,15 +157,20 @@ PeriodMean <- function(data, dates = NULL, start = NULL, end = NULL, } if (is.null(dates)) { warning("Parameter 'dates' is NULL and the Average of the ", - "full data provided in 'data' is computed.") + "full data provided in 'data' is computed.") } else { if (!is.null(start) && !is.null(end)) { if (!any(c(is.list(start), is.list(end)))) { stop("Parameter 'start' and 'end' must be lists indicating the ", - "day and the month of the period start and end.") + "day and the month of the period start and end.") + } + if (!is.null(dim(dates))) { + data <- SelectPeriodOnData(data, dates, start, end, + time_dim = time_dim, ncores = ncores) + } else { + warning("Parameter 'dates' must have named dimensions if 'start' and ", + "'end' are not NULL. 
All data will be used.") } - data <- SelectPeriodOnData(data, dates, start, end, - time_dim = time_dim, ncores = ncores) } } total <- Apply(list(data), target_dims = time_dim, fun = mean, diff --git a/man/CST_PeriodAccumulation.Rd b/man/CST_PeriodAccumulation.Rd index 3928705..71122d6 100644 --- a/man/CST_PeriodAccumulation.Rd +++ b/man/CST_PeriodAccumulation.Rd @@ -41,7 +41,10 @@ computation.} } \value{ A 's2dv_cube' object containing the indicator in the element -\code{data}. +\code{data}. A new element called 'time_bounds' will be added into + the 'attrs' element in the 's2dv_cube' object. It consists of a list + containing two elements, the start and end dates of the aggregated period + with the same dimensions of 'Dates' element. } \description{ Period Accumulation computes the sum (accumulation) of a given variable in a @@ -60,14 +63,16 @@ exp$data <- array(rnorm(216)*200, dim = c(dataset = 1, member = 2, sdate = 3, ftime = 9, lat = 2, lon = 2)) class(exp) <- 's2dv_cube' TP <- CST_PeriodAccumulation(exp) -exp$data <- array(rnorm(5 * 3 * 214 * 2), - c(memb = 5, sdate = 3, ftime = 214, lon = 2)) -exp$attrs$Dates <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), - as.Date("30-11-2000", format = "\%d-\%m-\%Y"), by = 'day'), - seq(as.Date("01-05-2001", format = "\%d-\%m-\%Y"), - as.Date("30-11-2001", format = "\%d-\%m-\%Y"), by = 'day'), - seq(as.Date("01-05-2002", format = "\%d-\%m-\%Y"), - as.Date("30-11-2002", format = "\%d-\%m-\%Y"), by = 'day')) +exp$data <- array(rnorm(5 * 3 * 214 * 2), + c(memb = 5, sdate = 3, ftime = 214, lon = 2)) +Dates <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), + as.Date("30-11-2000", format = "\%d-\%m-\%Y"), by = 'day'), + seq(as.Date("01-05-2001", format = "\%d-\%m-\%Y"), + as.Date("30-11-2001", format = "\%d-\%m-\%Y"), by = 'day'), + seq(as.Date("01-05-2002", format = "\%d-\%m-\%Y"), + as.Date("30-11-2002", format = "\%d-\%m-\%Y"), by = 'day')) +dim(Dates) <- c(sdate = 3, ftime = 214) +exp$attrs$Dates <- Dates SprR <- CST_PeriodAccumulation(exp, start = list(21, 4), end = list(21, 6)) dim(SprR$data) head(SprR$attrs$Dates) diff --git a/man/CST_PeriodMean.Rd b/man/CST_PeriodMean.Rd index b1004ad..f9db984 100644 --- a/man/CST_PeriodMean.Rd +++ b/man/CST_PeriodMean.Rd @@ -41,7 +41,10 @@ computation.} } \value{ An 's2dv_cube' object containing the indicator in the element - \code{data}. + \code{data}. A new element called 'time_bounds' will be added into + the 'attrs' element in the 's2dv_cube' object. It consists of a list + containing two elements, the start and end dates of the aggregated period + with the same dimensions of 'Dates' element. } \description{ Period Mean computes the average (mean) of a given variable in a period. 
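Related to the 'dates' handling above: when 'start'/'end' are given but the dates carry no named dimensions, the period cannot be located, so the functions now warn and fall back to the full series. A small hedged illustration (values are arbitrary):

x <- array(1:10, c(time = 10))
dts <- seq(as.Date("2000-05-01"), as.Date("2000-05-10"), by = 'day')
# 'dts' is a plain vector, so this warns and averages all 10 values
PeriodMean(x, dates = dts, time_dim = 'time',
           start = list(5, 2), end = list(5, 9))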
@@ -56,12 +59,19 @@ this function: } \examples{ exp <- NULL -exp$data <- array(rnorm(45), dim = c(member = 7, ftime = 8)) +exp$data <- array(rnorm(45), dim = c(member = 7, sdate = 6, ftime = 3)) +Dates <- c(seq(as.Date("2000-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2001-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), + seq(as.Date("2001-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2002-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), + seq(as.Date("2002-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2003-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), + seq(as.Date("2003-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2004-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month")) +dim(Dates) <- c(sdate = 4, ftime = 3) +exp$attrs$Dates <- Dates class(exp) <- 's2dv_cube' -exp$attrs$Dates <- c(seq(as.Date("01-07-1993", "\%d-\%m-\%Y", tz = 'UTC'), - as.Date("01-08-1993","\%d-\%m-\%Y", tz = 'UTC'), "day"), - seq(as.Date("01-07-1994", "\%d-\%m-\%Y", tz = 'UTC'), - as.Date("01-08-1994","\%d-\%m-\%Y", tz = 'UTC'), "day")) + SA <- CST_PeriodMean(exp) } diff --git a/man/PeriodAccumulation.Rd b/man/PeriodAccumulation.Rd index 9903321..0f55937 100644 --- a/man/PeriodAccumulation.Rd +++ b/man/PeriodAccumulation.Rd @@ -17,9 +17,10 @@ PeriodAccumulation( \arguments{ \item{data}{A multidimensional array with named dimensions.} -\item{dates}{A vector of dates or a multidimensional array of dates with named -dimensions matching the dimensions on parameter 'data'. By default it is -NULL, to select a period this parameter must be provided.} +\item{dates}{A multidimensional array of datesW with named dimensions matching +the dimensions on parameter 'data'. By default it is NULL, to select a +period this parameter must be provided and it needs to have temporal +dimensions.} \item{start}{An optional parameter to defined the initial date of the period to select from the data by providing a list of two elements: the initial @@ -64,14 +65,16 @@ exp <- array(rnorm(216)*200, dim = c(dataset = 1, member = 2, sdate = 3, TP <- PeriodAccumulation(exp, time_dim = 'ftime') data <- array(rnorm(5 * 3 * 214 * 2), c(memb = 5, sdate = 3, time = 214, lon = 2)) -# ftime tested Dates <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), as.Date("30-11-2000", format = "\%d-\%m-\%Y"), by = 'day'), seq(as.Date("01-05-2001", format = "\%d-\%m-\%Y"), as.Date("30-11-2001", format = "\%d-\%m-\%Y"), by = 'day'), seq(as.Date("01-05-2002", format = "\%d-\%m-\%Y"), as.Date("30-11-2002", format = "\%d-\%m-\%Y"), by = 'day')) -SprR <- PeriodAccumulation(data, dates = Dates, start = list(21, 4), end = list(21, 6)) -HarR <- PeriodAccumulation(data, dates = Dates, start = list(21, 8), end = list(21, 10)) +dim(Dates) <- c(sdate = 3, time = 214) +SprR <- PeriodAccumulation(data, dates = Dates, start = list(21, 4), + end = list(21, 6)) +HarR <- PeriodAccumulation(data, dates = Dates, start = list(21, 8), + end = list(21, 10)) } diff --git a/tests/testthat/test-PeriodAccumulation.R b/tests/testthat/test-PeriodAccumulation.R index 81718f5..74bb45d 100644 --- a/tests/testthat/test-PeriodAccumulation.R +++ b/tests/testthat/test-PeriodAccumulation.R @@ -1,5 +1,7 @@ +library(CSTools) + ############################################## -test_that("Sanity Checks", { +test_that("1. Sanity Checks", { expect_error( PeriodAccumulation('x'), "Parameter 'data' must be numeric." 
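The tests below exercise the new bookkeeping on the CSTools::lonlat_prec sample cube; a condensed, hedged sketch of the same check (assuming lonlat_prec carries daily dates on its 'ftime' dimension):

res <- CST_PeriodAccumulation(CSTools::lonlat_prec, time_dim = 'ftime',
                              start = list(10, 3), end = list(20, 3))
# every aggregated period is expected to start on 10 March and end on 20 March
all(lubridate::day(res$attrs$time_bounds$start) == 10)
all(lubridate::day(res$attrs$time_bounds$end) == 20)
all(lubridate::month(res$attrs$time_bounds$end) == 3)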
@@ -30,13 +32,21 @@ test_that("Sanity Checks", { PeriodAccumulation(data), array(c(9, 12, 27, 30, 45, 48, 63, 66), c(sdate = 2, lon = 4)) ) - + # Test dates warning + expect_warning( + PeriodAccumulation(array(1:10, c(time = 10)), + dates = seq(as.Date("01-05-2000", format = "%d-%m-%Y"), + as.Date("10-05-2000", format = "%d-%m-%Y"), by = 'day'), + start = list(05, 02), end = list(05, 09)), + paste0("Parameter 'dates' must have named dimensions if 'start' and 'end' ", + "are not NULL. All data will be used.") + ) }) + ############################################## -library(CSTools) -test_that("seasonal", { - exp <- CSTools::lonlat_prec +test_that("2. Seasonal", { + exp <- NULL exp$data <- array(1:(1 * 3 * 214 * 2), c(memb = 1, sdate = 3, ftime = 214, lon = 2)) exp$dims <- dim(exp$data) @@ -47,6 +57,7 @@ test_that("seasonal", { seq(as.Date("01-04-2002", format = "%d-%m-%Y"), as.Date("31-10-2002", format = "%d-%m-%Y"), by = 'day')) dim(exp$attrs$Dates) <- c(ftime = 214, sdate = 3) + class(exp) <- 's2dv_cube' output <- exp output$data <- array(c(sum(exp$data[1,1,21:82,1]), sum(exp$data[1,2,21:82,1]), sum(exp$data[1,3,21:82,1]), sum(exp$data[1,1,21:82,2]), @@ -58,3 +69,60 @@ test_that("seasonal", { output$data ) }) + +############################################## + +test_that("3. Subset Dates and check time_bounds", { + exp <- CSTools::lonlat_prec + res <- CST_PeriodAccumulation(data = CSTools::lonlat_prec, time_dim = 'ftime', + start = list(10, 03), end = list(20, 03)) + res2 <- CST_PeriodAccumulation(data = CSTools::lonlat_prec, time_dim = 'ftime') + # Check dims + expect_equal( + dim(res$data), + res$dims + ) + expect_equal( + dim(res2$data), + dim(exp$data)[-which(names(dim(exp$data)) == 'ftime')] + ) + # Check Dates + expect_equal( + dim(res$data)['sdate'], + dim(res$attrs$Dates) + ) + expect_equal( + dim(res2$data)['sdate'], + dim(res2$attrs$Dates) + ) + # Check time_bounds + expect_equal( + res$attrs$Dates, + res$attrs$time_bounds$start + ) + expect_equal( + dim(res$attrs$time_bounds$start), + dim(res$attrs$time_bounds$end) + ) + expect_equal( + res2$attrs$Dates, + res2$attrs$time_bounds$start + ) + # Check 'sdates' + expect_equal( + all(lubridate::month(res$attrs$time_bounds$start) == 3), + TRUE + ) + expect_equal( + all(lubridate::day(res$attrs$time_bounds$start) == 10), + TRUE + ) + expect_equal( + all(lubridate::month(res$attrs$time_bounds$end) == 03), + TRUE + ) + expect_equal( + all(lubridate::day(res$attrs$time_bounds$end) == 20), + TRUE + ) +}) \ No newline at end of file diff --git a/tests/testthat/test-PeriodMean.R b/tests/testthat/test-PeriodMean.R index 4186639..18b5995 100644 --- a/tests/testthat/test-PeriodMean.R +++ b/tests/testthat/test-PeriodMean.R @@ -1,5 +1,7 @@ +library(CSTools) + ############################################## -test_that("Sanity Checks", { +test_that("1. Sanity Checks", { expect_error( PeriodMean('x'), "Parameter 'data' must be numeric." @@ -31,13 +33,21 @@ test_that("Sanity Checks", { c(sdate = 2, lon = 4)) ) ) + # Test dates warning + expect_warning( + PeriodMean(array(1:10, c(time = 10)), + dates = seq(as.Date("01-05-2000", format = "%d-%m-%Y"), + as.Date("10-05-2000", format = "%d-%m-%Y"), by = 'day'), + start = list(05, 02), end = list(05, 09)), + paste0("Parameter 'dates' must have named dimensions if 'start' and 'end' ", + "are not NULL. All data will be used.") + ) }) ############################################## -library(CSTools) -test_that("seasonal", { - exp <- CSTools::lonlat_prec +test_that("2. 
Seasonal", { + exp <- NULL exp$data <- array(1:(1 * 3 * 214 * 2), c(memb = 1, sdate = 3, ftime = 214, lon = 2)) exp$dims <- dim(exp$data) @@ -48,6 +58,7 @@ test_that("seasonal", { seq(as.Date("01-04-2002", format = "%d-%m-%Y"), as.Date("31-10-2002", format = "%d-%m-%Y"), by = 'day')) dim(exp$attrs$Dates) <- c(ftime = 214, sdate = 3) + class(exp) <- 's2dv_cube' output <- exp output$data <- array(c(mean(exp$data[1,1,21:82,1]), mean(exp$data[1,2,21:82,1]), mean(exp$data[1,3,21:82,1]), mean(exp$data[1,1,21:82,2]), @@ -58,3 +69,47 @@ test_that("seasonal", { output$data ) }) + +############################################## +test_that("3. Subset Dates and check time_bounds", { + exp <- CSTools::lonlat_prec + res <- CST_PeriodMean(data = CSTools::lonlat_prec, time_dim = 'ftime', + start = list(10, 03), end = list(20, 03)) + # Check dims + expect_equal( + dim(res$data), + res$dims + ) + # Check Dates + expect_equal( + dim(res$data)['sdate'], + dim(res$attrs$Dates) + ) + # Check time_bounds + expect_equal( + res$attrs$Dates, + res$attrs$time_bounds$start + ) + expect_equal( + dim(res$attrs$time_bounds$start), + dim(res$attrs$time_bounds$end) + ) + # Check 'sdates' + expect_equal( + all(lubridate::month(res$attrs$time_bounds$start) == 3), + TRUE + ) + expect_equal( + all(lubridate::day(res$attrs$time_bounds$start) == 10), + TRUE + ) + expect_equal( + all(lubridate::month(res$attrs$time_bounds$end) == 03), + TRUE + ) + expect_equal( + all(lubridate::day(res$attrs$time_bounds$end) == 20), + TRUE + ) +}) + -- GitLab From 50f9c6ef7e7c09729a18ddf6718f4dbba03c1bd5 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Mon, 19 Jun 2023 17:25:03 +0200 Subject: [PATCH 19/87] Add time_bounds in functions that reduce time dimensions ; Add element; Add checks if dates have dimensions; add tests for this development --- R/AccumulationExceedingThreshold.R | 121 ++++++++++++------ R/PeriodAccumulation.R | 30 +++-- R/PeriodMean.R | 60 +++++---- R/QThreshold.R | 44 +++++-- R/Threshold.R | 28 ++-- R/TotalSpellTimeExceedingThreshold.R | 77 +++++++++-- R/TotalTimeExceedingThreshold.R | 78 ++++++++--- R/WindCapacityFactor.R | 48 +++++-- R/WindPowerDensity.R | 49 +++++-- man/AccumulationExceedingThreshold.Rd | 12 +- man/CST_AccumulationExceedingThreshold.Rd | 21 ++- man/CST_PeriodMean.Rd | 20 +-- man/CST_QThreshold.Rd | 10 +- man/CST_Threshold.Rd | 5 +- man/CST_TotalSpellTimeExceedingThreshold.Rd | 12 +- man/CST_TotalTimeExceedingThreshold.Rd | 11 +- man/CST_WindCapacityFactor.Rd | 13 +- man/CST_WindPowerDensity.Rd | 17 ++- man/PeriodAccumulation.Rd | 7 +- man/PeriodMean.Rd | 6 +- man/QThreshold.Rd | 22 +++- man/Threshold.Rd | 10 +- man/TotalSpellTimeExceedingThreshold.Rd | 22 +++- man/TotalTimeExceedingThreshold.Rd | 23 +++- man/WindCapacityFactor.Rd | 23 +++- man/WindPowerDensity.Rd | 21 ++- .../test-AccumulationExceedingThreshold.R | 58 ++++++++- tests/testthat/test-QThreshold.R | 36 +++++- tests/testthat/test-Threshold.R | 34 ++++- .../test-TotalSpellTimeExceedingThreshold.R | 46 ++++++- .../test-TotalTimeExceedingThreshold.R | 50 +++++++- tests/testthat/test-WindCapacityFactor.R | 28 +++- tests/testthat/test-WindPowerDensity.R | 24 ++++ 33 files changed, 811 insertions(+), 255 deletions(-) diff --git a/R/AccumulationExceedingThreshold.R b/R/AccumulationExceedingThreshold.R index e346b53..d3c7147 100644 --- a/R/AccumulationExceedingThreshold.R +++ b/R/AccumulationExceedingThreshold.R @@ -49,15 +49,29 @@ #' #'@return An 's2dv_cube' object containing the aggregated values in the element #'\code{data} with dimensions of the 
input parameter 'data' except the dimension -#'where the indicator has been computed. +#'where the indicator has been computed. A new element called 'time_bounds' will +#'be added into the 'attrs' element in the 's2dv_cube' object. It consists of a +#'list containing two elements, the start and end dates of the aggregated period +#'with the same dimensions of 'Dates' element. #'@examples #'exp <- NULL -#'exp$data <- array(rnorm(216)*200, dim = c(dataset = 1, member = 2, sdate = 3, -#' ftime = 9, lat = 2, lon = 2)) +#'exp$data <- array(abs(rnorm(5 * 3 * 214 * 2)*100), +#' c(memb = 5, sdate = 3, ftime = 214, lon = 2)) #'class(exp) <- 's2dv_cube' -#'DOT <- CST_AccumulationExceedingThreshold(exp, threshold = 280) +#'Dates <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), +#' as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2001", format = "%d-%m-%Y"), +#' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), +#' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) +#'dim(Dates) <- c(sdate = 3, ftime = 214) +#'exp$attrs$Dates <- Dates +#'AT <- CST_AccumulationExceedingThreshold(data = exp, threshold = 100, +#' start = list(21, 4), +#' end = list(21, 6)) #' #'@import multiApply +#'@importFrom ClimProjDiags Subset #'@export CST_AccumulationExceedingThreshold <- function(data, threshold, op = '>', diff = FALSE, start = NULL, end = NULL, time_dim = 'ftime', @@ -78,7 +92,7 @@ CST_AccumulationExceedingThreshold <- function(data, threshold, op = '>', diff = if (length(op) == 1) { if (inherits(threshold, 's2dv_cube')) { - threshold <- threshold$data + threshold <- threshold$data } } else if (length(op) == 2) { if (inherits(threshold[[1]], 's2dv_cube')) { @@ -89,17 +103,38 @@ CST_AccumulationExceedingThreshold <- function(data, threshold, op = '>', diff = } } - total <- AccumulationExceedingThreshold(data$data, dates = data$attrs$Dates, + Dates <- data$attrs$Dates + total <- AccumulationExceedingThreshold(data = data$data, dates = Dates, threshold = threshold, op = op, diff = diff, start = start, end = end, time_dim = time_dim, na.rm = na.rm, ncores = ncores) data$data <- total - if (!is.null(start) && !is.null(end)) { - data$attrs$Dates <- SelectPeriodOnDates(dates = data$attrs$Dates, - start = start, end = end, - time_dim = time_dim, - ncores = ncores) + data$dims <- dim(total) + + if (!is.null(Dates)) { + if (!is.null(start) && !is.null(end)) { + Dates <- SelectPeriodOnDates(dates = Dates, start = start, end = end, + time_dim = time_dim, ncores = ncores) + } + if (is.null(dim(Dates))) { + warning("Element 'Dates' has NULL dimensions. They will not be ", + "subset and 'time_bounds' will be missed.") + data$attrs$Dates <- Dates + } else { + # Create time_bounds + time_bounds <- NULL + time_bounds$start <- ClimProjDiags::Subset(x = Dates, along = time_dim, + indices = 1, drop = 'selected') + time_bounds$end <- ClimProjDiags::Subset(x = Dates, along = time_dim, + indices = dim(Dates)[time_dim], + drop = 'selected') + + # Add Dates in attrs + data$attrs$Dates <- time_bounds$start + data$attrs$time_bounds <- time_bounds + } } + return(data) } #'Accumulation of a variable when Exceeding (not exceeding) a Threshold @@ -133,9 +168,9 @@ CST_AccumulationExceedingThreshold <- function(data, threshold, op = '>', diff = #'@param diff A logical value indicating whether to accumulate the difference #' between data and threshold (TRUE) or not (FALSE by default). It can only be #' TRUE if a unique threshold is used. 
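As a usage note for the paired-operator case handled in the refactor above, a hedged sketch of accumulating only the values that fall inside a range (dimension names and sizes are illustrative):

data <- array(rnorm(5 * 3 * 214 * 2, mean = 25, sd = 3),
              c(memb = 5, sdate = 3, ftime = 214, lon = 2))
# sum the values that are > 20 and <= 30 along the 'ftime' dimension
AT <- AccumulationExceedingThreshold(data, threshold = list(20, 30),
                                     op = c('>', '<='), time_dim = 'ftime')
dim(AT)   # 'ftime' is collapsed; the other dimensions are preserved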
-#'@param dates A vector of dates or a multidimensional array with of dates with -#' named dimensions matching the dimensions on parameter 'data'. By default it -#' is NULL, to select a period this parameter must be provided. +#'@param dates A multidimensional array of dates with named dimensions matching +#' the temporal dimensions on parameter 'data'. By default it is NULL, to +#' select aperiod this parameter must be provided. #'@param start An optional parameter to define the initial date of the period #' to select from the data by providing a list of two elements: the initial #' date of the period and the initial month of the period. By default it is @@ -161,12 +196,6 @@ CST_AccumulationExceedingThreshold <- function(data, threshold, op = '>', diff = #'# Assuming data is already (tasmax + tasmin)/2 - 10 #'data <- array(rnorm(5 * 3 * 214 * 2, mean = 25, sd = 3), #' c(memb = 5, sdate = 3, ftime = 214, lon = 2)) -#'Dates <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), -#' as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), -#' seq(as.Date("01-05-2001", format = "%d-%m-%Y"), -#' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), -#' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), -#' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) #'GDD <- AccumulationExceedingThreshold(data, threshold = 0, start = list(1, 4), #' end = list(31, 10)) #'@import multiApply @@ -228,8 +257,8 @@ AccumulationExceedingThreshold <- function(data, threshold, op = '>', diff = FAL } if (length(op) == 2) { if (length(op) != length(threshold)) { - stop(paste0("If 'op' is a pair of logical operators parameter 'threshold' ", - "also has to be a pair of values.")) + stop("If 'op' is a pair of logical operators parameter 'threshold' ", + "also has to be a pair of values.") } if (!is.numeric(threshold[[1]]) | !is.numeric(threshold[[2]])) { stop("Parameter 'threshold' must be numeric.") @@ -240,7 +269,8 @@ AccumulationExceedingThreshold <- function(data, threshold, op = '>', diff = FAL if (!is.array(threshold[[1]]) && length(threshold[[1]]) > 1) { if (dim(data)[time_dim] != length(threshold[[1]])) { - stop("If parameter 'threshold' is a vector it must have the same length as data any time dimension.") + stop("If parameter 'threshold' is a vector it must have the same ", + "length as data any time dimension.") } else { dim(threshold[[1]]) <- length(threshold[[1]]) dim(threshold[[2]]) <- length(threshold[[2]]) @@ -265,8 +295,8 @@ AccumulationExceedingThreshold <- function(data, threshold, op = '>', diff = FAL if (any(names(dim(threshold[[1]])) %in% names(dim(data)))) { common_dims <- dim(threshold[[1]])[names(dim(threshold[[1]])) %in% names(dim(data))] if (!all(common_dims == dim(data)[names(common_dims)])) { - stop(paste0("Parameter 'data' and 'threshold' must have same length of ", - "all common dimensions.")) + stop("Parameter 'data' and 'threshold' must have same length of ", + "all common dimensions.") } } } else if (length(threshold[[1]]) == 1) { @@ -276,7 +306,8 @@ AccumulationExceedingThreshold <- function(data, threshold, op = '>', diff = FAL } else { if (!is.array(threshold) && length(threshold) > 1) { if (dim(data)[time_dim] != length(threshold)) { - stop("If parameter 'threshold' is a vector it must have the same length as data time dimension.") + stop("If parameter 'threshold' is a vector it must have the same ", + "length as data time dimension.") } else { dim(threshold) <- length(threshold) names(dim(threshold)) <- time_dim @@ -288,8 +319,8 @@ AccumulationExceedingThreshold <- 
function(data, threshold, op = '>', diff = FAL if (any(names(dim(threshold)) %in% names(dim(data)))) { common_dims <- dim(threshold)[names(dim(threshold)) %in% names(dim(data))] if (!all(common_dims == dim(data)[names(common_dims)])) { - stop(paste0("Parameter 'data' and 'threshold' must have same length of ", - "all common dimensions.")) + stop("Parameter 'data' and 'threshold' must have same length of ", + "all common dimensions.") } } } else if (length(threshold) == 1) { @@ -313,27 +344,41 @@ AccumulationExceedingThreshold <- function(data, threshold, op = '>', diff = FAL if (length(op) == 1) { if (time_dim %in% names(dim(threshold))) { if (dim(threshold)[time_dim] == dim(data)[time_dim]) { - threshold <- SelectPeriodOnData(threshold, dates, start, end, - time_dim = time_dim, ncores = ncores) + threshold <- SelectPeriodOnData(data = threshold, dates = dates, + start = start, end = end, + time_dim = time_dim, + ncores = ncores) } } } else if (length(op) == 2) { if (time_dim %in% names(dim(threshold[[1]]))) { if (dim(threshold[[1]])[time_dim] == dim(data)[time_dim]) { - threshold[[1]] <- SelectPeriodOnData(threshold[[1]], dates, start, end, - time_dim = time_dim, ncores = ncores) - threshold[[2]] <- SelectPeriodOnData(threshold[[2]], dates, start, end, - time_dim = time_dim, ncores = ncores) + threshold[[1]] <- SelectPeriodOnData(data = threshold[[1]], + dates = dates, start = start, + end = end, time_dim = time_dim, + ncores = ncores) + threshold[[2]] <- SelectPeriodOnData(data = threshold[[2]], dates = dates, + start = start, end = end, + time_dim = time_dim, + ncores = ncores) } } } - data <- SelectPeriodOnData(data, dates, start, end, - time_dim = time_dim, ncores = ncores) + if (!is.null(dim(dates))) { + data <- SelectPeriodOnData(data = data, dates = dates, start = start, + end = end, time_dim = time_dim, + ncores = ncores) + } else { + warning("Parameter 'dates' must have named dimensions if 'start' and ", + "'end' are not NULL. All data will be used.") + } + } } # diff if (length(op) == 2 & diff == TRUE) { - stop("Parameter 'diff' can't be TRUE if the parameter 'threshold' is a range of values.") + stop("Parameter 'diff' can't be TRUE if the parameter 'threshold' is a ", + "range of values.") } else if (diff == TRUE) { if (length(threshold) != 1) { stop("Parameter 'diff' can't be TRUE if the parameter 'threshold' is not a scalar.") @@ -345,8 +390,6 @@ AccumulationExceedingThreshold <- function(data, threshold, op = '>', diff = FAL threshold <- 0 } - ### - if (length(op) > 1) { thres1 <- threshold[[1]] thres2 <- threshold[[2]] diff --git a/R/PeriodAccumulation.R b/R/PeriodAccumulation.R index 2ef738f..c8e976e 100644 --- a/R/PeriodAccumulation.R +++ b/R/PeriodAccumulation.R @@ -80,7 +80,6 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, } Dates <- data$attrs$Dates - total <- PeriodAccumulation(data$data, dates = Dates, start, end, time_dim = time_dim, na.rm = na.rm, ncores = ncores) data$data <- total @@ -91,17 +90,21 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, Dates <- SelectPeriodOnDates(dates = Dates, start = start, end = end, time_dim = time_dim, ncores = ncores) } + if (is.null(dim(Dates))) { + warning("Element 'Dates' has NULL dimensions. 
They will not be ", + "subset and 'time_bounds' will be missed.") + data$attrs$Dates <- Dates + } else { + # Create time_bounds + time_bounds <- NULL + time_bounds$start <- ClimProjDiags::Subset(Dates, time_dim, 1, drop = 'selected') + time_bounds$end <- ClimProjDiags::Subset(Dates, time_dim, dim(Dates)[time_dim], drop = 'selected') - # Create time_bounds - time_bounds <- NULL - time_bounds$start <- ClimProjDiags::Subset(Dates, time_dim, 1, drop = 'selected') - time_bounds$end <- ClimProjDiags::Subset(Dates, time_dim, dim(Dates)[time_dim], drop = 'selected') - - # Add Dates in attrs - data$attrs$Dates <- time_bounds$start - data$attrs$time_bounds <- time_bounds + # Add Dates in attrs + data$attrs$Dates <- time_bounds$start + data$attrs$time_bounds <- time_bounds + } } - return(data) } @@ -118,10 +121,9 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, #'} #' #'@param data A multidimensional array with named dimensions. -#'@param dates A multidimensional array of datesW with named dimensions matching -#' the dimensions on parameter 'data'. By default it is NULL, to select a -#' period this parameter must be provided and it needs to have temporal -#' dimensions. +#'@param dates A multidimensional array of dates with named dimensions matching +#' the temporal dimensions on parameter 'data'. By default it is NULL, to +#' select aperiod this parameter must be provided. #'@param start An optional parameter to defined the initial date of the period #' to select from the data by providing a list of two elements: the initial #' date of the period and the initial month of the period. By default it is set diff --git a/R/PeriodMean.R b/R/PeriodMean.R index e12cd09..6acea60 100644 --- a/R/PeriodMean.R +++ b/R/PeriodMean.R @@ -38,20 +38,20 @@ #' #'@examples #'exp <- NULL -#'exp$data <- array(rnorm(45), dim = c(member = 7, sdate = 6, ftime = 3)) -#'Dates <- c(seq(as.Date("2000-11-01", "%Y-%m-%d", tz = "UTC"), -#' as.Date("2001-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), -#' seq(as.Date("2001-11-01", "%Y-%m-%d", tz = "UTC"), -#' as.Date("2002-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), -#' seq(as.Date("2002-11-01", "%Y-%m-%d", tz = "UTC"), -#' as.Date("2003-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), -#' seq(as.Date("2003-11-01", "%Y-%m-%d", tz = "UTC"), -#' as.Date("2004-01-01", "%Y-%m-%d", tz = "UTC"), by = "month")) +#'exp$data <- array(rnorm(45), dim = c(member = 7, sdate = 4, ftime = 3)) +#'Dates <- c(seq(as.Date("2000-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2001-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), +#' seq(as.Date("2001-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2002-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), +#' seq(as.Date("2002-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2003-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), +#' seq(as.Date("2003-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2004-01-01", "%Y-%m-%d", tz = "UTC"), by = "month")) #'dim(Dates) <- c(sdate = 4, ftime = 3) #'exp$attrs$Dates <- Dates #'class(exp) <- 's2dv_cube' #' -#'SA <- CST_PeriodMean(exp) +#'SA <- CST_PeriodMean(exp, start = list(01, 12), end = list(01, 01)) #' #'@import multiApply #'@importFrom ClimProjDiags Subset @@ -74,9 +74,9 @@ CST_PeriodMean <- function(data, start = NULL, end = NULL, } Dates <- data$attrs$Dates - - total <- PeriodMean(data = data$data, dates = Dates, start, end, + total <- PeriodMean(data = data$data, dates = Dates, start = start, end = end, time_dim = time_dim, na.rm = na.rm, ncores = ncores) + data$data <- total data$dims <- 
dim(total) @@ -85,17 +85,24 @@ CST_PeriodMean <- function(data, start = NULL, end = NULL, Dates <- SelectPeriodOnDates(dates = Dates, start = start, end = end, time_dim = time_dim, ncores = ncores) } - - # Create time_bounds - time_bounds <- NULL - time_bounds$start <- ClimProjDiags::Subset(Dates, time_dim, 1, drop = 'selected') - time_bounds$end <- ClimProjDiags::Subset(Dates, time_dim, dim(Dates)[time_dim], drop = 'selected') + if (is.null(dim(Dates))) { + warning("Element 'Dates' has NULL dimensions. They will not be ", + "subset and 'time_bounds' will be missed.") + data$attrs$Dates <- Dates + } else { + # Create time_bounds + time_bounds <- NULL + time_bounds$start <- ClimProjDiags::Subset(x = Dates, along = time_dim, + indices = 1, drop = 'selected') + time_bounds$end <- ClimProjDiags::Subset(x = Dates, along = time_dim, + indices = dim(Dates)[time_dim], + drop = 'selected') - # Add Dates in attrs - data$attrs$Dates <- time_bounds$start - data$attrs$time_bounds <- time_bounds + # Add Dates in attrs + data$attrs$Dates <- time_bounds$start + data$attrs$time_bounds <- time_bounds + } } - return(data) } @@ -112,9 +119,9 @@ CST_PeriodMean <- function(data, start = NULL, end = NULL, #'} #' #'@param data A multidimensional array with named dimensions. -#'@param dates A vector of dates or a multidimensional array of dates with named -#' dimensions matching the dimensions on parameter 'data'. By default it is -#' NULL, to select a period this parameter must be provided. +#'@param dates A multidimensional array of dates with named dimensions matching +#' the temporal dimensions on parameter 'data'. By default it is NULL, to +#' select aperiod this parameter must be provided. #'@param start An optional parameter to defined the initial date of the period #' to select from the data by providing a list of two elements: the initial #' date of the period and the initial month of the period. By default it is set @@ -165,8 +172,9 @@ PeriodMean <- function(data, dates = NULL, start = NULL, end = NULL, "day and the month of the period start and end.") } if (!is.null(dim(dates))) { - data <- SelectPeriodOnData(data, dates, start, end, - time_dim = time_dim, ncores = ncores) + data <- SelectPeriodOnData(data = data, dates = dates, start = start, + end = end, time_dim = time_dim, + ncores = ncores) } else { warning("Parameter 'dates' must have named dimensions if 'start' and ", "'end' are not NULL. 
All data will be used.") diff --git a/R/QThreshold.R b/R/QThreshold.R index 49217dd..0d069bb 100644 --- a/R/QThreshold.R +++ b/R/QThreshold.R @@ -57,15 +57,19 @@ #'exp$data <- array(abs(rnorm(112)*26), dim = c(member = 7, sdate = 8, ftime = 2)) #'class(exp) <- 's2dv_cube' #'exp_probs <- CST_QThreshold(exp, threshold) -#'exp$data <- array(rnorm(5 * 3 * 214 * 2), -#' c(member = 5, sdate = 3, ftime = 214, lon = 2)) +#' +#'exp$data <- array(abs(rnorm(5 * 3 * 214 * 2)*50), +#' c(member = 5, sdate = 3, ftime = 214, lon = 2)) #'exp$attrs$Dates <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), #' as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), #' seq(as.Date("01-05-2001", format = "%d-%m-%Y"), #' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), #' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), #' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) -#'exp_probs <- CST_QThreshold(exp, threshold) +#'dim(exp$attrs$Dates) <- c(sdate = 3, ftime = 214) +#'class(exp) <- 's2dv_cube' +#'exp_probs <- CST_QThreshold(exp, threshold, start = list(21, 4), +#' end = list(21, 6)) #' #'@import multiApply #'@export @@ -92,6 +96,7 @@ CST_QThreshold <- function(data, threshold, start = NULL, end = NULL, start, end, time_dim = time_dim, memb_dim = memb_dim, sdate_dim = sdate_dim, ncores = ncores) data$data <- probs + data$dims <- dim(probs) if (!is.null(start) && !is.null(end)) { data$attrs$Dates <- SelectPeriodOnDates(dates = data$attrs$Dates, start = start, end = end, @@ -127,9 +132,9 @@ CST_QThreshold <- function(data, threshold, start = NULL, end = NULL, #'@param threshold A multidimensional array with named dimensions in the same #' units as parameter 'data' and with the common dimensions of the element #' 'data' of the same length. -#'@param dates A vector of dates or a multidimensional array of dates with named -#' dimensions matching the dimensions on parameter 'data'. By default it is -#' NULL, to select a period this parameter must be provided. +#'@param dates A multidimensional array of dates with named dimensions matching +#' the temporal dimensions on parameter 'data'. By default it is NULL, to +#' select aperiod this parameter must be provided. #'@param start An optional parameter to defined the initial date of the period #' to select from the data by providing a list of two elements: the initial #' date of the period and the initial month of the period. 
By default it is set @@ -157,14 +162,24 @@ CST_QThreshold <- function(data, threshold, start = NULL, end = NULL, #'@examples #'threshold = 25 #'data <- array(rnorm(5 * 3 * 20 * 2, mean = 26), -#' c(member = 5, sdate = 3, time = 20, lon = 2)) -#'thres_q <- QThreshold(data, threshold) +#' c(member = 5, sdate = 3, time = 214, lon = 2)) +#' +#'Dates <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), +#' as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2001", format = "%d-%m-%Y"), +#' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), +#' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) +#'dim(Dates) <- c(sdate = 3, time = 214) +#' +#'thres_q <- QThreshold(data, threshold, dates = Dates, time_dim = 'time', +#' start = list(21, 4), end = list(21, 6)) #' #'@import multiApply #'@export QThreshold <- function(data, threshold, dates = NULL, start = NULL, end = NULL, - time_dim = 'time', memb_dim = 'member', sdate_dim = 'sdate', - ncores = NULL) { + time_dim = 'ftime', memb_dim = 'member', + sdate_dim = 'sdate', ncores = NULL) { # Initial checks ## data if (is.null(data)) { @@ -220,8 +235,13 @@ QThreshold <- function(data, threshold, dates = NULL, start = NULL, end = NULL, } } } - data <- SelectPeriodOnData(data, dates, start, end, - time_dim = time_dim, ncores = ncores) + if (!is.null(dim(dates))) { + data <- SelectPeriodOnData(data = data, dates = dates, start = start, + end = end, time_dim = time_dim, ncores = ncores) + } else { + warning("Parameter 'dates' must have named dimensions if 'start' and ", + "'end' are not NULL. All data will be used.") + } } } if (length(threshold) == 1) { diff --git a/R/Threshold.R b/R/Threshold.R index 3122c12..d76cbfa 100644 --- a/R/Threshold.R +++ b/R/Threshold.R @@ -8,7 +8,7 @@ #'@param data An 's2dv_cube' object as provided function \code{CST_Load} in #' package CSTools. #'@param threshold A single scalar or vector indicating the relative -#' threshold(s). +#' threshold(s). It must contain values between 0 and 1. #'@param start An optional parameter to defined the initial date of the period #' to selectfrom the data by providing a list of two elements: the initial date #' of the period and the initial month of the period. By default it is set to @@ -40,13 +40,14 @@ #'threshold <- 0.9 #'exp <- NULL #'exp$data <- array(rnorm(5 * 3 * 214 * 2), -#' c(member = 5, sdate = 3, ftime = 214, lon = 2)) +#' c(member = 5, sdate = 3, ftime = 214, lon = 2)) #'exp$attrs$Dates <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), #' as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), #' seq(as.Date("01-05-2001", format = "%d-%m-%Y"), #' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), #' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), #' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) +#'dim(exp$attrs$Dates) <- c(sdate = 3, ftime = 214) #'class(exp) <- 's2dv_cube' #'exp_probs <- CST_Threshold(exp, threshold, start = list(21, 4), end = list(21, 6)) #' @@ -74,6 +75,7 @@ CST_Threshold <- function(data, threshold, start = NULL, end = NULL, time_dim = time_dim, memb_dim = memb_dim, sdate_dim = sdate_dim, na.rm = na.rm, ncores = ncores) data$data <- thres + data$dims <- dim(thres) if (!is.null(start) && !is.null(end)) { data$attrs$Dates <- SelectPeriodOnDates(dates = data$attrs$Dates, start = start, end = end, @@ -91,10 +93,10 @@ CST_Threshold <- function(data, threshold, start = NULL, end = NULL, #' #'@param data A multidimensional array with named dimensions. 
#'@param threshold A single scalar or vector indicating the relative -#' threshold(s). -#'@param dates A vector of dates or a multidimensional array of dates with named -#' dimensions matching the dimensions on parameter 'data'. By default it is -#' NULL, to select a period this parameter must be provided. +#' threshold(s). It must contain values between 0 and 1. +#'@param dates A multidimensional array of dates with named dimensions matching +#' the temporal dimensions on parameter 'data'. By default it is NULL, to +#' select aperiod this parameter must be provided. #'@param start An optional parameter to defined the initial date of the period #' to select from the data by providing a list of two elements: the initial #' date of the period and the initial month of the period. By default it is set @@ -134,7 +136,7 @@ CST_Threshold <- function(data, threshold, start = NULL, end = NULL, #'@importFrom stats quantile #'@export Threshold <- function(data, threshold, dates = NULL, start = NULL, end = NULL, - time_dim = 'time', memb_dim = 'member', sdate_dim = 'sdate', + time_dim = 'ftime', memb_dim = 'member', sdate_dim = 'sdate', na.rm = FALSE, ncores = NULL) { if (is.null(data)) { stop("Parameter 'data' cannot be NULL.") @@ -147,7 +149,7 @@ Threshold <- function(data, threshold, dates = NULL, start = NULL, end = NULL, names(dim(data)) <- c(memb_dim, sdate_dim) } if (is.null(threshold)) { - stop("Parameter 'threshold' cannot be NULL.") + stop("Parameter 'threshold' cannot be NULL.") } if (!is.numeric(threshold)) { stop("Parameter 'threshold' must be numeric.") @@ -161,8 +163,14 @@ Threshold <- function(data, threshold, dates = NULL, start = NULL, end = NULL, stop("Parameter 'start' and 'end' must be lists indicating the ", "day and the month of the period start and end.") } - data <- SelectPeriodOnData(data, dates, start, end, - time_dim = time_dim, ncores = ncores) + if (!is.null(dim(dates))) { + data <- SelectPeriodOnData(data = data, dates = dates, start = start, + end = end, time_dim = time_dim, + ncores = ncores) + } else { + warning("Parameter 'dates' must have named dimensions if 'start' and ", + "'end' are not NULL. All data will be used.") + } } } if (!is.null(memb_dim)) { diff --git a/R/TotalSpellTimeExceedingThreshold.R b/R/TotalSpellTimeExceedingThreshold.R index 3ee22a2..5d649b4 100644 --- a/R/TotalSpellTimeExceedingThreshold.R +++ b/R/TotalSpellTimeExceedingThreshold.R @@ -48,7 +48,12 @@ #' computation. #' #'@return An 's2dv_cube' object containing the number of days that are part of a -#'spell within a threshold in element \code{data}. +#'spell within a threshold in element \code{data} with dimensions of the input +#'parameter 'data' except the dimension where the indicator has been computed. +#'A new element called 'time_bounds' will be added into the 'attrs' element in +#'the 's2dv_cube' object. It consists of a list containing two elements, the +#'start and end dates of the aggregated period with the same dimensions of +#''Dates' element. 
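A minimal sketch of how the new 'time_bounds' element described above can be inspected (assuming 'exp' is the 's2dv_cube' built in the examples below; this is an illustration, not part of the patch):

res <- CST_TotalSpellTimeExceedingThreshold(exp, threshold = 23, spell = 3,
                                            start = list(21, 4), end = list(21, 6))
str(res$attrs$time_bounds)                 # list with elements 'start' and 'end'
identical(res$attrs$Dates, res$attrs$time_bounds$start)   # TRUE: 'Dates' keeps the period start
dim(res$attrs$time_bounds$start)           # same dimensions as res$attrs$Dates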
#' #'@examples #'exp <- NULL @@ -60,10 +65,14 @@ #' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), #' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), #' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) +#'dim(exp$attrs$Dates) <- c(sdate = 3, ftime = 214) #'class(exp) <- 's2dv_cube' -#'TTSET <- CST_TotalSpellTimeExceedingThreshold(exp, threshold = 23, spell = 3) +#'TTSET <- CST_TotalSpellTimeExceedingThreshold(exp, threshold = 23, spell = 3, +#' start = list(21, 4), +#' end = list(21, 6)) #' #'@import multiApply +#'@importFrom ClimProjDiags Subset #'@export CST_TotalSpellTimeExceedingThreshold <- function(data, threshold, spell, op = '>', start = NULL, end = NULL, @@ -95,19 +104,41 @@ CST_TotalSpellTimeExceedingThreshold <- function(data, threshold, spell, op = '> threshold[[2]] <- threshold[[2]]$data } } + + Dates <- data$attrs$Dates - total <- TotalSpellTimeExceedingThreshold(data$data, data$attrs$Dates, + total <- TotalSpellTimeExceedingThreshold(data$data, Dates, threshold = threshold, spell = spell, op = op, start = start, end = end, time_dim = time_dim, ncores = ncores) data$data <- total - if (!is.null(start) && !is.null(end)) { - data$attrs$Dates <- SelectPeriodOnDates(dates = data$attrs$Dates, - start = start, end = end, - time_dim = time_dim, - ncores = ncores) + data$dims <- dim(total) + + if (!is.null(Dates)) { + if (!is.null(start) && !is.null(end)) { + Dates <- SelectPeriodOnDates(dates = Dates, start = start, end = end, + time_dim = time_dim, ncores = ncores) + } + if (is.null(dim(Dates))) { + warning("Element 'Dates' has NULL dimensions. They will not be ", + "subset and 'time_bounds' will be missed.") + data$attrs$Dates <- Dates + } else { + # Create time_bounds + time_bounds <- NULL + time_bounds$start <- ClimProjDiags::Subset(x = Dates, along = time_dim, + indices = 1, drop = 'selected') + time_bounds$end <- ClimProjDiags::Subset(x = Dates, along = time_dim, + indices = dim(Dates)[time_dim], + drop = 'selected') + + # Add Dates in attrs + data$attrs$Dates <- time_bounds$start + data$attrs$time_bounds <- time_bounds + } } + return(data) } #'Total Spell Time Exceeding Threshold @@ -143,9 +174,9 @@ CST_TotalSpellTimeExceedingThreshold <- function(data, threshold, spell, op = '> #' are used it has to be a vector of a pair of two logical operators: #' c('<', '>'), c('<', '>='), c('<=', '>'), c('<=', '>='), c('>', '<'), #' c('>', '<='), c('>=', '<'),c('>=', '<=')). -#'@param dates A vector of dates or a multidimensional array of dates with named -#' dimensions matching the dimensions on parameter 'data'. By default it is -#' NULL, to select a period this parameter must be provided. +#'@param dates A multidimensional array of dates with named dimensions matching +#' the temporal dimensions on parameter 'data'. By default it is NULL, to +#' select aperiod this parameter must be provided. #'@param start An optional parameter to defined the initial date of the period #' to select from the data by providing a list of two elements: the initial #' date of the period and the initial month of the period. By default it is set @@ -171,9 +202,19 @@ CST_TotalSpellTimeExceedingThreshold <- function(data, threshold, spell, op = '> #'values by values exceeding the threshold. 
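A minimal sketch of the NA substitution suggested above, so that missing values do not terminate a spell (the array, threshold and spell length are illustrative assumptions only):

data <- array(c(1:99, NA), c(member = 5, sdate = 2, ftime = 10))
threshold <- 50
data[is.na(data)] <- threshold + 1   # NAs now count as exceeding the threshold
total <- TotalSpellTimeExceedingThreshold(data, threshold, spell = 3,
                                          time_dim = 'ftime')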
#'@examples -#'data <- array(rnorm(120), c(member = 1, sdate = 2, ftime = 20, lat = 4)) -#'threshold <- array(rnorm(4), c(lat = 4)) -#'total <- TotalSpellTimeExceedingThreshold(data, threshold, spell = 6) +#'data <- array(1:100, c(member = 5, sdate = 3, ftime = 214, lon = 2)) +#'Dates <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), +#' as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2001", format = "%d-%m-%Y"), +#' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), +#' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) +#'dim(Dates) <- c(sdate = 3, ftime = 214) +#' +#'threshold <- array(1:4, c(lat = 4)) +#'total <- TotalSpellTimeExceedingThreshold(data, threshold, dates = Dates, +#' spell = 6, start = list(21, 4), +#' end = list(21, 6)) #' #'@import multiApply #'@export @@ -334,6 +375,14 @@ TotalSpellTimeExceedingThreshold <- function(data, threshold, spell, op = '>', } } } + if (!is.null(dim(dates))) { + data <- SelectPeriodOnData(data = data, dates = dates, start = start, + end = end, time_dim = time_dim, + ncores = ncores) + } else { + warning("Parameter 'dates' must have named dimensions if 'start' and ", + "'end' are not NULL. All data will be used.") + } data <- SelectPeriodOnData(data, dates, start, end, time_dim = time_dim, ncores = ncores) } diff --git a/R/TotalTimeExceedingThreshold.R b/R/TotalTimeExceedingThreshold.R index ceda1ee..49fa988 100644 --- a/R/TotalTimeExceedingThreshold.R +++ b/R/TotalTimeExceedingThreshold.R @@ -54,11 +54,14 @@ #' #'@return An 's2dv_cube' object containing in element \code{data} the total #'number of the corresponding units of the data frequency that a variable is -#'exceeding a threshold during a period. +#'exceeding a threshold during a period. A new element called 'time_bounds' will +#'be added into the 'attrs' element in the 's2dv_cube' object. It consists of a +#'list containing two elements, the start and end dates of the aggregated period +#'with the same dimensions of 'Dates' element. 
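A hedged sketch of counting time steps between two bounds, assuming the list-of-two threshold form that the wrapper code below unwraps together with a pair of operators (array dimensions and values are arbitrary assumptions):

data <- array(rnorm(5 * 3 * 214 * 2) * 23,
              c(member = 5, sdate = 3, ftime = 214, lon = 2))
# number of time steps with values strictly between 10 and 30
between <- TotalTimeExceedingThreshold(data, threshold = list(10, 30),
                                       op = c('>', '<'), time_dim = 'ftime')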
#' #'@examples #'exp <- NULL -#'exp$data <- array(abs(rnorm(5 * 3 * 214 * 2)*280), +#'exp$data <- array(rnorm(5 * 3 * 214 * 2)*23, #' c(member = 5, sdate = 3, ftime = 214, lon = 2)) #'exp$attrs$Dates <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), #' as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), @@ -66,10 +69,13 @@ #' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), #' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), #' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) +#'dim(exp$attrs$Dates) <- c(sdate = 3, ftime = 214) #'class(exp) <- 's2dv_cube' -#'DOT <- CST_TotalTimeExceedingThreshold(exp, threshold = 280) +#'DOT <- CST_TotalTimeExceedingThreshold(exp, threshold = 23, start = list(21, 4), +#' end = list(21, 6)) #' #'@import multiApply +#'@importFrom ClimProjDiags Subset #'@export CST_TotalTimeExceedingThreshold <- function(data, threshold, op = '>', start = NULL, end = NULL, @@ -101,17 +107,38 @@ CST_TotalTimeExceedingThreshold <- function(data, threshold, op = '>', threshold[[2]] <- threshold[[2]]$data } } - total <- TotalTimeExceedingThreshold(data$data, dates = data$attrs$Dates, + + Dates <- data$attrs$Dates + total <- TotalTimeExceedingThreshold(data = data$data, dates = Dates, threshold = threshold, op = op, start = start, end = end, time_dim = time_dim, na.rm = na.rm, ncores = ncores) data$data <- total - if (!is.null(start) && !is.null(end)) { - data$attrs$Dates <- SelectPeriodOnDates(dates = data$attrs$Dates, - start = start, end = end, - time_dim = time_dim, - ncores = ncores) + data$dims <- dim(total) + + if (!is.null(Dates)) { + if (!is.null(start) && !is.null(end)) { + Dates <- SelectPeriodOnDates(dates = Dates, start = start, end = end, + time_dim = time_dim, ncores = ncores) + } + if (is.null(dim(Dates))) { + warning("Element 'Dates' has NULL dimensions. They will not be ", + "subset and 'time_bounds' will be missed.") + data$attrs$Dates <- Dates + } else { + # Create time_bounds + time_bounds <- NULL + time_bounds$start <- ClimProjDiags::Subset(x = Dates, along = time_dim, + indices = 1, drop = 'selected') + time_bounds$end <- ClimProjDiags::Subset(x = Dates, along = time_dim, + indices = dim(Dates)[time_dim], + drop = 'selected') + + # Add Dates in attrs + data$attrs$Dates <- time_bounds$start + data$attrs$time_bounds <- time_bounds + } } return(data) } @@ -151,9 +178,9 @@ CST_TotalTimeExceedingThreshold <- function(data, threshold, op = '>', #' are used it has to be a vector of a pair of two logical operators: #' c('<', '>'), c('<', '>='), c('<=', '>'), c('<=', '>='), c('>', '<'), #' c('>', '<='), c('>=', '<'),c('>=', '<=')). -#'@param dates A vector of dates or a multidimensional array of dates with named -#' dimensions matching the dimensions on parameter 'data'. By default it is -#' NULL, to select a period this parameter must be provided. +#'@param dates A multidimensional array of dates with named dimensions matching +#' the temporal dimensions on parameter 'data'. By default it is NULL, to +#' select aperiod this parameter must be provided. #'@param start An optional parameter to define the initial date of the period #' to select from the data by providing a list of two elements: the initial #' date of the period and the initial month of the period. 
By default it is set @@ -173,12 +200,21 @@ CST_TotalTimeExceedingThreshold <- function(data, threshold, op = '>', #' #'@return A multidimensional array with named dimensions containing the total #'number of the corresponding units of the data frequency that a variable is -#'exceeding a threshold during a period. +#'exceeding a threshold during a period with dimensions of the input parameter +#''data' except the dimension where the indicator has been computed. #' #'@examples -#'exp <- array(abs(rnorm(5 * 3 * 214 * 2)*280), -#' c(member = 5, sdate = 3, ftime = 214, lon = 2)) -#'DOT <- TotalTimeExceedingThreshold(exp, threshold = 300, time_dim = 'ftime') +#'data <- array(rnorm(5 * 3 * 214 * 2)*23, +#' c(member = 5, sdate = 3, ftime = 214, lon = 2)) +#'Dates <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), +#' as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2001", format = "%d-%m-%Y"), +#' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), +#' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) +#'dim(Dates) <- c(sdate = 3, ftime = 214) +#'DOT <- TotalTimeExceedingThreshold(data, threshold = 23, dates = Dates, +#' start = list(21, 4), end = list(21, 6)) #' #'@import multiApply #'@export @@ -336,8 +372,14 @@ TotalTimeExceedingThreshold <- function(data, threshold, op = '>', } } } - data <- SelectPeriodOnData(data, dates, start, end, - time_dim = time_dim, ncores = ncores) + if (!is.null(dim(dates))) { + data <- SelectPeriodOnData(data = data, dates = dates, start = start, + end = end, time_dim = time_dim, + ncores = ncores) + } else { + warning("Parameter 'dates' must have named dimensions if 'start' and ", + "'end' are not NULL. All data will be used.") + } } } diff --git a/R/WindCapacityFactor.R b/R/WindCapacityFactor.R index 8ed2084..906ade3 100644 --- a/R/WindCapacityFactor.R +++ b/R/WindCapacityFactor.R @@ -42,14 +42,23 @@ #'@examples #'wind <- NULL #'wind$data <- array(rweibull(n = 100, shape = 2, scale = 6), -#' c(member = 10, lat = 2, lon = 5)) +#' c(member = 5, sdate = 3, ftime = 214, lon = 2, lat = 5)) #'wind$coords <- list(lat = c(40, 41), lon = 1:5) #'variable <- list(varName = 'sfcWind', #' metadata = list(sfcWind = list(level = 'Surface'))) #'wind$attrs <- list(Variable = variable, Datasets = 'synthetic', #' when = Sys.time(), Dates = '1990-01-01 00:00:00') +#'Dates <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), +#' as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2001", format = "%d-%m-%Y"), +#' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), +#' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) +#'dim(Dates) <- c(sdate = 3, ftime = 214) +#'wind$attrs$Dates <- Dates #'class(wind) <- 's2dv_cube' -#'WCF <- CST_WindCapacityFactor(wind, IEC_class = "III") +#'WCF <- CST_WindCapacityFactor(wind, IEC_class = "III", +#' start = list(21, 4), end = list(21, 6)) #' #'@export CST_WindCapacityFactor <- function(wind, IEC_class = c("I", "I/II", "II", "II/III", "III"), @@ -73,6 +82,8 @@ CST_WindCapacityFactor <- function(wind, IEC_class = c("I", "I/II", "II", "II/II dates = wind$attrs$Dates, start = start, end = end, ncores = ncores) wind$data <- WindCapacity + wind$dims <- dim(WindCapacity) + if ('Variable' %in% names(wind$attrs)) { if ('varName' %in% names(wind$attrs$Variable)) { wind$attrs$Variable$varName <- 'WindCapacityFactor' @@ -111,9 +122,9 @@ CST_WindCapacityFactor <- function(wind, 
IEC_class = c("I", "I/II", "II", "II/II #' respectively. Classes \code{'I/II'} and \code{'II/III'} indicate #' intermediate turbines that fit both classes. More details of the five #' turbines and a plot of its power curves can be found in Lledó et al. (2019). -#'@param dates A vector of dates or a multidimensional array of dates with named -#' dimensions matching the dimensions on parameter 'data'. By default it is -#' NULL, to select a period this parameter must be provided. +#'@param dates A multidimensional array of dates with named dimensions matching +#' the temporal dimensions on parameter 'data'. By default it is NULL, to +#' select aperiod this parameter must be provided. #'@param start An optional parameter to defined the initial date of the period #' to select from the data by providing a list of two elements: the initial #' date of the period and the initial month of the period. By default it is set @@ -134,15 +145,26 @@ CST_WindCapacityFactor <- function(wind, IEC_class = c("I", "I/II", "II", "II/II #' Capacity Factor (unitless). #' #'@examples -#'wind <- rweibull(n = 100, shape = 2, scale = 6) -#'WCF <- WindCapacityFactor(wind, IEC_class = "III") +#'wind <- array(rweibull(n = 32100, shape = 2, scale = 6), +#' c(member = 5, sdate = 3, ftime = 214, lon = 2, lat = 5)) +#' +#'Dates <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), +#' as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2001", format = "%d-%m-%Y"), +#' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), +#' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) +#'dim(Dates) <- c(sdate = 3, ftime = 214) +#' +#'WCF <- WindCapacityFactor(wind, IEC_class = "III", dates = Dates, +#' start = list(21, 4), end = list(21, 6)) #' #'@importFrom stats approxfun #'@importFrom utils read.delim #'@export WindCapacityFactor <- function(wind, IEC_class = c("I", "I/II", "II", "II/III", "III"), dates = NULL, start = NULL, end = NULL, - time_dim = 'time', ncores = NULL) { + time_dim = 'ftime', ncores = NULL) { IEC_class <- match.arg(IEC_class) pc_files <- c( "I" = "Enercon_E70_2.3MW.txt", @@ -159,8 +181,14 @@ WindCapacityFactor <- function(wind, IEC_class = c("I", "I/II", "II", "II/III", stop("Parameter 'start' and 'end' must be lists indicating the ", "day and the month of the period start and end.") } - wind <- SelectPeriodOnData(wind, dates, start, end, - time_dim = time_dim, ncores = ncores) + if (!is.null(dim(dates))) { + wind <- SelectPeriodOnData(data = wind, dates = dates, start = start, + end = end, time_dim = time_dim, + ncores = ncores) + } else { + warning("Parameter 'wind' must have named dimensions if 'start' and ", + "'end' are not NULL. All data will be used.") + } } } diff --git a/R/WindPowerDensity.R b/R/WindPowerDensity.R index 3578209..e9c70f6 100644 --- a/R/WindPowerDensity.R +++ b/R/WindPowerDensity.R @@ -6,8 +6,8 @@ #'@description It is computed as 0.5*ro*wspd^3. As this function is non-linear, #'it will give inaccurate results if used with period means. #' -#'@param wind An s2dv_cube object with instantaneous wind speeds expressed in m/s -#' obtained from CST_Load or s2dv_cube functions from CSTools pacakge. +#'@param wind An 's2dv_cube' object with instantaneous wind speeds expressed in +#' m/s obtained from CST_Load or s2dv_cube functions from CSTools pacakge. #'@param ro A scalar, or alternatively a multidimensional array with the same #' dimensions as wind, with the air density expressed in kg/m^3. 
By default it #' takes the value 1.225, the standard density of air at 15ºC and 1013.25 hPa. @@ -32,14 +32,23 @@ #'@examples #'wind <- NULL #'wind$data <- array(rweibull(n = 100, shape = 2, scale = 6), -#' c(member = 10, lat = 2, lon = 5)) +#' c(member = 5, sdate = 3, ftime = 214, lon = 2, lat = 5)) #'wind$coords <- list(lat = c(40, 41), lon = 1:5) #'variable <- list(varName = 'sfcWind', #' metadata = list(sfcWind = list(level = 'Surface'))) #'wind$attrs <- list(Variable = variable, Datasets = 'synthetic', #' when = Sys.time(), Dates = '1990-01-01 00:00:00') +#'Dates <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), +#' as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2001", format = "%d-%m-%Y"), +#' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), +#' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) +#'dim(Dates) <- c(sdate = 3, ftime = 214) +#'wind$attrs$Dates <- Dates #'class(wind) <- 's2dv_cube' -#'WCF <- CST_WindPowerDensity(wind) +#'WPD <- CST_WindPowerDensity(wind, start = list(21, 4), +#' end = list(21, 6)) #' #'@export CST_WindPowerDensity <- function(wind, ro = 1.225, start = NULL, end = NULL, @@ -61,6 +70,7 @@ CST_WindPowerDensity <- function(wind, ro = 1.225, start = NULL, end = NULL, dates = wind$attrs$Dates, start = start, end = end, ncores = ncores) wind$data <- WindPower + wind$dims <- dim(WindPower) if ('Variable' %in% names(wind$attrs)) { if ('varName' %in% names(wind$attrs$Variable)) { wind$attrs$Variable$varName <- 'WindPowerDensity' @@ -88,9 +98,9 @@ CST_WindPowerDensity <- function(wind, ro = 1.225, start = NULL, end = NULL, #'@param ro A scalar, or alternatively a multidimensional array with the same #' dimensions as wind, with the air density expressed in kg/m^3. By default it #' takes the value 1.225, the standard density of air at 15ºC and 1013.25 hPa. -#'@param dates A vector of dates or a multidimensional array of dates with named -#' dimensions matching the dimensions on parameter 'data'. By default it is -#' NULL, to select a period this parameter must be provided. +#'@param dates A multidimensional array of dates with named dimensions matching +#' the temporal dimensions on parameter 'data'. By default it is NULL, to +#' select aperiod this parameter must be provided. #'@param start An optional parameter to defined the initial date of the period #' to select from the data by providing a list of two elements: the initial #' date of the period and the initial month of the period. By default it is set @@ -111,20 +121,35 @@ CST_WindPowerDensity <- function(wind, ro = 1.225, start = NULL, end = NULL, #'Density expressed in W/m^2. 
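A small numeric illustration of the 0.5*ro*wspd^3 formula and of why period-mean speeds give inaccurate results (values are arbitrary):

ro   <- 1.225                  # standard air density [kg/m^3]
wspd <- c(4, 8, 12)            # instantaneous wind speeds [m/s]
0.5 * ro * wspd^3              # instantaneous WPD [W/m^2]
mean(0.5 * ro * wspd^3)        # mean of instantaneous WPD: 470.4 W/m^2
0.5 * ro * mean(wspd)^3        # WPD of the mean speed: 313.6 W/m^2 (underestimate)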
#' #'@examples -#'wind <- rweibull(n = 100, shape = 2, scale = 6) -#'WPD <- WindPowerDensity(wind) +#'wind <- array(rweibull(n = 32100, shape = 2, scale = 6), +#' c(member = 5, sdate = 3, ftime = 214, lon = 2, lat = 5)) +#'Dates <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), +#' as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2001", format = "%d-%m-%Y"), +#' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), +#' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) +#'dim(Dates) <- c(sdate = 3, ftime = 214) +#'WPD <- WindPowerDensity(wind, dates = Dates, start = list(21, 4), +#' end = list(21, 6)) #' #'@export WindPowerDensity <- function(wind, ro = 1.225, dates = NULL, start = NULL, - end = NULL, time_dim = 'time', ncores = NULL) { + end = NULL, time_dim = 'ftime', ncores = NULL) { if (!is.null(dates)) { if (!is.null(start) && !is.null(end)) { if (!any(c(is.list(start), is.list(end)))) { stop("Parameter 'start' and 'end' must be lists indicating the ", "day and the month of the period start and end.") } - wind <- SelectPeriodOnData(wind, dates, start, end, - time_dim = time_dim, ncores = ncores) + if (!is.null(dim(dates))) { + wind <- SelectPeriodOnData(data = wind, dates = dates, start = start, + end = end, time_dim = time_dim, + ncores = ncores) + } else { + warning("Parameter 'wind' must have named dimensions if 'start' and ", + "'end' are not NULL. All data will be used.") + } } } return(0.5 * ro * wind^3) diff --git a/man/AccumulationExceedingThreshold.Rd b/man/AccumulationExceedingThreshold.Rd index 172592c..0f9a64a 100644 --- a/man/AccumulationExceedingThreshold.Rd +++ b/man/AccumulationExceedingThreshold.Rd @@ -39,9 +39,9 @@ c('>', '<='), c('>=', '<'),c('>=', '<=')).} between data and threshold (TRUE) or not (FALSE by default). It can only be TRUE if a unique threshold is used.} -\item{dates}{A vector of dates or a multidimensional array with of dates with -named dimensions matching the dimensions on parameter 'data'. By default it -is NULL, to select a period this parameter must be provided.} +\item{dates}{A multidimensional array of dates with named dimensions matching +the temporal dimensions on parameter 'data'. 
By default it is NULL, to +select aperiod this parameter must be provided.} \item{start}{An optional parameter to define the initial date of the period to select from the data by providing a list of two elements: the initial @@ -86,12 +86,6 @@ function: # Assuming data is already (tasmax + tasmin)/2 - 10 data <- array(rnorm(5 * 3 * 214 * 2, mean = 25, sd = 3), c(memb = 5, sdate = 3, ftime = 214, lon = 2)) -Dates <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), - as.Date("30-11-2000", format = "\%d-\%m-\%Y"), by = 'day'), - seq(as.Date("01-05-2001", format = "\%d-\%m-\%Y"), - as.Date("30-11-2001", format = "\%d-\%m-\%Y"), by = 'day'), - seq(as.Date("01-05-2002", format = "\%d-\%m-\%Y"), - as.Date("30-11-2002", format = "\%d-\%m-\%Y"), by = 'day')) GDD <- AccumulationExceedingThreshold(data, threshold = 0, start = list(1, 4), end = list(31, 10)) } diff --git a/man/CST_AccumulationExceedingThreshold.Rd b/man/CST_AccumulationExceedingThreshold.Rd index bc0eb83..f14c33b 100644 --- a/man/CST_AccumulationExceedingThreshold.Rd +++ b/man/CST_AccumulationExceedingThreshold.Rd @@ -63,7 +63,10 @@ computation.} \value{ An 's2dv_cube' object containing the aggregated values in the element \code{data} with dimensions of the input parameter 'data' except the dimension -where the indicator has been computed. +where the indicator has been computed. A new element called 'time_bounds' will +be added into the 'attrs' element in the 's2dv_cube' object. It consists of a +list containing two elements, the start and end dates of the aggregated period +with the same dimensions of 'Dates' element. } \description{ The accumulation (sum) of a variable in the days (or time steps) that the @@ -80,9 +83,19 @@ function: } \examples{ exp <- NULL -exp$data <- array(rnorm(216)*200, dim = c(dataset = 1, member = 2, sdate = 3, - ftime = 9, lat = 2, lon = 2)) +exp$data <- array(abs(rnorm(5 * 3 * 214 * 2)*100), + c(memb = 5, sdate = 3, ftime = 214, lon = 2)) class(exp) <- 's2dv_cube' -DOT <- CST_AccumulationExceedingThreshold(exp, threshold = 280) +Dates <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), + as.Date("30-11-2000", format = "\%d-\%m-\%Y"), by = 'day'), + seq(as.Date("01-05-2001", format = "\%d-\%m-\%Y"), + as.Date("30-11-2001", format = "\%d-\%m-\%Y"), by = 'day'), + seq(as.Date("01-05-2002", format = "\%d-\%m-\%Y"), + as.Date("30-11-2002", format = "\%d-\%m-\%Y"), by = 'day')) +dim(Dates) <- c(sdate = 3, ftime = 214) +exp$attrs$Dates <- Dates +AT <- CST_AccumulationExceedingThreshold(data = exp, threshold = 100, + start = list(21, 4), + end = list(21, 6)) } diff --git a/man/CST_PeriodMean.Rd b/man/CST_PeriodMean.Rd index f9db984..d0e1ba8 100644 --- a/man/CST_PeriodMean.Rd +++ b/man/CST_PeriodMean.Rd @@ -59,19 +59,19 @@ this function: } \examples{ exp <- NULL -exp$data <- array(rnorm(45), dim = c(member = 7, sdate = 6, ftime = 3)) -Dates <- c(seq(as.Date("2000-11-01", "\%Y-\%m-\%d", tz = "UTC"), - as.Date("2001-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), - seq(as.Date("2001-11-01", "\%Y-\%m-\%d", tz = "UTC"), - as.Date("2002-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), - seq(as.Date("2002-11-01", "\%Y-\%m-\%d", tz = "UTC"), - as.Date("2003-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), - seq(as.Date("2003-11-01", "\%Y-\%m-\%d", tz = "UTC"), - as.Date("2004-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month")) +exp$data <- array(rnorm(45), dim = c(member = 7, sdate = 4, ftime = 3)) +Dates <- c(seq(as.Date("2000-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2001-01-01", "\%Y-\%m-\%d", tz = 
"UTC"), by = "month"), + seq(as.Date("2001-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2002-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), + seq(as.Date("2002-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2003-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), + seq(as.Date("2003-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2004-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month")) dim(Dates) <- c(sdate = 4, ftime = 3) exp$attrs$Dates <- Dates class(exp) <- 's2dv_cube' -SA <- CST_PeriodMean(exp) +SA <- CST_PeriodMean(exp, start = list(01, 12), end = list(01, 01)) } diff --git a/man/CST_QThreshold.Rd b/man/CST_QThreshold.Rd index eda0fd1..5a68bc3 100644 --- a/man/CST_QThreshold.Rd +++ b/man/CST_QThreshold.Rd @@ -83,14 +83,18 @@ exp <- NULL exp$data <- array(abs(rnorm(112)*26), dim = c(member = 7, sdate = 8, ftime = 2)) class(exp) <- 's2dv_cube' exp_probs <- CST_QThreshold(exp, threshold) -exp$data <- array(rnorm(5 * 3 * 214 * 2), - c(member = 5, sdate = 3, ftime = 214, lon = 2)) + +exp$data <- array(abs(rnorm(5 * 3 * 214 * 2)*50), + c(member = 5, sdate = 3, ftime = 214, lon = 2)) exp$attrs$Dates <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), as.Date("30-11-2000", format = "\%d-\%m-\%Y"), by = 'day'), seq(as.Date("01-05-2001", format = "\%d-\%m-\%Y"), as.Date("30-11-2001", format = "\%d-\%m-\%Y"), by = 'day'), seq(as.Date("01-05-2002", format = "\%d-\%m-\%Y"), as.Date("30-11-2002", format = "\%d-\%m-\%Y"), by = 'day')) -exp_probs <- CST_QThreshold(exp, threshold) +dim(exp$attrs$Dates) <- c(sdate = 3, ftime = 214) +class(exp) <- 's2dv_cube' +exp_probs <- CST_QThreshold(exp, threshold, start = list(21, 4), + end = list(21, 6)) } diff --git a/man/CST_Threshold.Rd b/man/CST_Threshold.Rd index ffe0600..e513ec0 100644 --- a/man/CST_Threshold.Rd +++ b/man/CST_Threshold.Rd @@ -21,7 +21,7 @@ CST_Threshold( package CSTools.} \item{threshold}{A single scalar or vector indicating the relative -threshold(s).} +threshold(s). It must contain values between 0 and 1.} \item{start}{An optional parameter to defined the initial date of the period to selectfrom the data by providing a list of two elements: the initial date @@ -67,13 +67,14 @@ given a dataset. threshold <- 0.9 exp <- NULL exp$data <- array(rnorm(5 * 3 * 214 * 2), - c(member = 5, sdate = 3, ftime = 214, lon = 2)) + c(member = 5, sdate = 3, ftime = 214, lon = 2)) exp$attrs$Dates <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), as.Date("30-11-2000", format = "\%d-\%m-\%Y"), by = 'day'), seq(as.Date("01-05-2001", format = "\%d-\%m-\%Y"), as.Date("30-11-2001", format = "\%d-\%m-\%Y"), by = 'day'), seq(as.Date("01-05-2002", format = "\%d-\%m-\%Y"), as.Date("30-11-2002", format = "\%d-\%m-\%Y"), by = 'day')) +dim(exp$attrs$Dates) <- c(sdate = 3, ftime = 214) class(exp) <- 's2dv_cube' exp_probs <- CST_Threshold(exp, threshold, start = list(21, 4), end = list(21, 6)) diff --git a/man/CST_TotalSpellTimeExceedingThreshold.Rd b/man/CST_TotalSpellTimeExceedingThreshold.Rd index e2f7d26..69dee16 100644 --- a/man/CST_TotalSpellTimeExceedingThreshold.Rd +++ b/man/CST_TotalSpellTimeExceedingThreshold.Rd @@ -56,7 +56,12 @@ computation.} } \value{ An 's2dv_cube' object containing the number of days that are part of a -spell within a threshold in element \code{data}. +spell within a threshold in element \code{data} with dimensions of the input +parameter 'data' except the dimension where the indicator has been computed. +A new element called 'time_bounds' will be added into the 'attrs' element in +the 's2dv_cube' object. 
It consists of a list containing two elements, the +start and end dates of the aggregated period with the same dimensions of +'Dates' element. } \description{ The number of days (when daily data is provided) that are part of a spell @@ -84,8 +89,11 @@ exp$attrs$Dates <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), as.Date("30-11-2001", format = "\%d-\%m-\%Y"), by = 'day'), seq(as.Date("01-05-2002", format = "\%d-\%m-\%Y"), as.Date("30-11-2002", format = "\%d-\%m-\%Y"), by = 'day')) +dim(exp$attrs$Dates) <- c(sdate = 3, ftime = 214) class(exp) <- 's2dv_cube' -TTSET <- CST_TotalSpellTimeExceedingThreshold(exp, threshold = 23, spell = 3) +TTSET <- CST_TotalSpellTimeExceedingThreshold(exp, threshold = 23, spell = 3, + start = list(21, 4), + end = list(21, 6)) } \seealso{ diff --git a/man/CST_TotalTimeExceedingThreshold.Rd b/man/CST_TotalTimeExceedingThreshold.Rd index b09ae53..840700a 100644 --- a/man/CST_TotalTimeExceedingThreshold.Rd +++ b/man/CST_TotalTimeExceedingThreshold.Rd @@ -58,7 +58,10 @@ computation.} \value{ An 's2dv_cube' object containing in element \code{data} the total number of the corresponding units of the data frequency that a variable is -exceeding a threshold during a period. +exceeding a threshold during a period. A new element called 'time_bounds' will +be added into the 'attrs' element in the 's2dv_cube' object. It consists of a +list containing two elements, the start and end dates of the aggregated period +with the same dimensions of 'Dates' element. } \description{ The Total Time of a variable exceeding (or not) a Threshold. It returns the @@ -83,7 +86,7 @@ indices for heat stress can be obtained by using this function: } \examples{ exp <- NULL -exp$data <- array(abs(rnorm(5 * 3 * 214 * 2)*280), +exp$data <- array(rnorm(5 * 3 * 214 * 2)*23, c(member = 5, sdate = 3, ftime = 214, lon = 2)) exp$attrs$Dates <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), as.Date("30-11-2000", format = "\%d-\%m-\%Y"), by = 'day'), @@ -91,7 +94,9 @@ exp$attrs$Dates <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), as.Date("30-11-2001", format = "\%d-\%m-\%Y"), by = 'day'), seq(as.Date("01-05-2002", format = "\%d-\%m-\%Y"), as.Date("30-11-2002", format = "\%d-\%m-\%Y"), by = 'day')) +dim(exp$attrs$Dates) <- c(sdate = 3, ftime = 214) class(exp) <- 's2dv_cube' -DOT <- CST_TotalTimeExceedingThreshold(exp, threshold = 280) +DOT <- CST_TotalTimeExceedingThreshold(exp, threshold = 23, start = list(21, 4), + end = list(21, 6)) } diff --git a/man/CST_WindCapacityFactor.Rd b/man/CST_WindCapacityFactor.Rd index 638f5b8..84c057d 100644 --- a/man/CST_WindCapacityFactor.Rd +++ b/man/CST_WindCapacityFactor.Rd @@ -60,14 +60,23 @@ below). 
\examples{ wind <- NULL wind$data <- array(rweibull(n = 100, shape = 2, scale = 6), - c(member = 10, lat = 2, lon = 5)) + c(member = 5, sdate = 3, ftime = 214, lon = 2, lat = 5)) wind$coords <- list(lat = c(40, 41), lon = 1:5) variable <- list(varName = 'sfcWind', metadata = list(sfcWind = list(level = 'Surface'))) wind$attrs <- list(Variable = variable, Datasets = 'synthetic', when = Sys.time(), Dates = '1990-01-01 00:00:00') +Dates <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), + as.Date("30-11-2000", format = "\%d-\%m-\%Y"), by = 'day'), + seq(as.Date("01-05-2001", format = "\%d-\%m-\%Y"), + as.Date("30-11-2001", format = "\%d-\%m-\%Y"), by = 'day'), + seq(as.Date("01-05-2002", format = "\%d-\%m-\%Y"), + as.Date("30-11-2002", format = "\%d-\%m-\%Y"), by = 'day')) +dim(Dates) <- c(sdate = 3, ftime = 214) +wind$attrs$Dates <- Dates class(wind) <- 's2dv_cube' -WCF <- CST_WindCapacityFactor(wind, IEC_class = "III") +WCF <- CST_WindCapacityFactor(wind, IEC_class = "III", + start = list(21, 4), end = list(21, 6)) } \references{ diff --git a/man/CST_WindPowerDensity.Rd b/man/CST_WindPowerDensity.Rd index c33bd8d..4b04aed 100644 --- a/man/CST_WindPowerDensity.Rd +++ b/man/CST_WindPowerDensity.Rd @@ -14,8 +14,8 @@ CST_WindPowerDensity( ) } \arguments{ -\item{wind}{An s2dv_cube object with instantaneous wind speeds expressed in m/s -obtained from CST_Load or s2dv_cube functions from CSTools pacakge.} +\item{wind}{An 's2dv_cube' object with instantaneous wind speeds expressed in +m/s obtained from CST_Load or s2dv_cube functions from CSTools pacakge.} \item{ro}{A scalar, or alternatively a multidimensional array with the same dimensions as wind, with the air density expressed in kg/m^3. By default it @@ -53,14 +53,23 @@ it will give inaccurate results if used with period means. \examples{ wind <- NULL wind$data <- array(rweibull(n = 100, shape = 2, scale = 6), - c(member = 10, lat = 2, lon = 5)) + c(member = 5, sdate = 3, ftime = 214, lon = 2, lat = 5)) wind$coords <- list(lat = c(40, 41), lon = 1:5) variable <- list(varName = 'sfcWind', metadata = list(sfcWind = list(level = 'Surface'))) wind$attrs <- list(Variable = variable, Datasets = 'synthetic', when = Sys.time(), Dates = '1990-01-01 00:00:00') +Dates <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), + as.Date("30-11-2000", format = "\%d-\%m-\%Y"), by = 'day'), + seq(as.Date("01-05-2001", format = "\%d-\%m-\%Y"), + as.Date("30-11-2001", format = "\%d-\%m-\%Y"), by = 'day'), + seq(as.Date("01-05-2002", format = "\%d-\%m-\%Y"), + as.Date("30-11-2002", format = "\%d-\%m-\%Y"), by = 'day')) +dim(Dates) <- c(sdate = 3, ftime = 214) +wind$attrs$Dates <- Dates class(wind) <- 's2dv_cube' -WCF <- CST_WindPowerDensity(wind) +WPD <- CST_WindPowerDensity(wind, start = list(21, 4), + end = list(21, 6)) } \author{ diff --git a/man/PeriodAccumulation.Rd b/man/PeriodAccumulation.Rd index 0f55937..614b65c 100644 --- a/man/PeriodAccumulation.Rd +++ b/man/PeriodAccumulation.Rd @@ -17,10 +17,9 @@ PeriodAccumulation( \arguments{ \item{data}{A multidimensional array with named dimensions.} -\item{dates}{A multidimensional array of datesW with named dimensions matching -the dimensions on parameter 'data'. By default it is NULL, to select a -period this parameter must be provided and it needs to have temporal -dimensions.} +\item{dates}{A multidimensional array of dates with named dimensions matching +the temporal dimensions on parameter 'data'. 
By default it is NULL, to +select aperiod this parameter must be provided.} \item{start}{An optional parameter to defined the initial date of the period to select from the data by providing a list of two elements: the initial diff --git a/man/PeriodMean.Rd b/man/PeriodMean.Rd index fffb332..f52db40 100644 --- a/man/PeriodMean.Rd +++ b/man/PeriodMean.Rd @@ -17,9 +17,9 @@ PeriodMean( \arguments{ \item{data}{A multidimensional array with named dimensions.} -\item{dates}{A vector of dates or a multidimensional array of dates with named -dimensions matching the dimensions on parameter 'data'. By default it is -NULL, to select a period this parameter must be provided.} +\item{dates}{A multidimensional array of dates with named dimensions matching +the temporal dimensions on parameter 'data'. By default it is NULL, to +select aperiod this parameter must be provided.} \item{start}{An optional parameter to defined the initial date of the period to select from the data by providing a list of two elements: the initial diff --git a/man/QThreshold.Rd b/man/QThreshold.Rd index 2af6e5f..ba023d8 100644 --- a/man/QThreshold.Rd +++ b/man/QThreshold.Rd @@ -10,7 +10,7 @@ QThreshold( dates = NULL, start = NULL, end = NULL, - time_dim = "time", + time_dim = "ftime", memb_dim = "member", sdate_dim = "sdate", ncores = NULL @@ -23,9 +23,9 @@ QThreshold( units as parameter 'data' and with the common dimensions of the element 'data' of the same length.} -\item{dates}{A vector of dates or a multidimensional array of dates with named -dimensions matching the dimensions on parameter 'data'. By default it is -NULL, to select a period this parameter must be provided.} +\item{dates}{A multidimensional array of dates with named dimensions matching +the temporal dimensions on parameter 'data'. By default it is NULL, to +select aperiod this parameter must be provided.} \item{start}{An optional parameter to defined the initial date of the period to select from the data by providing a list of two elements: the initial @@ -82,7 +82,17 @@ and memb_dim parameters: \examples{ threshold = 25 data <- array(rnorm(5 * 3 * 20 * 2, mean = 26), - c(member = 5, sdate = 3, time = 20, lon = 2)) -thres_q <- QThreshold(data, threshold) + c(member = 5, sdate = 3, time = 214, lon = 2)) + +Dates <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), + as.Date("30-11-2000", format = "\%d-\%m-\%Y"), by = 'day'), + seq(as.Date("01-05-2001", format = "\%d-\%m-\%Y"), + as.Date("30-11-2001", format = "\%d-\%m-\%Y"), by = 'day'), + seq(as.Date("01-05-2002", format = "\%d-\%m-\%Y"), + as.Date("30-11-2002", format = "\%d-\%m-\%Y"), by = 'day')) +dim(Dates) <- c(sdate = 3, time = 214) + +thres_q <- QThreshold(data, threshold, dates = Dates, time_dim = 'time', + start = list(21, 4), end = list(21, 6)) } diff --git a/man/Threshold.Rd b/man/Threshold.Rd index db59817..d254cbe 100644 --- a/man/Threshold.Rd +++ b/man/Threshold.Rd @@ -10,7 +10,7 @@ Threshold( dates = NULL, start = NULL, end = NULL, - time_dim = "time", + time_dim = "ftime", memb_dim = "member", sdate_dim = "sdate", na.rm = FALSE, @@ -21,11 +21,11 @@ Threshold( \item{data}{A multidimensional array with named dimensions.} \item{threshold}{A single scalar or vector indicating the relative -threshold(s).} +threshold(s). It must contain values between 0 and 1.} -\item{dates}{A vector of dates or a multidimensional array of dates with named -dimensions matching the dimensions on parameter 'data'. 
By default it is -NULL, to select a period this parameter must be provided.} +\item{dates}{A multidimensional array of dates with named dimensions matching +the temporal dimensions on parameter 'data'. By default it is NULL, to +select aperiod this parameter must be provided.} \item{start}{An optional parameter to defined the initial date of the period to select from the data by providing a list of two elements: the initial diff --git a/man/TotalSpellTimeExceedingThreshold.Rd b/man/TotalSpellTimeExceedingThreshold.Rd index 276423b..10124de 100644 --- a/man/TotalSpellTimeExceedingThreshold.Rd +++ b/man/TotalSpellTimeExceedingThreshold.Rd @@ -36,9 +36,9 @@ are used it has to be a vector of a pair of two logical operators: c('<', '>'), c('<', '>='), c('<=', '>'), c('<=', '>='), c('>', '<'), c('>', '<='), c('>=', '<'),c('>=', '<=')).} -\item{dates}{A vector of dates or a multidimensional array of dates with named -dimensions matching the dimensions on parameter 'data'. By default it is -NULL, to select a period this parameter must be provided.} +\item{dates}{A multidimensional array of dates with named dimensions matching +the temporal dimensions on parameter 'data'. By default it is NULL, to +select aperiod this parameter must be provided.} \item{start}{An optional parameter to defined the initial date of the period to select from the data by providing a list of two elements: the initial @@ -86,9 +86,19 @@ different behaviour consider to modify the 'data' input by substituting NA values by values exceeding the threshold. } \examples{ -data <- array(rnorm(120), c(member = 1, sdate = 2, ftime = 20, lat = 4)) -threshold <- array(rnorm(4), c(lat = 4)) -total <- TotalSpellTimeExceedingThreshold(data, threshold, spell = 6) +data <- array(1:100, c(member = 5, sdate = 3, ftime = 214, lon = 2)) +Dates <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), + as.Date("30-11-2000", format = "\%d-\%m-\%Y"), by = 'day'), + seq(as.Date("01-05-2001", format = "\%d-\%m-\%Y"), + as.Date("30-11-2001", format = "\%d-\%m-\%Y"), by = 'day'), + seq(as.Date("01-05-2002", format = "\%d-\%m-\%Y"), + as.Date("30-11-2002", format = "\%d-\%m-\%Y"), by = 'day')) +dim(Dates) <- c(sdate = 3, ftime = 214) + +threshold <- array(1:4, c(lat = 4)) +total <- TotalSpellTimeExceedingThreshold(data, threshold, dates = Dates, + spell = 6, start = list(21, 4), + end = list(21, 6)) } \seealso{ diff --git a/man/TotalTimeExceedingThreshold.Rd b/man/TotalTimeExceedingThreshold.Rd index 2068475..4dc00d0 100644 --- a/man/TotalTimeExceedingThreshold.Rd +++ b/man/TotalTimeExceedingThreshold.Rd @@ -34,9 +34,9 @@ are used it has to be a vector of a pair of two logical operators: c('<', '>'), c('<', '>='), c('<=', '>'), c('<=', '>='), c('>', '<'), c('>', '<='), c('>=', '<'),c('>=', '<=')).} -\item{dates}{A vector of dates or a multidimensional array of dates with named -dimensions matching the dimensions on parameter 'data'. By default it is -NULL, to select a period this parameter must be provided.} +\item{dates}{A multidimensional array of dates with named dimensions matching +the temporal dimensions on parameter 'data'. 
By default it is NULL, to +select aperiod this parameter must be provided.} \item{start}{An optional parameter to define the initial date of the period to select from the data by providing a list of two elements: the initial @@ -62,7 +62,8 @@ computation.} \value{ A multidimensional array with named dimensions containing the total number of the corresponding units of the data frequency that a variable is -exceeding a threshold during a period. +exceeding a threshold during a period with dimensions of the input parameter +'data' except the dimension where the indicator has been computed. } \description{ The Total Time of a variable exceeding (or not) a Threshold. It returns the @@ -86,8 +87,16 @@ indices for heat stress can be obtained by using this function: } } \examples{ -exp <- array(abs(rnorm(5 * 3 * 214 * 2)*280), - c(member = 5, sdate = 3, ftime = 214, lon = 2)) -DOT <- TotalTimeExceedingThreshold(exp, threshold = 300, time_dim = 'ftime') +data <- array(rnorm(5 * 3 * 214 * 2)*23, + c(member = 5, sdate = 3, ftime = 214, lon = 2)) +Dates <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), + as.Date("30-11-2000", format = "\%d-\%m-\%Y"), by = 'day'), + seq(as.Date("01-05-2001", format = "\%d-\%m-\%Y"), + as.Date("30-11-2001", format = "\%d-\%m-\%Y"), by = 'day'), + seq(as.Date("01-05-2002", format = "\%d-\%m-\%Y"), + as.Date("30-11-2002", format = "\%d-\%m-\%Y"), by = 'day')) +dim(Dates) <- c(sdate = 3, ftime = 214) +DOT <- TotalTimeExceedingThreshold(data, threshold = 23, dates = Dates, + start = list(21, 4), end = list(21, 6)) } diff --git a/man/WindCapacityFactor.Rd b/man/WindCapacityFactor.Rd index 69549a8..3ddeec6 100644 --- a/man/WindCapacityFactor.Rd +++ b/man/WindCapacityFactor.Rd @@ -10,7 +10,7 @@ WindCapacityFactor( dates = NULL, start = NULL, end = NULL, - time_dim = "time", + time_dim = "ftime", ncores = NULL ) } @@ -25,9 +25,9 @@ respectively. Classes \code{'I/II'} and \code{'II/III'} indicate intermediate turbines that fit both classes. More details of the five turbines and a plot of its power curves can be found in Lledó et al. (2019).} -\item{dates}{A vector of dates or a multidimensional array of dates with named -dimensions matching the dimensions on parameter 'data'. By default it is -NULL, to select a period this parameter must be provided.} +\item{dates}{A multidimensional array of dates with named dimensions matching +the temporal dimensions on parameter 'data'. By default it is NULL, to +select aperiod this parameter must be provided.} \item{start}{An optional parameter to defined the initial date of the period to select from the data by providing a list of two elements: the initial @@ -65,8 +65,19 @@ different power curves that span different IEC classes can be selected (see below). 
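A brief sketch contrasting two IEC classes on the same wind speeds (a hedged illustration with arbitrary values, not part of the patch):

wind <- rweibull(n = 10, shape = 2, scale = 6)
WCF_I   <- WindCapacityFactor(wind, IEC_class = "I")    # class I: high-wind sites
WCF_III <- WindCapacityFactor(wind, IEC_class = "III")  # class III: low-wind sites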
} \examples{ -wind <- rweibull(n = 100, shape = 2, scale = 6) -WCF <- WindCapacityFactor(wind, IEC_class = "III") +wind <- array(rweibull(n = 32100, shape = 2, scale = 6), + c(member = 5, sdate = 3, ftime = 214, lon = 2, lat = 5)) + +Dates <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), + as.Date("30-11-2000", format = "\%d-\%m-\%Y"), by = 'day'), + seq(as.Date("01-05-2001", format = "\%d-\%m-\%Y"), + as.Date("30-11-2001", format = "\%d-\%m-\%Y"), by = 'day'), + seq(as.Date("01-05-2002", format = "\%d-\%m-\%Y"), + as.Date("30-11-2002", format = "\%d-\%m-\%Y"), by = 'day')) +dim(Dates) <- c(sdate = 3, ftime = 214) + +WCF <- WindCapacityFactor(wind, IEC_class = "III", dates = Dates, + start = list(21, 4), end = list(21, 6)) } \references{ diff --git a/man/WindPowerDensity.Rd b/man/WindPowerDensity.Rd index 8e3c8e3..9b935bc 100644 --- a/man/WindPowerDensity.Rd +++ b/man/WindPowerDensity.Rd @@ -10,7 +10,7 @@ WindPowerDensity( dates = NULL, start = NULL, end = NULL, - time_dim = "time", + time_dim = "ftime", ncores = NULL ) } @@ -22,9 +22,9 @@ speeds expressed in m/s.} dimensions as wind, with the air density expressed in kg/m^3. By default it takes the value 1.225, the standard density of air at 15ºC and 1013.25 hPa.} -\item{dates}{A vector of dates or a multidimensional array of dates with named -dimensions matching the dimensions on parameter 'data'. By default it is -NULL, to select a period this parameter must be provided.} +\item{dates}{A multidimensional array of dates with named dimensions matching +the temporal dimensions on parameter 'data'. By default it is NULL, to +select aperiod this parameter must be provided.} \item{start}{An optional parameter to defined the initial date of the period to select from the data by providing a list of two elements: the initial @@ -57,8 +57,17 @@ It is computed as 0.5*ro*wspd^3. As this function is non-linear, it will give inaccurate results if used with period means. } \examples{ -wind <- rweibull(n = 100, shape = 2, scale = 6) -WPD <- WindPowerDensity(wind) +wind <- array(rweibull(n = 32100, shape = 2, scale = 6), + c(member = 5, sdate = 3, ftime = 214, lon = 2, lat = 5)) +Dates <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), + as.Date("30-11-2000", format = "\%d-\%m-\%Y"), by = 'day'), + seq(as.Date("01-05-2001", format = "\%d-\%m-\%Y"), + as.Date("30-11-2001", format = "\%d-\%m-\%Y"), by = 'day'), + seq(as.Date("01-05-2002", format = "\%d-\%m-\%Y"), + as.Date("30-11-2002", format = "\%d-\%m-\%Y"), by = 'day')) +dim(Dates) <- c(sdate = 3, ftime = 214) +WPD <- WindPowerDensity(wind, dates = Dates, start = list(21, 4), + end = list(21, 6)) } \author{ diff --git a/tests/testthat/test-AccumulationExceedingThreshold.R b/tests/testthat/test-AccumulationExceedingThreshold.R index 0331e8e..baa2c50 100644 --- a/tests/testthat/test-AccumulationExceedingThreshold.R +++ b/tests/testthat/test-AccumulationExceedingThreshold.R @@ -1,4 +1,7 @@ ############################################## + +library(CSTools) + # dat1 dat1 <- 1:20 @@ -234,7 +237,6 @@ test_that("4. Output checks", { }) ############################################## -library(CSTools) test_that("5. Seasonal forecasts", { exp <- CSTools::lonlat_temp$exp @@ -260,24 +262,25 @@ test_that("5. 
Seasonal forecasts", { as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), seq(as.Date("01-05-2002", format = "%d-%m-%Y"), as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) + dim(Dates) <- c(sdate = 3, ftime = 214) GDD <- AccumulationExceedingThreshold(exp - 17, threshold = 0, dates = Dates, time_dim = 'ftime', start = list(1, 4), end = list(31, 10), na.rm = TRUE) expect_equal( round(GDD[,1,1,1]), - c(538, 367, 116, 519, 219, 282) + c(549, 387, 125, 554, 245, 282) ) expect_equal( dim(GDD), - c(member = 6, sdate = 3, lat =4, lon = 4) + c(member = 6, sdate = 3, lat = 4, lon = 4) ) expect_error( AccumulationExceedingThreshold(exp - 17, threshold = 0, dates = Dates, start = list(1, 4), end = list(31, 10), time_dim = 'time'), "Parameter 'time_dim' is not found in 'data' dimension." ) expect_equal( - all(is.na(AccumulationExceedingThreshold(exp - 17, threshold = 0, dates = Dates, time_dim = 'ftime',start = list(1, 4), end = list(31, 10)))), - all(is.na(c(NA, NA))) + !any(is.na(AccumulationExceedingThreshold(exp - 17, threshold = 0, dates = Dates, time_dim = 'ftime',start = list(1, 4), end = list(31, 10)))), + !any(is.na(c(1, 1))) ) # test the 'diff' @@ -294,7 +297,6 @@ test_that("5. Seasonal forecasts", { AccumulationExceedingThreshold(input_1, threshold_1), 204 ) - expect_equal( AccumulationExceedingThreshold(input_2, threshold_2, op = '<'), -105 @@ -304,3 +306,47 @@ test_that("5. Seasonal forecasts", { -55 ) }) + +############################################## +test_that("6. Subset Dates and check time_bounds", { + exp <- CSTools::lonlat_prec + res <- CST_AccumulationExceedingThreshold(data = exp, threshold = mean(exp$data), + time_dim = 'ftime', start = list(10, 03), + end = list(20, 03)) + # Check dims + expect_equal( + dim(res$data), + res$dims + ) + # Check Dates + expect_equal( + dim(res$data)['sdate'], + dim(res$attrs$Dates) + ) + # Check time_bounds + expect_equal( + res$attrs$Dates, + res$attrs$time_bounds$start + ) + expect_equal( + dim(res$attrs$time_bounds$start), + dim(res$attrs$time_bounds$end) + ) + # Check 'sdates' + expect_equal( + all(lubridate::month(res$attrs$time_bounds$start) == 3), + TRUE + ) + expect_equal( + all(lubridate::day(res$attrs$time_bounds$start) == 10), + TRUE + ) + expect_equal( + all(lubridate::month(res$attrs$time_bounds$end) == 03), + TRUE + ) + expect_equal( + all(lubridate::day(res$attrs$time_bounds$end) == 20), + TRUE + ) +}) diff --git a/tests/testthat/test-QThreshold.R b/tests/testthat/test-QThreshold.R index deb35df..3599bd9 100644 --- a/tests/testthat/test-QThreshold.R +++ b/tests/testthat/test-QThreshold.R @@ -1,5 +1,21 @@ + +library(CSTools) + +# dat1 +threshold <- 26 +dat1 <- array(abs(rnorm(5 * 3 * 214 * 2)*50), + c(member = 5, sdate = 3, ftime = 214, lon = 2)) +dates0 <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), + as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), + seq(as.Date("01-05-2001", format = "%d-%m-%Y"), + as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), + seq(as.Date("01-05-2002", format = "%d-%m-%Y"), + as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) +dates1 <- dates0 +dim(dates1) <- c(sdate = 3, ftime = 214) + ############################################## -test_that("Sanity checks", { +test_that("1. Sanity checks", { expect_error( QThreshold(NULL), "Parameter 'data' cannot be NULL." 
@@ -104,13 +120,23 @@ test_that("Sanity checks", { dim(res), c(sdate = 3, ftime = 52) ) - + # test start and end + expect_warning( + QThreshold(dat1, threshold = 26, dates = dates0, start = list(21, 4), + end = list(21, 6)), + paste0("Parameter 'dates' must have named dimensions if 'start' and ", + "'end' are not NULL. All data will be used.") + ) + expect_equal( + dim(QThreshold(dat1, threshold = 26, dates = dates1, start = list(21, 4), + end = list(21, 6))), + c(sdate = 3, member = 5, ftime = 52, lon = 2) + ) }) ############################################## -library(CSTools) -test_that("Seasonal forecasts", { +test_that("2. Seasonal forecasts", { obs <- CSTools::lonlat_temp$obs$data - 248 obs_percentile <- QThreshold(obs, threshold = 35) expect_equal( @@ -142,3 +168,5 @@ test_that("Seasonal forecasts", { c(0.6, 0.4, 0.6, 0.6, 0.4, 0.4) ) }) + +############################################## diff --git a/tests/testthat/test-Threshold.R b/tests/testthat/test-Threshold.R index 84e4e0d..258e438 100644 --- a/tests/testthat/test-Threshold.R +++ b/tests/testthat/test-Threshold.R @@ -1,5 +1,21 @@ + +library(CSTools) + +# dat1 +threshold <- 0.9 +dat1 <- array(abs(rnorm(5 * 3 * 214 * 2)*50), + c(member = 5, sdate = 3, ftime = 214, lon = 2)) +dates0 <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), + as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), + seq(as.Date("01-05-2001", format = "%d-%m-%Y"), + as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), + seq(as.Date("01-05-2002", format = "%d-%m-%Y"), + as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) +dates1 <- dates0 +dim(dates1) <- c(sdate = 3, ftime = 214) + ############################################## -test_that("Sanity checks", { +test_that("1. Sanity checks", { expect_error( Threshold(NULL), "Parameter 'data' cannot be NULL." @@ -60,9 +76,23 @@ test_that("Sanity checks", { dim(Threshold(data, threshold, memb_dim = 'x', sdate_dim = 'ftime')), c(probs = 2) ) + # test start and end + expect_warning( + Threshold(dat1, threshold = 0.9, dates = dates0, start = list(21, 4), + end = list(21, 6)), + paste0("Parameter 'dates' must have named dimensions if 'start' and ", + "'end' are not NULL. All data will be used.") + ) + expect_equal( + dim(Threshold(dat1, threshold = 0.8, dates = dates1, start = list(21, 4), + end = list(21, 6))), + c(ftime = 52, lon = 2) + ) }) -test_that("Seasonal forecasts", { +############################################## + +test_that("2. Seasonal forecasts", { exp <- CSTools::lonlat_temp$exp$data thresholdP <- Threshold(exp, threshold = 0.9) expect_equal( diff --git a/tests/testthat/test-TotalSpellTimeExceedingThreshold.R b/tests/testthat/test-TotalSpellTimeExceedingThreshold.R index 36e46cf..f76b457 100644 --- a/tests/testthat/test-TotalSpellTimeExceedingThreshold.R +++ b/tests/testthat/test-TotalSpellTimeExceedingThreshold.R @@ -240,7 +240,7 @@ test_that("4. Output checks", { }) -########################################################################### +################################################################## test_that("5. Seasonal Forecasts", { exp <- CSTools::lonlat_temp$exp @@ -263,3 +263,47 @@ test_that("5. Seasonal Forecasts", { WSDI1$data[3,3,3,], c(rep(0, 53))) }) + +################################################################## + +test_that("6. 
Subset Dates and check time_bounds", { + exp <- CSTools::lonlat_prec + res <- CST_PeriodMean(data = CSTools::lonlat_prec, time_dim = 'ftime', + start = list(10, 03), end = list(20, 03)) + # Check dims + expect_equal( + dim(res$data), + res$dims + ) + # Check Dates + expect_equal( + dim(res$data)['sdate'], + dim(res$attrs$Dates) + ) + # Check time_bounds + expect_equal( + res$attrs$Dates, + res$attrs$time_bounds$start + ) + expect_equal( + dim(res$attrs$time_bounds$start), + dim(res$attrs$time_bounds$end) + ) + # Check 'sdates' + expect_equal( + all(lubridate::month(res$attrs$time_bounds$start) == 3), + TRUE + ) + expect_equal( + all(lubridate::day(res$attrs$time_bounds$start) == 10), + TRUE + ) + expect_equal( + all(lubridate::month(res$attrs$time_bounds$end) == 03), + TRUE + ) + expect_equal( + all(lubridate::day(res$attrs$time_bounds$end) == 20), + TRUE + ) +}) \ No newline at end of file diff --git a/tests/testthat/test-TotalTimeExceedingThreshold.R b/tests/testthat/test-TotalTimeExceedingThreshold.R index 1c3ca19..4a10b1b 100644 --- a/tests/testthat/test-TotalTimeExceedingThreshold.R +++ b/tests/testthat/test-TotalTimeExceedingThreshold.R @@ -40,7 +40,7 @@ thres4_7 <- rnorm(5, 20) set.seed(2) thres4_8 <- rnorm(5, 25) -########################################################################### +############################################################# test_that("1. Sanity checks", { # data @@ -137,7 +137,7 @@ test_that("1. Sanity checks", { }) -########################################################################### +####################################################### test_that("2. Output checks", { expect_equal( @@ -231,7 +231,7 @@ test_that("4. Output checks", { ########################################################################### -test_that("Seasonal forecasts", { +test_that("5. Seasonal forecasts", { # compare with scalar fixed threshold exp <- CSTools::lonlat_temp$exp obs <- CSTools::lonlat_temp$obs @@ -252,3 +252,47 @@ test_that("Seasonal forecasts", { c(3, 3, 3, 3, 3, 3) ) }) + +################################################################## + +test_that("6. 
Subset Dates and check time_bounds", { + exp <- CSTools::lonlat_prec + res <- CST_PeriodMean(data = CSTools::lonlat_prec, time_dim = 'ftime', + start = list(10, 03), end = list(20, 03)) + # Check dims + expect_equal( + dim(res$data), + res$dims + ) + # Check Dates + expect_equal( + dim(res$data)['sdate'], + dim(res$attrs$Dates) + ) + # Check time_bounds + expect_equal( + res$attrs$Dates, + res$attrs$time_bounds$start + ) + expect_equal( + dim(res$attrs$time_bounds$start), + dim(res$attrs$time_bounds$end) + ) + # Check 'sdates' + expect_equal( + all(lubridate::month(res$attrs$time_bounds$start) == 3), + TRUE + ) + expect_equal( + all(lubridate::day(res$attrs$time_bounds$start) == 10), + TRUE + ) + expect_equal( + all(lubridate::month(res$attrs$time_bounds$end) == 03), + TRUE + ) + expect_equal( + all(lubridate::day(res$attrs$time_bounds$end) == 20), + TRUE + ) +}) diff --git a/tests/testthat/test-WindCapacityFactor.R b/tests/testthat/test-WindCapacityFactor.R index bcecdbc..6fec014 100644 --- a/tests/testthat/test-WindCapacityFactor.R +++ b/tests/testthat/test-WindCapacityFactor.R @@ -12,7 +12,19 @@ wind$attrs <- list(Variable = variable, Datasets = 'synthetic', class(wind) <- 's2dv_cube' WCF <- CST_WindCapacityFactor(wind, IEC_class = "III") -########################################################################### +# dat2 +dat2 <- array(abs(rnorm(5 * 3 * 214 * 2)*50), + c(member = 5, sdate = 3, ftime = 214, lon = 2)) +dates0 <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), + as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), + seq(as.Date("01-05-2001", format = "%d-%m-%Y"), + as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), + seq(as.Date("01-05-2002", format = "%d-%m-%Y"), + as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) +dates2 <- dates0 +dim(dates2) <- c(sdate = 3, ftime = 214) + +################################################### test_that("1. Input checks", { # Check 's2dv_cube' expect_error( @@ -35,7 +47,7 @@ test_that("1. Input checks", { ) }) -########################################################################### +#################################################### test_that("2. Output checks", { expect_equal( CST_WindCapacityFactor(wind = wind)$attrs$Variable$varName, @@ -45,5 +57,17 @@ test_that("2. Output checks", { dim(CST_WindCapacityFactor(wind = wind)$data), c(member = 10, lat = 2, lon = 5) ) + # test start and end + expect_warning( + WindCapacityFactor(wind = dat2, IEC_class = "III", dates = dates0, + start = list(21, 4), end = list(21, 6)), + paste0("Parameter 'wind' must have named dimensions if 'start' and ", + "'end' are not NULL. 
All data will be used.") + ) + expect_equal( + dim(WindCapacityFactor(wind = dat2, IEC_class = "III", dates = dates2, + start = list(21, 4), end = list(21, 6))), + c(member = 5, sdate = 3, ftime = 52, lon = 2) + ) }) diff --git a/tests/testthat/test-WindPowerDensity.R b/tests/testthat/test-WindPowerDensity.R index 184b062..a5abd4d 100644 --- a/tests/testthat/test-WindPowerDensity.R +++ b/tests/testthat/test-WindPowerDensity.R @@ -11,6 +11,18 @@ wind$attrs <- list(Variable = variable, Datasets = 'synthetic', when = Sys.time(), Dates = '1990-01-01 00:00:00') class(wind) <- 's2dv_cube' +# dat2 +dat2 <- array(abs(rnorm(5 * 3 * 214 * 2)*50), + c(member = 5, sdate = 3, ftime = 214, lon = 2)) +dates0 <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), + as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), + seq(as.Date("01-05-2001", format = "%d-%m-%Y"), + as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), + seq(as.Date("01-05-2002", format = "%d-%m-%Y"), + as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) +dates2 <- dates0 +dim(dates2) <- c(sdate = 3, ftime = 214) + ########################################################################### test_that("1. Input checks", { # Check 's2dv_cube' @@ -44,5 +56,17 @@ test_that("2. Output checks", { dim(CST_WindPowerDensity(wind = wind)$data), c(member = 10, lat = 2, lon = 5) ) + # test start and end + expect_warning( + WindPowerDensity(wind = dat2, dates = dates0, + start = list(21, 4), end = list(21, 6)), + paste0("Parameter 'wind' must have named dimensions if 'start' and ", + "'end' are not NULL. All data will be used.") + ) + expect_equal( + dim(WindPowerDensity(wind = dat2, dates = dates2, + start = list(21, 4), end = list(21, 6))), + c(member = 5, sdate = 3, ftime = 52, lon = 2) + ) }) -- GitLab From e9f952eedfad50a9fa796428d816078bbe1787bc Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Tue, 20 Jun 2023 09:51:45 +0200 Subject: [PATCH 20/87] Improve output description in documentation --- R/AccumulationExceedingThreshold.R | 1 + R/PeriodAccumulation.R | 10 ++++++---- R/PeriodMean.R | 10 ++++++---- R/TotalTimeExceedingThreshold.R | 10 ++++++---- man/CST_PeriodAccumulation.Rd | 10 ++++++---- man/CST_PeriodMean.Rd | 10 ++++++---- man/CST_TotalTimeExceedingThreshold.Rd | 10 ++++++---- 7 files changed, 37 insertions(+), 24 deletions(-) diff --git a/R/AccumulationExceedingThreshold.R b/R/AccumulationExceedingThreshold.R index d3c7147..636b211 100644 --- a/R/AccumulationExceedingThreshold.R +++ b/R/AccumulationExceedingThreshold.R @@ -53,6 +53,7 @@ #'be added into the 'attrs' element in the 's2dv_cube' object. It consists of a #'list containing two elements, the start and end dates of the aggregated period #'with the same dimensions of 'Dates' element. +#' #'@examples #'exp <- NULL #'exp$data <- array(abs(rnorm(5 * 3 * 214 * 2)*100), diff --git a/R/PeriodAccumulation.R b/R/PeriodAccumulation.R index c8e976e..9bd33e5 100644 --- a/R/PeriodAccumulation.R +++ b/R/PeriodAccumulation.R @@ -31,10 +31,12 @@ #' computation. #' #'@return A 's2dv_cube' object containing the indicator in the element -#'\code{data}. A new element called 'time_bounds' will be added into -#' the 'attrs' element in the 's2dv_cube' object. It consists of a list -#' containing two elements, the start and end dates of the aggregated period -#' with the same dimensions of 'Dates' element. +#'\code{data} with dimensions of the input parameter 'data' except the dimension +#'where the accumulation has been computed (specified with 'time_dim'). 
A new +#'element called 'time_bounds' will be added into the 'attrs' element in the +#''s2dv_cube' object. It consists of a list containing two elements, the start +#'and end dates of the aggregated period with the same dimensions of 'Dates' +#'element. #' #'@examples #'exp <- NULL diff --git a/R/PeriodMean.R b/R/PeriodMean.R index 6acea60..3e7619d 100644 --- a/R/PeriodMean.R +++ b/R/PeriodMean.R @@ -31,10 +31,12 @@ #' computation. #' #'@return An 's2dv_cube' object containing the indicator in the element -#' \code{data}. A new element called 'time_bounds' will be added into -#' the 'attrs' element in the 's2dv_cube' object. It consists of a list -#' containing two elements, the start and end dates of the aggregated period -#' with the same dimensions of 'Dates' element. +#'\code{data} with dimensions of the input parameter 'data' except the +#'dimension where the mean has been computed (specified with 'time_dim'). A new +#'element called 'time_bounds' will be added into the 'attrs' element in the +#''s2dv_cube' object. It consists of a list containing two elements, the start +#'and end dates of the aggregated period with the same dimensions of 'Dates' +#'element. #' #'@examples #'exp <- NULL diff --git a/R/TotalTimeExceedingThreshold.R b/R/TotalTimeExceedingThreshold.R index 49fa988..057bb8b 100644 --- a/R/TotalTimeExceedingThreshold.R +++ b/R/TotalTimeExceedingThreshold.R @@ -54,10 +54,12 @@ #' #'@return An 's2dv_cube' object containing in element \code{data} the total #'number of the corresponding units of the data frequency that a variable is -#'exceeding a threshold during a period. A new element called 'time_bounds' will -#'be added into the 'attrs' element in the 's2dv_cube' object. It consists of a -#'list containing two elements, the start and end dates of the aggregated period -#'with the same dimensions of 'Dates' element. +#'exceeding a threshold during a period with dimensions of the input parameter +#''data' except the dimension where the indicator has been computed. A new +#'element called 'time_bounds' will be added into the 'attrs' element in the +#''s2dv_cube' object. It consists of a list containing two elements, the start +#'and end dates of the aggregated period with the same dimensions of 'Dates' +#'element. #' #'@examples #'exp <- NULL diff --git a/man/CST_PeriodAccumulation.Rd b/man/CST_PeriodAccumulation.Rd index 71122d6..0878399 100644 --- a/man/CST_PeriodAccumulation.Rd +++ b/man/CST_PeriodAccumulation.Rd @@ -41,10 +41,12 @@ computation.} } \value{ A 's2dv_cube' object containing the indicator in the element -\code{data}. A new element called 'time_bounds' will be added into - the 'attrs' element in the 's2dv_cube' object. It consists of a list - containing two elements, the start and end dates of the aggregated period - with the same dimensions of 'Dates' element. +\code{data} with dimensions of the input parameter 'data' except the dimension +where the accumulation has been computed (specified with 'time_dim'). A new +element called 'time_bounds' will be added into the 'attrs' element in the +'s2dv_cube' object. It consists of a list containing two elements, the start +and end dates of the aggregated period with the same dimensions of 'Dates' +element. 
} \description{ Period Accumulation computes the sum (accumulation) of a given variable in a diff --git a/man/CST_PeriodMean.Rd b/man/CST_PeriodMean.Rd index d0e1ba8..025acfd 100644 --- a/man/CST_PeriodMean.Rd +++ b/man/CST_PeriodMean.Rd @@ -41,10 +41,12 @@ computation.} } \value{ An 's2dv_cube' object containing the indicator in the element - \code{data}. A new element called 'time_bounds' will be added into - the 'attrs' element in the 's2dv_cube' object. It consists of a list - containing two elements, the start and end dates of the aggregated period - with the same dimensions of 'Dates' element. +\code{data} with dimensions of the input parameter 'data' except the +dimension where the mean has been computed (specified with 'time_dim'). A new +element called 'time_bounds' will be added into the 'attrs' element in the +'s2dv_cube' object. It consists of a list containing two elements, the start +and end dates of the aggregated period with the same dimensions of 'Dates' +element. } \description{ Period Mean computes the average (mean) of a given variable in a period. diff --git a/man/CST_TotalTimeExceedingThreshold.Rd b/man/CST_TotalTimeExceedingThreshold.Rd index 840700a..2449014 100644 --- a/man/CST_TotalTimeExceedingThreshold.Rd +++ b/man/CST_TotalTimeExceedingThreshold.Rd @@ -58,10 +58,12 @@ computation.} \value{ An 's2dv_cube' object containing in element \code{data} the total number of the corresponding units of the data frequency that a variable is -exceeding a threshold during a period. A new element called 'time_bounds' will -be added into the 'attrs' element in the 's2dv_cube' object. It consists of a -list containing two elements, the start and end dates of the aggregated period -with the same dimensions of 'Dates' element. +exceeding a threshold during a period with dimensions of the input parameter +'data' except the dimension where the indicator has been computed. A new +element called 'time_bounds' will be added into the 'attrs' element in the +'s2dv_cube' object. It consists of a list containing two elements, the start +and end dates of the aggregated period with the same dimensions of 'Dates' +element. } \description{ The Total Time of a variable exceeding (or not) a Threshold. It returns the -- GitLab From 292e1e2d1aeaf8c7ec5e27009ef897334c83563c Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 21 Jun 2023 10:25:46 +0200 Subject: [PATCH 21/87] Update time_dim and improve example --- R/PeriodAccumulation.R | 2 +- R/PeriodMean.R | 15 ++++++++++++--- man/PeriodAccumulation.Rd | 2 +- man/PeriodMean.Rd | 15 ++++++++++++--- 4 files changed, 26 insertions(+), 8 deletions(-) diff --git a/R/PeriodAccumulation.R b/R/PeriodAccumulation.R index 9bd33e5..8f1fb7f 100644 --- a/R/PeriodAccumulation.R +++ b/R/PeriodAccumulation.R @@ -168,7 +168,7 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, #'@import multiApply #'@export PeriodAccumulation <- function(data, dates = NULL, start = NULL, end = NULL, - time_dim = 'time', na.rm = FALSE, + time_dim = 'ftime', na.rm = FALSE, ncores = NULL) { if (is.null(data)) { stop("Parameter 'data' cannot be NULL.") diff --git a/R/PeriodMean.R b/R/PeriodMean.R index 3e7619d..3b1cf7f 100644 --- a/R/PeriodMean.R +++ b/R/PeriodMean.R @@ -146,13 +146,22 @@ CST_PeriodMean <- function(data, start = NULL, end = NULL, #'indicator in the element \code{data}. 
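As a rough base-R analogue of what the improved PeriodMean example computes: the reduction is a mean over the 'ftime' dimension, so the output keeps every other dimension of the input (the package function additionally handles 'dates', 'start'/'end' subsetting and NA treatment). A sketch with synthetic data, not the package implementation:

# Synthetic data; illustration of the dimension reduction only.
dat <- array(rnorm(7 * 4 * 3), dim = c(member = 7, sdate = 4, ftime = 3))
res <- apply(dat, MARGIN = c(1, 2), FUN = mean)  # average over the 3rd ('ftime') dimension
dim(res)                                         # 7 4: all input dimensions except 'ftime'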
#' #'@examples -#'exp <- array(rnorm(56), dim = c(member = 7, ftime = 8)) -#'SA <- PeriodMean(exp, time_dim = 'ftime') +#'data <- array(rnorm(45), dim = c(member = 7, sdate = 4, ftime = 3)) +#'Dates <- c(seq(as.Date("2000-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2001-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), +#' seq(as.Date("2001-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2002-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), +#' seq(as.Date("2002-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2003-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), +#' seq(as.Date("2003-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2004-01-01", "%Y-%m-%d", tz = "UTC"), by = "month")) +#'dim(Dates) <- c(sdate = 4, ftime = 3) +#'SA <- PeriodMean(data, dates = Dates, start = list(01, 12), end = list(01, 01)) #' #'@import multiApply #'@export PeriodMean <- function(data, dates = NULL, start = NULL, end = NULL, - time_dim = 'time', na.rm = FALSE, ncores = NULL) { + time_dim = 'ftime', na.rm = FALSE, ncores = NULL) { if (is.null(data)) { stop("Parameter 'data' cannot be NULL.") diff --git a/man/PeriodAccumulation.Rd b/man/PeriodAccumulation.Rd index 614b65c..0260648 100644 --- a/man/PeriodAccumulation.Rd +++ b/man/PeriodAccumulation.Rd @@ -9,7 +9,7 @@ PeriodAccumulation( dates = NULL, start = NULL, end = NULL, - time_dim = "time", + time_dim = "ftime", na.rm = FALSE, ncores = NULL ) diff --git a/man/PeriodMean.Rd b/man/PeriodMean.Rd index f52db40..d02a0ab 100644 --- a/man/PeriodMean.Rd +++ b/man/PeriodMean.Rd @@ -9,7 +9,7 @@ PeriodMean( dates = NULL, start = NULL, end = NULL, - time_dim = "time", + time_dim = "ftime", na.rm = FALSE, ncores = NULL ) @@ -59,7 +59,16 @@ this function: } } \examples{ -exp <- array(rnorm(56), dim = c(member = 7, ftime = 8)) -SA <- PeriodMean(exp, time_dim = 'ftime') +data <- array(rnorm(45), dim = c(member = 7, sdate = 4, ftime = 3)) +Dates <- c(seq(as.Date("2000-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2001-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), + seq(as.Date("2001-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2002-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), + seq(as.Date("2002-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2003-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), + seq(as.Date("2003-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2004-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month")) +dim(Dates) <- c(sdate = 4, ftime = 3) +SA <- PeriodMean(data, dates = Dates, start = list(01, 12), end = list(01, 01)) } -- GitLab From 9eb3e4aecb2444fc1a3b6771b835633b11d6b90e Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 21 Jun 2023 11:27:51 +0200 Subject: [PATCH 22/87] Improve check when start and end are provided but dates is NULL; add test for this --- R/PeriodAccumulation.R | 8 ++++++-- R/PeriodMean.R | 13 +++++++------ R/QThreshold.R | 9 +++++++-- R/Threshold.R | 8 ++++++-- R/TotalSpellTimeExceedingThreshold.R | 7 +++++-- R/TotalTimeExceedingThreshold.R | 7 +++++-- R/WindCapacityFactor.R | 8 ++++++-- R/WindPowerDensity.R | 8 ++++++-- tests/testthat/test-PeriodAccumulation.R | 11 +++++++++-- tests/testthat/test-PeriodMean.R | 16 ++++++++++++---- tests/testthat/test-QThreshold.R | 7 +++++++ tests/testthat/test-Threshold.R | 7 +++++++ .../test-TotalSpellTimeExceedingThreshold.R | 7 +++++++ .../testthat/test-TotalTimeExceedingThreshold.R | 7 +++++++ tests/testthat/test-WindCapacityFactor.R | 7 +++++++ tests/testthat/test-WindPowerDensity.R | 7 +++++++ 16 files changed, 111 insertions(+), 26 deletions(-) diff --git a/R/PeriodAccumulation.R 
b/R/PeriodAccumulation.R index 8f1fb7f..965534f 100644 --- a/R/PeriodAccumulation.R +++ b/R/PeriodAccumulation.R @@ -180,8 +180,12 @@ PeriodAccumulation <- function(data, dates = NULL, start = NULL, end = NULL, dim(data) <- length(data) names(dim(data)) <- time_dim } - if (!is.null(dates)) { - if (!is.null(start) && !is.null(end)) { + + if (!is.null(start) && !is.null(end)) { + if (is.null(dates)) { + warning("Parameter 'dates' is NULL and the average of the ", + "full data provided in 'data' is computed.") + } else { if (!any(c(is.list(start), is.list(end)))) { stop("Parameter 'start' and 'end' must be lists indicating the ", "day and the month of the period start and end.") diff --git a/R/PeriodMean.R b/R/PeriodMean.R index 3b1cf7f..fd8fd7a 100644 --- a/R/PeriodMean.R +++ b/R/PeriodMean.R @@ -173,13 +173,14 @@ PeriodMean <- function(data, dates = NULL, start = NULL, end = NULL, dim(data) <- length(data) names(data) <- time_dim } - if (is.null(dates)) { - warning("Parameter 'dates' is NULL and the Average of the ", - "full data provided in 'data' is computed.") - } else { - if (!is.null(start) && !is.null(end)) { + + if (!is.null(start) && !is.null(end)) { + if (is.null(dates)) { + warning("Parameter 'dates' is NULL and the average of the ", + "full data provided in 'data' is computed.") + } else { if (!any(c(is.list(start), is.list(end)))) { - stop("Parameter 'start' and 'end' must be lists indicating the ", + stop("Parameter 'start' and 'end' must be lists indicating the ", "day and the month of the period start and end.") } if (!is.null(dim(dates))) { diff --git a/R/QThreshold.R b/R/QThreshold.R index 0d069bb..c39a14b 100644 --- a/R/QThreshold.R +++ b/R/QThreshold.R @@ -217,8 +217,12 @@ QThreshold <- function(data, threshold, dates = NULL, start = NULL, end = NULL, if (is.null(memb_dim)) { memb_dim <- 99999 } - if (!is.null(dates)) { - if (!is.null(start) && !is.null(end)) { + + if (!is.null(start) && !is.null(end)) { + if (is.null(dates)) { + warning("Parameter 'dates' is NULL and the average of the ", + "full data provided in 'data' is computed.") + } else { if (!any(c(is.list(start), is.list(end)))) { stop("Parameter 'start' and 'end' must be lists indicating the ", "day and the month of the period start and end.") @@ -244,6 +248,7 @@ QThreshold <- function(data, threshold, dates = NULL, start = NULL, end = NULL, } } } + if (length(threshold) == 1) { if (memb_dim %in% names(dim(data))) { probs <- Apply(list(data), target_dims = c(memb_dim, sdate_dim), diff --git a/R/Threshold.R b/R/Threshold.R index d76cbfa..b4e9638 100644 --- a/R/Threshold.R +++ b/R/Threshold.R @@ -157,8 +157,12 @@ Threshold <- function(data, threshold, dates = NULL, start = NULL, end = NULL, if (is.null(names(dim(data)))) { stop("Parameter 'data' must have named dimensions.") } - if (!is.null(dates)) { - if (!is.null(start) && !is.null(end)) { + + if (!is.null(start) && !is.null(end)) { + if (is.null(dates)) { + warning("Parameter 'dates' is NULL and the average of the ", + "full data provided in 'data' is computed.") + } else { if (!any(c(is.list(start), is.list(end)))) { stop("Parameter 'start' and 'end' must be lists indicating the ", "day and the month of the period start and end.") diff --git a/R/TotalSpellTimeExceedingThreshold.R b/R/TotalSpellTimeExceedingThreshold.R index 5d649b4..5cf2e1d 100644 --- a/R/TotalSpellTimeExceedingThreshold.R +++ b/R/TotalSpellTimeExceedingThreshold.R @@ -352,8 +352,11 @@ TotalSpellTimeExceedingThreshold <- function(data, threshold, spell, op = '>', } } # dates - if 
(!is.null(dates)) { - if (!is.null(start) && !is.null(end)) { + if (!is.null(start) && !is.null(end)) { + if (is.null(dates)) { + warning("Parameter 'dates' is NULL and the average of the ", + "full data provided in 'data' is computed.") + } else { if (!any(c(is.list(start), is.list(end)))) { stop("Parameter 'start' and 'end' must be lists indicating the ", "day and the month of the period start and end.") diff --git a/R/TotalTimeExceedingThreshold.R b/R/TotalTimeExceedingThreshold.R index 057bb8b..8a8b7ed 100644 --- a/R/TotalTimeExceedingThreshold.R +++ b/R/TotalTimeExceedingThreshold.R @@ -351,8 +351,11 @@ TotalTimeExceedingThreshold <- function(data, threshold, op = '>', } } # dates - if (!is.null(dates)) { - if (!is.null(start) && !is.null(end)) { + if (!is.null(start) && !is.null(end)) { + if (is.null(dates)) { + warning("Parameter 'dates' is NULL and the average of the ", + "full data provided in 'data' is computed.") + } else { if (!any(c(is.list(start), is.list(end)))) { stop("Parameter 'start' and 'end' must be lists indicating the ", "day and the month of the period start and end.") diff --git a/R/WindCapacityFactor.R b/R/WindCapacityFactor.R index 906ade3..7f32139 100644 --- a/R/WindCapacityFactor.R +++ b/R/WindCapacityFactor.R @@ -175,8 +175,12 @@ WindCapacityFactor <- function(wind, IEC_class = c("I", "I/II", "II", "II/III", ) pc_file <- system.file("power_curves", pc_files[IEC_class], package = "CSIndicators", mustWork = T) pc <- read_pc(pc_file) - if (!is.null(dates)) { - if (!is.null(start) && !is.null(end)) { + + if (!is.null(start) && !is.null(end)) { + if (is.null(dates)) { + warning("Parameter 'dates' is NULL and the average of the ", + "full data provided in 'data' is computed.") + } else { if (!any(c(is.list(start), is.list(end)))) { stop("Parameter 'start' and 'end' must be lists indicating the ", "day and the month of the period start and end.") diff --git a/R/WindPowerDensity.R b/R/WindPowerDensity.R index e9c70f6..181e509 100644 --- a/R/WindPowerDensity.R +++ b/R/WindPowerDensity.R @@ -136,8 +136,12 @@ CST_WindPowerDensity <- function(wind, ro = 1.225, start = NULL, end = NULL, #'@export WindPowerDensity <- function(wind, ro = 1.225, dates = NULL, start = NULL, end = NULL, time_dim = 'ftime', ncores = NULL) { - if (!is.null(dates)) { - if (!is.null(start) && !is.null(end)) { + + if (!is.null(start) && !is.null(end)) { + if (is.null(dates)) { + warning("Parameter 'dates' is NULL and the average of the ", + "full data provided in 'data' is computed.") + } else { if (!any(c(is.list(start), is.list(end)))) { stop("Parameter 'start' and 'end' must be lists indicating the ", "day and the month of the period start and end.") diff --git a/tests/testthat/test-PeriodAccumulation.R b/tests/testthat/test-PeriodAccumulation.R index 74bb45d..6898a93 100644 --- a/tests/testthat/test-PeriodAccumulation.R +++ b/tests/testthat/test-PeriodAccumulation.R @@ -27,20 +27,27 @@ test_that("1. 
Sanity Checks", { PeriodAccumulation(1:10), 55 ) - data <- array(1:24, c(sdate = 2, time = 3, lon = 4)) + data <- array(1:24, c(sdate = 2, ftime = 3, lon = 4)) expect_equal( PeriodAccumulation(data), array(c(9, 12, 27, 30, 45, 48, 63, 66), c(sdate = 2, lon = 4)) ) # Test dates warning expect_warning( - PeriodAccumulation(array(1:10, c(time = 10)), + PeriodAccumulation(array(1:10, c(ftime = 10)), dates = seq(as.Date("01-05-2000", format = "%d-%m-%Y"), as.Date("10-05-2000", format = "%d-%m-%Y"), by = 'day'), start = list(05, 02), end = list(05, 09)), paste0("Parameter 'dates' must have named dimensions if 'start' and 'end' ", "are not NULL. All data will be used.") ) + # start and end when dates is not provided + expect_warning( + PeriodAccumulation(array(1:10, c(ftime = 10)), + start = list(05, 02), end = list(05, 09)), + paste0("Parameter 'dates' is NULL and the average of the full data ", + "provided in 'data' is computed.") + ) }) ############################################## diff --git a/tests/testthat/test-PeriodMean.R b/tests/testthat/test-PeriodMean.R index 18b5995..cd9f5fe 100644 --- a/tests/testthat/test-PeriodMean.R +++ b/tests/testthat/test-PeriodMean.R @@ -18,14 +18,15 @@ test_that("1. Sanity Checks", { ) expect_error( PeriodMean(1, dates = '2000-01-01', end = 3, start = 4), - "Parameter 'start' and 'end' must be lists indicating the day and the month of the period start and end.") + "Parameter 'start' and 'end' must be lists indicating the day and the month of the period start and end." + ) suppressWarnings( expect_equal( - PeriodMean(array(1:10, c(time = 10))), + PeriodMean(array(1:10, c(ftime = 10))), 5.5 ) ) - data <- array(1:24, c(sdate = 2, time = 3, lon = 4)) + data <- array(1:24, c(sdate = 2, ftime = 3, lon = 4)) suppressWarnings( expect_equal( PeriodMean(data), @@ -35,13 +36,20 @@ test_that("1. Sanity Checks", { ) # Test dates warning expect_warning( - PeriodMean(array(1:10, c(time = 10)), + PeriodMean(array(1:10, c(ftime = 10)), dates = seq(as.Date("01-05-2000", format = "%d-%m-%Y"), as.Date("10-05-2000", format = "%d-%m-%Y"), by = 'day'), start = list(05, 02), end = list(05, 09)), paste0("Parameter 'dates' must have named dimensions if 'start' and 'end' ", "are not NULL. All data will be used.") ) + # start and end when dates is not provided + expect_warning( + PeriodMean(array(1:10, c(ftime = 10)), + start = list(05, 02), end = list(05, 09)), + paste0("Parameter 'dates' is NULL and the average of the full data ", + "provided in 'data' is computed.") + ) }) ############################################## diff --git a/tests/testthat/test-QThreshold.R b/tests/testthat/test-QThreshold.R index 3599bd9..57883b1 100644 --- a/tests/testthat/test-QThreshold.R +++ b/tests/testthat/test-QThreshold.R @@ -132,6 +132,13 @@ test_that("1. Sanity checks", { end = list(21, 6))), c(sdate = 3, member = 5, ftime = 52, lon = 2) ) + # start and end when dates is not provided + expect_warning( + QThreshold(array(1:61, dim = c(ftime = 61, sdate = 3)), threshold = 25, + start = list(05, 02), end = list(05, 09)), + paste0("Parameter 'dates' is NULL and the average of the full data ", + "provided in 'data' is computed.") + ) }) ############################################## diff --git a/tests/testthat/test-Threshold.R b/tests/testthat/test-Threshold.R index 258e438..7cb83cf 100644 --- a/tests/testthat/test-Threshold.R +++ b/tests/testthat/test-Threshold.R @@ -88,6 +88,13 @@ test_that("1. 
Sanity checks", { end = list(21, 6))), c(ftime = 52, lon = 2) ) + # start and end when dates is not provided + expect_warning( + Threshold(array(1:366, dim = c(ftime = 61, sdate = 3, member = 2)), + threshold = 0.8, start = list(05, 02), end = list(05, 09)), + paste0("Parameter 'dates' is NULL and the average of the full data ", + "provided in 'data' is computed.") + ) }) ############################################## diff --git a/tests/testthat/test-TotalSpellTimeExceedingThreshold.R b/tests/testthat/test-TotalSpellTimeExceedingThreshold.R index f76b457..b66c5ae 100644 --- a/tests/testthat/test-TotalSpellTimeExceedingThreshold.R +++ b/tests/testthat/test-TotalSpellTimeExceedingThreshold.R @@ -142,6 +142,13 @@ test_that("1. Sanity checks", { paste0("Parameter 'start' and 'end' must be lists indicating the ", "day and the month of the period start and end.") ) + # start and end when dates is not provided + expect_warning( + TotalSpellTimeExceedingThreshold(array(1:10, c(ftime = 10)), threshold = 5, spell = 2, + start = list(05, 02), end = list(05, 09)), + paste0("Parameter 'dates' is NULL and the average of the full data ", + "provided in 'data' is computed.") + ) }) ########################################################################### diff --git a/tests/testthat/test-TotalTimeExceedingThreshold.R b/tests/testthat/test-TotalTimeExceedingThreshold.R index 4a10b1b..c025c3e 100644 --- a/tests/testthat/test-TotalTimeExceedingThreshold.R +++ b/tests/testthat/test-TotalTimeExceedingThreshold.R @@ -134,6 +134,13 @@ test_that("1. Sanity checks", { paste0("Parameter 'start' and 'end' must be lists indicating the ", "day and the month of the period start and end.") ) + # start and end when dates is not provided + expect_warning( + TotalTimeExceedingThreshold(array(1:10, c(ftime = 10)), threshold = 5, + start = list(05, 02), end = list(05, 09)), + paste0("Parameter 'dates' is NULL and the average of the full data ", + "provided in 'data' is computed.") + ) }) diff --git a/tests/testthat/test-WindCapacityFactor.R b/tests/testthat/test-WindCapacityFactor.R index 6fec014..3afa27e 100644 --- a/tests/testthat/test-WindCapacityFactor.R +++ b/tests/testthat/test-WindCapacityFactor.R @@ -45,6 +45,13 @@ test_that("1. Input checks", { paste0("Parameter 'start' and 'end' must be lists indicating the ", "day and the month of the period start and end.") ) + # start and end when dates is not provided + expect_warning( + WindCapacityFactor(array(1:10, c(ftime = 10)), + start = list(05, 02), end = list(05, 09)), + paste0("Parameter 'dates' is NULL and the average of the full data ", + "provided in 'data' is computed.") + ) }) #################################################### diff --git a/tests/testthat/test-WindPowerDensity.R b/tests/testthat/test-WindPowerDensity.R index a5abd4d..999235a 100644 --- a/tests/testthat/test-WindPowerDensity.R +++ b/tests/testthat/test-WindPowerDensity.R @@ -44,6 +44,13 @@ test_that("1. 
Input checks", { paste0("Parameter 'start' and 'end' must be lists indicating the ", "day and the month of the period start and end.") ) + # start and end when dates is not provided + expect_warning( + WindPowerDensity(array(1:10, c(ftime = 10)), + start = list(05, 02), end = list(05, 09)), + paste0("Parameter 'dates' is NULL and the average of the full data ", + "provided in 'data' is computed.") + ) }) ########################################################################### -- GitLab From 8777fc856ee4baff0106f407110a3aadbdf5245d Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 21 Jun 2023 16:11:11 +0200 Subject: [PATCH 23/87] Add return to PeriodMean --- R/PeriodMean.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/PeriodMean.R b/R/PeriodMean.R index fd8fd7a..2a85ceb 100644 --- a/R/PeriodMean.R +++ b/R/PeriodMean.R @@ -195,6 +195,7 @@ PeriodMean <- function(data, dates = NULL, start = NULL, end = NULL, } total <- Apply(list(data), target_dims = time_dim, fun = mean, na.rm = na.rm, ncores = ncores)$output1 + return(total) } -- GitLab From 18c77dfe194af21ea7f877dbde1f3b19215538dd Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 21 Jun 2023 16:28:29 +0200 Subject: [PATCH 24/87] Add functions to compute bioclimatic indicators; add unit tests --- R/PeriodFun.R | 190 +++++++++++++++++++++++ R/PeriodMax.R | 205 +++++++++++++++++++++++++ R/PeriodMin.R | 205 +++++++++++++++++++++++++ R/PeriodVariance.R | 219 +++++++++++++++++++++++++++ tests/testthat/test-PeriodFun.R | 120 +++++++++++++++ tests/testthat/test-PeriodMax.R | 123 +++++++++++++++ tests/testthat/test-PeriodMin.R | 119 +++++++++++++++ tests/testthat/test-PeriodVariance.R | 118 +++++++++++++++ 8 files changed, 1299 insertions(+) create mode 100644 R/PeriodFun.R create mode 100644 R/PeriodMax.R create mode 100644 R/PeriodMin.R create mode 100644 R/PeriodVariance.R create mode 100644 tests/testthat/test-PeriodFun.R create mode 100644 tests/testthat/test-PeriodMax.R create mode 100644 tests/testthat/test-PeriodMin.R create mode 100644 tests/testthat/test-PeriodVariance.R diff --git a/R/PeriodFun.R b/R/PeriodFun.R new file mode 100644 index 0000000..9c33ba2 --- /dev/null +++ b/R/PeriodFun.R @@ -0,0 +1,190 @@ +#'Period Function on 's2dv_cube' objects +#' +#'Period Fun computes a calculation of a given variable in a period. +#' +#'@param data An 's2dv_cube' object as provided function \code{CST_Load} in +#' package CSTools. +#'@param fun An atomic function to compute a calculation over a period. +#'@param start An optional parameter to defined the initial date of the period +#' to select from the data by providing a list of two elements: the initial +#' date of the period and the initial month of the period. By default it is set +#' to NULL and the indicator is computed using all the data provided in +#' \code{data}. +#'@param end An optional parameter to defined the final date of the period to +#' select from the data by providing a list of two elements: the final day of +#' the period and the final month of the period. By default it is set to NULL +#' and the indicator is computed using all the data provided in \code{data}. +#'@param time_dim A character string indicating the name of the dimension to +#' compute the indicator. By default, it is set to 'ftime'. More than one +#' dimension name matching the dimensions provided in the object +#' \code{data$data} can be specified. +#'@param na.rm A logical value indicating whether to ignore NA values (TRUE) or +#' not (FALSE). 
+#'@param ncores An integer indicating the number of cores to use in parallel +#' computation. +#' +#'@return An 's2dv_cube' object containing the indicator in the element +#'\code{data} with dimensions of the input parameter 'data' except the +#'dimension where the mean has been computed (specified with 'time_dim'). A new +#'element called 'time_bounds' will be added into the 'attrs' element in the +#''s2dv_cube' object. It consists of a list containing two elements, the start +#'and end dates of the aggregated period with the same dimensions of 'Dates' +#'element. +#' +#'@examples +#'exp <- NULL +#'exp$data <- array(rnorm(45), dim = c(member = 7, sdate = 4, ftime = 3)) +#'Dates <- c(seq(as.Date("2000-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2001-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), +#' seq(as.Date("2001-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2002-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), +#' seq(as.Date("2002-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2003-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), +#' seq(as.Date("2003-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2004-01-01", "%Y-%m-%d", tz = "UTC"), by = "month")) +#'dim(Dates) <- c(sdate = 4, ftime = 3) +#'exp$attrs$Dates <- Dates +#'class(exp) <- 's2dv_cube' +#' +#'SA <- CST_PeriodFun(exp, fun = mean, start = list(01, 12), +#' end = list(01, 01)) +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@export +CST_PeriodFun <- function(data, fun, start = NULL, end = NULL, + time_dim = 'ftime', na.rm = FALSE, + ncores = NULL) { + # Check 's2dv_cube' + if (!inherits(data, 's2dv_cube')) { + stop("Parameter 'data' must be of the class 's2dv_cube'.") + } + # Dates subset + if (!is.null(start) && !is.null(end)) { + if (is.null(dim(data$attrs$Dates))) { + warning("Dimensions in 'data' element 'attrs$Dates' are missed and ", + "all data would be used.") + start <- NULL + end <- NULL + } + } + + Dates <- data$attrs$Dates + total <- PeriodFun(data = data$data, fun = fun, dates = Dates, start = start, + end = end, time_dim = time_dim, na.rm = na.rm, + ncores = ncores) + + data$data <- total + data$dims <- dim(total) + + if (!is.null(Dates)) { + if (!is.null(start) && !is.null(end)) { + Dates <- SelectPeriodOnDates(dates = Dates, start = start, end = end, + time_dim = time_dim, ncores = ncores) + } + if (is.null(dim(Dates))) { + warning("Element 'Dates' has NULL dimensions. They will not be ", + "subset and 'time_bounds' will be missed.") + data$attrs$Dates <- Dates + } else { + # Create time_bounds + time_bounds <- NULL + time_bounds$start <- ClimProjDiags::Subset(x = Dates, along = time_dim, + indices = 1, drop = 'selected') + time_bounds$end <- ClimProjDiags::Subset(x = Dates, along = time_dim, + indices = dim(Dates)[time_dim], + drop = 'selected') + + # Add Dates in attrs + data$attrs$Dates <- time_bounds$start + data$attrs$time_bounds <- time_bounds + } + } + return(data) +} + +#'Period Function on multidimensional array objects +#' +#'Period Fun computes a calculation of a given variable in a period. +#' +#'@param data A multidimensional array with named dimensions. +#'@param fun An atomic function to compute a calculation over a period. +#'@param dates A multidimensional array of dates with named dimensions matching +#' the temporal dimensions on parameter 'data'. By default it is NULL, to +#' select aperiod this parameter must be provided. 
+#'@param start An optional parameter to defined the initial date of the period +#' to select from the data by providing a list of two elements: the initial +#' date of the period and the initial month of the period. By default it is set +#' to NULL and the indicator is computed using all the data provided in +#' \code{data}. +#'@param end An optional parameter to defined the final date of the period to +#' select from the data by providing a list of two elements: the final day of +#' the period and the final month of the period. By default it is set to NULL +#' and the indicator is computed using all the data provided in \code{data}. +#'@param time_dim A character string indicating the name of the dimension to +#' compute the indicator. By default, it is set to 'ftime'. More than one +#' dimension name matching the dimensions provided in the object +#' \code{data$data} can be specified. +#'@param na.rm A logical value indicating whether to ignore NA values (TRUE) or +#' not (FALSE). +#'@param ncores An integer indicating the number of cores to use in parallel +#' computation. +#' +#'@return A multidimensional array with named dimensions containing the +#'indicator in the element \code{data}. +#' +#'@examples +#'data <- array(rnorm(45), dim = c(member = 7, sdate = 4, ftime = 3)) +#'Dates <- c(seq(as.Date("2000-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2001-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), +#' seq(as.Date("2001-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2002-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), +#' seq(as.Date("2002-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2003-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), +#' seq(as.Date("2003-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2004-01-01", "%Y-%m-%d", tz = "UTC"), by = "month")) +#'dim(Dates) <- c(sdate = 4, ftime = 3) +#'SA <- PeriodFun(data, fun = mean, dates = Dates, start = list(01, 12), +#' end = list(01, 01)) +#' +#'@import multiApply +#'@export +PeriodFun <- function(data, fun, dates = NULL, start = NULL, end = NULL, + time_dim = 'ftime', na.rm = FALSE, ncores = NULL) { + + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!is.numeric(data)) { + stop("Parameter 'data' must be numeric.") + } + if (!is.array(data)) { + dim(data) <- length(data) + names(data) <- time_dim + } + + if (!is.null(start) && !is.null(end)) { + if (is.null(dates)) { + warning("Parameter 'dates' is NULL and the average of the ", + "full data provided in 'data' is computed.") + } else { + if (!any(c(is.list(start), is.list(end)))) { + stop("Parameter 'start' and 'end' must be lists indicating the ", + "day and the month of the period start and end.") + } + if (!is.null(dim(dates))) { + data <- SelectPeriodOnData(data = data, dates = dates, start = start, + end = end, time_dim = time_dim, + ncores = ncores) + } else { + warning("Parameter 'dates' must have named dimensions if 'start' and ", + "'end' are not NULL. All data will be used.") + } + } + } + total <- Apply(list(data), target_dims = time_dim, fun = fun, + na.rm = na.rm, ncores = ncores)$output1 + return(total) +} + + diff --git a/R/PeriodMax.R b/R/PeriodMax.R new file mode 100644 index 0000000..0d47033 --- /dev/null +++ b/R/PeriodMax.R @@ -0,0 +1,205 @@ +#'Period Max on 's2dv_cube' objects +#' +#'Period Max computes the maximum (max) of a given variable in a period. +#'Two bioclimatic indicators can be obtained by using this function: +#'\itemize{ +#' \item\code{BIO5}{(Providing temperature data) Max Temperature of Warmest +#' Month. 
The maximum monthly temperature occurrence over a +#' given year (time-series) or averaged span of years +#' (normal).} +#' \item\code{BIO13}{(Providing precipitation data) Precipitation of Wettest +#' Month. This index identifies the total precipitation +#' that prevails during the wettest month.} +#'} +#' +#'@param data An 's2dv_cube' object as provided function \code{CST_Load} in +#' package CSTools. +#'@param start An optional parameter to defined the initial date of the period +#' to select from the data by providing a list of two elements: the initial +#' date of the period and the initial month of the period. By default it is set +#' to NULL and the indicator is computed using all the data provided in +#' \code{data}. +#'@param end An optional parameter to defined the final date of the period to +#' select from the data by providing a list of two elements: the final day of +#' the period and the final month of the period. By default it is set to NULL +#' and the indicator is computed using all the data provided in \code{data}. +#'@param time_dim A character string indicating the name of the dimension to +#' compute the indicator. By default, it is set to 'ftime'. More than one +#' dimension name matching the dimensions provided in the object +#' \code{data$data} can be specified. +#'@param na.rm A logical value indicating whether to ignore NA values (TRUE) or +#' not (FALSE). +#'@param ncores An integer indicating the number of cores to use in parallel +#' computation. +#' +#'@return An 's2dv_cube' object containing the indicator in the element +#'\code{data} with dimensions of the input parameter 'data' except the +#'dimension where the max has been computed (specified with 'time_dim'). A new +#'element called 'time_bounds' will be added into the 'attrs' element in the +#''s2dv_cube' object. It consists of a list containing two elements, the start +#'and end dates of the aggregated period with the same dimensions of 'Dates' +#'element. 
+#' +#'@examples +#'exp <- NULL +#'exp$data <- array(rnorm(45), dim = c(member = 7, sdate = 4, ftime = 3)) +#'Dates <- c(seq(as.Date("2000-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2001-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), +#' seq(as.Date("2001-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2002-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), +#' seq(as.Date("2002-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2003-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), +#' seq(as.Date("2003-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2004-01-01", "%Y-%m-%d", tz = "UTC"), by = "month")) +#'dim(Dates) <- c(sdate = 4, ftime = 3) +#'exp$attrs$Dates <- Dates +#'class(exp) <- 's2dv_cube' +#' +#'res <- CST_PeriodMax(exp, start = list(01, 12), end = list(01, 01)) +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@export +CST_PeriodMax <- function(data, start = NULL, end = NULL, + time_dim = 'ftime', na.rm = FALSE, + ncores = NULL) { + # Check 's2dv_cube' + if (!inherits(data, 's2dv_cube')) { + stop("Parameter 'data' must be of the class 's2dv_cube'.") + } + # Dates subset + if (!is.null(start) && !is.null(end)) { + if (is.null(dim(data$attrs$Dates))) { + warning("Dimensions in 'data' element 'attrs$Dates' are missed and ", + "all data would be used.") + start <- NULL + end <- NULL + } + } + + Dates <- data$attrs$Dates + total <- PeriodMax(data = data$data, dates = Dates, start = start, end = end, + time_dim = time_dim, na.rm = na.rm, ncores = ncores) + + data$data <- total + data$dims <- dim(total) + + if (!is.null(Dates)) { + if (!is.null(start) && !is.null(end)) { + Dates <- SelectPeriodOnDates(dates = Dates, start = start, end = end, + time_dim = time_dim, ncores = ncores) + } + if (is.null(dim(Dates))) { + warning("Element 'Dates' has NULL dimensions. They will not be ", + "subset and 'time_bounds' will be missed.") + data$attrs$Dates <- Dates + } else { + # Create time_bounds + time_bounds <- NULL + time_bounds$start <- ClimProjDiags::Subset(x = Dates, along = time_dim, + indices = 1, drop = 'selected') + time_bounds$end <- ClimProjDiags::Subset(x = Dates, along = time_dim, + indices = dim(Dates)[time_dim], + drop = 'selected') + + # Add Dates in attrs + data$attrs$Dates <- time_bounds$start + data$attrs$time_bounds <- time_bounds + } + } + return(data) +} + +#'Period max on multidimensional array objects +#' +#'Period max computes the average (max) of a given variable in a period. +#'Two bioclimatic indicators can be obtained by using this function: +#'\itemize{ +#' \item\code{BIO5}{(Providing temperature data) Max Temperature of Warmest +#' Month. The maximum monthly temperature occurrence over a +#' given year (time-series) or averaged span of years +#' (normal).} +#' \item\code{BIO13}{(Providing precipitation data) Precipitation of Wettest +#' Month. This index identifies the total precipitation +#' that prevails during the wettest month.} +#'} +#' +#'@param data A multidimensional array with named dimensions. +#'@param dates A multidimensional array of dates with named dimensions matching +#' the temporal dimensions on parameter 'data'. By default it is NULL, to +#' select aperiod this parameter must be provided. +#'@param start An optional parameter to defined the initial date of the period +#' to select from the data by providing a list of two elements: the initial +#' date of the period and the initial month of the period. By default it is set +#' to NULL and the indicator is computed using all the data provided in +#' \code{data}. 
+#'@param end An optional parameter to defined the final date of the period to +#' select from the data by providing a list of two elements: the final day of +#' the period and the final month of the period. By default it is set to NULL +#' and the indicator is computed using all the data provided in \code{data}. +#'@param time_dim A character string indicating the name of the dimension to +#' compute the indicator. By default, it is set to 'ftime'. More than one +#' dimension name matching the dimensions provided in the object +#' \code{data$data} can be specified. +#'@param na.rm A logical value indicating whether to ignore NA values (TRUE) or +#' not (FALSE). +#'@param ncores An integer indicating the number of cores to use in parallel +#' computation. +#' +#'@return A multidimensional array with named dimensions containing the +#'indicator in the element \code{data}. +#' +#'@examples +#'data <- array(rnorm(45), dim = c(member = 7, sdate = 4, ftime = 3)) +#'Dates <- c(seq(as.Date("2000-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2001-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), +#' seq(as.Date("2001-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2002-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), +#' seq(as.Date("2002-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2003-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), +#' seq(as.Date("2003-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2004-01-01", "%Y-%m-%d", tz = "UTC"), by = "month")) +#'dim(Dates) <- c(sdate = 4, ftime = 3) +#'res <- PeriodMax(data, dates = Dates, start = list(01, 12), end = list(01, 01)) +#' +#'@import multiApply +#'@export +PeriodMax <- function(data, dates = NULL, start = NULL, end = NULL, + time_dim = 'ftime', na.rm = FALSE, ncores = NULL) { + + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!is.numeric(data)) { + stop("Parameter 'data' must be numeric.") + } + if (!is.array(data)) { + dim(data) <- length(data) + names(data) <- time_dim + } + + if (!is.null(start) && !is.null(end)) { + if (is.null(dates)) { + warning("Parameter 'dates' is NULL and the average of the ", + "full data provided in 'data' is computed.") + } else { + if (!any(c(is.list(start), is.list(end)))) { + stop("Parameter 'start' and 'end' must be lists indicating the ", + "day and the month of the period start and end.") + } + if (!is.null(dim(dates))) { + data <- SelectPeriodOnData(data = data, dates = dates, start = start, + end = end, time_dim = time_dim, + ncores = ncores) + } else { + warning("Parameter 'dates' must have named dimensions if 'start' and ", + "'end' are not NULL. All data will be used.") + } + } + } + total <- Apply(list(data), target_dims = time_dim, fun = max, + na.rm = na.rm, ncores = ncores)$output1 + return(total) +} + + diff --git a/R/PeriodMin.R b/R/PeriodMin.R new file mode 100644 index 0000000..f5ea5b0 --- /dev/null +++ b/R/PeriodMin.R @@ -0,0 +1,205 @@ +#'Period Min on 's2dv_cube' objects +#' +#'Period Min computes the average (min) of a given variable in a period. +#'Two bioclimatic indicators can be obtained by using this function: +#'\itemize{ +#' \item\code{BIO6}{(Providing temperature data) Min Temperature of Coldest +#' Month. The minimum monthly temperature occurrence over a +#' given year (time-series) or averaged span of years +#' (normal).} +#' \item\code{BIO14}{(Providing precipitation data) Precipitation of Driest +#' Month. 
This index identifies the total precipitation +#' that prevails during the driest month.} +#'} +#' +#'@param data An 's2dv_cube' object as provided function \code{CST_Load} in +#' package CSTools. +#'@param start An optional parameter to defined the initial date of the period +#' to select from the data by providing a list of two elements: the initial +#' date of the period and the initial month of the period. By default it is set +#' to NULL and the indicator is computed using all the data provided in +#' \code{data}. +#'@param end An optional parameter to defined the final date of the period to +#' select from the data by providing a list of two elements: the final day of +#' the period and the final month of the period. By default it is set to NULL +#' and the indicator is computed using all the data provided in \code{data}. +#'@param time_dim A character string indicating the name of the dimension to +#' compute the indicator. By default, it is set to 'ftime'. More than one +#' dimension name matching the dimensions provided in the object +#' \code{data$data} can be specified. +#'@param na.rm A logical value indicating whether to ignore NA values (TRUE) or +#' not (FALSE). +#'@param ncores An integer indicating the number of cores to use in parallel +#' computation. +#' +#'@return An 's2dv_cube' object containing the indicator in the element +#'\code{data} with dimensions of the input parameter 'data' except the +#'dimension where the min has been computed (specified with 'time_dim'). A new +#'element called 'time_bounds' will be added into the 'attrs' element in the +#''s2dv_cube' object. It consists of a list containing two elements, the start +#'and end dates of the aggregated period with the same dimensions of 'Dates' +#'element. +#' +#'@examples +#'exp <- NULL +#'exp$data <- array(rnorm(45), dim = c(member = 7, sdate = 4, ftime = 3)) +#'Dates <- c(seq(as.Date("2000-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2001-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), +#' seq(as.Date("2001-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2002-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), +#' seq(as.Date("2002-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2003-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), +#' seq(as.Date("2003-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2004-01-01", "%Y-%m-%d", tz = "UTC"), by = "month")) +#'dim(Dates) <- c(sdate = 4, ftime = 3) +#'exp$attrs$Dates <- Dates +#'class(exp) <- 's2dv_cube' +#' +#'res <- CST_PeriodMin(exp, start = list(01, 12), end = list(01, 01)) +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@export +CST_PeriodMin <- function(data, start = NULL, end = NULL, + time_dim = 'ftime', na.rm = FALSE, + ncores = NULL) { + # Check 's2dv_cube' + if (!inherits(data, 's2dv_cube')) { + stop("Parameter 'data' must be of the class 's2dv_cube'.") + } + # Dates subset + if (!is.null(start) && !is.null(end)) { + if (is.null(dim(data$attrs$Dates))) { + warning("Dimensions in 'data' element 'attrs$Dates' are missed and ", + "all data would be used.") + start <- NULL + end <- NULL + } + } + + Dates <- data$attrs$Dates + total <- PeriodMin(data = data$data, dates = Dates, start = start, end = end, + time_dim = time_dim, na.rm = na.rm, ncores = ncores) + + data$data <- total + data$dims <- dim(total) + + if (!is.null(Dates)) { + if (!is.null(start) && !is.null(end)) { + Dates <- SelectPeriodOnDates(dates = Dates, start = start, end = end, + time_dim = time_dim, ncores = ncores) + } + if (is.null(dim(Dates))) { + warning("Element 'Dates' has 
NULL dimensions. They will not be ",
+              "subset and 'time_bounds' will be missing.")
+      data$attrs$Dates <- Dates
+    } else {
+      # Create time_bounds
+      time_bounds <- NULL
+      time_bounds$start <- ClimProjDiags::Subset(x = Dates, along = time_dim,
+                                                 indices = 1, drop = 'selected')
+      time_bounds$end <- ClimProjDiags::Subset(x = Dates, along = time_dim,
+                                               indices = dim(Dates)[time_dim],
+                                               drop = 'selected')
+
+      # Add Dates in attrs
+      data$attrs$Dates <- time_bounds$start
+      data$attrs$time_bounds <- time_bounds
+    }
+  }
+  return(data)
+}
+
+#'Period Min on multidimensional array objects
+#'
+#'Period Min computes the minimum (min) of a given variable in a period.
+#'Two bioclimatic indicators can be obtained by using this function:
+#'\itemize{
+#'  \item\code{BIO6}{(Providing temperature data) Min Temperature of Coldest
+#'                   Month. The minimum monthly temperature occurrence over a
+#'                   given year (time-series) or averaged span of years
+#'                   (normal).}
+#'  \item\code{BIO14}{(Providing precipitation data) Precipitation of Driest
+#'                    Month. This index identifies the total precipitation
+#'                    that prevails during the driest month.}
+#'}
+#'
+#'@param data A multidimensional array with named dimensions.
+#'@param dates A multidimensional array of dates with named dimensions matching
+#' the temporal dimensions on parameter 'data'. By default it is NULL, to
+#' select a period this parameter must be provided.
+#'@param start An optional parameter to define the initial date of the period
+#' to select from the data by providing a list of two elements: the initial
+#' day of the period and the initial month of the period. By default it is set
+#' to NULL and the indicator is computed using all the data provided in
+#' \code{data}.
+#'@param end An optional parameter to define the final date of the period to
+#' select from the data by providing a list of two elements: the final day of
+#' the period and the final month of the period. By default it is set to NULL
+#' and the indicator is computed using all the data provided in \code{data}.
+#'@param time_dim A character string indicating the name of the dimension to
+#' compute the indicator. By default, it is set to 'ftime'. More than one
+#' dimension name matching the dimensions provided in the object
+#' \code{data$data} can be specified.
+#'@param na.rm A logical value indicating whether to ignore NA values (TRUE) or
+#' not (FALSE).
+#'@param ncores An integer indicating the number of cores to use in parallel
+#' computation.
+#'
+#'@return A multidimensional array with named dimensions containing the
+#'indicator in the element \code{data}.
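# Editor's note: the sketch below is not part of the patch; it only illustrates
# how the PeriodMin call documented above maps onto the BIO6 indicator. The
# array values and dimension sizes are synthetic, and it assumes the
# CSIndicators package is installed so that PeriodMin is available.
library(CSIndicators)
# Monthly mean temperature: 2 start dates x 12 lead months x 3 longitudes.
tas <- array(rnorm(2 * 12 * 3, mean = 10, sd = 8),
             dim = c(sdate = 2, ftime = 12, lon = 3))
dates <- seq(as.Date("2010-01-01"), as.Date("2011-12-01"), by = "month")
dim(dates) <- c(ftime = 12, sdate = 2)
# BIO6-like value: minimum of the monthly means over the whole year.
bio6 <- PeriodMin(tas, time_dim = 'ftime')
dim(bio6)  # c(sdate = 2, lon = 3)
# Restricting the minimum to a sub-period (here June to August) follows the
# same start/end convention used in the examples of this file:
jja_min <- PeriodMin(tas, dates = dates, start = list(1, 6), end = list(31, 8),
                     time_dim = 'ftime')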
+#' +#'@examples +#'data <- array(rnorm(45), dim = c(member = 7, sdate = 4, ftime = 3)) +#'Dates <- c(seq(as.Date("2000-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2001-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), +#' seq(as.Date("2001-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2002-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), +#' seq(as.Date("2002-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2003-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), +#' seq(as.Date("2003-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2004-01-01", "%Y-%m-%d", tz = "UTC"), by = "month")) +#'dim(Dates) <- c(sdate = 4, ftime = 3) +#'res <- PeriodMin(data, dates = Dates, start = list(01, 12), end = list(01, 01)) +#' +#'@import multiApply +#'@export +PeriodMin <- function(data, dates = NULL, start = NULL, end = NULL, + time_dim = 'ftime', na.rm = FALSE, ncores = NULL) { + + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!is.numeric(data)) { + stop("Parameter 'data' must be numeric.") + } + if (!is.array(data)) { + dim(data) <- length(data) + names(data) <- time_dim + } + + if (!is.null(start) && !is.null(end)) { + if (is.null(dates)) { + warning("Parameter 'dates' is NULL and the average of the ", + "full data provided in 'data' is computed.") + } else { + if (!any(c(is.list(start), is.list(end)))) { + stop("Parameter 'start' and 'end' must be lists indicating the ", + "day and the month of the period start and end.") + } + if (!is.null(dim(dates))) { + data <- SelectPeriodOnData(data = data, dates = dates, start = start, + end = end, time_dim = time_dim, + ncores = ncores) + } else { + warning("Parameter 'dates' must have named dimensions if 'start' and ", + "'end' are not NULL. All data will be used.") + } + } + } + total <- Apply(list(data), target_dims = time_dim, fun = min, + na.rm = na.rm, ncores = ncores)$output1 + return(total) +} + + diff --git a/R/PeriodVariance.R b/R/PeriodVariance.R new file mode 100644 index 0000000..b9f3cff --- /dev/null +++ b/R/PeriodVariance.R @@ -0,0 +1,219 @@ +#'Period Variance on 's2dv_cube' objects +#' +#'Period Variance computes the average (var) of a given variable in a period. +#'Two bioclimatic indicators can be obtained by using this function: +#'\itemize{ +#' \item\code{BIO4}{(Providing temperature data) Temperature Seasonality +#' (Standard Deviation). The amount of temperature variation +#' over a given year (or averaged years) based on the standard +#' deviation (variation) of monthly temperature averages. } +#' \item\code{BIO15}{(Providing precipitation data) Precipitation Seasonality +#' (CV). This is a measure of the variation in +#' monthly precipitation totals over the course of the year. +#' This index is the ratio of the standard deviation of the +#' monthly total precipitation to the mean monthly total +#' precipitation (also known as the coefficient of variation) +#' and is expressed as a percentage} +#'} +#' +#'@param data An 's2dv_cube' object as provided function \code{CST_Load} in +#' package CSTools. +#'@param start An optional parameter to defined the initial date of the period +#' to select from the data by providing a list of two elements: the initial +#' date of the period and the initial month of the period. By default it is set +#' to NULL and the indicator is computed using all the data provided in +#' \code{data}. 
+#'@param end An optional parameter to defined the final date of the period to +#' select from the data by providing a list of two elements: the final day of +#' the period and the final month of the period. By default it is set to NULL +#' and the indicator is computed using all the data provided in \code{data}. +#'@param time_dim A character string indicating the name of the dimension to +#' compute the indicator. By default, it is set to 'ftime'. More than one +#' dimension name matching the dimensions provided in the object +#' \code{data$data} can be specified. +#'@param na.rm A logical value indicating whether to ignore NA values (TRUE) or +#' not (FALSE). +#'@param ncores An integer indicating the number of cores to use in parallel +#' computation. +#' +#'@return An 's2dv_cube' object containing the indicator in the element +#'\code{data} with dimensions of the input parameter 'data' except the +#'dimension where the var has been computed (specified with 'time_dim'). A new +#'element called 'time_bounds' will be added into the 'attrs' element in the +#''s2dv_cube' object. It consists of a list containing two elements, the start +#'and end dates of the aggregated period with the same dimensions of 'Dates' +#'element. +#' +#'@examples +#'exp <- NULL +#'exp$data <- array(rnorm(45), dim = c(member = 7, sdate = 4, ftime = 3)) +#'Dates <- c(seq(as.Date("2000-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2001-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), +#' seq(as.Date("2001-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2002-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), +#' seq(as.Date("2002-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2003-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), +#' seq(as.Date("2003-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2004-01-01", "%Y-%m-%d", tz = "UTC"), by = "month")) +#'dim(Dates) <- c(sdate = 4, ftime = 3) +#'exp$attrs$Dates <- Dates +#'class(exp) <- 's2dv_cube' +#' +#'res <- CST_PeriodVariance(exp, start = list(01, 12), end = list(01, 01)) +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@export +CST_PeriodVariance <- function(data, start = NULL, end = NULL, + time_dim = 'ftime', na.rm = FALSE, + ncores = NULL) { + # Check 's2dv_cube' + if (!inherits(data, 's2dv_cube')) { + stop("Parameter 'data' must be of the class 's2dv_cube'.") + } + # Dates subset + if (!is.null(start) && !is.null(end)) { + if (is.null(dim(data$attrs$Dates))) { + warning("Dimensions in 'data' element 'attrs$Dates' are missed and ", + "all data would be used.") + start <- NULL + end <- NULL + } + } + + Dates <- data$attrs$Dates + total <- PeriodVariance(data = data$data, dates = Dates, start = start, end = end, + time_dim = time_dim, na.rm = na.rm, ncores = ncores) + + data$data <- total + data$dims <- dim(total) + + if (!is.null(Dates)) { + if (!is.null(start) && !is.null(end)) { + Dates <- SelectPeriodOnDates(dates = Dates, start = start, end = end, + time_dim = time_dim, ncores = ncores) + } + if (is.null(dim(Dates))) { + warning("Element 'Dates' has NULL dimensions. 
They will not be ",
+              "subset and 'time_bounds' will be missing.")
+      data$attrs$Dates <- Dates
+    } else {
+      # Create time_bounds
+      time_bounds <- NULL
+      time_bounds$start <- ClimProjDiags::Subset(x = Dates, along = time_dim,
+                                                 indices = 1, drop = 'selected')
+      time_bounds$end <- ClimProjDiags::Subset(x = Dates, along = time_dim,
+                                               indices = dim(Dates)[time_dim],
+                                               drop = 'selected')
+
+      # Add Dates in attrs
+      data$attrs$Dates <- time_bounds$start
+      data$attrs$time_bounds <- time_bounds
+    }
+  }
+  return(data)
+}
+
+#'Period Variance on multidimensional array objects
+#'
+#'Period Variance computes the variance (var) of a given variable in a period.
+#'Two bioclimatic indicators can be obtained by using this function:
+#'\itemize{
+#'  \item\code{BIO4}{(Providing temperature data) Temperature Seasonality
+#'                   (Standard Deviation). The amount of temperature variation
+#'                   over a given year (or averaged years) based on the standard
+#'                   deviation (variation) of monthly temperature averages.}
+#'  \item\code{BIO15}{(Providing precipitation data) Precipitation Seasonality
+#'                    (CV). This is a measure of the variation in
+#'                    monthly precipitation totals over the course of the year.
+#'                    This index is the ratio of the standard deviation of the
+#'                    monthly total precipitation to the mean monthly total
+#'                    precipitation (also known as the coefficient of variation)
+#'                    and is expressed as a percentage.}
+#'}
+#'
+#'@param data A multidimensional array with named dimensions.
+#'@param dates A multidimensional array of dates with named dimensions matching
+#' the temporal dimensions on parameter 'data'. By default it is NULL, to
+#' select a period this parameter must be provided.
+#'@param start An optional parameter to define the initial date of the period
+#' to select from the data by providing a list of two elements: the initial
+#' day of the period and the initial month of the period. By default it is set
+#' to NULL and the indicator is computed using all the data provided in
+#' \code{data}.
+#'@param end An optional parameter to define the final date of the period to
+#' select from the data by providing a list of two elements: the final day of
+#' the period and the final month of the period. By default it is set to NULL
+#' and the indicator is computed using all the data provided in \code{data}.
+#'@param time_dim A character string indicating the name of the dimension to
+#' compute the indicator. By default, it is set to 'ftime'. More than one
+#' dimension name matching the dimensions provided in the object
+#' \code{data$data} can be specified.
+#'@param na.rm A logical value indicating whether to ignore NA values (TRUE) or
+#' not (FALSE).
+#'@param ncores An integer indicating the number of cores to use in parallel
+#' computation.
+#'
+#'@return A multidimensional array with named dimensions containing the
+#'indicator in the element \code{data}.
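# Editor's note: the sketch below is not part of the patch; it relates
# PeriodVariance to the BIO4 indicator described above. PeriodVariance returns
# the sample variance over 'time_dim', while BIO4 (temperature seasonality) is
# usually reported as the standard deviation, i.e. its square root. The data
# are synthetic. Note also that the .periodvariance helper defined further
# below passes 'na.rm' only to mean(), so a series containing NA still returns
# NA even when na.rm = TRUE.
library(CSIndicators)
tas <- array(rnorm(2 * 12, mean = 12, sd = 7), dim = c(sdate = 2, ftime = 12))
bio4_var <- PeriodVariance(tas, time_dim = 'ftime')
bio4_sd <- sqrt(bio4_var)  # standard deviation of the monthly means (BIO4)
# For complete series this matches the base R sample variance:
all.equal(as.vector(bio4_var), apply(tas, 1, var))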
+#' +#'@examples +#'data <- array(rnorm(45), dim = c(member = 7, sdate = 4, ftime = 3)) +#'Dates <- c(seq(as.Date("2000-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2001-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), +#' seq(as.Date("2001-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2002-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), +#' seq(as.Date("2002-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2003-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), +#' seq(as.Date("2003-11-01", "%Y-%m-%d", tz = "UTC"), +#' as.Date("2004-01-01", "%Y-%m-%d", tz = "UTC"), by = "month")) +#'dim(Dates) <- c(sdate = 4, ftime = 3) +#'res <- PeriodVariance(data, dates = Dates, start = list(01, 12), end = list(01, 01)) +#' +#'@import multiApply +#'@export +PeriodVariance <- function(data, dates = NULL, start = NULL, end = NULL, + time_dim = 'ftime', na.rm = FALSE, ncores = NULL) { + + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!is.numeric(data)) { + stop("Parameter 'data' must be numeric.") + } + if (!is.array(data)) { + dim(data) <- length(data) + names(data) <- time_dim + } + + if (!is.null(start) && !is.null(end)) { + if (is.null(dates)) { + warning("Parameter 'dates' is NULL and the average of the ", + "full data provided in 'data' is computed.") + } else { + if (!any(c(is.list(start), is.list(end)))) { + stop("Parameter 'start' and 'end' must be lists indicating the ", + "day and the month of the period start and end.") + } + if (!is.null(dim(dates))) { + data <- SelectPeriodOnData(data = data, dates = dates, start = start, + end = end, time_dim = time_dim, + ncores = ncores) + } else { + warning("Parameter 'dates' must have named dimensions if 'start' and ", + "'end' are not NULL. All data will be used.") + } + } + } + total <- Apply(list(data), target_dims = time_dim, + fun = .periodvariance, + na.rm = na.rm, ncores = ncores)$output1 + return(total) +} + +.periodvariance <- function(data, na.rm) { + var <- sum((data - mean(data, na.rm = na.rm))^2) / (length(data)-1) + return(var) +} + + diff --git a/tests/testthat/test-PeriodFun.R b/tests/testthat/test-PeriodFun.R new file mode 100644 index 0000000..c743aee --- /dev/null +++ b/tests/testthat/test-PeriodFun.R @@ -0,0 +1,120 @@ +library(CSTools) + +############################################## +test_that("1. Sanity Checks", { + expect_error( + PeriodFun('x'), + "Parameter 'data' must be numeric." + ) + suppressWarnings( + expect_equal( + PeriodFun(array(1, c(x = 1)), fun = mean, time_dim = 'x'), + 1 + ) + ) + expect_error( + PeriodFun(data = NULL, fun = mean), + "Parameter 'data' cannot be NULL." + ) + expect_error( + PeriodFun(1, fun = mean, dates = '2000-01-01', end = 3, start = 4), + paste0("Parameter 'start' and 'end' must be lists indicating the day ", + "and the month of the period start and end.") + ) + expect_equal( + PeriodFun(array(1:10, c(ftime = 10)), fun = mean), + 5.5 + ) + data <- array(1:24, c(sdate = 2, ftime = 3, lon = 4)) + expect_equal( + PeriodFun(data, fun = min), + array(c(1, 2, 7, 8, 13, 14, 19, 20), + c(sdate = 2, lon = 4)) + ) + # Test dates warning + expect_warning( + PeriodFun(array(1:10, c(ftime = 10)), fun = mean, + dates = seq(as.Date("01-05-2000", format = "%d-%m-%Y"), + as.Date("10-05-2000", format = "%d-%m-%Y"), by = 'day'), + start = list(05, 02), end = list(05, 09)), + paste0("Parameter 'dates' must have named dimensions if 'start' and 'end' ", + "are not NULL. 
All data will be used.") + ) + # start and end when dates is not provided + expect_warning( + PeriodFun(array(1:10, c(ftime = 10)), fun = sum, + start = list(05, 02), end = list(05, 09)), + paste0("Parameter 'dates' is NULL and the average of the full data ", + "provided in 'data' is computed.") + ) +}) + +############################################## + +test_that("2. Seasonal", { + exp <- NULL + exp$data <- array(1:(1 * 3 * 214 * 2), + c(memb = 1, sdate = 3, ftime = 214, lon = 2)) + exp$dims <- dim(exp$data) + exp$attrs$Dates <- c(seq(as.Date("01-04-2000", format = "%d-%m-%Y"), + as.Date("31-10-2000", format = "%d-%m-%Y"), by = 'day'), + seq(as.Date("01-04-2001", format = "%d-%m-%Y"), + as.Date("31-10-2001", format = "%d-%m-%Y"), by = 'day'), + seq(as.Date("01-04-2002", format = "%d-%m-%Y"), + as.Date("31-10-2002", format = "%d-%m-%Y"), by = 'day')) + dim(exp$attrs$Dates) <- c(ftime = 214, sdate = 3) + class(exp) <- 's2dv_cube' + output <- exp + output$data <- array(c(min(exp$data[1,1,21:82,1]), min(exp$data[1,2,21:82,1]), + min(exp$data[1,3,21:82,1]), min(exp$data[1,1,21:82,2]), + min(exp$data[1,2,21:82,2]), min(exp$data[1,3,21:82,2])), + c(memb = 1, sdate = 3, lon = 2)) + expect_equal( + CST_PeriodFun(exp, fun = min, start = list(21, 4), end = list(21, 6))$data, + output$data + ) +}) + +############################################## +test_that("3. Subset Dates and check time_bounds", { + exp <- CSTools::lonlat_prec + res <- CST_PeriodFun(data = CSTools::lonlat_prec, fun = min, time_dim = 'ftime', + start = list(10, 03), end = list(20, 03)) + # Check dims + expect_equal( + dim(res$data), + res$dims + ) + # Check Dates + expect_equal( + dim(res$data)['sdate'], + dim(res$attrs$Dates) + ) + # Check time_bounds + expect_equal( + res$attrs$Dates, + res$attrs$time_bounds$start + ) + expect_equal( + dim(res$attrs$time_bounds$start), + dim(res$attrs$time_bounds$end) + ) + # Check 'sdates' + expect_equal( + all(lubridate::month(res$attrs$time_bounds$start) == 3), + TRUE + ) + expect_equal( + all(lubridate::day(res$attrs$time_bounds$start) == 10), + TRUE + ) + expect_equal( + all(lubridate::month(res$attrs$time_bounds$end) == 03), + TRUE + ) + expect_equal( + all(lubridate::day(res$attrs$time_bounds$end) == 20), + TRUE + ) +}) + diff --git a/tests/testthat/test-PeriodMax.R b/tests/testthat/test-PeriodMax.R new file mode 100644 index 0000000..967b086 --- /dev/null +++ b/tests/testthat/test-PeriodMax.R @@ -0,0 +1,123 @@ +library(CSTools) + +############################################## +test_that("1. Sanity Checks", { + expect_error( + PeriodMax('x'), + "Parameter 'data' must be numeric." + ) + suppressWarnings( + expect_equal( + PeriodMax(array(1, c(x = 1)), time_dim = 'x'), + 1 + ) + ) + expect_error( + PeriodMax(data = NULL), + "Parameter 'data' cannot be NULL." + ) + expect_error( + PeriodMax(1, dates = '2000-01-01', end = 3, start = 4), + "Parameter 'start' and 'end' must be lists indicating the day and the month of the period start and end." 
+ ) + suppressWarnings( + expect_equal( + PeriodMax(array(1:10, c(ftime = 10))), + 10 + ) + ) + data <- array(1:24, c(sdate = 2, ftime = 3, lon = 4)) + suppressWarnings( + expect_equal( + PeriodMax(data), + array(c(5, 6, 11, 12, 17, 18, 23, 24), + c(sdate = 2, lon = 4)) + ) + ) + # Test dates warning + expect_warning( + PeriodMax(array(1:10, c(ftime = 10)), + dates = seq(as.Date("01-05-2000", format = "%d-%m-%Y"), + as.Date("10-05-2000", format = "%d-%m-%Y"), by = 'day'), + start = list(05, 02), end = list(05, 09)), + paste0("Parameter 'dates' must have named dimensions if 'start' and 'end' ", + "are not NULL. All data will be used.") + ) + # start and end when dates is not provided + expect_warning( + PeriodMax(array(1:10, c(ftime = 10)), + start = list(05, 02), end = list(05, 09)), + paste0("Parameter 'dates' is NULL and the average of the full data ", + "provided in 'data' is computed.") + ) +}) + +############################################## + +test_that("2. Seasonal", { + exp <- NULL + exp$data <- array(1:(1 * 3 * 214 * 2), + c(memb = 1, sdate = 3, ftime = 214, lon = 2)) + exp$dims <- dim(exp$data) + exp$attrs$Dates <- c(seq(as.Date("01-04-2000", format = "%d-%m-%Y"), + as.Date("31-10-2000", format = "%d-%m-%Y"), by = 'day'), + seq(as.Date("01-04-2001", format = "%d-%m-%Y"), + as.Date("31-10-2001", format = "%d-%m-%Y"), by = 'day'), + seq(as.Date("01-04-2002", format = "%d-%m-%Y"), + as.Date("31-10-2002", format = "%d-%m-%Y"), by = 'day')) + dim(exp$attrs$Dates) <- c(ftime = 214, sdate = 3) + class(exp) <- 's2dv_cube' + output <- exp + output$data <- array(c(max(exp$data[1,1,21:82,1]), max(exp$data[1,2,21:82,1]), + max(exp$data[1,3,21:82,1]), max(exp$data[1,1,21:82,2]), + max(exp$data[1,2,21:82,2]), max(exp$data[1,3,21:82,2])), + c(memb = 1, sdate = 3, lon = 2)) + expect_equal( + CST_PeriodMax(exp, start = list(21, 4), end = list(21, 6))$data, + output$data + ) +}) + +############################################## +test_that("3. Subset Dates and check time_bounds", { + exp <- CSTools::lonlat_prec + res <- CST_PeriodMax(data = CSTools::lonlat_prec, time_dim = 'ftime', + start = list(10, 03), end = list(20, 03)) + # Check dims + expect_equal( + dim(res$data), + res$dims + ) + # Check Dates + expect_equal( + dim(res$data)['sdate'], + dim(res$attrs$Dates) + ) + # Check time_bounds + expect_equal( + res$attrs$Dates, + res$attrs$time_bounds$start + ) + expect_equal( + dim(res$attrs$time_bounds$start), + dim(res$attrs$time_bounds$end) + ) + # Check 'sdates' + expect_equal( + all(lubridate::month(res$attrs$time_bounds$start) == 3), + TRUE + ) + expect_equal( + all(lubridate::day(res$attrs$time_bounds$start) == 10), + TRUE + ) + expect_equal( + all(lubridate::month(res$attrs$time_bounds$end) == 03), + TRUE + ) + expect_equal( + all(lubridate::day(res$attrs$time_bounds$end) == 20), + TRUE + ) +}) + diff --git a/tests/testthat/test-PeriodMin.R b/tests/testthat/test-PeriodMin.R new file mode 100644 index 0000000..da91a3c --- /dev/null +++ b/tests/testthat/test-PeriodMin.R @@ -0,0 +1,119 @@ +library(CSTools) + +############################################## +test_that("1. Sanity Checks", { + expect_error( + PeriodMin('x'), + "Parameter 'data' must be numeric." + ) + suppressWarnings( + expect_equal( + PeriodMin(array(1, c(x = 1)), time_dim = 'x'), + 1 + ) + ) + expect_error( + PeriodMin(data = NULL), + "Parameter 'data' cannot be NULL." 
+ ) + expect_error( + PeriodMin(1, dates = '2000-01-01', end = 3, start = 4), + "Parameter 'start' and 'end' must be lists indicating the day and the month of the period start and end." + ) + expect_equal( + PeriodMin(array(1:10, c(ftime = 10))), + 1 + ) + data <- array(1:24, c(sdate = 2, ftime = 3, lon = 4)) + expect_equal( + PeriodMin(data), + array(c(1, 2, 7, 8, 13, 14, 19, 20), + c(sdate = 2, lon = 4)) + ) + # Test dates warning + expect_warning( + PeriodMin(array(1:10, c(ftime = 10)), + dates = seq(as.Date("01-05-2000", format = "%d-%m-%Y"), + as.Date("10-05-2000", format = "%d-%m-%Y"), by = 'day'), + start = list(05, 02), end = list(05, 09)), + paste0("Parameter 'dates' must have named dimensions if 'start' and 'end' ", + "are not NULL. All data will be used.") + ) + # start and end when dates is not provided + expect_warning( + PeriodMin(array(1:10, c(ftime = 10)), + start = list(05, 02), end = list(05, 09)), + paste0("Parameter 'dates' is NULL and the average of the full data ", + "provided in 'data' is computed.") + ) +}) + +############################################## + +test_that("2. Seasonal", { + exp <- NULL + exp$data <- array(1:(1 * 3 * 214 * 2), + c(memb = 1, sdate = 3, ftime = 214, lon = 2)) + exp$dims <- dim(exp$data) + exp$attrs$Dates <- c(seq(as.Date("01-04-2000", format = "%d-%m-%Y"), + as.Date("31-10-2000", format = "%d-%m-%Y"), by = 'day'), + seq(as.Date("01-04-2001", format = "%d-%m-%Y"), + as.Date("31-10-2001", format = "%d-%m-%Y"), by = 'day'), + seq(as.Date("01-04-2002", format = "%d-%m-%Y"), + as.Date("31-10-2002", format = "%d-%m-%Y"), by = 'day')) + dim(exp$attrs$Dates) <- c(ftime = 214, sdate = 3) + class(exp) <- 's2dv_cube' + output <- exp + output$data <- array(c(min(exp$data[1,1,21:82,1]), min(exp$data[1,2,21:82,1]), + min(exp$data[1,3,21:82,1]), min(exp$data[1,1,21:82,2]), + min(exp$data[1,2,21:82,2]), min(exp$data[1,3,21:82,2])), + c(memb = 1, sdate = 3, lon = 2)) + expect_equal( + CST_PeriodMin(exp, start = list(21, 4), end = list(21, 6))$data, + output$data + ) +}) + +############################################## +test_that("3. Subset Dates and check time_bounds", { + exp <- CSTools::lonlat_prec + res <- CST_PeriodMin(data = CSTools::lonlat_prec, time_dim = 'ftime', + start = list(10, 03), end = list(20, 03)) + # Check dims + expect_equal( + dim(res$data), + res$dims + ) + # Check Dates + expect_equal( + dim(res$data)['sdate'], + dim(res$attrs$Dates) + ) + # Check time_bounds + expect_equal( + res$attrs$Dates, + res$attrs$time_bounds$start + ) + expect_equal( + dim(res$attrs$time_bounds$start), + dim(res$attrs$time_bounds$end) + ) + # Check 'sdates' + expect_equal( + all(lubridate::month(res$attrs$time_bounds$start) == 3), + TRUE + ) + expect_equal( + all(lubridate::day(res$attrs$time_bounds$start) == 10), + TRUE + ) + expect_equal( + all(lubridate::month(res$attrs$time_bounds$end) == 03), + TRUE + ) + expect_equal( + all(lubridate::day(res$attrs$time_bounds$end) == 20), + TRUE + ) +}) + diff --git a/tests/testthat/test-PeriodVariance.R b/tests/testthat/test-PeriodVariance.R new file mode 100644 index 0000000..1ac78c1 --- /dev/null +++ b/tests/testthat/test-PeriodVariance.R @@ -0,0 +1,118 @@ +library(CSTools) + +############################################## +test_that("1. Sanity Checks", { + expect_error( + PeriodVariance('x'), + "Parameter 'data' must be numeric." + ) + expect_equal( + PeriodVariance(array(1:2, c(x = 2)), time_dim = 'x'), + 0.5 + ) + expect_error( + PeriodVariance(data = NULL), + "Parameter 'data' cannot be NULL." 
+ ) + expect_error( + PeriodVariance(1, dates = '2000-01-01', end = 3, start = 4), + "Parameter 'start' and 'end' must be lists indicating the day and the month of the period start and end." + ) + expect_equal( + PeriodVariance(array(1:10, c(ftime = 10))), + 9.166667, + tolerance = 0.001 + ) + data <- array(1:24, c(sdate = 2, ftime = 3, lon = 4)) + expect_equal( + PeriodVariance(data), + array(rep(4, 8), + c(sdate = 2, lon = 4)) + ) + # Test dates warning + expect_warning( + PeriodVariance(array(1:10, c(ftime = 10)), + dates = seq(as.Date("01-05-2000", format = "%d-%m-%Y"), + as.Date("10-05-2000", format = "%d-%m-%Y"), by = 'day'), + start = list(05, 02), end = list(05, 09)), + paste0("Parameter 'dates' must have named dimensions if 'start' and 'end' ", + "are not NULL. All data will be used.") + ) + # start and end when dates is not provided + expect_warning( + PeriodVariance(array(1:10, c(ftime = 10)), + start = list(05, 02), end = list(05, 09)), + paste0("Parameter 'dates' is NULL and the average of the full data ", + "provided in 'data' is computed.") + ) +}) + +############################################## + +test_that("2. Seasonal", { + exp <- NULL + exp$data <- array(1:(1 * 3 * 214 * 2), + c(memb = 1, sdate = 3, ftime = 214, lon = 2)) + exp$dims <- dim(exp$data) + exp$attrs$Dates <- c(seq(as.Date("01-04-2000", format = "%d-%m-%Y"), + as.Date("31-10-2000", format = "%d-%m-%Y"), by = 'day'), + seq(as.Date("01-04-2001", format = "%d-%m-%Y"), + as.Date("31-10-2001", format = "%d-%m-%Y"), by = 'day'), + seq(as.Date("01-04-2002", format = "%d-%m-%Y"), + as.Date("31-10-2002", format = "%d-%m-%Y"), by = 'day')) + dim(exp$attrs$Dates) <- c(ftime = 214, sdate = 3) + class(exp) <- 's2dv_cube' + output <- exp + output$data <- array(c(var(exp$data[1,1,21:82,1]), var(exp$data[1,2,21:82,1]), + var(exp$data[1,3,21:82,1]), var(exp$data[1,1,21:82,2]), + var(exp$data[1,2,21:82,2]), var(exp$data[1,3,21:82,2])), + c(memb = 1, sdate = 3, lon = 2)) + expect_equal( + CST_PeriodVariance(exp, start = list(21, 4), end = list(21, 6))$data, + output$data + ) +}) + +############################################## +test_that("3. 
Subset Dates and check time_bounds", { + exp <- CSTools::lonlat_prec + res <- CST_PeriodVariance(data = CSTools::lonlat_prec, time_dim = 'ftime', + start = list(10, 03), end = list(20, 03)) + # Check dims + expect_equal( + dim(res$data), + res$dims + ) + # Check Dates + expect_equal( + dim(res$data)['sdate'], + dim(res$attrs$Dates) + ) + # Check time_bounds + expect_equal( + res$attrs$Dates, + res$attrs$time_bounds$start + ) + expect_equal( + dim(res$attrs$time_bounds$start), + dim(res$attrs$time_bounds$end) + ) + # Check 'sdates' + expect_equal( + all(lubridate::month(res$attrs$time_bounds$start) == 3), + TRUE + ) + expect_equal( + all(lubridate::day(res$attrs$time_bounds$start) == 10), + TRUE + ) + expect_equal( + all(lubridate::month(res$attrs$time_bounds$end) == 03), + TRUE + ) + expect_equal( + all(lubridate::day(res$attrs$time_bounds$end) == 20), + TRUE + ) +}) + -- GitLab From 5fcc11fad521548fba24eca96d3581699dab00ea Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 21 Jun 2023 16:29:40 +0200 Subject: [PATCH 25/87] Update documentation --- NAMESPACE | 9 +++++ man/CST_PeriodFun.Rd | 75 ++++++++++++++++++++++++++++++++++ man/CST_PeriodMax.Rd | 81 +++++++++++++++++++++++++++++++++++++ man/CST_PeriodMin.Rd | 81 +++++++++++++++++++++++++++++++++++++ man/CST_PeriodVariance.Rd | 85 +++++++++++++++++++++++++++++++++++++++ man/PeriodFun.Rd | 70 ++++++++++++++++++++++++++++++++ man/PeriodMax.Rd | 76 ++++++++++++++++++++++++++++++++++ man/PeriodMin.Rd | 76 ++++++++++++++++++++++++++++++++++ man/PeriodVariance.Rd | 80 ++++++++++++++++++++++++++++++++++++ 9 files changed, 633 insertions(+) create mode 100644 man/CST_PeriodFun.Rd create mode 100644 man/CST_PeriodMax.Rd create mode 100644 man/CST_PeriodMin.Rd create mode 100644 man/CST_PeriodVariance.Rd create mode 100644 man/PeriodFun.Rd create mode 100644 man/PeriodMax.Rd create mode 100644 man/PeriodMin.Rd create mode 100644 man/PeriodVariance.Rd diff --git a/NAMESPACE b/NAMESPACE index d80accb..f02cee5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,7 +6,11 @@ export(CST_AbsToProbs) export(CST_AccumulationExceedingThreshold) export(CST_MergeRefToExp) export(CST_PeriodAccumulation) +export(CST_PeriodFun) +export(CST_PeriodMax) export(CST_PeriodMean) +export(CST_PeriodMin) +export(CST_PeriodVariance) export(CST_QThreshold) export(CST_SelectPeriodOnData) export(CST_Threshold) @@ -16,7 +20,11 @@ export(CST_WindCapacityFactor) export(CST_WindPowerDensity) export(MergeRefToExp) export(PeriodAccumulation) +export(PeriodFun) +export(PeriodMax) export(PeriodMean) +export(PeriodMin) +export(PeriodVariance) export(QThreshold) export(SelectPeriodOnData) export(SelectPeriodOnDates) @@ -26,6 +34,7 @@ export(TotalTimeExceedingThreshold) export(WindCapacityFactor) export(WindPowerDensity) import(multiApply) +importFrom(ClimProjDiags,Subset) importFrom(stats,approxfun) importFrom(stats,ecdf) importFrom(stats,quantile) diff --git a/man/CST_PeriodFun.Rd b/man/CST_PeriodFun.Rd new file mode 100644 index 0000000..1ffe32b --- /dev/null +++ b/man/CST_PeriodFun.Rd @@ -0,0 +1,75 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PeriodFun.R +\name{CST_PeriodFun} +\alias{CST_PeriodFun} +\title{Period Function on 's2dv_cube' objects} +\usage{ +CST_PeriodFun( + data, + fun, + start = NULL, + end = NULL, + time_dim = "ftime", + na.rm = FALSE, + ncores = NULL +) +} +\arguments{ +\item{data}{An 's2dv_cube' object as provided function \code{CST_Load} in +package CSTools.} + +\item{fun}{An atomic function to compute a calculation over a period.} 
+ +\item{start}{An optional parameter to defined the initial date of the period +to select from the data by providing a list of two elements: the initial +date of the period and the initial month of the period. By default it is set +to NULL and the indicator is computed using all the data provided in +\code{data}.} + +\item{end}{An optional parameter to defined the final date of the period to +select from the data by providing a list of two elements: the final day of +the period and the final month of the period. By default it is set to NULL +and the indicator is computed using all the data provided in \code{data}.} + +\item{time_dim}{A character string indicating the name of the dimension to +compute the indicator. By default, it is set to 'ftime'. More than one +dimension name matching the dimensions provided in the object +\code{data$data} can be specified.} + +\item{na.rm}{A logical value indicating whether to ignore NA values (TRUE) or +not (FALSE).} + +\item{ncores}{An integer indicating the number of cores to use in parallel +computation.} +} +\value{ +An 's2dv_cube' object containing the indicator in the element +\code{data} with dimensions of the input parameter 'data' except the +dimension where the mean has been computed (specified with 'time_dim'). A new +element called 'time_bounds' will be added into the 'attrs' element in the +'s2dv_cube' object. It consists of a list containing two elements, the start +and end dates of the aggregated period with the same dimensions of 'Dates' +element. +} +\description{ +Period Fun computes a calculation of a given variable in a period. +} +\examples{ +exp <- NULL +exp$data <- array(rnorm(45), dim = c(member = 7, sdate = 4, ftime = 3)) +Dates <- c(seq(as.Date("2000-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2001-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), + seq(as.Date("2001-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2002-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), + seq(as.Date("2002-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2003-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), + seq(as.Date("2003-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2004-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month")) +dim(Dates) <- c(sdate = 4, ftime = 3) +exp$attrs$Dates <- Dates +class(exp) <- 's2dv_cube' + +SA <- CST_PeriodFun(exp, fun = mean, start = list(01, 12), + end = list(01, 01)) + +} diff --git a/man/CST_PeriodMax.Rd b/man/CST_PeriodMax.Rd new file mode 100644 index 0000000..02a4a8a --- /dev/null +++ b/man/CST_PeriodMax.Rd @@ -0,0 +1,81 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PeriodMax.R +\name{CST_PeriodMax} +\alias{CST_PeriodMax} +\title{Period Max on 's2dv_cube' objects} +\usage{ +CST_PeriodMax( + data, + start = NULL, + end = NULL, + time_dim = "ftime", + na.rm = FALSE, + ncores = NULL +) +} +\arguments{ +\item{data}{An 's2dv_cube' object as provided function \code{CST_Load} in +package CSTools.} + +\item{start}{An optional parameter to defined the initial date of the period +to select from the data by providing a list of two elements: the initial +date of the period and the initial month of the period. By default it is set +to NULL and the indicator is computed using all the data provided in +\code{data}.} + +\item{end}{An optional parameter to defined the final date of the period to +select from the data by providing a list of two elements: the final day of +the period and the final month of the period. 
By default it is set to NULL +and the indicator is computed using all the data provided in \code{data}.} + +\item{time_dim}{A character string indicating the name of the dimension to +compute the indicator. By default, it is set to 'ftime'. More than one +dimension name matching the dimensions provided in the object +\code{data$data} can be specified.} + +\item{na.rm}{A logical value indicating whether to ignore NA values (TRUE) or +not (FALSE).} + +\item{ncores}{An integer indicating the number of cores to use in parallel +computation.} +} +\value{ +An 's2dv_cube' object containing the indicator in the element +\code{data} with dimensions of the input parameter 'data' except the +dimension where the max has been computed (specified with 'time_dim'). A new +element called 'time_bounds' will be added into the 'attrs' element in the +'s2dv_cube' object. It consists of a list containing two elements, the start +and end dates of the aggregated period with the same dimensions of 'Dates' +element. +} +\description{ +Period Max computes the maximum (max) of a given variable in a period. +Two bioclimatic indicators can be obtained by using this function: +\itemize{ + \item\code{BIO5}{(Providing temperature data) Max Temperature of Warmest + Month. The maximum monthly temperature occurrence over a + given year (time-series) or averaged span of years + (normal).} + \item\code{BIO13}{(Providing precipitation data) Precipitation of Wettest + Month. This index identifies the total precipitation + that prevails during the wettest month.} +} +} +\examples{ +exp <- NULL +exp$data <- array(rnorm(45), dim = c(member = 7, sdate = 4, ftime = 3)) +Dates <- c(seq(as.Date("2000-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2001-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), + seq(as.Date("2001-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2002-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), + seq(as.Date("2002-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2003-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), + seq(as.Date("2003-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2004-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month")) +dim(Dates) <- c(sdate = 4, ftime = 3) +exp$attrs$Dates <- Dates +class(exp) <- 's2dv_cube' + +res <- CST_PeriodMax(exp, start = list(01, 12), end = list(01, 01)) + +} diff --git a/man/CST_PeriodMin.Rd b/man/CST_PeriodMin.Rd new file mode 100644 index 0000000..7076ccd --- /dev/null +++ b/man/CST_PeriodMin.Rd @@ -0,0 +1,81 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PeriodMin.R +\name{CST_PeriodMin} +\alias{CST_PeriodMin} +\title{Period Min on 's2dv_cube' objects} +\usage{ +CST_PeriodMin( + data, + start = NULL, + end = NULL, + time_dim = "ftime", + na.rm = FALSE, + ncores = NULL +) +} +\arguments{ +\item{data}{An 's2dv_cube' object as provided function \code{CST_Load} in +package CSTools.} + +\item{start}{An optional parameter to defined the initial date of the period +to select from the data by providing a list of two elements: the initial +date of the period and the initial month of the period. By default it is set +to NULL and the indicator is computed using all the data provided in +\code{data}.} + +\item{end}{An optional parameter to defined the final date of the period to +select from the data by providing a list of two elements: the final day of +the period and the final month of the period. 
By default it is set to NULL +and the indicator is computed using all the data provided in \code{data}.} + +\item{time_dim}{A character string indicating the name of the dimension to +compute the indicator. By default, it is set to 'ftime'. More than one +dimension name matching the dimensions provided in the object +\code{data$data} can be specified.} + +\item{na.rm}{A logical value indicating whether to ignore NA values (TRUE) or +not (FALSE).} + +\item{ncores}{An integer indicating the number of cores to use in parallel +computation.} +} +\value{ +An 's2dv_cube' object containing the indicator in the element +\code{data} with dimensions of the input parameter 'data' except the +dimension where the min has been computed (specified with 'time_dim'). A new +element called 'time_bounds' will be added into the 'attrs' element in the +'s2dv_cube' object. It consists of a list containing two elements, the start +and end dates of the aggregated period with the same dimensions of 'Dates' +element. +} +\description{ +Period Min computes the average (min) of a given variable in a period. +Two bioclimatic indicators can be obtained by using this function: +\itemize{ + \item\code{BIO6}{(Providing temperature data) Min Temperature of Coldest + Month. The minimum monthly temperature occurrence over a + given year (time-series) or averaged span of years + (normal).} + \item\code{BIO14}{(Providing precipitation data) Precipitation of Driest + Month. This index identifies the total precipitation + that prevails during the driest month.} +} +} +\examples{ +exp <- NULL +exp$data <- array(rnorm(45), dim = c(member = 7, sdate = 4, ftime = 3)) +Dates <- c(seq(as.Date("2000-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2001-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), + seq(as.Date("2001-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2002-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), + seq(as.Date("2002-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2003-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), + seq(as.Date("2003-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2004-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month")) +dim(Dates) <- c(sdate = 4, ftime = 3) +exp$attrs$Dates <- Dates +class(exp) <- 's2dv_cube' + +res <- CST_PeriodMin(exp, start = list(01, 12), end = list(01, 01)) + +} diff --git a/man/CST_PeriodVariance.Rd b/man/CST_PeriodVariance.Rd new file mode 100644 index 0000000..fa68197 --- /dev/null +++ b/man/CST_PeriodVariance.Rd @@ -0,0 +1,85 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PeriodVariance.R +\name{CST_PeriodVariance} +\alias{CST_PeriodVariance} +\title{Period Variance on 's2dv_cube' objects} +\usage{ +CST_PeriodVariance( + data, + start = NULL, + end = NULL, + time_dim = "ftime", + na.rm = FALSE, + ncores = NULL +) +} +\arguments{ +\item{data}{An 's2dv_cube' object as provided function \code{CST_Load} in +package CSTools.} + +\item{start}{An optional parameter to defined the initial date of the period +to select from the data by providing a list of two elements: the initial +date of the period and the initial month of the period. By default it is set +to NULL and the indicator is computed using all the data provided in +\code{data}.} + +\item{end}{An optional parameter to defined the final date of the period to +select from the data by providing a list of two elements: the final day of +the period and the final month of the period. 
By default it is set to NULL +and the indicator is computed using all the data provided in \code{data}.} + +\item{time_dim}{A character string indicating the name of the dimension to +compute the indicator. By default, it is set to 'ftime'. More than one +dimension name matching the dimensions provided in the object +\code{data$data} can be specified.} + +\item{na.rm}{A logical value indicating whether to ignore NA values (TRUE) or +not (FALSE).} + +\item{ncores}{An integer indicating the number of cores to use in parallel +computation.} +} +\value{ +An 's2dv_cube' object containing the indicator in the element +\code{data} with dimensions of the input parameter 'data' except the +dimension where the var has been computed (specified with 'time_dim'). A new +element called 'time_bounds' will be added into the 'attrs' element in the +'s2dv_cube' object. It consists of a list containing two elements, the start +and end dates of the aggregated period with the same dimensions of 'Dates' +element. +} +\description{ +Period Variance computes the average (var) of a given variable in a period. +Two bioclimatic indicators can be obtained by using this function: +\itemize{ + \item\code{BIO4}{(Providing temperature data) Temperature Seasonality + (Standard Deviation). The amount of temperature variation + over a given year (or averaged years) based on the standard + deviation (variation) of monthly temperature averages. } + \item\code{BIO15}{(Providing precipitation data) Precipitation Seasonality + (CV). This is a measure of the variation in + monthly precipitation totals over the course of the year. + This index is the ratio of the standard deviation of the + monthly total precipitation to the mean monthly total + precipitation (also known as the coefficient of variation) + and is expressed as a percentage} +} +} +\examples{ +exp <- NULL +exp$data <- array(rnorm(45), dim = c(member = 7, sdate = 4, ftime = 3)) +Dates <- c(seq(as.Date("2000-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2001-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), + seq(as.Date("2001-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2002-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), + seq(as.Date("2002-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2003-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), + seq(as.Date("2003-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2004-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month")) +dim(Dates) <- c(sdate = 4, ftime = 3) +exp$attrs$Dates <- Dates +class(exp) <- 's2dv_cube' + +res <- CST_PeriodVariance(exp, start = list(01, 12), end = list(01, 01)) + +} diff --git a/man/PeriodFun.Rd b/man/PeriodFun.Rd new file mode 100644 index 0000000..9378273 --- /dev/null +++ b/man/PeriodFun.Rd @@ -0,0 +1,70 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PeriodFun.R +\name{PeriodFun} +\alias{PeriodFun} +\title{Period Function on multidimensional array objects} +\usage{ +PeriodFun( + data, + fun, + dates = NULL, + start = NULL, + end = NULL, + time_dim = "ftime", + na.rm = FALSE, + ncores = NULL +) +} +\arguments{ +\item{data}{A multidimensional array with named dimensions.} + +\item{fun}{An atomic function to compute a calculation over a period.} + +\item{dates}{A multidimensional array of dates with named dimensions matching +the temporal dimensions on parameter 'data'. 
By default it is NULL, to +select aperiod this parameter must be provided.} + +\item{start}{An optional parameter to defined the initial date of the period +to select from the data by providing a list of two elements: the initial +date of the period and the initial month of the period. By default it is set +to NULL and the indicator is computed using all the data provided in +\code{data}.} + +\item{end}{An optional parameter to defined the final date of the period to +select from the data by providing a list of two elements: the final day of +the period and the final month of the period. By default it is set to NULL +and the indicator is computed using all the data provided in \code{data}.} + +\item{time_dim}{A character string indicating the name of the dimension to +compute the indicator. By default, it is set to 'ftime'. More than one +dimension name matching the dimensions provided in the object +\code{data$data} can be specified.} + +\item{na.rm}{A logical value indicating whether to ignore NA values (TRUE) or +not (FALSE).} + +\item{ncores}{An integer indicating the number of cores to use in parallel +computation.} +} +\value{ +A multidimensional array with named dimensions containing the +indicator in the element \code{data}. +} +\description{ +Period Fun computes a calculation of a given variable in a period. +} +\examples{ +data <- array(rnorm(45), dim = c(member = 7, sdate = 4, ftime = 3)) +Dates <- c(seq(as.Date("2000-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2001-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), + seq(as.Date("2001-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2002-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), + seq(as.Date("2002-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2003-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), + seq(as.Date("2003-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2004-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month")) +dim(Dates) <- c(sdate = 4, ftime = 3) +SA <- PeriodFun(data, fun = mean, dates = Dates, start = list(01, 12), + end = list(01, 01)) + +} diff --git a/man/PeriodMax.Rd b/man/PeriodMax.Rd new file mode 100644 index 0000000..26e62a4 --- /dev/null +++ b/man/PeriodMax.Rd @@ -0,0 +1,76 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PeriodMax.R +\name{PeriodMax} +\alias{PeriodMax} +\title{Period max on multidimensional array objects} +\usage{ +PeriodMax( + data, + dates = NULL, + start = NULL, + end = NULL, + time_dim = "ftime", + na.rm = FALSE, + ncores = NULL +) +} +\arguments{ +\item{data}{A multidimensional array with named dimensions.} + +\item{dates}{A multidimensional array of dates with named dimensions matching +the temporal dimensions on parameter 'data'. By default it is NULL, to +select aperiod this parameter must be provided.} + +\item{start}{An optional parameter to defined the initial date of the period +to select from the data by providing a list of two elements: the initial +date of the period and the initial month of the period. By default it is set +to NULL and the indicator is computed using all the data provided in +\code{data}.} + +\item{end}{An optional parameter to defined the final date of the period to +select from the data by providing a list of two elements: the final day of +the period and the final month of the period. By default it is set to NULL +and the indicator is computed using all the data provided in \code{data}.} + +\item{time_dim}{A character string indicating the name of the dimension to +compute the indicator. 
By default, it is set to 'ftime'. More than one +dimension name matching the dimensions provided in the object +\code{data$data} can be specified.} + +\item{na.rm}{A logical value indicating whether to ignore NA values (TRUE) or +not (FALSE).} + +\item{ncores}{An integer indicating the number of cores to use in parallel +computation.} +} +\value{ +A multidimensional array with named dimensions containing the +indicator in the element \code{data}. +} +\description{ +Period max computes the average (max) of a given variable in a period. +Two bioclimatic indicators can be obtained by using this function: +\itemize{ + \item\code{BIO5}{(Providing temperature data) Max Temperature of Warmest + Month. The maximum monthly temperature occurrence over a + given year (time-series) or averaged span of years + (normal).} + \item\code{BIO13}{(Providing precipitation data) Precipitation of Wettest + Month. This index identifies the total precipitation + that prevails during the wettest month.} +} +} +\examples{ +data <- array(rnorm(45), dim = c(member = 7, sdate = 4, ftime = 3)) +Dates <- c(seq(as.Date("2000-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2001-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), + seq(as.Date("2001-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2002-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), + seq(as.Date("2002-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2003-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), + seq(as.Date("2003-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2004-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month")) +dim(Dates) <- c(sdate = 4, ftime = 3) +res <- PeriodMax(data, dates = Dates, start = list(01, 12), end = list(01, 01)) + +} diff --git a/man/PeriodMin.Rd b/man/PeriodMin.Rd new file mode 100644 index 0000000..72d6c78 --- /dev/null +++ b/man/PeriodMin.Rd @@ -0,0 +1,76 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PeriodMin.R +\name{PeriodMin} +\alias{PeriodMin} +\title{Period Min on multidimensional array objects} +\usage{ +PeriodMin( + data, + dates = NULL, + start = NULL, + end = NULL, + time_dim = "ftime", + na.rm = FALSE, + ncores = NULL +) +} +\arguments{ +\item{data}{A multidimensional array with named dimensions.} + +\item{dates}{A multidimensional array of dates with named dimensions matching +the temporal dimensions on parameter 'data'. By default it is NULL, to +select aperiod this parameter must be provided.} + +\item{start}{An optional parameter to defined the initial date of the period +to select from the data by providing a list of two elements: the initial +date of the period and the initial month of the period. By default it is set +to NULL and the indicator is computed using all the data provided in +\code{data}.} + +\item{end}{An optional parameter to defined the final date of the period to +select from the data by providing a list of two elements: the final day of +the period and the final month of the period. By default it is set to NULL +and the indicator is computed using all the data provided in \code{data}.} + +\item{time_dim}{A character string indicating the name of the dimension to +compute the indicator. By default, it is set to 'ftime'. 
More than one +dimension name matching the dimensions provided in the object +\code{data$data} can be specified.} + +\item{na.rm}{A logical value indicating whether to ignore NA values (TRUE) or +not (FALSE).} + +\item{ncores}{An integer indicating the number of cores to use in parallel +computation.} +} +\value{ +A multidimensional array with named dimensions containing the +indicator in the element \code{data}. +} +\description{ +Period Min computes the average (min) of a given variable in a period. +Two bioclimatic indicators can be obtained by using this function: +\itemize{ + \item\code{BIO6}{(Providing temperature data) Min Temperature of Coldest + Month. The minimum monthly temperature occurrence over a + given year (time-series) or averaged span of years + (normal).} + \item\code{BIO14}{(Providing precipitation data) Precipitation of Driest + Month. This index identifies the total precipitation + that prevails during the driest month.} +} +} +\examples{ +data <- array(rnorm(45), dim = c(member = 7, sdate = 4, ftime = 3)) +Dates <- c(seq(as.Date("2000-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2001-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), + seq(as.Date("2001-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2002-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), + seq(as.Date("2002-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2003-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), + seq(as.Date("2003-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2004-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month")) +dim(Dates) <- c(sdate = 4, ftime = 3) +res <- PeriodMin(data, dates = Dates, start = list(01, 12), end = list(01, 01)) + +} diff --git a/man/PeriodVariance.Rd b/man/PeriodVariance.Rd new file mode 100644 index 0000000..be4a243 --- /dev/null +++ b/man/PeriodVariance.Rd @@ -0,0 +1,80 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PeriodVariance.R +\name{PeriodVariance} +\alias{PeriodVariance} +\title{Period Variance on multidimensional array objects} +\usage{ +PeriodVariance( + data, + dates = NULL, + start = NULL, + end = NULL, + time_dim = "ftime", + na.rm = FALSE, + ncores = NULL +) +} +\arguments{ +\item{data}{A multidimensional array with named dimensions.} + +\item{dates}{A multidimensional array of dates with named dimensions matching +the temporal dimensions on parameter 'data'. By default it is NULL, to +select aperiod this parameter must be provided.} + +\item{start}{An optional parameter to defined the initial date of the period +to select from the data by providing a list of two elements: the initial +date of the period and the initial month of the period. By default it is set +to NULL and the indicator is computed using all the data provided in +\code{data}.} + +\item{end}{An optional parameter to defined the final date of the period to +select from the data by providing a list of two elements: the final day of +the period and the final month of the period. By default it is set to NULL +and the indicator is computed using all the data provided in \code{data}.} + +\item{time_dim}{A character string indicating the name of the dimension to +compute the indicator. By default, it is set to 'ftime'. 
More than one +dimension name matching the dimensions provided in the object +\code{data$data} can be specified.} + +\item{na.rm}{A logical value indicating whether to ignore NA values (TRUE) or +not (FALSE).} + +\item{ncores}{An integer indicating the number of cores to use in parallel +computation.} +} +\value{ +A multidimensional array with named dimensions containing the +indicator in the element \code{data}. +} +\description{ +Period Variance computes the average (var) of a given variable in a period. +Two bioclimatic indicators can be obtained by using this function: +\itemize{ + \item\code{BIO4}{(Providing temperature data) Temperature Seasonality + (Standard Deviation). The amount of temperature variation + over a given year (or averaged years) based on the standard + deviation (variation) of monthly temperature averages. } + \item\code{BIO15}{(Providing precipitation data) Precipitation Seasonality + (CV). This is a measure of the variation in + monthly precipitation totals over the course of the year. + This index is the ratio of the standard deviation of the + monthly total precipitation to the mean monthly total + precipitation (also known as the coefficient of variation) + and is expressed as a percentage} +} +} +\examples{ +data <- array(rnorm(45), dim = c(member = 7, sdate = 4, ftime = 3)) +Dates <- c(seq(as.Date("2000-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2001-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), + seq(as.Date("2001-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2002-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), + seq(as.Date("2002-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2003-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), + seq(as.Date("2003-11-01", "\%Y-\%m-\%d", tz = "UTC"), + as.Date("2004-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month")) +dim(Dates) <- c(sdate = 4, ftime = 3) +res <- PeriodVariance(data, dates = Dates, start = list(01, 12), end = list(01, 01)) + +} -- GitLab From 3adf7275da5a2eb200dea5a9c210606e4f330295 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 21 Jun 2023 18:08:24 +0200 Subject: [PATCH 26/87] Include publication in README and DESCRIPTION --- DESCRIPTION | 4 ++-- README.md | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4151ba4..a05a3b5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,7 +22,7 @@ Description: Set of generalised tools for the flexible computation of climate provided the dimensional structure of the input is maintained. Additionally, the outputs of the functions in this package are compatible with 'CSTools'. This package was developed in the context of H2020 MED-GOLD (776467) and - S2S4E (776787) projects. Lledó et al. (2019) . + S2S4E (776787) projects. Lledó et al. (2019) . Pérez-Zanón et al. (2023) . Depends: R (>= 3.6.0) Imports: @@ -40,4 +40,4 @@ URL: https://earth.bsc.es/gitlab/es/csindicators/ BugReports: https://earth.bsc.es/gitlab/es/csindicators/-/issues Encoding: UTF-8 RoxygenNote: 7.2.0 -Config/testthat/edition: 3 \ No newline at end of file +Config/testthat/edition: 3 diff --git a/README.md b/README.md index 5521a51..6607398 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,11 @@ CSIndicators #### Sectoral Indicators for Climate Services Based on Sub-Seasonal to Decadal Climate Predictions -Set of generalised tools for the flexible computation of climate related indicators defined by the user. 
Each method represents a specific mathematical approach which is combined with the possibility to select an arbitrary time period to define the indicator. This enables a wide range of possibilities to tailor the most suitable indicator for each particular climate service application (agriculture, food security, energy, water management…). This package is intended for sub-seasonal, seasonal and decadal climate predictions, but its methods are also applicable to other time-scales, provided the dimensional structure of the input is maintained. Additionally, the outputs of the functions in this package are compatible with [CSTools](https://earth.bsc.es/gitlab/external/cstools). +Set of generalised tools for the flexible computation of climate related indicators defined by the user. Each method represents a specific mathematical approach which is combined with the possibility to select an arbitrary time period to define the indicator. This enables a wide range of possibilities to tailor the most suitable indicator for each particular climate service application (agriculture, food security, energy, water management…). This package is intended for sub-seasonal, seasonal and decadal climate predictions, but its methods are also applicable to other time-scales, provided the dimensional structure of the input is maintained. Additionally, the outputs of the functions in this package are compatible with [CSTools](https://earth.bsc.es/gitlab/external/cstools). + +A scientific publication was published in the Climate Services Journal, and it can be cited as follows: + +> Pérez-Zanón, N., Ho, A. Chou, C., Lledó, L., Marcos-Matamoros, R., Rifà, E. and González-Reviriego, N. (2023). CSIndicators: Get tailored climate indicators for applications in your sector. Climate Services. https://doi.org/10.1016/j.cliser.2023.100393 Installation ------------ -- GitLab From 797d8eedabff618e5c5966b4c6af664aae37f56c Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 21 Jun 2023 18:10:29 +0200 Subject: [PATCH 27/87] Improve description with a new space --- DESCRIPTION | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index a05a3b5..77a2156 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,7 +22,9 @@ Description: Set of generalised tools for the flexible computation of climate provided the dimensional structure of the input is maintained. Additionally, the outputs of the functions in this package are compatible with 'CSTools'. This package was developed in the context of H2020 MED-GOLD (776467) and - S2S4E (776787) projects. Lledó et al. (2019) . Pérez-Zanón et al. (2023) . + S2S4E (776787) projects. + Lledó et al. (2019) . + Pérez-Zanón et al. (2023) . Depends: R (>= 3.6.0) Imports: -- GitLab From 710f49ed8e21dc0510873066468c50eb0c5e4a88 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 28 Jun 2023 13:17:12 +0200 Subject: [PATCH 28/87] Correct parameter 'na.rm' in Standardization call, it was not included; corrected documentation for 'params'; added check for 'params' dimensions --- R/PeriodSPEI.R | 67 +++++++++++++++++--------------- man/CST_PeriodSPEI.Rd | 11 +++--- tests/testthat/test-PeriodSPEI.R | 24 +++++++++++- 3 files changed, 64 insertions(+), 38 deletions(-) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index 077137c..f947531 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -39,11 +39,12 @@ #' used as starting and end points. #'@param params A multidimensional array with named dimensions for computing the #' SPEI. 
This option overrides computation of fitting parameters. It needs -#' to be of same time dimension (specified in 'time_dim') of 'exp' and a -#' dimension named 'coef' with the length of the coefficients needed for the -#' used distribution (for 'Gamma' coef dimension is of lenght 2, for -#' 'log-Logistic' or 'PearsonIII' is of length 3). It also needs to have a -#' leadtime dimension (specified in 'leadtime_dim') of length 1. +#' to have the following dimensions: same leadtime dimension of 'exp' +#' (specified in 'leadtime_dim'); time dimension of length 1 (specified in +#' 'time_dim'); and a dimension named 'coef' with the length of the +#' coefficients needed for the used distribution (for 'Gamma' coef dimension is +#' of lenght 2, for 'log-Logistic' or 'PearsonIII' is of length 3). It can't +#' have member dimension (specified in 'memb_dim'). #'@param pet_exp A multidimensional array containing the Potential #' EvapoTranspiration data of 'exp'. It must have the same dimensions of the #' variable 'prlr' of 'exp'. If it is NULL it is calculated using the provided @@ -617,16 +618,6 @@ PeriodSPEI <- function(exp, dates_exp, lat, stop(paste0("Cannot compute accumulation of ", accum, " months because ", "loaded data has only ", dim(exp[[1]])[leadtime_dim], " months.")) } - ## params - if (!is.null(params)) { - if (!is.numeric(params)) { - stop("Parameter 'params' must be numeric.") - } - if (!all(c(time_dim, leadtime_dim, 'coef') %in% names(dim(params)))) { - stop("Parameter 'params' must be a multidimensional array with named ", - "dimensions: 'time_dim', 'leadtime_dim' and 'coef'.") - } - } ## standardization if (!is.logical(standardization)) { @@ -678,9 +669,29 @@ PeriodSPEI <- function(exp, dates_exp, lat, stop("Parameter 'ncores' must be a positive integer.") } } - # Data preparation - - # complete dates + ## params + if (!is.null(params)) { + if (!is.numeric(params)) { + stop("Parameter 'params' must be numeric.") + } + if (!all(c(time_dim, leadtime_dim, 'coef') %in% names(dim(params)))) { + stop("Parameter 'params' must be a multidimensional array with named ", + "dimensions: '", time_dim, "', '", leadtime_dim, "' and 'coef'.") + } + if (distribution == "Gamma") { + if (dim(params)['coef'] != 2) { + stop("For '", distribution, "' distribution, params array should have ", + "'coef' dimension of length 2.") + } + } else { + if (dim(params)['coef'] != 3) { + stop("For '", distribution, "' distribution, params array should have ", + "'coef' dimension of length 3.") + } + } + } + + # Complete dates dates_monthly <- NULL k = 0 for (dates in .return2list(dates_exp, dates_expcor)) { @@ -736,9 +747,8 @@ PeriodSPEI <- function(exp, dates_exp, lat, handle_infinity = handle_infinity, param_error = param_error, method = method, distribution = distribution, - fit = fit, ncores = ncores) + fit = fit, na.rm = na.rm, ncores = ncores) params <- spei_dat$params - pos <- match(names(dim(data[[1]])), names(dim(spei_dat[[1]]))) spei_dat[[1]] <- aperm(spei_dat[[1]], pos) @@ -972,11 +982,6 @@ PeriodSPEI <- function(exp, dates_exp, lat, # with the values repeated each time_dim and leadtime_dim params <- aperm(params, c(2,3,1)) names(dim(params)) <- c(time_dim, leadtime_dim, 'coef') - } else { - if (dim(params)['coef'] != length(coef)) { - stop(paste0("Params array should have 'coef' dimension of length ", - length(coef), ".")) - } } spei <- Apply(data = list(data = data, params = params), @@ -985,13 +990,11 @@ PeriodSPEI <- function(exp, dates_exp, lat, fun = .standardization, coef = coef, leadtime_dim = 
leadtime_dim, - time_dim = time_dim, - memb_dim = memb_dim, - handle_infinity = handle_infinity, - cross_validation = cross_validation, - method = method, distribution = distribution, fit = fit, - ref_period = ref_period, param_error = param_error, - na.rm = na.rm, + time_dim = time_dim, memb_dim = memb_dim, + ref_period = ref_period, handle_infinity = handle_infinity, + cross_validation = cross_validation, param_error = param_error, + method = method, distribution = distribution, + fit = fit, na.rm = na.rm, output_dims = list(data_spei = c(leadtime_dim, time_dim, memb_dim), params = c(time_dim, leadtime_dim, 'coef')), ncores = ncores) diff --git a/man/CST_PeriodSPEI.Rd b/man/CST_PeriodSPEI.Rd index 2a7aaa6..8e8ab19 100644 --- a/man/CST_PeriodSPEI.Rd +++ b/man/CST_PeriodSPEI.Rd @@ -66,11 +66,12 @@ used as starting and end points.} \item{params}{A multidimensional array with named dimensions for computing the SPEI. This option overrides computation of fitting parameters. It needs -to be of same time dimension (specified in 'time_dim') of 'exp' and a -dimension named 'coef' with the length of the coefficients needed for the -used distribution (for 'Gamma' coef dimension is of lenght 2, for -'log-Logistic' or 'PearsonIII' is of length 3). It also needs to have a -leadtime dimension (specified in 'leadtime_dim') of length 1.} +to have the following dimensions: same leadtime dimension of 'exp' +(specified in 'leadtime_dim'); time dimension of length 1 (specified in +'time_dim'); and a dimension named 'coef' with the length of the +coefficients needed for the used distribution (for 'Gamma' coef dimension is +of lenght 2, for 'log-Logistic' or 'PearsonIII' is of length 3). It can't +have member dimension (specified in 'memb_dim').} \item{pet_exp}{A multidimensional array containing the Potential EvapoTranspiration data of 'exp'. It must have the same dimensions of the diff --git a/tests/testthat/test-PeriodSPEI.R b/tests/testthat/test-PeriodSPEI.R index 85a1427..c6b8075 100644 --- a/tests/testthat/test-PeriodSPEI.R +++ b/tests/testthat/test-PeriodSPEI.R @@ -34,6 +34,8 @@ lat <- c(40,40.1) exp1 <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) exp_cor1 <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, 'prlr' = expcor_prlr) +params1 <- array(abs(rnorm(100)), dim = c(syear = 1, time = 3, latitude = 2, + longitude = 1, coef = 3)) # dat2 dims <- c(syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 10) @@ -182,6 +184,8 @@ test_that("2. Output checks", { dates_exp = dates_exp) res4 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, dates_exp = dates_exp, standardization = FALSE) + res5 <- PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp, + params = params1) # output dims expect_equal( names(res1), @@ -223,8 +227,25 @@ test_that("2. 
Output checks", { # memb_dim # lat_dim # accum + res11 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, accum = 2, + dates_exp = dates_exp, na.rm = TRUE) + expect_equal( + res11$exp[1,2,1,1,][1:4], + c(-0.5553128, 0.4689562, -0.4682003, -0.9956847), + tolerance = 0.0001 + ) # ref_period # params + expect_error( + PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp, + params = array(abs(rnorm(100)), dim = dimscor)), + paste0("Parameter 'params' must be a multidimensional array with named ", + "dimensions: 'syear', 'time' and 'coef'.") + ) + expect_equal( + dim(res5$params), + c(syear = 1, time = 3, coef = 3, latitude = 2, longitude = 1) + ) # standarization # cross_validation expect_warning( @@ -245,7 +266,7 @@ test_that("2. Output checks", { ) expect_equal( dim(res_crossval_T$params), - dims + c(syear = 6, time = 3, coef = 3, latitude = 2, longitude = 1) ) # cross_validation = FALSE expect_equal( @@ -286,6 +307,7 @@ test_that("2. Output checks", { res9 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, dates_exp = dates_exp, handle_infinity = FALSE) # na.rm - + # ncores }) -- GitLab From 4a8dd49ac07d1bb54b23d404b023179b5c4b2130 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 29 Jun 2023 14:23:44 +0200 Subject: [PATCH 29/87] Add in documentation that in result, the Dates correspond to the beginning of the aggregated time period --- R/AccumulationExceedingThreshold.R | 10 ++++++---- R/PeriodAccumulation.R | 13 +++++++------ R/PeriodMean.R | 11 ++++++----- R/TotalSpellTimeExceedingThreshold.R | 9 +++++---- R/TotalTimeExceedingThreshold.R | 11 ++++++----- man/CST_AccumulationExceedingThreshold.Rd | 10 ++++++---- man/CST_PeriodAccumulation.Rd | 13 +++++++------ man/CST_PeriodMean.Rd | 11 ++++++----- man/CST_TotalSpellTimeExceedingThreshold.Rd | 9 +++++---- man/CST_TotalTimeExceedingThreshold.Rd | 11 ++++++----- 10 files changed, 60 insertions(+), 48 deletions(-) diff --git a/R/AccumulationExceedingThreshold.R b/R/AccumulationExceedingThreshold.R index 636b211..083b76c 100644 --- a/R/AccumulationExceedingThreshold.R +++ b/R/AccumulationExceedingThreshold.R @@ -49,10 +49,12 @@ #' #'@return An 's2dv_cube' object containing the aggregated values in the element #'\code{data} with dimensions of the input parameter 'data' except the dimension -#'where the indicator has been computed. A new element called 'time_bounds' will -#'be added into the 'attrs' element in the 's2dv_cube' object. It consists of a -#'list containing two elements, the start and end dates of the aggregated period -#'with the same dimensions of 'Dates' element. +#'where the indicator has been computed. The 'Dates' array is updated to +#'the dates corresponding to the beginning of the aggregated time period. A new +#'element called 'time_bounds' will be added into the 'attrs' element in the +#''s2dv_cube' object. It consists of a list containing two elements, the start +#'and end dates of the aggregated period with the same dimensions of 'Dates' +#'element. #' #'@examples #'exp <- NULL diff --git a/R/PeriodAccumulation.R b/R/PeriodAccumulation.R index 965534f..1a8fea0 100644 --- a/R/PeriodAccumulation.R +++ b/R/PeriodAccumulation.R @@ -30,13 +30,14 @@ #'@param ncores An integer indicating the number of cores to use in parallel #' computation. 
#' -#'@return A 's2dv_cube' object containing the indicator in the element +#'@return An 's2dv_cube' object containing the indicator in the element #'\code{data} with dimensions of the input parameter 'data' except the dimension -#'where the accumulation has been computed (specified with 'time_dim'). A new -#'element called 'time_bounds' will be added into the 'attrs' element in the -#''s2dv_cube' object. It consists of a list containing two elements, the start -#'and end dates of the aggregated period with the same dimensions of 'Dates' -#'element. +#'where the accumulation has been computed (specified with 'time_dim'). The +#''Dates' array is updated to the dates corresponding to the beginning of the +#'aggregated time period. A new element called 'time_bounds' will be added into +#'the 'attrs' element in the 's2dv_cube' object. It consists of a list +#'containing two elements, the start and end dates of the aggregated period with +#'the same dimensions of 'Dates' element. #' #'@examples #'exp <- NULL diff --git a/R/PeriodMean.R b/R/PeriodMean.R index 2a85ceb..f848fda 100644 --- a/R/PeriodMean.R +++ b/R/PeriodMean.R @@ -32,11 +32,12 @@ #' #'@return An 's2dv_cube' object containing the indicator in the element #'\code{data} with dimensions of the input parameter 'data' except the -#'dimension where the mean has been computed (specified with 'time_dim'). A new -#'element called 'time_bounds' will be added into the 'attrs' element in the -#''s2dv_cube' object. It consists of a list containing two elements, the start -#'and end dates of the aggregated period with the same dimensions of 'Dates' -#'element. +#'dimension where the mean has been computed (specified with 'time_dim'). The +#''Dates' array is updated to the dates corresponding to the beginning of the +#'aggregated time period. A new element called 'time_bounds' will be added into +#'the 'attrs' element in the 's2dv_cube' object. It consists of a list +#'containing two elements, the start and end dates of the aggregated period with +#'the same dimensions of 'Dates' element. #' #'@examples #'exp <- NULL diff --git a/R/TotalSpellTimeExceedingThreshold.R b/R/TotalSpellTimeExceedingThreshold.R index 5cf2e1d..38a3ff4 100644 --- a/R/TotalSpellTimeExceedingThreshold.R +++ b/R/TotalSpellTimeExceedingThreshold.R @@ -50,10 +50,11 @@ #'@return An 's2dv_cube' object containing the number of days that are part of a #'spell within a threshold in element \code{data} with dimensions of the input #'parameter 'data' except the dimension where the indicator has been computed. -#'A new element called 'time_bounds' will be added into the 'attrs' element in -#'the 's2dv_cube' object. It consists of a list containing two elements, the -#'start and end dates of the aggregated period with the same dimensions of -#''Dates' element. +#'The 'Dates' array is updated to the dates corresponding to the beginning of +#'the aggregated time period. A new element called 'time_bounds' will be added +#'into the 'attrs' element in the 's2dv_cube' object. It consists of a list +#'containing two elements, the start and end dates of the aggregated period with +#'the same dimensions of 'Dates' element. 
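Editor's note: a minimal sketch, not part of the patch, of how the 'time_bounds' element documented in the return blocks above could be inspected. It assumes 'res' is the output of one of the CST_* aggregation functions touched in this commit (for instance CST_PeriodAccumulation) and that the two list elements are named 'start' and 'end' as described:

res$attrs$Dates              # beginning of each aggregated time period
res$attrs$time_bounds$start  # start dates, same dimensions as res$attrs$Dates
res$attrs$time_bounds$end    # end dates, same dimensions as res$attrs$Dates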
#' #'@examples #'exp <- NULL diff --git a/R/TotalTimeExceedingThreshold.R b/R/TotalTimeExceedingThreshold.R index 8a8b7ed..94d2c53 100644 --- a/R/TotalTimeExceedingThreshold.R +++ b/R/TotalTimeExceedingThreshold.R @@ -55,11 +55,12 @@ #'@return An 's2dv_cube' object containing in element \code{data} the total #'number of the corresponding units of the data frequency that a variable is #'exceeding a threshold during a period with dimensions of the input parameter -#''data' except the dimension where the indicator has been computed. A new -#'element called 'time_bounds' will be added into the 'attrs' element in the -#''s2dv_cube' object. It consists of a list containing two elements, the start -#'and end dates of the aggregated period with the same dimensions of 'Dates' -#'element. +#''data' except the dimension where the indicator has been computed. The +#''Dates' array is updated to the dates corresponding to the beginning of the +#'aggregated time period. A new element called 'time_bounds' will be added into +#'the 'attrs' element in the 's2dv_cube' object. It consists of a list +#'containing two elements, the start and end dates of the aggregated period with +#'the same dimensions of 'Dates' element. #' #'@examples #'exp <- NULL diff --git a/man/CST_AccumulationExceedingThreshold.Rd b/man/CST_AccumulationExceedingThreshold.Rd index f14c33b..ff02e4c 100644 --- a/man/CST_AccumulationExceedingThreshold.Rd +++ b/man/CST_AccumulationExceedingThreshold.Rd @@ -63,10 +63,12 @@ computation.} \value{ An 's2dv_cube' object containing the aggregated values in the element \code{data} with dimensions of the input parameter 'data' except the dimension -where the indicator has been computed. A new element called 'time_bounds' will -be added into the 'attrs' element in the 's2dv_cube' object. It consists of a -list containing two elements, the start and end dates of the aggregated period -with the same dimensions of 'Dates' element. +where the indicator has been computed. The 'Dates' array is updated to +the dates corresponding to the beginning of the aggregated time period. A new +element called 'time_bounds' will be added into the 'attrs' element in the +'s2dv_cube' object. It consists of a list containing two elements, the start +and end dates of the aggregated period with the same dimensions of 'Dates' +element. } \description{ The accumulation (sum) of a variable in the days (or time steps) that the diff --git a/man/CST_PeriodAccumulation.Rd b/man/CST_PeriodAccumulation.Rd index 0878399..77f4a38 100644 --- a/man/CST_PeriodAccumulation.Rd +++ b/man/CST_PeriodAccumulation.Rd @@ -40,13 +40,14 @@ not (FALSE).} computation.} } \value{ -A 's2dv_cube' object containing the indicator in the element +An 's2dv_cube' object containing the indicator in the element \code{data} with dimensions of the input parameter 'data' except the dimension -where the accumulation has been computed (specified with 'time_dim'). A new -element called 'time_bounds' will be added into the 'attrs' element in the -'s2dv_cube' object. It consists of a list containing two elements, the start -and end dates of the aggregated period with the same dimensions of 'Dates' -element. +where the accumulation has been computed (specified with 'time_dim'). The +'Dates' array is updated to the dates corresponding to the beginning of the +aggregated time period. A new element called 'time_bounds' will be added into +the 'attrs' element in the 's2dv_cube' object. 
It consists of a list +containing two elements, the start and end dates of the aggregated period with +the same dimensions of 'Dates' element. } \description{ Period Accumulation computes the sum (accumulation) of a given variable in a diff --git a/man/CST_PeriodMean.Rd b/man/CST_PeriodMean.Rd index 025acfd..0aa4aa3 100644 --- a/man/CST_PeriodMean.Rd +++ b/man/CST_PeriodMean.Rd @@ -42,11 +42,12 @@ computation.} \value{ An 's2dv_cube' object containing the indicator in the element \code{data} with dimensions of the input parameter 'data' except the -dimension where the mean has been computed (specified with 'time_dim'). A new -element called 'time_bounds' will be added into the 'attrs' element in the -'s2dv_cube' object. It consists of a list containing two elements, the start -and end dates of the aggregated period with the same dimensions of 'Dates' -element. +dimension where the mean has been computed (specified with 'time_dim'). The +'Dates' array is updated to the dates corresponding to the beginning of the +aggregated time period. A new element called 'time_bounds' will be added into +the 'attrs' element in the 's2dv_cube' object. It consists of a list +containing two elements, the start and end dates of the aggregated period with +the same dimensions of 'Dates' element. } \description{ Period Mean computes the average (mean) of a given variable in a period. diff --git a/man/CST_TotalSpellTimeExceedingThreshold.Rd b/man/CST_TotalSpellTimeExceedingThreshold.Rd index 69dee16..0715414 100644 --- a/man/CST_TotalSpellTimeExceedingThreshold.Rd +++ b/man/CST_TotalSpellTimeExceedingThreshold.Rd @@ -58,10 +58,11 @@ computation.} An 's2dv_cube' object containing the number of days that are part of a spell within a threshold in element \code{data} with dimensions of the input parameter 'data' except the dimension where the indicator has been computed. -A new element called 'time_bounds' will be added into the 'attrs' element in -the 's2dv_cube' object. It consists of a list containing two elements, the -start and end dates of the aggregated period with the same dimensions of -'Dates' element. +The 'Dates' array is updated to the dates corresponding to the beginning of +the aggregated time period. A new element called 'time_bounds' will be added +into the 'attrs' element in the 's2dv_cube' object. It consists of a list +containing two elements, the start and end dates of the aggregated period with +the same dimensions of 'Dates' element. } \description{ The number of days (when daily data is provided) that are part of a spell diff --git a/man/CST_TotalTimeExceedingThreshold.Rd b/man/CST_TotalTimeExceedingThreshold.Rd index 2449014..4a1f736 100644 --- a/man/CST_TotalTimeExceedingThreshold.Rd +++ b/man/CST_TotalTimeExceedingThreshold.Rd @@ -59,11 +59,12 @@ computation.} An 's2dv_cube' object containing in element \code{data} the total number of the corresponding units of the data frequency that a variable is exceeding a threshold during a period with dimensions of the input parameter -'data' except the dimension where the indicator has been computed. A new -element called 'time_bounds' will be added into the 'attrs' element in the -'s2dv_cube' object. It consists of a list containing two elements, the start -and end dates of the aggregated period with the same dimensions of 'Dates' -element. +'data' except the dimension where the indicator has been computed. The +'Dates' array is updated to the dates corresponding to the beginning of the +aggregated time period. 
A new element called 'time_bounds' will be added into +the 'attrs' element in the 's2dv_cube' object. It consists of a list +containing two elements, the start and end dates of the aggregated period with +the same dimensions of 'Dates' element. } \description{ The Total Time of a variable exceeding (or not) a Threshold. It returns the -- GitLab From 1096e69c87fd71b09b914de90dd9d67882dbf344 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 29 Jun 2023 15:13:05 +0200 Subject: [PATCH 30/87] Undo changes in default value of time_dim; CST functions have ftime, simplified functions have time --- R/PeriodAccumulation.R | 2 +- R/PeriodMean.R | 4 ++-- R/QThreshold.R | 4 ++-- R/Threshold.R | 4 ++-- R/WindCapacityFactor.R | 4 ++-- R/WindPowerDensity.R | 4 ++-- man/PeriodAccumulation.Rd | 2 +- man/PeriodMean.Rd | 4 ++-- man/QThreshold.Rd | 4 ++-- man/Threshold.Rd | 4 ++-- man/WindCapacityFactor.Rd | 4 ++-- man/WindPowerDensity.Rd | 4 ++-- 12 files changed, 22 insertions(+), 22 deletions(-) diff --git a/R/PeriodAccumulation.R b/R/PeriodAccumulation.R index 1a8fea0..3b0d33d 100644 --- a/R/PeriodAccumulation.R +++ b/R/PeriodAccumulation.R @@ -169,7 +169,7 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, #'@import multiApply #'@export PeriodAccumulation <- function(data, dates = NULL, start = NULL, end = NULL, - time_dim = 'ftime', na.rm = FALSE, + time_dim = 'time', na.rm = FALSE, ncores = NULL) { if (is.null(data)) { stop("Parameter 'data' cannot be NULL.") diff --git a/R/PeriodMean.R b/R/PeriodMean.R index f848fda..abc585a 100644 --- a/R/PeriodMean.R +++ b/R/PeriodMean.R @@ -135,7 +135,7 @@ CST_PeriodMean <- function(data, start = NULL, end = NULL, #' the period and the final month of the period. By default it is set to NULL #' and the indicator is computed using all the data provided in \code{data}. #'@param time_dim A character string indicating the name of the dimension to -#' compute the indicator. By default, it is set to 'ftime'. More than one +#' compute the indicator. By default, it is set to 'time'. More than one #' dimension name matching the dimensions provided in the object #' \code{data$data} can be specified. #'@param na.rm A logical value indicating whether to ignore NA values (TRUE) or @@ -162,7 +162,7 @@ CST_PeriodMean <- function(data, start = NULL, end = NULL, #'@import multiApply #'@export PeriodMean <- function(data, dates = NULL, start = NULL, end = NULL, - time_dim = 'ftime', na.rm = FALSE, ncores = NULL) { + time_dim = 'time', na.rm = FALSE, ncores = NULL) { if (is.null(data)) { stop("Parameter 'data' cannot be NULL.") diff --git a/R/QThreshold.R b/R/QThreshold.R index c39a14b..c5089df 100644 --- a/R/QThreshold.R +++ b/R/QThreshold.R @@ -145,7 +145,7 @@ CST_QThreshold <- function(data, threshold, start = NULL, end = NULL, #' the period and the final month of the period. By default it is set to NULL #' and the indicator is computed using all the data provided in \code{data}. #'@param time_dim A character string indicating the name of the temporal -#' dimension. By default, it is set to 'ftime'. More than one dimension name +#' dimension. By default, it is set to 'time'. More than one dimension name #' matching the dimensions provided in the object \code{data$data} can be #' specified. This dimension is required to subset the data in a requested #' period. 
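Editor's note: a brief sketch, not part of the patch, of the convention this commit settles on; the array and dimension names below are illustrative only. The array-based functions now default to time_dim = 'time', so data whose temporal dimension is still called 'ftime' needs the argument set explicitly, while the CST_* wrappers keep 'ftime' as their default:

data <- array(rnorm(24), dim = c(member = 2, sdate = 4, ftime = 3))
res <- PeriodMean(data, time_dim = 'ftime')  # dates/start/end may stay NULL, so all time steps are averaged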
@@ -178,7 +178,7 @@ CST_QThreshold <- function(data, threshold, start = NULL, end = NULL, #'@import multiApply #'@export QThreshold <- function(data, threshold, dates = NULL, start = NULL, end = NULL, - time_dim = 'ftime', memb_dim = 'member', + time_dim = 'time', memb_dim = 'member', sdate_dim = 'sdate', ncores = NULL) { # Initial checks ## data diff --git a/R/Threshold.R b/R/Threshold.R index b4e9638..73e3715 100644 --- a/R/Threshold.R +++ b/R/Threshold.R @@ -107,7 +107,7 @@ CST_Threshold <- function(data, threshold, start = NULL, end = NULL, #' the period and the final month of the period. By default it is set to NULL #' and the indicator is computed using all the data provided in \code{data}. #'@param time_dim A character string indicating the name of the temporal -#' dimension. By default, it is set to 'ftime'. More than one dimension name +#' dimension. By default, it is set to 'time'. More than one dimension name #' matching the dimensions provided in the object \code{data$data} can be #' specified. This dimension is required to subset the data in a requested #' period. @@ -136,7 +136,7 @@ CST_Threshold <- function(data, threshold, start = NULL, end = NULL, #'@importFrom stats quantile #'@export Threshold <- function(data, threshold, dates = NULL, start = NULL, end = NULL, - time_dim = 'ftime', memb_dim = 'member', sdate_dim = 'sdate', + time_dim = 'time', memb_dim = 'member', sdate_dim = 'sdate', na.rm = FALSE, ncores = NULL) { if (is.null(data)) { stop("Parameter 'data' cannot be NULL.") diff --git a/R/WindCapacityFactor.R b/R/WindCapacityFactor.R index 7f32139..760dba0 100644 --- a/R/WindCapacityFactor.R +++ b/R/WindCapacityFactor.R @@ -135,7 +135,7 @@ CST_WindCapacityFactor <- function(wind, IEC_class = c("I", "I/II", "II", "II/II #' the period and the final month of the period. By default it is set to NULL #' and the indicator is computed using all the data provided in \code{data}. #'@param time_dim A character string indicating the name of the dimension to -#' compute the indicator. By default, it is set to 'ftime'. More than one +#' compute the indicator. By default, it is set to 'time'. More than one #' dimension name matching the dimensions provided in the object #' \code{data$data} can be specified. #'@param ncores An integer indicating the number of cores to use in parallel @@ -164,7 +164,7 @@ CST_WindCapacityFactor <- function(wind, IEC_class = c("I", "I/II", "II", "II/II #'@export WindCapacityFactor <- function(wind, IEC_class = c("I", "I/II", "II", "II/III", "III"), dates = NULL, start = NULL, end = NULL, - time_dim = 'ftime', ncores = NULL) { + time_dim = 'time', ncores = NULL) { IEC_class <- match.arg(IEC_class) pc_files <- c( "I" = "Enercon_E70_2.3MW.txt", diff --git a/R/WindPowerDensity.R b/R/WindPowerDensity.R index 181e509..3eba59a 100644 --- a/R/WindPowerDensity.R +++ b/R/WindPowerDensity.R @@ -111,7 +111,7 @@ CST_WindPowerDensity <- function(wind, ro = 1.225, start = NULL, end = NULL, #' the period and the final month of the period. By default it is set to NULL #' and the indicator is computed using all the data provided in \code{data}. #'@param time_dim A character string indicating the name of the dimension to -#' compute the indicator. By default, it is set to 'ftime'. More than one +#' compute the indicator. By default, it is set to 'time'. More than one #' dimension name matching the dimensions provided in the object #' \code{data$data} can be specified. 
#'@param ncores An integer indicating the number of cores to use in parallel @@ -135,7 +135,7 @@ CST_WindPowerDensity <- function(wind, ro = 1.225, start = NULL, end = NULL, #' #'@export WindPowerDensity <- function(wind, ro = 1.225, dates = NULL, start = NULL, - end = NULL, time_dim = 'ftime', ncores = NULL) { + end = NULL, time_dim = 'time', ncores = NULL) { if (!is.null(start) && !is.null(end)) { if (is.null(dates)) { diff --git a/man/PeriodAccumulation.Rd b/man/PeriodAccumulation.Rd index 0260648..614b65c 100644 --- a/man/PeriodAccumulation.Rd +++ b/man/PeriodAccumulation.Rd @@ -9,7 +9,7 @@ PeriodAccumulation( dates = NULL, start = NULL, end = NULL, - time_dim = "ftime", + time_dim = "time", na.rm = FALSE, ncores = NULL ) diff --git a/man/PeriodMean.Rd b/man/PeriodMean.Rd index d02a0ab..cd1fcef 100644 --- a/man/PeriodMean.Rd +++ b/man/PeriodMean.Rd @@ -9,7 +9,7 @@ PeriodMean( dates = NULL, start = NULL, end = NULL, - time_dim = "ftime", + time_dim = "time", na.rm = FALSE, ncores = NULL ) @@ -33,7 +33,7 @@ the period and the final month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}.} \item{time_dim}{A character string indicating the name of the dimension to -compute the indicator. By default, it is set to 'ftime'. More than one +compute the indicator. By default, it is set to 'time'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified.} diff --git a/man/QThreshold.Rd b/man/QThreshold.Rd index ba023d8..efc48cf 100644 --- a/man/QThreshold.Rd +++ b/man/QThreshold.Rd @@ -10,7 +10,7 @@ QThreshold( dates = NULL, start = NULL, end = NULL, - time_dim = "ftime", + time_dim = "time", memb_dim = "member", sdate_dim = "sdate", ncores = NULL @@ -39,7 +39,7 @@ the period and the final month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}.} \item{time_dim}{A character string indicating the name of the temporal -dimension. By default, it is set to 'ftime'. More than one dimension name +dimension. By default, it is set to 'time'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified. This dimension is required to subset the data in a requested period.} diff --git a/man/Threshold.Rd b/man/Threshold.Rd index d254cbe..dc9d2a2 100644 --- a/man/Threshold.Rd +++ b/man/Threshold.Rd @@ -10,7 +10,7 @@ Threshold( dates = NULL, start = NULL, end = NULL, - time_dim = "ftime", + time_dim = "time", memb_dim = "member", sdate_dim = "sdate", na.rm = FALSE, @@ -39,7 +39,7 @@ the period and the final month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}.} \item{time_dim}{A character string indicating the name of the temporal -dimension. By default, it is set to 'ftime'. More than one dimension name +dimension. By default, it is set to 'time'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified. This dimension is required to subset the data in a requested period.} diff --git a/man/WindCapacityFactor.Rd b/man/WindCapacityFactor.Rd index 3ddeec6..a0a7ce5 100644 --- a/man/WindCapacityFactor.Rd +++ b/man/WindCapacityFactor.Rd @@ -10,7 +10,7 @@ WindCapacityFactor( dates = NULL, start = NULL, end = NULL, - time_dim = "ftime", + time_dim = "time", ncores = NULL ) } @@ -41,7 +41,7 @@ the period and the final month of the period. 
By default it is set to NULL and the indicator is computed using all the data provided in \code{data}.} \item{time_dim}{A character string indicating the name of the dimension to -compute the indicator. By default, it is set to 'ftime'. More than one +compute the indicator. By default, it is set to 'time'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified.} diff --git a/man/WindPowerDensity.Rd b/man/WindPowerDensity.Rd index 9b935bc..8b72009 100644 --- a/man/WindPowerDensity.Rd +++ b/man/WindPowerDensity.Rd @@ -10,7 +10,7 @@ WindPowerDensity( dates = NULL, start = NULL, end = NULL, - time_dim = "ftime", + time_dim = "time", ncores = NULL ) } @@ -38,7 +38,7 @@ the period and the final month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}.} \item{time_dim}{A character string indicating the name of the dimension to -compute the indicator. By default, it is set to 'ftime'. More than one +compute the indicator. By default, it is set to 'time'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified.} -- GitLab From 6f95d8e143cbf58ee7db64f2a3082163cb95096a Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 5 Jul 2023 17:38:31 +0200 Subject: [PATCH 31/87] Develop SPEI in order that NA from accumulation are not used to compute standardization, then added again NA --- R/PeriodSPEI.R | 81 +++++++++++---- tests/testthat/test-PeriodSPEI.R | 172 ++++++++++++++++++++++++------- 2 files changed, 194 insertions(+), 59 deletions(-) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index f947531..8a0b7e2 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -410,9 +410,10 @@ PeriodSPEI <- function(exp, dates_exp, lat, first_dims <- dims[[1]] all_equal <- all(sapply(dims[-1], function(x) identical(first_dims, x))) if (!all_equal) { - stop("Parameter 'exp' variables needs to have the same dimension names.") + stop("Parameter 'exp' variables need to have the same dimensions.") } + ## exp_cor if (!is.null(exp_cor)) { if (!inherits(exp_cor, 'list')) { @@ -429,7 +430,7 @@ PeriodSPEI <- function(exp, dates_exp, lat, first_dimscor <- dimscor[[1]] all_equal <- all(sapply(dimscor[-1], function(x) identical(first_dimscor, x))) if (!all_equal) { - stop("Parameter 'exp_cor' variables needs to have the same dimension names.") + stop("Parameter 'exp_cor' variables need to have the same dimensions.") } } @@ -437,14 +438,20 @@ PeriodSPEI <- function(exp, dates_exp, lat, if (!is.numeric(lat)) { stop("Parameter 'lat' must be numeric.") } + if (!lat_dim %in% names(dims[[1]])) { + stop("Parameter 'exp' must have 'lat_dim' dimension.") + } if (any(sapply(dims, FUN = function(x) x[lat_dim] != length(lat)))) { stop("Parameter 'lat' needs to have the same length of latitudinal", "dimension of all the variables arrays in 'exp'.") } if (!is.null(exp_cor)) { + if (!lat_dim %in% names(dimscor[[1]])) { + stop("Parameter 'exp_cor' must have 'lat_dim' dimension.") + } if (any(sapply(dimscor, FUN = function(x) x[lat_dim] != length(lat)))) { stop("Parameter 'lat' needs to have the same length of latitudinal", - "dimension of all the variables arrays in 'exp'.") + "dimension of all the variables arrays in 'exp'.") } } @@ -562,7 +569,7 @@ PeriodSPEI <- function(exp, dates_exp, lat, } ## memb_dim if (!is.character(memb_dim) | length(memb_dim) != 1) { - stop("Parameter 'time_dim' must be a character string.") + stop("Parameter 'memb_dim' must be a character string.") } if 
(!all(sapply(exp, function(x) memb_dim %in% names(dim(x))))) { stop("Parameter 'memb_dim' is not found in 'exp' dimension.") @@ -592,6 +599,10 @@ PeriodSPEI <- function(exp, dates_exp, lat, stop("Parameter 'dates_exp' is not of the correct class, ", "only 'Date' and 'POSIXct' classes are accepted.") } + if (!time_dim %in% names(dim(dates_exp)) | !leadtime_dim %in% names(dim(dates_exp))) { + stop("Parameter 'dates_exp' must have 'time_dim' and 'leadtime_dim' ", + "dimension.") + } if (!all(dim(exp[[1]])[c(time_dim, leadtime_dim)] == dim(dates_exp)[c(time_dim, leadtime_dim)])) { stop("Parameter 'dates_exp' needs to have the same length as 'time_dim' ", @@ -607,6 +618,10 @@ PeriodSPEI <- function(exp, dates_exp, lat, stop("Element 'Dates' in 'exp_cor' is not of the correct class, ", "only 'Date' and 'POSIXct' classes are accepted.") } + if (!time_dim %in% names(dim(dates_expcor)) | !leadtime_dim %in% names(dim(dates_expcor))) { + stop("Parameter 'dates_expcor' must have 'time_dim' and 'leadtime_dim' ", + "dimension.") + } if (!all(dim(exp_cor[[1]])[c(time_dim, leadtime_dim)] == dim(dates_expcor)[c(time_dim, leadtime_dim)])) { stop("Parameter 'dates_expcor' needs to have the same length as ", @@ -738,21 +753,21 @@ PeriodSPEI <- function(exp, dates_exp, lat, ncores = ncores) # Standardization if (standardization) { - spei_dat <- .Standardization(data = data_accum, params = params, - accum = accum, time_dim = time_dim, - leadtime_dim = leadtime_dim, - memb_dim = memb_dim, - ref_period = ref_period, - cross_validation = cross_validation, - handle_infinity = handle_infinity, - param_error = param_error, - method = method, distribution = distribution, - fit = fit, na.rm = na.rm, ncores = ncores) - params <- spei_dat$params - pos <- match(names(dim(data[[1]])), names(dim(spei_dat[[1]]))) - spei_dat[[1]] <- aperm(spei_dat[[1]], pos) - - spei_res[[k]] <- spei_dat[[1]] + spei <- .Standardization(data = data_accum, params = params, + accum = accum, time_dim = time_dim, + leadtime_dim = leadtime_dim, + memb_dim = memb_dim, + ref_period = ref_period, + cross_validation = cross_validation, + handle_infinity = handle_infinity, + param_error = param_error, + method = method, distribution = distribution, + fit = fit, na.rm = na.rm, ncores = ncores) + params <- spei$params + pos <- match(names(dim(data[[1]])), names(dim(spei[[1]]))) + spei[[1]] <- aperm(spei[[1]], pos) + + spei_res[[k]] <- spei[[1]] } else { pos <- match(names(dim(data[[1]])), names(dim(data_accum))) data_accum <- aperm(data_accum, pos) @@ -961,6 +976,18 @@ PeriodSPEI <- function(exp, dates_exp, lat, method = 'parametric', distribution = 'log-Logistic', fit = 'ub-pwm', na.rm = FALSE, ncores = NULL) { + # Remove NA data due to accumulation + if (accum > 1) { + data <- ClimProjDiags::Subset(x = data, along = leadtime_dim, + indices = accum:dim(data)[leadtime_dim], + drop = FALSE) + if (!is.null(params)) { + params <- ClimProjDiags::Subset(x = params, along = leadtime_dim, + indices = accum:dim(params)[leadtime_dim], + drop = FALSE) + } + } + nleadtime <- dim(data)[leadtime_dim] ntime <- dim(data)[time_dim] @@ -995,11 +1022,23 @@ PeriodSPEI <- function(exp, dates_exp, lat, cross_validation = cross_validation, param_error = param_error, method = method, distribution = distribution, fit = fit, na.rm = na.rm, - output_dims = list(data_spei = c(leadtime_dim, time_dim, memb_dim), + output_dims = list(spei = c(leadtime_dim, time_dim, memb_dim), params = c(time_dim, leadtime_dim, 'coef')), ncores = ncores) - return(spei) # spei is a list of data_spei 
and params + # Add again NA values due to accumulation + if (accum > 1) { + for (s in 1:length(spei)) { + spei[[s]] <- Apply(data = list(spei[[s]]), target_dims = leadtime_dim, + fun = function(x, accum = 1, leadtime_dim = 'time') { + res <- c(rep(NA, accum-1), x) + dim(res) <- length(res) + names(dim(res)) <- leadtime_dim + return(res) + }, accum = accum, leadtime_dim = leadtime_dim)$output1 + } + } + return(spei) } .standardization <- function(data, params, coef, leadtime_dim = 'time', diff --git a/tests/testthat/test-PeriodSPEI.R b/tests/testthat/test-PeriodSPEI.R index c6b8075..4e42859 100644 --- a/tests/testthat/test-PeriodSPEI.R +++ b/tests/testthat/test-PeriodSPEI.R @@ -4,6 +4,19 @@ cube1 <- NULL cube1$data <- array(rnorm(10), dim = c(lat = 2, time = 5)) class(cube1) <- 's2dv_cube' +# cube2 +cube2 <- NULL +cube2$data <- array(rnorm(10), dim = c(lat = 2, time = 5)) +class(cube2) <- 's2dv_cube' +cube2$coords <- list(lat = 1:2) + +# cube3 +cube3 <- NULL +cube3$data <- array(rnorm(10), dim = c(lat = 2, time = 5)) +class(cube3) <- 's2dv_cube' +cube3$coords <- list(lat = 1:2) +cube3$attrs$Dates <- as.Date(c(paste0(2010:2014, "-08-16"))) + # dat1 dims <- c(syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 10) dimscor <- c(syear = 1, time = 3, latitude = 2, longitude = 1, ensemble = 15) @@ -23,7 +36,7 @@ set.seed(3) expcor_prlr <- array(rnorm(60, 15.62, 21.38), dim = dimscor) dates_exp <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), - paste0(2010:2015, "-10-16"))) + paste0(2010:2015, "-10-16"))) dim(dates_exp) <- c(sday = 1, sweek = 1, syear = 6, time = 3) dates_expcor <- as.Date(c(paste0(2020, "-08-16"), paste0(2020, "-09-15"), @@ -38,26 +51,26 @@ params1 <- array(abs(rnorm(100)), dim = c(syear = 1, time = 3, latitude = 2, longitude = 1, coef = 3)) # dat2 -dims <- c(syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 10) -dimscor <- c(syear = 1, time = 3, latitude = 2, longitude = 1, ensemble = 15) +dims2 <- c(styear = 6, ftime = 3, lat = 2, lon = 1, member = 10) +dimscor2 <- c(styear = 1, ftime = 3, lat = 2, lon = 1, member = 15) set.seed(1) -exp_tas <- array(rnorm(100, 17.34, 9.18), dim = dims) +exp_tas <- array(rnorm(100, 17.34, 9.18), dim = dims2) set.seed(2) -exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims) +exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims2) set.seed(1) -expcor_tas <- array(rnorm(100, 17.23, 9.19), dim = dimscor) +expcor_tas <- array(rnorm(100, 17.23, 9.19), dim = dimscor2) set.seed(2) -expcor_prlr <- array(rnorm(60, 15.62, 21.38), dim = dimscor) +expcor_prlr <- array(rnorm(60, 15.62, 21.38), dim = dimscor2) -dates_exp <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), +dates_exp2 <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), paste0(2010:2015, "-10-16"))) -dim(dates_exp) <- c(sday = 1, sweek = 1, syear = 6, time = 3) +dim(dates_exp2) <- c(sday = 1, sweek = 1, styear = 6, ftime = 3) -dates_expcor <- as.Date(c(paste0(2020, "-08-16"), paste0(2020, "-09-15"), +dates_expcor2 <- as.Date(c(paste0(2020, "-08-16"), paste0(2020, "-09-15"), paste0(2020, "-10-16"))) -dim(dates_expcor) <- c(sday = 1, sweek = 1, syear = 1, time = 3) +dim(dates_expcor2) <- c(sday = 1, sweek = 1, styear = 1, ftime = 3) lat <- c(40,40.1) @@ -67,6 +80,7 @@ exp_cor2 <- list('tas' = expcor_tas, 'prlr' = expcor_prlr) ############################################## test_that("1. Initial checks CST_PeriodSPEI", { + # Check 's2dv_cube' expect_error( CST_PeriodSPEI(exp = NULL), "Parameter 'exp' cannot be NULL." 
@@ -75,11 +89,25 @@ test_that("1. Initial checks CST_PeriodSPEI", { CST_PeriodSPEI(exp = array(10)), "Parameter 'exp' must be a list of 's2dv_cube' class." ) + # latitude expect_error( CST_PeriodSPEI(exp = list(cube1)), paste0("Spatial coordinate names of parameter 'exp' do not match any ", "of the names accepted by the package.") ) + # Dates + expect_error( + CST_PeriodSPEI(exp = list(cube2)), + paste0("Element 'Dates' is not found in 'attrs' list of 'exp'. ", + "See 's2dv_cube' object description in README file for more ", + "information.") + ) + expect_error( + CST_PeriodSPEI(exp = list(cube3), exp_cor = list(cube2)), + paste0("Element 'Dates' is not found in 'attrs' list of 'exp_cor'. ", + "See 's2dv_cube' object description in README file for more ", + "information.") + ) }) ############################################## @@ -95,9 +123,19 @@ test_that("1. Initial checks PeriodSPEI", { "Parameter 'exp' needs to be a named list with the variable names." ) expect_error( - PeriodSPEI(exp = list('tasmax' = array(10))), + PeriodSPEI(exp = list(tasmax = array(10))), "Parameter 'exp' needs to be a list of arrays with dimension names." ) + expect_error( + PeriodSPEI(exp = list(tasmax = array(10, c(time = 10)), + tasmin = array(10, c(time = 11)))), + "Parameter 'exp' variables need to have the same dimensions." + ) + expect_error( + PeriodSPEI(exp = list(tasmax = array(10, c(time = 10)), + tasmin = array(10, c(ftime = 10)))), + "Parameter 'exp' variables need to have the same dimensions." + ) # exp_cor expect_error( PeriodSPEI(exp = exp1, exp_cor = 1), @@ -112,6 +150,20 @@ test_that("1. Initial checks PeriodSPEI", { PeriodSPEI(exp = exp1, exp_cor = list('tasmax' = array(10))), "Parameter 'exp_cor' needs to be a list of arrays with dimension names." ) + expect_error( + PeriodSPEI(exp = exp1, exp_cor = list(tasmax = array(10, c(time = 10)), + tasmin = array(10, c(time = 11)))), + "Parameter 'exp_cor' variables need to have the same dimensions." + ) + expect_error( + PeriodSPEI(exp = exp1, lat = 'lat'), + "Parameter 'lat' must be numeric." + ) + expect_error( + PeriodSPEI(exp = list(tasmax = array(10, c(time = 10)), + tasmin = array(10, c(time = 10))), lat = 1:2), + "Parameter 'exp' must have 'lat_dim' dimension." + ) # exp (2) expect_warning( PeriodSPEI(exp = exp1, pet_method = '1', dates_exp = dates_exp, lat = lat), @@ -120,55 +172,95 @@ test_that("1. Initial checks PeriodSPEI", { ) # time_dim expect_error( - PeriodSPEI(exp = exp1, time_dim = 1, dates_exp = dates_exp, lat = lat) + PeriodSPEI(exp = exp1, time_dim = 1, dates_exp = dates_exp, lat = lat), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + PeriodSPEI(exp = exp2, exp_cor = exp_cor2, lat = lat, + dates_exp = dates_exp2, dates_expcor = dates_expcor2, + lat_dim = 'lat', pet_method = 'thornthwaite'), + "Parameter 'time_dim' is not found in 'exp' dimension." ) # leadtime_dim expect_error( - PeriodSPEI(exp = exp1, time_dim = 1, dates_exp = dates_exp, lat = lat) + PeriodSPEI(exp = exp1, leadtime_dim = 1, dates_exp = dates_exp, lat = lat), + "Parameter 'leadtime_dim' must be a character string." + ) + expect_error( + PeriodSPEI(exp = exp2, exp_cor = exp_cor2, lat = lat, + dates_exp = dates_exp2, dates_expcor = dates_expcor2, + lat_dim = 'lat', time_dim = 'ftime', pet_method = 'thornthwaite'), + "Parameter 'leadtime_dim' is not found in 'exp' dimension." 
) # memb_dim expect_error( - PeriodSPEI(exp = exp1, memb_dim = 1, dates_exp = dates_exp, lat = lat) + PeriodSPEI(exp = exp1, memb_dim = 1, dates_exp = dates_exp, lat = lat), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + PeriodSPEI(exp = exp2, exp_cor = exp_cor2, lat = lat, + dates_exp = dates_exp2, dates_expcor = dates_expcor2, + lat_dim = 'lat', time_dim = 'ftime', leadtime_dim = 'styear', + pet_method = 'thornthwaite'), + "Parameter 'memb_dim' is not found in 'exp' dimension." ) # lat_dim expect_error( PeriodSPEI(exp = exp1, lat_dim = 1, dates_exp = dates_exp, lat = lat) ) + expect_error( + PeriodSPEI(exp = exp2, exp_cor = exp_cor2, lat = lat, + dates_exp = dates_exp2, dates_expcor = dates_expcor2), + "Parameter 'exp' must have 'lat_dim' dimension." + ) # accum expect_error( - PeriodSPEI(exp = exp1, accum = 10, dates_exp = dates_exp, lat = lat) + PeriodSPEI(exp = exp1, accum = 10, dates_exp = dates_exp, lat = lat), + "Cannot compute accumulation of 10 months because loaded data has only 3 months." ) # standardization expect_error( - PeriodSPEI(exp = exp1, standardization = 10, dates_exp = dates_exp, lat = lat) + PeriodSPEI(exp = exp1, standardization = 10, dates_exp = dates_exp, lat = lat), + "Parameter 'standardization' must be a logical value." ) # param_error expect_error( - PeriodSPEI(exp = exp1, param_error = TRUE, dates_exp = dates_exp, lat = lat) + PeriodSPEI(exp = exp1, param_error = TRUE, dates_exp = dates_exp, lat = lat), + "Parameter 'param_error' must be a numeric value." ) # handle_infinity expect_error( - PeriodSPEI(exp = exp1, handle_infinity = 1, dates_exp = dates_exp, lat = lat) + PeriodSPEI(exp = exp1, handle_infinity = 1, dates_exp = dates_exp, lat = lat), + "Parameter 'handle_infinity' must be a logical value." ) # cross_validation expect_error( - PeriodSPEI(exp = exp1, cross_validation = 1, dates_exp = dates_exp, lat = lat) + PeriodSPEI(exp = exp1, cross_validation = 1, dates_exp = dates_exp, lat = lat), + "Parameter 'cross_validation' must be a logical value." ) # method expect_error( - PeriodSPEI(exp = exp1, method = 1, dates_exp = dates_exp, lat = lat) + PeriodSPEI(exp = exp1, method = 1, dates_exp = dates_exp, lat = lat), + paste0("Parameter 'method' must be a character string containing one ", + "of the following methods: 'parametric' or 'non-parametric'.") ) # distribution expect_error( - PeriodSPEI(exp = exp1, distribution = 1, dates_exp = dates_exp, lat = lat) + PeriodSPEI(exp = exp1, distribution = 1, dates_exp = dates_exp, lat = lat), + paste0("Parameter 'distribution' must be a character string containing one ", + "of the following distributions: 'log-Logistic', 'Gamma' or ", + "'PearsonIII'.") ) # fit expect_error( - PeriodSPEI(exp = exp1, fit = 1, dates_exp = dates_exp, lat = lat) + PeriodSPEI(exp = exp1, fit = 1, dates_exp = dates_exp, lat = lat), + paste0("Parameter 'fit' must be a character string containing one of ", + "the following fit methods: 'max-lik', 'ub-pwm', 'pp-pwm'.") ) # ncores expect_error( - PeriodSPEI(exp = exp1, ncores = 1.5, dates_exp = dates_exp, lat = lat) + PeriodSPEI(exp = exp1, ncores = 1.5, dates_exp = dates_exp, lat = lat), + "Parameter 'ncores' must be a positive integer." ) }) @@ -182,10 +274,6 @@ test_that("2. 
Output checks", { standardization = FALSE) res3 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, dates_exp = dates_exp) - res4 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, - dates_exp = dates_exp, standardization = FALSE) - res5 <- PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp, - params = params1) # output dims expect_equal( names(res1), @@ -199,10 +287,6 @@ test_that("2. Output checks", { names(res3), c('exp', 'params') ) - expect_equal( - names(res4), - c('exp') - ) expect_equal( dim(res1[[1]]), dims @@ -222,10 +306,6 @@ test_that("2. Output checks", { # exp # exp_cor # pet - # time_dim - # leadtime_dim - # memb_dim - # lat_dim # accum res11 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, accum = 2, dates_exp = dates_exp, na.rm = TRUE) @@ -236,6 +316,8 @@ test_that("2. Output checks", { ) # ref_period # params + res5 <- PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp, + params = params1) expect_error( PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp, params = array(abs(rnorm(100)), dim = dimscor)), @@ -247,6 +329,16 @@ test_that("2. Output checks", { c(syear = 1, time = 3, coef = 3, latitude = 2, longitude = 1) ) # standarization + res4 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, + dates_exp = dates_exp, standardization = FALSE) + expect_equal( + names(res4), + c('exp') + ) + expect_equal( + dim(res4$exp), + c(syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 10) + ) # cross_validation expect_warning( PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, @@ -281,9 +373,13 @@ test_that("2. Output checks", { res6 <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, dates_exp = dates_exp, dates_expcor = dates_expcor, pet_method = c('hargreaves_modified', 'hargreaves')) + + # time_dim, leadtime_dim, memb_dim, lat_dim res7 <- PeriodSPEI(exp = exp2, exp_cor = exp_cor2, lat = lat, - dates_exp = dates_exp, dates_expcor = dates_expcor, - pet_method = c('thornthwaite', 'thornthwaite')) + dates_exp = dates_exp2, dates_expcor = dates_expcor2, + pet_method = c('thornthwaite', 'thornthwaite'), + lat_dim = 'lat', time_dim = 'styear', + leadtime_dim = 'ftime', memb_dim = 'member') # method - ok res8 <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, dates_exp = dates_exp, dates_expcor = dates_expcor, -- GitLab From 903fca855b13d9249612ee8cb22b77fa5e0ebd04 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Tue, 11 Jul 2023 16:20:35 +0200 Subject: [PATCH 32/87] Add memb_dim to MergeRefToExp; Improve function --- NAMESPACE | 1 + R/MergeRefToExp.R | 243 ++++++++++++++++++++++++++------------- man/CST_MergeRefToExp.Rd | 5 + man/MergeRefToExp.Rd | 13 ++- 4 files changed, 179 insertions(+), 83 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index d80accb..8795a86 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -26,6 +26,7 @@ export(TotalTimeExceedingThreshold) export(WindCapacityFactor) export(WindPowerDensity) import(multiApply) +importFrom(s2dv,InsertDim) importFrom(stats,approxfun) importFrom(stats,ecdf) importFrom(stats,quantile) diff --git a/R/MergeRefToExp.R b/R/MergeRefToExp.R index 434cae3..534bb55 100644 --- a/R/MergeRefToExp.R +++ b/R/MergeRefToExp.R @@ -33,6 +33,9 @@ #' specified. This dimension is required to subset the data in a requested period. #'@param sdate_dim A character string indicating the name of the dimension in #' which the initialization dates are stored. +#'@param memb_dim A character string indicating the name of the member +#' dimension. If the data are not ensemble ones, set as NULL. The default +#' value is 'member'. 
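Editor's note: a standalone sketch, not part of the patch, of the NA padding that the new 'memb_dim' handling applies further down when 'data1' and 'data2' have member dimensions of different lengths; the array sizes here are illustrative only and the snippet mirrors the Apply() call added in this commit:

library(multiApply)
x <- array(rnorm(12), dim = c(member = 2, ftime = 6))
# pad the 'member' dimension from length 2 to length 3 with NA values
x <- Apply(list(x), target_dims = 'member',
           fun = function(y, length_new_dim) {
             c(y, rep(NA, length_new_dim - length(y)))
           }, length_new_dim = 3, output_dims = 'member')$output1
dim(x)  # member = 3, ftime = 6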
#'@param ncores An integer indicating the number of cores to use in parallel #' computation. #'@return A 's2dv_cube' object containing the indicator in the element @@ -63,7 +66,7 @@ #'@export CST_MergeRefToExp <- function(data1, data2, start1, end1, start2, end2, time_dim = 'ftime', sdate_dim = 'sdate', - ncores = NULL) { + memb_dim = 'member', ncores = NULL) { # Check 's2dv_cube' if (!inherits(data1, 's2dv_cube')) { stop("Parameter 'ref' must be of the class 's2dv_cube'.") @@ -72,74 +75,74 @@ CST_MergeRefToExp <- function(data1, data2, start1, end1, start2, end2, stop("Parameter 'data' must be of the class 's2dv_cube'.") } # Dates subset of data1 - dates1 <- NULL if (!is.null(start1) && !is.null(end1)) { if (is.null(dim(data1$attrs$Dates))) { warning("Dimensions in 'data1' element 'attrs$Dates' are missed and ", "all data would be used.") - start <- NULL - end <- NULL - } else { - dates1 <- data1$attrs$Dates + start1 <- NULL + end1 <- NULL } } # Dates subset of data2 - dates2 <- NULL if (!is.null(start2) && !is.null(end2)) { if (is.null(dim(data2$attrs$Dates))) { warning("Dimensions in 'data2' element 'attrs$Dates' are missed and ", "all data would be used.") - start <- NULL - end <- NULL - } else { - dates2 <- data2$attrs$Dates + start2 <- NULL + end2 <- NULL } } + dates1 <- data1$attrs$Dates + dates2 <- data2$attrs$Dates + data1$data <- MergeRefToExp(data1 = data1$data, dates1 = dates1, start1 = start1, end1 = end1, data2 = data2$data, dates2 = dates2, - start2, end2, time_dim = time_dim, + start2, end2, time_dim = time_dim, + memb_dim = memb_dim, sdate_dim = sdate_dim, ncores = ncores) + data1$dims <- dim(data1$data) + if (!is.null(dates1)) { - data1$attrs$Dates <- SelectPeriodOnDates(dates1, start = start1, end = end1, - time_dim = time_dim) + if (!is.null(start1) && !is.null(end1)) { + dates1 <- SelectPeriodOnDates(dates1, start = start1, end = end1, + time_dim = time_dim) + } } if (!is.null(dates2)) { - data2$attrs$Dates <- SelectPeriodOnDates(dates2, start = start2, - end = end2, time_dim = time_dim) + if ((!is.null(start2) && !is.null(end2))) { + dates2 <- SelectPeriodOnDates(dates2, start = start2, + end = end2, time_dim = time_dim) + } } # TO DO CONCATENATE DATES - remove_dates1_dim <- FALSE - remove_dates2_dim <- FALSE - if (!is.null(data1$attrs$Dates) & !is.null(data2$attrs$Dates)) { - if (is.null(dim(data1$attrs$Dates))) { - remove_dates1_dim <- TRUE - dim(data1$attrs$Dates) <- length(data1$attrs$Dates) - names(dim(data1$attrs$Dates)) <- time_dim + remove_dates_dim <- FALSE + + if (!is.null(dates1) & !is.null(dates2)) { + if (is.null(dim(dates1))) { + remove_dates_dim <- TRUE + dim(dates1) <- length(dates1) + names(dim(dates1)) <- time_dim } - if (is.null(dim(data2$attrs$Dates))) { - remove_dates2_dim <- TRUE - dim(data2$attrs$Dates) <- length(data2$attrs$Dates) - names(dim(data2$attrs$Dates)) <- time_dim + if (is.null(dim(dates2))) { + remove_dates_dim <- TRUE + dim(dates2) <- length(dates2) + names(dim(dates2)) <- time_dim } } - res <- Apply(list(data1$attrs$Dates, data2$attrs$Dates), target_dims = time_dim, - c, output_dims = time_dim, ncores = ncores)$output1 - if (inherits(data1$attrs$Dates, 'Date')) { + res <- Apply(list(dates1, dates2), target_dims = time_dim, + 'c', output_dims = time_dim, ncores = ncores)$output1 + if (inherits(dates1, 'Date')) { data1$attrs$Dates <- as.Date(res, origin = '1970-01-01') } else { data1$attrs$Dates <- as.POSIXct(res*3600*24, origin = '1970-01-01', tz = 'UTC') } - if (remove_dates1_dim) { + if (remove_dates_dim) { dim(data1$attrs$Dates) 
<- NULL } - if (remove_dates2_dim) { - dim(data2$attrs$Dates) <- NULL - } - return(data1) } @@ -177,6 +180,9 @@ CST_MergeRefToExp <- function(data1, data2, start1, end1, start2, end2, #' period. #'@param sdate_dim A character string indicating the name of the dimension in #' which the initialization dates are stored. +#'@param memb_dim A character string indicating the name of the member +#' dimension. If the data are not ensemble ones, set as NULL. The default +#' value is 'member'. #'@param ncores An integer indicating the number of cores to use in parallel #' computation. #' @@ -199,57 +205,89 @@ CST_MergeRefToExp <- function(data1, data2, start1, end1, start2, end2, #' time_dim = 'time') #' #'@import multiApply +#'@importFrom s2dv InsertDim #'@export -MergeRefToExp <- function(data1, dates1, start1, end1, data2, dates2, start2, - end2, time_dim = 'ftime', sdate_dim = 'sdate', - ncores = NULL) { +MergeRefToExp <- function(data1, dates1, start1 = NULL, end1 = NULL, data2, + dates2, start2 = NULL, end2 = NULL, + time_dim = 'ftime', sdate_dim = 'sdate', + memb_dim = 'member', ncores = NULL) { # Input checks # data - if (!is.array(data1)) { - dim(data1) <- c(length(data1)) - names(dim(data1)) <- time_dim + if (!is.array(data1) | !is.array(data2)) { + stop("Parameters 'data1' and 'data2' must be arrays.") } - if (!is.array(data2)) { - dim(data2) <- c(length(data2)) - names(dim(data2)) <- time_dim + if (is.null(names(dim(data1))) | is.null(names(dim(data2)))) { + stop("Parameters 'data1' and 'data2' must have named dimensions.") } - # dates - if (!is.null(dates1) & !is.null(dates2)) { - if (is.null(dim(dates1))) { - warning("Dimensions in 'dates1' element are missed and ", - "all data would be used.") - dim(dates1) <- length(dates1) - names(dim(dates1)) <- time_dim + # time_dim + if (!is.character(time_dim)) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(data1)) | !time_dim %in% names(dim(data2))) { + stop("Parameter 'time_dim' is not found in 'data1' or 'data2' dimension ", + "names.") + } + # memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim)) { + stop("Parameter 'memb_dim' must be a character string.") } - if (is.null(dim(dates2))) { - warning("Dimensions in 'dates2' element are missed and ", - "all data would be used.") - dim(dates2) <- length(dates2) - names(dim(dates2)) <- time_dim + if (!memb_dim %in% names(dim(data1)) & !memb_dim %in% names(dim(data2))) { + stop("Parameter 'memb_dim' is not found in 'data1' or 'data2' dimension. 
", + "Set it to NULL if there is no member dimension.") + } + if (memb_dim %in% names(dim(data1))) { + if (dim(data1)[memb_dim] == 1) { + print('memb_dim removed data1') + data1 <- array(data1, dim = dim(data1)[-which(names(dim(data1)) == memb_dim)]) + } + } + if (memb_dim %in% names(dim(data2))) { + if (dim(data2)[memb_dim] == 1) { + print('memb_dim removed data2') + data2 <- array(data2, dim = dim(data2)[-which(names(dim(data2)) == memb_dim)]) + } + } + # Add NA to fill member_dim + if (memb_dim %in% names(dim(data1)) & memb_dim %in% names(dim(data2))) { + if (dim(data1)[memb_dim] != dim(data2)[memb_dim]) { + if (dim(data1)[memb_dim] > dim(data2)[memb_dim]) { + data2 <- Apply(list(data2), target_dims = memb_dim, + fun = function(x, length_new_dim) { + return(c(x, rep(NA, length_new_dim - length(x)))) + }, length_new_dim = dim(data1)[memb_dim], + output_dims = memb_dim)$output1 + } else { + data1 <- Apply(list(data1), target_dims = memb_dim, + fun = function(x, length_new_dim) { + return(c(x, rep(NA, length_new_dim - length(x)))) + }, length_new_dim = dim(data2)[memb_dim], + output_dims = memb_dim)$output1 + } + } } - data1 <- SelectPeriodOnData(data1, dates = dates1, start = start1, - end = end1, time_dim = time_dim, ncores = ncores) } - # Check if data2 has dimension sdate_dim and it should be added to data1: - if ((sdate_dim %in% names(dim(data2))) && dim(data2)[sdate_dim] > 1 && - !sdate_dim %in% names(dim(data1))) { - dim(data1) <- c(length(data1)/dim(data2)[sdate_dim], dim(data2)[sdate_dim]) - names(dim(data1)) <- c(time_dim, sdate_dim) - } - # Check if data1 has dimension sdate_dim and it should be added to data2: - if ((sdate_dim %in% names(dim(data1))) && dim(data1)[sdate_dim] > 1 && - !sdate_dim %in% names(dim(data2))) { - dim(data2) <- c(length(data2)/dim(data1)[sdate_dim], dim(data1)[sdate_dim]) - names(dim(data2)) <- c(time_dim, sdate_dim) - } + # # Check if data2 has dimension sdate_dim and it should be added to data1: + # if ((sdate_dim %in% names(dim(data2))) && dim(data2)[sdate_dim] > 1 && + # !sdate_dim %in% names(dim(data1))) { + # dim(data1) <- c(length(data1)/dim(data2)[sdate_dim], dim(data2)[sdate_dim]) + # names(dim(data1)) <- c(time_dim, sdate_dim) + # } + # # Check if data1 has dimension sdate_dim and it should be added to data2: + # if ((sdate_dim %in% names(dim(data1))) && dim(data1)[sdate_dim] > 1 && + # !sdate_dim %in% names(dim(data2))) { + # dim(data2) <- c(length(data2)/dim(data1)[sdate_dim], dim(data1)[sdate_dim]) + # names(dim(data2)) <- c(time_dim, sdate_dim) + # } + # Check if data1 needs to be extended to the length of the dimensions of data2: if (length(dim(data2)) != length(dim(data1))) { dif_dims <- which(names(dim(data2)) %in% names(dim(data1)) == FALSE) if (length(dif_dims) > 0) { for (i in dif_dims) { - data1 <- .insertdim(data1, posdim = i, lendim = dim(data2)[i], - name = names(dim(data2))[i]) + data1 <- InsertDim(data1, posdim = length(dim(data1)), lendim = dim(data2)[i], + name = names(dim(data2))[i]) } } } @@ -258,18 +296,65 @@ MergeRefToExp <- function(data1, dates1, start1, end1, data2, dates2, start2, dif_dims <- which(names(dim(data1)) %in% names(dim(data2)) == FALSE) if (length(dif_dims) > 0) { for (i in dif_dims) { - data2 <- .insertdim(data2, posdim = i, lendim = dim(data1)[i], - name = names(dim(data1))[i]) + data2 <- InsertDim(data2, posdim = length(dim(data1)), lendim = dim(data1)[i], + name = names(dim(data1))[i]) } } } - if (!is.null(dates2)) { - data2 <- SelectPeriodOnData(data2, dates = dates2, start = start2, - end = end2, 
time_dim = time_dim, ncores = ncores) + + # dates1 + if (!is.null(start1) & !is.null(end1)) { + if (is.null(dates1)) { + warning("Parameter 'dates' is NULL and the average of the ", + "full data provided in 'data' is computed.") + } else { + if (!any(c(is.list(start1), is.list(end1)))) { + stop("Parameter 'start1' and 'end1' must be lists indicating the ", + "day and the month of the period start and end.") + } + if (!is.null(dim(dates1))) { + data1 <- SelectPeriodOnData(data = data1, dates = dates1, start = start1, + end = end1, time_dim = time_dim, + ncores = ncores) + } else { + warning("Parameter 'dates1' must have named dimensions if 'start' and ", + "'end' are not NULL. All 'data1' will be used.") + } + } } + # dates2 + if (!is.null(start2) & !is.null(end2)) { + if (is.null(dates2)) { + warning("Parameter 'dates2' is NULL and the average of the ", + "full data provided in 'data' is computed.") + } else { + if (!any(c(is.list(start2), is.list(end2)))) { + stop("Parameter 'start2' and 'end2' must be lists indicating the ", + "day and the month of the period start and end.") + } + if (!is.null(dim(dates2))) { + data2 <- SelectPeriodOnData(data = data2, dates = dates2, start = start2, + end = end2, time_dim = time_dim, + ncores = ncores) + } else { + warning("Parameter 'dates2' must have named dimensions if 'start2' and ", + "'end2' are not NULL. All 'data2' will be used.") + } + } + } + + data1dims <- names(dim(data1)) + data2dims <- names(dim(data2)) + data1 <- Apply(list(data1, data2), target_dims = time_dim, fun = 'c', output_dims = time_dim, ncores = ncores)$output1 + if (all(names(dim(data1)) %in% data1dims)) { + pos <- match(names(dim(data1)), data1dims) + data1 <- aperm(data1, pos) + } + if (all(names(dim(data1)) %in% data2dims)) { + pos <- match(names(dim(data1)), data2dims) + data1 <- aperm(data1, pos) + } return(data1) } - - diff --git a/man/CST_MergeRefToExp.Rd b/man/CST_MergeRefToExp.Rd index 9f9a3b9..c15ab9e 100644 --- a/man/CST_MergeRefToExp.Rd +++ b/man/CST_MergeRefToExp.Rd @@ -13,6 +13,7 @@ CST_MergeRefToExp( end2, time_dim = "ftime", sdate_dim = "sdate", + memb_dim = "member", ncores = NULL ) } @@ -47,6 +48,10 @@ specified. This dimension is required to subset the data in a requested period.} \item{sdate_dim}{A character string indicating the name of the dimension in which the initialization dates are stored.} +\item{memb_dim}{A character string indicating the name of the member +dimension. If the data are not ensemble ones, set as NULL. The default +value is 'member'.} + \item{ncores}{An integer indicating the number of cores to use in parallel computation.} } diff --git a/man/MergeRefToExp.Rd b/man/MergeRefToExp.Rd index e6b40c8..c446a10 100644 --- a/man/MergeRefToExp.Rd +++ b/man/MergeRefToExp.Rd @@ -7,14 +7,15 @@ MergeRefToExp( data1, dates1, - start1, - end1, + start1 = NULL, + end1 = NULL, data2, dates2, - start2, - end2, + start2 = NULL, + end2 = NULL, time_dim = "ftime", sdate_dim = "sdate", + memb_dim = "member", ncores = NULL ) } @@ -54,6 +55,10 @@ period.} \item{sdate_dim}{A character string indicating the name of the dimension in which the initialization dates are stored.} +\item{memb_dim}{A character string indicating the name of the member +dimension. If the data are not ensemble ones, set as NULL. 
The default +value is 'member'.} + \item{ncores}{An integer indicating the number of cores to use in parallel computation.} } -- GitLab From 3c1bb67ed22caf98ccd136df2ee46c2deed8830a Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 12 Jul 2023 16:15:31 +0200 Subject: [PATCH 33/87] Improved development of MergeRefToExp; improved documentation; added unit tests for the new development --- DESCRIPTION | 2 +- R/MergeRefToExp.R | 213 ++++++++++++++++++---------- man/CST_MergeRefToExp.Rd | 43 ++++-- man/MergeRefToExp.Rd | 54 ++++--- tests/testthat/test-MergeRefToExp.R | 150 ++++++++++++++++++-- 5 files changed, 341 insertions(+), 121 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4151ba4..4e20983 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -40,4 +40,4 @@ URL: https://earth.bsc.es/gitlab/es/csindicators/ BugReports: https://earth.bsc.es/gitlab/es/csindicators/-/issues Encoding: UTF-8 RoxygenNote: 7.2.0 -Config/testthat/edition: 3 \ No newline at end of file +Config/testthat/edition: 3 diff --git a/R/MergeRefToExp.R b/R/MergeRefToExp.R index 534bb55..4f8e16f 100644 --- a/R/MergeRefToExp.R +++ b/R/MergeRefToExp.R @@ -11,10 +11,19 @@ #'references) could be added at the end of the forecast lead time to cover the #'desired period (e.g.: until the end of summer). #' -#'@param data1 An 's2dv_cube' object as provided function \code{CST_Load} in -#' package CSTools. -#'@param data2 An 's2dv_cube' object as provided function \code{CST_Load} in -#' package CSTools. +#'@param data1 An 's2dv_cube' object with the element 'data' being a +#' multidimensional array of with named dimensions matching the dimensions of +#' parameter 'data2'. Dimensions with the same name in the 'data2' parameter +#' must have the same length or length 1; except for the dimension specified +#' with 'memb_dim', which can be different and in the result will be filled +#' with NA values. It can also have additional dimensions with different names +#' in 'data2'. +#'@param data2 An 's2dv_cube' object with the element 'data' being a +#' multidimensional array of dates with named dimensions matching +#' the dimensions on parameter 'data1'. Dimensions with the same name in the +#' 'data1' parameter must have the same length or length 1, except for the +#' dimension specified with 'memb_dim', which can be different and in the +#' result will be filled with NA values. #'@param start1 A list to define the initial date of the period to select from #' data1 by providing a list of two elements: the initial date of the period #' and the initial month of the period. @@ -30,16 +39,24 @@ #'@param time_dim A character string indicating the name of the temporal #' dimension. By default, it is set to 'ftime'. More than one dimension name #' matching the dimensions provided in the object \code{data$data} can be -#' specified. This dimension is required to subset the data in a requested period. -#'@param sdate_dim A character string indicating the name of the dimension in -#' which the initialization dates are stored. +#' specified. This dimension is required to subset the data in a requested +#' period. #'@param memb_dim A character string indicating the name of the member #' dimension. If the data are not ensemble ones, set as NULL. The default #' value is 'member'. #'@param ncores An integer indicating the number of cores to use in parallel #' computation. #'@return A 's2dv_cube' object containing the indicator in the element -#' \code{data}. +#'\code{data}. 
The element \code{data} will be a multidimensional array with +#'dimensions named from the combination of 'data1' and 'data2'. The resulting +#'dimensions will be the following: all the same common dimensions between the +#'two arrays plus the different dimensions of each array. If there is any +#'different common dimension but in a dataset it has length 1, it will be added +#'with the maximum dimension. If memb_dim is used, the dimension of the maximum +#'value corresponding to memb_dim of the two data sets will be added; the +#'difference between the dimensions of the set members will be filled with NA. +#'The other elements of the 's2dv_cube' will be updated with the combined +#'information of both datasets. #' #'@examples #'data_dates <- c(seq(as.Date("01-07-1993", "%d-%m-%Y", tz = 'UTC'), @@ -64,15 +81,16 @@ #' #'@import multiApply #'@export -CST_MergeRefToExp <- function(data1, data2, start1, end1, start2, end2, - time_dim = 'ftime', sdate_dim = 'sdate', - memb_dim = 'member', ncores = NULL) { +CST_MergeRefToExp <- function(data1, data2, start1 = NULL, end1 = NULL, + start2 = NULL, end2 = NULL, + time_dim = 'ftime', memb_dim = 'member', + ncores = NULL) { # Check 's2dv_cube' if (!inherits(data1, 's2dv_cube')) { - stop("Parameter 'ref' must be of the class 's2dv_cube'.") + stop("Parameter 'data1' must be of the class 's2dv_cube'.") } if (!inherits(data2, 's2dv_cube')) { - stop("Parameter 'data' must be of the class 's2dv_cube'.") + stop("Parameter 'data2' must be of the class 's2dv_cube'.") } # Dates subset of data1 if (!is.null(start1) && !is.null(end1)) { @@ -96,14 +114,37 @@ CST_MergeRefToExp <- function(data1, data2, start1, end1, start2, end2, dates1 <- data1$attrs$Dates dates2 <- data2$attrs$Dates + # data data1$data <- MergeRefToExp(data1 = data1$data, dates1 = dates1, start1 = start1, end1 = end1, data2 = data2$data, dates2 = dates2, start2, end2, time_dim = time_dim, - memb_dim = memb_dim, - sdate_dim = sdate_dim, ncores = ncores) + memb_dim = memb_dim, ncores = ncores) + # dims data1$dims <- dim(data1$data) + # coords + for (i_dim in names(dim(data1$data))) { + if (length(data1$coords[[i_dim]]) != dim(data1$data)[i_dim]) { + data1$coords[[i_dim]] <- NULL + data1$coords[[i_dim]] <- 1:dim(data1$data)[i_dim] + attr(data1$coords[[i_dim]], 'indices') <- TRUE + } else if (length(data1$coords[[i_dim]]) == length(data2$coords[[i_dim]])) { + if (any(as.vector(data1$coords[[i_dim]]) != as.vector(data2$coords[[i_dim]]))) { + data1$coords[[i_dim]] <- NULL + data1$coords[[i_dim]] <- 1:dim(data1$data)[i_dim] + attr(data1$coords[[i_dim]], 'indices') <- TRUE + } else if (!identical(attributes(data1$coords[[i_dim]]), attributes(data2$coords[[i_dim]]))) { + attributes(data1$coords[[i_dim]]) <- NULL + } + } else { + data1$coords[[i_dim]] <- NULL + data1$coords[[i_dim]] <- 1:dim(data1$data)[i_dim] + attr(data1$coords[[i_dim]], 'indices') <- TRUE + } + } + + # Dates if (!is.null(dates1)) { if (!is.null(start1) && !is.null(end1)) { dates1 <- SelectPeriodOnDates(dates1, start = start1, end = end1, @@ -117,7 +158,6 @@ CST_MergeRefToExp <- function(data1, data2, start1, end1, start2, end2, } } - # TO DO CONCATENATE DATES remove_dates_dim <- FALSE if (!is.null(dates1) & !is.null(dates2)) { @@ -134,15 +174,31 @@ CST_MergeRefToExp <- function(data1, data2, start1, end1, start2, end2, } res <- Apply(list(dates1, dates2), target_dims = time_dim, 'c', output_dims = time_dim, ncores = ncores)$output1 + if (inherits(dates1, 'Date')) { data1$attrs$Dates <- as.Date(res, origin = '1970-01-01') } else { - 
data1$attrs$Dates <- as.POSIXct(res*3600*24, origin = '1970-01-01', tz = 'UTC') + data1$attrs$Dates <- as.POSIXct(res, origin = '1970-01-01', tz = 'UTC') } if (remove_dates_dim) { dim(data1$attrs$Dates) <- NULL } + + # Variable + data1$attrs$Variable$varName <- unique(data1$attrs$Variable$varName, + data2$attrs$Variable$varName) + data1$attrs$Variable$metadata <- intersect(data1$attrs$Variable, data2$attrs$Variable)[[2]] + + # source_files + data1$attrs$source_files <- c(data1$attrs$source_files, data2$attrs$source_files) + + # Datasets + data1$attrs$Datasets <- c(data1$attrs$Datasets, data2$attrs$Datasets) + + # when + data1$attrs$when <- Sys.time() + return(data1) } @@ -155,12 +211,21 @@ CST_MergeRefToExp <- function(data1, data2, start1, end1, start2, end2, #'compute the indicator. The function \code{MergeObs2Exp} takes care of this #'steps. #' -#'@param data1 A multidimensional array with named dimensions. -#'@param dates1 A vector of dates or a multidimensional array of dates with -#' named dimensions matching the dimensions on parameter 'data1'. -#'@param data2 A multidimensional array with named dimensions. -#'@param dates2 A vector of dates or a multidimensional array of dates with -#' named dimensions matching the dimensions on parameter 'data2'. +#'@param data1 A multidimensional array of with named dimensions matching the +#' dimensions of parameter 'data2'. Dimensions with the same name in the +#' 'data2' parameter must have the same length or length 1; except for the +#' dimension specified with 'memb_dim', which can be different and in the +#' result will be filled with NA values. It can also have additional dimensions +#' with different names in 'data2'. +#'@param dates1 A multidimensional array of dates with named dimensions matching +#' the temporal dimensions on parameter 'data1'. +#'@param data2 A multidimensional array of dates with named dimensions matching +#' the dimensions on parameter 'data1'. Dimensions with the same name in the +#' 'data1' parameter must have the same length or length 1, except for the +#' dimension specified with 'memb_dim', which can be different and in the +#' result will be filled with NA values. +#'@param dates2 A multidimensional array of dates with named dimensions matching +#' the temporal dimensions on parameter 'data2'. #'@param start1 A list to define the initial date of the period to select from #' data1 by providing a list of two elements: the initial date of the period #' and the initial month of the period. @@ -178,39 +243,44 @@ CST_MergeRefToExp <- function(data1, data2, start1, end1, start2, end2, #' matching the dimensions provided in the object \code{data$data} can be #' specified. This dimension is required to subset the data in a requested #' period. -#'@param sdate_dim A character string indicating the name of the dimension in -#' which the initialization dates are stored. #'@param memb_dim A character string indicating the name of the member #' dimension. If the data are not ensemble ones, set as NULL. The default #' value is 'member'. #'@param ncores An integer indicating the number of cores to use in parallel #' computation. #' -#'@return A multidimensional array with named dimensions. -#' +#'@return A multidimensional array with dimensions named from the combination of +#''data1' and 'data2'. The resulting dimensions will be the following: all the +#'same common dimensions between the two arrays plus the different dimensions of +#'each array. 
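The as.POSIXct() correction in the hunk above follows from how R encodes the two date classes: POSIXct values are seconds since 1970-01-01 UTC, while Date values are days since 1970-01-01, so the concatenated numeric values need no *3600*24 factor. A minimal sketch outside the patch, with toy dates rather than package data:

# POSIXct is stored as seconds since the epoch, Date as days since the epoch
d_posix <- as.POSIXct("1993-07-01", tz = "UTC")
d_date  <- as.Date("1993-07-01")
as.numeric(d_posix)  # 741484800 seconds
as.numeric(d_date)   # 8582 days
# Rebuilding the date classes from plain numeric values (what the code above does with 'res'):
as.POSIXct(as.numeric(d_posix), origin = "1970-01-01", tz = "UTC")  # "1993-07-01 UTC"
as.Date(as.numeric(d_date), origin = "1970-01-01")                  # "1993-07-01"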
If there is any different common dimension but in a dataset it has +#'length 1, it will be added with the maximum dimension. If memb_dim is used, +#'the dimension of the maximum value corresponding to memb_dim of the two data +#'sets will be added; the difference between the dimensions of the set members +#'will be filled with NA. +#' #'@examples #'data_dates <- c(seq(as.Date("01-07-1993", "%d-%m-%Y", tz = 'UTC'), #' as.Date("01-12-1993","%d-%m-%Y", tz = 'UTC'), "day"), #' seq(as.Date("01-07-1994", "%d-%m-%Y", tz = 'UTC'), #' as.Date("01-12-1994","%d-%m-%Y", tz = 'UTC'), "day")) -#'dim(data_dates) <- c(time = 154, sdate = 2) +#'dim(data_dates) <- c(ftime = 154, sdate = 2) #'ref_dates <- seq(as.Date("01-01-1993", "%d-%m-%Y", tz = 'UTC'), #' as.Date("01-12-1994","%d-%m-%Y", tz = 'UTC'), "day") -#'dim(ref_dates) <- c(time = 350, sdate = 2) -#'ref <- array(1001:1700, c(time = 350, sdate = 2)) -#'data <- array(1:(2*154*2), c(time = 154, sdate = 2, member= 2)) +#'dim(ref_dates) <- c(ftime = 350, sdate = 2) +#'ref <- array(1001:1700, c(ftime = 350, sdate = 2)) +#'data <- array(1:(2*154*2), c(ftime = 154, sdate = 2, member = 2)) #'new_data <- MergeRefToExp(data1 = ref, dates1 = ref_dates, start1 = list(21, 6), #' end1 = list(30, 6), data2 = data, dates2 = data_dates, #' start2 = list(1, 7), end = list(21, 9), -#' time_dim = 'time') +#' time_dim = 'ftime') #' #'@import multiApply #'@importFrom s2dv InsertDim #'@export -MergeRefToExp <- function(data1, dates1, start1 = NULL, end1 = NULL, data2, - dates2, start2 = NULL, end2 = NULL, - time_dim = 'ftime', sdate_dim = 'sdate', - memb_dim = 'member', ncores = NULL) { +MergeRefToExp <- function(data1, data2, dates1 = NULL, dates2 = NULL, + start1 = NULL, end1 = NULL, start2 = NULL, end2 = NULL, + time_dim = 'ftime', memb_dim = 'member', + ncores = NULL) { # Input checks # data if (!is.array(data1) | !is.array(data2)) { @@ -238,13 +308,11 @@ MergeRefToExp <- function(data1, dates1, start1 = NULL, end1 = NULL, data2, } if (memb_dim %in% names(dim(data1))) { if (dim(data1)[memb_dim] == 1) { - print('memb_dim removed data1') data1 <- array(data1, dim = dim(data1)[-which(names(dim(data1)) == memb_dim)]) } } if (memb_dim %in% names(dim(data2))) { if (dim(data2)[memb_dim] == 1) { - print('memb_dim removed data2') data2 <- array(data2, dim = dim(data2)[-which(names(dim(data2)) == memb_dim)]) } } @@ -268,36 +336,30 @@ MergeRefToExp <- function(data1, dates1, start1 = NULL, end1 = NULL, data2, } } - # # Check if data2 has dimension sdate_dim and it should be added to data1: - # if ((sdate_dim %in% names(dim(data2))) && dim(data2)[sdate_dim] > 1 && - # !sdate_dim %in% names(dim(data1))) { - # dim(data1) <- c(length(data1)/dim(data2)[sdate_dim], dim(data2)[sdate_dim]) - # names(dim(data1)) <- c(time_dim, sdate_dim) - # } - # # Check if data1 has dimension sdate_dim and it should be added to data2: - # if ((sdate_dim %in% names(dim(data1))) && dim(data1)[sdate_dim] > 1 && - # !sdate_dim %in% names(dim(data2))) { - # dim(data2) <- c(length(data2)/dim(data1)[sdate_dim], dim(data1)[sdate_dim]) - # names(dim(data2)) <- c(time_dim, sdate_dim) - # } - - # Check if data1 needs to be extended to the length of the dimensions of data2: - if (length(dim(data2)) != length(dim(data1))) { - dif_dims <- which(names(dim(data2)) %in% names(dim(data1)) == FALSE) - if (length(dif_dims) > 0) { - for (i in dif_dims) { - data1 <- InsertDim(data1, posdim = length(dim(data1)), lendim = dim(data2)[i], - name = names(dim(data2))[i]) + # Find common dims and remove the ones not needed + 
name_data1 <- sort(names(dim(data1))) + name_data2 <- sort(names(dim(data2))) + + commondims <- name_data1[name_data1 %in% name_data2] + commondims <- commondims[-which(commondims == time_dim)] + + if (length(commondims) != 0) { + if (!all(dim(data2)[commondims] == dim(data1)[commondims])) { + dif_common <- commondims[!dim(data2)[commondims] == dim(data1)[commondims]] + if (any(dim(data2)[dif_common] == 1)) { + dim_remove <- dif_common[which(dim(data1)[dif_common] == 1)] + dim(data1) <- dim(data1)[-which(names(dim(data1)) == dim_remove)] + dif_common <- dif_common[-which(dif_common == dim_remove)] } - } - } - # Check if data2 needs to be extended to the length of the dimensions of data1: - if (length(dim(data1)) != length(dim(data2))) { - dif_dims <- which(names(dim(data1)) %in% names(dim(data2)) == FALSE) - if (length(dif_dims) > 0) { - for (i in dif_dims) { - data2 <- InsertDim(data2, posdim = length(dim(data1)), lendim = dim(data1)[i], - name = names(dim(data1))[i]) + if (any(dim(data1)[dif_common] == 1)) { + dim_remove <- dif_common[which(dim(data1)[dif_common] == 1)] + dim(data1) <- dim(data1)[-which(names(dim(data1)) == dim_remove)] + dif_common <- dif_common[-which(dif_common == dim_remove)] + } + if (length(dif_common) != 0) { + stop("Parameters 'data1' and 'data2' have common dimension ", + paste0("'", dif_common, sep = "' "), "with different length and ", + "different of length 1.") } } } @@ -307,11 +369,11 @@ MergeRefToExp <- function(data1, dates1, start1 = NULL, end1 = NULL, data2, if (is.null(dates1)) { warning("Parameter 'dates' is NULL and the average of the ", "full data provided in 'data' is computed.") + } else if (!all(c(is.list(start1), is.list(end1)))) { + warning("Parameter 'start1' and 'end1' must be lists indicating the ", + "day and the month of the period start and end. Full data ", + "will be used.") } else { - if (!any(c(is.list(start1), is.list(end1)))) { - stop("Parameter 'start1' and 'end1' must be lists indicating the ", - "day and the month of the period start and end.") - } if (!is.null(dim(dates1))) { data1 <- SelectPeriodOnData(data = data1, dates = dates1, start = start1, end = end1, time_dim = time_dim, @@ -327,11 +389,11 @@ MergeRefToExp <- function(data1, dates1, start1 = NULL, end1 = NULL, data2, if (is.null(dates2)) { warning("Parameter 'dates2' is NULL and the average of the ", "full data provided in 'data' is computed.") + } else if (!all(c(is.list(start2), is.list(end2)))) { + warning("Parameter 'start2' and 'end2' must be lists indicating the ", + "day and the month of the period start and end. 
Full data ", + "will be used.") } else { - if (!any(c(is.list(start2), is.list(end2)))) { - stop("Parameter 'start2' and 'end2' must be lists indicating the ", - "day and the month of the period start and end.") - } if (!is.null(dim(dates2))) { data2 <- SelectPeriodOnData(data = data2, dates = dates2, start = start2, end = end2, time_dim = time_dim, @@ -348,6 +410,7 @@ MergeRefToExp <- function(data1, dates1, start1 = NULL, end1 = NULL, data2, data1 <- Apply(list(data1, data2), target_dims = time_dim, fun = 'c', output_dims = time_dim, ncores = ncores)$output1 + if (all(names(dim(data1)) %in% data1dims)) { pos <- match(names(dim(data1)), data1dims) data1 <- aperm(data1, pos) diff --git a/man/CST_MergeRefToExp.Rd b/man/CST_MergeRefToExp.Rd index c15ab9e..f2195f2 100644 --- a/man/CST_MergeRefToExp.Rd +++ b/man/CST_MergeRefToExp.Rd @@ -7,22 +7,30 @@ CST_MergeRefToExp( data1, data2, - start1, - end1, - start2, - end2, + start1 = NULL, + end1 = NULL, + start2 = NULL, + end2 = NULL, time_dim = "ftime", - sdate_dim = "sdate", memb_dim = "member", ncores = NULL ) } \arguments{ -\item{data1}{An 's2dv_cube' object as provided function \code{CST_Load} in -package CSTools.} +\item{data1}{An 's2dv_cube' object with the element 'data' being a +multidimensional array of with named dimensions matching the dimensions of +parameter 'data2'. Dimensions with the same name in the 'data2' parameter +must have the same length or length 1; except for the dimension specified +with 'memb_dim', which can be different and in the result will be filled +with NA values. It can also have additional dimensions with different names +in 'data2'.} -\item{data2}{An 's2dv_cube' object as provided function \code{CST_Load} in -package CSTools.} +\item{data2}{An 's2dv_cube' object with the element 'data' being a +multidimensional array of dates with named dimensions matching +the dimensions on parameter 'data1'. Dimensions with the same name in the +'data1' parameter must have the same length or length 1, except for the +dimension specified with 'memb_dim', which can be different and in the +result will be filled with NA values.} \item{start1}{A list to define the initial date of the period to select from data1 by providing a list of two elements: the initial date of the period @@ -43,10 +51,8 @@ the final month of the period.} \item{time_dim}{A character string indicating the name of the temporal dimension. By default, it is set to 'ftime'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be -specified. This dimension is required to subset the data in a requested period.} - -\item{sdate_dim}{A character string indicating the name of the dimension in -which the initialization dates are stored.} +specified. This dimension is required to subset the data in a requested +period.} \item{memb_dim}{A character string indicating the name of the member dimension. If the data are not ensemble ones, set as NULL. The default @@ -57,7 +63,16 @@ computation.} } \value{ A 's2dv_cube' object containing the indicator in the element - \code{data}. +\code{data}. The element \code{data} will be a multidimensional array with +dimensions named from the combination of 'data1' and 'data2'. The resulting +dimensions will be the following: all the same common dimensions between the +two arrays plus the different dimensions of each array. If there is any +different common dimension but in a dataset it has length 1, it will be added +with the maximum dimension. 
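The final merging step shown earlier in this patch, Apply() with 'c' over the time dimension followed by aperm(), can be tried in isolation. A small sketch with made-up arrays (dimension names and sizes are illustrative only):

library(multiApply)
a <- array(1:12, c(sdate = 2, ftime = 3, member = 2))      # e.g. reference segment
b <- array(101:116, c(sdate = 2, ftime = 4, member = 2))   # e.g. forecast segment
res <- Apply(list(a, b), target_dims = 'ftime', fun = 'c',
             output_dims = 'ftime')$output1
dim(res)   # 'ftime' (length 3 + 4 = 7) is placed first; the other dimensions follow
# restore the dimension order of the first input
res <- aperm(res, match(names(dim(a)), names(dim(res))))
dim(res)   # sdate = 2, ftime = 7, member = 2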
If memb_dim is used, the dimension of the maximum +value corresponding to memb_dim of the two data sets will be added; the +difference between the dimensions of the set members will be filled with NA. +The other elements of the 's2dv_cube' will be updated with the combined +information of both datasets. } \description{ Some indicators are defined for specific temporal periods (e.g.: summer from diff --git a/man/MergeRefToExp.Rd b/man/MergeRefToExp.Rd index c446a10..164606c 100644 --- a/man/MergeRefToExp.Rd +++ b/man/MergeRefToExp.Rd @@ -6,24 +6,37 @@ \usage{ MergeRefToExp( data1, - dates1, + data2, + dates1 = NULL, + dates2 = NULL, start1 = NULL, end1 = NULL, - data2, - dates2, start2 = NULL, end2 = NULL, time_dim = "ftime", - sdate_dim = "sdate", memb_dim = "member", ncores = NULL ) } \arguments{ -\item{data1}{A multidimensional array with named dimensions.} +\item{data1}{A multidimensional array of with named dimensions matching the +dimensions of parameter 'data2'. Dimensions with the same name in the +'data2' parameter must have the same length or length 1; except for the +dimension specified with 'memb_dim', which can be different and in the +result will be filled with NA values. It can also have additional dimensions +with different names in 'data2'.} + +\item{data2}{A multidimensional array of dates with named dimensions matching +the dimensions on parameter 'data1'. Dimensions with the same name in the +'data1' parameter must have the same length or length 1, except for the +dimension specified with 'memb_dim', which can be different and in the +result will be filled with NA values.} + +\item{dates1}{A multidimensional array of dates with named dimensions matching +the temporal dimensions on parameter 'data1'.} -\item{dates1}{A vector of dates or a multidimensional array of dates with -named dimensions matching the dimensions on parameter 'data1'.} +\item{dates2}{A multidimensional array of dates with named dimensions matching +the temporal dimensions on parameter 'data2'.} \item{start1}{A list to define the initial date of the period to select from data1 by providing a list of two elements: the initial date of the period @@ -33,11 +46,6 @@ and the initial month of the period.} data1 by providing a list of two elements: the final day of the period and the final month of the period.} -\item{data2}{A multidimensional array with named dimensions.} - -\item{dates2}{A vector of dates or a multidimensional array of dates with -named dimensions matching the dimensions on parameter 'data2'.} - \item{start2}{A list to define the initial date of the period to select from data2 by providing a list of two elements: the initial date of the period and the initial month of the period.} @@ -52,9 +60,6 @@ matching the dimensions provided in the object \code{data$data} can be specified. This dimension is required to subset the data in a requested period.} -\item{sdate_dim}{A character string indicating the name of the dimension in -which the initialization dates are stored.} - \item{memb_dim}{A character string indicating the name of the member dimension. If the data are not ensemble ones, set as NULL. The default value is 'member'.} @@ -63,7 +68,14 @@ value is 'member'.} computation.} } \value{ -A multidimensional array with named dimensions. +A multidimensional array with dimensions named from the combination of +'data1' and 'data2'. The resulting dimensions will be the following: all the +same common dimensions between the two arrays plus the different dimensions of +each array. 
If there is any different common dimension but in a dataset it has +length 1, it will be added with the maximum dimension. If memb_dim is used, +the dimension of the maximum value corresponding to memb_dim of the two data +sets will be added; the difference between the dimensions of the set members +will be filled with NA. } \description{ Some indicators are defined for specific temporal periods (e.g.: summer from @@ -78,15 +90,15 @@ data_dates <- c(seq(as.Date("01-07-1993", "\%d-\%m-\%Y", tz = 'UTC'), as.Date("01-12-1993","\%d-\%m-\%Y", tz = 'UTC'), "day"), seq(as.Date("01-07-1994", "\%d-\%m-\%Y", tz = 'UTC'), as.Date("01-12-1994","\%d-\%m-\%Y", tz = 'UTC'), "day")) -dim(data_dates) <- c(time = 154, sdate = 2) +dim(data_dates) <- c(ftime = 154, sdate = 2) ref_dates <- seq(as.Date("01-01-1993", "\%d-\%m-\%Y", tz = 'UTC'), as.Date("01-12-1994","\%d-\%m-\%Y", tz = 'UTC'), "day") -dim(ref_dates) <- c(time = 350, sdate = 2) -ref <- array(1001:1700, c(time = 350, sdate = 2)) -data <- array(1:(2*154*2), c(time = 154, sdate = 2, member= 2)) +dim(ref_dates) <- c(ftime = 350, sdate = 2) +ref <- array(1001:1700, c(ftime = 350, sdate = 2)) +data <- array(1:(2*154*2), c(ftime = 154, sdate = 2, member = 2)) new_data <- MergeRefToExp(data1 = ref, dates1 = ref_dates, start1 = list(21, 6), end1 = list(30, 6), data2 = data, dates2 = data_dates, start2 = list(1, 7), end = list(21, 9), - time_dim = 'time') + time_dim = 'ftime') } diff --git a/tests/testthat/test-MergeRefToExp.R b/tests/testthat/test-MergeRefToExp.R index adbdfd6..effae50 100644 --- a/tests/testthat/test-MergeRefToExp.R +++ b/tests/testthat/test-MergeRefToExp.R @@ -1,6 +1,135 @@ ########################################################################### -test_that("Sanity checks", { +# cube1 +dates_data1 <- c(seq(as.Date("11-07-1993", "%d-%m-%Y", tz = 'UTC'), + as.Date("20-07-1993","%d-%m-%Y", tz = 'UTC'), "day"), + seq(as.Date("11-07-1994", "%d-%m-%Y", tz = 'UTC'), + as.Date("20-07-1994","%d-%m-%Y", tz = 'UTC'), "day")) +dim(dates_data1) <- c(ftime = 10, sdate = 2) +cube1 <- NULL +cube1$data <- array(1:(2*10*2), c(ftime = 10, sdate = 2, member= 2)) +cube1$attrs$Dates <- dates_data1 +class(cube1) <- 's2dv_cube' +ref_dates1 <- c(seq(as.Date("01-07-1993", "%d-%m-%Y", tz = 'UTC'), + as.Date("10-07-1993","%d-%m-%Y", tz = 'UTC'), "day"), + seq(as.Date("01-07-1994", "%d-%m-%Y", tz = 'UTC'), + as.Date("10-07-1994","%d-%m-%Y", tz = 'UTC'), "day")) +dim(ref_dates1) <- c(ftime = 10, sdate = 2) +cube_ref <- NULL +cube_ref$data <- array(1001:1700, c(ftime = 10, sdate = 2)) +cube_ref$attrs$Dates <- ref_dates1 +class(cube_ref) <- 's2dv_cube' +start1 <- list(3, 7) +end1 <- list(10, 7) +start2 <- list(11, 7) +end2 <- list(15, 7) + +# dat1 +ref1 <- array(1001:1700, c(ftime = 10, sdate = 2)) +data1 <- array(1:(2*154*2), c(ftime = 11, sdate = 2, member = 2)) + +########################################################################### +test_that("1. Input checks", { + # 's2dv_cube' + expect_error( + CST_MergeRefToExp('a'), + "Parameter 'data1' must be of the class 's2dv_cube'." + ) + expect_error( + CST_MergeRefToExp(cube1, array(10)), + "Parameter 'data2' must be of the class 's2dv_cube'." + ) + # data + expect_error( + MergeRefToExp(10, 10), + "Parameters 'data1' and 'data2' must be arrays." + ) + expect_error( + MergeRefToExp(array(10), array(10)), + "Parameters 'data1' and 'data2' must have named dimensions." + ) + # time_dim + expect_error( + MergeRefToExp(data1 = ref1, data2 = data1, time_dim = 1), + "Parameter 'time_dim' must be a character string." 
+ ) + expect_error( + MergeRefToExp(data1 = ref1, data2 = data1, time_dim = 'time'), + paste0("Parameter 'time_dim' is not found in 'data1' or 'data2' dimension ", + "names.") + ) + # memb_dim + expect_error( + MergeRefToExp(data1 = ref1, data2 = data1, memb_dim = 1), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + MergeRefToExp(data1 = ref1, data2 = data1, memb_dim = 'time'), + paste0("Parameter 'memb_dim' is not found in 'data1' or 'data2' dimension. ", + "Set it to NULL if there is no member dimension.") + ) + # common dimensions + expect_error( + MergeRefToExp(data1 = array(1:12, c(sdate = 2, ftime = 2, var = 3)), + data2 = array(1:16, c(sdate = 2, ftime = 2, var = 4)), + memb_dim = NULL), + paste0("Parameters 'data1' and 'data2' have common dimension 'var' ", + "with different length and different of length 1.") + ) + # dates + expect_warning( + MergeRefToExp(data1 = array(1:4, c(sdate = 2, ftime = 2, lat = 1)), + data2 = array(1:16, c(sdate = 2, ftime = 2, lat = 4)), + memb_dim = NULL, start1 = list(1, 1), end1 = list(3, 1), + start2 = NULL, end2 = NULL), + paste0("Parameter 'dates' is NULL and the average of the ", + "full data provided in 'data' is computed.") + ) + expect_warning( + MergeRefToExp(data1 = ref1, + data2 = data1, dates1 = ref_dates1, dates2 = dates_data1, + start1 = c(3, 7), end1 = end1, + start2 = start2, end2 = end2), + paste0("Parameter 'start1' and 'end1' must be lists indicating the ", + "day and the month of the period start and end. Full data ", + "will be used.") + ) + expect_warning( + MergeRefToExp(data1 = ref1, + data2 = data1, dates1 = as.vector(ref_dates1), + dates2 = dates_data1, start1 = start1, end1 = end1, + start2 = start2, end2 = end2), + paste0("Parameter 'dates1' must have named dimensions if 'start' and ", + "'end' are not NULL. All 'data1' will be used.") + ) +}) + +########################################################################### + +test_that("2. Output checks", { + res1 <- CST_MergeRefToExp(data1 = cube_ref, data2 = cube1, + start1 = start1, end1 = end1, + start2 = start2, end2 = end2) + # dims + expect_equal( + dim(res1$data), + res1$dims + ) + # coords + expect_equal( + names(dim(res1$data)), + names(res1$coords) + ) + # Dates + expect_equal( + dim(res1$data)[c('ftime', 'sdate')], + dim(res1$attrs$Dates) + ) +}) + +########################################################################### + +test_that("3. 
Output checks", { data_dates <- c(seq(as.Date("01-07-1993", "%d-%m-%Y", tz = 'UTC'), as.Date("01-12-1993", "%d-%m-%Y", tz = 'UTC'), "day"), seq(as.Date("01-07-1994", "%d-%m-%Y", tz = 'UTC'), @@ -19,12 +148,12 @@ test_that("Sanity checks", { data$attrs$Dates <- data_dates class(data) <- 's2dv_cube' -suppressWarnings( - expect_equal(CST_MergeRefToExp(data1 = ref, data2 = data, start1 = list(21, 6), - end1 = list(30, 6), start2 = list(1, 7), - end2 = list(21, 9))$attrs$Dates, - SelectPeriodOnDates(ref_dates, start = list(21, 6), end = list(21,9))) -) + suppressWarnings( + expect_equal(CST_MergeRefToExp(data1 = ref, data2 = data, start1 = list(21, 6), + end1 = list(30, 6), start2 = list(1, 7), + end2 = list(21, 9))$attrs$Dates, + SelectPeriodOnDates(ref_dates, start = list(21, 6), end = list(21,9))) + ) output <- array(c(1172:1181, 1:83, 1537:1546, 155:237, 1172:1181, 309:391, 1537:1546, 463:545), c(ftime = 93, sdate = 2, member = 2)) @@ -33,7 +162,8 @@ suppressWarnings( CST_MergeRefToExp(data1 = ref, data2 = data, start1 = list(21, 6), end1 = list(30, 6), start2 = list(1, 7), end2 = list(21, 9))$data, - output) + output + ) # issue 13: One lead time @@ -81,11 +211,11 @@ suppressWarnings( end2 = list(31, 7))$data, output ) - }) -test_that("Seasonal", { +########################################################################### +test_that("4. Test Seasonal", { dates <- NULL hcst.inityear <- 1993 hcst.endyear <- 2017 -- GitLab From 0a670a068a579d5d5cc7ca0227ea573eaaf058fc Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Fri, 14 Jul 2023 12:33:25 +0200 Subject: [PATCH 34/87] Update description of MergeRefToExp; correct CST_MergeRefToExp imports and description --- R/MergeRefToExp.R | 20 +++++++++++--------- man/CST_MergeRefToExp.Rd | 11 +++++------ man/MergeRefToExp.Rd | 6 +++++- 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/R/MergeRefToExp.R b/R/MergeRefToExp.R index 4f8e16f..abbdbe1 100644 --- a/R/MergeRefToExp.R +++ b/R/MergeRefToExp.R @@ -4,12 +4,11 @@ #'June 21st to September 21st). If the initialization forecast date is later #'than the one required for the indicator (e.g.: July 1st), the user may want to #'merge past observations, or other references, to the forecast (or hindcast) -#'to compute the indicator. The function \code{MergeObs2Exp} takes care of this -#'steps. If the forecast simulation doesn't cover the required period because it -#'is initialized too early (e.g.: Initialization on November 1st the forecast -#'covers until the beginning of June next year), a climatology (or other -#'references) could be added at the end of the forecast lead time to cover the -#'desired period (e.g.: until the end of summer). +#'to compute the indicator. If the forecast simulation doesn't cover the +#'required period because it is initialized too early (e.g.: Initialization on +#'November 1st the forecast covers until the beginning of June next year), a +#'climatology (or other references) could be added at the end of the forecast +#'lead time to cover the desired period (e.g.: until the end of summer). 
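As a usage sketch of the behaviour described above (synthetic daily data, one start date; names and values are assumptions, and the exact output shape depends on the version of the function in this series): the reference fills 21-30 June and the forecast fills 1 July to 21 September, merged along 'ftime', with the start*/end* arguments given as list(day, month).

ref_dates <- seq(as.Date("1993-01-01"), as.Date("1993-12-31"), "day")
dim(ref_dates) <- c(ftime = length(ref_dates))
exp_dates <- seq(as.Date("1993-07-01"), as.Date("1993-12-31"), "day")
dim(exp_dates) <- c(ftime = length(exp_dates))
ref <- array(rnorm(length(ref_dates)), c(ftime = length(ref_dates)))
exp <- array(rnorm(length(exp_dates) * 2), c(ftime = length(exp_dates), member = 2))
merged <- MergeRefToExp(data1 = ref, dates1 = ref_dates,
                        start1 = list(21, 6), end1 = list(30, 6),
                        data2 = exp, dates2 = exp_dates,
                        start2 = list(1, 7), end2 = list(21, 9),
                        time_dim = 'ftime')
dim(merged)  # expected: ftime = 93 (10 + 83 daily steps), member = 2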
#' #'@param data1 An 's2dv_cube' object with the element 'data' being a #' multidimensional array of with named dimensions matching the dimensions of @@ -78,8 +77,7 @@ #'new_data <- CST_MergeRefToExp(data1 = ref, data2 = data, #' start1 = list(21, 6), end1 = list(30, 6), #' start2 = list(1, 7), end2 = list(21, 9)) -#' -#'@import multiApply +#' #'@export CST_MergeRefToExp <- function(data1, data2, start1 = NULL, end1 = NULL, start2 = NULL, end2 = NULL, @@ -209,7 +207,11 @@ CST_MergeRefToExp <- function(data1, data2, start1 = NULL, end1 = NULL, #'than the one required for the indicator (e.g.: July 1st), the user may want to #'merge past observations, or other reference, to the forecast (or hindcast) to #'compute the indicator. The function \code{MergeObs2Exp} takes care of this -#'steps. +#'steps. If the forecast simulation doesn't cover the required period because it +#'is initialized too early (e.g.: Initialization on November 1st the forecast +#'covers until the beginning of June next year), a climatology (or other +#'references) could be added at the end of the forecast lead time to cover the +#'desired period (e.g.: until the end of summer). #' #'@param data1 A multidimensional array of with named dimensions matching the #' dimensions of parameter 'data2'. Dimensions with the same name in the diff --git a/man/CST_MergeRefToExp.Rd b/man/CST_MergeRefToExp.Rd index f2195f2..67f35a9 100644 --- a/man/CST_MergeRefToExp.Rd +++ b/man/CST_MergeRefToExp.Rd @@ -79,12 +79,11 @@ Some indicators are defined for specific temporal periods (e.g.: summer from June 21st to September 21st). If the initialization forecast date is later than the one required for the indicator (e.g.: July 1st), the user may want to merge past observations, or other references, to the forecast (or hindcast) -to compute the indicator. The function \code{MergeObs2Exp} takes care of this -steps. If the forecast simulation doesn't cover the required period because it -is initialized too early (e.g.: Initialization on November 1st the forecast -covers until the beginning of June next year), a climatology (or other -references) could be added at the end of the forecast lead time to cover the -desired period (e.g.: until the end of summer). +to compute the indicator. If the forecast simulation doesn't cover the +required period because it is initialized too early (e.g.: Initialization on +November 1st the forecast covers until the beginning of June next year), a +climatology (or other references) could be added at the end of the forecast +lead time to cover the desired period (e.g.: until the end of summer). } \examples{ data_dates <- c(seq(as.Date("01-07-1993", "\%d-\%m-\%Y", tz = 'UTC'), diff --git a/man/MergeRefToExp.Rd b/man/MergeRefToExp.Rd index 164606c..bd79c2f 100644 --- a/man/MergeRefToExp.Rd +++ b/man/MergeRefToExp.Rd @@ -83,7 +83,11 @@ June 21st to September 21st). If the initialization forecast date is later than the one required for the indicator (e.g.: July 1st), the user may want to merge past observations, or other reference, to the forecast (or hindcast) to compute the indicator. The function \code{MergeObs2Exp} takes care of this -steps. +steps. If the forecast simulation doesn't cover the required period because it +is initialized too early (e.g.: Initialization on November 1st the forecast +covers until the beginning of June next year), a climatology (or other +references) could be added at the end of the forecast lead time to cover the +desired period (e.g.: until the end of summer). 
} \examples{ data_dates <- c(seq(as.Date("01-07-1993", "\%d-\%m-\%Y", tz = 'UTC'), -- GitLab From bf9e36fefe8288f05934b7b0ca3aad788e407131 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Fri, 14 Jul 2023 14:23:14 +0200 Subject: [PATCH 35/87] Correct error of adding metadata to the resultant s2dv_cube --- R/MergeRefToExp.R | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/R/MergeRefToExp.R b/R/MergeRefToExp.R index abbdbe1..8efa04f 100644 --- a/R/MergeRefToExp.R +++ b/R/MergeRefToExp.R @@ -186,7 +186,10 @@ CST_MergeRefToExp <- function(data1, data2, start1 = NULL, end1 = NULL, # Variable data1$attrs$Variable$varName <- unique(data1$attrs$Variable$varName, data2$attrs$Variable$varName) - data1$attrs$Variable$metadata <- intersect(data1$attrs$Variable, data2$attrs$Variable)[[2]] + names_metadata <- names(data1$attrs$Variable$metadata) + data1$attrs$Variable$metadata <- intersect(data1$attrs$Variable$metadata, + data2$attrs$Variable$metadata) + names(data1$attrs$Variable$metadata) <- names_metadata # source_files data1$attrs$source_files <- c(data1$attrs$source_files, data2$attrs$source_files) @@ -196,6 +199,12 @@ CST_MergeRefToExp <- function(data1, data2, start1 = NULL, end1 = NULL, # when data1$attrs$when <- Sys.time() + + # load_parameters (TO DO: remove with CST_Start) + if (!is.null(c(data1$attrs$load_parameters, data2$attrs$load_parameters))) { + data1$attrs$load_parameters <- list(data1 = data1$attrs$load_parameters, + data2 = data2$attrs$load_parameters) + } return(data1) } -- GitLab From 656acdd712d6d0bb789dee80f91db286784302d0 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Fri, 14 Jul 2023 14:35:14 +0200 Subject: [PATCH 36/87] Improve 's2dv_cube' attributes with adding not repeated metadata (added unique()) --- R/MergeRefToExp.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/MergeRefToExp.R b/R/MergeRefToExp.R index 8efa04f..ab626f9 100644 --- a/R/MergeRefToExp.R +++ b/R/MergeRefToExp.R @@ -192,10 +192,10 @@ CST_MergeRefToExp <- function(data1, data2, start1 = NULL, end1 = NULL, names(data1$attrs$Variable$metadata) <- names_metadata # source_files - data1$attrs$source_files <- c(data1$attrs$source_files, data2$attrs$source_files) + data1$attrs$source_files <- unique(c(data1$attrs$source_files, data2$attrs$source_files)) # Datasets - data1$attrs$Datasets <- c(data1$attrs$Datasets, data2$attrs$Datasets) + data1$attrs$Datasets <- unique(c(data1$attrs$Datasets, data2$attrs$Datasets)) # when data1$attrs$when <- Sys.time() -- GitLab From a632e22e5235decefb906c1de817a03fe8234b46 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Tue, 18 Jul 2023 18:22:17 +0200 Subject: [PATCH 37/87] Correct development with only allowing memb_dim and time_dim to be different between data1 and data2; improved documentation; added unit tests --- NAMESPACE | 1 - R/MergeRefToExp.R | 248 +++++++++++++--------------- man/CST_MergeRefToExp.Rd | 71 ++++---- man/MergeRefToExp.Rd | 83 +++++----- tests/testthat/test-MergeRefToExp.R | 95 ++++++++--- 5 files changed, 262 insertions(+), 236 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 8795a86..d80accb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -26,7 +26,6 @@ export(TotalTimeExceedingThreshold) export(WindCapacityFactor) export(WindPowerDensity) import(multiApply) -importFrom(s2dv,InsertDim) importFrom(stats,approxfun) importFrom(stats,ecdf) importFrom(stats,quantile) diff --git a/R/MergeRefToExp.R b/R/MergeRefToExp.R index ab626f9..03425c8 100644 --- a/R/MergeRefToExp.R +++ b/R/MergeRefToExp.R @@ -3,59 +3,58 @@ #'Some 
indicators are defined for specific temporal periods (e.g.: summer from #'June 21st to September 21st). If the initialization forecast date is later #'than the one required for the indicator (e.g.: July 1st), the user may want to -#'merge past observations, or other references, to the forecast (or hindcast) -#'to compute the indicator. If the forecast simulation doesn't cover the -#'required period because it is initialized too early (e.g.: Initialization on -#'November 1st the forecast covers until the beginning of June next year), a -#'climatology (or other references) could be added at the end of the forecast -#'lead time to cover the desired period (e.g.: until the end of summer). +#'merge past observations, or other references, to the forecast (or hindcast) to +#'compute the indicator. If the forecast simulation doesn't cover the required +#'period because it is initialized too early (e.g.: Initialization on November +#'1st the forecast covers until the beginning of June next year), a climatology +#'(or other references) could be added at the end of the forecast lead time to +#'cover the desired period (e.g.: until the end of summer). #' #'@param data1 An 's2dv_cube' object with the element 'data' being a -#' multidimensional array of with named dimensions matching the dimensions of -#' parameter 'data2'. Dimensions with the same name in the 'data2' parameter -#' must have the same length or length 1; except for the dimension specified -#' with 'memb_dim', which can be different and in the result will be filled -#' with NA values. It can also have additional dimensions with different names -#' in 'data2'. +#' multidimensional array with named dimensions. All dimensions must be +#' equal to 'data2' dimensions except for the ones specified with 'memb_dim' +#' and 'time_dim'. If 'start1' and 'end1' are used to subset a period, the +#' Dates must be stored in element '$attrs$Dates' of the object. Dates must +#' have same time dimensions as element 'data'. #'@param data2 An 's2dv_cube' object with the element 'data' being a -#' multidimensional array of dates with named dimensions matching -#' the dimensions on parameter 'data1'. Dimensions with the same name in the -#' 'data1' parameter must have the same length or length 1, except for the -#' dimension specified with 'memb_dim', which can be different and in the -#' result will be filled with NA values. +#' multidimensional array of named dimensions matching the dimensions of +#' parameter 'data1'. All dimensions must be equal to 'data1' except for the +#' ones specified with 'memb_dim' and 'time_dim'. If 'start2' and 'end2' are +#' used to subset a period, the Dates must be stored in element '$attrs$Dates' +#' of the object. Dates must have same time dimensions as element 'data'. #'@param start1 A list to define the initial date of the period to select from -#' data1 by providing a list of two elements: the initial date of the period +#' 'data1' by providing a list of two elements: the initial date of the period #' and the initial month of the period. #'@param end1 A list to define the final date of the period to select from -#' data1 by providing a list of two elements: the final day of the period and +#' 'data1' by providing a list of two elements: the final day of the period and #' the final month of the period. 
#'@param start2 A list to define the initial date of the period to select from -#' data2 by providing a list of two elements: the initial date of the period +#' 'data2' by providing a list of two elements: the initial date of the period #' and the initial month of the period. #'@param end2 A list to define the final date of the period to select from -#' data2 by providing a list of two elements: the final day of the period and +#' 'data2' by providing a list of two elements: the final day of the period and #' the final month of the period. #'@param time_dim A character string indicating the name of the temporal -#' dimension. By default, it is set to 'ftime'. More than one dimension name -#' matching the dimensions provided in the object \code{data$data} can be -#' specified. This dimension is required to subset the data in a requested +#' dimension that will be used to combine the two arrays. By default, it is set +#' to 'ftime'. Also, it will be used to subset the data in a requested #' period. #'@param memb_dim A character string indicating the name of the member #' dimension. If the data are not ensemble ones, set as NULL. The default #' value is 'member'. #'@param ncores An integer indicating the number of cores to use in parallel #' computation. -#'@return A 's2dv_cube' object containing the indicator in the element -#'\code{data}. The element \code{data} will be a multidimensional array with -#'dimensions named from the combination of 'data1' and 'data2'. The resulting -#'dimensions will be the following: all the same common dimensions between the -#'two arrays plus the different dimensions of each array. If there is any -#'different common dimension but in a dataset it has length 1, it will be added -#'with the maximum dimension. If memb_dim is used, the dimension of the maximum -#'value corresponding to memb_dim of the two data sets will be added; the -#'difference between the dimensions of the set members will be filled with NA. -#'The other elements of the 's2dv_cube' will be updated with the combined -#'information of both datasets. +#'@return An 's2dv_cube' object containing the indicator in the element +#'\code{data}. The element \code{data} will be a multidimensional array created +#'from the combination of 'data1' and 'data2'. The resulting array will contain +#'the following dimensions: the original dimensions of the input data, which are +#'common to both arrays and for the 'time_dim' dimension, the sum of the +#'corresponding dimension of 'data1' and 'data2'. If 'memb_dim' is not null, +#'regarding member dimension, two different situations can occur: (1) in the +#'case that one of the arrays does not have member dimension or is equal to 1, +#'the result will contain the repeated values of itself; (2) in the case that +#'both arrays have member dimension and is greater than 1, all combinations of +#'member dimension will be returned. The other elements of the 's2dv_cube' will +#'be updated with the combined information of both datasets. 
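A small sketch of the dimension rule stated in this @return block (array names and sizes are assumptions for illustration): apart from 'time_dim' and 'memb_dim', both inputs must share dimension names and lengths, otherwise an error is raised, as covered by the new unit tests.

ref <- array(rnorm(2 * 10 * 4), c(sdate = 2, ftime = 10, lat = 4))
exp <- array(rnorm(2 * 5 * 4 * 3), c(sdate = 2, ftime = 5, lat = 4, member = 3))
# Only 'ftime' (time_dim) and 'member' (memb_dim) differ, so the merge is accepted;
# per the description above, the result should have ftime = 10 + 5 = 15 and
# member = 3, with the reference recycled over the member dimension.
out <- MergeRefToExp(data1 = ref, data2 = exp, time_dim = 'ftime')
# A common dimension with a different length greater than 1 is rejected:
bad <- array(rnorm(2 * 5 * 3 * 3), c(sdate = 2, ftime = 5, lat = 3, member = 3))
# MergeRefToExp(data1 = ref, data2 = bad, time_dim = 'ftime')   # error: 'lat' differs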
#' #'@examples #'data_dates <- c(seq(as.Date("01-07-1993", "%d-%m-%Y", tz = 'UTC'), @@ -64,7 +63,7 @@ #' as.Date("01-12-1994","%d-%m-%Y", tz = 'UTC'), "day")) #'dim(data_dates) <- c(ftime = 154, sdate = 2) #'data <- NULL -#'data$data <- array(1:(2*154*2), c(ftime = 154, sdate = 2, member= 2)) +#'data$data <- array(1:(2*154*2), c(ftime = 154, sdate = 2, member = 2)) #'data$attrs$Dates<- data_dates #'class(data) <- 's2dv_cube' #'ref_dates <- seq(as.Date("01-01-1993", "%d-%m-%Y", tz = 'UTC'), @@ -132,7 +131,8 @@ CST_MergeRefToExp <- function(data1, data2, start1 = NULL, end1 = NULL, data1$coords[[i_dim]] <- NULL data1$coords[[i_dim]] <- 1:dim(data1$data)[i_dim] attr(data1$coords[[i_dim]], 'indices') <- TRUE - } else if (!identical(attributes(data1$coords[[i_dim]]), attributes(data2$coords[[i_dim]]))) { + } else if (!identical(attributes(data1$coords[[i_dim]]), + attributes(data2$coords[[i_dim]]))) { attributes(data1$coords[[i_dim]]) <- NULL } } else { @@ -214,60 +214,61 @@ CST_MergeRefToExp <- function(data1, data2, start1 = NULL, end1 = NULL, #'Some indicators are defined for specific temporal periods (e.g.: summer from #'June 21st to September 21st). If the initialization forecast date is later #'than the one required for the indicator (e.g.: July 1st), the user may want to -#'merge past observations, or other reference, to the forecast (or hindcast) to -#'compute the indicator. The function \code{MergeObs2Exp} takes care of this -#'steps. If the forecast simulation doesn't cover the required period because it -#'is initialized too early (e.g.: Initialization on November 1st the forecast -#'covers until the beginning of June next year), a climatology (or other -#'references) could be added at the end of the forecast lead time to cover the -#'desired period (e.g.: until the end of summer). +#'merge past observations, or other references, to the forecast (or hindcast) to +#'compute the indicator. If the forecast simulation doesn't cover the required +#'period because it is initialized too early (e.g.: Initialization on November +#'1st the forecast covers until the beginning of June next year), a climatology +#'(or other references) could be added at the end of the forecast lead time to +#'cover the desired period (e.g.: until the end of summer). #' -#'@param data1 A multidimensional array of with named dimensions matching the -#' dimensions of parameter 'data2'. Dimensions with the same name in the -#' 'data2' parameter must have the same length or length 1; except for the -#' dimension specified with 'memb_dim', which can be different and in the -#' result will be filled with NA values. It can also have additional dimensions -#' with different names in 'data2'. +#'@param data1 A multidimensional array with named dimensions. All dimensions +#' must be equal to 'data2' dimensions except for the ones specified with +#' 'memb_dim' and 'time_dim'. #'@param dates1 A multidimensional array of dates with named dimensions matching -#' the temporal dimensions on parameter 'data1'. -#'@param data2 A multidimensional array of dates with named dimensions matching -#' the dimensions on parameter 'data1'. Dimensions with the same name in the -#' 'data1' parameter must have the same length or length 1, except for the -#' dimension specified with 'memb_dim', which can be different and in the -#' result will be filled with NA values. +#' the temporal dimensions of parameter 'data1'. The common dimensions must be +#' equal to 'data1' dimensions. 
+#'@param data2 A multidimensional array of named dimensions matching the +#' dimensions of parameter 'data1'. All dimensions must be equal to 'data1' +#' except for the ones specified with 'memb_dim' and 'time_dim'. #'@param dates2 A multidimensional array of dates with named dimensions matching -#' the temporal dimensions on parameter 'data2'. +#' the temporal dimensions on parameter 'data2'. The common dimensions must be +#' equal to 'data2' dimensions. #'@param start1 A list to define the initial date of the period to select from -#' data1 by providing a list of two elements: the initial date of the period -#' and the initial month of the period. +#' 'data1' by providing a list of two elements: the initial date of the period +#' and the initial month of the period. The initial date of the period must be +#' included in the 'dates1' array. #'@param end1 A list to define the final date of the period to select from -#' data1 by providing a list of two elements: the final day of the period and -#' the final month of the period. +#' 'data1' by providing a list of two elements: the final day of the period and +#' the final month of the period. The final date of the period must be +#' included in the 'dates1' array. #'@param start2 A list to define the initial date of the period to select from -#' data2 by providing a list of two elements: the initial date of the period -#' and the initial month of the period. +#' 'data2' by providing a list of two elements: the initial date of the period +#' and the initial month of the period. The initial date of the period must be +#' included in the 'dates2' array. #'@param end2 A list to define the final date of the period to select from -#' data2 by providing a list of two elements: the final day of the period and -#' the final month of the period. +#' 'data2' by providing a list of two elements: the final day of the period and +#' the final month of the period. The final date of the period must be +#' included in the 'dates2' array. #'@param time_dim A character string indicating the name of the temporal -#' dimension. By default, it is set to 'ftime'. More than one dimension name -#' matching the dimensions provided in the object \code{data$data} can be -#' specified. This dimension is required to subset the data in a requested +#' dimension that will be used to combine the two arrays. By default, it is set +#' to 'ftime'. Also, it will be used to subset the data in a requested #' period. #'@param memb_dim A character string indicating the name of the member -#' dimension. If the data are not ensemble ones, set as NULL. The default -#' value is 'member'. +#' dimension. If the 'data1' and 'data2' have no member dimension, set it as +#' NULL. It is set as 'member' by default. #'@param ncores An integer indicating the number of cores to use in parallel #' computation. #' -#'@return A multidimensional array with dimensions named from the combination of -#''data1' and 'data2'. The resulting dimensions will be the following: all the -#'same common dimensions between the two arrays plus the different dimensions of -#'each array. If there is any different common dimension but in a dataset it has -#'length 1, it will be added with the maximum dimension. If memb_dim is used, -#'the dimension of the maximum value corresponding to memb_dim of the two data -#'sets will be added; the difference between the dimensions of the set members -#'will be filled with NA. +#'@return A multidimensional array created from the combination of 'data1' and +#''data2'. 
The resulting array will contain the following dimensions: the +#'original dimensions of the input data, which are common to both arrays and for +#'the 'time_dim' dimension, the sum of the corresponding dimension of 'data1' +#'and 'data2'. If 'memb_dim' is not null, regarding member dimension, two +#'different situations can occur: (1) in the case that one of the arrays does +#'not have member dimension or is equal to 1, the result will contain the +#'repeated values of itself; (2) in the case that both arrays have member +#'dimension and is greater than 1, all combinations of member dimension will be +#'returned. #' #'@examples #'data_dates <- c(seq(as.Date("01-07-1993", "%d-%m-%Y", tz = 'UTC'), @@ -286,21 +287,20 @@ CST_MergeRefToExp <- function(data1, data2, start1 = NULL, end1 = NULL, #' time_dim = 'ftime') #' #'@import multiApply -#'@importFrom s2dv InsertDim #'@export MergeRefToExp <- function(data1, data2, dates1 = NULL, dates2 = NULL, start1 = NULL, end1 = NULL, start2 = NULL, end2 = NULL, time_dim = 'ftime', memb_dim = 'member', ncores = NULL) { # Input checks - # data + ## data1 and data2 if (!is.array(data1) | !is.array(data2)) { stop("Parameters 'data1' and 'data2' must be arrays.") } if (is.null(names(dim(data1))) | is.null(names(dim(data2)))) { stop("Parameters 'data1' and 'data2' must have named dimensions.") } - # time_dim + ## time_dim if (!is.character(time_dim)) { stop("Parameter 'time_dim' must be a character string.") } @@ -308,7 +308,9 @@ MergeRefToExp <- function(data1, data2, dates1 = NULL, dates2 = NULL, stop("Parameter 'time_dim' is not found in 'data1' or 'data2' dimension ", "names.") } - # memb_dim + ## memb_dim + data1dims <- names(dim(data1)) + data2dims <- names(dim(data2)) if (!is.null(memb_dim)) { if (!is.character(memb_dim)) { stop("Parameter 'memb_dim' must be a character string.") @@ -317,65 +319,41 @@ MergeRefToExp <- function(data1, data2, dates1 = NULL, dates2 = NULL, stop("Parameter 'memb_dim' is not found in 'data1' or 'data2' dimension. 
", "Set it to NULL if there is no member dimension.") } - if (memb_dim %in% names(dim(data1))) { - if (dim(data1)[memb_dim] == 1) { - data1 <- array(data1, dim = dim(data1)[-which(names(dim(data1)) == memb_dim)]) - } - } - if (memb_dim %in% names(dim(data2))) { - if (dim(data2)[memb_dim] == 1) { - data2 <- array(data2, dim = dim(data2)[-which(names(dim(data2)) == memb_dim)]) - } - } - # Add NA to fill member_dim - if (memb_dim %in% names(dim(data1)) & memb_dim %in% names(dim(data2))) { + if ((memb_dim %in% names(dim(data1)) & memb_dim %in% names(dim(data2)))) { if (dim(data1)[memb_dim] != dim(data2)[memb_dim]) { - if (dim(data1)[memb_dim] > dim(data2)[memb_dim]) { - data2 <- Apply(list(data2), target_dims = memb_dim, - fun = function(x, length_new_dim) { - return(c(x, rep(NA, length_new_dim - length(x)))) - }, length_new_dim = dim(data1)[memb_dim], - output_dims = memb_dim)$output1 + if (dim(data1)[memb_dim] == 1) { + data1 <- array(data1, dim = dim(data1)[-which(names(dim(data1)) == memb_dim)]) + } else if (dim(data2)[memb_dim] == 1) { + data2 <- array(data2, dim = dim(data2)[-which(names(dim(data2)) == memb_dim)]) } else { + memb_dim1 <- dim(data1)[memb_dim] data1 <- Apply(list(data1), target_dims = memb_dim, - fun = function(x, length_new_dim) { - return(c(x, rep(NA, length_new_dim - length(x)))) - }, length_new_dim = dim(data2)[memb_dim], - output_dims = memb_dim)$output1 + fun = function(x, memb_rep) { + return(rep(x, each = memb_rep)) + }, memb_rep = dim(data2)[memb_dim], + output_dims = memb_dim, ncores = ncores)$output1 + data2 <- Apply(list(data2), target_dims = memb_dim, + fun = function(x, memb_rep) { + return(rep(x, memb_rep)) + }, memb_rep = memb_dim1, + output_dims = memb_dim, ncores = ncores)$output1 } } } } - - # Find common dims and remove the ones not needed + ## data1 and data2 (2) name_data1 <- sort(names(dim(data1))) name_data2 <- sort(names(dim(data2))) - commondims <- name_data1[name_data1 %in% name_data2] - commondims <- commondims[-which(commondims == time_dim)] + name_data1 <- name_data1[-which(name_data1 %in% c(time_dim, memb_dim))] + name_data2 <- name_data2[-which(name_data2 %in% c(time_dim, memb_dim))] - if (length(commondims) != 0) { - if (!all(dim(data2)[commondims] == dim(data1)[commondims])) { - dif_common <- commondims[!dim(data2)[commondims] == dim(data1)[commondims]] - if (any(dim(data2)[dif_common] == 1)) { - dim_remove <- dif_common[which(dim(data1)[dif_common] == 1)] - dim(data1) <- dim(data1)[-which(names(dim(data1)) == dim_remove)] - dif_common <- dif_common[-which(dif_common == dim_remove)] - } - if (any(dim(data1)[dif_common] == 1)) { - dim_remove <- dif_common[which(dim(data1)[dif_common] == 1)] - dim(data1) <- dim(data1)[-which(names(dim(data1)) == dim_remove)] - dif_common <- dif_common[-which(dif_common == dim_remove)] - } - if (length(dif_common) != 0) { - stop("Parameters 'data1' and 'data2' have common dimension ", - paste0("'", dif_common, sep = "' "), "with different length and ", - "different of length 1.") - } - } + if (!identical(length(name_data1), length(name_data2)) | + !identical(dim(data1)[name_data1], dim(data2)[name_data2])) { + stop(paste0("Parameter 'data1' and 'data2' must have same length of ", + "all dimensions except 'memb_dim'.")) } - - # dates1 + ## dates1 if (!is.null(start1) & !is.null(end1)) { if (is.null(dates1)) { warning("Parameter 'dates' is NULL and the average of the ", @@ -395,7 +373,7 @@ MergeRefToExp <- function(data1, data2, dates1 = NULL, dates2 = NULL, } } } - # dates2 + ## dates2 if (!is.null(start2) & 
!is.null(end2)) { if (is.null(dates2)) { warning("Parameter 'dates2' is NULL and the average of the ", @@ -416,18 +394,14 @@ MergeRefToExp <- function(data1, data2, dates1 = NULL, dates2 = NULL, } } - data1dims <- names(dim(data1)) - data2dims <- names(dim(data2)) - data1 <- Apply(list(data1, data2), target_dims = time_dim, fun = 'c', output_dims = time_dim, ncores = ncores)$output1 if (all(names(dim(data1)) %in% data1dims)) { - pos <- match(names(dim(data1)), data1dims) + pos <- match(data1dims, names(dim(data1))) data1 <- aperm(data1, pos) - } - if (all(names(dim(data1)) %in% data2dims)) { - pos <- match(names(dim(data1)), data2dims) + } else if (all(names(dim(data1)) %in% data2dims)) { + pos <- match(data2dims, names(dim(data1))) data1 <- aperm(data1, pos) } return(data1) diff --git a/man/CST_MergeRefToExp.Rd b/man/CST_MergeRefToExp.Rd index 67f35a9..bbca8c4 100644 --- a/man/CST_MergeRefToExp.Rd +++ b/man/CST_MergeRefToExp.Rd @@ -18,40 +18,38 @@ CST_MergeRefToExp( } \arguments{ \item{data1}{An 's2dv_cube' object with the element 'data' being a -multidimensional array of with named dimensions matching the dimensions of -parameter 'data2'. Dimensions with the same name in the 'data2' parameter -must have the same length or length 1; except for the dimension specified -with 'memb_dim', which can be different and in the result will be filled -with NA values. It can also have additional dimensions with different names -in 'data2'.} +multidimensional array with named dimensions. All dimensions must be +equal to 'data2' dimensions except for the ones specified with 'memb_dim' +and 'time_dim'. If 'start1' and 'end1' are used to subset a period, the +Dates must be stored in element '$attrs$Dates' of the object. Dates must +have same time dimensions as element 'data'.} \item{data2}{An 's2dv_cube' object with the element 'data' being a -multidimensional array of dates with named dimensions matching -the dimensions on parameter 'data1'. Dimensions with the same name in the -'data1' parameter must have the same length or length 1, except for the -dimension specified with 'memb_dim', which can be different and in the -result will be filled with NA values.} +multidimensional array of named dimensions matching the dimensions of +parameter 'data1'. All dimensions must be equal to 'data1' except for the +ones specified with 'memb_dim' and 'time_dim'. If 'start2' and 'end2' are +used to subset a period, the Dates must be stored in element '$attrs$Dates' +of the object. 
Dates must have same time dimensions as element 'data'.} \item{start1}{A list to define the initial date of the period to select from -data1 by providing a list of two elements: the initial date of the period +'data1' by providing a list of two elements: the initial date of the period and the initial month of the period.} \item{end1}{A list to define the final date of the period to select from -data1 by providing a list of two elements: the final day of the period and +'data1' by providing a list of two elements: the final day of the period and the final month of the period.} \item{start2}{A list to define the initial date of the period to select from -data2 by providing a list of two elements: the initial date of the period +'data2' by providing a list of two elements: the initial date of the period and the initial month of the period.} \item{end2}{A list to define the final date of the period to select from -data2 by providing a list of two elements: the final day of the period and +'data2' by providing a list of two elements: the final day of the period and the final month of the period.} \item{time_dim}{A character string indicating the name of the temporal -dimension. By default, it is set to 'ftime'. More than one dimension name -matching the dimensions provided in the object \code{data$data} can be -specified. This dimension is required to subset the data in a requested +dimension that will be used to combine the two arrays. By default, it is set +to 'ftime'. Also, it will be used to subset the data in a requested period.} \item{memb_dim}{A character string indicating the name of the member @@ -62,28 +60,29 @@ value is 'member'.} computation.} } \value{ -A 's2dv_cube' object containing the indicator in the element -\code{data}. The element \code{data} will be a multidimensional array with -dimensions named from the combination of 'data1' and 'data2'. The resulting -dimensions will be the following: all the same common dimensions between the -two arrays plus the different dimensions of each array. If there is any -different common dimension but in a dataset it has length 1, it will be added -with the maximum dimension. If memb_dim is used, the dimension of the maximum -value corresponding to memb_dim of the two data sets will be added; the -difference between the dimensions of the set members will be filled with NA. -The other elements of the 's2dv_cube' will be updated with the combined -information of both datasets. +An 's2dv_cube' object containing the indicator in the element +\code{data}. The element \code{data} will be a multidimensional array created +from the combination of 'data1' and 'data2'. The resulting array will contain +the following dimensions: the original dimensions of the input data, which are +common to both arrays and for the 'time_dim' dimension, the sum of the +corresponding dimension of 'data1' and 'data2'. If 'memb_dim' is not null, +regarding member dimension, two different situations can occur: (1) in the +case that one of the arrays does not have member dimension or is equal to 1, +the result will contain the repeated values of itself; (2) in the case that +both arrays have member dimension and is greater than 1, all combinations of +member dimension will be returned. The other elements of the 's2dv_cube' will +be updated with the combined information of both datasets. } \description{ Some indicators are defined for specific temporal periods (e.g.: summer from June 21st to September 21st). 
If the initialization forecast date is later than the one required for the indicator (e.g.: July 1st), the user may want to -merge past observations, or other references, to the forecast (or hindcast) -to compute the indicator. If the forecast simulation doesn't cover the -required period because it is initialized too early (e.g.: Initialization on -November 1st the forecast covers until the beginning of June next year), a -climatology (or other references) could be added at the end of the forecast -lead time to cover the desired period (e.g.: until the end of summer). +merge past observations, or other references, to the forecast (or hindcast) to +compute the indicator. If the forecast simulation doesn't cover the required +period because it is initialized too early (e.g.: Initialization on November +1st the forecast covers until the beginning of June next year), a climatology +(or other references) could be added at the end of the forecast lead time to +cover the desired period (e.g.: until the end of summer). } \examples{ data_dates <- c(seq(as.Date("01-07-1993", "\%d-\%m-\%Y", tz = 'UTC'), @@ -92,7 +91,7 @@ data_dates <- c(seq(as.Date("01-07-1993", "\%d-\%m-\%Y", tz = 'UTC'), as.Date("01-12-1994","\%d-\%m-\%Y", tz = 'UTC'), "day")) dim(data_dates) <- c(ftime = 154, sdate = 2) data <- NULL -data$data <- array(1:(2*154*2), c(ftime = 154, sdate = 2, member= 2)) +data$data <- array(1:(2*154*2), c(ftime = 154, sdate = 2, member = 2)) data$attrs$Dates<- data_dates class(data) <- 's2dv_cube' ref_dates <- seq(as.Date("01-01-1993", "\%d-\%m-\%Y", tz = 'UTC'), diff --git a/man/MergeRefToExp.Rd b/man/MergeRefToExp.Rd index bd79c2f..e22b52d 100644 --- a/man/MergeRefToExp.Rd +++ b/man/MergeRefToExp.Rd @@ -19,75 +19,76 @@ MergeRefToExp( ) } \arguments{ -\item{data1}{A multidimensional array of with named dimensions matching the -dimensions of parameter 'data2'. Dimensions with the same name in the -'data2' parameter must have the same length or length 1; except for the -dimension specified with 'memb_dim', which can be different and in the -result will be filled with NA values. It can also have additional dimensions -with different names in 'data2'.} +\item{data1}{A multidimensional array with named dimensions. All dimensions +must be equal to 'data2' dimensions except for the ones specified with +'memb_dim' and 'time_dim'.} -\item{data2}{A multidimensional array of dates with named dimensions matching -the dimensions on parameter 'data1'. Dimensions with the same name in the -'data1' parameter must have the same length or length 1, except for the -dimension specified with 'memb_dim', which can be different and in the -result will be filled with NA values.} +\item{data2}{A multidimensional array of named dimensions matching the +dimensions of parameter 'data1'. All dimensions must be equal to 'data1' +except for the ones specified with 'memb_dim' and 'time_dim'.} \item{dates1}{A multidimensional array of dates with named dimensions matching -the temporal dimensions on parameter 'data1'.} +the temporal dimensions of parameter 'data1'. The common dimensions must be +equal to 'data1' dimensions.} \item{dates2}{A multidimensional array of dates with named dimensions matching -the temporal dimensions on parameter 'data2'.} +the temporal dimensions on parameter 'data2'. 
The common dimensions must be +equal to 'data2' dimensions.} \item{start1}{A list to define the initial date of the period to select from -data1 by providing a list of two elements: the initial date of the period -and the initial month of the period.} +'data1' by providing a list of two elements: the initial date of the period +and the initial month of the period. The initial date of the period must be +included in the 'dates1' array.} \item{end1}{A list to define the final date of the period to select from -data1 by providing a list of two elements: the final day of the period and -the final month of the period.} +'data1' by providing a list of two elements: the final day of the period and +the final month of the period. The final date of the period must be +included in the 'dates1' array.} \item{start2}{A list to define the initial date of the period to select from -data2 by providing a list of two elements: the initial date of the period -and the initial month of the period.} +'data2' by providing a list of two elements: the initial date of the period +and the initial month of the period. The initial date of the period must be +included in the 'dates2' array.} \item{end2}{A list to define the final date of the period to select from -data2 by providing a list of two elements: the final day of the period and -the final month of the period.} +'data2' by providing a list of two elements: the final day of the period and +the final month of the period. The final date of the period must be +included in the 'dates2' array.} \item{time_dim}{A character string indicating the name of the temporal -dimension. By default, it is set to 'ftime'. More than one dimension name -matching the dimensions provided in the object \code{data$data} can be -specified. This dimension is required to subset the data in a requested +dimension that will be used to combine the two arrays. By default, it is set +to 'ftime'. Also, it will be used to subset the data in a requested period.} \item{memb_dim}{A character string indicating the name of the member -dimension. If the data are not ensemble ones, set as NULL. The default -value is 'member'.} +dimension. If the 'data1' and 'data2' have no member dimension, set it as +NULL. It is set as 'member' by default.} \item{ncores}{An integer indicating the number of cores to use in parallel computation.} } \value{ -A multidimensional array with dimensions named from the combination of -'data1' and 'data2'. The resulting dimensions will be the following: all the -same common dimensions between the two arrays plus the different dimensions of -each array. If there is any different common dimension but in a dataset it has -length 1, it will be added with the maximum dimension. If memb_dim is used, -the dimension of the maximum value corresponding to memb_dim of the two data -sets will be added; the difference between the dimensions of the set members -will be filled with NA. +A multidimensional array created from the combination of 'data1' and +'data2'. The resulting array will contain the following dimensions: the +original dimensions of the input data, which are common to both arrays and for +the 'time_dim' dimension, the sum of the corresponding dimension of 'data1' +and 'data2'. 
If 'memb_dim' is not null, regarding member dimension, two +different situations can occur: (1) in the case that one of the arrays does +not have member dimension or is equal to 1, the result will contain the +repeated values of itself; (2) in the case that both arrays have member +dimension and is greater than 1, all combinations of member dimension will be +returned. } \description{ Some indicators are defined for specific temporal periods (e.g.: summer from June 21st to September 21st). If the initialization forecast date is later than the one required for the indicator (e.g.: July 1st), the user may want to -merge past observations, or other reference, to the forecast (or hindcast) to -compute the indicator. The function \code{MergeObs2Exp} takes care of this -steps. If the forecast simulation doesn't cover the required period because it -is initialized too early (e.g.: Initialization on November 1st the forecast -covers until the beginning of June next year), a climatology (or other -references) could be added at the end of the forecast lead time to cover the -desired period (e.g.: until the end of summer). +merge past observations, or other references, to the forecast (or hindcast) to +compute the indicator. If the forecast simulation doesn't cover the required +period because it is initialized too early (e.g.: Initialization on November +1st the forecast covers until the beginning of June next year), a climatology +(or other references) could be added at the end of the forecast lead time to +cover the desired period (e.g.: until the end of summer). } \examples{ data_dates <- c(seq(as.Date("01-07-1993", "\%d-\%m-\%Y", tz = 'UTC'), diff --git a/tests/testthat/test-MergeRefToExp.R b/tests/testthat/test-MergeRefToExp.R index effae50..57cd425 100644 --- a/tests/testthat/test-MergeRefToExp.R +++ b/tests/testthat/test-MergeRefToExp.R @@ -7,16 +7,16 @@ dates_data1 <- c(seq(as.Date("11-07-1993", "%d-%m-%Y", tz = 'UTC'), as.Date("20-07-1994","%d-%m-%Y", tz = 'UTC'), "day")) dim(dates_data1) <- c(ftime = 10, sdate = 2) cube1 <- NULL -cube1$data <- array(1:(2*10*2), c(ftime = 10, sdate = 2, member= 2)) +cube1$data <- array(1:(2*10*2), c(ftime = 10, sdate = 2, member = 2)) cube1$attrs$Dates <- dates_data1 class(cube1) <- 's2dv_cube' ref_dates1 <- c(seq(as.Date("01-07-1993", "%d-%m-%Y", tz = 'UTC'), - as.Date("10-07-1993","%d-%m-%Y", tz = 'UTC'), "day"), - seq(as.Date("01-07-1994", "%d-%m-%Y", tz = 'UTC'), - as.Date("10-07-1994","%d-%m-%Y", tz = 'UTC'), "day")) + as.Date("10-07-1993","%d-%m-%Y", tz = 'UTC'), "day"), + seq(as.Date("01-07-1994", "%d-%m-%Y", tz = 'UTC'), + as.Date("10-07-1994","%d-%m-%Y", tz = 'UTC'), "day")) dim(ref_dates1) <- c(ftime = 10, sdate = 2) cube_ref <- NULL -cube_ref$data <- array(1001:1700, c(ftime = 10, sdate = 2)) +cube_ref$data <- array(1001:1020, c(ftime = 10, sdate = 2)) cube_ref$attrs$Dates <- ref_dates1 class(cube_ref) <- 's2dv_cube' start1 <- list(3, 7) @@ -25,8 +25,13 @@ start2 <- list(11, 7) end2 <- list(15, 7) # dat1 -ref1 <- array(1001:1700, c(ftime = 10, sdate = 2)) -data1 <- array(1:(2*154*2), c(ftime = 11, sdate = 2, member = 2)) +ref1 <- array(1001:1020, c(ftime = 10, sdate = 2, member = 1)) +data1 <- array(1:40, c(ftime = 10, sdate = 2, member = 2)) + + +# dat2 +ref2 <- array(1001:1015, c(ftime = 5, sdate = 1, member = 3)) +data2 <- array(1:6, c(ftime = 3, sdate = 1, member = 2)) ########################################################################### test_that("1. Input checks", { @@ -70,16 +75,23 @@ test_that("1. 
Input checks", { ) # common dimensions expect_error( - MergeRefToExp(data1 = array(1:12, c(sdate = 2, ftime = 2, var = 3)), + MergeRefToExp(data1 = array(1:12, c(sdate = 2, ftime = 2, dat = 3)), data2 = array(1:16, c(sdate = 2, ftime = 2, var = 4)), memb_dim = NULL), - paste0("Parameters 'data1' and 'data2' have common dimension 'var' ", - "with different length and different of length 1.") + paste0("Parameter 'data1' and 'data2' must have same length of ", + "all dimensions except 'memb_dim'.") + ) + expect_error( + MergeRefToExp(data1 = array(1:12, c(sdate = 2, ftime = 2, dat = 1)), + data2 = array(1:16, c(sdate = 2, ftime = 2)), + memb_dim = NULL), + paste0("Parameter 'data1' and 'data2' must have same length of ", + "all dimensions except 'memb_dim'.") ) # dates expect_warning( MergeRefToExp(data1 = array(1:4, c(sdate = 2, ftime = 2, lat = 1)), - data2 = array(1:16, c(sdate = 2, ftime = 2, lat = 4)), + data2 = array(1:16, c(sdate = 2, ftime = 2, lat = 1)), memb_dim = NULL, start1 = list(1, 1), end1 = list(3, 1), start2 = NULL, end2 = NULL), paste0("Parameter 'dates' is NULL and the average of the ", @@ -106,7 +118,7 @@ test_that("1. Input checks", { ########################################################################### -test_that("2. Output checks", { +test_that("2. Output checks: CST_MergeRefToExp", { res1 <- CST_MergeRefToExp(data1 = cube_ref, data2 = cube1, start1 = start1, end1 = end1, start2 = start2, end2 = end2) @@ -125,11 +137,54 @@ test_that("2. Output checks", { dim(res1$data)[c('ftime', 'sdate')], dim(res1$attrs$Dates) ) + # data + expect_equal( + res1$data[1:8,,1], + res1$data[1:8,,2] + ) }) ########################################################################### -test_that("3. Output checks", { +test_that("3. Output checks: MergeRefToExp", { + # Minimum dimensions + expect_equal( + MergeRefToExp(data1 = array(1:2, c(ftime = 2)), + data2 = array(1, c(ftime = 1)), memb_dim = NULL), + array(c(1,2,1), dim = c(ftime = 3)) + ) + # res2 + res2 <- MergeRefToExp(data1 = ref1, data2 = data1) + ## dims + expect_equal( + dim(res2), + c(ftime = 20, sdate = 2, member = 2) + ) + ## data + expect_equal( + res2[,1,], + array(c(1001:1010, 1:10, 1001:1010, 21:30), dim = c(ftime = 20, member = 2)) + ) + # res3: multiple different members + res3 <- MergeRefToExp(data1 = ref2, data2 = data2) + ## dims + expect_equal( + dim(res3), + c(ftime = 8, sdate = 1, member = 6) + ) + expect_equal( + as.vector(res3[1:5, 1, ]), + c(rep(1001:1005, 2), rep(1006:1010, 2), rep(1011:1015, 2)) + ) + expect_equal( + as.vector(res3[6:8, 1, ]), + rep(c(1:3, 4:6), 3) + ) +}) + +########################################################################### + +test_that("3. Output checks: Dates", { data_dates <- c(seq(as.Date("01-07-1993", "%d-%m-%Y", tz = 'UTC'), as.Date("01-12-1993", "%d-%m-%Y", tz = 'UTC'), "day"), seq(as.Date("01-07-1994", "%d-%m-%Y", tz = 'UTC'), @@ -152,7 +207,7 @@ test_that("3. Output checks", { expect_equal(CST_MergeRefToExp(data1 = ref, data2 = data, start1 = list(21, 6), end1 = list(30, 6), start2 = list(1, 7), end2 = list(21, 9))$attrs$Dates, - SelectPeriodOnDates(ref_dates, start = list(21, 6), end = list(21,9))) + SelectPeriodOnDates(ref_dates, start = list(21, 6), end = list(21,9))) ) output <- array(c(1172:1181, 1:83, 1537:1546, 155:237, 1172:1181, 309:391, @@ -168,9 +223,9 @@ test_that("3. 
Output checks", { # issue 13: One lead time data_dates <- c(as.Date("01-06-1993", "%d-%m-%Y", tz = 'UTC'), - as.Date("01-07-1993", "%d-%m-%Y", tz = 'UTC'), + as.Date("01-07-1993", "%d-%m-%Y", tz = 'UTC'), as.Date("01-06-1994", "%d-%m-%Y", tz = 'UTC'), - as.Date("01-07-1994","%d-%m-%Y", tz = 'UTC')) + as.Date("01-07-1994","%d-%m-%Y", tz = 'UTC')) dim(data_dates) <- c(ftime = 2, sdate = 2) ref_dates <- c(as.Date("01-05-1993", "%d-%m-%Y", tz = 'UTC'), @@ -193,7 +248,6 @@ test_that("3. Output checks", { as.Date("01-07-1994","%d-%m-%Y", tz = 'UTC')) dim(res_dates) <- c(ftime = 3, sdate = 2) - expect_equal( CST_MergeRefToExp(data1 = ref, data2 = data, start1 = list(1, 5), end1 = list(31, 5), start2 = list(1, 6), @@ -223,12 +277,11 @@ test_that("4. Test Seasonal", { dates <- c(dates, format(seq(as.Date(paste0("01-04-",year), "%d-%m-%Y", tz = 'UTC'), as.Date(paste0("01-11-",year), "%d-%m-%Y", - tz = 'UTC'), "day"), - "%Y-%m-%d")) + tz = 'UTC'), "day"), "%Y-%m-%d")) } dates <- as.Date(dates, tz = 'UTC') - dim.dates <- c(ftime=215, sweek = 1, sday = 1, - sdate=(hcst.endyear-hcst.inityear)+1) + dim.dates <- c(ftime = 215, sweek = 1, sday = 1, + sdate = (hcst.endyear - hcst.inityear) + 1) dim(dates) <- dim.dates ref <- NULL ref$data <- array(1:(215*25), c(ftime = 215, sdate = 25)) -- GitLab From 0ed82bc291fb826176be5aac5e5176890f19071d Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 19 Jul 2023 12:34:58 +0200 Subject: [PATCH 38/87] Include publications: add new section in README and improve DESCRIPTION --- DESCRIPTION | 9 +++++---- README.md | 9 ++++++++- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 77a2156..c40d147 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -21,10 +21,11 @@ Description: Set of generalised tools for the flexible computation of climate predictions, but its methods are also applicable to other time-scales, provided the dimensional structure of the input is maintained. Additionally, the outputs of the functions in this package are compatible with 'CSTools'. - This package was developed in the context of H2020 MED-GOLD (776467) and - S2S4E (776787) projects. - Lledó et al. (2019) . - Pérez-Zanón et al. (2023) . + This package is described in Pérez-Zanón et al. (2023) + and it was developed in the context of + H2020 MED-GOLD (776467) and S2S4E (776787) projects. See Lledó et al. (2019) + and Chou et al., 2023 + for details. Depends: R (>= 3.6.0) Imports: diff --git a/README.md b/README.md index 6607398..360ecbe 100644 --- a/README.md +++ b/README.md @@ -5,10 +5,17 @@ CSIndicators Set of generalised tools for the flexible computation of climate related indicators defined by the user. Each method represents a specific mathematical approach which is combined with the possibility to select an arbitrary time period to define the indicator. This enables a wide range of possibilities to tailor the most suitable indicator for each particular climate service application (agriculture, food security, energy, water management…). This package is intended for sub-seasonal, seasonal and decadal climate predictions, but its methods are also applicable to other time-scales, provided the dimensional structure of the input is maintained. Additionally, the outputs of the functions in this package are compatible with [CSTools](https://earth.bsc.es/gitlab/external/cstools). -A scientific publication was published in the Climate Services Journal, and it can be cited as follows: +How to cite +----------- > Pérez-Zanón, N., Ho, A. 
Chou, C., Lledó, L., Marcos-Matamoros, R., Rifà, E. and González-Reviriego, N. (2023). CSIndicators: Get tailored climate indicators for applications in your sector. Climate Services. https://doi.org/10.1016/j.cliser.2023.100393 +For details in the methodologies see: + +> Pérez-Zanón, N., Caron, L.-P., Terzago, S., Van Schaeybroeck, B., Lledó, L., Manubens, N., Roulin, E., Alvarez-Castro, M. C., Batté, L., Bretonnière, P.-A., Corti, S., Delgado-Torres, C., Domínguez, M., Fabiano, F., Giuntoli, I., von Hardenberg, J., Sánchez-García, E., Torralba, V., and Verfaillie, D.: Climate Services Toolbox (CSTools) v4.0: from climate forecasts to climate forecast information, Geosci. Model Dev., 15, 6115–6142, https://doi.org/10.5194/gmd-15-6115-2022, 2022. +Chou, C., R. Marcos-Matamoros, L. Palma Garcia, N. Pérez-Zanón, M. Teixeira, S. Silva, N. Fontes, A. Graça, A. Dell'Aquila, S. Calmanti and N. González-Reviriego (2023). Advanced seasonal predictions for vine management based on bioclimatic indicators tailored to the wine sector. Climate Services, 30, 100343, https://doi.org/10.1016/j.cliser.2023.100343. +Lledó, Ll., V. Torralba, A. Soret, J. Ramon and F.J. Doblas-Reyes (2019). Seasonal forecasts of wind power generation. Renewable Energy, 143, 91-100, https://doi.org/10.1016/j.renene.2019.04.135. + Installation ------------ -- GitLab From 008f379d1a844bd713fad3defe5bea6d3935e43c Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 20 Jul 2023 13:20:42 +0200 Subject: [PATCH 39/87] Correct few examples due to time_dim value and add ClimProjDiags in Importd --- DESCRIPTION | 3 ++- R/PeriodMean.R | 4 ++-- R/WindCapacityFactor.R | 7 ++++--- R/WindPowerDensity.R | 7 ++++--- man/PeriodMean.Rd | 4 ++-- man/WindCapacityFactor.Rd | 4 ++-- man/WindPowerDensity.Rd | 4 ++-- 7 files changed, 18 insertions(+), 15 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4e20983..835c96b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,7 +27,8 @@ Depends: R (>= 3.6.0) Imports: multiApply (>= 2.1.1), - stats + stats, + ClimProjDiags Suggests: testthat, CSTools, diff --git a/R/PeriodMean.R b/R/PeriodMean.R index abc585a..db6a78f 100644 --- a/R/PeriodMean.R +++ b/R/PeriodMean.R @@ -147,7 +147,7 @@ CST_PeriodMean <- function(data, start = NULL, end = NULL, #'indicator in the element \code{data}. 
#' #'@examples -#'data <- array(rnorm(45), dim = c(member = 7, sdate = 4, ftime = 3)) +#'data <- array(rnorm(45), dim = c(member = 7, sdate = 4, time = 3)) #'Dates <- c(seq(as.Date("2000-11-01", "%Y-%m-%d", tz = "UTC"), #' as.Date("2001-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), #' seq(as.Date("2001-11-01", "%Y-%m-%d", tz = "UTC"), @@ -156,7 +156,7 @@ CST_PeriodMean <- function(data, start = NULL, end = NULL, #' as.Date("2003-01-01", "%Y-%m-%d", tz = "UTC"), by = "month"), #' seq(as.Date("2003-11-01", "%Y-%m-%d", tz = "UTC"), #' as.Date("2004-01-01", "%Y-%m-%d", tz = "UTC"), by = "month")) -#'dim(Dates) <- c(sdate = 4, ftime = 3) +#'dim(Dates) <- c(sdate = 4, time = 3) #'SA <- PeriodMean(data, dates = Dates, start = list(01, 12), end = list(01, 01)) #' #'@import multiApply diff --git a/R/WindCapacityFactor.R b/R/WindCapacityFactor.R index 760dba0..76092dd 100644 --- a/R/WindCapacityFactor.R +++ b/R/WindCapacityFactor.R @@ -80,7 +80,8 @@ CST_WindCapacityFactor <- function(wind, IEC_class = c("I", "I/II", "II", "II/II WindCapacity <- WindCapacityFactor(wind = wind$data, IEC_class = IEC_class, dates = wind$attrs$Dates, start = start, - end = end, ncores = ncores) + end = end, time_dim = time_dim, + ncores = ncores) wind$data <- WindCapacity wind$dims <- dim(WindCapacity) @@ -146,7 +147,7 @@ CST_WindCapacityFactor <- function(wind, IEC_class = c("I", "I/II", "II", "II/II #' #'@examples #'wind <- array(rweibull(n = 32100, shape = 2, scale = 6), -#' c(member = 5, sdate = 3, ftime = 214, lon = 2, lat = 5)) +#' c(member = 5, sdate = 3, time = 214, lon = 2, lat = 5)) #' #'Dates <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), #' as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), @@ -154,7 +155,7 @@ CST_WindCapacityFactor <- function(wind, IEC_class = c("I", "I/II", "II", "II/II #' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), #' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), #' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) -#'dim(Dates) <- c(sdate = 3, ftime = 214) +#'dim(Dates) <- c(sdate = 3, time = 214) #' #'WCF <- WindCapacityFactor(wind, IEC_class = "III", dates = Dates, #' start = list(21, 4), end = list(21, 6)) diff --git a/R/WindPowerDensity.R b/R/WindPowerDensity.R index 3eba59a..3944e3d 100644 --- a/R/WindPowerDensity.R +++ b/R/WindPowerDensity.R @@ -68,7 +68,8 @@ CST_WindPowerDensity <- function(wind, ro = 1.225, start = NULL, end = NULL, } WindPower <- WindPowerDensity(wind = wind$data, ro = ro, dates = wind$attrs$Dates, start = start, - end = end, ncores = ncores) + end = end, time_dim = time_dim, + ncores = ncores) wind$data <- WindPower wind$dims <- dim(WindPower) if ('Variable' %in% names(wind$attrs)) { @@ -122,14 +123,14 @@ CST_WindPowerDensity <- function(wind, ro = 1.225, start = NULL, end = NULL, #' #'@examples #'wind <- array(rweibull(n = 32100, shape = 2, scale = 6), -#' c(member = 5, sdate = 3, ftime = 214, lon = 2, lat = 5)) +#' c(member = 5, sdate = 3, time = 214, lon = 2, lat = 5)) #'Dates <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), #' as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), #' seq(as.Date("01-05-2001", format = "%d-%m-%Y"), #' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), #' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), #' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) -#'dim(Dates) <- c(sdate = 3, ftime = 214) +#'dim(Dates) <- c(sdate = 3, time = 214) #'WPD <- WindPowerDensity(wind, dates = Dates, start = list(21, 4), #' end = list(21, 6)) #' diff --git a/man/PeriodMean.Rd 
b/man/PeriodMean.Rd index cd1fcef..9637d58 100644 --- a/man/PeriodMean.Rd +++ b/man/PeriodMean.Rd @@ -59,7 +59,7 @@ this function: } } \examples{ -data <- array(rnorm(45), dim = c(member = 7, sdate = 4, ftime = 3)) +data <- array(rnorm(45), dim = c(member = 7, sdate = 4, time = 3)) Dates <- c(seq(as.Date("2000-11-01", "\%Y-\%m-\%d", tz = "UTC"), as.Date("2001-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), seq(as.Date("2001-11-01", "\%Y-\%m-\%d", tz = "UTC"), @@ -68,7 +68,7 @@ Dates <- c(seq(as.Date("2000-11-01", "\%Y-\%m-\%d", tz = "UTC"), as.Date("2003-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), seq(as.Date("2003-11-01", "\%Y-\%m-\%d", tz = "UTC"), as.Date("2004-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month")) -dim(Dates) <- c(sdate = 4, ftime = 3) +dim(Dates) <- c(sdate = 4, time = 3) SA <- PeriodMean(data, dates = Dates, start = list(01, 12), end = list(01, 01)) } diff --git a/man/WindCapacityFactor.Rd b/man/WindCapacityFactor.Rd index a0a7ce5..0b6b958 100644 --- a/man/WindCapacityFactor.Rd +++ b/man/WindCapacityFactor.Rd @@ -66,7 +66,7 @@ below). } \examples{ wind <- array(rweibull(n = 32100, shape = 2, scale = 6), - c(member = 5, sdate = 3, ftime = 214, lon = 2, lat = 5)) + c(member = 5, sdate = 3, time = 214, lon = 2, lat = 5)) Dates <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), as.Date("30-11-2000", format = "\%d-\%m-\%Y"), by = 'day'), @@ -74,7 +74,7 @@ Dates <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), as.Date("30-11-2001", format = "\%d-\%m-\%Y"), by = 'day'), seq(as.Date("01-05-2002", format = "\%d-\%m-\%Y"), as.Date("30-11-2002", format = "\%d-\%m-\%Y"), by = 'day')) -dim(Dates) <- c(sdate = 3, ftime = 214) +dim(Dates) <- c(sdate = 3, time = 214) WCF <- WindCapacityFactor(wind, IEC_class = "III", dates = Dates, start = list(21, 4), end = list(21, 6)) diff --git a/man/WindPowerDensity.Rd b/man/WindPowerDensity.Rd index 8b72009..9ca3234 100644 --- a/man/WindPowerDensity.Rd +++ b/man/WindPowerDensity.Rd @@ -58,14 +58,14 @@ it will give inaccurate results if used with period means. 
} \examples{ wind <- array(rweibull(n = 32100, shape = 2, scale = 6), - c(member = 5, sdate = 3, ftime = 214, lon = 2, lat = 5)) + c(member = 5, sdate = 3, time = 214, lon = 2, lat = 5)) Dates <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), as.Date("30-11-2000", format = "\%d-\%m-\%Y"), by = 'day'), seq(as.Date("01-05-2001", format = "\%d-\%m-\%Y"), as.Date("30-11-2001", format = "\%d-\%m-\%Y"), by = 'day'), seq(as.Date("01-05-2002", format = "\%d-\%m-\%Y"), as.Date("30-11-2002", format = "\%d-\%m-\%Y"), by = 'day')) -dim(Dates) <- c(sdate = 3, ftime = 214) +dim(Dates) <- c(sdate = 3, time = 214) WPD <- WindPowerDensity(wind, dates = Dates, start = list(21, 4), end = list(21, 6)) -- GitLab From 443e5075e9a85a59a4974de5d65469d29e09a630 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 20 Jul 2023 18:16:56 +0200 Subject: [PATCH 40/87] Add new function PeriodPET, improve documentation and add unit tests --- R/PeriodPET.R | 297 +++++++++++++ R/PeriodSPEI.R | 726 ++++++++++++------------------- tests/testthat/test-PeriodPET.R | 178 ++++++++ tests/testthat/test-PeriodSPEI.R | 163 +++++-- 4 files changed, 868 insertions(+), 496 deletions(-) create mode 100644 R/PeriodPET.R create mode 100644 tests/testthat/test-PeriodPET.R diff --git a/R/PeriodPET.R b/R/PeriodPET.R new file mode 100644 index 0000000..fa4079c --- /dev/null +++ b/R/PeriodPET.R @@ -0,0 +1,297 @@ +#'Compute the Potential Evapotranspiration +#' +#'Compute the Potential evapotranspiration (PET) that is the amount of +#'evaporation and transpiration that would occur if a sufficient water source +#'were available. Reference evapotranspiration (ETo) is the amount of +#'evaporation and transpiration from a reference vegetation of grass. They are +#'usually considered equivalent. This set of functions calculate PET or ETo +#'according to the Thornthwaite, Hargreaves or Penman-Monteith equations. +#' +#'@param data A named list with the needed \code{s2dv_cube} objects containing +#' the seasonal forecast experiment in the data element for each variable. +#' Specific variables are needed for each method used in computing the +#' Potential Evapotranspiration. See parameter 'pet_method'. The accepted +#' variables are: 'tasmin' and 'tasmax', required for methods 'hargreaves' and +#' 'hargreaves_modified' and 'tas', required for method 'thornthwaite'. +#' Variable 'prlr' is always needed. The units for temperature variables +#' ('tasmin', 'tasmax' and 'tas') need to be in Celcius degrees; the units for +#' precipitation ('prlr') need to be in mm/month. +#'@param dates An array of temporal dimensions containing the Dates of +#' 'exp'. It must be of class 'Date' or 'POSIXct'. +#'@param lat A numeric vector containing the latitude values of 'exp'. +#'@param pet_method A character string indicating the method used to compute +#' the potential evapotranspiration. The accepted methods are: +#' 'hargreaves' and 'hargreaves_modified', that require the data to have +#' variables tasmin and tasmax; and 'thornthwaite', that requires variable +#' 'tas'. +#'@param time_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'syear'. +#'@param leadtime_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'time'. +#'@param lat_dim A character string indicating the name of the latitudinal +#' dimension. By default it is set by 'latitude'. +#'@param na.rm A logical value indicating whether NA values should be removed +#' from data. It is FALSE by default. 
+#'@param ncores An integer value indicating the number of cores to use in +#' parallel computation. +#' +#'@examples +#'dims <- c(time = 3, syear = 3, ensemble = 1, latitude = 1) +#' +#'exp_tasmax <- array(rnorm(360, 27.73, 5.26), dim = dims) +#'exp_tasmin <- array(rnorm(360, 14.83, 3.86), dim = dims) +#'exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims) +#'end_year <- 2012 +#'dates_exp <- as.POSIXct(c(paste0(2010:end_year, "-08-16"), +#' paste0(2010:end_year, "-09-15"), +#' paste0(2010:end_year, "-10-16")), "UTC") +#'dim(dates_exp) <- c(syear = 3, time = 3) +#' +#'lat <- c(40) +#' +#'exp1 <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) +#' +#'res <- PeriodPET(data = exp1, lat = lat, dates = dates_exp) +#' +#'@import SPEI +#'@import lubridate +#'@import multiApply +#'@export +PeriodPET <- function(data, dates, lat, pet_method = 'hargreaves', + time_dim = 'syear', leadtime_dim = 'time', + lat_dim = 'latitude', na.rm = FALSE, + ncores = NULL) { + + # Initial checks + ## data + if (!inherits(data, 'list')) { + stop("Parameter 'data' needs to be a named list with the needed variables.") + } + if (is.null(names(data))) { + stop("Parameter 'data' needs to be a named list with the variable names.") + } + if (any(sapply(data, function(x) is.null(names(dim(x)))))) { + stop("Parameter 'data' needs to be a list of arrays with dimension names.") + } + dims <- lapply(data, function(x) dim(x)) + first_dims <- dims[[1]] + all_equal <- all(sapply(dims[-1], function(x) identical(first_dims, x))) + if (!all_equal) { + stop("Parameter 'data' variables need to have the same dimensions.") + } + # lat + if (!is.numeric(lat)) { + stop("Parameter 'lat' must be numeric.") + } + if (!lat_dim %in% names(dims[[1]])) { + stop("Parameter 'data' must have 'lat_dim' dimension.") + } + if (any(sapply(dims, FUN = function(x) x[lat_dim] != length(lat)))) { + stop("Parameter 'lat' needs to have the same length of latitudinal", + "dimension of all the variables arrays in 'data'.") + } + + # data (2) + if (all(c('tasmin', 'tasmax', 'prlr') %in% names(data))) { + # hargreaves modified: 'tasmin', 'tasmax', 'prlr' and 'lat' + if (!(pet_method %in% c('hargreaves_modified', 'hargreaves'))) { + warning("Parameter 'pet_method' needs to be 'hargreaves' or ", + "'hargreaves_modified'. It is set to 'hargreaves_modified'.") + pet_method <- 'hargreaves_modified' + } + } else if (all(c('tasmin', 'tasmax') %in% names(data))) { + if (!(pet_method %in% c('hargreaves'))) { + warning("Parameter 'pet_method' will be set as 'hargreaves'.") + pet_method <- 'hargreaves' + } + } else if (c('tas') %in% names(data)) { + # thornthwaite: 'tas' (mean), 'lat' + if (!(pet_method == 'thornthwaite')) { + warning("Parameter 'pet_method' it is set to be 'thornthwaite'.") + pet_method <- 'thornthwaite' + } + } else { + stop("Parameter 'data' needs to be a named list with accepted ", + "variable names. 
See documentation.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!all(sapply(data, function(x) time_dim %in% names(dim(x))))) { + stop("Parameter 'time_dim' is not found in 'data' dimension.") + } + ## leadtime_dim + if (!is.character(leadtime_dim) | length(leadtime_dim) != 1) { + stop("Parameter 'leadtime_dim' must be a character string.") + } + if (!all(sapply(data, function(x) leadtime_dim %in% names(dim(x))))) { + stop("Parameter 'leadtime_dim' is not found in 'data' dimension.") + } + ## lat_dim + if (!is.character(lat_dim) | length(lat_dim) != 1) { + stop("Parameter 'lat_dim' must be a character string.") + } + if (!all(sapply(data, function(x) lat_dim %in% names(dim(x))))) { + stop("Parameter 'lat_dim' is not found in 'data' dimension.") + } + # dates + if (is.null(dates)) { + stop("Parameter 'dates' is missing, dates must be provided.") + } + if (!(is.Date(dates)) & !(is.POSIXct(dates))) { + stop("Parameter 'dates' is not of the correct class, ", + "only 'Date' and 'POSIXct' classes are accepted.") + } + if (!time_dim %in% names(dim(dates)) | !leadtime_dim %in% names(dim(dates))) { + stop("Parameter 'dates' must have 'time_dim' and 'leadtime_dim' ", + "dimension.") + } + if (!all(dim(data[[1]])[c(time_dim, leadtime_dim)] == + dim(dates)[c(time_dim, leadtime_dim)])) { + stop("Parameter 'dates' needs to have the same length as 'time_dim' ", + "and 'leadtime_dim' as 'data'.") + } + ## na.rm + if (!is.logical(na.rm) | length(na.rm) > 1) { + stop("Parameter 'na.rm' must be one logical value.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + # complete dates + dates_monthly <- .datesmask(dates) + + lat_mask <- array(lat, dim = c(1, length(lat))) + names(dim(lat_mask)) <- c('dat', lat_dim) + + # extract mask of NA locations to return to NA the final result + mask_na <- array(1, dim = dim(data[[1]])) + if (pet_method == 'hargreaves') { + varnames <- c('tasmax', 'tasmin') + mask_na[which(is.na(data$tasmax))] <- 0 + mask_na[which(is.na(data$tasmin))] <- 0 + } else if (pet_method == 'hargreaves_modified') { + varnames <- c('tasmax', 'tasmin', 'prlr') + mask_na[which(is.na(data$tasmax))] <- 0 + mask_na[which(is.na(data$tasmin))] <- 0 + mask_na[which(is.na(data$prlr))] <- 0 + } else if (pet_method == 'thornthwaite') { + varnames <- c('tas') + mask_na[which(is.na(data$tas))] <- 0 + } + + # replace NA with 0 + for (dd in 1:length(data)) { + data[[dd]][which(is.na(data[[dd]]))] <- 0 + } + + # prepare data + target_dims_data <- lapply(data[varnames], function(x) rep(c(leadtime_dim, time_dim), 1)) + pet <- Apply(data = c(list(lat_mask = lat_mask), data[varnames]), + target_dims = c(list(lat_mask = 'dat'), target_dims_data), + fun = .pet, + dates_monthly = dates_monthly, pet_method = pet_method, + leadtime_dim = leadtime_dim, time_dim = time_dim, + output_dims = c(leadtime_dim, time_dim), + ncores = ncores)$output1 + # reorder dims in pet_estimated + pos <- match(names(dim(data[[1]])), names(dim(pet))) + pet <- aperm(pet, pos) + + # restore original NAs from mask_na + pet[which(mask_na == 0)] <- NA + + return(pet) +} + +.pet <- function(lat_mask, data2, data3 = NULL, data4 = NULL, + dates_monthly, pet_method = 'hargreaves', + leadtime_dim = 'time', time_dim = 'syear') { + + dims <- dim(data2) + + # create a vector from data but adding 0 to achive 
complete time series + # of the considered period + # (starting in January of the first year) so that the solar radiation + # estimation is computed in each case for the correct month + + if (!is.null(data2)) { + data_tmp <- as.vector(data2) + data2 <- array(0, dim = length(dates_monthly)) + count <- 1 + for (dd in 1:length(dates_monthly)) { + if (dates_monthly[dd] == 1) { + data2[dd] <- data_tmp[count] + count <- count + 1 + } + } + rm(data_tmp) + } + if (!is.null(data3)) { + data_tmp <- as.vector(data3) + data3 <- array(0, dim = length(dates_monthly)) + count <- 1 + for (dd in 1:length(dates_monthly)) { + if (dates_monthly[dd] == 1) { + data3[dd] <- data_tmp[count] + count <- count + 1 + } + } + rm(data_tmp) + } + if (!is.null(data4)) { + data_tmp <- as.vector(data4) + data4 <- array(0, dim = length(dates_monthly)) + count <- 1 + for (dd in 1:length(dates_monthly)) { + if (dates_monthly[dd] == 1) { + data4[dd] <- data_tmp[count] + count <- count + 1 + } + } + rm(data_tmp) + } + if (pet_method == 'hargreaves') { + pet <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), + lat = lat_mask, na.rm = FALSE, verbose = FALSE) + # line to return the vector to the size of the actual original data + pet <- array(pet[which(dates_monthly == 1)], dim = dims) + } + + if (pet_method == 'hargreaves_modified') { + pet <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), + lat = lat_mask, Pre = as.vector(data4), na.rm = FALSE, + verbose = FALSE) + pet <- array(pet[which(dates_monthly == 1)], dim = dims) + } + + if (pet_method == 'thornthwaite') { + pet <- thornthwaite(as.vector(data2), lat = lat_mask, na.rm = TRUE, + verbose = FALSE) + # line to return the vector to the size of the actual original data + pet <- array(pet[which(dates_monthly == 1)], dim = dims) + } + return(pet) +} + + +.datesmask <- function(dates) { + ini <- as.Date(paste(lubridate::year(min(dates)), 01, 01, sep = '-')) + end <- as.Date(paste(lubridate::year(max(dates)), 12, 31, sep = '-')) + daily <- as.Date(ini:end) + monthly <- daily[which(lubridate::day(daily) == 1)] + dates_mask <- array(0, dim = length(monthly)) + for (dd in 1:length(dates)) { + ii <- which(monthly == as.Date(paste(lubridate::year(dates[dd]), + lubridate::month(dates[dd]), + 01, sep = '-'))) + dates_mask[ii] <- 1 + } + return(dates_mask) +} \ No newline at end of file diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index 8a0b7e2..9bc1660 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -1,14 +1,42 @@ #'Compute the Standardised Precipitation-Evapotranspiration Index -#' +#' #'Calculation of the Standardised Precipitation-Evapotranspiration Index (SPEI) #'that is a multiscalar drought index based on climatic data. It can be used for #'determining the onset, duration and magnitude of drought conditions with #'respect to normal conditions in a variety of natural and managed systems such -#'as crops, ecosystems, rivers, water resources, etc. The SPI is calculated -#'using monthly (or weekly) precipitation as the input data. The SPEI uses the -#'monthly (or weekly) difference between precipitation and pet. This represents -#'a simple climatic water balance which is calculated at different time scales -#'to obtain the SPEI. +#'as crops, ecosystems, rivers, water resources, etc. The idea behind the SPEI +#'is to compare the highest possible evapotranspiration with the current water +#'availability. The SPEI uses the monthly (or weekly) difference between +#'precipitation and potential evapotranspiration. 
This represents a simple +#'climatic water balance which is calculated at different time scales to obtain +#'the SPEI. This function is built to be compatible with other tools +#'that work with the 's2dv_cube' object class. The input data must be this object +#'class. If you don't work with 's2dv_cube', see PeriodSPEI. +#' +#'Next, some specifications for the calculation of this indicator will be +#'discussed. On the one hand, the model used to calculate potential +#'evapotranspiration is specified with the pet_method parameter (hargreaves, +#'hargreaves_modified or thornthwaite). On the other hand, the time scale +#'at which the SPEI is accumulated (SPEI3, SPEI6...) is chosen with the +#'accum parameter, which indicates the number of time steps to +#'accumulate throughout leadtime_dim. Since the accumulation is done over the +#'elapsed time steps, there will be no complete accumulations until reaching the +#'time step equal to the value of the parameter. For this reason, in the +#'result, the first values along the dimension where the accumulation has been +#'carried out will be NA since they do not include +#'complete accumulations. Also, the standardization parameter specifies whether the +#'standardization is applied (TRUE or FALSE). If it is TRUE, the data is fit to a +#'probability distribution to transform the original values to standardized +#'units that are comparable in space and time and at different SPEI time scales. +#'The na.rm parameter is a logical parameter used to decide whether to remove +#'the NA values from the data before doing the calculation. It must be taken +#'into account that if na.rm == FALSE and there is some NA value at the specific +#'coordinates for which the SPEI is computed, standardization cannot be carried out +#'for those coordinates and therefore, the result will be filled with NA for the +#'specific coordinates. However, when na.rm == TRUE, if the amount of data for +#'those specific coordinates is smaller than 4, the standardization cannot be carried +#'out either because there are not enough data, and the result will also be filled +#'with NAs for those coordinates. #' #'@param exp A named list with the needed \code{s2dv_cube} objects containing #' the seasonal forecast experiment in the data element for each variable. @@ -32,7 +60,9 @@ #'@param lat_dim A character string indicating the name of the latitudinal #' dimension. By default it is set by 'latitude'. #'@param accum An integer value indicating the number of months for the -#' accumulation for each variable. +#' accumulation for each variable. When it is greater than 1, the result will +#' be filled with NA up to the accum-th position of the time dimension, due +#' to the accumulation of previous months. #'@param ref_period A list with two numeric values with the starting and end #' points of the reference period used for computing the index. The default #' value is NULL indicating that the first and end values in data will be @@ -68,33 +98,31 @@ #' default. #'@param distribution A character string indicating the name of the distribution #' function to be used for computing the SPEI. The accepted names are: -#' 'log-Logistic', 'Gamma' or 'PearsonIII'. It is set to 'log-Logistic' by -#' default. -#'@param fit A character string indicating the name of the method used for -#' computing the distribution function parameters The accepteed names are: -#' 'ub-pwm', 'pp-pwm' and 'max-lik'. It is set to 'ub-pwm' by default. +#' 'log-Logistic' and 'Gamma'. 
It is set to 'log-Logistic' by default. The +#' 'Gamma' method only works when only precipitation is provided and other +#' variables are 0 because it is positive defined (SPI indicator). #'@param param_error A numeric value with the error accepted. #'@param handle_infinity A logical value wether to return Infinite values (TRUE) #' or not (FALSE). #'@param na.rm A logical value indicating whether NA values should be removed -#' from data. It is FALSE by default. +#' from data. It is FALSE by default. If it is FALSE and there are NA values, +#' (if standardization is TRUE) all values of other dimensions except time_dim +#' and leadtime_dim will be set to NA directly. On the other hand, if it is +#' TRUE, if the data from other dimensions except time_dim and leadtime_dim is +#' not reaching 4 values, it is not enough values to estimate the parameters +#' and the result will include NA. #'@param ncores An integer value indicating the number of cores to use in #' parallel computation. #' -#'@return A list with elements: -#'\itemize{ -#' \item{'exp', if 'standarization' is TRUE an 's2dv_cube' conaining the 'SPEI' -#' in element data from 'exp' array with the same dimensions as 'exp'. -#' If it is FALSE, it is an array with the accumulated values of PET -#' minus 'prlr' data.} -#' \item{'exp_cor', if 'standarization' is TRUE and if 'exp_cor' is not -#' NULL. It is an 's2dv_cube' with the SPEI data from 'exp_cor' in -#' element 'data'. If 'standarization' is FALSE, only the accumulated -#' values of PET minus 'prlr' is returned.} -#' \item{'params', returned if 'standarization' is TRUE, it contains the -#' parameters used for the standarization of 'exp' that are used for -#' computing the 'SPEI' for 'exp_cor'.} -#' } +#'@return An 's2dv_cube' object containing the SPEI multidimensional array in +#'element \code{data}. If 'exp_cor' is provided, only results from 'exp_cor' +#'will be provided. The parameters of the standardization will only be returned +#'if 'return_params' is TRUE. The SPEI will only be computed if +#''standardization' is TRUE. If 'standardization' is FALSE, only the climatic +#'water balance (precipitation minus evapotranspiration) will be returned. The +#'resultant arrays will have the same dimensions as the initial input data. The +#'other elements in the 's2dv_cube' will be updated with the combined +#'information of the input data arrays. 
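[Editor's illustrative aside, not part of the patch: a minimal R sketch of the NA padding along 'leadtime_dim' that the accumulation described above produces, assuming zoo::rollapply as used elsewhere in this patch series.]

# Sketch only: accumulating over 'accum' steps leaves accum - 1 leading NAs
library(zoo)
accum <- 3
x <- c(10, 0, 5, 2, 8, 1)                      # one series along leadtime_dim
acc <- rollapply(x, width = accum, FUN = sum)  # complete 3-step sums only
acc <- c(rep(NA, accum - 1), acc)              # position i holds the sum ending at step i
acc
# [1] NA NA 15  7 15 11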
#' #'@examples #'dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, @@ -147,9 +175,10 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, pet_exp = NULL, pet_expcor = NULL, standardization = TRUE, cross_validation = FALSE, pet_method = 'hargreaves', method = 'parametric', - distribution = 'log-Logistic', fit = 'ub-pwm', + distribution = 'log-Logistic', param_error = -9999, handle_infinity = FALSE, - na.rm = FALSE, ncores = NULL) { + return_params = FALSE, na.rm = FALSE, + ncores = NULL) { # Check 's2dv_cube' if (is.null(exp)) { @@ -199,65 +228,98 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, standardization = standardization, cross_validation = cross_validation, pet_method = pet_method, method = method, - distribution = distribution, fit = fit, - param_error = param_error, - handle_infinity = handle_infinity, na.rm = na.rm, + distribution = distribution, + param_error = param_error, handle_infinity = handle_infinity, + return_params = return_params, na.rm = na.rm, ncores = ncores) - if (is.null(exp_cor)) { + + if (!is.null(exp_cor)) { + source_files_expcor <- lapply(exp_cor, function(x) {x$attrs$source_files}) source_files <- lapply(exp, function(x) {x$attrs$source_files}) - suppressWarnings( - res_exp <- CSTools::s2dv_cube(data = res[[1]], coords = exp[[1]]$coords, - varName = c('SPEI'), - Dates = exp[[1]]$attrs$Dates, - source_files = source_files, - when = Sys.time()) - ) - if (standardization) { - return(list(exp = res_exp, params = res[[2]])) - } else { - return(res_exp) - } + source_files <- c(exp = source_files, exp_cor = source_files_expcor) + coords <- exp_cor[[1]]$coords + Dates <- exp_cor[[1]]$attrs$Dates + metadata <- exp_cor[[1]]$attrs$Variable$metadata + metadata_names <- names(metadata) } else { - source_files_exp <- lapply(exp, function(x) {x$attrs$source_files}) + source_files <- lapply(exp, function(x) {x$attrs$source_files}) + coords <- exp[[1]]$coords + Dates <- exp[[1]]$attrs$Dates + metadata <- exp[[1]]$attrs$Variable$metadata + metadata_names <- names(metadata) + } + + if (standardization) { + varname <- 'SPEI' + } else { + varname <- 'Precipitation minus accumulated PET' + } + + if (return_params & standardization) { + metadata_names <- intersect(names(dim(res[[1]])), metadata_names) suppressWarnings( - res_exp <- CSTools::s2dv_cube(data = res[[1]], coords = exp[[1]]$coords, - varName = c('SPEI'), - Dates = exp[[1]]$attrs$Dates, - source_files = source_files_exp, - when = Sys.time()) + res[[1]] <- CSTools::s2dv_cube(data = res[[1]], coords = coords, + varName = varname, + metadata = metadata[metadata_names], + Dates = Dates, + source_files = source_files, + when = Sys.time()) ) - source_files_expcor <- lapply(exp_cor, function(x) {x$attrs$source_files}) + return(list(spei = res[[1]], params = res[[2]])) + } else { + metadata_names <- intersect(names(dim(res)), metadata_names) suppressWarnings( - res_expcor <- CSTools::s2dv_cube(data = res[[2]], coords = exp_cor[[1]]$coords, - varName = c('SPEI'), - Dates = exp_cor[[1]]$attrs$Dates, - source_files = source_files_expcor, - when = Sys.time()) + res <- CSTools::s2dv_cube(data = res, coords = coords, + varName = varname, + metadata = metadata[metadata_names], + Dates = Dates, + source_files = source_files, + when = Sys.time()) ) - - if (standardization) { - return(list(exp = res_exp, exp_cor = res_expcor, params = res[[3]])) - } else { - return(list(exp = res_exp, exp_cor = res_expcor)) - } + return(res) } - return(res) } #'Compute the Standardised Precipitation-Evapotranspiration Index -#' +#' 
#'Calculation of the Standardised Precipitation-Evapotranspiration Index (SPEI) #'that is a multiscalar drought index based on climatic data. It can be used for #'determining the onset, duration and magnitude of drought conditions with #'respect to normal conditions in a variety of natural and managed systems such -#'as crops, ecosystems, rivers, water resources, etc. The SPI is calculated -#'using monthly (or weekly) precipitation as the input data. The SPEI uses the -#'monthly (or weekly) difference between precipitation and pet. This represents -#'a simple climatic water balance which is calculated at different time scales -#'to obtain the SPEI. +#'as crops, ecosystems, rivers, water resources, etc. The idea behind the SPEI +#'is to compare the highest possible evapotranspiration with the current water +#'availability. The SPEI uses the monthly (or weekly) difference between +#'precipitation and potential evapotranspiration. This represents a simple +#'climatic water balance which is calculated at different time scales to obtain +#'the SPEI. #' -#'@param exp A named list with the needed \code{s2dv_cube} objects containing +#'Next, some specifications for the calculation of this indicator will be +#'discussed. On the one hand, the model to be used to calculate potential +#'evapotranspiration is specified with the pet_method parameter (hargreaves, +#'hargraves modified or thornwhite). On the other hand, to choose the time scale +#'in which you want to accumulate the SPEI (SPEI3, SPEI6...) is done using the +#'accum parameter, where you must indicate the number of time steps you want to +#'accumulate throughout leadtime_dim. Since the accumulation is done for the +#'elapsed time steps, there will be no complete accumulations until reaching the +#'time instant equal to the value of the parameter. For this reason, in the +#'result, we will find that for the dimension where the accumulation has been +#'carried out, the values of the array will be NA since they do not include +#'complete accumulations. Also, there is a parameter to specify if the +#'standardization that can be TRUE or FALSE. If it is TRUE, the data is fit to a +#'probability distribution to transform the original values to standardized +#'units that are comparable in space and time and at different SPEI time scales. +#'The na.rm parameter is a logical parameter used to decide whether to remove +#'the NA values from the data before doing the calculation. It must be taken +#'into account that if na.rm == FALSE and there is some NA value in the specific +#'coordinates which the SPEI is computed, standardization cannot be carried out +#'for those coordinates and therefore, the result will be filled with NA for the +#'specific coordinates. However, when na.rm == TRUE, if the amount of data for +#'those specific coordinates is smaller than 4, it will not be possible to carry +#'out because we will not have enough data and the result will be also filled +#'with NAs for that coordinates. +#' +#'@param exp A named list with multidimensional array objects containing #' the seasonal forecast experiment in the data element for each variable. #' Specific variables are needed for each method used in computing the #' Potential Evapotranspiration. See parameter 'pet_method'. The accepted @@ -269,7 +331,7 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, #'@param dates_exp An array of temporal dimensions containing the Dates of #' 'exp'. It must be of class 'Date' or 'POSIXct'. 
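[Editor's illustrative aside, not part of the patch: one way to build 'dates_exp' so that its class and dimension names match the temporal dimensions of 'exp', mirroring the arrays used in the test files of this patch series.]

# Sketch only: a 'Date' array with named temporal dimensions
dates_exp <- as.Date(c(paste0(2010:2015, "-08-16"),
                       paste0(2010:2015, "-09-15"),
                       paste0(2010:2015, "-10-16")))
dim(dates_exp) <- c(syear = 6, time = 3)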
#'@param lat A numeric vector containing the latitude values of 'exp'. -#'@param exp_cor A named list with the needed \code{s2dv_cube} objects for each +#'@param exp_cor A named list with multidimensional array objects for each #' variable in which the quantile PeriodSPEI should be applied. If it is not #' specified, the PeriodSPEI is calculated from object 'exp'. #'@param dates_expcor An array of temporal dimensions containing the Dates of @@ -283,8 +345,10 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, #' computed for individual members. #'@param lat_dim A character string indicating the name of the latitudinal #' dimension. By default it is set by 'latitude'. -#'@param accum An integer value indicating the number of months for the -#' accumulation for each variable. +#'@param accum accum An integer value indicating the number of months for the +#' accumulation for each variable. When it is greater than 1, the result will +#' be filled with NA until the accum time_dim dimension number due to the +#' accumulation to previous months. #'@param ref_period A list with two numeric values with the starting and end #' points of the reference period used for computing the index. The default #' value is NULL indicating that the first and end values in data will be @@ -318,32 +382,31 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, #' If can be: 'parametric' or 'non-parametric'. #'@param distribution A character string indicating the name of the distribution #' function to be used for computing the SPEI. The accepted names are: -#' 'log-Logistic', 'Gamma' or 'PearsonIII'. It is set to 'log-Logistic' by -#' default. -#'@param fit A character string indicating the name of the method used for -#' computing the distribution function parameters The accepteed names are: -#' 'ub-pwm', 'pp-pwm' and 'max-lik'. It is set to 'ub-pwm' by default. +#' 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The +#' 'Gamma' method only works when only precipitation is provided and other +#' variables are 0 because it is positive defined (SPI indicator). #'@param param_error A numeric value with the error accepted. #'@param handle_infinity A logical value wether to return Infinite values (TRUE) #' or not (FALSE). +#'@param return_params A logical value indicating wether to return parameters +#' array (TRUE) or not (FALSE). It is FALSE by default. #'@param na.rm A logical value indicating whether NA values should be removed -#' from data. It is FALSE by default. +#' from data. It is FALSE by default. If it is FALSE and there are NA values, +#' (if standardization is TRUE) all values of other dimensions except time_dim +#' and leadtime_dim will be set to NA directly. On the other hand, if it is +#' TRUE, if the data from other dimensions except time_dim and leadtime_dim is +#' not reaching 4 values, it is not enough values to estimate the parameters +#' and the result will include NA. #'@param ncores An integer value indicating the number of cores to use in #' parallel computation. #' -#'@return A list with elements: -#'\itemize{ -#' \item{'exp', if 'standarization' is TRUE an array conaining SPEI data from -#' 'exp' array with the same dimensions as 'exp'. If it is FALSE, it -#' is an array with the accumulated values of PET minus 'prlr' data.} -#' \item{'exp_cor', if 'standarization' is TRUE and if 'exp_cor' is not -#' NULL. It is an array with the SPEI data from 'exp_cor'. 
If -#' 'standarization' is FALSE, only the accumulated values of PET minus -#' 'prlr' is returned.} -#' \item{'params', returned if 'standarization' is TRUE, it contains the -#' parameters used for the standarization of 'exp' that are used for -#' computing the 'SPEI' for 'exp_cor'.} -#'} +#'@return An 's2dv_cube' object containing the SPEI multidimensional array in +#'element \code{data}. If 'exp_cor' is provided, only results from 'exp_cor' +#'will be provided. The parameters of the standardization will only be returned +#'if 'return_params' is TRUE. The SPEI will only be computed if +#''standardization' is TRUE. If 'standardization' is FALSE, only the climatic +#'water balance (precipitation minus evapotranspiration) will be returned. The +#'resultant arrays will have the same dimensions as the initial input data. #' #'@examples #'dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, @@ -391,9 +454,9 @@ PeriodSPEI <- function(exp, dates_exp, lat, pet_exp = NULL, pet_expcor = NULL, standardization = TRUE, cross_validation = FALSE, pet_method = 'hargreaves', method = 'parametric', - distribution = 'log-Logistic', fit = 'ub-pwm', + distribution = 'log-Logistic', param_error = -9999, handle_infinity = FALSE, - na.rm = FALSE, ncores = NULL) { + return_params = FALSE, na.rm = FALSE, ncores = NULL) { # Initial checks ## exp @@ -413,7 +476,6 @@ PeriodSPEI <- function(exp, dates_exp, lat, stop("Parameter 'exp' variables need to have the same dimensions.") } - ## exp_cor if (!is.null(exp_cor)) { if (!inherits(exp_cor, 'list')) { @@ -433,53 +495,24 @@ PeriodSPEI <- function(exp, dates_exp, lat, stop("Parameter 'exp_cor' variables need to have the same dimensions.") } } - - # lat - if (!is.numeric(lat)) { - stop("Parameter 'lat' must be numeric.") - } - if (!lat_dim %in% names(dims[[1]])) { - stop("Parameter 'exp' must have 'lat_dim' dimension.") - } - if (any(sapply(dims, FUN = function(x) x[lat_dim] != length(lat)))) { - stop("Parameter 'lat' needs to have the same length of latitudinal", - "dimension of all the variables arrays in 'exp'.") - } - if (!is.null(exp_cor)) { - if (!lat_dim %in% names(dimscor[[1]])) { - stop("Parameter 'exp_cor' must have 'lat_dim' dimension.") - } - if (any(sapply(dimscor, FUN = function(x) x[lat_dim] != length(lat)))) { - stop("Parameter 'lat' needs to have the same length of latitudinal", - "dimension of all the variables arrays in 'exp'.") - } - } - # Variable checks ## exp (2) pet <- vector("list", 2) - if (is.null(pet_exp)) { - if (all(c('tasmin', 'tasmax', 'prlr') %in% names(exp))) { - # hargreaves modified: 'tasmin', 'tasmax', 'prlr' and 'lat' - if (!(pet_method[1] %in% c('hargreaves_modified', 'hargreaves'))) { - warning("Parameter 'pet_method' needs to be 'hargreaves' or ", - "'hargreaves_modified'. It is set to 'hargreaves'.") - pet_method[1] <- 'hargreaves' - } - } else if (all(c('tas', 'prlr') %in% names(exp))) { - # thornthwaite: 'tas' (mean), 'lat' - if (!(pet_method[1] == 'thornthwaite')) { - warning("Parameter 'pet_method' it is set to be 'thornthwaite'.") - pet_method[1] <- 'thornthwaite' - } - } else { - stop("Parameter 'exp' needs to be a named list with accepted ", - "variable names if 'pet_exp' is not provided. 
See documentation.") + if (!('prlr' %in% names(exp))) { + stop("Variable 'prlr' is not included in 'exp'.") + } + ## exp_cor (2) + if (!is.null(exp_cor)) { + if (!('prlr' %in% names(exp_cor))) { + stop("Variable 'prlr' is not included in 'exp_cor'.") } - } else { - if (!('prlr' %in% names(exp))) { - stop("Variable 'prlr' is not included in 'exp'.") + if (length(pet_method) == 1) { + pet_method <- rep(pet_method, 2) } + } + + ## pet_exp + if (!is.null(pet_exp)) { if (length(dim(exp[['prlr']])) != length(dim(pet_exp))) { stop("Parameter 'pet_exp' must have the same length of all the ", "dimensions as variable 'prlr' in 'exp'.") @@ -493,53 +526,27 @@ PeriodSPEI <- function(exp, dates_exp, lat, pet_exp <- aperm(pet_exp, pos) } pet[[1]] <- pet_exp + } else if (is.null(dates_exp)) { + stop("Parameter 'dates_exp' must be provided.") } - ## exp_cor (2) + ## pet_expcor if (!is.null(exp_cor)) { - if (is.null(pet_expcor)) { - if (length(exp_cor) < 1) { - exp_cor <- NULL - } else { - if (length(pet_method) == 1) { - pet_method <- rep(pet_method, 2) - } + if (!is.null(pet_expcor)) { + if (length(dim(exp_cor[['prlr']])) != length(dim(pet_expcor))) { + stop("Parameter 'pet_expcor' must have the same length of all the ", + "dimensions as variable 'prlr' in 'exp_cor'.") } - if (all(c('tasmin', 'tasmax', 'prlr') %in% names(exp_cor))) { - # hargreaves modified: 'tasmin', 'tasmax', 'prlr' and 'lat' - if (!(pet_method[2] %in% c('hargreaves_modified', 'hargreaves'))) { - warning("Parameter 'pet_method' needs to be 'hargreaves' or ", - "'hargreaves_modified'. It is set to 'hargreaves'.") - pet_method[2] <- 'hargreaves' - } - } else if (all(c('tas', 'prlr') %in% names(exp_cor))) { - # thornthwaite: 'tas' (mean), 'lat' - if (!(pet_method[2] == 'thornthwaite')) { - warning("Parameter 'pet_method' it is set to be 'thornthwaite'.") - pet_method[2] <- 'thornthwaite' - } - } else { - stop("Parameter 'exp_cor' needs to be a list with the needed ", - "variables if 'pet_expcor' is not provided.") - } - } else { - if (!is.null(exp_cor)) { - if (!('prlr' %in% names(exp_cor))) { - stop("Variable 'prlr' is not included in 'exp_cor'.") - } - if (length(dim(exp_cor[['prlr']])) != length(dim(pet_expcor))) { - stop("Parameter 'pet_expcor' must have the same length of all the ", - "dimensions as variable 'prlr' in 'exp_cor'.") - } - if (!all(dim(exp_cor[['prlr']]) %in% dim(pet_expcor))) { - stop("Parameter 'pet_expcor' must have the same length of all the ", - "dimensions as variable 'prlr' in 'exp_cor'.") - } - if (any(names(dim(exp_cor[['prlr']])) != names(dim(pet_expcor)))) { - pos <- match(names(dim(exp_cor[['prlr']])), names(dim(pet_expcor))) - pet_expcor <- aperm(pet_expcor, pos) - } + if (!all(dim(exp_cor[['prlr']]) %in% dim(pet_expcor))) { + stop("Parameter 'pet_expcor' must have the same length of all the ", + "dimensions as variable 'prlr' in 'exp_cor'.") } + if (any(names(dim(exp_cor[['prlr']])) != names(dim(pet_expcor)))) { + pos <- match(names(dim(exp_cor[['prlr']])), names(dim(pet_expcor))) + pet_expcor <- aperm(pet_expcor, pos) + } pet[[2]] <- pet_expcor + } else if (is.null(dates_expcor)) { + stop("Parameter 'dates_expcor' must be provided.") } } @@ -579,6 +586,7 @@ PeriodSPEI <- function(exp, dates_exp, lat, stop("Parameter 'memb_dim' is not found in 'exp_cor' dimension.") } } + ## lat_dim if (!is.character(lat_dim) | length(lat_dim) != 1) { stop("Parameter 'lat_dim' must be a character string.") @@ -591,7 +599,8 @@ PeriodSPEI <- function(exp, dates_exp, lat, stop("Parameter 'lat_dim' is not found in 'exp_cor' 
dimension.") } } - # dates + + ## dates if (is.null(dates_exp)) { stop("Parameter 'dates_exp' is missing, dates must be provided.") } @@ -628,12 +637,41 @@ PeriodSPEI <- function(exp, dates_exp, lat, "'time_dim' and 'leadtime_dim' as 'exp_cor'.") } } + ## accum if (accum > dim(exp[[1]])[leadtime_dim]) { stop(paste0("Cannot compute accumulation of ", accum, " months because ", "loaded data has only ", dim(exp[[1]])[leadtime_dim], " months.")) } + ## ref_period + if (!is.null(ref_period)) { + if (length(ref_period) != 2) { + warning("Parameter 'ref_period' must be of length two indicating the ", + "first and end years of the reference period. It will not ", + "be used.") + ref_period <- NULL + } else if (!all(sapply(ref_period, is.numeric))) { + warning("Parameter 'ref_period' must be a numeric vector indicating the ", + "'start' and 'end' years of the reference period. It will not ", + "be used.") + ref_period <- NULL + } else if (ref_period[[1]] > ref_period[[2]]) { + warning("In parameter 'ref_period' 'start' cannot be after 'end'. It ", + "will not be used.") + ref_period <- NULL + } else if (!all(unlist(ref_period) %in% year(dates_exp))) { + warning("Parameter 'ref_period' contain years outside the dates. ", + "It will not be used.") + ref_period <- NULL + } else { + years <- year(ClimProjDiags::Subset(dates_exp, along = leadtime_dim, + indices = 1)) + ref_period[[1]] <- which(ref_period[[1]] == years) + ref_period[[2]] <- which(ref_period[[2]] == years) + } + } + ## standardization if (!is.logical(standardization)) { stop("Parameter 'standardization' must be a logical value.") @@ -666,17 +704,13 @@ PeriodSPEI <- function(exp, dates_exp, lat, stop("Parameter 'method' must be a character string containing one of ", "the following methods: 'parametric' or 'non-parametric'.") } + ## distribution if (!(distribution %in% c('log-Logistic', 'Gamma', 'PearsonIII'))) { stop("Parameter 'distribution' must be a character string containing one ", "of the following distributions: 'log-Logistic', 'Gamma' or ", "'PearsonIII'.") } - ## fit - if (!(fit %in% c('max-lik', 'ub-pwm', 'pp-pwm'))) { - stop("Parameter 'fit' must be a character string containing one of ", - "the following fit methods: 'max-lik', 'ub-pwm', 'pp-pwm'.") - } ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | @@ -684,6 +718,12 @@ PeriodSPEI <- function(exp, dates_exp, lat, stop("Parameter 'ncores' must be a positive integer.") } } + + ## na.rm + if (!is.logical(na.rm)) { + stop("Parameter 'na.rm' must be logical.") + } + ## params if (!is.null(params)) { if (!is.numeric(params)) { @@ -706,26 +746,14 @@ PeriodSPEI <- function(exp, dates_exp, lat, } } - # Complete dates - dates_monthly <- NULL - k = 0 - for (dates in .return2list(dates_exp, dates_expcor)) { - k = k + 1 - ini_date <- as.Date(paste(lubridate::year(min(dates)), 01, 01, sep= '-')) - end_date <- as.Date(paste(lubridate::year(max(dates)), 12, 31, sep='-')) - dates_complete_daily <- as.Date(ini_date:end_date) - - dates_complete_daily_to_monthly <- lubridate::day(dates_complete_daily) - dates_complete_monthly <- dates_complete_daily[which(dates_complete_daily_to_monthly == 1)] - dates_monthly[[k]] <- array(0, dim = length(dates_complete_monthly)) - for (dd in 1:length(dates)) { - ii <- which(dates_complete_monthly == as.Date(paste(lubridate::year(dates[dd]), - lubridate::month(dates[dd]), - 01, sep = '-'))) - dates_monthly[[k]][ii] <- 1 - } + ## return_params + if (!is.logical(return_params)) { + stop("Parameter 
'return_params' must be logical.") } + # Complete dates + dates_monthly <- .return2list(dates_exp, dates_expcor) + # Compute PeriodSPEI k = 0 spei_res <- NULL @@ -735,238 +763,60 @@ PeriodSPEI <- function(exp, dates_exp, lat, k = k + 1 # Evapotranspiration estimation (unless pet is already provided) if (is.null(pet[[k]]) | computed_pet) { - pet[[k]] <- .Evapotranspiration(data = data, - dates_monthly = dates_monthly[[k]], - lat = lat, pet_method = pet_method[k], - time_dim = time_dim, - leadtime_dim = leadtime_dim, - lat_dim = lat_dim, na.rm = na.rm, - ncores = ncores) + pet[[k]] <- PeriodPET(data = data, dates = dates_monthly[[k]], + lat = lat, pet_method = pet_method[k], + time_dim = time_dim, leadtime_dim = leadtime_dim, + lat_dim = lat_dim, na.rm = na.rm, ncores = ncores) computed_pet <- TRUE } # Accumulation diff_p_pet <- data$prlr - pet[[k]] - data_accum <- .Accumulation(data = diff_p_pet, - dates_monthly = dates_monthly[[k]], accum = accum, - time_dim = time_dim, leadtime_dim = leadtime_dim, - ncores = ncores) + + accumulated <- Apply(data = list(diff_p_pet), + target_dims = list(data = c(leadtime_dim, time_dim)), + output_dims = c(leadtime_dim, time_dim), + fun = function(data, accum) { + return(rollapply(data = data, width = accum, FUN = sum)) + }, accum = accum, ncores = ncores)$output1 + # Standardization if (standardization) { - spei <- .Standardization(data = data_accum, params = params, + spei <- .Standardization(data = accumulated, params = params, accum = accum, time_dim = time_dim, leadtime_dim = leadtime_dim, - memb_dim = memb_dim, - ref_period = ref_period, + memb_dim = memb_dim, ref_period = ref_period, cross_validation = cross_validation, handle_infinity = handle_infinity, param_error = param_error, method = method, distribution = distribution, - fit = fit, na.rm = na.rm, ncores = ncores) + na.rm = na.rm, ncores = ncores) + ref_period <- NULL params <- spei$params - pos <- match(names(dim(data[[1]])), names(dim(spei[[1]]))) - spei[[1]] <- aperm(spei[[1]], pos) - - spei_res[[k]] <- spei[[1]] + spei_res <- spei[[1]] } else { - pos <- match(names(dim(data[[1]])), names(dim(data_accum))) - data_accum <- aperm(data_accum, pos) - - spei_res[[k]] <- data_accum + spei_res <- accumulated } + spei_res <- Apply(data = list(spei_res), target_dims = leadtime_dim, + fun = function(x, accum, leadtime_dim) { + res <- c(rep(NA, accum-1), x) + dim(res) <- length(res) + names(dim(res)) <- leadtime_dim + return(res) + }, accum = accum, leadtime_dim = leadtime_dim)$output1 + pos <- match(names(dim(data[[1]])), names(dim(spei_res))) + spei_res <- aperm(spei_res, pos) } if (standardization) { - spei_res[[k+1]] <- params - if (is.null(exp_cor)) { - names(spei_res) <- c('exp', 'params') + if (return_params) { + return(list(spei = spei_res, params = params)) } else { - names(spei_res) <- c('exp', 'exp_cor', 'params') + return(spei_res) } } else { - if (is.null(exp_cor)) { - names(spei_res) <- c('exp') - } else { - names(spei_res) <- c('exp', 'exp_cor') - } - } - return(spei_res) -} - -.Evapotranspiration <- function(data, dates_monthly, lat, pet_method = 'hargreaves', - time_dim = 'syear', leadtime_dim = 'time', - lat_dim = 'latitude', na.rm = FALSE, - ncores = NULL) { - - lat_mask <- array(lat, dim = c(1, length(lat))) - names(dim(lat_mask)) <- c('dat', lat_dim) - - # extract mask of NA locations to return to NA the final result - mask_na <- array(1, dim = dim(data[[1]])) - if (pet_method == 'hargreaves') { - varnames <- c('tasmax', 'tasmin') - mask_na[which(is.na(data$tasmax))] <- 0 - 
mask_na[which(is.na(data$tasmin))] <- 0 - } else if (pet_method == 'hargreaves_modified') { - varnames <- c('tasmax', 'tasmin', 'prlr') - mask_na[which(is.na(data$tasmax))] <- 0 - mask_na[which(is.na(data$tasmin))] <- 0 - mask_na[which(is.na(data$prlr))] <- 0 - } else if (pet_method == 'thornthwaite') { - varnames <- c('tas') - mask_na[which(is.na(data$tas))] <- 0 - } - - # replace NA with 0 - for (dd in 1:length(data)) { - data[[dd]][which(is.na(data[[dd]]))] <- 0 - } - - # prepare data - target_dims_data <- lapply(data[varnames], function(x) rep(c(leadtime_dim, time_dim), 1)) - pet <- Apply(data = c(list(lat_mask = lat_mask), data[varnames]), - target_dims = c(list(lat_mask = 'dat'), target_dims_data), - fun = .evapotranspiration, - dates_monthly = dates_monthly, pet_method = pet_method, - leadtime_dim = leadtime_dim, time_dim = time_dim, - output_dims = c(leadtime_dim, time_dim), - ncores = ncores)$output1 - # reorder dims in pet_estimated - pos <- match(names(dim(data[[1]])), names(dim(pet))) - pet <- aperm(pet, pos) - - # restore original NAs from mask_na - pet[which(mask_na == 0 )] <- NA - - return(pet) -} - -.evapotranspiration <- function(lat_mask, data2, data3 = NULL, data4 = NULL, - dates_monthly, pet_method = 'hargreaves', - leadtime_dim = 'time', time_dim = 'syear') { - - dims <- dim(data2) - - # create a vector from data but adding 0 to achive complete time series - # of the considered period - # (starting in January of the first year) so that the solar radiation - # estimation is computed in each case for the correct month - - if (!is.null(data2)) { - data_tmp <- as.vector(data2) - data2 <- array(0, dim = length(dates_monthly)) - count <- 1 - for (dd in 1:length(dates_monthly)) { - if (dates_monthly[dd] == 1) { - data2[dd] <- data_tmp[count] - count <- count + 1 - } - } - rm(data_tmp) + return(spei_res) } - if (!is.null(data3)) { - data_tmp <- as.vector(data3) - data3 <- array(0, dim = length(dates_monthly)) - count <- 1 - for (dd in 1:length(dates_monthly)) { - if (dates_monthly[dd] == 1) { - data3[dd] <- data_tmp[count] - count <- count + 1 - } - } - rm(data_tmp) - } - if (!is.null(data4)) { - data_tmp <- as.vector(data4) - data4 <- array(0, dim = length(dates_monthly)) - count <- 1 - for (dd in 1:length(dates_monthly)) { - if (dates_monthly[dd] == 1) { - data4[dd] <- data_tmp[count] - count <- count + 1 - } - } - rm(data_tmp) - } - if (pet_method == 'hargreaves') { - pet <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), - lat = lat_mask, na.rm = na.rm, verbose = FALSE) - # line to return the vector to the size of the actual original data - pet <- pet[which(dates_monthly == 1)] - pet <- array(pet, dim = dims) - } - - if (pet_method == 'hargreaves_modified') { - pet <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), - lat = lat_mask, Pre = as.vector(data4), na.rm = na.rm, - verbose = FALSE) - pet <- pet[which(dates_monthly == 1)] - pet <- array(pet, dim = dims) - } - - if (pet_method == 'thornthwaite') { - pet <- thornthwaite(as.vector(data2), lat = lat_mask, na.rm = na.rm, - verbose = FALSE) - # line to return the vector to the size of the actual original data - pet <- pet[which(dates_monthly == 1)] - pet <- array(pet, dim = dims) - } - return(pet) -} - - -.Accumulation <- function(data, dates_monthly, accum = 1, - time_dim = 'syear', leadtime_dim = 'time', - ncores = NULL) { - - accumulated <- Apply(data = list(data), - target_dims = list(data = c(leadtime_dim, time_dim)), - dates_monthly = dates_monthly, - accum = accum, - output_dims = 
c(leadtime_dim, time_dim), - leadtime_dim = leadtime_dim, time_dim = time_dim, - fun = .accumulation, - ncores = ncores)$output1 - - pos <- match(names(dim(accumulated)), names(dim(data))) - accumulated <- aperm(accumulated, pos) - - return(accumulated) - -} - -.accumulation <- function(data, dates_monthly, accum = 1, - time_dim = 'syear', leadtime_dim = 'time') { - - # data:[time, syear] - dims <- dim(data) - - data_vector <- array(0, dim = length(dates_monthly)) - count <- 1 - for (dd in 1:length(dates_monthly)) { - if (dates_monthly[dd] == 1) { - data_vector[dd] <- as.vector(data)[count] - count <- count + 1 - } - } - # Accumulation at different timescales - # rollapply {zoo} A generic function for applying a function to rolling - # margins of an array. - data_sum_x <- rollapply(data_vector, accum, sum) - # adds as many NAs as needed at the begining to account for the months that - # cannot be added (depends on accu) and so that the position in the vector - # corresponds to the accumulated of the previous months (instead of the - # accumulated of the next months) - data_sum_x <- c(rep(NA, accum-1), data_sum_x) - # discard the months that don't appear in the original data - data_sum_x <- data_sum_x[which(dates_monthly == 1)] - accum_result <- array(data_sum_x, dim = c(dims)) - # replace by NA when the accumulation corresponds to months that where not - # present in the original data - if (accum > 1) { - accum_result[1:(accum-1), ] <- NA - } - return(accum_result) } .Standardization <- function(data, params = NULL, accum = 1, time_dim = 'syear', @@ -974,19 +824,7 @@ PeriodSPEI <- function(exp, dates_exp, lat, ref_period = NULL, cross_validation = FALSE, handle_infinity = FALSE, param_error = -9999, method = 'parametric', distribution = 'log-Logistic', - fit = 'ub-pwm', na.rm = FALSE, ncores = NULL) { - - # Remove NA data due to accumulation - if (accum > 1) { - data <- ClimProjDiags::Subset(x = data, along = leadtime_dim, - indices = accum:dim(data)[leadtime_dim], - drop = FALSE) - if (!is.null(params)) { - params <- ClimProjDiags::Subset(x = params, along = leadtime_dim, - indices = accum:dim(params)[leadtime_dim], - drop = FALSE) - } - } + na.rm = FALSE, ncores = NULL) { nleadtime <- dim(data)[leadtime_dim] ntime <- dim(data)[time_dim] @@ -1021,23 +859,10 @@ PeriodSPEI <- function(exp, dates_exp, lat, ref_period = ref_period, handle_infinity = handle_infinity, cross_validation = cross_validation, param_error = param_error, method = method, distribution = distribution, - fit = fit, na.rm = na.rm, + na.rm = na.rm, output_dims = list(spei = c(leadtime_dim, time_dim, memb_dim), params = c(time_dim, leadtime_dim, 'coef')), ncores = ncores) - - # Add again NA values due to accumulation - if (accum > 1) { - for (s in 1:length(spei)) { - spei[[s]] <- Apply(data = list(spei[[s]]), target_dims = leadtime_dim, - fun = function(x, accum = 1, leadtime_dim = 'time') { - res <- c(rep(NA, accum-1), x) - dim(res) <- length(res) - names(dim(res)) <- leadtime_dim - return(res) - }, accum = accum, leadtime_dim = leadtime_dim)$output1 - } - } return(spei) } @@ -1046,7 +871,7 @@ PeriodSPEI <- function(exp, dates_exp, lat, ref_period = NULL, handle_infinity = FALSE, cross_validation = FALSE, param_error = -9999, method = 'parametric', distribution = 'log-Logistic', - fit = 'ub-pwm', na.rm = FALSE) { + na.rm = FALSE) { # data: [leadtime_dim, time_dim, memb_dim] # params: [time_dim, leadtime_dim, 'coef'] @@ -1068,7 +893,7 @@ PeriodSPEI <- function(exp, dates_exp, lat, spei_mod <- array(NA, dim(data)) # if the 
data [time, sdate, memb] has no variability it will raise an error # further down, so we assign a value to the result and skip the step - } else if (any(is.na(data)) && !na.rm) { + } else if (anyNA(data) && !na.rm) { spei_mod <- array(NA, dim(data)) } else if (var(data, na.rm = T) == 0) { spei_mod <- array(param_error, dim(data)) @@ -1090,15 +915,13 @@ PeriodSPEI <- function(exp, dates_exp, lat, params_tmp <- if (all(is.na(params))) {NULL} else {params[, ff, ]} - - spei_data <- .std(data = data_subset, coef = coef, - ntime = ntime, nmemb = nmemb, - method = method, distribution = distribution, - fit = fit, na.rm = na.rm, + spei_data <- .std(data = data_subset, coef = coef, ntime = ntime, + nmemb = nmemb, method = method, + distribution = distribution, na.rm = na.rm, ref.start = ref.start, ref.end = ref.end, - params = params_tmp, - handle_infinity = handle_infinity, + params = params_tmp, handle_infinity = handle_infinity, cross_validation = cross_validation) + spei_mod[ff, , ] <- spei_data[[1]] params_ff <- spei_data[[2]] # lengthen dimension coef of params_ff in case it doesn't match the @@ -1115,12 +938,15 @@ PeriodSPEI <- function(exp, dates_exp, lat, } .std <- function(data, coef, ntime, nmemb, method = 'parametric', - distribution = 'log-Logistic', fit = 'ub-pwm', na.rm = FALSE, + distribution = 'log-Logistic', na.rm = FALSE, ref.start = NULL, ref.end = NULL, params = NULL, handle_infinity = FALSE, cross_validation = FALSE) { # data: [time_dim, memb_dim] # params: NULL or [(ntime), coef] + + fit = 'ub-pwm' # hard-coded + if (method == 'non-parametric') { bp <- matrix(0, length(data), 1) for (i in 1:length(data)) { diff --git a/tests/testthat/test-PeriodPET.R b/tests/testthat/test-PeriodPET.R new file mode 100644 index 0000000..0b7cf57 --- /dev/null +++ b/tests/testthat/test-PeriodPET.R @@ -0,0 +1,178 @@ +############################################## + +# dat1 +dims <- c(syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 10) + +set.seed(1) +exp_tasmax <- array(rnorm(360, 27.73, 5.26), dim = dims) +set.seed(2) +exp_tasmin <- array(rnorm(360, 14.83, 3.86), dim = dims) +set.seed(3) +exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims) + +dates_exp <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), + paste0(2010:2015, "-10-16"))) +dim(dates_exp) <- c(sday = 1, sweek = 1, syear = 6, time = 3) + +lat <- c(40,40.1) + +exp1 <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) + +# dat2 +dims2 <- c(styear = 6, ftime = 3, lat = 2, lon = 1, member = 10) + +set.seed(1) +exp_tas <- array(rnorm(100, 17.34, 9.18), dim = dims2) +set.seed(2) +exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims2) + +dates_exp2 <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), + paste0(2010:2015, "-10-16"))) +dim(dates_exp2) <- c(sday = 1, sweek = 1, styear = 6, ftime = 3) + +lat <- c(40,40.1) + +exp2 <- list('tas' = exp_tas, 'prlr' = exp_prlr) + +############################################## + +# test_that("1. Initial checks CST_PeriodSPEI", { +# # Check 's2dv_cube' +# expect_error( +# CST_PeriodSPEI(exp = NULL), +# "Parameter 'exp' cannot be NULL." +# ) +# expect_error( +# CST_PeriodSPEI(exp = array(10)), +# "Parameter 'exp' must be a list of 's2dv_cube' class." 
+# ) +# # latitude +# expect_error( +# CST_PeriodSPEI(exp = list(cube1)), +# paste0("Spatial coordinate names of parameter 'exp' do not match any ", +# "of the names accepted by the package.") +# ) +# # Dates +# expect_error( +# CST_PeriodSPEI(exp = list(cube2)), +# paste0("Element 'Dates' is not found in 'attrs' list of 'exp'. ", +# "See 's2dv_cube' object description in README file for more ", +# "information.") +# ) +# expect_error( +# CST_PeriodSPEI(exp = list(cube3), exp_cor = list(cube2)), +# paste0("Element 'Dates' is not found in 'attrs' list of 'exp_cor'. ", +# "See 's2dv_cube' object description in README file for more ", +# "information.") +# ) +# }) + +############################################## + +test_that("1. Initial checks PeriodSPEI", { + # data + expect_error( + PeriodPET(data = NULL), + "Parameter 'data' needs to be a named list with the needed variables." + ) + expect_error( + PeriodPET(data = list(1)), + "Parameter 'data' needs to be a named list with the variable names." + ) + expect_error( + PeriodPET(data = list(tasmax = array(10))), + "Parameter 'data' needs to be a list of arrays with dimension names." + ) + expect_error( + PeriodPET(data = list(tasmax = array(10, c(time = 10)), + tasmin = array(10, c(time = 11)))), + "Parameter 'data' variables need to have the same dimensions." + ) + expect_error( + PeriodPET(data = list(tasmax = array(10, c(time = 10)), + tasmin = array(10, c(ftime = 10)))), + "Parameter 'data' variables need to have the same dimensions." + ) + # lat + expect_error( + PeriodPET(data = exp1, lat = 'lat'), + "Parameter 'lat' must be numeric." + ) + expect_error( + PeriodPET(data = list(tasmax = array(10, c(time = 10)), + tasmin = array(10, c(time = 10))), lat = 1:2), + "Parameter 'data' must have 'lat_dim' dimension." + ) + # data (2) + expect_warning( + PeriodPET(data = exp1, pet_method = '1', dates = dates_exp, lat = lat), + paste0("Parameter 'pet_method' needs to be 'hargreaves' or ", + "'hargreaves_modified'. It is set to 'hargreaves_modified'.") + ) + # time_dim + expect_error( + PeriodPET(data = exp1, time_dim = 1, dates = dates_exp, lat = lat), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + PeriodPET(data = exp2, lat = lat, dates = dates_exp2, + lat_dim = 'lat', pet_method = 'thornthwaite'), + "Parameter 'time_dim' is not found in 'data' dimension." + ) + # leadtime_dim + expect_error( + PeriodPET(data = exp1, leadtime_dim = 1, dates = dates_exp, lat = lat), + "Parameter 'leadtime_dim' must be a character string." + ) + expect_error( + PeriodPET(data = exp2, lat = lat, dates = dates_exp2, + lat_dim = 'lat', time_dim = 'ftime', pet_method = 'thornthwaite'), + "Parameter 'leadtime_dim' is not found in 'data' dimension." + ) + # lat_dim + expect_error( + PeriodPET(data = exp1, lat_dim = 1, dates = dates_exp, lat = lat) + ) + expect_error( + PeriodPET(data = exp2, lat = lat, dates = dates_exp2), + "Parameter 'data' must have 'lat_dim' dimension." + ) + # na.rm + expect_error( + PeriodPET(data = exp1, na.rm = 1.5, dates = dates_exp, lat = lat), + "Parameter 'na.rm' must be one logical value." + ) + # ncores + expect_error( + PeriodPET(data = exp1, ncores = 1.5, dates = dates_exp, lat = lat), + "Parameter 'ncores' must be a positive integer." + ) +}) + +############################################## + +test_that("2. 
Output checks", { + res1 <- PeriodPET(data = exp1, lat = lat, dates = dates_exp) + res2 <- PeriodPET(data = exp2, lat = lat, dates = dates_exp2, + pet_method = c('thornthwaite'), + lat_dim = 'lat', time_dim = 'styear', + leadtime_dim = 'ftime') + # dims + expect_equal( + dim(res1), + c(syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 10) + ) + # values + expect_equal( + res1[1:4], + c(137.77342, 154.55548, 65.72859, 222.20438), + tolerance = 0.0001 + ) + expect_equal( + res2[1:4], + c(77.76124, 118.94212, 66.57568, 185.67074), + tolerance = 0.0001 + ) +}) + +############################################## \ No newline at end of file diff --git a/tests/testthat/test-PeriodSPEI.R b/tests/testthat/test-PeriodSPEI.R index 4e42859..f978111 100644 --- a/tests/testthat/test-PeriodSPEI.R +++ b/tests/testthat/test-PeriodSPEI.R @@ -35,13 +35,13 @@ expcor_tasmin <- array(rnorm(60, 15.70, 4.40), dim = dimscor) set.seed(3) expcor_prlr <- array(rnorm(60, 15.62, 21.38), dim = dimscor) -dates_exp <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), - paste0(2010:2015, "-10-16"))) -dim(dates_exp) <- c(sday = 1, sweek = 1, syear = 6, time = 3) +dates_exp <- as.POSIXct(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), + paste0(2010:2015, "-10-16")), "UTC") +dim(dates_exp) <- c(syear = 6, time = 3) -dates_expcor <- as.Date(c(paste0(2020, "-08-16"), paste0(2020, "-09-15"), - paste0(2020, "-10-16"))) -dim(dates_expcor) <- c(sday = 1, sweek = 1, syear = 1, time = 3) +dates_expcor <- as.POSIXct(c(paste0(2020, "-08-16"), paste0(2020, "-09-15"), + paste0(2020, "-10-16")), "UTC") +dim(dates_expcor) <- c(syear = 1, time = 3) lat <- c(40,40.1) @@ -65,7 +65,7 @@ set.seed(2) expcor_prlr <- array(rnorm(60, 15.62, 21.38), dim = dimscor2) dates_exp2 <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), - paste0(2010:2015, "-10-16"))) + paste0(2010:2015, "-10-16"))) dim(dates_exp2) <- c(sday = 1, sweek = 1, styear = 6, ftime = 3) dates_expcor2 <- as.Date(c(paste0(2020, "-08-16"), paste0(2020, "-09-15"), @@ -77,6 +77,20 @@ lat <- c(40,40.1) exp2 <- list('tas' = exp_tas, 'prlr' = exp_prlr) exp_cor2 <- list('tas' = expcor_tas, 'prlr' = expcor_prlr) +# cube4 +cube4_exp <- lapply(exp1, function(x) { + suppressWarnings( + CSTools::s2dv_cube(data = x, coords = list(latitude = c(40, 40.1)), + varName = 'test', Dates = dates_exp) + ) +}) +cube4_expcor <- lapply(exp_cor1, function(x) { + suppressWarnings( + CSTools::s2dv_cube(data = x, coords = list(latitude = c(40, 40.1)), + varName = 'test', Dates = dates_expcor) + ) +}) + ############################################## test_that("1. Initial checks CST_PeriodSPEI", { @@ -156,19 +170,19 @@ test_that("1. Initial checks PeriodSPEI", { "Parameter 'exp_cor' variables need to have the same dimensions." ) expect_error( - PeriodSPEI(exp = exp1, lat = 'lat'), + PeriodSPEI(exp = exp1, lat = 'lat', dates_exp = dates_exp), "Parameter 'lat' must be numeric." ) expect_error( - PeriodSPEI(exp = list(tasmax = array(10, c(time = 10)), - tasmin = array(10, c(time = 10))), lat = 1:2), - "Parameter 'exp' must have 'lat_dim' dimension." + PeriodSPEI(exp = list(prlr = array(10, c(time = 10, syear = 1, ensemble = 1))), + lat = 1:2, dates_exp = dates_exp), + "Parameter 'lat_dim' is not found in 'exp' dimension." ) # exp (2) expect_warning( PeriodSPEI(exp = exp1, pet_method = '1', dates_exp = dates_exp, lat = lat), paste0("Parameter 'pet_method' needs to be 'hargreaves' or ", - "'hargreaves_modified'. 
It is set to 'hargreaves'.") + "'hargreaves_modified'. It is set to 'hargreaves_modified'.") ) # time_dim expect_error( @@ -211,13 +225,40 @@ test_that("1. Initial checks PeriodSPEI", { expect_error( PeriodSPEI(exp = exp2, exp_cor = exp_cor2, lat = lat, dates_exp = dates_exp2, dates_expcor = dates_expcor2), - "Parameter 'exp' must have 'lat_dim' dimension." + "Parameter 'time_dim' is not found in 'exp' dimension." ) # accum expect_error( PeriodSPEI(exp = exp1, accum = 10, dates_exp = dates_exp, lat = lat), "Cannot compute accumulation of 10 months because loaded data has only 3 months." ) + # ref_period + expect_warning( + PeriodSPEI(exp = exp1, exp_cor = exp_cor1, dates_exp = dates_exp, + dates_expcor = dates_expcor, lat = lat, ref_period = 1), + paste0("Parameter 'ref_period' must be of length two indicating the ", + "first and end years of the reference period. It will not ", + "be used.") + ) + expect_warning( + PeriodSPEI(exp = exp1, exp_cor = exp_cor1, dates_exp = dates_exp, + dates_expcor = dates_expcor, lat = lat, ref_period = list('a', 1)), + paste0("Parameter 'ref_period' must be a numeric vector indicating the ", + "'start' and 'end' years of the reference period. It will not ", + "be used.") + ) + expect_warning( + PeriodSPEI(exp = exp1, exp_cor = exp_cor1, dates_exp = dates_exp, + dates_expcor = dates_expcor, lat = lat, ref_period = list(2012, 2011)), + paste0("In parameter 'ref_period' 'start' cannot be after 'end'. It ", + "will not be used.") + ) + expect_warning( + PeriodSPEI(exp = exp1, exp_cor = exp_cor1, dates_exp = dates_exp, + dates_expcor = dates_expcor, lat = lat, ref_period = list(2008, 2021)), + paste0("Parameter 'ref_period' contain years outside the dates. ", + "It will not be used.") + ) # standardization expect_error( PeriodSPEI(exp = exp1, standardization = 10, dates_exp = dates_exp, lat = lat), @@ -251,12 +292,6 @@ test_that("1. Initial checks PeriodSPEI", { "of the following distributions: 'log-Logistic', 'Gamma' or ", "'PearsonIII'.") ) - # fit - expect_error( - PeriodSPEI(exp = exp1, fit = 1, dates_exp = dates_exp, lat = lat), - paste0("Parameter 'fit' must be a character string containing one of ", - "the following fit methods: 'max-lik', 'ub-pwm', 'pp-pwm'.") - ) # ncores expect_error( PeriodSPEI(exp = exp1, ncores = 1.5, dates_exp = dates_exp, lat = lat), @@ -266,37 +301,67 @@ test_that("1. Initial checks PeriodSPEI", { ############################################## +test_that("2. Output checks: CST_PeriodSPEI", { + res1 <- CST_PeriodSPEI(exp = cube4_exp, exp_cor = NULL) + res2 <- CST_PeriodSPEI(exp = cube4_exp, exp_cor = cube4_expcor) + res3 <- CST_PeriodSPEI(exp = cube4_exp, exp_cor = cube4_expcor, standardization = F) + res4 <- CST_PeriodSPEI(exp = cube4_exp, exp_cor = NULL, return_params = T) + expect_equal( + names(res1), + c("data", "dims", "coords", "attrs") + ) + expect_equal( + res2$attrs$Variable$varName, + "SPEI" + ) + expect_equal( + res3$attrs$Variable$varName, + "Precipitation minus accumulated PET" + ) + expect_equal( + names(res3), + c("data", "dims", "coords", "attrs") + ) + expect_equal( + names(res4), + c("spei", "params") + ) +}) + +############################################## + test_that("2. 
Output checks", { res1 <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, - dates_exp = dates_exp, dates_expcor = dates_expcor) + dates_exp = dates_exp, dates_expcor = dates_expcor, + return_params = TRUE) res2 <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, dates_exp = dates_exp, dates_expcor = dates_expcor, - standardization = FALSE) + standardization = FALSE) # No info about accumulation res3 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, - dates_exp = dates_exp) + dates_exp = dates_exp, return_params = TRUE) # output dims expect_equal( names(res1), - c('exp', 'exp_cor', 'params') + c('spei', 'params') ) expect_equal( - names(res2), - c('exp', 'exp_cor') + dim(res2), + c(syear = 1, time = 3, latitude = 2, longitude = 1, ensemble = 15) ) expect_equal( names(res3), - c('exp', 'params') + c('spei', 'params') ) expect_equal( dim(res1[[1]]), - dims + dimscor ) expect_equal( - dim(res1[[3]])[which(!names(dim(res1[[3]])) %in% c('coef', 'syear'))], + dim(res1[[2]])[which(!names(dim(res1[[2]])) %in% c('coef', 'syear'))], dims[which(!names(dims) %in% c('syear', 'ensemble'))] ) expect_equal( - dim(res2[[2]]), + dim(res2), dimscor ) expect_equal( @@ -310,14 +375,21 @@ test_that("2. Output checks", { res11 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, accum = 2, dates_exp = dates_exp, na.rm = TRUE) expect_equal( - res11$exp[1,2,1,1,][1:4], - c(-0.5553128, 0.4689562, -0.4682003, -0.9956847), + res11[1,3,1,1,][1:4], + c(-0.4292409, -0.1375149, -0.5564081, -0.4273380), tolerance = 0.0001 ) # ref_period + res_ref <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, accum = 2, + dates_exp = dates_exp, dates_expcor = dates_expcor, + na.rm = TRUE, ref_period = list(2011, 2013)) + expect_equal( + !identical(res1[[1]], res_ref), + TRUE + ) # params res5 <- PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp, - params = params1) + params = params1, return_params = TRUE) expect_error( PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp, params = array(abs(rnorm(100)), dim = dimscor)), @@ -333,10 +405,10 @@ test_that("2. Output checks", { dates_exp = dates_exp, standardization = FALSE) expect_equal( names(res4), - c('exp') + NULL ) expect_equal( - dim(res4$exp), + dim(res4), c(syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 10) ) # cross_validation @@ -348,12 +420,12 @@ test_that("2. Output checks", { "since 'exp_cor' is provided.") ) res_crossval_T <- PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp, - cross_validation = TRUE) + cross_validation = TRUE, return_params = TRUE) res_crossval_F <- PeriodSPEI(exp = exp1, lat = lat, dates_exp = dates_exp, - cross_validation = FALSE) + cross_validation = FALSE, return_params = TRUE) # cross_validation = TRUE expect_equal( - dim(res_crossval_T$exp), + dim(res_crossval_T$spei), dims ) expect_equal( @@ -373,6 +445,14 @@ test_that("2. Output checks", { res6 <- PeriodSPEI(exp = exp1, exp_cor = exp_cor1, lat = lat, dates_exp = dates_exp, dates_expcor = dates_expcor, pet_method = c('hargreaves_modified', 'hargreaves')) + expect_equal( + identical(res5, res6), + FALSE + ) + expect_equal( + dim(res5), + dim(res6) + ) # time_dim, leadtime_dim, memb_dim, lat_dim res7 <- PeriodSPEI(exp = exp2, exp_cor = exp_cor2, lat = lat, @@ -389,15 +469,6 @@ test_that("2. 
Output checks", { dates_exp = dates_exp, distribution = 'PearsonIII') # NA res10 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, dates_exp = dates_exp, distribution = 'Gamma') # NA - # fit - - res12 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, - dates_exp = dates_exp, fit = 'ub-pwm') # ok - res13 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, - dates_exp = dates_exp, fit = 'max-lik') # ok - res14 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, - dates_exp = dates_exp, fit = 'pp-pwm') # NA - all.equal(res12, res13) - all.equal(res12, res14) # res14 doesn't work for this data # param_error - # handle_infinity - OK res9 <- PeriodSPEI(exp = exp1, exp_cor = NULL, lat = lat, -- GitLab From 70ec75370d642254964bcd0f23be585ad0f61885 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Fri, 28 Jul 2023 14:45:54 +0200 Subject: [PATCH 41/87] Hide function PeriodFun from CSIndicators package; the function PeriodFun will remain hidden --- DESCRIPTION | 2 +- NAMESPACE | 2 -- R/PeriodFun.R | 24 +++++++------- man/CST_PeriodFun.Rd | 75 -------------------------------------------- man/PeriodFun.Rd | 70 ----------------------------------------- 5 files changed, 13 insertions(+), 160 deletions(-) delete mode 100644 man/CST_PeriodFun.Rd delete mode 100644 man/PeriodFun.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 4151ba4..4e20983 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -40,4 +40,4 @@ URL: https://earth.bsc.es/gitlab/es/csindicators/ BugReports: https://earth.bsc.es/gitlab/es/csindicators/-/issues Encoding: UTF-8 RoxygenNote: 7.2.0 -Config/testthat/edition: 3 \ No newline at end of file +Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index f02cee5..d92da37 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,7 +6,6 @@ export(CST_AbsToProbs) export(CST_AccumulationExceedingThreshold) export(CST_MergeRefToExp) export(CST_PeriodAccumulation) -export(CST_PeriodFun) export(CST_PeriodMax) export(CST_PeriodMean) export(CST_PeriodMin) @@ -20,7 +19,6 @@ export(CST_WindCapacityFactor) export(CST_WindPowerDensity) export(MergeRefToExp) export(PeriodAccumulation) -export(PeriodFun) export(PeriodMax) export(PeriodMean) export(PeriodMin) diff --git a/R/PeriodFun.R b/R/PeriodFun.R index 9c33ba2..77484a1 100644 --- a/R/PeriodFun.R +++ b/R/PeriodFun.R @@ -46,15 +46,15 @@ #'exp$attrs$Dates <- Dates #'class(exp) <- 's2dv_cube' #' -#'SA <- CST_PeriodFun(exp, fun = mean, start = list(01, 12), -#' end = list(01, 01)) +#'SA <- CSIndicators:::.CST_PeriodFun(exp, fun = mean, start = list(01, 12), +#' end = list(01, 01)) #' #'@import multiApply #'@importFrom ClimProjDiags Subset -#'@export -CST_PeriodFun <- function(data, fun, start = NULL, end = NULL, - time_dim = 'ftime', na.rm = FALSE, - ncores = NULL) { +#'@noRd +.CST_PeriodFun <- function(data, fun, start = NULL, end = NULL, + time_dim = 'ftime', na.rm = FALSE, + ncores = NULL) { # Check 's2dv_cube' if (!inherits(data, 's2dv_cube')) { stop("Parameter 'data' must be of the class 's2dv_cube'.") @@ -70,9 +70,9 @@ CST_PeriodFun <- function(data, fun, start = NULL, end = NULL, } Dates <- data$attrs$Dates - total <- PeriodFun(data = data$data, fun = fun, dates = Dates, start = start, - end = end, time_dim = time_dim, na.rm = na.rm, - ncores = ncores) + total <- CSIndicators:::.PeriodFun(data = data$data, fun = fun, dates = Dates, start = start, + end = end, time_dim = time_dim, na.rm = na.rm, + ncores = ncores) data$data <- total data$dims <- dim(total) @@ -144,12 +144,12 @@ CST_PeriodFun <- function(data, fun, start = NULL, end = NULL, #' 
seq(as.Date("2003-11-01", "%Y-%m-%d", tz = "UTC"), #' as.Date("2004-01-01", "%Y-%m-%d", tz = "UTC"), by = "month")) #'dim(Dates) <- c(sdate = 4, ftime = 3) -#'SA <- PeriodFun(data, fun = mean, dates = Dates, start = list(01, 12), +#'SA <- CSIndicators:::.PeriodFun(data, fun = mean, dates = Dates, start = list(01, 12), #' end = list(01, 01)) #' #'@import multiApply -#'@export -PeriodFun <- function(data, fun, dates = NULL, start = NULL, end = NULL, +#'@noRd +.PeriodFun <- function(data, fun, dates = NULL, start = NULL, end = NULL, time_dim = 'ftime', na.rm = FALSE, ncores = NULL) { if (is.null(data)) { diff --git a/man/CST_PeriodFun.Rd b/man/CST_PeriodFun.Rd deleted file mode 100644 index 1ffe32b..0000000 --- a/man/CST_PeriodFun.Rd +++ /dev/null @@ -1,75 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/PeriodFun.R -\name{CST_PeriodFun} -\alias{CST_PeriodFun} -\title{Period Function on 's2dv_cube' objects} -\usage{ -CST_PeriodFun( - data, - fun, - start = NULL, - end = NULL, - time_dim = "ftime", - na.rm = FALSE, - ncores = NULL -) -} -\arguments{ -\item{data}{An 's2dv_cube' object as provided function \code{CST_Load} in -package CSTools.} - -\item{fun}{An atomic function to compute a calculation over a period.} - -\item{start}{An optional parameter to defined the initial date of the period -to select from the data by providing a list of two elements: the initial -date of the period and the initial month of the period. By default it is set -to NULL and the indicator is computed using all the data provided in -\code{data}.} - -\item{end}{An optional parameter to defined the final date of the period to -select from the data by providing a list of two elements: the final day of -the period and the final month of the period. By default it is set to NULL -and the indicator is computed using all the data provided in \code{data}.} - -\item{time_dim}{A character string indicating the name of the dimension to -compute the indicator. By default, it is set to 'ftime'. More than one -dimension name matching the dimensions provided in the object -\code{data$data} can be specified.} - -\item{na.rm}{A logical value indicating whether to ignore NA values (TRUE) or -not (FALSE).} - -\item{ncores}{An integer indicating the number of cores to use in parallel -computation.} -} -\value{ -An 's2dv_cube' object containing the indicator in the element -\code{data} with dimensions of the input parameter 'data' except the -dimension where the mean has been computed (specified with 'time_dim'). A new -element called 'time_bounds' will be added into the 'attrs' element in the -'s2dv_cube' object. It consists of a list containing two elements, the start -and end dates of the aggregated period with the same dimensions of 'Dates' -element. -} -\description{ -Period Fun computes a calculation of a given variable in a period. 
-} -\examples{ -exp <- NULL -exp$data <- array(rnorm(45), dim = c(member = 7, sdate = 4, ftime = 3)) -Dates <- c(seq(as.Date("2000-11-01", "\%Y-\%m-\%d", tz = "UTC"), - as.Date("2001-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), - seq(as.Date("2001-11-01", "\%Y-\%m-\%d", tz = "UTC"), - as.Date("2002-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), - seq(as.Date("2002-11-01", "\%Y-\%m-\%d", tz = "UTC"), - as.Date("2003-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), - seq(as.Date("2003-11-01", "\%Y-\%m-\%d", tz = "UTC"), - as.Date("2004-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month")) -dim(Dates) <- c(sdate = 4, ftime = 3) -exp$attrs$Dates <- Dates -class(exp) <- 's2dv_cube' - -SA <- CST_PeriodFun(exp, fun = mean, start = list(01, 12), - end = list(01, 01)) - -} diff --git a/man/PeriodFun.Rd b/man/PeriodFun.Rd deleted file mode 100644 index 9378273..0000000 --- a/man/PeriodFun.Rd +++ /dev/null @@ -1,70 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/PeriodFun.R -\name{PeriodFun} -\alias{PeriodFun} -\title{Period Function on multidimensional array objects} -\usage{ -PeriodFun( - data, - fun, - dates = NULL, - start = NULL, - end = NULL, - time_dim = "ftime", - na.rm = FALSE, - ncores = NULL -) -} -\arguments{ -\item{data}{A multidimensional array with named dimensions.} - -\item{fun}{An atomic function to compute a calculation over a period.} - -\item{dates}{A multidimensional array of dates with named dimensions matching -the temporal dimensions on parameter 'data'. By default it is NULL, to -select aperiod this parameter must be provided.} - -\item{start}{An optional parameter to defined the initial date of the period -to select from the data by providing a list of two elements: the initial -date of the period and the initial month of the period. By default it is set -to NULL and the indicator is computed using all the data provided in -\code{data}.} - -\item{end}{An optional parameter to defined the final date of the period to -select from the data by providing a list of two elements: the final day of -the period and the final month of the period. By default it is set to NULL -and the indicator is computed using all the data provided in \code{data}.} - -\item{time_dim}{A character string indicating the name of the dimension to -compute the indicator. By default, it is set to 'ftime'. More than one -dimension name matching the dimensions provided in the object -\code{data$data} can be specified.} - -\item{na.rm}{A logical value indicating whether to ignore NA values (TRUE) or -not (FALSE).} - -\item{ncores}{An integer indicating the number of cores to use in parallel -computation.} -} -\value{ -A multidimensional array with named dimensions containing the -indicator in the element \code{data}. -} -\description{ -Period Fun computes a calculation of a given variable in a period. 
-} -\examples{ -data <- array(rnorm(45), dim = c(member = 7, sdate = 4, ftime = 3)) -Dates <- c(seq(as.Date("2000-11-01", "\%Y-\%m-\%d", tz = "UTC"), - as.Date("2001-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), - seq(as.Date("2001-11-01", "\%Y-\%m-\%d", tz = "UTC"), - as.Date("2002-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), - seq(as.Date("2002-11-01", "\%Y-\%m-\%d", tz = "UTC"), - as.Date("2003-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month"), - seq(as.Date("2003-11-01", "\%Y-\%m-\%d", tz = "UTC"), - as.Date("2004-01-01", "\%Y-\%m-\%d", tz = "UTC"), by = "month")) -dim(Dates) <- c(sdate = 4, ftime = 3) -SA <- PeriodFun(data, fun = mean, dates = Dates, start = list(01, 12), - end = list(01, 01)) - -} -- GitLab From 98e1c9b4cfd36ea4a7d84ae7e004f86f1ca24f8b Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Mon, 31 Jul 2023 12:56:52 +0200 Subject: [PATCH 42/87] Add rolling sum in PeriodAccumulation throughout a new parameter rollingwidth --- NAMESPACE | 1 + R/PeriodAccumulation.R | 168 ++++++++++++++++------- man/CST_PeriodAccumulation.Rd | 36 +++-- man/CST_PeriodSPEI.Rd | 84 ++++++++---- man/PeriodAccumulation.Rd | 15 ++ man/PeriodPET.Rd | 82 +++++++++++ man/PeriodSPEI.Rd | 88 ++++++++---- tests/testthat/test-PeriodAccumulation.R | 60 +++++--- 8 files changed, 400 insertions(+), 134 deletions(-) create mode 100644 man/PeriodPET.Rd diff --git a/NAMESPACE b/NAMESPACE index 7cfbe75..c67f13e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -18,6 +18,7 @@ export(CST_WindPowerDensity) export(MergeRefToExp) export(PeriodAccumulation) export(PeriodMean) +export(PeriodPET) export(PeriodSPEI) export(QThreshold) export(SelectPeriodOnData) diff --git a/R/PeriodAccumulation.R b/R/PeriodAccumulation.R index 3b0d33d..b649a1d 100644 --- a/R/PeriodAccumulation.R +++ b/R/PeriodAccumulation.R @@ -10,12 +10,19 @@ #' August 21st to October 21st} #'} #' +#'There are two possible ways of performing the accumulation. The default one +#'is by accumulating a variable over a dimension specified with 'time_dim'. To +#'chose a specific time period, start and end must be used. The other method +#'is by using rollingwidth parameter. When this parameter is a positive integer, +#'the rolling sum is applied over 'time_dim'. If it is negative, the rolling sum +#'is applied towards 'time_dim'. +#' #'@param data An 's2dv_cube' object as provided function \code{CST_Load} in #' package CSTools. #'@param start An optional parameter to defined the initial date of the period #' to select from the data by providing a list of two elements: the initial -#' date of the period and the initial month of the period. By default it is set -#' to NULL and the indicator is computed using all the data provided in +#' date of the period and the initial m onth of the period. By default it is +#' set to NULL and the indicator is computed using all the data provided in #' \code{data}. #'@param end An optional parameter to defined the final date of the period to #' select from the data by providing a list of two elements: the final day of @@ -25,19 +32,27 @@ #' compute the indicator. By default, it is set to 'ftime'. More than one #' dimension name matching the dimensions provided in the object #' \code{data$data} can be specified. +#'@param rollingwidth An optional parameter to indicate the number of time +#' steps the rolling sum is applied to. If it is negative, the rolling sum is +#' applied backwards 'time_dim', if it is positive, it will be towards it. 
When +#' this parameter is NULL, the sum is applied over all 'time_dim', in a +#' specified period. It is NULL by default. #'@param na.rm A logical value indicating whether to ignore NA values (TRUE) or #' not (FALSE). #'@param ncores An integer indicating the number of cores to use in parallel #' computation. #' #'@return An 's2dv_cube' object containing the indicator in the element -#'\code{data} with dimensions of the input parameter 'data' except the dimension -#'where the accumulation has been computed (specified with 'time_dim'). The -#''Dates' array is updated to the dates corresponding to the beginning of the -#'aggregated time period. A new element called 'time_bounds' will be added into -#'the 'attrs' element in the 's2dv_cube' object. It consists of a list -#'containing two elements, the start and end dates of the aggregated period with -#'the same dimensions of 'Dates' element. +#'\code{data}. If 'rollingwithd' is not used, it will have the dimensions of +#'the input parameter 'data' except the dimension where the accumulation has +#'been computed (specified with 'time_dim'). The 'Dates' array is updated to the +#'dates corresponding to the beginning of the aggregated time period. A new +#'element called 'time_bounds' will be added into the 'attrs' element in the +#''s2dv_cube' object. It consists of a list containing two elements, the start +#'and end dates of the aggregated period with the same dimensions of 'Dates' +#'element. If 'rollingwithd' is used, it will contain the same dimensions of +#'parameter 'data' and the other elements of the 's2dv_cube' will not be +#'modified. #' #'@examples #'exp <- NULL @@ -66,8 +81,8 @@ #'@importFrom ClimProjDiags Subset #'@export CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, - time_dim = 'ftime', na.rm = FALSE, - ncores = NULL) { + time_dim = 'ftime', rollingwidth = NULL, + na.rm = FALSE, ncores = NULL) { # Check 's2dv_cube' if (!inherits(data, 's2dv_cube')) { stop("Parameter 'data' must be of the class 's2dv_cube'.") @@ -83,31 +98,38 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, } Dates <- data$attrs$Dates - total <- PeriodAccumulation(data$data, dates = Dates, start, end, - time_dim = time_dim, na.rm = na.rm, ncores = ncores) - data$data <- total - data$dims <- dim(total) + if (!is.null(rollingwidth)) { + data$data <- PeriodAccumulation(data$data, time_dim = time_dim, + rollingwidth = rollingwidth, na.rm = na.rm, + ncores = ncores) + } else { + total <- PeriodAccumulation(data$data, dates = Dates, start, end, + time_dim = time_dim, na.rm = na.rm, ncores = ncores) + data$data <- total + data$dims <- dim(total) - if (!is.null(Dates)) { - if (!is.null(start) && !is.null(end)) { - Dates <- SelectPeriodOnDates(dates = Dates, start = start, end = end, - time_dim = time_dim, ncores = ncores) - } - if (is.null(dim(Dates))) { - warning("Element 'Dates' has NULL dimensions. They will not be ", - "subset and 'time_bounds' will be missed.") - data$attrs$Dates <- Dates - } else { - # Create time_bounds - time_bounds <- NULL - time_bounds$start <- ClimProjDiags::Subset(Dates, time_dim, 1, drop = 'selected') - time_bounds$end <- ClimProjDiags::Subset(Dates, time_dim, dim(Dates)[time_dim], drop = 'selected') + if (!is.null(Dates)) { + if (!is.null(start) && !is.null(end)) { + Dates <- SelectPeriodOnDates(dates = Dates, start = start, end = end, + time_dim = time_dim, ncores = ncores) + } + if (is.null(dim(Dates))) { + warning("Element 'Dates' has NULL dimensions. 
They will not be ", + "subset and 'time_bounds' will be missed.") + data$attrs$Dates <- Dates + } else { + # Create time_bounds + time_bounds <- NULL + time_bounds$start <- ClimProjDiags::Subset(Dates, time_dim, 1, drop = 'selected') + time_bounds$end <- ClimProjDiags::Subset(Dates, time_dim, dim(Dates)[time_dim], drop = 'selected') - # Add Dates in attrs - data$attrs$Dates <- time_bounds$start - data$attrs$time_bounds <- time_bounds + # Add Dates in attrs + data$attrs$Dates <- time_bounds$start + data$attrs$time_bounds <- time_bounds + } } } + return(data) } @@ -122,6 +144,13 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, #' \item\code{HarR}{Harvest Total Precipitation: The total precipitation from #' August 21st to October 21st} #'} +#' +#'There are two possible ways of performing the accumulation. The default one +#'is by accumulating a variable over a dimension specified with 'time_dim'. To +#'chose a specific time period, start and end must be used. The other method +#'is by using rollingwidth parameter. When this parameter is a positive integer, +#'the rolling sum is applied over 'time_dim'. If it is negative, the rolling sum +#'is applied towards 'time_dim'. #' #'@param data A multidimensional array with named dimensions. #'@param dates A multidimensional array of dates with named dimensions matching @@ -140,6 +169,11 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, #' compute the indicator. By default, it is set to 'time'. More than one #' dimension name matching the dimensions provided in the object #' \code{data$data} can be specified. +#'@param rollingwidth An optional parameter to indicate the number of time +#' steps the rolling sum is applied to. If it is negative, the rolling sum is +#' applied backwards 'time_dim', if it is positive, it will be towards it. When +#' this parameter is NULL, the sum is applied over all 'time_dim', in a +#' specified period. It is NULL by default. #'@param na.rm A logical value indicating whether to ignore NA values (TRUE) or #' not (FALSE). #'@param ncores An integer indicating the number of cores to use in parallel @@ -169,8 +203,8 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, #'@import multiApply #'@export PeriodAccumulation <- function(data, dates = NULL, start = NULL, end = NULL, - time_dim = 'time', na.rm = FALSE, - ncores = NULL) { + time_dim = 'time', rollingwidth = NULL, + na.rm = FALSE, ncores = NULL) { if (is.null(data)) { stop("Parameter 'data' cannot be NULL.") } @@ -181,27 +215,59 @@ PeriodAccumulation <- function(data, dates = NULL, start = NULL, end = NULL, dim(data) <- length(data) names(dim(data)) <- time_dim } + dimnames <- names(dim(data)) - if (!is.null(start) && !is.null(end)) { - if (is.null(dates)) { - warning("Parameter 'dates' is NULL and the average of the ", - "full data provided in 'data' is computed.") - } else { - if (!any(c(is.list(start), is.list(end)))) { - stop("Parameter 'start' and 'end' must be lists indicating the ", - "day and the month of the period start and end.") - } - if (!is.null(dim(dates))) { - data <- SelectPeriodOnData(data, dates, start, end, - time_dim = time_dim, ncores = ncores) + # period accumulation + if (is.null(rollingwidth)) { + if (!is.null(start) && !is.null(end)) { + if (is.null(dates)) { + warning("Parameter 'dates' is NULL and the average of the ", + "full data provided in 'data' is computed.") } else { - warning("Parameter 'dates' must have named dimensions if 'start' and ", - "'end' are not NULL. 
All data will be used.") + if (!any(c(is.list(start), is.list(end)))) { + stop("Parameter 'start' and 'end' must be lists indicating the ", + "day and the month of the period start and end.") + } + if (!is.null(dim(dates))) { + data <- SelectPeriodOnData(data, dates, start, end, + time_dim = time_dim, ncores = ncores) + } else { + warning("Parameter 'dates' must have named dimensions if 'start' and ", + "'end' are not NULL. All data will be used.") + } } } + total <- Apply(list(data), target_dims = time_dim, fun = sum, + na.rm = na.rm, ncores = ncores)$output1 + } else { + # rolling accumulation + if (!is.numeric(rollingwidth)) { + stop("Parameter 'rollingwidth' must be a numeric value.") + } + if (abs(rollingwidth) > dim(data)[time_dim]) { + stop(paste0("Cannot compute accumulation of ", rollingwidth, " months because ", + "loaded data has only ", dim(data)[time_dim], " months.")) + } + backroll <- FALSE + if (rollingwidth < 0) { + rollingwidth <- abs(rollingwidth) + backroll <- TRUE + } + total <- Apply(data = list(data), target_dims = time_dim, + output_dims = time_dim, + fun = function(x, accum, backroll, na.rm) { + res <- rollapply(data = x, width = accum, FUN = sum, + na.rm = na.rm) + if (backroll) { + return(c(rep(NA, accum-1), res)) + } else { + return(c(res, rep(NA, accum-1))) + } + }, accum = rollingwidth, backroll = backroll, na.rm = na.rm, + ncores = ncores)$output1 + pos <- match(dimnames, names(dim(total))) + total <- aperm(total, pos) } - total <- Apply(list(data), target_dims = time_dim, fun = sum, - na.rm = na.rm, ncores = ncores)$output1 + return(total) } - diff --git a/man/CST_PeriodAccumulation.Rd b/man/CST_PeriodAccumulation.Rd index 77f4a38..4326ea2 100644 --- a/man/CST_PeriodAccumulation.Rd +++ b/man/CST_PeriodAccumulation.Rd @@ -9,6 +9,7 @@ CST_PeriodAccumulation( start = NULL, end = NULL, time_dim = "ftime", + rollingwidth = NULL, na.rm = FALSE, ncores = NULL ) @@ -19,8 +20,8 @@ package CSTools.} \item{start}{An optional parameter to defined the initial date of the period to select from the data by providing a list of two elements: the initial -date of the period and the initial month of the period. By default it is set -to NULL and the indicator is computed using all the data provided in +date of the period and the initial m onth of the period. By default it is +set to NULL and the indicator is computed using all the data provided in \code{data}.} \item{end}{An optional parameter to defined the final date of the period to @@ -33,6 +34,12 @@ compute the indicator. By default, it is set to 'ftime'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified.} +\item{rollingwidth}{An optional parameter to indicate the number of time +steps the rolling sum is applied to. If it is negative, the rolling sum is +applied backwards 'time_dim', if it is positive, it will be towards it. When +this parameter is NULL, the sum is applied over all 'time_dim', in a +specified period. It is NULL by default.} + \item{na.rm}{A logical value indicating whether to ignore NA values (TRUE) or not (FALSE).} @@ -41,13 +48,16 @@ computation.} } \value{ An 's2dv_cube' object containing the indicator in the element -\code{data} with dimensions of the input parameter 'data' except the dimension -where the accumulation has been computed (specified with 'time_dim'). The -'Dates' array is updated to the dates corresponding to the beginning of the -aggregated time period. 
A new element called 'time_bounds' will be added into -the 'attrs' element in the 's2dv_cube' object. It consists of a list -containing two elements, the start and end dates of the aggregated period with -the same dimensions of 'Dates' element. +\code{data}. If 'rollingwithd' is not used, it will have the dimensions of +the input parameter 'data' except the dimension where the accumulation has +been computed (specified with 'time_dim'). The 'Dates' array is updated to the +dates corresponding to the beginning of the aggregated time period. A new +element called 'time_bounds' will be added into the 'attrs' element in the +'s2dv_cube' object. It consists of a list containing two elements, the start +and end dates of the aggregated period with the same dimensions of 'Dates' +element. If 'rollingwithd' is used, it will contain the same dimensions of +parameter 'data' and the other elements of the 's2dv_cube' will not be +modified. } \description{ Period Accumulation computes the sum (accumulation) of a given variable in a @@ -60,6 +70,14 @@ by using this function: August 21st to October 21st} } } +\details{ +There are two possible ways of performing the accumulation. The default one +is by accumulating a variable over a dimension specified with 'time_dim'. To +chose a specific time period, start and end must be used. The other method +is by using rollingwidth parameter. When this parameter is a positive integer, +the rolling sum is applied over 'time_dim'. If it is negative, the rolling sum +is applied towards 'time_dim'. +} \examples{ exp <- NULL exp$data <- array(rnorm(216)*200, dim = c(dataset = 1, member = 2, sdate = 3, diff --git a/man/CST_PeriodSPEI.Rd b/man/CST_PeriodSPEI.Rd index 8e8ab19..eaa139e 100644 --- a/man/CST_PeriodSPEI.Rd +++ b/man/CST_PeriodSPEI.Rd @@ -21,9 +21,9 @@ CST_PeriodSPEI( pet_method = "hargreaves", method = "parametric", distribution = "log-Logistic", - fit = "ub-pwm", param_error = -9999, handle_infinity = FALSE, + return_params = FALSE, na.rm = FALSE, ncores = NULL ) @@ -57,7 +57,9 @@ computed for individual members.} dimension. By default it is set by 'latitude'.} \item{accum}{An integer value indicating the number of months for the -accumulation for each variable.} +accumulation for each variable. When it is greater than 1, the result will +be filled with NA until the accum time_dim dimension number due to the +accumulation to previous months.} \item{ref_period}{A list with two numeric values with the starting and end points of the reference period used for computing the index. The default @@ -102,12 +104,9 @@ default.} \item{distribution}{A character string indicating the name of the distribution function to be used for computing the SPEI. The accepted names are: -'log-Logistic', 'Gamma' or 'PearsonIII'. It is set to 'log-Logistic' by -default.} - -\item{fit}{A character string indicating the name of the method used for -computing the distribution function parameters The accepteed names are: -'ub-pwm', 'pp-pwm' and 'max-lik'. It is set to 'ub-pwm' by default.} +'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The +'Gamma' method only works when only precipitation is provided and other +variables are 0 because it is positive defined (SPI indicator).} \item{param_error}{A numeric value with the error accepted.} @@ -115,37 +114,66 @@ computing the distribution function parameters The accepteed names are: or not (FALSE).} \item{na.rm}{A logical value indicating whether NA values should be removed -from data. It is FALSE by default.} +from data. 
It is FALSE by default. If it is FALSE and there are NA values, +(if standardization is TRUE) all values of other dimensions except time_dim +and leadtime_dim will be set to NA directly. On the other hand, if it is +TRUE, if the data from other dimensions except time_dim and leadtime_dim is +not reaching 4 values, it is not enough values to estimate the parameters +and the result will include NA.} \item{ncores}{An integer value indicating the number of cores to use in parallel computation.} } \value{ -A list with elements: -\itemize{ - \item{'exp', if 'standarization' is TRUE an 's2dv_cube' conaining the 'SPEI' - in element data from 'exp' array with the same dimensions as 'exp'. - If it is FALSE, it is an array with the accumulated values of PET - minus 'prlr' data.} - \item{'exp_cor', if 'standarization' is TRUE and if 'exp_cor' is not - NULL. It is an 's2dv_cube' with the SPEI data from 'exp_cor' in - element 'data'. If 'standarization' is FALSE, only the accumulated - values of PET minus 'prlr' is returned.} - \item{'params', returned if 'standarization' is TRUE, it contains the - parameters used for the standarization of 'exp' that are used for - computing the 'SPEI' for 'exp_cor'.} -} +An 's2dv_cube' object containing the SPEI multidimensional array in +element \code{data}. If 'exp_cor' is provided, only results from 'exp_cor' +will be provided. The parameters of the standardization will only be returned +if 'return_params' is TRUE. The SPEI will only be computed if +'standardization' is TRUE. If 'standardization' is FALSE, only the climatic +water balance (precipitation minus evapotranspiration) will be returned. The +resultant arrays will have the same dimensions as the initial input data. The +other elements in the 's2dv_cube' will be updated with the combined +information of the input data arrays. } \description{ Calculation of the Standardised Precipitation-Evapotranspiration Index (SPEI) that is a multiscalar drought index based on climatic data. It can be used for determining the onset, duration and magnitude of drought conditions with respect to normal conditions in a variety of natural and managed systems such -as crops, ecosystems, rivers, water resources, etc. The SPI is calculated -using monthly (or weekly) precipitation as the input data. The SPEI uses the -monthly (or weekly) difference between precipitation and pet. This represents -a simple climatic water balance which is calculated at different time scales -to obtain the SPEI. +as crops, ecosystems, rivers, water resources, etc. The idea behind the SPEI +is to compare the highest possible evapotranspiration with the current water +availability. The SPEI uses the monthly (or weekly) difference between +precipitation and potential evapotranspiration. This represents a simple +climatic water balance which is calculated at different time scales to obtain +the SPEI. This function is build to work be compatible with other tools in +that work with 's2dv_cube' object class. The input data must be this object +class. If you don't work with 's2dv_cube', see PeriodSPEI. +} +\details{ +Next, some specifications for the calculation of this indicator will be +discussed. On the one hand, the model to be used to calculate potential +evapotranspiration is specified with the pet_method parameter (hargreaves, +hargraves modified or thornwhite). On the other hand, to choose the time scale +in which you want to accumulate the SPEI (SPEI3, SPEI6...) 
is done using the +accum parameter, where you must indicate the number of time steps you want to +accumulate throughout leadtime_dim. Since the accumulation is done for the +elapsed time steps, there will be no complete accumulations until reaching the +time instant equal to the value of the parameter. For this reason, in the +result, we will find that for the dimension where the accumulation has been +carried out, the values of the array will be NA since they do not include +complete accumulations. Also, there is a parameter to specify if the +standardization that can be TRUE or FALSE. If it is TRUE, the data is fit to a +probability distribution to transform the original values to standardized +units that are comparable in space and time and at different SPEI time scales. +The na.rm parameter is a logical parameter used to decide whether to remove +the NA values from the data before doing the calculation. It must be taken +into account that if na.rm == FALSE and there is some NA value in the specific +coordinates which the SPEI is computed, standardization cannot be carried out +for those coordinates and therefore, the result will be filled with NA for the +specific coordinates. However, when na.rm == TRUE, if the amount of data for +those specific coordinates is smaller than 4, it will not be possible to carry +out because we will not have enough data and the result will be also filled +with NAs for that coordinates. } \examples{ dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, diff --git a/man/PeriodAccumulation.Rd b/man/PeriodAccumulation.Rd index 614b65c..a3dbf46 100644 --- a/man/PeriodAccumulation.Rd +++ b/man/PeriodAccumulation.Rd @@ -10,6 +10,7 @@ PeriodAccumulation( start = NULL, end = NULL, time_dim = "time", + rollingwidth = NULL, na.rm = FALSE, ncores = NULL ) @@ -37,6 +38,12 @@ compute the indicator. By default, it is set to 'time'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified.} +\item{rollingwidth}{An optional parameter to indicate the number of time +steps the rolling sum is applied to. If it is negative, the rolling sum is +applied backwards 'time_dim', if it is positive, it will be towards it. When +this parameter is NULL, the sum is applied over all 'time_dim', in a +specified period. It is NULL by default.} + \item{na.rm}{A logical value indicating whether to ignore NA values (TRUE) or not (FALSE).} @@ -58,6 +65,14 @@ by using this function: August 21st to October 21st} } } +\details{ +There are two possible ways of performing the accumulation. The default one +is by accumulating a variable over a dimension specified with 'time_dim'. To +chose a specific time period, start and end must be used. The other method +is by using rollingwidth parameter. When this parameter is a positive integer, +the rolling sum is applied over 'time_dim'. If it is negative, the rolling sum +is applied towards 'time_dim'. 
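As a simple illustration of the rolling behaviour described above, mirroring the zoo::rollapply call used in the implementation but reduced to a plain numeric vector (a sketch, not the full multidimensional code):

  library(zoo)
  x <- 1:5
  width <- 2
  # positive width: complete windows first, incomplete ones padded with NA at the end
  c(rollapply(x, width = width, FUN = sum), rep(NA, width - 1))
  # 3 5 7 9 NA
  # negative width: the NA padding goes at the beginning instead
  c(rep(NA, width - 1), rollapply(x, width = width, FUN = sum))
  # NA 3 5 7 9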
+} \examples{ exp <- array(rnorm(216)*200, dim = c(dataset = 1, member = 2, sdate = 3, ftime = 9, lat = 2, lon = 2)) diff --git a/man/PeriodPET.Rd b/man/PeriodPET.Rd new file mode 100644 index 0000000..719d2fa --- /dev/null +++ b/man/PeriodPET.Rd @@ -0,0 +1,82 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PeriodPET.R +\name{PeriodPET} +\alias{PeriodPET} +\title{Compute the Potential Evapotranspiration} +\usage{ +PeriodPET( + data, + dates, + lat, + pet_method = "hargreaves", + time_dim = "syear", + leadtime_dim = "time", + lat_dim = "latitude", + na.rm = FALSE, + ncores = NULL +) +} +\arguments{ +\item{data}{A named list with the needed \code{s2dv_cube} objects containing +the seasonal forecast experiment in the data element for each variable. +Specific variables are needed for each method used in computing the +Potential Evapotranspiration. See parameter 'pet_method'. The accepted +variables are: 'tasmin' and 'tasmax', required for methods 'hargreaves' and +'hargreaves_modified' and 'tas', required for method 'thornthwaite'. +Variable 'prlr' is always needed. The units for temperature variables +('tasmin', 'tasmax' and 'tas') need to be in Celcius degrees; the units for +precipitation ('prlr') need to be in mm/month.} + +\item{dates}{An array of temporal dimensions containing the Dates of +'exp'. It must be of class 'Date' or 'POSIXct'.} + +\item{lat}{A numeric vector containing the latitude values of 'exp'.} + +\item{pet_method}{A character string indicating the method used to compute +the potential evapotranspiration. The accepted methods are: +'hargreaves' and 'hargreaves_modified', that require the data to have +variables tasmin and tasmax; and 'thornthwaite', that requires variable +'tas'.} + +\item{time_dim}{A character string indicating the name of the temporal +dimension. By default, it is set to 'syear'.} + +\item{leadtime_dim}{A character string indicating the name of the temporal +dimension. By default, it is set to 'time'.} + +\item{lat_dim}{A character string indicating the name of the latitudinal +dimension. By default it is set by 'latitude'.} + +\item{na.rm}{A logical value indicating whether NA values should be removed +from data. It is FALSE by default.} + +\item{ncores}{An integer value indicating the number of cores to use in +parallel computation.} +} +\description{ +Compute the Potential evapotranspiration (PET) that is the amount of +evaporation and transpiration that would occur if a sufficient water source +were available. Reference evapotranspiration (ETo) is the amount of +evaporation and transpiration from a reference vegetation of grass. They are +usually considered equivalent. This set of functions calculate PET or ETo +according to the Thornthwaite, Hargreaves or Penman-Monteith equations. 
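For reference, the Hargreaves (Hargreaves-Samani) form mentioned above can be sketched as below. This is an illustration only, not necessarily the exact routine called internally: tmax and tmin are assumed to be in degrees Celsius, ra is the extraterrestrial radiation expressed as equivalent evaporation in mm/day, and the function name hargreaves_et0 is hypothetical:

  # illustrative sketch of Hargreaves-Samani reference evapotranspiration (mm/day)
  hargreaves_et0 <- function(tmax, tmin, ra) {
    tmean <- (tmax + tmin) / 2
    0.0023 * ra * (tmean + 17.8) * sqrt(tmax - tmin)
  }
  hargreaves_et0(tmax = 28, tmin = 15, ra = 12)  # roughly 3.9 mm/day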
+} +\examples{ +dims <- c(time = 3, syear = 3, ensemble = 1, latitude = 1) + +exp_tasmax <- array(rnorm(360, 27.73, 5.26), dim = dims) +exp_tasmin <- array(rnorm(360, 14.83, 3.86), dim = dims) +exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims) +end_year <- 2012 +dates_exp <- as.POSIXct(c(paste0(2010:end_year, "-08-16"), + paste0(2010:end_year, "-09-15"), + paste0(2010:end_year, "-10-16")), "UTC") +dim(dates_exp) <- c(syear = 3, time = 3) + +lat <- c(40) + +exp1 <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) + +res <- PeriodPET(data = exp1, lat = lat, dates = dates_exp) + +} diff --git a/man/PeriodSPEI.Rd b/man/PeriodSPEI.Rd index 86aaa02..fc6c527 100644 --- a/man/PeriodSPEI.Rd +++ b/man/PeriodSPEI.Rd @@ -24,15 +24,15 @@ PeriodSPEI( pet_method = "hargreaves", method = "parametric", distribution = "log-Logistic", - fit = "ub-pwm", param_error = -9999, handle_infinity = FALSE, + return_params = FALSE, na.rm = FALSE, ncores = NULL ) } \arguments{ -\item{exp}{A named list with the needed \code{s2dv_cube} objects containing +\item{exp}{A named list with multidimensional array objects containing the seasonal forecast experiment in the data element for each variable. Specific variables are needed for each method used in computing the Potential Evapotranspiration. See parameter 'pet_method'. The accepted @@ -47,7 +47,7 @@ precipitation ('prlr') need to be in mm/month.} \item{lat}{A numeric vector containing the latitude values of 'exp'.} -\item{exp_cor}{A named list with the needed \code{s2dv_cube} objects for each +\item{exp_cor}{A named list with multidimensional array objects for each variable in which the quantile PeriodSPEI should be applied. If it is not specified, the PeriodSPEI is calculated from object 'exp'.} @@ -67,8 +67,10 @@ computed for individual members.} \item{lat_dim}{A character string indicating the name of the latitudinal dimension. By default it is set by 'latitude'.} -\item{accum}{An integer value indicating the number of months for the -accumulation for each variable.} +\item{accum}{accum An integer value indicating the number of months for the +accumulation for each variable. When it is greater than 1, the result will +be filled with NA until the accum time_dim dimension number due to the +accumulation to previous months.} \item{ref_period}{A list with two numeric values with the starting and end points of the reference period used for computing the index. The default @@ -111,49 +113,75 @@ If can be: 'parametric' or 'non-parametric'.} \item{distribution}{A character string indicating the name of the distribution function to be used for computing the SPEI. The accepted names are: -'log-Logistic', 'Gamma' or 'PearsonIII'. It is set to 'log-Logistic' by -default.} - -\item{fit}{A character string indicating the name of the method used for -computing the distribution function parameters The accepteed names are: -'ub-pwm', 'pp-pwm' and 'max-lik'. It is set to 'ub-pwm' by default.} +'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The +'Gamma' method only works when only precipitation is provided and other +variables are 0 because it is positive defined (SPI indicator).} \item{param_error}{A numeric value with the error accepted.} \item{handle_infinity}{A logical value wether to return Infinite values (TRUE) or not (FALSE).} +\item{return_params}{A logical value indicating wether to return parameters +array (TRUE) or not (FALSE). 
It is FALSE by default.} + \item{na.rm}{A logical value indicating whether NA values should be removed -from data. It is FALSE by default.} +from data. It is FALSE by default. If it is FALSE and there are NA values, +(if standardization is TRUE) all values of other dimensions except time_dim +and leadtime_dim will be set to NA directly. On the other hand, if it is +TRUE, if the data from other dimensions except time_dim and leadtime_dim is +not reaching 4 values, it is not enough values to estimate the parameters +and the result will include NA.} \item{ncores}{An integer value indicating the number of cores to use in parallel computation.} } \value{ -A list with elements: -\itemize{ - \item{'exp', if 'standarization' is TRUE an array conaining SPEI data from - 'exp' array with the same dimensions as 'exp'. If it is FALSE, it - is an array with the accumulated values of PET minus 'prlr' data.} - \item{'exp_cor', if 'standarization' is TRUE and if 'exp_cor' is not - NULL. It is an array with the SPEI data from 'exp_cor'. If - 'standarization' is FALSE, only the accumulated values of PET minus - 'prlr' is returned.} - \item{'params', returned if 'standarization' is TRUE, it contains the - parameters used for the standarization of 'exp' that are used for - computing the 'SPEI' for 'exp_cor'.} -} +An 's2dv_cube' object containing the SPEI multidimensional array in +element \code{data}. If 'exp_cor' is provided, only results from 'exp_cor' +will be provided. The parameters of the standardization will only be returned +if 'return_params' is TRUE. The SPEI will only be computed if +'standardization' is TRUE. If 'standardization' is FALSE, only the climatic +water balance (precipitation minus evapotranspiration) will be returned. The +resultant arrays will have the same dimensions as the initial input data. } \description{ Calculation of the Standardised Precipitation-Evapotranspiration Index (SPEI) that is a multiscalar drought index based on climatic data. It can be used for determining the onset, duration and magnitude of drought conditions with respect to normal conditions in a variety of natural and managed systems such -as crops, ecosystems, rivers, water resources, etc. The SPI is calculated -using monthly (or weekly) precipitation as the input data. The SPEI uses the -monthly (or weekly) difference between precipitation and pet. This represents -a simple climatic water balance which is calculated at different time scales -to obtain the SPEI. +as crops, ecosystems, rivers, water resources, etc. The idea behind the SPEI +is to compare the highest possible evapotranspiration with the current water +availability. The SPEI uses the monthly (or weekly) difference between +precipitation and potential evapotranspiration. This represents a simple +climatic water balance which is calculated at different time scales to obtain +the SPEI. +} +\details{ +Next, some specifications for the calculation of this indicator will be +discussed. On the one hand, the model to be used to calculate potential +evapotranspiration is specified with the pet_method parameter (hargreaves, +hargraves modified or thornwhite). On the other hand, to choose the time scale +in which you want to accumulate the SPEI (SPEI3, SPEI6...) is done using the +accum parameter, where you must indicate the number of time steps you want to +accumulate throughout leadtime_dim. 
Since the accumulation is done for the +elapsed time steps, there will be no complete accumulations until reaching the +time instant equal to the value of the parameter. For this reason, in the +result, we will find that for the dimension where the accumulation has been +carried out, the values of the array will be NA since they do not include +complete accumulations. Also, there is a parameter to specify if the +standardization that can be TRUE or FALSE. If it is TRUE, the data is fit to a +probability distribution to transform the original values to standardized +units that are comparable in space and time and at different SPEI time scales. +The na.rm parameter is a logical parameter used to decide whether to remove +the NA values from the data before doing the calculation. It must be taken +into account that if na.rm == FALSE and there is some NA value in the specific +coordinates which the SPEI is computed, standardization cannot be carried out +for those coordinates and therefore, the result will be filled with NA for the +specific coordinates. However, when na.rm == TRUE, if the amount of data for +those specific coordinates is smaller than 4, it will not be possible to carry +out because we will not have enough data and the result will be also filled +with NAs for that coordinates. } \examples{ dims <- c(var = 1, sday = 1, sweek = 1, syear = 6, time = 3, diff --git a/tests/testthat/test-PeriodAccumulation.R b/tests/testthat/test-PeriodAccumulation.R index 6898a93..64bf3d2 100644 --- a/tests/testthat/test-PeriodAccumulation.R +++ b/tests/testthat/test-PeriodAccumulation.R @@ -1,5 +1,22 @@ library(CSTools) +# dat1 +dat1 <- array(1:6, dim = c(sdate = 2, time = 3, member = 1)) + +# exp1 +exp <- NULL +exp$data <- array(1:(1 * 3 * 214 * 2), + c(memb = 1, sdate = 3, ftime = 214, lon = 2)) +exp$dims <- dim(exp$data) +exp$attrs$Dates <- c(seq(as.Date("01-04-2000", format = "%d-%m-%Y"), + as.Date("31-10-2000", format = "%d-%m-%Y"), by = 'day'), + seq(as.Date("01-04-2001", format = "%d-%m-%Y"), + as.Date("31-10-2001", format = "%d-%m-%Y"), by = 'day'), + seq(as.Date("01-04-2002", format = "%d-%m-%Y"), + as.Date("31-10-2002", format = "%d-%m-%Y"), by = 'day')) +dim(exp$attrs$Dates) <- c(ftime = 214, sdate = 3) +class(exp) <- 's2dv_cube' + ############################################## test_that("1. Sanity Checks", { expect_error( @@ -27,14 +44,14 @@ test_that("1. Sanity Checks", { PeriodAccumulation(1:10), 55 ) - data <- array(1:24, c(sdate = 2, ftime = 3, lon = 4)) + data <- array(1:24, c(sdate = 2, time = 3, lon = 4)) expect_equal( PeriodAccumulation(data), array(c(9, 12, 27, 30, 45, 48, 63, 66), c(sdate = 2, lon = 4)) ) # Test dates warning expect_warning( - PeriodAccumulation(array(1:10, c(ftime = 10)), + PeriodAccumulation(array(1:10, c(time = 10)), dates = seq(as.Date("01-05-2000", format = "%d-%m-%Y"), as.Date("10-05-2000", format = "%d-%m-%Y"), by = 'day'), start = list(05, 02), end = list(05, 09)), @@ -43,7 +60,7 @@ test_that("1. Sanity Checks", { ) # start and end when dates is not provided expect_warning( - PeriodAccumulation(array(1:10, c(ftime = 10)), + PeriodAccumulation(array(1:10, c(time = 10)), start = list(05, 02), end = list(05, 09)), paste0("Parameter 'dates' is NULL and the average of the full data ", "provided in 'data' is computed.") @@ -53,18 +70,6 @@ test_that("1. Sanity Checks", { ############################################## test_that("2. 
Seasonal", { - exp <- NULL - exp$data <- array(1:(1 * 3 * 214 * 2), - c(memb = 1, sdate = 3, ftime = 214, lon = 2)) - exp$dims <- dim(exp$data) - exp$attrs$Dates <- c(seq(as.Date("01-04-2000", format = "%d-%m-%Y"), - as.Date("31-10-2000", format = "%d-%m-%Y"), by = 'day'), - seq(as.Date("01-04-2001", format = "%d-%m-%Y"), - as.Date("31-10-2001", format = "%d-%m-%Y"), by = 'day'), - seq(as.Date("01-04-2002", format = "%d-%m-%Y"), - as.Date("31-10-2002", format = "%d-%m-%Y"), by = 'day')) - dim(exp$attrs$Dates) <- c(ftime = 214, sdate = 3) - class(exp) <- 's2dv_cube' output <- exp output$data <- array(c(sum(exp$data[1,1,21:82,1]), sum(exp$data[1,2,21:82,1]), sum(exp$data[1,3,21:82,1]), sum(exp$data[1,1,21:82,2]), @@ -132,4 +137,27 @@ test_that("3. Subset Dates and check time_bounds", { all(lubridate::day(res$attrs$time_bounds$end) == 20), TRUE ) -}) \ No newline at end of file +}) + +############################################## + +test_that("4. Rolling", { + expect_error( + PeriodAccumulation(data = dat1, rollingwidth = 'a'), + "Parameter 'rollingwidth' must be a numeric value." + ) + expect_error( + PeriodAccumulation(data = dat1, rollingwidth = 5), + "Cannot compute accumulation of 5 months because loaded data has only 3 months." + ) + expect_equal( + PeriodAccumulation(data = dat1, rollingwidth = 2), + array(c(4,6,8, 10, NA, NA), dim = c(sdate = 2, time = 3, member = 1)) + ) + expect_equal( + PeriodAccumulation(data = dat1, rollingwidth = -3), + array(c(rep(NA, 4), 9, 12), dim = c(sdate = 2, time = 3, member = 1)) + ) + dat1[1,1,1] <- NA + PeriodAccumulation(data = dat1, rollingwidth = 2, na.rm = FALSE) +}) -- GitLab From c0919b19be5f79d9c9e2b760bc979d3e10bf3661 Mon Sep 17 00:00:00 2001 From: EVA RIFA ROVIRA Date: Wed, 2 Aug 2023 17:44:55 +0200 Subject: [PATCH 43/87] Add new function PeriodStandardization; change variable names in PeriodPET and improve PeriodAccumulation --- R/PeriodAccumulation.R | 15 ++-- R/PeriodPET.R | 44 ++++++------ R/PeriodStandardization.R | 147 ++++++++++++++++++++++++++++++++++++++ man/PeriodAccumulation.Rd | 1 + man/PeriodPET.Rd | 16 ++--- 5 files changed, 188 insertions(+), 35 deletions(-) create mode 100644 R/PeriodStandardization.R diff --git a/R/PeriodAccumulation.R b/R/PeriodAccumulation.R index b649a1d..553b1a5 100644 --- a/R/PeriodAccumulation.R +++ b/R/PeriodAccumulation.R @@ -201,10 +201,12 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, #' end = list(21, 10)) #' #'@import multiApply +#'@import zoo #'@export PeriodAccumulation <- function(data, dates = NULL, start = NULL, end = NULL, time_dim = 'time', rollingwidth = NULL, - na.rm = FALSE, ncores = NULL) { + rolling_add_na = FALSE, na.rm = FALSE, + ncores = NULL) { if (is.null(data)) { stop("Parameter 'data' cannot be NULL.") } @@ -258,11 +260,14 @@ PeriodAccumulation <- function(data, dates = NULL, start = NULL, end = NULL, fun = function(x, accum, backroll, na.rm) { res <- rollapply(data = x, width = accum, FUN = sum, na.rm = na.rm) - if (backroll) { - return(c(rep(NA, accum-1), res)) - } else { - return(c(res, rep(NA, accum-1))) + if (rolling_add_na) { + if (backroll) { + res <- c(rep(NA, accum-1), res) + } else { + res <- c(res, rep(NA, accum-1)) + } } + return(res) }, accum = rollingwidth, backroll = backroll, na.rm = na.rm, ncores = ncores)$output1 pos <- match(dimnames, names(dim(total))) diff --git a/R/PeriodPET.R b/R/PeriodPET.R index fa4079c..67b6afd 100644 --- a/R/PeriodPET.R +++ b/R/PeriodPET.R @@ -11,19 +11,19 @@ #' the seasonal forecast experiment in the data 
element for each variable. #' Specific variables are needed for each method used in computing the #' Potential Evapotranspiration. See parameter 'pet_method'. The accepted -#' variables are: 'tasmin' and 'tasmax', required for methods 'hargreaves' and -#' 'hargreaves_modified' and 'tas', required for method 'thornthwaite'. -#' Variable 'prlr' is always needed. The units for temperature variables -#' ('tasmin', 'tasmax' and 'tas') need to be in Celcius degrees; the units for -#' precipitation ('prlr') need to be in mm/month. +#' variables are: 'tmin' and 'tmax', required for methods 'hargreaves' and +#' 'hargreaves_modified' and 'tmean', required for method 'thornthwaite'. +#' Variable 'pr' is always needed. The units for temperature variables +#' ('tmin', 'tmax' and 'tmean') need to be in Celcius degrees; the units for +#' precipitation ('pr') need to be in mm/month. #'@param dates An array of temporal dimensions containing the Dates of #' 'exp'. It must be of class 'Date' or 'POSIXct'. #'@param lat A numeric vector containing the latitude values of 'exp'. #'@param pet_method A character string indicating the method used to compute #' the potential evapotranspiration. The accepted methods are: #' 'hargreaves' and 'hargreaves_modified', that require the data to have -#' variables tasmin and tasmax; and 'thornthwaite', that requires variable -#' 'tas'. +#' variables tmin and tmax; and 'thornthwaite', that requires variable +#' 'tmean'. #'@param time_dim A character string indicating the name of the temporal #' dimension. By default, it is set to 'syear'. #'@param leadtime_dim A character string indicating the name of the temporal @@ -49,7 +49,7 @@ #' #'lat <- c(40) #' -#'exp1 <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) +#'exp1 <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) #' #'res <- PeriodPET(data = exp1, lat = lat, dates = dates_exp) #' @@ -92,20 +92,20 @@ PeriodPET <- function(data, dates, lat, pet_method = 'hargreaves', } # data (2) - if (all(c('tasmin', 'tasmax', 'prlr') %in% names(data))) { - # hargreaves modified: 'tasmin', 'tasmax', 'prlr' and 'lat' + if (all(c('tmin', 'tmax', 'pr') %in% names(data))) { + # hargreaves modified: 'tmin', 'tmax', 'pr' and 'lat' if (!(pet_method %in% c('hargreaves_modified', 'hargreaves'))) { warning("Parameter 'pet_method' needs to be 'hargreaves' or ", "'hargreaves_modified'. 
It is set to 'hargreaves_modified'.") pet_method <- 'hargreaves_modified' } - } else if (all(c('tasmin', 'tasmax') %in% names(data))) { + } else if (all(c('tmin', 'tmax') %in% names(data))) { if (!(pet_method %in% c('hargreaves'))) { warning("Parameter 'pet_method' will be set as 'hargreaves'.") pet_method <- 'hargreaves' } - } else if (c('tas') %in% names(data)) { - # thornthwaite: 'tas' (mean), 'lat' + } else if (c('tmean') %in% names(data)) { + # thornthwaite: 'tmean' (mean), 'lat' if (!(pet_method == 'thornthwaite')) { warning("Parameter 'pet_method' it is set to be 'thornthwaite'.") pet_method <- 'thornthwaite' @@ -173,17 +173,17 @@ PeriodPET <- function(data, dates, lat, pet_method = 'hargreaves', # extract mask of NA locations to return to NA the final result mask_na <- array(1, dim = dim(data[[1]])) if (pet_method == 'hargreaves') { - varnames <- c('tasmax', 'tasmin') - mask_na[which(is.na(data$tasmax))] <- 0 - mask_na[which(is.na(data$tasmin))] <- 0 + varnames <- c('tmax', 'tmin') + mask_na[which(is.na(data$tmax))] <- 0 + mask_na[which(is.na(data$tmin))] <- 0 } else if (pet_method == 'hargreaves_modified') { - varnames <- c('tasmax', 'tasmin', 'prlr') - mask_na[which(is.na(data$tasmax))] <- 0 - mask_na[which(is.na(data$tasmin))] <- 0 - mask_na[which(is.na(data$prlr))] <- 0 + varnames <- c('tmax', 'tmin', 'pr') + mask_na[which(is.na(data$tmax))] <- 0 + mask_na[which(is.na(data$tmin))] <- 0 + mask_na[which(is.na(data$pr))] <- 0 } else if (pet_method == 'thornthwaite') { - varnames <- c('tas') - mask_na[which(is.na(data$tas))] <- 0 + varnames <- c('tmean') + mask_na[which(is.na(data$tmean))] <- 0 } # replace NA with 0 diff --git a/R/PeriodStandardization.R b/R/PeriodStandardization.R new file mode 100644 index 0000000..b4f21e4 --- /dev/null +++ b/R/PeriodStandardization.R @@ -0,0 +1,147 @@ + +# WIP + +PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', + leadtime_dim = 'time', memb_dim = 'ensemble', + ref_period = NULL, cross_validation = FALSE, + handle_infinity = FALSE, param_error = -9999, + method = 'parametric', distribution = 'log-Logistic', + na.rm = FALSE, ncores = NULL) { + # Initial checks + target_dims <- c(leadtime_dim, time_dim, memb_dim) + + if (is.null(ref_period)) { + ref.start <- NULL + ref.end <- NULL + } else { + ref.start <- ref_period[[1]] + ref.end <- ref_period[[2]] + } + + + # Standardization + if (is.null(data_cor)) { + spei <- Apply(data = list(data), + target_dims = target_dims, + fun = .standardization, + leadtime_dim = leadtime_dim, + time_dim = time_dim, memb_dim = memb_dim, + ref_period = ref_period, handle_infinity = handle_infinity, + cross_validation = cross_validation, param_error = param_error, + method = method, distribution = distribution, + na.rm = na.rm, ncores = ncores)$output1 + } else { + spei <- Apply(data = list(data, data_cor), target_dims = target_dims, + fun = .standardization, + leadtime_dim = leadtime_dim, + time_dim = time_dim, memb_dim = memb_dim, + ref_period = ref_period, handle_infinity = handle_infinity, + cross_validation = cross_validation, param_error = param_error, + method = method, distribution = distribution, + na.rm = na.rm, ncores = ncores)$output1 + } + return(spei) +} + +# data <- array(rnorm(10), c(time = 3, syear = 6, ensemble = 25)) + +# res <- .standardization(data = data) + +# data_subset <- ClimProjDiags::Subset(data, along = leadtime_dim, +# indices = ff, drop = 'selected') + +.standardization <- function(data, data_cor = NULL, leadtime_dim = 'time', + time_dim = 'syear', memb_dim = 
'ensemble', + ref_period = NULL, handle_infinity = FALSE, + cross_validation = FALSE, param_error = -9999, + method = 'parametric', distribution = 'log-Logistic', + na.rm = FALSE) { + # data: [leadtime_dim, time_dim, memb_dim] + + # maximum number of parameters needed + nleadtime <- as.numeric(dim(data)[leadtime_dim]) + ntime <- as.numeric(dim(data)[time_dim]) + nmemb <- as.numeric(dim(data)[memb_dim]) + fit = 'ub-pwm' + + coef = switch(distribution, + "Gamma" = array(NA, dim = 2, dimnames = list(c('alpha', 'beta'))), + "log-Logistic" = array(NA, dim = 3, dimnames = list(c('xi', 'alpha', 'kappa'))), + "PearsonIII" = array(NA, dim = 3, dimnames = list(c('mu', 'sigma', 'gamma')))) + + if (is.null(data_cor)) { + # cross_val = TRUE + spei_mod <- data*NA + print(dim(spei_mod)) + for (ff in 1:dim(data)[leadtime_dim]) { + data2 <- data[ff, , ] + params_result <- array(dim = c(dim(data)[time_dim], length(coef))) + print(dim(data2)) + if (!is.null(ref.start) && !is.null(ref.end)) { + data.fit <- window(data2, ref.start, ref.end) + } else { + data.fit <- data2 + } + for (nsd in 1:dim(data)[time_dim]) { + acu <- as.vector(data.fit[-nsd, ]) + acu.sorted <- sort.default(acu, method = "quick") + if (na.rm) { + acu.sorted <- acu.sorted[!is.na(acu.sorted)] + } + if (!any(is.na(acu.sorted))) { + print('Inside acu.sorted') + if (length(acu.sorted) != 0) { + acu_sd <- sd(acu.sorted) + if (!is.na(acu_sd) & acu_sd != 0) { + if (distribution != "log-Logistic") { + pze <- sum(acu == 0) / length(acu) + acu.sorted <- acu.sorted[acu.sorted > 0] + } + if (length(acu.sorted) >= 4) { + print('acu.sorted') + print(acu.sorted) + pwm = switch(fit, + "pp-pwm" = pwm.pp(acu.sorted, -0.35, 0, nmom = 3), + pwm.ub(acu.sorted, nmom = 3) + # TLMoments::PWM(acu.sorted, order = 0:2) + ) + lmom <- pwm2lmom(pwm) + if (!(!are.lmom.valid(lmom) || anyNA(lmom[[1]]) || any(is.nan(lmom[[1]])))) { + fortran_vec = c(lmom$lambdas[1:2], lmom$ratios[3]) + f_params = switch(distribution, + "log-Logistic" = tryCatch(lmom::pelglo(fortran_vec), + error = function(e){parglo(lmom)$para}), + "Gamma" = tryCatch(lmom::pelgam(fortran_vec), + error = function(e){pargam(lmom)$para}), + "PearsonIII" = tryCatch(lmom::pelpe3(fortran_vec), + error = function(e){parpe3(lmom)$para})) + if (distribution == 'log-Logistic' && fit == 'max-lik') { + f_params = parglo.maxlik(acu.sorted, f_params)$para + } + params_result[nsd, ] <- f_params + } + } + if (all(is.na(params_result[nsd,]))) { + cdf_res <- NA + } else { + f_params <- params_result[nsd,] + f_params <- f_params[which(!is.na(f_params))] + cdf_res = switch(distribution, + "log-Logistic" = lmom::cdfglo(data, f_params), + "Gamma" = lmom::cdfgam(data, f_params), + "PearsonIII" = lmom::cdfpe3(data, f_params)) + } + std_index_cv <- array(qnorm(cdf_res), dim = c(ntime, nmemb)) + spei_mod[ff, nsd, ] <- std_index_cv[nsd, ] + } + } + } + } + } + } else { + # cross_val = FALSE + + } + return(spei_mod) +} + diff --git a/man/PeriodAccumulation.Rd b/man/PeriodAccumulation.Rd index a3dbf46..c07afea 100644 --- a/man/PeriodAccumulation.Rd +++ b/man/PeriodAccumulation.Rd @@ -11,6 +11,7 @@ PeriodAccumulation( end = NULL, time_dim = "time", rollingwidth = NULL, + rolling_add_na = FALSE, na.rm = FALSE, ncores = NULL ) diff --git a/man/PeriodPET.Rd b/man/PeriodPET.Rd index 719d2fa..69aad45 100644 --- a/man/PeriodPET.Rd +++ b/man/PeriodPET.Rd @@ -21,11 +21,11 @@ PeriodPET( the seasonal forecast experiment in the data element for each variable. 
Specific variables are needed for each method used in computing the Potential Evapotranspiration. See parameter 'pet_method'. The accepted -variables are: 'tasmin' and 'tasmax', required for methods 'hargreaves' and -'hargreaves_modified' and 'tas', required for method 'thornthwaite'. -Variable 'prlr' is always needed. The units for temperature variables -('tasmin', 'tasmax' and 'tas') need to be in Celcius degrees; the units for -precipitation ('prlr') need to be in mm/month.} +variables are: 'tmin' and 'tmax', required for methods 'hargreaves' and +'hargreaves_modified' and 'tmean', required for method 'thornthwaite'. +Variable 'pr' is always needed. The units for temperature variables +('tmin', 'tmax' and 'tmean') need to be in Celcius degrees; the units for +precipitation ('pr') need to be in mm/month.} \item{dates}{An array of temporal dimensions containing the Dates of 'exp'. It must be of class 'Date' or 'POSIXct'.} @@ -35,8 +35,8 @@ precipitation ('prlr') need to be in mm/month.} \item{pet_method}{A character string indicating the method used to compute the potential evapotranspiration. The accepted methods are: 'hargreaves' and 'hargreaves_modified', that require the data to have -variables tasmin and tasmax; and 'thornthwaite', that requires variable -'tas'.} +variables tmin and tmax; and 'thornthwaite', that requires variable +'tmean'.} \item{time_dim}{A character string indicating the name of the temporal dimension. By default, it is set to 'syear'.} @@ -75,7 +75,7 @@ dim(dates_exp) <- c(syear = 3, time = 3) lat <- c(40) -exp1 <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) +exp1 <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) res <- PeriodPET(data = exp1, lat = lat, dates = dates_exp) -- GitLab From b0f124fe7f9babddc312662fb0808472633d9c75 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 3 Aug 2023 18:31:48 +0200 Subject: [PATCH 44/87] Develop PeriodStandardization for exp_cor: reorganized code --- R/PeriodStandardization.R | 110 +++++++++++++++++++++++++++++++------- 1 file changed, 90 insertions(+), 20 deletions(-) diff --git a/R/PeriodStandardization.R b/R/PeriodStandardization.R index b4f21e4..88ab915 100644 --- a/R/PeriodStandardization.R +++ b/R/PeriodStandardization.R @@ -1,7 +1,7 @@ # WIP -PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', +PeriodStandardization <- function(data, data_cor = NULL, accum = NULL, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', ref_period = NULL, cross_validation = FALSE, handle_infinity = FALSE, param_error = -9999, @@ -40,22 +40,33 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', method = method, distribution = distribution, na.rm = na.rm, ncores = ncores)$output1 } + + # add NA + if (!is.null(accum)) { + spei <- Apply(data = list(spei), target_dims = leadtime_dim, + output_dims = leadtime_dim, + fun = function(x, accum, leadtime_dim) { + res <- c(rep(NA, accum-1), x) + return(res) + }, accum = accum, leadtime_dim = leadtime_dim)$output1 + } + if (is.null(data_cor)) { + pos <- match(names(dim(data)), names(dim(spei))) + spei <- aperm(spei, pos) + } else { + pos <- match(names(dim(data_cor)), names(dim(spei))) + spei <- aperm(spei, pos) + } return(spei) } -# data <- array(rnorm(10), c(time = 3, syear = 6, ensemble = 25)) - -# res <- .standardization(data = data) - -# data_subset <- ClimProjDiags::Subset(data, along = leadtime_dim, -# indices = ff, drop = 'selected') - .standardization <- function(data, data_cor = NULL, 
leadtime_dim = 'time', time_dim = 'syear', memb_dim = 'ensemble', ref_period = NULL, handle_infinity = FALSE, cross_validation = FALSE, param_error = -9999, method = 'parametric', distribution = 'log-Logistic', na.rm = FALSE) { + print(summary(data)) # data: [leadtime_dim, time_dim, memb_dim] # maximum number of parameters needed @@ -72,11 +83,10 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', if (is.null(data_cor)) { # cross_val = TRUE spei_mod <- data*NA - print(dim(spei_mod)) + params_result <- array(data = NA, dim = c(ntime, nleadtime, length(coef))) for (ff in 1:dim(data)[leadtime_dim]) { data2 <- data[ff, , ] - params_result <- array(dim = c(dim(data)[time_dim], length(coef))) - print(dim(data2)) + dim(data2) <- c(ntime, nmemb) if (!is.null(ref.start) && !is.null(ref.end)) { data.fit <- window(data2, ref.start, ref.end) } else { @@ -89,7 +99,6 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', acu.sorted <- acu.sorted[!is.na(acu.sorted)] } if (!any(is.na(acu.sorted))) { - print('Inside acu.sorted') if (length(acu.sorted) != 0) { acu_sd <- sd(acu.sorted) if (!is.na(acu_sd) & acu_sd != 0) { @@ -98,8 +107,6 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', acu.sorted <- acu.sorted[acu.sorted > 0] } if (length(acu.sorted) >= 4) { - print('acu.sorted') - print(acu.sorted) pwm = switch(fit, "pp-pwm" = pwm.pp(acu.sorted, -0.35, 0, nmom = 3), pwm.ub(acu.sorted, nmom = 3) @@ -118,18 +125,18 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', if (distribution == 'log-Logistic' && fit == 'max-lik') { f_params = parglo.maxlik(acu.sorted, f_params)$para } - params_result[nsd, ] <- f_params + params_result[nsd, ff, ] <- f_params } } - if (all(is.na(params_result[nsd,]))) { + if (all(is.na(params_result[nsd, ff, ]))) { cdf_res <- NA } else { - f_params <- params_result[nsd,] + f_params <- params_result[nsd, ff, ] f_params <- f_params[which(!is.na(f_params))] cdf_res = switch(distribution, - "log-Logistic" = lmom::cdfglo(data, f_params), - "Gamma" = lmom::cdfgam(data, f_params), - "PearsonIII" = lmom::cdfpe3(data, f_params)) + "log-Logistic" = lmom::cdfglo(data2, f_params), + "Gamma" = lmom::cdfgam(data2, f_params), + "PearsonIII" = lmom::cdfpe3(data2, f_params)) } std_index_cv <- array(qnorm(cdf_res), dim = c(ntime, nmemb)) spei_mod[ff, nsd, ] <- std_index_cv[nsd, ] @@ -140,7 +147,70 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', } } else { # cross_val = FALSE + spei_mod <- data_cor*NA + params_result <- array(data = NA, dim = c(1, length(coef))) + for (ff in 1:dim(data)[leadtime_dim]) { + data_cor2 <- data_cor[ff, , ] + dim(data_cor2) <- c(1, nmemb) + data2 <- data[ff, , ] + dim(data2) <- c(ntime, nmemb) + if (!is.null(ref.start) && !is.null(ref.end)) { + data.fit <- window(data2, ref.start, ref.end) + } else { + data.fit <- data2 + } + acu <- as.vector(data.fit) + acu.sorted <- sort.default(acu, method = "quick") + if (na.rm) { + acu.sorted <- acu.sorted[!is.na(acu.sorted)] + } + if (!any(is.na(acu.sorted))) { + if (length(acu.sorted) != 0) { + acu_sd <- sd(acu.sorted) + if (!is.na(acu_sd) & acu_sd != 0) { + if (distribution != "log-Logistic") { + pze <- sum(acu == 0) / length(acu) + acu.sorted <- acu.sorted[acu.sorted > 0] + } + if (length(acu.sorted) >= 4) { + pwm = switch(fit, + "pp-pwm" = pwm.pp(acu.sorted, -0.35, 0, nmom = 3), + pwm.ub(acu.sorted, nmom = 3) + # TLMoments::PWM(acu.sorted, order = 0:2) + ) + lmom <- pwm2lmom(pwm) + if 
(!(!are.lmom.valid(lmom) || anyNA(lmom[[1]]) || any(is.nan(lmom[[1]])))) { + fortran_vec = c(lmom$lambdas[1:2], lmom$ratios[3]) + f_params = switch(distribution, + "log-Logistic" = tryCatch(lmom::pelglo(fortran_vec), + error = function(e){parglo(lmom)$para}), + "Gamma" = tryCatch(lmom::pelgam(fortran_vec), + error = function(e){pargam(lmom)$para}), + "PearsonIII" = tryCatch(lmom::pelpe3(fortran_vec), + error = function(e){parpe3(lmom)$para})) + if (distribution == 'log-Logistic' && fit == 'max-lik') { + f_params = parglo.maxlik(acu.sorted, f_params)$para + } + params_result[1, ] <- f_params + } + } + if (all(is.na(params_result[1, ]))) { + cdf_res <- NA + } else { + f_params <- params_result[1, ] + f_params <- f_params[which(!is.na(f_params))] + cdf_res = switch(distribution, + "log-Logistic" = lmom::cdfglo(data_cor2, f_params), + "Gamma" = lmom::cdfgam(data_cor2, f_params), + "PearsonIII" = lmom::cdfpe3(data_cor2, f_params)) + } + std_index_cv <- array(qnorm(cdf_res), dim = c(1, nmemb)) + spei_mod[ff, , ] <- std_index_cv + } + } + } + } } return(spei_mod) } -- GitLab From 9b78395ef69a7f58a17dd6f0f3afb1a994745621 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Mon, 7 Aug 2023 09:24:00 +0200 Subject: [PATCH 45/87] Change variable names in PeriodSPEI --- R/PeriodSPEI.R | 89 +++++++++++++++++++++++-------------------- man/CST_PeriodSPEI.Rd | 24 ++++++------ man/PeriodSPEI.Rd | 24 ++++++------ 3 files changed, 72 insertions(+), 65 deletions(-) diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index 9bc1660..5e78d7d 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -42,11 +42,11 @@ #' the seasonal forecast experiment in the data element for each variable. #' Specific variables are needed for each method used in computing the #' Potential Evapotranspiration. See parameter 'pet_method'. The accepted -#' variables are: 'tasmin' and 'tasmax', required for methods 'hargreaves' and -#' 'hargreaves_modified' and 'tas', required for method 'thornthwaite'. -#' Variable 'prlr' is always needed. The units for temperature variables -#' ('tasmin', 'tasmax' and 'tas') need to be in Celcius degrees; the units for -#' precipitation ('prlr') need to be in mm/month. +#' variables are: 'tmin' and 'tmax', required for methods 'hargreaves' and +#' 'hargreaves_modified' and 'tmean', required for method 'thornthwaite'. +#' Variable 'pr' is always needed. The units for temperature variables +#' ('tmin', 'tmax' and 'tmean') need to be in Celcius degrees; the units for +#' precipitation ('pr') need to be in mm/month. #'@param exp_cor A named list with the needed \code{s2dv_cube} objects for each #' variable in which the quantile PeriodSPEI should be applied. If it is not #' specified, the PeriodSPEI is calculated from object 'exp'. @@ -77,11 +77,11 @@ #' have member dimension (specified in 'memb_dim'). #'@param pet_exp A multidimensional array containing the Potential #' EvapoTranspiration data of 'exp'. It must have the same dimensions of the -#' variable 'prlr' of 'exp'. If it is NULL it is calculated using the provided +#' variable 'pr' of 'exp'. If it is NULL it is calculated using the provided #' variables with specified 'pet_method'. It is NULL by default. #'@param pet_expcor A multidimensional array containing the Potential #' EvapoTranspiration data of 'exp_cor'. It must have the same dimensions of -#' the variable 'prlr' of 'exp_cor'. If it is NULL it is calculated using the +#' the variable 'pr' of 'exp_cor'. If it is NULL it is calculated using the #' provided variables with specified 'pet_method'. 
It is NULL by default. #'@param standardization A logical value indicating wether the standardization #' is computed. @@ -91,8 +91,8 @@ #'@param pet_method A character string indicating the method used to compute #' the potential evapotranspiration. The accepted methods are: #' 'hargreaves' and 'hargreaves_modified', that require the data to have -#' variables tasmin and tasmax; and 'thornthwaite', that requires variable -#' 'tas'. +#' variables tmin and tmax; and 'thornthwaite', that requires variable +#' 'tmean'. #'@param method A character string indicating the standardization method used. #' If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by #' default. @@ -147,9 +147,9 @@ #'expcor_tasmin <- array(rnorm(540, 15.70, 4.40), dim = dimscor) #'expcor_prlr <- array(rnorm(540, 15.62, 21.38), dim = dimscor) #' -#'exp <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) -#'exp_cor <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, -#' 'prlr' = expcor_prlr) +#'exp <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) +#'exp_cor <- list('tmax' = expcor_tasmax, 'tmin' = expcor_tasmin, +#' 'pr' = expcor_prlr) #' #'exp <- lapply(exp, CSTools::s2dv_cube, coords = list(latitude = lat), #' Dates = dates_exp) @@ -323,11 +323,11 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, #' the seasonal forecast experiment in the data element for each variable. #' Specific variables are needed for each method used in computing the #' Potential Evapotranspiration. See parameter 'pet_method'. The accepted -#' variables are: 'tasmin' and 'tasmax', required for methods 'hargreaves' and -#' 'hargreaves_modified' and 'tas', required for method 'thornthwaite'. -#' Variable 'prlr' is always needed. The units for temperature variables -#' ('tasmin', 'tasmax' and 'tas') need to be in Celcius degrees; the units for -#' precipitation ('prlr') need to be in mm/month. +#' variables are: 'tmin' and 'tmax', required for methods 'hargreaves' and +#' 'hargreaves_modified' and 'tmean', required for method 'thornthwaite'. +#' Variable 'pr' is always needed. The units for temperature variables +#' ('tmin', 'tmax' and 'tmean') need to be in Celcius degrees; the units for +#' precipitation ('pr') need to be in mm/month. #'@param dates_exp An array of temporal dimensions containing the Dates of #' 'exp'. It must be of class 'Date' or 'POSIXct'. #'@param lat A numeric vector containing the latitude values of 'exp'. @@ -362,11 +362,11 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, #' leadtime dimension (specified in 'leadtime_dim') of length 1. #'@param pet_exp A multidimensional array containing the Potential #' EvapoTranspiration data of 'exp'. It must have the same dimensions of the -#' variable 'prlr' of 'exp'. If it is NULL it is calculated using the provided +#' variable 'pr' of 'exp'. If it is NULL it is calculated using the provided #' variables with specified 'pet_method'. It is NULL by default. #'@param pet_expcor A multidimensional array containing the Potential #' EvapoTranspiration data of 'exp_cor'. It must have the same dimensions of -#' the variable 'prlr' of 'exp_cor'. If it is NULL it is calculated using the +#' the variable 'pr' of 'exp_cor'. If it is NULL it is calculated using the #' provided variables with specified 'pet_method'. It is NULL by default. #'@param standardization A logical value indicating wether the standardization #' is computed. 
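In practice, the renaming above means each 'pet_method' expects a fixed set of variable names in the input list, with 'pr' always required. A minimal sketch of that mapping, outside the patch itself; the helper name 'check_pet_vars' and the toy arrays are illustrative only, not part of the package:

required_vars <- list(
  hargreaves          = c("tmin", "tmax"),
  hargreaves_modified = c("tmin", "tmax", "pr"),
  thornthwaite        = c("tmean")
)
check_pet_vars <- function(exp, pet_method) {
  # 'pr' is always needed for the P - PET water balance, whatever the method
  needed  <- union(required_vars[[pet_method]], "pr")
  missing <- setdiff(needed, names(exp))
  if (length(missing) > 0) {
    stop("Missing variables for '", pet_method, "': ", toString(missing))
  }
  invisible(TRUE)
}
exp <- list(tmax = array(rnorm(9), c(syear = 3, time = 3)),
            tmin = array(rnorm(9), c(syear = 3, time = 3)),
            pr   = array(rnorm(9), c(syear = 3, time = 3)))
check_pet_vars(exp, "hargreaves_modified")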
@@ -376,8 +376,8 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, #'@param pet_method A character string indicating the method used to compute #' the potential evapotranspiration. The accepted methods are: #' 'hargreaves' and 'hargreaves_modified', that require the data to have -#' variables tasmin and tasmax; and 'thornthwaite', that requires variable -#' 'tas'. +#' variables tmin and tmax; and 'thornthwaite', that requires variable +#' 'tmean'. #'@param method A character string indicating the standardization method used. #' If can be: 'parametric' or 'non-parametric'. #'@param distribution A character string indicating the name of the distribution @@ -431,9 +431,9 @@ CST_PeriodSPEI <- function(exp, exp_cor = NULL, #'dim(dates_expcor) <- c(sday = 1, sweek = 1, syear = 1, time = 3) #'lat <- c(40,40.1) #' -#'exp <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) -#'exp_cor <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, -#' 'prlr' = expcor_prlr) +#'exp <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) +#'exp_cor <- list('tmax' = expcor_tasmax, 'tmin' = expcor_tasmin, +#' 'pr' = expcor_prlr) #'res <- PeriodSPEI(exp = exp, exp_cor = exp_cor, lat = lat, #' dates_exp = dates_exp, dates_expcor = dates_expcor) #' @@ -498,13 +498,13 @@ PeriodSPEI <- function(exp, dates_exp, lat, # Variable checks ## exp (2) pet <- vector("list", 2) - if (!('prlr' %in% names(exp))) { - stop("Variable 'prlr' is not included in 'exp'.") + if (!('pr' %in% names(exp))) { + stop("Variable 'pr' is not included in 'exp'.") } ## exp_cor (2) if (!is.null(exp_cor)) { - if (!('prlr' %in% names(exp_cor))) { - stop("Variable 'prlr' is not included in 'exp_cor'.") + if (!('pr' %in% names(exp_cor))) { + stop("Variable 'pr' is not included in 'exp_cor'.") } if (length(pet_method) == 1) { pet_method <- rep(pet_method, 2) @@ -513,16 +513,16 @@ PeriodSPEI <- function(exp, dates_exp, lat, ## pet_exp if (!is.null(pet_exp)) { - if (length(dim(exp[['prlr']])) != length(dim(pet_exp))) { + if (length(dim(exp[['pr']])) != length(dim(pet_exp))) { stop("Parameter 'pet_exp' must have the same length of all the ", - "dimensions as variable 'prlr' in 'exp'.") + "dimensions as variable 'pr' in 'exp'.") } - if (!all(dim(exp[['prlr']]) %in% dim(pet_exp))) { + if (!all(dim(exp[['pr']]) %in% dim(pet_exp))) { stop("Parameter 'pet_exp' must have the same length of all the ", - "dimensions as variable 'prlr' in 'exp'.") + "dimensions as variable 'pr' in 'exp'.") } - if (any(names(dim(exp[['prlr']])) != names(dim(pet_exp)))) { - pos <- match(names(dim(exp[['prlr']])), names(dim(pet_exp))) + if (any(names(dim(exp[['pr']])) != names(dim(pet_exp)))) { + pos <- match(names(dim(exp[['pr']])), names(dim(pet_exp))) pet_exp <- aperm(pet_exp, pos) } pet[[1]] <- pet_exp @@ -532,16 +532,16 @@ PeriodSPEI <- function(exp, dates_exp, lat, ## pet_expcor if (!is.null(exp_cor)) { if (!is.null(pet_expcor)) { - if (length(dim(exp_cor[['prlr']])) != length(dim(pet_expcor))) { + if (length(dim(exp_cor[['pr']])) != length(dim(pet_expcor))) { stop("Parameter 'pet_expcor' must have the same length of all the ", - "dimensions as variable 'prlr' in 'exp_cor'.") + "dimensions as variable 'pr' in 'exp_cor'.") } - if (!all(dim(exp_cor[['prlr']]) %in% dim(pet_expcor))) { + if (!all(dim(exp_cor[['pr']]) %in% dim(pet_expcor))) { stop("Parameter 'pet_expcor' must have the same length of all the ", - "dimensions as variable 'prlr' in 'exp_cor'.") + "dimensions as variable 'pr' in 'exp_cor'.") } - if (any(names(dim(exp_cor[['prlr']])) 
!= names(dim(pet_expcor)))) { - pos <- match(names(dim(exp_cor[['prlr']])), names(dim(pet_expcor))) + if (any(names(dim(exp_cor[['pr']])) != names(dim(pet_expcor)))) { + pos <- match(names(dim(exp_cor[['pr']])), names(dim(pet_expcor))) pet_expcor <- aperm(pet_expcor, pos) } pet[[2]] <- pet_expcor @@ -767,11 +767,13 @@ PeriodSPEI <- function(exp, dates_exp, lat, lat = lat, pet_method = pet_method[k], time_dim = time_dim, leadtime_dim = leadtime_dim, lat_dim = lat_dim, na.rm = na.rm, ncores = ncores) + print("Step (1): PET") + print(summary(pet[[k]])) computed_pet <- TRUE } # Accumulation - diff_p_pet <- data$prlr - pet[[k]] + diff_p_pet <- data$pr - pet[[k]] accumulated <- Apply(data = list(diff_p_pet), target_dims = list(data = c(leadtime_dim, time_dim)), @@ -779,6 +781,8 @@ PeriodSPEI <- function(exp, dates_exp, lat, fun = function(data, accum) { return(rollapply(data = data, width = accum, FUN = sum)) }, accum = accum, ncores = ncores)$output1 + print("Step (2): Accumulation") + print(summary(accumulated)) # Standardization if (standardization) { @@ -791,6 +795,7 @@ PeriodSPEI <- function(exp, dates_exp, lat, param_error = param_error, method = method, distribution = distribution, na.rm = na.rm, ncores = ncores) + ref_period <- NULL params <- spei$params spei_res <- spei[[1]] @@ -806,6 +811,8 @@ PeriodSPEI <- function(exp, dates_exp, lat, }, accum = accum, leadtime_dim = leadtime_dim)$output1 pos <- match(names(dim(data[[1]])), names(dim(spei_res))) spei_res <- aperm(spei_res, pos) + print("Step (3): Standardization") + print(summary(spei_res)) } if (standardization) { diff --git a/man/CST_PeriodSPEI.Rd b/man/CST_PeriodSPEI.Rd index eaa139e..19bf92a 100644 --- a/man/CST_PeriodSPEI.Rd +++ b/man/CST_PeriodSPEI.Rd @@ -33,11 +33,11 @@ CST_PeriodSPEI( the seasonal forecast experiment in the data element for each variable. Specific variables are needed for each method used in computing the Potential Evapotranspiration. See parameter 'pet_method'. The accepted -variables are: 'tasmin' and 'tasmax', required for methods 'hargreaves' and -'hargreaves_modified' and 'tas', required for method 'thornthwaite'. -Variable 'prlr' is always needed. The units for temperature variables -('tasmin', 'tasmax' and 'tas') need to be in Celcius degrees; the units for -precipitation ('prlr') need to be in mm/month.} +variables are: 'tmin' and 'tmax', required for methods 'hargreaves' and +'hargreaves_modified' and 'tmean', required for method 'thornthwaite'. +Variable 'pr' is always needed. The units for temperature variables +('tmin', 'tmax' and 'tmean') need to be in Celcius degrees; the units for +precipitation ('pr') need to be in mm/month.} \item{exp_cor}{A named list with the needed \code{s2dv_cube} objects for each variable in which the quantile PeriodSPEI should be applied. If it is not @@ -77,12 +77,12 @@ have member dimension (specified in 'memb_dim').} \item{pet_exp}{A multidimensional array containing the Potential EvapoTranspiration data of 'exp'. It must have the same dimensions of the -variable 'prlr' of 'exp'. If it is NULL it is calculated using the provided +variable 'pr' of 'exp'. If it is NULL it is calculated using the provided variables with specified 'pet_method'. It is NULL by default.} \item{pet_expcor}{A multidimensional array containing the Potential EvapoTranspiration data of 'exp_cor'. It must have the same dimensions of -the variable 'prlr' of 'exp_cor'. If it is NULL it is calculated using the +the variable 'pr' of 'exp_cor'. 
If it is NULL it is calculated using the provided variables with specified 'pet_method'. It is NULL by default.} \item{standardization}{A logical value indicating wether the standardization @@ -95,8 +95,8 @@ is not provided. It is FALSE by default.} \item{pet_method}{A character string indicating the method used to compute the potential evapotranspiration. The accepted methods are: 'hargreaves' and 'hargreaves_modified', that require the data to have -variables tasmin and tasmax; and 'thornthwaite', that requires variable -'tas'.} +variables tmin and tmax; and 'thornthwaite', that requires variable +'tmean'.} \item{method}{A character string indicating the standardization method used. If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by @@ -198,9 +198,9 @@ expcor_tasmax <- array(rnorm(540, 29.03, 5.67), dim = dimscor) expcor_tasmin <- array(rnorm(540, 15.70, 4.40), dim = dimscor) expcor_prlr <- array(rnorm(540, 15.62, 21.38), dim = dimscor) -exp <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) -exp_cor <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, - 'prlr' = expcor_prlr) +exp <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) +exp_cor <- list('tmax' = expcor_tasmax, 'tmin' = expcor_tasmin, + 'pr' = expcor_prlr) exp <- lapply(exp, CSTools::s2dv_cube, coords = list(latitude = lat), Dates = dates_exp) diff --git a/man/PeriodSPEI.Rd b/man/PeriodSPEI.Rd index fc6c527..56b687c 100644 --- a/man/PeriodSPEI.Rd +++ b/man/PeriodSPEI.Rd @@ -36,11 +36,11 @@ PeriodSPEI( the seasonal forecast experiment in the data element for each variable. Specific variables are needed for each method used in computing the Potential Evapotranspiration. See parameter 'pet_method'. The accepted -variables are: 'tasmin' and 'tasmax', required for methods 'hargreaves' and -'hargreaves_modified' and 'tas', required for method 'thornthwaite'. -Variable 'prlr' is always needed. The units for temperature variables -('tasmin', 'tasmax' and 'tas') need to be in Celcius degrees; the units for -precipitation ('prlr') need to be in mm/month.} +variables are: 'tmin' and 'tmax', required for methods 'hargreaves' and +'hargreaves_modified' and 'tmean', required for method 'thornthwaite'. +Variable 'pr' is always needed. The units for temperature variables +('tmin', 'tmax' and 'tmean') need to be in Celcius degrees; the units for +precipitation ('pr') need to be in mm/month.} \item{dates_exp}{An array of temporal dimensions containing the Dates of 'exp'. It must be of class 'Date' or 'POSIXct'.} @@ -87,12 +87,12 @@ leadtime dimension (specified in 'leadtime_dim') of length 1.} \item{pet_exp}{A multidimensional array containing the Potential EvapoTranspiration data of 'exp'. It must have the same dimensions of the -variable 'prlr' of 'exp'. If it is NULL it is calculated using the provided +variable 'pr' of 'exp'. If it is NULL it is calculated using the provided variables with specified 'pet_method'. It is NULL by default.} \item{pet_expcor}{A multidimensional array containing the Potential EvapoTranspiration data of 'exp_cor'. It must have the same dimensions of -the variable 'prlr' of 'exp_cor'. If it is NULL it is calculated using the +the variable 'pr' of 'exp_cor'. If it is NULL it is calculated using the provided variables with specified 'pet_method'. It is NULL by default.} \item{standardization}{A logical value indicating wether the standardization @@ -105,8 +105,8 @@ is not provided. 
It is FALSE by default.} \item{pet_method}{A character string indicating the method used to compute the potential evapotranspiration. The accepted methods are: 'hargreaves' and 'hargreaves_modified', that require the data to have -variables tasmin and tasmax; and 'thornthwaite', that requires variable -'tas'.} +variables tmin and tmax; and 'thornthwaite', that requires variable +'tmean'.} \item{method}{A character string indicating the standardization method used. If can be: 'parametric' or 'non-parametric'.} @@ -206,9 +206,9 @@ dates_expcor <- as.POSIXct(c(paste0(2020, "-08-16"), dim(dates_expcor) <- c(sday = 1, sweek = 1, syear = 1, time = 3) lat <- c(40,40.1) -exp <- list('tasmax' = exp_tasmax, 'tasmin' = exp_tasmin, 'prlr' = exp_prlr) -exp_cor <- list('tasmax' = expcor_tasmax, 'tasmin' = expcor_tasmin, - 'prlr' = expcor_prlr) +exp <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) +exp_cor <- list('tmax' = expcor_tasmax, 'tmin' = expcor_tasmin, + 'pr' = expcor_prlr) res <- PeriodSPEI(exp = exp, exp_cor = exp_cor, lat = lat, dates_exp = dates_exp, dates_expcor = dates_expcor) -- GitLab From bd84633f56cd53752ee802592f5318ed0d087a79 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Mon, 7 Aug 2023 11:11:59 +0200 Subject: [PATCH 46/87] Add reference in function MergeRefToExp --- R/MergeRefToExp.R | 23 ++++++++++++++++++----- man/CST_MergeRefToExp.Rd | 25 ++++++++++++++++++++----- 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/R/MergeRefToExp.R b/R/MergeRefToExp.R index 03425c8..3cd9d96 100644 --- a/R/MergeRefToExp.R +++ b/R/MergeRefToExp.R @@ -9,6 +9,11 @@ #'1st the forecast covers until the beginning of June next year), a climatology #'(or other references) could be added at the end of the forecast lead time to #'cover the desired period (e.g.: until the end of summer). +#' +#'This function is created to merge observations and forecasts in the +#'bioclimatic indicator structure, known as the ‘blending’ strategy (see +#'references). This key strategy aims to increase users’ confidence in the +#'reformed predictions. #' #'@param data1 An 's2dv_cube' object with the element 'data' being a #' multidimensional array with named dimensions. All dimensions must be @@ -50,11 +55,19 @@ #'common to both arrays and for the 'time_dim' dimension, the sum of the #'corresponding dimension of 'data1' and 'data2'. If 'memb_dim' is not null, #'regarding member dimension, two different situations can occur: (1) in the -#'case that one of the arrays does not have member dimension or is equal to 1, -#'the result will contain the repeated values of itself; (2) in the case that -#'both arrays have member dimension and is greater than 1, all combinations of -#'member dimension will be returned. The other elements of the 's2dv_cube' will -#'be updated with the combined information of both datasets. +#'case that one of the arrays does not have member dimension or is equal to 1 +#'and the other array has multiple member dimension, the result will contain the +#'repeated values of the array one up to the lenght of member dimension of array +#'two; (2) in the case that both arrays have member dimension and is greater +#'than 1, all combinations of member dimension will be returned. The other +#'elements of the 's2dv_cube' will be updated with the combined information of +#'both datasets. +#' +#'@references Chou, C., R. Marcos-Matamoros, L. Palma Garcia, N. Pérez-Zanón, +#'M. Teixeira, S. Silva, N. Fontes, A. Graça, A. Dell'Aquila, S. Calmanti and +#'N. González-Reviriego (2023). 
Advanced seasonal predictions for vine +#'management based on bioclimatic indicators tailored to the wine sector. +#'Climate Services, 30, 100343, \doi{doi.org/10.1016/j.cliser.2023.100343}. #' #'@examples #'data_dates <- c(seq(as.Date("01-07-1993", "%d-%m-%Y", tz = 'UTC'), diff --git a/man/CST_MergeRefToExp.Rd b/man/CST_MergeRefToExp.Rd index bbca8c4..5ab328b 100644 --- a/man/CST_MergeRefToExp.Rd +++ b/man/CST_MergeRefToExp.Rd @@ -67,11 +67,13 @@ the following dimensions: the original dimensions of the input data, which are common to both arrays and for the 'time_dim' dimension, the sum of the corresponding dimension of 'data1' and 'data2'. If 'memb_dim' is not null, regarding member dimension, two different situations can occur: (1) in the -case that one of the arrays does not have member dimension or is equal to 1, -the result will contain the repeated values of itself; (2) in the case that -both arrays have member dimension and is greater than 1, all combinations of -member dimension will be returned. The other elements of the 's2dv_cube' will -be updated with the combined information of both datasets. +case that one of the arrays does not have member dimension or is equal to 1 +and the other array has multiple member dimension, the result will contain the +repeated values of the array one up to the lenght of member dimension of array +two; (2) in the case that both arrays have member dimension and is greater +than 1, all combinations of member dimension will be returned. The other +elements of the 's2dv_cube' will be updated with the combined information of +both datasets. } \description{ Some indicators are defined for specific temporal periods (e.g.: summer from @@ -84,6 +86,12 @@ period because it is initialized too early (e.g.: Initialization on November (or other references) could be added at the end of the forecast lead time to cover the desired period (e.g.: until the end of summer). } +\details{ +This function is created to merge observations and forecasts in the +bioclimatic indicator structure, known as the ‘blending’ strategy (see +references). This key strategy aims to increase users’ confidence in the +reformed predictions. +} \examples{ data_dates <- c(seq(as.Date("01-07-1993", "\%d-\%m-\%Y", tz = 'UTC'), as.Date("01-12-1993","\%d-\%m-\%Y", tz = 'UTC'), "day"), @@ -106,3 +114,10 @@ new_data <- CST_MergeRefToExp(data1 = ref, data2 = data, start2 = list(1, 7), end2 = list(21, 9)) } +\references{ +Chou, C., R. Marcos-Matamoros, L. Palma Garcia, N. Pérez-Zanón, +M. Teixeira, S. Silva, N. Fontes, A. Graça, A. Dell'Aquila, S. Calmanti and +N. González-Reviriego (2023). Advanced seasonal predictions for vine +management based on bioclimatic indicators tailored to the wine sector. +Climate Services, 30, 100343, \doi{doi.org/10.1016/j.cliser.2023.100343}. 
+} -- GitLab From c28af1f408eb869ebb9a77f78e35bc45dadb70e9 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Mon, 7 Aug 2023 11:21:18 +0200 Subject: [PATCH 47/87] Correct modification of documentation --- R/MergeRefToExp.R | 20 ++++++++++++++++---- man/MergeRefToExp.Rd | 22 ++++++++++++++++++---- 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/R/MergeRefToExp.R b/R/MergeRefToExp.R index 3cd9d96..898f32e 100644 --- a/R/MergeRefToExp.R +++ b/R/MergeRefToExp.R @@ -233,6 +233,11 @@ CST_MergeRefToExp <- function(data1, data2, start1 = NULL, end1 = NULL, #'1st the forecast covers until the beginning of June next year), a climatology #'(or other references) could be added at the end of the forecast lead time to #'cover the desired period (e.g.: until the end of summer). +#' +#'This function is created to merge observations and forecasts in the +#'bioclimatic indicator structure, known as the ‘blending’ strategy (see +#'references). This key strategy aims to increase users’ confidence in the +#'reformed predictions. #' #'@param data1 A multidimensional array with named dimensions. All dimensions #' must be equal to 'data2' dimensions except for the ones specified with @@ -278,10 +283,17 @@ CST_MergeRefToExp <- function(data1, data2, start1 = NULL, end1 = NULL, #'the 'time_dim' dimension, the sum of the corresponding dimension of 'data1' #'and 'data2'. If 'memb_dim' is not null, regarding member dimension, two #'different situations can occur: (1) in the case that one of the arrays does -#'not have member dimension or is equal to 1, the result will contain the -#'repeated values of itself; (2) in the case that both arrays have member -#'dimension and is greater than 1, all combinations of member dimension will be -#'returned. +#'not have member dimension or is equal to 1 and the other array has multiple +#'member dimension, the result will contain the repeated values of the array one +#'up to the lenght of member dimension of array two; (2) in the case that both +#'arrays have member dimension and is greater than 1, all combinations of member +#'dimension will be returned. +#' +#'@references Chou, C., R. Marcos-Matamoros, L. Palma Garcia, N. Pérez-Zanón, +#'M. Teixeira, S. Silva, N. Fontes, A. Graça, A. Dell'Aquila, S. Calmanti and +#'N. González-Reviriego (2023). Advanced seasonal predictions for vine +#'management based on bioclimatic indicators tailored to the wine sector. +#'Climate Services, 30, 100343, \doi{doi.org/10.1016/j.cliser.2023.100343}. #' #'@examples #'data_dates <- c(seq(as.Date("01-07-1993", "%d-%m-%Y", tz = 'UTC'), diff --git a/man/MergeRefToExp.Rd b/man/MergeRefToExp.Rd index e22b52d..047ad37 100644 --- a/man/MergeRefToExp.Rd +++ b/man/MergeRefToExp.Rd @@ -74,10 +74,11 @@ original dimensions of the input data, which are common to both arrays and for the 'time_dim' dimension, the sum of the corresponding dimension of 'data1' and 'data2'. If 'memb_dim' is not null, regarding member dimension, two different situations can occur: (1) in the case that one of the arrays does -not have member dimension or is equal to 1, the result will contain the -repeated values of itself; (2) in the case that both arrays have member -dimension and is greater than 1, all combinations of member dimension will be -returned. 
+not have member dimension or is equal to 1 and the other array has multiple +member dimension, the result will contain the repeated values of the array one +up to the lenght of member dimension of array two; (2) in the case that both +arrays have member dimension and is greater than 1, all combinations of member +dimension will be returned. } \description{ Some indicators are defined for specific temporal periods (e.g.: summer from @@ -90,6 +91,12 @@ period because it is initialized too early (e.g.: Initialization on November (or other references) could be added at the end of the forecast lead time to cover the desired period (e.g.: until the end of summer). } +\details{ +This function is created to merge observations and forecasts in the +bioclimatic indicator structure, known as the ‘blending’ strategy (see +references). This key strategy aims to increase users’ confidence in the +reformed predictions. +} \examples{ data_dates <- c(seq(as.Date("01-07-1993", "\%d-\%m-\%Y", tz = 'UTC'), as.Date("01-12-1993","\%d-\%m-\%Y", tz = 'UTC'), "day"), @@ -107,3 +114,10 @@ new_data <- MergeRefToExp(data1 = ref, dates1 = ref_dates, start1 = list(21, 6), time_dim = 'ftime') } +\references{ +Chou, C., R. Marcos-Matamoros, L. Palma Garcia, N. Pérez-Zanón, +M. Teixeira, S. Silva, N. Fontes, A. Graça, A. Dell'Aquila, S. Calmanti and +N. González-Reviriego (2023). Advanced seasonal predictions for vine +management based on bioclimatic indicators tailored to the wine sector. +Climate Services, 30, 100343, \doi{doi.org/10.1016/j.cliser.2023.100343}. +} -- GitLab From f202fc5540032772a09b265bcd4ef6587d221da0 Mon Sep 17 00:00:00 2001 From: erifarov Date: Thu, 10 Aug 2023 17:16:36 +0200 Subject: [PATCH 48/87] Add new directory .gitlab --- .gitlab/.gitkeep | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 .gitlab/.gitkeep diff --git a/.gitlab/.gitkeep b/.gitlab/.gitkeep new file mode 100644 index 0000000..e69de29 -- GitLab From a7dc45a6ac98826a33d3768ccee69699283dec47 Mon Sep 17 00:00:00 2001 From: erifarov Date: Thu, 10 Aug 2023 17:17:22 +0200 Subject: [PATCH 49/87] Add new directory issue_templates --- .gitlab/issue_templates/.gitkeep | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 .gitlab/issue_templates/.gitkeep diff --git a/.gitlab/issue_templates/.gitkeep b/.gitlab/issue_templates/.gitkeep new file mode 100644 index 0000000..e69de29 -- GitLab From 152f912b19d820722c58dfda57461d770e2d007f Mon Sep 17 00:00:00 2001 From: erifarov Date: Thu, 10 Aug 2023 17:23:59 +0200 Subject: [PATCH 50/87] Add new file --- .gitlab/issue_templates/default.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 .gitlab/issue_templates/default.md diff --git a/.gitlab/issue_templates/default.md b/.gitlab/issue_templates/default.md new file mode 100644 index 0000000..a430b52 --- /dev/null +++ b/.gitlab/issue_templates/default.md @@ -0,0 +1,25 @@ +(This is a template to report errors and bugs. Please fill in the relevant information and delete the rest.) + +Hi @erifarov (and @aho), + +#### R and packages version +(Which R version are you using? ex. 4.1.2) +(Which R packages versions are you using? use sessionInfo(). ex. CSIndicators_1.0.1, CSTools_5.0.1 ...) +(Which machine are you using? WS, Nord3, other...) + +#### Summary +(Bug: Summarize the bug and explain briefly the expected and the current behavior.) +(New development: Summarize the development needed.) + +#### Example +(Bug: Provide a **minimal reproducible example** and the error message.) 
+(New development: Provide an example script or useful piece of code if needed.) + +``` +Example: +[ERROR!]: Something went really wrong! +This is the error message that showed up on the terminal. +``` + +#### Other Relevant Information +(Additional information.) -- GitLab From c2ebd0798429b289d9b96f58ce2d863b16152069 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 6 Sep 2023 16:30:41 +0200 Subject: [PATCH 51/87] Add CST_PeriodStandardization, Improve PeriodStandardization and add documentation --- NAMESPACE | 3 + R/PeriodPET.R | 115 +++++++- R/PeriodSPEI.R | 2 +- R/PeriodStandardization.R | 486 +++++++++++++++++++++++-------- man/CST_PeriodPET.Rd | 80 +++++ man/CST_PeriodStandardization.Rd | 91 ++++++ man/PeriodPET.Rd | 10 +- man/PeriodStandardization.Rd | 91 ++++++ 8 files changed, 750 insertions(+), 128 deletions(-) create mode 100644 man/CST_PeriodPET.Rd create mode 100644 man/CST_PeriodStandardization.Rd create mode 100644 man/PeriodStandardization.Rd diff --git a/NAMESPACE b/NAMESPACE index c67f13e..49ec262 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,7 +7,9 @@ export(CST_AccumulationExceedingThreshold) export(CST_MergeRefToExp) export(CST_PeriodAccumulation) export(CST_PeriodMean) +export(CST_PeriodPET) export(CST_PeriodSPEI) +export(CST_PeriodStandardization) export(CST_QThreshold) export(CST_SelectPeriodOnData) export(CST_Threshold) @@ -20,6 +22,7 @@ export(PeriodAccumulation) export(PeriodMean) export(PeriodPET) export(PeriodSPEI) +export(PeriodStandardization) export(QThreshold) export(SelectPeriodOnData) export(SelectPeriodOnDates) diff --git a/R/PeriodPET.R b/R/PeriodPET.R index 67b6afd..8ab1a64 100644 --- a/R/PeriodPET.R +++ b/R/PeriodPET.R @@ -11,14 +11,119 @@ #' the seasonal forecast experiment in the data element for each variable. #' Specific variables are needed for each method used in computing the #' Potential Evapotranspiration. See parameter 'pet_method'. The accepted -#' variables are: 'tmin' and 'tmax', required for methods 'hargreaves' and -#' 'hargreaves_modified' and 'tmean', required for method 'thornthwaite'. -#' Variable 'pr' is always needed. The units for temperature variables +#' variables for each method are: for 'hargreaves', 'tmin' and 'tmax'; for +#' 'hargreaves_modified' are 'tmin', 'tmax' and 'pr'; for method 'thornthwaite' +#' 'tmean' is required. The units for temperature variables #' ('tmin', 'tmax' and 'tmean') need to be in Celcius degrees; the units for #' precipitation ('pr') need to be in mm/month. #'@param dates An array of temporal dimensions containing the Dates of -#' 'exp'. It must be of class 'Date' or 'POSIXct'. -#'@param lat A numeric vector containing the latitude values of 'exp'. +#' 'data'. It must be of class 'Date' or 'POSIXct'. +#'@param lat A numeric vector containing the latitude values of 'data'. +#'@param pet_method A character string indicating the method used to compute +#' the potential evapotranspiration. The accepted methods are: +#' 'hargreaves' and 'hargreaves_modified', that require the data to have +#' variables tmin and tmax; and 'thornthwaite', that requires variable +#' 'tmean'. +#'@param time_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'syear'. +#'@param leadtime_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'time'. +#'@param lat_dim A character string indicating the name of the latitudinal +#' dimension. By default it is set by 'latitude'. 
+#'@param na.rm A logical value indicating whether NA values should be removed +#' from data. It is FALSE by default. +#'@param ncores An integer value indicating the number of cores to use in +#' parallel computation. +#' +#'@examples +#'dims <- c(time = 3, syear = 3, ensemble = 1, latitude = 1) +#' +#'exp_tasmax <- array(rnorm(360, 27.73, 5.26), dim = dims) +#'exp_tasmin <- array(rnorm(360, 14.83, 3.86), dim = dims) +#'exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims) +#'end_year <- 2012 +#'dates_exp <- as.POSIXct(c(paste0(2010:end_year, "-08-16"), +#' paste0(2010:end_year, "-09-15"), +#' paste0(2010:end_year, "-10-16")), "UTC") +#'dim(dates_exp) <- c(syear = 3, time = 3) +#' +#'lat <- c(40) +#' +#'exp1 <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) +#' +#'res <- PeriodPET(data = exp1, lat = lat, dates = dates_exp) +#' +#'@import SPEI +#'@import lubridate +#'@import multiApply +#'@export +CST_PeriodPET <- function(data, pet_method = 'hargreaves', + time_dim = 'syear', leadtime_dim = 'time', + lat_dim = 'latitude', na.rm = FALSE, + ncores = NULL) { + # Check 's2dv_cube' + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!all(sapply(data, function(x) inherits(x, 's2dv_cube')))) { + stop("Parameter 'data' must be a list of 's2dv_cube' class.") + } + # latitude + if (!any(names(data[[1]]$coords) %in% .KnownLatNames())) { + stop("Spatial coordinate names of parameter 'data' do not match any ", + "of the names accepted by the package.") + } + # Dates + dates_exp <- data[[1]]$attrs$Dates + if (!'Dates' %in% names(data[[1]]$attrs)) { + stop("Element 'Dates' is not found in 'attrs' list of 'data'. ", + "See 's2dv_cube' object description in README file for more ", + "information.") + } + lat_dim <- names(data[[1]]$coords)[[which(names(data[[1]]$coords) %in% .KnownLatNames())]] + + res <- PeriodPET(data = lapply(data, function(x) x$data), + dates = data[[1]]$attrs$Dates, + lat = data[[1]]$coords[[lat_dim]], + pet_method = pet_method, time_dim = time_dim, + leadtime_dim = leadtime_dim, lat_dim = lat_dim, + na.rm = na.rm, ncores = ncores) + # Add metadata + source_files <- lapply(data, function(x) {x$attrs$source_files}) + coords <- data[[1]]$coords + Dates <- data[[1]]$attrs$Dates + metadata <- data[[1]]$attrs$Variable$metadata + metadata_names <- intersect(names(dim(res)), names(metadata)) + res <- CSTools::s2dv_cube(data = res, coords = coords, + varName = paste0('PET'), + metadata = metadata[metadata_names], + Dates = Dates, + source_files = source_files, + when = Sys.time()) + return(res) +} + +#'Compute the Potential Evapotranspiration +#' +#'Compute the Potential evapotranspiration (PET) that is the amount of +#'evaporation and transpiration that would occur if a sufficient water source +#'were available. Reference evapotranspiration (ETo) is the amount of +#'evaporation and transpiration from a reference vegetation of grass. They are +#'usually considered equivalent. This set of functions calculate PET or ETo +#'according to the Thornthwaite, Hargreaves or Penman-Monteith equations. +#' +#'@param data A named list with the needed \code{s2dv_cube} objects containing +#' the seasonal forecast experiment in the data element for each variable. +#' Specific variables are needed for each method used in computing the +#' Potential Evapotranspiration. See parameter 'pet_method'. 
The accepted +#' variables for each method are: for 'hargreaves', 'tmin' and 'tmax'; for +#' 'hargreaves_modified' are 'tmin', 'tmax' and 'pr'; for method 'thornthwaite' +#' 'tmean' is required. The units for temperature variables +#' ('tmin', 'tmax' and 'tmean') need to be in Celcius degrees; the units for +#' precipitation ('pr') need to be in mm/month. +#'@param dates An array of temporal dimensions containing the Dates of +#' 'data'. It must be of class 'Date' or 'POSIXct'. +#'@param lat A numeric vector containing the latitude values of 'data'. #'@param pet_method A character string indicating the method used to compute #' the potential evapotranspiration. The accepted methods are: #' 'hargreaves' and 'hargreaves_modified', that require the data to have diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index 5e78d7d..979e79d 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -903,7 +903,7 @@ PeriodSPEI <- function(exp, dates_exp, lat, } else if (anyNA(data) && !na.rm) { spei_mod <- array(NA, dim(data)) } else if (var(data, na.rm = T) == 0) { - spei_mod <- array(param_error, dim(data)) + spei_mod <- array(param_error, dim(data)) # Add this? } else { if (is.null(ref_period)) { ref.start <- NULL diff --git a/R/PeriodStandardization.R b/R/PeriodStandardization.R index 88ab915..73792ef 100644 --- a/R/PeriodStandardization.R +++ b/R/PeriodStandardization.R @@ -1,42 +1,299 @@ -# WIP +#'Compute the Standardization of Precipitation-Evapotranspiration Index +#' +#'@param data An 's2dv_cube' that element 'data' stores a multidimensional +#' array containing the data to be standardized. +#'@param data_cor An 's2dv_cube' that element 'data' stores a multidimensional +#' array containing the data in which the standardization should be applied +#' using the fitting parameters from 'data'. +#'@param time_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'syear'. +#'@param leadtime_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'time'. +#'@param memb_dim A character string indicating the name of the dimension in +#' which the ensemble members are stored. When set it to NULL, threshold is +#' computed for individual members. +#'@param accum An integer value indicating the number of months for the +#' accumulation for each variable. When it is greater than 1, the result will +#' be filled with NA until the accum time_dim dimension number due to the +#' accumulation to previous months. +#'@param ref_period A list with two numeric values with the starting and end +#' points of the reference period used for computing the index. The default +#' value is NULL indicating that the first and end values in data will be +#' used as starting and end points. +#'@param param_error A numeric value with the error accepted. +#'@param handle_infinity A logical value wether to return Infinite values (TRUE) +#' or not (FALSE). +#'@param method A character string indicating the standardization method used. +#' If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by +#' default. +#'@param distribution A character string indicating the name of the distribution +#' function to be used for computing the SPEI. The accepted names are: +#' 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The +#' 'Gamma' method only works when only precipitation is provided and other +#' variables are 0 because it is positive defined (SPI indicator). 
+#'@param na.rm A logical value indicating whether NA values should be removed +#' from data. It is FALSE by default. If it is FALSE and there are NA values, +#' (if standardization is TRUE) all values of other dimensions except time_dim +#' and leadtime_dim will be set to NA directly. On the other hand, if it is +#' TRUE, if the data from other dimensions except time_dim and leadtime_dim is +#' not reaching 4 values, it is not enough values to estimate the parameters +#' and the result will include NA. +#'@param ncores An integer value indicating the number of cores to use in +#' parallel computation. +#' +#'@return An object of class \code{s2dv_cube} containing the standardized data. +#'If 'data_cor' is provided the standardizaton is applied to it using 'data' +#'to adjust it. +#' +#'@examples +#'dims <- c(syear = 6, time = 3, latitude = 2, ensemble = 25) +#'data <- NULL +#'data$data <- array(rnorm(600, -204.1, 78.1), dim = dims) +#'class(data) <- 's2dv_cube' +#'SPEI <- CST_PeriodStandardization(data = data, accum = 2) +#'@export +CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', + leadtime_dim = 'time', memb_dim = 'ensemble', + accum = NULL, ref_period = NULL, param_error = -9999, + handle_infinity = FALSE, method = 'parametric', + distribution = 'log-Logistic', + na.rm = FALSE, ncores = NULL) { + # Check 's2dv_cube' + if (is.null(data)) { + stop("Parameter 'exp' cannot be NULL.") + } + if (!inherits(data, 's2dv_cube')) { + stop("Parameter 'data' must be of 's2dv_cube' class.") + } + if (!is.null(data_cor)) { + if (!inherits(data_cor, 's2dv_cube')) { + stop("Parameter 'data_cor' must be of 's2dv_cube' class.") + } + } + std <- PeriodStandardization(data = data$data, data_cor = data_cor$data, + time_dim = time_dim, leadtime_dim = leadtime_dim, + memb_dim = memb_dim, accum = accum, + ref_period = ref_period, param_error = param_error, + handle_infinity = handle_infinity, method = method, + distribution = distribution, + na.rm = na.rm, ncores = ncores) + if (is.null(data_cor)) { + data$data <- std + data$attrs$Variable$varName <- paste0(data$attrs$Variable$varName, ' standardized') + return(data) + } else { + data_cor$data <- std + data_cor$attrs$Variable$varName <- paste0(data_cor$attrs$Variable$varName, ' standardized') + data_cor$attrs$Datasets <- c(data_cor$attrs$Datasets, data$attrs$Datasets) + data_cor$attrs$source_files <- c(data_cor$attrs$source_files, data$attrs$source_files) + return(data_cor) + } +} -PeriodStandardization <- function(data, data_cor = NULL, accum = NULL, time_dim = 'syear', +#'Compute the Standardization of Precipitation-Evapotranspiration Index +#' +#'@param data A multidimensional array containing the data to be standardized. +#'@param data_cor A multidimensional array containing the data in which the +#' standardization should be applied using the fitting parameters from 'data'. +#'@param time_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'syear'. +#'@param leadtime_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'time'. +#'@param memb_dim A character string indicating the name of the dimension in +#' which the ensemble members are stored. When set it to NULL, threshold is +#' computed for individual members. +#'@param accum An integer value indicating the number of months for the +#' accumulation for each variable. 
When it is greater than 1, the result will +#' be filled with NA until the accum time_dim dimension number due to the +#' accumulation to previous months. +#'@param ref_period A list with two numeric values with the starting and end +#' points of the reference period used for computing the index. The default +#' value is NULL indicating that the first and end values in data will be +#' used as starting and end points. +#'@param param_error A numeric value with the error accepted. +#'@param handle_infinity A logical value indicating whether to return Infinite values (TRUE) +#' or not (FALSE). +#'@param method A character string indicating the standardization method used. +#' It can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by +#' default. +#'@param distribution A character string indicating the name of the distribution +#' function to be used for computing the SPEI. The accepted names are: +#' 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The +#' 'Gamma' method only works when only precipitation is provided and other +#' variables are 0 because it is positive defined (SPI indicator). +#'@param na.rm A logical value indicating whether NA values should be removed +#' from data. It is FALSE by default. If it is FALSE and there are NA values, +#' (if standardization is TRUE) all values of other dimensions except time_dim +#' and leadtime_dim will be set to NA directly. On the other hand, if it is +#' TRUE and the data from the other dimensions except time_dim and leadtime_dim +#' do not provide at least 4 values, there are not enough values to estimate the +#' parameters and the result will include NA. +#'@param ncores An integer value indicating the number of cores to use in +#' parallel computation. +#' +#'@return A multidimensional array containing the standardized data. +#'If 'data_cor' is provided the standardization is applied to it using 'data' +#'to adjust it. 
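The leading-NA padding described for 'accum' mirrors what the accumulation step produces: a rolling sum over the leadtime dimension returns accum - 1 fewer values than it receives. A minimal sketch, assuming the rollapply() used by the package comes from zoo, with an illustrative vector for one grid point:

library(zoo)
accum <- 3
x <- c(10, 20, 30, 40, 50)                     # monthly P - PET values
acc <- rollapply(x, width = accum, FUN = sum)  # 60 90 120: length 5 - 3 + 1
padded <- c(rep(NA, accum - 1), acc)           # NA NA 60 90 120: same length as x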
+#' +#'@examples +#'dims <- c(syear = 6, time = 2, latitude = 2, ensemble = 25) +#'dimscor <- c(syear = 1, time = 2, latitude = 2, ensemble = 25) +#'data <- array(rnorm(600, -194.5, 64.8), dim = dims) +#'datacor <- array(rnorm(100, -217.8, 68.29), dim = dimscor) +#' +#'SPEI <- PeriodStandardization(data = data, accum = 2) +#'SPEIcor <- PeriodStandardization(data = data, data_cor = datacor, accum = 2) +#'@import multiApply +#'@import ClimProjDiags +#'@import TLMoments +#'@import lmomco +#'@import lmom +#'@export +PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', - ref_period = NULL, cross_validation = FALSE, - handle_infinity = FALSE, param_error = -9999, - method = 'parametric', distribution = 'log-Logistic', + accum = NULL, ref_period = NULL, param_error = -9999, + handle_infinity = FALSE, method = 'parametric', + distribution = 'log-Logistic', na.rm = FALSE, ncores = NULL) { - # Initial checks + # Check inputs + ## data + if (!is.array(data)) { + stop("Parameter 'data' must be a numeric array.") + } + if (is.null(names(dim(data)))) { + stop("Parameter 'data' must have dimension names.") + } + ## data_cor + if (!is.null(data_cor)) { + if (!is.array(data_cor)) { + stop("Parameter 'data_cor' must be a numeric array.") + } + if (is.null(names(dim(data_cor)))) { + stop("Parameter 'data_cor' must have dimension names.") + } + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(data))) { + stop("Parameter 'time_dim' is not found in 'data' dimension.") + } + if (!is.null(data_cor)) { + if (!time_dim %in% names(dim(data_cor))) { + stop("Parameter 'time_dim' is not found in 'data_cor' dimension.") + } + } + ## leadtime_dim + if (!is.character(leadtime_dim) | length(leadtime_dim) != 1) { + stop("Parameter 'leadtime_dim' must be a character string.") + } + if (!leadtime_dim %in% names(dim(data))) { + stop("Parameter 'leadtime_dim' is not found in 'data' dimension.") + } + if (!is.null(data_cor)) { + if (!leadtime_dim %in% names(dim(data_cor))) { + stop("Parameter 'leadtime_dim' is not found in 'data_cor' dimension.") + } + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) != 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(data))) { + stop("Parameter 'memb_dim' is not found in 'data' dimension.") + } + if (!is.null(data_cor)) { + if (!memb_dim %in% names(dim(data_cor))) { + stop("Parameter 'memb_dim' is not found in 'data_cor' dimension.") + } + } + ## accum + if (accum > dim(data)[leadtime_dim]) { + stop(paste0("Cannot compute accumulation of ", accum, " months because ", + "loaded data has only ", dim(data)[leadtime_dim], " months.")) + } + ## ref_period + if (!is.null(ref_period)) { + if (length(ref_period) != 2) { + warning("Parameter 'ref_period' must be of length two indicating the ", + "first and end years of the reference period. It will not ", + "be used.") + ref_period <- NULL + } else if (!all(sapply(ref_period, is.numeric))) { + warning("Parameter 'ref_period' must be a numeric vector indicating the ", + "'start' and 'end' years of the reference period. It will not ", + "be used.") + ref_period <- NULL + } else if (ref_period[[1]] > ref_period[[2]]) { + warning("In parameter 'ref_period' 'start' cannot be after 'end'. 
It ", + "will not be used.") + ref_period <- NULL + } else if (!all(unlist(ref_period) %in% year(dates_exp))) { + warning("Parameter 'ref_period' contain years outside the dates. ", + "It will not be used.") + ref_period <- NULL + } else { + years <- year(ClimProjDiags::Subset(dates_exp, along = leadtime_dim, + indices = 1)) + ref_period[[1]] <- which(ref_period[[1]] == years) + ref_period[[2]] <- which(ref_period[[2]] == years) + } + } + ## handle_infinity + if (!is.logical(handle_infinity)) { + stop("Parameter 'handle_infinity' must be a logical value.") + } + ## method + if (!(method %in% c('parametric', 'non-parametric'))) { + stop("Parameter 'method' must be a character string containing one of ", + "the following methods: 'parametric' or 'non-parametric'.") + } + ## distribution + if (!(distribution %in% c('log-Logistic', 'Gamma', 'PearsonIII'))) { + stop("Parameter 'distribution' must be a character string containing one ", + "of the following distributions: 'log-Logistic', 'Gamma' or ", + "'PearsonIII'.") + } + ## na.rm + if (!is.logical(na.rm)) { + stop("Parameter 'na.rm' must be logical.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + target_dims <- c(leadtime_dim, time_dim, memb_dim) if (is.null(ref_period)) { - ref.start <- NULL - ref.end <- NULL + ref_start <- NULL + ref_end <- NULL } else { - ref.start <- ref_period[[1]] - ref.end <- ref_period[[2]] + ref_start <- ref_period[[1]] + ref_end <- ref_period[[2]] } - # Standardization if (is.null(data_cor)) { spei <- Apply(data = list(data), target_dims = target_dims, fun = .standardization, leadtime_dim = leadtime_dim, - time_dim = time_dim, memb_dim = memb_dim, ref_period = ref_period, handle_infinity = handle_infinity, - cross_validation = cross_validation, param_error = param_error, + param_error = param_error, method = method, distribution = distribution, na.rm = na.rm, ncores = ncores)$output1 } else { spei <- Apply(data = list(data, data_cor), target_dims = target_dims, fun = .standardization, leadtime_dim = leadtime_dim, - time_dim = time_dim, memb_dim = memb_dim, ref_period = ref_period, handle_infinity = handle_infinity, - cross_validation = cross_validation, param_error = param_error, + param_error = param_error, method = method, distribution = distribution, na.rm = na.rm, ncores = ncores)$output1 } @@ -44,11 +301,11 @@ PeriodStandardization <- function(data, data_cor = NULL, accum = NULL, time_dim # add NA if (!is.null(accum)) { spei <- Apply(data = list(spei), target_dims = leadtime_dim, - output_dims = leadtime_dim, - fun = function(x, accum, leadtime_dim) { - res <- c(rep(NA, accum-1), x) - return(res) - }, accum = accum, leadtime_dim = leadtime_dim)$output1 + output_dims = leadtime_dim, + fun = function(x, accum, leadtime_dim) { + res <- c(rep(NA, accum-1), x) + return(res) + }, accum = accum, leadtime_dim = leadtime_dim)$output1 } if (is.null(data_cor)) { pos <- match(names(dim(data)), names(dim(spei))) @@ -61,18 +318,11 @@ PeriodStandardization <- function(data, data_cor = NULL, accum = NULL, time_dim } .standardization <- function(data, data_cor = NULL, leadtime_dim = 'time', - time_dim = 'syear', memb_dim = 'ensemble', - ref_period = NULL, handle_infinity = FALSE, - cross_validation = FALSE, param_error = -9999, + ref_period = NULL, handle_infinity = FALSE, param_error = -9999, method = 'parametric', distribution = 'log-Logistic', na.rm = FALSE) { - 
print(summary(data)) # data: [leadtime_dim, time_dim, memb_dim] - - # maximum number of parameters needed - nleadtime <- as.numeric(dim(data)[leadtime_dim]) - ntime <- as.numeric(dim(data)[time_dim]) - nmemb <- as.numeric(dim(data)[memb_dim]) + dims <- dim(data)[-1] fit = 'ub-pwm' coef = switch(distribution, @@ -83,62 +333,48 @@ PeriodStandardization <- function(data, data_cor = NULL, accum = NULL, time_dim if (is.null(data_cor)) { # cross_val = TRUE spei_mod <- data*NA - params_result <- array(data = NA, dim = c(ntime, nleadtime, length(coef))) for (ff in 1:dim(data)[leadtime_dim]) { data2 <- data[ff, , ] - dim(data2) <- c(ntime, nmemb) - if (!is.null(ref.start) && !is.null(ref.end)) { - data.fit <- window(data2, ref.start, ref.end) + dim(data2) <- dims + if (method == 'non-parametric') { + bp <- matrix(0, length(data2), 1) + for (i in 1:length(data2)) { + bp[i,1] = sum(data2[] <= data2[i], na.rm = na.rm); # Writes the rank of the data + } + std_index <- qnorm((bp - 0.44)/(length(data2) + 0.12)) + dim(std_index) <- dims + spei_mod[ff, , ] <- std_index } else { - data.fit <- data2 - } - for (nsd in 1:dim(data)[time_dim]) { - acu <- as.vector(data.fit[-nsd, ]) - acu.sorted <- sort.default(acu, method = "quick") - if (na.rm) { - acu.sorted <- acu.sorted[!is.na(acu.sorted)] + if (!is.null(ref_start) && !is.null(ref_end)) { + data_fit <- window(data2, ref_start, ref_end) + } else { + data_fit <- data2 } - if (!any(is.na(acu.sorted))) { - if (length(acu.sorted) != 0) { - acu_sd <- sd(acu.sorted) + for (nsd in 1:dim(data)[time_dim]) { + acu <- as.vector(data_fit[-nsd, ]) + acu_sorted <- sort.default(acu, method = "quick") + if (na.rm) { + acu_sorted <- acu_sorted[!is.na(acu_sorted)] + } + if (!any(is.na(acu_sorted)) & length(acu_sorted) != 0) { + acu_sd <- sd(acu_sorted) if (!is.na(acu_sd) & acu_sd != 0) { if (distribution != "log-Logistic") { - pze <- sum(acu == 0) / length(acu) - acu.sorted <- acu.sorted[acu.sorted > 0] + acu_sorted <- acu_sorted[acu_sorted > 0] } - if (length(acu.sorted) >= 4) { - pwm = switch(fit, - "pp-pwm" = pwm.pp(acu.sorted, -0.35, 0, nmom = 3), - pwm.ub(acu.sorted, nmom = 3) - # TLMoments::PWM(acu.sorted, order = 0:2) - ) - lmom <- pwm2lmom(pwm) - if (!(!are.lmom.valid(lmom) || anyNA(lmom[[1]]) || any(is.nan(lmom[[1]])))) { - fortran_vec = c(lmom$lambdas[1:2], lmom$ratios[3]) - f_params = switch(distribution, - "log-Logistic" = tryCatch(lmom::pelglo(fortran_vec), - error = function(e){parglo(lmom)$para}), - "Gamma" = tryCatch(lmom::pelgam(fortran_vec), - error = function(e){pargam(lmom)$para}), - "PearsonIII" = tryCatch(lmom::pelpe3(fortran_vec), - error = function(e){parpe3(lmom)$para})) - if (distribution == 'log-Logistic' && fit == 'max-lik') { - f_params = parglo.maxlik(acu.sorted, f_params)$para - } - params_result[nsd, ff, ] <- f_params - } + if (length(acu_sorted) >= 4) { + f_params <- .std(acu_sorted, fit, distribution) } - if (all(is.na(params_result[nsd, ff, ]))) { + if (all(is.na(f_params))) { cdf_res <- NA } else { - f_params <- params_result[nsd, ff, ] f_params <- f_params[which(!is.na(f_params))] cdf_res = switch(distribution, "log-Logistic" = lmom::cdfglo(data2, f_params), "Gamma" = lmom::cdfgam(data2, f_params), "PearsonIII" = lmom::cdfpe3(data2, f_params)) } - std_index_cv <- array(qnorm(cdf_res), dim = c(ntime, nmemb)) + std_index_cv <- array(qnorm(cdf_res), dim = dims) spei_mod[ff, nsd, ] <- std_index_cv[nsd, ] } } @@ -148,70 +384,86 @@ PeriodStandardization <- function(data, data_cor = NULL, accum = NULL, time_dim } else { # cross_val = FALSE 
spei_mod <- data_cor*NA - params_result <- array(data = NA, dim = c(1, length(coef))) + dimscor <- dim(data_cor)[-1] for (ff in 1:dim(data)[leadtime_dim]) { data_cor2 <- data_cor[ff, , ] - dim(data_cor2) <- c(1, nmemb) - data2 <- data[ff, , ] - dim(data2) <- c(ntime, nmemb) - if (!is.null(ref.start) && !is.null(ref.end)) { - data.fit <- window(data2, ref.start, ref.end) + dim(data_cor2) <- dimscor + if (method == 'non-parametric') { + bp <- matrix(0, length(data_cor2), 1) + for (i in 1:length(data_cor2)) { + bp[i,1] = sum(data_cor2[] <= data_cor2[i], na.rm = na.rm); # Writes the rank of the data + } + std_index <- qnorm((bp - 0.44)/(length(data_cor2) + 0.12)) + dim(std_index) <- dimscor + spei_mod[ff, , ] <- std_index } else { - data.fit <- data2 - } - acu <- as.vector(data.fit) - acu.sorted <- sort.default(acu, method = "quick") - if (na.rm) { - acu.sorted <- acu.sorted[!is.na(acu.sorted)] - } - if (!any(is.na(acu.sorted))) { - if (length(acu.sorted) != 0) { - acu_sd <- sd(acu.sorted) + data2 <- data[ff, , ] + dim(data2) <- dims + if (!is.null(ref_start) && !is.null(ref_end)) { + data_fit <- window(data2, ref_start, ref_end) + } else { + data_fit <- data2 + } + acu <- as.vector(data_fit) + acu_sorted <- sort.default(acu, method = "quick") + if (na.rm) { + acu_sorted <- acu_sorted[!is.na(acu_sorted)] + } + if (!any(is.na(acu_sorted)) & length(acu_sorted) != 0) { + acu_sd <- sd(acu_sorted) if (!is.na(acu_sd) & acu_sd != 0) { if (distribution != "log-Logistic") { - pze <- sum(acu == 0) / length(acu) - acu.sorted <- acu.sorted[acu.sorted > 0] + acu_sorted <- acu_sorted[acu_sorted > 0] } - if (length(acu.sorted) >= 4) { - pwm = switch(fit, - "pp-pwm" = pwm.pp(acu.sorted, -0.35, 0, nmom = 3), - pwm.ub(acu.sorted, nmom = 3) - # TLMoments::PWM(acu.sorted, order = 0:2) - ) - lmom <- pwm2lmom(pwm) - if (!(!are.lmom.valid(lmom) || anyNA(lmom[[1]]) || any(is.nan(lmom[[1]])))) { - fortran_vec = c(lmom$lambdas[1:2], lmom$ratios[3]) - f_params = switch(distribution, - "log-Logistic" = tryCatch(lmom::pelglo(fortran_vec), - error = function(e){parglo(lmom)$para}), - "Gamma" = tryCatch(lmom::pelgam(fortran_vec), - error = function(e){pargam(lmom)$para}), - "PearsonIII" = tryCatch(lmom::pelpe3(fortran_vec), - error = function(e){parpe3(lmom)$para})) - if (distribution == 'log-Logistic' && fit == 'max-lik') { - f_params = parglo.maxlik(acu.sorted, f_params)$para - } - params_result[1, ] <- f_params - - } + if (length(acu_sorted) >= 4) { + f_params <- .std(data = acu_sorted, fit = fit, + distribution = distribution) } - if (all(is.na(params_result[1, ]))) { + if (all(is.na(f_params))) { cdf_res <- NA } else { - f_params <- params_result[1, ] - f_params <- f_params[which(!is.na(f_params))] - cdf_res = switch(distribution, - "log-Logistic" = lmom::cdfglo(data_cor2, f_params), - "Gamma" = lmom::cdfgam(data_cor2, f_params), - "PearsonIII" = lmom::cdfpe3(data_cor2, f_params)) + f_params <- f_params[which(!is.na(f_params))] + cdf_res = switch(distribution, + "log-Logistic" = lmom::cdfglo(data_cor2, f_params), + "Gamma" = lmom::cdfgam(data_cor2, f_params), + "PearsonIII" = lmom::cdfpe3(data_cor2, f_params)) } - std_index_cv <- array(qnorm(cdf_res), dim = c(1, nmemb)) + std_index_cv <- array(qnorm(cdf_res), dim = dimscor) spei_mod[ff, , ] <- std_index_cv } } } } } + if (handle_infinity) { + # could also use "param_error" ?; we are giving it the min/max value of the grid point + spei_mod[is.infinite(spei_mod) & spei_mod < 0] <- min(spei_mod[!is.infinite(spei_mod)]) + spei_mod[is.infinite(spei_mod) & spei_mod > 0] <- 
max(spei_mod[!is.infinite(spei_mod)]) + } return(spei_mod) } +.std <- function(data, fit, distribution) { + pwm = switch(fit, + "pp-pwm" = pwm.pp(data, -0.35, 0, nmom = 3), + pwm.ub(data, nmom = 3) + # TLMoments::PWM(data, order = 0:2) + ) + lmom <- pwm2lmom(pwm) + if (!(!are.lmom.valid(lmom) || anyNA(lmom[[1]]) || any(is.nan(lmom[[1]])))) { + fortran_vec = c(lmom$lambdas[1:2], lmom$ratios[3]) + params = switch(distribution, + "log-Logistic" = tryCatch(lmom::pelglo(fortran_vec), + error = function(e){parglo(lmom)$para}), + "Gamma" = tryCatch(lmom::pelgam(fortran_vec), + error = function(e){pargam(lmom)$para}), + "PearsonIII" = tryCatch(lmom::pelpe3(fortran_vec), + error = function(e){parpe3(lmom)$para})) + if (distribution == 'log-Logistic' && fit == 'max-lik') { + params = parglo.maxlik(data, params)$para + } + return(params) + } else { + return(NA) + } +} \ No newline at end of file diff --git a/man/CST_PeriodPET.Rd b/man/CST_PeriodPET.Rd new file mode 100644 index 0000000..ecb225e --- /dev/null +++ b/man/CST_PeriodPET.Rd @@ -0,0 +1,80 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PeriodPET.R +\name{CST_PeriodPET} +\alias{CST_PeriodPET} +\title{Compute the Potential Evapotranspiration} +\usage{ +CST_PeriodPET( + data, + pet_method = "hargreaves", + time_dim = "syear", + leadtime_dim = "time", + lat_dim = "latitude", + na.rm = FALSE, + ncores = NULL +) +} +\arguments{ +\item{data}{A named list with the needed \code{s2dv_cube} objects containing +the seasonal forecast experiment in the data element for each variable. +Specific variables are needed for each method used in computing the +Potential Evapotranspiration. See parameter 'pet_method'. The accepted +variables for each method are: for 'hargreaves', 'tmin' and 'tmax'; for +'hargreaves_modified' are 'tmin', 'tmax' and 'pr'; for method 'thornthwaite' +'tmean' is required. The units for temperature variables +('tmin', 'tmax' and 'tmean') need to be in Celcius degrees; the units for +precipitation ('pr') need to be in mm/month.} + +\item{pet_method}{A character string indicating the method used to compute +the potential evapotranspiration. The accepted methods are: +'hargreaves' and 'hargreaves_modified', that require the data to have +variables tmin and tmax; and 'thornthwaite', that requires variable +'tmean'.} + +\item{time_dim}{A character string indicating the name of the temporal +dimension. By default, it is set to 'syear'.} + +\item{leadtime_dim}{A character string indicating the name of the temporal +dimension. By default, it is set to 'time'.} + +\item{lat_dim}{A character string indicating the name of the latitudinal +dimension. By default it is set by 'latitude'.} + +\item{na.rm}{A logical value indicating whether NA values should be removed +from data. It is FALSE by default.} + +\item{ncores}{An integer value indicating the number of cores to use in +parallel computation.} + +\item{dates}{An array of temporal dimensions containing the Dates of +'data'. It must be of class 'Date' or 'POSIXct'.} + +\item{lat}{A numeric vector containing the latitude values of 'data'.} +} +\description{ +Compute the Potential evapotranspiration (PET) that is the amount of +evaporation and transpiration that would occur if a sufficient water source +were available. Reference evapotranspiration (ETo) is the amount of +evaporation and transpiration from a reference vegetation of grass. They are +usually considered equivalent. 
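The parametric branch now factored out into the .std() helper can be reproduced on a plain numeric vector. The sketch below follows the same steps as the hunk above (probability-weighted moments, L-moments, log-Logistic parameters, distribution CDF, normal quantiles); the input vector is only a stand-in for the accumulated P - PET series of one grid point and is not part of the package code.

library(lmomco)   # pwm.ub(), pwm2lmom(), are.lmom.valid()
library(lmom)     # pelglo(), cdfglo()

acu_sorted <- sort(rnorm(30, mean = -50, sd = 30))  # stand-in accumulated P - PET
pwm <- pwm.ub(acu_sorted, nmom = 3)                 # unbiased probability-weighted moments
lmoments <- pwm2lmom(pwm)                           # convert PWMs to L-moments
if (are.lmom.valid(lmoments)) {
  f_params <- lmom::pelglo(c(lmoments$lambdas[1:2], lmoments$ratios[3]))
  spei <- qnorm(lmom::cdfglo(acu_sorted, f_params)) # standardized index
  summary(spei)
}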
This set of functions calculate PET or ETo +according to the Thornthwaite, Hargreaves or Penman-Monteith equations. +} +\examples{ +dims <- c(time = 3, syear = 3, ensemble = 1, latitude = 1) + +exp_tasmax <- array(rnorm(360, 27.73, 5.26), dim = dims) +exp_tasmin <- array(rnorm(360, 14.83, 3.86), dim = dims) +exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims) +end_year <- 2012 +dates_exp <- as.POSIXct(c(paste0(2010:end_year, "-08-16"), + paste0(2010:end_year, "-09-15"), + paste0(2010:end_year, "-10-16")), "UTC") +dim(dates_exp) <- c(syear = 3, time = 3) + +lat <- c(40) + +exp1 <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) + +res <- PeriodPET(data = exp1, lat = lat, dates = dates_exp) + +} diff --git a/man/CST_PeriodStandardization.Rd b/man/CST_PeriodStandardization.Rd new file mode 100644 index 0000000..6d10acc --- /dev/null +++ b/man/CST_PeriodStandardization.Rd @@ -0,0 +1,91 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PeriodStandardization.R +\name{CST_PeriodStandardization} +\alias{CST_PeriodStandardization} +\title{Compute the Standardization of Precipitation-Evapotranspiration Index} +\usage{ +CST_PeriodStandardization( + data, + data_cor = NULL, + time_dim = "syear", + leadtime_dim = "time", + memb_dim = "ensemble", + accum = NULL, + ref_period = NULL, + param_error = -9999, + handle_infinity = FALSE, + method = "parametric", + distribution = "log-Logistic", + na.rm = FALSE, + ncores = NULL +) +} +\arguments{ +\item{data}{An 's2dv_cube' that element 'data' stores a multidimensional +array containing the data to be standardized.} + +\item{data_cor}{An 's2dv_cube' that element 'data' stores a multidimensional +array containing the data in which the standardization should be applied +using the fitting parameters from 'data'.} + +\item{time_dim}{A character string indicating the name of the temporal +dimension. By default, it is set to 'syear'.} + +\item{leadtime_dim}{A character string indicating the name of the temporal +dimension. By default, it is set to 'time'.} + +\item{memb_dim}{A character string indicating the name of the dimension in +which the ensemble members are stored. When set it to NULL, threshold is +computed for individual members.} + +\item{accum}{An integer value indicating the number of months for the +accumulation for each variable. When it is greater than 1, the result will +be filled with NA until the accum time_dim dimension number due to the +accumulation to previous months.} + +\item{ref_period}{A list with two numeric values with the starting and end +points of the reference period used for computing the index. The default +value is NULL indicating that the first and end values in data will be +used as starting and end points.} + +\item{param_error}{A numeric value with the error accepted.} + +\item{handle_infinity}{A logical value wether to return Infinite values (TRUE) +or not (FALSE).} + +\item{method}{A character string indicating the standardization method used. +If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by +default.} + +\item{distribution}{A character string indicating the name of the distribution +function to be used for computing the SPEI. The accepted names are: +'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The +'Gamma' method only works when only precipitation is provided and other +variables are 0 because it is positive defined (SPI indicator).} + +\item{na.rm}{A logical value indicating whether NA values should be removed +from data. 
It is FALSE by default. If it is FALSE and there are NA values, +(if standardization is TRUE) all values of other dimensions except time_dim +and leadtime_dim will be set to NA directly. On the other hand, if it is +TRUE, if the data from other dimensions except time_dim and leadtime_dim is +not reaching 4 values, it is not enough values to estimate the parameters +and the result will include NA.} + +\item{ncores}{An integer value indicating the number of cores to use in +parallel computation.} +} +\value{ +An object of class \code{s2dv_cube} containing the standardized data. +If 'data_cor' is provided the standardizaton is applied to it using 'data' +to adjust it. +} +\description{ +Compute the Standardization of Precipitation-Evapotranspiration Index +} +\examples{ +dims <- c(syear = 6, time = 3, latitude = 2, ensemble = 25) +data <- NULL +data$data <- array(rnorm(600, -204.1, 78.1), dim = dims) +class(data) <- 's2dv_cube' +SPEI <- CST_PeriodStandardization(data = data, accum = 2) +} diff --git a/man/PeriodPET.Rd b/man/PeriodPET.Rd index 69aad45..0e7aa85 100644 --- a/man/PeriodPET.Rd +++ b/man/PeriodPET.Rd @@ -21,16 +21,16 @@ PeriodPET( the seasonal forecast experiment in the data element for each variable. Specific variables are needed for each method used in computing the Potential Evapotranspiration. See parameter 'pet_method'. The accepted -variables are: 'tmin' and 'tmax', required for methods 'hargreaves' and -'hargreaves_modified' and 'tmean', required for method 'thornthwaite'. -Variable 'pr' is always needed. The units for temperature variables +variables for each method are: for 'hargreaves', 'tmin' and 'tmax'; for +'hargreaves_modified' are 'tmin', 'tmax' and 'pr'; for method 'thornthwaite' +'tmean' is required. The units for temperature variables ('tmin', 'tmax' and 'tmean') need to be in Celcius degrees; the units for precipitation ('pr') need to be in mm/month.} \item{dates}{An array of temporal dimensions containing the Dates of -'exp'. It must be of class 'Date' or 'POSIXct'.} +'data'. It must be of class 'Date' or 'POSIXct'.} -\item{lat}{A numeric vector containing the latitude values of 'exp'.} +\item{lat}{A numeric vector containing the latitude values of 'data'.} \item{pet_method}{A character string indicating the method used to compute the potential evapotranspiration. The accepted methods are: diff --git a/man/PeriodStandardization.Rd b/man/PeriodStandardization.Rd new file mode 100644 index 0000000..663045e --- /dev/null +++ b/man/PeriodStandardization.Rd @@ -0,0 +1,91 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PeriodStandardization.R +\name{PeriodStandardization} +\alias{PeriodStandardization} +\title{Compute the Standardization of Precipitation-Evapotranspiration Index} +\usage{ +PeriodStandardization( + data, + data_cor = NULL, + time_dim = "syear", + leadtime_dim = "time", + memb_dim = "ensemble", + accum = NULL, + ref_period = NULL, + param_error = -9999, + handle_infinity = FALSE, + method = "parametric", + distribution = "log-Logistic", + na.rm = FALSE, + ncores = NULL +) +} +\arguments{ +\item{data}{A multidimensional array containing the data to be standardized.} + +\item{data_cor}{A multidimensional array containing the data in which the +standardization should be applied using the fitting parameters from 'data'.} + +\item{time_dim}{A character string indicating the name of the temporal +dimension. 
By default, it is set to 'syear'.} + +\item{leadtime_dim}{A character string indicating the name of the temporal +dimension. By default, it is set to 'time'.} + +\item{memb_dim}{A character string indicating the name of the dimension in +which the ensemble members are stored. When set it to NULL, threshold is +computed for individual members.} + +\item{accum}{An integer value indicating the number of months for the +accumulation for each variable. When it is greater than 1, the result will +be filled with NA until the accum time_dim dimension number due to the +accumulation to previous months.} + +\item{ref_period}{A list with two numeric values with the starting and end +points of the reference period used for computing the index. The default +value is NULL indicating that the first and end values in data will be +used as starting and end points.} + +\item{param_error}{A numeric value with the error accepted.} + +\item{handle_infinity}{A logical value wether to return Infinite values (TRUE) +or not (FALSE).} + +\item{method}{A character string indicating the standardization method used. +If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by +default.} + +\item{distribution}{A character string indicating the name of the distribution +function to be used for computing the SPEI. The accepted names are: +'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The +'Gamma' method only works when only precipitation is provided and other +variables are 0 because it is positive defined (SPI indicator).} + +\item{na.rm}{A logical value indicating whether NA values should be removed +from data. It is FALSE by default. If it is FALSE and there are NA values, +(if standardization is TRUE) all values of other dimensions except time_dim +and leadtime_dim will be set to NA directly. On the other hand, if it is +TRUE, if the data from other dimensions except time_dim and leadtime_dim is +not reaching 4 values, it is not enough values to estimate the parameters +and the result will include NA.} + +\item{ncores}{An integer value indicating the number of cores to use in +parallel computation.} +} +\value{ +A multidimensional array containing the standardized data. +If 'data_cor' is provided the standardizaton is applied to it using 'data' +to adjust it. +} +\description{ +Compute the Standardization of Precipitation-Evapotranspiration Index +} +\examples{ +dims <- c(syear = 6, time = 2, latitude = 2, ensemble = 25) +dimscor <- c(syear = 1, time = 2, latitude = 2, ensemble = 25) +data <- array(rnorm(600, -194.5, 64.8), dim = dims) +datacor <- array(rnorm(100, -217.8, 68.29), dim = dimscor) + +SPEI <- PeriodStandardization(data = data, accum = 2) +SPEIcor <- PeriodStandardization(data = data, data_cor = datacor, accum = 2) +} -- GitLab From a1a0e733a3ea8a8a4bace93ccc73d7d990c2a8fa Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 6 Sep 2023 17:08:06 +0200 Subject: [PATCH 52/87] Correct description of the blending strategy in the documentation --- R/MergeRefToExp.R | 10 ++++++---- man/CST_MergeRefToExp.Rd | 9 +++++---- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/R/MergeRefToExp.R b/R/MergeRefToExp.R index 898f32e..e47c988 100644 --- a/R/MergeRefToExp.R +++ b/R/MergeRefToExp.R @@ -10,10 +10,12 @@ #'(or other references) could be added at the end of the forecast lead time to #'cover the desired period (e.g.: until the end of summer). 
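The merging of past observations with the forecast described above can be pictured with two plain vectors. This is only a schematic of the blending idea, not the interface of CST_MergeRefToExp, and the values are invented.

obs  <- c(21.1, 22.4, 23.0)        # observations already available for the first lead times
fcst <- c(20.8, 22.0, 23.5, 24.1)  # forecast covering the whole indicator period
# lead times with available observations are taken from 'obs',
# the remaining lead times are kept from the forecast
blended <- c(obs, fcst[(length(obs) + 1):length(fcst)])
blended   # 21.1 22.4 23.0 24.1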
#' -#'This function is created to merge observations and forecasts in the -#'bioclimatic indicator structure, known as the ‘blending’ strategy (see -#'references). This key strategy aims to increase users’ confidence in the -#'reformed predictions. +#'This function is created to merge observations and forecasts, known as the +#'‘blending’ strategy (see references). The basis for this strategy is that the +#'predictions are progressively replaced with observational data as soon as they +#'become available (i.e., when entering the indicator definition period). This +#'key strategy aims to increase users’ confidence in the reformed predictions. + #' #'@param data1 An 's2dv_cube' object with the element 'data' being a #' multidimensional array with named dimensions. All dimensions must be diff --git a/man/CST_MergeRefToExp.Rd b/man/CST_MergeRefToExp.Rd index 5ab328b..a49c9dc 100644 --- a/man/CST_MergeRefToExp.Rd +++ b/man/CST_MergeRefToExp.Rd @@ -87,10 +87,11 @@ period because it is initialized too early (e.g.: Initialization on November cover the desired period (e.g.: until the end of summer). } \details{ -This function is created to merge observations and forecasts in the -bioclimatic indicator structure, known as the ‘blending’ strategy (see -references). This key strategy aims to increase users’ confidence in the -reformed predictions. +This function is created to merge observations and forecasts, known as the +‘blending’ strategy (see references). The basis for this strategy is that the +predictions are progressively replaced with observational data as soon as they +become available (i.e., when entering the indicator definition period). This +key strategy aims to increase users’ confidence in the reformed predictions. } \examples{ data_dates <- c(seq(as.Date("01-07-1993", "\%d-\%m-\%Y", tz = 'UTC'), -- GitLab From ad62916c481398adba43e32a9c8977cf42deb725 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 6 Sep 2023 17:09:25 +0200 Subject: [PATCH 53/87] Correct changes in documentation --- R/MergeRefToExp.R | 10 +++++----- man/MergeRefToExp.Rd | 9 +++++---- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/R/MergeRefToExp.R b/R/MergeRefToExp.R index e47c988..f5d9069 100644 --- a/R/MergeRefToExp.R +++ b/R/MergeRefToExp.R @@ -15,7 +15,6 @@ #'predictions are progressively replaced with observational data as soon as they #'become available (i.e., when entering the indicator definition period). This #'key strategy aims to increase users’ confidence in the reformed predictions. - #' #'@param data1 An 's2dv_cube' object with the element 'data' being a #' multidimensional array with named dimensions. All dimensions must be @@ -236,10 +235,11 @@ CST_MergeRefToExp <- function(data1, data2, start1 = NULL, end1 = NULL, #'(or other references) could be added at the end of the forecast lead time to #'cover the desired period (e.g.: until the end of summer). #' -#'This function is created to merge observations and forecasts in the -#'bioclimatic indicator structure, known as the ‘blending’ strategy (see -#'references). This key strategy aims to increase users’ confidence in the -#'reformed predictions. +#'This function is created to merge observations and forecasts, known as the +#'‘blending’ strategy (see references). The basis for this strategy is that the +#'predictions are progressively replaced with observational data as soon as they +#'become available (i.e., when entering the indicator definition period). 
This +#'key strategy aims to increase users’ confidence in the reformed predictions. #' #'@param data1 A multidimensional array with named dimensions. All dimensions #' must be equal to 'data2' dimensions except for the ones specified with diff --git a/man/MergeRefToExp.Rd b/man/MergeRefToExp.Rd index 047ad37..33b5d42 100644 --- a/man/MergeRefToExp.Rd +++ b/man/MergeRefToExp.Rd @@ -92,10 +92,11 @@ period because it is initialized too early (e.g.: Initialization on November cover the desired period (e.g.: until the end of summer). } \details{ -This function is created to merge observations and forecasts in the -bioclimatic indicator structure, known as the ‘blending’ strategy (see -references). This key strategy aims to increase users’ confidence in the -reformed predictions. +This function is created to merge observations and forecasts, known as the +‘blending’ strategy (see references). The basis for this strategy is that the +predictions are progressively replaced with observational data as soon as they +become available (i.e., when entering the indicator definition period). This +key strategy aims to increase users’ confidence in the reformed predictions. } \examples{ data_dates <- c(seq(as.Date("01-07-1993", "\%d-\%m-\%Y", tz = 'UTC'), -- GitLab From 5c8f2d6729dcc286b65a877533c77d41234cef79 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 7 Sep 2023 16:40:34 +0200 Subject: [PATCH 54/87] Improve PeriodStandardization; write documentation of PeriodPET and PeriodStandardization --- R/PeriodPET.R | 35 +-- R/PeriodSPEI.R | 12 +- R/PeriodStandardization.R | 203 ++++++++++++----- man/CST_PeriodPET.Rd | 13 +- man/CST_PeriodStandardization.Rd | 63 +++-- man/PeriodPET.Rd | 10 +- man/PeriodStandardization.Rd | 63 +++-- tests/testthat/test-PeriodPET.R | 87 ++++--- tests/testthat/test-PeriodStandardization.R | 241 ++++++++++++++++++++ 9 files changed, 579 insertions(+), 148 deletions(-) create mode 100644 tests/testthat/test-PeriodStandardization.R diff --git a/R/PeriodPET.R b/R/PeriodPET.R index 8ab1a64..f365d39 100644 --- a/R/PeriodPET.R +++ b/R/PeriodPET.R @@ -2,10 +2,14 @@ #' #'Compute the Potential evapotranspiration (PET) that is the amount of #'evaporation and transpiration that would occur if a sufficient water source -#'were available. Reference evapotranspiration (ETo) is the amount of -#'evaporation and transpiration from a reference vegetation of grass. They are -#'usually considered equivalent. This set of functions calculate PET or ETo -#'according to the Thornthwaite, Hargreaves or Penman-Monteith equations. +#'were available. This function calculate PET according to the Thornthwaite, +#'Hargreaves or Hargreaves-modified equations. +#' +#'This function is build to work be compatible with other tools in +#'that work with 's2dv_cube' object class. The input data must be this object +#'class. If you don't work with 's2dv_cube', see PeriodPET. For more information +#'on the SPEI calculation, see functions CST_PeriodStandardization and +#'CST_PeriodAccumulation. #' #'@param data A named list with the needed \code{s2dv_cube} objects containing #' the seasonal forecast experiment in the data element for each variable. 
@@ -94,12 +98,14 @@ CST_PeriodPET <- function(data, pet_method = 'hargreaves', Dates <- data[[1]]$attrs$Dates metadata <- data[[1]]$attrs$Variable$metadata metadata_names <- intersect(names(dim(res)), names(metadata)) - res <- CSTools::s2dv_cube(data = res, coords = coords, - varName = paste0('PET'), - metadata = metadata[metadata_names], - Dates = Dates, - source_files = source_files, - when = Sys.time()) + suppressWarnings( + res <- CSTools::s2dv_cube(data = res, coords = coords, + varName = paste0('PET'), + metadata = metadata[metadata_names], + Dates = Dates, + source_files = source_files, + when = Sys.time()) + ) return(res) } @@ -107,10 +113,11 @@ CST_PeriodPET <- function(data, pet_method = 'hargreaves', #' #'Compute the Potential evapotranspiration (PET) that is the amount of #'evaporation and transpiration that would occur if a sufficient water source -#'were available. Reference evapotranspiration (ETo) is the amount of -#'evaporation and transpiration from a reference vegetation of grass. They are -#'usually considered equivalent. This set of functions calculate PET or ETo -#'according to the Thornthwaite, Hargreaves or Penman-Monteith equations. +#'were available. This function calculate PET according to the Thornthwaite, +#'Hargreaves or Hargreaves-modified equations. +#' +#'For more information on the SPEI calculation, see functions +#'PeriodStandardization and PeriodAccumulation. #' #'@param data A named list with the needed \code{s2dv_cube} objects containing #' the seasonal forecast experiment in the data element for each variable. diff --git a/R/PeriodSPEI.R b/R/PeriodSPEI.R index 979e79d..c0593b1 100644 --- a/R/PeriodSPEI.R +++ b/R/PeriodSPEI.R @@ -767,8 +767,6 @@ PeriodSPEI <- function(exp, dates_exp, lat, lat = lat, pet_method = pet_method[k], time_dim = time_dim, leadtime_dim = leadtime_dim, lat_dim = lat_dim, na.rm = na.rm, ncores = ncores) - print("Step (1): PET") - print(summary(pet[[k]])) computed_pet <- TRUE } @@ -781,8 +779,6 @@ PeriodSPEI <- function(exp, dates_exp, lat, fun = function(data, accum) { return(rollapply(data = data, width = accum, FUN = sum)) }, accum = accum, ncores = ncores)$output1 - print("Step (2): Accumulation") - print(summary(accumulated)) # Standardization if (standardization) { @@ -798,6 +794,8 @@ PeriodSPEI <- function(exp, dates_exp, lat, ref_period <- NULL params <- spei$params + print('Params dim:') + print(dim(params)) spei_res <- spei[[1]] } else { spei_res <- accumulated @@ -811,8 +809,6 @@ PeriodSPEI <- function(exp, dates_exp, lat, }, accum = accum, leadtime_dim = leadtime_dim)$output1 pos <- match(names(dim(data[[1]])), names(dim(spei_res))) spei_res <- aperm(spei_res, pos) - print("Step (3): Standardization") - print(summary(spei_res)) } if (standardization) { @@ -919,8 +915,10 @@ PeriodSPEI <- function(exp, dates_exp, lat, for (ff in 1:nleadtime) { data_subset <- ClimProjDiags::Subset(data, along = leadtime_dim, indices = ff, drop = 'selected') - + print('Params subset') + print(dim(params)) params_tmp <- if (all(is.na(params))) {NULL} else {params[, ff, ]} + print(params_tmp) spei_data <- .std(data = data_subset, coef = coef, ntime = ntime, nmemb = nmemb, method = method, diff --git a/R/PeriodStandardization.R b/R/PeriodStandardization.R index 73792ef..9238147 100644 --- a/R/PeriodStandardization.R +++ b/R/PeriodStandardization.R @@ -1,5 +1,34 @@ - #'Compute the Standardization of Precipitation-Evapotranspiration Index +#' +#'The Standardization of the data is the last step of computing the SPEI +#'(Standarized 
Precipitation-Evapotranspiration Index). With this function the +#'data is fit to a probability distribution to transform the original values to +#'standardized units that are comparable in space and time and at different SPEI +#'time scales. +#' +#'Next, some specifications for the calculation of this indicator will be +#'discussed. To choose the time scale in which you want to accumulate the SPEI +#'(SPEI3, SPEI6...) is done using the accum parameter. The accumulation needs to +#'be performed in the previous step. However, since the accumulation is done for +#'the elapsed time steps, there will be no complete accumulations until reaching +#'the time instant equal to the value of the parameter. For this reason, in the +#'result, we will find that for the dimension where the accumulation has been +#'carried out, the values of the array will be NA since they do not include +#'complete accumulations. If there are NAs in the data and they are not removed with the +#'parameter 'na.rm', the standardization cannot be carried out for those +#'coordinates and therefore, the result will be filled with NA for the +#'specific coordinates. When NAs are not removed, if the length of the data for +#'a computational step is smaller than 4, there will not be enough data for +#'standarize and the result will be also filled with NAs for that coordinates. +#'About the distribution used to fit the data, there are only two possibilities: +#''log-logistic' and 'Gamma'. The 'Gamma' method only works when only +#'precipitation is provided and other variables are 0 because it is positive +#'defined (SPI indicator). For more information about SPEI, see functions +#'PeriodPET and PeriodAccumulation. This function is build to work be compatible +#'with other tools in that work with 's2dv_cube' object class. The input data +#'must be this object class. If you don't work with 's2dv_cube', see +#'PeriodStandardization. For more information on the SPEI indicator calculation, +#'see CST_PeriodPET and CST_PeriodAccumulation. #' #'@param data An 's2dv_cube' that element 'data' stores a multidimensional #' array containing the data to be standardized. @@ -13,17 +42,20 @@ #'@param memb_dim A character string indicating the name of the dimension in #' which the ensemble members are stored. When set it to NULL, threshold is #' computed for individual members. -#'@param accum An integer value indicating the number of months for the -#' accumulation for each variable. When it is greater than 1, the result will -#' be filled with NA until the accum time_dim dimension number due to the -#' accumulation to previous months. +#'@param accum An integer value indicating the number of +#' time steps (leadtime_dim dimension) that have been accumulated in the +#' previous step. When it is greater than 1, the result will be filled with +#' NA until the accum leadtime_dim dimension number due to the +#' accumulation to previous months. If it is 1, no accumulation is done. #'@param ref_period A list with two numeric values with the starting and end #' points of the reference period used for computing the index. The default #' value is NULL indicating that the first and end values in data will be #' used as starting and end points. -#'@param param_error A numeric value with the error accepted. -#'@param handle_infinity A logical value wether to return Infinite values (TRUE) -#' or not (FALSE). +#'@param handle_infinity A logical value wether to return infinite values (TRUE) +#' or not (FALSE). 
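The accumulation referred to above is carried out in the previous step as a rolling sum over the lead times, as in the PeriodSPEI hunk shown earlier. In the sketch below, zoo::rollapply is assumed as the rolling-sum backend and the toy vector stands in for the water balance (precipitation minus PET) of one start date, member and grid point.

library(zoo)   # rollapply()

diff_p_pet <- c(10, -5, 2, -8, 4, 1)   # stand-in water balance for 6 lead times
accum <- 3
# rolling sum over the lead times, padded with NA for the first accum - 1 steps
accumulated <- c(rep(NA, accum - 1),
                 rollapply(diff_p_pet, width = accum, FUN = sum))
accumulated   # NA NA 7 -11 -2 -3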
When it is TRUE, the positive infinite values (negative +#' infinite) are substituted by the maximum (minimum) values of each +#' computation step, a subset of the array of dimensions time_dim, leadtime_dim +#' and memb_dim. #'@param method A character string indicating the standardization method used. #' If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by #' default. @@ -31,11 +63,11 @@ #' function to be used for computing the SPEI. The accepted names are: #' 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The #' 'Gamma' method only works when only precipitation is provided and other -#' variables are 0 because it is positive defined (SPI indicator). +#' variables are 0 because it is positive defined (SPI indicator). #'@param na.rm A logical value indicating whether NA values should be removed #' from data. It is FALSE by default. If it is FALSE and there are NA values, -#' (if standardization is TRUE) all values of other dimensions except time_dim -#' and leadtime_dim will be set to NA directly. On the other hand, if it is +#' standardization cannot be carried out for those coordinates and therefore, +#' the result will be filled with NA for the specific coordinates. If it is #' TRUE, if the data from other dimensions except time_dim and leadtime_dim is #' not reaching 4 values, it is not enough values to estimate the parameters #' and the result will include NA. @@ -43,8 +75,9 @@ #' parallel computation. #' #'@return An object of class \code{s2dv_cube} containing the standardized data. -#'If 'data_cor' is provided the standardizaton is applied to it using 'data' -#'to adjust it. +#'If 'data_cor' is provided the array stored in element data will be of the same +#'dimensions as 'data_cor'. If 'data_cor' is not provided, the array stored in +#'element data will be of the same dimensions as 'data'. #' #'@examples #'dims <- c(syear = 6, time = 3, latitude = 2, ensemble = 25) @@ -55,13 +88,14 @@ #'@export CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', leadtime_dim = 'time', memb_dim = 'ensemble', - accum = NULL, ref_period = NULL, param_error = -9999, - handle_infinity = FALSE, method = 'parametric', + accum = 1, ref_period = NULL, + handle_infinity = FALSE, + method = 'parametric', distribution = 'log-Logistic', na.rm = FALSE, ncores = NULL) { # Check 's2dv_cube' if (is.null(data)) { - stop("Parameter 'exp' cannot be NULL.") + stop("Parameter 'data' cannot be NULL.") } if (!inherits(data, 's2dv_cube')) { stop("Parameter 'data' must be of 's2dv_cube' class.") @@ -74,7 +108,7 @@ CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', std <- PeriodStandardization(data = data$data, data_cor = data_cor$data, time_dim = time_dim, leadtime_dim = leadtime_dim, memb_dim = memb_dim, accum = accum, - ref_period = ref_period, param_error = param_error, + ref_period = ref_period, handle_infinity = handle_infinity, method = method, distribution = distribution, na.rm = na.rm, ncores = ncores) @@ -92,10 +126,38 @@ CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', } #'Compute the Standardization of Precipitation-Evapotranspiration Index +#' +#'The Standardization of the data is the last step of computing the SPEI +#'indicator. With this function the data is fit to a probability distribution to +#'transform the original values to standardized units that are comparable in +#'space and time and at different SPEI time scales. 
+#' +#'Next, some specifications for the calculation of this indicator will be +#'discussed. To choose the time scale in which you want to accumulate the SPEI +#'(SPEI3, SPEI6...) is done using the accum parameter. The accumulation needs to +#'be performed in the previous step. However, since the accumulation is done for +#'the elapsed time steps, there will be no complete accumulations until reaching +#'the time instant equal to the value of the parameter. For this reason, in the +#'result, we will find that for the dimension where the accumulation has been +#'carried out, the values of the array will be NA since they do not include +#'complete accumulations. If there are NAs in the data and they are not removed with the +#'parameter 'na.rm', the standardization cannot be carried out for those +#'coordinates and therefore, the result will be filled with NA for the +#'specific coordinates. When NAs are not removed, if the length of the data for +#'a computational step is smaller than 4, there will not be enough data for +#'standarize and the result will be also filled with NAs for that coordinates. +#'About the distribution used to fit the data, there are only two possibilities: +#''log-logistic' and 'Gamma'. The 'Gamma' method only works when only +#'precipitation is provided and other variables are 0 because it is positive +#'defined (SPI indicator). For more information about SPEI, see functions +#'PeriodPET and PeriodAccumulation. #' #'@param data A multidimensional array containing the data to be standardized. #'@param data_cor A multidimensional array containing the data in which the #' standardization should be applied using the fitting parameters from 'data'. +#'@param dates An array containing the dates of the data with the same time +#' dimensions as the data. It is optional and only necessary for using the +#' parameter 'ref_period' to select a reference period directly from dates. #'@param time_dim A character string indicating the name of the temporal #' dimension. By default, it is set to 'syear'. #'@param leadtime_dim A character string indicating the name of the temporal @@ -103,17 +165,20 @@ CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', #'@param memb_dim A character string indicating the name of the dimension in #' which the ensemble members are stored. When set it to NULL, threshold is #' computed for individual members. -#'@param accum An integer value indicating the number of months for the -#' accumulation for each variable. When it is greater than 1, the result will -#' be filled with NA until the accum time_dim dimension number due to the -#' accumulation to previous months. +#'@param accum An integer value indicating the number of +#' time steps (leadtime_dim dimension) that have been accumulated in the +#' previous step. When it is greater than 1, the result will be filled with +#' NA until the accum leadtime_dim dimension number due to the +#' accumulation to previous months. If it is 1, no accumulation is done. #'@param ref_period A list with two numeric values with the starting and end #' points of the reference period used for computing the index. The default #' value is NULL indicating that the first and end values in data will be #' used as starting and end points. -#'@param param_error A numeric value with the error accepted. -#'@param handle_infinity A logical value wether to return Infinite values (TRUE) -#' or not (FALSE). 
+#'@param handle_infinity A logical value wether to return infinite values (TRUE) +#' or not (FALSE). When it is TRUE, the positive infinite values (negative +#' infinite) are substituted by the maximum (minimum) values of each +#' computation step, a subset of the array of dimensions time_dim, leadtime_dim +#' and memb_dim. #'@param method A character string indicating the standardization method used. #' If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by #' default. @@ -121,11 +186,11 @@ CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', #' function to be used for computing the SPEI. The accepted names are: #' 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The #' 'Gamma' method only works when only precipitation is provided and other -#' variables are 0 because it is positive defined (SPI indicator). +#' variables are 0 because it is positive defined (SPI indicator). #'@param na.rm A logical value indicating whether NA values should be removed #' from data. It is FALSE by default. If it is FALSE and there are NA values, -#' (if standardization is TRUE) all values of other dimensions except time_dim -#' and leadtime_dim will be set to NA directly. On the other hand, if it is +#' standardization cannot be carried out for those coordinates and therefore, +#' the result will be filled with NA for the specific coordinates. If it is #' TRUE, if the data from other dimensions except time_dim and leadtime_dim is #' not reaching 4 values, it is not enough values to estimate the parameters #' and the result will include NA. @@ -133,8 +198,9 @@ CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', #' parallel computation. #' #'@return A multidimensional array containing the standardized data. -#'If 'data_cor' is provided the standardizaton is applied to it using 'data' -#'to adjust it. +#'If 'data_cor' is provided the array will be of the same dimensions as +#''data_cor'. If 'data_cor' is not provided, the array will be of the same +#'dimensions as 'data'. 
#' #'@examples #'dims <- c(syear = 6, time = 2, latitude = 2, ensemble = 25) @@ -150,10 +216,11 @@ CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', #'@import lmomco #'@import lmom #'@export -PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', - leadtime_dim = 'time', memb_dim = 'ensemble', - accum = NULL, ref_period = NULL, param_error = -9999, - handle_infinity = FALSE, method = 'parametric', +PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, + time_dim = 'syear', leadtime_dim = 'time', + memb_dim = 'ensemble', accum = 1, + ref_period = NULL, handle_infinity = FALSE, + method = 'parametric', distribution = 'log-Logistic', na.rm = FALSE, ncores = NULL) { # Check inputs @@ -173,6 +240,21 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', stop("Parameter 'data_cor' must have dimension names.") } } + ## dates + if (!is.null(dates)) { + if (!(is.Date(dates)) & !(is.POSIXct(dates))) { + stop("Parameter 'dates' is not of the correct class, ", + "only 'Date' and 'POSIXct' classes are accepted.") + } + if (!time_dim %in% names(dim(dates)) | !leadtime_dim %in% names(dim(dates))) { + stop("Parameter 'dates' must have 'time_dim' and 'leadtime_dim' ", + "dimension.") + } + if (dim(data)[c(time_dim)] != dim(dates)[c(time_dim)]) { + stop("Parameter 'dates' needs to have the same length of 'time_dim' ", + "as 'data'.") + } + } ## time_dim if (!is.character(time_dim) | length(time_dim) != 1) { stop("Parameter 'time_dim' must be a character string.") @@ -209,6 +291,13 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', stop("Parameter 'memb_dim' is not found in 'data_cor' dimension.") } } + ## data_cor (2) + if (!is.null(data_cor)) { + if (dim(data)[leadtime_dim] != dim(data_cor)[leadtime_dim]) { + stop("Parameter 'data' and 'data_cor' have dimension 'leadtime_dim' ", + "of different length.") + } + } ## accum if (accum > dim(data)[leadtime_dim]) { stop(paste0("Cannot compute accumulation of ", accum, " months because ", @@ -216,7 +305,11 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', } ## ref_period if (!is.null(ref_period)) { - if (length(ref_period) != 2) { + if (is.null(dates)) { + warning("Parameter 'dates' is not provided so 'ref_period' can't be ", + "used.") + ref_period <- NULL + } else if (length(ref_period) != 2) { warning("Parameter 'ref_period' must be of length two indicating the ", "first and end years of the reference period. It will not ", "be used.") @@ -230,12 +323,12 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', warning("In parameter 'ref_period' 'start' cannot be after 'end'. It ", "will not be used.") ref_period <- NULL - } else if (!all(unlist(ref_period) %in% year(dates_exp))) { + } else if (!all(unlist(ref_period) %in% year(dates))) { warning("Parameter 'ref_period' contain years outside the dates. 
", "It will not be used.") ref_period <- NULL } else { - years <- year(ClimProjDiags::Subset(dates_exp, along = leadtime_dim, + years <- year(ClimProjDiags::Subset(dates, along = leadtime_dim, indices = 1)) ref_period[[1]] <- which(ref_period[[1]] == years) ref_period[[2]] <- which(ref_period[[2]] == years) @@ -285,7 +378,6 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', fun = .standardization, leadtime_dim = leadtime_dim, ref_period = ref_period, handle_infinity = handle_infinity, - param_error = param_error, method = method, distribution = distribution, na.rm = na.rm, ncores = ncores)$output1 } else { @@ -293,7 +385,6 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', fun = .standardization, leadtime_dim = leadtime_dim, ref_period = ref_period, handle_infinity = handle_infinity, - param_error = param_error, method = method, distribution = distribution, na.rm = na.rm, ncores = ncores)$output1 } @@ -305,7 +396,8 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', fun = function(x, accum, leadtime_dim) { res <- c(rep(NA, accum-1), x) return(res) - }, accum = accum, leadtime_dim = leadtime_dim)$output1 + }, accum = accum, leadtime_dim = leadtime_dim, + ncores = ncores)$output1 } if (is.null(data_cor)) { pos <- match(names(dim(data)), names(dim(spei))) @@ -318,7 +410,7 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', } .standardization <- function(data, data_cor = NULL, leadtime_dim = 'time', - ref_period = NULL, handle_infinity = FALSE, param_error = -9999, + ref_period = NULL, handle_infinity = FALSE, method = 'parametric', distribution = 'log-Logistic', na.rm = FALSE) { # data: [leadtime_dim, time_dim, memb_dim] @@ -352,9 +444,10 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', } for (nsd in 1:dim(data)[time_dim]) { acu <- as.vector(data_fit[-nsd, ]) - acu_sorted <- sort.default(acu, method = "quick") if (na.rm) { - acu_sorted <- acu_sorted[!is.na(acu_sorted)] + acu_sorted <- sort.default(acu, method = "quick") + } else { + acu_sorted <- sort.default(acu, method = "quick", na.last = TRUE) } if (!any(is.na(acu_sorted)) & length(acu_sorted) != 0) { acu_sd <- sd(acu_sorted) @@ -363,16 +456,19 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', acu_sorted <- acu_sorted[acu_sorted > 0] } if (length(acu_sorted) >= 4) { - f_params <- .std(acu_sorted, fit, distribution) + f_params <- .std(data = acu_sorted, fit = fit, + distribution = distribution) + } else { + f_params <- NA } if (all(is.na(f_params))) { cdf_res <- NA } else { f_params <- f_params[which(!is.na(f_params))] cdf_res = switch(distribution, - "log-Logistic" = lmom::cdfglo(data2, f_params), - "Gamma" = lmom::cdfgam(data2, f_params), - "PearsonIII" = lmom::cdfpe3(data2, f_params)) + "log-Logistic" = lmom::cdfglo(data2, f_params), + "Gamma" = lmom::cdfgam(data2, f_params), + "PearsonIII" = lmom::cdfpe3(data2, f_params)) } std_index_cv <- array(qnorm(cdf_res), dim = dims) spei_mod[ff, nsd, ] <- std_index_cv[nsd, ] @@ -405,9 +501,10 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', data_fit <- data2 } acu <- as.vector(data_fit) - acu_sorted <- sort.default(acu, method = "quick") if (na.rm) { - acu_sorted <- acu_sorted[!is.na(acu_sorted)] + acu_sorted <- sort.default(acu, method = "quick") + } else { + acu_sorted <- sort.default(acu, method = "quick", na.last = TRUE) } if (!any(is.na(acu_sorted)) & length(acu_sorted) != 0) { acu_sd <- 
sd(acu_sorted) @@ -443,9 +540,9 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', return(spei_mod) } -.std <- function(data, fit, distribution) { +.std <- function(data, fit = 'pp-pwm', distribution = 'log-Logistic') { pwm = switch(fit, - "pp-pwm" = pwm.pp(data, -0.35, 0, nmom = 3), + 'pp-pwm' = pwm.pp(data, -0.35, 0, nmom = 3), pwm.ub(data, nmom = 3) # TLMoments::PWM(data, order = 0:2) ) @@ -453,11 +550,11 @@ PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', if (!(!are.lmom.valid(lmom) || anyNA(lmom[[1]]) || any(is.nan(lmom[[1]])))) { fortran_vec = c(lmom$lambdas[1:2], lmom$ratios[3]) params = switch(distribution, - "log-Logistic" = tryCatch(lmom::pelglo(fortran_vec), + 'log-Logistic' = tryCatch(lmom::pelglo(fortran_vec), error = function(e){parglo(lmom)$para}), - "Gamma" = tryCatch(lmom::pelgam(fortran_vec), + 'Gamma' = tryCatch(lmom::pelgam(fortran_vec), error = function(e){pargam(lmom)$para}), - "PearsonIII" = tryCatch(lmom::pelpe3(fortran_vec), + 'PearsonIII' = tryCatch(lmom::pelpe3(fortran_vec), error = function(e){parpe3(lmom)$para})) if (distribution == 'log-Logistic' && fit == 'max-lik') {