diff --git a/DESCRIPTION b/DESCRIPTION index ea4cb5846ebab26e27cb84b866f339ad5e46a242..b716d5d55a89680136ab85df0e6f7473f8df8cb5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -31,10 +31,14 @@ Depends: Imports: multiApply (>= 2.1.1), stats, - ClimProjDiags + ClimProjDiags, + CSTools, + SPEI, + lmom, + lmomco, + zoo Suggests: testthat, - CSTools, knitr, markdown, rmarkdown diff --git a/NAMESPACE b/NAMESPACE index d92da379f48f9ca97e2c65c7d476b85c73a73a60..bab43edcccecb70f92ef63df66c81e870745ec26 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,6 +9,8 @@ export(CST_PeriodAccumulation) export(CST_PeriodMax) export(CST_PeriodMean) export(CST_PeriodMin) +export(CST_PeriodPET) +export(CST_PeriodStandardization) export(CST_PeriodVariance) export(CST_QThreshold) export(CST_SelectPeriodOnData) @@ -22,6 +24,8 @@ export(PeriodAccumulation) export(PeriodMax) export(PeriodMean) export(PeriodMin) +export(PeriodPET) +export(PeriodStandardization) export(PeriodVariance) export(QThreshold) export(SelectPeriodOnData) @@ -32,8 +36,29 @@ export(TotalTimeExceedingThreshold) export(WindCapacityFactor) export(WindPowerDensity) import(multiApply) +importFrom(CSTools,s2dv_cube) importFrom(ClimProjDiags,Subset) +importFrom(SPEI,hargreaves) +importFrom(SPEI,parglo.maxlik) +importFrom(SPEI,thornthwaite) +importFrom(lmom,cdfgam) +importFrom(lmom,cdfglo) +importFrom(lmom,cdfpe3) +importFrom(lmom,pelgam) +importFrom(lmom,pelglo) +importFrom(lmom,pelpe3) +importFrom(lmomco,are.lmom.valid) +importFrom(lmomco,pargam) +importFrom(lmomco,parglo) +importFrom(lmomco,parpe3) +importFrom(lmomco,pwm.pp) +importFrom(lmomco,pwm.ub) +importFrom(lmomco,pwm2lmom) importFrom(stats,approxfun) importFrom(stats,ecdf) +importFrom(stats,qnorm) importFrom(stats,quantile) +importFrom(stats,sd) +importFrom(stats,window) importFrom(utils,read.delim) +importFrom(zoo,rollapply) diff --git a/R/PeriodAccumulation.R b/R/PeriodAccumulation.R index 89a4d37f5162e57a23bcb699457651eaa337ac04..60245b6bdc06fe8192b2084ad8cf35de7e1389eb 100644 --- a/R/PeriodAccumulation.R +++ b/R/PeriodAccumulation.R @@ -10,12 +10,22 @@ #' August 21st to October 21st} #'} #' +#'There are two possible ways of performing the accumulation. The default one +#'is by accumulating a variable over a dimension specified with 'time_dim'. To +#'chose a specific time period, 'start' and 'end' must be used. The other method +#'is by using 'rollwidth' parameter. When this parameter is a positive integer, +#'the cumulative backward sum is applied to the time dimension. If it is +#'negative, the rolling sum is applied backwards. This function is build to +#'be compatible with other tools in that work with 's2dv_cube' object class. The +#'input data must be this object class. If you don't work with 's2dv_cube', see +#'PeriodAccumulation. +#' #'@param data An 's2dv_cube' object as provided function \code{CST_Start} or #' \code{CST_Load} in package CSTools. #'@param start An optional parameter to defined the initial date of the period #' to select from the data by providing a list of two elements: the initial -#' date of the period and the initial month of the period. By default it is set -#' to NULL and the indicator is computed using all the data provided in +#' date of the period and the initial m onth of the period. By default it is +#' set to NULL and the indicator is computed using all the data provided in #' \code{data}. #'@param end An optional parameter to defined the final date of the period to #' select from the data by providing a list of two elements: the final day of @@ -25,40 +35,59 @@ #' compute the indicator. By default, it is set to 'time'. More than one #' dimension name matching the dimensions provided in the object #' \code{data$data} can be specified. +#'@param rollwidth An optional parameter to indicate the number of time +#' steps the rolling sum is applied to. If it is positive, the rolling sum is +#' applied backwards 'time_dim', if it is negative, it will be forward it. When +#' this parameter is NULL, the sum is applied over all 'time_dim', in a +#' specified period. It is NULL by default. +#'@param sdate_dim (Only needed when rollwidth is used). A character string +#' indicating the name of the start date dimension to compute the rolling +#' accumulation. By default, it is set to 'sdate'. +#'@param frequency (Only needed when rollwidth is used). A character string +#' indicating the time frequency of the data to apply the rolling accumulation. +#' It can be 'daily' or 'monthly'. If it is set to 'monthly', values from +#' continuous months will be accumulated; if it is 'daliy', values from +#' continuous days will be accumulated. It is set to 'monthly' by default. #'@param na.rm A logical value indicating whether to ignore NA values (TRUE) or #' not (FALSE). #'@param ncores An integer indicating the number of cores to use in parallel #' computation. #' -#'@return An 's2dv_cube' object containing the indicator in the element -#'\code{data} with dimensions of the input parameter 'data' except the dimension -#'where the accumulation has been computed (specified with 'time_dim'). The -#''Dates' array is updated to the dates corresponding to the beginning of the -#'aggregated time period. A new element called 'time_bounds' will be added into -#'the 'attrs' element in the 's2dv_cube' object. It consists of a list -#'containing two elements, the start and end dates of the aggregated period with -#'the same dimensions of 'Dates' element. +#'@return An 's2dv_cube' object containing the accumulated data in the element +#'\code{data}. If parameter 'rollwidth' is not used, it will have the dimensions +#'of the input parameter 'data' except the dimension where the accumulation has +#'been computed (specified with 'time_dim'). If 'rollwidth' is used, it will be +#'of same dimensions as input data. The 'Dates' array is updated to the +#'dates corresponding to the beginning of the aggregated time period. A new +#'element called 'time_bounds' will be added into the 'attrs' element in the +#''s2dv_cube' object. It consists of a list containing two elements, the start +#'and end dates of the aggregated period with the same dimensions of 'Dates' +#'element. If 'rollwidth' is used, it will contain the same dimensions of +#'parameter 'data' and the other elements of the 's2dv_cube' will not be +#'modified. #' #'@examples #'exp <- NULL #'exp$data <- array(rnorm(216)*200, dim = c(dataset = 1, member = 2, sdate = 3, -#' time = 9, lat = 2, lon = 2)) +#' ftime = 9, lat = 2, lon = 2)) #'class(exp) <- 's2dv_cube' -#'TP <- CST_PeriodAccumulation(exp) +#'TP <- CST_PeriodAccumulation(exp, time_dim = 'ftime') #'exp$data <- array(rnorm(5 * 3 * 214 * 2), -#' c(memb = 5, sdate = 3, time = 214, lon = 2)) +#' c(memb = 5, sdate = 3, ftime = 214, lon = 2)) #'Dates <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), #' as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), #' seq(as.Date("01-05-2001", format = "%d-%m-%Y"), #' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), #' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), #' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) -#'dim(Dates) <- c(sdate = 3, time = 214) +#'dim(Dates) <- c(sdate = 3, ftime = 214) #'exp$attrs$Dates <- Dates -#'SprR <- CST_PeriodAccumulation(exp, start = list(21, 4), end = list(21, 6)) +#'SprR <- CST_PeriodAccumulation(exp, start = list(21, 4), end = list(21, 6), +#' time_dim = 'ftime') #'dim(SprR$data) #'head(SprR$attrs$Dates) -#'HarR <- CST_PeriodAccumulation(exp, start = list(21, 8), end = list(21, 10)) +#'HarR <- CST_PeriodAccumulation(exp, start = list(21, 8), end = list(21, 10), +#' time_dim = 'ftime') #'dim(HarR$data) #'head(HarR$attrs$Dates) #' @@ -66,12 +95,17 @@ #'@importFrom ClimProjDiags Subset #'@export CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, - time_dim = 'time', na.rm = FALSE, - ncores = NULL) { + time_dim = 'time', rollwidth = NULL, + sdate_dim = 'sdate', frequency = 'monthly', + na.rm = FALSE, ncores = NULL) { # Check 's2dv_cube' if (!inherits(data, 's2dv_cube')) { stop("Parameter 'data' must be of the class 's2dv_cube'.") } + if (!all(c('data') %in% names(data))) { + stop("Parameter 'data' doesn't have 's2dv_cube' structure. ", + "Use PeriodAccumulation instead.") + } # Dates subset if (!is.null(start) && !is.null(end)) { if (is.null(dim(data$attrs$Dates))) { @@ -83,32 +117,31 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, } Dates <- data$attrs$Dates - total <- PeriodAccumulation(data$data, dates = Dates, start, end, - time_dim = time_dim, na.rm = na.rm, ncores = ncores) - data$data <- total - data$dims <- dim(total) - data$coords[[time_dim]] <- NULL - - if (!is.null(Dates)) { - if (!is.null(start) && !is.null(end)) { - Dates <- SelectPeriodOnDates(dates = Dates, start = start, end = end, - time_dim = time_dim, ncores = ncores) - } - if (is.null(dim(Dates))) { - warning("Element 'Dates' has NULL dimensions. They will not be ", - "subset and 'time_bounds' will be missed.") - data$attrs$Dates <- Dates - } else { + data$data <- PeriodAccumulation(data = data$data, dates = Dates, + start = start, end = end, + time_dim = time_dim, rollwidth = rollwidth, + sdate_dim = sdate_dim, frequency = frequency, + na.rm = na.rm, ncores = ncores) + data$dims <- dim(data$data) + if (!is.null(start) & !is.null(end)) { + Dates <- SelectPeriodOnDates(dates = Dates, start = start, end = end, + time_dim = time_dim, ncores = ncores) + data$attrs$Dates <- Dates + } + if (is.null(rollwidth)) { + data$coords[[time_dim]] <- NULL + if (!is.null(dim(Dates))) { # Create time_bounds time_bounds <- NULL - time_bounds$start <- ClimProjDiags::Subset(Dates, time_dim, 1, drop = 'selected') - time_bounds$end <- ClimProjDiags::Subset(Dates, time_dim, dim(Dates)[time_dim], drop = 'selected') + time_bounds$start <- Subset(Dates, time_dim, 1, drop = 'selected') + time_bounds$end <- Subset(Dates, time_dim, dim(Dates)[time_dim], drop = 'selected') # Add Dates in attrs data$attrs$Dates <- time_bounds$start data$attrs$time_bounds <- time_bounds } } + return(data) } @@ -123,6 +156,13 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, #' \item\code{HarR}{Harvest Total Precipitation: The total precipitation from #' August 21st to October 21st} #'} +#' +#'There are two possible ways of performing the accumulation. The default one +#'is by accumulating a variable over a dimension specified with 'time_dim'. To +#'chose a specific time period, 'start' and 'end' must be used. The other method +#'is by using 'rollwidth' parameter. When this parameter is a positive integer, +#'the cumulative backward sum is applied to the time dimension. If it is +#'negative, the rolling sum is applied backwards. #' #'@param data A multidimensional array with named dimensions. #'@param dates A multidimensional array of dates with named dimensions matching @@ -138,50 +178,75 @@ CST_PeriodAccumulation <- function(data, start = NULL, end = NULL, #' the period and the final month of the period. By default it is set to NULL #' and the indicator is computed using all the data provided in \code{data}. #'@param time_dim A character string indicating the name of the dimension to -#' compute the indicator. By default, it is set to 'time'. More than one -#' dimension name matching the dimensions provided in the object -#' \code{data$data} can be specified. +#' compute the indicator. By default, it is set to 'time'. +#'@param rollwidth An optional parameter to indicate the number of time +#' steps the rolling sum is applied to. If it is positive, the rolling sum is +#' applied backwards 'time_dim', if it is negative, it will be forward it. When +#' this parameter is NULL, the sum is applied over all 'time_dim', in a +#' specified period. It is NULL by default. +#'@param sdate_dim (Only needed when rollwidth is used). A character string +#' indicating the name of the start date dimension to compute the rolling +#' accumulation. By default, it is set to 'sdate'. +#'@param frequency (Only needed when rollwidth is used). A character string +#' indicating the time frequency of the data to apply the rolling accumulation. +#' It can be 'daily' or 'monthly'. If it is set to 'monthly', values from +#' continuous months will be accumulated; if it is 'daliy', values from +#' continuous days will be accumulated. It is set to 'monthly' by default. #'@param na.rm A logical value indicating whether to ignore NA values (TRUE) or #' not (FALSE). #'@param ncores An integer indicating the number of cores to use in parallel #' computation. -#' #'@return A multidimensional array with named dimensions containing the -#'indicator in the element \code{data}. +#'accumulated data in the element \code{data}. If parameter 'rollwidth' is +#'not used, it will have the dimensions of the input 'data' except the dimension +#'where the accumulation has been computed (specified with 'time_dim'). If +#''rollwidth' is used, it will be of same dimensions as input data. #' #'@examples #'exp <- array(rnorm(216)*200, dim = c(dataset = 1, member = 2, sdate = 3, -#' time = 9, lat = 2, lon = 2)) -#'TP <- PeriodAccumulation(exp, time_dim = 'time') +#' ftime = 9, lat = 2, lon = 2)) +#'TP <- PeriodAccumulation(exp, time_dim = 'ftime') #'data <- array(rnorm(5 * 3 * 214 * 2), -#' c(memb = 5, sdate = 3, time = 214, lon = 2)) +#' c(memb = 5, sdate = 3, ftime = 214, lon = 2)) #'Dates <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), #' as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), #' seq(as.Date("01-05-2001", format = "%d-%m-%Y"), #' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), #' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), #' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) -#'dim(Dates) <- c(sdate = 3, time = 214) +#'dim(Dates) <- c(sdate = 3, ftime = 214) #'SprR <- PeriodAccumulation(data, dates = Dates, start = list(21, 4), -#' end = list(21, 6)) +#' end = list(21, 6), time_dim = 'ftime') #'HarR <- PeriodAccumulation(data, dates = Dates, start = list(21, 8), -#' end = list(21, 10)) +#' end = list(21, 10), time_dim = 'ftime') #' #'@import multiApply +#'@importFrom zoo rollapply #'@export PeriodAccumulation <- function(data, dates = NULL, start = NULL, end = NULL, - time_dim = 'time', na.rm = FALSE, - ncores = NULL) { + time_dim = 'time', rollwidth = NULL, + sdate_dim = 'sdate', frequency = 'monthly', + na.rm = FALSE, ncores = NULL) { + # Initial checks + ## data if (is.null(data)) { stop("Parameter 'data' cannot be NULL.") } if (!is.numeric(data)) { stop("Parameter 'data' must be numeric.") } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) { + stop("Parameter 'time_dim' must be a character string.") + } if (!is.array(data)) { dim(data) <- length(data) names(dim(data)) <- time_dim } + dimnames <- names(dim(data)) + if (!time_dim %in% names(dim(data))) { + stop("Parameter 'time_dim' is not found in 'data' dimension.") + } if (!is.null(start) && !is.null(end)) { if (is.null(dates)) { @@ -190,19 +255,96 @@ PeriodAccumulation <- function(data, dates = NULL, start = NULL, end = NULL, } else { if (!any(c(is.list(start), is.list(end)))) { stop("Parameter 'start' and 'end' must be lists indicating the ", - "day and the month of the period start and end.") + "day and the month of the period start and end.") } if (!is.null(dim(dates))) { data <- SelectPeriodOnData(data, dates, start, end, time_dim = time_dim, ncores = ncores) + if (!is.null(rollwidth)) { + dates <- SelectPeriodOnDates(dates = dates, start = start, end = end, + time_dim = time_dim, ncores = ncores) + } } else { warning("Parameter 'dates' must have named dimensions if 'start' and ", "'end' are not NULL. All data will be used.") } } } - total <- Apply(list(data), target_dims = time_dim, fun = sum, - na.rm = na.rm, ncores = ncores)$output1 + + if (is.null(rollwidth)) { + # period accumulation + total <- Apply(list(data), target_dims = time_dim, fun = sum, + na.rm = na.rm, ncores = ncores)$output1 + } else { + # rolling accumulation + ## dates + if (is.null(dates)) { + stop("Parameter 'dates' is NULL. Cannot compute the rolling accumulation.") + } + + ## rollwidth + if (!is.numeric(rollwidth)) { + stop("Parameter 'rollwidth' must be a numeric value.") + } + if (abs(rollwidth) > dim(data)[time_dim]) { + stop(paste0("Cannot compute accumulation of ", rollwidth, " months because ", + "loaded data has only ", dim(data)[time_dim], " months.")) + } + ## sdate_dim + if (!is.character(sdate_dim) | length(sdate_dim) != 1) { + stop("Parameter 'sdate_dim' must be a character string.") + } + if (!sdate_dim %in% names(dim(data))) { + stop("Parameter 'sdate_dim' is not found in 'data' dimension.") + } + ## frequency + if (!is.character(frequency)) { + stop("Parameter 'frequency' must be a character string.") + } + + forwardroll <- FALSE + if (rollwidth < 0) { + rollwidth <- abs(rollwidth) + forwardroll <- TRUE + } + + mask_dates <- .datesmask(dates, frequency = frequency) + total <- Apply(data = list(data), + target_dims = list(data = c(time_dim, sdate_dim)), + fun = .rollaccumulation, + mask_dates = mask_dates, + rollwidth = rollwidth, + forwardroll = forwardroll, na.rm = na.rm, + output_dims = c(time_dim, sdate_dim), + ncores = ncores)$output1 + + pos <- match(dimnames, names(dim(total))) + total <- aperm(total, pos) + } + return(total) } +.rollaccumulation <- function(data, mask_dates, rollwidth = 1, + forwardroll = FALSE, na.rm = FALSE) { + dims <- dim(data) + data_vector <- array(NA, dim = length(mask_dates)) + count <- 1 + for (dd in 1:length(mask_dates)) { + if (mask_dates[dd] == 1) { + data_vector[dd] <- as.vector(data)[count] + count <- count + 1 + } + } + + data_accum <- rollapply(data = data_vector, width = rollwidth, FUN = sum, na.rm = na.rm) + if (!forwardroll) { + data_accum <- c(rep(NA, rollwidth-1), data_accum) + } else { + data_accum <- c(data_accum, rep(NA, rollwidth-1)) + } + + data_accum <- data_accum[which(mask_dates == 1)] + data_accum <- array(data_accum, dim = c(dims)) + return(data_accum) +} diff --git a/R/PeriodPET.R b/R/PeriodPET.R new file mode 100644 index 0000000000000000000000000000000000000000..27d6ecaeeed9d7a9e778426495348ebcd9f519ab --- /dev/null +++ b/R/PeriodPET.R @@ -0,0 +1,386 @@ +#'Compute the Potential Evapotranspiration +#' +#'Compute the Potential evapotranspiration (PET) that is the amount of +#'evaporation and transpiration that would occur if a sufficient water source +#'were available. This function calculate PET according to the Thornthwaite, +#'Hargreaves or Hargreaves-modified equations. +#' +#'This function is build to be compatible with other tools in +#'that work with 's2dv_cube' object class. The input data must be this object +#'class. If you don't work with 's2dv_cube', see PeriodPET. For more information +#'on the SPEI calculation, see functions CST_PeriodStandardization and +#'CST_PeriodAccumulation. +#' +#'@param data A named list with the needed \code{s2dv_cube} objects containing +#' the seasonal forecast experiment in the 'data' element for each variable. +#' Specific variables are needed for each method used in computing the +#' Potential Evapotranspiration (see parameter 'pet_method'). The accepted +#' variable names are fixed in order to be recognized by the function. +#' The accepted name corresponding to the Minimum Temperature is 'tmin', +#' for Maximum Temperature is 'tmax', for Mean Temperature is 'tmean' and +#' for Precipitation is 'pr'. The accepted variable names for each method are: +#' For 'hargreaves': 'tmin' and 'tmax'; for 'hargreaves_modified' are 'tmin', +#' 'tmax' and 'pr'; for method 'thornthwaite' 'tmean' is required. The units +#' for temperature variables ('tmin', 'tmax' and 'tmean') need to be in Celcius +#' degrees; the units for precipitation ('pr') need to be in mm/month. +#' Currently the function works only with monthly data from different years. +#'@param pet_method A character string indicating the method used to compute +#' the potential evapotranspiration. The accepted methods are: +#' 'hargreaves' and 'hargreaves_modified', that require the data to have +#' variables tmin and tmax; and 'thornthwaite', that requires variable +#' 'tmean'. +#'@param time_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'syear'. +#'@param leadtime_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'time'. +#'@param lat_dim A character string indicating the name of the latitudinal +#' dimension. By default it is set by 'latitude'. +#'@param na.rm A logical value indicating whether NA values should be removed +#' from data. It is FALSE by default. +#'@param ncores An integer value indicating the number of cores to use in +#' parallel computation. +#' +#'@examples +#'dims <- c(time = 3, syear = 3, ensemble = 1, latitude = 1) +#'exp_tasmax <- array(rnorm(360, 27.73, 5.26), dim = dims) +#'exp_tasmin <- array(rnorm(360, 14.83, 3.86), dim = dims) +#'exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims) +#'end_year <- 2012 +#'dates_exp <- as.POSIXct(c(paste0(2010:end_year, "-08-16"), +#' paste0(2010:end_year, "-09-15"), +#' paste0(2010:end_year, "-10-16")), "UTC") +#'dim(dates_exp) <- c(syear = 3, time = 3) +#'lat <- c(40) +#'exp1 <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) +#'res <- PeriodPET(data = exp1, lat = lat, dates = dates_exp) +#' +#'@importFrom CSTools s2dv_cube +#'@export +CST_PeriodPET <- function(data, pet_method = 'hargreaves', + time_dim = 'syear', leadtime_dim = 'time', + lat_dim = 'latitude', na.rm = FALSE, + ncores = NULL) { + # Check 's2dv_cube' + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!all(sapply(data, function(x) inherits(x, 's2dv_cube')))) { + stop("Parameter 'data' must be a list of 's2dv_cube' class.") + } + # latitude + if (!any(names(data[[1]]$coords) %in% .KnownLatNames())) { + stop("Spatial coordinate names of parameter 'data' do not match any ", + "of the names accepted by the package.") + } + # Dates + dates_exp <- data[[1]]$attrs$Dates + if (!'Dates' %in% names(data[[1]]$attrs)) { + stop("Element 'Dates' is not found in 'attrs' list of 'data'. ", + "See 's2dv_cube' object description in README file for more ", + "information.") + } + lat_dim <- names(data[[1]]$coords)[[which(names(data[[1]]$coords) %in% .KnownLatNames())]] + + res <- PeriodPET(data = lapply(data, function(x) x$data), + dates = data[[1]]$attrs$Dates, + lat = data[[1]]$coords[[lat_dim]], + pet_method = pet_method, time_dim = time_dim, + leadtime_dim = leadtime_dim, lat_dim = lat_dim, + na.rm = na.rm, ncores = ncores) + # Add metadata + source_files <- lapply(data, function(x) {x$attrs$source_files}) + coords <- data[[1]]$coords + Dates <- data[[1]]$attrs$Dates + metadata <- data[[1]]$attrs$Variable$metadata + metadata_names <- intersect(names(dim(res)), names(metadata)) + suppressWarnings( + res <- s2dv_cube(data = res, coords = coords, + varName = paste0('PET'), + metadata = metadata[metadata_names], + Dates = Dates, + source_files = source_files, + when = Sys.time()) + ) + return(res) +} + +#'Compute the Potential Evapotranspiration +#' +#'Compute the Potential Evapotranspiration (PET) that is the amount of +#'evaporation and transpiration that would occur if a sufficient water source +#'were available. This function calculate PET according to the Thornthwaite, +#'Hargreaves or Hargreaves-modified equations. +#' +#'For more information on the SPEI calculation, see functions +#'PeriodStandardization and PeriodAccumulation. +#' +#'@param data A named list of multidimensional arrays containing +#' the seasonal forecast experiment data for each variable. +#' Specific variables are needed for each method used in computing the +#' Potential Evapotranspiration (see parameter 'pet_method'). The accepted +#' variable names are fixed in order to be recognized by the function. +#' The accepted name corresponding to the Minimum Temperature is 'tmin', +#' for Maximum Temperature is 'tmax', for Mean Temperature is 'tmean' and +#' for Precipitation is 'pr'. The accepted variable names for each method are: +#' For 'hargreaves': 'tmin' and 'tmax'; for 'hargreaves_modified' are 'tmin', +#' 'tmax' and 'pr'; for method 'thornthwaite' 'tmean' is required. The units +#' for temperature variables ('tmin', 'tmax' and 'tmean') need to be in Celcius +#' degrees; the units for precipitation ('pr') need to be in mm/month. +#' Currently the function works only with monthly data from different years. +#'@param dates An array of temporal dimensions containing the Dates of +#' 'data'. It must be of class 'Date' or 'POSIXct'. +#'@param lat A numeric vector containing the latitude values of 'data'. +#'@param pet_method A character string indicating the method used to compute +#' the potential evapotranspiration. The accepted methods are: +#' 'hargreaves' and 'hargreaves_modified', that require the data to have +#' variables tmin and tmax; and 'thornthwaite', that requires variable +#' 'tmean'. +#'@param time_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'syear'. +#'@param leadtime_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'time'. +#'@param lat_dim A character string indicating the name of the latitudinal +#' dimension. By default it is set by 'latitude'. +#'@param na.rm A logical value indicating whether NA values should be removed +#' from data. It is FALSE by default. +#'@param ncores An integer value indicating the number of cores to use in +#' parallel computation. +#' +#'@examples +#'dims <- c(time = 3, syear = 3, ensemble = 1, latitude = 1) +#'exp_tasmax <- array(rnorm(360, 27.73, 5.26), dim = dims) +#'exp_tasmin <- array(rnorm(360, 14.83, 3.86), dim = dims) +#'exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims) +#'end_year <- 2012 +#'dates_exp <- as.POSIXct(c(paste0(2010:end_year, "-08-16"), +#' paste0(2010:end_year, "-09-15"), +#' paste0(2010:end_year, "-10-16")), "UTC") +#'dim(dates_exp) <- c(syear = 3, time = 3) +#'lat <- c(40) +#'exp1 <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) +#'res <- PeriodPET(data = exp1, lat = lat, dates = dates_exp) +#' +#'@importFrom SPEI hargreaves thornthwaite +#'@import multiApply +#'@export +PeriodPET <- function(data, dates, lat, pet_method = 'hargreaves', + time_dim = 'syear', leadtime_dim = 'time', + lat_dim = 'latitude', na.rm = FALSE, + ncores = NULL) { + + # Initial checks + # data + if (!inherits(data, 'list')) { + stop("Parameter 'data' needs to be a named list with the needed variables.") + } + if (is.null(names(data))) { + stop("Parameter 'data' needs to be a named list with the variable names.") + } + if (any(sapply(data, function(x) is.null(names(dim(x)))))) { + stop("Parameter 'data' needs to be a list of arrays with dimension names.") + } + dims <- lapply(data, function(x) dim(x)) + first_dims <- dims[[1]] + all_equal <- all(sapply(dims[-1], function(x) identical(first_dims, x))) + if (!all_equal) { + stop("Parameter 'data' variables need to have the same dimensions.") + } + # lat + if (!is.numeric(lat)) { + stop("Parameter 'lat' must be numeric.") + } + if (!lat_dim %in% names(dims[[1]])) { + stop("Parameter 'data' must have 'lat_dim' dimension.") + } + if (any(sapply(dims, FUN = function(x) x[lat_dim] != length(lat)))) { + stop("Parameter 'lat' needs to have the same length of latitudinal", + "dimension of all the variables arrays in 'data'.") + } + + # data (2) + if (all(c('tmin', 'tmax', 'pr') %in% names(data))) { + # hargreaves modified: 'tmin', 'tmax', 'pr' and 'lat' + if (!(pet_method %in% c('hargreaves_modified', 'hargreaves'))) { + warning("Parameter 'pet_method' needs to be 'hargreaves' or ", + "'hargreaves_modified'. It is set to 'hargreaves_modified'.") + pet_method <- 'hargreaves_modified' + } + } else if (all(c('tmin', 'tmax') %in% names(data))) { + if (!(pet_method %in% c('hargreaves'))) { + warning("Parameter 'pet_method' will be set as 'hargreaves'.") + pet_method <- 'hargreaves' + } + } else if (c('tmean') %in% names(data)) { + # thornthwaite: 'tmean' (mean), 'lat' + if (!(pet_method == 'thornthwaite')) { + warning("Parameter 'pet_method' it is set to be 'thornthwaite'.") + pet_method <- 'thornthwaite' + } + } else { + stop("Parameter 'data' needs to be a named list with accepted ", + "variable names. See documentation.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!all(sapply(data, function(x) time_dim %in% names(dim(x))))) { + stop("Parameter 'time_dim' is not found in 'data' dimension.") + } + ## leadtime_dim + if (!is.character(leadtime_dim) | length(leadtime_dim) != 1) { + stop("Parameter 'leadtime_dim' must be a character string.") + } + if (!all(sapply(data, function(x) leadtime_dim %in% names(dim(x))))) { + stop("Parameter 'leadtime_dim' is not found in 'data' dimension.") + } + ## lat_dim + if (!is.character(lat_dim) | length(lat_dim) != 1) { + stop("Parameter 'lat_dim' must be a character string.") + } + if (!all(sapply(data, function(x) lat_dim %in% names(dim(x))))) { + stop("Parameter 'lat_dim' is not found in 'data' dimension.") + } + # dates + if (is.null(dates)) { + stop("Parameter 'dates' is missing, dates must be provided.") + } + if (!any(inherits(dates, 'Date'), inherits(dates, 'POSIXct'))) { + stop("Parameter 'dates' is not of the correct class, ", + "only 'Date' and 'POSIXct' classes are accepted.") + } + if (!time_dim %in% names(dim(dates)) | !leadtime_dim %in% names(dim(dates))) { + stop("Parameter 'dates' must have 'time_dim' and 'leadtime_dim' ", + "dimension.") + } + if (!all(dim(data[[1]])[c(time_dim, leadtime_dim)] == + dim(dates)[c(time_dim, leadtime_dim)])) { + stop("Parameter 'dates' needs to have the same length as 'time_dim' ", + "and 'leadtime_dim' as 'data'.") + } + ## na.rm + if (!is.logical(na.rm) | length(na.rm) > 1) { + stop("Parameter 'na.rm' must be one logical value.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + # complete dates + mask_dates <- .datesmask(dates, frequency = 'monthly') + lat_mask <- array(lat, dim = c(1, length(lat))) + names(dim(lat_mask)) <- c('dat', lat_dim) + + # extract mask of NA locations to return to NA the final result + mask_na <- array(1, dim = dim(data[[1]])) + if (pet_method == 'hargreaves') { + varnames <- c('tmax', 'tmin') + mask_na[which(is.na(data$tmax))] <- 0 + mask_na[which(is.na(data$tmin))] <- 0 + } else if (pet_method == 'hargreaves_modified') { + varnames <- c('tmax', 'tmin', 'pr') + mask_na[which(is.na(data$tmax))] <- 0 + mask_na[which(is.na(data$tmin))] <- 0 + mask_na[which(is.na(data$pr))] <- 0 + } else if (pet_method == 'thornthwaite') { + varnames <- c('tmean') + mask_na[which(is.na(data$tmean))] <- 0 + } + + # replace NA with 0 + for (dd in 1:length(data)) { + data[[dd]][which(is.na(data[[dd]]))] <- 0 + } + + # prepare data + target_dims_data <- lapply(data[varnames], function(x) rep(c(leadtime_dim, time_dim), 1)) + pet <- Apply(data = c(list(lat_mask = lat_mask), data[varnames]), + target_dims = c(list(lat_mask = 'dat'), target_dims_data), + fun = .pet, + mask_dates = mask_dates, pet_method = pet_method, + leadtime_dim = leadtime_dim, time_dim = time_dim, + output_dims = c(leadtime_dim, time_dim), + ncores = ncores)$output1 + # reorder dims in pet_estimated + pos <- match(names(dim(data[[1]])), names(dim(pet))) + pet <- aperm(pet, pos) + + # restore original NAs from mask_na + pet[which(mask_na == 0)] <- NA + + return(pet) +} + +.pet <- function(lat_mask, data2, data3 = NULL, data4 = NULL, + mask_dates, pet_method = 'hargreaves', + leadtime_dim = 'time', time_dim = 'syear') { + + dims <- dim(data2) + + # create a vector from data but adding 0 to achive complete time series + # of the considered period + # (starting in January of the first year) so that the solar radiation + # estimation is computed in each case for the correct month + + if (!is.null(data2)) { + data_tmp <- as.vector(data2) + data2 <- array(0, dim = length(mask_dates)) + count <- 1 + for (dd in 1:length(mask_dates)) { + if (mask_dates[dd] == 1) { + data2[dd] <- data_tmp[count] + count <- count + 1 + } + } + rm(data_tmp) + } + if (!is.null(data3)) { + data_tmp <- as.vector(data3) + data3 <- array(0, dim = length(mask_dates)) + count <- 1 + for (dd in 1:length(mask_dates)) { + if (mask_dates[dd] == 1) { + data3[dd] <- data_tmp[count] + count <- count + 1 + } + } + rm(data_tmp) + } + if (!is.null(data4)) { + data_tmp <- as.vector(data4) + data4 <- array(0, dim = length(mask_dates)) + count <- 1 + for (dd in 1:length(mask_dates)) { + if (mask_dates[dd] == 1) { + data4[dd] <- data_tmp[count] + count <- count + 1 + } + } + rm(data_tmp) + } + if (pet_method == 'hargreaves') { + pet <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), + lat = lat_mask, na.rm = FALSE, verbose = FALSE) + # line to return the vector to the size of the actual original data + pet <- array(pet[which(mask_dates == 1)], dim = dims) + } + + if (pet_method == 'hargreaves_modified') { + pet <- hargreaves(Tmin = as.vector(data3), Tmax = as.vector(data2), + lat = lat_mask, Pre = as.vector(data4), na.rm = FALSE, + verbose = FALSE) + pet <- array(pet[which(mask_dates == 1)], dim = dims) + } + + if (pet_method == 'thornthwaite') { + pet <- thornthwaite(as.vector(data2), lat = lat_mask, na.rm = TRUE, + verbose = FALSE) + # line to return the vector to the size of the actual original data + pet <- array(pet[which(mask_dates == 1)], dim = dims) + } + return(pet) +} \ No newline at end of file diff --git a/R/PeriodStandardization.R b/R/PeriodStandardization.R new file mode 100644 index 0000000000000000000000000000000000000000..6ab707e713174dc8ebb41ce8fce3e240946bd05f --- /dev/null +++ b/R/PeriodStandardization.R @@ -0,0 +1,646 @@ +#'Compute the Standardization of Precipitation-Evapotranspiration Index +#' +#'The Standardization of the data is the last step of computing the SPEI +#'(Standarized Precipitation-Evapotranspiration Index). With this function the +#'data is fit to a probability distribution to transform the original values to +#'standardized units that are comparable in space and time and at different SPEI +#'time scales. +#' +#'Next, some specifications for the calculation of the standardization will be +#'discussed. If there are NAs in the data and they are not removed with the +#'parameter 'na.rm', the standardization cannot be carried out for those +#'coordinates and therefore, the result will be filled with NA for the +#'specific coordinates. When NAs are not removed, if the length of the data for +#'a computational step is smaller than 4, there will not be enough data for +#'standarize and the result will be also filled with NAs for that coordinates. +#'About the distribution used to fit the data, there are only two possibilities: +#''log-logistic' and 'Gamma'. The 'Gamma' method only works when only +#'precipitation is provided and other variables are 0 because it is positive +#'defined (SPI indicator). When only 'data' is provided ('data_cor' is NULL) the +#'standardization is computed with cross validation. This function is build to +#'be compatible with other tools in that work with 's2dv_cube' object +#'class. The input data must be this object class. If you don't work with +#''s2dv_cube', see PeriodStandardization. For more information on the SPEI +#'indicator calculation, see CST_PeriodPET and CST_PeriodAccumulation. +#' +#'@param data An 's2dv_cube' that element 'data' stores a multidimensional +#' array containing the data to be standardized. +#'@param data_cor An 's2dv_cube' that element 'data' stores a multidimensional +#' array containing the data in which the standardization should be applied +#' using the fitting parameters from 'data'. +#'@param time_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'syear'. +#'@param leadtime_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'time'. +#'@param memb_dim A character string indicating the name of the dimension in +#' which the ensemble members are stored. When set it to NULL, threshold is +#' computed for individual members. +#'@param ref_period A list with two numeric values with the starting and end +#' points of the reference period used for computing the index. The default +#' value is NULL indicating that the first and end values in data will be +#' used as starting and end points. +#'@param params An optional parameter that needs to be a multidimensional array +#' with named dimensions. This option overrides computation of fitting +#' parameters. It needs to be of same time dimensions (specified in 'time_dim' +#' and 'leadtime_dim') of 'data' and a dimension named 'coef' with the length +#' of the coefficients needed for the used distribution (for 'Gamma' coef +#' dimension is of lenght 2, for 'log-Logistic' is of length 3). It also needs +#' to have a leadtime dimension (specified in 'leadtime_dim') of length 1. It +#' will only be used if 'data_cor' is not provided. +#'@param handle_infinity A logical value wether to return infinite values (TRUE) +#' or not (FALSE). When it is TRUE, the positive infinite values (negative +#' infinite) are substituted by the maximum (minimum) values of each +#' computation step, a subset of the array of dimensions time_dim, leadtime_dim +#' and memb_dim. +#'@param method A character string indicating the standardization method used. +#' If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by +#' default. +#'@param distribution A character string indicating the name of the distribution +#' function to be used for computing the SPEI. The accepted names are: +#' 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The +#' 'Gamma' method only works when only precipitation is provided and other +#' variables are 0 because it is positive defined (SPI indicator). +#'@param return_params A logical value indicating wether to return parameters +#' array (TRUE) or not (FALSE). It is FALSE by default. +#'@param na.rm A logical value indicating whether NA values should be removed +#' from data. It is FALSE by default. If it is FALSE and there are NA values, +#' standardization cannot be carried out for those coordinates and therefore, +#' the result will be filled with NA for the specific coordinates. If it is +#' TRUE, if the data from other dimensions except time_dim and leadtime_dim is +#' not reaching 4 values, it is not enough values to estimate the parameters +#' and the result will include NA. +#'@param ncores An integer value indicating the number of cores to use in +#' parallel computation. +#' +#'@return An object of class \code{s2dv_cube} containing the standardized data. +#'If 'data_cor' is provided the array stored in element data will be of the same +#'dimensions as 'data_cor'. If 'data_cor' is not provided, the array stored in +#'element data will be of the same dimensions as 'data'. The parameters of the +#'standardization will only be returned if 'return_params' is TRUE, in this +#'case, the output will be a list of two objects one for the standardized data +#'and one for the parameters. +#' +#'@examples +#'dims <- c(syear = 6, time = 3, latitude = 2, ensemble = 25) +#'data <- NULL +#'data$data <- array(rnorm(600, -204.1, 78.1), dim = dims) +#'class(data) <- 's2dv_cube' +#'SPEI <- CST_PeriodStandardization(data = data) +#'@export +CST_PeriodStandardization <- function(data, data_cor = NULL, time_dim = 'syear', + leadtime_dim = 'time', memb_dim = 'ensemble', + ref_period = NULL, + handle_infinity = FALSE, + method = 'parametric', + distribution = 'log-Logistic', + params = NULL, return_params = FALSE, + na.rm = FALSE, ncores = NULL) { + # Check 's2dv_cube' + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!inherits(data, 's2dv_cube')) { + stop("Parameter 'data' must be of 's2dv_cube' class.") + } + if (!is.null(data_cor)) { + if (!inherits(data_cor, 's2dv_cube')) { + stop("Parameter 'data_cor' must be of 's2dv_cube' class.") + } + } + res <- PeriodStandardization(data = data$data, data_cor = data_cor$data, + time_dim = time_dim, leadtime_dim = leadtime_dim, + memb_dim = memb_dim, + ref_period = ref_period, + handle_infinity = handle_infinity, method = method, + distribution = distribution, + params = params, return_params = return_params, + na.rm = na.rm, ncores = ncores) + if (return_params) { + std <- res$spei + params <- res$params + } else { + std <- res + } + + if (is.null(data_cor)) { + data$data <- std + data$attrs$Variable$varName <- paste0(data$attrs$Variable$varName, ' standardized') + if (return_params) { + return(list(spei = data, params = params)) + } else { + return(data) + } + } else { + data_cor$data <- std + data_cor$attrs$Variable$varName <- paste0(data_cor$attrs$Variable$varName, ' standardized') + data_cor$attrs$Datasets <- c(data_cor$attrs$Datasets, data$attrs$Datasets) + data_cor$attrs$source_files <- c(data_cor$attrs$source_files, data$attrs$source_files) + return(data_cor) + } +} + +#'Compute the Standardization of Precipitation-Evapotranspiration Index +#' +#'The Standardization of the data is the last step of computing the SPEI +#'indicator. With this function the data is fit to a probability distribution to +#'transform the original values to standardized units that are comparable in +#'space and time and at different SPEI time scales. +#' +#'Next, some specifications for the calculation of the standardization will be +#'discussed. If there are NAs in the data and they are not removed with the +#'parameter 'na.rm', the standardization cannot be carried out for those +#'coordinates and therefore, the result will be filled with NA for the +#'specific coordinates. When NAs are not removed, if the length of the data for +#'a computational step is smaller than 4, there will not be enough data for +#'standarize and the result will be also filled with NAs for that coordinates. +#'About the distribution used to fit the data, there are only two possibilities: +#''log-logistic' and 'Gamma'. The 'Gamma' method only works when only +#'precipitation is provided and other variables are 0 because it is positive +#'defined (SPI indicator). When only 'data' is provided ('data_cor' is NULL) the +#'standardization is computed with cross validation. For more information about +#'SPEI, see functions PeriodPET and PeriodAccumulation. +#' +#'@param data A multidimensional array containing the data to be standardized. +#'@param data_cor A multidimensional array containing the data in which the +#' standardization should be applied using the fitting parameters from 'data'. +#'@param dates An array containing the dates of the data with the same time +#' dimensions as the data. It is optional and only necessary for using the +#' parameter 'ref_period' to select a reference period directly from dates. +#'@param time_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'syear'. +#'@param leadtime_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'time'. +#'@param memb_dim A character string indicating the name of the dimension in +#' which the ensemble members are stored. When set it to NULL, threshold is +#' computed for individual members. +#'@param ref_period A list with two numeric values with the starting and end +#' points of the reference period used for computing the index. The default +#' value is NULL indicating that the first and end values in data will be +#' used as starting and end points. +#'@param params An optional parameter that needs to be a multidimensional array +#' with named dimensions. This option overrides computation of fitting +#' parameters. It needs to be of same time dimensions (specified in 'time_dim' +#' and 'leadtime_dim') of 'data' and a dimension named 'coef' with the length +#' of the coefficients needed for the used distribution (for 'Gamma' coef +#' dimension is of lenght 2, for 'log-Logistic' is of length 3). It also needs +#' to have a leadtime dimension (specified in 'leadtime_dim') of length 1. It +#' will only be used if 'data_cor' is not provided. +#'@param handle_infinity A logical value wether to return infinite values (TRUE) +#' or not (FALSE). When it is TRUE, the positive infinite values (negative +#' infinite) are substituted by the maximum (minimum) values of each +#' computation step, a subset of the array of dimensions time_dim, leadtime_dim +#' and memb_dim. +#'@param method A character string indicating the standardization method used. +#' If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by +#' default. +#'@param distribution A character string indicating the name of the distribution +#' function to be used for computing the SPEI. The accepted names are: +#' 'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The +#' 'Gamma' method only works when only precipitation is provided and other +#' variables are 0 because it is positive defined (SPI indicator). +#'@param return_params A logical value indicating wether to return parameters +#' array (TRUE) or not (FALSE). It is FALSE by default. +#'@param na.rm A logical value indicating whether NA values should be removed +#' from data. It is FALSE by default. If it is FALSE and there are NA values, +#' standardization cannot be carried out for those coordinates and therefore, +#' the result will be filled with NA for the specific coordinates. If it is +#' TRUE, if the data from other dimensions except time_dim and leadtime_dim is +#' not reaching 4 values, it is not enough values to estimate the parameters +#' and the result will include NA. +#'@param ncores An integer value indicating the number of cores to use in +#' parallel computation. +#' +#'@return A multidimensional array containing the standardized data. +#'If 'data_cor' is provided the array will be of the same dimensions as +#''data_cor'. If 'data_cor' is not provided, the array will be of the same +#'dimensions as 'data'. The parameters of the standardization will only be +#'returned if 'return_params' is TRUE, in this case, the output will be a list +#'of two objects one for the standardized data and one for the parameters. +#' +#'@examples +#'dims <- c(syear = 6, time = 2, latitude = 2, ensemble = 25) +#'dimscor <- c(syear = 1, time = 2, latitude = 2, ensemble = 25) +#'data <- array(rnorm(600, -194.5, 64.8), dim = dims) +#'datacor <- array(rnorm(100, -217.8, 68.29), dim = dimscor) +#' +#'SPEI <- PeriodStandardization(data = data) +#'SPEIcor <- PeriodStandardization(data = data, data_cor = datacor) +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@importFrom lmomco pwm.pp pwm.ub pwm2lmom are.lmom.valid parglo pargam parpe3 +#'@importFrom lmom cdfglo cdfgam cdfpe3 pelglo pelgam pelpe3 +#'@importFrom SPEI parglo.maxlik +#'@importFrom stats qnorm sd window +#'@export +PeriodStandardization <- function(data, data_cor = NULL, dates = NULL, + time_dim = 'syear', leadtime_dim = 'time', + memb_dim = 'ensemble', + ref_period = NULL, handle_infinity = FALSE, + method = 'parametric', + distribution = 'log-Logistic', + params = NULL, return_params = FALSE, + na.rm = FALSE, ncores = NULL) { + # Check inputs + ## data + if (!is.array(data)) { + stop("Parameter 'data' must be a numeric array.") + } + if (is.null(names(dim(data)))) { + stop("Parameter 'data' must have dimension names.") + } + ## data_cor + if (!is.null(data_cor)) { + if (!is.array(data_cor)) { + stop("Parameter 'data_cor' must be a numeric array.") + } + if (is.null(names(dim(data_cor)))) { + stop("Parameter 'data_cor' must have dimension names.") + } + } + ## dates + if (!is.null(dates)) { + if (!any(inherits(dates, 'Date'), inherits(dates, 'POSIXct'))) { + stop("Parameter 'dates' is not of the correct class, ", + "only 'Date' and 'POSIXct' classes are accepted.") + } + if (!time_dim %in% names(dim(dates)) | !leadtime_dim %in% names(dim(dates))) { + stop("Parameter 'dates' must have 'time_dim' and 'leadtime_dim' ", + "dimension.") + } + if (dim(data)[c(time_dim)] != dim(dates)[c(time_dim)]) { + stop("Parameter 'dates' needs to have the same length of 'time_dim' ", + "as 'data'.") + } + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(data))) { + stop("Parameter 'time_dim' is not found in 'data' dimension.") + } + if (!is.null(data_cor)) { + if (!time_dim %in% names(dim(data_cor))) { + stop("Parameter 'time_dim' is not found in 'data_cor' dimension.") + } + } + ## leadtime_dim + if (!is.character(leadtime_dim) | length(leadtime_dim) != 1) { + stop("Parameter 'leadtime_dim' must be a character string.") + } + if (!leadtime_dim %in% names(dim(data))) { + stop("Parameter 'leadtime_dim' is not found in 'data' dimension.") + } + if (!is.null(data_cor)) { + if (!leadtime_dim %in% names(dim(data_cor))) { + stop("Parameter 'leadtime_dim' is not found in 'data_cor' dimension.") + } + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) != 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(data))) { + stop("Parameter 'memb_dim' is not found in 'data' dimension.") + } + if (!is.null(data_cor)) { + if (!memb_dim %in% names(dim(data_cor))) { + stop("Parameter 'memb_dim' is not found in 'data_cor' dimension.") + } + } + ## data_cor (2) + if (!is.null(data_cor)) { + if (dim(data)[leadtime_dim] != dim(data_cor)[leadtime_dim]) { + stop("Parameter 'data' and 'data_cor' have dimension 'leadtime_dim' ", + "of different length.") + } + } + ## ref_period + if (!is.null(ref_period)) { + years_dates <- format(dates, "%Y") + if (is.null(dates)) { + warning("Parameter 'dates' is not provided so 'ref_period' can't be ", + "used.") + ref_period <- NULL + } else if (length(ref_period) != 2) { + warning("Parameter 'ref_period' must be of length two indicating the ", + "first and end years of the reference period. It will not ", + "be used.") + ref_period <- NULL + } else if (!all(sapply(ref_period, is.numeric))) { + warning("Parameter 'ref_period' must be a numeric vector indicating the ", + "'start' and 'end' years of the reference period. It will not ", + "be used.") + ref_period <- NULL + } else if (ref_period[[1]] > ref_period[[2]]) { + warning("In parameter 'ref_period' 'start' cannot be after 'end'. It ", + "will not be used.") + ref_period <- NULL + } else if (!all(unlist(ref_period) %in% years_dates)) { + warning("Parameter 'ref_period' contain years outside the dates. ", + "It will not be used.") + ref_period <- NULL + } else { + years <- format(Subset(dates, along = leadtime_dim, indices = 1), "%Y") + ref_period[[1]] <- which(ref_period[[1]] == years) + ref_period[[2]] <- which(ref_period[[2]] == years) + } + } + ## handle_infinity + if (!is.logical(handle_infinity)) { + stop("Parameter 'handle_infinity' must be a logical value.") + } + ## method + if (!(method %in% c('parametric', 'non-parametric'))) { + stop("Parameter 'method' must be a character string containing one of ", + "the following methods: 'parametric' or 'non-parametric'.") + } + ## distribution + if (!(distribution %in% c('log-Logistic', 'Gamma', 'PearsonIII'))) { + stop("Parameter 'distribution' must be a character string containing one ", + "of the following distributions: 'log-Logistic', 'Gamma' or ", + "'PearsonIII'.") + } + ## params + if (!is.null(params)) { + if (!is.numeric(params)) { + stop("Parameter 'params' must be numeric.") + } + if (!all(c(time_dim, leadtime_dim, 'coef') %in% names(dim(params)))) { + stop("Parameter 'params' must be a multidimensional array with named ", + "dimensions: '", time_dim, "', '", leadtime_dim, "' and 'coef'.") + } + dims_data <- dim(data)[-which(names(dim(data)) == memb_dim)] + dims_params <- dim(params)[-which(names(dim(params)) == 'coef')] + if (!all(dims_data == dims_params)) { + stop("Parameter 'data' and 'params' must have same common dimensions ", + "except 'memb_dim' and 'coef'.") + } + + if (distribution == "Gamma") { + if (dim(params)['coef'] != 2) { + stop("For '", distribution, "' distribution, params array should have ", + "'coef' dimension of length 2.") + } + } else { + if (dim(params)['coef'] != 3) { + stop("For '", distribution, "' distribution, params array should have ", + "'coef' dimension of length 3.") + } + } + } + ## return_params + if (!is.logical(return_params)) { + stop("Parameter 'return_params' must be logical.") + } + ## na.rm + if (!is.logical(na.rm)) { + stop("Parameter 'na.rm' must be logical.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + if (is.null(ref_period)) { + ref_start <- NULL + ref_end <- NULL + } else { + ref_start <- ref_period[[1]] + ref_end <- ref_period[[2]] + } + + # Standardization + if (is.null(data_cor)) { + if (is.null(params)) { + res <- Apply(data = list(data), + target_dims = c(leadtime_dim, time_dim, memb_dim), + fun = .standardization, data_cor = NULL, params = NULL, + leadtime_dim = leadtime_dim, time_dim = time_dim, + ref_start = ref_start, ref_end = ref_end, + handle_infinity = handle_infinity, + method = method, distribution = distribution, + return_params = return_params, + na.rm = na.rm, ncores = ncores) + } else { + res <- Apply(data = list(data = data, params = params), + target_dims = list(data = c(leadtime_dim, time_dim, memb_dim), + params = c(leadtime_dim, time_dim, 'coef')), + fun = .standardization, data_cor = NULL, + leadtime_dim = leadtime_dim, time_dim = time_dim, + ref_start = ref_start, ref_end = ref_end, + handle_infinity = handle_infinity, + method = method, distribution = distribution, + return_params = return_params, + na.rm = na.rm, ncores = ncores) + } + } else { + res <- Apply(data = list(data = data, data_cor = data_cor), + target_dims = c(leadtime_dim, time_dim, memb_dim), + fun = .standardization, params = NULL, + leadtime_dim = leadtime_dim, time_dim = time_dim, + ref_start = ref_start, ref_end = ref_end, + handle_infinity = handle_infinity, + method = method, distribution = distribution, + return_params = return_params, + na.rm = na.rm, ncores = ncores) + } + if (return_params) { + spei <- res$spei + params <- res$params + } else { + spei <- res$output1 + } + + if (is.null(data_cor)) { + pos <- match(names(dim(data)), names(dim(spei))) + spei <- aperm(spei, pos) + } else { + pos <- match(names(dim(data_cor)), names(dim(spei))) + spei <- aperm(spei, pos) + } + + if (return_params) { + pos <- match(c(names(dim(spei))[-which(names(dim(spei)) == memb_dim)], 'coef'), + names(dim(params))) + params <- aperm(params, pos) + return(list('spei' = spei, 'params' = params)) + } else { + return(spei) + } +} + +.standardization <- function(data, data_cor = NULL, params = NULL, + leadtime_dim = 'time', time_dim = 'syear', + ref_start = NULL, ref_end = NULL, handle_infinity = FALSE, + method = 'parametric', distribution = 'log-Logistic', + return_params = FALSE, na.rm = FALSE) { + # data (data_cor): [leadtime_dim, time_dim, memb_dim] + dims <- dim(data)[-1] + fit = 'ub-pwm' + + coef = switch(distribution, + "Gamma" = array(NA, dim = 2, dimnames = list(c('alpha', 'beta'))), + "log-Logistic" = array(NA, dim = 3, dimnames = list(c('xi', 'alpha', 'kappa'))), + "PearsonIII" = array(NA, dim = 3, dimnames = list(c('mu', 'sigma', 'gamma')))) + + if (is.null(data_cor)) { + # cross_val = TRUE + spei_mod <- data*NA + if (return_params) { + params_result <- array(dim = c(dim(data)[-length(dim(data))], coef = length(coef))) + } + for (ff in 1:dim(data)[leadtime_dim]) { + data2 <- data[ff, , ] + dim(data2) <- dims + if (method == 'non-parametric') { + bp <- matrix(0, length(data2), 1) + for (i in 1:length(data2)) { + bp[i,1] = sum(data2[] <= data2[i], na.rm = na.rm); # Writes the rank of the data + } + std_index <- qnorm((bp - 0.44)/(length(data2) + 0.12)) + dim(std_index) <- dims + spei_mod[ff, , ] <- std_index + } else { + if (!is.null(ref_start) && !is.null(ref_end)) { + data_fit <- window(data2, ref_start, ref_end) + } else { + data_fit <- data2 + } + for (nsd in 1:dim(data)[time_dim]) { + if (is.null(params)) { + acu <- as.vector(data_fit[-nsd, ]) + if (na.rm) { + acu_sorted <- sort.default(acu, method = "quick") + } else { + acu_sorted <- sort.default(acu, method = "quick", na.last = TRUE) + } + f_params <- NA + if (!any(is.na(acu_sorted)) & length(acu_sorted) != 0) { + acu_sd <- sd(acu_sorted) + if (!is.na(acu_sd) & acu_sd != 0) { + if (distribution != "log-Logistic") { + acu_sorted <- acu_sorted[acu_sorted > 0] + } + if (length(acu_sorted) >= 4) { + f_params <- .std(data = acu_sorted, fit = fit, + distribution = distribution) + } + } + } + } else { + f_params <- params[ff, nsd, ] + } + if (all(is.na(f_params))) { + cdf_res <- NA + } else { + f_params <- f_params[which(!is.na(f_params))] + cdf_res = switch(distribution, + "log-Logistic" = lmom::cdfglo(data2, f_params), + "Gamma" = lmom::cdfgam(data2, f_params), + "PearsonIII" = lmom::cdfpe3(data2, f_params)) + } + std_index_cv <- array(qnorm(cdf_res), dim = dims) + spei_mod[ff, nsd, ] <- std_index_cv[nsd, ] + if (return_params) params_result[ff, nsd, ] <- f_params + } + } + } + } else { + # cross_val = FALSE + spei_mod <- data_cor*NA + dimscor <- dim(data_cor)[-1] + if (return_params) { + params_result <- array(dim = c(dim(data_cor)[-length(dim(data_cor))], coef = length(coef))) + } + for (ff in 1:dim(data)[leadtime_dim]) { + data_cor2 <- data_cor[ff, , ] + dim(data_cor2) <- dimscor + if (method == 'non-parametric') { + bp <- matrix(0, length(data_cor2), 1) + for (i in 1:length(data_cor2)) { + bp[i,1] = sum(data_cor2[] <= data_cor2[i], na.rm = na.rm); # Writes the rank of the data + } + std_index <- qnorm((bp - 0.44)/(length(data_cor2) + 0.12)) + dim(std_index) <- dimscor + spei_mod[ff, , ] <- std_index + } else { + data2 <- data[ff, , ] + dim(data2) <- dims + if (!is.null(ref_start) && !is.null(ref_end)) { + data_fit <- window(data2, ref_start, ref_end) + } else { + data_fit <- data2 + } + acu <- as.vector(data_fit) + if (na.rm) { + acu_sorted <- sort.default(acu, method = "quick") + } else { + acu_sorted <- sort.default(acu, method = "quick", na.last = TRUE) + } + if (!any(is.na(acu_sorted)) & length(acu_sorted) != 0) { + acu_sd <- sd(acu_sorted) + if (!is.na(acu_sd) & acu_sd != 0) { + if (distribution != "log-Logistic") { + acu_sorted <- acu_sorted[acu_sorted > 0] + } + if (length(acu_sorted) >= 4) { + f_params <- .std(data = acu_sorted, fit = fit, + distribution = distribution) + } + if (all(is.na(f_params))) { + cdf_res <- NA + } else { + f_params <- f_params[which(!is.na(f_params))] + cdf_res = switch(distribution, + "log-Logistic" = lmom::cdfglo(data_cor2, f_params), + "Gamma" = lmom::cdfgam(data_cor2, f_params), + "PearsonIII" = lmom::cdfpe3(data_cor2, f_params)) + } + std_index_cv <- array(qnorm(cdf_res), dim = dimscor) + spei_mod[ff, , ] <- std_index_cv + if (return_params) params_result[ff, , ] <- f_params + } + } + } + } + } + if (handle_infinity) { + # could also use "param_error" ?; we are giving it the min/max value of the grid point + spei_mod[is.infinite(spei_mod) & spei_mod < 0] <- min(spei_mod[!is.infinite(spei_mod)]) + spei_mod[is.infinite(spei_mod) & spei_mod > 0] <- max(spei_mod[!is.infinite(spei_mod)]) + } + if (return_params) { + return(list(spei = spei_mod, params = params_result)) + } else { + return(spei_mod) + } +} + +.std <- function(data, fit = 'pp-pwm', distribution = 'log-Logistic') { + pwm = switch(fit, + 'pp-pwm' = lmomco::pwm.pp(data, -0.35, 0, nmom = 3), + lmomco::pwm.ub(data, nmom = 3) + # TLMoments::PWM(data, order = 0:2) + ) + lmom <- lmomco::pwm2lmom(pwm) + if (!any(!lmomco::are.lmom.valid(lmom), anyNA(lmom[[1]]), any(is.nan(lmom[[1]])))) { + fortran_vec = c(lmom$lambdas[1:2], lmom$ratios[3]) + params_result = switch(distribution, + 'log-Logistic' = tryCatch(lmom::pelglo(fortran_vec), + error = function(e){lmomco::parglo(lmom)$para}), + 'Gamma' = tryCatch(lmom::pelgam(fortran_vec), + error = function(e){lmomco::pargam(lmom)$para}), + 'PearsonIII' = tryCatch(lmom::pelpe3(fortran_vec), + error = function(e){lmomco::parpe3(lmom)$para})) + if (distribution == 'log-Logistic' && fit == 'max-lik') { + params_result = SPEI::parglo.maxlik(data, params_result)$para + } + return(params_result) + } else { + return(NA) + } +} \ No newline at end of file diff --git a/R/zzz.R b/R/zzz.R index 0724f066e6c2ce674c779159f96ccee4ff38576c..da7c3a18b558c8fe2f58fa301ada5303167025a7 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -59,4 +59,50 @@ wind2CF <- function(wind, pc) { power <- wind2power(wind, pc) CF <- power / pc$attr$RatedPower return(CF) -} \ No newline at end of file +} + +.KnownLonNames <- function() { + known_lon_names <- c('lon', 'lons', 'longitude', 'x', 'i', 'nav_lon') +} + +.KnownLatNames <- function() { + known_lat_names <- c('lat', 'lats', 'latitude', 'y', 'j', 'nav_lat') +} + +.return2list <- function(data1, data2 = NULL) { + if (is.null(data1) & is.null(data2)) { + return(NULL) + } else if (is.null(data2)) { + return(list(data1)) + } else { + return(list(data1, data2)) + } +} + +# Function that creates a mask array from dates for the whole year +.datesmask <- function(dates, frequency = 'monthly') { + years <- format(dates, "%Y") + ini <- as.Date(paste(min(years), 01, 01, sep = '-')) + end <- as.Date(paste(max(years), 12, 31, sep = '-')) + daily <- as.Date(seq(ini, end, by = "day")) + if (frequency == 'monthly') { + days <- as.numeric(format(daily, "%d")) + monthly <- daily[which(days == 1)] + dates_mask <- array(0, dim = length(monthly)) + for (dd in 1:length(dates)) { + year <- format(dates[dd], "%Y") + month <- format(dates[dd], "%m") + ii <- which(monthly == as.Date(paste(year, month, 01, sep = '-'))) + dates_mask[ii] <- 1 + } + } else { + # daily + dates_mask <- array(0, dim = length(daily)) + for (dd in 1:length(dates)) { + ii <- which(daily == dates[dd]) + dates_mask[ii] <- 1 + } + } + + return(dates_mask) +} diff --git a/man/CST_PeriodAccumulation.Rd b/man/CST_PeriodAccumulation.Rd index 0820a0dc57848cd6cf257636cec0a12d3568a7da..4d71ff3d17d56e3828673b61fdf72422ed02becf 100644 --- a/man/CST_PeriodAccumulation.Rd +++ b/man/CST_PeriodAccumulation.Rd @@ -9,6 +9,9 @@ CST_PeriodAccumulation( start = NULL, end = NULL, time_dim = "time", + rollwidth = NULL, + sdate_dim = "sdate", + frequency = "monthly", na.rm = FALSE, ncores = NULL ) @@ -19,8 +22,8 @@ CST_PeriodAccumulation( \item{start}{An optional parameter to defined the initial date of the period to select from the data by providing a list of two elements: the initial -date of the period and the initial month of the period. By default it is set -to NULL and the indicator is computed using all the data provided in +date of the period and the initial m onth of the period. By default it is +set to NULL and the indicator is computed using all the data provided in \code{data}.} \item{end}{An optional parameter to defined the final date of the period to @@ -33,6 +36,22 @@ compute the indicator. By default, it is set to 'time'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified.} +\item{rollwidth}{An optional parameter to indicate the number of time +steps the rolling sum is applied to. If it is positive, the rolling sum is +applied backwards 'time_dim', if it is negative, it will be forward it. When +this parameter is NULL, the sum is applied over all 'time_dim', in a +specified period. It is NULL by default.} + +\item{sdate_dim}{(Only needed when rollwidth is used). A character string +indicating the name of the start date dimension to compute the rolling +accumulation. By default, it is set to 'sdate'.} + +\item{frequency}{(Only needed when rollwidth is used). A character string +indicating the time frequency of the data to apply the rolling accumulation. +It can be 'daily' or 'monthly'. If it is set to 'monthly', values from +continuous months will be accumulated; if it is 'daliy', values from +continuous days will be accumulated. It is set to 'monthly' by default.} + \item{na.rm}{A logical value indicating whether to ignore NA values (TRUE) or not (FALSE).} @@ -40,14 +59,18 @@ not (FALSE).} computation.} } \value{ -An 's2dv_cube' object containing the indicator in the element -\code{data} with dimensions of the input parameter 'data' except the dimension -where the accumulation has been computed (specified with 'time_dim'). The -'Dates' array is updated to the dates corresponding to the beginning of the -aggregated time period. A new element called 'time_bounds' will be added into -the 'attrs' element in the 's2dv_cube' object. It consists of a list -containing two elements, the start and end dates of the aggregated period with -the same dimensions of 'Dates' element. +An 's2dv_cube' object containing the accumulated data in the element +\code{data}. If parameter 'rollwidth' is not used, it will have the dimensions +of the input parameter 'data' except the dimension where the accumulation has +been computed (specified with 'time_dim'). If 'rollwidth' is used, it will be +of same dimensions as input data. The 'Dates' array is updated to the +dates corresponding to the beginning of the aggregated time period. A new +element called 'time_bounds' will be added into the 'attrs' element in the +'s2dv_cube' object. It consists of a list containing two elements, the start +and end dates of the aggregated period with the same dimensions of 'Dates' +element. If 'rollwidth' is used, it will contain the same dimensions of +parameter 'data' and the other elements of the 's2dv_cube' will not be +modified. } \description{ Period Accumulation computes the sum (accumulation) of a given variable in a @@ -60,26 +83,39 @@ by using this function: August 21st to October 21st} } } +\details{ +There are two possible ways of performing the accumulation. The default one +is by accumulating a variable over a dimension specified with 'time_dim'. To +chose a specific time period, 'start' and 'end' must be used. The other method +is by using 'rollwidth' parameter. When this parameter is a positive integer, +the cumulative backward sum is applied to the time dimension. If it is +negative, the rolling sum is applied backwards. This function is build to +be compatible with other tools in that work with 's2dv_cube' object class. The +input data must be this object class. If you don't work with 's2dv_cube', see +PeriodAccumulation. +} \examples{ exp <- NULL exp$data <- array(rnorm(216)*200, dim = c(dataset = 1, member = 2, sdate = 3, - time = 9, lat = 2, lon = 2)) + ftime = 9, lat = 2, lon = 2)) class(exp) <- 's2dv_cube' -TP <- CST_PeriodAccumulation(exp) +TP <- CST_PeriodAccumulation(exp, time_dim = 'ftime') exp$data <- array(rnorm(5 * 3 * 214 * 2), - c(memb = 5, sdate = 3, time = 214, lon = 2)) + c(memb = 5, sdate = 3, ftime = 214, lon = 2)) Dates <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), as.Date("30-11-2000", format = "\%d-\%m-\%Y"), by = 'day'), seq(as.Date("01-05-2001", format = "\%d-\%m-\%Y"), as.Date("30-11-2001", format = "\%d-\%m-\%Y"), by = 'day'), seq(as.Date("01-05-2002", format = "\%d-\%m-\%Y"), as.Date("30-11-2002", format = "\%d-\%m-\%Y"), by = 'day')) -dim(Dates) <- c(sdate = 3, time = 214) +dim(Dates) <- c(sdate = 3, ftime = 214) exp$attrs$Dates <- Dates -SprR <- CST_PeriodAccumulation(exp, start = list(21, 4), end = list(21, 6)) +SprR <- CST_PeriodAccumulation(exp, start = list(21, 4), end = list(21, 6), + time_dim = 'ftime') dim(SprR$data) head(SprR$attrs$Dates) -HarR <- CST_PeriodAccumulation(exp, start = list(21, 8), end = list(21, 10)) +HarR <- CST_PeriodAccumulation(exp, start = list(21, 8), end = list(21, 10), + time_dim = 'ftime') dim(HarR$data) head(HarR$attrs$Dates) diff --git a/man/CST_PeriodPET.Rd b/man/CST_PeriodPET.Rd new file mode 100644 index 0000000000000000000000000000000000000000..eb84f25ac7129ea296b12b904105667936ef11aa --- /dev/null +++ b/man/CST_PeriodPET.Rd @@ -0,0 +1,80 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PeriodPET.R +\name{CST_PeriodPET} +\alias{CST_PeriodPET} +\title{Compute the Potential Evapotranspiration} +\usage{ +CST_PeriodPET( + data, + pet_method = "hargreaves", + time_dim = "syear", + leadtime_dim = "time", + lat_dim = "latitude", + na.rm = FALSE, + ncores = NULL +) +} +\arguments{ +\item{data}{A named list with the needed \code{s2dv_cube} objects containing +the seasonal forecast experiment in the 'data' element for each variable. +Specific variables are needed for each method used in computing the +Potential Evapotranspiration (see parameter 'pet_method'). The accepted +variable names are fixed in order to be recognized by the function. +The accepted name corresponding to the Minimum Temperature is 'tmin', +for Maximum Temperature is 'tmax', for Mean Temperature is 'tmean' and +for Precipitation is 'pr'. The accepted variable names for each method are: +For 'hargreaves': 'tmin' and 'tmax'; for 'hargreaves_modified' are 'tmin', +'tmax' and 'pr'; for method 'thornthwaite' 'tmean' is required. The units +for temperature variables ('tmin', 'tmax' and 'tmean') need to be in Celcius +degrees; the units for precipitation ('pr') need to be in mm/month. +Currently the function works only with monthly data from different years.} + +\item{pet_method}{A character string indicating the method used to compute +the potential evapotranspiration. The accepted methods are: +'hargreaves' and 'hargreaves_modified', that require the data to have +variables tmin and tmax; and 'thornthwaite', that requires variable +'tmean'.} + +\item{time_dim}{A character string indicating the name of the temporal +dimension. By default, it is set to 'syear'.} + +\item{leadtime_dim}{A character string indicating the name of the temporal +dimension. By default, it is set to 'time'.} + +\item{lat_dim}{A character string indicating the name of the latitudinal +dimension. By default it is set by 'latitude'.} + +\item{na.rm}{A logical value indicating whether NA values should be removed +from data. It is FALSE by default.} + +\item{ncores}{An integer value indicating the number of cores to use in +parallel computation.} +} +\description{ +Compute the Potential evapotranspiration (PET) that is the amount of +evaporation and transpiration that would occur if a sufficient water source +were available. This function calculate PET according to the Thornthwaite, +Hargreaves or Hargreaves-modified equations. +} +\details{ +This function is build to be compatible with other tools in +that work with 's2dv_cube' object class. The input data must be this object +class. If you don't work with 's2dv_cube', see PeriodPET. For more information +on the SPEI calculation, see functions CST_PeriodStandardization and +CST_PeriodAccumulation. +} +\examples{ +dims <- c(time = 3, syear = 3, ensemble = 1, latitude = 1) +exp_tasmax <- array(rnorm(360, 27.73, 5.26), dim = dims) +exp_tasmin <- array(rnorm(360, 14.83, 3.86), dim = dims) +exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims) +end_year <- 2012 +dates_exp <- as.POSIXct(c(paste0(2010:end_year, "-08-16"), + paste0(2010:end_year, "-09-15"), + paste0(2010:end_year, "-10-16")), "UTC") +dim(dates_exp) <- c(syear = 3, time = 3) +lat <- c(40) +exp1 <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) +res <- PeriodPET(data = exp1, lat = lat, dates = dates_exp) + +} diff --git a/man/CST_PeriodStandardization.Rd b/man/CST_PeriodStandardization.Rd new file mode 100644 index 0000000000000000000000000000000000000000..fe0dd3b9f59e082ae322e884b94415ea4246c8af --- /dev/null +++ b/man/CST_PeriodStandardization.Rd @@ -0,0 +1,125 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PeriodStandardization.R +\name{CST_PeriodStandardization} +\alias{CST_PeriodStandardization} +\title{Compute the Standardization of Precipitation-Evapotranspiration Index} +\usage{ +CST_PeriodStandardization( + data, + data_cor = NULL, + time_dim = "syear", + leadtime_dim = "time", + memb_dim = "ensemble", + ref_period = NULL, + handle_infinity = FALSE, + method = "parametric", + distribution = "log-Logistic", + params = NULL, + return_params = FALSE, + na.rm = FALSE, + ncores = NULL +) +} +\arguments{ +\item{data}{An 's2dv_cube' that element 'data' stores a multidimensional +array containing the data to be standardized.} + +\item{data_cor}{An 's2dv_cube' that element 'data' stores a multidimensional +array containing the data in which the standardization should be applied +using the fitting parameters from 'data'.} + +\item{time_dim}{A character string indicating the name of the temporal +dimension. By default, it is set to 'syear'.} + +\item{leadtime_dim}{A character string indicating the name of the temporal +dimension. By default, it is set to 'time'.} + +\item{memb_dim}{A character string indicating the name of the dimension in +which the ensemble members are stored. When set it to NULL, threshold is +computed for individual members.} + +\item{ref_period}{A list with two numeric values with the starting and end +points of the reference period used for computing the index. The default +value is NULL indicating that the first and end values in data will be +used as starting and end points.} + +\item{handle_infinity}{A logical value wether to return infinite values (TRUE) +or not (FALSE). When it is TRUE, the positive infinite values (negative +infinite) are substituted by the maximum (minimum) values of each +computation step, a subset of the array of dimensions time_dim, leadtime_dim +and memb_dim.} + +\item{method}{A character string indicating the standardization method used. +If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by +default.} + +\item{distribution}{A character string indicating the name of the distribution +function to be used for computing the SPEI. The accepted names are: +'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The +'Gamma' method only works when only precipitation is provided and other + variables are 0 because it is positive defined (SPI indicator).} + +\item{params}{An optional parameter that needs to be a multidimensional array +with named dimensions. This option overrides computation of fitting +parameters. It needs to be of same time dimensions (specified in 'time_dim' +and 'leadtime_dim') of 'data' and a dimension named 'coef' with the length +of the coefficients needed for the used distribution (for 'Gamma' coef +dimension is of lenght 2, for 'log-Logistic' is of length 3). It also needs +to have a leadtime dimension (specified in 'leadtime_dim') of length 1. It +will only be used if 'data_cor' is not provided.} + +\item{return_params}{A logical value indicating wether to return parameters +array (TRUE) or not (FALSE). It is FALSE by default.} + +\item{na.rm}{A logical value indicating whether NA values should be removed +from data. It is FALSE by default. If it is FALSE and there are NA values, +standardization cannot be carried out for those coordinates and therefore, +the result will be filled with NA for the specific coordinates. If it is +TRUE, if the data from other dimensions except time_dim and leadtime_dim is +not reaching 4 values, it is not enough values to estimate the parameters +and the result will include NA.} + +\item{ncores}{An integer value indicating the number of cores to use in +parallel computation.} +} +\value{ +An object of class \code{s2dv_cube} containing the standardized data. +If 'data_cor' is provided the array stored in element data will be of the same +dimensions as 'data_cor'. If 'data_cor' is not provided, the array stored in +element data will be of the same dimensions as 'data'. The parameters of the +standardization will only be returned if 'return_params' is TRUE, in this +case, the output will be a list of two objects one for the standardized data +and one for the parameters. +} +\description{ +The Standardization of the data is the last step of computing the SPEI +(Standarized Precipitation-Evapotranspiration Index). With this function the +data is fit to a probability distribution to transform the original values to +standardized units that are comparable in space and time and at different SPEI +time scales. +} +\details{ +Next, some specifications for the calculation of the standardization will be +discussed. If there are NAs in the data and they are not removed with the +parameter 'na.rm', the standardization cannot be carried out for those +coordinates and therefore, the result will be filled with NA for the +specific coordinates. When NAs are not removed, if the length of the data for +a computational step is smaller than 4, there will not be enough data for +standarize and the result will be also filled with NAs for that coordinates. +About the distribution used to fit the data, there are only two possibilities: +'log-logistic' and 'Gamma'. The 'Gamma' method only works when only +precipitation is provided and other variables are 0 because it is positive +defined (SPI indicator). When only 'data' is provided ('data_cor' is NULL) the +standardization is computed with cross validation. This function is build to +be compatible with other tools in that work with 's2dv_cube' object +class. The input data must be this object class. If you don't work with +'s2dv_cube', see PeriodStandardization. For more information on the SPEI +indicator calculation, see CST_PeriodPET and CST_PeriodAccumulation. +} +\examples{ +dims <- c(syear = 6, time = 3, latitude = 2, ensemble = 25) +data <- NULL +data$data <- array(rnorm(600, -204.1, 78.1), dim = dims) +class(data) <- 's2dv_cube' +SPEI <- CST_PeriodStandardization(data = data) +} diff --git a/man/PeriodAccumulation.Rd b/man/PeriodAccumulation.Rd index e9ee608cd26360b3ac1b83d39ab25bc2866799ba..ca01c205a3fc912b0c18cc98f89e395651932fd5 100644 --- a/man/PeriodAccumulation.Rd +++ b/man/PeriodAccumulation.Rd @@ -10,6 +10,9 @@ PeriodAccumulation( start = NULL, end = NULL, time_dim = "time", + rollwidth = NULL, + sdate_dim = "sdate", + frequency = "monthly", na.rm = FALSE, ncores = NULL ) @@ -33,9 +36,23 @@ the period and the final month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}.} \item{time_dim}{A character string indicating the name of the dimension to -compute the indicator. By default, it is set to 'time'. More than one -dimension name matching the dimensions provided in the object -\code{data$data} can be specified.} +compute the indicator. By default, it is set to 'time'.} + +\item{rollwidth}{An optional parameter to indicate the number of time +steps the rolling sum is applied to. If it is positive, the rolling sum is +applied backwards 'time_dim', if it is negative, it will be forward it. When +this parameter is NULL, the sum is applied over all 'time_dim', in a +specified period. It is NULL by default.} + +\item{sdate_dim}{(Only needed when rollwidth is used). A character string +indicating the name of the start date dimension to compute the rolling +accumulation. By default, it is set to 'sdate'.} + +\item{frequency}{(Only needed when rollwidth is used). A character string +indicating the time frequency of the data to apply the rolling accumulation. +It can be 'daily' or 'monthly'. If it is set to 'monthly', values from +continuous months will be accumulated; if it is 'daliy', values from +continuous days will be accumulated. It is set to 'monthly' by default.} \item{na.rm}{A logical value indicating whether to ignore NA values (TRUE) or not (FALSE).} @@ -45,7 +62,10 @@ computation.} } \value{ A multidimensional array with named dimensions containing the -indicator in the element \code{data}. +accumulated data in the element \code{data}. If parameter 'rollwidth' is +not used, it will have the dimensions of the input 'data' except the dimension +where the accumulation has been computed (specified with 'time_dim'). If +'rollwidth' is used, it will be of same dimensions as input data. } \description{ Period Accumulation computes the sum (accumulation) of a given variable in a @@ -58,22 +78,30 @@ by using this function: August 21st to October 21st} } } +\details{ +There are two possible ways of performing the accumulation. The default one +is by accumulating a variable over a dimension specified with 'time_dim'. To +chose a specific time period, 'start' and 'end' must be used. The other method +is by using 'rollwidth' parameter. When this parameter is a positive integer, +the cumulative backward sum is applied to the time dimension. If it is +negative, the rolling sum is applied backwards. +} \examples{ exp <- array(rnorm(216)*200, dim = c(dataset = 1, member = 2, sdate = 3, - time = 9, lat = 2, lon = 2)) -TP <- PeriodAccumulation(exp, time_dim = 'time') + ftime = 9, lat = 2, lon = 2)) +TP <- PeriodAccumulation(exp, time_dim = 'ftime') data <- array(rnorm(5 * 3 * 214 * 2), - c(memb = 5, sdate = 3, time = 214, lon = 2)) + c(memb = 5, sdate = 3, ftime = 214, lon = 2)) Dates <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), as.Date("30-11-2000", format = "\%d-\%m-\%Y"), by = 'day'), seq(as.Date("01-05-2001", format = "\%d-\%m-\%Y"), as.Date("30-11-2001", format = "\%d-\%m-\%Y"), by = 'day'), seq(as.Date("01-05-2002", format = "\%d-\%m-\%Y"), as.Date("30-11-2002", format = "\%d-\%m-\%Y"), by = 'day')) -dim(Dates) <- c(sdate = 3, time = 214) +dim(Dates) <- c(sdate = 3, ftime = 214) SprR <- PeriodAccumulation(data, dates = Dates, start = list(21, 4), - end = list(21, 6)) + end = list(21, 6), time_dim = 'ftime') HarR <- PeriodAccumulation(data, dates = Dates, start = list(21, 8), - end = list(21, 10)) + end = list(21, 10), time_dim = 'ftime') } diff --git a/man/PeriodPET.Rd b/man/PeriodPET.Rd new file mode 100644 index 0000000000000000000000000000000000000000..d8c77472fd021543b317efc2e02fd2948a55a3ed --- /dev/null +++ b/man/PeriodPET.Rd @@ -0,0 +1,84 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PeriodPET.R +\name{PeriodPET} +\alias{PeriodPET} +\title{Compute the Potential Evapotranspiration} +\usage{ +PeriodPET( + data, + dates, + lat, + pet_method = "hargreaves", + time_dim = "syear", + leadtime_dim = "time", + lat_dim = "latitude", + na.rm = FALSE, + ncores = NULL +) +} +\arguments{ +\item{data}{A named list of multidimensional arrays containing +the seasonal forecast experiment data for each variable. +Specific variables are needed for each method used in computing the +Potential Evapotranspiration (see parameter 'pet_method'). The accepted +variable names are fixed in order to be recognized by the function. +The accepted name corresponding to the Minimum Temperature is 'tmin', +for Maximum Temperature is 'tmax', for Mean Temperature is 'tmean' and +for Precipitation is 'pr'. The accepted variable names for each method are: +For 'hargreaves': 'tmin' and 'tmax'; for 'hargreaves_modified' are 'tmin', +'tmax' and 'pr'; for method 'thornthwaite' 'tmean' is required. The units +for temperature variables ('tmin', 'tmax' and 'tmean') need to be in Celcius +degrees; the units for precipitation ('pr') need to be in mm/month. +Currently the function works only with monthly data from different years.} + +\item{dates}{An array of temporal dimensions containing the Dates of +'data'. It must be of class 'Date' or 'POSIXct'.} + +\item{lat}{A numeric vector containing the latitude values of 'data'.} + +\item{pet_method}{A character string indicating the method used to compute +the potential evapotranspiration. The accepted methods are: +'hargreaves' and 'hargreaves_modified', that require the data to have +variables tmin and tmax; and 'thornthwaite', that requires variable +'tmean'.} + +\item{time_dim}{A character string indicating the name of the temporal +dimension. By default, it is set to 'syear'.} + +\item{leadtime_dim}{A character string indicating the name of the temporal +dimension. By default, it is set to 'time'.} + +\item{lat_dim}{A character string indicating the name of the latitudinal +dimension. By default it is set by 'latitude'.} + +\item{na.rm}{A logical value indicating whether NA values should be removed +from data. It is FALSE by default.} + +\item{ncores}{An integer value indicating the number of cores to use in +parallel computation.} +} +\description{ +Compute the Potential Evapotranspiration (PET) that is the amount of +evaporation and transpiration that would occur if a sufficient water source +were available. This function calculate PET according to the Thornthwaite, +Hargreaves or Hargreaves-modified equations. +} +\details{ +For more information on the SPEI calculation, see functions +PeriodStandardization and PeriodAccumulation. +} +\examples{ +dims <- c(time = 3, syear = 3, ensemble = 1, latitude = 1) +exp_tasmax <- array(rnorm(360, 27.73, 5.26), dim = dims) +exp_tasmin <- array(rnorm(360, 14.83, 3.86), dim = dims) +exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims) +end_year <- 2012 +dates_exp <- as.POSIXct(c(paste0(2010:end_year, "-08-16"), + paste0(2010:end_year, "-09-15"), + paste0(2010:end_year, "-10-16")), "UTC") +dim(dates_exp) <- c(syear = 3, time = 3) +lat <- c(40) +exp1 <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) +res <- PeriodPET(data = exp1, lat = lat, dates = dates_exp) + +} diff --git a/man/PeriodStandardization.Rd b/man/PeriodStandardization.Rd new file mode 100644 index 0000000000000000000000000000000000000000..94bbee12044d81075d726b42d7f7757c465c3574 --- /dev/null +++ b/man/PeriodStandardization.Rd @@ -0,0 +1,125 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PeriodStandardization.R +\name{PeriodStandardization} +\alias{PeriodStandardization} +\title{Compute the Standardization of Precipitation-Evapotranspiration Index} +\usage{ +PeriodStandardization( + data, + data_cor = NULL, + dates = NULL, + time_dim = "syear", + leadtime_dim = "time", + memb_dim = "ensemble", + ref_period = NULL, + handle_infinity = FALSE, + method = "parametric", + distribution = "log-Logistic", + params = NULL, + return_params = FALSE, + na.rm = FALSE, + ncores = NULL +) +} +\arguments{ +\item{data}{A multidimensional array containing the data to be standardized.} + +\item{data_cor}{A multidimensional array containing the data in which the +standardization should be applied using the fitting parameters from 'data'.} + +\item{dates}{An array containing the dates of the data with the same time +dimensions as the data. It is optional and only necessary for using the +parameter 'ref_period' to select a reference period directly from dates.} + +\item{time_dim}{A character string indicating the name of the temporal +dimension. By default, it is set to 'syear'.} + +\item{leadtime_dim}{A character string indicating the name of the temporal +dimension. By default, it is set to 'time'.} + +\item{memb_dim}{A character string indicating the name of the dimension in +which the ensemble members are stored. When set it to NULL, threshold is +computed for individual members.} + +\item{ref_period}{A list with two numeric values with the starting and end +points of the reference period used for computing the index. The default +value is NULL indicating that the first and end values in data will be +used as starting and end points.} + +\item{handle_infinity}{A logical value wether to return infinite values (TRUE) +or not (FALSE). When it is TRUE, the positive infinite values (negative +infinite) are substituted by the maximum (minimum) values of each +computation step, a subset of the array of dimensions time_dim, leadtime_dim +and memb_dim.} + +\item{method}{A character string indicating the standardization method used. +If can be: 'parametric' or 'non-parametric'. It is set to 'parametric' by +default.} + +\item{distribution}{A character string indicating the name of the distribution +function to be used for computing the SPEI. The accepted names are: +'log-Logistic' and 'Gamma'. It is set to 'log-Logistic' by default. The +'Gamma' method only works when only precipitation is provided and other + variables are 0 because it is positive defined (SPI indicator).} + +\item{params}{An optional parameter that needs to be a multidimensional array +with named dimensions. This option overrides computation of fitting +parameters. It needs to be of same time dimensions (specified in 'time_dim' +and 'leadtime_dim') of 'data' and a dimension named 'coef' with the length +of the coefficients needed for the used distribution (for 'Gamma' coef +dimension is of lenght 2, for 'log-Logistic' is of length 3). It also needs +to have a leadtime dimension (specified in 'leadtime_dim') of length 1. It +will only be used if 'data_cor' is not provided.} + +\item{return_params}{A logical value indicating wether to return parameters +array (TRUE) or not (FALSE). It is FALSE by default.} + +\item{na.rm}{A logical value indicating whether NA values should be removed +from data. It is FALSE by default. If it is FALSE and there are NA values, +standardization cannot be carried out for those coordinates and therefore, +the result will be filled with NA for the specific coordinates. If it is +TRUE, if the data from other dimensions except time_dim and leadtime_dim is +not reaching 4 values, it is not enough values to estimate the parameters +and the result will include NA.} + +\item{ncores}{An integer value indicating the number of cores to use in +parallel computation.} +} +\value{ +A multidimensional array containing the standardized data. +If 'data_cor' is provided the array will be of the same dimensions as +'data_cor'. If 'data_cor' is not provided, the array will be of the same +dimensions as 'data'. The parameters of the standardization will only be +returned if 'return_params' is TRUE, in this case, the output will be a list +of two objects one for the standardized data and one for the parameters. +} +\description{ +The Standardization of the data is the last step of computing the SPEI +indicator. With this function the data is fit to a probability distribution to +transform the original values to standardized units that are comparable in +space and time and at different SPEI time scales. +} +\details{ +Next, some specifications for the calculation of the standardization will be +discussed. If there are NAs in the data and they are not removed with the +parameter 'na.rm', the standardization cannot be carried out for those +coordinates and therefore, the result will be filled with NA for the +specific coordinates. When NAs are not removed, if the length of the data for +a computational step is smaller than 4, there will not be enough data for +standarize and the result will be also filled with NAs for that coordinates. +About the distribution used to fit the data, there are only two possibilities: +'log-logistic' and 'Gamma'. The 'Gamma' method only works when only +precipitation is provided and other variables are 0 because it is positive +defined (SPI indicator). When only 'data' is provided ('data_cor' is NULL) the +standardization is computed with cross validation. For more information about +SPEI, see functions PeriodPET and PeriodAccumulation. +} +\examples{ +dims <- c(syear = 6, time = 2, latitude = 2, ensemble = 25) +dimscor <- c(syear = 1, time = 2, latitude = 2, ensemble = 25) +data <- array(rnorm(600, -194.5, 64.8), dim = dims) +datacor <- array(rnorm(100, -217.8, 68.29), dim = dimscor) + +SPEI <- PeriodStandardization(data = data) +SPEIcor <- PeriodStandardization(data = data, data_cor = datacor) +} diff --git a/tests/testthat/test-PeriodAccumulation.R b/tests/testthat/test-PeriodAccumulation.R index 441b0213eebd2ea835ab9572ef6b854119bc6dfc..9dcbcf9c44cb96b10cbe0c520e875e7ad70dfd8e 100644 --- a/tests/testthat/test-PeriodAccumulation.R +++ b/tests/testthat/test-PeriodAccumulation.R @@ -1,25 +1,72 @@ library(CSTools) +# dat1 +dat1 <- array(1:6, dim = c(sdate = 2, ftime = 3, member = 1)) +dat1_1 <- dat1 +class(dat1_1) <- 's2dv_cube' +dat1_2 <- NULL +dat1_2$data <- dat1 +class(dat1_2) <- 's2dv_cube' + +# dat2 +dat2 <- array(1:6, dim = c(sdate = 2, time = 3, member = 1)) +dates2 <- c(seq(as.Date("01-04-2000", format = "%d-%m-%Y"), + as.Date("03-04-2000", format = "%d-%m-%Y"), by = 'day'), + seq(as.Date("01-04-2001", format = "%d-%m-%Y"), + as.Date("03-04-2001", format = "%d-%m-%Y"), by = 'day')) +dim(dates2) <- c(time = 3, sdate = 2) + +# exp1 +exp <- NULL +exp$data <- array(1:(1 * 3 * 214 * 2), + c(memb = 1, sdate = 3, ftime = 214, lon = 2)) +exp$dims <- dim(exp$data) +exp$attrs$Dates <- c(seq(as.Date("01-04-2000", format = "%d-%m-%Y"), + as.Date("31-10-2000", format = "%d-%m-%Y"), by = 'day'), + seq(as.Date("01-04-2001", format = "%d-%m-%Y"), + as.Date("31-10-2001", format = "%d-%m-%Y"), by = 'day'), + seq(as.Date("01-04-2002", format = "%d-%m-%Y"), + as.Date("31-10-2002", format = "%d-%m-%Y"), by = 'day')) +dim(exp$attrs$Dates) <- c(ftime = 214, sdate = 3) +class(exp) <- 's2dv_cube' + ############################################## -test_that("1. Sanity Checks", { +test_that("1. Initial checks", { + # s2dv_cube expect_error( - PeriodAccumulation('x'), - "Parameter 'data' must be numeric." + CST_PeriodAccumulation(array(1)), + "Parameter 'data' must be of the class 's2dv_cube'." ) - expect_equal( - PeriodAccumulation(1), - 1 + expect_error( + CST_PeriodAccumulation(data = dat1_1, start = list(1,2), end = list(2,3)), + "Parameter 'data' doesn't have 's2dv_cube' structure. Use PeriodAccumulation instead." ) - expect_equal( - PeriodAccumulation(1, time_dim = 'x'), - 1 + # Dates subset + expect_warning( + CST_PeriodAccumulation(data = dat1_2, start = list(1,2), end = list(2,3), + time_dim = 'ftime'), + paste0("Dimensions in 'data' element 'attrs$Dates' are missed and ", + "all data would be used."), + fixed = TRUE + ) + # data + expect_error( + PeriodAccumulation('x'), + "Parameter 'data' must be numeric." ) expect_error( PeriodAccumulation(data = NULL), "Parameter 'data' cannot be NULL." ) + # time_dim + expect_error( + PeriodAccumulation(data = dat2, time_dim = 'ftime', rollwidth = 1, + dates = dates2), + "Parameter 'time_dim' is not found in 'data' dimension." + ) + # start and end expect_error( - PeriodAccumulation(1, dates = '2000-01-01', end = 3, start = 4), + PeriodAccumulation(dat2, dates = '2000-01-01', end = 3, start = 4), paste("Parameter 'start' and 'end' must be lists indicating", "the day and the month of the period start and end.") ) @@ -53,18 +100,6 @@ test_that("1. Sanity Checks", { ############################################## test_that("2. Seasonal", { - exp <- NULL - exp$data <- array(1:(1 * 3 * 214 * 2), - c(memb = 1, sdate = 3, time = 214, lon = 2)) - exp$dims <- dim(exp$data) - exp$attrs$Dates <- c(seq(as.Date("01-04-2000", format = "%d-%m-%Y"), - as.Date("31-10-2000", format = "%d-%m-%Y"), by = 'day'), - seq(as.Date("01-04-2001", format = "%d-%m-%Y"), - as.Date("31-10-2001", format = "%d-%m-%Y"), by = 'day'), - seq(as.Date("01-04-2002", format = "%d-%m-%Y"), - as.Date("31-10-2002", format = "%d-%m-%Y"), by = 'day')) - dim(exp$attrs$Dates) <- c(time = 214, sdate = 3) - class(exp) <- 's2dv_cube' output <- exp output$data <- array(c(sum(exp$data[1,1,21:82,1]), sum(exp$data[1,2,21:82,1]), sum(exp$data[1,3,21:82,1]), sum(exp$data[1,1,21:82,2]), @@ -72,7 +107,8 @@ test_that("2. Seasonal", { c(memb = 1, sdate = 3, lon = 2)) expect_equal( - CST_PeriodAccumulation(exp, start = list(21, 4), end = list(21, 6))$data, + CST_PeriodAccumulation(exp, start = list(21, 4), end = list(21, 6), + time_dim = 'ftime')$data, output$data ) }) @@ -133,3 +169,56 @@ test_that("3. Subset Dates and check time_bounds", { TRUE ) }) + +############################################## + +test_that("4. Rolling", { + # dates + expect_error( + PeriodAccumulation(data = dat2, rollwidth = 1), + "Parameter 'dates' is NULL. Cannot compute the rolling accumulation." + ) + # rollwidth + expect_error( + PeriodAccumulation(data = dat2, rollwidth = 'a', dates = dates2), + "Parameter 'rollwidth' must be a numeric value." + ) + expect_error( + PeriodAccumulation(data = dat2, rollwidth = 5, dates = dates2), + "Cannot compute accumulation of 5 months because loaded data has only 3 months." + ) + # sdate_dim + expect_error( + PeriodAccumulation(data = dat2, rollwidth = 1, dates = dates2, + sdate_dim = 'syear'), + "Parameter 'sdate_dim' is not found in 'data' dimension." + ) + # Output checks + expect_equal( + PeriodAccumulation(data = dat2, rollwidth = -2, dates = dates2, frequency = 'daily'), + array(c(4,6,8, 10, NA, NA), dim = c(sdate = 2, time = 3, member = 1)) + ) + expect_equal( + PeriodAccumulation(data = dat2, rollwidth = 3, dates = dates2, frequency = 'daily'), + array(c(rep(NA, 4), 9, 12), dim = c(sdate = 2, time = 3, member = 1)) + ) + dat2_1 <- dat2 + dat2_1[1,1,1] <- NA + expect_equal( + PeriodAccumulation(data = dat2_1, rollwidth = 2, dates = dates2, na.rm = FALSE, + frequency = 'daily'), + array(c(rep(NA, 3), 6,8,10), dim = c(sdate = 2, time = 3, member = 1)) + ) + + # Test rolling with start and end + expect_equal( + PeriodAccumulation(data = dat2, rollwidth = 1, dates = dates2, + start = list(1, 4), end = list(2, 4), frequency = 'daily'), + array(c(1, 2, 3, 4), dim = c(sdate = 2, time = 2, member = 1)) + ) + expect_equal( + PeriodAccumulation(data = dat2, rollwidth = 2, dates = dates2, + start = list(1, 4), end = list(2, 4), frequency = 'daily'), + array(c(NA, NA, 4, 6), dim = c(sdate = 2, time = 2, member = 1)) + ) +}) diff --git a/tests/testthat/test-PeriodPET.R b/tests/testthat/test-PeriodPET.R new file mode 100644 index 0000000000000000000000000000000000000000..632d1c48ac6a7b77af048488646108e25b7d49d0 --- /dev/null +++ b/tests/testthat/test-PeriodPET.R @@ -0,0 +1,197 @@ +############################################## + +# cube1 +cube1 <- NULL +cube1$data <- array(rnorm(10), dim = c(lat = 2, time = 5)) +class(cube1) <- 's2dv_cube' + +# cube2 +cube2 <- NULL +cube2$data <- array(rnorm(10), dim = c(lat = 2, time = 5)) +class(cube2) <- 's2dv_cube' +cube2$coords <- list(lat = 1:2) + +# dat1 +dims <- c(syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 10) + +set.seed(1) +exp_tasmax <- array(rnorm(360, 27.73, 5.26), dim = dims) +set.seed(2) +exp_tasmin <- array(rnorm(360, 14.83, 3.86), dim = dims) +set.seed(3) +exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims) + +dates_exp <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), + paste0(2010:2015, "-10-16"))) +dim(dates_exp) <- c(syear = 6, time = 3) + +lat <- c(40,40.1) + +exp1 <- list('tmax' = exp_tasmax, 'tmin' = exp_tasmin, 'pr' = exp_prlr) + +# dat2 +dims2 <- c(styear = 6, ftime = 3, lat = 2, lon = 1, member = 10) + +set.seed(1) +exp_tas <- array(rnorm(100, 17.34, 9.18), dim = dims2) +set.seed(2) +exp_prlr <- array(rnorm(360, 21.19, 25.64), dim = dims2) + +dates_exp2 <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), + paste0(2010:2015, "-10-16"))) +dim(dates_exp2) <- c(sday = 1, sweek = 1, styear = 6, ftime = 3) + +lat <- c(40,40.1) + +exp2 <- list('tmean' = exp_tas, 'pr' = exp_prlr) + +# cube4 +cube4_exp <- lapply(exp1, function(x) { + suppressWarnings( + CSTools::s2dv_cube(data = x, coords = list(latitude = c(40, 40.1)), + varName = 'test', Dates = as.POSIXct(dates_exp)) + ) +}) + +############################################## + +test_that("1. Initial checks CST_PeriodPET", { + # Check 's2dv_cube' + expect_error( + CST_PeriodPET(data = NULL), + "Parameter 'data' cannot be NULL." + ) + expect_error( + CST_PeriodPET(data = array(10)), + "Parameter 'data' must be a list of 's2dv_cube' class." + ) + # latitude + expect_error( + CST_PeriodPET(data = list(cube1)), + paste0("Spatial coordinate names of parameter 'data' do not match any ", + "of the names accepted by the package.") + ) + # Dates + expect_error( + CST_PeriodPET(data = list(cube2)), + paste0("Element 'Dates' is not found in 'attrs' list of 'data'. ", + "See 's2dv_cube' object description in README file for more ", + "information.") + ) +}) + +############################################## + +test_that("1. Initial checks PeriodPET", { + # data + expect_error( + PeriodPET(data = NULL), + "Parameter 'data' needs to be a named list with the needed variables." + ) + expect_error( + PeriodPET(data = list(1)), + "Parameter 'data' needs to be a named list with the variable names." + ) + expect_error( + PeriodPET(data = list(tasmax = array(10))), + "Parameter 'data' needs to be a list of arrays with dimension names." + ) + expect_error( + PeriodPET(data = list(tasmax = array(10, c(time = 10)), + tasmin = array(10, c(time = 11)))), + "Parameter 'data' variables need to have the same dimensions." + ) + expect_error( + PeriodPET(data = list(tasmax = array(10, c(time = 10)), + tasmin = array(10, c(ftime = 10)))), + "Parameter 'data' variables need to have the same dimensions." + ) + # lat + expect_error( + PeriodPET(data = exp1, lat = 'lat'), + "Parameter 'lat' must be numeric." + ) + expect_error( + PeriodPET(data = list(tasmax = array(10, c(time = 10)), + tasmin = array(10, c(time = 10))), lat = 1:2), + "Parameter 'data' must have 'lat_dim' dimension." + ) + # data (2) + expect_warning( + PeriodPET(data = exp1, pet_method = '1', dates = dates_exp, lat = lat), + paste0("Parameter 'pet_method' needs to be 'hargreaves' or ", + "'hargreaves_modified'. It is set to 'hargreaves_modified'.") + ) + # time_dim + expect_error( + PeriodPET(data = exp1, time_dim = 1, dates = dates_exp, lat = lat), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + PeriodPET(data = exp2, lat = lat, dates = dates_exp2, + lat_dim = 'lat', pet_method = 'thornthwaite'), + "Parameter 'time_dim' is not found in 'data' dimension." + ) + # leadtime_dim + expect_error( + PeriodPET(data = exp1, leadtime_dim = 1, dates = dates_exp, lat = lat), + "Parameter 'leadtime_dim' must be a character string." + ) + expect_error( + PeriodPET(data = exp2, lat = lat, dates = dates_exp2, + lat_dim = 'lat', time_dim = 'ftime', pet_method = 'thornthwaite'), + "Parameter 'leadtime_dim' is not found in 'data' dimension." + ) + # lat_dim + expect_error( + PeriodPET(data = exp1, lat_dim = 1, dates = dates_exp, lat = lat) + ) + expect_error( + PeriodPET(data = exp2, lat = lat, dates = dates_exp2), + "Parameter 'data' must have 'lat_dim' dimension." + ) + # na.rm + expect_error( + PeriodPET(data = exp1, na.rm = 1.5, dates = dates_exp, lat = lat), + "Parameter 'na.rm' must be one logical value." + ) + # ncores + expect_error( + PeriodPET(data = exp1, ncores = 1.5, dates = dates_exp, lat = lat), + "Parameter 'ncores' must be a positive integer." + ) +}) + +############################################## + +test_that("2. Output checks", { + res01 <- CST_PeriodPET(data = cube4_exp) + res1 <- PeriodPET(data = exp1, lat = lat, dates = dates_exp) + res2 <- PeriodPET(data = exp2, lat = lat, dates = dates_exp2, + pet_method = c('thornthwaite'), + lat_dim = 'lat', time_dim = 'styear', + leadtime_dim = 'ftime') + # structure + expect_equal( + names(res01), + c('data', 'dims', 'coords', 'attrs') + ) + # dims + expect_equal( + dim(res1), + c(syear = 6, time = 3, latitude = 2, longitude = 1, ensemble = 10) + ) + # values + expect_equal( + res1[1:4], + c(137.77342, 154.55548, 65.72859, 222.20438), + tolerance = 0.0001 + ) + expect_equal( + res2[1:4], + c(77.76124, 118.94212, 66.57568, 185.67074), + tolerance = 0.0001 + ) +}) + +############################################## \ No newline at end of file diff --git a/tests/testthat/test-PeriodStandardization.R b/tests/testthat/test-PeriodStandardization.R new file mode 100644 index 0000000000000000000000000000000000000000..7673db2d070cef7f71506bf80acbef78694ae42e --- /dev/null +++ b/tests/testthat/test-PeriodStandardization.R @@ -0,0 +1,230 @@ +############################################## + +# cube1 +dims <- c(syear = 6, time = 3, latitude = 2, ensemble = 25) +cube1 <- NULL +cube1$data <- array(rnorm(600, -204.1, 78.1), dim = dims) +class(cube1) <- 's2dv_cube' + +# dat1 +dims <- c(syear = 6, time = 2, latitude = 2, ensemble = 25) +dimscor <- c(syear = 1, time = 2, latitude = 2, ensemble = 25) +dimscor1_1 <- c(syear = 1, time = 3, latitude = 2, ensemble = 25) +set.seed(1) +dat1 <- array(rnorm(600, -194.5, 64.8), dim = dims) +set.seed(2) +datcor1 <- array(rnorm(100, -217.8, 68.29), dim = dimscor) +set.seed(3) +datcor1_1 <- array(rnorm(100, -217.8, 68.29), dim = dimscor1_1) + +# dates1 +dates1 <- as.Date(c(paste0(2010:2015, "-08-16"), paste0(2010:2015, "-09-15"), + paste0(2010:2015, "-10-16"))) +dim(dates1) <- c(syear = 6, time = 3) + +# dat2 +dims2 <- c(styear = 6, ftime = 2, lat = 2, member = 25) +dimscor2_1 <- c(syear = 1, ftime = 2, lat = 2, ensemble = 25) +dimscor2_2 <- c(styear = 1, ftime = 2, lat = 2, ensemble = 25) +set.seed(1) +dat2 <- array(rnorm(600, -194.5, 64.8), dim = dims2) +set.seed(2) +datcor2_1 <- array(rnorm(100, -194.5, 64.8), dim = dimscor2_1) +set.seed(2) +datcor2_2 <- array(rnorm(100, -194.5, 64.8), dim = dimscor2_2) + +# dat3 +dims3 <- c(syear = 6, time = 2, lat = 2, ensemble = 25) +set.seed(1) +dat3 <- array(abs(rnorm(600, 21.19, 25.64)), dim = dims) + + +############################################## + +test_that("1. Initial checks CST_PeriodStandardization", { + # Check 's2dv_cube' + expect_error( + CST_PeriodStandardization(data = NULL), + "Parameter 'data' cannot be NULL." + ) + expect_error( + CST_PeriodStandardization(data = array(10)), + "Parameter 'data' must be of 's2dv_cube' class." + ) +}) + +############################################## + +test_that("1. Initial checks PeriodStandardization", { + # data + expect_error( + PeriodStandardization(data = NULL), + "Parameter 'data' must be a numeric array." + ) + expect_error( + PeriodStandardization(data = array(1)), + "Parameter 'data' must have dimension names." + ) + # data_cor + expect_error( + PeriodStandardization(data = dat1, data_cor = 1), + "Parameter 'data_cor' must be a numeric array." + ) + expect_error( + PeriodStandardization(data = dat1, data_cor = array(1:2)), + "Parameter 'data_cor' must have dimension names." + ) + # time_dim + expect_error( + PeriodStandardization(data = dat1, time_dim = 1), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + PeriodStandardization(data = dat2), + "Parameter 'time_dim' is not found in 'data' dimension." + ) + expect_error( + PeriodStandardization(data = dat2, data_cor = dat1, time_dim = 'ftime'), + "Parameter 'time_dim' is not found in 'data_cor' dimension." + ) + # leadtime_dim + expect_error( + PeriodStandardization(data = dat1, leadtime_dim = 1), + "Parameter 'leadtime_dim' must be a character string." + ) + expect_error( + PeriodStandardization(data = dat2, time_dim = 'ftime'), + "Parameter 'leadtime_dim' is not found in 'data' dimension." + ) + expect_error( + PeriodStandardization(data = dat2, data_cor = datcor2_1, time_dim = 'ftime', + leadtime_dim = 'styear'), + "Parameter 'leadtime_dim' is not found in 'data_cor' dimension." + ) + # memb_dim + expect_error( + PeriodStandardization(data = dat1, memb_dim = 1), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + PeriodStandardization(data = dat2, time_dim = 'styear', leadtime_dim = 'ftime'), + "Parameter 'memb_dim' is not found in 'data' dimension." + ) + expect_error( + PeriodStandardization(data = dat2, data_cor = datcor2_2, time_dim = 'styear', + leadtime_dim = 'ftime', memb_dim = 'member'), + "Parameter 'memb_dim' is not found in 'data_cor' dimension." + ) + # data_cor (2) + expect_error( + PeriodStandardization(data = dat1, data_cor = datcor1_1), + paste0("Parameter 'data' and 'data_cor' have dimension 'leadtime_dim' ", + "of different length.") + ) + # ref_period + expect_warning( + PeriodStandardization(data = dat1, ref_period = list(1,2)), + paste0("Parameter 'dates' is not provided so 'ref_period' can't be ", + "used.") + ) + expect_warning( + PeriodStandardization(data = dat1, ref_period = list(2020, 2021), + dates = dates1), + paste0("Parameter 'ref_period' contain years outside the dates. ", + "It will not be used.") + ) + # handle_infinity + # method + # distribution + # na.rm + expect_error( + PeriodStandardization(data = dat1, na.rm = 1.5), + "Parameter 'na.rm' must be logical." + ) + # ncores + expect_error( + PeriodStandardization(data = dat1, ncores = 1.5), + "Parameter 'ncores' must be a positive integer." + ) +}) + +############################################## + +test_that("2. Output checks", { + # CST_PeriodStandardization + SPEI_s2dv_cube <- CST_PeriodStandardization(data = cube1) + expect_equal( + names(SPEI_s2dv_cube), + c('data', 'attrs') + ) + # PeriodStandardization + SPEI <- PeriodStandardization(data = dat1) + expect_equal( + dim(SPEI), + c(syear = 6, time = 2, latitude = 2, ensemble = 25) + ) + expect_equal( + SPEI[,1,1,1], + c(-0.4842599, 0.4072574, -0.8119087, 1.5490196, 0.5467044, -0.7719460), + tolerance = 0.0001 + ) + SPEIcor <- PeriodStandardization(data = dat1, data_cor = datcor1) + expect_equal( + dim(SPEIcor), + c(syear = 1, time = 2, latitude = 2, ensemble = 25) + ) + expect_equal( + SPEIcor[,,1,1], + c(-1.232981, -0.309125), + tolerance = 0.0001 + ) + # time_dim, leadtime_dim, memb_dim + expect_equal( + PeriodStandardization(data = dat2, time_dim = 'ftime', + leadtime_dim = 'styear', memb_dim = 'member')[1:4], + c(-0.8229475, 0.1918119, -0.7627081, 0.9955730), + tolerance = 0.0001 + ) + # ref_period + dates <- dates1 + ref_period = list(2011, 2014) + # handle_infinity + expect_equal( + any(is.infinite(PeriodStandardization(data = dat1, handle_infinity = T))), + FALSE + ) + # method + expect_equal( + PeriodStandardization(data = dat1, method = 'non-parametric')[,1,1,1], + c(-0.5143875, 0.3492719, -0.7163839, 1.6413758, 0.4580046, -0.6949654), + tolerance = 0.0001 + ) + # distribution + expect_equal( + all(is.na(PeriodStandardization(data = dat1, distribution = 'Gamma'))), + TRUE + ) + expect_equal( + PeriodStandardization(data = dat3, distribution = 'Gamma')[1:5], + c(-1.2059075, 0.3285372, -3.1558450, 1.5034088, 0.5123442) + ) + # na.rm + dat1[1,1,1,1] <- NA + expect_equal( + all(is.na(PeriodStandardization(data = dat1, na.rm = FALSE)[,1,1,1])), + TRUE + ) + expect_equal( + !all(is.na(PeriodStandardization(data = dat1, na.rm = TRUE)[,1,1,1])), + TRUE + ) + expect_equal( + any(is.na(PeriodStandardization(data = dat1, data_cor = datcor1, na.rm = TRUE))), + FALSE + ) + expect_equal( + any(is.na(PeriodStandardization(data = dat1, data_cor = datcor1, na.rm = FALSE))), + TRUE + ) + # ncores +})