diff --git a/NAMESPACE b/NAMESPACE index 7bee168e0f5ab17ef1e95495c07d7eed6d8857f3..bafe27bad1f0612d7521b11d0ab8a8e715c722a2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,7 @@ # Generated by roxygen2: do not edit by hand +export(AccumulationExceedingThreshold) +export(CST_AccumulationExceedingThreshold) export(CST_PeriodAccumulation) export(CST_PeriodMean) export(CST_SelectPeriodOnData) diff --git a/R/AccumulationExceedingThreshold.R b/R/AccumulationExceedingThreshold.R new file mode 100644 index 0000000000000000000000000000000000000000..3782d3b6d211dd3dd6f43dda84c28fac6354e6d5 --- /dev/null +++ b/R/AccumulationExceedingThreshold.R @@ -0,0 +1,202 @@ +#'Accumulation of a variable when Exceeding (not exceeding) a Threshold +#' +#'The accumulation (sum) of a variable in the days (or time steps) that the variable is exceeding (or not exceeding) a threshold during a period. The threshold provided must be +#'in the same units than the variable units, i.e. to use a percentile as a scalar, +#'the function \code{Threshold} or \code{QThreshold} may be needed. +#'Providing mean daily temperature data, the following agriculture indices for heat stress can be obtained by using this function: +#'\itemize{ +#' \item\code{GDD}{Summation of daily differences between daily average temperatures and 10°C between April 1st and October 31st}} +#' +#'@param data a 's2dv_cube' object as provided by function \code{CST_Load} in package CSTools. +#'@param threshold a 's2dv_cube' object as output of a 'CST_' function in the same units as parameter 'data' and with the common dimensions of the element 'data' of the same length. A single scalar is also possible. +#'@param op a opartor '>' (by default), '<', '>=' or '<='. +#'@param start an optional parameter to defined the initial date of the period to select from the data by providing a list of two elements: the initial date of the period and the initial month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}. +#'@param end an optional parameter to defined the final date of the period to select from the data by providing a list of two elements: the final day of the period and the final month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}. +#'@param time_dim a character string indicating the name of the function to compute the indicator. By default, it is set to 'ftime'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified. +#'@param na.rm a logical value indicating whether to ignore NA values (TRUE) or not (FALSE). +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#' +#'@return A 's2dv_cube' object containing the indicator in the element \code{data}. +#' +#'@import multiApply +#'@examples +#'exp <- CSTools::lonlat_data$exp +#'DOT <- CST_AccumulationExceedingThreshold(exp, threshold = 280) +#'# Assuming exp$data is already (tasmax + tasmin)/2 - 10 +#'exp$data <- array(rnorm(5 * 3 * 214 * 2, mean = 25, sd = 3), +#' c(memb = 5, sdate = 3, ftime = 214, lon = 2)) +#'exp$Dates[[1]] <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), +#' as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2001", format = "%d-%m-%Y"), +#' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), +#' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) +#'GDD <- CST_AccumulationExceedingThreshold(exp, threshold = 0, +#' start = list(1, 4), end = list(31, 10)) +#' +#'@export +CST_AccumulationExceedingThreshold <- function(data, threshold, op = '>', + start = NULL, end = NULL, + time_dim = 'ftime', + na.rm = FALSE, ncores = NULL) { + if (!inherits(data, 's2dv_cube')) { + stop("Parameter 'data' must be of the class 's2dv_cube', ", + "as output by CSTools::CST_Load.") + } + # when subsetting is needed, dimensions are also needed: + if (!is.null(start) && !is.null(end)) { + if (is.null(dim(data$Dates$start))) { + if (length(data$Dates$start) != dim(data$data)[time_dim]) { + if (length(data$Dates$start) == + prod(dim(data$data)[time_dim] * dim(data$data)['sdate'])) { + dim(data$Dates$start) <- c(dim(data$data)[time_dim], + dim(data$data)['sdate']) + } + } else { + warning("Dimensions in 'data' element 'Dates$start' are missed and", + "all data would be used.") + } + } + } + if (inherits(threshold, 's2dv_cube')) { + threshold <- threshold$data + } + total <- AccumulationExceedingThreshold(data$data, data$Dates[[1]], + threshold = threshold, op = op, + start = start, end = end, time_dim = time_dim, + na.rm = na.rm, ncores = ncores) + data$data <- total + if (!is.null(start) && !is.null(end)) { + data$Dates <- SelectPeriodOnDates(dates = data$Dates$start, + start = start, end = end, + time_dim = time_dim, ncores = ncores) + } + return(data) +} +#'Accumulation of a variable when Exceeding (not exceeding) a Threshold +#' +#'The accumulation (sum) of a variable in the days (or time steps) that the variable is exceeding (or not exceeding) a threshold during a period. The threshold provided must be +#'in the same units than the variable units, i.e. to use a percentile as a scalar, +#'the function \code{Threshold} or \code{QThreshold} may be needed. +#'Providing mean daily temperature data, the following agriculture indices for heat stress can be obtained by using this function: +#'\itemize{ +#' \item\code{GDD}{Summation of daily differences between daily average temperatures and 10°C between April 1st and October 31st}} +#' +#'@param data a multidimensional array with named dimensions. +#'@param threshold a multidimensional array with named dimensions in the same units as parameter 'data' and with the common dimensions of the element 'data' of the same length. +#'@param op a opartor '>' (by default), '<', '>=' or '<='. +#'@param dates a vector of dates or a multidimensional array of dates with named dimensions matching the dimensions on parameter 'data'. By default it is NULL, to select a period this parameter must be provided. +#'@param start an optional parameter to defined the initial date of the period to select from the data by providing a list of two elements: the initial date of the period and the initial month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}. +#'@param end an optional parameter to defined the final date of the period to select from the data by providing a list of two elements: the final day of the period and the final month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}. +#'@param time_dim a character string indicating the name of the function to compute the indicator. By default, it is set to 'ftime'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified. +#'@param na.rm a logical value indicating whether to ignore NA values (TRUE) or not (FALSE). +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#' +#'@return A multidimensional array with named dimensions. +#' +#'@import multiApply +#'@examples +#'exp <- CSTools::lonlat_data$exp$data +#'AET <- AccumulationExceedingThreshold(exp, threshold = 280, time_dim = 'ftime') +#'# Assuming data is already (tasmax + tasmin)/2 - 10 +#'data <- array(rnorm(5 * 3 * 214 * 2, mean = 25, sd = 3), +#' c(memb = 5, sdate = 3, time = 214, lon = 2)) +#'Dates <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), +#' as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2001", format = "%d-%m-%Y"), +#' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), +#' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) +#'GDD <- AccumulationExceedingThreshold(data, threshold = 0, start = list(1, 4), end = list(31, 10)) +#'@export +AccumulationExceedingThreshold <- function(data, threshold, op = '>', + dates = NULL, start = NULL, end = NULL, + time_dim = 'time', na.rm = FALSE, + ncores = NULL) { + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!is.numeric(data)) { + stop("Parameter 'data' must be numeric.") + } + if (!is.array(data)) { + dim(data) <- length(data) + names(dim(data)) <- time_dim + } + if (is.null(threshold)) { + stop("Parameter 'threshold' cannot be NULL.") + } + if (!is.numeric(threshold)) { + stop("Parameter 'threshold' must be numeric.") + } + if (!is.array(threshold) && length(threshold) > 1) { + dim(threshold) <- length(threshold) + names(dim(threshold)) <- time_dim + } else if (length(threshold) == 1) { + dim(threshold) <- NULL + } + if (is.null(names(dim(data)))) { + stop("Parameter 'data' must have named dimensions.") + } + if (is.null(names(dim(threshold))) && length(threshold) > 1) { + stop("Parameter 'threshold' must have named dimensions.") + } + # This check doen't seem necessary. It limits flexibility. + #common_dims <- which(names(dim(data)) %in% names(dim(threshold))) + #if (length(threshold) > 1) { + # if (any(dim(data)[common_dims] != + # dim(threshold)[which(names(dim(threshold)) %in% names(dim(data)))])) { + # stop("Parameter 'data' and 'threshold' must have the same length on common dimensions.") + # } + #} + if (!is.null(dates)) { + if (!is.null(start) && !is.null(end)) { + if (!any(c(is.list(start), is.list(end)))) { + stop("Parameter 'start' and 'end' must be lists indicating the ", + "day and the month of the period start and end.") + } + if (all(time_dim %in% names(dim(threshold)))) { + if (dim(threshold)[time_dim] == dim(data)[time_dim]) { + threshold <- SelectPeriodOnData(threshold, dates, start, end, + time_dim = time_dim, ncores = ncores) + } + } + data <- SelectPeriodOnData(data, dates, start, end, + time_dim = time_dim, ncores = ncores) + } + } + if (is.null(dim(threshold))) { + total <- Apply(list(data), target_dims = time_dim, + fun = .sumexceedthreshold, + y = threshold, op = op, na.rm = na.rm, + ncores = ncores)$output1 + } else if (all(time_dim %in% names(dim(threshold)))) { + total <- Apply(list(data, threshold), + target_dims = list(time_dim, time_dim), + fun = .sumexceedthreshold, op = op, na.rm = na.rm, + ncores = ncores)$output1 + } else { + total <- Apply(list(data, threshold), + target_dims = list(time_dim, NULL), + fun = .sumexceedthreshold, op = op, na.rm = na.rm, + ncores = ncores)$output1 + } + return(total) +} + +#x <- 1:10 +#y <- 3 +#.sumexceedthreshold(x, y, '>', T) +.sumexceedthreshold <- function(x, y, op, na.rm) { + if (op == '>') { + res <- sum(x[x > y], na.rm = na.rm) + } else if (op == '<') { + res <- sum(x[x < y], na.rm = na.rm) + } else if (op == '<=') { + res <- sum(x[x <= y], na.rm = na.rm) + } else { + res <- sum(x[x >= y], na.rm = na.rm) + } + return(res) +} + diff --git a/man/AccumulationExceedingThreshold.Rd b/man/AccumulationExceedingThreshold.Rd new file mode 100644 index 0000000000000000000000000000000000000000..3e1218df3a8d5442a06658dee8f7459c133a2315 --- /dev/null +++ b/man/AccumulationExceedingThreshold.Rd @@ -0,0 +1,62 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AccumulationExceedingThreshold.R +\name{AccumulationExceedingThreshold} +\alias{AccumulationExceedingThreshold} +\title{Accumulation of a variable when Exceeding (not exceeding) a Threshold} +\usage{ +AccumulationExceedingThreshold( + data, + threshold, + op = ">", + dates = NULL, + start = NULL, + end = NULL, + time_dim = "time", + na.rm = FALSE, + ncores = NULL +) +} +\arguments{ +\item{data}{a multidimensional array with named dimensions.} + +\item{threshold}{a multidimensional array with named dimensions in the same units as parameter 'data' and with the common dimensions of the element 'data' of the same length.} + +\item{op}{a opartor '>' (by default), '<', '>=' or '<='.} + +\item{dates}{a vector of dates or a multidimensional array of dates with named dimensions matching the dimensions on parameter 'data'. By default it is NULL, to select a period this parameter must be provided.} + +\item{start}{an optional parameter to defined the initial date of the period to select from the data by providing a list of two elements: the initial date of the period and the initial month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}.} + +\item{end}{an optional parameter to defined the final date of the period to select from the data by providing a list of two elements: the final day of the period and the final month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}.} + +\item{time_dim}{a character string indicating the name of the function to compute the indicator. By default, it is set to 'ftime'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified.} + +\item{na.rm}{a logical value indicating whether to ignore NA values (TRUE) or not (FALSE).} + +\item{ncores}{an integer indicating the number of cores to use in parallel computation.} +} +\value{ +A multidimensional array with named dimensions. +} +\description{ +The accumulation (sum) of a variable in the days (or time steps) that the variable is exceeding (or not exceeding) a threshold during a period. The threshold provided must be +in the same units than the variable units, i.e. to use a percentile as a scalar, +the function \code{Threshold} or \code{QThreshold} may be needed. +Providing mean daily temperature data, the following agriculture indices for heat stress can be obtained by using this function: +\itemize{ + \item\code{GDD}{Summation of daily differences between daily average temperatures and 10°C between April 1st and October 31st}} +} +\examples{ +exp <- CSTools::lonlat_data$exp$data +AET <- AccumulationExceedingThreshold(exp, threshold = 280, time_dim = 'ftime') +# Assuming data is already (tasmax + tasmin)/2 - 10 +data <- array(rnorm(5 * 3 * 214 * 2, mean = 25, sd = 3), + c(memb = 5, sdate = 3, time = 214, lon = 2)) +Dates <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), + as.Date("30-11-2000", format = "\%d-\%m-\%Y"), by = 'day'), + seq(as.Date("01-05-2001", format = "\%d-\%m-\%Y"), + as.Date("30-11-2001", format = "\%d-\%m-\%Y"), by = 'day'), + seq(as.Date("01-05-2002", format = "\%d-\%m-\%Y"), + as.Date("30-11-2002", format = "\%d-\%m-\%Y"), by = 'day')) +GDD <- AccumulationExceedingThreshold(data, threshold = 0, start = list(1, 4), end = list(31, 10)) +} diff --git a/man/CST_AccumulationExceedingThreshold.Rd b/man/CST_AccumulationExceedingThreshold.Rd new file mode 100644 index 0000000000000000000000000000000000000000..00eaef4ec438a7a97171ec5ef6fcc09ba7909507 --- /dev/null +++ b/man/CST_AccumulationExceedingThreshold.Rd @@ -0,0 +1,61 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AccumulationExceedingThreshold.R +\name{CST_AccumulationExceedingThreshold} +\alias{CST_AccumulationExceedingThreshold} +\title{Accumulation of a variable when Exceeding (not exceeding) a Threshold} +\usage{ +CST_AccumulationExceedingThreshold( + data, + threshold, + op = ">", + start = NULL, + end = NULL, + time_dim = "ftime", + na.rm = FALSE, + ncores = NULL +) +} +\arguments{ +\item{data}{a 's2dv_cube' object as provided by function \code{CST_Load} in package CSTools.} + +\item{threshold}{a 's2dv_cube' object as output of a 'CST_' function in the same units as parameter 'data' and with the common dimensions of the element 'data' of the same length. A single scalar is also possible.} + +\item{op}{a opartor '>' (by default), '<', '>=' or '<='.} + +\item{start}{an optional parameter to defined the initial date of the period to select from the data by providing a list of two elements: the initial date of the period and the initial month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}.} + +\item{end}{an optional parameter to defined the final date of the period to select from the data by providing a list of two elements: the final day of the period and the final month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}.} + +\item{time_dim}{a character string indicating the name of the function to compute the indicator. By default, it is set to 'ftime'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified.} + +\item{na.rm}{a logical value indicating whether to ignore NA values (TRUE) or not (FALSE).} + +\item{ncores}{an integer indicating the number of cores to use in parallel computation.} +} +\value{ +A 's2dv_cube' object containing the indicator in the element \code{data}. +} +\description{ +The accumulation (sum) of a variable in the days (or time steps) that the variable is exceeding (or not exceeding) a threshold during a period. The threshold provided must be +in the same units than the variable units, i.e. to use a percentile as a scalar, +the function \code{Threshold} or \code{QThreshold} may be needed. +Providing mean daily temperature data, the following agriculture indices for heat stress can be obtained by using this function: +\itemize{ + \item\code{GDD}{Summation of daily differences between daily average temperatures and 10°C between April 1st and October 31st}} +} +\examples{ +exp <- CSTools::lonlat_data$exp +DOT <- CST_AccumulationExceedingThreshold(exp, threshold = 280) +# Assuming exp$data is already (tasmax + tasmin)/2 - 10 +exp$data <- array(rnorm(5 * 3 * 214 * 2, mean = 25, sd = 3), + c(memb = 5, sdate = 3, ftime = 214, lon = 2)) +exp$Dates[[1]] <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), + as.Date("30-11-2000", format = "\%d-\%m-\%Y"), by = 'day'), + seq(as.Date("01-05-2001", format = "\%d-\%m-\%Y"), + as.Date("30-11-2001", format = "\%d-\%m-\%Y"), by = 'day'), + seq(as.Date("01-05-2002", format = "\%d-\%m-\%Y"), + as.Date("30-11-2002", format = "\%d-\%m-\%Y"), by = 'day')) +GDD <- CST_AccumulationExceedingThreshold(exp, threshold = 0, + start = list(1, 4), end = list(31, 10)) + +} diff --git a/tests/testthat/test-AccumulationExceedingThreshold.R b/tests/testthat/test-AccumulationExceedingThreshold.R new file mode 100644 index 0000000000000000000000000000000000000000..03367fa0bb41c08328018a44885ccccbbf5f1461 --- /dev/null +++ b/tests/testthat/test-AccumulationExceedingThreshold.R @@ -0,0 +1,86 @@ +context("Generic tests") +test_that("Sanity checks", { + #source("csindicators/R/AccumulationExceedingThreshold.R") + expect_error(AccumulationExceedingThreshold(NULL), + "Parameter 'data' cannot be NULL.") + expect_error(AccumulationExceedingThreshold('x'), + "Parameter 'data' must be numeric.") + data <- 1:20 + expect_error(AccumulationExceedingThreshold(data, NULL), + "Parameter 'threshold' cannot be NULL.") + expect_error(AccumulationExceedingThreshold(data, 'x'), + "Parameter 'threshold' must be numeric.") + threshold <- 10 + expect_equal(AccumulationExceedingThreshold(data, threshold), 155) + dim(data) <- c(2, 10) + expect_error(AccumulationExceedingThreshold(data, threshold), + "Parameter 'data' must have named dimensions.") + names(dim(data)) <- c('lat', 'time') + threshold <- array(1:2, 2) + expect_error(AccumulationExceedingThreshold(data, threshold), + "Parameter 'threshold' must have named dimensions.") + dim(threshold) <- c(time = 2) + data <- array(1:40, c(x = 2, ftime = 20)) + expect_error(AccumulationExceedingThreshold(data, threshold), + "Could not find dimension 'time' in 1th object provided in 'data'.") + threshold <- 10 + expect_equal(AccumulationExceedingThreshold(data, threshold, time_dim = 'ftime'), + array(c(375, 390), c(x = 2))) + dim(threshold) <- c(member = 1, ftime = 1) + expect_equal(AccumulationExceedingThreshold(data, threshold, time_dim = 'ftime'), + array(c(375, 390), c(x = 2))) + expect_equal(AccumulationExceedingThreshold(data, threshold, time_dim = 'x'), + array(c(rep(0,5), seq(23, 79, 4)), c(ftime = 20))) + expect_error(AccumulationExceedingThreshold(data, threshold, + time_dim = 'x', ncores = 'Z'), + "Parameter 'ncores' must be numeric") + + expect_equal(AccumulationExceedingThreshold(data, threshold, time_dim = 2), + array(c(375, 390), c(x = 2))) + # dimensions: + data <- array(1:20, c(time = 5, sdate = 2, lat = 2)) + # does this case made sense? + threshold <- array(1:5, c(time = 5)) + expect_equal(dim(AccumulationExceedingThreshold(data, threshold)), + c(sdate = 2, lat = 2)) + threshold <- array(1:2, c(lat = 2)) + expect_equal(dim(AccumulationExceedingThreshold(data, threshold)), + c(sdate = 2, lat = 2)) + data <- array(1:60, c(time = 5, fyear = 3, sdate = 2, lat = 2)) + expect_equal(dim(AccumulationExceedingThreshold(data, threshold, + time_dim = c('time', 'fyear'))), + c(sdate = 2, lat = 2)) + +}) + +test_that("Seasonal forecasts", { + + exp <- CSTools::lonlat_data$exp + exp$data <- exp$data[,1:4,1:2,,,] + res <- CST_AccumulationExceedingThreshold(exp, threshold = 280) + expect_equal(round(res$data[,2,2,2]), + c(0, 280, 281, 281)) + # GDD + exp <- array(NA, dim = c(member = 6, sdate = 3, ftime = 214, lat =4, lon = 4)) + exp1 <- drop(CSTools::lonlat_prec$data) * 86400000 + exp[,,1:31,,] <- exp1 + 10; exp[,,32:62,,] <- exp1 + 11 + exp[,,63:93,,] <- exp1 + 12; exp[,,94:124,,] <- exp1 + 13 + exp[,,125:155,,] <- exp1 + 14; exp[,,156:186,,] <- exp1 + 15 + exp[,,187:214,,] <- exp1[,,1:28,,] + 16 + Dates <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), + as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), + seq(as.Date("01-05-2001", format = "%d-%m-%Y"), + as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), + seq(as.Date("01-05-2002", format = "%d-%m-%Y"), + as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) + GDD <- AccumulationExceedingThreshold(exp - 17, threshold = 0, dates = Dates, time_dim = 'ftime', + start = list(1, 4), end = list(31, 10), na.rm = TRUE) + expect_equal(round(GDD[,1,1,1]), + c(538, 372, 116, 525, 220, 330)) + expect_equal(dim(GDD), + c(member = 6, sdate = 3, lat =4, lon = 4)) + expect_error(AccumulationExceedingThreshold(exp - 17, threshold = 0, dates = Dates, start = list(1, 4), end = list(31, 10)), + "Could not find dimension 'time' in 1th object provided in 'data'.") + expect_equal(all(is.na(AccumulationExceedingThreshold(exp - 17, threshold = 0, dates = Dates, time_dim = 'ftime',start = list(1, 4), end = list(31, 10)))), + all(is.na(c(NA, NA)))) +})