From e5331abc0515572be15152cef127888249355b68 Mon Sep 17 00:00:00 2001 From: nperez Date: Tue, 26 Jan 2021 19:43:41 +0100 Subject: [PATCH 01/16] TotalTimeExceedingThreshold with tests --- .Rbuildignore | 2 +- NAMESPACE | 2 + R/TotalTimeExceedingThreshold.R | 194 ++++++++++++++++++ man/CST_TotalTimeExceedingThreshold.Rd | 64 ++++++ man/TotalTimeExceedingThreshold.Rd | 67 ++++++ tests/testthat.R | 4 + .../test-TotalTimeExceedingThreshold.R | 27 +++ 7 files changed, 359 insertions(+), 1 deletion(-) create mode 100644 R/TotalTimeExceedingThreshold.R create mode 100644 man/CST_TotalTimeExceedingThreshold.Rd create mode 100644 man/TotalTimeExceedingThreshold.Rd create mode 100644 tests/testthat.R create mode 100644 tests/testthat/test-TotalTimeExceedingThreshold.R diff --git a/.Rbuildignore b/.Rbuildignore index fa596e7..b2d8e5f 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -5,4 +5,4 @@ ./.nc$ .*^(?!data)\.RData$ .*\.gitlab-ci.yml$ -^tests$ +#^tests$ diff --git a/NAMESPACE b/NAMESPACE index 80f1ec6..c3b2fb2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,9 @@ # Generated by roxygen2: do not edit by hand export(CST_PeriodAccumulation) +export(CST_TotalTimeExceedingThreshold) export(PeriodAccumulation) export(SelectPeriodOnData) export(SelectPeriodOnDates) +export(TotalTimeExceedingThreshold) import(multiApply) diff --git a/R/TotalTimeExceedingThreshold.R b/R/TotalTimeExceedingThreshold.R new file mode 100644 index 0000000..49fceda --- /dev/null +++ b/R/TotalTimeExceedingThreshold.R @@ -0,0 +1,194 @@ +#'Total Time of a variable Exceeding (not exceeding) a Threshold +#' +#'The Total Time of a variable exceeding (or not) a Threshold returns the number of days +#'(if the data provided is daily, or the corresponding units to the data frequency provided) +#' that a variable is exceeding a threshold during a period. The threshold provided must be +#'in the same units than the variable units, i.e. to use a percentile as a threshold, +#'the function \code{Threshold} or \code{QThreshold} may be needed (see examples). +#'Providing maximum temperature daily data, the following agriculture indices for heat stress can be obtained by using this function: +#'\itemize{ +#' \item\code{SU35}{Total count of days when daily maximum temperatures exceed 35°C} +#' \item\code{SU36}{Total count of days when daily maximum temperatures exceed 36 between June 21st and September 21st} +#' \item\code{SU40}{Total count of days when daily maximum temperatures exceed 40 between June 21st and September 21st} +#'} +#' +#'@param data an 's2dv_cube' object as provided by function \code{CST_Load} in package CSTools. +#'@param threshold an 's2dv_cube' object as output of a 'CST_' function in the same units as parameter 'data' and with the common dimensions of the element 'data' of the same length. A single scalar is also possible. +#'@param op a opartor '>' (by default), '<', '>=' or '<='. +#'@param start an optional parameter to defined the initial date of the period to select from the data by providing a list of two elements: the initial date of the period and the initial month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}. +#'@param end an optional parameter to defined the final date of the period to select from the data by providing a list of two elements: the final day of the period and the final month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}. +#'@param time_dim a character string indicating the name of the function to compute the indicator. By default, it is set to 'ftime'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified. +#'@param na.rm a logical value indicating whether to ignore NA values (TRUE) or not (FALSE). +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#' +#'@return A 's2dv_cube' object containing the indicator in the element \code{data}. +#' +#'@import multiApply +#'@examples +#'exp <- CSTools::lonlat_data$exp +#'DOT <- CST_TotalTimeExceedingThreshold(exp, threshold = 280, time_dim = 'ftime') +#'exp$data <- array(rnorm(5 * 3 * 214 * 2, mean = 25, sd = 3), +#' c(memb = 5, sdate = 3, time = 214, lon = 2)) +#'exp$Dates[[1]] <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), +#' as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2001", format = "%d-%m-%Y"), +#' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), +#' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) +#'#threshold <- QThreshold() +#'SU35 <- CST_TotalTimeExceedingThreshold(exp, threshold = 35) +#'@export +CST_TotalTimeExceedingThreshold <- function(data, threshold, op = '>', + start = NULL, end = NULL, + time_dim = 'time', + na.rm = FALSE, ncores = NULL) { + if (!inherits(data, 's2dv_cube')) { + stop("Parameter 'data' must be of the class 's2dv_cube', ", + "as output by CSTools::CST_Load.") + } + # when subsetting is needed, dimensions are also needed: + if (!is.null(start) && !is.null(end)) { + if (is.null(dim(data$Dates$start))) { + if (length(data$Dates$start) != dim(data$data)[time_dim]) { + if (length(data$Dates$start) == + prod(dim(data$data)[time_dim] * dim(data$data)['sdate'])) { + dim(data$Dates$start) <- c(dim(data$data)[time_dim], + dim(data$data)['sdate']) + } + } else { + warning("Dimensions in 'data' element 'Dates$start' are missed and", + "all data would be used.") + } + } + } + total <- TotalTimeExceedingThreshold(data$data, data$Dates[[1]], + threshold = threshold, op = op, + start = start, end = end, time_dim = time_dim, + na.rm = na.rm, ncores = ncores) + data$data <- total + if (!is.null(start) && !is.null(end)) { + data$Dates <- SelectPeriodOnDates(dates = data$Dates$start, + start = start, end = end, + time_dim = time_dim, ncores = ncores) + } + return(data) +} +#'Total Time of a variable Exceeding (not exceeding) a Threshold +#' +#'The Total Time of a variable exceeding (or not) a Threshold returns the number of days +#'(if the data provided is daily, or the corresponding units to the data frequency provided) +#' that a variable is exceeding a threshold during a period. The threshold provided must be +#'in the same units than the variable units, i.e. to use a percentile as a threshold, +#'the function \code{Threshold} or \code{QThreshold} may be needed (see examples). +#'Providing maximum temperature daily data, the following agriculture indices for heat stress can be obtained by using this function: +#'\itemize{ +#' \item\code{SU35}{Total count of days when daily maximum temperatures exceed 35°C} +#' \item\code{SU36}{Total count of days when daily maximum temperatures exceed 36 between June 21st and September 21st} +#' \item\code{SU40}{Total count of days when daily maximum temperatures exceed 40 between June 21st and September 21st} +#'} +#' +#'@param data a multidimensional array with named dimensions. +#'@param threshold a multidimensional array with named dimensions in the same units as parameter 'data' and with the common dimensions of the element 'data' of the same length. +#'@param op a opartor '>' (by default), '<', '>=' or '<='. +#'@param dates a vector of dates or a multidimensional array of dates with named dimensions matching the dimensions on parameter 'data'. By default it is NULL, to select a period this parameter must be provided. +#'@param start an optional parameter to defined the initial date of the period to select from the data by providing a list of two elements: the initial date of the period and the initial month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}. +#'@param end an optional parameter to defined the final date of the period to select from the data by providing a list of two elements: the final day of the period and the final month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}. +#'@param time_dim a character string indicating the name of the function to compute the indicator. By default, it is set to 'ftime'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified. +#'@param na.rm a logical value indicating whether to ignore NA values (TRUE) or not (FALSE). +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#' +#'@return A multidimensional array with named dimensions. +#' +#'@import multiApply +#'@examples +#'exp <- CSTools::lonlat_data$exp$data +#'DOT <- TotalTimeExceedingThreshold(exp, threshold = 280, time_dim = 'ftime') +#'data <- array(rnorm(5 * 3 * 214 * 2, mean = 25, sd = 3), +#' c(memb = 5, sdate = 3, time = 214, lon = 2)) +#'Dates <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), +#' as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2001", format = "%d-%m-%Y"), +#' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), +#' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) +#'#threshold <- QThreshold() +#'SU35 <- TotalTimeExceedingThreshold(data, threshold = 35) +#'@export +TotalTimeExceedingThreshold <- function(data, threshold, op = '>', + dates = NULL, start = NULL, end = NULL, + time_dim = 'time', na.rm = FALSE, + ncores = NULL) { + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!is.numeric(data)) { + stop("Parameter 'data' must be numeric.") + } + if (!is.array(data)) { + dim(data) <- length(data) + names(dim(data)) <- time_dim + } + if (is.null(threshold)) { + stop("Parameter 'threshold' cannot be NULL.") + } + if (!is.numeric(threshold)) { + stop("Parameter 'threshold' must be numeric.") + } + if (!is.array(threshold) && length(threshold) > 1) { + dim(threshold) <- length(threshold) + names(dim(threshold)) <- time_dim + } else if (length(threshold) == 1) { + dim(threshold) <- NULL + } + if (is.null(names(dim(data)))) { + stop("Parameter 'data' must have named dimensions.") + } + if (is.null(names(dim(threshold))) && length(threshold) > 1) { + stop("Parameter 'threshold' must have named dimensions.") + } + common_dims <- which(names(dim(data)) %in% names(dim(threshold))) + if (length(threshold) > 1) { + if (any(dim(data)[common_dims] != + dim(threshold)[which(names(dim(threshold)) %in% names(dim(data)))])) { + stop("Parameter 'data' and 'threshold' must have the same length on common dimensions.") + } + } + if (is.null(dates)) { + warning("Parameter 'dates' is NULL and the index of the ", + "full data provided in 'data' is computed.") + } else { + if (!is.null(start) && !is.null(end)) { + if (!any(c(is.list(start), is.list(end)))) { + stop("Parameter 'start' and 'end' must be lists indicating the ", + "day and the month of the period start and end.") + } + data <- SelectPeriodOnData(data, dates, start, end, + time_dim = time_dim, ncores = ncores) + } + } + if (is.null(dim(threshold))) { + total <- Apply(list(data), target_dims = time_dim, + fun = .exceedthreshold, + y = threshold, op = op, na.rm = na.rm, + ncores = ncores)$output1 + } else { + total <- Apply(list(data, threshold), + target_dims = list(time_dim, NULL), + fun = .exceedthreshold, op = op, na.rm = na.rm, + ncores = ncores)$output1 + } + return(total) +} +.exceedthreshold <- function(x, y, op, na.rm) { + if (op == '>') { + res <- sum(x > y, na.rm = na.rm) + } else if (op == '<') { + res <- sum(x < y, na.rm = na.rm) + } else if (op == '<=') { + res <- sum(x <= y, na.rm = na.rm) + } else { + res <- sum(x >= y, na.rm = na.rm) + } + return(res) +} + diff --git a/man/CST_TotalTimeExceedingThreshold.Rd b/man/CST_TotalTimeExceedingThreshold.Rd new file mode 100644 index 0000000..c21aa52 --- /dev/null +++ b/man/CST_TotalTimeExceedingThreshold.Rd @@ -0,0 +1,64 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/TotalTimeExceedingThreshold.R +\name{CST_TotalTimeExceedingThreshold} +\alias{CST_TotalTimeExceedingThreshold} +\title{Total Time of a variable Exceeding (not exceeding) a Threshold} +\usage{ +CST_TotalTimeExceedingThreshold( + data, + threshold, + op = ">", + start = NULL, + end = NULL, + time_dim = "time", + na.rm = FALSE, + ncores = NULL +) +} +\arguments{ +\item{data}{an 's2dv_cube' object as provided by function \code{CST_Load} in package CSTools.} + +\item{threshold}{an 's2dv_cube' object as output of a 'CST_' function in the same units as parameter 'data' and with the common dimensions of the element 'data' of the same length. A single scalar is also possible.} + +\item{op}{a opartor '>' (by default), '<', '>=' or '<='.} + +\item{start}{an optional parameter to defined the initial date of the period to select from the data by providing a list of two elements: the initial date of the period and the initial month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}.} + +\item{end}{an optional parameter to defined the final date of the period to select from the data by providing a list of two elements: the final day of the period and the final month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}.} + +\item{time_dim}{a character string indicating the name of the function to compute the indicator. By default, it is set to 'ftime'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified.} + +\item{na.rm}{a logical value indicating whether to ignore NA values (TRUE) or not (FALSE).} + +\item{ncores}{an integer indicating the number of cores to use in parallel computation.} +} +\value{ +A 's2dv_cube' object containing the indicator in the element \code{data}. +} +\description{ +The Total Time of a variable exceeding (or not) a Threshold returns the number of days +(if the data provided is daily, or the corresponding units to the data frequency provided) +that a variable is exceeding a threshold during a period. The threshold provided must be +in the same units than the variable units, i.e. to use a percentile as a threshold, +the function \code{Threshold} or \code{QThreshold} may be needed (see examples). +Providing maximum temperature daily data, the following agriculture indices for heat stress can be obtained by using this function: +\itemize{ + \item\code{SU35}{Total count of days when daily maximum temperatures exceed 35°C} + \item\code{SU36}{Total count of days when daily maximum temperatures exceed 36 between June 21st and September 21st} + \item\code{SU40}{Total count of days when daily maximum temperatures exceed 40 between June 21st and September 21st} +} +} +\examples{ +exp <- CSTools::lonlat_data$exp +DOT <- CST_TotalTimeExceedingThreshold(exp, threshold = 280, time_dim = 'ftime') +exp$data <- array(rnorm(5 * 3 * 214 * 2, mean = 25, sd = 3), + c(memb = 5, sdate = 3, time = 214, lon = 2)) +exp$Dates[[1]] <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), + as.Date("30-11-2000", format = "\%d-\%m-\%Y"), by = 'day'), + seq(as.Date("01-05-2001", format = "\%d-\%m-\%Y"), + as.Date("30-11-2001", format = "\%d-\%m-\%Y"), by = 'day'), + seq(as.Date("01-05-2002", format = "\%d-\%m-\%Y"), + as.Date("30-11-2002", format = "\%d-\%m-\%Y"), by = 'day')) +#threshold <- QThreshold() +SU35 <- CST_TotalTimeExceedingThreshold(exp, threshold = 35) +} diff --git a/man/TotalTimeExceedingThreshold.Rd b/man/TotalTimeExceedingThreshold.Rd new file mode 100644 index 0000000..c91c15c --- /dev/null +++ b/man/TotalTimeExceedingThreshold.Rd @@ -0,0 +1,67 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/TotalTimeExceedingThreshold.R +\name{TotalTimeExceedingThreshold} +\alias{TotalTimeExceedingThreshold} +\title{Total Time of a variable Exceeding (not exceeding) a Threshold} +\usage{ +TotalTimeExceedingThreshold( + data, + threshold, + op = ">", + dates = NULL, + start = NULL, + end = NULL, + time_dim = "time", + na.rm = FALSE, + ncores = NULL +) +} +\arguments{ +\item{data}{a multidimensional array with named dimensions.} + +\item{threshold}{a multidimensional array with named dimensions in the same units as parameter 'data' and with the common dimensions of the element 'data' of the same length.} + +\item{op}{a opartor '>' (by default), '<', '>=' or '<='.} + +\item{dates}{a vector of dates or a multidimensional array of dates with named dimensions matching the dimensions on parameter 'data'. By default it is NULL, to select a period this parameter must be provided.} + +\item{start}{an optional parameter to defined the initial date of the period to select from the data by providing a list of two elements: the initial date of the period and the initial month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}.} + +\item{end}{an optional parameter to defined the final date of the period to select from the data by providing a list of two elements: the final day of the period and the final month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}.} + +\item{time_dim}{a character string indicating the name of the function to compute the indicator. By default, it is set to 'ftime'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified.} + +\item{na.rm}{a logical value indicating whether to ignore NA values (TRUE) or not (FALSE).} + +\item{ncores}{an integer indicating the number of cores to use in parallel computation.} +} +\value{ +A multidimensional array with named dimensions. +} +\description{ +The Total Time of a variable exceeding (or not) a Threshold returns the number of days +(if the data provided is daily, or the corresponding units to the data frequency provided) +that a variable is exceeding a threshold during a period. The threshold provided must be +in the same units than the variable units, i.e. to use a percentile as a threshold, +the function \code{Threshold} or \code{QThreshold} may be needed (see examples). +Providing maximum temperature daily data, the following agriculture indices for heat stress can be obtained by using this function: +\itemize{ + \item\code{SU35}{Total count of days when daily maximum temperatures exceed 35°C} + \item\code{SU36}{Total count of days when daily maximum temperatures exceed 36 between June 21st and September 21st} + \item\code{SU40}{Total count of days when daily maximum temperatures exceed 40 between June 21st and September 21st} +} +} +\examples{ +exp <- CSTools::lonlat_data$exp$data +DOT <- TotalTimeExceedingThreshold(exp, threshold = 280, time_dim = 'ftime') +data <- array(rnorm(5 * 3 * 214 * 2, mean = 25, sd = 3), + c(memb = 5, sdate = 3, time = 214, lon = 2)) +Dates <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), + as.Date("30-11-2000", format = "\%d-\%m-\%Y"), by = 'day'), + seq(as.Date("01-05-2001", format = "\%d-\%m-\%Y"), + as.Date("30-11-2001", format = "\%d-\%m-\%Y"), by = 'day'), + seq(as.Date("01-05-2002", format = "\%d-\%m-\%Y"), + as.Date("30-11-2002", format = "\%d-\%m-\%Y"), by = 'day')) +#threshold <- QThreshold() +SU35 <- TotalTimeExceedingThreshold(data, threshold = 35) +} diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..79cf4a7 --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,4 @@ +library(testthat) +library(CSIndicators) + +test_check("CSIndicators") diff --git a/tests/testthat/test-TotalTimeExceedingThreshold.R b/tests/testthat/test-TotalTimeExceedingThreshold.R new file mode 100644 index 0000000..b710794 --- /dev/null +++ b/tests/testthat/test-TotalTimeExceedingThreshold.R @@ -0,0 +1,27 @@ +context("Generic tests") +test_that("Sanity checks", { + #source("csindicators/R/TotalTimeExceedingThreshold.R") + data <- 1:20 + threshold <- 10 + expect_warning(TotalTimeExceedingThreshold(data, threshold), + "Parameter 'dates' is NULL and the index of the full data provided in 'data' is computed.") + expect_equal(TotalTimeExceedingThreshold(data, threshold), 10) + data <- array(1:40, c(x = 2, ftime = 20)) + expect_error(TotalTimeExceedingThreshold(data, threshold), + "Could not find dimension 'time' in 1th object provided in 'data'.") + expect_equal(TotalTimeExceedingThreshold(data, threshold, time_dim = 'ftime'), + array(c(15, 15), c(x = 2))) + dim(threshold) <- c(member = 1, ftime = 1) + expect_equal(TotalTimeExceedingThreshold(data, threshold, time_dim = 'ftime'), + array(c(15, 15), c(x = 2))) + expect_equal(TotalTimeExceedingThreshold(data, threshold, time_dim = 'x'), + array(c(rep(0,5), rep(2,15)), c(ftime = 20))) + expect_error(TotalTimeExceedingThreshold(data, threshold, + time_dim = 'x', ncores = 'Z'), + "Parameter 'ncores' must be numeric") + + expect_equal(TotalTimeExceedingThreshold(data, threshold, time_dim = 2), + array(c(15,15), c(x = 2))) + +}) + -- GitLab From 8c4af8f9dda3a6bc74100297fbe545f88e7ffa2a Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 27 Jan 2021 12:07:37 +0100 Subject: [PATCH 02/16] AbsToProbs function --- NAMESPACE | 2 + R/AbsToProbs.R | 136 ++++++++++++++++++++++++++++++++++++++++++ man/AbsToProbs.Rd | 53 ++++++++++++++++ man/CST_AbsToProbs.Rd | 52 ++++++++++++++++ 4 files changed, 243 insertions(+) create mode 100644 R/AbsToProbs.R create mode 100644 man/AbsToProbs.Rd create mode 100644 man/CST_AbsToProbs.Rd diff --git a/NAMESPACE b/NAMESPACE index c3b2fb2..4d286b1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,7 @@ # Generated by roxygen2: do not edit by hand +export(AbsToProbs) +export(CST_AbsToProbs) export(CST_PeriodAccumulation) export(CST_TotalTimeExceedingThreshold) export(PeriodAccumulation) diff --git a/R/AbsToProbs.R b/R/AbsToProbs.R new file mode 100644 index 0000000..14ca5da --- /dev/null +++ b/R/AbsToProbs.R @@ -0,0 +1,136 @@ +#'Transform ensemble forecast into probabilities +#' +#'The Cumulative Distribution Function of a forecast is used to obtain the probabilities of each value in the ensemble. If multiple initializations (start dates) are provided, the function will create the Cumulative Distribution Function excluding the corresponding initialization. +#' +#'@param data an 's2dv_cube' object as provided function \code{CST_Load} in package CSTools. +#'@param start an optional parameter to defined the initial date of the period to select from the data by providing a list of two elements: the initial date of the period and the initial month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}. +#'@param end an optional parameter to defined the final date of the period to select from the data by providing a list of two elements: the final day of the period and the final month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}. +#'@param time_dim a character string indicating the name of the temporal dimension. By default, it is set to 'ftime'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified. This dimension is required to subset the data in a requested period. +#'@param memb_dim a character string indicating the name of the dimension in which the ensemble members are stored. +#'@param sdate_dim a character string indicating the name of the dimension in which the initialization dates are stored. +#'@param na.rm a logical value indicating whether to ignore NA values (TRUE) or not (FALSE). +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#' +#'@return A 's2dv_cube' object containing the probabilites in the element \code{data}. +#' +#'@import multiApply +#' +#'@examples +#'exp <- CSTools::lonlat_prec +#'exp_probs <- CST_AbsToProbs(exp) +#'exp$data <- array(rnorm(5 * 3 * 214 * 2), +#' c(memb = 5, sdate = 3, ftime = 214, lon = 2)) +#'exp$Dates[[1]] <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), +#' as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2001", format = "%d-%m-%Y"), +#' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), +#' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) +#'exp_probs <- CST_AbsToProbs(exp, start = list(21, 4), end = list(21, 6)) +#'@export +CST_AbsToProbs <- function(data, start = NULL, end = NULL, + time_dim = 'ftime', memb_dim = 'memb', + sdate_dim = 'sdate', ncores = NULL) { + if (!inherits(data, 's2dv_cube')) { + stop("Parameter 'data' must be of the class 's2dv_cube', ", + "as output by CSTools::CST_Load.") + } + # when subsetting is needed, dimensions are also needed: + if (!is.null(start) && !is.null(end)) { + if (is.null(dim(data$Dates$start))) { + if (length(data$Dates$start) != dim(data$data)[time_dim]) { + if (length(data$Dates$start) == + prod(dim(data$data)[time_dim] * dim(data$data)['sdate'])) { + dim(data$Dates$start) <- c(dim(data$data)[time_dim], + dim(data$data)['sdate']) + } + } else { + warning("Dimensions in 'data' element 'Dates$start' are missed and", + "all data would be used.") + } + } + } + probs <- AbsToProbs(data$data, data$Dates[[1]], start, end, + time_dim = time_dim, memb_dim = memb_dim, + sdate_dim = sdate_dim, ncores = ncores) + data$data <- probs + if (!is.null(start) && !is.null(end)) { + data$Dates <- SelectPeriodOnDates(dates = data$Dates[[1]], + start = start, end = end, + time_dim = time_dim, ncores = ncores) + } + return(data) +} +#'Transform ensemble forecast into probabilities +#' +#'The Cumulative Distribution Function of a forecast is used to obtain the probabilities of each value in the ensemble. If multiple initializations (start dates) are provided, the function will create the Cumulative Distribution Function excluding the corresponding initialization. +#' +#'@param data a multidimensional array with named dimensions. +#'@param dates a vector of dates or a multidimensional array of dates with named dimensions matching the dimensions on parameter 'data'. By default it is NULL, to select a period this parameter must be provided. +#'@param start an optional parameter to defined the initial date of the period to select from the data by providing a list of two elements: the initial date of the period and the initial month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}. +#'@param end an optional parameter to defined the final date of the period to select from the data by providing a list of two elements: the final day of the period and the final month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}. +#'@param time_dim a character string indicating the name of the temporal dimension. By default, it is set to 'ftime'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified. This dimension is required to subset the data in a requested period. +#'@param memb_dim a character string indicating the name of the dimension in which the ensemble members are stored. +#'@param sdate_dim a character string indicating the name of the dimension in which the initialization dates are stored. +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#' +#'@return A multidimensional array with named dimensions. +#' +#'@import multiApply +#' +#'@examples +#'exp <- CSTools::lonlat_prec$data +#'exp_probs <- AbsToProbs(exp) +#'data <- array(rnorm(5 * 3 * 214 * 2), +#' c(memb = 5, sdate = 3, ftime = 214, lon = 2)) +#'Dates <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), +#' as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2001", format = "%d-%m-%Y"), +#' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), +#' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) +#'exp_probs <- AbsToProbs(exp, start = list(21, 4), end = list(21, 6)) +#'@export +AbsToProbs <- function(data, dates = NULL, start = NULL, end = NULL, time_dim = 'time', + memb_dim = 'member', + sdate_dim = 'sdate', ncores = NULL) { + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!is.numeric(data)) { + stop("Parameter 'data' must be numeric.") + } + if (!is.array(data)) { + dim(data) <- c(length(data), 1) + names(dim(data)) <- c(memb_dim, sdate_dim) + if (!is.null(start) && !is.null(end)) { + if (!any(c(is.list(start), is.list(end)))) { + stop("Parameter 'start' and 'end' must be lists indicating the ", + "day and the month of the period start and end.") + } + data <- SelectPeriodOnData(data, dates, start, end, + time_dim = time_dim, ncores = ncores) + } + } + probs <- Apply(list(data), target_dims = c(memb_dim, sdate_dim), fun = .abstoprobs, + ncores = ncores)$output1 + return(probs) +} +.abstoprobs <- function(data) { + if (dim(data)[2] > 1 ) { # Several sdates + qres <- unlist( + lapply(1:(dim(data)[1]), function(x) { # dim 1: member + lapply(1:(dim(data)[2]), function(y) { # dim 2: sdate + ecdf(as.vector(data[,-y]))(data[x, y]) + }) + })) + dim(qres) <- c(dim(data)[2], dim(data)[1]) + } else { # One sdate + qres <- unlist( + lapply(1:(dim(data)[1]), function(x) { # dim 1: member + ecdf(as.vector(data))(data[x, 1]) + })) + dim(qres) <- c(dim(data)[2], dim(data)[1]) + } + return(qres) +} diff --git a/man/AbsToProbs.Rd b/man/AbsToProbs.Rd new file mode 100644 index 0000000..2618978 --- /dev/null +++ b/man/AbsToProbs.Rd @@ -0,0 +1,53 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AbsToProbs.R +\name{AbsToProbs} +\alias{AbsToProbs} +\title{Transform ensemble forecast into probabilities} +\usage{ +AbsToProbs( + data, + dates = NULL, + start = NULL, + end = NULL, + time_dim = "time", + memb_dim = "member", + sdate_dim = "sdate", + ncores = NULL +) +} +\arguments{ +\item{data}{a multidimensional array with named dimensions.} + +\item{dates}{a vector of dates or a multidimensional array of dates with named dimensions matching the dimensions on parameter 'data'. By default it is NULL, to select a period this parameter must be provided.} + +\item{start}{an optional parameter to defined the initial date of the period to select from the data by providing a list of two elements: the initial date of the period and the initial month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}.} + +\item{end}{an optional parameter to defined the final date of the period to select from the data by providing a list of two elements: the final day of the period and the final month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}.} + +\item{time_dim}{a character string indicating the name of the temporal dimension. By default, it is set to 'ftime'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified. This dimension is required to subset the data in a requested period.} + +\item{memb_dim}{a character string indicating the name of the dimension in which the ensemble members are stored.} + +\item{sdate_dim}{a character string indicating the name of the dimension in which the initialization dates are stored.} + +\item{ncores}{an integer indicating the number of cores to use in parallel computation.} +} +\value{ +A multidimensional array with named dimensions. +} +\description{ +The Cumulative Distribution Function of a forecast is used to obtain the probabilities of each value in the ensemble. If multiple initializations (start dates) are provided, the function will create the Cumulative Distribution Function excluding the corresponding initialization. +} +\examples{ +exp <- CSTools::lonlat_prec$data +exp_probs <- AbsToProbs(exp) +data <- array(rnorm(5 * 3 * 214 * 2), + c(memb = 5, sdate = 3, ftime = 214, lon = 2)) +Dates <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), + as.Date("30-11-2000", format = "\%d-\%m-\%Y"), by = 'day'), + seq(as.Date("01-05-2001", format = "\%d-\%m-\%Y"), + as.Date("30-11-2001", format = "\%d-\%m-\%Y"), by = 'day'), + seq(as.Date("01-05-2002", format = "\%d-\%m-\%Y"), + as.Date("30-11-2002", format = "\%d-\%m-\%Y"), by = 'day')) +exp_probs <- AbsToProbs(exp, start = list(21, 4), end = list(21, 6)) +} diff --git a/man/CST_AbsToProbs.Rd b/man/CST_AbsToProbs.Rd new file mode 100644 index 0000000..7fd43a5 --- /dev/null +++ b/man/CST_AbsToProbs.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AbsToProbs.R +\name{CST_AbsToProbs} +\alias{CST_AbsToProbs} +\title{Transform ensemble forecast into probabilities} +\usage{ +CST_AbsToProbs( + data, + start = NULL, + end = NULL, + time_dim = "ftime", + memb_dim = "memb", + sdate_dim = "sdate", + ncores = NULL +) +} +\arguments{ +\item{data}{an 's2dv_cube' object as provided function \code{CST_Load} in package CSTools.} + +\item{start}{an optional parameter to defined the initial date of the period to select from the data by providing a list of two elements: the initial date of the period and the initial month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}.} + +\item{end}{an optional parameter to defined the final date of the period to select from the data by providing a list of two elements: the final day of the period and the final month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}.} + +\item{time_dim}{a character string indicating the name of the temporal dimension. By default, it is set to 'ftime'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified. This dimension is required to subset the data in a requested period.} + +\item{memb_dim}{a character string indicating the name of the dimension in which the ensemble members are stored.} + +\item{sdate_dim}{a character string indicating the name of the dimension in which the initialization dates are stored.} + +\item{ncores}{an integer indicating the number of cores to use in parallel computation.} + +\item{na.rm}{a logical value indicating whether to ignore NA values (TRUE) or not (FALSE).} +} +\value{ +A 's2dv_cube' object containing the probabilites in the element \code{data}. +} +\description{ +The Cumulative Distribution Function of a forecast is used to obtain the probabilities of each value in the ensemble. If multiple initializations (start dates) are provided, the function will create the Cumulative Distribution Function excluding the corresponding initialization. +} +\examples{ +exp <- CSTools::lonlat_prec +exp_probs <- CST_AbsToProbs(exp) +exp$data <- array(rnorm(5 * 3 * 214 * 2), + c(memb = 5, sdate = 3, ftime = 214, lon = 2)) +exp$Dates[[1]] <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), + as.Date("30-11-2000", format = "\%d-\%m-\%Y"), by = 'day'), + seq(as.Date("01-05-2001", format = "\%d-\%m-\%Y"), + as.Date("30-11-2001", format = "\%d-\%m-\%Y"), by = 'day'), + seq(as.Date("01-05-2002", format = "\%d-\%m-\%Y"), + as.Date("30-11-2002", format = "\%d-\%m-\%Y"), by = 'day')) +exp_probs <- CST_AbsToProbs(exp, start = list(21, 4), end = list(21, 6)) +} -- GitLab From 593f4036af52b58d2acb95722efd4356c1e932f2 Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 27 Jan 2021 12:37:12 +0100 Subject: [PATCH 03/16] tests for AbsToProbs --- DESCRIPTION | 3 ++- NAMESPACE | 1 + R/AbsToProbs.R | 9 +++++---- man/AbsToProbs.Rd | 2 +- man/CST_AbsToProbs.Rd | 6 ++---- tests/testthat/test-AbsToProbs.R | 13 +++++++++++++ 6 files changed, 24 insertions(+), 10 deletions(-) create mode 100644 tests/testthat/test-AbsToProbs.R diff --git a/DESCRIPTION b/DESCRIPTION index 45093ee..599a097 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -7,7 +7,8 @@ Description: To be added. Depends: R (>= 3.6.1) Imports: - multiApply (>= 2.1.1) + multiApply (>= 2.1.1), + stats Suggests: testthat, CSTools diff --git a/NAMESPACE b/NAMESPACE index 4d286b1..179f82a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,3 +9,4 @@ export(SelectPeriodOnData) export(SelectPeriodOnDates) export(TotalTimeExceedingThreshold) import(multiApply) +importFrom(stats,ecdf) diff --git a/R/AbsToProbs.R b/R/AbsToProbs.R index 14ca5da..d2bcfbd 100644 --- a/R/AbsToProbs.R +++ b/R/AbsToProbs.R @@ -8,18 +8,18 @@ #'@param time_dim a character string indicating the name of the temporal dimension. By default, it is set to 'ftime'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified. This dimension is required to subset the data in a requested period. #'@param memb_dim a character string indicating the name of the dimension in which the ensemble members are stored. #'@param sdate_dim a character string indicating the name of the dimension in which the initialization dates are stored. -#'@param na.rm a logical value indicating whether to ignore NA values (TRUE) or not (FALSE). #'@param ncores an integer indicating the number of cores to use in parallel computation. #' #'@return A 's2dv_cube' object containing the probabilites in the element \code{data}. #' #'@import multiApply +#'@importFrom stats ecdf #' #'@examples #'exp <- CSTools::lonlat_prec #'exp_probs <- CST_AbsToProbs(exp) #'exp$data <- array(rnorm(5 * 3 * 214 * 2), -#' c(memb = 5, sdate = 3, ftime = 214, lon = 2)) +#' c(member = 5, sdate = 3, ftime = 214, lon = 2)) #'exp$Dates[[1]] <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), #' as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), #' seq(as.Date("01-05-2001", format = "%d-%m-%Y"), @@ -29,7 +29,7 @@ #'exp_probs <- CST_AbsToProbs(exp, start = list(21, 4), end = list(21, 6)) #'@export CST_AbsToProbs <- function(data, start = NULL, end = NULL, - time_dim = 'ftime', memb_dim = 'memb', + time_dim = 'ftime', memb_dim = 'member', sdate_dim = 'sdate', ncores = NULL) { if (!inherits(data, 's2dv_cube')) { stop("Parameter 'data' must be of the class 's2dv_cube', ", @@ -77,12 +77,13 @@ CST_AbsToProbs <- function(data, start = NULL, end = NULL, #'@return A multidimensional array with named dimensions. #' #'@import multiApply +#'@importFrom stats ecdf #' #'@examples #'exp <- CSTools::lonlat_prec$data #'exp_probs <- AbsToProbs(exp) #'data <- array(rnorm(5 * 3 * 214 * 2), -#' c(memb = 5, sdate = 3, ftime = 214, lon = 2)) +#' c(member = 5, sdate = 3, ftime = 214, lon = 2)) #'Dates <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), #' as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), #' seq(as.Date("01-05-2001", format = "%d-%m-%Y"), diff --git a/man/AbsToProbs.Rd b/man/AbsToProbs.Rd index 2618978..7b56a78 100644 --- a/man/AbsToProbs.Rd +++ b/man/AbsToProbs.Rd @@ -42,7 +42,7 @@ The Cumulative Distribution Function of a forecast is used to obtain the probabi exp <- CSTools::lonlat_prec$data exp_probs <- AbsToProbs(exp) data <- array(rnorm(5 * 3 * 214 * 2), - c(memb = 5, sdate = 3, ftime = 214, lon = 2)) + c(member = 5, sdate = 3, ftime = 214, lon = 2)) Dates <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), as.Date("30-11-2000", format = "\%d-\%m-\%Y"), by = 'day'), seq(as.Date("01-05-2001", format = "\%d-\%m-\%Y"), diff --git a/man/CST_AbsToProbs.Rd b/man/CST_AbsToProbs.Rd index 7fd43a5..ec1bd8f 100644 --- a/man/CST_AbsToProbs.Rd +++ b/man/CST_AbsToProbs.Rd @@ -9,7 +9,7 @@ CST_AbsToProbs( start = NULL, end = NULL, time_dim = "ftime", - memb_dim = "memb", + memb_dim = "member", sdate_dim = "sdate", ncores = NULL ) @@ -28,8 +28,6 @@ CST_AbsToProbs( \item{sdate_dim}{a character string indicating the name of the dimension in which the initialization dates are stored.} \item{ncores}{an integer indicating the number of cores to use in parallel computation.} - -\item{na.rm}{a logical value indicating whether to ignore NA values (TRUE) or not (FALSE).} } \value{ A 's2dv_cube' object containing the probabilites in the element \code{data}. @@ -41,7 +39,7 @@ The Cumulative Distribution Function of a forecast is used to obtain the probabi exp <- CSTools::lonlat_prec exp_probs <- CST_AbsToProbs(exp) exp$data <- array(rnorm(5 * 3 * 214 * 2), - c(memb = 5, sdate = 3, ftime = 214, lon = 2)) + c(member = 5, sdate = 3, ftime = 214, lon = 2)) exp$Dates[[1]] <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), as.Date("30-11-2000", format = "\%d-\%m-\%Y"), by = 'day'), seq(as.Date("01-05-2001", format = "\%d-\%m-\%Y"), diff --git a/tests/testthat/test-AbsToProbs.R b/tests/testthat/test-AbsToProbs.R new file mode 100644 index 0000000..5ddbd21 --- /dev/null +++ b/tests/testthat/test-AbsToProbs.R @@ -0,0 +1,13 @@ +context("Generic tests") +test_that("Sanity checks", { + #source("csindicators/R/AbsToProbs.R") + expect_error(AbsToProbs('x'), "Parameter 'data' must be numeric.") + expect_equal(AbsToProbs(1), array(1, c(sdate = 1, member = 1))) + expect_equal(AbsToProbs(1, memb_dim = 'x'), array(1, c(sdate = 1, x = 1))) + expect_error(AbsToProbs(data = NULL), "Parameter 'data' cannot be NULL.") + expect_error(AbsToProbs(1, dates = '2000-01-01', end = 3, start = 4), + "Parameter 'start' and 'end' must be lists indicating the day and the month of the period start and end.") + expect_equal(AbsToProbs(1:10), array(seq(0.1, 1, 0.1), c(sdate = 1, member = 10))) + data <- array(1:24, c(member = 3, sdate = 2, lon = 4)) + expect_equal(AbsToProbs(data), array(rep(0:1,12), c(sdate = 2, member = 3, lon = 4))) +}) -- GitLab From 34ebc2c7ab96967df8de5ce6f119ba1d32c105c8 Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 28 Jan 2021 13:42:06 +0100 Subject: [PATCH 04/16] QThreshold added --- DESCRIPTION | 3 +- NAMESPACE | 3 + R/QThreshold.R | 205 ++++++++++++++++++++++++++++++++++++++++++ man/CST_QThreshold.Rd | 63 +++++++++++++ man/QThreshold.Rd | 64 +++++++++++++ 5 files changed, 337 insertions(+), 1 deletion(-) create mode 100644 R/QThreshold.R create mode 100644 man/CST_QThreshold.Rd create mode 100644 man/QThreshold.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 599a097..a1b7396 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -8,7 +8,8 @@ Depends: R (>= 3.6.1) Imports: multiApply (>= 2.1.1), - stats + stats, + ClimProjDiags Suggests: testthat, CSTools diff --git a/NAMESPACE b/NAMESPACE index 179f82a..6f35745 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,10 +3,13 @@ export(AbsToProbs) export(CST_AbsToProbs) export(CST_PeriodAccumulation) +export(CST_QThreshold) export(CST_TotalTimeExceedingThreshold) export(PeriodAccumulation) +export(QThreshold) export(SelectPeriodOnData) export(SelectPeriodOnDates) export(TotalTimeExceedingThreshold) import(multiApply) +importFrom(ClimProjDiags,Subset) importFrom(stats,ecdf) diff --git a/R/QThreshold.R b/R/QThreshold.R new file mode 100644 index 0000000..115c1bc --- /dev/null +++ b/R/QThreshold.R @@ -0,0 +1,205 @@ +#'Transform an absolute threshold into probabilities +#' +#'From a user perspective, an absolute threshold can be very useful for a specific needs (e.g.: grape variety). +#' However, this absolute threshold could be transform to a relative threshold in order to get its frequency in a given dataset. +#'Therefore, the function \code{QThreshold} returns the probability of an absolute threshold. +#'This is done by computing the Cumulative Distribution Function of a sample and leaving-one-ot. +#' The sample used will depend on the dimensions of the data provided and the dimension names provided in sdate_dim and memb_dim parameters: +#'\itemize{ +#' \item{Wheter a forecast (hindcast) has dimensions member and start date, and both must be used in the sample, their names should be passed in sdate_dim and memb_dim.} +#' \item{Wheter a forecast (hindcast) has dimensions member and start date, and only start date must be used in the sample (the calculation is done in each separate member), memb_dim can be set to NULL.} +#' \item{Wheter a reference (observations) has start date dimension, the sample used is the start date dimension.} +#' \item{Wheter a reference (observations) doesn't have start date dimension, the sample used must be especified in sdate_dim parameter.}} +#' +#'@param data an 's2dv_cube' object as provided function \code{CST_Load} in package CSTools. +#'@param threshold an 's2dv_cube' object as output of a 'CST_' function in the same units as parameter 'data' and with the common dimensions of the element 'data' of the same length. A single scalar is also possible. +#'@param start an optional parameter to defined the initial date of the period to select from the data by providing a list of two elements: the initial date of the period and the initial month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}. +#'@param end an optional parameter to defined the final date of the period to select from the data by providing a list of two elements: the final day of the period and the final month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}. +#'@param time_dim a character string indicating the name of the temporal dimension. By default, it is set to 'ftime'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified. This dimension is required to subset the data in a requested period. +#'@param memb_dim a character string indicating the name of the dimension in which the ensemble members are stored. +#'@param sdate_dim a character string indicating the name of the dimension in which the initialization dates are stored. +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#' +#'@return A 's2dv_cube' object containing the probabilites in the element \code{data}. +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#' +#'@examples +#'threshold <- 26 +#'exp <- CSTools::lonlat_prec +#'exp_probs <- CST_QThreshold(exp, threshold) +#'exp$data <- array(rnorm(5 * 3 * 214 * 2), +#' c(member = 5, sdate = 3, ftime = 214, lon = 2)) +#'exp$Dates[[1]] <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), +#' as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2001", format = "%d-%m-%Y"), +#' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), +#' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) +#'exp_probs <- CST_QThreshold(exp, threshold, start = list(21, 4), end = list(21, 6)) +#'@export +CST_QThreshold <- function(data, threshold, start = NULL, end = NULL, + time_dim = 'ftime', memb_dim = 'member', sdate_dim = 'sdate', + ncores = NULL) { + if (!inherits(data, 's2dv_cube')) { + stop("Parameter 'data' must be of the class 's2dv_cube', ", + "as output by CSTools::CST_Load.") + } + # when subsetting is needed, dimensions are also needed: + if (!is.null(start) && !is.null(end)) { + if (is.null(dim(data$Dates$start))) { + if (length(data$Dates$start) != dim(data$data)[time_dim]) { + if (length(data$Dates$start) == + prod(dim(data$data)[time_dim] * dim(data$data)['sdate'])) { + dim(data$Dates$start) <- c(dim(data$data)[time_dim], + dim(data$data)['sdate']) + } + } else { + warning("Dimensions in 'data' element 'Dates$start' are missed and", + "all data would be used.") + } + } + } + if (inherits(threshold, 's2dv_cube')) { + threshold <- threshold$data + } + probs <- QThreshold(data$data, threshold, data$Dates[[1]], start, end, + time_dim = time_dim, memb_dim = memb_dim, + sdate_dim = sdate_dim, ncores = ncores) + data$data <- probs + if (!is.null(start) && !is.null(end)) { + data$Dates <- SelectPeriodOnDates(dates = data$Dates[[1]], + start = start, end = end, + time_dim = time_dim, ncores = ncores) + } + return(data) +} +#'Transform an absolute threshold into probabilities +#' +#'From a user perspective, an absolute threshold can be very useful for a specific needs (e.g.: grape variety). +#' However, this absolute threshold could be transform to a relative threshold in order to get its frequency in a given dataset. +#'Therefore, the function \code{QThreshold} returns the probability of an absolute threshold. +#'This is done by computing the Cumulative Distribution Function of a sample and leaving-one-ot. +#' The sample used will depend on the dimensions of the data provided and the dimension names provided in sdate_dim and memb_dim parameters: +#'\itemize{ +#' \item{Wheter a forecast (hindcast) has dimensions member and start date, and both must be used in the sample, their names should be passed in sdate_dim and memb_dim.} +#' \item{Wheter a forecast (hindcast) has dimensions member and start date, and only start date must be used in the sample (the calculation is done in each separate member), memb_dim can be set to NULL.} +#' \item{Wheter a reference (observations) has start date dimension, the sample used is the start date dimension.} +#' \item{Wheter a reference (observations) doesn't have start date dimension, the sample used must be especified in sdate_dim parameter.}} +#' +#'@param data a multidimensional array with named dimensions. +#'@param threshold a multidimensional array with named dimensions in the same units as parameter 'data' and with the common dimensions of the element 'data' of the same length. +#'@param dates a vector of dates or a multidimensional array of dates with named dimensions matching the dimensions on parameter 'data'. By default it is NULL, to select a period this parameter must be provided. +#'@param start an optional parameter to defined the initial date of the period to select from the data by providing a list of two elements: the initial date of the period and the initial month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}. +#'@param end an optional parameter to defined the final date of the period to select from the data by providing a list of two elements: the final day of the period and the final month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}. +#'@param time_dim a character string indicating the name of the temporal dimension. By default, it is set to 'ftime'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified. This dimension is required to subset the data in a requested period. +#'@param memb_dim a character string indicating the name of the dimension in which the ensemble members are stored. +#'@param sdate_dim a character string indicating the name of the dimension in which the initialization dates are stored. +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#' +#'@return A multidimensional array with named dimensions. +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@examples +#'threshold = 25 +#'data <- array(rnorm(25 * 3 * 214 * 2, mean = 26), +#' c(member = 25, sdate = 3, time = 214, lon = 2)) +#'thres_q <- QThreshold(data, threshold) +#'data <- array(rnorm(1 * 3 * 214 * 2), c(member = 1, sdate = 3, time = 214, lon = 2)) +#'threshold <- array(1:428, c(time = 214, lon = 2)) +#'res <- QThreshold(data, threshold) +#'threshold <- array(1:50, c(member = 25, lon = 2)) +#'# it can work using the version for separated members. +#'threshold <- array(1:75, c(member = 25, sdate = 3)) +#'@export +QThreshold <- function(data, threshold, dates = NULL, start = NULL, end = NULL, + time_dim = 'time', memb_dim = 'member', sdate_dim = 'sdate', + ncores = NULL) { + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!is.numeric(data)) { + stop("Parameter 'data' must be numeric.") + } + + if (!is.array(data)) { + dim(data) <- c(length(data), 1) + names(dim(data)) <- c(memb_dim, sdate_dim) + } + if (!is.null(dates)) { + if (!is.null(start) && !is.null(end)) { + if (!any(c(is.list(start), is.list(end)))) { + stop("Parameter 'start' and 'end' must be lists indicating the ", + "day and the month of the period start and end.") + } + if (time_dim %in% names(dim(threshold))) { + if (dim(threshold)[time_dim] == dim(data)[time_dim]) { + if (!is.null(dim(dates)) && sdate_dim %in% dim(dates)) { + dates_thres <- Subset(dates, along = sdate_dim, indices = 1) + threshold <- SelectPeriodOnData(threshold, dates_thres, start, end, + time_dim = time_dim, ncores = ncores) + } else { + threshold <- SelectPeriodOnData(threshold, dates, start, end, + time_dim = time_dim, ncores = ncores) + } + } + } + data <- SelectPeriodOnData(data, dates, start, end, + time_dim = time_dim, ncores = ncores) + } + } + if (length(threshold) == 1) { + if (memb_dim %in% names(dim(data))) { + probs <- Apply(list(data), target_dims = c(memb_dim, sdate_dim), + fun = .qthreshold_exp, + threshold, ncores = ncores)$output1 + } else { + probs <- Apply(list(data), target_dims = sdate_dim, fun = .qthreshold_exp, + threshold, ncores = ncores)$output1 + } + } else { + target_thres <- NULL + if (sdate_dim %in% names(dim(threshold))) { + stop("Parameter threshold cannot have dimension 'sdate_dim'.") + } + if (memb_dim %in% names(dim(data))) { + if (memb_dim %in% names(dim(threshold))) { + # comparison member by member + probs <- Apply(list(data, threshold), + target_dims = list(sdate_dim, NULL), + fun = .qthreshold_obs, ncores = ncores)$output1 + } else { + probs <- Apply(list(data, threshold), + target_dims = list(c(memb_dim, sdate_dim), NULL), + fun = .qthreshold_exp, ncores = ncores)$output1 + } + } else { + probs <- Apply(list(data, threshold), target_dims = list(sdate_dim, NULL), + fun = .qthreshold_obs, ncores = ncores)$output1 + } + } + return(probs) +} +# By splitting the atomic function, a conditional repetition is avoided +# inside the Apply loops +.qthreshold_obs <- function(data, threshold) { + # dims data: sdate + dims <- dim(data) + # no 'member' involving + qres <- unlist(lapply(1:dims, function(x) { + ecdf(data[-x])(threshold)})) + dim(qres) <- c(dims) + return(qres) +} +.qthreshold_exp <- function(data, threshold) { + qres <- unlist( + lapply(1:(dim(data)[1]), function(x) { # dim 1: member + lapply(1:(dim(data)[2]), function(y) { # dim 2: sdate + ecdf(as.vector(data[,-y]))(threshold) + }) + })) + dim(qres) <- c(dim(data)[2], dim(data)[1]) + return(qres) +} diff --git a/man/CST_QThreshold.Rd b/man/CST_QThreshold.Rd new file mode 100644 index 0000000..744724a --- /dev/null +++ b/man/CST_QThreshold.Rd @@ -0,0 +1,63 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/QThreshold.R +\name{CST_QThreshold} +\alias{CST_QThreshold} +\title{Transform an absolute threshold into probabilities} +\usage{ +CST_QThreshold( + data, + threshold, + start = NULL, + end = NULL, + time_dim = "ftime", + memb_dim = "member", + sdate_dim = "sdate", + ncores = NULL +) +} +\arguments{ +\item{data}{an 's2dv_cube' object as provided function \code{CST_Load} in package CSTools.} + +\item{threshold}{an 's2dv_cube' object as output of a 'CST_' function in the same units as parameter 'data' and with the common dimensions of the element 'data' of the same length. A single scalar is also possible.} + +\item{start}{an optional parameter to defined the initial date of the period to select from the data by providing a list of two elements: the initial date of the period and the initial month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}.} + +\item{end}{an optional parameter to defined the final date of the period to select from the data by providing a list of two elements: the final day of the period and the final month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}.} + +\item{time_dim}{a character string indicating the name of the temporal dimension. By default, it is set to 'ftime'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified. This dimension is required to subset the data in a requested period.} + +\item{memb_dim}{a character string indicating the name of the dimension in which the ensemble members are stored.} + +\item{sdate_dim}{a character string indicating the name of the dimension in which the initialization dates are stored.} + +\item{ncores}{an integer indicating the number of cores to use in parallel computation.} +} +\value{ +A 's2dv_cube' object containing the probabilites in the element \code{data}. +} +\description{ +From a user perspective, an absolute threshold can be very useful for a specific needs (e.g.: grape variety). +However, this absolute threshold could be transform to a relative threshold in order to get its frequency in a given dataset. +Therefore, the function \code{QThreshold} returns the probability of an absolute threshold. +This is done by computing the Cumulative Distribution Function of a sample and leaving-one-ot. +The sample used will depend on the dimensions of the data provided and the dimension names provided in sdate_dim and memb_dim parameters: +\itemize{ + \item{Wheter a forecast (hindcast) has dimensions member and start date, and both must be used in the sample, their names should be passed in sdate_dim and memb_dim.} + \item{Wheter a forecast (hindcast) has dimensions member and start date, and only start date must be used in the sample (the calculation is done in each separate member), memb_dim can be set to NULL.} + \item{Wheter a reference (observations) has start date dimension, the sample used is the start date dimension.} + \item{Wheter a reference (observations) doesn't have start date dimension, the sample used must be especified in sdate_dim parameter.}} +} +\examples{ +threshold <- 26 +exp <- CSTools::lonlat_prec +exp_probs <- CST_QThreshold(exp, threshold) +exp$data <- array(rnorm(5 * 3 * 214 * 2), + c(member = 5, sdate = 3, ftime = 214, lon = 2)) +exp$Dates[[1]] <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), + as.Date("30-11-2000", format = "\%d-\%m-\%Y"), by = 'day'), + seq(as.Date("01-05-2001", format = "\%d-\%m-\%Y"), + as.Date("30-11-2001", format = "\%d-\%m-\%Y"), by = 'day'), + seq(as.Date("01-05-2002", format = "\%d-\%m-\%Y"), + as.Date("30-11-2002", format = "\%d-\%m-\%Y"), by = 'day')) +exp_probs <- CST_QThreshold(exp, threshold, start = list(21, 4), end = list(21, 6)) +} diff --git a/man/QThreshold.Rd b/man/QThreshold.Rd new file mode 100644 index 0000000..257f21e --- /dev/null +++ b/man/QThreshold.Rd @@ -0,0 +1,64 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/QThreshold.R +\name{QThreshold} +\alias{QThreshold} +\title{Transform an absolute threshold into probabilities} +\usage{ +QThreshold( + data, + threshold, + dates = NULL, + start = NULL, + end = NULL, + time_dim = "time", + memb_dim = "member", + sdate_dim = "sdate", + ncores = NULL +) +} +\arguments{ +\item{data}{a multidimensional array with named dimensions.} + +\item{threshold}{a multidimensional array with named dimensions in the same units as parameter 'data' and with the common dimensions of the element 'data' of the same length.} + +\item{dates}{a vector of dates or a multidimensional array of dates with named dimensions matching the dimensions on parameter 'data'. By default it is NULL, to select a period this parameter must be provided.} + +\item{start}{an optional parameter to defined the initial date of the period to select from the data by providing a list of two elements: the initial date of the period and the initial month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}.} + +\item{end}{an optional parameter to defined the final date of the period to select from the data by providing a list of two elements: the final day of the period and the final month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}.} + +\item{time_dim}{a character string indicating the name of the temporal dimension. By default, it is set to 'ftime'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified. This dimension is required to subset the data in a requested period.} + +\item{memb_dim}{a character string indicating the name of the dimension in which the ensemble members are stored.} + +\item{sdate_dim}{a character string indicating the name of the dimension in which the initialization dates are stored.} + +\item{ncores}{an integer indicating the number of cores to use in parallel computation.} +} +\value{ +A multidimensional array with named dimensions. +} +\description{ +From a user perspective, an absolute threshold can be very useful for a specific needs (e.g.: grape variety). +However, this absolute threshold could be transform to a relative threshold in order to get its frequency in a given dataset. +Therefore, the function \code{QThreshold} returns the probability of an absolute threshold. +This is done by computing the Cumulative Distribution Function of a sample and leaving-one-ot. +The sample used will depend on the dimensions of the data provided and the dimension names provided in sdate_dim and memb_dim parameters: +\itemize{ + \item{Wheter a forecast (hindcast) has dimensions member and start date, and both must be used in the sample, their names should be passed in sdate_dim and memb_dim.} + \item{Wheter a forecast (hindcast) has dimensions member and start date, and only start date must be used in the sample (the calculation is done in each separate member), memb_dim can be set to NULL.} + \item{Wheter a reference (observations) has start date dimension, the sample used is the start date dimension.} + \item{Wheter a reference (observations) doesn't have start date dimension, the sample used must be especified in sdate_dim parameter.}} +} +\examples{ +threshold = 25 +data <- array(rnorm(25 * 3 * 214 * 2, mean = 26), + c(member = 25, sdate = 3, time = 214, lon = 2)) +thres_q <- QThreshold(data, threshold) +data <- array(rnorm(1 * 3 * 214 * 2), c(member = 1, sdate = 3, time = 214, lon = 2)) +threshold <- array(1:428, c(time = 214, lon = 2)) +res <- QThreshold(data, threshold) +threshold <- array(1:50, c(member = 25, lon = 2)) +# it can work using the version for separated members. +threshold <- array(1:75, c(member = 25, sdate = 3)) +} -- GitLab From 954b0dafe8109a0359d9421b826d48dc4f60714f Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 28 Jan 2021 16:17:33 +0100 Subject: [PATCH 05/16] tests QThreshold --- R/QThreshold.R | 23 +++++++- R/TotalTimeExceedingThreshold.R | 13 +++-- tests/testthat/test-QThreshold.R | 57 +++++++++++++++++++ .../test-TotalTimeExceedingThreshold.R | 34 ++++++++++- 4 files changed, 119 insertions(+), 8 deletions(-) create mode 100644 tests/testthat/test-QThreshold.R diff --git a/R/QThreshold.R b/R/QThreshold.R index 115c1bc..366cec2 100644 --- a/R/QThreshold.R +++ b/R/QThreshold.R @@ -128,6 +128,27 @@ QThreshold <- function(data, threshold, dates = NULL, start = NULL, end = NULL, dim(data) <- c(length(data), 1) names(dim(data)) <- c(memb_dim, sdate_dim) } + if (is.null(threshold)) { + stop("Parameter 'threshold' cannot be NULL.") + } + if (!is.numeric(threshold)) { + stop("Parameter 'threshold' must be numeric.") + } + if (!is.array(threshold) && length(threshold) > 1) { + dim(threshold) <- length(threshold) + names(dim(threshold)) <- time_dim + } else if (length(threshold) == 1) { + dim(threshold) <- NULL + } + if (is.null(names(dim(data)))) { + stop("Parameter 'data' must have named dimensions.") + } + if (is.null(names(dim(threshold))) && length(threshold) > 1) { + stop("Parameter 'threshold' must have named dimensions.") + } + if (is.null(memb_dim)) { + memb_dim <- 99999 + } if (!is.null(dates)) { if (!is.null(start) && !is.null(end)) { if (!any(c(is.list(start), is.list(end)))) { @@ -156,7 +177,7 @@ QThreshold <- function(data, threshold, dates = NULL, start = NULL, end = NULL, fun = .qthreshold_exp, threshold, ncores = ncores)$output1 } else { - probs <- Apply(list(data), target_dims = sdate_dim, fun = .qthreshold_exp, + probs <- Apply(list(data), target_dims = sdate_dim, fun = .qthreshold_obs, threshold, ncores = ncores)$output1 } } else { diff --git a/R/TotalTimeExceedingThreshold.R b/R/TotalTimeExceedingThreshold.R index 49fceda..91ffc3f 100644 --- a/R/TotalTimeExceedingThreshold.R +++ b/R/TotalTimeExceedingThreshold.R @@ -153,16 +153,19 @@ TotalTimeExceedingThreshold <- function(data, threshold, op = '>', stop("Parameter 'data' and 'threshold' must have the same length on common dimensions.") } } - if (is.null(dates)) { - warning("Parameter 'dates' is NULL and the index of the ", - "full data provided in 'data' is computed.") - } else { + if (!is.null(dates)) { if (!is.null(start) && !is.null(end)) { if (!any(c(is.list(start), is.list(end)))) { stop("Parameter 'start' and 'end' must be lists indicating the ", "day and the month of the period start and end.") } - data <- SelectPeriodOnData(data, dates, start, end, + if (time_dim %in% names(dim(threshold))) { + if (dim(threshold)[time_dim] == dim(data)[time_dim]) { + threshold <- SelectPeriodOnData(threshold, dates_thres, start, end, + time_dim = time_dim, ncores = ncores) + } + } + data <- SelectPeriodOnData(data, dates, start, end, time_dim = time_dim, ncores = ncores) } } diff --git a/tests/testthat/test-QThreshold.R b/tests/testthat/test-QThreshold.R new file mode 100644 index 0000000..b6bf593 --- /dev/null +++ b/tests/testthat/test-QThreshold.R @@ -0,0 +1,57 @@ +context("Generic tests") +test_that("Sanity checks", { + #source("csindicators/R/QThreshold.R") + expect_error(QThreshold(NULL), + "Parameter 'data' cannot be NULL.") + expect_error(QThreshold('x'), + "Parameter 'data' must be numeric.") + data <- 1:20 + expect_error(QThreshold(data, NULL), + "Parameter 'threshold' cannot be NULL.") + expect_error(QThreshold(data, 'x'), + "Parameter 'threshold' must be numeric.") + threshold <- 10 + expect_error(QThreshold(data, threshold), + "'x' must have 1 or more non-missing values") + dim(data) <- c(2, 10) + expect_error(QThreshold(data, threshold), + "Parameter 'data' must have named dimensions.") + names(dim(data)) <- c('lat', 'sdate') + threshold <- array(1:2, 2) + expect_error(QThreshold(data, threshold), + "Parameter 'threshold' must have named dimensions.") + dim(threshold) <- c(time = 2) + + data <- array(1:40, c(x = 2, sdate = 20)) + threshold <- 10 + expect_equal(dim(QThreshold(data, threshold)), c(sdate = 20, x = 2)) + data <- array(1:40, c(x = 2, ftime = 20)) + expect_error(QThreshold(data, threshold), + "Could not find dimension 'sdate' in 1th object provided in 'data'.") + expect_equal(dim(QThreshold(data, threshold, sdate_dim = 'ftime')), + c(ftime = 20, x = 2)) + dim(threshold) <- c(member = 1, ftime = 1) + expect_equal(dim(QThreshold(data, threshold, sdate_dim = 'ftime')), + c(ftime = 20, x = 2)) + expect_equal(dim(QThreshold(data, threshold, memb_dim = 'x', sdate_dim = 'ftime')), + c(ftime = 20, x = 2)) + expect_error(QThreshold(data, threshold, + sdate_dim = 'x', ncores = 'Z'), + "Parameter 'ncores' must be numeric") + + # dimensions: + data <- array(1:20, c(time = 5, sdate = 2, lat = 2)) + # does this case made sense? + threshold <- array(1:5, c(time = 5)) + expect_equal(dim(QThreshold(data, threshold)), + c(sdate = 2, time = 5, lat = 2)) + threshold <- array(1:2, c(lat = 2)) + expect_equal(dim(QThreshold(data, threshold)), + c(sdate = 2, time = 5, lat = 2)) + data <- array(1:60, c(time = 5, member = 3, sdate = 2, lat = 2)) + expect_equal(dim(QThreshold(data, threshold)), + c(sdate = 2, member = 3, time = 5, lat = 2)) + expect_equal(dim(QThreshold(data, threshold, memb_dim = NULL)), + c(sdate = 2, time = 5, member = 3, lat = 2)) + +}) diff --git a/tests/testthat/test-TotalTimeExceedingThreshold.R b/tests/testthat/test-TotalTimeExceedingThreshold.R index b710794..fa5b911 100644 --- a/tests/testthat/test-TotalTimeExceedingThreshold.R +++ b/tests/testthat/test-TotalTimeExceedingThreshold.R @@ -1,14 +1,31 @@ context("Generic tests") test_that("Sanity checks", { #source("csindicators/R/TotalTimeExceedingThreshold.R") + expect_error(TotalTimeExceedingThreshold(NULL), + "Parameter 'data' cannot be NULL.") + expect_error(TotalTimeExceedingThreshold('x'), + "Parameter 'data' must be numeric.") data <- 1:20 + expect_error(TotalTimeExceedingThreshold(data, NULL), + "Parameter 'threshold' cannot be NULL.") + expect_error(TotalTimeExceedingThreshold(data, 'x'), + "Parameter 'threshold' must be numeric.") threshold <- 10 - expect_warning(TotalTimeExceedingThreshold(data, threshold), - "Parameter 'dates' is NULL and the index of the full data provided in 'data' is computed.") expect_equal(TotalTimeExceedingThreshold(data, threshold), 10) + dim(data) <- c(2, 10) + expect_error(TotalTimeExceedingThreshold(data, threshold), + "Parameter 'data' must have named dimensions.") + names(dim(data)) <- c('lat', 'time') + threshold <- array(1:2, 2) + expect_error(TotalTimeExceedingThreshold(data, threshold), + "Parameter 'threshold' must have named dimensions.") + dim(threshold) <- c(time = 2) + expect_error(TotalTimeExceedingThreshold(data, threshold), + "Parameter 'data' and 'threshold' must have the same length on common dimensions.") data <- array(1:40, c(x = 2, ftime = 20)) expect_error(TotalTimeExceedingThreshold(data, threshold), "Could not find dimension 'time' in 1th object provided in 'data'.") + threshold <- 10 expect_equal(TotalTimeExceedingThreshold(data, threshold, time_dim = 'ftime'), array(c(15, 15), c(x = 2))) dim(threshold) <- c(member = 1, ftime = 1) @@ -22,6 +39,19 @@ test_that("Sanity checks", { expect_equal(TotalTimeExceedingThreshold(data, threshold, time_dim = 2), array(c(15,15), c(x = 2))) + # dimensions: + data <- array(1:20, c(time = 5, sdate = 2, lat = 2)) + # does this case made sense? + threshold <- array(1:5, c(time = 5)) + expect_equal(dim(TotalTimeExceedingThreshold(data, threshold)), + c(sdate = 2, lat = 2, time = 5)) + threshold <- array(1:2, c(lat = 2)) + expect_equal(dim(TotalTimeExceedingThreshold(data, threshold)), + c(sdate = 2, lat = 2)) + data <- array(1:60, c(time = 5, fyear = 3, sdate = 2, lat = 2)) + expect_equal(dim(TotalTimeExceedingThreshold(data, threshold, + time_dim = c('time', 'fyear'))), + c(sdate = 2, lat = 2)) }) -- GitLab From 017d7afda721a73cff14e4d4838612fc10cb5428 Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 28 Jan 2021 16:36:06 +0100 Subject: [PATCH 06/16] fix variable name --- R/TotalTimeExceedingThreshold.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/TotalTimeExceedingThreshold.R b/R/TotalTimeExceedingThreshold.R index 91ffc3f..92048e2 100644 --- a/R/TotalTimeExceedingThreshold.R +++ b/R/TotalTimeExceedingThreshold.R @@ -161,7 +161,7 @@ TotalTimeExceedingThreshold <- function(data, threshold, op = '>', } if (time_dim %in% names(dim(threshold))) { if (dim(threshold)[time_dim] == dim(data)[time_dim]) { - threshold <- SelectPeriodOnData(threshold, dates_thres, start, end, + threshold <- SelectPeriodOnData(threshold, dates, start, end, time_dim = time_dim, ncores = ncores) } } -- GitLab From 4069249d31e18ba985b119f093803ea8b636ef3f Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 28 Jan 2021 16:57:11 +0100 Subject: [PATCH 07/16] threshold to be an array --- R/TotalTimeExceedingThreshold.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/TotalTimeExceedingThreshold.R b/R/TotalTimeExceedingThreshold.R index 92048e2..3fcc87d 100644 --- a/R/TotalTimeExceedingThreshold.R +++ b/R/TotalTimeExceedingThreshold.R @@ -61,6 +61,9 @@ CST_TotalTimeExceedingThreshold <- function(data, threshold, op = '>', } } } + if (inherits(threshold, 's2dv_cube')) { + threshold <- threshold$data + } total <- TotalTimeExceedingThreshold(data$data, data$Dates[[1]], threshold = threshold, op = op, start = start, end = end, time_dim = time_dim, -- GitLab From dca58ca5a0bff3f20955fb777d2c2ee2867c48f8 Mon Sep 17 00:00:00 2001 From: Chihchung Chou Date: Wed, 3 Feb 2021 15:40:03 +0100 Subject: [PATCH 08/16] threshold criteria revised & percentile examples added --- R/TotalTimeExceedingThreshold.R | 70 +++++++++++++++++++++++++++------ 1 file changed, 57 insertions(+), 13 deletions(-) diff --git a/R/TotalTimeExceedingThreshold.R b/R/TotalTimeExceedingThreshold.R index 49fceda..49d5ac4 100644 --- a/R/TotalTimeExceedingThreshold.R +++ b/R/TotalTimeExceedingThreshold.R @@ -1,24 +1,25 @@ -#'Total Time of a variable Exceeding (not exceeding) a Threshold +#'Total Time of a variable Exceeding (not exceeding) a Threshold #' -#'The Total Time of a variable exceeding (or not) a Threshold returns the number of days +#'The Total Time of a variable exceeding (or not) a Threshold returns the total number of days #'(if the data provided is daily, or the corresponding units to the data frequency provided) #' that a variable is exceeding a threshold during a period. The threshold provided must be -#'in the same units than the variable units, i.e. to use a percentile as a threshold, +#'in the same units than the variable units, i.e. to use a percentile as a scalar, #'the function \code{Threshold} or \code{QThreshold} may be needed (see examples). #'Providing maximum temperature daily data, the following agriculture indices for heat stress can be obtained by using this function: #'\itemize{ -#' \item\code{SU35}{Total count of days when daily maximum temperatures exceed 35°C} +#' \item\code{SU35}{Total count of days when daily maximum temperatures exceed 35°C in the seven months from the start month given (e.g. from April to October for start month of April).} #' \item\code{SU36}{Total count of days when daily maximum temperatures exceed 36 between June 21st and September 21st} #' \item\code{SU40}{Total count of days when daily maximum temperatures exceed 40 between June 21st and September 21st} +#' \item\code{Spr32}{Total count of days when daily maximum temperatures exceed 32 between April 21st and June 21st} #'} #' -#'@param data an 's2dv_cube' object as provided by function \code{CST_Load} in package CSTools. -#'@param threshold an 's2dv_cube' object as output of a 'CST_' function in the same units as parameter 'data' and with the common dimensions of the element 'data' of the same length. A single scalar is also possible. +#'@param data a 's2dv_cube' object as provided by function \code{CST_Load} in package CSTools. +#'@param threshold a 's2dv_cube' object as output of a 'CST_' function in the same units as parameter 'data' and with the common dimensions of the element 'data' of the same length. A single scalar is also possible. #'@param op a opartor '>' (by default), '<', '>=' or '<='. #'@param start an optional parameter to defined the initial date of the period to select from the data by providing a list of two elements: the initial date of the period and the initial month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}. #'@param end an optional parameter to defined the final date of the period to select from the data by providing a list of two elements: the final day of the period and the final month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}. #'@param time_dim a character string indicating the name of the function to compute the indicator. By default, it is set to 'ftime'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified. -#'@param na.rm a logical value indicating whether to ignore NA values (TRUE) or not (FALSE). +#'@param na.rm a logical value indicating whether to ignore NA values (TRUE) or not (FALSE). #'@param ncores an integer indicating the number of cores to use in parallel computation. #' #'@return A 's2dv_cube' object containing the indicator in the element \code{data}. @@ -35,12 +36,39 @@ #' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), #' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), #' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) -#'#threshold <- QThreshold() #'SU35 <- CST_TotalTimeExceedingThreshold(exp, threshold = 35) +#' +#'# For the case of comparing between percentiles +#'exp <- CSTools::lonlat_data$exp +#'exp$data <- array(rnorm(5 * 3 * 214 * 2, mean = 28, sd = 10), +#' c(member = 5, sdate = 3, ftime = 214, lon = 2)) +#'exp$Dates[[1]] <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), +#' as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2001", format = "%d-%m-%Y"), +#' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), +#' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) +#'obs <- CSTools::lonlat_data$obs +#'obs$data <- array(rnorm(3 * 214 * 2, mean = 27, sd = 9), +#' c(sdate = 3, time = 214, lon = 2)) +#'obs$Dates[[1]] <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), +#' as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2001", format = "%d-%m-%Y"), +#' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), +#' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), +#' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) +#'exp_percentile <- AbsToProbs(exp$data) # convert foreasts to probability +#'obs_percentile <- QThreshold(obs$data, threshold = 35) # convert the fixed threshold to percentile with given observations +#'data <- exp # convert the data (exp_percentile) to a s2dv_cube object +#'data$data <- aperm(exp_percentile, c(2,1,3,4)) +#'data$Dates[[1]] <- exp$Dates[[1]] +#'names(dim(obs_percentile)) <- c('sdate', 'ftime', 'lon') # provide the correct name of dimension 'ftime' +#'SU35_percentile <- CST_TotalTimeExceedingThreshold(data, threshold = obs_percentile) # compare percentiles between forecasts and observations +#' #'@export CST_TotalTimeExceedingThreshold <- function(data, threshold, op = '>', start = NULL, end = NULL, - time_dim = 'time', + time_dim = 'ftime', na.rm = FALSE, ncores = NULL) { if (!inherits(data, 's2dv_cube')) { stop("Parameter 'data' must be of the class 's2dv_cube', ", @@ -73,9 +101,9 @@ CST_TotalTimeExceedingThreshold <- function(data, threshold, op = '>', } return(data) } -#'Total Time of a variable Exceeding (not exceeding) a Threshold +#'Total Time of a variable Exceeding (not exceeding) a Threshold #' -#'The Total Time of a variable exceeding (or not) a Threshold returns the number of days +#'The Total Time of a variable exceeding (or not) a Threshold returns the total number of days #'(if the data provided is daily, or the corresponding units to the data frequency provided) #' that a variable is exceeding a threshold during a period. The threshold provided must be #'in the same units than the variable units, i.e. to use a percentile as a threshold, @@ -85,6 +113,7 @@ CST_TotalTimeExceedingThreshold <- function(data, threshold, op = '>', #' \item\code{SU35}{Total count of days when daily maximum temperatures exceed 35°C} #' \item\code{SU36}{Total count of days when daily maximum temperatures exceed 36 between June 21st and September 21st} #' \item\code{SU40}{Total count of days when daily maximum temperatures exceed 40 between June 21st and September 21st} +#' \item\code{Spr32}{Total count of days when daily maximum temperatures exceed 32 between April 21st and June 21st} #'} #' #'@param data a multidimensional array with named dimensions. @@ -111,12 +140,22 @@ CST_TotalTimeExceedingThreshold <- function(data, threshold, op = '>', #' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), #' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), #' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) -#'#threshold <- QThreshold() #'SU35 <- TotalTimeExceedingThreshold(data, threshold = 35) +#' +#'# For the case of comparing between percentiles +#'exp <- array(rnorm(5 * 3 * 214 * 2, mean = 28, sd = 10), +#' c(member = 5, sdate = 3, ftime = 214, lon = 2)) +#'obs <- array(rnorm(3 * 214 * 2, mean = 27, sd = 9), +#' c(sdate = 3, time = 214, lon = 2)) +#'exp_percentile <- AbsToProbs(exp) # convert foreasts to probability +#'obs_percentile <- QThreshold(obs, threshold = 35) # convert the fixed threshold to percentile with given observations +#'SU35_percentile <- TotalTimeExceedingThreshold(aperm(exp_percentile, c(2,1,3,4)), +#' obs_percentile) # compare percentiles between forecasts and observations +#' #'@export TotalTimeExceedingThreshold <- function(data, threshold, op = '>', dates = NULL, start = NULL, end = NULL, - time_dim = 'time', na.rm = FALSE, + time_dim = 'ftime', na.rm = FALSE, ncores = NULL) { if (is.null(data)) { stop("Parameter 'data' cannot be NULL.") @@ -171,6 +210,11 @@ TotalTimeExceedingThreshold <- function(data, threshold, op = '>', fun = .exceedthreshold, y = threshold, op = op, na.rm = na.rm, ncores = ncores)$output1 + } else if (time_dim %in% names(dim(threshold))) { + total <- Apply(list(data, threshold), + target_dims = list(time_dim, time_dim), + fun = .exceedthreshold, op = op, na.rm = na.rm, + ncores = ncores)$output1 } else { total <- Apply(list(data, threshold), target_dims = list(time_dim, NULL), -- GitLab From cefd072505f965ec50af6766c9fa47712f90765f Mon Sep 17 00:00:00 2001 From: Chihchung Chou Date: Mon, 8 Feb 2021 03:44:45 +0100 Subject: [PATCH 09/16] percentile examples added in documentation --- R/TotalTimeExceedingThreshold.R | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/R/TotalTimeExceedingThreshold.R b/R/TotalTimeExceedingThreshold.R index 8243c66..e1bb97b 100644 --- a/R/TotalTimeExceedingThreshold.R +++ b/R/TotalTimeExceedingThreshold.R @@ -4,7 +4,7 @@ #'(if the data provided is daily, or the corresponding units to the data frequency provided) #' that a variable is exceeding a threshold during a period. The threshold provided must be #'in the same units than the variable units, i.e. to use a percentile as a scalar, -#'the function \code{Threshold} or \code{QThreshold} may be needed (see examples). +#'the function \code{AbsToProbs} or \code{QThreshold} may be needed (see examples). #'Providing maximum temperature daily data, the following agriculture indices for heat stress can be obtained by using this function: #'\itemize{ #' \item\code{SU35}{Total count of days when daily maximum temperatures exceed 35°C in the seven months from the start month given (e.g. from April to October for start month of April).} @@ -14,7 +14,7 @@ #'} #' #'@param data a 's2dv_cube' object as provided by function \code{CST_Load} in package CSTools. -#'@param threshold a 's2dv_cube' object as output of a 'CST_' function in the same units as parameter 'data' and with the common dimensions of the element 'data' of the same length. A single scalar is also possible. +#'@param threshold a 's2dv_cube' object as output of a 'CST_' function in the same units as parameter \code{data} and with the common dimensions of the element \code{data} of the same length (e.g. an array with the same lengths of longitude and latitude). A single scalar is also possible (for the case of comparing all grid points with the same scalar). #'@param op a opartor '>' (by default), '<', '>=' or '<='. #'@param start an optional parameter to defined the initial date of the period to select from the data by providing a list of two elements: the initial date of the period and the initial month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}. #'@param end an optional parameter to defined the final date of the period to select from the data by providing a list of two elements: the final day of the period and the final month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}. @@ -29,7 +29,7 @@ #'exp <- CSTools::lonlat_data$exp #'DOT <- CST_TotalTimeExceedingThreshold(exp, threshold = 280, time_dim = 'ftime') #'exp$data <- array(rnorm(5 * 3 * 214 * 2, mean = 25, sd = 3), -#' c(memb = 5, sdate = 3, time = 214, lon = 2)) +#' c(memb = 5, sdate = 3, ftime = 214, lon = 2)) #'exp$Dates[[1]] <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), #' as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), #' seq(as.Date("01-05-2001", format = "%d-%m-%Y"), @@ -38,7 +38,7 @@ #' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) #'SU35 <- CST_TotalTimeExceedingThreshold(exp, threshold = 35) #' -#'# For the case of comparing between percentiles +#'# For the case of comparing percentiles #'exp <- CSTools::lonlat_data$exp #'exp$data <- array(rnorm(5 * 3 * 214 * 2, mean = 28, sd = 10), #' c(member = 5, sdate = 3, ftime = 214, lon = 2)) @@ -51,18 +51,12 @@ #'obs <- CSTools::lonlat_data$obs #'obs$data <- array(rnorm(3 * 214 * 2, mean = 27, sd = 9), #' c(sdate = 3, time = 214, lon = 2)) -#'obs$Dates[[1]] <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), -#' as.Date("30-11-2000", format = "%d-%m-%Y"), by = 'day'), -#' seq(as.Date("01-05-2001", format = "%d-%m-%Y"), -#' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), -#' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), -#' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) +#'obs$Dates[[1]] <- exp$Dates[[1]] #'exp_percentile <- AbsToProbs(exp$data) # convert foreasts to probability #'obs_percentile <- QThreshold(obs$data, threshold = 35) # convert the fixed threshold to percentile with given observations #'data <- exp # convert the data (exp_percentile) to a s2dv_cube object #'data$data <- aperm(exp_percentile, c(2,1,3,4)) #'data$Dates[[1]] <- exp$Dates[[1]] -#'names(dim(obs_percentile)) <- c('sdate', 'ftime', 'lon') # provide the correct name of dimension 'ftime' #'SU35_percentile <- CST_TotalTimeExceedingThreshold(data, threshold = obs_percentile) # compare percentiles between forecasts and observations #' #'@export @@ -120,12 +114,12 @@ CST_TotalTimeExceedingThreshold <- function(data, threshold, op = '>', #'} #' #'@param data a multidimensional array with named dimensions. -#'@param threshold a multidimensional array with named dimensions in the same units as parameter 'data' and with the common dimensions of the element 'data' of the same length. +#'@param threshold a multidimensional array with named dimensions in the same units as parameter \code{data} and with the common dimensions of the element \code{data} of the same length (e.g. an array with the same lengths of longitude and latitude). A single scalar is also possible (for the case of comparing all grid points with the same scalar). #'@param op a opartor '>' (by default), '<', '>=' or '<='. #'@param dates a vector of dates or a multidimensional array of dates with named dimensions matching the dimensions on parameter 'data'. By default it is NULL, to select a period this parameter must be provided. #'@param start an optional parameter to defined the initial date of the period to select from the data by providing a list of two elements: the initial date of the period and the initial month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}. #'@param end an optional parameter to defined the final date of the period to select from the data by providing a list of two elements: the final day of the period and the final month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}. -#'@param time_dim a character string indicating the name of the function to compute the indicator. By default, it is set to 'ftime'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified. +#'@param time_dim a character string indicating the name of the function to compute the indicator. By default, it is set to 'time'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified. #'@param na.rm a logical value indicating whether to ignore NA values (TRUE) or not (FALSE). #'@param ncores an integer indicating the number of cores to use in parallel computation. #' @@ -147,7 +141,7 @@ CST_TotalTimeExceedingThreshold <- function(data, threshold, op = '>', #' #'# For the case of comparing between percentiles #'exp <- array(rnorm(5 * 3 * 214 * 2, mean = 28, sd = 10), -#' c(member = 5, sdate = 3, ftime = 214, lon = 2)) +#' c(member = 5, sdate = 3, time = 214, lon = 2)) #'obs <- array(rnorm(3 * 214 * 2, mean = 27, sd = 9), #' c(sdate = 3, time = 214, lon = 2)) #'exp_percentile <- AbsToProbs(exp) # convert foreasts to probability @@ -158,7 +152,7 @@ CST_TotalTimeExceedingThreshold <- function(data, threshold, op = '>', #'@export TotalTimeExceedingThreshold <- function(data, threshold, op = '>', dates = NULL, start = NULL, end = NULL, - time_dim = 'ftime', na.rm = FALSE, + time_dim = 'time', na.rm = FALSE, ncores = NULL) { if (is.null(data)) { stop("Parameter 'data' cannot be NULL.") -- GitLab From bf4989835781f781b2c461156adb1a6d5ecfabb0 Mon Sep 17 00:00:00 2001 From: Chihchung Chou Date: Mon, 8 Feb 2021 14:23:56 +0100 Subject: [PATCH 10/16] seasonal test added --- .../test-TotalTimeExceedingThreshold.R | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/testthat/test-TotalTimeExceedingThreshold.R b/tests/testthat/test-TotalTimeExceedingThreshold.R index fa5b911..337e848 100644 --- a/tests/testthat/test-TotalTimeExceedingThreshold.R +++ b/tests/testthat/test-TotalTimeExceedingThreshold.R @@ -55,3 +55,22 @@ test_that("Sanity checks", { }) +test_that("Seasonal forecasts", { + # compare with scalar fixed threshold + exp <- CSTools::lonlat_data$exp + exp$data <- exp$data[1,1:3,,,,] - 247 + SU35_NoP <- CST_TotalTimeExceedingThreshold(exp, threshold = 35)$data + expect_equal(SU35_NoP[1,,15,3], c(0,1,1,1,0,0)) + # convert to percentile + obs <- CSTools::lonlat_data$obs + exp_percentile <- AbsToProbs(exp$data) + obs_percentile <- drop(QThreshold(obs$data, threshold = 35)) + data <- exp + data$data <- exp_percentile + SU35_P <- CST_TotalTimeExceedingThreshold(data, threshold = obs_percentile)$data + expect_equal(SU35_P[,2,5,5], c(3,3,3,3,3,3)) +}) + + + expect_equal( + -- GitLab From 990d8a3f2957cb0d22a207a860995eeac575bf21 Mon Sep 17 00:00:00 2001 From: Chihchung Chou Date: Mon, 8 Feb 2021 14:30:24 +0100 Subject: [PATCH 11/16] test file revised --- tests/testthat/test-TotalTimeExceedingThreshold.R | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/testthat/test-TotalTimeExceedingThreshold.R b/tests/testthat/test-TotalTimeExceedingThreshold.R index 337e848..408764b 100644 --- a/tests/testthat/test-TotalTimeExceedingThreshold.R +++ b/tests/testthat/test-TotalTimeExceedingThreshold.R @@ -44,7 +44,7 @@ test_that("Sanity checks", { # does this case made sense? threshold <- array(1:5, c(time = 5)) expect_equal(dim(TotalTimeExceedingThreshold(data, threshold)), - c(sdate = 2, lat = 2, time = 5)) + c(sdate = 2, lat = 2)) threshold <- array(1:2, c(lat = 2)) expect_equal(dim(TotalTimeExceedingThreshold(data, threshold)), c(sdate = 2, lat = 2)) @@ -71,6 +71,4 @@ test_that("Seasonal forecasts", { expect_equal(SU35_P[,2,5,5], c(3,3,3,3,3,3)) }) - - expect_equal( -- GitLab From a3c74a9ab026e9515526f3492b63d2004ed27534 Mon Sep 17 00:00:00 2001 From: nperez Date: Tue, 9 Feb 2021 18:51:19 +0100 Subject: [PATCH 12/16] Fix examples --- R/TotalTimeExceedingThreshold.R | 26 ++++++++------- man/CST_TotalTimeExceedingThreshold.Rd | 46 +++++++++++++++++++------- man/TotalTimeExceedingThreshold.Rd | 23 ++++++++++--- 3 files changed, 66 insertions(+), 29 deletions(-) diff --git a/R/TotalTimeExceedingThreshold.R b/R/TotalTimeExceedingThreshold.R index e1bb97b..4c64dbd 100644 --- a/R/TotalTimeExceedingThreshold.R +++ b/R/TotalTimeExceedingThreshold.R @@ -27,7 +27,7 @@ #'@import multiApply #'@examples #'exp <- CSTools::lonlat_data$exp -#'DOT <- CST_TotalTimeExceedingThreshold(exp, threshold = 280, time_dim = 'ftime') +#'DOT <- CST_TotalTimeExceedingThreshold(exp, threshold = 280) #'exp$data <- array(rnorm(5 * 3 * 214 * 2, mean = 25, sd = 3), #' c(memb = 5, sdate = 3, ftime = 214, lon = 2)) #'exp$Dates[[1]] <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), @@ -39,7 +39,6 @@ #'SU35 <- CST_TotalTimeExceedingThreshold(exp, threshold = 35) #' #'# For the case of comparing percentiles -#'exp <- CSTools::lonlat_data$exp #'exp$data <- array(rnorm(5 * 3 * 214 * 2, mean = 28, sd = 10), #' c(member = 5, sdate = 3, ftime = 214, lon = 2)) #'exp$Dates[[1]] <- c(seq(as.Date("01-05-2000", format = "%d-%m-%Y"), @@ -48,16 +47,17 @@ #' as.Date("30-11-2001", format = "%d-%m-%Y"), by = 'day'), #' seq(as.Date("01-05-2002", format = "%d-%m-%Y"), #' as.Date("30-11-2002", format = "%d-%m-%Y"), by = 'day')) -#'obs <- CSTools::lonlat_data$obs +#'obs <- exp #'obs$data <- array(rnorm(3 * 214 * 2, mean = 27, sd = 9), #' c(sdate = 3, time = 214, lon = 2)) -#'obs$Dates[[1]] <- exp$Dates[[1]] -#'exp_percentile <- AbsToProbs(exp$data) # convert foreasts to probability -#'obs_percentile <- QThreshold(obs$data, threshold = 35) # convert the fixed threshold to percentile with given observations -#'data <- exp # convert the data (exp_percentile) to a s2dv_cube object -#'data$data <- aperm(exp_percentile, c(2,1,3,4)) -#'data$Dates[[1]] <- exp$Dates[[1]] -#'SU35_percentile <- CST_TotalTimeExceedingThreshold(data, threshold = obs_percentile) # compare percentiles between forecasts and observations +#'# convert foreasts to probability +#'exp_percentile <- CST_AbsToProbs(exp) +#'# convert the fixed threshold to percentile with given observations +#'obs_percentile <- CST_QThreshold(obs, threshold = 35) +#' +#' # compare percentiles between forecasts and observations +#'SU35_percentile <- CST_TotalTimeExceedingThreshold(exp, +#' threshold = obs_percentile) #' #'@export CST_TotalTimeExceedingThreshold <- function(data, threshold, op = '>', @@ -145,9 +145,11 @@ CST_TotalTimeExceedingThreshold <- function(data, threshold, op = '>', #'obs <- array(rnorm(3 * 214 * 2, mean = 27, sd = 9), #' c(sdate = 3, time = 214, lon = 2)) #'exp_percentile <- AbsToProbs(exp) # convert foreasts to probability -#'obs_percentile <- QThreshold(obs, threshold = 35) # convert the fixed threshold to percentile with given observations +#'# convert the fixed threshold to percentile with given observations: +#'obs_percentile <- QThreshold(obs, threshold = 35) +#'# compare percentiles between forecasts and observations: #'SU35_percentile <- TotalTimeExceedingThreshold(aperm(exp_percentile, c(2,1,3,4)), -#' obs_percentile) # compare percentiles between forecasts and observations +#' obs_percentile) #' #'@export TotalTimeExceedingThreshold <- function(data, threshold, op = '>', diff --git a/man/CST_TotalTimeExceedingThreshold.Rd b/man/CST_TotalTimeExceedingThreshold.Rd index c21aa52..1f4ae84 100644 --- a/man/CST_TotalTimeExceedingThreshold.Rd +++ b/man/CST_TotalTimeExceedingThreshold.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/TotalTimeExceedingThreshold.R \name{CST_TotalTimeExceedingThreshold} \alias{CST_TotalTimeExceedingThreshold} -\title{Total Time of a variable Exceeding (not exceeding) a Threshold} +\title{Total Time of a variable Exceeding (not exceeding) a Threshold} \usage{ CST_TotalTimeExceedingThreshold( data, @@ -10,15 +10,15 @@ CST_TotalTimeExceedingThreshold( op = ">", start = NULL, end = NULL, - time_dim = "time", + time_dim = "ftime", na.rm = FALSE, ncores = NULL ) } \arguments{ -\item{data}{an 's2dv_cube' object as provided by function \code{CST_Load} in package CSTools.} +\item{data}{a 's2dv_cube' object as provided by function \code{CST_Load} in package CSTools.} -\item{threshold}{an 's2dv_cube' object as output of a 'CST_' function in the same units as parameter 'data' and with the common dimensions of the element 'data' of the same length. A single scalar is also possible.} +\item{threshold}{a 's2dv_cube' object as output of a 'CST_' function in the same units as parameter \code{data} and with the common dimensions of the element \code{data} of the same length (e.g. an array with the same lengths of longitude and latitude). A single scalar is also possible (for the case of comparing all grid points with the same scalar).} \item{op}{a opartor '>' (by default), '<', '>=' or '<='.} @@ -28,7 +28,7 @@ CST_TotalTimeExceedingThreshold( \item{time_dim}{a character string indicating the name of the function to compute the indicator. By default, it is set to 'ftime'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified.} -\item{na.rm}{a logical value indicating whether to ignore NA values (TRUE) or not (FALSE).} +\item{na.rm}{a logical value indicating whether to ignore NA values (TRUE) or not (FALSE).} \item{ncores}{an integer indicating the number of cores to use in parallel computation.} } @@ -36,29 +36,51 @@ CST_TotalTimeExceedingThreshold( A 's2dv_cube' object containing the indicator in the element \code{data}. } \description{ -The Total Time of a variable exceeding (or not) a Threshold returns the number of days +The Total Time of a variable exceeding (or not) a Threshold returns the total number of days (if the data provided is daily, or the corresponding units to the data frequency provided) that a variable is exceeding a threshold during a period. The threshold provided must be -in the same units than the variable units, i.e. to use a percentile as a threshold, -the function \code{Threshold} or \code{QThreshold} may be needed (see examples). +in the same units than the variable units, i.e. to use a percentile as a scalar, +the function \code{AbsToProbs} or \code{QThreshold} may be needed (see examples). Providing maximum temperature daily data, the following agriculture indices for heat stress can be obtained by using this function: \itemize{ - \item\code{SU35}{Total count of days when daily maximum temperatures exceed 35°C} + \item\code{SU35}{Total count of days when daily maximum temperatures exceed 35°C in the seven months from the start month given (e.g. from April to October for start month of April).} \item\code{SU36}{Total count of days when daily maximum temperatures exceed 36 between June 21st and September 21st} \item\code{SU40}{Total count of days when daily maximum temperatures exceed 40 between June 21st and September 21st} + \item\code{Spr32}{Total count of days when daily maximum temperatures exceed 32 between April 21st and June 21st} } } \examples{ exp <- CSTools::lonlat_data$exp -DOT <- CST_TotalTimeExceedingThreshold(exp, threshold = 280, time_dim = 'ftime') +DOT <- CST_TotalTimeExceedingThreshold(exp, threshold = 280) exp$data <- array(rnorm(5 * 3 * 214 * 2, mean = 25, sd = 3), - c(memb = 5, sdate = 3, time = 214, lon = 2)) + c(memb = 5, sdate = 3, ftime = 214, lon = 2)) exp$Dates[[1]] <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), as.Date("30-11-2000", format = "\%d-\%m-\%Y"), by = 'day'), seq(as.Date("01-05-2001", format = "\%d-\%m-\%Y"), as.Date("30-11-2001", format = "\%d-\%m-\%Y"), by = 'day'), seq(as.Date("01-05-2002", format = "\%d-\%m-\%Y"), as.Date("30-11-2002", format = "\%d-\%m-\%Y"), by = 'day')) -#threshold <- QThreshold() SU35 <- CST_TotalTimeExceedingThreshold(exp, threshold = 35) + +# For the case of comparing percentiles +exp$data <- array(rnorm(5 * 3 * 214 * 2, mean = 28, sd = 10), + c(member = 5, sdate = 3, ftime = 214, lon = 2)) +exp$Dates[[1]] <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), + as.Date("30-11-2000", format = "\%d-\%m-\%Y"), by = 'day'), + seq(as.Date("01-05-2001", format = "\%d-\%m-\%Y"), + as.Date("30-11-2001", format = "\%d-\%m-\%Y"), by = 'day'), + seq(as.Date("01-05-2002", format = "\%d-\%m-\%Y"), + as.Date("30-11-2002", format = "\%d-\%m-\%Y"), by = 'day')) +obs <- exp +obs$data <- array(rnorm(3 * 214 * 2, mean = 27, sd = 9), + c(sdate = 3, time = 214, lon = 2)) +# convert foreasts to probability +exp_percentile <- CST_AbsToProbs(exp) +# convert the fixed threshold to percentile with given observations +obs_percentile <- CST_QThreshold(obs, threshold = 35) + +# compare percentiles between forecasts and observations +SU35_percentile <- CST_TotalTimeExceedingThreshold(exp, + threshold = obs_percentile) + } diff --git a/man/TotalTimeExceedingThreshold.Rd b/man/TotalTimeExceedingThreshold.Rd index c91c15c..213db45 100644 --- a/man/TotalTimeExceedingThreshold.Rd +++ b/man/TotalTimeExceedingThreshold.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/TotalTimeExceedingThreshold.R \name{TotalTimeExceedingThreshold} \alias{TotalTimeExceedingThreshold} -\title{Total Time of a variable Exceeding (not exceeding) a Threshold} +\title{Total Time of a variable Exceeding (not exceeding) a Threshold} \usage{ TotalTimeExceedingThreshold( data, @@ -19,7 +19,7 @@ TotalTimeExceedingThreshold( \arguments{ \item{data}{a multidimensional array with named dimensions.} -\item{threshold}{a multidimensional array with named dimensions in the same units as parameter 'data' and with the common dimensions of the element 'data' of the same length.} +\item{threshold}{a multidimensional array with named dimensions in the same units as parameter \code{data} and with the common dimensions of the element \code{data} of the same length (e.g. an array with the same lengths of longitude and latitude). A single scalar is also possible (for the case of comparing all grid points with the same scalar).} \item{op}{a opartor '>' (by default), '<', '>=' or '<='.} @@ -29,7 +29,7 @@ TotalTimeExceedingThreshold( \item{end}{an optional parameter to defined the final date of the period to select from the data by providing a list of two elements: the final day of the period and the final month of the period. By default it is set to NULL and the indicator is computed using all the data provided in \code{data}.} -\item{time_dim}{a character string indicating the name of the function to compute the indicator. By default, it is set to 'ftime'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified.} +\item{time_dim}{a character string indicating the name of the function to compute the indicator. By default, it is set to 'time'. More than one dimension name matching the dimensions provided in the object \code{data$data} can be specified.} \item{na.rm}{a logical value indicating whether to ignore NA values (TRUE) or not (FALSE).} @@ -39,7 +39,7 @@ TotalTimeExceedingThreshold( A multidimensional array with named dimensions. } \description{ -The Total Time of a variable exceeding (or not) a Threshold returns the number of days +The Total Time of a variable exceeding (or not) a Threshold returns the total number of days (if the data provided is daily, or the corresponding units to the data frequency provided) that a variable is exceeding a threshold during a period. The threshold provided must be in the same units than the variable units, i.e. to use a percentile as a threshold, @@ -49,6 +49,7 @@ Providing maximum temperature daily data, the following agriculture indices for \item\code{SU35}{Total count of days when daily maximum temperatures exceed 35°C} \item\code{SU36}{Total count of days when daily maximum temperatures exceed 36 between June 21st and September 21st} \item\code{SU40}{Total count of days when daily maximum temperatures exceed 40 between June 21st and September 21st} + \item\code{Spr32}{Total count of days when daily maximum temperatures exceed 32 between April 21st and June 21st} } } \examples{ @@ -62,6 +63,18 @@ Dates <- c(seq(as.Date("01-05-2000", format = "\%d-\%m-\%Y"), as.Date("30-11-2001", format = "\%d-\%m-\%Y"), by = 'day'), seq(as.Date("01-05-2002", format = "\%d-\%m-\%Y"), as.Date("30-11-2002", format = "\%d-\%m-\%Y"), by = 'day')) -#threshold <- QThreshold() SU35 <- TotalTimeExceedingThreshold(data, threshold = 35) + +# For the case of comparing between percentiles +exp <- array(rnorm(5 * 3 * 214 * 2, mean = 28, sd = 10), + c(member = 5, sdate = 3, time = 214, lon = 2)) +obs <- array(rnorm(3 * 214 * 2, mean = 27, sd = 9), + c(sdate = 3, time = 214, lon = 2)) +exp_percentile <- AbsToProbs(exp) # convert foreasts to probability +# convert the fixed threshold to percentile with given observations: +obs_percentile <- QThreshold(obs, threshold = 35) +# compare percentiles between forecasts and observations: +SU35_percentile <- TotalTimeExceedingThreshold(aperm(exp_percentile, c(2,1,3,4)), + obs_percentile) + } -- GitLab From 471213e927f41aeb36534bd92dd054edd28b0c5b Mon Sep 17 00:00:00 2001 From: Chihchung Chou Date: Wed, 10 Feb 2021 04:29:20 +0100 Subject: [PATCH 13/16] AbsToProbs.R tests added --- tests/testthat/test-AbsToProbs.R | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/testthat/test-AbsToProbs.R b/tests/testthat/test-AbsToProbs.R index 5ddbd21..1c910b1 100644 --- a/tests/testthat/test-AbsToProbs.R +++ b/tests/testthat/test-AbsToProbs.R @@ -11,3 +11,17 @@ test_that("Sanity checks", { data <- array(1:24, c(member = 3, sdate = 2, lon = 4)) expect_equal(AbsToProbs(data), array(rep(0:1,12), c(sdate = 2, member = 3, lon = 4))) }) + +test_that("Seasonal forecasts", { + + exp <- CSTools::lonlat_data$exp$data[1,1:3,1:3,,1:5,1:5] + exp_probs <- AbsToProbs(exp) + expect_equal(dim(exp)[3:5], dim(exp_probs)[3:5]) + expect_equal(round(exp_probs[,1,1,1,1]), c(1, 0, 1)) + exp <- exp[,1,,,] # one sdate + expect_error(exp1_probs <- AbsToProbs(exp), + "Could not find dimension 'sdate' in 1th object provided in 'data'.") + exp1 <- InsertDim(exp, 2, 1, name = 'sdate') + exp1_probs <- AbsToProbs(exp1) + expect_equal(round(exp1_probs[1,,2,2,2]), c(1, 0, 1)) +}) -- GitLab From deed8475cde1296e14a36da3d729f121d862103e Mon Sep 17 00:00:00 2001 From: Chihchung Chou Date: Wed, 10 Feb 2021 04:34:50 +0100 Subject: [PATCH 14/16] test fix --- tests/testthat/test-AbsToProbs.R | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/testthat/test-AbsToProbs.R b/tests/testthat/test-AbsToProbs.R index 1c910b1..9ef6df2 100644 --- a/tests/testthat/test-AbsToProbs.R +++ b/tests/testthat/test-AbsToProbs.R @@ -21,6 +21,7 @@ test_that("Seasonal forecasts", { exp <- exp[,1,,,] # one sdate expect_error(exp1_probs <- AbsToProbs(exp), "Could not find dimension 'sdate' in 1th object provided in 'data'.") + library(s2dv) exp1 <- InsertDim(exp, 2, 1, name = 'sdate') exp1_probs <- AbsToProbs(exp1) expect_equal(round(exp1_probs[1,,2,2,2]), c(1, 0, 1)) -- GitLab From 983d6b9643cc8b3faa693c7532b65da0492ca90d Mon Sep 17 00:00:00 2001 From: Chihchung Chou Date: Wed, 10 Feb 2021 07:38:28 +0100 Subject: [PATCH 15/16] QThreshold test added --- tests/testthat/test-QThreshold.R | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/testthat/test-QThreshold.R b/tests/testthat/test-QThreshold.R index b6bf593..4ae7400 100644 --- a/tests/testthat/test-QThreshold.R +++ b/tests/testthat/test-QThreshold.R @@ -55,3 +55,24 @@ test_that("Sanity checks", { c(sdate = 2, time = 5, member = 3, lat = 2)) }) + +test_that("Seasonal forecasts", { + + obs <- CSTools::lonlat_data$obs$data - 248 + obs_percentile <- QThreshold(obs, threshold = 35) + expect_equal(dim(obs)[4:6], dim(obs_percentile)[4:6]) + expect_equal(obs_percentile[, 1, 1, 3, 20, 53], c(rep(0.4, 4), rep(0.2, 2))) + obs1 <- obs[,,2,,,] # no sdate + obs1_percentile <- QThreshold(obs1, threshold = 35) + expect_error(obs1_percentile <- QThreshold(obs1, threshold = 35), + "Could not find dimension 'sdate' in 1th object provided in 'data'.") + library(s2dv) + obs1 <- InsertDim(obs1, 1, 1, name = 'sdate') # one sdate + expect_error(obs1_percentile <- QThreshold(obs1, threshold = 35), + "'x' must have 1 or more non-missing values") + obs2 <- obs[,,,2,,] # one ftime + obs2_percentile <- QThreshold(obs2, threshold = 35) + expect_equal(dim(obs2), dim(obs2_percentile)) + expect_equal(obs2_percentile[,14,53], c(0.6, 0.4, 0.6, 0.6, 0.4, 0.4)) + +}) -- GitLab From 7dacde38d1a01ac1f322806cbce78aecca3f875a Mon Sep 17 00:00:00 2001 From: Chihchung Chou Date: Wed, 10 Feb 2021 07:51:10 +0100 Subject: [PATCH 16/16] test fix --- tests/testthat/test-QThreshold.R | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/testthat/test-QThreshold.R b/tests/testthat/test-QThreshold.R index 4ae7400..14fd5d1 100644 --- a/tests/testthat/test-QThreshold.R +++ b/tests/testthat/test-QThreshold.R @@ -63,7 +63,6 @@ test_that("Seasonal forecasts", { expect_equal(dim(obs)[4:6], dim(obs_percentile)[4:6]) expect_equal(obs_percentile[, 1, 1, 3, 20, 53], c(rep(0.4, 4), rep(0.2, 2))) obs1 <- obs[,,2,,,] # no sdate - obs1_percentile <- QThreshold(obs1, threshold = 35) expect_error(obs1_percentile <- QThreshold(obs1, threshold = 35), "Could not find dimension 'sdate' in 1th object provided in 'data'.") library(s2dv) -- GitLab