From d8bd4fc81d024f0363ceff702a5af221a12ea65f Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 17 Nov 2020 14:00:15 +0100 Subject: [PATCH] Improve Persistence() input checks and correct output AR.lowCI and AR.highCI --- NEWS.md | 3 + R/Persistence.R | 140 +++++++++++++++++++++--------- man/Persistence.Rd | 45 ++++++---- tests/testthat/test-Persistence.R | 109 ++++++++++++++++------- 4 files changed, 212 insertions(+), 85 deletions(-) diff --git a/NEWS.md b/NEWS.md index 567b9e0..df8040a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,6 @@ +# s2dv 0.2.0 (Release date: 2021-) +- Improve Persistence() input checks, and correct the output 'AR.lowCI' and 'AR.highCI'. + # s2dv 0.1.1 (Release date: 2020-11-16) - Change the lincense to Apache License 2.0. diff --git a/R/Persistence.R b/R/Persistence.R index 3aa0636..f569e63 100644 --- a/R/Persistence.R +++ b/R/Persistence.R @@ -8,14 +8,16 @@ #' including the time dimension along which the autoregression is computed. #' The data should start at least 40 time steps (years or days) before #' 'start'. -#'@param dates A sequence of 4-digit integers (YYYY) or dates (YYYY-MM-DD) -#' indicating the dates available in the observations. +#'@param dates A sequence of 4-digit integers (YYYY) or string (YYYY-MM-DD) +#' in class 'Date' indicating the dates available in the observations. #'@param time_dim A character string indicating the dimension along which to #' compute the autoregression. The default value is 'time'. -#'@param start A 4-digit integer (YYYY) or a date in the ISOdate format -#' (YYYY-MM-DD) indicating the first start date of the persistence forecast. -#'@param end A 4-digit integer (YYYY) or a date in the ISOdate format -#' (YYYY-MM-DD) indicating the last start date of the persistence forecast. +#'@param start A 4-digit integer (YYYY) or a string (YYYY-MM-DD) in class 'Date' +#' indicating the first start date of the persistence forecast. It must be +#' between 1850 and 2020. +#'@param end A 4-digit integer (YYYY) or a string (YYYY-MM-DD) in class 'Date' +#' indicating the last start date of the persistence forecast. It must be +#' between 1850 and 2020. #'@param ft_start An integer indicating the forecast time for which the #' persistence forecast should be calculated, or the first forecast time of #' the average forecast times for which persistence should be calculated. @@ -37,45 +39,54 @@ #' #'@return #'A list containing: -#'\item{$persistence}{ +#'\item{$persistence} { #' A numeric array with dimensions 'memb', time (start dates), latitudes and longitudes #' containing the persistence forecast. #'} -#'\item{$persistence.mean}{ +#'\item{$persistence.mean} { #' A numeric array with same dimensions as 'persistence', except the 'memb' dimension #' which is of length 1, containing the ensemble mean persistence forecast. #'} -#'\item{$persistence.predint}{ +#'\item{$persistence.predint} { #' A numeric array with same dimensions as 'persistence', except the 'memb' dimension #' which is of length 1, containing the prediction interval of the persistence forecast. #'} -#'\item{$AR.slope}{ +#'\item{$AR.slope} { #' A numeric array with same dimensions as 'persistence', except the 'memb' dimension #' which is of length 1, containing the slope coefficient of the autoregression. #'} -#'\item{$AR.intercept}{ +#'\item{$AR.intercept} { #' A numeric array with same dimensions as 'persistence', except the 'memb' dimension #' which is of length 1, containing the intercept coefficient of the autoregression. #'} -#'\item{$AR.lowCI}{ +#'\item{$AR.lowCI} { #' A numeric array with same dimensions as 'persistence', except the 'memb' dimension #' which is of length 1, containing the lower value of the confidence interval of the #' autoregression. #'} -#'\item{$AR.highCI}{ +#'\item{$AR.highCI} { #' A numeric array with same dimensions as 'persistence', except the 'memb' dimension #' which is of length 1, containing the upper value of the confidence interval of the #' autoregression. #'} #' #'@examples -#'#Building an example dataset with yearly start dates from 1920 to 2009 +#'# Case 1: year +#'# Building an example dataset with yearly start dates from 1920 to 2009 #'set.seed(1) #'obs1 <- rnorm(1 * 70 * 6 * 7) #'dim(obs1) <- c(member = 1, time = 70, lat = 6, lon = 7) -#'dates <- seq(1940, 2009, 1) -#'persist <- Persistence(obs1, dates = dates, start = 1961, end = 2005, ft_start = 1, -#' nmemb = 40) +#'dates <- seq(1920, 1989, 1) +#'res <- Persistence(obs1, dates = dates, start = 1961, end = 1980, ft_start = 1, +#' nmemb = 40) +#'# Case 2: day +#'dates <- seq(as.Date(ISOdate(1990, 1, 1)), as.Date(ISOdate(1990, 4, 1)) ,1) +#'start <- as.Date(ISOdate(1990, 2, 15)) +#'end <- as.Date(ISOdate(1990, 4, 1)) +#'set.seed(1) +#'data <- rnorm(1 * length(dates) * 6 * 7) +#'dim(data) <- c(member = 1, time = length(dates), lat = 6, lon = 7) +#'res <- Persistence(data, dates = dates, start = start, end = end, ft_start = 1) #' #'@import multiApply #'@export @@ -95,7 +106,7 @@ Persistence <- function(data, dates, time_dim = 'time', start, end, ft_start, dim(data) <- c(length(data)) names(dim(data)) <- time_dim } - if(any(is.null(names(dim(data))))| any(nchar(names(dim(data))) == 0)) { + if(any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) { stop("Parameter 'data' must have dimension names.") } ## time_dim @@ -106,27 +117,79 @@ Persistence <- function(data, dates, time_dim = 'time', start, end, ft_start, stop("Parameter 'time_dim' is not found in 'data' dimension.") } ## dates + if (is.numeric(dates)) { #(YYYY) + if (any(nchar(dates) != 4) | any(dates %% 1 != 0) | any(dates <= 0)) { + stop(paste0("Parameter 'dates' must be a sequence of integer (YYYY) or ", + "string (YYYY-MM-DD) in class 'Date'.")) + } + } else if (class(dates) == 'Date') { #(YYYY-MM-DD) + + } else { + stop(paste0("Parameter 'dates' must be a sequence of integer (YYYY) or ", + "string (YYYY-MM-DD) in class 'Date'.")) + } if (length(dates) != dim(data)[time_dim]) { stop("Parameter 'dates' must have the same length as in 'time_dim'.") } + ## dates, start, and end + if (!all(sapply(list(class(dates), class(start)), function(x) x == class(end)))) { + stop("Parameter 'dates', 'start', and 'end' should be the same format.") + } ## start -# if (!is.numeric(start) | start %% 1 != 0 | start < 0 | -# length(start) > 1 | start < 1850 | start > 2020) { -# stop("Parameter 'start' must be an integer between 1850 and 2020.") -# } -# if (start < dates[1] + 40) { -# stop("Parameter 'start' must start at least 40 time steps after the -# first start date of 'data'.") -# } + if (is.numeric(start)) { #(YYYY) + if (length(start) > 1 | any(start %% 1 != 0) | any(start < 1850) | any(start > 2020)) { + stop(paste0("Parameter 'start' must be an integer or a string in class ", + "'Date' between 1850 and 2020.")) + } + if (is.na(match(start, dates))) { + stop("Parameter 'start' must be one of the values of 'dates'.") + } + if (start < dates[1] + 40) { + stop(paste0("Parameter 'start' must start at least 40 time steps after ", + "the first 'dates'.")) + } + } else if (class(start) == 'Date') { + if (length(start) > 1 | any(start < as.Date(ISOdate(1850, 1, 1))) | + any(start > as.Date(ISOdate(2021, 1, 1)))) { + stop(paste0("Parameter 'start' must be an integer or a string in class ", + "'Date' between 1850 and 2020.")) + } + if (is.na(match(start, dates))) { + stop("Parameter 'start' must be one of the values of 'dates'.") + } + if (start < dates[1] + 40) { + stop(paste0("Parameter 'start' must start at least 40 time steps after ", + "the first 'dates'.")) + } + } else { + stop(paste0("Parameter 'start' must be an integer or a string in class ", + "'Date' between 1850 and 2020.")) + } + ## end -# if (!is.numeric(end) | end %% 1 != 0 | end < 0 | -# length(end) > 1 | end < 1850 | end > 2020) { -# stop("Parameter 'end' must be an integer between 1850 and 2020.") -# } -# if (end > dates[length(dates)] + 1) { -# stop("Parameter 'end' must end at most 1 time step after the -# last start date of 'data'.") -# } + if (is.numeric(end)) { #(YYYY) + if (length(end) > 1 | any(end %% 1 != 0) | any(end < 1850) | any(end > 2020)) { + stop(paste0("Parameter 'end' must be an integer or a string in class ", + "'Date' between 1850 and 2020.")) + } + if (end > dates[length(dates)] + 1) { + stop(paste0("Parameter 'end' must end at most 1 time steps after ", + "the last 'dates'.")) + } + } else if (class(end) == 'Date') { + if (length(end) > 1 | any(end < as.Date(ISOdate(1850, 1, 1))) | + any(end > as.Date(ISOdate(2020, 12, 31)))) { + stop(paste0("Parameter 'end' must be an integer or a string in class ", + "'Date' between 1850 and 2020.")) + } + if (end > dates[length(dates)] + 1) { + stop(paste0("Parameter 'end' must end at most 1 time steps after ", + "the last 'dates'.")) + } + } else { + stop(paste0("Parameter 'end' must be an integer or a string in class ", + "'Date' between 1850 and 2020.")) + } ## ft_start if (!is.numeric(ft_start) | ft_start %% 1 != 0 | ft_start < 0 | length(ft_start) > 1) { @@ -195,7 +258,6 @@ Persistence <- function(data, dates, time_dim = 'time', start, end, ft_start, # ft_start/ft_end are indices .Persistence <- function(x, dates, time_dim = 'time', start, end, ft_start = 1, ft_end = 1, max_ft = 10, nmemb = 1, na.action = 10) { - tm <- end - start + 1 max_date <- match(start, dates) interval <- ft_end - ft_start @@ -204,7 +266,7 @@ Persistence <- function(data, dates, time_dim = 'time', start, end, ft_start, persistence <- matrix(NA, nrow = nmemb, ncol = tm) names(dim(persistence)) <- c('realization', time_dim) - for (sdate in tm:1){ + for (sdate in tm:1) { min_y = max_ft + ft_start max_y = max_date + sdate - 2 min_x = max_ft # for extreme case: ex. forecast years 1-10, interval = 9 @@ -243,9 +305,9 @@ Persistence <- function(data, dates, time_dim = 'time', start, end, ft_start, persistence.predint[sdate] <- stdev_reg * sqrt(1 + 1 / n + X_sq / S_sq) AR.slope[sdate] <- a AR.intercept[sdate] <- b - AR.lowCI[sdate] <- reg$regression[1] - AR.highCI[sdate] <- reg$regression[3] - persistence[ ,sdate] <- rnorm(n = nmemb, mean = persistence.mean[sdate], + AR.lowCI[sdate] <- reg$conf.lower[2] + AR.highCI[sdate] <- reg$conf.upper[2] + persistence[ , sdate] <- rnorm(n = nmemb, mean = persistence.mean[sdate], sd = persistence.predint[sdate]) } diff --git a/man/Persistence.Rd b/man/Persistence.Rd index 33d4868..0d6c77f 100644 --- a/man/Persistence.Rd +++ b/man/Persistence.Rd @@ -14,17 +14,19 @@ including the time dimension along which the autoregression is computed. The data should start at least 40 time steps (years or days) before 'start'.} -\item{dates}{A sequence of 4-digit integers (YYYY) or dates (YYYY-MM-DD) -indicating the dates available in the observations.} +\item{dates}{A sequence of 4-digit integers (YYYY) or string (YYYY-MM-DD) +in class 'Date' indicating the dates available in the observations.} \item{time_dim}{A character string indicating the dimension along which to compute the autoregression. The default value is 'time'.} -\item{start}{A 4-digit integer (YYYY) or a date in the ISOdate format -(YYYY-MM-DD) indicating the first start date of the persistence forecast.} +\item{start}{A 4-digit integer (YYYY) or a string (YYYY-MM-DD) in class 'Date' +indicating the first start date of the persistence forecast. It must be +between 1850 and 2020.} -\item{end}{A 4-digit integer (YYYY) or a date in the ISOdate format -(YYYY-MM-DD) indicating the last start date of the persistence forecast.} +\item{end}{A 4-digit integer (YYYY) or a string (YYYY-MM-DD) in class 'Date' +indicating the last start date of the persistence forecast. It must be +between 1850 and 2020.} \item{ft_start}{An integer indicating the forecast time for which the persistence forecast should be calculated, or the first forecast time of @@ -52,32 +54,32 @@ computation. The default value is NULL.} } \value{ A list containing: -\item{$persistence}{ +\item{$persistence} { A numeric array with dimensions 'memb', time (start dates), latitudes and longitudes containing the persistence forecast. } -\item{$persistence.mean}{ +\item{$persistence.mean} { A numeric array with same dimensions as 'persistence', except the 'memb' dimension which is of length 1, containing the ensemble mean persistence forecast. } -\item{$persistence.predint}{ +\item{$persistence.predint} { A numeric array with same dimensions as 'persistence', except the 'memb' dimension which is of length 1, containing the prediction interval of the persistence forecast. } -\item{$AR.slope}{ +\item{$AR.slope} { A numeric array with same dimensions as 'persistence', except the 'memb' dimension which is of length 1, containing the slope coefficient of the autoregression. } -\item{$AR.intercept}{ +\item{$AR.intercept} { A numeric array with same dimensions as 'persistence', except the 'memb' dimension which is of length 1, containing the intercept coefficient of the autoregression. } -\item{$AR.lowCI}{ +\item{$AR.lowCI} { A numeric array with same dimensions as 'persistence', except the 'memb' dimension which is of length 1, containing the lower value of the confidence interval of the autoregression. } -\item{$AR.highCI}{ +\item{$AR.highCI} { A numeric array with same dimensions as 'persistence', except the 'memb' dimension which is of length 1, containing the upper value of the confidence interval of the autoregression. @@ -89,13 +91,22 @@ observational data along the time dimension, with a measure of forecast uncertainty (prediction interval) based on Coelho et al., 2004.\cr\cr } \examples{ -#Building an example dataset with yearly start dates from 1920 to 2009 +# Case 1: year +# Building an example dataset with yearly start dates from 1920 to 2009 set.seed(1) obs1 <- rnorm(1 * 70 * 6 * 7) dim(obs1) <- c(member = 1, time = 70, lat = 6, lon = 7) -dates <- seq(1940, 2009, 1) -persist <- Persistence(obs1, dates = dates, start = 1961, end = 2005, ft_start = 1, - nmemb = 40) +dates <- seq(1920, 1989, 1) +res <- Persistence(obs1, dates = dates, start = 1961, end = 1980, ft_start = 1, + nmemb = 40) +# Case 2: day +dates <- seq(as.Date(ISOdate(1990, 1, 1)), as.Date(ISOdate(1990, 4, 1)) ,1) +start <- as.Date(ISOdate(1990, 2, 15)) +end <- as.Date(ISOdate(1990, 4, 1)) +set.seed(1) +data <- rnorm(1 * length(dates) * 6 * 7) +dim(data) <- c(member = 1, time = length(dates), lat = 6, lon = 7) +res <- Persistence(data, dates = dates, start = start, end = end, ft_start = 1) } diff --git a/tests/testthat/test-Persistence.R b/tests/testthat/test-Persistence.R index a75c518..2ce2dec 100644 --- a/tests/testthat/test-Persistence.R +++ b/tests/testthat/test-Persistence.R @@ -1,9 +1,23 @@ context("s2dv::Persistence tests") ############################################## - set.seed(1) - dat1 <- array(rnorm(540), dim = c(member = 1, time = 90, lat = 2, lon = 3)) - dates1 <- seq(1920, 2009, 1) +#dat1: year +set.seed(1) +dat1 <- rnorm(1 * 70 * 6 * 7) +dim(dat1) <- c(member = 1, time = 70, lat = 6, lon = 7) +dates1 <- seq(1920, 1989, 1) +start1 <- 1961 +end1 <- 1990 +res <- Persistence(obs1, dates = dates1, start = 1961, end = 1990, ft_start = 1, + nmemb = 40) + +#dat2: day +dates2 <- seq(as.Date(ISOdate(1990, 1, 1)), as.Date(ISOdate(1990, 4, 1)) ,1) +set.seed(2) +dat2 <- rnorm(1 * length(dates2) * 6 * 7) +dim(dat2) <- c(member = 1, time = length(dates2), lat = 6, lon = 7) +start2 <- as.Date(ISOdate(1990, 2, 15)) +end2 <- as.Date(ISOdate(1990, 4, 1)) ############################################## test_that("1. Input checks", { @@ -21,10 +35,6 @@ test_that("1. Input checks", { "Parameter 'data' must have dimension names." ) expect_error( - Persistence(data = dat1, dates = seq(1900, 2009, 1)), - "Parameter 'dates' must have the same length as in 'time_dim'." - ) - expect_error( Persistence(data = dat1, dates = dates1, time_dim = 12), "Parameter 'time_dim' must be a character string." ) @@ -33,34 +43,76 @@ test_that("1. Input checks", { "Parameter 'time_dim' is not found in 'data' dimension." ) expect_error( - Persistence(data = dat1, dates = dates1, start = 1961, end = 2005, ft_start = 0.5), + Persistence(data = dat1, dates = c(1:10)), + paste0("Parameter 'dates' must be a sequence of integer \\(YYYY\\) or ", + "string \\(YYYY-MM-DD\\) in class 'Date'.") + ) + expect_error( + Persistence(data = dat1, dates = seq(1900, 2009, 1)), + "Parameter 'dates' must have the same length as in 'time_dim'." + ) + expect_error( + Persistence(data = dat1, dates = dates1, start = start1, end = end2), + "Parameter 'dates', 'start', and 'end' should be the same format." + ) + # start + expect_error( + Persistence(data = dat1, dates = dates1, start = 1800, end = end1), + paste0("Parameter 'start' must be an integer or a string in class ", + "'Date' between 1850 and 2020.") + ) + expect_error( + Persistence(data = dat1, dates = dates1, start = 1851, end = end1), + "Parameter 'start' must be one of the values of 'dates'." + ) + expect_error( + Persistence(data = dat1, dates = dates1, start = 1921, end = end1), + paste0("Parameter 'start' must start at least 40 time steps after ", + "the first 'dates'.") + ) + # end + expect_error( + Persistence(data = dat2, dates = dates2, start = start2, end = as.Date(ISOdate(2021, 1, 1))), + paste0("Parameter 'end' must be an integer or a string in class ", + "'Date' between 1850 and 2020.") + ) + expect_error( + Persistence(data = dat2, dates = dates2, start = start2, end = as.Date(ISOdate(1990, 4, 3))), + paste0("Parameter 'end' must end at most 1 time steps after ", + "the last 'dates'.") + ) + # ft_start + expect_error( + Persistence(data = dat1, dates = dates1, start = start1, end = end1, ft_start = 0.5), "Parameter 'ft_start' must be a positive integer." ) + # ft_end expect_error( - Persistence(data = dat1, dates = dates1, start = 1961, end = 2005, ft_start = 1, + Persistence(data = dat1, dates = dates1, start = start1, end = end1, ft_start = 1, ft_end = 12), "Parameter 'ft_end' must be a positive integer below 'max_ft'." ) + # max_ft expect_error( - Persistence(data = dat1, dates = dates1, start = 1961, end = 2005, ft_start = 1, + Persistence(data = dat1, dates = dates1, start = start1, end = end1, ft_start = 1, ft_end = 12, max_ft = 13.5), "Parameter 'max_ft' must be a positive integer." ) + # nmemb expect_error( - Persistence(data = dat1, dates = dates1, start = 1961, end = 2005, ft_start = 1, - nmemb = 0), + Persistence(data = dat1, dates = dates1, start = start1, end = end1, ft_start = 1, nmemb = 0), "Parameter 'nmemb' must be a positive integer." ) expect_error( - Persistence(data = dat1, dates = dates1, start = 1961, end = 2005, ft_start = 1, + Persistence(data = dat1, dates = dates1, start = start1, end = end1, ft_start = 1, na.action = T), paste0("Parameter 'na.action' must be a function for NA values or ", "a numeric indicating the number of NA values allowed ", "before returning NA.") ) expect_error( - Persistence(data = dat1, dates = dates1, start = 1961, end = 2005, ft_start = 1, + Persistence(data = dat1, dates = dates1, start = start1, end = end1, ft_start = 1, ncores = 0), "Parameter 'ncores' must be a positive integer." ) @@ -69,7 +121,7 @@ test_that("1. Input checks", { ############################################## test_that("2. Output checks: dat1", { - res <- Persistence(dat1, dates = dates1, start = 1961, end = 2005, ft_start = 1) + res <- Persistence(dat1, dates = dates1, start = start1, end = end1, ft_start = 1) expect_equal( names(res), @@ -78,26 +130,25 @@ test_that("2. Output checks: dat1", { ) expect_equal( dim(res$persistence), - c(realization = 1, time = 45, member = 1, lat = 2, lon = 3) + c(realization = 1, time = 30, member = 1, lat = 6, lon = 7) ) expect_equal( dim(res$persistence.mean), - c(45, member = 1, lat = 2, lon = 3) - ) - expect_equal( - mean(res$persistence), - 0.03481641, - tolerance = 0.00001 + c(30, member = 1, lat = 6, lon = 7) ) +}) + +############################################## +test_that("2. Output checks: dat1", { + res <- Persistence(dat2, dates = dates2, start = start2, end = end2, ft_start = 1) + expect_equal( - range(res$persistence), - c(-1.025059, 1.042929), - tolerance = 0.0001 + names(res), + c('persistence', 'persistence.mean', 'persistence.predint', 'AR.slope', + 'AR.intercept', 'AR.lowCI', 'AR.highCI') ) expect_equal( - range(res$AR.slope), - c(-0.2636489, 0.2334777), - tolerance = 0.0001 + dim(res$persistence), + c(realization = 1, time = 46, member = 1, lat = 6, lon = 7) ) }) - -- GitLab