diff --git a/NEWS.md b/NEWS.md index d1e42ee6217b1e08dcfbaaecf213cba769012f1f..b54387343d1c4010844ddbf95fe15a5fe5dfb524 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,7 @@ - Add parameter 'memb_dim' and 'memb' in Corr(). They allow the existence of the member dimension which can have different length between exp and obs, and users can choose to do the ensemble mean first before correlation or calculate the correlation for individual member. +- Improve Persistence() input checks, and correct the output 'AR.lowCI' and 'AR.highCI'. # s2dv 0.1.1 (Release date: 2020-11-16) - Change the lincense to Apache License 2.0. diff --git a/R/Persistence.R b/R/Persistence.R index 3aa0636caf7b502e913927cd8d14bd4dd5efbd18..f569e632651ddeb72fe8dc9b76ed9bfc2dcbcc52 100644 --- a/R/Persistence.R +++ b/R/Persistence.R @@ -8,14 +8,16 @@ #' including the time dimension along which the autoregression is computed. #' The data should start at least 40 time steps (years or days) before #' 'start'. -#'@param dates A sequence of 4-digit integers (YYYY) or dates (YYYY-MM-DD) -#' indicating the dates available in the observations. +#'@param dates A sequence of 4-digit integers (YYYY) or string (YYYY-MM-DD) +#' in class 'Date' indicating the dates available in the observations. #'@param time_dim A character string indicating the dimension along which to #' compute the autoregression. The default value is 'time'. -#'@param start A 4-digit integer (YYYY) or a date in the ISOdate format -#' (YYYY-MM-DD) indicating the first start date of the persistence forecast. -#'@param end A 4-digit integer (YYYY) or a date in the ISOdate format -#' (YYYY-MM-DD) indicating the last start date of the persistence forecast. +#'@param start A 4-digit integer (YYYY) or a string (YYYY-MM-DD) in class 'Date' +#' indicating the first start date of the persistence forecast. It must be +#' between 1850 and 2020. +#'@param end A 4-digit integer (YYYY) or a string (YYYY-MM-DD) in class 'Date' +#' indicating the last start date of the persistence forecast. It must be +#' between 1850 and 2020. #'@param ft_start An integer indicating the forecast time for which the #' persistence forecast should be calculated, or the first forecast time of #' the average forecast times for which persistence should be calculated. @@ -37,45 +39,54 @@ #' #'@return #'A list containing: -#'\item{$persistence}{ +#'\item{$persistence} { #' A numeric array with dimensions 'memb', time (start dates), latitudes and longitudes #' containing the persistence forecast. #'} -#'\item{$persistence.mean}{ +#'\item{$persistence.mean} { #' A numeric array with same dimensions as 'persistence', except the 'memb' dimension #' which is of length 1, containing the ensemble mean persistence forecast. #'} -#'\item{$persistence.predint}{ +#'\item{$persistence.predint} { #' A numeric array with same dimensions as 'persistence', except the 'memb' dimension #' which is of length 1, containing the prediction interval of the persistence forecast. #'} -#'\item{$AR.slope}{ +#'\item{$AR.slope} { #' A numeric array with same dimensions as 'persistence', except the 'memb' dimension #' which is of length 1, containing the slope coefficient of the autoregression. #'} -#'\item{$AR.intercept}{ +#'\item{$AR.intercept} { #' A numeric array with same dimensions as 'persistence', except the 'memb' dimension #' which is of length 1, containing the intercept coefficient of the autoregression. #'} -#'\item{$AR.lowCI}{ +#'\item{$AR.lowCI} { #' A numeric array with same dimensions as 'persistence', except the 'memb' dimension #' which is of length 1, containing the lower value of the confidence interval of the #' autoregression. #'} -#'\item{$AR.highCI}{ +#'\item{$AR.highCI} { #' A numeric array with same dimensions as 'persistence', except the 'memb' dimension #' which is of length 1, containing the upper value of the confidence interval of the #' autoregression. #'} #' #'@examples -#'#Building an example dataset with yearly start dates from 1920 to 2009 +#'# Case 1: year +#'# Building an example dataset with yearly start dates from 1920 to 2009 #'set.seed(1) #'obs1 <- rnorm(1 * 70 * 6 * 7) #'dim(obs1) <- c(member = 1, time = 70, lat = 6, lon = 7) -#'dates <- seq(1940, 2009, 1) -#'persist <- Persistence(obs1, dates = dates, start = 1961, end = 2005, ft_start = 1, -#' nmemb = 40) +#'dates <- seq(1920, 1989, 1) +#'res <- Persistence(obs1, dates = dates, start = 1961, end = 1980, ft_start = 1, +#' nmemb = 40) +#'# Case 2: day +#'dates <- seq(as.Date(ISOdate(1990, 1, 1)), as.Date(ISOdate(1990, 4, 1)) ,1) +#'start <- as.Date(ISOdate(1990, 2, 15)) +#'end <- as.Date(ISOdate(1990, 4, 1)) +#'set.seed(1) +#'data <- rnorm(1 * length(dates) * 6 * 7) +#'dim(data) <- c(member = 1, time = length(dates), lat = 6, lon = 7) +#'res <- Persistence(data, dates = dates, start = start, end = end, ft_start = 1) #' #'@import multiApply #'@export @@ -95,7 +106,7 @@ Persistence <- function(data, dates, time_dim = 'time', start, end, ft_start, dim(data) <- c(length(data)) names(dim(data)) <- time_dim } - if(any(is.null(names(dim(data))))| any(nchar(names(dim(data))) == 0)) { + if(any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) { stop("Parameter 'data' must have dimension names.") } ## time_dim @@ -106,27 +117,79 @@ Persistence <- function(data, dates, time_dim = 'time', start, end, ft_start, stop("Parameter 'time_dim' is not found in 'data' dimension.") } ## dates + if (is.numeric(dates)) { #(YYYY) + if (any(nchar(dates) != 4) | any(dates %% 1 != 0) | any(dates <= 0)) { + stop(paste0("Parameter 'dates' must be a sequence of integer (YYYY) or ", + "string (YYYY-MM-DD) in class 'Date'.")) + } + } else if (class(dates) == 'Date') { #(YYYY-MM-DD) + + } else { + stop(paste0("Parameter 'dates' must be a sequence of integer (YYYY) or ", + "string (YYYY-MM-DD) in class 'Date'.")) + } if (length(dates) != dim(data)[time_dim]) { stop("Parameter 'dates' must have the same length as in 'time_dim'.") } + ## dates, start, and end + if (!all(sapply(list(class(dates), class(start)), function(x) x == class(end)))) { + stop("Parameter 'dates', 'start', and 'end' should be the same format.") + } ## start -# if (!is.numeric(start) | start %% 1 != 0 | start < 0 | -# length(start) > 1 | start < 1850 | start > 2020) { -# stop("Parameter 'start' must be an integer between 1850 and 2020.") -# } -# if (start < dates[1] + 40) { -# stop("Parameter 'start' must start at least 40 time steps after the -# first start date of 'data'.") -# } + if (is.numeric(start)) { #(YYYY) + if (length(start) > 1 | any(start %% 1 != 0) | any(start < 1850) | any(start > 2020)) { + stop(paste0("Parameter 'start' must be an integer or a string in class ", + "'Date' between 1850 and 2020.")) + } + if (is.na(match(start, dates))) { + stop("Parameter 'start' must be one of the values of 'dates'.") + } + if (start < dates[1] + 40) { + stop(paste0("Parameter 'start' must start at least 40 time steps after ", + "the first 'dates'.")) + } + } else if (class(start) == 'Date') { + if (length(start) > 1 | any(start < as.Date(ISOdate(1850, 1, 1))) | + any(start > as.Date(ISOdate(2021, 1, 1)))) { + stop(paste0("Parameter 'start' must be an integer or a string in class ", + "'Date' between 1850 and 2020.")) + } + if (is.na(match(start, dates))) { + stop("Parameter 'start' must be one of the values of 'dates'.") + } + if (start < dates[1] + 40) { + stop(paste0("Parameter 'start' must start at least 40 time steps after ", + "the first 'dates'.")) + } + } else { + stop(paste0("Parameter 'start' must be an integer or a string in class ", + "'Date' between 1850 and 2020.")) + } + ## end -# if (!is.numeric(end) | end %% 1 != 0 | end < 0 | -# length(end) > 1 | end < 1850 | end > 2020) { -# stop("Parameter 'end' must be an integer between 1850 and 2020.") -# } -# if (end > dates[length(dates)] + 1) { -# stop("Parameter 'end' must end at most 1 time step after the -# last start date of 'data'.") -# } + if (is.numeric(end)) { #(YYYY) + if (length(end) > 1 | any(end %% 1 != 0) | any(end < 1850) | any(end > 2020)) { + stop(paste0("Parameter 'end' must be an integer or a string in class ", + "'Date' between 1850 and 2020.")) + } + if (end > dates[length(dates)] + 1) { + stop(paste0("Parameter 'end' must end at most 1 time steps after ", + "the last 'dates'.")) + } + } else if (class(end) == 'Date') { + if (length(end) > 1 | any(end < as.Date(ISOdate(1850, 1, 1))) | + any(end > as.Date(ISOdate(2020, 12, 31)))) { + stop(paste0("Parameter 'end' must be an integer or a string in class ", + "'Date' between 1850 and 2020.")) + } + if (end > dates[length(dates)] + 1) { + stop(paste0("Parameter 'end' must end at most 1 time steps after ", + "the last 'dates'.")) + } + } else { + stop(paste0("Parameter 'end' must be an integer or a string in class ", + "'Date' between 1850 and 2020.")) + } ## ft_start if (!is.numeric(ft_start) | ft_start %% 1 != 0 | ft_start < 0 | length(ft_start) > 1) { @@ -195,7 +258,6 @@ Persistence <- function(data, dates, time_dim = 'time', start, end, ft_start, # ft_start/ft_end are indices .Persistence <- function(x, dates, time_dim = 'time', start, end, ft_start = 1, ft_end = 1, max_ft = 10, nmemb = 1, na.action = 10) { - tm <- end - start + 1 max_date <- match(start, dates) interval <- ft_end - ft_start @@ -204,7 +266,7 @@ Persistence <- function(data, dates, time_dim = 'time', start, end, ft_start, persistence <- matrix(NA, nrow = nmemb, ncol = tm) names(dim(persistence)) <- c('realization', time_dim) - for (sdate in tm:1){ + for (sdate in tm:1) { min_y = max_ft + ft_start max_y = max_date + sdate - 2 min_x = max_ft # for extreme case: ex. forecast years 1-10, interval = 9 @@ -243,9 +305,9 @@ Persistence <- function(data, dates, time_dim = 'time', start, end, ft_start, persistence.predint[sdate] <- stdev_reg * sqrt(1 + 1 / n + X_sq / S_sq) AR.slope[sdate] <- a AR.intercept[sdate] <- b - AR.lowCI[sdate] <- reg$regression[1] - AR.highCI[sdate] <- reg$regression[3] - persistence[ ,sdate] <- rnorm(n = nmemb, mean = persistence.mean[sdate], + AR.lowCI[sdate] <- reg$conf.lower[2] + AR.highCI[sdate] <- reg$conf.upper[2] + persistence[ , sdate] <- rnorm(n = nmemb, mean = persistence.mean[sdate], sd = persistence.predint[sdate]) } diff --git a/man/Persistence.Rd b/man/Persistence.Rd index 358263392df4fc16b9cf87e26db88d9402dd5449..84b846480df3f073ba876607ea94978c51397dfc 100644 --- a/man/Persistence.Rd +++ b/man/Persistence.Rd @@ -24,17 +24,19 @@ including the time dimension along which the autoregression is computed. The data should start at least 40 time steps (years or days) before 'start'.} -\item{dates}{A sequence of 4-digit integers (YYYY) or dates (YYYY-MM-DD) -indicating the dates available in the observations.} +\item{dates}{A sequence of 4-digit integers (YYYY) or string (YYYY-MM-DD) +in class 'Date' indicating the dates available in the observations.} \item{time_dim}{A character string indicating the dimension along which to compute the autoregression. The default value is 'time'.} -\item{start}{A 4-digit integer (YYYY) or a date in the ISOdate format -(YYYY-MM-DD) indicating the first start date of the persistence forecast.} +\item{start}{A 4-digit integer (YYYY) or a string (YYYY-MM-DD) in class 'Date' +indicating the first start date of the persistence forecast. It must be +between 1850 and 2020.} -\item{end}{A 4-digit integer (YYYY) or a date in the ISOdate format -(YYYY-MM-DD) indicating the last start date of the persistence forecast.} +\item{end}{A 4-digit integer (YYYY) or a string (YYYY-MM-DD) in class 'Date' +indicating the last start date of the persistence forecast. It must be +between 1850 and 2020.} \item{ft_start}{An integer indicating the forecast time for which the persistence forecast should be calculated, or the first forecast time of @@ -62,32 +64,32 @@ computation. The default value is NULL.} } \value{ A list containing: -\item{$persistence}{ +\item{$persistence} { A numeric array with dimensions 'memb', time (start dates), latitudes and longitudes containing the persistence forecast. } -\item{$persistence.mean}{ +\item{$persistence.mean} { A numeric array with same dimensions as 'persistence', except the 'memb' dimension which is of length 1, containing the ensemble mean persistence forecast. } -\item{$persistence.predint}{ +\item{$persistence.predint} { A numeric array with same dimensions as 'persistence', except the 'memb' dimension which is of length 1, containing the prediction interval of the persistence forecast. } -\item{$AR.slope}{ +\item{$AR.slope} { A numeric array with same dimensions as 'persistence', except the 'memb' dimension which is of length 1, containing the slope coefficient of the autoregression. } -\item{$AR.intercept}{ +\item{$AR.intercept} { A numeric array with same dimensions as 'persistence', except the 'memb' dimension which is of length 1, containing the intercept coefficient of the autoregression. } -\item{$AR.lowCI}{ +\item{$AR.lowCI} { A numeric array with same dimensions as 'persistence', except the 'memb' dimension which is of length 1, containing the lower value of the confidence interval of the autoregression. } -\item{$AR.highCI}{ +\item{$AR.highCI} { A numeric array with same dimensions as 'persistence', except the 'memb' dimension which is of length 1, containing the upper value of the confidence interval of the autoregression. @@ -99,12 +101,21 @@ observational data along the time dimension, with a measure of forecast uncertainty (prediction interval) based on Coelho et al., 2004.\cr\cr } \examples{ -#Building an example dataset with yearly start dates from 1920 to 2009 +# Case 1: year +# Building an example dataset with yearly start dates from 1920 to 2009 set.seed(1) obs1 <- rnorm(1 * 70 * 6 * 7) dim(obs1) <- c(member = 1, time = 70, lat = 6, lon = 7) -dates <- seq(1940, 2009, 1) -persist <- Persistence(obs1, dates = dates, start = 1961, end = 2005, ft_start = 1, - nmemb = 40) +dates <- seq(1920, 1989, 1) +res <- Persistence(obs1, dates = dates, start = 1961, end = 1980, ft_start = 1, + nmemb = 40) +# Case 2: day +dates <- seq(as.Date(ISOdate(1990, 1, 1)), as.Date(ISOdate(1990, 4, 1)) ,1) +start <- as.Date(ISOdate(1990, 2, 15)) +end <- as.Date(ISOdate(1990, 4, 1)) +set.seed(1) +data <- rnorm(1 * length(dates) * 6 * 7) +dim(data) <- c(member = 1, time = length(dates), lat = 6, lon = 7) +res <- Persistence(data, dates = dates, start = start, end = end, ft_start = 1) } diff --git a/tests/testthat/test-Persistence.R b/tests/testthat/test-Persistence.R index a75c51830d7164fa4131309b5272ad15563d83fb..2ce2dec47fe1872a183c46338b3fbcf946bef5b5 100644 --- a/tests/testthat/test-Persistence.R +++ b/tests/testthat/test-Persistence.R @@ -1,9 +1,23 @@ context("s2dv::Persistence tests") ############################################## - set.seed(1) - dat1 <- array(rnorm(540), dim = c(member = 1, time = 90, lat = 2, lon = 3)) - dates1 <- seq(1920, 2009, 1) +#dat1: year +set.seed(1) +dat1 <- rnorm(1 * 70 * 6 * 7) +dim(dat1) <- c(member = 1, time = 70, lat = 6, lon = 7) +dates1 <- seq(1920, 1989, 1) +start1 <- 1961 +end1 <- 1990 +res <- Persistence(obs1, dates = dates1, start = 1961, end = 1990, ft_start = 1, + nmemb = 40) + +#dat2: day +dates2 <- seq(as.Date(ISOdate(1990, 1, 1)), as.Date(ISOdate(1990, 4, 1)) ,1) +set.seed(2) +dat2 <- rnorm(1 * length(dates2) * 6 * 7) +dim(dat2) <- c(member = 1, time = length(dates2), lat = 6, lon = 7) +start2 <- as.Date(ISOdate(1990, 2, 15)) +end2 <- as.Date(ISOdate(1990, 4, 1)) ############################################## test_that("1. Input checks", { @@ -21,10 +35,6 @@ test_that("1. Input checks", { "Parameter 'data' must have dimension names." ) expect_error( - Persistence(data = dat1, dates = seq(1900, 2009, 1)), - "Parameter 'dates' must have the same length as in 'time_dim'." - ) - expect_error( Persistence(data = dat1, dates = dates1, time_dim = 12), "Parameter 'time_dim' must be a character string." ) @@ -33,34 +43,76 @@ test_that("1. Input checks", { "Parameter 'time_dim' is not found in 'data' dimension." ) expect_error( - Persistence(data = dat1, dates = dates1, start = 1961, end = 2005, ft_start = 0.5), + Persistence(data = dat1, dates = c(1:10)), + paste0("Parameter 'dates' must be a sequence of integer \\(YYYY\\) or ", + "string \\(YYYY-MM-DD\\) in class 'Date'.") + ) + expect_error( + Persistence(data = dat1, dates = seq(1900, 2009, 1)), + "Parameter 'dates' must have the same length as in 'time_dim'." + ) + expect_error( + Persistence(data = dat1, dates = dates1, start = start1, end = end2), + "Parameter 'dates', 'start', and 'end' should be the same format." + ) + # start + expect_error( + Persistence(data = dat1, dates = dates1, start = 1800, end = end1), + paste0("Parameter 'start' must be an integer or a string in class ", + "'Date' between 1850 and 2020.") + ) + expect_error( + Persistence(data = dat1, dates = dates1, start = 1851, end = end1), + "Parameter 'start' must be one of the values of 'dates'." + ) + expect_error( + Persistence(data = dat1, dates = dates1, start = 1921, end = end1), + paste0("Parameter 'start' must start at least 40 time steps after ", + "the first 'dates'.") + ) + # end + expect_error( + Persistence(data = dat2, dates = dates2, start = start2, end = as.Date(ISOdate(2021, 1, 1))), + paste0("Parameter 'end' must be an integer or a string in class ", + "'Date' between 1850 and 2020.") + ) + expect_error( + Persistence(data = dat2, dates = dates2, start = start2, end = as.Date(ISOdate(1990, 4, 3))), + paste0("Parameter 'end' must end at most 1 time steps after ", + "the last 'dates'.") + ) + # ft_start + expect_error( + Persistence(data = dat1, dates = dates1, start = start1, end = end1, ft_start = 0.5), "Parameter 'ft_start' must be a positive integer." ) + # ft_end expect_error( - Persistence(data = dat1, dates = dates1, start = 1961, end = 2005, ft_start = 1, + Persistence(data = dat1, dates = dates1, start = start1, end = end1, ft_start = 1, ft_end = 12), "Parameter 'ft_end' must be a positive integer below 'max_ft'." ) + # max_ft expect_error( - Persistence(data = dat1, dates = dates1, start = 1961, end = 2005, ft_start = 1, + Persistence(data = dat1, dates = dates1, start = start1, end = end1, ft_start = 1, ft_end = 12, max_ft = 13.5), "Parameter 'max_ft' must be a positive integer." ) + # nmemb expect_error( - Persistence(data = dat1, dates = dates1, start = 1961, end = 2005, ft_start = 1, - nmemb = 0), + Persistence(data = dat1, dates = dates1, start = start1, end = end1, ft_start = 1, nmemb = 0), "Parameter 'nmemb' must be a positive integer." ) expect_error( - Persistence(data = dat1, dates = dates1, start = 1961, end = 2005, ft_start = 1, + Persistence(data = dat1, dates = dates1, start = start1, end = end1, ft_start = 1, na.action = T), paste0("Parameter 'na.action' must be a function for NA values or ", "a numeric indicating the number of NA values allowed ", "before returning NA.") ) expect_error( - Persistence(data = dat1, dates = dates1, start = 1961, end = 2005, ft_start = 1, + Persistence(data = dat1, dates = dates1, start = start1, end = end1, ft_start = 1, ncores = 0), "Parameter 'ncores' must be a positive integer." ) @@ -69,7 +121,7 @@ test_that("1. Input checks", { ############################################## test_that("2. Output checks: dat1", { - res <- Persistence(dat1, dates = dates1, start = 1961, end = 2005, ft_start = 1) + res <- Persistence(dat1, dates = dates1, start = start1, end = end1, ft_start = 1) expect_equal( names(res), @@ -78,26 +130,25 @@ test_that("2. Output checks: dat1", { ) expect_equal( dim(res$persistence), - c(realization = 1, time = 45, member = 1, lat = 2, lon = 3) + c(realization = 1, time = 30, member = 1, lat = 6, lon = 7) ) expect_equal( dim(res$persistence.mean), - c(45, member = 1, lat = 2, lon = 3) - ) - expect_equal( - mean(res$persistence), - 0.03481641, - tolerance = 0.00001 + c(30, member = 1, lat = 6, lon = 7) ) +}) + +############################################## +test_that("2. Output checks: dat1", { + res <- Persistence(dat2, dates = dates2, start = start2, end = end2, ft_start = 1) + expect_equal( - range(res$persistence), - c(-1.025059, 1.042929), - tolerance = 0.0001 + names(res), + c('persistence', 'persistence.mean', 'persistence.predint', 'AR.slope', + 'AR.intercept', 'AR.lowCI', 'AR.highCI') ) expect_equal( - range(res$AR.slope), - c(-0.2636489, 0.2334777), - tolerance = 0.0001 + dim(res$persistence), + c(realization = 1, time = 46, member = 1, lat = 6, lon = 7) ) }) -