diff --git a/NAMESPACE b/NAMESPACE index b196f73d46805ea5225d923cd586a4865307a525..fb340b8829527aaf9ec0373900401664ba4f0ad4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,6 +23,7 @@ export(InsertDim) export(LeapYear) export(Load) export(MeanDims) +export(Persistence) export(PlotAno) export(PlotClim) export(PlotEquiMap) diff --git a/R/Persistence.R b/R/Persistence.R new file mode 100644 index 0000000000000000000000000000000000000000..88e15d2375193ba5d3dd03edb174dc5b1c9fb7ae --- /dev/null +++ b/R/Persistence.R @@ -0,0 +1,257 @@ +#'Compute persistence +#' +#'Compute a persistence forecast based on a lagged autoregression of +#'observational data along the time dimension, with a measure of forecast +#'uncertainty (prediction interval) based on Coelho et al., 2004.\cr\cr +#' +#'@param data A numeric array corresponding to the observational data +#' including the time dimension along which the autoregression is computed. +#' The data should start at least 40 time steps (years or days) before +#' 'start'. +#'@param dates A sequence of 4-digit integers (YYYY) or dates (YYYY-MM-DD) +#' indicating the dates available in the observations. +#'@param time_dim A character string indicating the dimension along which to +#' compute the autoregression. The default value is 'time'. +#'@param start A 4-digit integer (YYYY) or a date in the ISOdate format +#' (YYYY-MM-DD) indicating the first start date of the persistence forecast. +#'@param end A 4-digit integer (YYYY) or a date in the ISOdate format +#' (YYYY-MM-DD) indicating the last start date of the persistence forecast. +#'@param ft_start An integer indicating the forecast time for which the +#' persistence forecast should be calculated, or the first forecast time of +#' the average forecast times for which persistence should be calculated. +#'@param ft_end An (optional) integer indicating the last forecast time of the +#' average forecast times for which persistence should be calculated in the +#' case of a multi-timestep average persistence. The default value is +#' 'ft_start'. +#'@param max_ft An integer indicating the maximum forecast time possible for +#' 'data'. For example, for decadal prediction 'max_ft' would correspond to 10 +#' (years). The default value is 10. +#'@param nmemb An integer indicating the number of ensemble members to generate +#' for the persistence forecast. The default value is 1. +#'@param na.action A function or an integer. A function (e.g., na.omit, +#' na.exclude, na.fail, na.pass) indicates what should happen when the data +#' contain NAs. A numeric indicates the maximum number of NA position allowed +#' to compute regression. The default value is 10. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list containing: +#'\item{$persistence}{ +#' A numeric array with dimensions 'memb', time (start dates), latitudes and longitudes +#' containing the persistence forecast. +#'} +#'\item{$persistence.mean}{ +#' A numeric array with same dimensions as 'persistence', except the 'memb' dimension +#' which is of length 1, containing the ensemble mean persistence forecast. +#'} +#'\item{$persistence.predint}{ +#' A numeric array with same dimensions as 'persistence', except the 'memb' dimension +#' which is of length 1, containing the prediction interval of the persistence forecast. +#'} +#'\item{$AR.slope}{ +#' A numeric array with same dimensions as 'persistence', except the 'memb' dimension +#' which is of length 1, containing the slope coefficient of the autoregression. +#'} +#'\item{$AR.intercept}{ +#' A numeric array with same dimensions as 'persistence', except the 'memb' dimension +#' which is of length 1, containing the intercept coefficient of the autoregression. +#'} +#'\item{$AR.lowCI}{ +#' A numeric array with same dimensions as 'persistence', except the 'memb' dimension +#' which is of length 1, containing the lower value of the confidence interval of the +#' autoregression. +#'} +#'\item{$AR.highCI}{ +#' A numeric array with same dimensions as 'persistence', except the 'memb' dimension +#' which is of length 1, containing the upper value of the confidence interval of the +#' autoregression. +#'} +#' +#'@examples +#'#Building an example dataset with yearly start dates from 1920 to 2009 +#'obs1 <- 1 : (1 * 90 * 6 * 7) +#'dim(obs1) <- c(member = 1, time = 90, lat = 6, lon = 7) +#'dates <- seq(1920, 2009, 1) +#'persist <- Persistence(obs1, dates = dates, start = 1961, end = 2005, ft_start = 1, +#' nmemb = 40) +#' +#'@author Deborah Verfaillie, \email{deborah.verfaillie@bsc.es} +#'@import multiApply +#'@export +Persistence <- function(data, dates, time_dim = 'time', start, end, ft_start, + ft_end = ft_start, max_ft = 10, nmemb = 1, na.action = 10, + ncores = NULL) { + + # Check inputs + ## data + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!is.numeric(data)) { + stop("Parameter 'data' must be a numeric array.") + } + if (is.null(dim(data))) { #is vector + dim(data) <- c(length(data)) + names(dim(data)) <- time_dim + } + if(any(is.null(names(dim(data))))| any(nchar(names(dim(data))) == 0)) { + stop("Parameter 'data' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(data))) { + stop("Parameter 'time_dim' is not found in 'data' dimension.") + } + ## dates + if (length(dates) != dim(data)[time_dim]) { + stop("Parameter 'dates' must have the same length as in 'time_dim'.") + } + ## start +# if (!is.numeric(start) | start %% 1 != 0 | start < 0 | +# length(start) > 1 | start < 1850 | start > 2020) { +# stop("Parameter 'start' must be an integer between 1850 and 2020.") +# } +# if (start < dates[1] + 40) { +# stop("Parameter 'start' must start at least 40 time steps after the +# first start date of 'data'.") +# } + ## end +# if (!is.numeric(end) | end %% 1 != 0 | end < 0 | +# length(end) > 1 | end < 1850 | end > 2020) { +# stop("Parameter 'end' must be an integer between 1850 and 2020.") +# } +# if (end > dates[length(dates)] + 1) { +# stop("Parameter 'end' must end at most 1 time step after the +# last start date of 'data'.") +# } + ## ft_start + if (!is.numeric(ft_start) | ft_start %% 1 != 0 | ft_start < 0 | + length(ft_start) > 1) { + stop("Parameter 'ft_start' must be a positive integer.") + } + ## ft_end + if (!is.numeric(ft_end) | ft_end %% 1 != 0 | ft_end < 0 | + length(ft_end) > 1 | ft_end > max_ft) { + stop("Parameter 'ft_end' must be a positive integer below 'max_ft'.") + } + ## max_ft + if (!is.numeric(max_ft) | max_ft %% 1 != 0 | max_ft < 0 | + length(max_ft) > 1) { + stop("Parameter 'max_ft' must be a positive integer.") + } + ## nmemb + if (!is.numeric(nmemb) | nmemb %% 1 != 0 | nmemb <= 0 | + length(nmemb) > 1) { + stop("Parameter 'nmemb' must be a positive integer.") + } + ## na.action + if (!is.function(na.action) & !is.numeric(na.action)) { + stop(paste0("Parameter 'na.action' must be a function for NA values or ", + "a numeric indicating the number of NA values allowed ", + "before returning NA.")) + } + if (is.numeric(na.action)) { + if (any(na.action %% 1 != 0) | any(na.action < 0) | length(na.action) > 1) { + stop(paste0("Parameter 'na.action' must be a function for NA values or ", + "a numeric indicating the number of NA values allowed ", + "before returning NA.")) + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + ############################### + # Calculate Persistence + dim_names <- names(dim(data)) + + + output <- Apply(list(data), + target_dims = time_dim, + fun = .Persistence, +# output_dims = output_dims, + dates = dates, + time_dim = time_dim, + start = start, + end = end, + ft_start = ft_start, + ft_end = ft_end, + max_ft = max_ft, + nmemb = nmemb, + na.action = na.action, + ncores = ncores) + + return(output) +} +# x could be a vector timeseries +# start/end is a year (4-digit numeric) or a date (ISOdate) +# ft_start/ft_end are indices +.Persistence <- function(x, dates, time_dim = 'time', start, end, ft_start = 1, + ft_end = 1, max_ft = 10, nmemb = 1, na.action = 10) { + + tm <- end - start + 1 + max_date <- match(start, dates) + interval <- ft_end - ft_start + persistence.mean <- persistence.predint <- NULL + AR.slope <- AR.intercept <- AR.lowCI <- AR.highCI <- NULL + persistence <- matrix(NA, nrow = nmemb, ncol = tm) + names(dim(persistence)) <- c('realization', time_dim) + + for (sdate in tm:1){ + min_y = max_ft + ft_start + max_y = max_date + sdate - 2 + min_x = max_ft # for extreme case: ex. forecast years 1-10, interval = 9 + max_x = max_date + sdate - 2 - ft_start + + regdates = max_y - min_y + 1 + + for (val_x in min_x:max_x) { + tmp_x <- mean(x[(val_x - interval):val_x]) + if (val_x == min_x){ + obs_x <- tmp_x + } else { + obs_x <- c(obs_x, tmp_x) + } + } + + for (val_y in min_y:max_y) { + tmp_y <- mean(x[val_y:(val_y + interval)]) + if (val_y == min_y) { + obs_y <- tmp_y + } else { + obs_y <- c(obs_y, tmp_y) + } + } + reg <- .Regression(obs_y, obs_x, na.action = na.action) + len <- length(obs_x) + a <- reg$regression[2] # slope + b <- reg$regression[1] # intercept + CI <- abs(reg$conf.upper[2] - reg$conf.lower[2]) # confidence interval + stdev_reg <- CI / 1.96 + n <- max_x + X_sq <- (obs_x[len] - mean(obs_x)) ** 2 + S_sq <- sum((obs_x[1:len] - mean(obs_x)) ** 2) + + persistence.mean[sdate] <- a * mean(x[(max_y - interval):max_y]) + b + persistence.predint[sdate] <- stdev_reg * sqrt(1 + 1 / n + X_sq / S_sq) + AR.slope[sdate] <- a + AR.intercept[sdate] <- b + AR.lowCI[sdate] <- reg$regression[1] + AR.highCI[sdate] <- reg$regression[3] + persistence[ ,sdate] <- rnorm(n = nmemb, mean = persistence.mean[sdate], + sd = persistence.predint[sdate]) + } + + return(list(persistence = persistence, persistence.mean = persistence.mean, + persistence.predint = persistence.predint, AR.slope = AR.slope, + AR.intercept = AR.intercept, AR.lowCI = AR.lowCI, + AR.highCI = AR.highCI)) + +} diff --git a/man/Persistence.Rd b/man/Persistence.Rd new file mode 100644 index 0000000000000000000000000000000000000000..4cbacf17c99367f42895258d861923db05953586 --- /dev/null +++ b/man/Persistence.Rd @@ -0,0 +1,103 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Persistence.R +\name{Persistence} +\alias{Persistence} +\title{Compute persistence} +\usage{ +Persistence(data, dates, time_dim = "time", start, end, ft_start, + ft_end = ft_start, max_ft = 10, nmemb = 1, na.action = 10, + ncores = NULL) +} +\arguments{ +\item{data}{A numeric array corresponding to the observational data +including the time dimension along which the autoregression is computed. +The data should start at least 40 time steps (years or days) before +'start'.} + +\item{dates}{A sequence of 4-digit integers (YYYY) or dates (YYYY-MM-DD) +indicating the dates available in the observations.} + +\item{time_dim}{A character string indicating the dimension along which to +compute the autoregression. The default value is 'time'.} + +\item{start}{A 4-digit integer (YYYY) or a date in the ISOdate format +(YYYY-MM-DD) indicating the first start date of the persistence forecast.} + +\item{end}{A 4-digit integer (YYYY) or a date in the ISOdate format +(YYYY-MM-DD) indicating the last start date of the persistence forecast.} + +\item{ft_start}{An integer indicating the forecast time for which the +persistence forecast should be calculated, or the first forecast time of +the average forecast times for which persistence should be calculated.} + +\item{ft_end}{An (optional) integer indicating the last forecast time of the +average forecast times for which persistence should be calculated in the +case of a multi-timestep average persistence. The default value is +'ft_start'.} + +\item{max_ft}{An integer indicating the maximum forecast time possible for +'data'. For example, for decadal prediction 'max_ft' would correspond to 10 +(years). The default value is 10.} + +\item{nmemb}{An integer indicating the number of ensemble members to generate +for the persistence forecast. The default value is 1.} + +\item{na.action}{A function or an integer. A function (e.g., na.omit, +na.exclude, na.fail, na.pass) indicates what should happen when the data +contain NAs. A numeric indicates the maximum number of NA position allowed +to compute regression. The default value is 10.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A list containing: +\item{$persistence}{ + A numeric array with dimensions 'memb', time (start dates), latitudes and longitudes + containing the persistence forecast. +} +\item{$persistence.mean}{ + A numeric array with same dimensions as 'persistence', except the 'memb' dimension + which is of length 1, containing the ensemble mean persistence forecast. +} +\item{$persistence.predint}{ + A numeric array with same dimensions as 'persistence', except the 'memb' dimension + which is of length 1, containing the prediction interval of the persistence forecast. +} +\item{$AR.slope}{ + A numeric array with same dimensions as 'persistence', except the 'memb' dimension + which is of length 1, containing the slope coefficient of the autoregression. +} +\item{$AR.intercept}{ + A numeric array with same dimensions as 'persistence', except the 'memb' dimension + which is of length 1, containing the intercept coefficient of the autoregression. +} +\item{$AR.lowCI}{ + A numeric array with same dimensions as 'persistence', except the 'memb' dimension + which is of length 1, containing the lower value of the confidence interval of the + autoregression. +} +\item{$AR.highCI}{ + A numeric array with same dimensions as 'persistence', except the 'memb' dimension + which is of length 1, containing the upper value of the confidence interval of the + autoregression. +} +} +\description{ +Compute a persistence forecast based on a lagged autoregression of +observational data along the time dimension, with a measure of forecast +uncertainty (prediction interval) based on Coelho et al., 2004.\cr\cr +} +\examples{ +#Building an example dataset with yearly start dates from 1920 to 2009 +obs1 <- 1 : (1 * 90 * 6 * 7) +dim(obs1) <- c(member = 1, time = 90, lat = 6, lon = 7) +dates <- seq(1920, 2009, 1) +persist <- Persistence(obs1, dates = dates, start = 1961, end = 2005, ft_start = 1, + nmemb = 40) + +} +\author{ +Deborah Verfaillie, \email{deborah.verfaillie@bsc.es} +} + diff --git a/tests/testthat/test-Persistence.R b/tests/testthat/test-Persistence.R new file mode 100644 index 0000000000000000000000000000000000000000..a75c51830d7164fa4131309b5272ad15563d83fb --- /dev/null +++ b/tests/testthat/test-Persistence.R @@ -0,0 +1,103 @@ +context("s2dv::Persistence tests") + +############################################## + set.seed(1) + dat1 <- array(rnorm(540), dim = c(member = 1, time = 90, lat = 2, lon = 3)) + dates1 <- seq(1920, 2009, 1) + +############################################## +test_that("1. Input checks", { + + expect_error( + Persistence(c()), + "Parameter 'data' cannot be NULL." + ) + expect_error( + Persistence(data = 'a'), + "Parameter 'data' must be a numeric array." + ) + expect_error( + Persistence(data = array(1:10, dim = c(2, 5))), + "Parameter 'data' must have dimension names." + ) + expect_error( + Persistence(data = dat1, dates = seq(1900, 2009, 1)), + "Parameter 'dates' must have the same length as in 'time_dim'." + ) + expect_error( + Persistence(data = dat1, dates = dates1, time_dim = 12), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + Persistence(data = dat1, dates = dates1, time_dim = 'ftime'), + "Parameter 'time_dim' is not found in 'data' dimension." + ) + expect_error( + Persistence(data = dat1, dates = dates1, start = 1961, end = 2005, ft_start = 0.5), + "Parameter 'ft_start' must be a positive integer." + ) + expect_error( + Persistence(data = dat1, dates = dates1, start = 1961, end = 2005, ft_start = 1, + ft_end = 12), + "Parameter 'ft_end' must be a positive integer below 'max_ft'." + ) + expect_error( + Persistence(data = dat1, dates = dates1, start = 1961, end = 2005, ft_start = 1, + ft_end = 12, max_ft = 13.5), + "Parameter 'max_ft' must be a positive integer." + ) + expect_error( + Persistence(data = dat1, dates = dates1, start = 1961, end = 2005, ft_start = 1, + nmemb = 0), + "Parameter 'nmemb' must be a positive integer." + ) + + expect_error( + Persistence(data = dat1, dates = dates1, start = 1961, end = 2005, ft_start = 1, + na.action = T), + paste0("Parameter 'na.action' must be a function for NA values or ", + "a numeric indicating the number of NA values allowed ", + "before returning NA.") + ) + expect_error( + Persistence(data = dat1, dates = dates1, start = 1961, end = 2005, ft_start = 1, + ncores = 0), + "Parameter 'ncores' must be a positive integer." + ) + +}) + +############################################## +test_that("2. Output checks: dat1", { + res <- Persistence(dat1, dates = dates1, start = 1961, end = 2005, ft_start = 1) + + expect_equal( + names(res), + c('persistence', 'persistence.mean', 'persistence.predint', 'AR.slope', + 'AR.intercept', 'AR.lowCI', 'AR.highCI') + ) + expect_equal( + dim(res$persistence), + c(realization = 1, time = 45, member = 1, lat = 2, lon = 3) + ) + expect_equal( + dim(res$persistence.mean), + c(45, member = 1, lat = 2, lon = 3) + ) + expect_equal( + mean(res$persistence), + 0.03481641, + tolerance = 0.00001 + ) + expect_equal( + range(res$persistence), + c(-1.025059, 1.042929), + tolerance = 0.0001 + ) + expect_equal( + range(res$AR.slope), + c(-0.2636489, 0.2334777), + tolerance = 0.0001 + ) +}) +