Commit 84613b95 authored by aho's avatar aho
Browse files

Merge branch 'develop-persistence_check' into 'master'

Develop persistence check

See merge request !38
parents f329134a 4a36c100
Pipeline #5130 passed with stage
in 3 minutes and 52 seconds
......@@ -2,6 +2,7 @@
- Add parameter 'memb_dim' and 'memb' in Corr(). They allow the existence of the member dimension
which can have different length between exp and obs, and users can choose to do the ensemble mean
first before correlation or calculate the correlation for individual member.
- Improve Persistence() input checks, and correct the output 'AR.lowCI' and 'AR.highCI'.
# s2dv 0.1.1 (Release date: 2020-11-16)
- Change the lincense to Apache License 2.0.
......
......@@ -8,14 +8,16 @@
#' including the time dimension along which the autoregression is computed.
#' The data should start at least 40 time steps (years or days) before
#' 'start'.
#'@param dates A sequence of 4-digit integers (YYYY) or dates (YYYY-MM-DD)
#' indicating the dates available in the observations.
#'@param dates A sequence of 4-digit integers (YYYY) or string (YYYY-MM-DD)
#' in class 'Date' indicating the dates available in the observations.
#'@param time_dim A character string indicating the dimension along which to
#' compute the autoregression. The default value is 'time'.
#'@param start A 4-digit integer (YYYY) or a date in the ISOdate format
#' (YYYY-MM-DD) indicating the first start date of the persistence forecast.
#'@param end A 4-digit integer (YYYY) or a date in the ISOdate format
#' (YYYY-MM-DD) indicating the last start date of the persistence forecast.
#'@param start A 4-digit integer (YYYY) or a string (YYYY-MM-DD) in class 'Date'
#' indicating the first start date of the persistence forecast. It must be
#' between 1850 and 2020.
#'@param end A 4-digit integer (YYYY) or a string (YYYY-MM-DD) in class 'Date'
#' indicating the last start date of the persistence forecast. It must be
#' between 1850 and 2020.
#'@param ft_start An integer indicating the forecast time for which the
#' persistence forecast should be calculated, or the first forecast time of
#' the average forecast times for which persistence should be calculated.
......@@ -37,45 +39,54 @@
#'
#'@return
#'A list containing:
#'\item{$persistence}{
#'\item{$persistence} {
#' A numeric array with dimensions 'memb', time (start dates), latitudes and longitudes
#' containing the persistence forecast.
#'}
#'\item{$persistence.mean}{
#'\item{$persistence.mean} {
#' A numeric array with same dimensions as 'persistence', except the 'memb' dimension
#' which is of length 1, containing the ensemble mean persistence forecast.
#'}
#'\item{$persistence.predint}{
#'\item{$persistence.predint} {
#' A numeric array with same dimensions as 'persistence', except the 'memb' dimension
#' which is of length 1, containing the prediction interval of the persistence forecast.
#'}
#'\item{$AR.slope}{
#'\item{$AR.slope} {
#' A numeric array with same dimensions as 'persistence', except the 'memb' dimension
#' which is of length 1, containing the slope coefficient of the autoregression.
#'}
#'\item{$AR.intercept}{
#'\item{$AR.intercept} {
#' A numeric array with same dimensions as 'persistence', except the 'memb' dimension
#' which is of length 1, containing the intercept coefficient of the autoregression.
#'}
#'\item{$AR.lowCI}{
#'\item{$AR.lowCI} {
#' A numeric array with same dimensions as 'persistence', except the 'memb' dimension
#' which is of length 1, containing the lower value of the confidence interval of the
#' autoregression.
#'}
#'\item{$AR.highCI}{
#'\item{$AR.highCI} {
#' A numeric array with same dimensions as 'persistence', except the 'memb' dimension
#' which is of length 1, containing the upper value of the confidence interval of the
#' autoregression.
#'}
#'
#'@examples
#'#Building an example dataset with yearly start dates from 1920 to 2009
#'# Case 1: year
#'# Building an example dataset with yearly start dates from 1920 to 2009
#'set.seed(1)
#'obs1 <- rnorm(1 * 70 * 6 * 7)
#'dim(obs1) <- c(member = 1, time = 70, lat = 6, lon = 7)
#'dates <- seq(1940, 2009, 1)
#'persist <- Persistence(obs1, dates = dates, start = 1961, end = 2005, ft_start = 1,
#' nmemb = 40)
#'dates <- seq(1920, 1989, 1)
#'res <- Persistence(obs1, dates = dates, start = 1961, end = 1980, ft_start = 1,
#' nmemb = 40)
#'# Case 2: day
#'dates <- seq(as.Date(ISOdate(1990, 1, 1)), as.Date(ISOdate(1990, 4, 1)) ,1)
#'start <- as.Date(ISOdate(1990, 2, 15))
#'end <- as.Date(ISOdate(1990, 4, 1))
#'set.seed(1)
#'data <- rnorm(1 * length(dates) * 6 * 7)
#'dim(data) <- c(member = 1, time = length(dates), lat = 6, lon = 7)
#'res <- Persistence(data, dates = dates, start = start, end = end, ft_start = 1)
#'
#'@import multiApply
#'@export
......@@ -95,7 +106,7 @@ Persistence <- function(data, dates, time_dim = 'time', start, end, ft_start,
dim(data) <- c(length(data))
names(dim(data)) <- time_dim
}
if(any(is.null(names(dim(data))))| any(nchar(names(dim(data))) == 0)) {
if(any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) {
stop("Parameter 'data' must have dimension names.")
}
## time_dim
......@@ -106,27 +117,79 @@ Persistence <- function(data, dates, time_dim = 'time', start, end, ft_start,
stop("Parameter 'time_dim' is not found in 'data' dimension.")
}
## dates
if (is.numeric(dates)) { #(YYYY)
if (any(nchar(dates) != 4) | any(dates %% 1 != 0) | any(dates <= 0)) {
stop(paste0("Parameter 'dates' must be a sequence of integer (YYYY) or ",
"string (YYYY-MM-DD) in class 'Date'."))
}
} else if (class(dates) == 'Date') { #(YYYY-MM-DD)
} else {
stop(paste0("Parameter 'dates' must be a sequence of integer (YYYY) or ",
"string (YYYY-MM-DD) in class 'Date'."))
}
if (length(dates) != dim(data)[time_dim]) {
stop("Parameter 'dates' must have the same length as in 'time_dim'.")
}
## dates, start, and end
if (!all(sapply(list(class(dates), class(start)), function(x) x == class(end)))) {
stop("Parameter 'dates', 'start', and 'end' should be the same format.")
}
## start
# if (!is.numeric(start) | start %% 1 != 0 | start < 0 |
# length(start) > 1 | start < 1850 | start > 2020) {
# stop("Parameter 'start' must be an integer between 1850 and 2020.")
# }
# if (start < dates[1] + 40) {
# stop("Parameter 'start' must start at least 40 time steps after the
# first start date of 'data'.")
# }
if (is.numeric(start)) { #(YYYY)
if (length(start) > 1 | any(start %% 1 != 0) | any(start < 1850) | any(start > 2020)) {
stop(paste0("Parameter 'start' must be an integer or a string in class ",
"'Date' between 1850 and 2020."))
}
if (is.na(match(start, dates))) {
stop("Parameter 'start' must be one of the values of 'dates'.")
}
if (start < dates[1] + 40) {
stop(paste0("Parameter 'start' must start at least 40 time steps after ",
"the first 'dates'."))
}
} else if (class(start) == 'Date') {
if (length(start) > 1 | any(start < as.Date(ISOdate(1850, 1, 1))) |
any(start > as.Date(ISOdate(2021, 1, 1)))) {
stop(paste0("Parameter 'start' must be an integer or a string in class ",
"'Date' between 1850 and 2020."))
}
if (is.na(match(start, dates))) {
stop("Parameter 'start' must be one of the values of 'dates'.")
}
if (start < dates[1] + 40) {
stop(paste0("Parameter 'start' must start at least 40 time steps after ",
"the first 'dates'."))
}
} else {
stop(paste0("Parameter 'start' must be an integer or a string in class ",
"'Date' between 1850 and 2020."))
}
## end
# if (!is.numeric(end) | end %% 1 != 0 | end < 0 |
# length(end) > 1 | end < 1850 | end > 2020) {
# stop("Parameter 'end' must be an integer between 1850 and 2020.")
# }
# if (end > dates[length(dates)] + 1) {
# stop("Parameter 'end' must end at most 1 time step after the
# last start date of 'data'.")
# }
if (is.numeric(end)) { #(YYYY)
if (length(end) > 1 | any(end %% 1 != 0) | any(end < 1850) | any(end > 2020)) {
stop(paste0("Parameter 'end' must be an integer or a string in class ",
"'Date' between 1850 and 2020."))
}
if (end > dates[length(dates)] + 1) {
stop(paste0("Parameter 'end' must end at most 1 time steps after ",
"the last 'dates'."))
}
} else if (class(end) == 'Date') {
if (length(end) > 1 | any(end < as.Date(ISOdate(1850, 1, 1))) |
any(end > as.Date(ISOdate(2020, 12, 31)))) {
stop(paste0("Parameter 'end' must be an integer or a string in class ",
"'Date' between 1850 and 2020."))
}
if (end > dates[length(dates)] + 1) {
stop(paste0("Parameter 'end' must end at most 1 time steps after ",
"the last 'dates'."))
}
} else {
stop(paste0("Parameter 'end' must be an integer or a string in class ",
"'Date' between 1850 and 2020."))
}
## ft_start
if (!is.numeric(ft_start) | ft_start %% 1 != 0 | ft_start < 0 |
length(ft_start) > 1) {
......@@ -195,7 +258,6 @@ Persistence <- function(data, dates, time_dim = 'time', start, end, ft_start,
# ft_start/ft_end are indices
.Persistence <- function(x, dates, time_dim = 'time', start, end, ft_start = 1,
ft_end = 1, max_ft = 10, nmemb = 1, na.action = 10) {
tm <- end - start + 1
max_date <- match(start, dates)
interval <- ft_end - ft_start
......@@ -204,7 +266,7 @@ Persistence <- function(data, dates, time_dim = 'time', start, end, ft_start,
persistence <- matrix(NA, nrow = nmemb, ncol = tm)
names(dim(persistence)) <- c('realization', time_dim)
for (sdate in tm:1){
for (sdate in tm:1) {
min_y = max_ft + ft_start
max_y = max_date + sdate - 2
min_x = max_ft # for extreme case: ex. forecast years 1-10, interval = 9
......@@ -243,9 +305,9 @@ Persistence <- function(data, dates, time_dim = 'time', start, end, ft_start,
persistence.predint[sdate] <- stdev_reg * sqrt(1 + 1 / n + X_sq / S_sq)
AR.slope[sdate] <- a
AR.intercept[sdate] <- b
AR.lowCI[sdate] <- reg$regression[1]
AR.highCI[sdate] <- reg$regression[3]
persistence[ ,sdate] <- rnorm(n = nmemb, mean = persistence.mean[sdate],
AR.lowCI[sdate] <- reg$conf.lower[2]
AR.highCI[sdate] <- reg$conf.upper[2]
persistence[ , sdate] <- rnorm(n = nmemb, mean = persistence.mean[sdate],
sd = persistence.predint[sdate])
}
......
......@@ -24,17 +24,19 @@ including the time dimension along which the autoregression is computed.
The data should start at least 40 time steps (years or days) before
'start'.}
\item{dates}{A sequence of 4-digit integers (YYYY) or dates (YYYY-MM-DD)
indicating the dates available in the observations.}
\item{dates}{A sequence of 4-digit integers (YYYY) or string (YYYY-MM-DD)
in class 'Date' indicating the dates available in the observations.}
\item{time_dim}{A character string indicating the dimension along which to
compute the autoregression. The default value is 'time'.}
\item{start}{A 4-digit integer (YYYY) or a date in the ISOdate format
(YYYY-MM-DD) indicating the first start date of the persistence forecast.}
\item{start}{A 4-digit integer (YYYY) or a string (YYYY-MM-DD) in class 'Date'
indicating the first start date of the persistence forecast. It must be
between 1850 and 2020.}
\item{end}{A 4-digit integer (YYYY) or a date in the ISOdate format
(YYYY-MM-DD) indicating the last start date of the persistence forecast.}
\item{end}{A 4-digit integer (YYYY) or a string (YYYY-MM-DD) in class 'Date'
indicating the last start date of the persistence forecast. It must be
between 1850 and 2020.}
\item{ft_start}{An integer indicating the forecast time for which the
persistence forecast should be calculated, or the first forecast time of
......@@ -62,32 +64,32 @@ computation. The default value is NULL.}
}
\value{
A list containing:
\item{$persistence}{
\item{$persistence} {
A numeric array with dimensions 'memb', time (start dates), latitudes and longitudes
containing the persistence forecast.
}
\item{$persistence.mean}{
\item{$persistence.mean} {
A numeric array with same dimensions as 'persistence', except the 'memb' dimension
which is of length 1, containing the ensemble mean persistence forecast.
}
\item{$persistence.predint}{
\item{$persistence.predint} {
A numeric array with same dimensions as 'persistence', except the 'memb' dimension
which is of length 1, containing the prediction interval of the persistence forecast.
}
\item{$AR.slope}{
\item{$AR.slope} {
A numeric array with same dimensions as 'persistence', except the 'memb' dimension
which is of length 1, containing the slope coefficient of the autoregression.
}
\item{$AR.intercept}{
\item{$AR.intercept} {
A numeric array with same dimensions as 'persistence', except the 'memb' dimension
which is of length 1, containing the intercept coefficient of the autoregression.
}
\item{$AR.lowCI}{
\item{$AR.lowCI} {
A numeric array with same dimensions as 'persistence', except the 'memb' dimension
which is of length 1, containing the lower value of the confidence interval of the
autoregression.
}
\item{$AR.highCI}{
\item{$AR.highCI} {
A numeric array with same dimensions as 'persistence', except the 'memb' dimension
which is of length 1, containing the upper value of the confidence interval of the
autoregression.
......@@ -99,12 +101,21 @@ observational data along the time dimension, with a measure of forecast
uncertainty (prediction interval) based on Coelho et al., 2004.\cr\cr
}
\examples{
#Building an example dataset with yearly start dates from 1920 to 2009
# Case 1: year
# Building an example dataset with yearly start dates from 1920 to 2009
set.seed(1)
obs1 <- rnorm(1 * 70 * 6 * 7)
dim(obs1) <- c(member = 1, time = 70, lat = 6, lon = 7)
dates <- seq(1940, 2009, 1)
persist <- Persistence(obs1, dates = dates, start = 1961, end = 2005, ft_start = 1,
nmemb = 40)
dates <- seq(1920, 1989, 1)
res <- Persistence(obs1, dates = dates, start = 1961, end = 1980, ft_start = 1,
nmemb = 40)
# Case 2: day
dates <- seq(as.Date(ISOdate(1990, 1, 1)), as.Date(ISOdate(1990, 4, 1)) ,1)
start <- as.Date(ISOdate(1990, 2, 15))
end <- as.Date(ISOdate(1990, 4, 1))
set.seed(1)
data <- rnorm(1 * length(dates) * 6 * 7)
dim(data) <- c(member = 1, time = length(dates), lat = 6, lon = 7)
res <- Persistence(data, dates = dates, start = start, end = end, ft_start = 1)
}
context("s2dv::Persistence tests")
##############################################
set.seed(1)
dat1 <- array(rnorm(540), dim = c(member = 1, time = 90, lat = 2, lon = 3))
dates1 <- seq(1920, 2009, 1)
#dat1: year
set.seed(1)
dat1 <- rnorm(1 * 70 * 6 * 7)
dim(dat1) <- c(member = 1, time = 70, lat = 6, lon = 7)
dates1 <- seq(1920, 1989, 1)
start1 <- 1961
end1 <- 1990
res <- Persistence(obs1, dates = dates1, start = 1961, end = 1990, ft_start = 1,
nmemb = 40)
#dat2: day
dates2 <- seq(as.Date(ISOdate(1990, 1, 1)), as.Date(ISOdate(1990, 4, 1)) ,1)
set.seed(2)
dat2 <- rnorm(1 * length(dates2) * 6 * 7)
dim(dat2) <- c(member = 1, time = length(dates2), lat = 6, lon = 7)
start2 <- as.Date(ISOdate(1990, 2, 15))
end2 <- as.Date(ISOdate(1990, 4, 1))
##############################################
test_that("1. Input checks", {
......@@ -21,10 +35,6 @@ test_that("1. Input checks", {
"Parameter 'data' must have dimension names."
)
expect_error(
Persistence(data = dat1, dates = seq(1900, 2009, 1)),
"Parameter 'dates' must have the same length as in 'time_dim'."
)
expect_error(
Persistence(data = dat1, dates = dates1, time_dim = 12),
"Parameter 'time_dim' must be a character string."
)
......@@ -33,34 +43,76 @@ test_that("1. Input checks", {
"Parameter 'time_dim' is not found in 'data' dimension."
)
expect_error(
Persistence(data = dat1, dates = dates1, start = 1961, end = 2005, ft_start = 0.5),
Persistence(data = dat1, dates = c(1:10)),
paste0("Parameter 'dates' must be a sequence of integer \\(YYYY\\) or ",
"string \\(YYYY-MM-DD\\) in class 'Date'.")
)
expect_error(
Persistence(data = dat1, dates = seq(1900, 2009, 1)),
"Parameter 'dates' must have the same length as in 'time_dim'."
)
expect_error(
Persistence(data = dat1, dates = dates1, start = start1, end = end2),
"Parameter 'dates', 'start', and 'end' should be the same format."
)
# start
expect_error(
Persistence(data = dat1, dates = dates1, start = 1800, end = end1),
paste0("Parameter 'start' must be an integer or a string in class ",
"'Date' between 1850 and 2020.")
)
expect_error(
Persistence(data = dat1, dates = dates1, start = 1851, end = end1),
"Parameter 'start' must be one of the values of 'dates'."
)
expect_error(
Persistence(data = dat1, dates = dates1, start = 1921, end = end1),
paste0("Parameter 'start' must start at least 40 time steps after ",
"the first 'dates'.")
)
# end
expect_error(
Persistence(data = dat2, dates = dates2, start = start2, end = as.Date(ISOdate(2021, 1, 1))),
paste0("Parameter 'end' must be an integer or a string in class ",
"'Date' between 1850 and 2020.")
)
expect_error(
Persistence(data = dat2, dates = dates2, start = start2, end = as.Date(ISOdate(1990, 4, 3))),
paste0("Parameter 'end' must end at most 1 time steps after ",
"the last 'dates'.")
)
# ft_start
expect_error(
Persistence(data = dat1, dates = dates1, start = start1, end = end1, ft_start = 0.5),
"Parameter 'ft_start' must be a positive integer."
)
# ft_end
expect_error(
Persistence(data = dat1, dates = dates1, start = 1961, end = 2005, ft_start = 1,
Persistence(data = dat1, dates = dates1, start = start1, end = end1, ft_start = 1,
ft_end = 12),
"Parameter 'ft_end' must be a positive integer below 'max_ft'."
)
# max_ft
expect_error(
Persistence(data = dat1, dates = dates1, start = 1961, end = 2005, ft_start = 1,
Persistence(data = dat1, dates = dates1, start = start1, end = end1, ft_start = 1,
ft_end = 12, max_ft = 13.5),
"Parameter 'max_ft' must be a positive integer."
)
# nmemb
expect_error(
Persistence(data = dat1, dates = dates1, start = 1961, end = 2005, ft_start = 1,
nmemb = 0),
Persistence(data = dat1, dates = dates1, start = start1, end = end1, ft_start = 1, nmemb = 0),
"Parameter 'nmemb' must be a positive integer."
)
expect_error(
Persistence(data = dat1, dates = dates1, start = 1961, end = 2005, ft_start = 1,
Persistence(data = dat1, dates = dates1, start = start1, end = end1, ft_start = 1,
na.action = T),
paste0("Parameter 'na.action' must be a function for NA values or ",
"a numeric indicating the number of NA values allowed ",
"before returning NA.")
)
expect_error(
Persistence(data = dat1, dates = dates1, start = 1961, end = 2005, ft_start = 1,
Persistence(data = dat1, dates = dates1, start = start1, end = end1, ft_start = 1,
ncores = 0),
"Parameter 'ncores' must be a positive integer."
)
......@@ -69,7 +121,7 @@ test_that("1. Input checks", {
##############################################
test_that("2. Output checks: dat1", {
res <- Persistence(dat1, dates = dates1, start = 1961, end = 2005, ft_start = 1)
res <- Persistence(dat1, dates = dates1, start = start1, end = end1, ft_start = 1)
expect_equal(
names(res),
......@@ -78,26 +130,25 @@ test_that("2. Output checks: dat1", {
)
expect_equal(
dim(res$persistence),
c(realization = 1, time = 45, member = 1, lat = 2, lon = 3)
c(realization = 1, time = 30, member = 1, lat = 6, lon = 7)
)
expect_equal(
dim(res$persistence.mean),
c(45, member = 1, lat = 2, lon = 3)
)
expect_equal(
mean(res$persistence),
0.03481641,
tolerance = 0.00001
c(30, member = 1, lat = 6, lon = 7)
)
})
##############################################
test_that("2. Output checks: dat1", {
res <- Persistence(dat2, dates = dates2, start = start2, end = end2, ft_start = 1)
expect_equal(
range(res$persistence),
c(-1.025059, 1.042929),
tolerance = 0.0001
names(res),
c('persistence', 'persistence.mean', 'persistence.predint', 'AR.slope',
'AR.intercept', 'AR.lowCI', 'AR.highCI')
)
expect_equal(
range(res$AR.slope),
c(-0.2636489, 0.2334777),
tolerance = 0.0001
dim(res$persistence),
c(realization = 1, time = 46, member = 1, lat = 6, lon = 7)
)
})
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment