diff --git a/DESCRIPTION b/DESCRIPTION index f88f28de7dbc8bf5ebf855c8b62d86043ce24318..f61512a0e6181afc99dc2581548aad8caf862372 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -4,7 +4,9 @@ Version: 0.0.1 Authors@R: c( person("BSC-CNS", role = c("aut", "cph")), person("An-Chi", "Ho", , "an.ho@bsc.es", role = c("aut", "cre")), - person("Nuria", "Perez-Zanon", , "nuria.perez@bsc.es", role = "aut")) + person("Nuria", "Perez-Zanon", , "nuria.perez@bsc.es", role = "aut"), + person("Andrea", "Manrique", , "andrea.manrique@bsc.es", role = "ctb"), + person("Carlos", "Delgado", , "carlos.delgado@bsc.es", role = "ctb")) Description: The advanced version of package 's2dverification'. It is intended for 'seasonal to decadal' (s2d) climate forecast verification, but it can also be used in other kinds of forecasts or general climate analysis. diff --git a/NAMESPACE b/NAMESPACE index 3e6f87645156f7310c1c1712c085526afa91823b..b196f73d46805ea5225d923cd586a4865307a525 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -32,6 +32,7 @@ export(PlotSection) export(PlotStereoMap) export(RMS) export(RMSSS) +export(RandomWalkTest) export(Regression) export(Reorder) export(Season) diff --git a/R/RandomWalkTest.R b/R/RandomWalkTest.R new file mode 100644 index 0000000000000000000000000000000000000000..0771331ca953b5199b95dcdd73e06fc2c41ce0ac --- /dev/null +++ b/R/RandomWalkTest.R @@ -0,0 +1,85 @@ +#'Random walk test for skill differences +#' +#'Forecast comparison of the skill obtained with 2 forecasts (with respect to a +#'common reference) based on Random Walks, with significance estimate at the 5% +#'confidence level, as in DelSole and Tippett (2015). +#' +#'@param skill_A A numerical array of the time series of the skill with the +#' forecaster A's. +#'@param skill_B A numerical array of the time series of the skill with the +#' forecaster B's. The dimensions should be identical as parameter 'skill_A'. +#'@param time_dim A character string indicating the name of the dimension along +#' which the tests are computed. The default value is 'sdate'. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return A list of 2: +#'\item{$score}{ +#' A numerical array with the same dimensions as the input arrays except +#' 'time_dim'. The number of times that forecaster A has been better than +#' forecaster B minus the number of times that forecaster B has been better +#' than forecaster A (for skill positively oriented). If $score is positive +#' forecaster A is better than forecaster B, and if $score is negative +#' forecaster B is better than forecaster B. +#'} +#'\item{$signif}{ +#' A logical array with the same dimensions as the input arrays except +#' 'time_dim'. Whether the difference is significant or not at the 5% +#' significance level. +#'} +#' +#'@examples +#' fcst_A <- array(c(11:50), dim = c(sdate = 10, lat = 2, lon = 2)) +#' fcst_B <- array(c(21:60), dim = c(sdate = 10, lat = 2, lon = 2)) +#' reference <- array(1:40, dim = c(sdate = 10, lat = 2, lon = 2)) +#' skill_A <- abs(fcst_A - reference) +#' skill_B <- abs(fcst_B - reference) +#' RandomWalkTest(skill_A = skill_A, skill_B = skill_B, time_dim = 'sdate', ncores = 1) +#' +#'@author Andrea Manrique \email{andrea.manrique@bsc.es} +#'@author Carlos Delgado-Torres \email{carlos.delgado@bsc.es} +#' +#'@import multiApply +#'@export +RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', ncores = NULL){ + + ## Check inputs + if (is.null(skill_A) | is.null(skill_B)){ + stop("Parameters 'skill_A' and 'skill_B' cannot be NULL.") + } + if(!is.numeric(skill_A) | !is.numeric(skill_B)){ + stop("Parameters 'skill_A' and 'skill_B' must be a numerical array.") + } + if (!identical(dim(skill_A),dim(skill_B))){ + stop("Parameters 'skill_A' and 'skill_B' must have the same dimensions.") + } + if(!is.character(time_dim)){ + stop("Parameter 'time_dim' must be a character string.") + } + if(!time_dim %in% names(dim(skill_A)) | !time_dim %in% names(dim(skill_B))){ + stop("Parameter 'time_dim' is not found in 'skill_A' or 'skill_B' dimensions.") + } + if (!is.null(ncores)){ + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores < 0 | length(ncores) > 1){ + stop("Parameter 'ncores' must be a positive integer.") + } + } + + ## Compute the Random Walk Test + res <- multiApply::Apply(data = list(skill_A, skill_B), + target_dims = time_dim, + fun = .RandomWalkTest, + ncores = ncores) + return(res) +} + +.RandomWalkTest <- function(skill_A, skill_B){ + + score <- cumsum(skill_A > skill_B) - cumsum(skill_A < skill_B) + + ## TRUE if significant (if last value is above or below 2*sqrt(N)) + signif<- ifelse(test = (score[length(skill_A)] < (-2)*sqrt(length(skill_A))) | (score[length(skill_A)] > 2*sqrt(length(skill_A))), + yes = TRUE, no = FALSE) + + return(list("score"=score[length(skill_A)],"signif"=signif)) +} diff --git a/man/RandomWalkTest.Rd b/man/RandomWalkTest.Rd new file mode 100644 index 0000000000000000000000000000000000000000..29465179a26fd40758787270263ec72069192030 --- /dev/null +++ b/man/RandomWalkTest.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RandomWalkTest.R +\name{RandomWalkTest} +\alias{RandomWalkTest} +\title{Random walk test for skill differences} +\usage{ +RandomWalkTest(skill_A, skill_B, time_dim = "sdate", ncores = NULL) +} +\arguments{ +\item{skill_A}{A numerical array of the time series of the skill with the +forecaster A's.} + +\item{skill_B}{A numerical array of the time series of the skill with the +forecaster B's. The dimensions should be identical as parameter 'skill_A'.} + +\item{time_dim}{A character string indicating the name of the dimension along +which the tests are computed. The default value is 'sdate'.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A list of 2: +\item{$score}{ + A numerical array with the same dimensions as the input arrays except + 'time_dim'. The number of times that forecaster A has been better than + forecaster B minus the number of times that forecaster B has been better + than forecaster A (for skill positively oriented). If $score is positive + forecaster A is better than forecaster B, and if $score is negative + forecaster B is better than forecaster B. +} +\item{$signif}{ + A logical array with the same dimensions as the input arrays except + 'time_dim'. Whether the difference is significant or not at the 5% + significance level. +} +} +\description{ +Forecast comparison of the skill obtained with 2 forecasts (with respect to a +common reference) based on Random Walks, with significance estimate at the 5% +confidence level, as in DelSole and Tippett (2015). +} +\examples{ +fcst_A <- array(c(11:50), dim = c(sdate = 10, lat = 2, lon = 2)) +fcst_B <- array(c(21:60), dim = c(sdate = 10, lat = 2, lon = 2)) +reference <- array(1:40, dim = c(sdate = 10, lat = 2, lon = 2)) +skill_A <- abs(fcst_A - reference) +skill_B <- abs(fcst_B - reference) +RandomWalkTest(skill_A = skill_A, skill_B = skill_B, time_dim = 'sdate', ncores = 1) + +} +\author{ +Andrea Manrique \email{andrea.manrique@bsc.es} + +Carlos Delgado-Torres \email{carlos.delgado@bsc.es} +} + diff --git a/tests/testthat/test-RandomWalkTest.R b/tests/testthat/test-RandomWalkTest.R new file mode 100644 index 0000000000000000000000000000000000000000..d50b9ce9118910e7f7558dae1687b2f6467c0439 --- /dev/null +++ b/tests/testthat/test-RandomWalkTest.R @@ -0,0 +1,81 @@ +context("s2dv::RandomWalkTest tests") + +############################################## + set.seed(1) + dat1_A <- array(rnorm(64), dim = c(sdate = 4, ftime = 4, lat = 2, lon = 2)) + set.seed(2) + dat1_B <- array(rnorm(64), dim = c(sdate = 4, ftime = 4, lat = 2, lon = 2)) + + +############################################## +test_that("1. Input checks", { + + expect_error( + RandomWalkTest(c(), dat1_B), + "Parameters 'skill_A' and 'skill_B' cannot be NULL." + ) + expect_error( + RandomWalkTest(dat1_A, 'a'), + "Parameters 'skill_A' and 'skill_B' must be a numerical array." + ) + expect_error( + RandomWalkTest(array(1:10, dim = c(2, 5)), array(1:12, dim = c(2, 6))), + "Parameters 'skill_A' and 'skill_B' must have the same dimensions." + ) + expect_error( + RandomWalkTest(1:10, 2:11, time_dim = 12), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + RandomWalkTest(dat1_A, dat1_B, time_dim = 'date'), + "Parameter 'time_dim' is not found in 'skill_A' or 'skill_B' dimension." + ) + expect_error( + RandomWalkTest(dat1_A, dat1_B, ncores = T), + "Parameter 'ncores' must be a positive integer." + ) + +}) + +############################################## +test_that("2. Output checks: dat1", { + res <- RandomWalkTest(dat1_A, dat1_B) + + expect_equal( + length(res), + 2 + ) + expect_equal( + names(res), + c("score", "signif") + ) + expect_equal( + dim(res$score), + c(ftime = 4, lat = 2, lon = 2) + ) + expect_equal( + dim(res$signif), + c(ftime = 4, lat = 2, lon = 2) + ) + expect_equal( + is.numeric(res$score), + TRUE + ) + expect_equal( + is.logical(res$signif), + TRUE + ) + expect_equal( + mean(res$score, na.rm = T), + 0 + ) + expect_equal( + res$score[, 1, 1], + c(0, 0, -2, -2) + ) + expect_equal( + res$score[, 1, 2], + c(0, 4, 2, 0) + ) + +})