diff --git a/R/CRPSS.R b/R/CRPSS.R index e2b0df6edbd4cd1227c0e5c34991e013422c5048..6b1ed171a681e3e2b790060f47db1c556969a1b8 100644 --- a/R/CRPSS.R +++ b/R/CRPSS.R @@ -7,11 +7,10 @@ #'infinite and 1. If the CRPSS is positive, it indicates that the forecast has #'higher skill than the reference forecast, while a negative value means that it #'has a lower skill. Examples of reference forecasts are the climatological -#'forecast (same probabilities for all categories for all time steps), -#'persistence, a previous model version, or another model. It is computed as -#'CRPSS = 1 - CRPS_exp / CRPS_ref. The statistical significance is obtained -#'based on a Random Walk test at the 95% confidence level (DelSole and Tippett, -#'2016). +#'forecast, persistence, a previous model version, or another model. It is +#'computed as 'CRPSS = 1 - CRPS_exp / CRPS_ref. The statistical significance is +#'obtained based on a Random Walk test at the 95% confidence level (DelSole and +#'Tippett, 2016). #' #'@param exp A named numerical array of the forecast with at least time #' dimension. @@ -22,9 +21,12 @@ #' least time and member dimension. The dimensions must be the same as 'exp' #' except 'memb_dim' and 'dat_dim'. If there is only one reference dataset, #' it should not have dataset dimension. If there is corresponding reference -#' for each experiement, the dataset dimension must have the same length as in +#' for each experiment, the dataset dimension must have the same length as in #' 'exp'. If 'ref' is NULL, the climatological forecast is used as reference -#' forecast. The default value is NULL. +#' forecast. To build the climatological forecast, the observed values along +#' the whole time period are used as different members for all time steps. The +#' parameter 'clim.cross.val' controls whether to build it using +#' cross-validation. The default value is NULL. #'@param time_dim A character string indicating the name of the time dimension. #' The default value is 'sdate'. #'@param memb_dim A character string indicating the name of the member dimension @@ -36,6 +38,10 @@ #'@param Fair A logical indicating whether to compute the FairCRPSS (the #' potential CRPSS that the forecast would have with an infinite ensemble #' size). The default value is FALSE. +#'@param clim.cross.val A logical indicating whether to build the climatological +#' forecast in cross-validation (i.e. excluding the observed value of the time +#' step when building the probabilistic distribution function for that +#' particular time step). Only used if 'ref' is NULL. The default value is TRUE. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -67,7 +73,7 @@ #'@importFrom ClimProjDiags Subset #'@export CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, - Fair = FALSE, ncores = NULL) { + Fair = FALSE, clim.cross.val = TRUE, ncores = NULL) { # Check inputs ## exp, obs, and ref (1) @@ -159,9 +165,13 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', } } ## Fair - if (!is.logical(Fair) | length(Fair) > 1) { + if (!is.logical(Fair) | is.na(Fair) | length(Fair) > 1) { stop("Parameter 'Fair' must be either TRUE or FALSE.") } + ## clim.cross.val + if (!is.logical(clim.cross.val) | is.na(clim.cross.val) | length(clim.cross.val) != 1) { + stop("Parameter 'clim.cross.val' must be either TRUE or FALSE.") + } ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { @@ -193,13 +203,14 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', time_dim = time_dim, memb_dim = memb_dim, dat_dim = dat_dim, Fair = Fair, + clim.cross.val = clim.cross.val, ncores = ncores) return(output) } .CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, - Fair = FALSE) { + Fair = FALSE, clim.cross.val = TRUE) { # exp: [sdate, memb, (dat)] # obs: [sdate, (dat)] @@ -228,12 +239,13 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', obs_time_len <- dim(obs)[time_dim] if (is.null(dat_dim)) { - ## Without cross-validation: - ## ref <- array(data = rep(obs, each = obs_time_len), dim = c(obs_time_len, obs_time_len)) - ## With cross-validation (excluding the value of that year to create ref for that year): - ref <- array(data = NA, dim = c(obs_time_len, obs_time_len - 1)) - for (i in 1:obs_time_len) { - ref[i, ] <- obs[-i] + if (isFALSE(clim.cross.val)) { ## Without cross-validation + ref <- array(data = rep(obs, each = obs_time_len), dim = c(obs_time_len, obs_time_len)) + } else if (isTRUE(clim.cross.val)) { ## With cross-validation (excluding the value of that year to create ref for that year) + ref <- array(data = NA, dim = c(obs_time_len, obs_time_len - 1)) + for (i in 1:obs_time_len) { + ref[i, ] <- obs[-i] + } } names(dim(ref)) <- c(time_dim, memb_dim) @@ -247,12 +259,13 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', names(dim(crps_ref)) <- c(time_dim, 'nobs') for (i_obs in 1:nobs) { - ## Without cross-validation: - ## ref <- array(data = rep(obs[, i_obs], each = obs_time_len), dim = c(obs_time_len, obs_time_len)) - ## With cross-validation (excluding the value of that year to create ref for that year): - ref <- array(data = NA, dim = c(obs_time_len, obs_time_len - 1)) - for (i in 1:obs_time_len) { - ref[i, ] <- obs[-i, i_obs] + if (isFALSE(clim.cross.val)) { ## Without cross-validation + ref <- array(data = rep(obs[, i_obs], each = obs_time_len), dim = c(obs_time_len, obs_time_len)) + } else if (isTRUE(clim.cross.val)) { ## With cross-validation (excluding the value of that year to create ref for that year) + ref <- array(data = NA, dim = c(obs_time_len, obs_time_len - 1)) + for (i in 1:obs_time_len) { + ref[i, ] <- obs[-i, i_obs] + } } names(dim(ref)) <- c(time_dim, memb_dim) diff --git a/man/CRPSS.Rd b/man/CRPSS.Rd index 31bf501ec19771f8a86703bfa83fc567daaf4b0f..3fa66b0a7f17f70594f255faebbad42c2b592664 100644 --- a/man/CRPSS.Rd +++ b/man/CRPSS.Rd @@ -12,6 +12,7 @@ CRPSS( memb_dim = "member", dat_dim = NULL, Fair = FALSE, + clim.cross.val = TRUE, ncores = NULL ) } @@ -27,9 +28,12 @@ and 'dat_dim'.} least time and member dimension. The dimensions must be the same as 'exp' except 'memb_dim' and 'dat_dim'. If there is only one reference dataset, it should not have dataset dimension. If there is corresponding reference -for each experiement, the dataset dimension must have the same length as in +for each experiment, the dataset dimension must have the same length as in 'exp'. If 'ref' is NULL, the climatological forecast is used as reference -forecast. The default value is NULL.} +forecast. To build the climatological forecast, the observed values along +the whole time period are used as different members for all time steps. The +parameter 'clim.cross.val' controls whether to build it using +cross-validation. The default value is NULL.} \item{time_dim}{A character string indicating the name of the time dimension. The default value is 'sdate'.} @@ -46,6 +50,11 @@ The default value is NULL.} potential CRPSS that the forecast would have with an infinite ensemble size). The default value is FALSE.} +\item{clim.cross.val}{A logical indicating whether to build the climatological +forecast in cross-validation (i.e. excluding the observed value of the time +step when building the probabilistic distribution function for that +particular time step). Only used if 'ref' is NULL. The default value is TRUE.} + \item{ncores}{An integer indicating the number of cores to use for parallel computation. The default value is NULL.} } @@ -70,11 +79,10 @@ worsening with respect to a reference forecast. The CRPSS ranges between minus infinite and 1. If the CRPSS is positive, it indicates that the forecast has higher skill than the reference forecast, while a negative value means that it has a lower skill. Examples of reference forecasts are the climatological -forecast (same probabilities for all categories for all time steps), -persistence, a previous model version, or another model. It is computed as -CRPSS = 1 - CRPS_exp / CRPS_ref. The statistical significance is obtained -based on a Random Walk test at the 95% confidence level (DelSole and Tippett, -2016). +forecast, persistence, a previous model version, or another model. It is +computed as 'CRPSS = 1 - CRPS_exp / CRPS_ref. The statistical significance is +obtained based on a Random Walk test at the 95% confidence level (DelSole and +Tippett, 2016). } \examples{ exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) diff --git a/tests/testthat/test-CRPSS.R b/tests/testthat/test-CRPSS.R index 5311724ea1c5e9414a895b4a9b2f39de08288a97..06b1bdb187289e7d025c695883b952719e15d0b5 100644 --- a/tests/testthat/test-CRPSS.R +++ b/tests/testthat/test-CRPSS.R @@ -101,6 +101,11 @@ test_that("1. Input checks", { CRPSS(exp1, obs1, Fair = 1), "Parameter 'Fair' must be either TRUE or FALSE." ) + # clim.cross.val + expect_error( + CRPSS(exp1, obs1, clim.cross.val = NA), + "Parameter 'clim.cross.val' must be either TRUE or FALSE." + ) # ncores expect_error( CRPSS(exp2, obs2, ncores = 1.5), @@ -176,6 +181,12 @@ test_that("2. Output checks: dat1", { c(0.3491793, 0.3379610), tolerance = 0.0001 ) + # clim.cross.val + expect_equal( + as.vector(CRPSS(exp1, obs1, ref = NULL, clim.cross.val = F)$crpss), + c(-0.1582765, -0.2390707), + tolerance = 0.0001 + ) })