From 2b83624290d1479b375833f7dade2d573f323e74 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Thu, 28 Jul 2022 11:31:50 +0200 Subject: [PATCH 01/15] corrected two typos in the documentation --- R/RPS.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/RPS.R b/R/RPS.R index 8ded53e..b0f4c97 100644 --- a/R/RPS.R +++ b/R/RPS.R @@ -25,7 +25,7 @@ #'@param indices_for_clim A vector of the indices to be taken along 'time_dim' #' for computing the thresholds between the probabilistic categories. If NULL, #' the whole period is used. The default value is NULL. -#'@param Fair A logical indicating whether to compute the FairRPSS (the +#'@param Fair A logical indicating whether to compute the FairRPS (the #' potential RPSS that the forecast would have with an infinite ensemble size). #' The default value is FALSE. #'@param weights A named two-dimensional numerical array of the weights for each @@ -88,7 +88,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', if (!identical(length(name_exp), length(name_obs)) | !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions expect 'memb_dim'.")) + "all dimensions except 'memb_dim'.")) } ## prob_thresholds if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | -- GitLab From ebfba76086cff7b42b44ac40e8b43def3acfc975 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Thu, 28 Jul 2022 11:32:57 +0200 Subject: [PATCH 02/15] first version --- R/CRPS.R | 116 +++++++++++++++++++++++++++++++++++++ R/CRPSS.R | 169 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 285 insertions(+) create mode 100644 R/CRPS.R create mode 100644 R/CRPSS.R diff --git a/R/CRPS.R b/R/CRPS.R new file mode 100644 index 0000000..c3205ab --- /dev/null +++ b/R/CRPS.R @@ -0,0 +1,116 @@ +#'Compute the Continuous Ranked Probability Score +#' +#'The Continuous Ranked Probability Score (CRPS; Wilks, 2011) is the continuous +#'version of the Ranked Probability Score (RPS; Wilks, 2011). It is a skill metric +#'to evaluate the full distribution of probabilistic forecasts. It has a negative +#'orientation (i.e., the higher-quality forecast the smaller CRPS) and it rewards +#'the forecast that has probability concentration around the observed value. In case +#'of a deterministic forecast, the CRPS is reduced to the mean absolute error. It has +#'the same units as the data. The function is based on enscrps_cpp from SpecsVerification. +#' +#'@param exp A named numerical array of the forecast with at least time +#' dimension. +#'@param obs A named numerical array of the observation with at least time +#' dimension. The dimensions must be the same as 'exp' except 'memb_dim'. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the probabilities of the forecast. The default value is 'member'. +#'@param Fair A logical indicating whether to compute the FairCRPS (the +#' potential RPSS that the forecast would have with an infinite ensemble size). +#' The default value is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A numerical array of CRPS with the same dimensions as "exp" except the +#''time_dim' and 'memb_dim' dimensions. +#' +#'@references +#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +#' +#'@examples +#'exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#'obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +#'res <- CRPS(exp = exp, obs = obs) +#' +#'@import multiApply +#'@importFrom SpecsVerification enscrps_cpp +#'@export +CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', + Fair = FALSE, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (!is.array(exp) | !is.numeric(exp)) + stop('Parameter "exp" must be a numeric array.') + if (!is.array(obs) | !is.numeric(obs)) + stop('Parameter "obs" must be a numeric array.') + if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop('Parameter "time_dim" must be a character string.') + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (memb_dim %in% name_obs) { + stop('Not implemented for observations with members.') + } + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'memb_dim'.")) + } + ## Fair + if (!is.logical(Fair) | length(Fair) > 1) { + stop("Parameter 'Fair' must be either TRUE or FALSE.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + + crps <- Apply(data = list(exp = exp, obs = obs), + target_dims = list(exp = c(time_dim, memb_dim), + obs = time_dim), + output_dims = time_dim, + fun = .CRPS, Fair = Fair, + ncores = ncores)$output1 + + # Return only the mean RPS + crps <- MeanDims(crps, time_dim, na.rm = FALSE) + + return(crps) +} + +.CRPS <- function(exp, obs, Fair = FALSE) { + # exp: [sdate, memb] + # obs: [sdate] + + if (Fair) { # FairCRPS + R_new <- Inf + } else {R_new <- NA} + + crps <- SpecsVerification::enscrps_cpp(ens = exp, obs = obs, R_new = R_new) + + return(crps) +} diff --git a/R/CRPSS.R b/R/CRPSS.R new file mode 100644 index 0000000..221236e --- /dev/null +++ b/R/CRPSS.R @@ -0,0 +1,169 @@ +#'Compute the Continuous Ranked Probability Skill Score +#' +#'The Continuous Ranked Probability Skill Score (CRPSS; Wilks, 2011) is the skill score +#'based on the Continuous Ranked Probability Score (CRPS; Wilks, 2011). It can be used to +#'assess whether a forecast presents an improvement or worsening with respect to +#'a reference forecast. The CRPSS ranges between minus infinite and 1. If the +#'CRPSS is positive, it indicates that the forecast has higher skill than the +#'reference forecast, while a negative value means that it has a lower skill. +#'Examples of reference forecasts are the climatological forecast (same +#'probabilities for all categories for all time steps), persistence, a previous +#'model version, and another model. It is computed as CRPSS = 1 - CRPS_exp / CRPS_ref. +#'The statistical significance is obtained based on a Random Walk test at the +#'95% confidence level (DelSole and Tippett, 2016). +#' +#'@param exp A named numerical array of the forecast with at least time +#' dimension. +#'@param obs A named numerical array of the observation with at least time +#' dimension. The dimensions must be the same as 'exp' except 'memb_dim'. +#'@param ref A named numerical array of the reference forecast data with at +#' least time dimension. The dimensions must be the same as 'exp' except +#' 'memb_dim'. If it is NULL, the climatological forecast is used as reference +#' forecast. The default value is NULL. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the probabilities of the forecast and the reference forecast. The +#' default value is 'member'. +#'@param Fair A logical indicating whether to compute the FairCRPSS (the +#' potential CRPSS that the forecast would have with an infinite ensemble size). +#' The default value is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'\item{$crpss}{ +#' A numerical array of the CRPSS with the same dimensions as "exp" except the +#' 'time_dim' and 'memb_dim' dimensions. +#'} +#'\item{$sign}{ +#' A logical array of the statistical significance of the CRPSS with the same +#' dimensions as 'exp' except the 'time_dim' and 'memb_dim' dimensions. +#'} +#' +#'@references +#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +#'DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1 +#' +#'@examples +#'exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#'obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +#'ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#'res <- CRPSS(exp = exp, obs = obs) ## climatology as reference forecast +#'res <- CRPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast +#' +#'@import multiApply +#'@export +CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', + Fair = FALSE, ncores = NULL) { + + # Check inputs + ## exp, obs, and ref (1) + if (!is.array(exp) | !is.numeric(exp)) + stop('Parameter "exp" must be a numeric array.') + if (!is.array(obs) | !is.numeric(obs)) + stop('Parameter "obs" must be a numeric array.') + if (!is.null(ref)) { + if (!is.array(ref) | !is.numeric(ref)) + stop('Parameter "ref" must be a numeric array.') + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop('Parameter "time_dim" must be a character string.') + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + if (!is.null(ref) & !time_dim %in% names(dim(ref))) { + stop("Parameter 'time_dim' is not found in 'ref' dimension.") + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (!is.null(ref) & !memb_dim %in% names(dim(ref))) { + stop("Parameter 'memb_dim' is not found in 'ref' dimension.") + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (memb_dim %in% name_obs) { + stop('Not implemented for observations with members.') + } + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions expect 'memb_dim'.")) + } + if (!is.null(ref)) { + name_ref <- sort(names(dim(ref))) + name_ref <- name_ref[-which(name_ref == memb_dim)] + if (!identical(length(name_exp), length(name_ref)) | + !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions expect 'memb_dim'.")) + } + } + ## Fair + if (!is.logical(Fair) | length(Fair) > 1) { + stop("Parameter 'Fair' must be either TRUE or FALSE.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + + # Compute CRPSS + if (!is.null(ref)) { # use "ref" as reference forecast + data <- list(exp = exp, obs = obs, ref = ref) + target_dims = list(exp = c(time_dim, memb_dim), + obs = time_dim, + ref = c(time_dim, memb_dim)) + } else { + data <- list(exp = exp, obs = obs) + target_dims = list(exp = c(time_dim, memb_dim), + obs = time_dim) + } + output <- Apply(data, + target_dims = target_dims, + fun = .CRPSS, + Fair = Fair, + ncores = ncores) + + return(output) +} + +.CRPSS <- function(exp, obs, ref = NULL, Fair = FALSE) { + # exp: [sdate, memb] + # obs: [sdate] + # ref: [sdate, memb] or NULL + + # CRPS of the forecast + crps_exp <- .CRPS(exp = exp, obs = obs, Fair = Fair) + + # CRPS of the reference forecast + if (is.null(ref)){ + ## using climatology as reference forecast + ## all the time steps are used as if they were members + ## then, ref dimensions are [sdate, memb], both with length(sdate) + ref <- array(data = obs, dim = c(member = length(obs))) + ref <- InsertDim(data = ref, posdim = 1, lendim = length(obs), name = 'sdate') + } + crps_ref <- .CRPS(exp = ref, obs = obs, Fair = Fair) + + # CRPSS + crpss <- 1 - mean(crps_exp) / mean(crps_ref) + + # Significance + sign <- .RandomWalkTest(skill_A = crps_exp, skill_B = crps_ref)$signif + + return(list(crpss = crpss, sign = sign)) +} -- GitLab From 92cbed12abe64ce92e86ca8cf2527730f59bebd6 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Torres Date: Thu, 28 Jul 2022 11:45:50 +0200 Subject: [PATCH 03/15] corrected typo in the check for 'exp' and 'obs' dimensions --- tests/testthat/test-RPS.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-RPS.R b/tests/testthat/test-RPS.R index 04df1bb..2029fd9 100644 --- a/tests/testthat/test-RPS.R +++ b/tests/testthat/test-RPS.R @@ -56,7 +56,7 @@ test_that("1. Input checks", { # exp, ref, and obs (2) expect_error( RPS(exp1, array(1:9, dim = c(sdate = 9))), - "Parameter 'exp' and 'obs' must have same length of all dimensions expect 'memb_dim'." + "Parameter 'exp' and 'obs' must have same length of all dimensions except 'memb_dim'." ) # prob_thresholds expect_error( -- GitLab From 780e068977cd7473127d7c3d4ba44c2338bbd784 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 29 Jul 2022 10:18:59 +0200 Subject: [PATCH 04/15] allow memb_dim=1 for obs --- R/CRPS.R | 9 ++++++--- R/CRPSS.R | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/R/CRPS.R b/R/CRPS.R index c3205ab..942ec9e 100644 --- a/R/CRPS.R +++ b/R/CRPS.R @@ -36,6 +36,7 @@ #' #'@import multiApply #'@importFrom SpecsVerification enscrps_cpp +#'@importFrom ClimProjDiags Subset #'@export CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', Fair = FALSE, ncores = NULL) { @@ -64,12 +65,14 @@ CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', stop("Parameter 'memb_dim' is not found in 'exp' dimension.") } ## exp and obs (2) + if (memb_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[memb_dim]),1)){ + obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') + } else {stop("Not implemented for observations with members ('obs' can have 'memb_dim', but it should be of length=1).")} + } name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) name_exp <- name_exp[-which(name_exp == memb_dim)] - if (memb_dim %in% name_obs) { - stop('Not implemented for observations with members.') - } if (!identical(length(name_exp), length(name_obs)) | !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { stop(paste0("Parameter 'exp' and 'obs' must have same length of ", diff --git a/R/CRPSS.R b/R/CRPSS.R index 221236e..9f5edbd 100644 --- a/R/CRPSS.R +++ b/R/CRPSS.R @@ -53,6 +53,7 @@ #'res <- CRPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast #' #'@import multiApply +#'@importFrom ClimProjDiags Subset #'@export CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', Fair = FALSE, ncores = NULL) { @@ -87,12 +88,14 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', stop("Parameter 'memb_dim' is not found in 'ref' dimension.") } ## exp and obs (2) + if (memb_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[memb_dim]),1)){ + obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') + } else {stop("Not implemented for observations with members ('obs' can have 'memb_dim', but it should be of length=1).")} + } name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) name_exp <- name_exp[-which(name_exp == memb_dim)] - if (memb_dim %in% name_obs) { - stop('Not implemented for observations with members.') - } if (!identical(length(name_exp), length(name_obs)) | !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { stop(paste0("Parameter 'exp' and 'obs' must have same length of ", -- GitLab From 67af5d0dbbdb86e64b87960b81eabc2ccc8fe49a Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 10 Aug 2022 17:36:03 +0200 Subject: [PATCH 05/15] Allow dataset dimension to be NULL in CRPS and also added test file --- NAMESPACE | 3 + R/CRPS.R | 99 +++++++++++++++++++------- man/CRPS.Rd | 63 +++++++++++++++++ tests/testthat/test-CRPS.R | 138 +++++++++++++++++++++++++++++++++++++ 4 files changed, 278 insertions(+), 25 deletions(-) create mode 100644 man/CRPS.Rd create mode 100644 tests/testthat/test-CRPS.R diff --git a/NAMESPACE b/NAMESPACE index 032ebf9..a5229e3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,6 +7,8 @@ export(Ano) export(Ano_CrossValid) export(BrierScore) export(CDORemap) +export(CRPS) +export(CRPSS) export(Clim) export(Cluster) export(ColorBar) @@ -92,6 +94,7 @@ import(stats) importFrom(ClimProjDiags,CombineIndices) importFrom(ClimProjDiags,Subset) importFrom(ClimProjDiags,WeightedMean) +importFrom(SpecsVerification,enscrps_cpp) importFrom(abind,abind) importFrom(abind,adrop) importFrom(easyNCDF,ArrayToNc) diff --git a/R/CRPS.R b/R/CRPS.R index 942ec9e..0635b66 100644 --- a/R/CRPS.R +++ b/R/CRPS.R @@ -1,12 +1,14 @@ +#2345678901234567890123456789012345678901234567890123456789012345678901234567890 #'Compute the Continuous Ranked Probability Score #' #'The Continuous Ranked Probability Score (CRPS; Wilks, 2011) is the continuous -#'version of the Ranked Probability Score (RPS; Wilks, 2011). It is a skill metric -#'to evaluate the full distribution of probabilistic forecasts. It has a negative -#'orientation (i.e., the higher-quality forecast the smaller CRPS) and it rewards -#'the forecast that has probability concentration around the observed value. In case -#'of a deterministic forecast, the CRPS is reduced to the mean absolute error. It has -#'the same units as the data. The function is based on enscrps_cpp from SpecsVerification. +#'version of the Ranked Probability Score (RPS; Wilks, 2011). It is a skill +#'metric to evaluate the full distribution of probabilistic forecasts. It has a +#'negative orientation (i.e., the higher-quality forecast the smaller CRPS) and +#'it rewards the forecast that has probability concentration around the observed +#'value. In case of a deterministic forecast, the CRPS is reduced to the mean +#'absolute error. It has the same units as the data. The function is based on +#'enscrps_cpp from SpecsVerification. #' #'@param exp A named numerical array of the forecast with at least time #' dimension. @@ -16,6 +18,9 @@ #' The default value is 'sdate'. #'@param memb_dim A character string indicating the name of the member dimension #' to compute the probabilities of the forecast. The default value is 'member'. +#'@param dat_dim A character string indicating the name of dataset dimension. +#' The length of this dimension can be different between ’exp’ and ’obs’. +#' The default value is NULL. #'@param Fair A logical indicating whether to compute the FairCRPS (the #' potential RPSS that the forecast would have with an infinite ensemble size). #' The default value is FALSE. @@ -38,22 +43,21 @@ #'@importFrom SpecsVerification enscrps_cpp #'@importFrom ClimProjDiags Subset #'@export -CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', +CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, Fair = FALSE, ncores = NULL) { - # Check inputs ## exp and obs (1) if (!is.array(exp) | !is.numeric(exp)) - stop('Parameter "exp" must be a numeric array.') + stop("Parameter 'exp' must be a numeric array.") if (!is.array(obs) | !is.numeric(obs)) - stop('Parameter "obs" must be a numeric array.') + stop("Parameter 'obs' must be a numeric array.") if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp' and 'obs' must have dimension names.") } ## time_dim if (!is.character(time_dim) | length(time_dim) != 1) - stop('Parameter "time_dim" must be a character string.') + stop("Parameter 'time_dim' must be a character string.") if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") } @@ -64,19 +68,35 @@ CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', if (!memb_dim %in% names(dim(exp))) { stop("Parameter 'memb_dim' is not found in 'exp' dimension.") } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } ## exp and obs (2) if (memb_dim %in% names(dim(obs))) { if (identical(as.numeric(dim(obs)[memb_dim]),1)){ obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') - } else {stop("Not implemented for observations with members ('obs' can have 'memb_dim', but it should be of length=1).")} + } else { + stop("Not implemented for observations with members ('obs' can have 'memb_dim', but it should be of length = 1).") + } } name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) name_exp <- name_exp[-which(name_exp == memb_dim)] + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } if (!identical(length(name_exp), length(name_obs)) | !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions except 'memb_dim'.")) + "all dimensions except 'memb_dim' and 'dat_dim'.")) } ## Fair if (!is.logical(Fair) | length(Fair) > 1) { @@ -93,10 +113,11 @@ CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', ############################### crps <- Apply(data = list(exp = exp, obs = obs), - target_dims = list(exp = c(time_dim, memb_dim), - obs = time_dim), - output_dims = time_dim, - fun = .CRPS, Fair = Fair, + target_dims = list(exp = c(time_dim, memb_dim, dat_dim), + obs = c(time_dim, dat_dim)), + fun = .CRPS, + time_dim = time_dim, dat_dim = dat_dim, + Fair = Fair, ncores = ncores)$output1 # Return only the mean RPS @@ -105,15 +126,43 @@ CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', return(crps) } -.CRPS <- function(exp, obs, Fair = FALSE) { - # exp: [sdate, memb] - # obs: [sdate] - +.CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, + Fair = FALSE) { + + # exp: [sdate, memb, (dat_dim)] + # obs: [sdate, (dat_dim)] + if (is.null(dat_dim)) { + nexp <- 1 + nobs <- 1 + dim(exp) <- c(dim(exp), nexp = nexp) + dim(obs) <- c(dim(obs), nobs = nobs) + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + } + if (Fair) { # FairCRPS R_new <- Inf } else {R_new <- NA} - - crps <- SpecsVerification::enscrps_cpp(ens = exp, obs = obs, R_new = R_new) - - return(crps) + + CRPS <- array(dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs)) + + for (i in 1:nexp) { + for (j in 1:nobs) { + exp_data <- exp[ , , i] + obs_data <- obs[ , j] + + if (is.null(dim(exp_data))) dim(exp_data) <- c(dim(exp)[1:2]) + if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1]) + + crps <- SpecsVerification::enscrps_cpp(ens = exp_data, obs = obs_data, R_new = R_new) + CRPS[,i,j] <- crps + } + } + + if (is.null(dat_dim) | (nexp == 1 & nobs == 1)) { + dim(CRPS) <- c(dim(CRPS)[time_dim]) + } + + return(CRPS) } diff --git a/man/CRPS.Rd b/man/CRPS.Rd new file mode 100644 index 0000000..d25c088 --- /dev/null +++ b/man/CRPS.Rd @@ -0,0 +1,63 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CRPS.R +\name{CRPS} +\alias{CRPS} +\title{Compute the Continuous Ranked Probability Score} +\usage{ +CRPS( + exp, + obs, + time_dim = "sdate", + memb_dim = "member", + dat_dim = NULL, + Fair = FALSE, + ncores = NULL +) +} +\arguments{ +\item{exp}{A named numerical array of the forecast with at least time +dimension.} + +\item{obs}{A named numerical array of the observation with at least time +dimension. The dimensions must be the same as 'exp' except 'memb_dim'.} + +\item{time_dim}{A character string indicating the name of the time dimension. +The default value is 'sdate'.} + +\item{memb_dim}{A character string indicating the name of the member dimension +to compute the probabilities of the forecast. The default value is 'member'.} + +\item{dat_dim}{A character string indicating the name of dataset dimension. +The length of this dimension can be different between ’exp’ and ’obs’. +The default value is NULL.} + +\item{Fair}{A logical indicating whether to compute the FairCRPS (the +potential RPSS that the forecast would have with an infinite ensemble size). +The default value is FALSE.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A numerical array of CRPS with the same dimensions as "exp" except the +'time_dim' and 'memb_dim' dimensions. +} +\description{ +The Continuous Ranked Probability Score (CRPS; Wilks, 2011) is the continuous +version of the Ranked Probability Score (RPS; Wilks, 2011). It is a skill +metric to evaluate the full distribution of probabilistic forecasts. It has a +negative orientation (i.e., the higher-quality forecast the smaller CRPS) and +it rewards the forecast that has probability concentration around the observed +value. In case of a deterministic forecast, the CRPS is reduced to the mean +absolute error. It has the same units as the data. The function is based on +enscrps_cpp from SpecsVerification. +} +\examples{ +exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +res <- CRPS(exp = exp, obs = obs) + +} +\references{ +Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +} diff --git a/tests/testthat/test-CRPS.R b/tests/testthat/test-CRPS.R new file mode 100644 index 0000000..d27b5b5 --- /dev/null +++ b/tests/testthat/test-CRPS.R @@ -0,0 +1,138 @@ +context("s2dv::RPS tests") + +############################################## + +# dat1 +set.seed(1) +exp1 <- array(rnorm(60), dim = c(member = 3, sdate = 10, lat = 2)) +set.seed(2) +obs1 <- array(rnorm(20), dim = c(sdate = 10, lat = 2)) + +# dat2 +set.seed(1) +exp2 <- array(rnorm(20), dim = c(member = 2, sdate = 10)) +set.seed(2) +obs2 <- array(rnorm(10), dim = c(sdate = 10)) + +# dat3 +set.seed(1) +exp3 <- array(rnorm(40), dim = c(member = 2, sdate = 10, dataset = 2)) +set.seed(2) +obs3 <- array(rnorm(30), dim = c(member = 1, sdate = 10, dataset = 3)) + +############################################## +test_that("1. Input checks", { + # exp and obs (1) + expect_error( + CRPS(c()), + "Parameter 'exp' must be a numeric array." + ) + expect_error( + CRPS(exp1, c()), + "Parameter 'obs' must be a numeric array." + ) + + # time_dim + expect_error( + CRPS(exp1, obs1, time_dim = 1), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + CRPS(exp1, obs1, time_dim = 'time'), + "Parameter 'time_dim' is not found in 'exp' or 'obs' dimension." + ) + # memb_dim + expect_error( + CRPS(exp1, obs1, memb_dim = TRUE), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + CRPS(exp1, obs1, memb_dim = 'memb'), + "Parameter 'memb_dim' is not found in 'exp' dimension." + ) + ## exp and obs (2) + expect_error(CRPS(exp1, array(1:40, dim = c(sdate = 10, lat = 2, member = 2))), + "Not implemented for observations with members ('obs' can have 'memb_dim', but it should be of length = 1).", fixed = TRUE + ) + expect_error( + CRPS(exp1, array(1:9, dim = c(sdate = 9))), + "Parameter 'exp' and 'obs' must have same length of all dimensions except 'memb_dim' and 'dat_dim'." + ) + # Fair + expect_error( + CRPS(exp1, obs1, Fair = 1), + "Parameter 'Fair' must be either TRUE or FALSE." + ) + # ncores + expect_error( + CRPS(exp2, obs2, ncores = 1.5), + "Parameter 'ncores' must be either NULL or a positive integer." + ) + +}) + +############################################## +test_that("2. Output checks: dat1", { + + expect_equal( + dim(CRPS(exp1, obs1)), + c(lat = 2) + ) + expect_equal( + as.vector(CRPS(exp1, obs1)), + c(0.5947612, 0.7511546), + tolerance = 0.0001 + ) + expect_equal( + as.vector(CRPS(exp1, obs1, Fair = TRUE)), + c(0.4215127, 0.5891242), + tolerance = 0.0001 + ) + expect_equal( + as.vector(CRPS(exp1, obs1, Fair = FALSE)), + c(0.5947612, 0.7511546), + tolerance = 0.0001 + ) + +}) + +############################################## +test_that("3. Output checks: dat2", { + + expect_equal( + dim(CRPS(exp2, obs2)), + NULL + ) + expect_equal( + CRPS(exp2, obs2), + 0.9350226, + tolerance = 0.0001 + ) + expect_equal( + CRPS(exp2, obs2, Fair = TRUE), + CRPS(array(exp2, dim = c(dim(exp2), dataset = 1)), array(obs2, dim = c(dim(obs2), dataset = 1)), dat_dim = 'dataset', Fair = TRUE), + tolerance = 0.0001 + ) +}) + + + +############################################## +test_that("3. Output checks: dat3", { + + expect_equal( + dim(CRPS(exp3, obs3, dat_dim = 'dataset')), + c(nexp = 2, nobs = 3) + ) + expect_equal( + as.vector(CRPS(exp3, obs3, dat_dim = 'dataset', Fair = FALSE)), + c(0.9350226, 0.8354833, 1.0047853, 0.9681745, 1.2192761, 1.0171790), + tolerance = 0.0001 + ) + expect_equal( + as.vector(CRPS(exp3, obs3, dat_dim = 'dataset', Fair = TRUE)), + c(0.6701312, 0.6198684, 0.7398939, 0.7525596, 0.9543847, 0.8015641), + tolerance = 0.0001 + ) + +}) -- GitLab From 6db566bd51b78e9f316ba6f4f8789e4388dfb6a3 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 11 Aug 2022 11:21:52 +0200 Subject: [PATCH 06/15] Improvement of format and documentation; Fix minor bugs --- R/CRPS.R | 47 ++++++++++++++++++++------------------ tests/testthat/test-CRPS.R | 6 ++--- 2 files changed, 28 insertions(+), 25 deletions(-) diff --git a/R/CRPS.R b/R/CRPS.R index 0635b66..cc1f5de 100644 --- a/R/CRPS.R +++ b/R/CRPS.R @@ -1,4 +1,3 @@ -#2345678901234567890123456789012345678901234567890123456789012345678901234567890 #'Compute the Continuous Ranked Probability Score #' #'The Continuous Ranked Probability Score (CRPS; Wilks, 2011) is the continuous @@ -8,28 +7,32 @@ #'it rewards the forecast that has probability concentration around the observed #'value. In case of a deterministic forecast, the CRPS is reduced to the mean #'absolute error. It has the same units as the data. The function is based on -#'enscrps_cpp from SpecsVerification. +#'enscrps_cpp from SpecsVerification. If there is more than one dataset, CRPS +#'will be computed for each pair of exp and obs data. #' #'@param exp A named numerical array of the forecast with at least time #' dimension. #'@param obs A named numerical array of the observation with at least time -#' dimension. The dimensions must be the same as 'exp' except 'memb_dim'. +#' dimension. The dimensions must be the same as 'exp' except 'memb_dim' and +#' 'dat_dim'. #'@param time_dim A character string indicating the name of the time dimension. #' The default value is 'sdate'. #'@param memb_dim A character string indicating the name of the member dimension #' to compute the probabilities of the forecast. The default value is 'member'. #'@param dat_dim A character string indicating the name of dataset dimension. -#' The length of this dimension can be different between ’exp’ and ’obs’. -#' The default value is NULL. +#' The length of this dimension can be different between 'exp' and 'obs'. The +#' default value is NULL. #'@param Fair A logical indicating whether to compute the FairCRPS (the -#' potential RPSS that the forecast would have with an infinite ensemble size). +#' potential CRPS that the forecast would have with an infinite ensemble size). #' The default value is FALSE. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' -#'@return -#'A numerical array of CRPS with the same dimensions as "exp" except the -#''time_dim' and 'memb_dim' dimensions. +#'@return +#'A numerical array of CRPS with dimensions c(nexp, nobs, the rest dimensions of +#''exp' except 'time_dim' and 'memb_dim' dimensions). nexp is the number of +#'experiment (i.e., dat_dim in exp), and nobs is the number of observation +#'(i.e., dat_dim in obs). If dat_dim is NULL, nexp and nobs are omitted. #' #'@references #'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 @@ -80,7 +83,7 @@ CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NU } ## exp and obs (2) if (memb_dim %in% names(dim(obs))) { - if (identical(as.numeric(dim(obs)[memb_dim]),1)){ + if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') } else { stop("Not implemented for observations with members ('obs' can have 'memb_dim', but it should be of length = 1).") @@ -104,33 +107,34 @@ CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NU } ## ncores if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { stop("Parameter 'ncores' must be either NULL or a positive integer.") } } ############################### + # Compute CRPS crps <- Apply(data = list(exp = exp, obs = obs), target_dims = list(exp = c(time_dim, memb_dim, dat_dim), obs = c(time_dim, dat_dim)), fun = .CRPS, - time_dim = time_dim, dat_dim = dat_dim, + time_dim = time_dim, memb_dim = 'member', dat_dim = dat_dim, Fair = Fair, ncores = ncores)$output1 - # Return only the mean RPS + # Return only the mean CRPS crps <- MeanDims(crps, time_dim, na.rm = FALSE) return(crps) } .CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, - Fair = FALSE) { - + Fair = FALSE) { # exp: [sdate, memb, (dat_dim)] # obs: [sdate, (dat_dim)] + + # Adjust dimensions if needed if (is.null(dat_dim)) { nexp <- 1 nobs <- 1 @@ -141,26 +145,25 @@ CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NU nobs <- as.numeric(dim(obs)[dat_dim]) } - if (Fair) { # FairCRPS - R_new <- Inf - } else {R_new <- NA} + # for FairCRPS + R_new <- ifelse(Fair, Inf, NA) CRPS <- array(dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs)) for (i in 1:nexp) { for (j in 1:nobs) { exp_data <- exp[ , , i] - obs_data <- obs[ , j] + obs_data <- obs[, j] if (is.null(dim(exp_data))) dim(exp_data) <- c(dim(exp)[1:2]) if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1]) crps <- SpecsVerification::enscrps_cpp(ens = exp_data, obs = obs_data, R_new = R_new) - CRPS[,i,j] <- crps + CRPS[, i, j] <- crps } } - if (is.null(dat_dim) | (nexp == 1 & nobs == 1)) { + if (is.null(dat_dim)) { dim(CRPS) <- c(dim(CRPS)[time_dim]) } diff --git a/tests/testthat/test-CRPS.R b/tests/testthat/test-CRPS.R index d27b5b5..972eb45 100644 --- a/tests/testthat/test-CRPS.R +++ b/tests/testthat/test-CRPS.R @@ -1,4 +1,4 @@ -context("s2dv::RPS tests") +context("s2dv::CRPS tests") ############################################## @@ -109,8 +109,8 @@ test_that("3. Output checks: dat2", { tolerance = 0.0001 ) expect_equal( - CRPS(exp2, obs2, Fair = TRUE), - CRPS(array(exp2, dim = c(dim(exp2), dataset = 1)), array(obs2, dim = c(dim(obs2), dataset = 1)), dat_dim = 'dataset', Fair = TRUE), + as.vector(CRPS(exp2, obs2, Fair = TRUE)), + as.vector(CRPS(array(exp2, dim = c(dim(exp2), dataset = 1)), array(obs2, dim = c(dim(obs2), dataset = 1)), dat_dim = 'dataset', Fair = TRUE)), tolerance = 0.0001 ) }) -- GitLab From 277a8e266a086ea7b104bd50092ae33d4e7e598e Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 11 Aug 2022 11:22:17 +0200 Subject: [PATCH 07/15] Correct typo and update document --- R/RPS.R | 2 +- man/CRPS.Rd | 18 +++++++----- man/CRPSS.Rd | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++ man/RPS.Rd | 4 +-- 4 files changed, 91 insertions(+), 10 deletions(-) create mode 100644 man/CRPSS.Rd diff --git a/R/RPS.R b/R/RPS.R index b0f4c97..c02cf49 100644 --- a/R/RPS.R +++ b/R/RPS.R @@ -26,7 +26,7 @@ #' for computing the thresholds between the probabilistic categories. If NULL, #' the whole period is used. The default value is NULL. #'@param Fair A logical indicating whether to compute the FairRPS (the -#' potential RPSS that the forecast would have with an infinite ensemble size). +#' potential RPS that the forecast would have with an infinite ensemble size). #' The default value is FALSE. #'@param weights A named two-dimensional numerical array of the weights for each #' member and time. The dimension names should include 'memb_dim' and diff --git a/man/CRPS.Rd b/man/CRPS.Rd index d25c088..453c199 100644 --- a/man/CRPS.Rd +++ b/man/CRPS.Rd @@ -19,7 +19,8 @@ CRPS( dimension.} \item{obs}{A named numerical array of the observation with at least time -dimension. The dimensions must be the same as 'exp' except 'memb_dim'.} +dimension. The dimensions must be the same as 'exp' except 'memb_dim' and +'dat_dim'.} \item{time_dim}{A character string indicating the name of the time dimension. The default value is 'sdate'.} @@ -28,19 +29,21 @@ The default value is 'sdate'.} to compute the probabilities of the forecast. The default value is 'member'.} \item{dat_dim}{A character string indicating the name of dataset dimension. -The length of this dimension can be different between ’exp’ and ’obs’. -The default value is NULL.} +The length of this dimension can be different between 'exp' and 'obs'. The +default value is NULL.} \item{Fair}{A logical indicating whether to compute the FairCRPS (the -potential RPSS that the forecast would have with an infinite ensemble size). +potential CRPS that the forecast would have with an infinite ensemble size). The default value is FALSE.} \item{ncores}{An integer indicating the number of cores to use for parallel computation. The default value is NULL.} } \value{ -A numerical array of CRPS with the same dimensions as "exp" except the -'time_dim' and 'memb_dim' dimensions. +A numerical array of CRPS with dimensions c(nexp, nobs, the rest dimensions of +'exp' except 'time_dim' and 'memb_dim' dimensions). nexp is the number of +experiment (i.e., dat_dim in exp), and nobs is the number of observation +(i.e., dat_dim in obs). If dat_dim is NULL, nexp and nobs are omitted. } \description{ The Continuous Ranked Probability Score (CRPS; Wilks, 2011) is the continuous @@ -50,7 +53,8 @@ negative orientation (i.e., the higher-quality forecast the smaller CRPS) and it rewards the forecast that has probability concentration around the observed value. In case of a deterministic forecast, the CRPS is reduced to the mean absolute error. It has the same units as the data. The function is based on -enscrps_cpp from SpecsVerification. +enscrps_cpp from SpecsVerification. If there is more than one dataset, CRPS +will be computed for each pair of exp and obs data. } \examples{ exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) diff --git a/man/CRPSS.Rd b/man/CRPSS.Rd new file mode 100644 index 0000000..85705f3 --- /dev/null +++ b/man/CRPSS.Rd @@ -0,0 +1,77 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CRPSS.R +\name{CRPSS} +\alias{CRPSS} +\title{Compute the Continuous Ranked Probability Skill Score} +\usage{ +CRPSS( + exp, + obs, + ref = NULL, + time_dim = "sdate", + memb_dim = "member", + Fair = FALSE, + ncores = NULL +) +} +\arguments{ +\item{exp}{A named numerical array of the forecast with at least time +dimension.} + +\item{obs}{A named numerical array of the observation with at least time +dimension. The dimensions must be the same as 'exp' except 'memb_dim'.} + +\item{ref}{A named numerical array of the reference forecast data with at +least time dimension. The dimensions must be the same as 'exp' except +'memb_dim'. If it is NULL, the climatological forecast is used as reference +forecast. The default value is NULL.} + +\item{time_dim}{A character string indicating the name of the time dimension. +The default value is 'sdate'.} + +\item{memb_dim}{A character string indicating the name of the member dimension +to compute the probabilities of the forecast and the reference forecast. The +default value is 'member'.} + +\item{Fair}{A logical indicating whether to compute the FairCRPSS (the +potential CRPSS that the forecast would have with an infinite ensemble size). +The default value is FALSE.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +\item{$crpss}{ + A numerical array of the CRPSS with the same dimensions as "exp" except the + 'time_dim' and 'memb_dim' dimensions. +} +\item{$sign}{ + A logical array of the statistical significance of the CRPSS with the same + dimensions as 'exp' except the 'time_dim' and 'memb_dim' dimensions. +} +} +\description{ +The Continuous Ranked Probability Skill Score (CRPSS; Wilks, 2011) is the skill score +based on the Continuous Ranked Probability Score (CRPS; Wilks, 2011). It can be used to +assess whether a forecast presents an improvement or worsening with respect to +a reference forecast. The CRPSS ranges between minus infinite and 1. If the +CRPSS is positive, it indicates that the forecast has higher skill than the +reference forecast, while a negative value means that it has a lower skill. +Examples of reference forecasts are the climatological forecast (same +probabilities for all categories for all time steps), persistence, a previous +model version, and another model. It is computed as CRPSS = 1 - CRPS_exp / CRPS_ref. +The statistical significance is obtained based on a Random Walk test at the +95% confidence level (DelSole and Tippett, 2016). +} +\examples{ +exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +res <- CRPSS(exp = exp, obs = obs) ## climatology as reference forecast +res <- CRPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast + +} +\references{ +Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1 +} diff --git a/man/RPS.Rd b/man/RPS.Rd index ee5c241..1d6eec2 100644 --- a/man/RPS.Rd +++ b/man/RPS.Rd @@ -37,8 +37,8 @@ corresponds to tercile equiprobable categories.} for computing the thresholds between the probabilistic categories. If NULL, the whole period is used. The default value is NULL.} -\item{Fair}{A logical indicating whether to compute the FairRPSS (the -potential RPSS that the forecast would have with an infinite ensemble size). +\item{Fair}{A logical indicating whether to compute the FairRPS (the +potential RPS that the forecast would have with an infinite ensemble size). The default value is FALSE.} \item{weights}{A named two-dimensional numerical array of the weights for each -- GitLab From 6f32c7bb2cec0c70fd6752c00c6542588904b536 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 11 Aug 2022 12:48:24 +0200 Subject: [PATCH 08/15] Correct memb_dim = 'member' inside Apply of CRPS --- R/CRPS.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/CRPS.R b/R/CRPS.R index cc1f5de..b2f38a5 100644 --- a/R/CRPS.R +++ b/R/CRPS.R @@ -119,7 +119,7 @@ CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NU target_dims = list(exp = c(time_dim, memb_dim, dat_dim), obs = c(time_dim, dat_dim)), fun = .CRPS, - time_dim = time_dim, memb_dim = 'member', dat_dim = dat_dim, + time_dim = time_dim, memb_dim = memb_dim, dat_dim = dat_dim, Fair = Fair, ncores = ncores)$output1 -- GitLab From 94392abcf4f7a2e88bc21ff249b2a2c5250b71e7 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 17 Aug 2022 09:57:31 +0200 Subject: [PATCH 09/15] Add dat_dim parameter in CRPSS and test file --- R/CRPSS.R | 222 ++++++++++++++++++++------ tests/testthat/test-CRPSS.R | 300 ++++++++++++++++++++++++++++++++++++ 2 files changed, 473 insertions(+), 49 deletions(-) create mode 100644 tests/testthat/test-CRPSS.R diff --git a/R/CRPSS.R b/R/CRPSS.R index 9f5edbd..80a709c 100644 --- a/R/CRPSS.R +++ b/R/CRPSS.R @@ -1,33 +1,40 @@ #'Compute the Continuous Ranked Probability Skill Score #' -#'The Continuous Ranked Probability Skill Score (CRPSS; Wilks, 2011) is the skill score -#'based on the Continuous Ranked Probability Score (CRPS; Wilks, 2011). It can be used to -#'assess whether a forecast presents an improvement or worsening with respect to -#'a reference forecast. The CRPSS ranges between minus infinite and 1. If the -#'CRPSS is positive, it indicates that the forecast has higher skill than the -#'reference forecast, while a negative value means that it has a lower skill. -#'Examples of reference forecasts are the climatological forecast (same -#'probabilities for all categories for all time steps), persistence, a previous -#'model version, and another model. It is computed as CRPSS = 1 - CRPS_exp / CRPS_ref. -#'The statistical significance is obtained based on a Random Walk test at the -#'95% confidence level (DelSole and Tippett, 2016). +#'The Continuous Ranked Probability Skill Score (CRPSS; Wilks, 2011) is the +#'skill score based on the Continuous Ranked Probability Score (CRPS; Wilks, +#'2011). It can be used to assess whether a forecast presents an improvement or +#'worsening with respect to a reference forecast. The CRPSS ranges between minus +#'infinite and 1. If the CRPSS is positive, it indicates that the forecast has +#'higher skill than the reference forecast, while a negative value means that it +#'has a lower skill. Examples of reference forecasts are the climatological +#'forecast (same probabilities for all categories for all time steps), +#'persistence, a previous model version, and another model. It is computed as +#'CRPSS = 1 - CRPS_exp / CRPS_ref. The statistical significance is obtained +#'based on a Random Walk test at the 95% confidence level (DelSole and Tippett, +#'2016). #' #'@param exp A named numerical array of the forecast with at least time #' dimension. #'@param obs A named numerical array of the observation with at least time #' dimension. The dimensions must be the same as 'exp' except 'memb_dim'. #'@param ref A named numerical array of the reference forecast data with at -#' least time dimension. The dimensions must be the same as 'exp' except -#' 'memb_dim'. If it is NULL, the climatological forecast is used as reference -#' forecast. The default value is NULL. +#' least time and member dimension. The dimensions must be the same as 'exp' +#' except 'memb_dim' and 'dat_dim'. If the 'dat_dim' parameter is not NULL, the +#' corresponding dimension in 'ref' array must either be equal to the dataset +#' dimension of the 'exp' array or that 'ref' does not include the dimension of +#' 'dat_dim'. If 'ref' is NULL, the climatological forecast is used as +#' reference forecast. The default value is NULL. #'@param time_dim A character string indicating the name of the time dimension. #' The default value is 'sdate'. #'@param memb_dim A character string indicating the name of the member dimension #' to compute the probabilities of the forecast and the reference forecast. The #' default value is 'member'. +#'@param dat_dim A character string indicating the name of dataset dimension. +#' The length of this dimension can be different between 'exp' and 'obs'. +#' The default value is NULL. #'@param Fair A logical indicating whether to compute the FairCRPSS (the -#' potential CRPSS that the forecast would have with an infinite ensemble size). -#' The default value is FALSE. +#' potential CRPSS that the forecast would have with an infinite ensemble +#' size). The default value is FALSE. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -55,22 +62,32 @@ #'@import multiApply #'@importFrom ClimProjDiags Subset #'@export -CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', +CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, Fair = FALSE, ncores = NULL) { # Check inputs ## exp, obs, and ref (1) - if (!is.array(exp) | !is.numeric(exp)) - stop('Parameter "exp" must be a numeric array.') - if (!is.array(obs) | !is.numeric(obs)) - stop('Parameter "obs" must be a numeric array.') + if (!is.array(exp) | !is.numeric(exp)) { + stop("Parameter 'exp' must be a numeric array.") + } + if (!is.array(obs) | !is.numeric(obs)) { + stop("Parameter 'obs' must be a numeric array.") + } + if (any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } if (!is.null(ref)) { if (!is.array(ref) | !is.numeric(ref)) - stop('Parameter "ref" must be a numeric array.') + stop("Parameter 'ref' must be a numeric array.") + if (any(is.null(names(dim(ref))))| any(nchar(names(dim(ref))) == 0)) { + stop("Parameter 'ref' must have dimension names.") + } } ## time_dim - if (!is.character(time_dim) | length(time_dim) != 1) - stop('Parameter "time_dim" must be a character string.') + if (!is.character(time_dim) | length(time_dim) != 1) { + stop("Parameter 'time_dim' must be a character string.") + } if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") } @@ -87,27 +104,54 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', if (!is.null(ref) & !memb_dim %in% names(dim(ref))) { stop("Parameter 'memb_dim' is not found in 'ref' dimension.") } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } ## exp and obs (2) if (memb_dim %in% names(dim(obs))) { if (identical(as.numeric(dim(obs)[memb_dim]),1)){ obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') - } else {stop("Not implemented for observations with members ('obs' can have 'memb_dim', but it should be of length=1).")} + } else {stop("Not implemented for observations with members ('obs' can have ", + "'memb_dim', but it should be of length=1).")} } name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) name_exp <- name_exp[-which(name_exp == memb_dim)] + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } if (!identical(length(name_exp), length(name_obs)) | !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions expect 'memb_dim'.")) + stop(paste0("Parameter 'exp' and 'obs' must have same length of all dimensions", + " except 'memb_dim' and 'dat_dim'.")) } if (!is.null(ref)) { name_ref <- sort(names(dim(ref))) name_ref <- name_ref[-which(name_ref == memb_dim)] + if (!is.null(dat_dim)) { + if (dat_dim %in% name_ref) { + if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { + stop(paste0("If parameter 'ref' has 'dat_dim' dimension it must be", + " equal to 'dat_dim' dimension of 'exp'.")) + } + name_ref <- name_ref[-which(name_ref == dat_dim)] + } + } if (!identical(length(name_exp), length(name_ref)) | !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions expect 'memb_dim'.")) + stop(paste0("Parameter 'exp' and 'ref' must have same length of ", + "all dimensions except 'memb_dim' and 'dat_dim'.")) } } ## Fair @@ -126,47 +170,127 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', # Compute CRPSS if (!is.null(ref)) { # use "ref" as reference forecast + if (!is.null(dat_dim) && (dat_dim %in% names(dim(ref)))) { + target_dims_ref <- c(time_dim, memb_dim, dat_dim) + } else { + target_dims_ref <- c(time_dim, memb_dim) + } data <- list(exp = exp, obs = obs, ref = ref) - target_dims = list(exp = c(time_dim, memb_dim), - obs = time_dim, - ref = c(time_dim, memb_dim)) + target_dims = list(exp = c(time_dim, memb_dim, dat_dim), + obs = c(time_dim, dat_dim), + ref = target_dims_ref) } else { data <- list(exp = exp, obs = obs) - target_dims = list(exp = c(time_dim, memb_dim), - obs = time_dim) + target_dims = list(exp = c(time_dim, memb_dim, dat_dim), + obs = c(time_dim, dat_dim)) } output <- Apply(data, target_dims = target_dims, fun = .CRPSS, + time_dim = time_dim, memb_dim = memb_dim, + dat_dim = dat_dim, Fair = Fair, ncores = ncores) return(output) } -.CRPSS <- function(exp, obs, ref = NULL, Fair = FALSE) { - # exp: [sdate, memb] - # obs: [sdate] - # ref: [sdate, memb] or NULL +.CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, + Fair = FALSE) { + + # exp: [sdate, memb (dat_dim)] + # obs: [sdate, (dat_dim)] + # ref: [sdate, memb, (dat_dim)] or NULL + + if (is.null(dat_dim)) { + nexp <- 1 + nobs <- 1 + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + } # CRPS of the forecast - crps_exp <- .CRPS(exp = exp, obs = obs, Fair = Fair) + # [sdate, (nexp), (nobs)] + crps_exp <- .CRPS(exp = exp, obs = obs, time_dim = time_dim, memb_dim = memb_dim, + dat_dim = dat_dim, Fair = Fair) # CRPS of the reference forecast - if (is.null(ref)){ + if (is.null(ref)) { ## using climatology as reference forecast ## all the time steps are used as if they were members ## then, ref dimensions are [sdate, memb], both with length(sdate) - ref <- array(data = obs, dim = c(member = length(obs))) - ref <- InsertDim(data = ref, posdim = 1, lendim = length(obs), name = 'sdate') + if (is.null(dat_dim)) { + ref <- array(data = obs, dim = c(member = length(obs))) + ref <- InsertDim(data = ref, posdim = 1, lendim = length(obs), name = 'sdate') + crps_ref <- .CRPS(exp = ref, obs = obs, Fair = Fair) + } else { + ref <- array(dim = c(dim(obs)[1], member = unname(dim(obs)[1]), dim(obs)[2])) + for (j in 1:nobs) { + ref_obs <- array(data = obs[ , j], dim = c(member = length(obs[ , j]))) + ref_obs <- InsertDim(data = ref_obs, posdim = 1, lendim = length(obs[ , j]), name = 'sdate') + ref[ , , j] <- ref_obs + } + + # for FairCRPS + R_new <- ifelse(Fair, Inf, NA) + + crps_ref <- array(dim = c(dim(exp)[time_dim], nobs = nobs)) + + for (j in 1:nobs) { + ref_data <- ref[ , , j] + obs_data <- obs[ , j] + + if (is.null(dim(ref_data))) dim(ref_data) <- c(dim(exp)[1:2]) + if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1]) + + crps <- SpecsVerification::enscrps_cpp(ens = ref_data, obs = obs_data, R_new = R_new) + crps_ref[ , j] <- crps + } + } + + } else { + if (!is.null(dat_dim) && (!dat_dim %in% names(dim(ref)))) { + remove_dat_dim <- TRUE + ref <- InsertDim(data = ref, posdim = length(dim(ref)) + 1 , lendim = 1, name = dat_dim) + } + crps_ref <- .CRPS(exp = ref, obs = obs, time_dim = time_dim, memb_dim = memb_dim, + dat_dim = dat_dim, Fair = Fair) + if (!is.null(dat_dim)) { + if (isTRUE(remove_dat_dim)) { + dim(crps_ref) <- dim(crps_ref)[-2] + } + } + } + + if (!is.null(dat_dim)) { + + crps_exp_mean <- MeanDims(crps_exp, time_dim, na.rm = FALSE) + crps_ref_mean <- MeanDims(crps_ref, time_dim, na.rm = FALSE) + crpss <- array(dim = c(nexp = nexp, nobs = nobs)) + sign <- array(dim = c(nexp = nexp, nobs = nobs)) + + if (length(dim(crps_ref_mean)) == 1) { + for (i in 1:nexp) { + for (j in 1:nobs) { + crpss[i, j] <- 1 - crps_exp_mean[i, j] / crps_ref_mean[j] + sign[i, j] <- .RandomWalkTest(skill_A = crps_exp_mean[i, j], skill_B = crps_ref_mean[j])$signif + } + } + } else { + for (i in 1:nexp) { + for (j in 1:nobs) { + crpss[i, j] <- 1 - crps_exp_mean[i, j] / crps_ref_mean[i, j] + sign[i, j] <- .RandomWalkTest(skill_A = crps_exp_mean[i, j], skill_B = crps_ref_mean[i, j])$signif + } + } + } + + } else { + crpss <- 1 - mean(crps_exp) / mean(crps_ref) + # Significance + sign <- .RandomWalkTest(skill_A = crps_exp, skill_B = crps_ref)$signif } - crps_ref <- .CRPS(exp = ref, obs = obs, Fair = Fair) - - # CRPSS - crpss <- 1 - mean(crps_exp) / mean(crps_ref) - - # Significance - sign <- .RandomWalkTest(skill_A = crps_exp, skill_B = crps_ref)$signif return(list(crpss = crpss, sign = sign)) } diff --git a/tests/testthat/test-CRPSS.R b/tests/testthat/test-CRPSS.R new file mode 100644 index 0000000..7221e10 --- /dev/null +++ b/tests/testthat/test-CRPSS.R @@ -0,0 +1,300 @@ +context("s2dv::CRPSS tests") + +############################################## + +# dat1 +set.seed(1) +exp1 <- array(rnorm(60), dim = c(member = 3, sdate = 10, lat = 2)) +set.seed(2) +obs1 <- array(rnorm(20), dim = c(sdate = 10, lat = 2)) +set.seed(3) +ref1 <- array(rnorm(40), dim = c(member = 2, sdate = 10, lat = 2)) + +# dat2 +set.seed(1) +exp2 <- array(rnorm(20), dim = c(member = 2, sdate = 10)) +set.seed(2) +obs2 <- array(rnorm(10), dim = c(sdate = 10)) +set.seed(3) +ref2 <- array(rnorm(20), dim = c(member = 2, sdate = 10)) + +# dat2_2 +set.seed(1) +exp2_2 <- array(rnorm(20), dim = c(member = 2, sdate = 10, dataset = 1)) +set.seed(2) +obs2_2 <- array(rnorm(10), dim = c(sdate = 10, dataset = 1)) +set.seed(3) +ref2_2 <- array(rnorm(20), dim = c(member = 2, sdate = 10, dataset = 1)) + +# dat3 +set.seed(1) +exp3 <- array(rnorm(60), dim = c(member = 2, sdate = 10, dataset = 3)) +set.seed(2) +obs3 <- array(rnorm(20), dim = c(member = 1, sdate = 10, dataset = 2)) +set.seed(3) +ref3 <- array(rnorm(60), dim = c(member = 2, sdate = 10, dataset = 3)) + +# dat4 +set.seed(1) +exp4 <- array(rnorm(60), dim = c(member = 2, sdate = 10, dataset = 3, lat = 2)) +set.seed(2) +obs4 <- array(rnorm(20), dim = c(member = 1, sdate = 10, dataset = 2, lat = 2)) +set.seed(3) +ref4 <- array(rnorm(60), dim = c(member = 2, sdate = 10, lat = 2)) + +# dat5 +set.seed(1) +exp5 <- array(rnorm(60), dim = c(member = 2, sdate = 10, dataset = 3)) +set.seed(2) +obs5 <- array(rnorm(20), dim = c(member = 1, sdate = 10, dataset = 2)) + +############################################## +test_that("1. Input checks", { + # exp and obs (1) + expect_error( + CRPSS(c()), + "Parameter 'exp' must be a numeric array." + ) + expect_error( + CRPSS(exp1, c()), + "Parameter 'obs' must be a numeric array." + ) + + # time_dim + expect_error( + CRPSS(exp1, obs1, time_dim = 1), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + CRPSS(exp1, obs1, time_dim = 'time'), + "Parameter 'time_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + CRPSS(exp2, obs2, array(rnorm(20), dim = c(member = 2, time = 10))), + "Parameter 'time_dim' is not found in 'ref' dimension." + ) + # memb_dim + expect_error( + CRPSS(exp1, obs1, memb_dim = TRUE), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + CRPSS(exp1, obs1, memb_dim = 'memb'), + "Parameter 'memb_dim' is not found in 'exp' dimension." + ) + expect_error( + CRPSS(exp2, obs2, array(rnorm(20), dim = c(memb = 2, sdate = 10))), + "Parameter 'memb_dim' is not found in 'ref' dimension." + ) + # exp, ref, and obs (2) + expect_error( + CRPSS(exp1, array(1:9, dim = c(sdate = 9))), + "Parameter 'exp' and 'obs' must have same length of all dimensions except 'memb_dim' and 'dat_dim'." + ) + expect_error( + CRPSS(exp2_2, obs2_2, array(1:9, dim = c(sdate = 9, member = 2, dataset = 3)), dat_dim = 'dataset'), + "If parameter 'ref' has 'dat_dim' dimension it must be equal to 'dat_dim' dimension of 'exp'." + ) + expect_error( + CRPSS(exp1, obs1, ref2), + "Parameter 'exp' and 'ref' must have same length of all dimensions except 'memb_dim' and 'dat_dim'." + ) + expect_error( + CRPSS(exp3, array(rnorm(60), dim = c(member = 2, sdate = 10, dataset = 3)), ref3, dat_dim = 'dataset'), + "Not implemented for observations with members ('obs' can have 'memb_dim', but it should be of length=1)." + , fixed = TRUE + ) + # Fair + expect_error( + CRPSS(exp1, obs1, Fair = 1), + "Parameter 'Fair' must be either TRUE or FALSE." + ) + # ncores + expect_error( + CRPSS(exp2, obs2, ncores = 1.5), + "Parameter 'ncores' must be either NULL or a positive integer." + ) + +}) + +############################################## +test_that("2. Output checks: dat1", { + + expect_equal( + names(CRPSS(exp1, obs1)), + c("crpss", "sign") + ) + expect_equal( + names(CRPSS(exp1, obs1, ref1)), + c("crpss", "sign") + ) + expect_equal( + dim(CRPSS(exp1, obs1)$crpss), + c(lat = 2) + ) + expect_equal( + dim(CRPSS(exp1, obs1)$sign), + c(lat = 2) + ) + expect_equal( + dim(CRPSS(exp1, obs1, ref1)$crpss), + c(lat = 2) + ) + expect_equal( + dim(CRPSS(exp1, obs1, ref1)$sign), + c(lat = 2) + ) + # ref = NULL + expect_equal( + as.vector(CRPSS(exp1, obs1)$crpss), + c(-0.1582765, -0.2390707), + tolerance = 0.0001 + ) + expect_equal( + as.vector(CRPSS(exp1, obs1)$sign), + c(FALSE, FALSE), + ) + expect_equal( + as.vector(CRPSS(exp1, obs1, Fair = T)$crpss), + c(0.07650872, -0.09326681), + tolerance = 0.0001 + ) + expect_equal( + as.vector(CRPSS(exp1, obs1)$crpss), + c(-0.1582765, -0.2390707), + tolerance = 0.0001 + ) + # ref = ref + expect_equal( + as.vector(CRPSS(exp1, obs1, ref1)$crpss), + c(0.3491793, 0.3379610), + tolerance = 0.0001 + ) + expect_equal( + as.vector(CRPSS(exp1, obs1, ref1)$sign), + c(FALSE, FALSE) + ) + expect_equal( + as.vector(CRPSS(exp1, obs1, ref1, Fair = T)$crpss), + c( 0.3901440, 0.3788467), + tolerance = 0.0001 + ) + expect_equal( + as.vector(CRPSS(exp1, obs1, ref1)$crpss), + c( 0.3491793, 0.3379610), + tolerance = 0.0001 + ) + +}) + +############################################## +test_that("3. Output checks: dat2", { + + expect_equal( + names(CRPSS(exp2, obs2)), + c("crpss", "sign") + ) + expect_equal( + names(CRPSS(exp2, obs2, ref2)), + c("crpss", "sign") + ) + expect_equal( + dim(CRPSS(exp2, obs2)$crpss), + NULL + ) + expect_equal( + dim(CRPSS(exp2, obs2)$sign), + NULL + ) + expect_equal( + dim(CRPSS(exp2, obs2, ref2)$crpss), + NULL + ) + expect_equal( + dim(CRPSS(exp2, obs2, ref2)$sign), + NULL + ) + # ref = NULL + expect_equal( + as.vector(CRPSS(exp2, obs2)$crpss), + c(-0.8209236), + tolerance = 0.0001 + ) + expect_equal( + as.vector(CRPSS(exp2, obs2)$crpss), + as.vector(CRPSS(exp2_2, obs2_2, dat_dim = 'dataset')$crpss), + tolerance = 0.0001 + ) + + expect_equal( + as.vector(CRPSS(exp2, obs2)$sign), + TRUE, + ) + expect_equal( + as.vector(CRPSS(exp2, obs2, Fair = T)$crpss), + c(-0.468189), + tolerance = 0.0001 + ) + expect_equal( + as.vector(CRPSS(exp2, obs2, Fair = T)$crpss), + as.vector(CRPSS(exp2_2, obs2_2, dat_dim = 'dataset', Fair = T)$crpss), + tolerance = 0.0001 + ) + # ref = ref + expect_equal( + as.vector(CRPSS(exp2, obs2, ref2)$crpss), + -0.02315361, + tolerance = 0.0001 + ) + expect_equal( + as.vector(CRPSS(exp2, obs2, ref2)$crpss), + as.vector(CRPSS(exp2_2, obs2_2, ref2_2, dat_dim = 'dataset')$crpss), + tolerance = 0.0001 + ) + expect_equal( + as.vector(CRPSS(exp2, obs2, ref2)$sign), + FALSE + ) + expect_equal( + as.vector(CRPSS(exp2, obs2, ref2, Fair = T)$crpss), + 0.030436, + tolerance = 0.0001 + ) + +}) +############################################## +test_that("4. Output checks: dat3", { + + expect_equal( + dim(CRPSS(exp3, obs3, dat_dim = 'dataset')$crpss), + c('nexp' = 3, 'nobs' = 2) + ) + expect_equal( + mean(CRPSS(exp3, obs3, dat_dim = 'dataset')$crpss), + c(-0.7390546), + tolerance = 0.0001 + ) + expect_equal( + as.vector(CRPSS(exp3, obs3, dat_dim = 'dataset')$sign)[1:3], + c(FALSE, FALSE, FALSE), + ) + expect_equal( + mean(CRPSS(exp3, obs3, dat_dim = 'dataset', Fair = T)$crpss), + c(-0.5302703), + tolerance = 0.0001 + ) +}) + +############################################## +test_that("5. Output checks: dat4, dat2 and dat5", { + + expect_equal( + dim(CRPSS(exp4, obs4, ref4, dat_dim = 'dataset')$crpss), + c('nexp' = 3, 'nobs' = 2, 'lat' = 2) + ) + expect_equal( + CRPSS(exp2, obs2)$crpss, + CRPSS(exp3, obs3, dat_dim = 'dataset')$crpss[1] + ) + +}) -- GitLab From 3d7113cf4ae839c43ddf51e9f54b87351f048df6 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 17 Aug 2022 10:13:01 +0200 Subject: [PATCH 10/15] Update documentation --- man/CRPSS.Rd | 41 +++++++++++++++++++++++++---------------- man/s2dv-package.Rd | 10 +++++++++- man/sampleDepthData.Rd | 6 ++---- man/sampleMap.Rd | 6 ++---- man/sampleTimeSeries.Rd | 6 ++---- 5 files changed, 40 insertions(+), 29 deletions(-) diff --git a/man/CRPSS.Rd b/man/CRPSS.Rd index 85705f3..5ab9726 100644 --- a/man/CRPSS.Rd +++ b/man/CRPSS.Rd @@ -10,6 +10,7 @@ CRPSS( ref = NULL, time_dim = "sdate", memb_dim = "member", + dat_dim = NULL, Fair = FALSE, ncores = NULL ) @@ -22,9 +23,12 @@ dimension.} dimension. The dimensions must be the same as 'exp' except 'memb_dim'.} \item{ref}{A named numerical array of the reference forecast data with at -least time dimension. The dimensions must be the same as 'exp' except -'memb_dim'. If it is NULL, the climatological forecast is used as reference -forecast. The default value is NULL.} +least time and member dimension. The dimensions must be the same as 'exp' +except 'memb_dim' and 'dat_dim'. If the 'dat_dim' parameter is not NULL, the +corresponding dimension in 'ref' array must either be equal to the dataset +dimension of the 'exp' array or that 'ref' does not include the dimension of +'dat_dim'. If 'ref' is NULL, the climatological forecast is used as +reference forecast. The default value is NULL.} \item{time_dim}{A character string indicating the name of the time dimension. The default value is 'sdate'.} @@ -33,9 +37,13 @@ The default value is 'sdate'.} to compute the probabilities of the forecast and the reference forecast. The default value is 'member'.} +\item{dat_dim}{A character string indicating the name of dataset dimension. +The length of this dimension can be different between 'exp' and 'obs'. +The default value is NULL.} + \item{Fair}{A logical indicating whether to compute the FairCRPSS (the -potential CRPSS that the forecast would have with an infinite ensemble size). -The default value is FALSE.} +potential CRPSS that the forecast would have with an infinite ensemble +size). The default value is FALSE.} \item{ncores}{An integer indicating the number of cores to use for parallel computation. The default value is NULL.} @@ -51,17 +59,18 @@ computation. The default value is NULL.} } } \description{ -The Continuous Ranked Probability Skill Score (CRPSS; Wilks, 2011) is the skill score -based on the Continuous Ranked Probability Score (CRPS; Wilks, 2011). It can be used to -assess whether a forecast presents an improvement or worsening with respect to -a reference forecast. The CRPSS ranges between minus infinite and 1. If the -CRPSS is positive, it indicates that the forecast has higher skill than the -reference forecast, while a negative value means that it has a lower skill. -Examples of reference forecasts are the climatological forecast (same -probabilities for all categories for all time steps), persistence, a previous -model version, and another model. It is computed as CRPSS = 1 - CRPS_exp / CRPS_ref. -The statistical significance is obtained based on a Random Walk test at the -95% confidence level (DelSole and Tippett, 2016). +The Continuous Ranked Probability Skill Score (CRPSS; Wilks, 2011) is the +skill score based on the Continuous Ranked Probability Score (CRPS; Wilks, +2011). It can be used to assess whether a forecast presents an improvement or +worsening with respect to a reference forecast. The CRPSS ranges between minus +infinite and 1. If the CRPSS is positive, it indicates that the forecast has +higher skill than the reference forecast, while a negative value means that it +has a lower skill. Examples of reference forecasts are the climatological +forecast (same probabilities for all categories for all time steps), +persistence, a previous model version, and another model. It is computed as +CRPSS = 1 - CRPS_exp / CRPS_ref. The statistical significance is obtained +based on a Random Walk test at the 95% confidence level (DelSole and Tippett, +2016). } \examples{ exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) diff --git a/man/s2dv-package.Rd b/man/s2dv-package.Rd index 3c98a95..ffb6783 100644 --- a/man/s2dv-package.Rd +++ b/man/s2dv-package.Rd @@ -6,7 +6,15 @@ \alias{s2dv-package} \title{s2dv: A Set of Common Tools for Seasonal to Decadal Verification} \description{ -The advanced version of package 's2dverification'. It is intended for 'seasonal to decadal' (s2d) climate forecast verification, but it can also be used in other kinds of forecasts or general climate analysis. This package is specially designed for the comparison between the experimental and observational datasets. The functionality of the included functions covers from data retrieval, data post-processing, skill scores against observation, to visualization. Compared to 's2dverification', 's2dv' is more compatible with the package 'startR', able to use multiple cores for computation and handle multi-dimensional arrays with a higher flexibility. +The advanced version of package 's2dverification'. It is + intended for 'seasonal to decadal' (s2d) climate forecast verification, but + it can also be used in other kinds of forecasts or general climate analysis. + This package is specially designed for the comparison between the experimental + and observational datasets. The functionality of the included functions covers + from data retrieval, data post-processing, skill scores against observation, + to visualization. Compared to 's2dverification', 's2dv' is more compatible + with the package 'startR', able to use multiple cores for computation and + handle multi-dimensional arrays with a higher flexibility. } \references{ \url{https://earth.bsc.es/gitlab/es/s2dv/} diff --git a/man/sampleDepthData.Rd b/man/sampleDepthData.Rd index 47e2f1b..77e4a7a 100644 --- a/man/sampleDepthData.Rd +++ b/man/sampleDepthData.Rd @@ -5,8 +5,7 @@ \alias{sampleDepthData} \title{Sample of Experimental Data for Forecast Verification In Function Of Latitudes And Depths} -\format{ -The data set provides with a variable named 'sampleDepthData'.\cr\cr +\format{The data set provides with a variable named 'sampleDepthData'.\cr\cr sampleDepthData$exp is an array that contains the experimental data and the dimension meanings and values are:\cr @@ -19,8 +18,7 @@ but in this sample is not defined (NULL).\cr\cr sampleDepthData$depths is an array with the 7 longitudes covered by the data.\cr\cr -sampleDepthData$lat is an array with the 21 latitudes covered by the data.\cr\cr -} +sampleDepthData$lat is an array with the 21 latitudes covered by the data.\cr\cr} \usage{ data(sampleDepthData) } diff --git a/man/sampleMap.Rd b/man/sampleMap.Rd index e4ec5a5..eaf8aa5 100644 --- a/man/sampleMap.Rd +++ b/man/sampleMap.Rd @@ -4,8 +4,7 @@ \name{sampleMap} \alias{sampleMap} \title{Sample Of Observational And Experimental Data For Forecast Verification In Function Of Longitudes And Latitudes} -\format{ -The data set provides with a variable named 'sampleMap'.\cr\cr +\format{The data set provides with a variable named 'sampleMap'.\cr\cr sampleMap$mod is an array that contains the experimental data and the dimension meanings and values are:\cr c(# of experimental datasets, # of members, # of starting dates, # of lead-times, # of latitudes, # of longitudes)\cr @@ -17,8 +16,7 @@ sampleMap$obs is an array that contains the observational data and the dimension sampleMap$lat is an array with the 2 latitudes covered by the data (see examples on Load() for details on why such low resolution).\cr\cr - sampleMap$lon is an array with the 3 longitudes covered by the data (see examples on Load() for details on why such low resolution). -} + sampleMap$lon is an array with the 3 longitudes covered by the data (see examples on Load() for details on why such low resolution).} \usage{ data(sampleMap) } diff --git a/man/sampleTimeSeries.Rd b/man/sampleTimeSeries.Rd index 7f058e2..05a8e79 100644 --- a/man/sampleTimeSeries.Rd +++ b/man/sampleTimeSeries.Rd @@ -4,8 +4,7 @@ \name{sampleTimeSeries} \alias{sampleTimeSeries} \title{Sample Of Observational And Experimental Data For Forecast Verification As Area Averages} -\format{ -The data set provides with a variable named 'sampleTimeSeries'.\cr\cr +\format{The data set provides with a variable named 'sampleTimeSeries'.\cr\cr sampleTimeSeries$mod is an array that contains the experimental data and the dimension meanings and values are:\cr c(# of experimental datasets, # of members, # of starting dates, # of lead-times)\cr @@ -17,8 +16,7 @@ sampleTimeSeries$obs is an array that contains the observational data and the di sampleTimeSeries$lat is an array with the 2 latitudes covered by the data that was area averaged to calculate the time series (see examples on Load() for details on why such low resolution).\cr\cr -sampleTimeSeries$lon is an array with the 3 longitudes covered by the data that was area averaged to calculate the time series (see examples on Load() for details on why such low resolution). -} +sampleTimeSeries$lon is an array with the 3 longitudes covered by the data that was area averaged to calculate the time series (see examples on Load() for details on why such low resolution).} \usage{ data(sampleTimeSeries) } -- GitLab From 38db5ce733d9aaab50b8489194e7df247d6c8197 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 17 Aug 2022 10:22:38 +0200 Subject: [PATCH 11/15] Correct documentation and minor bugs --- R/CRPSS.R | 17 ++++++++++------- man/CRPSS.Rd | 10 +++++++--- tests/testthat/test-CRPSS.R | 4 ++-- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/R/CRPSS.R b/R/CRPSS.R index 80a709c..a4b6998 100644 --- a/R/CRPSS.R +++ b/R/CRPSS.R @@ -16,7 +16,8 @@ #'@param exp A named numerical array of the forecast with at least time #' dimension. #'@param obs A named numerical array of the observation with at least time -#' dimension. The dimensions must be the same as 'exp' except 'memb_dim'. +#' dimension. The dimensions must be the same as 'exp' except 'memb_dim' +#' and 'dat_dim'. #'@param ref A named numerical array of the reference forecast data with at #' least time and member dimension. The dimensions must be the same as 'exp' #' except 'memb_dim' and 'dat_dim'. If the 'dat_dim' parameter is not NULL, the @@ -40,8 +41,11 @@ #' #'@return #'\item{$crpss}{ -#' A numerical array of the CRPSS with the same dimensions as "exp" except the -#' 'time_dim' and 'memb_dim' dimensions. +#' A numerical array of the CRPSS with dimensions c(nexp, nobs, the rest +#' dimensions of 'exp' except 'time_dim' and 'memb_dim' dimensions). nexp is +#' the number of experiment (i.e., dat_dim in exp), and nobs is the number of +#' observation (i.e., dat_dim in obs). If 'dat_dim' is NULL, nexp and nobs are +#' omitted. #'} #'\item{$sign}{ #' A logical array of the statistical significance of the CRPSS with the same @@ -116,7 +120,7 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', } ## exp and obs (2) if (memb_dim %in% names(dim(obs))) { - if (identical(as.numeric(dim(obs)[memb_dim]),1)){ + if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') } else {stop("Not implemented for observations with members ('obs' can have ", "'memb_dim', but it should be of length=1).")} @@ -160,8 +164,7 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', } ## ncores if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { stop("Parameter 'ncores' must be either NULL or a positive integer.") } } @@ -196,7 +199,7 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', } .CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, - Fair = FALSE) { + Fair = FALSE) { # exp: [sdate, memb (dat_dim)] # obs: [sdate, (dat_dim)] diff --git a/man/CRPSS.Rd b/man/CRPSS.Rd index 5ab9726..5f3a0d4 100644 --- a/man/CRPSS.Rd +++ b/man/CRPSS.Rd @@ -20,7 +20,8 @@ CRPSS( dimension.} \item{obs}{A named numerical array of the observation with at least time -dimension. The dimensions must be the same as 'exp' except 'memb_dim'.} +dimension. The dimensions must be the same as 'exp' except 'memb_dim' +and 'dat_dim'.} \item{ref}{A named numerical array of the reference forecast data with at least time and member dimension. The dimensions must be the same as 'exp' @@ -50,8 +51,11 @@ computation. The default value is NULL.} } \value{ \item{$crpss}{ - A numerical array of the CRPSS with the same dimensions as "exp" except the - 'time_dim' and 'memb_dim' dimensions. + A numerical array of the CRPSS with dimensions c(nexp, nobs, the rest + dimensions of 'exp' except 'time_dim' and 'memb_dim' dimensions). nexp is + the number of experiment (i.e., dat_dim in exp), and nobs is the number of + observation (i.e., dat_dim in obs). If 'dat_dim' is NULL, nexp and nobs are + omitted. } \item{$sign}{ A logical array of the statistical significance of the CRPSS with the same diff --git a/tests/testthat/test-CRPSS.R b/tests/testthat/test-CRPSS.R index 7221e10..7e1b035 100644 --- a/tests/testthat/test-CRPSS.R +++ b/tests/testthat/test-CRPSS.R @@ -293,8 +293,8 @@ test_that("5. Output checks: dat4, dat2 and dat5", { c('nexp' = 3, 'nobs' = 2, 'lat' = 2) ) expect_equal( - CRPSS(exp2, obs2)$crpss, - CRPSS(exp3, obs3, dat_dim = 'dataset')$crpss[1] + as.vector(CRPSS(exp2, obs2)$crpss), + as.vector(CRPSS(exp3, obs3, dat_dim = 'dataset')$crpss[1]) ) }) -- GitLab From 4b2550b1d4119e3a3c93d84b144fcc9a70bda531 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 17 Aug 2022 10:35:58 +0200 Subject: [PATCH 12/15] added remove_dat_dim <- FALSE --- R/CRPSS.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/CRPSS.R b/R/CRPSS.R index a4b6998..4ee0c48 100644 --- a/R/CRPSS.R +++ b/R/CRPSS.R @@ -256,6 +256,8 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', if (!is.null(dat_dim) && (!dat_dim %in% names(dim(ref)))) { remove_dat_dim <- TRUE ref <- InsertDim(data = ref, posdim = length(dim(ref)) + 1 , lendim = 1, name = dat_dim) + } else { + remove_dat_dim <- FALSE } crps_ref <- .CRPS(exp = ref, obs = obs, time_dim = time_dim, memb_dim = memb_dim, dat_dim = dat_dim, Fair = Fair) -- GitLab From 09325779a1b338b007265bfa7dbdd7936a2f8371 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 17 Aug 2022 12:41:27 +0200 Subject: [PATCH 13/15] Correct output text in CRPSS and in test file --- R/CRPS.R | 4 ++-- R/CRPSS.R | 2 +- tests/testthat/test-CRPSS.R | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/CRPS.R b/R/CRPS.R index b2f38a5..7dedf4f 100644 --- a/R/CRPS.R +++ b/R/CRPS.R @@ -153,13 +153,13 @@ CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NU for (i in 1:nexp) { for (j in 1:nobs) { exp_data <- exp[ , , i] - obs_data <- obs[, j] + obs_data <- obs[ , j] if (is.null(dim(exp_data))) dim(exp_data) <- c(dim(exp)[1:2]) if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1]) crps <- SpecsVerification::enscrps_cpp(ens = exp_data, obs = obs_data, R_new = R_new) - CRPS[, i, j] <- crps + CRPS[ , i, j] <- crps } } diff --git a/R/CRPSS.R b/R/CRPSS.R index 4ee0c48..2097b8e 100644 --- a/R/CRPSS.R +++ b/R/CRPSS.R @@ -155,7 +155,7 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', if (!identical(length(name_exp), length(name_ref)) | !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { stop(paste0("Parameter 'exp' and 'ref' must have same length of ", - "all dimensions except 'memb_dim' and 'dat_dim'.")) + "all dimensions except 'memb_dim'. 'dat_dim' can be NULL in 'ref'.")) } } ## Fair diff --git a/tests/testthat/test-CRPSS.R b/tests/testthat/test-CRPSS.R index 7e1b035..c1a34fb 100644 --- a/tests/testthat/test-CRPSS.R +++ b/tests/testthat/test-CRPSS.R @@ -97,7 +97,7 @@ test_that("1. Input checks", { ) expect_error( CRPSS(exp1, obs1, ref2), - "Parameter 'exp' and 'ref' must have same length of all dimensions except 'memb_dim' and 'dat_dim'." + "Parameter 'exp' and 'ref' must have same length of all dimensions except 'memb_dim'. 'dat_dim' can be NULL in 'ref'" ) expect_error( CRPSS(exp3, array(rnorm(60), dim = c(member = 2, sdate = 10, dataset = 3)), ref3, dat_dim = 'dataset'), -- GitLab From cd12c15fc4a9a4808c92a41045ec2275599ba1f0 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 18 Aug 2022 10:48:12 +0200 Subject: [PATCH 14/15] Improve documentation and test file CRPSS --- R/CRPSS.R | 23 +++++++++++++---------- man/CRPSS.Rd | 10 +++++----- tests/testthat/test-CRPSS.R | 8 +------- 3 files changed, 19 insertions(+), 22 deletions(-) diff --git a/R/CRPSS.R b/R/CRPSS.R index 2097b8e..c77ba3d 100644 --- a/R/CRPSS.R +++ b/R/CRPSS.R @@ -20,11 +20,11 @@ #' and 'dat_dim'. #'@param ref A named numerical array of the reference forecast data with at #' least time and member dimension. The dimensions must be the same as 'exp' -#' except 'memb_dim' and 'dat_dim'. If the 'dat_dim' parameter is not NULL, the -#' corresponding dimension in 'ref' array must either be equal to the dataset -#' dimension of the 'exp' array or that 'ref' does not include the dimension of -#' 'dat_dim'. If 'ref' is NULL, the climatological forecast is used as -#' reference forecast. The default value is NULL. +#' except 'memb_dim' and 'dat_dim' which is NULL if there is only one +#' reference dataset. If there are multiple reference datasets 'dat_dim' +#' dimension must have the same length as in 'exp'. If 'ref' is NULL, the +#' climatological forecast is used as reference forecast. The default value +#' is NULL. #'@param time_dim A character string indicating the name of the time dimension. #' The default value is 'sdate'. #'@param memb_dim A character string indicating the name of the member dimension @@ -122,8 +122,10 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', if (memb_dim %in% names(dim(obs))) { if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') - } else {stop("Not implemented for observations with members ('obs' can have ", - "'memb_dim', but it should be of length=1).")} + } else { + stop("Not implemented for observations with members ('obs' can have ", + "'memb_dim', but it should be of length=1).") + } } name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) @@ -138,7 +140,7 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', if (!identical(length(name_exp), length(name_obs)) | !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { stop(paste0("Parameter 'exp' and 'obs' must have same length of all dimensions", - " except 'memb_dim' and 'dat_dim'.")) + " except 'memb_dim' and 'dat_dim'.")) } if (!is.null(ref)) { name_ref <- sort(names(dim(ref))) @@ -147,7 +149,7 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', if (dat_dim %in% name_ref) { if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { stop(paste0("If parameter 'ref' has 'dat_dim' dimension it must be", - " equal to 'dat_dim' dimension of 'exp'.")) + " equal to 'dat_dim' dimension of 'exp'.")) } name_ref <- name_ref[-which(name_ref == dat_dim)] } @@ -155,7 +157,8 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', if (!identical(length(name_exp), length(name_ref)) | !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { stop(paste0("Parameter 'exp' and 'ref' must have same length of ", - "all dimensions except 'memb_dim'. 'dat_dim' can be NULL in 'ref'.")) + "all dimensions except 'memb_dim' and 'dat_dim' if there is ", + "only one reference dataset.")) } } ## Fair diff --git a/man/CRPSS.Rd b/man/CRPSS.Rd index 5f3a0d4..903a7f5 100644 --- a/man/CRPSS.Rd +++ b/man/CRPSS.Rd @@ -25,11 +25,11 @@ and 'dat_dim'.} \item{ref}{A named numerical array of the reference forecast data with at least time and member dimension. The dimensions must be the same as 'exp' -except 'memb_dim' and 'dat_dim'. If the 'dat_dim' parameter is not NULL, the -corresponding dimension in 'ref' array must either be equal to the dataset -dimension of the 'exp' array or that 'ref' does not include the dimension of -'dat_dim'. If 'ref' is NULL, the climatological forecast is used as -reference forecast. The default value is NULL.} +except 'memb_dim' and 'dat_dim' which is NULL if there is only one +reference dataset. If there are multiple reference datasets 'dat_dim' +dimension must have the same length as in 'exp'. If 'ref' is NULL, the +climatological forecast is used as reference forecast. The default value +is NULL.} \item{time_dim}{A character string indicating the name of the time dimension. The default value is 'sdate'.} diff --git a/tests/testthat/test-CRPSS.R b/tests/testthat/test-CRPSS.R index c1a34fb..68b763a 100644 --- a/tests/testthat/test-CRPSS.R +++ b/tests/testthat/test-CRPSS.R @@ -42,12 +42,6 @@ obs4 <- array(rnorm(20), dim = c(member = 1, sdate = 10, dataset = 2, lat = 2)) set.seed(3) ref4 <- array(rnorm(60), dim = c(member = 2, sdate = 10, lat = 2)) -# dat5 -set.seed(1) -exp5 <- array(rnorm(60), dim = c(member = 2, sdate = 10, dataset = 3)) -set.seed(2) -obs5 <- array(rnorm(20), dim = c(member = 1, sdate = 10, dataset = 2)) - ############################################## test_that("1. Input checks", { # exp and obs (1) @@ -97,7 +91,7 @@ test_that("1. Input checks", { ) expect_error( CRPSS(exp1, obs1, ref2), - "Parameter 'exp' and 'ref' must have same length of all dimensions except 'memb_dim'. 'dat_dim' can be NULL in 'ref'" + "Parameter 'exp' and 'ref' must have same length of all dimensions except 'memb_dim' and 'dat_dim' if there is only one reference dataset." ) expect_error( CRPSS(exp3, array(rnorm(60), dim = c(member = 2, sdate = 10, dataset = 3)), ref3, dat_dim = 'dataset'), -- GitLab From d879b98b02a74dc70b9a96ef6c5afe3f234e26e3 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 19 Aug 2022 16:14:21 +0200 Subject: [PATCH 15/15] Refine code, add more unit tests --- R/CRPSS.R | 86 +++++++++++++++++-------------------- man/CRPSS.Rd | 16 +++---- man/s2dv-package.Rd | 10 +---- man/sampleDepthData.Rd | 6 ++- man/sampleMap.Rd | 6 ++- man/sampleTimeSeries.Rd | 6 ++- tests/testthat/test-CRPSS.R | 44 ++++++++++++++++--- 7 files changed, 98 insertions(+), 76 deletions(-) diff --git a/R/CRPSS.R b/R/CRPSS.R index c77ba3d..a6b4a14 100644 --- a/R/CRPSS.R +++ b/R/CRPSS.R @@ -8,7 +8,7 @@ #'higher skill than the reference forecast, while a negative value means that it #'has a lower skill. Examples of reference forecasts are the climatological #'forecast (same probabilities for all categories for all time steps), -#'persistence, a previous model version, and another model. It is computed as +#'persistence, a previous model version, or another model. It is computed as #'CRPSS = 1 - CRPS_exp / CRPS_ref. The statistical significance is obtained #'based on a Random Walk test at the 95% confidence level (DelSole and Tippett, #'2016). @@ -20,11 +20,11 @@ #' and 'dat_dim'. #'@param ref A named numerical array of the reference forecast data with at #' least time and member dimension. The dimensions must be the same as 'exp' -#' except 'memb_dim' and 'dat_dim' which is NULL if there is only one -#' reference dataset. If there are multiple reference datasets 'dat_dim' -#' dimension must have the same length as in 'exp'. If 'ref' is NULL, the -#' climatological forecast is used as reference forecast. The default value -#' is NULL. +#' except 'memb_dim' and 'dat_dim'. If there is only one reference dataset, +#' it should not have dataset dimension. If there is corresponding reference +#' for each experiement, the dataset dimension must have the same length as in +#' 'exp'. If 'ref' is NULL, the climatological forecast is used as reference +#' forecast. The default value is NULL. #'@param time_dim A character string indicating the name of the time dimension. #' The default value is 'sdate'. #'@param memb_dim A character string indicating the name of the member dimension @@ -48,8 +48,8 @@ #' omitted. #'} #'\item{$sign}{ -#' A logical array of the statistical significance of the CRPSS with the same -#' dimensions as 'exp' except the 'time_dim' and 'memb_dim' dimensions. +#' A logical array of the statistical significance of the CRPSS with the same +#' dimensions as $crpss. #'} #' #'@references @@ -118,21 +118,18 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', " Set it as NULL if there is no dataset dimension.") } } - ## exp and obs (2) + ## exp, obs, and ref (2) if (memb_dim %in% names(dim(obs))) { if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') } else { stop("Not implemented for observations with members ('obs' can have ", - "'memb_dim', but it should be of length=1).") + "'memb_dim', but it should be of length = 1).") } } name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) name_exp <- name_exp[-which(name_exp == memb_dim)] - if (memb_dim %in% name_obs) { - name_obs <- name_obs[-which(name_obs == memb_dim)] - } if (!is.null(dat_dim)) { name_exp <- name_exp[-which(name_exp == dat_dim)] name_obs <- name_obs[-which(name_obs == dat_dim)] @@ -148,8 +145,8 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', if (!is.null(dat_dim)) { if (dat_dim %in% name_ref) { if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { - stop(paste0("If parameter 'ref' has 'dat_dim' dimension it must be", - " equal to 'dat_dim' dimension of 'exp'.")) + stop(paste0("If parameter 'ref' has dataset dimension it must be", + " equal to dataset dimension of 'exp'.")) } name_ref <- name_ref[-which(name_ref == dat_dim)] } @@ -177,7 +174,7 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', # Compute CRPSS if (!is.null(ref)) { # use "ref" as reference forecast if (!is.null(dat_dim) && (dat_dim %in% names(dim(ref)))) { - target_dims_ref <- c(time_dim, memb_dim, dat_dim) + target_dims_ref <- c(time_dim, memb_dim, dat_dim) } else { target_dims_ref <- c(time_dim, memb_dim) } @@ -204,9 +201,9 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', .CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, Fair = FALSE) { - # exp: [sdate, memb (dat_dim)] - # obs: [sdate, (dat_dim)] - # ref: [sdate, memb, (dat_dim)] or NULL + # exp: [sdate, memb, (dat)] + # obs: [sdate, (dat)] + # ref: [sdate, memb, (dat)] or NULL if (is.null(dat_dim)) { nexp <- 1 @@ -216,46 +213,39 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', nobs <- as.numeric(dim(obs)[dat_dim]) } - # CRPS of the forecast + #----- CRPS of the forecast # [sdate, (nexp), (nobs)] crps_exp <- .CRPS(exp = exp, obs = obs, time_dim = time_dim, memb_dim = memb_dim, dat_dim = dat_dim, Fair = Fair) - # CRPS of the reference forecast + #----- CRPS of the reference forecast if (is.null(ref)) { ## using climatology as reference forecast ## all the time steps are used as if they were members ## then, ref dimensions are [sdate, memb], both with length(sdate) - if (is.null(dat_dim)) { - ref <- array(data = obs, dim = c(member = length(obs))) - ref <- InsertDim(data = ref, posdim = 1, lendim = length(obs), name = 'sdate') - crps_ref <- .CRPS(exp = ref, obs = obs, Fair = Fair) - } else { - ref <- array(dim = c(dim(obs)[1], member = unname(dim(obs)[1]), dim(obs)[2])) - for (j in 1:nobs) { - ref_obs <- array(data = obs[ , j], dim = c(member = length(obs[ , j]))) - ref_obs <- InsertDim(data = ref_obs, posdim = 1, lendim = length(obs[ , j]), name = 'sdate') - ref[ , , j] <- ref_obs - } - - # for FairCRPS - R_new <- ifelse(Fair, Inf, NA) - - crps_ref <- array(dim = c(dim(exp)[time_dim], nobs = nobs)) - for (j in 1:nobs) { - ref_data <- ref[ , , j] - obs_data <- obs[ , j] - - if (is.null(dim(ref_data))) dim(ref_data) <- c(dim(exp)[1:2]) - if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1]) + obs_time_len <- dim(obs)[time_dim] + if (is.null(dat_dim)) { + ref <- array(data = rep(obs, each = obs_time_len), dim = c(obs_time_len, obs_time_len)) + names(dim(ref)) <- c(time_dim, memb_dim) + # ref: [sdate, memb]; obs: [sdate] + crps_ref <- .CRPS(exp = ref, obs = obs, time_dim = time_dim, memb_dim = memb_dim, + dat_dim = dat_dim, Fair = Fair) + # crps_ref should be [sdate] - crps <- SpecsVerification::enscrps_cpp(ens = ref_data, obs = obs_data, R_new = R_new) - crps_ref[ , j] <- crps + } else { + crps_ref <- array(dim = c(obs_time_len, nobs)) + names(dim(crps_ref)) <- c(time_dim, 'nobs') + for (i_obs in 1:nobs) { + ref <- array(data = rep(obs[, i_obs], each = obs_time_len), dim = c(obs_time_len, obs_time_len)) + names(dim(ref)) <- c(time_dim, memb_dim) + crps_ref[, i_obs] <- .CRPS(exp = ref, obs = ClimProjDiags::Subset(obs, dat_dim, i_obs, drop = 'selected'), + time_dim = time_dim, memb_dim = memb_dim, dat_dim = NULL, Fair = Fair) } + # crps_ref should be [sdate, nobs] } - } else { + } else { # ref is not NULL if (!is.null(dat_dim) && (!dat_dim %in% names(dim(ref)))) { remove_dat_dim <- TRUE ref <- InsertDim(data = ref, posdim = length(dim(ref)) + 1 , lendim = 1, name = dat_dim) @@ -264,6 +254,8 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', } crps_ref <- .CRPS(exp = ref, obs = obs, time_dim = time_dim, memb_dim = memb_dim, dat_dim = dat_dim, Fair = Fair) + # crps_ref should be [sdate, (nexp), (nobs)] + if (!is.null(dat_dim)) { if (isTRUE(remove_dat_dim)) { dim(crps_ref) <- dim(crps_ref)[-2] @@ -271,7 +263,9 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', } } + #----- CRPSS if (!is.null(dat_dim)) { + # If ref != NULL & ref has dat_dim, crps_ref = [sdate, nexp, nobs]; else, crps_ref = [sdate, nobs] crps_exp_mean <- MeanDims(crps_exp, time_dim, na.rm = FALSE) crps_ref_mean <- MeanDims(crps_ref, time_dim, na.rm = FALSE) diff --git a/man/CRPSS.Rd b/man/CRPSS.Rd index 903a7f5..31bf501 100644 --- a/man/CRPSS.Rd +++ b/man/CRPSS.Rd @@ -25,11 +25,11 @@ and 'dat_dim'.} \item{ref}{A named numerical array of the reference forecast data with at least time and member dimension. The dimensions must be the same as 'exp' -except 'memb_dim' and 'dat_dim' which is NULL if there is only one -reference dataset. If there are multiple reference datasets 'dat_dim' -dimension must have the same length as in 'exp'. If 'ref' is NULL, the -climatological forecast is used as reference forecast. The default value -is NULL.} +except 'memb_dim' and 'dat_dim'. If there is only one reference dataset, +it should not have dataset dimension. If there is corresponding reference +for each experiement, the dataset dimension must have the same length as in +'exp'. If 'ref' is NULL, the climatological forecast is used as reference +forecast. The default value is NULL.} \item{time_dim}{A character string indicating the name of the time dimension. The default value is 'sdate'.} @@ -58,8 +58,8 @@ computation. The default value is NULL.} omitted. } \item{$sign}{ - A logical array of the statistical significance of the CRPSS with the same - dimensions as 'exp' except the 'time_dim' and 'memb_dim' dimensions. + A logical array of the statistical significance of the CRPSS with the same + dimensions as $crpss. } } \description{ @@ -71,7 +71,7 @@ infinite and 1. If the CRPSS is positive, it indicates that the forecast has higher skill than the reference forecast, while a negative value means that it has a lower skill. Examples of reference forecasts are the climatological forecast (same probabilities for all categories for all time steps), -persistence, a previous model version, and another model. It is computed as +persistence, a previous model version, or another model. It is computed as CRPSS = 1 - CRPS_exp / CRPS_ref. The statistical significance is obtained based on a Random Walk test at the 95% confidence level (DelSole and Tippett, 2016). diff --git a/man/s2dv-package.Rd b/man/s2dv-package.Rd index ffb6783..3c98a95 100644 --- a/man/s2dv-package.Rd +++ b/man/s2dv-package.Rd @@ -6,15 +6,7 @@ \alias{s2dv-package} \title{s2dv: A Set of Common Tools for Seasonal to Decadal Verification} \description{ -The advanced version of package 's2dverification'. It is - intended for 'seasonal to decadal' (s2d) climate forecast verification, but - it can also be used in other kinds of forecasts or general climate analysis. - This package is specially designed for the comparison between the experimental - and observational datasets. The functionality of the included functions covers - from data retrieval, data post-processing, skill scores against observation, - to visualization. Compared to 's2dverification', 's2dv' is more compatible - with the package 'startR', able to use multiple cores for computation and - handle multi-dimensional arrays with a higher flexibility. +The advanced version of package 's2dverification'. It is intended for 'seasonal to decadal' (s2d) climate forecast verification, but it can also be used in other kinds of forecasts or general climate analysis. This package is specially designed for the comparison between the experimental and observational datasets. The functionality of the included functions covers from data retrieval, data post-processing, skill scores against observation, to visualization. Compared to 's2dverification', 's2dv' is more compatible with the package 'startR', able to use multiple cores for computation and handle multi-dimensional arrays with a higher flexibility. } \references{ \url{https://earth.bsc.es/gitlab/es/s2dv/} diff --git a/man/sampleDepthData.Rd b/man/sampleDepthData.Rd index 77e4a7a..47e2f1b 100644 --- a/man/sampleDepthData.Rd +++ b/man/sampleDepthData.Rd @@ -5,7 +5,8 @@ \alias{sampleDepthData} \title{Sample of Experimental Data for Forecast Verification In Function Of Latitudes And Depths} -\format{The data set provides with a variable named 'sampleDepthData'.\cr\cr +\format{ +The data set provides with a variable named 'sampleDepthData'.\cr\cr sampleDepthData$exp is an array that contains the experimental data and the dimension meanings and values are:\cr @@ -18,7 +19,8 @@ but in this sample is not defined (NULL).\cr\cr sampleDepthData$depths is an array with the 7 longitudes covered by the data.\cr\cr -sampleDepthData$lat is an array with the 21 latitudes covered by the data.\cr\cr} +sampleDepthData$lat is an array with the 21 latitudes covered by the data.\cr\cr +} \usage{ data(sampleDepthData) } diff --git a/man/sampleMap.Rd b/man/sampleMap.Rd index eaf8aa5..e4ec5a5 100644 --- a/man/sampleMap.Rd +++ b/man/sampleMap.Rd @@ -4,7 +4,8 @@ \name{sampleMap} \alias{sampleMap} \title{Sample Of Observational And Experimental Data For Forecast Verification In Function Of Longitudes And Latitudes} -\format{The data set provides with a variable named 'sampleMap'.\cr\cr +\format{ +The data set provides with a variable named 'sampleMap'.\cr\cr sampleMap$mod is an array that contains the experimental data and the dimension meanings and values are:\cr c(# of experimental datasets, # of members, # of starting dates, # of lead-times, # of latitudes, # of longitudes)\cr @@ -16,7 +17,8 @@ sampleMap$obs is an array that contains the observational data and the dimension sampleMap$lat is an array with the 2 latitudes covered by the data (see examples on Load() for details on why such low resolution).\cr\cr - sampleMap$lon is an array with the 3 longitudes covered by the data (see examples on Load() for details on why such low resolution).} + sampleMap$lon is an array with the 3 longitudes covered by the data (see examples on Load() for details on why such low resolution). +} \usage{ data(sampleMap) } diff --git a/man/sampleTimeSeries.Rd b/man/sampleTimeSeries.Rd index 05a8e79..7f058e2 100644 --- a/man/sampleTimeSeries.Rd +++ b/man/sampleTimeSeries.Rd @@ -4,7 +4,8 @@ \name{sampleTimeSeries} \alias{sampleTimeSeries} \title{Sample Of Observational And Experimental Data For Forecast Verification As Area Averages} -\format{The data set provides with a variable named 'sampleTimeSeries'.\cr\cr +\format{ +The data set provides with a variable named 'sampleTimeSeries'.\cr\cr sampleTimeSeries$mod is an array that contains the experimental data and the dimension meanings and values are:\cr c(# of experimental datasets, # of members, # of starting dates, # of lead-times)\cr @@ -16,7 +17,8 @@ sampleTimeSeries$obs is an array that contains the observational data and the di sampleTimeSeries$lat is an array with the 2 latitudes covered by the data that was area averaged to calculate the time series (see examples on Load() for details on why such low resolution).\cr\cr -sampleTimeSeries$lon is an array with the 3 longitudes covered by the data that was area averaged to calculate the time series (see examples on Load() for details on why such low resolution).} +sampleTimeSeries$lon is an array with the 3 longitudes covered by the data that was area averaged to calculate the time series (see examples on Load() for details on why such low resolution). +} \usage{ data(sampleTimeSeries) } diff --git a/tests/testthat/test-CRPSS.R b/tests/testthat/test-CRPSS.R index 68b763a..6937edb 100644 --- a/tests/testthat/test-CRPSS.R +++ b/tests/testthat/test-CRPSS.R @@ -87,7 +87,7 @@ test_that("1. Input checks", { ) expect_error( CRPSS(exp2_2, obs2_2, array(1:9, dim = c(sdate = 9, member = 2, dataset = 3)), dat_dim = 'dataset'), - "If parameter 'ref' has 'dat_dim' dimension it must be equal to 'dat_dim' dimension of 'exp'." + "If parameter 'ref' has dataset dimension it must be equal to dataset dimension of 'exp'." ) expect_error( CRPSS(exp1, obs1, ref2), @@ -95,7 +95,7 @@ test_that("1. Input checks", { ) expect_error( CRPSS(exp3, array(rnorm(60), dim = c(member = 2, sdate = 10, dataset = 3)), ref3, dat_dim = 'dataset'), - "Not implemented for observations with members ('obs' can have 'memb_dim', but it should be of length=1)." + "Not implemented for observations with members ('obs' can have 'memb_dim', but it should be of length = 1)." , fixed = TRUE ) # Fair @@ -269,26 +269,56 @@ test_that("4. Output checks: dat3", { tolerance = 0.0001 ) expect_equal( - as.vector(CRPSS(exp3, obs3, dat_dim = 'dataset')$sign)[1:3], - c(FALSE, FALSE, FALSE), + as.vector(CRPSS(exp3, obs3, dat_dim = 'dataset')$crpss), + c(-0.8209236, -0.6270744, -1.0829403, -0.6574485, -0.5970569, -0.6488837), + tolerance = 0.0001 + ) + expect_equal( + as.vector(CRPSS(exp3, obs3, dat_dim = 'dataset')$sign), + rep(FALSE, 6), ) expect_equal( mean(CRPSS(exp3, obs3, dat_dim = 'dataset', Fair = T)$crpss), c(-0.5302703), tolerance = 0.0001 ) + expect_equal( + as.vector(CRPSS(exp3, obs3, dat_dim = 'dataset', Fair = T)$crpss), + c(-0.4681890, -0.3580685, -1.0116628, -0.3730576, -0.3965618, -0.5740819), + tolerance = 0.0001 + ) + # ref = ref3 + expect_equal( + as.vector(CRPSS(exp3, obs3, ref = ref3, dat_dim = 'dataset')$crpss), + c(-0.02315361, -0.05914715, -0.24638960, -0.09337738, 0.14668803, -0.01454008), + tolerance = 0.0001 + ) + + expect_equal( + as.vector(CRPSS(exp3, obs3, dat_dim = 'dataset')$crpss[1]), + as.vector(CRPSS(exp2, obs2)$crpss) + ) + }) ############################################## -test_that("5. Output checks: dat4, dat2 and dat5", { +test_that("5. Output checks: dat4", { expect_equal( dim(CRPSS(exp4, obs4, ref4, dat_dim = 'dataset')$crpss), c('nexp' = 3, 'nobs' = 2, 'lat' = 2) ) expect_equal( - as.vector(CRPSS(exp2, obs2)$crpss), - as.vector(CRPSS(exp3, obs3, dat_dim = 'dataset')$crpss[1]) + as.vector(CRPSS(exp4, obs4, dat_dim = 'dataset', Fair = T)$crpss)[1:6], + c(-0.4681890, -0.3580685, -1.0116628, -0.3730576, -0.3965618, -0.5740819), + tolerance = 0.0001 ) + # ref = ref3 + expect_equal( + as.vector(CRPSS(exp4, obs4, ref = ref4, dat_dim = 'dataset')$crpss)[1:6], + c(-0.02315361, 0.08576776, -0.17037744, -0.09337738, -0.05353853, -0.08772739), + tolerance = 0.0001 + ) + }) -- GitLab