diff --git a/NAMESPACE b/NAMESPACE index 032ebf98b3d409665b45f43ceea18477573f25fd..a5229e3d20dd73f6cd5ca216897e937ad3507c79 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,6 +7,8 @@ export(Ano) export(Ano_CrossValid) export(BrierScore) export(CDORemap) +export(CRPS) +export(CRPSS) export(Clim) export(Cluster) export(ColorBar) @@ -92,6 +94,7 @@ import(stats) importFrom(ClimProjDiags,CombineIndices) importFrom(ClimProjDiags,Subset) importFrom(ClimProjDiags,WeightedMean) +importFrom(SpecsVerification,enscrps_cpp) importFrom(abind,abind) importFrom(abind,adrop) importFrom(easyNCDF,ArrayToNc) diff --git a/R/CRPS.R b/R/CRPS.R new file mode 100644 index 0000000000000000000000000000000000000000..7dedf4fcb90a833b1034e7a1eb038e2588fe15ca --- /dev/null +++ b/R/CRPS.R @@ -0,0 +1,171 @@ +#'Compute the Continuous Ranked Probability Score +#' +#'The Continuous Ranked Probability Score (CRPS; Wilks, 2011) is the continuous +#'version of the Ranked Probability Score (RPS; Wilks, 2011). It is a skill +#'metric to evaluate the full distribution of probabilistic forecasts. It has a +#'negative orientation (i.e., the higher-quality forecast the smaller CRPS) and +#'it rewards the forecast that has probability concentration around the observed +#'value. In case of a deterministic forecast, the CRPS is reduced to the mean +#'absolute error. It has the same units as the data. The function is based on +#'enscrps_cpp from SpecsVerification. If there is more than one dataset, CRPS +#'will be computed for each pair of exp and obs data. +#' +#'@param exp A named numerical array of the forecast with at least time +#' dimension. +#'@param obs A named numerical array of the observation with at least time +#' dimension. The dimensions must be the same as 'exp' except 'memb_dim' and +#' 'dat_dim'. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the probabilities of the forecast. The default value is 'member'. +#'@param dat_dim A character string indicating the name of dataset dimension. +#' The length of this dimension can be different between 'exp' and 'obs'. The +#' default value is NULL. +#'@param Fair A logical indicating whether to compute the FairCRPS (the +#' potential CRPS that the forecast would have with an infinite ensemble size). +#' The default value is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A numerical array of CRPS with dimensions c(nexp, nobs, the rest dimensions of +#''exp' except 'time_dim' and 'memb_dim' dimensions). nexp is the number of +#'experiment (i.e., dat_dim in exp), and nobs is the number of observation +#'(i.e., dat_dim in obs). If dat_dim is NULL, nexp and nobs are omitted. +#' +#'@references +#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +#' +#'@examples +#'exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#'obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +#'res <- CRPS(exp = exp, obs = obs) +#' +#'@import multiApply +#'@importFrom SpecsVerification enscrps_cpp +#'@importFrom ClimProjDiags Subset +#'@export +CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, + Fair = FALSE, ncores = NULL) { + # Check inputs + ## exp and obs (1) + if (!is.array(exp) | !is.numeric(exp)) + stop("Parameter 'exp' must be a numeric array.") + if (!is.array(obs) | !is.numeric(obs)) + stop("Parameter 'obs' must be a numeric array.") + if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop("Parameter 'time_dim' must be a character string.") + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## exp and obs (2) + if (memb_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { + obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') + } else { + stop("Not implemented for observations with members ('obs' can have 'memb_dim', but it should be of length = 1).") + } + } + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'memb_dim' and 'dat_dim'.")) + } + ## Fair + if (!is.logical(Fair) | length(Fair) > 1) { + stop("Parameter 'Fair' must be either TRUE or FALSE.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + + # Compute CRPS + crps <- Apply(data = list(exp = exp, obs = obs), + target_dims = list(exp = c(time_dim, memb_dim, dat_dim), + obs = c(time_dim, dat_dim)), + fun = .CRPS, + time_dim = time_dim, memb_dim = memb_dim, dat_dim = dat_dim, + Fair = Fair, + ncores = ncores)$output1 + + # Return only the mean CRPS + crps <- MeanDims(crps, time_dim, na.rm = FALSE) + + return(crps) +} + +.CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, + Fair = FALSE) { + # exp: [sdate, memb, (dat_dim)] + # obs: [sdate, (dat_dim)] + + # Adjust dimensions if needed + if (is.null(dat_dim)) { + nexp <- 1 + nobs <- 1 + dim(exp) <- c(dim(exp), nexp = nexp) + dim(obs) <- c(dim(obs), nobs = nobs) + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + } + + # for FairCRPS + R_new <- ifelse(Fair, Inf, NA) + + CRPS <- array(dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs)) + + for (i in 1:nexp) { + for (j in 1:nobs) { + exp_data <- exp[ , , i] + obs_data <- obs[ , j] + + if (is.null(dim(exp_data))) dim(exp_data) <- c(dim(exp)[1:2]) + if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1]) + + crps <- SpecsVerification::enscrps_cpp(ens = exp_data, obs = obs_data, R_new = R_new) + CRPS[ , i, j] <- crps + } + } + + if (is.null(dat_dim)) { + dim(CRPS) <- c(dim(CRPS)[time_dim]) + } + + return(CRPS) +} diff --git a/R/CRPSS.R b/R/CRPSS.R new file mode 100644 index 0000000000000000000000000000000000000000..a6b4a1405a80156149cf16cad5c955b9135428c2 --- /dev/null +++ b/R/CRPSS.R @@ -0,0 +1,298 @@ +#'Compute the Continuous Ranked Probability Skill Score +#' +#'The Continuous Ranked Probability Skill Score (CRPSS; Wilks, 2011) is the +#'skill score based on the Continuous Ranked Probability Score (CRPS; Wilks, +#'2011). It can be used to assess whether a forecast presents an improvement or +#'worsening with respect to a reference forecast. The CRPSS ranges between minus +#'infinite and 1. If the CRPSS is positive, it indicates that the forecast has +#'higher skill than the reference forecast, while a negative value means that it +#'has a lower skill. Examples of reference forecasts are the climatological +#'forecast (same probabilities for all categories for all time steps), +#'persistence, a previous model version, or another model. It is computed as +#'CRPSS = 1 - CRPS_exp / CRPS_ref. The statistical significance is obtained +#'based on a Random Walk test at the 95% confidence level (DelSole and Tippett, +#'2016). +#' +#'@param exp A named numerical array of the forecast with at least time +#' dimension. +#'@param obs A named numerical array of the observation with at least time +#' dimension. The dimensions must be the same as 'exp' except 'memb_dim' +#' and 'dat_dim'. +#'@param ref A named numerical array of the reference forecast data with at +#' least time and member dimension. The dimensions must be the same as 'exp' +#' except 'memb_dim' and 'dat_dim'. If there is only one reference dataset, +#' it should not have dataset dimension. If there is corresponding reference +#' for each experiement, the dataset dimension must have the same length as in +#' 'exp'. If 'ref' is NULL, the climatological forecast is used as reference +#' forecast. The default value is NULL. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the probabilities of the forecast and the reference forecast. The +#' default value is 'member'. +#'@param dat_dim A character string indicating the name of dataset dimension. +#' The length of this dimension can be different between 'exp' and 'obs'. +#' The default value is NULL. +#'@param Fair A logical indicating whether to compute the FairCRPSS (the +#' potential CRPSS that the forecast would have with an infinite ensemble +#' size). The default value is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'\item{$crpss}{ +#' A numerical array of the CRPSS with dimensions c(nexp, nobs, the rest +#' dimensions of 'exp' except 'time_dim' and 'memb_dim' dimensions). nexp is +#' the number of experiment (i.e., dat_dim in exp), and nobs is the number of +#' observation (i.e., dat_dim in obs). If 'dat_dim' is NULL, nexp and nobs are +#' omitted. +#'} +#'\item{$sign}{ +#' A logical array of the statistical significance of the CRPSS with the same +#' dimensions as $crpss. +#'} +#' +#'@references +#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +#'DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1 +#' +#'@examples +#'exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#'obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +#'ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#'res <- CRPSS(exp = exp, obs = obs) ## climatology as reference forecast +#'res <- CRPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@export +CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, + Fair = FALSE, ncores = NULL) { + + # Check inputs + ## exp, obs, and ref (1) + if (!is.array(exp) | !is.numeric(exp)) { + stop("Parameter 'exp' must be a numeric array.") + } + if (!is.array(obs) | !is.numeric(obs)) { + stop("Parameter 'obs' must be a numeric array.") + } + if (any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + if (!is.null(ref)) { + if (!is.array(ref) | !is.numeric(ref)) + stop("Parameter 'ref' must be a numeric array.") + if (any(is.null(names(dim(ref))))| any(nchar(names(dim(ref))) == 0)) { + stop("Parameter 'ref' must have dimension names.") + } + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + if (!is.null(ref) & !time_dim %in% names(dim(ref))) { + stop("Parameter 'time_dim' is not found in 'ref' dimension.") + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (!is.null(ref) & !memb_dim %in% names(dim(ref))) { + stop("Parameter 'memb_dim' is not found in 'ref' dimension.") + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## exp, obs, and ref (2) + if (memb_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { + obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') + } else { + stop("Not implemented for observations with members ('obs' can have ", + "'memb_dim', but it should be of length = 1).") + } + } + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of all dimensions", + " except 'memb_dim' and 'dat_dim'.")) + } + if (!is.null(ref)) { + name_ref <- sort(names(dim(ref))) + name_ref <- name_ref[-which(name_ref == memb_dim)] + if (!is.null(dat_dim)) { + if (dat_dim %in% name_ref) { + if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { + stop(paste0("If parameter 'ref' has dataset dimension it must be", + " equal to dataset dimension of 'exp'.")) + } + name_ref <- name_ref[-which(name_ref == dat_dim)] + } + } + if (!identical(length(name_exp), length(name_ref)) | + !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { + stop(paste0("Parameter 'exp' and 'ref' must have same length of ", + "all dimensions except 'memb_dim' and 'dat_dim' if there is ", + "only one reference dataset.")) + } + } + ## Fair + if (!is.logical(Fair) | length(Fair) > 1) { + stop("Parameter 'Fair' must be either TRUE or FALSE.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + + # Compute CRPSS + if (!is.null(ref)) { # use "ref" as reference forecast + if (!is.null(dat_dim) && (dat_dim %in% names(dim(ref)))) { + target_dims_ref <- c(time_dim, memb_dim, dat_dim) + } else { + target_dims_ref <- c(time_dim, memb_dim) + } + data <- list(exp = exp, obs = obs, ref = ref) + target_dims = list(exp = c(time_dim, memb_dim, dat_dim), + obs = c(time_dim, dat_dim), + ref = target_dims_ref) + } else { + data <- list(exp = exp, obs = obs) + target_dims = list(exp = c(time_dim, memb_dim, dat_dim), + obs = c(time_dim, dat_dim)) + } + output <- Apply(data, + target_dims = target_dims, + fun = .CRPSS, + time_dim = time_dim, memb_dim = memb_dim, + dat_dim = dat_dim, + Fair = Fair, + ncores = ncores) + + return(output) +} + +.CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, + Fair = FALSE) { + + # exp: [sdate, memb, (dat)] + # obs: [sdate, (dat)] + # ref: [sdate, memb, (dat)] or NULL + + if (is.null(dat_dim)) { + nexp <- 1 + nobs <- 1 + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + } + + #----- CRPS of the forecast + # [sdate, (nexp), (nobs)] + crps_exp <- .CRPS(exp = exp, obs = obs, time_dim = time_dim, memb_dim = memb_dim, + dat_dim = dat_dim, Fair = Fair) + + #----- CRPS of the reference forecast + if (is.null(ref)) { + ## using climatology as reference forecast + ## all the time steps are used as if they were members + ## then, ref dimensions are [sdate, memb], both with length(sdate) + + obs_time_len <- dim(obs)[time_dim] + if (is.null(dat_dim)) { + ref <- array(data = rep(obs, each = obs_time_len), dim = c(obs_time_len, obs_time_len)) + names(dim(ref)) <- c(time_dim, memb_dim) + # ref: [sdate, memb]; obs: [sdate] + crps_ref <- .CRPS(exp = ref, obs = obs, time_dim = time_dim, memb_dim = memb_dim, + dat_dim = dat_dim, Fair = Fair) + # crps_ref should be [sdate] + + } else { + crps_ref <- array(dim = c(obs_time_len, nobs)) + names(dim(crps_ref)) <- c(time_dim, 'nobs') + for (i_obs in 1:nobs) { + ref <- array(data = rep(obs[, i_obs], each = obs_time_len), dim = c(obs_time_len, obs_time_len)) + names(dim(ref)) <- c(time_dim, memb_dim) + crps_ref[, i_obs] <- .CRPS(exp = ref, obs = ClimProjDiags::Subset(obs, dat_dim, i_obs, drop = 'selected'), + time_dim = time_dim, memb_dim = memb_dim, dat_dim = NULL, Fair = Fair) + } + # crps_ref should be [sdate, nobs] + } + + } else { # ref is not NULL + if (!is.null(dat_dim) && (!dat_dim %in% names(dim(ref)))) { + remove_dat_dim <- TRUE + ref <- InsertDim(data = ref, posdim = length(dim(ref)) + 1 , lendim = 1, name = dat_dim) + } else { + remove_dat_dim <- FALSE + } + crps_ref <- .CRPS(exp = ref, obs = obs, time_dim = time_dim, memb_dim = memb_dim, + dat_dim = dat_dim, Fair = Fair) + # crps_ref should be [sdate, (nexp), (nobs)] + + if (!is.null(dat_dim)) { + if (isTRUE(remove_dat_dim)) { + dim(crps_ref) <- dim(crps_ref)[-2] + } + } + } + + #----- CRPSS + if (!is.null(dat_dim)) { + # If ref != NULL & ref has dat_dim, crps_ref = [sdate, nexp, nobs]; else, crps_ref = [sdate, nobs] + + crps_exp_mean <- MeanDims(crps_exp, time_dim, na.rm = FALSE) + crps_ref_mean <- MeanDims(crps_ref, time_dim, na.rm = FALSE) + crpss <- array(dim = c(nexp = nexp, nobs = nobs)) + sign <- array(dim = c(nexp = nexp, nobs = nobs)) + + if (length(dim(crps_ref_mean)) == 1) { + for (i in 1:nexp) { + for (j in 1:nobs) { + crpss[i, j] <- 1 - crps_exp_mean[i, j] / crps_ref_mean[j] + sign[i, j] <- .RandomWalkTest(skill_A = crps_exp_mean[i, j], skill_B = crps_ref_mean[j])$signif + } + } + } else { + for (i in 1:nexp) { + for (j in 1:nobs) { + crpss[i, j] <- 1 - crps_exp_mean[i, j] / crps_ref_mean[i, j] + sign[i, j] <- .RandomWalkTest(skill_A = crps_exp_mean[i, j], skill_B = crps_ref_mean[i, j])$signif + } + } + } + + } else { + crpss <- 1 - mean(crps_exp) / mean(crps_ref) + # Significance + sign <- .RandomWalkTest(skill_A = crps_exp, skill_B = crps_ref)$signif + } + + return(list(crpss = crpss, sign = sign)) +} diff --git a/R/RPS.R b/R/RPS.R index 654db3df0a88650ec104288987a6b06c2a8aa0b5..76c81d254bf8758327290ba8946f7dfc9e7aad77 100644 --- a/R/RPS.R +++ b/R/RPS.R @@ -30,8 +30,8 @@ #'@param indices_for_clim A vector of the indices to be taken along 'time_dim' #' for computing the thresholds between the probabilistic categories. If NULL, #' the whole period is used. The default value is NULL. -#'@param Fair A logical indicating whether to compute the FairRPSS (the -#' potential RPSS that the forecast would have with an infinite ensemble size). +#'@param Fair A logical indicating whether to compute the FairRPS (the +#' potential RPS that the forecast would have with an infinite ensemble size). #' The default value is FALSE. #'@param weights A named numerical array of the weights for 'exp'. If 'dat_dim' #' is NULL, the dimension should include 'memb_dim' and 'time_dim'. Else, the diff --git a/man/CRPS.Rd b/man/CRPS.Rd new file mode 100644 index 0000000000000000000000000000000000000000..453c1994608459bdb95eeb36149915b69599f19d --- /dev/null +++ b/man/CRPS.Rd @@ -0,0 +1,67 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CRPS.R +\name{CRPS} +\alias{CRPS} +\title{Compute the Continuous Ranked Probability Score} +\usage{ +CRPS( + exp, + obs, + time_dim = "sdate", + memb_dim = "member", + dat_dim = NULL, + Fair = FALSE, + ncores = NULL +) +} +\arguments{ +\item{exp}{A named numerical array of the forecast with at least time +dimension.} + +\item{obs}{A named numerical array of the observation with at least time +dimension. The dimensions must be the same as 'exp' except 'memb_dim' and +'dat_dim'.} + +\item{time_dim}{A character string indicating the name of the time dimension. +The default value is 'sdate'.} + +\item{memb_dim}{A character string indicating the name of the member dimension +to compute the probabilities of the forecast. The default value is 'member'.} + +\item{dat_dim}{A character string indicating the name of dataset dimension. +The length of this dimension can be different between 'exp' and 'obs'. The +default value is NULL.} + +\item{Fair}{A logical indicating whether to compute the FairCRPS (the +potential CRPS that the forecast would have with an infinite ensemble size). +The default value is FALSE.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A numerical array of CRPS with dimensions c(nexp, nobs, the rest dimensions of +'exp' except 'time_dim' and 'memb_dim' dimensions). nexp is the number of +experiment (i.e., dat_dim in exp), and nobs is the number of observation +(i.e., dat_dim in obs). If dat_dim is NULL, nexp and nobs are omitted. +} +\description{ +The Continuous Ranked Probability Score (CRPS; Wilks, 2011) is the continuous +version of the Ranked Probability Score (RPS; Wilks, 2011). It is a skill +metric to evaluate the full distribution of probabilistic forecasts. It has a +negative orientation (i.e., the higher-quality forecast the smaller CRPS) and +it rewards the forecast that has probability concentration around the observed +value. In case of a deterministic forecast, the CRPS is reduced to the mean +absolute error. It has the same units as the data. The function is based on +enscrps_cpp from SpecsVerification. If there is more than one dataset, CRPS +will be computed for each pair of exp and obs data. +} +\examples{ +exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +res <- CRPS(exp = exp, obs = obs) + +} +\references{ +Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +} diff --git a/man/CRPSS.Rd b/man/CRPSS.Rd new file mode 100644 index 0000000000000000000000000000000000000000..31bf501ec19771f8a86703bfa83fc567daaf4b0f --- /dev/null +++ b/man/CRPSS.Rd @@ -0,0 +1,90 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CRPSS.R +\name{CRPSS} +\alias{CRPSS} +\title{Compute the Continuous Ranked Probability Skill Score} +\usage{ +CRPSS( + exp, + obs, + ref = NULL, + time_dim = "sdate", + memb_dim = "member", + dat_dim = NULL, + Fair = FALSE, + ncores = NULL +) +} +\arguments{ +\item{exp}{A named numerical array of the forecast with at least time +dimension.} + +\item{obs}{A named numerical array of the observation with at least time +dimension. The dimensions must be the same as 'exp' except 'memb_dim' +and 'dat_dim'.} + +\item{ref}{A named numerical array of the reference forecast data with at +least time and member dimension. The dimensions must be the same as 'exp' +except 'memb_dim' and 'dat_dim'. If there is only one reference dataset, +it should not have dataset dimension. If there is corresponding reference +for each experiement, the dataset dimension must have the same length as in +'exp'. If 'ref' is NULL, the climatological forecast is used as reference +forecast. The default value is NULL.} + +\item{time_dim}{A character string indicating the name of the time dimension. +The default value is 'sdate'.} + +\item{memb_dim}{A character string indicating the name of the member dimension +to compute the probabilities of the forecast and the reference forecast. The +default value is 'member'.} + +\item{dat_dim}{A character string indicating the name of dataset dimension. +The length of this dimension can be different between 'exp' and 'obs'. +The default value is NULL.} + +\item{Fair}{A logical indicating whether to compute the FairCRPSS (the +potential CRPSS that the forecast would have with an infinite ensemble +size). The default value is FALSE.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +\item{$crpss}{ + A numerical array of the CRPSS with dimensions c(nexp, nobs, the rest + dimensions of 'exp' except 'time_dim' and 'memb_dim' dimensions). nexp is + the number of experiment (i.e., dat_dim in exp), and nobs is the number of + observation (i.e., dat_dim in obs). If 'dat_dim' is NULL, nexp and nobs are + omitted. +} +\item{$sign}{ + A logical array of the statistical significance of the CRPSS with the same + dimensions as $crpss. +} +} +\description{ +The Continuous Ranked Probability Skill Score (CRPSS; Wilks, 2011) is the +skill score based on the Continuous Ranked Probability Score (CRPS; Wilks, +2011). It can be used to assess whether a forecast presents an improvement or +worsening with respect to a reference forecast. The CRPSS ranges between minus +infinite and 1. If the CRPSS is positive, it indicates that the forecast has +higher skill than the reference forecast, while a negative value means that it +has a lower skill. Examples of reference forecasts are the climatological +forecast (same probabilities for all categories for all time steps), +persistence, a previous model version, or another model. It is computed as +CRPSS = 1 - CRPS_exp / CRPS_ref. The statistical significance is obtained +based on a Random Walk test at the 95% confidence level (DelSole and Tippett, +2016). +} +\examples{ +exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +res <- CRPSS(exp = exp, obs = obs) ## climatology as reference forecast +res <- CRPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast + +} +\references{ +Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1 +} diff --git a/man/RPS.Rd b/man/RPS.Rd index e5d24a08cd0c02931ceb0663a06d2cfd99a196a2..4d8236bba260d14f016ebb82304c536d545d01dd 100644 --- a/man/RPS.Rd +++ b/man/RPS.Rd @@ -43,8 +43,8 @@ corresponds to tercile equiprobable categories.} for computing the thresholds between the probabilistic categories. If NULL, the whole period is used. The default value is NULL.} -\item{Fair}{A logical indicating whether to compute the FairRPSS (the -potential RPSS that the forecast would have with an infinite ensemble size). +\item{Fair}{A logical indicating whether to compute the FairRPS (the +potential RPS that the forecast would have with an infinite ensemble size). The default value is FALSE.} \item{weights}{A named numerical array of the weights for 'exp'. If 'dat_dim' diff --git a/tests/testthat/test-CRPS.R b/tests/testthat/test-CRPS.R new file mode 100644 index 0000000000000000000000000000000000000000..972eb45c7a7993ad3b7c535886b8f596ac09a56c --- /dev/null +++ b/tests/testthat/test-CRPS.R @@ -0,0 +1,138 @@ +context("s2dv::CRPS tests") + +############################################## + +# dat1 +set.seed(1) +exp1 <- array(rnorm(60), dim = c(member = 3, sdate = 10, lat = 2)) +set.seed(2) +obs1 <- array(rnorm(20), dim = c(sdate = 10, lat = 2)) + +# dat2 +set.seed(1) +exp2 <- array(rnorm(20), dim = c(member = 2, sdate = 10)) +set.seed(2) +obs2 <- array(rnorm(10), dim = c(sdate = 10)) + +# dat3 +set.seed(1) +exp3 <- array(rnorm(40), dim = c(member = 2, sdate = 10, dataset = 2)) +set.seed(2) +obs3 <- array(rnorm(30), dim = c(member = 1, sdate = 10, dataset = 3)) + +############################################## +test_that("1. Input checks", { + # exp and obs (1) + expect_error( + CRPS(c()), + "Parameter 'exp' must be a numeric array." + ) + expect_error( + CRPS(exp1, c()), + "Parameter 'obs' must be a numeric array." + ) + + # time_dim + expect_error( + CRPS(exp1, obs1, time_dim = 1), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + CRPS(exp1, obs1, time_dim = 'time'), + "Parameter 'time_dim' is not found in 'exp' or 'obs' dimension." + ) + # memb_dim + expect_error( + CRPS(exp1, obs1, memb_dim = TRUE), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + CRPS(exp1, obs1, memb_dim = 'memb'), + "Parameter 'memb_dim' is not found in 'exp' dimension." + ) + ## exp and obs (2) + expect_error(CRPS(exp1, array(1:40, dim = c(sdate = 10, lat = 2, member = 2))), + "Not implemented for observations with members ('obs' can have 'memb_dim', but it should be of length = 1).", fixed = TRUE + ) + expect_error( + CRPS(exp1, array(1:9, dim = c(sdate = 9))), + "Parameter 'exp' and 'obs' must have same length of all dimensions except 'memb_dim' and 'dat_dim'." + ) + # Fair + expect_error( + CRPS(exp1, obs1, Fair = 1), + "Parameter 'Fair' must be either TRUE or FALSE." + ) + # ncores + expect_error( + CRPS(exp2, obs2, ncores = 1.5), + "Parameter 'ncores' must be either NULL or a positive integer." + ) + +}) + +############################################## +test_that("2. Output checks: dat1", { + + expect_equal( + dim(CRPS(exp1, obs1)), + c(lat = 2) + ) + expect_equal( + as.vector(CRPS(exp1, obs1)), + c(0.5947612, 0.7511546), + tolerance = 0.0001 + ) + expect_equal( + as.vector(CRPS(exp1, obs1, Fair = TRUE)), + c(0.4215127, 0.5891242), + tolerance = 0.0001 + ) + expect_equal( + as.vector(CRPS(exp1, obs1, Fair = FALSE)), + c(0.5947612, 0.7511546), + tolerance = 0.0001 + ) + +}) + +############################################## +test_that("3. Output checks: dat2", { + + expect_equal( + dim(CRPS(exp2, obs2)), + NULL + ) + expect_equal( + CRPS(exp2, obs2), + 0.9350226, + tolerance = 0.0001 + ) + expect_equal( + as.vector(CRPS(exp2, obs2, Fair = TRUE)), + as.vector(CRPS(array(exp2, dim = c(dim(exp2), dataset = 1)), array(obs2, dim = c(dim(obs2), dataset = 1)), dat_dim = 'dataset', Fair = TRUE)), + tolerance = 0.0001 + ) +}) + + + +############################################## +test_that("3. Output checks: dat3", { + + expect_equal( + dim(CRPS(exp3, obs3, dat_dim = 'dataset')), + c(nexp = 2, nobs = 3) + ) + expect_equal( + as.vector(CRPS(exp3, obs3, dat_dim = 'dataset', Fair = FALSE)), + c(0.9350226, 0.8354833, 1.0047853, 0.9681745, 1.2192761, 1.0171790), + tolerance = 0.0001 + ) + expect_equal( + as.vector(CRPS(exp3, obs3, dat_dim = 'dataset', Fair = TRUE)), + c(0.6701312, 0.6198684, 0.7398939, 0.7525596, 0.9543847, 0.8015641), + tolerance = 0.0001 + ) + +}) diff --git a/tests/testthat/test-CRPSS.R b/tests/testthat/test-CRPSS.R new file mode 100644 index 0000000000000000000000000000000000000000..6937edb2332888a8368663a1660c89db944e6749 --- /dev/null +++ b/tests/testthat/test-CRPSS.R @@ -0,0 +1,324 @@ +context("s2dv::CRPSS tests") + +############################################## + +# dat1 +set.seed(1) +exp1 <- array(rnorm(60), dim = c(member = 3, sdate = 10, lat = 2)) +set.seed(2) +obs1 <- array(rnorm(20), dim = c(sdate = 10, lat = 2)) +set.seed(3) +ref1 <- array(rnorm(40), dim = c(member = 2, sdate = 10, lat = 2)) + +# dat2 +set.seed(1) +exp2 <- array(rnorm(20), dim = c(member = 2, sdate = 10)) +set.seed(2) +obs2 <- array(rnorm(10), dim = c(sdate = 10)) +set.seed(3) +ref2 <- array(rnorm(20), dim = c(member = 2, sdate = 10)) + +# dat2_2 +set.seed(1) +exp2_2 <- array(rnorm(20), dim = c(member = 2, sdate = 10, dataset = 1)) +set.seed(2) +obs2_2 <- array(rnorm(10), dim = c(sdate = 10, dataset = 1)) +set.seed(3) +ref2_2 <- array(rnorm(20), dim = c(member = 2, sdate = 10, dataset = 1)) + +# dat3 +set.seed(1) +exp3 <- array(rnorm(60), dim = c(member = 2, sdate = 10, dataset = 3)) +set.seed(2) +obs3 <- array(rnorm(20), dim = c(member = 1, sdate = 10, dataset = 2)) +set.seed(3) +ref3 <- array(rnorm(60), dim = c(member = 2, sdate = 10, dataset = 3)) + +# dat4 +set.seed(1) +exp4 <- array(rnorm(60), dim = c(member = 2, sdate = 10, dataset = 3, lat = 2)) +set.seed(2) +obs4 <- array(rnorm(20), dim = c(member = 1, sdate = 10, dataset = 2, lat = 2)) +set.seed(3) +ref4 <- array(rnorm(60), dim = c(member = 2, sdate = 10, lat = 2)) + +############################################## +test_that("1. Input checks", { + # exp and obs (1) + expect_error( + CRPSS(c()), + "Parameter 'exp' must be a numeric array." + ) + expect_error( + CRPSS(exp1, c()), + "Parameter 'obs' must be a numeric array." + ) + + # time_dim + expect_error( + CRPSS(exp1, obs1, time_dim = 1), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + CRPSS(exp1, obs1, time_dim = 'time'), + "Parameter 'time_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + CRPSS(exp2, obs2, array(rnorm(20), dim = c(member = 2, time = 10))), + "Parameter 'time_dim' is not found in 'ref' dimension." + ) + # memb_dim + expect_error( + CRPSS(exp1, obs1, memb_dim = TRUE), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + CRPSS(exp1, obs1, memb_dim = 'memb'), + "Parameter 'memb_dim' is not found in 'exp' dimension." + ) + expect_error( + CRPSS(exp2, obs2, array(rnorm(20), dim = c(memb = 2, sdate = 10))), + "Parameter 'memb_dim' is not found in 'ref' dimension." + ) + # exp, ref, and obs (2) + expect_error( + CRPSS(exp1, array(1:9, dim = c(sdate = 9))), + "Parameter 'exp' and 'obs' must have same length of all dimensions except 'memb_dim' and 'dat_dim'." + ) + expect_error( + CRPSS(exp2_2, obs2_2, array(1:9, dim = c(sdate = 9, member = 2, dataset = 3)), dat_dim = 'dataset'), + "If parameter 'ref' has dataset dimension it must be equal to dataset dimension of 'exp'." + ) + expect_error( + CRPSS(exp1, obs1, ref2), + "Parameter 'exp' and 'ref' must have same length of all dimensions except 'memb_dim' and 'dat_dim' if there is only one reference dataset." + ) + expect_error( + CRPSS(exp3, array(rnorm(60), dim = c(member = 2, sdate = 10, dataset = 3)), ref3, dat_dim = 'dataset'), + "Not implemented for observations with members ('obs' can have 'memb_dim', but it should be of length = 1)." + , fixed = TRUE + ) + # Fair + expect_error( + CRPSS(exp1, obs1, Fair = 1), + "Parameter 'Fair' must be either TRUE or FALSE." + ) + # ncores + expect_error( + CRPSS(exp2, obs2, ncores = 1.5), + "Parameter 'ncores' must be either NULL or a positive integer." + ) + +}) + +############################################## +test_that("2. Output checks: dat1", { + + expect_equal( + names(CRPSS(exp1, obs1)), + c("crpss", "sign") + ) + expect_equal( + names(CRPSS(exp1, obs1, ref1)), + c("crpss", "sign") + ) + expect_equal( + dim(CRPSS(exp1, obs1)$crpss), + c(lat = 2) + ) + expect_equal( + dim(CRPSS(exp1, obs1)$sign), + c(lat = 2) + ) + expect_equal( + dim(CRPSS(exp1, obs1, ref1)$crpss), + c(lat = 2) + ) + expect_equal( + dim(CRPSS(exp1, obs1, ref1)$sign), + c(lat = 2) + ) + # ref = NULL + expect_equal( + as.vector(CRPSS(exp1, obs1)$crpss), + c(-0.1582765, -0.2390707), + tolerance = 0.0001 + ) + expect_equal( + as.vector(CRPSS(exp1, obs1)$sign), + c(FALSE, FALSE), + ) + expect_equal( + as.vector(CRPSS(exp1, obs1, Fair = T)$crpss), + c(0.07650872, -0.09326681), + tolerance = 0.0001 + ) + expect_equal( + as.vector(CRPSS(exp1, obs1)$crpss), + c(-0.1582765, -0.2390707), + tolerance = 0.0001 + ) + # ref = ref + expect_equal( + as.vector(CRPSS(exp1, obs1, ref1)$crpss), + c(0.3491793, 0.3379610), + tolerance = 0.0001 + ) + expect_equal( + as.vector(CRPSS(exp1, obs1, ref1)$sign), + c(FALSE, FALSE) + ) + expect_equal( + as.vector(CRPSS(exp1, obs1, ref1, Fair = T)$crpss), + c( 0.3901440, 0.3788467), + tolerance = 0.0001 + ) + expect_equal( + as.vector(CRPSS(exp1, obs1, ref1)$crpss), + c( 0.3491793, 0.3379610), + tolerance = 0.0001 + ) + +}) + +############################################## +test_that("3. Output checks: dat2", { + + expect_equal( + names(CRPSS(exp2, obs2)), + c("crpss", "sign") + ) + expect_equal( + names(CRPSS(exp2, obs2, ref2)), + c("crpss", "sign") + ) + expect_equal( + dim(CRPSS(exp2, obs2)$crpss), + NULL + ) + expect_equal( + dim(CRPSS(exp2, obs2)$sign), + NULL + ) + expect_equal( + dim(CRPSS(exp2, obs2, ref2)$crpss), + NULL + ) + expect_equal( + dim(CRPSS(exp2, obs2, ref2)$sign), + NULL + ) + # ref = NULL + expect_equal( + as.vector(CRPSS(exp2, obs2)$crpss), + c(-0.8209236), + tolerance = 0.0001 + ) + expect_equal( + as.vector(CRPSS(exp2, obs2)$crpss), + as.vector(CRPSS(exp2_2, obs2_2, dat_dim = 'dataset')$crpss), + tolerance = 0.0001 + ) + + expect_equal( + as.vector(CRPSS(exp2, obs2)$sign), + TRUE, + ) + expect_equal( + as.vector(CRPSS(exp2, obs2, Fair = T)$crpss), + c(-0.468189), + tolerance = 0.0001 + ) + expect_equal( + as.vector(CRPSS(exp2, obs2, Fair = T)$crpss), + as.vector(CRPSS(exp2_2, obs2_2, dat_dim = 'dataset', Fair = T)$crpss), + tolerance = 0.0001 + ) + # ref = ref + expect_equal( + as.vector(CRPSS(exp2, obs2, ref2)$crpss), + -0.02315361, + tolerance = 0.0001 + ) + expect_equal( + as.vector(CRPSS(exp2, obs2, ref2)$crpss), + as.vector(CRPSS(exp2_2, obs2_2, ref2_2, dat_dim = 'dataset')$crpss), + tolerance = 0.0001 + ) + expect_equal( + as.vector(CRPSS(exp2, obs2, ref2)$sign), + FALSE + ) + expect_equal( + as.vector(CRPSS(exp2, obs2, ref2, Fair = T)$crpss), + 0.030436, + tolerance = 0.0001 + ) + +}) +############################################## +test_that("4. Output checks: dat3", { + + expect_equal( + dim(CRPSS(exp3, obs3, dat_dim = 'dataset')$crpss), + c('nexp' = 3, 'nobs' = 2) + ) + expect_equal( + mean(CRPSS(exp3, obs3, dat_dim = 'dataset')$crpss), + c(-0.7390546), + tolerance = 0.0001 + ) + expect_equal( + as.vector(CRPSS(exp3, obs3, dat_dim = 'dataset')$crpss), + c(-0.8209236, -0.6270744, -1.0829403, -0.6574485, -0.5970569, -0.6488837), + tolerance = 0.0001 + ) + expect_equal( + as.vector(CRPSS(exp3, obs3, dat_dim = 'dataset')$sign), + rep(FALSE, 6), + ) + expect_equal( + mean(CRPSS(exp3, obs3, dat_dim = 'dataset', Fair = T)$crpss), + c(-0.5302703), + tolerance = 0.0001 + ) + expect_equal( + as.vector(CRPSS(exp3, obs3, dat_dim = 'dataset', Fair = T)$crpss), + c(-0.4681890, -0.3580685, -1.0116628, -0.3730576, -0.3965618, -0.5740819), + tolerance = 0.0001 + ) + # ref = ref3 + expect_equal( + as.vector(CRPSS(exp3, obs3, ref = ref3, dat_dim = 'dataset')$crpss), + c(-0.02315361, -0.05914715, -0.24638960, -0.09337738, 0.14668803, -0.01454008), + tolerance = 0.0001 + ) + + expect_equal( + as.vector(CRPSS(exp3, obs3, dat_dim = 'dataset')$crpss[1]), + as.vector(CRPSS(exp2, obs2)$crpss) + ) + +}) + +############################################## +test_that("5. Output checks: dat4", { + + expect_equal( + dim(CRPSS(exp4, obs4, ref4, dat_dim = 'dataset')$crpss), + c('nexp' = 3, 'nobs' = 2, 'lat' = 2) + ) + expect_equal( + as.vector(CRPSS(exp4, obs4, dat_dim = 'dataset', Fair = T)$crpss)[1:6], + c(-0.4681890, -0.3580685, -1.0116628, -0.3730576, -0.3965618, -0.5740819), + tolerance = 0.0001 + ) + # ref = ref3 + expect_equal( + as.vector(CRPSS(exp4, obs4, ref = ref4, dat_dim = 'dataset')$crpss)[1:6], + c(-0.02315361, 0.08576776, -0.17037744, -0.09337738, -0.05353853, -0.08772739), + tolerance = 0.0001 + ) + + +})