From 70e76751692a8d54dddccb09a8b9f4dbd5ffd196 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 14 Apr 2023 17:09:10 +0200 Subject: [PATCH 1/5] exp and obs can be probabilities --- R/RPS.R | 155 ++++++++++++++++++++++++++------------ man/RPS.Rd | 45 +++++++---- tests/testthat/test-RPS.R | 14 ++++ 3 files changed, 152 insertions(+), 62 deletions(-) diff --git a/R/RPS.R b/R/RPS.R index 75619b6..31f12c6 100644 --- a/R/RPS.R +++ b/R/RPS.R @@ -9,21 +9,30 @@ #'categories. In the case of a forecast divided into two categories (the lowest #'number of categories that a probabilistic forecast can have), the RPS #'corresponds to the Brier Score (BS; Wilks, 2011), therefore, ranges between 0 -#'and 1. If there is more than one dataset, RPS will be computed for each pair -#'of exp and obs data. +#'and 1.\cr +#'The function first calculates the probabilities for forecast and observation, +#'then use them to calculate RPS. Or, the probabilities of exp and obs can be +#'provided directly to compute the score. If there is more than one dataset, RPS +#'will be computed for each pair of exp and obs data. #' -#'@param exp A named numerical array of the forecast with at least time and -#' member dimension. -#'@param obs A named numerical array of the observation with at least time -#' dimension. The dimensions must be the same as 'exp' except 'memb_dim' and -#' 'dat_dim'. +#'@param exp A named numerical array of either the forecast with at least time +#' and member dimensions, or the probability with at least time and category +#' dimensions. The probability can be generated by \code{s2dv::GetProbs}. +#'@param obs A named numerical array of either the observation with at least +#' time dimension, or the probability with at least time and category +#' dimensions. The probability can be generated by \code{s2dv::GetProbs}. The +#' dimensions must be the same as 'exp' except 'memb_dim' and 'dat_dim'. #'@param time_dim A character string indicating the name of the time dimension. #' The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the probabilities of the forecast. The default value is 'member'. +#' If the data are probabilistics, set memb_dim as NULL. +#'@param cat_dim A character string indicating the name of the category +#' dimension that is needed when the exp and obs are probabilities. The default +#' value is NULL, which means that the data are not probabilities. #'@param dat_dim A character string indicating the name of dataset dimension. #' The length of this dimension can be different between 'exp' and 'obs'. #' The default value is NULL. -#'@param memb_dim A character string indicating the name of the member dimension -#' to compute the probabilities of the forecast. The default value is 'member'. #'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to #' 1) between the categories. The default value is c(1/3, 2/3), which #' corresponds to tercile equiprobable categories. @@ -33,12 +42,12 @@ #'@param Fair A logical indicating whether to compute the FairRPS (the #' potential RPS that the forecast would have with an infinite ensemble size). #' The default value is FALSE. -#'@param weights A named numerical array of the weights for 'exp'. If 'dat_dim' -#' is NULL, the dimension should include 'memb_dim' and 'time_dim'. Else, the -#' dimension should also include 'dat_dim'. The default value is NULL. The -#' ensemble should have at least 70 members or span at least 10 time steps and -#' have more than 45 members if consistency between the weighted and unweighted -#' methodologies is desired. +#'@param weights A named numerical array of the weights for 'exp' probability +#' calculation. If 'dat_dim' is NULL, the dimensions should include 'memb_dim' +#' and 'time_dim'. Else, the dimension should also include 'dat_dim'. The +#' default value is NULL. The ensemble should have at least 70 members or span +#' at least 10 time steps and have more than 45 members if consistency between +#' the weighted and unweighted methodologies is desired. #'@param cross.val A logical indicating whether to compute the thresholds between #' probabilistic categories in cross-validation. #' The default value is FALSE. @@ -55,16 +64,22 @@ #'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 #' #'@examples +#'# Use synthetic data #'exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) #'obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) #'res <- RPS(exp = exp, obs = obs) +#'# Use probabilities as inputs +#'exp_probs <- GetProbs(exp, time_dim = 'sdate', memb_dim = 'member') +#'obs_probs <- GetProbs(obs, time_dim = 'sdate', memb_dim = NULL) +#'res2 <- RPS(exp = exp_probs, obs = obs_probs, memb_dim = NULL, cat_dim = 'bin') +#' #' #'@import multiApply #'@importFrom easyVerification convert2prob #'@export -RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, - prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, - weights = NULL, cross.val = FALSE, ncores = NULL) { +RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, + dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, + Fair = FALSE, weights = NULL, cross.val = FALSE, ncores = NULL) { # Check inputs ## exp and obs (1) @@ -82,12 +97,27 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NUL if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") } + ## memb_dim & cat_dim + if (is.null(memb_dim) + is.null(cat_dim) != 1) { + stop("Only one of the two parameters 'memb_dim' and 'cat_dim' can have value.") + } ## memb_dim - if (!is.character(memb_dim) | length(memb_dim) > 1) { - stop("Parameter 'memb_dim' must be a character string.") + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } } - if (!memb_dim %in% names(dim(exp))) { - stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + ## cat_dim + if (!is.null(cat_dim)) { + if (!is.character(cat_dim) | length(cat_dim) > 1) { + stop("Parameter 'cat_dim' must be a character string.") + } + if (!cat_dim %in% names(dim(exp)) | !cat_dim %in% names(dim(obs))) { + stop("Parameter 'cat_dim' is not found in 'exp' or 'obs' dimension.") + } } ## dat_dim if (!is.null(dat_dim)) { @@ -102,9 +132,11 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NUL ## exp and obs (2) name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) - name_exp <- name_exp[-which(name_exp == memb_dim)] - if (memb_dim %in% name_obs) { - name_obs <- name_obs[-which(name_obs == memb_dim)] + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } } if (!is.null(dat_dim)) { name_exp <- name_exp[-which(name_exp == dat_dim)] @@ -141,7 +173,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NUL stop("Parameter 'cross.val' must be either TRUE or FALSE.") } ## weights - if (!is.null(weights)) { + if (!is.null(weights) & is.null(cat_dim)) { if (!is.array(weights) | !is.numeric(weights)) stop("Parameter 'weights' must be a named numeric array.") if (is.null(dat_dim)) { @@ -166,6 +198,10 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NUL weights <- Reorder(weights, c(time_dim, memb_dim, dat_dim)) } + } else if (!is.null(weights) & !is.null(cat_dim)) { + .warning(paste0("Parameter 'exp' and 'obs' are probabilities already, so parameter ", + "'weights' is not used. Change 'weights' to NULL.")) + weights <- NULL } ## ncores if (!is.null(ncores)) { @@ -178,17 +214,25 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NUL ############################### # Compute RPS - if (!memb_dim %in% names(dim(obs))) { - target_dims_obs <- c(time_dim, dat_dim) - } else { - target_dims_obs <- c(time_dim, memb_dim, dat_dim) + + ## Decide target_dims + if (!is.null(memb_dim)) { + target_dims_exp <- c(time_dim, memb_dim, dat_dim) + if (!memb_dim %in% names(dim(obs))) { + target_dims_obs <- c(time_dim, dat_dim) + } else { + target_dims_obs <- c(time_dim, memb_dim, dat_dim) + } + } else { # cat_dim + target_dims_exp <- target_dims_obs <- c(time_dim, cat_dim, dat_dim) } + rps <- Apply(data = list(exp = exp, obs = obs), - target_dims = list(exp = c(time_dim, memb_dim, dat_dim), + target_dims = list(exp = target_dims_exp, obs = target_dims_obs), fun = .RPS, dat_dim = dat_dim, time_dim = time_dim, - memb_dim = memb_dim, + memb_dim = memb_dim, cat_dim = cat_dim, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, Fair = Fair, weights = weights, cross.val = cross.val, ncores = ncores)$output1 @@ -200,16 +244,23 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NUL } -.RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, - prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, weights = NULL, - cross.val = FALSE) { - +.RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, + dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, + Fair = FALSE, weights = NULL, cross.val = FALSE) { + #--- if memb_dim: # exp: [sdate, memb, (dat)] # obs: [sdate, (memb), (dat)] # weights: NULL or same as exp + #--- if cat_dim: + # exp: [sdate, bin, (dat)] + # obs: [sdate, bin, (dat)] # Adjust dimensions to be [sdate, memb, dat] for both exp and obs - if (!memb_dim %in% names(dim(obs))) obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim) + if (!is.null(memb_dim)) { + if (!memb_dim %in% names(dim(obs))) { + obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim) + } + } if (is.null(dat_dim)) { nexp <- 1 @@ -232,19 +283,27 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NUL if (is.null(dim(exp_data))) dim(exp_data) <- c(dim(exp)[1:2]) if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1:2]) - if (!is.null(weights)) { - weights_data <- weights[ , , i] - if (is.null(dim(weights_data))) dim(weights_data) <- c(dim(weights)[1:2]) - } else { - weights_data <- weights + # If the data inputs are forecast/observation, calculate probabilities + if (is.null(cat_dim)) { + if (!is.null(weights)) { + weights_data <- weights[ , , i] + if (is.null(dim(weights_data))) dim(weights_data) <- c(dim(weights)[1:2]) + } else { + weights_data <- weights + } + + exp_probs <- .GetProbs(data = exp_data, indices_for_quantiles = indices_for_clim, + prob_thresholds = prob_thresholds, weights = weights_data, cross.val = cross.val) + # exp_probs: [bin, sdate] + obs_probs <- .GetProbs(data = obs_data, indices_for_quantiles = indices_for_clim, + prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val) + # obs_probs: [bin, sdate] + + } else { # inputs are probabilities already + exp_probs <- t(exp_data) + obs_probs <- t(obs_data) } - exp_probs <- .GetProbs(data = exp_data, indices_for_quantiles = indices_for_clim, - prob_thresholds = prob_thresholds, weights = weights_data, cross.val = cross.val) - # exp_probs: [bin, sdate] - obs_probs <- .GetProbs(data = obs_data, indices_for_quantiles = indices_for_clim, - prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val) - # obs_probs: [bin, sdate] probs_exp_cumsum <- apply(exp_probs, 2, cumsum) probs_obs_cumsum <- apply(obs_probs, 2, cumsum) diff --git a/man/RPS.Rd b/man/RPS.Rd index 813c12f..c08b109 100644 --- a/man/RPS.Rd +++ b/man/RPS.Rd @@ -9,6 +9,7 @@ RPS( obs, time_dim = "sdate", memb_dim = "member", + cat_dim = NULL, dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, @@ -19,18 +20,25 @@ RPS( ) } \arguments{ -\item{exp}{A named numerical array of the forecast with at least time and -member dimension.} +\item{exp}{A named numerical array of either the forecast with at least time +and member dimensions, or the probability with at least time and category +dimensions. The probability can be generated by \code{s2dv::GetProbs}.} -\item{obs}{A named numerical array of the observation with at least time -dimension. The dimensions must be the same as 'exp' except 'memb_dim' and -'dat_dim'.} +\item{obs}{A named numerical array of either the observation with at least +time dimension, or the probability with at least time and category +dimensions. The probability can be generated by \code{s2dv::GetProbs}. The +dimensions must be the same as 'exp' except 'memb_dim' and 'dat_dim'.} \item{time_dim}{A character string indicating the name of the time dimension. The default value is 'sdate'.} \item{memb_dim}{A character string indicating the name of the member dimension -to compute the probabilities of the forecast. The default value is 'member'.} +to compute the probabilities of the forecast. The default value is 'member'. +If the data are probabilistics, set memb_dim as NULL.} + +\item{cat_dim}{A character string indicating the name of the category +dimension that is needed when the exp and obs are probabilities. The default +value is NULL, which means that the data are not probabilities.} \item{dat_dim}{A character string indicating the name of dataset dimension. The length of this dimension can be different between 'exp' and 'obs'. @@ -48,12 +56,12 @@ the whole period is used. The default value is NULL.} potential RPS that the forecast would have with an infinite ensemble size). The default value is FALSE.} -\item{weights}{A named numerical array of the weights for 'exp'. If 'dat_dim' -is NULL, the dimension should include 'memb_dim' and 'time_dim'. Else, the -dimension should also include 'dat_dim'. The default value is NULL. The -ensemble should have at least 70 members or span at least 10 time steps and -have more than 45 members if consistency between the weighted and unweighted -methodologies is desired.} +\item{weights}{A named numerical array of the weights for 'exp' probability +calculation. If 'dat_dim' is NULL, the dimensions should include 'memb_dim' +and 'time_dim'. Else, the dimension should also include 'dat_dim'. The +default value is NULL. The ensemble should have at least 70 members or span +at least 10 time steps and have more than 45 members if consistency between +the weighted and unweighted methodologies is desired.} \item{cross.val}{A logical indicating whether to compute the thresholds between probabilistic categories in cross-validation. @@ -78,13 +86,22 @@ of multi-categorical probabilistic forecasts. The RPS ranges between 0 categories. In the case of a forecast divided into two categories (the lowest number of categories that a probabilistic forecast can have), the RPS corresponds to the Brier Score (BS; Wilks, 2011), therefore, ranges between 0 -and 1. If there is more than one dataset, RPS will be computed for each pair -of exp and obs data. +and 1.\cr +The function first calculates the probabilities for forecast and observation, +then use them to calculate RPS. Or, the probabilities of exp and obs can be +provided directly to compute the score. If there is more than one dataset, RPS +will be computed for each pair of exp and obs data. } \examples{ +# Use synthetic data exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) res <- RPS(exp = exp, obs = obs) +# Use probabilities as inputs +exp_probs <- GetProbs(exp, time_dim = 'sdate', memb_dim = 'member') +obs_probs <- GetProbs(obs, time_dim = 'sdate', memb_dim = NULL) +res2 <- RPS(exp = exp_probs, obs = obs_probs, memb_dim = NULL, cat_dim = 'bin') + } \references{ diff --git a/tests/testthat/test-RPS.R b/tests/testthat/test-RPS.R index 51ba992..2040c09 100644 --- a/tests/testthat/test-RPS.R +++ b/tests/testthat/test-RPS.R @@ -10,6 +10,10 @@ obs1 <- array(rnorm(20), dim = c(sdate = 10, lat = 2)) set.seed(3) weights1 <- array(abs(rnorm(30)), dim = c(member = 3, sdate = 10)) +# dat1_2: probabilites +exp1_2 <- GetProbs(exp1, memb_dim = 'member') +obs1_2 <- GetProbs(obs1, memb_dim = NULL) + # dat2 set.seed(1) exp2 <- array(rnorm(20), dim = c(member = 2, sdate = 10)) @@ -60,6 +64,12 @@ test_that("1. Input checks", { RPS(exp1, obs1, memb_dim = 'memb'), "Parameter 'memb_dim' is not found in 'exp' dimension." ) + # cat_dim + expect_error( + RPS(exp1_2, obs1_2, memb_dim = NULL), + "Only one of the two parameters 'memb_dim' and 'cat_dim' can have value." + ) + # exp, ref, and obs (2) expect_error( RPS(exp1, array(1:9, dim = c(sdate = 9))), @@ -160,6 +170,10 @@ test_that("2. Output checks: dat1", { c(0.3559286, 0.6032109), tolerance = 0.0001 ) + expect_equal( + as.vector(RPS(exp1, obs1)), + as.vector(RPS(exp1_2, obs1_2, memb_dim = NULL, cat_dim = 'bin')) + ) }) -- GitLab From 5001e4cef933a5d4f587a7a8724072d0aee94822 Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 17 Apr 2023 15:18:39 +0200 Subject: [PATCH 2/5] Allow inputs to be probabilities --- R/RPSS.R | 208 +++++++++++++++++++++++-------------- man/RPSS.Rd | 58 ++++++----- tests/testthat/test-RPSS.R | 15 +++ 3 files changed, 178 insertions(+), 103 deletions(-) diff --git a/R/RPSS.R b/R/RPSS.R index ab433e9..7a18afe 100644 --- a/R/RPSS.R +++ b/R/RPSS.R @@ -11,26 +11,36 @@ #'model version, and another model. It is computed as #'\code{RPSS = 1 - RPS_exp / RPS_ref}. The statistical significance is obtained #'based on a Random Walk test at the 95% confidence level (DelSole and Tippett, -#'2016). If there is more than one dataset, RPS will be computed for each pair +#'2016).\cr +#'The function accepts either the data or the probabilities of each data as +#'inputs. If there is more than one dataset, RPSS will be computed for each pair #'of exp and obs data. #' -#'@param exp A named numerical array of the forecast with at least time and -#' member dimension. -#'@param obs A named numerical array of the observation with at least time -#' dimension. The dimensions must be the same as 'exp' except 'memb_dim' and -#' 'dat_dim'. -#'@param ref A named numerical array of the reference forecast data with at -#' least time and member dimension. The dimensions must be the same as 'exp' -#' except 'memb_dim' and 'dat_dim'. If there is only one reference dataset, -#' it should not have dataset dimension. If there is corresponding reference -#' for each experiement, the dataset dimension must have the same length as in -#' 'exp'. If 'ref' is NULL, the climatological forecast is used as reference -#' forecast. The default value is NULL. +#'@param exp A named numerical array of either the forecast with at least time +#' and member dimensions, or the probability with at least time and category +#' dimensions. The probability can be generated by \code{s2dv::GetProbs}. +#'@param obs A named numerical array of either the observation with at least +#' time dimension, or the probability with at least time and category +#' dimensions. The probability can be generated by \code{s2dv::GetProbs}. The +#' dimensions must be the same as 'exp' except 'memb_dim' and 'dat_dim'. +#'@param ref A named numerical array of either the reference forecast with at +#' least time and member dimensions, or the probability with at least time and +#' category dimensions. The probability can be generated by +#' \code{s2dv::GetProbs}. The dimensions must be the same as 'exp' except +#' 'memb_dim' and 'dat_dim'. If there is only one reference dataset, it should +#' not have dataset dimension. If there is corresponding reference for each +#' experiement, the dataset dimension must have the same length as in 'exp'. If +#' 'ref' is NULL, the climatological forecast is used as reference forecast. +#' The default value is NULL. #'@param time_dim A character string indicating the name of the time dimension. #' The default value is 'sdate'. #'@param memb_dim A character string indicating the name of the member dimension #' to compute the probabilities of the forecast and the reference forecast. The -#' default value is 'member'. +#' default value is 'member'. If the data are probabilistics, set memb_dim as +#' NULL. +#'@param cat_dim A character string indicating the name of the category +#' dimension that is needed when the exp and obs are probabilities. The default +#' value is NULL, which means that the data are not probabilities. #'@param dat_dim A character string indicating the name of dataset dimension. #' The length of this dimension can be different between 'exp' and 'obs'. #' The default value is NULL. @@ -43,15 +53,13 @@ #'@param Fair A logical indicating whether to compute the FairRPSS (the #' potential RPSS that the forecast would have with an infinite ensemble size). #' The default value is FALSE. -#'@param weights Deprecated and will be removed in the next release. Please use -#' 'weights_exp' and 'weights_ref' instead. -#'@param weights_exp A named numerical array of the forecast ensemble weights. -#' The dimension should include 'memb_dim', 'time_dim' and 'dat_dim' if there -#' are multiple datasets. All dimension lengths must be equal to 'exp' -#' dimension lengths. The default value is NULL, which means no weighting is -#' applied. The ensemble should have at least 70 members or span at least 10 -#' time steps and have more than 45 members if consistency between the weighted -#' and unweighted methodologies is desired. +#'@param weights_exp A named numerical array of the forecast ensemble weights +#' for probability calculation. The dimension should include 'memb_dim', +#' 'time_dim' and 'dat_dim' if there are multiple datasets. All dimension +#' lengths must be equal to 'exp' dimension lengths. The default value is NULL, +#' which means no weighting is applied. The ensemble should have at least 70 +#' members or span at least 10 time steps and have more than 45 members if +#' consistency between the weighted and unweighted methodologies is desired. #'@param weights_ref Same as 'weights_exp' but for the reference forecast. #'@param cross.val A logical indicating whether to compute the thresholds between #' probabilistics categories in cross-validation. @@ -92,9 +100,9 @@ #'res <- RPSS(exp = exp, obs = obs, ref = ref, weights_exp = weights, weights_ref = weights) #'@import multiApply #'@export -RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', +RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, - Fair = FALSE, weights = NULL, weights_exp = NULL, weights_ref = NULL, + Fair = FALSE, weights_exp = NULL, weights_ref = NULL, cross.val = FALSE, ncores = NULL) { # Check inputs @@ -126,15 +134,31 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', if (!is.null(ref) & !time_dim %in% names(dim(ref))) { stop("Parameter 'time_dim' is not found in 'ref' dimension.") } - ## memb_dim - if (!is.character(memb_dim) | length(memb_dim) > 1) { - stop("Parameter 'memb_dim' must be a character string.") + ## memb_dim & cat_dim + if (is.null(memb_dim) + is.null(cat_dim) != 1) { + stop("Only one of the two parameters 'memb_dim' and 'cat_dim' can have value.") } - if (!memb_dim %in% names(dim(exp))) { - stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (!is.null(ref) & !memb_dim %in% names(dim(ref))) { + stop("Parameter 'memb_dim' is not found in 'ref' dimension.") + } } - if (!is.null(ref) & !memb_dim %in% names(dim(ref))) { - stop("Parameter 'memb_dim' is not found in 'ref' dimension.") + ## cat_dim + if (!is.null(cat_dim)) { + if (!is.character(cat_dim) | length(cat_dim) > 1) { + stop("Parameter 'cat_dim' must be a character string.") + } + if (!cat_dim %in% names(dim(exp)) | !cat_dim %in% names(dim(obs)) | + !cat_dim %in% names(dim(obs))) { + stop("Parameter 'cat_dim' is not found in 'exp', 'obs', or 'ref' dimension.") + } } ## dat_dim if (!is.null(dat_dim)) { @@ -149,9 +173,11 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', ## exp, obs, and ref (2) name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) - name_exp <- name_exp[-which(name_exp == memb_dim)] - if (memb_dim %in% name_obs) { - name_obs <- name_obs[-which(name_obs == memb_dim)] + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } } if (!is.null(dat_dim)) { name_exp <- name_exp[-which(name_exp == dat_dim)] @@ -164,7 +190,9 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', } if (!is.null(ref)) { name_ref <- sort(names(dim(ref))) - name_ref <- name_ref[-which(name_ref == memb_dim)] + if (!is.null(memb_dim)) { + name_ref <- name_ref[-which(name_ref == memb_dim)] + } if (!is.null(dat_dim)) { if (dat_dim %in% name_ref) { if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { @@ -206,16 +234,8 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', if (!is.logical(cross.val) | length(cross.val) > 1) { stop("Parameter 'cross.val' must be either TRUE or FALSE.") } - ## weights - if (!is.null(weights)) { - .warning(paste0("Parameter 'weights' is deprecated and will be removed in the next release. ", - "Use 'weights_exp' and 'weights_ref' instead. The value will be assigned ", - "to these two parameters now if they are NULL."), tag = '! Deprecation: ') - if (is.null(weights_exp)) weights_exp <- weights - if (is.null(weights_ref)) weights_ref <- weights - } ## weights_exp - if (!is.null(weights_exp)) { + if (!is.null(weights_exp) & is.null(cat_dim)) { if (!is.array(weights_exp) | !is.numeric(weights_exp)) stop("Parameter 'weights_exp' must be a named numeric array.") @@ -238,13 +258,16 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', "as 'memb_dim', 'time_dim' and 'dat_dim' in 'exp'.")) } weights_exp <- Reorder(weights_exp, c(time_dim, memb_dim, dat_dim)) - } - + } + } else if (!is.null(weights_exp) & !is.null(cat_dim)) { + .warning(paste0("Parameter 'exp' is probability already, so parameter ", + "'weights_exp' is not used. Change 'weights_exp' to NULL.")) + weights_exp <- NULL } ## weights_ref - if (!is.null(weights_ref)) { + if (!is.null(weights_ref) & is.null(cat_dim)) { if (!is.array(weights_ref) | !is.numeric(weights_ref)) - stop('Parameter "weights_ref" must be a named numeric array.') + stop("Parameter 'weights_ref' must be a named numeric array.") if (is.null(dat_dim) | ((!is.null(dat_dim)) && (!dat_dim %in% names(dim(ref))))) { if (length(dim(weights_ref)) != 2 | any(!names(dim(weights_ref)) %in% c(memb_dim, time_dim))) @@ -266,7 +289,10 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', } weights_ref <- Reorder(weights_ref, c(time_dim, memb_dim, dat_dim)) } - + } else if (!is.null(weights_ref) & !is.null(cat_dim)) { + .warning(paste0("Parameter 'ref' is probability already, so parameter ", + "'weights_ref' is not used. Change 'weights_ref' to NULL.")) + weights_ref <- NULL } ## ncores if (!is.null(ncores)) { @@ -279,32 +305,44 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', ############################### # Compute RPSS - if (!memb_dim %in% names(dim(obs))) { - target_dims_obs <- c(time_dim, dat_dim) - } else { - target_dims_obs <- c(time_dim, memb_dim, dat_dim) + + ## Decide target_dims + if (!is.null(memb_dim)) { + target_dims_exp <- c(time_dim, memb_dim, dat_dim) + if (!memb_dim %in% names(dim(obs))) { + target_dims_obs <- c(time_dim, dat_dim) + } else { + target_dims_obs <- c(time_dim, memb_dim, dat_dim) + } + } else { # cat_dim + target_dims_exp <- target_dims_obs <- c(time_dim, cat_dim, dat_dim) } if (!is.null(ref)) { # use "ref" as reference forecast - if (!is.null(dat_dim) && (dat_dim %in% names(dim(ref)))) { - target_dims_ref <- c(time_dim, memb_dim, dat_dim) - } else { + if (!is.null(memb_dim)) { + if (!is.null(dat_dim) && (dat_dim %in% names(dim(ref)))) { + target_dims_ref <- c(time_dim, memb_dim, dat_dim) + } else { target_dims_ref <- c(time_dim, memb_dim) + } + } else { + target_dims_ref <- c(time_dim, cat_dim, dat_dim) } data <- list(exp = exp, obs = obs, ref = ref) - target_dims = list(exp = c(time_dim, memb_dim, dat_dim), + target_dims = list(exp = target_dims_exp, obs = target_dims_obs, ref = target_dims_ref) } else { data <- list(exp = exp, obs = obs) - target_dims = list(exp = c(time_dim, memb_dim, dat_dim), + target_dims = list(exp = target_dims_exp, obs = target_dims_obs) } + output <- Apply(data, target_dims = target_dims, fun = .RPSS, time_dim = time_dim, memb_dim = memb_dim, - dat_dim = dat_dim, + cat_dim = cat_dim, dat_dim = dat_dim, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, Fair = Fair, weights_exp = weights_exp, @@ -316,13 +354,18 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', } -.RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, - prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, - weights_exp = NULL, weights_ref = NULL, cross.val = FALSE) { +.RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, + dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, + Fair = FALSE, weights_exp = NULL, weights_ref = NULL, cross.val = FALSE) { + #--- if memb_dim: # exp: [sdate, memb, (dat)] # obs: [sdate, (memb), (dat)] # ref: [sdate, memb, (dat)] or NULL + #--- if cat_dim: + # exp: [sdate, bin, (dat)] + # obs: [sdate, bin, (dat)] + # ref: [sdate, bin, (dat)] or NULL if (is.null(dat_dim)) { nexp <- 1 @@ -333,14 +376,17 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', } # RPS of the forecast - rps_exp <- .RPS(exp = exp, obs = obs, time_dim = time_dim, memb_dim = memb_dim, dat_dim = dat_dim, - prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, - Fair = Fair, weights = weights_exp, cross.val = cross.val) + rps_exp <- .RPS(exp = exp, obs = obs, time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, weights = weights_exp, + cross.val = cross.val) # RPS of the reference forecast if (is.null(ref)) { ## using climatology as reference forecast - if (!memb_dim %in% names(dim(obs))) { - obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim) + if (!is.null(memb_dim)) { + if (!memb_dim %in% names(dim(obs))) { + obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim) + } } if (is.null(dat_dim)) { dim(obs) <- c(dim(obs), nobs = nobs) @@ -348,14 +394,19 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', rps_ref <- array(dim = c(dim(obs)[time_dim], nobs = nobs)) for (j in 1:nobs) { - obs_data <- obs[ , , j] - if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1:2]) - # obs_probs: [bin, sdate] - obs_probs <- .GetProbs(data = obs_data, indices_for_quantiles = indices_for_clim, - prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val) - # clim_probs: [bin, sdate] - clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), 1 - prob_thresholds[length(prob_thresholds)]) + if (is.null(cat_dim)) { # calculate probs + obs_data <- obs[ , , j] + if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1:2]) + obs_probs <- .GetProbs(data = obs_data, indices_for_quantiles = indices_for_clim, + prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val) + # obs_probs: [bin, sdate] + } else { + obs_probs <- t(obs[ , , j]) + } + clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), + 1 - prob_thresholds[length(prob_thresholds)]) clim_probs <- array(clim_probs, dim = dim(obs_probs)) + # clim_probs: [bin, sdate] # Calculate RPS for each time step probs_clim_cumsum <- apply(clim_probs, 2, cumsum) @@ -386,9 +437,10 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', remove_dat_dim <- FALSE } - rps_ref <- .RPS(exp = ref, obs = obs, time_dim = time_dim, memb_dim = memb_dim, dat_dim = dat_dim, - prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, - Fair = Fair, weights = weights_ref, cross.val = cross.val) + rps_ref <- .RPS(exp = ref, obs = obs, time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, weights = weights_ref, + cross.val = cross.val) if (!is.null(dat_dim)) { if (isTRUE(remove_dat_dim)) { dim(rps_ref) <- dim(rps_ref)[-2] diff --git a/man/RPSS.Rd b/man/RPSS.Rd index d70425e..a1e3e55 100644 --- a/man/RPSS.Rd +++ b/man/RPSS.Rd @@ -10,11 +10,11 @@ RPSS( ref = NULL, time_dim = "sdate", memb_dim = "member", + cat_dim = NULL, dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, - weights = NULL, weights_exp = NULL, weights_ref = NULL, cross.val = FALSE, @@ -22,27 +22,36 @@ RPSS( ) } \arguments{ -\item{exp}{A named numerical array of the forecast with at least time and -member dimension.} +\item{exp}{A named numerical array of either the forecast with at least time +and member dimensions, or the probability with at least time and category +dimensions. The probability can be generated by \code{s2dv::GetProbs}.} -\item{obs}{A named numerical array of the observation with at least time -dimension. The dimensions must be the same as 'exp' except 'memb_dim' and -'dat_dim'.} +\item{obs}{A named numerical array of either the observation with at least +time dimension, or the probability with at least time and category +dimensions. The probability can be generated by \code{s2dv::GetProbs}. The +dimensions must be the same as 'exp' except 'memb_dim' and 'dat_dim'.} -\item{ref}{A named numerical array of the reference forecast data with at -least time and member dimension. The dimensions must be the same as 'exp' -except 'memb_dim' and 'dat_dim'. If there is only one reference dataset, -it should not have dataset dimension. If there is corresponding reference -for each experiement, the dataset dimension must have the same length as in -'exp'. If 'ref' is NULL, the climatological forecast is used as reference -forecast. The default value is NULL.} +\item{ref}{A named numerical array of either the reference forecast with at +least time and member dimensions, or the probability with at least time and +category dimensions. The probability can be generated by +\code{s2dv::GetProbs}. The dimensions must be the same as 'exp' except +'memb_dim' and 'dat_dim'. If there is only one reference dataset, it should +not have dataset dimension. If there is corresponding reference for each +experiement, the dataset dimension must have the same length as in 'exp'. If +'ref' is NULL, the climatological forecast is used as reference forecast. +The default value is NULL.} \item{time_dim}{A character string indicating the name of the time dimension. The default value is 'sdate'.} \item{memb_dim}{A character string indicating the name of the member dimension to compute the probabilities of the forecast and the reference forecast. The -default value is 'member'.} +default value is 'member'. If the data are probabilistics, set memb_dim as +NULL.} + +\item{cat_dim}{A character string indicating the name of the category +dimension that is needed when the exp and obs are probabilities. The default +value is NULL, which means that the data are not probabilities.} \item{dat_dim}{A character string indicating the name of dataset dimension. The length of this dimension can be different between 'exp' and 'obs'. @@ -60,16 +69,13 @@ the whole period is used. The default value is NULL.} potential RPSS that the forecast would have with an infinite ensemble size). The default value is FALSE.} -\item{weights}{Deprecated and will be removed in the next release. Please use -'weights_exp' and 'weights_ref' instead.} - -\item{weights_exp}{A named numerical array of the forecast ensemble weights. -The dimension should include 'memb_dim', 'time_dim' and 'dat_dim' if there -are multiple datasets. All dimension lengths must be equal to 'exp' -dimension lengths. The default value is NULL, which means no weighting is -applied. The ensemble should have at least 70 members or span at least 10 -time steps and have more than 45 members if consistency between the weighted - and unweighted methodologies is desired.} +\item{weights_exp}{A named numerical array of the forecast ensemble weights +for probability calculation. The dimension should include 'memb_dim', +'time_dim' and 'dat_dim' if there are multiple datasets. All dimension +lengths must be equal to 'exp' dimension lengths. The default value is NULL, +which means no weighting is applied. The ensemble should have at least 70 +members or span at least 10 time steps and have more than 45 members if +consistency between the weighted and unweighted methodologies is desired.} \item{weights_ref}{Same as 'weights_exp' but for the reference forecast.} @@ -104,7 +110,9 @@ probabilities for all categories for all time steps), persistence, a previous model version, and another model. It is computed as \code{RPSS = 1 - RPS_exp / RPS_ref}. The statistical significance is obtained based on a Random Walk test at the 95% confidence level (DelSole and Tippett, -2016). If there is more than one dataset, RPS will be computed for each pair +2016).\cr +The function accepts either the data or the probabilities of each data as +inputs. If there is more than one dataset, RPSS will be computed for each pair of exp and obs data. } \examples{ diff --git a/tests/testthat/test-RPSS.R b/tests/testthat/test-RPSS.R index 36efee8..ce0deb3 100644 --- a/tests/testthat/test-RPSS.R +++ b/tests/testthat/test-RPSS.R @@ -12,6 +12,11 @@ ref1 <- array(rnorm(40), dim = c(member = 2, sdate = 10, lat = 2)) set.seed(4) weights1 <- array(abs(rnorm(30)), dim = c(member = 3, sdate = 10)) +# dat1_2 +exp1_2 <- GetProbs(exp1, memb_dim = 'member') +obs1_2 <- GetProbs(obs1, memb_dim = NULL) +ref1_2 <- GetProbs(ref1, memb_dim = 'member') + # dat2 set.seed(1) exp2 <- array(rnorm(20), dim = c(member = 2, sdate = 10)) @@ -255,6 +260,16 @@ test_that("2. Output checks: dat1", { tolerance = 0.0001 ) + # dat1_2 + expect_equal( + RPSS(exp1, obs1), + RPSS(exp1_2, obs1_2, memb_dim = NULL, cat_dim = 'bin') + ) + expect_equal( + RPSS(exp1, obs1, ref1), + RPSS(exp1_2, obs1_2, ref1_2, memb_dim = NULL, cat_dim = 'bin') + ) + }) -- GitLab From 7dd2d002955f8c681d4d19ce7b77d112720c30e7 Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 17 Apr 2023 16:34:33 +0200 Subject: [PATCH 3/5] Allow input to be probabilities; correct document and sanity checks --- R/ROCSS.R | 189 ++++++++++++++++++++++++------------ R/RPSS.R | 13 ++- man/ROCSS.Rd | 58 +++++++---- man/RPSS.Rd | 11 ++- tests/testthat/test-ROCSS.R | 15 +++ 5 files changed, 201 insertions(+), 85 deletions(-) diff --git a/R/ROCSS.R b/R/ROCSS.R index 1e9f5e2..3ac67e6 100644 --- a/R/ROCSS.R +++ b/R/ROCSS.R @@ -6,24 +6,36 @@ #'curve can be summarized with the area under the ROC curve, known as the ROC #'score, to provide a skill value for each category. The ROCSS ranges between #'minus infinite and 1. A positive ROCSS value indicates that the forecast has -#'higher skill than the reference forecasts, meaning the contrary otherwise. -#'@param exp A named numerical array of the forecast with at least time and -#' member dimension. -#'@param obs A named numerical array of the observation with at least time -#' dimension. The dimensions must be the same as 'exp' except 'memb_dim' and -#' 'dat_dim'. -#'@param ref A named numerical array of the reference forecast data with at -#' least time and member dimension. The dimensions must be the same as 'exp' -#' except 'memb_dim' and 'dat_dim'. If there is only one reference dataset, -#' it should not have dataset dimension. If there is corresponding reference -#' for each experiement, the dataset dimension must have the same length as in -#' 'exp'. If 'ref' is NULL, the random forecast is used as reference forecast. -#' The default value is NULL. +#'higher skill than the reference forecasts, meaning the contrary otherwise.\cr +#'The function accepts either the data or the probabilities of each data as +#'inputs. If there is more than one dataset, RPSS will be computed for each pair +#'of exp and obs data. +#' +#'@param exp A named numerical array of either the forecast with at least time +#' and member dimensions, or the probability with at least time and category +#' dimensions. The probability can be generated by \code{s2dv::GetProbs}. +#'@param obs A named numerical array of either the observation with at least +#' time dimension, or the probability with at least time and category +#' dimensions. The probability can be generated by \code{s2dv::GetProbs}. The +#' dimensions must be the same as 'exp' except 'memb_dim' and 'dat_dim'. +#'@param ref A named numerical array of the reference forecast with at least +#' time and member dimensions, or the probability with at least time and +#' category dimensions. The probability can be generated by +#' \code{s2dv::GetProbs}. The dimensions must be the same as 'exp' except +#' 'memb_dim' and 'dat_dim'. If there is only one reference dataset, it should +#' not have dataset dimension. If there is corresponding reference for each +#' experiement, the dataset dimension must have the same length as in 'exp'. +#' If 'ref' is NULL, the random forecast is used as reference forecast. The +#' default value is NULL. #'@param time_dim A character string indicating the name of the time dimension. #' The default value is 'sdate'. #'@param memb_dim A character string indicating the name of the member dimension #' to compute the probabilities of the forecast and the reference forecast. The -#' default value is 'member'. +#' default value is 'member'. If the data are probabilistics, set memb_dim as +#' NULL. +#'@param cat_dim A character string indicating the name of the category +#' dimension that is needed when exp, obs, and ref are probabilities. The +#' default value is NULL, which means that the data are not probabilities. #'@param dat_dim A character string indicating the name of dataset dimension. #' The length of this dimension can be different between 'exp' and 'obs'. #' The default value is NULL. @@ -40,27 +52,34 @@ #' computation. The default value is NULL. #' #'@return -#'A numerical array of ROCSS with the same dimensions as 'exp' excluding -#''time_dim' and 'memb_dim' dimensions and including 'cat' dimension, which is -#'each category. The length if 'cat' dimension corresponds to the number of -#'probabilistic categories, i.e., 1 + length(prob_thresholds). If there are -#'multiple datasets, two additional dimensions 'nexp' and 'nobs' are added. +#'A numerical array of ROCSS with dimensions c(nexp, nobs, cat, the rest +#'dimensions of 'exp' except 'time_dim' and 'memb_dim' dimensions). nexp is the +#'number of experiment (i.e., dat_dim in exp), and nobs is the number of +#'observation (i.e., dat_dim in obs). If dat_dim is NULL, nexp and nobs are +#'omitted. dimension 'cat' refers to the probabilistic category, i.e., +#'\code{1 + length(prob_thresholds)}. #' #'@references #'Kharin, V. V. and Zwiers, F. W. (2003): #' https://doi.org/10.1175/1520-0442(2003)016%3C4145:OTRSOP%3E2.0.CO;2 #' #'@examples +#'# Use data as input #'exp <- array(rnorm(1000), dim = c(lon = 3, lat = 2, sdate = 60, member = 10)) #'ref <- array(rnorm(1000), dim = c(lon = 3, lat = 2, sdate = 60, member = 10)) #'obs <- array(rnorm(1000), dim = c(lon = 3, lat = 2, sdate = 60)) #'ROCSS(exp = exp, obs = obs) ## random forecast as reference forecast #'ROCSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast +#'# Use probs as input +#'exp_probs <- GetProbs(exp, memb_dim = 'member') +#'obs_probs <- GetProbs(obs, memb_dim = NULL) +#'ref_probs <- GetProbs(ref, memb_dim = 'member') +#'ROCSS(exp = exp_probs, obs = obs_probs, ref = ref_probs, memb_dim = NULL, cat_dim = 'bin') #' #'@import multiApply #'@importFrom easyVerification EnsRoca #'@export -ROCSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', +ROCSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, cross.val = FALSE, ncores = NULL) { @@ -93,15 +112,31 @@ ROCSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', if (!is.null(ref) & !time_dim %in% names(dim(ref))) { stop("Parameter 'time_dim' is not found in 'ref' dimension.") } - ## memb_dim - if (!is.character(memb_dim) | length(memb_dim) > 1) { - stop("Parameter 'memb_dim' must be a character string.") + ## memb_dim & cat_dim + if (is.null(memb_dim) + is.null(cat_dim) != 1) { + stop("Only one of the two parameters 'memb_dim' and 'cat_dim' can have value.") } - if (!memb_dim %in% names(dim(exp))) { - stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (!is.null(ref) & !memb_dim %in% names(dim(ref))) { + stop("Parameter 'memb_dim' is not found in 'ref' dimension.") + } } - if (!is.null(ref) & !memb_dim %in% names(dim(ref))) { - stop("Parameter 'memb_dim' is not found in 'ref' dimension.") + ## cat_dim + if (!is.null(cat_dim)) { + if (!is.character(cat_dim) | length(cat_dim) > 1) { + stop("Parameter 'cat_dim' must be a character string.") + } + if (!cat_dim %in% names(dim(exp)) | !cat_dim %in% names(dim(obs)) | + (!is.null(ref) & !cat_dim %in% names(dim(ref)))) { + stop("Parameter 'cat_dim' is not found in 'exp', 'obs', or 'ref' dimension.") + } } ## dat_dim if (!is.null(dat_dim)) { @@ -116,9 +151,11 @@ ROCSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', ## exp, obs, and ref (2) name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) - name_exp <- name_exp[-which(name_exp == memb_dim)] - if (memb_dim %in% name_obs) { - name_obs <- name_obs[-which(name_obs == memb_dim)] + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } } if (!is.null(dat_dim)) { name_exp <- name_exp[-which(name_exp == dat_dim)] @@ -131,7 +168,9 @@ ROCSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', } if (!is.null(ref)) { name_ref <- sort(names(dim(ref))) - name_ref <- name_ref[-which(name_ref == memb_dim)] + if (!is.null(memb_dim)) { + name_ref <- name_ref[-which(name_ref == memb_dim)] + } if (!is.null(dat_dim)) { if (dat_dim %in% name_ref) { if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { @@ -187,41 +226,54 @@ ROCSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', output_dims <- c('nexp', 'nobs', 'cat') } ## target_dims - if (!memb_dim %in% names(dim(obs))) { - target_dims_obs <- c(time_dim, dat_dim) - } else { - target_dims_obs <- c(time_dim, memb_dim, dat_dim) + if (!is.null(memb_dim)) { + target_dims_exp <- c(time_dim, memb_dim, dat_dim) + if (!memb_dim %in% names(dim(obs))) { + target_dims_obs <- c(time_dim, dat_dim) + } else { + target_dims_obs <- c(time_dim, memb_dim, dat_dim) + } + } else { # cat_dim + target_dims_exp <- target_dims_obs <- c(time_dim, cat_dim, dat_dim) } - ## If ref doesn't have & dat_dim is not NULL - if (!is.null(ref) && !is.null(dat_dim) &&!dat_dim %in% names(dim(ref))) { - target_dims_ref <- c(time_dim, memb_dim) - } else { - target_dims_ref <- c(time_dim, memb_dim, dat_dim) + + if (!is.null(ref)) { # use "ref" as reference forecast + if (!is.null(memb_dim)) { + if (!is.null(dat_dim) && (dat_dim %in% names(dim(ref)))) { + target_dims_ref <- c(time_dim, memb_dim, dat_dim) + } else { + target_dims_ref <- c(time_dim, memb_dim) + } + } else { + target_dims_ref <- c(time_dim, cat_dim, dat_dim) + } } if (!is.null(ref)) { ## reference forecast is provided res <- Apply(data = list(exp = exp, obs = obs, ref = ref), - target_dims = list(exp = c(time_dim, memb_dim, dat_dim), + target_dims = list(exp = target_dims_exp, obs = target_dims_obs, ref = target_dims_ref), output_dims = output_dims, fun = .ROCSS, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, - time_dim = time_dim, dat_dim = dat_dim, + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, cross.val = cross.val, ncores = ncores)$output1 } else { ## Random forecast as reference forecast res <- Apply(data = list(exp = exp, obs = obs), - target_dims = list(exp = c(time_dim, memb_dim, dat_dim), + target_dims = list(exp = target_dims_exp, obs = target_dims_obs), output_dims = output_dims, fun = .ROCSS, ref = ref, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, - time_dim = time_dim, dat_dim = dat_dim, + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, cross.val = cross.val, ncores = ncores)$output1 } @@ -229,13 +281,19 @@ ROCSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', return(res) } -.ROCSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, prob_thresholds = c(1/3, 2/3), +.ROCSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', + cat_dim = NULL, dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, cross.val = FALSE) { + #--- if memb_dim: # exp: [sdate, memb, (dat)] # obs: [sdate, (dat)] - # ref: [sdate, memb, (dat)] or NULL - + # ref: [sdate, memb, (dat)] or NULL + #--- if cat_dim: + # exp: [sdate, bin, (dat)] + # obs: [sdate, bin, (dat)] + # ref: [sdate, bin, (dat)] or NULL + if (is.null(dat_dim)) { nexp <- 1 nobs <- 1 @@ -262,25 +320,34 @@ ROCSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', for (exp_i in 1:nexp) { for (obs_i in 1:nobs) { - # Input dim for .GetProbs - ## if exp: [sdate, memb] - ## if obs: [sdate, (memb)] - exp_probs <- .GetProbs(ClimProjDiags::Subset(exp, dat_dim, exp_i, drop = 'selected'), - indices_for_quantiles = indices_for_clim, - prob_thresholds = prob_thresholds, cross.val = cross.val) - obs_probs <- .GetProbs(data = ClimProjDiags::Subset(obs, dat_dim, obs_i, drop = 'selected'), - indices_for_quantiles = indices_for_clim, - prob_thresholds = prob_thresholds, cross.val = cross.val) - ## exp_probs and obs_probs: [bin, sdate] - + if (is.null(cat_dim)) { # calculate probs + # Input dim for .GetProbs + ## if exp: [sdate, memb] + ## if obs: [sdate, (memb)] + exp_probs <- .GetProbs(data = ClimProjDiags::Subset(exp, dat_dim, exp_i, drop = 'selected'), + indices_for_quantiles = indices_for_clim, + prob_thresholds = prob_thresholds, cross.val = cross.val) + obs_probs <- .GetProbs(data = ClimProjDiags::Subset(obs, dat_dim, obs_i, drop = 'selected'), + indices_for_quantiles = indices_for_clim, + prob_thresholds = prob_thresholds, cross.val = cross.val) + ## exp_probs and obs_probs: [bin, sdate] + } else { + exp_probs <- exp[, , exp_i] + obs_probs <- obs[, , obs_i] + } + ## ROCS (exp) rocs_exp[exp_i, obs_i, ] <- unlist(EnsRoca(ens = Reorder(exp_probs, c(time_dim, 'bin')), obs = Reorder(obs_probs, c(time_dim, 'bin')))[1:ncats]) if (!is.null(ref)) { - ref_probs <- .GetProbs(ClimProjDiags::Subset(ref, dat_dim, exp_i, drop = 'selected'), - indices_for_quantiles = indices_for_clim, - prob_thresholds = prob_thresholds, cross.val = cross.val) + if (is.null(cat_dim)) { # calculate probs + ref_probs <- .GetProbs(ClimProjDiags::Subset(ref, dat_dim, exp_i, drop = 'selected'), + indices_for_quantiles = indices_for_clim, + prob_thresholds = prob_thresholds, cross.val = cross.val) + } else { + ref_probs <- ref[, , exp_i] + } rocs_ref[exp_i, obs_i, ] <- unlist(EnsRoca(ens = Reorder(ref_probs, c(time_dim, 'bin')), obs = Reorder(obs_probs, c(time_dim, 'bin')))[1:ncats]) } diff --git a/R/RPSS.R b/R/RPSS.R index 7a18afe..5ad8e0e 100644 --- a/R/RPSS.R +++ b/R/RPSS.R @@ -39,8 +39,8 @@ #' default value is 'member'. If the data are probabilistics, set memb_dim as #' NULL. #'@param cat_dim A character string indicating the name of the category -#' dimension that is needed when the exp and obs are probabilities. The default -#' value is NULL, which means that the data are not probabilities. +#' dimension that is needed when exp, obs, and ref are probabilities. The +#' default value is NULL, which means that the data are not probabilities. #'@param dat_dim A character string indicating the name of dataset dimension. #' The length of this dimension can be different between 'exp' and 'obs'. #' The default value is NULL. @@ -95,9 +95,16 @@ #' n/sum(n) #' }) #'dim(weights) <- c(member = 10, sdate = 50) +#'# Use data as input #'res <- RPSS(exp = exp, obs = obs) ## climatology as reference forecast #'res <- RPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast #'res <- RPSS(exp = exp, obs = obs, ref = ref, weights_exp = weights, weights_ref = weights) +#'# Use probs as input +#'exp_probs <- GetProbs(exp, memb_dim = 'member') +#'obs_probs <- GetProbs(obs, memb_dim = NULL) +#'ref_probs <- GetProbs(ref, memb_dim = 'member') +#'res <- RPSS(exp = exp_probs, obs = obs_probs, ref = ref_probs, memb_dim = NULL, cat_dim = 'bin') +#' #'@import multiApply #'@export RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, @@ -156,7 +163,7 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', stop("Parameter 'cat_dim' must be a character string.") } if (!cat_dim %in% names(dim(exp)) | !cat_dim %in% names(dim(obs)) | - !cat_dim %in% names(dim(obs))) { + (!is.null(ref) & !cat_dim %in% names(dim(ref)))) { stop("Parameter 'cat_dim' is not found in 'exp', 'obs', or 'ref' dimension.") } } diff --git a/man/ROCSS.Rd b/man/ROCSS.Rd index 1f49517..4514164 100644 --- a/man/ROCSS.Rd +++ b/man/ROCSS.Rd @@ -10,6 +10,7 @@ ROCSS( ref = NULL, time_dim = "sdate", memb_dim = "member", + cat_dim = NULL, dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, @@ -18,27 +19,36 @@ ROCSS( ) } \arguments{ -\item{exp}{A named numerical array of the forecast with at least time and -member dimension.} +\item{exp}{A named numerical array of either the forecast with at least time +and member dimensions, or the probability with at least time and category +dimensions. The probability can be generated by \code{s2dv::GetProbs}.} -\item{obs}{A named numerical array of the observation with at least time -dimension. The dimensions must be the same as 'exp' except 'memb_dim' and -'dat_dim'.} +\item{obs}{A named numerical array of either the observation with at least +time dimension, or the probability with at least time and category +dimensions. The probability can be generated by \code{s2dv::GetProbs}. The +dimensions must be the same as 'exp' except 'memb_dim' and 'dat_dim'.} -\item{ref}{A named numerical array of the reference forecast data with at -least time and member dimension. The dimensions must be the same as 'exp' -except 'memb_dim' and 'dat_dim'. If there is only one reference dataset, -it should not have dataset dimension. If there is corresponding reference -for each experiement, the dataset dimension must have the same length as in -'exp'. If 'ref' is NULL, the random forecast is used as reference forecast. -The default value is NULL.} +\item{ref}{A named numerical array of the reference forecast with at least +time and member dimensions, or the probability with at least time and +category dimensions. The probability can be generated by +\code{s2dv::GetProbs}. The dimensions must be the same as 'exp' except +'memb_dim' and 'dat_dim'. If there is only one reference dataset, it should +not have dataset dimension. If there is corresponding reference for each +experiement, the dataset dimension must have the same length as in 'exp'. +If 'ref' is NULL, the random forecast is used as reference forecast. The +default value is NULL.} \item{time_dim}{A character string indicating the name of the time dimension. The default value is 'sdate'.} \item{memb_dim}{A character string indicating the name of the member dimension to compute the probabilities of the forecast and the reference forecast. The -default value is 'member'.} +default value is 'member'. If the data are probabilistics, set memb_dim as +NULL.} + +\item{cat_dim}{A character string indicating the name of the category +dimension that is needed when exp, obs, and ref are probabilities. The +default value is NULL, which means that the data are not probabilities.} \item{dat_dim}{A character string indicating the name of dataset dimension. The length of this dimension can be different between 'exp' and 'obs'. @@ -60,11 +70,12 @@ FALSE.} computation. The default value is NULL.} } \value{ -A numerical array of ROCSS with the same dimensions as 'exp' excluding -'time_dim' and 'memb_dim' dimensions and including 'cat' dimension, which is -each category. The length if 'cat' dimension corresponds to the number of -probabilistic categories, i.e., 1 + length(prob_thresholds). If there are -multiple datasets, two additional dimensions 'nexp' and 'nobs' are added. +A numerical array of ROCSS with dimensions c(nexp, nobs, cat, the rest +dimensions of 'exp' except 'time_dim' and 'memb_dim' dimensions). nexp is the +number of experiment (i.e., dat_dim in exp), and nobs is the number of +observation (i.e., dat_dim in obs). If dat_dim is NULL, nexp and nobs are +omitted. dimension 'cat' refers to the probabilistic category, i.e., +\code{1 + length(prob_thresholds)}. } \description{ The Relative Operating Characteristic Skill Score (ROCSS; Kharin and Zwiers, @@ -73,14 +84,23 @@ against the false-alarm rates for a particular category or event. The ROC curve can be summarized with the area under the ROC curve, known as the ROC score, to provide a skill value for each category. The ROCSS ranges between minus infinite and 1. A positive ROCSS value indicates that the forecast has -higher skill than the reference forecasts, meaning the contrary otherwise. +higher skill than the reference forecasts, meaning the contrary otherwise.\cr +The function accepts either the data or the probabilities of each data as +inputs. If there is more than one dataset, RPSS will be computed for each pair +of exp and obs data. } \examples{ +# Use data as input exp <- array(rnorm(1000), dim = c(lon = 3, lat = 2, sdate = 60, member = 10)) ref <- array(rnorm(1000), dim = c(lon = 3, lat = 2, sdate = 60, member = 10)) obs <- array(rnorm(1000), dim = c(lon = 3, lat = 2, sdate = 60)) ROCSS(exp = exp, obs = obs) ## random forecast as reference forecast ROCSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast +# Use probs as input +exp_probs <- GetProbs(exp, memb_dim = 'member') +obs_probs <- GetProbs(obs, memb_dim = NULL) +ref_probs <- GetProbs(ref, memb_dim = 'member') +ROCSS(exp = exp_probs, obs = obs_probs, ref = ref_probs, memb_dim = NULL, cat_dim = 'bin') } \references{ diff --git a/man/RPSS.Rd b/man/RPSS.Rd index a1e3e55..cda05a4 100644 --- a/man/RPSS.Rd +++ b/man/RPSS.Rd @@ -50,8 +50,8 @@ default value is 'member'. If the data are probabilistics, set memb_dim as NULL.} \item{cat_dim}{A character string indicating the name of the category -dimension that is needed when the exp and obs are probabilities. The default -value is NULL, which means that the data are not probabilities.} +dimension that is needed when exp, obs, and ref are probabilities. The +default value is NULL, which means that the data are not probabilities.} \item{dat_dim}{A character string indicating the name of dataset dimension. The length of this dimension can be different between 'exp' and 'obs'. @@ -127,9 +127,16 @@ weights <- sapply(1:dim(exp)['sdate'], function(i) { n/sum(n) }) dim(weights) <- c(member = 10, sdate = 50) +# Use data as input res <- RPSS(exp = exp, obs = obs) ## climatology as reference forecast res <- RPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast res <- RPSS(exp = exp, obs = obs, ref = ref, weights_exp = weights, weights_ref = weights) +# Use probs as input +exp_probs <- GetProbs(exp, memb_dim = 'member') +obs_probs <- GetProbs(obs, memb_dim = NULL) +ref_probs <- GetProbs(ref, memb_dim = 'member') +res <- RPSS(exp = exp_probs, obs = obs_probs, ref = ref_probs, memb_dim = NULL, cat_dim = 'bin') + } \references{ Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 diff --git a/tests/testthat/test-ROCSS.R b/tests/testthat/test-ROCSS.R index a95d0ba..130cfa2 100644 --- a/tests/testthat/test-ROCSS.R +++ b/tests/testthat/test-ROCSS.R @@ -10,6 +10,11 @@ obs1 <- array(rnorm(20), dim = c(sdate = 10, lat = 2)) set.seed(3) ref1 <- array(rnorm(40), dim = c(member = 2, sdate = 10, lat = 2)) +# dat1_2 +exp1_2 <- GetProbs(exp1, memb_dim = 'member') +obs1_2 <- GetProbs(obs1, memb_dim = NULL) +ref1_2 <- GetProbs(ref1, memb_dim = 'member') + # dat2 set.seed(1) exp2 <- array(rnorm(30), dim = c(member = 3, sdate = 10)) @@ -151,6 +156,16 @@ c(0.5238095, 0.5357143), tolerance = 0.0001 ) +# dat1_2 +expect_equal( + ROCSS(exp1, obs1), + ROCSS(exp1_2, obs1_2, memb_dim = NULL, cat_dim = 'bin') +) +expect_equal( + ROCSS(exp1, obs1, ref1), + ROCSS(exp1_2, obs1_2, ref1_2, memb_dim = NULL, cat_dim = 'bin') +) + }) ############################################## -- GitLab From 5195a268d16adff2320a8019a45df98088e9268c Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Tue, 25 Apr 2023 12:07:20 +0200 Subject: [PATCH 4/5] slightly modified documentation --- R/ROCSS.R | 18 +++++++++--------- R/RPS.R | 16 ++++++++-------- R/RPSS.R | 22 +++++++++++----------- 3 files changed, 28 insertions(+), 28 deletions(-) diff --git a/R/ROCSS.R b/R/ROCSS.R index 3ac67e6..2ca0782 100644 --- a/R/ROCSS.R +++ b/R/ROCSS.R @@ -6,32 +6,32 @@ #'curve can be summarized with the area under the ROC curve, known as the ROC #'score, to provide a skill value for each category. The ROCSS ranges between #'minus infinite and 1. A positive ROCSS value indicates that the forecast has -#'higher skill than the reference forecasts, meaning the contrary otherwise.\cr +#'higher skill than the reference forecast, meaning the contrary otherwise.\cr #'The function accepts either the data or the probabilities of each data as #'inputs. If there is more than one dataset, RPSS will be computed for each pair #'of exp and obs data. #' #'@param exp A named numerical array of either the forecast with at least time -#' and member dimensions, or the probability with at least time and category -#' dimensions. The probability can be generated by \code{s2dv::GetProbs}. -#'@param obs A named numerical array of either the observation with at least -#' time dimension, or the probability with at least time and category -#' dimensions. The probability can be generated by \code{s2dv::GetProbs}. The +#' and member dimensions, or the probabilities with at least time and category +#' dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. +#'@param obs A named numerical array of either the observations with at least +#' time dimension, or the probabilities with at least time and category +#' dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. The #' dimensions must be the same as 'exp' except 'memb_dim' and 'dat_dim'. #'@param ref A named numerical array of the reference forecast with at least -#' time and member dimensions, or the probability with at least time and +#' time and member dimensions, or the probabilities with at least time and #' category dimensions. The probability can be generated by #' \code{s2dv::GetProbs}. The dimensions must be the same as 'exp' except #' 'memb_dim' and 'dat_dim'. If there is only one reference dataset, it should #' not have dataset dimension. If there is corresponding reference for each -#' experiement, the dataset dimension must have the same length as in 'exp'. +#' experiment, the dataset dimension must have the same length as in 'exp'. #' If 'ref' is NULL, the random forecast is used as reference forecast. The #' default value is NULL. #'@param time_dim A character string indicating the name of the time dimension. #' The default value is 'sdate'. #'@param memb_dim A character string indicating the name of the member dimension #' to compute the probabilities of the forecast and the reference forecast. The -#' default value is 'member'. If the data are probabilistics, set memb_dim as +#' default value is 'member'. If the data are probabilities, set memb_dim as #' NULL. #'@param cat_dim A character string indicating the name of the category #' dimension that is needed when exp, obs, and ref are probabilities. The diff --git a/R/RPS.R b/R/RPS.R index 31f12c6..826c03a 100644 --- a/R/RPS.R +++ b/R/RPS.R @@ -8,25 +8,25 @@ #'(perfect forecast) and n-1 (worst possible forecast), where n is the number of #'categories. In the case of a forecast divided into two categories (the lowest #'number of categories that a probabilistic forecast can have), the RPS -#'corresponds to the Brier Score (BS; Wilks, 2011), therefore, ranges between 0 +#'corresponds to the Brier Score (BS; Wilks, 2011), therefore ranging between 0 #'and 1.\cr -#'The function first calculates the probabilities for forecast and observation, +#'The function first calculates the probabilities for forecasts and observations, #'then use them to calculate RPS. Or, the probabilities of exp and obs can be #'provided directly to compute the score. If there is more than one dataset, RPS #'will be computed for each pair of exp and obs data. #' -#'@param exp A named numerical array of either the forecast with at least time -#' and member dimensions, or the probability with at least time and category -#' dimensions. The probability can be generated by \code{s2dv::GetProbs}. +#'@param exp A named numerical array of either the forecasts with at least time +#' and member dimensions, or the probabilities with at least time and category +#' dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. #'@param obs A named numerical array of either the observation with at least -#' time dimension, or the probability with at least time and category -#' dimensions. The probability can be generated by \code{s2dv::GetProbs}. The +#' time dimension, or the probabilities with at least time and category +#' dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. The #' dimensions must be the same as 'exp' except 'memb_dim' and 'dat_dim'. #'@param time_dim A character string indicating the name of the time dimension. #' The default value is 'sdate'. #'@param memb_dim A character string indicating the name of the member dimension #' to compute the probabilities of the forecast. The default value is 'member'. -#' If the data are probabilistics, set memb_dim as NULL. +#' If the data are probabilities, set memb_dim as NULL. #'@param cat_dim A character string indicating the name of the category #' dimension that is needed when the exp and obs are probabilities. The default #' value is NULL, which means that the data are not probabilities. diff --git a/R/RPSS.R b/R/RPSS.R index 5ad8e0e..6299eb8 100644 --- a/R/RPSS.R +++ b/R/RPSS.R @@ -12,31 +12,31 @@ #'\code{RPSS = 1 - RPS_exp / RPS_ref}. The statistical significance is obtained #'based on a Random Walk test at the 95% confidence level (DelSole and Tippett, #'2016).\cr -#'The function accepts either the data or the probabilities of each data as -#'inputs. If there is more than one dataset, RPSS will be computed for each pair -#'of exp and obs data. +#'The function accepts either the ensemble members or the probabilities of +#' each data as inputs. If there is more than one dataset, RPSS will be +#' computed for each pair of exp and obs data. #' #'@param exp A named numerical array of either the forecast with at least time -#' and member dimensions, or the probability with at least time and category -#' dimensions. The probability can be generated by \code{s2dv::GetProbs}. +#' and member dimensions, or the probabilities with at least time and category +#' dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. #'@param obs A named numerical array of either the observation with at least -#' time dimension, or the probability with at least time and category -#' dimensions. The probability can be generated by \code{s2dv::GetProbs}. The +#' time dimension, or the probabilities with at least time and category +#' dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. The #' dimensions must be the same as 'exp' except 'memb_dim' and 'dat_dim'. #'@param ref A named numerical array of either the reference forecast with at -#' least time and member dimensions, or the probability with at least time and -#' category dimensions. The probability can be generated by +#' least time and member dimensions, or the probabilities with at least time and +#' category dimensions. The probabilities can be generated by #' \code{s2dv::GetProbs}. The dimensions must be the same as 'exp' except #' 'memb_dim' and 'dat_dim'. If there is only one reference dataset, it should #' not have dataset dimension. If there is corresponding reference for each -#' experiement, the dataset dimension must have the same length as in 'exp'. If +#' experiment, the dataset dimension must have the same length as in 'exp'. If #' 'ref' is NULL, the climatological forecast is used as reference forecast. #' The default value is NULL. #'@param time_dim A character string indicating the name of the time dimension. #' The default value is 'sdate'. #'@param memb_dim A character string indicating the name of the member dimension #' to compute the probabilities of the forecast and the reference forecast. The -#' default value is 'member'. If the data are probabilistics, set memb_dim as +#' default value is 'member'. If the data are probabilities, set memb_dim as #' NULL. #'@param cat_dim A character string indicating the name of the category #' dimension that is needed when exp, obs, and ref are probabilities. The -- GitLab From 03d677dd68eeb39ca11cdba63af81dbce363b910 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 25 Apr 2023 12:41:49 +0200 Subject: [PATCH 5/5] Update man files --- R/RPS.R | 2 +- man/ROCSS.Rd | 18 +++++++++--------- man/RPS.Rd | 16 ++++++++-------- man/RPSS.Rd | 22 +++++++++++----------- 4 files changed, 29 insertions(+), 29 deletions(-) diff --git a/R/RPS.R b/R/RPS.R index 826c03a..c5ff5ba 100644 --- a/R/RPS.R +++ b/R/RPS.R @@ -8,7 +8,7 @@ #'(perfect forecast) and n-1 (worst possible forecast), where n is the number of #'categories. In the case of a forecast divided into two categories (the lowest #'number of categories that a probabilistic forecast can have), the RPS -#'corresponds to the Brier Score (BS; Wilks, 2011), therefore ranging between 0 +#'corresponds to the Brier Score (BS; Wilks, 2011), therefore ranging between 0 #'and 1.\cr #'The function first calculates the probabilities for forecasts and observations, #'then use them to calculate RPS. Or, the probabilities of exp and obs can be diff --git a/man/ROCSS.Rd b/man/ROCSS.Rd index 4514164..7480f63 100644 --- a/man/ROCSS.Rd +++ b/man/ROCSS.Rd @@ -20,21 +20,21 @@ ROCSS( } \arguments{ \item{exp}{A named numerical array of either the forecast with at least time -and member dimensions, or the probability with at least time and category -dimensions. The probability can be generated by \code{s2dv::GetProbs}.} +and member dimensions, or the probabilities with at least time and category +dimensions. The probabilities can be generated by \code{s2dv::GetProbs}.} -\item{obs}{A named numerical array of either the observation with at least -time dimension, or the probability with at least time and category -dimensions. The probability can be generated by \code{s2dv::GetProbs}. The +\item{obs}{A named numerical array of either the observations with at least +time dimension, or the probabilities with at least time and category +dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. The dimensions must be the same as 'exp' except 'memb_dim' and 'dat_dim'.} \item{ref}{A named numerical array of the reference forecast with at least -time and member dimensions, or the probability with at least time and +time and member dimensions, or the probabilities with at least time and category dimensions. The probability can be generated by \code{s2dv::GetProbs}. The dimensions must be the same as 'exp' except 'memb_dim' and 'dat_dim'. If there is only one reference dataset, it should not have dataset dimension. If there is corresponding reference for each -experiement, the dataset dimension must have the same length as in 'exp'. +experiment, the dataset dimension must have the same length as in 'exp'. If 'ref' is NULL, the random forecast is used as reference forecast. The default value is NULL.} @@ -43,7 +43,7 @@ The default value is 'sdate'.} \item{memb_dim}{A character string indicating the name of the member dimension to compute the probabilities of the forecast and the reference forecast. The -default value is 'member'. If the data are probabilistics, set memb_dim as +default value is 'member'. If the data are probabilities, set memb_dim as NULL.} \item{cat_dim}{A character string indicating the name of the category @@ -84,7 +84,7 @@ against the false-alarm rates for a particular category or event. The ROC curve can be summarized with the area under the ROC curve, known as the ROC score, to provide a skill value for each category. The ROCSS ranges between minus infinite and 1. A positive ROCSS value indicates that the forecast has -higher skill than the reference forecasts, meaning the contrary otherwise.\cr +higher skill than the reference forecast, meaning the contrary otherwise.\cr The function accepts either the data or the probabilities of each data as inputs. If there is more than one dataset, RPSS will be computed for each pair of exp and obs data. diff --git a/man/RPS.Rd b/man/RPS.Rd index c08b109..2e21227 100644 --- a/man/RPS.Rd +++ b/man/RPS.Rd @@ -20,13 +20,13 @@ RPS( ) } \arguments{ -\item{exp}{A named numerical array of either the forecast with at least time -and member dimensions, or the probability with at least time and category -dimensions. The probability can be generated by \code{s2dv::GetProbs}.} +\item{exp}{A named numerical array of either the forecasts with at least time +and member dimensions, or the probabilities with at least time and category +dimensions. The probabilities can be generated by \code{s2dv::GetProbs}.} \item{obs}{A named numerical array of either the observation with at least -time dimension, or the probability with at least time and category -dimensions. The probability can be generated by \code{s2dv::GetProbs}. The +time dimension, or the probabilities with at least time and category +dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. The dimensions must be the same as 'exp' except 'memb_dim' and 'dat_dim'.} \item{time_dim}{A character string indicating the name of the time dimension. @@ -34,7 +34,7 @@ The default value is 'sdate'.} \item{memb_dim}{A character string indicating the name of the member dimension to compute the probabilities of the forecast. The default value is 'member'. -If the data are probabilistics, set memb_dim as NULL.} +If the data are probabilities, set memb_dim as NULL.} \item{cat_dim}{A character string indicating the name of the category dimension that is needed when the exp and obs are probabilities. The default @@ -85,9 +85,9 @@ of multi-categorical probabilistic forecasts. The RPS ranges between 0 (perfect forecast) and n-1 (worst possible forecast), where n is the number of categories. In the case of a forecast divided into two categories (the lowest number of categories that a probabilistic forecast can have), the RPS -corresponds to the Brier Score (BS; Wilks, 2011), therefore, ranges between 0 +corresponds to the Brier Score (BS; Wilks, 2011), therefore ranging between 0 and 1.\cr -The function first calculates the probabilities for forecast and observation, +The function first calculates the probabilities for forecasts and observations, then use them to calculate RPS. Or, the probabilities of exp and obs can be provided directly to compute the score. If there is more than one dataset, RPS will be computed for each pair of exp and obs data. diff --git a/man/RPSS.Rd b/man/RPSS.Rd index cda05a4..a6abe34 100644 --- a/man/RPSS.Rd +++ b/man/RPSS.Rd @@ -23,21 +23,21 @@ RPSS( } \arguments{ \item{exp}{A named numerical array of either the forecast with at least time -and member dimensions, or the probability with at least time and category -dimensions. The probability can be generated by \code{s2dv::GetProbs}.} +and member dimensions, or the probabilities with at least time and category +dimensions. The probabilities can be generated by \code{s2dv::GetProbs}.} \item{obs}{A named numerical array of either the observation with at least -time dimension, or the probability with at least time and category -dimensions. The probability can be generated by \code{s2dv::GetProbs}. The +time dimension, or the probabilities with at least time and category +dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. The dimensions must be the same as 'exp' except 'memb_dim' and 'dat_dim'.} \item{ref}{A named numerical array of either the reference forecast with at -least time and member dimensions, or the probability with at least time and -category dimensions. The probability can be generated by +least time and member dimensions, or the probabilities with at least time and +category dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. The dimensions must be the same as 'exp' except 'memb_dim' and 'dat_dim'. If there is only one reference dataset, it should not have dataset dimension. If there is corresponding reference for each -experiement, the dataset dimension must have the same length as in 'exp'. If +experiment, the dataset dimension must have the same length as in 'exp'. If 'ref' is NULL, the climatological forecast is used as reference forecast. The default value is NULL.} @@ -46,7 +46,7 @@ The default value is 'sdate'.} \item{memb_dim}{A character string indicating the name of the member dimension to compute the probabilities of the forecast and the reference forecast. The -default value is 'member'. If the data are probabilistics, set memb_dim as +default value is 'member'. If the data are probabilities, set memb_dim as NULL.} \item{cat_dim}{A character string indicating the name of the category @@ -111,9 +111,9 @@ model version, and another model. It is computed as \code{RPSS = 1 - RPS_exp / RPS_ref}. The statistical significance is obtained based on a Random Walk test at the 95% confidence level (DelSole and Tippett, 2016).\cr -The function accepts either the data or the probabilities of each data as -inputs. If there is more than one dataset, RPSS will be computed for each pair -of exp and obs data. +The function accepts either the ensemble members or the probabilities of +each data as inputs. If there is more than one dataset, RPSS will be +computed for each pair of exp and obs data. } \examples{ set.seed(1) -- GitLab