diff --git a/R/ROCSS.R b/R/ROCSS.R index 1e9f5e2a493888ec3e2d59f4dd99cb4072c21196..2ca078279228c9e7297a5c7c7a1440b97a622768 100644 --- a/R/ROCSS.R +++ b/R/ROCSS.R @@ -6,24 +6,36 @@ #'curve can be summarized with the area under the ROC curve, known as the ROC #'score, to provide a skill value for each category. The ROCSS ranges between #'minus infinite and 1. A positive ROCSS value indicates that the forecast has -#'higher skill than the reference forecasts, meaning the contrary otherwise. -#'@param exp A named numerical array of the forecast with at least time and -#' member dimension. -#'@param obs A named numerical array of the observation with at least time -#' dimension. The dimensions must be the same as 'exp' except 'memb_dim' and -#' 'dat_dim'. -#'@param ref A named numerical array of the reference forecast data with at -#' least time and member dimension. The dimensions must be the same as 'exp' -#' except 'memb_dim' and 'dat_dim'. If there is only one reference dataset, -#' it should not have dataset dimension. If there is corresponding reference -#' for each experiement, the dataset dimension must have the same length as in -#' 'exp'. If 'ref' is NULL, the random forecast is used as reference forecast. -#' The default value is NULL. +#'higher skill than the reference forecast, meaning the contrary otherwise.\cr +#'The function accepts either the data or the probabilities of each data as +#'inputs. If there is more than one dataset, RPSS will be computed for each pair +#'of exp and obs data. +#' +#'@param exp A named numerical array of either the forecast with at least time +#' and member dimensions, or the probabilities with at least time and category +#' dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. +#'@param obs A named numerical array of either the observations with at least +#' time dimension, or the probabilities with at least time and category +#' dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. The +#' dimensions must be the same as 'exp' except 'memb_dim' and 'dat_dim'. +#'@param ref A named numerical array of the reference forecast with at least +#' time and member dimensions, or the probabilities with at least time and +#' category dimensions. The probability can be generated by +#' \code{s2dv::GetProbs}. The dimensions must be the same as 'exp' except +#' 'memb_dim' and 'dat_dim'. If there is only one reference dataset, it should +#' not have dataset dimension. If there is corresponding reference for each +#' experiment, the dataset dimension must have the same length as in 'exp'. +#' If 'ref' is NULL, the random forecast is used as reference forecast. The +#' default value is NULL. #'@param time_dim A character string indicating the name of the time dimension. #' The default value is 'sdate'. #'@param memb_dim A character string indicating the name of the member dimension #' to compute the probabilities of the forecast and the reference forecast. The -#' default value is 'member'. +#' default value is 'member'. If the data are probabilities, set memb_dim as +#' NULL. +#'@param cat_dim A character string indicating the name of the category +#' dimension that is needed when exp, obs, and ref are probabilities. The +#' default value is NULL, which means that the data are not probabilities. #'@param dat_dim A character string indicating the name of dataset dimension. #' The length of this dimension can be different between 'exp' and 'obs'. #' The default value is NULL. @@ -40,27 +52,34 @@ #' computation. The default value is NULL. #' #'@return -#'A numerical array of ROCSS with the same dimensions as 'exp' excluding -#''time_dim' and 'memb_dim' dimensions and including 'cat' dimension, which is -#'each category. The length if 'cat' dimension corresponds to the number of -#'probabilistic categories, i.e., 1 + length(prob_thresholds). If there are -#'multiple datasets, two additional dimensions 'nexp' and 'nobs' are added. +#'A numerical array of ROCSS with dimensions c(nexp, nobs, cat, the rest +#'dimensions of 'exp' except 'time_dim' and 'memb_dim' dimensions). nexp is the +#'number of experiment (i.e., dat_dim in exp), and nobs is the number of +#'observation (i.e., dat_dim in obs). If dat_dim is NULL, nexp and nobs are +#'omitted. dimension 'cat' refers to the probabilistic category, i.e., +#'\code{1 + length(prob_thresholds)}. #' #'@references #'Kharin, V. V. and Zwiers, F. W. (2003): #' https://doi.org/10.1175/1520-0442(2003)016%3C4145:OTRSOP%3E2.0.CO;2 #' #'@examples +#'# Use data as input #'exp <- array(rnorm(1000), dim = c(lon = 3, lat = 2, sdate = 60, member = 10)) #'ref <- array(rnorm(1000), dim = c(lon = 3, lat = 2, sdate = 60, member = 10)) #'obs <- array(rnorm(1000), dim = c(lon = 3, lat = 2, sdate = 60)) #'ROCSS(exp = exp, obs = obs) ## random forecast as reference forecast #'ROCSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast +#'# Use probs as input +#'exp_probs <- GetProbs(exp, memb_dim = 'member') +#'obs_probs <- GetProbs(obs, memb_dim = NULL) +#'ref_probs <- GetProbs(ref, memb_dim = 'member') +#'ROCSS(exp = exp_probs, obs = obs_probs, ref = ref_probs, memb_dim = NULL, cat_dim = 'bin') #' #'@import multiApply #'@importFrom easyVerification EnsRoca #'@export -ROCSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', +ROCSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, cross.val = FALSE, ncores = NULL) { @@ -93,15 +112,31 @@ ROCSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', if (!is.null(ref) & !time_dim %in% names(dim(ref))) { stop("Parameter 'time_dim' is not found in 'ref' dimension.") } - ## memb_dim - if (!is.character(memb_dim) | length(memb_dim) > 1) { - stop("Parameter 'memb_dim' must be a character string.") + ## memb_dim & cat_dim + if (is.null(memb_dim) + is.null(cat_dim) != 1) { + stop("Only one of the two parameters 'memb_dim' and 'cat_dim' can have value.") } - if (!memb_dim %in% names(dim(exp))) { - stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (!is.null(ref) & !memb_dim %in% names(dim(ref))) { + stop("Parameter 'memb_dim' is not found in 'ref' dimension.") + } } - if (!is.null(ref) & !memb_dim %in% names(dim(ref))) { - stop("Parameter 'memb_dim' is not found in 'ref' dimension.") + ## cat_dim + if (!is.null(cat_dim)) { + if (!is.character(cat_dim) | length(cat_dim) > 1) { + stop("Parameter 'cat_dim' must be a character string.") + } + if (!cat_dim %in% names(dim(exp)) | !cat_dim %in% names(dim(obs)) | + (!is.null(ref) & !cat_dim %in% names(dim(ref)))) { + stop("Parameter 'cat_dim' is not found in 'exp', 'obs', or 'ref' dimension.") + } } ## dat_dim if (!is.null(dat_dim)) { @@ -116,9 +151,11 @@ ROCSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', ## exp, obs, and ref (2) name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) - name_exp <- name_exp[-which(name_exp == memb_dim)] - if (memb_dim %in% name_obs) { - name_obs <- name_obs[-which(name_obs == memb_dim)] + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } } if (!is.null(dat_dim)) { name_exp <- name_exp[-which(name_exp == dat_dim)] @@ -131,7 +168,9 @@ ROCSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', } if (!is.null(ref)) { name_ref <- sort(names(dim(ref))) - name_ref <- name_ref[-which(name_ref == memb_dim)] + if (!is.null(memb_dim)) { + name_ref <- name_ref[-which(name_ref == memb_dim)] + } if (!is.null(dat_dim)) { if (dat_dim %in% name_ref) { if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { @@ -187,41 +226,54 @@ ROCSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', output_dims <- c('nexp', 'nobs', 'cat') } ## target_dims - if (!memb_dim %in% names(dim(obs))) { - target_dims_obs <- c(time_dim, dat_dim) - } else { - target_dims_obs <- c(time_dim, memb_dim, dat_dim) + if (!is.null(memb_dim)) { + target_dims_exp <- c(time_dim, memb_dim, dat_dim) + if (!memb_dim %in% names(dim(obs))) { + target_dims_obs <- c(time_dim, dat_dim) + } else { + target_dims_obs <- c(time_dim, memb_dim, dat_dim) + } + } else { # cat_dim + target_dims_exp <- target_dims_obs <- c(time_dim, cat_dim, dat_dim) } - ## If ref doesn't have & dat_dim is not NULL - if (!is.null(ref) && !is.null(dat_dim) &&!dat_dim %in% names(dim(ref))) { - target_dims_ref <- c(time_dim, memb_dim) - } else { - target_dims_ref <- c(time_dim, memb_dim, dat_dim) + + if (!is.null(ref)) { # use "ref" as reference forecast + if (!is.null(memb_dim)) { + if (!is.null(dat_dim) && (dat_dim %in% names(dim(ref)))) { + target_dims_ref <- c(time_dim, memb_dim, dat_dim) + } else { + target_dims_ref <- c(time_dim, memb_dim) + } + } else { + target_dims_ref <- c(time_dim, cat_dim, dat_dim) + } } if (!is.null(ref)) { ## reference forecast is provided res <- Apply(data = list(exp = exp, obs = obs, ref = ref), - target_dims = list(exp = c(time_dim, memb_dim, dat_dim), + target_dims = list(exp = target_dims_exp, obs = target_dims_obs, ref = target_dims_ref), output_dims = output_dims, fun = .ROCSS, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, - time_dim = time_dim, dat_dim = dat_dim, + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, cross.val = cross.val, ncores = ncores)$output1 } else { ## Random forecast as reference forecast res <- Apply(data = list(exp = exp, obs = obs), - target_dims = list(exp = c(time_dim, memb_dim, dat_dim), + target_dims = list(exp = target_dims_exp, obs = target_dims_obs), output_dims = output_dims, fun = .ROCSS, ref = ref, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, - time_dim = time_dim, dat_dim = dat_dim, + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, cross.val = cross.val, ncores = ncores)$output1 } @@ -229,13 +281,19 @@ ROCSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', return(res) } -.ROCSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, prob_thresholds = c(1/3, 2/3), +.ROCSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', + cat_dim = NULL, dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, cross.val = FALSE) { + #--- if memb_dim: # exp: [sdate, memb, (dat)] # obs: [sdate, (dat)] - # ref: [sdate, memb, (dat)] or NULL - + # ref: [sdate, memb, (dat)] or NULL + #--- if cat_dim: + # exp: [sdate, bin, (dat)] + # obs: [sdate, bin, (dat)] + # ref: [sdate, bin, (dat)] or NULL + if (is.null(dat_dim)) { nexp <- 1 nobs <- 1 @@ -262,25 +320,34 @@ ROCSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', for (exp_i in 1:nexp) { for (obs_i in 1:nobs) { - # Input dim for .GetProbs - ## if exp: [sdate, memb] - ## if obs: [sdate, (memb)] - exp_probs <- .GetProbs(ClimProjDiags::Subset(exp, dat_dim, exp_i, drop = 'selected'), - indices_for_quantiles = indices_for_clim, - prob_thresholds = prob_thresholds, cross.val = cross.val) - obs_probs <- .GetProbs(data = ClimProjDiags::Subset(obs, dat_dim, obs_i, drop = 'selected'), - indices_for_quantiles = indices_for_clim, - prob_thresholds = prob_thresholds, cross.val = cross.val) - ## exp_probs and obs_probs: [bin, sdate] - + if (is.null(cat_dim)) { # calculate probs + # Input dim for .GetProbs + ## if exp: [sdate, memb] + ## if obs: [sdate, (memb)] + exp_probs <- .GetProbs(data = ClimProjDiags::Subset(exp, dat_dim, exp_i, drop = 'selected'), + indices_for_quantiles = indices_for_clim, + prob_thresholds = prob_thresholds, cross.val = cross.val) + obs_probs <- .GetProbs(data = ClimProjDiags::Subset(obs, dat_dim, obs_i, drop = 'selected'), + indices_for_quantiles = indices_for_clim, + prob_thresholds = prob_thresholds, cross.val = cross.val) + ## exp_probs and obs_probs: [bin, sdate] + } else { + exp_probs <- exp[, , exp_i] + obs_probs <- obs[, , obs_i] + } + ## ROCS (exp) rocs_exp[exp_i, obs_i, ] <- unlist(EnsRoca(ens = Reorder(exp_probs, c(time_dim, 'bin')), obs = Reorder(obs_probs, c(time_dim, 'bin')))[1:ncats]) if (!is.null(ref)) { - ref_probs <- .GetProbs(ClimProjDiags::Subset(ref, dat_dim, exp_i, drop = 'selected'), - indices_for_quantiles = indices_for_clim, - prob_thresholds = prob_thresholds, cross.val = cross.val) + if (is.null(cat_dim)) { # calculate probs + ref_probs <- .GetProbs(ClimProjDiags::Subset(ref, dat_dim, exp_i, drop = 'selected'), + indices_for_quantiles = indices_for_clim, + prob_thresholds = prob_thresholds, cross.val = cross.val) + } else { + ref_probs <- ref[, , exp_i] + } rocs_ref[exp_i, obs_i, ] <- unlist(EnsRoca(ens = Reorder(ref_probs, c(time_dim, 'bin')), obs = Reorder(obs_probs, c(time_dim, 'bin')))[1:ncats]) } diff --git a/R/RPS.R b/R/RPS.R index 75619b637a5dbc8c170f78fc4ddd982ea6464d1b..c5ff5ba483c307adc1f3110599ecbf2b904d829d 100644 --- a/R/RPS.R +++ b/R/RPS.R @@ -8,22 +8,31 @@ #'(perfect forecast) and n-1 (worst possible forecast), where n is the number of #'categories. In the case of a forecast divided into two categories (the lowest #'number of categories that a probabilistic forecast can have), the RPS -#'corresponds to the Brier Score (BS; Wilks, 2011), therefore, ranges between 0 -#'and 1. If there is more than one dataset, RPS will be computed for each pair -#'of exp and obs data. +#'corresponds to the Brier Score (BS; Wilks, 2011), therefore ranging between 0 +#'and 1.\cr +#'The function first calculates the probabilities for forecasts and observations, +#'then use them to calculate RPS. Or, the probabilities of exp and obs can be +#'provided directly to compute the score. If there is more than one dataset, RPS +#'will be computed for each pair of exp and obs data. #' -#'@param exp A named numerical array of the forecast with at least time and -#' member dimension. -#'@param obs A named numerical array of the observation with at least time -#' dimension. The dimensions must be the same as 'exp' except 'memb_dim' and -#' 'dat_dim'. +#'@param exp A named numerical array of either the forecasts with at least time +#' and member dimensions, or the probabilities with at least time and category +#' dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. +#'@param obs A named numerical array of either the observation with at least +#' time dimension, or the probabilities with at least time and category +#' dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. The +#' dimensions must be the same as 'exp' except 'memb_dim' and 'dat_dim'. #'@param time_dim A character string indicating the name of the time dimension. #' The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the probabilities of the forecast. The default value is 'member'. +#' If the data are probabilities, set memb_dim as NULL. +#'@param cat_dim A character string indicating the name of the category +#' dimension that is needed when the exp and obs are probabilities. The default +#' value is NULL, which means that the data are not probabilities. #'@param dat_dim A character string indicating the name of dataset dimension. #' The length of this dimension can be different between 'exp' and 'obs'. #' The default value is NULL. -#'@param memb_dim A character string indicating the name of the member dimension -#' to compute the probabilities of the forecast. The default value is 'member'. #'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to #' 1) between the categories. The default value is c(1/3, 2/3), which #' corresponds to tercile equiprobable categories. @@ -33,12 +42,12 @@ #'@param Fair A logical indicating whether to compute the FairRPS (the #' potential RPS that the forecast would have with an infinite ensemble size). #' The default value is FALSE. -#'@param weights A named numerical array of the weights for 'exp'. If 'dat_dim' -#' is NULL, the dimension should include 'memb_dim' and 'time_dim'. Else, the -#' dimension should also include 'dat_dim'. The default value is NULL. The -#' ensemble should have at least 70 members or span at least 10 time steps and -#' have more than 45 members if consistency between the weighted and unweighted -#' methodologies is desired. +#'@param weights A named numerical array of the weights for 'exp' probability +#' calculation. If 'dat_dim' is NULL, the dimensions should include 'memb_dim' +#' and 'time_dim'. Else, the dimension should also include 'dat_dim'. The +#' default value is NULL. The ensemble should have at least 70 members or span +#' at least 10 time steps and have more than 45 members if consistency between +#' the weighted and unweighted methodologies is desired. #'@param cross.val A logical indicating whether to compute the thresholds between #' probabilistic categories in cross-validation. #' The default value is FALSE. @@ -55,16 +64,22 @@ #'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 #' #'@examples +#'# Use synthetic data #'exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) #'obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) #'res <- RPS(exp = exp, obs = obs) +#'# Use probabilities as inputs +#'exp_probs <- GetProbs(exp, time_dim = 'sdate', memb_dim = 'member') +#'obs_probs <- GetProbs(obs, time_dim = 'sdate', memb_dim = NULL) +#'res2 <- RPS(exp = exp_probs, obs = obs_probs, memb_dim = NULL, cat_dim = 'bin') +#' #' #'@import multiApply #'@importFrom easyVerification convert2prob #'@export -RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, - prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, - weights = NULL, cross.val = FALSE, ncores = NULL) { +RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, + dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, + Fair = FALSE, weights = NULL, cross.val = FALSE, ncores = NULL) { # Check inputs ## exp and obs (1) @@ -82,12 +97,27 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NUL if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") } + ## memb_dim & cat_dim + if (is.null(memb_dim) + is.null(cat_dim) != 1) { + stop("Only one of the two parameters 'memb_dim' and 'cat_dim' can have value.") + } ## memb_dim - if (!is.character(memb_dim) | length(memb_dim) > 1) { - stop("Parameter 'memb_dim' must be a character string.") + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } } - if (!memb_dim %in% names(dim(exp))) { - stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + ## cat_dim + if (!is.null(cat_dim)) { + if (!is.character(cat_dim) | length(cat_dim) > 1) { + stop("Parameter 'cat_dim' must be a character string.") + } + if (!cat_dim %in% names(dim(exp)) | !cat_dim %in% names(dim(obs))) { + stop("Parameter 'cat_dim' is not found in 'exp' or 'obs' dimension.") + } } ## dat_dim if (!is.null(dat_dim)) { @@ -102,9 +132,11 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NUL ## exp and obs (2) name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) - name_exp <- name_exp[-which(name_exp == memb_dim)] - if (memb_dim %in% name_obs) { - name_obs <- name_obs[-which(name_obs == memb_dim)] + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } } if (!is.null(dat_dim)) { name_exp <- name_exp[-which(name_exp == dat_dim)] @@ -141,7 +173,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NUL stop("Parameter 'cross.val' must be either TRUE or FALSE.") } ## weights - if (!is.null(weights)) { + if (!is.null(weights) & is.null(cat_dim)) { if (!is.array(weights) | !is.numeric(weights)) stop("Parameter 'weights' must be a named numeric array.") if (is.null(dat_dim)) { @@ -166,6 +198,10 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NUL weights <- Reorder(weights, c(time_dim, memb_dim, dat_dim)) } + } else if (!is.null(weights) & !is.null(cat_dim)) { + .warning(paste0("Parameter 'exp' and 'obs' are probabilities already, so parameter ", + "'weights' is not used. Change 'weights' to NULL.")) + weights <- NULL } ## ncores if (!is.null(ncores)) { @@ -178,17 +214,25 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NUL ############################### # Compute RPS - if (!memb_dim %in% names(dim(obs))) { - target_dims_obs <- c(time_dim, dat_dim) - } else { - target_dims_obs <- c(time_dim, memb_dim, dat_dim) + + ## Decide target_dims + if (!is.null(memb_dim)) { + target_dims_exp <- c(time_dim, memb_dim, dat_dim) + if (!memb_dim %in% names(dim(obs))) { + target_dims_obs <- c(time_dim, dat_dim) + } else { + target_dims_obs <- c(time_dim, memb_dim, dat_dim) + } + } else { # cat_dim + target_dims_exp <- target_dims_obs <- c(time_dim, cat_dim, dat_dim) } + rps <- Apply(data = list(exp = exp, obs = obs), - target_dims = list(exp = c(time_dim, memb_dim, dat_dim), + target_dims = list(exp = target_dims_exp, obs = target_dims_obs), fun = .RPS, dat_dim = dat_dim, time_dim = time_dim, - memb_dim = memb_dim, + memb_dim = memb_dim, cat_dim = cat_dim, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, Fair = Fair, weights = weights, cross.val = cross.val, ncores = ncores)$output1 @@ -200,16 +244,23 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NUL } -.RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, - prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, weights = NULL, - cross.val = FALSE) { - +.RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, + dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, + Fair = FALSE, weights = NULL, cross.val = FALSE) { + #--- if memb_dim: # exp: [sdate, memb, (dat)] # obs: [sdate, (memb), (dat)] # weights: NULL or same as exp + #--- if cat_dim: + # exp: [sdate, bin, (dat)] + # obs: [sdate, bin, (dat)] # Adjust dimensions to be [sdate, memb, dat] for both exp and obs - if (!memb_dim %in% names(dim(obs))) obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim) + if (!is.null(memb_dim)) { + if (!memb_dim %in% names(dim(obs))) { + obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim) + } + } if (is.null(dat_dim)) { nexp <- 1 @@ -232,19 +283,27 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NUL if (is.null(dim(exp_data))) dim(exp_data) <- c(dim(exp)[1:2]) if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1:2]) - if (!is.null(weights)) { - weights_data <- weights[ , , i] - if (is.null(dim(weights_data))) dim(weights_data) <- c(dim(weights)[1:2]) - } else { - weights_data <- weights + # If the data inputs are forecast/observation, calculate probabilities + if (is.null(cat_dim)) { + if (!is.null(weights)) { + weights_data <- weights[ , , i] + if (is.null(dim(weights_data))) dim(weights_data) <- c(dim(weights)[1:2]) + } else { + weights_data <- weights + } + + exp_probs <- .GetProbs(data = exp_data, indices_for_quantiles = indices_for_clim, + prob_thresholds = prob_thresholds, weights = weights_data, cross.val = cross.val) + # exp_probs: [bin, sdate] + obs_probs <- .GetProbs(data = obs_data, indices_for_quantiles = indices_for_clim, + prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val) + # obs_probs: [bin, sdate] + + } else { # inputs are probabilities already + exp_probs <- t(exp_data) + obs_probs <- t(obs_data) } - exp_probs <- .GetProbs(data = exp_data, indices_for_quantiles = indices_for_clim, - prob_thresholds = prob_thresholds, weights = weights_data, cross.val = cross.val) - # exp_probs: [bin, sdate] - obs_probs <- .GetProbs(data = obs_data, indices_for_quantiles = indices_for_clim, - prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val) - # obs_probs: [bin, sdate] probs_exp_cumsum <- apply(exp_probs, 2, cumsum) probs_obs_cumsum <- apply(obs_probs, 2, cumsum) diff --git a/R/RPSS.R b/R/RPSS.R index ab433e91698a45de1fb89ecdcfa3901a0e9181b2..6299eb8c19ecfd0f6dd00de25b1da3df8c581864 100644 --- a/R/RPSS.R +++ b/R/RPSS.R @@ -11,26 +11,36 @@ #'model version, and another model. It is computed as #'\code{RPSS = 1 - RPS_exp / RPS_ref}. The statistical significance is obtained #'based on a Random Walk test at the 95% confidence level (DelSole and Tippett, -#'2016). If there is more than one dataset, RPS will be computed for each pair -#'of exp and obs data. +#'2016).\cr +#'The function accepts either the ensemble members or the probabilities of +#' each data as inputs. If there is more than one dataset, RPSS will be +#' computed for each pair of exp and obs data. #' -#'@param exp A named numerical array of the forecast with at least time and -#' member dimension. -#'@param obs A named numerical array of the observation with at least time -#' dimension. The dimensions must be the same as 'exp' except 'memb_dim' and -#' 'dat_dim'. -#'@param ref A named numerical array of the reference forecast data with at -#' least time and member dimension. The dimensions must be the same as 'exp' -#' except 'memb_dim' and 'dat_dim'. If there is only one reference dataset, -#' it should not have dataset dimension. If there is corresponding reference -#' for each experiement, the dataset dimension must have the same length as in -#' 'exp'. If 'ref' is NULL, the climatological forecast is used as reference -#' forecast. The default value is NULL. +#'@param exp A named numerical array of either the forecast with at least time +#' and member dimensions, or the probabilities with at least time and category +#' dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. +#'@param obs A named numerical array of either the observation with at least +#' time dimension, or the probabilities with at least time and category +#' dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. The +#' dimensions must be the same as 'exp' except 'memb_dim' and 'dat_dim'. +#'@param ref A named numerical array of either the reference forecast with at +#' least time and member dimensions, or the probabilities with at least time and +#' category dimensions. The probabilities can be generated by +#' \code{s2dv::GetProbs}. The dimensions must be the same as 'exp' except +#' 'memb_dim' and 'dat_dim'. If there is only one reference dataset, it should +#' not have dataset dimension. If there is corresponding reference for each +#' experiment, the dataset dimension must have the same length as in 'exp'. If +#' 'ref' is NULL, the climatological forecast is used as reference forecast. +#' The default value is NULL. #'@param time_dim A character string indicating the name of the time dimension. #' The default value is 'sdate'. #'@param memb_dim A character string indicating the name of the member dimension #' to compute the probabilities of the forecast and the reference forecast. The -#' default value is 'member'. +#' default value is 'member'. If the data are probabilities, set memb_dim as +#' NULL. +#'@param cat_dim A character string indicating the name of the category +#' dimension that is needed when exp, obs, and ref are probabilities. The +#' default value is NULL, which means that the data are not probabilities. #'@param dat_dim A character string indicating the name of dataset dimension. #' The length of this dimension can be different between 'exp' and 'obs'. #' The default value is NULL. @@ -43,15 +53,13 @@ #'@param Fair A logical indicating whether to compute the FairRPSS (the #' potential RPSS that the forecast would have with an infinite ensemble size). #' The default value is FALSE. -#'@param weights Deprecated and will be removed in the next release. Please use -#' 'weights_exp' and 'weights_ref' instead. -#'@param weights_exp A named numerical array of the forecast ensemble weights. -#' The dimension should include 'memb_dim', 'time_dim' and 'dat_dim' if there -#' are multiple datasets. All dimension lengths must be equal to 'exp' -#' dimension lengths. The default value is NULL, which means no weighting is -#' applied. The ensemble should have at least 70 members or span at least 10 -#' time steps and have more than 45 members if consistency between the weighted -#' and unweighted methodologies is desired. +#'@param weights_exp A named numerical array of the forecast ensemble weights +#' for probability calculation. The dimension should include 'memb_dim', +#' 'time_dim' and 'dat_dim' if there are multiple datasets. All dimension +#' lengths must be equal to 'exp' dimension lengths. The default value is NULL, +#' which means no weighting is applied. The ensemble should have at least 70 +#' members or span at least 10 time steps and have more than 45 members if +#' consistency between the weighted and unweighted methodologies is desired. #'@param weights_ref Same as 'weights_exp' but for the reference forecast. #'@param cross.val A logical indicating whether to compute the thresholds between #' probabilistics categories in cross-validation. @@ -87,14 +95,21 @@ #' n/sum(n) #' }) #'dim(weights) <- c(member = 10, sdate = 50) +#'# Use data as input #'res <- RPSS(exp = exp, obs = obs) ## climatology as reference forecast #'res <- RPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast #'res <- RPSS(exp = exp, obs = obs, ref = ref, weights_exp = weights, weights_ref = weights) +#'# Use probs as input +#'exp_probs <- GetProbs(exp, memb_dim = 'member') +#'obs_probs <- GetProbs(obs, memb_dim = NULL) +#'ref_probs <- GetProbs(ref, memb_dim = 'member') +#'res <- RPSS(exp = exp_probs, obs = obs_probs, ref = ref_probs, memb_dim = NULL, cat_dim = 'bin') +#' #'@import multiApply #'@export -RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', +RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, - Fair = FALSE, weights = NULL, weights_exp = NULL, weights_ref = NULL, + Fair = FALSE, weights_exp = NULL, weights_ref = NULL, cross.val = FALSE, ncores = NULL) { # Check inputs @@ -126,15 +141,31 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', if (!is.null(ref) & !time_dim %in% names(dim(ref))) { stop("Parameter 'time_dim' is not found in 'ref' dimension.") } - ## memb_dim - if (!is.character(memb_dim) | length(memb_dim) > 1) { - stop("Parameter 'memb_dim' must be a character string.") + ## memb_dim & cat_dim + if (is.null(memb_dim) + is.null(cat_dim) != 1) { + stop("Only one of the two parameters 'memb_dim' and 'cat_dim' can have value.") } - if (!memb_dim %in% names(dim(exp))) { - stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (!is.null(ref) & !memb_dim %in% names(dim(ref))) { + stop("Parameter 'memb_dim' is not found in 'ref' dimension.") + } } - if (!is.null(ref) & !memb_dim %in% names(dim(ref))) { - stop("Parameter 'memb_dim' is not found in 'ref' dimension.") + ## cat_dim + if (!is.null(cat_dim)) { + if (!is.character(cat_dim) | length(cat_dim) > 1) { + stop("Parameter 'cat_dim' must be a character string.") + } + if (!cat_dim %in% names(dim(exp)) | !cat_dim %in% names(dim(obs)) | + (!is.null(ref) & !cat_dim %in% names(dim(ref)))) { + stop("Parameter 'cat_dim' is not found in 'exp', 'obs', or 'ref' dimension.") + } } ## dat_dim if (!is.null(dat_dim)) { @@ -149,9 +180,11 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', ## exp, obs, and ref (2) name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) - name_exp <- name_exp[-which(name_exp == memb_dim)] - if (memb_dim %in% name_obs) { - name_obs <- name_obs[-which(name_obs == memb_dim)] + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } } if (!is.null(dat_dim)) { name_exp <- name_exp[-which(name_exp == dat_dim)] @@ -164,7 +197,9 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', } if (!is.null(ref)) { name_ref <- sort(names(dim(ref))) - name_ref <- name_ref[-which(name_ref == memb_dim)] + if (!is.null(memb_dim)) { + name_ref <- name_ref[-which(name_ref == memb_dim)] + } if (!is.null(dat_dim)) { if (dat_dim %in% name_ref) { if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { @@ -206,16 +241,8 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', if (!is.logical(cross.val) | length(cross.val) > 1) { stop("Parameter 'cross.val' must be either TRUE or FALSE.") } - ## weights - if (!is.null(weights)) { - .warning(paste0("Parameter 'weights' is deprecated and will be removed in the next release. ", - "Use 'weights_exp' and 'weights_ref' instead. The value will be assigned ", - "to these two parameters now if they are NULL."), tag = '! Deprecation: ') - if (is.null(weights_exp)) weights_exp <- weights - if (is.null(weights_ref)) weights_ref <- weights - } ## weights_exp - if (!is.null(weights_exp)) { + if (!is.null(weights_exp) & is.null(cat_dim)) { if (!is.array(weights_exp) | !is.numeric(weights_exp)) stop("Parameter 'weights_exp' must be a named numeric array.") @@ -238,13 +265,16 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', "as 'memb_dim', 'time_dim' and 'dat_dim' in 'exp'.")) } weights_exp <- Reorder(weights_exp, c(time_dim, memb_dim, dat_dim)) - } - + } + } else if (!is.null(weights_exp) & !is.null(cat_dim)) { + .warning(paste0("Parameter 'exp' is probability already, so parameter ", + "'weights_exp' is not used. Change 'weights_exp' to NULL.")) + weights_exp <- NULL } ## weights_ref - if (!is.null(weights_ref)) { + if (!is.null(weights_ref) & is.null(cat_dim)) { if (!is.array(weights_ref) | !is.numeric(weights_ref)) - stop('Parameter "weights_ref" must be a named numeric array.') + stop("Parameter 'weights_ref' must be a named numeric array.") if (is.null(dat_dim) | ((!is.null(dat_dim)) && (!dat_dim %in% names(dim(ref))))) { if (length(dim(weights_ref)) != 2 | any(!names(dim(weights_ref)) %in% c(memb_dim, time_dim))) @@ -266,7 +296,10 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', } weights_ref <- Reorder(weights_ref, c(time_dim, memb_dim, dat_dim)) } - + } else if (!is.null(weights_ref) & !is.null(cat_dim)) { + .warning(paste0("Parameter 'ref' is probability already, so parameter ", + "'weights_ref' is not used. Change 'weights_ref' to NULL.")) + weights_ref <- NULL } ## ncores if (!is.null(ncores)) { @@ -279,32 +312,44 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', ############################### # Compute RPSS - if (!memb_dim %in% names(dim(obs))) { - target_dims_obs <- c(time_dim, dat_dim) - } else { - target_dims_obs <- c(time_dim, memb_dim, dat_dim) + + ## Decide target_dims + if (!is.null(memb_dim)) { + target_dims_exp <- c(time_dim, memb_dim, dat_dim) + if (!memb_dim %in% names(dim(obs))) { + target_dims_obs <- c(time_dim, dat_dim) + } else { + target_dims_obs <- c(time_dim, memb_dim, dat_dim) + } + } else { # cat_dim + target_dims_exp <- target_dims_obs <- c(time_dim, cat_dim, dat_dim) } if (!is.null(ref)) { # use "ref" as reference forecast - if (!is.null(dat_dim) && (dat_dim %in% names(dim(ref)))) { - target_dims_ref <- c(time_dim, memb_dim, dat_dim) - } else { + if (!is.null(memb_dim)) { + if (!is.null(dat_dim) && (dat_dim %in% names(dim(ref)))) { + target_dims_ref <- c(time_dim, memb_dim, dat_dim) + } else { target_dims_ref <- c(time_dim, memb_dim) + } + } else { + target_dims_ref <- c(time_dim, cat_dim, dat_dim) } data <- list(exp = exp, obs = obs, ref = ref) - target_dims = list(exp = c(time_dim, memb_dim, dat_dim), + target_dims = list(exp = target_dims_exp, obs = target_dims_obs, ref = target_dims_ref) } else { data <- list(exp = exp, obs = obs) - target_dims = list(exp = c(time_dim, memb_dim, dat_dim), + target_dims = list(exp = target_dims_exp, obs = target_dims_obs) } + output <- Apply(data, target_dims = target_dims, fun = .RPSS, time_dim = time_dim, memb_dim = memb_dim, - dat_dim = dat_dim, + cat_dim = cat_dim, dat_dim = dat_dim, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, Fair = Fair, weights_exp = weights_exp, @@ -316,13 +361,18 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', } -.RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, - prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, - weights_exp = NULL, weights_ref = NULL, cross.val = FALSE) { +.RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, + dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, + Fair = FALSE, weights_exp = NULL, weights_ref = NULL, cross.val = FALSE) { + #--- if memb_dim: # exp: [sdate, memb, (dat)] # obs: [sdate, (memb), (dat)] # ref: [sdate, memb, (dat)] or NULL + #--- if cat_dim: + # exp: [sdate, bin, (dat)] + # obs: [sdate, bin, (dat)] + # ref: [sdate, bin, (dat)] or NULL if (is.null(dat_dim)) { nexp <- 1 @@ -333,14 +383,17 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', } # RPS of the forecast - rps_exp <- .RPS(exp = exp, obs = obs, time_dim = time_dim, memb_dim = memb_dim, dat_dim = dat_dim, - prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, - Fair = Fair, weights = weights_exp, cross.val = cross.val) + rps_exp <- .RPS(exp = exp, obs = obs, time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, weights = weights_exp, + cross.val = cross.val) # RPS of the reference forecast if (is.null(ref)) { ## using climatology as reference forecast - if (!memb_dim %in% names(dim(obs))) { - obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim) + if (!is.null(memb_dim)) { + if (!memb_dim %in% names(dim(obs))) { + obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim) + } } if (is.null(dat_dim)) { dim(obs) <- c(dim(obs), nobs = nobs) @@ -348,14 +401,19 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', rps_ref <- array(dim = c(dim(obs)[time_dim], nobs = nobs)) for (j in 1:nobs) { - obs_data <- obs[ , , j] - if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1:2]) - # obs_probs: [bin, sdate] - obs_probs <- .GetProbs(data = obs_data, indices_for_quantiles = indices_for_clim, - prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val) - # clim_probs: [bin, sdate] - clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), 1 - prob_thresholds[length(prob_thresholds)]) + if (is.null(cat_dim)) { # calculate probs + obs_data <- obs[ , , j] + if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1:2]) + obs_probs <- .GetProbs(data = obs_data, indices_for_quantiles = indices_for_clim, + prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val) + # obs_probs: [bin, sdate] + } else { + obs_probs <- t(obs[ , , j]) + } + clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), + 1 - prob_thresholds[length(prob_thresholds)]) clim_probs <- array(clim_probs, dim = dim(obs_probs)) + # clim_probs: [bin, sdate] # Calculate RPS for each time step probs_clim_cumsum <- apply(clim_probs, 2, cumsum) @@ -386,9 +444,10 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', remove_dat_dim <- FALSE } - rps_ref <- .RPS(exp = ref, obs = obs, time_dim = time_dim, memb_dim = memb_dim, dat_dim = dat_dim, - prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, - Fair = Fair, weights = weights_ref, cross.val = cross.val) + rps_ref <- .RPS(exp = ref, obs = obs, time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, weights = weights_ref, + cross.val = cross.val) if (!is.null(dat_dim)) { if (isTRUE(remove_dat_dim)) { dim(rps_ref) <- dim(rps_ref)[-2] diff --git a/man/ROCSS.Rd b/man/ROCSS.Rd index 1f4951781913d190e70b74642558dcbee4ab7073..7480f632ac1e6f9825da51c9cb0df7096e7fca13 100644 --- a/man/ROCSS.Rd +++ b/man/ROCSS.Rd @@ -10,6 +10,7 @@ ROCSS( ref = NULL, time_dim = "sdate", memb_dim = "member", + cat_dim = NULL, dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, @@ -18,27 +19,36 @@ ROCSS( ) } \arguments{ -\item{exp}{A named numerical array of the forecast with at least time and -member dimension.} +\item{exp}{A named numerical array of either the forecast with at least time +and member dimensions, or the probabilities with at least time and category +dimensions. The probabilities can be generated by \code{s2dv::GetProbs}.} -\item{obs}{A named numerical array of the observation with at least time -dimension. The dimensions must be the same as 'exp' except 'memb_dim' and -'dat_dim'.} +\item{obs}{A named numerical array of either the observations with at least +time dimension, or the probabilities with at least time and category +dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. The +dimensions must be the same as 'exp' except 'memb_dim' and 'dat_dim'.} -\item{ref}{A named numerical array of the reference forecast data with at -least time and member dimension. The dimensions must be the same as 'exp' -except 'memb_dim' and 'dat_dim'. If there is only one reference dataset, -it should not have dataset dimension. If there is corresponding reference -for each experiement, the dataset dimension must have the same length as in -'exp'. If 'ref' is NULL, the random forecast is used as reference forecast. -The default value is NULL.} +\item{ref}{A named numerical array of the reference forecast with at least +time and member dimensions, or the probabilities with at least time and +category dimensions. The probability can be generated by +\code{s2dv::GetProbs}. The dimensions must be the same as 'exp' except +'memb_dim' and 'dat_dim'. If there is only one reference dataset, it should +not have dataset dimension. If there is corresponding reference for each +experiment, the dataset dimension must have the same length as in 'exp'. +If 'ref' is NULL, the random forecast is used as reference forecast. The +default value is NULL.} \item{time_dim}{A character string indicating the name of the time dimension. The default value is 'sdate'.} \item{memb_dim}{A character string indicating the name of the member dimension to compute the probabilities of the forecast and the reference forecast. The -default value is 'member'.} +default value is 'member'. If the data are probabilities, set memb_dim as +NULL.} + +\item{cat_dim}{A character string indicating the name of the category +dimension that is needed when exp, obs, and ref are probabilities. The +default value is NULL, which means that the data are not probabilities.} \item{dat_dim}{A character string indicating the name of dataset dimension. The length of this dimension can be different between 'exp' and 'obs'. @@ -60,11 +70,12 @@ FALSE.} computation. The default value is NULL.} } \value{ -A numerical array of ROCSS with the same dimensions as 'exp' excluding -'time_dim' and 'memb_dim' dimensions and including 'cat' dimension, which is -each category. The length if 'cat' dimension corresponds to the number of -probabilistic categories, i.e., 1 + length(prob_thresholds). If there are -multiple datasets, two additional dimensions 'nexp' and 'nobs' are added. +A numerical array of ROCSS with dimensions c(nexp, nobs, cat, the rest +dimensions of 'exp' except 'time_dim' and 'memb_dim' dimensions). nexp is the +number of experiment (i.e., dat_dim in exp), and nobs is the number of +observation (i.e., dat_dim in obs). If dat_dim is NULL, nexp and nobs are +omitted. dimension 'cat' refers to the probabilistic category, i.e., +\code{1 + length(prob_thresholds)}. } \description{ The Relative Operating Characteristic Skill Score (ROCSS; Kharin and Zwiers, @@ -73,14 +84,23 @@ against the false-alarm rates for a particular category or event. The ROC curve can be summarized with the area under the ROC curve, known as the ROC score, to provide a skill value for each category. The ROCSS ranges between minus infinite and 1. A positive ROCSS value indicates that the forecast has -higher skill than the reference forecasts, meaning the contrary otherwise. +higher skill than the reference forecast, meaning the contrary otherwise.\cr +The function accepts either the data or the probabilities of each data as +inputs. If there is more than one dataset, RPSS will be computed for each pair +of exp and obs data. } \examples{ +# Use data as input exp <- array(rnorm(1000), dim = c(lon = 3, lat = 2, sdate = 60, member = 10)) ref <- array(rnorm(1000), dim = c(lon = 3, lat = 2, sdate = 60, member = 10)) obs <- array(rnorm(1000), dim = c(lon = 3, lat = 2, sdate = 60)) ROCSS(exp = exp, obs = obs) ## random forecast as reference forecast ROCSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast +# Use probs as input +exp_probs <- GetProbs(exp, memb_dim = 'member') +obs_probs <- GetProbs(obs, memb_dim = NULL) +ref_probs <- GetProbs(ref, memb_dim = 'member') +ROCSS(exp = exp_probs, obs = obs_probs, ref = ref_probs, memb_dim = NULL, cat_dim = 'bin') } \references{ diff --git a/man/RPS.Rd b/man/RPS.Rd index 813c12f6e58e5cc6b33e9ffc95ebcd9555b4050d..2e21227f899b3b42f10ea35e32dff76eef24c715 100644 --- a/man/RPS.Rd +++ b/man/RPS.Rd @@ -9,6 +9,7 @@ RPS( obs, time_dim = "sdate", memb_dim = "member", + cat_dim = NULL, dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, @@ -19,18 +20,25 @@ RPS( ) } \arguments{ -\item{exp}{A named numerical array of the forecast with at least time and -member dimension.} +\item{exp}{A named numerical array of either the forecasts with at least time +and member dimensions, or the probabilities with at least time and category +dimensions. The probabilities can be generated by \code{s2dv::GetProbs}.} -\item{obs}{A named numerical array of the observation with at least time -dimension. The dimensions must be the same as 'exp' except 'memb_dim' and -'dat_dim'.} +\item{obs}{A named numerical array of either the observation with at least +time dimension, or the probabilities with at least time and category +dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. The +dimensions must be the same as 'exp' except 'memb_dim' and 'dat_dim'.} \item{time_dim}{A character string indicating the name of the time dimension. The default value is 'sdate'.} \item{memb_dim}{A character string indicating the name of the member dimension -to compute the probabilities of the forecast. The default value is 'member'.} +to compute the probabilities of the forecast. The default value is 'member'. +If the data are probabilities, set memb_dim as NULL.} + +\item{cat_dim}{A character string indicating the name of the category +dimension that is needed when the exp and obs are probabilities. The default +value is NULL, which means that the data are not probabilities.} \item{dat_dim}{A character string indicating the name of dataset dimension. The length of this dimension can be different between 'exp' and 'obs'. @@ -48,12 +56,12 @@ the whole period is used. The default value is NULL.} potential RPS that the forecast would have with an infinite ensemble size). The default value is FALSE.} -\item{weights}{A named numerical array of the weights for 'exp'. If 'dat_dim' -is NULL, the dimension should include 'memb_dim' and 'time_dim'. Else, the -dimension should also include 'dat_dim'. The default value is NULL. The -ensemble should have at least 70 members or span at least 10 time steps and -have more than 45 members if consistency between the weighted and unweighted -methodologies is desired.} +\item{weights}{A named numerical array of the weights for 'exp' probability +calculation. If 'dat_dim' is NULL, the dimensions should include 'memb_dim' +and 'time_dim'. Else, the dimension should also include 'dat_dim'. The +default value is NULL. The ensemble should have at least 70 members or span +at least 10 time steps and have more than 45 members if consistency between +the weighted and unweighted methodologies is desired.} \item{cross.val}{A logical indicating whether to compute the thresholds between probabilistic categories in cross-validation. @@ -77,14 +85,23 @@ of multi-categorical probabilistic forecasts. The RPS ranges between 0 (perfect forecast) and n-1 (worst possible forecast), where n is the number of categories. In the case of a forecast divided into two categories (the lowest number of categories that a probabilistic forecast can have), the RPS -corresponds to the Brier Score (BS; Wilks, 2011), therefore, ranges between 0 -and 1. If there is more than one dataset, RPS will be computed for each pair -of exp and obs data. +corresponds to the Brier Score (BS; Wilks, 2011), therefore ranging between 0 +and 1.\cr +The function first calculates the probabilities for forecasts and observations, +then use them to calculate RPS. Or, the probabilities of exp and obs can be +provided directly to compute the score. If there is more than one dataset, RPS +will be computed for each pair of exp and obs data. } \examples{ +# Use synthetic data exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) res <- RPS(exp = exp, obs = obs) +# Use probabilities as inputs +exp_probs <- GetProbs(exp, time_dim = 'sdate', memb_dim = 'member') +obs_probs <- GetProbs(obs, time_dim = 'sdate', memb_dim = NULL) +res2 <- RPS(exp = exp_probs, obs = obs_probs, memb_dim = NULL, cat_dim = 'bin') + } \references{ diff --git a/man/RPSS.Rd b/man/RPSS.Rd index d70425e62f7df67e3a3c5ef75b0ff9163fb98389..a6abe342d9f408330a97302c1ca3918e2476ad3f 100644 --- a/man/RPSS.Rd +++ b/man/RPSS.Rd @@ -10,11 +10,11 @@ RPSS( ref = NULL, time_dim = "sdate", memb_dim = "member", + cat_dim = NULL, dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, - weights = NULL, weights_exp = NULL, weights_ref = NULL, cross.val = FALSE, @@ -22,27 +22,36 @@ RPSS( ) } \arguments{ -\item{exp}{A named numerical array of the forecast with at least time and -member dimension.} +\item{exp}{A named numerical array of either the forecast with at least time +and member dimensions, or the probabilities with at least time and category +dimensions. The probabilities can be generated by \code{s2dv::GetProbs}.} -\item{obs}{A named numerical array of the observation with at least time -dimension. The dimensions must be the same as 'exp' except 'memb_dim' and -'dat_dim'.} +\item{obs}{A named numerical array of either the observation with at least +time dimension, or the probabilities with at least time and category +dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. The +dimensions must be the same as 'exp' except 'memb_dim' and 'dat_dim'.} -\item{ref}{A named numerical array of the reference forecast data with at -least time and member dimension. The dimensions must be the same as 'exp' -except 'memb_dim' and 'dat_dim'. If there is only one reference dataset, -it should not have dataset dimension. If there is corresponding reference -for each experiement, the dataset dimension must have the same length as in -'exp'. If 'ref' is NULL, the climatological forecast is used as reference -forecast. The default value is NULL.} +\item{ref}{A named numerical array of either the reference forecast with at +least time and member dimensions, or the probabilities with at least time and +category dimensions. The probabilities can be generated by +\code{s2dv::GetProbs}. The dimensions must be the same as 'exp' except +'memb_dim' and 'dat_dim'. If there is only one reference dataset, it should +not have dataset dimension. If there is corresponding reference for each +experiment, the dataset dimension must have the same length as in 'exp'. If +'ref' is NULL, the climatological forecast is used as reference forecast. +The default value is NULL.} \item{time_dim}{A character string indicating the name of the time dimension. The default value is 'sdate'.} \item{memb_dim}{A character string indicating the name of the member dimension to compute the probabilities of the forecast and the reference forecast. The -default value is 'member'.} +default value is 'member'. If the data are probabilities, set memb_dim as +NULL.} + +\item{cat_dim}{A character string indicating the name of the category +dimension that is needed when exp, obs, and ref are probabilities. The +default value is NULL, which means that the data are not probabilities.} \item{dat_dim}{A character string indicating the name of dataset dimension. The length of this dimension can be different between 'exp' and 'obs'. @@ -60,16 +69,13 @@ the whole period is used. The default value is NULL.} potential RPSS that the forecast would have with an infinite ensemble size). The default value is FALSE.} -\item{weights}{Deprecated and will be removed in the next release. Please use -'weights_exp' and 'weights_ref' instead.} - -\item{weights_exp}{A named numerical array of the forecast ensemble weights. -The dimension should include 'memb_dim', 'time_dim' and 'dat_dim' if there -are multiple datasets. All dimension lengths must be equal to 'exp' -dimension lengths. The default value is NULL, which means no weighting is -applied. The ensemble should have at least 70 members or span at least 10 -time steps and have more than 45 members if consistency between the weighted - and unweighted methodologies is desired.} +\item{weights_exp}{A named numerical array of the forecast ensemble weights +for probability calculation. The dimension should include 'memb_dim', +'time_dim' and 'dat_dim' if there are multiple datasets. All dimension +lengths must be equal to 'exp' dimension lengths. The default value is NULL, +which means no weighting is applied. The ensemble should have at least 70 +members or span at least 10 time steps and have more than 45 members if +consistency between the weighted and unweighted methodologies is desired.} \item{weights_ref}{Same as 'weights_exp' but for the reference forecast.} @@ -104,8 +110,10 @@ probabilities for all categories for all time steps), persistence, a previous model version, and another model. It is computed as \code{RPSS = 1 - RPS_exp / RPS_ref}. The statistical significance is obtained based on a Random Walk test at the 95% confidence level (DelSole and Tippett, -2016). If there is more than one dataset, RPS will be computed for each pair -of exp and obs data. +2016).\cr +The function accepts either the ensemble members or the probabilities of +each data as inputs. If there is more than one dataset, RPSS will be +computed for each pair of exp and obs data. } \examples{ set.seed(1) @@ -119,9 +127,16 @@ weights <- sapply(1:dim(exp)['sdate'], function(i) { n/sum(n) }) dim(weights) <- c(member = 10, sdate = 50) +# Use data as input res <- RPSS(exp = exp, obs = obs) ## climatology as reference forecast res <- RPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast res <- RPSS(exp = exp, obs = obs, ref = ref, weights_exp = weights, weights_ref = weights) +# Use probs as input +exp_probs <- GetProbs(exp, memb_dim = 'member') +obs_probs <- GetProbs(obs, memb_dim = NULL) +ref_probs <- GetProbs(ref, memb_dim = 'member') +res <- RPSS(exp = exp_probs, obs = obs_probs, ref = ref_probs, memb_dim = NULL, cat_dim = 'bin') + } \references{ Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 diff --git a/tests/testthat/test-ROCSS.R b/tests/testthat/test-ROCSS.R index a95d0ba1b80f637556f4d0b014880d8d930a0b84..130cfa260a88b187f912a29ae9b5481666bebfff 100644 --- a/tests/testthat/test-ROCSS.R +++ b/tests/testthat/test-ROCSS.R @@ -10,6 +10,11 @@ obs1 <- array(rnorm(20), dim = c(sdate = 10, lat = 2)) set.seed(3) ref1 <- array(rnorm(40), dim = c(member = 2, sdate = 10, lat = 2)) +# dat1_2 +exp1_2 <- GetProbs(exp1, memb_dim = 'member') +obs1_2 <- GetProbs(obs1, memb_dim = NULL) +ref1_2 <- GetProbs(ref1, memb_dim = 'member') + # dat2 set.seed(1) exp2 <- array(rnorm(30), dim = c(member = 3, sdate = 10)) @@ -151,6 +156,16 @@ c(0.5238095, 0.5357143), tolerance = 0.0001 ) +# dat1_2 +expect_equal( + ROCSS(exp1, obs1), + ROCSS(exp1_2, obs1_2, memb_dim = NULL, cat_dim = 'bin') +) +expect_equal( + ROCSS(exp1, obs1, ref1), + ROCSS(exp1_2, obs1_2, ref1_2, memb_dim = NULL, cat_dim = 'bin') +) + }) ############################################## diff --git a/tests/testthat/test-RPS.R b/tests/testthat/test-RPS.R index 51ba992d93b3e1ee4828bb095bbe363126a8b716..2040c093045f1bd88da25ba9eed3bc41547a18e3 100644 --- a/tests/testthat/test-RPS.R +++ b/tests/testthat/test-RPS.R @@ -10,6 +10,10 @@ obs1 <- array(rnorm(20), dim = c(sdate = 10, lat = 2)) set.seed(3) weights1 <- array(abs(rnorm(30)), dim = c(member = 3, sdate = 10)) +# dat1_2: probabilites +exp1_2 <- GetProbs(exp1, memb_dim = 'member') +obs1_2 <- GetProbs(obs1, memb_dim = NULL) + # dat2 set.seed(1) exp2 <- array(rnorm(20), dim = c(member = 2, sdate = 10)) @@ -60,6 +64,12 @@ test_that("1. Input checks", { RPS(exp1, obs1, memb_dim = 'memb'), "Parameter 'memb_dim' is not found in 'exp' dimension." ) + # cat_dim + expect_error( + RPS(exp1_2, obs1_2, memb_dim = NULL), + "Only one of the two parameters 'memb_dim' and 'cat_dim' can have value." + ) + # exp, ref, and obs (2) expect_error( RPS(exp1, array(1:9, dim = c(sdate = 9))), @@ -160,6 +170,10 @@ test_that("2. Output checks: dat1", { c(0.3559286, 0.6032109), tolerance = 0.0001 ) + expect_equal( + as.vector(RPS(exp1, obs1)), + as.vector(RPS(exp1_2, obs1_2, memb_dim = NULL, cat_dim = 'bin')) + ) }) diff --git a/tests/testthat/test-RPSS.R b/tests/testthat/test-RPSS.R index 36efee8df0c317e06a46439643da8dc81a0561ee..ce0deb3c0765175f83889bd866e80fad183b1869 100644 --- a/tests/testthat/test-RPSS.R +++ b/tests/testthat/test-RPSS.R @@ -12,6 +12,11 @@ ref1 <- array(rnorm(40), dim = c(member = 2, sdate = 10, lat = 2)) set.seed(4) weights1 <- array(abs(rnorm(30)), dim = c(member = 3, sdate = 10)) +# dat1_2 +exp1_2 <- GetProbs(exp1, memb_dim = 'member') +obs1_2 <- GetProbs(obs1, memb_dim = NULL) +ref1_2 <- GetProbs(ref1, memb_dim = 'member') + # dat2 set.seed(1) exp2 <- array(rnorm(20), dim = c(member = 2, sdate = 10)) @@ -255,6 +260,16 @@ test_that("2. Output checks: dat1", { tolerance = 0.0001 ) + # dat1_2 + expect_equal( + RPSS(exp1, obs1), + RPSS(exp1_2, obs1_2, memb_dim = NULL, cat_dim = 'bin') + ) + expect_equal( + RPSS(exp1, obs1, ref1), + RPSS(exp1_2, obs1_2, ref1_2, memb_dim = NULL, cat_dim = 'bin') + ) + })