From 809eb885c4dcc5963c8343f87c288ba2bc8e8fcc Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 4 Mar 2022 16:49:53 +0100 Subject: [PATCH 01/18] first version (without documentation) --- R/RPS.R | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ R/RPSS.R | 64 ++++++++++++++++++++++++++++++++++++ 2 files changed, 163 insertions(+) create mode 100644 R/RPS.R create mode 100644 R/RPSS.R diff --git a/R/RPS.R b/R/RPS.R new file mode 100644 index 0000000..0081d8e --- /dev/null +++ b/R/RPS.R @@ -0,0 +1,99 @@ + +library(multiApply) +library(easyVerification) + +RPS <- function(exp, obs, n_categories, indices_for_clim, Fair = FALSE, time_dim = 'year', member_dim = 'member', ncores = NULL){ + + ## Checkings + if (!is.array(exp)){stop('"exp" must be an array.')} + if (!is.array(obs)){stop('"obs" must be an array.')} + if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(obs)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "obs".')} + if (!is.numeric(n_categories) | n_categories < 2){stop('"n_categories" must be an integer greater than 1.')} + if (!is.logical(Fair)){stop('"Fair" must be TRUE or FALSE.')} + if (!is.character(time_dim)){stop('"time_dim" must be a string.')} + if (!is.character(member_dim)){stop('"member_dim" must be a string.')} + if (!is.null(ncores)){ + if (!is.numeric(ncores) | ncores < 1){stop('"ncores" must be either NULL or a positive integer.')} + } + + ## RPS + rps <- multiApply::Apply(data = list(exp = exp, obs = obs), + target_dims = list(exp = c(time_dim, member_dim), obs = time_dim), + n_categories = n_categories, indices_for_clim = indices_for_clim, + Fair = Fair, time_dim = time_dim, member_dim = member_dim, + fun = .RPS, ncores = ncores)$output1 + return(rps) +} + +.RPS <- function(exp, obs, n_categories, indices_for_clim, Fair = FALSE, time_dim = 'year', member_dim = 'member'){ + + exp_probs <- .get_probs(data = exp, type = 'exp', indices_for_quartiles = indices_for_clim, + time_dim = time_dim, member_dim = member_dim, n_categories = n_categories)$probs + + obs_probs <- .get_probs(data = obs, type = 'obs', indices_for_quartiles = indices_for_clim, + time_dim = time_dim, member_dim = NULL, n_categories = n_categories)$probs + + rps <- .RPS_from_probs(probs_exp = exp_probs, probs_obs = obs_probs, n_categories = n_categories, Fair = Fair, n_members = as.numeric(dim(exp)[member_dim])) + + return(rps) +} + +.get_probs <- function(data, type, indices_for_quartiles, time_dim, member_dim, n_categories){ + + if (type == 'exp'){ + target_dims_quantiles = c(time_dim, member_dim) + target_dims_probs = list(member_dim, 'bin') + } else if (type == 'obs'){ + target_dims_quantiles = time_dim + target_dims_probs = list(NULL,'bin') + } else {stop("Parameter 'type' must be 'exp' or 'obs'")} + + quantiles <- multiApply::Apply(data = ClimProjDiags::Subset(x = data, along = time_dim, indices = indices_for_quartiles), + target_dims = target_dims_quantiles, + fun = .quantile_aux, n_categories = n_categories, + output_dims = 'bin', ncores = NULL)$output1 + + probs <- multiApply::Apply(data = list(data = data, quantiles = quantiles), + target_dims = target_dims_probs, + fun = .c2p, + output_dims = 'bin', ncores = NULL)$output1 + + return(list('probs' = probs, 'quantiles' = quantiles)) +} + +.quantile_aux = function(data, n_categories){ + data = as.vector(data) + q = quantile(x = data, probs = 1:(n_categories - 1) / n_categories, type = 8, na.rm = TRUE) + return(q) +} + +.c2p <- function(data, quantiles) { + if (anyNA(data)){ + output <- array(data = NA, dim = length(quantiles) + 1) + } else { + output <- colMeans(easyVerification::convert2prob(as.vector(data), threshold = as.vector(quantiles))) + } + return(output) +} + + +.RPS_from_probs <- function(probs_exp, probs_obs, n_categories, Fair, n_members){ + + rps <- NULL + for (i in 1:n_categories){ + rps[i] <- (cumsum(probs_exp)[i]-cumsum(probs_obs)[i])^2 ## (Wilks 2011, pp.348-350) + } + rps <- sum(rps) + + if (Fair == TRUE){ ## FairRPS + ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps] + R <- n_members + R_new <- Inf + probs_cum <- cumsum(probs_exp) + adjustment <- -1 / (R-1) * probs_cum * (1 - probs_cum) + adjustment <- sum(adjustment) + rps <- rps + adjustment + } + + return(rps) +} diff --git a/R/RPSS.R b/R/RPSS.R new file mode 100644 index 0000000..b9d59d0 --- /dev/null +++ b/R/RPSS.R @@ -0,0 +1,64 @@ + +library(multiApply) +library(easyVerification) +library(s2dv) + +RPSS <- function(exp, obs, ref = NULL, n_categories, indices_for_clim, Fair = FALSE, time_dim = 'year', member_dim = 'member', ncores = NULL){ + + ## Checkings + if (!is.array(exp)){stop('"exp" must be an array.')} + if (!is.array(obs)){stop('"obs" must be an array.')} + if (!is.null(ref)){ + if (!is.array(ref)){stop('"ref" must be an array.')} + if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(ref)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "ref".')} + } + if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(obs)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "obs".')} + if (!is.numeric(n_categories) | n_categories < 2){stop('"n_categories" must be an integer greater than 1.')} + if (!is.logical(Fair)){stop('"Fair" must be TRUE or FALSE.')} + if (!is.character(time_dim)){stop('"time_dim" must be a string.')} + if (!is.character(member_dim)){stop('"member_dim" must be a string.')} + if (!is.null(ncores)){ + if (!is.numeric(ncores) | ncores < 1){stop('"ncores" must be either NULL or a positive integer.')} + } + + if (!is.null(ref)){ ## using "ref" as reference forecast + output <- multiApply::Apply(data = list(exp = exp, obs = obs, ref = ref), + target_dims = list(exp = c(time_dim, member_dim), obs = time_dim, ref = c(time_dim, member_dim)), + n_categories = n_categories, indices_for_clim = indices_for_clim, + Fair = Fair, time_dim = time_dim, member_dim = member_dim, + fun = .RPSS, ncores = ncores) + } else { ## using climatology as reference forecast + output <- multiApply::Apply(data = list(exp = exp, obs = obs), + target_dims = list(exp = c(time_dim, member_dim), obs = time_dim), + ref = ref, n_categories = n_categories, indices_for_clim = indices_for_clim, + Fair = Fair, time_dim = time_dim, member_dim = member_dim, + fun = .RPSS, ncores = ncores) + } + + return(output) +} + +.RPSS <- function(exp, obs, ref, n_categories, indices_for_clim, Fair, time_dim, member_dim){ + + ## RPS of the forecast + rps_exp <- .RPS(exp = exp, obs = obs, n_categories = n_categories, indices_for_clim = indices_for_clim, Fair = Fair, time_dim = time_dim, member_dim = member_dim) + + ## RPS of the reference forecast + if (is.null(ref)){ ## using climatology as reference forecast + clim_probs <- array(data = 1 / n_categories, dim = c(time = length(obs))) + obs_probs <- .get_probs(data = obs, type = 'obs', indices_for_quartiles = indices_for_clim, time_dim = time_dim, member_dim = NULL, n_categories = n_categories)$probs + rps_ref <- .RPS_from_probs(probs_exp = clim_probs, probs_obs = obs_probs, n_categories = n_categories, Fair = FALSE) + } else { ## using "ref" as reference forecast + rps_ref <- .RPS(exp = ref, obs = obs, n_categories = n_categories, indices_for_clim = indices_for_clim, Fair = Fair, time_dim = time_dim, member_dim = member_dim) + } + + ## RPSS + rpss <- 1 - mean(rps_exp) / mean(rps_ref) + + ## Significance + rps_exp <- array(data = rps_exp, dim = c(time = length(rps_exp))) + rps_ref <- array(data = rps_ref, dim = c(time = length(rps_ref))) + sign <- s2dv::RandomWalkTest(skill_A = rps_exp, skill_B = rps_ref, time_dim = 'time', ncores = NULL)$signif + + return(list(rpss = rpss, sign = sign)) +} -- GitLab From 8ec54099762aa2bf7d310d6c472abc402589aec1 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Sat, 5 Mar 2022 15:44:29 +0100 Subject: [PATCH 02/18] fixed dim(clim_probs) --- R/RPSS.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/RPSS.R b/R/RPSS.R index b9d59d0..fc25631 100644 --- a/R/RPSS.R +++ b/R/RPSS.R @@ -45,8 +45,8 @@ RPSS <- function(exp, obs, ref = NULL, n_categories, indices_for_clim, Fair = FA ## RPS of the reference forecast if (is.null(ref)){ ## using climatology as reference forecast - clim_probs <- array(data = 1 / n_categories, dim = c(time = length(obs))) obs_probs <- .get_probs(data = obs, type = 'obs', indices_for_quartiles = indices_for_clim, time_dim = time_dim, member_dim = NULL, n_categories = n_categories)$probs + clim_probs <- array(data = 1 / n_categories, dim = dim(obs_probs)) rps_ref <- .RPS_from_probs(probs_exp = clim_probs, probs_obs = obs_probs, n_categories = n_categories, Fair = FALSE) } else { ## using "ref" as reference forecast rps_ref <- .RPS(exp = ref, obs = obs, n_categories = n_categories, indices_for_clim = indices_for_clim, Fair = Fair, time_dim = time_dim, member_dim = member_dim) -- GitLab From 3a30a3f67b4100267ea743c70431b3f6fed9fcf6 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Thu, 10 Mar 2022 17:06:58 +0100 Subject: [PATCH 03/18] . --- R/RPS.R | 5 +++-- R/RPSS.R | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/R/RPS.R b/R/RPS.R index 0081d8e..c7e7e0b 100644 --- a/R/RPS.R +++ b/R/RPS.R @@ -33,8 +33,9 @@ RPS <- function(exp, obs, n_categories, indices_for_clim, Fair = FALSE, time_dim obs_probs <- .get_probs(data = obs, type = 'obs', indices_for_quartiles = indices_for_clim, time_dim = time_dim, member_dim = NULL, n_categories = n_categories)$probs - rps <- .RPS_from_probs(probs_exp = exp_probs, probs_obs = obs_probs, n_categories = n_categories, Fair = Fair, n_members = as.numeric(dim(exp)[member_dim])) - + rps <- multiApply::Apply(data = list(probs_exp = exp_probs, probs_obs = obs_probs), target_dims = 'bin', + fun = .RPS_from_probs, n_categories = n_categories, Fair = Fair, + n_members = as.numeric(dim(exp)[member_dim]), ncores = NULL)$output1 return(rps) } diff --git a/R/RPSS.R b/R/RPSS.R index fc25631..ae6881f 100644 --- a/R/RPSS.R +++ b/R/RPSS.R @@ -47,7 +47,8 @@ RPSS <- function(exp, obs, ref = NULL, n_categories, indices_for_clim, Fair = FA if (is.null(ref)){ ## using climatology as reference forecast obs_probs <- .get_probs(data = obs, type = 'obs', indices_for_quartiles = indices_for_clim, time_dim = time_dim, member_dim = NULL, n_categories = n_categories)$probs clim_probs <- array(data = 1 / n_categories, dim = dim(obs_probs)) - rps_ref <- .RPS_from_probs(probs_exp = clim_probs, probs_obs = obs_probs, n_categories = n_categories, Fair = FALSE) + rps_ref <- multiApply::Apply(data = list(probs_exp = clim_probs, probs_obs = obs_probs), target_dims = 'bin', + fun = .RPS_from_probs, n_categories = n_categories, Fair = FALSE, ncores = NULL)$output1 } else { ## using "ref" as reference forecast rps_ref <- .RPS(exp = ref, obs = obs, n_categories = n_categories, indices_for_clim = indices_for_clim, Fair = Fair, time_dim = time_dim, member_dim = member_dim) } -- GitLab From 178932ae938d67aa1d9dbb3f25c212f0b028ea61 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Thu, 17 Mar 2022 18:46:42 +0100 Subject: [PATCH 04/18] corrected typo in documentation --- R/RandomWalkTest.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/RandomWalkTest.R b/R/RandomWalkTest.R index 494be65..adeadc1 100644 --- a/R/RandomWalkTest.R +++ b/R/RandomWalkTest.R @@ -1,8 +1,8 @@ #'Random walk test for skill differences #' #'Forecast comparison of the skill obtained with 2 forecasts (with respect to a -#'common reference) based on Random Walks, with significance estimate at the 5% -#'confidence level, as in DelSole and Tippett (2015). +#'common reference) based on Random Walks, with significance estimate at the 95% +#'confidence level, as in DelSole and Tippett (2016). #' #'@param skill_A A numerical array of the time series of the skill with the #' forecaster A's. -- GitLab From 43c6d69b5e6ddb57e546d8a732c32d90d5768764 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Thu, 17 Mar 2022 18:55:35 +0100 Subject: [PATCH 05/18] included documentation --- R/RPS.R | 47 ++++++++++++++++++++++++++++++++++++++++++----- R/RPSS.R | 52 +++++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 89 insertions(+), 10 deletions(-) diff --git a/R/RPS.R b/R/RPS.R index c7e7e0b..1cf25a4 100644 --- a/R/RPS.R +++ b/R/RPS.R @@ -1,8 +1,41 @@ - -library(multiApply) -library(easyVerification) - -RPS <- function(exp, obs, n_categories, indices_for_clim, Fair = FALSE, time_dim = 'year', member_dim = 'member', ncores = NULL){ +#'Compute the Ranked Probability Score +#' +#'The Ranked Probability Score (RPS; Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7) is defined +#'as the sum of the squared differences between the cumulative forecast probabilities (computed from the ensemble +#'members) and the observations (defined as 0% if the category did not happen and 100% if it happened). It can be +#'used to evaluate the skill of multi-categorical probabilistic forecasts. The RPS ranges between 0 (perfect +#'forecast) and n-1 (worst possible forecast), where n is the number of categories. In the case of a forecast +#'divided into two categories (the lowest number of categories that a probabilistic forecast can provide), the +#'RPS corresponds to the Brier Score (BS; Wilks, 2011) and, therefore, ranges between 0 and 1. +#' +#'@param exp A numerical array with the forecast with, at least, time dimension. +#'@param obs A numerical array with the observations with, at least, time dimension. +#' The length of the time dimension must be the same as for "exp". +#'@param indices_for_clim A vector with the indices to be taken along "time_dim" dimension for computing the thresholds +#' between the probabilistic categories. +#'@param n_categories An integer with the number of probabilistic categories. The default value is 3 (tercile categories). +#'@param Fair A logical indicating whether to compute the FairRPSS (the potential RPSS that the forecast would have with +#'an infinite ensemble size). The default value is FALSE. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'year'. +#'@param member_dim A character string indicating the name of the member dimension to compute the +#' ensemble means of the forecast and reference forecast. If it is NULL, the ensemble means should +#' be provided directly to the function. The default value is 'member'. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#' A numerical array with the RPS with the same dimensions as "exp" except the "time_dim" and "member_dim" dimensions. +#' +#'@examples +#' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50)) +#' obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, year = 50)) +#' RPS(exp, obs, indices_for_clim = 1:50) +#' +#'@import multiApply +#'@importFrom easyVerification convert2prob +#'@export +RPS <- function(exp, obs, indices_for_clim, n_categories = 3, Fair = FALSE, time_dim = 'year', member_dim = 'member', ncores = NULL){ ## Checkings if (!is.array(exp)){stop('"exp" must be an array.')} @@ -22,6 +55,10 @@ RPS <- function(exp, obs, n_categories, indices_for_clim, Fair = FALSE, time_dim n_categories = n_categories, indices_for_clim = indices_for_clim, Fair = Fair, time_dim = time_dim, member_dim = member_dim, fun = .RPS, ncores = ncores)$output1 + + ## Returning only the mean RPS + rps <- multiApply::Apply(data = rps, target_dims = time_dim, fun = mean, na.rm = FALSE, ncores = ncores)$output1 + return(rps) } diff --git a/R/RPSS.R b/R/RPSS.R index ae6881f..a06de09 100644 --- a/R/RPSS.R +++ b/R/RPSS.R @@ -1,9 +1,51 @@ +#'Compute the Ranked Probability Skill Score +#' +#'The Ranked Probability Skill Score (RPSS; Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7) +#'is the skill score based on the Ranked Probability Score (RPS; Wilks, 2011). It can be used to assess whether +#'a forecast presents an improvement or worsening with respect to a reference forecast. The RPSS ranges between +#'minus infinite and 1. If the RPSS is positive, it indicates that the forecast has higher skill than the +#'reference forecast, while a negative value means that it has a lower skill. Examples of reference forecasts +#'are the climatological forecast (same probabilities for all categories for all time steps), persistence, a previous model version, +#'and another model. It is computed as RPSS = 1 - RPS_exp / RPS_ref. The statistical significance is obtained based +#'on a Random Walk test at the 95% confidence level (DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1) +#' +#'@param exp A numerical array with the forecast with, at least, time dimension. +#'@param obs A numerical array with the observations with, at least, time dimension. +#' The length of the time dimension must be the same as for "exp". +#'@param ref NULL or a numerical array with the reference forecast with, at least, time dimension. +#' The length of the time dimension must be the same as for "exp". If it is NULL, the climatological forecast is +#' used as reference forecast. The default value is NULL. +#'@param indices_for_clim A vector with the indices to be taken along "time_dim" dimension for computing the thresholds +#' between the probabilistic categories. +#'@param n_categories An integer with the number of probabilistic categories. The default value is 3 (tercile categories). +#'@param Fair A logical indicating whether to compute the FairRPSS (the potential RPSS that the forecast would have with +#'an infinite ensemble size). The default value is FALSE. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'year'. +#'@param member_dim A character string indicating the name of the member dimension to compute the +#' ensemble means of the forecast and reference forecast. If it is NULL, the ensemble means should +#' be provided directly to the function. The default value is 'member'. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'\item{$rpss}{ +#' A numerical array with the RPSS with the same dimensions as "exp" except the "time_dim" +#' and "member_dim" dimensions. +#'} +#'\item{$sign}{ +#' A logical array with the statistical significance of the RPSS with the same dimensions +#' as "exp" except the "time_dim" and "member_dim" dimensions. +#'} -library(multiApply) -library(easyVerification) -library(s2dv) - -RPSS <- function(exp, obs, ref = NULL, n_categories, indices_for_clim, Fair = FALSE, time_dim = 'year', member_dim = 'member', ncores = NULL){ +#'@examples +#' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50)) +#' obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, year = 50)) +#' RPSS(exp, obs, indices_for_clim = 1:50) +#' +#'@import multiApply +#'@export +RPSS <- function(exp, obs, ref = NULL, indices_for_clim, n_categories = 3, Fair = FALSE, time_dim = 'year', member_dim = 'member', ncores = NULL){ ## Checkings if (!is.array(exp)){stop('"exp" must be an array.')} -- GitLab From 7fa4531030744435bdee06a87993766136bcac14 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 25 Mar 2022 12:02:00 +0100 Subject: [PATCH 06/18] indices_for_clim = NULL by default (to use the whole period for climatology) --- R/RPS.R | 9 +++++++-- R/RPSS.R | 9 +++++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/R/RPS.R b/R/RPS.R index 1cf25a4..4cd5120 100644 --- a/R/RPS.R +++ b/R/RPS.R @@ -12,7 +12,7 @@ #'@param obs A numerical array with the observations with, at least, time dimension. #' The length of the time dimension must be the same as for "exp". #'@param indices_for_clim A vector with the indices to be taken along "time_dim" dimension for computing the thresholds -#' between the probabilistic categories. +#' between the probabilistic categories. If NULL, the whole period is used. The default value is NULL. #'@param n_categories An integer with the number of probabilistic categories. The default value is 3 (tercile categories). #'@param Fair A logical indicating whether to compute the FairRPSS (the potential RPSS that the forecast would have with #'an infinite ensemble size). The default value is FALSE. @@ -35,12 +35,17 @@ #'@import multiApply #'@importFrom easyVerification convert2prob #'@export -RPS <- function(exp, obs, indices_for_clim, n_categories = 3, Fair = FALSE, time_dim = 'year', member_dim = 'member', ncores = NULL){ +RPS <- function(exp, obs, indices_for_clim = NULL, n_categories = 3, Fair = FALSE, time_dim = 'year', member_dim = 'member', ncores = NULL){ ## Checkings if (!is.array(exp)){stop('"exp" must be an array.')} if (!is.array(obs)){stop('"obs" must be an array.')} if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(obs)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "obs".')} + if (is.null(indices_for_clim)){ + indices_for_clim <- 1:as.numeric(dim(obs)[time_dim]) + } else if (!is.numeric(indices_for_clim)){ + stop('"indices_for_clim" must be NULL or a numeric vector.') + } if (!is.numeric(n_categories) | n_categories < 2){stop('"n_categories" must be an integer greater than 1.')} if (!is.logical(Fair)){stop('"Fair" must be TRUE or FALSE.')} if (!is.character(time_dim)){stop('"time_dim" must be a string.')} diff --git a/R/RPSS.R b/R/RPSS.R index a06de09..59005c2 100644 --- a/R/RPSS.R +++ b/R/RPSS.R @@ -16,7 +16,7 @@ #' The length of the time dimension must be the same as for "exp". If it is NULL, the climatological forecast is #' used as reference forecast. The default value is NULL. #'@param indices_for_clim A vector with the indices to be taken along "time_dim" dimension for computing the thresholds -#' between the probabilistic categories. +#' between the probabilistic categories. If NULL, the whole period is used. The default value is NULL. #'@param n_categories An integer with the number of probabilistic categories. The default value is 3 (tercile categories). #'@param Fair A logical indicating whether to compute the FairRPSS (the potential RPSS that the forecast would have with #'an infinite ensemble size). The default value is FALSE. @@ -45,7 +45,7 @@ #' #'@import multiApply #'@export -RPSS <- function(exp, obs, ref = NULL, indices_for_clim, n_categories = 3, Fair = FALSE, time_dim = 'year', member_dim = 'member', ncores = NULL){ +RPSS <- function(exp, obs, ref = NULL, indices_for_clim = NULL, n_categories = 3, Fair = FALSE, time_dim = 'year', member_dim = 'member', ncores = NULL){ ## Checkings if (!is.array(exp)){stop('"exp" must be an array.')} @@ -55,6 +55,11 @@ RPSS <- function(exp, obs, ref = NULL, indices_for_clim, n_categories = 3, Fair if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(ref)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "ref".')} } if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(obs)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "obs".')} + if (is.null(indices_for_clim)){ + indices_for_clim <- 1:as.numeric(dim(obs)[time_dim]) + } else if (!is.numeric(indices_for_clim)){ + stop('"indices_for_clim" must be NULL or a numeric vector.') + } if (!is.numeric(n_categories) | n_categories < 2){stop('"n_categories" must be an integer greater than 1.')} if (!is.logical(Fair)){stop('"Fair" must be TRUE or FALSE.')} if (!is.character(time_dim)){stop('"time_dim" must be a string.')} -- GitLab From 8f72f8b8ddca314abb93f75d5b0e7c7f1f139fb7 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Wed, 30 Mar 2022 13:15:02 +0200 Subject: [PATCH 07/18] replaced n_categories with prob_thresholds --- R/RPS.R | 57 ++++++++++++++++++++++++-------------------------------- R/RPSS.R | 45 +++++++++++++++++++++++++++++++------------- 2 files changed, 56 insertions(+), 46 deletions(-) diff --git a/R/RPS.R b/R/RPS.R index 4cd5120..b28791e 100644 --- a/R/RPS.R +++ b/R/RPS.R @@ -5,15 +5,16 @@ #'members) and the observations (defined as 0% if the category did not happen and 100% if it happened). It can be #'used to evaluate the skill of multi-categorical probabilistic forecasts. The RPS ranges between 0 (perfect #'forecast) and n-1 (worst possible forecast), where n is the number of categories. In the case of a forecast -#'divided into two categories (the lowest number of categories that a probabilistic forecast can provide), the +#'divided into two categories (the lowest number of categories that a probabilistic forecast can have), the #'RPS corresponds to the Brier Score (BS; Wilks, 2011) and, therefore, ranges between 0 and 1. #' #'@param exp A numerical array with the forecast with, at least, time dimension. #'@param obs A numerical array with the observations with, at least, time dimension. #' The length of the time dimension must be the same as for "exp". +#'@param prob_thresholds A numeric vector with the relative thresholds (from 0 to 1) between the categories. +#' The default value is c(1/3,2/3), which corresponds to tercile equiprobable categories. #'@param indices_for_clim A vector with the indices to be taken along "time_dim" dimension for computing the thresholds #' between the probabilistic categories. If NULL, the whole period is used. The default value is NULL. -#'@param n_categories An integer with the number of probabilistic categories. The default value is 3 (tercile categories). #'@param Fair A logical indicating whether to compute the FairRPSS (the potential RPSS that the forecast would have with #'an infinite ensemble size). The default value is FALSE. #'@param time_dim A character string indicating the name of the time dimension. @@ -30,23 +31,26 @@ #'@examples #' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50)) #' obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, year = 50)) -#' RPS(exp, obs, indices_for_clim = 1:50) +#' RPS(exp = exp, obs = obs) #' #'@import multiApply +#'@importFrom ClimProjDiags Subset #'@importFrom easyVerification convert2prob #'@export -RPS <- function(exp, obs, indices_for_clim = NULL, n_categories = 3, Fair = FALSE, time_dim = 'year', member_dim = 'member', ncores = NULL){ +RPS <- function(exp, obs, prob_thresholds = c(1/3,2/3), indices_for_clim = NULL, Fair = FALSE, time_dim = 'year', member_dim = 'member', ncores = NULL){ ## Checkings if (!is.array(exp)){stop('"exp" must be an array.')} if (!is.array(obs)){stop('"obs" must be an array.')} if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(obs)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "obs".')} + if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | any(prob_thresholds <= 0) | any(prob_thresholds >= 1)){ + stop('"prob_thresholds" must be a numeric vector with the relative thresholds (from 0 to 1) between the categories.') + } if (is.null(indices_for_clim)){ indices_for_clim <- 1:as.numeric(dim(obs)[time_dim]) } else if (!is.numeric(indices_for_clim)){ stop('"indices_for_clim" must be NULL or a numeric vector.') } - if (!is.numeric(n_categories) | n_categories < 2){stop('"n_categories" must be an integer greater than 1.')} if (!is.logical(Fair)){stop('"Fair" must be TRUE or FALSE.')} if (!is.character(time_dim)){stop('"time_dim" must be a string.')} if (!is.character(member_dim)){stop('"member_dim" must be a string.')} @@ -57,7 +61,7 @@ RPS <- function(exp, obs, indices_for_clim = NULL, n_categories = 3, Fair = FALS ## RPS rps <- multiApply::Apply(data = list(exp = exp, obs = obs), target_dims = list(exp = c(time_dim, member_dim), obs = time_dim), - n_categories = n_categories, indices_for_clim = indices_for_clim, + prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, Fair = Fair, time_dim = time_dim, member_dim = member_dim, fun = .RPS, ncores = ncores)$output1 @@ -67,47 +71,34 @@ RPS <- function(exp, obs, indices_for_clim = NULL, n_categories = 3, Fair = FALS return(rps) } -.RPS <- function(exp, obs, n_categories, indices_for_clim, Fair = FALSE, time_dim = 'year', member_dim = 'member'){ +.RPS <- function(exp, obs, prob_thresholds, indices_for_clim, Fair, time_dim, member_dim){ - exp_probs <- .get_probs(data = exp, type = 'exp', indices_for_quartiles = indices_for_clim, - time_dim = time_dim, member_dim = member_dim, n_categories = n_categories)$probs + exp_probs <- .get_probs(data = exp, type = 'exp', indices_for_quantiles = indices_for_clim, + time_dim = time_dim, member_dim = member_dim, prob_thresholds = prob_thresholds) - obs_probs <- .get_probs(data = obs, type = 'obs', indices_for_quartiles = indices_for_clim, - time_dim = time_dim, member_dim = NULL, n_categories = n_categories)$probs + obs_probs <- .get_probs(data = obs, type = 'obs', indices_for_quantiles = indices_for_clim, + time_dim = time_dim, member_dim = NULL, prob_thresholds = prob_thresholds) + ## RPS for each time step rps <- multiApply::Apply(data = list(probs_exp = exp_probs, probs_obs = obs_probs), target_dims = 'bin', - fun = .RPS_from_probs, n_categories = n_categories, Fair = Fair, + fun = .RPS_from_probs, n_categories = length(prob_thresholds)+1, Fair = Fair, n_members = as.numeric(dim(exp)[member_dim]), ncores = NULL)$output1 return(rps) } -.get_probs <- function(data, type, indices_for_quartiles, time_dim, member_dim, n_categories){ +.get_probs <- function(data, type, indices_for_quantiles, time_dim, member_dim, prob_thresholds){ if (type == 'exp'){ - target_dims_quantiles = c(time_dim, member_dim) - target_dims_probs = list(member_dim, 'bin') + target_dims = member_dim } else if (type == 'obs'){ - target_dims_quantiles = time_dim - target_dims_probs = list(NULL,'bin') + target_dims = NULL } else {stop("Parameter 'type' must be 'exp' or 'obs'")} - quantiles <- multiApply::Apply(data = ClimProjDiags::Subset(x = data, along = time_dim, indices = indices_for_quartiles), - target_dims = target_dims_quantiles, - fun = .quantile_aux, n_categories = n_categories, - output_dims = 'bin', ncores = NULL)$output1 - - probs <- multiApply::Apply(data = list(data = data, quantiles = quantiles), - target_dims = target_dims_probs, - fun = .c2p, - output_dims = 'bin', ncores = NULL)$output1 + quantiles <- quantile(x = as.vector(data), probs = prob_thresholds, type = 8, na.rm = TRUE) ## Absolute thresholds - return(list('probs' = probs, 'quantiles' = quantiles)) -} - -.quantile_aux = function(data, n_categories){ - data = as.vector(data) - q = quantile(x = data, probs = 1:(n_categories - 1) / n_categories, type = 8, na.rm = TRUE) - return(q) + probs <- multiApply::Apply(data = data, target_dims = target_dims, fun = .c2p, output_dims = 'bin', + quantiles = quantiles, ncores = NULL)$output1 ## Probabilities + return(probs) } .c2p <- function(data, quantiles) { diff --git a/R/RPSS.R b/R/RPSS.R index 59005c2..f7dc80d 100644 --- a/R/RPSS.R +++ b/R/RPSS.R @@ -15,9 +15,10 @@ #'@param ref NULL or a numerical array with the reference forecast with, at least, time dimension. #' The length of the time dimension must be the same as for "exp". If it is NULL, the climatological forecast is #' used as reference forecast. The default value is NULL. +#'@param prob_thresholds A numeric vector with the relative thresholds (between 0 and 1) between the categories. +#' The default value is c(1/3,2/3), which corresponds to tercile equiprobable categories. #'@param indices_for_clim A vector with the indices to be taken along "time_dim" dimension for computing the thresholds #' between the probabilistic categories. If NULL, the whole period is used. The default value is NULL. -#'@param n_categories An integer with the number of probabilistic categories. The default value is 3 (tercile categories). #'@param Fair A logical indicating whether to compute the FairRPSS (the potential RPSS that the forecast would have with #'an infinite ensemble size). The default value is FALSE. #'@param time_dim A character string indicating the name of the time dimension. @@ -40,27 +41,31 @@ #'@examples #' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50)) +#' ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50)) #' obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, year = 50)) -#' RPSS(exp, obs, indices_for_clim = 1:50) +#' RPSS(exp = exp, obs = obs) ## climatology as reference forecast +#' RPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast #' #'@import multiApply #'@export -RPSS <- function(exp, obs, ref = NULL, indices_for_clim = NULL, n_categories = 3, Fair = FALSE, time_dim = 'year', member_dim = 'member', ncores = NULL){ +RPSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3,2/3), indices_for_clim = NULL, Fair = FALSE, time_dim = 'year', member_dim = 'member', ncores = NULL){ ## Checkings if (!is.array(exp)){stop('"exp" must be an array.')} if (!is.array(obs)){stop('"obs" must be an array.')} + if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(obs)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "obs".')} if (!is.null(ref)){ if (!is.array(ref)){stop('"ref" must be an array.')} if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(ref)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "ref".')} } - if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(obs)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "obs".')} + if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | any(prob_thresholds <= 0) | any(prob_thresholds >= 1)){ + stop('"prob_thresholds" must be a numeric vector with the relative thresholds (from 0 to 1) between the categories.') + } if (is.null(indices_for_clim)){ indices_for_clim <- 1:as.numeric(dim(obs)[time_dim]) } else if (!is.numeric(indices_for_clim)){ stop('"indices_for_clim" must be NULL or a numeric vector.') } - if (!is.numeric(n_categories) | n_categories < 2){stop('"n_categories" must be an integer greater than 1.')} if (!is.logical(Fair)){stop('"Fair" must be TRUE or FALSE.')} if (!is.character(time_dim)){stop('"time_dim" must be a string.')} if (!is.character(member_dim)){stop('"member_dim" must be a string.')} @@ -71,13 +76,13 @@ RPSS <- function(exp, obs, ref = NULL, indices_for_clim = NULL, n_categories = 3 if (!is.null(ref)){ ## using "ref" as reference forecast output <- multiApply::Apply(data = list(exp = exp, obs = obs, ref = ref), target_dims = list(exp = c(time_dim, member_dim), obs = time_dim, ref = c(time_dim, member_dim)), - n_categories = n_categories, indices_for_clim = indices_for_clim, + prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, Fair = Fair, time_dim = time_dim, member_dim = member_dim, fun = .RPSS, ncores = ncores) } else { ## using climatology as reference forecast output <- multiApply::Apply(data = list(exp = exp, obs = obs), target_dims = list(exp = c(time_dim, member_dim), obs = time_dim), - ref = ref, n_categories = n_categories, indices_for_clim = indices_for_clim, + ref = ref, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, Fair = Fair, time_dim = time_dim, member_dim = member_dim, fun = .RPSS, ncores = ncores) } @@ -85,19 +90,19 @@ RPSS <- function(exp, obs, ref = NULL, indices_for_clim = NULL, n_categories = 3 return(output) } -.RPSS <- function(exp, obs, ref, n_categories, indices_for_clim, Fair, time_dim, member_dim){ +.RPSS <- function(exp, obs, ref, prob_thresholds, indices_for_clim, Fair, time_dim, member_dim){ ## RPS of the forecast - rps_exp <- .RPS(exp = exp, obs = obs, n_categories = n_categories, indices_for_clim = indices_for_clim, Fair = Fair, time_dim = time_dim, member_dim = member_dim) + rps_exp <- .RPS(exp = exp, obs = obs, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, Fair = Fair, time_dim = time_dim, member_dim = member_dim) ## RPS of the reference forecast if (is.null(ref)){ ## using climatology as reference forecast - obs_probs <- .get_probs(data = obs, type = 'obs', indices_for_quartiles = indices_for_clim, time_dim = time_dim, member_dim = NULL, n_categories = n_categories)$probs - clim_probs <- array(data = 1 / n_categories, dim = dim(obs_probs)) + obs_probs <- .get_probs(data = obs, type = 'obs', indices_for_quantiles = indices_for_clim, time_dim = time_dim, member_dim = NULL, prob_thresholds = prob_thresholds) + clim_probs <- .get_probs_clim(prob_thresholds = prob_thresholds, dims = dim(obs_probs), time_dim = time_dim) rps_ref <- multiApply::Apply(data = list(probs_exp = clim_probs, probs_obs = obs_probs), target_dims = 'bin', - fun = .RPS_from_probs, n_categories = n_categories, Fair = FALSE, ncores = NULL)$output1 + fun = .RPS_from_probs, n_categories = length(prob_thresholds)+1, Fair = FALSE, ncores = NULL)$output1 } else { ## using "ref" as reference forecast - rps_ref <- .RPS(exp = ref, obs = obs, n_categories = n_categories, indices_for_clim = indices_for_clim, Fair = Fair, time_dim = time_dim, member_dim = member_dim) + rps_ref <- .RPS(exp = ref, obs = obs, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, Fair = Fair, time_dim = time_dim, member_dim = member_dim) } ## RPSS @@ -110,3 +115,17 @@ RPSS <- function(exp, obs, ref = NULL, indices_for_clim = NULL, n_categories = 3 return(list(rpss = rpss, sign = sign)) } + +.get_probs_clim <- function(prob_thresholds, dims, time_dim){ + + probs <- array(data = NA, dim = c(bin = length(prob_thresholds)+1)) + probs[1] <- prob_thresholds[1] + if (length(prob_thresholds) > 1){ + probs[2:(length(probs)-1)] <- diff(prob_thresholds) + } + probs[length(probs)] <- 1 - prob_thresholds[length(prob_thresholds)] + stopifnot(identical(sum(probs),1)) + probs <- s2dv::InsertDim(data = probs, posdim = 2, lendim = dims[[time_dim]], name = time_dim) + + return(probs) +} -- GitLab From e2990e4edddf003d258901a91e0cf77e6c269080 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 11 May 2022 17:56:08 +0200 Subject: [PATCH 08/18] Formatting and combine small functions --- R/RPS.R | 172 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 96 insertions(+), 76 deletions(-) diff --git a/R/RPS.R b/R/RPS.R index b28791e..886ecec 100644 --- a/R/RPS.R +++ b/R/RPS.R @@ -1,45 +1,60 @@ #'Compute the Ranked Probability Score #' -#'The Ranked Probability Score (RPS; Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7) is defined -#'as the sum of the squared differences between the cumulative forecast probabilities (computed from the ensemble -#'members) and the observations (defined as 0% if the category did not happen and 100% if it happened). It can be -#'used to evaluate the skill of multi-categorical probabilistic forecasts. The RPS ranges between 0 (perfect -#'forecast) and n-1 (worst possible forecast), where n is the number of categories. In the case of a forecast -#'divided into two categories (the lowest number of categories that a probabilistic forecast can have), the -#'RPS corresponds to the Brier Score (BS; Wilks, 2011) and, therefore, ranges between 0 and 1. +#'The Ranked Probability Score (RPS; Wilks, 2011) is defined as the sum of the +#'squared differences between the cumulative forecast probabilities (computed +#'from the ensemble members) and the observations (defined as 0% if the category +#'did not happen and 100% if it happened). It can be used to evaluate the skill +#'of multi-categorical probabilistic forecasts. The RPS ranges between 0 +#'(perfect forecast) and n-1 (worst possible forecast), where n is the number of +#'categories. In the case of a forecast divided into two categories (the lowest +#'number of categories that a probabilistic forecast can have), the RPS +#'corresponds to the Brier Score (BS; Wilks, 2011), therefore, ranges between 0 +#'and 1. #' -#'@param exp A numerical array with the forecast with, at least, time dimension. -#'@param obs A numerical array with the observations with, at least, time dimension. -#' The length of the time dimension must be the same as for "exp". -#'@param prob_thresholds A numeric vector with the relative thresholds (from 0 to 1) between the categories. -#' The default value is c(1/3,2/3), which corresponds to tercile equiprobable categories. -#'@param indices_for_clim A vector with the indices to be taken along "time_dim" dimension for computing the thresholds -#' between the probabilistic categories. If NULL, the whole period is used. The default value is NULL. -#'@param Fair A logical indicating whether to compute the FairRPSS (the potential RPSS that the forecast would have with -#'an infinite ensemble size). The default value is FALSE. +#'@param exp A named numerical array of the forecast with at least time +#' dimension. +#'@param obs A named numerical array of the observation with at least time +#' dimension. The dimensions must be the same as 'exp' expect 'memb_dim'. #'@param time_dim A character string indicating the name of the time dimension. -#' The default value is 'year'. -#'@param member_dim A character string indicating the name of the member dimension to compute the -#' ensemble means of the forecast and reference forecast. If it is NULL, the ensemble means should -#' be provided directly to the function. The default value is 'member'. +#' The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the ensemble mean of the forecast. If it is NULL, the ensemble +#' mean should be provided directly to the function. The default value is NULL. +#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to +#' 1) between the categories. The default value is c(1/3, 2/3), which +#' corresponds to tercile equiprobable categories. +#'@param indices_for_clim A vector of the indices to be taken along "time_dim" +#' for computing the thresholds between the probabilistic categories. If NULL, +#' the whole period is used. The default value is NULL. +#'@param Fair A logical indicating whether to compute the FairRPSS (the +#' potential RPSS that the forecast would have with an infinite ensemble size). +#' The default value is FALSE. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' #'@return -#' A numerical array with the RPS with the same dimensions as "exp" except the "time_dim" and "member_dim" dimensions. +#'A numerical array of RPS with the same dimensions as "exp" except the +#'"time_dim" and "memb_dim" dimensions. +#' +#'@references +#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 #' #'@examples -#' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50)) -#' obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, year = 50)) -#' RPS(exp = exp, obs = obs) +#' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#' obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +#' res <- RPS(exp = exp, obs = obs, memb_dim = 'member') #' #'@import multiApply #'@importFrom ClimProjDiags Subset #'@importFrom easyVerification convert2prob #'@export -RPS <- function(exp, obs, prob_thresholds = c(1/3,2/3), indices_for_clim = NULL, Fair = FALSE, time_dim = 'year', member_dim = 'member', ncores = NULL){ +RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, + prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, + ncores = NULL) { - ## Checkings + # Check inputs + ## exp and obs (1) + if (!is.array(exp)){stop('"exp" must be an array.')} if (!is.array(obs)){stop('"obs" must be an array.')} if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(obs)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "obs".')} @@ -53,81 +68,86 @@ RPS <- function(exp, obs, prob_thresholds = c(1/3,2/3), indices_for_clim = NULL, } if (!is.logical(Fair)){stop('"Fair" must be TRUE or FALSE.')} if (!is.character(time_dim)){stop('"time_dim" must be a string.')} - if (!is.character(member_dim)){stop('"member_dim" must be a string.')} + if (!is.character(memb_dim)){stop('"memb_dim" must be a string.')} if (!is.null(ncores)){ if (!is.numeric(ncores) | ncores < 1){stop('"ncores" must be either NULL or a positive integer.')} } ## RPS - rps <- multiApply::Apply(data = list(exp = exp, obs = obs), - target_dims = list(exp = c(time_dim, member_dim), obs = time_dim), - prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, - Fair = Fair, time_dim = time_dim, member_dim = member_dim, - fun = .RPS, ncores = ncores)$output1 - - ## Returning only the mean RPS - rps <- multiApply::Apply(data = rps, target_dims = time_dim, fun = mean, na.rm = FALSE, ncores = ncores)$output1 + rps <- Apply(data = list(exp = exp, obs = obs), + target_dims = list(exp = c(time_dim, memb_dim), obs = time_dim), + fun = .RPS, + prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, + Fair = Fair, + ncores = ncores)$output1 return(rps) } -.RPS <- function(exp, obs, prob_thresholds, indices_for_clim, Fair, time_dim, member_dim){ - - exp_probs <- .get_probs(data = exp, type = 'exp', indices_for_quantiles = indices_for_clim, - time_dim = time_dim, member_dim = member_dim, prob_thresholds = prob_thresholds) - - obs_probs <- .get_probs(data = obs, type = 'obs', indices_for_quantiles = indices_for_clim, - time_dim = time_dim, member_dim = NULL, prob_thresholds = prob_thresholds) - + +.RPS <- function(exp, obs, prob_thresholds, indices_for_clim, Fair) { + # exp: [sdate, memb] + # obs: [sdate] + + exp_probs <- .get_probs(data = exp, indices_for_quantiles = indices_for_clim, + prob_thresholds = prob_thresholds) + # exp_probs: [bin, sdate] + obs_probs <- .get_probs(data = obs, indices_for_quantiles = indices_for_clim, + prob_thresholds = prob_thresholds) + # obs_probs: [bin, sdate] + ## RPS for each time step - rps <- multiApply::Apply(data = list(probs_exp = exp_probs, probs_obs = obs_probs), target_dims = 'bin', - fun = .RPS_from_probs, n_categories = length(prob_thresholds)+1, Fair = Fair, - n_members = as.numeric(dim(exp)[member_dim]), ncores = NULL)$output1 + rps <- Apply(data = list(probs_exp = exp_probs, probs_obs = obs_probs), target_dims = 'bin', + fun = .RPS_from_probs, Fair = Fair, + n_members = dim(exp)[2], ncores = NULL)$output1 + # rps: [sdate] + + # Return only the mean RPS + rps <- mean(rps, na.rm = FALSE) + return(rps) } -.get_probs <- function(data, type, indices_for_quantiles, time_dim, member_dim, prob_thresholds){ - - if (type == 'exp'){ - target_dims = member_dim - } else if (type == 'obs'){ - target_dims = NULL - } else {stop("Parameter 'type' must be 'exp' or 'obs'")} - - quantiles <- quantile(x = as.vector(data), probs = prob_thresholds, type = 8, na.rm = TRUE) ## Absolute thresholds - - probs <- multiApply::Apply(data = data, target_dims = target_dims, fun = .c2p, output_dims = 'bin', - quantiles = quantiles, ncores = NULL)$output1 ## Probabilities - return(probs) -} -.c2p <- function(data, quantiles) { - if (anyNA(data)){ - output <- array(data = NA, dim = length(quantiles) + 1) - } else { - output <- colMeans(easyVerification::convert2prob(as.vector(data), threshold = as.vector(quantiles))) +.get_probs <- function(data, indices_for_quantiles, prob_thresholds) { + # if exp: [sdate, memb] + # if obs: [sdate] + if (length(dim(data)) == 1) dim(data) <- c(dim(data), 1) + + # Absolute thresholds + quantiles <- quantile(x = as.vector(data), probs = prob_thresholds, type = 8, na.rm = TRUE) + + # Probabilities + probs <- array(dim = c(bin = length(quantiles) + 1, dim(data)[1])) # [bin, sdate] + for (i_time in 1:dim(data)[1]) { + if (anyNA(data[i_time, ])) { + probs[, i_time] <- rep(NA, dim = length(quantiles) + 1) + } else { + probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], threshold = quantiles)) + } } - return(output) + + return(probs) } -.RPS_from_probs <- function(probs_exp, probs_obs, n_categories, Fair, n_members){ - - rps <- NULL - for (i in 1:n_categories){ - rps[i] <- (cumsum(probs_exp)[i]-cumsum(probs_obs)[i])^2 ## (Wilks 2011, pp.348-350) - } - rps <- sum(rps) - - if (Fair == TRUE){ ## FairRPS +.RPS_from_probs <- function(probs_exp, probs_obs, Fair, n_members) { + # probs_exp: [bin] + # probs_obs: [bin] + + rps <- sum((cumsum(probs_exp) - cumsum(probs_obs))^2) + + if (Fair) { # FairRPS ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps] R <- n_members R_new <- Inf probs_cum <- cumsum(probs_exp) - adjustment <- -1 / (R-1) * probs_cum * (1 - probs_cum) + adjustment <- -1 / (R - 1) * probs_cum * (1 - probs_cum) adjustment <- sum(adjustment) rps <- rps + adjustment } return(rps) } + + -- GitLab From 1379da555e2fc3c112d4e649aab5e5ae8e146b86 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 12 May 2022 14:49:15 +0200 Subject: [PATCH 09/18] Improve RPS() efficiency and correct the uusage of indices_for_clim; unit test created --- NAMESPACE | 3 + R/RPS.R | 149 +++++++++++++++++++++++--------------- man/RPS.Rd | 70 ++++++++++++++++++ man/RPSS.Rd | 75 +++++++++++++++++++ man/RandomWalkTest.Rd | 4 +- tests/testthat/test-RPS.R | 129 +++++++++++++++++++++++++++++++++ 6 files changed, 368 insertions(+), 62 deletions(-) create mode 100644 man/RPS.Rd create mode 100644 man/RPSS.Rd create mode 100644 tests/testthat/test-RPS.R diff --git a/NAMESPACE b/NAMESPACE index 841814e..1449ad4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -54,6 +54,8 @@ export(ProjectField) export(REOF) export(RMS) export(RMSSS) +export(RPS) +export(RPSS) export(RandomWalkTest) export(RatioPredictableComponents) export(RatioRMS) @@ -92,6 +94,7 @@ importFrom(ClimProjDiags,WeightedMean) importFrom(abind,abind) importFrom(abind,adrop) importFrom(easyNCDF,ArrayToNc) +importFrom(easyVerification,convert2prob) importFrom(grDevices,bmp) importFrom(grDevices,col2rgb) importFrom(grDevices,colorRampPalette) diff --git a/R/RPS.R b/R/RPS.R index 886ecec..744b9c7 100644 --- a/R/RPS.R +++ b/R/RPS.R @@ -14,16 +14,15 @@ #'@param exp A named numerical array of the forecast with at least time #' dimension. #'@param obs A named numerical array of the observation with at least time -#' dimension. The dimensions must be the same as 'exp' expect 'memb_dim'. +#' dimension. The dimensions must be the same as 'exp' except 'memb_dim'. #'@param time_dim A character string indicating the name of the time dimension. #' The default value is 'sdate'. #'@param memb_dim A character string indicating the name of the member dimension -#' to compute the ensemble mean of the forecast. If it is NULL, the ensemble -#' mean should be provided directly to the function. The default value is NULL. +#' to compute the probabilities of the forecast. The default value is 'member'. #'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to #' 1) between the categories. The default value is c(1/3, 2/3), which #' corresponds to tercile equiprobable categories. -#'@param indices_for_clim A vector of the indices to be taken along "time_dim" +#'@param indices_for_clim A vector of the indices to be taken along 'time_dim' #' for computing the thresholds between the probabilistic categories. If NULL, #' the whole period is used. The default value is NULL. #'@param Fair A logical indicating whether to compute the FairRPSS (the @@ -34,51 +33,93 @@ #' #'@return #'A numerical array of RPS with the same dimensions as "exp" except the -#'"time_dim" and "memb_dim" dimensions. +#''time_dim' and 'memb_dim' dimensions. #' #'@references #'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 #' #'@examples -#' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) -#' obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) -#' res <- RPS(exp = exp, obs = obs, memb_dim = 'member') +#'exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#'obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +#'res <- RPS(exp = exp, obs = obs) #' #'@import multiApply -#'@importFrom ClimProjDiags Subset #'@importFrom easyVerification convert2prob #'@export -RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, +RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, ncores = NULL) { # Check inputs ## exp and obs (1) - - if (!is.array(exp)){stop('"exp" must be an array.')} - if (!is.array(obs)){stop('"obs" must be an array.')} - if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(obs)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "obs".')} - if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | any(prob_thresholds <= 0) | any(prob_thresholds >= 1)){ - stop('"prob_thresholds" must be a numeric vector with the relative thresholds (from 0 to 1) between the categories.') + if (!is.array(exp) | !is.numeric(exp)) + stop('Parameter "exp" must be a numeric array.') + if (!is.array(obs) | !is.numeric(obs)) + stop('Parameter "obs" must be a numeric array.') + if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") } - if (is.null(indices_for_clim)){ - indices_for_clim <- 1:as.numeric(dim(obs)[time_dim]) - } else if (!is.numeric(indices_for_clim)){ - stop('"indices_for_clim" must be NULL or a numeric vector.') + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop('Parameter "time_dim" must be a character string.') + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") } - if (!is.logical(Fair)){stop('"Fair" must be TRUE or FALSE.')} - if (!is.character(time_dim)){stop('"time_dim" must be a string.')} - if (!is.character(memb_dim)){stop('"memb_dim" must be a string.')} - if (!is.null(ncores)){ - if (!is.numeric(ncores) | ncores < 1){stop('"ncores" must be either NULL or a positive integer.')} + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") } - - ## RPS + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions expect 'memb_dim'.")) + } + ## prob_thresholds + if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | + any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { + stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.") + } + ## indices_for_clim + if (is.null(indices_for_clim)) { + indices_for_clim <- 1:dim(obs)[time_dim] + } else { + if (!is.numeric(indices_for_clim) | !is.vector(indices_for_clim)) { + stop("Parameter 'indices_for_clim' must be NULL or a numeric vector.") + } else if (length(indices_for_clim) > dim(obs)[time_dim] | + max(indices_for_clim) > dim(obs)[time_dim] | + any(indices_for_clim) < 1) { + stop("Parameter 'indices_for_clim' should be the indices of 'time_dim'.") + } + } + ## Fair + if (!is.logical(Fair) | length(Fair) > 1) { + stop("Parameter 'Fair' must be either TRUE or FALSE.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + + # Compute RPS rps <- Apply(data = list(exp = exp, obs = obs), - target_dims = list(exp = c(time_dim, memb_dim), obs = time_dim), + target_dims = list(exp = c(time_dim, memb_dim), + obs = time_dim), fun = .RPS, - prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, - Fair = Fair, + prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, ncores = ncores)$output1 return(rps) @@ -95,59 +136,47 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, obs_probs <- .get_probs(data = obs, indices_for_quantiles = indices_for_clim, prob_thresholds = prob_thresholds) # obs_probs: [bin, sdate] - - ## RPS for each time step - rps <- Apply(data = list(probs_exp = exp_probs, probs_obs = obs_probs), target_dims = 'bin', - fun = .RPS_from_probs, Fair = Fair, - n_members = dim(exp)[2], ncores = NULL)$output1 + + probs_exp_cumsum <- apply(exp_probs, 2, cumsum) + probs_obs_cumsum <- apply(obs_probs, 2, cumsum) + rps <- apply((probs_exp_cumsum - probs_obs_cumsum)^2, 2, sum) # rps: [sdate] + if (Fair) { # FairRPS + ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps] + R <- dim(exp)[2] #memb + R_new <- Inf + adjustment <- (-1) / (R - 1) * probs_exp_cumsum * (1 - probs_exp_cumsum) + adjustment <- apply(adjustment, 2, sum) + rps <- rps + adjustment + } + # Return only the mean RPS rps <- mean(rps, na.rm = FALSE) return(rps) } - .get_probs <- function(data, indices_for_quantiles, prob_thresholds) { # if exp: [sdate, memb] # if obs: [sdate] + + # Add dim [memb = 1] to obs if (length(dim(data)) == 1) dim(data) <- c(dim(data), 1) # Absolute thresholds - quantiles <- quantile(x = as.vector(data), probs = prob_thresholds, type = 8, na.rm = TRUE) + quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), probs = prob_thresholds, type = 8, na.rm = TRUE) # Probabilities probs <- array(dim = c(bin = length(quantiles) + 1, dim(data)[1])) # [bin, sdate] for (i_time in 1:dim(data)[1]) { if (anyNA(data[i_time, ])) { - probs[, i_time] <- rep(NA, dim = length(quantiles) + 1) + probs[, i_time] <- rep(NA, dim = length(quantiles) + 1) } else { - probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], threshold = quantiles)) + probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], threshold = quantiles)) } } return(probs) } - -.RPS_from_probs <- function(probs_exp, probs_obs, Fair, n_members) { - # probs_exp: [bin] - # probs_obs: [bin] - - rps <- sum((cumsum(probs_exp) - cumsum(probs_obs))^2) - - if (Fair) { # FairRPS - ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps] - R <- n_members - R_new <- Inf - probs_cum <- cumsum(probs_exp) - adjustment <- -1 / (R - 1) * probs_cum * (1 - probs_cum) - adjustment <- sum(adjustment) - rps <- rps + adjustment - } - - return(rps) -} - - diff --git a/man/RPS.Rd b/man/RPS.Rd new file mode 100644 index 0000000..d1acc02 --- /dev/null +++ b/man/RPS.Rd @@ -0,0 +1,70 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RPS.R +\name{RPS} +\alias{RPS} +\title{Compute the Ranked Probability Score} +\usage{ +RPS( + exp, + obs, + time_dim = "sdate", + memb_dim = "member", + prob_thresholds = c(1/3, 2/3), + indices_for_clim = NULL, + Fair = FALSE, + ncores = NULL +) +} +\arguments{ +\item{exp}{A named numerical array of the forecast with at least time +dimension.} + +\item{obs}{A named numerical array of the observation with at least time +dimension. The dimensions must be the same as 'exp' except 'memb_dim'.} + +\item{time_dim}{A character string indicating the name of the time dimension. +The default value is 'sdate'.} + +\item{memb_dim}{A character string indicating the name of the member dimension +to compute the probabilities of the forecast. The default value is 'member'.} + +\item{prob_thresholds}{A numeric vector of the relative thresholds (from 0 to +1) between the categories. The default value is c(1/3, 2/3), which +corresponds to tercile equiprobable categories.} + +\item{indices_for_clim}{A vector of the indices to be taken along 'time_dim' +for computing the thresholds between the probabilistic categories. If NULL, +the whole period is used. The default value is NULL.} + +\item{Fair}{A logical indicating whether to compute the FairRPSS (the +potential RPSS that the forecast would have with an infinite ensemble size). +The default value is FALSE.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A numerical array of RPS with the same dimensions as "exp" except the +'time_dim' and 'memb_dim' dimensions. +} +\description{ +The Ranked Probability Score (RPS; Wilks, 2011) is defined as the sum of the +squared differences between the cumulative forecast probabilities (computed +from the ensemble members) and the observations (defined as 0% if the category +did not happen and 100% if it happened). It can be used to evaluate the skill +of multi-categorical probabilistic forecasts. The RPS ranges between 0 +(perfect forecast) and n-1 (worst possible forecast), where n is the number of +categories. In the case of a forecast divided into two categories (the lowest +number of categories that a probabilistic forecast can have), the RPS +corresponds to the Brier Score (BS; Wilks, 2011), therefore, ranges between 0 +and 1. +} +\examples{ +exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +res <- RPS(exp = exp, obs = obs) + +} +\references{ +Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +} diff --git a/man/RPSS.Rd b/man/RPSS.Rd new file mode 100644 index 0000000..47136f7 --- /dev/null +++ b/man/RPSS.Rd @@ -0,0 +1,75 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RPSS.R +\name{RPSS} +\alias{RPSS} +\title{Compute the Ranked Probability Skill Score} +\usage{ +RPSS( + exp, + obs, + ref = NULL, + prob_thresholds = c(1/3, 2/3), + indices_for_clim = NULL, + Fair = FALSE, + time_dim = "year", + member_dim = "member", + ncores = NULL +) +} +\arguments{ +\item{exp}{A numerical array with the forecast with, at least, time dimension.} + +\item{obs}{A numerical array with the observations with, at least, time dimension. +The length of the time dimension must be the same as for "exp".} + +\item{ref}{NULL or a numerical array with the reference forecast with, at least, time dimension. +The length of the time dimension must be the same as for "exp". If it is NULL, the climatological forecast is +used as reference forecast. The default value is NULL.} + +\item{prob_thresholds}{A numeric vector with the relative thresholds (between 0 and 1) between the categories. +The default value is c(1/3,2/3), which corresponds to tercile equiprobable categories.} + +\item{indices_for_clim}{A vector with the indices to be taken along "time_dim" dimension for computing the thresholds +between the probabilistic categories. If NULL, the whole period is used. The default value is NULL.} + +\item{Fair}{A logical indicating whether to compute the FairRPSS (the potential RPSS that the forecast would have with +an infinite ensemble size). The default value is FALSE.} + +\item{time_dim}{A character string indicating the name of the time dimension. +The default value is 'year'.} + +\item{member_dim}{A character string indicating the name of the member dimension to compute the +ensemble means of the forecast and reference forecast. If it is NULL, the ensemble means should +be provided directly to the function. The default value is 'member'.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +\item{$rpss}{ + A numerical array with the RPSS with the same dimensions as "exp" except the "time_dim" + and "member_dim" dimensions. +} +\item{$sign}{ + A logical array with the statistical significance of the RPSS with the same dimensions + as "exp" except the "time_dim" and "member_dim" dimensions. +} +} +\description{ +The Ranked Probability Skill Score (RPSS; Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7) +is the skill score based on the Ranked Probability Score (RPS; Wilks, 2011). It can be used to assess whether +a forecast presents an improvement or worsening with respect to a reference forecast. The RPSS ranges between +minus infinite and 1. If the RPSS is positive, it indicates that the forecast has higher skill than the +reference forecast, while a negative value means that it has a lower skill. Examples of reference forecasts +are the climatological forecast (same probabilities for all categories for all time steps), persistence, a previous model version, +and another model. It is computed as RPSS = 1 - RPS_exp / RPS_ref. The statistical significance is obtained based +on a Random Walk test at the 95% confidence level (DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1) +} +\examples{ +exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50)) +ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50)) +obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, year = 50)) +RPSS(exp = exp, obs = obs) ## climatology as reference forecast +RPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast + +} diff --git a/man/RandomWalkTest.Rd b/man/RandomWalkTest.Rd index 1110648..bd1460a 100644 --- a/man/RandomWalkTest.Rd +++ b/man/RandomWalkTest.Rd @@ -37,8 +37,8 @@ A list of 2: } \description{ Forecast comparison of the skill obtained with 2 forecasts (with respect to a -common reference) based on Random Walks, with significance estimate at the 5% -confidence level, as in DelSole and Tippett (2015). +common reference) based on Random Walks, with significance estimate at the 95% +confidence level, as in DelSole and Tippett (2016). } \examples{ fcst_A <- array(c(11:50), dim = c(sdate = 10, lat = 2, lon = 2)) diff --git a/tests/testthat/test-RPS.R b/tests/testthat/test-RPS.R new file mode 100644 index 0000000..7b1f77e --- /dev/null +++ b/tests/testthat/test-RPS.R @@ -0,0 +1,129 @@ +context("s2dv::RPS tests") + +############################################## + +# dat1 +set.seed(1) +exp1 <- array(rnorm(60), dim = c(member = 3, sdate = 10, lat = 2)) +set.seed(2) +obs1 <- array(rnorm(20), dim = c(sdate = 10, lat = 2)) + +# dat2 +set.seed(1) +exp2 <- array(rnorm(20), dim = c(member = 2, sdate = 10)) +set.seed(2) +obs2 <- array(rnorm(10), dim = c(sdate = 10)) + + + +############################################## +test_that("1. Input checks", { + # exp and obs (1) + expect_error( + RPS(c()), + 'Parameter "exp" must be a numeric array.' + ) + expect_error( + RPS(exp1, c()), + 'Parameter "obs" must be a numeric array.' + ) + + # time_dim + expect_error( + RPS(exp1, obs1, time_dim = 1), + 'Parameter "time_dim" must be a character string.' + ) + expect_error( + RPS(exp1, obs1, time_dim = 'time'), + "Parameter 'time_dim' is not found in 'exp' or 'obs' dimension." + ) + # memb_dim + expect_error( + RPS(exp1, obs1, memb_dim = TRUE), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + RPS(exp1, obs1, memb_dim = 'memb'), + "Parameter 'memb_dim' is not found in 'exp' dimension." + ) + # exp, ref, and obs (2) + expect_error( + RPS(exp1, array(1:9, dim = c(sdate = 9))), + "Parameter 'exp' and 'obs' must have same length of all dimensions expect 'memb_dim'." + ) + # prob_thresholds + expect_error( + RPS(exp1, obs1, prob_thresholds = 1), + "Parameter 'prob_thresholds' must be a numeric vector between 0 and 1." + ) + # indices_for_clim + expect_error( + RPS(exp1, obs1, indices_for_clim = array(1:6, dim = c(2, 3))), + "Parameter 'indices_for_clim' must be NULL or a numeric vector." + ) + expect_error( + RPS(exp1, obs1, indices_for_clim = 3:11), + "Parameter 'indices_for_clim' should be the indices of 'time_dim'." + ) + # Fair + expect_error( + RPS(exp1, obs1, Fair = 1), + "Parameter 'Fair' must be either TRUE or FALSE." + ) + # ncores + expect_error( + RPS(exp2, obs2, ncores = 1.5), + "Parameter 'ncores' must be either NULL or a positive integer." + ) + +}) + +############################################## +test_that("2. Output checks: dat1", { + +expect_equal( +dim(RPS(exp1, obs1)), +c(lat = 2) +) +expect_equal( +as.vector(RPS(exp1, obs1)), +c(0.3555556, 0.4444444), +tolerance = 0.0001 +) +expect_equal( +as.vector(RPS(exp1, obs1, Fair = T)), +c(0.2000000, 0.2666667), +tolerance = 0.0001 +) +expect_equal( +as.vector(RPS(exp1, obs1, indices_for_clim = 3:5)), +c(0.5000000, 0.4666667), +tolerance = 0.0001 +) +expect_equal( +as.vector(RPS(exp1, obs1, prob_thresholds = seq(0.1, 0.9, 0.1))), +c(1.600000, 1.888889), +tolerance = 0.0001 +) + +}) + +############################################## +test_that("3. Output checks: dat2", { + +expect_equal( +dim(RPS(exp2, obs2)), +NULL +) +expect_equal( +RPS(exp2, obs2), +0.75, +tolerance = 0.0001 +) +expect_equal( +RPS(exp2, obs2, indices_for_clim = 2:5, prob_thresholds = seq(0.1, 0.9, 0.1)), +2.75, +tolerance = 0.0001 +) + +}) -- GitLab From 298bb734d1fce1a171b72b76334c799837df8120 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 12 May 2022 15:24:06 +0200 Subject: [PATCH 10/18] Change the order of 'obs' and 'ref' --- R/ResidualCorr.R | 61 ++++++++++++++++++++++----------------------- man/ResidualCorr.Rd | 12 ++++----- 2 files changed, 36 insertions(+), 37 deletions(-) diff --git a/R/ResidualCorr.R b/R/ResidualCorr.R index dae0cb8..541cfe8 100644 --- a/R/ResidualCorr.R +++ b/R/ResidualCorr.R @@ -17,11 +17,11 @@ #' #'@param exp A named numerical array of the forecast with at least time #' dimension. +#'@param obs A named numerical array of the observations with at least time +#' dimension. The dimensions must be the same as "exp" except 'memb_dim'. #'@param ref A named numerical array of the reference forecast data with at #' least time dimension. The dimensions must be the same as "exp" except #' 'memb_dim'. -#'@param obs A named numerical array of the observations with at least time -#' dimension. The dimensions must be the same as "exp" except 'memb_dim'. #'@param N.eff Effective sample size to be used in the statistical significance #' test. It can be NA (and it will be computed with the s2dv:::.Eno), a #' numeric (which is used for all cases), or an array with the same dimensions @@ -66,13 +66,13 @@ #' #'@examples #' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) -#' ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 5, sdate = 50)) #' obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) -#' res <- ResidualCorr(exp, ref, obs, memb_dim = 'member') +#' ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 5, sdate = 50)) +#' res <- ResidualCorr(exp = exp, obs = obs, ref = ref, memb_dim = 'member') #' #'@import multiApply #'@export -ResidualCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate', +ResidualCorr <- function(exp, obs, ref, N.eff = NA, time_dim = 'sdate', memb_dim = NULL, method = 'pearson', alpha = NULL, handle.na = 'return.na', ncores = NULL) { @@ -80,10 +80,10 @@ ResidualCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate', ## exp, ref, and obs (1) if (!is.array(exp) | !is.numeric(exp)) stop('Parameter "exp" must be a numeric array.') - if (!is.array(ref) | !is.numeric(ref)) - stop('Parameter "ref" must be a numeric array.') if (!is.array(obs) | !is.numeric(obs)) stop('Parameter "obs" must be a numeric array.') + if (!is.array(ref) | !is.numeric(ref)) + stop('Parameter "ref" must be a numeric array.') ## N.eff if (is.array(N.eff)) { @@ -101,9 +101,9 @@ ResidualCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate', ## time_dim if (!is.character(time_dim) | length(time_dim) != 1) stop('Parameter "time_dim" must be a character string.') - if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(ref)) | - !time_dim %in% names(dim(obs))) { - stop("Parameter 'time_dim' is not found in 'exp', 'ref', or 'obs' dimension.") + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs)) | + !time_dim %in% names(dim(ref))) { + stop("Parameter 'time_dim' is not found in 'exp', 'obs', or 'ref' dimension.") } ## memb_dim if (!is.null(memb_dim)) { @@ -116,15 +116,15 @@ ResidualCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate', } ## exp, ref, and obs (2) name_exp <- sort(names(dim(exp))) - name_ref <- sort(names(dim(ref))) name_obs <- sort(names(dim(obs))) + name_ref <- sort(names(dim(ref))) if (!is.null(memb_dim)) { name_exp <- name_exp[-which(name_exp == memb_dim)] name_ref <- name_ref[-which(name_ref == memb_dim)] } - if (!identical(length(name_exp), length(name_ref), length(name_obs)) | - !identical(dim(exp)[name_exp], dim(ref)[name_ref], dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp', 'ref', and 'obs' must have same length of ", + if (!identical(length(name_exp), length(name_obs), length(name_ref)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs], dim(ref)[name_ref])) { + stop(paste0("Parameter 'exp', 'obs', and 'ref' must have same length of ", "all dimensions expect 'memb_dim'.")) } ## method @@ -153,13 +153,13 @@ ResidualCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate', ############################### # NA check: na.fail - if (handle.na == "na.fail" & (anyNA(exp) | anyNA(ref) | anyNA(obs))) + if (handle.na == "na.fail" & (anyNA(exp) | anyNA(obs) | anyNA(ref))) stop('The data contain NAs.') # Calculate ensemble mean dim_exp <- dim(exp) - dim_ref <- dim(ref) dim_obs <- dim(obs) + dim_ref <- dim(ref) if (!is.null(memb_dim)) { exp_memb_dim_ind <- which(names(dim_exp) == memb_dim) @@ -180,18 +180,17 @@ ResidualCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate', # Residual correlation if (is.array(N.eff)) { - output <- Apply(data = list(exp = exp, ref = ref, - obs = obs, N.eff = N.eff), - target_dims = list(exp = time_dim, ref = time_dim, - obs = time_dim, N.eff = NULL), + output <- Apply(data = list(exp = exp, obs = obs, ref = ref, + N.eff = N.eff), + target_dims = list(exp = time_dim, obs = time_dim, + ref = time_dim, N.eff = NULL), output_dims = output_dims, fun = .ResidualCorr, method = method, alpha = alpha, handle.na = handle.na, ncores = ncores) } else { - output <- Apply(data = list(exp = exp, ref = ref, - obs = obs), - target_dims = list(exp = time_dim, ref = time_dim, - obs = time_dim), + output <- Apply(data = list(exp = exp, obs = obs, ref = ref), + target_dims = list(exp = time_dim, obs = time_dim, + ref = time_dim), output_dims = output_dims, N.eff = N.eff, fun = .ResidualCorr, method = method, alpha = alpha, handle.na = handle.na, ncores = ncores) @@ -200,9 +199,9 @@ ResidualCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate', return(output) } -.ResidualCorr <- function(exp, ref, obs, N.eff, method, alpha, handle.na) { +.ResidualCorr <- function(exp, obs, ref, N.eff, method, alpha, handle.na) { # exp and ref and obs: [time] - .residual.corr <- function(exp, ref, obs, method, N.eff, alpha) { + .residual.corr <- function(exp, obs, ref, method, N.eff, alpha) { # Residuals of 'exp' and 'obs' (regressing 'ref' out in both 'exp' and 'obs') exp_res <- lm(formula = y ~ x, data = list(y = exp, x = ref), na.action = NULL)$residuals @@ -235,16 +234,16 @@ ResidualCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate', #================================================== - if (anyNA(exp) | anyNA(ref) | anyNA(obs)) { ## There are NAs + if (anyNA(exp) | anyNA(obs) | anyNA(ref)) { ## There are NAs if (handle.na == 'only.complete.triplets') { - nna <- is.na(exp) | is.na(ref) | is.na(obs) # A vector of T/F + nna <- is.na(exp) | is.na(obs) | is.na(ref) # A vector of T/F if (all(nna)) stop("There is no complete set of forecasts and observations.") # Remove the incomplete set exp <- exp[!nna] - ref <- ref[!nna] obs <- obs[!nna] + ref <- ref[!nna] - output <- .residual.corr(exp = exp, ref = ref, obs = obs, method = method, + output <- .residual.corr(exp = exp, obs = obs, ref = ref, method = method, N.eff = N.eff, alpha = alpha) } else if (handle.na == 'return.na') { @@ -257,7 +256,7 @@ ResidualCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate', } } else { ## There is no NA - output <- .residual.corr(exp = exp, ref = ref, obs = obs, method = method, + output <- .residual.corr(exp = exp, obs = obs, ref = ref, method = method, N.eff = N.eff, alpha = alpha) } diff --git a/man/ResidualCorr.Rd b/man/ResidualCorr.Rd index e98ea7a..fe7dd10 100644 --- a/man/ResidualCorr.Rd +++ b/man/ResidualCorr.Rd @@ -6,8 +6,8 @@ \usage{ ResidualCorr( exp, - ref, obs, + ref, N.eff = NA, time_dim = "sdate", memb_dim = NULL, @@ -21,13 +21,13 @@ ResidualCorr( \item{exp}{A named numerical array of the forecast with at least time dimension.} +\item{obs}{A named numerical array of the observations with at least time +dimension. The dimensions must be the same as "exp" except 'memb_dim'.} + \item{ref}{A named numerical array of the reference forecast data with at least time dimension. The dimensions must be the same as "exp" except 'memb_dim'.} -\item{obs}{A named numerical array of the observations with at least time -dimension. The dimensions must be the same as "exp" except 'memb_dim'.} - \item{N.eff}{Effective sample size to be used in the statistical significance test. It can be NA (and it will be computed with the s2dv:::.Eno), a numeric (which is used for all cases), or an array with the same dimensions @@ -95,8 +95,8 @@ effective degrees of freedom to account for the time series' autocorrelation } \examples{ exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) -ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 5, sdate = 50)) obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) -res <- ResidualCorr(exp, ref, obs, memb_dim = 'member') +ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 5, sdate = 50)) +res <- ResidualCorr(exp = exp, obs = obs, ref = ref, memb_dim = 'member') } -- GitLab From 026cb55714c2aca94a909ef813bbb000557d4520 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 12 May 2022 17:41:11 +0200 Subject: [PATCH 11/18] Correct conditional statement and unit test --- R/ResidualCorr.R | 4 +- tests/testthat/test-ResidualCorr.R | 76 +++++++++++++++--------------- 2 files changed, 40 insertions(+), 40 deletions(-) diff --git a/R/ResidualCorr.R b/R/ResidualCorr.R index 541cfe8..7428d1b 100644 --- a/R/ResidualCorr.R +++ b/R/ResidualCorr.R @@ -122,8 +122,8 @@ ResidualCorr <- function(exp, obs, ref, N.eff = NA, time_dim = 'sdate', name_exp <- name_exp[-which(name_exp == memb_dim)] name_ref <- name_ref[-which(name_ref == memb_dim)] } - if (!identical(length(name_exp), length(name_obs), length(name_ref)) | - !identical(dim(exp)[name_exp], dim(obs)[name_obs], dim(ref)[name_ref])) { + if (length(name_exp) != length(name_obs) | length(name_exp) != length(name_ref) | + any(dim(exp)[name_exp] != dim(obs)[name_obs]) | any(dim(exp)[name_exp] != dim(ref)[name_ref])) { stop(paste0("Parameter 'exp', 'obs', and 'ref' must have same length of ", "all dimensions expect 'memb_dim'.")) } diff --git a/tests/testthat/test-ResidualCorr.R b/tests/testthat/test-ResidualCorr.R index c9ac4b9..c15276c 100644 --- a/tests/testthat/test-ResidualCorr.R +++ b/tests/testthat/test-ResidualCorr.R @@ -29,63 +29,63 @@ test_that("1. Input checks", { 'Parameter "exp" must be a numeric array.' ) expect_error( - ResidualCorr(exp1, c('a')), + ResidualCorr(exp1, obs1, c('a')), 'Parameter "ref" must be a numeric array.' ) expect_error( - ResidualCorr(exp1, ref1, c()), + ResidualCorr(exp1, c()), 'Parameter "obs" must be a numeric array.' ) # N.eff expect_error( - ResidualCorr(exp1, ref1, obs1, N.eff = array(1, dim = c(lat = 2, lon = 2))), + ResidualCorr(exp1, obs1, ref1, N.eff = array(1, dim = c(lat = 2, lon = 2))), 'If parameter "N.eff" is provided with an array, it must have the same dimensions as "obs" except "time_dim".' ) expect_error( - ResidualCorr(exp1, ref1, obs1, N.eff = 1:3), + ResidualCorr(exp1, obs1, ref1, N.eff = 1:3), 'Parameter "N.eff" must be NA, a numeric, or an array with the same dimensions as "obs" except "time_dim".' ) # time_dim expect_error( - ResidualCorr(exp1, ref1, obs1, time_dim = 1), + ResidualCorr(exp1, obs1, ref1, time_dim = 1), 'Parameter "time_dim" must be a character string.' ) expect_error( - ResidualCorr(exp1, ref1, obs1, time_dim = 'time'), - "Parameter 'time_dim' is not found in 'exp', 'ref', or 'obs' dimension." + ResidualCorr(exp1, obs1, ref1, time_dim = 'time'), + "Parameter 'time_dim' is not found in 'exp', 'obs', or 'ref' dimension." ) # memb_dim expect_error( - ResidualCorr(exp1, ref1, obs1, memb_dim = TRUE), + ResidualCorr(exp1, obs1, ref1, memb_dim = TRUE), "Parameter 'memb_dim' must be a character string." ) expect_error( - ResidualCorr(exp1, ref1, obs1, memb_dim = 'member'), + ResidualCorr(exp1, obs1, ref1, memb_dim = 'member'), "Parameter 'memb_dim' is not found in 'exp' or 'ref' dimension." ) # exp, ref, and obs (2) expect_error( - ResidualCorr(exp1, array(1:10, dim = c(sdate = 10, memb = 1)), obs1, memb_dim = 'memb'), - "Parameter 'exp', 'ref', and 'obs' must have same length of all dimensions expect 'memb_dim'." + ResidualCorr(exp1, obs1, array(1:10, dim = c(sdate = 10, memb = 1)), memb_dim = 'memb'), + "Parameter 'exp', 'obs', and 'ref' must have same length of all dimensions expect 'memb_dim'." ) # method expect_error( - ResidualCorr(exp1, ref1, obs1, method = 'asd', memb_dim = 'memb'), + ResidualCorr(exp1, obs1, ref1, method = 'asd', memb_dim = 'memb'), 'Parameter "method" must be "pearson", "kendall", or "spearman".' ) # alpha expect_error( - ResidualCorr(exp2, ref2, obs2, alpha = 1), + ResidualCorr(exp2, obs2, ref2, alpha = 1), 'Parameter "alpha" must be NULL or a number between 0 and 1.' ) # handle.na expect_error( - ResidualCorr(exp2, ref2, obs2, handle.na = TRUE), + ResidualCorr(exp2, obs2, ref2, handle.na = TRUE), 'Parameter "handle.na" must be "return.na", "only.complete.triplets" or "na.fail".' ) # ncores expect_error( - ResidualCorr(exp2, ref2, obs2, ncores = 1.5), + ResidualCorr(exp2, obs2, ref2, ncores = 1.5), 'Parameter "ncores" must be either NULL or a positive integer.' ) @@ -94,69 +94,69 @@ test_that("1. Input checks", { ############################################## test_that("2. Output checks: dat1", { expect_equal( -names(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb')), +names(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb')), c("res.corr", "p.val") ) expect_equal( -dim(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb')$res.corr), +dim(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb')$res.corr), c(lat = 3, lon = 2) ) expect_equal( -dim(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb')$p.val), +dim(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb')$p.val), c(lat = 3, lon = 2) ) expect_equal( -as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb')$res.corr), +as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb')$res.corr), c(0.002784318, 0.537697647, -0.240071018, 0.258706464, 0.338160748, 0.432107476), tolerance = 0.0001 ) expect_equal( -as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb')$p), +as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb')$p), c(0.49695468, 0.05446055, 0.25203961, 0.23522967, 0.16960864, 0.10618145), tolerance = 0.0001 ) expect_equal( -names(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', alpha = 0.05)), +names(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', alpha = 0.05)), c("res.corr", "sign") ) expect_equal( -as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', alpha = 0.05)$res.corr), +as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', alpha = 0.05)$res.corr), c(0.002784318, 0.537697647, -0.240071018, 0.258706464, 0.338160748, 0.432107476), tolerance = 0.0001 ) expect_equal( -as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', alpha = 0.05)$sign), +as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', alpha = 0.05)$sign), rep(FALSE, 6) ) expect_equal( -as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', method = "kendall")$res), +as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', method = "kendall")$res), c(0.11111111, 0.42222222, -0.20000000, 0.02222222, 0.20000000, 0.42222222), tolerance = 0.0001 ) expect_equal( -as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', method = "kendall")$p), +as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', method = "kendall")$p), c(0.3799615, 0.1120891, 0.2897920, 0.4757064, 0.2897920, 0.1120891), tolerance = 0.0001 ) expect_equal( -as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', method = "spearman")$res), +as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', method = "spearman")$res), c(0.13939394, 0.61212121, -0.27272727, 0.07878788, 0.28484848, 0.47878788), tolerance = 0.0001 ) expect_equal( -as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', method = "spearman")$p), +as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', method = "spearman")$p), c(0.35046594, 0.02998607, 0.22291917, 0.41435870, 0.21251908, 0.08076146), tolerance = 0.0001 ) expect_equal( -as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', N.eff = Neff1)$res), +as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', N.eff = Neff1)$res), c(0.002784318, 0.537697647, -0.240071018, 0.258706464, 0.338160748, 0.432107476), tolerance = 0.0001 ) expect_equal( -as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', N.eff = Neff1)$p), +as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', N.eff = Neff1)$p), c(0.49716394, 0.05446055, 0.26690777, 0.23522967, 0.18671025, 0.10618145), tolerance = 0.0001 ) @@ -165,17 +165,17 @@ tolerance = 0.0001 #--------------------------- exp1[1] <- NA expect_equal( -as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb')$res.corr), +as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb')$res.corr), c(NA, 0.537697647, -0.240071018, 0.258706464, 0.338160748, 0.432107476), tolerance = 0.0001 ) expect_equal( -as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', handle.na = 'only.complete.triplets')$res.corr), +as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', handle.na = 'only.complete.triplets')$res.corr), c(-0.1082588, 0.537697647, -0.240071018, 0.258706464, 0.338160748, 0.432107476), tolerance = 0.0001 ) expect_error( -ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', handle.na = 'na.fail'), +ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', handle.na = 'na.fail'), "The data contain NAs." ) @@ -183,7 +183,7 @@ exp1[1,1,1,1:3] <- NA ref1[1,1,1,4:8] <- NA obs1[1,1,9:10] <- NA expect_error( -ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', handle.na = 'only.complete.triplets'), +ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', handle.na = 'only.complete.triplets'), "There is no complete set of forecasts and observations." ) @@ -194,24 +194,24 @@ ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', handle.na = 'only.complete.tri test_that("3. Output checks: dat2", { expect_equal( -names(ResidualCorr(exp2, ref2, obs2)), +names(ResidualCorr(exp2, obs2, ref2)), c("res.corr", "p.val") ) expect_equal( -dim(ResidualCorr(exp2, ref2, obs2)$res.corr), +dim(ResidualCorr(exp2, obs2, ref2)$res.corr), NULL ) expect_equal( -dim(ResidualCorr(exp2, ref2, obs2)$p), +dim(ResidualCorr(exp2, obs2, ref2)$p), NULL ) expect_equal( -ResidualCorr(exp2, ref2, obs2)$p, +ResidualCorr(exp2, obs2, ref2)$p, 0.2209847, tolerance = 0.0001 ) expect_equal( -ResidualCorr(exp2, ref2, obs2)$res, +ResidualCorr(exp2, obs2, ref2)$res, -0.274962, tolerance = 0.0001 ) -- GitLab From cb78b52add85f65281c67cbb23a0826402ff3699 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 12 May 2022 18:06:35 +0200 Subject: [PATCH 12/18] Improve RPSS() formatting and efficiency --- R/RPS.R | 17 ++- R/RPSS.R | 280 ++++++++++++++++++++++++------------- man/RPS.Rd | 3 +- man/RPSS.Rd | 85 ++++++----- tests/testthat/test-RPS.R | 2 +- tests/testthat/test-RPSS.R | 272 +++++++++++++++++++++++++++++++++++ 6 files changed, 516 insertions(+), 143 deletions(-) create mode 100644 tests/testthat/test-RPSS.R diff --git a/R/RPS.R b/R/RPS.R index 744b9c7..7b4af7c 100644 --- a/R/RPS.R +++ b/R/RPS.R @@ -14,7 +14,8 @@ #'@param exp A named numerical array of the forecast with at least time #' dimension. #'@param obs A named numerical array of the observation with at least time -#' dimension. The dimensions must be the same as 'exp' except 'memb_dim'. +#' dimension. The dimensions must be the same as 'exp' except 'obs' doesn't +#' have 'memb_dim'. #'@param time_dim A character string indicating the name of the time dimension. #' The default value is 'sdate'. #'@param memb_dim A character string indicating the name of the member dimension @@ -80,7 +81,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', if (!identical(length(name_exp), length(name_obs)) | !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions expect 'memb_dim'.")) + "all dimensions expect 'obs' doesn't have 'memb_dim'.")) } ## prob_thresholds if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | @@ -117,16 +118,21 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', rps <- Apply(data = list(exp = exp, obs = obs), target_dims = list(exp = c(time_dim, memb_dim), obs = time_dim), + output_dims = time_dim, fun = .RPS, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, Fair = Fair, ncores = ncores)$output1 - + + # Return only the mean RPS + rps <- MeanDims(rps, time_dim, na.rm = FALSE) + return(rps) } -.RPS <- function(exp, obs, prob_thresholds, indices_for_clim, Fair) { +.RPS <- function(exp, obs, prob_thresholds = c(1/3, 2/3), + indices_for_clim = NULL, Fair = FALSE) { # exp: [sdate, memb] # obs: [sdate] @@ -151,9 +157,6 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', rps <- rps + adjustment } - # Return only the mean RPS - rps <- mean(rps, na.rm = FALSE) - return(rps) } diff --git a/R/RPSS.R b/R/RPSS.R index f7dc80d..1498456 100644 --- a/R/RPSS.R +++ b/R/RPSS.R @@ -1,131 +1,215 @@ #'Compute the Ranked Probability Skill Score #' -#'The Ranked Probability Skill Score (RPSS; Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7) -#'is the skill score based on the Ranked Probability Score (RPS; Wilks, 2011). It can be used to assess whether -#'a forecast presents an improvement or worsening with respect to a reference forecast. The RPSS ranges between -#'minus infinite and 1. If the RPSS is positive, it indicates that the forecast has higher skill than the -#'reference forecast, while a negative value means that it has a lower skill. Examples of reference forecasts -#'are the climatological forecast (same probabilities for all categories for all time steps), persistence, a previous model version, -#'and another model. It is computed as RPSS = 1 - RPS_exp / RPS_ref. The statistical significance is obtained based -#'on a Random Walk test at the 95% confidence level (DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1) +#'The Ranked Probability Skill Score (RPSS; Wilks, 2011) is the skill score +#'based on the Ranked Probability Score (RPS; Wilks, 2011). It can be used to +#'assess whether a forecast presents an improvement or worsening with respect to +#'a reference forecast. The RPSS ranges between minus infinite and 1. If the +#'RPSS is positive, it indicates that the forecast has higher skill than the +#'reference forecast, while a negative value means that it has a lower skill. +#'Examples of reference forecasts are the climatological forecast (same +#'probabilities for all categories for all time steps), persistence, a previous +#'model version, and another model. It is computed as RPSS = 1 - RPS_exp / RPS_ref. +#'The statistical significance is obtained based on a Random Walk test at the +#'95% confidence level (DelSole and Tippett, 2016). #' -#'@param exp A numerical array with the forecast with, at least, time dimension. -#'@param obs A numerical array with the observations with, at least, time dimension. -#' The length of the time dimension must be the same as for "exp". -#'@param ref NULL or a numerical array with the reference forecast with, at least, time dimension. -#' The length of the time dimension must be the same as for "exp". If it is NULL, the climatological forecast is -#' used as reference forecast. The default value is NULL. -#'@param prob_thresholds A numeric vector with the relative thresholds (between 0 and 1) between the categories. -#' The default value is c(1/3,2/3), which corresponds to tercile equiprobable categories. -#'@param indices_for_clim A vector with the indices to be taken along "time_dim" dimension for computing the thresholds -#' between the probabilistic categories. If NULL, the whole period is used. The default value is NULL. -#'@param Fair A logical indicating whether to compute the FairRPSS (the potential RPSS that the forecast would have with -#'an infinite ensemble size). The default value is FALSE. +#'@param exp A named numerical array of the forecast with at least time +#' dimension. +#'@param obs A named numerical array of the observation with at least time +#' dimension. The dimensions must be the same as 'exp' except 'obs' doesn't +#' have 'memb_dim'. +#'@param ref A named numerical array of the reference forecast data with at +#' least time dimension. The dimensions must be the same as 'exp' except +#' 'memb_dim'. If it is NULL, the climatological forecast is used as reference +#' forecast. The default value is NULL. #'@param time_dim A character string indicating the name of the time dimension. -#' The default value is 'year'. -#'@param member_dim A character string indicating the name of the member dimension to compute the -#' ensemble means of the forecast and reference forecast. If it is NULL, the ensemble means should -#' be provided directly to the function. The default value is 'member'. +#' The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the probabilities of the forecast and the reference forecast. The +#' default value is 'member'. +#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to +#' 1) between the categories. The default value is c(1/3, 2/3), which +#' corresponds to tercile equiprobable categories. +#'@param indices_for_clim A vector of the indices to be taken along 'time_dim' +#' for computing the thresholds between the probabilistic categories. If NULL, +#' the whole period is used. The default value is NULL. +#'@param Fair A logical indicating whether to compute the FairRPSS (the +#' potential RPSS that the forecast would have with an infinite ensemble size). +#' The default value is FALSE. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' #'@return #'\item{$rpss}{ -#' A numerical array with the RPSS with the same dimensions as "exp" except the "time_dim" -#' and "member_dim" dimensions. +#' A numerical array of the RPSS with the same dimensions as "exp" except the +#' 'time_dim' and 'memb_dim' dimensions. #'} #'\item{$sign}{ -#' A logical array with the statistical significance of the RPSS with the same dimensions -#' as "exp" except the "time_dim" and "member_dim" dimensions. +#' A logical array of the statistical significance of the RPSS with the same +#' dimensions as 'exp' except the 'time_dim' and 'memb_dim' dimensions. #'} - +#' +#'@references +#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +#'DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1 +#' #'@examples -#' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50)) -#' ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50)) -#' obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, year = 50)) -#' RPSS(exp = exp, obs = obs) ## climatology as reference forecast -#' RPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast +#'exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#'obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +#'ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#'res <- RPSS(exp = exp, obs = obs) ## climatology as reference forecast +#'res <- RPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast #' #'@import multiApply #'@export -RPSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3,2/3), indices_for_clim = NULL, Fair = FALSE, time_dim = 'year', member_dim = 'member', ncores = NULL){ - - ## Checkings - if (!is.array(exp)){stop('"exp" must be an array.')} - if (!is.array(obs)){stop('"obs" must be an array.')} - if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(obs)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "obs".')} - if (!is.null(ref)){ - if (!is.array(ref)){stop('"ref" must be an array.')} - if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(ref)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "ref".')} +RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', + prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, + ncores = NULL){ + + # Check inputs + ## exp, obs, and ref (1) + if (!is.array(exp) | !is.numeric(exp)) + stop('Parameter "exp" must be a numeric array.') + if (!is.array(obs) | !is.numeric(obs)) + stop('Parameter "obs" must be a numeric array.') + if (!is.null(ref)) { + if (!is.array(ref) | !is.numeric(ref)) + stop('Parameter "ref" must be a numeric array.') } - if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | any(prob_thresholds <= 0) | any(prob_thresholds >= 1)){ - stop('"prob_thresholds" must be a numeric vector with the relative thresholds (from 0 to 1) between the categories.') + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop('Parameter "time_dim" must be a character string.') + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") } - if (is.null(indices_for_clim)){ - indices_for_clim <- 1:as.numeric(dim(obs)[time_dim]) - } else if (!is.numeric(indices_for_clim)){ - stop('"indices_for_clim" must be NULL or a numeric vector.') + if (!is.null(ref) & !time_dim %in% names(dim(ref))) { + stop("Parameter 'time_dim' is not found in 'ref' dimension.") } - if (!is.logical(Fair)){stop('"Fair" must be TRUE or FALSE.')} - if (!is.character(time_dim)){stop('"time_dim" must be a string.')} - if (!is.character(member_dim)){stop('"member_dim" must be a string.')} - if (!is.null(ncores)){ - if (!is.numeric(ncores) | ncores < 1){stop('"ncores" must be either NULL or a positive integer.')} - } - - if (!is.null(ref)){ ## using "ref" as reference forecast - output <- multiApply::Apply(data = list(exp = exp, obs = obs, ref = ref), - target_dims = list(exp = c(time_dim, member_dim), obs = time_dim, ref = c(time_dim, member_dim)), - prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, - Fair = Fair, time_dim = time_dim, member_dim = member_dim, - fun = .RPSS, ncores = ncores) - } else { ## using climatology as reference forecast - output <- multiApply::Apply(data = list(exp = exp, obs = obs), - target_dims = list(exp = c(time_dim, member_dim), obs = time_dim), - ref = ref, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, - Fair = Fair, time_dim = time_dim, member_dim = member_dim, - fun = .RPSS, ncores = ncores) + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (!is.null(ref) & !memb_dim %in% names(dim(ref))) { + stop("Parameter 'memb_dim' is not found in 'ref' dimension.") + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions expect 'obs' doesn't have 'memb_dim'.")) + } + if (!is.null(ref)) { + name_ref <- sort(names(dim(ref))) + name_ref <- name_ref[-which(name_ref == memb_dim)] + if (!identical(length(name_exp), length(name_ref)) | + !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions expect 'memb_dim'.")) + } + } + ## prob_thresholds + if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | + any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { + stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.") + } + ## indices_for_clim + if (is.null(indices_for_clim)) { + indices_for_clim <- 1:dim(obs)[time_dim] + } else { + if (!is.numeric(indices_for_clim) | !is.vector(indices_for_clim)) { + stop("Parameter 'indices_for_clim' must be NULL or a numeric vector.") + } else if (length(indices_for_clim) > dim(obs)[time_dim] | + max(indices_for_clim) > dim(obs)[time_dim] | + any(indices_for_clim) < 1) { + stop("Parameter 'indices_for_clim' should be the indices of 'time_dim'.") + } + } + ## Fair + if (!is.logical(Fair) | length(Fair) > 1) { + stop("Parameter 'Fair' must be either TRUE or FALSE.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } } + + ############################### + + # Compute RPSS + if (!is.null(ref)) { # use "ref" as reference forecast + data <- list(exp = exp, obs = obs, ref = ref) + target_dims = list(exp = c(time_dim, memb_dim), + obs = time_dim, + ref = c(time_dim, memb_dim)) + } else { + data <- list(exp = exp, obs = obs) + target_dims = list(exp = c(time_dim, memb_dim), + obs = time_dim) + } + output <- Apply(data, + target_dims = target_dims, + fun = .RPSS, + prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, + ncores = ncores) return(output) } -.RPSS <- function(exp, obs, ref, prob_thresholds, indices_for_clim, Fair, time_dim, member_dim){ - - ## RPS of the forecast - rps_exp <- .RPS(exp = exp, obs = obs, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, Fair = Fair, time_dim = time_dim, member_dim = member_dim) +.RPSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3, 2/3), + indices_for_clim = NULL, Fair = FALSE) { + # exp: [sdate, memb] + # obs: [sdate] + # ref: [sdate, memb] or NULL + + # RPS of the forecast + rps_exp <- .RPS(exp = exp, obs = obs, prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair) - ## RPS of the reference forecast - if (is.null(ref)){ ## using climatology as reference forecast - obs_probs <- .get_probs(data = obs, type = 'obs', indices_for_quantiles = indices_for_clim, time_dim = time_dim, member_dim = NULL, prob_thresholds = prob_thresholds) - clim_probs <- .get_probs_clim(prob_thresholds = prob_thresholds, dims = dim(obs_probs), time_dim = time_dim) - rps_ref <- multiApply::Apply(data = list(probs_exp = clim_probs, probs_obs = obs_probs), target_dims = 'bin', - fun = .RPS_from_probs, n_categories = length(prob_thresholds)+1, Fair = FALSE, ncores = NULL)$output1 - } else { ## using "ref" as reference forecast - rps_ref <- .RPS(exp = ref, obs = obs, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, Fair = Fair, time_dim = time_dim, member_dim = member_dim) + # RPS of the reference forecast + if (is.null(ref)) { ## using climatology as reference forecast + obs_probs <- .get_probs(data = obs, indices_for_quantiles = indices_for_clim, + prob_thresholds = prob_thresholds) + # obs_probs: [bin, sdate] + + clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), 1 - prob_thresholds[length(prob_thresholds)]) + clim_probs <- array(clim_probs, dim = dim(obs_probs)) + # clim_probs: [bin, sdate] + + # Calculate RPS for each time step + probs_clim_cumsum <- apply(clim_probs, 2, cumsum) + probs_obs_cumsum <- apply(obs_probs, 2, cumsum) + rps_ref <- apply((probs_clim_cumsum - probs_obs_cumsum)^2, 2, sum) + # rps_ref: [sdate] + +# if (Fair) { # FairRPS +# ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps] +# R <- dim(exp)[2] #memb +# R_new <- Inf +# adjustment <- (-1) / (R - 1) * probs_clim_cumsum * (1 - probs_clim_cumsum) +# adjustment <- apply(adjustment, 2, sum) +# rps_ref <- rps_ref + adjustment +# } + + } else { # use "ref" as reference forecast + rps_ref <- .RPS(exp = ref, obs = obs, prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair) } - ## RPSS + # RPSS rpss <- 1 - mean(rps_exp) / mean(rps_ref) - ## Significance - rps_exp <- array(data = rps_exp, dim = c(time = length(rps_exp))) - rps_ref <- array(data = rps_ref, dim = c(time = length(rps_ref))) - sign <- s2dv::RandomWalkTest(skill_A = rps_exp, skill_B = rps_ref, time_dim = 'time', ncores = NULL)$signif + # Significance + sign <- .RandomWalkTest(skill_A = rps_exp, skill_B = rps_ref)$signif return(list(rpss = rpss, sign = sign)) } -.get_probs_clim <- function(prob_thresholds, dims, time_dim){ - - probs <- array(data = NA, dim = c(bin = length(prob_thresholds)+1)) - probs[1] <- prob_thresholds[1] - if (length(prob_thresholds) > 1){ - probs[2:(length(probs)-1)] <- diff(prob_thresholds) - } - probs[length(probs)] <- 1 - prob_thresholds[length(prob_thresholds)] - stopifnot(identical(sum(probs),1)) - probs <- s2dv::InsertDim(data = probs, posdim = 2, lendim = dims[[time_dim]], name = time_dim) - - return(probs) -} diff --git a/man/RPS.Rd b/man/RPS.Rd index d1acc02..cbb0e7a 100644 --- a/man/RPS.Rd +++ b/man/RPS.Rd @@ -20,7 +20,8 @@ RPS( dimension.} \item{obs}{A named numerical array of the observation with at least time -dimension. The dimensions must be the same as 'exp' except 'memb_dim'.} +dimension. The dimensions must be the same as 'exp' except 'obs' doesn't +have 'memb_dim'.} \item{time_dim}{A character string indicating the name of the time dimension. The default value is 'sdate'.} diff --git a/man/RPSS.Rd b/man/RPSS.Rd index 47136f7..6871d80 100644 --- a/man/RPSS.Rd +++ b/man/RPSS.Rd @@ -8,68 +8,81 @@ RPSS( exp, obs, ref = NULL, + time_dim = "sdate", + memb_dim = "member", prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, - time_dim = "year", - member_dim = "member", ncores = NULL ) } \arguments{ -\item{exp}{A numerical array with the forecast with, at least, time dimension.} +\item{exp}{A named numerical array of the forecast with at least time +dimension.} -\item{obs}{A numerical array with the observations with, at least, time dimension. -The length of the time dimension must be the same as for "exp".} +\item{obs}{A named numerical array of the observation with at least time +dimension. The dimensions must be the same as 'exp' except 'obs' doesn't +have 'memb_dim'.} -\item{ref}{NULL or a numerical array with the reference forecast with, at least, time dimension. -The length of the time dimension must be the same as for "exp". If it is NULL, the climatological forecast is -used as reference forecast. The default value is NULL.} +\item{ref}{A named numerical array of the reference forecast data with at +least time dimension. The dimensions must be the same as 'exp' except +'memb_dim'. If it is NULL, the climatological forecast is used as reference +forecast. The default value is NULL.} -\item{prob_thresholds}{A numeric vector with the relative thresholds (between 0 and 1) between the categories. -The default value is c(1/3,2/3), which corresponds to tercile equiprobable categories.} +\item{time_dim}{A character string indicating the name of the time dimension. +The default value is 'sdate'.} -\item{indices_for_clim}{A vector with the indices to be taken along "time_dim" dimension for computing the thresholds -between the probabilistic categories. If NULL, the whole period is used. The default value is NULL.} +\item{memb_dim}{A character string indicating the name of the member dimension +to compute the probabilities of the forecast and the reference forecast. The +default value is 'member'.} -\item{Fair}{A logical indicating whether to compute the FairRPSS (the potential RPSS that the forecast would have with -an infinite ensemble size). The default value is FALSE.} +\item{prob_thresholds}{A numeric vector of the relative thresholds (from 0 to +1) between the categories. The default value is c(1/3, 2/3), which +corresponds to tercile equiprobable categories.} -\item{time_dim}{A character string indicating the name of the time dimension. -The default value is 'year'.} +\item{indices_for_clim}{A vector of the indices to be taken along 'time_dim' +for computing the thresholds between the probabilistic categories. If NULL, +the whole period is used. The default value is NULL.} -\item{member_dim}{A character string indicating the name of the member dimension to compute the -ensemble means of the forecast and reference forecast. If it is NULL, the ensemble means should -be provided directly to the function. The default value is 'member'.} +\item{Fair}{A logical indicating whether to compute the FairRPSS (the +potential RPSS that the forecast would have with an infinite ensemble size). +The default value is FALSE.} \item{ncores}{An integer indicating the number of cores to use for parallel computation. The default value is NULL.} } \value{ \item{$rpss}{ - A numerical array with the RPSS with the same dimensions as "exp" except the "time_dim" - and "member_dim" dimensions. + A numerical array of the RPSS with the same dimensions as "exp" except the + 'time_dim' and 'memb_dim' dimensions. } \item{$sign}{ - A logical array with the statistical significance of the RPSS with the same dimensions - as "exp" except the "time_dim" and "member_dim" dimensions. + A logical array of the statistical significance of the RPSS with the same + dimensions as 'exp' except the 'time_dim' and 'memb_dim' dimensions. } } \description{ -The Ranked Probability Skill Score (RPSS; Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7) -is the skill score based on the Ranked Probability Score (RPS; Wilks, 2011). It can be used to assess whether -a forecast presents an improvement or worsening with respect to a reference forecast. The RPSS ranges between -minus infinite and 1. If the RPSS is positive, it indicates that the forecast has higher skill than the -reference forecast, while a negative value means that it has a lower skill. Examples of reference forecasts -are the climatological forecast (same probabilities for all categories for all time steps), persistence, a previous model version, -and another model. It is computed as RPSS = 1 - RPS_exp / RPS_ref. The statistical significance is obtained based -on a Random Walk test at the 95% confidence level (DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1) +The Ranked Probability Skill Score (RPSS; Wilks, 2011) is the skill score +based on the Ranked Probability Score (RPS; Wilks, 2011). It can be used to +assess whether a forecast presents an improvement or worsening with respect to +a reference forecast. The RPSS ranges between minus infinite and 1. If the +RPSS is positive, it indicates that the forecast has higher skill than the +reference forecast, while a negative value means that it has a lower skill. +Examples of reference forecasts are the climatological forecast (same +probabilities for all categories for all time steps), persistence, a previous +model version, and another model. It is computed as RPSS = 1 - RPS_exp / RPS_ref. +The statistical significance is obtained based on a Random Walk test at the +95% confidence level (DelSole and Tippett, 2016). } \examples{ -exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50)) -ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50)) -obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, year = 50)) -RPSS(exp = exp, obs = obs) ## climatology as reference forecast -RPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast +exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +res <- RPSS(exp = exp, obs = obs) ## climatology as reference forecast +res <- RPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast } +\references{ +Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1 +} diff --git a/tests/testthat/test-RPS.R b/tests/testthat/test-RPS.R index 7b1f77e..00f89dc 100644 --- a/tests/testthat/test-RPS.R +++ b/tests/testthat/test-RPS.R @@ -49,7 +49,7 @@ test_that("1. Input checks", { # exp, ref, and obs (2) expect_error( RPS(exp1, array(1:9, dim = c(sdate = 9))), - "Parameter 'exp' and 'obs' must have same length of all dimensions expect 'memb_dim'." + "Parameter 'exp' and 'obs' must have same length of all dimensions expect 'obs' doesn't have 'memb_dim'." ) # prob_thresholds expect_error( diff --git a/tests/testthat/test-RPSS.R b/tests/testthat/test-RPSS.R new file mode 100644 index 0000000..88fa18e --- /dev/null +++ b/tests/testthat/test-RPSS.R @@ -0,0 +1,272 @@ +context("s2dv::RPSS tests") + +############################################## + +# dat1 +set.seed(1) +exp1 <- array(rnorm(60), dim = c(member = 3, sdate = 10, lat = 2)) +set.seed(2) +obs1 <- array(rnorm(20), dim = c(sdate = 10, lat = 2)) +set.seed(3) +ref1 <- array(rnorm(40), dim = c(member = 2, sdate = 10, lat = 2)) + +# dat2 +set.seed(1) +exp2 <- array(rnorm(20), dim = c(member = 2, sdate = 10)) +set.seed(2) +obs2 <- array(rnorm(10), dim = c(sdate = 10)) +set.seed(3) +ref2 <- array(rnorm(20), dim = c(member = 2, sdate = 10)) + + +############################################## +test_that("1. Input checks", { + # exp and obs (1) + expect_error( + RPSS(c()), + 'Parameter "exp" must be a numeric array.' + ) + expect_error( + RPSS(exp1, c()), + 'Parameter "obs" must be a numeric array.' + ) + + # time_dim + expect_error( + RPSS(exp1, obs1, time_dim = 1), + 'Parameter "time_dim" must be a character string.' + ) + expect_error( + RPSS(exp1, obs1, time_dim = 'time'), + "Parameter 'time_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + RPSS(exp2, obs2, array(rnorm(20), dim = c(member = 2, time = 10))), + "Parameter 'time_dim' is not found in 'ref' dimension." + ) + # memb_dim + expect_error( + RPSS(exp1, obs1, memb_dim = TRUE), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + RPSS(exp1, obs1, memb_dim = 'memb'), + "Parameter 'memb_dim' is not found in 'exp' dimension." + ) + expect_error( + RPSS(exp2, obs2, array(rnorm(20), dim = c(memb = 2, sdate = 10))), + "Parameter 'memb_dim' is not found in 'ref' dimension." + ) + # exp, ref, and obs (2) + expect_error( + RPSS(exp1, array(1:9, dim = c(sdate = 9))), + "Parameter 'exp' and 'obs' must have same length of all dimensions expect 'obs' doesn't have 'memb_dim'." + ) + expect_error( + RPSS(exp1, obs1, ref2), + "Parameter 'exp' and 'obs' must have same length of all dimensions expect 'memb_dim'." + ) + # prob_thresholds + expect_error( + RPSS(exp1, obs1, ref1, prob_thresholds = 1), + "Parameter 'prob_thresholds' must be a numeric vector between 0 and 1." + ) + # indices_for_clim + expect_error( + RPSS(exp1, obs1, ref1, indices_for_clim = array(1:6, dim = c(2, 3))), + "Parameter 'indices_for_clim' must be NULL or a numeric vector." + ) + expect_error( + RPSS(exp1, obs1, indices_for_clim = 3:11), + "Parameter 'indices_for_clim' should be the indices of 'time_dim'." + ) + # Fair + expect_error( + RPSS(exp1, obs1, Fair = 1), + "Parameter 'Fair' must be either TRUE or FALSE." + ) + # ncores + expect_error( + RPSS(exp2, obs2, ncores = 1.5), + "Parameter 'ncores' must be either NULL or a positive integer." + ) + +}) + +############################################## +test_that("2. Output checks: dat1", { + +expect_equal( +names(RPSS(exp1, obs1)), +c("rpss", "sign") +) +expect_equal( +names(RPSS(exp1, obs1, ref1)), +c("rpss", "sign") +) +expect_equal( +dim(RPSS(exp1, obs1)$rpss), +c(lat = 2) +) +expect_equal( +dim(RPSS(exp1, obs1)$sign), +c(lat = 2) +) +expect_equal( +dim(RPSS(exp1, obs1, ref1)$rpss), +c(lat = 2) +) +expect_equal( +dim(RPSS(exp1, obs1, ref1)$sign), +c(lat = 2) +) +# ref = NULL +expect_equal( +as.vector(RPSS(exp1, obs1)$rpss), +c(0.15789474, -0.05263158), +tolerance = 0.0001 +) +expect_equal( +as.vector(RPSS(exp1, obs1)$sign), +c(FALSE, FALSE), +) +expect_equal( +as.vector(RPSS(exp1, obs1, Fair = T)$rpss), +c(0.5263158, 0.3684211), +tolerance = 0.0001 +) +expect_equal( +as.vector(RPSS(exp1, obs1, indices_for_clim = 3:5)$rpss), +c(-0.4062500, -0.1052632), +tolerance = 0.0001 +) +expect_equal( +as.vector(RPSS(exp1, obs1, prob_thresholds = seq(0.1, 0.9, 0.1))$rpss), +c(0.03030303, -0.14478114), +tolerance = 0.0001 +) +# ref = ref +expect_equal( +as.vector(RPSS(exp1, obs1, ref1)$rpss), +c(0.5259259, 0.4771242), +tolerance = 0.0001 +) +expect_equal( +as.vector(RPSS(exp1, obs1, ref1)$sign), +c(FALSE, FALSE) +) +expect_equal( +as.vector(RPSS(exp1, obs1, ref1, Fair = T)$rpss), +c(0.6000000, 0.6190476), +tolerance = 0.0001 +) +expect_equal( +as.vector(RPSS(exp1, obs1, ref1, indices_for_clim = 3:5)$rpss), +c(0.2857143, 0.4166667), +tolerance = 0.0001 +) +expect_equal( +as.vector(RPSS(exp1, obs1, ref1, indices_for_clim = 3:5)$sign), +c(FALSE, FALSE) +) +expect_equal( +as.vector(RPSS(exp1, obs1, ref1, prob_thresholds = seq(0.1, 0.9, 0.1))$rpss), +c(0.4754098, 0.3703704), +tolerance = 0.0001 +) +expect_equal( +as.vector(RPSS(exp1, obs1, ref1, prob_thresholds = seq(0.1, 0.9, 0.1))$sign), +c(T, F) +) + +}) + +############################################## +test_that("3. Output checks: dat2", { + +expect_equal( +names(RPSS(exp2, obs2)), +c("rpss", "sign") +) +expect_equal( +names(RPSS(exp2, obs2, ref2)), +c("rpss", "sign") +) +expect_equal( +dim(RPSS(exp2, obs2)$rpss), +NULL +) +expect_equal( +dim(RPSS(exp2, obs2)$sign), +NULL +) +expect_equal( +dim(RPSS(exp2, obs2, ref2)$rpss), +NULL +) +expect_equal( +dim(RPSS(exp2, obs2, ref2)$sign), +NULL +) +# ref = NULL +expect_equal( +as.vector(RPSS(exp2, obs2)$rpss), +c(-0.7763158), +tolerance = 0.0001 +) +expect_equal( +as.vector(RPSS(exp2, obs2)$sign), +FALSE, +) +expect_equal( +as.vector(RPSS(exp2, obs2, Fair = T)$rpss), +c(-0.1842105), +tolerance = 0.0001 +) +expect_equal( +as.vector(RPSS(exp2, obs2, indices_for_clim = 3:5)$rpss), +-0.8984375, +tolerance = 0.0001 +) +expect_equal( +as.vector(RPSS(exp2, obs2, prob_thresholds = seq(0.1, 0.9, 0.1))$rpss), +c(-0.7272727), +tolerance = 0.0001 +) +# ref = ref +expect_equal( +as.vector(RPSS(exp2, obs2, ref2)$rpss), +0, +tolerance = 0.0001 +) +expect_equal( +as.vector(RPSS(exp2, obs2, ref2)$sign), +FALSE +) +expect_equal( +as.vector(RPSS(exp2, obs2, ref2, Fair = T)$rpss), +0, +tolerance = 0.0001 +) +expect_equal( +as.vector(RPSS(exp2, obs2, ref2, indices_for_clim = 3:5)$rpss), +0.03571429, +tolerance = 0.0001 +) +expect_equal( +as.vector(RPSS(exp2, obs2, ref2, indices_for_clim = 3:5)$sign), +FALSE +) +expect_equal( +as.vector(RPSS(exp2, obs2, ref2, prob_thresholds = seq(0.1, 0.9, 0.1))$rpss), +0.06557377, +tolerance = 0.0001 +) +expect_equal( +as.vector(RPSS(exp2, obs2, ref2, prob_thresholds = seq(0.1, 0.9, 0.1))$sign), +FALSE +) + + + +}) -- GitLab From 8c50012f7c8b1f9d33370cd4a67d509a5168ed43 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 12 May 2022 18:22:36 +0200 Subject: [PATCH 13/18] Move easyVerification to Import --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f2577bf..9690f44 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -37,9 +37,9 @@ Imports: NbClust, multiApply (>= 2.1.1), SpecsVerification (>= 0.5.0), - easyNCDF + easyNCDF, + easyVerification Suggests: - easyVerification, testthat License: Apache License 2.0 URL: https://earth.bsc.es/gitlab/es/s2dv/ -- GitLab From ed5a8f3ef529fda86c4baa856afd6cec456ddeb9 Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 16 May 2022 15:35:33 +0200 Subject: [PATCH 14/18] Allow obs to have memb_dim --- R/RPS.R | 22 +++++++++++++++------- R/RPSS.R | 22 +++++++++++++++------- man/RPS.Rd | 3 +-- man/RPSS.Rd | 3 +-- tests/testthat/test-RPS.R | 13 +++++++++++-- tests/testthat/test-RPSS.R | 10 +++++++--- 6 files changed, 50 insertions(+), 23 deletions(-) diff --git a/R/RPS.R b/R/RPS.R index 7b4af7c..84956ac 100644 --- a/R/RPS.R +++ b/R/RPS.R @@ -14,8 +14,7 @@ #'@param exp A named numerical array of the forecast with at least time #' dimension. #'@param obs A named numerical array of the observation with at least time -#' dimension. The dimensions must be the same as 'exp' except 'obs' doesn't -#' have 'memb_dim'. +#' dimension. The dimensions must be the same as 'exp' except 'memb_dim'. #'@param time_dim A character string indicating the name of the time dimension. #' The default value is 'sdate'. #'@param memb_dim A character string indicating the name of the member dimension @@ -78,10 +77,13 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) name_exp <- name_exp[-which(name_exp == memb_dim)] + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } if (!identical(length(name_exp), length(name_obs)) | !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions expect 'obs' doesn't have 'memb_dim'.")) + "all dimensions expect 'memb_dim'.")) } ## prob_thresholds if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | @@ -115,9 +117,15 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', ############################### # Compute RPS + if (!memb_dim %in% names(dim(obs))) { + target_dims_obs <- time_dim + } else { + target_dims_obs <- c(time_dim, memb_dim) + } + rps <- Apply(data = list(exp = exp, obs = obs), target_dims = list(exp = c(time_dim, memb_dim), - obs = time_dim), + obs = target_dims_obs), output_dims = time_dim, fun = .RPS, prob_thresholds = prob_thresholds, @@ -134,7 +142,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', .RPS <- function(exp, obs, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE) { # exp: [sdate, memb] - # obs: [sdate] + # obs: [sdate, (memb)] exp_probs <- .get_probs(data = exp, indices_for_quantiles = indices_for_clim, prob_thresholds = prob_thresholds) @@ -162,9 +170,9 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', .get_probs <- function(data, indices_for_quantiles, prob_thresholds) { # if exp: [sdate, memb] - # if obs: [sdate] + # if obs: [sdate, (memb)] - # Add dim [memb = 1] to obs + # Add dim [memb = 1] to obs if it doesn't have memb_dim if (length(dim(data)) == 1) dim(data) <- c(dim(data), 1) # Absolute thresholds diff --git a/R/RPSS.R b/R/RPSS.R index 1498456..04c3137 100644 --- a/R/RPSS.R +++ b/R/RPSS.R @@ -15,8 +15,7 @@ #'@param exp A named numerical array of the forecast with at least time #' dimension. #'@param obs A named numerical array of the observation with at least time -#' dimension. The dimensions must be the same as 'exp' except 'obs' doesn't -#' have 'memb_dim'. +#' dimension. The dimensions must be the same as 'exp' except 'memb_dim'. #'@param ref A named numerical array of the reference forecast data with at #' least time dimension. The dimensions must be the same as 'exp' except #' 'memb_dim'. If it is NULL, the climatological forecast is used as reference @@ -63,7 +62,7 @@ #'@export RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, - ncores = NULL){ + ncores = NULL) { # Check inputs ## exp, obs, and ref (1) @@ -98,10 +97,13 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) name_exp <- name_exp[-which(name_exp == memb_dim)] + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } if (!identical(length(name_exp), length(name_obs)) | !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions expect 'obs' doesn't have 'memb_dim'.")) + "all dimensions expect 'memb_dim'.")) } if (!is.null(ref)) { name_ref <- sort(names(dim(ref))) @@ -144,15 +146,21 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', ############################### # Compute RPSS + if (!memb_dim %in% names(dim(obs))) { + target_dims_obs <- time_dim + } else { + target_dims_obs <- c(time_dim, memb_dim) + } + if (!is.null(ref)) { # use "ref" as reference forecast data <- list(exp = exp, obs = obs, ref = ref) target_dims = list(exp = c(time_dim, memb_dim), - obs = time_dim, + obs = target_dims_obs, ref = c(time_dim, memb_dim)) } else { data <- list(exp = exp, obs = obs) target_dims = list(exp = c(time_dim, memb_dim), - obs = time_dim) + obs = target_dims_obs) } output <- Apply(data, target_dims = target_dims, @@ -167,7 +175,7 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', .RPSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE) { # exp: [sdate, memb] - # obs: [sdate] + # obs: [sdate, (memb)] # ref: [sdate, memb] or NULL # RPS of the forecast diff --git a/man/RPS.Rd b/man/RPS.Rd index cbb0e7a..d1acc02 100644 --- a/man/RPS.Rd +++ b/man/RPS.Rd @@ -20,8 +20,7 @@ RPS( dimension.} \item{obs}{A named numerical array of the observation with at least time -dimension. The dimensions must be the same as 'exp' except 'obs' doesn't -have 'memb_dim'.} +dimension. The dimensions must be the same as 'exp' except 'memb_dim'.} \item{time_dim}{A character string indicating the name of the time dimension. The default value is 'sdate'.} diff --git a/man/RPSS.Rd b/man/RPSS.Rd index 6871d80..6324ea2 100644 --- a/man/RPSS.Rd +++ b/man/RPSS.Rd @@ -21,8 +21,7 @@ RPSS( dimension.} \item{obs}{A named numerical array of the observation with at least time -dimension. The dimensions must be the same as 'exp' except 'obs' doesn't -have 'memb_dim'.} +dimension. The dimensions must be the same as 'exp' except 'memb_dim'.} \item{ref}{A named numerical array of the reference forecast data with at least time dimension. The dimensions must be the same as 'exp' except diff --git a/tests/testthat/test-RPS.R b/tests/testthat/test-RPS.R index 00f89dc..e5a5f0c 100644 --- a/tests/testthat/test-RPS.R +++ b/tests/testthat/test-RPS.R @@ -14,6 +14,12 @@ exp2 <- array(rnorm(20), dim = c(member = 2, sdate = 10)) set.seed(2) obs2 <- array(rnorm(10), dim = c(sdate = 10)) +# dat2_1 +set.seed(1) +exp2_1 <- array(rnorm(20), dim = c(member = 2, sdate = 10)) +set.seed(2) +obs2_1 <- array(rnorm(10), dim = c(member = 1, sdate = 10)) + ############################################## @@ -49,7 +55,7 @@ test_that("1. Input checks", { # exp, ref, and obs (2) expect_error( RPS(exp1, array(1:9, dim = c(sdate = 9))), - "Parameter 'exp' and 'obs' must have same length of all dimensions expect 'obs' doesn't have 'memb_dim'." + "Parameter 'exp' and 'obs' must have same length of all dimensions expect 'memb_dim'." ) # prob_thresholds expect_error( @@ -125,5 +131,8 @@ RPS(exp2, obs2, indices_for_clim = 2:5, prob_thresholds = seq(0.1, 0.9, 0.1)), 2.75, tolerance = 0.0001 ) - +expect_equal( +RPS(exp2, obs2), +RPS(exp2_1, obs2_1) +) }) diff --git a/tests/testthat/test-RPSS.R b/tests/testthat/test-RPSS.R index 88fa18e..b4c0b7a 100644 --- a/tests/testthat/test-RPSS.R +++ b/tests/testthat/test-RPSS.R @@ -15,6 +15,8 @@ set.seed(1) exp2 <- array(rnorm(20), dim = c(member = 2, sdate = 10)) set.seed(2) obs2 <- array(rnorm(10), dim = c(sdate = 10)) +set.seed(2) +obs2_1 <- array(rnorm(10), dim = c(member = 1, sdate = 10)) set.seed(3) ref2 <- array(rnorm(20), dim = c(member = 2, sdate = 10)) @@ -60,7 +62,7 @@ test_that("1. Input checks", { # exp, ref, and obs (2) expect_error( RPSS(exp1, array(1:9, dim = c(sdate = 9))), - "Parameter 'exp' and 'obs' must have same length of all dimensions expect 'obs' doesn't have 'memb_dim'." + "Parameter 'exp' and 'obs' must have same length of all dimensions expect 'memb_dim'." ) expect_error( RPSS(exp1, obs1, ref2), @@ -266,7 +268,9 @@ expect_equal( as.vector(RPSS(exp2, obs2, ref2, prob_thresholds = seq(0.1, 0.9, 0.1))$sign), FALSE ) - - +expect_equal( +RPSS(exp2, obs2), +RPSS(exp2, obs2_1) +) }) -- GitLab From 269b8791c70b05b24d310c406c1794b29cdfa95b Mon Sep 17 00:00:00 2001 From: jcos Date: Wed, 18 May 2022 18:04:46 +0200 Subject: [PATCH 15/18] first attempt at weighted quantile and probs --- R/RPS.R | 52 ++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 10 deletions(-) diff --git a/R/RPS.R b/R/RPS.R index 84956ac..b2065cd 100644 --- a/R/RPS.R +++ b/R/RPS.R @@ -48,7 +48,7 @@ #'@export RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, - ncores = NULL) { + ncores = NULL, weights = NULL) { # Check inputs ## exp and obs (1) @@ -130,7 +130,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', fun = .RPS, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, Fair = Fair, - ncores = ncores)$output1 + ncores = ncores, weights = weights)$output1 # Return only the mean RPS rps <- MeanDims(rps, time_dim, na.rm = FALSE) @@ -140,12 +140,12 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', .RPS <- function(exp, obs, prob_thresholds = c(1/3, 2/3), - indices_for_clim = NULL, Fair = FALSE) { + indices_for_clim = NULL, Fair = FALSE, weights = NULL) { # exp: [sdate, memb] # obs: [sdate, (memb)] exp_probs <- .get_probs(data = exp, indices_for_quantiles = indices_for_clim, - prob_thresholds = prob_thresholds) + prob_thresholds = prob_thresholds, weights = weights) # exp_probs: [bin, sdate] obs_probs <- .get_probs(data = obs, indices_for_quantiles = indices_for_clim, prob_thresholds = prob_thresholds) @@ -168,7 +168,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', return(rps) } -.get_probs <- function(data, indices_for_quantiles, prob_thresholds) { +.get_probs <- function(data, indices_for_quantiles, prob_thresholds, weigths = NULL) { # if exp: [sdate, memb] # if obs: [sdate, (memb)] @@ -176,18 +176,50 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', if (length(dim(data)) == 1) dim(data) <- c(dim(data), 1) # Absolute thresholds - quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), probs = prob_thresholds, type = 8, na.rm = TRUE) - + if (is.null(weights)){ + quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), probs = prob_thresholds, type = 8, na.rm = TRUE) + } else { + # linear approach to compute weighted distribution thresholds. + sorted_data, sorted_weights <- .sorted_distributions(data[indices_for_quantiles, ], weights[indices_for_quantiles, ]) + # approach 1 (interpolate from discrete cumulative weights distribution. NOT type 8) + cumulative_weights <- cumsum(sorted_weights) - 0.5 * sorted_weights + cumulative_weights <- cumulative_weights - cumulative_weights[1] # fix the 0 + cumulative_weights <- cumulative_weights / cumulative_weights[length(cumulative_weights)] # fix the 1 + quantiles <- interp1(cumulative_weights, sorted_data, c(1/3,2/3), "cubic") + # approach 2 (interpolate from the polynomial fit of the cumulative weights distribution) + fit_model <- lm(cumsum(sorted_weights) ~ poly(sorted_data,10)) # order 10, but we should think what order we want to use + predicted.intervals <- predict(fit_model,data.frame(x=sorted_data)) + quantiles <- interp1(predicted.intervals, sorted_data, c(1/3,2/3), "cubic") + } # Probabilities probs <- array(dim = c(bin = length(quantiles) + 1, dim(data)[1])) # [bin, sdate] for (i_time in 1:dim(data)[1]) { - if (anyNA(data[i_time, ])) { - probs[, i_time] <- rep(NA, dim = length(quantiles) + 1) + if (is.null(weights)){ + if (anyNA(data[i_time, ])) { + probs[, i_time] <- rep(NA, dim = length(quantiles) + 1) + } else { + probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], threshold = quantiles)) + } } else { - probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], threshold = quantiles)) + sorted_data, sorted_weights <- .sorted_distributions(data[i_time, ], weights[i_time, ]) + + fit_model <- lm(cumsum(sorted_weights) ~ poly(sorted_data,10)) + predicted.intervals <- predict(fit_model,data.frame(x=sorted_data)) + integrated_probs <- interp1(sorted_data, predicted.intervals, quantiles, "cubic") + probs[, i_time] <- c(integrated_probs[1], integrated_probs[2]-integrated_probs[1], 1-integrated_probs[2]) } + } return(probs) } +.sorted_distributions <- function(data_vector, weights_vector){ + weights_vector <- as.vector(weights_vector) + data_vector <- as.vector(data_vector) + weights_vector <- weights_vector / sum(weights_vector) # normalize to 1 + sorter <- order(data_vector) + sorted_data <- data_vector[sorter] + sorted_weights <- weights_vector[sorter] + return(sorted_data, sorted_weights) +} \ No newline at end of file -- GitLab From 1283a29dad4b39c6eacf315c8755128c991f5b2d Mon Sep 17 00:00:00 2001 From: jcos Date: Thu, 19 May 2022 08:30:49 +0200 Subject: [PATCH 16/18] quantile and prob from poly fit coeff, not interp --- R/RPS.R | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/R/RPS.R b/R/RPS.R index b2065cd..1afdf6e 100644 --- a/R/RPS.R +++ b/R/RPS.R @@ -185,11 +185,11 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cumulative_weights <- cumsum(sorted_weights) - 0.5 * sorted_weights cumulative_weights <- cumulative_weights - cumulative_weights[1] # fix the 0 cumulative_weights <- cumulative_weights / cumulative_weights[length(cumulative_weights)] # fix the 1 - quantiles <- interp1(cumulative_weights, sorted_data, c(1/3,2/3), "cubic") + quantiles <- interp1(cumulative_weights, sorted_data, c(1/3,2/3), "cubic") # library(signal) # approach 2 (interpolate from the polynomial fit of the cumulative weights distribution) fit_model <- lm(cumsum(sorted_weights) ~ poly(sorted_data,10)) # order 10, but we should think what order we want to use - predicted.intervals <- predict(fit_model,data.frame(x=sorted_data)) - quantiles <- interp1(predicted.intervals, sorted_data, c(1/3,2/3), "cubic") + a <- fit_model$coefficients + quantiles <- a[1]+a[2]*prob_threshold+a[3]*prob_threshold^2+a[4]*prob_threshold^3 } # Probabilities probs <- array(dim = c(bin = length(quantiles) + 1, dim(data)[1])) # [bin, sdate] @@ -202,10 +202,9 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', } } else { sorted_data, sorted_weights <- .sorted_distributions(data[i_time, ], weights[i_time, ]) - - fit_model <- lm(cumsum(sorted_weights) ~ poly(sorted_data,10)) - predicted.intervals <- predict(fit_model,data.frame(x=sorted_data)) - integrated_probs <- interp1(sorted_data, predicted.intervals, quantiles, "cubic") + fit_model <- lm(cumsum(sorted_weights) ~ poly(sorted_data,10)) # poly(sorted_data,10,raw=TRUE)? + a <- fit_model$coefficients + integrated_probs <- a[1]+a[2]*quantiles+a[3]*quantiles^2+a[4]*quantiles^3 probs[, i_time] <- c(integrated_probs[1], integrated_probs[2]-integrated_probs[1], 1-integrated_probs[2]) } @@ -215,11 +214,11 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', } .sorted_distributions <- function(data_vector, weights_vector){ - weights_vector <- as.vector(weights_vector) - data_vector <- as.vector(data_vector) - weights_vector <- weights_vector / sum(weights_vector) # normalize to 1 - sorter <- order(data_vector) - sorted_data <- data_vector[sorter] - sorted_weights <- weights_vector[sorter] - return(sorted_data, sorted_weights) + weights_vector <- as.vector(weights_vector) + data_vector <- as.vector(data_vector) + weights_vector <- weights_vector / sum(weights_vector) # normalize to 1 + sorter <- order(data_vector) + sorted_data <- data_vector[sorter] + sorted_weights <- weights_vector[sorter] + return(sorted_data, sorted_weights) } \ No newline at end of file -- GitLab From 762a4f529d4f4e040505a6ed585e4674578f0731 Mon Sep 17 00:00:00 2001 From: Pep Cos Date: Thu, 19 May 2022 10:01:55 +0200 Subject: [PATCH 17/18] Revert "quantile and prob from poly fit coeff, not interp" This reverts commit 1283a29dad4b39c6eacf315c8755128c991f5b2d --- R/RPS.R | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/R/RPS.R b/R/RPS.R index 1afdf6e..b2065cd 100644 --- a/R/RPS.R +++ b/R/RPS.R @@ -185,11 +185,11 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cumulative_weights <- cumsum(sorted_weights) - 0.5 * sorted_weights cumulative_weights <- cumulative_weights - cumulative_weights[1] # fix the 0 cumulative_weights <- cumulative_weights / cumulative_weights[length(cumulative_weights)] # fix the 1 - quantiles <- interp1(cumulative_weights, sorted_data, c(1/3,2/3), "cubic") # library(signal) + quantiles <- interp1(cumulative_weights, sorted_data, c(1/3,2/3), "cubic") # approach 2 (interpolate from the polynomial fit of the cumulative weights distribution) fit_model <- lm(cumsum(sorted_weights) ~ poly(sorted_data,10)) # order 10, but we should think what order we want to use - a <- fit_model$coefficients - quantiles <- a[1]+a[2]*prob_threshold+a[3]*prob_threshold^2+a[4]*prob_threshold^3 + predicted.intervals <- predict(fit_model,data.frame(x=sorted_data)) + quantiles <- interp1(predicted.intervals, sorted_data, c(1/3,2/3), "cubic") } # Probabilities probs <- array(dim = c(bin = length(quantiles) + 1, dim(data)[1])) # [bin, sdate] @@ -202,9 +202,10 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', } } else { sorted_data, sorted_weights <- .sorted_distributions(data[i_time, ], weights[i_time, ]) - fit_model <- lm(cumsum(sorted_weights) ~ poly(sorted_data,10)) # poly(sorted_data,10,raw=TRUE)? - a <- fit_model$coefficients - integrated_probs <- a[1]+a[2]*quantiles+a[3]*quantiles^2+a[4]*quantiles^3 + + fit_model <- lm(cumsum(sorted_weights) ~ poly(sorted_data,10)) + predicted.intervals <- predict(fit_model,data.frame(x=sorted_data)) + integrated_probs <- interp1(sorted_data, predicted.intervals, quantiles, "cubic") probs[, i_time] <- c(integrated_probs[1], integrated_probs[2]-integrated_probs[1], 1-integrated_probs[2]) } @@ -214,11 +215,11 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', } .sorted_distributions <- function(data_vector, weights_vector){ - weights_vector <- as.vector(weights_vector) - data_vector <- as.vector(data_vector) - weights_vector <- weights_vector / sum(weights_vector) # normalize to 1 - sorter <- order(data_vector) - sorted_data <- data_vector[sorter] - sorted_weights <- weights_vector[sorter] - return(sorted_data, sorted_weights) + weights_vector <- as.vector(weights_vector) + data_vector <- as.vector(data_vector) + weights_vector <- weights_vector / sum(weights_vector) # normalize to 1 + sorter <- order(data_vector) + sorted_data <- data_vector[sorter] + sorted_weights <- weights_vector[sorter] + return(sorted_data, sorted_weights) } \ No newline at end of file -- GitLab From d172207b5941974abf23500db1bb7536d1d22996 Mon Sep 17 00:00:00 2001 From: Pep Cos Date: Thu, 19 May 2022 10:07:20 +0200 Subject: [PATCH 18/18] Revert "first attempt at weighted quantile and probs" This reverts commit 269b8791c70b05b24d310c406c1794b29cdfa95b --- R/RPS.R | 52 ++++++++++------------------------------------------ 1 file changed, 10 insertions(+), 42 deletions(-) diff --git a/R/RPS.R b/R/RPS.R index b2065cd..84956ac 100644 --- a/R/RPS.R +++ b/R/RPS.R @@ -48,7 +48,7 @@ #'@export RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, - ncores = NULL, weights = NULL) { + ncores = NULL) { # Check inputs ## exp and obs (1) @@ -130,7 +130,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', fun = .RPS, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, Fair = Fair, - ncores = ncores, weights = weights)$output1 + ncores = ncores)$output1 # Return only the mean RPS rps <- MeanDims(rps, time_dim, na.rm = FALSE) @@ -140,12 +140,12 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', .RPS <- function(exp, obs, prob_thresholds = c(1/3, 2/3), - indices_for_clim = NULL, Fair = FALSE, weights = NULL) { + indices_for_clim = NULL, Fair = FALSE) { # exp: [sdate, memb] # obs: [sdate, (memb)] exp_probs <- .get_probs(data = exp, indices_for_quantiles = indices_for_clim, - prob_thresholds = prob_thresholds, weights = weights) + prob_thresholds = prob_thresholds) # exp_probs: [bin, sdate] obs_probs <- .get_probs(data = obs, indices_for_quantiles = indices_for_clim, prob_thresholds = prob_thresholds) @@ -168,7 +168,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', return(rps) } -.get_probs <- function(data, indices_for_quantiles, prob_thresholds, weigths = NULL) { +.get_probs <- function(data, indices_for_quantiles, prob_thresholds) { # if exp: [sdate, memb] # if obs: [sdate, (memb)] @@ -176,50 +176,18 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', if (length(dim(data)) == 1) dim(data) <- c(dim(data), 1) # Absolute thresholds - if (is.null(weights)){ - quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), probs = prob_thresholds, type = 8, na.rm = TRUE) - } else { - # linear approach to compute weighted distribution thresholds. - sorted_data, sorted_weights <- .sorted_distributions(data[indices_for_quantiles, ], weights[indices_for_quantiles, ]) - # approach 1 (interpolate from discrete cumulative weights distribution. NOT type 8) - cumulative_weights <- cumsum(sorted_weights) - 0.5 * sorted_weights - cumulative_weights <- cumulative_weights - cumulative_weights[1] # fix the 0 - cumulative_weights <- cumulative_weights / cumulative_weights[length(cumulative_weights)] # fix the 1 - quantiles <- interp1(cumulative_weights, sorted_data, c(1/3,2/3), "cubic") - # approach 2 (interpolate from the polynomial fit of the cumulative weights distribution) - fit_model <- lm(cumsum(sorted_weights) ~ poly(sorted_data,10)) # order 10, but we should think what order we want to use - predicted.intervals <- predict(fit_model,data.frame(x=sorted_data)) - quantiles <- interp1(predicted.intervals, sorted_data, c(1/3,2/3), "cubic") - } + quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), probs = prob_thresholds, type = 8, na.rm = TRUE) + # Probabilities probs <- array(dim = c(bin = length(quantiles) + 1, dim(data)[1])) # [bin, sdate] for (i_time in 1:dim(data)[1]) { - if (is.null(weights)){ - if (anyNA(data[i_time, ])) { - probs[, i_time] <- rep(NA, dim = length(quantiles) + 1) - } else { - probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], threshold = quantiles)) - } + if (anyNA(data[i_time, ])) { + probs[, i_time] <- rep(NA, dim = length(quantiles) + 1) } else { - sorted_data, sorted_weights <- .sorted_distributions(data[i_time, ], weights[i_time, ]) - - fit_model <- lm(cumsum(sorted_weights) ~ poly(sorted_data,10)) - predicted.intervals <- predict(fit_model,data.frame(x=sorted_data)) - integrated_probs <- interp1(sorted_data, predicted.intervals, quantiles, "cubic") - probs[, i_time] <- c(integrated_probs[1], integrated_probs[2]-integrated_probs[1], 1-integrated_probs[2]) + probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], threshold = quantiles)) } - } return(probs) } -.sorted_distributions <- function(data_vector, weights_vector){ - weights_vector <- as.vector(weights_vector) - data_vector <- as.vector(data_vector) - weights_vector <- weights_vector / sum(weights_vector) # normalize to 1 - sorter <- order(data_vector) - sorted_data <- data_vector[sorter] - sorted_weights <- weights_vector[sorter] - return(sorted_data, sorted_weights) -} \ No newline at end of file -- GitLab