From 809eb885c4dcc5963c8343f87c288ba2bc8e8fcc Mon Sep 17 00:00:00 2001
From: Carlos Delgado <carlos.delgado@bsc.es>
Date: Fri, 4 Mar 2022 16:49:53 +0100
Subject: [PATCH 01/18] first version (without documentation)

---
 R/RPS.R  | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 R/RPSS.R | 64 ++++++++++++++++++++++++++++++++++++
 2 files changed, 163 insertions(+)
 create mode 100644 R/RPS.R
 create mode 100644 R/RPSS.R

diff --git a/R/RPS.R b/R/RPS.R
new file mode 100644
index 0000000..0081d8e
--- /dev/null
+++ b/R/RPS.R
@@ -0,0 +1,99 @@
+
+library(multiApply)
+library(easyVerification)
+
+RPS <- function(exp, obs, n_categories, indices_for_clim, Fair = FALSE, time_dim = 'year', member_dim = 'member', ncores = NULL){
+  
+  ## Checkings
+  if (!is.array(exp)){stop('"exp" must be an array.')}
+  if (!is.array(obs)){stop('"obs" must be an array.')}
+  if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(obs)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "obs".')}
+  if (!is.numeric(n_categories) | n_categories < 2){stop('"n_categories" must be an integer greater than 1.')}
+  if (!is.logical(Fair)){stop('"Fair" must be TRUE or FALSE.')}
+  if (!is.character(time_dim)){stop('"time_dim" must be a string.')}
+  if (!is.character(member_dim)){stop('"member_dim" must be a string.')}
+  if (!is.null(ncores)){
+    if (!is.numeric(ncores) | ncores < 1){stop('"ncores" must be either NULL or a positive integer.')}
+  }
+  
+  ## RPS
+  rps <- multiApply::Apply(data = list(exp = exp, obs = obs), 
+                           target_dims = list(exp = c(time_dim, member_dim), obs = time_dim),
+                           n_categories = n_categories, indices_for_clim = indices_for_clim,
+                           Fair = Fair, time_dim = time_dim, member_dim = member_dim,
+                           fun = .RPS, ncores = ncores)$output1
+  return(rps)
+}
+
+.RPS <- function(exp, obs, n_categories, indices_for_clim, Fair = FALSE, time_dim = 'year', member_dim = 'member'){
+  
+  exp_probs <- .get_probs(data = exp, type = 'exp', indices_for_quartiles = indices_for_clim, 
+                          time_dim = time_dim, member_dim = member_dim, n_categories = n_categories)$probs
+  
+  obs_probs <- .get_probs(data = obs, type = 'obs', indices_for_quartiles = indices_for_clim, 
+                          time_dim = time_dim, member_dim = NULL, n_categories = n_categories)$probs
+  
+  rps <- .RPS_from_probs(probs_exp = exp_probs, probs_obs = obs_probs, n_categories = n_categories, Fair = Fair, n_members = as.numeric(dim(exp)[member_dim]))
+  
+  return(rps)
+}
+
+.get_probs <- function(data, type, indices_for_quartiles, time_dim, member_dim, n_categories){
+  
+  if (type == 'exp'){
+    target_dims_quantiles = c(time_dim, member_dim)
+    target_dims_probs = list(member_dim, 'bin')
+  } else if (type == 'obs'){
+    target_dims_quantiles = time_dim
+    target_dims_probs = list(NULL,'bin')
+  } else {stop("Parameter 'type' must be 'exp' or 'obs'")}
+  
+  quantiles <- multiApply::Apply(data = ClimProjDiags::Subset(x = data, along = time_dim, indices = indices_for_quartiles), 
+                                 target_dims = target_dims_quantiles, 
+                                 fun = .quantile_aux, n_categories = n_categories, 
+                                 output_dims = 'bin', ncores = NULL)$output1
+  
+  probs <- multiApply::Apply(data = list(data = data, quantiles = quantiles),
+                             target_dims = target_dims_probs,
+                             fun = .c2p, 
+                             output_dims = 'bin', ncores = NULL)$output1
+  
+  return(list('probs' = probs, 'quantiles' = quantiles))
+}
+
+.quantile_aux = function(data, n_categories){
+  data = as.vector(data)
+  q = quantile(x = data, probs = 1:(n_categories - 1) / n_categories, type = 8, na.rm = TRUE)
+  return(q)
+}
+
+.c2p <- function(data, quantiles) {
+  if (anyNA(data)){
+    output <- array(data = NA, dim = length(quantiles) + 1)
+  } else {
+    output <- colMeans(easyVerification::convert2prob(as.vector(data), threshold = as.vector(quantiles)))
+  }
+  return(output)
+}
+
+
+.RPS_from_probs <- function(probs_exp, probs_obs, n_categories, Fair, n_members){
+  
+  rps <- NULL
+  for (i in 1:n_categories){
+    rps[i] <- (cumsum(probs_exp)[i]-cumsum(probs_obs)[i])^2 ## (Wilks 2011, pp.348-350)
+  }
+  rps <- sum(rps)
+  
+  if (Fair == TRUE){ ## FairRPS
+    ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps]
+    R <- n_members
+    R_new <- Inf 
+    probs_cum <- cumsum(probs_exp)
+    adjustment <- -1 / (R-1) * probs_cum * (1 - probs_cum)
+    adjustment <- sum(adjustment)
+    rps <- rps + adjustment
+  }
+  
+  return(rps)
+}
diff --git a/R/RPSS.R b/R/RPSS.R
new file mode 100644
index 0000000..b9d59d0
--- /dev/null
+++ b/R/RPSS.R
@@ -0,0 +1,64 @@
+
+library(multiApply)
+library(easyVerification)
+library(s2dv)
+
+RPSS <- function(exp, obs, ref = NULL, n_categories, indices_for_clim, Fair = FALSE, time_dim = 'year', member_dim = 'member', ncores = NULL){
+  
+  ## Checkings
+  if (!is.array(exp)){stop('"exp" must be an array.')}
+  if (!is.array(obs)){stop('"obs" must be an array.')}
+  if (!is.null(ref)){
+    if (!is.array(ref)){stop('"ref" must be an array.')}
+    if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(ref)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "ref".')}
+  }
+  if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(obs)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "obs".')}
+  if (!is.numeric(n_categories) | n_categories < 2){stop('"n_categories" must be an integer greater than 1.')}
+  if (!is.logical(Fair)){stop('"Fair" must be TRUE or FALSE.')}
+  if (!is.character(time_dim)){stop('"time_dim" must be a string.')}
+  if (!is.character(member_dim)){stop('"member_dim" must be a string.')}
+  if (!is.null(ncores)){
+    if (!is.numeric(ncores) | ncores < 1){stop('"ncores" must be either NULL or a positive integer.')}
+  } 
+  
+  if (!is.null(ref)){ ## using "ref" as reference forecast
+    output <- multiApply::Apply(data = list(exp = exp, obs = obs, ref = ref), 
+                                target_dims = list(exp = c(time_dim, member_dim), obs = time_dim, ref = c(time_dim, member_dim)),
+                                n_categories = n_categories, indices_for_clim = indices_for_clim,
+                                Fair = Fair, time_dim = time_dim, member_dim = member_dim,
+                                fun = .RPSS, ncores = ncores)
+  } else { ## using climatology as reference forecast
+    output <- multiApply::Apply(data = list(exp = exp, obs = obs), 
+                                target_dims = list(exp = c(time_dim, member_dim), obs = time_dim),
+                                ref = ref, n_categories = n_categories, indices_for_clim = indices_for_clim,
+                                Fair = Fair, time_dim = time_dim, member_dim = member_dim,
+                                fun = .RPSS, ncores = ncores)
+  }
+  
+  return(output)
+}
+
+.RPSS <- function(exp, obs, ref, n_categories, indices_for_clim, Fair, time_dim, member_dim){
+  
+  ## RPS of the forecast
+  rps_exp <- .RPS(exp = exp, obs = obs, n_categories = n_categories, indices_for_clim = indices_for_clim, Fair = Fair, time_dim = time_dim, member_dim = member_dim)
+  
+  ## RPS of the reference forecast
+  if (is.null(ref)){ ## using climatology as reference forecast
+    clim_probs <- array(data = 1 / n_categories, dim = c(time = length(obs)))
+    obs_probs <- .get_probs(data = obs, type = 'obs', indices_for_quartiles = indices_for_clim, time_dim = time_dim, member_dim = NULL, n_categories = n_categories)$probs
+    rps_ref <- .RPS_from_probs(probs_exp = clim_probs, probs_obs = obs_probs, n_categories = n_categories, Fair = FALSE)
+  } else { ## using "ref" as reference forecast
+    rps_ref <- .RPS(exp = ref, obs = obs, n_categories = n_categories, indices_for_clim = indices_for_clim, Fair = Fair, time_dim = time_dim, member_dim = member_dim)
+  }
+  
+  ## RPSS
+  rpss <- 1 - mean(rps_exp) / mean(rps_ref)
+  
+  ## Significance
+  rps_exp <- array(data = rps_exp, dim = c(time = length(rps_exp)))
+  rps_ref <- array(data = rps_ref, dim = c(time = length(rps_ref)))
+  sign <- s2dv::RandomWalkTest(skill_A = rps_exp, skill_B = rps_ref, time_dim = 'time', ncores = NULL)$signif
+  
+  return(list(rpss = rpss, sign = sign))
+}
-- 
GitLab


From 8ec54099762aa2bf7d310d6c472abc402589aec1 Mon Sep 17 00:00:00 2001
From: Carlos Delgado <carlos.delgado@bsc.es>
Date: Sat, 5 Mar 2022 15:44:29 +0100
Subject: [PATCH 02/18] fixed dim(clim_probs)

---
 R/RPSS.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/RPSS.R b/R/RPSS.R
index b9d59d0..fc25631 100644
--- a/R/RPSS.R
+++ b/R/RPSS.R
@@ -45,8 +45,8 @@ RPSS <- function(exp, obs, ref = NULL, n_categories, indices_for_clim, Fair = FA
   
   ## RPS of the reference forecast
   if (is.null(ref)){ ## using climatology as reference forecast
-    clim_probs <- array(data = 1 / n_categories, dim = c(time = length(obs)))
     obs_probs <- .get_probs(data = obs, type = 'obs', indices_for_quartiles = indices_for_clim, time_dim = time_dim, member_dim = NULL, n_categories = n_categories)$probs
+    clim_probs <- array(data = 1 / n_categories, dim = dim(obs_probs))
     rps_ref <- .RPS_from_probs(probs_exp = clim_probs, probs_obs = obs_probs, n_categories = n_categories, Fair = FALSE)
   } else { ## using "ref" as reference forecast
     rps_ref <- .RPS(exp = ref, obs = obs, n_categories = n_categories, indices_for_clim = indices_for_clim, Fair = Fair, time_dim = time_dim, member_dim = member_dim)
-- 
GitLab


From 3a30a3f67b4100267ea743c70431b3f6fed9fcf6 Mon Sep 17 00:00:00 2001
From: Carlos Delgado <carlos.delgado@bsc.es>
Date: Thu, 10 Mar 2022 17:06:58 +0100
Subject: [PATCH 03/18] .

---
 R/RPS.R  | 5 +++--
 R/RPSS.R | 3 ++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/R/RPS.R b/R/RPS.R
index 0081d8e..c7e7e0b 100644
--- a/R/RPS.R
+++ b/R/RPS.R
@@ -33,8 +33,9 @@ RPS <- function(exp, obs, n_categories, indices_for_clim, Fair = FALSE, time_dim
   obs_probs <- .get_probs(data = obs, type = 'obs', indices_for_quartiles = indices_for_clim, 
                           time_dim = time_dim, member_dim = NULL, n_categories = n_categories)$probs
   
-  rps <- .RPS_from_probs(probs_exp = exp_probs, probs_obs = obs_probs, n_categories = n_categories, Fair = Fair, n_members = as.numeric(dim(exp)[member_dim]))
-  
+  rps <- multiApply::Apply(data = list(probs_exp = exp_probs, probs_obs = obs_probs), target_dims = 'bin',
+                           fun = .RPS_from_probs, n_categories = n_categories, Fair = Fair, 
+                           n_members = as.numeric(dim(exp)[member_dim]), ncores = NULL)$output1
   return(rps)
 }
 
diff --git a/R/RPSS.R b/R/RPSS.R
index fc25631..ae6881f 100644
--- a/R/RPSS.R
+++ b/R/RPSS.R
@@ -47,7 +47,8 @@ RPSS <- function(exp, obs, ref = NULL, n_categories, indices_for_clim, Fair = FA
   if (is.null(ref)){ ## using climatology as reference forecast
     obs_probs <- .get_probs(data = obs, type = 'obs', indices_for_quartiles = indices_for_clim, time_dim = time_dim, member_dim = NULL, n_categories = n_categories)$probs
     clim_probs <- array(data = 1 / n_categories, dim = dim(obs_probs))
-    rps_ref <- .RPS_from_probs(probs_exp = clim_probs, probs_obs = obs_probs, n_categories = n_categories, Fair = FALSE)
+    rps_ref <- multiApply::Apply(data = list(probs_exp = clim_probs, probs_obs = obs_probs), target_dims = 'bin',
+                                 fun = .RPS_from_probs, n_categories = n_categories, Fair = FALSE, ncores = NULL)$output1
   } else { ## using "ref" as reference forecast
     rps_ref <- .RPS(exp = ref, obs = obs, n_categories = n_categories, indices_for_clim = indices_for_clim, Fair = Fair, time_dim = time_dim, member_dim = member_dim)
   }
-- 
GitLab


From 178932ae938d67aa1d9dbb3f25c212f0b028ea61 Mon Sep 17 00:00:00 2001
From: Carlos Delgado <carlos.delgado@bsc.es>
Date: Thu, 17 Mar 2022 18:46:42 +0100
Subject: [PATCH 04/18] corrected typo in documentation

---
 R/RandomWalkTest.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/RandomWalkTest.R b/R/RandomWalkTest.R
index 494be65..adeadc1 100644
--- a/R/RandomWalkTest.R
+++ b/R/RandomWalkTest.R
@@ -1,8 +1,8 @@
 #'Random walk test for skill differences
 #'
 #'Forecast comparison of the skill obtained with 2 forecasts (with respect to a 
-#'common reference) based on Random Walks, with significance estimate at the 5%
-#'confidence level, as in DelSole and Tippett (2015).
+#'common reference) based on Random Walks, with significance estimate at the 95%
+#'confidence level, as in DelSole and Tippett (2016).
 #'
 #'@param skill_A A numerical array of the time series of the skill with the 
 #'  forecaster A's.
-- 
GitLab


From 43c6d69b5e6ddb57e546d8a732c32d90d5768764 Mon Sep 17 00:00:00 2001
From: Carlos Delgado <carlos.delgado@bsc.es>
Date: Thu, 17 Mar 2022 18:55:35 +0100
Subject: [PATCH 05/18] included documentation

---
 R/RPS.R  | 47 ++++++++++++++++++++++++++++++++++++++++++-----
 R/RPSS.R | 52 +++++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 89 insertions(+), 10 deletions(-)

diff --git a/R/RPS.R b/R/RPS.R
index c7e7e0b..1cf25a4 100644
--- a/R/RPS.R
+++ b/R/RPS.R
@@ -1,8 +1,41 @@
-
-library(multiApply)
-library(easyVerification)
-
-RPS <- function(exp, obs, n_categories, indices_for_clim, Fair = FALSE, time_dim = 'year', member_dim = 'member', ncores = NULL){
+#'Compute the Ranked Probability Score
+#'
+#'The Ranked Probability Score (RPS; Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7) is defined 
+#'as the sum of the squared differences between the cumulative forecast probabilities (computed from the ensemble 
+#'members) and the observations (defined as 0% if the category did not happen and 100% if it happened). It can be
+#'used to evaluate the skill of multi-categorical probabilistic forecasts. The RPS ranges between 0 (perfect 
+#'forecast) and n-1 (worst possible forecast), where n is the number of categories. In the case of a forecast 
+#'divided into two categories (the lowest number of categories that a probabilistic forecast can provide), the 
+#'RPS corresponds to the Brier Score (BS; Wilks, 2011) and, therefore, ranges between 0 and 1.
+#'
+#'@param exp A numerical array with the forecast with, at least, time dimension. 
+#'@param obs A numerical array with the observations with, at least, time dimension. 
+#'  The length of the time dimension must be the same as for "exp".
+#'@param indices_for_clim A vector with the indices to be taken along "time_dim" dimension for computing the thresholds
+#'  between the probabilistic categories.
+#'@param n_categories An integer with the number of probabilistic categories. The default value is 3 (tercile categories).
+#'@param Fair A logical indicating whether to compute the FairRPSS (the potential RPSS that the forecast would have with 
+#'an infinite ensemble size). The default value is FALSE.
+#'@param time_dim A character string indicating the name of the time dimension.
+#'  The default value is 'year'.
+#'@param member_dim A character string indicating the name of the member dimension to compute the 
+#'  ensemble means of the forecast and reference forecast. If it is NULL, the ensemble means should 
+#'  be provided directly to the function. The default value is 'member'.
+#'@param ncores An integer indicating the number of cores to use for parallel 
+#'  computation. The default value is NULL.
+#'
+#'@return 
+#'  A numerical array with the RPS with the same dimensions as "exp" except the "time_dim" and "member_dim" dimensions.
+#'
+#'@examples
+#' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50))
+#' obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, year = 50))
+#' RPS(exp, obs, indices_for_clim = 1:50)
+#'
+#'@import multiApply
+#'@importFrom easyVerification convert2prob
+#'@export
+RPS <- function(exp, obs, indices_for_clim, n_categories = 3, Fair = FALSE, time_dim = 'year', member_dim = 'member', ncores = NULL){
   
   ## Checkings
   if (!is.array(exp)){stop('"exp" must be an array.')}
@@ -22,6 +55,10 @@ RPS <- function(exp, obs, n_categories, indices_for_clim, Fair = FALSE, time_dim
                            n_categories = n_categories, indices_for_clim = indices_for_clim,
                            Fair = Fair, time_dim = time_dim, member_dim = member_dim,
                            fun = .RPS, ncores = ncores)$output1
+  
+  ## Returning only the mean RPS
+  rps <- multiApply::Apply(data = rps, target_dims = time_dim, fun = mean, na.rm = FALSE, ncores = ncores)$output1
+  
   return(rps)
 }
 
diff --git a/R/RPSS.R b/R/RPSS.R
index ae6881f..a06de09 100644
--- a/R/RPSS.R
+++ b/R/RPSS.R
@@ -1,9 +1,51 @@
+#'Compute the Ranked Probability Skill Score
+#'
+#'The Ranked Probability Skill Score (RPSS; Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7)
+#'is the skill score based on the Ranked Probability Score (RPS; Wilks, 2011). It can be used to assess whether
+#'a forecast presents an improvement or worsening with respect to a reference forecast. The RPSS ranges between
+#'minus infinite and 1. If the RPSS is positive, it indicates that the forecast has higher skill than the 
+#'reference forecast, while a negative value means that it has a lower skill. Examples of reference forecasts
+#'are the climatological forecast (same probabilities for all categories for all time steps), persistence, a previous model version,
+#'and another model. It is computed as RPSS = 1 - RPS_exp / RPS_ref. The statistical significance is obtained based
+#'on a Random Walk test at the 95% confidence level (DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1)
+#'
+#'@param exp A numerical array with the forecast with, at least, time dimension. 
+#'@param obs A numerical array with the observations with, at least, time dimension. 
+#'  The length of the time dimension must be the same as for "exp".
+#'@param ref NULL or a numerical array with the reference forecast with, at least, time dimension. 
+#'  The length of the time dimension must be the same as for "exp". If it is NULL, the climatological forecast is
+#'  used as reference forecast. The default value is NULL.
+#'@param indices_for_clim A vector with the indices to be taken along "time_dim" dimension for computing the thresholds
+#'  between the probabilistic categories.
+#'@param n_categories An integer with the number of probabilistic categories. The default value is 3 (tercile categories).
+#'@param Fair A logical indicating whether to compute the FairRPSS (the potential RPSS that the forecast would have with 
+#'an infinite ensemble size). The default value is FALSE.
+#'@param time_dim A character string indicating the name of the time dimension.
+#'  The default value is 'year'.
+#'@param member_dim A character string indicating the name of the member dimension to compute the 
+#'  ensemble means of the forecast and reference forecast. If it is NULL, the ensemble means should 
+#'  be provided directly to the function. The default value is 'member'.
+#'@param ncores An integer indicating the number of cores to use for parallel 
+#'  computation. The default value is NULL.
+#'
+#'@return
+#'\item{$rpss}{
+#'  A numerical array with the RPSS with the same dimensions as "exp" except the "time_dim" 
+#'  and "member_dim" dimensions.
+#'}
+#'\item{$sign}{
+#'  A logical array with the statistical significance of the RPSS with the same dimensions 
+#'  as "exp" except the "time_dim" and "member_dim" dimensions.
+#'}
 
-library(multiApply)
-library(easyVerification)
-library(s2dv)
-
-RPSS <- function(exp, obs, ref = NULL, n_categories, indices_for_clim, Fair = FALSE, time_dim = 'year', member_dim = 'member', ncores = NULL){
+#'@examples
+#' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50))
+#' obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, year = 50))
+#' RPSS(exp, obs, indices_for_clim = 1:50)
+#'
+#'@import multiApply
+#'@export
+RPSS <- function(exp, obs, ref = NULL, indices_for_clim, n_categories = 3, Fair = FALSE, time_dim = 'year', member_dim = 'member', ncores = NULL){
   
   ## Checkings
   if (!is.array(exp)){stop('"exp" must be an array.')}
-- 
GitLab


From 7fa4531030744435bdee06a87993766136bcac14 Mon Sep 17 00:00:00 2001
From: Carlos Delgado <carlos.delgado@bsc.es>
Date: Fri, 25 Mar 2022 12:02:00 +0100
Subject: [PATCH 06/18] indices_for_clim = NULL by default (to use the whole
 period for climatology)

---
 R/RPS.R  | 9 +++++++--
 R/RPSS.R | 9 +++++++--
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/R/RPS.R b/R/RPS.R
index 1cf25a4..4cd5120 100644
--- a/R/RPS.R
+++ b/R/RPS.R
@@ -12,7 +12,7 @@
 #'@param obs A numerical array with the observations with, at least, time dimension. 
 #'  The length of the time dimension must be the same as for "exp".
 #'@param indices_for_clim A vector with the indices to be taken along "time_dim" dimension for computing the thresholds
-#'  between the probabilistic categories.
+#'  between the probabilistic categories. If NULL, the whole period is used. The default value is NULL.
 #'@param n_categories An integer with the number of probabilistic categories. The default value is 3 (tercile categories).
 #'@param Fair A logical indicating whether to compute the FairRPSS (the potential RPSS that the forecast would have with 
 #'an infinite ensemble size). The default value is FALSE.
@@ -35,12 +35,17 @@
 #'@import multiApply
 #'@importFrom easyVerification convert2prob
 #'@export
-RPS <- function(exp, obs, indices_for_clim, n_categories = 3, Fair = FALSE, time_dim = 'year', member_dim = 'member', ncores = NULL){
+RPS <- function(exp, obs, indices_for_clim = NULL, n_categories = 3, Fair = FALSE, time_dim = 'year', member_dim = 'member', ncores = NULL){
   
   ## Checkings
   if (!is.array(exp)){stop('"exp" must be an array.')}
   if (!is.array(obs)){stop('"obs" must be an array.')}
   if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(obs)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "obs".')}
+  if (is.null(indices_for_clim)){
+    indices_for_clim <- 1:as.numeric(dim(obs)[time_dim])
+  } else if (!is.numeric(indices_for_clim)){
+    stop('"indices_for_clim" must be NULL or a numeric vector.')
+  }
   if (!is.numeric(n_categories) | n_categories < 2){stop('"n_categories" must be an integer greater than 1.')}
   if (!is.logical(Fair)){stop('"Fair" must be TRUE or FALSE.')}
   if (!is.character(time_dim)){stop('"time_dim" must be a string.')}
diff --git a/R/RPSS.R b/R/RPSS.R
index a06de09..59005c2 100644
--- a/R/RPSS.R
+++ b/R/RPSS.R
@@ -16,7 +16,7 @@
 #'  The length of the time dimension must be the same as for "exp". If it is NULL, the climatological forecast is
 #'  used as reference forecast. The default value is NULL.
 #'@param indices_for_clim A vector with the indices to be taken along "time_dim" dimension for computing the thresholds
-#'  between the probabilistic categories.
+#'  between the probabilistic categories. If NULL, the whole period is used. The default value is NULL.
 #'@param n_categories An integer with the number of probabilistic categories. The default value is 3 (tercile categories).
 #'@param Fair A logical indicating whether to compute the FairRPSS (the potential RPSS that the forecast would have with 
 #'an infinite ensemble size). The default value is FALSE.
@@ -45,7 +45,7 @@
 #'
 #'@import multiApply
 #'@export
-RPSS <- function(exp, obs, ref = NULL, indices_for_clim, n_categories = 3, Fair = FALSE, time_dim = 'year', member_dim = 'member', ncores = NULL){
+RPSS <- function(exp, obs, ref = NULL, indices_for_clim = NULL, n_categories = 3, Fair = FALSE, time_dim = 'year', member_dim = 'member', ncores = NULL){
   
   ## Checkings
   if (!is.array(exp)){stop('"exp" must be an array.')}
@@ -55,6 +55,11 @@ RPSS <- function(exp, obs, ref = NULL, indices_for_clim, n_categories = 3, Fair
     if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(ref)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "ref".')}
   }
   if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(obs)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "obs".')}
+  if (is.null(indices_for_clim)){
+    indices_for_clim <- 1:as.numeric(dim(obs)[time_dim])
+  } else if (!is.numeric(indices_for_clim)){
+    stop('"indices_for_clim" must be NULL or a numeric vector.')
+  }
   if (!is.numeric(n_categories) | n_categories < 2){stop('"n_categories" must be an integer greater than 1.')}
   if (!is.logical(Fair)){stop('"Fair" must be TRUE or FALSE.')}
   if (!is.character(time_dim)){stop('"time_dim" must be a string.')}
-- 
GitLab


From 8f72f8b8ddca314abb93f75d5b0e7c7f1f139fb7 Mon Sep 17 00:00:00 2001
From: Carlos Delgado <carlos.delgado@bsc.es>
Date: Wed, 30 Mar 2022 13:15:02 +0200
Subject: [PATCH 07/18] replaced n_categories with prob_thresholds

---
 R/RPS.R  | 57 ++++++++++++++++++++++++--------------------------------
 R/RPSS.R | 45 +++++++++++++++++++++++++++++++-------------
 2 files changed, 56 insertions(+), 46 deletions(-)

diff --git a/R/RPS.R b/R/RPS.R
index 4cd5120..b28791e 100644
--- a/R/RPS.R
+++ b/R/RPS.R
@@ -5,15 +5,16 @@
 #'members) and the observations (defined as 0% if the category did not happen and 100% if it happened). It can be
 #'used to evaluate the skill of multi-categorical probabilistic forecasts. The RPS ranges between 0 (perfect 
 #'forecast) and n-1 (worst possible forecast), where n is the number of categories. In the case of a forecast 
-#'divided into two categories (the lowest number of categories that a probabilistic forecast can provide), the 
+#'divided into two categories (the lowest number of categories that a probabilistic forecast can have), the 
 #'RPS corresponds to the Brier Score (BS; Wilks, 2011) and, therefore, ranges between 0 and 1.
 #'
 #'@param exp A numerical array with the forecast with, at least, time dimension. 
 #'@param obs A numerical array with the observations with, at least, time dimension. 
 #'  The length of the time dimension must be the same as for "exp".
+#'@param prob_thresholds A numeric vector with the relative thresholds (from 0 to 1) between the categories. 
+#'  The default value is c(1/3,2/3), which corresponds to tercile equiprobable categories.
 #'@param indices_for_clim A vector with the indices to be taken along "time_dim" dimension for computing the thresholds
 #'  between the probabilistic categories. If NULL, the whole period is used. The default value is NULL.
-#'@param n_categories An integer with the number of probabilistic categories. The default value is 3 (tercile categories).
 #'@param Fair A logical indicating whether to compute the FairRPSS (the potential RPSS that the forecast would have with 
 #'an infinite ensemble size). The default value is FALSE.
 #'@param time_dim A character string indicating the name of the time dimension.
@@ -30,23 +31,26 @@
 #'@examples
 #' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50))
 #' obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, year = 50))
-#' RPS(exp, obs, indices_for_clim = 1:50)
+#' RPS(exp = exp, obs = obs)
 #'
 #'@import multiApply
+#'@importFrom ClimProjDiags Subset
 #'@importFrom easyVerification convert2prob
 #'@export
-RPS <- function(exp, obs, indices_for_clim = NULL, n_categories = 3, Fair = FALSE, time_dim = 'year', member_dim = 'member', ncores = NULL){
+RPS <- function(exp, obs, prob_thresholds = c(1/3,2/3), indices_for_clim = NULL, Fair = FALSE, time_dim = 'year', member_dim = 'member', ncores = NULL){
   
   ## Checkings
   if (!is.array(exp)){stop('"exp" must be an array.')}
   if (!is.array(obs)){stop('"obs" must be an array.')}
   if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(obs)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "obs".')}
+  if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | any(prob_thresholds <= 0) | any(prob_thresholds >= 1)){
+    stop('"prob_thresholds" must be a numeric vector with the relative thresholds (from 0 to 1) between the categories.')
+  }
   if (is.null(indices_for_clim)){
     indices_for_clim <- 1:as.numeric(dim(obs)[time_dim])
   } else if (!is.numeric(indices_for_clim)){
     stop('"indices_for_clim" must be NULL or a numeric vector.')
   }
-  if (!is.numeric(n_categories) | n_categories < 2){stop('"n_categories" must be an integer greater than 1.')}
   if (!is.logical(Fair)){stop('"Fair" must be TRUE or FALSE.')}
   if (!is.character(time_dim)){stop('"time_dim" must be a string.')}
   if (!is.character(member_dim)){stop('"member_dim" must be a string.')}
@@ -57,7 +61,7 @@ RPS <- function(exp, obs, indices_for_clim = NULL, n_categories = 3, Fair = FALS
   ## RPS
   rps <- multiApply::Apply(data = list(exp = exp, obs = obs), 
                            target_dims = list(exp = c(time_dim, member_dim), obs = time_dim),
-                           n_categories = n_categories, indices_for_clim = indices_for_clim,
+                           prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim,
                            Fair = Fair, time_dim = time_dim, member_dim = member_dim,
                            fun = .RPS, ncores = ncores)$output1
   
@@ -67,47 +71,34 @@ RPS <- function(exp, obs, indices_for_clim = NULL, n_categories = 3, Fair = FALS
   return(rps)
 }
 
-.RPS <- function(exp, obs, n_categories, indices_for_clim, Fair = FALSE, time_dim = 'year', member_dim = 'member'){
+.RPS <- function(exp, obs, prob_thresholds, indices_for_clim, Fair, time_dim, member_dim){
   
-  exp_probs <- .get_probs(data = exp, type = 'exp', indices_for_quartiles = indices_for_clim, 
-                          time_dim = time_dim, member_dim = member_dim, n_categories = n_categories)$probs
+  exp_probs <- .get_probs(data = exp, type = 'exp', indices_for_quantiles = indices_for_clim, 
+                          time_dim = time_dim, member_dim = member_dim, prob_thresholds = prob_thresholds)
   
-  obs_probs <- .get_probs(data = obs, type = 'obs', indices_for_quartiles = indices_for_clim, 
-                          time_dim = time_dim, member_dim = NULL, n_categories = n_categories)$probs
+  obs_probs <- .get_probs(data = obs, type = 'obs', indices_for_quantiles = indices_for_clim, 
+                          time_dim = time_dim, member_dim = NULL, prob_thresholds = prob_thresholds)
   
+  ## RPS for each time step
   rps <- multiApply::Apply(data = list(probs_exp = exp_probs, probs_obs = obs_probs), target_dims = 'bin',
-                           fun = .RPS_from_probs, n_categories = n_categories, Fair = Fair, 
+                           fun = .RPS_from_probs, n_categories = length(prob_thresholds)+1, Fair = Fair, 
                            n_members = as.numeric(dim(exp)[member_dim]), ncores = NULL)$output1
   return(rps)
 }
 
-.get_probs <- function(data, type, indices_for_quartiles, time_dim, member_dim, n_categories){
+.get_probs <- function(data, type, indices_for_quantiles, time_dim, member_dim, prob_thresholds){
   
   if (type == 'exp'){
-    target_dims_quantiles = c(time_dim, member_dim)
-    target_dims_probs = list(member_dim, 'bin')
+    target_dims = member_dim
   } else if (type == 'obs'){
-    target_dims_quantiles = time_dim
-    target_dims_probs = list(NULL,'bin')
+    target_dims = NULL
   } else {stop("Parameter 'type' must be 'exp' or 'obs'")}
   
-  quantiles <- multiApply::Apply(data = ClimProjDiags::Subset(x = data, along = time_dim, indices = indices_for_quartiles), 
-                                 target_dims = target_dims_quantiles, 
-                                 fun = .quantile_aux, n_categories = n_categories, 
-                                 output_dims = 'bin', ncores = NULL)$output1
-  
-  probs <- multiApply::Apply(data = list(data = data, quantiles = quantiles),
-                             target_dims = target_dims_probs,
-                             fun = .c2p, 
-                             output_dims = 'bin', ncores = NULL)$output1
+  quantiles <- quantile(x = as.vector(data), probs = prob_thresholds, type = 8, na.rm = TRUE) ## Absolute thresholds
   
-  return(list('probs' = probs, 'quantiles' = quantiles))
-}
-
-.quantile_aux = function(data, n_categories){
-  data = as.vector(data)
-  q = quantile(x = data, probs = 1:(n_categories - 1) / n_categories, type = 8, na.rm = TRUE)
-  return(q)
+  probs <- multiApply::Apply(data = data, target_dims = target_dims, fun = .c2p, output_dims = 'bin', 
+                             quantiles = quantiles, ncores = NULL)$output1 ## Probabilities
+  return(probs)
 }
 
 .c2p <- function(data, quantiles) {
diff --git a/R/RPSS.R b/R/RPSS.R
index 59005c2..f7dc80d 100644
--- a/R/RPSS.R
+++ b/R/RPSS.R
@@ -15,9 +15,10 @@
 #'@param ref NULL or a numerical array with the reference forecast with, at least, time dimension. 
 #'  The length of the time dimension must be the same as for "exp". If it is NULL, the climatological forecast is
 #'  used as reference forecast. The default value is NULL.
+#'@param prob_thresholds A numeric vector with the relative thresholds (between 0 and 1) between the categories. 
+#'  The default value is c(1/3,2/3), which corresponds to tercile equiprobable categories.
 #'@param indices_for_clim A vector with the indices to be taken along "time_dim" dimension for computing the thresholds
 #'  between the probabilistic categories. If NULL, the whole period is used. The default value is NULL.
-#'@param n_categories An integer with the number of probabilistic categories. The default value is 3 (tercile categories).
 #'@param Fair A logical indicating whether to compute the FairRPSS (the potential RPSS that the forecast would have with 
 #'an infinite ensemble size). The default value is FALSE.
 #'@param time_dim A character string indicating the name of the time dimension.
@@ -40,27 +41,31 @@
 
 #'@examples
 #' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50))
+#' ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50))
 #' obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, year = 50))
-#' RPSS(exp, obs, indices_for_clim = 1:50)
+#' RPSS(exp = exp, obs = obs) ## climatology as reference forecast
+#' RPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast
 #'
 #'@import multiApply
 #'@export
-RPSS <- function(exp, obs, ref = NULL, indices_for_clim = NULL, n_categories = 3, Fair = FALSE, time_dim = 'year', member_dim = 'member', ncores = NULL){
+RPSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3,2/3), indices_for_clim = NULL, Fair = FALSE, time_dim = 'year', member_dim = 'member', ncores = NULL){
   
   ## Checkings
   if (!is.array(exp)){stop('"exp" must be an array.')}
   if (!is.array(obs)){stop('"obs" must be an array.')}
+  if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(obs)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "obs".')}
   if (!is.null(ref)){
     if (!is.array(ref)){stop('"ref" must be an array.')}
     if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(ref)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "ref".')}
   }
-  if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(obs)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "obs".')}
+  if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | any(prob_thresholds <= 0) | any(prob_thresholds >= 1)){
+    stop('"prob_thresholds" must be a numeric vector with the relative thresholds (from 0 to 1) between the categories.')
+  }
   if (is.null(indices_for_clim)){
     indices_for_clim <- 1:as.numeric(dim(obs)[time_dim])
   } else if (!is.numeric(indices_for_clim)){
     stop('"indices_for_clim" must be NULL or a numeric vector.')
   }
-  if (!is.numeric(n_categories) | n_categories < 2){stop('"n_categories" must be an integer greater than 1.')}
   if (!is.logical(Fair)){stop('"Fair" must be TRUE or FALSE.')}
   if (!is.character(time_dim)){stop('"time_dim" must be a string.')}
   if (!is.character(member_dim)){stop('"member_dim" must be a string.')}
@@ -71,13 +76,13 @@ RPSS <- function(exp, obs, ref = NULL, indices_for_clim = NULL, n_categories = 3
   if (!is.null(ref)){ ## using "ref" as reference forecast
     output <- multiApply::Apply(data = list(exp = exp, obs = obs, ref = ref), 
                                 target_dims = list(exp = c(time_dim, member_dim), obs = time_dim, ref = c(time_dim, member_dim)),
-                                n_categories = n_categories, indices_for_clim = indices_for_clim,
+                                prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim,
                                 Fair = Fair, time_dim = time_dim, member_dim = member_dim,
                                 fun = .RPSS, ncores = ncores)
   } else { ## using climatology as reference forecast
     output <- multiApply::Apply(data = list(exp = exp, obs = obs), 
                                 target_dims = list(exp = c(time_dim, member_dim), obs = time_dim),
-                                ref = ref, n_categories = n_categories, indices_for_clim = indices_for_clim,
+                                ref = ref, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim,
                                 Fair = Fair, time_dim = time_dim, member_dim = member_dim,
                                 fun = .RPSS, ncores = ncores)
   }
@@ -85,19 +90,19 @@ RPSS <- function(exp, obs, ref = NULL, indices_for_clim = NULL, n_categories = 3
   return(output)
 }
 
-.RPSS <- function(exp, obs, ref, n_categories, indices_for_clim, Fair, time_dim, member_dim){
+.RPSS <- function(exp, obs, ref, prob_thresholds, indices_for_clim, Fair, time_dim, member_dim){
   
   ## RPS of the forecast
-  rps_exp <- .RPS(exp = exp, obs = obs, n_categories = n_categories, indices_for_clim = indices_for_clim, Fair = Fair, time_dim = time_dim, member_dim = member_dim)
+  rps_exp <- .RPS(exp = exp, obs = obs, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, Fair = Fair, time_dim = time_dim, member_dim = member_dim)
   
   ## RPS of the reference forecast
   if (is.null(ref)){ ## using climatology as reference forecast
-    obs_probs <- .get_probs(data = obs, type = 'obs', indices_for_quartiles = indices_for_clim, time_dim = time_dim, member_dim = NULL, n_categories = n_categories)$probs
-    clim_probs <- array(data = 1 / n_categories, dim = dim(obs_probs))
+    obs_probs <- .get_probs(data = obs, type = 'obs', indices_for_quantiles = indices_for_clim, time_dim = time_dim, member_dim = NULL, prob_thresholds = prob_thresholds)
+    clim_probs <- .get_probs_clim(prob_thresholds = prob_thresholds, dims = dim(obs_probs), time_dim = time_dim)
     rps_ref <- multiApply::Apply(data = list(probs_exp = clim_probs, probs_obs = obs_probs), target_dims = 'bin',
-                                 fun = .RPS_from_probs, n_categories = n_categories, Fair = FALSE, ncores = NULL)$output1
+                                 fun = .RPS_from_probs, n_categories = length(prob_thresholds)+1, Fair = FALSE, ncores = NULL)$output1
   } else { ## using "ref" as reference forecast
-    rps_ref <- .RPS(exp = ref, obs = obs, n_categories = n_categories, indices_for_clim = indices_for_clim, Fair = Fair, time_dim = time_dim, member_dim = member_dim)
+    rps_ref <- .RPS(exp = ref, obs = obs, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, Fair = Fair, time_dim = time_dim, member_dim = member_dim)
   }
   
   ## RPSS
@@ -110,3 +115,17 @@ RPSS <- function(exp, obs, ref = NULL, indices_for_clim = NULL, n_categories = 3
   
   return(list(rpss = rpss, sign = sign))
 }
+
+.get_probs_clim <- function(prob_thresholds, dims, time_dim){
+  
+  probs <- array(data = NA, dim = c(bin = length(prob_thresholds)+1))
+  probs[1] <- prob_thresholds[1]
+  if (length(prob_thresholds) > 1){
+    probs[2:(length(probs)-1)] <- diff(prob_thresholds)
+  }
+  probs[length(probs)] <- 1 - prob_thresholds[length(prob_thresholds)]
+  stopifnot(identical(sum(probs),1))
+  probs <- s2dv::InsertDim(data = probs, posdim = 2, lendim = dims[[time_dim]], name = time_dim)
+  
+  return(probs)
+}
-- 
GitLab


From e2990e4edddf003d258901a91e0cf77e6c269080 Mon Sep 17 00:00:00 2001
From: aho <an.ho@bsc.es>
Date: Wed, 11 May 2022 17:56:08 +0200
Subject: [PATCH 08/18] Formatting and combine small functions

---
 R/RPS.R | 172 +++++++++++++++++++++++++++++++-------------------------
 1 file changed, 96 insertions(+), 76 deletions(-)

diff --git a/R/RPS.R b/R/RPS.R
index b28791e..886ecec 100644
--- a/R/RPS.R
+++ b/R/RPS.R
@@ -1,45 +1,60 @@
 #'Compute the Ranked Probability Score
 #'
-#'The Ranked Probability Score (RPS; Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7) is defined 
-#'as the sum of the squared differences between the cumulative forecast probabilities (computed from the ensemble 
-#'members) and the observations (defined as 0% if the category did not happen and 100% if it happened). It can be
-#'used to evaluate the skill of multi-categorical probabilistic forecasts. The RPS ranges between 0 (perfect 
-#'forecast) and n-1 (worst possible forecast), where n is the number of categories. In the case of a forecast 
-#'divided into two categories (the lowest number of categories that a probabilistic forecast can have), the 
-#'RPS corresponds to the Brier Score (BS; Wilks, 2011) and, therefore, ranges between 0 and 1.
+#'The Ranked Probability Score (RPS; Wilks, 2011) is defined as the sum of the
+#'squared differences between the cumulative forecast probabilities (computed 
+#'from the ensemble members) and the observations (defined as 0% if the category
+#'did not happen and 100% if it happened). It can be used to evaluate the skill 
+#'of multi-categorical probabilistic forecasts. The RPS ranges between 0 
+#'(perfect forecast) and n-1 (worst possible forecast), where n is the number of
+#'categories. In the case of a forecast divided into two categories (the lowest
+#'number of categories that a probabilistic forecast can have), the RPS 
+#'corresponds to the Brier Score (BS; Wilks, 2011), therefore, ranges between 0 
+#'and 1.
 #'
-#'@param exp A numerical array with the forecast with, at least, time dimension. 
-#'@param obs A numerical array with the observations with, at least, time dimension. 
-#'  The length of the time dimension must be the same as for "exp".
-#'@param prob_thresholds A numeric vector with the relative thresholds (from 0 to 1) between the categories. 
-#'  The default value is c(1/3,2/3), which corresponds to tercile equiprobable categories.
-#'@param indices_for_clim A vector with the indices to be taken along "time_dim" dimension for computing the thresholds
-#'  between the probabilistic categories. If NULL, the whole period is used. The default value is NULL.
-#'@param Fair A logical indicating whether to compute the FairRPSS (the potential RPSS that the forecast would have with 
-#'an infinite ensemble size). The default value is FALSE.
+#'@param exp A named numerical array of the forecast with at least time 
+#'  dimension.  
+#'@param obs A named numerical array of the observation with at least time 
+#'  dimension. The dimensions must be the same as 'exp' expect 'memb_dim'.
 #'@param time_dim A character string indicating the name of the time dimension.
-#'  The default value is 'year'.
-#'@param member_dim A character string indicating the name of the member dimension to compute the 
-#'  ensemble means of the forecast and reference forecast. If it is NULL, the ensemble means should 
-#'  be provided directly to the function. The default value is 'member'.
+#'  The default value is 'sdate'.
+#'@param memb_dim A character string indicating the name of the member dimension
+#'  to compute the ensemble mean of the forecast. If it is NULL, the ensemble
+#'  mean should be provided directly to the function. The default value is NULL.
+#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to
+#'  1) between the categories. The default value is c(1/3, 2/3), which 
+#'  corresponds to tercile equiprobable categories.
+#'@param indices_for_clim A vector of the indices to be taken along "time_dim" 
+#'  for computing the thresholds between the probabilistic categories. If NULL,
+#'  the whole period is used. The default value is NULL.
+#'@param Fair A logical indicating whether to compute the FairRPSS (the 
+#'  potential RPSS that the forecast would have with an infinite ensemble size).
+#'  The default value is FALSE.
 #'@param ncores An integer indicating the number of cores to use for parallel 
 #'  computation. The default value is NULL.
 #'
 #'@return 
-#'  A numerical array with the RPS with the same dimensions as "exp" except the "time_dim" and "member_dim" dimensions.
+#'A numerical array of RPS with the same dimensions as "exp" except the 
+#'"time_dim" and "memb_dim" dimensions.
+#'
+#'@references 
+#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7
 #'
 #'@examples
-#' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50))
-#' obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, year = 50))
-#' RPS(exp = exp, obs = obs)
+#' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50))
+#' obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50))
+#' res <- RPS(exp = exp, obs = obs, memb_dim = 'member')
 #'
 #'@import multiApply
 #'@importFrom ClimProjDiags Subset
 #'@importFrom easyVerification convert2prob
 #'@export
-RPS <- function(exp, obs, prob_thresholds = c(1/3,2/3), indices_for_clim = NULL, Fair = FALSE, time_dim = 'year', member_dim = 'member', ncores = NULL){
+RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, 
+                prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, 
+                ncores = NULL) {
   
-  ## Checkings
+  # Check inputs
+  ## exp and obs (1)
+
   if (!is.array(exp)){stop('"exp" must be an array.')}
   if (!is.array(obs)){stop('"obs" must be an array.')}
   if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(obs)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "obs".')}
@@ -53,81 +68,86 @@ RPS <- function(exp, obs, prob_thresholds = c(1/3,2/3), indices_for_clim = NULL,
   }
   if (!is.logical(Fair)){stop('"Fair" must be TRUE or FALSE.')}
   if (!is.character(time_dim)){stop('"time_dim" must be a string.')}
-  if (!is.character(member_dim)){stop('"member_dim" must be a string.')}
+  if (!is.character(memb_dim)){stop('"memb_dim" must be a string.')}
   if (!is.null(ncores)){
     if (!is.numeric(ncores) | ncores < 1){stop('"ncores" must be either NULL or a positive integer.')}
   }
   
   ## RPS
-  rps <- multiApply::Apply(data = list(exp = exp, obs = obs), 
-                           target_dims = list(exp = c(time_dim, member_dim), obs = time_dim),
-                           prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim,
-                           Fair = Fair, time_dim = time_dim, member_dim = member_dim,
-                           fun = .RPS, ncores = ncores)$output1
-  
-  ## Returning only the mean RPS
-  rps <- multiApply::Apply(data = rps, target_dims = time_dim, fun = mean, na.rm = FALSE, ncores = ncores)$output1
+  rps <- Apply(data = list(exp = exp, obs = obs), 
+               target_dims = list(exp = c(time_dim, memb_dim), obs = time_dim),
+               fun = .RPS,
+               prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim,
+               Fair = Fair,
+               ncores = ncores)$output1
   
   return(rps)
 }
 
-.RPS <- function(exp, obs, prob_thresholds, indices_for_clim, Fair, time_dim, member_dim){
-  
-  exp_probs <- .get_probs(data = exp, type = 'exp', indices_for_quantiles = indices_for_clim, 
-                          time_dim = time_dim, member_dim = member_dim, prob_thresholds = prob_thresholds)
-  
-  obs_probs <- .get_probs(data = obs, type = 'obs', indices_for_quantiles = indices_for_clim, 
-                          time_dim = time_dim, member_dim = NULL, prob_thresholds = prob_thresholds)
-  
+
+.RPS <- function(exp, obs, prob_thresholds, indices_for_clim, Fair) {
+  # exp: [sdate, memb]
+  # obs: [sdate]
+
+  exp_probs <- .get_probs(data = exp, indices_for_quantiles = indices_for_clim, 
+                          prob_thresholds = prob_thresholds)
+  # exp_probs: [bin, sdate] 
+  obs_probs <- .get_probs(data = obs, indices_for_quantiles = indices_for_clim, 
+                          prob_thresholds = prob_thresholds)
+  # obs_probs: [bin, sdate]
+ 
   ## RPS for each time step
-  rps <- multiApply::Apply(data = list(probs_exp = exp_probs, probs_obs = obs_probs), target_dims = 'bin',
-                           fun = .RPS_from_probs, n_categories = length(prob_thresholds)+1, Fair = Fair, 
-                           n_members = as.numeric(dim(exp)[member_dim]), ncores = NULL)$output1
+  rps <- Apply(data = list(probs_exp = exp_probs, probs_obs = obs_probs), target_dims = 'bin',
+                           fun = .RPS_from_probs, Fair = Fair, 
+                           n_members = dim(exp)[2], ncores = NULL)$output1
+  # rps: [sdate]
+
+  # Return only the mean RPS
+  rps <- mean(rps, na.rm = FALSE)
+
   return(rps)
 }
 
-.get_probs <- function(data, type, indices_for_quantiles, time_dim, member_dim, prob_thresholds){
-  
-  if (type == 'exp'){
-    target_dims = member_dim
-  } else if (type == 'obs'){
-    target_dims = NULL
-  } else {stop("Parameter 'type' must be 'exp' or 'obs'")}
-  
-  quantiles <- quantile(x = as.vector(data), probs = prob_thresholds, type = 8, na.rm = TRUE) ## Absolute thresholds
-  
-  probs <- multiApply::Apply(data = data, target_dims = target_dims, fun = .c2p, output_dims = 'bin', 
-                             quantiles = quantiles, ncores = NULL)$output1 ## Probabilities
-  return(probs)
-}
 
-.c2p <- function(data, quantiles) {
-  if (anyNA(data)){
-    output <- array(data = NA, dim = length(quantiles) + 1)
-  } else {
-    output <- colMeans(easyVerification::convert2prob(as.vector(data), threshold = as.vector(quantiles)))
+.get_probs <- function(data, indices_for_quantiles, prob_thresholds) {
+  # if exp: [sdate, memb]
+  # if obs: [sdate]
+  if (length(dim(data)) == 1) dim(data) <- c(dim(data), 1)
+
+  # Absolute thresholds
+  quantiles <- quantile(x = as.vector(data), probs = prob_thresholds, type = 8, na.rm = TRUE)
+
+  # Probabilities
+  probs <- array(dim = c(bin = length(quantiles) + 1, dim(data)[1])) # [bin, sdate]
+  for (i_time in 1:dim(data)[1]) {
+    if (anyNA(data[i_time, ])) {
+    probs[, i_time] <- rep(NA, dim = length(quantiles) + 1)
+    } else {
+    probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], threshold = quantiles))
+    }
   }
-  return(output)
+
+  return(probs)
 }
 
 
-.RPS_from_probs <- function(probs_exp, probs_obs, n_categories, Fair, n_members){
-  
-  rps <- NULL
-  for (i in 1:n_categories){
-    rps[i] <- (cumsum(probs_exp)[i]-cumsum(probs_obs)[i])^2 ## (Wilks 2011, pp.348-350)
-  }
-  rps <- sum(rps)
-  
-  if (Fair == TRUE){ ## FairRPS
+.RPS_from_probs <- function(probs_exp, probs_obs, Fair, n_members) {
+  # probs_exp: [bin]
+  # probs_obs: [bin]
+
+  rps <- sum((cumsum(probs_exp) - cumsum(probs_obs))^2)
+
+  if (Fair) { # FairRPS
     ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps]
     R <- n_members
     R_new <- Inf 
     probs_cum <- cumsum(probs_exp)
-    adjustment <- -1 / (R-1) * probs_cum * (1 - probs_cum)
+    adjustment <- -1 / (R - 1) * probs_cum * (1 - probs_cum)
     adjustment <- sum(adjustment)
     rps <- rps + adjustment
   }
   
   return(rps)
 }
+
+
-- 
GitLab


From 1379da555e2fc3c112d4e649aab5e5ae8e146b86 Mon Sep 17 00:00:00 2001
From: aho <an.ho@bsc.es>
Date: Thu, 12 May 2022 14:49:15 +0200
Subject: [PATCH 09/18] Improve RPS() efficiency and correct the uusage of
 indices_for_clim; unit test created

---
 NAMESPACE                 |   3 +
 R/RPS.R                   | 149 +++++++++++++++++++++++---------------
 man/RPS.Rd                |  70 ++++++++++++++++++
 man/RPSS.Rd               |  75 +++++++++++++++++++
 man/RandomWalkTest.Rd     |   4 +-
 tests/testthat/test-RPS.R | 129 +++++++++++++++++++++++++++++++++
 6 files changed, 368 insertions(+), 62 deletions(-)
 create mode 100644 man/RPS.Rd
 create mode 100644 man/RPSS.Rd
 create mode 100644 tests/testthat/test-RPS.R

diff --git a/NAMESPACE b/NAMESPACE
index 841814e..1449ad4 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -54,6 +54,8 @@ export(ProjectField)
 export(REOF)
 export(RMS)
 export(RMSSS)
+export(RPS)
+export(RPSS)
 export(RandomWalkTest)
 export(RatioPredictableComponents)
 export(RatioRMS)
@@ -92,6 +94,7 @@ importFrom(ClimProjDiags,WeightedMean)
 importFrom(abind,abind)
 importFrom(abind,adrop)
 importFrom(easyNCDF,ArrayToNc)
+importFrom(easyVerification,convert2prob)
 importFrom(grDevices,bmp)
 importFrom(grDevices,col2rgb)
 importFrom(grDevices,colorRampPalette)
diff --git a/R/RPS.R b/R/RPS.R
index 886ecec..744b9c7 100644
--- a/R/RPS.R
+++ b/R/RPS.R
@@ -14,16 +14,15 @@
 #'@param exp A named numerical array of the forecast with at least time 
 #'  dimension.  
 #'@param obs A named numerical array of the observation with at least time 
-#'  dimension. The dimensions must be the same as 'exp' expect 'memb_dim'.
+#'  dimension. The dimensions must be the same as 'exp' except 'memb_dim'.
 #'@param time_dim A character string indicating the name of the time dimension.
 #'  The default value is 'sdate'.
 #'@param memb_dim A character string indicating the name of the member dimension
-#'  to compute the ensemble mean of the forecast. If it is NULL, the ensemble
-#'  mean should be provided directly to the function. The default value is NULL.
+#'  to compute the probabilities of the forecast. The default value is 'member'.
 #'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to
 #'  1) between the categories. The default value is c(1/3, 2/3), which 
 #'  corresponds to tercile equiprobable categories.
-#'@param indices_for_clim A vector of the indices to be taken along "time_dim" 
+#'@param indices_for_clim A vector of the indices to be taken along 'time_dim' 
 #'  for computing the thresholds between the probabilistic categories. If NULL,
 #'  the whole period is used. The default value is NULL.
 #'@param Fair A logical indicating whether to compute the FairRPSS (the 
@@ -34,51 +33,93 @@
 #'
 #'@return 
 #'A numerical array of RPS with the same dimensions as "exp" except the 
-#'"time_dim" and "memb_dim" dimensions.
+#''time_dim' and 'memb_dim' dimensions.
 #'
 #'@references 
 #'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7
 #'
 #'@examples
-#' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50))
-#' obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50))
-#' res <- RPS(exp = exp, obs = obs, memb_dim = 'member')
+#'exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50))
+#'obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50))
+#'res <- RPS(exp = exp, obs = obs)
 #'
 #'@import multiApply
-#'@importFrom ClimProjDiags Subset
 #'@importFrom easyVerification convert2prob
 #'@export
-RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, 
+RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', 
                 prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, 
                 ncores = NULL) {
   
   # Check inputs
   ## exp and obs (1)
-
-  if (!is.array(exp)){stop('"exp" must be an array.')}
-  if (!is.array(obs)){stop('"obs" must be an array.')}
-  if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(obs)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "obs".')}
-  if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | any(prob_thresholds <= 0) | any(prob_thresholds >= 1)){
-    stop('"prob_thresholds" must be a numeric vector with the relative thresholds (from 0 to 1) between the categories.')
+  if (!is.array(exp) | !is.numeric(exp))
+    stop('Parameter "exp" must be a numeric array.')
+  if (!is.array(obs) | !is.numeric(obs))
+    stop('Parameter "obs" must be a numeric array.')
+  if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) |
+     any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) {
+    stop("Parameter 'exp' and 'obs' must have dimension names.")
   }
-  if (is.null(indices_for_clim)){
-    indices_for_clim <- 1:as.numeric(dim(obs)[time_dim])
-  } else if (!is.numeric(indices_for_clim)){
-    stop('"indices_for_clim" must be NULL or a numeric vector.')
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) != 1)
+    stop('Parameter "time_dim" must be a character string.')
+  if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) {
+    stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.")
   }
-  if (!is.logical(Fair)){stop('"Fair" must be TRUE or FALSE.')}
-  if (!is.character(time_dim)){stop('"time_dim" must be a string.')}
-  if (!is.character(memb_dim)){stop('"memb_dim" must be a string.')}
-  if (!is.null(ncores)){
-    if (!is.numeric(ncores) | ncores < 1){stop('"ncores" must be either NULL or a positive integer.')}
+  ## memb_dim
+  if (!is.character(memb_dim) | length(memb_dim) > 1) {
+    stop("Parameter 'memb_dim' must be a character string.")
   }
-  
-  ## RPS
+  if (!memb_dim %in% names(dim(exp))) {
+    stop("Parameter 'memb_dim' is not found in 'exp' dimension.")
+  }
+  ## exp and obs (2)
+  name_exp <- sort(names(dim(exp)))
+  name_obs <- sort(names(dim(obs)))
+  name_exp <- name_exp[-which(name_exp == memb_dim)]
+  if (!identical(length(name_exp), length(name_obs)) |
+      !identical(dim(exp)[name_exp], dim(obs)[name_obs])) {
+    stop(paste0("Parameter 'exp' and 'obs' must have same length of ",
+                "all dimensions expect 'memb_dim'."))
+  }
+  ## prob_thresholds
+  if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) |
+      any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) {
+    stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.")
+  }
+  ## indices_for_clim
+  if (is.null(indices_for_clim)) {
+    indices_for_clim <- 1:dim(obs)[time_dim]
+  } else {
+    if (!is.numeric(indices_for_clim) | !is.vector(indices_for_clim)) {
+      stop("Parameter 'indices_for_clim' must be NULL or a numeric vector.")
+    } else if (length(indices_for_clim) > dim(obs)[time_dim] |
+               max(indices_for_clim) > dim(obs)[time_dim] |
+               any(indices_for_clim) < 1) {
+      stop("Parameter 'indices_for_clim' should be the indices of 'time_dim'.")
+    }
+  }
+  ## Fair
+  if (!is.logical(Fair)  | length(Fair) > 1) {
+    stop("Parameter 'Fair' must be either TRUE or FALSE.")
+  }
+  ## ncores
+  if (!is.null(ncores)) {
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
+      length(ncores) > 1) {
+      stop("Parameter 'ncores' must be either NULL or a positive integer.")
+    }
+  }
+
+  ###############################
+ 
+  # Compute RPS
   rps <- Apply(data = list(exp = exp, obs = obs), 
-               target_dims = list(exp = c(time_dim, memb_dim), obs = time_dim),
+               target_dims = list(exp = c(time_dim, memb_dim), 
+                                  obs = time_dim),
                fun = .RPS,
-               prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim,
-               Fair = Fair,
+               prob_thresholds = prob_thresholds, 
+               indices_for_clim = indices_for_clim, Fair = Fair,
                ncores = ncores)$output1
   
   return(rps)
@@ -95,59 +136,47 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL,
   obs_probs <- .get_probs(data = obs, indices_for_quantiles = indices_for_clim, 
                           prob_thresholds = prob_thresholds)
   # obs_probs: [bin, sdate]
- 
-  ## RPS for each time step
-  rps <- Apply(data = list(probs_exp = exp_probs, probs_obs = obs_probs), target_dims = 'bin',
-                           fun = .RPS_from_probs, Fair = Fair, 
-                           n_members = dim(exp)[2], ncores = NULL)$output1
+
+  probs_exp_cumsum <- apply(exp_probs, 2, cumsum)
+  probs_obs_cumsum <- apply(obs_probs, 2, cumsum)
+  rps <- apply((probs_exp_cumsum - probs_obs_cumsum)^2, 2, sum)
   # rps: [sdate]
 
+  if (Fair) { # FairRPS
+    ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps]
+    R <- dim(exp)[2]  #memb
+    R_new <- Inf
+    adjustment <- (-1) / (R - 1) * probs_exp_cumsum * (1 - probs_exp_cumsum)
+    adjustment <- apply(adjustment, 2, sum)
+    rps <- rps + adjustment
+  }
+
   # Return only the mean RPS
   rps <- mean(rps, na.rm = FALSE)
 
   return(rps)
 }
 
-
 .get_probs <- function(data, indices_for_quantiles, prob_thresholds) {
   # if exp: [sdate, memb]
   # if obs: [sdate]
+
+  # Add dim [memb = 1] to obs
   if (length(dim(data)) == 1) dim(data) <- c(dim(data), 1)
 
   # Absolute thresholds
-  quantiles <- quantile(x = as.vector(data), probs = prob_thresholds, type = 8, na.rm = TRUE)
+  quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), probs = prob_thresholds, type = 8, na.rm = TRUE)
 
   # Probabilities
   probs <- array(dim = c(bin = length(quantiles) + 1, dim(data)[1])) # [bin, sdate]
   for (i_time in 1:dim(data)[1]) {
     if (anyNA(data[i_time, ])) {
-    probs[, i_time] <- rep(NA, dim = length(quantiles) + 1)
+      probs[, i_time] <- rep(NA, dim = length(quantiles) + 1)
     } else {
-    probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], threshold = quantiles))
+      probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], threshold = quantiles))
     }
   }
 
   return(probs)
 }
 
-
-.RPS_from_probs <- function(probs_exp, probs_obs, Fair, n_members) {
-  # probs_exp: [bin]
-  # probs_obs: [bin]
-
-  rps <- sum((cumsum(probs_exp) - cumsum(probs_obs))^2)
-
-  if (Fair) { # FairRPS
-    ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps]
-    R <- n_members
-    R_new <- Inf 
-    probs_cum <- cumsum(probs_exp)
-    adjustment <- -1 / (R - 1) * probs_cum * (1 - probs_cum)
-    adjustment <- sum(adjustment)
-    rps <- rps + adjustment
-  }
-  
-  return(rps)
-}
-
-
diff --git a/man/RPS.Rd b/man/RPS.Rd
new file mode 100644
index 0000000..d1acc02
--- /dev/null
+++ b/man/RPS.Rd
@@ -0,0 +1,70 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/RPS.R
+\name{RPS}
+\alias{RPS}
+\title{Compute the Ranked Probability Score}
+\usage{
+RPS(
+  exp,
+  obs,
+  time_dim = "sdate",
+  memb_dim = "member",
+  prob_thresholds = c(1/3, 2/3),
+  indices_for_clim = NULL,
+  Fair = FALSE,
+  ncores = NULL
+)
+}
+\arguments{
+\item{exp}{A named numerical array of the forecast with at least time 
+dimension.}
+
+\item{obs}{A named numerical array of the observation with at least time 
+dimension. The dimensions must be the same as 'exp' except 'memb_dim'.}
+
+\item{time_dim}{A character string indicating the name of the time dimension.
+The default value is 'sdate'.}
+
+\item{memb_dim}{A character string indicating the name of the member dimension
+to compute the probabilities of the forecast. The default value is 'member'.}
+
+\item{prob_thresholds}{A numeric vector of the relative thresholds (from 0 to
+1) between the categories. The default value is c(1/3, 2/3), which 
+corresponds to tercile equiprobable categories.}
+
+\item{indices_for_clim}{A vector of the indices to be taken along 'time_dim' 
+for computing the thresholds between the probabilistic categories. If NULL,
+the whole period is used. The default value is NULL.}
+
+\item{Fair}{A logical indicating whether to compute the FairRPSS (the 
+potential RPSS that the forecast would have with an infinite ensemble size).
+The default value is FALSE.}
+
+\item{ncores}{An integer indicating the number of cores to use for parallel 
+computation. The default value is NULL.}
+}
+\value{
+A numerical array of RPS with the same dimensions as "exp" except the 
+'time_dim' and 'memb_dim' dimensions.
+}
+\description{
+The Ranked Probability Score (RPS; Wilks, 2011) is defined as the sum of the
+squared differences between the cumulative forecast probabilities (computed 
+from the ensemble members) and the observations (defined as 0% if the category
+did not happen and 100% if it happened). It can be used to evaluate the skill 
+of multi-categorical probabilistic forecasts. The RPS ranges between 0 
+(perfect forecast) and n-1 (worst possible forecast), where n is the number of
+categories. In the case of a forecast divided into two categories (the lowest
+number of categories that a probabilistic forecast can have), the RPS 
+corresponds to the Brier Score (BS; Wilks, 2011), therefore, ranges between 0 
+and 1.
+}
+\examples{
+exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50))
+obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50))
+res <- RPS(exp = exp, obs = obs)
+
+}
+\references{
+Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7
+}
diff --git a/man/RPSS.Rd b/man/RPSS.Rd
new file mode 100644
index 0000000..47136f7
--- /dev/null
+++ b/man/RPSS.Rd
@@ -0,0 +1,75 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/RPSS.R
+\name{RPSS}
+\alias{RPSS}
+\title{Compute the Ranked Probability Skill Score}
+\usage{
+RPSS(
+  exp,
+  obs,
+  ref = NULL,
+  prob_thresholds = c(1/3, 2/3),
+  indices_for_clim = NULL,
+  Fair = FALSE,
+  time_dim = "year",
+  member_dim = "member",
+  ncores = NULL
+)
+}
+\arguments{
+\item{exp}{A numerical array with the forecast with, at least, time dimension.}
+
+\item{obs}{A numerical array with the observations with, at least, time dimension. 
+The length of the time dimension must be the same as for "exp".}
+
+\item{ref}{NULL or a numerical array with the reference forecast with, at least, time dimension. 
+The length of the time dimension must be the same as for "exp". If it is NULL, the climatological forecast is
+used as reference forecast. The default value is NULL.}
+
+\item{prob_thresholds}{A numeric vector with the relative thresholds (between 0 and 1) between the categories. 
+The default value is c(1/3,2/3), which corresponds to tercile equiprobable categories.}
+
+\item{indices_for_clim}{A vector with the indices to be taken along "time_dim" dimension for computing the thresholds
+between the probabilistic categories. If NULL, the whole period is used. The default value is NULL.}
+
+\item{Fair}{A logical indicating whether to compute the FairRPSS (the potential RPSS that the forecast would have with 
+an infinite ensemble size). The default value is FALSE.}
+
+\item{time_dim}{A character string indicating the name of the time dimension.
+The default value is 'year'.}
+
+\item{member_dim}{A character string indicating the name of the member dimension to compute the 
+ensemble means of the forecast and reference forecast. If it is NULL, the ensemble means should 
+be provided directly to the function. The default value is 'member'.}
+
+\item{ncores}{An integer indicating the number of cores to use for parallel 
+computation. The default value is NULL.}
+}
+\value{
+\item{$rpss}{
+ A numerical array with the RPSS with the same dimensions as "exp" except the "time_dim" 
+ and "member_dim" dimensions.
+}
+\item{$sign}{
+ A logical array with the statistical significance of the RPSS with the same dimensions 
+ as "exp" except the "time_dim" and "member_dim" dimensions.
+}
+}
+\description{
+The Ranked Probability Skill Score (RPSS; Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7)
+is the skill score based on the Ranked Probability Score (RPS; Wilks, 2011). It can be used to assess whether
+a forecast presents an improvement or worsening with respect to a reference forecast. The RPSS ranges between
+minus infinite and 1. If the RPSS is positive, it indicates that the forecast has higher skill than the 
+reference forecast, while a negative value means that it has a lower skill. Examples of reference forecasts
+are the climatological forecast (same probabilities for all categories for all time steps), persistence, a previous model version,
+and another model. It is computed as RPSS = 1 - RPS_exp / RPS_ref. The statistical significance is obtained based
+on a Random Walk test at the 95% confidence level (DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1)
+}
+\examples{
+exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50))
+ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50))
+obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, year = 50))
+RPSS(exp = exp, obs = obs) ## climatology as reference forecast
+RPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast
+
+}
diff --git a/man/RandomWalkTest.Rd b/man/RandomWalkTest.Rd
index 1110648..bd1460a 100644
--- a/man/RandomWalkTest.Rd
+++ b/man/RandomWalkTest.Rd
@@ -37,8 +37,8 @@ A list of 2:
 }
 \description{
 Forecast comparison of the skill obtained with 2 forecasts (with respect to a 
-common reference) based on Random Walks, with significance estimate at the 5%
-confidence level, as in DelSole and Tippett (2015).
+common reference) based on Random Walks, with significance estimate at the 95%
+confidence level, as in DelSole and Tippett (2016).
 }
 \examples{
 fcst_A <- array(c(11:50), dim = c(sdate = 10, lat = 2, lon = 2))
diff --git a/tests/testthat/test-RPS.R b/tests/testthat/test-RPS.R
new file mode 100644
index 0000000..7b1f77e
--- /dev/null
+++ b/tests/testthat/test-RPS.R
@@ -0,0 +1,129 @@
+context("s2dv::RPS tests")
+
+##############################################
+
+# dat1
+set.seed(1)
+exp1 <- array(rnorm(60), dim = c(member = 3, sdate = 10, lat = 2))
+set.seed(2)
+obs1 <- array(rnorm(20), dim = c(sdate = 10, lat = 2))
+
+# dat2
+set.seed(1)
+exp2 <- array(rnorm(20), dim = c(member = 2, sdate = 10))
+set.seed(2)
+obs2 <- array(rnorm(10), dim = c(sdate = 10))
+
+
+
+##############################################
+test_that("1. Input checks", {
+  # exp and obs (1)
+  expect_error(
+  RPS(c()),
+  'Parameter "exp" must be a numeric array.'
+  )
+  expect_error(
+  RPS(exp1, c()),
+  'Parameter "obs" must be a numeric array.'
+  )
+
+  # time_dim
+  expect_error(
+  RPS(exp1, obs1, time_dim = 1),
+  'Parameter "time_dim" must be a character string.'
+  )
+  expect_error(
+  RPS(exp1, obs1, time_dim = 'time'),
+  "Parameter 'time_dim' is not found in 'exp' or 'obs' dimension."
+  )
+  # memb_dim
+  expect_error(
+  RPS(exp1, obs1, memb_dim = TRUE),
+  "Parameter 'memb_dim' must be a character string."
+  )
+  expect_error(
+  RPS(exp1, obs1, memb_dim = 'memb'),
+  "Parameter 'memb_dim' is not found in 'exp' dimension."
+  )
+  # exp, ref, and obs (2)
+  expect_error(
+  RPS(exp1, array(1:9, dim = c(sdate = 9))),
+  "Parameter 'exp' and 'obs' must have same length of all dimensions expect 'memb_dim'."
+  )
+  # prob_thresholds
+  expect_error(
+  RPS(exp1, obs1, prob_thresholds = 1),
+  "Parameter 'prob_thresholds' must be a numeric vector between 0 and 1."
+  )
+  # indices_for_clim
+  expect_error(
+  RPS(exp1, obs1, indices_for_clim = array(1:6, dim = c(2, 3))),
+  "Parameter 'indices_for_clim' must be NULL or a numeric vector."
+  )
+  expect_error(
+  RPS(exp1, obs1, indices_for_clim = 3:11),
+  "Parameter 'indices_for_clim' should be the indices of 'time_dim'."
+  )
+  # Fair
+  expect_error(
+  RPS(exp1, obs1, Fair = 1),
+  "Parameter 'Fair' must be either TRUE or FALSE."
+  )
+  # ncores
+  expect_error(
+  RPS(exp2, obs2, ncores = 1.5),
+  "Parameter 'ncores' must be either NULL or a positive integer."
+  )
+
+})
+
+##############################################
+test_that("2. Output checks: dat1", {
+
+expect_equal(
+dim(RPS(exp1, obs1)),
+c(lat = 2)
+)
+expect_equal(
+as.vector(RPS(exp1, obs1)),
+c(0.3555556, 0.4444444),
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(RPS(exp1, obs1, Fair = T)),
+c(0.2000000, 0.2666667),
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(RPS(exp1, obs1, indices_for_clim = 3:5)),
+c(0.5000000, 0.4666667),
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(RPS(exp1, obs1, prob_thresholds = seq(0.1, 0.9, 0.1))),
+c(1.600000, 1.888889),
+tolerance = 0.0001
+)
+
+})
+
+##############################################
+test_that("3. Output checks: dat2", {
+
+expect_equal(
+dim(RPS(exp2, obs2)),
+NULL
+)
+expect_equal(
+RPS(exp2, obs2),
+0.75,
+tolerance = 0.0001
+)
+expect_equal(
+RPS(exp2, obs2, indices_for_clim = 2:5, prob_thresholds = seq(0.1, 0.9, 0.1)),
+2.75,
+tolerance = 0.0001
+)
+
+})
-- 
GitLab


From 298bb734d1fce1a171b72b76334c799837df8120 Mon Sep 17 00:00:00 2001
From: aho <an.ho@bsc.es>
Date: Thu, 12 May 2022 15:24:06 +0200
Subject: [PATCH 10/18] Change the order of 'obs' and 'ref'

---
 R/ResidualCorr.R    | 61 ++++++++++++++++++++++-----------------------
 man/ResidualCorr.Rd | 12 ++++-----
 2 files changed, 36 insertions(+), 37 deletions(-)

diff --git a/R/ResidualCorr.R b/R/ResidualCorr.R
index dae0cb8..541cfe8 100644
--- a/R/ResidualCorr.R
+++ b/R/ResidualCorr.R
@@ -17,11 +17,11 @@
 #'
 #'@param exp A named numerical array of the forecast with at least time 
 #'  dimension. 
+#'@param obs A named numerical array of the observations with at least time 
+#'  dimension. The dimensions must be the same as "exp" except 'memb_dim'.
 #'@param ref A named numerical array of the reference forecast data with at 
 #'  least time dimension. The dimensions must be the same as "exp" except 
 #'  'memb_dim'.
-#'@param obs A named numerical array of the observations with at least time 
-#'  dimension. The dimensions must be the same as "exp" except 'memb_dim'.
 #'@param N.eff Effective sample size to be used in the statistical significance
 #'  test. It can be NA (and it will be computed with the s2dv:::.Eno), a 
 #'  numeric (which is used for all cases), or an array with the same dimensions
@@ -66,13 +66,13 @@
 #'
 #'@examples
 #' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50))
-#' ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 5, sdate = 50))
 #' obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50))
-#' res <- ResidualCorr(exp, ref, obs, memb_dim = 'member')
+#' ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 5, sdate = 50))
+#' res <- ResidualCorr(exp = exp, obs = obs, ref = ref, memb_dim = 'member')
 #'
 #'@import multiApply
 #'@export
-ResidualCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate',
+ResidualCorr <- function(exp, obs, ref, N.eff = NA, time_dim = 'sdate',
                          memb_dim = NULL, method = 'pearson', alpha = NULL, 
                          handle.na = 'return.na', ncores = NULL) {
 
@@ -80,10 +80,10 @@ ResidualCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate',
   ## exp, ref, and obs (1)
   if (!is.array(exp) | !is.numeric(exp))
     stop('Parameter "exp" must be a numeric array.')
-  if (!is.array(ref) | !is.numeric(ref))
-    stop('Parameter "ref" must be a numeric array.')
   if (!is.array(obs) | !is.numeric(obs))
     stop('Parameter "obs" must be a numeric array.')
+  if (!is.array(ref) | !is.numeric(ref))
+    stop('Parameter "ref" must be a numeric array.')
 
   ## N.eff
   if (is.array(N.eff)) {
@@ -101,9 +101,9 @@ ResidualCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate',
   ## time_dim
   if (!is.character(time_dim) | length(time_dim) != 1)
     stop('Parameter "time_dim" must be a character string.')
-  if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(ref)) | 
-      !time_dim %in% names(dim(obs))) {
-    stop("Parameter 'time_dim' is not found in 'exp', 'ref', or 'obs' dimension.")
+  if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs)) | 
+      !time_dim %in% names(dim(ref))) {
+    stop("Parameter 'time_dim' is not found in 'exp', 'obs', or 'ref' dimension.")
   }
   ## memb_dim
   if (!is.null(memb_dim)) {
@@ -116,15 +116,15 @@ ResidualCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate',
   }
   ## exp, ref, and obs (2)
   name_exp <- sort(names(dim(exp)))
-  name_ref <- sort(names(dim(ref)))
   name_obs <- sort(names(dim(obs)))
+  name_ref <- sort(names(dim(ref)))
   if (!is.null(memb_dim)) {
     name_exp <- name_exp[-which(name_exp == memb_dim)]
     name_ref <- name_ref[-which(name_ref == memb_dim)]
   }
-  if (!identical(length(name_exp), length(name_ref), length(name_obs)) |
-      !identical(dim(exp)[name_exp], dim(ref)[name_ref], dim(obs)[name_obs])) {
-    stop(paste0("Parameter 'exp', 'ref', and 'obs' must have same length of ",
+  if (!identical(length(name_exp), length(name_obs), length(name_ref)) |
+      !identical(dim(exp)[name_exp], dim(obs)[name_obs], dim(ref)[name_ref])) {
+    stop(paste0("Parameter 'exp', 'obs', and 'ref' must have same length of ",
                 "all dimensions expect 'memb_dim'."))
   }
   ## method
@@ -153,13 +153,13 @@ ResidualCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate',
   ###############################
 
   # NA check: na.fail
-  if (handle.na == "na.fail" & (anyNA(exp) | anyNA(ref) | anyNA(obs)))
+  if (handle.na == "na.fail" & (anyNA(exp) | anyNA(obs) | anyNA(ref)))
     stop('The data contain NAs.')
 
   # Calculate ensemble mean
   dim_exp <- dim(exp)
-  dim_ref <- dim(ref)
   dim_obs <- dim(obs)
+  dim_ref <- dim(ref)
 
   if (!is.null(memb_dim)) {
     exp_memb_dim_ind <- which(names(dim_exp) == memb_dim)
@@ -180,18 +180,17 @@ ResidualCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate',
 
   # Residual correlation
   if (is.array(N.eff)) {
-    output <- Apply(data = list(exp = exp, ref = ref,
-                                obs = obs, N.eff = N.eff),
-                    target_dims = list(exp = time_dim, ref = time_dim,
-                                       obs = time_dim, N.eff = NULL),
+    output <- Apply(data = list(exp = exp, obs = obs, ref = ref,
+                                N.eff = N.eff),
+                    target_dims = list(exp = time_dim, obs = time_dim,
+                                       ref = time_dim, N.eff = NULL),
                     output_dims = output_dims,
                     fun = .ResidualCorr, method = method,
                     alpha = alpha, handle.na = handle.na, ncores = ncores)
   } else { 
-    output <- Apply(data = list(exp = exp, ref = ref, 
-                                obs = obs),
-                    target_dims = list(exp = time_dim, ref = time_dim, 
-                                       obs = time_dim), 
+    output <- Apply(data = list(exp = exp, obs = obs, ref = ref),
+                    target_dims = list(exp = time_dim, obs = time_dim, 
+                                       ref = time_dim), 
                     output_dims = output_dims, N.eff = N.eff,
                     fun = .ResidualCorr, method = method, 
                     alpha = alpha, handle.na = handle.na, ncores = ncores)
@@ -200,9 +199,9 @@ ResidualCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate',
   return(output)
 }
 
-.ResidualCorr <- function(exp, ref, obs, N.eff, method, alpha, handle.na) {
+.ResidualCorr <- function(exp, obs, ref, N.eff, method, alpha, handle.na) {
   # exp and ref and obs: [time]
-  .residual.corr <- function(exp, ref, obs, method, N.eff, alpha) {
+  .residual.corr <- function(exp, obs, ref, method, N.eff, alpha) {
 
     # Residuals of 'exp' and 'obs' (regressing 'ref' out in both 'exp' and 'obs')
     exp_res <- lm(formula = y ~ x, data = list(y = exp, x = ref), na.action = NULL)$residuals
@@ -235,16 +234,16 @@ ResidualCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate',
 
   #==================================================
   
-  if (anyNA(exp) | anyNA(ref) | anyNA(obs)) { ## There are NAs 
+  if (anyNA(exp) | anyNA(obs) | anyNA(ref)) { ## There are NAs 
     if (handle.na == 'only.complete.triplets') {
-      nna <- is.na(exp) | is.na(ref) | is.na(obs) # A vector of T/F
+      nna <- is.na(exp) | is.na(obs) | is.na(ref) # A vector of T/F
       if (all(nna)) stop("There is no complete set of forecasts and observations.")
       # Remove the incomplete set
       exp <- exp[!nna]
-      ref <- ref[!nna]
       obs <- obs[!nna]
+      ref <- ref[!nna]
 
-      output <- .residual.corr(exp = exp, ref = ref, obs = obs, method = method, 
+      output <- .residual.corr(exp = exp, obs = obs, ref = ref, method = method, 
                                N.eff = N.eff, alpha = alpha)
       
     } else if (handle.na == 'return.na') {
@@ -257,7 +256,7 @@ ResidualCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate',
     }
     
   } else { ## There is no NA  
-    output <- .residual.corr(exp = exp, ref = ref, obs = obs, method = method, 
+    output <- .residual.corr(exp = exp, obs = obs, ref = ref, method = method, 
                          N.eff = N.eff, alpha = alpha)
   }
 
diff --git a/man/ResidualCorr.Rd b/man/ResidualCorr.Rd
index e98ea7a..fe7dd10 100644
--- a/man/ResidualCorr.Rd
+++ b/man/ResidualCorr.Rd
@@ -6,8 +6,8 @@
 \usage{
 ResidualCorr(
   exp,
-  ref,
   obs,
+  ref,
   N.eff = NA,
   time_dim = "sdate",
   memb_dim = NULL,
@@ -21,13 +21,13 @@ ResidualCorr(
 \item{exp}{A named numerical array of the forecast with at least time 
 dimension.}
 
+\item{obs}{A named numerical array of the observations with at least time 
+dimension. The dimensions must be the same as "exp" except 'memb_dim'.}
+
 \item{ref}{A named numerical array of the reference forecast data with at 
 least time dimension. The dimensions must be the same as "exp" except 
 'memb_dim'.}
 
-\item{obs}{A named numerical array of the observations with at least time 
-dimension. The dimensions must be the same as "exp" except 'memb_dim'.}
-
 \item{N.eff}{Effective sample size to be used in the statistical significance
 test. It can be NA (and it will be computed with the s2dv:::.Eno), a 
 numeric (which is used for all cases), or an array with the same dimensions
@@ -95,8 +95,8 @@ effective degrees of freedom to account for the time series' autocorrelation
 }
 \examples{
 exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50))
-ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 5, sdate = 50))
 obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50))
-res <- ResidualCorr(exp, ref, obs, memb_dim = 'member')
+ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 5, sdate = 50))
+res <- ResidualCorr(exp = exp, obs = obs, ref = ref, memb_dim = 'member')
 
 }
-- 
GitLab


From 026cb55714c2aca94a909ef813bbb000557d4520 Mon Sep 17 00:00:00 2001
From: aho <an.ho@bsc.es>
Date: Thu, 12 May 2022 17:41:11 +0200
Subject: [PATCH 11/18] Correct conditional statement and unit test

---
 R/ResidualCorr.R                   |  4 +-
 tests/testthat/test-ResidualCorr.R | 76 +++++++++++++++---------------
 2 files changed, 40 insertions(+), 40 deletions(-)

diff --git a/R/ResidualCorr.R b/R/ResidualCorr.R
index 541cfe8..7428d1b 100644
--- a/R/ResidualCorr.R
+++ b/R/ResidualCorr.R
@@ -122,8 +122,8 @@ ResidualCorr <- function(exp, obs, ref, N.eff = NA, time_dim = 'sdate',
     name_exp <- name_exp[-which(name_exp == memb_dim)]
     name_ref <- name_ref[-which(name_ref == memb_dim)]
   }
-  if (!identical(length(name_exp), length(name_obs), length(name_ref)) |
-      !identical(dim(exp)[name_exp], dim(obs)[name_obs], dim(ref)[name_ref])) {
+  if (length(name_exp) != length(name_obs) | length(name_exp) != length(name_ref) |
+      any(dim(exp)[name_exp] != dim(obs)[name_obs]) | any(dim(exp)[name_exp] != dim(ref)[name_ref])) {
     stop(paste0("Parameter 'exp', 'obs', and 'ref' must have same length of ",
                 "all dimensions expect 'memb_dim'."))
   }
diff --git a/tests/testthat/test-ResidualCorr.R b/tests/testthat/test-ResidualCorr.R
index c9ac4b9..c15276c 100644
--- a/tests/testthat/test-ResidualCorr.R
+++ b/tests/testthat/test-ResidualCorr.R
@@ -29,63 +29,63 @@ test_that("1. Input checks", {
   'Parameter "exp" must be a numeric array.'
   )
   expect_error(
-  ResidualCorr(exp1, c('a')),
+  ResidualCorr(exp1, obs1, c('a')),
   'Parameter "ref" must be a numeric array.'
   )
   expect_error(
-  ResidualCorr(exp1, ref1, c()),
+  ResidualCorr(exp1, c()),
   'Parameter "obs" must be a numeric array.'
   )
   # N.eff
   expect_error(
-  ResidualCorr(exp1, ref1, obs1, N.eff = array(1, dim = c(lat = 2, lon = 2))),
+  ResidualCorr(exp1, obs1, ref1, N.eff = array(1, dim = c(lat = 2, lon = 2))),
 'If parameter "N.eff" is provided with an array, it must have the same dimensions as "obs" except "time_dim".'
   )
   expect_error(
-  ResidualCorr(exp1, ref1, obs1, N.eff = 1:3),
+  ResidualCorr(exp1, obs1, ref1, N.eff = 1:3),
   'Parameter "N.eff" must be NA, a numeric, or an array with the same dimensions as "obs" except "time_dim".'
   )
   # time_dim
   expect_error(
-  ResidualCorr(exp1, ref1, obs1, time_dim = 1),
+  ResidualCorr(exp1, obs1, ref1, time_dim = 1),
   'Parameter "time_dim" must be a character string.'
   )
   expect_error(
-  ResidualCorr(exp1, ref1, obs1, time_dim = 'time'),
-  "Parameter 'time_dim' is not found in 'exp', 'ref', or 'obs' dimension."
+  ResidualCorr(exp1, obs1, ref1, time_dim = 'time'),
+  "Parameter 'time_dim' is not found in 'exp', 'obs', or 'ref' dimension."
   )
   # memb_dim
   expect_error(
-  ResidualCorr(exp1, ref1, obs1, memb_dim = TRUE),
+  ResidualCorr(exp1, obs1, ref1, memb_dim = TRUE),
   "Parameter 'memb_dim' must be a character string."
   )
   expect_error(
-  ResidualCorr(exp1, ref1, obs1, memb_dim = 'member'),
+  ResidualCorr(exp1, obs1, ref1, memb_dim = 'member'),
   "Parameter 'memb_dim' is not found in 'exp' or 'ref' dimension."
   )
   # exp, ref, and obs (2)
   expect_error(
-  ResidualCorr(exp1, array(1:10, dim = c(sdate = 10, memb = 1)), obs1, memb_dim = 'memb'),
-  "Parameter 'exp', 'ref', and 'obs' must have same length of all dimensions expect 'memb_dim'."
+  ResidualCorr(exp1, obs1, array(1:10, dim = c(sdate = 10, memb = 1)), memb_dim = 'memb'),
+  "Parameter 'exp', 'obs', and 'ref' must have same length of all dimensions expect 'memb_dim'."
   )
   # method
   expect_error(
-  ResidualCorr(exp1, ref1, obs1, method = 'asd', memb_dim = 'memb'),
+  ResidualCorr(exp1, obs1, ref1, method = 'asd', memb_dim = 'memb'),
   'Parameter "method" must be "pearson", "kendall", or "spearman".'
   )
   # alpha
   expect_error(
-  ResidualCorr(exp2, ref2, obs2, alpha = 1),
+  ResidualCorr(exp2, obs2, ref2, alpha = 1),
   'Parameter "alpha" must be NULL or a number between 0 and 1.'
   )
   # handle.na
   expect_error(
-  ResidualCorr(exp2, ref2, obs2, handle.na = TRUE),
+  ResidualCorr(exp2, obs2, ref2, handle.na = TRUE),
   'Parameter "handle.na" must be "return.na", "only.complete.triplets" or "na.fail".'
   )
   # ncores
   expect_error(
-  ResidualCorr(exp2, ref2, obs2, ncores = 1.5),
+  ResidualCorr(exp2, obs2, ref2, ncores = 1.5),
   'Parameter "ncores" must be either NULL or a positive integer.'
   )
 
@@ -94,69 +94,69 @@ test_that("1. Input checks", {
 ##############################################
 test_that("2. Output checks: dat1", {
 expect_equal(
-names(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb')),
+names(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb')),
 c("res.corr", "p.val")
 )
 expect_equal(
-dim(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb')$res.corr),
+dim(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb')$res.corr),
 c(lat = 3, lon = 2)
 )
 expect_equal(
-dim(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb')$p.val),
+dim(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb')$p.val),
 c(lat = 3, lon = 2)
 )
 expect_equal(
-as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb')$res.corr),
+as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb')$res.corr),
 c(0.002784318, 0.537697647, -0.240071018, 0.258706464, 0.338160748, 0.432107476),
 tolerance = 0.0001
 )
 expect_equal(
-as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb')$p),
+as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb')$p),
 c(0.49695468, 0.05446055, 0.25203961, 0.23522967, 0.16960864, 0.10618145),
 tolerance = 0.0001
 )
 expect_equal(
-names(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', alpha = 0.05)),
+names(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', alpha = 0.05)),
 c("res.corr", "sign")
 )
 expect_equal(
-as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', alpha = 0.05)$res.corr),
+as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', alpha = 0.05)$res.corr),
 c(0.002784318, 0.537697647, -0.240071018, 0.258706464, 0.338160748, 0.432107476),
 tolerance = 0.0001
 )
 expect_equal(
-as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', alpha = 0.05)$sign),
+as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', alpha = 0.05)$sign),
 rep(FALSE, 6)
 )
 expect_equal(
-as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', method = "kendall")$res),
+as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', method = "kendall")$res),
 c(0.11111111, 0.42222222, -0.20000000, 0.02222222, 0.20000000, 0.42222222),
 tolerance = 0.0001
 )
 expect_equal(
-as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', method = "kendall")$p),
+as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', method = "kendall")$p),
 c(0.3799615, 0.1120891, 0.2897920, 0.4757064, 0.2897920, 0.1120891),
 tolerance = 0.0001
 )
 expect_equal(
-as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', method = "spearman")$res),
+as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', method = "spearman")$res),
 c(0.13939394, 0.61212121, -0.27272727, 0.07878788, 0.28484848, 0.47878788),
 tolerance = 0.0001
 )
 expect_equal(
-as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', method = "spearman")$p),
+as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', method = "spearman")$p),
 c(0.35046594, 0.02998607, 0.22291917, 0.41435870, 0.21251908, 0.08076146),
 tolerance = 0.0001
 )
 
 
 expect_equal(
-as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', N.eff = Neff1)$res),
+as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', N.eff = Neff1)$res),
 c(0.002784318, 0.537697647, -0.240071018, 0.258706464, 0.338160748, 0.432107476),
 tolerance = 0.0001
 )
 expect_equal(
-as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', N.eff = Neff1)$p),
+as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', N.eff = Neff1)$p),
 c(0.49716394, 0.05446055, 0.26690777, 0.23522967, 0.18671025, 0.10618145),
 tolerance = 0.0001
 )
@@ -165,17 +165,17 @@ tolerance = 0.0001
 #---------------------------
 exp1[1] <- NA
 expect_equal(
-as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb')$res.corr),
+as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb')$res.corr),
 c(NA, 0.537697647, -0.240071018, 0.258706464, 0.338160748, 0.432107476),
 tolerance = 0.0001
 )
 expect_equal(
-as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', handle.na = 'only.complete.triplets')$res.corr),
+as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', handle.na = 'only.complete.triplets')$res.corr),
 c(-0.1082588, 0.537697647, -0.240071018, 0.258706464, 0.338160748, 0.432107476),
 tolerance = 0.0001
 )
 expect_error(
-ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', handle.na = 'na.fail'),
+ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', handle.na = 'na.fail'),
 "The data contain NAs."
 )
 
@@ -183,7 +183,7 @@ exp1[1,1,1,1:3] <- NA
 ref1[1,1,1,4:8] <- NA
 obs1[1,1,9:10] <- NA
 expect_error(
-ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', handle.na = 'only.complete.triplets'),
+ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', handle.na = 'only.complete.triplets'),
 "There is no complete set of forecasts and observations."
 )
 
@@ -194,24 +194,24 @@ ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', handle.na = 'only.complete.tri
 
 test_that("3. Output checks: dat2", {
 expect_equal(
-names(ResidualCorr(exp2, ref2, obs2)),
+names(ResidualCorr(exp2, obs2, ref2)),
 c("res.corr", "p.val")
 )
 expect_equal(
-dim(ResidualCorr(exp2, ref2, obs2)$res.corr),
+dim(ResidualCorr(exp2, obs2, ref2)$res.corr),
 NULL
 )
 expect_equal(
-dim(ResidualCorr(exp2, ref2, obs2)$p),
+dim(ResidualCorr(exp2, obs2, ref2)$p),
 NULL
 )
 expect_equal(
-ResidualCorr(exp2, ref2, obs2)$p,
+ResidualCorr(exp2, obs2, ref2)$p,
 0.2209847,
 tolerance = 0.0001
 )
 expect_equal(
-ResidualCorr(exp2, ref2, obs2)$res,
+ResidualCorr(exp2, obs2, ref2)$res,
 -0.274962,
 tolerance = 0.0001
 )
-- 
GitLab


From cb78b52add85f65281c67cbb23a0826402ff3699 Mon Sep 17 00:00:00 2001
From: aho <an.ho@bsc.es>
Date: Thu, 12 May 2022 18:06:35 +0200
Subject: [PATCH 12/18] Improve RPSS() formatting and efficiency

---
 R/RPS.R                    |  17 ++-
 R/RPSS.R                   | 280 ++++++++++++++++++++++++-------------
 man/RPS.Rd                 |   3 +-
 man/RPSS.Rd                |  85 ++++++-----
 tests/testthat/test-RPS.R  |   2 +-
 tests/testthat/test-RPSS.R | 272 +++++++++++++++++++++++++++++++++++
 6 files changed, 516 insertions(+), 143 deletions(-)
 create mode 100644 tests/testthat/test-RPSS.R

diff --git a/R/RPS.R b/R/RPS.R
index 744b9c7..7b4af7c 100644
--- a/R/RPS.R
+++ b/R/RPS.R
@@ -14,7 +14,8 @@
 #'@param exp A named numerical array of the forecast with at least time 
 #'  dimension.  
 #'@param obs A named numerical array of the observation with at least time 
-#'  dimension. The dimensions must be the same as 'exp' except 'memb_dim'.
+#'  dimension. The dimensions must be the same as 'exp' except 'obs' doesn't 
+#'  have 'memb_dim'.
 #'@param time_dim A character string indicating the name of the time dimension.
 #'  The default value is 'sdate'.
 #'@param memb_dim A character string indicating the name of the member dimension
@@ -80,7 +81,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member',
   if (!identical(length(name_exp), length(name_obs)) |
       !identical(dim(exp)[name_exp], dim(obs)[name_obs])) {
     stop(paste0("Parameter 'exp' and 'obs' must have same length of ",
-                "all dimensions expect 'memb_dim'."))
+                "all dimensions expect 'obs' doesn't have 'memb_dim'."))
   }
   ## prob_thresholds
   if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) |
@@ -117,16 +118,21 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member',
   rps <- Apply(data = list(exp = exp, obs = obs), 
                target_dims = list(exp = c(time_dim, memb_dim), 
                                   obs = time_dim),
+               output_dims = time_dim,
                fun = .RPS,
                prob_thresholds = prob_thresholds, 
                indices_for_clim = indices_for_clim, Fair = Fair,
                ncores = ncores)$output1
-  
+
+  # Return only the mean RPS
+  rps <- MeanDims(rps, time_dim, na.rm = FALSE)
+ 
   return(rps)
 }
 
 
-.RPS <- function(exp, obs, prob_thresholds, indices_for_clim, Fair) {
+.RPS <- function(exp, obs, prob_thresholds = c(1/3, 2/3), 
+                 indices_for_clim = NULL, Fair = FALSE) {
   # exp: [sdate, memb]
   # obs: [sdate]
 
@@ -151,9 +157,6 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member',
     rps <- rps + adjustment
   }
 
-  # Return only the mean RPS
-  rps <- mean(rps, na.rm = FALSE)
-
   return(rps)
 }
 
diff --git a/R/RPSS.R b/R/RPSS.R
index f7dc80d..1498456 100644
--- a/R/RPSS.R
+++ b/R/RPSS.R
@@ -1,131 +1,215 @@
 #'Compute the Ranked Probability Skill Score
 #'
-#'The Ranked Probability Skill Score (RPSS; Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7)
-#'is the skill score based on the Ranked Probability Score (RPS; Wilks, 2011). It can be used to assess whether
-#'a forecast presents an improvement or worsening with respect to a reference forecast. The RPSS ranges between
-#'minus infinite and 1. If the RPSS is positive, it indicates that the forecast has higher skill than the 
-#'reference forecast, while a negative value means that it has a lower skill. Examples of reference forecasts
-#'are the climatological forecast (same probabilities for all categories for all time steps), persistence, a previous model version,
-#'and another model. It is computed as RPSS = 1 - RPS_exp / RPS_ref. The statistical significance is obtained based
-#'on a Random Walk test at the 95% confidence level (DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1)
+#'The Ranked Probability Skill Score (RPSS; Wilks, 2011) is the skill score 
+#'based on the Ranked Probability Score (RPS; Wilks, 2011). It can be used to 
+#'assess whether a forecast presents an improvement or worsening with respect to
+#'a reference forecast. The RPSS ranges between minus infinite and 1. If the 
+#'RPSS is positive, it indicates that the forecast has higher skill than the 
+#'reference forecast, while a negative value means that it has a lower skill. 
+#'Examples of reference forecasts are the climatological forecast (same 
+#'probabilities for all categories for all time steps), persistence, a previous
+#'model version, and another model. It is computed as RPSS = 1 - RPS_exp / RPS_ref.
+#'The statistical significance is obtained based on a Random Walk test at the 
+#'95% confidence level (DelSole and Tippett, 2016).
 #'
-#'@param exp A numerical array with the forecast with, at least, time dimension. 
-#'@param obs A numerical array with the observations with, at least, time dimension. 
-#'  The length of the time dimension must be the same as for "exp".
-#'@param ref NULL or a numerical array with the reference forecast with, at least, time dimension. 
-#'  The length of the time dimension must be the same as for "exp". If it is NULL, the climatological forecast is
-#'  used as reference forecast. The default value is NULL.
-#'@param prob_thresholds A numeric vector with the relative thresholds (between 0 and 1) between the categories. 
-#'  The default value is c(1/3,2/3), which corresponds to tercile equiprobable categories.
-#'@param indices_for_clim A vector with the indices to be taken along "time_dim" dimension for computing the thresholds
-#'  between the probabilistic categories. If NULL, the whole period is used. The default value is NULL.
-#'@param Fair A logical indicating whether to compute the FairRPSS (the potential RPSS that the forecast would have with 
-#'an infinite ensemble size). The default value is FALSE.
+#'@param exp A named numerical array of the forecast with at least time 
+#'  dimension.
+#'@param obs A named numerical array of the observation with at least time 
+#'  dimension. The dimensions must be the same as 'exp' except 'obs' doesn't 
+#'  have 'memb_dim'.
+#'@param ref A named numerical array of the reference forecast data with at 
+#'  least time dimension. The dimensions must be the same as 'exp' except 
+#'  'memb_dim'. If it is NULL, the climatological forecast is used as reference
+#'  forecast. The default value is NULL.
 #'@param time_dim A character string indicating the name of the time dimension.
-#'  The default value is 'year'.
-#'@param member_dim A character string indicating the name of the member dimension to compute the 
-#'  ensemble means of the forecast and reference forecast. If it is NULL, the ensemble means should 
-#'  be provided directly to the function. The default value is 'member'.
+#'  The default value is 'sdate'.
+#'@param memb_dim A character string indicating the name of the member dimension
+#'  to compute the probabilities of the forecast and the reference forecast. The
+#'  default value is 'member'.
+#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to
+#'  1) between the categories. The default value is c(1/3, 2/3), which 
+#'  corresponds to tercile equiprobable categories.
+#'@param indices_for_clim A vector of the indices to be taken along 'time_dim' 
+#'  for computing the thresholds between the probabilistic categories. If NULL,
+#'  the whole period is used. The default value is NULL.
+#'@param Fair A logical indicating whether to compute the FairRPSS (the 
+#'  potential RPSS that the forecast would have with an infinite ensemble size).
+#'  The default value is FALSE.
 #'@param ncores An integer indicating the number of cores to use for parallel 
 #'  computation. The default value is NULL.
 #'
 #'@return
 #'\item{$rpss}{
-#'  A numerical array with the RPSS with the same dimensions as "exp" except the "time_dim" 
-#'  and "member_dim" dimensions.
+#'  A numerical array of the RPSS with the same dimensions as "exp" except the
+#'  'time_dim' and 'memb_dim' dimensions.
 #'}
 #'\item{$sign}{
-#'  A logical array with the statistical significance of the RPSS with the same dimensions 
-#'  as "exp" except the "time_dim" and "member_dim" dimensions.
+#'  A logical array of the statistical significance of the RPSS with the same
+#'  dimensions as 'exp' except the 'time_dim' and 'memb_dim' dimensions.
 #'}
-
+#'
+#'@references 
+#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7
+#'DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1
+#'
 #'@examples
-#' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50))
-#' ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50))
-#' obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, year = 50))
-#' RPSS(exp = exp, obs = obs) ## climatology as reference forecast
-#' RPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast
+#'exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50))
+#'obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50))
+#'ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50))
+#'res <- RPSS(exp = exp, obs = obs) ## climatology as reference forecast
+#'res <- RPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast
 #'
 #'@import multiApply
 #'@export
-RPSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3,2/3), indices_for_clim = NULL, Fair = FALSE, time_dim = 'year', member_dim = 'member', ncores = NULL){
-  
-  ## Checkings
-  if (!is.array(exp)){stop('"exp" must be an array.')}
-  if (!is.array(obs)){stop('"obs" must be an array.')}
-  if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(obs)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "obs".')}
-  if (!is.null(ref)){
-    if (!is.array(ref)){stop('"ref" must be an array.')}
-    if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(ref)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "ref".')}
+RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
+                 prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, 
+                 ncores = NULL){
+ 
+  # Check inputs
+  ## exp, obs, and ref (1)
+  if (!is.array(exp) | !is.numeric(exp))
+    stop('Parameter "exp" must be a numeric array.')
+  if (!is.array(obs) | !is.numeric(obs))
+    stop('Parameter "obs" must be a numeric array.')
+  if (!is.null(ref)) {
+    if (!is.array(ref) | !is.numeric(ref))
+      stop('Parameter "ref" must be a numeric array.')
   }
-  if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | any(prob_thresholds <= 0) | any(prob_thresholds >= 1)){
-    stop('"prob_thresholds" must be a numeric vector with the relative thresholds (from 0 to 1) between the categories.')
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) != 1)
+    stop('Parameter "time_dim" must be a character string.')
+  if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) {
+    stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.")
   }
-  if (is.null(indices_for_clim)){
-    indices_for_clim <- 1:as.numeric(dim(obs)[time_dim])
-  } else if (!is.numeric(indices_for_clim)){
-    stop('"indices_for_clim" must be NULL or a numeric vector.')
+  if (!is.null(ref) & !time_dim %in% names(dim(ref))) {
+    stop("Parameter 'time_dim' is not found in 'ref' dimension.")
   }
-  if (!is.logical(Fair)){stop('"Fair" must be TRUE or FALSE.')}
-  if (!is.character(time_dim)){stop('"time_dim" must be a string.')}
-  if (!is.character(member_dim)){stop('"member_dim" must be a string.')}
-  if (!is.null(ncores)){
-    if (!is.numeric(ncores) | ncores < 1){stop('"ncores" must be either NULL or a positive integer.')}
-  } 
-  
-  if (!is.null(ref)){ ## using "ref" as reference forecast
-    output <- multiApply::Apply(data = list(exp = exp, obs = obs, ref = ref), 
-                                target_dims = list(exp = c(time_dim, member_dim), obs = time_dim, ref = c(time_dim, member_dim)),
-                                prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim,
-                                Fair = Fair, time_dim = time_dim, member_dim = member_dim,
-                                fun = .RPSS, ncores = ncores)
-  } else { ## using climatology as reference forecast
-    output <- multiApply::Apply(data = list(exp = exp, obs = obs), 
-                                target_dims = list(exp = c(time_dim, member_dim), obs = time_dim),
-                                ref = ref, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim,
-                                Fair = Fair, time_dim = time_dim, member_dim = member_dim,
-                                fun = .RPSS, ncores = ncores)
+  ## memb_dim
+  if (!is.character(memb_dim) | length(memb_dim) > 1) {
+    stop("Parameter 'memb_dim' must be a character string.")
+  }
+  if (!memb_dim %in% names(dim(exp))) {
+    stop("Parameter 'memb_dim' is not found in 'exp' dimension.")
+  }
+  if (!is.null(ref) & !memb_dim %in% names(dim(ref))) {
+    stop("Parameter 'memb_dim' is not found in 'ref' dimension.")
+  }
+  ## exp and obs (2)
+  name_exp <- sort(names(dim(exp)))
+  name_obs <- sort(names(dim(obs)))
+  name_exp <- name_exp[-which(name_exp == memb_dim)]
+  if (!identical(length(name_exp), length(name_obs)) |
+      !identical(dim(exp)[name_exp], dim(obs)[name_obs])) {
+    stop(paste0("Parameter 'exp' and 'obs' must have same length of ",
+                "all dimensions expect 'obs' doesn't have 'memb_dim'."))
+  }
+  if (!is.null(ref)) {
+    name_ref <- sort(names(dim(ref)))
+    name_ref <- name_ref[-which(name_ref == memb_dim)]
+    if (!identical(length(name_exp), length(name_ref)) |
+        !identical(dim(exp)[name_exp], dim(ref)[name_ref])) {
+      stop(paste0("Parameter 'exp' and 'obs' must have same length of ",
+                  "all dimensions expect 'memb_dim'."))
+    }
+  }
+  ## prob_thresholds
+  if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) |
+      any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) {
+    stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.")
+  }
+  ## indices_for_clim
+  if (is.null(indices_for_clim)) {
+    indices_for_clim <- 1:dim(obs)[time_dim]
+  } else {
+    if (!is.numeric(indices_for_clim) | !is.vector(indices_for_clim)) {
+      stop("Parameter 'indices_for_clim' must be NULL or a numeric vector.")
+    } else if (length(indices_for_clim) > dim(obs)[time_dim] |
+               max(indices_for_clim) > dim(obs)[time_dim] |
+               any(indices_for_clim) < 1) {
+      stop("Parameter 'indices_for_clim' should be the indices of 'time_dim'.")
+    }
+  }
+  ## Fair
+  if (!is.logical(Fair)  | length(Fair) > 1) {
+    stop("Parameter 'Fair' must be either TRUE or FALSE.")
+  }
+  ## ncores
+  if (!is.null(ncores)) {
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
+      length(ncores) > 1) {
+      stop("Parameter 'ncores' must be either NULL or a positive integer.")
+    }
   }
+
+  ###############################
+
+  # Compute RPSS
+  if (!is.null(ref)) { # use "ref" as reference forecast
+    data <- list(exp = exp, obs = obs, ref = ref)
+    target_dims = list(exp = c(time_dim, memb_dim),
+                       obs = time_dim,
+                       ref = c(time_dim, memb_dim))
+  } else {
+    data <- list(exp = exp, obs = obs)
+    target_dims = list(exp = c(time_dim, memb_dim),
+                       obs = time_dim)
+  }
+  output <- Apply(data,
+                  target_dims = target_dims,
+                  fun = .RPSS,
+                  prob_thresholds = prob_thresholds,
+                  indices_for_clim = indices_for_clim, Fair = Fair,
+                  ncores = ncores)
   
   return(output)
 }
 
-.RPSS <- function(exp, obs, ref, prob_thresholds, indices_for_clim, Fair, time_dim, member_dim){
-  
-  ## RPS of the forecast
-  rps_exp <- .RPS(exp = exp, obs = obs, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, Fair = Fair, time_dim = time_dim, member_dim = member_dim)
+.RPSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3, 2/3), 
+                  indices_for_clim = NULL, Fair = FALSE) {
+  # exp: [sdate, memb]
+  # obs: [sdate]
+  # ref: [sdate, memb] or NULL
+
+  # RPS of the forecast
+  rps_exp <- .RPS(exp = exp, obs = obs, prob_thresholds = prob_thresholds, 
+                  indices_for_clim = indices_for_clim, Fair = Fair)
   
-  ## RPS of the reference forecast
-  if (is.null(ref)){ ## using climatology as reference forecast
-    obs_probs <- .get_probs(data = obs, type = 'obs', indices_for_quantiles = indices_for_clim, time_dim = time_dim, member_dim = NULL, prob_thresholds = prob_thresholds)
-    clim_probs <- .get_probs_clim(prob_thresholds = prob_thresholds, dims = dim(obs_probs), time_dim = time_dim)
-    rps_ref <- multiApply::Apply(data = list(probs_exp = clim_probs, probs_obs = obs_probs), target_dims = 'bin',
-                                 fun = .RPS_from_probs, n_categories = length(prob_thresholds)+1, Fair = FALSE, ncores = NULL)$output1
-  } else { ## using "ref" as reference forecast
-    rps_ref <- .RPS(exp = ref, obs = obs, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, Fair = Fair, time_dim = time_dim, member_dim = member_dim)
+  # RPS of the reference forecast
+  if (is.null(ref)) { ## using climatology as reference forecast
+    obs_probs <- .get_probs(data = obs, indices_for_quantiles = indices_for_clim, 
+                            prob_thresholds = prob_thresholds)
+    # obs_probs: [bin, sdate] 
+
+    clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), 1 - prob_thresholds[length(prob_thresholds)])
+    clim_probs <- array(clim_probs, dim = dim(obs_probs))
+    # clim_probs: [bin, sdate]  
+
+    # Calculate RPS for each time step
+    probs_clim_cumsum <- apply(clim_probs, 2, cumsum)
+    probs_obs_cumsum <- apply(obs_probs, 2, cumsum)
+    rps_ref <- apply((probs_clim_cumsum - probs_obs_cumsum)^2, 2, sum)
+    # rps_ref: [sdate]
+
+#    if (Fair) { # FairRPS
+#      ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps]
+#      R <- dim(exp)[2]  #memb
+#      R_new <- Inf
+#      adjustment <- (-1) / (R - 1) * probs_clim_cumsum * (1 - probs_clim_cumsum)
+#      adjustment <- apply(adjustment, 2, sum)
+#      rps_ref <- rps_ref + adjustment
+#    }
+
+  } else { # use "ref" as reference forecast
+    rps_ref <- .RPS(exp = ref, obs = obs, prob_thresholds = prob_thresholds, 
+                    indices_for_clim = indices_for_clim, Fair = Fair)
   }
   
-  ## RPSS
+  # RPSS
   rpss <- 1 - mean(rps_exp) / mean(rps_ref)
   
-  ## Significance
-  rps_exp <- array(data = rps_exp, dim = c(time = length(rps_exp)))
-  rps_ref <- array(data = rps_ref, dim = c(time = length(rps_ref)))
-  sign <- s2dv::RandomWalkTest(skill_A = rps_exp, skill_B = rps_ref, time_dim = 'time', ncores = NULL)$signif
+  # Significance
+  sign <- .RandomWalkTest(skill_A = rps_exp, skill_B = rps_ref)$signif
   
   return(list(rpss = rpss, sign = sign))
 }
 
-.get_probs_clim <- function(prob_thresholds, dims, time_dim){
-  
-  probs <- array(data = NA, dim = c(bin = length(prob_thresholds)+1))
-  probs[1] <- prob_thresholds[1]
-  if (length(prob_thresholds) > 1){
-    probs[2:(length(probs)-1)] <- diff(prob_thresholds)
-  }
-  probs[length(probs)] <- 1 - prob_thresholds[length(prob_thresholds)]
-  stopifnot(identical(sum(probs),1))
-  probs <- s2dv::InsertDim(data = probs, posdim = 2, lendim = dims[[time_dim]], name = time_dim)
-  
-  return(probs)
-}
diff --git a/man/RPS.Rd b/man/RPS.Rd
index d1acc02..cbb0e7a 100644
--- a/man/RPS.Rd
+++ b/man/RPS.Rd
@@ -20,7 +20,8 @@ RPS(
 dimension.}
 
 \item{obs}{A named numerical array of the observation with at least time 
-dimension. The dimensions must be the same as 'exp' except 'memb_dim'.}
+dimension. The dimensions must be the same as 'exp' except 'obs' doesn't 
+have 'memb_dim'.}
 
 \item{time_dim}{A character string indicating the name of the time dimension.
 The default value is 'sdate'.}
diff --git a/man/RPSS.Rd b/man/RPSS.Rd
index 47136f7..6871d80 100644
--- a/man/RPSS.Rd
+++ b/man/RPSS.Rd
@@ -8,68 +8,81 @@ RPSS(
   exp,
   obs,
   ref = NULL,
+  time_dim = "sdate",
+  memb_dim = "member",
   prob_thresholds = c(1/3, 2/3),
   indices_for_clim = NULL,
   Fair = FALSE,
-  time_dim = "year",
-  member_dim = "member",
   ncores = NULL
 )
 }
 \arguments{
-\item{exp}{A numerical array with the forecast with, at least, time dimension.}
+\item{exp}{A named numerical array of the forecast with at least time 
+dimension.}
 
-\item{obs}{A numerical array with the observations with, at least, time dimension. 
-The length of the time dimension must be the same as for "exp".}
+\item{obs}{A named numerical array of the observation with at least time 
+dimension. The dimensions must be the same as 'exp' except 'obs' doesn't 
+have 'memb_dim'.}
 
-\item{ref}{NULL or a numerical array with the reference forecast with, at least, time dimension. 
-The length of the time dimension must be the same as for "exp". If it is NULL, the climatological forecast is
-used as reference forecast. The default value is NULL.}
+\item{ref}{A named numerical array of the reference forecast data with at 
+least time dimension. The dimensions must be the same as 'exp' except 
+'memb_dim'. If it is NULL, the climatological forecast is used as reference
+forecast. The default value is NULL.}
 
-\item{prob_thresholds}{A numeric vector with the relative thresholds (between 0 and 1) between the categories. 
-The default value is c(1/3,2/3), which corresponds to tercile equiprobable categories.}
+\item{time_dim}{A character string indicating the name of the time dimension.
+The default value is 'sdate'.}
 
-\item{indices_for_clim}{A vector with the indices to be taken along "time_dim" dimension for computing the thresholds
-between the probabilistic categories. If NULL, the whole period is used. The default value is NULL.}
+\item{memb_dim}{A character string indicating the name of the member dimension
+to compute the probabilities of the forecast and the reference forecast. The
+default value is 'member'.}
 
-\item{Fair}{A logical indicating whether to compute the FairRPSS (the potential RPSS that the forecast would have with 
-an infinite ensemble size). The default value is FALSE.}
+\item{prob_thresholds}{A numeric vector of the relative thresholds (from 0 to
+1) between the categories. The default value is c(1/3, 2/3), which 
+corresponds to tercile equiprobable categories.}
 
-\item{time_dim}{A character string indicating the name of the time dimension.
-The default value is 'year'.}
+\item{indices_for_clim}{A vector of the indices to be taken along 'time_dim' 
+for computing the thresholds between the probabilistic categories. If NULL,
+the whole period is used. The default value is NULL.}
 
-\item{member_dim}{A character string indicating the name of the member dimension to compute the 
-ensemble means of the forecast and reference forecast. If it is NULL, the ensemble means should 
-be provided directly to the function. The default value is 'member'.}
+\item{Fair}{A logical indicating whether to compute the FairRPSS (the 
+potential RPSS that the forecast would have with an infinite ensemble size).
+The default value is FALSE.}
 
 \item{ncores}{An integer indicating the number of cores to use for parallel 
 computation. The default value is NULL.}
 }
 \value{
 \item{$rpss}{
- A numerical array with the RPSS with the same dimensions as "exp" except the "time_dim" 
- and "member_dim" dimensions.
+ A numerical array of the RPSS with the same dimensions as "exp" except the
+ 'time_dim' and 'memb_dim' dimensions.
 }
 \item{$sign}{
- A logical array with the statistical significance of the RPSS with the same dimensions 
- as "exp" except the "time_dim" and "member_dim" dimensions.
+ A logical array of the statistical significance of the RPSS with the same
+ dimensions as 'exp' except the 'time_dim' and 'memb_dim' dimensions.
 }
 }
 \description{
-The Ranked Probability Skill Score (RPSS; Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7)
-is the skill score based on the Ranked Probability Score (RPS; Wilks, 2011). It can be used to assess whether
-a forecast presents an improvement or worsening with respect to a reference forecast. The RPSS ranges between
-minus infinite and 1. If the RPSS is positive, it indicates that the forecast has higher skill than the 
-reference forecast, while a negative value means that it has a lower skill. Examples of reference forecasts
-are the climatological forecast (same probabilities for all categories for all time steps), persistence, a previous model version,
-and another model. It is computed as RPSS = 1 - RPS_exp / RPS_ref. The statistical significance is obtained based
-on a Random Walk test at the 95% confidence level (DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1)
+The Ranked Probability Skill Score (RPSS; Wilks, 2011) is the skill score 
+based on the Ranked Probability Score (RPS; Wilks, 2011). It can be used to 
+assess whether a forecast presents an improvement or worsening with respect to
+a reference forecast. The RPSS ranges between minus infinite and 1. If the 
+RPSS is positive, it indicates that the forecast has higher skill than the 
+reference forecast, while a negative value means that it has a lower skill. 
+Examples of reference forecasts are the climatological forecast (same 
+probabilities for all categories for all time steps), persistence, a previous
+model version, and another model. It is computed as RPSS = 1 - RPS_exp / RPS_ref.
+The statistical significance is obtained based on a Random Walk test at the 
+95% confidence level (DelSole and Tippett, 2016).
 }
 \examples{
-exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50))
-ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50))
-obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, year = 50))
-RPSS(exp = exp, obs = obs) ## climatology as reference forecast
-RPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast
+exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50))
+obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50))
+ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50))
+res <- RPSS(exp = exp, obs = obs) ## climatology as reference forecast
+res <- RPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast
 
 }
+\references{
+Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7
+DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1
+}
diff --git a/tests/testthat/test-RPS.R b/tests/testthat/test-RPS.R
index 7b1f77e..00f89dc 100644
--- a/tests/testthat/test-RPS.R
+++ b/tests/testthat/test-RPS.R
@@ -49,7 +49,7 @@ test_that("1. Input checks", {
   # exp, ref, and obs (2)
   expect_error(
   RPS(exp1, array(1:9, dim = c(sdate = 9))),
-  "Parameter 'exp' and 'obs' must have same length of all dimensions expect 'memb_dim'."
+  "Parameter 'exp' and 'obs' must have same length of all dimensions expect 'obs' doesn't have 'memb_dim'."
   )
   # prob_thresholds
   expect_error(
diff --git a/tests/testthat/test-RPSS.R b/tests/testthat/test-RPSS.R
new file mode 100644
index 0000000..88fa18e
--- /dev/null
+++ b/tests/testthat/test-RPSS.R
@@ -0,0 +1,272 @@
+context("s2dv::RPSS tests")
+
+##############################################
+
+# dat1
+set.seed(1)
+exp1 <- array(rnorm(60), dim = c(member = 3, sdate = 10, lat = 2))
+set.seed(2)
+obs1 <- array(rnorm(20), dim = c(sdate = 10, lat = 2))
+set.seed(3)
+ref1 <- array(rnorm(40), dim = c(member = 2, sdate = 10, lat = 2))
+
+# dat2
+set.seed(1)
+exp2 <- array(rnorm(20), dim = c(member = 2, sdate = 10))
+set.seed(2)
+obs2 <- array(rnorm(10), dim = c(sdate = 10))
+set.seed(3)
+ref2 <- array(rnorm(20), dim = c(member = 2, sdate = 10))
+
+
+##############################################
+test_that("1. Input checks", {
+  # exp and obs (1)
+  expect_error(
+  RPSS(c()),
+  'Parameter "exp" must be a numeric array.'
+  )
+  expect_error(
+  RPSS(exp1, c()),
+  'Parameter "obs" must be a numeric array.'
+  )
+
+  # time_dim
+  expect_error(
+  RPSS(exp1, obs1, time_dim = 1),
+  'Parameter "time_dim" must be a character string.'
+  )
+  expect_error(
+  RPSS(exp1, obs1, time_dim = 'time'),
+  "Parameter 'time_dim' is not found in 'exp' or 'obs' dimension."
+  )
+  expect_error(
+  RPSS(exp2, obs2, array(rnorm(20), dim = c(member = 2, time = 10))),
+  "Parameter 'time_dim' is not found in 'ref' dimension."
+  )
+  # memb_dim
+  expect_error(
+  RPSS(exp1, obs1, memb_dim = TRUE),
+  "Parameter 'memb_dim' must be a character string."
+  )
+  expect_error(
+  RPSS(exp1, obs1, memb_dim = 'memb'),
+  "Parameter 'memb_dim' is not found in 'exp' dimension."
+  )
+  expect_error(
+  RPSS(exp2, obs2, array(rnorm(20), dim = c(memb = 2, sdate = 10))),
+  "Parameter 'memb_dim' is not found in 'ref' dimension."
+  )
+  # exp, ref, and obs (2)
+  expect_error(
+  RPSS(exp1, array(1:9, dim = c(sdate = 9))),
+  "Parameter 'exp' and 'obs' must have same length of all dimensions expect 'obs' doesn't have 'memb_dim'."
+  )
+  expect_error(
+  RPSS(exp1, obs1, ref2),
+  "Parameter 'exp' and 'obs' must have same length of all dimensions expect 'memb_dim'."
+  )
+  # prob_thresholds
+  expect_error(
+  RPSS(exp1, obs1, ref1, prob_thresholds = 1),
+  "Parameter 'prob_thresholds' must be a numeric vector between 0 and 1."
+  )
+  # indices_for_clim
+  expect_error(
+  RPSS(exp1, obs1, ref1, indices_for_clim = array(1:6, dim = c(2, 3))),
+  "Parameter 'indices_for_clim' must be NULL or a numeric vector."
+  )
+  expect_error(
+  RPSS(exp1, obs1, indices_for_clim = 3:11),
+  "Parameter 'indices_for_clim' should be the indices of 'time_dim'."
+  )
+  # Fair
+  expect_error(
+  RPSS(exp1, obs1, Fair = 1),
+  "Parameter 'Fair' must be either TRUE or FALSE."
+  )
+  # ncores
+  expect_error(
+  RPSS(exp2, obs2, ncores = 1.5),
+  "Parameter 'ncores' must be either NULL or a positive integer."
+  )
+
+})
+
+##############################################
+test_that("2. Output checks: dat1", {
+
+expect_equal(
+names(RPSS(exp1, obs1)),
+c("rpss", "sign")
+)
+expect_equal(
+names(RPSS(exp1, obs1, ref1)),
+c("rpss", "sign")
+)
+expect_equal(
+dim(RPSS(exp1, obs1)$rpss),
+c(lat = 2)
+)
+expect_equal(
+dim(RPSS(exp1, obs1)$sign),
+c(lat = 2)
+)
+expect_equal(
+dim(RPSS(exp1, obs1, ref1)$rpss),
+c(lat = 2)
+)
+expect_equal(
+dim(RPSS(exp1, obs1, ref1)$sign),
+c(lat = 2)
+)
+# ref = NULL
+expect_equal(
+as.vector(RPSS(exp1, obs1)$rpss),
+c(0.15789474, -0.05263158),
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(RPSS(exp1, obs1)$sign),
+c(FALSE, FALSE),
+)
+expect_equal(
+as.vector(RPSS(exp1, obs1, Fair = T)$rpss),
+c(0.5263158, 0.3684211),
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(RPSS(exp1, obs1, indices_for_clim = 3:5)$rpss),
+c(-0.4062500, -0.1052632),
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(RPSS(exp1, obs1, prob_thresholds = seq(0.1, 0.9, 0.1))$rpss),
+c(0.03030303, -0.14478114),
+tolerance = 0.0001
+)
+# ref = ref
+expect_equal(
+as.vector(RPSS(exp1, obs1, ref1)$rpss),
+c(0.5259259, 0.4771242),
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(RPSS(exp1, obs1, ref1)$sign),
+c(FALSE, FALSE)
+)
+expect_equal(
+as.vector(RPSS(exp1, obs1, ref1, Fair = T)$rpss),
+c(0.6000000, 0.6190476),
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(RPSS(exp1, obs1, ref1, indices_for_clim = 3:5)$rpss),
+c(0.2857143, 0.4166667),
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(RPSS(exp1, obs1, ref1, indices_for_clim = 3:5)$sign),
+c(FALSE, FALSE)
+)
+expect_equal(
+as.vector(RPSS(exp1, obs1, ref1, prob_thresholds = seq(0.1, 0.9, 0.1))$rpss),
+c(0.4754098, 0.3703704),
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(RPSS(exp1, obs1, ref1, prob_thresholds = seq(0.1, 0.9, 0.1))$sign),
+c(T, F)
+)
+
+})
+
+##############################################
+test_that("3. Output checks: dat2", {
+
+expect_equal(
+names(RPSS(exp2, obs2)),
+c("rpss", "sign")
+)
+expect_equal(
+names(RPSS(exp2, obs2, ref2)),
+c("rpss", "sign")
+)
+expect_equal(
+dim(RPSS(exp2, obs2)$rpss),
+NULL
+)
+expect_equal(
+dim(RPSS(exp2, obs2)$sign),
+NULL
+)
+expect_equal(
+dim(RPSS(exp2, obs2, ref2)$rpss),
+NULL
+)
+expect_equal(
+dim(RPSS(exp2, obs2, ref2)$sign),
+NULL
+)
+# ref = NULL
+expect_equal(
+as.vector(RPSS(exp2, obs2)$rpss),
+c(-0.7763158),
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(RPSS(exp2, obs2)$sign),
+FALSE,
+)
+expect_equal(
+as.vector(RPSS(exp2, obs2, Fair = T)$rpss),
+c(-0.1842105),
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(RPSS(exp2, obs2, indices_for_clim = 3:5)$rpss),
+-0.8984375,
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(RPSS(exp2, obs2, prob_thresholds = seq(0.1, 0.9, 0.1))$rpss),
+c(-0.7272727),
+tolerance = 0.0001
+)
+# ref = ref
+expect_equal(
+as.vector(RPSS(exp2, obs2, ref2)$rpss),
+0,
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(RPSS(exp2, obs2, ref2)$sign),
+FALSE
+)
+expect_equal(
+as.vector(RPSS(exp2, obs2, ref2, Fair = T)$rpss),
+0,
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(RPSS(exp2, obs2, ref2, indices_for_clim = 3:5)$rpss),
+0.03571429,
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(RPSS(exp2, obs2, ref2, indices_for_clim = 3:5)$sign),
+FALSE
+)
+expect_equal(
+as.vector(RPSS(exp2, obs2, ref2, prob_thresholds = seq(0.1, 0.9, 0.1))$rpss),
+0.06557377,
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(RPSS(exp2, obs2, ref2, prob_thresholds = seq(0.1, 0.9, 0.1))$sign),
+FALSE
+)
+
+
+
+})
-- 
GitLab


From 8c50012f7c8b1f9d33370cd4a67d509a5168ed43 Mon Sep 17 00:00:00 2001
From: aho <an.ho@bsc.es>
Date: Thu, 12 May 2022 18:22:36 +0200
Subject: [PATCH 13/18] Move easyVerification to Import

---
 DESCRIPTION | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index f2577bf..9690f44 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -37,9 +37,9 @@ Imports:
     NbClust,
     multiApply (>= 2.1.1),
     SpecsVerification (>= 0.5.0),
-    easyNCDF
+    easyNCDF,
+    easyVerification
 Suggests:
-    easyVerification,
     testthat
 License: Apache License 2.0
 URL: https://earth.bsc.es/gitlab/es/s2dv/
-- 
GitLab


From ed5a8f3ef529fda86c4baa856afd6cec456ddeb9 Mon Sep 17 00:00:00 2001
From: aho <an.ho@bsc.es>
Date: Mon, 16 May 2022 15:35:33 +0200
Subject: [PATCH 14/18] Allow obs to have memb_dim

---
 R/RPS.R                    | 22 +++++++++++++++-------
 R/RPSS.R                   | 22 +++++++++++++++-------
 man/RPS.Rd                 |  3 +--
 man/RPSS.Rd                |  3 +--
 tests/testthat/test-RPS.R  | 13 +++++++++++--
 tests/testthat/test-RPSS.R | 10 +++++++---
 6 files changed, 50 insertions(+), 23 deletions(-)

diff --git a/R/RPS.R b/R/RPS.R
index 7b4af7c..84956ac 100644
--- a/R/RPS.R
+++ b/R/RPS.R
@@ -14,8 +14,7 @@
 #'@param exp A named numerical array of the forecast with at least time 
 #'  dimension.  
 #'@param obs A named numerical array of the observation with at least time 
-#'  dimension. The dimensions must be the same as 'exp' except 'obs' doesn't 
-#'  have 'memb_dim'.
+#'  dimension. The dimensions must be the same as 'exp' except 'memb_dim'.
 #'@param time_dim A character string indicating the name of the time dimension.
 #'  The default value is 'sdate'.
 #'@param memb_dim A character string indicating the name of the member dimension
@@ -78,10 +77,13 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member',
   name_exp <- sort(names(dim(exp)))
   name_obs <- sort(names(dim(obs)))
   name_exp <- name_exp[-which(name_exp == memb_dim)]
+  if (memb_dim %in% name_obs) {
+    name_obs <- name_obs[-which(name_obs == memb_dim)]
+  }
   if (!identical(length(name_exp), length(name_obs)) |
       !identical(dim(exp)[name_exp], dim(obs)[name_obs])) {
     stop(paste0("Parameter 'exp' and 'obs' must have same length of ",
-                "all dimensions expect 'obs' doesn't have 'memb_dim'."))
+                "all dimensions expect 'memb_dim'."))
   }
   ## prob_thresholds
   if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) |
@@ -115,9 +117,15 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member',
   ###############################
  
   # Compute RPS
+  if (!memb_dim %in% names(dim(obs))) {
+    target_dims_obs <- time_dim
+  } else {
+    target_dims_obs <- c(time_dim, memb_dim)
+  }
+
   rps <- Apply(data = list(exp = exp, obs = obs), 
                target_dims = list(exp = c(time_dim, memb_dim), 
-                                  obs = time_dim),
+                                  obs = target_dims_obs),
                output_dims = time_dim,
                fun = .RPS,
                prob_thresholds = prob_thresholds, 
@@ -134,7 +142,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member',
 .RPS <- function(exp, obs, prob_thresholds = c(1/3, 2/3), 
                  indices_for_clim = NULL, Fair = FALSE) {
   # exp: [sdate, memb]
-  # obs: [sdate]
+  # obs: [sdate, (memb)]
 
   exp_probs <- .get_probs(data = exp, indices_for_quantiles = indices_for_clim, 
                           prob_thresholds = prob_thresholds)
@@ -162,9 +170,9 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member',
 
 .get_probs <- function(data, indices_for_quantiles, prob_thresholds) {
   # if exp: [sdate, memb]
-  # if obs: [sdate]
+  # if obs: [sdate, (memb)]
 
-  # Add dim [memb = 1] to obs
+  # Add dim [memb = 1] to obs if it doesn't have memb_dim
   if (length(dim(data)) == 1) dim(data) <- c(dim(data), 1)
 
   # Absolute thresholds
diff --git a/R/RPSS.R b/R/RPSS.R
index 1498456..04c3137 100644
--- a/R/RPSS.R
+++ b/R/RPSS.R
@@ -15,8 +15,7 @@
 #'@param exp A named numerical array of the forecast with at least time 
 #'  dimension.
 #'@param obs A named numerical array of the observation with at least time 
-#'  dimension. The dimensions must be the same as 'exp' except 'obs' doesn't 
-#'  have 'memb_dim'.
+#'  dimension. The dimensions must be the same as 'exp' except 'memb_dim'. 
 #'@param ref A named numerical array of the reference forecast data with at 
 #'  least time dimension. The dimensions must be the same as 'exp' except 
 #'  'memb_dim'. If it is NULL, the climatological forecast is used as reference
@@ -63,7 +62,7 @@
 #'@export
 RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
                  prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, 
-                 ncores = NULL){
+                 ncores = NULL) {
  
   # Check inputs
   ## exp, obs, and ref (1)
@@ -98,10 +97,13 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
   name_exp <- sort(names(dim(exp)))
   name_obs <- sort(names(dim(obs)))
   name_exp <- name_exp[-which(name_exp == memb_dim)]
+  if (memb_dim %in% name_obs) {
+    name_obs <- name_obs[-which(name_obs == memb_dim)]
+  }
   if (!identical(length(name_exp), length(name_obs)) |
       !identical(dim(exp)[name_exp], dim(obs)[name_obs])) {
     stop(paste0("Parameter 'exp' and 'obs' must have same length of ",
-                "all dimensions expect 'obs' doesn't have 'memb_dim'."))
+                "all dimensions expect 'memb_dim'."))
   }
   if (!is.null(ref)) {
     name_ref <- sort(names(dim(ref)))
@@ -144,15 +146,21 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
   ###############################
 
   # Compute RPSS
+  if (!memb_dim %in% names(dim(obs))) {
+    target_dims_obs <- time_dim
+  } else {
+    target_dims_obs <- c(time_dim, memb_dim)
+  }
+
   if (!is.null(ref)) { # use "ref" as reference forecast
     data <- list(exp = exp, obs = obs, ref = ref)
     target_dims = list(exp = c(time_dim, memb_dim),
-                       obs = time_dim,
+                       obs = target_dims_obs,
                        ref = c(time_dim, memb_dim))
   } else {
     data <- list(exp = exp, obs = obs)
     target_dims = list(exp = c(time_dim, memb_dim),
-                       obs = time_dim)
+                       obs = target_dims_obs)
   }
   output <- Apply(data,
                   target_dims = target_dims,
@@ -167,7 +175,7 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
 .RPSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3, 2/3), 
                   indices_for_clim = NULL, Fair = FALSE) {
   # exp: [sdate, memb]
-  # obs: [sdate]
+  # obs: [sdate, (memb)]
   # ref: [sdate, memb] or NULL
 
   # RPS of the forecast
diff --git a/man/RPS.Rd b/man/RPS.Rd
index cbb0e7a..d1acc02 100644
--- a/man/RPS.Rd
+++ b/man/RPS.Rd
@@ -20,8 +20,7 @@ RPS(
 dimension.}
 
 \item{obs}{A named numerical array of the observation with at least time 
-dimension. The dimensions must be the same as 'exp' except 'obs' doesn't 
-have 'memb_dim'.}
+dimension. The dimensions must be the same as 'exp' except 'memb_dim'.}
 
 \item{time_dim}{A character string indicating the name of the time dimension.
 The default value is 'sdate'.}
diff --git a/man/RPSS.Rd b/man/RPSS.Rd
index 6871d80..6324ea2 100644
--- a/man/RPSS.Rd
+++ b/man/RPSS.Rd
@@ -21,8 +21,7 @@ RPSS(
 dimension.}
 
 \item{obs}{A named numerical array of the observation with at least time 
-dimension. The dimensions must be the same as 'exp' except 'obs' doesn't 
-have 'memb_dim'.}
+dimension. The dimensions must be the same as 'exp' except 'memb_dim'.}
 
 \item{ref}{A named numerical array of the reference forecast data with at 
 least time dimension. The dimensions must be the same as 'exp' except 
diff --git a/tests/testthat/test-RPS.R b/tests/testthat/test-RPS.R
index 00f89dc..e5a5f0c 100644
--- a/tests/testthat/test-RPS.R
+++ b/tests/testthat/test-RPS.R
@@ -14,6 +14,12 @@ exp2 <- array(rnorm(20), dim = c(member = 2, sdate = 10))
 set.seed(2)
 obs2 <- array(rnorm(10), dim = c(sdate = 10))
 
+# dat2_1
+set.seed(1)
+exp2_1 <- array(rnorm(20), dim = c(member = 2, sdate = 10))
+set.seed(2)
+obs2_1 <- array(rnorm(10), dim = c(member = 1, sdate = 10))
+
 
 
 ##############################################
@@ -49,7 +55,7 @@ test_that("1. Input checks", {
   # exp, ref, and obs (2)
   expect_error(
   RPS(exp1, array(1:9, dim = c(sdate = 9))),
-  "Parameter 'exp' and 'obs' must have same length of all dimensions expect 'obs' doesn't have 'memb_dim'."
+  "Parameter 'exp' and 'obs' must have same length of all dimensions expect 'memb_dim'."
   )
   # prob_thresholds
   expect_error(
@@ -125,5 +131,8 @@ RPS(exp2, obs2, indices_for_clim = 2:5, prob_thresholds = seq(0.1, 0.9, 0.1)),
 2.75,
 tolerance = 0.0001
 )
-
+expect_equal(
+RPS(exp2, obs2),
+RPS(exp2_1, obs2_1)
+)
 })
diff --git a/tests/testthat/test-RPSS.R b/tests/testthat/test-RPSS.R
index 88fa18e..b4c0b7a 100644
--- a/tests/testthat/test-RPSS.R
+++ b/tests/testthat/test-RPSS.R
@@ -15,6 +15,8 @@ set.seed(1)
 exp2 <- array(rnorm(20), dim = c(member = 2, sdate = 10))
 set.seed(2)
 obs2 <- array(rnorm(10), dim = c(sdate = 10))
+set.seed(2)
+obs2_1 <- array(rnorm(10), dim = c(member = 1, sdate = 10))
 set.seed(3)
 ref2 <- array(rnorm(20), dim = c(member = 2, sdate = 10))
 
@@ -60,7 +62,7 @@ test_that("1. Input checks", {
   # exp, ref, and obs (2)
   expect_error(
   RPSS(exp1, array(1:9, dim = c(sdate = 9))),
-  "Parameter 'exp' and 'obs' must have same length of all dimensions expect 'obs' doesn't have 'memb_dim'."
+  "Parameter 'exp' and 'obs' must have same length of all dimensions expect 'memb_dim'."
   )
   expect_error(
   RPSS(exp1, obs1, ref2),
@@ -266,7 +268,9 @@ expect_equal(
 as.vector(RPSS(exp2, obs2, ref2, prob_thresholds = seq(0.1, 0.9, 0.1))$sign),
 FALSE
 )
-
-
+expect_equal(
+RPSS(exp2, obs2),
+RPSS(exp2, obs2_1)
+)
 
 })
-- 
GitLab


From 269b8791c70b05b24d310c406c1794b29cdfa95b Mon Sep 17 00:00:00 2001
From: jcos <josep.cos@bsc.es>
Date: Wed, 18 May 2022 18:04:46 +0200
Subject: [PATCH 15/18] first attempt at weighted quantile and probs

---
 R/RPS.R | 52 ++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 42 insertions(+), 10 deletions(-)

diff --git a/R/RPS.R b/R/RPS.R
index 84956ac..b2065cd 100644
--- a/R/RPS.R
+++ b/R/RPS.R
@@ -48,7 +48,7 @@
 #'@export
 RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', 
                 prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, 
-                ncores = NULL) {
+                ncores = NULL, weights = NULL) {
   
   # Check inputs
   ## exp and obs (1)
@@ -130,7 +130,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member',
                fun = .RPS,
                prob_thresholds = prob_thresholds, 
                indices_for_clim = indices_for_clim, Fair = Fair,
-               ncores = ncores)$output1
+               ncores = ncores, weights = weights)$output1
 
   # Return only the mean RPS
   rps <- MeanDims(rps, time_dim, na.rm = FALSE)
@@ -140,12 +140,12 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member',
 
 
 .RPS <- function(exp, obs, prob_thresholds = c(1/3, 2/3), 
-                 indices_for_clim = NULL, Fair = FALSE) {
+                 indices_for_clim = NULL, Fair = FALSE, weights = NULL) {
   # exp: [sdate, memb]
   # obs: [sdate, (memb)]
 
   exp_probs <- .get_probs(data = exp, indices_for_quantiles = indices_for_clim, 
-                          prob_thresholds = prob_thresholds)
+                          prob_thresholds = prob_thresholds, weights = weights)
   # exp_probs: [bin, sdate] 
   obs_probs <- .get_probs(data = obs, indices_for_quantiles = indices_for_clim, 
                           prob_thresholds = prob_thresholds)
@@ -168,7 +168,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member',
   return(rps)
 }
 
-.get_probs <- function(data, indices_for_quantiles, prob_thresholds) {
+.get_probs <- function(data, indices_for_quantiles, prob_thresholds, weigths = NULL) {
   # if exp: [sdate, memb]
   # if obs: [sdate, (memb)]
 
@@ -176,18 +176,50 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member',
   if (length(dim(data)) == 1) dim(data) <- c(dim(data), 1)
 
   # Absolute thresholds
-  quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), probs = prob_thresholds, type = 8, na.rm = TRUE)
-
+  if (is.null(weights)){
+    quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), probs = prob_thresholds, type = 8, na.rm = TRUE)
+  } else {
+    # linear approach to compute weighted distribution thresholds.
+    sorted_data, sorted_weights <- .sorted_distributions(data[indices_for_quantiles, ], weights[indices_for_quantiles, ])
+    # approach 1 (interpolate from discrete cumulative weights distribution. NOT type 8)
+    cumulative_weights <- cumsum(sorted_weights) - 0.5 * sorted_weights
+    cumulative_weights <- cumulative_weights - cumulative_weights[1] # fix the 0
+    cumulative_weights <- cumulative_weights / cumulative_weights[length(cumulative_weights)] # fix the 1
+    quantiles <- interp1(cumulative_weights, sorted_data, c(1/3,2/3), "cubic")
+    # approach 2 (interpolate from the polynomial fit of the cumulative weights distribution)
+    fit_model <- lm(cumsum(sorted_weights) ~ poly(sorted_data,10)) # order 10, but we should think what order we want to use
+    predicted.intervals <- predict(fit_model,data.frame(x=sorted_data))
+    quantiles <- interp1(predicted.intervals, sorted_data, c(1/3,2/3), "cubic")
+  }
   # Probabilities
   probs <- array(dim = c(bin = length(quantiles) + 1, dim(data)[1])) # [bin, sdate]
   for (i_time in 1:dim(data)[1]) {
-    if (anyNA(data[i_time, ])) {
-      probs[, i_time] <- rep(NA, dim = length(quantiles) + 1)
+    if (is.null(weights)){
+      if (anyNA(data[i_time, ])) {
+        probs[, i_time] <- rep(NA, dim = length(quantiles) + 1)
+      } else {
+        probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], threshold = quantiles))
+      }
     } else {
-      probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], threshold = quantiles))
+      sorted_data, sorted_weights <- .sorted_distributions(data[i_time, ], weights[i_time, ])
+
+      fit_model <- lm(cumsum(sorted_weights) ~ poly(sorted_data,10))
+      predicted.intervals <- predict(fit_model,data.frame(x=sorted_data))
+      integrated_probs <- interp1(sorted_data, predicted.intervals, quantiles, "cubic")
+      probs[, i_time] <- c(integrated_probs[1], integrated_probs[2]-integrated_probs[1], 1-integrated_probs[2])
     }
+
   }
 
   return(probs)
 }
 
+.sorted_distributions <- function(data_vector, weights_vector){
+      weights_vector <- as.vector(weights_vector)
+      data_vector <- as.vector(data_vector)
+      weights_vector <- weights_vector / sum(weights_vector) # normalize to 1
+      sorter <- order(data_vector)
+      sorted_data <- data_vector[sorter]
+      sorted_weights <- weights_vector[sorter]
+      return(sorted_data, sorted_weights)
+}
\ No newline at end of file
-- 
GitLab


From 1283a29dad4b39c6eacf315c8755128c991f5b2d Mon Sep 17 00:00:00 2001
From: jcos <josep.cos@bsc.es>
Date: Thu, 19 May 2022 08:30:49 +0200
Subject: [PATCH 16/18] quantile and prob from poly fit coeff, not interp

---
 R/RPS.R | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/R/RPS.R b/R/RPS.R
index b2065cd..1afdf6e 100644
--- a/R/RPS.R
+++ b/R/RPS.R
@@ -185,11 +185,11 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member',
     cumulative_weights <- cumsum(sorted_weights) - 0.5 * sorted_weights
     cumulative_weights <- cumulative_weights - cumulative_weights[1] # fix the 0
     cumulative_weights <- cumulative_weights / cumulative_weights[length(cumulative_weights)] # fix the 1
-    quantiles <- interp1(cumulative_weights, sorted_data, c(1/3,2/3), "cubic")
+    quantiles <- interp1(cumulative_weights, sorted_data, c(1/3,2/3), "cubic") # library(signal)
     # approach 2 (interpolate from the polynomial fit of the cumulative weights distribution)
     fit_model <- lm(cumsum(sorted_weights) ~ poly(sorted_data,10)) # order 10, but we should think what order we want to use
-    predicted.intervals <- predict(fit_model,data.frame(x=sorted_data))
-    quantiles <- interp1(predicted.intervals, sorted_data, c(1/3,2/3), "cubic")
+    a <- fit_model$coefficients
+    quantiles <- a[1]+a[2]*prob_threshold+a[3]*prob_threshold^2+a[4]*prob_threshold^3
   }
   # Probabilities
   probs <- array(dim = c(bin = length(quantiles) + 1, dim(data)[1])) # [bin, sdate]
@@ -202,10 +202,9 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member',
       }
     } else {
       sorted_data, sorted_weights <- .sorted_distributions(data[i_time, ], weights[i_time, ])
-
-      fit_model <- lm(cumsum(sorted_weights) ~ poly(sorted_data,10))
-      predicted.intervals <- predict(fit_model,data.frame(x=sorted_data))
-      integrated_probs <- interp1(sorted_data, predicted.intervals, quantiles, "cubic")
+      fit_model <- lm(cumsum(sorted_weights) ~ poly(sorted_data,10)) # poly(sorted_data,10,raw=TRUE)?
+      a <- fit_model$coefficients
+      integrated_probs <- a[1]+a[2]*quantiles+a[3]*quantiles^2+a[4]*quantiles^3
       probs[, i_time] <- c(integrated_probs[1], integrated_probs[2]-integrated_probs[1], 1-integrated_probs[2])
     }
 
@@ -215,11 +214,11 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member',
 }
 
 .sorted_distributions <- function(data_vector, weights_vector){
-      weights_vector <- as.vector(weights_vector)
-      data_vector <- as.vector(data_vector)
-      weights_vector <- weights_vector / sum(weights_vector) # normalize to 1
-      sorter <- order(data_vector)
-      sorted_data <- data_vector[sorter]
-      sorted_weights <- weights_vector[sorter]
-      return(sorted_data, sorted_weights)
+  weights_vector <- as.vector(weights_vector)
+  data_vector <- as.vector(data_vector)
+  weights_vector <- weights_vector / sum(weights_vector) # normalize to 1
+  sorter <- order(data_vector)
+  sorted_data <- data_vector[sorter]
+  sorted_weights <- weights_vector[sorter]
+  return(sorted_data, sorted_weights)
 }
\ No newline at end of file
-- 
GitLab


From 762a4f529d4f4e040505a6ed585e4674578f0731 Mon Sep 17 00:00:00 2001
From: Pep Cos <josep.cos@bsc.es>
Date: Thu, 19 May 2022 10:01:55 +0200
Subject: [PATCH 17/18] Revert "quantile and prob from poly fit coeff, not
 interp"

This reverts commit 1283a29dad4b39c6eacf315c8755128c991f5b2d
---
 R/RPS.R | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/R/RPS.R b/R/RPS.R
index 1afdf6e..b2065cd 100644
--- a/R/RPS.R
+++ b/R/RPS.R
@@ -185,11 +185,11 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member',
     cumulative_weights <- cumsum(sorted_weights) - 0.5 * sorted_weights
     cumulative_weights <- cumulative_weights - cumulative_weights[1] # fix the 0
     cumulative_weights <- cumulative_weights / cumulative_weights[length(cumulative_weights)] # fix the 1
-    quantiles <- interp1(cumulative_weights, sorted_data, c(1/3,2/3), "cubic") # library(signal)
+    quantiles <- interp1(cumulative_weights, sorted_data, c(1/3,2/3), "cubic")
     # approach 2 (interpolate from the polynomial fit of the cumulative weights distribution)
     fit_model <- lm(cumsum(sorted_weights) ~ poly(sorted_data,10)) # order 10, but we should think what order we want to use
-    a <- fit_model$coefficients
-    quantiles <- a[1]+a[2]*prob_threshold+a[3]*prob_threshold^2+a[4]*prob_threshold^3
+    predicted.intervals <- predict(fit_model,data.frame(x=sorted_data))
+    quantiles <- interp1(predicted.intervals, sorted_data, c(1/3,2/3), "cubic")
   }
   # Probabilities
   probs <- array(dim = c(bin = length(quantiles) + 1, dim(data)[1])) # [bin, sdate]
@@ -202,9 +202,10 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member',
       }
     } else {
       sorted_data, sorted_weights <- .sorted_distributions(data[i_time, ], weights[i_time, ])
-      fit_model <- lm(cumsum(sorted_weights) ~ poly(sorted_data,10)) # poly(sorted_data,10,raw=TRUE)?
-      a <- fit_model$coefficients
-      integrated_probs <- a[1]+a[2]*quantiles+a[3]*quantiles^2+a[4]*quantiles^3
+
+      fit_model <- lm(cumsum(sorted_weights) ~ poly(sorted_data,10))
+      predicted.intervals <- predict(fit_model,data.frame(x=sorted_data))
+      integrated_probs <- interp1(sorted_data, predicted.intervals, quantiles, "cubic")
       probs[, i_time] <- c(integrated_probs[1], integrated_probs[2]-integrated_probs[1], 1-integrated_probs[2])
     }
 
@@ -214,11 +215,11 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member',
 }
 
 .sorted_distributions <- function(data_vector, weights_vector){
-  weights_vector <- as.vector(weights_vector)
-  data_vector <- as.vector(data_vector)
-  weights_vector <- weights_vector / sum(weights_vector) # normalize to 1
-  sorter <- order(data_vector)
-  sorted_data <- data_vector[sorter]
-  sorted_weights <- weights_vector[sorter]
-  return(sorted_data, sorted_weights)
+      weights_vector <- as.vector(weights_vector)
+      data_vector <- as.vector(data_vector)
+      weights_vector <- weights_vector / sum(weights_vector) # normalize to 1
+      sorter <- order(data_vector)
+      sorted_data <- data_vector[sorter]
+      sorted_weights <- weights_vector[sorter]
+      return(sorted_data, sorted_weights)
 }
\ No newline at end of file
-- 
GitLab


From d172207b5941974abf23500db1bb7536d1d22996 Mon Sep 17 00:00:00 2001
From: Pep Cos <josep.cos@bsc.es>
Date: Thu, 19 May 2022 10:07:20 +0200
Subject: [PATCH 18/18] Revert "first attempt at weighted quantile and probs"

This reverts commit 269b8791c70b05b24d310c406c1794b29cdfa95b
---
 R/RPS.R | 52 ++++++++++------------------------------------------
 1 file changed, 10 insertions(+), 42 deletions(-)

diff --git a/R/RPS.R b/R/RPS.R
index b2065cd..84956ac 100644
--- a/R/RPS.R
+++ b/R/RPS.R
@@ -48,7 +48,7 @@
 #'@export
 RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', 
                 prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, 
-                ncores = NULL, weights = NULL) {
+                ncores = NULL) {
   
   # Check inputs
   ## exp and obs (1)
@@ -130,7 +130,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member',
                fun = .RPS,
                prob_thresholds = prob_thresholds, 
                indices_for_clim = indices_for_clim, Fair = Fair,
-               ncores = ncores, weights = weights)$output1
+               ncores = ncores)$output1
 
   # Return only the mean RPS
   rps <- MeanDims(rps, time_dim, na.rm = FALSE)
@@ -140,12 +140,12 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member',
 
 
 .RPS <- function(exp, obs, prob_thresholds = c(1/3, 2/3), 
-                 indices_for_clim = NULL, Fair = FALSE, weights = NULL) {
+                 indices_for_clim = NULL, Fair = FALSE) {
   # exp: [sdate, memb]
   # obs: [sdate, (memb)]
 
   exp_probs <- .get_probs(data = exp, indices_for_quantiles = indices_for_clim, 
-                          prob_thresholds = prob_thresholds, weights = weights)
+                          prob_thresholds = prob_thresholds)
   # exp_probs: [bin, sdate] 
   obs_probs <- .get_probs(data = obs, indices_for_quantiles = indices_for_clim, 
                           prob_thresholds = prob_thresholds)
@@ -168,7 +168,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member',
   return(rps)
 }
 
-.get_probs <- function(data, indices_for_quantiles, prob_thresholds, weigths = NULL) {
+.get_probs <- function(data, indices_for_quantiles, prob_thresholds) {
   # if exp: [sdate, memb]
   # if obs: [sdate, (memb)]
 
@@ -176,50 +176,18 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member',
   if (length(dim(data)) == 1) dim(data) <- c(dim(data), 1)
 
   # Absolute thresholds
-  if (is.null(weights)){
-    quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), probs = prob_thresholds, type = 8, na.rm = TRUE)
-  } else {
-    # linear approach to compute weighted distribution thresholds.
-    sorted_data, sorted_weights <- .sorted_distributions(data[indices_for_quantiles, ], weights[indices_for_quantiles, ])
-    # approach 1 (interpolate from discrete cumulative weights distribution. NOT type 8)
-    cumulative_weights <- cumsum(sorted_weights) - 0.5 * sorted_weights
-    cumulative_weights <- cumulative_weights - cumulative_weights[1] # fix the 0
-    cumulative_weights <- cumulative_weights / cumulative_weights[length(cumulative_weights)] # fix the 1
-    quantiles <- interp1(cumulative_weights, sorted_data, c(1/3,2/3), "cubic")
-    # approach 2 (interpolate from the polynomial fit of the cumulative weights distribution)
-    fit_model <- lm(cumsum(sorted_weights) ~ poly(sorted_data,10)) # order 10, but we should think what order we want to use
-    predicted.intervals <- predict(fit_model,data.frame(x=sorted_data))
-    quantiles <- interp1(predicted.intervals, sorted_data, c(1/3,2/3), "cubic")
-  }
+  quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), probs = prob_thresholds, type = 8, na.rm = TRUE)
+
   # Probabilities
   probs <- array(dim = c(bin = length(quantiles) + 1, dim(data)[1])) # [bin, sdate]
   for (i_time in 1:dim(data)[1]) {
-    if (is.null(weights)){
-      if (anyNA(data[i_time, ])) {
-        probs[, i_time] <- rep(NA, dim = length(quantiles) + 1)
-      } else {
-        probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], threshold = quantiles))
-      }
+    if (anyNA(data[i_time, ])) {
+      probs[, i_time] <- rep(NA, dim = length(quantiles) + 1)
     } else {
-      sorted_data, sorted_weights <- .sorted_distributions(data[i_time, ], weights[i_time, ])
-
-      fit_model <- lm(cumsum(sorted_weights) ~ poly(sorted_data,10))
-      predicted.intervals <- predict(fit_model,data.frame(x=sorted_data))
-      integrated_probs <- interp1(sorted_data, predicted.intervals, quantiles, "cubic")
-      probs[, i_time] <- c(integrated_probs[1], integrated_probs[2]-integrated_probs[1], 1-integrated_probs[2])
+      probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], threshold = quantiles))
     }
-
   }
 
   return(probs)
 }
 
-.sorted_distributions <- function(data_vector, weights_vector){
-      weights_vector <- as.vector(weights_vector)
-      data_vector <- as.vector(data_vector)
-      weights_vector <- weights_vector / sum(weights_vector) # normalize to 1
-      sorter <- order(data_vector)
-      sorted_data <- data_vector[sorter]
-      sorted_weights <- weights_vector[sorter]
-      return(sorted_data, sorted_weights)
-}
\ No newline at end of file
-- 
GitLab