From 5c453082c8a9e63204d802ac2f4f4e21d2de867b Mon Sep 17 00:00:00 2001
From: eduzenli <eren.duzenli@bsc.es>
Date: Thu, 6 Jul 2023 18:05:56 +0200
Subject: [PATCH 01/10] removing NA first version

---
 R/RPS.R  | 128 +++++++++++++++++++++-----------------
 R/RPSS.R | 184 ++++++++++++++++++++++++++++++-------------------------
 2 files changed, 175 insertions(+), 137 deletions(-)

diff --git a/R/RPS.R b/R/RPS.R
index c5ff5ba..b641439 100644
--- a/R/RPS.R
+++ b/R/RPS.R
@@ -256,74 +256,92 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL
   # obs: [sdate, bin, (dat)]
 
   # Adjust dimensions to be [sdate, memb, dat] for both exp and obs
-  if (!is.null(memb_dim)) {
-    if (!memb_dim %in% names(dim(obs))) {
-      obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim)
-    }
-  }
 
-  if (is.null(dat_dim)) {
-    nexp <- 1
-    nobs <- 1
-    dim(exp) <- c(dim(exp), nexp = nexp)
-    dim(obs) <- c(dim(obs), nobs = nobs)
-    if (!is.null(weights)) dim(weights) <- c(dim(weights), nexp = nexp)
-  } else {
-    nexp <- as.numeric(dim(exp)[dat_dim])
-    nobs <- as.numeric(dim(obs)[dat_dim])
-  }
+  # compute skill if there are less than 40% NAs in the data
+  f_NAs <- 0.4
+  good_values <- !is.na(exp) & !as.numeric(is.na(obs))
+  
+  if (f_NAs <= sum(good_values)/length(obs)) {
 
-  rps <- array(dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs))
+    ens_num <- dim(exp)[names(dim(exp))==memb_dim]
+    dim_names <- names(dim(exp))
+    exp <- array(exp[good_values],dim=c(length(exp[good_values])/ens_num,ens_num))
+    obs <- array(obs[good_values],dim=c(length(exp[good_values])/ens_num,ens_num))
+    obs <- obs [,!apply(obs,2,function (x) all(is.na(x)))]
+    names(dim(exp)) <- dim_names
 
-  for (i in 1:nexp) {
-    for (j in 1:nobs) {
-      exp_data <- exp[ , , i]
-      obs_data <- obs[ , , j]
 
-      if (is.null(dim(exp_data))) dim(exp_data) <- c(dim(exp)[1:2])
-      if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1:2])
+    if (!is.null(memb_dim)) {
+      if (!memb_dim %in% names(dim(obs))) {
+        obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim)
+      }
+    }
 
-      # If the data inputs are forecast/observation, calculate probabilities
-      if (is.null(cat_dim)) {
-        if (!is.null(weights)) {
-          weights_data <- weights[ , , i]
-          if (is.null(dim(weights_data))) dim(weights_data) <- c(dim(weights)[1:2])
-        } else {
-          weights_data <- weights
-        }
+    if (is.null(dat_dim)) {
+      nexp <- 1
+      nobs <- 1
+      dim(exp) <- c(dim(exp), nexp = nexp)
+      dim(obs) <- c(dim(obs), nobs = nobs)
+      if (!is.null(weights)) dim(weights) <- c(dim(weights), nexp = nexp)
+    } else {
+      nexp <- as.numeric(dim(exp)[dat_dim])
+      nobs <- as.numeric(dim(obs)[dat_dim])
+    }
 
-        exp_probs <- .GetProbs(data = exp_data, indices_for_quantiles = indices_for_clim, 
-                                prob_thresholds = prob_thresholds, weights = weights_data, cross.val = cross.val)
-        # exp_probs: [bin, sdate]
-        obs_probs <- .GetProbs(data = obs_data, indices_for_quantiles = indices_for_clim, 
-                                prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val)
-        # obs_probs: [bin, sdate]
+    rps <- array(dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs))
+    indices_for_clim <- 1:nrow(exp)
 
-      } else { # inputs are probabilities already
-        exp_probs <- t(exp_data)
-        obs_probs <- t(obs_data)
-      }
+    for (i in 1:nexp) {
+      for (j in 1:nobs) {
+        exp_data <- exp[ , , i]
+        obs_data <- obs[ , , j]
+
+        if (is.null(dim(exp_data))) dim(exp_data) <- c(dim(exp)[1:2])
+        if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1:2])
+
+        # If the data inputs are forecast/observation, calculate probabilities
+        if (is.null(cat_dim)) {
+          if (!is.null(weights)) {
+            weights_data <- weights[ , , i]
+            if (is.null(dim(weights_data))) dim(weights_data) <- c(dim(weights)[1:2])
+          } else {
+            weights_data <- weights
+          }
+          exp_probs <- .GetProbs(data = exp_data, indices_for_quantiles = indices_for_clim, 
+                                  prob_thresholds = prob_thresholds, weights = weights_data, cross.val = cross.val)
+          # exp_probs: [bin, sdate]
+          obs_probs <- .GetProbs(data = obs_data, indices_for_quantiles = indices_for_clim, 
+                                  prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val)
+          # obs_probs: [bin, sdate]
+
+        } else { # inputs are probabilities already
+          exp_probs <- t(exp_data)
+          obs_probs <- t(obs_data)
+        }
 
-      probs_exp_cumsum <- apply(exp_probs, 2, cumsum)
-      probs_obs_cumsum <- apply(obs_probs, 2, cumsum)
+        probs_exp_cumsum <- apply(exp_probs, 2, cumsum)
+        probs_obs_cumsum <- apply(obs_probs, 2, cumsum)
 
-      # rps: [sdate, nexp, nobs]
-      rps[ , i, j] <- colSums((probs_exp_cumsum - probs_obs_cumsum)^2)
-      if (Fair) { # FairRPS
-        ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps]
-        R <- dim(exp)[2]  #memb
-        R_new <- Inf
-        adjustment <- (-1) / (R - 1) * probs_exp_cumsum * (1 - probs_exp_cumsum)
-        adjustment <- colSums(adjustment)
-        rps[ , i, j] <- rps[ , i, j] + adjustment
+        # rps: [sdate, nexp, nobs]
+        rps[ , i, j] <- colSums((probs_exp_cumsum - probs_obs_cumsum)^2)
+        if (Fair) { # FairRPS
+          ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps]
+          R <- dim(exp)[2]  #memb
+          R_new <- Inf
+          adjustment <- (-1) / (R - 1) * probs_exp_cumsum * (1 - probs_exp_cumsum)
+          adjustment <- colSums(adjustment)
+          rps[ , i, j] <- rps[ , i, j] + adjustment
+        }
       }
     }
-  }
 
-  if (is.null(dat_dim)) {
-    dim(rps) <- dim(exp)[time_dim]
-  }
+    if (is.null(dat_dim)) {
+      dim(rps) <- dim(exp)[time_dim]
+    }
 
+  } else {
+    rps<-NA
+  } 
   return(rps)
 }
 
diff --git a/R/RPSS.R b/R/RPSS.R
index 6299eb8..c243997 100644
--- a/R/RPSS.R
+++ b/R/RPSS.R
@@ -382,106 +382,126 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
     nobs <- as.numeric(dim(obs)[dat_dim])
   }
 
-  # RPS of the forecast
-  rps_exp <- .RPS(exp = exp, obs = obs, time_dim = time_dim, memb_dim = memb_dim,
+
+   # compute skill if there are less than 40% NAs in the data
+  f_NAs <- 0.4
+  good_values <- !is.na(exp) & !as.numeric(is.na(obs))
+
+  if (f_NAs <= sum(good_values)/length(obs)) {
+  
+    ens_num <- dim(exp)[names(dim(exp))==memb_dim]
+    dim_names <- names(dim(exp))
+    exp <- array(exp[good_values],dim=c(length(exp[good_values])/ens_num,ens_num))
+    obs <- array(obs[good_values],dim=c(length(exp[good_values])/ens_num,ens_num))
+    obs <- obs [,!apply(obs,2,function (x) all(is.na(x)))]
+    names(dim(exp)) <- dim_names
+    
+    # RPS of the forecast
+    rps_exp <- .RPS(exp = exp, obs = obs, time_dim = time_dim, memb_dim = memb_dim,
                   cat_dim = cat_dim, dat_dim = dat_dim, prob_thresholds = prob_thresholds,
                   indices_for_clim = indices_for_clim, Fair = Fair, weights = weights_exp, 
                   cross.val = cross.val)
-  
-  # RPS of the reference forecast
-  if (is.null(ref)) { ## using climatology as reference forecast
-    if (!is.null(memb_dim)) {
-      if (!memb_dim %in% names(dim(obs))) {
-        obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim)
-      }
-    }
-    if (is.null(dat_dim)) {
-      dim(obs) <- c(dim(obs), nobs = nobs)
-    }
-    rps_ref <- array(dim = c(dim(obs)[time_dim], nobs = nobs))
 
-    for (j in 1:nobs) {
-      if (is.null(cat_dim)) {  # calculate probs
-        obs_data <- obs[ , , j]
-        if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1:2])
-        obs_probs <- .GetProbs(data = obs_data, indices_for_quantiles = indices_for_clim, 
-                               prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val)
-        # obs_probs: [bin, sdate] 
-      } else {
-        obs_probs <- t(obs[ , , j])
+ 
+    # RPS of the reference forecast
+    if (is.null(ref)) { ## using climatology as reference forecast
+      if (!is.null(memb_dim)) {
+        if (!memb_dim %in% names(dim(obs))) {
+          obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim)
+        }
       }
-      clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), 
-                      1 - prob_thresholds[length(prob_thresholds)])
-      clim_probs <- array(clim_probs, dim = dim(obs_probs))
-      # clim_probs: [bin, sdate]
+      names(dim(obs))[1] <- time_dim 
+      if (is.null(dat_dim)) {
+        dim(obs) <- c(dim(obs), nobs = nobs)
+      }
+      rps_ref <- array(dim = c(dim(obs)[time_dim], nobs = nobs))
 
-      # Calculate RPS for each time step
-      probs_clim_cumsum <- apply(clim_probs, 2, cumsum)
-      probs_obs_cumsum <- apply(obs_probs, 2, cumsum)
-      rps_ref[ , j] <- colSums((probs_clim_cumsum - probs_obs_cumsum)^2)
-  #   if (Fair) { # FairRPS
-  #     ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps]
-  #     R <- dim(exp)[2]  #memb
-  #     R_new <- Inf
-  #     adjustment <- (-1) / (R - 1) * probs_clim_cumsum * (1 - probs_clim_cumsum)
-  #     adjustment <- apply(adjustment, 2, sum)
-  #     rps_ref <- rps_ref + adjustment
-  #   }
+      indices_for_clim <- 1:nrow(obs)
+      for (j in 1:nobs) {
+        if (is.null(cat_dim)) {  # calculate probs
+          obs_data <- obs[ , , j]
+          if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1:2])
+          obs_probs <- .GetProbs(data = obs_data, indices_for_quantiles = indices_for_clim, 
+                                 prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val)
+          # obs_probs: [bin, sdate] 
+        } else {
+          obs_probs <- t(obs[ , , j])
+        }
+        clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), 
+                        1 - prob_thresholds[length(prob_thresholds)])
+        clim_probs <- array(clim_probs, dim = dim(obs_probs))
+        # clim_probs: [bin, sdate]
 
-    }
-    if (is.null(dat_dim)) {
-      dim(rps_ref) <- dim(exp)[time_dim]
-    }
+        # Calculate RPS for each time step
+        probs_clim_cumsum <- apply(clim_probs, 2, cumsum)
+        probs_obs_cumsum <- apply(obs_probs, 2, cumsum)
+        rps_ref[ , j] <- colSums((probs_clim_cumsum - probs_obs_cumsum)^2)
+    #   if (Fair) { # FairRPS
+    #     ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps]
+    #     R <- dim(exp)[2]  #memb
+    #     R_new <- Inf
+    #     adjustment <- (-1) / (R - 1) * probs_clim_cumsum * (1 - probs_clim_cumsum)
+    #     adjustment <- apply(adjustment, 2, sum)
+    #     rps_ref <- rps_ref + adjustment
+    #   }
 
-  } else { # use "ref" as reference forecast
-    if (!is.null(dat_dim) && (!dat_dim %in% names(dim(ref)))) {
-      remove_dat_dim <- TRUE
-      ref <- InsertDim(ref, posdim = 3, lendim = 1, name = dat_dim)
-        if (!is.null(weights_ref)) {
-          weights_ref <- InsertDim(weights_ref, posdim = 3, lendim = 1, name = dat_dim)
-        }
-    } else {
-      remove_dat_dim <- FALSE
-    }
+      }
+      if (is.null(dat_dim)) {
+        dim(rps_ref) <- dim(exp)[time_dim]
+      }
 
-    rps_ref <- .RPS(exp = ref, obs = obs, time_dim = time_dim, memb_dim = memb_dim,
-                    cat_dim = cat_dim, dat_dim = dat_dim, prob_thresholds = prob_thresholds, 
-                    indices_for_clim = indices_for_clim, Fair = Fair, weights = weights_ref, 
-                    cross.val = cross.val)
-    if (!is.null(dat_dim)) {
-      if (isTRUE(remove_dat_dim)) {
-        dim(rps_ref) <- dim(rps_ref)[-2]
+    } else { # use "ref" as reference forecast
+      if (!is.null(dat_dim) && (!dat_dim %in% names(dim(ref)))) {
+        remove_dat_dim <- TRUE
+        ref <- InsertDim(ref, posdim = 3, lendim = 1, name = dat_dim)
+          if (!is.null(weights_ref)) {
+            weights_ref <- InsertDim(weights_ref, posdim = 3, lendim = 1, name = dat_dim)
+          }
+      } else {
+        remove_dat_dim <- FALSE
+      }
+
+      rps_ref <- .RPS(exp = ref, obs = obs, time_dim = time_dim, memb_dim = memb_dim,
+                      cat_dim = cat_dim, dat_dim = dat_dim, prob_thresholds = prob_thresholds, 
+                      indices_for_clim = indices_for_clim, Fair = Fair, weights = weights_ref, 
+                      cross.val = cross.val)
+      if (!is.null(dat_dim)) {
+        if (isTRUE(remove_dat_dim)) {
+          dim(rps_ref) <- dim(rps_ref)[-2]
+        }
       }
     }
-  }
   
-  if (!is.null(dat_dim)) {
+    if (!is.null(dat_dim)) {
 
-    rps_exp_mean <- MeanDims(rps_exp, time_dim, na.rm = FALSE)
-    rps_ref_mean <- MeanDims(rps_ref, time_dim, na.rm = FALSE)
-    rpss <- array(dim = c(nexp = nexp, nobs = nobs))
-    sign <- array(dim = c(nexp = nexp, nobs = nobs))
+      rps_exp_mean <- MeanDims(rps_exp, time_dim, na.rm = FALSE)
+      rps_ref_mean <- MeanDims(rps_ref, time_dim, na.rm = FALSE)
+      rpss <- array(dim = c(nexp = nexp, nobs = nobs))
+      sign <- array(dim = c(nexp = nexp, nobs = nobs))
 
-    if (length(dim(rps_ref_mean)) == 1) {
-      for (i in 1:nexp) {
-        for (j in 1:nobs) {
-          rpss[i, j] <- 1 - rps_exp_mean[i, j] / rps_ref_mean[j]
-          sign[i, j] <- .RandomWalkTest(skill_A = rps_exp[, i, j], skill_B = rps_ref[, j])$sign
+      if (length(dim(rps_ref_mean)) == 1) {
+        for (i in 1:nexp) {
+          for (j in 1:nobs) {
+            rpss[i, j] <- 1 - rps_exp_mean[i, j] / rps_ref_mean[j]
+            sign[i, j] <- .RandomWalkTest(skill_A = rps_exp[, i, j], skill_B = rps_ref[, j])$sign
+          }
         }
-      }
-    } else {
-      for (i in 1:nexp) {
-        for (j in 1:nobs) {
-          rpss[i, j] <- 1 - rps_exp_mean[i, j] / rps_ref_mean[i, j]
-          sign[i, j] <- .RandomWalkTest(skill_A = rps_exp[, i, j], skill_B = rps_ref[, i, j])$sign
+      } else {
+        for (i in 1:nexp) {
+          for (j in 1:nobs) {
+            rpss[i, j] <- 1 - rps_exp_mean[i, j] / rps_ref_mean[i, j]
+            sign[i, j] <- .RandomWalkTest(skill_A = rps_exp[, i, j], skill_B = rps_ref[, i, j])$sign
+          }
         }
       }
+    } else {
+      rpss <- 1 - mean(rps_exp) / mean(rps_ref)
+      # Significance
+      sign <- .RandomWalkTest(skill_A = rps_exp, skill_B = rps_ref, sign = T, pval = F)$sign
     }
   } else {
-    rpss <- 1 - mean(rps_exp) / mean(rps_ref)
-    # Significance
-    sign <- .RandomWalkTest(skill_A = rps_exp, skill_B = rps_ref, sign = T, pval = F)$sign
-  }
-  
+  rpss <- NA
+  sign <- NA
+  } 
   return(list(rpss = rpss, sign = sign))
 }
-- 
GitLab


From c4aa1f9ebb07a3708a534d9ac46f8dcd66960499 Mon Sep 17 00:00:00 2001
From: eduzenli <eren.duzenli@bsc.es>
Date: Fri, 7 Jul 2023 18:29:27 +0200
Subject: [PATCH 02/10] f_NAs argument is added. Compatible with both memb_dim
 and cat_dim cases but only compatible with dat_dim = 1

---
 R/RPS.R  | 43 ++++++++++++++++++++++++++++++-------------
 R/RPSS.R | 44 +++++++++++++++++++++++++++++++-------------
 2 files changed, 61 insertions(+), 26 deletions(-)

diff --git a/R/RPS.R b/R/RPS.R
index b641439..0e9abcc 100644
--- a/R/RPS.R
+++ b/R/RPS.R
@@ -53,6 +53,8 @@
 #'  The default value is FALSE.
 #'@param ncores An integer indicating the number of cores to use for parallel 
 #'  computation. The default value is NULL.
+#'@param Lower limit for the fraction of the non-NA values. The function returns NA 
+#'  if the fraction of non-NA values in the provided data is less than f_NAs. 
 #'
 #'@return 
 #'A numerical array of RPS with dimensions c(nexp, nobs, the rest dimensions of 
@@ -79,7 +81,7 @@
 #'@export
 RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, 
                 dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, 
-                Fair = FALSE, weights = NULL, cross.val = FALSE, ncores = NULL) {
+                Fair = FALSE, weights = NULL, cross.val = FALSE, ncores = NULL, f_NAs=1) {
   
   # Check inputs
   ## exp and obs (1)
@@ -210,6 +212,10 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL
       stop("Parameter 'ncores' must be either NULL or a positive integer.")
     }
   }
+  ## f_NAs
+  if (!is.numeric(f_NAs) | !(f_NAs >= 0 & f_NAs <= 1)) {
+    stop("Parameter 'f_NAs' must be a numeric vector between 0 and 1.")
+  }
 
   ###############################
  
@@ -235,7 +241,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL
                memb_dim = memb_dim, cat_dim = cat_dim,
                prob_thresholds = prob_thresholds, 
                indices_for_clim = indices_for_clim, Fair = Fair,
-               weights = weights, cross.val = cross.val, ncores = ncores)$output1
+               weights = weights, cross.val = cross.val, ncores = ncores, f_NAs=f_NAs)$output1
  
   # Return only the mean RPS
   rps <- MeanDims(rps, time_dim, na.rm = FALSE)
@@ -246,7 +252,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL
 
 .RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL,
                  dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, 
-                 Fair = FALSE, weights = NULL, cross.val = FALSE) {
+                 Fair = FALSE, weights = NULL, cross.val = FALSE, f_NAs=1) {
   #--- if memb_dim: 
   # exp: [sdate, memb, (dat)]
   # obs: [sdate, (memb), (dat)]
@@ -257,19 +263,30 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL
 
   # Adjust dimensions to be [sdate, memb, dat] for both exp and obs
 
-  # compute skill if there are less than 40% NAs in the data
-  f_NAs <- 0.4
+  # compute skill if the the percentage of the NAs are less than f_NAs
   good_values <- !is.na(exp) & !as.numeric(is.na(obs))
-  
-  if (f_NAs <= sum(good_values)/length(obs)) {
+  good_values <- apply(good_values,1,all)
 
-    ens_num <- dim(exp)[names(dim(exp))==memb_dim]
-    dim_names <- names(dim(exp))
-    exp <- array(exp[good_values],dim=c(length(exp[good_values])/ens_num,ens_num))
-    obs <- array(obs[good_values],dim=c(length(exp[good_values])/ens_num,ens_num))
-    obs <- obs [,!apply(obs,2,function (x) all(is.na(x)))]
-    names(dim(exp)) <- dim_names
+  if(!is.null(memb_dim))  {
+    frac <- sum(good_values)/length(obs)
+    col_num <- as.numeric(dim(exp)[memb_dim])
+  } else {
+    frac <- sum(good_values)/nrow(obs)
+    col_num <- ncol(obs)
+  }
 
+  if (f_NAs <= frac) {
+
+    dim_names_exp <- names(dim(exp))
+    dim_names_obs <- names(dim(obs))
+    exp <- array(exp[good_values,],dim=c(length(good_values[good_values]),col_num))
+    if(!is.null(memb_dim))  {
+      obs <- array(obs[good_values],dim=c(length(good_values[good_values])))
+    } else {
+      obs <- array(obs[good_values],dim=c(length(good_values[good_values]),col_num))
+    }
+    names(dim(exp)) <- dim_names_exp
+    names(dim(obs)) <- dim_names_obs
 
     if (!is.null(memb_dim)) {
       if (!memb_dim %in% names(dim(obs))) {
diff --git a/R/RPSS.R b/R/RPSS.R
index c243997..6a9a9fd 100644
--- a/R/RPSS.R
+++ b/R/RPSS.R
@@ -66,6 +66,8 @@
 #'  The default value is FALSE.
 #'@param ncores An integer indicating the number of cores to use for parallel 
 #'  computation. The default value is NULL.
+#'@param Lower limit for the fraction of the non-NA values. The function returns NA 
+#'  if the fraction of non-NA values in the provided data is less than f_NAs. 
 #'
 #'@return
 #'\item{$rpss}{
@@ -110,7 +112,7 @@
 RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL,
                  dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL,
                  Fair = FALSE, weights_exp = NULL, weights_ref = NULL, 
-                 cross.val = FALSE, ncores = NULL) {
+                 cross.val = FALSE, ncores = NULL, f_NAs = 1) {
  
   # Check inputs
   ## exp, obs, and ref (1)
@@ -308,6 +310,10 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
       stop("Parameter 'ncores' must be either NULL or a positive integer.")
     }
   }
+  ## f_NAs
+  if (!is.numeric(f_NAs) | !(f_NAs >= 0 & f_NAs <= 1)) {
+    stop("Parameter 'f_NAs' must be a numeric vector between 0 and 1.")
+  }
 
   ###############################
 
@@ -355,7 +361,8 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
                   weights_exp = weights_exp,
                   weights_ref = weights_ref,
                   cross.val = cross.val, 
-                  ncores = ncores)
+                  ncores = ncores,
+                  f_NAs=f_NAs)
   
   return(output)
 
@@ -363,7 +370,7 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
 
 .RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL,
                   dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL,
-                  Fair = FALSE, weights_exp = NULL, weights_ref = NULL, cross.val = FALSE) {
+                  Fair = FALSE, weights_exp = NULL, weights_ref = NULL, cross.val = FALSE, f_NAs=1) {
 
   #--- if memb_dim: 
   # exp: [sdate, memb, (dat)]
@@ -383,18 +390,30 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
   }
 
 
-   # compute skill if there are less than 40% NAs in the data
-  f_NAs <- 0.4
+  # compute skill if the the percentage of the NAs are less than f_NAs
   good_values <- !is.na(exp) & !as.numeric(is.na(obs))
+  good_values <- apply(good_values,1,all) 
+
+  if(!is.null(memb_dim))  {
+    frac <- sum(good_values)/length(obs)
+    col_num <- as.numeric(dim(exp)[memb_dim])
+  } else {
+    frac <- sum(good_values)/nrow(obs)
+    col_num <- ncol(obs)
+  }
 
-  if (f_NAs <= sum(good_values)/length(obs)) {
+  if (f_NAs <= frac) {
   
-    ens_num <- dim(exp)[names(dim(exp))==memb_dim]
-    dim_names <- names(dim(exp))
-    exp <- array(exp[good_values],dim=c(length(exp[good_values])/ens_num,ens_num))
-    obs <- array(obs[good_values],dim=c(length(exp[good_values])/ens_num,ens_num))
-    obs <- obs [,!apply(obs,2,function (x) all(is.na(x)))]
-    names(dim(exp)) <- dim_names
+    dim_names_exp <- names(dim(exp))
+    dim_names_obs <- names(dim(obs))
+    exp <- array(exp[good_values,],dim=c(length(good_values[good_values]),col_num))
+    if(!is.null(memb_dim))  {
+      obs <- array(obs[good_values],dim=c(length(good_values[good_values])))
+    } else {
+      obs <- array(obs[good_values],dim=c(length(good_values[good_values]),col_num))
+    }
+    names(dim(exp)) <- dim_names_exp
+    names(dim(obs)) <- dim_names_obs
     
     # RPS of the forecast
     rps_exp <- .RPS(exp = exp, obs = obs, time_dim = time_dim, memb_dim = memb_dim,
@@ -410,7 +429,6 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
           obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim)
         }
       }
-      names(dim(obs))[1] <- time_dim 
       if (is.null(dat_dim)) {
         dim(obs) <- c(dim(obs), nobs = nobs)
       }
-- 
GitLab


From 1e763b8632e3930e6320304eb05d223a5757ff10 Mon Sep 17 00:00:00 2001
From: eduzenli <eren.duzenli@bsc.es>
Date: Sun, 9 Jul 2023 16:47:21 +0200
Subject: [PATCH 03/10] minor correction

---
 R/RPS.R  | 2 +-
 R/RPSS.R | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/RPS.R b/R/RPS.R
index 0e9abcc..c28dd71 100644
--- a/R/RPS.R
+++ b/R/RPS.R
@@ -281,7 +281,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL
     dim_names_obs <- names(dim(obs))
     exp <- array(exp[good_values,],dim=c(length(good_values[good_values]),col_num))
     if(!is.null(memb_dim))  {
-      obs <- array(obs[good_values],dim=c(length(good_values[good_values])))
+      obs <- array(obs[good_values],dim=c(length(good_values[good_values]),1)) ## dat_dim=1
     } else {
       obs <- array(obs[good_values],dim=c(length(good_values[good_values]),col_num))
     }
diff --git a/R/RPSS.R b/R/RPSS.R
index 6a9a9fd..6dc7bab 100644
--- a/R/RPSS.R
+++ b/R/RPSS.R
@@ -408,7 +408,7 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
     dim_names_obs <- names(dim(obs))
     exp <- array(exp[good_values,],dim=c(length(good_values[good_values]),col_num))
     if(!is.null(memb_dim))  {
-      obs <- array(obs[good_values],dim=c(length(good_values[good_values])))
+      obs <- array(obs[good_values],dim=c(length(good_values[good_values]),1)) ## dat_dim=1
     } else {
       obs <- array(obs[good_values],dim=c(length(good_values[good_values]),col_num))
     }
-- 
GitLab


From 9c16296cb17fa68febae00c0c4a8c5f7f6d2b6d7 Mon Sep 17 00:00:00 2001
From: Carlos Delgado <carlos.delgado@bsc.es>
Date: Mon, 10 Jul 2023 16:44:25 +0200
Subject: [PATCH 04/10] first version; not working because .GetProbs

---
 R/RPS.R | 112 ++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 76 insertions(+), 36 deletions(-)

diff --git a/R/RPS.R b/R/RPS.R
index c5ff5ba..70e693f 100644
--- a/R/RPS.R
+++ b/R/RPS.R
@@ -79,7 +79,7 @@
 #'@export
 RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, 
                 dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, 
-                Fair = FALSE, weights = NULL, cross.val = FALSE, ncores = NULL) {
+                Fair = FALSE, weights = NULL, cross.val = FALSE, na.rm = FALSE, ncores = NULL) {
   
   # Check inputs
   ## exp and obs (1)
@@ -203,6 +203,10 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL
                     "'weights' is not used. Change 'weights' to NULL."))
     weights <- NULL
   }
+  ## na.rm
+  if (!isTRUE(na.rm) & !isFALSE(na.rm) & !(is.numeric(na.rm) & na.rm >= 0 & na.rm <= 1)) {
+    stop('"na.rm" should be TRUE, FALSE or a numeric between 0 and 1')
+  }
   ## ncores
   if (!is.null(ncores)) {
     if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
@@ -235,7 +239,8 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL
                memb_dim = memb_dim, cat_dim = cat_dim,
                prob_thresholds = prob_thresholds, 
                indices_for_clim = indices_for_clim, Fair = Fair,
-               weights = weights, cross.val = cross.val, ncores = ncores)$output1
+               weights = weights, cross.val = cross.val, 
+               na.rm = na.rm, ncores = ncores)$output1
  
   # Return only the mean RPS
   rps <- MeanDims(rps, time_dim, na.rm = FALSE)
@@ -246,7 +251,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL
 
 .RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL,
                  dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, 
-                 Fair = FALSE, weights = NULL, cross.val = FALSE) {
+                 Fair = FALSE, weights = NULL, cross.val = FALSE, na.rm = FALSE) {
   #--- if memb_dim: 
   # exp: [sdate, memb, (dat)]
   # obs: [sdate, (memb), (dat)]
@@ -283,40 +288,76 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL
       if (is.null(dim(exp_data))) dim(exp_data) <- c(dim(exp)[1:2])
       if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1:2])
 
-      # If the data inputs are forecast/observation, calculate probabilities
-      if (is.null(cat_dim)) {
-        if (!is.null(weights)) {
-          weights_data <- weights[ , , i]
-          if (is.null(dim(weights_data))) dim(weights_data) <- c(dim(weights)[1:2])
-        } else {
-          weights_data <- weights
-        }
-
-        exp_probs <- .GetProbs(data = exp_data, indices_for_quantiles = indices_for_clim, 
-                                prob_thresholds = prob_thresholds, weights = weights_data, cross.val = cross.val)
-        # exp_probs: [bin, sdate]
-        obs_probs <- .GetProbs(data = obs_data, indices_for_quantiles = indices_for_clim, 
-                                prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val)
-        # obs_probs: [bin, sdate]
-
-      } else { # inputs are probabilities already
-        exp_probs <- t(exp_data)
-        obs_probs <- t(obs_data)
+     #################
+      
+      if (memb_dim %in% names(dim(exp_data))){
+        exp_mean <- apply(exp_data, 1, mean)
+      } else {exp_mean <- exp_data}
+      if (memb_dim %in% names(dim(obs_data))){
+        obs_mean <- apply(obs_data, 1, mean)
+      } else {obs_mean <- obs_data}
+      
+      if (cat_dim %in% names(dim(exp_data))){
+        exp_mean <- apply(exp_data, 1, mean)
+        obs_mean <- apply(obs_data, 1, mean)
+      } else {exp_mean <- exp_data}
+      
+      good_values <- !is.na(exp_mean) & !is.na(obs_mean)
+      
+      if (isTRUE(na.rm)){
+        f_NAs <- 0
+      } else if (isFALSE(na.rm)){
+        f_NAs <- 1
+      } else {
+        f_NAs <- na.rm
       }
-
-      probs_exp_cumsum <- apply(exp_probs, 2, cumsum)
-      probs_obs_cumsum <- apply(obs_probs, 2, cumsum)
-
-      # rps: [sdate, nexp, nobs]
-      rps[ , i, j] <- colSums((probs_exp_cumsum - probs_obs_cumsum)^2)
-      if (Fair) { # FairRPS
-        ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps]
-        R <- dim(exp)[2]  #memb
-        R_new <- Inf
-        adjustment <- (-1) / (R - 1) * probs_exp_cumsum * (1 - probs_exp_cumsum)
-        adjustment <- colSums(adjustment)
-        rps[ , i, j] <- rps[ , i, j] + adjustment
+      
+      if (f_NAs <= sum(good_values)/length(obs_mean)){
+        
+        exp_data <- exp_data[good_values]
+        obs_data <- obs_data[good_values]
+        
+        # If the data inputs are forecast/observation, calculate probabilities
+        if (is.null(cat_dim)) {
+          if (!is.null(weights)) {
+            weights_data <- weights[ , , i]
+            if (is.null(dim(weights_data))) dim(weights_data) <- c(dim(weights)[1:2])
+          } else {
+            weights_data <- weights
+          }
+          
+          exp_probs <- .GetProbs(data = exp_data, indices_for_quantiles = indices_for_clim, 
+                                 prob_thresholds = prob_thresholds, weights = weights_data, cross.val = cross.val)
+          # exp_probs: [bin, sdate]
+          obs_probs <- .GetProbs(data = obs_data, indices_for_quantiles = indices_for_clim, 
+                                 prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val)
+          # obs_probs: [bin, sdate]
+          
+        } else { # inputs are probabilities already
+          exp_probs <- t(exp_data)
+          obs_probs <- t(obs_data)
+        }
+        
+        probs_exp_cumsum <- apply(exp_probs, 2, cumsum)
+        probs_obs_cumsum <- apply(obs_probs, 2, cumsum)
+        
+        # rps: [sdate, nexp, nobs]
+        rps[ , i, j] <- colSums((probs_exp_cumsum - probs_obs_cumsum)^2)
+        if (Fair) { # FairRPS
+          ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps]
+          R <- dim(exp)[2]  #memb
+          R_new <- Inf
+          adjustment <- (-1) / (R - 1) * probs_exp_cumsum * (1 - probs_exp_cumsum)
+          adjustment <- colSums(adjustment)
+          rps[ , i, j] <- rps[ , i, j] + adjustment
+        }
+        
+      } else { ## not enough values different from NA
+        
+        rps[ , i, j] <- NA
+        
       }
+      
     }
   }
 
@@ -326,4 +367,3 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL
 
   return(rps)
 }
-
-- 
GitLab


From f871545e1dc17f0c9a66bf0de9bbb08d657f0d71 Mon Sep 17 00:00:00 2001
From: aho <an.ho@bsc.es>
Date: Fri, 14 Jul 2023 14:56:20 +0200
Subject: [PATCH 05/10] RPS na.rm feature done

---
 R/RPS.R                   | 64 +++++++++++-----------------
 R/RPSS.R                  | 44 +++++++++++++-------
 man/RPS.Rd                | 15 +++++--
 man/RPSS.Rd               |  6 +++
 tests/testthat/test-RPS.R | 87 ++++++++++++++++++++++++++++++++++++---
 5 files changed, 150 insertions(+), 66 deletions(-)

diff --git a/R/RPS.R b/R/RPS.R
index 8c5e6f2..c385f10 100644
--- a/R/RPS.R
+++ b/R/RPS.R
@@ -13,7 +13,8 @@
 #'The function first calculates the probabilities for forecasts and observations, 
 #'then use them to calculate RPS. Or, the probabilities of exp and obs can be 
 #'provided directly to compute the score. If there is more than one dataset, RPS
-#'will be computed for each pair of exp and obs data.
+#'will be computed for each pair of exp and obs data. The fraction of acceptable
+#'NAs can be adjusted.
 #'
 #'@param exp A named numerical array of either the forecasts with at least time
 #'  and member dimensions, or the probabilities with at least time and category
@@ -48,13 +49,14 @@
 #'  default value is NULL. The ensemble should have at least 70 members or span 
 #'  at least 10 time steps and have more than 45 members if consistency between
 #'  the weighted and unweighted methodologies is desired. 
-#'@param cross.val A logical indicating whether to compute the thresholds between 
-#'  probabilistic categories in cross-validation.
-#'  The default value is FALSE.
-#'@param na.rm A logical or numeric value between 0 and 1. If it is numeric,  
-#'  it shows the lower limit for the fraction of the non-NA values. The function 
-#'  returns NA, if the fraction of non-NA values in the provided data is less than 
-#'  na.rm. Otherwise, RPS will be calculated.
+#'@param cross.val A logical indicating whether to compute the thresholds
+#'  between probabilistic categories in cross-validation. The default value is
+#'  FALSE.
+#'@param na.rm A logical or numeric value between 0 and 1. If it is numeric, it 
+#'  means the lower limit for the fraction of the non-NA values. 1 is equal to 
+#'  FALSE (no NA is acceptable), 0 is equal to TRUE (all NAs are acceptable). 
+#   The function returns NA if the fraction of non-NA values in the data is less
+#'  than na.rm. Otherwise, RPS will be calculated. The default value is FALSE.
 #'@param ncores An integer indicating the number of cores to use for parallel 
 #'  computation. The default value is NULL.
 #'
@@ -292,22 +294,10 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL
       if (is.null(dim(exp_data))) dim(exp_data) <- c(dim(exp)[1:2])
       if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1:2])
 
-     #################
-      if (!is.null(memb_dim)) { 
-        if (memb_dim %in% names(dim(exp_data))) {
-          exp_mean <- apply(exp_data, 1, mean)
-        } else { 
-          exp_mean <- exp_data
-        }
-        if (memb_dim %in% names(dim(obs_data))) {
-          obs_mean <- apply(obs_data, 1, mean)
-        } else {
-          obs_mean <- obs_data
-        }
-      } else if (!is.null(cat_dim)) {
-        exp_mean <- apply(exp_data, 1, mean)
-        obs_mean <- apply(obs_data, 1, mean)
-      } 
+      # Find the fraction of NAs
+      ## If any member/bin is NA at this time step, it is not good value.
+      exp_mean <- rowMeans(exp_data)
+      obs_mean <- rowMeans(obs_data)
       good_values <- !is.na(exp_mean) & !is.na(obs_mean)
       
       if (isTRUE(na.rm)) {
@@ -318,33 +308,24 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL
         f_NAs <- na.rm
       }
 
-      if (f_NAs <= sum(good_values)/length(obs_mean)) {
+      if (f_NAs <= sum(good_values) / length(obs_mean)) {
         
-        exp_data <- exp_data[good_values,]
-        obs_data <- obs_data[good_values,]
-
-      if (is.null(dim(exp_data))) {
-        exp_data <- array(exp_data, c(sum(good_values), dim(exp)[2]))
-        names(dim(exp_data)) <- names(dim(exp)[1:2])
-      }
-
-      if (is.null(dim(obs_data))) {
-        obs_data <- array(obs_data, c(sum(good_values), dim(obs)[2]))
-        names(dim(obs_data)) <- names(dim(obs)[1:2])
-      }
+        exp_data <- exp_data[good_values, , drop = F]
+        obs_data <- obs_data[good_values, , drop = F]
         
         # If the data inputs are forecast/observation, calculate probabilities
         if (is.null(cat_dim)) {
           if (!is.null(weights)) {
-            weights_data <- weights[ , , i]
+            weights_data <- weights[which(good_values) , , i]
             if (is.null(dim(weights_data))) dim(weights_data) <- c(dim(weights)[1:2])
           } else {
-            weights_data <- weights
+            weights_data <- weights #NULL
           }
-         
+
+          # Subset indices_for_clim
           dum <- match(indices_for_clim, which(good_values))
           good_indices_for_clim <- dum[!is.na(dum)]
-          
+     
           exp_probs <- .GetProbs(data = exp_data, indices_for_quantiles = good_indices_for_clim, 
                                  prob_thresholds = prob_thresholds, weights = weights_data, cross.val = cross.val)
           # exp_probs: [bin, sdate]
@@ -362,6 +343,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL
         
         # rps: [sdate, nexp, nobs]
         rps [good_values, i, j] <- colSums((probs_exp_cumsum - probs_obs_cumsum)^2)
+
         if (Fair) { # FairRPS
           ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps]
           R <- dim(exp)[2]  #memb
diff --git a/R/RPSS.R b/R/RPSS.R
index 5e7777e..642e888 100644
--- a/R/RPSS.R
+++ b/R/RPSS.R
@@ -61,13 +61,14 @@
 #'  members or span at least 10 time steps and have more than 45 members if 
 #'  consistency between the weighted and unweighted methodologies is desired.
 #'@param weights_ref Same as 'weights_exp' but for the reference forecast.
-#'@param cross.val A logical indicating whether to compute the thresholds between 
-#'  probabilistics categories in cross-validation.
-#'  The default value is FALSE.
-#'@param na.rm A logical or numeric value between 0 and 1. If it is numeric,  
-#'  it shows the lower limit for the fraction of the non-NA values. The function 
-#'  returns NA, if the fraction of non-NA values in the provided data is less than 
-#'  na.rm. Otherwise, RPSS will be calculated.
+#'@param cross.val A logical indicating whether to compute the thresholds
+#'  between probabilistics categories in cross-validation. The default value is
+#'  FALSE.
+#'@param na.rm A logical or numeric value between 0 and 1. If it is numeric, it 
+#'  means the lower limit for the fraction of the non-NA values. 1 is equal to 
+#'  FALSE (no NA is acceptable), 0 is equal to TRUE (all NAs are acceptable). 
+#   The function returns NA if the fraction of non-NA values in the data is less
+#'  than na.rm. Otherwise, RPS will be calculated. The default value is FALSE.
 #'@param ncores An integer indicating the number of cores to use for parallel 
 #'  computation. The default value is NULL.
 #'
@@ -114,7 +115,7 @@
 RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL,
                  dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL,
                  Fair = FALSE, weights_exp = NULL, weights_ref = NULL, 
-                 cross.val = FALSE, na.rm=FALSE, ncores = NULL) {
+                 cross.val = FALSE, na.rm = FALSE, ncores = NULL) {
  
   # Check inputs
   ## exp, obs, and ref (1)
@@ -371,7 +372,7 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
 
 .RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL,
                   dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL,
-                  Fair = FALSE, weights_exp = NULL, weights_ref = NULL, cross.val = FALSE, na.rm=FALSE) {
+                  Fair = FALSE, weights_exp = NULL, weights_ref = NULL, cross.val = FALSE, na.rm = FALSE) {
 
   #--- if memb_dim: 
   # exp: [sdate, memb, (dat)]
@@ -390,6 +391,9 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
     nobs <- as.numeric(dim(obs)[dat_dim])
   }
 
+#----------------------------------------------
+  # Calculate RPS
+
   # RPS of the forecast
   rps_exp <- .RPS(exp = exp, obs = obs, time_dim = time_dim, memb_dim = memb_dim,
                   cat_dim = cat_dim, dat_dim = dat_dim, prob_thresholds = prob_thresholds,
@@ -455,29 +459,33 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
     rps_ref <- .RPS(exp = ref, obs = obs, time_dim = time_dim, memb_dim = memb_dim,
                     cat_dim = cat_dim, dat_dim = dat_dim, prob_thresholds = prob_thresholds, 
                     indices_for_clim = indices_for_clim, Fair = Fair, weights = weights_ref, 
-                    na.rm=na.rm, cross.val = cross.val)
+                    na.rm = na.rm, cross.val = cross.val)
     if (!is.null(dat_dim)) {
       if (isTRUE(remove_dat_dim)) {
         dim(rps_ref) <- dim(rps_ref)[-2]
       }
     }
   }
-  
-  if (!is.null(dat_dim)) {
 
+#----------------------------------------------
+  # Calculate RPSS
+
+  if (!is.null(dat_dim)) {
+    # rps_exp: [sdate, nexp, nobs]
+    # rps_ref: [sdate, (nexp), nobs]
     rps_exp_mean <- MeanDims(rps_exp, time_dim, na.rm = TRUE)
     rps_ref_mean <- MeanDims(rps_ref, time_dim, na.rm = TRUE)
     rpss <- array(dim = c(nexp = nexp, nobs = nobs))
     sign <- array(dim = c(nexp = nexp, nobs = nobs))
 
-    if (length(dim(rps_ref_mean)) == 1) {
+    if (length(dim(rps_ref_mean)) == 1) {  # rps_ref: [sdate, nobs]
       for (i in 1:nexp) {
         for (j in 1:nobs) {
           rpss[i, j] <- 1 - rps_exp_mean[i, j] / rps_ref_mean[j]
           sign[i, j] <- .RandomWalkTest(skill_A = rps_exp[, i, j], skill_B = rps_ref[, j])$sign
         }
       }
-    } else {
+    } else {  # rps_ref: [sdate, nexp, nobs]
       for (i in 1:nexp) {
         for (j in 1:nobs) {
           rpss[i, j] <- 1 - rps_exp_mean[i, j] / rps_ref_mean[i, j]
@@ -490,8 +498,12 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
         }
       }
     }
-  } else {
-    rpss <- 1 - mean(rps_exp,na.rm=TRUE) / mean(rps_ref,na.rm=TRUE)
+
+  } else {  # dat_dim is NULL
+
+    # rps_exp and rps_ref: [sdate]
+    rpss <- 1 - mean(rps_exp, na.rm = TRUE) / mean(rps_ref, na.rm = TRUE)
+
     # Significance
     good_values <- !is.na(rps_exp) & !is.na(rps_ref)
     if (!(na.rm <= sum(good_values)/length(good_values))) {
diff --git a/man/RPS.Rd b/man/RPS.Rd
index 2e21227..041ca07 100644
--- a/man/RPS.Rd
+++ b/man/RPS.Rd
@@ -16,6 +16,7 @@ RPS(
   Fair = FALSE,
   weights = NULL,
   cross.val = FALSE,
+  na.rm = FALSE,
   ncores = NULL
 )
 }
@@ -63,9 +64,14 @@ default value is NULL. The ensemble should have at least 70 members or span
 at least 10 time steps and have more than 45 members if consistency between
 the weighted and unweighted methodologies is desired.}
 
-\item{cross.val}{A logical indicating whether to compute the thresholds between 
-probabilistic categories in cross-validation.
-The default value is FALSE.}
+\item{cross.val}{A logical indicating whether to compute the thresholds
+between probabilistic categories in cross-validation. The default value is
+FALSE.}
+
+\item{na.rm}{A logical or numeric value between 0 and 1. If it is numeric, it 
+means the lower limit for the fraction of the non-NA values. 1 is equal to 
+FALSE (no NA is acceptable), 0 is equal to TRUE (all NAs are acceptable). 
+than na.rm. Otherwise, RPS will be calculated. The default value is FALSE.}
 
 \item{ncores}{An integer indicating the number of cores to use for parallel 
 computation. The default value is NULL.}
@@ -90,7 +96,8 @@ and 1.\cr
 The function first calculates the probabilities for forecasts and observations, 
 then use them to calculate RPS. Or, the probabilities of exp and obs can be 
 provided directly to compute the score. If there is more than one dataset, RPS
-will be computed for each pair of exp and obs data.
+will be computed for each pair of exp and obs data. The fraction of acceptable
+NAs can be adjusted.
 }
 \examples{
 # Use synthetic data
diff --git a/man/RPSS.Rd b/man/RPSS.Rd
index a6abe34..a9dd7d7 100644
--- a/man/RPSS.Rd
+++ b/man/RPSS.Rd
@@ -18,6 +18,7 @@ RPSS(
   weights_exp = NULL,
   weights_ref = NULL,
   cross.val = FALSE,
+  na.rm = FALSE,
   ncores = NULL
 )
 }
@@ -83,6 +84,11 @@ consistency between the weighted and unweighted methodologies is desired.}
 probabilistics categories in cross-validation.
 The default value is FALSE.}
 
+\item{na.rm}{A logical or numeric value between 0 and 1. If it is numeric,
+it shows the lower limit for the fraction of the non-NA values. The function 
+returns NA, if the fraction of non-NA values in the provided data is less than 
+na.rm. Otherwise, RPSS will be calculated.}
+
 \item{ncores}{An integer indicating the number of cores to use for parallel 
 computation. The default value is NULL.}
 }
diff --git a/tests/testthat/test-RPS.R b/tests/testthat/test-RPS.R
index 624eb37..d87da87 100644
--- a/tests/testthat/test-RPS.R
+++ b/tests/testthat/test-RPS.R
@@ -28,10 +28,18 @@ obs2_1 <- array(rnorm(10), dim = c(member = 1, sdate = 10))
 set.seed(1)
 exp3 <- array(rnorm(40), dim = c(member = 2, sdate = 10, dataset = 2))
 set.seed(2)
-obs3 <- array(rnorm(30), dim = c(member = 1, sdate = 10, dataset = 3))
+obs3 <- array(rnorm(60), dim = c(member = 3, sdate = 10, dataset = 3))
 set.seed(3)
 weights3 <-  array(abs(rnorm(30)), dim = c(member = 2, sdate = 10, dataset = 2))
 
+# dat4
+exp4 <- exp3
+obs4 <- obs3
+obs4[2, 1, 1] <- NA
+obs4[3, 2, 1] <- NA
+exp4[1, 1, 2] <- NA
+weights4 <- weights3
+
 ##############################################
 test_that("1. Input checks", {
   # exp and obs (1)
@@ -199,7 +207,7 @@ test_that("3. Output checks: dat2", {
 })
 
 ##############################################
-test_that("3. Output checks: dat3", {
+test_that("4. Output checks: dat3", {
 
   expect_equal(
   dim(RPS(exp3, obs3, dat_dim = 'dataset')),
@@ -207,12 +215,12 @@ test_that("3. Output checks: dat3", {
   )
   expect_equal(
   as.vector(RPS(exp3, obs3, dat_dim = 'dataset')),
-  c(0.75, 0.65, 0.75, 0.85, 0.75, 0.55),
+  c(0.3388889, 0.3388889, 0.2944444, 0.3277778, 0.3388889, 0.3388889),
   tolerance = 0.0001
   )
   expect_equal(
   as.vector(RPS(exp3, obs3, dat_dim = 'dataset', indices_for_clim = 2:5, prob_thresholds = seq(0.1, 0.9, 0.1))),
-  c(2.75, 2.45, 2.55, 2.55, 2.65, 2.15),
+  c(1.394444, 1.394444, 1.250000, 1.316667, 1.394444, 1.394444),
   tolerance = 0.0001
   )
   # weights
@@ -222,8 +230,77 @@ test_that("3. Output checks: dat3", {
   )
   expect_equal(
   as.vector(RPS(exp3, obs3, weights = weights3, dat_dim = 'dataset')),
-  c(0.7365024, 0.8316852, 0.6778686, 1.0256509, 0.8406320, 0.6385640),
+  c(0.3255765, 0.4290578, 0.2917297, 0.3554689, 0.3255765, 0.4290578),
   tolerance = 0.0001
   )
 
 })
+
+##############################################
+test_that("5. Output checks: dat4", {
+
+  res1 <- RPS(exp4, obs4, dat_dim = 'dataset')
+
+  expect_equal(
+  which(is.na(res1)),
+  c(1, 2, 4, 6)
+  )
+  expect_equal(
+  res1[1, 2:3],
+  RPS(exp3, obs3, dat_dim = 'dataset')[1, 2:3]
+  )
+
+  res2 <- RPS(exp4, obs4, dat_dim = 'dataset', na.rm = T)
+
+  expect_equal(
+  res2,
+  RPS(exp4, obs4, dat_dim = 'dataset', na.rm = 0)
+  )
+  expect_equal(
+  which(is.na(res2)),
+  integer(0)
+  )
+  expect_equal(
+  c(res2),
+  c(0.3472222, 0.3680556, 0.2944444, 0.3333333, 0.3388889, 0.2222222),
+  tolerance = 0.0001
+  )
+
+  expect_equal(
+  RPS(exp4, obs4, dat_dim = 'dataset', na.rm = 0.8),
+  res2
+  )
+  expect_equal(
+  RPS(exp4, obs4, dat_dim = 'dataset', na.rm = 0.95),
+  res1
+  )
+  expect_equal(
+  which(is.na(RPS(exp4, obs4, dat_dim = 'dataset', na.rm = 0.9))),
+  c(1, 2)
+  )
+
+  # weights
+  res3 <- RPS(exp4, obs4, weights = weights4, dat_dim = 'dataset')
+  expect_equal(
+  which(is.na(res3)),
+  c(1, 2, 4, 6)
+  )
+  expect_equal(
+  res3[1, 2:3],
+  RPS(exp3, obs3, dat_dim = 'dataset',weights = weights3)[1, 2:3]
+  )
+
+  res4 <- RPS(exp4, obs4, weights = weights4, dat_dim = 'dataset', na.rm = 0)
+  expect_equal(
+  c(res4),
+  c(0.3865228, 0.4885273, 0.2917297, 0.4143631, 0.3255765, 0.4028817),
+  tolerance = 0.0001
+  )
+
+  expect_equal(
+  which(is.na(RPS(exp4, obs4, weights = weights4, dat_dim = 'dataset', na.rm = 0.9))),
+  c(1, 2)
+  )
+
+})
+
-- 
GitLab


From c025876bd29679e14322b708d5093e059b3cabfa Mon Sep 17 00:00:00 2001
From: eduzenli <eren.duzenli@bsc.es>
Date: Tue, 18 Jul 2023 08:34:56 +0200
Subject: [PATCH 06/10] RPSS.R na.rm feature

---
 R/RPSS.R | 75 ++++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 49 insertions(+), 26 deletions(-)

diff --git a/R/RPSS.R b/R/RPSS.R
index 642e888..9b19c54 100644
--- a/R/RPSS.R
+++ b/R/RPSS.R
@@ -417,30 +417,48 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
         obs_data <- obs[ , , j]
         if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1:2])
         obs_probs <- .GetProbs(data = obs_data, indices_for_quantiles = indices_for_clim, 
-                               prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val)
+                               prob_thresholds = prob_thresholds, 
+                               weights = NULL, cross.val = cross.val)
         # obs_probs: [bin, sdate] 
       } else {
         obs_probs <- t(obs[ , , j])
       }
-      clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), 
-                      1 - prob_thresholds[length(prob_thresholds)])
-      clim_probs <- array(clim_probs, dim = dim(obs_probs))
-      # clim_probs: [bin, sdate]
-
-      # Calculate RPS for each time step
-      probs_clim_cumsum <- apply(clim_probs, 2, cumsum)
-      probs_obs_cumsum <- apply(obs_probs, 2, cumsum)
-      rps_ref[ , j] <- colSums((probs_clim_cumsum - probs_obs_cumsum)^2)
-  #   if (Fair) { # FairRPS
-  #     ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps]
-  #     R <- dim(exp)[2]  #memb
-  #     R_new <- Inf
-  #     adjustment <- (-1) / (R - 1) * probs_clim_cumsum * (1 - probs_clim_cumsum)
-  #     adjustment <- apply(adjustment, 2, sum)
-  #     rps_ref <- rps_ref + adjustment
-  #   }
 
+      good_values <- !is.na(colMeans(obs_probs))
+
+      if (isTRUE(na.rm)) {
+        f_NAs <- 0
+      } else if (isFALSE(na.rm)) {
+        f_NAs <- 1
+      } else {
+        f_NAs <- na.rm
+      }
+
+      if (f_NAs <= sum(good_values) / length(good_values)) {
+ 
+        clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), 
+                        1 - prob_thresholds[length(prob_thresholds)])
+        clim_probs <- array(clim_probs, dim = dim(obs_probs))
+        # clim_probs: [bin, sdate]
+
+        # Calculate RPS for each time step
+        probs_clim_cumsum <- apply(clim_probs, 2, cumsum)
+        probs_obs_cumsum <- apply(obs_probs, 2, cumsum)
+        rps_ref[ , j] <- colSums((probs_clim_cumsum - probs_obs_cumsum)^2)
+    #   if (Fair) { # FairRPS
+    #     ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps]
+    #     R <- dim(exp)[2]  #memb
+    #     R_new <- Inf
+    #     adjustment <- (-1) / (R - 1) * probs_clim_cumsum * (1 - probs_clim_cumsum)
+    #     adjustment <- apply(adjustment, 2, sum)
+    #     rps_ref <- rps_ref + adjustment
+    #   }
+
+      } else {
+        rps_ref[ , j] <- as.numeric(NA)
+      }
     }
+
     if (is.null(dat_dim)) {
       dim(rps_ref) <- dim(exp)[time_dim]
     }
@@ -482,18 +500,23 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
       for (i in 1:nexp) {
         for (j in 1:nobs) {
           rpss[i, j] <- 1 - rps_exp_mean[i, j] / rps_ref_mean[j]
-          sign[i, j] <- .RandomWalkTest(skill_A = rps_exp[, i, j], skill_B = rps_ref[, j])$sign
+          ind_nonNA <- !is.na(rps_exp[, i, j]) & !is.na(rps_ref[, j])
+          if (!any(ind_nonNA)) {
+            sign[i, j] <- NA
+          } else {
+            sign[i, j] <- .RandomWalkTest(skill_A = rps_exp[ind_nonNA, i, j], skill_B = rps_ref[ind_nonNA, j])$sign
+          }
         }
       }
     } else {  # rps_ref: [sdate, nexp, nobs]
       for (i in 1:nexp) {
         for (j in 1:nobs) {
           rpss[i, j] <- 1 - rps_exp_mean[i, j] / rps_ref_mean[i, j]
-          good_values <- !is.na(rps_exp[, i, j]) & !is.na(rps_ref[, i, j])
-          if (!(na.rm <= sum(good_values)/length(good_values))) {
-          sign <- NA
+          ind_nonNA <- !is.na(rps_exp[, i, j]) & !is.na(rps_ref[, i, j])
+          if (!any(ind_nonNA)) {
+            sign[i, j] <- NA
           } else {
-          sign[i, j] <- .RandomWalkTest(skill_A = rps_exp[good_values, i, j], skill_B = rps_ref[good_values, i, j])$sign
+            sign[i, j] <- .RandomWalkTest(skill_A = rps_exp[ind_nonNA, i, j], skill_B = rps_ref[ind_nonNA, i, j])$sign
           }
         }
       }
@@ -505,11 +528,11 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
     rpss <- 1 - mean(rps_exp, na.rm = TRUE) / mean(rps_ref, na.rm = TRUE)
 
     # Significance
-    good_values <- !is.na(rps_exp) & !is.na(rps_ref)
-    if (!(na.rm <= sum(good_values)/length(good_values))) {
+    ind_nonNA <- !is.na(rps_exp) & !is.na(rps_ref)
+    if (!any(ind_nonNA)) {
       sign <- NA
     } else {
-      sign <- .RandomWalkTest(skill_A = rps_exp[good_values], skill_B = rps_ref[good_values], sign = T, pval = F)$sign
+      sign <- .RandomWalkTest(skill_A = rps_exp[ind_nonNA], skill_B = rps_ref[ind_nonNA], sign = T, pval = F)$sign
     }
   }
   
-- 
GitLab


From 04c5043c0501389806b709db475ef5ea5f2592d6 Mon Sep 17 00:00:00 2001
From: eduzenli <eren.duzenli@bsc.es>
Date: Tue, 25 Jul 2023 18:02:28 +0200
Subject: [PATCH 07/10] RPSS, NA removing before the obs prob calc and taking
 ref NAs into consideration

---
 R/RPSS.R | 201 +++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 136 insertions(+), 65 deletions(-)

diff --git a/R/RPSS.R b/R/RPSS.R
index 9b19c54..45d8a8a 100644
--- a/R/RPSS.R
+++ b/R/RPSS.R
@@ -372,7 +372,8 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
 
 .RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL,
                   dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL,
-                  Fair = FALSE, weights_exp = NULL, weights_ref = NULL, cross.val = FALSE, na.rm = FALSE) {
+                  Fair = FALSE, weights_exp = NULL, weights_ref = NULL, cross.val = FALSE,
+                  na.rm = FALSE) {
 
   #--- if memb_dim: 
   # exp: [sdate, memb, (dat)]
@@ -383,58 +384,126 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
   # obs: [sdate, bin, (dat)]
   # ref: [sdate, bin, (dat)] or NULL
 
-  if (is.null(dat_dim)) {
-    nexp <- 1
-    nobs <- 1
-  } else {
-    nexp <- as.numeric(dim(exp)[dat_dim])
-    nobs <- as.numeric(dim(obs)[dat_dim])
-  }
-
 #----------------------------------------------
   # Calculate RPS
 
   # RPS of the forecast
-  rps_exp <- .RPS(exp = exp, obs = obs, time_dim = time_dim, memb_dim = memb_dim,
-                  cat_dim = cat_dim, dat_dim = dat_dim, prob_thresholds = prob_thresholds,
-                  indices_for_clim = indices_for_clim, Fair = Fair, weights = weights_exp, 
-                  cross.val = cross.val, na.rm = na.rm)
-  
+  if (isTRUE(na.rm)) {
+    f_NAs <- 0
+  } else if (isFALSE(na.rm)) {
+    f_NAs <- 1
+  } else {
+    f_NAs <- na.rm
+  }
+
+  if (!is.null(ref)) {
+    if (is.null(dat_dim)) {
+      nexp <- 1
+      nobs <- 1
+      nref <- 1
+      dim(obs) <- c(dim(obs), nobs = nobs)
+      dim(exp) <- c(dim(exp), nexp = nexp)
+      dim(ref) <- c(dim(ref), nexp = nref)
+    } else {
+      nexp <- as.numeric(dim(exp)[dat_dim])
+      nobs <- as.numeric(dim(obs)[dat_dim]) 
+      nref <- as.numeric(dim(ref)[dat_dim])
+      if (is.na(nref)) {
+        nref<-1
+        dim(ref) <- c(dim(ref), nref = nref)
+      }
+    }
+    rps_exp <- array(NaN,dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs))
+    rps_ref <- array(NaN,dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs))
+    for (i in 1:nexp) {
+      for (j in 1:nobs) {
+        for (k in 1:nref) {
+          if (nref != 1 & k!=i) { #nref is allowed to be either 1 or equal to nexp 
+            next
+          }
+          exp_data <- exp[ , , i]
+          obs_data <- obs[ , , j]
+          ref_data <- ref[ , , k] 
+          exp_mean <- rowMeans(exp_data)
+          obs_mean <- rowMeans(obs_data)
+          ref_mean <- rowMeans(ref_data)
+          good_values <- !is.na(exp_mean) & !is.na(obs_mean) & !is.na(ref_mean)
+          dum <- match(indices_for_clim, which(good_values))
+          good_indices_for_clim <- dum[!is.na(dum)]
+          if (f_NAs <= sum(good_values) / length(good_values)) {
+            rps_exp[good_values,i,j] <- .RPS(exp = exp[good_values,,i], obs = obs[good_values,,j], 
+                                             time_dim = time_dim, memb_dim = memb_dim, 
+                                             cat_dim = cat_dim, dat_dim = NULL, 
+                                             prob_thresholds = prob_thresholds,
+                                             indices_for_clim = good_indices_for_clim, 
+                                             Fair = Fair, weights = weights_exp,
+                                             cross.val = cross.val, na.rm = na.rm)
+             rps_ref[good_values,i,j] <- .RPS(exp = ref[good_values,,k], obs = obs[good_values,,j], 
+                                              time_dim = time_dim, memb_dim = memb_dim,
+                                              cat_dim = cat_dim, dat_dim = NULL, 
+                                              prob_thresholds = prob_thresholds, 
+                                              indices_for_clim = good_indices_for_clim, 
+                                              Fair = Fair, weights = weights_ref, 
+                                              na.rm = na.rm, cross.val = cross.val)
+          }
+        }
+      }
+    }
+    if (is.null(dat_dim)) {
+      dim(rps_ref) <- dim(rps_exp) <- dim(exp)[time_dim]
+    }
+  } else if (is.null(ref)) { 
+    rps_exp <- .RPS(exp = exp, obs = obs, time_dim = time_dim, memb_dim = memb_dim,
+                    cat_dim = cat_dim, dat_dim = dat_dim, prob_thresholds = prob_thresholds,
+                    indices_for_clim = indices_for_clim, Fair = Fair, weights = weights_exp, 
+                    cross.val = cross.val, na.rm = na.rm)
+
+    if (is.null(dat_dim)) {
+      nexp <- 1
+      nobs <- 1
+      } else {
+      nexp <- as.numeric(dim(exp)[dat_dim])
+      nobs <- as.numeric(dim(obs)[dat_dim])
+    }
+
   # RPS of the reference forecast
-  if (is.null(ref)) { ## using climatology as reference forecast
+
+# if (is.null(ref)) { ## using climatology as reference forecast
     if (!is.null(memb_dim)) {
       if (!memb_dim %in% names(dim(obs))) {
         obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim)
       }
     }
+
+    rps_ref <- array(NaN,dim = c(dim(obs)[time_dim], nexp = nexp, nobs = nobs))
+
     if (is.null(dat_dim)) {
       dim(obs) <- c(dim(obs), nobs = nobs)
+      dim(exp) <- c(dim(exp), nexp = nexp)
+      dim(rps_exp) <- dim(rps_ref)
     }
-    rps_ref <- array(dim = c(dim(obs)[time_dim], nobs = nobs))
-
-    for (j in 1:nobs) {
-      if (is.null(cat_dim)) {  # calculate probs
-        obs_data <- obs[ , , j]
-        if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1:2])
-        obs_probs <- .GetProbs(data = obs_data, indices_for_quantiles = indices_for_clim, 
-                               prob_thresholds = prob_thresholds, 
-                               weights = NULL, cross.val = cross.val)
-        # obs_probs: [bin, sdate] 
-      } else {
-        obs_probs <- t(obs[ , , j])
-      }
 
-      good_values <- !is.na(colMeans(obs_probs))
+    # Use good values only
 
-      if (isTRUE(na.rm)) {
-        f_NAs <- 0
-      } else if (isFALSE(na.rm)) {
-        f_NAs <- 1
-      } else {
-        f_NAs <- na.rm
-      }
+    for (i in 1:nexp) {
+      for (j in 1:nobs) {
+        # Use good values only
+        good_values <- !is.na(rps_exp[, i, j])
+        if (f_NAs <= sum(good_values) / length(good_values)) {
+          obs_data <- obs[good_values, , j]
+          if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1:2])
+
+          if (is.null(cat_dim)) {  # calculate probs
 
-      if (f_NAs <= sum(good_values) / length(good_values)) {
+	    # Subset indices_for_clim
+            dum <- match(indices_for_clim, which(good_values))
+            good_indices_for_clim <- dum[!is.na(dum)]
+            obs_probs <- .GetProbs(data = obs_data, indices_for_quantiles = good_indices_for_clim, 
+                                 prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val)
+            # obs_probs: [bin, sdate] 
+          } else {
+            obs_probs <- t(obs_data)
+          }
  
         clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), 
                         1 - prob_thresholds[length(prob_thresholds)])
@@ -444,8 +513,11 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
         # Calculate RPS for each time step
         probs_clim_cumsum <- apply(clim_probs, 2, cumsum)
         probs_obs_cumsum <- apply(obs_probs, 2, cumsum)
-        rps_ref[ , j] <- colSums((probs_clim_cumsum - probs_obs_cumsum)^2)
-    #   if (Fair) { # FairRPS
+        rps_ref[good_values, i, j] <- colSums((probs_clim_cumsum - probs_obs_cumsum)^2)
+        } else {
+        rps_ref[, i, j] <- NA
+        }
+    	#   if (Fair) { # FairRPS
     #     ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps]
     #     R <- dim(exp)[2]  #memb
     #     R_new <- Inf
@@ -454,36 +526,35 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
     #     rps_ref <- rps_ref + adjustment
     #   }
 
-      } else {
-        rps_ref[ , j] <- as.numeric(NA)
       }
     }
 
     if (is.null(dat_dim)) {
-      dim(rps_ref) <- dim(exp)[time_dim]
+      dim(rps_ref) <- dim(rps_exp) <- dim(exp)[time_dim]
     }
 
-  } else { # use "ref" as reference forecast
-    if (!is.null(dat_dim) && (!dat_dim %in% names(dim(ref)))) {
-      remove_dat_dim <- TRUE
-      ref <- InsertDim(ref, posdim = 3, lendim = 1, name = dat_dim)
-        if (!is.null(weights_ref)) {
-          weights_ref <- InsertDim(weights_ref, posdim = 3, lendim = 1, name = dat_dim)
-        }
-    } else {
-      remove_dat_dim <- FALSE
-    }
-
-    rps_ref <- .RPS(exp = ref, obs = obs, time_dim = time_dim, memb_dim = memb_dim,
-                    cat_dim = cat_dim, dat_dim = dat_dim, prob_thresholds = prob_thresholds, 
-                    indices_for_clim = indices_for_clim, Fair = Fair, weights = weights_ref, 
-                    na.rm = na.rm, cross.val = cross.val)
-    if (!is.null(dat_dim)) {
-      if (isTRUE(remove_dat_dim)) {
-        dim(rps_ref) <- dim(rps_ref)[-2]
-      }
-    }
   }
+#    else { # use "ref" as reference forecast
+#    if (!is.null(dat_dim) && (!dat_dim %in% names(dim(ref)))) {
+#      remove_dat_dim <- TRUE
+#      ref <- InsertDim(ref, posdim = 3, lendim = 1, name = dat_dim)
+#        if (!is.null(weights_ref)) {
+#          weights_ref <- InsertDim(weights_ref, posdim = 3, lendim = 1, name = dat_dim)
+#        }
+#    } else {
+#      remove_dat_dim <- FALSE
+#    }
+
+#    rps_ref <- .RPS(exp = ref, obs = obs, time_dim = time_dim, memb_dim = memb_dim,
+#                    cat_dim = cat_dim, dat_dim = dat_dim, prob_thresholds = prob_thresholds, 
+#                    indices_for_clim = indices_for_clim, Fair = Fair, weights = weights_ref, 
+#                    na.rm = na.rm, cross.val = cross.val)
+#    if (!is.null(dat_dim)) {
+#      if (isTRUE(remove_dat_dim)) {
+#        dim(rps_ref) <- dim(rps_ref)[-2]
+#      }
+#    }
+#  }
 
 #----------------------------------------------
   # Calculate RPSS
@@ -500,7 +571,7 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
       for (i in 1:nexp) {
         for (j in 1:nobs) {
           rpss[i, j] <- 1 - rps_exp_mean[i, j] / rps_ref_mean[j]
-          ind_nonNA <- !is.na(rps_exp[, i, j]) & !is.na(rps_ref[, j])
+          ind_nonNA <- !is.na(rps_exp[, i, j]) 
           if (!any(ind_nonNA)) {
             sign[i, j] <- NA
           } else {
@@ -512,7 +583,7 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
       for (i in 1:nexp) {
         for (j in 1:nobs) {
           rpss[i, j] <- 1 - rps_exp_mean[i, j] / rps_ref_mean[i, j]
-          ind_nonNA <- !is.na(rps_exp[, i, j]) & !is.na(rps_ref[, i, j])
+          ind_nonNA <- !is.na(rps_exp[, i, j]) 
           if (!any(ind_nonNA)) {
             sign[i, j] <- NA
           } else {
@@ -528,7 +599,7 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
     rpss <- 1 - mean(rps_exp, na.rm = TRUE) / mean(rps_ref, na.rm = TRUE)
 
     # Significance
-    ind_nonNA <- !is.na(rps_exp) & !is.na(rps_ref)
+    ind_nonNA <- !is.na(rps_exp) 
     if (!any(ind_nonNA)) {
       sign <- NA
     } else {
-- 
GitLab


From b785beabdd75b1dd3b38c474fec9fa34f1ecf09c Mon Sep 17 00:00:00 2001
From: eduzenli <eren.duzenli@bsc.es>
Date: Mon, 21 Aug 2023 16:46:41 +0200
Subject: [PATCH 08/10] fine tuning

---
 R/RPSS.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/RPSS.R b/R/RPSS.R
index 45d8a8a..fda2beb 100644
--- a/R/RPSS.R
+++ b/R/RPSS.R
@@ -491,7 +491,7 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
         good_values <- !is.na(rps_exp[, i, j])
         if (f_NAs <= sum(good_values) / length(good_values)) {
           obs_data <- obs[good_values, , j]
-          if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1:2])
+          if (is.null(dim(obs_data))) dim(obs_data) <- c(length(obs_data),1)
 
           if (is.null(cat_dim)) {  # calculate probs
 
-- 
GitLab


From c5777a9ce4ca3b455aa38c4dc61badda9ad25dbc Mon Sep 17 00:00:00 2001
From: aho <an.ho@bsc.es>
Date: Wed, 6 Sep 2023 11:55:17 +0200
Subject: [PATCH 09/10] refine code

---
 R/RPSS.R | 75 ++++++++++++++++++++++++++++++--------------------------
 1 file changed, 40 insertions(+), 35 deletions(-)

diff --git a/R/RPSS.R b/R/RPSS.R
index fda2beb..9f41988 100644
--- a/R/RPSS.R
+++ b/R/RPSS.R
@@ -5,7 +5,7 @@
 #'assess whether a forecast presents an improvement or worsening with respect to
 #'a reference forecast. The RPSS ranges between minus infinite and 1. If the 
 #'RPSS is positive, it indicates that the forecast has higher skill than the 
-#'reference forecast, while a negative value means that it has a lower skill. 
+#'reference forecast, while a negative value means that it has a lower skill.\cr 
 #'Examples of reference forecasts are the climatological forecast (same 
 #'probabilities for all categories for all time steps), persistence, a previous
 #'model version, and another model. It is computed as 
@@ -13,8 +13,10 @@
 #'based on a Random Walk test at the 95% confidence level (DelSole and Tippett, 
 #'2016).\cr  
 #'The function accepts either the ensemble members or the probabilities of
-#' each data as inputs. If there is more than one dataset, RPSS will be 
-#' computed for each pair of exp and obs data.
+#'each data as inputs. If there is more than one dataset, RPSS will be 
+#'computed for each pair of exp and obs data. The NA ratio of data will be  
+#'examined before the calculation. If the ratio is higher than the threshold
+#'(assigned by parameter \code{na.rm}), NA will be returned directly.
 #'
 #'@param exp A named numerical array of either the forecast with at least time
 #'  and member dimensions, or the probabilities with at least time and category
@@ -384,10 +386,6 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
   # obs: [sdate, bin, (dat)]
   # ref: [sdate, bin, (dat)] or NULL
 
-#----------------------------------------------
-  # Calculate RPS
-
-  # RPS of the forecast
   if (isTRUE(na.rm)) {
     f_NAs <- 0
   } else if (isFALSE(na.rm)) {
@@ -396,40 +394,53 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
     f_NAs <- na.rm
   }
 
+  if (is.null(dat_dim)) {
+    nexp <- 1
+    nobs <- 1
+  } else {
+    nexp <- as.numeric(dim(exp)[dat_dim])
+    nobs <- as.numeric(dim(obs)[dat_dim])
+  }
+  
+  # Calculate RPS
+
   if (!is.null(ref)) {
+
+    # Adjust dimensions to be [sdate, memb, dat] for both exp, obs, and ref
+    ## Insert memb_dim in obs
+    if (!is.null(memb_dim)) {
+      if (!memb_dim %in% names(dim(obs))) {
+        obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim)
+      }
+    }
+    ## Insert dat_dim 
     if (is.null(dat_dim)) {
-      nexp <- 1
-      nobs <- 1
+      dim(exp) <- c(dim(exp), dat = nexp)
+      dim(obs) <- c(dim(obs), dat = nobs)
+    }
+    if (is.null(dat_dim) || (!is.null(dat_dim) && !dat_dim %in% names(dim(ref)))) {
       nref <- 1
-      dim(obs) <- c(dim(obs), nobs = nobs)
-      dim(exp) <- c(dim(exp), nexp = nexp)
-      dim(ref) <- c(dim(ref), nexp = nref)
+      dim(ref) <- c(dim(ref), dat = nef)
     } else {
-      nexp <- as.numeric(dim(exp)[dat_dim])
-      nobs <- as.numeric(dim(obs)[dat_dim]) 
-      nref <- as.numeric(dim(ref)[dat_dim])
-      if (is.na(nref)) {
-        nref<-1
-        dim(ref) <- c(dim(ref), nref = nref)
-      }
+      nref <- as.numeric(dim(ref)[dat_dim]) # should be the same as nexp
     }
-    rps_exp <- array(NaN,dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs))
-    rps_ref <- array(NaN,dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs))
+
+    # Find good values then calculate RPS
+    rps_exp <- array(NA, dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs))
+    rps_ref <- array(NA, dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs))
     for (i in 1:nexp) {
       for (j in 1:nobs) {
         for (k in 1:nref) {
-          if (nref != 1 & k!=i) { #nref is allowed to be either 1 or equal to nexp 
-            next
-          }
-          exp_data <- exp[ , , i]
-          obs_data <- obs[ , , j]
-          ref_data <- ref[ , , k] 
+          exp_data <- exp[, , i]
+          obs_data <- obs[, , j]
+          ref_data <- ref[, , k] 
           exp_mean <- rowMeans(exp_data)
           obs_mean <- rowMeans(obs_data)
           ref_mean <- rowMeans(ref_data)
           good_values <- !is.na(exp_mean) & !is.na(obs_mean) & !is.na(ref_mean)
           dum <- match(indices_for_clim, which(good_values))
           good_indices_for_clim <- dum[!is.na(dum)]
+
           if (f_NAs <= sum(good_values) / length(good_values)) {
             rps_exp[good_values,i,j] <- .RPS(exp = exp[good_values,,i], obs = obs[good_values,,j], 
                                              time_dim = time_dim, memb_dim = memb_dim, 
@@ -452,19 +463,13 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
     if (is.null(dat_dim)) {
       dim(rps_ref) <- dim(rps_exp) <- dim(exp)[time_dim]
     }
-  } else if (is.null(ref)) { 
+
+  } else {  # ref is NULL 
     rps_exp <- .RPS(exp = exp, obs = obs, time_dim = time_dim, memb_dim = memb_dim,
                     cat_dim = cat_dim, dat_dim = dat_dim, prob_thresholds = prob_thresholds,
                     indices_for_clim = indices_for_clim, Fair = Fair, weights = weights_exp, 
                     cross.val = cross.val, na.rm = na.rm)
 
-    if (is.null(dat_dim)) {
-      nexp <- 1
-      nobs <- 1
-      } else {
-      nexp <- as.numeric(dim(exp)[dat_dim])
-      nobs <- as.numeric(dim(obs)[dat_dim])
-    }
 
   # RPS of the reference forecast
 
-- 
GitLab


From 778c376967d5716bc2a95b77407138d37883bc47 Mon Sep 17 00:00:00 2001
From: aho <an.ho@bsc.es>
Date: Thu, 7 Sep 2023 17:21:33 +0200
Subject: [PATCH 10/10] Clean code, create unit tests for na.rm, update doc

---
 R/RPSS.R                   | 149 ++++++++++++++-----------------------
 man/RPSS.Rd                |  22 +++---
 tests/testthat/test-RPSS.R |  68 +++++++++++++++++
 3 files changed, 135 insertions(+), 104 deletions(-)

diff --git a/R/RPSS.R b/R/RPSS.R
index 9f41988..7e2b3ac 100644
--- a/R/RPSS.R
+++ b/R/RPSS.R
@@ -16,7 +16,9 @@
 #'each data as inputs. If there is more than one dataset, RPSS will be 
 #'computed for each pair of exp and obs data. The NA ratio of data will be  
 #'examined before the calculation. If the ratio is higher than the threshold
-#'(assigned by parameter \code{na.rm}), NA will be returned directly.
+#'(assigned by parameter \code{na.rm}), NA will be returned directly. NAs are 
+#'counted by per-pair method, which means that only the time steps that all the
+#'datasets have values count as non-NA values. 
 #'
 #'@param exp A named numerical array of either the forecast with at least time
 #'  and member dimensions, or the probabilities with at least time and category
@@ -376,7 +378,6 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
                   dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL,
                   Fair = FALSE, weights_exp = NULL, weights_ref = NULL, cross.val = FALSE,
                   na.rm = FALSE) {
-
   #--- if memb_dim: 
   # exp: [sdate, memb, (dat)]
   # obs: [sdate, (memb), (dat)]
@@ -413,14 +414,16 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
         obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim)
       }
     }
-    ## Insert dat_dim 
+    ## Insert dat_dim
     if (is.null(dat_dim)) {
-      dim(exp) <- c(dim(exp), dat = nexp)
       dim(obs) <- c(dim(obs), dat = nobs)
+      dim(exp) <- c(dim(exp), dat = nexp)
+      if (!is.null(weights_exp)) dim(weights_exp) <- c(dim(weights_exp), dat = nexp)
     }
     if (is.null(dat_dim) || (!is.null(dat_dim) && !dat_dim %in% names(dim(ref)))) {
       nref <- 1
-      dim(ref) <- c(dim(ref), dat = nef)
+      dim(ref) <- c(dim(ref), dat = nref)
+      if (!is.null(weights_ref)) dim(weights_ref) <- c(dim(weights_ref), dat = nref)
     } else {
       nref <- as.numeric(dim(ref)[dat_dim]) # should be the same as nexp
     }
@@ -431,9 +434,12 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
     for (i in 1:nexp) {
       for (j in 1:nobs) {
         for (k in 1:nref) {
-          exp_data <- exp[, , i]
-          obs_data <- obs[, , j]
-          ref_data <- ref[, , k] 
+          if (nref != 1 & k!=i) { # if nref is 1 or equal to nexp, calculate rps
+            next
+          }
+          exp_data <- exp[, , i, drop = F]
+          obs_data <- obs[, , j, drop = F]
+          ref_data <- ref[, , k, drop = F] 
           exp_mean <- rowMeans(exp_data)
           obs_mean <- rowMeans(obs_data)
           ref_mean <- rowMeans(ref_data)
@@ -442,27 +448,24 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
           good_indices_for_clim <- dum[!is.na(dum)]
 
           if (f_NAs <= sum(good_values) / length(good_values)) {
-            rps_exp[good_values,i,j] <- .RPS(exp = exp[good_values,,i], obs = obs[good_values,,j], 
+            rps_exp[good_values,i,j] <- .RPS(exp = exp[good_values, , i], obs = obs[good_values, , j], 
                                              time_dim = time_dim, memb_dim = memb_dim, 
                                              cat_dim = cat_dim, dat_dim = NULL, 
                                              prob_thresholds = prob_thresholds,
                                              indices_for_clim = good_indices_for_clim, 
-                                             Fair = Fair, weights = weights_exp,
+                                             Fair = Fair, weights = weights_exp[good_values, , i],
                                              cross.val = cross.val, na.rm = na.rm)
-             rps_ref[good_values,i,j] <- .RPS(exp = ref[good_values,,k], obs = obs[good_values,,j], 
+             rps_ref[good_values,i,j] <- .RPS(exp = ref[good_values, , k], obs = obs[good_values, , j], 
                                               time_dim = time_dim, memb_dim = memb_dim,
                                               cat_dim = cat_dim, dat_dim = NULL, 
                                               prob_thresholds = prob_thresholds, 
                                               indices_for_clim = good_indices_for_clim, 
-                                              Fair = Fair, weights = weights_ref, 
+                                              Fair = Fair, weights = weights_ref[good_values, , k], 
                                               na.rm = na.rm, cross.val = cross.val)
           }
         }
       }
     }
-    if (is.null(dat_dim)) {
-      dim(rps_ref) <- dim(rps_exp) <- dim(exp)[time_dim]
-    }
 
   } else {  # ref is NULL 
     rps_exp <- .RPS(exp = exp, obs = obs, time_dim = time_dim, memb_dim = memb_dim,
@@ -470,121 +473,78 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
                     indices_for_clim = indices_for_clim, Fair = Fair, weights = weights_exp, 
                     cross.val = cross.val, na.rm = na.rm)
 
-
-  # RPS of the reference forecast
-
-# if (is.null(ref)) { ## using climatology as reference forecast
+    # RPS of the reference forecast
     if (!is.null(memb_dim)) {
       if (!memb_dim %in% names(dim(obs))) {
         obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim)
       }
     }
 
-    rps_ref <- array(NaN,dim = c(dim(obs)[time_dim], nexp = nexp, nobs = nobs))
+    rps_ref <- array(NA, dim = c(dim(obs)[time_dim], nexp = nexp, nobs = nobs))
 
     if (is.null(dat_dim)) {
       dim(obs) <- c(dim(obs), nobs = nobs)
-      dim(exp) <- c(dim(exp), nexp = nexp)
+      dim(exp) <- c(dim(exp), nexp = nexp) 
       dim(rps_exp) <- dim(rps_ref)
     }
 
-    # Use good values only
-
     for (i in 1:nexp) {
       for (j in 1:nobs) {
         # Use good values only
         good_values <- !is.na(rps_exp[, i, j])
         if (f_NAs <= sum(good_values) / length(good_values)) {
           obs_data <- obs[good_values, , j]
-          if (is.null(dim(obs_data))) dim(obs_data) <- c(length(obs_data),1)
+          if (is.null(dim(obs_data))) dim(obs_data) <- c(length(obs_data), 1)
 
           if (is.null(cat_dim)) {  # calculate probs
-
 	    # Subset indices_for_clim
             dum <- match(indices_for_clim, which(good_values))
             good_indices_for_clim <- dum[!is.na(dum)]
             obs_probs <- .GetProbs(data = obs_data, indices_for_quantiles = good_indices_for_clim, 
                                  prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val)
-            # obs_probs: [bin, sdate] 
           } else {
             obs_probs <- t(obs_data)
           }
+          # obs_probs: [bin, sdate]
  
-        clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), 
-                        1 - prob_thresholds[length(prob_thresholds)])
-        clim_probs <- array(clim_probs, dim = dim(obs_probs))
-        # clim_probs: [bin, sdate]
-
-        # Calculate RPS for each time step
-        probs_clim_cumsum <- apply(clim_probs, 2, cumsum)
-        probs_obs_cumsum <- apply(obs_probs, 2, cumsum)
-        rps_ref[good_values, i, j] <- colSums((probs_clim_cumsum - probs_obs_cumsum)^2)
-        } else {
-        rps_ref[, i, j] <- NA
+          clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), 
+                          1 - prob_thresholds[length(prob_thresholds)])
+          clim_probs <- array(clim_probs, dim = dim(obs_probs))
+          # clim_probs: [bin, sdate]
+
+          # Calculate RPS for each time step
+          probs_clim_cumsum <- apply(clim_probs, 2, cumsum)
+          probs_obs_cumsum <- apply(obs_probs, 2, cumsum)
+          rps_ref[good_values, i, j] <- colSums((probs_clim_cumsum - probs_obs_cumsum)^2)
         }
-    	#   if (Fair) { # FairRPS
-    #     ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps]
-    #     R <- dim(exp)[2]  #memb
-    #     R_new <- Inf
-    #     adjustment <- (-1) / (R - 1) * probs_clim_cumsum * (1 - probs_clim_cumsum)
-    #     adjustment <- apply(adjustment, 2, sum)
-    #     rps_ref <- rps_ref + adjustment
-    #   }
+    	# if (Fair) { # FairRPS
+        #   ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps]
+        #   R <- dim(exp)[2]  #memb
+        #   R_new <- Inf
+        #   adjustment <- (-1) / (R - 1) * probs_clim_cumsum * (1 - probs_clim_cumsum)
+        #   adjustment <- apply(adjustment, 2, sum)
+        #   rps_ref <- rps_ref + adjustment
+        # }
 
       }
     }
+  }
 
-    if (is.null(dat_dim)) {
-      dim(rps_ref) <- dim(rps_exp) <- dim(exp)[time_dim]
-    }
-
+  if (is.null(dat_dim)) {
+    dim(rps_ref) <- dim(rps_exp) <- dim(exp)[time_dim]
   }
-#    else { # use "ref" as reference forecast
-#    if (!is.null(dat_dim) && (!dat_dim %in% names(dim(ref)))) {
-#      remove_dat_dim <- TRUE
-#      ref <- InsertDim(ref, posdim = 3, lendim = 1, name = dat_dim)
-#        if (!is.null(weights_ref)) {
-#          weights_ref <- InsertDim(weights_ref, posdim = 3, lendim = 1, name = dat_dim)
-#        }
-#    } else {
-#      remove_dat_dim <- FALSE
-#    }
-
-#    rps_ref <- .RPS(exp = ref, obs = obs, time_dim = time_dim, memb_dim = memb_dim,
-#                    cat_dim = cat_dim, dat_dim = dat_dim, prob_thresholds = prob_thresholds, 
-#                    indices_for_clim = indices_for_clim, Fair = Fair, weights = weights_ref, 
-#                    na.rm = na.rm, cross.val = cross.val)
-#    if (!is.null(dat_dim)) {
-#      if (isTRUE(remove_dat_dim)) {
-#        dim(rps_ref) <- dim(rps_ref)[-2]
-#      }
-#    }
-#  }
 
 #----------------------------------------------
   # Calculate RPSS
 
   if (!is.null(dat_dim)) {
-    # rps_exp: [sdate, nexp, nobs]
-    # rps_ref: [sdate, (nexp), nobs]
-    rps_exp_mean <- MeanDims(rps_exp, time_dim, na.rm = TRUE)
-    rps_ref_mean <- MeanDims(rps_ref, time_dim, na.rm = TRUE)
+    # rps_exp and rps_ref: [sdate, nexp, nobs]
+    rps_exp_mean <- colMeans(rps_exp, na.rm = TRUE)
+    rps_ref_mean <- colMeans(rps_ref, na.rm = TRUE)
     rpss <- array(dim = c(nexp = nexp, nobs = nobs))
     sign <- array(dim = c(nexp = nexp, nobs = nobs))
 
-    if (length(dim(rps_ref_mean)) == 1) {  # rps_ref: [sdate, nobs]
-      for (i in 1:nexp) {
-        for (j in 1:nobs) {
-          rpss[i, j] <- 1 - rps_exp_mean[i, j] / rps_ref_mean[j]
-          ind_nonNA <- !is.na(rps_exp[, i, j]) 
-          if (!any(ind_nonNA)) {
-            sign[i, j] <- NA
-          } else {
-            sign[i, j] <- .RandomWalkTest(skill_A = rps_exp[ind_nonNA, i, j], skill_B = rps_ref[ind_nonNA, j])$sign
-          }
-        }
-      }
-    } else {  # rps_ref: [sdate, nexp, nobs]
+    if (any(!is.na(rps_exp_mean))) {
       for (i in 1:nexp) {
         for (j in 1:nobs) {
           rpss[i, j] <- 1 - rps_exp_mean[i, j] / rps_ref_mean[i, j]
@@ -598,22 +558,21 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
       }
     }
 
-  } else {  # dat_dim is NULL
+    # Turn NaN into NA
+    if (any(is.nan(rpss))) rpss[which(is.nan(rpss))] <- NA
 
-    # rps_exp and rps_ref: [sdate]
-    rpss <- 1 - mean(rps_exp, na.rm = TRUE) / mean(rps_ref, na.rm = TRUE)
+  } else {  # dat_dim is NULL
 
-    # Significance
     ind_nonNA <- !is.na(rps_exp) 
     if (!any(ind_nonNA)) {
+      rpss <- NA
       sign <- NA
     } else {
+      # rps_exp and rps_ref: [sdate]
+      rpss <- 1 - mean(rps_exp, na.rm = TRUE) / mean(rps_ref, na.rm = TRUE)
       sign <- .RandomWalkTest(skill_A = rps_exp[ind_nonNA], skill_B = rps_ref[ind_nonNA], sign = T, pval = F)$sign
     }
   }
-  
+
   return(list(rpss = rpss, sign = sign))
 }
-
-
-
diff --git a/man/RPSS.Rd b/man/RPSS.Rd
index a9dd7d7..0cf7ba5 100644
--- a/man/RPSS.Rd
+++ b/man/RPSS.Rd
@@ -80,14 +80,14 @@ consistency between the weighted and unweighted methodologies is desired.}
 
 \item{weights_ref}{Same as 'weights_exp' but for the reference forecast.}
 
-\item{cross.val}{A logical indicating whether to compute the thresholds between 
-probabilistics categories in cross-validation.
-The default value is FALSE.}
+\item{cross.val}{A logical indicating whether to compute the thresholds
+between probabilistics categories in cross-validation. The default value is
+FALSE.}
 
-\item{na.rm}{A logical or numeric value between 0 and 1. If it is numeric,
-it shows the lower limit for the fraction of the non-NA values. The function 
-returns NA, if the fraction of non-NA values in the provided data is less than 
-na.rm. Otherwise, RPSS will be calculated.}
+\item{na.rm}{A logical or numeric value between 0 and 1. If it is numeric, it 
+means the lower limit for the fraction of the non-NA values. 1 is equal to 
+FALSE (no NA is acceptable), 0 is equal to TRUE (all NAs are acceptable). 
+than na.rm. Otherwise, RPS will be calculated. The default value is FALSE.}
 
 \item{ncores}{An integer indicating the number of cores to use for parallel 
 computation. The default value is NULL.}
@@ -110,7 +110,7 @@ based on the Ranked Probability Score (RPS; Wilks, 2011). It can be used to
 assess whether a forecast presents an improvement or worsening with respect to
 a reference forecast. The RPSS ranges between minus infinite and 1. If the 
 RPSS is positive, it indicates that the forecast has higher skill than the 
-reference forecast, while a negative value means that it has a lower skill. 
+reference forecast, while a negative value means that it has a lower skill.\cr 
 Examples of reference forecasts are the climatological forecast (same 
 probabilities for all categories for all time steps), persistence, a previous
 model version, and another model. It is computed as 
@@ -119,7 +119,11 @@ based on a Random Walk test at the 95% confidence level (DelSole and Tippett,
 2016).\cr  
 The function accepts either the ensemble members or the probabilities of
 each data as inputs. If there is more than one dataset, RPSS will be 
-computed for each pair of exp and obs data.
+computed for each pair of exp and obs data. The NA ratio of data will be  
+examined before the calculation. If the ratio is higher than the threshold
+(assigned by parameter \code{na.rm}), NA will be returned directly. NAs are 
+counted by per-pair method, which means that only the time steps that all the
+datasets have values count as non-NA values.
 }
 \examples{
 set.seed(1)
diff --git a/tests/testthat/test-RPSS.R b/tests/testthat/test-RPSS.R
index f054325..81c5a84 100644
--- a/tests/testthat/test-RPSS.R
+++ b/tests/testthat/test-RPSS.R
@@ -15,6 +15,11 @@ exp1_2 <- GetProbs(exp1, memb_dim = 'member')
 obs1_2 <- GetProbs(obs1, memb_dim = NULL)
 ref1_2 <- GetProbs(ref1, memb_dim = 'member')
 
+# dat1_3: NAs
+exp1_3 <- exp1; exp1_3[1, 2, 1] <- NA
+obs1_3 <- obs1; obs1_3[2, 1] <- NA
+ref1_3 <- ref1; ref1_3[1, 3, 1] <- NA
+
 # dat2
 set.seed(1)
 exp2 <- array(rnorm(20), dim = c(member = 2, sdate = 10))
@@ -49,6 +54,12 @@ weights_exp4 <- array(abs(rnorm(60)), dim = c(member = 2, sdate = 10, dataset =
 set.seed(5)
 weights_ref4 <- array(abs(rnorm(20)), dim = c(member = 2, sdate = 10))
 
+# dat4_2: NAs
+exp4_2 <- exp4; exp4_2[1, 2, 1, 1] <- NA
+obs4_2 <- obs4; obs4_2[1, 1:4, 1, 1] <- NA
+ref4_2 <- ref4
+
+
 ##############################################
 test_that("1. Input checks", {
   # exp and obs (1)
@@ -268,6 +279,29 @@ test_that("2. Output checks: dat1", {
     RPSS(exp1_2, obs1_2, ref1_2, memb_dim = NULL, cat_dim = 'bin')
   )
 
+  # dat1_3
+  expect_equal(
+    as.vector(RPSS(exp1_3, obs1_3)$rpss),
+    c(NA, -0.05263158),
+    tolerance = 0.0001
+  )
+  expect_equal(
+    as.vector(RPSS(exp1_3, obs1_3)$sign),
+    c(NA, FALSE)
+  )
+  expect_equal(
+    as.vector(RPSS(exp1_3, obs1_3, na.rm = T)$rpss),
+    c(0.16666667, -0.05263158),
+    tolerance = 0.0001
+  )
+  expect_equal(
+    as.vector(RPSS(exp1_3, obs1_3, na.rm = T)$sign),
+    c(F, F)
+  )
+  expect_equal(
+    as.vector(RPSS(exp1_3, obs1_3, na.rm = 0.9)$sign),
+    c(F, F)
+  )
 
 })
 
@@ -434,4 +468,38 @@ test_that("5. Output checks: dat4", {
     RPSS(exp3, obs3, weights_exp = weights3, dat_dim = 'dataset')$rpss
   )
 
+  # dat4_2: NAs
+  expect_equal(
+    as.vector(RPSS(exp4_2, obs4_2, ref4_2, dat_dim = 'dataset')$rpss[, , 1]),
+    c(NA, NA, NA, NA, c(-0.3076923, 0.1538462)),
+    tolerance = 0.0001
+  )
+  expect_equal(
+    as.vector(RPSS(exp4_2, obs4_2, ref4_2, dat_dim = 'dataset')$rpss[, , 2]),
+    c(0, 0.1333333, -0.4, 0.1176471, 0, 0.3529412),
+    tolerance = 0.0001
+  )
+  expect_equal(
+    which(is.na(RPSS(exp4_2, obs4_2, ref4_2, dat_dim = 'dataset')$rpss)),
+    which(is.na(RPSS(exp4_2, obs4_2, ref4_2, dat_dim = 'dataset')$sign))
+  )
+  expect_equal(
+    as.vector(RPSS(exp4_2, obs4_2, ref4_2, dat_dim = 'dataset', na.rm = 0.9)$rpss[, , 1]),
+    c(NA, NA, NA, -0.3333333, -0.3076923, 0.1538462),
+    tolerance = 0.0001
+  )
+  expect_equal(
+    which(is.na(RPSS(exp4_2, obs4_2, ref4_2, dat_dim = 'dataset', na.rm = 0.9)$rpss)),
+    which(is.na(RPSS(exp4_2, obs4_2, ref4_2, dat_dim = 'dataset', na.rm = 0.9)$sign))
+  )
+  expect_equal(
+    as.vector(RPSS(exp4_2, obs4_2, ref4_2, dat_dim = 'dataset', na.rm = 0.6)$rpss[, , 1]),
+    c(0.25,  0.1666667, -0.1666667, -0.3333333, -0.3076923, 0.1538462),
+    tolerance = 0.0001
+  )
+  expect_equal(
+    which(is.na(RPSS(exp4_2, obs4_2, ref4_2, dat_dim = 'dataset', na.rm = 0.6)$rpss)),
+    which(is.na(RPSS(exp4_2, obs4_2, ref4_2, dat_dim = 'dataset', na.rm = 0.6)$sign))
+  )
+
 })
-- 
GitLab