From 9c99215cf2f49aaa6624c3be06fdd9c788adcdf7 Mon Sep 17 00:00:00 2001
From: Carlos Delgado <carlos.delgado@bsc.es>
Date: Thu, 17 Mar 2022 11:19:55 +0100
Subject: [PATCH 1/9] first version (without documentation)

---
 R/ResidualCorr.R | 62 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)
 create mode 100644 R/ResidualCorr.R

diff --git a/R/ResidualCorr.R b/R/ResidualCorr.R
new file mode 100644
index 0000000..74e6a25
--- /dev/null
+++ b/R/ResidualCorr.R
@@ -0,0 +1,62 @@
+
+library(s2dv)
+library(multiApply)
+
+ResidualCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, alpha = 0.05, ncores = 1){
+  
+  ## Checkings
+  if (!is.array(exp)){stop('"exp" must be an array.')}
+  if (!is.array(obs)){stop('"obs" must be an array.')}
+  if (!is.array(ref)){stop('"ref" must be an array.')}
+  if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(ref)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "ref".')}
+  if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(obs)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "obs".')}
+  if (!is.numeric(alpha) | alpha <= 0 | alpha >= 1){stop('"alpha" must be a number between 0 and 1.')}
+  if (!is.character(time_dim)){stop('"time_dim" must be a string.')}
+  if (!is.null(member_dim)){
+    if (!is.character(member_dim)){stop('"member_dim" must be either NULL or a string.')}
+  }
+  if (!is.null(ncores)){
+    if (!is.numeric(ncores) | ncores < 1){stop('"ncores" must be either NULL or a positive integer.')}
+  }
+  
+  ## Ensemble mean
+  if (!is.null(member_dim)){
+    exp <- multiApply::Apply(data = exp, target_dims = member_dim, fun = mean, na.rm = FALSE, ncores = ncores)$output1
+    ref <- multiApply::Apply(data = ref, target_dims = member_dim, fun = mean, na.rm = FALSE, ncores = ncores)$output1
+  }
+  
+  ## Residual correlation and its significance
+  output <- multiApply::Apply(data = list(exp = exp, ref = ref, obs = obs),
+                              target_dims = time_dim, fun = .ResidualCorr,
+                              time_dim = time_dim, alpha = alpha, ncores = ncores)
+  return(output)
+}
+
+.ResidualCorr <- function(exp, ref, obs, time_dim, alpha){
+  
+  if (!anyNA(exp) & !anyNA(ref) & !anyNA(obs)){
+    
+    ## Residuals of 'exp' and 'obs' (regressing 'ref' out in both 'exp' and 'obs')
+    exp_res <- lm(formula = y ~ x, data = list(y = exp, x = ref), na.action = NULL)$residuals
+    obs_res <- lm(formula = y ~ x, data = list(y = obs, x = ref), na.action = NULL)$residuals
+    
+    ## Residual correlation (and significance)
+    residual_cor <- NULL
+    residual_cor$r <- cor(exp_res,obs_res)
+    
+    ## Significance
+    residual_cor$n_eff <- s2dv::Eno(data = obs_res, time_dim = time_dim, na.action = na.pass, ncores = 1)
+    t_alpha2_n2 <- qt(p=alpha/2, df = residual_cor$n_eff-2, lower.tail = FALSE)
+    t <- abs(residual_cor$r) * sqrt(residual_cor$n_eff-2) / sqrt(1-residual_cor$r^2)
+    if (anyNA(c(t,t_alpha2_n2)) == FALSE & t >= t_alpha2_n2){
+      residual_cor$sign <- TRUE
+    } else {
+      residual_cor$sign <- FALSE
+    }
+    
+  } else {
+    residual_cor <- list(r = NA, n_eff = NA, sign = NA)
+  }
+  
+  return(residual_cor)
+}
-- 
GitLab


From 61ece2ebabd2b69f82ee38153d1ad4276fcf65c5 Mon Sep 17 00:00:00 2001
From: Carlos Delgado <carlos.delgado@bsc.es>
Date: Thu, 17 Mar 2022 17:20:12 +0100
Subject: [PATCH 2/9] included documentation

---
 R/ResidualCorr.R | 59 ++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 50 insertions(+), 9 deletions(-)

diff --git a/R/ResidualCorr.R b/R/ResidualCorr.R
index 74e6a25..b9cae9d 100644
--- a/R/ResidualCorr.R
+++ b/R/ResidualCorr.R
@@ -1,8 +1,49 @@
-
-library(s2dv)
-library(multiApply)
-
-ResidualCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, alpha = 0.05, ncores = 1){
+#'Compute the residual correlation
+#'
+#'The residual correlation assesses whether a forecast captures any of the observed variability that is not 
+#'already captured by a reference forecast (Smith et al., 2019; https://doi.org/10.1038/s41612-019-0071-y.).
+#'The procedure is as follows: the residuals of the forecasts and observations are computed by linearly regressing out 
+#'the reference forecast's ensemble mean from the forecasts’ ensemble mean and observations, respectively. Then, the 
+#'residual correlation is computed as the correlation between both residuals. Positive values of the residual correlation
+#'indicate that the forecast capture more observed variability than the reference forecast, while negative values mean 
+#'that the reference forecast capture more. The significance of the residual correlation is computed with a two-sided t-test
+#'(Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7) using a effective degrees of freedom to account for 
+#'the time series' autocorrelation (von Storch and Zwiers, 1999; https://doi.org/10.1017/CBO9780511612336).
+#'
+#'@param exp A numerical array with the forecast with, at least, time dimension. 
+#'@param ref A numerical array with the reference forecast data with, at least, time dimension.
+#'  The length of the time dimension must be the same as for "exp".
+#'@param obs A numerical array with the observations with, at least, time dimension. 
+#'  The length of the time dimension must be the same as for "exp".
+#'@param time_dim A character string indicating the name of the time dimension.
+#'  The default value is 'year'.
+#'@param member_dim A character string indicating the name of the member dimension to compute the 
+#'  ensemble means of the forecast and reference forecast. If it is NULL, the ensemble means should 
+#'  be provided directly to the function. The default value is NULL.
+#'@param alpha Significance level to be used in the statistical significance test. The default value
+#'  is 0.05.
+#'@param ncores An integer indicating the number of cores to use for parallel 
+#'  computation. The default value is NULL.
+#'
+#'@return A list of 2:
+#'\item{$r}{
+#'  A numerical array with the residual correlation with the same dimensions as the input arrays 
+#'  except "time_dim" (and "member_dim" if provided).
+#'}
+#'\item{$sign}{
+#'  A logical array with the statistical significance of the residual correlation with the same 
+#'  dimensions as the input arrays except "time_dim" (and "member_dim" if provided).
+#'}
+#'
+#'@examples
+#' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50))
+#' ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50))
+#' obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, year = 50))
+#' ResidualCorr(exp, ref, obs, member_dim = 'member')
+#'
+#'@import multiApply
+#'@export
+ResidualCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, alpha = 0.05, ncores = NULL){
   
   ## Checkings
   if (!is.array(exp)){stop('"exp" must be an array.')}
@@ -45,9 +86,9 @@ ResidualCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, al
     residual_cor$r <- cor(exp_res,obs_res)
     
     ## Significance
-    residual_cor$n_eff <- s2dv::Eno(data = obs_res, time_dim = time_dim, na.action = na.pass, ncores = 1)
-    t_alpha2_n2 <- qt(p=alpha/2, df = residual_cor$n_eff-2, lower.tail = FALSE)
-    t <- abs(residual_cor$r) * sqrt(residual_cor$n_eff-2) / sqrt(1-residual_cor$r^2)
+    n_eff <- s2dv::Eno(data = obs_res, time_dim = time_dim, na.action = na.pass, ncores = 1)
+    t_alpha2_n2 <- qt(p = alpha/2, df = n_eff-2, lower.tail = FALSE)
+    t <- abs(residual_cor$r) * sqrt(n_eff-2) / sqrt(1-residual_cor$r^2)
     if (anyNA(c(t,t_alpha2_n2)) == FALSE & t >= t_alpha2_n2){
       residual_cor$sign <- TRUE
     } else {
@@ -55,7 +96,7 @@ ResidualCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, al
     }
     
   } else {
-    residual_cor <- list(r = NA, n_eff = NA, sign = NA)
+    residual_cor <- list(r = NA, sign = NA)
   }
   
   return(residual_cor)
-- 
GitLab


From 6e50719a3978f29cad701afcf85ea1bc187de60a Mon Sep 17 00:00:00 2001
From: Carlos Delgado <carlos.delgado@bsc.es>
Date: Thu, 17 Mar 2022 17:56:29 +0100
Subject: [PATCH 3/9] .

---
 R/ResidualCorr.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/ResidualCorr.R b/R/ResidualCorr.R
index b9cae9d..a69178a 100644
--- a/R/ResidualCorr.R
+++ b/R/ResidualCorr.R
@@ -1,4 +1,4 @@
-#'Compute the residual correlation
+#'Compute the residual correlation and its significance
 #'
 #'The residual correlation assesses whether a forecast captures any of the observed variability that is not 
 #'already captured by a reference forecast (Smith et al., 2019; https://doi.org/10.1038/s41612-019-0071-y.).
-- 
GitLab


From bc63bef7eb49cfbc8854e749c415605cf852cbb8 Mon Sep 17 00:00:00 2001
From: Carlos Delgado <carlos.delgado@bsc.es>
Date: Thu, 24 Mar 2022 13:29:13 +0100
Subject: [PATCH 4/9] added method parameter to allow computing pearson,
 kendall and spearman coefficients

---
 R/ResidualCorr.R | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/R/ResidualCorr.R b/R/ResidualCorr.R
index a69178a..bd551e5 100644
--- a/R/ResidualCorr.R
+++ b/R/ResidualCorr.R
@@ -20,6 +20,8 @@
 #'@param member_dim A character string indicating the name of the member dimension to compute the 
 #'  ensemble means of the forecast and reference forecast. If it is NULL, the ensemble means should 
 #'  be provided directly to the function. The default value is NULL.
+#'@param method A character string indicating the correlation coefficient to be computed ("pearson", 
+#'  "kendall", or "spearman"). The default value is "pearson".
 #'@param alpha Significance level to be used in the statistical significance test. The default value
 #'  is 0.05.
 #'@param ncores An integer indicating the number of cores to use for parallel 
@@ -43,7 +45,7 @@
 #'
 #'@import multiApply
 #'@export
-ResidualCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, alpha = 0.05, ncores = NULL){
+ResidualCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, method = 'pearson', alpha = 0.05, ncores = NULL){
   
   ## Checkings
   if (!is.array(exp)){stop('"exp" must be an array.')}
@@ -56,6 +58,7 @@ ResidualCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, al
   if (!is.null(member_dim)){
     if (!is.character(member_dim)){stop('"member_dim" must be either NULL or a string.')}
   }
+  if (!method %in% c("pearson","kendall","spearman")){stop('"method" must be "pearson", "kendall", or "spearman".')}
   if (!is.null(ncores)){
     if (!is.numeric(ncores) | ncores < 1){stop('"ncores" must be either NULL or a positive integer.')}
   }
@@ -69,11 +72,12 @@ ResidualCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, al
   ## Residual correlation and its significance
   output <- multiApply::Apply(data = list(exp = exp, ref = ref, obs = obs),
                               target_dims = time_dim, fun = .ResidualCorr,
-                              time_dim = time_dim, alpha = alpha, ncores = ncores)
+                              time_dim = time_dim, method = method, 
+                              alpha = alpha, ncores = ncores)
   return(output)
 }
 
-.ResidualCorr <- function(exp, ref, obs, time_dim, alpha){
+.ResidualCorr <- function(exp, ref, obs, time_dim, method, alpha){
   
   if (!anyNA(exp) & !anyNA(ref) & !anyNA(obs)){
     
@@ -83,7 +87,7 @@ ResidualCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, al
     
     ## Residual correlation (and significance)
     residual_cor <- NULL
-    residual_cor$r <- cor(exp_res,obs_res)
+    residual_cor$r <- cor(x = exp_res, y = obs_res, method = method)
     
     ## Significance
     n_eff <- s2dv::Eno(data = obs_res, time_dim = time_dim, na.action = na.pass, ncores = 1)
-- 
GitLab


From c7a87618a84302edb959436756249351988b867b Mon Sep 17 00:00:00 2001
From: Carlos Delgado <carlos.delgado@bsc.es>
Date: Thu, 7 Apr 2022 16:03:49 +0200
Subject: [PATCH 5/9] p.val is returned if alpha=NULL

---
 R/ResidualCorr.R | 51 ++++++++++++++++++++++++++++++++++++------------
 1 file changed, 38 insertions(+), 13 deletions(-)

diff --git a/R/ResidualCorr.R b/R/ResidualCorr.R
index bd551e5..645e70c 100644
--- a/R/ResidualCorr.R
+++ b/R/ResidualCorr.R
@@ -22,19 +22,25 @@
 #'  be provided directly to the function. The default value is NULL.
 #'@param method A character string indicating the correlation coefficient to be computed ("pearson", 
 #'  "kendall", or "spearman"). The default value is "pearson".
-#'@param alpha Significance level to be used in the statistical significance test. The default value
-#'  is 0.05.
+#'@param alpha Significance level to be used in the statistical significance test. 
+#'  If it is a numeric, "sign" will be returned. If NULL, the p-value will be returned instead. 
+#'  The default value is NULL.
 #'@param ncores An integer indicating the number of cores to use for parallel 
 #'  computation. The default value is NULL.
 #'
-#'@return A list of 2:
+#'@return A list with:
 #'\item{$r}{
 #'  A numerical array with the residual correlation with the same dimensions as the input arrays 
 #'  except "time_dim" (and "member_dim" if provided).
 #'}
 #'\item{$sign}{
 #'  A logical array with the statistical significance of the residual correlation with the same 
-#'  dimensions as the input arrays except "time_dim" (and "member_dim" if provided).
+#'  dimensions as the input arrays except "time_dim" (and "member_dim" if provided). Returned only 
+#'  if "alpha" is a numeric.
+#'}
+#'\item{$p.val}{
+#'  A numeric array of the p-values with the same dimensions as the input arrays
+#'  except "time_dim" (and "memb_dim" if provided). Returned only if "alpha" is NULL.
 #'}
 #'
 #'@examples
@@ -45,7 +51,7 @@
 #'
 #'@import multiApply
 #'@export
-ResidualCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, method = 'pearson', alpha = 0.05, ncores = NULL){
+ResidualCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, method = 'pearson', alpha = NULL, ncores = NULL){
   
   ## Checkings
   if (!is.array(exp)){stop('"exp" must be an array.')}
@@ -53,7 +59,9 @@ ResidualCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, me
   if (!is.array(ref)){stop('"ref" must be an array.')}
   if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(ref)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "ref".')}
   if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(obs)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "obs".')}
-  if (!is.numeric(alpha) | alpha <= 0 | alpha >= 1){stop('"alpha" must be a number between 0 and 1.')}
+  if (!is.null(alpha)){
+    if (!is.numeric(alpha) | alpha <= 0 | alpha >= 1){stop('"alpha" must be NULL or a number between 0 and 1.')}
+  }
   if (!is.character(time_dim)){stop('"time_dim" must be a string.')}
   if (!is.null(member_dim)){
     if (!is.character(member_dim)){stop('"member_dim" must be either NULL or a string.')}
@@ -89,18 +97,35 @@ ResidualCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, me
     residual_cor <- NULL
     residual_cor$r <- cor(x = exp_res, y = obs_res, method = method)
     
-    ## Significance
+    ## Effective degrees of freedom
     n_eff <- s2dv::Eno(data = obs_res, time_dim = time_dim, na.action = na.pass, ncores = 1)
-    t_alpha2_n2 <- qt(p = alpha/2, df = n_eff-2, lower.tail = FALSE)
+    
+    ## Statistic
     t <- abs(residual_cor$r) * sqrt(n_eff-2) / sqrt(1-residual_cor$r^2)
-    if (anyNA(c(t,t_alpha2_n2)) == FALSE & t >= t_alpha2_n2){
-      residual_cor$sign <- TRUE
+    
+    if(is.null(alpha)){ ## p-value
+      
+      residual_cor$p.val <- pt(q = t, df = n_eff-2, lower.tail = FALSE)
+      
+    } else {
+      
+      t_alpha2_n2 <- qt(p = alpha/2, df = n_eff-2, lower.tail = FALSE)
+      if (anyNA(c(t,t_alpha2_n2)) == FALSE & t >= t_alpha2_n2){
+        residual_cor$sign <- TRUE
+      } else {
+        residual_cor$sign <- FALSE
+      }
+      
+    }
+    
+  } else {## Significance
+    
+    if (is.null(alpha)){
+      residual_cor <- list(r = NA, p.val = NA)
     } else {
-      residual_cor$sign <- FALSE
+      residual_cor <- list(r = NA, sign = NA)
     }
     
-  } else {
-    residual_cor <- list(r = NA, sign = NA)
   }
   
   return(residual_cor)
-- 
GitLab


From 003a4a10f61fbe9230fb5a366b3881ca955209fa Mon Sep 17 00:00:00 2001
From: aho <an.ho@bsc.es>
Date: Mon, 25 Apr 2022 17:26:48 +0200
Subject: [PATCH 6/9] Format adjustment

---
 NAMESPACE           |   1 +
 R/ResidualCorr.R    | 207 +++++++++++++++++++++++++++++---------------
 man/ResidualCorr.Rd |  87 +++++++++++++++++++
 3 files changed, 223 insertions(+), 72 deletions(-)
 create mode 100644 man/ResidualCorr.Rd

diff --git a/NAMESPACE b/NAMESPACE
index ca5d500..841814e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -60,6 +60,7 @@ export(RatioRMS)
 export(RatioSDRMS)
 export(Regression)
 export(Reorder)
+export(ResidualCorr)
 export(SPOD)
 export(Season)
 export(SignalNoiseRatio)
diff --git a/R/ResidualCorr.R b/R/ResidualCorr.R
index 645e70c..2171da0 100644
--- a/R/ResidualCorr.R
+++ b/R/ResidualCorr.R
@@ -1,126 +1,189 @@
 #'Compute the residual correlation and its significance
 #'
-#'The residual correlation assesses whether a forecast captures any of the observed variability that is not 
-#'already captured by a reference forecast (Smith et al., 2019; https://doi.org/10.1038/s41612-019-0071-y.).
-#'The procedure is as follows: the residuals of the forecasts and observations are computed by linearly regressing out 
-#'the reference forecast's ensemble mean from the forecasts’ ensemble mean and observations, respectively. Then, the 
-#'residual correlation is computed as the correlation between both residuals. Positive values of the residual correlation
-#'indicate that the forecast capture more observed variability than the reference forecast, while negative values mean 
-#'that the reference forecast capture more. The significance of the residual correlation is computed with a two-sided t-test
-#'(Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7) using a effective degrees of freedom to account for 
-#'the time series' autocorrelation (von Storch and Zwiers, 1999; https://doi.org/10.1017/CBO9780511612336).
+#'The residual correlation assesses whether a forecast captures any of the 
+#'observed variability that is not already captured by a reference forecast 
+#'(Smith et al., 2019; https://doi.org/10.1038/s41612-019-0071-y.).
+#'The procedure is as follows: the residuals of the forecasts and observations 
+#'are computed by linearly regressing out the reference forecast's ensemble mean
+#'from the forecasts' ensemble mean and observations, respectively. Then, the 
+#'residual correlation is computed as the correlation between both residuals. 
+#'Positive values of the residual correlation indicate that the forecast capture
+#'more observed variability than the reference forecast, while negative values  
+#'mean that the reference forecast capture more. The significance of the 
+#'residual correlation is computed with a two-sided t-test
+#'(Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7) using an 
+#'effective degrees of freedom to account for the time series' autocorrelation 
+#'(von Storch and Zwiers, 1999; https://doi.org/10.1017/CBO9780511612336).
 #'
-#'@param exp A numerical array with the forecast with, at least, time dimension. 
-#'@param ref A numerical array with the reference forecast data with, at least, time dimension.
-#'  The length of the time dimension must be the same as for "exp".
-#'@param obs A numerical array with the observations with, at least, time dimension. 
-#'  The length of the time dimension must be the same as for "exp".
+#'@param exp A named numerical array of the forecast with at least time 
+#'  dimension. 
+#'@param ref A named numerical array of the reference forecast data with at 
+#'  least time dimension. The dimensions must be the same as "exp" except 
+#'  'memb_dim'.
+#'@param obs A named numerical array of the observations with at least time 
+#'  dimension. The dimensions must be the same as "exp" except 'memb_dim'.
 #'@param time_dim A character string indicating the name of the time dimension.
 #'  The default value is 'year'.
-#'@param member_dim A character string indicating the name of the member dimension to compute the 
-#'  ensemble means of the forecast and reference forecast. If it is NULL, the ensemble means should 
-#'  be provided directly to the function. The default value is NULL.
-#'@param method A character string indicating the correlation coefficient to be computed ("pearson", 
-#'  "kendall", or "spearman"). The default value is "pearson".
-#'@param alpha Significance level to be used in the statistical significance test. 
-#'  If it is a numeric, "sign" will be returned. If NULL, the p-value will be returned instead. 
-#'  The default value is NULL.
+#'@param memb_dim A character string indicating the name of the member dimension
+#'  to compute the ensemble mean of the forecast and reference forecast. If it 
+#'  is NULL, the ensemble mean should be provided directly to the function. The
+#'  default value is NULL.
+#'@param method A character string indicating the correlation coefficient to be
+#'  computed ("pearson", "kendall", or "spearman"). The default value is 
+#'  "pearson".
+#'@param alpha A numeric of the significance level to be used in the statistical
+#'  significance test. If it is a numeric, "sign" will be returned. If NULL, the
+#'  p-value will be returned instead. The default value is NULL.
 #'@param ncores An integer indicating the number of cores to use for parallel 
 #'  computation. The default value is NULL.
 #'
 #'@return A list with:
 #'\item{$r}{
-#'  A numerical array with the residual correlation with the same dimensions as the input arrays 
-#'  except "time_dim" (and "member_dim" if provided).
+#'  A numerical array of the residual correlation with the same dimensions as 
+#'  the input arrays except "time_dim" (and "memb_dim" if provided).
 #'}
 #'\item{$sign}{
-#'  A logical array with the statistical significance of the residual correlation with the same 
-#'  dimensions as the input arrays except "time_dim" (and "member_dim" if provided). Returned only 
-#'  if "alpha" is a numeric.
+#'  A logical array of the statistical significance of the residual correlation
+#'  with the same dimensions as the input arrays except "time_dim" (and 
+#'  "memb_dim" if provided). Returned only if "alpha" is a numeric.
 #'}
 #'\item{$p.val}{
 #'  A numeric array of the p-values with the same dimensions as the input arrays
-#'  except "time_dim" (and "memb_dim" if provided). Returned only if "alpha" is NULL.
+#'  except "time_dim" (and "memb_dim" if provided). Returned only if "alpha" is
+#'  NULL.
 #'}
 #'
 #'@examples
 #' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50))
 #' ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50))
 #' obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, year = 50))
-#' ResidualCorr(exp, ref, obs, member_dim = 'member')
+#' res <- ResidualCorr(exp, ref, obs, memb_dim = 'member')
 #'
 #'@import multiApply
 #'@export
-ResidualCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, method = 'pearson', alpha = NULL, ncores = NULL){
+ResidualCorr <- function(exp, ref, obs, time_dim = 'year', memb_dim = NULL, 
+                         method = 'pearson', alpha = NULL, ncores = NULL) {
   
-  ## Checkings
-  if (!is.array(exp)){stop('"exp" must be an array.')}
-  if (!is.array(obs)){stop('"obs" must be an array.')}
-  if (!is.array(ref)){stop('"ref" must be an array.')}
-  if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(ref)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "ref".')}
-  if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(obs)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "obs".')}
-  if (!is.null(alpha)){
-    if (!is.numeric(alpha) | alpha <= 0 | alpha >= 1){stop('"alpha" must be NULL or a number between 0 and 1.')}
+  # Check inputs
+  ## exp, ref, and obs (1)
+  if (!is.array(exp) | !is.numeric(exp))
+    stop('Parameter "exp" must be a numeric array.')
+  if (!is.array(ref) | !is.numeric(ref))
+    stop('Parameter "ref" must be a numeric array.')
+  if (!is.array(obs) | !is.numeric(obs))
+    stop('Parameter "obs" must be a numeric array.')
+
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) != 1)
+    stop('Parameter "time_dim" must be a character string.')
+  if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(ref)) | 
+      !time_dim %in% names(dim(obs))) {
+    stop("Parameter 'time_dim' is not found in 'exp', 'ref', or 'obs' dimension.")
   }
-  if (!is.character(time_dim)){stop('"time_dim" must be a string.')}
-  if (!is.null(member_dim)){
-    if (!is.character(member_dim)){stop('"member_dim" must be either NULL or a string.')}
+  ## memb_dim
+  if (!is.null(memb_dim)) {
+    if (!is.character(memb_dim) | length(memb_dim) > 1) {
+      stop("Parameter 'memb_dim' must be a character string.")
+    }
+    if (!memb_dim %in% names(dim(exp)) | !memb_dim %in% names(dim(ref))) {
+      stop("Parameter 'memb_dim' is not found in 'exp' or 'ref' dimension.")
+    }
   }
-  if (!method %in% c("pearson","kendall","spearman")){stop('"method" must be "pearson", "kendall", or "spearman".')}
-  if (!is.null(ncores)){
-    if (!is.numeric(ncores) | ncores < 1){stop('"ncores" must be either NULL or a positive integer.')}
+  ## exp, ref, and obs (2)
+  name_exp <- sort(names(dim(exp)))
+  name_ref <- sort(names(dim(ref)))
+  name_obs <- sort(names(dim(obs)))
+  if (!is.null(memb_dim)) {
+    name_exp <- name_exp[-which(name_exp == memb_dim)]
+    name_ref <- name_ref[-which(name_ref == memb_dim)]
   }
-  
-  ## Ensemble mean
-  if (!is.null(member_dim)){
-    exp <- multiApply::Apply(data = exp, target_dims = member_dim, fun = mean, na.rm = FALSE, ncores = ncores)$output1
-    ref <- multiApply::Apply(data = ref, target_dims = member_dim, fun = mean, na.rm = FALSE, ncores = ncores)$output1
+  if (!all.equal(dim(exp)[name_exp], dim(ref)[name_ref], dim(obs)[name_obs])) {
+    stop(paste0("Parameter 'exp', 'ref', and 'obs' must have same length of ",
+                "all dimensions expect 'memb_dim'."))
   }
-  
+  ## method
+  if (!method %in% c("pearson", "kendall", "spearman")) {
+    stop('Parameter "method" must be "pearson", "kendall", or "spearman".')
+  }
+  ## alpha
+  if (!is.null(alpha)) {
+    if (any(!is.numeric(alpha) | alpha <= 0 | alpha >= 1 |  
+        length(alpha) > 1)) {
+      stop('Parameter "alpha" must be NULL or a number between 0 and 1.')
+    }
+  }
+  ## ncores
+  if (!is.null(ncores)) {
+    if (any(!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
+        length(ncores) > 1)) {
+      stop('Parameter "ncores" must be either NULL or a positive integer.')
+    }
+  }
+
+  ###############################
+
+  # Calculate ensemble mean
+  dim_exp <- dim(exp)
+  dim_ref <- dim(ref)
+  dim_obs <- dim(obs)
+
+  if (!is.null(memb_dim)) {
+    exp_memb_dim_ind <- which(names(dim_exp) == memb_dim)
+    ref_memb_dim_ind <- which(names(dim_ref) == memb_dim)
+    exp <- apply(exp, c(1:length(dim_exp))[-exp_memb_dim_ind], mean, na.rm = FALSE)
+    ref <- apply(ref, c(1:length(dim_ref))[-ref_memb_dim_ind], mean, na.rm = FALSE)
+    if (is.null(dim(exp))) exp <- array(exp, dim = c(dim_exp[time_dim]))
+    if (is.null(dim(ref))) ref <- array(ref, dim = c(dim_ref[time_dim]))
+  }
+ 
+  # output_dims
+  if (is.null(alpha)) {
+    output_dims <- list(r = NULL, p.val = NULL)
+  } else {
+    output_dims <- list(r = NULL, sign = NULL)
+  }
+ 
   ## Residual correlation and its significance
-  output <- multiApply::Apply(data = list(exp = exp, ref = ref, obs = obs),
-                              target_dims = time_dim, fun = .ResidualCorr,
-                              time_dim = time_dim, method = method, 
-                              alpha = alpha, ncores = ncores)
+  output <- Apply(data = list(exp = exp, ref = ref, obs = obs),
+                  target_dims = time_dim, fun = .ResidualCorr,
+                  method = method, 
+                  alpha = alpha, ncores = ncores)
   return(output)
 }
 
-.ResidualCorr <- function(exp, ref, obs, time_dim, method, alpha){
-  
-  if (!anyNA(exp) & !anyNA(ref) & !anyNA(obs)){
+.ResidualCorr <- function(exp, ref, obs, method, alpha) {
+  # exp and ref and obs: [time]
+
+  if (!anyNA(exp) & !anyNA(ref) & !anyNA(obs)) {
     
-    ## Residuals of 'exp' and 'obs' (regressing 'ref' out in both 'exp' and 'obs')
+    # Residuals of 'exp' and 'obs' (regressing 'ref' out in both 'exp' and 'obs')
     exp_res <- lm(formula = y ~ x, data = list(y = exp, x = ref), na.action = NULL)$residuals
     obs_res <- lm(formula = y ~ x, data = list(y = obs, x = ref), na.action = NULL)$residuals
     
-    ## Residual correlation (and significance)
+    # Residual correlation (and significance)
     residual_cor <- NULL
     residual_cor$r <- cor(x = exp_res, y = obs_res, method = method)
     
-    ## Effective degrees of freedom
-    n_eff <- s2dv::Eno(data = obs_res, time_dim = time_dim, na.action = na.pass, ncores = 1)
+    # Effective degrees of freedom
+    n_eff <- .Eno(x = obs_res, na.action = na.pass)
     
     ## Statistic
-    t <- abs(residual_cor$r) * sqrt(n_eff-2) / sqrt(1-residual_cor$r^2)
+    t <- abs(residual_cor$r) * sqrt(n_eff - 2) / sqrt(1 - residual_cor$r^2)
     
-    if(is.null(alpha)){ ## p-value
-      
-      residual_cor$p.val <- pt(q = t, df = n_eff-2, lower.tail = FALSE)
-      
+    if (is.null(alpha)) { # p-value
+      residual_cor$p.val <- pt(q = t, df = n_eff - 2, lower.tail = FALSE)
     } else {
-      
-      t_alpha2_n2 <- qt(p = alpha/2, df = n_eff-2, lower.tail = FALSE)
-      if (anyNA(c(t,t_alpha2_n2)) == FALSE & t >= t_alpha2_n2){
+      t_alpha2_n2 <- qt(p = alpha / 2, df = n_eff - 2, lower.tail = FALSE)
+      if (!anyNA(c(t,t_alpha2_n2)) & t >= t_alpha2_n2) {
         residual_cor$sign <- TRUE
       } else {
         residual_cor$sign <- FALSE
       }
-      
     }
     
-  } else {## Significance
+  } else { # There is NA
     
-    if (is.null(alpha)){
+    if (is.null(alpha)) {
       residual_cor <- list(r = NA, p.val = NA)
     } else {
       residual_cor <- list(r = NA, sign = NA)
diff --git a/man/ResidualCorr.Rd b/man/ResidualCorr.Rd
new file mode 100644
index 0000000..6bc489c
--- /dev/null
+++ b/man/ResidualCorr.Rd
@@ -0,0 +1,87 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ResidualCorr.R
+\name{ResidualCorr}
+\alias{ResidualCorr}
+\title{Compute the residual correlation and its significance}
+\usage{
+ResidualCorr(
+  exp,
+  ref,
+  obs,
+  time_dim = "year",
+  memb_dim = NULL,
+  method = "pearson",
+  alpha = NULL,
+  ncores = NULL
+)
+}
+\arguments{
+\item{exp}{A named numerical array of the forecast with at least time 
+dimension.}
+
+\item{ref}{A named numerical array of the reference forecast data with at 
+least time dimension. The dimensions must be the same as "exp" except 
+'memb_dim'.}
+
+\item{obs}{A named numerical array of the observations with at least time 
+dimension. The dimensions must be the same as "exp" except 'memb_dim'.}
+
+\item{time_dim}{A character string indicating the name of the time dimension.
+The default value is 'year'.}
+
+\item{memb_dim}{A character string indicating the name of the member dimension
+to compute the ensemble mean of the forecast and reference forecast. If it 
+is NULL, the ensemble mean should be provided directly to the function. The
+default value is NULL.}
+
+\item{method}{A character string indicating the correlation coefficient to be
+computed ("pearson", "kendall", or "spearman"). The default value is 
+"pearson".}
+
+\item{alpha}{A numeric of the significance level to be used in the statistical
+significance test. If it is a numeric, "sign" will be returned. If NULL, the
+p-value will be returned instead. The default value is NULL.}
+
+\item{ncores}{An integer indicating the number of cores to use for parallel 
+computation. The default value is NULL.}
+}
+\value{
+A list with:
+\item{$r}{
+ A numerical array of the residual correlation with the same dimensions as 
+ the input arrays except "time_dim" (and "memb_dim" if provided).
+}
+\item{$sign}{
+ A logical array of the statistical significance of the residual correlation
+ with the same dimensions as the input arrays except "time_dim" (and 
+ "memb_dim" if provided). Returned only if "alpha" is a numeric.
+}
+\item{$p.val}{
+ A numeric array of the p-values with the same dimensions as the input arrays
+ except "time_dim" (and "memb_dim" if provided). Returned only if "alpha" is
+ NULL.
+}
+}
+\description{
+The residual correlation assesses whether a forecast captures any of the 
+observed variability that is not already captured by a reference forecast 
+(Smith et al., 2019; https://doi.org/10.1038/s41612-019-0071-y.).
+The procedure is as follows: the residuals of the forecasts and observations 
+are computed by linearly regressing out the reference forecast's ensemble mean
+from the forecasts' ensemble mean and observations, respectively. Then, the 
+residual correlation is computed as the correlation between both residuals. 
+Positive values of the residual correlation indicate that the forecast capture
+more observed variability than the reference forecast, while negative values  
+mean that the reference forecast capture more. The significance of the 
+residual correlation is computed with a two-sided t-test
+(Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7) using an 
+effective degrees of freedom to account for the time series' autocorrelation 
+(von Storch and Zwiers, 1999; https://doi.org/10.1017/CBO9780511612336).
+}
+\examples{
+exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50))
+ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50))
+obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, year = 50))
+res <- ResidualCorr(exp, ref, obs, memb_dim = 'member')
+
+}
-- 
GitLab


From 7d0833b866bc4ade28efdb9e6a54a56611741789 Mon Sep 17 00:00:00 2001
From: aho <an.ho@bsc.es>
Date: Tue, 26 Apr 2022 19:01:58 +0200
Subject: [PATCH 7/9] Add params 'N.eff' and 'handle.na'; Change time_dim
 default to 'sdate'; create unit tests

---
 R/ResidualCorr.R                   | 144 +++++++++++++++------
 man/ResidualCorr.Rd                |  25 +++-
 tests/testthat/test-ResidualCorr.R | 197 +++++++++++++++++++++++++++++
 3 files changed, 324 insertions(+), 42 deletions(-)
 create mode 100644 tests/testthat/test-ResidualCorr.R

diff --git a/R/ResidualCorr.R b/R/ResidualCorr.R
index 2171da0..ba00994 100644
--- a/R/ResidualCorr.R
+++ b/R/ResidualCorr.R
@@ -22,6 +22,11 @@
 #'  'memb_dim'.
 #'@param obs A named numerical array of the observations with at least time 
 #'  dimension. The dimensions must be the same as "exp" except 'memb_dim'.
+#'@param N.eff Effective sample size to be used in the statistical significance
+#'  test. It can be NA (and it will be computed with the s2dv:::.Eno), a 
+#'  numeric (which is used for all cases), or an array with the same dimensions
+#'  as "obs" except "time_dim" (for a particular N.eff to be used for each case)
+#'  . The default value is NA.
 #'@param time_dim A character string indicating the name of the time dimension.
 #'  The default value is 'year'.
 #'@param memb_dim A character string indicating the name of the member dimension
@@ -34,11 +39,17 @@
 #'@param alpha A numeric of the significance level to be used in the statistical
 #'  significance test. If it is a numeric, "sign" will be returned. If NULL, the
 #'  p-value will be returned instead. The default value is NULL.
+#'@param handle.na A charcater string indicating how to handle missing values.
+#'  If "return.na", NAs will be returned for the cases that contain at least one
+#'  NA in "exp", "ref", or "obs". If "only.complete.triplets", only the time 
+#'  steps with no missing values in all "exp", "ref", and "obs" will be used. If
+#'  "na.fail", an error will arise if any of "exp", "ref", or "obs" contains any
+#'  NA. The default value is "return.na".
 #'@param ncores An integer indicating the number of cores to use for parallel 
 #'  computation. The default value is NULL.
 #'
 #'@return A list with:
-#'\item{$r}{
+#'\item{$res.corr}{
 #'  A numerical array of the residual correlation with the same dimensions as 
 #'  the input arrays except "time_dim" (and "memb_dim" if provided).
 #'}
@@ -54,16 +65,17 @@
 #'}
 #'
 #'@examples
-#' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50))
-#' ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50))
-#' obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, year = 50))
+#' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50))
+#' ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 5, sdate = 50))
+#' obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50))
 #' res <- ResidualCorr(exp, ref, obs, memb_dim = 'member')
 #'
 #'@import multiApply
 #'@export
-ResidualCorr <- function(exp, ref, obs, time_dim = 'year', memb_dim = NULL, 
-                         method = 'pearson', alpha = NULL, ncores = NULL) {
-  
+ResidualCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate',
+                         memb_dim = NULL, method = 'pearson', alpha = NULL, 
+                         handle.na = 'return.na', ncores = NULL) {
+
   # Check inputs
   ## exp, ref, and obs (1)
   if (!is.array(exp) | !is.numeric(exp))
@@ -73,6 +85,19 @@ ResidualCorr <- function(exp, ref, obs, time_dim = 'year', memb_dim = NULL,
   if (!is.array(obs) | !is.numeric(obs))
     stop('Parameter "obs" must be a numeric array.')
 
+  ## N.eff
+  if (is.array(N.eff)) {
+    if (!is.numeric(N.eff)) stop("Parameter 'N.eff' must be numeric.")
+    if (!all(names(dim(N.eff)) %in% names(dim(obs))) |
+        any(dim(obs)[match(names(dim(N.eff)), names(dim(obs)))] != dim(N.eff))) {
+      stop(paste0('If parameter "N.eff" is provided with an array, it must ',
+                  'have the same dimensions as "obs" except "time_dim".'))
+    }
+  } else if (any((!is.na(N.eff) & !is.numeric(N.eff)) | length(N.eff) != 1)) {
+    stop(paste0('Parameter "N.eff" must be NA, a numeric, or an array with ',
+                'the same dimensions as "obs" except "time_dim".'))
+  }
+  
   ## time_dim
   if (!is.character(time_dim) | length(time_dim) != 1)
     stop('Parameter "time_dim" must be a character string.')
@@ -97,7 +122,8 @@ ResidualCorr <- function(exp, ref, obs, time_dim = 'year', memb_dim = NULL,
     name_exp <- name_exp[-which(name_exp == memb_dim)]
     name_ref <- name_ref[-which(name_ref == memb_dim)]
   }
-  if (!all.equal(dim(exp)[name_exp], dim(ref)[name_ref], dim(obs)[name_obs])) {
+  if (!identical(length(name_exp), length(name_ref), length(name_obs)) |
+      !identical(dim(exp)[name_exp], dim(ref)[name_ref], dim(obs)[name_obs])) {
     stop(paste0("Parameter 'exp', 'ref', and 'obs' must have same length of ",
                 "all dimensions expect 'memb_dim'."))
   }
@@ -112,6 +138,10 @@ ResidualCorr <- function(exp, ref, obs, time_dim = 'year', memb_dim = NULL,
       stop('Parameter "alpha" must be NULL or a number between 0 and 1.')
     }
   }
+  ## handle.na
+  if (!handle.na %in% c('return.na', 'only.complete.triplets', 'na.fail')) {
+    stop('Parameter "handle.na" must be "return.na", "only.complete.triplets" or "na.fail".')
+  }
   ## ncores
   if (!is.null(ncores)) {
     if (any(!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
@@ -122,6 +152,10 @@ ResidualCorr <- function(exp, ref, obs, time_dim = 'year', memb_dim = NULL,
 
   ###############################
 
+  # NA check: na.fail
+  if (handle.na == "na.fail" & (anyNA(exp) | anyNA(ref) | anyNA(obs)))
+    stop('The data contain NAs.')
+
   # Calculate ensemble mean
   dim_exp <- dim(exp)
   dim_ref <- dim(ref)
@@ -138,58 +172,94 @@ ResidualCorr <- function(exp, ref, obs, time_dim = 'year', memb_dim = NULL,
  
   # output_dims
   if (is.null(alpha)) {
-    output_dims <- list(r = NULL, p.val = NULL)
+    output_dims <- list(res.corr = NULL, p.val = NULL)
   } else {
-    output_dims <- list(r = NULL, sign = NULL)
+    output_dims <- list(res.corr = NULL, sign = NULL)
   }
  
-  ## Residual correlation and its significance
-  output <- Apply(data = list(exp = exp, ref = ref, obs = obs),
-                  target_dims = time_dim, fun = .ResidualCorr,
-                  method = method, 
-                  alpha = alpha, ncores = ncores)
+
+  # Residual correlation
+  if (is.array(N.eff)) {
+    output <- Apply(data = list(exp = exp, ref = ref,
+                                obs = obs, N.eff = N.eff),
+                    target_dims = list(exp = time_dim, ref = time_dim,
+                                       obs = time_dim, N.eff = NULL),
+                    output_dims = output_dims,
+                    fun = .ResidualCorr, method = method,
+                    alpha = alpha, handle.na = handle.na, ncores = ncores)
+  } else { 
+    output <- Apply(data = list(exp = exp, ref = ref, 
+                                obs = obs),
+                    target_dims = list(exp = time_dim, ref = time_dim, 
+                                       obs = time_dim), 
+                    output_dims = output_dims, N.eff = N.eff,
+                    fun = .ResidualCorr, method = method, 
+                    alpha = alpha, handle.na = handle.na, ncores = ncores)
+  }
+
   return(output)
 }
 
-.ResidualCorr <- function(exp, ref, obs, method, alpha) {
+.ResidualCorr <- function(exp, ref, obs, N.eff, method, alpha, handle.na) {
   # exp and ref and obs: [time]
+  .residual.corr <- function(exp, ref, obs, method, N.eff, alpha) {
 
-  if (!anyNA(exp) & !anyNA(ref) & !anyNA(obs)) {
-    
     # Residuals of 'exp' and 'obs' (regressing 'ref' out in both 'exp' and 'obs')
     exp_res <- lm(formula = y ~ x, data = list(y = exp, x = ref), na.action = NULL)$residuals
     obs_res <- lm(formula = y ~ x, data = list(y = obs, x = ref), na.action = NULL)$residuals
     
     # Residual correlation (and significance)
-    residual_cor <- NULL
-    residual_cor$r <- cor(x = exp_res, y = obs_res, method = method)
+    output <- NULL
+    output$res.corr <- cor(x = exp_res, y = obs_res, method = method)
     
     # Effective degrees of freedom
-    n_eff <- .Eno(x = obs_res, na.action = na.pass)
-    
-    ## Statistic
-    t <- abs(residual_cor$r) * sqrt(n_eff - 2) / sqrt(1 - residual_cor$r^2)
+    if (is.na(N.eff)) {
+      N.eff <- .Eno(x = obs_res, na.action = na.pass)
+    }  
+    t <- abs(output$res.corr) * sqrt(N.eff - 2) / sqrt(1 - output$res.corr^2)
     
     if (is.null(alpha)) { # p-value
-      residual_cor$p.val <- pt(q = t, df = n_eff - 2, lower.tail = FALSE)
+      output$p.val <- pt(q = t, df = N.eff - 2, lower.tail = FALSE)
     } else {
-      t_alpha2_n2 <- qt(p = alpha / 2, df = n_eff - 2, lower.tail = FALSE)
-      if (!anyNA(c(t,t_alpha2_n2)) & t >= t_alpha2_n2) {
-        residual_cor$sign <- TRUE
+      t_alpha2_n2 <- qt(p = alpha / 2, df = N.eff - 2, lower.tail = FALSE)
+      if (!anyNA(c(t, t_alpha2_n2)) & t >= t_alpha2_n2) {
+        output$sign <- TRUE
       } else {
-        residual_cor$sign <- FALSE
+        output$sign <- FALSE
       }
     }
     
-  } else { # There is NA
-    
-    if (is.null(alpha)) {
-      residual_cor <- list(r = NA, p.val = NA)
-    } else {
-      residual_cor <- list(r = NA, sign = NA)
+    return(output)
+  }
+
+
+  #==================================================
+  
+  if (anyNA(exp) | anyNA(ref) | anyNA(obs)) { ## There are NAs 
+    if (handle.na == 'only.complete.triplets') {
+      nna <- is.na(exp) | is.na(ref) | is.na(obs) # A vector of T/F
+      if (all(nna)) stop("There is no complete set of forecasts and observations.")
+      # Remove the incomplete set
+      exp <- exp[!nna]
+      ref <- ref[!nna]
+      obs <- obs[!nna]
+
+      output <- .residual.corr(exp = exp, ref = ref, obs = obs, method = method, 
+                               N.eff = N.eff, alpha = alpha)
+      
+    } else if (handle.na == 'return.na') {
+      # Data contain NA, return NAs directly without passing to .diff.corr
+      if (is.null(alpha)) {
+        output <- list(res.corr = NA, p.val = NA)
+      } else {
+        output <- list(res.corr = NA, sign = NA)
+      }
     }
     
+  } else { ## There is no NA  
+    output <- .residual.corr(exp = exp, ref = ref, obs = obs, method = method, 
+                         N.eff = N.eff, alpha = alpha)
   }
-  
-  return(residual_cor)
+
+  return(output)
 }
diff --git a/man/ResidualCorr.Rd b/man/ResidualCorr.Rd
index 6bc489c..1e0adad 100644
--- a/man/ResidualCorr.Rd
+++ b/man/ResidualCorr.Rd
@@ -8,10 +8,12 @@ ResidualCorr(
   exp,
   ref,
   obs,
-  time_dim = "year",
+  N.eff = NA,
+  time_dim = "sdate",
   memb_dim = NULL,
   method = "pearson",
   alpha = NULL,
+  handle.na = "return.na",
   ncores = NULL
 )
 }
@@ -26,6 +28,12 @@ least time dimension. The dimensions must be the same as "exp" except
 \item{obs}{A named numerical array of the observations with at least time 
 dimension. The dimensions must be the same as "exp" except 'memb_dim'.}
 
+\item{N.eff}{Effective sample size to be used in the statistical significance
+test. It can be NA (and it will be computed with the s2dv:::.Eno), a 
+numeric (which is used for all cases), or an array with the same dimensions
+as "obs" except "time_dim" (for a particular N.eff to be used for each case)
+. The default value is NA.}
+
 \item{time_dim}{A character string indicating the name of the time dimension.
 The default value is 'year'.}
 
@@ -42,12 +50,19 @@ computed ("pearson", "kendall", or "spearman"). The default value is
 significance test. If it is a numeric, "sign" will be returned. If NULL, the
 p-value will be returned instead. The default value is NULL.}
 
+\item{handle.na}{A charcater string indicating how to handle missing values.
+If "return.na", NAs will be returned for the cases that contain at least one
+NA in "exp", "ref", or "obs". If "only.complete.triplets", only the time 
+steps with no missing values in all "exp", "ref", and "obs" will be used. If
+"na.fail", an error will arise if any of "exp", "ref", or "obs" contains any
+NA. The default value is "return.na".}
+
 \item{ncores}{An integer indicating the number of cores to use for parallel 
 computation. The default value is NULL.}
 }
 \value{
 A list with:
-\item{$r}{
+\item{$res.corr}{
  A numerical array of the residual correlation with the same dimensions as 
  the input arrays except "time_dim" (and "memb_dim" if provided).
 }
@@ -79,9 +94,9 @@ effective degrees of freedom to account for the time series' autocorrelation
 (von Storch and Zwiers, 1999; https://doi.org/10.1017/CBO9780511612336).
 }
 \examples{
-exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50))
-ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50))
-obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, year = 50))
+exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50))
+ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 5, sdate = 50))
+obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50))
 res <- ResidualCorr(exp, ref, obs, memb_dim = 'member')
 
 }
diff --git a/tests/testthat/test-ResidualCorr.R b/tests/testthat/test-ResidualCorr.R
new file mode 100644
index 0000000..e1eec1a
--- /dev/null
+++ b/tests/testthat/test-ResidualCorr.R
@@ -0,0 +1,197 @@
+context("s2dv::ResidualCorr tests")
+
+##############################################
+
+# dat1
+set.seed(1)
+exp1 <- array(rnorm(180), dim = c(lat = 3, lon = 2, memb = 3, sdate = 10))
+set.seed(2)
+ref1 <- array(rnorm(120), dim = c(lat = 3, lon = 2, memb = 2, sdate = 10))
+set.seed(3)
+obs1 <- array(rnorm(60), dim = c(lat = 3, lon = 2, sdate = 10))
+Neff1 <- array(rep(9:10, 3), dim = c(lat = 3, lon = 2))
+
+# dat2
+set.seed(1)
+exp2 <- array(rnorm(10), dim = c(sdate = 10))
+set.seed(2)
+ref2 <- array(rnorm(10), dim = c(sdate = 10))
+set.seed(3)
+obs2 <- array(rnorm(10), dim = c(sdate = 10))
+
+
+
+##############################################
+test_that("1. Input checks", {
+  # exp and obs (1)
+  expect_error(
+  ResidualCorr(c()),
+  'Parameter "exp" must be a numeric array.'
+  )
+  expect_error(
+  ResidualCorr(exp1, c('a')),
+  'Parameter "ref" must be a numeric array.'
+  )
+  expect_error(
+  ResidualCorr(exp1, ref1, c()),
+  'Parameter "obs" must be a numeric array.'
+  )
+  # N.eff
+  expect_error(
+  ResidualCorr(exp1, ref1, obs1, N.eff = array(1, dim = c(lat = 2, lon = 2))),
+'If parameter "N.eff" is provided with an array, it must have the same dimensions as "obs" except "time_dim".'
+  )
+  expect_error(
+  ResidualCorr(exp1, ref1, obs1, N.eff = 1:3),
+  'Parameter "N.eff" must be NA, a numeric, or an array with the same dimensions as "obs" except "time_dim".'
+  )
+  # time_dim
+  expect_error(
+  ResidualCorr(exp1, ref1, obs1, time_dim = 1),
+  'Parameter "time_dim" must be a character string.'
+  )
+  expect_error(
+  ResidualCorr(exp1, ref1, obs1, time_dim = 'time'),
+  "Parameter 'time_dim' is not found in 'exp', 'ref', or 'obs' dimension."
+  )
+  # memb_dim
+  expect_error(
+  ResidualCorr(exp1, ref1, obs1, memb_dim = TRUE),
+  "Parameter 'memb_dim' must be a character string."
+  )
+  expect_error(
+  ResidualCorr(exp1, ref1, obs1, memb_dim = 'member'),
+  "Parameter 'memb_dim' is not found in 'exp' or 'ref' dimension."
+  )
+  # exp, ref, and obs (2)
+  expect_error(
+  ResidualCorr(exp1, array(1:10, dim = c(sdate = 10, memb = 1)), obs1, memb_dim = 'memb'),
+  "Parameter 'exp', 'ref', and 'obs' must have same length of all dimensions expect 'memb_dim'."
+  )
+  # method
+  expect_error(
+  ResidualCorr(exp1, ref1, obs1, method = 'asd', memb_dim = 'memb'),
+  'Parameter "method" must be "pearson", "kendall", or "spearman".'
+  )
+  # alpha
+  expect_error(
+  ResidualCorr(exp2, ref2, obs2, alpha = 1),
+  'Parameter "alpha" must be NULL or a number between 0 and 1.'
+  )
+  # handle.na
+  expect_error(
+  ResidualCorr(exp2, ref2, obs2, handle.na = TRUE),
+  'Parameter "handle.na" must be "return.na", "only.complete.triplets" or "na.fail".'
+  )
+  # ncores
+  expect_error(
+  ResidualCorr(exp2, ref2, obs2, ncores = 1.5),
+  'Parameter "ncores" must be either NULL or a positive integer.'
+  )
+
+})
+
+##############################################
+test_that("2. Output checks: dat1", {
+expect_equal(
+names(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb')),
+c("res.corr", "p.val")
+)
+expect_equal(
+dim(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb')$res.corr),
+c(lat = 3, lon = 2)
+)
+expect_equal(
+dim(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb')$p.val),
+c(lat = 3, lon = 2)
+)
+expect_equal(
+as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb')$res.corr),
+c(0.002784318, 0.537697647, -0.240071018, 0.258706464, 0.338160748, 0.432107476),
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb')$p),
+c(0.49695468, 0.05446055, 0.25203961, 0.23522967, 0.16960864, 0.10618145),
+tolerance = 0.0001
+)
+expect_equal(
+names(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', alpha = 0.05)),
+c("res.corr", "sign")
+)
+expect_equal(
+as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', alpha = 0.05)$res.corr),
+c(0.002784318, 0.537697647, -0.240071018, 0.258706464, 0.338160748, 0.432107476),
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', alpha = 0.05)$sign),
+rep(FALSE, 6)
+)
+expect_equal(
+as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', N.eff = Neff1)$res),
+c(0.002784318, 0.537697647, -0.240071018, 0.258706464, 0.338160748, 0.432107476),
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', N.eff = Neff1)$p),
+c(0.49716394, 0.05446055, 0.26690777, 0.23522967, 0.18671025, 0.10618145),
+tolerance = 0.0001
+)
+
+
+#---------------------------
+exp1[1] <- NA
+expect_equal(
+as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb')$res.corr),
+c(NA, 0.537697647, -0.240071018, 0.258706464, 0.338160748, 0.432107476),
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', handle.na = 'only.complete.triplets')$res.corr),
+c(-0.1082588, 0.537697647, -0.240071018, 0.258706464, 0.338160748, 0.432107476),
+tolerance = 0.0001
+)
+expect_error(
+ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', handle.na = 'na.fail'),
+"The data contain NAs."
+)
+
+exp1[1,1,1,1:3] <- NA
+ref1[1,1,1,4:8] <- NA
+obs1[1,1,9:10] <- NA
+expect_error(
+ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', handle.na = 'only.complete.triplets'),
+"There is no complete set of forecasts and observations."
+)
+
+
+})
+
+##############################################
+
+test_that("3. Output checks: dat2", {
+expect_equal(
+names(ResidualCorr(exp2, ref2, obs2)),
+c("res.corr", "p.val")
+)
+expect_equal(
+dim(ResidualCorr(exp2, ref2, obs2)$res.corr),
+NULL
+)
+expect_equal(
+dim(ResidualCorr(exp2, ref2, obs2)$p),
+NULL
+)
+expect_equal(
+ResidualCorr(exp2, ref2, obs2)$p,
+0.2209847,
+tolerance = 0.0001
+)
+expect_equal(
+ResidualCorr(exp2, ref2, obs2)$res,
+-0.274962,
+tolerance = 0.0001
+)
+
+})
-- 
GitLab


From c27f3bb6210b04fe3ff545a1010019a631d7312b Mon Sep 17 00:00:00 2001
From: aho <an.ho@bsc.es>
Date: Tue, 26 Apr 2022 19:15:11 +0200
Subject: [PATCH 8/9] Add unit tests for method kendall and spearman

---
 tests/testthat/test-ResidualCorr.R | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/tests/testthat/test-ResidualCorr.R b/tests/testthat/test-ResidualCorr.R
index e1eec1a..c9ac4b9 100644
--- a/tests/testthat/test-ResidualCorr.R
+++ b/tests/testthat/test-ResidualCorr.R
@@ -128,6 +128,28 @@ expect_equal(
 as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', alpha = 0.05)$sign),
 rep(FALSE, 6)
 )
+expect_equal(
+as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', method = "kendall")$res),
+c(0.11111111, 0.42222222, -0.20000000, 0.02222222, 0.20000000, 0.42222222),
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', method = "kendall")$p),
+c(0.3799615, 0.1120891, 0.2897920, 0.4757064, 0.2897920, 0.1120891),
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', method = "spearman")$res),
+c(0.13939394, 0.61212121, -0.27272727, 0.07878788, 0.28484848, 0.47878788),
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', method = "spearman")$p),
+c(0.35046594, 0.02998607, 0.22291917, 0.41435870, 0.21251908, 0.08076146),
+tolerance = 0.0001
+)
+
+
 expect_equal(
 as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', N.eff = Neff1)$res),
 c(0.002784318, 0.537697647, -0.240071018, 0.258706464, 0.338160748, 0.432107476),
-- 
GitLab


From 090925f232660f25eeb246671adf1459633d0dea Mon Sep 17 00:00:00 2001
From: Carlos Delgado Torres <carlos.delgado@bsc.es>
Date: Wed, 27 Apr 2022 17:13:37 +0200
Subject: [PATCH 9/9] clarified "$sign" output

---
 R/ResidualCorr.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/ResidualCorr.R b/R/ResidualCorr.R
index ba00994..dae0cb8 100644
--- a/R/ResidualCorr.R
+++ b/R/ResidualCorr.R
@@ -54,9 +54,9 @@
 #'  the input arrays except "time_dim" (and "memb_dim" if provided).
 #'}
 #'\item{$sign}{
-#'  A logical array of the statistical significance of the residual correlation
-#'  with the same dimensions as the input arrays except "time_dim" (and 
-#'  "memb_dim" if provided). Returned only if "alpha" is a numeric.
+#'  A logical array indicating whether the residual correlation is statistically 
+#'  significant or not with the same dimensions as the input arrays except "time_dim"
+#'  (and "memb_dim" if provided). Returned only if "alpha" is a numeric.
 #'}
 #'\item{$p.val}{
 #'  A numeric array of the p-values with the same dimensions as the input arrays
-- 
GitLab