From c88492d7dcd209b1b193e641f387abfd533ec6c8 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Thu, 17 Nov 2022 12:33:29 +0100
Subject: [PATCH 1/3] Add 'sign' parameter to Corr

---
 R/Corr.R | 55 ++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 32 insertions(+), 23 deletions(-)

diff --git a/R/Corr.R b/R/Corr.R
index 67efb09..1cee56d 100644
--- a/R/Corr.R
+++ b/R/Corr.R
@@ -42,8 +42,10 @@
 #'  of the test Ho: Corr = 0. The default value is TRUE.
 #'@param conf A logical value indicating whether to retrieve the confidence 
 #'  intervals or not. The default value is TRUE.
-#'@param conf.lev A numeric indicating the confidence level for the 
-#'  regression computation. The default value is 0.95.
+#'@param sign A logical value indicating whether to retrieve the statistical
+#'  significance of the test Ho: Corr = 0. The default value is FALSE.
+#'@param alpha A numeric indicating the significance level for the 
+#'  regression computation. The default value is 0.05.
 #'@param ncores An integer indicating the number of cores to use for parallel 
 #'  computation. The default value is NULL.
 #'
@@ -100,8 +102,8 @@
 Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', 
                  comp_dim = NULL, limits = NULL, method = 'pearson', 
                  memb_dim = NULL, memb = TRUE,
-                 pval = TRUE, conf = TRUE,
-                 conf.lev = 0.95, ncores = NULL) {
+                 pval = TRUE, conf = TRUE, sign = FALSE,
+                 alpha = 0.05, ncores = NULL) {
 
   # Check inputs 
   ## exp and obs (1)
@@ -185,9 +187,13 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset',
   if (!is.logical(conf) | length(conf) > 1) {
     stop("Parameter 'conf' must be one logical value.")
   }
-  ## conf.lev
-  if (!is.numeric(conf.lev) | conf.lev < 0 | conf.lev > 1 | length(conf.lev) > 1) {
-    stop("Parameter 'conf.lev' must be a numeric number between 0 and 1.")
+  ## sign
+  if (!is.logical(sign) | length(sign) > 1) {
+    stop("Parameter 'sign' must be one logical value.")
+  }
+  ## alpha
+  if (!is.numeric(alpha) | alpha < 0 | alpha > 1 | length(alpha) > 1) {
+    stop("Parameter 'alpha' must be a numeric number between 0 and 1.")
   }
   ## ncores
   if (!is.null(ncores)) {
@@ -259,14 +265,17 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset',
                    fun = .Corr,
                    dat_dim = dat_dim, memb_dim = memb_dim,
                    time_dim = time_dim, method = method,
-                   pval = pval, conf = conf, conf.lev = conf.lev, ncores_input = ncores,
+                   pval = pval, conf = conf, sign = sign, alpha = alpha,
+		   ncores_input = ncores,
                    ncores = ncores)
 
  return(res)
 }
 
-.Corr <- function(exp, obs, dat_dim = 'dataset', memb_dim = 'member', time_dim = 'sdate', method = 'pearson',
-                  conf = TRUE, pval = TRUE, conf.lev = 0.95, ncores_input = NULL) {
+.Corr <- function(exp, obs, dat_dim = 'dataset', memb_dim = 'member',
+		  time_dim = 'sdate', method = 'pearson',
+                  conf = TRUE, pval = TRUE, sign = FALSE, alpha = 0.05,
+		  ncores_input = NULL) {
   if (is.null(memb_dim)) {
     if (is.null(dat_dim)) {
       # exp: [sdate]
@@ -372,7 +381,7 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset',
 #    }
 #  }
 
-  if (pval | conf) {
+  if (pval || conf || sign) {
     if (method == "kendall" | method == "spearman") {
       if (!is.null(dat_dim) | !is.null(memb_dim)) {
         tmp <- apply(obs, c(1:length(dim(obs)))[-1], rank)  # for memb_dim = NULL, 2; for memb_dim, c(2, 3)
@@ -406,13 +415,14 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset',
 
 #############old#################
 #This doesn't return error but it's diff from cor.test() when method is spearman and kendall
-  if (pval) {
+  if (pval || sign) {
     t <-sqrt(CORR * CORR * (eno_expand - 2) / (1 - (CORR ^ 2)))
     p.val <- pt(t, eno_expand - 2, lower.tail = FALSE)
+    if (sign) signif <- p.val <= alpha
   } 
 ###################################
   if (conf) {
-    conf.lower <- (1 - conf.lev) / 2
+    conf.lower <- (1 - alpha) / 2
     conf.upper <- 1 - conf.lower
     conflow <- tanh(atanh(CORR) + qnorm(conf.lower) / sqrt(eno_expand - 3))
     confhigh <- tanh(atanh(CORR) + qnorm(conf.upper) / sqrt(eno_expand - 3))
@@ -433,16 +443,15 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset',
 
 ###################################
 
-  if (pval & conf) {
-    res <- list(corr = CORR, p.val = p.val, 
-                conf.lower = conflow, conf.upper = confhigh)
-  } else if (pval & !conf) {
-    res <- list(corr = CORR, p.val = p.val)
-  } else if (!pval & conf) {
-    res <- list(corr = CORR,
-                conf.lower = conflow, conf.upper = confhigh)
-  } else {
-    res <- list(corr = CORR)
+  res <- list(corr = CORR)
+  if (pval) {
+    res <- c(res, list(p.val = p.val))
+  }
+  if (conf) {
+    res <- c(res, list(conf.lower = conflow, conf.upper = confhigh))
+  }
+  if (sign) {
+    res <- c(res, list(sign = signif))
   }
 
   return(res)
-- 
GitLab


From 13d802653e185c6fa9d3a4b59e406596e36ca0ec Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Thu, 17 Nov 2022 12:52:22 +0100
Subject: [PATCH 2/3] Fix computation of confidence level (alpha = 1 -
 conf.lev)

---
 R/Corr.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/Corr.R b/R/Corr.R
index 1cee56d..01237a1 100644
--- a/R/Corr.R
+++ b/R/Corr.R
@@ -422,7 +422,7 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset',
   } 
 ###################################
   if (conf) {
-    conf.lower <- (1 - alpha) / 2
+    conf.lower <- alpha / 2
     conf.upper <- 1 - conf.lower
     conflow <- tanh(atanh(CORR) + qnorm(conf.lower) / sqrt(eno_expand - 3))
     confhigh <- tanh(atanh(CORR) + qnorm(conf.upper) / sqrt(eno_expand - 3))
-- 
GitLab


From 956dc25405fc035659ca9d621956e40a28ad2c53 Mon Sep 17 00:00:00 2001
From: aho <an.ho@bsc.es>
Date: Thu, 17 Nov 2022 17:51:02 +0100
Subject: [PATCH 3/3] Refine doc and add unit tests

---
 R/Corr.R                   | 32 ++++++++++++++++++--------------
 man/Corr.Rd                | 20 ++++++++++++++------
 tests/testthat/test-Corr.R | 35 ++++++++++++++++++++++++++++++++---
 3 files changed, 64 insertions(+), 23 deletions(-)

diff --git a/R/Corr.R b/R/Corr.R
index 01237a1..c95b103 100644
--- a/R/Corr.R
+++ b/R/Corr.R
@@ -38,14 +38,15 @@
 #'@param memb A logical value indicating whether to remain 'memb_dim' dimension
 #'  (TRUE) or do ensemble mean over 'memb_dim' (FALSE). Only functional when 
 #'  'memb_dim' is not NULL. The default value is TRUE.
-#'@param pval A logical value indicating whether to compute or not the p-value 
+#'@param pval A logical value indicating whether to return or not the p-value 
 #'  of the test Ho: Corr = 0. The default value is TRUE.
-#'@param conf A logical value indicating whether to retrieve the confidence 
-#'  intervals or not. The default value is TRUE.
+#'@param conf A logical value indicating whether to return or not the confidence 
+#'  intervals. The default value is TRUE.
 #'@param sign A logical value indicating whether to retrieve the statistical
-#'  significance of the test Ho: Corr = 0. The default value is FALSE.
-#'@param alpha A numeric indicating the significance level for the 
-#'  regression computation. The default value is 0.05.
+#'  significance of the test Ho: Corr = 0 based on 'alpha'. The default value is
+#'  FALSE.
+#'@param alpha A numeric indicating the significance level for the statistical
+#'  significance test. The default value is 0.05. 
 #'@param ncores An integer indicating the number of cores to use for parallel 
 #'  computation. The default value is NULL.
 #'
@@ -70,6 +71,9 @@
 #'\item{$conf.upper}{
 #'  The upper confidence interval. Only present if \code{conf = TRUE}.
 #'}
+#'\item{$sign}{
+#'  The statistical significance. Only present if \code{sign = TRUE}.
+#'}
 #'
 #'@examples
 #'# Case 1: Load sample data as in Load() example: 
@@ -266,7 +270,6 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset',
                    dat_dim = dat_dim, memb_dim = memb_dim,
                    time_dim = time_dim, method = method,
                    pval = pval, conf = conf, sign = sign, alpha = alpha,
-		   ncores_input = ncores,
                    ncores = ncores)
 
  return(res)
@@ -274,8 +277,7 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset',
 
 .Corr <- function(exp, obs, dat_dim = 'dataset', memb_dim = 'member',
 		  time_dim = 'sdate', method = 'pearson',
-                  conf = TRUE, pval = TRUE, sign = FALSE, alpha = 0.05,
-		  ncores_input = NULL) {
+                  conf = TRUE, pval = TRUE, sign = FALSE, alpha = 0.05) {
   if (is.null(memb_dim)) {
     if (is.null(dat_dim)) {
       # exp: [sdate]
@@ -386,15 +388,15 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset',
       if (!is.null(dat_dim) | !is.null(memb_dim)) {
         tmp <- apply(obs, c(1:length(dim(obs)))[-1], rank)  # for memb_dim = NULL, 2; for memb_dim, c(2, 3)
         names(dim(tmp))[1] <- time_dim
-        eno <- Eno(tmp, time_dim, ncores = ncores_input)
+        eno <- Eno(tmp, time_dim)
       } else {
         tmp <- rank(obs)
         tmp <- array(tmp)
         names(dim(tmp)) <- time_dim
-        eno <- Eno(tmp, time_dim, ncores = ncores_input)
+        eno <- Eno(tmp, time_dim)
       }
     } else if (method == "pearson") {
-      eno <- Eno(obs, time_dim, ncores = ncores_input)  
+      eno <- Eno(obs, time_dim)  
     }
 
     if (is.null(memb_dim)) {
@@ -416,16 +418,18 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset',
 #############old#################
 #This doesn't return error but it's diff from cor.test() when method is spearman and kendall
   if (pval || sign) {
-    t <-sqrt(CORR * CORR * (eno_expand - 2) / (1 - (CORR ^ 2)))
+    t <- sqrt(CORR * CORR * (eno_expand - 2) / (1 - (CORR ^ 2)))
     p.val <- pt(t, eno_expand - 2, lower.tail = FALSE)
-    if (sign) signif <- p.val <= alpha
+    if (sign) signif <- !is.na(p.val) & p.val <= alpha
   } 
 ###################################
   if (conf) {
     conf.lower <- alpha / 2
     conf.upper <- 1 - conf.lower
+    suppressWarnings({
     conflow <- tanh(atanh(CORR) + qnorm(conf.lower) / sqrt(eno_expand - 3))
     confhigh <- tanh(atanh(CORR) + qnorm(conf.upper) / sqrt(eno_expand - 3))
+    })
   }
 
 ###################################  
diff --git a/man/Corr.Rd b/man/Corr.Rd
index 4cbd098..10bdf71 100644
--- a/man/Corr.Rd
+++ b/man/Corr.Rd
@@ -16,7 +16,8 @@ Corr(
   memb = TRUE,
   pval = TRUE,
   conf = TRUE,
-  conf.lev = 0.95,
+  sign = FALSE,
+  alpha = 0.05,
   ncores = NULL
 )
 }
@@ -52,14 +53,18 @@ member dimension, set NULL. The default value is NULL.}
 (TRUE) or do ensemble mean over 'memb_dim' (FALSE). Only functional when 
 'memb_dim' is not NULL. The default value is TRUE.}
 
-\item{pval}{A logical value indicating whether to compute or not the p-value 
+\item{pval}{A logical value indicating whether to return or not the p-value 
 of the test Ho: Corr = 0. The default value is TRUE.}
 
-\item{conf}{A logical value indicating whether to retrieve the confidence 
-intervals or not. The default value is TRUE.}
+\item{conf}{A logical value indicating whether to return or not the confidence 
+intervals. The default value is TRUE.}
 
-\item{conf.lev}{A numeric indicating the confidence level for the 
-regression computation. The default value is 0.95.}
+\item{sign}{A logical value indicating whether to retrieve the statistical
+significance of the test Ho: Corr = 0 based on 'alpha'. The default value is
+FALSE.}
+
+\item{alpha}{A numeric indicating the significance level for the statistical
+significance test. The default value is 0.05.}
 
 \item{ncores}{An integer indicating the number of cores to use for parallel 
 computation. The default value is NULL.}
@@ -85,6 +90,9 @@ nobs are omitted. exp_memb is the number of member in experiment (i.e.,
 \item{$conf.upper}{
  The upper confidence interval. Only present if \code{conf = TRUE}.
 }
+\item{$sign}{
+ The statistical significance. Only present if \code{sign = TRUE}.
+}
 }
 \description{
 Calculate the correlation coefficient (Pearson, Kendall or Spearman) for 
diff --git a/tests/testthat/test-Corr.R b/tests/testthat/test-Corr.R
index 6e3c016..ef013ca 100644
--- a/tests/testthat/test-Corr.R
+++ b/tests/testthat/test-Corr.R
@@ -22,6 +22,11 @@ context("s2dv::Corr tests")
   obs2 <- array(rnorm(30), dim = c(member = 1, dataset = 1, sdate = 5,
                                     lat = 2, lon = 3))
 
+  exp2_2 <- exp2
+  exp2_2[1, 1, 1, 1, 1] <- NA
+  obs2_2 <- obs2
+  obs2_2[1, 1, 1:3, 1, 1] <- NA
+
   # dat3: memb_dim = member, obs has multiple memb
   set.seed(1)
   exp3 <- array(rnorm(180), dim = c(member = 3, dataset = 2, sdate = 5,
@@ -121,8 +126,8 @@ test_that("1. Input checks", {
                   "integers smaller than the length of paramter 'comp_dim'.")
   )
   expect_error(
-  Corr(exp1, obs1, conf.lev = -1),
-  "Parameter 'conf.lev' must be a numeric number between 0 and 1."
+  Corr(exp1, obs1, alpha = -1),
+  "Parameter 'alpha' must be a numeric number between 0 and 1."
   )
   expect_error(
   Corr(exp1, obs1, method = 1),
@@ -207,7 +212,7 @@ suppressWarnings(
 )
 suppressWarnings(
   expect_equal(
-    min(Corr(exp1, obs1, conf.lev = 0.99)$conf.upper, na.rm = TRUE),
+    min(Corr(exp1, obs1, alpha = 0.01)$conf.upper, na.rm = TRUE),
     0.2747904,
     tolerance = 0.0001
   )
@@ -253,6 +258,10 @@ test_that("3. Output checks: dat2", {
     dim(Corr(exp2, obs2, memb_dim = 'member')$corr),
     c(nexp = 2, nobs = 1, exp_memb = 3, obs_memb = 1, lat = 2, lon = 3)
   )
+  expect_equal(
+    dim(Corr(exp2, obs2, memb_dim = 'member')$corr),
+    dim(Corr(exp2, obs2, memb_dim = 'member')$p)
+  )
   expect_equal(
     names(Corr(exp2, obs2, memb_dim = 'member')),
     c("corr", "p.val", "conf.lower", "conf.upper")
@@ -269,6 +278,11 @@ test_that("3. Output checks: dat2", {
     names(Corr(exp2, obs2, memb_dim = 'member', pval = FALSE)),
     c("corr", "conf.lower", "conf.upper")
   )
+  expect_equal(
+    names(Corr(exp2, obs2, memb_dim = 'member', pval = FALSE, sign = T)),
+    c("corr", "conf.lower", "conf.upper", "sign")
+  )
+
   expect_equal(
     mean(Corr(exp2, obs2, memb_dim = 'member', pval = FALSE, conf = FALSE)$corr),
     0.01645575,
@@ -299,6 +313,10 @@ test_that("3. Output checks: dat2", {
     c(-0.9500121, -0.9547642, -0.9883400, -0.8817478, -0.6879465),
     tolerance = 0.0001
   )
+  expect_equal(
+    which(Corr(exp2, obs2, memb_dim = 'member', pval = FALSE, conf = F, sign = T)$sign),
+    c(3, 6, 12, 17, 23, 34)
+  )
 
   # ensemble mean
   expect_equal(
@@ -336,7 +354,18 @@ test_that("3. Output checks: dat2", {
     tolerance = 0.0001
   )
 
+  # exp2_2
+  expect_equal(
+    which(is.na(Corr(exp2_2, obs2_2, memb_dim = 'member', memb = FALSE, pval = FALSE)$corr)),
+    1:2
+  )
+  expect_equal(
+  Corr(exp2_2, obs2_2, memb_dim = 'member', memb = FALSE, pval = FALSE)$corr[-c(1:2)],
+  Corr(exp2, obs2, memb_dim = 'member', memb = FALSE, pval = FALSE)$corr[-c(1:2)]
+  )
+
 })
+
 ##############################################
 test_that("4. Output checks: dat3", {
   # individual member
-- 
GitLab