From c88492d7dcd209b1b193e641f387abfd533ec6c8 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 17 Nov 2022 12:33:29 +0100 Subject: [PATCH 1/3] Add 'sign' parameter to Corr --- R/Corr.R | 55 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/R/Corr.R b/R/Corr.R index 67efb09..1cee56d 100644 --- a/R/Corr.R +++ b/R/Corr.R @@ -42,8 +42,10 @@ #' of the test Ho: Corr = 0. The default value is TRUE. #'@param conf A logical value indicating whether to retrieve the confidence #' intervals or not. The default value is TRUE. -#'@param conf.lev A numeric indicating the confidence level for the -#' regression computation. The default value is 0.95. +#'@param sign A logical value indicating whether to retrieve the statistical +#' significance of the test Ho: Corr = 0. The default value is FALSE. +#'@param alpha A numeric indicating the significance level for the +#' regression computation. The default value is 0.05. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -100,8 +102,8 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', comp_dim = NULL, limits = NULL, method = 'pearson', memb_dim = NULL, memb = TRUE, - pval = TRUE, conf = TRUE, - conf.lev = 0.95, ncores = NULL) { + pval = TRUE, conf = TRUE, sign = FALSE, + alpha = 0.05, ncores = NULL) { # Check inputs ## exp and obs (1) @@ -185,9 +187,13 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', if (!is.logical(conf) | length(conf) > 1) { stop("Parameter 'conf' must be one logical value.") } - ## conf.lev - if (!is.numeric(conf.lev) | conf.lev < 0 | conf.lev > 1 | length(conf.lev) > 1) { - stop("Parameter 'conf.lev' must be a numeric number between 0 and 1.") + ## sign + if (!is.logical(sign) | length(sign) > 1) { + stop("Parameter 'sign' must be one logical value.") + } + ## alpha + if (!is.numeric(alpha) | alpha < 0 | alpha > 1 | length(alpha) > 1) { + stop("Parameter 'alpha' must be a numeric number between 0 and 1.") } ## ncores if (!is.null(ncores)) { @@ -259,14 +265,17 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', fun = .Corr, dat_dim = dat_dim, memb_dim = memb_dim, time_dim = time_dim, method = method, - pval = pval, conf = conf, conf.lev = conf.lev, ncores_input = ncores, + pval = pval, conf = conf, sign = sign, alpha = alpha, + ncores_input = ncores, ncores = ncores) return(res) } -.Corr <- function(exp, obs, dat_dim = 'dataset', memb_dim = 'member', time_dim = 'sdate', method = 'pearson', - conf = TRUE, pval = TRUE, conf.lev = 0.95, ncores_input = NULL) { +.Corr <- function(exp, obs, dat_dim = 'dataset', memb_dim = 'member', + time_dim = 'sdate', method = 'pearson', + conf = TRUE, pval = TRUE, sign = FALSE, alpha = 0.05, + ncores_input = NULL) { if (is.null(memb_dim)) { if (is.null(dat_dim)) { # exp: [sdate] @@ -372,7 +381,7 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', # } # } - if (pval | conf) { + if (pval || conf || sign) { if (method == "kendall" | method == "spearman") { if (!is.null(dat_dim) | !is.null(memb_dim)) { tmp <- apply(obs, c(1:length(dim(obs)))[-1], rank) # for memb_dim = NULL, 2; for memb_dim, c(2, 3) @@ -406,13 +415,14 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', #############old################# #This doesn't return error but it's diff from cor.test() when method is spearman and kendall - if (pval) { + if (pval || sign) { t <-sqrt(CORR * CORR * (eno_expand - 2) / (1 - (CORR ^ 2))) p.val <- pt(t, eno_expand - 2, lower.tail = FALSE) + if (sign) signif <- p.val <= alpha } ################################### if (conf) { - conf.lower <- (1 - conf.lev) / 2 + conf.lower <- (1 - alpha) / 2 conf.upper <- 1 - conf.lower conflow <- tanh(atanh(CORR) + qnorm(conf.lower) / sqrt(eno_expand - 3)) confhigh <- tanh(atanh(CORR) + qnorm(conf.upper) / sqrt(eno_expand - 3)) @@ -433,16 +443,15 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', ################################### - if (pval & conf) { - res <- list(corr = CORR, p.val = p.val, - conf.lower = conflow, conf.upper = confhigh) - } else if (pval & !conf) { - res <- list(corr = CORR, p.val = p.val) - } else if (!pval & conf) { - res <- list(corr = CORR, - conf.lower = conflow, conf.upper = confhigh) - } else { - res <- list(corr = CORR) + res <- list(corr = CORR) + if (pval) { + res <- c(res, list(p.val = p.val)) + } + if (conf) { + res <- c(res, list(conf.lower = conflow, conf.upper = confhigh)) + } + if (sign) { + res <- c(res, list(sign = signif)) } return(res) -- GitLab From 13d802653e185c6fa9d3a4b59e406596e36ca0ec Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 17 Nov 2022 12:52:22 +0100 Subject: [PATCH 2/3] Fix computation of confidence level (alpha = 1 - conf.lev) --- R/Corr.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/Corr.R b/R/Corr.R index 1cee56d..01237a1 100644 --- a/R/Corr.R +++ b/R/Corr.R @@ -422,7 +422,7 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', } ################################### if (conf) { - conf.lower <- (1 - alpha) / 2 + conf.lower <- alpha / 2 conf.upper <- 1 - conf.lower conflow <- tanh(atanh(CORR) + qnorm(conf.lower) / sqrt(eno_expand - 3)) confhigh <- tanh(atanh(CORR) + qnorm(conf.upper) / sqrt(eno_expand - 3)) -- GitLab From 956dc25405fc035659ca9d621956e40a28ad2c53 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 17 Nov 2022 17:51:02 +0100 Subject: [PATCH 3/3] Refine doc and add unit tests --- R/Corr.R | 32 ++++++++++++++++++-------------- man/Corr.Rd | 20 ++++++++++++++------ tests/testthat/test-Corr.R | 35 ++++++++++++++++++++++++++++++++--- 3 files changed, 64 insertions(+), 23 deletions(-) diff --git a/R/Corr.R b/R/Corr.R index 01237a1..c95b103 100644 --- a/R/Corr.R +++ b/R/Corr.R @@ -38,14 +38,15 @@ #'@param memb A logical value indicating whether to remain 'memb_dim' dimension #' (TRUE) or do ensemble mean over 'memb_dim' (FALSE). Only functional when #' 'memb_dim' is not NULL. The default value is TRUE. -#'@param pval A logical value indicating whether to compute or not the p-value +#'@param pval A logical value indicating whether to return or not the p-value #' of the test Ho: Corr = 0. The default value is TRUE. -#'@param conf A logical value indicating whether to retrieve the confidence -#' intervals or not. The default value is TRUE. +#'@param conf A logical value indicating whether to return or not the confidence +#' intervals. The default value is TRUE. #'@param sign A logical value indicating whether to retrieve the statistical -#' significance of the test Ho: Corr = 0. The default value is FALSE. -#'@param alpha A numeric indicating the significance level for the -#' regression computation. The default value is 0.05. +#' significance of the test Ho: Corr = 0 based on 'alpha'. The default value is +#' FALSE. +#'@param alpha A numeric indicating the significance level for the statistical +#' significance test. The default value is 0.05. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -70,6 +71,9 @@ #'\item{$conf.upper}{ #' The upper confidence interval. Only present if \code{conf = TRUE}. #'} +#'\item{$sign}{ +#' The statistical significance. Only present if \code{sign = TRUE}. +#'} #' #'@examples #'# Case 1: Load sample data as in Load() example: @@ -266,7 +270,6 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', dat_dim = dat_dim, memb_dim = memb_dim, time_dim = time_dim, method = method, pval = pval, conf = conf, sign = sign, alpha = alpha, - ncores_input = ncores, ncores = ncores) return(res) @@ -274,8 +277,7 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', .Corr <- function(exp, obs, dat_dim = 'dataset', memb_dim = 'member', time_dim = 'sdate', method = 'pearson', - conf = TRUE, pval = TRUE, sign = FALSE, alpha = 0.05, - ncores_input = NULL) { + conf = TRUE, pval = TRUE, sign = FALSE, alpha = 0.05) { if (is.null(memb_dim)) { if (is.null(dat_dim)) { # exp: [sdate] @@ -386,15 +388,15 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', if (!is.null(dat_dim) | !is.null(memb_dim)) { tmp <- apply(obs, c(1:length(dim(obs)))[-1], rank) # for memb_dim = NULL, 2; for memb_dim, c(2, 3) names(dim(tmp))[1] <- time_dim - eno <- Eno(tmp, time_dim, ncores = ncores_input) + eno <- Eno(tmp, time_dim) } else { tmp <- rank(obs) tmp <- array(tmp) names(dim(tmp)) <- time_dim - eno <- Eno(tmp, time_dim, ncores = ncores_input) + eno <- Eno(tmp, time_dim) } } else if (method == "pearson") { - eno <- Eno(obs, time_dim, ncores = ncores_input) + eno <- Eno(obs, time_dim) } if (is.null(memb_dim)) { @@ -416,16 +418,18 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', #############old################# #This doesn't return error but it's diff from cor.test() when method is spearman and kendall if (pval || sign) { - t <-sqrt(CORR * CORR * (eno_expand - 2) / (1 - (CORR ^ 2))) + t <- sqrt(CORR * CORR * (eno_expand - 2) / (1 - (CORR ^ 2))) p.val <- pt(t, eno_expand - 2, lower.tail = FALSE) - if (sign) signif <- p.val <= alpha + if (sign) signif <- !is.na(p.val) & p.val <= alpha } ################################### if (conf) { conf.lower <- alpha / 2 conf.upper <- 1 - conf.lower + suppressWarnings({ conflow <- tanh(atanh(CORR) + qnorm(conf.lower) / sqrt(eno_expand - 3)) confhigh <- tanh(atanh(CORR) + qnorm(conf.upper) / sqrt(eno_expand - 3)) + }) } ################################### diff --git a/man/Corr.Rd b/man/Corr.Rd index 4cbd098..10bdf71 100644 --- a/man/Corr.Rd +++ b/man/Corr.Rd @@ -16,7 +16,8 @@ Corr( memb = TRUE, pval = TRUE, conf = TRUE, - conf.lev = 0.95, + sign = FALSE, + alpha = 0.05, ncores = NULL ) } @@ -52,14 +53,18 @@ member dimension, set NULL. The default value is NULL.} (TRUE) or do ensemble mean over 'memb_dim' (FALSE). Only functional when 'memb_dim' is not NULL. The default value is TRUE.} -\item{pval}{A logical value indicating whether to compute or not the p-value +\item{pval}{A logical value indicating whether to return or not the p-value of the test Ho: Corr = 0. The default value is TRUE.} -\item{conf}{A logical value indicating whether to retrieve the confidence -intervals or not. The default value is TRUE.} +\item{conf}{A logical value indicating whether to return or not the confidence +intervals. The default value is TRUE.} -\item{conf.lev}{A numeric indicating the confidence level for the -regression computation. The default value is 0.95.} +\item{sign}{A logical value indicating whether to retrieve the statistical +significance of the test Ho: Corr = 0 based on 'alpha'. The default value is +FALSE.} + +\item{alpha}{A numeric indicating the significance level for the statistical +significance test. The default value is 0.05.} \item{ncores}{An integer indicating the number of cores to use for parallel computation. The default value is NULL.} @@ -85,6 +90,9 @@ nobs are omitted. exp_memb is the number of member in experiment (i.e., \item{$conf.upper}{ The upper confidence interval. Only present if \code{conf = TRUE}. } +\item{$sign}{ + The statistical significance. Only present if \code{sign = TRUE}. +} } \description{ Calculate the correlation coefficient (Pearson, Kendall or Spearman) for diff --git a/tests/testthat/test-Corr.R b/tests/testthat/test-Corr.R index 6e3c016..ef013ca 100644 --- a/tests/testthat/test-Corr.R +++ b/tests/testthat/test-Corr.R @@ -22,6 +22,11 @@ context("s2dv::Corr tests") obs2 <- array(rnorm(30), dim = c(member = 1, dataset = 1, sdate = 5, lat = 2, lon = 3)) + exp2_2 <- exp2 + exp2_2[1, 1, 1, 1, 1] <- NA + obs2_2 <- obs2 + obs2_2[1, 1, 1:3, 1, 1] <- NA + # dat3: memb_dim = member, obs has multiple memb set.seed(1) exp3 <- array(rnorm(180), dim = c(member = 3, dataset = 2, sdate = 5, @@ -121,8 +126,8 @@ test_that("1. Input checks", { "integers smaller than the length of paramter 'comp_dim'.") ) expect_error( - Corr(exp1, obs1, conf.lev = -1), - "Parameter 'conf.lev' must be a numeric number between 0 and 1." + Corr(exp1, obs1, alpha = -1), + "Parameter 'alpha' must be a numeric number between 0 and 1." ) expect_error( Corr(exp1, obs1, method = 1), @@ -207,7 +212,7 @@ suppressWarnings( ) suppressWarnings( expect_equal( - min(Corr(exp1, obs1, conf.lev = 0.99)$conf.upper, na.rm = TRUE), + min(Corr(exp1, obs1, alpha = 0.01)$conf.upper, na.rm = TRUE), 0.2747904, tolerance = 0.0001 ) @@ -253,6 +258,10 @@ test_that("3. Output checks: dat2", { dim(Corr(exp2, obs2, memb_dim = 'member')$corr), c(nexp = 2, nobs = 1, exp_memb = 3, obs_memb = 1, lat = 2, lon = 3) ) + expect_equal( + dim(Corr(exp2, obs2, memb_dim = 'member')$corr), + dim(Corr(exp2, obs2, memb_dim = 'member')$p) + ) expect_equal( names(Corr(exp2, obs2, memb_dim = 'member')), c("corr", "p.val", "conf.lower", "conf.upper") @@ -269,6 +278,11 @@ test_that("3. Output checks: dat2", { names(Corr(exp2, obs2, memb_dim = 'member', pval = FALSE)), c("corr", "conf.lower", "conf.upper") ) + expect_equal( + names(Corr(exp2, obs2, memb_dim = 'member', pval = FALSE, sign = T)), + c("corr", "conf.lower", "conf.upper", "sign") + ) + expect_equal( mean(Corr(exp2, obs2, memb_dim = 'member', pval = FALSE, conf = FALSE)$corr), 0.01645575, @@ -299,6 +313,10 @@ test_that("3. Output checks: dat2", { c(-0.9500121, -0.9547642, -0.9883400, -0.8817478, -0.6879465), tolerance = 0.0001 ) + expect_equal( + which(Corr(exp2, obs2, memb_dim = 'member', pval = FALSE, conf = F, sign = T)$sign), + c(3, 6, 12, 17, 23, 34) + ) # ensemble mean expect_equal( @@ -336,7 +354,18 @@ test_that("3. Output checks: dat2", { tolerance = 0.0001 ) + # exp2_2 + expect_equal( + which(is.na(Corr(exp2_2, obs2_2, memb_dim = 'member', memb = FALSE, pval = FALSE)$corr)), + 1:2 + ) + expect_equal( + Corr(exp2_2, obs2_2, memb_dim = 'member', memb = FALSE, pval = FALSE)$corr[-c(1:2)], + Corr(exp2, obs2, memb_dim = 'member', memb = FALSE, pval = FALSE)$corr[-c(1:2)] + ) + }) + ############################################## test_that("4. Output checks: dat3", { # individual member -- GitLab