From d2b5c786c57e4be602d624d1a7cd85cc40486a35 Mon Sep 17 00:00:00 2001
From: aho
Date: Mon, 2 Jan 2023 15:25:03 +0100
Subject: [PATCH 01/13] Modify alpha, add 'pval' and 'sign'

---
 R/DiffCorr.R                   | 82 +++++++++++++++++++++-------------
 man/DiffCorr.Rd                | 14 ++++--
 tests/testthat/test-DiffCorr.R | 14 +++---
 3 files changed, 68 insertions(+), 42 deletions(-)

diff --git a/R/DiffCorr.R b/R/DiffCorr.R
index 1e07458..6aaa7df 100644
--- a/R/DiffCorr.R
+++ b/R/DiffCorr.R
@@ -30,8 +30,7 @@
 #'@param method A character string indicating the correlation coefficient to be
 #' computed ("pearson" or "spearman"). The default value is "pearson".
 #'@param alpha A numeric of the significance level to be used in the statistical
-#' significance test. If it is a numeric, "sign" will be returned. If NULL, the
-#' p-value will be returned instead. The default value is NULL.
+#' significance test (output "sign"). The default value is 0.05.
 #'@param handle.na A character string indicating how to handle missing values.
 #' If "return.na", NAs will be returned for the cases that contain at least one
 #' NA in "exp", "ref", or "obs". If "only.complete.triplets", only the time
@@ -43,6 +42,11 @@
 #' significantly different) or "one-sided" (to assess whether the skill of
 #' "exp" is significantly higher than that of "ref") following Steiger (1980).
 #' The default value is "two-sided".
+#'@param pval A logical value indicating whether to return the p-value of the
+#' significance test Ho: DiffCorr = 0. The default value is TRUE.
+#'@param sign A logical value indicating whether to return the statistical
+#' significance of the test Ho: DiffCorr = 0 based on 'alpha'. The default
+#' value is FALSE.
 #'@param ncores An integer indicating the number of cores to use for parallel
 #' computation. The default value is NULL.
 #'
@@ -79,8 +83,9 @@
 #'@import multiApply
 #'@export
 DiffCorr <- function(exp, obs, ref, N.eff = NA, time_dim = 'sdate',
-                     memb_dim = NULL, method = 'pearson', alpha = NULL,
-                     handle.na = 'return.na', test.type = "two-sided", ncores = NULL) {
+                     memb_dim = NULL, method = 'pearson', alpha = 0.05,
+                     handle.na = 'return.na', test.type = "two-sided",
+                     pval = TRUE, sign = FALSE, ncores = NULL) {
 
   # Check inputs
   ## exp, ref, and obs (1)
@@ -141,11 +146,8 @@
                   "Monte-Carlo simulations that are done in Siegert et al., 2017"))
   }
   ## alpha
-  if (!is.null(alpha)) {
-    if (any(!is.numeric(alpha) | alpha <= 0 | alpha >= 1 |
-            length(alpha) > 1)) {
-      stop('Parameter "alpha" must be NULL or a number between 0 and 1.')
-    }
+  if (sign & any(!is.numeric(alpha) | alpha <= 0 | alpha >= 1 | length(alpha) > 1)) {
+    stop('Parameter "alpha" must be a number between 0 and 1.')
   }
   ## handle.na
   if (!handle.na %in% c('return.na', 'only.complete.triplets', 'na.fail')) {
@@ -157,7 +159,14 @@
   }
   #NOTE: warning can be removed in the next release
   .warning("The default significance test has changed after s2dv_1.2.0.
The default method is 'two-sided'.") - + ## pval + if (!is.logical(pval) | length(pval) > 1) { + stop("Parameter 'pval' must be one logical value.") + } + ## sign + if (!is.logical(sign) | length(sign) > 1) { + stop("Parameter 'sign' must be one logical value.") + } ## ncores if (!is.null(ncores)) { if (any(!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | @@ -187,10 +196,12 @@ DiffCorr <- function(exp, obs, ref, N.eff = NA, time_dim = 'sdate', } # output_dims - if (is.null(alpha)) { - output_dims <- list(diff.corr = NULL, p.val = NULL) - } else { - output_dims <- list(diff.corr = NULL, sign = NULL) + output_dims <- list(diff.corr = NULL) + if (pval) { + output_dims <- c(output_dims, list(p.val = NULL)) + } + if (sign) { + output_dims <- c(output_dims, list(sign = NULL)) } # Correlation difference if (is.array(N.eff)) { @@ -201,7 +212,7 @@ DiffCorr <- function(exp, obs, ref, N.eff = NA, time_dim = 'sdate', output_dims = output_dims, fun = .DiffCorr, method = method, alpha = alpha, handle.na = handle.na, - test.type = test.type, ncores = ncores) + test.type = test.type, pval = pval, sign = sign, ncores = ncores) } else { output <- Apply(data = list(exp = exp, obs = obs, ref = ref), target_dims = list(exp = time_dim, obs = time_dim, @@ -209,16 +220,18 @@ DiffCorr <- function(exp, obs, ref, N.eff = NA, time_dim = 'sdate', output_dims = output_dims, N.eff = N.eff, fun = .DiffCorr, method = method, alpha = alpha, handle.na = handle.na, - test.type = test.type, ncores = ncores) + test.type = test.type, pval = pval, sign = sign, ncores = ncores) } return(output) } -.DiffCorr <- function(exp, obs, ref, N.eff = NA, method = 'pearson', alpha = NULL, - handle.na = 'return.na', test.type = 'two.sided') { +.DiffCorr <- function(exp, obs, ref, N.eff = NA, method = 'pearson', alpha = 0.05, + handle.na = 'return.na', test.type = 'two.sided', + pval = TRUE, sign = FALSE) { - .diff.corr <- function(exp, obs, ref, method = 'pearson', N.eff = NA, alpha = NULL, test.type = 'two.sided') { + .diff.corr <- function(exp, obs, ref, method = 'pearson', N.eff = NA, alpha = 0.05, + test.type = 'two.sided', pval = TRUE, sign = FALSE) { # Correlation difference cor.exp <- cor(x = exp, y = obs, method = method) cor.ref <- cor(x = ref, y = obs, method = method) @@ -239,12 +252,14 @@ DiffCorr <- function(exp, obs, ref, N.eff = NA, time_dim = 'sdate', ## H0: the skill of exp is not higher than that of ref ## H1: the skill of exp is higher than that of ref - - p.value <- pt(t, df = N.eff - 3, lower.tail = FALSE) - - if (is.null(alpha)) { + + if (pval | sign) { + p.value <- pt(t, df = N.eff - 3, lower.tail = FALSE) + } + if (pval) { output$p.val <- p.value - } else { + } + if (sign) { output$sign <- ifelse(!is.na(p.value) & p.value <= alpha & output$diff.corr > 0, TRUE, FALSE) } @@ -252,12 +267,14 @@ DiffCorr <- function(exp, obs, ref, N.eff = NA, time_dim = 'sdate', ## H0: the skill difference of exp and ref is zero ## H1: the skill difference of exp and ref is different from zero - - p.value <- pt(abs(t), df = N.eff - 3, lower.tail = FALSE) - if (is.null(alpha)) { + if (pval | sign) { + p.value <- pt(abs(t), df = N.eff - 3, lower.tail = FALSE) + } + if (pval) { output$p.val <- p.value - } else { + } + if (sign) { output$sign <- ifelse(!is.na(p.value) & p.value <= alpha / 2, TRUE, FALSE) } @@ -280,20 +297,21 @@ DiffCorr <- function(exp, obs, ref, N.eff = NA, time_dim = 'sdate', ref <- ref[!nna] output <- .diff.corr(exp = exp, obs = obs, ref = ref, method = method, - N.eff = N.eff, alpha = alpha, test.type = test.type) + 
                           N.eff = N.eff, alpha = alpha, pval = pval, sign = sign, test.type = test.type)
 
     } else if (handle.na == 'return.na') {
       # Data contain NA, return NAs directly without passing to .diff.corr
-      if (is.null(alpha)) {
+      if (pval) {
         output <- list(diff.corr = NA, p.val = NA)
-      } else {
+      }
+      if (sign) {
         output <- list(diff.corr = NA, sign = NA)
       }
     }
 
   } else {  ## There is no NA
     output <- .diff.corr(exp = exp, obs = obs, ref = ref, method = method,
-                         N.eff = N.eff, alpha = alpha, test.type = test.type)
+                         N.eff = N.eff, alpha = alpha, pval = pval, sign = sign, test.type = test.type)
   }
 
   return(output)
diff --git a/man/DiffCorr.Rd b/man/DiffCorr.Rd
index d127af8..52171e8 100644
--- a/man/DiffCorr.Rd
+++ b/man/DiffCorr.Rd
@@ -12,9 +12,11 @@ DiffCorr(
   time_dim = "sdate",
   memb_dim = NULL,
   method = "pearson",
-  alpha = NULL,
+  alpha = 0.05,
   handle.na = "return.na",
   test.type = "two-sided",
+  pval = TRUE,
+  sign = FALSE,
   ncores = NULL
 )
 }
@@ -47,8 +49,7 @@ directly to the function.}
 computed ("pearson" or "spearman"). The default value is "pearson".}
 
 \item{alpha}{A numeric of the significance level to be used in the statistical
-significance test. If it is a numeric, "sign" will be returned. If NULL, the
-p-value will be returned instead. The default value is NULL.}
+significance test (output "sign"). The default value is 0.05.}
 
 \item{handle.na}{A character string indicating how to handle missing values.
 If "return.na", NAs will be returned for the cases that contain at least one
@@ -63,6 +64,13 @@ significantly different) or "one-sided" (to assess whether the skill of
 "exp" is significantly higher than that of "ref") following Steiger (1980).
 The default value is "two-sided".}
 
+\item{pval}{A logical value indicating whether to return the p-value of the
+significance test Ho: DiffCorr = 0. The default value is TRUE.}
+
+\item{sign}{A logical value indicating whether to return the statistical
+significance of the test Ho: DiffCorr = 0 based on 'alpha'. The default
+value is FALSE.}
+
 \item{ncores}{An integer indicating the number of cores to use for parallel
 computation. The default value is NULL.}
 }
diff --git a/tests/testthat/test-DiffCorr.R b/tests/testthat/test-DiffCorr.R
index f47ac1b..f7b771b 100644
--- a/tests/testthat/test-DiffCorr.R
+++ b/tests/testthat/test-DiffCorr.R
@@ -82,8 +82,8 @@ test_that("1. Input checks", {
   )
   # alpha
   expect_error(
-    DiffCorr(exp2, obs2, ref2, alpha = 1),
-    'Parameter "alpha" must be NULL or a number between 0 and 1.'
+    DiffCorr(exp2, obs2, ref2, alpha = 1, sign = T),
+    'Parameter "alpha" must be a number between 0 and 1.'
   )
   # handle.na
   expect_error(
@@ -130,7 +130,7 @@
 c(0.26166060, 0.15899774, 0.39264452, 0.27959883, 0.34736305, 0.07479832),
 tolerance = 0.0001
 )
 expect_equal(
-names(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', alpha = 0.05)),
+names(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', alpha = 0.05, sign = T, pval = F)),
 c("diff.corr", "sign")
 )
 expect_equal(
@@ -143,11 +143,11 @@ as.vector(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', alpha = 0.05, test.type
 as.vector(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', alpha = 0.05, test.type = "two-sided")$diff.corr)
 )
 expect_equal(
-as.vector(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', alpha = 0.05)$sign),
+as.vector(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', alpha = 0.05, sign = T)$sign),
 rep(FALSE, 6)
 )
 expect_equal(
-as.vector(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', alpha = 0.05, test.type = "one-sided")$sign),
+as.vector(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', alpha = 0.05, sign = T, test.type = "one-sided")$sign),
 rep(FALSE, 6)
 )
 expect_equal(
@@ -228,11 +228,11 @@
 DiffCorr(exp2, obs2, ref2, test.type = 'one-sided')$p,
 tolerance = 0.0001
 )
 expect_equal(
-DiffCorr(exp2, obs2, ref2, test.type = 'one-sided', alpha = 0.7)$sign,
+DiffCorr(exp2, obs2, ref2, test.type = 'one-sided', alpha = 0.7, sign = T)$sign,
 FALSE
 )
 expect_equal(
-DiffCorr(exp2, obs2, ref2, test.type = 'two-sided', alpha = 0.7)$sign,
+DiffCorr(exp2, obs2, ref2, test.type = 'two-sided', alpha = 0.7, sign = T)$sign,
 TRUE
 )
 expect_equal(
-- GitLab
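A minimal usage sketch of the DiffCorr() interface after this patch; the data, dimension names, and sizes below are invented for illustration:

  library(s2dv)
  set.seed(1)
  exp <- array(rnorm(120), dim = c(member = 2, sdate = 20, lat = 3))
  obs <- array(rnorm(60), dim = c(sdate = 20, lat = 3))
  ref <- array(rnorm(120), dim = c(member = 2, sdate = 20, lat = 3))
  # Default: p-value only (the behaviour that alpha = NULL used to select)
  res1 <- DiffCorr(exp, obs, ref, memb_dim = 'member')
  names(res1)  # "diff.corr" "p.val"
  # Significance flag instead of the p-value, tested at the 10% level
  res2 <- DiffCorr(exp, obs, ref, memb_dim = 'member',
                   pval = FALSE, sign = TRUE, alpha = 0.1)
  names(res2)  # "diff.corr" "sign"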
From 9a7c19fc0e48d3d83f0c27fdadbd79d07fac7813 Mon Sep 17 00:00:00 2001
From: aho
Date: Tue, 3 Jan 2023 09:39:55 +0100
Subject: [PATCH 02/13] Add param 'sign' and 'pval'; modify 'alpha'

---
 R/DiffCorr.R                       | 11 ++--
 R/ResidualCorr.R                   | 84 ++++++++++++++++++------------
 man/DiffCorr.Rd                    |  6 +--
 man/ResidualCorr.Rd                | 20 ++++---
 tests/testthat/test-ResidualCorr.R |  8 +--
 5 files changed, 79 insertions(+), 50 deletions(-)

diff --git a/R/DiffCorr.R b/R/DiffCorr.R
index 6aaa7df..4c6ffa0 100644
--- a/R/DiffCorr.R
+++ b/R/DiffCorr.R
@@ -58,12 +58,12 @@
 #'\item{$sign}{
 #' A logical array of the statistical significance of the correlation
 #' differences with the same dimensions as the input arrays except "time_dim"
-#' (and "memb_dim" if provided). Returned only if "alpha" is a numeric.
+#' (and "memb_dim" if provided). Returned only if "sign" is TRUE.
 #'}
 #'\item{$p.val}{
 #' A numeric array of the p-values with the same dimensions as the input arrays
-#' except "time_dim" (and "memb_dim" if provided). Returned only if "alpha" is
-#' NULL.
+#' except "time_dim" (and "memb_dim" if provided). Returned only if "pval" is
+#' TRUE.
 #'}
 #'
 #'@references
@@ -301,11 +301,12 @@
   } else if (handle.na == 'return.na') {
     # Data contain NA, return NAs directly without passing to .diff.corr
+    output <- list(diff.corr = NA)
     if (pval) {
-      output <- list(diff.corr = NA, p.val = NA)
+      output <- c(output, list(p.val = NA))
     }
     if (sign) {
-      output <- list(diff.corr = NA, sign = NA)
+      output <- c(output, list(sign = NA))
     }
   }
 
diff --git a/R/ResidualCorr.R b/R/ResidualCorr.R
index 8d9f040..18ca539 100644
--- a/R/ResidualCorr.R
+++ b/R/ResidualCorr.R
@@ -37,14 +37,18 @@
 #' computed ("pearson", "kendall", or "spearman"). The default value is
 #' "pearson".
 #'@param alpha A numeric of the significance level to be used in the statistical
-#' significance test. If it is a numeric, "sign" will be returned. If NULL, the
-#' p-value will be returned instead. The default value is NULL.
+#' significance test (output "sign"). The default value is 0.05. #'@param handle.na A charcater string indicating how to handle missing values. #' If "return.na", NAs will be returned for the cases that contain at least one #' NA in "exp", "ref", or "obs". If "only.complete.triplets", only the time #' steps with no missing values in all "exp", "ref", and "obs" will be used. If #' "na.fail", an error will arise if any of "exp", "ref", or "obs" contains any #' NA. The default value is "return.na". +#'@param pval A logical value indicating whether to return the p-value of the +#' significance test Ho: DiffCorr = 0. The default value is TRUE. +#'@param sign A logical value indicating whether to return the statistical +#' significance of the test Ho: DiffCorr = 0 based on 'alpha'. The default +#' value is FALSE. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -56,12 +60,12 @@ #'\item{$sign}{ #' A logical array indicating whether the residual correlation is statistically #' significant or not with the same dimensions as the input arrays except "time_dim" -#' (and "memb_dim" if provided). Returned only if "alpha" is a numeric. +#' (and "memb_dim" if provided). Returned only if "sign" is TRUE. #'} #'\item{$p.val}{ #' A numeric array of the p-values with the same dimensions as the input arrays -#' except "time_dim" (and "memb_dim" if provided). Returned only if "alpha" is -#' NULL. +#' except "time_dim" (and "memb_dim" if provided). Returned only if "pval" is +#' TRUE. #'} #' #'@examples @@ -73,8 +77,8 @@ #'@import multiApply #'@export ResidualCorr <- function(exp, obs, ref, N.eff = NA, time_dim = 'sdate', - memb_dim = NULL, method = 'pearson', alpha = NULL, - handle.na = 'return.na', ncores = NULL) { + memb_dim = NULL, method = 'pearson', alpha = 0.05, + handle.na = 'return.na', pval = TRUE, sign = FALSE, ncores = NULL) { # Check inputs ## exp, ref, and obs (1) @@ -132,16 +136,21 @@ ResidualCorr <- function(exp, obs, ref, N.eff = NA, time_dim = 'sdate', stop('Parameter "method" must be "pearson", "kendall", or "spearman".') } ## alpha - if (!is.null(alpha)) { - if (any(!is.numeric(alpha) | alpha <= 0 | alpha >= 1 | - length(alpha) > 1)) { - stop('Parameter "alpha" must be NULL or a number between 0 and 1.') - } + if (sign & any(!is.numeric(alpha) | alpha <= 0 | alpha >= 1 | length(alpha) > 1)) { + stop('Parameter "alpha" must be a number between 0 and 1.') } ## handle.na if (!handle.na %in% c('return.na', 'only.complete.triplets', 'na.fail')) { stop('Parameter "handle.na" must be "return.na", "only.complete.triplets" or "na.fail".') } + ## pval + if (!is.logical(pval) | length(pval) > 1) { + stop("Parameter 'pval' must be one logical value.") + } + ## sign + if (!is.logical(sign) | length(sign) > 1) { + stop("Parameter 'sign' must be one logical value.") + } ## ncores if (!is.null(ncores)) { if (any(!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | @@ -169,14 +178,15 @@ ResidualCorr <- function(exp, obs, ref, N.eff = NA, time_dim = 'sdate', if (is.null(dim(exp))) exp <- array(exp, dim = c(dim_exp[time_dim])) if (is.null(dim(ref))) ref <- array(ref, dim = c(dim_ref[time_dim])) } - + # output_dims - if (is.null(alpha)) { - output_dims <- list(res.corr = NULL, p.val = NULL) - } else { - output_dims <- list(res.corr = NULL, sign = NULL) + output_dims <- list(res.corr = NULL) + if (pval) { + output_dims <- c(output_dims, list(p.val = NULL)) } - + if (sign) { + output_dims <- c(output_dims, list(sign = NULL)) + } # 
Residual correlation if (is.array(N.eff)) { @@ -186,22 +196,26 @@ ResidualCorr <- function(exp, obs, ref, N.eff = NA, time_dim = 'sdate', ref = time_dim, N.eff = NULL), output_dims = output_dims, fun = .ResidualCorr, method = method, - alpha = alpha, handle.na = handle.na, ncores = ncores) + alpha = alpha, handle.na = handle.na, pval = pval, sign = sign, + ncores = ncores) } else { output <- Apply(data = list(exp = exp, obs = obs, ref = ref), target_dims = list(exp = time_dim, obs = time_dim, ref = time_dim), output_dims = output_dims, N.eff = N.eff, fun = .ResidualCorr, method = method, - alpha = alpha, handle.na = handle.na, ncores = ncores) + alpha = alpha, handle.na = handle.na, pval = pval, sign = sign, + ncores = ncores) } return(output) } -.ResidualCorr <- function(exp, obs, ref, N.eff, method, alpha, handle.na) { +.ResidualCorr <- function(exp, obs, ref, N.eff = NA, method = 'pearson', alpha = 0.05, + handle.na = 'return.na', pval = TRUE, sign = FALSE) { # exp and ref and obs: [time] - .residual.corr <- function(exp, obs, ref, method, N.eff, alpha) { + .residual.corr <- function(exp, obs, ref, N.eff = NA, method = 'pearson', alpha = 0.05, + pval = TRUE, sign = FALSE) { # Residuals of 'exp' and 'obs' (regressing 'ref' out in both 'exp' and 'obs') exp_res <- lm(formula = y ~ x, data = list(y = exp, x = ref), na.action = NULL)$residuals @@ -217,9 +231,13 @@ ResidualCorr <- function(exp, obs, ref, N.eff = NA, time_dim = 'sdate', } t <- abs(output$res.corr) * sqrt(N.eff - 2) / sqrt(1 - output$res.corr^2) - if (is.null(alpha)) { # p-value - output$p.val <- pt(q = t, df = N.eff - 2, lower.tail = FALSE) - } else { + if (pval | sign) { # p-value + p.value <- pt(q = t, df = N.eff - 2, lower.tail = FALSE) + } + if (pval) { + output$p.val <- p.value + } + if (sign) { t_alpha2_n2 <- qt(p = alpha / 2, df = N.eff - 2, lower.tail = FALSE) if (!anyNA(c(t, t_alpha2_n2)) & t >= t_alpha2_n2) { output$sign <- TRUE @@ -244,20 +262,22 @@ ResidualCorr <- function(exp, obs, ref, N.eff = NA, time_dim = 'sdate', ref <- ref[!nna] output <- .residual.corr(exp = exp, obs = obs, ref = ref, method = method, - N.eff = N.eff, alpha = alpha) + N.eff = N.eff, alpha = alpha, pval = pval, sign = sign) } else if (handle.na == 'return.na') { - # Data contain NA, return NAs directly without passing to .diff.corr - if (is.null(alpha)) { - output <- list(res.corr = NA, p.val = NA) - } else { - output <- list(res.corr = NA, sign = NA) + # Data contain NA, return NAs directly without passing to .residual.corr + output <- list(res.corr = NA) + if (pval) { + output <- c(output, list(p.val = NA)) + } + if (sign) { + output <- c(output, list(sign = NA)) } } } else { ## There is no NA output <- .residual.corr(exp = exp, obs = obs, ref = ref, method = method, - N.eff = N.eff, alpha = alpha) + N.eff = N.eff, alpha = alpha, pval = pval, sign = sign) } return(output) diff --git a/man/DiffCorr.Rd b/man/DiffCorr.Rd index 52171e8..44bd52b 100644 --- a/man/DiffCorr.Rd +++ b/man/DiffCorr.Rd @@ -83,12 +83,12 @@ A list with: \item{$sign}{ A logical array of the statistical significance of the correlation differences with the same dimensions as the input arrays except "time_dim" - (and "memb_dim" if provided). Returned only if "alpha" is a numeric. + (and "memb_dim" if provided). Returned only if "sign" is TRUE. } \item{$p.val}{ A numeric array of the p-values with the same dimensions as the input arrays - except "time_dim" (and "memb_dim" if provided). Returned only if "alpha" is - NULL. + except "time_dim" (and "memb_dim" if provided). 
Returned only if "pval" is
+ TRUE.
 }
 }
 \description{
diff --git a/man/ResidualCorr.Rd b/man/ResidualCorr.Rd
index fe7dd10..ad40f63 100644
--- a/man/ResidualCorr.Rd
+++ b/man/ResidualCorr.Rd
@@ -12,8 +12,10 @@ ResidualCorr(
   time_dim = "sdate",
   memb_dim = NULL,
   method = "pearson",
-  alpha = NULL,
+  alpha = 0.05,
   handle.na = "return.na",
+  pval = TRUE,
+  sign = FALSE,
   ncores = NULL
 )
 }
@@ -47,8 +49,7 @@ computed ("pearson", "kendall", or "spearman"). The default value is
 "pearson".}
 
 \item{alpha}{A numeric of the significance level to be used in the statistical
-significance test. If it is a numeric, "sign" will be returned. If NULL, the
-p-value will be returned instead. The default value is NULL.}
+significance test (output "sign"). The default value is 0.05.}
 
 \item{handle.na}{A character string indicating how to handle missing values.
 If "return.na", NAs will be returned for the cases that contain at least one
 NA in "exp", "ref", or "obs". If "only.complete.triplets", only the time
 steps with no missing values in all "exp", "ref", and "obs" will be used. If
 "na.fail", an error will arise if any of "exp", "ref", or "obs" contains any
 NA. The default value is "return.na".}
 
+\item{pval}{A logical value indicating whether to return the p-value of the
+significance test Ho: DiffCorr = 0. The default value is TRUE.}
+
+\item{sign}{A logical value indicating whether to return the statistical
+significance of the test Ho: DiffCorr = 0 based on 'alpha'. The default
+value is FALSE.}
+
 \item{ncores}{An integer indicating the number of cores to use for parallel
 computation. The default value is NULL.}
 }
@@ -69,12 +77,12 @@ A list with:
 \item{$sign}{
 A logical array indicating whether the residual correlation is statistically
 significant or not with the same dimensions as the input arrays except "time_dim"
-(and "memb_dim" if provided). Returned only if "alpha" is a numeric.
+(and "memb_dim" if provided). Returned only if "sign" is TRUE.
 }
 \item{$p.val}{
 A numeric array of the p-values with the same dimensions as the input arrays
-except "time_dim" (and "memb_dim" if provided). Returned only if "alpha" is
-NULL.
+except "time_dim" (and "memb_dim" if provided). Returned only if "pval" is
+TRUE.
 }
 }
 \description{
diff --git a/tests/testthat/test-ResidualCorr.R b/tests/testthat/test-ResidualCorr.R
index be71b47..61c677e 100644
--- a/tests/testthat/test-ResidualCorr.R
+++ b/tests/testthat/test-ResidualCorr.R
@@ -75,8 +75,8 @@ test_that("1. Input checks", {
   )
   # alpha
   expect_error(
-    ResidualCorr(exp2, obs2, ref2, alpha = 1),
-    'Parameter "alpha" must be NULL or a number between 0 and 1.'
+    ResidualCorr(exp2, obs2, ref2, alpha = 1, sign = T),
+    'Parameter "alpha" must be a number between 0 and 1.'
   )
   # handle.na
   expect_error(
@@ -116,7 +116,7 @@
 c(0.49695468, 0.05446055, 0.25203961, 0.23522967, 0.16960864, 0.10618145),
 tolerance = 0.0001
 )
 expect_equal(
-names(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', alpha = 0.05)),
+names(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', alpha = 0.05, sign = T, pval = F)),
 c("res.corr", "sign")
 )
 expect_equal(
@@ -125,7 +125,7 @@
 c(0.002784318, 0.537697647, -0.240071018, 0.258706464, 0.338160748, 0.432107476),
 tolerance = 0.0001
 )
 expect_equal(
-as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', alpha = 0.05)$sign),
+as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', alpha = 0.05, sign = T)$sign),
 rep(FALSE, 6)
 )
 expect_equal(
-- GitLab
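ResidualCorr() now follows the same convention. A minimal sketch, again with invented inputs:

  set.seed(2)
  exp <- array(rnorm(80), dim = c(sdate = 20, lon = 4))
  obs <- array(rnorm(80), dim = c(sdate = 20, lon = 4))
  ref <- array(rnorm(80), dim = c(sdate = 20, lon = 4))
  # Both outputs at once: p-values plus the TRUE/FALSE map at the default alpha = 0.05
  res <- ResidualCorr(exp, obs, ref, pval = TRUE, sign = TRUE)
  names(res)  # "res.corr" "p.val" "sign"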
From 7034991fd39a4c2c49e09bd9c3bf94bd25c00505 Mon Sep 17 00:00:00 2001
From: aho
Date: Tue, 3 Jan 2023 14:03:41 +0100
Subject: [PATCH 03/13] Change conf.lev to alpha; add sign

---
 R/ACC.R                   | 155 +++++++++++++++++++++------------------
 man/ACC.Rd                |  34 +++++----
 tests/testthat/test-ACC.R |  14 ++--
 3 files changed, 104 insertions(+), 99 deletions(-)

diff --git a/R/ACC.R b/R/ACC.R
index 64036f4..9da53aa 100644
--- a/R/ACC.R
+++ b/R/ACC.R
@@ -42,6 +42,13 @@
 #'@param lonlatbox A numeric vector of 4 indicating the corners of the domain of
 #' interest: c(lonmin, lonmax, latmin, latmax). The default value is NULL
 #' and the whole data will be used.
+#'@param alpha A numeric indicating the significance level for the statistical
+#' significance test. The default value is 0.05.
+#'@param pval A logical value indicating whether to compute the p-value or not.
+#' The default value is TRUE.
+#'@param sign A logical value indicating whether to retrieve the statistical
+#' significance of the test Ho: ACC = 0 based on 'alpha'. The default value is
+#' FALSE.
 #'@param conf A logical value indicating whether to retrieve the confidence
 #' intervals or not. The default value is TRUE.
 #'@param conftype A character string of "parametric" or "bootstrap".
@@ -53,10 +60,6 @@
 #' make sure that your experiment and observation always have the same number
 #' of members. "bootstrap" requires 'memb_dim' has value. The default value is
 #' 'parametric'.
-#'@param conf.lev A numeric indicating the confidence level for the
-#' regression computation. The default value is 0.95.
-#'@param pval A logical value indicating whether to compute the p-value or not.
-#' The default value is TRUE.
 #'@param ncores An integer indicating the number of cores to use for parallel
 #' computation. The default value is NULL.
 #'
@@ -68,6 +71,12 @@
 #' exp), and nobs is the number of observation (i.e., dat_dim in obs). If
 #' dat_dim is NULL, nexp and nobs are omitted.
 #'}
+#'\item{macc}{
+#' The mean anomaly correlation coefficient with dimensions
+#' c(nexp, nobs, the rest of the dimension except lat_dim, lon_dim, memb_dim, and
+#' avg_dim). Only present if 'avg_dim' is not NULL. If dat_dim is NULL, nexp
+#' and nobs are omitted.
+#'\item{$sign}{ +#' The statistical significance. Only present if \code{sign = TRUE}. #'} #'\item{macc_conf.lower}{ #' The lower confidence interval of MACC with the same dimensions as MACC. @@ -134,8 +140,8 @@ #'@export ACC <- function(exp, obs, dat_dim = 'dataset', lat_dim = 'lat', lon_dim = 'lon', space_dim = c('lat', 'lon'), avg_dim = 'sdate', memb_dim = 'member', - lat = NULL, lon = NULL, lonlatbox = NULL, - conf = TRUE, conftype = "parametric", conf.lev = 0.95, pval = TRUE, + lat = NULL, lon = NULL, lonlatbox = NULL, alpha = 0.05, + pval = TRUE, sign = FALSE, conf = TRUE, conftype = "parametric", ncores = NULL) { # Check inputs @@ -234,6 +240,18 @@ ACC <- function(exp, obs, dat_dim = 'dataset', lat_dim = 'lat', lon_dim = 'lon', } else { select_lonlat <- FALSE } + ## alpha + if (!is.numeric(alpha) | any(alpha < 0) | any(alpha > 1) | length(alpha) > 1) { + stop("Parameter 'alpha' must be a numeric number between 0 and 1.") + } + ## pval + if (!is.logical(pval) | length(pval) > 1) { + stop("Parameter 'pval' must be one logical value.") + } + ## sign + if (!is.logical(sign) | length(sign) > 1) { + stop("Parameter 'sign' must be one logical value.") + } ## conf if (!is.logical(conf) | length(conf) > 1) { stop("Parameter 'conf' must be one logical value.") @@ -246,15 +264,6 @@ ACC <- function(exp, obs, dat_dim = 'dataset', lat_dim = 'lat', lon_dim = 'lon', if (conftype == 'bootstrap' & is.null(memb_dim)) { stop("Parameter 'memb_dim' cannot be NULL when parameter 'conftype' is 'bootstrap'.") } - ## conf.lev - if (!is.numeric(conf.lev) | any(conf.lev < 0) | any(conf.lev > 1) | - length(conf.lev) > 1) { - stop("Parameter 'conf.lev' must be a numeric number between 0 and 1.") - } - } - ## pval - if (!is.logical(pval) | length(pval) > 1) { - stop("Parameter 'pval' must be one logical value.") } ## ncores if (!is.null(ncores)) { @@ -329,8 +338,8 @@ ACC <- function(exp, obs, dat_dim = 'dataset', lat_dim = 'lat', lon_dim = 'lon', fun = .ACC, dat_dim = dat_dim, avg_dim = avg_dim, lat = lat, - conftype = conftype, pval = pval, conf = conf, conf.lev = conf.lev, - ncores = ncores) + conftype = conftype, pval = pval, conf = conf, alpha = alpha, + sign = sign, ncores = ncores) # If bootstrap, calculate confidence level if (conftype == 'bootstrap') { @@ -346,8 +355,8 @@ ACC <- function(exp, obs, dat_dim = 'dataset', lat_dim = 'lat', lon_dim = 'lon', fun = .ACC_bootstrap, dat_dim = dat_dim, memb_dim = memb_dim, avg_dim = avg_dim, lat = lat, - conftype = conftype, pval = pval, conf = conf, conf.lev = conf.lev, - ncores = ncores) + conftype = conftype, pval = pval, conf = conf, alpha = alpha, + sign = sign, ncores = ncores) #NOTE: pval? res <- list(acc = res$acc, acc_conf.lower = res_conf$acc_conf.lower, @@ -360,8 +369,8 @@ ACC <- function(exp, obs, dat_dim = 'dataset', lat_dim = 'lat', lon_dim = 'lon', return(res) } -.ACC <- function(exp, obs, dat_dim = 'dataset', avg_dim = 'sdate', lat, - conf = TRUE, conftype = "parametric", conf.lev = 0.95, pval = TRUE) { +.ACC <- function(exp, obs, dat_dim = 'dataset', avg_dim = 'sdate', lat, alpha = 0.05, + pval = TRUE, sign = FALSE, conf = TRUE, conftype = "parametric") { # .ACC() should use all the spatial points to calculate ACC. It returns [nexp, nobs]. # If dat_dim = NULL, it returns a number. 
@@ -384,6 +393,7 @@ ACC <- function(exp, obs, dat_dim = 'dataset', lat_dim = 'lat', lon_dim = 'lon', if (is.null(avg_dim)) { acc <- array(dim = c(nexp = nexp, nobs = nobs)) if (pval) p.val <- array(dim = c(nexp = nexp, nobs = nobs)) + if (sign) signif <- array(dim = c(nexp = nexp, nobs = nobs)) if (conf) { conf.upper <- array(dim = c(nexp = nexp, nobs = nobs)) conf.lower <- array(dim = c(nexp = nexp, nobs = nobs)) @@ -394,6 +404,7 @@ ACC <- function(exp, obs, dat_dim = 'dataset', lat_dim = 'lat', lon_dim = 'lon', names(dim(acc))[3] <- avg_dim macc <- array(dim = c(nexp = nexp, nobs = nobs)) if (pval) p.val <- array(dim = c(nexp = nexp, nobs = nobs, dim(exp)[avg_dim])) + if (sign) signif <- array(dim = c(nexp = nexp, nobs = nobs, dim(exp)[avg_dim])) if (conf) { conf.upper <- array(dim = c(nexp = nexp, nobs = nobs, dim(exp)[avg_dim])) conf.lower <- array(dim = c(nexp = nexp, nobs = nobs, dim(exp)[avg_dim])) @@ -452,19 +463,21 @@ ACC <- function(exp, obs, dat_dim = 'dataset', lat_dim = 'lat', lon_dim = 'lon', # handle bottom = 0 if (is.infinite(acc[iexp, iobs])) acc[iexp, iobs] <- NA - # pval and conf - if (pval | conf) { + # pval, sign, and conf + if (pval | conf | sign) { if (conftype == "parametric") { # calculate effective sample size eno <- .Eno(as.vector(obs_sub), na.action = na.pass) - if (pval) { - t <- qt(conf.lev, eno - 2) # a number - p.val[iexp, iobs] <- sqrt(t^2 / (t^2 + eno - 2)) + if (pval | sign) { + t <- qt(1 - alpha, eno - 2) # a number + p.value <- sqrt(t^2 / (t^2 + eno - 2)) + if (pval) p.val[iexp, iobs] <- p.value + if (sign) signif[iexp, iobs] <- !is.na(p.value) & p.value <= alpha } if (conf) { - conf.upper[iexp, iobs] <- tanh(atanh(acc[iexp, iobs]) + qnorm(1 - (1 - conf.lev) / 2) / sqrt(eno - 3)) - conf.lower[iexp, iobs] <- tanh(atanh(acc[iexp, iobs]) + qnorm((1 - conf.lev) / 2) / sqrt(eno - 3)) + conf.upper[iexp, iobs] <- tanh(atanh(acc[iexp, iobs]) + qnorm(1 - alpha / 2) / sqrt(eno - 3)) + conf.lower[iexp, iobs] <- tanh(atanh(acc[iexp, iobs]) + qnorm(alpha / 2) / sqrt(eno - 3)) } } } @@ -491,8 +504,8 @@ ACC <- function(exp, obs, dat_dim = 'dataset', lat_dim = 'lat', lon_dim = 'lon', if (is.infinite(acc[iexp, iobs, i])) acc[iexp, iobs, i] <- NA } - # pval and conf - if (pval | conf) { + # pval, sign, and conf + if (pval | sign | conf) { if (conftype == "parametric") { # calculate effective sample size along lat_dim and lon_dim # combine lat_dim and lon_dim into one dim first @@ -500,15 +513,17 @@ ACC <- function(exp, obs, dat_dim = 'dataset', lat_dim = 'lat', lon_dim = 'lon', dim = c(space = prod(dim(obs_sub)[1:2]), dim(obs_sub)[3])) eno <- apply(obs_tmp, 2, .Eno, na.action = na.pass) # a vector of avg_dim - if (pval) { - t <- qt(conf.lev, eno - 2) # a vector of avg_dim - p.val[iexp, iobs, ] <- sqrt(t^2 / (t^2 + eno - 2)) + if (pval | sign) { + t <- qt(1 - alpha, eno - 2) # a vector of avg_dim + p.value <- sqrt(t^2 / (t^2 + eno - 2)) + if (pval) p.val[iexp, iobs, ] <- p.value + if (sign) signif[iexp, iobs, ] <- !is.na(p.value) & p.value <= alpha } if (conf) { conf.upper[iexp, iobs, ] <- tanh(atanh(acc[iexp, iobs, ]) + - qnorm(1 - (1 - conf.lev) / 2) / sqrt(eno - 3)) + qnorm(1 - alpha / 2) / sqrt(eno - 3)) conf.lower[iexp, iobs, ] <- tanh(atanh(acc[iexp, iobs, ]) + - qnorm((1 - conf.lev) / 2) / sqrt(eno - 3)) + qnorm(alpha / 2) / sqrt(eno - 3)) } } } @@ -527,9 +542,9 @@ ACC <- function(exp, obs, dat_dim = 'dataset', lat_dim = 'lat', lon_dim = 'lon', conf.lower <- as.vector(conf.lower) conf.upper <- as.vector(conf.upper) } - if (pval) { - p.val <- as.vector(p.val) - } + 
if (pval) p.val <- as.vector(p.val) + if (sign) signif <- as.vector(signif) + } else { dim(acc) <- dim(acc)[3:length(dim(acc))] macc <- as.vector(macc) @@ -537,46 +552,28 @@ ACC <- function(exp, obs, dat_dim = 'dataset', lat_dim = 'lat', lon_dim = 'lon', dim(conf.lower) <- dim(conf.lower)[3:length(dim(conf.lower))] dim(conf.upper) <- dim(conf.upper)[3:length(dim(conf.upper))] } - if (pval) { - dim(p.val) <- dim(p.val)[3:length(dim(p.val))] - } + if (pval) dim(p.val) <- dim(p.val)[3:length(dim(p.val))] + if (sign) dim(signif) <- dim(signif)[3:length(dim(signif))] } } # Return output if (is.null(avg_dim)) { - if (conf & pval) { - return(list(acc = acc, conf.lower = conf.lower, conf.upper = conf.upper, - p.val = p.val)) - } else if (conf & !pval) { - return(list(acc = acc, conf.lower = conf.lower, conf.upper = conf.upper, - macc = macc)) - } else if (!conf & pval) { - return(list(acc = acc, p.val = p.val)) - } else { - return(list(acc = acc)) - } + output <- list(acc = acc) } else { - if (conf & pval) { - return(list(acc = acc, conf.lower = conf.lower, conf.upper = conf.upper, - p.val = p.val, macc = macc)) - } else if (conf & !pval) { - return(list(acc = acc, conf.lower = conf.lower, conf.upper = conf.upper, - macc = macc)) - } else if (!conf & pval) { - return(list(acc = acc, p.val = p.val, macc = macc)) - } else { - return(list(acc = acc, macc = macc)) - } + output <- list(acc = acc, macc = macc) } + if (conf) output <- c(output, list(conf.lower = conf.lower, conf.upper = conf.upper)) + if (pval) output <- c(output, list(p.val = p.val)) + if (sign) output <- c(output, list(sign = signif)) + return(output) } .ACC_bootstrap <- function(exp, obs, dat_dim = 'dataset', - avg_dim = 'sdate', memb_dim = NULL, lat, - conf = TRUE, conftype = "parametric", conf.lev = 0.95, - pval = TRUE) { + avg_dim = 'sdate', memb_dim = NULL, lat, alpha = 0.05, + pval = TRUE, sign = FALSE, conf = TRUE, conftype = "parametric") { # if (is.null(avg_dim)) # exp: [memb_exp, dat_exp, lat, lon] # obs: [memb_obs, dat_obs, lat, lon] @@ -633,8 +630,8 @@ ACC <- function(exp, obs, dat_dim = 'dataset', lat_dim = 'lat', lon_dim = 'lon', } #calculate the ACC of the randomized field - tmpACC <- .ACC(drawexp, drawobs, conf = FALSE, pval = FALSE, avg_dim = avg_dim, - lat = lat) + tmpACC <- .ACC(drawexp, drawobs, conf = FALSE, pval = FALSE, sign = FALSE, + avg_dim = avg_dim, lat = lat) if (is.null(avg_dim)) { acc_draw[, , jdraw] <- tmpACC$acc } else { @@ -647,24 +644,24 @@ ACC <- function(exp, obs, dat_dim = 'dataset', lat_dim = 'lat', lon_dim = 'lon', if (is.null(avg_dim)) { acc_conf.upper <- apply(acc_draw, c(1, 2), function (x) { - quantile(x, 1 - (1 - conf.lev) / 2, na.rm = TRUE)}) + quantile(x, 1 - alpha / 2, na.rm = TRUE)}) acc_conf.lower <- apply(acc_draw, c(1, 2), function (x) { - quantile(x, (1 - conf.lev) / 2, na.rm = TRUE)}) + quantile(x, alpha / 2, na.rm = TRUE)}) } else { acc_conf.upper <- apply(acc_draw, c(1, 2, 3), function (x) { - quantile(x, 1 - (1 - conf.lev) / 2, na.rm = TRUE)}) + quantile(x, 1 - alpha / 2, na.rm = TRUE)}) acc_conf.lower <- apply(acc_draw, c(1, 2, 3), function (x) { - quantile(x, (1 - conf.lev) / 2, na.rm = TRUE)}) + quantile(x, alpha / 2, na.rm = TRUE)}) macc_conf.upper <- apply(macc_draw, c(1, 2), function (x) { - quantile(x, 1 - (1 - conf.lev) / 2, na.rm = TRUE)}) + quantile(x, 1 - alpha / 2, na.rm = TRUE)}) macc_conf.lower <- apply(macc_draw, c(1, 2), function (x) { - quantile(x, (1 - conf.lev) / 2, na.rm = TRUE)}) + quantile(x, alpha / 2, na.rm = TRUE)}) } # Return output diff --git 
a/man/ACC.Rd b/man/ACC.Rd
index 7df6abb..522843a 100644
--- a/man/ACC.Rd
+++ b/man/ACC.Rd
@@ -16,10 +16,11 @@ ACC(
   lat = NULL,
   lon = NULL,
   lonlatbox = NULL,
+  alpha = 0.05,
+  pval = TRUE,
+  sign = FALSE,
   conf = TRUE,
   conftype = "parametric",
-  conf.lev = 0.95,
-  pval = TRUE,
   ncores = NULL
 )
 }
@@ -67,6 +68,16 @@ NULL.}
 interest: c(lonmin, lonmax, latmin, latmax). The default value is NULL
 and the whole data will be used.}
 
+\item{alpha}{A numeric indicating the significance level for the statistical
+significance test. The default value is 0.05.}
+
+\item{pval}{A logical value indicating whether to compute the p-value or not.
+The default value is TRUE.}
+
+\item{sign}{A logical value indicating whether to retrieve the statistical
+significance of the test Ho: ACC = 0 based on 'alpha'. The default value is
+FALSE.}
+
 \item{conf}{A logical value indicating whether to retrieve the confidence
 intervals or not. The default value is TRUE.}
 
@@ -80,12 +91,6 @@ make sure that your experiment and observation always have the same number
 of members. "bootstrap" requires 'memb_dim' has value. The default value is
 'parametric'.}
 
-\item{conf.lev}{A numeric indicating the confidence level for the
-regression computation. The default value is 0.95.}
-
-\item{pval}{A logical value indicating whether to compute the p-value or not.
-The default value is TRUE.}
-
 \item{ncores}{An integer indicating the number of cores to use for parallel
 computation. The default value is NULL.}
 }
@@ -97,6 +102,12 @@ A list containing the numeric arrays:\cr
 exp), and nobs is the number of observation (i.e., dat_dim in obs). If
 dat_dim is NULL, nexp and nobs are omitted.
 }
+\item{macc}{
+ The mean anomaly correlation coefficient with dimensions
+ c(nexp, nobs, the rest of the dimension except lat_dim, lon_dim, memb_dim, and
+ avg_dim). Only present if 'avg_dim' is not NULL. If dat_dim is NULL, nexp
+ and nobs are omitted.
+}
 \item{conf.lower (if conftype = "parametric") or acc_conf.lower (if
  conftype = "bootstrap")}{
  The lower confidence interval of ACC with the same dimensions as ACC. Only
@@ -111,11 +122,8 @@ A list containing the numeric arrays:\cr
 The p-value with the same dimensions as ACC. Only present if
 \code{pval = TRUE} and \code{conftype = "parametric"}.
 }
-\item{macc}{
- The mean anomaly correlation coefficient with dimensions
- c(nexp, nobs, the rest of the dimension except lat_dim, lon_dim, memb_dim, and
- avg_dim). Only present if 'avg_dim' is not NULL. If dat_dim is NULL, nexp
- and nobs are omitted.
+\item{$sign}{
+ The statistical significance. Only present if \code{sign = TRUE}.
 }
 \item{macc_conf.lower}{
 The lower confidence interval of MACC with the same dimensions as MACC.
diff --git a/tests/testthat/test-ACC.R b/tests/testthat/test-ACC.R
index 6431a9c..9753922 100644
--- a/tests/testthat/test-ACC.R
+++ b/tests/testthat/test-ACC.R
@@ -131,10 +131,10 @@ test_that("1. Input checks", {
     ACC(exp1, obs1, lat = c(1, 2), lon = c(1, 2, 3), memb_dim = NULL, conftype = 'bootstrap'),
     "Parameter 'memb_dim' cannot be NULL when parameter 'conftype' is 'bootstrap'."
   )
-  # conf.lev
+  # alpha
   expect_error(
-    ACC(exp1, obs1, lat = c(1, 2), lon = c(1, 2, 3), conf.lev = -1),
-    "Parameter 'conf.lev' must be a numeric number between 0 and 1."
+    ACC(exp1, obs1, lat = c(1, 2), lon = c(1, 2, 3), alpha = -1),
+    "Parameter 'alpha' must be a numeric number between 0 and 1."
   )
   # pval
   expect_error(
@@ -166,7 +166,7 @@ test_that("2. Output checks: dat1", {
 )
 expect_equal(
   names(ACC(exp1, obs1, lat = lat1)),
-  c("acc", "conf.lower", "conf.upper", "p.val", "macc")
+  c("acc", "macc", "conf.lower", "conf.upper", "p.val")
 )
 expect_equal(
   as.vector(ACC(exp1, obs1, lat = lat1)$acc),
@@ -198,18 +198,18 @@ test_that("2. Output checks: dat1", {
 )
 expect_equal(
   names(ACC(exp1, obs1, lat = lat1, conf = FALSE)),
-  c("acc", "p.val", "macc")
+  c("acc", "macc", "p.val")
 )
 expect_equal(
   names(ACC(exp1, obs1, lat = lat1, pval = FALSE)),
-  c("acc", "conf.lower", "conf.upper", "macc")
+  c("acc", "macc", "conf.lower", "conf.upper")
 )
 expect_equal(
   names(ACC(exp1, obs1, lat = lat1, conf = FALSE, pval = FALSE)),
   c("acc", "macc")
 )
 expect_equal(
-  as.vector(ACC(exp1, obs1, lat = lat1, conf = FALSE, avg_dim = NULL, conf.lev = 0.9)$p.val),
+  as.vector(ACC(exp1, obs1, lat = lat1, conf = FALSE, avg_dim = NULL, alpha = 0.1)$p.val),
   c(0.6083998, 0.6083998, 0.6083998, 0.6083998, 0.6083998),
   tolerance = 0.00001
 )
-- GitLab
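The same renaming applies to ACC(): 'conf.lev' is replaced by 'alpha', which now drives the significance test and the confidence intervals together. A minimal sketch with invented data and latitudes:

  set.seed(3)
  exp <- array(rnorm(120), dim = c(dataset = 1, member = 2, sdate = 5, lat = 4, lon = 3))
  obs <- array(rnorm(60), dim = c(dataset = 1, member = 1, sdate = 5, lat = 4, lon = 3))
  # 10% significance level; skip the p-values, keep the significance mask
  res <- ACC(exp, obs, lat = c(10, 20, 30, 40), alpha = 0.1, pval = FALSE, sign = TRUE)
  names(res)  # "acc" "macc" "conf.lower" "conf.upper" "sign"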
From 0b8e38b864d0fdc0dd2af28cfcf83f54caa31892 Mon Sep 17 00:00:00 2001
From: aho
Date: Tue, 3 Jan 2023 14:49:05 +0100
Subject: [PATCH 04/13] Change conf.lev to alpha; add sign. Wait for Trend sig
 bug to merge

---
 R/Trend.R                   | 87 +++++++++++++++++++------------------
 man/Trend.Rd                | 19 +++++---
 tests/testthat/test-Trend.R |  8 ++--
 3 files changed, 61 insertions(+), 53 deletions(-)

diff --git a/R/Trend.R b/R/Trend.R
index 1f714a6..a901082 100644
--- a/R/Trend.R
+++ b/R/Trend.R
@@ -15,12 +15,14 @@
 #' points along 'time_dim' dimension. The default value is 1.
 #'@param polydeg A positive integer indicating the degree of polynomial
 #' regression. The default value is 1.
+#'@param alpha A numeric indicating the significance level for the statistical
+#' significance test. The default value is 0.05.
 #'@param conf A logical value indicating whether to retrieve the confidence
 #' intervals or not. The default value is TRUE.
-#'@param conf.lev A numeric indicating the confidence level for the
-#' regression computation. The default value is 0.95.
 #'@param pval A logical value indicating whether to compute the p-value or not.
 #' The default value is TRUE.
+#'@param sign A logical value indicating whether to retrieve the statistical
+#' significance based on 'alpha'. The default value is FALSE.
 #'@param ncores An integer indicating the number of cores to use for parallel
 #' computation. The default value is NULL.
 #'
@@ -37,7 +39,7 @@
 #' A numeric array with the first dimension 'stats', followed by the same
 #' dimensions as parameter 'data' except the 'time_dim' dimension. The length
 #' of the 'stats' dimension should be \code{polydeg + 1}, containing the
-#' lower limit of the \code{conf.lev}\% confidence interval for all the
+#' lower limit of the \code{(1-alpha)}\% confidence interval for all the
 #' regression coefficients with the same order as \code{$trend}. Only present
 #' if \code{conf = TRUE}.
 #'}
@@ -45,13 +47,16 @@
 #' A numeric array with the first dimension 'stats', followed by the same
 #' dimensions as parameter 'data' except the 'time_dim' dimension. The length
 #' of the 'stats' dimension should be \code{polydeg + 1}, containing the
-#' upper limit of the \code{conf.lev}\% confidence interval for all the
+#' upper limit of the \code{(1-alpha)}\% confidence interval for all the
 #' regression coefficients with the same order as \code{$trend}. Only present
 #' if \code{conf = TRUE}.
 #'}
 #'\item{$p.val}{
 #' The p-value calculated by anova(). Only present if \code{pval = TRUE}.
 #'}
+#'\item{$sign}{
+#' The statistical significance. Only present if \code{sign = TRUE}.
+#'}
 #'\item{$detrended}{
 #' A numeric array with the same dimensions as parameter 'data', containing the
 #' detrended values along the 'time_dim' dimension.
@@ -67,8 +72,8 @@
 #'@import multiApply
 #'@importFrom stats anova
 #'@export
-Trend <- function(data, time_dim = 'ftime', interval = 1, polydeg = 1,
-                  conf = TRUE, conf.lev = 0.95, pval = TRUE, ncores = NULL) {
+Trend <- function(data, time_dim = 'ftime', interval = 1, polydeg = 1, alpha = 0.05,
+                  conf = TRUE, pval = TRUE, sign = FALSE, ncores = NULL) {
 
   # Check inputs
   ## data
@@ -101,18 +106,22 @@
       length(polydeg) > 1) {
     stop("Parameter 'polydeg' must be a positive integer.")
   }
+  ## alpha
+  if (!is.numeric(alpha) | any(alpha < 0) | any(alpha > 1) | length(alpha) > 1) {
+    stop("Parameter 'alpha' must be a numeric number between 0 and 1.")
+  }
   ## conf
   if (!is.logical(conf) | length(conf) > 1) {
     stop("Parameter 'conf' must be one logical value.")
   }
-  ## conf.lev
-  if (!is.numeric(conf.lev) | conf.lev < 0 | conf.lev > 1 | length(conf.lev) > 1) {
-    stop("Parameter 'conf.lev' must be a numeric number between 0 and 1.")
-  }
   ## pval
   if (!is.logical(pval) | length(pval) > 1) {
     stop("Parameter 'pval' must be one logical value.")
   }
+  ## sign
+  if (!is.logical(sign) | length(sign) > 1) {
+    stop("Parameter 'sign' must be one logical value.")
+  }
   ## ncores
   if (!is.null(ncores)) {
     if (!is.numeric(ncores)) {
@@ -124,32 +133,27 @@
 
   ###############################
   # Calculate Trend
-  if (conf & pval) {
-    output_dims <- list(trend = 'stats', conf.lower = 'stats',
-                        conf.upper = 'stats', p.val = 'stats', detrended = time_dim)
-  } else if (conf & !pval) {
-    output_dims <- list(trend = 'stats', conf.lower = 'stats',
-                        conf.upper = 'stats', detrended = time_dim)
-  } else if (!conf & pval) {
-    output_dims <- list(trend = 'stats', p.val = 'stats', detrended = time_dim)
-  } else {
-    output_dims <- list(trend = 'stats', detrended = time_dim)
-  }
+
+  ## output_dims
+  output_dims <- list(trend = 'stats')
+  if (conf) output_dims <- c(output_dims, list(conf.lower = 'stats', conf.upper = 'stats'))
+  if (pval) output_dims <- c(output_dims, list(p.val = 'stats'))
+  output_dims <- c(output_dims, list(detrended = time_dim))
 
   output <- Apply(list(data),
                   target_dims = time_dim,
                   fun = .Trend,
                   output_dims = output_dims,
                   interval = interval,
-                  polydeg = polydeg, conf = conf,
-                  conf.lev = conf.lev, pval = pval,
+                  polydeg = polydeg, alpha = alpha, conf = conf,
+                  pval = pval, sign = sign,
                   ncores = ncores)
 
   return(invisible(output))
 }
 
-.Trend <- function(x, interval = 1, polydeg = 1,
-                   conf = TRUE, conf.lev = 0.95, pval = TRUE) {
+.Trend <- function(x, interval = 1, polydeg = 1, alpha = 0.05,
+                   conf = TRUE, pval = TRUE, sign = FALSE) {
 
   # x: [ftime]
 
   mon <- seq(x) * interval
@@ -166,12 +170,14 @@
 
     trend <- lm.out$coefficients  #intercept, slope1, slope2,...
     if (conf) {
-      conf.lower <- confint(lm.out, level = conf.lev)[, 1]
-      conf.upper <- confint(lm.out, level = conf.lev)[, 2]
+      conf.lower <- confint(lm.out, level = (1 - alpha))[, 1]
+      conf.upper <- confint(lm.out, level = (1 - alpha))[, 2]
     }
 
-    if (pval) {
-      p.val <- as.array(stats::anova(lm.out)$'Pr(>F)'[1])
+    if (pval | sign) {
+      p.value <- as.array(stats::anova(lm.out)$'Pr(>F)'[1])
+      if (pval) p.val <- p.value
+      if (sign) signif <- !is.na(p.value) & p.value <= alpha
     }
 
     detrended <- c()
@@ -187,21 +193,16 @@
       conf.upper <- rep(NA, polydeg + 1)
     }
 
-    if (pval) {
-      p.val <- rep(NA, polydeg + 1)
-    }
+    if (pval) p.val <- rep(NA, polydeg + 1)
+    if (sign) signif <- rep(FALSE, polydeg + 1)
   }
 
-  if (conf & pval) {
-    return(list(trend = trend, conf.lower = conf.lower, conf.upper = conf.upper,
-                p.val = p.val, detrended = detrended))
-  } else if (conf & !pval) {
-    return(list(trend = trend, conf.lower = conf.lower, conf.upper = conf.upper,
-                detrended = detrended))
-  } else if (!conf & pval) {
-    return(list(trend = trend, p.val = p.val, detrended = detrended))
-  } else {
-    return(list(trend = trend, detrended = detrended))
-  }
+  output <- list(trend = trend)
+  if (conf) output <- c(output, list(conf.lower = conf.lower, conf.upper = conf.upper))
+  if (pval) output <- c(output, list(p.val = p.val))
+  if (sign) output <- c(output, list(sign = signif))
+  output <- c(output, list(detrended = detrended))
+
+  return(output)
 }
diff --git a/man/Trend.Rd b/man/Trend.Rd
index d283ee6..9400205 100644
--- a/man/Trend.Rd
+++ b/man/Trend.Rd
@@ -9,9 +9,10 @@ Trend(
   time_dim = "ftime",
   interval = 1,
   polydeg = 1,
+  alpha = 0.05,
   conf = TRUE,
-  conf.lev = 0.95,
   pval = TRUE,
+  sign = FALSE,
   ncores = NULL
 )
 }
@@ -28,15 +29,18 @@ points along 'time_dim' dimension. The default value is 1.}
 \item{polydeg}{A positive integer indicating the degree of polynomial
 regression. The default value is 1.}
 
+\item{alpha}{A numeric indicating the significance level for the statistical
+significance test. The default value is 0.05.}
+
 \item{conf}{A logical value indicating whether to retrieve the confidence
 intervals or not. The default value is TRUE.}
 
-\item{conf.lev}{A numeric indicating the confidence level for the
-regression computation. The default value is 0.95.}
-
 \item{pval}{A logical value indicating whether to compute the p-value or not.
 The default value is TRUE.}
 
+\item{sign}{A logical value indicating whether to retrieve the statistical
+significance based on 'alpha'. The default value is FALSE.}
+
 \item{ncores}{An integer indicating the number of cores to use for parallel
 computation. The default value is NULL.}
 }
@@ -53,7 +57,7 @@ A list containing:
 A numeric array with the first dimension 'stats', followed by the same
 dimensions as parameter 'data' except the 'time_dim' dimension. The length
 of the 'stats' dimension should be \code{polydeg + 1}, containing the
-lower limit of the \code{conf.lev}\% confidence interval for all the
+lower limit of the \code{(1-alpha)}\% confidence interval for all the
 regression coefficients with the same order as \code{$trend}. Only present
 if \code{conf = TRUE}.
 }
@@ -61,13 +65,16 @@ A list containing:
 A numeric array with the first dimension 'stats', followed by the same
 dimensions as parameter 'data' except the 'time_dim' dimension. The length
 of the 'stats' dimension should be \code{polydeg + 1}, containing the
-upper limit of the \code{conf.lev}\% confidence interval for all the
+upper limit of the \code{(1-alpha)}\% confidence interval for all the
 regression coefficients with the same order as \code{$trend}. Only present
 if \code{conf = TRUE}.
 }
 \item{$p.val}{
 The p-value calculated by anova(). Only present if \code{pval = TRUE}.
 }
+\item{$sign}{
+The statistical significance. Only present if \code{sign = TRUE}.
+}
 \item{$detrended}{
 A numeric array with the same dimensions as parameter 'data', containing the
 detrended values along the 'time_dim' dimension.
diff --git a/tests/testthat/test-Trend.R b/tests/testthat/test-Trend.R
index 49a9982..a86f122 100644
--- a/tests/testthat/test-Trend.R
+++ b/tests/testthat/test-Trend.R
@@ -65,12 +65,12 @@ test_that("1. Input checks", {
     "Parameter 'conf' must be one logical value."
   )
   expect_error(
-    Trend(dat1, conf.lev = 3),
-    "Parameter 'conf.lev' must be a numeric number between 0 and 1."
+    Trend(dat1, alpha = 3),
+    "Parameter 'alpha' must be a numeric number between 0 and 1."
   )
   expect_error(
-    Trend(dat1, conf.lev = TRUE),
-    "Parameter 'conf.lev' must be a numeric number between 0 and 1."
+    Trend(dat1, alpha = TRUE),
+    "Parameter 'alpha' must be a numeric number between 0 and 1."
   )
   expect_error(
     Trend(dat1, pval = 0.95),
-- GitLab
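Trend() after the change, as a minimal sketch with invented data; the confidence level is now expressed as 1 - alpha:

  dat <- array(1:24 + rnorm(24), dim = c(dat = 1, ftime = 24))
  res <- Trend(dat, alpha = 0.1)  # 90% confidence intervals
  names(res)  # "trend" "conf.lower" "conf.upper" "p.val" "detrended"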
From a8094b5161c7f88ec134dc40e6cd96cd564bcb70 Mon Sep 17 00:00:00 2001
From: aho
Date: Tue, 17 Jan 2023 10:56:18 +0100
Subject: [PATCH 05/13] Update doc

---
 man/clim.palette.Rd | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/man/clim.palette.Rd b/man/clim.palette.Rd
index 5d17947..94c9055 100644
--- a/man/clim.palette.Rd
+++ b/man/clim.palette.Rd
@@ -12,8 +12,9 @@ clim.colors(n, palette = "bluered")
 \arguments{
 \item{palette}{Which type of palette to generate: from blue through white
 to red ('bluered'), from red through white to blue ('redblue'), from
-yellow through orange to red ('yellowred'), or from red through orange
-to red ('redyellow').}
+yellow through orange to red ('yellowred'), from red through orange to
+red ('redyellow'), from purple through white to orange ('purpleorange'),
+and from orange through white to purple ('orangepurple').}
 
 \item{n}{Number of colors to generate.}
 }
-- GitLab
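The two added palettes work like the existing ones; a hypothetical call (n chosen arbitrarily), following the usage shown in the Rd file:

  cols <- clim.colors(11, palette = "purpleorange")
  # or via the palette generator returned by clim.palette()
  cols2 <- clim.palette("orangepurple")(11)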
From 644d3aa94fc102dc34f5d80b8df7c2ee0b5b9bde Mon Sep 17 00:00:00 2001
From: aho
Date: Tue, 17 Jan 2023 10:56:36 +0100
Subject: [PATCH 06/13] Add unit test for sign; correct output

---
 R/Trend.R                   |  2 ++
 tests/testthat/test-Trend.R | 13 ++++++++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/R/Trend.R b/R/Trend.R
index 63f71d5..e10fe19 100644
--- a/R/Trend.R
+++ b/R/Trend.R
@@ -140,6 +140,8 @@ Trend <- function(data, time_dim = 'ftime', interval = 1, polydeg = 1, alpha = 0
   output_dims <- list(trend = 'stats')
   if (conf) output_dims <- c(output_dims, list(conf.lower = 'stats', conf.upper = 'stats'))
   if (pval) output_dims <- c(output_dims, list(p.val = 'stats'))
+  if (sign) output_dims <- c(output_dims, list(sign = 'stats'))
+
   output_dims <- c(output_dims, list(detrended = time_dim))
 
   output <- Apply(list(data),
diff --git a/tests/testthat/test-Trend.R b/tests/testthat/test-Trend.R
index aedcbac..b47a202 100644
--- a/tests/testthat/test-Trend.R
+++ b/tests/testthat/test-Trend.R
@@ -88,7 +88,14 @@ test_that("1. Input checks", {
 
 ##############################################
 test_that("2. Output checks: dat1", {
-
+  expect_equal(
+    names(Trend(dat1)),
+    c('trend', 'conf.lower', 'conf.upper', 'p.val', 'detrended')
+  )
+  expect_equal(
+    names(Trend(dat1, conf = F, sign = T)),
+    c('trend', 'p.val', 'sign', 'detrended')
+  )
   expect_equal(
     Trend(dat1)$trend,
     array(c(-9.7692308, 0.6593407, 0.9615385, 0.7967033),
@@ -107,6 +114,10 @@ test_that("2. Output checks: dat1", {
           dim = c(stats = 1, dat = 1, sdate = 2)),
     tolerance = 0.0001
   )
+  expect_equal(
+    Trend(dat1, sign = T)$sign,
+    array(c(T, T), dim = c(stats = 1, dat = 1, sdate = 2))
+  )
   expect_equal(
     median(Trend(dat1)$detrended, na.rm = TRUE),
     0.1153846,
-- GitLab
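With the output list corrected, 'sign' can now be requested from Trend() directly, matching the new unit test above; a minimal sketch with invented data:

  dat <- array(1:24 + rnorm(24), dim = c(dat = 1, ftime = 24))
  res <- Trend(dat, conf = FALSE, sign = TRUE)
  names(res)  # "trend" "p.val" "sign" "detrended"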
From a7817a68340ef873b5815535e3937a346b2b202a Mon Sep 17 00:00:00 2001
From: aho
Date: Mon, 3 Apr 2023 12:52:27 +0200
Subject: [PATCH 07/13] Add pval and sign to RMS.R

---
 R/RMS.R                   | 112 +++++++++++++++++++++++++-------------
 R/RMSSS.R                 |  19 +++----
 man/RMS.Rd                |  37 +++++++++----
 man/RMSSS.Rd              |  17 +++---
 tests/testthat/test-RMS.R |  56 +++++++++++++------
 5 files changed, 155 insertions(+), 86 deletions(-)

diff --git a/R/RMS.R b/R/RMS.R
index 645e34b..8a1899a 100644
--- a/R/RMS.R
+++ b/R/RMS.R
@@ -20,17 +20,23 @@
 #' 'dat_dim' will be 1.
 #'@param time_dim A character string indicating the name of dimension along
 #' which the correlations are computed. The default value is 'sdate'.
-#'@param dat_dim A character string indicating the name of member (nobs/nexp)
-#' dimension. The default value is 'dataset'.
+#'@param dat_dim A character string indicating the name of dataset or member
+#' (nobs/nexp) dimension. The datasets of exp and obs will be paired, and the
+#' RMS will be computed for each pair. The default value is NULL.
 #'@param comp_dim A character string indicating the name of dimension along which
 #' obs is taken into account only if it is complete. The default value
 #' is NULL.
 #'@param limits A vector of two integers indicating the range along comp_dim to
 #' be completed. The default value is c(1, length(comp_dim dimension)).
+#'@param pval A logical value indicating whether to return or not the p-value
+#' of the test Ho: RMS = 0. The default value is TRUE.
 #'@param conf A logical value indicating whether to retrieve the confidence
 #' intervals or not. The default value is TRUE.
+#'@param sign A logical value indicating whether to retrieve the statistical
+#' significance of the test Ho: RMS = 0 based on 'alpha'. The default value is
+#' FALSE.
+#'@param alpha A numeric indicating the significance level for the statistical
+#' significance test. The default value is 0.05.
 #'@param ncores An integer indicating the number of cores to use for parallel
 #' computation. The default value is NULL.
 #'
@@ -38,37 +44,43 @@
 #'A list containing the numeric arrays with dimension:\cr
 #' c(nexp, nobs, all other dimensions of exp except time_dim).\cr
 #'nexp is the number of experiment (i.e., dat_dim in exp), and nobs is the
-#'number of observation (i.e., dat_dim in obs).\cr
+#'number of observation (i.e., dat_dim in obs). If dat_dim is NULL, nexp and
+#'nobs are omitted.\cr
 #'\item{$rms}{
 #' The root mean square error.
 #'}
+#'\item{$p.val}{
+#' The p-value. Only present if \code{pval = TRUE}.
+#'}
 #'\item{$conf.lower}{
 #' The lower confidence interval. Only present if \code{conf = TRUE}.
 #'}
 #'\item{$conf.upper}{
 #' The upper confidence interval. Only present if \code{conf = TRUE}.
 #'}
+#'\item{$sign}{
+#' The statistical significance. Only present if \code{sign = TRUE}.
+#'}
 #'
 #'@examples
 #'# Load sample data as in Load() example:
 #' set.seed(1)
-#' exp1 <- array(rnorm(120), dim = c(dataset = 3, sdate = 5, ftime = 2, lon = 1, lat = 4))
+#' exp1 <- array(rnorm(120), dim = c(dat = 3, sdate = 5, ftime = 2, lon = 1, lat = 4))
 #' set.seed(2)
-#' obs1 <- array(rnorm(80), dim = c(dataset = 2, sdate = 5, ftime = 2, lon = 1, lat = 4))
+#' obs1 <- array(rnorm(80), dim = c(dat = 2, sdate = 5, ftime = 2, lon = 1, lat = 4))
 #' set.seed(2)
 #' na <- floor(runif(10, min = 1, max = 80))
 #' obs1[na] <- NA
-#' res <- RMS(exp1, obs1, comp_dim = 'ftime')
+#' res <- RMS(exp1, obs1, comp_dim = 'ftime', dat_dim = 'dat')
 #' # Renew example when Ano and Smoothing are ready
 #'
-#'@rdname RMS
 #'@import multiApply
 #'@importFrom ClimProjDiags Subset
 #'@importFrom stats qchisq
 #'@export
-RMS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset',
-                comp_dim = NULL, limits = NULL,
-                conf = TRUE, conf.lev = 0.95, ncores = NULL) {
+RMS <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL,
+                comp_dim = NULL, limits = NULL, pval = TRUE, conf = TRUE,
+                sign = FALSE, alpha = 0.05, ncores = NULL) {
   # Check inputs
   ## exp and obs (1)
   if (is.null(exp) | is.null(obs)) {
@@ -79,13 +91,13 @@
   }
   if (is.null(dim(exp)) & is.null(dim(obs))) {  #is vector
     if (length(exp) == length(obs)) {
-      exp <- array(exp, dim = c(length(exp), 1))
-      names(dim(exp)) <- c(time_dim, dat_dim)
-      obs <- array(obs, dim = c(length(obs), 1))
-      names(dim(obs)) <- c(time_dim, dat_dim)
+      exp <- array(exp, dim = c(length(exp)))
+      names(dim(exp)) <- c(time_dim)
+      obs <- array(obs, dim = c(length(obs)))
+      names(dim(obs)) <- c(time_dim)
     } else {
-      stop(paste0("Parameter 'exp' and 'obs' must be array with at least two ",
-                  "dimensions time_dim and dat_dim, or vector of same length."))
+      stop(paste0("Parameter 'exp' and 'obs' must be array with at least two ",
+                  "dimensions time_dim and dat_dim, or vector of same length."))
     }
   } else if (is.null(dim(exp)) | is.null(dim(obs))) {
     stop(paste0("Parameter 'exp' and 'obs' must be array with at least two ",
                 "dimensions time_dim and dat_dim, or vector of same length."))
   }
@@ -136,13 +148,21 @@
                   "integers smaller than the length of parameter 'comp_dim'."))
     }
   }
+  ## pval
+  if (!is.logical(pval) | length(pval) > 1) {
+    stop("Parameter 'pval' must be one logical value.")
+  }
   ## conf
   if (!is.logical(conf) | length(conf) > 1) {
     stop("Parameter 'conf' must be one logical value.")
   }
-  ## conf.lev
-  if (!is.numeric(conf.lev) | conf.lev < 0 | conf.lev > 1 | length(conf.lev) > 1) {
-    stop("Parameter 'conf.lev' must be a numeric number between 0 and 1.")
+  ## sign
+  if (!is.logical(sign) | length(sign) > 1) {
+    stop("Parameter 'sign' must be one logical value.")
+  }
+  ## alpha
+  if (!is.numeric(alpha) | alpha < 0 | alpha > 1 | length(alpha) > 1) {
+    stop("Parameter 'alpha' must be a numeric number between 0 and 1.")
   }
   ## ncores
   if (!is.null(ncores)) {
@@ -195,21 +215,21 @@
                                    c(time_dim, dat_dim)),
                 fun = .RMS,
                 time_dim = time_dim, dat_dim = dat_dim,
-                conf = conf, conf.lev = conf.lev, ncores_input = ncores,
+                pval = pval, conf = conf, sign = sign, alpha = alpha,
                 ncores = ncores)
   return(res)
 }
 
-.RMS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset',
-                 conf = TRUE, conf.lev = 0.95, ncores_input = NULL) {
+.RMS <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL,
+                 pval = TRUE, conf = TRUE, sign = FALSE, alpha = 0.05) {
{ if (is.null(dat_dim)) { # exp: [sdate] # obs: [sdate] nexp <- 1 nobs <- 1 ini_dims <- dim(exp) - dim(exp) <- c(ini_dims, dat_dim = 1) - dim(obs) <- c(ini_dims, dat_dim = 1) + dim(exp) <- c(ini_dims, dat = 1) + dim(obs) <- c(ini_dims, dat = 1) } else { # exp: [sdate, dat_exp] # obs: [sdate, dat_obs] @@ -218,10 +238,9 @@ RMS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', } dif <- array(dim = c(dim(exp)[1], nexp = nexp, nobs = nobs)) - chi <- array(dim = c(nexp = nexp, nobs = nobs)) if (conf) { - conflow <- (1 - conf.lev) / 2 + conflow <- alpha / 2 confhigh <- 1 - conflow conf.lower <- array(dim = c(nexp = nexp, nobs = nobs)) conf.upper <- array(dim = c(nexp = nexp, nobs = nobs)) @@ -232,11 +251,17 @@ RMS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', dif[, , i] <- sapply(1:nexp, function(x) {exp[, x] - obs[, i]}) } - rms <- apply(dif^2, c(2, 3), mean, na.rm = TRUE)^0.5 #array(dim = c(_exp, nobs)) + rms <- colMeans(dif^2, na.rm = TRUE)^0.5 #array(dim = c(nexp, nobs)) - if (conf) { - #eno <- Eno(dif, 1) #count effective sample along sdate. dim = c(nexp, nobs) - eno <- Eno(dif, time_dim, ncores = ncores_input) #change to this line when Eno() is done + if (conf | pval | sign) { + #count effective sample along sdate. eno: c(nexp, nobs) +# eno <- Eno(dif, time_dim) # slower than for loop below? + eno <- array(dim = c(nexp = nexp, nobs = nobs)) + for (n_obs in 1:nobs) { + for (n_exp in 1:nexp) { + eno[n_exp, n_obs] <- .Eno(dif[, n_exp, n_obs], na.action = na.pass) + } + } # conf.lower chi <- sapply(1:nobs, function(i) { @@ -251,6 +276,15 @@ RMS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', conf.upper <- (eno * rms ** 2 / chi) ** 0.5 } + if (pval | sign) { + chi <- array(dim = c(nexp = nexp, nobs = nobs)) + for (i in 1:nobs) { + chi[, i] <- sapply(1:nexp, function(x) {sum((exp[, x] - obs[, i])^2 / exp[, x])}) + } + p_val <- pchisq(chi, eno - 1, lower.tail = FALSE) + if (sign) signif <- p_val <= alpha + } + ################################### # Remove nexp and nobs if dat_dim = NULL if (is.null(dat_dim)) { @@ -259,16 +293,16 @@ RMS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', dim(conf.lower) <- NULL dim(conf.upper) <- NULL } + if (pval) dim(p_val) <- NULL + if (sign) dim(signif) <- NULL } ################################### - - if (conf) { - res <- list(rms = rms, conf.lower = conf.lower, conf.upper = conf.upper) - } else { - res <- list(rms = rms) - } + res <- list(rms = rms) + if (pval) res <- c(res, list(p.val = p_val)) + if (sign) res <- c(res, list(sign = signif)) + if (conf) res <- c(res, list(conf.lower = conf.lower, conf.upper = conf.upper)) return(res) -} \ No newline at end of file +} diff --git a/R/RMSSS.R b/R/RMSSS.R index b8b3cc0..d81b81d 100644 --- a/R/RMSSS.R +++ b/R/RMSSS.R @@ -14,15 +14,12 @@ #'Fisher test or Random Walk test.\cr #' #'@param exp A named numeric array of experimental data which contains at least -#' two dimensions for dat_dim and time_dim. It can also be a vector with the -#' same length as 'obs', then the vector will automatically be 'time_dim' and -#' 'dat_dim' will be 1. +#' time dimension (time_dim). It can also be a vector with the same length as +#' 'obs', then the vector will automatically be 'time_dim'. #'@param obs A named numeric array of observational data which contains at least -#' two dimensions for dat_dim and time_dim. The dimensions should be the same -#' as paramter 'exp' except the length of 'dat_dim' dimension. The order of -#' dimension can be different. 
It can also be a vector with the same length as -#' 'exp', then the vector will automatically be 'time_dim' and 'dat_dim' will -#' be 1. +#' time dimension (time_dim). The dimensions should be the same as parameter +#' 'exp' except the length of 'dat_dim' dimension. It can also be a vector with +#' the same length as 'exp', then the vector will automatically be 'time_dim'. #'@param ref A named numerical array of the reference forecast data with at #' least time dimension, or 0 (typical climatological forecast) or 1 #' (normalized climatological forecast). If it is an array, the dimensions must @@ -33,7 +30,7 @@ #' climatological forecast is used as reference forecast (equivelant to 0.) #' The default value is NULL. #'@param dat_dim A character string indicating the name of dataset (nobs/nexp) -#' dimension. The default value is 'dataset'. +#' dimension. The default value is NULL. #'@param time_dim A character string indicating the name of dimension along #' which the RMSSS are computed. The default value is 'sdate'. #'@param memb_dim A character string indicating the name of the member dimension @@ -80,7 +77,7 @@ #'@import multiApply #'@importFrom stats pf #'@export -RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = 'dataset', +RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, memb_dim = NULL, pval = TRUE, sign = FALSE, alpha = 0.05, sig_method = 'one-sided Fisher', ncores = NULL) { @@ -297,7 +294,7 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = 'dataset', return(res) } -.RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = 'dataset', pval = TRUE, +.RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, pval = TRUE, sign = FALSE, alpha = 0.05, sig_method = 'one-sided Fisher') { # exp: [sdate, (dat)] # obs: [sdate, (dat)] diff --git a/man/RMS.Rd b/man/RMS.Rd index 4391df4..f5e46f6 100644 --- a/man/RMS.Rd +++ b/man/RMS.Rd @@ -8,11 +8,13 @@ RMS( exp, obs, time_dim = "sdate", - dat_dim = "dataset", + dat_dim = NULL, comp_dim = NULL, limits = NULL, + pval = TRUE, conf = TRUE, - conf.lev = 0.95, + sign = FALSE, + alpha = 0.05, ncores = NULL ) } @@ -30,8 +32,9 @@ length as 'exp', then the vector will automatically be 'time_dim' and \item{time_dim}{A character string indicating the name of dimension along which the correlations are computed. The default value is 'sdate'.} -\item{dat_dim}{A character string indicating the name of member (nobs/nexp) -dimension. The default value is 'dataset'.} +\item{dat_dim}{A character string indicating the name of dataset or member +(nobs/nexp) dimension. The datasets of exp and obs will be paired and +computed RMS for each pair. The default value is NULL.} \item{comp_dim}{A character string indicating the name of dimension along which obs is taken into account only if it is complete. The default value @@ -40,11 +43,18 @@ is NULL.} \item{limits}{A vector of two integers indicating the range along comp_dim to be completed. The default value is c(1, length(comp_dim dimension)).} +\item{pval}{A logical value indicating whether to return or not the p-value +of the test Ho: Corr = 0. The default value is TRUE.} + \item{conf}{A logical value indicating whether to retrieve the confidence intervals or not. The default value is TRUE.} -\item{conf.lev}{A numeric indicating the confidence level for the -regression computation. 
The default value is 0.95.} +\item{sign}{A logical value indicating whether to retrieve the statistical +significance of the test Ho: Corr = 0 based on 'alpha'. The default value is +FALSE.} + +\item{alpha}{A numeric indicating the significance level for the statistical +significance test. The default value is 0.05.} \item{ncores}{An integer indicating the number of cores to use for parallel computation. The default value is NULL.} @@ -53,16 +63,23 @@ computation. The default value is NULL.} A list containing the numeric arrays with dimension:\cr c(nexp, nobs, all other dimensions of exp except time_dim).\cr nexp is the number of experiment (i.e., dat_dim in exp), and nobs is the -number of observation (i.e., dat_dim in obs).\cr +number of observation (i.e., dat_dim in obs). If dat_dim is NULL, nexp and +nobs are omitted.\cr \item{$rms}{ The root mean square error. } +\item{$p.val}{ + The p-value. Only present if \code{pval = TRUE}. +} \item{$conf.lower}{ The lower confidence interval. Only present if \code{conf = TRUE}. } \item{$conf.upper}{ The upper confidence interval. Only present if \code{conf = TRUE}. } +\item{$sign}{ + The statistical significance. Only present if \code{sign = TRUE}. +} } \description{ Compute the root mean square error for an array of forecasts and an array of @@ -78,13 +95,13 @@ The confidence interval is computed by the chi2 distribution.\cr \examples{ # Load sample data as in Load() example: set.seed(1) - exp1 <- array(rnorm(120), dim = c(dataset = 3, sdate = 5, ftime = 2, lon = 1, lat = 4)) + exp1 <- array(rnorm(120), dim = c(dat = 3, sdate = 5, ftime = 2, lon = 1, lat = 4)) set.seed(2) - obs1 <- array(rnorm(80), dim = c(dataset = 2, sdate = 5, ftime = 2, lon = 1, lat = 4)) + obs1 <- array(rnorm(80), dim = c(dat = 2, sdate = 5, ftime = 2, lon = 1, lat = 4)) set.seed(2) na <- floor(runif(10, min = 1, max = 80)) obs1[na] <- NA - res <- RMS(exp1, obs1, comp_dim = 'ftime') + res <- RMS(exp1, obs1, comp_dim = 'ftime', dat_time = 'dat') # Renew example when Ano and Smoothing are ready } diff --git a/man/RMSSS.Rd b/man/RMSSS.Rd index bcf221c..c647c71 100644 --- a/man/RMSSS.Rd +++ b/man/RMSSS.Rd @@ -9,7 +9,7 @@ RMSSS( obs, ref = NULL, time_dim = "sdate", - dat_dim = "dataset", + dat_dim = NULL, memb_dim = NULL, pval = TRUE, sign = FALSE, @@ -20,16 +20,13 @@ RMSSS( } \arguments{ \item{exp}{A named numeric array of experimental data which contains at least -two dimensions for dat_dim and time_dim. It can also be a vector with the -same length as 'obs', then the vector will automatically be 'time_dim' and -'dat_dim' will be 1.} +time dimension (time_dim). It can also be a vector with the same length as +'obs', then the vector will automatically be 'time_dim'.} \item{obs}{A named numeric array of observational data which contains at least -two dimensions for dat_dim and time_dim. The dimensions should be the same -as paramter 'exp' except the length of 'dat_dim' dimension. The order of -dimension can be different. It can also be a vector with the same length as -'exp', then the vector will automatically be 'time_dim' and 'dat_dim' will -be 1.} +time dimension (time_dim). The dimensions should be the same as parameter +'exp' except the length of 'dat_dim' dimension. 
It can also be a vector with +the same length as 'exp', then the vector will automatically be 'time_dim'.} \item{ref}{A named numerical array of the reference forecast data with at least time dimension, or 0 (typical climatological forecast) or 1 @@ -45,7 +42,7 @@ The default value is NULL.} which the RMSSS are computed. The default value is 'sdate'.} \item{dat_dim}{A character string indicating the name of dataset (nobs/nexp) -dimension. The default value is 'dataset'.} +dimension. The default value is NULL.} \item{memb_dim}{A character string indicating the name of the member dimension to compute the ensemble mean; it should be set to NULL if the parameter 'exp' diff --git a/tests/testthat/test-RMS.R b/tests/testthat/test-RMS.R index bf059ef..d45d59c 100644 --- a/tests/testthat/test-RMS.R +++ b/tests/testthat/test-RMS.R @@ -88,8 +88,8 @@ test_that("1. Input checks", { "integers smaller than the length of paramter 'comp_dim'.") ) expect_error( - RMS(exp1, obs1, conf.lev = -1), - "Parameter 'conf.lev' must be a numeric number between 0 and 1." + RMS(exp1, obs1, alpha = -1), + "Parameter 'alpha' must be a numeric number between 0 and 1." ) expect_error( RMS(exp1, obs1, conf = 1), @@ -106,7 +106,7 @@ test_that("1. Input checks", { ) expect_error( RMS(exp = array(1:5, dim = c(sdate = 1, dataset = 5, a = 1)), - obs = array(1:2, dim = c(a = 1, sdate = 1, dataset = 2))), + obs = array(1:2, dim = c(a = 1, sdate = 1, dataset = 2)), dat_dim = "dataset"), "The length of time_dim must be at least 2 to compute RMS." ) @@ -118,65 +118,65 @@ test_that("1. Input checks", { test_that("2. Output checks: dat1", { suppressWarnings( expect_equal( - dim(RMS(exp1, obs1)$rms), + dim(RMS(exp1, obs1, dat_dim = 'dataset')$rms), c(nexp = 3, nobs = 2, ftime = 2, lon = 1, lat = 4) ) ) suppressWarnings( expect_equal( - RMS(exp1, obs1)$rms[1:6], + RMS(exp1, obs1, dat_dim = 'dataset')$rms[1:6], c(1.2815677, 2.0832803, 1.1894637, 1.3000403, 1.4053807, 0.8157563), tolerance = 0.001 ) ) suppressWarnings( expect_equal( - length(which(is.na(RMS(exp1, obs1)$conf.lower))), + length(which(is.na(RMS(exp1, obs1, dat_dim = 'dataset')$conf.lower))), 4 ) ) suppressWarnings( expect_equal( - max(RMS(exp1, obs1)$conf.lower, na.rm = T), + max(RMS(exp1, obs1, dat_dim = 'dataset')$conf.lower, na.rm = T), 1.399509, tolerance = 0.001 ) ) suppressWarnings( expect_equal( - length(which(is.na(RMS(exp1, obs1, comp_dim = 'ftime')$rms))), + length(which(is.na(RMS(exp1, obs1, dat_dim = 'dataset', comp_dim = 'ftime')$rms))), 0 ) ) suppressWarnings( expect_equal( - length(which(is.na(RMS(exp1, obs1, comp_dim = 'ftime')$conf.upper))), + length(which(is.na(RMS(exp1, obs1, dat_dim = 'dataset', comp_dim = 'ftime')$conf.upper))), 8 ) ) suppressWarnings( expect_equal( - length(which(is.na(RMS(exp1, obs1, comp_dim = 'lat')$conf.lower))), + length(which(is.na(RMS(exp1, obs1, dat_dim = 'dataset', comp_dim = 'lat')$conf.lower))), 36 ) ) suppressWarnings( expect_equal( - length(which(is.na(RMS(exp1, obs1, comp_dim = 'lat', limits = c(1, 2))$conf.lower))), + length(which(is.na(RMS(exp1, obs1, dat_dim = 'dataset', comp_dim = 'lat', limits = c(1, 2))$conf.lower))), 21 ) ) suppressWarnings( expect_equal( - min(RMS(exp1, obs1, conf.lev = 0.99)$conf.upper, na.rm = TRUE), + min(RMS(exp1, obs1, dat_dim = 'dataset', alpha = 0.01)$conf.upper, na.rm = TRUE), 1.406368, tolerance = 0.0001 ) ) suppressWarnings( expect_equal( - length(RMS(exp1, obs1, conf = FALSE)), - 1 + length(RMS(exp1, obs1, dat_dim = 'dataset', conf = FALSE)), + 2 ) ) @@ -188,7 +188,7 @@ test_that("3. 
Output checks: dat2", { expect_equal( dim(RMS(exp2, obs2)$rms), - c(nexp = 1, nobs = 1) + NULL ) expect_equal( @@ -219,6 +219,30 @@ test_that("4. Output checks: dat3", { dim(RMS(exp4, obs4, time_dim = 'sdates', dat_dim = NULL, conf = TRUE)$rms), c(ftimes = 2, lon = 1, lat = 1) ) + expect_equal( + length(RMS(exp3, obs3, dat_dim = NULL, sign = T, conf = F)), + 3 + ) + expect_equal( + c(RMS(exp3, obs3, dat_dim = NULL, sign = T, conf = F)$sign[1,1,]), + c(FALSE, FALSE, TRUE, FALSE) + ) + expect_equal( + c(RMS(exp3, obs3, dat_dim = NULL, sign = T, conf = F)$p.val[1,1,]), + c(1, 0.8498872, 4.686846e-06, 1), + tolerance = 0.0001 + ) + expect_equal( + c(RMS(exp3, obs3, dat_dim = NULL, pval = F)$conf.lower[1,1,]), + c(1.1024490, 0.5533838, 1.4531443, 0.3606632), + tolerance = 0.0001 + ) + expect_equal( + c(RMS(exp3, obs3, dat_dim = NULL, pval = F)$conf.upper[1,1,]), + c(5.287554, 2.654133, 6.969554, 1.729809), + tolerance = 0.0001 + ) + }) -############################################## \ No newline at end of file +############################################## -- GitLab From b10b2e7d892dc5b477907c7175dc499e0798c5b1 Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 3 Apr 2023 13:23:15 +0200 Subject: [PATCH 08/13] Add sign; change conf.lev to alpha --- R/Regression.R | 83 +++++++++++++++----------------- man/Regression.Rd | 16 ++++-- tests/testthat/test-Regression.R | 14 ++++-- 3 files changed, 63 insertions(+), 50 deletions(-) diff --git a/R/Regression.R b/R/Regression.R index 1cd12e6..535f179 100644 --- a/R/Regression.R +++ b/R/Regression.R @@ -19,8 +19,10 @@ #' or not. The default value is TRUE. #'@param conf A logical value indicating whether to retrieve the confidence #' intervals or not. The default value is TRUE. -#'@param conf.lev A numeric indicating the confidence level for the -#' regression computation. The default value is 0.95. +#'@param sign A logical value indicating whether to compute or not the +#' statistical significance of the test The default value is FALSE. +#'@param alpha A numeric of the significance level to be used in the +#' statistical significance test. The default value is 0.05. #'@param na.action A function or an integer. A function (e.g., na.omit, #' na.exclude, na.fail, na.pass) indicates what should happen when the data #' contain NAs. A numeric indicates the maximum number of NA position (it @@ -60,6 +62,10 @@ #' A numeric array with same dimensions as parameter 'daty' and 'datax' except #' the 'reg_dim' dimension, The array contains the p-value. #'} +#'\item{sign}{ +#' A logical array of the statistical significance of the regression with the +#' same dimensions as $regression. Only present if \code{sign = TRUE}. 
+#'} #'\item{$filtered}{ #' A numeric array with the same dimension as paramter 'datay' and 'datax', #' the filtered datay from the regression onto datax along the 'reg_dim' @@ -74,13 +80,13 @@ #'datax <- sampleData$obs[, 1, , ] #'names(dim(datax)) <- c('sdate', 'ftime') #'res1 <- Regression(datay, datax, formula = y~poly(x, 2, raw = TRUE)) -#'res2 <- Regression(datay, datax, conf.lev = 0.9) +#'res2 <- Regression(datay, datax, alpha = 0.1) #' #'@importFrom stats lm na.omit confint #'@import multiApply #'@export Regression <- function(datay, datax, reg_dim = 'sdate', formula = y ~ x, - pval = TRUE, conf = TRUE, conf.lev = 0.95, + pval = TRUE, conf = TRUE, sign = FALSE, alpha = 0.05, na.action = na.omit, ncores = NULL) { # Check inputs @@ -134,9 +140,13 @@ Regression <- function(datay, datax, reg_dim = 'sdate', formula = y ~ x, if (!is.logical(conf) | length(conf) > 1) { stop("Parameter 'conf' must be one logical value.") } - ##conf.lev - if (!is.numeric(conf.lev) | conf.lev < 0 | conf.lev > 1 | length(conf.lev) > 1) { - stop("Parameter 'conf.lev' must be a numeric number between 0 and 1.") + ## sign + if (!is.logical(sign) | length(sign) > 1) { + stop("Parameter 'sign' must be one logical value.") + } + ## alpha + if (!is.numeric(alpha) | alpha < 0 | alpha > 1 | length(alpha) > 1) { + stop("Parameter 'alpha' must be a numeric number between 0 and 1.") } ## na.action if (!is.function(na.action) & !is.numeric(na.action)) { @@ -169,33 +179,27 @@ Regression <- function(datay, datax, reg_dim = 'sdate', formula = y ~ x, ############################### # Calculate Regression - if (conf & pval) { - output_dims <- list(regression = 'stats', conf.lower = 'stats', - conf.upper = 'stats', p.val = NULL, filtered = reg_dim) - } else if (conf & !pval) { - output_dims <- list(regression = 'stats', conf.lower = 'stats', - conf.upper = 'stats', filtered = reg_dim) - } else if (!conf & pval) { - output_dims <- list(regression = 'stats', - p.val = NULL, filtered = reg_dim) - } else if (!conf & !pval) { - output_dims <- list(regression = 'stats', filtered = reg_dim) - } - + + ## output_dims + output_dims <- list(regression = 'stats', filtered = reg_dim) + if (conf) output_dims <- c(output_dims, list(conf.lower = 'stats', conf.upper = 'stats')) + if (pval) output_dims <- c(output_dims, list(p.val = NULL)) + if (sign) output_dims <- c(output_dims, list(sign = NULL)) + res <- Apply(list(datay, datax), target_dims = reg_dim, output_dims = output_dims, fun = .Regression, - formula = formula, pval = pval, conf = conf, - conf.lev = conf.lev, na.action = na.action, + formula = formula, pval = pval, conf = conf, sign = sign, + alpha = alpha, na.action = na.action, ncores = ncores) return(invisible(res)) } -.Regression <- function(y, x, formula = y~x, pval = TRUE, conf = TRUE, - conf.lev = 0.95, na.action = na.omit) { +.Regression <- function(y, x, formula = y~x, pval = TRUE, conf = TRUE, + sign = FALSE, alpha = 0.05, na.action = na.omit) { NApos <- 1:length(x) NApos[which(is.na(x) | is.na(y))] <- NA @@ -211,12 +215,13 @@ Regression <- function(datay, datax, reg_dim = 'sdate', formula = y ~ x, lm.out <- lm(formula, data = data.frame(x = x, y = y), na.action = na.action) coeff <- lm.out$coefficients if (conf) { - conf.lower <- confint(lm.out, level = conf.lev)[, 1] - conf.upper <- confint(lm.out, level = conf.lev)[, 2] + conf.lower <- confint(lm.out, level = 1 - alpha)[, 1] + conf.upper <- confint(lm.out, level = 1 - alpha)[, 2] } - if (pval) { + if (pval | sign) { f <- summary(lm.out)$fstatistic - p.val <- pf(f[1], f[2], 
f[3],lower.tail = F) + p.val <- pf(f[1], f[2], f[3], lower.tail = F) + if (sign) signif <- !is.na(p.val) & p.val <= alpha } filtered[!is.na(NApos)] <- y[!is.na(NApos)] - lm.out$fitted.values @@ -228,25 +233,17 @@ Regression <- function(datay, datax, reg_dim = 'sdate', formula = y ~ x, conf.lower[which(!is.na(conf.lower))] <- NA conf.upper[which(!is.na(conf.upper))] <- NA } - if (pval) { - p.val[which(!is.na(p.val))] <- NA - } + if (pval) p.val[which(!is.na(p.val))] <- NA + if (sign) signif[which(!is.na(signif))] <- NA filtered[which(!is.na(filtered))] <- NA } } - if (conf & pval) { - return(list(regression = coeff, conf.lower = conf.lower, conf.upper = conf.upper, - p.val = p.val, filtered = filtered)) - } else if (conf & !pval) { - return(list(regression = coeff, conf.lower = conf.lower, conf.upper = conf.upper, - filtered = filtered)) - } else if (!conf & pval) { - return(list(regression = coeff, - p.val = p.val, filtered = filtered)) - } else if (!conf & !pval) { - return(list(regression = coeff, filtered = filtered)) - } + res <- list(regression = coeff, filtered = filtered) + if (conf) res <- c(res, list(conf.lower = conf.lower, conf.upper = conf.upper)) + if (pval) res <- c(res, list(p.val = p.val)) + if (sign) res <- c(res, list(sign = signif)) + return(res) } diff --git a/man/Regression.Rd b/man/Regression.Rd index 8e27295..9ac0c94 100644 --- a/man/Regression.Rd +++ b/man/Regression.Rd @@ -11,7 +11,8 @@ Regression( formula = y ~ x, pval = TRUE, conf = TRUE, - conf.lev = 0.95, + sign = FALSE, + alpha = 0.05, na.action = na.omit, ncores = NULL ) @@ -34,8 +35,11 @@ or not. The default value is TRUE.} \item{conf}{A logical value indicating whether to retrieve the confidence intervals or not. The default value is TRUE.} -\item{conf.lev}{A numeric indicating the confidence level for the -regression computation. The default value is 0.95.} +\item{sign}{A logical value indicating whether to compute or not the +statistical significance of the test The default value is FALSE.} + +\item{alpha}{A numeric of the significance level to be used in the +statistical significance test. The default value is 0.05.} \item{na.action}{A function or an integer. A function (e.g., na.omit, na.exclude, na.fail, na.pass) indicates what should happen when the data @@ -75,6 +79,10 @@ A list containing: A numeric array with same dimensions as parameter 'daty' and 'datax' except the 'reg_dim' dimension, The array contains the p-value. } +\item{sign}{ + A logical array of the statistical significance of the regression with the + same dimensions as $regression. Only present if \code{sign = TRUE}. +} \item{$filtered}{ A numeric array with the same dimension as paramter 'datay' and 'datax', the filtered datay from the regression onto datax along the 'reg_dim' @@ -98,6 +106,6 @@ names(dim(datay)) <- c('sdate', 'ftime') datax <- sampleData$obs[, 1, , ] names(dim(datax)) <- c('sdate', 'ftime') res1 <- Regression(datay, datax, formula = y~poly(x, 2, raw = TRUE)) -res2 <- Regression(datay, datax, conf.lev = 0.9) +res2 <- Regression(datay, datax, alpha = 0.1) } diff --git a/tests/testthat/test-Regression.R b/tests/testthat/test-Regression.R index a29076f..d0898bf 100644 --- a/tests/testthat/test-Regression.R +++ b/tests/testthat/test-Regression.R @@ -83,8 +83,8 @@ test_that("1. Input checks", { "Parameter 'conf' must be one logical value." ) expect_error( - Regression(datay1, datax1, conf.lev = 1.5), - "Parameter 'conf.lev' must be a numeric number between 0 and 1." 
+ Regression(datay1, datax1, alpha = 2), + "Parameter 'alpha' must be a numeric number between 0 and 1." ) expect_error( Regression(datay1, datax1, ncores = T), @@ -127,7 +127,11 @@ test_that("2. Output checks: dat1", { 2 ) expect_equal( - range(Regression(datay1, datax1, conf.lev = 0.99)$conf.low, na.rm = T), + length(Regression(datay1, datax1, pval = F, sign = T)), + 5 + ) + expect_equal( + range(Regression(datay1, datax1, alpha = 0.01)$conf.low, na.rm = T), c(-380.888744, 0.220794), tolerance = 0.001 ) @@ -136,6 +140,10 @@ test_that("2. Output checks: dat1", { 0.005335, tolerance = 0.0001 ) + expect_equal( + c(Regression(datay1, datax1, sign = T, conf = F)$sign[1, 2, ]), + c(TRUE, FALSE, FALSE, FALSE) + ) expect_equal( mean(Regression(datay1, datax1, formula = y~poly(x, 2, raw = T))$p.val, na.rm = TRUE), 0.3407307, -- GitLab From ac5903b0d90ff8fc6d253056d804554a6d070fe6 Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 3 Apr 2023 14:22:56 +0200 Subject: [PATCH 09/13] Change conf.lev to alpha --- R/Spread.R | 36 ++++++++++++++++++------------------ man/Spread.Rd | 6 +++--- tests/testthat/test-Spread.R | 6 +++--- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/R/Spread.R b/R/Spread.R index d1d8f6d..09bbe05 100644 --- a/R/Spread.R +++ b/R/Spread.R @@ -16,8 +16,8 @@ #' kept (FALSE) for computation. The default value is TRUE. #'@param conf A logical value indicating whether to compute the confidence #' intervals or not. The default value is TRUE. -#'@param conf.lev A numeric value of the confidence level for the computation. -#' The default value is 0.95. +#'@param alpha A numeric of the significance level to be used in the +#' statistical significance test. The default value is 0.05. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -81,7 +81,7 @@ #'@importFrom stats IQR sd mad runif quantile #'@export Spread <- function(data, compute_dim = 'member', na.rm = TRUE, - conf = TRUE, conf.lev = 0.95, ncores = NULL) { + conf = TRUE, alpha = 0.05, ncores = NULL) { # Check inputs ## data @@ -113,9 +113,9 @@ Spread <- function(data, compute_dim = 'member', na.rm = TRUE, if (!is.logical(conf) | length(conf) > 1) { stop("Parameter 'conf' must be one logical value.") } - ## conf.lev - if (!is.numeric(conf.lev) | any(conf.lev < 0) | any(conf.lev > 1) | length(conf.lev) > 1) { - stop("Parameter 'conf.lev' must be a numeric number between 0 and 1.") + ## alpha + if (any(!is.numeric(alpha) | alpha < 0 | alpha > 1) | length(alpha) > 1) { + stop("Parameter 'alpha' must be a numeric number between 0 and 1.") } ## ncores if (!is.null(ncores)) { @@ -134,14 +134,14 @@ Spread <- function(data, compute_dim = 'member', na.rm = TRUE, output_dims = list(iqr = 'stats', maxmin = 'stats', sd = 'stats', mad = 'stats'), na.rm = na.rm, - conf = conf, conf.lev = conf.lev, + conf = conf, alpha = alpha, ncores = ncores) return(output) } .Spread <- function(data, compute_dim = 'member', na.rm = TRUE, - conf = TRUE, conf.lev = 0.95) { + conf = TRUE, alpha = 0.05) { # data: compute_dim. 
[member] or [member, sdate] for example @@ -159,24 +159,24 @@ Spread <- function(data, compute_dim = 'member', na.rm = TRUE, res_sd <- rep(res_sd, 3) res_mad <- rep(res_mad, 3) - conf_low <- (1 - conf.lev) / 2 + conf_low <- alpha / 2 conf_high <- 1 - conf_low # Create vector for saving bootstrap result - iqr_bs <- c() - maxmin_bs <- c() - sd_bs <- c() - mad_bs <- c() + iqr_bs <- rep(NA, 100) + maxmin_bs <- rep(NA, 100) + sd_bs <- rep(NA, 100) + mad_bs <- rep(NA, 100) # bootstrapping for 100 times num <- length(data) for (jmix in 1:100) { drawings <- round(runif(num, 0.5, num + 0.5)) - iqr_bs <- c(iqr_bs, IQR(data[drawings], na.rm = na.rm)) - maxmin_bs <- c(maxmin_bs, max(data[drawings], na.rm = na.rm) - - min(data[drawings], na.rm = na.rm)) - sd_bs <- c(sd_bs, sd(data[drawings], na.rm = na.rm)) - mad_bs <- c(mad_bs, mad(data[drawings], na.rm = na.rm)) + iqr_bs[jmix] <- IQR(data[drawings], na.rm = na.rm) + maxmin_bs[jmix] <- max(data[drawings], na.rm = na.rm) - + min(data[drawings], na.rm = na.rm) + sd_bs[jmix] <- sd(data[drawings], na.rm = na.rm) + mad_bs[jmix] <- mad(data[drawings], na.rm = na.rm) } # Calculate confidence interval with the bootstrapping results diff --git a/man/Spread.Rd b/man/Spread.Rd index e26bc14..c5fc4d1 100644 --- a/man/Spread.Rd +++ b/man/Spread.Rd @@ -10,7 +10,7 @@ Spread( compute_dim = "member", na.rm = TRUE, conf = TRUE, - conf.lev = 0.95, + alpha = 0.05, ncores = NULL ) } @@ -27,8 +27,8 @@ kept (FALSE) for computation. The default value is TRUE.} \item{conf}{A logical value indicating whether to compute the confidence intervals or not. The default value is TRUE.} -\item{conf.lev}{A numeric value of the confidence level for the computation. -The default value is 0.95.} +\item{alpha}{A numeric of the significance level to be used in the +statistical significance test. The default value is 0.05.} \item{ncores}{An integer indicating the number of cores to use for parallel computation. The default value is NULL.} diff --git a/tests/testthat/test-Spread.R b/tests/testthat/test-Spread.R index 1d299a6..d0d55cd 100644 --- a/tests/testthat/test-Spread.R +++ b/tests/testthat/test-Spread.R @@ -45,10 +45,10 @@ test_that("1. Input checks", { Spread(dat1, conf = 0.1), "Parameter 'conf' must be one logical value." ) - # conf.lev + # alpha expect_error( - Spread(dat1, conf.lev = c(0.05, 0.95)), - "Parameter 'conf.lev' must be a numeric number between 0 and 1." + Spread(dat1, alpha = c(0.05, 0.95)), + "Parameter 'alpha' must be a numeric number between 0 and 1." 
) # ncores expect_error( -- GitLab From 91d11c2c8b01f2253a95675ff80ca15b5a0f79fd Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 3 Apr 2023 15:44:49 +0200 Subject: [PATCH 10/13] dat_dim default change to NULL --- R/RMS.R | 23 ++++++++++++----------- R/RMSSS.R | 12 ++++++------ tests/testthat/test-RMSSS.R | 10 +++++----- 3 files changed, 23 insertions(+), 22 deletions(-) diff --git a/R/RMS.R b/R/RMS.R index 8a1899a..3f4ae1f 100644 --- a/R/RMS.R +++ b/R/RMS.R @@ -262,18 +262,19 @@ RMS <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, eno[n_exp, n_obs] <- .Eno(dif[, n_exp, n_obs], na.action = na.pass) } } + if (conf) { + # conf.lower + chi <- sapply(1:nobs, function(i) { + qchisq(confhigh, eno[, i] - 1) + }) + conf.lower <- (eno * rms ** 2 / chi) ** 0.5 - # conf.lower - chi <- sapply(1:nobs, function(i) { - qchisq(confhigh, eno[, i] - 1) - }) - conf.lower <- (eno * rms ** 2 / chi) ** 0.5 - - # conf.upper - chi <- sapply(1:nobs, function(i) { - qchisq(conflow, eno[, i] - 1) - }) - conf.upper <- (eno * rms ** 2 / chi) ** 0.5 + # conf.upper + chi <- sapply(1:nobs, function(i) { + qchisq(conflow, eno[, i] - 1) + }) + conf.upper <- (eno * rms ** 2 / chi) ** 0.5 + } } if (pval | sign) { diff --git a/R/RMSSS.R b/R/RMSSS.R index d81b81d..0124b4a 100644 --- a/R/RMSSS.R +++ b/R/RMSSS.R @@ -91,10 +91,10 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, } if (is.null(dim(exp)) & is.null(dim(obs))) { #is vector if (length(exp) == length(obs)) { - exp <- array(exp, dim = c(length(exp), 1)) - names(dim(exp)) <- c(time_dim, dat_dim) - obs <- array(obs, dim = c(length(obs), 1)) - names(dim(obs)) <- c(time_dim, dat_dim) + exp <- array(exp, dim = c(length(exp))) + names(dim(exp)) <- c(time_dim) + obs <- array(obs, dim = c(length(obs))) + names(dim(obs)) <- c(time_dim) } else { stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", "dimensions time_dim and dat_dim, or vector of same length.")) @@ -103,8 +103,8 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", "dimensions time_dim and dat_dim, or vector of same length.")) } - if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + if(any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp' and 'obs' must have dimension names.") } if(!all(names(dim(exp)) %in% names(dim(obs))) | diff --git a/tests/testthat/test-RMSSS.R b/tests/testthat/test-RMSSS.R index fa019e6..5619f46 100644 --- a/tests/testthat/test-RMSSS.R +++ b/tests/testthat/test-RMSSS.R @@ -110,7 +110,7 @@ test_that("1. Input checks", { ) expect_error( RMSSS(exp = array(1:10, dim = c(sdate = 1, dataset = 5, a = 1)), - obs = array(1:4, dim = c(a = 1, sdate = 1, dataset = 2))), + obs = array(1:4, dim = c(a = 1, sdate = 1, dataset = 2)), dat_dim = "dataset"), "The length of time_dim must be more than 2 to compute RMSSS." ) }) @@ -221,16 +221,16 @@ test_that("2. Output checks: case 1", { test_that("3. 
Output checks: case 2", { expect_equal( - dim(RMSSS(exp2, obs2, time_dim = 'time')$rmsss), + dim(RMSSS(exp2, obs2, time_dim = 'time', dat_dim = "dataset")$rmsss), c(nexp = 2, nobs = 1, dat = 1, lon = 3, lat = 2) ) expect_equal( - mean(RMSSS(exp2, obs2, time_dim = 'time')$rmsss), + mean(RMSSS(exp2, obs2, time_dim = 'time', dat_dim = "dataset")$rmsss), -0.3912208, tolerance = 0.00001 ) expect_equal( - range(RMSSS(exp2, obs2, time_dim = 'time')$p.val), + range(RMSSS(exp2, obs2, time_dim = 'time', dat_dim = "dataset")$p.val), c(0.2627770, 0.9868412), tolerance = 0.00001 ) @@ -243,7 +243,7 @@ test_that("4. Output checks: case 3", { expect_equal( dim(RMSSS(exp3, obs3)$rmsss), - c(nexp = 1, nobs = 1) + NULL ) expect_equal( as.vector(RMSSS(exp3, obs3)$rmsss), -- GitLab From 4ab1b5efab53f7572d12f6f2a3d9742dbe14b3ce Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 3 Apr 2023 15:51:18 +0200 Subject: [PATCH 11/13] Modification for conf.lev --- R/Corr.R | 9 +-------- R/RMSSS.R | 2 +- R/Spectrum.R | 18 +++++++++--------- man/Corr.Rd | 3 --- man/Spectrum.Rd | 6 +++--- tests/testthat/test-Spectrum.R | 6 +++--- 6 files changed, 17 insertions(+), 27 deletions(-) diff --git a/R/Corr.R b/R/Corr.R index 3430647..76a1af4 100644 --- a/R/Corr.R +++ b/R/Corr.R @@ -47,7 +47,6 @@ #' FALSE. #'@param alpha A numeric indicating the significance level for the statistical #' significance test. The default value is 0.05. -#'@param conf.lev Deprecated. Use alpha now instead. alpha = 1 - conf.lev. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -108,7 +107,7 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', comp_dim = NULL, limits = NULL, method = 'pearson', memb_dim = NULL, memb = TRUE, pval = TRUE, conf = TRUE, sign = FALSE, - alpha = 0.05, conf.lev = NULL, ncores = NULL) { + alpha = 0.05, ncores = NULL) { # Check inputs ## exp and obs (1) @@ -196,12 +195,6 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', if (!is.logical(sign) | length(sign) > 1) { stop("Parameter 'sign' must be one logical value.") } - ## conf.lev - ##NOTE: remove the parameter and the warning after v1.4.0 - if (!missing("conf.lev")) { - .warning(paste0("Argument 'conf.lev' is deprecated. Please use 'alpha' instead. ", - "'alpha' = ", 1 - conf.lev, " is used."), tag = '! Deprecation: ') - } ## alpha if (!is.numeric(alpha) | alpha < 0 | alpha > 1 | length(alpha) > 1) { stop("Parameter 'alpha' must be a numeric number between 0 and 1.") diff --git a/R/RMSSS.R b/R/RMSSS.R index 0124b4a..cf45fa6 100644 --- a/R/RMSSS.R +++ b/R/RMSSS.R @@ -368,7 +368,7 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, ################################################# # if (conf) { -# conflow <- (1 - conf.lev) / 2 +# conflow <- alpha / 2 # confhigh <- 1 - conflow # conf_low <- array(dim = c(nexp = nexp, nobs = nobs)) # conf_high <- array(dim = c(nexp = nexp, nobs = nobs)) diff --git a/R/Spectrum.R b/R/Spectrum.R index 2cbb167..a75ead6 100644 --- a/R/Spectrum.R +++ b/R/Spectrum.R @@ -15,8 +15,8 @@ #' evenly spaced in time. #'@param time_dim A character string indicating the dimension along which to #' compute the frequency spectrum. The default value is 'ftime'. -#'@param conf.lev A numeric indicating the confidence level for the Monte-Carlo -#' significance test. The default value is 0.95. +#'@param alpha A numeric indicating the significance level for the Monte-Carlo +#' significance test. The default value is 0.05. 
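A minimal usage sketch of the renamed argument (not part of the patch itself; it assumes the patched s2dv is attached, and the series length and seed below are invented):

library(s2dv)
set.seed(10)
ts <- array(rnorm(64), dim = c(ftime = 64))
# 'alpha' replaces the old 'conf.lev'; alpha = 1 - conf.lev
spec <- Spectrum(ts, time_dim = 'ftime', alpha = 0.05)
# spec[, 1]: frequency, spec[, 2]: spectral power, spec[, 3]: the
# Monte-Carlo red-noise threshold at the 1 - alpha level
dim(spec)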
#'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -45,7 +45,7 @@ #'@import multiApply #'@importFrom stats spectrum cor rnorm sd quantile #'@export -Spectrum <- function(data, time_dim = 'ftime', conf.lev = 0.95, ncores = NULL) { +Spectrum <- function(data, time_dim = 'ftime', alpha = 0.05, ncores = NULL) { # Check inputs ## data @@ -69,9 +69,9 @@ Spectrum <- function(data, time_dim = 'ftime', conf.lev = 0.95, ncores = NULL) { if (!time_dim %in% names(dim(data))) { stop("Parameter 'time_dim' is not found in 'data' dimension.") } - ## conf.lev - if (!is.numeric(conf.lev) | conf.lev < 0 | conf.lev > 1 | length(conf.lev) > 1) { - stop("Parameter 'conf.lev' must be a numeric number between 0 and 1.") + ## alpha + if (!is.numeric(alpha) | alpha < 0 | alpha > 1 | length(alpha) > 1) { + stop("Parameter 'alpha' must be a numeric number between 0 and 1.") } ## ncores if (!is.null(ncores)) { @@ -88,13 +88,13 @@ Spectrum <- function(data, time_dim = 'ftime', conf.lev = 0.95, ncores = NULL) { target_dims = time_dim, fun = .Spectrum, output_dims = c(time_dim, 'stats'), - conf.lev = conf.lev, + alpha = alpha, ncores = ncores)$output1 return(output) } -.Spectrum <- function(data, conf.lev = 0.95) { +.Spectrum <- function(data, alpha = 0.05) { # data: [time] data <- data[is.na(data) == FALSE] @@ -119,7 +119,7 @@ Spectrum <- function(data, time_dim = 'ftime', conf.lev = 0.95, ncores = NULL) { store[jt, ] <- toto2$spec } for (jx in 1:length(tmp$spec)) { - output[jx, 3] <- quantile(store[, jx], conf.lev) + output[jx, 3] <- quantile(store[, jx], 1 - alpha) } } else { output <- NA diff --git a/man/Corr.Rd b/man/Corr.Rd index bbb1e34..10bdf71 100644 --- a/man/Corr.Rd +++ b/man/Corr.Rd @@ -18,7 +18,6 @@ Corr( conf = TRUE, sign = FALSE, alpha = 0.05, - conf.lev = NULL, ncores = NULL ) } @@ -67,8 +66,6 @@ FALSE.} \item{alpha}{A numeric indicating the significance level for the statistical significance test. The default value is 0.05.} -\item{conf.lev}{Deprecated. Use alpha now instead. alpha = 1 - conf.lev.} - \item{ncores}{An integer indicating the number of cores to use for parallel computation. The default value is NULL.} } diff --git a/man/Spectrum.Rd b/man/Spectrum.Rd index 84b39c0..18671e5 100644 --- a/man/Spectrum.Rd +++ b/man/Spectrum.Rd @@ -4,7 +4,7 @@ \alias{Spectrum} \title{Estimate frequency spectrum} \usage{ -Spectrum(data, time_dim = "ftime", conf.lev = 0.95, ncores = NULL) +Spectrum(data, time_dim = "ftime", alpha = 0.05, ncores = NULL) } \arguments{ \item{data}{A vector or numeric array of which the frequency spectrum is @@ -15,8 +15,8 @@ evenly spaced in time.} \item{time_dim}{A character string indicating the dimension along which to compute the frequency spectrum. The default value is 'ftime'.} -\item{conf.lev}{A numeric indicating the confidence level for the Monte-Carlo -significance test. The default value is 0.95.} +\item{alpha}{A numeric indicating the significance level for the Monte-Carlo +significance test. The default value is 0.05.} \item{ncores}{An integer indicating the number of cores to use for parallel computation. The default value is NULL.} diff --git a/tests/testthat/test-Spectrum.R b/tests/testthat/test-Spectrum.R index caf53d3..9c64259 100644 --- a/tests/testthat/test-Spectrum.R +++ b/tests/testthat/test-Spectrum.R @@ -38,10 +38,10 @@ test_that("1. Input checks", { Spectrum(dat1, time_dim = 2), "Parameter 'time_dim' must be a character string." 
) - # conf.lev + # alpha expect_error( - Spectrum(dat1, conf.lev = -1), - "Parameter 'conf.lev' must be a numeric number between 0 and 1.", + Spectrum(dat1, alpha = -1), + "Parameter 'alpha' must be a numeric number between 0 and 1.", fixed = T ) # ncores -- GitLab From 9342c5b047e0322d08313f7d098fc87a9fdab6fc Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 5 Apr 2023 17:48:41 +0200 Subject: [PATCH 12/13] Change dat_dim default to NULL; fix examples --- R/ACC.R | 44 ++++++----- R/Corr.R | 14 ++-- R/GetProbs.R | 41 +++++----- R/Plot2VarsVsLTime.R | 6 +- R/PlotACC.R | 8 +- R/PlotVsLTime.R | 5 +- R/RMS.R | 2 +- R/RatioSDRMS.R | 14 ++-- R/Spread.R | 2 + R/UltimateBrier.R | 10 +-- man/ACC.Rd | 10 +-- man/Corr.Rd | 12 +-- man/GetProbs.Rd | 42 +++++----- man/Plot2VarsVsLTime.Rd | 6 +- man/PlotACC.Rd | 5 +- man/PlotVsLTime.Rd | 5 +- man/RMS.Rd | 2 +- man/RatioSDRMS.Rd | 14 ++-- man/Spread.Rd | 2 + man/UltimateBrier.Rd | 8 +- tests/testthat/test-ACC.R | 14 ++-- tests/testthat/test-Corr.R | 114 ++++++++++++++-------------- tests/testthat/test-RatioSDRMS.R | 36 ++++----- tests/testthat/test-UltimateBrier.R | 76 +++++++++---------- 24 files changed, 254 insertions(+), 238 deletions(-) diff --git a/R/ACC.R b/R/ACC.R index 9da53aa..fcd1735 100644 --- a/R/ACC.R +++ b/R/ACC.R @@ -16,8 +16,7 @@ #' The dimension should be the same as 'exp' except the length of 'dat_dim' #' and 'memb_dim'. #'@param dat_dim A character string indicating the name of dataset (nobs/nexp) -#' dimension. The default value is 'dataset'. If there is no dataset -#' dimension, set NULL. +#' dimension. The default value is NULL (no dataset). #'@param lat_dim A character string indicating the name of the latitude #' dimension of 'exp' and 'obs' along which ACC is computed. The default value #' is 'lat'. @@ -119,8 +118,9 @@ #'clim <- Clim(sampleData$mod, sampleData$obs) #'ano_exp <- Ano(sampleData$mod, clim$clim_exp) #'ano_obs <- Ano(sampleData$obs, clim$clim_obs) -#'acc <- ACC(ano_exp, ano_obs, lat = sampleData$lat) -#'acc_bootstrap <- ACC(ano_exp, ano_obs, conftype = 'bootstrap', lat = sampleData$lat) +#'acc <- ACC(ano_exp, ano_obs, lat = sampleData$lat, dat_dim = 'dataset') +#'acc_bootstrap <- ACC(ano_exp, ano_obs, conftype = 'bootstrap', +#' lat = sampleData$lat, dat_dim = 'dataset') #'# Combine acc results for PlotACC #'res <- array(c(acc$conf.lower, acc$acc, acc$conf.upper, acc$p.val), #' dim = c(dim(acc$acc), 4)) @@ -138,7 +138,7 @@ #'@importFrom stats qt qnorm quantile #'@importFrom ClimProjDiags Subset #'@export -ACC <- function(exp, obs, dat_dim = 'dataset', lat_dim = 'lat', lon_dim = 'lon', +ACC <- function(exp, obs, dat_dim = NULL, lat_dim = 'lat', lon_dim = 'lon', space_dim = c('lat', 'lon'), avg_dim = 'sdate', memb_dim = 'member', lat = NULL, lon = NULL, lonlatbox = NULL, alpha = 0.05, pval = TRUE, sign = FALSE, conf = TRUE, conftype = "parametric", @@ -369,7 +369,7 @@ ACC <- function(exp, obs, dat_dim = 'dataset', lat_dim = 'lat', lon_dim = 'lon', return(res) } -.ACC <- function(exp, obs, dat_dim = 'dataset', avg_dim = 'sdate', lat, alpha = 0.05, +.ACC <- function(exp, obs, dat_dim = NULL, avg_dim = 'sdate', lat, alpha = 0.05, pval = TRUE, sign = FALSE, conf = TRUE, conftype = "parametric") { # .ACC() should use all the spatial points to calculate ACC. It returns [nexp, nobs]. # If dat_dim = NULL, it returns a number. 
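A sketch of the new NULL default in use (it assumes the patched s2dv is attached; the toy field dimensions, seed, and latitude values are invented):

library(s2dv)
set.seed(11)
exp <- array(rnorm(240), dim = c(member = 2, sdate = 6, lat = 5, lon = 4))
obs <- array(rnorm(120), dim = c(member = 1, sdate = 6, lat = 5, lon = 4))
# with dat_dim = NULL, no dataset dimension has to be declared
acc <- ACC(exp, obs, lat = seq(-20, 20, length.out = 5))
dim(acc$acc)   # no nexp/nobs dimensions in the output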
@@ -386,8 +386,8 @@ ACC <- function(exp, obs, dat_dim = 'dataset', lat_dim = 'lat', lon_dim = 'lon', nexp <- 1 nobs <- 1 } else { - nexp <- as.numeric(dim(exp)[length(dim(exp))]) - nobs <- as.numeric(dim(obs)[length(dim(obs))]) + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) } if (is.null(avg_dim)) { @@ -427,12 +427,12 @@ ACC <- function(exp, obs, dat_dim = 'dataset', lat_dim = 'lat', lon_dim = 'lon', } else { # [lat, lon, dat], [lat, lon, avg_dim], or [lat, lon, avg_dim, dat] # exp exp <- array(exp, dim = c(prod(dim_exp[1:2]), dim_exp[3:length(dim_exp)])) - mean_exp <- apply(exp, 2:length(dim(exp)), mean, na.rm = TRUE) # [avg_dim, (dat)] + mean_exp <- colMeans(exp, na.rm = TRUE) # [avg_dim, (dat)] mean_exp <- rep(as.vector(mean_exp), each = prod(dim_exp[1:2])) exp <- array(sqrt(wt) * (as.vector(exp) - mean_exp), dim = dim_exp) # obs obs <- array(obs, dim = c(prod(dim_obs[1:2]), dim_obs[3:length(dim_obs)])) - mean_obs <- apply(obs, 2:length(dim(obs)), mean, na.rm = TRUE) # [avg_dim, (dat)] + mean_obs <- colMeans(obs, na.rm = TRUE) # [avg_dim, (dat)] mean_obs <- rep(as.vector(mean_obs), each = prod(dim_obs[1:2])) obs <- array(sqrt(wt) * (as.vector(obs) - mean_obs), dim = dim_obs) } @@ -571,18 +571,26 @@ ACC <- function(exp, obs, dat_dim = 'dataset', lat_dim = 'lat', lon_dim = 'lon', } -.ACC_bootstrap <- function(exp, obs, dat_dim = 'dataset', +.ACC_bootstrap <- function(exp, obs, dat_dim = NULL, avg_dim = 'sdate', memb_dim = NULL, lat, alpha = 0.05, pval = TRUE, sign = FALSE, conf = TRUE, conftype = "parametric") { # if (is.null(avg_dim)) - # exp: [memb_exp, dat_exp, lat, lon] - # obs: [memb_obs, dat_obs, lat, lon] + # exp: [memb_exp, (dat_exp), lat, lon] + # obs: [memb_obs, (dat_obs), lat, lon] # if (!is.null(avg_dim)) - # exp: [memb_exp, dat_exp, avg_dim, lat, lon] - # obs: [memb_obs, dat_obs, avg_dim, lat, lon] + # exp: [memb_exp, (dat_exp), avg_dim, lat, lon] + # obs: [memb_obs, (dat_obs), avg_dim, lat, lon] + + if (is.null(dat_dim)) { + nexp <- 1 + nobs <- 1 + dim(exp) <- c(dim(exp)[1], dat = 1, dim(exp)[-1]) + dim(obs) <- c(dim(obs)[1], dat = 1, dim(obs)[-1]) + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + } - nexp <- as.numeric(dim(exp)[2]) - nobs <- as.numeric(dim(obs)[2]) nmembexp <- as.numeric(dim(exp)[1]) nmembobs <- as.numeric(dim(obs)[1]) @@ -631,7 +639,7 @@ ACC <- function(exp, obs, dat_dim = 'dataset', lat_dim = 'lat', lon_dim = 'lon', #calculate the ACC of the randomized field tmpACC <- .ACC(drawexp, drawobs, conf = FALSE, pval = FALSE, sign = FALSE, - avg_dim = avg_dim, lat = lat) + avg_dim = avg_dim, lat = lat, dat_dim = dat_dim) if (is.null(avg_dim)) { acc_draw[, , jdraw] <- tmpACC$acc } else { diff --git a/R/Corr.R b/R/Corr.R index 76a1af4..d00f755 100644 --- a/R/Corr.R +++ b/R/Corr.R @@ -23,8 +23,7 @@ #'@param time_dim A character string indicating the name of dimension along #' which the correlations are computed. The default value is 'sdate'. #'@param dat_dim A character string indicating the name of dataset (nobs/nexp) -#' dimension. The default value is 'dataset'. If there is no dataset -#' dimension, set NULL. +#' dimension. The default value is NULL (no dataset). #'@param comp_dim A character string indicating the name of dimension along which #' obs is taken into account only if it is complete. The default value #' is NULL. 
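Under the new default, dataset pairing in Corr() must be requested explicitly; a sketch (assuming the patched s2dv; the toy sizes and seed are invented):

library(s2dv)
set.seed(12)
exp <- array(rnorm(60), dim = c(dataset = 3, sdate = 10, ftime = 2))
obs <- array(rnorm(20), dim = c(dataset = 1, sdate = 10, ftime = 2))
res <- Corr(exp, obs, dat_dim = 'dataset', alpha = 0.1, sign = TRUE)
# expected components: corr, p.val, conf.lower, conf.upper and sign
names(res)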
@@ -90,20 +89,21 @@ #'leadtimes_per_startdate <- 60 #'corr <- Corr(MeanDims(smooth_ano_exp, 'member'), #' MeanDims(smooth_ano_obs, 'member'), -#' comp_dim = 'ftime', +#' comp_dim = 'ftime', dat_dim = 'dataset', #' limits = c(ceiling((runmean_months + 1) / 2), #' leadtimes_per_startdate - floor(runmean_months / 2))) #' #'# Case 2: Keep member dimension -#'corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member') +#'corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member', dat_dim = 'dataset') #'# ensemble mean -#'corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member', memb = FALSE) +#'corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member', memb = FALSE, +#' dat_dim = 'dataset') #' #'@import multiApply #'@importFrom ClimProjDiags Subset #'@importFrom stats cor pt qnorm #'@export -Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', +Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, comp_dim = NULL, limits = NULL, method = 'pearson', memb_dim = NULL, memb = TRUE, pval = TRUE, conf = TRUE, sign = FALSE, @@ -275,7 +275,7 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', return(res) } -.Corr <- function(exp, obs, dat_dim = 'dataset', memb_dim = 'member', +.Corr <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', time_dim = 'sdate', method = 'pearson', conf = TRUE, pval = TRUE, sign = FALSE, alpha = 0.05) { if (is.null(memb_dim)) { diff --git a/R/GetProbs.R b/R/GetProbs.R index 6a4dcfe..59304b4 100644 --- a/R/GetProbs.R +++ b/R/GetProbs.R @@ -1,13 +1,14 @@ #'Compute probabilistic forecasts or the corresponding observations #' -#'Compute probabilistic forecasts from an ensemble based on the relative thresholds, -#'or the probabilistic observations (i.e., which probabilistic category was observed). -#'A reference period can be specified to calculate the absolute thresholds between -#'each probabilistic category. The absolute thresholds can be computed in cross-validation -#'mode. If data is an ensemble, the probabilities are calculated as the percentage of -#'members that fall into each category. For observations (or forecast without member -#'dimension), 1 means that the event happened, while 0 indicates that the event did -#'not happen. Weighted probabilities can be computed if the weights are provided for +#'Compute probabilistic forecasts from an ensemble based on the relative +#'thresholds, or the probabilistic observations (i.e., which probabilistic +#'category was observed). A reference period can be specified to calculate the +#'absolute thresholds between each probabilistic category. The absolute +#'thresholds can be computed in cross-validation mode. If data is an ensemble, +#'the probabilities are calculated as the percentage of members that fall into +#'each category. For observations (or forecast without member dimension), 1 +#'means that the event happened, while 0 indicates that the event did not +#'happen. Weighted probabilities can be computed if the weights are provided for #'each ensemble member and time step. #' #'@param data A named numerical array of the forecasts or observations with, at @@ -18,21 +19,20 @@ #' to compute the probabilities of the forecast, or NULL if there is no member #' dimension (e.g., for observations, or for forecast with only one ensemble #' member). The default value is 'member'. -#'@param dat_dim A character string indicating the name of dataset dimension. -#' The default value is NULL, which means no dataset dimension. 
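With 'dat_dim' removed from GetProbs(), a call now looks like the sketch below (it assumes the patched s2dv; the array shape and the in-sample period 4:17 follow the example in the help page):

library(s2dv)
set.seed(13)
data <- array(rnorm(2000), dim = c(ensemble = 25, sdate = 20, time = 4))
probs <- GetProbs(data, time_dim = 'sdate', memb_dim = 'ensemble',
                  indices_for_quantiles = 4:17)
dim(probs)   # bin = 3, sdate = 20, time = 4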
#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to #' 1) between the categories. The default value is c(1/3, 2/3), which #' corresponds to tercile equiprobable categories. -#'@param indices_for_quantiles A vector of the indices to be taken along 'time_dim' -#' for computing the absolute thresholds between the probabilistic categories. -#' If NULL, the whole period is used. The default value is NULL. +#'@param indices_for_quantiles A vector of the indices to be taken along +#' 'time_dim' for computing the absolute thresholds between the probabilistic +#' categories. If NULL, the whole period is used. The default value is NULL. #'@param weights A named numerical array of the weights for 'data' with -#' dimensions 'time_dim' and 'memb_dim' (if 'data' has them). The default value is -#' NULL. The ensemble should have at least 70 members or span at least 10 time -#' steps and have more than 45 members if consistency between the weighted and -#' unweighted methodologies is desired. -#'@param cross.val A logical indicating whether to compute the thresholds between -#' probabilistic categories in cross-validation mode. The default value is FALSE. +#' dimensions 'time_dim' and 'memb_dim' (if 'data' has them). The default value +#' is NULL. The ensemble should have at least 70 members or span at least 10 +#' time steps and have more than 45 members if consistency between the weighted +#' and unweighted methodologies is desired. +#'@param cross.val A logical indicating whether to compute the thresholds +#' between probabilistic categories in cross-validation mode. The default value +#' is FALSE. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -43,7 +43,8 @@ #' #'@examples #'data <- array(rnorm(2000), dim = c(ensemble = 25, sdate = 20, time = 4)) -#'res <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', indices_for_quantiles = 4:17) +#'res <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', +#' indices_for_quantiles = 4:17) #' #'@import multiApply #'@importFrom easyVerification convert2prob diff --git a/R/Plot2VarsVsLTime.R b/R/Plot2VarsVsLTime.R index 1c784dd..12e04ae 100644 --- a/R/Plot2VarsVsLTime.R +++ b/R/Plot2VarsVsLTime.R @@ -63,15 +63,17 @@ #'leadtimes_per_startdate <- 60 #'rms <- RMS(MeanDims(smooth_ano_exp, dim_to_mean), #' MeanDims(smooth_ano_obs, dim_to_mean), -#' comp_dim = required_complete_row, +#' comp_dim = required_complete_row, dat_dim = 'dataset', #' limits = c(ceiling((runmean_months + 1) / 2), -#' leadtimes_per_startdate - floor(runmean_months / 2))) +#' leadtimes_per_startdate - floor(runmean_months / 2))) #'smooth_ano_exp_m_sub <- smooth_ano_exp - InsertDim(MeanDims(smooth_ano_exp, 'member', #' na.rm = TRUE), #' posdim = 3, #' lendim = dim(smooth_ano_exp)['member'], #' name = 'member') +#'suppressWarnings({ #'spread <- Spread(smooth_ano_exp_m_sub, compute_dim = c('member', 'sdate')) +#'}) #'#Combine rms outputs into one array #'rms_combine <- abind::abind(rms$conf.lower, rms$rms, rms$conf.upper, along = 0) #'rms_combine <- Reorder(rms_combine, c(2, 3, 1, 4)) diff --git a/R/PlotACC.R b/R/PlotACC.R index a674ff6..6ea5182 100644 --- a/R/PlotACC.R +++ b/R/PlotACC.R @@ -65,8 +65,9 @@ #'ano_exp <- Ano(sampleData$mod, clim$clim_exp) #'ano_obs <- Ano(sampleData$obs, clim$clim_obs) -#'acc <- ACC(ano_exp, ano_obs, lat = sampleData$lat) -#'acc_bootstrap <- ACC(ano_exp, ano_obs, lat = sampleData$lat, conftype = 'bootstrap') +#'acc <- ACC(ano_exp, 
ano_obs, lat = sampleData$lat, dat_dim = 'dataset') +#'acc_bootstrap <- ACC(ano_exp, ano_obs, lat = sampleData$lat, conftype = 'bootstrap', +#' dat_dim = 'dataset') #'# Combine acc results for PlotACC #'res <- array(c(acc$conf.lower, acc$acc, acc$conf.upper, acc$p.val), #' dim = c(dim(acc$acc), 4)) @@ -86,7 +87,8 @@ PlotACC <- function(ACC, sdates, toptitle = "", sizetit = 1, ytitle = "", width = 8, height = 5, size_units = 'in', res = 100, ...) { # Process the user graphical parameters that may be passed in the call ## Graphical parameters to exclude - excludedArgs <- c("cex", "cex.axis", "cex.lab", "cex.main", "col", "lab", "las", "lty", "lwd", "mai", "mgp", "new", "pch", "pin", "ps", "pty") + excludedArgs <- c("cex", "cex.axis", "cex.lab", "cex.main", "col", "lab", "las", "lty", + "lwd", "mai", "mgp", "new", "pch", "pin", "ps", "pty") userArgs <- .FilterUserGraphicArgs(excludedArgs, ...) # If there is any filenames to store the graphics, process them diff --git a/R/PlotVsLTime.R b/R/PlotVsLTime.R index 94c82e0..c51e31b 100644 --- a/R/PlotVsLTime.R +++ b/R/PlotVsLTime.R @@ -79,11 +79,12 @@ #'leadtimes_per_startdate <- 60 #'corr <- Corr(MeanDims(smooth_ano_exp, dim_to_mean), #' MeanDims(smooth_ano_obs, dim_to_mean), -#' comp_dim = required_complete_row, +#' comp_dim = required_complete_row, dat_dim = 'dataset', #' limits = c(ceiling((runmean_months + 1) / 2), #' leadtimes_per_startdate - floor(runmean_months / 2))) #'# Combine corr results for plotting -#'corr_combine <- abind::abind(corr$conf.lower, corr$corr, corr$conf.upper, corr$p.val, along = 0) +#'corr_combine <- abind::abind(corr$conf.lower, corr$corr, corr$conf.upper, corr$p.val, +#' along = 0) #'corr_combine <- Reorder(corr_combine, c(2, 3, 1, 4)) #'\donttest{ #'PlotVsLTime(corr_combine, toptitle = "correlations", ytitle = "correlation", diff --git a/R/RMS.R b/R/RMS.R index 3f4ae1f..7664d97 100644 --- a/R/RMS.R +++ b/R/RMS.R @@ -71,7 +71,7 @@ #' set.seed(2) #' na <- floor(runif(10, min = 1, max = 80)) #' obs1[na] <- NA -#' res <- RMS(exp1, obs1, comp_dim = 'ftime', dat_time = 'dat') +#' res <- RMS(exp1, obs1, comp_dim = 'ftime', dat_dim = 'dat') #' # Renew example when Ano and Smoothing are ready #' #'@import multiApply diff --git a/R/RatioSDRMS.R b/R/RatioSDRMS.R index d527625..2fe259c 100644 --- a/R/RatioSDRMS.R +++ b/R/RatioSDRMS.R @@ -3,7 +3,7 @@ #'Compute the ratio between the standard deviation of the members around the #'ensemble mean in experimental data and the RMSE between the ensemble mean of #'experimental and observational data. The p-value is provided by a one-sided -#'Fischer test. +#'Fisher's test. #' #'@param exp A named numeric array of experimental data with at least two #' dimensions 'memb_dim' and 'time_dim'. @@ -11,8 +11,7 @@ #' dimensions 'memb_dim' and 'time_dim'. It should have the same dimensions as #' parameter 'exp' except along 'dat_dim' and 'memb_dim'. #'@param dat_dim A character string indicating the name of dataset (nobs/nexp) -#' dimension. If there is no dataset dimension, set as NULL. The default value -#' is 'dataset'. +#' dimension. The default value is NULL (no dataset). #'@param memb_dim A character string indicating the name of the member #' dimension. It must be one dimension in 'exp' and 'obs'. The default value #' is 'member'. @@ -26,20 +25,19 @@ #'@return A list of two arrays with dimensions c(nexp, nobs, the rest of #' dimensions of 'exp' and 'obs' except memb_dim and time_dim), which nexp is #' the length of dat_dim of 'exp' and nobs is the length of dat_dim of 'obs'. 
-#' (only present if \code{pval = TRUE}) of the one-sided Fisher test with
-#'Ho: SD/RMSE = 1.\cr\cr
+#' If dat_dim is NULL, nexp and nobs are omitted. \cr
 #'\item{$ratio}{
 #'  The ratio of the ensemble spread and RMSE.
 #'}
 #'\item{$p_val}{
-#'  The p-value of the one-sided Fisher test with Ho: SD/RMSE = 1. Only present
+#'  The p-value of the one-sided Fisher's test with Ho: SD/RMSE = 1. Only present
 #'  if \code{pval = TRUE}.
 #'}
 #'
 #'@examples
 #'# Load sample data as in Load() example:
 #'example(Load)
-#'rsdrms <- RatioSDRMS(sampleData$mod, sampleData$obs)
+#'rsdrms <- RatioSDRMS(sampleData$mod, sampleData$obs, dat_dim = 'dataset')
 #'# Reorder the data in order to plot it with PlotVsLTime
 #'rsdrms_plot <- array(dim = c(dim(rsdrms$ratio)[1:2], 4, dim(rsdrms$ratio)[3]))
 #'rsdrms_plot[, , 2, ] <- rsdrms$ratio
@@ -52,7 +50,7 @@
 #'
 #'@import multiApply
 #'@export
-RatioSDRMS <- function(exp, obs, dat_dim = 'dataset', memb_dim = 'member',
+RatioSDRMS <- function(exp, obs, dat_dim = NULL, memb_dim = 'member',
                        time_dim = 'sdate', pval = TRUE, ncores = NULL) {
 
   # Check inputs
diff --git a/R/Spread.R b/R/Spread.R
index 09bbe05..5fba8ca 100644
--- a/R/Spread.R
+++ b/R/Spread.R
@@ -52,7 +52,9 @@
 #'                             posdim = 3,
 #'                             lendim = dim(smooth_ano_exp)['member'],
 #'                             name = 'member')
+#'suppressWarnings({
 #'spread <- Spread(smooth_ano_exp_m_sub, compute_dim = c('member', 'sdate'))
+#'})
 #'
 #'\dontrun{
 #'PlotVsLTime(Reorder(spread$iqr, c('dataset', 'stats', 'ftime')),
diff --git a/R/UltimateBrier.R b/R/UltimateBrier.R
index d2c4ac9..44498a3 100644
--- a/R/UltimateBrier.R
+++ b/R/UltimateBrier.R
@@ -12,7 +12,6 @@
 #' 'memb_dim', the length must be 1. The dimensions should be consistent with
 #' 'exp' except 'dat_dim' and 'memb_dim'. It can be provided by \code{Ano()}.
 #'@param dat_dim A character string indicating the name of the dataset
-#' dimension in 'exp' and 'obs'. The default value is 'dataset'. If there is no dataset
-#' dimension, set NULL.
+#' dimension in 'exp' and 'obs'. The default value is NULL (no dataset).
 #'@param memb_dim A character string indicating the name of the member
 #' dimension in 'exp' (and 'obs') for ensemble mean calculation.
The default @@ -81,12 +81,12 @@ #'clim <- Clim(sampleData$mod, sampleData$obs) #'exp <- Ano(sampleData$mod, clim$clim_exp) #'obs <- Ano(sampleData$obs, clim$clim_obs) -#'bs <- UltimateBrier(exp, obs) -#'bss <- UltimateBrier(exp, obs, type = 'BSS') +#'bs <- UltimateBrier(exp, obs, dat_dim = 'dataset') +#'bss <- UltimateBrier(exp, obs, type = 'BSS', dat_dim = 'dataset') #' #'@import SpecsVerification plyr multiApply #'@export -UltimateBrier <- function(exp, obs, dat_dim = 'dataset', memb_dim = 'member', time_dim = 'sdate', +UltimateBrier <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', time_dim = 'sdate', quantile = TRUE, thr = c(5/100, 95/100), type = 'BS', decomposition = TRUE, ncores = NULL) { @@ -223,7 +223,7 @@ UltimateBrier <- function(exp, obs, dat_dim = 'dataset', memb_dim = 'member', ti return(res) } -.UltimateBrier <- function(exp, obs, dat_dim = 'dataset', memb_dim = 'member', thr = c(5/100, 95/100), +.UltimateBrier <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', thr = c(5/100, 95/100), type = 'BS', decomposition = TRUE) { # If exp and obs are probablistics # exp: [sdate, nexp] diff --git a/man/ACC.Rd b/man/ACC.Rd index 522843a..840f01f 100644 --- a/man/ACC.Rd +++ b/man/ACC.Rd @@ -7,7 +7,7 @@ ACC( exp, obs, - dat_dim = "dataset", + dat_dim = NULL, lat_dim = "lat", lon_dim = "lon", space_dim = c("lat", "lon"), @@ -33,8 +33,7 @@ The dimension should be the same as 'exp' except the length of 'dat_dim' and 'memb_dim'.} \item{dat_dim}{A character string indicating the name of dataset (nobs/nexp) -dimension. The default value is 'dataset'. If there is no dataset -dimension, set NULL.} +dimension. The default value is NULL (no dataset).} \item{lat_dim}{A character string indicating the name of the latitude dimension of 'exp' and 'obs' along which ACC is computed. The default value @@ -161,8 +160,9 @@ sampleData$obs <- Season(sampleData$obs, monini = 11, moninf = 12, monsup = 2) clim <- Clim(sampleData$mod, sampleData$obs) ano_exp <- Ano(sampleData$mod, clim$clim_exp) ano_obs <- Ano(sampleData$obs, clim$clim_obs) -acc <- ACC(ano_exp, ano_obs, lat = sampleData$lat) -acc_bootstrap <- ACC(ano_exp, ano_obs, conftype = 'bootstrap', lat = sampleData$lat) +acc <- ACC(ano_exp, ano_obs, lat = sampleData$lat, dat_dim = 'dataset') +acc_bootstrap <- ACC(ano_exp, ano_obs, conftype = 'bootstrap', + lat = sampleData$lat, dat_dim = 'dataset') # Combine acc results for PlotACC res <- array(c(acc$conf.lower, acc$acc, acc$conf.upper, acc$p.val), dim = c(dim(acc$acc), 4)) diff --git a/man/Corr.Rd b/man/Corr.Rd index 10bdf71..9fc2d31 100644 --- a/man/Corr.Rd +++ b/man/Corr.Rd @@ -8,7 +8,7 @@ Corr( exp, obs, time_dim = "sdate", - dat_dim = "dataset", + dat_dim = NULL, comp_dim = NULL, limits = NULL, method = "pearson", @@ -32,8 +32,7 @@ parameter 'exp' except along 'dat_dim' and 'memb_dim'.} which the correlations are computed. The default value is 'sdate'.} \item{dat_dim}{A character string indicating the name of dataset (nobs/nexp) -dimension. The default value is 'dataset'. If there is no dataset -dimension, set NULL.} +dimension. The default value is NULL (no dataset).} \item{comp_dim}{A character string indicating the name of dimension along which obs is taken into account only if it is complete. 
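The same convention now holds across ACC() and Corr(); a minimal sketch under the assumption that the input arrays carry no dataset dimension, so the new NULL default applies directly (toy data, illustrative only):

set.seed(2)
exp <- array(rnorm(60), dim = c(sdate = 10, lat = 2, lon = 3))
obs <- array(rnorm(60), dim = c(sdate = 10, lat = 2, lon = 3))
res <- Corr(exp, obs, memb_dim = NULL)   # dat_dim = NULL by default
dim(res$corr)                            # c(lat = 2, lon = 3)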
The default value @@ -126,13 +125,14 @@ required_complete_row <- 3 # Discard start dates which contain any NA lead-time leadtimes_per_startdate <- 60 corr <- Corr(MeanDims(smooth_ano_exp, 'member'), MeanDims(smooth_ano_obs, 'member'), - comp_dim = 'ftime', + comp_dim = 'ftime', dat_dim = 'dataset', limits = c(ceiling((runmean_months + 1) / 2), leadtimes_per_startdate - floor(runmean_months / 2))) # Case 2: Keep member dimension -corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member') +corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member', dat_dim = 'dataset') # ensemble mean -corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member', memb = FALSE) +corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member', memb = FALSE, + dat_dim = 'dataset') } diff --git a/man/GetProbs.Rd b/man/GetProbs.Rd index 27fe68c..fd84d2f 100644 --- a/man/GetProbs.Rd +++ b/man/GetProbs.Rd @@ -27,28 +27,26 @@ to compute the probabilities of the forecast, or NULL if there is no member dimension (e.g., for observations, or for forecast with only one ensemble member). The default value is 'member'.} -\item{indices_for_quantiles}{A vector of the indices to be taken along 'time_dim' -for computing the absolute thresholds between the probabilistic categories. -If NULL, the whole period is used. The default value is NULL.} +\item{indices_for_quantiles}{A vector of the indices to be taken along +'time_dim' for computing the absolute thresholds between the probabilistic +categories. If NULL, the whole period is used. The default value is NULL.} \item{prob_thresholds}{A numeric vector of the relative thresholds (from 0 to 1) between the categories. The default value is c(1/3, 2/3), which corresponds to tercile equiprobable categories.} \item{weights}{A named numerical array of the weights for 'data' with -dimensions 'time_dim' and 'memb_dim' (if 'data' has them). The default value is -NULL. The ensemble should have at least 70 members or span at least 10 time -steps and have more than 45 members if consistency between the weighted and -unweighted methodologies is desired.} +dimensions 'time_dim' and 'memb_dim' (if 'data' has them). The default value +is NULL. The ensemble should have at least 70 members or span at least 10 +time steps and have more than 45 members if consistency between the weighted +and unweighted methodologies is desired.} -\item{cross.val}{A logical indicating whether to compute the thresholds between -probabilistic categories in cross-validation mode. The default value is FALSE.} +\item{cross.val}{A logical indicating whether to compute the thresholds +between probabilistic categories in cross-validation mode. The default value +is FALSE.} \item{ncores}{An integer indicating the number of cores to use for parallel computation. The default value is NULL.} - -\item{dat_dim}{A character string indicating the name of dataset dimension. -The default value is NULL, which means no dataset dimension.} } \value{ A numerical array of probabilities with dimensions c(bin, the rest dimensions @@ -56,18 +54,20 @@ of 'data' except 'memb_dim'). 'bin' dimension has the length of probabilistic categories, i.e., \code{length(prob_thresholds) + 1}. } \description{ -Compute probabilistic forecasts from an ensemble based on the relative thresholds, -or the probabilistic observations (i.e., which probabilistic category was observed). -A reference period can be specified to calculate the absolute thresholds between -each probabilistic category. 
The absolute thresholds can be computed in cross-validation -mode. If data is an ensemble, the probabilities are calculated as the percentage of -members that fall into each category. For observations (or forecast without member -dimension), 1 means that the event happened, while 0 indicates that the event did -not happen. Weighted probabilities can be computed if the weights are provided for +Compute probabilistic forecasts from an ensemble based on the relative +thresholds, or the probabilistic observations (i.e., which probabilistic +category was observed). A reference period can be specified to calculate the +absolute thresholds between each probabilistic category. The absolute +thresholds can be computed in cross-validation mode. If data is an ensemble, +the probabilities are calculated as the percentage of members that fall into +each category. For observations (or forecast without member dimension), 1 +means that the event happened, while 0 indicates that the event did not +happen. Weighted probabilities can be computed if the weights are provided for each ensemble member and time step. } \examples{ data <- array(rnorm(2000), dim = c(ensemble = 25, sdate = 20, time = 4)) -res <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', indices_for_quantiles = 4:17) +res <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', + indices_for_quantiles = 4:17) } diff --git a/man/Plot2VarsVsLTime.Rd b/man/Plot2VarsVsLTime.Rd index 46b9cd5..9eeb928 100644 --- a/man/Plot2VarsVsLTime.Rd +++ b/man/Plot2VarsVsLTime.Rd @@ -115,15 +115,17 @@ required_complete_row <- 'ftime' # discard startdates for which there are NA le leadtimes_per_startdate <- 60 rms <- RMS(MeanDims(smooth_ano_exp, dim_to_mean), MeanDims(smooth_ano_obs, dim_to_mean), - comp_dim = required_complete_row, + comp_dim = required_complete_row, dat_dim = 'dataset', limits = c(ceiling((runmean_months + 1) / 2), - leadtimes_per_startdate - floor(runmean_months / 2))) + leadtimes_per_startdate - floor(runmean_months / 2))) smooth_ano_exp_m_sub <- smooth_ano_exp - InsertDim(MeanDims(smooth_ano_exp, 'member', na.rm = TRUE), posdim = 3, lendim = dim(smooth_ano_exp)['member'], name = 'member') +suppressWarnings({ spread <- Spread(smooth_ano_exp_m_sub, compute_dim = c('member', 'sdate')) +}) #Combine rms outputs into one array rms_combine <- abind::abind(rms$conf.lower, rms$rms, rms$conf.upper, along = 0) rms_combine <- Reorder(rms_combine, c(2, 3, 1, 4)) diff --git a/man/PlotACC.Rd b/man/PlotACC.Rd index 2764de3..9a32f8b 100644 --- a/man/PlotACC.Rd +++ b/man/PlotACC.Rd @@ -110,8 +110,9 @@ sampleData$obs <- Season(sampleData$obs, monini = 11, moninf = 12, monsup = 2) clim <- Clim(sampleData$mod, sampleData$obs) ano_exp <- Ano(sampleData$mod, clim$clim_exp) ano_obs <- Ano(sampleData$obs, clim$clim_obs) -acc <- ACC(ano_exp, ano_obs, lat = sampleData$lat) -acc_bootstrap <- ACC(ano_exp, ano_obs, lat = sampleData$lat, conftype = 'bootstrap') +acc <- ACC(ano_exp, ano_obs, lat = sampleData$lat, dat_dim = 'dataset') +acc_bootstrap <- ACC(ano_exp, ano_obs, lat = sampleData$lat, conftype = 'bootstrap', + dat_dim = 'dataset') # Combine acc results for PlotACC res <- array(c(acc$conf.lower, acc$acc, acc$conf.upper, acc$p.val), dim = c(dim(acc$acc), 4)) diff --git a/man/PlotVsLTime.Rd b/man/PlotVsLTime.Rd index 05e2b42..21cfe53 100644 --- a/man/PlotVsLTime.Rd +++ b/man/PlotVsLTime.Rd @@ -129,11 +129,12 @@ required_complete_row <- 'ftime' # discard startdates for which there are NA le leadtimes_per_startdate <- 60 corr <- 
Corr(MeanDims(smooth_ano_exp, dim_to_mean), MeanDims(smooth_ano_obs, dim_to_mean), - comp_dim = required_complete_row, + comp_dim = required_complete_row, dat_dim = 'dataset', limits = c(ceiling((runmean_months + 1) / 2), leadtimes_per_startdate - floor(runmean_months / 2))) # Combine corr results for plotting -corr_combine <- abind::abind(corr$conf.lower, corr$corr, corr$conf.upper, corr$p.val, along = 0) +corr_combine <- abind::abind(corr$conf.lower, corr$corr, corr$conf.upper, corr$p.val, + along = 0) corr_combine <- Reorder(corr_combine, c(2, 3, 1, 4)) \donttest{ PlotVsLTime(corr_combine, toptitle = "correlations", ytitle = "correlation", diff --git a/man/RMS.Rd b/man/RMS.Rd index f5e46f6..241fbd5 100644 --- a/man/RMS.Rd +++ b/man/RMS.Rd @@ -101,7 +101,7 @@ The confidence interval is computed by the chi2 distribution.\cr set.seed(2) na <- floor(runif(10, min = 1, max = 80)) obs1[na] <- NA - res <- RMS(exp1, obs1, comp_dim = 'ftime', dat_time = 'dat') + res <- RMS(exp1, obs1, comp_dim = 'ftime', dat_dim = 'dat') # Renew example when Ano and Smoothing are ready } diff --git a/man/RatioSDRMS.Rd b/man/RatioSDRMS.Rd index f1f6f3d..07afc46 100644 --- a/man/RatioSDRMS.Rd +++ b/man/RatioSDRMS.Rd @@ -7,7 +7,7 @@ RatioSDRMS( exp, obs, - dat_dim = "dataset", + dat_dim = NULL, memb_dim = "member", time_dim = "sdate", pval = TRUE, @@ -23,8 +23,7 @@ dimensions 'memb_dim' and 'time_dim'. It should have the same dimensions as parameter 'exp' except along 'dat_dim' and 'memb_dim'.} \item{dat_dim}{A character string indicating the name of dataset (nobs/nexp) -dimension. If there is no dataset dimension, set as NULL. The default value -is 'dataset'.} +dimension. The default value is NULL (no dataset).} \item{memb_dim}{A character string indicating the name of the member dimension. It must be one dimension in 'exp' and 'obs'. The default value @@ -43,13 +42,12 @@ computation. The default value is NULL.} A list of two arrays with dimensions c(nexp, nobs, the rest of dimensions of 'exp' and 'obs' except memb_dim and time_dim), which nexp is the length of dat_dim of 'exp' and nobs is the length of dat_dim of 'obs'. -(only present if \code{pval = TRUE}) of the one-sided Fisher test with -Ho: SD/RMSE = 1.\cr\cr + If dat_dim is NULL, nexp and nobs are omitted. \cr \item{$ratio}{ The ratio of the ensemble spread and RMSE. } \item{$p_val}{ - The p-value of the one-sided Fisher test with Ho: SD/RMSE = 1. Only present + The p-value of the one-sided Fisher's test with Ho: SD/RMSE = 1. Only present if \code{pval = TRUE}. } } @@ -57,12 +55,12 @@ Ho: SD/RMSE = 1.\cr\cr Compute the ratio between the standard deviation of the members around the ensemble mean in experimental data and the RMSE between the ensemble mean of experimental and observational data. The p-value is provided by a one-sided -Fischer test. +Fisher's test. 
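As a rough worked sketch of the statistic described above (illustrative only; the package's effective degrees of freedom and NA handling may differ), for a single grid point:

set.seed(3)
exp <- matrix(rnorm(50), nrow = 5)               # 5 members x 10 start dates
obs <- rnorm(10)
ens_mean <- colMeans(exp)
spread <- sqrt(mean(sweep(exp, 2, ens_mean)^2))  # SD of members around the mean
rmse <- sqrt(mean((ens_mean - obs)^2))           # RMSE of the ensemble mean
ratio <- spread / rmse
# One-sided Fisher's test of Ho: SD/RMSE = 1 through the variance ratio;
# the degrees of freedom here are purely illustrative
p.val <- pf(ratio^2, df1 = length(exp) - 1, df2 = length(obs) - 1,
            lower.tail = FALSE)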
 }
 \examples{
 # Load sample data as in Load() example:
 example(Load)
-rsdrms <- RatioSDRMS(sampleData$mod, sampleData$obs)
+rsdrms <- RatioSDRMS(sampleData$mod, sampleData$obs, dat_dim = 'dataset')
 # Reorder the data in order to plot it with PlotVsLTime
 rsdrms_plot <- array(dim = c(dim(rsdrms$ratio)[1:2], 4, dim(rsdrms$ratio)[3]))
 rsdrms_plot[, , 2, ] <- rsdrms$ratio
diff --git a/man/Spread.Rd b/man/Spread.Rd
index c5fc4d1..d3f93bb 100644
--- a/man/Spread.Rd
+++ b/man/Spread.Rd
@@ -72,7 +72,9 @@ smooth_ano_exp_m_sub <- smooth_ano_exp - InsertDim(MeanDims(smooth_ano_exp, 'mem
                                    posdim = 3,
                                    lendim = dim(smooth_ano_exp)['member'],
                                    name = 'member')
+suppressWarnings({
 spread <- Spread(smooth_ano_exp_m_sub, compute_dim = c('member', 'sdate'))
+})
 
 \dontrun{
 PlotVsLTime(Reorder(spread$iqr, c('dataset', 'stats', 'ftime')),
diff --git a/man/UltimateBrier.Rd b/man/UltimateBrier.Rd
index 0dfa772..a20412e 100644
--- a/man/UltimateBrier.Rd
+++ b/man/UltimateBrier.Rd
@@ -7,7 +7,7 @@
 UltimateBrier(
   exp,
   obs,
-  dat_dim = "dataset",
+  dat_dim = NULL,
   memb_dim = "member",
   time_dim = "sdate",
   quantile = TRUE,
@@ -28,7 +28,6 @@ dimensions that at least include 'time_dim'. If it has
 'exp' except 'dat_dim' and 'memb_dim'. It can be provided by \code{Ano()}.}
 
 \item{dat_dim}{A character string indicating the name of the dataset
-dimension in 'exp' and 'obs'. The default value is 'dataset'. If there is no dataset
-dimension, set NULL.}
+dimension in 'exp' and 'obs'. The default value is NULL (no dataset).}
 
 \item{memb_dim}{A character string indicating the name of the member
@@ -109,7 +109,7 @@ sampleData$obs <- Season(sampleData$obs, monini = 11, moninf = 12, monsup = 2)
 clim <- Clim(sampleData$mod, sampleData$obs)
 exp <- Ano(sampleData$mod, clim$clim_exp)
 obs <- Ano(sampleData$obs, clim$clim_obs)
-bs <- UltimateBrier(exp, obs)
-bss <- UltimateBrier(exp, obs, type = 'BSS')
+bs <- UltimateBrier(exp, obs, dat_dim = 'dataset')
+bss <- UltimateBrier(exp, obs, type = 'BSS', dat_dim = 'dataset')
 }
diff --git a/tests/testthat/test-ACC.R b/tests/testthat/test-ACC.R
index 9753922..ab5f8be 100644
--- a/tests/testthat/test-ACC.R
+++ b/tests/testthat/test-ACC.R
@@ -5,11 +5,11 @@ context("s2dv::ACC tests")
 
   # dat1
   set.seed(1)
-  exp1 <- array(rnorm(60), dim = c(dataset = 1, member = 2, sdate = 5,
+  exp1 <- array(rnorm(60), dim = c(member = 2, sdate = 5,
                                    ftime = 1, lat = 2, lon = 3))
   set.seed(2)
-  obs1 <- array(rnorm(30), dim = c(dataset = 1, member = 1, sdate = 5,
+  obs1 <- array(rnorm(30), dim = c(member = 1, sdate = 5,
                                    ftime = 1, lat = 2, lon = 3))
   lat1 <- c(30, 35)
   lon1 <- c(0, 5, 10)
@@ -162,7 +162,7 @@ test_that("2. Output checks: dat1", {
 
   expect_equal(
     dim(ACC(exp1, obs1, lat = lat1, lon = lon1)$acc),
-    c(nexp = 1, nobs = 1, sdate = 5, ftime = 1)
+    c(sdate = 5, ftime = 1)
   )
   expect_equal(
     names(ACC(exp1, obs1, lat = lat1)),
@@ -194,7 +194,7 @@
   )
   expect_equal(
     dim(ACC(exp1, obs1, lat = lat1, dat_dim = 'member', memb_dim = NULL)$acc),
-    c(nexp = 2, nobs = 1, sdate = 5, dataset = 1, ftime = 1)
+    c(nexp = 2, nobs = 1, sdate = 5, ftime = 1)
  )
  expect_equal(
    names(ACC(exp1, obs1, lat = lat1, conf = FALSE)),
@@ -227,16 +227,16 @@ test_that("3.
Output checks: dat2", { expect_equal( - dim(ACC(exp2, obs2, lat = lat2, lon = lon2, memb_dim = NULL)$acc), + dim(ACC(exp2, obs2, lat = lat2, lon = lon2, memb_dim = NULL, dat_dim = 'dataset')$acc), c(nexp = 2, nobs = 1, sdate = 5, ftime = 1) ) expect_equal( - as.vector(ACC(exp2, obs2, lat = lat2, memb_dim = NULL)$acc)[3:7], + as.vector(ACC(exp2, obs2, lat = lat2, memb_dim = NULL, dat_dim = 'dataset')$acc)[3:7], c(-0.3601880, -0.5624773, -0.4603762, -0.6997169, -0.1336961), tolerance = 0.00001 ) expect_equal( - mean(ACC(exp2, obs2, lat = lat2, memb_dim = NULL)$acc), + mean(ACC(exp2, obs2, lat = lat2, memb_dim = NULL, dat_dim = 'dataset')$acc), -0.1484762, tolerance = 0.00001 ) diff --git a/tests/testthat/test-Corr.R b/tests/testthat/test-Corr.R index ef013ca..f4172be 100644 --- a/tests/testthat/test-Corr.R +++ b/tests/testthat/test-Corr.R @@ -47,12 +47,10 @@ context("s2dv::Corr tests") # dat5: exp and obs have memb_dim and dataset = NULL set.seed(1) - exp5 <- array(rnorm(90), dim = c(member = 3, sdate = 5, - lat = 2, lon = 3)) + exp5 <- array(rnorm(90), dim = c(member = 3, sdate = 5, lat = 2, lon = 3)) set.seed(2) - obs5 <- array(rnorm(30), dim = c(member = 1, sdate = 5, - lat = 2, lon = 3)) + obs5 <- array(rnorm(30), dim = c(member = 1, sdate = 5, lat = 2, lon = 3)) # dat6: exp and obs have memb_dim = NULL and dataset = NULL set.seed(1) @@ -164,7 +162,7 @@ test_that("1. Input checks", { ) expect_error( Corr(exp = array(1:10, dim = c(sdate = 2, dataset = 5, a = 1)), - obs = array(1:4, dim = c(a = 1, sdate = 2, dataset = 2))), + obs = array(1:4, dim = c(a = 1, sdate = 2, dataset = 2)), dat_dim = 'dataset'), "The length of time_dim must be at least 3 to compute correlation." ) @@ -174,76 +172,76 @@ test_that("1. Input checks", { test_that("2. Output checks: dat1", { suppressWarnings( expect_equal( - dim(Corr(exp1, obs1)$corr), + dim(Corr(exp1, obs1, dat_dim = 'dataset')$corr), c(nexp = 2, nobs = 1, member = 1, ftime = 3, lat = 2, lon = 4) ) ) suppressWarnings( expect_equal( - Corr(exp1, obs1)$corr[1:6], + Corr(exp1, obs1, dat_dim = 'dataset')$corr[1:6], c(0.11503859, -0.46959987, -0.64113021, 0.09776572, -0.32393603, 0.27565829), tolerance = 0.001 ) ) suppressWarnings( expect_equal( - length(which(is.na(Corr(exp1, obs1)$p.val))), + length(which(is.na(Corr(exp1, obs1, dat_dim = 'dataset')$p.val))), 2 ) ) suppressWarnings( expect_equal( - max(Corr(exp1, obs1)$conf.lower, na.rm = T), + max(Corr(exp1, obs1, dat_dim = 'dataset')$conf.lower, na.rm = T), 0.6332941, tolerance = 0.001 ) ) suppressWarnings( expect_equal( - length(which(is.na(Corr(exp1, obs1, comp_dim = 'ftime')$corr))), + length(which(is.na(Corr(exp1, obs1, dat_dim = 'dataset', comp_dim = 'ftime')$corr))), 6 ) ) suppressWarnings( expect_equal( - length(which(is.na(Corr(exp1, obs1, comp_dim = 'ftime', limits = c(2, 3))$corr))), + length(which(is.na(Corr(exp1, obs1, dat_dim = 'dataset', comp_dim = 'ftime', limits = c(2, 3))$corr))), 2 ) ) suppressWarnings( expect_equal( - min(Corr(exp1, obs1, alpha = 0.01)$conf.upper, na.rm = TRUE), + min(Corr(exp1, obs1, alpha = 0.01, dat_dim = 'dataset')$conf.upper, na.rm = TRUE), 0.2747904, tolerance = 0.0001 ) ) suppressWarnings( expect_equal( - length(Corr(exp1, obs1, conf = FALSE, pval = FALSE)), + length(Corr(exp1, obs1, conf = FALSE, pval = FALSE, dat_dim = 'dataset')), 1 ) ) suppressWarnings( expect_equal( - length(Corr(exp1, obs1, conf = FALSE)), + length(Corr(exp1, obs1, conf = FALSE, dat_dim = 'dataset')), 2 ) ) suppressWarnings( expect_equal( - length(Corr(exp1, obs1, pval = FALSE)), + 
length(Corr(exp1, obs1, pval = FALSE, dat_dim = 'dataset')), 3 ) ) suppressWarnings( expect_equal( - Corr(exp1, obs1, method = 'spearman')$corr[1:6], + Corr(exp1, obs1, method = 'spearman', dat_dim = 'dataset')$corr[1:6], c(-0.3, -0.4, -0.6, 0.3, -0.3, 0.2) ) ) suppressWarnings( expect_equal( - range(Corr(exp1, obs1, method = 'spearman', comp_dim = 'ftime')$p.val, na.rm = T), + range(Corr(exp1, obs1, method = 'spearman', comp_dim = 'ftime', dat_dim = 'dataset')$p.val, na.rm = T), c(0.0, 0.5), tolerance = 0.001 ) @@ -255,113 +253,113 @@ suppressWarnings( test_that("3. Output checks: dat2", { # individual member expect_equal( - dim(Corr(exp2, obs2, memb_dim = 'member')$corr), + dim(Corr(exp2, obs2, memb_dim = 'member', dat_dim = 'dataset')$corr), c(nexp = 2, nobs = 1, exp_memb = 3, obs_memb = 1, lat = 2, lon = 3) ) expect_equal( - dim(Corr(exp2, obs2, memb_dim = 'member')$corr), - dim(Corr(exp2, obs2, memb_dim = 'member')$p) + dim(Corr(exp2, obs2, memb_dim = 'member', dat_dim = 'dataset')$corr), + dim(Corr(exp2, obs2, memb_dim = 'member', dat_dim = 'dataset')$p) ) expect_equal( - names(Corr(exp2, obs2, memb_dim = 'member')), + names(Corr(exp2, obs2, memb_dim = 'member', dat_dim = 'dataset')), c("corr", "p.val", "conf.lower", "conf.upper") ) expect_equal( - names(Corr(exp2, obs2, memb_dim = 'member', pval = FALSE, conf = FALSE)), + names(Corr(exp2, obs2, memb_dim = 'member', pval = FALSE, conf = FALSE, dat_dim = 'dataset')), c("corr") ) expect_equal( - names(Corr(exp2, obs2, memb_dim = 'member', conf = FALSE)), + names(Corr(exp2, obs2, memb_dim = 'member', conf = FALSE, dat_dim = 'dataset')), c("corr", "p.val") ) expect_equal( - names(Corr(exp2, obs2, memb_dim = 'member', pval = FALSE)), + names(Corr(exp2, obs2, memb_dim = 'member', pval = FALSE, dat_dim = 'dataset')), c("corr", "conf.lower", "conf.upper") ) expect_equal( - names(Corr(exp2, obs2, memb_dim = 'member', pval = FALSE, sign = T)), + names(Corr(exp2, obs2, memb_dim = 'member', pval = FALSE, sign = T, dat_dim = 'dataset')), c("corr", "conf.lower", "conf.upper", "sign") ) expect_equal( - mean(Corr(exp2, obs2, memb_dim = 'member', pval = FALSE, conf = FALSE)$corr), + mean(Corr(exp2, obs2, memb_dim = 'member', pval = FALSE, conf = FALSE, dat_dim = 'dataset')$corr), 0.01645575, tolerance = 0.0001 ) expect_equal( - median(Corr(exp2, obs2, memb_dim = 'member', pval = FALSE, conf = FALSE)$corr), + median(Corr(exp2, obs2, memb_dim = 'member', pval = FALSE, conf = FALSE, dat_dim = 'dataset')$corr), 0.03024513, tolerance = 0.0001 ) expect_equal( - max(Corr(exp2, obs2, memb_dim = 'member', pval = FALSE, conf = FALSE)$corr), + max(Corr(exp2, obs2, memb_dim = 'member', pval = FALSE, conf = FALSE, dat_dim = 'dataset')$corr), 0.9327993, tolerance = 0.0001 ) expect_equal( - min(Corr(exp2, obs2, memb_dim = 'member', pval = FALSE, conf = FALSE)$corr), + min(Corr(exp2, obs2, memb_dim = 'member', pval = FALSE, conf = FALSE, dat_dim = 'dataset')$corr), -0.9361258, tolerance = 0.0001 ) expect_equal( - Corr(exp2, obs2, memb_dim = 'member', conf = FALSE)$p.val[1:5], + Corr(exp2, obs2, memb_dim = 'member', conf = FALSE, dat_dim = 'dataset')$p.val[1:5], c(0.24150854, 0.21790352, 0.04149139, 0.49851332, 0.19859843), tolerance = 0.0001 ) expect_equal( - Corr(exp2, obs2, memb_dim = 'member', pval = FALSE)$conf.lower[1:5], + Corr(exp2, obs2, memb_dim = 'member', pval = FALSE, dat_dim = 'dataset')$conf.lower[1:5], c(-0.9500121, -0.9547642, -0.9883400, -0.8817478, -0.6879465), tolerance = 0.0001 ) expect_equal( - which(Corr(exp2, obs2, memb_dim = 'member', pval = 
FALSE, conf = F, sign = T)$sign), + which(Corr(exp2, obs2, memb_dim = 'member', pval = FALSE, conf = F, sign = T, dat_dim = 'dataset')$sign), c(3, 6, 12, 17, 23, 34) ) # ensemble mean expect_equal( - dim(Corr(exp2, obs2, memb_dim = 'member', memb = FALSE)$corr), + dim(Corr(exp2, obs2, memb_dim = 'member', memb = FALSE, dat_dim = 'dataset')$corr), c(nexp = 2, nobs = 1, lat = 2, lon = 3) ) expect_equal( - mean(Corr(exp2, obs2, memb_dim = 'member', memb = FALSE, pval = FALSE, conf = FALSE)$corr), + mean(Corr(exp2, obs2, memb_dim = 'member', memb = FALSE, pval = FALSE, conf = FALSE, dat_dim = 'dataset')$corr), 0.02939929, tolerance = 0.0001 ) expect_equal( - median(Corr(exp2, obs2, memb_dim = 'member', memb = FALSE, pval = FALSE, conf = FALSE)$corr), + median(Corr(exp2, obs2, memb_dim = 'member', memb = FALSE, pval = FALSE, conf = FALSE, dat_dim = 'dataset')$corr), 0.03147432, tolerance = 0.0001 ) expect_equal( - max(Corr(exp2, obs2, memb_dim = 'member', memb = FALSE, pval = FALSE, conf = FALSE)$corr), + max(Corr(exp2, obs2, memb_dim = 'member', memb = FALSE, pval = FALSE, conf = FALSE, dat_dim = 'dataset')$corr), 0.8048901, tolerance = 0.0001 ) expect_equal( - min(Corr(exp2, obs2, memb_dim = 'member', memb = FALSE, pval = FALSE, conf = FALSE)$corr), + min(Corr(exp2, obs2, memb_dim = 'member', memb = FALSE, pval = FALSE, conf = FALSE, dat_dim = 'dataset')$corr), -0.6839388, tolerance = 0.0001 ) expect_equal( - Corr(exp2, obs2, memb_dim = 'member', memb = FALSE, conf = FALSE)$p.val[1:5], + Corr(exp2, obs2, memb_dim = 'member', memb = FALSE, conf = FALSE, dat_dim = 'dataset')$p.val[1:5], c(0.1999518, 0.2776874, 0.3255444, 0.2839667, 0.1264518), tolerance = 0.0001 ) expect_equal( - Corr(exp2, obs2, memb_dim = 'member', memb = FALSE, pval = FALSE)$conf.lower[1:5], + Corr(exp2, obs2, memb_dim = 'member', memb = FALSE, pval = FALSE, dat_dim = 'dataset')$conf.lower[1:5], c(-0.9582891, -0.7668065, -0.9316879, -0.9410621, -0.5659657), tolerance = 0.0001 ) # exp2_2 expect_equal( - which(is.na(Corr(exp2_2, obs2_2, memb_dim = 'member', memb = FALSE, pval = FALSE)$corr)), + which(is.na(Corr(exp2_2, obs2_2, memb_dim = 'member', memb = FALSE, pval = FALSE, dat_dim = 'dataset')$corr)), 1:2 ) expect_equal( - Corr(exp2_2, obs2_2, memb_dim = 'member', memb = FALSE, pval = FALSE)$corr[-c(1:2)], - Corr(exp2, obs2, memb_dim = 'member', memb = FALSE, pval = FALSE)$corr[-c(1:2)] + Corr(exp2_2, obs2_2, memb_dim = 'member', memb = FALSE, pval = FALSE, dat_dim = 'dataset')$corr[-c(1:2)], + Corr(exp2, obs2, memb_dim = 'member', memb = FALSE, pval = FALSE, dat_dim = 'dataset')$corr[-c(1:2)] ) }) @@ -370,56 +368,56 @@ test_that("3. Output checks: dat2", { test_that("4. 
Output checks: dat3", { # individual member expect_equal( - dim(Corr(exp3, obs3, memb_dim = 'member')$corr), + dim(Corr(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset')$corr), c(nexp = 2, nobs = 2, exp_memb = 3, obs_memb = 2, lat = 2, lon = 3) ) expect_equal( - names(Corr(exp3, obs3, memb_dim = 'member')), + names(Corr(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset')), c("corr", "p.val", "conf.lower", "conf.upper") ) expect_equal( - mean(Corr(exp3, obs3, memb_dim = 'member', pval = FALSE, conf = FALSE)$corr), + mean(Corr(exp3, obs3, memb_dim = 'member', pval = FALSE, conf = FALSE, dat_dim = 'dataset')$corr), 0.006468017, tolerance = 0.0001 ) expect_equal( - median(Corr(exp3, obs3, memb_dim = 'member', pval = FALSE, conf = FALSE)$corr), + median(Corr(exp3, obs3, memb_dim = 'member', pval = FALSE, conf = FALSE, dat_dim = 'dataset')$corr), 0.03662394, tolerance = 0.0001 ) expect_equal( - max(Corr(exp3, obs3, memb_dim = 'member', pval = FALSE, conf = FALSE)$corr), + max(Corr(exp3, obs3, memb_dim = 'member', pval = FALSE, conf = FALSE, dat_dim = 'dataset')$corr), 0.9798228, tolerance = 0.0001 ) expect_equal( - min(Corr(exp3, obs3, memb_dim = 'member', pval = FALSE, conf = FALSE)$corr), + min(Corr(exp3, obs3, memb_dim = 'member', pval = FALSE, conf = FALSE, dat_dim = 'dataset')$corr), -0.9464891, tolerance = 0.0001 ) # ensemble mean expect_equal( - dim(Corr(exp3, obs3, memb_dim = 'member', memb = FALSE)$corr), + dim(Corr(exp3, obs3, memb_dim = 'member', memb = FALSE, dat_dim = 'dataset')$corr), c(nexp = 2, nobs = 2, lat = 2, lon = 3) ) expect_equal( - mean(Corr(exp3, obs3, memb_dim = 'member', memb = FALSE, pval = FALSE, conf = FALSE)$corr), + mean(Corr(exp3, obs3, memb_dim = 'member', memb = FALSE, pval = FALSE, conf = FALSE, dat_dim = 'dataset')$corr), -0.01001896, tolerance = 0.0001 ) expect_equal( - median(Corr(exp3, obs3, memb_dim = 'member', memb = FALSE, pval = FALSE, conf = FALSE)$corr), + median(Corr(exp3, obs3, memb_dim = 'member', memb = FALSE, pval = FALSE, conf = FALSE, dat_dim = 'dataset')$corr), -0.01895816, tolerance = 0.0001 ) expect_equal( - max(Corr(exp3, obs3, memb_dim = 'member', memb = FALSE, pval = FALSE, conf = FALSE)$corr), + max(Corr(exp3, obs3, memb_dim = 'member', memb = FALSE, pval = FALSE, conf = FALSE, dat_dim = 'dataset')$corr), 0.798233, tolerance = 0.0001 ) expect_equal( - min(Corr(exp3, obs3, memb_dim = 'member', memb = FALSE, pval = FALSE, conf = FALSE)$corr), + min(Corr(exp3, obs3, memb_dim = 'member', memb = FALSE, pval = FALSE, conf = FALSE, dat_dim = 'dataset')$corr), -0.6464809, tolerance = 0.0001 ) @@ -429,17 +427,17 @@ test_that("4. Output checks: dat3", { test_that("5. Output checks: dat4", { # no member expect_equal( - dim(Corr(exp4, obs4)$corr), + dim(Corr(exp4, obs4, dat_dim = 'dataset')$corr), c(nexp = 1, nobs = 1, member = 1, lat = 2) ) # individual member expect_equal( - dim(Corr(exp4, obs4, memb_dim = 'member')$corr), + dim(Corr(exp4, obs4, memb_dim = 'member', dat_dim = 'dataset')$corr), c(nexp = 1, nobs = 1, exp_memb = 1, obs_memb = 1, lat = 2) ) # ensemble expect_equal( - dim(Corr(exp4, obs4, memb_dim = 'member', memb = FALSE)$corr), + dim(Corr(exp4, obs4, memb_dim = 'member', memb = FALSE, dat_dim = 'dataset')$corr), c(nexp = 1, nobs = 1, lat = 2) ) @@ -460,7 +458,7 @@ test_that("6. 
Output checks: dat5", {
     c("corr")
   )
   expect_equal(
     mean(Corr(exp5, obs5, dat_dim = NULL, memb_dim = 'member', pval = FALSE, conf = FALSE)$corr),
     0.1880204,
     tolerance = 0.0001
   )
@@ -506,7 +504,7 @@ test_that("7. Output checks: dat6", {
 test_that("8. Output checks: dat6 and dat7", {
   expect_equal(
     mean(Corr(exp6, obs6, dat_dim = NULL, memb_dim = NULL, pval = FALSE, conf = FALSE)$corr),
-    mean(Corr(exp7, obs7, memb_dim = NULL, pval = FALSE, conf = FALSE)$corr),
+    mean(Corr(exp7, obs7, memb_dim = NULL, pval = FALSE, conf = FALSE, dat_dim = 'dataset')$corr),
     tolerance = 0.0001
   )
 })
diff --git a/tests/testthat/test-RatioSDRMS.R b/tests/testthat/test-RatioSDRMS.R
index 5dbc171..3086c87 100644
--- a/tests/testthat/test-RatioSDRMS.R
+++ b/tests/testthat/test-RatioSDRMS.R
@@ -51,27 +51,27 @@ test_that("1. Input checks", {
     "Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension."
   )
   expect_error(
-    RatioSDRMS(exp1, obs1, memb_dim = 1),
+    RatioSDRMS(exp1, obs1, memb_dim = 1, dat_dim = 'dataset'),
     "Parameter 'memb_dim' must be a character string."
   )
   expect_error(
-    RatioSDRMS(exp1, obs1, memb_dim = 'a'),
+    RatioSDRMS(exp1, obs1, memb_dim = 'a', dat_dim = 'dataset'),
     "Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension."
   )
   expect_error(
-    RatioSDRMS(exp1, obs1, time_dim = c('sdate', 'a')),
+    RatioSDRMS(exp1, obs1, time_dim = c('sdate', 'a'), dat_dim = 'dataset'),
     "Parameter 'time_dim' must be a character string."
   )
   expect_error(
-    RatioSDRMS(exp1, obs1, time_dim = 'a'),
+    RatioSDRMS(exp1, obs1, time_dim = 'a', dat_dim = 'dataset'),
     "Parameter 'time_dim' is not found in 'exp' or 'obs' dimension."
   )
   expect_error(
-    RatioSDRMS(exp1, obs1, pval = 1),
+    RatioSDRMS(exp1, obs1, pval = 1, dat_dim = 'dataset'),
     "Parameter 'pval' must be one logical value."
   )
   expect_error(
-    RatioSDRMS(exp1, obs1, ncores = 1.5),
+    RatioSDRMS(exp1, obs1, ncores = 1.5, dat_dim = 'dataset'),
     "Parameter 'ncores' must be a positive integer."
   )
   expect_error(
@@ -85,34 +85,34 @@ test_that("1. Input checks", {
 ##############################################
 test_that("2. Output checks: dat1", {
 expect_equal(
-names(RatioSDRMS(exp1, obs1)),
+names(RatioSDRMS(exp1, obs1, dat_dim = 'dataset')),
 c('ratio', 'p.val')
 )
 expect_equal(
-dim(RatioSDRMS(exp1, obs1)$ratio),
+dim(RatioSDRMS(exp1, obs1, dat_dim = 'dataset')$ratio),
 c(nexp = 2, nobs = 1, ftime = 2)
 )
 expect_equal(
-dim(RatioSDRMS(exp1, obs1)$p.val),
+dim(RatioSDRMS(exp1, obs1, dat_dim = 'dataset')$p.val),
 c(nexp = 2, nobs = 1, ftime = 2)
 )
 expect_equal(
-as.vector(RatioSDRMS(exp1, obs1)$ratio),
+as.vector(RatioSDRMS(exp1, obs1, dat_dim = 'dataset')$ratio),
 c(0.7198164, 0.6525068, 0.6218262, 0.6101527),
 tolerance = 0.0001
 )
 expect_equal(
-as.vector(RatioSDRMS(exp1, obs1)$p.val),
+as.vector(RatioSDRMS(exp1, obs1, dat_dim = 'dataset')$p.val),
 c(0.8464094, 0.8959219, 0.9155102, 0.9224119),
 tolerance = 0.0001
 )
 expect_equal(
-names(RatioSDRMS(exp1, obs1, pval = F)),
+names(RatioSDRMS(exp1, obs1, pval = F, dat_dim = 'dataset')),
 c('ratio')
 )
 expect_equal(
-as.vector(RatioSDRMS(exp1, obs1)$ratio),
-as.vector(RatioSDRMS(exp1, obs1, pval = F)$ratio)
+as.vector(RatioSDRMS(exp1, obs1, dat_dim = 'dataset')$ratio),
+as.vector(RatioSDRMS(exp1, obs1, pval = F, dat_dim = 'dataset')$ratio)
 )
 })
 
@@ -120,17 +120,17 @@ as.vector(RatioSDRMS(exp1, obs1, pval = F)$ratio)
 ##############################################
 test_that("3.
Output checks: dat2", { expect_equal( -dim(RatioSDRMS(exp2, obs2)$ratio), +dim(RatioSDRMS(exp2, obs2, dat_dim = 'dataset')$ratio), c(nexp = 2, nobs = 1, ftime = 2) ) expect_equal( -as.vector(RatioSDRMS(exp2, obs2)$ratio), +as.vector(RatioSDRMS(exp2, obs2, dat_dim = 'dataset')$ratio), c(0.7635267, 0.6525068, 0.6218262, 0.6101527), tolerance = 0.0001 ) expect_equal( -as.vector(RatioSDRMS(exp1, obs1)$p.val), -c(0.8464094, 0.8959219, 0.9155102, 0.9224119), +as.vector(RatioSDRMS(exp2, obs2, dat_dim = 'dataset')$p.val), +c(0.7970868, 0.8959219, 0.9155102, 0.9224119), tolerance = 0.0001 ) diff --git a/tests/testthat/test-UltimateBrier.R b/tests/testthat/test-UltimateBrier.R index 09412f0..6ce8866 100644 --- a/tests/testthat/test-UltimateBrier.R +++ b/tests/testthat/test-UltimateBrier.R @@ -60,46 +60,46 @@ test_that("1. Input checks", { ) # exp and obs (2) expect_error( - UltimateBrier(exp1, array(1:10, dim = c(dataset = 1, sdate = 6, ftime = 2))), + UltimateBrier(exp1, array(1:10, dim = c(dataset = 1, sdate = 6, ftime = 2)), dat_dim = 'dataset'), paste0("Parameter 'exp' and 'obs' must have the same names and lengths ", "of all the dimensions except 'dat_dim' and 'memb_dim'.") ) expect_error( - UltimateBrier(exp1, array(1:10, dim = c(dataset = 1, sdate = 5, time = 2))), + UltimateBrier(exp1, array(1:10, dim = c(dataset = 1, sdate = 5, time = 2)), dat_dim = 'dataset'), paste0("Parameter 'exp' and 'obs' must have the same names and lengths ", "of all the dimensions except 'dat_dim' and 'memb_dim'.") ) # quantile expect_error( - UltimateBrier(exp1, obs1, quantile = c(0.05, 0.95)), + UltimateBrier(exp1, obs1, quantile = c(0.05, 0.95), dat_dim = 'dataset'), "Parameter 'quantile' must be one logical value." ) expect_error( - UltimateBrier(exp1, obs1, quantile = FALSE, thr = 1:3, type = 'FairEnsembleBS'), + UltimateBrier(exp1, obs1, quantile = FALSE, thr = 1:3, type = 'FairEnsembleBS', dat_dim = 'dataset'), "Parameter 'quantile' must be TRUE if 'type' is 'FairEnsembleBSS' or 'FairEnsembleBS'." ) # thr expect_error( - UltimateBrier(exp1, obs1, thr = TRUE), + UltimateBrier(exp1, obs1, thr = TRUE, dat_dim = 'dataset'), "Parameter 'thr' must be a numeric vector." ) expect_error( - UltimateBrier(exp1, obs1, quantile = TRUE, thr = 1:3), + UltimateBrier(exp1, obs1, quantile = TRUE, thr = 1:3, dat_dim = 'dataset'), "Parameter 'thr' must be between 0 and 1 when quantile is TRUE." ) # type expect_error( - UltimateBrier(exp1, obs1, type = 'UltimateBrier'), + UltimateBrier(exp1, obs1, type = 'UltimateBrier', dat_dim = 'dataset'), "Parameter 'type' must be one of 'BS', 'BSS', 'FairEnsembleBS', 'FairEnsembleBSS', 'FairStartDatesBS' or 'FairStartDatesBSS'." ) # decomposition expect_error( - UltimateBrier(exp1, obs1, decomposition = 1), + UltimateBrier(exp1, obs1, decomposition = 1, dat_dim = 'dataset'), "Parameter 'decomposition' must be one logical value." ) # ncores expect_error( - UltimateBrier(exp1, obs1, ncores = 0), + UltimateBrier(exp1, obs1, ncores = 0, dat_dim = 'dataset'), "Parameter 'ncores' must be a positive integer." ) @@ -111,130 +111,130 @@ test_that("2. 
Output checks: dat1", { # 'BS' expect_equal( - is.list(UltimateBrier(exp1, obs1)), + is.list(UltimateBrier(exp1, obs1, dat_dim = 'dataset')), TRUE ) expect_equal( - names(UltimateBrier(exp1, obs1)), + names(UltimateBrier(exp1, obs1, dat_dim = 'dataset')), c('bs', 'rel', 'res', 'unc') ) expect_equal( - is.list(UltimateBrier(exp1, obs1, decomposition = FALSE)), + is.list(UltimateBrier(exp1, obs1, decomposition = FALSE, dat_dim = 'dataset')), FALSE ) expect_equal( - dim(UltimateBrier(exp1, obs1, decomposition = FALSE)), + dim(UltimateBrier(exp1, obs1, decomposition = FALSE, dat_dim = 'dataset')), c(nexp = 1, nobs = 1, bin = 3, ftime = 2) ) expect_equal( - dim(UltimateBrier(exp1, obs1, decomposition = FALSE, thr = c(0.25, 0.5, 0.75))), + dim(UltimateBrier(exp1, obs1, dat_dim = 'dataset', decomposition = FALSE, thr = c(0.25, 0.5, 0.75))), c(nexp = 1, nobs = 1, bin = 4, ftime = 2) ) expect_equal( - UltimateBrier(exp1, obs1)$bs, - UltimateBrier(exp1, obs1, decomposition = FALSE) + UltimateBrier(exp1, obs1, dat_dim = 'dataset')$bs, + UltimateBrier(exp1, obs1, decomposition = FALSE, dat_dim = 'dataset') ) expect_equal( - as.vector(UltimateBrier(exp1, obs1)$bs), + as.vector(UltimateBrier(exp1, obs1, dat_dim = 'dataset')$bs), c(0.42222222, 0.44444444, 0.02222222, 0.48888889, 0.37777778, 0.02222222), tolerance = 0.0001 ) expect_equal( - as.vector(UltimateBrier(exp1, obs1)$rel), + as.vector(UltimateBrier(exp1, obs1, dat_dim = 'dataset')$rel), c(0.22222222, 0.31111111, 0.02222222, 0.28888889, 0.24444444, 0.02222222), tolerance = 0.0001 ) expect_equal( - as.vector(UltimateBrier(exp1, obs1)$res), + as.vector(UltimateBrier(exp1, obs1, dat_dim = 'dataset')$res), c(0.0400000, 0.1066667, 0.0000000, 0.0400000, 0.1066667, 0.0000000), tolerance = 0.0001 ) expect_equal( - as.vector(UltimateBrier(exp1, obs1)$unc), + as.vector(UltimateBrier(exp1, obs1, dat_dim = 'dataset')$unc), c(0.24, 0.24, 0.00, 0.24, 0.24, 0.00), tolerance = 0.0001 ) # 'BSS' expect_equal( - dim(UltimateBrier(exp1, obs1, type = 'BSS')), + dim(UltimateBrier(exp1, obs1, type = 'BSS', dat_dim = 'dataset')), c(nexp = 1, nobs = 1, bin = 3, ftime = 2) ) expect_equal( - as.vector(UltimateBrier(exp1, obs1, type = 'BSS')), + as.vector(UltimateBrier(exp1, obs1, type = 'BSS', dat_dim = 'dataset')), c(-0.7592593, -0.8518519, -Inf, -1.0370370, -0.5740741, -Inf), tolerance = 0.0001 ) # 'FairStartDatesBS' expect_equal( - is.list(UltimateBrier(exp1, obs1, type = 'FairStartDatesBS')), + is.list(UltimateBrier(exp1, obs1, type = 'FairStartDatesBS', dat_dim = 'dataset')), TRUE ) expect_equal( - names(UltimateBrier(exp1, obs1, type = 'FairStartDatesBS')), + names(UltimateBrier(exp1, obs1, type = 'FairStartDatesBS', dat_dim = 'dataset')), c('bs', 'rel', 'res', 'unc') ) expect_equal( - is.list(UltimateBrier(exp1, obs1, decomposition = FALSE, type = 'FairStartDatesBS')), + is.list(UltimateBrier(exp1, obs1, decomposition = FALSE, type = 'FairStartDatesBS', dat_dim = 'dataset')), FALSE ) expect_equal( - dim(UltimateBrier(exp1, obs1, decomposition = FALSE, type = 'FairStartDatesBS')), + dim(UltimateBrier(exp1, obs1, decomposition = FALSE, type = 'FairStartDatesBS', dat_dim = 'dataset')), c(nexp = 1, nobs = 1, bin = 3, ftime = 2) ) expect_equal( - UltimateBrier(exp1, obs1, type = 'FairStartDatesBS')$bs, - UltimateBrier(exp1, obs1, decomposition = FALSE, type = 'FairStartDatesBS') + UltimateBrier(exp1, obs1, type = 'FairStartDatesBS', dat_dim = 'dataset')$bs, + UltimateBrier(exp1, obs1, decomposition = FALSE, type = 'FairStartDatesBS', dat_dim = 'dataset') ) expect_equal( - 
as.vector(UltimateBrier(exp1, obs1, type = 'FairStartDatesBS')$bs), + as.vector(UltimateBrier(exp1, obs1, type = 'FairStartDatesBS', dat_dim = 'dataset')$bs), c(0.42222222, 0.44444444, 0.02222222, 0.48888889, 0.37777778, 0.02222222), tolerance = 0.0001 ) expect_equal( - as.vector(UltimateBrier(exp1, obs1, type = 'FairStartDatesBS')$rel), + as.vector(UltimateBrier(exp1, obs1, type = 'FairStartDatesBS', dat_dim = 'dataset')$rel), c(0.22222222, 0.31111111, 0.02222222, 0.28888889, 0.24444444, 0.02222222), tolerance = 0.0001 ) expect_equal( - as.vector(UltimateBrier(exp1, obs1, type = 'FairStartDatesBS')$res), + as.vector(UltimateBrier(exp1, obs1, type = 'FairStartDatesBS', dat_dim = 'dataset')$res), c(0.0400000, 0.1066667, 0.0000000, 0.0400000, 0.1066667, 0.0000000), tolerance = 0.0001 ) expect_equal( - as.vector(UltimateBrier(exp1, obs1, type = 'FairStartDatesBS')$unc), + as.vector(UltimateBrier(exp1, obs1, type = 'FairStartDatesBS', dat_dim = 'dataset')$unc), c(0.24, 0.24, 0.00, 0.24, 0.24, 0.00), tolerance = 0.0001 ) # 'FairStartDatesBSS' expect_equal( - dim(UltimateBrier(exp1, obs1, type = 'FairStartDatesBSS')), + dim(UltimateBrier(exp1, obs1, type = 'FairStartDatesBSS', dat_dim = 'dataset')), c(nexp = 1, nobs = 1, bin = 3, ftime = 2) ) expect_equal( - as.vector(UltimateBrier(exp1, obs1, type = 'FairStartDatesBSS')), + as.vector(UltimateBrier(exp1, obs1, type = 'FairStartDatesBSS', dat_dim = 'dataset')), c(-0.7592593, -0.8518519, -Inf, -1.0370370, -0.5740741, -Inf), tolerance = 0.0001 ) # 'FairEnsembleBS' expect_equal( - dim(UltimateBrier(exp1, obs1, type = 'FairEnsembleBS')), + dim(UltimateBrier(exp1, obs1, type = 'FairEnsembleBS', dat_dim = 'dataset')), c(nexp = 1, nobs = 1, bin = 3, ftime = 2) ) expect_equal( - as.vector(UltimateBrier(exp1, obs1, type = 'FairEnsembleBS')), + as.vector(UltimateBrier(exp1, obs1, type = 'FairEnsembleBS', dat_dim = 'dataset')), c(0.1333333, 0.2000000, 0.2000000, 0.1333333, 0.4000000, 0.2000000), tolerance = 0.0001 ) # 'FairEnsembleBSS' expect_equal( - dim(UltimateBrier(exp1, obs1, type = 'FairEnsembleBSS')), + dim(UltimateBrier(exp1, obs1, type = 'FairEnsembleBSS', dat_dim = 'dataset')), c(nexp = 1, nobs = 1, bin = 3, ftime = 2) ) expect_equal( - as.vector(UltimateBrier(exp1, obs1, type = 'FairEnsembleBSS')), + as.vector(UltimateBrier(exp1, obs1, type = 'FairEnsembleBSS', dat_dim = 'dataset')), c(-0.1111111, -0.6666667, -0.6666667, 0.2592593, -1.2222222, -0.6666667), tolerance = 0.0001 ) -- GitLab From 177eaf37826f4000b4a46385f72401f0a0691406 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 11 Apr 2023 14:04:02 +0200 Subject: [PATCH 13/13] Remove pval and sign in RMS() --- R/RMS.R | 74 +++++++++++++-------------------------- man/RMS.Rd | 15 -------- tests/testthat/test-RMS.R | 19 +++------- 3 files changed, 29 insertions(+), 79 deletions(-) diff --git a/R/RMS.R b/R/RMS.R index 7664d97..164167f 100644 --- a/R/RMS.R +++ b/R/RMS.R @@ -28,13 +28,8 @@ #' is NULL. #'@param limits A vector of two integers indicating the range along comp_dim to #' be completed. The default value is c(1, length(comp_dim dimension)). -#'@param pval A logical value indicating whether to return or not the p-value -#' of the test Ho: Corr = 0. The default value is TRUE. #'@param conf A logical value indicating whether to retrieve the confidence #' intervals or not. The default value is TRUE. -#'@param sign A logical value indicating whether to retrieve the statistical -#' significance of the test Ho: Corr = 0 based on 'alpha'. The default value is -#' FALSE. 
#'@param alpha A numeric indicating the significance level for the statistical #' significance test. The default value is 0.05. #'@param ncores An integer indicating the number of cores to use for parallel @@ -49,18 +44,12 @@ #'\item{$rms}{ #' The root mean square error. #'} -#'\item{$p.val}{ -#' The p-value. Only present if \code{pval = TRUE}. -#'} #'\item{$conf.lower}{ #' The lower confidence interval. Only present if \code{conf = TRUE}. #'} #'\item{$conf.upper}{ #' The upper confidence interval. Only present if \code{conf = TRUE}. #'} -#'\item{$sign}{ -#' The statistical significance. Only present if \code{sign = TRUE}. -#'} #' #'@examples #'# Load sample data as in Load() example: @@ -78,9 +67,8 @@ #'@importFrom ClimProjDiags Subset #'@importFrom stats qchisq #'@export -RMS <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, - comp_dim = NULL, limits = NULL, pval = TRUE, conf = TRUE, - sign = FALSE, alpha = 0.05, ncores = NULL) { +RMS <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, comp_dim = NULL, + limits = NULL, conf = TRUE, alpha = 0.05, ncores = NULL) { # Check inputs ## exp and obs (1) if (is.null(exp) | is.null(obs)) { @@ -148,18 +136,10 @@ RMS <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, "integers smaller than the length of paramter 'comp_dim'.")) } } - ## pval - if (!is.logical(pval) | length(pval) > 1) { - stop("Parameter 'pval' must be one logical value.") - } ## conf if (!is.logical(conf) | length(conf) > 1) { stop("Parameter 'conf' must be one logical value.") } - ## sign - if (!is.logical(sign) | length(sign) > 1) { - stop("Parameter 'sign' must be one logical value.") - } ## alpha if (!is.numeric(alpha) | alpha < 0 | alpha > 1 | length(alpha) > 1) { stop("Parameter 'alpha' must be a numeric number between 0 and 1.") @@ -215,13 +195,12 @@ RMS <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, c(time_dim, dat_dim)), fun = .RMS, time_dim = time_dim, dat_dim = dat_dim, - pval = pval, conf = conf, sign = sign, alpha = alpha, + conf = conf, alpha = alpha, ncores = ncores) return(res) } -.RMS <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, - pval = TRUE, conf = TRUE, sign = FALSE, alpha = 0.05) { +.RMS <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, conf = TRUE, alpha = 0.05) { if (is.null(dat_dim)) { # exp: [sdate] # obs: [sdate] @@ -253,7 +232,7 @@ RMS <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, rms <- colMeans(dif^2, na.rm = TRUE)^0.5 #array(dim = c(nexp, nobs)) - if (conf | pval | sign) { + if (conf) { #NOTE: pval and sign also need #count effective sample along sdate. eno: c(nexp, nobs) # eno <- Eno(dif, time_dim) # slower than for loop below? 
eno <- array(dim = c(nexp = nexp, nobs = nobs)) @@ -262,29 +241,28 @@ RMS <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, eno[n_exp, n_obs] <- .Eno(dif[, n_exp, n_obs], na.action = na.pass) } } - if (conf) { - # conf.lower - chi <- sapply(1:nobs, function(i) { - qchisq(confhigh, eno[, i] - 1) - }) - conf.lower <- (eno * rms ** 2 / chi) ** 0.5 + # conf.lower + chi <- sapply(1:nobs, function(i) { + qchisq(confhigh, eno[, i] - 1) + }) + conf.lower <- (eno * rms ** 2 / chi) ** 0.5 - # conf.upper - chi <- sapply(1:nobs, function(i) { - qchisq(conflow, eno[, i] - 1) - }) - conf.upper <- (eno * rms ** 2 / chi) ** 0.5 - } + # conf.upper + chi <- sapply(1:nobs, function(i) { + qchisq(conflow, eno[, i] - 1) + }) + conf.upper <- (eno * rms ** 2 / chi) ** 0.5 } - if (pval | sign) { - chi <- array(dim = c(nexp = nexp, nobs = nobs)) - for (i in 1:nobs) { - chi[, i] <- sapply(1:nexp, function(x) {sum((exp[, x] - obs[, i])^2 / exp[, x])}) - } - p_val <- pchisq(chi, eno - 1, lower.tail = FALSE) - if (sign) signif <- p_val <= alpha - } +#NOTE: Not sure if the calculation is correct. p_val is reasonable compared to the chi-square chart though. +# if (pval | sign) { +# chi <- array(dim = c(nexp = nexp, nobs = nobs)) +# for (i in 1:nobs) { +# chi[, i] <- sapply(1:nexp, function(x) {sum((obs[, i] - exp[, x])^2 / exp[, x])}) +# } +# p_val <- pchisq(chi, eno - 1, lower.tail = FALSE) +# if (sign) signif <- p_val <= alpha +# } ################################### # Remove nexp and nobs if dat_dim = NULL @@ -294,14 +272,10 @@ RMS <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, dim(conf.lower) <- NULL dim(conf.upper) <- NULL } - if (pval) dim(p_val) <- NULL - if (sign) dim(signif) <- NULL } ################################### res <- list(rms = rms) - if (pval) res <- c(res, list(p.val = p_val)) - if (sign) res <- c(res, list(sign = signif)) if (conf) res <- c(res, list(conf.lower = conf.lower, conf.upper = conf.upper)) return(res) diff --git a/man/RMS.Rd b/man/RMS.Rd index 241fbd5..9d02d82 100644 --- a/man/RMS.Rd +++ b/man/RMS.Rd @@ -11,9 +11,7 @@ RMS( dat_dim = NULL, comp_dim = NULL, limits = NULL, - pval = TRUE, conf = TRUE, - sign = FALSE, alpha = 0.05, ncores = NULL ) @@ -43,16 +41,9 @@ is NULL.} \item{limits}{A vector of two integers indicating the range along comp_dim to be completed. The default value is c(1, length(comp_dim dimension)).} -\item{pval}{A logical value indicating whether to return or not the p-value -of the test Ho: Corr = 0. The default value is TRUE.} - \item{conf}{A logical value indicating whether to retrieve the confidence intervals or not. The default value is TRUE.} -\item{sign}{A logical value indicating whether to retrieve the statistical -significance of the test Ho: Corr = 0 based on 'alpha'. The default value is -FALSE.} - \item{alpha}{A numeric indicating the significance level for the statistical significance test. The default value is 0.05.} @@ -68,18 +59,12 @@ nobs are omitted.\cr \item{$rms}{ The root mean square error. } -\item{$p.val}{ - The p-value. Only present if \code{pval = TRUE}. -} \item{$conf.lower}{ The lower confidence interval. Only present if \code{conf = TRUE}. } \item{$conf.upper}{ The upper confidence interval. Only present if \code{conf = TRUE}. } -\item{$sign}{ - The statistical significance. Only present if \code{sign = TRUE}. 
-} } \description{ Compute the root mean square error for an array of forecasts and an array of diff --git a/tests/testthat/test-RMS.R b/tests/testthat/test-RMS.R index d45d59c..2a97466 100644 --- a/tests/testthat/test-RMS.R +++ b/tests/testthat/test-RMS.R @@ -176,7 +176,7 @@ suppressWarnings( suppressWarnings( expect_equal( length(RMS(exp1, obs1, dat_dim = 'dataset', conf = FALSE)), - 2 + 1 ) ) @@ -220,25 +220,16 @@ test_that("4. Output checks: dat3", { c(ftimes = 2, lon = 1, lat = 1) ) expect_equal( - length(RMS(exp3, obs3, dat_dim = NULL, sign = T, conf = F)), - 3 + length(RMS(exp3, obs3, dat_dim = NULL, conf = F)), + 1 ) expect_equal( - c(RMS(exp3, obs3, dat_dim = NULL, sign = T, conf = F)$sign[1,1,]), - c(FALSE, FALSE, TRUE, FALSE) - ) - expect_equal( - c(RMS(exp3, obs3, dat_dim = NULL, sign = T, conf = F)$p.val[1,1,]), - c(1, 0.8498872, 4.686846e-06, 1), - tolerance = 0.0001 - ) - expect_equal( - c(RMS(exp3, obs3, dat_dim = NULL, pval = F)$conf.lower[1,1,]), + c(RMS(exp3, obs3, dat_dim = NULL)$conf.lower[1,1,]), c(1.1024490, 0.5533838, 1.4531443, 0.3606632), tolerance = 0.0001 ) expect_equal( - c(RMS(exp3, obs3, dat_dim = NULL, pval = F)$conf.upper[1,1,]), + c(RMS(exp3, obs3, dat_dim = NULL)$conf.upper[1,1,]), c(5.287554, 2.654133, 6.969554, 1.729809), tolerance = 0.0001 ) -- GitLab
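For reference, a minimal sketch of the chi-square interval that the slimmed-down RMS() retains, assuming conflow and confhigh are alpha/2 and 1 - alpha/2 as in the code above, and using made-up numbers for the point estimate and the effective sample size eno:

rms <- 1.2; eno <- 20; alpha <- 0.05
conf.lower <- sqrt(eno * rms^2 / qchisq(1 - alpha / 2, df = eno - 1))
conf.upper <- sqrt(eno * rms^2 / qchisq(alpha / 2, df = eno - 1))
c(conf.lower, rms, conf.upper)   # the interval brackets the point estimate

The interval narrows as eno grows, which is why the effective number of independent samples, rather than the raw series length, feeds the chi-square quantiles.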