From 7d28ee1490cad8326ce91b5e24a8f3ea58b9d525 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 4 Mar 2022 16:44:05 +0100 Subject: [PATCH 01/13] first version adapted from SpecsVerification --- R/DiffCorr.R | 101 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 R/DiffCorr.R diff --git a/R/DiffCorr.R b/R/DiffCorr.R new file mode 100644 index 0000000..eea3184 --- /dev/null +++ b/R/DiffCorr.R @@ -0,0 +1,101 @@ + +library(s2dv) +library(multiApply) + +DiffCorr <- function(exp, ref, obs, conf.lev = NULL, handle.na = "na.fail", time_dim = 'year', member_dim = NULL, ncores = NULL){ + + ## Checkings + if (!is.array(exp)){stop('"exp" must be an array.')} + if (!is.array(obs)){stop('"obs" must be an array.')} + if (!is.array(ref)){stop('"ref" must be an array.')} + if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(ref)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "ref".')} + if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(obs)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "obs".')} + if (!is.null(conf.lev)){ + if (!is.numeric(conf.lev) | conf.lev <= 0 | conf.lev >= 1){stop('"conf.lev" must be either NULL or a number between 0 and 1.')} + } + if (!is.character(time_dim)){stop('"time_dim" must be a string.')} + if (!is.null(member_dim)){ + if (!is.character(member_dim)){stop('"member_dim" must be either NULL or a string.')} + } + if (!is.null(ncores)){ + if (!is.numeric(ncores) | ncores < 1){stop('"ncores" must be either NULL or a positive integer.')} + } + + ## Ensemble means + if (!is.null(member_dim)){ + exp <- s2dv::MeanDims(data = exp, dims = member_dim, na.rm = FALSE, drop = 'selected') + ref <- s2dv::MeanDims(data = ref, dims = member_dim, na.rm = FALSE, drop = 'selected') + } + + ## Correlation difference + output <- multiApply::Apply(data = list(exp = exp, ref = ref, obs = obs), + target_dims = time_dim, fun = .DiffCorr, + conf.lev = conf.lev, handle.na = handle.na, + time_dim = time_dim, ncores = ncores) + return(output) +} + +.DiffCorr <- function(exp, ref, obs, conf.lev, handle.na, time_dim) { + + ## handle NA's + if (handle.na == "na.fail") { + if (any(is.na(c(exp, ref, obs)))) { + stop("There are missing values.") + } + } else if (handle.na == "only.complete.triplets") { + nna <- !is.na(fcst) & !is.na(fcst.ref) & !is.na(obs) + if (all(nna == FALSE)) { + stop("There are no complete sets of forecasts and observations") + } + exp <- exp[nna] + ref <- ref[nna] + obs <- obs[nna] + } else { + stop("unknown 'handle.na' argument") + } + + ## Number of effective degrees of freedom + N <- s2dv::Eno(data = obs, time_dim = time_dim, na.action = handle.na, ncores = 1) + + ## Correlation coefficients and their confidence intervals + cor.exp <- cor(exp, obs) + cor.ref <- cor(ref, obs) + + ## Correlation difference + output <- NULL + output$corr.diff <- cor.exp - cor.ref + + ## auxiliary quantities + r12 <- cor.exp + r13 <- cor.ref + r23 <- cor(exp, ref) + + ## confidence interval, according to Zou (2007) + if (!is.null(conf.lev)){ + + ## individual confidence limits of cor.exp and cor.ref (not used) + # z.exp <- 0.5 * log((1 + cor.exp) / (1 - cor.exp)) + # z.ref <- 0.5 * log((1 + cor.ref) / (1 - cor.ref)) + + alpha <- (1. - conf.lev) / 2. + l1 <- tanh(atanh(r12) + qnorm(alpha)/sqrt(N-3)) + u1 <- tanh(atanh(r12) + qnorm(1-alpha)/sqrt(N-3)) + l2 <- tanh(atanh(r13) + qnorm(alpha)/sqrt(N-3)) + u2 <- tanh(atanh(r13) + qnorm(1-alpha)/sqrt(N-3)) + + ## Correlation between the two corr.coef r12 and r13 + c.12.13 <- ((r23 - 0.5 * r12 * r13) * (1 - r12*r12 - r13*r13 - r23*r23) + r23*r23*r23) / ((1 - r12*r12) * (1 - r13*r13)) + + ## Confidence interval + output$conf.lower <- r12 - r13 - sqrt((r12-l1)^2 + (u2 - r13)^2 - 2*c.12.13*(r12-l1)*(u2-r13)) + output$conf.upper <- r12 - r13 + sqrt((u1 - r12)^2 + (r13-l2)^2 - 2*c.12.13*(u1-r12)*(r13-l2)) + + } + + ## p-value of one-sided test for equality of dependent correlation coefficients (Steiger, 1980) + R <- (1-r12*r12-r13*r13-r23*r23) + 2*r12*r13*r23 + t <- (r12 - r13) * sqrt((N-1)*(1+r23) / (2 * ((N-1)/(N-3)) * R + 0.25*(r12+r13)^2 * (1-r23)^3)) + output$p.value <- 1 - pt(t, df=N-3) + + return(output) +} -- GitLab From 1b79f4171bbd132ded21c10269b635420a162c6d Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Thu, 10 Mar 2022 17:02:23 +0100 Subject: [PATCH 02/13] return.na --- R/DiffCorr.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/DiffCorr.R b/R/DiffCorr.R index eea3184..3aae37f 100644 --- a/R/DiffCorr.R +++ b/R/DiffCorr.R @@ -2,7 +2,7 @@ library(s2dv) library(multiApply) -DiffCorr <- function(exp, ref, obs, conf.lev = NULL, handle.na = "na.fail", time_dim = 'year', member_dim = NULL, ncores = NULL){ +DiffCorr <- function(exp, ref, obs, conf.lev = NULL, handle.na = 'return.na', time_dim = 'year', member_dim = NULL, ncores = NULL){ ## Checkings if (!is.array(exp)){stop('"exp" must be an array.')} @@ -23,8 +23,8 @@ DiffCorr <- function(exp, ref, obs, conf.lev = NULL, handle.na = "na.fail", time ## Ensemble means if (!is.null(member_dim)){ - exp <- s2dv::MeanDims(data = exp, dims = member_dim, na.rm = FALSE, drop = 'selected') - ref <- s2dv::MeanDims(data = ref, dims = member_dim, na.rm = FALSE, drop = 'selected') + exp <- multiApply::Apply(data = exp, target_dims = member_dim, fun = mean, na.rm = FALSE, ncores = ncores)$output1 + ref <- multiApply::Apply(data = ref, target_dims = member_dim, fun = mean, na.rm = FALSE, ncores = ncores)$output1 } ## Correlation difference @@ -50,12 +50,12 @@ DiffCorr <- function(exp, ref, obs, conf.lev = NULL, handle.na = "na.fail", time exp <- exp[nna] ref <- ref[nna] obs <- obs[nna] - } else { + } else if (handle.na != "return.na") { stop("unknown 'handle.na' argument") } ## Number of effective degrees of freedom - N <- s2dv::Eno(data = obs, time_dim = time_dim, na.action = handle.na, ncores = 1) + N <- s2dv::Eno(data = obs, time_dim = time_dim, na.action = na.pass, ncores = 1) ## Correlation coefficients and their confidence intervals cor.exp <- cor(exp, obs) -- GitLab From 3b4232a3bcf139bee665635fc226561ba71d66de Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Thu, 17 Mar 2022 17:55:48 +0100 Subject: [PATCH 03/13] included documentation --- R/DiffCorr.R | 134 ++++++++++++++++++++++++++------------------------- 1 file changed, 68 insertions(+), 66 deletions(-) diff --git a/R/DiffCorr.R b/R/DiffCorr.R index 3aae37f..325f16f 100644 --- a/R/DiffCorr.R +++ b/R/DiffCorr.R @@ -1,8 +1,47 @@ - -library(s2dv) -library(multiApply) - -DiffCorr <- function(exp, ref, obs, conf.lev = NULL, handle.na = 'return.na', time_dim = 'year', member_dim = NULL, ncores = NULL){ +#'Compute the correlation difference and its significance +#' +#'Positive values of the correlation difference indicate that the forecast is more skilful +#'than the reference forecast, while negative values mean that the reference forecast is +#'more skilful. The statistical significance of the correlation differences is computed with +#'a one-sided test for equality of dependent correlation coefficients (Steiger, 1980; +#'https://content.apa.org/doi/10.1037/0033-2909.87.2.245; #'Siegert et al., 2017; +#'https://doi.org/10.1175/MWR-D-16-0037.1) using a effective degrees of freedom to account for +#'the time series' autocorrelation (von Storch and Zwiers, 1999; https://doi.org/10.1017/CBO9780511612336). +#' +#'@param exp A numerical array with the forecast with, at least, time dimension. +#'@param ref A numerical array with the reference forecast data with, at least, time dimension. +#' The length of the time dimension must be the same as for "exp". +#'@param obs A numerical array with the observations with, at least, time dimension. +#' The length of the time dimension must be the same as for "exp". +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'year'. +#'@param member_dim A character string indicating the name of the member dimension to compute the +#' ensemble means of the forecast and reference forecast. If it is NULL, the ensemble means should +#' be provided directly to the function. The default value is NULL. +#'@param alpha Significance level to be used in the statistical significance test. The default value +#' is 0.05. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return A list of 2: +#'\item{$r}{ +#' A numerical array with the correlation differences with the same dimensions as the input arrays +#' except "time_dim" (and "member_dim" if provided). +#'} +#'\item{$sign}{ +#' A logical array with the statistical significance of the correlation differences with the same +#' dimensions as the input arrays except "time_dim" (and "member_dim" if provided). +#'} +#' +#'@examples +#' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50)) +#' ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50)) +#' obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, year = 50)) +#' DiffCorr(exp, ref, obs, member_dim = 'member') +#' +#'@import multiApply +#'@export +DiffCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, alpha = 0.05, ncores = NULL){ ## Checkings if (!is.array(exp)){stop('"exp" must be an array.')} @@ -10,13 +49,11 @@ DiffCorr <- function(exp, ref, obs, conf.lev = NULL, handle.na = 'return.na', ti if (!is.array(ref)){stop('"ref" must be an array.')} if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(ref)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "ref".')} if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(obs)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "obs".')} - if (!is.null(conf.lev)){ - if (!is.numeric(conf.lev) | conf.lev <= 0 | conf.lev >= 1){stop('"conf.lev" must be either NULL or a number between 0 and 1.')} - } if (!is.character(time_dim)){stop('"time_dim" must be a string.')} if (!is.null(member_dim)){ if (!is.character(member_dim)){stop('"member_dim" must be either NULL or a string.')} } + if (!is.numeric(alpha) | alpha <= 0 | alpha >= 1){stop('"alpha" must be a number between 0 and 1.')} if (!is.null(ncores)){ if (!is.numeric(ncores) | ncores < 1){stop('"ncores" must be either NULL or a positive integer.')} } @@ -30,72 +67,37 @@ DiffCorr <- function(exp, ref, obs, conf.lev = NULL, handle.na = 'return.na', ti ## Correlation difference output <- multiApply::Apply(data = list(exp = exp, ref = ref, obs = obs), target_dims = time_dim, fun = .DiffCorr, - conf.lev = conf.lev, handle.na = handle.na, - time_dim = time_dim, ncores = ncores) + time_dim = time_dim, alpha = alpha, ncores = ncores) return(output) } -.DiffCorr <- function(exp, ref, obs, conf.lev, handle.na, time_dim) { - - ## handle NA's - if (handle.na == "na.fail") { - if (any(is.na(c(exp, ref, obs)))) { - stop("There are missing values.") - } - } else if (handle.na == "only.complete.triplets") { - nna <- !is.na(fcst) & !is.na(fcst.ref) & !is.na(obs) - if (all(nna == FALSE)) { - stop("There are no complete sets of forecasts and observations") - } - exp <- exp[nna] - ref <- ref[nna] - obs <- obs[nna] - } else if (handle.na != "return.na") { - stop("unknown 'handle.na' argument") - } - - ## Number of effective degrees of freedom - N <- s2dv::Eno(data = obs, time_dim = time_dim, na.action = na.pass, ncores = 1) +.DiffCorr <- function(exp, ref, obs, time_dim, alpha) { - ## Correlation coefficients and their confidence intervals - cor.exp <- cor(exp, obs) - cor.ref <- cor(ref, obs) - - ## Correlation difference - output <- NULL - output$corr.diff <- cor.exp - cor.ref - - ## auxiliary quantities - r12 <- cor.exp - r13 <- cor.ref - r23 <- cor(exp, ref) - - ## confidence interval, according to Zou (2007) - if (!is.null(conf.lev)){ - - ## individual confidence limits of cor.exp and cor.ref (not used) - # z.exp <- 0.5 * log((1 + cor.exp) / (1 - cor.exp)) - # z.ref <- 0.5 * log((1 + cor.ref) / (1 - cor.ref)) - - alpha <- (1. - conf.lev) / 2. - l1 <- tanh(atanh(r12) + qnorm(alpha)/sqrt(N-3)) - u1 <- tanh(atanh(r12) + qnorm(1-alpha)/sqrt(N-3)) - l2 <- tanh(atanh(r13) + qnorm(alpha)/sqrt(N-3)) - u2 <- tanh(atanh(r13) + qnorm(1-alpha)/sqrt(N-3)) + if (!anyNA(exp) & !anyNA(ref) & !anyNA(obs)){ - ## Correlation between the two corr.coef r12 and r13 - c.12.13 <- ((r23 - 0.5 * r12 * r13) * (1 - r12*r12 - r13*r13 - r23*r23) + r23*r23*r23) / ((1 - r12*r12) * (1 - r13*r13)) + ## Correlation difference + cor.exp <- cor(exp, obs) + cor.ref <- cor(ref, obs) + output <- NULL + output$corr.diff <- cor.exp - cor.ref - ## Confidence interval - output$conf.lower <- r12 - r13 - sqrt((r12-l1)^2 + (u2 - r13)^2 - 2*c.12.13*(r12-l1)*(u2-r13)) - output$conf.upper <- r12 - r13 + sqrt((u1 - r12)^2 + (r13-l2)^2 - 2*c.12.13*(u1-r12)*(r13-l2)) + ## Significance with one-sided test for equality of dependent correlation coefficients (Steiger, 1980) + r12 <- cor.exp + r13 <- cor.ref + r23 <- cor(exp, ref) + N <- s2dv::Eno(data = obs, time_dim = time_dim, na.action = na.pass, ncores = 1) ## effective degrees of freedom + R <- (1-r12*r12-r13*r13-r23*r23) + 2*r12*r13*r23 + t <- (r12 - r13) * sqrt((N-1)*(1+r23) / (2 * ((N-1)/(N-3)) * R + 0.25*(r12+r13)^2 * (1-r23)^3)) + p.value <- 1 - pt(t, df = N-3) + if (p.value <= alpha){ + output$sign <- TRUE + } else { + output$sign <- FALSE + } + } else { + output <- list(corr.diff = NA, sign = NA) } - ## p-value of one-sided test for equality of dependent correlation coefficients (Steiger, 1980) - R <- (1-r12*r12-r13*r13-r23*r23) + 2*r12*r13*r23 - t <- (r12 - r13) * sqrt((N-1)*(1+r23) / (2 * ((N-1)/(N-3)) * R + 0.25*(r12+r13)^2 * (1-r23)^3)) - output$p.value <- 1 - pt(t, df=N-3) - return(output) } -- GitLab From a41f8bb1cba9567adc154245c7286563dd06debd Mon Sep 17 00:00:00 2001 From: Carlos Delgado Torres Date: Thu, 17 Mar 2022 18:21:56 +0100 Subject: [PATCH 04/13] . --- R/DiffCorr.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/DiffCorr.R b/R/DiffCorr.R index 325f16f..15538e0 100644 --- a/R/DiffCorr.R +++ b/R/DiffCorr.R @@ -4,7 +4,7 @@ #'than the reference forecast, while negative values mean that the reference forecast is #'more skilful. The statistical significance of the correlation differences is computed with #'a one-sided test for equality of dependent correlation coefficients (Steiger, 1980; -#'https://content.apa.org/doi/10.1037/0033-2909.87.2.245; #'Siegert et al., 2017; +#'https://content.apa.org/doi/10.1037/0033-2909.87.2.245; Siegert et al., 2017; #'https://doi.org/10.1175/MWR-D-16-0037.1) using a effective degrees of freedom to account for #'the time series' autocorrelation (von Storch and Zwiers, 1999; https://doi.org/10.1017/CBO9780511612336). #' -- GitLab From 69809a06d380fb457ab6ff972390e4c4dedd4575 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Thu, 24 Mar 2022 13:56:41 +0100 Subject: [PATCH 05/13] added method parameter to allow computing pearson, kendall and spearman coefficients --- R/DiffCorr.R | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/R/DiffCorr.R b/R/DiffCorr.R index 15538e0..3ebe6dd 100644 --- a/R/DiffCorr.R +++ b/R/DiffCorr.R @@ -18,6 +18,8 @@ #'@param member_dim A character string indicating the name of the member dimension to compute the #' ensemble means of the forecast and reference forecast. If it is NULL, the ensemble means should #' be provided directly to the function. The default value is NULL. +#'@param method A character string indicating the correlation coefficient to be computed ("pearson", +#' "kendall", or "spearman"). The default value is "pearson". #'@param alpha Significance level to be used in the statistical significance test. The default value #' is 0.05. #'@param ncores An integer indicating the number of cores to use for parallel @@ -41,7 +43,7 @@ #' #'@import multiApply #'@export -DiffCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, alpha = 0.05, ncores = NULL){ +DiffCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, method = 'pearson', alpha = 0.05, ncores = NULL){ ## Checkings if (!is.array(exp)){stop('"exp" must be an array.')} @@ -53,6 +55,7 @@ DiffCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, alpha if (!is.null(member_dim)){ if (!is.character(member_dim)){stop('"member_dim" must be either NULL or a string.')} } + if (!method %in% c("pearson","kendall","spearman")){stop('"method" must be "pearson", "kendall", or "spearman".')} if (!is.numeric(alpha) | alpha <= 0 | alpha >= 1){stop('"alpha" must be a number between 0 and 1.')} if (!is.null(ncores)){ if (!is.numeric(ncores) | ncores < 1){stop('"ncores" must be either NULL or a positive integer.')} @@ -67,17 +70,18 @@ DiffCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, alpha ## Correlation difference output <- multiApply::Apply(data = list(exp = exp, ref = ref, obs = obs), target_dims = time_dim, fun = .DiffCorr, - time_dim = time_dim, alpha = alpha, ncores = ncores) + time_dim = time_dim, method = method, + alpha = alpha, ncores = ncores) return(output) } -.DiffCorr <- function(exp, ref, obs, time_dim, alpha) { +.DiffCorr <- function(exp, ref, obs, time_dim, method, alpha) { if (!anyNA(exp) & !anyNA(ref) & !anyNA(obs)){ ## Correlation difference - cor.exp <- cor(exp, obs) - cor.ref <- cor(ref, obs) + cor.exp <- cor(x = exp, y = obs, method = method) + cor.ref <- cor(x = ref, y = obs, method = method) output <- NULL output$corr.diff <- cor.exp - cor.ref -- GitLab From 147a91b8e254fa0f7fa25e205f6497cbbab4a84f Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 25 Mar 2022 17:03:15 +0100 Subject: [PATCH 06/13] p.val can be return. N.eff can be defined --- R/DiffCorr.R | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/R/DiffCorr.R b/R/DiffCorr.R index 3ebe6dd..1def12d 100644 --- a/R/DiffCorr.R +++ b/R/DiffCorr.R @@ -13,6 +13,8 @@ #' The length of the time dimension must be the same as for "exp". #'@param obs A numerical array with the observations with, at least, time dimension. #' The length of the time dimension must be the same as for "exp". +#'@param N.eff Effective sample size to be used in the statistical significance test. +#' If NA, it is computed with the Eno() function. The default value is NA. #'@param time_dim A character string indicating the name of the time dimension. #' The default value is 'year'. #'@param member_dim A character string indicating the name of the member dimension to compute the @@ -20,6 +22,7 @@ #' be provided directly to the function. The default value is NULL. #'@param method A character string indicating the correlation coefficient to be computed ("pearson", #' "kendall", or "spearman"). The default value is "pearson". +#'@param p.val Logical indicating whether to regurn the p-value. The default value is TRUE. #'@param alpha Significance level to be used in the statistical significance test. The default value #' is 0.05. #'@param ncores An integer indicating the number of cores to use for parallel @@ -34,6 +37,10 @@ #' A logical array with the statistical significance of the correlation differences with the same #' dimensions as the input arrays except "time_dim" (and "member_dim" if provided). #'} +#'\item{$p.val}{ +#' A numeric array with the p-values with the same dimensions as the input arrays except "time_dim" +#' (and "member_dim" if provided). +#'} #' #'@examples #' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50)) @@ -43,7 +50,7 @@ #' #'@import multiApply #'@export -DiffCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, method = 'pearson', alpha = 0.05, ncores = NULL){ +DiffCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'year', member_dim = NULL, method = 'pearson', p.val = TRUE, alpha = 0.05, ncores = NULL){ ## Checkings if (!is.array(exp)){stop('"exp" must be an array.')} @@ -51,11 +58,11 @@ DiffCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, method if (!is.array(ref)){stop('"ref" must be an array.')} if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(ref)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "ref".')} if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(obs)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "obs".')} + if (!is.na(N.eff) & !is.numeric(N.eff)){stop('"N.eff" must be NA or numeric.')} if (!is.character(time_dim)){stop('"time_dim" must be a string.')} - if (!is.null(member_dim)){ - if (!is.character(member_dim)){stop('"member_dim" must be either NULL or a string.')} - } + if (!is.null(member_dim) & !is.character(member_dim)){stop('"member_dim" must be either NULL or a string.')} if (!method %in% c("pearson","kendall","spearman")){stop('"method" must be "pearson", "kendall", or "spearman".')} + if (!is.logical(p.val)){stop('"p.val" must be TRUE or FALSE.')} if (!is.numeric(alpha) | alpha <= 0 | alpha >= 1){stop('"alpha" must be a number between 0 and 1.')} if (!is.null(ncores)){ if (!is.numeric(ncores) | ncores < 1){stop('"ncores" must be either NULL or a positive integer.')} @@ -70,12 +77,12 @@ DiffCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, method ## Correlation difference output <- multiApply::Apply(data = list(exp = exp, ref = ref, obs = obs), target_dims = time_dim, fun = .DiffCorr, - time_dim = time_dim, method = method, - alpha = alpha, ncores = ncores) + N.eff = N.eff, time_dim = time_dim, method = method, + p.val = p.val, alpha = alpha, ncores = ncores) return(output) } -.DiffCorr <- function(exp, ref, obs, time_dim, method, alpha) { +.DiffCorr <- function(exp, ref, obs, N.eff, time_dim, method, p.val, alpha) { if (!anyNA(exp) & !anyNA(ref) & !anyNA(obs)){ @@ -89,18 +96,22 @@ DiffCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, method r12 <- cor.exp r13 <- cor.ref r23 <- cor(exp, ref) - N <- s2dv::Eno(data = obs, time_dim = time_dim, na.action = na.pass, ncores = 1) ## effective degrees of freedom + if (is.na(N.eff)){ + N.eff <- s2dv::Eno(data = obs, time_dim = time_dim, na.action = na.pass, ncores = 1) ## effective degrees of freedom + } R <- (1-r12*r12-r13*r13-r23*r23) + 2*r12*r13*r23 - t <- (r12 - r13) * sqrt((N-1)*(1+r23) / (2 * ((N-1)/(N-3)) * R + 0.25*(r12+r13)^2 * (1-r23)^3)) - p.value <- 1 - pt(t, df = N-3) + t <- (r12 - r13) * sqrt((N.eff-1)*(1+r23) / (2 * ((N.eff-1)/(N.eff-3)) * R + 0.25*(r12+r13)^2 * (1-r23)^3)) + p.value <- 1 - pt(t, df = N.eff-3) if (p.value <= alpha){ output$sign <- TRUE } else { output$sign <- FALSE } + if (isTRUE(p.val)){output$p.val <- p.value} } else { output <- list(corr.diff = NA, sign = NA) + if (isTRUE(p.val)){output$p.val <- NA} } return(output) -- GitLab From dc9d0784af3157ef03c6452ab9fc45e3df4195b6 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 25 Mar 2022 17:14:10 +0100 Subject: [PATCH 07/13] sign=FALSE is return if p.val=NA (e.g., if there is variance is zero) --- R/DiffCorr.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/DiffCorr.R b/R/DiffCorr.R index 1def12d..c35ee91 100644 --- a/R/DiffCorr.R +++ b/R/DiffCorr.R @@ -102,7 +102,7 @@ DiffCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'year', member_dim = R <- (1-r12*r12-r13*r13-r23*r23) + 2*r12*r13*r23 t <- (r12 - r13) * sqrt((N.eff-1)*(1+r23) / (2 * ((N.eff-1)/(N.eff-3)) * R + 0.25*(r12+r13)^2 * (1-r23)^3)) p.value <- 1 - pt(t, df = N.eff-3) - if (p.value <= alpha){ + if (!is.na(p.value) & p.value <= alpha){ output$sign <- TRUE } else { output$sign <- FALSE -- GitLab From 36a67e5e8f6b9638cd062fd2687ed640b9c971f4 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Mon, 28 Mar 2022 17:59:51 +0200 Subject: [PATCH 08/13] several modification taking into account Eleftheria feedback --- R/DiffCorr.R | 73 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 27 deletions(-) diff --git a/R/DiffCorr.R b/R/DiffCorr.R index c35ee91..4286082 100644 --- a/R/DiffCorr.R +++ b/R/DiffCorr.R @@ -13,44 +13,46 @@ #' The length of the time dimension must be the same as for "exp". #'@param obs A numerical array with the observations with, at least, time dimension. #' The length of the time dimension must be the same as for "exp". -#'@param N.eff Effective sample size to be used in the statistical significance test. -#' If NA, it is computed with the Eno() function. The default value is NA. +#'@param N.eff Effective sample size to be used in the statistical significance test. It can be NA +#' (in this case, it is computed with the s2dv::Eno() function), a numeric (which is used for all +#' cases), or an array with the same dimensions as "obs" except "time_dim" (for a particular N.eff +#' to be used for each case). The default value is NA. #'@param time_dim A character string indicating the name of the time dimension. -#' The default value is 'year'. +#' The default value is 'sdate'. #'@param member_dim A character string indicating the name of the member dimension to compute the #' ensemble means of the forecast and reference forecast. If it is NULL, the ensemble means should #' be provided directly to the function. The default value is NULL. #'@param method A character string indicating the correlation coefficient to be computed ("pearson", #' "kendall", or "spearman"). The default value is "pearson". -#'@param p.val Logical indicating whether to regurn the p-value. The default value is TRUE. -#'@param alpha Significance level to be used in the statistical significance test. The default value -#' is 0.05. +#'@param alpha Significance level to be used in the statistical significance test. If NULL, the p-value +#' is returned instead. The default value is NULL. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' -#'@return A list of 2: +#'@return A list with: #'\item{$r}{ #' A numerical array with the correlation differences with the same dimensions as the input arrays #' except "time_dim" (and "member_dim" if provided). #'} #'\item{$sign}{ #' A logical array with the statistical significance of the correlation differences with the same -#' dimensions as the input arrays except "time_dim" (and "member_dim" if provided). +#' dimensions as the input arrays except "time_dim" (and "member_dim" if provided). Returned only +#' if "alpha" is a numeric. #'} #'\item{$p.val}{ #' A numeric array with the p-values with the same dimensions as the input arrays except "time_dim" -#' (and "member_dim" if provided). +#' (and "member_dim" if provided). Returned only if "alpha" is NULL. #'} #' #'@examples -#' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50)) -#' ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50)) -#' obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, year = 50)) +#' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#' ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#' obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) #' DiffCorr(exp, ref, obs, member_dim = 'member') #' #'@import multiApply #'@export -DiffCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'year', member_dim = NULL, method = 'pearson', p.val = TRUE, alpha = 0.05, ncores = NULL){ +DiffCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate', member_dim = NULL, method = 'pearson', alpha = NULL, ncores = NULL){ ## Checkings if (!is.array(exp)){stop('"exp" must be an array.')} @@ -58,12 +60,19 @@ DiffCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'year', member_dim = if (!is.array(ref)){stop('"ref" must be an array.')} if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(ref)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "ref".')} if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(obs)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "obs".')} - if (!is.na(N.eff) & !is.numeric(N.eff)){stop('"N.eff" must be NA or numeric.')} + if (is.array(N.eff)){ + if (!all(names(dim(N.eff)) %in% names(dim(obs)))){ + stop('If "N.eff" is provided with an array, it must have the same dimensions as "obs" except "time_dim".') + } + } else if (!is.na(N.eff) & !is.numeric(N.eff)){ + stop('"N.eff" must be NA, a numeric, or an array with the same dimensions as "obs" except "time_dim".') + } if (!is.character(time_dim)){stop('"time_dim" must be a string.')} if (!is.null(member_dim) & !is.character(member_dim)){stop('"member_dim" must be either NULL or a string.')} if (!method %in% c("pearson","kendall","spearman")){stop('"method" must be "pearson", "kendall", or "spearman".')} - if (!is.logical(p.val)){stop('"p.val" must be TRUE or FALSE.')} - if (!is.numeric(alpha) | alpha <= 0 | alpha >= 1){stop('"alpha" must be a number between 0 and 1.')} + if (!is.null(alpha)){ + if (!is.numeric(alpha) | alpha <= 0 | alpha >= 1){stop('"alpha" must be NULL or a number between 0 and 1.')} + } if (!is.null(ncores)){ if (!is.numeric(ncores) | ncores < 1){stop('"ncores" must be either NULL or a positive integer.')} } @@ -75,14 +84,16 @@ DiffCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'year', member_dim = } ## Correlation difference - output <- multiApply::Apply(data = list(exp = exp, ref = ref, obs = obs), - target_dims = time_dim, fun = .DiffCorr, - N.eff = N.eff, time_dim = time_dim, method = method, - p.val = p.val, alpha = alpha, ncores = ncores) + output <- multiApply::Apply(data = list(exp = exp, ref = ref, + obs = obs, N.eff = N.eff), + target_dims = list(exp = time_dim, ref = time_dim, + obs = time_dim, N.eff = NULL), + fun = .DiffCorr, time_dim = time_dim, + method = method, alpha = alpha, ncores = ncores) return(output) } -.DiffCorr <- function(exp, ref, obs, N.eff, time_dim, method, p.val, alpha) { +.DiffCorr <- function(exp, ref, obs, N.eff, time_dim, method, alpha) { if (!anyNA(exp) & !anyNA(ref) & !anyNA(obs)){ @@ -102,16 +113,24 @@ DiffCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'year', member_dim = R <- (1-r12*r12-r13*r13-r23*r23) + 2*r12*r13*r23 t <- (r12 - r13) * sqrt((N.eff-1)*(1+r23) / (2 * ((N.eff-1)/(N.eff-3)) * R + 0.25*(r12+r13)^2 * (1-r23)^3)) p.value <- 1 - pt(t, df = N.eff-3) - if (!is.na(p.value) & p.value <= alpha){ - output$sign <- TRUE + if (is.null(alpha)){ + output$p.val <- p.value } else { - output$sign <- FALSE + if (!is.na(p.value) & p.value <= alpha){ + output$sign <- TRUE + } else { + output$sign <- FALSE + } } - if (isTRUE(p.val)){output$p.val <- p.value} } else { - output <- list(corr.diff = NA, sign = NA) - if (isTRUE(p.val)){output$p.val <- NA} + + if (is.null(alpha)){ + output <- list(corr.diff = NA, p.val = NA) + } else { + output <- list(corr.diff = NA, sign = NA) + } + } return(output) -- GitLab From db13be8e56430f30be24289a2898ca921de4cfe9 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Tue, 29 Mar 2022 17:15:38 +0200 Subject: [PATCH 09/13] added handle.na parameter --- R/DiffCorr.R | 95 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 62 insertions(+), 33 deletions(-) diff --git a/R/DiffCorr.R b/R/DiffCorr.R index 4286082..5b067a5 100644 --- a/R/DiffCorr.R +++ b/R/DiffCorr.R @@ -26,6 +26,10 @@ #' "kendall", or "spearman"). The default value is "pearson". #'@param alpha Significance level to be used in the statistical significance test. If NULL, the p-value #' is returned instead. The default value is NULL. +#'@handle.na How to handle missing values. If "return.na", NA will be returned for the cases that contain +#' at least one NA in "exp", "ref" or "obs". If "only.complete.triplets", only the time steps with no +#' missing values in all "exp", "ref" and "obs" will be used. If "na.fail", an error will arise if any of +#' "exp", "ref" or "obs" contains any NA. The default value is "return.na". #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -52,7 +56,7 @@ #' #'@import multiApply #'@export -DiffCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate', member_dim = NULL, method = 'pearson', alpha = NULL, ncores = NULL){ +DiffCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate', member_dim = NULL, method = 'pearson', alpha = NULL, handle.na = 'return.na', ncores = NULL){ ## Checkings if (!is.array(exp)){stop('"exp" must be an array.')} @@ -73,6 +77,7 @@ DiffCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate', member_dim = if (!is.null(alpha)){ if (!is.numeric(alpha) | alpha <= 0 | alpha >= 1){stop('"alpha" must be NULL or a number between 0 and 1.')} } + if (!handle.na %in% c('return.na','only.complete.triplets','na.fail')){stop('"handle.na" must be "return.na", "only.complete.triplets" or "na.fail".')} if (!is.null(ncores)){ if (!is.numeric(ncores) | ncores < 1){stop('"ncores" must be either NULL or a positive integer.')} } @@ -88,49 +93,73 @@ DiffCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate', member_dim = obs = obs, N.eff = N.eff), target_dims = list(exp = time_dim, ref = time_dim, obs = time_dim, N.eff = NULL), - fun = .DiffCorr, time_dim = time_dim, - method = method, alpha = alpha, ncores = ncores) + fun = .DiffCorr, time_dim = time_dim, method = method, + alpha = alpha, handle.na = handle.na, ncores = ncores) return(output) } -.DiffCorr <- function(exp, ref, obs, N.eff, time_dim, method, alpha) { +.DiffCorr <- function(exp, ref, obs, N.eff, time_dim, method, alpha, handle.na) { - if (!anyNA(exp) & !anyNA(ref) & !anyNA(obs)){ + if (anyNA(exp) | anyNA(ref) | anyNA(obs)){ ## There are NAs - ## Correlation difference - cor.exp <- cor(x = exp, y = obs, method = method) - cor.ref <- cor(x = ref, y = obs, method = method) - output <- NULL - output$corr.diff <- cor.exp - cor.ref - - ## Significance with one-sided test for equality of dependent correlation coefficients (Steiger, 1980) - r12 <- cor.exp - r13 <- cor.ref - r23 <- cor(exp, ref) - if (is.na(N.eff)){ - N.eff <- s2dv::Eno(data = obs, time_dim = time_dim, na.action = na.pass, ncores = 1) ## effective degrees of freedom - } - R <- (1-r12*r12-r13*r13-r23*r23) + 2*r12*r13*r23 - t <- (r12 - r13) * sqrt((N.eff-1)*(1+r23) / (2 * ((N.eff-1)/(N.eff-3)) * R + 0.25*(r12+r13)^2 * (1-r23)^3)) - p.value <- 1 - pt(t, df = N.eff-3) - if (is.null(alpha)){ - output$p.val <- p.value - } else { - if (!is.na(p.value) & p.value <= alpha){ - output$sign <- TRUE + if (identical(handle.na,'na.fail')){ + + stop('The data contain NAs.') + + } else if (identical(handle.na,'only.complete.triplets')){ + + nna <- !is.na(exp) & !is.na(ref) & !is.na(obs) + if (all(nna == FALSE)){stop("there are no complete sets of forecasts and observations")} + exp <- exp[nna] + ref <- ref[nna] + obs <- obs[nna] + output <- .diff.corr(exp = exp, ref = ref, obs = obs, method = method, N.eff = N.eff, time_dim = time_dim, alpha = alpha) + + } else if (identical(handle.na,'return.na')){ + + if (is.null(alpha)){ + output <- list(corr.diff = NA, p.val = NA) } else { - output$sign <- FALSE + output <- list(corr.diff = NA, sign = NA) } - } + + } else {stop('Incorrect "handle.na"')} - } else { + } else { ## There are no NAs - if (is.null(alpha)){ - output <- list(corr.diff = NA, p.val = NA) + output <- .diff.corr(exp = exp, ref = ref, obs = obs, method = method, N.eff = N.eff, time_dim = time_dim, alpha = alpha) + + } + + return(output) +} + +.diff.corr <- function(exp, ref, obs, method, N.eff, time_dim, alpha){ + + ## Correlation difference + cor.exp <- cor(x = exp, y = obs, method = method) + cor.ref <- cor(x = ref, y = obs, method = method) + output <- NULL + output$corr.diff <- cor.exp - cor.ref + + ## Significance with one-sided test for equality of dependent correlation coefficients (Steiger, 1980) + r12 <- cor.exp + r13 <- cor.ref + r23 <- cor(exp, ref) + if (is.na(N.eff)){ + N.eff <- s2dv::Eno(data = obs, time_dim = time_dim, na.action = na.pass, ncores = 1) ## effective degrees of freedom + } + R <- (1-r12*r12-r13*r13-r23*r23) + 2*r12*r13*r23 + t <- (r12 - r13) * sqrt((N.eff-1)*(1+r23) / (2 * ((N.eff-1)/(N.eff-3)) * R + 0.25*(r12+r13)^2 * (1-r23)^3)) + p.value <- 1 - pt(t, df = N.eff-3) + if (is.null(alpha)){ + output$p.val <- p.value + } else { + if (!is.na(p.value) & p.value <= alpha){ + output$sign <- TRUE } else { - output <- list(corr.diff = NA, sign = NA) + output$sign <- FALSE } - } return(output) -- GitLab From 43665468fc0a4a93ffc60161f5a52ec7b2b273d7 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 6 Apr 2022 16:13:27 +0200 Subject: [PATCH 10/13] Combine .diff.corr() with .DiffCorr() and refine the code format --- NAMESPACE | 1 + R/DiffCorr.R | 334 ++++++++++++++++++++++++++++++------------------ man/DiffCorr.Rd | 98 ++++++++++++++ 3 files changed, 310 insertions(+), 123 deletions(-) create mode 100644 man/DiffCorr.Rd diff --git a/NAMESPACE b/NAMESPACE index ca5d500..8e17668 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -25,6 +25,7 @@ export(ConfigShowSimilarEntries) export(ConfigShowTable) export(Consist_Trend) export(Corr) +export(DiffCorr) export(EOF) export(Eno) export(EuroAtlanticTC) diff --git a/R/DiffCorr.R b/R/DiffCorr.R index 5b067a5..74421eb 100644 --- a/R/DiffCorr.R +++ b/R/DiffCorr.R @@ -1,166 +1,254 @@ #'Compute the correlation difference and its significance #' -#'Positive values of the correlation difference indicate that the forecast is more skilful -#'than the reference forecast, while negative values mean that the reference forecast is -#'more skilful. The statistical significance of the correlation differences is computed with -#'a one-sided test for equality of dependent correlation coefficients (Steiger, 1980; +#'Compute the correlation difference between two deterministic forecasts. +#'Positive values of the correlation difference indicate that the forecast is +#'more skillful than the reference forecast, while negative values mean that +#'the reference forecast is more skillful. The statistical significance of the +#'correlation differences is computed with a one-sided test for equality of +#'dependent correlation coefficients (Steiger, 1980; #'https://content.apa.org/doi/10.1037/0033-2909.87.2.245; Siegert et al., 2017; -#'https://doi.org/10.1175/MWR-D-16-0037.1) using a effective degrees of freedom to account for -#'the time series' autocorrelation (von Storch and Zwiers, 1999; https://doi.org/10.1017/CBO9780511612336). +#'https://doi.org/10.1175/MWR-D-16-0037.1) using effective degrees of freedom +#'to account for the autocorrelation of the time series (von Storch and Zwiers, +#'1999; https://doi.org/10.1017/CBO9780511612336). #' -#'@param exp A numerical array with the forecast with, at least, time dimension. -#'@param ref A numerical array with the reference forecast data with, at least, time dimension. -#' The length of the time dimension must be the same as for "exp". -#'@param obs A numerical array with the observations with, at least, time dimension. -#' The length of the time dimension must be the same as for "exp". -#'@param N.eff Effective sample size to be used in the statistical significance test. It can be NA -#' (in this case, it is computed with the s2dv::Eno() function), a numeric (which is used for all -#' cases), or an array with the same dimensions as "obs" except "time_dim" (for a particular N.eff -#' to be used for each case). The default value is NA. +#'@param exp A named numerical array of the forecast data with at least time +#' dimension. +#'@param ref A named numerical array of the reference forecast data with at +#' least time dimension. The dimensions must be the same as "exp" except +#' 'memb_dim'. +#'@param obs A named numerical array with the observations with at least time +#' dimension. The dimensions must be the same as "exp" except 'memb_dim'. +#'@param N.eff Effective sample size to be used in the statistical significance +#' test. It can be NA (and it will be computed with the s2dv:::.Eno), a +#' numeric (which is used for all cases), or an array with the same dimensions +#' as "obs" except "time_dim" (for a particular N.eff to be used for each case) +#' . The default value is NA. #'@param time_dim A character string indicating the name of the time dimension. #' The default value is 'sdate'. -#'@param member_dim A character string indicating the name of the member dimension to compute the -#' ensemble means of the forecast and reference forecast. If it is NULL, the ensemble means should -#' be provided directly to the function. The default value is NULL. -#'@param method A character string indicating the correlation coefficient to be computed ("pearson", -#' "kendall", or "spearman"). The default value is "pearson". -#'@param alpha Significance level to be used in the statistical significance test. If NULL, the p-value -#' is returned instead. The default value is NULL. -#'@handle.na How to handle missing values. If "return.na", NA will be returned for the cases that contain -#' at least one NA in "exp", "ref" or "obs". If "only.complete.triplets", only the time steps with no -#' missing values in all "exp", "ref" and "obs" will be used. If "na.fail", an error will arise if any of -#' "exp", "ref" or "obs" contains any NA. The default value is "return.na". +#'@param memb_dim A character string indicating the name of the member +#' dimension to compute the ensemble mean of the forecast and reference +#' forecast. If it is NULL (default), the ensemble mean should be provided +#' directly to the function. +#'@param method A character string indicating the correlation coefficient to be +#' computed ("pearson", "kendall", or "spearman"). The default value is +#' "pearson". +#'@param alpha A numeric of the significance level to be used in the statistical +#' significance test. If it is a numeric, "sign" will be returned. If NULL, the +#' p-value will be returned instead. The default value is NULL. +#'@param handle.na A charcater string indicating how to handle missing values. +#' If "return.na", NAs will be returned for the cases that contain at least one +#' NA in "exp", "ref", or "obs". If "only.complete.triplets", only the time +#' steps with no missing values in all "exp", "ref", and "obs" will be used. If +#' "na.fail", an error will arise if any of "exp", "ref", or "obs" contains any +#' NA. The default value is "return.na". #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' #'@return A list with: -#'\item{$r}{ -#' A numerical array with the correlation differences with the same dimensions as the input arrays -#' except "time_dim" (and "member_dim" if provided). +#'\item{$corr.diff}{ +#' A numerical array of the correlation differences with the same dimensions as +#' the input arrays except "time_dim" (and "memb_dim" if provided). #'} #'\item{$sign}{ -#' A logical array with the statistical significance of the correlation differences with the same -#' dimensions as the input arrays except "time_dim" (and "member_dim" if provided). Returned only -#' if "alpha" is a numeric. +#' A logical array of the statistical significance of the correlation +#' differences with the same dimensions as the input arrays except "time_dim" +#' (and "memb_dim" if provided). Returned only if "alpha" is a numeric. #'} #'\item{$p.val}{ -#' A numeric array with the p-values with the same dimensions as the input arrays except "time_dim" -#' (and "member_dim" if provided). Returned only if "alpha" is NULL. +#' A numeric array of the p-values with the same dimensions as the input arrays +#' except "time_dim" (and "memb_dim" if provided). Returned only if "alpha" is +#' NULL. #'} #' #'@examples #' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) #' ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) #' obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) -#' DiffCorr(exp, ref, obs, member_dim = 'member') +#' res <- DiffCorr(exp, ref, obs, memb_dim = 'member') #' #'@import multiApply #'@export -DiffCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate', member_dim = NULL, method = 'pearson', alpha = NULL, handle.na = 'return.na', ncores = NULL){ +DiffCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate', + memb_dim = NULL, method = 'pearson', alpha = NULL, + handle.na = 'return.na', ncores = NULL) { - ## Checkings - if (!is.array(exp)){stop('"exp" must be an array.')} - if (!is.array(obs)){stop('"obs" must be an array.')} - if (!is.array(ref)){stop('"ref" must be an array.')} - if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(ref)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "ref".')} - if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(obs)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "obs".')} - if (is.array(N.eff)){ - if (!all(names(dim(N.eff)) %in% names(dim(obs)))){ - stop('If "N.eff" is provided with an array, it must have the same dimensions as "obs" except "time_dim".') + # Check inputs + ## exp, ref, and obs (1) + if (!is.array(exp) | !is.numeric(exp)) + stop('Parameter "exp" must be a numeric array.') + if (!is.array(ref) | !is.numeric(ref)) + stop('Parameter "ref" must be a numeric array.') + if (!is.array(obs) | !is.numeric(obs)) + stop('Parameter "obs" must be a numeric array.') + ## N.eff + if (is.array(N.eff)) { + if (!is.numeric(N.eff)) stop("Parameter 'N.eff' must be numeric.") + if (!all(names(dim(N.eff)) %in% names(dim(obs)))) { + stop(paste0('If parameter "N.eff" is provided with an array, it must ', + 'have the same dimensions as "obs" except "time_dim".')) + } else if (any(dim(obs)[match(names(dim(N.eff)), names(dim(obs)))] != dim(N.eff))) { + stop("Parameter 'N.eff' must have the same dimensions as 'obs' except 'time_dim'.") } - } else if (!is.na(N.eff) & !is.numeric(N.eff)){ - stop('"N.eff" must be NA, a numeric, or an array with the same dimensions as "obs" except "time_dim".') + } else if (any((!is.na(N.eff) & !is.numeric(N.eff)) | length(N.eff) != 1)) { + stop(paste0('Parameter "N.eff" must be NA, a numeric, or an array with ', + 'the same dimensions as "obs" except "time_dim".')) } - if (!is.character(time_dim)){stop('"time_dim" must be a string.')} - if (!is.null(member_dim) & !is.character(member_dim)){stop('"member_dim" must be either NULL or a string.')} - if (!method %in% c("pearson","kendall","spearman")){stop('"method" must be "pearson", "kendall", or "spearman".')} - if (!is.null(alpha)){ - if (!is.numeric(alpha) | alpha <= 0 | alpha >= 1){stop('"alpha" must be NULL or a number between 0 and 1.')} + + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop('Parameter "time_dim" must be a character string.') + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(ref)) | + !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp', 'ref', or 'obs' dimension.") } - if (!handle.na %in% c('return.na','only.complete.triplets','na.fail')){stop('"handle.na" must be "return.na", "only.complete.triplets" or "na.fail".')} - if (!is.null(ncores)){ - if (!is.numeric(ncores) | ncores < 1){stop('"ncores" must be either NULL or a positive integer.')} + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp)) | !memb_dim %in% names(dim(ref))) { + stop("Parameter 'memb_dim' is not found in 'exp' or 'ref' dimension.") + } } - - ## Ensemble means - if (!is.null(member_dim)){ - exp <- multiApply::Apply(data = exp, target_dims = member_dim, fun = mean, na.rm = FALSE, ncores = ncores)$output1 - ref <- multiApply::Apply(data = ref, target_dims = member_dim, fun = mean, na.rm = FALSE, ncores = ncores)$output1 + ## exp, ref, and obs (2) + name_exp <- sort(names(dim(exp))) + name_ref <- sort(names(dim(ref))) + name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + name_ref <- name_ref[-which(name_ref == memb_dim)] + } + if (!all.equal(dim(exp)[name_exp], dim(ref)[name_ref], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp', 'ref', and 'obs' must have same length of ", + "all dimensions expect 'memb_dim'.")) + } + ## method + if (!method %in% c("pearson", "kendall", "spearman")) { + stop('Parameter "method" must be "pearson", "kendall", or "spearman".') + } + ## alpha + if (!is.null(alpha)) { + if (any(!is.numeric(alpha) | alpha <= 0 | alpha >= 1 | + length(alpha) > 1)) { + stop('Parameter "alpha" must be NULL or a number between 0 and 1.') + } + } + ## handle.na + if (!handle.na %in% c('return.na', 'only.complete.triplets', 'na.fail')) { + stop('Parameter "handle.na" must be "return.na", "only.complete.triplets" or "na.fail".') + } + if (!is.null(ncores)) { + if (any(!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1)) { + stop('Parameter "ncores" must be either NULL or a positive integer.') + } } - ## Correlation difference - output <- multiApply::Apply(data = list(exp = exp, ref = ref, - obs = obs, N.eff = N.eff), - target_dims = list(exp = time_dim, ref = time_dim, - obs = time_dim, N.eff = NULL), - fun = .DiffCorr, time_dim = time_dim, method = method, - alpha = alpha, handle.na = handle.na, ncores = ncores) + ############################### + + # NA check: na.fail + if (handle.na == "na.fail" & (anyNA(exp) | anyNA(ref) | anyNA(obs))) + stop('The data contain NAs.') + + # Calculate ensemble means + dim_exp <- dim(exp) + dim_ref <- dim(ref) + dim_obs <- dim(obs) + + if (!is.null(memb_dim)) { + exp_memb_dim_ind <- which(names(dim_exp) == memb_dim) + ref_memb_dim_ind <- which(names(dim_ref) == memb_dim) + exp <- apply(exp, c(1:length(dim_exp))[-exp_memb_dim_ind], mean, na.rm = FALSE) + ref <- apply(ref, c(1:length(dim_ref))[-ref_memb_dim_ind], mean, na.rm = FALSE) + if (is.null(dim(exp))) exp <- array(exp, dim = c(dim_exp[time_dim])) + if (is.null(dim(ref))) ref <- array(ref, dim = c(dim_ref[time_dim])) + +# exp <- Apply(data = exp, target_dims = memb_dim, fun = mean, na.rm = FALSE, ncores = ncores)$output1 +# ref <- Apply(data = ref, target_dims = memb_dim, fun = mean, na.rm = FALSE, ncores = ncores)$output1 + } + + # output_dims + if (is.null(alpha)) { + output_dims <- list(corr.diff = NULL, p.val = NULL) + } else { + output_dims <- list(corr.diff = NULL, sign = NULL) + } + # Correlation difference + if (is.array(N.eff)) { + output <- Apply(data = list(exp = exp, ref = ref, + obs = obs, N.eff = N.eff), + target_dims = list(exp = time_dim, ref = time_dim, + obs = time_dim, N.eff = NULL), + output_dims = output_dims, + fun = .DiffCorr, method = method, + alpha = alpha, handle.na = handle.na, ncores = ncores) + } else { + output <- Apply(data = list(exp = exp, ref = ref, + obs = obs), + target_dims = list(exp = time_dim, ref = time_dim, + obs = time_dim), + output_dims = output_dims, N.eff = N.eff, + fun = .DiffCorr, method = method, + alpha = alpha, handle.na = handle.na, ncores = ncores) + } + return(output) } -.DiffCorr <- function(exp, ref, obs, N.eff, time_dim, method, alpha, handle.na) { +.DiffCorr <- function(exp, ref, obs, N.eff, method, alpha, handle.na) { + + .diff.corr <- function(exp, ref, obs, method, N.eff, alpha) { + + # Correlation difference + cor.exp <- cor(x = exp, y = obs, method = method) + cor.ref <- cor(x = ref, y = obs, method = method) + output <- NULL + output$corr.diff <- cor.exp - cor.ref + + # Significance with one-sided test for equality of dependent correlation coefficients (Steiger, 1980) + r12 <- cor.exp + r13 <- cor.ref + r23 <- cor(exp, ref) + if (is.na(N.eff)) { + N.eff <- .Eno(x = obs, na.action = na.pass) ## effective degrees of freedom + } + R <- (1 - r12 * r12 - r13 * r13 - r23 * r23) + 2 * r12 * r13 * r23 + t <- (r12 - r13) * sqrt((N.eff - 1) * (1 + r23) / (2 * ((N.eff - 1) / (N.eff - 3)) * R + 0.25 * (r12 + r13)^2 * (1 - r23)^3)) + p.value <- 1 - pt(t, df = N.eff - 3) + if (is.null(alpha)) { + output$p.val <- p.value + } else { + output$sign <- ifelse(!is.na(p.value) & p.value <= alpha, TRUE, FALSE) + } + return(output) + } + + #================================================== - if (anyNA(exp) | anyNA(ref) | anyNA(obs)){ ## There are NAs - - if (identical(handle.na,'na.fail')){ - - stop('The data contain NAs.') - - } else if (identical(handle.na,'only.complete.triplets')){ - - nna <- !is.na(exp) & !is.na(ref) & !is.na(obs) - if (all(nna == FALSE)){stop("there are no complete sets of forecasts and observations")} - exp <- exp[nna] - ref <- ref[nna] - obs <- obs[nna] - output <- .diff.corr(exp = exp, ref = ref, obs = obs, method = method, N.eff = N.eff, time_dim = time_dim, alpha = alpha) - - } else if (identical(handle.na,'return.na')){ + if (anyNA(exp) | anyNA(ref) | anyNA(obs)) { ## There are NAs + if (handle.na == 'only.complete.triplets') { + nna <- is.na(exp) | is.na(ref) | is.na(obs) # A vector of T/F + if (all(nna)) stop("There is no complete set of forecasts and observations.") + # Remove the incomplete set + exp <- exp[!nna] + ref <- ref[!nna] + obs <- obs[!nna] + + output <- .diff.corr(exp = exp, ref = ref, obs = obs, method = method, + N.eff = N.eff, alpha = alpha) - if (is.null(alpha)){ + } else if (handle.na == 'return.na') { + # Data contain NA, return NAs directly without passing to .diff.corr + if (is.null(alpha)) { output <- list(corr.diff = NA, p.val = NA) } else { output <- list(corr.diff = NA, sign = NA) } - - } else {stop('Incorrect "handle.na"')} - - } else { ## There are no NAs - - output <- .diff.corr(exp = exp, ref = ref, obs = obs, method = method, N.eff = N.eff, time_dim = time_dim, alpha = alpha) - - } - - return(output) -} - -.diff.corr <- function(exp, ref, obs, method, N.eff, time_dim, alpha){ - - ## Correlation difference - cor.exp <- cor(x = exp, y = obs, method = method) - cor.ref <- cor(x = ref, y = obs, method = method) - output <- NULL - output$corr.diff <- cor.exp - cor.ref - - ## Significance with one-sided test for equality of dependent correlation coefficients (Steiger, 1980) - r12 <- cor.exp - r13 <- cor.ref - r23 <- cor(exp, ref) - if (is.na(N.eff)){ - N.eff <- s2dv::Eno(data = obs, time_dim = time_dim, na.action = na.pass, ncores = 1) ## effective degrees of freedom - } - R <- (1-r12*r12-r13*r13-r23*r23) + 2*r12*r13*r23 - t <- (r12 - r13) * sqrt((N.eff-1)*(1+r23) / (2 * ((N.eff-1)/(N.eff-3)) * R + 0.25*(r12+r13)^2 * (1-r23)^3)) - p.value <- 1 - pt(t, df = N.eff-3) - if (is.null(alpha)){ - output$p.val <- p.value - } else { - if (!is.na(p.value) & p.value <= alpha){ - output$sign <- TRUE - } else { - output$sign <- FALSE } + + } else { ## There is no NA + output <- .diff.corr(exp = exp, ref = ref, obs = obs, method = method, + N.eff = N.eff, alpha = alpha) } - return(output) } diff --git a/man/DiffCorr.Rd b/man/DiffCorr.Rd new file mode 100644 index 0000000..5671908 --- /dev/null +++ b/man/DiffCorr.Rd @@ -0,0 +1,98 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/DiffCorr.R +\name{DiffCorr} +\alias{DiffCorr} +\title{Compute the correlation difference and its significance} +\usage{ +DiffCorr( + exp, + ref, + obs, + N.eff = NA, + time_dim = "sdate", + memb_dim = NULL, + method = "pearson", + alpha = NULL, + handle.na = "return.na", + ncores = NULL +) +} +\arguments{ +\item{exp}{A named numerical array of the forecast data with at least time +dimension.} + +\item{ref}{A named numerical array of the reference forecast data with at +least time dimension. The dimensions must be the same as "exp" except +'memb_dim'.} + +\item{obs}{A named numerical array with the observations with at least time +dimension. The dimensions must be the same as "exp" except 'memb_dim'.} + +\item{N.eff}{Effective sample size to be used in the statistical significance +test. It can be NA (and it will be computed with the s2dv:::.Eno), a +numeric (which is used for all cases), or an array with the same dimensions +as "obs" except "time_dim" (for a particular N.eff to be used for each case) +. The default value is NA.} + +\item{time_dim}{A character string indicating the name of the time dimension. +The default value is 'sdate'.} + +\item{memb_dim}{A character string indicating the name of the member +dimension to compute the ensemble mean of the forecast and reference +forecast. If it is NULL (default), the ensemble mean should be provided +directly to the function.} + +\item{method}{A character string indicating the correlation coefficient to be +computed ("pearson", "kendall", or "spearman"). The default value is +"pearson".} + +\item{alpha}{A numeric of the significance level to be used in the statistical +significance test. If it is a numeric, "sign" will be returned. If NULL, the +p-value will be returned instead. The default value is NULL.} + +\item{handle.na}{A charcater string indicating how to handle missing values. +If "return.na", NAs will be returned for the cases that contain at least one +NA in "exp", "ref", or "obs". If "only.complete.triplets", only the time +steps with no missing values in all "exp", "ref", and "obs" will be used. If +"na.fail", an error will arise if any of "exp", "ref", or "obs" contains any +NA. The default value is "return.na".} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A list with: +\item{$corr.diff}{ + A numerical array of the correlation differences with the same dimensions as + the input arrays except "time_dim" (and "memb_dim" if provided). +} +\item{$sign}{ + A logical array of the statistical significance of the correlation + differences with the same dimensions as the input arrays except "time_dim" + (and "memb_dim" if provided). Returned only if "alpha" is a numeric. +} +\item{$p.val}{ + A numeric array of the p-values with the same dimensions as the input arrays + except "time_dim" (and "memb_dim" if provided). Returned only if "alpha" is + NULL. +} +} +\description{ +Compute the correlation difference between two deterministic forecasts. +Positive values of the correlation difference indicate that the forecast is +more skillful than the reference forecast, while negative values mean that +the reference forecast is more skillful. The statistical significance of the +correlation differences is computed with a one-sided test for equality of +dependent correlation coefficients (Steiger, 1980; +https://content.apa.org/doi/10.1037/0033-2909.87.2.245; Siegert et al., 2017; +https://doi.org/10.1175/MWR-D-16-0037.1) using effective degrees of freedom +to account for the autocorrelation of the time series (von Storch and Zwiers, +1999; https://doi.org/10.1017/CBO9780511612336). +} +\examples{ +exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +res <- DiffCorr(exp, ref, obs, memb_dim = 'member') + +} -- GitLab From b0bebd4af6e3372bfcb1a38545e619a8a9e6f714 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 28 Apr 2022 16:03:39 +0200 Subject: [PATCH 11/13] Change output name from 'corr.diff' to 'diff.corr'; add unit test; improve a few conditional statement --- R/DiffCorr.R | 21 ++-- man/DiffCorr.Rd | 2 +- man/ResidualCorr.Rd | 6 +- tests/testthat/test-DiffCorr.R | 217 +++++++++++++++++++++++++++++++++ 4 files changed, 231 insertions(+), 15 deletions(-) create mode 100644 tests/testthat/test-DiffCorr.R diff --git a/R/DiffCorr.R b/R/DiffCorr.R index 74421eb..511dd3a 100644 --- a/R/DiffCorr.R +++ b/R/DiffCorr.R @@ -45,7 +45,7 @@ #' computation. The default value is NULL. #' #'@return A list with: -#'\item{$corr.diff}{ +#'\item{$diff.corr}{ #' A numerical array of the correlation differences with the same dimensions as #' the input arrays except "time_dim" (and "memb_dim" if provided). #'} @@ -83,17 +83,15 @@ DiffCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate', ## N.eff if (is.array(N.eff)) { if (!is.numeric(N.eff)) stop("Parameter 'N.eff' must be numeric.") - if (!all(names(dim(N.eff)) %in% names(dim(obs)))) { + if (!all(names(dim(N.eff)) %in% names(dim(obs))) | + any(dim(obs)[match(names(dim(N.eff)), names(dim(obs)))] != dim(N.eff))) { stop(paste0('If parameter "N.eff" is provided with an array, it must ', 'have the same dimensions as "obs" except "time_dim".')) - } else if (any(dim(obs)[match(names(dim(N.eff)), names(dim(obs)))] != dim(N.eff))) { - stop("Parameter 'N.eff' must have the same dimensions as 'obs' except 'time_dim'.") } } else if (any((!is.na(N.eff) & !is.numeric(N.eff)) | length(N.eff) != 1)) { stop(paste0('Parameter "N.eff" must be NA, a numeric, or an array with ', 'the same dimensions as "obs" except "time_dim".')) } - ## time_dim if (!is.character(time_dim) | length(time_dim) != 1) stop('Parameter "time_dim" must be a character string.') @@ -118,7 +116,8 @@ DiffCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate', name_exp <- name_exp[-which(name_exp == memb_dim)] name_ref <- name_ref[-which(name_ref == memb_dim)] } - if (!all.equal(dim(exp)[name_exp], dim(ref)[name_ref], dim(obs)[name_obs])) { + if (!identical(length(name_exp), length(name_ref), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(ref)[name_ref], dim(obs)[name_obs])) { stop(paste0("Parameter 'exp', 'ref', and 'obs' must have same length of ", "all dimensions expect 'memb_dim'.")) } @@ -169,9 +168,9 @@ DiffCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate', # output_dims if (is.null(alpha)) { - output_dims <- list(corr.diff = NULL, p.val = NULL) + output_dims <- list(diff.corr = NULL, p.val = NULL) } else { - output_dims <- list(corr.diff = NULL, sign = NULL) + output_dims <- list(diff.corr = NULL, sign = NULL) } # Correlation difference if (is.array(N.eff)) { @@ -203,7 +202,7 @@ DiffCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate', cor.exp <- cor(x = exp, y = obs, method = method) cor.ref <- cor(x = ref, y = obs, method = method) output <- NULL - output$corr.diff <- cor.exp - cor.ref + output$diff.corr <- cor.exp - cor.ref # Significance with one-sided test for equality of dependent correlation coefficients (Steiger, 1980) r12 <- cor.exp @@ -240,9 +239,9 @@ DiffCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate', } else if (handle.na == 'return.na') { # Data contain NA, return NAs directly without passing to .diff.corr if (is.null(alpha)) { - output <- list(corr.diff = NA, p.val = NA) + output <- list(diff.corr = NA, p.val = NA) } else { - output <- list(corr.diff = NA, sign = NA) + output <- list(diff.corr = NA, sign = NA) } } diff --git a/man/DiffCorr.Rd b/man/DiffCorr.Rd index 5671908..7f56289 100644 --- a/man/DiffCorr.Rd +++ b/man/DiffCorr.Rd @@ -62,7 +62,7 @@ computation. The default value is NULL.} } \value{ A list with: -\item{$corr.diff}{ +\item{$diff.corr}{ A numerical array of the correlation differences with the same dimensions as the input arrays except "time_dim" (and "memb_dim" if provided). } diff --git a/man/ResidualCorr.Rd b/man/ResidualCorr.Rd index 1e0adad..e98ea7a 100644 --- a/man/ResidualCorr.Rd +++ b/man/ResidualCorr.Rd @@ -67,9 +67,9 @@ A list with: the input arrays except "time_dim" (and "memb_dim" if provided). } \item{$sign}{ - A logical array of the statistical significance of the residual correlation - with the same dimensions as the input arrays except "time_dim" (and - "memb_dim" if provided). Returned only if "alpha" is a numeric. + A logical array indicating whether the residual correlation is statistically + significant or not with the same dimensions as the input arrays except "time_dim" + (and "memb_dim" if provided). Returned only if "alpha" is a numeric. } \item{$p.val}{ A numeric array of the p-values with the same dimensions as the input arrays diff --git a/tests/testthat/test-DiffCorr.R b/tests/testthat/test-DiffCorr.R new file mode 100644 index 0000000..2d1fbaf --- /dev/null +++ b/tests/testthat/test-DiffCorr.R @@ -0,0 +1,217 @@ +context("s2dv::DiffCorr tests") + +############################################## + +# dat1 +set.seed(1) +exp1 <- array(rnorm(180), dim = c(lat = 3, lon = 2, memb = 3, sdate = 10)) +set.seed(2) +ref1 <- array(rnorm(120), dim = c(lat = 3, lon = 2, memb = 2, sdate = 10)) +set.seed(3) +obs1 <- array(rnorm(60), dim = c(lat = 3, lon = 2, sdate = 10)) +Neff1 <- array(rep(9:10, 3), dim = c(lat = 3, lon = 2)) + +# dat2 +set.seed(1) +exp2 <- array(rnorm(10), dim = c(sdate = 10)) +set.seed(2) +ref2 <- array(rnorm(10), dim = c(sdate = 10)) +set.seed(3) +obs2 <- array(rnorm(10), dim = c(sdate = 10)) + + + +############################################## +test_that("1. Input checks", { + # exp and obs (1) + expect_error( + DiffCorr(c()), + 'Parameter "exp" must be a numeric array.' + ) + expect_error( + DiffCorr(exp1, c('a')), + 'Parameter "ref" must be a numeric array.' + ) + expect_error( + DiffCorr(exp1, ref1, c()), + 'Parameter "obs" must be a numeric array.' + ) + # N.eff + expect_error( + DiffCorr(exp1, ref1, obs1, N.eff = array(1, dim = c(lat = 2, lon = 2))), + "If parameter \"N.eff\" is provided with an array, it must have the same dimensions as \"obs\" except \"time_dim\"." + ) + expect_error( + DiffCorr(exp1, ref1, obs1, N.eff = 1:3), + 'Parameter "N.eff" must be NA, a numeric, or an array with the same dimensions as "obs" except "time_dim".' + ) + # time_dim + expect_error( + DiffCorr(exp1, ref1, obs1, time_dim = 1), + 'Parameter "time_dim" must be a character string.' + ) + expect_error( + DiffCorr(exp1, ref1, obs1, time_dim = 'time'), + "Parameter 'time_dim' is not found in 'exp', 'ref', or 'obs' dimension." + ) + # memb_dim + expect_error( + DiffCorr(exp1, ref1, obs1, memb_dim = TRUE), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + DiffCorr(exp1, ref1, obs1, memb_dim = 'member'), + "Parameter 'memb_dim' is not found in 'exp' or 'ref' dimension." + ) + # exp, ref, and obs (2) + expect_error( + DiffCorr(exp1, array(1:10, dim = c(sdate = 10, memb = 1)), obs1, memb_dim = 'memb'), + "Parameter 'exp', 'ref', and 'obs' must have same length of all dimensions expect 'memb_dim'." + ) + # method + expect_error( + DiffCorr(exp1, ref1, obs1, method = 'asd', memb_dim = 'memb'), + 'Parameter "method" must be "pearson", "kendall", or "spearman".' + ) + # alpha + expect_error( + DiffCorr(exp2, ref2, obs2, alpha = 1), + 'Parameter "alpha" must be NULL or a number between 0 and 1.' + ) + # handle.na + expect_error( + DiffCorr(exp2, ref2, obs2, handle.na = TRUE), + 'Parameter "handle.na" must be "return.na", "only.complete.triplets" or "na.fail".' + ) + # ncores + expect_error( + DiffCorr(exp2, ref2, obs2, ncores = 1.5), + 'Parameter "ncores" must be either NULL or a positive integer.' + ) + +}) + +############################################## +test_that("2. Output checks: dat1", { +expect_equal( +names(DiffCorr(exp1, ref1, obs1, memb_dim = 'memb')), +c("diff.corr", "p.val") +) +expect_equal( +dim(DiffCorr(exp1, ref1, obs1, memb_dim = 'memb')$diff), +c(lat = 3, lon = 2) +) +expect_equal( +dim(DiffCorr(exp1, ref1, obs1, memb_dim = 'memb')$p.val), +c(lat = 3, lon = 2) +) +expect_equal( +as.vector(DiffCorr(exp1, ref1, obs1, memb_dim = 'memb')$diff), +c(0.27347087, 0.50556882, 0.08855968, 0.24199701, 0.22935182, 0.88336336), +tolerance = 0.0001 +) +expect_equal( +as.vector(DiffCorr(exp1, ref1, obs1, memb_dim = 'memb')$p), +c(0.26166060, 0.15899774, 0.39264452, 0.27959883, 0.34736305, 0.07479832), +tolerance = 0.0001 +) +expect_equal( +names(DiffCorr(exp1, ref1, obs1, memb_dim = 'memb', alpha = 0.05)), +c("diff.corr", "sign") +) +expect_equal( +as.vector(DiffCorr(exp1, ref1, obs1, memb_dim = 'memb', alpha = 0.05)$diff.corr), +c(0.27347087, 0.50556882, 0.08855968, 0.24199701, 0.22935182, 0.88336336), +tolerance = 0.0001 +) +expect_equal( +as.vector(DiffCorr(exp1, ref1, obs1, memb_dim = 'memb', alpha = 0.05)$sign), +rep(FALSE, 6) +) +expect_equal( +as.vector(DiffCorr(exp1, ref1, obs1, memb_dim = 'memb', method = "kendall")$diff.corr), +c(0.08888889, 0.35555556, 0.00000000, -0.04444444, 0.00000000, 0.93333333), +tolerance = 0.0001 +) +expect_equal( +as.vector(DiffCorr(exp1, ref1, obs1, memb_dim = 'memb', method = "kendall")$p), +c(0.42259038, 0.25373083, 0.50000000, 0.54183730, 0.50000000, 0.06307063), +tolerance = 0.0001 +) +expect_equal( +as.vector(DiffCorr(exp1, ref1, obs1, memb_dim = 'memb', method = "spearman")$diff.corr), +c(0.07272727, 0.54545455, 0.10909091, -0.01212121, -0.03636364, 1.01818182), +tolerance = 0.0001 +) +expect_equal( +as.vector(DiffCorr(exp1, ref1, obs1, memb_dim = 'memb', method = "spearman")$p), +c(0.4358970, 0.1341575, 0.3448977, 0.5114262, 0.5264872, 0.0437861), +tolerance = 0.0001 +) +expect_equal( +as.vector(DiffCorr(exp1, ref1, obs1, memb_dim = 'memb', N.eff = Neff1)$diff.corr), +c(0.27347087, 0.50556882, 0.08855968, 0.24199701, 0.22935182, 0.88336336), +tolerance = 0.0001 +) +expect_equal( +as.vector(DiffCorr(exp1, ref1, obs1, memb_dim = 'memb', N.eff = Neff1)$p), +c(0.27841537, 0.15899774, 0.40096749, 0.27959883, 0.35889690, 0.07479832), +tolerance = 0.0001 +) + + +#--------------------------- +exp1[1] <- NA +expect_equal( +as.vector(DiffCorr(exp1, ref1, obs1, memb_dim = 'memb')$diff.corr), +c(NA, 0.50556882, 0.08855968, 0.24199701, 0.22935182, 0.88336336), +tolerance = 0.0001 +) +expect_equal( +as.vector(DiffCorr(exp1, ref1, obs1, memb_dim = 'memb', handle.na = 'only.complete.triplets')$diff.corr), +c(0.17418065, 0.50556882, 0.08855968, 0.24199701, 0.22935182, 0.88336336), +tolerance = 0.0001 +) +expect_error( +DiffCorr(exp1, ref1, obs1, memb_dim = 'memb', handle.na = 'na.fail'), +"The data contain NAs." +) + +exp1[1,1,1,1:3] <- NA +ref1[1,1,1,4:8] <- NA +obs1[1,1,9:10] <- NA +expect_error( +DiffCorr(exp1, ref1, obs1, memb_dim = 'memb', handle.na = 'only.complete.triplets'), +"There is no complete set of forecasts and observations." +) + + +}) + +############################################## + +test_that("3. Output checks: dat2", { +expect_equal( +names(DiffCorr(exp2, ref2, obs2)), +c("diff.corr", "p.val") +) +expect_equal( +dim(DiffCorr(exp2, ref2, obs2)$diff.corr), +NULL +) +expect_equal( +dim(DiffCorr(exp2, ref2, obs2)$p), +NULL +) +expect_equal( +DiffCorr(exp2, ref2, obs2)$p, +0.6577392, +tolerance = 0.0001 +) +expect_equal( +DiffCorr(exp2, ref2, obs2)$diff, +-0.2434725, +tolerance = 0.0001 +) + +}) -- GitLab From 02cfb4067cc7be76f0ede438d0ae0245385751c4 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 13 May 2022 09:42:44 +0200 Subject: [PATCH 12/13] Change the order of input obs and ref. --- R/DiffCorr.R | 60 ++++++++++++-------------- man/DiffCorr.Rd | 12 +++--- tests/testthat/test-DiffCorr.R | 78 +++++++++++++++++----------------- 3 files changed, 73 insertions(+), 77 deletions(-) diff --git a/R/DiffCorr.R b/R/DiffCorr.R index 511dd3a..c83b7b8 100644 --- a/R/DiffCorr.R +++ b/R/DiffCorr.R @@ -13,11 +13,11 @@ #' #'@param exp A named numerical array of the forecast data with at least time #' dimension. +#'@param obs A named numerical array with the observations with at least time +#' dimension. The dimensions must be the same as "exp" except 'memb_dim'. #'@param ref A named numerical array of the reference forecast data with at #' least time dimension. The dimensions must be the same as "exp" except #' 'memb_dim'. -#'@param obs A named numerical array with the observations with at least time -#' dimension. The dimensions must be the same as "exp" except 'memb_dim'. #'@param N.eff Effective sample size to be used in the statistical significance #' test. It can be NA (and it will be computed with the s2dv:::.Eno), a #' numeric (which is used for all cases), or an array with the same dimensions @@ -62,13 +62,13 @@ #' #'@examples #' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) -#' ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) #' obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) -#' res <- DiffCorr(exp, ref, obs, memb_dim = 'member') +#' ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#' res <- DiffCorr(exp, obs, ref, memb_dim = 'member') #' #'@import multiApply #'@export -DiffCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate', +DiffCorr <- function(exp, obs, ref, N.eff = NA, time_dim = 'sdate', memb_dim = NULL, method = 'pearson', alpha = NULL, handle.na = 'return.na', ncores = NULL) { @@ -76,10 +76,10 @@ DiffCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate', ## exp, ref, and obs (1) if (!is.array(exp) | !is.numeric(exp)) stop('Parameter "exp" must be a numeric array.') - if (!is.array(ref) | !is.numeric(ref)) - stop('Parameter "ref" must be a numeric array.') if (!is.array(obs) | !is.numeric(obs)) stop('Parameter "obs" must be a numeric array.') + if (!is.array(ref) | !is.numeric(ref)) + stop('Parameter "ref" must be a numeric array.') ## N.eff if (is.array(N.eff)) { if (!is.numeric(N.eff)) stop("Parameter 'N.eff' must be numeric.") @@ -95,9 +95,9 @@ DiffCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate', ## time_dim if (!is.character(time_dim) | length(time_dim) != 1) stop('Parameter "time_dim" must be a character string.') - if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(ref)) | - !time_dim %in% names(dim(obs))) { - stop("Parameter 'time_dim' is not found in 'exp', 'ref', or 'obs' dimension.") + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs)) | + !time_dim %in% names(dim(ref))) { + stop("Parameter 'time_dim' is not found in 'exp', 'obs', or 'ref' dimension.") } ## memb_dim if (!is.null(memb_dim)) { @@ -110,15 +110,15 @@ DiffCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate', } ## exp, ref, and obs (2) name_exp <- sort(names(dim(exp))) - name_ref <- sort(names(dim(ref))) name_obs <- sort(names(dim(obs))) + name_ref <- sort(names(dim(ref))) if (!is.null(memb_dim)) { name_exp <- name_exp[-which(name_exp == memb_dim)] name_ref <- name_ref[-which(name_ref == memb_dim)] } - if (!identical(length(name_exp), length(name_ref), length(name_obs)) | - !identical(dim(exp)[name_exp], dim(ref)[name_ref], dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp', 'ref', and 'obs' must have same length of ", + if (length(name_exp) != length(name_obs) | length(name_exp) != length(name_ref) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs]) | !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { + stop(paste0("Parameter 'exp', 'obs', and 'ref' must have same length of ", "all dimensions expect 'memb_dim'.")) } ## method @@ -161,9 +161,6 @@ DiffCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate', ref <- apply(ref, c(1:length(dim_ref))[-ref_memb_dim_ind], mean, na.rm = FALSE) if (is.null(dim(exp))) exp <- array(exp, dim = c(dim_exp[time_dim])) if (is.null(dim(ref))) ref <- array(ref, dim = c(dim_ref[time_dim])) - -# exp <- Apply(data = exp, target_dims = memb_dim, fun = mean, na.rm = FALSE, ncores = ncores)$output1 -# ref <- Apply(data = ref, target_dims = memb_dim, fun = mean, na.rm = FALSE, ncores = ncores)$output1 } # output_dims @@ -174,18 +171,17 @@ DiffCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate', } # Correlation difference if (is.array(N.eff)) { - output <- Apply(data = list(exp = exp, ref = ref, - obs = obs, N.eff = N.eff), - target_dims = list(exp = time_dim, ref = time_dim, - obs = time_dim, N.eff = NULL), + output <- Apply(data = list(exp = exp, obs = obs, ref = ref, + N.eff = N.eff), + target_dims = list(exp = time_dim, obs = time_dim, + ref = time_dim, N.eff = NULL), output_dims = output_dims, fun = .DiffCorr, method = method, alpha = alpha, handle.na = handle.na, ncores = ncores) } else { - output <- Apply(data = list(exp = exp, ref = ref, - obs = obs), - target_dims = list(exp = time_dim, ref = time_dim, - obs = time_dim), + output <- Apply(data = list(exp = exp, obs = obs, ref = ref), + target_dims = list(exp = time_dim, obs = time_dim, + ref = time_dim), output_dims = output_dims, N.eff = N.eff, fun = .DiffCorr, method = method, alpha = alpha, handle.na = handle.na, ncores = ncores) @@ -194,9 +190,9 @@ DiffCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate', return(output) } -.DiffCorr <- function(exp, ref, obs, N.eff, method, alpha, handle.na) { +.DiffCorr <- function(exp, obs, ref, N.eff = NA, method = 'pearson', alpha = NULL, handle.na = 'return.na') { - .diff.corr <- function(exp, ref, obs, method, N.eff, alpha) { + .diff.corr <- function(exp, obs, ref, method = 'pearson', N.eff = NA, alpha = NULL) { # Correlation difference cor.exp <- cor(x = exp, y = obs, method = method) @@ -224,16 +220,16 @@ DiffCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate', #================================================== - if (anyNA(exp) | anyNA(ref) | anyNA(obs)) { ## There are NAs + if (anyNA(exp) | anyNA(obs) | anyNA(ref)) { ## There are NAs if (handle.na == 'only.complete.triplets') { - nna <- is.na(exp) | is.na(ref) | is.na(obs) # A vector of T/F + nna <- is.na(exp) | is.na(obs) | is.na(ref) # A vector of T/F if (all(nna)) stop("There is no complete set of forecasts and observations.") # Remove the incomplete set exp <- exp[!nna] - ref <- ref[!nna] obs <- obs[!nna] + ref <- ref[!nna] - output <- .diff.corr(exp = exp, ref = ref, obs = obs, method = method, + output <- .diff.corr(exp = exp, obs = obs, ref = ref, method = method, N.eff = N.eff, alpha = alpha) } else if (handle.na == 'return.na') { @@ -246,7 +242,7 @@ DiffCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate', } } else { ## There is no NA - output <- .diff.corr(exp = exp, ref = ref, obs = obs, method = method, + output <- .diff.corr(exp = exp, obs = obs, ref = ref, method = method, N.eff = N.eff, alpha = alpha) } return(output) diff --git a/man/DiffCorr.Rd b/man/DiffCorr.Rd index 7f56289..d920536 100644 --- a/man/DiffCorr.Rd +++ b/man/DiffCorr.Rd @@ -6,8 +6,8 @@ \usage{ DiffCorr( exp, - ref, obs, + ref, N.eff = NA, time_dim = "sdate", memb_dim = NULL, @@ -21,13 +21,13 @@ DiffCorr( \item{exp}{A named numerical array of the forecast data with at least time dimension.} +\item{obs}{A named numerical array with the observations with at least time +dimension. The dimensions must be the same as "exp" except 'memb_dim'.} + \item{ref}{A named numerical array of the reference forecast data with at least time dimension. The dimensions must be the same as "exp" except 'memb_dim'.} -\item{obs}{A named numerical array with the observations with at least time -dimension. The dimensions must be the same as "exp" except 'memb_dim'.} - \item{N.eff}{Effective sample size to be used in the statistical significance test. It can be NA (and it will be computed with the s2dv:::.Eno), a numeric (which is used for all cases), or an array with the same dimensions @@ -91,8 +91,8 @@ to account for the autocorrelation of the time series (von Storch and Zwiers, } \examples{ exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) -ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) -res <- DiffCorr(exp, ref, obs, memb_dim = 'member') +ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +res <- DiffCorr(exp, obs, ref, memb_dim = 'member') } diff --git a/tests/testthat/test-DiffCorr.R b/tests/testthat/test-DiffCorr.R index 2d1fbaf..952e14f 100644 --- a/tests/testthat/test-DiffCorr.R +++ b/tests/testthat/test-DiffCorr.R @@ -30,62 +30,62 @@ test_that("1. Input checks", { ) expect_error( DiffCorr(exp1, c('a')), - 'Parameter "ref" must be a numeric array.' + 'Parameter "obs" must be a numeric array.' ) expect_error( - DiffCorr(exp1, ref1, c()), - 'Parameter "obs" must be a numeric array.' + DiffCorr(exp1, obs1, c()), + 'Parameter "ref" must be a numeric array.' ) # N.eff expect_error( - DiffCorr(exp1, ref1, obs1, N.eff = array(1, dim = c(lat = 2, lon = 2))), + DiffCorr(exp1, obs1, ref1, N.eff = array(1, dim = c(lat = 2, lon = 2))), "If parameter \"N.eff\" is provided with an array, it must have the same dimensions as \"obs\" except \"time_dim\"." ) expect_error( - DiffCorr(exp1, ref1, obs1, N.eff = 1:3), + DiffCorr(exp1, obs1, ref1, N.eff = 1:3), 'Parameter "N.eff" must be NA, a numeric, or an array with the same dimensions as "obs" except "time_dim".' ) # time_dim expect_error( - DiffCorr(exp1, ref1, obs1, time_dim = 1), + DiffCorr(exp1, obs1, ref1, time_dim = 1), 'Parameter "time_dim" must be a character string.' ) expect_error( - DiffCorr(exp1, ref1, obs1, time_dim = 'time'), - "Parameter 'time_dim' is not found in 'exp', 'ref', or 'obs' dimension." + DiffCorr(exp1, obs1, ref1, time_dim = 'time'), + "Parameter 'time_dim' is not found in 'exp', 'obs', or 'ref' dimension." ) # memb_dim expect_error( - DiffCorr(exp1, ref1, obs1, memb_dim = TRUE), + DiffCorr(exp1, obs1, ref1, memb_dim = TRUE), "Parameter 'memb_dim' must be a character string." ) expect_error( - DiffCorr(exp1, ref1, obs1, memb_dim = 'member'), + DiffCorr(exp1, obs1, ref1, memb_dim = 'member'), "Parameter 'memb_dim' is not found in 'exp' or 'ref' dimension." ) # exp, ref, and obs (2) expect_error( - DiffCorr(exp1, array(1:10, dim = c(sdate = 10, memb = 1)), obs1, memb_dim = 'memb'), - "Parameter 'exp', 'ref', and 'obs' must have same length of all dimensions expect 'memb_dim'." + DiffCorr(exp1, array(1:10, dim = c(sdate = 10, memb = 1)), ref1, memb_dim = 'memb'), + "Parameter 'exp', 'obs', and 'ref' must have same length of all dimensions expect 'memb_dim'." ) # method expect_error( - DiffCorr(exp1, ref1, obs1, method = 'asd', memb_dim = 'memb'), + DiffCorr(exp1, obs1, ref1, method = 'asd', memb_dim = 'memb'), 'Parameter "method" must be "pearson", "kendall", or "spearman".' ) # alpha expect_error( - DiffCorr(exp2, ref2, obs2, alpha = 1), + DiffCorr(exp2, obs2, ref2, alpha = 1), 'Parameter "alpha" must be NULL or a number between 0 and 1.' ) # handle.na expect_error( - DiffCorr(exp2, ref2, obs2, handle.na = TRUE), + DiffCorr(exp2, obs2, ref2, handle.na = TRUE), 'Parameter "handle.na" must be "return.na", "only.complete.triplets" or "na.fail".' ) # ncores expect_error( - DiffCorr(exp2, ref2, obs2, ncores = 1.5), + DiffCorr(exp2, obs2, ref2, ncores = 1.5), 'Parameter "ncores" must be either NULL or a positive integer.' ) @@ -94,67 +94,67 @@ test_that("1. Input checks", { ############################################## test_that("2. Output checks: dat1", { expect_equal( -names(DiffCorr(exp1, ref1, obs1, memb_dim = 'memb')), +names(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb')), c("diff.corr", "p.val") ) expect_equal( -dim(DiffCorr(exp1, ref1, obs1, memb_dim = 'memb')$diff), +dim(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb')$diff), c(lat = 3, lon = 2) ) expect_equal( -dim(DiffCorr(exp1, ref1, obs1, memb_dim = 'memb')$p.val), +dim(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb')$p.val), c(lat = 3, lon = 2) ) expect_equal( -as.vector(DiffCorr(exp1, ref1, obs1, memb_dim = 'memb')$diff), +as.vector(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb')$diff), c(0.27347087, 0.50556882, 0.08855968, 0.24199701, 0.22935182, 0.88336336), tolerance = 0.0001 ) expect_equal( -as.vector(DiffCorr(exp1, ref1, obs1, memb_dim = 'memb')$p), +as.vector(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb')$p), c(0.26166060, 0.15899774, 0.39264452, 0.27959883, 0.34736305, 0.07479832), tolerance = 0.0001 ) expect_equal( -names(DiffCorr(exp1, ref1, obs1, memb_dim = 'memb', alpha = 0.05)), +names(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', alpha = 0.05)), c("diff.corr", "sign") ) expect_equal( -as.vector(DiffCorr(exp1, ref1, obs1, memb_dim = 'memb', alpha = 0.05)$diff.corr), +as.vector(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', alpha = 0.05)$diff.corr), c(0.27347087, 0.50556882, 0.08855968, 0.24199701, 0.22935182, 0.88336336), tolerance = 0.0001 ) expect_equal( -as.vector(DiffCorr(exp1, ref1, obs1, memb_dim = 'memb', alpha = 0.05)$sign), +as.vector(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', alpha = 0.05)$sign), rep(FALSE, 6) ) expect_equal( -as.vector(DiffCorr(exp1, ref1, obs1, memb_dim = 'memb', method = "kendall")$diff.corr), +as.vector(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', method = "kendall")$diff.corr), c(0.08888889, 0.35555556, 0.00000000, -0.04444444, 0.00000000, 0.93333333), tolerance = 0.0001 ) expect_equal( -as.vector(DiffCorr(exp1, ref1, obs1, memb_dim = 'memb', method = "kendall")$p), +as.vector(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', method = "kendall")$p), c(0.42259038, 0.25373083, 0.50000000, 0.54183730, 0.50000000, 0.06307063), tolerance = 0.0001 ) expect_equal( -as.vector(DiffCorr(exp1, ref1, obs1, memb_dim = 'memb', method = "spearman")$diff.corr), +as.vector(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', method = "spearman")$diff.corr), c(0.07272727, 0.54545455, 0.10909091, -0.01212121, -0.03636364, 1.01818182), tolerance = 0.0001 ) expect_equal( -as.vector(DiffCorr(exp1, ref1, obs1, memb_dim = 'memb', method = "spearman")$p), +as.vector(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', method = "spearman")$p), c(0.4358970, 0.1341575, 0.3448977, 0.5114262, 0.5264872, 0.0437861), tolerance = 0.0001 ) expect_equal( -as.vector(DiffCorr(exp1, ref1, obs1, memb_dim = 'memb', N.eff = Neff1)$diff.corr), +as.vector(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', N.eff = Neff1)$diff.corr), c(0.27347087, 0.50556882, 0.08855968, 0.24199701, 0.22935182, 0.88336336), tolerance = 0.0001 ) expect_equal( -as.vector(DiffCorr(exp1, ref1, obs1, memb_dim = 'memb', N.eff = Neff1)$p), +as.vector(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', N.eff = Neff1)$p), c(0.27841537, 0.15899774, 0.40096749, 0.27959883, 0.35889690, 0.07479832), tolerance = 0.0001 ) @@ -163,17 +163,17 @@ tolerance = 0.0001 #--------------------------- exp1[1] <- NA expect_equal( -as.vector(DiffCorr(exp1, ref1, obs1, memb_dim = 'memb')$diff.corr), +as.vector(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb')$diff.corr), c(NA, 0.50556882, 0.08855968, 0.24199701, 0.22935182, 0.88336336), tolerance = 0.0001 ) expect_equal( -as.vector(DiffCorr(exp1, ref1, obs1, memb_dim = 'memb', handle.na = 'only.complete.triplets')$diff.corr), +as.vector(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', handle.na = 'only.complete.triplets')$diff.corr), c(0.17418065, 0.50556882, 0.08855968, 0.24199701, 0.22935182, 0.88336336), tolerance = 0.0001 ) expect_error( -DiffCorr(exp1, ref1, obs1, memb_dim = 'memb', handle.na = 'na.fail'), +DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', handle.na = 'na.fail'), "The data contain NAs." ) @@ -181,7 +181,7 @@ exp1[1,1,1,1:3] <- NA ref1[1,1,1,4:8] <- NA obs1[1,1,9:10] <- NA expect_error( -DiffCorr(exp1, ref1, obs1, memb_dim = 'memb', handle.na = 'only.complete.triplets'), +DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', handle.na = 'only.complete.triplets'), "There is no complete set of forecasts and observations." ) @@ -192,24 +192,24 @@ DiffCorr(exp1, ref1, obs1, memb_dim = 'memb', handle.na = 'only.complete.triplet test_that("3. Output checks: dat2", { expect_equal( -names(DiffCorr(exp2, ref2, obs2)), +names(DiffCorr(exp2, obs2, ref2)), c("diff.corr", "p.val") ) expect_equal( -dim(DiffCorr(exp2, ref2, obs2)$diff.corr), +dim(DiffCorr(exp2, obs2, ref2)$diff.corr), NULL ) expect_equal( -dim(DiffCorr(exp2, ref2, obs2)$p), +dim(DiffCorr(exp2, obs2, ref2)$p), NULL ) expect_equal( -DiffCorr(exp2, ref2, obs2)$p, +DiffCorr(exp2, obs2, ref2)$p, 0.6577392, tolerance = 0.0001 ) expect_equal( -DiffCorr(exp2, ref2, obs2)$diff, +DiffCorr(exp2, obs2, ref2)$diff, -0.2434725, tolerance = 0.0001 ) -- GitLab From a28ee0ea87c1d200a5f27119e010807fc876da07 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 17 May 2022 12:27:51 +0200 Subject: [PATCH 13/13] Remove method 'kendall' and add a warning if method == 'spearman' --- R/DiffCorr.R | 27 +++++++++++++++++---------- man/DiffCorr.Rd | 16 +++++++++------- tests/testthat/test-DiffCorr.R | 24 ++++++++++-------------- 3 files changed, 36 insertions(+), 31 deletions(-) diff --git a/R/DiffCorr.R b/R/DiffCorr.R index c83b7b8..c250814 100644 --- a/R/DiffCorr.R +++ b/R/DiffCorr.R @@ -5,11 +5,9 @@ #'more skillful than the reference forecast, while negative values mean that #'the reference forecast is more skillful. The statistical significance of the #'correlation differences is computed with a one-sided test for equality of -#'dependent correlation coefficients (Steiger, 1980; -#'https://content.apa.org/doi/10.1037/0033-2909.87.2.245; Siegert et al., 2017; -#'https://doi.org/10.1175/MWR-D-16-0037.1) using effective degrees of freedom -#'to account for the autocorrelation of the time series (von Storch and Zwiers, -#'1999; https://doi.org/10.1017/CBO9780511612336). +#'dependent correlation coefficients (Steiger, 1980; Siegert et al., 2017) using +#'effective degrees of freedom to account for the autocorrelation of the time +#'series (von Storch and Zwiers, 1999). #' #'@param exp A named numerical array of the forecast data with at least time #' dimension. @@ -30,8 +28,7 @@ #' forecast. If it is NULL (default), the ensemble mean should be provided #' directly to the function. #'@param method A character string indicating the correlation coefficient to be -#' computed ("pearson", "kendall", or "spearman"). The default value is -#' "pearson". +#' computed ("pearson" or "spearman"). The default value is "pearson". #'@param alpha A numeric of the significance level to be used in the statistical #' significance test. If it is a numeric, "sign" will be returned. If NULL, the #' p-value will be returned instead. The default value is NULL. @@ -60,6 +57,11 @@ #' NULL. #'} #' +#'@references +#'Steiger, 1980; https://content.apa.org/doi/10.1037/0033-2909.87.2.245 +#'Siegert et al., 2017; https://doi.org/10.1175/MWR-D-16-0037.1 +#'von Storch and Zwiers, 1999; https://doi.org/10.1017/CBO9780511612336 +#' #'@examples #' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) #' obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) @@ -119,11 +121,16 @@ DiffCorr <- function(exp, obs, ref, N.eff = NA, time_dim = 'sdate', if (length(name_exp) != length(name_obs) | length(name_exp) != length(name_ref) | !identical(dim(exp)[name_exp], dim(obs)[name_obs]) | !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { stop(paste0("Parameter 'exp', 'obs', and 'ref' must have same length of ", - "all dimensions expect 'memb_dim'.")) + "all dimensions except 'memb_dim'.")) } ## method - if (!method %in% c("pearson", "kendall", "spearman")) { - stop('Parameter "method" must be "pearson", "kendall", or "spearman".') + if (!method %in% c("pearson", "spearman")) { + stop('Parameter "method" must be "pearson" or "spearman".') + } + if (method == "spearman") { + warning(paste0("The test used in this function is built on Pearson method. ", + "To verify if Spearman method is reliable, you can run the ", + "Monte-Carlo simulations that are done in Siegert et al., 2017")) } ## alpha if (!is.null(alpha)) { diff --git a/man/DiffCorr.Rd b/man/DiffCorr.Rd index d920536..d8ff65c 100644 --- a/man/DiffCorr.Rd +++ b/man/DiffCorr.Rd @@ -43,8 +43,7 @@ forecast. If it is NULL (default), the ensemble mean should be provided directly to the function.} \item{method}{A character string indicating the correlation coefficient to be -computed ("pearson", "kendall", or "spearman"). The default value is -"pearson".} +computed ("pearson" or "spearman"). The default value is "pearson".} \item{alpha}{A numeric of the significance level to be used in the statistical significance test. If it is a numeric, "sign" will be returned. If NULL, the @@ -83,11 +82,9 @@ Positive values of the correlation difference indicate that the forecast is more skillful than the reference forecast, while negative values mean that the reference forecast is more skillful. The statistical significance of the correlation differences is computed with a one-sided test for equality of -dependent correlation coefficients (Steiger, 1980; -https://content.apa.org/doi/10.1037/0033-2909.87.2.245; Siegert et al., 2017; -https://doi.org/10.1175/MWR-D-16-0037.1) using effective degrees of freedom -to account for the autocorrelation of the time series (von Storch and Zwiers, -1999; https://doi.org/10.1017/CBO9780511612336). +dependent correlation coefficients (Steiger, 1980; Siegert et al., 2017) using +effective degrees of freedom to account for the autocorrelation of the time +series (von Storch and Zwiers, 1999). } \examples{ exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) @@ -96,3 +93,8 @@ ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) res <- DiffCorr(exp, obs, ref, memb_dim = 'member') } +\references{ +Steiger, 1980; https://content.apa.org/doi/10.1037/0033-2909.87.2.245 +Siegert et al., 2017; https://doi.org/10.1175/MWR-D-16-0037.1 +von Storch and Zwiers, 1999; https://doi.org/10.1017/CBO9780511612336 +} diff --git a/tests/testthat/test-DiffCorr.R b/tests/testthat/test-DiffCorr.R index 952e14f..e0834e1 100644 --- a/tests/testthat/test-DiffCorr.R +++ b/tests/testthat/test-DiffCorr.R @@ -66,12 +66,18 @@ test_that("1. Input checks", { # exp, ref, and obs (2) expect_error( DiffCorr(exp1, array(1:10, dim = c(sdate = 10, memb = 1)), ref1, memb_dim = 'memb'), - "Parameter 'exp', 'obs', and 'ref' must have same length of all dimensions expect 'memb_dim'." + "Parameter 'exp', 'obs', and 'ref' must have same length of all dimensions except 'memb_dim'." ) # method expect_error( DiffCorr(exp1, obs1, ref1, method = 'asd', memb_dim = 'memb'), - 'Parameter "method" must be "pearson", "kendall", or "spearman".' + 'Parameter "method" must be "pearson" or "spearman".' + ) + expect_warning( + DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', method = 'spearman'), + paste0("The test used in this function is built on Pearson method. ", + "To verify if Spearman method is reliable, you can run the ", + "Monte-Carlo simulations that are done in Siegert et al., 2017") ) # alpha expect_error( @@ -129,22 +135,12 @@ as.vector(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', alpha = 0.05)$sign), rep(FALSE, 6) ) expect_equal( -as.vector(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', method = "kendall")$diff.corr), -c(0.08888889, 0.35555556, 0.00000000, -0.04444444, 0.00000000, 0.93333333), -tolerance = 0.0001 -) -expect_equal( -as.vector(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', method = "kendall")$p), -c(0.42259038, 0.25373083, 0.50000000, 0.54183730, 0.50000000, 0.06307063), -tolerance = 0.0001 -) -expect_equal( -as.vector(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', method = "spearman")$diff.corr), +suppressWarnings(as.vector(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', method = "spearman")$diff.corr)), c(0.07272727, 0.54545455, 0.10909091, -0.01212121, -0.03636364, 1.01818182), tolerance = 0.0001 ) expect_equal( -as.vector(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', method = "spearman")$p), +suppressWarnings(as.vector(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', method = "spearman")$p)), c(0.4358970, 0.1341575, 0.3448977, 0.5114262, 0.5264872, 0.0437861), tolerance = 0.0001 ) -- GitLab