From 9c99215cf2f49aaa6624c3be06fdd9c788adcdf7 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Thu, 17 Mar 2022 11:19:55 +0100 Subject: [PATCH 1/9] first version (without documentation) --- R/ResidualCorr.R | 62 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 R/ResidualCorr.R diff --git a/R/ResidualCorr.R b/R/ResidualCorr.R new file mode 100644 index 0000000..74e6a25 --- /dev/null +++ b/R/ResidualCorr.R @@ -0,0 +1,62 @@ + +library(s2dv) +library(multiApply) + +ResidualCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, alpha = 0.05, ncores = 1){ + + ## Checkings + if (!is.array(exp)){stop('"exp" must be an array.')} + if (!is.array(obs)){stop('"obs" must be an array.')} + if (!is.array(ref)){stop('"ref" must be an array.')} + if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(ref)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "ref".')} + if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(obs)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "obs".')} + if (!is.numeric(alpha) | alpha <= 0 | alpha >= 1){stop('"alpha" must be a number between 0 and 1.')} + if (!is.character(time_dim)){stop('"time_dim" must be a string.')} + if (!is.null(member_dim)){ + if (!is.character(member_dim)){stop('"member_dim" must be either NULL or a string.')} + } + if (!is.null(ncores)){ + if (!is.numeric(ncores) | ncores < 1){stop('"ncores" must be either NULL or a positive integer.')} + } + + ## Ensemble mean + if (!is.null(member_dim)){ + exp <- multiApply::Apply(data = exp, target_dims = member_dim, fun = mean, na.rm = FALSE, ncores = ncores)$output1 + ref <- multiApply::Apply(data = ref, target_dims = member_dim, fun = mean, na.rm = FALSE, ncores = ncores)$output1 + } + + ## Residual correlation and its significance + output <- multiApply::Apply(data = list(exp = exp, ref = ref, obs = obs), + target_dims = time_dim, fun = .ResidualCorr, + time_dim = time_dim, alpha = alpha, ncores = ncores) + return(output) +} + +.ResidualCorr <- function(exp, ref, obs, time_dim, alpha){ + + if (!anyNA(exp) & !anyNA(ref) & !anyNA(obs)){ + + ## Residuals of 'exp' and 'obs' (regressing 'ref' out in both 'exp' and 'obs') + exp_res <- lm(formula = y ~ x, data = list(y = exp, x = ref), na.action = NULL)$residuals + obs_res <- lm(formula = y ~ x, data = list(y = obs, x = ref), na.action = NULL)$residuals + + ## Residual correlation (and significance) + residual_cor <- NULL + residual_cor$r <- cor(exp_res,obs_res) + + ## Significance + residual_cor$n_eff <- s2dv::Eno(data = obs_res, time_dim = time_dim, na.action = na.pass, ncores = 1) + t_alpha2_n2 <- qt(p=alpha/2, df = residual_cor$n_eff-2, lower.tail = FALSE) + t <- abs(residual_cor$r) * sqrt(residual_cor$n_eff-2) / sqrt(1-residual_cor$r^2) + if (anyNA(c(t,t_alpha2_n2)) == FALSE & t >= t_alpha2_n2){ + residual_cor$sign <- TRUE + } else { + residual_cor$sign <- FALSE + } + + } else { + residual_cor <- list(r = NA, n_eff = NA, sign = NA) + } + + return(residual_cor) +} -- GitLab From 61ece2ebabd2b69f82ee38153d1ad4276fcf65c5 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Thu, 17 Mar 2022 17:20:12 +0100 Subject: [PATCH 2/9] included documentation --- R/ResidualCorr.R | 59 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 50 insertions(+), 9 deletions(-) diff --git a/R/ResidualCorr.R b/R/ResidualCorr.R index 74e6a25..b9cae9d 100644 --- a/R/ResidualCorr.R +++ b/R/ResidualCorr.R @@ -1,8 +1,49 @@ - -library(s2dv) -library(multiApply) - -ResidualCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, alpha = 0.05, ncores = 1){ +#'Compute the residual correlation +#' +#'The residual correlation assesses whether a forecast captures any of the observed variability that is not +#'already captured by a reference forecast (Smith et al., 2019; https://doi.org/10.1038/s41612-019-0071-y.). +#'The procedure is as follows: the residuals of the forecasts and observations are computed by linearly regressing out +#'the reference forecast's ensemble mean from the forecasts’ ensemble mean and observations, respectively. Then, the +#'residual correlation is computed as the correlation between both residuals. Positive values of the residual correlation +#'indicate that the forecast capture more observed variability than the reference forecast, while negative values mean +#'that the reference forecast capture more. The significance of the residual correlation is computed with a two-sided t-test +#'(Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7) using a effective degrees of freedom to account for +#'the time series' autocorrelation (von Storch and Zwiers, 1999; https://doi.org/10.1017/CBO9780511612336). +#' +#'@param exp A numerical array with the forecast with, at least, time dimension. +#'@param ref A numerical array with the reference forecast data with, at least, time dimension. +#' The length of the time dimension must be the same as for "exp". +#'@param obs A numerical array with the observations with, at least, time dimension. +#' The length of the time dimension must be the same as for "exp". +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'year'. +#'@param member_dim A character string indicating the name of the member dimension to compute the +#' ensemble means of the forecast and reference forecast. If it is NULL, the ensemble means should +#' be provided directly to the function. The default value is NULL. +#'@param alpha Significance level to be used in the statistical significance test. The default value +#' is 0.05. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return A list of 2: +#'\item{$r}{ +#' A numerical array with the residual correlation with the same dimensions as the input arrays +#' except "time_dim" (and "member_dim" if provided). +#'} +#'\item{$sign}{ +#' A logical array with the statistical significance of the residual correlation with the same +#' dimensions as the input arrays except "time_dim" (and "member_dim" if provided). +#'} +#' +#'@examples +#' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50)) +#' ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50)) +#' obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, year = 50)) +#' ResidualCorr(exp, ref, obs, member_dim = 'member') +#' +#'@import multiApply +#'@export +ResidualCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, alpha = 0.05, ncores = NULL){ ## Checkings if (!is.array(exp)){stop('"exp" must be an array.')} @@ -45,9 +86,9 @@ ResidualCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, al residual_cor$r <- cor(exp_res,obs_res) ## Significance - residual_cor$n_eff <- s2dv::Eno(data = obs_res, time_dim = time_dim, na.action = na.pass, ncores = 1) - t_alpha2_n2 <- qt(p=alpha/2, df = residual_cor$n_eff-2, lower.tail = FALSE) - t <- abs(residual_cor$r) * sqrt(residual_cor$n_eff-2) / sqrt(1-residual_cor$r^2) + n_eff <- s2dv::Eno(data = obs_res, time_dim = time_dim, na.action = na.pass, ncores = 1) + t_alpha2_n2 <- qt(p = alpha/2, df = n_eff-2, lower.tail = FALSE) + t <- abs(residual_cor$r) * sqrt(n_eff-2) / sqrt(1-residual_cor$r^2) if (anyNA(c(t,t_alpha2_n2)) == FALSE & t >= t_alpha2_n2){ residual_cor$sign <- TRUE } else { @@ -55,7 +96,7 @@ ResidualCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, al } } else { - residual_cor <- list(r = NA, n_eff = NA, sign = NA) + residual_cor <- list(r = NA, sign = NA) } return(residual_cor) -- GitLab From 6e50719a3978f29cad701afcf85ea1bc187de60a Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Thu, 17 Mar 2022 17:56:29 +0100 Subject: [PATCH 3/9] . --- R/ResidualCorr.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ResidualCorr.R b/R/ResidualCorr.R index b9cae9d..a69178a 100644 --- a/R/ResidualCorr.R +++ b/R/ResidualCorr.R @@ -1,4 +1,4 @@ -#'Compute the residual correlation +#'Compute the residual correlation and its significance #' #'The residual correlation assesses whether a forecast captures any of the observed variability that is not #'already captured by a reference forecast (Smith et al., 2019; https://doi.org/10.1038/s41612-019-0071-y.). -- GitLab From bc63bef7eb49cfbc8854e749c415605cf852cbb8 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Thu, 24 Mar 2022 13:29:13 +0100 Subject: [PATCH 4/9] added method parameter to allow computing pearson, kendall and spearman coefficients --- R/ResidualCorr.R | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/R/ResidualCorr.R b/R/ResidualCorr.R index a69178a..bd551e5 100644 --- a/R/ResidualCorr.R +++ b/R/ResidualCorr.R @@ -20,6 +20,8 @@ #'@param member_dim A character string indicating the name of the member dimension to compute the #' ensemble means of the forecast and reference forecast. If it is NULL, the ensemble means should #' be provided directly to the function. The default value is NULL. +#'@param method A character string indicating the correlation coefficient to be computed ("pearson", +#' "kendall", or "spearman"). The default value is "pearson". #'@param alpha Significance level to be used in the statistical significance test. The default value #' is 0.05. #'@param ncores An integer indicating the number of cores to use for parallel @@ -43,7 +45,7 @@ #' #'@import multiApply #'@export -ResidualCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, alpha = 0.05, ncores = NULL){ +ResidualCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, method = 'pearson', alpha = 0.05, ncores = NULL){ ## Checkings if (!is.array(exp)){stop('"exp" must be an array.')} @@ -56,6 +58,7 @@ ResidualCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, al if (!is.null(member_dim)){ if (!is.character(member_dim)){stop('"member_dim" must be either NULL or a string.')} } + if (!method %in% c("pearson","kendall","spearman")){stop('"method" must be "pearson", "kendall", or "spearman".')} if (!is.null(ncores)){ if (!is.numeric(ncores) | ncores < 1){stop('"ncores" must be either NULL or a positive integer.')} } @@ -69,11 +72,12 @@ ResidualCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, al ## Residual correlation and its significance output <- multiApply::Apply(data = list(exp = exp, ref = ref, obs = obs), target_dims = time_dim, fun = .ResidualCorr, - time_dim = time_dim, alpha = alpha, ncores = ncores) + time_dim = time_dim, method = method, + alpha = alpha, ncores = ncores) return(output) } -.ResidualCorr <- function(exp, ref, obs, time_dim, alpha){ +.ResidualCorr <- function(exp, ref, obs, time_dim, method, alpha){ if (!anyNA(exp) & !anyNA(ref) & !anyNA(obs)){ @@ -83,7 +87,7 @@ ResidualCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, al ## Residual correlation (and significance) residual_cor <- NULL - residual_cor$r <- cor(exp_res,obs_res) + residual_cor$r <- cor(x = exp_res, y = obs_res, method = method) ## Significance n_eff <- s2dv::Eno(data = obs_res, time_dim = time_dim, na.action = na.pass, ncores = 1) -- GitLab From c7a87618a84302edb959436756249351988b867b Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Thu, 7 Apr 2022 16:03:49 +0200 Subject: [PATCH 5/9] p.val is returned if alpha=NULL --- R/ResidualCorr.R | 51 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 13 deletions(-) diff --git a/R/ResidualCorr.R b/R/ResidualCorr.R index bd551e5..645e70c 100644 --- a/R/ResidualCorr.R +++ b/R/ResidualCorr.R @@ -22,19 +22,25 @@ #' be provided directly to the function. The default value is NULL. #'@param method A character string indicating the correlation coefficient to be computed ("pearson", #' "kendall", or "spearman"). The default value is "pearson". -#'@param alpha Significance level to be used in the statistical significance test. The default value -#' is 0.05. +#'@param alpha Significance level to be used in the statistical significance test. +#' If it is a numeric, "sign" will be returned. If NULL, the p-value will be returned instead. +#' The default value is NULL. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' -#'@return A list of 2: +#'@return A list with: #'\item{$r}{ #' A numerical array with the residual correlation with the same dimensions as the input arrays #' except "time_dim" (and "member_dim" if provided). #'} #'\item{$sign}{ #' A logical array with the statistical significance of the residual correlation with the same -#' dimensions as the input arrays except "time_dim" (and "member_dim" if provided). +#' dimensions as the input arrays except "time_dim" (and "member_dim" if provided). Returned only +#' if "alpha" is a numeric. +#'} +#'\item{$p.val}{ +#' A numeric array of the p-values with the same dimensions as the input arrays +#' except "time_dim" (and "memb_dim" if provided). Returned only if "alpha" is NULL. #'} #' #'@examples @@ -45,7 +51,7 @@ #' #'@import multiApply #'@export -ResidualCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, method = 'pearson', alpha = 0.05, ncores = NULL){ +ResidualCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, method = 'pearson', alpha = NULL, ncores = NULL){ ## Checkings if (!is.array(exp)){stop('"exp" must be an array.')} @@ -53,7 +59,9 @@ ResidualCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, me if (!is.array(ref)){stop('"ref" must be an array.')} if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(ref)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "ref".')} if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(obs)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "obs".')} - if (!is.numeric(alpha) | alpha <= 0 | alpha >= 1){stop('"alpha" must be a number between 0 and 1.')} + if (!is.null(alpha)){ + if (!is.numeric(alpha) | alpha <= 0 | alpha >= 1){stop('"alpha" must be NULL or a number between 0 and 1.')} + } if (!is.character(time_dim)){stop('"time_dim" must be a string.')} if (!is.null(member_dim)){ if (!is.character(member_dim)){stop('"member_dim" must be either NULL or a string.')} @@ -89,18 +97,35 @@ ResidualCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, me residual_cor <- NULL residual_cor$r <- cor(x = exp_res, y = obs_res, method = method) - ## Significance + ## Effective degrees of freedom n_eff <- s2dv::Eno(data = obs_res, time_dim = time_dim, na.action = na.pass, ncores = 1) - t_alpha2_n2 <- qt(p = alpha/2, df = n_eff-2, lower.tail = FALSE) + + ## Statistic t <- abs(residual_cor$r) * sqrt(n_eff-2) / sqrt(1-residual_cor$r^2) - if (anyNA(c(t,t_alpha2_n2)) == FALSE & t >= t_alpha2_n2){ - residual_cor$sign <- TRUE + + if(is.null(alpha)){ ## p-value + + residual_cor$p.val <- pt(q = t, df = n_eff-2, lower.tail = FALSE) + + } else { + + t_alpha2_n2 <- qt(p = alpha/2, df = n_eff-2, lower.tail = FALSE) + if (anyNA(c(t,t_alpha2_n2)) == FALSE & t >= t_alpha2_n2){ + residual_cor$sign <- TRUE + } else { + residual_cor$sign <- FALSE + } + + } + + } else {## Significance + + if (is.null(alpha)){ + residual_cor <- list(r = NA, p.val = NA) } else { - residual_cor$sign <- FALSE + residual_cor <- list(r = NA, sign = NA) } - } else { - residual_cor <- list(r = NA, sign = NA) } return(residual_cor) -- GitLab From 003a4a10f61fbe9230fb5a366b3881ca955209fa Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 25 Apr 2022 17:26:48 +0200 Subject: [PATCH 6/9] Format adjustment --- NAMESPACE | 1 + R/ResidualCorr.R | 207 +++++++++++++++++++++++++++++--------------- man/ResidualCorr.Rd | 87 +++++++++++++++++++ 3 files changed, 223 insertions(+), 72 deletions(-) create mode 100644 man/ResidualCorr.Rd diff --git a/NAMESPACE b/NAMESPACE index ca5d500..841814e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -60,6 +60,7 @@ export(RatioRMS) export(RatioSDRMS) export(Regression) export(Reorder) +export(ResidualCorr) export(SPOD) export(Season) export(SignalNoiseRatio) diff --git a/R/ResidualCorr.R b/R/ResidualCorr.R index 645e70c..2171da0 100644 --- a/R/ResidualCorr.R +++ b/R/ResidualCorr.R @@ -1,126 +1,189 @@ #'Compute the residual correlation and its significance #' -#'The residual correlation assesses whether a forecast captures any of the observed variability that is not -#'already captured by a reference forecast (Smith et al., 2019; https://doi.org/10.1038/s41612-019-0071-y.). -#'The procedure is as follows: the residuals of the forecasts and observations are computed by linearly regressing out -#'the reference forecast's ensemble mean from the forecasts’ ensemble mean and observations, respectively. Then, the -#'residual correlation is computed as the correlation between both residuals. Positive values of the residual correlation -#'indicate that the forecast capture more observed variability than the reference forecast, while negative values mean -#'that the reference forecast capture more. The significance of the residual correlation is computed with a two-sided t-test -#'(Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7) using a effective degrees of freedom to account for -#'the time series' autocorrelation (von Storch and Zwiers, 1999; https://doi.org/10.1017/CBO9780511612336). +#'The residual correlation assesses whether a forecast captures any of the +#'observed variability that is not already captured by a reference forecast +#'(Smith et al., 2019; https://doi.org/10.1038/s41612-019-0071-y.). +#'The procedure is as follows: the residuals of the forecasts and observations +#'are computed by linearly regressing out the reference forecast's ensemble mean +#'from the forecasts' ensemble mean and observations, respectively. Then, the +#'residual correlation is computed as the correlation between both residuals. +#'Positive values of the residual correlation indicate that the forecast capture +#'more observed variability than the reference forecast, while negative values +#'mean that the reference forecast capture more. The significance of the +#'residual correlation is computed with a two-sided t-test +#'(Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7) using an +#'effective degrees of freedom to account for the time series' autocorrelation +#'(von Storch and Zwiers, 1999; https://doi.org/10.1017/CBO9780511612336). #' -#'@param exp A numerical array with the forecast with, at least, time dimension. -#'@param ref A numerical array with the reference forecast data with, at least, time dimension. -#' The length of the time dimension must be the same as for "exp". -#'@param obs A numerical array with the observations with, at least, time dimension. -#' The length of the time dimension must be the same as for "exp". +#'@param exp A named numerical array of the forecast with at least time +#' dimension. +#'@param ref A named numerical array of the reference forecast data with at +#' least time dimension. The dimensions must be the same as "exp" except +#' 'memb_dim'. +#'@param obs A named numerical array of the observations with at least time +#' dimension. The dimensions must be the same as "exp" except 'memb_dim'. #'@param time_dim A character string indicating the name of the time dimension. #' The default value is 'year'. -#'@param member_dim A character string indicating the name of the member dimension to compute the -#' ensemble means of the forecast and reference forecast. If it is NULL, the ensemble means should -#' be provided directly to the function. The default value is NULL. -#'@param method A character string indicating the correlation coefficient to be computed ("pearson", -#' "kendall", or "spearman"). The default value is "pearson". -#'@param alpha Significance level to be used in the statistical significance test. -#' If it is a numeric, "sign" will be returned. If NULL, the p-value will be returned instead. -#' The default value is NULL. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the ensemble mean of the forecast and reference forecast. If it +#' is NULL, the ensemble mean should be provided directly to the function. The +#' default value is NULL. +#'@param method A character string indicating the correlation coefficient to be +#' computed ("pearson", "kendall", or "spearman"). The default value is +#' "pearson". +#'@param alpha A numeric of the significance level to be used in the statistical +#' significance test. If it is a numeric, "sign" will be returned. If NULL, the +#' p-value will be returned instead. The default value is NULL. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' #'@return A list with: #'\item{$r}{ -#' A numerical array with the residual correlation with the same dimensions as the input arrays -#' except "time_dim" (and "member_dim" if provided). +#' A numerical array of the residual correlation with the same dimensions as +#' the input arrays except "time_dim" (and "memb_dim" if provided). #'} #'\item{$sign}{ -#' A logical array with the statistical significance of the residual correlation with the same -#' dimensions as the input arrays except "time_dim" (and "member_dim" if provided). Returned only -#' if "alpha" is a numeric. +#' A logical array of the statistical significance of the residual correlation +#' with the same dimensions as the input arrays except "time_dim" (and +#' "memb_dim" if provided). Returned only if "alpha" is a numeric. #'} #'\item{$p.val}{ #' A numeric array of the p-values with the same dimensions as the input arrays -#' except "time_dim" (and "memb_dim" if provided). Returned only if "alpha" is NULL. +#' except "time_dim" (and "memb_dim" if provided). Returned only if "alpha" is +#' NULL. #'} #' #'@examples #' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50)) #' ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50)) #' obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, year = 50)) -#' ResidualCorr(exp, ref, obs, member_dim = 'member') +#' res <- ResidualCorr(exp, ref, obs, memb_dim = 'member') #' #'@import multiApply #'@export -ResidualCorr <- function(exp, ref, obs, time_dim = 'year', member_dim = NULL, method = 'pearson', alpha = NULL, ncores = NULL){ +ResidualCorr <- function(exp, ref, obs, time_dim = 'year', memb_dim = NULL, + method = 'pearson', alpha = NULL, ncores = NULL) { - ## Checkings - if (!is.array(exp)){stop('"exp" must be an array.')} - if (!is.array(obs)){stop('"obs" must be an array.')} - if (!is.array(ref)){stop('"ref" must be an array.')} - if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(ref)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "ref".')} - if (!identical(as.numeric(dim(exp)[time_dim]), as.numeric(dim(obs)[time_dim]))){stop('"time_dim" must have the same length in "exp" and "obs".')} - if (!is.null(alpha)){ - if (!is.numeric(alpha) | alpha <= 0 | alpha >= 1){stop('"alpha" must be NULL or a number between 0 and 1.')} + # Check inputs + ## exp, ref, and obs (1) + if (!is.array(exp) | !is.numeric(exp)) + stop('Parameter "exp" must be a numeric array.') + if (!is.array(ref) | !is.numeric(ref)) + stop('Parameter "ref" must be a numeric array.') + if (!is.array(obs) | !is.numeric(obs)) + stop('Parameter "obs" must be a numeric array.') + + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop('Parameter "time_dim" must be a character string.') + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(ref)) | + !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp', 'ref', or 'obs' dimension.") } - if (!is.character(time_dim)){stop('"time_dim" must be a string.')} - if (!is.null(member_dim)){ - if (!is.character(member_dim)){stop('"member_dim" must be either NULL or a string.')} + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp)) | !memb_dim %in% names(dim(ref))) { + stop("Parameter 'memb_dim' is not found in 'exp' or 'ref' dimension.") + } } - if (!method %in% c("pearson","kendall","spearman")){stop('"method" must be "pearson", "kendall", or "spearman".')} - if (!is.null(ncores)){ - if (!is.numeric(ncores) | ncores < 1){stop('"ncores" must be either NULL or a positive integer.')} + ## exp, ref, and obs (2) + name_exp <- sort(names(dim(exp))) + name_ref <- sort(names(dim(ref))) + name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + name_ref <- name_ref[-which(name_ref == memb_dim)] } - - ## Ensemble mean - if (!is.null(member_dim)){ - exp <- multiApply::Apply(data = exp, target_dims = member_dim, fun = mean, na.rm = FALSE, ncores = ncores)$output1 - ref <- multiApply::Apply(data = ref, target_dims = member_dim, fun = mean, na.rm = FALSE, ncores = ncores)$output1 + if (!all.equal(dim(exp)[name_exp], dim(ref)[name_ref], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp', 'ref', and 'obs' must have same length of ", + "all dimensions expect 'memb_dim'.")) } - + ## method + if (!method %in% c("pearson", "kendall", "spearman")) { + stop('Parameter "method" must be "pearson", "kendall", or "spearman".') + } + ## alpha + if (!is.null(alpha)) { + if (any(!is.numeric(alpha) | alpha <= 0 | alpha >= 1 | + length(alpha) > 1)) { + stop('Parameter "alpha" must be NULL or a number between 0 and 1.') + } + } + ## ncores + if (!is.null(ncores)) { + if (any(!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1)) { + stop('Parameter "ncores" must be either NULL or a positive integer.') + } + } + + ############################### + + # Calculate ensemble mean + dim_exp <- dim(exp) + dim_ref <- dim(ref) + dim_obs <- dim(obs) + + if (!is.null(memb_dim)) { + exp_memb_dim_ind <- which(names(dim_exp) == memb_dim) + ref_memb_dim_ind <- which(names(dim_ref) == memb_dim) + exp <- apply(exp, c(1:length(dim_exp))[-exp_memb_dim_ind], mean, na.rm = FALSE) + ref <- apply(ref, c(1:length(dim_ref))[-ref_memb_dim_ind], mean, na.rm = FALSE) + if (is.null(dim(exp))) exp <- array(exp, dim = c(dim_exp[time_dim])) + if (is.null(dim(ref))) ref <- array(ref, dim = c(dim_ref[time_dim])) + } + + # output_dims + if (is.null(alpha)) { + output_dims <- list(r = NULL, p.val = NULL) + } else { + output_dims <- list(r = NULL, sign = NULL) + } + ## Residual correlation and its significance - output <- multiApply::Apply(data = list(exp = exp, ref = ref, obs = obs), - target_dims = time_dim, fun = .ResidualCorr, - time_dim = time_dim, method = method, - alpha = alpha, ncores = ncores) + output <- Apply(data = list(exp = exp, ref = ref, obs = obs), + target_dims = time_dim, fun = .ResidualCorr, + method = method, + alpha = alpha, ncores = ncores) return(output) } -.ResidualCorr <- function(exp, ref, obs, time_dim, method, alpha){ - - if (!anyNA(exp) & !anyNA(ref) & !anyNA(obs)){ +.ResidualCorr <- function(exp, ref, obs, method, alpha) { + # exp and ref and obs: [time] + + if (!anyNA(exp) & !anyNA(ref) & !anyNA(obs)) { - ## Residuals of 'exp' and 'obs' (regressing 'ref' out in both 'exp' and 'obs') + # Residuals of 'exp' and 'obs' (regressing 'ref' out in both 'exp' and 'obs') exp_res <- lm(formula = y ~ x, data = list(y = exp, x = ref), na.action = NULL)$residuals obs_res <- lm(formula = y ~ x, data = list(y = obs, x = ref), na.action = NULL)$residuals - ## Residual correlation (and significance) + # Residual correlation (and significance) residual_cor <- NULL residual_cor$r <- cor(x = exp_res, y = obs_res, method = method) - ## Effective degrees of freedom - n_eff <- s2dv::Eno(data = obs_res, time_dim = time_dim, na.action = na.pass, ncores = 1) + # Effective degrees of freedom + n_eff <- .Eno(x = obs_res, na.action = na.pass) ## Statistic - t <- abs(residual_cor$r) * sqrt(n_eff-2) / sqrt(1-residual_cor$r^2) + t <- abs(residual_cor$r) * sqrt(n_eff - 2) / sqrt(1 - residual_cor$r^2) - if(is.null(alpha)){ ## p-value - - residual_cor$p.val <- pt(q = t, df = n_eff-2, lower.tail = FALSE) - + if (is.null(alpha)) { # p-value + residual_cor$p.val <- pt(q = t, df = n_eff - 2, lower.tail = FALSE) } else { - - t_alpha2_n2 <- qt(p = alpha/2, df = n_eff-2, lower.tail = FALSE) - if (anyNA(c(t,t_alpha2_n2)) == FALSE & t >= t_alpha2_n2){ + t_alpha2_n2 <- qt(p = alpha / 2, df = n_eff - 2, lower.tail = FALSE) + if (!anyNA(c(t,t_alpha2_n2)) & t >= t_alpha2_n2) { residual_cor$sign <- TRUE } else { residual_cor$sign <- FALSE } - } - } else {## Significance + } else { # There is NA - if (is.null(alpha)){ + if (is.null(alpha)) { residual_cor <- list(r = NA, p.val = NA) } else { residual_cor <- list(r = NA, sign = NA) diff --git a/man/ResidualCorr.Rd b/man/ResidualCorr.Rd new file mode 100644 index 0000000..6bc489c --- /dev/null +++ b/man/ResidualCorr.Rd @@ -0,0 +1,87 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ResidualCorr.R +\name{ResidualCorr} +\alias{ResidualCorr} +\title{Compute the residual correlation and its significance} +\usage{ +ResidualCorr( + exp, + ref, + obs, + time_dim = "year", + memb_dim = NULL, + method = "pearson", + alpha = NULL, + ncores = NULL +) +} +\arguments{ +\item{exp}{A named numerical array of the forecast with at least time +dimension.} + +\item{ref}{A named numerical array of the reference forecast data with at +least time dimension. The dimensions must be the same as "exp" except +'memb_dim'.} + +\item{obs}{A named numerical array of the observations with at least time +dimension. The dimensions must be the same as "exp" except 'memb_dim'.} + +\item{time_dim}{A character string indicating the name of the time dimension. +The default value is 'year'.} + +\item{memb_dim}{A character string indicating the name of the member dimension +to compute the ensemble mean of the forecast and reference forecast. If it +is NULL, the ensemble mean should be provided directly to the function. The +default value is NULL.} + +\item{method}{A character string indicating the correlation coefficient to be +computed ("pearson", "kendall", or "spearman"). The default value is +"pearson".} + +\item{alpha}{A numeric of the significance level to be used in the statistical +significance test. If it is a numeric, "sign" will be returned. If NULL, the +p-value will be returned instead. The default value is NULL.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A list with: +\item{$r}{ + A numerical array of the residual correlation with the same dimensions as + the input arrays except "time_dim" (and "memb_dim" if provided). +} +\item{$sign}{ + A logical array of the statistical significance of the residual correlation + with the same dimensions as the input arrays except "time_dim" (and + "memb_dim" if provided). Returned only if "alpha" is a numeric. +} +\item{$p.val}{ + A numeric array of the p-values with the same dimensions as the input arrays + except "time_dim" (and "memb_dim" if provided). Returned only if "alpha" is + NULL. +} +} +\description{ +The residual correlation assesses whether a forecast captures any of the +observed variability that is not already captured by a reference forecast +(Smith et al., 2019; https://doi.org/10.1038/s41612-019-0071-y.). +The procedure is as follows: the residuals of the forecasts and observations +are computed by linearly regressing out the reference forecast's ensemble mean +from the forecasts' ensemble mean and observations, respectively. Then, the +residual correlation is computed as the correlation between both residuals. +Positive values of the residual correlation indicate that the forecast capture +more observed variability than the reference forecast, while negative values +mean that the reference forecast capture more. The significance of the +residual correlation is computed with a two-sided t-test +(Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7) using an +effective degrees of freedom to account for the time series' autocorrelation +(von Storch and Zwiers, 1999; https://doi.org/10.1017/CBO9780511612336). +} +\examples{ +exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50)) +ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50)) +obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, year = 50)) +res <- ResidualCorr(exp, ref, obs, memb_dim = 'member') + +} -- GitLab From 7d0833b866bc4ade28efdb9e6a54a56611741789 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 26 Apr 2022 19:01:58 +0200 Subject: [PATCH 7/9] Add params 'N.eff' and 'handle.na'; Change time_dim default to 'sdate'; create unit tests --- R/ResidualCorr.R | 144 +++++++++++++++------ man/ResidualCorr.Rd | 25 +++- tests/testthat/test-ResidualCorr.R | 197 +++++++++++++++++++++++++++++ 3 files changed, 324 insertions(+), 42 deletions(-) create mode 100644 tests/testthat/test-ResidualCorr.R diff --git a/R/ResidualCorr.R b/R/ResidualCorr.R index 2171da0..ba00994 100644 --- a/R/ResidualCorr.R +++ b/R/ResidualCorr.R @@ -22,6 +22,11 @@ #' 'memb_dim'. #'@param obs A named numerical array of the observations with at least time #' dimension. The dimensions must be the same as "exp" except 'memb_dim'. +#'@param N.eff Effective sample size to be used in the statistical significance +#' test. It can be NA (and it will be computed with the s2dv:::.Eno), a +#' numeric (which is used for all cases), or an array with the same dimensions +#' as "obs" except "time_dim" (for a particular N.eff to be used for each case) +#' . The default value is NA. #'@param time_dim A character string indicating the name of the time dimension. #' The default value is 'year'. #'@param memb_dim A character string indicating the name of the member dimension @@ -34,11 +39,17 @@ #'@param alpha A numeric of the significance level to be used in the statistical #' significance test. If it is a numeric, "sign" will be returned. If NULL, the #' p-value will be returned instead. The default value is NULL. +#'@param handle.na A charcater string indicating how to handle missing values. +#' If "return.na", NAs will be returned for the cases that contain at least one +#' NA in "exp", "ref", or "obs". If "only.complete.triplets", only the time +#' steps with no missing values in all "exp", "ref", and "obs" will be used. If +#' "na.fail", an error will arise if any of "exp", "ref", or "obs" contains any +#' NA. The default value is "return.na". #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' #'@return A list with: -#'\item{$r}{ +#'\item{$res.corr}{ #' A numerical array of the residual correlation with the same dimensions as #' the input arrays except "time_dim" (and "memb_dim" if provided). #'} @@ -54,16 +65,17 @@ #'} #' #'@examples -#' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50)) -#' ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50)) -#' obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, year = 50)) +#' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#' ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 5, sdate = 50)) +#' obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) #' res <- ResidualCorr(exp, ref, obs, memb_dim = 'member') #' #'@import multiApply #'@export -ResidualCorr <- function(exp, ref, obs, time_dim = 'year', memb_dim = NULL, - method = 'pearson', alpha = NULL, ncores = NULL) { - +ResidualCorr <- function(exp, ref, obs, N.eff = NA, time_dim = 'sdate', + memb_dim = NULL, method = 'pearson', alpha = NULL, + handle.na = 'return.na', ncores = NULL) { + # Check inputs ## exp, ref, and obs (1) if (!is.array(exp) | !is.numeric(exp)) @@ -73,6 +85,19 @@ ResidualCorr <- function(exp, ref, obs, time_dim = 'year', memb_dim = NULL, if (!is.array(obs) | !is.numeric(obs)) stop('Parameter "obs" must be a numeric array.') + ## N.eff + if (is.array(N.eff)) { + if (!is.numeric(N.eff)) stop("Parameter 'N.eff' must be numeric.") + if (!all(names(dim(N.eff)) %in% names(dim(obs))) | + any(dim(obs)[match(names(dim(N.eff)), names(dim(obs)))] != dim(N.eff))) { + stop(paste0('If parameter "N.eff" is provided with an array, it must ', + 'have the same dimensions as "obs" except "time_dim".')) + } + } else if (any((!is.na(N.eff) & !is.numeric(N.eff)) | length(N.eff) != 1)) { + stop(paste0('Parameter "N.eff" must be NA, a numeric, or an array with ', + 'the same dimensions as "obs" except "time_dim".')) + } + ## time_dim if (!is.character(time_dim) | length(time_dim) != 1) stop('Parameter "time_dim" must be a character string.') @@ -97,7 +122,8 @@ ResidualCorr <- function(exp, ref, obs, time_dim = 'year', memb_dim = NULL, name_exp <- name_exp[-which(name_exp == memb_dim)] name_ref <- name_ref[-which(name_ref == memb_dim)] } - if (!all.equal(dim(exp)[name_exp], dim(ref)[name_ref], dim(obs)[name_obs])) { + if (!identical(length(name_exp), length(name_ref), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(ref)[name_ref], dim(obs)[name_obs])) { stop(paste0("Parameter 'exp', 'ref', and 'obs' must have same length of ", "all dimensions expect 'memb_dim'.")) } @@ -112,6 +138,10 @@ ResidualCorr <- function(exp, ref, obs, time_dim = 'year', memb_dim = NULL, stop('Parameter "alpha" must be NULL or a number between 0 and 1.') } } + ## handle.na + if (!handle.na %in% c('return.na', 'only.complete.triplets', 'na.fail')) { + stop('Parameter "handle.na" must be "return.na", "only.complete.triplets" or "na.fail".') + } ## ncores if (!is.null(ncores)) { if (any(!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | @@ -122,6 +152,10 @@ ResidualCorr <- function(exp, ref, obs, time_dim = 'year', memb_dim = NULL, ############################### + # NA check: na.fail + if (handle.na == "na.fail" & (anyNA(exp) | anyNA(ref) | anyNA(obs))) + stop('The data contain NAs.') + # Calculate ensemble mean dim_exp <- dim(exp) dim_ref <- dim(ref) @@ -138,58 +172,94 @@ ResidualCorr <- function(exp, ref, obs, time_dim = 'year', memb_dim = NULL, # output_dims if (is.null(alpha)) { - output_dims <- list(r = NULL, p.val = NULL) + output_dims <- list(res.corr = NULL, p.val = NULL) } else { - output_dims <- list(r = NULL, sign = NULL) + output_dims <- list(res.corr = NULL, sign = NULL) } - ## Residual correlation and its significance - output <- Apply(data = list(exp = exp, ref = ref, obs = obs), - target_dims = time_dim, fun = .ResidualCorr, - method = method, - alpha = alpha, ncores = ncores) + + # Residual correlation + if (is.array(N.eff)) { + output <- Apply(data = list(exp = exp, ref = ref, + obs = obs, N.eff = N.eff), + target_dims = list(exp = time_dim, ref = time_dim, + obs = time_dim, N.eff = NULL), + output_dims = output_dims, + fun = .ResidualCorr, method = method, + alpha = alpha, handle.na = handle.na, ncores = ncores) + } else { + output <- Apply(data = list(exp = exp, ref = ref, + obs = obs), + target_dims = list(exp = time_dim, ref = time_dim, + obs = time_dim), + output_dims = output_dims, N.eff = N.eff, + fun = .ResidualCorr, method = method, + alpha = alpha, handle.na = handle.na, ncores = ncores) + } + return(output) } -.ResidualCorr <- function(exp, ref, obs, method, alpha) { +.ResidualCorr <- function(exp, ref, obs, N.eff, method, alpha, handle.na) { # exp and ref and obs: [time] + .residual.corr <- function(exp, ref, obs, method, N.eff, alpha) { - if (!anyNA(exp) & !anyNA(ref) & !anyNA(obs)) { - # Residuals of 'exp' and 'obs' (regressing 'ref' out in both 'exp' and 'obs') exp_res <- lm(formula = y ~ x, data = list(y = exp, x = ref), na.action = NULL)$residuals obs_res <- lm(formula = y ~ x, data = list(y = obs, x = ref), na.action = NULL)$residuals # Residual correlation (and significance) - residual_cor <- NULL - residual_cor$r <- cor(x = exp_res, y = obs_res, method = method) + output <- NULL + output$res.corr <- cor(x = exp_res, y = obs_res, method = method) # Effective degrees of freedom - n_eff <- .Eno(x = obs_res, na.action = na.pass) - - ## Statistic - t <- abs(residual_cor$r) * sqrt(n_eff - 2) / sqrt(1 - residual_cor$r^2) + if (is.na(N.eff)) { + N.eff <- .Eno(x = obs_res, na.action = na.pass) + } + t <- abs(output$res.corr) * sqrt(N.eff - 2) / sqrt(1 - output$res.corr^2) if (is.null(alpha)) { # p-value - residual_cor$p.val <- pt(q = t, df = n_eff - 2, lower.tail = FALSE) + output$p.val <- pt(q = t, df = N.eff - 2, lower.tail = FALSE) } else { - t_alpha2_n2 <- qt(p = alpha / 2, df = n_eff - 2, lower.tail = FALSE) - if (!anyNA(c(t,t_alpha2_n2)) & t >= t_alpha2_n2) { - residual_cor$sign <- TRUE + t_alpha2_n2 <- qt(p = alpha / 2, df = N.eff - 2, lower.tail = FALSE) + if (!anyNA(c(t, t_alpha2_n2)) & t >= t_alpha2_n2) { + output$sign <- TRUE } else { - residual_cor$sign <- FALSE + output$sign <- FALSE } } - } else { # There is NA - - if (is.null(alpha)) { - residual_cor <- list(r = NA, p.val = NA) - } else { - residual_cor <- list(r = NA, sign = NA) + return(output) + } + + + #================================================== + + if (anyNA(exp) | anyNA(ref) | anyNA(obs)) { ## There are NAs + if (handle.na == 'only.complete.triplets') { + nna <- is.na(exp) | is.na(ref) | is.na(obs) # A vector of T/F + if (all(nna)) stop("There is no complete set of forecasts and observations.") + # Remove the incomplete set + exp <- exp[!nna] + ref <- ref[!nna] + obs <- obs[!nna] + + output <- .residual.corr(exp = exp, ref = ref, obs = obs, method = method, + N.eff = N.eff, alpha = alpha) + + } else if (handle.na == 'return.na') { + # Data contain NA, return NAs directly without passing to .diff.corr + if (is.null(alpha)) { + output <- list(res.corr = NA, p.val = NA) + } else { + output <- list(res.corr = NA, sign = NA) + } } + } else { ## There is no NA + output <- .residual.corr(exp = exp, ref = ref, obs = obs, method = method, + N.eff = N.eff, alpha = alpha) } - - return(residual_cor) + + return(output) } diff --git a/man/ResidualCorr.Rd b/man/ResidualCorr.Rd index 6bc489c..1e0adad 100644 --- a/man/ResidualCorr.Rd +++ b/man/ResidualCorr.Rd @@ -8,10 +8,12 @@ ResidualCorr( exp, ref, obs, - time_dim = "year", + N.eff = NA, + time_dim = "sdate", memb_dim = NULL, method = "pearson", alpha = NULL, + handle.na = "return.na", ncores = NULL ) } @@ -26,6 +28,12 @@ least time dimension. The dimensions must be the same as "exp" except \item{obs}{A named numerical array of the observations with at least time dimension. The dimensions must be the same as "exp" except 'memb_dim'.} +\item{N.eff}{Effective sample size to be used in the statistical significance +test. It can be NA (and it will be computed with the s2dv:::.Eno), a +numeric (which is used for all cases), or an array with the same dimensions +as "obs" except "time_dim" (for a particular N.eff to be used for each case) +. The default value is NA.} + \item{time_dim}{A character string indicating the name of the time dimension. The default value is 'year'.} @@ -42,12 +50,19 @@ computed ("pearson", "kendall", or "spearman"). The default value is significance test. If it is a numeric, "sign" will be returned. If NULL, the p-value will be returned instead. The default value is NULL.} +\item{handle.na}{A charcater string indicating how to handle missing values. +If "return.na", NAs will be returned for the cases that contain at least one +NA in "exp", "ref", or "obs". If "only.complete.triplets", only the time +steps with no missing values in all "exp", "ref", and "obs" will be used. If +"na.fail", an error will arise if any of "exp", "ref", or "obs" contains any +NA. The default value is "return.na".} + \item{ncores}{An integer indicating the number of cores to use for parallel computation. The default value is NULL.} } \value{ A list with: -\item{$r}{ +\item{$res.corr}{ A numerical array of the residual correlation with the same dimensions as the input arrays except "time_dim" (and "memb_dim" if provided). } @@ -79,9 +94,9 @@ effective degrees of freedom to account for the time series' autocorrelation (von Storch and Zwiers, 1999; https://doi.org/10.1017/CBO9780511612336). } \examples{ -exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50)) -ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, year = 50)) -obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, year = 50)) +exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 5, sdate = 50)) +obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) res <- ResidualCorr(exp, ref, obs, memb_dim = 'member') } diff --git a/tests/testthat/test-ResidualCorr.R b/tests/testthat/test-ResidualCorr.R new file mode 100644 index 0000000..e1eec1a --- /dev/null +++ b/tests/testthat/test-ResidualCorr.R @@ -0,0 +1,197 @@ +context("s2dv::ResidualCorr tests") + +############################################## + +# dat1 +set.seed(1) +exp1 <- array(rnorm(180), dim = c(lat = 3, lon = 2, memb = 3, sdate = 10)) +set.seed(2) +ref1 <- array(rnorm(120), dim = c(lat = 3, lon = 2, memb = 2, sdate = 10)) +set.seed(3) +obs1 <- array(rnorm(60), dim = c(lat = 3, lon = 2, sdate = 10)) +Neff1 <- array(rep(9:10, 3), dim = c(lat = 3, lon = 2)) + +# dat2 +set.seed(1) +exp2 <- array(rnorm(10), dim = c(sdate = 10)) +set.seed(2) +ref2 <- array(rnorm(10), dim = c(sdate = 10)) +set.seed(3) +obs2 <- array(rnorm(10), dim = c(sdate = 10)) + + + +############################################## +test_that("1. Input checks", { + # exp and obs (1) + expect_error( + ResidualCorr(c()), + 'Parameter "exp" must be a numeric array.' + ) + expect_error( + ResidualCorr(exp1, c('a')), + 'Parameter "ref" must be a numeric array.' + ) + expect_error( + ResidualCorr(exp1, ref1, c()), + 'Parameter "obs" must be a numeric array.' + ) + # N.eff + expect_error( + ResidualCorr(exp1, ref1, obs1, N.eff = array(1, dim = c(lat = 2, lon = 2))), +'If parameter "N.eff" is provided with an array, it must have the same dimensions as "obs" except "time_dim".' + ) + expect_error( + ResidualCorr(exp1, ref1, obs1, N.eff = 1:3), + 'Parameter "N.eff" must be NA, a numeric, or an array with the same dimensions as "obs" except "time_dim".' + ) + # time_dim + expect_error( + ResidualCorr(exp1, ref1, obs1, time_dim = 1), + 'Parameter "time_dim" must be a character string.' + ) + expect_error( + ResidualCorr(exp1, ref1, obs1, time_dim = 'time'), + "Parameter 'time_dim' is not found in 'exp', 'ref', or 'obs' dimension." + ) + # memb_dim + expect_error( + ResidualCorr(exp1, ref1, obs1, memb_dim = TRUE), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + ResidualCorr(exp1, ref1, obs1, memb_dim = 'member'), + "Parameter 'memb_dim' is not found in 'exp' or 'ref' dimension." + ) + # exp, ref, and obs (2) + expect_error( + ResidualCorr(exp1, array(1:10, dim = c(sdate = 10, memb = 1)), obs1, memb_dim = 'memb'), + "Parameter 'exp', 'ref', and 'obs' must have same length of all dimensions expect 'memb_dim'." + ) + # method + expect_error( + ResidualCorr(exp1, ref1, obs1, method = 'asd', memb_dim = 'memb'), + 'Parameter "method" must be "pearson", "kendall", or "spearman".' + ) + # alpha + expect_error( + ResidualCorr(exp2, ref2, obs2, alpha = 1), + 'Parameter "alpha" must be NULL or a number between 0 and 1.' + ) + # handle.na + expect_error( + ResidualCorr(exp2, ref2, obs2, handle.na = TRUE), + 'Parameter "handle.na" must be "return.na", "only.complete.triplets" or "na.fail".' + ) + # ncores + expect_error( + ResidualCorr(exp2, ref2, obs2, ncores = 1.5), + 'Parameter "ncores" must be either NULL or a positive integer.' + ) + +}) + +############################################## +test_that("2. Output checks: dat1", { +expect_equal( +names(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb')), +c("res.corr", "p.val") +) +expect_equal( +dim(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb')$res.corr), +c(lat = 3, lon = 2) +) +expect_equal( +dim(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb')$p.val), +c(lat = 3, lon = 2) +) +expect_equal( +as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb')$res.corr), +c(0.002784318, 0.537697647, -0.240071018, 0.258706464, 0.338160748, 0.432107476), +tolerance = 0.0001 +) +expect_equal( +as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb')$p), +c(0.49695468, 0.05446055, 0.25203961, 0.23522967, 0.16960864, 0.10618145), +tolerance = 0.0001 +) +expect_equal( +names(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', alpha = 0.05)), +c("res.corr", "sign") +) +expect_equal( +as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', alpha = 0.05)$res.corr), +c(0.002784318, 0.537697647, -0.240071018, 0.258706464, 0.338160748, 0.432107476), +tolerance = 0.0001 +) +expect_equal( +as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', alpha = 0.05)$sign), +rep(FALSE, 6) +) +expect_equal( +as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', N.eff = Neff1)$res), +c(0.002784318, 0.537697647, -0.240071018, 0.258706464, 0.338160748, 0.432107476), +tolerance = 0.0001 +) +expect_equal( +as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', N.eff = Neff1)$p), +c(0.49716394, 0.05446055, 0.26690777, 0.23522967, 0.18671025, 0.10618145), +tolerance = 0.0001 +) + + +#--------------------------- +exp1[1] <- NA +expect_equal( +as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb')$res.corr), +c(NA, 0.537697647, -0.240071018, 0.258706464, 0.338160748, 0.432107476), +tolerance = 0.0001 +) +expect_equal( +as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', handle.na = 'only.complete.triplets')$res.corr), +c(-0.1082588, 0.537697647, -0.240071018, 0.258706464, 0.338160748, 0.432107476), +tolerance = 0.0001 +) +expect_error( +ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', handle.na = 'na.fail'), +"The data contain NAs." +) + +exp1[1,1,1,1:3] <- NA +ref1[1,1,1,4:8] <- NA +obs1[1,1,9:10] <- NA +expect_error( +ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', handle.na = 'only.complete.triplets'), +"There is no complete set of forecasts and observations." +) + + +}) + +############################################## + +test_that("3. Output checks: dat2", { +expect_equal( +names(ResidualCorr(exp2, ref2, obs2)), +c("res.corr", "p.val") +) +expect_equal( +dim(ResidualCorr(exp2, ref2, obs2)$res.corr), +NULL +) +expect_equal( +dim(ResidualCorr(exp2, ref2, obs2)$p), +NULL +) +expect_equal( +ResidualCorr(exp2, ref2, obs2)$p, +0.2209847, +tolerance = 0.0001 +) +expect_equal( +ResidualCorr(exp2, ref2, obs2)$res, +-0.274962, +tolerance = 0.0001 +) + +}) -- GitLab From c27f3bb6210b04fe3ff545a1010019a631d7312b Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 26 Apr 2022 19:15:11 +0200 Subject: [PATCH 8/9] Add unit tests for method kendall and spearman --- tests/testthat/test-ResidualCorr.R | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/testthat/test-ResidualCorr.R b/tests/testthat/test-ResidualCorr.R index e1eec1a..c9ac4b9 100644 --- a/tests/testthat/test-ResidualCorr.R +++ b/tests/testthat/test-ResidualCorr.R @@ -128,6 +128,28 @@ expect_equal( as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', alpha = 0.05)$sign), rep(FALSE, 6) ) +expect_equal( +as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', method = "kendall")$res), +c(0.11111111, 0.42222222, -0.20000000, 0.02222222, 0.20000000, 0.42222222), +tolerance = 0.0001 +) +expect_equal( +as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', method = "kendall")$p), +c(0.3799615, 0.1120891, 0.2897920, 0.4757064, 0.2897920, 0.1120891), +tolerance = 0.0001 +) +expect_equal( +as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', method = "spearman")$res), +c(0.13939394, 0.61212121, -0.27272727, 0.07878788, 0.28484848, 0.47878788), +tolerance = 0.0001 +) +expect_equal( +as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', method = "spearman")$p), +c(0.35046594, 0.02998607, 0.22291917, 0.41435870, 0.21251908, 0.08076146), +tolerance = 0.0001 +) + + expect_equal( as.vector(ResidualCorr(exp1, ref1, obs1, memb_dim = 'memb', N.eff = Neff1)$res), c(0.002784318, 0.537697647, -0.240071018, 0.258706464, 0.338160748, 0.432107476), -- GitLab From 090925f232660f25eeb246671adf1459633d0dea Mon Sep 17 00:00:00 2001 From: Carlos Delgado Torres Date: Wed, 27 Apr 2022 17:13:37 +0200 Subject: [PATCH 9/9] clarified "$sign" output --- R/ResidualCorr.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/ResidualCorr.R b/R/ResidualCorr.R index ba00994..dae0cb8 100644 --- a/R/ResidualCorr.R +++ b/R/ResidualCorr.R @@ -54,9 +54,9 @@ #' the input arrays except "time_dim" (and "memb_dim" if provided). #'} #'\item{$sign}{ -#' A logical array of the statistical significance of the residual correlation -#' with the same dimensions as the input arrays except "time_dim" (and -#' "memb_dim" if provided). Returned only if "alpha" is a numeric. +#' A logical array indicating whether the residual correlation is statistically +#' significant or not with the same dimensions as the input arrays except "time_dim" +#' (and "memb_dim" if provided). Returned only if "alpha" is a numeric. #'} #'\item{$p.val}{ #' A numeric array of the p-values with the same dimensions as the input arrays -- GitLab