From b3b1f3f5e625820c0ab3f0498727a97da5ae2dae Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 24 May 2024 15:50:26 +0200 Subject: [PATCH 1/2] included N.eff for RandomWalkTest --- R/CRPSS.R | 99 +++++++++++++++++++++++++++++++------ R/MSSS.R | 56 ++++++++++++++++----- R/RMSSS.R | 59 ++++++++++++++++++----- R/RPSS.R | 118 ++++++++++++++++++++++++++++++++++++++------- R/RandomWalkTest.R | 64 ++++++++++++++++++------ 5 files changed, 327 insertions(+), 69 deletions(-) diff --git a/R/CRPSS.R b/R/CRPSS.R index 5c901ac..5aa3399 100644 --- a/R/CRPSS.R +++ b/R/CRPSS.R @@ -48,6 +48,12 @@ #' the default of \code{RandomWalkTest()}. #'@param alpha A numeric of the significance level to be used in the statistical #' significance test. The default value is 0.05. +#'@param N.eff Effective sample size to be used in the statistical significance +#' test. It can be NA (and it will be computed with the s2dv:::.Eno), FALSE +#' (and it will use the length of "obs" along "time_dim", so the +#' autocorrelation is not taken into account), a numeric (which is used for +#' all cases), or an array with the same dimensions as "obs" except "time_dim" +#' (for a particular N.eff to be used for each case). The default value is NA. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -80,7 +86,7 @@ #'@export CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, Fair = FALSE, clim.cross.val = TRUE, sig_method.type = 'two.sided.approx', - alpha = 0.05, ncores = NULL) { + alpha = 0.05, N.eff = NA, ncores = NULL) { # Check inputs ## exp, obs, and ref (1) @@ -193,6 +199,22 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', .warning("DelSole and Tippett (2016) aproximation is valid for alpha ", "= 0.05 only. Returning the significance at the 0.05 significance level.") } + ## N.eff + if (is.array(N.eff)) { + if (!is.numeric(N.eff)) stop("Parameter 'N.eff' must be numeric.") + if (!all(names(dim(N.eff)) %in% names(dim(obs))) | + any(dim(obs)[match(names(dim(N.eff)), names(dim(obs)))] != dim(N.eff))) { + stop('If parameter "N.eff" is provided with an array, it must ', + 'have the same dimensions as "obs" except "time_dim".') + } + } else if (any((!is.na(N.eff) & !isFALSE(N.eff) & + !is.numeric(N.eff)) | length(N.eff) != 1)) { + stop('Parameter "N.eff" must be NA, FALSE, a numeric, or an array with ', + 'the same dimensions as "obs" except "time_dim".') + } + if (!isFALSE(N.eff) & sig_method.type=='two.sided.approx'){ + warning('"N.eff" will not be used if "sig_method.type" is "two.sided.approx".') + } ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { @@ -218,26 +240,64 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', target_dims = list(exp = c(time_dim, memb_dim, dat_dim), obs = c(time_dim, dat_dim)) } - output <- Apply(data, - target_dims = target_dims, - fun = .CRPSS, - time_dim = time_dim, memb_dim = memb_dim, - dat_dim = dat_dim, - Fair = Fair, clim.cross.val = clim.cross.val, - sig_method.type = sig_method.type, alpha = alpha, - ncores = ncores) + + if (is.array(N.eff)) { + data$N.eff <- N.eff + target_dims[length(target_dims)+1] <- list(NULL) + if (!is.null(ref)){ + output <- Apply(data, + target_dims = target_dims, + fun = .CRPSS, + time_dim = time_dim, memb_dim = memb_dim, + dat_dim = dat_dim, + Fair = Fair, clim.cross.val = clim.cross.val, + sig_method.type = sig_method.type, alpha = alpha, + ncores = ncores) + } else { # ref=NULL + output <- Apply(data, + target_dims = target_dims, + fun = .CRPSS, + ref = ref, + time_dim = time_dim, memb_dim = memb_dim, + dat_dim = dat_dim, + Fair = Fair, clim.cross.val = clim.cross.val, + sig_method.type = sig_method.type, alpha = alpha, + ncores = ncores) + } + } else { # N.eff not an array + if (!is.null(ref)){ + output <- Apply(data, + target_dims = target_dims, + fun = .CRPSS, + time_dim = time_dim, memb_dim = memb_dim, + dat_dim = dat_dim, + Fair = Fair, clim.cross.val = clim.cross.val, + sig_method.type = sig_method.type, alpha = alpha, + N.eff = N.eff, ncores = ncores) + } else { # ref=NULL + output <- Apply(data, + target_dims = target_dims, + fun = .CRPSS, + ref = ref, + time_dim = time_dim, memb_dim = memb_dim, + dat_dim = dat_dim, + Fair = Fair, clim.cross.val = clim.cross.val, + sig_method.type = sig_method.type, alpha = alpha, + N.eff = N.eff, ncores = ncores) + } + } return(output) } .CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, Fair = FALSE, clim.cross.val = TRUE, - sig_method.type = 'two.sided.approx', alpha = 0.05) { + sig_method.type = 'two.sided.approx', alpha = 0.05, N.eff = NA) { # exp: [sdate, memb, (dat)] # obs: [sdate, (dat)] # ref: [sdate, memb, (dat)] or NULL - + if (is.null(dat_dim)) { nexp <- 1 nobs <- 1 @@ -334,29 +394,38 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', for (i in 1:nexp) { for (j in 1:nobs) { crpss[i, j] <- 1 - crps_exp_mean[i, j] / crps_ref_mean[j] + if (is.na(N.eff)) { + N.eff <- .Eno(x = obs[, j], na.action = na.pass) ## effective degrees of freedom + } sign[i, j] <- .RandomWalkTest(skill_A = crps_exp_mean[i, j], skill_B = crps_ref_mean[j], test.type = sig_method.type, alpha = alpha, - sign = T, pval = F)$sign + sign = T, pval = F, N.eff = N.eff)$sign } } } else { for (i in 1:nexp) { for (j in 1:nobs) { crpss[i, j] <- 1 - crps_exp_mean[i, j] / crps_ref_mean[i, j] + if (is.na(N.eff)) { + N.eff <- .Eno(x = obs[, j], na.action = na.pass) ## effective degrees of freedom + } sign[i, j] <- .RandomWalkTest(skill_A = crps_exp_mean[i, j], skill_B = crps_ref_mean[i, j], test.type = sig_method.type, alpha = alpha, - sign = T, pval = F)$sign + sign = T, pval = F, N.eff = N.eff)$sign } } } - } else { + } else { # dat_dim = NULL crpss <- 1 - mean(crps_exp) / mean(crps_ref) # Significance + if (is.na(N.eff)) { + N.eff <- .Eno(x = obs, na.action = na.pass) ## effective degrees of freedom + } sign <- .RandomWalkTest(skill_A = crps_exp, skill_B = crps_ref, test.type = sig_method.type, alpha = alpha, - sign = T, pval = F)$sign + sign = T, pval = F, N.eff = N.eff)$sign } return(list(crpss = crpss, sign = sign)) diff --git a/R/MSSS.R b/R/MSSS.R index f97d91f..c37e66f 100644 --- a/R/MSSS.R +++ b/R/MSSS.R @@ -41,6 +41,12 @@ #' FALSE. #'@param alpha A numeric of the significance level to be used in the #' statistical significance test. The default value is 0.05. +#'@param N.eff Effective sample size to be used in the statistical significance +#' test with the Random Walk. It can be NA (and it will be computed with the +#' s2dv:::.Eno), FALSE (and it will use the length of "obs" along "time_dim", so the +#' autocorrelation is not taken into account), a numeric (which is used for +#' all cases), or an array with the same dimensions as "obs" except "time_dim" +#' (for a particular N.eff to be used for each case). The default value is NA. #'@param sig_method A character string indicating the significance method. The #' options are "one-sided Fisher" (default) and "Random Walk". #'@param sig_method.type A character string indicating the test type of the @@ -87,7 +93,7 @@ #'@importFrom stats pf #'@export MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, - memb_dim = NULL, pval = TRUE, sign = FALSE, alpha = 0.05, + memb_dim = NULL, pval = TRUE, sign = FALSE, alpha = 0.05, N.eff = NA, sig_method = 'one-sided Fisher', sig_method.type = NULL, ncores = NULL) { # Check inputs @@ -172,6 +178,19 @@ MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, if (!is.numeric(alpha) | length(alpha) > 1) { stop("Parameter 'alpha' must be one numeric value.") } + ## N.eff + if (is.array(N.eff)) { + if (!is.numeric(N.eff)) stop("Parameter 'N.eff' must be numeric.") + if (!all(names(dim(N.eff)) %in% names(dim(obs))) | + any(dim(obs)[match(names(dim(N.eff)), names(dim(obs)))] != dim(N.eff))) { + stop('If parameter "N.eff" is provided with an array, it must ', + 'have the same dimensions as "obs" except "time_dim".') + } + } else if (any((!is.na(N.eff) & !isFALSE(N.eff) & + !is.numeric(N.eff)) | length(N.eff) != 1)) { + stop('Parameter "N.eff" must be NA, FALSE, a numeric, or an array with ', + 'the same dimensions as "obs" except "time_dim".') + } ## sig_method if (length(sig_method) != 1 | !any(sig_method %in% c('one-sided Fisher', 'Random Walk'))) { stop("Parameter 'sig_method' must be one of 'one-sided Fisher' or 'Random Walk'.") @@ -289,20 +308,32 @@ MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, target_dims <- list(exp = time_dim, obs = time_dim, ref = time_dim) } - res <- Apply(data, - target_dims = target_dims, - fun = .MSSS, - time_dim = time_dim, dat_dim = dat_dim, - pval = pval, sign = sign, alpha = alpha, - sig_method = sig_method, sig_method.type = sig_method.type, - ncores = ncores) + if (is.array(N.eff)) { + data$N.eff <- N.eff + target_dims[length(target_dims)+1] <- list(NULL) + res <- Apply(data, + target_dims = target_dims, + fun = .MSSS, + time_dim = time_dim, dat_dim = dat_dim, + pval = pval, sign = sign, alpha = alpha, + sig_method = sig_method, sig_method.type = sig_method.type, + ncores = ncores) + } else { + res <- Apply(data, + target_dims = target_dims, + fun = .MSSS, + time_dim = time_dim, dat_dim = dat_dim, + pval = pval, sign = sign, alpha = alpha, N.eff = N.eff, + sig_method = sig_method, sig_method.type = sig_method.type, + ncores = ncores) + } return(res) } .MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, pval = TRUE, - sign = FALSE, alpha = 0.05, sig_method = 'one-sided Fisher', - sig_method.type = NULL) { + sign = FALSE, alpha = 0.05, N.eff = NA, + sig_method = 'one-sided Fisher', sig_method.type = NULL) { # exp: [sdate, (dat)] # obs: [sdate, (dat)] # ref: [sdate, (dat)] or NULL @@ -413,8 +444,11 @@ MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, # nref = 1 error_ref <- array(data = abs(ref - obs[, j]), dim = c(time = nsdate)) } + if (is.na(N.eff)) { + N.eff <- .Eno(x = obs[, j], na.action = na.pass) ## effective degrees of freedom + } aux <- .RandomWalkTest(skill_A = error_exp, skill_B = error_ref, - test.type = sig_method.type, + test.type = sig_method.type, N.eff = N.eff, pval = pval, sign = sign, alpha = alpha) if (sign) signif[i, j] <- aux$sign if (pval) p_val[i, j] <- aux$p.val diff --git a/R/RMSSS.R b/R/RMSSS.R index c322c7f..105789a 100644 --- a/R/RMSSS.R +++ b/R/RMSSS.R @@ -42,6 +42,12 @@ #' FALSE. #'@param alpha A numeric of the significance level to be used in the #' statistical significance test. The default value is 0.05. +#'@param N.eff Effective sample size to be used in the statistical significance +#' test with the Random Walk. It can be NA (and it will be computed with the +#' s2dv:::.Eno), FALSE (and it will use the length of "obs" along "time_dim", so the +#' autocorrelation is not taken into account), a numeric (which is used for +#' all cases), or an array with the same dimensions as "obs" except "time_dim" +#' (for a particular N.eff to be used for each case). The default value is NA. #'@param sig_method A character string indicating the significance method. The #' options are "one-sided Fisher" (default) and "Random Walk". #'@param sig_method.type A character string indicating the test type of the @@ -97,7 +103,7 @@ #'@importFrom stats pf #'@export RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, - memb_dim = NULL, pval = TRUE, sign = FALSE, alpha = 0.05, + memb_dim = NULL, pval = TRUE, sign = FALSE, alpha = 0.05, N.eff = NA, sig_method = 'one-sided Fisher', sig_method.type = NULL, ncores = NULL) { # Check inputs @@ -182,6 +188,22 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, if (!is.numeric(alpha) | length(alpha) > 1) { stop("Parameter 'alpha' must be one numeric value.") } + ## N.eff + if (is.array(N.eff)) { + if (!is.numeric(N.eff)) stop("Parameter 'N.eff' must be numeric.") + if (!all(names(dim(N.eff)) %in% names(dim(obs))) | + any(dim(obs)[match(names(dim(N.eff)), names(dim(obs)))] != dim(N.eff))) { + stop('If parameter "N.eff" is provided with an array, it must ', + 'have the same dimensions as "obs" except "time_dim".') + } + } else if (any((!is.na(N.eff) & !isFALSE(N.eff) & + !is.numeric(N.eff)) | length(N.eff) != 1)) { + stop('Parameter "N.eff" must be NA, FALSE, a numeric, or an array with ', + 'the same dimensions as "obs" except "time_dim".') + } + if (sig_method=='Random Walk' & !isFALSE(N.eff) & sig_method.type=='two.sided.approx'){ + warning('"N.eff" will not be used if "sig_method.type" is "two.sided.approx".') + } ## sig_method if (length(sig_method) != 1 | !any(sig_method %in% c('one-sided Fisher', 'Random Walk'))) { stop("Parameter 'sig_method' must be one of 'one-sided Fisher' or 'Random Walk'.") @@ -302,20 +324,32 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, } else { target_dims <- list(exp = time_dim, obs = time_dim, ref = time_dim) } - - res <- Apply(data, - target_dims = target_dims, - fun = .RMSSS, - time_dim = time_dim, dat_dim = dat_dim, - pval = pval, sign = sign, alpha = alpha, - sig_method = sig_method, sig_method.type = sig_method.type, - ncores = ncores) + + if (is.array(N.eff)) { + data$N.eff <- N.eff + target_dims[length(target_dims)+1] <- list(NULL) + res <- Apply(data, + target_dims = target_dims, + fun = .RMSSS, + time_dim = time_dim, dat_dim = dat_dim, + pval = pval, sign = sign, alpha = alpha, + sig_method = sig_method, sig_method.type = sig_method.type, + ncores = ncores) + } else { + res <- Apply(data, + target_dims = target_dims, + fun = .RMSSS, + time_dim = time_dim, dat_dim = dat_dim, + pval = pval, sign = sign, alpha = alpha, N.eff = N.eff, + sig_method = sig_method, sig_method.type = sig_method.type, + ncores = ncores) + } return(res) } .RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, pval = TRUE, - sign = FALSE, alpha = 0.05, sig_method = 'one-sided Fisher', + sign = FALSE, alpha = 0.05, N.eff = NA, sig_method = 'one-sided Fisher', sig_method.type = NULL) { # exp: [sdate, (dat)] # obs: [sdate, (dat)] @@ -427,8 +461,11 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, # nref = 1 error_ref <- array(data = abs(ref - obs[, j]), dim = c(time = nsdate)) } + if (is.na(N.eff)) { + N.eff <- .Eno(x = obs[, j], na.action = na.pass) ## effective degrees of freedom + } aux <- .RandomWalkTest(skill_A = error_exp, skill_B = error_ref, - test.type = sig_method.type, + test.type = sig_method.type, N.eff = N.eff, pval = pval, sign = sign, alpha = alpha) if (sign) signif[i, j] <- aux$sign if (pval) p_val[i, j] <- aux$p.val diff --git a/R/RPSS.R b/R/RPSS.R index de4e257..907fdbe 100644 --- a/R/RPSS.R +++ b/R/RPSS.R @@ -79,6 +79,12 @@ #' the default of \code{RandomWalkTest()}. #'@param alpha A numeric of the significance level to be used in the statistical #' significance test. The default value is 0.05. +#'@param N.eff Effective sample size to be used in the statistical significance +#' test. It can be NA (and it will be computed with the s2dv:::.Eno), FALSE +#' (and it will use the length of "obs" along "time_dim", so the +#' autocorrelation is not taken into account), a numeric (which is used for +#' all cases), or an array with the same dimensions as "obs" except "time_dim" +#' (for a particular N.eff to be used for each case). The default value is NA. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -128,8 +134,8 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, weights_exp = NULL, weights_ref = NULL, - cross.val = FALSE, na.rm = FALSE, - sig_method.type = 'two.sided.approx', alpha = 0.05, ncores = NULL) { + cross.val = FALSE, na.rm = FALSE, sig_method.type = 'two.sided.approx', + alpha = 0.05, N.eff = NA, ncores = NULL) { # Check inputs ## exp, obs, and ref (1) @@ -352,6 +358,27 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', .warning("DelSole and Tippett (2016) aproximation is valid for alpha ", "= 0.05 only. Returning the significance at the 0.05 significance level.") } + ## N.eff + if (is.array(N.eff)) { + if (!is.numeric(N.eff)) stop("Parameter 'N.eff' must be numeric.") + if (!all(names(dim(N.eff)) %in% names(dim(obs))) | + any(dim(obs)[match(names(dim(N.eff)), names(dim(obs)))] != dim(N.eff))) { + stop('If parameter "N.eff" is provided with an array, it must ', + 'have the same dimensions as "obs" except "time_dim".') + } + } else if (any((!is.na(N.eff) & !isFALSE(N.eff) & + !is.numeric(N.eff)) | length(N.eff) != 1)) { + stop('Parameter "N.eff" must be NA, FALSE, a numeric, or an array with ', + 'the same dimensions as "obs" except "time_dim".') + } + if (!isFALSE(N.eff) & sig_method.type=='two.sided.approx'){ + warning('"N.eff" will not be used if "sig_method.type" is "two.sided.approx".') + } + if (identical(N.eff,NA) & !is.null(cat_dim)){ + stop('"N.eff" cannot be NA if probabilities are already provided ', + '(cat_dim != NULL). Please compute "N.eff" with s2dv::Eno and ', + 'provide this function with them.') + } ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | @@ -395,19 +422,68 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', target_dims = list(exp = target_dims_exp, obs = target_dims_obs) } - - output <- Apply(data, - target_dims = target_dims, - fun = .RPSS, - time_dim = time_dim, memb_dim = memb_dim, - cat_dim = cat_dim, dat_dim = dat_dim, - prob_thresholds = prob_thresholds, - indices_for_clim = indices_for_clim, Fair = Fair, - weights_exp = weights_exp, - weights_ref = weights_ref, - cross.val = cross.val, - na.rm = na.rm, sig_method.type = sig_method.type, alpha = alpha, - ncores = ncores) + + if (is.array(N.eff)) { + data$N.eff <- N.eff + target_dims[length(target_dims)+1] <- list(NULL) + if (!is.null(ref)){ + output <- Apply(data, + target_dims = target_dims, + fun = .RPSS, + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, + prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, + weights_exp = weights_exp, + weights_ref = weights_ref, + cross.val = cross.val, + na.rm = na.rm, sig_method.type = sig_method.type, alpha = alpha, + ncores = ncores) + } else { # ref=NULL + output <- Apply(data, + target_dims = target_dims, + fun = .RPSS, + ref = ref, + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, + prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, + weights_exp = weights_exp, + weights_ref = weights_ref, + cross.val = cross.val, + na.rm = na.rm, sig_method.type = sig_method.type, alpha = alpha, + ncores = ncores) + } + } else { # N.eff not an array + if (!is.null(ref)){ + output <- Apply(data, + target_dims = target_dims, + fun = .RPSS, + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, + prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, + weights_exp = weights_exp, + weights_ref = weights_ref, + cross.val = cross.val, + na.rm = na.rm, sig_method.type = sig_method.type, alpha = alpha, + N.eff = N.eff, ncores = ncores) + } else { # ref=NULL + output <- Apply(data, + target_dims = target_dims, + fun = .RPSS, + ref = ref, + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, + prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, + weights_exp = weights_exp, + weights_ref = weights_ref, + cross.val = cross.val, + na.rm = na.rm, sig_method.type = sig_method.type, alpha = alpha, + N.eff = N.eff, ncores = ncores) + } + } return(output) @@ -416,7 +492,7 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', .RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, weights_exp = NULL, weights_ref = NULL, cross.val = FALSE, - na.rm = FALSE, sig_method.type = 'two.sided.approx', alpha = 0.05) { + na.rm = FALSE, sig_method.type = 'two.sided.approx', alpha = 0.05, N.eff = NA) { #--- if memb_dim: # exp: [sdate, memb, (dat)] # obs: [sdate, (memb), (dat)] @@ -596,10 +672,13 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', if (!any(ind_nonNA)) { sign[i, j] <- NA } else { + if (is.na(N.eff)) { + N.eff <- .Eno(x = obs[, j], na.action = na.pass) ## effective degrees of freedom + } sign[i, j] <- .RandomWalkTest(skill_A = rps_exp[ind_nonNA, i, j], skill_B = rps_ref[ind_nonNA, i, j], test.type = sig_method.type, alpha = alpha, - sign = T, pval = F)$sign + sign = T, pval = F, N.eff = N.eff)$sign } } } @@ -617,10 +696,13 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', } else { # rps_exp and rps_ref: [sdate] rpss <- 1 - mean(rps_exp, na.rm = TRUE) / mean(rps_ref, na.rm = TRUE) + if (is.na(N.eff)) { + N.eff <- .Eno(x = obs, na.action = na.pass) ## effective degrees of freedom + } sign <- .RandomWalkTest(skill_A = rps_exp[ind_nonNA], skill_B = rps_ref[ind_nonNA], test.type = sig_method.type, alpha = alpha, - sign = T, pval = F)$sign + sign = T, pval = F, N.eff = N.eff)$sign } } diff --git a/R/RandomWalkTest.R b/R/RandomWalkTest.R index 16d89f6..b041362 100644 --- a/R/RandomWalkTest.R +++ b/R/RandomWalkTest.R @@ -28,6 +28,11 @@ #' significance test. The default value is TRUE. #'@param sign A logical value indicating whether to return the statistical #' significance of the test based on 'alpha'. The default value is FALSE. +#'@param N.eff Effective sample size to be used in the statistical significance +#' test. It can be FALSE (and the length of the time series will be used), a +#' numeric (which is used for all cases), or an array with the same dimensions +#' as "skill_A" except "time_dim" (for a particular N.eff to be used for each +#' case). The default value is FALSE. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -87,7 +92,7 @@ #'@export RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', test.type = 'two.sided.approx', alpha = 0.05, pval = TRUE, - sign = FALSE, ncores = NULL) { + sign = FALSE, N.eff = FALSE, ncores = NULL) { # Check inputs ## skill_A and skill_B @@ -126,6 +131,21 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', } sign <- TRUE } + ## N.eff + if (is.array(N.eff)) { + if (!is.numeric(N.eff)) stop("Parameter 'N.eff' must be numeric.") + if (!all(names(dim(N.eff)) %in% names(dim(skill_A))) | + any(dim(skill_A)[match(names(dim(N.eff)), names(dim(skill_A)))] != dim(N.eff))) { + stop('If parameter "N.eff" is provided with an array, it must ', + 'have the same dimensions as "skill_A" except "time_dim".') + } + } else if (any((!isFALSE(N.eff) & !is.numeric(N.eff)) | length(N.eff) != 1)) { + stop('Parameter "N.eff" must be FALSE, a numeric, or an array with ', + 'the same dimensions as "skill_A" except "time_dim".') + } + if (!isFALSE(N.eff) & test.type=='two.sided.approx'){ + warning('"N.eff" will not be used if "test.type" is "two.sided.approx".') + } ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { @@ -134,23 +154,34 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', } ## Compute the Random Walk Test - res <- Apply(data = list(skill_A = skill_A, - skill_B = skill_B), - target_dims = list(skill_A = time_dim, - skill_B = time_dim), - fun = .RandomWalkTest, - test.type = test.type, - alpha = alpha, pval = pval, sign = sign, - ncores = ncores) + if (is.array(N.eff)) { + res <- Apply(data = list(skill_A = skill_A, + skill_B = skill_B, + N.eff = N.eff), + target_dims = list(skill_A = time_dim, + skill_B = time_dim, + N.eff = NULL), + fun = .RandomWalkTest, + test.type = test.type, + alpha = alpha, pval = pval, sign = sign, + ncores = ncores) + } else { + res <- Apply(data = list(skill_A = skill_A, + skill_B = skill_B), + target_dims = list(skill_A = time_dim, + skill_B = time_dim), + fun = .RandomWalkTest, + test.type = test.type, + alpha = alpha, pval = pval, sign = sign, + N.eff = N.eff, ncores = ncores) + } return(res) } .RandomWalkTest <- function(skill_A, skill_B, test.type = 'two.sided.approx', - alpha = 0.05, pval = TRUE, sign = FALSE) { + alpha = 0.05, pval = TRUE, N.eff = FALSE, sign = FALSE) { #skill_A and skill_B: [sdate] - - N.eff <- length(skill_A) A_better <- sum(skill_B > skill_A) B_better <- sum(skill_B < skill_A) @@ -159,12 +190,17 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', output$score <- A_better - B_better if (test.type == 'two.sided.approx') { - output$sign <- abs(output$score) > (2 * sqrt(N.eff)) + output$sign <- abs(output$score) > (2 * sqrt(length(skill_A))) } else { + if (isFALSE(N.eff)){N.eff <- length(skill_A)} + + if (N.eff < A_better){A_better <- N.eff} + if (!is.na(output$score)) { - p.val <- binom.test(x = A_better, n = floor(N.eff), p = 0.5, conf.level = 1 - alpha, + p.val <- binom.test(x = A_better, n = floor(N.eff), p = 0.5, + conf.level = 1 - alpha, alternative = test.type)$p.value } else { -- GitLab From c34a865bd9c3032a9a8ce87898975b10903da11f Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Tue, 4 Jun 2024 12:10:05 +0200 Subject: [PATCH 2/2] corrected function for case that A_better > N_eff --- R/RandomWalkTest.R | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/R/RandomWalkTest.R b/R/RandomWalkTest.R index b041362..e0d0e1b 100644 --- a/R/RandomWalkTest.R +++ b/R/RandomWalkTest.R @@ -196,13 +196,10 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', if (isFALSE(N.eff)){N.eff <- length(skill_A)} - if (N.eff < A_better){A_better <- N.eff} - - if (!is.na(output$score)) { + if (!is.na(output$score) & N.eff > A_better) { p.val <- binom.test(x = A_better, n = floor(N.eff), p = 0.5, conf.level = 1 - alpha, alternative = test.type)$p.value - } else { p.val <- NA } -- GitLab