diff --git a/R/CRPSS.R b/R/CRPSS.R index 5c901ac24c3c206b128a6f19abab0f5fcaa5b706..5aa339941d231cfdb591a626b46b7e89b4370891 100644 --- a/R/CRPSS.R +++ b/R/CRPSS.R @@ -48,6 +48,12 @@ #' the default of \code{RandomWalkTest()}. #'@param alpha A numeric of the significance level to be used in the statistical #' significance test. The default value is 0.05. +#'@param N.eff Effective sample size to be used in the statistical significance +#' test. It can be NA (and it will be computed with the s2dv:::.Eno), FALSE +#' (and it will use the length of "obs" along "time_dim", so the +#' autocorrelation is not taken into account), a numeric (which is used for +#' all cases), or an array with the same dimensions as "obs" except "time_dim" +#' (for a particular N.eff to be used for each case). The default value is NA. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -80,7 +86,7 @@ #'@export CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, Fair = FALSE, clim.cross.val = TRUE, sig_method.type = 'two.sided.approx', - alpha = 0.05, ncores = NULL) { + alpha = 0.05, N.eff = NA, ncores = NULL) { # Check inputs ## exp, obs, and ref (1) @@ -193,6 +199,22 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', .warning("DelSole and Tippett (2016) aproximation is valid for alpha ", "= 0.05 only. Returning the significance at the 0.05 significance level.") } + ## N.eff + if (is.array(N.eff)) { + if (!is.numeric(N.eff)) stop("Parameter 'N.eff' must be numeric.") + if (!all(names(dim(N.eff)) %in% names(dim(obs))) | + any(dim(obs)[match(names(dim(N.eff)), names(dim(obs)))] != dim(N.eff))) { + stop('If parameter "N.eff" is provided with an array, it must ', + 'have the same dimensions as "obs" except "time_dim".') + } + } else if (any((!is.na(N.eff) & !isFALSE(N.eff) & + !is.numeric(N.eff)) | length(N.eff) != 1)) { + stop('Parameter "N.eff" must be NA, FALSE, a numeric, or an array with ', + 'the same dimensions as "obs" except "time_dim".') + } + if (!isFALSE(N.eff) & sig_method.type=='two.sided.approx'){ + warning('"N.eff" will not be used if "sig_method.type" is "two.sided.approx".') + } ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { @@ -218,26 +240,64 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', target_dims = list(exp = c(time_dim, memb_dim, dat_dim), obs = c(time_dim, dat_dim)) } - output <- Apply(data, - target_dims = target_dims, - fun = .CRPSS, - time_dim = time_dim, memb_dim = memb_dim, - dat_dim = dat_dim, - Fair = Fair, clim.cross.val = clim.cross.val, - sig_method.type = sig_method.type, alpha = alpha, - ncores = ncores) + + if (is.array(N.eff)) { + data$N.eff <- N.eff + target_dims[length(target_dims)+1] <- list(NULL) + if (!is.null(ref)){ + output <- Apply(data, + target_dims = target_dims, + fun = .CRPSS, + time_dim = time_dim, memb_dim = memb_dim, + dat_dim = dat_dim, + Fair = Fair, clim.cross.val = clim.cross.val, + sig_method.type = sig_method.type, alpha = alpha, + ncores = ncores) + } else { # ref=NULL + output <- Apply(data, + target_dims = target_dims, + fun = .CRPSS, + ref = ref, + time_dim = time_dim, memb_dim = memb_dim, + dat_dim = dat_dim, + Fair = Fair, clim.cross.val = clim.cross.val, + sig_method.type = sig_method.type, alpha = alpha, + ncores = ncores) + } + } else { # N.eff not an array + if (!is.null(ref)){ + output <- Apply(data, + target_dims = target_dims, + fun = .CRPSS, + time_dim = time_dim, memb_dim = memb_dim, + dat_dim = dat_dim, + Fair = Fair, clim.cross.val = clim.cross.val, + sig_method.type = sig_method.type, alpha = alpha, + N.eff = N.eff, ncores = ncores) + } else { # ref=NULL + output <- Apply(data, + target_dims = target_dims, + fun = .CRPSS, + ref = ref, + time_dim = time_dim, memb_dim = memb_dim, + dat_dim = dat_dim, + Fair = Fair, clim.cross.val = clim.cross.val, + sig_method.type = sig_method.type, alpha = alpha, + N.eff = N.eff, ncores = ncores) + } + } return(output) } .CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, Fair = FALSE, clim.cross.val = TRUE, - sig_method.type = 'two.sided.approx', alpha = 0.05) { + sig_method.type = 'two.sided.approx', alpha = 0.05, N.eff = NA) { # exp: [sdate, memb, (dat)] # obs: [sdate, (dat)] # ref: [sdate, memb, (dat)] or NULL - + if (is.null(dat_dim)) { nexp <- 1 nobs <- 1 @@ -334,29 +394,38 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', for (i in 1:nexp) { for (j in 1:nobs) { crpss[i, j] <- 1 - crps_exp_mean[i, j] / crps_ref_mean[j] + if (is.na(N.eff)) { + N.eff <- .Eno(x = obs[, j], na.action = na.pass) ## effective degrees of freedom + } sign[i, j] <- .RandomWalkTest(skill_A = crps_exp_mean[i, j], skill_B = crps_ref_mean[j], test.type = sig_method.type, alpha = alpha, - sign = T, pval = F)$sign + sign = T, pval = F, N.eff = N.eff)$sign } } } else { for (i in 1:nexp) { for (j in 1:nobs) { crpss[i, j] <- 1 - crps_exp_mean[i, j] / crps_ref_mean[i, j] + if (is.na(N.eff)) { + N.eff <- .Eno(x = obs[, j], na.action = na.pass) ## effective degrees of freedom + } sign[i, j] <- .RandomWalkTest(skill_A = crps_exp_mean[i, j], skill_B = crps_ref_mean[i, j], test.type = sig_method.type, alpha = alpha, - sign = T, pval = F)$sign + sign = T, pval = F, N.eff = N.eff)$sign } } } - } else { + } else { # dat_dim = NULL crpss <- 1 - mean(crps_exp) / mean(crps_ref) # Significance + if (is.na(N.eff)) { + N.eff <- .Eno(x = obs, na.action = na.pass) ## effective degrees of freedom + } sign <- .RandomWalkTest(skill_A = crps_exp, skill_B = crps_ref, test.type = sig_method.type, alpha = alpha, - sign = T, pval = F)$sign + sign = T, pval = F, N.eff = N.eff)$sign } return(list(crpss = crpss, sign = sign)) diff --git a/R/MSSS.R b/R/MSSS.R index f97d91f47cf20e2333560a5a2449473d201ee7cc..c37e66faef6e16a68d0e11fb1209ced344579dea 100644 --- a/R/MSSS.R +++ b/R/MSSS.R @@ -41,6 +41,12 @@ #' FALSE. #'@param alpha A numeric of the significance level to be used in the #' statistical significance test. The default value is 0.05. +#'@param N.eff Effective sample size to be used in the statistical significance +#' test with the Random Walk. It can be NA (and it will be computed with the +#' s2dv:::.Eno), FALSE (and it will use the length of "obs" along "time_dim", so the +#' autocorrelation is not taken into account), a numeric (which is used for +#' all cases), or an array with the same dimensions as "obs" except "time_dim" +#' (for a particular N.eff to be used for each case). The default value is NA. #'@param sig_method A character string indicating the significance method. The #' options are "one-sided Fisher" (default) and "Random Walk". #'@param sig_method.type A character string indicating the test type of the @@ -87,7 +93,7 @@ #'@importFrom stats pf #'@export MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, - memb_dim = NULL, pval = TRUE, sign = FALSE, alpha = 0.05, + memb_dim = NULL, pval = TRUE, sign = FALSE, alpha = 0.05, N.eff = NA, sig_method = 'one-sided Fisher', sig_method.type = NULL, ncores = NULL) { # Check inputs @@ -172,6 +178,19 @@ MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, if (!is.numeric(alpha) | length(alpha) > 1) { stop("Parameter 'alpha' must be one numeric value.") } + ## N.eff + if (is.array(N.eff)) { + if (!is.numeric(N.eff)) stop("Parameter 'N.eff' must be numeric.") + if (!all(names(dim(N.eff)) %in% names(dim(obs))) | + any(dim(obs)[match(names(dim(N.eff)), names(dim(obs)))] != dim(N.eff))) { + stop('If parameter "N.eff" is provided with an array, it must ', + 'have the same dimensions as "obs" except "time_dim".') + } + } else if (any((!is.na(N.eff) & !isFALSE(N.eff) & + !is.numeric(N.eff)) | length(N.eff) != 1)) { + stop('Parameter "N.eff" must be NA, FALSE, a numeric, or an array with ', + 'the same dimensions as "obs" except "time_dim".') + } ## sig_method if (length(sig_method) != 1 | !any(sig_method %in% c('one-sided Fisher', 'Random Walk'))) { stop("Parameter 'sig_method' must be one of 'one-sided Fisher' or 'Random Walk'.") @@ -289,20 +308,32 @@ MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, target_dims <- list(exp = time_dim, obs = time_dim, ref = time_dim) } - res <- Apply(data, - target_dims = target_dims, - fun = .MSSS, - time_dim = time_dim, dat_dim = dat_dim, - pval = pval, sign = sign, alpha = alpha, - sig_method = sig_method, sig_method.type = sig_method.type, - ncores = ncores) + if (is.array(N.eff)) { + data$N.eff <- N.eff + target_dims[length(target_dims)+1] <- list(NULL) + res <- Apply(data, + target_dims = target_dims, + fun = .MSSS, + time_dim = time_dim, dat_dim = dat_dim, + pval = pval, sign = sign, alpha = alpha, + sig_method = sig_method, sig_method.type = sig_method.type, + ncores = ncores) + } else { + res <- Apply(data, + target_dims = target_dims, + fun = .MSSS, + time_dim = time_dim, dat_dim = dat_dim, + pval = pval, sign = sign, alpha = alpha, N.eff = N.eff, + sig_method = sig_method, sig_method.type = sig_method.type, + ncores = ncores) + } return(res) } .MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, pval = TRUE, - sign = FALSE, alpha = 0.05, sig_method = 'one-sided Fisher', - sig_method.type = NULL) { + sign = FALSE, alpha = 0.05, N.eff = NA, + sig_method = 'one-sided Fisher', sig_method.type = NULL) { # exp: [sdate, (dat)] # obs: [sdate, (dat)] # ref: [sdate, (dat)] or NULL @@ -413,8 +444,11 @@ MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, # nref = 1 error_ref <- array(data = abs(ref - obs[, j]), dim = c(time = nsdate)) } + if (is.na(N.eff)) { + N.eff <- .Eno(x = obs[, j], na.action = na.pass) ## effective degrees of freedom + } aux <- .RandomWalkTest(skill_A = error_exp, skill_B = error_ref, - test.type = sig_method.type, + test.type = sig_method.type, N.eff = N.eff, pval = pval, sign = sign, alpha = alpha) if (sign) signif[i, j] <- aux$sign if (pval) p_val[i, j] <- aux$p.val diff --git a/R/RMSSS.R b/R/RMSSS.R index c322c7fdd7aa0f0b01da91b82096a429ddd71f01..105789aa619c768c746dbe3cae9ce3c4140d1f05 100644 --- a/R/RMSSS.R +++ b/R/RMSSS.R @@ -42,6 +42,12 @@ #' FALSE. #'@param alpha A numeric of the significance level to be used in the #' statistical significance test. The default value is 0.05. +#'@param N.eff Effective sample size to be used in the statistical significance +#' test with the Random Walk. It can be NA (and it will be computed with the +#' s2dv:::.Eno), FALSE (and it will use the length of "obs" along "time_dim", so the +#' autocorrelation is not taken into account), a numeric (which is used for +#' all cases), or an array with the same dimensions as "obs" except "time_dim" +#' (for a particular N.eff to be used for each case). The default value is NA. #'@param sig_method A character string indicating the significance method. The #' options are "one-sided Fisher" (default) and "Random Walk". #'@param sig_method.type A character string indicating the test type of the @@ -97,7 +103,7 @@ #'@importFrom stats pf #'@export RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, - memb_dim = NULL, pval = TRUE, sign = FALSE, alpha = 0.05, + memb_dim = NULL, pval = TRUE, sign = FALSE, alpha = 0.05, N.eff = NA, sig_method = 'one-sided Fisher', sig_method.type = NULL, ncores = NULL) { # Check inputs @@ -182,6 +188,22 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, if (!is.numeric(alpha) | length(alpha) > 1) { stop("Parameter 'alpha' must be one numeric value.") } + ## N.eff + if (is.array(N.eff)) { + if (!is.numeric(N.eff)) stop("Parameter 'N.eff' must be numeric.") + if (!all(names(dim(N.eff)) %in% names(dim(obs))) | + any(dim(obs)[match(names(dim(N.eff)), names(dim(obs)))] != dim(N.eff))) { + stop('If parameter "N.eff" is provided with an array, it must ', + 'have the same dimensions as "obs" except "time_dim".') + } + } else if (any((!is.na(N.eff) & !isFALSE(N.eff) & + !is.numeric(N.eff)) | length(N.eff) != 1)) { + stop('Parameter "N.eff" must be NA, FALSE, a numeric, or an array with ', + 'the same dimensions as "obs" except "time_dim".') + } + if (sig_method=='Random Walk' & !isFALSE(N.eff) & sig_method.type=='two.sided.approx'){ + warning('"N.eff" will not be used if "sig_method.type" is "two.sided.approx".') + } ## sig_method if (length(sig_method) != 1 | !any(sig_method %in% c('one-sided Fisher', 'Random Walk'))) { stop("Parameter 'sig_method' must be one of 'one-sided Fisher' or 'Random Walk'.") @@ -302,20 +324,32 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, } else { target_dims <- list(exp = time_dim, obs = time_dim, ref = time_dim) } - - res <- Apply(data, - target_dims = target_dims, - fun = .RMSSS, - time_dim = time_dim, dat_dim = dat_dim, - pval = pval, sign = sign, alpha = alpha, - sig_method = sig_method, sig_method.type = sig_method.type, - ncores = ncores) + + if (is.array(N.eff)) { + data$N.eff <- N.eff + target_dims[length(target_dims)+1] <- list(NULL) + res <- Apply(data, + target_dims = target_dims, + fun = .RMSSS, + time_dim = time_dim, dat_dim = dat_dim, + pval = pval, sign = sign, alpha = alpha, + sig_method = sig_method, sig_method.type = sig_method.type, + ncores = ncores) + } else { + res <- Apply(data, + target_dims = target_dims, + fun = .RMSSS, + time_dim = time_dim, dat_dim = dat_dim, + pval = pval, sign = sign, alpha = alpha, N.eff = N.eff, + sig_method = sig_method, sig_method.type = sig_method.type, + ncores = ncores) + } return(res) } .RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, pval = TRUE, - sign = FALSE, alpha = 0.05, sig_method = 'one-sided Fisher', + sign = FALSE, alpha = 0.05, N.eff = NA, sig_method = 'one-sided Fisher', sig_method.type = NULL) { # exp: [sdate, (dat)] # obs: [sdate, (dat)] @@ -427,8 +461,11 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, # nref = 1 error_ref <- array(data = abs(ref - obs[, j]), dim = c(time = nsdate)) } + if (is.na(N.eff)) { + N.eff <- .Eno(x = obs[, j], na.action = na.pass) ## effective degrees of freedom + } aux <- .RandomWalkTest(skill_A = error_exp, skill_B = error_ref, - test.type = sig_method.type, + test.type = sig_method.type, N.eff = N.eff, pval = pval, sign = sign, alpha = alpha) if (sign) signif[i, j] <- aux$sign if (pval) p_val[i, j] <- aux$p.val diff --git a/R/RPSS.R b/R/RPSS.R index de4e2575c2201ee337f3b0124ec4d828b4fbd2cc..907fdbee0b35c77eb46cf67a946a6304b74f2033 100644 --- a/R/RPSS.R +++ b/R/RPSS.R @@ -79,6 +79,12 @@ #' the default of \code{RandomWalkTest()}. #'@param alpha A numeric of the significance level to be used in the statistical #' significance test. The default value is 0.05. +#'@param N.eff Effective sample size to be used in the statistical significance +#' test. It can be NA (and it will be computed with the s2dv:::.Eno), FALSE +#' (and it will use the length of "obs" along "time_dim", so the +#' autocorrelation is not taken into account), a numeric (which is used for +#' all cases), or an array with the same dimensions as "obs" except "time_dim" +#' (for a particular N.eff to be used for each case). The default value is NA. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -128,8 +134,8 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, weights_exp = NULL, weights_ref = NULL, - cross.val = FALSE, na.rm = FALSE, - sig_method.type = 'two.sided.approx', alpha = 0.05, ncores = NULL) { + cross.val = FALSE, na.rm = FALSE, sig_method.type = 'two.sided.approx', + alpha = 0.05, N.eff = NA, ncores = NULL) { # Check inputs ## exp, obs, and ref (1) @@ -352,6 +358,27 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', .warning("DelSole and Tippett (2016) aproximation is valid for alpha ", "= 0.05 only. Returning the significance at the 0.05 significance level.") } + ## N.eff + if (is.array(N.eff)) { + if (!is.numeric(N.eff)) stop("Parameter 'N.eff' must be numeric.") + if (!all(names(dim(N.eff)) %in% names(dim(obs))) | + any(dim(obs)[match(names(dim(N.eff)), names(dim(obs)))] != dim(N.eff))) { + stop('If parameter "N.eff" is provided with an array, it must ', + 'have the same dimensions as "obs" except "time_dim".') + } + } else if (any((!is.na(N.eff) & !isFALSE(N.eff) & + !is.numeric(N.eff)) | length(N.eff) != 1)) { + stop('Parameter "N.eff" must be NA, FALSE, a numeric, or an array with ', + 'the same dimensions as "obs" except "time_dim".') + } + if (!isFALSE(N.eff) & sig_method.type=='two.sided.approx'){ + warning('"N.eff" will not be used if "sig_method.type" is "two.sided.approx".') + } + if (identical(N.eff,NA) & !is.null(cat_dim)){ + stop('"N.eff" cannot be NA if probabilities are already provided ', + '(cat_dim != NULL). Please compute "N.eff" with s2dv::Eno and ', + 'provide this function with them.') + } ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | @@ -395,19 +422,68 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', target_dims = list(exp = target_dims_exp, obs = target_dims_obs) } - - output <- Apply(data, - target_dims = target_dims, - fun = .RPSS, - time_dim = time_dim, memb_dim = memb_dim, - cat_dim = cat_dim, dat_dim = dat_dim, - prob_thresholds = prob_thresholds, - indices_for_clim = indices_for_clim, Fair = Fair, - weights_exp = weights_exp, - weights_ref = weights_ref, - cross.val = cross.val, - na.rm = na.rm, sig_method.type = sig_method.type, alpha = alpha, - ncores = ncores) + + if (is.array(N.eff)) { + data$N.eff <- N.eff + target_dims[length(target_dims)+1] <- list(NULL) + if (!is.null(ref)){ + output <- Apply(data, + target_dims = target_dims, + fun = .RPSS, + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, + prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, + weights_exp = weights_exp, + weights_ref = weights_ref, + cross.val = cross.val, + na.rm = na.rm, sig_method.type = sig_method.type, alpha = alpha, + ncores = ncores) + } else { # ref=NULL + output <- Apply(data, + target_dims = target_dims, + fun = .RPSS, + ref = ref, + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, + prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, + weights_exp = weights_exp, + weights_ref = weights_ref, + cross.val = cross.val, + na.rm = na.rm, sig_method.type = sig_method.type, alpha = alpha, + ncores = ncores) + } + } else { # N.eff not an array + if (!is.null(ref)){ + output <- Apply(data, + target_dims = target_dims, + fun = .RPSS, + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, + prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, + weights_exp = weights_exp, + weights_ref = weights_ref, + cross.val = cross.val, + na.rm = na.rm, sig_method.type = sig_method.type, alpha = alpha, + N.eff = N.eff, ncores = ncores) + } else { # ref=NULL + output <- Apply(data, + target_dims = target_dims, + fun = .RPSS, + ref = ref, + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, + prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, + weights_exp = weights_exp, + weights_ref = weights_ref, + cross.val = cross.val, + na.rm = na.rm, sig_method.type = sig_method.type, alpha = alpha, + N.eff = N.eff, ncores = ncores) + } + } return(output) @@ -416,7 +492,7 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', .RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, weights_exp = NULL, weights_ref = NULL, cross.val = FALSE, - na.rm = FALSE, sig_method.type = 'two.sided.approx', alpha = 0.05) { + na.rm = FALSE, sig_method.type = 'two.sided.approx', alpha = 0.05, N.eff = NA) { #--- if memb_dim: # exp: [sdate, memb, (dat)] # obs: [sdate, (memb), (dat)] @@ -596,10 +672,13 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', if (!any(ind_nonNA)) { sign[i, j] <- NA } else { + if (is.na(N.eff)) { + N.eff <- .Eno(x = obs[, j], na.action = na.pass) ## effective degrees of freedom + } sign[i, j] <- .RandomWalkTest(skill_A = rps_exp[ind_nonNA, i, j], skill_B = rps_ref[ind_nonNA, i, j], test.type = sig_method.type, alpha = alpha, - sign = T, pval = F)$sign + sign = T, pval = F, N.eff = N.eff)$sign } } } @@ -617,10 +696,13 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', } else { # rps_exp and rps_ref: [sdate] rpss <- 1 - mean(rps_exp, na.rm = TRUE) / mean(rps_ref, na.rm = TRUE) + if (is.na(N.eff)) { + N.eff <- .Eno(x = obs, na.action = na.pass) ## effective degrees of freedom + } sign <- .RandomWalkTest(skill_A = rps_exp[ind_nonNA], skill_B = rps_ref[ind_nonNA], test.type = sig_method.type, alpha = alpha, - sign = T, pval = F)$sign + sign = T, pval = F, N.eff = N.eff)$sign } } diff --git a/R/RandomWalkTest.R b/R/RandomWalkTest.R index 16d89f6d8b34bf824188677dd4f1728823725eca..e0d0e1bfad56d53eb58e3064641c57cdcac76b60 100644 --- a/R/RandomWalkTest.R +++ b/R/RandomWalkTest.R @@ -28,6 +28,11 @@ #' significance test. The default value is TRUE. #'@param sign A logical value indicating whether to return the statistical #' significance of the test based on 'alpha'. The default value is FALSE. +#'@param N.eff Effective sample size to be used in the statistical significance +#' test. It can be FALSE (and the length of the time series will be used), a +#' numeric (which is used for all cases), or an array with the same dimensions +#' as "skill_A" except "time_dim" (for a particular N.eff to be used for each +#' case). The default value is FALSE. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -87,7 +92,7 @@ #'@export RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', test.type = 'two.sided.approx', alpha = 0.05, pval = TRUE, - sign = FALSE, ncores = NULL) { + sign = FALSE, N.eff = FALSE, ncores = NULL) { # Check inputs ## skill_A and skill_B @@ -126,6 +131,21 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', } sign <- TRUE } + ## N.eff + if (is.array(N.eff)) { + if (!is.numeric(N.eff)) stop("Parameter 'N.eff' must be numeric.") + if (!all(names(dim(N.eff)) %in% names(dim(skill_A))) | + any(dim(skill_A)[match(names(dim(N.eff)), names(dim(skill_A)))] != dim(N.eff))) { + stop('If parameter "N.eff" is provided with an array, it must ', + 'have the same dimensions as "skill_A" except "time_dim".') + } + } else if (any((!isFALSE(N.eff) & !is.numeric(N.eff)) | length(N.eff) != 1)) { + stop('Parameter "N.eff" must be FALSE, a numeric, or an array with ', + 'the same dimensions as "skill_A" except "time_dim".') + } + if (!isFALSE(N.eff) & test.type=='two.sided.approx'){ + warning('"N.eff" will not be used if "test.type" is "two.sided.approx".') + } ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { @@ -134,23 +154,34 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', } ## Compute the Random Walk Test - res <- Apply(data = list(skill_A = skill_A, - skill_B = skill_B), - target_dims = list(skill_A = time_dim, - skill_B = time_dim), - fun = .RandomWalkTest, - test.type = test.type, - alpha = alpha, pval = pval, sign = sign, - ncores = ncores) + if (is.array(N.eff)) { + res <- Apply(data = list(skill_A = skill_A, + skill_B = skill_B, + N.eff = N.eff), + target_dims = list(skill_A = time_dim, + skill_B = time_dim, + N.eff = NULL), + fun = .RandomWalkTest, + test.type = test.type, + alpha = alpha, pval = pval, sign = sign, + ncores = ncores) + } else { + res <- Apply(data = list(skill_A = skill_A, + skill_B = skill_B), + target_dims = list(skill_A = time_dim, + skill_B = time_dim), + fun = .RandomWalkTest, + test.type = test.type, + alpha = alpha, pval = pval, sign = sign, + N.eff = N.eff, ncores = ncores) + } return(res) } .RandomWalkTest <- function(skill_A, skill_B, test.type = 'two.sided.approx', - alpha = 0.05, pval = TRUE, sign = FALSE) { + alpha = 0.05, pval = TRUE, N.eff = FALSE, sign = FALSE) { #skill_A and skill_B: [sdate] - - N.eff <- length(skill_A) A_better <- sum(skill_B > skill_A) B_better <- sum(skill_B < skill_A) @@ -159,14 +190,16 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', output$score <- A_better - B_better if (test.type == 'two.sided.approx') { - output$sign <- abs(output$score) > (2 * sqrt(N.eff)) + output$sign <- abs(output$score) > (2 * sqrt(length(skill_A))) } else { - if (!is.na(output$score)) { - p.val <- binom.test(x = A_better, n = floor(N.eff), p = 0.5, conf.level = 1 - alpha, + if (isFALSE(N.eff)){N.eff <- length(skill_A)} + + if (!is.na(output$score) & N.eff > A_better) { + p.val <- binom.test(x = A_better, n = floor(N.eff), p = 0.5, + conf.level = 1 - alpha, alternative = test.type)$p.value - } else { p.val <- NA }