From d1e614c7edf1415e59f72a210e9fa467c8b0260e Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Tue, 6 Jul 2021 17:58:06 +0200 Subject: [PATCH 01/11] included the posibility to take into account the effective degrees of freedom --- R/RandomWalkTest.R | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/R/RandomWalkTest.R b/R/RandomWalkTest.R index 494be65..f747f87 100644 --- a/R/RandomWalkTest.R +++ b/R/RandomWalkTest.R @@ -10,6 +10,9 @@ #' forecaster B's. The dimensions should be identical as parameter 'skill_A'. #'@param time_dim A character string indicating the name of the dimension along #' which the tests are computed. The default value is 'sdate'. +#'@param N.eff A numerical array with the number of effective degrees of freedom. +#' The dimensions should be the same as 'skill_A' and 'skill_B' but without the 'time_dim' dimension. +#' If NA (default), the length of the 'time_dim' dimension is used. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -34,11 +37,11 @@ #' reference <- array(1:40, dim = c(sdate = 10, lat = 2, lon = 2)) #' skill_A <- abs(fcst_A - reference) #' skill_B <- abs(fcst_B - reference) -#' RandomWalkTest(skill_A = skill_A, skill_B = skill_B, time_dim = 'sdate', ncores = 1) +#' RandomWalkTest(skill_A = skill_A, skill_B = skill_B, time_dim = 'sdate', N.eff = NA, ncores = 1) #' #'@import multiApply #'@export -RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', ncores = NULL){ +RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', N.eff = NA, ncores = NULL){ ## Check inputs if (is.null(skill_A) | is.null(skill_B)){ @@ -56,6 +59,11 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', ncores = NULL){ if(!time_dim %in% names(dim(skill_A)) | !time_dim %in% names(dim(skill_B))){ stop("Parameter 'time_dim' is not found in 'skill_A' or 'skill_B' dimensions.") } + if(!identical(N.eff,NA)){ + if (!identical(dim(N.eff), dim(skill_A)[-which(names(dim(skill_A)) == time_dim)]) | !is.numeric(N.eff)){ + stop("Parameter 'N.eff' must be NA, or an array with the same dimensions as 'skill_A' and 'skill_B' but without the 'time_dim' dimension") + } + } if (!is.null(ncores)){ if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1){ stop("Parameter 'ncores' must be a positive integer.") @@ -63,20 +71,21 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', ncores = NULL){ } ## Compute the Random Walk Test - res <- multiApply::Apply(data = list(skill_A, skill_B), - target_dims = time_dim, - fun = .RandomWalkTest, - ncores = ncores) + res <- multiApply::Apply(data = list(skill_A = skill_A, skill_B = skill_B, N.eff = N.eff), + target_dims = list(skill_A = time_dim, skill_B = time_dim, N.eff = NULL), + fun = .RandomWalkTest, ncores = ncores) return(res) } -.RandomWalkTest <- function(skill_A, skill_B){ +.RandomWalkTest <- function(skill_A, skill_B, N.eff){ + + N <- ifelse(test = is.na(N.eff), yes = length(skill_A), no = N.eff) + + score <- cumsum(skill_A > skill_B) - cumsum(skill_A < skill_B) + + ## TRUE if significant (if last value is above or below 2*sqrt(N)) + signif<- ifelse(test = (score[length(skill_A)] < (-2)*sqrt(N)) | (score[length(skill_A)] > 2*sqrt(N)), + yes = TRUE, no = FALSE) - score <- cumsum(skill_A > skill_B) - cumsum(skill_A < skill_B) - - ## TRUE if significant (if last value is above or below 2*sqrt(N)) - signif<- ifelse(test = (score[length(skill_A)] < (-2)*sqrt(length(skill_A))) | (score[length(skill_A)] > 2*sqrt(length(skill_A))), - yes = TRUE, no = FALSE) - - return(list("score"=score[length(skill_A)],"signif"=signif)) + return(list("score"=score[length(skill_A)],"signif"=signif)) } -- GitLab From 20206933fbbd0f74e029085f046c9e9b84525599 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Thu, 29 Dec 2022 16:34:45 +0100 Subject: [PATCH 02/11] first version including N.eff, test.type and alpha --- R/RandomWalkTest.R | 155 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 120 insertions(+), 35 deletions(-) diff --git a/R/RandomWalkTest.R b/R/RandomWalkTest.R index 1d39d59..a96c358 100644 --- a/R/RandomWalkTest.R +++ b/R/RandomWalkTest.R @@ -1,49 +1,72 @@ -#'Random walk test for skill differences +#'Random Walk test for skill differences #' #'Forecast comparison of the skill obtained with 2 forecasts (with respect to a -#'common reference) based on Random Walks, with significance estimate at the 95% -#'confidence level, as in DelSole and Tippett (2016). +#'common observational reference) based on Random Walks (DelSole and Tippett, 2016). +#'The function is implemented for negatively oriented scores (i.e., the lower the +#'better). Examples of negatively oriented scores are the RPS, RMSE and the Error. +#'In case of positively oriented scores (i.e., the higher the better), please +#'multiply skill_A and skill_B by -1. #' -#'@param skill_A A numerical array of the time series of the skill with the -#' forecaster A's. -#'@param skill_B A numerical array of the time series of the skill with the -#' forecaster B's. The dimensions should be identical as parameter 'skill_A'. +#'@param skill_A A numerical array of the time series of the scores obtained +#' with the forecaster A. +#'@param skill_B A numerical array of the time series of the scores obtained +#' with the forecaster B. The dimensions should be identical as parameter 'skill_A'. #'@param time_dim A character string indicating the name of the dimension along #' which the tests are computed. The default value is 'sdate'. -#'@param N.eff A numerical array with the number of effective degrees of freedom. -#' The dimensions should be the same as 'skill_A' and 'skill_B' but without the 'time_dim' dimension. -#' If NA (default), the length of the 'time_dim' dimension is used. +#'@param N.eff Effective sample size to be used in the statistical significance +#' test. It can be NA (to use the length of the "time_dim" dimension) or an +#' array with the same dimensions as "skill_A" except "time_dim" (for a +#' particular N.eff to be used for each case). The default value is NA. +#'@param test.type A character string indicating the type of significance test. +#' It can be "two-sided" (to assess whether "skill_A" and "skill_B" are +#' significantly different) or "one-sided" (to assess whether "skill_A" is +#' significantly better than "skill_B"). In case of "one-sided" test, it matters +#' whether the scores are positively or negatively oriented. The one-sided test +#' will be performed assuming negatively oriented scores. The default value is +#' "two-sided". +#'@param alpha A numeric of the significance level to be used in the statistical +#' significance test. If it is a numeric, the significance will be returned. +#' If NULL, the p-value will be returned instead. The default value is 0.05. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' -#'@return A list of 2: +#'@return A list with: #'\item{$score}{ #' A numerical array with the same dimensions as the input arrays except #' 'time_dim'. The number of times that forecaster A has been better than #' forecaster B minus the number of times that forecaster B has been better -#' than forecaster A (for skill positively oriented). If $score is positive -#' forecaster A is better than forecaster B, and if $score is negative -#' forecaster B is better than forecaster B. +#' than forecaster A (for skill negatively oriented, i.e., the lower the +#' better). If $score is positive, forecaster A has been better more times +#' than forecaster B. If $score is negative, forecaster B has been better more +#' times than forecaster A. #'} #'\item{$signif}{ -#' A logical array with the same dimensions as the input arrays except -#' 'time_dim'. Whether the difference is significant or not at the 5% -#' significance level. +#' A logical array of the statistical significance with the same dimensions +#' as the input arrays except "time_dim". Returned only if "alpha" is a numeric. #'} +#'\item{$p.val}{ +#' A numeric array of the p-values with the same dimensions as the input arrays +#' except "time_dim". Returned only if "alpha" is NULL. +#'} +#' +#'@references +#'DelSole and Tippett (2016): https://doi.org/10.1175/MWR-D-15-0218.1 #' #'@examples -#' fcst_A <- array(c(11:50), dim = c(sdate = 10, lat = 2, lon = 2)) -#' fcst_B <- array(c(21:60), dim = c(sdate = 10, lat = 2, lon = 2)) -#' reference <- array(1:40, dim = c(sdate = 10, lat = 2, lon = 2)) +#' fcst_A <- array(data = 11:50, dim = c(sdate = 10, lat = 2, lon = 2)) +#' fcst_B <- array(data = 21:60, dim = c(sdate = 10, lat = 2, lon = 2)) +#' reference <- array(data = 1:40, dim = c(sdate = 10, lat = 2, lon = 2)) #' skill_A <- abs(fcst_A - reference) #' skill_B <- abs(fcst_B - reference) #' RandomWalkTest(skill_A = skill_A, skill_B = skill_B, time_dim = 'sdate', N.eff = NA, ncores = 1) #' #'@import multiApply #'@export -RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', N.eff = NA, ncores = NULL){ +RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', N.eff = NA, + test.type = 'two-sided', alpha = 0.05, ncores = NULL){ ## Check inputs + ## skill_A and skill_B if (is.null(skill_A) | is.null(skill_B)){ stop("Parameters 'skill_A' and 'skill_B' cannot be NULL.") } @@ -53,17 +76,37 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', N.eff = NA, nco if (!identical(dim(skill_A),dim(skill_B))){ stop("Parameters 'skill_A' and 'skill_B' must have the same dimensions.") } - if(!is.character(time_dim)){ + ## time_dim + if(!is.character(time_dim) | length(time_dim) != 1){ stop("Parameter 'time_dim' must be a character string.") } if(!time_dim %in% names(dim(skill_A)) | !time_dim %in% names(dim(skill_B))){ stop("Parameter 'time_dim' is not found in 'skill_A' or 'skill_B' dimensions.") } - if(!identical(N.eff,NA)){ - if (!identical(dim(N.eff), dim(skill_A)[-which(names(dim(skill_A)) == time_dim)]) | !is.numeric(N.eff)){ - stop("Parameter 'N.eff' must be NA, or an array with the same dimensions as 'skill_A' and 'skill_B' but without the 'time_dim' dimension") + ## N.eff + if (is.array(N.eff)) { + if (!is.numeric(N.eff)) stop("Parameter 'N.eff' must be numeric.") + if (!all(names(dim(N.eff)) %in% names(dim(skill_A))) | + any(dim(skill_A)[match(names(dim(N.eff)), names(dim(skill_A)))] != dim(N.eff))) { + stop(paste0('If parameter "N.eff" is provided with an array, it must ', + 'have the same dimensions as "skill_A" except "time_dim".')) + } + } else if (!is.na(N.eff)) { + stop(paste0('Parameter "N.eff" must be NA or an array with ', + 'the same dimensions as "skill_A" except "time_dim".')) + } + ## test.type + if (!test.type %in% c('two-sided', 'one-sided')) { + stop("Parameter 'test.type' must be 'two-sided' or 'one-sided'.") + } + ## alpha + if (!is.null(alpha)) { + if (any(!is.numeric(alpha) | alpha <= 0 | alpha >= 1 | + length(alpha) > 1)) { + stop('Parameter "alpha" must be NULL or a number between 0 and 1.') } } + ## ncores if (!is.null(ncores)){ if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1){ stop("Parameter 'ncores' must be a positive integer.") @@ -71,21 +114,63 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', N.eff = NA, nco } ## Compute the Random Walk Test - res <- multiApply::Apply(data = list(skill_A = skill_A, skill_B = skill_B, N.eff = N.eff), - target_dims = list(skill_A = time_dim, skill_B = time_dim, N.eff = NULL), - fun = .RandomWalkTest, ncores = ncores) + res <- multiApply::Apply(data = list(skill_A = skill_A, + skill_B = skill_B, + N.eff = N.eff), + target_dims = list(skill_A = time_dim, + skill_B = time_dim, + N.eff = NULL), + fun = .RandomWalkTest, + test.type = test.type, + alpha = alpha, + ncores = ncores) return(res) } -.RandomWalkTest <- function(skill_A, skill_B, N.eff){ +.RandomWalkTest <- function(skill_A, skill_B, N.eff = NA, test.type = 'two-sided', alpha = 0.05){ - N <- ifelse(test = is.na(N.eff), yes = length(skill_A), no = N.eff) + ## For one-sided test, this function assumes that the scores are negatively oriented, i.e., the lower the better + ## Thus, significance may only be reached if score<0 (i.e., forecaster A is better than forecaster B) + ## For two-sided test, it does not matter the orientation of the scores - score <- cumsum(skill_A > skill_B) - cumsum(skill_A < skill_B) + ## DelSole and Tippett (2016) approximation for two-sided test at 95% confidence level: significant if score is above or below 2*sqrt(N) + # signif<- ifelse(test = (score < (-2)*sqrt(N.eff)) | (score > 2*sqrt(N.eff)), yes = TRUE, no = FALSE) - ## TRUE if significant (if last value is above or below 2*sqrt(N)) - signif<- ifelse(test = (score[length(skill_A)] < (-2)*sqrt(N)) | (score[length(skill_A)] > 2*sqrt(N)), - yes = TRUE, no = FALSE) + if (is.na(N.eff)){ + N.eff <- length(skill_A) + } + + output <- NULL + output$score <- cumsum(skill_A > skill_B) - cumsum(skill_A < skill_B) + output$score <- -output$score[length(skill_A)] ## negative sign for the output to be positive in case A has been better more times + + if (test.type == 'one-sided'){ + + ## H0: forecaster A is not better than forecaster B + ## H1: forecaster A is better than forecaster B + + p.val <- pbinom(q = output$score, size = N.eff, prob = 0.5, lower.tail = FALSE, log.p = FALSE) + + if (is.null(alpha)){ + output$p.val <- p.val + } else { + output$sign <- ifelse(!is.na(p.val) & p.val <= alpha, TRUE, FALSE) + } + + } else if (test.type == 'two-sided'){ + + ## H0: forecaster A and forecaster B are not different in terms of skill + ## H1: forecaster A and forecaster B are different in terms of skill + + p.val <- pbinom(q = abs(output$score), size = N.eff, prob = 0.5, lower.tail = FALSE, log.p = FALSE) + + if (is.null(alpha)){ + output$p.val <- p.val + } else { + output$sign <- ifelse(!is.na(p.val) & p.val <= alpha / 2, TRUE, FALSE) + } + + } else {stop("Parameter 'test.type' is not supported.")} - return(list("score"=score[length(skill_A)],"signif"=signif)) + return(list("score"=score, "signif"=signif)) } -- GitLab From e7a33819fdaaade7a1411024c42c931dba085074 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Thu, 29 Dec 2022 16:52:34 +0100 Subject: [PATCH 03/11] fixed output --- R/RandomWalkTest.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/RandomWalkTest.R b/R/RandomWalkTest.R index a96c358..d40a7d4 100644 --- a/R/RandomWalkTest.R +++ b/R/RandomWalkTest.R @@ -154,7 +154,7 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', N.eff = NA, if (is.null(alpha)){ output$p.val <- p.val } else { - output$sign <- ifelse(!is.na(p.val) & p.val <= alpha, TRUE, FALSE) + output$signif <- ifelse(!is.na(p.val) & p.val <= alpha, TRUE, FALSE) } } else if (test.type == 'two-sided'){ @@ -167,10 +167,10 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', N.eff = NA, if (is.null(alpha)){ output$p.val <- p.val } else { - output$sign <- ifelse(!is.na(p.val) & p.val <= alpha / 2, TRUE, FALSE) + output$signif <- ifelse(!is.na(p.val) & p.val <= alpha / 2, TRUE, FALSE) } } else {stop("Parameter 'test.type' is not supported.")} - return(list("score"=score, "signif"=signif)) + return(output) } -- GitLab From c3ea43d56b18365d669ec289b7c2ac2b5deaa521 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 30 Dec 2022 16:43:36 +0100 Subject: [PATCH 04/11] using binom.test for both one-sided and two-sided --- R/RandomWalkTest.R | 108 +++++++++++++++++++++++---------------------- 1 file changed, 55 insertions(+), 53 deletions(-) diff --git a/R/RandomWalkTest.R b/R/RandomWalkTest.R index d40a7d4..30f820b 100644 --- a/R/RandomWalkTest.R +++ b/R/RandomWalkTest.R @@ -2,10 +2,6 @@ #' #'Forecast comparison of the skill obtained with 2 forecasts (with respect to a #'common observational reference) based on Random Walks (DelSole and Tippett, 2016). -#'The function is implemented for negatively oriented scores (i.e., the lower the -#'better). Examples of negatively oriented scores are the RPS, RMSE and the Error. -#'In case of positively oriented scores (i.e., the higher the better), please -#'multiply skill_A and skill_B by -1. #' #'@param skill_A A numerical array of the time series of the scores obtained #' with the forecaster A. @@ -18,12 +14,13 @@ #' array with the same dimensions as "skill_A" except "time_dim" (for a #' particular N.eff to be used for each case). The default value is NA. #'@param test.type A character string indicating the type of significance test. -#' It can be "two-sided" (to assess whether "skill_A" and "skill_B" are -#' significantly different) or "one-sided" (to assess whether "skill_A" is -#' significantly better than "skill_B"). In case of "one-sided" test, it matters -#' whether the scores are positively or negatively oriented. The one-sided test -#' will be performed assuming negatively oriented scores. The default value is -#' "two-sided". +#' It can be "two.sided" (to assess whether forecaster A and forecaster B are +#' significantly different in terms of skill with a two-sided test), "greater" +#' (to assess whether forecaster A shows significantly better skill than +#' forecaster B with a one-sided test for negatively oriented scores), or "less" +#' (to assess whether forecaster A shows significantly better skill than +#' forecaster B with a one-sided test for positively oriented scores). The +#' default value is "two.sided". #'@param alpha A numeric of the significance level to be used in the statistical #' significance test. If it is a numeric, the significance will be returned. #' If NULL, the p-value will be returned instead. The default value is 0.05. @@ -49,6 +46,23 @@ #' except "time_dim". Returned only if "alpha" is NULL. #'} #' +#'@details +#' Null and alternative hypothesis for "two-sided" test (regardless of the +#' orientation of the scores): +#' H0: forecaster A and forecaster B are not different in terms of skill +#' H1: forecaster A and forecaster B are different in terms of skill +#' +#' Null and alternative hypothesis for one-sided "greater" (for negatively +#' oriented scores, i.e., the lower the better) and "less" (for positively +#' oriented scores, i.e., the higher the better) tests: +#' H0: forecaster A is not better than forecaster B +#' H1: forecaster A is better than forecaster B +#' +#' Examples of negatively oriented scores are the RPS, RMSE and the Error. +#' +#' DelSole and Tippett (2016) approximation for two-sided test at 95% confidence +#' level: significant if score is above 2sqrt(N) or below -2sqrt(N) +#' #'@references #'DelSole and Tippett (2016): https://doi.org/10.1175/MWR-D-15-0218.1 #' @@ -56,14 +70,15 @@ #' fcst_A <- array(data = 11:50, dim = c(sdate = 10, lat = 2, lon = 2)) #' fcst_B <- array(data = 21:60, dim = c(sdate = 10, lat = 2, lon = 2)) #' reference <- array(data = 1:40, dim = c(sdate = 10, lat = 2, lon = 2)) -#' skill_A <- abs(fcst_A - reference) -#' skill_B <- abs(fcst_B - reference) -#' RandomWalkTest(skill_A = skill_A, skill_B = skill_B, time_dim = 'sdate', N.eff = NA, ncores = 1) +#' scores_A <- abs(fcst_A - reference) +#' scores_B <- abs(fcst_B - reference) +#' RandomWalkTest(skill_A = scores_A, skill_B = scores_B) #' #'@import multiApply +#'@importFrom ClimProjDiags Subset #'@export RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', N.eff = NA, - test.type = 'two-sided', alpha = 0.05, ncores = NULL){ + test.type = 'two.sided', alpha = 0.05, ncores = NULL){ ## Check inputs ## skill_A and skill_B @@ -96,8 +111,12 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', N.eff = NA, 'the same dimensions as "skill_A" except "time_dim".')) } ## test.type - if (!test.type %in% c('two-sided', 'one-sided')) { - stop("Parameter 'test.type' must be 'two-sided' or 'one-sided'.") + if (!test.type %in% c('two.sided', 'greater','less')) { + stop(paste0("Parameter 'test.type' must be 'two.sided' (for two-sided test ", + "regardless of the orientation of the scores), 'greater' (for ", + "one-sided test for negatively oriented scores, i.e., the lower ", + "the better) or 'less' (for one-sided test for positively ", + "oriented scores, i.e., the higher the better).")) } ## alpha if (!is.null(alpha)) { @@ -124,53 +143,36 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', N.eff = NA, test.type = test.type, alpha = alpha, ncores = ncores) + + ## to remove the extra dimension created because of target_dims(N.eff) = NULL + for (n in names(res)){ + res[[n]] <- ClimProjDiags::Subset(x = res[[n]], along = which(names(dim(res[[n]])) == ''), indices = 1, drop = 'selected') + } + return(res) } -.RandomWalkTest <- function(skill_A, skill_B, N.eff = NA, test.type = 'two-sided', alpha = 0.05){ - - ## For one-sided test, this function assumes that the scores are negatively oriented, i.e., the lower the better - ## Thus, significance may only be reached if score<0 (i.e., forecaster A is better than forecaster B) - ## For two-sided test, it does not matter the orientation of the scores - - ## DelSole and Tippett (2016) approximation for two-sided test at 95% confidence level: significant if score is above or below 2*sqrt(N) - # signif<- ifelse(test = (score < (-2)*sqrt(N.eff)) | (score > 2*sqrt(N.eff)), yes = TRUE, no = FALSE) +.RandomWalkTest <- function(skill_A, skill_B, N.eff = NA, test.type = 'two.sided', alpha = 0.05){ if (is.na(N.eff)){ N.eff <- length(skill_A) } + A_better <- sum(skill_B > skill_A) + B_better <- sum(skill_B < skill_A) + output <- NULL - output$score <- cumsum(skill_A > skill_B) - cumsum(skill_A < skill_B) - output$score <- -output$score[length(skill_A)] ## negative sign for the output to be positive in case A has been better more times + output$score <- A_better - B_better - if (test.type == 'one-sided'){ - - ## H0: forecaster A is not better than forecaster B - ## H1: forecaster A is better than forecaster B - - p.val <- pbinom(q = output$score, size = N.eff, prob = 0.5, lower.tail = FALSE, log.p = FALSE) - - if (is.null(alpha)){ - output$p.val <- p.val - } else { - output$signif <- ifelse(!is.na(p.val) & p.val <= alpha, TRUE, FALSE) - } - - } else if (test.type == 'two-sided'){ - - ## H0: forecaster A and forecaster B are not different in terms of skill - ## H1: forecaster A and forecaster B are different in terms of skill - - p.val <- pbinom(q = abs(output$score), size = N.eff, prob = 0.5, lower.tail = FALSE, log.p = FALSE) - - if (is.null(alpha)){ - output$p.val <- p.val - } else { - output$signif <- ifelse(!is.na(p.val) & p.val <= alpha / 2, TRUE, FALSE) - } - - } else {stop("Parameter 'test.type' is not supported.")} + if (!is.na(output$score)){ + p.val <- binom.test(x = A_better, n = floor(N.eff), p = 0.5, conf.level = 1-alpha, alternative = test.type)$p.value + } else {p.val <- NA} + + if (is.null(alpha)){ + output$p.val <- p.val + } else { + output$signif <- ifelse(!is.na(p.val) & p.val <= alpha, TRUE, FALSE) + } return(output) } -- GitLab From 8b65f4946f2b9d361265b75a8fdcba76f9d2c2da Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 30 Dec 2022 18:48:33 +0100 Subject: [PATCH 05/11] fixed error when alpha = NULL --- R/RandomWalkTest.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/R/RandomWalkTest.R b/R/RandomWalkTest.R index 30f820b..9110aa3 100644 --- a/R/RandomWalkTest.R +++ b/R/RandomWalkTest.R @@ -145,8 +145,10 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', N.eff = NA, ncores = ncores) ## to remove the extra dimension created because of target_dims(N.eff) = NULL - for (n in names(res)){ - res[[n]] <- ClimProjDiags::Subset(x = res[[n]], along = which(names(dim(res[[n]])) == ''), indices = 1, drop = 'selected') + for (n in names(res)){ ## there may be a better way of doing this + if (!identical(which(names(dim(res[[n]])) == ''),integer(0))){ + res[[n]] <- ClimProjDiags::Subset(x = res[[n]], along = which(names(dim(res[[n]])) == ''), indices = 1, drop = 'selected') + } } return(res) @@ -165,6 +167,7 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', N.eff = NA, output$score <- A_better - B_better if (!is.na(output$score)){ + if (is.null(alpha)){alpha <- 0.05} ## it is needed for binom.test(), but it does not affect the results since we only retrieve $p.value p.val <- binom.test(x = A_better, n = floor(N.eff), p = 0.5, conf.level = 1-alpha, alternative = test.type)$p.value } else {p.val <- NA} -- GitLab From e1c81f26ec26f171c267a3bd30c15c287722864b Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Mon, 2 Jan 2023 08:51:31 +0100 Subject: [PATCH 06/11] included DelSole and Tippett (2016) approximation. This was the only method in the previous version --- R/RandomWalkTest.R | 65 +++++++++++++++++++++++++++++++--------------- 1 file changed, 44 insertions(+), 21 deletions(-) diff --git a/R/RandomWalkTest.R b/R/RandomWalkTest.R index 9110aa3..c5f23e1 100644 --- a/R/RandomWalkTest.R +++ b/R/RandomWalkTest.R @@ -14,13 +14,15 @@ #' array with the same dimensions as "skill_A" except "time_dim" (for a #' particular N.eff to be used for each case). The default value is NA. #'@param test.type A character string indicating the type of significance test. -#' It can be "two.sided" (to assess whether forecaster A and forecaster B are -#' significantly different in terms of skill with a two-sided test), "greater" -#' (to assess whether forecaster A shows significantly better skill than -#' forecaster B with a one-sided test for negatively oriented scores), or "less" -#' (to assess whether forecaster A shows significantly better skill than -#' forecaster B with a one-sided test for positively oriented scores). The -#' default value is "two.sided". +#' It can be "two.sided.approx" (to assess whether forecaster A and forecaster B are +#' significantly different in terms of skill with a two-sided test using the +#' approximation of DelSole and Tippett, 2016), "two.sided" (to assess whether +#' forecaster A and forecaster B are significantly different in terms of skill +#' with a exact two-sided test), "greater" (to assess whether forecaster A shows +#' significantly better skill than forecaster B with a one-sided test for +#' negatively oriented scores), or "less" (to assess whether forecaster A shows +#' significantly better skill than forecaster B with a one-sided test for +#' positively oriented scores). The default value is "two.sided.approx". #'@param alpha A numeric of the significance level to be used in the statistical #' significance test. If it is a numeric, the significance will be returned. #' If NULL, the p-value will be returned instead. The default value is 0.05. @@ -58,10 +60,13 @@ #' H0: forecaster A is not better than forecaster B #' H1: forecaster A is better than forecaster B #' -#' Examples of negatively oriented scores are the RPS, RMSE and the Error. +#' Examples of negatively oriented scores are the RPS, RMSE and the Error, while +#' the ROC score is a positively oriented score. #' #' DelSole and Tippett (2016) approximation for two-sided test at 95% confidence -#' level: significant if score is above 2sqrt(N) or below -2sqrt(N) +#' level: significant if the difference between the number of times that +#' forecaster A has been better than forecaster B and forecaster B has been +#' better than forecaster A is above 2sqrt(N) or below -2sqrt(N). #' #'@references #'DelSole and Tippett (2016): https://doi.org/10.1175/MWR-D-15-0218.1 @@ -78,7 +83,7 @@ #'@importFrom ClimProjDiags Subset #'@export RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', N.eff = NA, - test.type = 'two.sided', alpha = 0.05, ncores = NULL){ + test.type = 'two.sided.approx', alpha = 0.05, ncores = NULL){ ## Check inputs ## skill_A and skill_B @@ -111,13 +116,23 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', N.eff = NA, 'the same dimensions as "skill_A" except "time_dim".')) } ## test.type - if (!test.type %in% c('two.sided', 'greater','less')) { - stop(paste0("Parameter 'test.type' must be 'two.sided' (for two-sided test ", + if (!test.type %in% c('two.sided.approx','two.sided','greater','less')) { + stop(paste0("Parameter 'test.type' must be 'two.sided.test' (for two-sided test ", + "using the aproximation of DelSole and Tippett (2016), regardless ", + "of the orientation of the scores), 'two.sided' (for two-sided test ", "regardless of the orientation of the scores), 'greater' (for ", "one-sided test for negatively oriented scores, i.e., the lower ", "the better) or 'less' (for one-sided test for positively ", "oriented scores, i.e., the higher the better).")) } + if (test.type == 'two.sided.approx'){ + if (is.numeric(alpha) & alpha != 0.05){ + warning(paste0("DelSole and Tippett (2016) aproximation is valid for alpha ", + "= 0.05. Returning the significance at the 0.05 significance level.")) + } else if (is.null(alpha)){ + warning(paste0("p-value cannot be returned with the DelSole and Tippett (2016) ", + "aproximation. Returning the significance at the 0.05 significance level.")) } + } ## alpha if (!is.null(alpha)) { if (any(!is.numeric(alpha) | alpha <= 0 | alpha >= 1 | @@ -154,7 +169,7 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', N.eff = NA, return(res) } -.RandomWalkTest <- function(skill_A, skill_B, N.eff = NA, test.type = 'two.sided', alpha = 0.05){ +.RandomWalkTest <- function(skill_A, skill_B, N.eff = NA, test.type = 'two.sided.approx', alpha = 0.05){ if (is.na(N.eff)){ N.eff <- length(skill_A) @@ -166,15 +181,23 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', N.eff = NA, output <- NULL output$score <- A_better - B_better - if (!is.na(output$score)){ - if (is.null(alpha)){alpha <- 0.05} ## it is needed for binom.test(), but it does not affect the results since we only retrieve $p.value - p.val <- binom.test(x = A_better, n = floor(N.eff), p = 0.5, conf.level = 1-alpha, alternative = test.type)$p.value - } else {p.val <- NA} - - if (is.null(alpha)){ - output$p.val <- p.val + if (test.type == 'two.sided.approx'){ + + output$signif <- ifelse(test = abs(output$score) > 2*sqrt(N.eff), yes = TRUE, no = FALSE) + } else { - output$signif <- ifelse(!is.na(p.val) & p.val <= alpha, TRUE, FALSE) + + if (!is.na(output$score)){ + if (is.null(alpha)){alpha <- 0.05} ## it is needed for binom.test(), but it does not affect the results since we only retrieve $p.value + p.val <- binom.test(x = A_better, n = floor(N.eff), p = 0.5, conf.level = 1-alpha, alternative = test.type)$p.value + } else {p.val <- NA} + + if (is.null(alpha)){ + output$p.val <- p.val + } else { + output$signif <- ifelse(!is.na(p.val) & p.val <= alpha, TRUE, FALSE) + } + } return(output) -- GitLab From b059f6135af57d2993b8d28e715a14aaed814cb0 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 20 Jan 2023 11:34:10 +0100 Subject: [PATCH 07/11] uncritical improvement; doc revision --- R/RandomWalkTest.R | 187 +++++++++++++++------------ man/RandomWalkTest.Rd | 108 ++++++++++++---- tests/testthat/test-RandomWalkTest.R | 4 +- 3 files changed, 190 insertions(+), 109 deletions(-) diff --git a/R/RandomWalkTest.R b/R/RandomWalkTest.R index c5f23e1..3d4741e 100644 --- a/R/RandomWalkTest.R +++ b/R/RandomWalkTest.R @@ -1,12 +1,14 @@ #'Random Walk test for skill differences #' #'Forecast comparison of the skill obtained with 2 forecasts (with respect to a -#'common observational reference) based on Random Walks (DelSole and Tippett, 2016). +#'common observational reference) based on Random Walks (DelSole and Tippett, +#'2016). #' #'@param skill_A A numerical array of the time series of the scores obtained #' with the forecaster A. #'@param skill_B A numerical array of the time series of the scores obtained -#' with the forecaster B. The dimensions should be identical as parameter 'skill_A'. +#' with the forecaster B. The dimensions should be identical as parameter +#' 'skill_A'. #'@param time_dim A character string indicating the name of the dimension along #' which the tests are computed. The default value is 'sdate'. #'@param N.eff Effective sample size to be used in the statistical significance @@ -14,18 +16,22 @@ #' array with the same dimensions as "skill_A" except "time_dim" (for a #' particular N.eff to be used for each case). The default value is NA. #'@param test.type A character string indicating the type of significance test. -#' It can be "two.sided.approx" (to assess whether forecaster A and forecaster B are -#' significantly different in terms of skill with a two-sided test using the -#' approximation of DelSole and Tippett, 2016), "two.sided" (to assess whether -#' forecaster A and forecaster B are significantly different in terms of skill -#' with a exact two-sided test), "greater" (to assess whether forecaster A shows -#' significantly better skill than forecaster B with a one-sided test for -#' negatively oriented scores), or "less" (to assess whether forecaster A shows -#' significantly better skill than forecaster B with a one-sided test for -#' positively oriented scores). The default value is "two.sided.approx". +#' It can be "two.sided.approx" (to assess whether forecaster A and forecaster +#' B are significantly different in terms of skill with a two-sided test using +#' the approximation of DelSole and Tippett, 2016), "two.sided" (to assess +#' whether forecaster A and forecaster B are significantly different in terms +#' of skill with an exact two-sided test), "greater" (to assess whether +#' forecaster A shows significantly better skill than forecaster B with a +#' one-sided test for negatively oriented scores), or "less" (to assess whether +#' forecaster A shows significantly better skill than forecaster B with a +#' one-sided test for positively oriented scores). The default value is +#' "two.sided.approx". #'@param alpha A numeric of the significance level to be used in the statistical -#' significance test. If it is a numeric, the significance will be returned. -#' If NULL, the p-value will be returned instead. The default value is 0.05. +#' significance test (output "sign"). The default value is 0.05. +#'@param pval A logical value indicating whether to return the p-value of the +#' significance test. The default value is TRUE. +#'@param sign A logical value indicating whether to return the statistical +#' significance of the test based on 'alpha'. The default value is FALSE. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -39,25 +45,25 @@ #' than forecaster B. If $score is negative, forecaster B has been better more #' times than forecaster A. #'} -#'\item{$signif}{ +#'\item{$sign}{ #' A logical array of the statistical significance with the same dimensions -#' as the input arrays except "time_dim". Returned only if "alpha" is a numeric. +#' as the input arrays except "time_dim". Returned only if "sign" is TRUE. #'} #'\item{$p.val}{ #' A numeric array of the p-values with the same dimensions as the input arrays -#' except "time_dim". Returned only if "alpha" is NULL. +#' except "time_dim". Returned only if "pval" is TRUE. #'} #' #'@details #' Null and alternative hypothesis for "two-sided" test (regardless of the -#' orientation of the scores): -#' H0: forecaster A and forecaster B are not different in terms of skill +#' orientation of the scores):\cr +#' H0: forecaster A and forecaster B are not different in terms of skill\cr #' H1: forecaster A and forecaster B are different in terms of skill #' #' Null and alternative hypothesis for one-sided "greater" (for negatively #' oriented scores, i.e., the lower the better) and "less" (for positively -#' oriented scores, i.e., the higher the better) tests: -#' H0: forecaster A is not better than forecaster B +#' oriented scores, i.e., the higher the better) tests:\cr +#' H0: forecaster A is not better than forecaster B\cr #' H1: forecaster A is better than forecaster B #' #' Examples of negatively oriented scores are the RPS, RMSE and the Error, while @@ -77,101 +83,110 @@ #' reference <- array(data = 1:40, dim = c(sdate = 10, lat = 2, lon = 2)) #' scores_A <- abs(fcst_A - reference) #' scores_B <- abs(fcst_B - reference) -#' RandomWalkTest(skill_A = scores_A, skill_B = scores_B) +#' res <- RandomWalkTest(skill_A = scores_A, skill_B = scores_B) #' #'@import multiApply #'@importFrom ClimProjDiags Subset #'@export RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', N.eff = NA, - test.type = 'two.sided.approx', alpha = 0.05, ncores = NULL){ + test.type = 'two.sided.approx', alpha = 0.05, pval = TRUE, + sign = FALSE, ncores = NULL) { - ## Check inputs + # Check inputs ## skill_A and skill_B - if (is.null(skill_A) | is.null(skill_B)){ + if (is.null(skill_A) | is.null(skill_B)) { stop("Parameters 'skill_A' and 'skill_B' cannot be NULL.") } - if(!is.numeric(skill_A) | !is.numeric(skill_B)){ + if(!is.numeric(skill_A) | !is.numeric(skill_B)) { stop("Parameters 'skill_A' and 'skill_B' must be a numerical array.") } - if (!identical(dim(skill_A),dim(skill_B))){ + if (!identical(dim(skill_A), dim(skill_B))) { stop("Parameters 'skill_A' and 'skill_B' must have the same dimensions.") } ## time_dim - if(!is.character(time_dim) | length(time_dim) != 1){ + if (!is.character(time_dim) | length(time_dim) != 1) { stop("Parameter 'time_dim' must be a character string.") } - if(!time_dim %in% names(dim(skill_A)) | !time_dim %in% names(dim(skill_B))){ + if (!time_dim %in% names(dim(skill_A)) | !time_dim %in% names(dim(skill_B))) { stop("Parameter 'time_dim' is not found in 'skill_A' or 'skill_B' dimensions.") } ## N.eff if (is.array(N.eff)) { - if (!is.numeric(N.eff)) stop("Parameter 'N.eff' must be numeric.") - if (!all(names(dim(N.eff)) %in% names(dim(skill_A))) | - any(dim(skill_A)[match(names(dim(N.eff)), names(dim(skill_A)))] != dim(N.eff))) { - stop(paste0('If parameter "N.eff" is provided with an array, it must ', - 'have the same dimensions as "skill_A" except "time_dim".')) + if (!is.numeric(N.eff)) stop("Parameter 'N.eff' must be a numeric array or NA.") + if (!identical(dim(N.eff), dim(skill_A)[-which(names(dim(skill_A)) == time_dim)])) { +# if (!all(names(dim(N.eff)) %in% names(dim(skill_A))) | +# any(dim(skill_A)[match(names(dim(N.eff)), names(dim(skill_A)))] != dim(N.eff))) { + stop("If parameter 'N.eff' is provided with an array, it must ", + "have the same dimensions as 'skill_A' except 'time_dim'.") } - } else if (!is.na(N.eff)) { - stop(paste0('Parameter "N.eff" must be NA or an array with ', - 'the same dimensions as "skill_A" except "time_dim".')) + } else if (!identical(N.eff, NA)) { + stop("Parameter 'N.eff' must be NA or an array with ", + "the same dimensions as 'skill_A' except 'time_dim'.") + } + ## alpha + if (any(!is.numeric(alpha) | alpha <= 0 | alpha >= 1 | length(alpha) > 1)) { + stop("Parameter 'alpha' must be a number between 0 and 1.") } ## test.type if (!test.type %in% c('two.sided.approx','two.sided','greater','less')) { - stop(paste0("Parameter 'test.type' must be 'two.sided.test' (for two-sided test ", - "using the aproximation of DelSole and Tippett (2016), regardless ", - "of the orientation of the scores), 'two.sided' (for two-sided test ", - "regardless of the orientation of the scores), 'greater' (for ", - "one-sided test for negatively oriented scores, i.e., the lower ", - "the better) or 'less' (for one-sided test for positively ", - "oriented scores, i.e., the higher the better).")) + stop("Parameter 'test.type' must be 'two.sided.approx', 'two.sided', 'greater', or 'less'.") } - if (test.type == 'two.sided.approx'){ - if (is.numeric(alpha) & alpha != 0.05){ - warning(paste0("DelSole and Tippett (2016) aproximation is valid for alpha ", - "= 0.05. Returning the significance at the 0.05 significance level.")) - } else if (is.null(alpha)){ - warning(paste0("p-value cannot be returned with the DelSole and Tippett (2016) ", - "aproximation. Returning the significance at the 0.05 significance level.")) } - } - ## alpha - if (!is.null(alpha)) { - if (any(!is.numeric(alpha) | alpha <= 0 | alpha >= 1 | - length(alpha) > 1)) { - stop('Parameter "alpha" must be NULL or a number between 0 and 1.') + if (test.type == 'two.sided.approx') { + if (alpha != 0.05) { + .warning("DelSole and Tippett (2016) aproximation is valid for alpha ", + "= 0.05 only. Returning the significance at the 0.05 significance level.") + } + if (pval) { + .warning("p-value cannot be returned with the DelSole and Tippett (2016) ", + "aproximation. Returning the significance at the 0.05 significance level.") } + sign <- TRUE } ## ncores - if (!is.null(ncores)){ - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1){ + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { stop("Parameter 'ncores' must be a positive integer.") } } ## Compute the Random Walk Test - res <- multiApply::Apply(data = list(skill_A = skill_A, - skill_B = skill_B, - N.eff = N.eff), - target_dims = list(skill_A = time_dim, - skill_B = time_dim, - N.eff = NULL), - fun = .RandomWalkTest, - test.type = test.type, - alpha = alpha, - ncores = ncores) - - ## to remove the extra dimension created because of target_dims(N.eff) = NULL - for (n in names(res)){ ## there may be a better way of doing this - if (!identical(which(names(dim(res[[n]])) == ''),integer(0))){ - res[[n]] <- ClimProjDiags::Subset(x = res[[n]], along = which(names(dim(res[[n]])) == ''), indices = 1, drop = 'selected') - } + if (is.array(N.eff)) { + res <- Apply(data = list(skill_A = skill_A, + skill_B = skill_B, + N.eff = N.eff), + target_dims = list(skill_A = time_dim, + skill_B = time_dim, + N.eff = NULL), + fun = .RandomWalkTest, + test.type = test.type, + alpha = alpha, + ncores = ncores) + } else { + res <- Apply(data = list(skill_A = skill_A, + skill_B = skill_B), + target_dims = list(skill_A = time_dim, + skill_B = time_dim), + fun = .RandomWalkTest, + N.eff = N.eff, + test.type = test.type, + alpha = alpha, + ncores = ncores) } + +# ## to remove the extra dimension created because of target_dims(N.eff) = NULL +# for (n in names(res)){ ## there may be a better way of doing this +# if (!identical(which(names(dim(res[[n]])) == ''),integer(0))){ +# res[[n]] <- ClimProjDiags::Subset(x = res[[n]], along = which(names(dim(res[[n]])) == ''), indices = 1, drop = 'selected') +# } +# } return(res) } -.RandomWalkTest <- function(skill_A, skill_B, N.eff = NA, test.type = 'two.sided.approx', alpha = 0.05){ +.RandomWalkTest <- function(skill_A, skill_B, N.eff = NA, test.type = 'two.sided.approx', + alpha = 0.05) { - if (is.na(N.eff)){ + if (is.na(N.eff)) { N.eff <- length(skill_A) } @@ -181,21 +196,23 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', N.eff = NA, output <- NULL output$score <- A_better - B_better - if (test.type == 'two.sided.approx'){ - - output$signif <- ifelse(test = abs(output$score) > 2*sqrt(N.eff), yes = TRUE, no = FALSE) - + if (test.type == 'two.sided.approx') { + output$sign <- ifelse(test = abs(output$score) > (2 * sqrt(N.eff)), yes = TRUE, no = FALSE) + } else { - if (!is.na(output$score)){ - if (is.null(alpha)){alpha <- 0.05} ## it is needed for binom.test(), but it does not affect the results since we only retrieve $p.value + if (!is.na(output$score)) { + if (is.null(alpha)) alpha <- 0.05 ## it is needed for binom.test(), but it does not affect the results since we only retrieve $p.value p.val <- binom.test(x = A_better, n = floor(N.eff), p = 0.5, conf.level = 1-alpha, alternative = test.type)$p.value - } else {p.val <- NA} + + } else { + p.val <- NA + } - if (is.null(alpha)){ + if (is.null(alpha)) { output$p.val <- p.val } else { - output$signif <- ifelse(!is.na(p.val) & p.val <= alpha, TRUE, FALSE) + output$sign <- ifelse(!is.na(p.val) & p.val <= alpha, TRUE, FALSE) } } diff --git a/man/RandomWalkTest.Rd b/man/RandomWalkTest.Rd index bd1460a..9db3285 100644 --- a/man/RandomWalkTest.Rd +++ b/man/RandomWalkTest.Rd @@ -2,50 +2,114 @@ % Please edit documentation in R/RandomWalkTest.R \name{RandomWalkTest} \alias{RandomWalkTest} -\title{Random walk test for skill differences} +\title{Random Walk test for skill differences} \usage{ -RandomWalkTest(skill_A, skill_B, time_dim = "sdate", ncores = NULL) +RandomWalkTest( + skill_A, + skill_B, + time_dim = "sdate", + N.eff = NA, + test.type = "two.sided.approx", + alpha = 0.05, + pval = TRUE, + sign = FALSE, + ncores = NULL +) } \arguments{ -\item{skill_A}{A numerical array of the time series of the skill with the -forecaster A's.} +\item{skill_A}{A numerical array of the time series of the scores obtained +with the forecaster A.} -\item{skill_B}{A numerical array of the time series of the skill with the -forecaster B's. The dimensions should be identical as parameter 'skill_A'.} +\item{skill_B}{A numerical array of the time series of the scores obtained +with the forecaster B. The dimensions should be identical as parameter +'skill_A'.} \item{time_dim}{A character string indicating the name of the dimension along which the tests are computed. The default value is 'sdate'.} +\item{N.eff}{Effective sample size to be used in the statistical significance +test. It can be NA (to use the length of the "time_dim" dimension) or an +array with the same dimensions as "skill_A" except "time_dim" (for a +particular N.eff to be used for each case). The default value is NA.} + +\item{test.type}{A character string indicating the type of significance test. +It can be "two.sided.approx" (to assess whether forecaster A and forecaster +B are significantly different in terms of skill with a two-sided test using +the approximation of DelSole and Tippett, 2016), "two.sided" (to assess +whether forecaster A and forecaster B are significantly different in terms +of skill with an exact two-sided test), "greater" (to assess whether +forecaster A shows significantly better skill than forecaster B with a +one-sided test for negatively oriented scores), or "less" (to assess whether +forecaster A shows significantly better skill than forecaster B with a +one-sided test for positively oriented scores). The default value is +"two.sided.approx".} + +\item{alpha}{A numeric of the significance level to be used in the statistical +significance test (output "sign"). The default value is 0.05.} + +\item{pval}{A logical value indicating whether to return the p-value of the +significance test. The default value is TRUE.} + +\item{sign}{A logical value indicating whether to return the statistical +significance of the test based on 'alpha'. The default value is FALSE.} + \item{ncores}{An integer indicating the number of cores to use for parallel computation. The default value is NULL.} } \value{ -A list of 2: +A list with: \item{$score}{ A numerical array with the same dimensions as the input arrays except 'time_dim'. The number of times that forecaster A has been better than forecaster B minus the number of times that forecaster B has been better - than forecaster A (for skill positively oriented). If $score is positive - forecaster A is better than forecaster B, and if $score is negative - forecaster B is better than forecaster B. + than forecaster A (for skill negatively oriented, i.e., the lower the + better). If $score is positive, forecaster A has been better more times + than forecaster B. If $score is negative, forecaster B has been better more + times than forecaster A. +} +\item{$sign}{ + A logical array of the statistical significance with the same dimensions + as the input arrays except "time_dim". Returned only if "sign" is TRUE. } -\item{$signif}{ - A logical array with the same dimensions as the input arrays except - 'time_dim'. Whether the difference is significant or not at the 5% - significance level. +\item{$p.val}{ + A numeric array of the p-values with the same dimensions as the input arrays + except "time_dim". Returned only if "pval" is TRUE. } } \description{ Forecast comparison of the skill obtained with 2 forecasts (with respect to a -common reference) based on Random Walks, with significance estimate at the 95% -confidence level, as in DelSole and Tippett (2016). +common observational reference) based on Random Walks (DelSole and Tippett, +2016). +} +\details{ +Null and alternative hypothesis for "two-sided" test (regardless of the +orientation of the scores):\cr +H0: forecaster A and forecaster B are not different in terms of skill\cr +H1: forecaster A and forecaster B are different in terms of skill + +Null and alternative hypothesis for one-sided "greater" (for negatively +oriented scores, i.e., the lower the better) and "less" (for positively +oriented scores, i.e., the higher the better) tests:\cr +H0: forecaster A is not better than forecaster B\cr +H1: forecaster A is better than forecaster B + +Examples of negatively oriented scores are the RPS, RMSE and the Error, while +the ROC score is a positively oriented score. + +DelSole and Tippett (2016) approximation for two-sided test at 95% confidence +level: significant if the difference between the number of times that +forecaster A has been better than forecaster B and forecaster B has been +better than forecaster A is above 2sqrt(N) or below -2sqrt(N). } \examples{ -fcst_A <- array(c(11:50), dim = c(sdate = 10, lat = 2, lon = 2)) -fcst_B <- array(c(21:60), dim = c(sdate = 10, lat = 2, lon = 2)) -reference <- array(1:40, dim = c(sdate = 10, lat = 2, lon = 2)) -skill_A <- abs(fcst_A - reference) -skill_B <- abs(fcst_B - reference) -RandomWalkTest(skill_A = skill_A, skill_B = skill_B, time_dim = 'sdate', ncores = 1) +fcst_A <- array(data = 11:50, dim = c(sdate = 10, lat = 2, lon = 2)) +fcst_B <- array(data = 21:60, dim = c(sdate = 10, lat = 2, lon = 2)) +reference <- array(data = 1:40, dim = c(sdate = 10, lat = 2, lon = 2)) +scores_A <- abs(fcst_A - reference) +scores_B <- abs(fcst_B - reference) +res <- RandomWalkTest(skill_A = scores_A, skill_B = scores_B) } +\references{ +DelSole and Tippett (2016): https://doi.org/10.1175/MWR-D-15-0218.1 +} diff --git a/tests/testthat/test-RandomWalkTest.R b/tests/testthat/test-RandomWalkTest.R index d50b9ce..e0cedbe 100644 --- a/tests/testthat/test-RandomWalkTest.R +++ b/tests/testthat/test-RandomWalkTest.R @@ -31,7 +31,7 @@ test_that("1. Input checks", { "Parameter 'time_dim' is not found in 'skill_A' or 'skill_B' dimension." ) expect_error( - RandomWalkTest(dat1_A, dat1_B, ncores = T), + RandomWalkTest(dat1_A, dat1_B, ncores = T, sign = T, pval = F), "Parameter 'ncores' must be a positive integer." ) @@ -39,7 +39,7 @@ test_that("1. Input checks", { ############################################## test_that("2. Output checks: dat1", { - res <- RandomWalkTest(dat1_A, dat1_B) + res <- RandomWalkTest(dat1_A, dat1_B, sign = T, pval = F) expect_equal( length(res), -- GitLab From a70a702bc815af9d2f16467190d5881d436bfe3d Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 20 Jan 2023 16:47:45 +0100 Subject: [PATCH 08/11] Add unit tests; remove unededed lines --- R/RandomWalkTest.R | 14 +-- tests/testthat/test-RandomWalkTest.R | 134 +++++++++++++++++++++++++-- 2 files changed, 131 insertions(+), 17 deletions(-) diff --git a/R/RandomWalkTest.R b/R/RandomWalkTest.R index 3d4741e..e38cb8b 100644 --- a/R/RandomWalkTest.R +++ b/R/RandomWalkTest.R @@ -114,8 +114,6 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', N.eff = NA, if (is.array(N.eff)) { if (!is.numeric(N.eff)) stop("Parameter 'N.eff' must be a numeric array or NA.") if (!identical(dim(N.eff), dim(skill_A)[-which(names(dim(skill_A)) == time_dim)])) { -# if (!all(names(dim(N.eff)) %in% names(dim(skill_A))) | -# any(dim(skill_A)[match(names(dim(N.eff)), names(dim(skill_A)))] != dim(N.eff))) { stop("If parameter 'N.eff' is provided with an array, it must ", "have the same dimensions as 'skill_A' except 'time_dim'.") } @@ -173,13 +171,6 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', N.eff = NA, ncores = ncores) } -# ## to remove the extra dimension created because of target_dims(N.eff) = NULL -# for (n in names(res)){ ## there may be a better way of doing this -# if (!identical(which(names(dim(res[[n]])) == ''),integer(0))){ -# res[[n]] <- ClimProjDiags::Subset(x = res[[n]], along = which(names(dim(res[[n]])) == ''), indices = 1, drop = 'selected') -# } -# } - return(res) } @@ -209,9 +200,10 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', N.eff = NA, p.val <- NA } - if (is.null(alpha)) { + if (pval) { output$p.val <- p.val - } else { + } + if (sign) { output$sign <- ifelse(!is.na(p.val) & p.val <= alpha, TRUE, FALSE) } diff --git a/tests/testthat/test-RandomWalkTest.R b/tests/testthat/test-RandomWalkTest.R index e0cedbe..1af1d19 100644 --- a/tests/testthat/test-RandomWalkTest.R +++ b/tests/testthat/test-RandomWalkTest.R @@ -1,11 +1,17 @@ context("s2dv::RandomWalkTest tests") ############################################## + #dat1 set.seed(1) dat1_A <- array(rnorm(64), dim = c(sdate = 4, ftime = 4, lat = 2, lon = 2)) set.seed(2) dat1_B <- array(rnorm(64), dim = c(sdate = 4, ftime = 4, lat = 2, lon = 2)) - + #dat2 + set.seed(1) + dat2_A <- array(rnorm(8), dim = c(sdate = 4, ftime = 2)) + set.seed(2) + dat2_B <- array(rnorm(8), dim = c(sdate = 4, ftime = 2)) + N.eff2 <- array(3:4, dim = c(ftime = 2)) ############################################## test_that("1. Input checks", { @@ -31,6 +37,26 @@ test_that("1. Input checks", { "Parameter 'time_dim' is not found in 'skill_A' or 'skill_B' dimension." ) expect_error( + RandomWalkTest(dat1_A, dat1_B, N.eff = as.array(T)), + "Parameter 'N.eff' must be a numeric array or NA." + ) + expect_error( + RandomWalkTest(dat1_A, dat1_B, N.eff = array(1:4, dim = c(ftime = 4))), + "If parameter 'N.eff' is provided with an array, it must have the same dimensions as 'skill_A' except 'time_dim'." + ) + expect_error( + RandomWalkTest(dat1_A, dat1_B, N.eff = 1), + "Parameter 'N.eff' must be NA or an array with the same dimensions as 'skill_A' except 'time_dim'." + ) + expect_error( + RandomWalkTest(dat1_A, dat1_B, alpha = 1), + "Parameter 'alpha' must be a number between 0 and 1." + ) + expect_error( + RandomWalkTest(dat1_A, dat1_B, test.type = 1), + "Parameter 'test.type' must be 'two.sided.approx', 'two.sided', 'greater', or 'less'." + ) + expect_error( RandomWalkTest(dat1_A, dat1_B, ncores = T, sign = T, pval = F), "Parameter 'ncores' must be a positive integer." ) @@ -47,14 +73,14 @@ test_that("2. Output checks: dat1", { ) expect_equal( names(res), - c("score", "signif") + c("score", "sign") ) expect_equal( dim(res$score), c(ftime = 4, lat = 2, lon = 2) ) expect_equal( - dim(res$signif), + dim(res$sign), c(ftime = 4, lat = 2, lon = 2) ) expect_equal( @@ -62,7 +88,7 @@ test_that("2. Output checks: dat1", { TRUE ) expect_equal( - is.logical(res$signif), + is.logical(res$sign), TRUE ) expect_equal( @@ -71,11 +97,107 @@ test_that("2. Output checks: dat1", { ) expect_equal( res$score[, 1, 1], - c(0, 0, -2, -2) + c(0, 0, 2, 2) ) expect_equal( res$score[, 1, 2], - c(0, 4, 2, 0) + c(0, -4, -2, 0) ) }) + +############################################## +test_that("3. Output checks: dat2", { + + res1 <- RandomWalkTest(dat2_A, dat2_B, sign = T, pval = T, test.type = "two.sided", alpha = 0.1) + + expect_equal( + names(res1), + c("score", "p.val", "sign") + ) + expect_equal( + dim(res1$score), + c(ftime = 2) + ) + expect_equal( + dim(res1$p.val), + c(ftime = 2) + ) + expect_equal( + dim(res1$sign), + c(ftime = 2) + ) + expect_equal( + as.vector(res1$score), + c(0, 0) + ) + expect_equal( + as.vector(res1$p), + c(1, 1) + ) + expect_equal( + as.vector(res1$sign), + c(F, F) + ) + +#------------------------------ + res2 <- RandomWalkTest(dat2_A + 1, dat2_B + 2, sign = T, pval = T, test.type = "greater", alpha = 0.1) + + expect_equal( + as.vector(res2$score), + c(2, 4) + ) + expect_equal( + as.vector(res2$p), + c(0.3125, 0.0625) + ) + expect_equal( + as.vector(res2$sign), + c(F, T) + ) + +#------------------------------ + res3 <- RandomWalkTest(dat2_A, dat2_B - 2, sign = T, pval = T, test.type = "less", alpha = 0.1) + expect_equal( + as.vector(res3$score), + c(-2, -4) + ) + expect_equal( + as.vector(res3$p), + c(0.3125, 0.0625) + ) + expect_equal( + as.vector(res3$sign), + c(F, T) + ) + +#------------------------------- + res4 <- RandomWalkTest(dat2_A, dat2_B - 2, sign = T, test.type = "less", alpha = 0.1, N.eff = N.eff2) + expect_equal( + dim(res4$score), + c(ftime = 2) + ) + expect_equal( + dim(res4$sign), + c(ftime = 2) + ) + expect_equal( + dim(res4$p.val), + c(ftime = 2) + ) + expect_equal( + as.vector(res4$score), + c(-2, -4) + ) + expect_equal( + as.vector(res4$p), + c(0.5000, 0.0625) + ) + expect_equal( + as.vector(res4$sign), + c(F, T) + ) + +}) + + -- GitLab From cb6d9d254457d2136cf899f15d580ebd843b1f3d Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 20 Jan 2023 17:21:39 +0100 Subject: [PATCH 09/11] Change RandomWalkTest() params --- R/AbsBiasSS.R | 2 +- R/CRPSS.R | 6 +++--- R/RPSS.R | 6 +++--- R/RandomWalkTest.R | 10 +++++----- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/R/AbsBiasSS.R b/R/AbsBiasSS.R index 0ceb009..8cb1bce 100644 --- a/R/AbsBiasSS.R +++ b/R/AbsBiasSS.R @@ -267,7 +267,7 @@ AbsBiasSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = NULL, bias_ref <- .Bias(exp = ref_data, obs = obs_data, na.rm = na.rm, absolute = TRUE, time_mean = FALSE) ## Skill score and significance biasSS[i, j] <- 1 - mean(bias_exp) / mean(bias_ref) - sign[i, j] <- .RandomWalkTest(skill_A = bias_exp, skill_B = bias_ref)$signif + sign[i, j] <- .RandomWalkTest(skill_A = bias_exp, skill_B = bias_ref, sign = T, pval = F)$sign } } diff --git a/R/CRPSS.R b/R/CRPSS.R index a6b4a14..6b063ed 100644 --- a/R/CRPSS.R +++ b/R/CRPSS.R @@ -276,14 +276,14 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', for (i in 1:nexp) { for (j in 1:nobs) { crpss[i, j] <- 1 - crps_exp_mean[i, j] / crps_ref_mean[j] - sign[i, j] <- .RandomWalkTest(skill_A = crps_exp_mean[i, j], skill_B = crps_ref_mean[j])$signif + sign[i, j] <- .RandomWalkTest(skill_A = crps_exp_mean[i, j], skill_B = crps_ref_mean[j], sign = T, pval = F)$sign } } } else { for (i in 1:nexp) { for (j in 1:nobs) { crpss[i, j] <- 1 - crps_exp_mean[i, j] / crps_ref_mean[i, j] - sign[i, j] <- .RandomWalkTest(skill_A = crps_exp_mean[i, j], skill_B = crps_ref_mean[i, j])$signif + sign[i, j] <- .RandomWalkTest(skill_A = crps_exp_mean[i, j], skill_B = crps_ref_mean[i, j], sign = T, pval = F)$sign } } } @@ -291,7 +291,7 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', } else { crpss <- 1 - mean(crps_exp) / mean(crps_ref) # Significance - sign <- .RandomWalkTest(skill_A = crps_exp, skill_B = crps_ref)$signif + sign <- .RandomWalkTest(skill_A = crps_exp, skill_B = crps_ref, sign = T, pval = F)$sign } return(list(crpss = crpss, sign = sign)) diff --git a/R/RPSS.R b/R/RPSS.R index 3d50d2b..c1b1bf9 100644 --- a/R/RPSS.R +++ b/R/RPSS.R @@ -399,21 +399,21 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', for (i in 1:nexp) { for (j in 1:nobs) { rpss[i, j] <- 1 - rps_exp_mean[i, j] / rps_ref_mean[j] - sign[i, j] <- .RandomWalkTest(skill_A = rps_exp_mean[i, j], skill_B = rps_ref_mean[j])$signif + sign[i, j] <- .RandomWalkTest(skill_A = rps_exp_mean[i, j], skill_B = rps_ref_mean[j], sign = T, pval = F)$sign } } } else { for (i in 1:nexp) { for (j in 1:nobs) { rpss[i, j] <- 1 - rps_exp_mean[i, j] / rps_ref_mean[i, j] - sign[i, j] <- .RandomWalkTest(skill_A = rps_exp_mean[i, j], skill_B = rps_ref_mean[i, j])$signif + sign[i, j] <- .RandomWalkTest(skill_A = rps_exp_mean[i, j], skill_B = rps_ref_mean[i, j], sign = T, pval = F)$sign } } } } else { rpss <- 1 - mean(rps_exp) / mean(rps_ref) # Significance - sign <- .RandomWalkTest(skill_A = rps_exp, skill_B = rps_ref)$signif + sign <- .RandomWalkTest(skill_A = rps_exp, skill_B = rps_ref, sign = T, pval = F)$sign } return(list(rpss = rpss, sign = sign)) diff --git a/R/RandomWalkTest.R b/R/RandomWalkTest.R index e38cb8b..6f4536c 100644 --- a/R/RandomWalkTest.R +++ b/R/RandomWalkTest.R @@ -157,7 +157,7 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', N.eff = NA, N.eff = NULL), fun = .RandomWalkTest, test.type = test.type, - alpha = alpha, + alpha = alpha, pval = pval, sign = sign, ncores = ncores) } else { res <- Apply(data = list(skill_A = skill_A, @@ -167,7 +167,7 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', N.eff = NA, fun = .RandomWalkTest, N.eff = N.eff, test.type = test.type, - alpha = alpha, + alpha = alpha, pval = pval, sign = sign, ncores = ncores) } @@ -175,7 +175,7 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', N.eff = NA, } .RandomWalkTest <- function(skill_A, skill_B, N.eff = NA, test.type = 'two.sided.approx', - alpha = 0.05) { + alpha = 0.05, pval = TRUE, sign = FALSE) { if (is.na(N.eff)) { N.eff <- length(skill_A) @@ -193,8 +193,8 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', N.eff = NA, } else { if (!is.na(output$score)) { - if (is.null(alpha)) alpha <- 0.05 ## it is needed for binom.test(), but it does not affect the results since we only retrieve $p.value - p.val <- binom.test(x = A_better, n = floor(N.eff), p = 0.5, conf.level = 1-alpha, alternative = test.type)$p.value + p.val <- binom.test(x = A_better, n = floor(N.eff), p = 0.5, conf.level = 1 - alpha, + alternative = test.type)$p.value } else { p.val <- NA -- GitLab From ecc4f99c91885b9988fd4f08ed23e05b986d10a4 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 20 Jan 2023 17:38:46 +0100 Subject: [PATCH 10/11] Modify example --- R/RandomWalkTest.R | 3 ++- man/RandomWalkTest.Rd | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/R/RandomWalkTest.R b/R/RandomWalkTest.R index 6f4536c..62d4d9d 100644 --- a/R/RandomWalkTest.R +++ b/R/RandomWalkTest.R @@ -83,7 +83,8 @@ #' reference <- array(data = 1:40, dim = c(sdate = 10, lat = 2, lon = 2)) #' scores_A <- abs(fcst_A - reference) #' scores_B <- abs(fcst_B - reference) -#' res <- RandomWalkTest(skill_A = scores_A, skill_B = scores_B) +#' res1 <- RandomWalkTest(skill_A = scores_A, skill_B = scores_B, pval = F, sign = T) +#' res2 <- RandomWalkTest(skill_A = scores_A, skill_B = scores_B, test.type = 'greater') #' #'@import multiApply #'@importFrom ClimProjDiags Subset diff --git a/man/RandomWalkTest.Rd b/man/RandomWalkTest.Rd index 9db3285..450d192 100644 --- a/man/RandomWalkTest.Rd +++ b/man/RandomWalkTest.Rd @@ -107,7 +107,8 @@ fcst_B <- array(data = 21:60, dim = c(sdate = 10, lat = 2, lon = 2)) reference <- array(data = 1:40, dim = c(sdate = 10, lat = 2, lon = 2)) scores_A <- abs(fcst_A - reference) scores_B <- abs(fcst_B - reference) -res <- RandomWalkTest(skill_A = scores_A, skill_B = scores_B) +res1 <- RandomWalkTest(skill_A = scores_A, skill_B = scores_B, pval = F, sign = T) +res2 <- RandomWalkTest(skill_A = scores_A, skill_B = scores_B, test.type = 'greater') } \references{ -- GitLab From 209f758513f25aa86c7c9de97dbf8ce714d1a841 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 20 Jan 2023 17:47:03 +0100 Subject: [PATCH 11/11] Use FALSE instead of F in example --- R/RandomWalkTest.R | 2 +- man/RandomWalkTest.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/RandomWalkTest.R b/R/RandomWalkTest.R index 62d4d9d..7a222cd 100644 --- a/R/RandomWalkTest.R +++ b/R/RandomWalkTest.R @@ -83,7 +83,7 @@ #' reference <- array(data = 1:40, dim = c(sdate = 10, lat = 2, lon = 2)) #' scores_A <- abs(fcst_A - reference) #' scores_B <- abs(fcst_B - reference) -#' res1 <- RandomWalkTest(skill_A = scores_A, skill_B = scores_B, pval = F, sign = T) +#' res1 <- RandomWalkTest(skill_A = scores_A, skill_B = scores_B, pval = FALSE, sign = TRUE) #' res2 <- RandomWalkTest(skill_A = scores_A, skill_B = scores_B, test.type = 'greater') #' #'@import multiApply diff --git a/man/RandomWalkTest.Rd b/man/RandomWalkTest.Rd index 450d192..cd02b4f 100644 --- a/man/RandomWalkTest.Rd +++ b/man/RandomWalkTest.Rd @@ -107,7 +107,7 @@ fcst_B <- array(data = 21:60, dim = c(sdate = 10, lat = 2, lon = 2)) reference <- array(data = 1:40, dim = c(sdate = 10, lat = 2, lon = 2)) scores_A <- abs(fcst_A - reference) scores_B <- abs(fcst_B - reference) -res1 <- RandomWalkTest(skill_A = scores_A, skill_B = scores_B, pval = F, sign = T) +res1 <- RandomWalkTest(skill_A = scores_A, skill_B = scores_B, pval = FALSE, sign = TRUE) res2 <- RandomWalkTest(skill_A = scores_A, skill_B = scores_B, test.type = 'greater') } -- GitLab