diff --git a/modules/Crossval/R/tmp/RPS.R b/modules/Crossval/R/tmp/RPS.R index 0ed599ac4c145e605bbe39b76feb2ebfdf204d29..f63972365f83a707ad6881ff07d61a3ae81f199b 100644 --- a/modules/Crossval/R/tmp/RPS.R +++ b/modules/Crossval/R/tmp/RPS.R @@ -43,7 +43,9 @@ #'@param Fair A logical indicating whether to compute the FairRPS (the #' potential RPS that the forecast would have with an infinite ensemble size). #' The default value is FALSE. -#'@param nmemb A numeric value indicating the number of members used to compute the probabilities. This parameter is necessary when calculating FairRPS from probabilities. Default is NULL. +#'@param nmemb A numeric value indicating the number of members used to compute +#' the probabilities. This parameter is necessary when calculating FairRPS from +#' probabilities. Default is NULL. #'@param weights A named numerical array of the weights for 'exp' probability #' calculation. If 'dat_dim' is NULL, the dimensions should include 'memb_dim' #' and 'time_dim'. Else, the dimension should also include 'dat_dim'. The @@ -56,11 +58,13 @@ #'@param return_mean A logical indicating whether to return the temporal mean #' of the RPS or not. If TRUE, the temporal mean is calculated along time_dim, #' if FALSE the time dimension is not aggregated. The default is TRUE. -#'@param na.rm A logical or numeric value between 0 and 1. If it is numeric, it -#' means the lower limit for the fraction of the non-NA values. 1 is equal to -#' FALSE (no NA is acceptable), 0 is equal to TRUE (all NAs are acceptable). -# The function returns NA if the fraction of non-NA values in the data is less -#' than na.rm. Otherwise, RPS will be calculated. The default value is FALSE. +#'@param na.rm A logical or numeric value between 0 and 1. If 'na.rm' is numeric, +#' it represents the minimum required fraction of non-NA values in the data. A +#' value of 1 (or FALSE) means that NA values will not be removed, and the +#' function will return 'NA' if they are present. A value of 0 (or TRUE) means +#' that NA values will be removed before computation. If the fraction of non-NA +#' values in the input data is lower than 'na.rm', the function returns 'NA'. +#' Otherwise, RPS will be calculated. The default value is FALSE. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -87,10 +91,10 @@ #'@import multiApply #'@importFrom easyVerification convert2prob #'@export -RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, - dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, - Fair = FALSE, nmemb = NULL, weights = NULL, - cross.val = FALSE, return_mean = TRUE, +RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', + cat_dim = NULL, dat_dim = NULL, prob_thresholds = c(1/3, 2/3), + indices_for_clim = NULL, Fair = FALSE, nmemb = NULL, + weights = NULL, cross.val = FALSE, return_mean = TRUE, na.rm = FALSE, ncores = NULL) { # Check inputs @@ -184,12 +188,18 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL if (!is.null(cat_dim)) { if (cat_dim %in% names(dim(exp))) { if (is.null(nmemb)) { - stop("Parameter 'nmemb' necessary to compute Fair", - "score from probabilities") + stop("Parameter 'nmemb' necessary to compute Fair", + "score from probabilities") } } } } + ## nmemb + if (!is.null(nmemb)) { + if (!is.numeric(nmemb) || length(nmemb) != 1) { + stop("Parameter 'nmemb' must be a single numeric value.") + } + } ## return_mean if (!is.logical(return_mean) | length(return_mean) > 1) { stop("Parameter 'return_mean' must be either TRUE or FALSE.") @@ -334,7 +344,8 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL f_NAs <- na.rm } - if (f_NAs <= sum(good_values) / length(obs_mean)) { + if (f_NAs <= sum(good_values) / length(obs_mean) && + !all(good_values == FALSE)) { exp_data <- exp_data[good_values, , drop = F] obs_data <- obs_data[good_values, , drop = F] @@ -384,7 +395,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL } else { R <- nmemb } - warning("Applying fair correction.") + warning("Applying fair correction.") adjustment <- (-1) / (R - 1) * probs_exp_cumsum * (1 - probs_exp_cumsum) adjustment <- colSums(adjustment) rps[, i, j] <- rps[, i, j] + adjustment diff --git a/modules/Crossval/R/tmp/RPSS.R b/modules/Crossval/R/tmp/RPSS.R index fc9931ad8610f400f3cc59e902c9ee5bb3256508..76c0dba37b5267017829a1ab9a8aa8bd80897758 100644 --- a/modules/Crossval/R/tmp/RPSS.R +++ b/modules/Crossval/R/tmp/RPSS.R @@ -57,6 +57,12 @@ #'@param Fair A logical indicating whether to compute the FairRPSS (the #' potential RPSS that the forecast would have with an infinite ensemble size). #' The default value is FALSE. +#'@param nmemb A numeric value indicating the number of members used to compute +#' the probabilities. This parameter is necessary when calculating FairRPS from +#' probabilities. Default is NULL. +#'@param nmemb_ref A numeric value indicating the number of members of the +#' reference forecast 'ref'. If 'ref' is a climatology, 'nmemb_ref' should be +#' the number of years used to compute the climatology. Default is NULL. #'@param weights_exp A named numerical array of the forecast ensemble weights #' for probability calculation. The dimension should include 'memb_dim', #' 'time_dim' and 'dat_dim' if there are multiple datasets. All dimension @@ -68,17 +74,25 @@ #'@param cross.val A logical indicating whether to compute the thresholds #' between probabilistics categories in cross-validation. The default value is #' FALSE. -#'@param na.rm A logical or numeric value between 0 and 1. If it is numeric, it -#' means the lower limit for the fraction of the non-NA values. 1 is equal to -#' FALSE (no NA is acceptable), 0 is equal to TRUE (all NAs are acceptable). -# The function returns NA if the fraction of non-NA values in the data is less -#' than na.rm. Otherwise, RPS will be calculated. The default value is FALSE. +#'@param na.rm A logical or numeric value between 0 and 1. If 'na.rm' is numeric, +#' it represents the minimum required fraction of non-NA values in the data. A +#' value of 1 (or FALSE) means that NA values will not be removed, and the +#' function will return 'NA' if they are present. A value of 0 (or TRUE) means +#' that NA values will be removed before computation. If the fraction of non-NA +#' values in the input data is lower than 'na.rm', the function returns 'NA'. +#' Otherwise, RPS will be calculated. The default value is FALSE. #'@param sig_method.type A character string indicating the test type of the #' significance method. Check \code{RandomWalkTest()} parameter #' \code{test.type} for details. The default is 'two.sided.approx', which is #' the default of \code{RandomWalkTest()}. #'@param alpha A numeric of the significance level to be used in the statistical #' significance test. The default value is 0.05. +#'@param N.eff Effective sample size to be used in the statistical significance +#' test. It can be NA (and it will be computed with the s2dv:::.Eno), FALSE +#' (and it will use the length of 'obs' along 'time_dim', so the +#' autocorrelation is not taken into account), a numeric (which is used for +#' all cases), or an array with the same dimensions as 'obs' except 'time_dim' +#' (for a particular N.eff to be used for each case). The default value is NA. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -121,16 +135,17 @@ #'obs_probs <- GetProbs(obs, memb_dim = NULL) #'ref_probs <- GetProbs(ref, memb_dim = 'member') #'res <- RPSS(exp = exp_probs, obs = obs_probs, ref = ref_probs, memb_dim = NULL, -#' cat_dim = 'bin') +#' N.eff = FALSE, cat_dim = 'bin') #' #'@import multiApply #'@export -RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, - dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, - Fair = FALSE, nmemb = NULL, nmemb_ref = NULL, - weights_exp = NULL, weights_ref = NULL, +RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', + cat_dim = NULL, dat_dim = NULL, prob_thresholds = c(1/3, 2/3), + indices_for_clim = NULL, Fair = FALSE, nmemb = NULL, + nmemb_ref = NULL, weights_exp = NULL, weights_ref = NULL, cross.val = FALSE, na.rm = FALSE, - sig_method.type = 'two.sided.approx', alpha = 0.05, ncores = NULL) { + sig_method.type = 'two.sided.approx', alpha = 0.05, + N.eff = NA, ncores = NULL) { # Check inputs ## exp, obs, and ref (1) @@ -257,6 +272,28 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', if (!is.logical(Fair) | length(Fair) > 1) { stop("Parameter 'Fair' must be either TRUE or FALSE.") } + if (Fair) { + if (!is.null(cat_dim)) { + if (cat_dim %in% names(dim(exp))) { + if (is.null(nmemb)) { + stop("Parameter 'nmemb' necessary to compute Fair", + "score from probabilities") + } + } + } + } + ## nmemb + if (!is.null(nmemb)) { + if (!is.numeric(nmemb) || length(nmemb) != 1) { + stop("Parameter 'nmemb' must be a single numeric value.") + } + } + ## nmemb_ref + if (!is.null(nmemb_ref)) { + if (!is.numeric(nmemb_ref) || length(nmemb_ref) != 1) { + stop("Parameter 'nmemb_ref' must be a single numeric value.") + } + } ## cross.val if (!is.logical(cross.val) | length(cross.val) > 1) { stop("Parameter 'cross.val' must be either TRUE or FALSE.") @@ -351,7 +388,33 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', } if (sig_method.type == 'two.sided.approx' && alpha != 0.05) { .warning("DelSole and Tippett (2016) aproximation is valid for alpha ", - "= 0.05 only. Returning the significance at the 0.05 significance level.") + "= 0.05 only. Returning the significance at the 0.05 significance level.") + } + ## N.eff + if (is.array(N.eff)) { + if (!is.numeric(N.eff)) stop("Parameter 'N.eff' must be numeric.") + if (!all(names(dim(N.eff)) %in% names(dim(obs))) | + any(dim(obs)[match(names(dim(N.eff)), names(dim(obs)))] != dim(N.eff))) { + stop("If parameter 'N.eff' is provided with an array, it must ", + "have the same dimensions as 'obs' except 'time_dim'.") + } + } else if (any((!is.na(N.eff) & !isFALSE(N.eff) & + !is.numeric(N.eff)) | length(N.eff) != 1)) { + stop("Parameter 'N.eff' must be NA, FALSE, a numeric, or an array with ", + "the same dimensions as 'obs' except 'time_dim'.") + } + if ((!is.na(N.eff) & !isFALSE(N.eff)) && sig_method.type == 'two.sided.approx') { + .warning("'N.eff' will not be used if 'sig_method.type' is 'two.sided.approx'.") + } + if (identical(N.eff, NA) && !is.null(cat_dim)) { + if (identical(sig_method.type,'two.sided.approx')) { + # N.eff is not needed for this method + N.eff <- FALSE + } else { + stop("'N.eff' cannot be NA if probabilities are already provided ", + "(cat_dim != NULL). Please compute 'N.eff' with s2dv::Eno and ", + "provide the result to this function.") + } } ## ncores if (!is.null(ncores)) { @@ -396,30 +459,84 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', target_dims = list(exp = target_dims_exp, obs = target_dims_obs) } - - output <- Apply(data, - target_dims = target_dims, - fun = .RPSS, - time_dim = time_dim, memb_dim = memb_dim, - cat_dim = cat_dim, dat_dim = dat_dim, - prob_thresholds = prob_thresholds, - indices_for_clim = indices_for_clim, Fair = Fair, - nmemb = nmemb, nmemb_ref = nmemb_ref, - weights_exp = weights_exp, - weights_ref = weights_ref, - cross.val = cross.val, - na.rm = na.rm, sig_method.type = sig_method.type, alpha = alpha, - ncores = ncores) + + if (is.array(N.eff)) { + data$N.eff <- N.eff + target_dims[length(target_dims) + 1] <- list(NULL) + if (!is.null(ref)) { + output <- Apply(data, + target_dims = target_dims, + fun = .RPSS, + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, + prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, + nmemb = nmemb, nmemb_ref = nmemb_ref, + weights_exp = weights_exp, + weights_ref = weights_ref, + cross.val = cross.val, + na.rm = na.rm, sig_method.type = sig_method.type, alpha = alpha, + ncores = ncores) + } else { # ref=NULL + output <- Apply(data, + target_dims = target_dims, + fun = .RPSS, + ref = ref, + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, + prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, + nmemb = nmemb, nmemb_ref = nmemb_ref, + weights_exp = weights_exp, + weights_ref = weights_ref, + cross.val = cross.val, + na.rm = na.rm, sig_method.type = sig_method.type, alpha = alpha, + ncores = ncores) + } + } else { # N.eff not an array + if (!is.null(ref)) { + output <- Apply(data, + target_dims = target_dims, + fun = .RPSS, + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, + prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, + nmemb = nmemb, nmemb_ref = nmemb_ref, + weights_exp = weights_exp, + weights_ref = weights_ref, + cross.val = cross.val, + na.rm = na.rm, sig_method.type = sig_method.type, alpha = alpha, + N.eff = N.eff, ncores = ncores) + } else { # ref=NULL + output <- Apply(data, + target_dims = target_dims, + fun = .RPSS, + ref = ref, + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, + prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, + nmemb = nmemb, nmemb_ref = nmemb_ref, + weights_exp = weights_exp, + weights_ref = weights_ref, + cross.val = cross.val, + na.rm = na.rm, sig_method.type = sig_method.type, alpha = alpha, + N.eff = N.eff, ncores = ncores) + } + } return(output) } -.RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, - dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, - Fair = FALSE, nmemb = NULL, nmemb_ref = NULL, - weights_exp = NULL, weights_ref = NULL, cross.val = FALSE, - na.rm = FALSE, sig_method.type = 'two.sided.approx', alpha = 0.05) { +.RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', + cat_dim = NULL, dat_dim = NULL, prob_thresholds = c(1/3, 2/3), + indices_for_clim = NULL, Fair = FALSE, + nmemb = NULL, nmemb_ref = NULL, + weights_exp = NULL, weights_ref = NULL, cross.val = FALSE, + na.rm = FALSE, sig_method.type = 'two.sided.approx', + alpha = 0.05, N.eff = NA) { #--- if memb_dim: # exp: [sdate, memb, (dat)] # obs: [sdate, (memb), (dat)] @@ -489,7 +606,8 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', dum <- match(indices_for_clim, which(good_values)) good_indices_for_clim <- dum[!is.na(dum)] - if (f_NAs <= sum(good_values) / length(good_values)) { + if (f_NAs <= sum(good_values) / length(good_values) && + !all(good_values == FALSE)) { rps_exp[good_values, i, j] <- .RPS(exp = exp[good_values, , i], obs = obs[good_values, , j], time_dim = time_dim, memb_dim = memb_dim, @@ -497,7 +615,7 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', prob_thresholds = prob_thresholds, indices_for_clim = good_indices_for_clim, Fair = Fair, nmemb = nmemb, - weights = weights_exp[good_values, , i], + weights = weights_exp[good_values, , i], cross.val = cross.val, na.rm = na.rm) rps_ref[good_values, i, j] <- .RPS(exp = ref[good_values, , k], obs = obs[good_values, , j], @@ -506,7 +624,7 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', prob_thresholds = prob_thresholds, indices_for_clim = good_indices_for_clim, Fair = Fair, nmemb = nmemb_ref, - weights = weights_ref[good_values, , k], + weights = weights_ref[good_values, , k], na.rm = na.rm, cross.val = cross.val) } } @@ -517,7 +635,7 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', rps_exp <- .RPS(exp = exp, obs = obs, time_dim = time_dim, memb_dim = memb_dim, cat_dim = cat_dim, dat_dim = dat_dim, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, Fair = Fair, - nmemb = nmemb, weights = weights_exp, + nmemb = nmemb, weights = weights_exp, cross.val = cross.val, na.rm = na.rm) # RPS of the reference forecast @@ -539,7 +657,8 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', for (j in 1:nobs) { # Use good values only good_values <- !is.na(rps_exp[, i, j]) - if (f_NAs <= sum(good_values) / length(good_values)) { + if (f_NAs <= sum(good_values) / length(good_values) && + !all(good_values == FALSE)) { obs_data <- obs[good_values, , j] if (is.null(dim(obs_data))) dim(obs_data) <- c(length(obs_data), 1) @@ -606,10 +725,13 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', if (!any(ind_nonNA)) { sign[i, j] <- NA } else { + if (is.na(N.eff)) { + N.eff <- .Eno(x = obs[, , j], na.action = na.pass) ## effective degrees of freedom + } sign[i, j] <- .RandomWalkTest(skill_A = rps_exp[ind_nonNA, i, j], skill_B = rps_ref[ind_nonNA, i, j], test.type = sig_method.type, alpha = alpha, - sign = T, pval = F)$sign + sign = T, pval = F, N.eff = N.eff)$sign } } } @@ -627,12 +749,16 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', } else { # rps_exp and rps_ref: [sdate] rpss <- 1 - mean(rps_exp, na.rm = TRUE) / mean(rps_ref, na.rm = TRUE) + if (is.na(N.eff)) { + N.eff <- .Eno(x = obs, na.action = na.pass) ## effective degrees of freedom + } sign <- .RandomWalkTest(skill_A = rps_exp[ind_nonNA], skill_B = rps_ref[ind_nonNA], test.type = sig_method.type, alpha = alpha, - sign = T, pval = F)$sign + sign = T, pval = F, N.eff = N.eff)$sign } } return(list(rpss = rpss, sign = sign)) } +