diff --git a/R/RPS.R b/R/RPS.R index 76c81d254bf8758327290ba8946f7dfc9e7aad77..6a89d6da92634bf8cb1acee7f5f6e9dee151a83c 100644 --- a/R/RPS.R +++ b/R/RPS.R @@ -39,6 +39,9 @@ #' ensemble should have at least 70 members or span at least 10 time steps and #' have more than 45 members if consistency between the weighted and unweighted #' methodologies is desired. +#'@param cross.val A logical indicating whether to compute the thresholds between +#' probabilistics categories in cross-validation. +#' The default value is FALSE. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -61,7 +64,7 @@ #'@export RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, - weights = NULL, ncores = NULL) { + weights = NULL, cross.val = FALSE, ncores = NULL) { # Check inputs ## exp and obs (1) @@ -133,6 +136,10 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NUL if (!is.logical(Fair) | length(Fair) > 1) { stop("Parameter 'Fair' must be either TRUE or FALSE.") } + ## cross.val + if (!is.logical(cross.val) | length(cross.val) > 1) { + stop("Parameter 'cross.val' must be either TRUE or FALSE.") + } ## weights if (!is.null(weights)) { if (!is.array(weights) | !is.numeric(weights)) @@ -184,7 +191,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NUL memb_dim = memb_dim, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, Fair = Fair, - weights = weights, ncores = ncores)$output1 + weights = weights, cross.val = cross.val, ncores = ncores)$output1 # Return only the mean RPS rps <- MeanDims(rps, time_dim, na.rm = FALSE) @@ -194,7 +201,8 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NUL .RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, - prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, weights = NULL) { + prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, weights = NULL, + cross.val = FALSE) { # exp: [sdate, memb, (dat)] # obs: [sdate, (memb), (dat)] @@ -232,10 +240,10 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NUL } exp_probs <- .get_probs(data = exp_data, indices_for_quantiles = indices_for_clim, - prob_thresholds = prob_thresholds, weights = weights_data) + prob_thresholds = prob_thresholds, weights = weights_data, cross.val = cross.val) # exp_probs: [bin, sdate] obs_probs <- .get_probs(data = obs_data, indices_for_quantiles = indices_for_clim, - prob_thresholds = prob_thresholds, weights = NULL) + prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val) # obs_probs: [bin, sdate] probs_exp_cumsum <- apply(exp_probs, 2, cumsum) probs_obs_cumsum <- apply(obs_probs, 2, cumsum) @@ -261,50 +269,71 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NUL return(rps) } -.get_probs <- function(data, indices_for_quantiles, prob_thresholds, weights = NULL) { +.get_probs <- function(data, indices_for_quantiles, prob_thresholds, weights = NULL, cross.val = FALSE) { # if exp: [sdate, memb] # if obs: [sdate, (memb)] # Add dim [memb = 1] to obs if it doesn't have memb_dim if (length(dim(data)) == 1) dim(data) <- c(dim(data), 1) - + # Absolute thresholds - if (is.null(weights)) { - quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), probs = prob_thresholds, type = 8, na.rm = TRUE) + if (cross.val) { + quantiles <- array(NA, dim = c(bin = length(prob_thresholds), sdate = dim(data)[1])) + for (i in 1:dim(data)[1]) { + if (is.null(weights)) { + quantiles[,i] <- quantile(x = as.vector(data[indices_for_quantiles[which(indices_for_quantiles != i)], ]), + probs = prob_thresholds, type = 8, na.rm = TRUE) + } else { + # weights: [sdate, memb] + sorted_arrays <- .sorted_distributions(data[indices_for_quantiles[which(indices_for_quantiles != i)], ], + weights[indices_for_quantiles[which(indices_for_quantiles != i)], ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + quantiles[,i] <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y + } + } } else { - # weights: [sdate, memb] - sorted_arrays <- .sorted_distributions(data[indices_for_quantiles, ], weights[indices_for_quantiles, ]) - sorted_data <- sorted_arrays$data - cumulative_weights <- sorted_arrays$cumulative_weights - quantiles <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y + if (is.null(weights)) { + quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), probs = prob_thresholds, type = 8, na.rm = TRUE) + } else { + # weights: [sdate, memb] + sorted_arrays <- .sorted_distributions(data[indices_for_quantiles, ], weights[indices_for_quantiles, ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + quantiles <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y + } + quantiles <- array(rep(quantiles, dim(data)[1]),dim = c(length(quantiles), dim(data)[1])) } - + + # quantiles: [bin-1, sdate] # Probabilities - probs <- array(dim = c(bin = length(quantiles) + 1, dim(data)[1])) # [bin, sdate] + probs <- array(dim = c(dim(quantiles)[1] + 1, dim(data)[1])) # [bin, sdate] for (i_time in 1:dim(data)[1]) { if (anyNA(data[i_time, ])) { - probs[, i_time] <- rep(NA, dim = length(quantiles) + 1) + probs[, i_time] <- rep(NA, dim = dim(quantiles)[1] + 1) } else { if (is.null(weights)) { - probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], threshold = quantiles)) + probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], + threshold = quantiles[,i_time])) } else { sorted_arrays <- .sorted_distributions(data[i_time, ], weights[i_time, ]) sorted_data <- sorted_arrays$data cumulative_weights <- sorted_arrays$cumulative_weights # find any quantiles that are outside the data range - integrated_probs <- array(dim = c(bin = length(quantiles))) - for (i_quant in 1:length(quantiles)) { + integrated_probs <- array(dim = dim(quantiles)) + for (i_quant in 1:dim(quantiles)[1]) { # for thresholds falling under the distribution - if (quantiles[i_quant] < min(sorted_data)) { - integrated_probs[i_quant] <- 0 + if (quantiles[i_quant, i_time] < min(sorted_data)) { + integrated_probs[i_quant, i_time] <- 0 # for thresholds falling over the distribution - } else if (max(sorted_data) < quantiles[i_quant]) { - integrated_probs[i_quant] <- 1 + } else if (max(sorted_data) < quantiles[i_quant, i_time]) { + integrated_probs[i_quant, i_time] <- 1 } else { - integrated_probs[i_quant] <- approx(sorted_data, cumulative_weights, quantiles[i_quant], "linear")$y + integrated_probs[i_quant, i_time] <- approx(sorted_data, cumulative_weights, quantiles[i_quant, i_time], + "linear")$y } } - probs[, i_time] <- append(integrated_probs, 1) - append(0, integrated_probs) + probs[, i_time] <- append(integrated_probs[,i_time], 1) - append(0, integrated_probs[,i_time]) if (min(probs[, i_time]) < 0 | max(probs[, i_time]) > 1) { stop(paste0("Probability in i_time = ", i_time, " is out of [0, 1].")) } diff --git a/R/RPSS.R b/R/RPSS.R index cf15b8f2195b79bdc657373d763a3ee8761952c0..efd79500892e59c474460506dd95d054f67a5471 100644 --- a/R/RPSS.R +++ b/R/RPSS.R @@ -53,6 +53,9 @@ #' time steps and have more than 45 members if consistency between the weighted #' and unweighted methodologies is desired. #'@param weights_ref Same as 'weights_exp' but for the reference forecast. +#'@param cross.val A logical indicating whether to compute the thresholds between +#' probabilistics categories in cross-validation. +#' The default value is FALSE. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -91,7 +94,8 @@ #'@export RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, - Fair = FALSE, weights = NULL, weights_exp = NULL, weights_ref = NULL, ncores = NULL) { + Fair = FALSE, weights = NULL, weights_exp = NULL, weights_ref = NULL, + cross.val = FALSE, ncores = NULL) { # Check inputs ## exp, obs, and ref (1) @@ -198,6 +202,10 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', if (!is.logical(Fair) | length(Fair) > 1) { stop("Parameter 'Fair' must be either TRUE or FALSE.") } + ## cross.val + if (!is.logical(cross.val) | length(cross.val) > 1) { + stop("Parameter 'cross.val' must be either TRUE or FALSE.") + } ## weights if (!is.null(weights)) { .warning(paste0("Parameter 'weights' is deprecated and will be removed in the next release. ", @@ -301,6 +309,7 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', indices_for_clim = indices_for_clim, Fair = Fair, weights_exp = weights_exp, weights_ref = weights_ref, + cross.val = cross.val, ncores = ncores) return(output) @@ -309,7 +318,7 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', .RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, - weights_exp = NULL, weights_ref = NULL) { + weights_exp = NULL, weights_ref = NULL, cross.val = FALSE) { # exp: [sdate, memb, (dat)] # obs: [sdate, (memb), (dat)] @@ -326,7 +335,7 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', # RPS of the forecast rps_exp <- .RPS(exp = exp, obs = obs, time_dim = time_dim, memb_dim = memb_dim, dat_dim = dat_dim, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, - Fair = Fair, weights = weights_exp) + Fair = Fair, weights = weights_exp, cross.val = cross.val) # RPS of the reference forecast if (is.null(ref)) { ## using climatology as reference forecast @@ -343,7 +352,7 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1:2]) # obs_probs: [bin, sdate] obs_probs <- .get_probs(data = obs_data, indices_for_quantiles = indices_for_clim, - prob_thresholds = prob_thresholds, weights = NULL) + prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val) # clim_probs: [bin, sdate] clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), 1 - prob_thresholds[length(prob_thresholds)]) clim_probs <- array(clim_probs, dim = dim(obs_probs)) @@ -380,7 +389,7 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', rps_ref <- .RPS(exp = ref, obs = obs, time_dim = time_dim, memb_dim = memb_dim, dat_dim = dat_dim, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, - Fair = Fair, weights = weights_ref) + Fair = Fair, weights = weights_ref, cross.val = cross.val) if (!is.null(dat_dim)) { if (isTRUE(remove_dat_dim)) { dim(rps_ref) <- dim(rps_ref)[-2] diff --git a/man/RPS.Rd b/man/RPS.Rd index 4d8236bba260d14f016ebb82304c536d545d01dd..16b7a9078f08b46408c52162d26ae22b6bd09632 100644 --- a/man/RPS.Rd +++ b/man/RPS.Rd @@ -14,6 +14,7 @@ RPS( indices_for_clim = NULL, Fair = FALSE, weights = NULL, + cross.val = FALSE, ncores = NULL ) } @@ -54,6 +55,10 @@ ensemble should have at least 70 members or span at least 10 time steps and have more than 45 members if consistency between the weighted and unweighted methodologies is desired.} +\item{cross.val}{A logical indicating whether to compute the thresholds between +probabilistics categories in cross-validation. +The default value is FALSE.} + \item{ncores}{An integer indicating the number of cores to use for parallel computation. The default value is NULL.} } diff --git a/man/RPSS.Rd b/man/RPSS.Rd index a68f21ca4b864411bc19f6a71aa612c910493d66..d70425e62f7df67e3a3c5ef75b0ff9163fb98389 100644 --- a/man/RPSS.Rd +++ b/man/RPSS.Rd @@ -17,6 +17,7 @@ RPSS( weights = NULL, weights_exp = NULL, weights_ref = NULL, + cross.val = FALSE, ncores = NULL ) } @@ -72,6 +73,10 @@ time steps and have more than 45 members if consistency between the weighted \item{weights_ref}{Same as 'weights_exp' but for the reference forecast.} +\item{cross.val}{A logical indicating whether to compute the thresholds between +probabilistics categories in cross-validation. +The default value is FALSE.} + \item{ncores}{An integer indicating the number of cores to use for parallel computation. The default value is NULL.} } diff --git a/tests/testthat/test-RPS.R b/tests/testthat/test-RPS.R index 9363a419f566712ce7fbd9d4a1c6f511b1c028eb..51ba992d93b3e1ee4828bb095bbe363126a8b716 100644 --- a/tests/testthat/test-RPS.R +++ b/tests/testthat/test-RPS.R @@ -84,6 +84,11 @@ test_that("1. Input checks", { RPS(exp1, obs1, Fair = 1), "Parameter 'Fair' must be either TRUE or FALSE." ) + # cross.val + expect_error( + RPS(exp1, obs1, cross.val = 1), + "Parameter 'cross.val' must be either TRUE or FALSE." + ) # weights expect_error( RPS(exp1, obs1, weights = c(0, 1)), @@ -141,6 +146,20 @@ test_that("2. Output checks: dat1", { c(0.3692964, 0.5346627), tolerance = 0.0001 ) + expect_equal( + dim(RPS(exp1, obs1, cross.val = T)), + c(lat = 2) + ) + expect_equal( + as.vector(RPS(exp1, obs1, cross.val = T)), + c(0.3111111, 0.5333333), + tolerance = 0.0001 + ) + expect_equal( + as.vector(RPS(exp1, obs1, cross.val = T, weights = weights1)), + c(0.3559286, 0.6032109), + tolerance = 0.0001 + ) }) diff --git a/tests/testthat/test-RPSS.R b/tests/testthat/test-RPSS.R index fc616e6d6890fc45e94ed91a7355ae8ea3c02558..36efee8df0c317e06a46439643da8dc81a0561ee 100644 --- a/tests/testthat/test-RPSS.R +++ b/tests/testthat/test-RPSS.R @@ -116,6 +116,11 @@ test_that("1. Input checks", { RPSS(exp1, obs1, Fair = 1), "Parameter 'Fair' must be either TRUE or FALSE." ) + # cross.val + expect_error( + RPSS(exp1, obs1, cross.val = 1), + "Parameter 'cross.val' must be either TRUE or FALSE." + ) # weights_exp and weights_ref expect_error( RPSS(exp1, obs1, weights_exp = c(0, 1)), @@ -235,6 +240,21 @@ test_that("2. Output checks: dat1", { as.vector(RPSS(exp1, obs1, ref1, prob_thresholds = seq(0.1, 0.9, 0.1))$sign), c(T, F) ) + expect_equal( + names(RPSS(exp1, obs1, cross.val = T)), + c("rpss", "sign") + ) + expect_equal( + as.vector(RPSS(exp1, obs1, cross.val = T)$rpss), + c(0.2631579, -0.1707317), + tolerance = 0.0001 + ) + expect_equal( + as.vector(RPSS(exp1, obs1, ref1, weights_exp = weights1, weights_ref = weights1, cross.val = T)$rpss), + c(0.6689428, 0.4396209), + tolerance = 0.0001 + ) + })