diff --git a/R/RPS.R b/R/RPS.R index 84956ac85a2ee39250e793ec80dab9f26df75eaa..8ded53e6d1546373f065bca04f07d12bebfbc73a 100644 --- a/R/RPS.R +++ b/R/RPS.R @@ -28,6 +28,11 @@ #'@param Fair A logical indicating whether to compute the FairRPSS (the #' potential RPSS that the forecast would have with an infinite ensemble size). #' The default value is FALSE. +#'@param weights A named two-dimensional numerical array of the weights for each +#' member and time. The dimension names should include 'memb_dim' and +#' 'time_dim'. The default value is NULL. The ensemble should have at least 70 +#' members or span at least 10 time steps and have more than 45 members if +#' consistency between the weighted and unweighted methodologies is desired. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -47,8 +52,8 @@ #'@importFrom easyVerification convert2prob #'@export RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', - prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, - ncores = NULL) { + prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, + weights = NULL, ncores = NULL) { # Check inputs ## exp and obs (1) @@ -106,6 +111,18 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', if (!is.logical(Fair) | length(Fair) > 1) { stop("Parameter 'Fair' must be either TRUE or FALSE.") } + ## weights + if (!is.null(weights)) { + if (!is.array(weights) | !is.numeric(weights)) + stop('Parameter "weights" must be a two-dimensional numeric array.') + if (length(dim(weights)) != 2 | any(!names(dim(weights)) %in% c(memb_dim, time_dim))) + stop("Parameter 'weights' must have two dimensions with the names of memb_dim and time_dim.") + if (dim(weights)[memb_dim] != dim(exp)[memb_dim] | + dim(weights)[time_dim] != dim(exp)[time_dim]) { + stop("Parameter 'weights' must have the same dimension lengths as memb_dim and time_dim in 'exp'.") + } + weights <- Reorder(weights, c(time_dim, memb_dim)) + } ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | @@ -122,7 +139,6 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', } else { target_dims_obs <- c(time_dim, memb_dim) } - rps <- Apply(data = list(exp = exp, obs = obs), target_dims = list(exp = c(time_dim, memb_dim), obs = target_dims_obs), @@ -130,8 +146,8 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', fun = .RPS, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, Fair = Fair, - ncores = ncores)$output1 - + weights = weights, ncores = ncores)$output1 + # browser() # Return only the mean RPS rps <- MeanDims(rps, time_dim, na.rm = FALSE) @@ -140,15 +156,13 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', .RPS <- function(exp, obs, prob_thresholds = c(1/3, 2/3), - indices_for_clim = NULL, Fair = FALSE) { + indices_for_clim = NULL, Fair = FALSE, weights = NULL) { # exp: [sdate, memb] # obs: [sdate, (memb)] - - exp_probs <- .get_probs(data = exp, indices_for_quantiles = indices_for_clim, - prob_thresholds = prob_thresholds) + exp_probs <- .get_probs(data = exp, indices_for_quantiles = indices_for_clim, prob_thresholds = prob_thresholds, weights = weights) # exp_probs: [bin, sdate] obs_probs <- .get_probs(data = obs, indices_for_quantiles = indices_for_clim, - prob_thresholds = prob_thresholds) + prob_thresholds = prob_thresholds, weights = NULL) # obs_probs: [bin, sdate] probs_exp_cumsum <- apply(exp_probs, 2, cumsum) @@ -168,7 +182,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', return(rps) } -.get_probs <- function(data, indices_for_quantiles, prob_thresholds) { +.get_probs <- function(data, indices_for_quantiles, prob_thresholds, weights = NULL) { # if exp: [sdate, memb] # if obs: [sdate, (memb)] @@ -176,7 +190,15 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', if (length(dim(data)) == 1) dim(data) <- c(dim(data), 1) # Absolute thresholds - quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), probs = prob_thresholds, type = 8, na.rm = TRUE) + if (is.null(weights)) { + quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), probs = prob_thresholds, type = 8, na.rm = TRUE) + } else { + # weights: [sdate, memb] + sorted_arrays <- .sorted_distributions(data[indices_for_quantiles, ], weights[indices_for_quantiles, ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + quantiles <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y + } # Probabilities probs <- array(dim = c(bin = length(quantiles) + 1, dim(data)[1])) # [bin, sdate] @@ -184,10 +206,43 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', if (anyNA(data[i_time, ])) { probs[, i_time] <- rep(NA, dim = length(quantiles) + 1) } else { - probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], threshold = quantiles)) + if (is.null(weights)) { + probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], threshold = quantiles)) + } else { + sorted_arrays <- .sorted_distributions(data[i_time, ], weights[i_time, ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + # find any quantiles that are outside the data range + integrated_probs <- array(dim = c(bin = length(quantiles))) + for (i_quant in 1:length(quantiles)) { + # for thresholds falling under the distribution + if (quantiles[i_quant] < min(sorted_data)) { + integrated_probs[i_quant] <- 0 + # for thresholds falling over the distribution + } else if (max(sorted_data) < quantiles[i_quant]) { + integrated_probs[i_quant] <- 1 + } else { + integrated_probs[i_quant] <- approx(sorted_data, cumulative_weights, quantiles[i_quant], "linear")$y + } + } + probs[, i_time] <- append(integrated_probs, 1) - append(0, integrated_probs) + if (min(probs[, i_time]) < 0 | max(probs[, i_time]) > 1) { + stop(paste0("Probability in i_time = ", i_time, " is out of [0, 1].")) + } + } } } - return(probs) } +.sorted_distributions <- function(data_vector, weights_vector) { + weights_vector <- as.vector(weights_vector) + data_vector <- as.vector(data_vector) + weights_vector <- weights_vector / sum(weights_vector) # normalize to 1 + sorter <- order(data_vector) + sorted_weights <- weights_vector[sorter] + cumulative_weights <- cumsum(sorted_weights) - 0.5 * sorted_weights + cumulative_weights <- cumulative_weights - cumulative_weights[1] # fix the 0 + cumulative_weights <- cumulative_weights / cumulative_weights[length(cumulative_weights)] # fix the 1 + return(list("data" = data_vector[sorter], "cumulative_weights" = cumulative_weights)) +} diff --git a/R/RPSS.R b/R/RPSS.R index 04c31378f6e1d733d83a5fb1ea1e5d297612973b..3b24777ddde51eaab88286ca6479a07724e7eeb3 100644 --- a/R/RPSS.R +++ b/R/RPSS.R @@ -34,6 +34,11 @@ #'@param Fair A logical indicating whether to compute the FairRPSS (the #' potential RPSS that the forecast would have with an infinite ensemble size). #' The default value is FALSE. +#'@param weights A named two-dimensional numerical array of the weights for each +#' member and time. The dimension names should include 'memb_dim' and +#' 'time_dim'. The default value is NULL. The ensemble should have at least 70 +#' members or span at least 10 time steps and have more than 45 members if +#' consistency between the weighted and unweighted methodologies is desired. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -62,7 +67,7 @@ #'@export RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, - ncores = NULL) { + weights = NULL, ncores = NULL) { # Check inputs ## exp, obs, and ref (1) @@ -135,6 +140,18 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', if (!is.logical(Fair) | length(Fair) > 1) { stop("Parameter 'Fair' must be either TRUE or FALSE.") } + ## weights + if (!is.null(weights)) { + if (!is.array(weights) | !is.numeric(weights)) + stop('Parameter "weights" must be a two-dimensional numeric array.') + if (length(dim(weights)) != 2 | any(!names(dim(weights)) %in% c(memb_dim, time_dim))) + stop("Parameter 'weights' must have two dimensions with the names of memb_dim and time_dim.") + if (dim(weights)[memb_dim] != dim(exp)[memb_dim] | + dim(weights)[time_dim] != dim(exp)[time_dim]) { + stop("Parameter 'weights' must have the same dimension lengths as memb_dim and time_dim in 'exp'.") + } + weights <- Reorder(weights, c(time_dim, memb_dim)) + } ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | @@ -167,25 +184,26 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', fun = .RPSS, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, Fair = Fair, + weights = weights, ncores = ncores) return(output) } .RPSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3, 2/3), - indices_for_clim = NULL, Fair = FALSE) { + indices_for_clim = NULL, Fair = FALSE, weights = NULL) { # exp: [sdate, memb] # obs: [sdate, (memb)] # ref: [sdate, memb] or NULL # RPS of the forecast rps_exp <- .RPS(exp = exp, obs = obs, prob_thresholds = prob_thresholds, - indices_for_clim = indices_for_clim, Fair = Fair) + indices_for_clim = indices_for_clim, Fair = Fair, weights = weights) # RPS of the reference forecast if (is.null(ref)) { ## using climatology as reference forecast obs_probs <- .get_probs(data = obs, indices_for_quantiles = indices_for_clim, - prob_thresholds = prob_thresholds) + prob_thresholds = prob_thresholds, weights = NULL) # obs_probs: [bin, sdate] clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), 1 - prob_thresholds[length(prob_thresholds)]) @@ -209,7 +227,7 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', } else { # use "ref" as reference forecast rps_ref <- .RPS(exp = ref, obs = obs, prob_thresholds = prob_thresholds, - indices_for_clim = indices_for_clim, Fair = Fair) + indices_for_clim = indices_for_clim, Fair = Fair, weights = weights) } # RPSS diff --git a/man/RPS.Rd b/man/RPS.Rd index d1acc0282797fe813543f0ba47298bf0e1801148..5062671e53b053fc9aa58b395456cca7afd5c528 100644 --- a/man/RPS.Rd +++ b/man/RPS.Rd @@ -12,6 +12,7 @@ RPS( prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, + weights = NULL, ncores = NULL ) } @@ -40,6 +41,10 @@ the whole period is used. The default value is NULL.} potential RPSS that the forecast would have with an infinite ensemble size). The default value is FALSE.} +\item{weights}{A named two-dimensional numerical array of the weights for each +member and time. The dimension names should include 'memb_dim' and +'time_dim'. The default value is NULL.} + \item{ncores}{An integer indicating the number of cores to use for parallel computation. The default value is NULL.} } diff --git a/man/RPSS.Rd b/man/RPSS.Rd index 6324ea2e1bf8537d5dff33ea137ea1a8b41e1ab1..ca33c07174cb051fab060cdf3cad2b778c5cd755 100644 --- a/man/RPSS.Rd +++ b/man/RPSS.Rd @@ -13,6 +13,7 @@ RPSS( prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, + weights = NULL, ncores = NULL ) } @@ -47,6 +48,10 @@ the whole period is used. The default value is NULL.} potential RPSS that the forecast would have with an infinite ensemble size). The default value is FALSE.} +\item{weights}{A named two-dimensional numerical array of the weights for each +member and time. The dimension names should include 'memb_dim' and +'time_dim'. The default value is NULL.} + \item{ncores}{An integer indicating the number of cores to use for parallel computation. The default value is NULL.} } diff --git a/tests/testthat/test-RPS.R b/tests/testthat/test-RPS.R index e5a5f0c9f3027cbdb5a3d2cb7183688bf5e3e32f..04df1bb397b6906c10a16a5be1f92c1858299103 100644 --- a/tests/testthat/test-RPS.R +++ b/tests/testthat/test-RPS.R @@ -7,7 +7,8 @@ set.seed(1) exp1 <- array(rnorm(60), dim = c(member = 3, sdate = 10, lat = 2)) set.seed(2) obs1 <- array(rnorm(20), dim = c(sdate = 10, lat = 2)) - +set.seed(3) +weights1 <- array(abs(rnorm(30)), dim = c(member = 3, sdate = 10)) # dat2 set.seed(1) exp2 <- array(rnorm(20), dim = c(member = 2, sdate = 10)) @@ -76,6 +77,19 @@ test_that("1. Input checks", { RPS(exp1, obs1, Fair = 1), "Parameter 'Fair' must be either TRUE or FALSE." ) + # weights + expect_error( + RPS(exp1, obs1, weights = c(0, 1)), + 'Parameter "weights" must be a two-dimensional numeric array.' + ) + expect_error( + RPS(exp1, obs1, weights = array(1, dim = c(member = 3, time = 10))), + "Parameter 'weights' must have two dimensions with the names of memb_dim and time_dim." + ) + expect_error( + RPS(exp1, obs1, weights = array(1, dim = c(member = 3, sdate = 1))), + "Parameter 'weights' must have the same dimension lengths as memb_dim and time_dim in 'exp'." + ) # ncores expect_error( RPS(exp2, obs2, ncores = 1.5), @@ -111,6 +125,11 @@ as.vector(RPS(exp1, obs1, prob_thresholds = seq(0.1, 0.9, 0.1))), c(1.600000, 1.888889), tolerance = 0.0001 ) +expect_equal( +as.vector(RPS(exp1, obs1, weights = weights1)), +c(0.3692964, 0.5346627), +tolerance = 0.0001 +) }) diff --git a/tests/testthat/test-RPSS.R b/tests/testthat/test-RPSS.R index b4c0b7a4fa5edb8804e08348ce4ec042de9adc4f..c63add02246ef4980a8c388bed7b54a239e62d24 100644 --- a/tests/testthat/test-RPSS.R +++ b/tests/testthat/test-RPSS.R @@ -9,7 +9,8 @@ set.seed(2) obs1 <- array(rnorm(20), dim = c(sdate = 10, lat = 2)) set.seed(3) ref1 <- array(rnorm(40), dim = c(member = 2, sdate = 10, lat = 2)) - +set.seed(4) +weights1 <- array(abs(rnorm(30)), dim = c(member = 3, sdate = 10)) # dat2 set.seed(1) exp2 <- array(rnorm(20), dim = c(member = 2, sdate = 10)) @@ -87,6 +88,19 @@ test_that("1. Input checks", { RPSS(exp1, obs1, Fair = 1), "Parameter 'Fair' must be either TRUE or FALSE." ) + # weights + expect_error( + RPS(exp1, obs1, weights = c(0, 1)), + 'Parameter "weights" must be a two-dimensional numeric array.' + ) + expect_error( + RPS(exp1, obs1, weights = array(1, dim = c(member = 3, time = 10))), + "Parameter 'weights' must have two dimensions with the names of memb_dim and time_dim." + ) + expect_error( + RPS(exp1, obs1, weights = array(1, dim = c(member = 3, sdate = 1))), + "Parameter 'weights' must have the same dimension lengths as memb_dim and time_dim in 'exp'." + ) # ncores expect_error( RPSS(exp2, obs2, ncores = 1.5), @@ -154,6 +168,11 @@ c(0.5259259, 0.4771242), tolerance = 0.0001 ) expect_equal( +as.vector(RPSS(exp1, obs1, ref1, weights = weights1)$rpss), +c(0.6596424, 0.4063579), +tolerance = 0.0001 +) +expect_equal( as.vector(RPSS(exp1, obs1, ref1)$sign), c(FALSE, FALSE) )