diff --git a/DESCRIPTION b/DESCRIPTION index ec571f4ce6bd0ba036ae274b7e085237d6c19863..bd6fe2dbe1028b0bef5c263d36a41515c79fe15a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -50,5 +50,5 @@ BugReports: https://earth.bsc.es/gitlab/es/s2dv/-/issues LazyData: true SystemRequirements: cdo Encoding: UTF-8 -RoxygenNote: 7.2.0 +RoxygenNote: 7.3.1 Config/testthat/edition: 3 diff --git a/R/CRPS.R b/R/CRPS.R index 6524beef0aa4ea4c3a66683abc4874b4d8d1cc51..bb63095c0e0e58f18a6c24c70d747776af0325be 100644 --- a/R/CRPS.R +++ b/R/CRPS.R @@ -25,6 +25,9 @@ #'@param Fair A logical indicating whether to compute the FairCRPS (the #' potential CRPS that the forecast would have with an infinite ensemble size). #' The default value is FALSE. +#'@param return_mean A logical indicating whether to return the temporal mean +#' of the CRPS or not. If TRUE, the temporal mean is calculated along time_dim, +#' if FALSE the time dimension is not aggregated. The default is TRUE. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -47,7 +50,7 @@ #'@importFrom ClimProjDiags Subset #'@export CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, - Fair = FALSE, ncores = NULL) { + Fair = FALSE, return_mean = TRUE, ncores = NULL) { # Check inputs ## exp and obs (1) if (!is.array(exp) | !is.numeric(exp)) @@ -103,9 +106,13 @@ CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NU "all dimensions except 'memb_dim' and 'dat_dim'.") } ## Fair - if (!is.logical(Fair) | length(Fair) > 1) { + if (!is.logical(Fair) | length(Fair) > 1) { stop("Parameter 'Fair' must be either TRUE or FALSE.") } + ## return_mean + if (!is.logical(return_mean) | length(return_mean) > 1) { + stop("Parameter 'return_mean' must be either TRUE or FALSE.") + } ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { @@ -124,17 +131,21 @@ CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NU Fair = Fair, ncores = ncores)$output1 - # Return only the mean CRPS - crps <- MeanDims(crps, time_dim, na.rm = FALSE) + if (return_mean) { + crps <- MeanDims(crps, time_dim, na.rm = FALSE) + } else { + crps <- crps + } return(crps) } .CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, Fair = FALSE) { + # exp: [sdate, memb, (dat_dim)] # obs: [sdate, (dat_dim)] - + # Adjust dimensions if needed if (is.null(dat_dim)) { nexp <- 1 @@ -145,28 +156,28 @@ CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NU nexp <- as.numeric(dim(exp)[dat_dim]) nobs <- as.numeric(dim(obs)[dat_dim]) } - + # for FairCRPS R_new <- ifelse(Fair, Inf, NA) - + CRPS <- array(dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs)) - + for (i in 1:nexp) { for (j in 1:nobs) { exp_data <- exp[, , i] obs_data <- obs[, j] - + if (is.null(dim(exp_data))) dim(exp_data) <- c(dim(exp)[1:2]) if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1]) - + crps <- SpecsVerification::enscrps_cpp(ens = exp_data, obs = obs_data, R_new = R_new) CRPS[, i, j] <- crps } } - + if (is.null(dat_dim)) { dim(CRPS) <- c(dim(CRPS)[time_dim]) } - + return(CRPS) } diff --git a/R/RPS.R b/R/RPS.R index 29f9bc318b7b36cc98fd344b59c0ada1ed4e59e6..59b2d01a0d842967cdaa4d0351ca1321e19ccf8c 100644 --- a/R/RPS.R +++ b/R/RPS.R @@ -52,6 +52,9 @@ #'@param cross.val A logical indicating whether to compute the thresholds #' between probabilistic categories in cross-validation. The default value is #' FALSE. +#'@param return_mean A logical indicating whether to return the temporal mean +#' of the RPS or not. If TRUE, the temporal mean is calculated along time_dim, +#' if FALSE the time dimension is not aggregated. The default is TRUE. #'@param na.rm A logical or numeric value between 0 and 1. If it is numeric, it #' means the lower limit for the fraction of the non-NA values. 1 is equal to #' FALSE (no NA is acceptable), 0 is equal to TRUE (all NAs are acceptable). @@ -85,7 +88,8 @@ #'@export RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, - Fair = FALSE, weights = NULL, cross.val = FALSE, na.rm = FALSE, ncores = NULL) { + Fair = FALSE, weights = NULL, cross.val = FALSE, return_mean = TRUE, + na.rm = FALSE, ncores = NULL) { # Check inputs ## exp and obs (1) @@ -171,9 +175,13 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL } } ## Fair - if (!is.logical(Fair) | length(Fair) > 1) { + if (!is.logical(Fair) | length(Fair) > 1) { stop("Parameter 'Fair' must be either TRUE or FALSE.") } + ## return_mean + if (!is.logical(return_mean) | length(return_mean) > 1) { + stop("Parameter 'return_mean' must be either TRUE or FALSE.") + } ## cross.val if (!is.logical(cross.val) | length(cross.val) > 1) { stop("Parameter 'cross.val' must be either TRUE or FALSE.") @@ -192,7 +200,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL "as 'memb_dim' and 'time_dim' in 'exp'.") } weights <- Reorder(weights, c(time_dim, memb_dim)) - + } else { if (length(dim(weights)) != 3 | !all(names(dim(weights)) %in% c(memb_dim, time_dim, dat_dim))) stop("Parameter 'weights' must have three dimensions with the names of ", @@ -204,7 +212,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL "as 'memb_dim', 'time_dim' and 'dat_dim' in 'exp'.") } weights <- Reorder(weights, c(time_dim, memb_dim, dat_dim)) - + } } else if (!is.null(weights) & !is.null(cat_dim)) { .warning(paste0("Parameter 'exp' and 'obs' are probabilities already, so parameter ", @@ -218,15 +226,15 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { + length(ncores) > 1) { stop("Parameter 'ncores' must be either NULL or a positive integer.") } } - + ############################### - + # Compute RPS - + ## Decide target_dims if (!is.null(memb_dim)) { target_dims_exp <- c(time_dim, memb_dim, dat_dim) @@ -238,7 +246,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL } else { # cat_dim target_dims_exp <- target_dims_obs <- c(time_dim, cat_dim, dat_dim) } - + rps <- Apply(data = list(exp = exp, obs = obs), target_dims = list(exp = target_dims_exp, obs = target_dims_obs), @@ -249,10 +257,13 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL indices_for_clim = indices_for_clim, Fair = Fair, weights = weights, cross.val = cross.val, na.rm = na.rm, ncores = ncores)$output1 - - # Return only the mean RPS - rps <- MeanDims(rps, time_dim, na.rm = TRUE) - + + if (return_mean) { + rps <- MeanDims(rps, time_dim, na.rm = TRUE) + } else { + rps <- rps + } + return(rps) } @@ -267,14 +278,14 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL #--- if cat_dim: # exp: [sdate, bin, (dat)] # obs: [sdate, bin, (dat)] - + # Adjust dimensions to be [sdate, memb, dat] for both exp and obs if (!is.null(memb_dim)) { if (!memb_dim %in% names(dim(obs))) { obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim) } } - + if (is.null(dat_dim)) { nexp <- 1 nobs <- 1 @@ -285,17 +296,17 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL nexp <- as.numeric(dim(exp)[dat_dim]) nobs <- as.numeric(dim(obs)[dat_dim]) } - + rps <- array(dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs)) - + for (i in 1:nexp) { for (j in 1:nobs) { exp_data <- exp[, , i] obs_data <- obs[, , j] - + if (is.null(dim(exp_data))) dim(exp_data) <- c(dim(exp)[1:2]) if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1:2]) - + # Find the fraction of NAs ## If any member/bin is NA at this time step, it is not good value. exp_mean <- rowMeans(exp_data) @@ -309,7 +320,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL } else { f_NAs <- na.rm } - + if (f_NAs <= sum(good_values) / length(obs_mean)) { exp_data <- exp_data[good_values, , drop = F] @@ -323,11 +334,11 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL } else { weights_data <- weights #NULL } - + # Subset indices_for_clim dum <- match(indices_for_clim, which(good_values)) good_indices_for_clim <- dum[!is.na(dum)] - + exp_probs <- .GetProbs(data = exp_data, indices_for_quantiles = good_indices_for_clim, prob_thresholds = prob_thresholds, weights = weights_data, cross.val = cross.val) @@ -347,7 +358,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL # rps: [sdate, nexp, nobs] rps [good_values, i, j] <- colSums((probs_exp_cumsum - probs_obs_cumsum)^2) - + if (Fair) { # FairRPS ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) ## [formula taken from SpecsVerification::EnsRps] @@ -365,11 +376,11 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL } } - + if (is.null(dat_dim)) { dim(rps) <- dim(exp)[time_dim] } - + return(rps) } diff --git a/man/CRPS.Rd b/man/CRPS.Rd index 453c1994608459bdb95eeb36149915b69599f19d..97e6a4838cee93f11a8ebb908ca51d27dd719d1f 100644 --- a/man/CRPS.Rd +++ b/man/CRPS.Rd @@ -11,6 +11,7 @@ CRPS( memb_dim = "member", dat_dim = NULL, Fair = FALSE, + return_mean = TRUE, ncores = NULL ) } @@ -36,6 +37,10 @@ default value is NULL.} potential CRPS that the forecast would have with an infinite ensemble size). The default value is FALSE.} +\item{return_mean}{A logical indicating whether to return the temporal mean +of the CRPS or not. If TRUE, the temporal mean is calculated along time_dim, +if FALSE the time dimension is not aggregated. The default is TRUE.} + \item{ncores}{An integer indicating the number of cores to use for parallel computation. The default value is NULL.} } diff --git a/man/RPS.Rd b/man/RPS.Rd index 041ca0779961570b72cb3349dc8669f3aff0b525..b1374db10226d970d1a4c089f5999a30b011d4da 100644 --- a/man/RPS.Rd +++ b/man/RPS.Rd @@ -16,6 +16,7 @@ RPS( Fair = FALSE, weights = NULL, cross.val = FALSE, + return_mean = TRUE, na.rm = FALSE, ncores = NULL ) @@ -68,6 +69,10 @@ the weighted and unweighted methodologies is desired.} between probabilistic categories in cross-validation. The default value is FALSE.} +\item{return_mean}{A logical indicating whether to return the temporal mean +of the RPS or not. If TRUE, the temporal mean is calculated along time_dim, +if FALSE the time dimension is not aggregated. The default is TRUE.} + \item{na.rm}{A logical or numeric value between 0 and 1. If it is numeric, it means the lower limit for the fraction of the non-NA values. 1 is equal to FALSE (no NA is acceptable), 0 is equal to TRUE (all NAs are acceptable).