diff --git a/R/RPS.R b/R/RPS.R index 84956ac85a2ee39250e793ec80dab9f26df75eaa..e0e19b43c92a2bdfa853bcc9be0211f1adcb17a3 100644 --- a/R/RPS.R +++ b/R/RPS.R @@ -48,7 +48,7 @@ #'@export RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, - ncores = NULL) { + ncores = NULL, weights = NULL) { # Check inputs ## exp and obs (1) @@ -122,7 +122,6 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', } else { target_dims_obs <- c(time_dim, memb_dim) } - rps <- Apply(data = list(exp = exp, obs = obs), target_dims = list(exp = c(time_dim, memb_dim), obs = target_dims_obs), @@ -130,8 +129,8 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', fun = .RPS, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, Fair = Fair, - ncores = ncores)$output1 - + ncores = ncores, weights = weights)$output1 + # browser() # Return only the mean RPS rps <- MeanDims(rps, time_dim, na.rm = FALSE) @@ -140,12 +139,10 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', .RPS <- function(exp, obs, prob_thresholds = c(1/3, 2/3), - indices_for_clim = NULL, Fair = FALSE) { + indices_for_clim = NULL, Fair = FALSE, weights = NULL) { # exp: [sdate, memb] # obs: [sdate, (memb)] - - exp_probs <- .get_probs(data = exp, indices_for_quantiles = indices_for_clim, - prob_thresholds = prob_thresholds) + exp_probs <- .get_probs(data = exp, indices_for_quantiles = indices_for_clim, prob_thresholds = prob_thresholds, weights = weights) # exp_probs: [bin, sdate] obs_probs <- .get_probs(data = obs, indices_for_quantiles = indices_for_clim, prob_thresholds = prob_thresholds) @@ -168,7 +165,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', return(rps) } -.get_probs <- function(data, indices_for_quantiles, prob_thresholds) { +.get_probs <- function(data, indices_for_quantiles, prob_thresholds, weights = NULL) { # if exp: [sdate, memb] # if obs: [sdate, (memb)] @@ -176,18 +173,59 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', if (length(dim(data)) == 1) dim(data) <- c(dim(data), 1) # Absolute thresholds - quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), probs = prob_thresholds, type = 8, na.rm = TRUE) - + if (is.null(weights)){ + quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), probs = prob_thresholds, type = 8, na.rm = TRUE) + } else { + weights <- s2dv::Reorder(data = weights, order = names(dim(data))) # move to check or in .rps ******* + sorted_arrays <- .sorted_distributions(data[indices_for_quantiles, ], weights[indices_for_quantiles, ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + quantiles <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y + } # Probabilities probs <- array(dim = c(bin = length(quantiles) + 1, dim(data)[1])) # [bin, sdate] for (i_time in 1:dim(data)[1]) { if (anyNA(data[i_time, ])) { - probs[, i_time] <- rep(NA, dim = length(quantiles) + 1) + probs[, i_time] <- rep(NA, dim = length(quantiles) + 1) } else { - probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], threshold = quantiles)) + if (is.null(weights)){ + probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], threshold = quantiles)) + } else { + sorted_arrays <- .sorted_distributions(data[i_time, ], weights[i_time, ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + # find any quantiles that are outside the data range + integrated_probs <- array(dim = c(bin=length(quantiles))) + for (i_quant in 1:length(quantiles)) { + # for thresholds falling under the distribution + if (quantiles[i_quant] < min(sorted_data)) { + integrated_probs[i_quant] = 0 + # for thresholds falling over the distribution + } else if (max(sorted_data) < quantiles[i_quant]) { + integrated_probs[i_quant] = 1 + } else { + integrated_probs[i_quant] = approx(sorted_data, cumulative_weights, quantiles[i_quant], "linear")$y + } + } + probs[, i_time] <- append(integrated_probs, 1) - append(0, integrated_probs) + # This shouldn't happen, idk if this cehck is needed then + if (min(probs[, i_time])<0) {print(paste0("ERROR negative probability in i_time=",i_time))} ######################## checks + if (max(probs[, i_time])>1) {print(paste0("ERROR >1 probability in i_time=",i_time))} ######################## checks + } } - } + } return(probs) } +.sorted_distributions <- function(data_vector, weights_vector){ + weights_vector <- as.vector(weights_vector) + data_vector <- as.vector(data_vector) + weights_vector <- weights_vector / sum(weights_vector) # normalize to 1 + sorter <- order(data_vector) + sorted_weights <- weights_vector[sorter] + cumulative_weights <- cumsum(sorted_weights) - 0.5 * sorted_weights + cumulative_weights <- cumulative_weights - cumulative_weights[1] # fix the 0 + cumulative_weights <- cumulative_weights / cumulative_weights[length(cumulative_weights)] # fix the 1 + return(list("data" = data_vector[sorter], "cumulative_weights" = cumulative_weights)) +} \ No newline at end of file