#'Compute the Ranked Probability Skill Score #' #'The Ranked Probability Skill Score (RPSS; Wilks, 2011) is the skill score #'based on the Ranked Probability Score (RPS; Wilks, 2011). It can be used to #'assess whether a forecast presents an improvement or worsening with respect to #'a reference forecast. The RPSS ranges between minus infinite and 1. If the #'RPSS is positive, it indicates that the forecast has higher skill than the #'reference forecast, while a negative value means that it has a lower skill.\cr #'Examples of reference forecasts are the climatological forecast (same #'probabilities for all categories for all time steps), persistence, a previous #'model version, and another model. It is computed as #'\code{RPSS = 1 - RPS_exp / RPS_ref}. The statistical significance is obtained #'based on a Random Walk test at the specified confidence level (DelSole and #'Tippett, 2016).\cr #'The function accepts either the ensemble members or the probabilities of #'each data as inputs. If there is more than one dataset, RPSS will be #'computed for each pair of exp and obs data. The NA ratio of data will be #'examined before the calculation. If the ratio is higher than the threshold #'(assigned by parameter \code{na.rm}), NA will be returned directly. NAs are #'counted by per-pair method, which means that only the time steps that all the #'datasets have values count as non-NA values. #' #'@param exp A named numerical array of either the forecast with at least time #' and member dimensions, or the probabilities with at least time and category #' dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. #'@param obs A named numerical array of either the observation with at least #' time dimension, or the probabilities with at least time and category #' dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. The #' dimensions must be the same as 'exp' except 'memb_dim' and 'dat_dim'. #'@param ref A named numerical array of either the reference forecast with at #' least time and member dimensions, or the probabilities with at least time and #' category dimensions. The probabilities can be generated by #' \code{s2dv::GetProbs}. The dimensions must be the same as 'exp' except #' 'memb_dim' and 'dat_dim'. If there is only one reference dataset, it should #' not have dataset dimension. If there is corresponding reference for each #' experiment, the dataset dimension must have the same length as in 'exp'. If #' 'ref' is NULL, the climatological forecast is used as reference forecast. #' The default value is NULL. #'@param time_dim A character string indicating the name of the time dimension. #' The default value is 'sdate'. #'@param memb_dim A character string indicating the name of the member dimension #' to compute the probabilities of the forecast and the reference forecast. The #' default value is 'member'. If the data are probabilities, set memb_dim as #' NULL. #'@param cat_dim A character string indicating the name of the category #' dimension that is needed when exp, obs, and ref are probabilities. The #' default value is NULL, which means that the data are not probabilities. #'@param dat_dim A character string indicating the name of dataset dimension. #' The length of this dimension can be different between 'exp' and 'obs'. #' The default value is NULL. #'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to #' 1) between the categories. The default value is c(1/3, 2/3), which #' corresponds to tercile equiprobable categories. #'@param indices_for_clim A vector of the indices to be taken along 'time_dim' #' for computing the thresholds between the probabilistic categories. If NULL, #' the whole period is used. The default value is NULL. #'@param Fair A logical indicating whether to compute the FairRPSS (the #' potential RPSS that the forecast would have with an infinite ensemble size). #' The default value is FALSE. #'@param weights_exp A named numerical array of the forecast ensemble weights #' for probability calculation. The dimension should include 'memb_dim', #' 'time_dim' and 'dat_dim' if there are multiple datasets. All dimension #' lengths must be equal to 'exp' dimension lengths. The default value is NULL, #' which means no weighting is applied. The ensemble should have at least 70 #' members or span at least 10 time steps and have more than 45 members if #' consistency between the weighted and unweighted methodologies is desired. #'@param weights_ref Same as 'weights_exp' but for the reference forecast. #'@param cross.val A logical indicating whether to compute the thresholds #' between probabilistics categories in cross-validation. The default value is #' FALSE. #'@param na.rm A logical or numeric value between 0 and 1. If it is numeric, it #' means the lower limit for the fraction of the non-NA values. 1 is equal to #' FALSE (no NA is acceptable), 0 is equal to TRUE (all NAs are acceptable). # The function returns NA if the fraction of non-NA values in the data is less #' than na.rm. Otherwise, RPS will be calculated. The default value is FALSE. #'@param sig_method.type A character string indicating the test type of the #' significance method. Check \code{RandomWalkTest()} parameter #' \code{test.type} for details. The default is 'two.sided.approx', which is #' the default of \code{RandomWalkTest()}. #'@param alpha A numeric of the significance level to be used in the statistical #' significance test. The default value is 0.05. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' #'@return #'\item{$rpss}{ #' A numerical array of RPSS with dimensions c(nexp, nobs, the rest dimensions #' of 'exp' except 'time_dim' and 'memb_dim' dimensions). nexp is the number of #' experiment (i.e., dat_dim in exp), and nobs is the number of observation #' i.e., dat_dim in obs). If dat_dim is NULL, nexp and nobs are omitted. #'} #'\item{$sign}{ #' A logical array of the statistical significance of the RPSS with the same #' dimensions as $rpss. #'} #' #'@references #'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 #'DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1 #' #'@examples #'set.seed(1) #'exp <- array(rnorm(3000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) #'set.seed(2) #'obs <- array(rnorm(300), dim = c(lat = 3, lon = 2, sdate = 50)) #'set.seed(3) #'ref <- array(rnorm(3000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) #'weights <- sapply(1:dim(exp)['sdate'], function(i) { #' n <- abs(rnorm(10)) #' n/sum(n) #' }) #'dim(weights) <- c(member = 10, sdate = 50) #'# Use data as input #'res <- RPSS(exp = exp, obs = obs) ## climatology as reference forecast #'res <- RPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast #'res <- RPSS(exp = exp, obs = obs, ref = ref, weights_exp = weights, weights_ref = weights) #'res <- RPSS(exp = exp, obs = obs, alpha = 0.01, sig_method.type = 'two.sided') #' #'# Use probs as input #'exp_probs <- GetProbs(exp, memb_dim = 'member') #'obs_probs <- GetProbs(obs, memb_dim = NULL) #'ref_probs <- GetProbs(ref, memb_dim = 'member') #'res <- RPSS(exp = exp_probs, obs = obs_probs, ref = ref_probs, memb_dim = NULL, #' cat_dim = 'bin') #' #'@import multiApply #'@export RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, nmemb = NULL, nmemb_ref = NULL, weights_exp = NULL, weights_ref = NULL, cross.val = FALSE, na.rm = FALSE, sig_method.type = 'two.sided.approx', alpha = 0.05, ncores = NULL) { # Check inputs ## exp, obs, and ref (1) if (!is.array(exp) | !is.numeric(exp)) { stop("Parameter 'exp' must be a numeric array.") } if (!is.array(obs) | !is.numeric(obs)) { stop("Parameter 'obs' must be a numeric array.") } if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp' and 'obs' must have dimension names.") } if (!is.null(ref)) { if (!is.array(ref) | !is.numeric(ref)) stop("Parameter 'ref' must be a numeric array.") if (any(is.null(names(dim(ref)))) | any(nchar(names(dim(ref))) == 0)) { stop("Parameter 'ref' must have dimension names.") } } ## time_dim if (!is.character(time_dim) | length(time_dim) != 1) { stop("Parameter 'time_dim' must be a character string.") } if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") } if (!is.null(ref) & !time_dim %in% names(dim(ref))) { stop("Parameter 'time_dim' is not found in 'ref' dimension.") } ## memb_dim & cat_dim if (is.null(memb_dim) + is.null(cat_dim) != 1) { stop("Only one of the two parameters 'memb_dim' and 'cat_dim' can have value.") } ## memb_dim if (!is.null(memb_dim)) { if (!is.character(memb_dim) | length(memb_dim) > 1) { stop("Parameter 'memb_dim' must be a character string.") } if (!memb_dim %in% names(dim(exp))) { stop("Parameter 'memb_dim' is not found in 'exp' dimension.") } if (!is.null(ref) & !memb_dim %in% names(dim(ref))) { stop("Parameter 'memb_dim' is not found in 'ref' dimension.") } } ## cat_dim if (!is.null(cat_dim)) { if (!is.character(cat_dim) | length(cat_dim) > 1) { stop("Parameter 'cat_dim' must be a character string.") } if (!cat_dim %in% names(dim(exp)) | !cat_dim %in% names(dim(obs)) | (!is.null(ref) & !cat_dim %in% names(dim(ref)))) { stop("Parameter 'cat_dim' is not found in 'exp', 'obs', or 'ref' dimension.") } } ## dat_dim if (!is.null(dat_dim)) { if (!is.character(dat_dim) | length(dat_dim) > 1) { stop("Parameter 'dat_dim' must be a character string.") } if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", " Set it as NULL if there is no dataset dimension.") } } ## exp, obs, and ref (2) name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) if (!is.null(memb_dim)) { name_exp <- name_exp[-which(name_exp == memb_dim)] if (memb_dim %in% name_obs) { name_obs <- name_obs[-which(name_obs == memb_dim)] } } if (!is.null(dat_dim)) { name_exp <- name_exp[-which(name_exp == dat_dim)] name_obs <- name_obs[-which(name_obs == dat_dim)] } if (!identical(length(name_exp), length(name_obs)) | !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { stop("Parameter 'exp' and 'obs' must have same length of ", "all dimensions except 'memb_dim' and 'dat_dim'.") } if (!is.null(ref)) { name_ref <- sort(names(dim(ref))) if (!is.null(memb_dim)) { name_ref <- name_ref[-which(name_ref == memb_dim)] } if (!is.null(dat_dim)) { if (dat_dim %in% name_ref) { if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { stop("If parameter 'ref' has dataset dimension, it must be", " equal to dataset dimension of 'exp'.") } name_ref <- name_ref[-which(name_ref == dat_dim)] } } if (!identical(length(name_exp), length(name_ref)) | !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { stop("Parameter 'exp' and 'ref' must have the same length of ", "all dimensions except 'memb_dim' and 'dat_dim' if there is ", "only one reference dataset.") } } ## prob_thresholds if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.") } ## indices_for_clim if (is.null(indices_for_clim)) { indices_for_clim <- seq_len(dim(obs)[time_dim]) } else { if (!is.numeric(indices_for_clim) | !is.vector(indices_for_clim)) { stop("Parameter 'indices_for_clim' must be NULL or a numeric vector.") } else if (length(indices_for_clim) > dim(obs)[time_dim] | max(indices_for_clim) > dim(obs)[time_dim] | any(indices_for_clim) < 1) { stop("Parameter 'indices_for_clim' should be the indices of 'time_dim'.") } } ## Fair if (!is.logical(Fair) | length(Fair) > 1) { stop("Parameter 'Fair' must be either TRUE or FALSE.") } ## cross.val if (!is.logical(cross.val) | length(cross.val) > 1) { stop("Parameter 'cross.val' must be either TRUE or FALSE.") } ## weights_exp if (!is.null(weights_exp) & is.null(cat_dim)) { if (!is.array(weights_exp) | !is.numeric(weights_exp)) stop("Parameter 'weights_exp' must be a named numeric array.") if (is.null(dat_dim)) { if (length(dim(weights_exp)) != 2 | !all(names(dim(weights_exp)) %in% c(memb_dim, time_dim))) { stop("Parameter 'weights_exp' must have two dimensions with the names of ", "'memb_dim' and 'time_dim'.") } if (dim(weights_exp)[memb_dim] != dim(exp)[memb_dim] | dim(weights_exp)[time_dim] != dim(exp)[time_dim]) { stop("Parameter 'weights_exp' must have the same dimension lengths as ", "'memb_dim' and 'time_dim' in 'exp'.") } weights_exp <- Reorder(weights_exp, c(time_dim, memb_dim)) } else { if (length(dim(weights_exp)) != 3 | !all(names(dim(weights_exp)) %in% c(memb_dim, time_dim, dat_dim))) { stop("Parameter 'weights_exp' must have three dimensions with the names of ", "'memb_dim', 'time_dim' and 'dat_dim'.") } if (dim(weights_exp)[memb_dim] != dim(exp)[memb_dim] | dim(weights_exp)[time_dim] != dim(exp)[time_dim] | dim(weights_exp)[dat_dim] != dim(exp)[dat_dim]) { stop("Parameter 'weights_exp' must have the same dimension lengths ", "as 'memb_dim', 'time_dim' and 'dat_dim' in 'exp'.") } weights_exp <- Reorder(weights_exp, c(time_dim, memb_dim, dat_dim)) } } else if (!is.null(weights_exp) & !is.null(cat_dim)) { .warning(paste0("Parameter 'exp' is probability already, so parameter ", "'weights_exp' is not used. Change 'weights_exp' to NULL.")) weights_exp <- NULL } ## weights_ref if (!is.null(weights_ref) & is.null(cat_dim)) { if (!is.array(weights_ref) | !is.numeric(weights_ref)) stop("Parameter 'weights_ref' must be a named numeric array.") if (is.null(dat_dim) | ((!is.null(dat_dim)) && (!dat_dim %in% names(dim(ref))))) { if (length(dim(weights_ref)) != 2 | !all(names(dim(weights_ref)) %in% c(memb_dim, time_dim))) { stop("Parameter 'weights_ref' must have two dimensions with the names of ", "'memb_dim' and 'time_dim'.") } if (dim(weights_ref)[memb_dim] != dim(exp)[memb_dim] | dim(weights_ref)[time_dim] != dim(exp)[time_dim]) { stop("Parameter 'weights_ref' must have the same dimension lengths as ", "'memb_dim' and 'time_dim' in 'ref'.") } weights_ref <- Reorder(weights_ref, c(time_dim, memb_dim)) } else { if (length(dim(weights_ref)) != 3 | !all(names(dim(weights_ref)) %in% c(memb_dim, time_dim, dat_dim))) { stop("Parameter 'weights_ref' must have three dimensions with the names of ", "'memb_dim', 'time_dim' and 'dat_dim'.") } if (dim(weights_ref)[memb_dim] != dim(ref)[memb_dim] | dim(weights_ref)[time_dim] != dim(ref)[time_dim] | dim(weights_ref)[dat_dim] != dim(ref)[dat_dim]) { stop("Parameter 'weights_ref' must have the same dimension lengths ", "as 'memb_dim', 'time_dim' and 'dat_dim' in 'ref'.") } weights_ref <- Reorder(weights_ref, c(time_dim, memb_dim, dat_dim)) } } else if (!is.null(weights_ref) & !is.null(cat_dim)) { .warning(paste0("Parameter 'ref' is probability already, so parameter ", "'weights_ref' is not used. Change 'weights_ref' to NULL.")) weights_ref <- NULL } ## na.rm if (!isTRUE(na.rm) & !isFALSE(na.rm) & !(is.numeric(na.rm) & na.rm >= 0 & na.rm <= 1)) { stop('"na.rm" should be TRUE, FALSE or a numeric between 0 and 1') } ## alpha if (any(!is.numeric(alpha) | alpha <= 0 | alpha >= 1 | length(alpha) > 1)) { stop("Parameter 'alpha' must be a number between 0 and 1.") } ## sig_method.type #NOTE: These are the types of RandomWalkTest() if (!sig_method.type %in% c('two.sided.approx', 'two.sided', 'greater', 'less')) { stop("Parameter 'sig_method.type' must be 'two.sided.approx', 'two.sided', ", "'greater', or 'less'.") } if (sig_method.type == 'two.sided.approx' && alpha != 0.05) { .warning("DelSole and Tippett (2016) aproximation is valid for alpha ", "= 0.05 only. Returning the significance at the 0.05 significance level.") } ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { stop("Parameter 'ncores' must be either NULL or a positive integer.") } } ############################### # Compute RPSS ## Decide target_dims if (!is.null(memb_dim)) { target_dims_exp <- c(time_dim, memb_dim, dat_dim) if (!memb_dim %in% names(dim(obs))) { target_dims_obs <- c(time_dim, dat_dim) } else { target_dims_obs <- c(time_dim, memb_dim, dat_dim) } } else { # cat_dim target_dims_exp <- target_dims_obs <- c(time_dim, cat_dim, dat_dim) } if (!is.null(ref)) { # use "ref" as reference forecast if (!is.null(memb_dim)) { if (!is.null(dat_dim) && (dat_dim %in% names(dim(ref)))) { target_dims_ref <- c(time_dim, memb_dim, dat_dim) } else { target_dims_ref <- c(time_dim, memb_dim) } } else { target_dims_ref <- c(time_dim, cat_dim, dat_dim) } data <- list(exp = exp, obs = obs, ref = ref) target_dims = list(exp = target_dims_exp, obs = target_dims_obs, ref = target_dims_ref) } else { data <- list(exp = exp, obs = obs) target_dims = list(exp = target_dims_exp, obs = target_dims_obs) } output <- Apply(data, target_dims = target_dims, fun = .RPSS, time_dim = time_dim, memb_dim = memb_dim, cat_dim = cat_dim, dat_dim = dat_dim, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, Fair = Fair, nmemb = nmemb, nmemb_ref = nmemb_ref, weights_exp = weights_exp, weights_ref = weights_ref, cross.val = cross.val, na.rm = na.rm, sig_method.type = sig_method.type, alpha = alpha, ncores = ncores) return(output) } .RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, nmemb = NULL, nmemb_ref = NULL, weights_exp = NULL, weights_ref = NULL, cross.val = FALSE, na.rm = FALSE, sig_method.type = 'two.sided.approx', alpha = 0.05) { #--- if memb_dim: # exp: [sdate, memb, (dat)] # obs: [sdate, (memb), (dat)] # ref: [sdate, memb, (dat)] or NULL #--- if cat_dim: # exp: [sdate, bin, (dat)] # obs: [sdate, bin, (dat)] # ref: [sdate, bin, (dat)] or NULL if (isTRUE(na.rm)) { f_NAs <- 0 } else if (isFALSE(na.rm)) { f_NAs <- 1 } else { f_NAs <- na.rm } if (is.null(dat_dim)) { nexp <- 1 nobs <- 1 } else { nexp <- as.numeric(dim(exp)[dat_dim]) nobs <- as.numeric(dim(obs)[dat_dim]) } # Calculate RPS if (!is.null(ref)) { # Adjust dimensions to be [sdate, memb, dat] for both exp, obs, and ref ## Insert memb_dim in obs if (!is.null(memb_dim)) { if (!memb_dim %in% names(dim(obs))) { obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim) } } ## Insert dat_dim if (is.null(dat_dim)) { dim(obs) <- c(dim(obs), dat = nobs) dim(exp) <- c(dim(exp), dat = nexp) if (!is.null(weights_exp)) dim(weights_exp) <- c(dim(weights_exp), dat = nexp) } if (is.null(dat_dim) || (!is.null(dat_dim) && !dat_dim %in% names(dim(ref)))) { nref <- 1 dim(ref) <- c(dim(ref), dat = nref) if (!is.null(weights_ref)) dim(weights_ref) <- c(dim(weights_ref), dat = nref) } else { nref <- as.numeric(dim(ref)[dat_dim]) # should be the same as nexp } # Find good values then calculate RPS rps_exp <- array(NA, dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs)) rps_ref <- array(NA, dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs)) for (i in 1:nexp) { for (j in 1:nobs) { for (k in 1:nref) { if (nref != 1 & k != i) { # if nref is 1 or equal to nexp, calculate rps next } exp_data <- exp[, , i, drop = F] obs_data <- obs[, , j, drop = F] ref_data <- ref[, , k, drop = F] exp_mean <- rowMeans(exp_data) obs_mean <- rowMeans(obs_data) ref_mean <- rowMeans(ref_data) good_values <- !is.na(exp_mean) & !is.na(obs_mean) & !is.na(ref_mean) dum <- match(indices_for_clim, which(good_values)) good_indices_for_clim <- dum[!is.na(dum)] if (f_NAs <= sum(good_values) / length(good_values)) { rps_exp[good_values, i, j] <- .RPS(exp = exp[good_values, , i], obs = obs[good_values, , j], time_dim = time_dim, memb_dim = memb_dim, cat_dim = cat_dim, dat_dim = NULL, prob_thresholds = prob_thresholds, indices_for_clim = good_indices_for_clim, Fair = Fair, nmemb = nmemb, weights = weights_exp[good_values, , i], cross.val = cross.val, na.rm = na.rm) rps_ref[good_values, i, j] <- .RPS(exp = ref[good_values, , k], obs = obs[good_values, , j], time_dim = time_dim, memb_dim = memb_dim, cat_dim = cat_dim, dat_dim = NULL, prob_thresholds = prob_thresholds, indices_for_clim = good_indices_for_clim, Fair = Fair, nmemb = nmemb_ref, weights = weights_ref[good_values, , k], na.rm = na.rm, cross.val = cross.val) } } } } } else { # ref is NULL rps_exp <- .RPS(exp = exp, obs = obs, time_dim = time_dim, memb_dim = memb_dim, cat_dim = cat_dim, dat_dim = dat_dim, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, Fair = Fair, nmemb = nmemb, weights = weights_exp, cross.val = cross.val, na.rm = na.rm) # RPS of the reference forecast if (!is.null(memb_dim)) { if (!memb_dim %in% names(dim(obs))) { obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim) } } rps_ref <- array(NA, dim = c(dim(obs)[time_dim], nexp = nexp, nobs = nobs)) if (is.null(dat_dim)) { dim(obs) <- c(dim(obs), nobs = nobs) dim(exp) <- c(dim(exp), nexp = nexp) dim(rps_exp) <- dim(rps_ref) } for (i in 1:nexp) { for (j in 1:nobs) { # Use good values only good_values <- !is.na(rps_exp[, i, j]) if (f_NAs <= sum(good_values) / length(good_values)) { obs_data <- obs[good_values, , j] if (is.null(dim(obs_data))) dim(obs_data) <- c(length(obs_data), 1) if (is.null(cat_dim)) { # calculate probs # Subset indices_for_clim dum <- match(indices_for_clim, which(good_values)) good_indices_for_clim <- dum[!is.na(dum)] obs_probs <- .GetProbs(data = obs_data, indices_for_quantiles = good_indices_for_clim, prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val) } else { obs_probs <- t(obs_data) } # obs_probs: [bin, sdate] clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), 1 - prob_thresholds[length(prob_thresholds)]) clim_probs <- array(clim_probs, dim = dim(obs_probs)) # clim_probs: [bin, sdate] # Calculate RPS for each time step probs_clim_cumsum <- apply(clim_probs, 2, cumsum) probs_obs_cumsum <- apply(obs_probs, 2, cumsum) rps_ref[good_values, i, j] <- colSums((probs_clim_cumsum - probs_obs_cumsum)^2) } if (Fair) { # FairRPS if (!is.null(memb_dim)) { if (memb_dim %in% names(dim(exp))) { ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) ## [formula taken from SpecsVerification::EnsRps] R <- dim(obs)[1] #number of years } } else { R <- nmemb_ref } adjustment <- (-1) / (R - 1) * probs_clim_cumsum * (1 - probs_clim_cumsum) adjustment <- colSums(adjustment) rps_ref[, i, j] <- rps_ref[, i, j] + adjustment } } } } if (is.null(dat_dim)) { dim(rps_ref) <- dim(rps_exp) <- dim(exp)[time_dim] } #---------------------------------------------- # Calculate RPSS if (!is.null(dat_dim)) { # rps_exp and rps_ref: [sdate, nexp, nobs] rps_exp_mean <- colMeans(rps_exp, na.rm = TRUE) rps_ref_mean <- colMeans(rps_ref, na.rm = TRUE) rpss <- array(dim = c(nexp = nexp, nobs = nobs)) sign <- array(dim = c(nexp = nexp, nobs = nobs)) if (!all(is.na(rps_exp_mean))) { for (i in 1:nexp) { for (j in 1:nobs) { rpss[i, j] <- 1 - rps_exp_mean[i, j] / rps_ref_mean[i, j] ind_nonNA <- !is.na(rps_exp[, i, j]) if (!any(ind_nonNA)) { sign[i, j] <- NA } else { sign[i, j] <- .RandomWalkTest(skill_A = rps_exp[ind_nonNA, i, j], skill_B = rps_ref[ind_nonNA, i, j], test.type = sig_method.type, alpha = alpha, sign = T, pval = F)$sign } } } } # Turn NaN into NA if (any(is.nan(rpss))) rpss[which(is.nan(rpss))] <- NA } else { # dat_dim is NULL ind_nonNA <- !is.na(rps_exp) if (!any(ind_nonNA)) { rpss <- NA sign <- NA } else { # rps_exp and rps_ref: [sdate] rpss <- 1 - mean(rps_exp, na.rm = TRUE) / mean(rps_ref, na.rm = TRUE) sign <- .RandomWalkTest(skill_A = rps_exp[ind_nonNA], skill_B = rps_ref[ind_nonNA], test.type = sig_method.type, alpha = alpha, sign = T, pval = F)$sign } } return(list(rpss = rpss, sign = sign)) }