diff --git a/R/RPS.R b/R/RPS.R index c5ff5ba483c307adc1f3110599ecbf2b904d829d..c385f10cf32097c58e94386515762fcb72200405 100644 --- a/R/RPS.R +++ b/R/RPS.R @@ -13,7 +13,8 @@ #'The function first calculates the probabilities for forecasts and observations, #'then use them to calculate RPS. Or, the probabilities of exp and obs can be #'provided directly to compute the score. If there is more than one dataset, RPS -#'will be computed for each pair of exp and obs data. +#'will be computed for each pair of exp and obs data. The fraction of acceptable +#'NAs can be adjusted. #' #'@param exp A named numerical array of either the forecasts with at least time #' and member dimensions, or the probabilities with at least time and category @@ -48,9 +49,14 @@ #' default value is NULL. The ensemble should have at least 70 members or span #' at least 10 time steps and have more than 45 members if consistency between #' the weighted and unweighted methodologies is desired. -#'@param cross.val A logical indicating whether to compute the thresholds between -#' probabilistic categories in cross-validation. -#' The default value is FALSE. +#'@param cross.val A logical indicating whether to compute the thresholds +#' between probabilistic categories in cross-validation. The default value is +#' FALSE. +#'@param na.rm A logical or numeric value between 0 and 1. If it is numeric, it +#' means the lower limit for the fraction of the non-NA values. 1 is equal to +#' FALSE (no NA is acceptable), 0 is equal to TRUE (all NAs are acceptable). +# The function returns NA if the fraction of non-NA values in the data is less +#' than na.rm. Otherwise, RPS will be calculated. The default value is FALSE. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -79,7 +85,7 @@ #'@export RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, - Fair = FALSE, weights = NULL, cross.val = FALSE, ncores = NULL) { + Fair = FALSE, weights = NULL, cross.val = FALSE, na.rm = FALSE, ncores = NULL) { # Check inputs ## exp and obs (1) @@ -203,6 +209,10 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL "'weights' is not used. Change 'weights' to NULL.")) weights <- NULL } + ## na.rm + if (!isTRUE(na.rm) & !isFALSE(na.rm) & !(is.numeric(na.rm) & na.rm >= 0 & na.rm <= 1)) { + stop('"na.rm" should be TRUE, FALSE or a numeric between 0 and 1') + } ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | @@ -235,10 +245,11 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL memb_dim = memb_dim, cat_dim = cat_dim, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, Fair = Fair, - weights = weights, cross.val = cross.val, ncores = ncores)$output1 + weights = weights, cross.val = cross.val, + na.rm = na.rm, ncores = ncores)$output1 # Return only the mean RPS - rps <- MeanDims(rps, time_dim, na.rm = FALSE) + rps <- MeanDims(rps, time_dim, na.rm = TRUE) return(rps) } @@ -246,7 +257,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL .RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, - Fair = FALSE, weights = NULL, cross.val = FALSE) { + Fair = FALSE, weights = NULL, cross.val = FALSE, na.rm = FALSE) { #--- if memb_dim: # exp: [sdate, memb, (dat)] # obs: [sdate, (memb), (dat)] @@ -283,40 +294,71 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL if (is.null(dim(exp_data))) dim(exp_data) <- c(dim(exp)[1:2]) if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1:2]) - # If the data inputs are forecast/observation, calculate probabilities - if (is.null(cat_dim)) { - if (!is.null(weights)) { - weights_data <- weights[ , , i] - if (is.null(dim(weights_data))) dim(weights_data) <- c(dim(weights)[1:2]) - } else { - weights_data <- weights - } - - exp_probs <- .GetProbs(data = exp_data, indices_for_quantiles = indices_for_clim, - prob_thresholds = prob_thresholds, weights = weights_data, cross.val = cross.val) - # exp_probs: [bin, sdate] - obs_probs <- .GetProbs(data = obs_data, indices_for_quantiles = indices_for_clim, - prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val) - # obs_probs: [bin, sdate] - - } else { # inputs are probabilities already - exp_probs <- t(exp_data) - obs_probs <- t(obs_data) + # Find the fraction of NAs + ## If any member/bin is NA at this time step, it is not good value. + exp_mean <- rowMeans(exp_data) + obs_mean <- rowMeans(obs_data) + good_values <- !is.na(exp_mean) & !is.na(obs_mean) + + if (isTRUE(na.rm)) { + f_NAs <- 0 + } else if (isFALSE(na.rm)) { + f_NAs <- 1 + } else { + f_NAs <- na.rm } - probs_exp_cumsum <- apply(exp_probs, 2, cumsum) - probs_obs_cumsum <- apply(obs_probs, 2, cumsum) + if (f_NAs <= sum(good_values) / length(obs_mean)) { + + exp_data <- exp_data[good_values, , drop = F] + obs_data <- obs_data[good_values, , drop = F] + + # If the data inputs are forecast/observation, calculate probabilities + if (is.null(cat_dim)) { + if (!is.null(weights)) { + weights_data <- weights[which(good_values) , , i] + if (is.null(dim(weights_data))) dim(weights_data) <- c(dim(weights)[1:2]) + } else { + weights_data <- weights #NULL + } - # rps: [sdate, nexp, nobs] - rps[ , i, j] <- colSums((probs_exp_cumsum - probs_obs_cumsum)^2) - if (Fair) { # FairRPS - ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps] - R <- dim(exp)[2] #memb - R_new <- Inf - adjustment <- (-1) / (R - 1) * probs_exp_cumsum * (1 - probs_exp_cumsum) - adjustment <- colSums(adjustment) - rps[ , i, j] <- rps[ , i, j] + adjustment + # Subset indices_for_clim + dum <- match(indices_for_clim, which(good_values)) + good_indices_for_clim <- dum[!is.na(dum)] + + exp_probs <- .GetProbs(data = exp_data, indices_for_quantiles = good_indices_for_clim, + prob_thresholds = prob_thresholds, weights = weights_data, cross.val = cross.val) + # exp_probs: [bin, sdate] + obs_probs <- .GetProbs(data = obs_data, indices_for_quantiles = good_indices_for_clim, + prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val) + # obs_probs: [bin, sdate] + + } else { # inputs are probabilities already + exp_probs <- t(exp_data) + obs_probs <- t(obs_data) + } + + probs_exp_cumsum <- apply(exp_probs, 2, cumsum) + probs_obs_cumsum <- apply(obs_probs, 2, cumsum) + + # rps: [sdate, nexp, nobs] + rps [good_values, i, j] <- colSums((probs_exp_cumsum - probs_obs_cumsum)^2) + + if (Fair) { # FairRPS + ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps] + R <- dim(exp)[2] #memb + R_new <- Inf + adjustment <- (-1) / (R - 1) * probs_exp_cumsum * (1 - probs_exp_cumsum) + adjustment <- colSums(adjustment) + rps[ , i, j] <- rps[ , i, j] + adjustment + } + + } else { ## not enough values different from NA + + rps[ , i, j] <- as.numeric(NA) + } + } } @@ -327,3 +369,5 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL return(rps) } + + diff --git a/R/RPSS.R b/R/RPSS.R index 6299eb8c19ecfd0f6dd00de25b1da3df8c581864..7e2b3ac8b6d7753f8eab108271c41a8e15357a4f 100644 --- a/R/RPSS.R +++ b/R/RPSS.R @@ -5,7 +5,7 @@ #'assess whether a forecast presents an improvement or worsening with respect to #'a reference forecast. The RPSS ranges between minus infinite and 1. If the #'RPSS is positive, it indicates that the forecast has higher skill than the -#'reference forecast, while a negative value means that it has a lower skill. +#'reference forecast, while a negative value means that it has a lower skill.\cr #'Examples of reference forecasts are the climatological forecast (same #'probabilities for all categories for all time steps), persistence, a previous #'model version, and another model. It is computed as @@ -13,8 +13,12 @@ #'based on a Random Walk test at the 95% confidence level (DelSole and Tippett, #'2016).\cr #'The function accepts either the ensemble members or the probabilities of -#' each data as inputs. If there is more than one dataset, RPSS will be -#' computed for each pair of exp and obs data. +#'each data as inputs. If there is more than one dataset, RPSS will be +#'computed for each pair of exp and obs data. The NA ratio of data will be +#'examined before the calculation. If the ratio is higher than the threshold +#'(assigned by parameter \code{na.rm}), NA will be returned directly. NAs are +#'counted by per-pair method, which means that only the time steps that all the +#'datasets have values count as non-NA values. #' #'@param exp A named numerical array of either the forecast with at least time #' and member dimensions, or the probabilities with at least time and category @@ -61,9 +65,14 @@ #' members or span at least 10 time steps and have more than 45 members if #' consistency between the weighted and unweighted methodologies is desired. #'@param weights_ref Same as 'weights_exp' but for the reference forecast. -#'@param cross.val A logical indicating whether to compute the thresholds between -#' probabilistics categories in cross-validation. -#' The default value is FALSE. +#'@param cross.val A logical indicating whether to compute the thresholds +#' between probabilistics categories in cross-validation. The default value is +#' FALSE. +#'@param na.rm A logical or numeric value between 0 and 1. If it is numeric, it +#' means the lower limit for the fraction of the non-NA values. 1 is equal to +#' FALSE (no NA is acceptable), 0 is equal to TRUE (all NAs are acceptable). +# The function returns NA if the fraction of non-NA values in the data is less +#' than na.rm. Otherwise, RPS will be calculated. The default value is FALSE. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -110,7 +119,7 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, weights_exp = NULL, weights_ref = NULL, - cross.val = FALSE, ncores = NULL) { + cross.val = FALSE, na.rm = FALSE, ncores = NULL) { # Check inputs ## exp, obs, and ref (1) @@ -301,6 +310,10 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', "'weights_ref' is not used. Change 'weights_ref' to NULL.")) weights_ref <- NULL } + ## na.rm + if (!isTRUE(na.rm) & !isFALSE(na.rm) & !(is.numeric(na.rm) & na.rm >= 0 & na.rm <= 1)) { + stop('"na.rm" should be TRUE, FALSE or a numeric between 0 and 1') + } ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | @@ -355,7 +368,7 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', weights_exp = weights_exp, weights_ref = weights_ref, cross.val = cross.val, - ncores = ncores) + na.rm = na.rm, ncores = ncores) return(output) @@ -363,8 +376,8 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', .RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, - Fair = FALSE, weights_exp = NULL, weights_ref = NULL, cross.val = FALSE) { - + Fair = FALSE, weights_exp = NULL, weights_ref = NULL, cross.val = FALSE, + na.rm = FALSE) { #--- if memb_dim: # exp: [sdate, memb, (dat)] # obs: [sdate, (memb), (dat)] @@ -374,6 +387,14 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', # obs: [sdate, bin, (dat)] # ref: [sdate, bin, (dat)] or NULL + if (isTRUE(na.rm)) { + f_NAs <- 0 + } else if (isFALSE(na.rm)) { + f_NAs <- 1 + } else { + f_NAs <- na.rm + } + if (is.null(dat_dim)) { nexp <- 1 nobs <- 1 @@ -381,107 +402,177 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', nexp <- as.numeric(dim(exp)[dat_dim]) nobs <- as.numeric(dim(obs)[dat_dim]) } - - # RPS of the forecast - rps_exp <- .RPS(exp = exp, obs = obs, time_dim = time_dim, memb_dim = memb_dim, - cat_dim = cat_dim, dat_dim = dat_dim, prob_thresholds = prob_thresholds, - indices_for_clim = indices_for_clim, Fair = Fair, weights = weights_exp, - cross.val = cross.val) - # RPS of the reference forecast - if (is.null(ref)) { ## using climatology as reference forecast + # Calculate RPS + + if (!is.null(ref)) { + + # Adjust dimensions to be [sdate, memb, dat] for both exp, obs, and ref + ## Insert memb_dim in obs if (!is.null(memb_dim)) { if (!memb_dim %in% names(dim(obs))) { obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim) } } + ## Insert dat_dim if (is.null(dat_dim)) { - dim(obs) <- c(dim(obs), nobs = nobs) + dim(obs) <- c(dim(obs), dat = nobs) + dim(exp) <- c(dim(exp), dat = nexp) + if (!is.null(weights_exp)) dim(weights_exp) <- c(dim(weights_exp), dat = nexp) } - rps_ref <- array(dim = c(dim(obs)[time_dim], nobs = nobs)) - - for (j in 1:nobs) { - if (is.null(cat_dim)) { # calculate probs - obs_data <- obs[ , , j] - if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1:2]) - obs_probs <- .GetProbs(data = obs_data, indices_for_quantiles = indices_for_clim, - prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val) - # obs_probs: [bin, sdate] - } else { - obs_probs <- t(obs[ , , j]) + if (is.null(dat_dim) || (!is.null(dat_dim) && !dat_dim %in% names(dim(ref)))) { + nref <- 1 + dim(ref) <- c(dim(ref), dat = nref) + if (!is.null(weights_ref)) dim(weights_ref) <- c(dim(weights_ref), dat = nref) + } else { + nref <- as.numeric(dim(ref)[dat_dim]) # should be the same as nexp + } + + # Find good values then calculate RPS + rps_exp <- array(NA, dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs)) + rps_ref <- array(NA, dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs)) + for (i in 1:nexp) { + for (j in 1:nobs) { + for (k in 1:nref) { + if (nref != 1 & k!=i) { # if nref is 1 or equal to nexp, calculate rps + next + } + exp_data <- exp[, , i, drop = F] + obs_data <- obs[, , j, drop = F] + ref_data <- ref[, , k, drop = F] + exp_mean <- rowMeans(exp_data) + obs_mean <- rowMeans(obs_data) + ref_mean <- rowMeans(ref_data) + good_values <- !is.na(exp_mean) & !is.na(obs_mean) & !is.na(ref_mean) + dum <- match(indices_for_clim, which(good_values)) + good_indices_for_clim <- dum[!is.na(dum)] + + if (f_NAs <= sum(good_values) / length(good_values)) { + rps_exp[good_values,i,j] <- .RPS(exp = exp[good_values, , i], obs = obs[good_values, , j], + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = NULL, + prob_thresholds = prob_thresholds, + indices_for_clim = good_indices_for_clim, + Fair = Fair, weights = weights_exp[good_values, , i], + cross.val = cross.val, na.rm = na.rm) + rps_ref[good_values,i,j] <- .RPS(exp = ref[good_values, , k], obs = obs[good_values, , j], + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = NULL, + prob_thresholds = prob_thresholds, + indices_for_clim = good_indices_for_clim, + Fair = Fair, weights = weights_ref[good_values, , k], + na.rm = na.rm, cross.val = cross.val) + } + } } - clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), - 1 - prob_thresholds[length(prob_thresholds)]) - clim_probs <- array(clim_probs, dim = dim(obs_probs)) - # clim_probs: [bin, sdate] - - # Calculate RPS for each time step - probs_clim_cumsum <- apply(clim_probs, 2, cumsum) - probs_obs_cumsum <- apply(obs_probs, 2, cumsum) - rps_ref[ , j] <- colSums((probs_clim_cumsum - probs_obs_cumsum)^2) - # if (Fair) { # FairRPS - # ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps] - # R <- dim(exp)[2] #memb - # R_new <- Inf - # adjustment <- (-1) / (R - 1) * probs_clim_cumsum * (1 - probs_clim_cumsum) - # adjustment <- apply(adjustment, 2, sum) - # rps_ref <- rps_ref + adjustment - # } + } + + } else { # ref is NULL + rps_exp <- .RPS(exp = exp, obs = obs, time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, weights = weights_exp, + cross.val = cross.val, na.rm = na.rm) + # RPS of the reference forecast + if (!is.null(memb_dim)) { + if (!memb_dim %in% names(dim(obs))) { + obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim) + } } + + rps_ref <- array(NA, dim = c(dim(obs)[time_dim], nexp = nexp, nobs = nobs)) + if (is.null(dat_dim)) { - dim(rps_ref) <- dim(exp)[time_dim] + dim(obs) <- c(dim(obs), nobs = nobs) + dim(exp) <- c(dim(exp), nexp = nexp) + dim(rps_exp) <- dim(rps_ref) } - } else { # use "ref" as reference forecast - if (!is.null(dat_dim) && (!dat_dim %in% names(dim(ref)))) { - remove_dat_dim <- TRUE - ref <- InsertDim(ref, posdim = 3, lendim = 1, name = dat_dim) - if (!is.null(weights_ref)) { - weights_ref <- InsertDim(weights_ref, posdim = 3, lendim = 1, name = dat_dim) + for (i in 1:nexp) { + for (j in 1:nobs) { + # Use good values only + good_values <- !is.na(rps_exp[, i, j]) + if (f_NAs <= sum(good_values) / length(good_values)) { + obs_data <- obs[good_values, , j] + if (is.null(dim(obs_data))) dim(obs_data) <- c(length(obs_data), 1) + + if (is.null(cat_dim)) { # calculate probs + # Subset indices_for_clim + dum <- match(indices_for_clim, which(good_values)) + good_indices_for_clim <- dum[!is.na(dum)] + obs_probs <- .GetProbs(data = obs_data, indices_for_quantiles = good_indices_for_clim, + prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val) + } else { + obs_probs <- t(obs_data) + } + # obs_probs: [bin, sdate] + + clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), + 1 - prob_thresholds[length(prob_thresholds)]) + clim_probs <- array(clim_probs, dim = dim(obs_probs)) + # clim_probs: [bin, sdate] + + # Calculate RPS for each time step + probs_clim_cumsum <- apply(clim_probs, 2, cumsum) + probs_obs_cumsum <- apply(obs_probs, 2, cumsum) + rps_ref[good_values, i, j] <- colSums((probs_clim_cumsum - probs_obs_cumsum)^2) } - } else { - remove_dat_dim <- FALSE - } + # if (Fair) { # FairRPS + # ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps] + # R <- dim(exp)[2] #memb + # R_new <- Inf + # adjustment <- (-1) / (R - 1) * probs_clim_cumsum * (1 - probs_clim_cumsum) + # adjustment <- apply(adjustment, 2, sum) + # rps_ref <- rps_ref + adjustment + # } - rps_ref <- .RPS(exp = ref, obs = obs, time_dim = time_dim, memb_dim = memb_dim, - cat_dim = cat_dim, dat_dim = dat_dim, prob_thresholds = prob_thresholds, - indices_for_clim = indices_for_clim, Fair = Fair, weights = weights_ref, - cross.val = cross.val) - if (!is.null(dat_dim)) { - if (isTRUE(remove_dat_dim)) { - dim(rps_ref) <- dim(rps_ref)[-2] } } } - - if (!is.null(dat_dim)) { - rps_exp_mean <- MeanDims(rps_exp, time_dim, na.rm = FALSE) - rps_ref_mean <- MeanDims(rps_ref, time_dim, na.rm = FALSE) + if (is.null(dat_dim)) { + dim(rps_ref) <- dim(rps_exp) <- dim(exp)[time_dim] + } + +#---------------------------------------------- + # Calculate RPSS + + if (!is.null(dat_dim)) { + # rps_exp and rps_ref: [sdate, nexp, nobs] + rps_exp_mean <- colMeans(rps_exp, na.rm = TRUE) + rps_ref_mean <- colMeans(rps_ref, na.rm = TRUE) rpss <- array(dim = c(nexp = nexp, nobs = nobs)) sign <- array(dim = c(nexp = nexp, nobs = nobs)) - if (length(dim(rps_ref_mean)) == 1) { - for (i in 1:nexp) { - for (j in 1:nobs) { - rpss[i, j] <- 1 - rps_exp_mean[i, j] / rps_ref_mean[j] - sign[i, j] <- .RandomWalkTest(skill_A = rps_exp[, i, j], skill_B = rps_ref[, j])$sign - } - } - } else { + if (any(!is.na(rps_exp_mean))) { for (i in 1:nexp) { for (j in 1:nobs) { rpss[i, j] <- 1 - rps_exp_mean[i, j] / rps_ref_mean[i, j] - sign[i, j] <- .RandomWalkTest(skill_A = rps_exp[, i, j], skill_B = rps_ref[, i, j])$sign + ind_nonNA <- !is.na(rps_exp[, i, j]) + if (!any(ind_nonNA)) { + sign[i, j] <- NA + } else { + sign[i, j] <- .RandomWalkTest(skill_A = rps_exp[ind_nonNA, i, j], skill_B = rps_ref[ind_nonNA, i, j])$sign + } } } } - } else { - rpss <- 1 - mean(rps_exp) / mean(rps_ref) - # Significance - sign <- .RandomWalkTest(skill_A = rps_exp, skill_B = rps_ref, sign = T, pval = F)$sign + + # Turn NaN into NA + if (any(is.nan(rpss))) rpss[which(is.nan(rpss))] <- NA + + } else { # dat_dim is NULL + + ind_nonNA <- !is.na(rps_exp) + if (!any(ind_nonNA)) { + rpss <- NA + sign <- NA + } else { + # rps_exp and rps_ref: [sdate] + rpss <- 1 - mean(rps_exp, na.rm = TRUE) / mean(rps_ref, na.rm = TRUE) + sign <- .RandomWalkTest(skill_A = rps_exp[ind_nonNA], skill_B = rps_ref[ind_nonNA], sign = T, pval = F)$sign + } } - + return(list(rpss = rpss, sign = sign)) } diff --git a/man/RPS.Rd b/man/RPS.Rd index 2e21227f899b3b42f10ea35e32dff76eef24c715..041ca0779961570b72cb3349dc8669f3aff0b525 100644 --- a/man/RPS.Rd +++ b/man/RPS.Rd @@ -16,6 +16,7 @@ RPS( Fair = FALSE, weights = NULL, cross.val = FALSE, + na.rm = FALSE, ncores = NULL ) } @@ -63,9 +64,14 @@ default value is NULL. The ensemble should have at least 70 members or span at least 10 time steps and have more than 45 members if consistency between the weighted and unweighted methodologies is desired.} -\item{cross.val}{A logical indicating whether to compute the thresholds between -probabilistic categories in cross-validation. -The default value is FALSE.} +\item{cross.val}{A logical indicating whether to compute the thresholds +between probabilistic categories in cross-validation. The default value is +FALSE.} + +\item{na.rm}{A logical or numeric value between 0 and 1. If it is numeric, it +means the lower limit for the fraction of the non-NA values. 1 is equal to +FALSE (no NA is acceptable), 0 is equal to TRUE (all NAs are acceptable). +than na.rm. Otherwise, RPS will be calculated. The default value is FALSE.} \item{ncores}{An integer indicating the number of cores to use for parallel computation. The default value is NULL.} @@ -90,7 +96,8 @@ and 1.\cr The function first calculates the probabilities for forecasts and observations, then use them to calculate RPS. Or, the probabilities of exp and obs can be provided directly to compute the score. If there is more than one dataset, RPS -will be computed for each pair of exp and obs data. +will be computed for each pair of exp and obs data. The fraction of acceptable +NAs can be adjusted. } \examples{ # Use synthetic data diff --git a/man/RPSS.Rd b/man/RPSS.Rd index a6abe342d9f408330a97302c1ca3918e2476ad3f..0cf7ba56d30901ffed25af2a49d2fa703043b20c 100644 --- a/man/RPSS.Rd +++ b/man/RPSS.Rd @@ -18,6 +18,7 @@ RPSS( weights_exp = NULL, weights_ref = NULL, cross.val = FALSE, + na.rm = FALSE, ncores = NULL ) } @@ -79,9 +80,14 @@ consistency between the weighted and unweighted methodologies is desired.} \item{weights_ref}{Same as 'weights_exp' but for the reference forecast.} -\item{cross.val}{A logical indicating whether to compute the thresholds between -probabilistics categories in cross-validation. -The default value is FALSE.} +\item{cross.val}{A logical indicating whether to compute the thresholds +between probabilistics categories in cross-validation. The default value is +FALSE.} + +\item{na.rm}{A logical or numeric value between 0 and 1. If it is numeric, it +means the lower limit for the fraction of the non-NA values. 1 is equal to +FALSE (no NA is acceptable), 0 is equal to TRUE (all NAs are acceptable). +than na.rm. Otherwise, RPS will be calculated. The default value is FALSE.} \item{ncores}{An integer indicating the number of cores to use for parallel computation. The default value is NULL.} @@ -104,7 +110,7 @@ based on the Ranked Probability Score (RPS; Wilks, 2011). It can be used to assess whether a forecast presents an improvement or worsening with respect to a reference forecast. The RPSS ranges between minus infinite and 1. If the RPSS is positive, it indicates that the forecast has higher skill than the -reference forecast, while a negative value means that it has a lower skill. +reference forecast, while a negative value means that it has a lower skill.\cr Examples of reference forecasts are the climatological forecast (same probabilities for all categories for all time steps), persistence, a previous model version, and another model. It is computed as @@ -113,7 +119,11 @@ based on a Random Walk test at the 95% confidence level (DelSole and Tippett, 2016).\cr The function accepts either the ensemble members or the probabilities of each data as inputs. If there is more than one dataset, RPSS will be -computed for each pair of exp and obs data. +computed for each pair of exp and obs data. The NA ratio of data will be +examined before the calculation. If the ratio is higher than the threshold +(assigned by parameter \code{na.rm}), NA will be returned directly. NAs are +counted by per-pair method, which means that only the time steps that all the +datasets have values count as non-NA values. } \examples{ set.seed(1) diff --git a/tests/testthat/test-RPS.R b/tests/testthat/test-RPS.R index 624eb378060ebb7e415b865795ead5910b2abdd3..d87da87eb4785fa64604671f36d129f676e82823 100644 --- a/tests/testthat/test-RPS.R +++ b/tests/testthat/test-RPS.R @@ -28,10 +28,18 @@ obs2_1 <- array(rnorm(10), dim = c(member = 1, sdate = 10)) set.seed(1) exp3 <- array(rnorm(40), dim = c(member = 2, sdate = 10, dataset = 2)) set.seed(2) -obs3 <- array(rnorm(30), dim = c(member = 1, sdate = 10, dataset = 3)) +obs3 <- array(rnorm(60), dim = c(member = 3, sdate = 10, dataset = 3)) set.seed(3) weights3 <- array(abs(rnorm(30)), dim = c(member = 2, sdate = 10, dataset = 2)) +# dat4 +exp4 <- exp3 +obs4 <- obs3 +obs4[2, 1, 1] <- NA +obs4[3, 2, 1] <- NA +exp4[1, 1, 2] <- NA +weights4 <- weights3 + ############################################## test_that("1. Input checks", { # exp and obs (1) @@ -199,7 +207,7 @@ test_that("3. Output checks: dat2", { }) ############################################## -test_that("3. Output checks: dat3", { +test_that("4. Output checks: dat3", { expect_equal( dim(RPS(exp3, obs3, dat_dim = 'dataset')), @@ -207,12 +215,12 @@ test_that("3. Output checks: dat3", { ) expect_equal( as.vector(RPS(exp3, obs3, dat_dim = 'dataset')), - c(0.75, 0.65, 0.75, 0.85, 0.75, 0.55), + c(0.3388889, 0.3388889, 0.2944444, 0.3277778, 0.3388889, 0.3388889), tolerance = 0.0001 ) expect_equal( as.vector(RPS(exp3, obs3, dat_dim = 'dataset', indices_for_clim = 2:5, prob_thresholds = seq(0.1, 0.9, 0.1))), - c(2.75, 2.45, 2.55, 2.55, 2.65, 2.15), + c(1.394444, 1.394444, 1.250000, 1.316667, 1.394444, 1.394444), tolerance = 0.0001 ) # weights @@ -222,8 +230,77 @@ test_that("3. Output checks: dat3", { ) expect_equal( as.vector(RPS(exp3, obs3, weights = weights3, dat_dim = 'dataset')), - c(0.7365024, 0.8316852, 0.6778686, 1.0256509, 0.8406320, 0.6385640), + c(0.3255765, 0.4290578, 0.2917297, 0.3554689, 0.3255765, 0.4290578), tolerance = 0.0001 ) }) + +############################################## +test_that("5. Output checks: dat4", { + + res1 <- RPS(exp4, obs4, dat_dim = 'dataset') + + expect_equal( + which(is.na(res1)), + c(1, 2, 4, 6) + ) + expect_equal( + res1[1, 2:3], + RPS(exp3, obs3, dat_dim = 'dataset')[1, 2:3] + ) + + res2 <- RPS(exp4, obs4, dat_dim = 'dataset', na.rm = T) + + expect_equal( + res2, + RPS(exp4, obs4, dat_dim = 'dataset', na.rm = 0) + ) + expect_equal( + which(is.na(res2)), + integer(0) + ) + expect_equal( + c(res2), + c(0.3472222, 0.3680556, 0.2944444, 0.3333333, 0.3388889, 0.2222222), + tolerance = 0.0001 + ) + + expect_equal( + RPS(exp4, obs4, dat_dim = 'dataset', na.rm = 0.8), + res2 + ) + expect_equal( + RPS(exp4, obs4, dat_dim = 'dataset', na.rm = 0.95), + res1 + ) + expect_equal( + which(is.na(RPS(exp4, obs4, dat_dim = 'dataset', na.rm = 0.9))), + c(1, 2) + ) + + # weights + res3 <- RPS(exp4, obs4, weights = weights4, dat_dim = 'dataset') + expect_equal( + which(is.na(res3)), + c(1, 2, 4, 6) + ) + expect_equal( + res3[1, 2:3], + RPS(exp3, obs3, dat_dim = 'dataset',weights = weights3)[1, 2:3] + ) + + res4 <- RPS(exp4, obs4, weights = weights4, dat_dim = 'dataset', na.rm = 0) + expect_equal( + c(res4), + c(0.3865228, 0.4885273, 0.2917297, 0.4143631, 0.3255765, 0.4028817), + tolerance = 0.0001 + ) + + expect_equal( + which(is.na(RPS(exp4, obs4, weights = weights4, dat_dim = 'dataset', na.rm = 0.9))), + c(1, 2) + ) + +}) + diff --git a/tests/testthat/test-RPSS.R b/tests/testthat/test-RPSS.R index f054325f729ff120a5c1570d1b2c12a164d3ea14..81c5a84949cdcef2c219ce4b16f15e8e18b740cd 100644 --- a/tests/testthat/test-RPSS.R +++ b/tests/testthat/test-RPSS.R @@ -15,6 +15,11 @@ exp1_2 <- GetProbs(exp1, memb_dim = 'member') obs1_2 <- GetProbs(obs1, memb_dim = NULL) ref1_2 <- GetProbs(ref1, memb_dim = 'member') +# dat1_3: NAs +exp1_3 <- exp1; exp1_3[1, 2, 1] <- NA +obs1_3 <- obs1; obs1_3[2, 1] <- NA +ref1_3 <- ref1; ref1_3[1, 3, 1] <- NA + # dat2 set.seed(1) exp2 <- array(rnorm(20), dim = c(member = 2, sdate = 10)) @@ -49,6 +54,12 @@ weights_exp4 <- array(abs(rnorm(60)), dim = c(member = 2, sdate = 10, dataset = set.seed(5) weights_ref4 <- array(abs(rnorm(20)), dim = c(member = 2, sdate = 10)) +# dat4_2: NAs +exp4_2 <- exp4; exp4_2[1, 2, 1, 1] <- NA +obs4_2 <- obs4; obs4_2[1, 1:4, 1, 1] <- NA +ref4_2 <- ref4 + + ############################################## test_that("1. Input checks", { # exp and obs (1) @@ -268,6 +279,29 @@ test_that("2. Output checks: dat1", { RPSS(exp1_2, obs1_2, ref1_2, memb_dim = NULL, cat_dim = 'bin') ) + # dat1_3 + expect_equal( + as.vector(RPSS(exp1_3, obs1_3)$rpss), + c(NA, -0.05263158), + tolerance = 0.0001 + ) + expect_equal( + as.vector(RPSS(exp1_3, obs1_3)$sign), + c(NA, FALSE) + ) + expect_equal( + as.vector(RPSS(exp1_3, obs1_3, na.rm = T)$rpss), + c(0.16666667, -0.05263158), + tolerance = 0.0001 + ) + expect_equal( + as.vector(RPSS(exp1_3, obs1_3, na.rm = T)$sign), + c(F, F) + ) + expect_equal( + as.vector(RPSS(exp1_3, obs1_3, na.rm = 0.9)$sign), + c(F, F) + ) }) @@ -434,4 +468,38 @@ test_that("5. Output checks: dat4", { RPSS(exp3, obs3, weights_exp = weights3, dat_dim = 'dataset')$rpss ) + # dat4_2: NAs + expect_equal( + as.vector(RPSS(exp4_2, obs4_2, ref4_2, dat_dim = 'dataset')$rpss[, , 1]), + c(NA, NA, NA, NA, c(-0.3076923, 0.1538462)), + tolerance = 0.0001 + ) + expect_equal( + as.vector(RPSS(exp4_2, obs4_2, ref4_2, dat_dim = 'dataset')$rpss[, , 2]), + c(0, 0.1333333, -0.4, 0.1176471, 0, 0.3529412), + tolerance = 0.0001 + ) + expect_equal( + which(is.na(RPSS(exp4_2, obs4_2, ref4_2, dat_dim = 'dataset')$rpss)), + which(is.na(RPSS(exp4_2, obs4_2, ref4_2, dat_dim = 'dataset')$sign)) + ) + expect_equal( + as.vector(RPSS(exp4_2, obs4_2, ref4_2, dat_dim = 'dataset', na.rm = 0.9)$rpss[, , 1]), + c(NA, NA, NA, -0.3333333, -0.3076923, 0.1538462), + tolerance = 0.0001 + ) + expect_equal( + which(is.na(RPSS(exp4_2, obs4_2, ref4_2, dat_dim = 'dataset', na.rm = 0.9)$rpss)), + which(is.na(RPSS(exp4_2, obs4_2, ref4_2, dat_dim = 'dataset', na.rm = 0.9)$sign)) + ) + expect_equal( + as.vector(RPSS(exp4_2, obs4_2, ref4_2, dat_dim = 'dataset', na.rm = 0.6)$rpss[, , 1]), + c(0.25, 0.1666667, -0.1666667, -0.3333333, -0.3076923, 0.1538462), + tolerance = 0.0001 + ) + expect_equal( + which(is.na(RPSS(exp4_2, obs4_2, ref4_2, dat_dim = 'dataset', na.rm = 0.6)$rpss)), + which(is.na(RPSS(exp4_2, obs4_2, ref4_2, dat_dim = 'dataset', na.rm = 0.6)$sign)) + ) + })