diff --git a/conf/variable-dictionary.yml b/conf/variable-dictionary.yml
index 5127ae14e566b4eef51ca5a4c61df06750e53f09..dc491efbd11fa5a6efd38b460e383e30065c5ac0 100644
--- a/conf/variable-dictionary.yml
+++ b/conf/variable-dictionary.yml
@@ -101,3 +101,11 @@ metrics:
     long_name: "Continuous Ranked Probability Score"
   crpss:
     long_name: "Continuous Ranked Probability Skill Score"
+  mean_bias:
+    long_name: "Mean Bias"
+  mean_bias_ss:
+    long_name: "Mean Bias Skill Score"
+  mean_bias_ss_significance:
+    long_name: "Mean Bias Skill Score Statistical Significance"
+  enssprerr:
+    long_name: "Ensemble Spread-To-Error Ratio"
diff --git a/modules/Loading/testing_recipes/recipe_4.yml b/modules/Loading/testing_recipes/recipe_4.yml
index 717ef6922f3015943e871790a37ad26ad2d85696..4e7896f526108a94abdda8672f000155d7522ccb 100644
--- a/modules/Loading/testing_recipes/recipe_4.yml
+++ b/modules/Loading/testing_recipes/recipe_4.yml
@@ -31,7 +31,7 @@ Analysis:
     Calibration:
       method: mse_min
     Skill:
-      metric: RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr
+      metric: RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr mean_bias mean_bias_SS
     Probabilities:
       percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]]
     Indicators:
diff --git a/modules/Loading/testing_recipes/recipe_era5land.yml b/modules/Loading/testing_recipes/recipe_era5land.yml
index 55da61e5f970039391319604b3026bb075c7b037..c99906b68c37e65c19c98ab062947d6f2d078e37 100644
--- a/modules/Loading/testing_recipes/recipe_era5land.yml
+++ b/modules/Loading/testing_recipes/recipe_era5land.yml
@@ -33,7 +33,7 @@ Analysis:
     Skill:
       metric: RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr
     Probabilities:
-      percentiles: [[1/3, 2/3], [1/10, 9/10]] #, [1/4, 2/4, 3/4]]
+      percentiles: [[1/4, 2/4, 3/4]]
     Indicators:
       index: no
   ncores: 1
diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R
index a301a1e6eb8b7c1d96427d7a92404343efd8b562..f50a06ff760742f199ccf3582dd21f688b4081ee 100644
--- a/modules/Saving/Saving.R
+++ b/modules/Saving/Saving.R
@@ -6,6 +6,7 @@ save_data <- function(recipe, archive, data,
                       calibrated_data = NULL,
                       skill_metrics = NULL,
                       probabilities = NULL) {
+  # Wrapper for the saving functions.
   # recipe: The auto-s2s recipe
   # archive: The auto-s2s archive
@@ -13,6 +14,7 @@ save_data <- function(recipe, archive, data,
   # calibrated_data: output of calibrate_datasets()
   # skill_metrics: output of compute_skill_metrics()
   # probabilities: output of compute_probabilities()
+  # mean_bias: output of compute_mean_bias()

   if (is.null(recipe)) {
     stop("The 'recipe' parameter is mandatory.")
@@ -33,9 +35,11 @@ save_data <- function(recipe, archive, data,

   # Export hindcast, forecast and observations onto outfile
   if (!is.null(calibrated_data)) {
-    save_forecast(calibrated_data$hcst, recipe, dict, outdir, archive = archive, type = 'hcst')
+    save_forecast(calibrated_data$hcst, recipe, dict, outdir,
+                  archive = archive, type = 'hcst')
     if (!is.null(calibrated_data$fcst)) {
-      save_forecast(calibrated_data$fcst, recipe, dict, outdir, archive = archive, type = 'fcst')
+      save_forecast(calibrated_data$fcst, recipe, dict, outdir,
+                    archive = archive, type = 'fcst')
     }
     save_observations(data$obs, recipe, dict, outdir, archive = archive)
   }
@@ -55,16 +59,20 @@ save_data <- function(recipe, archive, data,

   # Export skill metrics onto outfile
   if (!is.null(skill_metrics)) {
-    save_metrics(skill_metrics, recipe, dict, data$hcst, outdir, archive = archive)
+    save_metrics(skill_metrics, recipe, dict, data$hcst, outdir,
+                 archive = archive)
   }
   if (!is.null(corr_metrics)) {
-    save_corr(corr_metrics, recipe, dict, data$hcst, outdir, archive = archive)
+    save_corr(corr_metrics, recipe, dict, data$hcst, outdir,
+              archive = archive)
   }

   # Export probabilities onto outfile
   if (!is.null(probabilities)) {
-    save_percentiles(probabilities$percentiles, recipe, data$hcst, outdir, archive = archive)
-    save_probabilities(probabilities$probs, recipe, data$hcst, outdir, archive = archive)
+    save_percentiles(probabilities$percentiles, recipe, data$hcst, outdir,
+                     archive = archive)
+    save_probabilities(probabilities$probs, recipe, data$hcst, outdir,
+                       archive = archive)
   }
 }
diff --git a/modules/Saving/paths2save.R b/modules/Saving/paths2save.R
index 0975f0870da5f6cf80c494d3f9a19b8721ed6ae6..70f6cc9243a6465d452d9d375e6f95bafccc8682 100644
--- a/modules/Saving/paths2save.R
+++ b/modules/Saving/paths2save.R
@@ -24,7 +24,8 @@ get_filename <- function(dir, var, date, fcst.sdate, agg,
                          horizon, file.type) {
                 "obs" = {file <- paste0(var, gg, "-obs_", date)},
                 "percentiles" = {file <- paste0(var, gg, "-percentiles_", dd, shortdate)},
-                "probs" = {file <- paste0(var, gg, "-probs_", date)})
+                "probs" = {file <- paste0(var, gg, "-probs_", date)},
+                "bias" = {file <- paste0(var, gg, "-bias_", date)})

   return(paste0(dir, file, ".nc"))
diff --git a/modules/Skill/AbsBiasSS.R b/modules/Skill/AbsBiasSS.R
new file mode 100644
index 0000000000000000000000000000000000000000..0838f251a97e62a4e82f65423a6ccd567b2b6b77
--- /dev/null
+++ b/modules/Skill/AbsBiasSS.R
@@ -0,0 +1,280 @@
+#'Compute the Absolute Mean Bias Skill Score
+#'
+#'The Absolute Mean Bias Skill Score is based on the Absolute Mean Error (Wilks,
+#' 2011) between the ensemble mean forecast and the observations. It measures
+#'the accuracy of the forecast in comparison with a reference forecast to assess
+#'whether the forecast presents an improvement or a worsening with respect to
+#'that reference. The Mean Bias Skill Score ranges between minus infinity and 1.
+#'Positive values indicate that the forecast has higher skill than the reference
+#'forecast, while negative values indicate that it has lower skill. Examples
+#'of reference forecasts are the climatological forecast (average of the
+#'observations), a previous model version, or another model. It is computed as
+#'\code{AbsBiasSS = 1 - AbsBias_exp / AbsBias_ref}. The statistical significance
+#'is obtained based on a Random Walk test at the 95% confidence level (DelSole
+#'and Tippett, 2016). If there is more than one dataset, the result will be
+#'computed for each pair of exp and obs data.
+#'
+#'@param exp A named numerical array of the forecast with at least time
+#'  dimension.
+#'@param obs A named numerical array of the observation with at least time
+#'  dimension. The dimensions must be the same as 'exp' except 'memb_dim' and
+#'  'dat_dim'.
+#'@param ref A named numerical array of the reference forecast data with at
+#'  least time dimension. The dimensions must be the same as 'exp' except
+#'  'memb_dim' and 'dat_dim'. If there is only one reference dataset, it should
+#'  not have dataset dimension. If there is a corresponding reference for each
+#'  experiment, the dataset dimension must have the same length as in 'exp'. If
+#'  'ref' is NULL, the climatological forecast is used as reference forecast.
+#'  The default value is NULL.
+#'@param time_dim A character string indicating the name of the time dimension.
+#'  The default value is 'sdate'.
+#'@param memb_dim A character string indicating the name of the member dimension
+#'  to compute the ensemble mean; it should be set to NULL if the parameters
+#'  'exp' and 'ref' are already the ensemble mean. The default value is NULL.
+#'@param dat_dim A character string indicating the name of the dataset dimension.
+#'  The length of this dimension can be different between 'exp' and 'obs'.
+#'  The default value is NULL.
+#'@param na.rm A logical value indicating if NAs should be removed (TRUE) or
+#'  kept (FALSE) for computation. The default value is FALSE.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#'  computation. The default value is NULL.
+#'
+#'@return
+#'\item{$biasSS}{
+#'  A numerical array of BiasSS with dimensions nexp, nobs and the rest of the
+#'  dimensions of 'exp' except 'time_dim' and 'memb_dim'.
+#'}
+#'\item{$sign}{
+#'  A logical array of the statistical significance of the BiasSS
+#'  with the same dimensions as $biasSS. nexp is the number of
+#'  experiments (i.e., 'dat_dim' in exp), and nobs is the number of observations
+#'  (i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted.
+#'}
+#'
+#'@references
+#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7
+#'DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1
+#'
+#'@examples
+#'exp <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, member = 10, sdate = 50))
+#'ref <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, member = 10, sdate = 50))
+#'obs <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, sdate = 50))
+#'biasSS1 <- AbsBiasSS(exp = exp, obs = obs, ref = ref, memb_dim = 'member')
+#'biasSS2 <- AbsBiasSS(exp = exp, obs = obs, ref = NULL, memb_dim = 'member')
+#'
+#'@import multiApply
+#'@export
+AbsBiasSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = NULL,
+                      dat_dim = NULL, na.rm = FALSE, ncores = NULL) {
+
+  # Check inputs
+  ## exp, obs, and ref (1)
+  if (!is.array(exp) | !is.numeric(exp)) {
+    stop("Parameter 'exp' must be a numeric array.")
+  }
+  if (!is.array(obs) | !is.numeric(obs)) {
+    stop("Parameter 'obs' must be a numeric array.")
+  }
+  if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) |
+      any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) {
+    stop("Parameter 'exp' and 'obs' must have dimension names.")
+  }
+  if (!is.null(ref)) {
+    if (!is.array(ref) | !is.numeric(ref))
+      stop("Parameter 'ref' must be a numeric array.")
+    if (any(is.null(names(dim(ref)))) | any(nchar(names(dim(ref))) == 0)) {
+      stop("Parameter 'ref' must have dimension names.")
+    }
+  }
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) != 1) {
+    stop("Parameter 'time_dim' must be a character string.")
+  }
+  if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) {
+    stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.")
+  }
+  if (!is.null(ref) & !time_dim %in% names(dim(ref))) {
+    stop("Parameter 'time_dim' is not found in 'ref' dimension.")
+  }
+  ## memb_dim
+  if (!is.null(memb_dim)) {
+    if (!is.character(memb_dim) | length(memb_dim) > 1) {
+      stop("Parameter 'memb_dim' must be a character string.")
+    }
+    if (!memb_dim %in% names(dim(exp))) {
+      stop("Parameter 'memb_dim' is not found in 'exp' dimension.")
+    }
+    if (memb_dim %in% names(dim(obs))) {
+      if (identical(as.numeric(dim(obs)[memb_dim]), 1)) {
+        obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected')
+      } else {
+        stop("Not implemented for observations with members ('obs' can have 'memb_dim', ",
+             "but it should be of length = 1).")
+      }
+    }
+  }
+  ## dat_dim
+  if (!is.null(dat_dim)) {
+    if (!is.character(dat_dim) | length(dat_dim) > 1) {
+      stop("Parameter 'dat_dim' must be a character string.")
+    }
+    if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) {
+      stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.",
+           " Set it as NULL if there is no dataset dimension.")
+    }
+  }
+  ## exp, obs, and ref (2)
+  name_exp <- sort(names(dim(exp)))
+  name_obs <- sort(names(dim(obs)))
+  if (!is.null(memb_dim)) {
+    name_exp <- name_exp[-which(name_exp == memb_dim)]
+  }
+  if (!is.null(dat_dim)) {
+    name_exp <- name_exp[-which(name_exp == dat_dim)]
+    name_obs <- name_obs[-which(name_obs == dat_dim)]
+  }
+  if (!identical(length(name_exp), length(name_obs)) |
+      !identical(dim(exp)[name_exp], dim(obs)[name_obs])) {
+    stop(paste0("Parameter 'exp' and 'obs' must have same length of ",
+                "all dimensions except 'memb_dim' and 'dat_dim'."))
+  }
+  if (!is.null(ref)) {
+    name_ref <- sort(names(dim(ref)))
+    if (!is.null(memb_dim) && memb_dim %in% name_ref) {
+      name_ref <- name_ref[-which(name_ref == memb_dim)]
+    }
+    if (!is.null(dat_dim)) {
+      if (dat_dim %in% name_ref) {
+        if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) {
+          stop(paste0("If parameter 'ref' has dataset dimension, it must be",
+                      " equal to dataset dimension of 'exp'."))
+        }
+        name_ref <- name_ref[-which(name_ref == dat_dim)]
+      }
+    }
+    if (!identical(length(name_exp), length(name_ref)) |
+        !identical(dim(exp)[name_exp], dim(ref)[name_ref])) {
+      stop(paste0("Parameter 'exp' and 'ref' must have the same length of ",
+                  "all dimensions except 'memb_dim' and 'dat_dim' if there is ",
+                  "only one reference dataset."))
+    }
+  }
+  ## na.rm
+  if (!is.logical(na.rm) | length(na.rm) > 1) {
+    stop("Parameter 'na.rm' must be one logical value.")
+  }
+  ## ncores
+  if (!is.null(ncores)) {
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
+        length(ncores) > 1) {
+      stop("Parameter 'ncores' must be either NULL or a positive integer.")
+    }
+  }
+
+  ############################
+
+  ## Ensemble mean
+  if (!is.null(memb_dim)) {
+    exp <- MeanDims(exp, memb_dim, na.rm = na.rm)
+    if (!is.null(ref) & memb_dim %in% names(dim(ref))) {
+      ref <- MeanDims(ref, memb_dim, na.rm = na.rm)
+    }
+  }
+
+  ## Mean bias skill score
+  if (!is.null(ref)) { # use "ref" as reference forecast
+    if (!is.null(dat_dim) && dat_dim %in% names(dim(ref))) {
+      target_dims_ref <- c(time_dim, dat_dim)
+    } else {
+      target_dims_ref <- c(time_dim)
+    }
+    data <- list(exp = exp, obs = obs, ref = ref)
+    target_dims = list(exp = c(time_dim, dat_dim),
+                       obs = c(time_dim, dat_dim),
+                       ref = target_dims_ref)
+  } else {
+    data <- list(exp = exp, obs = obs)
+    target_dims = list(exp = c(time_dim, dat_dim),
+                       obs = c(time_dim, dat_dim))
+  }
+
+  output <- Apply(data,
+                  target_dims = target_dims,
+                  fun = .AbsBiasSS,
+                  dat_dim = dat_dim,
+                  na.rm = na.rm,
+                  ncores = ncores)
+
+  return(output)
+}
+
+.AbsBiasSS <- function(exp, obs, ref = NULL, dat_dim = NULL, na.rm = FALSE) {
+  # exp and obs: [sdate, (dat_dim)]
+  # ref: [sdate, (dat_dim)] or NULL
+
+  # Adjust exp, obs, ref to have dat_dim temporarily
+  if (is.null(dat_dim)) {
+    nexp <- 1
+    nobs <- 1
+    exp <- InsertDim(exp, posdim = 2, lendim = 1, name = 'dataset')
+    obs <- InsertDim(obs, posdim = 2, lendim = 1, name = 'dataset')
+    if (!is.null(ref)) {
+      ref <- InsertDim(ref, posdim = 2, lendim = 1, name = 'dataset')
+    }
+    ref_dat_dim <- FALSE
+  } else {
+    nexp <- as.numeric(dim(exp)[dat_dim])
+    nobs <- as.numeric(dim(obs)[dat_dim])
+    if (is.null(ref) || length(dim(ref)) == 1) { # ref: NULL or [sdate]
+      ref_dat_dim <- FALSE
+    } else {
+      ref_dat_dim <- TRUE
+    }
+  }
+
+  biasSS <- array(dim = c(nexp = nexp, nobs = nobs))
+  sign <- array(dim = c(nexp = nexp, nobs = nobs))
+
+  for (i in 1:nexp) {
+    exp_data <- exp[, i]
+    if (isTRUE(ref_dat_dim)) {
+      ref_data <- ref[, i]
+    } else {
+      ref_data <- ref
+    }
+    for (j in 1:nobs) {
+      obs_data <- obs[, j]
+
+      if (isTRUE(na.rm)) {
+        if (is.null(ref)) {
+          good_values <- !is.na(exp_data) & !is.na(obs_data)
+          exp_data <- exp_data[good_values]
+          obs_data <- obs_data[good_values]
+        } else {
+          good_values <- !is.na(exp_data) & !is.na(ref_data) & !is.na(obs_data)
+          exp_data <- exp_data[good_values]
+          ref_data <- ref_data[good_values]
+          obs_data <- obs_data[good_values]
+        }
+      }
+      ## Absolute bias (per time step) of the exp
+      bias_exp <- .Bias(exp = exp_data, obs = obs_data, na.rm = na.rm,
+                        absolute = TRUE, time_mean = FALSE)
+      ## Absolute bias of the ref
+      if (is.null(ref)) { ## Climatological forecast
+        ref_data <- mean(obs_data, na.rm = na.rm)
+      }
+      bias_ref <- .Bias(exp = ref_data, obs = obs_data, na.rm = na.rm,
+                        absolute = TRUE, time_mean = FALSE)
+      ## Skill score and significance
+      biasSS[i, j] <- 1 - mean(bias_exp, na.rm = na.rm) / mean(bias_ref, na.rm = na.rm)
+      sign[i, j] <- .RandomWalkTest(skill_A = bias_exp, skill_B = bias_ref)$signif
+    }
+  }
+
+  if (is.null(dat_dim)) {
+    dim(biasSS) <- NULL
+    dim(sign) <- NULL
+  }
+
+  return(list(biasSS = biasSS, sign = sign))
+}
diff --git a/modules/Skill/Bias.R b/modules/Skill/Bias.R
new file mode 100644
index 0000000000000000000000000000000000000000..0319a0f08e23e388046ce895e7a06aa82a0d9a41
--- /dev/null
+++ b/modules/Skill/Bias.R
@@ -0,0 +1,189 @@
+#'Compute the Mean Bias
+#'
+#'The Mean Bias or Mean Error (Wilks, 2011) is defined as the mean difference
+#'between the ensemble mean forecast and the observations. It is a deterministic
+#'metric. Positive values indicate that the forecasts are on average too high
+#'and negative values indicate that the forecasts are on average too low.
+#'It can also compute the Absolute Mean Bias or the bias without the temporal
+#'mean. If there is more than one dataset, the result will be computed for each
+#'pair of exp and obs data.
+#'
+#'@param exp A named numerical array of the forecast with at least time
+#'  dimension.
+#'@param obs A named numerical array of the observation with at least time
+#'  dimension. The dimensions must be the same as 'exp' except 'memb_dim' and
+#'  'dat_dim'.
+#'@param time_dim A character string indicating the name of the time dimension.
+#'  The default value is 'sdate'.
+#'@param dat_dim A character string indicating the name of the dataset dimension.
+#'  The length of this dimension can be different between 'exp' and 'obs'.
+#'  The default value is NULL.
+#'@param memb_dim A character string indicating the name of the member dimension
+#'  to compute the ensemble mean; it should be set to NULL if the parameter
+#'  'exp' is already the ensemble mean. The default value is NULL.
+#'@param na.rm A logical value indicating if NAs should be removed (TRUE) or
+#'  kept (FALSE) for computation. The default value is FALSE.
+#'@param absolute A logical value indicating whether to compute the absolute
+#'  bias. The default value is FALSE.
+#'@param time_mean A logical value indicating whether to compute the temporal
+#'  mean of the bias. The default value is TRUE.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#'  computation. The default value is NULL.
+#'
+#'@return
+#'A numerical array of bias with dimensions c(nexp, nobs, the rest of the
+#'dimensions of 'exp' except 'time_dim' (if time_mean = TRUE) and 'memb_dim').
+#'nexp is the number of experiments (i.e., 'dat_dim' in exp), and nobs is the
+#'number of observations (i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and
+#'nobs are omitted.
+#'
+#'@references
+#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7
+#'
+#'@examples
+#'exp <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, member = 10, sdate = 50))
+#'obs <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, sdate = 50))
+#'bias <- Bias(exp = exp, obs = obs, memb_dim = 'member')
+#'
+#'@import multiApply
+#'@importFrom ClimProjDiags Subset
+#'@export
+Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, na.rm = FALSE,
+                 absolute = FALSE, time_mean = TRUE, ncores = NULL) {
+
+  # Check inputs
+  ## exp and obs (1)
+  if (!is.array(exp) | !is.numeric(exp))
+    stop("Parameter 'exp' must be a numeric array.")
+  if (!is.array(obs) | !is.numeric(obs))
+    stop("Parameter 'obs' must be a numeric array.")
+  if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) |
+      any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) {
+    stop("Parameter 'exp' and 'obs' must have dimension names.")
+  }
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) != 1)
+    stop("Parameter 'time_dim' must be a character string.")
+  if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) {
+    stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.")
+  }
+  ## memb_dim
+  if (!is.null(memb_dim)) {
+    if (!is.character(memb_dim) | length(memb_dim) > 1) {
+      stop("Parameter 'memb_dim' must be a character string.")
+    }
+    if (!memb_dim %in% names(dim(exp))) {
+      stop("Parameter 'memb_dim' is not found in 'exp' dimension.")
+    }
+    if (memb_dim %in% names(dim(obs))) {
+      if (identical(as.numeric(dim(obs)[memb_dim]), 1)) {
+        obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected')
+      } else {
+        stop("Not implemented for observations with members ('obs' can have 'memb_dim', ",
+             "but it should be of length = 1).")
+      }
+    }
+  }
+  ## dat_dim
+  if (!is.null(dat_dim)) {
+    if (!is.character(dat_dim) | length(dat_dim) > 1) {
+      stop("Parameter 'dat_dim' must be a character string.")
+    }
+    if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) {
+      stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.",
+           " Set it as NULL if there is no dataset dimension.")
+    }
+  }
+  ## exp and obs (2)
+  name_exp <- sort(names(dim(exp)))
+  name_obs <- sort(names(dim(obs)))
+  if (!is.null(memb_dim)) {
+    name_exp <- name_exp[-which(name_exp == memb_dim)]
+  }
+  if (!is.null(dat_dim)) {
+    name_exp <- name_exp[-which(name_exp == dat_dim)]
+    name_obs <- name_obs[-which(name_obs == dat_dim)]
+  }
+  if (!identical(length(name_exp), length(name_obs)) |
+      !identical(dim(exp)[name_exp], dim(obs)[name_obs])) {
+    stop(paste0("Parameter 'exp' and 'obs' must have same length of ",
+                "all dimensions except 'memb_dim' and 'dat_dim'."))
+  }
+  ## na.rm
+  if (!is.logical(na.rm) | length(na.rm) > 1) {
+    stop("Parameter 'na.rm' must be one logical value.")
+  }
+  ## absolute
+  if (!is.logical(absolute) | length(absolute) > 1) {
+    stop("Parameter 'absolute' must be one logical value.")
+  }
+  ## time_mean
+  if (!is.logical(time_mean) | length(time_mean) > 1) {
+    stop("Parameter 'time_mean' must be one logical value.")
+  }
+  ## ncores
+  if (!is.null(ncores)) {
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
+        length(ncores) > 1) {
+      stop("Parameter 'ncores' must be either NULL or a positive integer.")
+    }
+  }
+
+  ###############################
+
+  ## Ensemble mean
+  if (!is.null(memb_dim)) {
+    exp <- MeanDims(exp, memb_dim, na.rm = na.rm)
+  }
+
+  ## (Mean) Bias
+  bias <- Apply(data = list(exp, obs),
+                target_dims = c(time_dim, dat_dim),
+                fun = .Bias,
+                time_dim = time_dim,
+                dat_dim = dat_dim,
+                na.rm = na.rm,
+                absolute = absolute,
+                time_mean = time_mean,
+                ncores = ncores)$output1
+
+  return(bias)
+}
+
+.Bias <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, na.rm = FALSE,
+                  absolute = FALSE, time_mean = TRUE) {
+  # exp and obs: [sdate, (dat)]
+
+  if (is.null(dat_dim)) {
+    bias <- exp - obs
+
+    if (isTRUE(absolute)) {
+      bias <- abs(bias)
+    }
+
+    if (isTRUE(time_mean)) {
+      bias <- mean(bias, na.rm = na.rm)
+    }
+
+  } else {
+    nexp <- as.numeric(dim(exp)[dat_dim])
+    nobs <- as.numeric(dim(obs)[dat_dim])
+    bias <- array(dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs))
+
+    for (i in 1:nexp) {
+      for (j in 1:nobs) {
+        bias[, i, j] <- exp[, i] - obs[, j]
+      }
+    }
+
+    if (isTRUE(absolute)) {
+      bias <- abs(bias)
+    }
+
+    if (isTRUE(time_mean)) {
+      bias <- MeanDims(bias, time_dim, na.rm = na.rm)
+    }
+  }
+
+  return(bias)
+}
diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R
index d6b959c8a035ce8b7d09f8074339859f951947dc..1288a96850516d7989b0d1f128e4be1ebcd3fe28 100644
--- a/modules/Skill/Skill.R
+++ b/modules/Skill/Skill.R
@@ -8,9 +8,12 @@
 # - ask Carlos which decadal metrics he is currently using

 source("modules/Skill/s2s.metrics.R")
+## TODO: Remove once the new version of s2dv is released
 source("modules/Skill/CRPS.R")
 source("modules/Skill/CRPSS.R")
 source("modules/Skill/RandomWalkTest.R")
+source("modules/Skill/Bias.R")
+source("modules/Skill/AbsBiasSS.R")

 ## TODO: Implement this in the future
 ## Which parameter are required?
@@ -88,7 +91,6 @@ compute_skill_metrics <- function(exp, obs, recipe) {
                   Fair = Fair, ncores = ncores)
       skill <- .drop_dims(skill)
       skill_metrics[[ metric ]] <- skill
-      # Ranked Probability Skill Score and Fair version
     } else if (metric %in% c('rpss', 'frpss')) {
       skill <- RPSS(exp$data, obs$data, time_dim = time_dim,
                     memb_dim = memb_dim,
@@ -97,7 +99,6 @@ compute_skill_metrics <- function(exp, obs, recipe) {
                       .drop_dims(x)})
       skill_metrics[[ metric ]] <- skill$rpss
       skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign
-      # Brier Skill Score - 10th percentile
     } else if (metric == 'bss10') {
       skill <- RPSS(exp$data, obs$data, time_dim = time_dim,
@@ -107,7 +108,6 @@ compute_skill_metrics <- function(exp, obs, recipe) {
                       .drop_dims(x)})
       skill_metrics[[ metric ]] <- skill$rpss
       skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign
-      # Brier Skill Score - 90th percentile
     } else if (metric == 'bss90') {
       skill <- RPSS(exp$data, obs$data, time_dim = time_dim,
@@ -117,13 +117,13 @@ compute_skill_metrics <- function(exp, obs, recipe) {
                       .drop_dims(x)})
       skill_metrics[[ metric ]] <- skill$rpss
       skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign
-      # CRPS and FCRPS
+    # CRPS and FCRPS
     } else if (metric %in% c('crps', 'fcrps')) {
       skill <- CRPS(exp$data, obs$data, time_dim = time_dim,
                     memb_dim = memb_dim, Fair = Fair, ncores = ncores)
       skill <- .drop_dims(skill)
       skill_metrics[[ metric ]] <- skill
-      # CRPSS and FCRPSS
+    # CRPSS and FCRPSS
     } else if (metric %in% c('crpss', 'fcrpss')) {
       skill <- CRPSS(exp$data, obs$data, time_dim = time_dim,
                      memb_dim = memb_dim, Fair = Fair, ncores = ncores)
@@ -131,7 +131,20 @@ compute_skill_metrics <- function(exp, obs, recipe) {
                       .drop_dims(x)})
       skill_metrics[[ metric ]] <- skill$crpss
       skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign
-
+    # Mean bias (climatology)
+    } else if (metric == 'mean_bias') {
+      skill <- Bias(exp$data, obs$data, time_dim = time_dim,
+                    memb_dim = memb_dim, ncores = ncores)
+      skill <- .drop_dims(skill)
+      skill_metrics[[ metric ]] <- skill
+    # Mean bias skill score
+    } else if (metric == 'mean_bias_ss') {
+      skill <- AbsBiasSS(exp$data, obs$data, time_dim = time_dim,
+                         memb_dim = memb_dim, ncores = ncores)
+      skill <- lapply(skill, function(x) {
+                        .drop_dims(x)})
+      skill_metrics[[ metric ]] <- skill$biasSS
+      skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign
     # Ensemble mean correlation
     } else if (metric %in% c('enscorr', 'corr')) {
       ## TODO: Implement option for Kendall and Spearman methods?
@@ -148,6 +161,22 @@ compute_skill_metrics <- function(exp, obs, recipe) {
       skill_metrics[[ paste0(metric, "_p.value") ]] <- skill$p.val
       skill_metrics[[ paste0(metric, "_conf.low") ]] <- skill$conf.lower
       skill_metrics[[ paste0(metric, "_conf.up") ]] <- skill$conf.upper
+    } else if (metric == 'enssprerr') {
+      # Remove ensemble dim from obs to avoid veriApply warning
+      obs_noensdim <- ClimProjDiags::Subset(obs$data, "ensemble", 1,
+                                            drop = "selected")
+      capture.output(
+        skill <- easyVerification::veriApply(verifun = 'EnsSprErr',
+                                             fcst = exp$data,
+                                             obs = obs_noensdim,
+                                             tdim = which(names(dim(exp$data))==time_dim),
+                                             ensdim = which(names(dim(exp$data))==memb_dim),
+                                             na.rm = na.rm,
+                                             ncpus = ncores)
+      )
+      remove(obs_noensdim)
+      skill <- .drop_dims(skill)
+      skill_metrics[[ metric ]] <- skill
     # SpecsVerification metrics
     } else if (grepl("specs", metric, fixed = TRUE)) {
       # Compute SpecsVerification version of the metrics
@@ -157,11 +186,13 @@ compute_skill_metrics <- function(exp, obs, recipe) {
                                 'rpss'))) {
         stop("Some of the requested metrics are not available.")
       }
+      capture.output(
       skill <- Compute_verif_metrics(exp$data, obs$data,
                                      skill_metrics = metric_name,
                                      verif.dims=c("syear", "sday", "sweek"),
                                      na.rm = na.rm, ncores = ncores)
+      )
       skill <- .drop_dims(skill)
       if (metric_name == "frps") {
         # Compute yearly mean for FRPS
@@ -181,11 +212,14 @@ compute_probabilities <- function(data, recipe) {
   } else {
     ncores <- recipe$Analysis$ncores
   }
-  if (is.null(recipe$Analysis$remove_NAs)) {
-    na.rm = T
-  } else {
-    na.rm = recipe$Analysis$remove_NAs
-  }
+
+  ## TODO: Remove commented lines and include a warning if quantile()
+  ## cannot accept na.rm = FALSE
+  # if (is.null(recipe$Analysis$remove_NAs)) {
+  #   na.rm = T
+  # } else {
+  #   na.rm = recipe$Analysis$remove_NAs
+  # }

   named_probs <- list()
   named_quantiles <- list()
@@ -194,42 +228,39 @@ compute_probabilities <- function(data, recipe) {
                 "thresholds are provided in the recipe.")
   } else {
     for (element in recipe$Analysis$Workflow$Probabilities$percentiles) {
+      # Parse thresholds in recipe
       thresholds <- sapply(element, function (x) eval(parse(text = x)))
+      # Compute quantiles and probability bins
       probs <- Compute_probs(data$data, thresholds,
                              ncores = ncores,
-                             na.rm = na.rm)
+                             na.rm = TRUE)
+      # Separate quantiles into individual arrays and name them percentile_xx
       for (i in seq(1:dim(probs$quantiles)['bin'][[1]])) {
-          named_quantiles <- append(named_quantiles,
-                                    list(ClimProjDiags::Subset(probs$quantiles,
-                                                               'bin', i)))
-          names(named_quantiles)[length(named_quantiles)] <- paste0("percentile_",
-                                                                    as.integer(thresholds[i]*100))
+        named_quantiles <- append(named_quantiles,
+                                  list(probs$quantiles[i, , , , , , ,]))
+        names(named_quantiles)[length(named_quantiles)] <- paste0("percentile_",
+                                                                  as.integer(thresholds[i]*100))
       }
+      # Separate probability bins into individual arrays and name them:
+      # prob_b = prob. below, prob_a = prob. above, prob_xx_to_yy
       for (i in seq(1:dim(probs$probs)['bin'][[1]])) {
-          if (i == 1) {
-            name_i <- paste0("prob_b", as.integer(thresholds[1]*100))
-          } else if (i == dim(probs$probs)['bin'][[1]]) {
-            name_i <- paste0("prob_a", as.integer(thresholds[i-1]*100))
-          } else {
-            name_i <- paste0("prob_", as.integer(thresholds[i-1]*100), "_to_",
-                             as.integer(thresholds[i]*100))
-          }
-          named_probs <- append(named_probs,
-                                list(ClimProjDiags::Subset(probs$probs,
-                                                           'bin', i)))
-          names(named_probs)[length(named_probs)] <- name_i
+        if (i == 1) {
+          name_i <- paste0("prob_b", as.integer(thresholds[1]*100))
+        } else if (i == dim(probs$probs)['bin'][[1]]) {
+          name_i <- paste0("prob_a", as.integer(thresholds[i-1]*100))
+        } else {
+          name_i <- paste0("prob_", as.integer(thresholds[i-1]*100), "_to_",
+                           as.integer(thresholds[i]*100))
+        }
+        named_probs <- append(named_probs, list(probs$probs[i, , , , , , , ,]))
+        names(named_probs)[length(named_probs)] <- name_i
       }
-      # remove(probs)
     }
   }
-  named_quantiles <- lapply(named_quantiles, function(x) {.drop_dims(x)})
-  named_probs <- lapply(named_probs, function(x) {.drop_dims(x)})
-
   print("##### PERCENTILES AND PROBABILITY CATEGORIES COMPUTED #####")
   return(list(probs=named_probs, percentiles=named_quantiles))
 }
-
 .drop_dims <- function(metric_array) {
   # Drop all singleton dimensions
   metric_array <- drop(metric_array)
@@ -237,7 +268,7 @@ compute_probabilities <- function(data, recipe) {
   if (!("time" %in% names(dim(metric_array)))) {
     dim(metric_array) <- c("time" = 1, dim(metric_array))
   }
-  # If array has memb_exp (EnsCorr case), change name to 'ensemble'
+  # If array has memb_exp (Corr case), change name to 'ensemble'
   if ("exp_memb" %in% names(dim(metric_array))) {
     names(dim(metric_array))[which(names(dim(metric_array)) ==
                                    "exp_memb")] <- "ensemble"
diff --git a/modules/Skill/s2s.probs.R b/modules/Skill/s2s.probs.R
index a8cab57c16618a163a8526331ebcabdaa78b31d4..889330453d15bb3e90e7f14de5eb5b456697643d 100644
--- a/modules/Skill/s2s.probs.R
+++ b/modules/Skill/s2s.probs.R
@@ -23,7 +23,7 @@ Compute_probs <- function(data, thresholds,
     if (any(!is.na(x))){
       colMeans(convert2prob(as.vector(x), threshold = as.vector(t)))
     } else {
-      c(NA, NA, NA)
+      rep(NA, dim(t)[['bin']] + 1) # vector with as many NAs as probability bins.
     }
   }
 }
diff --git a/tests/recipes/recipe-seasonal_monthly_1.yml b/tests/recipes/recipe-seasonal_monthly_1.yml
index 766b7e04f620b3ddc5bac02e1d37538f608f11b6..0e1b6f4b023c39e01841ffa98353e16a0bca690d 100644
--- a/tests/recipes/recipe-seasonal_monthly_1.yml
+++ b/tests/recipes/recipe-seasonal_monthly_1.yml
@@ -32,11 +32,13 @@ Analysis:
       method: mse_min
     Skill:
       metric: RPSS CRPSS EnsCorr Corr Enscorr_specs
+    Probabilities:
+      percentiles: [[1/3, 2/3], [1/10, 9/10]]
     Indicators:
       index: no
   Output_format: S2S4E
 Run:
   Loglevel: INFO
   Terminal: yes
-  output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/
+  output_dir: ./tests/out-logs/
   code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/
diff --git a/tests/testthat/test-seasonal_monthly.R b/tests/testthat/test-seasonal_monthly.R
index 05c1c4118370a29d0cec8fed282d422f2e0b4e15..0f9149b9cc1a8b54be6123d265827e0a6b199993 100644
--- a/tests/testthat/test-seasonal_monthly.R
+++ b/tests/testthat/test-seasonal_monthly.R
@@ -6,14 +6,14 @@ source("modules/Skill/Skill.R")
 source("modules/Saving/Saving.R")

 recipe_file <- "tests/recipes/recipe-seasonal_monthly_1.yml"
+recipe <- read_yaml(recipe_file)
+archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive

 # Load datasets
 suppressWarnings({invisible(capture.output(
 data <- load_datasets(recipe_file)
 ))})

-recipe <- read_yaml(recipe_file)
-
 suppressWarnings({invisible(capture.output(
 calibrated_data <- calibrate_datasets(data, recipe)
 ))})
@@ -23,6 +23,20 @@ suppressWarnings({invisible(capture.output(
 skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe)
 ))})

+suppressWarnings({invisible(capture.output(
+probs <- compute_probabilities(calibrated_data$hcst, recipe)
+))})
+
+# Saving
+suppressWarnings({invisible(capture.output(
+save_data(recipe = recipe, data = data, calibrated_data = calibrated_data,
+          skill_metrics = skill_metrics, probabilities = probs, archive = archive)
+))})
+
+outdir <- get_dir(recipe)
+
+# ------- TESTS --------
+
 test_that("1. Loading", {

 expect_equal(
@@ -189,3 +203,18 @@
 rep(FALSE, 3)
 )
 })
+
+test_that("4. Saving", {
+
+expect_equal(
+list.files(outdir),
+c("tas_19931101.nc", "tas_19941101.nc", "tas_19951101.nc", "tas_19961101.nc", "tas_20201101.nc",
+  "tas-corr_month11.nc", "tas-obs_19931101.nc", "tas-obs_19941101.nc", "tas-obs_19951101.nc",
+  "tas-obs_19961101.nc", "tas-percentiles_month11.nc", "tas-probs_19931101.nc",
+  "tas-probs_19941101.nc", "tas-probs_19951101.nc", "tas-probs_19961101.nc", "tas-skill_month11.nc")
+)
+
+})
+
+# Delete files
+unlink(paste0(outdir, list.files(outdir)))