From 0394dd79cf87f970020b0ae31d1eefb612806164 Mon Sep 17 00:00:00 2001 From: nperez Date: Fri, 3 Nov 2023 17:45:53 +0100 Subject: [PATCH 01/43] check recipe scorecards --- tools/check_recipe.R | 30 +++++++++++++++++++++++++++--- tools/prepare_outputs.R | 2 +- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index e4c59a55..339c8111 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -442,10 +442,11 @@ check_recipe <- function(recipe) { } # Skill - AVAILABLE_METRICS <- c("enscorr", "corr_individual_members", "rps", "rpss", - "frps", "frpss", "crps", "crpss", "bss10", "bss90", + AVAILABLE_METRICS <- c("enscorr", "corr_individual_members", "rps", "rps_syear", + "rpss", "frps", "frpss", "crps", "crps_syear", + "crpss", "bss10", "bss90", "mean_bias", "mean_bias_ss", "enssprerr", "rps_clim", - "rpss_clim", "enscorr_specs", "frps_specs", "rpss_specs", + "crps_clim", "enscorr_specs", "frps_specs", "rpss_specs", "frpss_specs", "bss10_specs", "bss90_specs") if ("Skill" %in% names(recipe$Analysis$Workflow)) { if (is.null(recipe$Analysis$Workflow$Skill$metric)) { @@ -571,6 +572,28 @@ check_recipe <- function(recipe) { } else { sc_metrics <- strsplit(recipe$Analysis$Workflow$Scorecards$metric, ", | |,")[[1]] + if (recipe$Analysis$Workflow$Scorecards$metric_aggregation == 'score') { + if ('rpss' %in% tolower(sc_metrics)) { + if (!('rps_clim_syear' %in% requested_metrics)) { + requested_metrics <- c(requested_metrics, 'rps_clim_syear') + } + if (!('rps_syear' %in% requested_metrics)) { + requested_metrics <- c(requested_metrics, 'rps_syear') + } + } + if ('crpss' %in% tolower(sc_metrics)) { + if (!('crps_clim_syear' %in% requested_metrics)) { + requested_metrics <- c(requested_metrics, 'crps_clim_syear') + } + if (!('crps_syear' %in% requested_metrics)) { + requested_metrics <- c(requested_metrics, 'crps_syear') + } + } + if ('enscorr' %in% tolower(sc_metrics)) { + recipe$Analysis$Workflow$Statistics <- c('variance', 'covariance') + } + recipe$Analysis$Workflow$Skill$metric <- requested_metrics + } if (!all(tolower(sc_metrics) %in% tolower(requested_metrics))) { error(recipe$Run$logger, paste0("All of the metrics requested under 'Scorecards' must ", @@ -718,4 +741,5 @@ check_recipe <- function(recipe) { } else { info(recipe$Run$logger, "##### RECIPE CHECK SUCCESSFULL #####") } + return(recipe) } diff --git a/tools/prepare_outputs.R b/tools/prepare_outputs.R index fa4d23ed..c7f3e3e6 100644 --- a/tools/prepare_outputs.R +++ b/tools/prepare_outputs.R @@ -94,7 +94,7 @@ prepare_outputs <- function(recipe_file, warn(recipe$Run$logger, "Recipe checks disabled. 
The recipe will not be checked for errors.") } else { - check_recipe(recipe) + recipe <- check_recipe(recipe) } # Restructure the recipe to make the atomic recipe more readable if (restructure) { -- GitLab From bee3bdbe4e5d9b0261e722ef5b287a7fa2f558ce Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Tue, 7 Nov 2023 16:25:24 +0100 Subject: [PATCH 02/43] included cross-validation in CRPS_clim and RPS_clim --- modules/Skill/R/CRPS_clim.R | 30 ++++++++++++++----- modules/Skill/R/RPS_clim.R | 12 +++++--- modules/Skill/Skill.R | 6 ++-- .../recipe_scorecards_s2s-suite.yml | 15 ++++++---- 4 files changed, 44 insertions(+), 19 deletions(-) diff --git a/modules/Skill/R/CRPS_clim.R b/modules/Skill/R/CRPS_clim.R index 36db4e94..b66cab78 100644 --- a/modules/Skill/R/CRPS_clim.R +++ b/modules/Skill/R/CRPS_clim.R @@ -1,13 +1,27 @@ -CRPS_clim <- function(obs, memb_dim ='ensemble'){ +# CRPS version for climatology +CRPS_clim <- function(obs, memb_dim ='ensemble', return_mean = TRUE, clim.cross.val= TRUE){ time_dim <- names(dim(obs)) obs_time_len <- dim(obs)[time_dim] - + + if (isFALSE(clim.cross.val)) { ## Without cross-validation ref <- array(data = rep(obs, each = obs_time_len), dim = c(obs_time_len, obs_time_len)) - names(dim(ref)) <- c(time_dim, memb_dim) - # ref: [sdate, memb] - # obs: [sdate] + } else if (isTRUE(clim.cross.val)) { ## With cross-validation (excluding the value of that year to create ref for that year) + ref <- array(data = NA, dim = c(obs_time_len, obs_time_len - 1)) + for (i in 1:obs_time_len) { + ref[i, ] <- obs[-i] + } + } + + names(dim(ref)) <- c(time_dim, memb_dim) + # ref: [sdate, memb] + # obs: [sdate] crps_ref <- s2dv:::.CRPS(exp = ref, obs = obs, time_dim = time_dim, memb_dim = memb_dim, - dat_dim = NULL, Fair = FALSE) - # crps_ref should be [sdate] - return(mean(crps_ref)) + dat_dim = NULL, Fair = FALSE) + + # crps_ref should be [sdate] + if (return_mean == TRUE) { + return(mean(crps_ref)) + } else { + return(crps_ref) + } } diff --git a/modules/Skill/R/RPS_clim.R b/modules/Skill/R/RPS_clim.R index e8b6452d..4a079cd4 100644 --- a/modules/Skill/R/RPS_clim.R +++ b/modules/Skill/R/RPS_clim.R @@ -1,12 +1,12 @@ # RPS version for climatology -RPS_clim <- function(obs, indices_for_clim = NULL, prob_thresholds = c(1/3, 2/3)) { +RPS_clim <- function(obs, indices_for_clim = NULL, prob_thresholds = c(1/3, 2/3), cross.val = TRUE, return_mean = TRUE) { if (is.null(indices_for_clim)){ indices_for_clim <- 1:length(obs) } obs_probs <- .GetProbs(data = obs, indices_for_quantiles = indices_for_clim, ## temporarily removed s2dv::: - prob_thresholds = prob_thresholds, weights = NULL, cross.val = T) ## here! 
+ prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val) # clim_probs: [bin, sdate] clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), 1 - prob_thresholds[length(prob_thresholds)]) clim_probs <- array(clim_probs, dim = dim(obs_probs)) @@ -15,6 +15,10 @@ RPS_clim <- function(obs, indices_for_clim = NULL, prob_thresholds = c(1/3, 2/3) probs_clim_cumsum <- apply(clim_probs, 2, cumsum) probs_obs_cumsum <- apply(obs_probs, 2, cumsum) rps_ref <- apply((probs_clim_cumsum - probs_obs_cumsum)^2, 2, sum) - - return(mean(rps_ref)) + + if (return_mean == TRUE) { + return(mean(rps_ref)) + } else { + return(rps_ref) + } } diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index fd54f17f..dcd28eb7 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -89,7 +89,8 @@ Skill <- function(recipe, data, agg = 'global') { skill_metrics[[ metric ]] <- skill rps_clim <- Apply(list(data$obs$data), target_dims = c(time_dim, memb_dim), - RPS_clim)$output1 + cross.val = cross.val, + fun = RPS_clim)$output1 rps_clim <- .drop_dims(rps_clim) skill_metrics[[paste0(metric, "_clim")]] <- rps_clim # Ranked Probability Skill Score and Fair version @@ -140,7 +141,8 @@ Skill <- function(recipe, data, agg = 'global') { skill <- .drop_dims(skill) skill_metrics[[ metric ]] <- skill crps_clim <- Apply(list(data$obs$data), target_dims = time_dim, - fun = CRPS_clim, memb_dim = memb_dim)$output1 + fun = CRPS_clim, memb_dim = memb_dim, + clim.cross.val = cross.val)$output1 crps_clim <- .drop_dims(crps_clim) skill_metrics[['crps_clim']] <- crps_clim # CRPSS and FCRPSS diff --git a/recipes/atomic_recipes/recipe_scorecards_s2s-suite.yml b/recipes/atomic_recipes/recipe_scorecards_s2s-suite.yml index ae17f9cd..1fd6adc3 100644 --- a/recipes/atomic_recipes/recipe_scorecards_s2s-suite.yml +++ b/recipes/atomic_recipes/recipe_scorecards_s2s-suite.yml @@ -19,9 +19,9 @@ Analysis: hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' ftime_min: 1 # Mandatory, int: First leadtime time step in months - ftime_max: 6 # Mandatory, int: Last leadtime time step in months + ftime_max: 2 # Mandatory, int: Last leadtime time step in months Region: - latmin: -90 # Mandatory, int: minimum latitude + latmin: 30 # Mandatory, int: minimum latitude latmax: 90 # Mandatory, int: maximum latitude lonmin: 0 # Mandatory, int: minimum longitude lonmax: 359.9 # Mandatory, int: maximum longitude @@ -31,13 +31,18 @@ Analysis: Workflow: Calibration: method: raw # Mandatory, str: Calibration method. See docu. + save: 'none' Anomalies: compute: yes - cross_validation: no + cross_validation: no + save: 'none' Skill: - metric: mean_bias EnsCorr rps rpss crps crpss EnsSprErr # str: Skill metric or list of skill metrics. See docu. + metric: rps rpss crps crpss # str: Skill metric or list of skill metrics. See docu. + cross_validation: yes + save: 'all' Probabilities: percentiles: [[1/3, 2/3], [1/10], [9/10]] # frac: Quantile thresholds. 
+ save: 'none' Indicators: index: no ncores: 15 # Optional, int: number of cores, defaults to 1 @@ -46,5 +51,5 @@ Analysis: Run: Loglevel: INFO Terminal: yes - output_dir: /esarchive/scratch/nmilders/scorecards_data/to_system/cross_validation/tercile_cross_val/ECMWF-SEAS5/prlr/ + output_dir: /esarchive/scratch/nmilders/scorecards_data/test/ code_dir: /esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/ -- GitLab From cdb70ec6ffa65c751f2362505b02efce615ab74a Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Wed, 8 Nov 2023 14:22:10 +0100 Subject: [PATCH 03/43] Included cross-validation in RMSSS --- modules/Skill/R/tmp/RMSSS.R | 482 ++++++++++++++++++++++++++++++++++++ modules/Skill/Skill.R | 5 + 2 files changed, 487 insertions(+) create mode 100644 modules/Skill/R/tmp/RMSSS.R diff --git a/modules/Skill/R/tmp/RMSSS.R b/modules/Skill/R/tmp/RMSSS.R new file mode 100644 index 00000000..e2fc4a80 --- /dev/null +++ b/modules/Skill/R/tmp/RMSSS.R @@ -0,0 +1,482 @@ +#'Compute root mean square error skill score +#' +#'Compute the root mean square error skill score (RMSSS) between an array of +#'forecast 'exp' and an array of observation 'obs'. The two arrays should +#'have the same dimensions except along 'dat_dim' and 'memb_dim'. The RMSSSs +#'are computed along 'time_dim', the dimension which corresponds to the start +#'date dimension. +#'RMSSS computes the root mean square error skill score of each exp in 1:nexp +#'against each obs in 1:nobs which gives nexp * nobs RMSSS for each grid point +#'of the array.\cr +#'The p-value and significance test are optionally provided by an one-sided +#'Fisher test or Random Walk test.\cr +#' +#'@param exp A named numeric array of experimental data which contains at least +#' time dimension (time_dim). It can also be a vector with the same length as +#' 'obs', then the vector will automatically be 'time_dim'. +#'@param obs A named numeric array of observational data which contains at least +#' time dimension (time_dim). The dimensions should be the same as parameter +#' 'exp' except the length of 'dat_dim' and 'memb_dim' dimension. It can also +#' be a vector with the same length as 'exp', then the vector will +#' automatically be 'time_dim'. +#'@param ref A named numerical array of the reference forecast data with at +#' least time dimension, or 0 (typical climatological forecast) or 1 +#' (normalized climatological forecast). If it is an array, the dimensions must +#' be the same as 'exp' except 'memb_dim' and 'dat_dim'. If there is only one +#' reference dataset, it should not have dataset dimension. If there is +#' corresponding reference for each experiment, the dataset dimension must +#' have the same length as in 'exp'. If 'ref' is NULL, the typical +#' climatological forecast is used as reference forecast (equivalent to 0.) +#' The default value is NULL. +#'@param dat_dim A character string indicating the name of dataset (nobs/nexp) +#' dimension. The default value is NULL. +#'@param time_dim A character string indicating the name of dimension along +#' which the RMSSS are computed. The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the ensemble mean; it should be set to NULL if the data are +#' already the ensemble mean. The default value is NULL. +#'@param pval A logical value indicating whether to compute or not the p-value +#' of the test Ho: RMSSS = 0. The default value is TRUE. 
+#'@param sign A logical value indicating whether to compute or not the +#' statistical significance of the test Ho: RMSSS = 0. The default value is +#' FALSE. +#'@param alpha A numeric of the significance level to be used in the +#' statistical significance test. The default value is 0.05. +#'@param sig_method A character string indicating the significance method. The +#' options are "one-sided Fisher" (default) and "Random Walk". +#'@param sig_method.type A character string indicating the test type of the +#' significance method. Check \code{RandomWalkTest()} parameter +#' \code{test.type} for details if parameter "sig_method" is "Random Walk". The +#' default is NULL (since "one-sided Fisher" doesn't have different test +#' types.) +#'@param clim.cross.val A logical indicating whether to build the climatological +#' forecast in cross-validation (i.e. excluding the observed value of the time +#' step when building the probabilistic distribution function for that +#' particular time step). Only used if 'ref' is NULL. The default value is +#' NULL, in which case the climatological reference is set to zero. FALSE +#' indicates calculating the ref from the obs data without cross-validation, +#' and TRUE indicates calculating the ref from the obs data applying +#' cross-validation. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list containing the numeric arrays with dimension:\cr +#' c(nexp, nobs, all other dimensions of exp except time_dim).\cr +#'nexp is the number of experiment (i.e., dat_dim in exp), and nobs is the +#'number of observation (i.e., dat_dim in obs). If dat_dim is NULL, nexp and +#'nobs are omitted.\cr +#'\item{$rmsss}{ +#' A numerical array of the root mean square error skill score. +#'} +#'\item{$p.val}{ +#' A numerical array of the p-value with the same dimensions as $rmsss. +#' Only present if \code{pval = TRUE}. +#'} +#'\item{sign}{ +#' A logical array of the statistical significance of the RMSSS with the same +#' dimensions as $rmsss. Only present if \code{sign = TRUE}. 
+#'} +#' +#'@examples +#'# Load sample data as in Load() example: +#'example(Load) +#'clim <- Clim(sampleData$mod, sampleData$obs) +#'ano_exp <- Ano(sampleData$mod, clim$clim_exp) +#'ano_obs <- Ano(sampleData$obs, clim$clim_obs) +#'rmsss <- RMSSS(ano_exp, ano_obs, dat_dim = 'dataset', memb_dim = 'member') +#' +#' set.seed(1) +#' exp1 <- array(rnorm(30), dim = c(dataset = 2, time = 3, memb = 5)) +#' set.seed(2) +#' obs1 <- array(rnorm(15), dim = c(time = 3, memb = 5, dataset = 1)) +#' res1 <- RMSSS(exp1, obs1, time_dim = 'time', dat_dim = 'dataset') +#' +#' exp2 <- array(rnorm(30), dim = c(lat = 2, time = 3, memb = 5)) +#' obs2 <- array(rnorm(15), dim = c(time = 3, lat = 2)) +#' res2 <- RMSSS(exp2, obs2, time_dim = 'time', memb_dim = 'memb') +#' +#' exp3 <- array(rnorm(30), dim = c(lat = 2, time = 3)) +#' obs3 <- array(rnorm(15), dim = c(time = 3, lat = 2)) +#' res3 <- RMSSS(exp3, obs3, time_dim = 'time') +#' +#'@rdname RMSSS +#'@import multiApply +#'@importFrom stats pf +#'@export +RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, + memb_dim = NULL, pval = TRUE, clim.cross.val = NULL, sign = FALSE, alpha = 0.05, + sig_method = 'one-sided Fisher', sig_method.type = NULL, ncores = NULL) { + + # Check inputs + ## exp, obs, and ref (1) + if (is.null(exp) | is.null(obs)) { + stop("Parameter 'exp' and 'obs' cannot be NULL.") + } + if (!is.numeric(exp) | !is.numeric(obs)) { + stop("Parameter 'exp' and 'obs' must be a numeric array.") + } + if (is.null(dim(exp)) & is.null(dim(obs))) { #is vector + if (length(exp) == length(obs)) { + exp <- array(exp, dim = c(length(exp))) + names(dim(exp)) <- c(time_dim) + obs <- array(obs, dim = c(length(obs))) + names(dim(obs)) <- c(time_dim) + } else { + stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", + "dimensions time_dim and dat_dim, or vector of same length.")) + } + } else if (is.null(dim(exp)) | is.null(dim(obs))) { + stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", + "dimensions time_dim and dat_dim, or vector of same length.")) + } + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + if (!is.null(ref)) { + if (!is.numeric(ref)) { + stop("Parameter 'ref' must be numeric.") + } + if (is.array(ref)) { + if (any(is.null(names(dim(ref))))| any(nchar(names(dim(ref))) == 0)) { + stop("Parameter 'ref' must have dimension names.") + } + } else if (length(ref) != 1 | any(!ref %in% c(0, 1))) { + stop("Parameter 'ref' must be a numeric array or number 0 or 1.") + } + } else { + ref <- 0 + } + if (!is.array(ref)) { # 0 or 1 + ref <- array(data = ref, dim = dim(exp)) + } + + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string or NULL.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 
'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + } + ## pval + if (!is.logical(pval) | length(pval) > 1) { + stop("Parameter 'pval' must be one logical value.") + } + ## sign + if (!is.logical(sign) | length(sign) > 1) { + stop("Parameter 'sign' must be one logical value.") + } + ## alpha + if (!is.numeric(alpha) | length(alpha) > 1) { + stop("Parameter 'alpha' must be one numeric value.") + } + ## sig_method + if (length(sig_method) != 1 | !any(sig_method %in% c('one-sided Fisher', 'Random Walk'))) { + stop("Parameter 'sig_method' must be one of 'one-sided Fisher' or 'Random Walk'.") + } + ## sig_method.type + if (sig_method == 'Random Walk') { + if (is.null(sig_method.type)) { + .warning("Parameter 'sig_method.type' must be specified if 'sig_method' is ", + "Random Walk. Assign it as 'two.sided'.") + .warning("Note that in s2dv <= 1.4.1, Random Walk uses 'two.sided.approx' method.", + "If you want to retain the same functionality, please specify parameter ", + "'sig_method.type' as 'two.sided.approx'.") + sig_method.type <- "two.sided" + } + if (!any(sig_method.type %in% c('two.sided.approx', 'two.sided', 'greater', 'less'))) { + stop("Parameter 'sig_method.type' must be a method accepted by RandomWalkTest() parameter 'test.type'.") + } + if (sig_method.type == 'two.sided.approx' & pval == T) { + .warning("p-value cannot be calculated by Random Walk 'two.sided.approx' method.") + pval <- FALSE + if (alpha != 0.05) { + .warning("DelSole and Tippett (2016) aproximation is valid for alpha ", + "= 0.05 only. Returning the significance at the 0.05 significance level.") + alpha <- 0.05 + } + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + ## exp, obs, and ref (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + if (memb_dim %in% name_exp) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + } + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } + } + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!all(name_exp == name_obs)) { + stop("Parameter 'exp' and 'obs' must have the same dimension names.") + } + if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'dat_dim' and 'memb_dim'.")) + } + + name_ref <- sort(names(dim(ref))) + if (!is.null(memb_dim) && memb_dim %in% name_ref) { + name_ref <- name_ref[-which(name_ref == memb_dim)] + } + if (!is.null(dat_dim)) { + if (dat_dim %in% name_ref) { + if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { + stop(paste0("If parameter 'ref' has dataset dimension, it must be ", + "equal to dataset dimension of 'exp'.")) + } + name_ref <- name_ref[-which(name_ref == dat_dim)] + } + } + if (!identical(length(name_exp), length(name_ref)) | + !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { + stop(paste0("Parameter 'exp' and 'ref' must have the same length of ", + "all dimensions except 'memb_dim' and 'dat_dim' if there is ", + "only one reference dataset.")) + } + + if (dim(exp)[time_dim] <= 2) { + stop("The length of time_dim must be more than 2 to compute RMSSS.") + } + + + ############################### + # # Sort dimension + # 
name_exp <- names(dim(exp)) + # name_obs <- names(dim(obs)) + # order_obs <- match(name_exp, name_obs) + # obs <- Reorder(obs, order_obs) + + ############################### + ## Ensemble mean + if (!is.null(memb_dim)) { + if (memb_dim %in% names(dim(exp))) { + exp <- MeanDims(exp, memb_dim, na.rm = T) + } + if (memb_dim %in% names(dim(obs))) { + obs <- MeanDims(obs, memb_dim, na.rm = T) + } + if (memb_dim %in% names(dim(ref))) { + ref <- MeanDims(ref, memb_dim, na.rm = T) + } + } + + ############################### + # Calculate RMSSS + + data <- list(exp = exp, obs = obs, ref = ref) + if (!is.null(dat_dim)) { + if (dat_dim %in% names(dim(ref))) { + target_dims <- list(exp = c(time_dim, dat_dim), + obs = c(time_dim, dat_dim), + ref = c(time_dim, dat_dim)) + } else { + target_dims <- list(exp = c(time_dim, dat_dim), + obs = c(time_dim, dat_dim), + ref = c(time_dim)) + } + } else { + target_dims <- list(exp = time_dim, obs = time_dim, ref = time_dim) + } + + res <- Apply(data, + target_dims = target_dims, + fun = .RMSSS, + time_dim = time_dim, dat_dim = dat_dim, + pval = pval, sign = sign, alpha = alpha, + sig_method = sig_method, sig_method.type = sig_method.type, + ncores = ncores) + + return(res) +} + +.RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, pval = TRUE, + sign = FALSE, alpha = 0.05, sig_method = 'one-sided Fisher', + sig_method.type = NULL) { + # exp: [sdate, (dat)] + # obs: [sdate, (dat)] + # ref: [sdate, (dat)] or NULL + + ## Previous ref code + # if (is.null(ref)) { + # ref <- array(data = 0, dim = dim(obs)) + # } else if (identical(ref, 0) | identical(ref, 1)) { + # ref <- array(ref, dim = dim(exp)) + # } + + ## Include ref condition for cross-validation + obs_time_len <- dim(obs)[time_dim] + + if (is.null(ref)) { + if (is.null(clim.cross.val)) { ## If clim.cross.val is NULL reference is set to 0 + ref <- array(data = 0, dim = dim(obs)) + } else if (isFALSE(clim.cross.val)) { ## Without cross-validation + ref <- array(data = rep(obs, each = obs_time_len), dim = c(obs_time_len, obs_time_len)) + } else if (isTRUE(clim.cross.val)) { ## With cross-validation (excluding the value of that year to create ref for that year) + ref <- array(data = NA, dim = c(obs_time_len, obs_time_len - 1)) + for (i in 1:obs_time_len) { + ref[i, ] <- obs[-i] + } + } + } else if (identical(ref, 0) | identical(ref, 1)) { + ref <- array(ref, dim = dim(exp)) + } + + if (is.null(dat_dim)) { + # exp: [sdate] + # obs: [sdate] + nexp <- 1 + nobs <- 1 + nref <- 1 + # Add dat dim back temporarily + dim(exp) <- c(dim(exp), dat = 1) + dim(obs) <- c(dim(obs), dat = 1) + dim(ref) <- c(dim(ref), dat = 1) + + } else { + # exp: [sdate, dat_exp] + # obs: [sdate, dat_obs] + nexp <- as.numeric(dim(exp)[2]) + nobs <- as.numeric(dim(obs)[2]) + if (dat_dim %in% names(dim(ref))) { + nref <- as.numeric(dim(ref)[2]) + } else { + dim(ref) <- c(dim(ref), dat = 1) + nref <- 1 + } + } + + nsdate <- as.numeric(dim(exp)[1]) + + # RMS of forecast + dif1 <- array(dim = c(nsdate, nexp, nobs)) + names(dim(dif1)) <- c(time_dim, 'nexp', 'nobs') + + for (i in 1:nobs) { + dif1[, , i] <- sapply(1:nexp, function(x) {exp[, x] - obs[, i]}) + } + + rms_exp <- colMeans(dif1^2, na.rm = TRUE)^0.5 # [nexp, nobs] + + # RMS of reference + dif2 <- array(dim = c(nsdate, nref, nobs)) + names(dim(dif2)) <- c(time_dim, 'nexp', 'nobs') + for (i in 1:nobs) { + dif2[, , i] <- sapply(1:nref, function(x) {ref[, x] - obs[, i]}) + } + rms_ref <- colMeans(dif2^2, na.rm = TRUE)^0.5 # [nref, nobs] + if (nexp != nref) { + # expand 
rms_ref to nexp (nref is 1) + rms_ref <- array(rms_ref, dim = c(nobs = nobs, nexp = nexp)) + rms_ref <- Reorder(rms_ref, c(2, 1)) + } + + rmsss <- 1 - rms_exp / rms_ref + + ################################################# + + if (sig_method == 'one-sided Fisher') { + p_val <- array(dim = c(nexp = nexp, nobs = nobs)) + ## pval and sign + if (pval || sign) { + eno1 <- Eno(dif1, time_dim) + if (is.null(ref)) { + eno2 <- Eno(obs, time_dim) + eno2 <- array(eno2, dim = c(nobs = nobs, nexp = nexp)) + eno2 <- Reorder(eno2, c(2, 1)) + } else { + eno2 <- Eno(dif2, time_dim) + if (nref != nexp) { + eno2 <- array(eno2, dim = c(nobs = nobs, nexp = nexp)) + eno2 <- Reorder(eno2, c(2, 1)) + } + } + + F.stat <- (eno2 * rms_ref^2 / (eno2 - 1)) / ((eno1 * rms_exp^2 / (eno1- 1))) + tmp <- !is.na(eno1) & !is.na(eno2) & eno1 > 2 & eno2 > 2 + p_val <- 1 - pf(F.stat, eno1 - 1, eno2 - 1) + if (sign) signif <- p_val <= alpha + # If there isn't enough valid data, return NA + p_val[which(!tmp)] <- NA + if (sign) signif[which(!tmp)] <- NA + + # change not enough valid data rmsss to NA + rmsss[which(!tmp)] <- NA + } + + } else if (sig_method == "Random Walk") { + + if (sign) signif <- array(dim = c(nexp = nexp, nobs = nobs)) + if (pval) p_val <- array(dim = c(nexp = nexp, nobs = nobs)) + + for (i in 1:nexp) { + for (j in 1:nobs) { + error_exp <- array(data = abs(exp[, i] - obs[, j]), dim = c(time = nsdate)) + if (nref == nexp) { + error_ref <- array(data = abs(ref[, i] - obs[, j]), dim = c(time = nsdate)) + } else { + # nref = 1 + error_ref <- array(data = abs(ref - obs[, j]), dim = c(time = nsdate)) + } + aux <- .RandomWalkTest(skill_A = error_exp, skill_B = error_ref, + test.type = sig_method.type, + pval = pval, sign = sign, alpha = alpha) + if (sign) signif[i, j] <- aux$sign + if (pval) p_val[i, j] <- aux$p.val + } + } + } + + ################################### + # Remove extra dimensions if dat_dim = NULL + if (is.null(dat_dim)) { + dim(rmsss) <- NULL + if (pval) dim(p_val) <- NULL + if (sign) dim(signif) <- NULL + } + ################################### + + # output + res <- list(rmsss = rmsss) + if (pval) { + p.val <- list(p.val = p_val) + res <- c(res, p.val) + } + if (sign) { + signif <- list(sign = signif) + res <- c(res, signif) + } + + return(res) +} diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index dcd28eb7..84f9233e 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -19,6 +19,10 @@ source("modules/Skill/R/tmp/GetProbs.R") source("modules/Skill/compute_skill_metrics.R") source("modules/Skill/compute_probabilities.R") +## Temporary +source("modules/Skill/R/tmp/RMSSS.R") +.RandomWalkTest <- s2dv:::.RandomWalkTest + Skill <- function(recipe, data, agg = 'global') { # data$hcst: s2dv_cube containing the hindcast @@ -233,6 +237,7 @@ Skill <- function(recipe, data, agg = 'global') { pval = FALSE, sign = TRUE, sig_method = 'Random Walk', + clim.cross.val = cross.val, ncores = ncores) # Compute ensemble mean and modify dimensions skill <- lapply(skill, function(x) { -- GitLab From a8990e703a875859c23124577132ee6b1c66f44d Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Thu, 9 Nov 2023 16:29:14 +0100 Subject: [PATCH 04/43] changed scorecards check --- tools/check_recipe.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 339c8111..dadaa822 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -590,7 +590,7 @@ check_recipe <- function(recipe) { } } if ('enscorr' %in% tolower(sc_metrics)) { - 
recipe$Analysis$Workflow$Statistics <- c('variance', 'covariance') + recipe$Analysis$Workflow$Statistics <- c('standard_deviation', 'covariance') } recipe$Analysis$Workflow$Skill$metric <- requested_metrics } -- GitLab From 330deb317c4f4e1ff7d51408d0893ea3e1b401c9 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Thu, 9 Nov 2023 16:35:41 +0100 Subject: [PATCH 05/43] removed RMSSS cross-val --- modules/Skill/R/tmp/RMSSS.R | 482 ------------------------------------ modules/Skill/Skill.R | 5 - 2 files changed, 487 deletions(-) delete mode 100644 modules/Skill/R/tmp/RMSSS.R diff --git a/modules/Skill/R/tmp/RMSSS.R b/modules/Skill/R/tmp/RMSSS.R deleted file mode 100644 index e2fc4a80..00000000 --- a/modules/Skill/R/tmp/RMSSS.R +++ /dev/null @@ -1,482 +0,0 @@ -#'Compute root mean square error skill score -#' -#'Compute the root mean square error skill score (RMSSS) between an array of -#'forecast 'exp' and an array of observation 'obs'. The two arrays should -#'have the same dimensions except along 'dat_dim' and 'memb_dim'. The RMSSSs -#'are computed along 'time_dim', the dimension which corresponds to the start -#'date dimension. -#'RMSSS computes the root mean square error skill score of each exp in 1:nexp -#'against each obs in 1:nobs which gives nexp * nobs RMSSS for each grid point -#'of the array.\cr -#'The p-value and significance test are optionally provided by an one-sided -#'Fisher test or Random Walk test.\cr -#' -#'@param exp A named numeric array of experimental data which contains at least -#' time dimension (time_dim). It can also be a vector with the same length as -#' 'obs', then the vector will automatically be 'time_dim'. -#'@param obs A named numeric array of observational data which contains at least -#' time dimension (time_dim). The dimensions should be the same as parameter -#' 'exp' except the length of 'dat_dim' and 'memb_dim' dimension. It can also -#' be a vector with the same length as 'exp', then the vector will -#' automatically be 'time_dim'. -#'@param ref A named numerical array of the reference forecast data with at -#' least time dimension, or 0 (typical climatological forecast) or 1 -#' (normalized climatological forecast). If it is an array, the dimensions must -#' be the same as 'exp' except 'memb_dim' and 'dat_dim'. If there is only one -#' reference dataset, it should not have dataset dimension. If there is -#' corresponding reference for each experiment, the dataset dimension must -#' have the same length as in 'exp'. If 'ref' is NULL, the typical -#' climatological forecast is used as reference forecast (equivalent to 0.) -#' The default value is NULL. -#'@param dat_dim A character string indicating the name of dataset (nobs/nexp) -#' dimension. The default value is NULL. -#'@param time_dim A character string indicating the name of dimension along -#' which the RMSSS are computed. The default value is 'sdate'. -#'@param memb_dim A character string indicating the name of the member dimension -#' to compute the ensemble mean; it should be set to NULL if the data are -#' already the ensemble mean. The default value is NULL. -#'@param pval A logical value indicating whether to compute or not the p-value -#' of the test Ho: RMSSS = 0. The default value is TRUE. -#'@param sign A logical value indicating whether to compute or not the -#' statistical significance of the test Ho: RMSSS = 0. The default value is -#' FALSE. -#'@param alpha A numeric of the significance level to be used in the -#' statistical significance test. The default value is 0.05. 
-#'@param sig_method A character string indicating the significance method. The -#' options are "one-sided Fisher" (default) and "Random Walk". -#'@param sig_method.type A character string indicating the test type of the -#' significance method. Check \code{RandomWalkTest()} parameter -#' \code{test.type} for details if parameter "sig_method" is "Random Walk". The -#' default is NULL (since "one-sided Fisher" doesn't have different test -#' types.) -#'@param clim.cross.val A logical indicating whether to build the climatological -#' forecast in cross-validation (i.e. excluding the observed value of the time -#' step when building the probabilistic distribution function for that -#' particular time step). Only used if 'ref' is NULL. The default value is -#' NULL, in which case the climatological reference is set to zero. FALSE -#' indicates calculating the ref from the obs data without cross-validation, -#' and TRUE indicates calculating the ref from the obs data applying -#' cross-validation. -#'@param ncores An integer indicating the number of cores to use for parallel -#' computation. The default value is NULL. -#' -#'@return -#'A list containing the numeric arrays with dimension:\cr -#' c(nexp, nobs, all other dimensions of exp except time_dim).\cr -#'nexp is the number of experiment (i.e., dat_dim in exp), and nobs is the -#'number of observation (i.e., dat_dim in obs). If dat_dim is NULL, nexp and -#'nobs are omitted.\cr -#'\item{$rmsss}{ -#' A numerical array of the root mean square error skill score. -#'} -#'\item{$p.val}{ -#' A numerical array of the p-value with the same dimensions as $rmsss. -#' Only present if \code{pval = TRUE}. -#'} -#'\item{sign}{ -#' A logical array of the statistical significance of the RMSSS with the same -#' dimensions as $rmsss. Only present if \code{sign = TRUE}. 
-#'} -#' -#'@examples -#'# Load sample data as in Load() example: -#'example(Load) -#'clim <- Clim(sampleData$mod, sampleData$obs) -#'ano_exp <- Ano(sampleData$mod, clim$clim_exp) -#'ano_obs <- Ano(sampleData$obs, clim$clim_obs) -#'rmsss <- RMSSS(ano_exp, ano_obs, dat_dim = 'dataset', memb_dim = 'member') -#' -#' set.seed(1) -#' exp1 <- array(rnorm(30), dim = c(dataset = 2, time = 3, memb = 5)) -#' set.seed(2) -#' obs1 <- array(rnorm(15), dim = c(time = 3, memb = 5, dataset = 1)) -#' res1 <- RMSSS(exp1, obs1, time_dim = 'time', dat_dim = 'dataset') -#' -#' exp2 <- array(rnorm(30), dim = c(lat = 2, time = 3, memb = 5)) -#' obs2 <- array(rnorm(15), dim = c(time = 3, lat = 2)) -#' res2 <- RMSSS(exp2, obs2, time_dim = 'time', memb_dim = 'memb') -#' -#' exp3 <- array(rnorm(30), dim = c(lat = 2, time = 3)) -#' obs3 <- array(rnorm(15), dim = c(time = 3, lat = 2)) -#' res3 <- RMSSS(exp3, obs3, time_dim = 'time') -#' -#'@rdname RMSSS -#'@import multiApply -#'@importFrom stats pf -#'@export -RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, - memb_dim = NULL, pval = TRUE, clim.cross.val = NULL, sign = FALSE, alpha = 0.05, - sig_method = 'one-sided Fisher', sig_method.type = NULL, ncores = NULL) { - - # Check inputs - ## exp, obs, and ref (1) - if (is.null(exp) | is.null(obs)) { - stop("Parameter 'exp' and 'obs' cannot be NULL.") - } - if (!is.numeric(exp) | !is.numeric(obs)) { - stop("Parameter 'exp' and 'obs' must be a numeric array.") - } - if (is.null(dim(exp)) & is.null(dim(obs))) { #is vector - if (length(exp) == length(obs)) { - exp <- array(exp, dim = c(length(exp))) - names(dim(exp)) <- c(time_dim) - obs <- array(obs, dim = c(length(obs))) - names(dim(obs)) <- c(time_dim) - } else { - stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", - "dimensions time_dim and dat_dim, or vector of same length.")) - } - } else if (is.null(dim(exp)) | is.null(dim(obs))) { - stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", - "dimensions time_dim and dat_dim, or vector of same length.")) - } - if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { - stop("Parameter 'exp' and 'obs' must have dimension names.") - } - if (!is.null(ref)) { - if (!is.numeric(ref)) { - stop("Parameter 'ref' must be numeric.") - } - if (is.array(ref)) { - if (any(is.null(names(dim(ref))))| any(nchar(names(dim(ref))) == 0)) { - stop("Parameter 'ref' must have dimension names.") - } - } else if (length(ref) != 1 | any(!ref %in% c(0, 1))) { - stop("Parameter 'ref' must be a numeric array or number 0 or 1.") - } - } else { - ref <- 0 - } - if (!is.array(ref)) { # 0 or 1 - ref <- array(data = ref, dim = dim(exp)) - } - - ## time_dim - if (!is.character(time_dim) | length(time_dim) > 1) { - stop("Parameter 'time_dim' must be a character string.") - } - if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { - stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") - } - ## dat_dim - if (!is.null(dat_dim)) { - if (!is.character(dat_dim) | length(dat_dim) > 1) { - stop("Parameter 'dat_dim' must be a character string or NULL.") - } - if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { - stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", - " Set it as NULL if there is no dataset dimension.") - } - } - ## memb_dim - if (!is.null(memb_dim)) { - if (!is.character(memb_dim) | length(memb_dim) > 1) { - stop("Parameter 
'memb_dim' must be a character string.") - } - if (!memb_dim %in% names(dim(exp))) { - stop("Parameter 'memb_dim' is not found in 'exp' dimension.") - } - } - ## pval - if (!is.logical(pval) | length(pval) > 1) { - stop("Parameter 'pval' must be one logical value.") - } - ## sign - if (!is.logical(sign) | length(sign) > 1) { - stop("Parameter 'sign' must be one logical value.") - } - ## alpha - if (!is.numeric(alpha) | length(alpha) > 1) { - stop("Parameter 'alpha' must be one numeric value.") - } - ## sig_method - if (length(sig_method) != 1 | !any(sig_method %in% c('one-sided Fisher', 'Random Walk'))) { - stop("Parameter 'sig_method' must be one of 'one-sided Fisher' or 'Random Walk'.") - } - ## sig_method.type - if (sig_method == 'Random Walk') { - if (is.null(sig_method.type)) { - .warning("Parameter 'sig_method.type' must be specified if 'sig_method' is ", - "Random Walk. Assign it as 'two.sided'.") - .warning("Note that in s2dv <= 1.4.1, Random Walk uses 'two.sided.approx' method.", - "If you want to retain the same functionality, please specify parameter ", - "'sig_method.type' as 'two.sided.approx'.") - sig_method.type <- "two.sided" - } - if (!any(sig_method.type %in% c('two.sided.approx', 'two.sided', 'greater', 'less'))) { - stop("Parameter 'sig_method.type' must be a method accepted by RandomWalkTest() parameter 'test.type'.") - } - if (sig_method.type == 'two.sided.approx' & pval == T) { - .warning("p-value cannot be calculated by Random Walk 'two.sided.approx' method.") - pval <- FALSE - if (alpha != 0.05) { - .warning("DelSole and Tippett (2016) aproximation is valid for alpha ", - "= 0.05 only. Returning the significance at the 0.05 significance level.") - alpha <- 0.05 - } - } - } - ## ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { - stop("Parameter 'ncores' must be a positive integer.") - } - } - ## exp, obs, and ref (2) - name_exp <- sort(names(dim(exp))) - name_obs <- sort(names(dim(obs))) - if (!is.null(memb_dim)) { - if (memb_dim %in% name_exp) { - name_exp <- name_exp[-which(name_exp == memb_dim)] - } - if (memb_dim %in% name_obs) { - name_obs <- name_obs[-which(name_obs == memb_dim)] - } - } - if (!is.null(dat_dim)) { - name_exp <- name_exp[-which(name_exp == dat_dim)] - name_obs <- name_obs[-which(name_obs == dat_dim)] - } - if (!all(name_exp == name_obs)) { - stop("Parameter 'exp' and 'obs' must have the same dimension names.") - } - if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions except 'dat_dim' and 'memb_dim'.")) - } - - name_ref <- sort(names(dim(ref))) - if (!is.null(memb_dim) && memb_dim %in% name_ref) { - name_ref <- name_ref[-which(name_ref == memb_dim)] - } - if (!is.null(dat_dim)) { - if (dat_dim %in% name_ref) { - if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { - stop(paste0("If parameter 'ref' has dataset dimension, it must be ", - "equal to dataset dimension of 'exp'.")) - } - name_ref <- name_ref[-which(name_ref == dat_dim)] - } - } - if (!identical(length(name_exp), length(name_ref)) | - !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { - stop(paste0("Parameter 'exp' and 'ref' must have the same length of ", - "all dimensions except 'memb_dim' and 'dat_dim' if there is ", - "only one reference dataset.")) - } - - if (dim(exp)[time_dim] <= 2) { - stop("The length of time_dim must be more than 2 to compute RMSSS.") - } - - - ############################### - # # Sort dimension - # 
name_exp <- names(dim(exp)) - # name_obs <- names(dim(obs)) - # order_obs <- match(name_exp, name_obs) - # obs <- Reorder(obs, order_obs) - - ############################### - ## Ensemble mean - if (!is.null(memb_dim)) { - if (memb_dim %in% names(dim(exp))) { - exp <- MeanDims(exp, memb_dim, na.rm = T) - } - if (memb_dim %in% names(dim(obs))) { - obs <- MeanDims(obs, memb_dim, na.rm = T) - } - if (memb_dim %in% names(dim(ref))) { - ref <- MeanDims(ref, memb_dim, na.rm = T) - } - } - - ############################### - # Calculate RMSSS - - data <- list(exp = exp, obs = obs, ref = ref) - if (!is.null(dat_dim)) { - if (dat_dim %in% names(dim(ref))) { - target_dims <- list(exp = c(time_dim, dat_dim), - obs = c(time_dim, dat_dim), - ref = c(time_dim, dat_dim)) - } else { - target_dims <- list(exp = c(time_dim, dat_dim), - obs = c(time_dim, dat_dim), - ref = c(time_dim)) - } - } else { - target_dims <- list(exp = time_dim, obs = time_dim, ref = time_dim) - } - - res <- Apply(data, - target_dims = target_dims, - fun = .RMSSS, - time_dim = time_dim, dat_dim = dat_dim, - pval = pval, sign = sign, alpha = alpha, - sig_method = sig_method, sig_method.type = sig_method.type, - ncores = ncores) - - return(res) -} - -.RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, pval = TRUE, - sign = FALSE, alpha = 0.05, sig_method = 'one-sided Fisher', - sig_method.type = NULL) { - # exp: [sdate, (dat)] - # obs: [sdate, (dat)] - # ref: [sdate, (dat)] or NULL - - ## Previous ref code - # if (is.null(ref)) { - # ref <- array(data = 0, dim = dim(obs)) - # } else if (identical(ref, 0) | identical(ref, 1)) { - # ref <- array(ref, dim = dim(exp)) - # } - - ## Include ref condition for cross-validation - obs_time_len <- dim(obs)[time_dim] - - if (is.null(ref)) { - if (is.null(clim.cross.val)) { ## If clim.cross.val is NULL reference is set to 0 - ref <- array(data = 0, dim = dim(obs)) - } else if (isFALSE(clim.cross.val)) { ## Without cross-validation - ref <- array(data = rep(obs, each = obs_time_len), dim = c(obs_time_len, obs_time_len)) - } else if (isTRUE(clim.cross.val)) { ## With cross-validation (excluding the value of that year to create ref for that year) - ref <- array(data = NA, dim = c(obs_time_len, obs_time_len - 1)) - for (i in 1:obs_time_len) { - ref[i, ] <- obs[-i] - } - } - } else if (identical(ref, 0) | identical(ref, 1)) { - ref <- array(ref, dim = dim(exp)) - } - - if (is.null(dat_dim)) { - # exp: [sdate] - # obs: [sdate] - nexp <- 1 - nobs <- 1 - nref <- 1 - # Add dat dim back temporarily - dim(exp) <- c(dim(exp), dat = 1) - dim(obs) <- c(dim(obs), dat = 1) - dim(ref) <- c(dim(ref), dat = 1) - - } else { - # exp: [sdate, dat_exp] - # obs: [sdate, dat_obs] - nexp <- as.numeric(dim(exp)[2]) - nobs <- as.numeric(dim(obs)[2]) - if (dat_dim %in% names(dim(ref))) { - nref <- as.numeric(dim(ref)[2]) - } else { - dim(ref) <- c(dim(ref), dat = 1) - nref <- 1 - } - } - - nsdate <- as.numeric(dim(exp)[1]) - - # RMS of forecast - dif1 <- array(dim = c(nsdate, nexp, nobs)) - names(dim(dif1)) <- c(time_dim, 'nexp', 'nobs') - - for (i in 1:nobs) { - dif1[, , i] <- sapply(1:nexp, function(x) {exp[, x] - obs[, i]}) - } - - rms_exp <- colMeans(dif1^2, na.rm = TRUE)^0.5 # [nexp, nobs] - - # RMS of reference - dif2 <- array(dim = c(nsdate, nref, nobs)) - names(dim(dif2)) <- c(time_dim, 'nexp', 'nobs') - for (i in 1:nobs) { - dif2[, , i] <- sapply(1:nref, function(x) {ref[, x] - obs[, i]}) - } - rms_ref <- colMeans(dif2^2, na.rm = TRUE)^0.5 # [nref, nobs] - if (nexp != nref) { - # expand 
rms_ref to nexp (nref is 1) - rms_ref <- array(rms_ref, dim = c(nobs = nobs, nexp = nexp)) - rms_ref <- Reorder(rms_ref, c(2, 1)) - } - - rmsss <- 1 - rms_exp / rms_ref - - ################################################# - - if (sig_method == 'one-sided Fisher') { - p_val <- array(dim = c(nexp = nexp, nobs = nobs)) - ## pval and sign - if (pval || sign) { - eno1 <- Eno(dif1, time_dim) - if (is.null(ref)) { - eno2 <- Eno(obs, time_dim) - eno2 <- array(eno2, dim = c(nobs = nobs, nexp = nexp)) - eno2 <- Reorder(eno2, c(2, 1)) - } else { - eno2 <- Eno(dif2, time_dim) - if (nref != nexp) { - eno2 <- array(eno2, dim = c(nobs = nobs, nexp = nexp)) - eno2 <- Reorder(eno2, c(2, 1)) - } - } - - F.stat <- (eno2 * rms_ref^2 / (eno2 - 1)) / ((eno1 * rms_exp^2 / (eno1- 1))) - tmp <- !is.na(eno1) & !is.na(eno2) & eno1 > 2 & eno2 > 2 - p_val <- 1 - pf(F.stat, eno1 - 1, eno2 - 1) - if (sign) signif <- p_val <= alpha - # If there isn't enough valid data, return NA - p_val[which(!tmp)] <- NA - if (sign) signif[which(!tmp)] <- NA - - # change not enough valid data rmsss to NA - rmsss[which(!tmp)] <- NA - } - - } else if (sig_method == "Random Walk") { - - if (sign) signif <- array(dim = c(nexp = nexp, nobs = nobs)) - if (pval) p_val <- array(dim = c(nexp = nexp, nobs = nobs)) - - for (i in 1:nexp) { - for (j in 1:nobs) { - error_exp <- array(data = abs(exp[, i] - obs[, j]), dim = c(time = nsdate)) - if (nref == nexp) { - error_ref <- array(data = abs(ref[, i] - obs[, j]), dim = c(time = nsdate)) - } else { - # nref = 1 - error_ref <- array(data = abs(ref - obs[, j]), dim = c(time = nsdate)) - } - aux <- .RandomWalkTest(skill_A = error_exp, skill_B = error_ref, - test.type = sig_method.type, - pval = pval, sign = sign, alpha = alpha) - if (sign) signif[i, j] <- aux$sign - if (pval) p_val[i, j] <- aux$p.val - } - } - } - - ################################### - # Remove extra dimensions if dat_dim = NULL - if (is.null(dat_dim)) { - dim(rmsss) <- NULL - if (pval) dim(p_val) <- NULL - if (sign) dim(signif) <- NULL - } - ################################### - - # output - res <- list(rmsss = rmsss) - if (pval) { - p.val <- list(p.val = p_val) - res <- c(res, p.val) - } - if (sign) { - signif <- list(sign = signif) - res <- c(res, signif) - } - - return(res) -} diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 84f9233e..dcd28eb7 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -19,10 +19,6 @@ source("modules/Skill/R/tmp/GetProbs.R") source("modules/Skill/compute_skill_metrics.R") source("modules/Skill/compute_probabilities.R") -## Temporary -source("modules/Skill/R/tmp/RMSSS.R") -.RandomWalkTest <- s2dv:::.RandomWalkTest - Skill <- function(recipe, data, agg = 'global') { # data$hcst: s2dv_cube containing the hindcast @@ -237,7 +233,6 @@ Skill <- function(recipe, data, agg = 'global') { pval = FALSE, sign = TRUE, sig_method = 'Random Walk', - clim.cross.val = cross.val, ncores = ncores) # Compute ensemble mean and modify dimensions skill <- lapply(skill, function(x) { -- GitLab From 83e97c77cd08bc4ccc100389c63162c6cf79648f Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Fri, 10 Nov 2023 17:47:23 +0100 Subject: [PATCH 06/43] Included _syear metrics --- modules/Skill/R/tmp/CRPS.R | 177 ++++++++ modules/Skill/R/tmp/RPS.R | 380 ++++++++++++++++++ modules/Skill/Skill.R | 70 +++- modules/Statistics/Statistics.R | 89 ++++ .../recipe_scorecards_s2s-suite.yml | 11 +- tools/check_recipe.R | 3 +- 6 files changed, 715 insertions(+), 15 deletions(-) create mode 100644 
modules/Skill/R/tmp/CRPS.R
 create mode 100644 modules/Skill/R/tmp/RPS.R
 create mode 100644 modules/Statistics/Statistics.R

diff --git a/modules/Skill/R/tmp/CRPS.R b/modules/Skill/R/tmp/CRPS.R
new file mode 100644
index 00000000..c08375c4
--- /dev/null
+++ b/modules/Skill/R/tmp/CRPS.R
@@ -0,0 +1,177 @@
+#'Compute the Continuous Ranked Probability Score
+#'
+#'The Continuous Ranked Probability Score (CRPS; Wilks, 2011) is the continuous
+#'version of the Ranked Probability Score (RPS; Wilks, 2011). It is a skill
+#'metric to evaluate the full distribution of probabilistic forecasts. It has a
+#'negative orientation (i.e., the higher-quality forecast the smaller CRPS) and
+#'it rewards the forecast that has probability concentration around the observed
+#'value. In case of a deterministic forecast, the CRPS is reduced to the mean
+#'absolute error. It has the same units as the data. The function is based on
+#'enscrps_cpp from SpecsVerification. If there is more than one dataset, CRPS
+#'will be computed for each pair of exp and obs data.
+#'
+#'@param exp A named numerical array of the forecast with at least time
+#' dimension.
+#'@param obs A named numerical array of the observation with at least time
+#' dimension. The dimensions must be the same as 'exp' except 'memb_dim' and
+#' 'dat_dim'.
+#'@param time_dim A character string indicating the name of the time dimension.
+#' The default value is 'sdate'.
+#'@param memb_dim A character string indicating the name of the member dimension
+#' to compute the probabilities of the forecast. The default value is 'member'.
+#'@param dat_dim A character string indicating the name of dataset dimension.
+#' The length of this dimension can be different between 'exp' and 'obs'. The
+#' default value is NULL.
+#'@param Fair A logical indicating whether to compute the FairCRPS (the
+#' potential CRPS that the forecast would have with an infinite ensemble size).
+#' The default value is FALSE.
+#'@param return_mean A logical indicating whether to return the temporal mean
+#' of CRPS or not. When TRUE the temporal mean is calculated, when FALSE the
+#' time dimension is not aggregated. The default is TRUE.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#' computation. The default value is NULL.
+#'
+#'@return
+#'A numerical array of CRPS with dimensions c(nexp, nobs, the rest dimensions of
+#''exp' except 'time_dim' and 'memb_dim' dimensions). nexp is the number of
+#'experiment (i.e., dat_dim in exp), and nobs is the number of observation
+#'(i.e., dat_dim in obs). If dat_dim is NULL, nexp and nobs are omitted.
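+#'
+#'When \code{return_mean = FALSE} the 'time_dim' dimension is kept, so the
+#'per-start-date score (the crps_syear metric requested by the Scorecards
+#'workflow) can be aggregated across start dates afterwards. A minimal sketch
+#'of that use, assuming the synthetic arrays from the example below:
+#'\preformatted{
+#'crps_syear <- CRPS(exp = exp, obs = obs, return_mean = FALSE)
+#'dim(crps_syear)  # the 'sdate' dimension is retained
+#'}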
+#' +#'@references +#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +#' +#'@examples +#'exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#'obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +#'res <- CRPS(exp = exp, obs = obs) +#' +#'@import multiApply +#'@importFrom SpecsVerification enscrps_cpp +#'@importFrom ClimProjDiags Subset +#'@export +CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, + Fair = FALSE, return_mean = TRUE, ncores = NULL) { + # Check inputs + ## exp and obs (1) + if (!is.array(exp) | !is.numeric(exp)) + stop("Parameter 'exp' must be a numeric array.") + if (!is.array(obs) | !is.numeric(obs)) + stop("Parameter 'obs' must be a numeric array.") + if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop("Parameter 'time_dim' must be a character string.") + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## exp and obs (2) + if (memb_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { + obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') + } else { + stop("Not implemented for observations with members ('obs' can have 'memb_dim', but it should be of length = 1).") + } + } + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'memb_dim' and 'dat_dim'.")) + } + ## Fair + if (!is.logical(Fair) | length(Fair) > 1) { + stop("Parameter 'Fair' must be either TRUE or FALSE.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + + # Compute CRPS + crps <- Apply(data = list(exp = exp, obs = obs), + target_dims = list(exp = c(time_dim, memb_dim, dat_dim), + obs = c(time_dim, dat_dim)), + fun = .CRPS, + time_dim = time_dim, memb_dim = memb_dim, dat_dim = dat_dim, + Fair = Fair, + ncores = ncores)$output1 + + if (return_mean == TRUE) { + crps <- MeanDims(crps, time_dim, na.rm = FALSE) + } else { + crps <- crps + } + + return(crps) +} + +.CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, + Fair = 
FALSE) { + # exp: [sdate, memb, (dat_dim)] + # obs: [sdate, (dat_dim)] + + # Adjust dimensions if needed + if (is.null(dat_dim)) { + nexp <- 1 + nobs <- 1 + dim(exp) <- c(dim(exp), nexp = nexp) + dim(obs) <- c(dim(obs), nobs = nobs) + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + } + + # for FairCRPS + R_new <- ifelse(Fair, Inf, NA) + + CRPS <- array(dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs)) + + for (i in 1:nexp) { + for (j in 1:nobs) { + exp_data <- exp[ , , i] + obs_data <- obs[ , j] + + if (is.null(dim(exp_data))) dim(exp_data) <- c(dim(exp)[1:2]) + if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1]) + + crps <- SpecsVerification::enscrps_cpp(ens = exp_data, obs = obs_data, R_new = R_new) + CRPS[ , i, j] <- crps + } + } + + if (is.null(dat_dim)) { + dim(CRPS) <- c(dim(CRPS)[time_dim]) + } + + return(CRPS) +} diff --git a/modules/Skill/R/tmp/RPS.R b/modules/Skill/R/tmp/RPS.R new file mode 100644 index 00000000..54ec8440 --- /dev/null +++ b/modules/Skill/R/tmp/RPS.R @@ -0,0 +1,380 @@ +#'Compute the Ranked Probability Score +#' +#'The Ranked Probability Score (RPS; Wilks, 2011) is defined as the sum of the +#'squared differences between the cumulative forecast probabilities (computed +#'from the ensemble members) and the observations (defined as 0% if the category +#'did not happen and 100% if it happened). It can be used to evaluate the skill +#'of multi-categorical probabilistic forecasts. The RPS ranges between 0 +#'(perfect forecast) and n-1 (worst possible forecast), where n is the number of +#'categories. In the case of a forecast divided into two categories (the lowest +#'number of categories that a probabilistic forecast can have), the RPS +#'corresponds to the Brier Score (BS; Wilks, 2011), therefore ranging between 0 +#'and 1.\cr +#'The function first calculates the probabilities for forecasts and observations, +#'then use them to calculate RPS. Or, the probabilities of exp and obs can be +#'provided directly to compute the score. If there is more than one dataset, RPS +#'will be computed for each pair of exp and obs data. The fraction of acceptable +#'NAs can be adjusted. +#' +#'@param exp A named numerical array of either the forecasts with at least time +#' and member dimensions, or the probabilities with at least time and category +#' dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. +#'@param obs A named numerical array of either the observation with at least +#' time dimension, or the probabilities with at least time and category +#' dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. The +#' dimensions must be the same as 'exp' except 'memb_dim' and 'dat_dim'. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the probabilities of the forecast. The default value is 'member'. +#' If the data are probabilities, set memb_dim as NULL. +#'@param cat_dim A character string indicating the name of the category +#' dimension that is needed when the exp and obs are probabilities. The default +#' value is NULL, which means that the data are not probabilities. +#'@param dat_dim A character string indicating the name of dataset dimension. +#' The length of this dimension can be different between 'exp' and 'obs'. +#' The default value is NULL. 
+#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to
+#'  1) between the categories. The default value is c(1/3, 2/3), which
+#'  corresponds to tercile equiprobable categories.
+#'@param indices_for_clim A vector of the indices to be taken along 'time_dim'
+#'  for computing the thresholds between the probabilistic categories. If NULL,
+#'  the whole period is used. The default value is NULL.
+#'@param Fair A logical indicating whether to compute the FairRPS (the
+#'  potential RPS that the forecast would have with an infinite ensemble size).
+#'  The default value is FALSE.
+#'@param weights A named numerical array of the weights for 'exp' probability
+#'  calculation. If 'dat_dim' is NULL, the dimensions should include 'memb_dim'
+#'  and 'time_dim'. Else, the dimension should also include 'dat_dim'. The
+#'  default value is NULL. The ensemble should have at least 70 members or span
+#'  at least 10 time steps and have more than 45 members if consistency between
+#'  the weighted and unweighted methodologies is desired.
+#'@param cross.val A logical indicating whether to compute the thresholds
+#'  between probabilistic categories in cross-validation. The default value is
+#'  FALSE.
+#'@param return_mean A logical indicating whether to return the temporal mean
+#'  of the RPS or not. When TRUE the temporal mean is calculated, when FALSE
+#'  the time dimension is not aggregated. The default is TRUE.
+#'@param na.rm A logical or numeric value between 0 and 1. If it is numeric, it
+#'  means the lower limit for the fraction of the non-NA values. 1 is equal to
+#'  FALSE (no NA is acceptable), 0 is equal to TRUE (all NAs are acceptable).
+#'  The function returns NA if the fraction of non-NA values in the data is
+#'  less than na.rm. Otherwise, RPS will be calculated. The default value is
+#'  FALSE.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#'  computation. The default value is NULL.
+#'
+#'@return
+#'A numerical array of RPS with dimensions c(nexp, nobs, the rest dimensions of
+#''exp' except 'time_dim' and 'memb_dim' dimensions). nexp is the number of
+#'experiment (i.e., dat_dim in exp), and nobs is the number of observation
+#'(i.e., dat_dim in obs). If dat_dim is NULL, nexp and nobs are omitted.
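A minimal sketch of the 'return_mean' behaviour documented above (synthetic data; it assumes the patched RPS and its helpers, .GetProbs and s2dv's MeanDims, are sourced, and that the inputs contain no NAs):

  exp <- array(rnorm(1000), dim = c(member = 10, sdate = 20, lat = 5))
  obs <- array(rnorm(100), dim = c(sdate = 20, lat = 5))
  rps_syear <- RPS(exp, obs, return_mean = FALSE)  # keeps the 'sdate' dimension
  rps_mean  <- RPS(exp, obs, return_mean = TRUE)   # default: temporal mean
  all.equal(MeanDims(rps_syear, 'sdate'), rps_mean)  # TRUE in the NA-free case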
+#' +#'@references +#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +#' +#'@examples +#'# Use synthetic data +#'exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#'obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +#'res <- RPS(exp = exp, obs = obs) +#'# Use probabilities as inputs +#'exp_probs <- GetProbs(exp, time_dim = 'sdate', memb_dim = 'member') +#'obs_probs <- GetProbs(obs, time_dim = 'sdate', memb_dim = NULL) +#'res2 <- RPS(exp = exp_probs, obs = obs_probs, memb_dim = NULL, cat_dim = 'bin') +#' +#' +#'@import multiApply +#'@importFrom easyVerification convert2prob +#'@export +RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, + dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, + Fair = FALSE, weights = NULL, cross.val = FALSE, return_mean = TRUE, + na.rm = FALSE, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (!is.array(exp) | !is.numeric(exp)) + stop('Parameter "exp" must be a numeric array.') + if (!is.array(obs) | !is.numeric(obs)) + stop('Parameter "obs" must be a numeric array.') + if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop('Parameter "time_dim" must be a character string.') + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## memb_dim & cat_dim + if (is.null(memb_dim) + is.null(cat_dim) != 1) { + stop("Only one of the two parameters 'memb_dim' and 'cat_dim' can have value.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + } + ## cat_dim + if (!is.null(cat_dim)) { + if (!is.character(cat_dim) | length(cat_dim) > 1) { + stop("Parameter 'cat_dim' must be a character string.") + } + if (!cat_dim %in% names(dim(exp)) | !cat_dim %in% names(dim(obs))) { + stop("Parameter 'cat_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } + } + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'memb_dim' and 'dat_dim'.")) + } + ## prob_thresholds + if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | + any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { + stop("Parameter 'prob_thresholds' must 
be a numeric vector between 0 and 1.")
+  }
+  ## indices_for_clim
+  if (is.null(indices_for_clim)) {
+    indices_for_clim <- 1:dim(obs)[time_dim]
+  } else {
+    if (!is.numeric(indices_for_clim) | !is.vector(indices_for_clim)) {
+      stop("Parameter 'indices_for_clim' must be NULL or a numeric vector.")
+    } else if (length(indices_for_clim) > dim(obs)[time_dim] |
+               max(indices_for_clim) > dim(obs)[time_dim] |
+               any(indices_for_clim < 1)) {
+      stop("Parameter 'indices_for_clim' should be the indices of 'time_dim'.")
+    }
+  }
+  ## Fair
+  if (!is.logical(Fair) | length(Fair) > 1) {
+    stop("Parameter 'Fair' must be either TRUE or FALSE.")
+  }
+  ## cross.val
+  if (!is.logical(cross.val) | length(cross.val) > 1) {
+    stop("Parameter 'cross.val' must be either TRUE or FALSE.")
+  }
+  ## weights
+  if (!is.null(weights) & is.null(cat_dim)) {
+    if (!is.array(weights) | !is.numeric(weights))
+      stop("Parameter 'weights' must be a named numeric array.")
+    if (is.null(dat_dim)) {
+      if (length(dim(weights)) != 2 | any(!names(dim(weights)) %in% c(memb_dim, time_dim)))
+        stop("Parameter 'weights' must have two dimensions with the names of 'memb_dim' and 'time_dim'.")
+      if (dim(weights)[memb_dim] != dim(exp)[memb_dim] |
+          dim(weights)[time_dim] != dim(exp)[time_dim]) {
+        stop(paste0("Parameter 'weights' must have the same dimension lengths ",
+                    "as 'memb_dim' and 'time_dim' in 'exp'."))
+      }
+      weights <- Reorder(weights, c(time_dim, memb_dim))
+
+    } else {
+      if (length(dim(weights)) != 3 | any(!names(dim(weights)) %in% c(memb_dim, time_dim, dat_dim)))
+        stop("Parameter 'weights' must have three dimensions with the names of 'memb_dim', 'time_dim' and 'dat_dim'.")
+      if (dim(weights)[memb_dim] != dim(exp)[memb_dim] |
+          dim(weights)[time_dim] != dim(exp)[time_dim] |
+          dim(weights)[dat_dim] != dim(exp)[dat_dim]) {
+        stop(paste0("Parameter 'weights' must have the same dimension lengths ",
+                    "as 'memb_dim', 'time_dim' and 'dat_dim' in 'exp'."))
+      }
+      weights <- Reorder(weights, c(time_dim, memb_dim, dat_dim))
+
+    }
+  } else if (!is.null(weights) & !is.null(cat_dim)) {
+    .warning(paste0("Parameter 'exp' and 'obs' are probabilities already, so parameter ",
+                    "'weights' is not used.
Change 'weights' to NULL.")) + weights <- NULL + } + ## na.rm + if (!isTRUE(na.rm) & !isFALSE(na.rm) & !(is.numeric(na.rm) & na.rm >= 0 & na.rm <= 1)) { + stop('"na.rm" should be TRUE, FALSE or a numeric between 0 and 1') + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + + # Compute RPS + + ## Decide target_dims + if (!is.null(memb_dim)) { + target_dims_exp <- c(time_dim, memb_dim, dat_dim) + if (!memb_dim %in% names(dim(obs))) { + target_dims_obs <- c(time_dim, dat_dim) + } else { + target_dims_obs <- c(time_dim, memb_dim, dat_dim) + } + } else { # cat_dim + target_dims_exp <- target_dims_obs <- c(time_dim, cat_dim, dat_dim) + } + + rps <- Apply(data = list(exp = exp, obs = obs), + target_dims = list(exp = target_dims_exp, + obs = target_dims_obs), + fun = .RPS, + dat_dim = dat_dim, time_dim = time_dim, + memb_dim = memb_dim, cat_dim = cat_dim, + prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, + weights = weights, cross.val = cross.val, + na.rm = na.rm, ncores = ncores)$output1 + + if (return_mean == TRUE) { + rps <- MeanDims(rps, time_dim, na.rm = TRUE) + } else { + rps <- rps + } + + return(rps) +} + + +.RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, + dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, + Fair = FALSE, weights = NULL, cross.val = FALSE, na.rm = FALSE) { + #--- if memb_dim: + # exp: [sdate, memb, (dat)] + # obs: [sdate, (memb), (dat)] + # weights: NULL or same as exp + #--- if cat_dim: + # exp: [sdate, bin, (dat)] + # obs: [sdate, bin, (dat)] + + # Adjust dimensions to be [sdate, memb, dat] for both exp and obs + if (!is.null(memb_dim)) { + if (!memb_dim %in% names(dim(obs))) { + obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim) + } + } + + if (is.null(dat_dim)) { + nexp <- 1 + nobs <- 1 + dim(exp) <- c(dim(exp), nexp = nexp) + dim(obs) <- c(dim(obs), nobs = nobs) + if (!is.null(weights)) dim(weights) <- c(dim(weights), nexp = nexp) + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + } + + rps <- array(dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs)) + + for (i in 1:nexp) { + for (j in 1:nobs) { + exp_data <- exp[ , , i] + obs_data <- obs[ , , j] + + if (is.null(dim(exp_data))) dim(exp_data) <- c(dim(exp)[1:2]) + if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1:2]) + + # Find the fraction of NAs + ## If any member/bin is NA at this time step, it is not good value. 
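+      ## Hedged worked example of the na.rm threshold below (numbers invented):
+      ## with na.rm = 0.9 and 20 time steps of which 18 are complete,
+      ## sum(good_values) / length(obs_mean) = 0.9 >= f_NAs = 0.9, so the score
+      ## is computed on the 18 good steps; with only 17 complete steps
+      ## (0.85 < 0.9) the whole series is returned as NA instead.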
+ exp_mean <- rowMeans(exp_data) + obs_mean <- rowMeans(obs_data) + good_values <- !is.na(exp_mean) & !is.na(obs_mean) + + if (isTRUE(na.rm)) { + f_NAs <- 0 + } else if (isFALSE(na.rm)) { + f_NAs <- 1 + } else { + f_NAs <- na.rm + } + + if (f_NAs <= sum(good_values) / length(obs_mean)) { + + exp_data <- exp_data[good_values, , drop = F] + obs_data <- obs_data[good_values, , drop = F] + + # If the data inputs are forecast/observation, calculate probabilities + if (is.null(cat_dim)) { + if (!is.null(weights)) { + weights_data <- weights[which(good_values) , , i] + if (is.null(dim(weights_data))) dim(weights_data) <- c(dim(weights)[1:2]) + } else { + weights_data <- weights #NULL + } + + # Subset indices_for_clim + dum <- match(indices_for_clim, which(good_values)) + good_indices_for_clim <- dum[!is.na(dum)] + + exp_probs <- .GetProbs(data = exp_data, indices_for_quantiles = good_indices_for_clim, + prob_thresholds = prob_thresholds, weights = weights_data, cross.val = cross.val) + # exp_probs: [bin, sdate] + obs_probs <- .GetProbs(data = obs_data, indices_for_quantiles = good_indices_for_clim, + prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val) + # obs_probs: [bin, sdate] + + } else { # inputs are probabilities already + exp_probs <- t(exp_data) + obs_probs <- t(obs_data) + } + + probs_exp_cumsum <- apply(exp_probs, 2, cumsum) + probs_obs_cumsum <- apply(obs_probs, 2, cumsum) + + # rps: [sdate, nexp, nobs] + rps [good_values, i, j] <- colSums((probs_exp_cumsum - probs_obs_cumsum)^2) + + if (Fair) { # FairRPS + ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps] + R <- dim(exp)[2] #memb + R_new <- Inf + adjustment <- (-1) / (R - 1) * probs_exp_cumsum * (1 - probs_exp_cumsum) + adjustment <- colSums(adjustment) + rps[ , i, j] <- rps[ , i, j] + adjustment + } + + } else { ## not enough values different from NA + + rps[ , i, j] <- as.numeric(NA) + + } + + } + } + + if (is.null(dat_dim)) { + dim(rps) <- dim(exp)[time_dim] + } + + return(rps) +} + + + diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index dcd28eb7..d3f60d4e 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -19,6 +19,10 @@ source("modules/Skill/R/tmp/GetProbs.R") source("modules/Skill/compute_skill_metrics.R") source("modules/Skill/compute_probabilities.R") +## Temporary +source("modules/Skill/R/tmp/RPS.R") +source("modules/Skill/R/tmp/CRPS.R") + Skill <- function(recipe, data, agg = 'global') { # data$hcst: s2dv_cube containing the hindcast @@ -84,15 +88,39 @@ Skill <- function(recipe, data, agg = 'global') { memb_dim = memb_dim, Fair = Fair, cross.val = cross.val, + return_mean = TRUE, ncores = ncores) skill <- .drop_dims(skill) skill_metrics[[ metric ]] <- skill - rps_clim <- Apply(list(data$obs$data), - target_dims = c(time_dim, memb_dim), - cross.val = cross.val, - fun = RPS_clim)$output1 - rps_clim <- .drop_dims(rps_clim) - skill_metrics[[paste0(metric, "_clim")]] <- rps_clim + # RPS_clim + } else if (metric %in% c('rps_clim')) { + skill <- Apply(list(data$obs$data), + target_dims = c(time_dim, memb_dim), + cross.val = cross.val, + return_mean = TRUE, + fun = RPS_clim)$output1 + skill <- .drop_dims(skill) + skill_metrics[[metric]] <- skill + # RPS_syear and FRPS_syear + } else if (metric %in% c('rps_syear', 'frps_syear')) { + skill <- RPS(data$hcst$data, data$obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + Fair = Fair, + cross.val = cross.val, + return_mean = FALSE, + ncores = ncores) + 
skill <- .drop_dims(skill) + skill_metrics[[metric]] <- skill + # RPS_clim_syear + } else if (metric %in% c('rps_clim_syear')) { ## not returning syear dimension name + skill <- Apply(list(data$obs$data), + target_dims = c(time_dim, memb_dim), + cross.val = cross.val, + fun = RPS_clim, + return_mean = FALSE, output_dims = 'syear')$output1 + skill <- .drop_dims(skill) + skill_metrics[[ metric ]] <- skill # Ranked Probability Skill Score and Fair version } else if (metric %in% c('rpss', 'frpss')) { skill <- RPSS(data$hcst$data, data$obs$data, @@ -137,14 +165,36 @@ Skill <- function(recipe, data, agg = 'global') { time_dim = time_dim, memb_dim = memb_dim, Fair = Fair, + return_mean = TRUE, ncores = ncores) skill <- .drop_dims(skill) skill_metrics[[ metric ]] <- skill - crps_clim <- Apply(list(data$obs$data), target_dims = time_dim, + # CRPS_clim + } else if (metric %in% c('crps_clim')) { + skill <- Apply(list(data$obs$data), target_dims = time_dim, + fun = CRPS_clim, memb_dim = memb_dim, + clim.cross.val = cross.val, + return_mean = TRUE)$output1 + skill <- .drop_dims(skill) + skill_metrics[[ metric ]] <- skill + # CRPS_syear and FCRPS_syear + } else if (metric %in% c('crps_syear', 'fcrps_syear')) { + skill <- CRPS(data$hcst$data, data$obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + Fair = Fair, + return_mean = FALSE, + ncores = ncores) + skill <- .drop_dims(skill) + skill_metrics[[ metric ]] <- skill + # CRPS_clim_syear + } else if (metric %in% c('crps_clim_syear')) { + skill <- Apply(list(data$obs$data), target_dims = time_dim, fun = CRPS_clim, memb_dim = memb_dim, - clim.cross.val = cross.val)$output1 - crps_clim <- .drop_dims(crps_clim) - skill_metrics[['crps_clim']] <- crps_clim + clim.cross.val = cross.val, + return_mean = FALSE)$output1 + skill <- .drop_dims(skill) + skill_metrics[[metric]] <- skill # CRPSS and FCRPSS } else if (metric %in% c('crpss', 'fcrpss')) { skill <- CRPSS(data$hcst$data, data$obs$data, diff --git a/modules/Statistics/Statistics.R b/modules/Statistics/Statistics.R new file mode 100644 index 00000000..40f1889b --- /dev/null +++ b/modules/Statistics/Statistics.R @@ -0,0 +1,89 @@ + + +compute_statistics <- function(recipe, data, agg = 'global'){ + + # data$hcst: s2dv_cube containing the hindcast + + # obs: s2dv_cube containing the observations + # recipe: auto-s2s recipe as provided by read_yaml + + time_dim <- 'syear' + memb_dim <- 'ensemble' + + + ## Duplicate obs along hcst ensemble dimension + obs_data <- adrop(data$obs$data, drop = 9) + obs_data <- InsertDim(data = obs_data, pos = 9, lendim = 25, name = 'ensemble') + + + statistics_list <- tolower(recipe$Analysis$Workflow$Statistics$metric) + + statistics <- list() + + for (stat in strsplit(statistics_list, ", | |,")[[1]]) { + # Whether the fair version of the metric is to be computed + if (stat %in% c('cov', 'covariance')) { + + covariance <- Apply(data = list(x= obs_data, y=data$hcst$data), + target_dims = c(time_dim, memb_dim), + fun = function(x,y){cov(as.vector(x),as.vector(y), + use = "everything", + method = "pearson")})$output1 + + statistics[[ stat ]] <- covariance + + } ## close if on cov + + + if (stat %in% c('std', 'standard_deviation')) { + + ## Calculate standard deviation + std_hcst <- Apply(data = data$hcst$data, + target_dims = c(time_dim, memb_dim), + fun = 'sd')$output1 + + std_obs <- Apply(data = data$obs$data, + target_dims = c(time_dim, memb_dim), + fun = 'sd')$output1 + + statistics[[ stat ]] <- list('std_hcst' = std_hcst, 'std_obs' = std_obs) + + } ## close if on std + + if 
(stat %in% c('var', 'variance')) {
+
+      ## Calculate variance
+      var_hcst <- (Apply(data = data$hcst$data,
+                         target_dims = c(time_dim, memb_dim),
+                         fun = 'sd')$output1)^2
+
+      var_obs <- (Apply(data = data$obs$data,
+                        target_dims = c(time_dim, memb_dim),
+                        fun = 'sd')$output1)^2
+
+      statistics[[ stat ]] <- list('var_hcst' = var_hcst, 'var_obs' = var_obs)
+
+    } ## close if on var
+
+  }
+
+  info(recipe$Run$logger, "##### STATISTICS COMPUTATION COMPLETE #####")
+  .log_memory_usage(recipe$Run$logger, when = "After statistics computation")
+
+  # Save outputs
+  if (recipe$Analysis$Workflow$Statistics$save != 'none') {
+    info(recipe$Run$logger, "##### START SAVING STATISTICS #####")
+  }
+  recipe$Run$output_dir <- paste0(recipe$Run$output_dir,
+                                  "/outputs/Statistics/")
+
+  if (recipe$Analysis$Workflow$Statistics$save == 'all') {
+    # Save all statistics
+    save_metrics(recipe = recipe, skill = statistics, ## Not able to save data with these dimensions
+                 data_cube = data$hcst, agg = agg) ## The length of parameter 'order' should be the same with the dimension length of parameter 'data'.
+  }
+
+  # Return results
+  return(statistics)
+}
+
diff --git a/recipes/atomic_recipes/recipe_scorecards_s2s-suite.yml b/recipes/atomic_recipes/recipe_scorecards_s2s-suite.yml
index 1fd6adc3..17c1acae 100644
--- a/recipes/atomic_recipes/recipe_scorecards_s2s-suite.yml
+++ b/recipes/atomic_recipes/recipe_scorecards_s2s-suite.yml
@@ -16,15 +16,15 @@ Analysis:
   Time:
     sdate: '0101' ## MMDD
     fcst_year: # Optional, int: Forecast year 'YYYY'
-    hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY'
+    hcst_start: '2014' # Mandatory, int: Hindcast start year 'YYYY'
     hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY'
     ftime_min: 1 # Mandatory, int: First leadtime time step in months
     ftime_max: 2 # Mandatory, int: Last leadtime time step in months
   Region:
     latmin: 30 # Mandatory, int: minimum latitude
-    latmax: 90 # Mandatory, int: maximum latitude
+    latmax: 35 # Mandatory, int: maximum latitude
     lonmin: 0 # Mandatory, int: minimum longitude
-    lonmax: 359.9 # Mandatory, int: maximum longitude
+    lonmax: 10 # Mandatory, int: maximum longitude
   Regrid:
     method: conservative # conservative for prlr, bilinear for tas, psl, sfcWind
     type: to_system
@@ -37,9 +37,12 @@ Analysis:
       cross_validation: no
       save: 'none'
     Skill:
-      metric: rps rpss crps crpss # str: Skill metric or list of skill metrics. See docu.
+      metric: rps_syear rps_clim_syear crps_syear crps_clim_syear # str: Skill metric or list of skill metrics. See docu.
       cross_validation: yes
       save: 'all'
+    Statistics:
+      metric: cov std var
+      save: 'all'
     Probabilities:
       percentiles: [[1/3, 2/3], [1/10], [9/10]] # frac: Quantile thresholds.
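      # (Hedged note: the list above is read as three threshold sets, i.e.
      # terciles [1/3, 2/3] plus the 10th and 90th percentiles.)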
save: 'none' diff --git a/tools/check_recipe.R b/tools/check_recipe.R index dadaa822..e68f0b90 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -446,7 +446,8 @@ check_recipe <- function(recipe) { "rpss", "frps", "frpss", "crps", "crps_syear", "crpss", "bss10", "bss90", "mean_bias", "mean_bias_ss", "enssprerr", "rps_clim", - "crps_clim", "enscorr_specs", "frps_specs", "rpss_specs", + "rps_clim_syear", "crps_clim", "crps_clim_syear", + "enscorr_specs", "frps_specs", "rpss_specs", "frpss_specs", "bss10_specs", "bss90_specs") if ("Skill" %in% names(recipe$Analysis$Workflow)) { if (is.null(recipe$Analysis$Workflow$Skill$metric)) { -- GitLab From aecc413b308022462c5de4cc7c08779229e7d3bc Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 15 Nov 2023 16:04:58 +0100 Subject: [PATCH 07/43] Add save_metrics_scorecards development --- modules/Saving/R/get_dir.R | 5 +- modules/Saving/R/get_filename.R | 10 +- modules/Saving/R/save_metrics_scorecards.R | 67 ++ modules/Saving/R/tmp/CST_SaveExp.R | 1000 ++++++++++++++++++++ modules/Saving/Saving.R | 2 + modules/Skill/Skill.R | 18 +- 6 files changed, 1095 insertions(+), 7 deletions(-) create mode 100644 modules/Saving/R/save_metrics_scorecards.R create mode 100644 modules/Saving/R/tmp/CST_SaveExp.R diff --git a/modules/Saving/R/get_dir.R b/modules/Saving/R/get_dir.R index a2cbc79f..63ca13e6 100644 --- a/modules/Saving/R/get_dir.R +++ b/modules/Saving/R/get_dir.R @@ -10,12 +10,14 @@ get_dir <- function(recipe, variable, agg = "global") { # variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] outdir <- recipe$Run$output_dir system <- gsub('.','', recipe$Analysis$Datasets$System$name, fixed = T) + reference <- gsub('.','', recipe$Analysis$Datasets$Reference$name, fixed = T) + calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method) if (tolower(recipe$Analysis$Output_format) == 'scorecards') { # Define output dir name accordint to Scorecards format dict <- read_yaml("conf/output_dictionaries/scorecards.yml") # system <- dict$System[[recipe$Analysis$Datasets$System$name]]$short_name - dir <- paste0(outdir, "/", system, "/", variable, "/") + dir <- paste0(outdir, system, "/", reference, "/", calib.method, "/", variable, "/") } else { # Default generic output format based on FOCUS # Get startdate or hindcast period @@ -38,7 +40,6 @@ get_dir <- function(recipe, variable, agg = "global") { } } ## TODO: Remove calibration method from output directory? 
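For reference, a sketch of the directory the new Scorecards branch assembles (all values below are illustrative; outdir is assumed to end in '/', and dots are stripped from the system and reference names above):

  outdir <- "/scratch/outputs/run1/"
  system <- "ECMWF-SEAS5"; reference <- "ERA5"
  calib.method <- "raw"; variable <- "tas"
  paste0(outdir, system, "/", reference, "/", calib.method, "/", variable, "/")
  # [1] "/scratch/outputs/run1/ECMWF-SEAS5/ERA5/raw/tas/"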
- calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method) store.freq <- recipe$Analysis$Variables$freq ## TODO: Change "_country" if (!is.null(recipe$Analysis$Region$name)) { diff --git a/modules/Saving/R/get_filename.R b/modules/Saving/R/get_filename.R index 1c925651..ca40f1b5 100644 --- a/modules/Saving/R/get_filename.R +++ b/modules/Saving/R/get_filename.R @@ -36,7 +36,15 @@ get_filename <- function(dir, recipe, var, date, agg, file.type) { "obs" = {type_info <- paste0("-obs_", date, "_")}, "percentiles" = {type_info <- "-percentiles_"}, "probs" = {type_info <- paste0("-probs_", date, "_")}, - "bias" = {type_info <- paste0("-bias_", date, "_")}) + "bias" = {type_info <- paste0("-bias_", date, "_")}, + # new + "rps_syear" = {type_info <- paste0("_rps-syear_")}, + "rps_clim_syear" = {type_info <- paste0("_rps-clim-syear_")}, + "crps_syear" = {type_info <- paste0("_crps-syear_")}, + "crps_clim_syear" = {type_info <- paste0("_crps-clim-syear_")}, + "crps" = {type_info <- paste0("_crps_")}, + "mean_bias" = {type_info <- paste0("_mean-bias_")}, + {type_info <- paste0("_", file.type, "_")}) # Build file name file <- paste0("scorecards_", system, "_", reference, "_", diff --git a/modules/Saving/R/save_metrics_scorecards.R b/modules/Saving/R/save_metrics_scorecards.R new file mode 100644 index 00000000..020f2593 --- /dev/null +++ b/modules/Saving/R/save_metrics_scorecards.R @@ -0,0 +1,67 @@ +save_metrics_scorecards <- function(recipe, + skill, + data_cube, + agg = "global", + outdir = NULL) { + # Time indices and metadata + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + + # Select start date + # If a fcst is provided, use that as the ref. year. Otherwise use 1970. + if (fcst.horizon == 'decadal') { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + #PROBLEM: May be more than one fcst_year + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], + sprintf('%02d', init_month), '01') + } else { + fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') + } + } else { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + } else { + fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) + } + } + + # This needs to be developed: + coords <- c(data_cube$coords['longitude'], + data_cube$coords['latitude']) + # Loop over variable dimension + for (var in 1:data_cube$dims[['var']]) { + # Subset skill arrays + subset_skill <- lapply(skill, function(x) { + ClimProjDiags::Subset(x, along = 'var', + indices = var, + drop = 'selected')}) + variable <- data_cube$attrs$Variable$varName[[var]] + outdir <- get_dir(recipe = recipe, variable = variable) + if (!dir.exists(outdir)) { + dir.create(outdir, recursive = T) + } + + for (i in 1:length(subset_skill)) { + if (any('syear' %in% names(dim(subset_skill[[i]])))) { + sdate_dim_save = 'syear' + dates <- data_cube$attrs$Dates + } else { + sdate_dim_save = NULL + dates <- Subset(data_cube$attrs$Dates, along = 'syear', indices = 1) + } + extra_string <- get_filename(NULL, recipe, variable, + fcst.sdate, agg, names(subset_skill)[[i]]) + SaveExp(data = subset_skill[[i]], destination = outdir, + Dates = dates, + coords = coords, + varname = names(subset_skill)[[i]], + metadata = data_cube$attrs$Variable$metadata, Datasets = NULL, + startdates = NULL, dat_dim = NULL, sdate_dim = sdate_dim_save, + ftime_dim = 'time', var_dim = NULL, memb_dim = NULL, + drop_dims = NULL, single_file = TRUE, + extra_string = extra_string) 
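+        # For example, 'rps_syear' goes through the 'syear' branch above with
+        # sdate_dim_save = 'syear' and the full Dates array, while a
+        # time-aggregated metric such as 'crps' is saved with the Dates of the
+        # first start date only.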
+    }
+  }
+  info(recipe$Run$logger, "##### SKILL METRICS SAVED TO NETCDF FILE #####")
+}
\ No newline at end of file
diff --git a/modules/Saving/R/tmp/CST_SaveExp.R b/modules/Saving/R/tmp/CST_SaveExp.R
new file mode 100644
index 00000000..1154a41c
--- /dev/null
+++ b/modules/Saving/R/tmp/CST_SaveExp.R
@@ -0,0 +1,1000 @@
+#'Save objects of class 's2dv_cube' to data in NetCDF format
+#'
+#'@author Perez-Zanon Nuria, \email{nuria.perez@bsc.es}
+#'
+#'@description This function allows to divide and save an object of class
+#''s2dv_cube' into a NetCDF file, allowing to reload the saved data using
+#'\code{Start} function from StartR package. If the original 's2dv_cube' object
+#'has been created from \code{CST_Load()}, then it can be reloaded with
+#'\code{Load()}.
+#'
+#'@param data An object of class \code{s2dv_cube}.
+#'@param destination A character string containing the directory name in which
+#'  to save the data. NetCDF files for each starting date are saved into the
+#'  folder tree: \cr
+#'  destination/Dataset/variable/. By default the function
+#'  creates and saves the data into the working directory.
+#'@param sdate_dim A character string indicating the name of the start date
+#'  dimension. By default, it is set to 'sdate'. It can be NULL if there is no
+#'  start date dimension.
+#'@param ftime_dim A character string indicating the name of the forecast time
+#'  dimension. By default, it is set to 'time'. It can be NULL if there is no
+#'  forecast time dimension.
+#'@param dat_dim A character string indicating the name of dataset dimension.
+#'  By default, it is set to 'dataset'. It can be NULL if there is no dataset
+#'  dimension.
+#'@param var_dim A character string indicating the name of variable dimension.
+#'  By default, it is set to 'var'. It can be NULL if there is no variable
+#'  dimension.
+#'@param memb_dim A character string indicating the name of the member dimension.
+#'  By default, it is set to 'member'. It can be NULL if there is no member
+#'  dimension.
+#'@param startdates A vector of dates that will be used for the filenames
+#'  when saving the data in multiple files. It must be a vector of the same
+#'  length as the start date dimension of data. It must be a vector of class
+#'  \code{Dates}, \code{'POSIXct'} or character with lengths between 1 and 10.
+#'  If it is NULL, the coordinate corresponding to the start date dimension or
+#'  the first Date of each time step will be used as the name of the files.
+#'  It is NULL by default.
+#'@param drop_dims A vector of character strings indicating the dimension names
+#'  of length 1 that need to be dropped in order that they don't appear in the
+#'  netCDF file. It is NULL by default (optional).
+#'@param single_file A logical value indicating if the whole object is saved in
+#'  a single file (TRUE) or in multiple files (FALSE). When it is FALSE,
+#'  the array is separated for Datasets, variable and start date. It is FALSE
+#'  by default.
+#'@param extra_string A character string to be included as part of the file name,
+#'  for instance, to identify member or realization. It would be added to the
+#'  file name between underscore characters.
+#'@param units_time_since A logical value indicating if the time units are
+#'  saved as 'time unit since' (e.g. 'hours since') (TRUE) or as time unit
+#'  index (e.g. days, months, or hours) (FALSE). It is set as TRUE by default.
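A hedged sketch of the two time encodings this flag selects, derived from the ncdim_def() calls further down in this file (example values invented):

  # units_time_since = TRUE  -> time units "hours since <start date>", with
  #                             vals = the lead times in hours
  # units_time_since = FALSE -> plain frequency units, e.g. "months" with
  #                             vals = 1, 2, ... when monthly steps are detected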
+#' +#'@return Multiple or single NetCDF files containing the data array.\cr +#'\item{\code{single_file = TRUE}}{ +#' All data is saved in a single file located in the specified destination +#' path with the following name: +#' ___.nc. Multiple +#' variables are saved separately in the same file. The forecast time units +#' is extracted from the frequency of the time steps (hours, days, months). +#' The first value of forecast time is 1. If no frequency is found, the units +#' will be 'hours since' each start date and the time steps are assumed to be +#' equally spaced. +#'} +#'\item{\code{single_file = FALSE}}{ +#' The data array is subset and stored into multiple files. Each file +#' contains the data subset for each start date, variable and dataset. Files +#' with different variables and Datasets are stored in separated directories +#' within the following directory tree: destination/Dataset/variable/. +#' The name of each file will be: +#' __.nc. +#'} +#' +#'@seealso \code{\link[startR]{Start}}, \code{\link{as.s2dv_cube}} and +#'\code{\link{s2dv_cube}} +#' +#'@examples +#'\dontrun{ +#'data <- lonlat_temp_st$exp +#'destination <- "./" +#'CST_SaveExp(data = data, destination = destination, ftime_dim = 'ftime', +#' var_dim = 'var', dat_dim = 'dataset') +#'} +#' +#'@import ncdf4 +#'@importFrom s2dv Reorder +#'@importFrom ClimProjDiags Subset +#'@import multiApply +#'@export +CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate', + ftime_dim = 'time', dat_dim = 'dataset', + var_dim = 'var', memb_dim = 'member', + startdates = NULL, drop_dims = NULL, + single_file = FALSE, extra_string = NULL, + units_time_since = TRUE) { + # Check 's2dv_cube' + if (!inherits(data, 's2dv_cube')) { + stop("Parameter 'data' must be of the class 's2dv_cube'.") + } + # Check object structure + if (!all(c('data', 'attrs') %in% names(data))) { + stop("Parameter 'data' must have at least 'data' and 'attrs' elements ", + "within the 's2dv_cube' structure.") + } + if (!inherits(data$attrs, 'list')) { + stop("Level 'attrs' must be a list with at least 'Dates' element.") + } + if (!all(c('coords') %in% names(data))) { + warning("Element 'coords' not found. 
No coordinates will be used.") + } + # metadata + if (is.null(data$attrs$Variable$metadata)) { + warning("No metadata found in element Variable from attrs.") + } else { + if (!inherits(data$attrs$Variable$metadata, 'list')) { + stop("Element metadata from Variable element in attrs must be a list.") + } + if (!any(names(data$attrs$Variable$metadata) %in% names(data$coords))) { + warning("Metadata is not found for any coordinate.") + } else if (!any(names(data$attrs$Variable$metadata) %in% + data$attrs$Variable$varName)) { + warning("Metadata is not found for any variable.") + } + } + # Dates + if (is.null(data$attrs$Dates)) { + stop("Element 'Dates' from 'attrs' level cannot be NULL.") + } + if (is.null(dim(data$attrs$Dates))) { + stop("Element 'Dates' from 'attrs' level must have time dimensions.") + } + # sdate_dim + if (!is.null(sdate_dim)) { + if (!is.character(sdate_dim)) { + stop("Parameter 'sdate_dim' must be a character string.") + } + if (length(sdate_dim) > 1) { + warning("Parameter 'sdate_dim' has length greater than 1 and ", + "only the first element will be used.") + sdate_dim <- sdate_dim[1] + } + } else if (length(dim(data$attrs$Dates)) == 1) { + sdate_dim <- 'sdate' + dim(data$data) <- c(sdate = 1, dim(data$data)) + data$dims <- dim(data$data) + dim(data$attrs$Dates) <- c(sdate = 1, dim(data$attrs$Dates)) + data$coords[[sdate_dim]] <- data$attrs$Dates[1] + } + # startdates + if (is.null(startdates)) { + startdates <- data$coords[[sdate_dim]] + } else { + if (!is.character(startdates)) { + warning(paste0("Parameter 'startdates' is not a character string, ", + "it will not be used.")) + startdates <- data$coords[[sdate_dim]] + } + if (!is.null(sdate_dim)) { + if (dim(data$data)[sdate_dim] != length(startdates)) { + warning(paste0("Parameter 'startdates' doesn't have the same length ", + "as dimension '", sdate_dim,"', it will not be used.")) + startdates <- data$coords[[sdate_dim]] + } + } + } + + SaveExp(data = data$data, + destination = destination, + Dates = data$attrs$Dates, + coords = data$coords, + varname = data$attrs$Variable$varName, + metadata = data$attrs$Variable$metadata, + Datasets = data$attrs$Datasets, + startdates = startdates, + dat_dim = dat_dim, sdate_dim = sdate_dim, + ftime_dim = ftime_dim, var_dim = var_dim, + memb_dim = memb_dim, + drop_dims = drop_dims, + extra_string = extra_string, + single_file = single_file, + units_time_since = units_time_since) +} +#'Save a multidimensional array with metadata to data in NetCDF format +#'@description This function allows to save a data array with metadata into a +#'NetCDF file, allowing to reload the saved data using \code{Start} function +#'from StartR package. If the original 's2dv_cube' object has been created from +#'\code{CST_Load()}, then it can be reloaded with \code{Load()}. +#' +#'@author Perez-Zanon Nuria, \email{nuria.perez@bsc.es} +#' +#'@param data A multi-dimensional array with named dimensions. +#'@param destination A character string indicating the path where to store the +#' NetCDF files. +#'@param Dates A named array of dates with the corresponding sdate and forecast +#' time dimension. If there is no sdate_dim, you can set it to NULL. +#' It must have ftime_dim dimension. +#'@param coords A named list with elements of the coordinates corresponding to +#' the dimensions of the data parameter. The names and length of each element +#' must correspond to the names of the dimensions. 
If any coordinate is not +#' provided, it is set as an index vector with the values from 1 to the length +#' of the corresponding dimension. +#'@param varname A character string indicating the name of the variable to be +#' saved. +#'@param metadata A named list where each element is a variable containing the +#' corresponding information. The information must be contained in a list of +#' lists for each variable. +#'@param Datasets A vector of character string indicating the names of the +#' datasets. +#'@param startdates A vector of dates that will be used for the filenames +#' when saving the data in multiple files. It must be a vector of the same +#' length as the start date dimension of data. It must be a vector of class +#' \code{Dates}, \code{'POSIXct'} or character with lenghts between 1 and 10. +#' If it is NULL, the first Date of each time step will be used as the name of +#' the files. It is NULL by default. +#'@param sdate_dim A character string indicating the name of the start date +#' dimension. By default, it is set to 'sdate'. It can be NULL if there is no +#' start date dimension. +#'@param ftime_dim A character string indicating the name of the forecast time +#' dimension. By default, it is set to 'time'. It can be NULL if there is no +#' forecast time dimension. +#'@param dat_dim A character string indicating the name of dataset dimension. +#' By default, it is set to 'dataset'. It can be NULL if there is no dataset +#' dimension. +#'@param var_dim A character string indicating the name of variable dimension. +#' By default, it is set to 'var'. It can be NULL if there is no variable +#' dimension. +#'@param memb_dim A character string indicating the name of the member dimension. +#' By default, it is set to 'member'. It can be NULL if there is no member +#' dimension. +#'@param drop_dims A vector of character strings indicating the dimension names +#' of length 1 that need to be dropped in order that they don't appear in the +#' netCDF file. It is NULL by default (optional). +#'@param single_file A logical value indicating if all object is saved in a +#' unique file (TRUE) or in separated directories (FALSE). When it is FALSE, +#' the array is separated for Datasets, variable and start date. It is FALSE +#' by default (optional). +#'@param extra_string A character string to be include as part of the file name, +#' for instance, to identify member or realization. It would be added to the +#' file name between underscore characters (optional). +#'@param units_time_since A logical value indicating if the time units are +#' saved as 'time unit since' (e.g. 'horurs since') (TRUE) or as time unit +#' index (e.g. days, months, or hours) (FALSE). It is set as TRUE by default. +#' +#'@return Multiple or single NetCDF files containing the data array.\cr +#'\item{\code{single_file = TRUE}}{ +#' All data is saved in a single file located in the specified destination +#' path with the following name: +#' ___.nc. Multiple +#' variables are saved separately in the same file. The forecast time units +#' is extracted from the frequency of the time steps (hours, days, months). +#' The first value of forecast time is 1. If no frequency is found, the units +#' will be 'hours since' each start date and the time steps are assumed to be +#' equally spaced. +#'} +#'\item{\code{single_file = FALSE}}{ +#' The data array is subset and stored into multiple files. Each file +#' contains the data subset for each start date, variable and dataset. 
Files +#' with different variables and Datasets are stored in separated directories +#' within the following directory tree: destination/Dataset/variable/. +#' The name of each file will be: +#' __.nc. +#'} +#' +#'@examples +#'\dontrun{ +#'data <- lonlat_temp_st$exp$data +#'lon <- lonlat_temp_st$exp$coords$lon +#'lat <- lonlat_temp_st$exp$coords$lat +#'coords <- list(lon = lon, lat = lat) +#'Datasets <- lonlat_temp_st$exp$attrs$Datasets +#'varname <- 'tas' +#'Dates <- lonlat_temp_st$exp$attrs$Dates +#'destination = './' +#'metadata <- lonlat_temp_st$exp$attrs$Variable$metadata +#'SaveExp(data = data, destination = destination, coords = coords, +#' Datasets = Datasets, varname = varname, Dates = Dates, +#' metadata = metadata, single_file = TRUE, ftime_dim = 'ftime', +#' var_dim = 'var', dat_dim = 'dataset') +#'} +#' +#'@import ncdf4 +#'@importFrom s2dv Reorder +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@export +SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, + varname = NULL, metadata = NULL, Datasets = NULL, + startdates = NULL, dat_dim = 'dataset', sdate_dim = 'sdate', + ftime_dim = 'time', var_dim = 'var', memb_dim = 'member', + drop_dims = NULL, single_file = FALSE, extra_string = NULL, + units_time_since = TRUE) { + ## Initial checks + # data + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + dimnames <- names(dim(data)) + if (is.null(dimnames)) { + stop("Parameter 'data' must be an array with named dimensions.") + } + # destination + if (!is.character(destination) | length(destination) > 1) { + stop("Parameter 'destination' must be a character string of one element ", + "indicating the name of the file (including the folder if needed) ", + "where the data will be saved.") + } + # Dates + if (!is.null(Dates)) { + if (!inherits(Dates, "POSIXct") & !inherits(Dates, "Date")) { + stop("Parameter 'Dates' must be of 'POSIXct' or 'Dates' class.") + } + if (is.null(dim(Dates))) { + stop("Parameter 'Dates' must have dimension names.") + } + } + # drop_dims + if (!is.null(drop_dims)) { + if (!is.character(drop_dims) | any(!drop_dims %in% names(dim(data)))) { + warning("Parameter 'drop_dims' must be character string containing ", + "the data dimension names to be dropped. It will not be used.") + } else if (!all(dim(data)[drop_dims] %in% 1)) { + warning("Parameter 'drop_dims' can only contain dimension names ", + "that are of length 1. It will not be used.") + } else { + data <- Subset(x = data, along = drop_dims, + indices = lapply(1:length(drop_dims), function(x) 1), + drop = 'selected') + dimnames <- names(dim(data)) + } + } + # coords + if (!is.null(coords)) { + if (!all(names(coords) %in% dimnames)) { + coords <- coords[-which(!names(coords) %in% dimnames)] + } + for (i_coord in dimnames) { + if (i_coord %in% names(coords)) { + if (length(coords[[i_coord]]) != dim(data)[i_coord]) { + warning(paste0("Coordinate '", i_coord, "' has different lenght as ", + "its dimension and it will not be used.")) + coords[[i_coord]] <- 1:dim(data)[i_coord] + } + } else { + # warning(paste0("Coordinate '", i_coord, "' is not provided ", + # "and it will be set as index in element coords.")) + coords[[i_coord]] <- 1:dim(data)[i_coord] + } + } + } else { + coords <- sapply(dimnames, function(x) 1:dim(data)[x]) + } + # varname + if (is.null(varname)) { + warning("Parameter 'varname' is NULL. 
It will be assigned to 'X'.") + varname <- 'X' + } else if (length(varname) > 1) { + multiple_vars <- TRUE + } else { + multiple_vars <- FALSE + } + if (!all(sapply(varname, is.character))) { + stop("Parameter 'varname' must be a character string with the ", + "variable names.") + } + # metadata + if (is.null(metadata)) { + warning("Parameter 'metadata' is not provided so the metadata saved ", + "will be incomplete.") + } + # single_file + if (!inherits(single_file, 'logical')) { + warning("Parameter 'single_file' must be a logical value. It will be ", + "set as FALSE.") + single_file <- FALSE + } + # extra_string + if (!is.null(extra_string)) { + if (!is.character(extra_string)) { + stop("Parameter 'extra_string' must be a character string.") + } + } + # units_time_since + if (!is.logical(units_time_since)) { + warning("Parameter 'units_time_since' must be a logical value. It will be ", + "set as TRUE.") + units_time_since <- TRUE + } + + ## Dimensions checks + # Spatial coordinates + if (!any(dimnames %in% .KnownLonNames()) | + !any(dimnames %in% .KnownLatNames())) { + lon_dim <- NULL + lat_dim <- NULL + } else { + lon_dim <- dimnames[which(dimnames %in% .KnownLonNames())] + lat_dim <- dimnames[which(dimnames %in% .KnownLatNames())] + if (length(lon_dim) > 1) { + warning("Found more than one longitudinal dimension. Only the first one ", + "will be used.") + lon_dim <- lon_dim[1] + } + if (length(lat_dim) > 1) { + warning("Found more than one latitudinal dimension. Only the first one ", + "will be used.") + lat_dim <- lat_dim[1] + } + } + # ftime_dim + if (!is.null(ftime_dim)) { + if (!is.character(ftime_dim)) { + stop("Parameter 'ftime_dim' must be a character string.") + } + if (!all(ftime_dim %in% dimnames)) { + stop("Parameter 'ftime_dim' is not found in 'data' dimension.") + } + if (length(ftime_dim) > 1) { + warning("Parameter 'ftime_dim' has length greater than 1 and ", + "only the first element will be used.") + ftime_dim <- ftime_dim[1] + } + } + # sdate_dim + if (!is.null(sdate_dim)) { + if (!is.character(sdate_dim)) { + stop("Parameter 'sdate_dim' must be a character string.") + } + if (length(sdate_dim) > 1) { + warning("Parameter 'sdate_dim' has length greater than 1 and ", + "only the first element will be used.") + sdate_dim <- sdate_dim[1] + } + if (!all(sdate_dim %in% dimnames)) { + stop("Parameter 'sdate_dim' is not found in 'data' dimension.") + } + } + # memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim)) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!all(memb_dim %in% dimnames)) { + stop("Parameter 'memb_dim' is not found in 'data' dimension. Set it ", + "as NULL if there is no member dimension.") + } + } + # dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim)) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!all(dat_dim %in% dimnames)) { + stop("Parameter 'dat_dim' is not found in 'data' dimension. Set it ", + "as NULL if there is no Datasets dimension.") + } + if (length(dat_dim) > 1) { + warning("Parameter 'dat_dim' has length greater than 1 and ", + "only the first element will be used.") + dat_dim <- dat_dim[1] + } + n_datasets <- dim(data)[dat_dim] + } else { + n_datasets <- 1 + } + # var_dim + if (!is.null(var_dim)) { + if (!is.character(var_dim)) { + stop("Parameter 'var_dim' must be a character string.") + } + if (!all(var_dim %in% dimnames)) { + stop("Parameter 'var_dim' is not found in 'data' dimension. 
Set it ", + "as NULL if there is no variable dimension.") + } + if (length(var_dim) > 1) { + warning("Parameter 'var_dim' has length greater than 1 and ", + "only the first element will be used.") + var_dim <- var_dim[1] + } + n_vars <- dim(data)[var_dim] + } else { + n_vars <- 1 + } + # minimum dimensions + if (all(dimnames %in% c(var_dim, dat_dim))) { + if (!single_file) { + warning("Parameter data has only ", + paste(c(var_dim, dat_dim), collapse = ' and '), " dimensions ", + "and it cannot be splitted in multiple files. All data will ", + "be saved in a single file.") + single_file <- TRUE + } + } + # Dates dimension check + if (!is.null(Dates)) { + if (all(c(ftime_dim, sdate_dim) %in% names(dim(Dates)))) { + if (any(!names(dim(Dates)) %in% c(ftime_dim, sdate_dim))) { + if (all(dim(Dates)[!names(dim(Dates)) %in% c(ftime_dim, sdate_dim)] == 1)) { + dim(Dates) <- dim(Dates)[names(dim(Dates)) %in% c(ftime_dim, sdate_dim)] + } else { + stop("Parameter 'Dates' must have only sdate_dim and ftime_dim dimensions.") + } + } + if (is.null(startdates)) { + startdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') + } else if ((!inherits(startdates, "POSIXct") & !inherits(startdates, "Date")) && + (!is.character(startdates) | (any(nchar(startdates) > 10) | any(nchar(startdates) < 1)))) { + warning("Parameter 'startdates' should be a character string containing ", + "the start dates in the format 'yyyy-mm-dd', 'yyyymmdd', 'yyyymm', ", + "'POSIXct' or 'Dates' class. Files will be named with Dates instead.") + startdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') + } + if (!is.null(format(startdates, "%Y%m%d"))) { + startdates <- format(startdates, "%Y%m%d") + } + } else if (any(ftime_dim %in% names(dim(Dates)))) { + if (all(dim(Dates)[!names(dim(Dates)) %in% c(ftime_dim)] == 1)) { + dim(Dates) <- dim(Dates)[names(dim(Dates)) %in% c(ftime_dim, sdate_dim)] + } + } + } + # startdates + if (is.null(startdates)) { + if (is.null(sdate_dim)) { + startdates <- 'XXX' + } else { + startdates <- rep('XXX', dim(data)[sdate_dim]) + } + } else { + if (is.null(sdate_dim)) { + if (length(startdates) != 1) { + warning("Parameter 'startdates' has length more than 1. Only first ", + "value will be used.") + startdates <- startdates[[1]] + } + } + } + # Datasets + if (is.null(Datasets)) { + if (!single_file) { + warning("Parameter 'Datasets' is NULL. 
Files will be saved with a ", + "directory name of 'XXX'.") + } + Datasets <- rep('XXX', n_datasets ) + } + if (inherits(Datasets, 'list')) { + Datasets <- names(Datasets) + } + if (n_datasets > length(Datasets)) { + warning("Dimension 'Datasets' in 'data' is greater than those listed in ", + "element 'Datasets' and the first element will be reused.") + Datasets <- c(Datasets, rep(Datasets[1], n_datasets - length(Datasets))) + } else if (n_datasets < length(Datasets)) { + warning("Dimension 'Datasets' in 'data' is smaller than those listed in ", + "element 'Datasets' and only the firsts elements will be used.") + Datasets <- Datasets[1:n_datasets] + } + + ## Unknown dimensions check + alldims <- c(dat_dim, var_dim, sdate_dim, lon_dim, lat_dim, memb_dim, ftime_dim) + if (!all(dimnames %in% alldims)) { + unknown_dims <- dimnames[which(!dimnames %in% alldims)] + memb_dim <- c(memb_dim, unknown_dims) + alldims <- c(dat_dim, var_dim, sdate_dim, lon_dim, lat_dim, memb_dim, ftime_dim) + } + # Reorder + if (any(dimnames != alldims)) { + data <- Reorder(data, alldims) + dimnames <- names(dim(data)) + if (!is.null(attr(data, 'dimensions'))) { + attr(data, 'dimensions') <- dimnames + } + } + + ## NetCDF dimensions definition + defined_dims <- NULL + extra_info_dim <- NULL + if (is.null(Dates)) { + filedims <- dimnames[which(!dimnames %in% c(dat_dim, var_dim))] + } else { + filedims <- dimnames[which(!dimnames %in% c(dat_dim, var_dim, sdate_dim, ftime_dim))] + } + for (i_coord in filedims) { + dim_info <- list() + # vals + if (i_coord %in% names(coords)) { + if (is.numeric(coords[[i_coord]])) { + dim_info[['vals']] <- as.vector(coords[[i_coord]]) + } else { + dim_info[['vals']] <- 1:dim(data)[i_coord] + } + } else { + dim_info[['vals']] <- 1:dim(data)[i_coord] + } + # name + dim_info[['name']] <- i_coord + # len + dim_info[['len']] <- as.numeric(dim(data)[i_coord]) + # unlim + dim_info[['unlim']] <- FALSE + # create_dimvar + dim_info[['create_dimvar']] <- TRUE + ## metadata + if (i_coord %in% names(metadata)) { + if ('variables' %in% names(attributes(metadata[[i_coord]]))) { + # from Start: 'lon' or 'lat' + attrs <- attributes(metadata[[i_coord]])[['variables']][[i_coord]] + i_coord_info <- attrs[!sapply(attrs, inherits, 'list')] + } else if (inherits(metadata[[i_coord]], 'list')) { + # from Start and Load: main var + i_coord_info <- metadata[[i_coord]] + } else if (!is.null(attributes(metadata[[i_coord]]))) { + # from Load + i_coord_info <- attributes(metadata[[i_coord]]) + } else { + stop("Metadata is not correct.") + } + # len + if ('size' %in% names(i_coord_info)) { + if (i_coord_info[['size']] != dim(data)[i_coord]) { + dim_info[['original_len']] <- i_coord_info[['size']] + i_coord_info[['size']] <- NULL + } + } + # units + if (!('units' %in% names(i_coord_info))) { + dim_info[['units']] <- '' + } else { + dim_info[['units']] <- i_coord_info[['units']] + i_coord_info[['units']] <- NULL + } + # calendar + if (!('calendar' %in% names(i_coord_info))) { + dim_info[['calendar']] <- NA + } else { + dim_info[['calendar']] <- i_coord_info[['calendar']] + i_coord_info[['calendar']] <- NULL + } + # longname + if ('long_name' %in% names(i_coord_info)) { + dim_info[['longname']] <- i_coord_info[['long_name']] + i_coord_info[['long_name']] <- NULL + } else if ('longname' %in% names(i_coord_info)) { + dim_info[['longname']] <- i_coord_info[['longname']] + i_coord_info[['longname']] <- NULL + } else { + if (i_coord %in% .KnownLonNames()) { + dim_info[['longname']] <- 'longitude' + } else if (i_coord %in% 
.KnownLatNames()) { + dim_info[['longname']] <- 'latitude' + } + } + # extra information + if (!is.null(names(i_coord_info))) { + extra_info_dim[[i_coord]] <- i_coord_info + } + } else { + # units + dim_info[['units']] <- "adim" + # longname + dim_info[['longname']] <- i_coord + # calendar + dim_info[['calendar']] <- NA + } + new_dim <- list(ncdim_def(name = dim_info[['name']], units = dim_info[['units']], + vals = dim_info[['vals']], unlim = dim_info[['unlim']], + create_dimvar = dim_info[['create_dimvar']], + calendar = dim_info[['calendar']], + longname = dim_info[['longname']])) + names(new_dim) <- i_coord + defined_dims <- c(defined_dims, new_dim) + } + + defined_vars <- list() + if (!single_file) { + for (i in 1:n_datasets) { + path <- file.path(destination, Datasets[i], varname) + for (j in 1:n_vars) { + dir.create(path[j], recursive = TRUE) + startdates <- gsub("-", "", startdates) + dim(startdates) <- c(length(startdates)) + names(dim(startdates)) <- sdate_dim + if (is.null(dat_dim) & is.null(var_dim)) { + data_subset <- data + } else if (is.null(dat_dim)) { + data_subset <- Subset(data, c(var_dim), list(j), drop = 'selected') + } else if (is.null(var_dim)) { + data_subset <- Subset(data, along = c(dat_dim), list(i), drop = 'selected') + } else { + data_subset <- Subset(data, c(dat_dim, var_dim), list(i, j), drop = 'selected') + } + if (is.null(Dates)) { + input_data <- list(data_subset, startdates) + target_dims <- list(c(lon_dim, lat_dim, memb_dim, ftime_dim), NULL) + } else { + input_data <- list(data_subset, startdates, Dates) + target_dims = list(c(lon_dim, lat_dim, memb_dim, ftime_dim), NULL, ftime_dim) + } + Apply(data = input_data, + target_dims = target_dims, + fun = .saveExp, + destination = path[j], + defined_dims = defined_dims, + ftime_dim = ftime_dim, + varname = varname[j], + metadata_var = metadata[[varname[j]]], + extra_info_dim = extra_info_dim, + extra_string = extra_string) + } + } + } else { + # Datasets definition + # From here + if (!is.null(dat_dim)) { + new_dim <- list(ncdim_def(name = dat_dim, units = "adim", + vals = 1 : dim(data)[dat_dim], + longname = 'Datasets', create_dimvar = TRUE)) + names(new_dim) <- dat_dim + defined_dims <- c(new_dim, defined_dims) + extra_info_dim[[dat_dim]] <- list(Datasets = paste(Datasets, collapse = ', ')) + } + first_sdate <- last_sdate <- NULL + save_hours_since <- TRUE + if (!is.null(Dates)) { + if (is.null(sdate_dim)) { + sdates <- Dates[1] + # ftime definition + leadtimes <- as.numeric(Dates - sdates)/3600 + } else { + # sdate definition + sdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') + differ <- as.numeric((sdates - sdates[1])/3600) + new_dim <- list(ncdim_def(name = sdate_dim, units = paste('hours since', sdates[1]), + vals = differ, + longname = sdate_dim, create_dimvar = TRUE)) + names(new_dim) <- sdate_dim + defined_dims <- c(defined_dims, new_dim) + first_sdate <- sdates[1] + last_sdate <- sdates[length(sdates)] + # ftime definition + Dates <- Reorder(Dates, c(ftime_dim, sdate_dim)) + differ_ftime <- apply(Dates, 2, function(x){as.numeric((x - x[1])/3600)}) + dim(differ_ftime) <- dim(Dates) + leadtimes <- Subset(differ_ftime, along = sdate_dim, 1, drop = 'selected') + + if (all(apply(differ_ftime, 1, function(x){length(unique(x)) == 1}))) { + if (!units_time_since) save_hours_since <- FALSE + } else { + warning("Time steps are not equal for all start dates. 
Only ", + "forecast time values for the first start date will be saved ", + "correctly.") + } + } + + if (!save_hours_since) { + # NOTE: Are the units readable by Start? + if (all(diff(leadtimes/24) == 1)) { + # daily values + dim_time <- list(ncdim_def(name = ftime_dim, units = 'days', + vals = round(leadtimes/24) + 1, + calendar = 'proleptic_gregorian', + longname = ftime_dim, unlim = TRUE)) + names(dim_time) <- ftime_dim + defined_dims <- c(defined_dims, dim_time) + } else if (all(diff(leadtimes/24) %in% c(28, 29, 30, 31))) { + # monthly values + dim_time <- list(ncdim_def(name = ftime_dim, units = 'months', + vals = round(leadtimes/730) + 1, + calendar = 'proleptic_gregorian', + longname = ftime_dim, unlim = TRUE)) + names(dim_time) <- ftime_dim + defined_dims <- c(defined_dims, dim_time) + } else { + # other frequency + dim_time <- list(ncdim_def(name = ftime_dim, units = 'hours', + vals = leadtimes + 1, + calendar = 'proleptic_gregorian', + longname = ftime_dim, unlim = TRUE)) + names(dim_time) <- ftime_dim + defined_dims <- c(defined_dims, dim_time) + } + } else { + # Save in units 'hours since' + dim_time <- list(ncdim_def(name = ftime_dim, + units = paste('hours since', + paste(sdates, collapse = ', ')), + vals = leadtimes, + calendar = 'proleptic_gregorian', + longname = ftime_dim, unlim = TRUE)) + names(dim_time) <- ftime_dim + defined_dims <- c(defined_dims, dim_time) + } + } + + # var definition + defined_vars <- list() + extra_info_var <- NULL + for (j in 1:n_vars) { + var_info <- list() + i_var_info <- metadata[[varname[j]]][!sapply(metadata[[varname[j]]], inherits, 'list')] + ## Define metadata + # name + var_info[['name']] <- varname[j] + # units + if ('units' %in% names(i_var_info)) { + var_info[['units']] <- i_var_info[['units']] + i_var_info[['units']] <- NULL + } else { + var_info[['units']] <- '' + } + # dim + var_info[['dim']] <- defined_dims + # missval + if ('missval' %in% names(i_var_info)) { + var_info[['missval']] <- i_var_info[['missval']] + i_var_info[['missval']] <- NULL + } else { + var_info[['missval']] <- NULL + } + # longname + if (any(c('longname', 'long_name') %in% names(i_var_info))) { + longname <- names(i_var_info)[which(names(i_var_info) %in% c('longname', 'long_name'))] + var_info[['longname']] <- i_var_info[[longname]] + i_var_info[[longname]] <- NULL + } else { + var_info[['longname']] <- varname[j] + } + # prec + if ('prec' %in% names(i_var_info)) { + var_info[['prec']] <- i_var_info[['prec']] + i_var_info[['prec']] <- NULL + } else { + prec <- typeof(data) + if (prec == 'character') { + var_info[['prec']] <- 'char' + } + if (any(prec %in% c('short', 'float', 'double', 'integer', 'char', 'byte'))) { + var_info[['prec']] <- prec + } else { + var_info[['prec']] <- 'double' + } + } + # extra information + if (!is.null(names(i_var_info))) { + extra_info_var[[varname[j]]] <- i_var_info + } + new_var <- list(ncvar_def(name = var_info[['name']], + units = var_info[['units']], + dim = var_info[['dim']], + missval = var_info[['missval']], + longname = var_info[['longname']], + prec = var_info[['prec']])) + + names(new_var) <- varname[j] + defined_vars <- c(defined_vars, new_var) + } + if (is.null(extra_string)) { + gsub("-", "", first_sdate) + file_name <- paste0(paste(c(varname, + gsub("-", "", first_sdate), + gsub("-", "", last_sdate)), + collapse = '_'), ".nc") + } else { + nc <- substr(extra_string, nchar(extra_string)-2, nchar(extra_string)) + if (nc == ".nc") { + file_name <- extra_string + } else { + file_name <- paste0(extra_string, ".nc") + } + 
+  }
+  full_filename <- file.path(destination, file_name)
+  file_nc <- nc_create(full_filename, defined_vars)
+  if (is.null(var_dim)) {
+    ncvar_put(file_nc, varname, vals = data)
+  } else {
+    for (j in 1:n_vars) {
+      ncvar_put(file_nc, defined_vars[[j]]$name,
+                vals = Subset(data, var_dim, j, drop = 'selected'))
+    }
+  }
+  # Additional dimension attributes
+  for (dim in names(defined_dims)) {
+    if (dim %in% names(extra_info_dim)) {
+      for (info_dim in names(extra_info_dim[[dim]])) {
+        add_info_dim <- paste0(extra_info_dim[[dim]][[info_dim]], collapse = ', ')
+        ncatt_put(file_nc, dim, info_dim, add_info_dim)
+      }
+    }
+  }
+  # Additional variable attributes
+  for (var in names(defined_vars)) {
+    if (var %in% names(extra_info_var)) {
+      for (info_var in names(extra_info_var[[var]])) {
+        add_info_var <- paste0(extra_info_var[[var]][[info_var]], collapse = ', ')
+        ncatt_put(file_nc, var, info_var, add_info_var)
+      }
+    }
+  }
+    nc_close(file_nc)
+  }
+}
+
+.saveExp <- function(data, startdates = NULL, dates = NULL, destination = "./",
+                     defined_dims, ftime_dim = 'time', varname = 'var',
+                     metadata_var = NULL, extra_info_dim = NULL,
+                     extra_string = NULL) {
+  # ftime_dim
+  if (!is.null(dates)) {
+    differ <- as.numeric((dates - dates[1])/3600)
+    dim_time <- list(ncdim_def(name = ftime_dim, units = paste('hours since', dates[1]),
+                               vals = differ, calendar = 'proleptic_gregorian',
+                               longname = ftime_dim, unlim = TRUE))
+    names(dim_time) <- ftime_dim
+    defined_dims <- c(defined_dims, dim_time)
+  }
+
+  ## Define var metadata
+  var_info <- NULL
+  extra_info_var <- NULL
+  i_var_info <- metadata_var[!sapply(metadata_var, inherits, 'list')]
+
+  # name
+  var_info[['name']] <- varname
+  # units
+  if ('units' %in% names(i_var_info)) {
+    var_info[['units']] <- i_var_info[['units']]
+    i_var_info[['units']] <- NULL
+  } else {
+    var_info[['units']] <- ''
+  }
+  # dim
+  var_info[['dim']] <- defined_dims
+  # missval
+  if ('missval' %in% names(i_var_info)) {
+    var_info[['missval']] <- i_var_info[['missval']]
+    i_var_info[['missval']] <- NULL
+  } else {
+    var_info[['missval']] <- NULL
+  }
+  # longname
+  if (any(c('longname', 'long_name') %in% names(i_var_info))) {
+    longname <- names(i_var_info)[which(names(i_var_info) %in% c('longname', 'long_name'))]
+    var_info[['longname']] <- i_var_info[[longname]]
+    i_var_info[[longname]] <- NULL
+  } else {
+    var_info[['longname']] <- varname
+  }
+  # prec
+  if ('prec' %in% names(i_var_info)) {
+    var_info[['prec']] <- i_var_info[['prec']]
+    i_var_info[['prec']] <- NULL
+  } else {
+    prec <- typeof(data)
+    if (prec == 'character') {
+      var_info[['prec']] <- 'char'
+    } else if (any(prec %in% c('short', 'float', 'double', 'integer', 'char', 'byte'))) {
+      var_info[['prec']] <- prec
+    } else {
+      var_info[['prec']] <- 'double'
+    }
+  }
+  # extra information
+  if (!is.null(names(i_var_info))) {
+    extra_info_var <- i_var_info
+  }
+
+  datanc <- ncvar_def(name = var_info[['name']],
+                      units = var_info[['units']],
+                      dim = var_info[['dim']],
+                      missval = var_info[['missval']],
+                      longname = var_info[['longname']],
+                      prec = var_info[['prec']])
+
+  if (is.null(extra_string)) {
+    file_name <- paste0(varname, "_", startdates, ".nc")
+  } else {
+    file_name <- paste0(varname, "_", extra_string, "_", startdates, ".nc")
+  }
+  full_filename <- file.path(destination, file_name)
+  file_nc <- nc_create(full_filename, datanc)
+  ncvar_put(file_nc, datanc, data)
+
+  # Additional attributes
+  for (dim in names(defined_dims)) {
+    if (dim %in% names(extra_info_dim)) {
+      for (info_dim in names(extra_info_dim[[dim]])) {
+        add_info_dim <- paste0(extra_info_dim[[dim]][[info_dim]], collapse = ', ')
+        ncatt_put(file_nc, dim, info_dim, add_info_dim)
+      }
+    }
+  }
+  # Additional variable attributes
+  if (!is.null(extra_info_var)) {
+    for (info_var in names(extra_info_var)) {
+      add_info_var <- paste0(extra_info_var[[info_var]], collapse = ', ')
+      ncatt_put(file_nc, varname, info_var, add_info_var)
+    }
+  }
+
+  nc_close(file_nc)
+}
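The helpers above lean on a small set of ncdf4 calls (ncdim_def, ncvar_def, nc_create, ncvar_put, ncatt_put). A minimal, self-contained sketch of that write pattern, with synthetic values and an illustrative attribute name that are not taken from the patch:

library(ncdf4)

# One forecast-time dimension (here: 6 monthly leadtimes, assumed) and one
# variable, written to a temporary file and annotated with an extra attribute.
out_file <- file.path(tempdir(), "sketch.nc")
dim_time <- ncdim_def(name = "time", units = "months", vals = 1:6,
                      calendar = "proleptic_gregorian",
                      longname = "time", unlim = TRUE)
var_tas <- ncvar_def(name = "tas", units = "K", dim = list(dim_time),
                     missval = -9999, longname = "near-surface temperature",
                     prec = "double")
nc <- nc_create(out_file, list(var_tas))
ncvar_put(nc, var_tas, rnorm(6, mean = 280, sd = 5))  # synthetic values
ncatt_put(nc, "time", "comment", "illustrative attribute")  # like extra_info_dim
nc_close(nc)

The missval and prec arguments play the same role as the var_info fields assembled in the metadata loops above.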
diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R
index fc9fe4ee..60e886b9 100644
--- a/modules/Saving/Saving.R
+++ b/modules/Saving/Saving.R
@@ -14,6 +14,8 @@ source("modules/Saving/R/get_time.R")
 source("modules/Saving/R/get_latlon.R")
 source("modules/Saving/R/get_global_attributes.R")
 source("modules/Saving/R/drop_dims.R")
+source("modules/Saving/R/save_metrics_scorecards.R")
+source("modules/Saving/R/tmp/CST_SaveExp.R")
 
 Saving <- function(recipe, data,
                    skill_metrics = NULL,
diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R
index d3f60d4e..355acab5 100644
--- a/modules/Skill/Skill.R
+++ b/modules/Skill/Skill.R
@@ -366,8 +366,13 @@ Skill <- function(recipe, data, agg = 'global') {
   if (recipe$Analysis$Workflow$Skill$save == 'all') {
     corr_metric_names <- grep("^corr_individual_members", names(skill_metrics))
     if (length(corr_metric_names) == 0) {
-      save_metrics(recipe = recipe, skill = skill_metrics,
-                   data_cube = data$hcst, agg = agg)
+      if (tolower(recipe$Analysis$Output_format) == 'scorecards') {
+        save_metrics_scorecards(recipe = recipe, skill = skill_metrics,
+                                data_cube = data$hcst, agg = agg)
+      } else {
+        save_metrics(recipe = recipe, skill = skill_metrics,
+                     data_cube = data$hcst, agg = agg)
+      }
     } else {
       # Save corr
       if (length(skill_metrics[corr_metric_names]) > 0) {
@@ -376,8 +381,13 @@ Skill <- function(recipe, data, agg = 'global') {
       }
       # Save other skill metrics
       if (length(skill_metrics[-corr_metric_names]) > 0) {
-        save_metrics(recipe = recipe, skill = skill_metrics[-corr_metric_names],
-                     data_cube = data$hcst, agg = agg)
+        if (tolower(recipe$Analysis$Output_format) == 'scorecards') {
+          save_metrics_scorecards(recipe = recipe, skill = skill_metrics[-corr_metric_names],
+                                  data_cube = data$hcst, agg = agg)
+        } else {
+          save_metrics(recipe = recipe, skill = skill_metrics[-corr_metric_names],
+                       data_cube = data$hcst, agg = agg)
+        }
       }
     }
   }
-- 
GitLab


From 7d152eb3ec602443fd58931fd5725caacd3fbf7d Mon Sep 17 00:00:00 2001
From: Nadia Milders
Date: Thu, 16 Nov 2023 12:07:16 +0100
Subject: [PATCH 08/43] changed statistics output structure

---
 modules/Skill/Skill.R           | 2 +-
 modules/Statistics/Statistics.R | 9 ++++++---
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R
index 355acab5..6c066ff5 100644
--- a/modules/Skill/Skill.R
+++ b/modules/Skill/Skill.R
@@ -385,7 +385,7 @@ Skill <- function(recipe, data, agg = 'global') {
           save_metrics_scorecards(recipe = recipe, skill = skill_metrics[-corr_metric_names],
                        data_cube = data$hcst, agg = agg)
         } else {
-          save_metrics(recipe = recipe, skill = skill_metrics[-corr_metric_names],
+          save_metrics_scorecards(recipe = recipe, skill = skill_metrics[-corr_metric_names],
                        data_cube = data$hcst, agg = agg)
         }
diff --git a/modules/Statistics/Statistics.R b/modules/Statistics/Statistics.R
index 40f1889b..fe36c4ce 100644
--- a/modules/Statistics/Statistics.R
+++ b/modules/Statistics/Statistics.R
@@ -46,7 +46,9 @@ compute_statistics <- function(recipe, data, agg = 'global'){
                       target_dims = c(time_dim, memb_dim),
                       fun = 'sd')$output1
 
-    statistics[[ stat ]] <- list('std_hcst' = std_hcst,
'std_obs' = std_obs) + statistics[[ 'std_hcst' ]] <- std_hcst + statistics[[ 'std_obs' ]] <- std_obs + } ## close if on std @@ -61,7 +63,8 @@ compute_statistics <- function(recipe, data, agg = 'global'){ target_dims = c(time_dim, memb_dim), fun = 'sd')$output1)^2 - statistics[[ stat ]] <- list('var_hcst' = var_hcst, 'var_obs' = var_obs) + statistics[[ 'var_hcst' ]] <- var_hcst + statistics[[ 'var_obs' ]] <- var_obs } ## close if on var @@ -79,7 +82,7 @@ compute_statistics <- function(recipe, data, agg = 'global'){ if (recipe$Analysis$Workflow$Statistics$save == 'all') { # Save all statistics - save_metrics(recipe = recipe, skill = statistics, ## Not able to save data with these dimensions + save_metrics_scorecards(recipe = recipe, skill = statistics, ## Not able to save data with these dimensions data_cube = data$hcst, agg = agg) ## The length of parameter 'order' should be the same with the dimension length of parameter 'data'. } -- GitLab From 89c029eb0e6a9d3641ae881f9b977ab66460210d Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 23 Nov 2023 17:31:00 +0100 Subject: [PATCH 09/43] Add the new version of LoadMetrics and provide an example in Scorecards.R --- modules/Saving/R/get_filename.R | 14 +- modules/Saving/R/save_metrics_scorecards.R | 12 ++ modules/Scorecards/R/tmp/LoadMetrics.R | 219 ++++++--------------- modules/Scorecards/Scorecards.R | 50 +++-- modules/Skill/Skill.R | 2 +- 5 files changed, 116 insertions(+), 181 deletions(-) diff --git a/modules/Saving/R/get_filename.R b/modules/Saving/R/get_filename.R index ca40f1b5..9f6151e8 100644 --- a/modules/Saving/R/get_filename.R +++ b/modules/Saving/R/get_filename.R @@ -38,17 +38,17 @@ get_filename <- function(dir, recipe, var, date, agg, file.type) { "probs" = {type_info <- paste0("-probs_", date, "_")}, "bias" = {type_info <- paste0("-bias_", date, "_")}, # new - "rps_syear" = {type_info <- paste0("_rps-syear_")}, - "rps_clim_syear" = {type_info <- paste0("_rps-clim-syear_")}, - "crps_syear" = {type_info <- paste0("_crps-syear_")}, - "crps_clim_syear" = {type_info <- paste0("_crps-clim-syear_")}, - "crps" = {type_info <- paste0("_crps_")}, - "mean_bias" = {type_info <- paste0("_mean-bias_")}, + "rps_syear" = {type_info <- paste0("rps_syear")}, + "rps_clim_syear" = {type_info <- paste0("rps_clim_syear")}, + "crps_syear" = {type_info <- paste0("crps_syear")}, + "crps_clim_syear" = {type_info <- paste0("crps_clim_syear")}, + "crps" = {type_info <- paste0("crps")}, + "mean_bias" = {type_info <- paste0("mean_bias")}, {type_info <- paste0("_", file.type, "_")}) # Build file name file <- paste0("scorecards_", system, "_", reference, "_", - var, type_info, hcst_start, "-", hcst_end, "_s", shortdate) + var, "_",type_info, "_", hcst_start, "-", hcst_end, "_s", shortdate) } else { switch(file.type, "skill" = {file <- paste0(var, gg, "-skill_", dd, shortdate)}, diff --git a/modules/Saving/R/save_metrics_scorecards.R b/modules/Saving/R/save_metrics_scorecards.R index 020f2593..9c133395 100644 --- a/modules/Saving/R/save_metrics_scorecards.R +++ b/modules/Saving/R/save_metrics_scorecards.R @@ -7,6 +7,18 @@ save_metrics_scorecards <- function(recipe, fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq + # archive <- get_archive(recipe) + # global_attributes <- .get_global_attributes(recipe, archive) + # ## TODO: Sort out the logic once default behavior is decided + # if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && + # (recipe$Analysis$Workflow$Anomalies$compute)) { + # global_attributes <- 
c(list(from_anomalies = "Yes"), + # global_attributes) + # } else { + # global_attributes <- c(list(from_anomalies = "No"), + # global_attributes) + # } + # Select start date # If a fcst is provided, use that as the ref. year. Otherwise use 1970. if (fcst.horizon == 'decadal') { diff --git a/modules/Scorecards/R/tmp/LoadMetrics.R b/modules/Scorecards/R/tmp/LoadMetrics.R index e5e15421..55030d95 100644 --- a/modules/Scorecards/R/tmp/LoadMetrics.R +++ b/modules/Scorecards/R/tmp/LoadMetrics.R @@ -1,7 +1,15 @@ -#' Scorecards load metrics from verification suite +#'Scorecards load metrics from verification suite #' -#'@description Scorecards function to load saved data files +#'Scorecards function to load saved data files. This function is created to +#'load NetCDF files that contain 1 metric for each file. It calls startR::Start +#'internally. The NetCDF file is supposed to have the following dimensions: +#''longitude', 'latitude' and 'time'. If 'syear' is also a dimension, you must +#'set the parameter 'syear' different to NULL. #' +#'@param input_path A character string indicating the path where metrics output +#' files from verification suite are saved (or any other compatible files) +#'@param metrics A vector of character strings indicating the metrics name to +#' be loaded. #'@param system A vector of character strings defining the names of the #' system names following the archive.yml format from verification suite. #' Accepted system names: 'ECMWF-SEAS5', 'DWD-GFCS2.1', 'CMCC-SPS3.5', @@ -12,107 +20,40 @@ #'@param var A character string following the format from #' variable-dictionary.yml from verification suite (TO DO: multiple variables). #' The accepted names are: 'psl', 'tas', 'sfcWind', 'prlr'. -#'@param start.year A numeric indicating the start year of the reference period -#'@param end.year A numeric indicating the end year of the reference period -#'@param start.months A vector indicating the numbers of the start months -#'@param forecast.months A vector indicating the numbers of the forecast months -#'@param input.path A character string indicating the path where metrics output -#' files from verification suite are saved (or any other compatible files) +#'@param period A character string indicating the start and end years of the +#' reference period (e.g. '1993-203') +#'@param start_months A vector indicating the numbers of the start months +#'@param forecast_months A vector indicating the numbers of the forecast months +#'@param syear Used when the NetCDF file contains a metric that has the +#' dimension 'syear'. It can be any value. If there is no 'syear' dimension, +#' you must set it to NULL. It is NULL by default. #' #'@return A is a list by system and reference containing an array of with #' the following dimensions: longitude, latitude, forecast months, metrics, #' start dates. - #'@examples #'\dontrun{ #'loaded_metrics <- LoadMetrics(system = c('ECMWF-SEAS5','DWD-GFCS2.1'), -#' reference. 
= 'ERA5', +#' reference = 'ERA5', #' var = 'tas', -#' start.year = 1993, -#' end.year = 2016, +#' period = '1993-2016', #' metrics = c('mean_bias', 'enscorr', 'rpss', 'crpss', 'enssprerr'), -#' start.months = sprintf("%02d", 1:12), -#' forecast.months = 1:6, -#' input.path = '/esarchive/scratch/nmilders/scorecards_data/input_data') +#' start_months = sprintf("%02d", 1:12), +#' forecast_months = 1:6, +#' input_path = '/esarchive/scratch/nmilders/scorecards_data/input_data') #'} #'@import easyNCDF #'@import multiApply -#'@export -LoadMetrics <- function(system, reference, var, start.year, end.year, - metrics, start.months, forecast.months, - inf_to_na = FALSE, - input.path) { - - # Initial checks - ## system - if (!is.character(system)) { - stop("Parameter 'system' must be a character vector with the system names.") - } - ## reference - if (!is.character(reference)) { - stop("Parameter 'reference' must be a character vector with the reference ", - "names.") - } - ## var - if (!is.character(var)) { - stop("Parameter 'var' must be a character vector with the var ", - "names.") - } - if (length(var) > 1) { - warning("Parameter 'var' must be of length one. Only the first value ", - "will be used.") - var <- var[1] - } - ## start.year - if (!is.numeric(start.year)) { - stop("Parameter 'start.year' must be a numeric value.") - } - ## end.year - if (!is.numeric(end.year)) { - stop("Parameter 'end.year' must be a numeric value.") - } - ## metrics - if (!is.character(metrics)) { - stop("Parameter 'metrics' cannot be NULL.") - } - ## start.months - if (is.character(start.months)) { - warning("Parameter 'start.months' must be a numeric vector indicating ", - "the starting months.") - start.months <- as.numeric(start.months) - } - if (!is.numeric(start.months)) { - stop("Parameter 'start.months' must be a numeric vector indicating ", - "the starting months.") - } - start.months <- sprintf("%02d", start.months) - ## Check if sdates are continuous or discrete - if (all(diff(as.numeric(start.months)) == 1)) { - consecutive_start.months <- TRUE - } else { - consecutive_start.months <- FALSE - } - ## forecast.months - if (!is.numeric(forecast.months)) { - stop("Parameter 'forecast.months' must be a numeric vector indicating ", - "the starting months.") - } - ## input.path - if (!is.character(input.path)) { - stop("Parameter 'input.path must be a character string.") - } - if (length(input.path) > 1) { - input.path <- input.path[1] - warning("Parameter 'input.path' has length greater than 1 and only the ", - "first element will be used.") - } +LoadMetrics <- function(input_path, metrics, var, system, reference, + period = NULL, start_months = NULL, + forecast_months = NULL, inf_to_na = FALSE, + calib_method = NULL, syear = NULL) { + ## Remove . 
from names system <- gsub('.','', system, fixed = T) reference <- gsub('.','', reference, fixed = T) - period <- paste0(start.year, "-", end.year) - ## Define empty list to saved data all_metrics <- sapply(system, function(x) NULL) ## Load data for each system @@ -122,14 +63,15 @@ LoadMetrics <- function(system, reference, var, start.year, end.year, ## Load data for each reference for (ref in 1:length(reference)) { ## Call function to load metrics data - met <- .Loadmetrics(input.path = input.path, # recipe$Run$output, + met <- .loadmetrics(input_path = input_path, # recipe$Run$output, system = system[sys], reference = reference[ref], var = var, period = period, - start.months = start.months, - forecast.months = forecast.months, - metrics = metrics) + start_months = start_months, + forecast_months = forecast_months, + metrics = metrics, calib_method = calib_method, + syear = syear) ## Save metric data as array in reference list by_reference[[reference[ref]]] <- met ## Remove -Inf from crpss data if variable is precipitation @@ -142,74 +84,41 @@ LoadMetrics <- function(system, reference, var, start.year, end.year, } ## close loop on system return(all_metrics) -} ## close function +} -############################################################ +.loadmetrics <- function(input_path, metrics, var, system = NULL, + reference = NULL, period = NULL, start_months = NULL, + forecast_months = NULL, calib_method = NULL, + syear = NULL) { + # (1) Create the path to the files + directory_path <- paste0(input_path, system, "/", reference, "/", + calib_method, "/", var, "/") + file_path <- paste0(directory_path, "scorecards_", system, "_", reference, + "_", var, "_$var$_", period, "_s", "$smonths$", ".nc") -.Loadmetrics <- function(input.path, system, reference, - var, period, start.months, - forecast.months, metrics) { - - ## Load data for each start date - allfiles <- sapply(start.months, function(m) { - paste0(input.path, "/", system, "/", var, - "/scorecards_", system, "_", reference, "_", - var, "-skill_", period, "_s", m, # mod.pressure, - ".nc")}) - allfiles_exist <- sapply(allfiles, file.exists) - - # Check dims - files_exist_by_month <- seq(1:length(allfiles))[allfiles_exist] - allfiledims <- sapply(allfiles[allfiles_exist], easyNCDF::NcReadDims) - if (length(files_exist_by_month) == 0) { - stop("No files are found.") - } + # (2) Create the startR arguments list + if (is.null(forecast_months)) forecast_months <- 'all' + if (is.null(start_months)) start_months <- 'all' - num_dims <- numeric(dim(allfiledims)[1]) - for (i in 1:dim(allfiledims)[1]) { - if (length(unique(allfiledims[i,])) > 1) { - warning(paste0("Dimensions of system ", system," with var ", var, - " don't match.")) - } - num_dims[i] <- max(allfiledims[i,]) # We take the largest dimension + args = list(dat = file_path, + var = metrics, + longitude = 'all', + latitude = 'all', + time = forecast_months, + syear = "all", + smonths = start_months, + return_vars = list(longitude = 'dat', + latitude = 'dat', + time = NULL), + metadata_dims = c('dat', 'var'), + retrieve = TRUE) + + if (is.null(syear)) { + args$syear <- NULL } - # dims: [metric, longitude, latitude, time, smonth] - # or [metric, region, time, smonth] - - # Loop for file - dim(allfiles) <- c(dat = 1, sdate = length(allfiles)) - - array_met_by_sdate <- Apply(data = allfiles, target_dims = 'dat', fun = function(x) { - if (file.exists(x)) { - res <- easyNCDF::NcToArray(x, vars_to_read = metrics, unlist = T, - drop_var_dim = T) - names(dim(res)) <- NULL - } else { - res <- 
array(dim = c(length(metrics), allfiledims[-1,1])) - names(dim(res)) <- NULL - } - res})$output1 - - dim(array_met_by_sdate) <- c(metric = length(metrics), allfiledims[-1,1], - sdate = length(allfiles)) + + # (3): Call startR + loaded_metrics_start <- do.call(Start, args = args) - # Attributes - # Read attributes from the first existing file - if ("region" %in% rownames(allfiledims)) { - file_for_att <- ncdf4::nc_open(allfiles[allfiles_exist[1]]) - region <- ncdf4::ncatt_get(file_for_att, 'region') - ncdf4::nc_close(file_for_att) - attributes(array_met_by_sdate)$region <- region - } else { - lon <- easyNCDF::NcToArray(allfiles[allfiles_exist][1], vars_to_read = 'longitude', - unlist = T, drop_var_dim = T) - lat <- easyNCDF::NcToArray(allfiles[allfiles_exist][1], vars_to_read = 'latitude', - unlist = T, drop_var_dim = T) - attributes(array_met_by_sdate)$lon <- lon - attributes(array_met_by_sdate)$lat <- lat - } - attributes(array_met_by_sdate)$metrics <- metrics - attributes(array_met_by_sdate)$start.months <- start.months - attributes(array_met_by_sdate)$forecast.months <- forecast.months - return(array_met_by_sdate) + return(loaded_metrics_start) } diff --git a/modules/Scorecards/Scorecards.R b/modules/Scorecards/Scorecards.R index 0dbcd921..8312735a 100644 --- a/modules/Scorecards/Scorecards.R +++ b/modules/Scorecards/Scorecards.R @@ -28,13 +28,18 @@ Scorecards <- function(recipe) { start.year <- as.numeric(recipe$Analysis$Time$hcst_start) end.year <- as.numeric(recipe$Analysis$Time$hcst_end) forecast.months <- recipe$Analysis$Time$ftime_min : recipe$Analysis$Time$ftime_max - - if (recipe$Analysis$Workflow$Scorecards$start_months == 'all') { - start.months <- 1:12 - } else { - start.months <- as.numeric(strsplit(recipe$Analysis$Workflow$Scorecards$start_months, - split = ", | |,")[[1]]) - } + calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method) + + # NOTE (Eva): This condition needs to be checked, in my case + # (recipe$Analysis$Workflow$Scorecards$start_months = NULL) + start.months <- 1:12 # I added this line + # Needs to be corrected: + # if (recipe$Analysis$Workflow$Scorecards$start_months == 'all') { + # start.months <- 1:12 + # } else { + # start.months <- as.numeric(strsplit(recipe$Analysis$Workflow$Scorecards$start_months, + # split = ", | |,")[[1]]) + # } regions <- recipe$Analysis$Workflow$Scorecards$regions for (i in names(regions)){regions[[i]] <- unlist(regions[[i]])} @@ -76,17 +81,26 @@ Scorecards <- function(recipe) { ncores <- 1 # recipe$Analysis$ncores ## Load data files - loaded_metrics <- LoadMetrics(system = system, - reference = reference, - var = var, - start.year = start.year, - end.year = end.year, - metrics = metrics.load, - start.months = start.months, - forecast.months = forecast.months, - inf_to_na = inf.to.na, - input.path = input.path) - + start.months <- sprintf("%02d", start.months) + period <- paste0(start.year, "-", end.year) + + # NOTE (Eva): This is an example: + if (any(metrics.load %in% 'rps_syear')) { + metric_rps_syear <- .loadmetrics(input_path = input.path, system = system, + reference = reference, var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'rps_syear', + calib_method = calib.method, syear = TRUE) + } else if (any(metrics.load %in% c('crps', 'mean_bias'))) { + metrics <- LoadMetrics(input_path = input.path, system = system, + reference = reference, var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = c('crps', 
'mean_bias'),
+                           calib_method = calib.method)
+  }
+
   if('region' %in% names(dim(loaded_metrics[[1]][[1]]))){
 
diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R
index 6c066ff5..9ffd6369 100644
--- a/modules/Skill/Skill.R
+++ b/modules/Skill/Skill.R
@@ -67,7 +67,7 @@ Skill <- function(recipe, data, agg = 'global') {
     cross.val <- recipe$Analysis$Workflow$Skill$cross_validation
   }
   skill_metrics <- list()
-  for (metric in strsplit(metrics, ", | |,")[[1]]) {
+  for (metric in strsplit(metrics, ", | |,")) {
     # Whether the fair version of the metric is to be computed
     if (metric %in% c('frps', 'frpss', 'bss10', 'bss90',
                       'fcrps', 'fcrpss')) {
-- 
GitLab


From d06d96d0323adb0883db227e3b61035932ead9d2 Mon Sep 17 00:00:00 2001
From: Nadia Milders
Date: Thu, 30 Nov 2023 15:38:25 +0100
Subject: [PATCH 10/43] committing initial development of loading syear
 metrics

---
 modules/Scorecards/Dev_Scorecards.R | 330 ++++++++++++++++++++++++++++
 modules/Skill/Skill.R               |  10 +-
 tools/check_recipe.R                |   2 +
 3 files changed, 337 insertions(+), 5 deletions(-)
 create mode 100644 modules/Scorecards/Dev_Scorecards.R

diff --git a/modules/Scorecards/Dev_Scorecards.R b/modules/Scorecards/Dev_Scorecards.R
new file mode 100644
index 00000000..9829f6ed
--- /dev/null
+++ b/modules/Scorecards/Dev_Scorecards.R
@@ -0,0 +1,330 @@
+###############################################################################
+##################### SCORECARDS MODULE FOR SUNSET SUITE ######################
+###############################################################################
+
+##### Load source functions #####
+source('modules/Scorecards/R/tmp/LoadMetrics.R')
+source('modules/Scorecards/R/tmp/WeightedMetrics.R')
+source('modules/Scorecards/R/tmp/Utils.R')
+source('modules/Scorecards/R/tmp/SCTransform.R')
+source('modules/Scorecards/R/tmp/ScorecardsSingle.R')
+source('modules/Scorecards/R/tmp/ScorecardsMulti.R')
+source('modules/Scorecards/R/tmp/ScorecardsSystemDiff.R')
+source('modules/Scorecards/R/tmp/SCPlotScorecard.R')
+
+
+## TODO: Change function name to 'Scorecards'?
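# A minimal illustration (synthetic per-year scores, not project data) of the
# two aggregation modes this module distinguishes: averaging precomputed skill
# scores ('skill') versus recomputing the skill score from temporally
# aggregated scores ('score'), as done in the sections further below.
set.seed(42)
rps      <- runif(24, 0.10, 0.50)  # hypothetical RPS for 24 start years
rps_clim <- runif(24, 0.20, 0.60)  # hypothetical climatological RPS
mean(1 - rps / rps_clim)           # 'skill'-type aggregation
1 - mean(rps) / mean(rps_clim)     # 'score'-type aggregation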
+## Define function +Scorecards <- function(recipe) { + + ## Parameters for loading data + input.path <- "/esarchive/scratch/nmilders/scorecards_data/test/output_test/" #paste0(recipe$Run$output_dir, "/outputs/Skill/") + output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/") + dir.create(output.path, recursive = T, showWarnings = F) + system <- recipe$Analysis$Datasets$System$name + reference <- recipe$Analysis$Datasets$Reference$name + var <- recipe$Analysis$Variables$name + start.year <- as.numeric(recipe$Analysis$Time$hcst_start) + end.year <- as.numeric(recipe$Analysis$Time$hcst_end) + forecast.months <- recipe$Analysis$Time$ftime_min : recipe$Analysis$Time$ftime_max + calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method) + + # NOTE (Eva): This condition needs to be checked, in my case + # (recipe$Analysis$Workflow$Scorecards$start_months = NULL) + start.months <- 1:12 # I added this line + # Needs to be corrected: + # if (recipe$Analysis$Workflow$Scorecards$start_months == 'all') { + # start.months <- 1:12 + # } else { + # start.months <- as.numeric(strsplit(recipe$Analysis$Workflow$Scorecards$start_months, + # split = ", | |,")[[1]]) + # } + + regions <- recipe$Analysis$Workflow$Scorecards$regions + for (i in names(regions)){regions[[i]] <- unlist(regions[[i]])} + + metric.aggregation <- recipe$Analysis$Workflow$Scorecards$metric_aggregation + metrics.load <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Skill$metric), ", | |,")) + metrics.visualize <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric), ", | |,")) + + + ## Parameters for scorecard table + inf.to.na <- recipe$Analysis$Workflow$Scorecards$inf.to.na + table.label <- recipe$Analysis$Workflow$Scorecards$table_label + fileout.label <- recipe$Analysis$Workflow$Scorecards$fileout_label + legend.white.space <- recipe$Analysis$Workflow$Scorecards$legend_white_space + col1.width <- recipe$Analysis$Workflow$Scorecards$col1_width + col2.width <- recipe$Analysis$Workflow$Scorecards$col2_width + calculate.diff <- recipe$Analysis$Workflow$Scorecards$calculate_diff + ncores <- 1 # recipe$Analysis$ncores + + ## Load data files + start.months <- sprintf("%02d", start.months) + period <- paste0(start.year, "-", end.year) + + ####### SKILL AGGREGATION ####### + if(metric.aggregation == 'skill'){ + + ## Load data files + loaded_metrics <- LoadMetrics(system = system, + reference = reference, + var = var, + start.year = start.year, + end.year = end.year, + metrics = metrics.load, + start.months = start.months, + forecast.months = forecast.months, + inf.to.na = inf.to.na, + input.path = input.path) + + ## Spatial Aggregation of metrics + if('region' %in% names(dim(loaded_metrics[[1]][[1]]))){ + + ### Convert loaded metrics to array for already aggregated data + metrics.dim <- attributes(loaded_metrics[[1]][[1]])$metrics + forecast.months.dim <- attributes(loaded_metrics[[1]][[1]])$forecast.months + start.months.dim <- attributes(loaded_metrics[[1]][[1]])$start.months + regions.dim <- regions #list('NAO' = c(lon.min = -80, lon.max = 40, lat.min = 20, lat.max = 80)) + + aggregated_metrics <- array(dim = c(system = length(loaded_metrics), + reference = length(loaded_metrics[[1]]), + metric = length(metrics.dim), + time = length(forecast.months.dim), + sdate = length(start.months.dim), + region = length(regions.dim))) + + + for (sys in 1:length(names(loaded_metrics))){ + for (ref in 1:length(names(loaded_metrics[[sys]]))){ + aggregated_metrics[sys, ref, , , , ] <- s2dv::Reorder(data = 
loaded_metrics[[sys]][[ref]], order = c('metric','time','sdate','region')) + } + } + + ## Add attributes + attributes(aggregated_metrics)$metrics <- metrics.load + attributes(aggregated_metrics)$start.months <- attributes(loaded_metrics[[1]][[1]])$start.months + attributes(aggregated_metrics)$forecast.months <- attributes(loaded_metrics[[1]][[1]])$forecast.months + attributes(aggregated_metrics)$regions <- regions + attributes(aggregated_metrics)$system.name <- names(loaded_metrics) + attributes(aggregated_metrics)$reference.name <- names(loaded_metrics[[1]]) + + + } else { + ## Calculate weighted mean of spatial aggregation + aggregated_metrics <- WeightedMetrics(loaded_metrics, + regions = regions, + metric.aggregation = metric.aggregation, + ncores = ncores) + }## close if on region + } + + ###### SCORE AGGREGATION ###### + if(metric.aggregation == 'score'){ + + lon_dim_name <- 'longitude' + lat_dim_name <- 'latitude' + + if('rpss' %in% metrics.visualize){ + ## Load data from saved files + rps_syear <- .loadmetrics(input_path = input.path, system = system, + reference = reference, var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'rps_syear', + calib_method = calib.method, syear = TRUE) + + rps_clim_syear <- .loadmetrics(input_path = input.path, system = system, + reference = reference, var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'rps_clim_syear', + calib_method = calib.method, syear = TRUE) + + ## Remove dat and var dimensions + rps_syear <- Subset(rps_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') + rps_clim_syear <- Subset(rps_clim_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') + + ## Calculate skill score value to show in scorecard + + ## Spatially aggregate rps data + rps_syear_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = rps_syear, + region = regions[[X]], + lon = as.vector(attributes(rps_syear)$Variables$dat1$longitude), + lat = as.vector(attributes(rps_syear)$Variables$dat1$latitude), + londim = lon_dim_name, + latdim = lat_dim_name, + na.rm = F) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(rps_syear_spatial_aggr))[length(dim(rps_syear_spatial_aggr))] <- 'region' + + ## Spatially aggregate rps_clim data + rps_clim_syear_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = rps_clim_syear, + region = regions[[X]], + lon = as.vector(attributes(rps_clim_syear)$Variables$dat1$longitude), + lat = as.vector(attributes(rps_clim_syear)$Variables$dat1$latitude), + londim = lon_dim_name, + latdim = lat_dim_name, + na.rm = F) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(rps_clim_syear_spatial_aggr))[length(dim(rps_clim_syear_spatial_aggr))] <- 'region' + + ## Temporally aggregate data + rps_temp_aggr <- Apply(data = rps_syear_spatial_aggr, + target_dims = 'syear', + fun = 'mean', ncores = ncores)$output1 + + rps_clim_temp_aggr <- Apply(data = rps_clim_syear_spatial_aggr, + target_dims = 'syear', + fun = 'mean', ncores = ncores)$output1 + + + ## Calculate significance + sign_rpss <- RandomWalkTest(rps_syear_spatial_aggr, rps_clim_syear_spatial_aggr, + time_dim = 'syear', test.type = 'two.sided', + alpha = 0.05, pval = FALSE, sign = TRUE, + ncores = NULL)$sign + + } ## close if on rpss + + if('crpss' %in% metrics.visualize){ + ## Load data from saved files + crps_syear <- 
.loadmetrics(input_path = input.path, system = system, + reference = reference, var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'crps_syear', + calib_method = calib.method, syear = TRUE) + + crps_clim_syear <- .loadmetrics(input_path = input.path, system = system, + reference = reference, var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'crps_clim_syear', + calib_method = calib.method, syear = TRUE) + + ## Remove dat and var dimensions + crps_syear <- Subset(crps_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') + crps_clim_syear <- Subset(crps_clim_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') + + ## Calculate skill score value to show in scorecard + + ## Spatially aggregate crps data + crps_syear_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = crps_syear, + region = regions[[X]], + lon = as.vector(attributes(crps_syear)$Variables$dat1$longitude), + lat = as.vector(attributes(crps_syear)$Variables$dat1$latitude), + londim = lon_dim_name, + latdim = lat_dim_name, + na.rm = F) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(crps_syear_spatial_aggr))[length(dim(crps_syear_spatial_aggr))] <- 'region' + + ## Spatially aggregate crps_clim data + crps_clim_syear_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = crps_clim_syear, + region = regions[[X]], + lon = as.vector(attributes(crps_clim_syear)$Variables$dat1$longitude), + lat = as.vector(attributes(crps_clim_syear)$Variables$dat1$latitude), + londim = lon_dim_name, + latdim = lat_dim_name, + na.rm = F) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(crps_clim_syear_spatial_aggr))[length(dim(crps_clim_syear_spatial_aggr))] <- 'region' + + ## Temporally aggregate data + crps_temp_aggr <- Apply(data = crps_syear_spatial_aggr, + target_dims = 'syear', + fun = 'mean', ncores = ncores)$output1 + + crps_clim_temp_aggr <- Apply(data = crps_clim_syear_spatial_aggr, + target_dims = 'syear', + fun = 'mean', ncores = ncores)$output1 + + + ## Calculate significance + sign_crpss <- RandomWalkTest(crps_syear_spatial_aggr, crps_clim_syear_spatial_aggr, + time_dim = 'syear', test.type = 'two.sided', + alpha = 0.05, pval = FALSE, sign = TRUE, + ncores = NULL)$sign + + } ## close if on crpss + } ## close if on score + + ## Create simple scorecard tables + ## (one system only) + ## Metrics input must be in the same order as function SC_spatial_aggregation + scorecard_single <- ScorecardsSingle(data = aggregated_metrics, + system = system, + reference = reference, + var = var, + start.year = start.year, + end.year = end.year, + start.months = start.months, + forecast.months = forecast.months, + region.names = names(regions), + metrics = metrics.visualize, + table.label = table.label, + fileout.label = fileout.label, + legend.white.space = legend.white.space, + col1.width = col1.width, + col2.width = col2.width, + output.path = output.path) + + ## Create multi system/reference scorecard tables + ## (multiple systems with one reference or one system with multiple references) + ## Metrics input must be in the same order as function SC_spatial_aggregation + if(length(system) > 1 || length(reference) > 1){ + scorecard_multi <- ScorecardsMulti(data = aggregated_metrics, + system = system, + reference = reference, + var = var, + start.year = start.year, + end.year = 
end.year, + start.months = start.months, + forecast.months = forecast.months, + region.names = attributes(regions)$names, + metrics = metrics.visualize, + table.label = table.label, + fileout.label = fileout.label, + output.path = output.path) + } ## close if + + + if(calculate.diff == TRUE){ + if(length(system) == 2 || length(reference) == 2){ + scorecard_diff <- ScorecardsSystemDiff(data = aggregated_metrics, + system = system, + reference = reference, + var = var, + start.year = start.year, + end.year = end.year, + start.months = start.months, + forecast.months = forecast.months, + region.names = attributes(regions)$names, + metrics = metrics.visualize, + table.label = table.label, + fileout.label = fileout.label, + legend.white.space = legend.white.space, + col1.width = col1.width, + col2.width = col2.width, + output.path = output.path) + } else {stop ("Difference scorecard can only be computed with two systems or two references.")} + } ## close if on calculate.diff + +} + diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 9ffd6369..3e69e3e1 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -67,7 +67,7 @@ Skill <- function(recipe, data, agg = 'global') { cross.val <- recipe$Analysis$Workflow$Skill$cross_validation } skill_metrics <- list() - for (metric in strsplit(metrics, ", | |,")) { + for (metric in strsplit(metrics, ", | |,")[[1]]) { ## Loop over metric not working?? # Whether the fair version of the metric is to be computed if (metric %in% c('frps', 'frpss', 'bss10', 'bss90', 'fcrps', 'fcrpss')) { @@ -93,7 +93,7 @@ Skill <- function(recipe, data, agg = 'global') { skill <- .drop_dims(skill) skill_metrics[[ metric ]] <- skill # RPS_clim - } else if (metric %in% c('rps_clim')) { + } else if (metric == 'rps_clim') { skill <- Apply(list(data$obs$data), target_dims = c(time_dim, memb_dim), cross.val = cross.val, @@ -111,16 +111,16 @@ Skill <- function(recipe, data, agg = 'global') { return_mean = FALSE, ncores = ncores) skill <- .drop_dims(skill) - skill_metrics[[metric]] <- skill + skill_metrics[[ metric ]] <- skill ## temp # RPS_clim_syear - } else if (metric %in% c('rps_clim_syear')) { ## not returning syear dimension name + } else if (metric == 'rps_clim_syear') { skill <- Apply(list(data$obs$data), target_dims = c(time_dim, memb_dim), cross.val = cross.val, fun = RPS_clim, return_mean = FALSE, output_dims = 'syear')$output1 skill <- .drop_dims(skill) - skill_metrics[[ metric ]] <- skill + skill_metrics[[ metric ]] <- skill ## temp # Ranked Probability Skill Score and Fair version } else if (metric %in% c('rpss', 'frpss')) { skill <- RPSS(data$hcst$data, data$obs$data, diff --git a/tools/check_recipe.R b/tools/check_recipe.R index e68f0b90..f4d711f8 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -566,6 +566,7 @@ check_recipe <- function(recipe) { } # Scorecards if ("Scorecards" %in% names(recipe$Analysis$Workflow)) { + if(recipe$Analysis$Workflow$Scorecards$execute == TRUE){ if (is.null(recipe$Analysis$Workflow$Scorecards$metric)) { error(recipe$Run$logger, "Parameter 'metric' must be defined under 'Scorecards'.") @@ -602,6 +603,7 @@ check_recipe <- function(recipe) { error_status <- T } } + } } # --------------------------------------------------------------------- # RUN CHECKS -- GitLab From 1dbad2177ab450690eb09d8b342f763e3beece15 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Tue, 5 Dec 2023 16:21:27 +0100 Subject: [PATCH 11/43] developing scorecard loading syear metrics --- modules/Scorecards/Dev_Scorecards.R | 414 
+++++++++++++++++++--------- modules/Skill/Skill.R | 2 +- modules/Statistics/Statistics.R | 46 ++-- 3 files changed, 305 insertions(+), 157 deletions(-) diff --git a/modules/Scorecards/Dev_Scorecards.R b/modules/Scorecards/Dev_Scorecards.R index 9829f6ed..3984c3ed 100644 --- a/modules/Scorecards/Dev_Scorecards.R +++ b/modules/Scorecards/Dev_Scorecards.R @@ -18,7 +18,9 @@ source('modules/Scorecards/R/tmp/SCPlotScorecard.R') Scorecards <- function(recipe) { ## Parameters for loading data - input.path <- "/esarchive/scratch/nmilders/scorecards_data/test/output_test/" #paste0(recipe$Run$output_dir, "/outputs/Skill/") + input.path <- "/esarchive/scratch/nmilders/scorecards_data/test/recipe_scorecards_data_loading_nadia_20231204155452/outputs/" #temp + skill.input.path <- paste0(input.path, "Skill/") #paste0(recipe$Run$output_dir, "/outputs/Skill/") + stats.input.path <- paste0(input.path, "Statistics/") #paste0(recipe$Run$output_dir, "/outputs/Statistics/") output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/") dir.create(output.path, recursive = T, showWarnings = F) system <- recipe$Analysis$Datasets$System$name @@ -49,7 +51,7 @@ Scorecards <- function(recipe) { ## Parameters for scorecard table - inf.to.na <- recipe$Analysis$Workflow$Scorecards$inf.to.na + inf.to.na <- recipe$Analysis$Workflow$Scorecards$inf_to_na table.label <- recipe$Analysis$Workflow$Scorecards$table_label fileout.label <- recipe$Analysis$Workflow$Scorecards$fileout_label legend.white.space <- recipe$Analysis$Workflow$Scorecards$legend_white_space @@ -57,6 +59,7 @@ Scorecards <- function(recipe) { col2.width <- recipe$Analysis$Workflow$Scorecards$col2_width calculate.diff <- recipe$Analysis$Workflow$Scorecards$calculate_diff ncores <- 1 # recipe$Analysis$ncores + na.rm <- FALSE ## Load data files start.months <- sprintf("%02d", start.months) @@ -75,7 +78,7 @@ Scorecards <- function(recipe) { start.months = start.months, forecast.months = forecast.months, inf.to.na = inf.to.na, - input.path = input.path) + input.path = skill.input.path) ## Spatial Aggregation of metrics if('region' %in% names(dim(loaded_metrics[[1]][[1]]))){ @@ -123,147 +126,288 @@ Scorecards <- function(recipe) { lon_dim_name <- 'longitude' lat_dim_name <- 'latitude' - - if('rpss' %in% metrics.visualize){ - ## Load data from saved files - rps_syear <- .loadmetrics(input_path = input.path, system = system, - reference = reference, var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'rps_syear', - calib_method = calib.method, syear = TRUE) - - rps_clim_syear <- .loadmetrics(input_path = input.path, system = system, - reference = reference, var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'rps_clim_syear', - calib_method = calib.method, syear = TRUE) - - ## Remove dat and var dimensions - rps_syear <- Subset(rps_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') - rps_clim_syear <- Subset(rps_clim_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') - - ## Calculate skill score value to show in scorecard - - ## Spatially aggregate rps data - rps_syear_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = rps_syear, - region = regions[[X]], - lon = as.vector(attributes(rps_syear)$Variables$dat1$longitude), - lat = as.vector(attributes(rps_syear)$Variables$dat1$latitude), - londim = lon_dim_name, - latdim = lat_dim_name, - na.rm = F) - }, simplify = 'array') - 
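# A self-contained sketch (toy grid, synthetic field) of the cosine-latitude
# area weighting that the WeightedMean() regional aggregation above relies on;
# the grid and object names here are illustrative only:
lat <- seq(-88.75, 88.75, by = 2.5)
lon <- seq(1.25, 358.75, by = 2.5)
field <- matrix(rnorm(length(lon) * length(lat)), nrow = length(lon))
w <- cos(lat * pi / 180)
area_mean <- sum(sweep(field, 2, w, `*`)) / (length(lon) * sum(w))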
- ## Include name of region dimension - names(dim(rps_syear_spatial_aggr))[length(dim(rps_syear_spatial_aggr))] <- 'region' - - ## Spatially aggregate rps_clim data - rps_clim_syear_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = rps_clim_syear, - region = regions[[X]], - lon = as.vector(attributes(rps_clim_syear)$Variables$dat1$longitude), - lat = as.vector(attributes(rps_clim_syear)$Variables$dat1$latitude), - londim = lon_dim_name, - latdim = lat_dim_name, - na.rm = F) - }, simplify = 'array') - - ## Include name of region dimension - names(dim(rps_clim_syear_spatial_aggr))[length(dim(rps_clim_syear_spatial_aggr))] <- 'region' + time_dim <- 'syear' + memb_dim <- 'ensemble' - ## Temporally aggregate data - rps_temp_aggr <- Apply(data = rps_syear_spatial_aggr, - target_dims = 'syear', - fun = 'mean', ncores = ncores)$output1 - - rps_clim_temp_aggr <- Apply(data = rps_clim_syear_spatial_aggr, - target_dims = 'syear', - fun = 'mean', ncores = ncores)$output1 - - - ## Calculate significance - sign_rpss <- RandomWalkTest(rps_syear_spatial_aggr, rps_clim_syear_spatial_aggr, - time_dim = 'syear', test.type = 'two.sided', - alpha = 0.05, pval = FALSE, sign = TRUE, - ncores = NULL)$sign + ## Define arrays to filled with data + scorecard_metrics <- array(data = NA, + dim = c(time = length(forecast.months), + sdate = length(start.months), + region = length(regions), + metric = length(metrics.visualize))) - } ## close if on rpss + scorecard_sign <- array(data = NA, + dim = c(time = length(forecast.months), + sdate = length(start.months), + region = length(regions), + metric = length(metrics.visualize))) + + for (met in metrics.visualize) { + + if(met == 'rpss'){ + ## Load data from saved files + rps_syear <- .loadmetrics(input_path = skill.input.path, system = system, + reference = reference, var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'rps_syear', + calib_method = calib.method, syear = TRUE) + + rps_clim_syear <- .loadmetrics(input_path = skill.input.path, system = system, + reference = reference, var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'rps_clim_syear', + calib_method = calib.method, syear = TRUE) + + ## Remove dat and var dimensions + rps_syear <- Subset(rps_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') + rps_clim_syear <- Subset(rps_clim_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') + + ## Spatially aggregate data + rps_syear_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = rps_syear, + region = regions[[X]], + lon = as.vector(attributes(rps_syear)$Variables$dat1$longitude), + lat = as.vector(attributes(rps_syear)$Variables$dat1$latitude), + londim = lon_dim_name, + latdim = lat_dim_name, + na.rm = F) + }, simplify = 'array') + + rps_clim_syear_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = rps_clim_syear, + region = regions[[X]], + lon = as.vector(attributes(rps_clim_syear)$Variables$dat1$longitude), + lat = as.vector(attributes(rps_clim_syear)$Variables$dat1$latitude), + londim = lon_dim_name, + latdim = lat_dim_name, + na.rm = F) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(rps_syear_spatial_aggr))[length(dim(rps_syear_spatial_aggr))] <- 'region' + names(dim(rps_clim_syear_spatial_aggr))[length(dim(rps_clim_syear_spatial_aggr))] <- 'region' - if('crpss' 
%in% metrics.visualize){ - ## Load data from saved files - crps_syear <- .loadmetrics(input_path = input.path, system = system, - reference = reference, var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'crps_syear', - calib_method = calib.method, syear = TRUE) - - crps_clim_syear <- .loadmetrics(input_path = input.path, system = system, - reference = reference, var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'crps_clim_syear', - calib_method = calib.method, syear = TRUE) - - ## Remove dat and var dimensions - crps_syear <- Subset(crps_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') - crps_clim_syear <- Subset(crps_clim_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') - - ## Calculate skill score value to show in scorecard + ## Temporally aggregate data + rps_temp_aggr <- Apply(data = rps_syear_spatial_aggr, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + rps_clim_temp_aggr <- Apply(data = rps_clim_syear_spatial_aggr, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + ## Calculate RPSS from aggregated RPS and RPS_clim + rpss <- 1 - rps_temp_aggr / rps_clim_temp_aggr + + ## Calculate significance + sign_rpss <- RandomWalkTest(rps_syear_spatial_aggr, rps_clim_syear_spatial_aggr, + time_dim = time_dim, test.type = 'two.sided', + alpha = 0.05, pval = FALSE, sign = TRUE, + ncores = NULL)$sign - ## Spatially aggregate crps data - crps_syear_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = crps_syear, - region = regions[[X]], - lon = as.vector(attributes(crps_syear)$Variables$dat1$longitude), - lat = as.vector(attributes(crps_syear)$Variables$dat1$latitude), - londim = lon_dim_name, - latdim = lat_dim_name, - na.rm = F) - }, simplify = 'array') + ## Save metric result in arrays + scorecard_metrics[ , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = rpss, order = c('time', 'smonths','region')) + scorecard_sign[ , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_rpss, order = c('time', 'smonths','region')) + + } ## close if on rpss - ## Include name of region dimension - names(dim(crps_syear_spatial_aggr))[length(dim(crps_syear_spatial_aggr))] <- 'region' - - ## Spatially aggregate crps_clim data - crps_clim_syear_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = crps_clim_syear, - region = regions[[X]], - lon = as.vector(attributes(crps_clim_syear)$Variables$dat1$longitude), - lat = as.vector(attributes(crps_clim_syear)$Variables$dat1$latitude), - londim = lon_dim_name, - latdim = lat_dim_name, - na.rm = F) - }, simplify = 'array') - - ## Include name of region dimension - names(dim(crps_clim_syear_spatial_aggr))[length(dim(crps_clim_syear_spatial_aggr))] <- 'region' - - ## Temporally aggregate data - crps_temp_aggr <- Apply(data = crps_syear_spatial_aggr, - target_dims = 'syear', - fun = 'mean', ncores = ncores)$output1 - - crps_clim_temp_aggr <- Apply(data = crps_clim_syear_spatial_aggr, - target_dims = 'syear', - fun = 'mean', ncores = ncores)$output1 + + if(met == 'crpss'){ + ## Load data from saved files + crps_syear <- .loadmetrics(input_path = skill.input.path, system = system, + reference = reference, var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'crps_syear', + calib_method = calib.method, syear = TRUE) + + 
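# A minimal sketch (synthetic scores, illustrative names) of the significance
# test applied to these per-year aggregated scores further below:
# s2dv::RandomWalkTest compares two score series along the 'syear' dimension.
rps_a <- array(runif(24, 0.1, 0.5), dim = c(syear = 24))
rps_b <- array(runif(24, 0.2, 0.6), dim = c(syear = 24))
sig <- s2dv::RandomWalkTest(skill_A = rps_a, skill_B = rps_b,
                            time_dim = 'syear', test.type = 'two.sided',
                            alpha = 0.05, pval = FALSE, sign = TRUE)$sign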
crps_clim_syear <- .loadmetrics(input_path = skill.input.path, system = system, + reference = reference, var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'crps_clim_syear', + calib_method = calib.method, syear = TRUE) + + ## Remove dat and var dimensions + crps_syear <- Subset(crps_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') + crps_clim_syear <- Subset(crps_clim_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') + + ## Spatially aggregate data + crps_syear_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = crps_syear, + region = regions[[X]], + lon = as.vector(attributes(crps_syear)$Variables$dat1$longitude), + lat = as.vector(attributes(crps_syear)$Variables$dat1$latitude), + londim = lon_dim_name, + latdim = lat_dim_name, + na.rm = na.rm) + }, simplify = 'array') + + crps_clim_syear_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = crps_clim_syear, + region = regions[[X]], + lon = as.vector(attributes(crps_clim_syear)$Variables$dat1$longitude), + lat = as.vector(attributes(crps_clim_syear)$Variables$dat1$latitude), + londim = lon_dim_name, + latdim = lat_dim_name, + na.rm = na.rm) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(crps_syear_spatial_aggr))[length(dim(crps_syear_spatial_aggr))] <- 'region' + names(dim(crps_clim_syear_spatial_aggr))[length(dim(crps_clim_syear_spatial_aggr))] <- 'region' + + ## Temporally aggregate data + crps_temp_aggr <- Apply(data = crps_syear_spatial_aggr, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + crps_clim_temp_aggr <- Apply(data = crps_clim_syear_spatial_aggr, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + ## Calculate CRPSS from aggregated CRPS and CRPS_clim + crpss <- 1 - crps_temp_aggr / crps_clim_temp_aggr + + ## Calculate significance + sign_crpss <- RandomWalkTest(crps_syear_spatial_aggr, crps_clim_syear_spatial_aggr, + time_dim = time_dim, test.type = 'two.sided', + alpha = 0.05, pval = FALSE, sign = TRUE, + ncores = NULL)$sign + + ## Save metric result in arrays + scorecard_metrics[ , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = crpss, order = c('time', 'smonths','region')) + scorecard_sign[ , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_crpss, order = c('time', 'smonths','region')) + + } ## close if on crpss - ## Calculate significance - sign_crpss <- RandomWalkTest(crps_syear_spatial_aggr, crps_clim_syear_spatial_aggr, - time_dim = 'syear', test.type = 'two.sided', - alpha = 0.05, pval = FALSE, sign = TRUE, - ncores = NULL)$sign + if(met == 'corr'){ + ## Load data from saved files + cov <- .loadmetrics(input_path = stats.input.path, system = system, + reference = reference, var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'cov', + calib_method = calib.method, syear = NULL) + + ## Comments (Nadia): + ## 1) syear parameter as FALSE instead of NULL for metrics without syear dimension? 
+ ## 2) output files names of statistics not correct, too many _ in name + + std_hcst <- .loadmetrics(input_path = stats.input.path, system = system, + reference = reference, var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'std_hcst', + calib_method = calib.method, syear = NULL) + + std_obs <- .loadmetrics(input_path = stats.input.path, system = system, + reference = reference, var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'std_obs', + calib_method = calib.method, syear = NULL) - } ## close if on crpss + ## Calculate spatial aggregation + cov_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = cov, + region = regions[[X]], + lon = as.vector(attributes(cov)$Variables$dat1$longitude), + lat = as.vector(attributes(cov)$Variables$dat1$latitude), + londim = lon_dim_name, + latdim = lat_dim_name, + na.rm = na.rm) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(cov_spatial_aggr))[length(dim(cov_spatial_aggr))] <- 'region' + + + std_hcst_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = std_hcst, + region = regions[[X]], + lon = as.vector(attributes(std_hcst)$Variables$dat1$longitude), + lat = as.vector(attributes(std_hcst)$Variables$dat1$latitude), + londim = lon_dim_name, + latdim = lat_dim_name, + na.rm = na.rm) + }, simplify = 'array') + + names(dim(std_hcst_spatial_aggr))[length(dim(std_hcst_spatial_aggr))] <- 'region' + + std_obs_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = std_obs, + region = regions[[X]], + lon = as.vector(attributes(std_obs)$Variables$dat1$longitude), + lat = as.vector(attributes(std_obs)$Variables$dat1$latitude), + londim = lon_dim_name, + latdim = lat_dim_name, + na.rm = na.rm) + }, simplify = 'array') + + names(dim(std_obs_spatial_aggr))[length(dim(std_obs_spatial_aggr))] <- 'region' + + ## Calculate correlation + + corr <- cov_spatial_aggr / (std_hcst_spatial_aggr * std_obs_spatial_aggr) + metric <- corr + + ## Calculate significance of corr + n_eff <- s2dv::Eno(data = data$obs$data, time_dim = time_dim, na.action = na.pass, ncores = ncores) + # n_eff <- (end.year - start.year) + 1 # define degrees of freedom + alpha <- 0.05 # Apply 95% confidence + + t_alpha2_n2 <- qt(p = alpha/2, df = n_eff-2, lower.tail = FALSE) + t <- abs(corr) * sqrt(n_eff-2) / sqrt(1-corr^2) + + sign_corr <- NULL + + if (anyNA(c(t,t_alpha2_n2)) == FALSE & t >= t_alpha2_n2){ + sign_corr$sign = TRUE + } else { + sign_corr$sign = FALSE + } + + ## Save metric result in arrays + scorecard_metrics[ , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = corr, order = c('time', 'smonths','region')) + scorecard_sign[ , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_corr, order = c('time', 'smonths','region')) + + + } ## close if on corr + + ## Combine metric arrays + scorecard_metrics[ , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = metric, order = c('time', 'smonths','region')) + + ##Combine significance + + + } ## close loop on metric } ## close if on score + + + + + + + + + + + + + + + ## Create simple scorecard tables ## (one system only) diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 3e69e3e1..cc0da1e1 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -67,7 +67,7 @@ Skill <- function(recipe, data, agg = 'global') { cross.val <- 
recipe$Analysis$Workflow$Skill$cross_validation } skill_metrics <- list() - for (metric in strsplit(metrics, ", | |,")[[1]]) { ## Loop over metric not working?? + for (metric in strsplit(metrics, ", | |,")[[1]]) { # Whether the fair version of the metric is to be computed if (metric %in% c('frps', 'frpss', 'bss10', 'bss90', 'fcrps', 'fcrpss')) { diff --git a/modules/Statistics/Statistics.R b/modules/Statistics/Statistics.R index fe36c4ce..b3ee7848 100644 --- a/modules/Statistics/Statistics.R +++ b/modules/Statistics/Statistics.R @@ -9,64 +9,68 @@ compute_statistics <- function(recipe, data, agg = 'global'){ time_dim <- 'syear' memb_dim <- 'ensemble' + + # ## Remove unwanted dimensions + obs_data <- Subset(data$obs$data, along = c('dat', 'sday', 'sweek', 'ensemble'), indices = list(1,1,1,1) , drop = 'selected') + hcst_data <- Subset(data$hcst$data, along = c('dat', 'sday', 'sweek'), indices = list(1,1,1) , drop = 'selected') + ## Repeat ensemble members for obs + obs_data_ens <- InsertDim(data = obs_data, pos = length(dim(obs_data))+1, lendim = 25, name = 'ensemble') - ## Duplicate obs along hcst ensemble dimension - obs_data <- adrop(data$obs$data, drop = 9) - obs_data <- InsertDim(data = obs_data, pos = 9, lendim = 25, name = 'ensemble') + # obs_data <- adrop(data$obs$data, drop = 9) + # obs_data_ens <- InsertDim(data = obs_data, pos = 9, lendim = 25, name = 'ensemble') statistics_list <- tolower(recipe$Analysis$Workflow$Statistics$metric) - statistics <- list() + statistics_metrics <- list() for (stat in strsplit(statistics_list, ", | |,")[[1]]) { # Whether the fair version of the metric is to be computed if (stat %in% c('cov', 'covariance')) { - covariance <- Apply(data = list(x= obs_data, y=data$hcst$data), + covariance <- Apply(data = list(x = obs_data_ens, y = hcst_data), target_dims = c(time_dim, memb_dim), fun = function(x,y){cov(as.vector(x),as.vector(y), use = "everything", method = "pearson")})$output1 - statistics[[ stat ]] <- covariance + statistics_metrics[[ stat ]] <- covariance - } ## close if on cov + } ## close if on covariance if (stat %in% c('std', 'standard_deviation')) { ## Calculate standard deviation - std_hcst <- Apply(data = data$hcst$data, + std_hcst <- Apply(data = hcst_data, target_dims = c(time_dim, memb_dim), fun = 'sd')$output1 - std_obs <- Apply(data = data$obs$data, - target_dims = c(time_dim, memb_dim), + std_obs <- Apply(data = obs_data, + target_dims = c(time_dim), fun = 'sd')$output1 - statistics[[ 'std_hcst' ]] <- std_hcst - statistics[[ 'std_obs' ]] <- std_obs - + statistics_metrics[['std_hcst']] <- std_hcst + statistics_metrics[['std_obs']] <- std_obs } ## close if on std if (stat %in% c('var', 'variance')) { ## Calculate standard deviation - var_hcst <- (Apply(data = data$hcst$data, + var_hcst <- (Apply(data = hcst_data, target_dims = c(time_dim, memb_dim), fun = 'sd')$output1)^2 - var_obs <- (Apply(data = data$obs$data, - target_dims = c(time_dim, memb_dim), + var_obs <- (Apply(data = obs_data, + target_dims = c(time_dim), fun = 'sd')$output1)^2 - statistics[[ 'var_hcst' ]] <- var_hcst - statistics[[ 'var_obs' ]] <- var_obs + statistics_metrics[['var_hcst']] <- var_hcst + statistics_metrics[['var_obs']] <- var_obs - } ## close if on var + } ## close if on variance } @@ -82,11 +86,11 @@ compute_statistics <- function(recipe, data, agg = 'global'){ if (recipe$Analysis$Workflow$Statistics$save == 'all') { # Save all statistics - save_metrics_scorecards(recipe = recipe, skill = statistics, ## Not able to save data with these dimensions + 
save_metrics_scorecards(recipe = recipe, skill = statistics_metrics, ## Not able to save data with these dimensions data_cube = data$hcst, agg = agg) ## The length of parameter 'order' should be the same with the dimension length of parameter 'data'. } # Return results - return(statistics) + return(statistics_metrics) } -- GitLab From 51ba570c6456d255d7c5bf9d66ea5254ad88ef62 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Thu, 7 Dec 2023 16:02:16 +0100 Subject: [PATCH 12/43] included enscorr significance --- modules/Scorecards/Dev_Scorecards.R | 202 ++++++++++++++++++++-------- modules/Statistics/Statistics.R | 8 ++ 2 files changed, 151 insertions(+), 59 deletions(-) diff --git a/modules/Scorecards/Dev_Scorecards.R b/modules/Scorecards/Dev_Scorecards.R index 3984c3ed..2bc02a5e 100644 --- a/modules/Scorecards/Dev_Scorecards.R +++ b/modules/Scorecards/Dev_Scorecards.R @@ -18,7 +18,7 @@ source('modules/Scorecards/R/tmp/SCPlotScorecard.R') Scorecards <- function(recipe) { ## Parameters for loading data - input.path <- "/esarchive/scratch/nmilders/scorecards_data/test/recipe_scorecards_data_loading_nadia_20231204155452/outputs/" #temp + input.path <- "/esarchive/scratch/nmilders/scorecards_data/test/recipe_scorecards_data_loading_nadia_20231207112311/outputs/" #temp skill.input.path <- paste0(input.path, "Skill/") #paste0(recipe$Run$output_dir, "/outputs/Skill/") stats.input.path <- paste0(input.path, "Statistics/") #paste0(recipe$Run$output_dir, "/outputs/Statistics/") output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/") @@ -124,8 +124,14 @@ Scorecards <- function(recipe) { ###### SCORE AGGREGATION ###### if(metric.aggregation == 'score'){ - lon_dim_name <- 'longitude' - lat_dim_name <- 'latitude' + ## Comments (Nadia): + ## 1) syear parameter as FALSE instead of NULL for metrics without syear dimension? + ## 2) output files names of statistics not correct, too many _ in name + ## 3) how to load multiple systems? 
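## Note: illustration only, with made-up numbers (not from any run). The
## 'score' aggregation handled in this branch averages the scores first and
## forms the skill score last, rather than averaging grid-point skill scores.
rps      <- c(0.20, 0.35, 0.25)    # toy per-syear RPS of the system
rps_clim <- c(0.30, 0.36, 0.33)    # toy per-syear RPS of the climatology
1 - mean(rps) / mean(rps_clim)     # score aggregation -> approx. 0.192
mean(1 - rps / rps_clim)           # skill aggregation -> approx. 0.201
## The two orderings differ in general, which is why the per-syear scores
## (rps_syear, rps_clim_syear below) are kept separate until after the
## spatial and temporal means.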
+ + + lon_dim <- 'longitude' + lat_dim <- 'latitude' time_dim <- 'syear' memb_dim <- 'ensemble' @@ -171,8 +177,8 @@ Scorecards <- function(recipe) { region = regions[[X]], lon = as.vector(attributes(rps_syear)$Variables$dat1$longitude), lat = as.vector(attributes(rps_syear)$Variables$dat1$latitude), - londim = lon_dim_name, - latdim = lat_dim_name, + londim = lon_dim, + latdim = lat_dim, na.rm = F) }, simplify = 'array') @@ -182,8 +188,8 @@ Scorecards <- function(recipe) { region = regions[[X]], lon = as.vector(attributes(rps_clim_syear)$Variables$dat1$longitude), lat = as.vector(attributes(rps_clim_syear)$Variables$dat1$latitude), - londim = lon_dim_name, - latdim = lat_dim_name, + londim = lon_dim, + latdim = lat_dim, na.rm = F) }, simplify = 'array') @@ -214,7 +220,6 @@ Scorecards <- function(recipe) { scorecard_sign[ , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_rpss, order = c('time', 'smonths','region')) } ## close if on rpss - if(met == 'crpss'){ ## Load data from saved files @@ -243,8 +248,8 @@ Scorecards <- function(recipe) { region = regions[[X]], lon = as.vector(attributes(crps_syear)$Variables$dat1$longitude), lat = as.vector(attributes(crps_syear)$Variables$dat1$latitude), - londim = lon_dim_name, - latdim = lat_dim_name, + londim = lon_dim, + latdim = lat_dim, na.rm = na.rm) }, simplify = 'array') @@ -254,8 +259,8 @@ Scorecards <- function(recipe) { region = regions[[X]], lon = as.vector(attributes(crps_clim_syear)$Variables$dat1$longitude), lat = as.vector(attributes(crps_clim_syear)$Variables$dat1$latitude), - londim = lon_dim_name, - latdim = lat_dim_name, + londim = lon_dim, + latdim = lat_dim, na.rm = na.rm) }, simplify = 'array') @@ -287,8 +292,7 @@ Scorecards <- function(recipe) { } ## close if on crpss - - if(met == 'corr'){ + if(met == 'enscorr'){ ## Load data from saved files cov <- .loadmetrics(input_path = stats.input.path, system = system, reference = reference, var = var, @@ -297,16 +301,12 @@ Scorecards <- function(recipe) { metrics = 'cov', calib_method = calib.method, syear = NULL) - ## Comments (Nadia): - ## 1) syear parameter as FALSE instead of NULL for metrics without syear dimension? 
- ## 2) output files names of statistics not correct, too many _ in name - std_hcst <- .loadmetrics(input_path = stats.input.path, system = system, - reference = reference, var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'std_hcst', - calib_method = calib.method, syear = NULL) + reference = reference, var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'std_hcst', + calib_method = calib.method, syear = NULL) std_obs <- .loadmetrics(input_path = stats.input.path, system = system, reference = reference, var = var, @@ -315,6 +315,14 @@ Scorecards <- function(recipe) { metrics = 'std_obs', calib_method = calib.method, syear = NULL) + + n_eff <- .loadmetrics(input_path = stats.input.path, system = system, + reference = reference, var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'n_eff', + calib_method = calib.method, syear = NULL) + ## Calculate spatial aggregation cov_spatial_aggr <- sapply(X = 1:length(regions), FUN = function(X) { @@ -322,8 +330,8 @@ Scorecards <- function(recipe) { region = regions[[X]], lon = as.vector(attributes(cov)$Variables$dat1$longitude), lat = as.vector(attributes(cov)$Variables$dat1$latitude), - londim = lon_dim_name, - latdim = lat_dim_name, + londim = lon_dim, + latdim = lat_dim, na.rm = na.rm) }, simplify = 'array') @@ -337,8 +345,8 @@ Scorecards <- function(recipe) { region = regions[[X]], lon = as.vector(attributes(std_hcst)$Variables$dat1$longitude), lat = as.vector(attributes(std_hcst)$Variables$dat1$latitude), - londim = lon_dim_name, - latdim = lat_dim_name, + londim = lon_dim, + latdim = lat_dim, na.rm = na.rm) }, simplify = 'array') @@ -350,63 +358,139 @@ Scorecards <- function(recipe) { region = regions[[X]], lon = as.vector(attributes(std_obs)$Variables$dat1$longitude), lat = as.vector(attributes(std_obs)$Variables$dat1$latitude), - londim = lon_dim_name, - latdim = lat_dim_name, + londim = lon_dim, + latdim = lat_dim, na.rm = na.rm) }, simplify = 'array') names(dim(std_obs_spatial_aggr))[length(dim(std_obs_spatial_aggr))] <- 'region' - ## Calculate correlation + n_eff_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = n_eff, + region = regions[[X]], + lon = as.vector(attributes(std_obs)$Variables$dat1$longitude), + lat = as.vector(attributes(std_obs)$Variables$dat1$latitude), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') - corr <- cov_spatial_aggr / (std_hcst_spatial_aggr * std_obs_spatial_aggr) - metric <- corr + names(dim(n_eff_spatial_aggr))[length(dim(n_eff_spatial_aggr))] <- 'region' + n_eff_spatial_aggr <- Subset(n_eff_spatial_aggr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') + + ## Calculate correlation + enscorr <- cov_spatial_aggr / (std_hcst_spatial_aggr * std_obs_spatial_aggr) + ## Drop unwanted dimensions + enscorr <- Subset(enscorr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') + ## Calculate significance of corr - n_eff <- s2dv::Eno(data = data$obs$data, time_dim = time_dim, na.action = na.pass, ncores = ncores) - # n_eff <- (end.year - start.year) + 1 # define degrees of freedom alpha <- 0.05 # Apply 95% confidence - t_alpha2_n2 <- qt(p = alpha/2, df = n_eff-2, lower.tail = FALSE) - t <- abs(corr) * sqrt(n_eff-2) / sqrt(1-corr^2) + t_alpha2_n2 <- qt(p = alpha/2, df = n_eff_spatial_aggr-2, lower.tail = FALSE) + t <- abs(enscorr) * 
sqrt(n_eff_spatial_aggr-2) / sqrt(1-enscorr^2) - sign_corr <- NULL + sign_corr<- array(data = NA, + dim = c(time = length(forecast.months), + smonths = length(start.months), + region = length(regions))) - if (anyNA(c(t,t_alpha2_n2)) == FALSE & t >= t_alpha2_n2){ - sign_corr$sign = TRUE - } else { - sign_corr$sign = FALSE - } + + for (time in 1:dim(sign_corr)[['time']]){ + for (mon in 1:dim(sign_corr)[['smonths']]){ + for (reg in 1:dim(sign_corr)[['region']]){ + + if (anyNA(c(t[time, mon, reg], t_alpha2_n2[time, mon, reg])) == FALSE + && t[time, mon, reg] >= t_alpha2_n2[time, mon, reg]){ + sign_corr[time, mon, reg] <- TRUE + } else { + sign_corr[time, mon, reg] <- FALSE + } + + } + } + } ## Save metric result in arrays - scorecard_metrics[ , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = corr, order = c('time', 'smonths','region')) + scorecard_metrics[ , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = enscorr, order = c('time', 'smonths','region')) scorecard_sign[ , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_corr, order = c('time', 'smonths','region')) + + } ## close if on enscorr + + if(met == 'mean_bias'){ + mean_bias <- .loadmetrics(input_path = skill.input.path, system = system, + reference = reference, var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'mean_bias', + calib_method = calib.method, syear = NULL) - } ## close if on corr + ## Calculate spatial aggregation + mean_bias_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = mean_bias, + region = regions[[X]], + lon = as.vector(attributes(mean_bias)$Variables$dat1$longitude), + lat = as.vector(attributes(mean_bias)$Variables$dat1$latitude), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') - ## Combine metric arrays - scorecard_metrics[ , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = metric, order = c('time', 'smonths','region')) - - ##Combine significance - + names(dim(mean_bias_spatial_aggr))[length(dim(mean_bias_spatial_aggr))] <- 'region' + + ## Drop unwanted dimensions + mean_bias_spatial_aggr <- Subset(mean_bias_spatial_aggr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') + + ## Save metric result in array + scorecard_metrics[ , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = mean_bias_spatial_aggr, order = c('time', 'smonths','region')) + + } ## close on mean_bias + + if(met == 'enssprerr'){ + + enssprerr <- .loadmetrics(input_path = skill.input.path, system = system, + reference = reference, var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'enssprerr', + calib_method = calib.method, syear = NULL) + + ## Calculate spatial aggregation + enssprerr_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = enssprerr, + region = regions[[X]], + lon = as.vector(attributes(enssprerr)$Variables$dat1$longitude), + lat = as.vector(attributes(enssprerr)$Variables$dat1$latitude), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + names(dim(enssprerr_spatial_aggr))[length(dim(enssprerr_spatial_aggr))] <- 'region' + + ## Drop unwanted dimensions + enssprerr_spatial_aggr <- Subset(enssprerr_spatial_aggr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') + + ## Save metric result in array + scorecard_metrics[ , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = 
enssprerr_spatial_aggr, order = c('time', 'smonths','region')) + + } ## close on enssprerr + } ## close loop on metric + + aggregated_metrics <- scorecard_metrics + } ## close if on score - - - - - - - - - - + stop() ## Create simple scorecard tables diff --git a/modules/Statistics/Statistics.R b/modules/Statistics/Statistics.R index b3ee7848..777f5c78 100644 --- a/modules/Statistics/Statistics.R +++ b/modules/Statistics/Statistics.R @@ -9,6 +9,7 @@ compute_statistics <- function(recipe, data, agg = 'global'){ time_dim <- 'syear' memb_dim <- 'ensemble' + ncores <- recipe$Analysis$ncores # ## Remove unwanted dimensions obs_data <- Subset(data$obs$data, along = c('dat', 'sday', 'sweek', 'ensemble'), indices = list(1,1,1,1) , drop = 'selected') @@ -71,7 +72,14 @@ compute_statistics <- function(recipe, data, agg = 'global'){ statistics_metrics[['var_obs']] <- var_obs } ## close if on variance + + if (stat == 'n_eff') { + + n_eff <- s2dv::Eno(data = obs_data, time_dim = time_dim, na.action = na.pass, ncores = ncores) + statistics_metrics[['n_eff']] <- n_eff + + } ## close on n_eff } info(recipe$Run$logger, "##### STATISTICS COMPUTATION COMPLETE #####") -- GitLab From 57902ede2ab43b97e5d6300bf531ea3808cc80a3 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Fri, 15 Dec 2023 11:03:59 +0100 Subject: [PATCH 13/43] working progress --- modules/Scorecards/Dev_Scorecards.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/Scorecards/Dev_Scorecards.R b/modules/Scorecards/Dev_Scorecards.R index 2bc02a5e..2fdd940e 100644 --- a/modules/Scorecards/Dev_Scorecards.R +++ b/modules/Scorecards/Dev_Scorecards.R @@ -127,8 +127,8 @@ Scorecards <- function(recipe) { ## Comments (Nadia): ## 1) syear parameter as FALSE instead of NULL for metrics without syear dimension? ## 2) output files names of statistics not correct, too many _ in name - ## 3) how to load multiple systems? - + ## 3) how to load multiple systems? 
- Loop over sys and ref for metric.aggregation == 'score' + ## Need to test mith multiple sdates lon_dim <- 'longitude' lat_dim <- 'latitude' -- GitLab From 717a4646b3f5b546007ad83bfa0e828dfad8dc9a Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Fri, 15 Dec 2023 14:50:16 +0100 Subject: [PATCH 14/43] Bug fix file output name --- modules/Saving/R/get_filename.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/Saving/R/get_filename.R b/modules/Saving/R/get_filename.R index 9f6151e8..617115e7 100644 --- a/modules/Saving/R/get_filename.R +++ b/modules/Saving/R/get_filename.R @@ -44,7 +44,7 @@ get_filename <- function(dir, recipe, var, date, agg, file.type) { "crps_clim_syear" = {type_info <- paste0("crps_clim_syear")}, "crps" = {type_info <- paste0("crps")}, "mean_bias" = {type_info <- paste0("mean_bias")}, - {type_info <- paste0("_", file.type, "_")}) + {type_info <- paste0(file.type)}) # Build file name file <- paste0("scorecards_", system, "_", reference, "_", -- GitLab From 6a14162bd8b45f4e3b4cb4f26a3cc450ceae05d0 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Tue, 2 Jan 2024 11:25:27 +0100 Subject: [PATCH 15/43] included system and reference loop for score aggregation --- modules/Scorecards/Dev_Scorecards.R | 659 ++++++++++++++-------------- 1 file changed, 336 insertions(+), 323 deletions(-) diff --git a/modules/Scorecards/Dev_Scorecards.R b/modules/Scorecards/Dev_Scorecards.R index 2fdd940e..d8976cfa 100644 --- a/modules/Scorecards/Dev_Scorecards.R +++ b/modules/Scorecards/Dev_Scorecards.R @@ -18,7 +18,7 @@ source('modules/Scorecards/R/tmp/SCPlotScorecard.R') Scorecards <- function(recipe) { ## Parameters for loading data - input.path <- "/esarchive/scratch/nmilders/scorecards_data/test/recipe_scorecards_data_loading_nadia_20231207112311/outputs/" #temp + input.path <- "/esarchive/scratch/nmilders/scorecards_data/test/recipe_scorecards_data_loading_nadia_20231219150442/outputs/" #temp skill.input.path <- paste0(input.path, "Skill/") #paste0(recipe$Run$output_dir, "/outputs/Skill/") stats.input.path <- paste0(input.path, "Statistics/") #paste0(recipe$Run$output_dir, "/outputs/Statistics/") output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/") @@ -48,6 +48,8 @@ Scorecards <- function(recipe) { metric.aggregation <- recipe$Analysis$Workflow$Scorecards$metric_aggregation metrics.load <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Skill$metric), ", | |,")) metrics.visualize <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric), ", | |,")) + na.rm <- FALSE ## Need to define in recipe ?? + ncores <- 1 # recipe$Analysis$ncores ## Parameters for scorecard table @@ -58,8 +60,8 @@ Scorecards <- function(recipe) { col1.width <- recipe$Analysis$Workflow$Scorecards$col1_width col2.width <- recipe$Analysis$Workflow$Scorecards$col2_width calculate.diff <- recipe$Analysis$Workflow$Scorecards$calculate_diff - ncores <- 1 # recipe$Analysis$ncores - na.rm <- FALSE + + ## Load data files start.months <- sprintf("%02d", start.months) @@ -128,7 +130,7 @@ Scorecards <- function(recipe) { ## 1) syear parameter as FALSE instead of NULL for metrics without syear dimension? ## 2) output files names of statistics not correct, too many _ in name ## 3) how to load multiple systems? 
- Loop over sys and ref for metric.aggregation == 'score' - ## Need to test mith multiple sdates + ## Need to test with multiple sdates lon_dim <- 'longitude' lat_dim <- 'latitude' @@ -137,352 +139,363 @@ Scorecards <- function(recipe) { ## Define arrays to filled with data scorecard_metrics <- array(data = NA, - dim = c(time = length(forecast.months), + dim = c(system = length(system), + reference = length(reference), + time = length(forecast.months), sdate = length(start.months), region = length(regions), metric = length(metrics.visualize))) scorecard_sign <- array(data = NA, - dim = c(time = length(forecast.months), + dim = c(system = length(system), + reference = length(reference), + time = length(forecast.months), sdate = length(start.months), region = length(regions), metric = length(metrics.visualize))) - for (met in metrics.visualize) { - if(met == 'rpss'){ - ## Load data from saved files - rps_syear <- .loadmetrics(input_path = skill.input.path, system = system, - reference = reference, var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'rps_syear', - calib_method = calib.method, syear = TRUE) - - rps_clim_syear <- .loadmetrics(input_path = skill.input.path, system = system, - reference = reference, var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'rps_clim_syear', - calib_method = calib.method, syear = TRUE) - - ## Remove dat and var dimensions - rps_syear <- Subset(rps_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') - rps_clim_syear <- Subset(rps_clim_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') - - ## Spatially aggregate data - rps_syear_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = rps_syear, - region = regions[[X]], - lon = as.vector(attributes(rps_syear)$Variables$dat1$longitude), - lat = as.vector(attributes(rps_syear)$Variables$dat1$latitude), - londim = lon_dim, - latdim = lat_dim, - na.rm = F) - }, simplify = 'array') - - rps_clim_syear_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = rps_clim_syear, - region = regions[[X]], - lon = as.vector(attributes(rps_clim_syear)$Variables$dat1$longitude), - lat = as.vector(attributes(rps_clim_syear)$Variables$dat1$latitude), - londim = lon_dim, - latdim = lat_dim, - na.rm = F) - }, simplify = 'array') - - ## Include name of region dimension - names(dim(rps_syear_spatial_aggr))[length(dim(rps_syear_spatial_aggr))] <- 'region' - names(dim(rps_clim_syear_spatial_aggr))[length(dim(rps_clim_syear_spatial_aggr))] <- 'region' - - ## Temporally aggregate data - rps_temp_aggr <- Apply(data = rps_syear_spatial_aggr, - target_dims = time_dim, - fun = 'mean', ncores = ncores)$output1 - - rps_clim_temp_aggr <- Apply(data = rps_clim_syear_spatial_aggr, - target_dims = time_dim, - fun = 'mean', ncores = ncores)$output1 - - ## Calculate RPSS from aggregated RPS and RPS_clim - rpss <- 1 - rps_temp_aggr / rps_clim_temp_aggr - - ## Calculate significance - sign_rpss <- RandomWalkTest(rps_syear_spatial_aggr, rps_clim_syear_spatial_aggr, - time_dim = time_dim, test.type = 'two.sided', - alpha = 0.05, pval = FALSE, sign = TRUE, - ncores = NULL)$sign + for (sys in 1:length(system)){ + # sys_num <- which(system == sys) + for (ref in 1:length(reference)){ + # ref_num <- which(refence == ref) + for (met in metrics.visualize) { + + if(met == 'rpss'){ + ## Load data from saved files + rps_syear <- 
.loadmetrics(input_path = skill.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'rps_syear', + calib_method = calib.method, syear = TRUE) + + rps_clim_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'rps_clim_syear', + calib_method = calib.method, syear = TRUE) + + ## Remove dat and var dimensions + rps_syear <- Subset(rps_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') + rps_clim_syear <- Subset(rps_clim_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') + + ## Spatially aggregate data + rps_syear_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = rps_syear, + region = regions[[X]], + lon = as.vector(attributes(rps_syear)$Variables$dat1$longitude), + lat = as.vector(attributes(rps_syear)$Variables$dat1$latitude), + londim = lon_dim, + latdim = lat_dim, + na.rm = F) + }, simplify = 'array') + + rps_clim_syear_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = rps_clim_syear, + region = regions[[X]], + lon = as.vector(attributes(rps_clim_syear)$Variables$dat1$longitude), + lat = as.vector(attributes(rps_clim_syear)$Variables$dat1$latitude), + londim = lon_dim, + latdim = lat_dim, + na.rm = F) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(rps_syear_spatial_aggr))[length(dim(rps_syear_spatial_aggr))] <- 'region' + names(dim(rps_clim_syear_spatial_aggr))[length(dim(rps_clim_syear_spatial_aggr))] <- 'region' - ## Save metric result in arrays - scorecard_metrics[ , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = rpss, order = c('time', 'smonths','region')) - scorecard_sign[ , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_rpss, order = c('time', 'smonths','region')) - - } ## close if on rpss - - if(met == 'crpss'){ - ## Load data from saved files - crps_syear <- .loadmetrics(input_path = skill.input.path, system = system, - reference = reference, var = var, + ## Temporally aggregate data + rps_temp_aggr <- Apply(data = rps_syear_spatial_aggr, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + rps_clim_temp_aggr <- Apply(data = rps_clim_syear_spatial_aggr, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + ## Calculate RPSS from aggregated RPS and RPS_clim + rpss <- 1 - rps_temp_aggr / rps_clim_temp_aggr + + ## Calculate significance + sign_rpss <- RandomWalkTest(rps_syear_spatial_aggr, rps_clim_syear_spatial_aggr, + time_dim = time_dim, test.type = 'two.sided', + alpha = 0.05, pval = FALSE, sign = TRUE, + ncores = NULL)$sign + + ## Save metric result in arrays + scorecard_metrics[sys, ref, , , ,which(metrics.visualize == met)] <- s2dv::Reorder(data = rpss, order = c('time', 'smonths','region')) + scorecard_sign[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_rpss, order = c('time', 'smonths','region')) + + } ## close if on rpss + + if(met == 'crpss'){ + + ## Load data from saved files + crps_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'crps_syear', + calib_method = calib.method, syear = TRUE) 
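## Note: toy sketch with made-up values. The aggregated CRPSS computed below
## is tested for significance the same way as the RPSS above: the two
## per-syear score series are passed to s2dv::RandomWalkTest(), a two-sided
## test at alpha = 0.05 that returns a logical 'sign' array.
library(s2dv)
set.seed(1)
score_sys  <- array(rnorm(30, mean = 0.25, sd = 0.05), dim = c(syear = 30))  # toy per-syear CRPS, system
score_clim <- array(rnorm(30, mean = 0.33, sd = 0.05), dim = c(syear = 30))  # toy per-syear CRPS, climatology
RandomWalkTest(score_sys, score_clim, time_dim = 'syear',
               test.type = 'two.sided', alpha = 0.05, pval = FALSE,
               sign = TRUE)$sign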
+ + crps_clim_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'crps_clim_syear', + calib_method = calib.method, syear = TRUE) + + ## Remove dat and var dimensions + crps_syear <- Subset(crps_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') + crps_clim_syear <- Subset(crps_clim_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') + + ## Spatially aggregate data + crps_syear_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = crps_syear, + region = regions[[X]], + lon = as.vector(attributes(crps_syear)$Variables$dat1$longitude), + lat = as.vector(attributes(crps_syear)$Variables$dat1$latitude), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + crps_clim_syear_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = crps_clim_syear, + region = regions[[X]], + lon = as.vector(attributes(crps_clim_syear)$Variables$dat1$longitude), + lat = as.vector(attributes(crps_clim_syear)$Variables$dat1$latitude), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(crps_syear_spatial_aggr))[length(dim(crps_syear_spatial_aggr))] <- 'region' + names(dim(crps_clim_syear_spatial_aggr))[length(dim(crps_clim_syear_spatial_aggr))] <- 'region' + + ## Temporally aggregate data + crps_temp_aggr <- Apply(data = crps_syear_spatial_aggr, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + crps_clim_temp_aggr <- Apply(data = crps_clim_syear_spatial_aggr, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + ## Calculate CRPSS from aggregated CRPS and CRPS_clim + crpss <- 1 - crps_temp_aggr / crps_clim_temp_aggr + + ## Calculate significance + sign_crpss <- RandomWalkTest(crps_syear_spatial_aggr, crps_clim_syear_spatial_aggr, + time_dim = time_dim, test.type = 'two.sided', + alpha = 0.05, pval = FALSE, sign = TRUE, + ncores = NULL)$sign + + ## Save metric result in arrays + scorecard_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = crpss, order = c('time', 'smonths','region')) + scorecard_sign[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_crpss, order = c('time', 'smonths','region')) + + } ## close if on crpss + + if(met == 'enscorr'){ + ## Load data from saved files + cov <- .loadmetrics(input_path = stats.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'cov', + calib_method = calib.method, syear = NULL) + + std_hcst <- .loadmetrics(input_path = stats.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'std_hcst', + calib_method = calib.method, syear = NULL) + + std_obs <- .loadmetrics(input_path = stats.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'std_obs', + calib_method = calib.method, syear = NULL) + + + n_eff <- .loadmetrics(input_path = stats.input.path, system = system[sys], + reference = reference[ref], var = var, period = period, start_months = start.months, 
forecast_months = forecast.months, - metrics = 'crps_syear', - calib_method = calib.method, syear = TRUE) - - crps_clim_syear <- .loadmetrics(input_path = skill.input.path, system = system, - reference = reference, var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'crps_clim_syear', - calib_method = calib.method, syear = TRUE) - - ## Remove dat and var dimensions - crps_syear <- Subset(crps_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') - crps_clim_syear <- Subset(crps_clim_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') - - ## Spatially aggregate data - crps_syear_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = crps_syear, - region = regions[[X]], - lon = as.vector(attributes(crps_syear)$Variables$dat1$longitude), - lat = as.vector(attributes(crps_syear)$Variables$dat1$latitude), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - crps_clim_syear_spatial_aggr <- sapply(X = 1:length(regions), + metrics = 'n_eff', + calib_method = calib.method, syear = NULL) + + ## Calculate spatial aggregation + cov_spatial_aggr <- sapply(X = 1:length(regions), FUN = function(X) { - WeightedMean(data = crps_clim_syear, + WeightedMean(data = cov, region = regions[[X]], - lon = as.vector(attributes(crps_clim_syear)$Variables$dat1$longitude), - lat = as.vector(attributes(crps_clim_syear)$Variables$dat1$latitude), + lon = as.vector(attributes(cov)$Variables$dat1$longitude), + lat = as.vector(attributes(cov)$Variables$dat1$latitude), londim = lon_dim, latdim = lat_dim, na.rm = na.rm) }, simplify = 'array') - - ## Include name of region dimension - names(dim(crps_syear_spatial_aggr))[length(dim(crps_syear_spatial_aggr))] <- 'region' - names(dim(crps_clim_syear_spatial_aggr))[length(dim(crps_clim_syear_spatial_aggr))] <- 'region' - - ## Temporally aggregate data - crps_temp_aggr <- Apply(data = crps_syear_spatial_aggr, - target_dims = time_dim, - fun = 'mean', ncores = ncores)$output1 - - crps_clim_temp_aggr <- Apply(data = crps_clim_syear_spatial_aggr, - target_dims = time_dim, - fun = 'mean', ncores = ncores)$output1 - - ## Calculate CRPSS from aggregated CRPS and CRPS_clim - crpss <- 1 - crps_temp_aggr / crps_clim_temp_aggr - - ## Calculate significance - sign_crpss <- RandomWalkTest(crps_syear_spatial_aggr, crps_clim_syear_spatial_aggr, - time_dim = time_dim, test.type = 'two.sided', - alpha = 0.05, pval = FALSE, sign = TRUE, - ncores = NULL)$sign - - ## Save metric result in arrays - scorecard_metrics[ , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = crpss, order = c('time', 'smonths','region')) - scorecard_sign[ , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_crpss, order = c('time', 'smonths','region')) - - } ## close if on crpss - - if(met == 'enscorr'){ - ## Load data from saved files - cov <- .loadmetrics(input_path = stats.input.path, system = system, - reference = reference, var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'cov', - calib_method = calib.method, syear = NULL) - - std_hcst <- .loadmetrics(input_path = stats.input.path, system = system, - reference = reference, var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'std_hcst', - calib_method = calib.method, syear = NULL) - - std_obs <- .loadmetrics(input_path = stats.input.path, system = system, - reference = 
reference, var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'std_obs', - calib_method = calib.method, syear = NULL) - - - n_eff <- .loadmetrics(input_path = stats.input.path, system = system, - reference = reference, var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'n_eff', - calib_method = calib.method, syear = NULL) - - ## Calculate spatial aggregation - cov_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = cov, - region = regions[[X]], - lon = as.vector(attributes(cov)$Variables$dat1$longitude), - lat = as.vector(attributes(cov)$Variables$dat1$latitude), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - ## Include name of region dimension - names(dim(cov_spatial_aggr))[length(dim(cov_spatial_aggr))] <- 'region' - - - std_hcst_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = std_hcst, - region = regions[[X]], - lon = as.vector(attributes(std_hcst)$Variables$dat1$longitude), - lat = as.vector(attributes(std_hcst)$Variables$dat1$latitude), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - names(dim(std_hcst_spatial_aggr))[length(dim(std_hcst_spatial_aggr))] <- 'region' - - std_obs_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = std_obs, - region = regions[[X]], - lon = as.vector(attributes(std_obs)$Variables$dat1$longitude), - lat = as.vector(attributes(std_obs)$Variables$dat1$latitude), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - names(dim(std_obs_spatial_aggr))[length(dim(std_obs_spatial_aggr))] <- 'region' - - n_eff_spatial_aggr <- sapply(X = 1:length(regions), + + ## Include name of region dimension + names(dim(cov_spatial_aggr))[length(dim(cov_spatial_aggr))] <- 'region' + + + std_hcst_spatial_aggr <- sapply(X = 1:length(regions), FUN = function(X) { - WeightedMean(data = n_eff, + WeightedMean(data = std_hcst, region = regions[[X]], - lon = as.vector(attributes(std_obs)$Variables$dat1$longitude), - lat = as.vector(attributes(std_obs)$Variables$dat1$latitude), + lon = as.vector(attributes(std_hcst)$Variables$dat1$longitude), + lat = as.vector(attributes(std_hcst)$Variables$dat1$latitude), londim = lon_dim, latdim = lat_dim, na.rm = na.rm) }, simplify = 'array') - - names(dim(n_eff_spatial_aggr))[length(dim(n_eff_spatial_aggr))] <- 'region' - n_eff_spatial_aggr <- Subset(n_eff_spatial_aggr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') - - ## Calculate correlation - enscorr <- cov_spatial_aggr / (std_hcst_spatial_aggr * std_obs_spatial_aggr) - - ## Drop unwanted dimensions - enscorr <- Subset(enscorr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') - - ## Calculate significance of corr - alpha <- 0.05 # Apply 95% confidence - - t_alpha2_n2 <- qt(p = alpha/2, df = n_eff_spatial_aggr-2, lower.tail = FALSE) - t <- abs(enscorr) * sqrt(n_eff_spatial_aggr-2) / sqrt(1-enscorr^2) - - sign_corr<- array(data = NA, - dim = c(time = length(forecast.months), - smonths = length(start.months), - region = length(regions))) - - - for (time in 1:dim(sign_corr)[['time']]){ - for (mon in 1:dim(sign_corr)[['smonths']]){ - for (reg in 1:dim(sign_corr)[['region']]){ - - if (anyNA(c(t[time, mon, reg], t_alpha2_n2[time, mon, reg])) == FALSE - && t[time, mon, reg] >= t_alpha2_n2[time, mon, reg]){ - sign_corr[time, mon, reg] <- 
TRUE - } else { - sign_corr[time, mon, reg] <- FALSE + + names(dim(std_hcst_spatial_aggr))[length(dim(std_hcst_spatial_aggr))] <- 'region' + + std_obs_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = std_obs, + region = regions[[X]], + lon = as.vector(attributes(std_obs)$Variables$dat1$longitude), + lat = as.vector(attributes(std_obs)$Variables$dat1$latitude), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + names(dim(std_obs_spatial_aggr))[length(dim(std_obs_spatial_aggr))] <- 'region' + + n_eff_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = n_eff, + region = regions[[X]], + lon = as.vector(attributes(std_obs)$Variables$dat1$longitude), + lat = as.vector(attributes(std_obs)$Variables$dat1$latitude), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + names(dim(n_eff_spatial_aggr))[length(dim(n_eff_spatial_aggr))] <- 'region' + n_eff_spatial_aggr <- Subset(n_eff_spatial_aggr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') + + ## Calculate correlation + enscorr <- cov_spatial_aggr / (std_hcst_spatial_aggr * std_obs_spatial_aggr) + + ## Drop unwanted dimensions + enscorr <- Subset(enscorr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') + + ## Calculate significance of corr + alpha <- 0.05 # Apply 95% confidence + + t_alpha2_n2 <- qt(p = alpha/2, df = n_eff_spatial_aggr-2, lower.tail = FALSE) + t <- abs(enscorr) * sqrt(n_eff_spatial_aggr-2) / sqrt(1-enscorr^2) + + sign_corr<- array(data = NA, + dim = c(time = length(forecast.months), + smonths = length(start.months), + region = length(regions))) + + + for (time in 1:dim(sign_corr)[['time']]){ + for (mon in 1:dim(sign_corr)[['smonths']]){ + for (reg in 1:dim(sign_corr)[['region']]){ + + if (anyNA(c(t[time, mon, reg], t_alpha2_n2[time, mon, reg])) == FALSE + && t[time, mon, reg] >= t_alpha2_n2[time, mon, reg]){ + sign_corr[time, mon, reg] <- TRUE + } else { + sign_corr[time, mon, reg] <- FALSE + } + + } } - - } - } - } - - ## Save metric result in arrays - scorecard_metrics[ , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = enscorr, order = c('time', 'smonths','region')) - scorecard_sign[ , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_corr, order = c('time', 'smonths','region')) - - } ## close if on enscorr - - if(met == 'mean_bias'){ - - mean_bias <- .loadmetrics(input_path = skill.input.path, system = system, - reference = reference, var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'mean_bias', - calib_method = calib.method, syear = NULL) - - ## Calculate spatial aggregation - mean_bias_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = mean_bias, - region = regions[[X]], - lon = as.vector(attributes(mean_bias)$Variables$dat1$longitude), - lat = as.vector(attributes(mean_bias)$Variables$dat1$latitude), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - names(dim(mean_bias_spatial_aggr))[length(dim(mean_bias_spatial_aggr))] <- 'region' - - ## Drop unwanted dimensions - mean_bias_spatial_aggr <- Subset(mean_bias_spatial_aggr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') - - ## Save metric result in array - scorecard_metrics[ , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = mean_bias_spatial_aggr, order = c('time', 'smonths','region')) - - } ## close on 
mean_bias - - if(met == 'enssprerr'){ - - enssprerr <- .loadmetrics(input_path = skill.input.path, system = system, - reference = reference, var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'enssprerr', - calib_method = calib.method, syear = NULL) - - ## Calculate spatial aggregation - enssprerr_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = enssprerr, - region = regions[[X]], - lon = as.vector(attributes(enssprerr)$Variables$dat1$longitude), - lat = as.vector(attributes(enssprerr)$Variables$dat1$latitude), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - names(dim(enssprerr_spatial_aggr))[length(dim(enssprerr_spatial_aggr))] <- 'region' - - ## Drop unwanted dimensions - enssprerr_spatial_aggr <- Subset(enssprerr_spatial_aggr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') - - ## Save metric result in array - scorecard_metrics[ , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = enssprerr_spatial_aggr, order = c('time', 'smonths','region')) - - } ## close on enssprerr - + } - } ## close loop on metric - + ## Save metric result in arrays + scorecard_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = enscorr, order = c('time', 'smonths','region')) + scorecard_sign[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_corr, order = c('time', 'smonths','region')) + + } ## close if on enscorr + + if(met == 'mean_bias'){ + + mean_bias <- .loadmetrics(input_path = skill.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'mean_bias', + calib_method = calib.method, syear = NULL) + + ## Calculate spatial aggregation + mean_bias_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = mean_bias, + region = regions[[X]], + lon = as.vector(attributes(mean_bias)$Variables$dat1$longitude), + lat = as.vector(attributes(mean_bias)$Variables$dat1$latitude), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + names(dim(mean_bias_spatial_aggr))[length(dim(mean_bias_spatial_aggr))] <- 'region' + + ## Drop unwanted dimensions + mean_bias_spatial_aggr <- Subset(mean_bias_spatial_aggr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') + + ## Save metric result in array + scorecard_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = mean_bias_spatial_aggr, order = c('time', 'smonths','region')) + + } ## close on mean_bias + + if(met == 'enssprerr'){ + + enssprerr <- .loadmetrics(input_path = skill.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'enssprerr', + calib_method = calib.method, syear = NULL) + + ## Calculate spatial aggregation + enssprerr_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = enssprerr, + region = regions[[X]], + lon = as.vector(attributes(enssprerr)$Variables$dat1$longitude), + lat = as.vector(attributes(enssprerr)$Variables$dat1$latitude), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + names(dim(enssprerr_spatial_aggr))[length(dim(enssprerr_spatial_aggr))] <- 'region' + + ## Drop unwanted dimensions + enssprerr_spatial_aggr <- 
Subset(enssprerr_spatial_aggr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected')
+        
+        ## Save metric result in array
+        scorecard_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = enssprerr_spatial_aggr, order = c('time', 'smonths','region'))
+        
+      } ## close on enssprerr
+      
+    } ## close loop on metric
+  } ## close if on reference
+  } ## close if on system
+  
+  aggregated_metrics <- scorecard_metrics
   
 } ## close if on score
-- 
GitLab


From 997398e1c2cd8d1857b877f0f050283a11bcc5cf Mon Sep 17 00:00:00 2001
From: Nadia Milders
Date: Wed, 3 Jan 2024 17:18:10 +0100
Subject: [PATCH 16/43] included underline significance in scorecard

---
 modules/Scorecards/Dev_Scorecards.R         | 571 --------------------
 modules/Scorecards/R/tmp/SCPlotScorecard.R  |  32 +-
 modules/Scorecards/R/tmp/ScorecardsSingle.R |  32 +-
 modules/Scorecards/Scorecards.R             | 513 +++++++++++++++---
 4 files changed, 494 insertions(+), 654 deletions(-)
 delete mode 100644 modules/Scorecards/Dev_Scorecards.R

diff --git a/modules/Scorecards/Dev_Scorecards.R b/modules/Scorecards/Dev_Scorecards.R
deleted file mode 100644
index d8976cfa..00000000
--- a/modules/Scorecards/Dev_Scorecards.R
+++ /dev/null
@@ -1,571 +0,0 @@
-###############################################################################
-##################### SCORECARDS MODULE FOR SUNSET SUITE ######################
-###############################################################################
-
-##### Load source functions #####
-source('modules/Scorecards/R/tmp/LoadMetrics.R')
-source('modules/Scorecards/R/tmp/WeightedMetrics.R')
-source('modules/Scorecards/R/tmp/Utils.R')
-source('modules/Scorecards/R/tmp/SCTransform.R')
-source('modules/Scorecards/R/tmp/ScorecardsSingle.R')
-source('modules/Scorecards/R/tmp/ScorecardsMulti.R')
-source('modules/Scorecards/R/tmp/ScorecardsSystemDiff.R')
-source('modules/Scorecards/R/tmp/SCPlotScorecard.R')
-
-
-## TODO: Change function name to 'Scorecards'?
-## Define function -Scorecards <- function(recipe) { - - ## Parameters for loading data - input.path <- "/esarchive/scratch/nmilders/scorecards_data/test/recipe_scorecards_data_loading_nadia_20231219150442/outputs/" #temp - skill.input.path <- paste0(input.path, "Skill/") #paste0(recipe$Run$output_dir, "/outputs/Skill/") - stats.input.path <- paste0(input.path, "Statistics/") #paste0(recipe$Run$output_dir, "/outputs/Statistics/") - output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/") - dir.create(output.path, recursive = T, showWarnings = F) - system <- recipe$Analysis$Datasets$System$name - reference <- recipe$Analysis$Datasets$Reference$name - var <- recipe$Analysis$Variables$name - start.year <- as.numeric(recipe$Analysis$Time$hcst_start) - end.year <- as.numeric(recipe$Analysis$Time$hcst_end) - forecast.months <- recipe$Analysis$Time$ftime_min : recipe$Analysis$Time$ftime_max - calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method) - - # NOTE (Eva): This condition needs to be checked, in my case - # (recipe$Analysis$Workflow$Scorecards$start_months = NULL) - start.months <- 1:12 # I added this line - # Needs to be corrected: - # if (recipe$Analysis$Workflow$Scorecards$start_months == 'all') { - # start.months <- 1:12 - # } else { - # start.months <- as.numeric(strsplit(recipe$Analysis$Workflow$Scorecards$start_months, - # split = ", | |,")[[1]]) - # } - - regions <- recipe$Analysis$Workflow$Scorecards$regions - for (i in names(regions)){regions[[i]] <- unlist(regions[[i]])} - - metric.aggregation <- recipe$Analysis$Workflow$Scorecards$metric_aggregation - metrics.load <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Skill$metric), ", | |,")) - metrics.visualize <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric), ", | |,")) - na.rm <- FALSE ## Need to define in recipe ?? 
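## Note: toy example. 'regions' above is expected to arrive from the recipe as
## a named list of lon/lat boxes, each unlisted into a named numeric vector,
## mirroring the NAO box commented a few lines further down.
regions <- list('NAO' = c(lon.min = -80, lon.max = 40,
                          lat.min = 20, lat.max = 80))
names(regions)   # region names reused later for the scorecard tables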
- ncores <- 1 # recipe$Analysis$ncores - - - ## Parameters for scorecard table - inf.to.na <- recipe$Analysis$Workflow$Scorecards$inf_to_na - table.label <- recipe$Analysis$Workflow$Scorecards$table_label - fileout.label <- recipe$Analysis$Workflow$Scorecards$fileout_label - legend.white.space <- recipe$Analysis$Workflow$Scorecards$legend_white_space - col1.width <- recipe$Analysis$Workflow$Scorecards$col1_width - col2.width <- recipe$Analysis$Workflow$Scorecards$col2_width - calculate.diff <- recipe$Analysis$Workflow$Scorecards$calculate_diff - - - - ## Load data files - start.months <- sprintf("%02d", start.months) - period <- paste0(start.year, "-", end.year) - - ####### SKILL AGGREGATION ####### - if(metric.aggregation == 'skill'){ - - ## Load data files - loaded_metrics <- LoadMetrics(system = system, - reference = reference, - var = var, - start.year = start.year, - end.year = end.year, - metrics = metrics.load, - start.months = start.months, - forecast.months = forecast.months, - inf.to.na = inf.to.na, - input.path = skill.input.path) - - ## Spatial Aggregation of metrics - if('region' %in% names(dim(loaded_metrics[[1]][[1]]))){ - - ### Convert loaded metrics to array for already aggregated data - metrics.dim <- attributes(loaded_metrics[[1]][[1]])$metrics - forecast.months.dim <- attributes(loaded_metrics[[1]][[1]])$forecast.months - start.months.dim <- attributes(loaded_metrics[[1]][[1]])$start.months - regions.dim <- regions #list('NAO' = c(lon.min = -80, lon.max = 40, lat.min = 20, lat.max = 80)) - - aggregated_metrics <- array(dim = c(system = length(loaded_metrics), - reference = length(loaded_metrics[[1]]), - metric = length(metrics.dim), - time = length(forecast.months.dim), - sdate = length(start.months.dim), - region = length(regions.dim))) - - - for (sys in 1:length(names(loaded_metrics))){ - for (ref in 1:length(names(loaded_metrics[[sys]]))){ - aggregated_metrics[sys, ref, , , , ] <- s2dv::Reorder(data = loaded_metrics[[sys]][[ref]], order = c('metric','time','sdate','region')) - } - } - - ## Add attributes - attributes(aggregated_metrics)$metrics <- metrics.load - attributes(aggregated_metrics)$start.months <- attributes(loaded_metrics[[1]][[1]])$start.months - attributes(aggregated_metrics)$forecast.months <- attributes(loaded_metrics[[1]][[1]])$forecast.months - attributes(aggregated_metrics)$regions <- regions - attributes(aggregated_metrics)$system.name <- names(loaded_metrics) - attributes(aggregated_metrics)$reference.name <- names(loaded_metrics[[1]]) - - - } else { - ## Calculate weighted mean of spatial aggregation - aggregated_metrics <- WeightedMetrics(loaded_metrics, - regions = regions, - metric.aggregation = metric.aggregation, - ncores = ncores) - }## close if on region - } - - ###### SCORE AGGREGATION ###### - if(metric.aggregation == 'score'){ - - ## Comments (Nadia): - ## 1) syear parameter as FALSE instead of NULL for metrics without syear dimension? - ## 2) output files names of statistics not correct, too many _ in name - ## 3) how to load multiple systems? 
- Loop over sys and ref for metric.aggregation == 'score' - ## Need to test with multiple sdates - - lon_dim <- 'longitude' - lat_dim <- 'latitude' - time_dim <- 'syear' - memb_dim <- 'ensemble' - - ## Define arrays to filled with data - scorecard_metrics <- array(data = NA, - dim = c(system = length(system), - reference = length(reference), - time = length(forecast.months), - sdate = length(start.months), - region = length(regions), - metric = length(metrics.visualize))) - - scorecard_sign <- array(data = NA, - dim = c(system = length(system), - reference = length(reference), - time = length(forecast.months), - sdate = length(start.months), - region = length(regions), - metric = length(metrics.visualize))) - - - for (sys in 1:length(system)){ - # sys_num <- which(system == sys) - for (ref in 1:length(reference)){ - # ref_num <- which(refence == ref) - for (met in metrics.visualize) { - - if(met == 'rpss'){ - ## Load data from saved files - rps_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'rps_syear', - calib_method = calib.method, syear = TRUE) - - rps_clim_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'rps_clim_syear', - calib_method = calib.method, syear = TRUE) - - ## Remove dat and var dimensions - rps_syear <- Subset(rps_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') - rps_clim_syear <- Subset(rps_clim_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') - - ## Spatially aggregate data - rps_syear_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = rps_syear, - region = regions[[X]], - lon = as.vector(attributes(rps_syear)$Variables$dat1$longitude), - lat = as.vector(attributes(rps_syear)$Variables$dat1$latitude), - londim = lon_dim, - latdim = lat_dim, - na.rm = F) - }, simplify = 'array') - - rps_clim_syear_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = rps_clim_syear, - region = regions[[X]], - lon = as.vector(attributes(rps_clim_syear)$Variables$dat1$longitude), - lat = as.vector(attributes(rps_clim_syear)$Variables$dat1$latitude), - londim = lon_dim, - latdim = lat_dim, - na.rm = F) - }, simplify = 'array') - - ## Include name of region dimension - names(dim(rps_syear_spatial_aggr))[length(dim(rps_syear_spatial_aggr))] <- 'region' - names(dim(rps_clim_syear_spatial_aggr))[length(dim(rps_clim_syear_spatial_aggr))] <- 'region' - - ## Temporally aggregate data - rps_temp_aggr <- Apply(data = rps_syear_spatial_aggr, - target_dims = time_dim, - fun = 'mean', ncores = ncores)$output1 - - rps_clim_temp_aggr <- Apply(data = rps_clim_syear_spatial_aggr, - target_dims = time_dim, - fun = 'mean', ncores = ncores)$output1 - - ## Calculate RPSS from aggregated RPS and RPS_clim - rpss <- 1 - rps_temp_aggr / rps_clim_temp_aggr - - ## Calculate significance - sign_rpss <- RandomWalkTest(rps_syear_spatial_aggr, rps_clim_syear_spatial_aggr, - time_dim = time_dim, test.type = 'two.sided', - alpha = 0.05, pval = FALSE, sign = TRUE, - ncores = NULL)$sign - - ## Save metric result in arrays - scorecard_metrics[sys, ref, , , ,which(metrics.visualize == met)] <- s2dv::Reorder(data = rpss, order = c('time', 'smonths','region')) - scorecard_sign[sys, 
ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_rpss, order = c('time', 'smonths','region')) - - } ## close if on rpss - - if(met == 'crpss'){ - - ## Load data from saved files - crps_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'crps_syear', - calib_method = calib.method, syear = TRUE) - - crps_clim_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'crps_clim_syear', - calib_method = calib.method, syear = TRUE) - - ## Remove dat and var dimensions - crps_syear <- Subset(crps_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') - crps_clim_syear <- Subset(crps_clim_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') - - ## Spatially aggregate data - crps_syear_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = crps_syear, - region = regions[[X]], - lon = as.vector(attributes(crps_syear)$Variables$dat1$longitude), - lat = as.vector(attributes(crps_syear)$Variables$dat1$latitude), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - crps_clim_syear_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = crps_clim_syear, - region = regions[[X]], - lon = as.vector(attributes(crps_clim_syear)$Variables$dat1$longitude), - lat = as.vector(attributes(crps_clim_syear)$Variables$dat1$latitude), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - ## Include name of region dimension - names(dim(crps_syear_spatial_aggr))[length(dim(crps_syear_spatial_aggr))] <- 'region' - names(dim(crps_clim_syear_spatial_aggr))[length(dim(crps_clim_syear_spatial_aggr))] <- 'region' - - ## Temporally aggregate data - crps_temp_aggr <- Apply(data = crps_syear_spatial_aggr, - target_dims = time_dim, - fun = 'mean', ncores = ncores)$output1 - - crps_clim_temp_aggr <- Apply(data = crps_clim_syear_spatial_aggr, - target_dims = time_dim, - fun = 'mean', ncores = ncores)$output1 - - ## Calculate CRPSS from aggregated CRPS and CRPS_clim - crpss <- 1 - crps_temp_aggr / crps_clim_temp_aggr - - ## Calculate significance - sign_crpss <- RandomWalkTest(crps_syear_spatial_aggr, crps_clim_syear_spatial_aggr, - time_dim = time_dim, test.type = 'two.sided', - alpha = 0.05, pval = FALSE, sign = TRUE, - ncores = NULL)$sign - - ## Save metric result in arrays - scorecard_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = crpss, order = c('time', 'smonths','region')) - scorecard_sign[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_crpss, order = c('time', 'smonths','region')) - - } ## close if on crpss - - if(met == 'enscorr'){ - ## Load data from saved files - cov <- .loadmetrics(input_path = stats.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'cov', - calib_method = calib.method, syear = NULL) - - std_hcst <- .loadmetrics(input_path = stats.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'std_hcst', - 
calib_method = calib.method, syear = NULL) - - std_obs <- .loadmetrics(input_path = stats.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'std_obs', - calib_method = calib.method, syear = NULL) - - - n_eff <- .loadmetrics(input_path = stats.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'n_eff', - calib_method = calib.method, syear = NULL) - - ## Calculate spatial aggregation - cov_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = cov, - region = regions[[X]], - lon = as.vector(attributes(cov)$Variables$dat1$longitude), - lat = as.vector(attributes(cov)$Variables$dat1$latitude), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - ## Include name of region dimension - names(dim(cov_spatial_aggr))[length(dim(cov_spatial_aggr))] <- 'region' - - - std_hcst_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = std_hcst, - region = regions[[X]], - lon = as.vector(attributes(std_hcst)$Variables$dat1$longitude), - lat = as.vector(attributes(std_hcst)$Variables$dat1$latitude), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - names(dim(std_hcst_spatial_aggr))[length(dim(std_hcst_spatial_aggr))] <- 'region' - - std_obs_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = std_obs, - region = regions[[X]], - lon = as.vector(attributes(std_obs)$Variables$dat1$longitude), - lat = as.vector(attributes(std_obs)$Variables$dat1$latitude), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - names(dim(std_obs_spatial_aggr))[length(dim(std_obs_spatial_aggr))] <- 'region' - - n_eff_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = n_eff, - region = regions[[X]], - lon = as.vector(attributes(std_obs)$Variables$dat1$longitude), - lat = as.vector(attributes(std_obs)$Variables$dat1$latitude), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - names(dim(n_eff_spatial_aggr))[length(dim(n_eff_spatial_aggr))] <- 'region' - n_eff_spatial_aggr <- Subset(n_eff_spatial_aggr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') - - ## Calculate correlation - enscorr <- cov_spatial_aggr / (std_hcst_spatial_aggr * std_obs_spatial_aggr) - - ## Drop unwanted dimensions - enscorr <- Subset(enscorr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') - - ## Calculate significance of corr - alpha <- 0.05 # Apply 95% confidence - - t_alpha2_n2 <- qt(p = alpha/2, df = n_eff_spatial_aggr-2, lower.tail = FALSE) - t <- abs(enscorr) * sqrt(n_eff_spatial_aggr-2) / sqrt(1-enscorr^2) - - sign_corr<- array(data = NA, - dim = c(time = length(forecast.months), - smonths = length(start.months), - region = length(regions))) - - - for (time in 1:dim(sign_corr)[['time']]){ - for (mon in 1:dim(sign_corr)[['smonths']]){ - for (reg in 1:dim(sign_corr)[['region']]){ - - if (anyNA(c(t[time, mon, reg], t_alpha2_n2[time, mon, reg])) == FALSE - && t[time, mon, reg] >= t_alpha2_n2[time, mon, reg]){ - sign_corr[time, mon, reg] <- TRUE - } else { - sign_corr[time, mon, reg] <- FALSE - } - - } - } - } - - ## Save metric result in arrays - scorecard_metrics[sys, ref, , , , which(metrics.visualize == 
met)] <- s2dv::Reorder(data = enscorr, order = c('time', 'smonths','region')) - scorecard_sign[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_corr, order = c('time', 'smonths','region')) - - } ## close if on enscorr - - if(met == 'mean_bias'){ - - mean_bias <- .loadmetrics(input_path = skill.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'mean_bias', - calib_method = calib.method, syear = NULL) - - ## Calculate spatial aggregation - mean_bias_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = mean_bias, - region = regions[[X]], - lon = as.vector(attributes(mean_bias)$Variables$dat1$longitude), - lat = as.vector(attributes(mean_bias)$Variables$dat1$latitude), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - names(dim(mean_bias_spatial_aggr))[length(dim(mean_bias_spatial_aggr))] <- 'region' - - ## Drop unwanted dimensions - mean_bias_spatial_aggr <- Subset(mean_bias_spatial_aggr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') - - ## Save metric result in array - scorecard_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = mean_bias_spatial_aggr, order = c('time', 'smonths','region')) - - } ## close on mean_bias - - if(met == 'enssprerr'){ - - enssprerr <- .loadmetrics(input_path = skill.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'enssprerr', - calib_method = calib.method, syear = NULL) - - ## Calculate spatial aggregation - enssprerr_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = enssprerr, - region = regions[[X]], - lon = as.vector(attributes(enssprerr)$Variables$dat1$longitude), - lat = as.vector(attributes(enssprerr)$Variables$dat1$latitude), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - names(dim(enssprerr_spatial_aggr))[length(dim(enssprerr_spatial_aggr))] <- 'region' - - ## Drop unwanted dimensions - enssprerr_spatial_aggr <- Subset(enssprerr_spatial_aggr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') - - ## Save metric result in array - scorecard_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = enssprerr_spatial_aggr, order = c('time', 'smonths','region')) - - } ## close on enssprerr - - } ## close loop on metric - } ## close if on reference - } ## close if on system - - aggregated_metrics <- scorecard_metrics - - } ## close if on score - - - - - stop() - - - ## Create simple scorecard tables - ## (one system only) - ## Metrics input must be in the same order as function SC_spatial_aggregation - scorecard_single <- ScorecardsSingle(data = aggregated_metrics, - system = system, - reference = reference, - var = var, - start.year = start.year, - end.year = end.year, - start.months = start.months, - forecast.months = forecast.months, - region.names = names(regions), - metrics = metrics.visualize, - table.label = table.label, - fileout.label = fileout.label, - legend.white.space = legend.white.space, - col1.width = col1.width, - col2.width = col2.width, - output.path = output.path) - - ## Create multi system/reference scorecard tables - ## (multiple systems with one reference or one system with multiple references) - ## Metrics input must be in the same 
order as function SC_spatial_aggregation
-  if(length(system) > 1 || length(reference) > 1){
-    scorecard_multi <- ScorecardsMulti(data = aggregated_metrics,
-                                       system = system,
-                                       reference = reference,
-                                       var = var,
-                                       start.year = start.year,
-                                       end.year = end.year,
-                                       start.months = start.months,
-                                       forecast.months = forecast.months,
-                                       region.names = attributes(regions)$names,
-                                       metrics = metrics.visualize,
-                                       table.label = table.label,
-                                       fileout.label = fileout.label,
-                                       output.path = output.path)
-  } ## close if
-
-
-  if(calculate.diff == TRUE){
-    if(length(system) == 2 || length(reference) == 2){
-      scorecard_diff <- ScorecardsSystemDiff(data = aggregated_metrics,
-                                             system = system,
-                                             reference = reference,
-                                             var = var,
-                                             start.year = start.year,
-                                             end.year = end.year,
-                                             start.months = start.months,
-                                             forecast.months = forecast.months,
-                                             region.names = attributes(regions)$names,
-                                             metrics = metrics.visualize,
-                                             table.label = table.label,
-                                             fileout.label = fileout.label,
-                                             legend.white.space = legend.white.space,
-                                             col1.width = col1.width,
-                                             col2.width = col2.width,
-                                             output.path = output.path)
-    } else {stop ("Difference scorecard can only be computed with two systems or two references.")}
-  } ## close if on calculate.diff
-
-}
-
diff --git a/modules/Scorecards/R/tmp/SCPlotScorecard.R b/modules/Scorecards/R/tmp/SCPlotScorecard.R
index 4373057b..8c2d9eba 100644
--- a/modules/Scorecards/R/tmp/SCPlotScorecard.R
+++ b/modules/Scorecards/R/tmp/SCPlotScorecard.R
@@ -5,6 +5,9 @@
 #'
 #'@param data A multidimensional array containing spatially aggregated metrics
 #' data with dimensions: metric, region, sdate and ftime.
+#'@param sign A multidimensional boolean array with the same dimensions as
+#' 'data', indicating which values are to be highlighted. If set to NULL no
+#' values will be highlighted.
 #'@param row.dim A character string indicating the dimension name to show in the
 #' rows of the plot.
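+#' A minimal sketch of how 'data' and the new 'sign' argument are expected to
+#' align (hypothetical dimension sizes, for illustration only):
+#' \dontrun{
+#' dims <- c(metric = 4, region = 3, sdate = 12, time = 6)
+#' data <- array(rnorm(prod(dims)), dim = dims)
+#' sign <- array(runif(prod(dims)) < 0.05, dim = dims)  # TRUE cells get underlined
+#' }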
#'@param subrow.dim A character string indicating the dimension name to show in
@@ -72,7 +75,8 @@
 #'@import s2dv
 #'@import ClimProjDiags
 #'@export
-SCPlotScorecard <- function(data, row.dim = 'region', subrow.dim = 'time',
+SCPlotScorecard <- function(data, sign,
+                            row.dim = 'region', subrow.dim = 'time',
                             col.dim = 'metric', subcol.dim = 'sdate',
                             legend.dim = 'metric', row.names = NULL,
                             subrow.names = NULL, col.names = NULL,
@@ -343,20 +347,30 @@ SCPlotScorecard <- function(data, row.dim = 'region', subrow.dim = 'time',
       table_data <- rbind(table_data, table_temp)
     }
   }
-
-  # All data for plotting in table
+
+  # All data for plotting in table
   table <- data.frame(table_column_1 = row.names.table, table_data)
   table_temp <- array(unlist(table[3:n.columns]),
                       dim = c(n.rows, n.columns - 2))
+
   # Define colors to show in table
-  table_colors <- .SCTableColors(table = table_temp, n.col = n.col.names,
-                                 n.subcol = n.subcol.names, n.row = n.row.names,
-                                 n.subrow = n.subrow.names, legend.breaks = legend.breaks,
-                                 palette = palette, colorunder = colorunder,
-                                 colorsup = colorsup)
+  table_colors <- .SCTableColors(table = table_temp,
+                                 n.col = n.col.names, n.subcol = n.subcol.names,
+                                 n.row = n.row.names, n.subrow = n.subrow.names,
+                                 legend.breaks = legend.breaks, palette = palette,
+                                 colorunder = colorunder, colorsup = colorsup)
+
   metric.color <- table_colors$metric.color
   metric.text.color <- table_colors$metric.text.color
   # metric.text.bold <- table_colors$metric.text.bold
+  # Remove temporary tables
+  rm(table_temp)
+  rm(table_sign_temp)
+
+  ## Format values to underline in table
+  metric.underline <- MergeDims(sign, c('sdate', 'metric'), rename_dim = 'col', na.rm = F)
+  metric.underline <- MergeDims(metric.underline, c('time', 'region'), rename_dim = 'row', na.rm = F)
+
   options(stringsAsFactors = FALSE)
   title <- data.frame(c1 = table.title, c2 = n.columns)
   subtitle <- data.frame(c1 = table.subtitle, c2 = n.columns)
@@ -385,6 +399,7 @@ SCPlotScorecard <- function(data, row.dim = 'region', subrow.dim = 'time',
     for (j in 1:n.subcol.names) {
       my.background <- metric.color[, (i - 1) * n.subcol.names + j]
       my.text.color <- metric.text.color[, (i - 1) * n.subcol.names + j]
+      my.underline <- metric.underline[, (i - 1) * n.subcol.names + j]
       # my.bold <- metric.text.bold[(i - 1) * n.subcol.names + j]
       
       table.html.part[[(i - 1) * n.subcol.names + j + 1]] <-
@@ -392,6 +407,7 @@ SCPlotScorecard <- function(data, row.dim = 'region', subrow.dim = 'time',
                    2 + n.subcol.names * (i - 1) + j,
                    background = my.background[1:n.rows],
                    color = my.text.color[1:n.rows],
+                   underline = my.underline[1:n.rows],
                    bold = T) ## strsplit(toString(bold), ', ')[[1]]
     }
   }
diff --git a/modules/Scorecards/R/tmp/ScorecardsSingle.R b/modules/Scorecards/R/tmp/ScorecardsSingle.R
index 56f08204..190ae3d5 100644
--- a/modules/Scorecards/R/tmp/ScorecardsSingle.R
+++ b/modules/Scorecards/R/tmp/ScorecardsSingle.R
@@ -2,8 +2,10 @@
 #'
 #'@description Scorecards function to create scorecard tables for one system and
 #' one reference combination (types 1 to 4).
-#'@param input_data is an array of spatially aggregated metrics containing the
+#'@param data is an array of spatially aggregated metrics containing the
 #' following dimensions; system, reference, metric, time, sdate, region.
+#'@param sign is an array with the same dimensions as data indicating the
+#' significance of the metrics, with either TRUE, FALSE or NULL.
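+#' A hedged usage sketch (the system/reference names and output path here are
+#' hypothetical, not taken from this codebase; 'aggregated_metrics' and
+#' 'scorecard_sign' are assumed to be arrays with the dimensions listed above):
+#' \dontrun{
+#' ScorecardsSingle(data = aggregated_metrics, sign = scorecard_sign,
+#'                  system = 'ECMWF-SEAS5', reference = 'ERA5', var = 'tas',
+#'                  start.year = 1993, end.year = 2016, start.months = 1:12,
+#'                  forecast.months = 1:6, region.names = names(regions),
+#'                  metrics = c('rpss', 'crpss'), output.path = './scorecards')
+#' }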
#'@param system a vector of character strings defining the systems following the #' archive.yml format from verification suite #'@param reference a vector of character strings defining the references @@ -41,7 +43,7 @@ #' output.path = '/esarchive/scratch/nmilders/scorecards_images/test' #' ) #'@export -ScorecardsSingle <- function(data, system, reference, var, start.year, end.year, +ScorecardsSingle <- function(data, sign, system, reference, var, start.year, end.year, start.months, forecast.months, region.names, metrics, legend.breaks = NULL, table.label = NULL, fileout.label = NULL, @@ -80,6 +82,7 @@ ScorecardsSingle <- function(data, system, reference, var, start.year, end.year, ## Make sure input_data is in correct order for using in functions: data_order <- c('system', 'reference', 'metric', 'time', 'sdate', 'region') data <- Reorder(data, data_order) + sign <- Reorder(sign, data_order) ## Identify metrics loaded metrics_loaded <- attributes(data)$metrics @@ -88,11 +91,18 @@ ScorecardsSingle <- function(data, system, reference, var, start.year, end.year, input_data <- Subset(data, along = 'metric', indices = match(metrics, metrics_loaded)) attributes(input_data)$metrics <- metrics + input_sign <- Subset(sign, along = 'metric', indices = match(metrics, metrics_loaded)) + attributes(input_sign)$metrics <- metrics + ## Transform data for scorecards by forecast month (types 3 & 4) transformed_data <- SCTransform(data = input_data, sdate_dim = 'sdate', ftime_dim = 'time') + transformed_sign <- SCTransform(data = input_sign, + sdate_dim = 'sdate', + ftime_dim = 'time') + ## Load configuration files sys_dict <- read_yaml("/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/conf/archive.yml")$esarchive var_dict <- read_yaml("/esarchive/scratch/nmilders/gitlab/git_clones/csscorecards/inst/config/variable-dictionary.yml")$vars @@ -180,8 +190,12 @@ ScorecardsSingle <- function(data, system, reference, var, start.year, end.year, fileout <- .Filename(system = system[sys], reference = reference[ref], var = var, start.year = start.year, end.year = end.year, scorecard.type = 1, fileout.label = fileout.label, output.path = output.path) + data_sc_1 <- Subset(input_data, c('system', 'reference'), list(sys, ref), drop = 'selected') + sign_sc_1 <- Subset(input_sign, c('system', 'reference'), list(sys, ref), drop = 'selected') + SCPlotScorecard(data = data_sc_1, + sign = sign_sc_1, row.dim = 'region', subrow.dim = 'time', col.dim = 'metric', @@ -217,12 +231,17 @@ ScorecardsSingle <- function(data, system, reference, var, start.year, end.year, ## Scorecard type 2 is same as type 1 for only one region, therefore is ## only plotted if more that one region is requested if(dim(input_data)['region'] > 1) { + fileout <- .Filename(system = system[sys], reference = reference[ref], var = var, start.year = start.year, end.year = end.year, scorecard.type = 2, fileout.label = fileout.label, output.path = output.path) + new_order <- c('metric', 'region', 'sdate', 'time') data_sc_2 <- Reorder(Subset(input_data, c('system', 'reference'), list(sys, ref), drop = 'selected'), new_order) + sign_sc_2 <- Reorder(Subset(input_sign, c('system', 'reference'), list(sys, ref), drop = 'selected'), new_order) + SCPlotScorecard(data = data_sc_2, + sign = sign_sc_2, row.dim = 'time', subrow.dim = 'region', col.dim = 'metric', @@ -259,8 +278,12 @@ ScorecardsSingle <- function(data, system, reference, var, start.year, end.year, fileout <- .Filename(system = system[sys], reference = reference[ref], var = var, start.year = start.year, 
end.year = end.year, scorecard.type = 3, fileout.label = fileout.label, output.path = output.path) + data_sc_3 <- Subset(transformed_data, c('system', 'reference'), list(sys, ref), drop = 'selected') + sign_sc_3 <- Subset(transformed_sign, c('system', 'reference'), list(sys, ref), drop = 'selected') + SCPlotScorecard(data = data_sc_3, + sign = sign_sc_3, row.dim = 'region', subrow.dim = 'time', col.dim = 'metric', @@ -296,12 +319,17 @@ ScorecardsSingle <- function(data, system, reference, var, start.year, end.year, ## Scorecard type 4 is same as type 3 for only one region, therefore is ## only plotted if more that one region is requested if(dim(input_data)['region'] > 1) { + fileout <- .Filename(system = system[sys], reference = reference[ref], var = var, start.year = start.year, end.year = end.year, scorecard.type = 4, fileout.label = fileout.label, output.path = output.path) + new_order <- c('metric', 'region', 'sdate', 'time') data_sc_4 <- Reorder(Subset(transformed_data, c('system', 'reference'), list(sys, ref), drop = 'selected'), new_order) + sign_sc_4 <- Reorder(Subset(transformed_sign, c('system', 'reference'), list(sys, ref), drop = 'selected'), new_order) + SCPlotScorecard(data = data_sc_4, + sign = sign_sc_4, row.dim = 'time', subrow.dim = 'region', col.dim = 'metric', diff --git a/modules/Scorecards/Scorecards.R b/modules/Scorecards/Scorecards.R index 8312735a..bad1497c 100644 --- a/modules/Scorecards/Scorecards.R +++ b/modules/Scorecards/Scorecards.R @@ -17,11 +17,12 @@ source('modules/Scorecards/R/tmp/SCPlotScorecard.R') ## Define function Scorecards <- function(recipe) { - ## set parameters - input.path <- paste0(recipe$Run$output_dir, "/outputs/Skill/") + ## Parameters for loading data + input.path <- "/esarchive/scratch/nmilders/scorecards_data/test/recipe_scorecards_data_loading_nadia_20231219150442/outputs/" #temp + skill.input.path <- paste0(input.path, "Skill/") #paste0(recipe$Run$output_dir, "/outputs/Skill/") + stats.input.path <- paste0(input.path, "Statistics/") #paste0(recipe$Run$output_dir, "/outputs/Statistics/") output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/") dir.create(output.path, recursive = T, showWarnings = F) - system <- recipe$Analysis$Datasets$System$name reference <- recipe$Analysis$Datasets$Reference$name var <- recipe$Analysis$Variables$name @@ -46,31 +47,17 @@ Scorecards <- function(recipe) { metric.aggregation <- recipe$Analysis$Workflow$Scorecards$metric_aggregation metrics.load <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Skill$metric), ", | |,")) - - ## Define skill scores in score aggregation has been requested - - if(metric.aggregation == 'score'){ - if('rps' %in% metrics.load){ - metrics.load <- c(metrics.load, 'rps_clim') - } - if('crps' %in% metrics.load){ - metrics.load <- c(metrics.load, 'crps_clim') - } - } - metrics.visualize <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric), ", | |,")) - ## Define skill scores in score aggregation has been requested - - if(metric.aggregation == 'score'){ - if('rpss' %in% metrics.visualize){ - metrics.visualize[metrics.visualize == 'rpss'] <- 'rpss_score_aggr' - } - if('crpss' %in% metrics.visualize){ - metrics.visualize[metrics.visualize == 'crpss'] <- 'crpss_score_aggr' - } + if(is.null(recipe$Analysis$remove_NAs)){ + na.rm <- FALSE + } else { + na.rm <- recipe$Analysis$remove_NAs } + ncores <- 1 # recipe$Analysis$ncores + + ## Parameters for scorecard table inf.to.na <- recipe$Analysis$Workflow$Scorecards$inf_to_na table.label <- 
recipe$Analysis$Workflow$Scorecards$table_label fileout.label <- recipe$Analysis$Workflow$Scorecards$fileout_label @@ -78,73 +65,453 @@ Scorecards <- function(recipe) { col1.width <- recipe$Analysis$Workflow$Scorecards$col1_width col2.width <- recipe$Analysis$Workflow$Scorecards$col2_width calculate.diff <- recipe$Analysis$Workflow$Scorecards$calculate_diff - ncores <- 1 # recipe$Analysis$ncores ## Load data files start.months <- sprintf("%02d", start.months) period <- paste0(start.year, "-", end.year) - # NOTE (Eva): This is an example: - if (any(metrics.load %in% 'rps_syear')) { - metric_rps_syear <- .loadmetrics(input_path = input.path, system = system, - reference = reference, var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'rps_syear', - calib_method = calib.method, syear = TRUE) - } else if (any(metrics.load %in% c('crps', 'mean_bias'))) { - metrics <- LoadMetrics(input_path = input.path, system = system, - reference = reference, var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = c('crps', 'mean_bias'), - calib_method = calib.method) - } - + ####### SKILL AGGREGATION ####### + if(metric.aggregation == 'skill'){ - if('region' %in% names(dim(loaded_metrics[[1]][[1]]))){ + ## Load data files + loaded_metrics <- LoadMetrics(system = system, + reference = reference, + var = var, + start.year = start.year, + end.year = end.year, + metrics = metrics.load, + start.months = start.months, + forecast.months = forecast.months, + inf.to.na = inf.to.na, + input.path = skill.input.path) - ### Convert loaded metrics to array for allready aggregated data - metrics.dim <- attributes(loaded_metrics[[1]][[1]])$metrics - forecast.months.dim <- attributes(loaded_metrics[[1]][[1]])$forecast.months - start.months.dim <- attributes(loaded_metrics[[1]][[1]])$start.months - regions.dim <- regions #list('NAO' = c(lon.min = -80, lon.max = 40, lat.min = 20, lat.max = 80)) + ## Spatial Aggregation of metrics + if('region' %in% names(dim(loaded_metrics[[1]][[1]]))){ + + ### Convert loaded metrics to array for already aggregated data + metrics.dim <- attributes(loaded_metrics[[1]][[1]])$metrics + forecast.months.dim <- attributes(loaded_metrics[[1]][[1]])$forecast.months + start.months.dim <- attributes(loaded_metrics[[1]][[1]])$start.months + regions.dim <- regions #list('NAO' = c(lon.min = -80, lon.max = 40, lat.min = 20, lat.max = 80)) + + aggregated_metrics <- array(dim = c(system = length(loaded_metrics), + reference = length(loaded_metrics[[1]]), + metric = length(metrics.dim), + time = length(forecast.months.dim), + sdate = length(start.months.dim), + region = length(regions.dim))) + + + for (sys in 1:length(names(loaded_metrics))){ + for (ref in 1:length(names(loaded_metrics[[sys]]))){ + aggregated_metrics[sys, ref, , , , ] <- s2dv::Reorder(data = loaded_metrics[[sys]][[ref]], order = c('metric','time','sdate','region')) + } + } + + ## Add attributes + attributes(aggregated_metrics)$metrics <- metrics.load + attributes(aggregated_metrics)$start.months <- attributes(loaded_metrics[[1]][[1]])$start.months + attributes(aggregated_metrics)$forecast.months <- attributes(loaded_metrics[[1]][[1]])$forecast.months + attributes(aggregated_metrics)$regions <- regions + attributes(aggregated_metrics)$system.name <- names(loaded_metrics) + attributes(aggregated_metrics)$reference.name <- names(loaded_metrics[[1]]) + + + } else { + ## Calculate weighted mean of spatial aggregation + aggregated_metrics <- 
WeightedMetrics(loaded_metrics,
+                                            regions = regions,
+                                            metric.aggregation = metric.aggregation,
+                                            ncores = ncores)
+    } ## close if on region
+  }
+
+  ###### SCORE AGGREGATION ######
+  if(metric.aggregation == 'score'){
+
+    ## Comments (Nadia):
+    ## 1) syear parameter as FALSE instead of NULL for metrics without syear dimension?
+    ## 2) output files names of statistics not correct, too many _ in name
+    ## 3) how to load multiple systems? - Loop over sys and ref for metric.aggregation == 'score'
+    ## Need to test with multiple sdates
+
+    lon_dim <- 'longitude'
+    lat_dim <- 'latitude'
+    time_dim <- 'syear'
+    memb_dim <- 'ensemble'
+
+    ## Define arrays to be filled with data
+    scorecard_metrics <- array(data = NA,
+                               dim = c(system = length(system),
+                                       reference = length(reference),
+                                       time = length(forecast.months),
+                                       sdate = length(start.months),
+                                       region = length(regions),
+                                       metric = length(metrics.visualize)))
+
+    scorecard_sign <- array(data = NA,
+                            dim = c(system = length(system),
+                                    reference = length(reference),
+                                    time = length(forecast.months),
+                                    sdate = length(start.months),
+                                    region = length(regions),
+                                    metric = length(metrics.visualize)))
+
+    for (sys in 1:length(system)){
+      # sys_num <- which(system == sys)
+      for (ref in 1:length(reference)){
+        # ref_num <- which(reference == ref)
+        for (met in metrics.visualize) {
+
+          if(met == 'rpss'){
+            ## Load data from saved files
+            rps_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys],
+                                      reference = reference[ref], var = var,
+                                      period = period, start_months = start.months,
+                                      forecast_months = forecast.months,
+                                      metrics = 'rps_syear',
+                                      calib_method = calib.method, syear = TRUE)
+
+            rps_clim_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys],
+                                           reference = reference[ref], var = var,
+                                           period = period, start_months = start.months,
+                                           forecast_months = forecast.months,
+                                           metrics = 'rps_clim_syear',
+                                           calib_method = calib.method, syear = TRUE)
+
+            ## Remove dat and var dimensions
+            rps_syear <- Subset(rps_syear, along = c('dat', 'var'), indices = list(1, 1), drop = 'selected')
+            rps_clim_syear <- Subset(rps_clim_syear, along = c('dat', 'var'), indices = list(1, 1), drop = 'selected')
+
+            ## Spatially aggregate data
+            rps_syear_spatial_aggr <- sapply(X = 1:length(regions),
+                                             FUN = function(X) {
+                                               WeightedMean(data = rps_syear,
+                                                            region = regions[[X]],
+                                                            lon = as.vector(attributes(rps_syear)$Variables$dat1$longitude),
+                                                            lat = as.vector(attributes(rps_syear)$Variables$dat1$latitude),
+                                                            londim = lon_dim,
+                                                            latdim = lat_dim,
+                                                            na.rm = F)
+                                             }, simplify =
'array') + + rps_clim_syear_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = rps_clim_syear, + region = regions[[X]], + lon = as.vector(attributes(rps_clim_syear)$Variables$dat1$longitude), + lat = as.vector(attributes(rps_clim_syear)$Variables$dat1$latitude), + londim = lon_dim, + latdim = lat_dim, + na.rm = F) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(rps_syear_spatial_aggr))[length(dim(rps_syear_spatial_aggr))] <- 'region' + names(dim(rps_clim_syear_spatial_aggr))[length(dim(rps_clim_syear_spatial_aggr))] <- 'region' + + ## Temporally aggregate data + rps_temp_aggr <- Apply(data = rps_syear_spatial_aggr, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + rps_clim_temp_aggr <- Apply(data = rps_clim_syear_spatial_aggr, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + ## Calculate RPSS from aggregated RPS and RPS_clim + rpss <- 1 - rps_temp_aggr / rps_clim_temp_aggr + + ## Calculate significance + sign_rpss <- RandomWalkTest(rps_syear_spatial_aggr, rps_clim_syear_spatial_aggr, + time_dim = time_dim, test.type = 'two.sided', + alpha = 0.05, pval = FALSE, sign = TRUE, + ncores = NULL)$sign + + ## Save metric result in arrays + scorecard_metrics[sys, ref, , , ,which(metrics.visualize == met)] <- s2dv::Reorder(data = rpss, order = c('time', 'smonths','region')) + scorecard_sign[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_rpss, order = c('time', 'smonths','region')) + + } ## close if on rpss + + if(met == 'crpss'){ + + ## Load data from saved files + crps_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'crps_syear', + calib_method = calib.method, syear = TRUE) + + crps_clim_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'crps_clim_syear', + calib_method = calib.method, syear = TRUE) + + ## Remove dat and var dimensions + crps_syear <- Subset(crps_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') + crps_clim_syear <- Subset(crps_clim_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') + + ## Spatially aggregate data + crps_syear_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = crps_syear, + region = regions[[X]], + lon = as.vector(attributes(crps_syear)$Variables$dat1$longitude), + lat = as.vector(attributes(crps_syear)$Variables$dat1$latitude), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + crps_clim_syear_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = crps_clim_syear, + region = regions[[X]], + lon = as.vector(attributes(crps_clim_syear)$Variables$dat1$longitude), + lat = as.vector(attributes(crps_clim_syear)$Variables$dat1$latitude), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(crps_syear_spatial_aggr))[length(dim(crps_syear_spatial_aggr))] <- 'region' + names(dim(crps_clim_syear_spatial_aggr))[length(dim(crps_clim_syear_spatial_aggr))] <- 'region' + + ## Temporally aggregate data + crps_temp_aggr <- Apply(data = crps_syear_spatial_aggr, + target_dims = time_dim, + 
fun = 'mean', ncores = ncores)$output1 + + crps_clim_temp_aggr <- Apply(data = crps_clim_syear_spatial_aggr, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + ## Calculate CRPSS from aggregated CRPS and CRPS_clim + crpss <- 1 - crps_temp_aggr / crps_clim_temp_aggr + + ## Calculate significance + sign_crpss <- RandomWalkTest(crps_syear_spatial_aggr, crps_clim_syear_spatial_aggr, + time_dim = time_dim, test.type = 'two.sided', + alpha = 0.05, pval = FALSE, sign = TRUE, + ncores = NULL)$sign + + ## Save metric result in arrays + scorecard_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = crpss, order = c('time', 'smonths','region')) + scorecard_sign[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_crpss, order = c('time', 'smonths','region')) + + } ## close if on crpss + + if(met == 'enscorr'){ + ## Load data from saved files + cov <- .loadmetrics(input_path = stats.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'cov', + calib_method = calib.method, syear = NULL) + + std_hcst <- .loadmetrics(input_path = stats.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'std_hcst', + calib_method = calib.method, syear = NULL) + + std_obs <- .loadmetrics(input_path = stats.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'std_obs', + calib_method = calib.method, syear = NULL) + + + n_eff <- .loadmetrics(input_path = stats.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'n_eff', + calib_method = calib.method, syear = NULL) + + ## Calculate spatial aggregation + cov_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = cov, + region = regions[[X]], + lon = as.vector(attributes(cov)$Variables$dat1$longitude), + lat = as.vector(attributes(cov)$Variables$dat1$latitude), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(cov_spatial_aggr))[length(dim(cov_spatial_aggr))] <- 'region' + - } else { - ## Calculate weighted mean of spatial aggregation - aggregated_metrics <- WeightedMetrics(loaded_metrics, - regions = regions, - metric.aggregation = metric.aggregation, - ncores = ncores) - }## close if + std_hcst_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = std_hcst, + region = regions[[X]], + lon = as.vector(attributes(std_hcst)$Variables$dat1$longitude), + lat = as.vector(attributes(std_hcst)$Variables$dat1$latitude), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + names(dim(std_hcst_spatial_aggr))[length(dim(std_hcst_spatial_aggr))] <- 'region' + + std_obs_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = std_obs, + region = regions[[X]], + lon = as.vector(attributes(std_obs)$Variables$dat1$longitude), + lat = as.vector(attributes(std_obs)$Variables$dat1$latitude), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + 
names(dim(std_obs_spatial_aggr))[length(dim(std_obs_spatial_aggr))] <- 'region' + + n_eff_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = n_eff, + region = regions[[X]], + lon = as.vector(attributes(std_obs)$Variables$dat1$longitude), + lat = as.vector(attributes(std_obs)$Variables$dat1$latitude), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + names(dim(n_eff_spatial_aggr))[length(dim(n_eff_spatial_aggr))] <- 'region' + n_eff_spatial_aggr <- Subset(n_eff_spatial_aggr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') + + ## Calculate correlation + enscorr <- cov_spatial_aggr / (std_hcst_spatial_aggr * std_obs_spatial_aggr) + + ## Drop unwanted dimensions + enscorr <- Subset(enscorr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') + + ## Calculate significance of corr + alpha <- 0.05 # Apply 95% confidence + + t_alpha2_n2 <- qt(p = alpha/2, df = n_eff_spatial_aggr-2, lower.tail = FALSE) + t <- abs(enscorr) * sqrt(n_eff_spatial_aggr-2) / sqrt(1-enscorr^2) + + sign_corr<- array(data = NA, + dim = c(time = length(forecast.months), + smonths = length(start.months), + region = length(regions))) + + + for (time in 1:dim(sign_corr)[['time']]){ + for (mon in 1:dim(sign_corr)[['smonths']]){ + for (reg in 1:dim(sign_corr)[['region']]){ + + if (anyNA(c(t[time, mon, reg], t_alpha2_n2[time, mon, reg])) == FALSE + && t[time, mon, reg] >= t_alpha2_n2[time, mon, reg]){ + sign_corr[time, mon, reg] <- TRUE + } else { + sign_corr[time, mon, reg] <- FALSE + } + + } + } + } + + ## Save metric result in arrays + scorecard_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = enscorr, order = c('time', 'smonths','region')) + scorecard_sign[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_corr, order = c('time', 'smonths','region')) + + } ## close if on enscorr + + if(met == 'mean_bias'){ + + mean_bias <- .loadmetrics(input_path = skill.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'mean_bias', + calib_method = calib.method, syear = NULL) + + ## Calculate spatial aggregation + mean_bias_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = mean_bias, + region = regions[[X]], + lon = as.vector(attributes(mean_bias)$Variables$dat1$longitude), + lat = as.vector(attributes(mean_bias)$Variables$dat1$latitude), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + names(dim(mean_bias_spatial_aggr))[length(dim(mean_bias_spatial_aggr))] <- 'region' + + ## Drop unwanted dimensions + mean_bias_spatial_aggr <- Subset(mean_bias_spatial_aggr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') + + ## Save metric result in array + scorecard_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = mean_bias_spatial_aggr, order = c('time', 'smonths','region')) + + } ## close on mean_bias + + if(met == 'enssprerr'){ + + enssprerr <- .loadmetrics(input_path = skill.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'enssprerr', + calib_method = calib.method, syear = NULL) + + ## Calculate spatial aggregation + enssprerr_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = 
enssprerr,
+                                                          region = regions[[X]],
+                                                          lon = as.vector(attributes(enssprerr)$Variables$dat1$longitude),
+                                                          lat = as.vector(attributes(enssprerr)$Variables$dat1$latitude),
+                                                          londim = lon_dim,
+                                                          latdim = lat_dim,
+                                                          na.rm = na.rm)
+                                             }, simplify = 'array')
+
+            names(dim(enssprerr_spatial_aggr))[length(dim(enssprerr_spatial_aggr))] <- 'region'
+
+            ## Drop unwanted dimensions
+            enssprerr_spatial_aggr <- Subset(enssprerr_spatial_aggr, along = c('dat', 'var'), indices = list(1, 1), drop = 'selected')
+
+            ## Save metric result in array
+            scorecard_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = enssprerr_spatial_aggr, order = c('time', 'smonths', 'region'))
+
+          } ## close on enssprerr
+
+        } ## close loop on metric
+      } ## close loop on reference
+    } ## close loop on system
+
+    aggregated_metrics <- scorecard_metrics
+
+    ## Set NAs to FALSE
+    scorecard_sign[is.na(scorecard_sign)] <- FALSE
+
+  } ## close if on score
+
   ## Create simple scorecard tables
   ## (one system only)
   ## Metrics input must be in the same order as function SC_spatial_aggregation
   scorecard_single <- ScorecardsSingle(data = aggregated_metrics,
+                                       sign = scorecard_sign,
                                        system = system,
                                        reference = reference,
                                        var = var,
-- 
GitLab


From f098f32cd0b3f6e3863ea6a6d75eb32e2b868a11 Mon Sep 17 00:00:00 2001
From: Nadia Milders
Date: Wed, 10 Jan 2024 10:47:16 +0100
Subject: [PATCH 17/43] changes for including significance

---
 modules/Saving/R/tmp/CST_SaveExp.R          | 1085 +++++++++----------
 modules/Scorecards/R/tmp/SCPlotScorecard.R  |   36 +-
 modules/Scorecards/R/tmp/ScorecardsMulti.R  |  129 ++-
 modules/Scorecards/R/tmp/ScorecardsSingle.R |  107 +-
 modules/Scorecards/Scorecards.R             |   77 +-
 modules/Skill/R/tmp/CRPS.R                  |    2 +-
 modules/Skill/R/tmp/RPS.R                   |    2 +-
 recipes/recipe_scorecards.yml               |   36 +-
 tools/check_recipe.R                        |   62 +-
 9 files changed, 803 insertions(+), 733 deletions(-)

diff --git a/modules/Saving/R/tmp/CST_SaveExp.R b/modules/Saving/R/tmp/CST_SaveExp.R
index 1154a41c..2ffd8fa8 100644
--- a/modules/Saving/R/tmp/CST_SaveExp.R
+++ b/modules/Saving/R/tmp/CST_SaveExp.R
@@ -4,94 +4,105 @@
 #'
 #'@description This function allows to divide and save a object of class
 #''s2dv_cube' into a NetCDF file, allowing to reload the saved data using
-#'\code{Start} function from StartR package. If the original 's2dv_cube' object
-#'has been created from \code{CST_Load()}, then it can be reloaded with
-#'\code{Load()}.
+#'\code{CST_Start} or \code{CST_Load} functions. It also allows to save any
+#''s2dv_cube' object that follows the NetCDF attributes conventions.
 #'
 #'@param data An object of class \code{s2dv_cube}.
 #'@param destination A character string containing the directory name in which
 #' to save the data. NetCDF file for each starting date are saved into the
-#' folder tree: \cr
-#' destination/Dataset/variable/. By default the function
-#' creates and saves the data into the working directory.
+#' folder tree: 'destination/Dataset/variable/'. By default the function
+#' saves the data into the working directory.
 #'@param sdate_dim A character string indicating the name of the start date
 #' dimension. By default, it is set to 'sdate'. It can be NULL if there is no
 #' start date dimension.
 #'@param ftime_dim A character string indicating the name of the forecast time
-#' dimension. By default, it is set to 'time'. It can be NULL if there is no
-#' forecast time dimension.
+#' dimension. If 'Dates' are used, it can't be NULL. If there is no forecast
+#' time dimension, 'Dates' will be set to NULL and will not be used. By
+#' default, it is set to 'time'.
 #'@param dat_dim A character string indicating the name of dataset dimension.
-#' By default, it is set to 'dataset'. It can be NULL if there is no dataset
-#' dimension.
+#' It can be NULL if there is no dataset dimension. By default, it is set to
+#' 'dataset'.
 #'@param var_dim A character string indicating the name of variable dimension.
-#' By default, it is set to 'var'. It can be NULL if there is no variable
-#' dimension.
-#'@param memb_dim A character string indicating the name of the member dimension.
-#' By default, it is set to 'member'. It can be NULL if there is no member
-#' dimension.
+#' It can be NULL if there is no variable dimension. By default, it is set to
+#' 'var'.
+#'@param memb_dim A character string indicating the name of the member
+#' dimension. It can be NULL if there is no member dimension. By default, it is
+#' set to 'member'.
 #'@param startdates A vector of dates that will be used for the filenames
-#' when saving the data in multiple files. It must be a vector of the same
-#' length as the start date dimension of data. It must be a vector of class
-#' \code{Dates}, \code{'POSIXct'} or character with lenghts between 1 and 10.
-#' If it is NULL, the coordinate corresponding the the start date dimension or
-#' the first Date of each time step will be used as the name of the files.
-#' It is NULL by default.
-#'@param drop_dims A vector of character strings indicating the dimension names
-#' of length 1 that need to be dropped in order that they don't appear in the
-#' netCDF file. It is NULL by default (optional).
+#' when saving the data in multiple files (single_file = FALSE). It must be a
+#' vector of the same length as the start date dimension of data. It must be a
+#' vector of class \code{Dates}, \code{'POSIXct'} or character with lengths
+#' between 1 and 10. If it is NULL, the coordinate corresponding to the start
+#' date dimension or the first Date of each time step will be used as the name
+#' of the files. It is NULL by default.
 #'@param single_file A logical value indicating if all object is saved in a
-#' single file (TRUE) or in multiple files (FALSE). When it is FALSE,
-#' the array is separated for Datasets, variable and start date. It is FALSE
-#' by default.
-#'@param extra_string A character string to be include as part of the file name,
-#' for instance, to identify member or realization. It would be added to the
-#' file name between underscore characters.
-#'@param units_time_since A logical value indicating if the time units are
-#' saved as 'time unit since' (e.g. 'horurs since') (TRUE) or as time unit
-#' index (e.g. days, months, or hours) (FALSE). It is set as TRUE by default.
+#' single file (TRUE) or in multiple files (FALSE). When it is FALSE,
+#' the array is separated for datasets, variable and start date. When there are
+#' no specified time dimensions, the data will be saved in a single file by
+#' default. The output file name when 'single_file' is TRUE is a character
+#' string containing: '__.nc'; when it is FALSE,
+#' it is '_.nc'. It is FALSE by default.
+#'@param drop_dims (optional) A vector of character strings indicating the
+#' dimension names of length 1 that need to be dropped in order that they don't
+#' appear in the netCDF file. It is only allowed to drop dimensions that are
+#' not used in the computation. The dimensions used in the computation are the
+#' ones specified in: sdate_dim, ftime_dim, dat_dim, var_dim and memb_dim. It
+#' is NULL by default.
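+#' As an illustrative sketch (the object name 'cube' and the 'region'
+#' dimension are hypothetical):
+#' \dontrun{
+#' # 'cube' carries a length-1 'region' dimension that is not one of the
+#' # computation dimensions, so it can be dropped from the saved files:
+#' CST_SaveExp(data = cube, sdate_dim = 'sdate', ftime_dim = 'time',
+#'             drop_dims = 'region')
+#' }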
+#'@param extra_string (Optional) A character string to be included as part of
+#' the file name, for instance, to identify member or realization. When
+#' single_file is TRUE, the 'extra_string' will replace the whole default
+#' file name; when single_file is FALSE, the 'extra_string' will be added
+#' in the file name as: '__.nc'. It is NULL by
+#' default.
+#'@param units_hours_since (Optional) A logical value only available for the
+#' case: 'Dates' have forecast time and start date dimension, 'single_file' is
+#' TRUE and 'time_bounds' are not used. When it is TRUE, it saves the forecast
+#' time with units of 'hours since'; if it is FALSE, the time units will be a
+#' number of time steps with its corresponding frequency (e.g. n days, n months
+#' or n hours). It is FALSE by default.
+#'@param global_attrs (Optional) A list with elements containing the global
+#' attributes to be saved in the NetCDF.
 #'
 #'@return Multiple or single NetCDF files containing the data array.\cr
-#'\item{\code{single_file = TRUE}}{
+#'\item{\code{single_file is TRUE}}{
 #'  All data is saved in a single file located in the specified destination
-#'  path with the following name:
-#'  ___.nc. Multiple
-#'  variables are saved separately in the same file. The forecast time units
-#'  is extracted from the frequency of the time steps (hours, days, months).
-#'  The first value of forecast time is 1. If no frequency is found, the units
-#'  will be 'hours since' each start date and the time steps are assumed to be
-#'  equally spaced.
+#'  path with the following name (by default):
+#'  '__.nc'. Multiple variables
+#'  are saved separately in the same file. The forecast time units
+#'  are calculated from each start date (if sdate_dim is not NULL) or from
+#'  the time step. If 'units_hours_since' is TRUE, the forecast time units
+#'  will be 'hours since '. If 'units_hours_since' is FALSE,
+#'  the forecast time units are extracted from the frequency of the time steps
+#'  (hours, days, months); if no frequency is found, the units will be 'hours
+#'  since'. When the time units are 'hours since' the time steps are assumed to
+#'  be equally spaced.
 #'}
-#'\item{\code{single_file = FALSE}}{
+#'\item{\code{single_file is FALSE}}{
 #'  The data array is subset and stored into multiple files. Each file
 #'  contains the data subset for each start date, variable and dataset. Files
-#'  with different variables and Datasets are stored in separated directories
-#'  within the following directory tree: destination/Dataset/variable/.
-#'  The name of each file will be:
-#'  __.nc.
+#'  with different variables and datasets are stored in separated directories
+#'  within the following directory tree: 'destination/Dataset/variable/'.
+#'  The name of each file will be by default: '_.nc'.
+#'  The forecast time units are calculated from each start date (if sdate_dim
+#'  is not NULL) or from the time step. The forecast time units will be 'hours
+#'  since '.
#'} #' #'@seealso \code{\link[startR]{Start}}, \code{\link{as.s2dv_cube}} and #'\code{\link{s2dv_cube}} #' #'@examples -#'\dontrun{ #'data <- lonlat_temp_st$exp -#'destination <- "./" -#'CST_SaveExp(data = data, destination = destination, ftime_dim = 'ftime', -#' var_dim = 'var', dat_dim = 'dataset') -#'} +#'CST_SaveExp(data = data, ftime_dim = 'ftime', var_dim = 'var', +#' dat_dim = 'dataset', sdate_dim = 'sdate') #' -#'@import ncdf4 -#'@importFrom s2dv Reorder -#'@importFrom ClimProjDiags Subset -#'@import multiApply #'@export -CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate', - ftime_dim = 'time', dat_dim = 'dataset', - var_dim = 'var', memb_dim = 'member', - startdates = NULL, drop_dims = NULL, - single_file = FALSE, extra_string = NULL, - units_time_since = TRUE) { +CST_SaveExp <- function(data, destination = "./", startdates = NULL, + sdate_dim = 'sdate', ftime_dim = 'time', + memb_dim = 'member', dat_dim = 'dataset', + var_dim = 'var', drop_dims = NULL, + single_file = FALSE, extra_string = NULL, + global_attrs = NULL, units_hours_since = FALSE) { # Check 's2dv_cube' if (!inherits(data, 's2dv_cube')) { stop("Parameter 'data' must be of the class 's2dv_cube'.") @@ -104,22 +115,11 @@ CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate', if (!inherits(data$attrs, 'list')) { stop("Level 'attrs' must be a list with at least 'Dates' element.") } - if (!all(c('coords') %in% names(data))) { - warning("Element 'coords' not found. No coordinates will be used.") - } # metadata - if (is.null(data$attrs$Variable$metadata)) { - warning("No metadata found in element Variable from attrs.") - } else { + if (!is.null(data$attrs$Variable$metadata)) { if (!inherits(data$attrs$Variable$metadata, 'list')) { stop("Element metadata from Variable element in attrs must be a list.") } - if (!any(names(data$attrs$Variable$metadata) %in% names(data$coords))) { - warning("Metadata is not found for any coordinate.") - } else if (!any(names(data$attrs$Variable$metadata) %in% - data$attrs$Variable$varName)) { - warning("Metadata is not found for any variable.") - } } # Dates if (is.null(data$attrs$Dates)) { @@ -133,51 +133,31 @@ CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate', if (!is.character(sdate_dim)) { stop("Parameter 'sdate_dim' must be a character string.") } - if (length(sdate_dim) > 1) { - warning("Parameter 'sdate_dim' has length greater than 1 and ", - "only the first element will be used.") - sdate_dim <- sdate_dim[1] - } - } else if (length(dim(data$attrs$Dates)) == 1) { - sdate_dim <- 'sdate' - dim(data$data) <- c(sdate = 1, dim(data$data)) - data$dims <- dim(data$data) - dim(data$attrs$Dates) <- c(sdate = 1, dim(data$attrs$Dates)) - data$coords[[sdate_dim]] <- data$attrs$Dates[1] } # startdates if (is.null(startdates)) { - startdates <- data$coords[[sdate_dim]] - } else { - if (!is.character(startdates)) { - warning(paste0("Parameter 'startdates' is not a character string, ", - "it will not be used.")) + if (is.character(data$coords[[sdate_dim]])) { startdates <- data$coords[[sdate_dim]] } - if (!is.null(sdate_dim)) { - if (dim(data$data)[sdate_dim] != length(startdates)) { - warning(paste0("Parameter 'startdates' doesn't have the same length ", - "as dimension '", sdate_dim,"', it will not be used.")) - startdates <- data$coords[[sdate_dim]] - } - } } - + SaveExp(data = data$data, destination = destination, - Dates = data$attrs$Dates, coords = data$coords, + Dates = data$attrs$Dates, + time_bounds = data$attrs$time_bounds, + startdates = 
startdates,
           varname = data$attrs$Variable$varName,
           metadata = data$attrs$Variable$metadata,
           Datasets = data$attrs$Datasets,
-          startdates = startdates,
-          dat_dim = dat_dim, sdate_dim = sdate_dim,
-          ftime_dim = ftime_dim, var_dim = var_dim,
+          sdate_dim = sdate_dim, ftime_dim = ftime_dim,
           memb_dim = memb_dim,
+          dat_dim = dat_dim, var_dim = var_dim,
           drop_dims = drop_dims,
-          extra_string = extra_string, single_file = single_file,
-          units_time_since = units_time_since)
+          single_file = single_file,
+          extra_string = extra_string,
+          global_attrs = global_attrs,
+          units_hours_since = units_hours_since)
 }
 #'Save a multidimensional array with metadata to data in NetCDF format
 #'@description This function allows to save a data array with metadata into a
@@ -190,14 +170,26 @@ CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate',
 #'@param data A multi-dimensional array with named dimensions.
 #'@param destination A character string indicating the path where to store the
 #'  NetCDF files.
-#'@param Dates A named array of dates with the corresponding sdate and forecast
-#'  time dimension. If there is no sdate_dim, you can set it to NULL.
-#'  It must have ftime_dim dimension.
 #'@param coords A named list with elements of the coordinates corresponding to
 #'  the dimensions of the data parameter. The names and length of each element
 #'  must correspond to the names of the dimensions. If any coordinate is not
 #'  provided, it is set as an index vector with the values from 1 to the length
 #'  of the corresponding dimension.
+#'@param Dates A named array of dates with the corresponding sdate and forecast
+#'  time dimension. If there is no sdate_dim, you can set it to NULL.
+#'  It must have ftime_dim dimension.
+#'@param time_bounds (Optional) A list of two arrays of dates containing
+#'  the lower (first array) and the upper (second array) time bounds
+#'  corresponding to Dates. Each array must have the same dimensions as Dates.
+#'  If 'Dates' parameter is NULL, 'time_bounds' are not used. It is NULL by
+#'  default.
+#'@param startdates A vector of dates that will be used for the filenames
+#'  when saving the data in multiple files (single_file = FALSE). It must be a
+#'  vector of the same length as the start date dimension of data. It must be a
+#'  vector of class \code{Dates}, \code{'POSIXct'} or character with lengths
+#'  between 1 and 10. If it is NULL, the coordinate corresponding to the start
+#'  date dimension or the first Date of each time step will be used as the name
+#'  of the files. It is NULL by default.
 #'@param varname A character string indicating the name of the variable to be
 #'  saved.
 #'@param metadata A named list where each element is a variable containing the
 #'  lists for each variable.
 #'@param Datasets A vector of character string indicating the names of the
 #'  datasets.
-#'@param startdates A vector of dates that will be used for the filenames
-#'  when saving the data in multiple files. It must be a vector of the same
-#'  length as the start date dimension of data. It must be a vector of class
-#'  \code{Dates}, \code{'POSIXct'} or character with lenghts between 1 and 10.
-#'  If it is NULL, the first Date of each time step will be used as the name of
-#'  the files. It is NULL by default.
 #'@param sdate_dim A character string indicating the name of the start date
 #'  dimension. By default, it is set to 'sdate'. It can be NULL if there is no
 #'  start date dimension.
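As a rough illustration of the 'Dates'/'time_bounds' relationship documented
above (a sketch with made-up monthly values, not code from this patch; both
bound arrays must share the dimensions of 'Dates'):

  Dates <- seq(as.POSIXct("1993-11-15", tz = "UTC"), by = "month", length.out = 2)
  dim(Dates) <- c(sdate = 1, time = 2)
  time_bounds <- list(Dates - 14 * 86400,  # lower bound of each time step
                      Dates + 15 * 86400)  # upper bound of each time step
  # SaveExp(..., Dates = Dates, time_bounds = time_bounds, ...)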
@@ -223,45 +209,63 @@ CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate',
 #'@param var_dim A character string indicating the name of variable dimension.
 #'  By default, it is set to 'var'. It can be NULL if there is no variable
 #'  dimension.
-#'@param memb_dim A character string indicating the name of the member dimension.
-#'  By default, it is set to 'member'. It can be NULL if there is no member
-#'  dimension.
-#'@param drop_dims A vector of character strings indicating the dimension names
-#'  of length 1 that need to be dropped in order that they don't appear in the
-#'  netCDF file. It is NULL by default (optional).
+#'@param memb_dim A character string indicating the name of the member
+#'  dimension. By default, it is set to 'member'. It can be NULL if there is no
+#'  member dimension.
+#'@param drop_dims (optional) A vector of character strings indicating the
+#'  dimension names of length 1 that need to be dropped in order that they don't
+#'  appear in the netCDF file. Only dimensions that are not used in the
+#'  computation are allowed to be dropped. The dimensions used in the
+#'  computation are the ones specified in: sdate_dim, ftime_dim, dat_dim,
+#'  var_dim and memb_dim. It is NULL by default.
 #'@param single_file A logical value indicating if the whole object is saved in a
-#'  unique file (TRUE) or in separated directories (FALSE). When it is FALSE,
-#'  the array is separated for Datasets, variable and start date. It is FALSE
-#'  by default (optional).
-#'@param extra_string A character string to be include as part of the file name,
-#'  for instance, to identify member or realization. It would be added to the
-#'  file name between underscore characters (optional).
-#'@param units_time_since A logical value indicating if the time units are
-#'  saved as 'time unit since' (e.g. 'horurs since') (TRUE) or as time unit
-#'  index (e.g. days, months, or hours) (FALSE). It is set as TRUE by default.
+#'  single file (TRUE) or in multiple files (FALSE). When it is FALSE,
+#'  the array is separated for datasets, variable and start date. When there are
+#'  no specified time dimensions, the data will be saved in a single file by
+#'  default. The output file name when 'single_file' is TRUE is a character
+#'  string containing: '<variable>_<first_sdate>_<last_sdate>.nc'; when it is
+#'  FALSE, it is '<variable>_<sdate>.nc'. It is FALSE by default.
+#'@param extra_string (Optional) A character string to be included as part of
+#'  the file name, for instance, to identify member or realization. When
+#'  single_file is TRUE, the 'extra_string' will substitute all the default
+#'  file name; when single_file is FALSE, the 'extra_string' will be added
+#'  in the file name as: '<variable>_<extra_string>_<sdate>.nc'. It is NULL by
+#'  default.
+#'@param global_attrs (Optional) A list with elements containing the global
+#'  attributes to be saved in the NetCDF.
+#'@param units_hours_since (Optional) A logical value only available for the
+#'  case: Dates have forecast time and start date dimension, single_file is
+#'  TRUE and 'time_bounds' is NULL. When it is TRUE, it saves the forecast time
+#'  with units of 'hours since'; if it is FALSE, the time units will be a number
+#'  of time steps with its corresponding frequency (e.g. n days, n months or n
+#'  hours). It is FALSE by default.
 #'
 #'@return Multiple or single NetCDF files containing the data array.\cr
-#'\item{\code{single_file = TRUE}}{
+#'\item{\code{single_file is TRUE}}{
 #'  All data is saved in a single file located in the specified destination
-#'  path with the following name:
-#'  ___.nc. Multiple
-#'  variables are saved separately in the same file. The forecast time units
-#'  is extracted from the frequency of the time steps (hours, days, months).
-#'  The first value of forecast time is 1. If no frequency is found, the units
-#'  will be 'hours since' each start date and the time steps are assumed to be
-#'  equally spaced.
+#'  path with the following name (by default):
+#'  '<variable>_<first_sdate>_<last_sdate>.nc'. Multiple variables
+#'  are saved separately in the same file. The forecast time units
+#'  are calculated from each start date (if sdate_dim is not NULL) or from
+#'  the time step. If 'units_hours_since' is TRUE, the forecast time units
+#'  will be 'hours since <start_dates>'. If 'units_hours_since' is FALSE,
+#'  the forecast time units are extracted from the frequency of the time steps
+#'  (hours, days, months); if no frequency is found, the units will be 'hours
+#'  since'. When the time units are 'hours since', the time steps are assumed
+#'  to be equally spaced.
 #'}
-#'\item{\code{single_file = FALSE}}{
+#'\item{\code{single_file is FALSE}}{
 #'  The data array is subset and stored into multiple files. Each file
 #'  contains the data subset for each start date, variable and dataset. Files
-#'  with different variables and Datasets are stored in separated directories
-#'  within the following directory tree: destination/Dataset/variable/.
-#'  The name of each file will be:
-#'  __.nc.
+#'  with different variables and datasets are stored in separate directories
+#'  within the following directory tree: 'destination/Dataset/variable/'.
+#'  The name of each file will be by default: '<variable>_<sdate>.nc'.
+#'  The forecast time units are calculated from each start date (if sdate_dim
+#'  is not NULL) or from the time step. The forecast time units will be 'hours
+#'  since <start_date>'.
 #'}
 #'
 #'@examples
-#'\dontrun{
 #'data <- lonlat_temp_st$exp$data
 #'lon <- lonlat_temp_st$exp$coords$lon
 #'lat <- lonlat_temp_st$exp$coords$lat
@@ -269,25 +273,23 @@ CST_SaveExp <- function(data, destination = "./", sdate_dim = 'sdate',
 #'Datasets <- lonlat_temp_st$exp$attrs$Datasets
 #'varname <- 'tas'
 #'Dates <- lonlat_temp_st$exp$attrs$Dates
-#'destination = './'
 #'metadata <- lonlat_temp_st$exp$attrs$Variable$metadata
-#'SaveExp(data = data, destination = destination, coords = coords,
-#'        Datasets = Datasets, varname = varname, Dates = Dates,
-#'        metadata = metadata, single_file = TRUE, ftime_dim = 'ftime',
-#'        var_dim = 'var', dat_dim = 'dataset')
-#'}
+#'SaveExp(data = data, coords = coords, Datasets = Datasets, varname = varname,
+#'        Dates = Dates, metadata = metadata, single_file = TRUE,
+#'        ftime_dim = 'ftime', var_dim = 'var', dat_dim = 'dataset')
 #'
-#'@import ncdf4
+#'@import easyNCDF
 #'@importFrom s2dv Reorder
 #'@import multiApply
 #'@importFrom ClimProjDiags Subset
 #'@export
-SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL,
+SaveExp <- function(data, destination = "./", coords = NULL,
+                    Dates = NULL, time_bounds = NULL, startdates = NULL,
                     varname = NULL, metadata = NULL, Datasets = NULL,
-                    startdates = NULL, dat_dim = 'dataset', sdate_dim = 'sdate',
-                    ftime_dim = 'time', var_dim = 'var', memb_dim = 'member',
+                    sdate_dim = 'sdate', ftime_dim = 'time',
+                    memb_dim = 'member', dat_dim = 'dataset', var_dim = 'var',
                     drop_dims = NULL, single_file = FALSE, extra_string = NULL,
-                    units_time_since = TRUE) {
+                    global_attrs = NULL, units_hours_since = FALSE) {
   ## Initial checks
   # data
   if (is.null(data)) {
@@ -297,21 +299,15 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL,
   if (is.null(dimnames)) {
     stop("Parameter 
'data' must be an array with named dimensions.") } + if (!is.null(attributes(data)$dimensions)) { + attributes(data)$dimensions <- NULL + } # destination if (!is.character(destination) | length(destination) > 1) { stop("Parameter 'destination' must be a character string of one element ", "indicating the name of the file (including the folder if needed) ", "where the data will be saved.") } - # Dates - if (!is.null(Dates)) { - if (!inherits(Dates, "POSIXct") & !inherits(Dates, "Date")) { - stop("Parameter 'Dates' must be of 'POSIXct' or 'Dates' class.") - } - if (is.null(dim(Dates))) { - stop("Parameter 'Dates' must have dimension names.") - } - } # drop_dims if (!is.null(drop_dims)) { if (!is.character(drop_dims) | any(!drop_dims %in% names(dim(data)))) { @@ -320,6 +316,10 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, } else if (!all(dim(data)[drop_dims] %in% 1)) { warning("Parameter 'drop_dims' can only contain dimension names ", "that are of length 1. It will not be used.") + } else if (any(drop_dims %in% c(ftime_dim, sdate_dim, dat_dim, memb_dim, var_dim))) { + warning("Parameter 'drop_dims' contains dimensions used in the computation. ", + "It will not be used.") + drop_dims <- NULL } else { data <- Subset(x = data, along = drop_dims, indices = lapply(1:length(drop_dims), function(x) 1), @@ -329,28 +329,17 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, } # coords if (!is.null(coords)) { - if (!all(names(coords) %in% dimnames)) { - coords <- coords[-which(!names(coords) %in% dimnames)] - } - for (i_coord in dimnames) { - if (i_coord %in% names(coords)) { - if (length(coords[[i_coord]]) != dim(data)[i_coord]) { - warning(paste0("Coordinate '", i_coord, "' has different lenght as ", - "its dimension and it will not be used.")) - coords[[i_coord]] <- 1:dim(data)[i_coord] - } - } else { - # warning(paste0("Coordinate '", i_coord, "' is not provided ", - # "and it will be set as index in element coords.")) - coords[[i_coord]] <- 1:dim(data)[i_coord] - } + if (!inherits(coords, 'list')) { + stop("Parameter 'coords' must be a named list of coordinates.") + } + if (is.null(names(coords))) { + stop("Parameter 'coords' must have names corresponding to coordinates.") } } else { coords <- sapply(dimnames, function(x) 1:dim(data)[x]) } # varname if (is.null(varname)) { - warning("Parameter 'varname' is NULL. It will be assigned to 'X'.") varname <- 'X' } else if (length(varname) > 1) { multiple_vars <- TRUE @@ -361,11 +350,6 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, stop("Parameter 'varname' must be a character string with the ", "variable names.") } - # metadata - if (is.null(metadata)) { - warning("Parameter 'metadata' is not provided so the metadata saved ", - "will be incomplete.") - } # single_file if (!inherits(single_file, 'logical')) { warning("Parameter 'single_file' must be a logical value. It will be ", @@ -378,13 +362,13 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, stop("Parameter 'extra_string' must be a character string.") } } - # units_time_since - if (!is.logical(units_time_since)) { - warning("Parameter 'units_time_since' must be a logical value. 
It will be ", - "set as TRUE.") - units_time_since <- TRUE + # global_attrs + if (!is.null(global_attrs)) { + if (!inherits(global_attrs, 'list')) { + stop("Parameter 'global_attrs' must be a list.") + } } - + ## Dimensions checks # Spatial coordinates if (!any(dimnames %in% .KnownLonNames()) | @@ -394,16 +378,6 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, } else { lon_dim <- dimnames[which(dimnames %in% .KnownLonNames())] lat_dim <- dimnames[which(dimnames %in% .KnownLatNames())] - if (length(lon_dim) > 1) { - warning("Found more than one longitudinal dimension. Only the first one ", - "will be used.") - lon_dim <- lon_dim[1] - } - if (length(lat_dim) > 1) { - warning("Found more than one latitudinal dimension. Only the first one ", - "will be used.") - lat_dim <- lat_dim[1] - } } # ftime_dim if (!is.null(ftime_dim)) { @@ -411,12 +385,8 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, stop("Parameter 'ftime_dim' must be a character string.") } if (!all(ftime_dim %in% dimnames)) { - stop("Parameter 'ftime_dim' is not found in 'data' dimension.") - } - if (length(ftime_dim) > 1) { - warning("Parameter 'ftime_dim' has length greater than 1 and ", - "only the first element will be used.") - ftime_dim <- ftime_dim[1] + stop("Parameter 'ftime_dim' is not found in 'data' dimension. Set it ", + "as NULL if there is no forecast time dimension.") } } # sdate_dim @@ -424,11 +394,6 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, if (!is.character(sdate_dim)) { stop("Parameter 'sdate_dim' must be a character string.") } - if (length(sdate_dim) > 1) { - warning("Parameter 'sdate_dim' has length greater than 1 and ", - "only the first element will be used.") - sdate_dim <- sdate_dim[1] - } if (!all(sdate_dim %in% dimnames)) { stop("Parameter 'sdate_dim' is not found in 'data' dimension.") } @@ -452,11 +417,6 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, stop("Parameter 'dat_dim' is not found in 'data' dimension. Set it ", "as NULL if there is no Datasets dimension.") } - if (length(dat_dim) > 1) { - warning("Parameter 'dat_dim' has length greater than 1 and ", - "only the first element will be used.") - dat_dim <- dat_dim[1] - } n_datasets <- dim(data)[dat_dim] } else { n_datasets <- 1 @@ -470,11 +430,6 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, stop("Parameter 'var_dim' is not found in 'data' dimension. 
Set it ", "as NULL if there is no variable dimension.") } - if (length(var_dim) > 1) { - warning("Parameter 'var_dim' has length greater than 1 and ", - "only the first element will be used.") - var_dim <- var_dim[1] - } n_vars <- dim(data)[var_dim] } else { n_vars <- 1 @@ -489,35 +444,121 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, single_file <- TRUE } } - # Dates dimension check + # Dates (1): initial checks if (!is.null(Dates)) { - if (all(c(ftime_dim, sdate_dim) %in% names(dim(Dates)))) { - if (any(!names(dim(Dates)) %in% c(ftime_dim, sdate_dim))) { - if (all(dim(Dates)[!names(dim(Dates)) %in% c(ftime_dim, sdate_dim)] == 1)) { - dim(Dates) <- dim(Dates)[names(dim(Dates)) %in% c(ftime_dim, sdate_dim)] - } else { - stop("Parameter 'Dates' must have only sdate_dim and ftime_dim dimensions.") - } + if (!any(inherits(Dates, "POSIXct"), inherits(Dates, "Date"))) { + stop("Parameter 'Dates' must be of 'POSIXct' or 'Dates' class.") + } + if (is.null(dim(Dates))) { + stop("Parameter 'Dates' must have dimension names.") + } + if (all(is.null(ftime_dim), is.null(sdate_dim))) { + warning("Parameters 'ftime_dim' and 'sdate_dim' can't both be NULL ", + "if 'Dates' are used. 'Dates' will not be used.") + Dates <- NULL + } + # sdate_dim in Dates + if (!is.null(sdate_dim)) { + if (!sdate_dim %in% names(dim(Dates))) { + warning("Parameter 'sdate_dim' is not found in 'Dates' dimension. ", + "Dates will not be used.") + Dates <- NULL } - if (is.null(startdates)) { - startdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') - } else if ((!inherits(startdates, "POSIXct") & !inherits(startdates, "Date")) && - (!is.character(startdates) | (any(nchar(startdates) > 10) | any(nchar(startdates) < 1)))) { - warning("Parameter 'startdates' should be a character string containing ", - "the start dates in the format 'yyyy-mm-dd', 'yyyymmdd', 'yyyymm', ", - "'POSIXct' or 'Dates' class. Files will be named with Dates instead.") - startdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') + } + # ftime_dim in Dates + if (!is.null(ftime_dim)) { + if (!ftime_dim %in% names(dim(Dates))) { + warning("Parameter 'ftime_dim' is not found in 'Dates' dimension. 
", + "Dates will not be used.") + Dates <- NULL } - if (!is.null(format(startdates, "%Y%m%d"))) { - startdates <- format(startdates, "%Y%m%d") + } + } + # time_bounds + if (!is.null(time_bounds)) { + if (!inherits(time_bounds, 'list')) { + stop("Parameter 'time_bounds' must be a list with two dates arrays.") + } + time_bounds_dims <- lapply(time_bounds, function(x) dim(x)) + if (!identical(time_bounds_dims[[1]], time_bounds_dims[[2]])) { + stop("Parameter 'time_bounds' must have 2 arrays with same dimensions.") + } + if (is.null(Dates)) { + time_bounds <- NULL + } else { + name_tb <- sort(names(time_bounds_dims[[1]])) + name_dt <- sort(names(dim(Dates))) + if (!identical(dim(Dates)[name_dt], time_bounds_dims[[1]][name_tb])) { + stop(paste0("Parameter 'Dates' and 'time_bounds' must have same length ", + "of all dimensions.")) } - } else if (any(ftime_dim %in% names(dim(Dates)))) { - if (all(dim(Dates)[!names(dim(Dates)) %in% c(ftime_dim)] == 1)) { - dim(Dates) <- dim(Dates)[names(dim(Dates)) %in% c(ftime_dim, sdate_dim)] + } + } + # Dates (2): Check dimensions + if (!is.null(Dates)) { + if (any(dim(Dates)[!names(dim(Dates)) %in% c(ftime_dim, sdate_dim)] != 1)) { + stop("Parameter 'Dates' can have only 'sdate_dim' and 'ftime_dim' ", + "dimensions of length greater than 1.") + } + # drop dimensions of length 1 different from sdate_dim and ftime_dim + dim(Dates) <- dim(Dates)[names(dim(Dates)) %in% c(ftime_dim, sdate_dim)] + + # add ftime if needed + if (is.null(ftime_dim)) { + warning("A 'time' dimension of length 1 will be added to 'Dates'.") + dim(Dates) <- c(time = 1, dim(Dates)) + dim(data) <- c(time = 1, dim(data)) + dimnames <- names(dim(data)) + ftime_dim <- 'time' + if (!is.null(time_bounds)) { + time_bounds <- lapply(time_bounds, function(x) { + dim(x) <- c(time = 1, dim(x)) + return(x) + }) + } + units_hours_since <- TRUE + } + # add sdate if needed + if (is.null(sdate_dim)) { + if (!single_file) { + dim(Dates) <- c(dim(Dates), sdate = 1) + dim(data) <- c(dim(data), sdate = 1) + dimnames <- names(dim(data)) + sdate_dim <- 'sdate' + if (!is.null(time_bounds)) { + time_bounds <- lapply(time_bounds, function(x) { + dim(x) <- c(dim(x), sdate = 1) + return(x) + }) + } + if (!is.null(startdates)) { + if (length(startdates) != 1) { + warning("Parameter 'startdates' must be of length 1 if 'sdate_dim' is NULL.", + "They won't be used.") + startdates <- NULL + } + } } + units_hours_since <- TRUE } } # startdates + if (!is.null(Dates)) { + # check startdates + if (is.null(startdates)) { + startdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') + } else if (any(nchar(startdates) > 10, nchar(startdates) < 1)) { + warning("Parameter 'startdates' should be a character string containing ", + "the start dates in the format 'yyyy-mm-dd', 'yyyymmdd', 'yyyymm', ", + "'POSIXct' or 'Dates' class. Files will be named with Dates instead.") + startdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') + } + } else if (!single_file) { + warning("Dates must be provided if 'data' must be saved in separated files. ", + "All data will be saved in a single file.") + single_file <- TRUE + } + # startdates if (is.null(startdates)) { if (is.null(sdate_dim)) { startdates <- 'XXX' @@ -525,20 +566,21 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, startdates <- rep('XXX', dim(data)[sdate_dim]) } } else { - if (is.null(sdate_dim)) { - if (length(startdates) != 1) { - warning("Parameter 'startdates' has length more than 1. 
Only first ", - "value will be used.") - startdates <- startdates[[1]] + if (any(inherits(startdates, "POSIXct"), inherits(startdates, "Date"))) { + startdates <- format(startdates, "%Y%m%d") + } + if (!is.null(sdate_dim)) { + if (dim(data)[sdate_dim] != length(startdates)) { + warning(paste0("Parameter 'startdates' doesn't have the same length ", + "as dimension '", sdate_dim,"', it will not be used.")) + startdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') + startdates <- format(startdates, "%Y%m%d") } } } + # Datasets if (is.null(Datasets)) { - if (!single_file) { - warning("Parameter 'Datasets' is NULL. Files will be saved with a ", - "directory name of 'XXX'.") - } Datasets <- rep('XXX', n_datasets ) } if (inherits(Datasets, 'list')) { @@ -553,128 +595,75 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, "element 'Datasets' and only the firsts elements will be used.") Datasets <- Datasets[1:n_datasets] } - + + ## NetCDF dimensions definition + excluded_dims <- var_dim + if (!is.null(Dates)) { + excluded_dims <- c(excluded_dims, sdate_dim, ftime_dim) + } + if (!single_file) { + excluded_dims <- c(excluded_dims, dat_dim) + } + ## Unknown dimensions check - alldims <- c(dat_dim, var_dim, sdate_dim, lon_dim, lat_dim, memb_dim, ftime_dim) + alldims <- c(dat_dim, var_dim, sdate_dim, lon_dim, lat_dim, ftime_dim, memb_dim) if (!all(dimnames %in% alldims)) { unknown_dims <- dimnames[which(!dimnames %in% alldims)] memb_dim <- c(memb_dim, unknown_dims) - alldims <- c(dat_dim, var_dim, sdate_dim, lon_dim, lat_dim, memb_dim, ftime_dim) - } - # Reorder - if (any(dimnames != alldims)) { - data <- Reorder(data, alldims) - dimnames <- names(dim(data)) - if (!is.null(attr(data, 'dimensions'))) { - attr(data, 'dimensions') <- dimnames - } - } - - ## NetCDF dimensions definition - defined_dims <- NULL - extra_info_dim <- NULL - if (is.null(Dates)) { - filedims <- dimnames[which(!dimnames %in% c(dat_dim, var_dim))] - } else { - filedims <- dimnames[which(!dimnames %in% c(dat_dim, var_dim, sdate_dim, ftime_dim))] } + + filedims <- c(dat_dim, var_dim, sdate_dim, lon_dim, lat_dim, ftime_dim, memb_dim) + filedims <- filedims[which(!filedims %in% excluded_dims)] + + # Delete unneded coords + coords[c(names(coords)[!names(coords) %in% filedims])] <- NULL + out_coords <- NULL for (i_coord in filedims) { - dim_info <- list() # vals if (i_coord %in% names(coords)) { - if (is.numeric(coords[[i_coord]])) { - dim_info[['vals']] <- as.vector(coords[[i_coord]]) + if (length(coords[[i_coord]]) != dim(data)[i_coord]) { + warning(paste0("Coordinate '", i_coord, "' has different lenght as ", + "its dimension and it will not be used.")) + out_coords[[i_coord]] <- 1:dim(data)[i_coord] + } else if (is.numeric(coords[[i_coord]])) { + out_coords[[i_coord]] <- as.vector(coords[[i_coord]]) } else { - dim_info[['vals']] <- 1:dim(data)[i_coord] + out_coords[[i_coord]] <- 1:dim(data)[i_coord] } } else { - dim_info[['vals']] <- 1:dim(data)[i_coord] - } - # name - dim_info[['name']] <- i_coord - # len - dim_info[['len']] <- as.numeric(dim(data)[i_coord]) - # unlim - dim_info[['unlim']] <- FALSE - # create_dimvar - dim_info[['create_dimvar']] <- TRUE + out_coords[[i_coord]] <- 1:dim(data)[i_coord] + } + dim(out_coords[[i_coord]]) <- dim(data)[i_coord] + ## metadata if (i_coord %in% names(metadata)) { if ('variables' %in% names(attributes(metadata[[i_coord]]))) { # from Start: 'lon' or 'lat' - attrs <- attributes(metadata[[i_coord]])[['variables']][[i_coord]] - i_coord_info <- 
attrs[!sapply(attrs, inherits, 'list')] + attrs <- attributes(metadata[[i_coord]])[['variables']] + attrs[[i_coord]]$dim <- NULL + attr(out_coords[[i_coord]], 'variables') <- attrs } else if (inherits(metadata[[i_coord]], 'list')) { # from Start and Load: main var - i_coord_info <- metadata[[i_coord]] + attr(out_coords[[i_coord]], 'variables') <- list(metadata[[i_coord]]) + names(attributes(out_coords[[i_coord]])$variables) <- i_coord } else if (!is.null(attributes(metadata[[i_coord]]))) { # from Load - i_coord_info <- attributes(metadata[[i_coord]]) - } else { - stop("Metadata is not correct.") - } - # len - if ('size' %in% names(i_coord_info)) { - if (i_coord_info[['size']] != dim(data)[i_coord]) { - dim_info[['original_len']] <- i_coord_info[['size']] - i_coord_info[['size']] <- NULL - } + attrs <- attributes(metadata[[i_coord]]) + # We remove because some attributes can't be saved + attrs <- NULL + attr(out_coords[[i_coord]], 'variables') <- list(attrs) + names(attributes(out_coords[[i_coord]])$variables) <- i_coord } - # units - if (!('units' %in% names(i_coord_info))) { - dim_info[['units']] <- '' - } else { - dim_info[['units']] <- i_coord_info[['units']] - i_coord_info[['units']] <- NULL - } - # calendar - if (!('calendar' %in% names(i_coord_info))) { - dim_info[['calendar']] <- NA - } else { - dim_info[['calendar']] <- i_coord_info[['calendar']] - i_coord_info[['calendar']] <- NULL - } - # longname - if ('long_name' %in% names(i_coord_info)) { - dim_info[['longname']] <- i_coord_info[['long_name']] - i_coord_info[['long_name']] <- NULL - } else if ('longname' %in% names(i_coord_info)) { - dim_info[['longname']] <- i_coord_info[['longname']] - i_coord_info[['longname']] <- NULL - } else { - if (i_coord %in% .KnownLonNames()) { - dim_info[['longname']] <- 'longitude' - } else if (i_coord %in% .KnownLatNames()) { - dim_info[['longname']] <- 'latitude' - } - } - # extra information - if (!is.null(names(i_coord_info))) { - extra_info_dim[[i_coord]] <- i_coord_info - } - } else { - # units - dim_info[['units']] <- "adim" - # longname - dim_info[['longname']] <- i_coord - # calendar - dim_info[['calendar']] <- NA - } - new_dim <- list(ncdim_def(name = dim_info[['name']], units = dim_info[['units']], - vals = dim_info[['vals']], unlim = dim_info[['unlim']], - create_dimvar = dim_info[['create_dimvar']], - calendar = dim_info[['calendar']], - longname = dim_info[['longname']])) - names(new_dim) <- i_coord - defined_dims <- c(defined_dims, new_dim) + } } - - defined_vars <- list() + if (!single_file) { for (i in 1:n_datasets) { path <- file.path(destination, Datasets[i], varname) for (j in 1:n_vars) { - dir.create(path[j], recursive = TRUE) + if (!dir.exists(path[j])) { + dir.create(path[j], recursive = TRUE) + } startdates <- gsub("-", "", startdates) dim(startdates) <- c(length(startdates)) names(dim(startdates)) <- sdate_dim @@ -687,177 +676,172 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, } else { data_subset <- Subset(data, c(dat_dim, var_dim), list(i, j), drop = 'selected') } + target <- names(dim(data_subset))[which(!names(dim(data_subset)) %in% c(sdate_dim, ftime_dim))] + target_dims_data <- c(target, ftime_dim) if (is.null(Dates)) { input_data <- list(data_subset, startdates) - target_dims <- list(c(lon_dim, lat_dim, memb_dim, ftime_dim), NULL) + target_dims <- list(target_dims_data, NULL) + } else if (!is.null(time_bounds)) { + input_data <- list(data_subset, startdates, Dates, + time_bounds[[1]], time_bounds[[2]]) + target_dims = 
list(target_dims_data, NULL, + ftime_dim, ftime_dim, ftime_dim) } else { input_data <- list(data_subset, startdates, Dates) - target_dims = list(c(lon_dim, lat_dim, memb_dim, ftime_dim), NULL, ftime_dim) + target_dims = list(target_dims_data, NULL, ftime_dim) } Apply(data = input_data, target_dims = target_dims, - fun = .saveExp, + fun = .saveexp, destination = path[j], - defined_dims = defined_dims, + coords = out_coords, ftime_dim = ftime_dim, varname = varname[j], metadata_var = metadata[[varname[j]]], - extra_info_dim = extra_info_dim, - extra_string = extra_string) + extra_string = extra_string, + global_attrs = global_attrs) } } } else { - # Datasets definition - # From here - if (!is.null(dat_dim)) { - new_dim <- list(ncdim_def(name = dat_dim, units = "adim", - vals = 1 : dim(data)[dat_dim], - longname = 'Datasets', create_dimvar = TRUE)) - names(new_dim) <- dat_dim - defined_dims <- c(new_dim, defined_dims) - extra_info_dim[[dat_dim]] <- list(Datasets = paste(Datasets, collapse = ', ')) - } - first_sdate <- last_sdate <- NULL - save_hours_since <- TRUE + # time_bnds + if (!is.null(time_bounds)) { + time_bnds <- c(time_bounds[[1]], time_bounds[[2]]) + } + # Dates + remove_metadata_dim <- TRUE if (!is.null(Dates)) { if (is.null(sdate_dim)) { sdates <- Dates[1] # ftime definition - leadtimes <- as.numeric(Dates - sdates)/3600 + leadtimes <- as.numeric(difftime(Dates, sdates, units = "hours")) } else { # sdate definition sdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') - differ <- as.numeric((sdates - sdates[1])/3600) - new_dim <- list(ncdim_def(name = sdate_dim, units = paste('hours since', sdates[1]), - vals = differ, - longname = sdate_dim, create_dimvar = TRUE)) - names(new_dim) <- sdate_dim - defined_dims <- c(defined_dims, new_dim) - first_sdate <- sdates[1] - last_sdate <- sdates[length(sdates)] + differ <- as.numeric(difftime(sdates, sdates[1], units = "hours")) + dim(differ) <- dim(data)[sdate_dim] + differ <- list(differ) + names(differ) <- sdate_dim + out_coords <- c(differ, out_coords) + attrs <- list(units = paste('hours since', sdates[1]), + calendar = 'proleptic_gregorian', longname = sdate_dim) + attr(out_coords[[sdate_dim]], 'variables')[[sdate_dim]] <- attrs # ftime definition - Dates <- Reorder(Dates, c(ftime_dim, sdate_dim)) - differ_ftime <- apply(Dates, 2, function(x){as.numeric((x - x[1])/3600)}) + Dates <- Reorder(Dates, c(ftime_dim, sdate_dim)) + differ_ftime <- array(dim = dim(Dates)) + for (i in 1:length(sdates)) { + differ_ftime[, i] <- as.numeric(difftime(Dates[, i], Dates[1, i], + units = "hours")) + } dim(differ_ftime) <- dim(Dates) leadtimes <- Subset(differ_ftime, along = sdate_dim, 1, drop = 'selected') - - if (all(apply(differ_ftime, 1, function(x){length(unique(x)) == 1}))) { - if (!units_time_since) save_hours_since <- FALSE - } else { + if (!all(apply(differ_ftime, 1, function(x){length(unique(x)) == 1}))) { warning("Time steps are not equal for all start dates. Only ", "forecast time values for the first start date will be saved ", "correctly.") } } - - if (!save_hours_since) { - # NOTE: Are the units readable by Start? 
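+    # Infer the forecast time units from the spacing of 'leadtimes' (offsets
+    # in hours from the start date): e.g. daily data gives leadtimes
+    # c(0, 24, 48), so diff(leadtimes/24) is all 1 and the values are saved
+    # as 'days' 1, 2, 3; monthly spacings (28-31 days) are saved as 'months'.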
+ if (all(!units_hours_since, is.null(time_bounds))) { if (all(diff(leadtimes/24) == 1)) { # daily values - dim_time <- list(ncdim_def(name = ftime_dim, units = 'days', - vals = round(leadtimes/24) + 1, - calendar = 'proleptic_gregorian', - longname = ftime_dim, unlim = TRUE)) - names(dim_time) <- ftime_dim - defined_dims <- c(defined_dims, dim_time) + units <- 'days' + leadtimes_vals <- round(leadtimes/24) + 1 } else if (all(diff(leadtimes/24) %in% c(28, 29, 30, 31))) { # monthly values - dim_time <- list(ncdim_def(name = ftime_dim, units = 'months', - vals = round(leadtimes/730) + 1, - calendar = 'proleptic_gregorian', - longname = ftime_dim, unlim = TRUE)) - names(dim_time) <- ftime_dim - defined_dims <- c(defined_dims, dim_time) + units <- 'months' + leadtimes_vals <- round(leadtimes/(30.437*24)) + 1 } else { # other frequency - dim_time <- list(ncdim_def(name = ftime_dim, units = 'hours', - vals = leadtimes + 1, - calendar = 'proleptic_gregorian', - longname = ftime_dim, unlim = TRUE)) - names(dim_time) <- ftime_dim - defined_dims <- c(defined_dims, dim_time) + units <- 'hours' + leadtimes_vals <- leadtimes + 1 } } else { - # Save in units 'hours since' - dim_time <- list(ncdim_def(name = ftime_dim, - units = paste('hours since', - paste(sdates, collapse = ', ')), - vals = leadtimes, - calendar = 'proleptic_gregorian', - longname = ftime_dim, unlim = TRUE)) - names(dim_time) <- ftime_dim - defined_dims <- c(defined_dims, dim_time) + units <- paste('hours since', paste(sdates, collapse = ', ')) + leadtimes_vals <- leadtimes + } + + # Add time_bnds + if (!is.null(time_bounds)) { + if (is.null(sdate_dim)) { + sdates <- Dates[1] + time_bnds <- c(time_bounds[[1]], time_bounds[[2]]) + leadtimes_bnds <- as.numeric(difftime(time_bnds, sdates, units = "hours")) + dim(leadtimes_bnds) <- c(dim(Dates), bnds = 2) + } else { + # assuming they have sdate and ftime + time_bnds <- lapply(time_bounds, function(x) { + x <- Reorder(x, c(ftime_dim, sdate_dim)) + return(x) + }) + time_bnds <- c(time_bounds[[1]], time_bounds[[2]]) + dim(time_bnds) <- c(dim(Dates), bnds = 2) + differ_bnds <- array(dim = c(dim(time_bnds))) + for (i in 1:length(sdates)) { + differ_bnds[, i, ] <- as.numeric(difftime(time_bnds[, i, ], Dates[1, i], + units = "hours")) + } + # NOTE (TODO): Add a warning when they are not equally spaced? 
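+        # As with 'leadtimes' above, keep only the offsets of the first start
+        # date; the bounds are stored relative to each corresponding start date.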
+ leadtimes_bnds <- Subset(differ_bnds, along = sdate_dim, 1, drop = 'selected') + } + # Add time_bnds + leadtimes_bnds <- Reorder(leadtimes_bnds, c('bnds', ftime_dim)) + leadtimes_bnds <- list(leadtimes_bnds) + names(leadtimes_bnds) <- 'time_bnds' + out_coords <- c(leadtimes_bnds, out_coords) + attrs <- list(units = paste('hours since', paste(sdates, collapse = ', ')), + calendar = 'proleptic_gregorian', + long_name = 'time bounds', unlim = FALSE) + attr(out_coords[['time_bnds']], 'variables')$time_bnds <- attrs + } + # Add ftime var + dim(leadtimes_vals) <- dim(data)[ftime_dim] + leadtimes_vals <- list(leadtimes_vals) + names(leadtimes_vals) <- ftime_dim + out_coords <- c(leadtimes_vals, out_coords) + attrs <- list(units = units, calendar = 'proleptic_gregorian', + longname = ftime_dim, + dim = list(list(name = ftime_dim, unlim = TRUE))) + if (!is.null(time_bounds)) { + attrs$bounds = 'time_bnds' + } + attr(out_coords[[ftime_dim]], 'variables')[[ftime_dim]] <- attrs + for (j in 1:n_vars) { + remove_metadata_dim <- FALSE + metadata[[varname[j]]]$dim <- list(list(name = ftime_dim, unlim = TRUE)) + } + # Reorder ftime_dim to last + if (length(dim(data)) != which(names(dim(data)) == ftime_dim)) { + order <- c(names(dim(data))[which(!names(dim(data)) %in% c(ftime_dim))], ftime_dim) + data <- Reorder(data, order) } } - # var definition - defined_vars <- list() extra_info_var <- NULL for (j in 1:n_vars) { - var_info <- list() - i_var_info <- metadata[[varname[j]]][!sapply(metadata[[varname[j]]], inherits, 'list')] - ## Define metadata - # name - var_info[['name']] <- varname[j] - # units - if ('units' %in% names(i_var_info)) { - var_info[['units']] <- i_var_info[['units']] - i_var_info[['units']] <- NULL - } else { - var_info[['units']] <- '' - } - # dim - var_info[['dim']] <- defined_dims - # missval - if ('missval' %in% names(i_var_info)) { - var_info[['missval']] <- i_var_info[['missval']] - i_var_info[['missval']] <- NULL + varname_j <- varname[j] + metadata_j <- metadata[[varname_j]] + if (is.null(var_dim)) { + out_coords[[varname_j]] <- data } else { - var_info[['missval']] <- NULL + out_coords[[varname_j]] <- Subset(data, var_dim, j, drop = 'selected') } - # longname - if (any(c('longname', 'long_name') %in% names(i_var_info))) { - longname <- names(i_var_info)[which(names(i_var_info) %in% c('longname', 'long_name'))] - var_info[['longname']] <- i_var_info[[longname]] - i_var_info[[longname]] <- NULL - } else { - var_info[['longname']] <- varname[j] - } - # prec - if ('prec' %in% names(i_var_info)) { - var_info[['prec']] <- i_var_info[['prec']] - i_var_info[['prec']] <- NULL - } else { - prec <- typeof(data) - if (prec == 'character') { - var_info[['prec']] <- 'char' - } - if (any(prec %in% c('short', 'float', 'double', 'integer', 'char', 'byte'))) { - var_info[['prec']] <- prec - } else { - var_info[['prec']] <- 'double' - } + if (!is.null(metadata_j)) { + if (remove_metadata_dim) metadata_j$dim <- NULL + attr(out_coords[[varname_j]], 'variables') <- list(metadata_j) + names(attributes(out_coords[[varname_j]])$variables) <- varname_j } - # extra information - if (!is.null(names(i_var_info))) { - extra_info_var[[varname[j]]] <- i_var_info + # Add global attributes + if (!is.null(global_attrs)) { + attributes(out_coords[[varname_j]])$global_attrs <- global_attrs } - new_var <- list(ncvar_def(name = var_info[['name']], - units = var_info[['units']], - dim = var_info[['dim']], - missval = var_info[['missval']], - longname = var_info[['longname']], - prec = var_info[['prec']])) - - 
names(new_var) <- varname[j] - defined_vars <- c(defined_vars, new_var) } if (is.null(extra_string)) { + first_sdate <- startdates[1] + last_sdate <- startdates[length(startdates)] gsub("-", "", first_sdate) file_name <- paste0(paste(c(varname, gsub("-", "", first_sdate), gsub("-", "", last_sdate)), - collapse = '_'), ".nc") + collapse = '_'), ".nc") } else { nc <- substr(extra_string, nchar(extra_string)-2, nchar(extra_string)) if (nc == ".nc") { @@ -867,134 +851,65 @@ SaveExp <- function(data, destination = "./", Dates = NULL, coords = NULL, } } full_filename <- file.path(destination, file_name) - file_nc <- nc_create(full_filename, defined_vars) - if (is.null(var_dim)) { - ncvar_put(file_nc, varname, vals = data) - } else { - for (j in 1:n_vars) { - ncvar_put(file_nc, defined_vars[[j]]$name, - vals = Subset(data, var_dim, j, drop = 'selected')) - } - } - # Additional dimension attributes - for (dim in names(defined_dims)) { - if (dim %in% names(extra_info_dim)) { - for (info_dim in names(extra_info_dim[[dim]])) { - add_info_dim <- paste0(extra_info_dim[[dim]][[info_dim]], collapse = ', ') - ncatt_put(file_nc, dim, info_dim, add_info_dim) - } - } - } - # Additional dimension attributes - for (var in names(defined_vars)) { - if (var %in% names(extra_info_var)) { - for (info_var in names(extra_info_var[[var]])) { - add_info_var <- paste0(extra_info_var[[var]][[info_var]], collapse = ', ') - ncatt_put(file_nc, var, info_var, add_info_var) - } - } - } - nc_close(file_nc) + ArrayToNc(out_coords, full_filename) } } -.saveExp <- function(data, startdates = NULL, dates = NULL, destination = "./", - defined_dims, ftime_dim = 'time', varname = 'var', - metadata_var = NULL, extra_info_dim = NULL, - extra_string = NULL) { - # ftime_dim +.saveexp <- function(data, coords, destination = "./", + startdates = NULL, dates = NULL, + time_bnds1 = NULL, time_bnds2 = NULL, + ftime_dim = 'time', varname = 'var', + metadata_var = NULL, extra_string = NULL, + global_attrs = NULL) { + remove_metadata_dim <- TRUE if (!is.null(dates)) { - differ <- as.numeric((dates - dates[1])/3600) - dim_time <- list(ncdim_def(name = ftime_dim, units = paste('hours since', dates[1]), - vals = differ, calendar = 'proleptic_gregorian', - longname = ftime_dim, unlim = TRUE)) - names(dim_time) <- ftime_dim - defined_dims <- c(defined_dims, dim_time) - } - - ## Define var metadata - var_info <- NULL - extra_info_var <- NULL - i_var_info <- metadata_var[!sapply(metadata_var, inherits, 'list')] - - # name - var_info[['name']] <- varname - # units - if ('units' %in% names(i_var_info)) { - var_info[['units']] <- i_var_info[['units']] - i_var_info[['units']] <- NULL - } else { - var_info[['units']] <- '' - } - # dim - var_info[['dim']] <- defined_dims - # missval - if ('missval' %in% names(i_var_info)) { - var_info[['missval']] <- i_var_info[['missval']] - i_var_info[['missval']] <- NULL - } else { - var_info[['missval']] <- NULL - } - # longname - if (any(c('longname', 'long_name') %in% names(i_var_info))) { - longname <- names(i_var_info)[which(names(i_var_info) %in% c('longname', 'long_name'))] - var_info[['longname']] <- i_var_info[[longname]] - i_var_info[[longname]] <- NULL - } else { - var_info[['longname']] <- varname - } - # prec - if ('prec' %in% names(i_var_info)) { - var_info[['prec']] <- i_var_info[['prec']] - i_var_info[['prec']] <- NULL - } else { - prec <- typeof(data) - if (prec == 'character') { - var_info[['prec']] <- 'char' - } - if (any(prec %in% c('short', 'float', 'double', 'integer', 'char', 'byte'))) { - 
var_info[['prec']] <- prec - } else { - var_info[['prec']] <- 'double' - } - } - # extra information - if (!is.null(names(i_var_info))) { - extra_info_var <- i_var_info - } - - datanc <- ncvar_def(name = var_info[['name']], - units = var_info[['units']], - dim = var_info[['dim']], - missval = var_info[['missval']], - longname = var_info[['longname']], - prec = var_info[['prec']]) - + if (!any(is.null(time_bnds1), is.null(time_bnds2))) { + time_bnds <- c(time_bnds1, time_bnds2) + time_bnds <- as.numeric(difftime(time_bnds, dates[1], units = "hours")) + dim(time_bnds) <- c(dim(data)[ftime_dim], bnds = 2) + time_bnds <- Reorder(time_bnds, c('bnds', ftime_dim)) + time_bnds <- list(time_bnds) + names(time_bnds) <- 'time_bnds' + coords <- c(time_bnds, coords) + attrs <- list(units = paste('hours since', dates[1]), + calendar = 'proleptic_gregorian', + longname = 'time bounds') + attr(coords[['time_bnds']], 'variables')$time_bnds <- attrs + } + # Add ftime_dim + differ <- as.numeric(difftime(dates, dates[1], units = "hours")) + dim(differ) <- dim(data)[ftime_dim] + differ <- list(differ) + names(differ) <- ftime_dim + coords <- c(differ, coords) + attrs <- list(units = paste('hours since', dates[1]), + calendar = 'proleptic_gregorian', + longname = ftime_dim, + dim = list(list(name = ftime_dim, unlim = TRUE))) + if (!is.null(time_bnds1)) { + attrs$bounds = 'time_bnds' + } + attr(coords[[ftime_dim]], 'variables')[[ftime_dim]] <- attrs + metadata_var$dim <- list(list(name = ftime_dim, unlim = TRUE)) + remove_metadata_dim <- FALSE + } + # Add data + coords[[varname]] <- data + if (!is.null(metadata_var)) { + if (remove_metadata_dim) metadata_var$dim <- NULL + attr(coords[[varname]], 'variables') <- list(metadata_var) + names(attributes(coords[[varname]])$variables) <- varname + } + # Add global attributes + if (!is.null(global_attrs)) { + attributes(coords[[varname]])$global_attrs <- global_attrs + } + if (is.null(extra_string)) { file_name <- paste0(varname, "_", startdates, ".nc") } else { file_name <- paste0(varname, "_", extra_string, "_", startdates, ".nc") } full_filename <- file.path(destination, file_name) - file_nc <- nc_create(full_filename, datanc) - ncvar_put(file_nc, datanc, data) - - # Additional attributes - for (dim in names(defined_dims)) { - if (dim %in% names(extra_info_dim)) { - for (info_dim in names(extra_info_dim[[dim]])) { - add_info_dim <- paste0(extra_info_dim[[dim]][[info_dim]], collapse = ', ') - ncatt_put(file_nc, dim, info_dim, add_info_dim) - } - } - } - # Additional dimension attributes - if (!is.null(extra_info_var)) { - for (info_var in names(extra_info_var)) { - add_info_var <- paste0(extra_info_var[[info_var]], collapse = ', ') - ncatt_put(file_nc, varname, info_var, add_info_var) - } - } - - nc_close(file_nc) -} + ArrayToNc(coords, full_filename) +} \ No newline at end of file diff --git a/modules/Scorecards/R/tmp/SCPlotScorecard.R b/modules/Scorecards/R/tmp/SCPlotScorecard.R index 8c2d9eba..a6575ea5 100644 --- a/modules/Scorecards/R/tmp/SCPlotScorecard.R +++ b/modules/Scorecards/R/tmp/SCPlotScorecard.R @@ -36,6 +36,7 @@ #'@param plot.legend A logical value to determine if the legend is plotted. #'@param legend.width A numeric value to define the width of the legend bars. #'@param legend.height A numeric value to define the height of the legend bars. +#'@param label.scale A numeric value to define the size of the legend labels. #'@param palette A vector of character strings or a list of vectors of #' character strings containing the colors to use in the legends. 
If a vector #' is given as input, then these colors will be used for each legend.dim. A @@ -52,9 +53,18 @@ #' breaks value. This parameter will also plot a inferior triangle in the #' legend bar. The parameter can be set to NULL if there are no superior values. #' If a character string is given this color will be applied to all legend.dims. +#'@param legend.white.space A numeric value defining the initial starting +#' position of the legend bars, the white space infront of the legend is +#' calculated from the left most point of the table as a distance in cm. #'@param round.decimal A numeric indicating to which decimal point the data -#' is to be displayed in the scorecard table. +#' is to be displayed in the scorecard table. Default is 2. #'@param font.size A numeric indicating the font size on the scorecard table. +#'@param col1.width A numeric value defining the width of the first table column +#' in cm. +#'@param col2.width A numeric value defining the width of the second table +#' column in cm. +#'@param columns.width A numeric value defining the width all columns within the +#' table in cm (excluding the first and second columns containing the titles). #'@param fileout A path of the location to save the scorecard plots. #' #'@return An image file containing the scorecard. @@ -75,7 +85,7 @@ #'@import s2dv #'@import ClimProjDiags #'@export -SCPlotScorecard <- function(data, sign, +SCPlotScorecard <- function(data, sign = NULL, row.dim = 'region', subrow.dim = 'time', col.dim = 'metric', subcol.dim = 'sdate', legend.dim = 'metric', row.names = NULL, @@ -90,12 +100,21 @@ SCPlotScorecard <- function(data, sign, round.decimal = 2, font.size = 1.1, legend.white.space = NULL, col1.width = NULL, col2.width = NULL, + columns.width = 1.2, fileout = './scorecard.png') { # Input parameter checks ## Check data if (!is.array(data)) { stop("Parameter 'data' must be a numeric array.") } + ## Check sign + if (is.null(sign)){ + sign <- array(F, dim = dim(data)) + } else { + if (!is.array(sign)) { + stop("Parameter 'sign' must be a boolean array or NULL.") + } + } ## Check row.dim if (!is.character(row.dim)) { stop("Parameter 'row.dim' must be a character string.") @@ -363,14 +382,13 @@ SCPlotScorecard <- function(data, sign, metric.text.color <- table_colors$metric.text.color # metric.text.bold <- table_colors$metric.text.bold - # Remove temporary tables + # Remove temporary table rm(table_temp) - rm(table_sign_temp) - - ## Format values to underline in table - metric.underline <- MergeDims(sign, c('sdate', 'metric') , rename_dim = 'col', na.rm =F) - metric.underline <- MergeDims(metric.underline, c('time', 'region') , rename_dim = 'row', na.rm =F) + ## Format values to underline in table + metric.underline <- MergeDims(sign, c(subcol.dim, col.dim) , rename_dim = 'col', na.rm =F) + metric.underline <- MergeDims(metric.underline, c(subrow.dim, row.dim) , rename_dim = 'row', na.rm =F) + options(stringsAsFactors = FALSE) title <- data.frame(c1 = table.title, c2 = n.columns) subtitle <- data.frame(c1 = table.subtitle, c2 = n.columns) @@ -423,7 +441,7 @@ SCPlotScorecard <- function(data, sign, table.html <- column_spec(table.html.part[[n.last.list]], 1, bold = TRUE, width_min = paste0(col1.width, 'cm')) %>% column_spec(2, bold = TRUE, width_min = paste0(col2.width, 'cm')) %>% - column_spec(3:n.columns, width_min = "1.2cm") %>% + column_spec(3:n.columns, width_min = paste0(columns.width, 'cm')) %>% column_spec(c(1, 2, column.borders), border_right = "2px solid black") %>% column_spec(1, border_left = "2px 
solid black") %>% column_spec(n.columns, border_right = "2px solid black") %>% diff --git a/modules/Scorecards/R/tmp/ScorecardsMulti.R b/modules/Scorecards/R/tmp/ScorecardsMulti.R index 89f1df44..f05275cf 100644 --- a/modules/Scorecards/R/tmp/ScorecardsMulti.R +++ b/modules/Scorecards/R/tmp/ScorecardsMulti.R @@ -2,7 +2,7 @@ #' #'@description Scorecards function to create scorecard tables for multiple systems #' and references (types 9 to 12). -#'@param input_data is an array of spatially aggregated metrics containing the +#'@param data is an array of spatially aggregated metrics containing the #' following dimensions; system, reference, metric, time, sdate, region. #'@param system a vector of character strings defining the systems following the #' archive.yml format from verification suite @@ -21,6 +21,28 @@ #' include in the scorecard title #'@param fileout.label a character string containing additional information to #' include in the output png file when saving the scorecard. +#'@param plot.legend A logical value to determine if the legend is plotted. +#'@param legend.breaks A vector of numerics or a list of vectors of numerics, +#' containing the breaks for the legends. If a vector is given as input, then +#' these breaks will be repeated for each legend.dim. A list of vectors can be +#' given as input if the legend.dims require different breaks. This parameter +#' is required even if the legend is not plotted, to define the colors in the +#' scorecard table. +#'@param legend.white.space A numeric value defining the initial starting +#' position of the legend bars, the white space infront of the legend is +#' calculated from the left most point of the table as a distance in cm. +#'@param legend.width A numeric value to define the width of the legend bars. +#'@param legend.height A numeric value to define the height of the legend bars. +#'@param label.scale A numeric value to define the size of the legend labels. +#'@param col1.width A numeric value defining the width of the first table column +#' in cm. +#'@param col2.width A numeric value defining the width of the second table +#' column in cm. +#'@param columns.width A numeric value defining the width all columns within the +#' table in cm (excluding the first and second columns containing the titles). +#'@param font.size A numeric indicating the font size on the scorecard table. +#'@param round.decimal A numeric indicating to which decimal point the data +#' is to be displayed in the scorecard table. Default is 2. #'@param output.path a path of the location to save the scorecard plots. 
#' #'@return @@ -44,19 +66,16 @@ #' ) -ScorecardsMulti <- function(data, - system, - reference, - var, - start.year, - end.year, - start.months, - forecast.months, - region.names, - metrics, - table.label, - fileout.label, - output.path){ +ScorecardsMulti <- function(data, sign, system, reference, var, start.year, + end.year, start.months, forecast.months, + region.names, metrics, plot.legend = TRUE, + legend.breaks = NULL, legend.white.space = NULL, + legend.width = 555, legend.height = 50, + table.label = NULL, fileout.label = NULL, + label.scale = 1.4, font.size = 1.1, + col1.width = NULL, col2.width = NULL, + columns.width = NULL, + round.decimal = 2, output.path){ ## Checks to apply: # first dimension in aggregated_metrics is system and second dimension is reference @@ -70,29 +89,40 @@ ScorecardsMulti <- function(data, fileout.label <- "" } - ## Make sure input_data is in correct order for using in functions: + ## Make sure data is in correct order for using in functions: data_order <- c('system','reference','metric','time','sdate','region') data <- Reorder(data, data_order) - - ## Identify metrics loaded - metrics_loaded <- attributes(data)$metrics - - ## Select only the metrics to visualize from data - input_data <- Subset(data, along = 'metric', indices = match(metrics, metrics_loaded)) - attributes(input_data)$metrics <- metrics - + sign <- Reorder(sign, data_order) + ## Transform data for scorecards by forecast month (types 11 & 12) - transformed_data <- SCTransform(data = input_data, + transformed_data <- SCTransform(data = data, sdate_dim = 'sdate', ftime_dim = 'time') + transformed_sign <- SCTransform(data = sign, + sdate_dim = 'sdate', + ftime_dim = 'time') + ## Load configuration files - sys_dict <- read_yaml("/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/conf/archive.yml")$esarchive - var_dict <- read_yaml("/esarchive/scratch/nmilders/gitlab/git_clones/csscorecards/inst/config/variable-dictionary.yml")$vars + if (is.null(recipe$Run$filesystem)) { + filesystem <- 'esarchive' + } else { + filesystem <- recipe$Run$filesystem + } + sys_dict <- read_yaml("conf/archive.yml")[[filesystem]] + var_dict <- read_yaml("conf/variable-dictionary.yml")$vars ## Get scorecards table display names from configuration files var.name <- var_dict[[var]]$long_name - var.units <- var_dict[[var]]$units + + for (i in 1:length(recipe$Analysis$Variables)) { + if (recipe$Analysis$Variables[[i]]$name == var) { + var.units <- recipe$Analysis$Variables[[i]]$units + } + } + if (is.null(var.units)) { + var.units <- var_dict[[var]]$units + } system.name <- NULL reference.name <- NULL @@ -147,18 +177,6 @@ ScorecardsMulti <- function(data, ## Legend upper limit color legend.col.sup <- .legend_col_sup(metrics, colorsup) legend.col.sup <- legend.col.sup[metrics] - - ## Legend inputs - plot.legend = TRUE - label.scale = 1.4 - legend.width = 555 - legend.height = 50 - - ## Data display inputs - round.decimal = 2 - font.size = 1.1 - - legend.white.space <- col1.width <- col2.width <- NULL ## Use default values of function ## Loop over region for(reg in 1:length(region.names)){ @@ -189,12 +207,17 @@ ScorecardsMulti <- function(data, start.year = start.year, end.year = end.year, scorecard.type = 9, region = sub(" ", "-", region.names[reg]), fileout.label = fileout.label, output.path = output.path) + if(model == 'system'){ - data_sc_9 <- Subset(input_data, c('reference','region'), list(1, reg), drop = 'selected') + data_sc_9 <- Subset(data, c('reference','region'), list(1, reg), drop = 'selected') + 
sign_sc_9 <- Subset(sign, c('reference','region'), list(1, reg), drop = 'selected') } else if(model == 'reference'){ - data_sc_9 <- Subset(input_data, c('system','region'), list(1, reg), drop = 'selected') + data_sc_9 <- Subset(data, c('system','region'), list(1, reg), drop = 'selected') + sign_sc_9 <- Subset(sign, c('system','region'), list(1, reg), drop = 'selected') } + SCPlotScorecard(data = data_sc_9, + sign = sign_sc_9, row.dim = model, subrow.dim = 'time', col.dim = 'metric', @@ -222,6 +245,7 @@ ScorecardsMulti <- function(data, legend.white.space = legend.white.space, col1.width = 4, col2.width = col2.width, + columns.width = columns.width, fileout = fileout) @@ -231,13 +255,18 @@ ScorecardsMulti <- function(data, start.year = start.year, end.year = end.year, scorecard.type = 10, region = sub(" ", "-", region.names[reg]), fileout.label = fileout.label, output.path = output.path) + new_order <- c('system', 'reference', 'metric', 'region','sdate', 'time') + if(model == 'system'){ - data_sc_10 <- Subset(Reorder(input_data, new_order), c('reference','region'), list(1, reg), drop = 'selected') + data_sc_10 <- Subset(Reorder(data, new_order), c('reference','region'), list(1, reg), drop = 'selected') + sign_sc_10 <- Subset(Reorder(sign, new_order), c('reference','region'), list(1, reg), drop = 'selected') } else if(model == 'reference'){ - data_sc_10 <- Subset(Reorder(input_data, new_order), c('system','region'), list(1, reg), drop = 'selected') + data_sc_10 <- Subset(Reorder(data, new_order), c('system','region'), list(1, reg), drop = 'selected') + sign_sc_10 <- Subset(Reorder(sign, new_order), c('system','region'), list(1, reg), drop = 'selected') } SCPlotScorecard(data = data_sc_10, + sign = sign_sc_10, row.dim = 'time', subrow.dim = model, col.dim = 'metric', @@ -265,6 +294,7 @@ ScorecardsMulti <- function(data, legend.white.space = legend.white.space, col1.width = col1.width, col2.width = 4, + columns.width = columns.width, fileout = fileout) @@ -274,12 +304,17 @@ ScorecardsMulti <- function(data, start.year = start.year, end.year = end.year, scorecard.type = 11, region = sub(" ", "-", region.names[reg]), fileout.label = fileout.label, output.path = output.path) + if(model == 'system'){ data_sc_11 <- Subset(transformed_data, c('reference','region'), list(1, reg), drop = 'selected') + sign_sc_11 <- Subset(transformed_sign, c('reference','region'), list(1, reg), drop = 'selected') } else if(model == 'reference'){ data_sc_11 <- Subset(transformed_data, c('system','region'), list(1, reg), drop = 'selected') + sign_sc_11 <- Subset(transformed_sign, c('system','region'), list(1, reg), drop = 'selected') } + SCPlotScorecard(data = data_sc_11, + sign = sign_sc_11, row.dim = model, subrow.dim = 'time', col.dim = 'metric', @@ -307,6 +342,7 @@ ScorecardsMulti <- function(data, legend.white.space = legend.white.space, col1.width = 4, col2.width = col2.width, + columns.width = columns.width, fileout = fileout) @@ -316,13 +352,19 @@ ScorecardsMulti <- function(data, start.year = start.year, end.year = end.year, scorecard.type = 12, region = sub(" ", "-", region.names[reg]), fileout.label = fileout.label, output.path = output.path) + new_order <- c('system', 'reference', 'metric', 'region','sdate', 'time') + if(model == 'system'){ data_sc_12 <- Subset(Reorder(transformed_data, new_order), c('reference','region'), list(1, reg), drop = 'selected') + sign_sc_12 <- Subset(Reorder(transformed_sign, new_order), c('reference','region'), list(1, reg), drop = 'selected') } else if(model == 'reference'){ 
data_sc_12 <- Subset(Reorder(transformed_data, new_order), c('system','region'), list(1, reg), drop = 'selected') + sign_sc_12 <- Subset(Reorder(transformed_sign, new_order), c('system','region'), list(1, reg), drop = 'selected') } + SCPlotScorecard(data = data_sc_12, + sign = sign_sc_12, row.dim = 'time', subrow.dim = model, col.dim = 'metric', @@ -350,6 +392,7 @@ ScorecardsMulti <- function(data, legend.white.space = legend.white.space, col1.width = col1.width, col2.width = 4, + columns.width = columns.width, fileout = fileout) } ## close loop on region diff --git a/modules/Scorecards/R/tmp/ScorecardsSingle.R b/modules/Scorecards/R/tmp/ScorecardsSingle.R index 190ae3d5..3eb06537 100644 --- a/modules/Scorecards/R/tmp/ScorecardsSingle.R +++ b/modules/Scorecards/R/tmp/ScorecardsSingle.R @@ -23,7 +23,29 @@ #' include in the scorecard title #'@param fileout.label a character string containing additional information to #' include in the output png file when saving the scorecard. -#'@param output.path a path of the location to save the scorecard plots +#'@param plot.legend A logical value to determine if the legend is plotted. +#'@param legend.breaks A vector of numerics or a list of vectors of numerics, +#' containing the breaks for the legends. If a vector is given as input, then +#' these breaks will be repeated for each legend.dim. A list of vectors can be +#' given as input if the legend.dims require different breaks. This parameter +#' is required even if the legend is not plotted, to define the colors in the +#' scorecard table. +#'@param legend.white.space A numeric value defining the initial starting +#' position of the legend bars, the white space infront of the legend is +#' calculated from the left most point of the table as a distance in cm. +#'@param legend.width A numeric value to define the width of the legend bars. +#'@param legend.height A numeric value to define the height of the legend bars. +#'@param label.scale A numeric value to define the size of the legend labels. +#'@param col1.width A numeric value defining the width of the first table column +#' in cm. +#'@param col2.width A numeric value defining the width of the second table +#' column in cm. +#'@param columns.width A numeric value defining the width all columns within the +#' table in cm (excluding the first and second columns containing the titles). +#'@param font.size A numeric indicating the font size on the scorecard table. +#'@param round.decimal A numeric indicating to which decimal point the data +#' is to be displayed in the scorecard table. Default is 2. 
+#'@param output.path A path of the location to save the scorecard plots #' #'@return #' This function returns 4 scorecards images, saved in the directory output.path @@ -43,15 +65,18 @@ #' output.path = '/esarchive/scratch/nmilders/scorecards_images/test' #' ) #'@export -ScorecardsSingle <- function(data, sign, system, reference, var, start.year, end.year, - start.months, forecast.months, region.names, - metrics, legend.breaks = NULL, +ScorecardsSingle <- function(data, sign, system, reference, var, start.year, + end.year, start.months, forecast.months, + region.names, metrics, plot.legend = TRUE, + legend.breaks = NULL, legend.white.space = NULL, + legend.width = 550, legend.height = 50, table.label = NULL, fileout.label = NULL, - legend.white.space = NULL, - col1.width = NULL, col2.width = NULL, - output.path){ - - ## Checks to apply: + label.scale = 1.4, font.size = 1.1, + col1.width = NULL, col2.width = NULL, + columns.width = 1.2, + round.decimal = 2, output.path){ + + ## Checks to apply: # First dimension in aggregated_metrics is system and second dimension is reference # To allow 1 region - if region = 1 --> only scorecards 1 & 3 need to be plotted # If any dimension of input dat is 1, make sure dimension is still present in array @@ -84,32 +109,58 @@ ScorecardsSingle <- function(data, sign, system, reference, var, start.year, end data <- Reorder(data, data_order) sign <- Reorder(sign, data_order) - ## Identify metrics loaded - metrics_loaded <- attributes(data)$metrics + ####### PROBLEM HERE ?? ###### + # No attributes in score aggregated dataset + + # ## Identify metrics loaded + # metrics_loaded <- attributes(data)$metrics + # + # ## Select only the metrics to visualize from data + # input_data <- Subset(data, along = 'metric', indices = match(metrics, metrics_loaded)) + # attributes(input_data)$metrics <- metrics + # + # input_sign <- Subset(sign, along = 'metric', indices = match(metrics, metrics_loaded)) + # attributes(input_sign)$metrics <- metrics - ## Select only the metrics to visualize from data - input_data <- Subset(data, along = 'metric', indices = match(metrics, metrics_loaded)) - attributes(input_data)$metrics <- metrics + input_data <- data #temporary + input_sign <- sign - input_sign <- Subset(sign, along = 'metric', indices = match(metrics, metrics_loaded)) - attributes(input_sign)$metrics <- metrics + ############################ ## Transform data for scorecards by forecast month (types 3 & 4) - transformed_data <- SCTransform(data = input_data, + transformed_data <- SCTransform(data = data, sdate_dim = 'sdate', ftime_dim = 'time') - transformed_sign <- SCTransform(data = input_sign, + transformed_sign <- SCTransform(data = sign, sdate_dim = 'sdate', ftime_dim = 'time') ## Load configuration files - sys_dict <- read_yaml("/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/conf/archive.yml")$esarchive - var_dict <- read_yaml("/esarchive/scratch/nmilders/gitlab/git_clones/csscorecards/inst/config/variable-dictionary.yml")$vars + if (is.null(recipe$Run$filesystem)) { + filesystem <- 'esarchive' + } else { + filesystem <- recipe$Run$filesystem + } + sys_dict <- read_yaml("conf/archive.yml")[[filesystem]] + var_dict <- read_yaml("conf/variable-dictionary.yml")$vars ## Get scorecards table display names from configuration files var.name <- var_dict[[var]]$long_name - var.units <- var_dict[[var]]$units + + # for (i in 1:length(recipe$Analysis$Variables)) { + # if (recipe$Analysis$Variables[[i]]$name == var) { ## depends on way recipe is written + # var.units <- 
recipe$Analysis$Variables[[i]]$units + # } + # } + for (i in 1:length(recipe$Analysis$Variables)) { + if (recipe$Analysis$Variables$name == var) { ## TEMPORARY + var.units <- recipe$Analysis$Variables$units + } + } + if (is.null(var.units)) { + var.units <- var_dict[[var]]$units + } ## Get metric long names metric.names.list <- .met_names(metrics, var.units) @@ -136,16 +187,6 @@ ScorecardsSingle <- function(data, sign, system, reference, var, start.year, end legend.col.sup <- .legend_col_sup(metrics, colorsup) legend.col.sup <- legend.col.sup[metrics] - ## Legend inputs - plot.legend = TRUE - label.scale = 1.4 - legend.width = 555 - legend.height = 50 - - ## Data display inputs - round.decimal = 2 - font.size = 1.1 - ## Loop over system and reference for each scorecard plot for (sys in 1:dim(input_data)['system']) { for (ref in 1:dim(input_data)['reference']) { @@ -223,6 +264,7 @@ ScorecardsSingle <- function(data, sign, system, reference, var, start.year, end legend.white.space = legend.white.space, col1.width = col1.width, col2.width = col2.width, + columns.width = columns.width, fileout = fileout) @@ -269,6 +311,7 @@ ScorecardsSingle <- function(data, sign, system, reference, var, start.year, end legend.white.space = legend.white.space, col1.width = col1.width, col2.width = col2.width, + columns.width = columns.width, fileout = fileout) } ## close if @@ -311,6 +354,7 @@ ScorecardsSingle <- function(data, sign, system, reference, var, start.year, end legend.white.space = legend.white.space, col1.width = col1.width, col2.width = col2.width, + columns.width = columns.width, fileout = fileout) @@ -357,6 +401,7 @@ ScorecardsSingle <- function(data, sign, system, reference, var, start.year, end legend.white.space = legend.white.space, col1.width = col1.width, col2.width = col2.width, + columns.width = columns.width, fileout = fileout) } ## close if diff --git a/modules/Scorecards/Scorecards.R b/modules/Scorecards/Scorecards.R index bad1497c..af797025 100644 --- a/modules/Scorecards/Scorecards.R +++ b/modules/Scorecards/Scorecards.R @@ -17,8 +17,8 @@ source('modules/Scorecards/R/tmp/SCPlotScorecard.R') ## Define function Scorecards <- function(recipe) { - ## Parameters for loading data - input.path <- "/esarchive/scratch/nmilders/scorecards_data/test/recipe_scorecards_data_loading_nadia_20231219150442/outputs/" #temp + ## Parameters for loading data files + input.path <- "/esarchive/scratch/nmilders/scorecards_data/syear/to_system/" #temp skill.input.path <- paste0(input.path, "Skill/") #paste0(recipe$Run$output_dir, "/outputs/Skill/") stats.input.path <- paste0(input.path, "Statistics/") #paste0(recipe$Run$output_dir, "/outputs/Statistics/") output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/") @@ -42,12 +42,28 @@ Scorecards <- function(recipe) { # split = ", | |,")[[1]]) # } + start.months <- sprintf("%02d", start.months) + period <- paste0(start.year, "-", end.year) + + ## Parameters for data aggregation regions <- recipe$Analysis$Workflow$Scorecards$regions for (i in names(regions)){regions[[i]] <- unlist(regions[[i]])} metric.aggregation <- recipe$Analysis$Workflow$Scorecards$metric_aggregation metrics.load <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Skill$metric), ", | |,")) metrics.visualize <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric), ", | |,")) + + if(is.null(recipe$Analysis$Workflow$Scorecards$signif_alpha)){ + alpha <- 0.05 + } else { + alpha <- recipe$Analysis$Workflow$Scorecards$signif_alpha + } + + if 
(is.null(recipe$Analysis$Workflow$Scorecards$inf_to_na)){ + inf.to.na <- FALSE + } else { + inf.to.na <- recipe$Analysis$Workflow$Scorecards$inf_to_na + } if(is.null(recipe$Analysis$remove_NAs)){ na.rm <- FALSE @@ -57,19 +73,33 @@ Scorecards <- function(recipe) { ncores <- 1 # recipe$Analysis$ncores - ## Parameters for scorecard table - inf.to.na <- recipe$Analysis$Workflow$Scorecards$inf_to_na + ## Paramters for scorecard layout table.label <- recipe$Analysis$Workflow$Scorecards$table_label fileout.label <- recipe$Analysis$Workflow$Scorecards$fileout_label - legend.white.space <- recipe$Analysis$Workflow$Scorecards$legend_white_space col1.width <- recipe$Analysis$Workflow$Scorecards$col1_width col2.width <- recipe$Analysis$Workflow$Scorecards$col2_width - calculate.diff <- recipe$Analysis$Workflow$Scorecards$calculate_diff - - ## Load data files - start.months <- sprintf("%02d", start.months) - period <- paste0(start.year, "-", end.year) + columns.width <- recipe$Analysis$Workflow$Scorecards$columns_width + legend.breaks <- recipe$Analysis$Workflow$Scorecards$legend_breaks + legend.white.space <- recipe$Analysis$Workflow$Scorecards$legend_white_space + legend.width <- recipe$Analysis$Workflow$Scorecards$legend_width + legend.height <- recipe$Analysis$Workflow$Scorecards$legend_height + label.scale <- recipe$Analysis$Workflow$Scorecards$label_scale + round.decimal <- recipe$Analysis$Workflow$Scorecards$round_decimal + font.size <- recipe$Analysis$Workflow$Scorecards$font_size + if (is.null(recipe$Analysis$Workflow$Scorecards$plot_legend)){ + plot.legend <- TRUE + } else { + plot.legend <- recipe$Analysis$Workflow$Scorecards$plot_legend + } + + ## Define if difference scorecard is to be plotted + if (is.null(recipe$Analysis$Workflow$Scorecards$calculate_diff)){ + calculate.diff <- FALSE + } else { + calculate.diff <- recipe$Analysis$Workflow$Scorecards$calculate_diff + } + ####### SKILL AGGREGATION ####### if(metric.aggregation == 'skill'){ @@ -79,7 +109,7 @@ Scorecards <- function(recipe) { var = var, start.year = start.year, end.year = end.year, - metrics = metrics.load, + metrics = metrics.visualize, ## metrics.load start.months = start.months, forecast.months = forecast.months, inf.to.na = inf.to.na, @@ -130,11 +160,8 @@ Scorecards <- function(recipe) { if(metric.aggregation == 'score'){ ## Comments (Nadia): - ## 1) syear parameter as FALSE instead of NULL for metrics without syear dimension? - ## 2) output files names of statistics not correct, too many _ in name - ## 3) how to load multiple systems? - Loop over sys and ref for metric.aggregation == 'score' - ## Need to test with multiple sdates - + ## syear parameter as FALSE instead of NULL for metrics without syear dimension? 
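    ## A minimal sketch of the two 'score' aggregations implemented in this
    ## section, run on hypothetical toy inputs (the module itself works on
    ## arrays over [time, smonths, region]): RPSS averages the start-year-
    ## resolved scores before forming the skill score, with significance from
    ## s2dv::RandomWalkTest on the paired yearly scores; EnsCorr is rebuilt
    ## from aggregated statistics and tested with a two-sided t-test on n_eff.
    library(s2dv)
    set.seed(1)
    rps_syear      <- array(runif(20, 0.3, 0.5), dim = c(syear = 20)) # toy RPS per start year
    rps_clim_syear <- array(runif(20, 0.4, 0.6), dim = c(syear = 20)) # toy climatological RPS
    rpss <- 1 - mean(rps_syear) / mean(rps_clim_syear) # aggregate scores first, then form RPSS
    sign_rpss <- RandomWalkTest(rps_syear, rps_clim_syear, time_dim = 'syear',
                                test.type = 'two.sided', alpha = 0.05,
                                pval = FALSE, sign = TRUE, ncores = NULL)$sign
    cov_aggr <- 0.85; std_hcst <- 1.10; std_obs <- 1.00; n_eff <- 25 # toy aggregated statistics
    enscorr     <- cov_aggr / (std_hcst * std_obs)
    t_alpha2_n2 <- qt(p = 0.05 / 2, df = n_eff - 2, lower.tail = FALSE)
    t_stat      <- abs(enscorr) * sqrt(n_eff - 2) / sqrt(1 - enscorr^2)
    sign_corr   <- t_stat > t_alpha2_n2 # TRUE where the correlation is significant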
+ lon_dim <- 'longitude' lat_dim <- 'latitude' time_dim <- 'syear' @@ -226,7 +253,7 @@ Scorecards <- function(recipe) { ## Calculate significance sign_rpss <- RandomWalkTest(rps_syear_spatial_aggr, rps_clim_syear_spatial_aggr, time_dim = time_dim, test.type = 'two.sided', - alpha = 0.05, pval = FALSE, sign = TRUE, + alpha = alpha, pval = FALSE, sign = TRUE, ncores = NULL)$sign ## Save metric result in arrays @@ -298,7 +325,7 @@ Scorecards <- function(recipe) { ## Calculate significance sign_crpss <- RandomWalkTest(crps_syear_spatial_aggr, crps_clim_syear_spatial_aggr, time_dim = time_dim, test.type = 'two.sided', - alpha = 0.05, pval = FALSE, sign = TRUE, + alpha = alpha, pval = FALSE, sign = TRUE, ncores = NULL)$sign ## Save metric result in arrays @@ -401,8 +428,6 @@ Scorecards <- function(recipe) { enscorr <- Subset(enscorr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') ## Calculate significance of corr - alpha <- 0.05 # Apply 95% confidence - t_alpha2_n2 <- qt(p = alpha/2, df = n_eff_spatial_aggr-2, lower.tail = FALSE) t <- abs(enscorr) * sqrt(n_eff_spatial_aggr-2) / sqrt(1-enscorr^2) @@ -506,12 +531,14 @@ Scorecards <- function(recipe) { } ## close if on score + + ####### PLOT SCORECARDS ########## ## Create simple scorecard tables ## (one system only) ## Metrics input must be in the same order as function SC_spatial_aggregation scorecard_single <- ScorecardsSingle(data = aggregated_metrics, - signif = scorecard_sign, + sign = scorecard_sign, system = system, reference = reference, var = var, @@ -523,9 +550,17 @@ Scorecards <- function(recipe) { metrics = metrics.visualize, table.label = table.label, fileout.label = fileout.label, + plot.legend = plot.legend, + legend.breaks = legend.breaks, legend.white.space = legend.white.space, + legend.width = legend.width, + legend.height = legend.height, + label.scale = label.scale, col1.width = col1.width, col2.width = col2.width, + columns.width = columns.width, + font.size = font.size, + round.decimal = round.decimal, output.path = output.path) ## Create multi system/reference scorecard tables diff --git a/modules/Skill/R/tmp/CRPS.R b/modules/Skill/R/tmp/CRPS.R index c08375c4..9f91be34 100644 --- a/modules/Skill/R/tmp/CRPS.R +++ b/modules/Skill/R/tmp/CRPS.R @@ -126,7 +126,7 @@ CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NU Fair = Fair, ncores = ncores)$output1 - if (return_mean == TRUE) { + if (isTRUE(return_mean)) { crps <- MeanDims(crps, time_dim, na.rm = FALSE) } else { crps <- crps diff --git a/modules/Skill/R/tmp/RPS.R b/modules/Skill/R/tmp/RPS.R index 54ec8440..e15a1754 100644 --- a/modules/Skill/R/tmp/RPS.R +++ b/modules/Skill/R/tmp/RPS.R @@ -252,7 +252,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL weights = weights, cross.val = cross.val, na.rm = na.rm, ncores = ncores)$output1 - if (return_mean == TRUE) { + if (isTRUE(return_mean)) { rps <- MeanDims(rps, time_dim, na.rm = TRUE) } else { rps <- rps diff --git a/recipes/recipe_scorecards.yml b/recipes/recipe_scorecards.yml index 434426d0..a75ad1d2 100644 --- a/recipes/recipe_scorecards.yml +++ b/recipes/recipe_scorecards.yml @@ -56,6 +56,9 @@ Analysis: metric: mean_bias EnsCorr rps rpss crps crpss EnsSprErr # list, don't split cross_validation: yes save: 'all' + Statistics: + metric: cov std n_eff + save: 'all' Probabilities: percentiles: [[1/3, 2/3]] # list, don't split save: 'none' @@ -64,19 +67,30 @@ Analysis: Indicators: index: no # ? 
   Scorecards:
-    execute: yes # yes/no
-    regions:
+    execute: yes # Mandatory, yes/no
+    regions: # Mandatory, define regions over which to aggregate data
       Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90}
       Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30}
-      Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: -90}
-    start_months: NULL
-    metric: mean_bias enscorr rpss crpss enssprerr
-    metric_aggregation: 'score'
-    table_label: NULL
-    fileout_label: NULL
-    col1_width: NULL
-    col2_width: NULL
-    calculate_diff: FALSE
+      Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30}
+    start_months: NULL
+    metric: mean_bias enscorr rpss crpss enssprerr # Mandatory, define metrics to visualize in scorecard.
+    metric_aggregation: 'score' # Mandatory, defines the aggregation level of the metrics.
+    signif_alpha: 0.05 # Optional, to set alpha for significance calculation, default is 0.05.
+    table_label: NULL # Optional, to add extra information to the table title.
+    fileout_label: NULL # Optional, to add extra information to the output filename.
+    col1_width: NULL # Optional, to set the width (cm) of the first table column, default is calculated depending on row names.
+    col2_width: NULL # Optional, to set the width (cm) of the second table column, default is calculated depending on subrow names.
+    columns_width: NULL # Optional, to set the width (cm) of all other columns within the table, default is 1.2.
+    plot_legend: TRUE # Optional, to define if the legend is included in the scorecards image, default is TRUE.
+    legend_breaks: NULL # Optional, default uses the legend breaks from modules/Scorecards/R/tmp/Utils.R.
+    legend_white_space: NULL # Optional, default is automatically calculated depending on column sizes.
+    legend_width: NULL # Optional, to set the width of the legend bars, default is 550.
+    legend_height: NULL # Optional, to set the height of the legend bars, default is 50.
+    label_scale: NULL # Optional, to set the scale of the legend bar labels, default is 1.4.
+    round_decimal: NULL # Optional, to round the data shown in the scorecard, default is 2 (decimals).
+    inf_to_na: TRUE # Optional, to set infinite values to NA, default is FALSE.
+    font_size: NULL # Optional, to set the font size of the scorecard table values, default is 1.2.
+    calculate_diff: FALSE # Optional, to calculate difference between two systems or two references, default is FALSE.
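+    # For reference, a minimal sketch of this section with hypothetical values;
+    # per the comments above, only execute, regions, metric and
+    # metric_aggregation are mandatory, and the remaining options fall back to
+    # their defaults when omitted:
+    #   Scorecards:
+    #     execute: yes
+    #     regions:
+    #       Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30}
+    #     metric: rpss crpss
+    #     metric_aggregation: 'score'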
ncores: 7 remove_NAs: no # bool, don't split Output_format: Scorecards # string, don't split diff --git a/tools/check_recipe.R b/tools/check_recipe.R index f4d711f8..c398c345 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -567,43 +567,43 @@ check_recipe <- function(recipe) { # Scorecards if ("Scorecards" %in% names(recipe$Analysis$Workflow)) { if(recipe$Analysis$Workflow$Scorecards$execute == TRUE){ - if (is.null(recipe$Analysis$Workflow$Scorecards$metric)) { - error(recipe$Run$logger, - "Parameter 'metric' must be defined under 'Scorecards'.") - error_status <- T - } else { - sc_metrics <- strsplit(recipe$Analysis$Workflow$Scorecards$metric, - ", | |,")[[1]] - if (recipe$Analysis$Workflow$Scorecards$metric_aggregation == 'score') { - if ('rpss' %in% tolower(sc_metrics)) { - if (!('rps_clim_syear' %in% requested_metrics)) { - requested_metrics <- c(requested_metrics, 'rps_clim_syear') - } - if (!('rps_syear' %in% requested_metrics)) { - requested_metrics <- c(requested_metrics, 'rps_syear') + if (is.null(recipe$Analysis$Workflow$Scorecards$metric)) { + error(recipe$Run$logger, + "Parameter 'metric' must be defined under 'Scorecards'.") + error_status <- T + } else { + sc_metrics <- strsplit(recipe$Analysis$Workflow$Scorecards$metric, + ", | |,")[[1]] + if (recipe$Analysis$Workflow$Scorecards$metric_aggregation == 'score') { + if ('rpss' %in% tolower(sc_metrics)) { + if (!('rps_clim_syear' %in% requested_metrics)) { + requested_metrics <- c(requested_metrics, 'rps_clim_syear') + } + if (!('rps_syear' %in% requested_metrics)) { + requested_metrics <- c(requested_metrics, 'rps_syear') + } } - } - if ('crpss' %in% tolower(sc_metrics)) { - if (!('crps_clim_syear' %in% requested_metrics)) { - requested_metrics <- c(requested_metrics, 'crps_clim_syear') + if ('crpss' %in% tolower(sc_metrics)) { + if (!('crps_clim_syear' %in% requested_metrics)) { + requested_metrics <- c(requested_metrics, 'crps_clim_syear') + } + if (!('crps_syear' %in% requested_metrics)) { + requested_metrics <- c(requested_metrics, 'crps_syear') + } } - if (!('crps_syear' %in% requested_metrics)) { - requested_metrics <- c(requested_metrics, 'crps_syear') + if ('enscorr' %in% tolower(sc_metrics)) { + recipe$Analysis$Workflow$Statistics <- c('std', 'cov', 'n_eff') } + recipe$Analysis$Workflow$Skill$metric <- requested_metrics } - if ('enscorr' %in% tolower(sc_metrics)) { - recipe$Analysis$Workflow$Statistics <- c('standard_deviation', 'covariance') + if (!all(tolower(sc_metrics) %in% tolower(requested_metrics))) { + error(recipe$Run$logger, + paste0("All of the metrics requested under 'Scorecards' must ", + "be requested in the 'Skill' section.")) + error_status <- T } - recipe$Analysis$Workflow$Skill$metric <- requested_metrics - } - if (!all(tolower(sc_metrics) %in% tolower(requested_metrics))) { - error(recipe$Run$logger, - paste0("All of the metrics requested under 'Scorecards' must ", - "be requested in the 'Skill' section.")) - error_status <- T } - } - } + } } # --------------------------------------------------------------------- # RUN CHECKS -- GitLab From e64bfede72a8028524d09636b206dd7be3ffc084 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Fri, 12 Jan 2024 12:07:15 +0100 Subject: [PATCH 18/43] scorecards significance development --- modules/Scorecards/R/tmp/ScorecardsMulti.R | 27 +++++++- modules/Scorecards/R/tmp/ScorecardsSingle.R | 73 +++++++++---------- modules/Scorecards/R/tmp/WeightedMetrics.R | 29 -------- modules/Scorecards/Scorecards.R | 77 ++++++++++++--------- 4 files changed, 100 
insertions(+), 106 deletions(-) diff --git a/modules/Scorecards/R/tmp/ScorecardsMulti.R b/modules/Scorecards/R/tmp/ScorecardsMulti.R index f05275cf..dd9ac257 100644 --- a/modules/Scorecards/R/tmp/ScorecardsMulti.R +++ b/modules/Scorecards/R/tmp/ScorecardsMulti.R @@ -93,6 +93,17 @@ ScorecardsMulti <- function(data, sign, system, reference, var, start.year, data_order <- c('system','reference','metric','time','sdate','region') data <- Reorder(data, data_order) sign <- Reorder(sign, data_order) + + ## Identify metrics loaded + metrics_loaded <- attributes(data)$metrics + + ## Select only the metrics to visualize from data + data <- Subset(data, along = 'metric', indices = match(metrics, metrics_loaded)) + attributes(data)$metrics <- metrics + + sign <- Subset(sign, along = 'metric', indices = match(metrics, metrics_loaded)) + attributes(sign)$metrics <- metrics + ## Transform data for scorecards by forecast month (types 11 & 12) transformed_data <- SCTransform(data = data, @@ -115,9 +126,15 @@ ScorecardsMulti <- function(data, sign, system, reference, var, start.year, ## Get scorecards table display names from configuration files var.name <- var_dict[[var]]$long_name - for (i in 1:length(recipe$Analysis$Variables)) { - if (recipe$Analysis$Variables[[i]]$name == var) { - var.units <- recipe$Analysis$Variables[[i]]$units + if ('name' %in% names(recipe$Analysis$Variables)){ + if (recipe$Analysis$Variables$name == var) { + var.units <- recipe$Analysis$Variables$units + } + } else { + for (i in 1:length(recipe$Analysis$Variables)) { + if (recipe$Analysis$Variables[[i]]$name == var) { + var.units <- recipe$Analysis$Variables[[i]]$units + } } } if (is.null(var.units)) { @@ -136,6 +153,10 @@ ScorecardsMulti <- function(data, sign, system, reference, var, start.year, reference.name <- c(reference.name, reference.name1) } + if("Multimodel" %in% system ){ + system.name <- c(system.name, "Multimodel") + } + ## Get metric long names metric.names.list <- .met_names(metrics, var.units) diff --git a/modules/Scorecards/R/tmp/ScorecardsSingle.R b/modules/Scorecards/R/tmp/ScorecardsSingle.R index 3eb06537..1e8f93f6 100644 --- a/modules/Scorecards/R/tmp/ScorecardsSingle.R +++ b/modules/Scorecards/R/tmp/ScorecardsSingle.R @@ -104,29 +104,21 @@ ScorecardsSingle <- function(data, sign, system, reference, var, start.year, fileout.label <- "" } - ## Make sure input_data is in correct order for using in functions: + ## Make sure data is in correct order for using in functions: data_order <- c('system', 'reference', 'metric', 'time', 'sdate', 'region') data <- Reorder(data, data_order) sign <- Reorder(sign, data_order) - - ####### PROBLEM HERE ?? 
###### - # No attributes in score aggregated dataset - - # ## Identify metrics loaded - # metrics_loaded <- attributes(data)$metrics - # - # ## Select only the metrics to visualize from data - # input_data <- Subset(data, along = 'metric', indices = match(metrics, metrics_loaded)) - # attributes(input_data)$metrics <- metrics - # - # input_sign <- Subset(sign, along = 'metric', indices = match(metrics, metrics_loaded)) - # attributes(input_sign)$metrics <- metrics - - input_data <- data #temporary - input_sign <- sign - - ############################ - + + ## Identify metrics loaded + metrics_loaded <- attributes(data)$metrics + + ## Select only the metrics to visualize from data + data <- Subset(data, along = 'metric', indices = match(metrics, metrics_loaded)) + attributes(data)$metrics <- metrics + + sign <- Subset(sign, along = 'metric', indices = match(metrics, metrics_loaded)) + attributes(sign)$metrics <- metrics + ## Transform data for scorecards by forecast month (types 3 & 4) transformed_data <- SCTransform(data = data, sdate_dim = 'sdate', @@ -148,16 +140,17 @@ ScorecardsSingle <- function(data, sign, system, reference, var, start.year, ## Get scorecards table display names from configuration files var.name <- var_dict[[var]]$long_name - # for (i in 1:length(recipe$Analysis$Variables)) { - # if (recipe$Analysis$Variables[[i]]$name == var) { ## depends on way recipe is written - # var.units <- recipe$Analysis$Variables[[i]]$units - # } - # } - for (i in 1:length(recipe$Analysis$Variables)) { - if (recipe$Analysis$Variables$name == var) { ## TEMPORARY + if ('name' %in% names(recipe$Analysis$Variables)){ + if (recipe$Analysis$Variables$name == var) { var.units <- recipe$Analysis$Variables$units } - } + } else { + for (i in 1:length(recipe$Analysis$Variables)) { + if (recipe$Analysis$Variables[[i]]$name == var) { + var.units <- recipe$Analysis$Variables[[i]]$units + } + } + } if (is.null(var.units)) { var.units <- var_dict[[var]]$units } @@ -188,20 +181,20 @@ ScorecardsSingle <- function(data, sign, system, reference, var, start.year, legend.col.sup <- legend.col.sup[metrics] ## Loop over system and reference for each scorecard plot - for (sys in 1:dim(input_data)['system']) { - for (ref in 1:dim(input_data)['reference']) { + for (sys in 1:dim(data)['system']) { + for (ref in 1:dim(data)['reference']) { ## TO DO: Apply check to each scorecard function ## check dimension 'metric' exists: - if (!("metric" %in% names(dim(input_data)))) { - dim(input_data) <- c(metric = 1, dim(input_data)) + if (!("metric" %in% names(dim(data)))) { + dim(data) <- c(metric = 1, dim(data)) } ## Find position of mean bias metric to calculate breaks breaks_bias <- NULL if ('mean_bias' %in% metrics){ - stopifnot(identical(names(dim(Subset(input_data, c('system', 'reference'), list(sys, ref), drop = 'selected'))), c('metric','time','sdate','region'))) - temp_data <- Subset(input_data, c('system', 'reference'), list(sys, ref), drop = 'selected') + stopifnot(identical(names(dim(Subset(data, c('system', 'reference'), list(sys, ref), drop = 'selected'))), c('metric','time','sdate','region'))) + temp_data <- Subset(data, c('system', 'reference'), list(sys, ref), drop = 'selected') pos_bias <- which(metrics == 'mean_bias') if(var == 'psl'){ temp_data[pos_bias,,,] <- temp_data[pos_bias,,,]/100 @@ -232,8 +225,8 @@ ScorecardsSingle <- function(data, sign, system, reference, var, start.year, start.year = start.year, end.year = end.year, scorecard.type = 1, fileout.label = fileout.label, output.path = output.path) - 
data_sc_1 <- Subset(input_data, c('system', 'reference'), list(sys, ref), drop = 'selected') - sign_sc_1 <- Subset(input_sign, c('system', 'reference'), list(sys, ref), drop = 'selected') + data_sc_1 <- Subset(data, c('system', 'reference'), list(sys, ref), drop = 'selected') + sign_sc_1 <- Subset(sign, c('system', 'reference'), list(sys, ref), drop = 'selected') SCPlotScorecard(data = data_sc_1, sign = sign_sc_1, @@ -272,15 +265,15 @@ ScorecardsSingle <- function(data, sign, system, reference, var, start.year, ## (reorder only) ## Scorecard type 2 is same as type 1 for only one region, therefore is ## only plotted if more that one region is requested - if(dim(input_data)['region'] > 1) { + if(dim(data)['region'] > 1) { fileout <- .Filename(system = system[sys], reference = reference[ref], var = var, start.year = start.year, end.year = end.year, scorecard.type = 2, fileout.label = fileout.label, output.path = output.path) new_order <- c('metric', 'region', 'sdate', 'time') - data_sc_2 <- Reorder(Subset(input_data, c('system', 'reference'), list(sys, ref), drop = 'selected'), new_order) - sign_sc_2 <- Reorder(Subset(input_sign, c('system', 'reference'), list(sys, ref), drop = 'selected'), new_order) + data_sc_2 <- Reorder(Subset(data, c('system', 'reference'), list(sys, ref), drop = 'selected'), new_order) + sign_sc_2 <- Reorder(Subset(sign, c('system', 'reference'), list(sys, ref), drop = 'selected'), new_order) SCPlotScorecard(data = data_sc_2, sign = sign_sc_2, @@ -362,7 +355,7 @@ ScorecardsSingle <- function(data, sign, system, reference, var, start.year, ## (transformation and reorder) ## Scorecard type 4 is same as type 3 for only one region, therefore is ## only plotted if more that one region is requested - if(dim(input_data)['region'] > 1) { + if(dim(data)['region'] > 1) { fileout <- .Filename(system = system[sys], reference = reference[ref], var = var, start.year = start.year, end.year = end.year, scorecard.type = 4, diff --git a/modules/Scorecards/R/tmp/WeightedMetrics.R b/modules/Scorecards/R/tmp/WeightedMetrics.R index aea23c56..7df20c71 100644 --- a/modules/Scorecards/R/tmp/WeightedMetrics.R +++ b/modules/Scorecards/R/tmp/WeightedMetrics.R @@ -88,35 +88,6 @@ WeightedMetrics <- function(loaded_metrics, regions, metric.aggregation, } ## close loop on reference } ## close loop on system - ## skill aggregation: - if (metric.aggregation == 'score') { - if (all(c("rps", "rps_clim") %in% metrics)) { - ## Calculate RPSS from aggregated RPS and RPS_clim - all_metric_means <- multiApply::Apply(data = all_metric_means, - target_dims = 'metric', - fun = function(x, met) { - res <- 1 - x[which(met == 'rps')] / x[which(met == 'rps_clim')] - c(x, res)}, met = metrics, - output_dims = 'metric', - ncores = ncores)$output1 - ## Define name of newly calculated RPSS metric - metrics <- c(metrics, "rpss_score_aggr") - } - if (all(c("crps", "crps_clim") %in% metrics)) { - ## Calculate CRPSS from aggragated CRPS and CRPS_clim - all_metric_means <- multiApply::Apply(data = all_metric_means, - target_dims = 'metric', - fun = function(x, met) { - res <- 1 - x[which(met == 'crps')] / x[which(met == 'crps_clim')] - c(x, res)}, - met = metrics, - output_dims = 'metric', - ncores = ncores)$output1 - ## Define name of newly calculated CRPSS metric - metrics <- c(metrics, "crpss_score_aggr") - } - ## Add warning in case metric.aggregation == 'score' but 1 of the metrics from each pair is missing - } ## reorder dimensions in array all_metric_means <- s2dv::Reorder(all_metric_means, 
                                        c('system','reference','metric','time','sdate','region'))
 
diff --git a/modules/Scorecards/Scorecards.R b/modules/Scorecards/Scorecards.R
index af797025..889bdc1e 100644
--- a/modules/Scorecards/Scorecards.R
+++ b/modules/Scorecards/Scorecards.R
@@ -18,7 +18,7 @@ Scorecards <- function(recipe) {
   
   ## Parameters for loading data files
-  input.path <- "/esarchive/scratch/nmilders/scorecards_data/syear/to_system/" #temp
+  input.path <- "/esarchive/scratch/nmilders/scorecards_data/syear/testing/" #temp
   skill.input.path <- paste0(input.path, "Skill/") #paste0(recipe$Run$output_dir, "/outputs/Skill/")
   stats.input.path <- paste0(input.path, "Statistics/") #paste0(recipe$Run$output_dir, "/outputs/Statistics/")
   output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/")
@@ -30,17 +30,16 @@ Scorecards <- function(recipe) {
   end.year <- as.numeric(recipe$Analysis$Time$hcst_end)
   forecast.months <- recipe$Analysis$Time$ftime_min : recipe$Analysis$Time$ftime_max
   calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method)
-
-  # NOTE (Eva): This condition needs to be checked, in my case
-  # (recipe$Analysis$Workflow$Scorecards$start_months = NULL)
-  start.months <- 1:12 # I added this line
-  # Needs to be corrected:
-  # if (recipe$Analysis$Workflow$Scorecards$start_months == 'all') {
-  #   start.months <- 1:12
-  # } else {
-  #   start.months <- as.numeric(strsplit(recipe$Analysis$Workflow$Scorecards$start_months,
-  #                                       split = ", | |,")[[1]])
-  # }
+  
+  if (is.null(recipe$Analysis$Workflow$Scorecards$start_months) || recipe$Analysis$Workflow$Scorecards$start_months == 'all') {
+    start.months <- as.numeric(substr(recipe$Analysis$Time$sdate, 1, 2))
+  } else {
+    start.months <- as.numeric(strsplit(recipe$Analysis$Workflow$Scorecards$start_months,
+                                        split = ", | |,")[[1]])
+    if (!any(as.numeric(substr(recipe$Analysis$Time$sdate, 1, 2)) %in% start.months)) {
+      error(recipe$Run$logger, "Requested start dates for scorecards must be loaded")
+    }
+  }
   
   start.months <- sprintf("%02d", start.months)
   period <- paste0(start.year, "-", end.year)
@@ -53,7 +52,7 @@ Scorecards <- function(recipe) {
   metrics.load <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Skill$metric), ", | |,"))
   metrics.visualize <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric), ", | |,"))
   
-  if(is.null(recipe$Analysis$Workflow$Scorecards$signif_alpha)){
+  if(is.null(recipe$Analysis$Workflow$Scorecards$signif_alpha)){
    alpha <- 0.05
@@ -73,7 +72,7 @@ Scorecards <- function(recipe) {
   
   ncores <- 1 # recipe$Analysis$ncores
   
-  ## Paramters for scorecard layout
+  ## Parameters for scorecard layout
   table.label <- recipe$Analysis$Workflow$Scorecards$table_label
   fileout.label <- recipe$Analysis$Workflow$Scorecards$fileout_label
   col1.width <- recipe$Analysis$Workflow$Scorecards$col1_width
@@ -153,14 +152,11 @@ Scorecards <- function(recipe) {
                                           regions = regions,
                                           metric.aggregation = metric.aggregation,
                                           ncores = ncores)
-    }## close if on region
-  }
+    } ## close if on region
+  } ## close if on skill
   
   ###### SCORE AGGREGATION ######
   if(metric.aggregation == 'score'){
-    
-    ## Comments (Nadia):
-    ## syear parameter as FALSE instead of NULL for metrics without syear dimension?
lon_dim <- 'longitude' lat_dim <- 'latitude' @@ -168,7 +164,7 @@ Scorecards <- function(recipe) { memb_dim <- 'ensemble' ## Define arrays to filled with data - scorecard_metrics <- array(data = NA, + aggregated_metrics <- array(data = NA, dim = c(system = length(system), reference = length(reference), time = length(forecast.months), @@ -176,7 +172,7 @@ Scorecards <- function(recipe) { region = length(regions), metric = length(metrics.visualize))) - scorecard_sign <- array(data = NA, + metrics_significance <- array(data = NA, dim = c(system = length(system), reference = length(reference), time = length(forecast.months), @@ -257,8 +253,8 @@ Scorecards <- function(recipe) { ncores = NULL)$sign ## Save metric result in arrays - scorecard_metrics[sys, ref, , , ,which(metrics.visualize == met)] <- s2dv::Reorder(data = rpss, order = c('time', 'smonths','region')) - scorecard_sign[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_rpss, order = c('time', 'smonths','region')) + aggregated_metrics[sys, ref, , , ,which(metrics.visualize == met)] <- s2dv::Reorder(data = rpss, order = c('time', 'smonths','region')) + metrics_significance[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_rpss, order = c('time', 'smonths','region')) } ## close if on rpss @@ -329,8 +325,8 @@ Scorecards <- function(recipe) { ncores = NULL)$sign ## Save metric result in arrays - scorecard_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = crpss, order = c('time', 'smonths','region')) - scorecard_sign[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_crpss, order = c('time', 'smonths','region')) + aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = crpss, order = c('time', 'smonths','region')) + metrics_significance[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_crpss, order = c('time', 'smonths','region')) } ## close if on crpss @@ -423,7 +419,7 @@ Scorecards <- function(recipe) { ## Calculate correlation enscorr <- cov_spatial_aggr / (std_hcst_spatial_aggr * std_obs_spatial_aggr) - + ## Drop unwanted dimensions enscorr <- Subset(enscorr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') @@ -453,8 +449,8 @@ Scorecards <- function(recipe) { } ## Save metric result in arrays - scorecard_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = enscorr, order = c('time', 'smonths','region')) - scorecard_sign[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_corr, order = c('time', 'smonths','region')) + aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = enscorr, order = c('time', 'smonths','region')) + metrics_significance[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_corr, order = c('time', 'smonths','region')) } ## close if on enscorr @@ -485,7 +481,7 @@ Scorecards <- function(recipe) { mean_bias_spatial_aggr <- Subset(mean_bias_spatial_aggr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') ## Save metric result in array - scorecard_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = mean_bias_spatial_aggr, order = c('time', 'smonths','region')) + aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = mean_bias_spatial_aggr, order = c('time', 'smonths','region')) } ## close on mean_bias @@ -516,7 +512,7 @@ Scorecards <- 
function(recipe) { enssprerr_spatial_aggr <- Subset(enssprerr_spatial_aggr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') ## Save metric result in array - scorecard_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = enssprerr_spatial_aggr, order = c('time', 'smonths','region')) + aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = enssprerr_spatial_aggr, order = c('time', 'smonths','region')) } ## close on enssprerr @@ -524,10 +520,11 @@ Scorecards <- function(recipe) { } ## close if on reference } ## close if on system - aggregated_metrics <- scorecard_metrics + #Include metric attributes + attributes(aggregated_metrics)$metrics <- metrics.visualize ## set NAs to False - scorecard_sign[is.na(scorecard_sign)] <- FALSE + metrics_significance[is.na(metrics_significance)] <- FALSE } ## close if on score @@ -538,7 +535,7 @@ Scorecards <- function(recipe) { ## (one system only) ## Metrics input must be in the same order as function SC_spatial_aggregation scorecard_single <- ScorecardsSingle(data = aggregated_metrics, - sign = scorecard_sign, + sign = metrics_significance, system = system, reference = reference, var = var, @@ -568,6 +565,7 @@ Scorecards <- function(recipe) { ## Metrics input must be in the same order as function SC_spatial_aggregation if(length(system) > 1 || length(reference) > 1){ scorecard_multi <- ScorecardsMulti(data = aggregated_metrics, + sign = metrics_significance, system = system, reference = reference, var = var, @@ -575,10 +573,21 @@ Scorecards <- function(recipe) { end.year = end.year, start.months = start.months, forecast.months = forecast.months, - region.names = attributes(regions)$names, + region.names = names(regions), metrics = metrics.visualize, table.label = table.label, fileout.label = fileout.label, + plot.legend = plot.legend, + legend.breaks = legend.breaks, + legend.white.space = legend.white.space, + legend.width = legend.width, + legend.height = legend.height, + label.scale = label.scale, + col1.width = col1.width, + col2.width = col2.width, + columns.width = columns.width, + font.size = font.size, + round.decimal = round.decimal, output.path = output.path) } ## close if -- GitLab From 50c4a4c7c7a8b02dfa304ddf7995658f7612ee57 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Wed, 17 Jan 2024 16:22:32 +0100 Subject: [PATCH 19/43] including changes for new VizScorecards function --- modules/Loading/Dev_Loading.R | 501 -------------- modules/Scorecards/R/tmp/ClimPalette.R | 78 +++ modules/Scorecards/R/tmp/ColorBarContinuous.R | 594 +++++++++++++++++ modules/Scorecards/R/tmp/ScorecardsMulti.R | 248 +++---- modules/Scorecards/R/tmp/ScorecardsSingle.R | 248 +++---- modules/Scorecards/R/tmp/Utils.R | 92 --- modules/Scorecards/R/tmp/VizScorecard.R | 619 ++++++++++++++++++ modules/Scorecards/Scorecards.R | 60 +- 8 files changed, 1587 insertions(+), 853 deletions(-) delete mode 100644 modules/Loading/Dev_Loading.R create mode 100644 modules/Scorecards/R/tmp/ClimPalette.R create mode 100644 modules/Scorecards/R/tmp/ColorBarContinuous.R create mode 100644 modules/Scorecards/R/tmp/VizScorecard.R diff --git a/modules/Loading/Dev_Loading.R b/modules/Loading/Dev_Loading.R deleted file mode 100644 index fb456eb3..00000000 --- a/modules/Loading/Dev_Loading.R +++ /dev/null @@ -1,501 +0,0 @@ -## TODO: remove paths to personal scratchs -source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R") -# Load required libraries/funs 
-source("modules/Loading/R/dates2load.R") -source("modules/Loading/R/get_timeidx.R") -source("modules/Loading/R/check_latlon.R") -## TODO: Move to prepare_outputs.R -source("tools/libs.R") -## TODO: remove these two lines when new as.s2dv_cube() is in CSTools -source('https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-new_s2dv_cube/R/as.s2dv_cube.R') -source('https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-new_s2dv_cube/R/zzz.R') - -## TODO: Source new s2dv_cube version -## TODO: Eliminate dim_var dimension (merge_across_dims?) - -load_datasets <- function(recipe) { - - # ------------------------------------------- - # Set params ----------------------------------------- - - hcst.inityear <- recipe$Analysis$Time$hcst_start - hcst.endyear <- recipe$Analysis$Time$hcst_end - lats.min <- recipe$Analysis$Region$latmin - lats.max <- recipe$Analysis$Region$latmax - lons.min <- recipe$Analysis$Region$lonmin - lons.max <- recipe$Analysis$Region$lonmax - ref.name <- recipe$Analysis$Datasets$Reference$name - exp.name <- recipe$Analysis$Datasets$System$name - - variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]][1] - vars <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] - store.freq <- recipe$Analysis$Variables$freq - - # get sdates array - ## LOGGER: Change dates2load to extract logger from recipe? - sdates <- dates2load(recipe, recipe$Run$logger) - - idxs <- NULL - idxs$hcst <- get_timeidx(sdates$hcst, - recipe$Analysis$Time$ftime_min, - recipe$Analysis$Time$ftime_max, - time_freq=store.freq) - - if (!(is.null(sdates$fcst))) { - idxs$fcst <- get_timeidx(sdates$fcst, - recipe$Analysis$Time$ftime_min, - recipe$Analysis$Time$ftime_max, - time_freq=store.freq) - } - - ## TODO: Examine this verifications part, verify if it's necessary - # stream <- verifications$stream - # sdates <- verifications$fcst.sdate - - ## TODO: define fcst.name - ##fcst.name <- recipe$Analysis$Datasets$System[[sys]]$name - - # get esarchive datasets dict: - ## TODO: Adapt to 'filesystem' option in recipe - archive <- read_yaml("conf/archive.yml")$esarchive - exp_descrip <- archive$System[[exp.name]] - - freq.hcst <- unlist(exp_descrip[[store.freq]][variable]) - reference_descrip <- archive$Reference[[ref.name]] - freq.obs <- unlist(reference_descrip[[store.freq]][variable]) - obs.dir <- reference_descrip$src - fcst.dir <- exp_descrip$src - hcst.dir <- exp_descrip$src - fcst.nmember <- exp_descrip$nmember$fcst - hcst.nmember <- exp_descrip$nmember$hcst - - ## TODO: it is necessary? 
- ##if ("accum" %in% names(reference_descrip)) { - ## accum <- unlist(reference_descrip$accum[store.freq][[1]]) - ##} else { - ## accum <- FALSE - ##} - - var_dir_obs <- reference_descrip[[store.freq]][vars] - var_dir_exp <- exp_descrip[[store.freq]][vars] - - # ----------- - obs.path <- paste0(archive$src, - obs.dir, store.freq, "/$var$", "$var_dir$", - "/$var$_$file_date$.nc") - - hcst.path <- paste0(archive$src, - hcst.dir, store.freq, "/$var$", "$var_dir$", - "$var$_$file_date$.nc") - - fcst.path <- paste0(archive$src, - hcst.dir, store.freq, "/$var$", "$var_dir$", - "/$var$_$file_date$.nc") - - # Define regrid parameters: - #------------------------------------------------------------------- - regrid_params <- get_regrid_params(recipe, archive) - - # Longitude circular sort and latitude check - #------------------------------------------------------------------- - circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) - - if (recipe$Analysis$Variables$freq == "monthly_mean"){ - split_multiselected_dims = TRUE - } else { - split_multiselected_dims = FALSE - } - - # Load hindcast - #------------------------------------------------------------------- - hcst <- Start(dat = hcst.path, - var = vars, - var_dir = var_dir_exp, - file_date = sdates$hcst, - time = idxs$hcst, - var_dir_depends = 'var', - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$fcst.transform, - transform_params = list(grid = regrid_params$fcst.gridtype, - method = regrid_params$fcst.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat', 'latitude'), - longitude = c('lon', 'longitude'), - ensemble = c('member', 'ensemble')), - ensemble = indices(1:hcst.nmember), - metadata_dims = 'var', # change to just 'var'? 
- return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = split_multiselected_dims, - retrieve = TRUE) - - # Remove var_dir dimension - if ("var_dir" %in% names(dim(hcst))) { - hcst <- Subset(hcst, along = "var_dir", indices = 1, drop = "selected") - } - - if (recipe$Analysis$Variables$freq == "daily_mean") { - # Adjusts dims for daily case, could be removed if startR allows - # multidim split - names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear" - default_dims <- c(dat = 1, var = 1, sday = 1, - sweek = 1, syear = 1, time = 1, - latitude = 1, longitude = 1, ensemble = 1) - default_dims[names(dim(hcst))] <- dim(hcst) - dim(hcst) <- default_dims - # Change time attribute dimensions - default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) - names(dim(attr(hcst, "Variables")$common$time))[which(names( - dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear" - default_time_dims[names(dim(attr(hcst, "Variables")$common$time))] <- - dim(attr(hcst, "Variables")$common$time) - dim(attr(hcst, "Variables")$common$time) <- default_time_dims - } - - # Convert hcst to s2dv_cube object - ## TODO: Give correct dimensions to $Dates - ## (sday, sweek, syear instead of file_date) - hcst <- as.s2dv_cube(hcst) - # Adjust dates for models where the time stamp goes into the next month - if (recipe$Analysis$Variables$freq == "monthly_mean") { - hcst$attrs$Dates[] <- hcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) - } - - ## Combine tas and tos data into one variable: tas-tos - if(recipe$Analysis$Variables$name == 'tas tos'){ - #if(recipe$Analysis$Datasets$Reference$name == 'HadCRUT5' || recipe$Analysis$Datasets$Reference$name == 'BEST') { - source('/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/modules/Loading/R/mask_tas_tos.R') - hcst <- mask_tas_tos(input_data = hcst, region = c(lons.min, lons.max,lats.min, lats.max), - grid = 'r360x181', - lon = hcst$coords$longitude, - lat = hcst$coords$latitude, - lon_dim = 'longitude', lat_dim = 'latitude', ncores = NULL) - - hcst$dims[['var']] <- dim(hcst$data)[['var']] - #} - } - - # Load forecast - #------------------------------------------------------------------- - if (!is.null(recipe$Analysis$Time$fcst_year)) { - # the call uses file_date instead of fcst_syear so that it can work - # with the daily case and the current version of startR not allowing - # multiple dims split - - fcst <- Start(dat = fcst.path, - var = vars, - var_dir = var_dir_exp, - var_dir_depends = 'var', - file_date = sdates$fcst, - time = idxs$fcst, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$fcst.transform, - transform_params = list(grid = regrid_params$fcst.gridtype, - method = regrid_params$fcst.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat', 'latitude'), - longitude = c('lon', 'longitude'), - ensemble = c('member', 'ensemble')), - ensemble = indices(1:fcst.nmember), - metadata_dims = 'var', - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = split_multiselected_dims, - retrieve = TRUE) - - if ("var_dir" %in% names(dim(fcst))) { - fcst <- Subset(fcst, along = "var_dir", indices = 1, drop = "selected") - } - - if (recipe$Analysis$Variables$freq == "daily_mean") { - # Adjusts dims for daily case, could be removed if startR allows - # 
multidim split - names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear" - default_dims <- c(dat = 1, var = 1, sday = 1, - sweek = 1, syear = 1, time = 1, - latitude = 1, longitude = 1, ensemble = 1) - default_dims[names(dim(fcst))] <- dim(fcst) - dim(fcst) <- default_dims - # Change time attribute dimensions - default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) - names(dim(attr(fcst, "Variables")$common$time))[which(names( - dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear" - default_time_dims[names(dim(attr(fcst, "Variables")$common$time))] <- - dim(attr(fcst, "Variables")$common$time) - dim(attr(fcst, "Variables")$common$time) <- default_time_dims - } - - # Convert fcst to s2dv_cube - fcst <- as.s2dv_cube(fcst) - # Adjust dates for models where the time stamp goes into the next month - if (recipe$Analysis$Variables$freq == "monthly_mean") { - fcst$attrs$Dates[] <- - fcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) - } - - } else { - fcst <- NULL - } - - # Load reference - #------------------------------------------------------------------- - - # Obtain dates and date dimensions from the loaded hcst data to make sure - # the corresponding observations are loaded correctly. - dates <- hcst$attrs$Dates - dim(dates) <- hcst$dims[c("sday", "sweek", "syear", "time")] - - # Separate Start() call for monthly vs daily data - if (store.freq == "monthly_mean") { - - dates_file <- format(as.Date(dates, '%Y%m%d'), "%Y%m") - dim(dates_file) <- dim(dates) - - ## tas tos mask - if (recipe$Analysis$Variables$name == 'tas tos'){ - if (recipe$Analysis$Datasets$Reference$name == 'HadCRUT5'){ - vars <- 'tasanomaly' - var_dir_obs <- reference_descrip[[store.freq]][vars] - } - } - - if (recipe$Analysis$Variables$name == 'tas tos'){ - if (recipe$Analysis$Datasets$Reference$name == 'BEST'){ - vars <- 'tas' - var_dir_obs <- reference_descrip[[store.freq]][vars] - } - } - - obs <- Start(dat = obs.path, - var = vars, - var_dir = var_dir_obs, - var_dir_depends = 'var', - file_date = dates_file, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$obs.transform, - transform_params = list(grid = regrid_params$obs.gridtype, - method = regrid_params$obs.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude')), - metadata_dims = 'var', - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) - - } else if (store.freq == "daily_mean") { - - # Get year and month for file_date - dates_file <- sapply(dates, format, '%Y%m') - dim(dates_file) <- dim(dates) - # Set hour to 12:00 to ensure correct date retrieval for daily data - lubridate::hour(dates) <- 12 - lubridate::minute(dates) <- 00 - # Restore correct dimensions - dim(dates) <- dim(dates_file) - - obs <- Start(dat = obs.path, - var = vars, - var_dir = var_dir_obs, - var_dir_depends = 'var', - file_date = sort(unique(dates_file)), - time = dates, - time_var = 'time', - time_across = 'file_date', - merge_across_dims = TRUE, - merge_across_dims_narm = TRUE, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$obs.transform, - transform_params = list(grid = regrid_params$obs.gridtype, - method 
= regrid_params$obs.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude')), - metadata_dims = 'var', - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) - } - - # Remove var_dir dimension - if ("var_dir" %in% names(dim(obs))) { - obs <- Subset(obs, along = "var_dir", indices = 1, drop = "selected") - } - # Adds ensemble dim to obs (for consistency with hcst/fcst) - default_dims <- c(dat = 1, var = 1, sday = 1, - sweek = 1, syear = 1, time = 1, - latitude = 1, longitude = 1, ensemble = 1) - default_dims[names(dim(obs))] <- dim(obs) - dim(obs) <- default_dims - - # Convert obs to s2dv_cube - obs <- as.s2dv_cube(obs) - - ## Combine tas and tos data into one variable: tas-tos - if(recipe$Analysis$Variables$name == 'tas tos'){ - if(recipe$Analysis$Datasets$Reference$name != 'HadCRUT5' & recipe$Analysis$Datasets$Reference$name != 'BEST'){ - source('/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/modules/Loading/R/mask_tas_tos.R') - obs <- mask_tas_tos(input_data = obs, region = c(lons.min, lons.max,lats.min, lats.max), - grid = 'r360x181', - lon = obs$coords$longitude, - lat = obs$coords$latitude, - lon_dim = 'longitude', lat_dim = 'latitude', ncores = NULL) - - obs$dims[['var']] <- dim(obs$data)[['var']] - } - } - - # Check for consistency between hcst and obs grid - if (!(recipe$Analysis$Regrid$type == 'none')) { - if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) { - lat_error_msg <- paste("Latitude mismatch between hcst and obs.", - "Please check the original grids and the", - "regrid parameters in your recipe.") - error(recipe$Run$logger, lat_error_msg) - hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], - "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) - info(recipe$Run$logger, hcst_lat_msg) - obs_lat_msg <- paste0("First obs lat: ", obs$lat[1], - "; Last obs lat: ", obs$lat[length(obs$lat)]) - info(recipe$Run$logger, obs_lat_msg) - stop("hcst and obs don't share the same latitudes.") - } - if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) { - lon_error_msg <- paste("Longitude mismatch between hcst and obs.", - "Please check the original grids and the", - "regrid parameters in your recipe.") - error(recipe$Run$logger, lon_error_msg) - hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], - "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) - info(recipe$Run$logger, hcst_lon_msg) - obs_lon_msg <- paste0("First obs lon: ", obs$lon[1], - "; Last obs lon: ", obs$lon[length(obs$lon)]) - info(recipe$Run$logger, obs_lon_msg) - stop("hcst and obs don't share the same longitudes.") - - } - } - - # Remove negative values in accumulative variables - dictionary <- read_yaml("conf/variable-dictionary.yml") - for (var_idx in 1:length(vars)) { - var_name <- vars[var_idx] - if (dictionary$vars[[var_name]]$accum) { - info(recipe$Run$logger, - paste0("Accumulated variable ", var_name, - ": setting negative values to zero.")) - # obs$data[, var_idx, , , , , , , ] <- pmax(Subset(obs$data, - # along = "var", - # indices = var_idx, F), 0) - obs$data[, var_idx, , , , , , , ][obs$data[, var_idx, , , , , , , ] < 0] <- 0 - hcst$data[, var_idx, , , , , , , ][hcst$data[, var_idx, , , , , , , ] < 0] <- 0 - if (!is.null(fcst)) { - fcst$data[, var_idx, , , , , , , ][fcst$data[, var_idx, , , , , , , ] < 0] <- 0 - } - } - - # Convert prlr from m/s to mm/day - ## TODO: Make a unit conversion function - if 
(vars[[var_idx]] == "prlr") { - # Verify that the units are m/s and the same in obs and hcst - if (((obs$attrs$Variable$metadata[[var_name]]$units == "m s-1") || - (obs$attrs$Variable$metadata[[var_name]]$units == "m s**-1")) && - ((hcst$attrs$Variable$metadata[[var_name]]$units == "m s-1") || - (hcst$attrs$Variable$metadata[[var_name]]$units == "m s**-1"))) { - info(recipe$Run$logger, "Converting precipitation from m/s to mm/day.") - obs$data[, var_idx, , , , , , , ] <- - obs$data[, var_idx, , , , , , , ]*86400*1000 - obs$attrs$Variable$metadata[[var_name]]$units <- "mm/day" - hcst$data[, var_idx, , , , , , , ] <- - hcst$data[, var_idx, , , , , , , ]*86400*1000 - hcst$attrs$Variable$metadata[[var_name]]$units <- "mm/day" - if (!is.null(fcst)) { - fcst$data[, var_idx, , , , , , , ] <- - fcst$data[, var_idx, , , , , , , ]*86400*1000 - fcst$attrs$Variable$metadata[[var_name]]$units <- "mm/day" - } - } - } - } - # Compute anomalies if requested - # Print a summary of the loaded data for the user, for each object - if (recipe$Run$logger$threshold <= 2) { - data_summary(hcst, recipe) - data_summary(obs, recipe) - if (!is.null(fcst)) { - data_summary(fcst, recipe) - } - } - - info(recipe$Run$logger, - "##### DATA LOADING COMPLETED SUCCESSFULLY #####") - - ############################################################################ - # - # CHECKS ON MISSING FILES - # - ############################################################################ - - #obs.NA_dates.ind <- Apply(obs, - # fun=(function(x){ all(is.na(x))}), - # target_dims=c('time', 'latitude', 'longitude'))[[1]] - #obs.NA_dates <- dates_file[obs.NA_dates.ind] - #obs.NA_dates <- obs.NA_dates[order(obs.NA_dates)] - #obs.NA_files <- paste0(obs.dir, store.freq,"/",variable,"_", - # freq.obs,"obs.grid","/",variable,"_",obs.NA_dates,".nc") - # - #if (any(is.na(hcst))){ - # fatal(recipe$Run$logger, - # paste(" ERROR: MISSING HCST VALUES FOUND DURING LOADING # ", - # " ################################################# ", - # " ###### MISSING FILES #### ", - # " ################################################# ", - # "hcst files:", - # hcst.NA_files, - # " ################################################# ", - # " ################################################# ", - # sep="\n")) - # quit(status = 1) - #} - # - #if (any(is.na(obs)) && !identical(obs.NA_dates,character(0))){ - # fatal(recipe$logger, - # paste(" ERROR: MISSING OBS VALUES FOUND DURING LOADING # ", - # " ################################################# ", - # " ###### MISSING FILES #### ", - # " ################################################# ", - # "obs files:", - # obs.NA_files, - # " ################################################# ", - # " ################################################# ", - # sep="\n")) - # quit(status=1) - #} - # - #info(recipe$logger, - # "######### DATA LOADING COMPLETED SUCCESFULLY ##############") - - ############################################################################ - ############################################################################ - - return(list(hcst = hcst, fcst = fcst, obs = obs)) - -} diff --git a/modules/Scorecards/R/tmp/ClimPalette.R b/modules/Scorecards/R/tmp/ClimPalette.R new file mode 100644 index 00000000..eed3c802 --- /dev/null +++ b/modules/Scorecards/R/tmp/ClimPalette.R @@ -0,0 +1,78 @@ +#'Generate Climate Color Palettes +#' +#'Generates a colorblind friendly color palette with color ranges useful in +#'climate temperature variable plotting. +#' +#'@param palette A character string of palette. 
The current choices: +#' \itemize{ +#' \item{'bluered': from blue through white to red.} +#' \item{'redblue': from red through white to blue.} +#' \item{'yellowred': from yellow through orange to red.} +#' \item{'redyellow': from red through orange to yellow.} +#' \item{'purpleorange': from purple through white to orange.} +#' \item{'orangepurple': from orange through white to purple.} +#' } +#'@param n A number indicating how many colors to generate. +#' +#'@return +#'ClimPalette() returns the function that generates the color palette and the +#'attribute 'na_color'.\cr +#'ClimColors() returns a vector of the colors. +#' +#'@examples +#'lims <- seq(-1, 1, length.out = 21) +#' +#'cb <- ColorBarContinuous(lims, color_fun = ClimPalette('redyellow'), plot = FALSE) +#' +#'cols <- ClimColors(20) +#'cb <- ColorBarContinuous(lims, cols, plot = FALSE) +#' +#'@importFrom grDevices colorRampPalette +#'@export +ClimPalette <- function(palette = "bluered") { + if (palette == "bluered") { + colorbar <- colorRampPalette(rev(c("#67001f", "#b2182b", "#d6604d", + "#f4a582", "#fddbc7", "#f7f7f7", + "#d1e5f0", "#92c5de", "#4393c3", + "#2166ac", "#053061"))) + attr(colorbar, 'na_color') <- 'pink' + } else if (palette == "redblue") { + colorbar <- colorRampPalette(c("#67001f", "#b2182b", "#d6604d", + "#f4a582", "#fddbc7", "#f7f7f7", + "#d1e5f0", "#92c5de", "#4393c3", + "#2166ac", "#053061")) + attr(colorbar, 'na_color') <- 'pink' + } else if (palette == "yellowred") { + colorbar <- colorRampPalette(c("#ffffcc", "#ffeda0", "#fed976", + "#feb24c", "#fd8d3c", "#fc4e2a", + "#e31a1c", "#bd0026", "#800026")) + attr(colorbar, 'na_color') <- 'pink' + } else if (palette == "redyellow") { + colorbar <- colorRampPalette(rev(c("#ffffcc", "#ffeda0", "#fed976", + "#feb24c", "#fd8d3c", "#fc4e2a", + "#e31a1c", "#bd0026", "#800026"))) + attr(colorbar, 'na_color') <- 'pink' + } else if (palette == "purpleorange") { + colorbar <- colorRampPalette(c("#2d004b", "#542789", "#8073ac", + "#b2abd2", "#d8daeb", "#f7f7f7", + "#fee0b6", "#fdb863", "#e08214", + "#b35806", "#7f3b08")) + attr(colorbar, 'na_color') <- 'pink' + } else if (palette == "orangepurple") { + colorbar <- colorRampPalette(rev(c("#2d004b", "#542789", "#8073ac", + "#b2abd2", "#d8daeb", "#f7f7f7", + "#fee0b6", "#fdb863", "#e08214", + "#b35806", "#7f3b08"))) + attr(colorbar, 'na_color') <- 'pink' + } else { + stop("Parameter 'palette' must be one of 'bluered', 'redblue', 'yellowred'", + "'redyellow', 'purpleorange' or 'orangepurple'.") + } + colorbar +} + +#'@rdname ClimPalette +#'@export +ClimColors <- function(n, palette = "bluered") { + ClimPalette(palette)(n) +} diff --git a/modules/Scorecards/R/tmp/ColorBarContinuous.R b/modules/Scorecards/R/tmp/ColorBarContinuous.R new file mode 100644 index 00000000..a4ef933f --- /dev/null +++ b/modules/Scorecards/R/tmp/ColorBarContinuous.R @@ -0,0 +1,594 @@ +#'Draws a Continuous Color Bar +#' +#'Generates a color bar to use as colouring function for map plots and +#'optionally draws it (horizontally or vertically) to be added to map +#'multipanels or plots. It is possible to draw triangles at the ends of the +#'colour bar to represent values that go beyond the range of interest. A +#'number of options is provided to adjust the colours and the position and +#'size of the components. 
The drawn colour bar spans a whole figure region +#'and is compatible with figure layouts.\cr\cr +#'The generated colour bar consists of a set of breaks that define the +#'length(brks) - 1 intervals to classify each of the values in each of the +#'grid cells of a two-dimensional field. The corresponding grid cell of a +#'given value of the field will be coloured in function of the interval it +#'belongs to.\cr\cr +#'The only mandatory parameters are 'var_limits' or 'brks' (in its second +#'format, see below). +#' +#'@param brks Can be provided in two formats: +#'\itemize{ +#' \item{A single value with the number of breaks to be generated +#' automatically, between the minimum and maximum specified in 'var_limits' +#' (both inclusive). Hence the parameter 'var_limits' is mandatory if 'brks' +#' is provided with this format. If 'bar_limits' is additionally provided, +#' values only between 'bar_limits' will be generated. The higher the value +#' of 'brks', the smoother the plot will look.} +#' \item{A vector with the actual values of the desired breaks. Values will +#' be reordered by force to ascending order. If provided in this format, no +#' other parameters are required to generate/plot the colour bar.} +#'} +#' This parameter is optional if 'var_limits' is specified. If 'brks' not +#' specified but 'cols' is specified, it will take as value length(cols) + 1. +#' If 'cols' is not specified either, 'brks' will take 21 as value. +#'@param cols Vector of length(brks) - 1 valid colour identifiers, for each +#' interval defined by the breaks. This parameter is optional and will be +#' filled in with a vector of length(brks) - 1 colours generated with the +#' function provided in 'color_fun' (\code{clim.colors} by default).\cr 'cols' +#' can have one additional colour at the beginning and/or at the end with the +#' aim to colour field values beyond the range of interest represented in the +#' colour bar. If any of these extra colours is provided, parameter +#' 'triangle_ends' becomes mandatory in order to disambiguate which of the +#' ends the colours have been provided for. +#'@param vertical TRUE/FALSE for vertical/horizontal colour bar +#' (disregarded if plot = FALSE). +#'@param subsampleg The first of each subsampleg breaks will be ticked on the +#' colorbar. Takes by default an approximation of a value that yields a +#' readable tick arrangement (extreme breaks always ticked). If set to 0 or +#' lower, no labels are drawn. See the code of the function for details or +#' use 'extra_labels' for customized tick arrangements. +#'@param bar_limits Vector of two numeric values with the extremes of the +#' range of values represented in the colour bar. If 'var_limits' go beyond +#' this interval, the drawing of triangle extremes is triggered at the +#' corresponding sides, painted in 'col_inf' and 'col_sup'. Either of them +#' can be set as NA and will then take as value the corresponding extreme in +#' 'var_limits' (hence a triangle end won't be triggered for these sides). +#' Takes as default the extremes of 'brks' if available, else the same values +#' as 'var_limits'. +#'@param var_limits Vector of two numeric values with the minimum and maximum +#' values of the field to represent. These are used to know whether to draw +#' triangle ends at the extremes of the colour bar and what colour to fill +#' them in with. If not specified, take the same value as the extremes of +#' 'brks'. Hence the parameter 'brks' is mandatory if 'var_limits' is not +#' specified. 
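+#'  As a small illustration of how 'brks', 'bar_limits' and 'var_limits'
+#'  interact under the integrity rules implemented below (a sketch; the values
+#'  shown are what the function returns when 'plot = FALSE'):
+#'  \preformatted{
+#'  cb <- ColorBarContinuous(brks = 11, var_limits = c(-1, 1), plot = FALSE)
+#'  length(cb$brks)  # 11 breaks spanning c(-1.1, 1.1), half an interval beyond
+#'  length(cb$cols)  # 10 colour intervals, no triangle ends triggered
+#'  }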
+#'@param triangle_ends Vector of two logical elements, indicating whether to
+#'  force the drawing of triangle ends at each of the extremes of the colour
+#'  bar. This choice is automatically made from the provided 'brks',
+#'  'bar_limits', 'var_limits', 'col_inf' and 'col_sup', but the behaviour
+#'  can be manually forced to draw or not to draw the triangle ends with this
+#'  parameter. If 'cols' is provided, 'col_inf' and 'col_sup' will take
+#'  priority over 'triangle_ends' when deciding whether to draw the triangle
+#'  ends or not.
+#'@param col_inf Colour to fill the inferior triangle end with. Useful if
+#'  specifying colours manually with parameter 'cols', to specify the colour
+#'  and to trigger the drawing of the lower extreme triangle, or if 'cols' is
+#'  not specified, to replace the colour automatically generated by ColorBar().
+#'@param col_sup Colour to fill the superior triangle end with. Useful if
+#'  specifying colours manually with parameter 'cols', to specify the colour
+#'  and to trigger the drawing of the upper extreme triangle, or if 'cols' is
+#'  not specified, to replace the colour automatically generated by ColorBar().
+#'@param color_fun Function to generate the colours of the color bar. Must
+#'  take an integer and must return as many colours. The returned colour vector
+#'  can have the attribute 'na_color', with a colour to draw NA values. This
+#'  parameter is set by default to ClimPalette().
+#'@param plot Logical value indicating whether to only compute its breaks and
+#'  colours (FALSE) or to also draw it on the current device (TRUE).
+#'@param draw_ticks Whether to draw ticks for the labels along the colour bar
+#'  (TRUE) or not (FALSE). TRUE by default. Disregarded if 'plot = FALSE'.
+#'@param draw_separators Whether to draw black lines in the borders of each of
+#'  the colour rectangles of the colour bar (TRUE) or not (FALSE). FALSE by
+#'  default. Disregarded if 'plot = FALSE'.
+#'@param triangle_ends_scale Scale factor for the drawn triangle ends of the
+#'  colour bar, if drawn at all. Takes 1 by default (a right triangle
+#'  proportional to the thickness of the colour bar). Disregarded if
+#'  'plot = FALSE'.
+#'@param extra_labels Numeric vector of extra labels to draw along the axis of
+#'  the colour bar. The number of provided decimals will be conserved.
+#'  Disregarded if 'plot = FALSE'.
+#'@param title Title to draw on top of the colour bar, most commonly with the
+#'  units of the represented field in the neighbouring figures. Empty by
+#'  default.
+#'@param title_scale Scale factor for the 'title' of the colour bar.
+#'  Takes 1 by default.
+#'@param label_scale Scale factor for the labels of the colour bar.
+#'  Takes 1 by default.
+#'@param tick_scale Scale factor for the length of the ticks of the labels
+#'  along the colour bar. Takes 1 by default.
+#'@param extra_margin Extra margins to be added around the colour bar,
+#'  in the format c(y1, x1, y2, x2). The units are margin lines. Takes
+#'  rep(0, 4) by default.
+#'@param label_digits Number of significant digits to be displayed in the
+#'  labels of the colour bar, usually to avoid too many decimal digits
+#'  overflowing the figure region. This does not have an effect on the labels
+#'  provided in 'extra_labels'. Takes 4 by default.
+#'@param ... Arguments to be passed to the method.
Only accepts the following +#' graphical parameters:\cr adj ann ask bg bty cex.lab cex.main cex.sub cin +#' col.axis col.lab col.main col.sub cra crt csi cxy err family fg fig fin +#' font font.axis font.lab font.main font.sub lend lheight ljoin lmitre lty +#' lwd mai mex mfcol mfrow mfg mkh oma omd omi page pch pin plt pty smo srt +#' tck tcl usr xaxp xaxs xaxt xlog xpd yaxp yaxs yaxt ylbias ylog.\cr For more +#' information about the parameters see `par`. +#' +#'@return +#'\item{brks}{ +#' Breaks used for splitting the range in intervals. +#'} +#'\item{cols}{ +#' Colours generated for each of the length(brks) - 1 intervals. +#' Always of length length(brks) - 1. +#'} +#'\item{col_inf}{ +#' Colour used to draw the lower triangle end in the colour +#' bar (NULL if not drawn at all). +#'} +#'\item{col_sup}{ +#' Colour used to draw the upper triangle end in the colour +#' bar (NULL if not drawn at all). +#'} +#' +#'@examples +#'cols <- c("dodgerblue4", "dodgerblue1", "forestgreen", "yellowgreen", "white", +#' "white", "yellow", "orange", "red", "saddlebrown") +#'lims <- seq(-1, 1, 0.2) +#'cb <- ColorBarContinuous(lims, cols, plot = FALSE) +#' +#'@importFrom grDevices col2rgb rgb +#'@import utils +#'@export +ColorBarContinuous <- function(brks = NULL, cols = NULL, vertical = TRUE, + subsampleg = NULL, bar_limits = NULL, var_limits = NULL, + triangle_ends = NULL, col_inf = NULL, col_sup = NULL, + color_fun = ClimPalette(), plot = TRUE, + draw_ticks = TRUE, draw_separators = FALSE, + triangle_ends_scale = 1, extra_labels = NULL, + title = NULL, title_scale = 1, + label_scale = 1, tick_scale = 1, + extra_margin = rep(0, 4), label_digits = 4, ...) { + # Required checks + if ((is.null(brks) || length(brks) < 2) && is.null(bar_limits) && is.null(var_limits)) { + stop("At least one of 'brks' with the desired breaks, 'bar_limits' or ", + "'var_limits' must be provided to generate the colour bar.") + } + + # Check brks + if (!is.null(brks)) { + if (!is.numeric(brks)) { + stop("Parameter 'brks' must be numeric if specified.") + } else if (length(brks) > 1) { + reorder <- sort(brks, index.return = TRUE) + if (!is.null(cols)) { + cols <- cols[reorder$ix[which(reorder$ix <= length(cols))]] + } + brks <- reorder$x + } + } + + # Check bar_limits + if (!is.null(bar_limits)) { + if (!(all(is.na(bar_limits) | is.numeric(bar_limits)) && (length(bar_limits) == 2))) { + stop("Parameter 'bar_limits' must be a vector of two numeric elements or NAs.") + } + } + + # Check var_limits + if (!is.null(var_limits)) { + if (!(is.numeric(var_limits) && (length(var_limits) == 2))) { + stop("Parameter 'var_limits' must be a numeric vector of length 2.") + } else if (anyNA(var_limits)) { + stop("Parameter 'var_limits' must not contain NA values.") + } else if (any(is.infinite(var_limits))) { + stop("Parameter 'var_limits' must not contain infinite values.") + } + } + + # Check cols + if (!is.null(cols)) { + if (!is.character(cols)) { + stop("Parameter 'cols' must be a vector of character strings.") + } else if (any(!sapply(cols, .IsColor))) { + stop("Parameter 'cols' must contain valid colour identifiers.") + } + } + + # Check color_fun + if (!is.function(color_fun)) { + stop("Parameter 'color_fun' must be a colour-generator function.") + } + + # Check integrity among brks, bar_limits and var_limits + if (is.null(brks) || (length(brks) < 2)) { + if (is.null(brks)) { + if (is.null(cols)) { + brks <- 21 + } else { + brks <- length(cols) + 1 + } + } + if (is.null(bar_limits) || anyNA(bar_limits)) { + # var_limits is defined + if 
(is.null(bar_limits)) { + bar_limits <- c(NA, NA) + } + half_width <- 0.5 * (var_limits[2] - var_limits[1]) / (brks - 1) + bar_limits[which(is.na(bar_limits))] <- c(var_limits[1] - half_width, var_limits[2] + half_width)[which(is.na(bar_limits))] + brks <- seq(bar_limits[1], bar_limits[2], length.out = brks) + } else if (is.null(var_limits)) { + # bar_limits is defined + var_limits <- bar_limits + half_width <- 0.5 * (var_limits[2] - var_limits[1]) / (brks - 1) + brks <- seq(bar_limits[1], bar_limits[2], length.out = brks) + var_limits[1] <- var_limits[1] + half_width / 50 + } else { + # both bar_limits and var_limits are defined + brks <- seq(bar_limits[1], bar_limits[2], length.out = brks) + } + } else if (is.null(bar_limits)) { + if (is.null(var_limits)) { + # brks is defined + bar_limits <- c(head(brks, 1), tail(brks, 1)) + var_limits <- bar_limits + half_width <- 0.5 * (var_limits[2] - var_limits[1]) / (length(brks) - 1) + var_limits[1] <- var_limits[1] + half_width / 50 + } else { + # brks and var_limits are defined + bar_limits <- c(head(brks, 1), tail(brks, 1)) + } + } else { + # brks and bar_limits are defined + # or + # brks, bar_limits and var_limits are defined + if (head(brks, 1) != bar_limits[1] || tail(brks, 1) != bar_limits[2]) { + stop("Parameters 'brks' and 'bar_limits' are inconsistent.") + } + } + + # Check col_inf + if (!is.null(col_inf)) { + if (!.IsColor(col_inf)) { + stop("Parameter 'col_inf' must be a valid colour identifier.") + } + } + + # Check col_sup + if (!is.null(col_sup)) { + if (!.IsColor(col_sup)) { + stop("Parameter 'col_sup' must be a valid colour identifier.") + } + } + + # Check triangle_ends + if (!is.null(triangle_ends) && (!is.logical(triangle_ends) || length(triangle_ends) != 2)) { + stop("Parameter 'triangle_ends' must be a logical vector with two elements.") + } + teflc <- triangle_ends_from_limit_cols <- c(!is.null(col_inf), !is.null(col_sup)) + if (is.null(triangle_ends) && is.null(col_inf) && is.null(col_sup)) { + triangle_ends <- c(FALSE, FALSE) + if (bar_limits[1] >= var_limits[1]) { + triangle_ends[1] <- TRUE + } + if (bar_limits[2] < var_limits[2]) { + triangle_ends[2] <- TRUE + } + } else if (!is.null(triangle_ends) && is.null(col_inf) && is.null(col_sup)) { + triangle_ends <- triangle_ends + } else if (is.null(triangle_ends) && (!is.null(col_inf) || !is.null(col_sup))) { + triangle_ends <- teflc + } else if (any(teflc != triangle_ends)) { + if (!is.null(brks) && length(brks) > 1 && !is.null(cols) && length(cols) >= length(brks)) { + triangle_ends <- teflc + } else if (!is.null(cols)) { + triangle_ends <- teflc + } else { + triangle_ends <- triangle_ends + } + } + if (plot && !is.null(var_limits)) { + if ((bar_limits[1] >= var_limits[1]) && !triangle_ends[1]) { + warning("There are variable values smaller or equal to the lower limit ", + "of the colour bar and the lower triangle end has been ", + "disabled. These will be painted in the colour for NA values.") + } + if ((bar_limits[2] < var_limits[2]) && !triangle_ends[2]) { + warning("There are variable values greater than the higher limit ", + "of the colour bar and the higher triangle end has been ", + "disabled. 
These will be painted in the colour for NA values.") + } + } + + # Generate colours if needed + if (is.null(cols)) { + cols <- color_fun(length(brks) - 1 + sum(triangle_ends)) + attr_bk <- attributes(cols) + if (triangle_ends[1]) { + if (is.null(col_inf)) col_inf <- head(cols, 1) + cols <- cols[-1] + } + if (triangle_ends[2]) { + if (is.null(col_sup)) col_sup <- tail(cols, 1) + cols <- cols[-length(cols)] + } + attributes(cols) <- attr_bk + } else if ((length(cols) != (length(brks) - 1))) { + stop("Incorrect number of 'brks' and 'cols'. There must be one more break than the number of colours.") + } + + # Check vertical + if (!is.logical(vertical)) { + stop("Parameter 'vertical' must be TRUE or FALSE.") + } + + # Check extra_labels + if (is.null(extra_labels)) { + extra_labels <- numeric(0) + } + if (!is.numeric(extra_labels)) { + stop("Parameter 'extra_labels' must be numeric.") + } else { + if (any(extra_labels > bar_limits[2]) || any(extra_labels < bar_limits[1])) { + stop("Parameter 'extra_labels' must not contain ticks beyond the color bar limits.") + } + } + extra_labels <- sort(extra_labels) + + # Check subsampleg + primes <- function(x) { + # Courtesy of Chase. See http://stackoverflow.com/questions/6424856/r-function-for-returning-all-factors + x <- as.integer(x) + div <- seq_len(abs(x)) + factors <- div[x %% div == 0L] + factors <- list(neg = -factors, pos = factors) + return(factors) + } + remove_final_tick <- FALSE + added_final_tick <- TRUE + if (is.null(subsampleg)) { + subsampleg <- 1 + while (length(brks) / subsampleg > 15 - 1) { + next_factor <- primes((length(brks) - 1) / subsampleg)$pos + next_factor <- next_factor[length(next_factor) - ifelse(length(next_factor) > 2, 1, 0)] + subsampleg <- subsampleg * next_factor + } + if (subsampleg > (length(brks) - 1) / 4) { + subsampleg <- max(1, round(length(brks) / 4)) + extra_labels <- c(extra_labels, bar_limits[2]) + added_final_tick <- TRUE + if ((length(brks) - 1) %% subsampleg < (length(brks) - 1) / 4 / 2) { + remove_final_tick <- TRUE + } + } + } else if (!is.numeric(subsampleg)) { + stop("Parameter 'subsampleg' must be numeric.") + } + subsampleg <- round(subsampleg) + draw_labels <- TRUE + if ((subsampleg) < 1) { + draw_labels <- FALSE + } + + # Check plot + if (!is.logical(plot)) { + stop("Parameter 'plot' must be logical.") + } + + # Check draw_separators + if (!is.logical(draw_separators)) { + stop("Parameter 'draw_separators' must be logical.") + } + + # Check triangle_ends_scale + if (!is.numeric(triangle_ends_scale)) { + stop("Parameter 'triangle_ends_scale' must be numeric.") + } + + # Check draw_ticks + if (!is.logical(draw_ticks)) { + stop("Parameter 'draw_ticks' must be logical.") + } + + # Check title + if (is.null(title)) { + title <- '' + } + if (!is.character(title)) { + stop("Parameter 'title' must be a character string.") + } + + # Check title_scale + if (!is.numeric(title_scale)) { + stop("Parameter 'title_scale' must be numeric.") + } + + # Check label_scale + if (!is.numeric(label_scale)) { + stop("Parameter 'label_scale' must be numeric.") + } + + # Check tick_scale + if (!is.numeric(tick_scale)) { + stop("Parameter 'tick_scale' must be numeric.") + } + + # Check extra_margin + if (!is.numeric(extra_margin) || length(extra_margin) != 4) { + stop("Parameter 'extra_margin' must be a numeric vector of length 4.") + } + + # Check label_digits + if (!is.numeric(label_digits)) { + stop("Parameter 'label_digits' must be numeric.") + } + label_digits <- round(label_digits) + + # Process the user graphical 
parameters that may be passed in the call + ## Graphical parameters to exclude + excludedArgs <- c("cex", "cex.axis", "col", "lab", "las", "mar", "mgp", "new", "ps") + userArgs <- .FilterUserGraphicArgs(excludedArgs, ...) + + # + # Plotting colorbar + # ~~~~~~~~~~~~~~~~~~~ + # + if (plot) { + pars_to_save <- c('mar', 'cex', names(userArgs), 'mai', 'mgp', 'las', 'xpd') + saved_pars <- par(pars_to_save) + par(mar = c(0, 0, 0, 0), cex = 1) + image(1, 1, t(t(1)), col = rgb(0, 0, 0, 0), axes = FALSE, xlab = '', ylab = '') + # Get the availale space + figure_size <- par('fin') + cs <- par('csi') + # This allows us to assume we always want to plot horizontally + if (vertical) { + figure_size <- rev(figure_size) + } + # pannel_to_redraw <- par('mfg') + # .SwitchToFigure(pannel_to_redraw[1], pannel_to_redraw[2]) + # Load the user parameters + par(new = TRUE) + par(userArgs) + # Set up color bar plot region + margins <- c(0.0, 0, 0.0, 0) + cex_title <- 1 * title_scale + cex_labels <- 0.9 * label_scale + cex_ticks <- -0.3 * tick_scale + spaceticklab <- max(-cex_ticks, 0) + if (vertical) { + margins[1] <- margins[1] + (1.2 * cex_labels * 3 + spaceticklab) * cs + margins <- margins + extra_margin[c(4, 1:3)] * cs + } else { + margins[1] <- margins[1] + (1.2 * cex_labels * 1 + spaceticklab) * cs + margins <- margins + extra_margin * cs + } + if (title != '') { + margins[3] <- margins[3] + (1.0 * cex_title) * cs + } + margins[3] <- margins[3] + sqrt(figure_size[2] / (margins[1] + margins[3])) * + figure_size[2] / 6 * ifelse(title != '', 0.5, 0.8) + # Set side margins + margins[2] <- margins[2] + figure_size[1] / 16 + margins[4] <- margins[4] + figure_size[1] / 16 + triangle_ends_prop <- 1 / 32 * triangle_ends_scale + triangle_ends_cex <- triangle_ends_prop * figure_size[2] + if (triangle_ends[1]) { + margins[2] <- margins[2] + triangle_ends_cex + } + if (triangle_ends[2]) { + margins[4] <- margins[4] + triangle_ends_cex + } + ncols <- length(cols) + # Set up the points of triangles + # Compute the proportion of horiz. 
space occupied by one plot unit + prop_unit <- (1 - (margins[2] + margins[4]) / figure_size[1]) / ncols + # Convert triangle height to plot inits + triangle_height <- triangle_ends_prop / prop_unit + left_triangle <- list(x = c(1, 1 - triangle_height, 1) - 0.5, + y = c(1.4, 1, 0.6)) + right_triangle <- list(x = c(ncols, ncols + triangle_height, ncols) + 0.5, + y = c(1.4, 1, 0.6)) + # Draw the color squares and title + if (vertical) { + par(mai = c(margins[2:4], margins[1]), + mgp = c(0, spaceticklab + 0.2, 0), las = 1) + d <- 4 + image(1, 1:ncols, t(1:ncols), axes = FALSE, col = cols, + xlab = '', ylab = '') + title(ylab = title, line = cex_title * (0.2 + 0.1), cex.lab = cex_title) + # Draw top and bottom border lines + lines(c(0.6, 0.6), c(1 - 0.5, ncols + 0.5)) + lines(c(1.4, 1.4), c(1 - 0.5, ncols + 0.5)) + # Rotate triangles + names(left_triangle) <- rev(names(left_triangle)) + names(right_triangle) <- rev(names(right_triangle)) + } else { + # The term - cex_labels / 4 * (3 / cex_labels - 1) was found by + # try and error + par(mai = margins, + mgp = c(0, cex_labels / 2 + spaceticklab + - cex_labels / 4 * (3 / cex_labels - 1), 0), + las = 1) + d <- 1 + image(1:ncols, 1, t(t(1:ncols)), axes = FALSE, col = cols, + xlab = '', ylab = '') + title(title, line = cex_title * (0.2 + 0.1), cex.main = cex_title) + # Draw top and bottom border lines + lines(c(1 - 0.5, ncols + 0.5), c(0.6, 0.6)) + lines(c(1 - 0.5, ncols + 0.5), c(1.4, 1.4)) + tick_length <- -0.4 + } + # Draw the triangles + par(xpd = TRUE) + if (triangle_ends[1]) { + # Draw left triangle + polygon(left_triangle$x, left_triangle$y, col = col_inf, border = NA) + lines(left_triangle$x, left_triangle$y) + } + if (triangle_ends[2]) { + # Draw right triangle + polygon(right_triangle$x, right_triangle$y, col = col_sup, border = NA) + lines(right_triangle$x, right_triangle$y) + } + par(xpd = FALSE) + + # Put the separators + if (vertical) { + if (draw_separators) { + for (i in 1:(ncols - 1)) { + lines(c(0.6, 1.4), c(i, i) + 0.5) + } + } + if (draw_separators || is.null(col_inf)) { + lines(c(0.6, 1.4), c(0.5, 0.5)) + } + if (draw_separators || is.null(col_sup)) { + lines(c(0.6, 1.4), c(ncols + 0.5, ncols + 0.5)) + } + } else { + if (draw_separators) { + for (i in 1:(ncols - 1)) { + lines(c(i, i) + 0.5, c(0.6, 1.4)) + } + } + if (draw_separators || is.null(col_inf)) { + lines(c(0.5, 0.5), c(0.6, 1.4)) + } + if (draw_separators || is.null(col_sup)) { + lines(c(ncols + 0.5, ncols + 0.5), c(0.6, 1.4)) + } + } + # Put the ticks + plot_range <- length(brks) - 1 + var_range <- tail(brks, 1) - head(brks, 1) + extra_labels_at <- ((extra_labels - head(brks, 1)) / var_range) * plot_range + 0.5 + at <- seq(1, length(brks), subsampleg) + labels <- brks[at] + # Getting rid of next-to-last tick if too close to last one + if (remove_final_tick) { + at <- at[-length(at)] + labels <- labels[-length(labels)] + } + labels <- signif(labels, label_digits) + if (added_final_tick) { + extra_labels[length(extra_labels)] <- signif(tail(extra_labels, 1), label_digits) + } + at <- at - 0.5 + at <- c(at, extra_labels_at) + labels <- c(labels, extra_labels) + tick_reorder <- sort(at, index.return = TRUE) + at <- tick_reorder$x + if (draw_labels) { + labels <- labels[tick_reorder$ix] + } else { + labels <- FALSE + } + axis(d, at = at, tick = draw_ticks, labels = labels, cex.axis = cex_labels, tcl = cex_ticks) + par(saved_pars) + } + invisible(list(brks = brks, cols = cols, col_inf = col_inf, col_sup = col_sup)) +} diff --git a/modules/Scorecards/R/tmp/ScorecardsMulti.R 
b/modules/Scorecards/R/tmp/ScorecardsMulti.R index dd9ac257..020a2047 100644 --- a/modules/Scorecards/R/tmp/ScorecardsMulti.R +++ b/modules/Scorecards/R/tmp/ScorecardsMulti.R @@ -237,37 +237,37 @@ ScorecardsMulti <- function(data, sign, system, reference, var, start.year, sign_sc_9 <- Subset(sign, c('system','region'), list(1, reg), drop = 'selected') } - SCPlotScorecard(data = data_sc_9, - sign = sign_sc_9, - row.dim = model, - subrow.dim = 'time', - col.dim = 'metric', - subcol.dim = 'sdate', - legend.dim = 'metric', - row.names = model.name, - subrow.names = forecast.months, - col.names = metric.names, - subcol.names = month.abb[as.numeric(start.months)], - table.title = table.title, - table.subtitle = table.subtitle, - row.title = table.model.name, - subrow.title = 'Forecast Month', - col.title = 'Start date', - legend.breaks = legend.breaks, - plot.legend = plot.legend, - label.scale = label.scale, - legend.width = legend.width, - legend.height = legend.height, - palette = palette, - colorunder = legend.col.inf, - colorsup = legend.col.sup, - round.decimal = round.decimal, - font.size = font.size, - legend.white.space = legend.white.space, - col1.width = 4, - col2.width = col2.width, - columns.width = columns.width, - fileout = fileout) + VizScorecard(data = data_sc_9, + sign = sign_sc_9, + row_dim = model, + subrow_dim = 'time', + col_dim = 'metric', + subcol_dim = 'sdate', + legend_dim = 'metric', + row_names = model.name, + subrow_names = forecast.months, + col_names = metric.names, + subcol_names = month.abb[as.numeric(start.months)], + table_title = table.title, + table_subtitle = table.subtitle, + row_title = table.model.name, + subrow_title = 'Forecast Month', + col_title = 'Start date', + legend_breaks = legend.breaks, + plot_legend = plot.legend, + label_scale = label.scale, + legend_width = legend.width, + legend_height = legend.height, + palette = palette, + colorunder = legend.col.inf, + colorsup = legend.col.sup, + round_decimal = round.decimal, + font_size = font.size, + legend_white_space = legend.white.space, + col1_width = 4, + col2_width = col2.width, + columns_width = columns.width, + fileout = fileout) #### Scorecard_type 10 #### @@ -286,37 +286,37 @@ ScorecardsMulti <- function(data, sign, system, reference, var, start.year, data_sc_10 <- Subset(Reorder(data, new_order), c('system','region'), list(1, reg), drop = 'selected') sign_sc_10 <- Subset(Reorder(sign, new_order), c('system','region'), list(1, reg), drop = 'selected') } - SCPlotScorecard(data = data_sc_10, - sign = sign_sc_10, - row.dim = 'time', - subrow.dim = model, - col.dim = 'metric', - subcol.dim = 'sdate', - legend.dim = 'metric', - row.names = forecast.months, - subrow.names = model.name, - col.names = metric.names, - subcol.names = month.abb[as.numeric(start.months)], - table.title = table.title, - table.subtitle = table.subtitle, - row.title = 'Forecast month', - subrow.title = table.model.name, - col.title = 'Start date', - legend.breaks = legend.breaks, - plot.legend = plot.legend, - label.scale = label.scale, - legend.width = legend.width, - legend.height = legend.height, - palette = palette, - colorunder = legend.col.inf, - colorsup = legend.col.sup, - round.decimal = round.decimal, - font.size = font.size, - legend.white.space = legend.white.space, - col1.width = col1.width, - col2.width = 4, - columns.width = columns.width, - fileout = fileout) + VizScorecard(data = data_sc_10, + sign = sign_sc_10, + row_dim = 'time', + subrow_dim = model, + col_dim = 'metric', + subcol_dim = 'sdate', + 
legend_dim = 'metric', + row_names = forecast.months, + subrow_names = model.name, + col_names = metric.names, + subcol_names = month.abb[as.numeric(start.months)], + table_title = table.title, + table_subtitle = table.subtitle, + row_title = 'Forecast month', + subrow_title = table.model.name, + col_title = 'Start date', + legend_breaks = legend.breaks, + plot_legend = plot.legend, + label_scale = label.scale, + legend_width = legend.width, + legend_height = legend.height, + palette = palette, + colorunder = legend.col.inf, + colorsup = legend.col.sup, + round_decimal = round.decimal, + font_size = font.size, + legend_white_space = legend.white.space, + col1_width = col1.width, + col2_width = 4, + columns_width = columns.width, + fileout = fileout) #### Scorecard_type 11 #### @@ -334,37 +334,37 @@ ScorecardsMulti <- function(data, sign, system, reference, var, start.year, sign_sc_11 <- Subset(transformed_sign, c('system','region'), list(1, reg), drop = 'selected') } - SCPlotScorecard(data = data_sc_11, - sign = sign_sc_11, - row.dim = model, - subrow.dim = 'time', - col.dim = 'metric', - subcol.dim = 'sdate', - legend.dim = 'metric', - row.names = model.name, - subrow.names = forecast.months, - col.names = metric.names, - subcol.names = month.abb[as.numeric(start.months)], - table.title = table.title, - table.subtitle = table.subtitle, - row.title = table.model.name, - subrow.title = 'Forecast Month', - col.title = 'Target month', - legend.breaks = legend.breaks, - plot.legend = plot.legend, - label.scale = label.scale, - legend.width = legend.width, - legend.height = legend.height, - palette = palette, - colorunder = legend.col.inf, - colorsup = legend.col.sup, - round.decimal = round.decimal, - font.size = font.size, - legend.white.space = legend.white.space, - col1.width = 4, - col2.width = col2.width, - columns.width = columns.width, - fileout = fileout) + VizScorecard(data = data_sc_11, + sign = sign_sc_11, + row_dim = model, + subrow_dim = 'time', + col_dim = 'metric', + subcol_dim = 'sdate', + legend_dim = 'metric', + row_names = model.name, + subrow_names = forecast.months, + col_names = metric.names, + subcol_names = month.abb[as.numeric(start.months)], + table_title = table.title, + table_subtitle = table.subtitle, + row_title = table.model.name, + subrow_title = 'Forecast Month', + col_title = 'Target month', + legend_breaks = legend.breaks, + plot_legend = plot.legend, + label_scale = label.scale, + legend_width = legend.width, + legend_height = legend.height, + palette = palette, + colorunder = legend.col.inf, + colorsup = legend.col.sup, + round_decimal = round.decimal, + font_size = font.size, + legend_white_space = legend.white.space, + col1_width = 4, + col2_width = col2.width, + columns_width = columns.width, + fileout = fileout) #### Scorecard_type 12 #### @@ -384,37 +384,37 @@ ScorecardsMulti <- function(data, sign, system, reference, var, start.year, sign_sc_12 <- Subset(Reorder(transformed_sign, new_order), c('system','region'), list(1, reg), drop = 'selected') } - SCPlotScorecard(data = data_sc_12, - sign = sign_sc_12, - row.dim = 'time', - subrow.dim = model, - col.dim = 'metric', - subcol.dim = 'sdate', - legend.dim = 'metric', - row.names = forecast.months, - subrow.names = model.name, - col.names = metric.names, - subcol.names = month.abb[as.numeric(start.months)], - table.title = table.title, - table.subtitle = table.subtitle, - row.title = 'Forecast Month', - subrow.title = table.model.name, - col.title = 'Target month', - legend.breaks = legend.breaks, - 
plot.legend = plot.legend, - label.scale = label.scale, - legend.width = legend.width, - legend.height = legend.height, - palette = palette, - colorunder = legend.col.inf, - colorsup = legend.col.sup, - round.decimal = round.decimal, - font.size = font.size, - legend.white.space = legend.white.space, - col1.width = col1.width, - col2.width = 4, - columns.width = columns.width, - fileout = fileout) + VizScorecard(data = data_sc_12, + sign = sign_sc_12, + row_dim = 'time', + subrow_dim = model, + col_dim = 'metric', + subcol_dim = 'sdate', + legend_dim = 'metric', + row_names = forecast.months, + subrow_names = model.name, + col_names = metric.names, + subcol_names = month.abb[as.numeric(start.months)], + table_title = table.title, + table_subtitle = table.subtitle, + row_title = 'Forecast Month', + subrow_title = table.model.name, + col_title = 'Target month', + legend_breaks = legend.breaks, + plot_legend = plot.legend, + label_scale = label.scale, + legend_width = legend.width, + legend_height = legend.height, + palette = palette, + colorunder = legend.col.inf, + colorsup = legend.col.sup, + round_decimal = round.decimal, + font_size = font.size, + legend_white_space = legend.white.space, + col1_width = col1.width, + col2_width = 4, + columns_width = columns.width, + fileout = fileout) } ## close loop on region diff --git a/modules/Scorecards/R/tmp/ScorecardsSingle.R b/modules/Scorecards/R/tmp/ScorecardsSingle.R index 1e8f93f6..c5525fd4 100644 --- a/modules/Scorecards/R/tmp/ScorecardsSingle.R +++ b/modules/Scorecards/R/tmp/ScorecardsSingle.R @@ -228,37 +228,37 @@ ScorecardsSingle <- function(data, sign, system, reference, var, start.year, data_sc_1 <- Subset(data, c('system', 'reference'), list(sys, ref), drop = 'selected') sign_sc_1 <- Subset(sign, c('system', 'reference'), list(sys, ref), drop = 'selected') - SCPlotScorecard(data = data_sc_1, - sign = sign_sc_1, - row.dim = 'region', - subrow.dim = 'time', - col.dim = 'metric', - subcol.dim = 'sdate', - legend.dim = 'metric', - row.names = region.names, - subrow.names = forecast.months, - col.names = metric.names, - subcol.names = month.abb[as.numeric(start.months)], - table.title = table.title, - table.subtitle = table.subtitle, - row.title = 'Region', - subrow.title = 'Forecast Month', - col.title = 'Start date', - legend.breaks = legend.breaks, - plot.legend = plot.legend, - label.scale = label.scale, - legend.width = legend.width, - legend.height = legend.height, - palette = palette, - colorunder = legend.col.inf, - colorsup = legend.col.sup, - round.decimal = round.decimal, - font.size = font.size, - legend.white.space = legend.white.space, - col1.width = col1.width, - col2.width = col2.width, - columns.width = columns.width, - fileout = fileout) + VizScorecard(data = data_sc_1, + sign = sign_sc_1, + row_dim = 'region', + subrow_dim = 'time', + col_dim = 'metric', + subcol_dim = 'sdate', + legend_dim = 'metric', + row_names = region.names, + subrow_names = forecast.months, + col_names = metric.names, + subcol_names = month.abb[as.numeric(start.months)], + table_title = table.title, + table_subtitle = table.subtitle, + row_title = 'Region', + subrow_title = 'Forecast Month', + col_title = 'Start date', + legend_breaks = legend.breaks, + plot_legend = plot.legend, + label_scale = label.scale, + legend_width = legend.width, + legend_height = legend.height, + palette = palette, + colorunder = legend.col.inf, + colorsup = legend.col.sup, + round_decimal = round.decimal, + font_size = font.size, + legend_white_space = legend.white.space, 
+ col1_width = col1.width, + col2_width = col2.width, + columns_width = columns.width, + fileout = fileout) #### Scorecard_type 2 #### @@ -275,37 +275,37 @@ ScorecardsSingle <- function(data, sign, system, reference, var, start.year, data_sc_2 <- Reorder(Subset(data, c('system', 'reference'), list(sys, ref), drop = 'selected'), new_order) sign_sc_2 <- Reorder(Subset(sign, c('system', 'reference'), list(sys, ref), drop = 'selected'), new_order) - SCPlotScorecard(data = data_sc_2, - sign = sign_sc_2, - row.dim = 'time', - subrow.dim = 'region', - col.dim = 'metric', - subcol.dim = 'sdate', - legend.dim = 'metric', - row.names = forecast.months, - subrow.names = region.names, - col.names = metric.names, - subcol.names = month.abb[as.numeric(start.months)], - table.title = table.title, - table.subtitle = table.subtitle, - row.title = 'Forecast Month', - subrow.title = 'Region', - col.title = 'Start date', - legend.breaks = legend.breaks, - plot.legend = plot.legend, - label.scale = label.scale, - legend.width = legend.width, - legend.height = legend.height, - palette = palette, - colorunder = legend.col.inf, - colorsup = legend.col.sup, - round.decimal = round.decimal, - font.size = font.size, - legend.white.space = legend.white.space, - col1.width = col1.width, - col2.width = col2.width, - columns.width = columns.width, - fileout = fileout) + VizScorecard(data = data_sc_2, + sign = sign_sc_2, + row_dim = 'time', + subrow_dim = 'region', + col_dim = 'metric', + subcol_dim = 'sdate', + legend_dim = 'metric', + row_names = forecast.months, + subrow_names = region.names, + col_names = metric.names, + subcol_names = month.abb[as.numeric(start.months)], + table_title = table.title, + table_subtitle = table.subtitle, + row_title = 'Forecast Month', + subrow_title = 'Region', + col_title = 'Start date', + legend_breaks = legend.breaks, + plot_legend = plot.legend, + label_scale = label.scale, + legend_width = legend.width, + legend_height = legend.height, + palette = palette, + colorunder = legend.col.inf, + colorsup = legend.col.sup, + round_decimal = round.decimal, + font_size = font.size, + legend_white_space = legend.white.space, + col1_width = col1.width, + col2_width = col2.width, + columns_width = columns.width, + fileout = fileout) } ## close if @@ -318,37 +318,37 @@ ScorecardsSingle <- function(data, sign, system, reference, var, start.year, data_sc_3 <- Subset(transformed_data, c('system', 'reference'), list(sys, ref), drop = 'selected') sign_sc_3 <- Subset(transformed_sign, c('system', 'reference'), list(sys, ref), drop = 'selected') - SCPlotScorecard(data = data_sc_3, - sign = sign_sc_3, - row.dim = 'region', - subrow.dim = 'time', - col.dim = 'metric', - subcol.dim = 'sdate', - legend.dim = 'metric', - row.names = region.names, - subrow.names = forecast.months, - col.names = metric.names, - subcol.names = month.abb[as.numeric(start.months)], - table.title = table.title, - table.subtitle = table.subtitle, - row.title = 'Region', - subrow.title = 'Forecast Month', - col.title = 'Target month', - legend.breaks = legend.breaks, - plot.legend = plot.legend, - label.scale = label.scale, - legend.width = legend.width, - legend.height = legend.height, - palette = palette, - colorunder = legend.col.inf, - colorsup = legend.col.sup, - round.decimal = round.decimal, - font.size = font.size, - legend.white.space = legend.white.space, - col1.width = col1.width, - col2.width = col2.width, - columns.width = columns.width, - fileout = fileout) + VizScorecard(data = data_sc_3, + sign = sign_sc_3, + 
row_dim = 'region', + subrow_dim = 'time', + col_dim = 'metric', + subcol_dim = 'sdate', + legend_dim = 'metric', + row_names = region.names, + subrow_names = forecast.months, + col_names = metric.names, + subcol_names = month.abb[as.numeric(start.months)], + table_title = table.title, + table_subtitle = table.subtitle, + row_title = 'Region', + subrow_title = 'Forecast Month', + col_title = 'Target month', + legend_breaks = legend.breaks, + plot_legend = plot.legend, + label_scale = label.scale, + legend_width = legend.width, + legend_height = legend.height, + palette = palette, + colorunder = legend.col.inf, + colorsup = legend.col.sup, + round_decimal = round.decimal, + font_size = font.size, + legend_white_space = legend.white.space, + col1_width = col1.width, + col2_width = col2.width, + columns_width = columns.width, + fileout = fileout) #### Scorecard_type 4 #### @@ -365,37 +365,37 @@ ScorecardsSingle <- function(data, sign, system, reference, var, start.year, data_sc_4 <- Reorder(Subset(transformed_data, c('system', 'reference'), list(sys, ref), drop = 'selected'), new_order) sign_sc_4 <- Reorder(Subset(transformed_sign, c('system', 'reference'), list(sys, ref), drop = 'selected'), new_order) - SCPlotScorecard(data = data_sc_4, - sign = sign_sc_4, - row.dim = 'time', - subrow.dim = 'region', - col.dim = 'metric', - subcol.dim = 'sdate', - legend.dim = 'metric', - row.names = forecast.months, - subrow.names = region.names, - col.names = metric.names, - subcol.names = month.abb[as.numeric(start.months)], - table.title = table.title, - table.subtitle = table.subtitle, - row.title = 'Forecast Month', - subrow.title = 'Region', - col.title = 'Target month', - legend.breaks = legend.breaks, - plot.legend = plot.legend, - label.scale = label.scale, - legend.width = legend.width, - legend.height = legend.height, - palette = palette, - colorunder = legend.col.inf, - colorsup = legend.col.sup, - round.decimal = round.decimal, - font.size = font.size, - legend.white.space = legend.white.space, - col1.width = col1.width, - col2.width = col2.width, - columns.width = columns.width, - fileout = fileout) + VizScorecard(data = data_sc_4, + sign = sign_sc_4, + row_dim = 'time', + subrow_dim = 'region', + col_dim = 'metric', + subcol_dim = 'sdate', + legend_dim = 'metric', + row_names = forecast.months, + subrow_names = region.names, + col_names = metric.names, + subcol_names = month.abb[as.numeric(start.months)], + table_title = table.title, + table_subtitle = table.subtitle, + row_title = 'Forecast Month', + subrow_title = 'Region', + col_title = 'Target month', + legend_breaks = legend.breaks, + plot_legend = plot.legend, + label_scale = label.scale, + legend_width = legend.width, + legend_height = legend.height, + palette = palette, + colorunder = legend.col.inf, + colorsup = legend.col.sup, + round_decimal = round.decimal, + font_size = font.size, + legend_white_space = legend.white.space, + col1_width = col1.width, + col2_width = col2.width, + columns_width = columns.width, + fileout = fileout) } ## close if } ## close loop on ref diff --git a/modules/Scorecards/R/tmp/Utils.R b/modules/Scorecards/R/tmp/Utils.R index 6ba49e8c..caee98e4 100644 --- a/modules/Scorecards/R/tmp/Utils.R +++ b/modules/Scorecards/R/tmp/Utils.R @@ -201,8 +201,6 @@ } - - ## Output file name to save scorecard .Filename <- function(system = NULL, reference = NULL, model = NULL, eval.name = NULL, var = NULL, start.year = NULL, end.year = NULL, scorecard.type = NULL, @@ -230,96 +228,6 @@ return(scorecard_save_path) } -# 
Scorecards function to assign background color of table cells, -# color of text in table and to bold the text. -# -# It will return a list with 2 arrays: -# (1) metric.color, A 2-dimensional array with character strings containing the -# color codes for each cell background. -# (2) metric.text.color, A 2-dimensional array with character strings -# containing the color codes for each cell text. -.SCTableColors <- function(table, n.col, n.subcol, n.row, n.subrow, - legend.breaks, palette, colorunder, colorsup) { - # Define rows and columns - n.rows <- n.row * n.subrow - n.columns <- n.col * n.subcol - - ## Set table background colors - metric.color <- array(colorunder, c(n.row * n.subrow, n.columns)) - metric.text.color <- array("#2A2A2A", c(n.row * n.subrow , n.columns)) - # metric.text.bold <- array(TRUE, c(n.row * n.subrow , n.columns - 2)) ## Setting all values to bold - - ## Define cell and text colors to show in table - for (i in 1:n.col) { - metric.int <- legend.breaks[[i]] - for (rr in 1:n.rows) { - for (j in 1:n.subcol) { - for (pp in 1:(length(metric.int) - 1)) { - if (is.nan(table[rr,((i - 1) * n.subcol + j)])) { - metric.color[rr,((i - 1) * n.subcol + j)] <- "gray" - } else { - if (table[rr,((i - 1) * n.subcol + j)] >= - metric.int[pp] && table[rr,((i - 1) * n.subcol + j)] <= - metric.int[pp+1]) { - metric.color[rr,((i - 1) * n.subcol + j)] <- palette[[i]][pp] #palette[pp] - } - if (table[rr,((i - 1) * n.subcol + j)] < metric.int[1]) { - metric.color[rr,((i - 1) * n.subcol + j)] <- colorunder[i] - } - if (table[rr,((i - 1) * n.subcol + j)] >= - metric.int[length(metric.int)]) { - metric.color[rr,((i - 1) * n.subcol + j)] <- colorsup[i] - } - } - ## color text in white and bold if background is white or dark blue or dark red: - if (is.nan(table[rr,((i - 1) * n.subcol + j)]) || - (!is.nan(table[rr,((i - 1) * n.subcol + j)]) && pp == 1 && - table[rr,((i - 1) * n.subcol + j)] < metric.int[2]) || - (!is.nan(table[rr,((i - 1) * n.subcol + j)]) && pp == 2 && - table[rr,((i - 1) * n.subcol + j)] < metric.int[3]) || - (!is.nan(table[rr,((i - 1) * n.subcol + j)]) && pp == (length(metric.int) - 1) && - table[rr,((i - 1) * n.subcol + j)] >= metric.int[length(metric.int) - 1]) || - (!is.nan(table[rr,((i - 1) * n.subcol + j)]) && pp == (length(metric.int) - 2) && - table[rr,((i - 1) * n.subcol + j)] >= metric.int[length(metric.int) - 2])) { - metric.text.color[rr,((i - 1) * n.subcol + j)] <- "white" - #metric.text.bold[rr,((i - 1) * n.subcol + j)] <- TRUE - } - } - } - } - } - - return(list(metric.color = metric.color, - metric.text.color = metric.text.color)) - -} - -# Scorecards function to create the color bar legends for the required metrics -# and paste them below the scorecard table -.SCLegend <- function(legend.breaks, palette, colorunder, colorsup, - label.scale, legend.width, legend.height, - legend.white.space, fileout) { - - ## Create color bar legends for each metric - for (i in 1:length(palette)) { - png(filename = paste0(fileout, '_tmpLegend', i, '.png'), width = legend.width, - height = legend.height) - ColorBar(brks = legend.breaks[[i]], cols = palette[[i]], vertical = FALSE, - label_scale = label.scale, col_inf = colorunder[[i]], - col_sup = colorsup[[i]]) - dev.off() - if (i == 1) { - ## Add white space to the left of the first color bar legend - system(paste0('convert ', fileout, '_tmpLegend1.png -background white -splice ', - legend.white.space, 'x0 ', fileout, '_tmpScorecardLegend.png')) - } else { - system(paste0('convert +append ', fileout, '_tmpScorecardLegend.png ', - 
fileout, '_tmpLegend', i, '.png ', fileout,
-                  '_tmpScorecardLegend.png'))
-    }
-  }
-  unlink(paste0(fileout,'_tmpLegend*.png'))
-}
 
 # Function to calculate color bar breaks for bias metric
 .SCBiasBreaks <- function(data){
diff --git a/modules/Scorecards/R/tmp/VizScorecard.R b/modules/Scorecards/R/tmp/VizScorecard.R
new file mode 100644
index 00000000..f3b50a6b
--- /dev/null
+++ b/modules/Scorecards/R/tmp/VizScorecard.R
@@ -0,0 +1,619 @@
+#'Function to plot Scorecard tables
+#'
+#'This function renders a scorecard table from a multidimensional array
+#'in HTML style. The structure of the table is based on the assignment of each
+#'dimension of the array as a structure element: row, subrow, column or
+#'subcolumn. It is useful to present tabular results with colors in a nice way.
+#'
+#'Note: Module PhantomJS is required.
+#'
+#'@param data A multidimensional array containing the data to be plotted with
+#'  at least four dimensions. Each dimension will have assigned a structure
+#'  element: row, subrow, column and subcolumn.
+#'@param sign A multidimensional boolean array with the same dimensions as
+#'  'data', indicating which values are to be highlighted. If set to NULL, no
+#'  values will be highlighted.
+#'@param row_dim A character string indicating the dimension name to show in the
+#'  rows of the plot. It is set as 'region' by default.
+#'@param subrow_dim A character string indicating the dimension name to show in
+#'  the sub-rows of the plot. It is set as 'time' by default.
+#'@param col_dim A character string indicating the dimension name to show in the
+#'  columns of the plot. It is set as 'metric' by default.
+#'@param subcol_dim A character string indicating the dimension name to show in
+#'  the sub-columns of the plot. It is set as 'sdate' by default.
+#'@param legend_dim A character string indicating the dimension name to use for
+#'  the legend. It is set as 'metric' by default.
+#'@param row_names A vector of character strings with row display names. It
+#'  is set as NULL by default.
+#'@param subrow_names A vector of character strings with sub-row display names.
+#'  It is set as NULL by default.
+#'@param col_names A vector of character strings with column display names. It
+#'  is set as NULL by default.
+#'@param subcol_names A vector of character strings with sub-column display
+#'  names. It is set as NULL by default.
+#'@param row_title A character string for the title of the row names. It is set
+#'  as NULL by default.
+#'@param subrow_title A character string for the title of the sub-row names. It
+#'  is set as NULL by default.
+#'@param table_title A character string for the title of the plot. It is set as
+#'  NULL by default.
+#'@param table_subtitle A character string for the sub-title of the plot. It is
+#'  set as NULL by default.
+#'@param legend_breaks A vector of numerics or a list of vectors of numerics,
+#'  containing the breaks for the legends. If a vector is given as input, then
+#'  these breaks will be repeated for each 'legend_dim'. A list of vectors can
+#'  be given as input if the 'legend_dims' require different breaks. This
+#'  parameter is required even if the legend is not plotted, to define the
+#'  colors in the scorecard table. It is set as NULL by default.
+#'@param plot_legend A logical value to determine if the legend is plotted. It
+#'  is set as TRUE by default.
+#'@param label_scale A numeric value to define the size of the legend labels.
+#'  It is set as 1.4 by default.
+#'@param legend_width A numeric value to define the width of the legend bars.
By
+#'  default it is set to NULL and calculated internally from the table width.
+#'@param legend_height A numeric value to define the height of the legend bars.
+#'  It is set as 50 by default.
+#'@param palette A vector of character strings or a list of vectors of
+#'  character strings containing the colors to use in the legends. If a vector
+#'  is given as input, then these colors will be used for each legend_dim. A
+#'  list of vectors can be given as input if different colors are desired for
+#'  the legend_dims. This parameter must be included even if the legend is
+#'  not plotted, to define the colors in the scorecard table.
+#'@param colorunder A character string or a vector of character strings
+#'  defining the colors to use for data values that are inferior to the lowest
+#'  break value. This parameter will also plot an inferior triangle in the
+#'  legend bar. The parameter can be set to NULL if there are no inferior
+#'  values. If a character string is given, this color will be applied to all
+#'  'legend_dims'. It is set as NULL by default.
+#'@param colorsup A character string or a vector of character strings
+#'  defining the colors to use for data values that are superior to the highest
+#'  break value. This parameter will also plot a superior triangle in the
+#'  legend bar. The parameter can be set to NULL if there are no superior
+#'  values. If a character string is given, this color will be applied to all
+#'  legend_dims. It is set as NULL by default.
+#'@param round_decimal A numeric indicating the number of decimal places to
+#'  which the data is rounded for display in the scorecard table. It is set as
+#'  2 by default.
+#'@param font_size A numeric indicating the font size on the scorecard table.
+#'  It is set as 1.1 by default.
+#'@param legend_white_space A numeric value defining the initial starting
+#'  position of the legend bars; the white space in front of the legend is
+#'  calculated from the left-most point of the table as a distance in cm. The
+#'  default value is 6.
+#'@param columns_width A numeric value defining the width of all columns within
+#'  the table in cm (excluding the first and second columns, which contain the
+#'  titles). It is set as 1.2 by default.
+#'@param col1_width A numeric value defining the width of the first table
+#'  column in cm. It is set as NULL by default.
+#'@param col2_width A numeric value defining the width of the second table
+#'  column in cm. It is set as NULL by default.
+#'@param fileout A character string with the path where the scorecard plot is
+#'  saved. By default the plot will be saved to the working directory.
+#'
+#'@return An image file containing the scorecard.
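+#'
+#'As a sketch of the per-metric legend arguments: for a scorecard with two
+#'metrics along 'legend_dim', 'legend_breaks' and 'palette' can be given as
+#'lists with one element per metric, here built with the ClimColors helper
+#'added earlier in this patch (the break values and palette choices are
+#'illustrative only; note each palette has one colour fewer than its breaks):
+#'\preformatted{
+#'legend_breaks <- list(seq(-1, 1, 0.2), seq(0, 1, 0.1))
+#'palette <- list(ClimColors(10, 'bluered'), ClimColors(10, 'yellowred'))
+#'}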
+#'@examples +#'data <- array(rnorm(1000), dim = c('sdate' = 12, 'metric' = 4, 'region' = 3, +#' 'time' = 6)) +#'row_names <- c('Tropics', 'Extra-tropical NH', 'Extra-tropical SH') +#'col_names <- c('Mean bias (K)', 'Correlation', 'RPSS','CRPSS') +#'VizScorecard(data = data, row_names = row_names, col_names = col_names, +#' subcol_names = month.abb[as.numeric(1:12)], +#' row_title = 'Region', subrow_title = 'Forecast Month', +#' col_title = 'Start date', +#' table_title = "Temperature of ECMWF System 5", +#' table_subtitle = "(Ref: ERA5 1994-2016)", +#' fileout = 'test.png') +#' +#'@import kableExtra +#'@importFrom s2dv Reorder +#'@importFrom ClimProjDiags Subset +#'@importFrom CSTools MergeDims +#'@export +VizScorecard <- function(data, sign = NULL, row_dim = 'region', + subrow_dim = 'time', col_dim = 'metric', + subcol_dim = 'sdate', legend_dim = 'metric', + row_names = NULL, subrow_names = NULL, + col_names = NULL, subcol_names = NULL, + row_title = NULL, subrow_title = NULL, + col_title = NULL, table_title = NULL, + table_subtitle = NULL, legend_breaks = NULL, + plot_legend = TRUE, label_scale = 1.4, + legend_width = NULL, legend_height = 50, + palette = NULL, colorunder = NULL, colorsup = NULL, + round_decimal = 2, font_size = 1.1, + legend_white_space = 6, columns_width = 1.2, + col1_width = NULL, col2_width = NULL, + fileout = './scorecard.png') { + + # Input parameter checks + # Check data + if (!is.array(data)) { + stop("Parameter 'data' must be a numeric array.") + } + if (length(dim(data)) != 4) { + stop("Parameter 'data' must have four dimensions.") + } + dimnames <- names(dim(data)) + # Check sign + if (is.null(sign)) { + sign <- array(FALSE, dim = dim(data)) + } else { + if (!is.array(sign)) { + stop("Parameter 'sign' must be a boolean array or NULL.") + } + if (any(sort(names(dim(sign))) != sort(dimnames))) { + stop("Parameter 'sign' must have same dimensions as 'data'.") + } + if (typeof(sign) != 'logical') { + stop("Parameter 'sign' must be an array with logical values.") + } + } + # Check row_dim + if (!is.character(row_dim)) { + stop("Parameter 'row_dim' must be a character string.") + } + if (!row_dim %in% names(dim(data))) { + stop("Parameter 'row_dim' is not found in 'data' dimensions.") + } + # Check row_names + if (is.null(row_names)) { + row_names <- as.character(1:dim(data)[row_dim]) + } + if (length(row_names) != as.numeric(dim(data)[row_dim])) { + stop("Parameter 'row_names' must have the same length of dimension ", + "'row_dim'.") + } + # Check subrow_dim + if (!is.character(subrow_dim)) { + stop("Parameter 'subrow_dim' must be a character string.") + } + if (!subrow_dim %in% names(dim(data))) { + stop("Parameter 'subrow_dim' is not found in 'data' dimensions.") + } + # Check subrow_names + if (is.null(subrow_names)) { + subrow_names <- as.character(1:dim(data)[subrow_dim]) + } + if (length(subrow_names) != as.numeric(dim(data)[subrow_dim])) { + stop("Parameter 'subrow_names' must have the same length of dimension ", + "'subrow_dim'.") + } + # Check col_dim + if (!is.character(col_dim)) { + stop("Parameter 'col_dim' must be a character string.") + } + if (!col_dim %in% names(dim(data))) { + stop("Parameter 'col_dim' is not found in 'data' dimensions.") + } + # Check col_names + if (is.null(col_names)) { + col_names <- as.character(1:dim(data)[col_dim]) + } + if (length(col_names) != as.numeric(dim(data)[col_dim])) { + stop("Parameter 'col_names' must have the same length of dimension ", + "'col_dim'.") + } + # Check subcol_dim + if (!is.character(subcol_dim)) { 
+ stop("Parameter 'subcol_dim' must be a character string.") + } + if (!subcol_dim %in% names(dim(data))) { + stop("Parameter 'subcol_dim' is not found in 'data' dimensions.") + } + # Check subcol_names + if (is.null(subcol_names)) { + subcol_names <- as.character(1:dim(data)[subcol_dim]) + } + if (length(subcol_names) != as.numeric(dim(data)[subcol_dim])) { + stop("Parameter 'subcol_names' must have the same length of dimension ", + "'subcol_dim'.") + } + # Check legend_dim + if (!is.character(legend_dim)) { + stop("Parameter 'legend_dim' must be a character string.") + } + if (!legend_dim %in% names(dim(data))) { + stop("Parameter 'legend_dim' is not found in 'data' dimensions.") + } + # Check row_title + if (is.null(row_title)) { + row_title <- "" + } else { + if (!is.character(row_title)) { + stop("Parameter 'row_title' must be a character string.") + } + } + # Check subrow_title + if (is.null(subrow_title)) { + subrow_title <- "" + } else { + if (!is.character(subrow_title)) { + stop("Parameter 'subrow_title' must be a character string.") + } + } + # Check col_title + if (is.null(col_title)) { + col_title <- "" + } else { + if (!is.character(col_title)) { + stop("Parameter 'col_title' must be a character string.") + } + } + # Check table_title + if (is.null(table_title)) { + table_title <- "" + } else { + if (!is.character(table_title)) { + stop("Parameter 'table_title' must be a character string.") + } + } + # Check table_subtitle + if (is.null(table_subtitle)) { + table_subtitle <- "" + } else { + if (!is.character(table_subtitle)) { + stop("Parameter 'table_subtitle' must be a character string.") + } + } + # Check legend_breaks + if (inherits(legend_breaks, 'list')) { + if (!(length(legend_breaks) == as.numeric(dim(data)[legend_dim]))) { + stop("Parameter 'legend_breaks' must be a list with the same number of ", + "elements as the length of the 'legend_dim' dimension in data.") + } + } else if (is.numeric(legend_breaks)) { + legend_breaks <- rep(list(legend_breaks), as.numeric(dim(data)[legend_dim])) + } else if (is.null(legend_breaks)) { + legend_breaks <- rep(list(seq(-1, 1, 0.2)), as.numeric(dim(data)[legend_dim])) + } else { + stop("Parameter 'legend_breaks' must be a numeric vector, a list or NULL.") + } + # Check plot_legend + if (!inherits(plot_legend, 'logical')) { + stop("Parameter 'plot_legend' must be a logical value.") + } + # Check label_scale + if (any(!is.numeric(label_scale), length(label_scale) != 1)) { + stop("Parameter 'label_scale' must be a numeric value of length 1.") + } + # Check legend_width + if (is.null(legend_width)) { + legend_width <- length(subcol_names) * 46.5 + } else if (any(!is.numeric(legend_width), length(legend_width) != 1)) { + stop("Parameter 'legend_width' must be a numeric value of length 1.") + } + # Check legend_height + if (any(!is.numeric(legend_height), length(legend_height) != 1)) { + stop("Parameter 'legend_height' must be a numeric value of length 1.") + } + # Check colour palette input + if (inherits(palette, 'list')) { + if (!(length(palette) == as.numeric(dim(data)[legend_dim]))) { + stop("Parameter 'palette' must be a list with the same number of ", + "elements as the length of the 'legend_dim' dimension in data.") + } + if (!all(sapply(palette, is.character))) { + stop("Parameter 'palette' must be a list of character vectors.") + } + } else if (is.character(palette)) { + palette <- rep(list(palette), as.numeric(dim(data)[legend_dim])) + } else if (is.null(palette)) { + n <- length(legend_breaks[[1]]) + if (n == 1) { + 
stop("Parameter 'legend_breaks' can't be of length 1.") + } else if (n == 2) { + colors <- c('#B35806') + } else if (n == 3) { + colors <- c('#8073AC', '#E08214') + } else if (n == 11) { + colors <- c('#2D004B', '#542789', '#8073AC', '#B2ABD2', '#D8DAEB', + '#FEE0B6', '#FDB863', '#E08214', '#B35806', '#7F3B08') + } else { + stop("Parameter 'palette' must be provided when 'legend_breaks' ", + "exceed the length of 11.") + } + palette <- rep(list(colors), as.numeric(dim(data)[legend_dim])) + } else { + stop("Parameter 'palette' must be a character vector, a list or NULL.") + } + # Check colorunder + if (is.null(colorunder)) { + colorunder <- rep("#04040E", as.numeric(dim(data)[legend_dim])) + } + if (!is.character(colorunder)) { + stop("Parameter 'colorunder' must be a character string vector.") + } + if (length(colorunder) == 1) { + colorunder <- rep(colorunder, as.numeric(dim(data)[legend_dim])) + } + if (length(colorunder) != as.numeric(dim(data)[legend_dim])) { + stop("Parameter 'colorunder' must be a list with the same number of ", + "elements as the length of the 'legend_dim' dimension in data.") + } + # Check colorsup + if (is.null(colorsup)) { + colorsup <- rep("#730C04", as.numeric(dim(data)[legend_dim])) + } + if (!is.character(colorsup)) { + stop("Parameter 'colorsup' must be a character string vector.") + } + if (length(colorsup) == 1) { + colorsup <- rep(colorsup, as.numeric(dim(data)[legend_dim])) + } + if (length(colorsup) != as.numeric(dim(data)[legend_dim])) { + stop("Parameter 'colorsup' must be a list with the same number of ", + "elements as the length of the 'legend_dim' dimension in data.") + } + # Check round_decimal + if (!is.numeric(round_decimal)) { + stop("Parameter 'round_decimal' must be a numeric value of length 1.") + } + # Check font_size + if (!is.numeric(font_size)) { + stop("Parameter 'font_size' must be a numeric value of length 1.") + } + # Check legend white space + if (!is.numeric(legend_white_space)) { + stop("Parameter 'legend_white_space' must be a numeric value of length 1.") + } + # columns_width + if (!is.numeric(columns_width)) { + stop("Parameter 'columns_width' must be a numeric value.") + } + # Check col1_width + if (is.null(col1_width)) { + if (max(nchar(row_names)) == 1) { + col1_width <- max(nchar(row_names)) + } else { + col1_width <- max(nchar(row_names))/4 + } + } else if (!is.numeric(col1_width)) { + stop("Parameter 'col1_width' must be a numeric value of length 1.") + } + # Check col2_width + if (is.null(col2_width)) { + if (max(nchar(subrow_names)) == 1 ) { + col2_width <- max(nchar(subrow_names)) + } else { + col2_width <- max(nchar(subrow_names))/4 + } + } else if (!is.numeric(col2_width)) { + stop("Parameter 'col2_width' must be a numeric value of length 1.") + } + + # Get dimensions of inputs + n_col_names <- length(col_names) + n_subcol_names <- length(subcol_names) + n_row_names <- length(row_names) + n_subrow_names <- length(subrow_names) + + # Define table size + n_rows <- n_row_names * n_subrow_names + n_columns <- 2 + (n_col_names * n_subcol_names) + + # Column names + row_names_table <- rep("", n_rows) + for (row in 1:n_row_names) { + row_names_table[floor(n_subrow_names/2) + (row - 1) * n_subrow_names] <- row_names[row] + } + + # Define scorecard table titles + column_titles <- c(row_title, subrow_title, rep(c(subcol_names), n_col_names)) + + # Round data + data <- round(data, round_decimal) + + # Define data inside the scorecards table + for (row in 1:n_row_names) { + table_temp <- data.frame(table_column_2 = 
as.character(subrow_names)) + for (col in 1:n_col_names) { + table_temp <- data.frame(table_temp, + Reorder(data = Subset(x = data, along = c(col_dim, row_dim), + indices = list(col, row), drop = 'selected'), + order = c(subrow_dim, subcol_dim))) + } + if (row == 1) { + table_data <- table_temp + } else { + table_data <- rbind(table_data, table_temp) + } + } + + # All data for plotting in table + table <- data.frame(table_column_1 = row_names_table, table_data) + table_temp <- array(unlist(table[3:n_columns]), dim = c(n_rows, n_columns - 2)) + + # Define colors to show in table + table_colors <- .ScorecardColors(table = table_temp, n_col = n_col_names, + n_subcol = n_subcol_names, n_row = n_row_names, + n_subrow = n_subrow_names, legend_breaks = legend_breaks, + palette = palette, colorunder = colorunder, + colorsup = colorsup) + metric_color <- table_colors$metric_color + metric_text_color <- table_colors$metric_text_color + # metric_text_bold <- table_colors$metric_text_bold + + # Remove temporary table + rm(table_temp) + + # Format values to underline in table + metric_underline <- MergeDims(sign, c(subcol_dim, col_dim), + rename_dim = 'col', na.rm = FALSE) + metric_underline <- MergeDims(metric_underline, c(subrow_dim, row_dim), + rename_dim = 'row', na.rm = FALSE) + metric_underline <- Reorder(metric_underline, c('row', 'col')) + + options(stringsAsFactors = FALSE) + title <- data.frame(c1 = table_title, c2 = n_columns) + subtitle <- data.frame(c1 = table_subtitle, c2 = n_columns) + header.names <- as.data.frame(data.frame(c1 = c("", col_names), + c2 = c(2, rep(n_subcol_names, n_col_names)))) + header.names2 <- as.data.frame(data.frame(c1 = c("", paste0(rep(col_title, n_col_names))), + c2 = c(2, rep(n_subcol_names, n_col_names)))) + title.space <- data.frame(c1 = "\n", c2 = n_columns) + + # Hide NA values in table + options(knitr.kable.NA = '') + + # Create HTML table + table_html_part <- list() + table_html_part[[1]] <- kbl(table, escape = F, col_names = column_titles, align = rep("c", n_columns)) %>% + kable_paper("hover", full_width = FALSE, font_size = 14 * font_size) %>% + add_header_above(header = header.names2, font_size = 16 * font_size) %>% + add_header_above(header = title.space, font_size = 10 * font_size) %>% + add_header_above(header = header.names, font_size = 20 * font_size) %>% + add_header_above(header = title.space, font_size = 10 * font_size) %>% + add_header_above(header = subtitle, font_size = 16 * font_size, align = "left") %>% + add_header_above(header = title.space, font_size = 10 * font_size) %>% + add_header_above(header = title, font_size = 22 * font_size, align = "left") + + for (i in 1:n_col_names) { + for (j in 1:n_subcol_names) { + my_background <- metric_color[, (i - 1) * n_subcol_names + j] + my_text_color <- metric_text_color[, (i - 1) * n_subcol_names + j] + my_underline <- metric_underline[, (i - 1) * n_subcol_names + j] + # my_bold <- metric_text_bold[(i - 1) * n_subcol_names + j] + + table_html_part[[(i - 1) * n_subcol_names + j + 1]] <- + column_spec(table_html_part[[(i - 1) * n_subcol_names + j]], + 2 + n_subcol_names * (i - 1) + j, + background = my_background[1:n_rows], + color = my_text_color[1:n_rows], + underline = my_underline[1:n_rows], + bold = T) # strsplit(toString(bold), ', ')[[1]] + } + } + + # Define position of table borders + column_borders <- NULL + for (i in 1:n_col_names) { + column_spacing <- (n_subcol_names * i) + 2 + column_borders <- c(column_borders, column_spacing) + } + + n_last_list <- n_col_names * n_subcol_names + 1 + 
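+  # table_html_part is built incrementally above, one styled data column per
+  # element, so element n_last_list (= n_col_names * n_subcol_names + 1) holds
+  # the fully styled table that the border formatting below starts from.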
+  table_html <- column_spec(table_html_part[[n_last_list]], 1, bold = TRUE,
+                            width_min = paste0(col1_width, 'cm')) %>%
+    column_spec(2, bold = TRUE, width_min = paste0(col2_width, 'cm')) %>%
+    column_spec(3:n_columns, width_min = paste0(columns_width, 'cm')) %>%
+    column_spec(c(1, 2, column_borders), border_right = "2px solid black") %>%
+    column_spec(1, border_left = "2px solid black") %>%
+    column_spec(n_columns, border_right = "2px solid black") %>%
+    row_spec(seq(from = 0, to = n_subrow_names * n_row_names, by = n_subrow_names),
+             extra_css = "border-bottom: 2px solid black", hline_after = TRUE)
+  if (plot_legend == TRUE) {
+    # Save the scorecard (without legend)
+    save_kable(table_html, file = paste0(fileout, '_tmpScorecard.png'), vheight = 1)
+    
+    # White space for legend
+    legend_white_space <- 37.8 * legend_white_space # converting cm to pixels (~37.8 px per cm)
+    
+    # Create and save color bar legend
+    .ScorecardLegend(legend_breaks = legend_breaks,
+                     palette = palette,
+                     colorunder = colorunder,
+                     colorsup = colorsup,
+                     label_scale = label_scale,
+                     legend_width = legend_width,
+                     legend_height = legend_height,
+                     legend_white_space = legend_white_space,
+                     fileout = fileout)
+    
+    # Add the legends below the scorecard table
+    system(paste0('convert -append ', fileout, '_tmpScorecard.png ', fileout,
+                  '_tmpScorecardLegend.png ', fileout))
+    # Remove temporary scorecard table
+    unlink(paste0(fileout, '_tmpScorecard*.png'))
+  }
+  if (plot_legend == FALSE) {
+    save_kable(table_html, file = fileout)
+  }
+}
+
+# Scorecards function to create the color bar legends for the required metrics
+# and paste them below the scorecard table
+.ScorecardLegend <- function(legend_breaks, palette, colorunder, colorsup,
+                             label_scale, legend_width, legend_height,
+                             legend_white_space, fileout) {
+  
+  # Create color bar legends for each metric
+  for (i in 1:length(palette)) {
+    png(filename = paste0(fileout, '_tmpLegend', i, '.png'), width = legend_width,
+        height = legend_height)
+    ColorBarContinuous(brks = legend_breaks[[i]], cols = palette[[i]], vertical = FALSE,
+                       label_scale = label_scale, col_inf = colorunder[[i]],
+                       col_sup = colorsup[[i]])
+    dev.off()
+    if (i == 1) {
+      # Add white space to the left of the first color bar legend
+      system(paste0('convert ', fileout, '_tmpLegend1.png -background white -splice ',
+                    legend_white_space, 'x0 ', fileout, '_tmpScorecardLegend.png'))
+    } else {
+      system(paste0('convert +append ', fileout, '_tmpScorecardLegend.png ',
+                    fileout, '_tmpLegend', i, '.png ', fileout,
+                    '_tmpScorecardLegend.png'))
+    }
+  }
+  unlink(paste0(fileout,'_tmpLegend*.png'))
+}
+
+# Scorecards function to assign background color of table cells,
+# color of text in table and to bold the text.
+#
+# It will return a list with 2 arrays:
+# (1) metric_color, A 2-dimensional array with character strings containing the
+#     color codes for each cell background.
+# (2) metric_text_color, A 2-dimensional array with character strings
+#     containing the color codes for each cell text.
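+#
+# As a worked example of the expected shapes (hypothetical sizes): with
+# n_col = 4 metrics, n_subcol = 12 start dates, n_row = 3 regions and
+# n_subrow = 6 forecast months, both returned arrays are dimensioned
+# [n_row * n_subrow, n_col * n_subcol] = [18, 48], one entry per table cell.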
+.ScorecardColors <- function(table, n_col, n_subcol, n_row, n_subrow, + legend_breaks, palette, colorunder, colorsup) { + # Define rows and columns + n_rows <- n_row * n_subrow + n_columns <- n_col * n_subcol + + # Set table background colors + metric_color <- array(colorunder, c(n_row * n_subrow, n_columns)) + metric_text_color <- array("#2A2A2A", c(n_row * n_subrow , n_columns)) + # metric_text_bold <- array(TRUE, c(n_row * n_subrow , n_columns - 2)) # Setting all values to bold + + # Define cell and text colors to show in table + for (i in 1:n_col) { + metric_int <- legend_breaks[[i]] + for (rr in 1:n_rows) { + for (j in 1:n_subcol) { + for (pp in 1:(length(metric_int) - 1)) { + if (is.na(table[rr, ((i - 1) * n_subcol + j)])) { + metric_color[rr, ((i - 1) * n_subcol + j)] <- "gray" + } else { + if (table[rr, ((i - 1) * n_subcol + j)] >= + metric_int[pp] && table[rr, ((i - 1) * n_subcol + j)] <= + metric_int[pp + 1]) { + metric_color[rr, ((i - 1) * n_subcol + j)] <- palette[[i]][pp] # palette[pp] + } + if (table[rr, ((i - 1) * n_subcol + j)] < metric_int[1]) { + metric_color[rr, ((i - 1) * n_subcol + j)] <- colorunder[i] + } + if (table[rr,((i - 1) * n_subcol + j)] >= + metric_int[length(metric_int)]) { + metric_color[rr, ((i - 1) * n_subcol + j)] <- colorsup[i] + } + } + # color text in white and bold if background is white or dark blue or dark red: + if (is.na(table[rr, ((i - 1) * n_subcol + j)]) || + (!is.na(table[rr, ((i - 1) * n_subcol + j)]) && pp == 1 && + table[rr, ((i - 1) * n_subcol + j)] < metric_int[2]) || + (!is.na(table[rr, ((i - 1) * n_subcol + j)]) && pp == 2 && + table[rr, ((i - 1) * n_subcol + j)] < metric_int[3]) || + (!is.na(table[rr, ((i - 1) * n_subcol + j)]) && pp == (length(metric_int) - 1) && + table[rr, ((i - 1) * n_subcol + j)] >= metric_int[length(metric_int) - 1]) || + (!is.na(table[rr, ((i - 1) * n_subcol + j)]) && pp == (length(metric_int) - 2) && + table[rr, ((i - 1) * n_subcol + j)] >= metric_int[length(metric_int) - 2])) { + metric_text_color[rr, ((i - 1) * n_subcol + j)] <- "white" + # metric_text_bold[rr,((i - 1) * n_subcol + j)] <- TRUE + } + } + } + } + } + return(list(metric_color = metric_color, + metric_text_color = metric_text_color)) +} \ No newline at end of file diff --git a/modules/Scorecards/Scorecards.R b/modules/Scorecards/Scorecards.R index 889bdc1e..2e266bbf 100644 --- a/modules/Scorecards/Scorecards.R +++ b/modules/Scorecards/Scorecards.R @@ -10,7 +10,13 @@ source('modules/Scorecards/R/tmp/SCTransform.R') source('modules/Scorecards/R/tmp/ScorecardsSingle.R') source('modules/Scorecards/R/tmp/ScorecardsMulti.R') source('modules/Scorecards/R/tmp/ScorecardsSystemDiff.R') -source('modules/Scorecards/R/tmp/SCPlotScorecard.R') +source('modules/Scorecards/R/tmp/VizScorecard.R') + +## Temporary for new ESviz function +source('modules/Scorecards/R/tmp/ColorBarContinuous.R') +source('modules/Scorecards/R/tmp/ClimPalette.R') +.IsColor <- s2dv:::.IsColor +.FilterUserGraphicArgs <- s2dv:::.FilterUserGraphicArgs ## TODO: Change function name to 'Scorecards'? 
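
A minimal sketch of how the new 'sign' argument feeds VizScorecard (all values
are hypothetical; 'sign' must be a logical array with the same dimensions as
'data', and TRUE cells are rendered underlined in the table):

  data <- array(rnorm(864), dim = c(sdate = 12, metric = 4, region = 3, time = 6))
  sign <- array(data > 1, dim = dim(data))
  VizScorecard(data = data, sign = sign, fileout = 'test_sign.png')
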
@@ -34,7 +40,7 @@ Scorecards <- function(recipe) { if (recipe$Analysis$Workflow$Scorecards$start_months == 'all' || is.null(recipe$Analysis$Workflow$Scorecards$start_months)) { start.months <- as.numeric(substr(recipe$Analysis$Time$sdate, 1,2)) } else { - start.months <- as.numeric(strsplit(recipe$Analysis$Workflow$Scorecards$start_months, + start.months <- as.numeric(strsplit(recipe$Analysis$Workflow$Scorecards$start_months, split = ", | |,")[[1]]) if(!any(as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))) %in% start.months){ error(recipe$Run$logger,"Requested start dates for scorecards must be loaded") @@ -51,7 +57,8 @@ Scorecards <- function(recipe) { metric.aggregation <- recipe$Analysis$Workflow$Scorecards$metric_aggregation metrics.load <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Skill$metric), ", | |,")) metrics.visualize <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric), ", | |,")) - + ncores <- 1 # recipe$Analysis$ncores + if(is.null(recipe$Analysis$Workflow$Scorecards$signif_alpha)){ alpha <- 0.05 } else { @@ -70,28 +77,57 @@ Scorecards <- function(recipe) { na.rm <- recipe$Analysis$remove_NAs } - ncores <- 1 # recipe$Analysis$ncores - ## Parameters for scorecard layout + table.label <- recipe$Analysis$Workflow$Scorecards$table_label fileout.label <- recipe$Analysis$Workflow$Scorecards$fileout_label col1.width <- recipe$Analysis$Workflow$Scorecards$col1_width col2.width <- recipe$Analysis$Workflow$Scorecards$col2_width - columns.width <- recipe$Analysis$Workflow$Scorecards$columns_width legend.breaks <- recipe$Analysis$Workflow$Scorecards$legend_breaks - legend.white.space <- recipe$Analysis$Workflow$Scorecards$legend_white_space legend.width <- recipe$Analysis$Workflow$Scorecards$legend_width - legend.height <- recipe$Analysis$Workflow$Scorecards$legend_height - label.scale <- recipe$Analysis$Workflow$Scorecards$label_scale - round.decimal <- recipe$Analysis$Workflow$Scorecards$round_decimal - font.size <- recipe$Analysis$Workflow$Scorecards$font_size - + if (is.null(recipe$Analysis$Workflow$Scorecards$plot_legend)){ plot.legend <- TRUE } else { plot.legend <- recipe$Analysis$Workflow$Scorecards$plot_legend } + if(is.null(recipe$Analysis$Workflow$Scorecards$columns_width)){ + columns.width <- 1.2 + } else { + columns.width <- recipe$Analysis$Workflow$Scorecards$columns_width + } + + if(is.null(recipe$Analysis$Workflow$Scorecards$legend_white_space)){ + legend.white.space <- 6 + } else { + legend.white.space <- recipe$Analysis$Workflow$Scorecards$legend_white_space + } + + if(is.null(recipe$Analysis$Workflow$Scorecards$legend_height)){ + legend.height <- 50 + } else { + legend.height <- recipe$Analysis$Workflow$Scorecards$legend_height + } + + if(is.null(recipe$Analysis$Workflow$Scorecards$label_scale)){ + label.scale <- 1.4 + } else { + label.scale <- recipe$Analysis$Workflow$Scorecards$label_scale + } + + if(is.null(recipe$Analysis$Workflow$Scorecards$round_decimal)){ + round.decimal <- 2 + } else { + round.decimal <- recipe$Analysis$Workflow$Scorecards$round_decimal + } + + if(is.null(recipe$Analysis$Workflow$Scorecards$font_size)){ + font.size <- 1.1 + } else { + font.size <- recipe$Analysis$Workflow$Scorecards$font_size + } + ## Define if difference scorecard is to be plotted if (is.null(recipe$Analysis$Workflow$Scorecards$calculate_diff)){ calculate.diff <- FALSE -- GitLab From f309f09542bfb7ba7450a0319fb3062dc5e203dd Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Fri, 19 Jan 2024 12:56:51 +0100 Subject: [PATCH 20/43] VizScorecard function 
corrections --- modules/Scorecards/R/tmp/VizScorecard.R | 128 +++++++++++++----------- 1 file changed, 68 insertions(+), 60 deletions(-) diff --git a/modules/Scorecards/R/tmp/VizScorecard.R b/modules/Scorecards/R/tmp/VizScorecard.R index f3b50a6b..425b799f 100644 --- a/modules/Scorecards/R/tmp/VizScorecard.R +++ b/modules/Scorecards/R/tmp/VizScorecard.R @@ -59,18 +59,19 @@ #' list of vectors can be given as input if different colors are desired for #' the legend_dims. This parameter must be included even if the legend is #' not plotted, to define the colors in the scorecard table. -#'@param colorunder A character string or of vector of character strings -#' defining the colors to use for data values with are inferior to the lowest -#' breaks value. This parameter will also plot a inferior triangle in the -#' legend bar. The parameter can be set to NULL if there are no inferior values. -#' If a character string is given this color will be applied to all -#' 'legend_dims'. It is set as NULL by default. -#'@param colorsup A character string or of vector of character strings -#' defining the colors to use for data values with are superior to the highest -#' breaks value. This parameter will also plot a inferior triangle in the -#' legend bar. The parameter can be set to NULL if there are no superior values. -#' If a character string is given this color will be applied to all -#' legend_dims. It is set as NULL by default. +#'@param colorunder A character string, a vector of character strings or a +#' list with single character string elements defining the colors to use for +#' data values with are inferior to the lowest breaks value. This parameter +#' will also plot a inferior triangle in the legend bar. The parameter can be +#' set to NULL if there are no inferior values. If a character string is given +#' this color will be applied to all 'legend_dims'. It is set as NULL by +#' default. +#'@param colorsup A character string, a vector of character strings or a +#' list with single character string elements defining the colors to use for +#' data values with are superior to the highest breaks value. This parameter +#' will also plot a inferior triangle in the legend bar. The parameter can be +#' set to NULL if there are no superior values. If a character string is given +#' this color will be applied to all legend_dims. It is set as NULL by default. #'@param round_decimal A numeric indicating to which decimal point the data #' is to be displayed in the scorecard table. It is set as 2 by default. #'@param font_size A numeric indicating the font size on the scorecard table. 
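
A sketch of the widened contract above (hypothetical colors, assuming four
metrics along 'legend_dim'): a single string is recycled to every metric,
while a vector or a list of single strings supplies one extreme color per
metric:

  VizScorecard(data = data,
               colorunder = '#04040E',
               colorsup = c('#730C04', '#7F3B08', '#730C04', '#7F3B08'),
               fileout = 'test_colors.png')
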
@@ -103,6 +104,7 @@ #' fileout = 'test.png') #' #'@import kableExtra +#'@importFrom RColorBrewer brewer.pal #'@importFrom s2dv Reorder #'@importFrom ClimProjDiags Subset #'@importFrom CSTools MergeDims @@ -306,9 +308,11 @@ VizScorecard <- function(data, sign = NULL, row_dim = 'region', } else if (n == 11) { colors <- c('#2D004B', '#542789', '#8073AC', '#B2ABD2', '#D8DAEB', '#FEE0B6', '#FDB863', '#E08214', '#B35806', '#7F3B08') - } else { + } else if (n > 11) { stop("Parameter 'palette' must be provided when 'legend_breaks' ", "exceed the length of 11.") + } else { + colors <- rev(brewer.pal(n-1, "PuOr")) } palette <- rep(list(colors), as.numeric(dim(data)[legend_dim])) } else { @@ -317,30 +321,34 @@ VizScorecard <- function(data, sign = NULL, row_dim = 'region', # Check colorunder if (is.null(colorunder)) { colorunder <- rep("#04040E", as.numeric(dim(data)[legend_dim])) - } - if (!is.character(colorunder)) { - stop("Parameter 'colorunder' must be a character string vector.") } if (length(colorunder) == 1) { colorunder <- rep(colorunder, as.numeric(dim(data)[legend_dim])) } if (length(colorunder) != as.numeric(dim(data)[legend_dim])) { - stop("Parameter 'colorunder' must be a list with the same number of ", - "elements as the length of the 'legend_dim' dimension in data.") + stop("Parameter 'colorunder' must be a character string vector or a list ", + "with the same number of elements as the length of the 'legend_dim' ", + "dimension in data.") + } + if (!is.character(unlist(colorunder))) { + stop("Parameter 'colorunder' must be a character string vector ", + "or a list of character string elements.") } # Check colorsup if (is.null(colorsup)) { colorsup <- rep("#730C04", as.numeric(dim(data)[legend_dim])) } - if (!is.character(colorsup)) { - stop("Parameter 'colorsup' must be a character string vector.") - } if (length(colorsup) == 1) { colorsup <- rep(colorsup, as.numeric(dim(data)[legend_dim])) } if (length(colorsup) != as.numeric(dim(data)[legend_dim])) { - stop("Parameter 'colorsup' must be a list with the same number of ", - "elements as the length of the 'legend_dim' dimension in data.") + stop("Parameter 'colorsup' must be a character string vector or a list ", + "with the same number of elements as the length of the 'legend_dim' ", + "dimension in data.") + } + if (!is.character(unlist(colorsup))) { + stop("Parameter 'colorsup' must be a character string vector ", + "or a list of character string elements.") } # Check round_decimal if (!is.numeric(round_decimal)) { @@ -444,25 +452,25 @@ VizScorecard <- function(data, sign = NULL, row_dim = 'region', options(stringsAsFactors = FALSE) title <- data.frame(c1 = table_title, c2 = n_columns) subtitle <- data.frame(c1 = table_subtitle, c2 = n_columns) - header.names <- as.data.frame(data.frame(c1 = c("", col_names), + header_names <- as.data.frame(data.frame(c1 = c("", col_names), c2 = c(2, rep(n_subcol_names, n_col_names)))) - header.names2 <- as.data.frame(data.frame(c1 = c("", paste0(rep(col_title, n_col_names))), + header_names2 <- as.data.frame(data.frame(c1 = c("", paste0(rep(col_title, n_col_names))), c2 = c(2, rep(n_subcol_names, n_col_names)))) - title.space <- data.frame(c1 = "\n", c2 = n_columns) + title_space <- data.frame(c1 = "\n", c2 = n_columns) # Hide NA values in table options(knitr.kable.NA = '') # Create HTML table table_html_part <- list() - table_html_part[[1]] <- kbl(table, escape = F, col_names = column_titles, align = rep("c", n_columns)) %>% + table_html_part[[1]] <- kbl(table, escape = F, col.names = 
column_titles, align = rep("c", n_columns)) %>% kable_paper("hover", full_width = FALSE, font_size = 14 * font_size) %>% - add_header_above(header = header.names2, font_size = 16 * font_size) %>% - add_header_above(header = title.space, font_size = 10 * font_size) %>% - add_header_above(header = header.names, font_size = 20 * font_size) %>% - add_header_above(header = title.space, font_size = 10 * font_size) %>% + add_header_above(header = header_names2, font_size = 16 * font_size) %>% + add_header_above(header = title_space, font_size = 10 * font_size) %>% + add_header_above(header = header_names, font_size = 20 * font_size) %>% + add_header_above(header = title_space, font_size = 10 * font_size) %>% add_header_above(header = subtitle, font_size = 16 * font_size, align = "left") %>% - add_header_above(header = title.space, font_size = 10 * font_size) %>% + add_header_above(header = title_space, font_size = 10 * font_size) %>% add_header_above(header = title, font_size = 22 * font_size, align = "left") for (i in 1:n_col_names) { @@ -529,33 +537,6 @@ VizScorecard <- function(data, sign = NULL, row_dim = 'region', } } -# Scorecards function to create the color bar legends for the required metrics -# and paste them below the scorecard table -.ScorecardLegend <- function(legend_breaks, palette, colorunder, colorsup, - label_scale, legend_width, legend_height, - legend_white_space, fileout) { - - # Create color bar legends for each metric - for (i in 1:length(palette)) { - png(filename = paste0(fileout, '_tmpLegend', i, '.png'), width = legend_width, - height = legend_height) - ColorBarContinuous(brks = legend_breaks[[i]], cols = palette[[i]], vertical = FALSE, - label_scale = label_scale, col_inf = colorunder[[i]], - col_sup = colorsup[[i]]) - dev.off() - if (i == 1) { - # Add white space to the left of the first color bar legend - system(paste0('convert ', fileout, '_tmpLegend1.png -background white -splice ', - legend_white_space, 'x0 ', fileout, '_tmpScorecardLegend.png')) - } else { - system(paste0('convert +append ', fileout, '_tmpScorecardLegend.png ', - fileout, '_tmpLegend', i, '.png ', fileout, - '_tmpScorecardLegend.png')) - } - } - unlink(paste0(fileout,'_tmpLegend*.png')) -} - # Scorecards function to assign background color of table cells, # color of text in table and to bold the text. 
# @@ -616,4 +597,31 @@ VizScorecard <- function(data, sign = NULL, row_dim = 'region', } return(list(metric_color = metric_color, metric_text_color = metric_text_color)) -} \ No newline at end of file +} + +# Scorecards function to create the color bar legends for the required metrics +# and paste them below the scorecard table +.ScorecardLegend <- function(legend_breaks, palette, colorunder, colorsup, + label_scale, legend_width, legend_height, + legend_white_space, fileout) { + + # Create color bar legends for each metric + for (i in 1:length(palette)) { + png(filename = paste0(fileout, '_tmpLegend', i, '.png'), width = legend_width, + height = legend_height) + ColorBarContinuous(brks = legend_breaks[[i]], cols = palette[[i]], vertical = FALSE, + label_scale = label_scale, col_inf = colorunder[[i]], + col_sup = colorsup[[i]]) + dev.off() + if (i == 1) { + # Add white space to the left of the first color bar legend + system(paste0('convert ', fileout, '_tmpLegend1.png -background white -splice ', + legend_white_space, 'x0 ', fileout, '_tmpScorecardLegend.png')) + } else { + system(paste0('convert +append ', fileout, '_tmpScorecardLegend.png ', + fileout, '_tmpLegend', i, '.png ', fileout, + '_tmpScorecardLegend.png')) + } + } + unlink(paste0(fileout,'_tmpLegend*.png')) +} -- GitLab From 8152ca0c1eea5709347d273a7dc5865bed6ab3f8 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Fri, 19 Jan 2024 13:03:57 +0100 Subject: [PATCH 21/43] remove testing parameters --- modules/Scorecards/Scorecards.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/modules/Scorecards/Scorecards.R b/modules/Scorecards/Scorecards.R index 2e266bbf..16de2a2d 100644 --- a/modules/Scorecards/Scorecards.R +++ b/modules/Scorecards/Scorecards.R @@ -24,9 +24,8 @@ source('modules/Scorecards/R/tmp/ClimPalette.R') Scorecards <- function(recipe) { ## Parameters for loading data files - input.path <- "/esarchive/scratch/nmilders/scorecards_data/syear/testing/" #temp - skill.input.path <- paste0(input.path, "Skill/") #paste0(recipe$Run$output_dir, "/outputs/Skill/") - stats.input.path <- paste0(input.path, "Statistics/") #paste0(recipe$Run$output_dir, "/outputs/Statistics/") + skill.input.path <- paste0(recipe$Run$output_dir, "/outputs/Skill/") + stats.input.path <- paste0(recipe$Run$output_dir, "/outputs/Statistics/") output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/") dir.create(output.path, recursive = T, showWarnings = F) system <- recipe$Analysis$Datasets$System$name -- GitLab From cc2d92cd6403dab70c1dacf35aa319acf4ba5ce4 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Thu, 25 Jan 2024 16:39:50 +0100 Subject: [PATCH 22/43] code fixes to be done --- modules/Scorecards/R/tmp/LoadMetrics.R | 6 ++++++ modules/Scorecards/Scorecards.R | 30 +++++++++++++++----------- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/modules/Scorecards/R/tmp/LoadMetrics.R b/modules/Scorecards/R/tmp/LoadMetrics.R index 55030d95..3ba9fa3c 100644 --- a/modules/Scorecards/R/tmp/LoadMetrics.R +++ b/modules/Scorecards/R/tmp/LoadMetrics.R @@ -50,6 +50,10 @@ LoadMetrics <- function(input_path, metrics, var, system, reference, calib_method = NULL, syear = NULL) { + ## TO DO: + ## Include condition for region + ## Change lat lon attributes + ## Remove . 
from names system <- gsub('.','', system, fixed = T) reference <- gsub('.','', reference, fixed = T) @@ -72,6 +76,8 @@ LoadMetrics <- function(input_path, metrics, var, system, reference, forecast_months = forecast_months, metrics = metrics, calib_method = calib_method, syear = syear) + # attributes(met)$lon <- as.vector(attributes(met)$Variable$dat$lon) + # attributes(met)$lat <- as.vector(attributes(met)$Variable$dat$lat) ## Save metric data as array in reference list by_reference[[reference[ref]]] <- met ## Remove -Inf from crpss data if variable is precipitation diff --git a/modules/Scorecards/Scorecards.R b/modules/Scorecards/Scorecards.R index 16de2a2d..75a22196 100644 --- a/modules/Scorecards/Scorecards.R +++ b/modules/Scorecards/Scorecards.R @@ -18,14 +18,19 @@ source('modules/Scorecards/R/tmp/ClimPalette.R') .IsColor <- s2dv:::.IsColor .FilterUserGraphicArgs <- s2dv:::.FilterUserGraphicArgs +## TO DO: +## Apply condition in SingleScorecards/MultiScorecards to only make transformed scorecard when sdate > ftime -## TODO: Change function name to 'Scorecards'? ## Define function Scorecards <- function(recipe) { ## Parameters for loading data files - skill.input.path <- paste0(recipe$Run$output_dir, "/outputs/Skill/") - stats.input.path <- paste0(recipe$Run$output_dir, "/outputs/Statistics/") + + input.path <- "/esarchive/scratch/nmilders/scorecards_data/syear/testing/" #temp + skill.input.path <- paste0(input.path, "Skill/") #paste0(recipe$Run$output_dir, "/outputs/Skill/") + stats.input.path <- paste0(input.path, "Statistics/") #paste0(recipe$Run$output_dir, "/outputs/Statistics/") + # skill.input.path <- paste0(recipe$Run$output_dir, "/outputs/Skill/") + # stats.input.path <- paste0(recipe$Run$output_dir, "/outputs/Statistics/") output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/") dir.create(output.path, recursive = T, showWarnings = F) system <- recipe$Analysis$Datasets$System$name @@ -77,7 +82,6 @@ Scorecards <- function(recipe) { } ## Parameters for scorecard layout - table.label <- recipe$Analysis$Workflow$Scorecards$table_label fileout.label <- recipe$Analysis$Workflow$Scorecards$fileout_label col1.width <- recipe$Analysis$Workflow$Scorecards$col1_width @@ -138,17 +142,19 @@ Scorecards <- function(recipe) { if(metric.aggregation == 'skill'){ ## Load data files - loaded_metrics <- LoadMetrics(system = system, + loaded_metrics <- LoadMetrics(input_path = skill.input.path, + system = system, reference = reference, var = var, - start.year = start.year, - end.year = end.year, metrics = metrics.visualize, ## metrics.load - start.months = start.months, - forecast.months = forecast.months, - inf.to.na = inf.to.na, - input.path = skill.input.path) - + period = period, + start_months = start.months, + forecast_months = forecast.months, + calib_method = calib.method, + syear = NULL, + inf_to_na = inf.to.na + ) + ## Spatial Aggregation of metrics if('region' %in% names(dim(loaded_metrics[[1]][[1]]))){ -- GitLab From bc3361602bfc0ee7d92a87dfb9601ce97f8c60a7 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Tue, 30 Jan 2024 17:26:35 +0100 Subject: [PATCH 23/43] included changes for skill aggregate --- modules/Scorecards/R/tmp/LoadMetrics.R | 13 +- modules/Scorecards/R/tmp/ScorecardsMulti.R | 254 ++++++++++++-------- modules/Scorecards/R/tmp/ScorecardsSingle.R | 141 ++++++----- modules/Scorecards/R/tmp/WeightedMetrics.R | 14 +- modules/Scorecards/Scorecards.R | 5 +- 5 files changed, 256 insertions(+), 171 deletions(-) diff --git a/modules/Scorecards/R/tmp/LoadMetrics.R 
b/modules/Scorecards/R/tmp/LoadMetrics.R index 3ba9fa3c..2cecd5c1 100644 --- a/modules/Scorecards/R/tmp/LoadMetrics.R +++ b/modules/Scorecards/R/tmp/LoadMetrics.R @@ -50,9 +50,7 @@ LoadMetrics <- function(input_path, metrics, var, system, reference, calib_method = NULL, syear = NULL) { - ## TO DO: - ## Include condition for region - ## Change lat lon attributes + ## TO DO: Include condition for region ## Remove . from names system <- gsub('.','', system, fixed = T) @@ -76,8 +74,8 @@ LoadMetrics <- function(input_path, metrics, var, system, reference, forecast_months = forecast_months, metrics = metrics, calib_method = calib_method, syear = syear) - # attributes(met)$lon <- as.vector(attributes(met)$Variable$dat$lon) - # attributes(met)$lat <- as.vector(attributes(met)$Variable$dat$lat) + attributes(met)$lon <- as.vector(attributes(met)$Variable$dat$lon) + attributes(met)$lat <- as.vector(attributes(met)$Variable$dat$lat) ## Save metric data as array in reference list by_reference[[reference[ref]]] <- met ## Remove -Inf from crpss data if variable is precipitation @@ -89,6 +87,11 @@ LoadMetrics <- function(input_path, metrics, var, system, reference, all_metrics[[system[sys]]] <- by_reference } ## close loop on system + attributes(all_metrics)$metrics <- metrics + attributes(all_metrics)$start.months <- start.months + attributes(all_metrics)$forecast.months <- forecast.months + + return(all_metrics) } diff --git a/modules/Scorecards/R/tmp/ScorecardsMulti.R b/modules/Scorecards/R/tmp/ScorecardsMulti.R index 020a2047..be3aab71 100644 --- a/modules/Scorecards/R/tmp/ScorecardsMulti.R +++ b/modules/Scorecards/R/tmp/ScorecardsMulti.R @@ -4,6 +4,8 @@ #' and references (types 9 to 12). #'@param data is an array of spatially aggregated metrics containing the #' following dimensions; system, reference, metric, time, sdate, region. +#'@param sign is an array with the same dimensions as data indicting the +#' significance of the metrics, with either true, false or null. 
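+#' It may also be NULL (as when only the 'skill' metric aggregation is
+#' computed), in which case no cell is marked as significant.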
#'@param system a vector of character strings defining the systems following the #' archive.yml format from verification suite #'@param reference a vector of character strings defining the references @@ -92,8 +94,7 @@ ScorecardsMulti <- function(data, sign, system, reference, var, start.year, ## Make sure data is in correct order for using in functions: data_order <- c('system','reference','metric','time','sdate','region') data <- Reorder(data, data_order) - sign <- Reorder(sign, data_order) - + ## Identify metrics loaded metrics_loaded <- attributes(data)$metrics @@ -101,18 +102,28 @@ ScorecardsMulti <- function(data, sign, system, reference, var, start.year, data <- Subset(data, along = 'metric', indices = match(metrics, metrics_loaded)) attributes(data)$metrics <- metrics - sign <- Subset(sign, along = 'metric', indices = match(metrics, metrics_loaded)) - attributes(sign)$metrics <- metrics + if(!is.null(sign)){ + sign <- Reorder(sign, data_order) + sign <- Subset(sign, along = 'metric', indices = match(metrics, metrics_loaded)) + attributes(sign)$metrics <- metrics + } ## Transform data for scorecards by forecast month (types 11 & 12) - transformed_data <- SCTransform(data = data, - sdate_dim = 'sdate', - ftime_dim = 'time') - - transformed_sign <- SCTransform(data = sign, - sdate_dim = 'sdate', - ftime_dim = 'time') + if(length(start.months) >= length(forecast.months)){ + + transformed_data <- SCTransform(data = data, + sdate_dim = 'sdate', + ftime_dim = 'time') + + if(!is.null(sign)){ + transformed_sign <- SCTransform(data = sign, + sdate_dim = 'sdate', + ftime_dim = 'time') + } else { + transformed_sign <- NULL + } + } ## Load configuration files if (is.null(recipe$Run$filesystem)) { @@ -231,10 +242,18 @@ ScorecardsMulti <- function(data, sign, system, reference, var, start.year, if(model == 'system'){ data_sc_9 <- Subset(data, c('reference','region'), list(1, reg), drop = 'selected') - sign_sc_9 <- Subset(sign, c('reference','region'), list(1, reg), drop = 'selected') + if(!is.null(sign)){ + sign_sc_9 <- Subset(sign, c('reference','region'), list(1, reg), drop = 'selected') + } else { + sign_sc_9 <- NULL + } } else if(model == 'reference'){ data_sc_9 <- Subset(data, c('system','region'), list(1, reg), drop = 'selected') - sign_sc_9 <- Subset(sign, c('system','region'), list(1, reg), drop = 'selected') + if(!is.null(sign)){ + sign_sc_9 <- Subset(sign, c('system','region'), list(1, reg), drop = 'selected') + } else { + sign_sc_9 <- NULL + } } VizScorecard(data = data_sc_9, @@ -281,10 +300,18 @@ ScorecardsMulti <- function(data, sign, system, reference, var, start.year, if(model == 'system'){ data_sc_10 <- Subset(Reorder(data, new_order), c('reference','region'), list(1, reg), drop = 'selected') - sign_sc_10 <- Subset(Reorder(sign, new_order), c('reference','region'), list(1, reg), drop = 'selected') + if(!is.null(sign)){ + sign_sc_10 <- Subset(Reorder(sign, new_order), c('reference','region'), list(1, reg), drop = 'selected') + } else { + sign_sc_10 <- NULL + } } else if(model == 'reference'){ data_sc_10 <- Subset(Reorder(data, new_order), c('system','region'), list(1, reg), drop = 'selected') - sign_sc_10 <- Subset(Reorder(sign, new_order), c('system','region'), list(1, reg), drop = 'selected') + if(!is.null(sign)){ + sign_sc_10 <- Subset(Reorder(sign, new_order), c('system','region'), list(1, reg), drop = 'selected') + } else { + sign_sc_10 <- NULL + } } VizScorecard(data = data_sc_10, sign = sign_sc_10, @@ -321,100 +348,121 @@ ScorecardsMulti <- function(data, sign, system, 
reference, var, start.year, #### Scorecard_type 11 #### ## (transformation only) - fileout <- .Filename(model = model, eval.name = eval.filename, var = var, - start.year = start.year, end.year = end.year, scorecard.type = 11, - region = sub(" ", "-", region.names[reg]), - fileout.label = fileout.label, output.path = output.path) + if(length(start.months) >= length(forecast.months)){ - if(model == 'system'){ - data_sc_11 <- Subset(transformed_data, c('reference','region'), list(1, reg), drop = 'selected') - sign_sc_11 <- Subset(transformed_sign, c('reference','region'), list(1, reg), drop = 'selected') - } else if(model == 'reference'){ - data_sc_11 <- Subset(transformed_data, c('system','region'), list(1, reg), drop = 'selected') - sign_sc_11 <- Subset(transformed_sign, c('system','region'), list(1, reg), drop = 'selected') + fileout <- .Filename(model = model, eval.name = eval.filename, var = var, + start.year = start.year, end.year = end.year, scorecard.type = 11, + region = sub(" ", "-", region.names[reg]), + fileout.label = fileout.label, output.path = output.path) + + if(model == 'system'){ + data_sc_11 <- Subset(transformed_data, c('reference','region'), list(1, reg), drop = 'selected') + if(!is.null(sign)){ + sign_sc_11 <- Subset(transformed_sign, c('reference','region'), list(1, reg), drop = 'selected') + } else { + sign_sc_11 <- NULL + } + } else if(model == 'reference'){ + data_sc_11 <- Subset(transformed_data, c('system','region'), list(1, reg), drop = 'selected') + if(!is.null(sign)){ + sign_sc_11 <- Subset(transformed_sign, c('system','region'), list(1, reg), drop = 'selected') + } else { + sign_sc_11 <- NULL + } + } + + VizScorecard(data = data_sc_11, + sign = sign_sc_11, + row_dim = model, + subrow_dim = 'time', + col_dim = 'metric', + subcol_dim = 'sdate', + legend_dim = 'metric', + row_names = model.name, + subrow_names = forecast.months, + col_names = metric.names, + subcol_names = month.abb[as.numeric(start.months)], + table_title = table.title, + table_subtitle = table.subtitle, + row_title = table.model.name, + subrow_title = 'Forecast Month', + col_title = 'Target month', + legend_breaks = legend.breaks, + plot_legend = plot.legend, + label_scale = label.scale, + legend_width = legend.width, + legend_height = legend.height, + palette = palette, + colorunder = legend.col.inf, + colorsup = legend.col.sup, + round_decimal = round.decimal, + font_size = font.size, + legend_white_space = legend.white.space, + col1_width = 4, + col2_width = col2.width, + columns_width = columns.width, + fileout = fileout) } - VizScorecard(data = data_sc_11, - sign = sign_sc_11, - row_dim = model, - subrow_dim = 'time', - col_dim = 'metric', - subcol_dim = 'sdate', - legend_dim = 'metric', - row_names = model.name, - subrow_names = forecast.months, - col_names = metric.names, - subcol_names = month.abb[as.numeric(start.months)], - table_title = table.title, - table_subtitle = table.subtitle, - row_title = table.model.name, - subrow_title = 'Forecast Month', - col_title = 'Target month', - legend_breaks = legend.breaks, - plot_legend = plot.legend, - label_scale = label.scale, - legend_width = legend.width, - legend_height = legend.height, - palette = palette, - colorunder = legend.col.inf, - colorsup = legend.col.sup, - round_decimal = round.decimal, - font_size = font.size, - legend_white_space = legend.white.space, - col1_width = 4, - col2_width = col2.width, - columns_width = columns.width, - fileout = fileout) - - #### Scorecard_type 12 #### ## (transformation and reorder) - fileout <- 
.Filename(model = model, eval.name = eval.filename, var = var, - start.year = start.year, end.year = end.year, scorecard.type = 12, - region = sub(" ", "-", region.names[reg]), - fileout.label = fileout.label, output.path = output.path) - - new_order <- c('system', 'reference', 'metric', 'region','sdate', 'time') - - if(model == 'system'){ - data_sc_12 <- Subset(Reorder(transformed_data, new_order), c('reference','region'), list(1, reg), drop = 'selected') - sign_sc_12 <- Subset(Reorder(transformed_sign, new_order), c('reference','region'), list(1, reg), drop = 'selected') - } else if(model == 'reference'){ - data_sc_12 <- Subset(Reorder(transformed_data, new_order), c('system','region'), list(1, reg), drop = 'selected') - sign_sc_12 <- Subset(Reorder(transformed_sign, new_order), c('system','region'), list(1, reg), drop = 'selected') + if(length(start.months) >= length(forecast.months)){ + + fileout <- .Filename(model = model, eval.name = eval.filename, var = var, + start.year = start.year, end.year = end.year, scorecard.type = 12, + region = sub(" ", "-", region.names[reg]), + fileout.label = fileout.label, output.path = output.path) + + new_order <- c('system', 'reference', 'metric', 'region','sdate', 'time') + + if(model == 'system'){ + data_sc_12 <- Subset(Reorder(transformed_data, new_order), c('reference','region'), list(1, reg), drop = 'selected') + if(!is.null(sign)){ + sign_sc_12 <- Subset(Reorder(transformed_sign, new_order), c('reference','region'), list(1, reg), drop = 'selected') + } else { + sign_sc_12 <- NULL + } + } else if(model == 'reference'){ + data_sc_12 <- Subset(Reorder(transformed_data, new_order), c('system','region'), list(1, reg), drop = 'selected') + if(!is.null(sign)){ + sign_sc_12 <- Subset(Reorder(transformed_sign, new_order), c('system','region'), list(1, reg), drop = 'selected') + } else { + sign_sc_12 <- NULL + } + } + + VizScorecard(data = data_sc_12, + sign = sign_sc_12, + row_dim = 'time', + subrow_dim = model, + col_dim = 'metric', + subcol_dim = 'sdate', + legend_dim = 'metric', + row_names = forecast.months, + subrow_names = model.name, + col_names = metric.names, + subcol_names = month.abb[as.numeric(start.months)], + table_title = table.title, + table_subtitle = table.subtitle, + row_title = 'Forecast Month', + subrow_title = table.model.name, + col_title = 'Target month', + legend_breaks = legend.breaks, + plot_legend = plot.legend, + label_scale = label.scale, + legend_width = legend.width, + legend_height = legend.height, + palette = palette, + colorunder = legend.col.inf, + colorsup = legend.col.sup, + round_decimal = round.decimal, + font_size = font.size, + legend_white_space = legend.white.space, + col1_width = col1.width, + col2_width = 4, + columns_width = columns.width, + fileout = fileout) } - - VizScorecard(data = data_sc_12, - sign = sign_sc_12, - row_dim = 'time', - subrow_dim = model, - col_dim = 'metric', - subcol_dim = 'sdate', - legend_dim = 'metric', - row_names = forecast.months, - subrow_names = model.name, - col_names = metric.names, - subcol_names = month.abb[as.numeric(start.months)], - table_title = table.title, - table_subtitle = table.subtitle, - row_title = 'Forecast Month', - subrow_title = table.model.name, - col_title = 'Target month', - legend_breaks = legend.breaks, - plot_legend = plot.legend, - label_scale = label.scale, - legend_width = legend.width, - legend_height = legend.height, - palette = palette, - colorunder = legend.col.inf, - colorsup = legend.col.sup, - round_decimal = round.decimal, - font_size = 
font.size, - legend_white_space = legend.white.space, - col1_width = col1.width, - col2_width = 4, - columns_width = columns.width, - fileout = fileout) } ## close loop on region diff --git a/modules/Scorecards/R/tmp/ScorecardsSingle.R b/modules/Scorecards/R/tmp/ScorecardsSingle.R index c5525fd4..7c76c92c 100644 --- a/modules/Scorecards/R/tmp/ScorecardsSingle.R +++ b/modules/Scorecards/R/tmp/ScorecardsSingle.R @@ -107,8 +107,7 @@ ScorecardsSingle <- function(data, sign, system, reference, var, start.year, ## Make sure data is in correct order for using in functions: data_order <- c('system', 'reference', 'metric', 'time', 'sdate', 'region') data <- Reorder(data, data_order) - sign <- Reorder(sign, data_order) - + ## Identify metrics loaded metrics_loaded <- attributes(data)$metrics @@ -116,17 +115,27 @@ ScorecardsSingle <- function(data, sign, system, reference, var, start.year, data <- Subset(data, along = 'metric', indices = match(metrics, metrics_loaded)) attributes(data)$metrics <- metrics - sign <- Subset(sign, along = 'metric', indices = match(metrics, metrics_loaded)) - attributes(sign)$metrics <- metrics - - ## Transform data for scorecards by forecast month (types 3 & 4) - transformed_data <- SCTransform(data = data, - sdate_dim = 'sdate', - ftime_dim = 'time') + if(!is.null(sign)){ + sign <- Reorder(sign, data_order) + sign <- Subset(sign, along = 'metric', indices = match(metrics, metrics_loaded)) + attributes(sign)$metrics <- metrics + } - transformed_sign <- SCTransform(data = sign, - sdate_dim = 'sdate', - ftime_dim = 'time') + ## Transform data for scorecards by forecast month (types 3 & 4) + if(length(start.months) >= length(forecast.months)){ + + transformed_data <- SCTransform(data = data, + sdate_dim = 'sdate', + ftime_dim = 'time') + + if(!is.null(sign)){ + transformed_sign <- SCTransform(data = sign, + sdate_dim = 'sdate', + ftime_dim = 'time') + } else { + transformed_sign <- NULL + } + } ## Load configuration files if (is.null(recipe$Run$filesystem)) { @@ -226,7 +235,12 @@ ScorecardsSingle <- function(data, sign, system, reference, var, start.year, fileout.label = fileout.label, output.path = output.path) data_sc_1 <- Subset(data, c('system', 'reference'), list(sys, ref), drop = 'selected') - sign_sc_1 <- Subset(sign, c('system', 'reference'), list(sys, ref), drop = 'selected') + + if(!is.null(sign)){ + sign_sc_1 <- Subset(sign, c('system', 'reference'), list(sys, ref), drop = 'selected') + } else { + sign_sc_1 <- NULL + } VizScorecard(data = data_sc_1, sign = sign_sc_1, @@ -272,8 +286,14 @@ ScorecardsSingle <- function(data, sign, system, reference, var, start.year, fileout.label = fileout.label, output.path = output.path) new_order <- c('metric', 'region', 'sdate', 'time') + data_sc_2 <- Reorder(Subset(data, c('system', 'reference'), list(sys, ref), drop = 'selected'), new_order) - sign_sc_2 <- Reorder(Subset(sign, c('system', 'reference'), list(sys, ref), drop = 'selected'), new_order) + + if(!is.null(sign)){ + sign_sc_2 <- Reorder(Subset(sign, c('system', 'reference'), list(sys, ref), drop = 'selected'), new_order) + } else { + sign_sc_2 <- NULL + } VizScorecard(data = data_sc_2, sign = sign_sc_2, @@ -311,59 +331,72 @@ ScorecardsSingle <- function(data, sign, system, reference, var, start.year, #### Scorecard_type 3 #### ## (transformation only) - fileout <- .Filename(system = system[sys], reference = reference[ref], var = var, - start.year = start.year, end.year = end.year, scorecard.type = 3, - fileout.label = fileout.label, output.path = output.path) - - 
data_sc_3 <- Subset(transformed_data, c('system', 'reference'), list(sys, ref), drop = 'selected')
-    sign_sc_3 <- Subset(transformed_sign, c('system', 'reference'), list(sys, ref), drop = 'selected')
-    VizScorecard(data = data_sc_3,
-                 sign = sign_sc_3,
-                 row_dim = 'region',
-                 subrow_dim = 'time',
-                 col_dim = 'metric',
-                 subcol_dim = 'sdate',
-                 legend_dim = 'metric',
-                 row_names = region.names,
-                 subrow_names = forecast.months,
-                 col_names = metric.names,
-                 subcol_names = month.abb[as.numeric(start.months)],
-                 table_title = table.title,
-                 table_subtitle = table.subtitle,
-                 row_title = 'Region',
-                 subrow_title = 'Forecast Month',
-                 col_title = 'Target month',
-                 legend_breaks = legend.breaks,
-                 plot_legend = plot.legend,
-                 label_scale = label.scale,
-                 legend_width = legend.width,
-                 legend_height = legend.height,
-                 palette = palette,
-                 colorunder = legend.col.inf,
-                 colorsup = legend.col.sup,
-                 round_decimal = round.decimal,
-                 font_size = font.size,
-                 legend_white_space = legend.white.space,
-                 col1_width = col1.width,
-                 col2_width = col2.width,
-                 columns_width = columns.width,
-                 fileout = fileout)
-    
+    if(length(start.months) >= length(forecast.months)){
+      fileout <- .Filename(system = system[sys], reference = reference[ref], var = var,
+                           start.year = start.year, end.year = end.year, scorecard.type = 3,
+                           fileout.label = fileout.label, output.path = output.path)
+      
+      data_sc_3 <- Subset(transformed_data, c('system', 'reference'), list(sys, ref), drop = 'selected')
+      
+      if(!is.null(sign)){
+        sign_sc_3 <- Subset(transformed_sign, c('system', 'reference'), list(sys, ref), drop = 'selected')
+      } else {
+        sign_sc_3 <- NULL
+      }
+      
+      VizScorecard(data = data_sc_3,
+                   sign = sign_sc_3,
+                   row_dim = 'region',
+                   subrow_dim = 'time',
+                   col_dim = 'metric',
+                   subcol_dim = 'sdate',
+                   legend_dim = 'metric',
+                   row_names = region.names,
+                   subrow_names = forecast.months,
+                   col_names = metric.names,
+                   subcol_names = month.abb[as.numeric(start.months)],
+                   table_title = table.title,
+                   table_subtitle = table.subtitle,
+                   row_title = 'Region',
+                   subrow_title = 'Forecast Month',
+                   col_title = 'Target month',
+                   legend_breaks = legend.breaks,
+                   plot_legend = plot.legend,
+                   label_scale = label.scale,
+                   legend_width = legend.width,
+                   legend_height = legend.height,
+                   palette = palette,
+                   colorunder = legend.col.inf,
+                   colorsup = legend.col.sup,
+                   round_decimal = round.decimal,
+                   font_size = font.size,
+                   legend_white_space = legend.white.space,
+                   col1_width = col1.width,
+                   col2_width = col2.width,
+                   columns_width = columns.width,
+                   fileout = fileout)
+    }
     #### Scorecard_type 4 ####
     ## (transformation and reorder)
     ## Scorecard type 4 is same as type 3 for only one region, therefore is
     ## only plotted if more than one region is requested
-    if(dim(data)['region'] > 1) {
+    if(dim(data)['region'] > 1 & length(start.months) >= length(forecast.months)){
       fileout <- .Filename(system = system[sys], reference = reference[ref], var = var,
                            start.year = start.year, end.year = end.year, scorecard.type = 4,
                            fileout.label = fileout.label, output.path = output.path)
       new_order <- c('metric', 'region', 'sdate', 'time')
+      
       data_sc_4 <- Reorder(Subset(transformed_data, c('system', 'reference'), list(sys, ref), drop = 'selected'), new_order)
-      sign_sc_4 <- Reorder(Subset(transformed_sign, c('system', 'reference'), list(sys, ref), drop = 'selected'), new_order)
+      
+      if(!is.null(sign)){
+        sign_sc_4 <- Reorder(Subset(transformed_sign, c('system', 'reference'), list(sys, ref), drop = 'selected'), new_order)
+      } else {
+        sign_sc_4 <- NULL
+      }
       VizScorecard(data = 
data_sc_4, sign = sign_sc_4, diff --git a/modules/Scorecards/R/tmp/WeightedMetrics.R b/modules/Scorecards/R/tmp/WeightedMetrics.R index 7df20c71..41758386 100644 --- a/modules/Scorecards/R/tmp/WeightedMetrics.R +++ b/modules/Scorecards/R/tmp/WeightedMetrics.R @@ -53,9 +53,9 @@ WeightedMetrics <- function(loaded_metrics, regions, metric.aggregation, ## Get metric names ## TO DO: check all metric are in the same order for all sys - metrics <- attributes(loaded_metrics[[1]][[1]])$metrics - forecast.months <- attributes(loaded_metrics[[1]][[1]])$forecast.months - start.months <- attributes(loaded_metrics[[1]][[1]])$start.months + metrics <- attributes(loaded_metrics)$metrics + forecast.months <- attributes(loaded_metrics)$forecast.months + start.months <- attributes(loaded_metrics)$start.months all_metric_means <- array(dim = c(metric = length(metrics), time = length(forecast.months), @@ -83,18 +83,20 @@ WeightedMetrics <- function(loaded_metrics, regions, metric.aggregation, latdim = lat_dim_name, na.rm = na.rm, ncores = ncores) + all_metric_means[, , , reg, ref, sys] <- weighted.mean + } ## close loop on region } ## close loop on reference } ## close loop on system - + ## reorder dimensions in array all_metric_means <- s2dv::Reorder(all_metric_means, c('system','reference','metric','time','sdate','region')) ## Add attributes attributes(all_metric_means)$metrics <- metrics - attributes(all_metric_means)$start.months <- attributes(loaded_metrics[[1]][[1]])$start.months - attributes(all_metric_means)$forecast.months <- attributes(loaded_metrics[[1]][[1]])$forecast.months + attributes(all_metric_means)$start.months <- start.months + attributes(all_metric_means)$forecast.months <- forecast.months attributes(all_metric_means)$regions <- regions attributes(all_metric_means)$system.name <- names(loaded_metrics) attributes(all_metric_means)$reference.name <- names(loaded_metrics[[1]]) diff --git a/modules/Scorecards/Scorecards.R b/modules/Scorecards/Scorecards.R index 75a22196..93ad7ed9 100644 --- a/modules/Scorecards/Scorecards.R +++ b/modules/Scorecards/Scorecards.R @@ -18,9 +18,6 @@ source('modules/Scorecards/R/tmp/ClimPalette.R') .IsColor <- s2dv:::.IsColor .FilterUserGraphicArgs <- s2dv:::.FilterUserGraphicArgs -## TO DO: -## Apply condition in SingleScorecards/MultiScorecards to only make transformed scorecard when sdate > ftime - ## Define function Scorecards <- function(recipe) { @@ -194,6 +191,8 @@ Scorecards <- function(recipe) { metric.aggregation = metric.aggregation, ncores = ncores) } ## close if on region + metrics_significance <- NULL + } ## close if on skill ###### SCORE AGGREGATION ###### -- GitLab From 0e0c64628debf23776422c5aecc11c0d83030807 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Thu, 1 Feb 2024 18:05:32 +0100 Subject: [PATCH 24/43] working progress --- modules/Scorecards/R/tmp/LoadMetrics.R | 21 ++++++++++++++++++++- modules/Scorecards/Scorecards.R | 10 +++++----- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/modules/Scorecards/R/tmp/LoadMetrics.R b/modules/Scorecards/R/tmp/LoadMetrics.R index 2cecd5c1..b4b91128 100644 --- a/modules/Scorecards/R/tmp/LoadMetrics.R +++ b/modules/Scorecards/R/tmp/LoadMetrics.R @@ -72,7 +72,8 @@ LoadMetrics <- function(input_path, metrics, var, system, reference, period = period, start_months = start_months, forecast_months = forecast_months, - metrics = metrics, calib_method = calib_method, + metrics = metrics, + calib_method = calib_method, syear = syear) attributes(met)$lon <- 
as.vector(attributes(met)$Variable$dat$lon) attributes(met)$lat <- as.vector(attributes(met)$Variable$dat$lat) @@ -129,5 +130,23 @@ LoadMetrics <- function(input_path, metrics, var, system, reference, # (3): Call startR loaded_metrics_start <- do.call(Start, args = args) + + ## If data to load is region not lat lon + # if ("region" %in% rownames(allfiledims)) { + # file_for_att <- ncdf4::nc_open(allfiles[allfiles_exist[1]]) + # region <- ncdf4::ncatt_get(file_for_att, 'region') + # ncdf4::nc_close(file_for_att) + # attributes(array_met_by_sdate)$region <- region + # } else { + # lon <- easyNCDF::NcToArray(allfiles[allfiles_exist][1], vars_to_read = 'longitude', + # unlist = T, drop_var_dim = T) + # lat <- easyNCDF::NcToArray(allfiles[allfiles_exist][1], vars_to_read = 'latitude', + # unlist = T, drop_var_dim = T) + # attributes(array_met_by_sdate)$lon <- lon + # attributes(array_met_by_sdate)$lat <- lat + # } + + + return(loaded_metrics_start) } diff --git a/modules/Scorecards/Scorecards.R b/modules/Scorecards/Scorecards.R index 93ad7ed9..50653c78 100644 --- a/modules/Scorecards/Scorecards.R +++ b/modules/Scorecards/Scorecards.R @@ -23,11 +23,11 @@ Scorecards <- function(recipe) { ## Parameters for loading data files - input.path <- "/esarchive/scratch/nmilders/scorecards_data/syear/testing/" #temp - skill.input.path <- paste0(input.path, "Skill/") #paste0(recipe$Run$output_dir, "/outputs/Skill/") - stats.input.path <- paste0(input.path, "Statistics/") #paste0(recipe$Run$output_dir, "/outputs/Statistics/") - # skill.input.path <- paste0(recipe$Run$output_dir, "/outputs/Skill/") - # stats.input.path <- paste0(recipe$Run$output_dir, "/outputs/Statistics/") + # input.path <- "/esarchive/scratch/nmilders/scorecards_data/syear/testing/" #temp + # skill.input.path <- paste0(input.path, "Skill/") #paste0(recipe$Run$output_dir, "/outputs/Skill/") + # stats.input.path <- paste0(input.path, "Statistics/") #paste0(recipe$Run$output_dir, "/outputs/Statistics/") + skill.input.path <- paste0(recipe$Run$output_dir, "/outputs/Skill/") + stats.input.path <- paste0(recipe$Run$output_dir, "/outputs/Statistics/") output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/") dir.create(output.path, recursive = T, showWarnings = F) system <- recipe$Analysis$Datasets$System$name -- GitLab From 218a2d8338e90ee78faa37a3a56f61d42aa2f1f0 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Fri, 9 Feb 2024 16:30:04 +0100 Subject: [PATCH 25/43] corrected LoadMetrics including region condition --- modules/Scorecards/R/tmp/LoadMetrics.R | 241 ++++++++----- modules/Scorecards/R/tmp/WeightedMetrics.R | 7 +- modules/Scorecards/Scorecards.R | 379 +++++++++------------ 3 files changed, 323 insertions(+), 304 deletions(-) diff --git a/modules/Scorecards/R/tmp/LoadMetrics.R b/modules/Scorecards/R/tmp/LoadMetrics.R index b4b91128..4f803c5a 100644 --- a/modules/Scorecards/R/tmp/LoadMetrics.R +++ b/modules/Scorecards/R/tmp/LoadMetrics.R @@ -1,15 +1,7 @@ -#'Scorecards load metrics from verification suite +#' Scorecards load metrics from verification suite #' -#'Scorecards function to load saved data files. This function is created to -#'load NetCDF files that contain 1 metric for each file. It calls startR::Start -#'internally. The NetCDF file is supposed to have the following dimensions: -#''longitude', 'latitude' and 'time'. If 'syear' is also a dimension, you must -#'set the parameter 'syear' different to NULL. 
+#'@description Scorecards function to load saved data files
 #'
-#'@param input_path A character string indicating the path where metrics output
-#' files from verification suite are saved (or any other compatible files)
-#'@param metrics A vector of character strings indicating the metrics name to
-#' be loaded.
 #'@param system A vector of character strings defining the names of the
 #' system names following the archive.yml format from verification suite.
 #' Accepted system names: 'ECMWF-SEAS5', 'DWD-GFCS2.1', 'CMCC-SPS3.5',
@@ -23,40 +15,88 @@
 #'@param period A character string indicating the start and end years of the
 #' reference period (e.g. '1993-2016')
 #'@param start_months A vector indicating the numbers of the start months
-#'@param forecast_months A vector indicating the numbers of the forecast months
-#'@param syear Used when the NetCDF file contains a metric that has the
-#' dimension 'syear'. It can be any value. If there is no 'syear' dimension,
-#' you must set it to NULL. It is NULL by default.
+#'@param input_path A character string indicating the path where metrics output
+#' files from verification suite are saved (or any other compatible files)
 #'
 #'@return A list by system and reference containing an array with
 #' the following dimensions: longitude, latitude, forecast months, metrics,
 #' start dates.
+
 #'@examples
 #'\dontrun{
 #'loaded_metrics <- LoadMetrics(system = c('ECMWF-SEAS5','DWD-GFCS2.1'),
 #'                              reference = 'ERA5',
 #'                              var = 'tas',
 #'                              period = '1993-2016',
 #'                              metrics = c('mean_bias', 'enscorr', 'rpss', 'crpss', 'enssprerr'),
 #'                              start_months = sprintf("%02d", 1:12),
-#'                              forecast_months = 1:6,
+#'                              calib_method = 'raw',
 #'                              input_path = '/esarchive/scratch/nmilders/scorecards_data/input_data')
 #'}
 #'@import easyNCDF
 #'@import multiApply
+#'@export
+
-LoadMetrics <- function(input_path, metrics, var, system, reference,
-                        period = NULL, start_months = NULL,
-                        forecast_months = NULL, inf_to_na = FALSE,
-                        calib_method = NULL, syear = NULL) {
+LoadMetrics <- function(input_path, system, reference, var, period,
+                        metrics, start_months, calib_method = NULL,
+                        inf_to_na = FALSE) {
+
+  # Initial checks
+  ## system
+  if (!is.character(system)) {
+    stop("Parameter 'system' must be a character vector with the system names.")
+  }
+  ## reference
+  if (!is.character(reference)) {
+    stop("Parameter 'reference' must be a character vector with the reference ",
+         "names.")
+  }
+  ## var
+  if (!is.character(var)) {
+    stop("Parameter 'var' must be a character vector with the variable ",
+         "names.")
+  }
+  if (length(var) > 1) {
+    warning("Parameter 'var' must be of length one. Only the first value ",
+            "will be used.")
+    var <- var[1]
+  }
+  ## metrics
+  if (!is.character(metrics)) {
+    stop("Parameter 'metrics' must be a character vector with the metric names.")
+  }
+  ## start_months
+  if (is.character(start_months)) {
+    warning("Parameter 'start_months' should be a numeric vector indicating ",
            "the starting months; attempting to coerce it to numeric.")
+    start_months <- as.numeric(start_months)
+  }
+  if (!is.numeric(start_months)) {
+    stop("Parameter 'start_months' must be a numeric vector indicating ",
+         "the starting months.")
+  }
+  start_months <- sprintf("%02d", start_months)
+  ## Check if sdates are continuous or discrete
+  if (all(diff(as.numeric(start_months)) == 1)) {
+    consecutive_start_months <- TRUE
+  } else {
+    consecutive_start_months <- FALSE
+  }
+  ## input_path
+  if (!is.character(input_path)) {
+    stop("Parameter 'input_path' must be a character string.")
+  }
+  if (length(input_path) > 1) {
+    input_path <- input_path[1]
+    warning("Parameter 'input_path' has length greater than 1 and only the ",
+            "first element will be used.")
+  }
 
-  ## TO DO: Include condition for region
-
   ## Remove . from names
   system <- gsub('.','', system, fixed = T)
   reference <- gsub('.','', reference, fixed = T)
-
   ## Define empty list to saved data
   all_metrics <- sapply(system, function(x) NULL)
   ## Load data for each system
   for (sys in 1:length(system)) {
     ## Load data for each reference
     for (ref in 1:length(reference)) {
       ## Call function to load metrics data
-      met <- .loadmetrics(input_path = input_path, # recipe$Run$output,
-                          system = system[sys],
-                          reference = reference[ref],
-                          var = var,
-                          period = period,
-                          start_months = start_months,
-                          forecast_months = forecast_months,
-                          metrics = metrics,
-                          calib_method = calib_method,
-                          syear = syear)
-      attributes(met)$lon <- as.vector(attributes(met)$Variable$dat$lon)
-      attributes(met)$lat <- as.vector(attributes(met)$Variable$dat$lat)
+      met_by_smonth <- NULL
+      for (met in metrics) {
+        result <- .loadmetrics(input_path = input_path,
+                               system = system[sys],
+                               reference = reference[ref],
+                               var = var,
+                               period = period,
+                               start_months = start_months,
+                               calib_method = calib_method,
+                               metric = met)
+
+        result_attr <- attributes(result)
+        met_by_smonth <- abind::abind(met_by_smonth, result, along = length(dim(result)) + 1)
+      }
+      attributes(met_by_smonth) <- result_attr[-1]
+      # names(dim(met_by_smonth)) <- c(names(result_attr$dim), 'metric')
+
+      dim(met_by_smonth) <- c(dim(result), metric = length(metrics))
       ## Save metric data as array in reference list
-      by_reference[[reference[ref]]] <- met
+      by_reference[[reference[ref]]] <- met_by_smonth
       ## Remove -Inf from crpss data if variable is precipitation
       if (inf_to_na) {
-        by_reference[[reference]][by_reference[[reference]]==-Inf] <- NA
+        by_reference[[reference]][by_reference[[reference]] == -Inf] <- NA
       }
     } ## close loop on reference
     ## Save reference data in list of system
   } ## close loop on system
 
   attributes(all_metrics)$metrics <- metrics
-  attributes(all_metrics)$start.months <- start.months
-  attributes(all_metrics)$forecast.months <- forecast.months
-
+  attributes(all_metrics)$start_months <- start_months
   return(all_metrics)
-}
+} ## close function
 
+###########################################################
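[Editor's note: the hard-coded interactive test assignments (system, reference, var, period, metrics, start_months, input_path, calib_method, syear) that sat above the function definition in this patch have been removed; they would execute on source() and shadow the function arguments. An equivalent interactive check is sketched below against the revised signature. The input path is a placeholder, not a shipped default.]

    # assumes modules/Scorecards/R/tmp/LoadMetrics.R has been sourced,
    # with abind, easyNCDF and multiApply installed
    loaded <- LoadMetrics(input_path   = '/path/to/scorecards_data/Skill',  # hypothetical path
                          system       = 'ECMWF-SEAS5',
                          reference    = 'ERA5',
                          var          = 'tas',
                          period       = '1993-2016',
                          metrics      = c('rps_syear', 'rps_clim_syear'),
                          start_months = 1:2,
                          calib_method = 'raw')
    str(attributes(loaded)$metrics)  # metric names are kept as an attribute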
-.loadmetrics <- function(input_path, metrics, var, system = NULL,
-                         reference = NULL, period = NULL, start_months = NULL,
-                         forecast_months = NULL, calib_method = NULL,
-                         syear = NULL) {
-  # (1) Create the path to the files
-  directory_path <- paste0(input_path, system, "/", reference, "/",
-                           calib_method, "/", var, "/")
-  file_path <- paste0(directory_path, "scorecards_", system, "_", reference,
-                      "_", var, "_$var$_", period, "_s", "$smonths$", ".nc")
+.loadmetrics <- function(input_path, system, reference,
+                         var, period, start_months,
+                         calib_method, metric) {
 
-  # (2) Create the startR arguments list
-  if (is.null(forecast_months)) forecast_months <- 'all'
-  if (is.null(start_months)) start_months <- 'all'
+  ## Load data for each start date
+  allfiles <- sapply(start_months, function(m) {
+    paste0(input_path, "/", system, "/", reference, "/", calib_method, "/",
+           var, "/scorecards_", system, "_", reference, "_",
+           var, "_", metric, "_", period, "_s", m, # mod.pressure,
+           ".nc")})
+  allfiles_exist <- sapply(allfiles, file.exists)
 
-  args = list(dat = file_path,
-              var = metrics,
-              longitude = 'all',
-              latitude = 'all',
-              time = forecast_months,
-              syear = "all",
-              smonths = start_months,
-              return_vars = list(longitude = 'dat',
-                                 latitude = 'dat',
-                                 time = NULL),
-              metadata_dims = c('dat', 'var'),
-              retrieve = TRUE)
+  # Check dims
+  files_exist_by_month <- seq(1:length(allfiles))[allfiles_exist]
+  allfiledims <- sapply(allfiles[allfiles_exist], easyNCDF::NcReadDims)
+  if (length(files_exist_by_month) == 0) {
+    stop("No files are found.")
+  }
+  num_dims <- numeric(dim(allfiledims)[1])
+  for (i in 1:dim(allfiledims)[1]) {
+    if (length(unique(allfiledims[i,])) > 1) {
+      warning(paste0("Dimensions of system ", system, " with var ", var,
+                     " don't match."))
+    }
+    num_dims[i] <- max(allfiledims[i,]) # We take the largest dimension
+  }
+  # dims: [metric, longitude, latitude, time, smonth]
+  # or [metric, region, time, smonth]
 
-  if (is.null(syear)) {
-    args$syear <- NULL
-  }
-
-  # (3): Call startR
-  loaded_metrics_start <- do.call(Start, args = args)
-
-  ## If data to load is region not lat lon
-  # if ("region" %in% rownames(allfiledims)) {
-  #   file_for_att <- ncdf4::nc_open(allfiles[allfiles_exist[1]])
-  #   region <- ncdf4::ncatt_get(file_for_att, 'region')
-  #   ncdf4::nc_close(file_for_att)
-  #   attributes(array_met_by_sdate)$region <- region
-  # } else {
-  #   lon <- easyNCDF::NcToArray(allfiles[allfiles_exist][1], vars_to_read = 'longitude',
-  #                              unlist = T, drop_var_dim = T)
-  #   lat <- easyNCDF::NcToArray(allfiles[allfiles_exist][1], vars_to_read = 'latitude',
-  #                              unlist = T, drop_var_dim = T)
-  #   attributes(array_met_by_sdate)$lon <- lon
-  #   attributes(array_met_by_sdate)$lat <- lat
-  # }
-
-  return(loaded_metrics_start)
+  # Loop for file
+  dim(allfiles) <- c(dat = 1, sdate = length(allfiles))
+
+  array_met_by_sdate <- Apply(data = allfiles, target_dims = 'dat',
+                              fun = function(x) {
+    if (file.exists(x)) {
+      res <- easyNCDF::NcToArray(x, vars_to_read = metric, unlist = T,
+                                 drop_var_dim = T)
+      names(dim(res)) <- NULL
+    } else {
+      ## Each file holds a single metric, so the fill array only needs the
+      ## file dimensions (the previous code referenced an undefined 'metrics'
+      ## object here)
+      res <- array(dim = allfiledims[-1, 1])
+      names(dim(res)) <- NULL
+    }
+    res})$output1
+
+  dim(array_met_by_sdate) <- c(allfiledims[-1, 1],
+                               sdate = length(allfiles))
+  # Attributes
+  # Read attributes from the first existing file
+  if ("region" %in% rownames(allfiledims)) {
+    file_for_att <- ncdf4::nc_open(allfiles[allfiles_exist[1]])
+    region <- ncdf4::ncatt_get(file_for_att, 'region')
+    ncdf4::nc_close(file_for_att)
+    attributes(array_met_by_sdate)$region <- region
+  } else {
+    lon <- easyNCDF::NcToArray(allfiles[allfiles_exist][1], vars_to_read = 'longitude',
+                               unlist = T, drop_var_dim = T)
+    lat <- 
easyNCDF::NcToArray(allfiles[allfiles_exist][1], vars_to_read = 'latitude', + unlist = T, drop_var_dim = T) + attributes(array_met_by_sdate)$lon <- lon + attributes(array_met_by_sdate)$lat <- lat + } + + return(array_met_by_sdate) } + + \ No newline at end of file diff --git a/modules/Scorecards/R/tmp/WeightedMetrics.R b/modules/Scorecards/R/tmp/WeightedMetrics.R index 41758386..9d1630c4 100644 --- a/modules/Scorecards/R/tmp/WeightedMetrics.R +++ b/modules/Scorecards/R/tmp/WeightedMetrics.R @@ -27,8 +27,8 @@ #'@importFrom ClimProjDiags WeightedMean #'@importFrom s2dv Reorder #'@export -WeightedMetrics <- function(loaded_metrics, regions, metric.aggregation, - ncores = NULL, na.rm = TRUE) { +WeightedMetrics <- function(loaded_metrics, regions, forecast.months, + metric.aggregation, ncores = NULL, na.rm = TRUE) { ## Initial checks # loaded_metrics if (any(sapply(loaded_metrics, function(x) { @@ -54,8 +54,7 @@ WeightedMetrics <- function(loaded_metrics, regions, metric.aggregation, ## Get metric names ## TO DO: check all metric are in the same order for all sys metrics <- attributes(loaded_metrics)$metrics - forecast.months <- attributes(loaded_metrics)$forecast.months - start.months <- attributes(loaded_metrics)$start.months + start.months <- attributes(loaded_metrics)$start_months all_metric_means <- array(dim = c(metric = length(metrics), time = length(forecast.months), diff --git a/modules/Scorecards/Scorecards.R b/modules/Scorecards/Scorecards.R index 50653c78..e3d1abd8 100644 --- a/modules/Scorecards/Scorecards.R +++ b/modules/Scorecards/Scorecards.R @@ -22,10 +22,6 @@ source('modules/Scorecards/R/tmp/ClimPalette.R') Scorecards <- function(recipe) { ## Parameters for loading data files - - # input.path <- "/esarchive/scratch/nmilders/scorecards_data/syear/testing/" #temp - # skill.input.path <- paste0(input.path, "Skill/") #paste0(recipe$Run$output_dir, "/outputs/Skill/") - # stats.input.path <- paste0(input.path, "Statistics/") #paste0(recipe$Run$output_dir, "/outputs/Statistics/") skill.input.path <- paste0(recipe$Run$output_dir, "/outputs/Skill/") stats.input.path <- paste0(recipe$Run$output_dir, "/outputs/Statistics/") output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/") @@ -146,9 +142,7 @@ Scorecards <- function(recipe) { metrics = metrics.visualize, ## metrics.load period = period, start_months = start.months, - forecast_months = forecast.months, calib_method = calib.method, - syear = NULL, inf_to_na = inf.to.na ) @@ -156,9 +150,9 @@ Scorecards <- function(recipe) { if('region' %in% names(dim(loaded_metrics[[1]][[1]]))){ ### Convert loaded metrics to array for already aggregated data - metrics.dim <- attributes(loaded_metrics[[1]][[1]])$metrics - forecast.months.dim <- attributes(loaded_metrics[[1]][[1]])$forecast.months - start.months.dim <- attributes(loaded_metrics[[1]][[1]])$start.months + metrics.dim <- attributes(loaded_metrics)$metrics + forecast.months.dim <- forecast.months + start.months.dim <- attributes(loaded_metrics)$start_months regions.dim <- regions #list('NAO' = c(lon.min = -80, lon.max = 40, lat.min = 20, lat.max = 80)) aggregated_metrics <- array(dim = c(system = length(loaded_metrics), @@ -177,8 +171,8 @@ Scorecards <- function(recipe) { ## Add attributes attributes(aggregated_metrics)$metrics <- metrics.load - attributes(aggregated_metrics)$start.months <- attributes(loaded_metrics[[1]][[1]])$start.months - attributes(aggregated_metrics)$forecast.months <- attributes(loaded_metrics[[1]][[1]])$forecast.months + 
attributes(aggregated_metrics)$start.months <- attributes(loaded_metrics)$start_months
+    attributes(aggregated_metrics)$forecast.months <- forecast.months
     attributes(aggregated_metrics)$regions <- regions
     attributes(aggregated_metrics)$system.name <- names(loaded_metrics)
     attributes(aggregated_metrics)$reference.name <- names(loaded_metrics[[1]])
@@ -188,6 +182,7 @@ Scorecards <- function(recipe) {
     ## Calculate weighted mean of spatial aggregation
     aggregated_metrics <- WeightedMetrics(loaded_metrics,
                                           regions = regions,
+                                          forecast.months = forecast.months,
                                           metric.aggregation = metric.aggregation,
                                           ncores = ncores)
   } ## close if on region
@@ -220,11 +215,9 @@
                                         region = length(regions),
                                         metric = length(metrics.visualize)))
 
-
+    ## Load and aggregate data for each metric
     for (sys in 1:length(system)){
-      # sys_num <- which(system == sys)
      for (ref in 1:length(reference)){
-        # ref_num <- which(refence == ref)
        for (met in metrics.visualize) {
         
          if(met == 'rpss'){
            ## Load data from saved files
            rps_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys],
                                      reference = reference[ref], var = var,
                                      period = period, start_months = start.months,
-                                     forecast_months = forecast.months,
-                                     metrics = 'rps_syear',
-                                     calib_method = calib.method, syear = TRUE)
+                                     calib_method = calib.method, metric = 'rps_syear')
            
            rps_clim_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys],
-                                          reference = reference[ref], var = var,
-                                          period = period, start_months = start.months,
-                                          forecast_months = forecast.months,
-                                          metrics = 'rps_clim_syear',
-                                          calib_method = calib.method, syear = TRUE)
-
-            ## Remove dat and var dimensions
-            rps_syear <- Subset(rps_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected')
-            rps_clim_syear <- Subset(rps_clim_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected')
+                                           reference = reference[ref], var = var,
+                                           period = period, start_months = start.months,
+                                           calib_method = calib.method, metric = 'rps_clim_syear')
 
            ## Spatially aggregate data
-            rps_syear_spatial_aggr <- sapply(X = 1:length(regions),
-                                             FUN = function(X) {
-                                               WeightedMean(data = rps_syear,
-                                                            region = regions[[X]],
-                                                            lon = as.vector(attributes(rps_syear)$Variables$dat1$longitude),
-                                                            lat = as.vector(attributes(rps_syear)$Variables$dat1$latitude),
-                                                            londim = lon_dim,
-                                                            latdim = lat_dim,
-                                                            na.rm = F)
-                                             }, simplify = 'array')
-
-            rps_clim_syear_spatial_aggr <- sapply(X = 1:length(regions),
-                                                  FUN = function(X) {
-                                                    WeightedMean(data = rps_clim_syear,
-                                                                 region = regions[[X]],
-                                                                 lon = as.vector(attributes(rps_clim_syear)$Variables$dat1$longitude),
-                                                                 lat = as.vector(attributes(rps_clim_syear)$Variables$dat1$latitude),
-                                                                 londim = lon_dim,
-                                                                 latdim = lat_dim,
-                                                                 na.rm = F)
-                                                  }, simplify = 'array')
+            rps_syear <- sapply(X = 1:length(regions),
+                                FUN = function(X) {
+                                  WeightedMean(data = rps_syear,
+                                               region = regions[[X]],
+                                               lon = as.vector(attributes(rps_syear)$lon),
+                                               lat = as.vector(attributes(rps_syear)$lat),
+                                               londim = lon_dim,
+                                               latdim = lat_dim,
+                                               na.rm = F)
+                                }, simplify = 'array')
+
+            rps_clim_syear <- sapply(X = 1:length(regions),
+                                     FUN = function(X) {
+                                       WeightedMean(data = rps_clim_syear,
+                                                    region = regions[[X]],
+                                                    lon = as.vector(attributes(rps_clim_syear)$lon),
+                                                    lat = as.vector(attributes(rps_clim_syear)$lat),
+                                                    londim = lon_dim,
+                                                    latdim = lat_dim,
+                                                    na.rm = F)
+                                     }, simplify = 'array')
 
            ## Include name of region dimension
-            names(dim(rps_syear_spatial_aggr))[length(dim(rps_syear_spatial_aggr))] <- 'region'
-            names(dim(rps_clim_syear_spatial_aggr))[length(dim(rps_clim_syear_spatial_aggr))] <- 'region'
+            names(dim(rps_syear))[length(dim(rps_syear))] <- 'region'
+            names(dim(rps_clim_syear))[length(dim(rps_clim_syear))] <- 'region'
 
+            ## Calculate significance
+            sign_rpss <- RandomWalkTest(rps_syear, rps_clim_syear,
+                                        time_dim = time_dim, test.type = 'two.sided',
+                                        alpha = alpha, pval = FALSE, sign = TRUE,
+                                        ncores = NULL)$sign
+
            ## Temporally aggregate data
-            rps_temp_aggr <- Apply(data = rps_syear_spatial_aggr,
+            rps_syear <- Apply(data = rps_syear,
                               target_dims = time_dim,
                               fun = 'mean', ncores = ncores)$output1
 
-            rps_clim_temp_aggr <- Apply(data = rps_clim_syear_spatial_aggr,
+            rps_clim_syear <- Apply(data = rps_clim_syear,
                                    target_dims = time_dim,
                                    fun = 'mean', ncores = ncores)$output1
 
            ## Calculate RPSS from aggregated RPS and RPS_clim
-            rpss <- 1 - rps_temp_aggr / rps_clim_temp_aggr
-
-            ## Calculate significance
-            sign_rpss <- RandomWalkTest(rps_syear_spatial_aggr, rps_clim_syear_spatial_aggr,
-                                        time_dim = time_dim, test.type = 'two.sided',
-                                        alpha = alpha, pval = FALSE, sign = TRUE,
-                                        ncores = NULL)$sign
+            rpss <- 1 - rps_syear / rps_clim_syear
 
            ## Save metric result in arrays
-            aggregated_metrics[sys, ref, , , ,which(metrics.visualize == met)] <- s2dv::Reorder(data = rpss, order = c('time', 'smonths','region'))
-            metrics_significance[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_rpss, order = c('time', 'smonths','region'))
+            aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = rpss, order = c('time', 'sdate','region'))
+            metrics_significance[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_rpss, order = c('time', 'sdate','region'))
 
          } ## close if on rpss
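[Editor's note: a self-contained illustration of the 'score' aggregation implemented in this hunk — the year-by-year RPS series are averaged over start years before forming the skill score, and significance comes from s2dv::RandomWalkTest on the paired series. The arrays and their sizes below are synthetic stand-ins for the region-aggregated rps_syear / rps_clim_syear objects.]

    library(multiApply)
    library(s2dv)
    set.seed(42)
    rps      <- array(runif(24 * 6, 0.2, 0.5), dim = c(syear = 24, time = 6))
    rps_clim <- array(runif(24 * 6, 0.3, 0.6), dim = c(syear = 24, time = 6))
    ## Significance from the paired year-by-year scores
    sign_rpss <- RandomWalkTest(rps, rps_clim, time_dim = 'syear',
                                test.type = 'two.sided', alpha = 0.05,
                                pval = FALSE, sign = TRUE)$sign
    ## Skill score from the temporally aggregated scores
    rps_agg      <- Apply(rps, target_dims = 'syear', fun = 'mean')$output1
    rps_clim_agg <- Apply(rps_clim, target_dims = 'syear', fun = 'mean')$output1
    rpss <- 1 - rps_agg / rps_clim_agg  # one value per forecast time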
 
          if(met == 'crpss'){
            ## Load data from saved files
            crps_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys],
                                       reference = reference[ref], var = var,
                                       period = period, start_months = start.months,
-                                      forecast_months = forecast.months,
-                                      metrics = 'crps_syear',
-                                      calib_method = calib.method, syear = TRUE)
-
+                                      calib_method = calib.method, metric = 'crps_syear')
+
            crps_clim_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys],
                                            reference = reference[ref], var = var,
                                            period = period, start_months = start.months,
-                                           forecast_months = forecast.months,
-                                           metrics = 'crps_clim_syear',
-                                           calib_method = calib.method, syear = TRUE)
-
-            ## Remove dat and var dimensions
-            crps_syear <- Subset(crps_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected')
-            crps_clim_syear <- Subset(crps_clim_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected')
-
+                                           calib_method = calib.method, metric = 'crps_clim_syear')
+
            ## Spatially aggregate data
            crps_syear <- sapply(X = 
1:length(regions), + FUN = function(X) { + WeightedMean(data = crps_syear, + region = regions[[X]], + lon = as.vector(attributes(crps_syear)$lon), + lat = as.vector(attributes(crps_syear)$lat), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + crps_clim_syear <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = crps_clim_syear, + region = regions[[X]], + lon = as.vector(attributes(crps_clim_syear)$lon), + lat = as.vector(attributes(crps_clim_syear)$lat), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') ## Include name of region dimension - names(dim(crps_syear_spatial_aggr))[length(dim(crps_syear_spatial_aggr))] <- 'region' - names(dim(crps_clim_syear_spatial_aggr))[length(dim(crps_clim_syear_spatial_aggr))] <- 'region' + names(dim(crps_syear))[length(dim(crps_syear))] <- 'region' + names(dim(crps_clim_syear))[length(dim(crps_clim_syear))] <- 'region' + + ## Calculate significance + sign_crpss <- RandomWalkTest(crps_syear, crps_clim_syear, + time_dim = time_dim, test.type = 'two.sided', + alpha = alpha, pval = FALSE, sign = TRUE, + ncores = NULL)$sign ## Temporally aggregate data - crps_temp_aggr <- Apply(data = crps_syear_spatial_aggr, + crps_syear <- Apply(data = crps_syear, target_dims = time_dim, fun = 'mean', ncores = ncores)$output1 - crps_clim_temp_aggr <- Apply(data = crps_clim_syear_spatial_aggr, + crps_clim_syear <- Apply(data = crps_clim_syear, target_dims = time_dim, fun = 'mean', ncores = ncores)$output1 ## Calculate CRPSS from aggregated CRPS and CRPS_clim - crpss <- 1 - crps_temp_aggr / crps_clim_temp_aggr - - ## Calculate significance - sign_crpss <- RandomWalkTest(crps_syear_spatial_aggr, crps_clim_syear_spatial_aggr, - time_dim = time_dim, test.type = 'two.sided', - alpha = alpha, pval = FALSE, sign = TRUE, - ncores = NULL)$sign + crpss <- 1 - crps_syear / crps_clim_syear ## Save metric result in arrays - aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = crpss, order = c('time', 'smonths','region')) - metrics_significance[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_crpss, order = c('time', 'smonths','region')) + aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = crpss, order = c('time', 'sdate','region')) + metrics_significance[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_crpss, order = c('time', 'sdate','region')) } ## close if on crpss @@ -375,106 +352,93 @@ Scorecards <- function(recipe) { cov <- .loadmetrics(input_path = stats.input.path, system = system[sys], reference = reference[ref], var = var, period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'cov', - calib_method = calib.method, syear = NULL) + calib_method = calib.method, metric = 'cov') std_hcst <- .loadmetrics(input_path = stats.input.path, system = system[sys], reference = reference[ref], var = var, period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'std_hcst', - calib_method = calib.method, syear = NULL) - + calib_method = calib.method, metric = 'std_hcst') + std_obs <- .loadmetrics(input_path = stats.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'std_obs', - calib_method = calib.method, syear = NULL) - + reference = reference[ref], var = var, + period = 
period, start_months = start.months, + calib_method = calib.method, metric = 'std_obs') + n_eff <- .loadmetrics(input_path = stats.input.path, system = system[sys], reference = reference[ref], var = var, period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'n_eff', - calib_method = calib.method, syear = NULL) - + calib_method = calib.method, metric = 'n_eff') + ## Calculate spatial aggregation - cov_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = cov, - region = regions[[X]], - lon = as.vector(attributes(cov)$Variables$dat1$longitude), - lat = as.vector(attributes(cov)$Variables$dat1$latitude), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') + cov <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = cov, + region = regions[[X]], + lon = as.vector(attributes(cov)$lon), + lat = as.vector(attributes(cov)$lat), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') ## Include name of region dimension - names(dim(cov_spatial_aggr))[length(dim(cov_spatial_aggr))] <- 'region' + names(dim(cov))[length(dim(cov))] <- 'region' - std_hcst_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = std_hcst, - region = regions[[X]], - lon = as.vector(attributes(std_hcst)$Variables$dat1$longitude), - lat = as.vector(attributes(std_hcst)$Variables$dat1$latitude), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - names(dim(std_hcst_spatial_aggr))[length(dim(std_hcst_spatial_aggr))] <- 'region' - - std_obs_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = std_obs, - region = regions[[X]], - lon = as.vector(attributes(std_obs)$Variables$dat1$longitude), - lat = as.vector(attributes(std_obs)$Variables$dat1$latitude), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - names(dim(std_obs_spatial_aggr))[length(dim(std_obs_spatial_aggr))] <- 'region' - - n_eff_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = n_eff, - region = regions[[X]], - lon = as.vector(attributes(std_obs)$Variables$dat1$longitude), - lat = as.vector(attributes(std_obs)$Variables$dat1$latitude), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - names(dim(n_eff_spatial_aggr))[length(dim(n_eff_spatial_aggr))] <- 'region' - n_eff_spatial_aggr <- Subset(n_eff_spatial_aggr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') - + std_hcst <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = std_hcst, + region = regions[[X]], + lon = as.vector(attributes(std_hcst)$lon), + lat = as.vector(attributes(std_hcst)$lat), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + names(dim(std_hcst))[length(dim(std_hcst))] <- 'region' + + std_obs <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = std_obs, + region = regions[[X]], + lon = as.vector(attributes(std_obs)$lon), + lat = as.vector(attributes(std_obs)$lat), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + names(dim(std_obs))[length(dim(std_obs))] <- 'region' + + n_eff <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = n_eff, + region = regions[[X]], + lon = as.vector(attributes(n_eff)$lon), + lat = as.vector(attributes(n_eff)$lat), + londim = lon_dim, + latdim = 
lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + names(dim(n_eff))[length(dim(n_eff))] <- 'region' + ## Calculate correlation - enscorr <- cov_spatial_aggr / (std_hcst_spatial_aggr * std_obs_spatial_aggr) + enscorr <- cov / (std_hcst * std_obs) - ## Drop unwanted dimensions - enscorr <- Subset(enscorr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') - ## Calculate significance of corr - t_alpha2_n2 <- qt(p = alpha/2, df = n_eff_spatial_aggr-2, lower.tail = FALSE) - t <- abs(enscorr) * sqrt(n_eff_spatial_aggr-2) / sqrt(1-enscorr^2) + t_alpha2_n2 <- qt(p = alpha/2, df = n_eff-2, lower.tail = FALSE) + t <- abs(enscorr) * sqrt(n_eff-2) / sqrt(1-enscorr^2) sign_corr<- array(data = NA, dim = c(time = length(forecast.months), - smonths = length(start.months), + sdate = length(start.months), region = length(regions))) - for (time in 1:dim(sign_corr)[['time']]){ - for (mon in 1:dim(sign_corr)[['smonths']]){ + for (mon in 1:dim(sign_corr)[['sdate']]){ for (reg in 1:dim(sign_corr)[['region']]){ if (anyNA(c(t[time, mon, reg], t_alpha2_n2[time, mon, reg])) == FALSE @@ -483,14 +447,13 @@ Scorecards <- function(recipe) { } else { sign_corr[time, mon, reg] <- FALSE } - } } } ## Save metric result in arrays - aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = enscorr, order = c('time', 'smonths','region')) - metrics_significance[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_corr, order = c('time', 'smonths','region')) + aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = enscorr, order = c('time', 'sdate','region')) + metrics_significance[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_corr, order = c('time', 'sdate','region')) } ## close if on enscorr @@ -499,29 +462,24 @@ Scorecards <- function(recipe) { mean_bias <- .loadmetrics(input_path = skill.input.path, system = system[sys], reference = reference[ref], var = var, period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'mean_bias', - calib_method = calib.method, syear = NULL) + calib_method = calib.method, metric = 'mean_bias') ## Calculate spatial aggregation - mean_bias_spatial_aggr <- sapply(X = 1:length(regions), + mean_bias <- sapply(X = 1:length(regions), FUN = function(X) { WeightedMean(data = mean_bias, region = regions[[X]], - lon = as.vector(attributes(mean_bias)$Variables$dat1$longitude), - lat = as.vector(attributes(mean_bias)$Variables$dat1$latitude), + lon = as.vector(attributes(mean_bias)$lon), + lat = as.vector(attributes(mean_bias)$lat), londim = lon_dim, latdim = lat_dim, na.rm = na.rm) }, simplify = 'array') - names(dim(mean_bias_spatial_aggr))[length(dim(mean_bias_spatial_aggr))] <- 'region' - - ## Drop unwanted dimensions - mean_bias_spatial_aggr <- Subset(mean_bias_spatial_aggr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') - + names(dim(mean_bias))[length(dim(mean_bias))] <- 'region' + ## Save metric result in array - aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = mean_bias_spatial_aggr, order = c('time', 'smonths','region')) + aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = mean_bias, order = c('time', 'sdate','region')) } ## close on mean_bias @@ -530,29 +488,24 @@ Scorecards <- function(recipe) { enssprerr <- .loadmetrics(input_path = skill.input.path, system = system[sys], reference = reference[ref], var = var, 
period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'enssprerr', - calib_method = calib.method, syear = NULL) + calib_method = calib.method, metric = 'enssprerr') ## Calculate spatial aggregation - enssprerr_spatial_aggr <- sapply(X = 1:length(regions), + enssprerr <- sapply(X = 1:length(regions), FUN = function(X) { WeightedMean(data = enssprerr, region = regions[[X]], - lon = as.vector(attributes(enssprerr)$Variables$dat1$longitude), - lat = as.vector(attributes(enssprerr)$Variables$dat1$latitude), + lon = as.vector(attributes(enssprerr)$lon), + lat = as.vector(attributes(enssprerr)$lat), londim = lon_dim, latdim = lat_dim, na.rm = na.rm) }, simplify = 'array') - names(dim(enssprerr_spatial_aggr))[length(dim(enssprerr_spatial_aggr))] <- 'region' - - ## Drop unwanted dimensions - enssprerr_spatial_aggr <- Subset(enssprerr_spatial_aggr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') - + names(dim(enssprerr))[length(dim(enssprerr))] <- 'region' + ## Save metric result in array - aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = enssprerr_spatial_aggr, order = c('time', 'smonths','region')) + aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = enssprerr, order = c('time', 'sdate','region')) } ## close on enssprerr @@ -560,10 +513,10 @@ Scorecards <- function(recipe) { } ## close if on reference } ## close if on system - #Include metric attributes + ## Include metric attributes attributes(aggregated_metrics)$metrics <- metrics.visualize - ## set NAs to False + ## Set NAs to False metrics_significance[is.na(metrics_significance)] <- FALSE } ## close if on score -- GitLab From 691fc577ec0f04c80bb251ffb43488aa26da5480 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Wed, 14 Feb 2024 16:54:20 +0100 Subject: [PATCH 26/43] including statistics in plot_skill_metrics.R --- modules/Statistics/Statistics.R | 12 ++---- modules/Visualization/R/plot_skill_metrics.R | 40 +++++++++++++++++++- 2 files changed, 43 insertions(+), 9 deletions(-) diff --git a/modules/Statistics/Statistics.R b/modules/Statistics/Statistics.R index 777f5c78..640c7b58 100644 --- a/modules/Statistics/Statistics.R +++ b/modules/Statistics/Statistics.R @@ -11,17 +11,13 @@ compute_statistics <- function(recipe, data, agg = 'global'){ memb_dim <- 'ensemble' ncores <- recipe$Analysis$ncores - # ## Remove unwanted dimensions + ## Remove unwanted dimensions obs_data <- Subset(data$obs$data, along = c('dat', 'sday', 'sweek', 'ensemble'), indices = list(1,1,1,1) , drop = 'selected') hcst_data <- Subset(data$hcst$data, along = c('dat', 'sday', 'sweek'), indices = list(1,1,1) , drop = 'selected') ## Repeat ensemble members for obs obs_data_ens <- InsertDim(data = obs_data, pos = length(dim(obs_data))+1, lendim = 25, name = 'ensemble') - - # obs_data <- adrop(data$obs$data, drop = 9) - # obs_data_ens <- InsertDim(data = obs_data, pos = 9, lendim = 25, name = 'ensemble') - - + statistics_list <- tolower(recipe$Analysis$Workflow$Statistics$metric) statistics_metrics <- list() @@ -94,8 +90,8 @@ compute_statistics <- function(recipe, data, agg = 'global'){ if (recipe$Analysis$Workflow$Statistics$save == 'all') { # Save all statistics - save_metrics_scorecards(recipe = recipe, skill = statistics_metrics, ## Not able to save data with these dimensions - data_cube = data$hcst, agg = agg) ## The length of parameter 'order' should be the same with the dimension length of parameter 'data'. 
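[Editor's note: a toy illustration of the Apply-based statistics pattern used in compute_statistics(); it reproduces the corrected form this series converges on — ensemble mean first, then covariance and standard deviation over the start-year dimension. Dimension names and sizes below are illustrative, not taken from the module.]

    library(multiApply)
    set.seed(7)
    hcst <- array(rnorm(24 * 8), dim = c(syear = 24, latitude = 2, longitude = 4))
    obs  <- array(rnorm(24 * 8), dim = c(syear = 24, latitude = 2, longitude = 4))
    ## Covariance between hindcast and observations, gridpoint by gridpoint
    covariance <- Apply(data = list(x = obs, y = hcst),
                        target_dims = 'syear',
                        fun = function(x, y) {
                          cov(as.vector(x), as.vector(y),
                              use = "everything", method = "pearson")
                        })$output1
    ## Standard deviation over start years
    std_hcst <- Apply(data = hcst, target_dims = 'syear', fun = 'sd')$output1
    ## Effective sample size, as used for the correlation significance test
    n_eff <- s2dv::Eno(data = obs, time_dim = 'syear')
    dim(covariance)  # latitude, longitude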
+ save_metrics_scorecards(recipe = recipe, skill = statistics_metrics, + data_cube = data$hcst, agg = agg) } # Return results diff --git a/modules/Visualization/R/plot_skill_metrics.R b/modules/Visualization/R/plot_skill_metrics.R index 2698d499..6fed8aed 100644 --- a/modules/Visualization/R/plot_skill_metrics.R +++ b/modules/Visualization/R/plot_skill_metrics.R @@ -58,13 +58,16 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, "enscorr", "rpss_specs", "bss90_specs", "bss10_specs", "enscorr_specs", "rmsss", "msss") scores <- c("rps", "frps", "crps", "frps_specs", "mse") + statistics <- c("cov", "std_hcst", "std_obs") + # Loop over variables and assign colorbar and plot parameters to each metric for (var in 1:data_cube$dims[['var']]) { + var_name <- data_cube$attrs$Variable$varName[[var]] ## Need to include for statistics plotting to work var_skill <- lapply(skill_metrics, function(x) { ClimProjDiags::Subset(x, along = 'var', indices = var, drop = 'selected')}) - for (name in c(skill_scores, scores, "mean_bias", "enssprerr")) { + for (name in c(skill_scores, scores, statistics, "mean_bias", "enssprerr")) { if (name %in% names(skill_metrics)) { units <- NULL # Define plot characteristics and metric name to display in plot @@ -120,7 +123,42 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, col_inf <- colorbar[1] col_sup <- colorbar[length(colorbar)] units <- data_cube$attrs$Variable$metadata[[var_name]]$units + } else if (name %in% "cov") { + skill <- var_skill[[name]] + display_name <- "Covariance" + max_value <- max(abs(quantile(skill, 0.02, na.rm = T)), + abs(quantile(skill, 0.98, na.rm = T))) + brks <- max_value * seq(-1, 1, by = 0.2) + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- colorbar[1] + col_sup <- colorbar[length(colorbar)] + units <- paste0(data_cube$attrs$Variable$metadata[[var_name]]$units, "²") + } else if (name %in% "std_hcst") { + skill <- var_skill[[name]] + display_name <- "Hindcast Standard Deviation" + max_value <- max(abs(quantile(skill, 0.02, na.rm = T)), + abs(quantile(skill, 0.98, na.rm = T))) + brks <- max_value * seq(-1, 1, by = 0.2) + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- colorbar[1] + col_sup <- colorbar[length(colorbar)] + units <- data_cube$attrs$Variable$metadata[[var_name]]$units + } else if (name %in% "std_obs") { + skill <- var_skill[[name]] + display_name <- "Observation Standard Deviation" + max_value <- max(abs(quantile(skill, 0.02, na.rm = T)), + abs(quantile(skill, 0.98, na.rm = T))) + brks <- max_value * seq(-1, 1, by = 0.2) + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- colorbar[1] + col_sup <- colorbar[length(colorbar)] + units <- data_cube$attrs$Variable$metadata[[var_name]]$units } + + # Reorder dimensions skill <- Reorder(skill, c("time", "longitude", "latitude")) # If the significance has been requested and the variable has it, -- GitLab From 13b8e035404d4b9985e29e5a3363235efeeed869 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Fri, 16 Feb 2024 17:38:44 +0100 Subject: [PATCH 27/43] corrected statistics calculations --- modules/Scorecards/Scorecards.R | 2 +- modules/Statistics/Statistics.R | 28 +++++++++++++--------------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/modules/Scorecards/Scorecards.R b/modules/Scorecards/Scorecards.R index e3d1abd8..37aa421c 100644 
--- a/modules/Scorecards/Scorecards.R +++ b/modules/Scorecards/Scorecards.R @@ -139,7 +139,7 @@ Scorecards <- function(recipe) { system = system, reference = reference, var = var, - metrics = metrics.visualize, ## metrics.load + metrics = metrics.visualize, period = period, start_months = start.months, calib_method = calib.method, diff --git a/modules/Statistics/Statistics.R b/modules/Statistics/Statistics.R index 640c7b58..3eff08e2 100644 --- a/modules/Statistics/Statistics.R +++ b/modules/Statistics/Statistics.R @@ -8,26 +8,23 @@ compute_statistics <- function(recipe, data, agg = 'global'){ # recipe: auto-s2s recipe as provided by read_yaml time_dim <- 'syear' - memb_dim <- 'ensemble' ncores <- recipe$Analysis$ncores - - ## Remove unwanted dimensions - obs_data <- Subset(data$obs$data, along = c('dat', 'sday', 'sweek', 'ensemble'), indices = list(1,1,1,1) , drop = 'selected') - hcst_data <- Subset(data$hcst$data, along = c('dat', 'sday', 'sweek'), indices = list(1,1,1) , drop = 'selected') - ## Repeat ensemble members for obs - obs_data_ens <- InsertDim(data = obs_data, pos = length(dim(obs_data))+1, lendim = 25, name = 'ensemble') - + ## Calculate ensemble mean + hcst_data <- Apply(data$hcst$data, target_dims = 'ensemble', fun = 'mean')$output1 + obs_data <- Apply(data$obs$data, target_dims = 'ensemble', fun = 'mean')$output1 + statistics_list <- tolower(recipe$Analysis$Workflow$Statistics$metric) statistics_metrics <- list() for (stat in strsplit(statistics_list, ", | |,")[[1]]) { - # Whether the fair version of the metric is to be computed + if (stat %in% c('cov', 'covariance')) { - covariance <- Apply(data = list(x = obs_data_ens, y = hcst_data), - target_dims = c(time_dim, memb_dim), + ## Calculate covariance + covariance <- Apply(data = list(x = obs_data, y = hcst_data), + target_dims = time_dim, fun = function(x,y){cov(as.vector(x),as.vector(y), use = "everything", method = "pearson")})$output1 @@ -39,9 +36,9 @@ compute_statistics <- function(recipe, data, agg = 'global'){ if (stat %in% c('std', 'standard_deviation')) { - ## Calculate standard deviation + ## Calculate standard deviation std_hcst <- Apply(data = hcst_data, - target_dims = c(time_dim, memb_dim), + target_dims = c(time_dim), fun = 'sd')$output1 std_obs <- Apply(data = obs_data, @@ -55,9 +52,9 @@ compute_statistics <- function(recipe, data, agg = 'global'){ if (stat %in% c('var', 'variance')) { - ## Calculate standard deviation + ## Calculate variance var_hcst <- (Apply(data = hcst_data, - target_dims = c(time_dim, memb_dim), + target_dims = c(time_dim), fun = 'sd')$output1)^2 var_obs <- (Apply(data = obs_data, @@ -71,6 +68,7 @@ compute_statistics <- function(recipe, data, agg = 'global'){ if (stat == 'n_eff') { + ## Calculate degrees of freedom n_eff <- s2dv::Eno(data = obs_data, time_dim = time_dim, na.action = na.pass, ncores = ncores) statistics_metrics[['n_eff']] <- n_eff -- GitLab From 43e346f49bcb6b7c6eb853486e0f16d80e599817 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Fri, 16 Feb 2024 18:38:04 +0100 Subject: [PATCH 28/43] including statistics plotting from recipe --- modules/Statistics/Statistics.R | 24 ++++++++++++++---------- modules/Visualization/Visualization.R | 12 ++++++++++++ tools/check_recipe.R | 2 +- 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/modules/Statistics/Statistics.R b/modules/Statistics/Statistics.R index 3eff08e2..4bfa107f 100644 --- a/modules/Statistics/Statistics.R +++ b/modules/Statistics/Statistics.R @@ -9,14 +9,18 @@ compute_statistics <- function(recipe, 
data, agg = 'global'){ time_dim <- 'syear' ncores <- recipe$Analysis$ncores - + ## Calculate ensemble mean hcst_data <- Apply(data$hcst$data, target_dims = 'ensemble', fun = 'mean')$output1 obs_data <- Apply(data$obs$data, target_dims = 'ensemble', fun = 'mean')$output1 + ## Remove unwanted dimensions + hcst_data <- Subset(hcst_data, along = c('dat', 'sday', 'sweek'), indices = list(1,1,1) , drop = 'selected') + obs_data <- Subset(obs_data, along = c('dat', 'sday', 'sweek'), indices = list(1,1,1) , drop = 'selected') + statistics_list <- tolower(recipe$Analysis$Workflow$Statistics$metric) - statistics_metrics <- list() + statistics <- list() for (stat in strsplit(statistics_list, ", | |,")[[1]]) { @@ -29,7 +33,7 @@ compute_statistics <- function(recipe, data, agg = 'global'){ use = "everything", method = "pearson")})$output1 - statistics_metrics[[ stat ]] <- covariance + statistics[[ stat ]] <- covariance } ## close if on covariance @@ -45,8 +49,8 @@ compute_statistics <- function(recipe, data, agg = 'global'){ target_dims = c(time_dim), fun = 'sd')$output1 - statistics_metrics[['std_hcst']] <- std_hcst - statistics_metrics[['std_obs']] <- std_obs + statistics[['std_hcst']] <- std_hcst + statistics[['std_obs']] <- std_obs } ## close if on std @@ -61,8 +65,8 @@ compute_statistics <- function(recipe, data, agg = 'global'){ target_dims = c(time_dim), fun = 'sd')$output1)^2 - statistics_metrics[['var_hcst']] <- var_hcst - statistics_metrics[['var_obs']] <- var_obs + statistics[['var_hcst']] <- var_hcst + statistics[['var_obs']] <- var_obs } ## close if on variance @@ -71,7 +75,7 @@ compute_statistics <- function(recipe, data, agg = 'global'){ ## Calculate degrees of freedom n_eff <- s2dv::Eno(data = obs_data, time_dim = time_dim, na.action = na.pass, ncores = ncores) - statistics_metrics[['n_eff']] <- n_eff + statistics[['n_eff']] <- n_eff } ## close on n_eff } @@ -88,11 +92,11 @@ compute_statistics <- function(recipe, data, agg = 'global'){ if (recipe$Analysis$Workflow$Statistics$save == 'all') { # Save all statistics - save_metrics_scorecards(recipe = recipe, skill = statistics_metrics, + save_metrics_scorecards(recipe = recipe, skill = statistics, data_cube = data$hcst, agg = agg) } # Return results - return(statistics_metrics) + return(statistics) } diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index a6b6bd75..62075132 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -83,6 +83,18 @@ Visualization <- function(recipe, "parameter 'skill_metrics' is NULL")) } } + + # Plot statistics + if ("statistics" %in% plots) { + if (!is.null(statistics)) { + plot_skill_metrics(recipe, data$hcst, statistics, outdir, + significance, output_conf = output_conf) + } else { + error(recipe$Run$logger, + paste0("The statistics plots have been requested, but the ", + "parameter 'skill_metrics' is NULL")) + } + } # Plot forecast ensemble mean if ("forecast_ensemble_mean" %in% plots) { diff --git a/tools/check_recipe.R b/tools/check_recipe.R index c398c345..e0c44cc0 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -505,7 +505,7 @@ check_recipe <- function(recipe) { # Visualization if ("Visualization" %in% names(recipe$Analysis$Workflow)) { PLOT_OPTIONS <- c("skill_metrics", "forecast_ensemble_mean", - "most_likely_terciles") + "most_likely_terciles", "statistics") # Separate plots parameter and check if all elements are in PLOT_OPTIONS if (is.null(recipe$Analysis$Workflow$Visualization$plots)) { 
error(recipe$Run$logger, -- GitLab From 86d12d0085524e1caf1a5ea51a5aab103d6d76be Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Fri, 23 Feb 2024 15:51:42 +0100 Subject: [PATCH 29/43] removing SCPlotScorecard function, replaced by VizScorecard --- modules/Scorecards/R/tmp/SCPlotScorecard.R | 478 --------------------- 1 file changed, 478 deletions(-) delete mode 100644 modules/Scorecards/R/tmp/SCPlotScorecard.R diff --git a/modules/Scorecards/R/tmp/SCPlotScorecard.R b/modules/Scorecards/R/tmp/SCPlotScorecard.R deleted file mode 100644 index a6575ea5..00000000 --- a/modules/Scorecards/R/tmp/SCPlotScorecard.R +++ /dev/null @@ -1,478 +0,0 @@ -#'Scorecards function create simple scorecards by region (types 1 & 3) -#' -#'@description This function creates a scorecard for a single system and -#'reference combination, showing data by region and forecast month. -#' -#'@param data A multidimensional array containing spatially aggregated metrics -#' data with dimensions: metric, region, sdate and ftime. -#'@param sign A multidimensional boolean array with the same dimensions as -#' 'data', indicting which values to be highlighted. If set to NULL no values -#' will be highlighted. -#'@param row.dim A character string indicating the dimension name to show in the -#' rows of the plot. -#'@param subrow.dim A character string indicating the dimension name to show in -#' the sub-rows of the plot. -#'@param col.dim A character string indicating the dimension name to show in the -#' columns of the plot. -#'@param subcol.dim A character string indicating the dimension name to show in -#' the sub-columns of the plot. -#'@param legend.dim A character string indicating the dimension name to use for -#' the legend. -#'@param row.names A vector of character strings with row display names. -#'@param subrow.names A vector of character strings with sub-row display names. -#'@param col.names A vector of character strings with column display names. -#'@param subcol.names A vector of character strings with sub-column display -#' names. -#'@param row.title A character string for the title of the row names. -#'@param subrow.title A character string for the title of the sub-row names. -#'@param table.title A character string for the title of the plot. -#'@param table.subtitle A character string for the sub-title of the plot. -#'@param legend.breaks A vector of numerics or a list of vectors of numerics, -#' containing the breaks for the legends. If a vector is given as input, then -#' these breaks will be repeated for each legend.dim. A list of vectors can be -#' given as input if the legend.dims require different breaks. This parameter -#' is required even if the legend is not plotted, to define the colors in the -#' scorecard table. -#'@param plot.legend A logical value to determine if the legend is plotted. -#'@param legend.width A numeric value to define the width of the legend bars. -#'@param legend.height A numeric value to define the height of the legend bars. -#'@param label.scale A numeric value to define the size of the legend labels. -#'@param palette A vector of character strings or a list of vectors of -#' character strings containing the colors to use in the legends. If a vector -#' is given as input, then these colors will be used for each legend.dim. A -#' list of vectors can be given as input if different colors are desired for -#' the legend.dims. This parameter must be included even if the the legend is -#' not plotted, to define the colors in the scorecard table. 
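[Editor's note: the function deleted in this patch built its colored tables with kableExtra; the sketch below isolates that cell-shading technique on toy data. The two palette hex codes are taken from the deleted code's default palette; the data frame, colors-by-sign rule, and output file name are illustrative only, and rendering to PNG assumes a webshot/chromote backend is available.]

    library(kableExtra)
    df <- data.frame(Region = c('Tropics', 'Extra-tropical NH', 'Extra-tropical SH'),
                     RPSS = c(0.12, -0.05, 0.03))
    cell_cols <- ifelse(df$RPSS > 0, "#E08214", "#8073AC")  # hex codes from the deleted palette
    tbl <- kbl(df, escape = FALSE, align = "c") %>%
      kable_paper("hover", full_width = FALSE) %>%
      column_spec(2, background = cell_cols, color = "white", bold = TRUE)
    save_kable(tbl, file = "mini_scorecard.png")  # needs webshot to render PNG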
-#'@param colorunder A character string or of vector of character strings -#' defining the colors to use for data values with are inferior to the lowest -#' breaks value. This parameter will also plot a inferior triangle in the -#' legend bar. The parameter can be set to NULL if there are no inferior values. -#' If a character string is given this color will be applied to all legend.dims. -#'@param colorsup A character string or of vector of character strings -#' defining the colors to use for data values with are superior to the highest -#' breaks value. This parameter will also plot a inferior triangle in the -#' legend bar. The parameter can be set to NULL if there are no superior values. -#' If a character string is given this color will be applied to all legend.dims. -#'@param legend.white.space A numeric value defining the initial starting -#' position of the legend bars, the white space infront of the legend is -#' calculated from the left most point of the table as a distance in cm. -#'@param round.decimal A numeric indicating to which decimal point the data -#' is to be displayed in the scorecard table. Default is 2. -#'@param font.size A numeric indicating the font size on the scorecard table. -#'@param col1.width A numeric value defining the width of the first table column -#' in cm. -#'@param col2.width A numeric value defining the width of the second table -#' column in cm. -#'@param columns.width A numeric value defining the width all columns within the -#' table in cm (excluding the first and second columns containing the titles). -#'@param fileout A path of the location to save the scorecard plots. -#' -#'@return An image file containing the scorecard. -#'@example -#'data <- array(rnorm(1000), dim = c('sdate' = 12, 'metric' = 4, 'region' = 3, -#' 'time' = 6)) -#'row.names <- c('Tropics', 'Extra-tropical NH', 'Extra-tropical SH') -#'col.names <- c('Mean bias (K)', 'Correlation', 'RPSS','CRPSS') -#'SCPlotScorecard(data = data, row.names = row.names, col.names = col.names, -#' subcol.names = month.abb[as.numeric(1:12)], -#' row.title = 'Region', subrow.title = 'Forecast Month', -#' col.title = 'Start date', -#' table.title = "Temperature of ECMWF System 5", -#' table.subtitle = "(Ref: ERA5 1994-2016)", -#' fileout = 'test.png') -#' -#'@import kableExtra -#'@import s2dv -#'@import ClimProjDiags -#'@export -SCPlotScorecard <- function(data, sign = NULL, - row.dim = 'region', subrow.dim = 'time', - col.dim = 'metric', subcol.dim = 'sdate', - legend.dim = 'metric', row.names = NULL, - subrow.names = NULL, col.names = NULL, - subcol.names = NULL, row.title = NULL, - subrow.title = NULL, col.title = NULL, - table.title = NULL, table.subtitle = NULL, - legend.breaks = NULL, plot.legend = TRUE, - label.scale = NULL, legend.width = NULL, - legend.height = NULL, palette = NULL, - colorunder = NULL, colorsup = NULL, - round.decimal = 2, font.size = 1.1, - legend.white.space = NULL, - col1.width = NULL, col2.width = NULL, - columns.width = 1.2, - fileout = './scorecard.png') { - # Input parameter checks - ## Check data - if (!is.array(data)) { - stop("Parameter 'data' must be a numeric array.") - } - ## Check sign - if (is.null(sign)){ - sign <- array(F, dim = dim(data)) - } else { - if (!is.array(sign)) { - stop("Parameter 'sign' must be a boolean array or NULL.") - } - } - ## Check row.dim - if (!is.character(row.dim)) { - stop("Parameter 'row.dim' must be a character string.") - } - if (!row.dim %in% names(dim(data))) { - stop("Parameter 'row.dim' is not found in 'data' dimensions.") - 
} - ## Check row.names - if (!is.null(row.names)) { - if (length(row.names) != as.numeric(dim(data)[row.dim])) { - stop("Parameter 'row.names' must have the same length of dimension 'row.dims'.") - } - } else { - row.names <- as.character(1:dim(data)[row.dim]) - } - ## Check subrow.dim - if (!is.character(subrow.dim)) { - stop("Parameter 'subrow.dim' must be a character string.") - } - if (!subrow.dim %in% names(dim(data))) { - stop("Parameter 'subrow.dim' is not found in 'data' dimensions.") - } - ## Check subrow.names - if (!is.null(subrow.names)) { - if (length(subrow.names) != as.numeric(dim(data)[subrow.dim])) { - stop("Parameter 'subrow.names' must have the same length of dimension 'subrow.dims'.") - } - } else { - subrow.names <- as.character(1:dim(data)[subrow.dim]) - } - ## Check col.dim - if (!is.character(col.dim)) { - stop("Parameter 'col.dim' must be a character string.") - } - if (!col.dim %in% names(dim(data))) { - stop("Parameter 'col.dim' is not found in 'data' dimensions.") - } - ## Check col.names - if (!is.null(col.names)) { - if (length(col.names) != as.numeric(dim(data)[col.dim])) { - stop("Parameter 'col.names' must have the same length of dimension 'col.dims'.") - } - } else { - col.names <- as.character(1:dim(data)[col.dim]) - } - ## Check subcol.dim - if (!is.character(subcol.dim)) { - stop("Parameter 'subcol.dim' must be a character string.") - } - if (!subcol.dim %in% names(dim(data))) { - stop("Parameter 'subcol.dim' is not found in 'data' dimensions.") - } - ## Check subcol.names - if (!is.null(subcol.names)) { - if (length(subcol.names) != as.numeric(dim(data)[subcol.dim])) { - stop("Parameter 'subcol.names' must have the same length of dimension 'subcol.dims'.") - } - } else { - subcol.names <- as.character(1:dim(data)[subcol.dim]) - } - ## Check legend.dim - if (!is.character(legend.dim)) { - stop("Parameter 'legend.dim' must be a character string.") - } - if (!legend.dim %in% names(dim(data))) { - stop("Parameter 'legend.dim' is not found in 'data' dimensions.") - } - ## Check row.title inputs - if (!is.null(row.title)) { - if (!is.character(row.title)) { - stop("Parameter 'row.title must be a character string.") - } - } else { - row.title <- "" - } - ## Check subrow.title - if (!is.null(subrow.title)) { - if (!is.character(subrow.title)) { - stop("Parameter 'subrow.title must be a character string.") - } - } else { - subrow.title <- "" - } - ## Check col.title - if (!is.null(col.title)) { - if (!is.character(col.title)) { - stop("Parameter 'col.title must be a character string.") - } - } else { - col.title <- "" - } - ## Check table.title - if (!is.null(table.title)) { - if (!is.character(table.title)) { - stop("Parameter 'table.title' must be a character string.") - } - } else { - table.title <- "" - } - ## Check table.subtitle - if (!is.null(table.subtitle)) { - if (!is.character(table.subtitle)) { - stop("Parameter 'table.subtitle' must be a character string.") - } - } else { - table.subtitle <- "" - } - # Check legend.breaks - if (is.vector(legend.breaks) && is.numeric(legend.breaks)) { - legend.breaks <- rep(list(legend.breaks), as.numeric(dim(data)[legend.dim])) - } else if (is.null(legend.breaks)) { - legend.breaks <- rep(list(seq(-1, 1, 0.2)), as.numeric(dim(data)[legend.dim])) - } else if (inherits(legend.breaks, 'list')) { - stopifnot(length(legend.breaks) == as.numeric(dim(data)[legend.dim])) - } else { - stop("Parameter 'legend.breaks' must be a numeric vector, a list or NULL.") - } - ## Check plot.legend - if (!inherits(plot.legend, 
'logical')) { - stop("Parameter 'plot.legend' must be a logical value.") - } - ## Check label.scale - if (is.null(label.scale)) { - label.scale <- 1.4 - } else { - if (!is.numeric(label.scale) | length(label.scale) != 1) { - stop("Parameter 'label.scale' must be a numeric value of length 1.") - } - } - ## Check legend.width - if (is.null(legend.width)) { - legend.width <- length(subcol.names) * 46.5 - } else { - if (!is.numeric(legend.width) | length(legend.width) != 1) { - stop("Parameter 'legend.width' must be a numeric value of length 1.") - } - } - if (is.null(legend.height)) { - legend.height <- 50 - } else { - if (!is.numeric(legend.height) | length(legend.height) != 1) { - stop("Parameter 'legend.height' must be a numeric value of length 1.") - } - } - ## Check colour palette input - if (is.vector(palette)) { - palette <- rep(list(palette), as.numeric(dim(data)[legend.dim])) - } else if (is.null(palette)) { - palette <- rep(list(c('#2D004B', '#542789', '#8073AC', '#B2ABD2', '#D8DAEB', - '#FEE0B6', '#FDB863', '#E08214', '#B35806', '#7F3B08')), - as.numeric(dim(data)[legend.dim])) - } else if (inherits(palette, 'list')) { - stopifnot(length(palette) == as.numeric(dim(data)[legend.dim])) - } else { - stop("Parameter 'palette' must be a numeric vector, a list or NULL.") - } - ## Check colorunder - if (is.null(colorunder)) { - colorunder <- rep("#04040E",as.numeric(dim(data)[legend.dim])) - } else if (is.character(colorunder) && length(colorunder) == 1) { - colorunder <- rep(colorunder, as.numeric(dim(data)[legend.dim])) - } else if (is.character(colorunder) && - length(colorunder) != as.numeric(dim(data)[legend.dim])) { - stop("Parameter 'colorunder' must be a numeric vector, a list or NULL.") - } - ## Check colorsup - if (is.null(colorsup)) { - colorsup <- rep("#730C04", as.numeric(dim(data)[legend.dim])) - } else if (is.character(colorsup) && length(colorsup) == 1) { - colorsup <- rep(colorsup,as.numeric(dim(data)[legend.dim])) - } else if (is.character(colorsup) && - length(colorsup) != as.numeric(dim(data)[legend.dim])) { - stop("Parameter 'colorsup' must be a numeric vector, a list or NULL.") - } - ## Check round.decimal - if (is.null(round.decimal)) { - round.decimal <- 2 - } else if (!is.numeric(round.decimal) | length(round.decimal) != 1) { - stop("Parameter 'round.decimal' must be a numeric value of length 1.") - } - ## Check font.size - if (is.null(font.size)) { - font.size <- 1 - } else if (!is.numeric(font.size) | length(font.size) != 1) { - stop("Parameter 'font.size' must be a numeric value of length 1.") - } - ## Check legend white space - if (is.null(legend.white.space)){ - legend.white.space <- 6 - } else { - legend.white.space <- legend.white.space - } - ## Check col1.width - if (is.null(col1.width)) { - if (max(nchar(row.names)) == 1 ) { - col1.width <- max(nchar(row.names)) - } else { - col1.width <- max(nchar(row.names))/4 - } - } else if (!is.numeric(col1.width)) { - stop("Parameter 'col1.width' must be a numeric value of length 1.") - } - ## Check col2.width - if (is.null(col2.width)) { - if (max(nchar(subrow.names)) == 1 ) { - col2.width <- max(nchar(subrow.names)) - } else { - col2.width <- max(nchar(subrow.names))/4 - } - } else if (!is.numeric(col2.width)) { - stop("Parameter 'col2.width' must be a numeric value of length 1.") - } - - - # Get dimensions of inputs - n.col.names <- length(col.names) - n.subcol.names <- length(subcol.names) - n.row.names <- length(row.names) - n.subrow.names <- length(subrow.names) - - # Define table size - n.rows <- n.row.names 
* n.subrow.names - n.columns <- 2 + (n.col.names * n.subcol.names) - - # Column names - row.names.table <- rep("", n.rows) - for (row in 1:n.row.names) { - row.names.table[floor(n.subrow.names/2) + (row - 1) * n.subrow.names] <- row.names[row] - } - - # Define scorecard table titles - column.titles <- c(row.title, subrow.title, rep(c(subcol.names), n.col.names)) - - # Round data - data <- round(data, round.decimal) - - # Define data inside the scorecards table - for (row in 1:n.row.names) { - table_temp <- data.frame(table_column_2 = as.character(subrow.names)) - for (col in 1:n.col.names) { - table_temp <- data.frame(table_temp, - Reorder(data = Subset(x = data, along = c(col.dim, row.dim), - indices = list(col, row), drop = 'selected'), - order = c(subrow.dim, subcol.dim))) - } - if (row == 1) { - table_data <- table_temp - } else { - table_data <- rbind(table_data, table_temp) - } - } - - # All data for plotting in table - table <- data.frame(table_column_1 = row.names.table, table_data) - table_temp <- array(unlist(table[3:n.columns]), dim = c(n.rows, n.columns - 2)) - - # Define colors to show in table - table_colors <- .SCTableColors(table = table_temp, - n.col = n.col.names, n.subcol = n.subcol.names, - n.row = n.row.names, n.subrow = n.subrow.names, - legend.breaks = legend.breaks, palette = palette, - colorunder = colorunder, colorsup = colorsup) - - metric.color <- table_colors$metric.color - metric.text.color <- table_colors$metric.text.color - # metric.text.bold <- table_colors$metric.text.bold - - # Remove temporary table - rm(table_temp) - - ## Format values to underline in table - metric.underline <- MergeDims(sign, c(subcol.dim, col.dim) , rename_dim = 'col', na.rm =F) - metric.underline <- MergeDims(metric.underline, c(subrow.dim, row.dim) , rename_dim = 'row', na.rm =F) - - options(stringsAsFactors = FALSE) - title <- data.frame(c1 = table.title, c2 = n.columns) - subtitle <- data.frame(c1 = table.subtitle, c2 = n.columns) - header.names <- as.data.frame(data.frame(c1 = c("", col.names), - c2 = c(2, rep(n.subcol.names, n.col.names)))) - header.names2 <- as.data.frame(data.frame(c1 = c("", paste0(rep(col.title, n.col.names))), - c2 = c(2, rep(n.subcol.names, n.col.names)))) - title.space <- data.frame(c1 = "\n", c2 = n.columns) - - # Hide NA values in table - options(knitr.kable.NA = '') - - # Create HTML table - table.html.part <- list() - table.html.part[[1]] <- kbl(table, escape = F, col.names = column.titles, align = rep("c", n.columns)) %>% - kable_paper("hover", full_width = F, font_size = 14 * font.size) %>% - add_header_above(header = header.names2, font_size = 16 * font.size) %>% - add_header_above(header = title.space, font_size = 10 * font.size) %>% - add_header_above(header = header.names, font_size = 20 * font.size) %>% - add_header_above(header = title.space, font_size = 10 * font.size) %>% - add_header_above(header = subtitle, font_size = 16 * font.size, align = "left") %>% - add_header_above(header = title.space, font_size = 10 * font.size) %>% - add_header_above(header = title, font_size = 22 * font.size, align = "left") - - for (i in 1:n.col.names) { - for (j in 1:n.subcol.names) { - my.background <- metric.color[, (i - 1) * n.subcol.names + j] - my.text.color <- metric.text.color[, (i - 1) * n.subcol.names + j] - my.underline <- metric.underline[, (i - 1) * n.subcol.names + j] - # my.bold <- metric.text.bold[(i - 1) * n.subcol.names + j] - - table.html.part[[(i - 1) * n.subcol.names + j + 1]] <- - column_spec(table.html.part[[(i - 1) * n.subcol.names + 
j]], - 2 + n.subcol.names * (i - 1) + j, - background = my.background[1:n.rows], - color = my.text.color[1:n.rows], - underline = my.underline[1:n.rows], - bold = T) ## strsplit(toString(bold), ', ')[[1]] - } - } - - # Define position of table borders - column.borders <- NULL - for (i in 1:n.col.names) { - column.spacing <- (n.subcol.names * i) + 2 - column.borders <- c(column.borders, column.spacing) - } - - n.last.list <- n.col.names * n.subcol.names + 1 - - table.html <- column_spec(table.html.part[[n.last.list]], 1, bold = TRUE, width_min = paste0(col1.width, 'cm')) %>% - column_spec(2, bold = TRUE, width_min = paste0(col2.width, 'cm')) %>% - column_spec(3:n.columns, width_min = paste0(columns.width, 'cm')) %>% - column_spec(c(1, 2, column.borders), border_right = "2px solid black") %>% - column_spec(1, border_left = "2px solid black") %>% - column_spec(n.columns, border_right = "2px solid black") %>% - row_spec(seq(from = 0, to = n.subrow.names * n.row.names, by = n.subrow.names), - extra_css = "border-bottom: 2px solid black", hline_after = TRUE) - - if (plot.legend == TRUE) { - # Save the scorecard (without legend) - save_kable(table.html, file = paste0(fileout, '_tmpScorecard.png'), vheight = 1) - - # White space for legend - legend.white.space <- 37.8 * legend.white.space ## converting pixels to cm - - # Create and save color bar legend - scorecard_legend <- .SCLegend(legend.breaks = legend.breaks, - palette = palette, - colorunder = colorunder, - colorsup = colorsup, - label.scale = label.scale, - legend.width = legend.width, - legend.height = legend.height, - legend.white.space = legend.white.space, - fileout = fileout) - - # Add the legends below the scorecard table - system(paste0('convert -append ', fileout, '_tmpScorecard.png ', fileout, - '_tmpScorecardLegend.png ', fileout)) - # Remove temporary scorecard table - unlink(paste0(fileout, '_tmpScorecard*.png')) - } - if (plot.legend == FALSE) { - save_kable(table.html, file = fileout) - } -} -- GitLab From 904d1c32190153f643b3fbba98dd271b21c313a9 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Wed, 28 Feb 2024 10:50:28 +0100 Subject: [PATCH 30/43] included output_format check for syear metrics, set to Scorecards output_format --- tools/check_recipe.R | 159 +++++++++++++++++++++++-------------------- 1 file changed, 85 insertions(+), 74 deletions(-) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 4c084b9d..d9f79603 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -2,11 +2,11 @@ check_recipe <- function(recipe) { # recipe: yaml recipe already read it ## TODO: set up logger-less case info(recipe$Run$logger, paste("Checking recipe:", recipe$recipe_path)) - + # --------------------------------------------------------------------- # ANALYSIS CHECKS # --------------------------------------------------------------------- - + TIME_SETTINGS_SEASONAL <- c("sdate", "ftime_min", "ftime_max", "hcst_start", "hcst_end") TIME_SETTINGS_DECADAL <- c("ftime_min", "ftime_max", "hcst_start", "hcst_end") @@ -15,28 +15,28 @@ check_recipe <- function(recipe) { HORIZONS <- c("subseasonal", "seasonal", "decadal") ARCHIVE_SEASONAL <- "conf/archive.yml" ARCHIVE_DECADAL <- "conf/archive_decadal.yml" - + # Define error status variable error_status <- F - + # Check basic elements in recipe:Analysis: if (!("Analysis" %in% names(recipe))) { error(recipe$Run$logger, "The recipe must contain an element called 'Analysis'.") error_status <- T } - + if (!all(PARAMS %in% names(recipe$Analysis))) { error(recipe$Run$logger, 
paste0("The element 'Analysis' in the recipe must contain all of ", "the following: ", paste(PARAMS, collapse = ", "), ".")) error_status <- T } - + if (!any(HORIZONS %in% tolower(recipe$Analysis$Horizon))) { error(recipe$Run$logger, paste0("The element 'Horizon' in the recipe must be one of the ", - "following: ", paste(HORIZONS, collapse = ", "), ".")) + "following: ", paste(HORIZONS, collapse = ", "), ".")) error_status <- T } # Check time settings @@ -88,7 +88,7 @@ check_recipe <- function(recipe) { } # Check reference names if (!all(recipe$Analysis$Datasets$Reference$name %in% - names(archive$Reference))) { + names(archive$Reference))) { error(recipe$Run$logger, "The specified Reference name was not found in the archive.") error_status <- T @@ -138,7 +138,7 @@ check_recipe <- function(recipe) { } else { stream <- "fcst" } - + ## TODO: To be implemented in the future # if (length(recipe$Analysis$Time$sdate$fcst_day) > 1 && # tolower(recipe$Analysis$Horizon) != "subseasonal") { @@ -152,7 +152,7 @@ check_recipe <- function(recipe) { # error(recipe$Run$logger, # paste("The element 'fcst_sday' in the recipe should be defined.")) # } - + if (is.null(recipe$Analysis$Time$fcst_year)) { warn(recipe$Run$logger, paste("The element 'fcst_year' is not defined in the recipe.", @@ -168,7 +168,7 @@ check_recipe <- function(recipe) { # } # } # fcst.sdate <- list(stream = stream, fcst.sdate = fcst.sdate) - + # Regrid checks: if (length(recipe$Analysis$Regrid) != 2) { error(recipe$Run$logger, @@ -183,7 +183,7 @@ check_recipe <- function(recipe) { "Only one single Horizon can be specified in the recipe") error_status <- T } - + ## TODO: Refine this # nvar <- length(recipe$Analysis$Variables) # if (nvar > 2) { @@ -191,19 +191,19 @@ check_recipe <- function(recipe) { # "Only two type of Variables can be listed: ECVs and Indicators.") # stop("EXECUTION FAILED") # } - + # remove NULL or None Indicators or ECVs from the recipe: if (!is.null(recipe$Analysis$Variables$Indicators) && !is.list(recipe$Analysis$Variables$Indicators)) { recipe$Analysis$Variables <- recipe$Analysis$Variables[ - -which(names(recipe$Analysis$Variables) == 'Indicators')] + -which(names(recipe$Analysis$Variables) == 'Indicators')] } if (!is.null(recipe$Analysis$Variables$ECVs) && !is.list(recipe$Analysis$Variables$ECVs)) { recipe$Analysis$Variables <- recipe$Analysis$Variables[ - -which(names(recipe$Analysis$Variables) == 'ECVs')] + -which(names(recipe$Analysis$Variables) == 'ECVs')] } - + # Region checks: LIMITS <- c('latmin', 'latmax', 'lonmin', 'lonmax') # Ordinary recipe @@ -212,7 +212,7 @@ check_recipe <- function(recipe) { if (!all(LIMITS %in% names(region))) { error(recipe$Run$logger, paste0("There must be 4 elements in 'Region': ", - paste(LIMITS, collapse = ", "), ".")) + paste(LIMITS, collapse = ", "), ".")) error_status <- T } } @@ -225,35 +225,35 @@ check_recipe <- function(recipe) { } } } - # Atomic recipe + # Atomic recipe } else if (!all(LIMITS %in% names(recipe$Analysis$Region))) { error(recipe$Run$logger, paste0("There must be 4 elements in 'Region': ", paste(LIMITS, collapse = ", "), ".")) error_status <- T } - ## TODO: Implement multiple regions - # nregions <- length(recipe$Analysis$Region) - # for (i in 1:length(recipe$Analysis$Region)) { - # if (!all(limits %in% names(recipe$Analysis$Region[[i]]))) { - # limits <- paste(limits, collapse = " ") - # error(recipe$Run$logger, - # paste0("Each region defined in element 'Region' ", - # "should have 4 elements: ", - # paste(limits, collapse = ", "), ".")) - # error_status 
<- T - # } - # if (length(recipe$Analysis$Region) > 1) { - # if (!("name" %in% names(recipe$Analysis$Region[[i]]))) { - # error(recipe$Run$logger, - # paste("If multiple regions are requested, each region must", - # "have a 'name'".) - # # are numeric? class list mode list - # } + ## TODO: Implement multiple regions + # nregions <- length(recipe$Analysis$Region) + # for (i in 1:length(recipe$Analysis$Region)) { + # if (!all(limits %in% names(recipe$Analysis$Region[[i]]))) { + # limits <- paste(limits, collapse = " ") + # error(recipe$Run$logger, + # paste0("Each region defined in element 'Region' ", + # "should have 4 elements: ", + # paste(limits, collapse = ", "), ".")) + # error_status <- T + # } + # if (length(recipe$Analysis$Region) > 1) { + # if (!("name" %in% names(recipe$Analysis$Region[[i]]))) { + # error(recipe$Run$logger, + # paste("If multiple regions are requested, each region must", + # "have a 'name'".) + # # are numeric? class list mode list + # } # --------------------------------------------------------------------- # WORKFLOW CHECKS # --------------------------------------------------------------------- - + # Calibration # If 'method' is FALSE/no/'none' or NULL, set to 'raw' ## TODO: Review this check @@ -313,7 +313,7 @@ check_recipe <- function(recipe) { } } } - + # Downscaling if ("Downscaling" %in% names(recipe$Analysis$Workflow)) { downscal_params <- lapply(recipe$Analysis$Workflow$Downscaling, tolower) @@ -329,27 +329,27 @@ check_recipe <- function(recipe) { downscal_params$type <- "none" warn(recipe$Run$logger, paste("Downscaling 'type' is empty in the recipe, setting it to", - "'none'.")) + "'none'.")) } if (!(downscal_params$type %in% DOWNSCAL_TYPES)) { error(recipe$Run$logger, paste0("The type of Downscaling request in the recipe is not ", - "available. It must be one of the following: ", - paste(DOWNSCAL_TYPES, collapse = ", "), ".")) + "available. 
It must be one of the following: ", + paste(DOWNSCAL_TYPES, collapse = ", "), ".")) error_status <- T } if ((downscal_params$type %in% c("int", "intbc", "intlr", "logreg")) && (is.null(downscal_params$target_grid))) { error(recipe$Run$logger, paste("A target grid is required for the downscaling method", - "requested in the recipe.")) + "requested in the recipe.")) error_status <- T } if (downscal_params$type == "int") { if (is.null(downscal_params$int_method)) { error(recipe$Run$logger, paste("Downscaling type 'int' was requested, but no", - "interpolation method is provided in the recipe.")) + "interpolation method is provided in the recipe.")) error_status <- T } } else if (downscal_params$type %in% @@ -358,63 +358,63 @@ check_recipe <- function(recipe) { error(recipe$Run$logger, paste("Downscaling type", downscal_params$type, "was requested in the recipe, but no", - "interpolation method is provided.")) + "interpolation method is provided.")) error_status <- T } } else if (downscal_params$type == "intbc") { if (is.null(downscal_params$bc_method)) { error(recipe$Run$logger, paste("Downscaling type 'intbc' was requested in the recipe, but", - "no bias correction method is provided.")) + "no bias correction method is provided.")) error_status <- T } else if (!(downscal_params$bc_method %in% BC_METHODS)) { error(recipe$Run$logger, paste0("The accepted Bias Correction methods for the downscaling", - " module are: ", paste(BC_METHODS, collapse = ", "), ".")) + " module are: ", paste(BC_METHODS, collapse = ", "), ".")) error_status <- T } } else if (downscal_params$type == "intlr") { if (length(downscal_params$lr_method) == 0) { error(recipe$Run$logger, paste("Downscaling type 'intlr' was requested in the recipe, but", - "no linear regression method was provided.")) + "no linear regression method was provided.")) error_status <- T } else if (!(downscal_params$lr_method %in% LR_METHODS)) { error(recipe$Run$logger, paste0("The accepted linear regression methods for the", - " downscaling module are: ", - paste(LR_METHODS, collapse = ", "), ".")) + " downscaling module are: ", + paste(LR_METHODS, collapse = ", "), ".")) error_status <- T } } else if (downscal_params$type == "analogs") { if (is.null(downscal_params$nanalogs)) { warn(recipe$Run$logger, paste("Downscaling type is 'analogs, but the number of analogs", - "has not been provided in the recipe. The default is 3.")) + "has not been provided in the recipe. 
The default is 3.")) } } else if (downscal_params$type == "logreg") { if (is.null(downscal_params$int_method)) { error(recipe$Run$logger, paste("Downscaling type 'logreg' was requested in the recipe, but", - "no interpolation method was provided.")) + "no interpolation method was provided.")) error_status <- T } if (is.null(downscal_params$log_reg_method)) { error(recipe$Run$logger, paste("Downscaling type 'logreg' was requested in the recipe,", - "but no logistic regression method is provided.")) + "but no logistic regression method is provided.")) error_status <- T } else if (!(downscal_params$log_reg_method %in% LOGREG_METHODS)) { error(recipe$Run$logger, paste0("The accepted logistic regression methods for the ", - "downscaling module are: ", - paste(LOGREG_METHODS, collapse = ", "), ".")) + "downscaling module are: ", + paste(LOGREG_METHODS, collapse = ", "), ".")) error_status <- T } } } } - + # Indices if ("Indices" %in% names(recipe$Analysis$Workflow)) { nino_indices <- paste0("nino", c("1+2", "3", "3.4", "4")) @@ -425,9 +425,9 @@ check_recipe <- function(recipe) { "in the recipe.")) error_status <- T } else if (!(recipe$Analysis$Workflow$Anomalies$compute)) { - error(recipe$Run$logger, - paste0("Indices uses Anomalies as input, but the parameter", - "'Anomalies:compute' is set as no/False.")) + error(recipe$Run$logger, + paste0("Indices uses Anomalies as input, but the parameter", + "'Anomalies:compute' is set as no/False.")) error_status <- T } recipe_indices <- tolower(names(recipe$Analysis$Workflow$Indices)) @@ -456,7 +456,7 @@ check_recipe <- function(recipe) { error_status <- T } } - + # Skill AVAILABLE_METRICS <- c("enscorr", "corr_individual_members", "rps", "rps_syear", "rpss", "frps", "frpss", "crps", "crps_syear", @@ -467,9 +467,9 @@ check_recipe <- function(recipe) { "frpss_specs", "bss10_specs", "bss90_specs") if ("Skill" %in% names(recipe$Analysis$Workflow)) { if (is.null(recipe$Analysis$Workflow$Skill$metric)) { - error(recipe$Run$logger, - "Parameter 'metric' must be defined under 'Skill'.") - error_status <- T + error(recipe$Run$logger, + "Parameter 'metric' must be defined under 'Skill'.") + error_status <- T } else { requested_metrics <- strsplit(recipe$Analysis$Workflow$Skill$metric, ", | |,")[[1]] @@ -480,6 +480,14 @@ check_recipe <- function(recipe) { "full list of accepted skill metrics.")) error_status <- T } + if (tolower(recipe$Analysis$Output_format) != 'scorecards') { + if (any(grepl('_syear', requested_metrics))) { + recipe$Analysis$Output_format <- 'scorecards' + warn(recipe$Run$logger, + paste0("'_syear' metrics can only be saved as 'scorecards' ", + "output format. 
The output format is now 'scorecards'.")) + } + } } # Saving checks SAVING_OPTIONS_SKILL <- c("all", "none") @@ -492,7 +500,7 @@ check_recipe <- function(recipe) { error_status <- T } } - + # Probabilities if ("Probabilities" %in% names(recipe$Analysis$Workflow)) { if (is.null(recipe$Analysis$Workflow$Probabilities$percentiles)) { @@ -517,7 +525,7 @@ check_recipe <- function(recipe) { error_status <- T } } - + # Visualization if ("Visualization" %in% names(recipe$Analysis$Workflow)) { PLOT_OPTIONS <- c("skill_metrics", "forecast_ensemble_mean", @@ -584,9 +592,9 @@ check_recipe <- function(recipe) { if ("Scorecards" %in% names(recipe$Analysis$Workflow)) { if(recipe$Analysis$Workflow$Scorecards$execute == TRUE){ if (is.null(recipe$Analysis$Workflow$Scorecards$metric)) { - error(recipe$Run$logger, - "Parameter 'metric' must be defined under 'Scorecards'.") - error_status <- T + error(recipe$Run$logger, + "Parameter 'metric' must be defined under 'Scorecards'.") + error_status <- T } else { sc_metrics <- strsplit(recipe$Analysis$Workflow$Scorecards$metric, ", | |,")[[1]] @@ -610,7 +618,10 @@ check_recipe <- function(recipe) { if ('enscorr' %in% tolower(sc_metrics)) { recipe$Analysis$Workflow$Statistics <- c('std', 'cov', 'n_eff') } - recipe$Analysis$Workflow$Skill$metric <- requested_metrics + recipe$Analysis$Workflow$Skill$metric <- requested_metrics + } + if (tolower(recipe$Analysis$Output_format) != 'scorecards') { + recipe$Analysis$Output_format <- 'scorecards' } if (!all(tolower(sc_metrics) %in% tolower(requested_metrics))) { error(recipe$Run$logger, @@ -624,11 +635,11 @@ check_recipe <- function(recipe) { # --------------------------------------------------------------------- # RUN CHECKS # --------------------------------------------------------------------- - + ## TODO: These checks should probably go first RUN_FIELDS = c("Loglevel", "Terminal", "output_dir", "code_dir") LOG_LEVELS = c("INFO", "DEBUG", "WARN", "ERROR", "FATAL") - + if (!("Run" %in% names(recipe))) { stop("The recipe must contain an element named 'Run'.") } @@ -668,11 +679,11 @@ check_recipe <- function(recipe) { paste0(LOG_LEVELS, collapse='/'))) error_status <- T } - + # --------------------------------------------------------------------- # AUTOSUBMIT CHECKS # --------------------------------------------------------------------- - + AUTO_PARAMS <- c("script", "expid", "hpc_user", "wallclock", "processors_per_job", "platform", "email_notifications", "email_address", "notify_completed", "notify_failed") @@ -741,7 +752,7 @@ check_recipe <- function(recipe) { error_status <- T } } - + # --------------------------------------------------------------------- # WORKFLOW CHECKS # --------------------------------------------------------------------- @@ -751,7 +762,7 @@ check_recipe <- function(recipe) { #nverifications <- check_number_of_dependent_verifications(recipe) # info(recipe$Run$logger, paste("Start Dates:", # paste(fcst.sdate, collapse = " "))) - + # Return error if any check has failed if (error_status) { error(recipe$Run$logger, "RECIPE CHECK FAILED.") -- GitLab From 46f11ce2ecf5dc2bf76f1cee50b126e0983ab9bb Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Wed, 28 Feb 2024 14:52:55 +0100 Subject: [PATCH 31/43] bug fix in saving metrics in S2S4E format --- modules/Skill/Skill.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index cc0da1e1..aad44951 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -385,7 +385,7 @@ Skill <- 
function(recipe, data, agg = 'global') { save_metrics_scorecards(recipe = recipe, skill = skill_metrics[-corr_metric_names], data_cube = data$hcst, agg = agg) } else { - save_metrics_scorecards(recipe = recipe, skill = skill_metrics[-corr_metric_names], + save_metrics(recipe = recipe, skill = skill_metrics[-corr_metric_names], data_cube = data$hcst, agg = agg) } } -- GitLab From a9ec37f85080fb4862d3af44f6e2041db5ce5ff5 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Wed, 28 Feb 2024 16:19:07 +0100 Subject: [PATCH 32/43] corrected test-seasonal_NAO.R --- tests/testthat/test-seasonal_NAO.R | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-seasonal_NAO.R b/tests/testthat/test-seasonal_NAO.R index 9582bdfc..4bbfd3b3 100644 --- a/tests/testthat/test-seasonal_NAO.R +++ b/tests/testthat/test-seasonal_NAO.R @@ -214,8 +214,8 @@ TRUE expect_equal( names(skill_metrics), c("mean_bias", "enscorr", - "enscorr_significance", "rps", "rps_clim", "rpss", "rpss_significance", - "crps", "crps_clim", "crpss", "crpss_significance", "enssprerr") + "enscorr_significance", "rps", "rpss", "rpss_significance", + "crps", "crpss", "crpss_significance", "enssprerr") ) expect_equal( class(skill_metrics$rpss), @@ -247,12 +247,22 @@ expect_equal( all(list.files(outputs, recursive = T) %in% c(paste0("Indices/ECMWF-SEAS5/nao/", paste0("nao_", 1993:2000, "0301.nc")), paste0("Indices/ERA5/nao/", paste0("nao_", 1993:2000, "0301.nc")), - "Skill/ECMWF-SEAS5/nao/scorecards_ECMWF-SEAS5_ERA5_nao-skill_1993-2000_s03.nc") + "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_crps_1993-2000_s03.nc", + "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_crpss_1993-2000_s03.nc", + "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_crpss_significance_1993-2000_s03.nc", + "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_enscorr_1993-2000_s03.nc", + "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_enscorr_significance_1993-2000_s03.nc", + "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_enssprerr_1993-2000_s03.nc", + "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_mean_bias_1993-2000_s03.nc", + "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_rps_1993-2000_s03.nc", + "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_rpss_1993-2000_s03.nc", + "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_rpss_significance_1993-2000_s03.nc" + ) ), TRUE) expect_equal( length(list.files(outputs, recursive = T)), -17 +26 ) }) -- GitLab From 527a91e28b46fdbe6e140530f53867c231a65efe Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Wed, 28 Feb 2024 17:41:29 +0100 Subject: [PATCH 33/43] corrected test-decadal_monthly_2.R --- tests/testthat/test-decadal_monthly_2.R | 2 +- .../test-seasonal_monthly_statistics.R | 251 ++++++++++++++++++ 2 files changed, 252 insertions(+), 1 deletion(-) create mode 100644 tests/testthat/test-seasonal_monthly_statistics.R diff --git a/tests/testthat/test-decadal_monthly_2.R b/tests/testthat/test-decadal_monthly_2.R index 9adc16b6..6ee681bb 100644 --- a/tests/testthat/test-decadal_monthly_2.R +++ b/tests/testthat/test-decadal_monthly_2.R @@ -164,7 +164,7 @@ TRUE ) expect_equal( names(skill_metrics), -c("rpss_specs", "enscorr_specs", "frps_specs", "frpss_specs", "bss10_specs", "frps", "frps_clim") +c("rpss_specs", "enscorr_specs", "frps_specs", "frpss_specs", "bss10_specs", "frps") ) expect_equal( class(skill_metrics$rpss_specs), diff 
--git a/tests/testthat/test-seasonal_monthly_statistics.R b/tests/testthat/test-seasonal_monthly_statistics.R new file mode 100644 index 00000000..d9b7b3c3 --- /dev/null +++ b/tests/testthat/test-seasonal_monthly_statistics.R @@ -0,0 +1,251 @@ +context("Seasonal monthly data") + +source("modules/Loading/Loading.R") +source("modules/Calibration/Calibration.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") + +recipe_file <- "tests/recipes/recipe-seasonal_monthly_1.yml" +recipe <- prepare_outputs(recipe_file, disable_checks = F) +archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive + +# Load datasets +suppressWarnings({invisible(capture.output( +data <- Loading(recipe) +))}) + +# Calibrate data +suppressWarnings({invisible(capture.output( +calibrated_data <- Calibration(recipe, data) +))}) + +# Compute skill metrics +suppressWarnings({invisible(capture.output( +skill_metrics <- Skill(recipe, calibrated_data) +))}) + +suppressWarnings({invisible(capture.output( +probs <- Probabilities(recipe, calibrated_data) +))}) + +# Saving +suppressWarnings({invisible(capture.output( +Saving(recipe = recipe, data = calibrated_data, + skill_metrics = skill_metrics, probabilities = probs) +))}) + +# Plotting +suppressWarnings({invisible(capture.output( +Visualization(recipe = recipe, data = calibrated_data, + skill_metrics = skill_metrics, probabilities = probs, + significance = T) +))}) +outdir <- get_dir(recipe = recipe, variable = data$hcst$attrs$Variable$varName) + +# ------- TESTS -------- + +test_that("1. Loading", { + +expect_equal( +is.list(data), +TRUE +) +expect_equal( +names(data), +c("hcst", "fcst", "obs") +) +expect_equal( +class(data$hcst), +"s2dv_cube" +) +expect_equal( +class(data$fcst), +"s2dv_cube" +) +expect_equal( +class(data$obs), +"s2dv_cube" +) +expect_equal( +names(data$hcst), +c("data", "dims", "coords", "attrs") +) +expect_equal( +names(data$hcst), +names(data$fcst) +) +expect_equal( +names(data$hcst), +names(data$obs) +) +expect_equal( +dim(data$hcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 3, longitude = 3, ensemble = 25) +) +expect_equal( +dim(data$fcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 3, longitude = 3, ensemble = 51) +) +expect_equal( +dim(data$hcst$attrs$Dates), +c(sday = 1, sweek = 1, syear = 4, time = 3) +) +expect_equal( +as.vector(drop(data$hcst$data)[1:2,1:2,1,2,3]), +c(293.9651, 295.9690, 290.6771, 290.7957), +tolerance = 0.0001 +) +expect_equal( +mean(data$hcst$data), +290.8758, +tolerance = 0.0001 +) +expect_equal( +range(data$hcst$data), +c(284.7413, 299.6219), +tolerance = 0.0001 +) +expect_equal( +(data$hcst$attrs$Dates)[1], +as.POSIXct("1993-11-30 23:59:59", tz = 'UTC') +) +expect_equal( +(data$hcst$attrs$Dates)[2], +as.POSIXct("1994-11-30 23:59:59", tz = 'UTC') +) +expect_equal( +(data$hcst$attrs$Dates)[5], +as.POSIXct("1993-12-31 23:59:59", tz = 'UTC') +) +expect_equal( +(data$obs$attrs$Dates)[10], +as.POSIXct("1995-01-15 12:00:00", tz = 'UTC') +) + +}) + +test_that("2. 
Calibration", { + +expect_equal( +is.list(calibrated_data), +TRUE +) +expect_equal( +names(calibrated_data), +c("hcst", "obs", "fcst") +) +expect_equal( +class(calibrated_data$hcst), +"s2dv_cube" +) +expect_equal( +class(calibrated_data$fcst), +"s2dv_cube" +) +expect_equal( +dim(calibrated_data$hcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 3, longitude = 3, ensemble = 25) +) +expect_equal( +dim(calibrated_data$fcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 3, longitude = 3, ensemble = 51) +) +expect_equal( +mean(calibrated_data$fcst$data), +291.6433, +tolerance = 0.0001 +) +expect_equal( +mean(calibrated_data$hcst$data), +290.9006, +tolerance = 0.0001 +) +expect_equal( +as.vector(drop(calibrated_data$hcst$data)[1, , 2, 3, 4]), +c(291.8887, 287.0233, 289.8808), +tolerance = 0.0001 +) +expect_equal( +range(calibrated_data$fcst$data), +c(283.8926, 299.0644), +tolerance = 0.0001 +) + +}) + + +#====================================== +test_that("3. Metrics", { + +expect_equal( +is.list(skill_metrics), +TRUE +) +expect_equal( +names(skill_metrics), +c("rpss", "rpss_significance", "crpss", "crpss_significance", "enscorr", + "enscorr_significance", "corr_individual_members", "corr_individual_members_significance", + "enscorr_specs") +) +expect_equal( +class(skill_metrics$rpss), +"array" +) +expect_equal( +dim(skill_metrics$rpss), +c(var = 1, time = 3, latitude = 3, longitude = 3) +) +expect_equal( +dim(skill_metrics$rpss_significance), +dim(skill_metrics$rpss) +) +expect_equal( +as.vector(skill_metrics$rpss[, , 2, 3]), +c(-0.2918857, -1.4809143, -1.3842286), +tolerance = 0.0001 +) +expect_equal( +as.vector(skill_metrics$rpss_significance[, , 2, 3]), +rep(FALSE, 3) +) + +}) + +test_that("4. Saving", { +outputs <- paste0(recipe$Run$output_dir, "/outputs/") +expect_equal( +all(basename(list.files(outputs, recursive = T)) %in% +c("tas_19931101.nc", "tas_19941101.nc", "tas_19951101.nc", + "tas_19961101.nc", "tas_20201101.nc", "tas-corr_month11.nc", + "tas-obs_19931101.nc", "tas-obs_19941101.nc", "tas-obs_19951101.nc", + "tas-obs_19961101.nc", "tas-percentiles_month11.nc", "tas-probs_19931101.nc", + "tas-probs_19941101.nc", "tas-probs_19951101.nc", "tas-probs_19961101.nc", + "tas-probs_20201101.nc", "tas-skill_month11.nc")), +TRUE +) +expect_equal( +length(list.files(outputs, recursive = T)), +17 +) + +}) + +test_that("5. 
Visualization", { +plots <- paste0(recipe$Run$output_dir, "/plots/") +expect_equal( +all(basename(list.files(plots, recursive = T)) %in% +c("crpss-november.png", "enscorr_specs-november.png", "enscorr-november.png", + "forecast_ensemble_mean-20201101.png", "forecast_most_likely_tercile-20201101.png", + "rpss-november.png")), +TRUE +) +expect_equal( +length(list.files(plots, recursive = T)), +6 +) + +}) + +# Delete files +unlink(recipe$Run$output_dir, recursive = T) -- GitLab From 93e5d73f02d8d3aaa61d0757ceb69f719bc54f13 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Thu, 29 Feb 2024 12:40:17 +0100 Subject: [PATCH 34/43] statistics unit test --- modules/Statistics/Statistics.R | 12 +- modules/Visualization/R/plot_skill_metrics.R | 2 +- .../recipe-seasonal_monthly_1_statistics.yml | 58 ++++++++ .../recipe-seasonal_monthly_1_tas-tos.yml | 2 +- .../test-seasonal_monthly_statistics.R | 126 +++++------------- 5 files changed, 101 insertions(+), 99 deletions(-) create mode 100644 tests/recipes/recipe-seasonal_monthly_1_statistics.yml diff --git a/modules/Statistics/Statistics.R b/modules/Statistics/Statistics.R index 4bfa107f..26f52de9 100644 --- a/modules/Statistics/Statistics.R +++ b/modules/Statistics/Statistics.R @@ -1,6 +1,6 @@ -compute_statistics <- function(recipe, data, agg = 'global'){ +Statistics <- function(recipe, data, agg = 'global'){ # data$hcst: s2dv_cube containing the hindcast @@ -92,8 +92,14 @@ compute_statistics <- function(recipe, data, agg = 'global'){ if (recipe$Analysis$Workflow$Statistics$save == 'all') { # Save all statistics - save_metrics_scorecards(recipe = recipe, skill = statistics, - data_cube = data$hcst, agg = agg) + if (tolower(recipe$Analysis$Output_format) == 'scorecards') { + save_metrics_scorecards(recipe = recipe, skill = statistics, + data_cube = data$hcst, agg = agg) + } else { + save_metrics(recipe = recipe, skill = statistics, + data_cube = data$hcst, agg = agg) + } + } # Return results diff --git a/modules/Visualization/R/plot_skill_metrics.R b/modules/Visualization/R/plot_skill_metrics.R index 0ab92bc5..26fb4a73 100644 --- a/modules/Visualization/R/plot_skill_metrics.R +++ b/modules/Visualization/R/plot_skill_metrics.R @@ -58,7 +58,7 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, "enscorr", "rpss_specs", "bss90_specs", "bss10_specs", "enscorr_specs", "rmsss", "msss") scores <- c("rps", "frps", "crps", "frps_specs", "mse") - statistics <- c("cov", "std_hcst", "std_obs") + statistics <- c("cov", "std_hcst", "std_obs", "var_hcst", "var_obs", "n_eff") # Loop over variables and assign colorbar and plot parameters to each metric for (var in 1:data_cube$dims[['var']]) { diff --git a/tests/recipes/recipe-seasonal_monthly_1_statistics.yml b/tests/recipes/recipe-seasonal_monthly_1_statistics.yml new file mode 100644 index 00000000..ccfd3cf4 --- /dev/null +++ b/tests/recipes/recipe-seasonal_monthly_1_statistics.yml @@ -0,0 +1,58 @@ +Description: + Author: V. 
Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: Meteo-France-System7 + Multimodel: False + Reference: + name: ERA5 + Time: + sdate: '1101' + fcst_year: '2020' + hcst_start: '1993' + hcst_end: '1996' + ftime_min: 1 + ftime_max: 3 + Region: + latmin: 17 + latmax: 20 + lonmin: 12 + lonmax: 15 + Regrid: + method: bilinear + type: to_system + Workflow: + # Anomalies: + # compute: no + # cross_validation: + # save: 'none' + Calibration: + method: mse_min + save: 'all' + Skill: + metric: RPSS CRPSS EnsCorr Corr_individual_members Enscorr_specs + save: 'all' + Statistics: + metric: cov std var n_eff + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] + save: 'all' + Indicators: + index: no + Visualization: + plots: statistics + multi_panel: yes + projection: cylindrical_equidistant + Output_format: scorecards +Run: + Loglevel: INFO + Terminal: yes + output_dir: ./tests/out-logs/ + code_dir: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/ #/esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/tests/recipes/recipe-seasonal_monthly_1_tas-tos.yml b/tests/recipes/recipe-seasonal_monthly_1_tas-tos.yml index 41048db1..c1404e7d 100644 --- a/tests/recipes/recipe-seasonal_monthly_1_tas-tos.yml +++ b/tests/recipes/recipe-seasonal_monthly_1_tas-tos.yml @@ -53,4 +53,4 @@ Run: Loglevel: INFO Terminal: yes output_dir: ./tests/out-logs/ - code_dir: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/ ##/esarchive/scratch/vagudets/repos/auto-s2s/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/tests/testthat/test-seasonal_monthly_statistics.R b/tests/testthat/test-seasonal_monthly_statistics.R index d9b7b3c3..2db006dc 100644 --- a/tests/testthat/test-seasonal_monthly_statistics.R +++ b/tests/testthat/test-seasonal_monthly_statistics.R @@ -1,12 +1,11 @@ context("Seasonal monthly data") source("modules/Loading/Loading.R") -source("modules/Calibration/Calibration.R") -source("modules/Skill/Skill.R") +source("modules/Statistics/Statistics.R") source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") -recipe_file <- "tests/recipes/recipe-seasonal_monthly_1.yml" +recipe_file <- "tests/recipes/recipe-seasonal_monthly_1_statistics.yml" recipe <- prepare_outputs(recipe_file, disable_checks = F) archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive @@ -15,30 +14,21 @@ suppressWarnings({invisible(capture.output( data <- Loading(recipe) ))}) -# Calibrate data +# Compute statistics suppressWarnings({invisible(capture.output( -calibrated_data <- Calibration(recipe, data) -))}) - -# Compute skill metrics -suppressWarnings({invisible(capture.output( -skill_metrics <- Skill(recipe, calibrated_data) -))}) - -suppressWarnings({invisible(capture.output( -probs <- Probabilities(recipe, calibrated_data) +statistics <- Statistics(recipe, data) ))}) # Saving suppressWarnings({invisible(capture.output( -Saving(recipe = recipe, data = calibrated_data, - skill_metrics = skill_metrics, probabilities = probs) +Saving(recipe = recipe, data = data, + skill_metrics = statistics) ))}) # Plotting suppressWarnings({invisible(capture.output( -Visualization(recipe = recipe, data = calibrated_data, - skill_metrics = skill_metrics, probabilities = probs, +Visualization(recipe = recipe, data = data, + skill_metrics = statistics, significance = T) ))}) outdir <- get_dir(recipe = recipe, variable = data$hcst$attrs$Variable$varName) @@ -125,119 +115,67 @@ as.POSIXct("1995-01-15 
12:00:00", tz = 'UTC') }) -test_that("2. Calibration", { - -expect_equal( -is.list(calibrated_data), -TRUE -) -expect_equal( -names(calibrated_data), -c("hcst", "obs", "fcst") -) -expect_equal( -class(calibrated_data$hcst), -"s2dv_cube" -) -expect_equal( -class(calibrated_data$fcst), -"s2dv_cube" -) -expect_equal( -dim(calibrated_data$hcst$data), -c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 3, longitude = 3, ensemble = 25) -) -expect_equal( -dim(calibrated_data$fcst$data), -c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 3, longitude = 3, ensemble = 51) -) -expect_equal( -mean(calibrated_data$fcst$data), -291.6433, -tolerance = 0.0001 -) -expect_equal( -mean(calibrated_data$hcst$data), -290.9006, -tolerance = 0.0001 -) -expect_equal( -as.vector(drop(calibrated_data$hcst$data)[1, , 2, 3, 4]), -c(291.8887, 287.0233, 289.8808), -tolerance = 0.0001 -) -expect_equal( -range(calibrated_data$fcst$data), -c(283.8926, 299.0644), -tolerance = 0.0001 -) - -}) - - #====================================== -test_that("3. Metrics", { +test_that("2. Statistics", { expect_equal( -is.list(skill_metrics), +is.list(statistics), TRUE ) expect_equal( -names(skill_metrics), -c("rpss", "rpss_significance", "crpss", "crpss_significance", "enscorr", - "enscorr_significance", "corr_individual_members", "corr_individual_members_significance", - "enscorr_specs") +names(statistics), +c("cov", "std_hcst", "std_obs", "var_hcst", "var_obs", "n_eff") ) expect_equal( -class(skill_metrics$rpss), +class(statistics$cov), "array" ) expect_equal( -dim(skill_metrics$rpss), +dim(statistics$cov), c(var = 1, time = 3, latitude = 3, longitude = 3) ) expect_equal( -dim(skill_metrics$rpss_significance), -dim(skill_metrics$rpss) +dim(statistics$cov), +dim(statistics$var_hcst) ) expect_equal( -as.vector(skill_metrics$rpss[, , 2, 3]), -c(-0.2918857, -1.4809143, -1.3842286), +as.vector(statistics$cov[, , 2, 3]), +c(1.14846389, 0.05694802, 0.02346492), tolerance = 0.0001 ) expect_equal( -as.vector(skill_metrics$rpss_significance[, , 2, 3]), -rep(FALSE, 3) +as.vector(statistics$var_hcst[, , 2, 3]), +c(0.74897676, 0.14698283, 0.04864656), +tolerance = 0.0001 ) }) -test_that("4. Saving", { +test_that("3. Saving", { outputs <- paste0(recipe$Run$output_dir, "/outputs/") expect_equal( all(basename(list.files(outputs, recursive = T)) %in% -c("tas_19931101.nc", "tas_19941101.nc", "tas_19951101.nc", - "tas_19961101.nc", "tas_20201101.nc", "tas-corr_month11.nc", - "tas-obs_19931101.nc", "tas-obs_19941101.nc", "tas-obs_19951101.nc", - "tas-obs_19961101.nc", "tas-percentiles_month11.nc", "tas-probs_19931101.nc", - "tas-probs_19941101.nc", "tas-probs_19951101.nc", "tas-probs_19961101.nc", - "tas-probs_20201101.nc", "tas-skill_month11.nc")), +c("scorecards_Meteo-France-System7_ERA5_tas_cov_1993-1996_s11.nc", + "scorecards_Meteo-France-System7_ERA5_tas_n_eff_1993-1996_s11.nc", + "scorecards_Meteo-France-System7_ERA5_tas_std_hcst_1993-1996_s11.nc", + "scorecards_Meteo-France-System7_ERA5_tas_std_obs_1993-1996_s11.nc", + "scorecards_Meteo-France-System7_ERA5_tas_var_hcst_1993-1996_s11.nc", + "scorecards_Meteo-France-System7_ERA5_tas_var_obs_1993-1996_s11.nc")), TRUE ) expect_equal( length(list.files(outputs, recursive = T)), -17 +6 ) }) -test_that("5. Visualization", { +test_that("4. 
Visualization", { plots <- paste0(recipe$Run$output_dir, "/plots/") expect_equal( all(basename(list.files(plots, recursive = T)) %in% -c("crpss-november.png", "enscorr_specs-november.png", "enscorr-november.png", - "forecast_ensemble_mean-20201101.png", "forecast_most_likely_tercile-20201101.png", - "rpss-november.png")), +c("cov-november.png", "n_eff-november.png", "std_hcst-november.png", + "std_obs-november.png", "var_hcst-november.png", "var_obs-november.png" )), TRUE ) expect_equal( -- GitLab From f8981392f0976e96422be72f1e8896e88a554f51 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 13 Mar 2024 10:50:58 +0100 Subject: [PATCH 35/43] Formatting --- modules/Statistics/Statistics.R | 65 ++++++++++++++++----------------- 1 file changed, 31 insertions(+), 34 deletions(-) diff --git a/modules/Statistics/Statistics.R b/modules/Statistics/Statistics.R index 26f52de9..db907f04 100644 --- a/modules/Statistics/Statistics.R +++ b/modules/Statistics/Statistics.R @@ -1,51 +1,54 @@ -Statistics <- function(recipe, data, agg = 'global'){ - - # data$hcst: s2dv_cube containing the hindcast - - # obs: s2dv_cube containing the observations +Statistics <- function(recipe, data, agg = 'global') { + # data$hcst: s2dv_cube containing the hindcast + # data$obs: s2dv_cube containing the observations # recipe: auto-s2s recipe as provided by read_yaml + # agg: data aggregation time_dim <- 'syear' ncores <- recipe$Analysis$ncores ## Calculate ensemble mean - hcst_data <- Apply(data$hcst$data, target_dims = 'ensemble', fun = 'mean')$output1 - obs_data <- Apply(data$obs$data, target_dims = 'ensemble', fun = 'mean')$output1 + hcst_ensmean <- Apply(data$hcst$data, + target_dims = 'ensemble', + fun = 'mean')$output1 + obs_ensmean <- Apply(data$obs$data, + target_dims = 'ensemble', + fun = 'mean')$output1 ## Remove unwanted dimensions - hcst_data <- Subset(hcst_data, along = c('dat', 'sday', 'sweek'), indices = list(1,1,1) , drop = 'selected') - obs_data <- Subset(obs_data, along = c('dat', 'sday', 'sweek'), indices = list(1,1,1) , drop = 'selected') + ## TODO: Apply .drop_dims() function instead? 
+  hcst_ensmean <- Subset(hcst_ensmean,
+                         along = c('dat', 'sday', 'sweek'),
+                         indices = list(1, 1, 1),
+                         drop = 'selected')
+  obs_ensmean <- Subset(obs_ensmean,
+                        along = c('dat', 'sday', 'sweek'),
+                        indices = list(1, 1, 1),
+                        drop = 'selected')
 
   statistics_list <- tolower(recipe$Analysis$Workflow$Statistics$metric)
-
   statistics <- list()
-
+  # Compute statistics in the list
   for (stat in strsplit(statistics_list, ", | |,")[[1]]) {
-
     if (stat %in% c('cov', 'covariance')) {
-
       ## Calculate covariance
-      covariance <- Apply(data = list(x = obs_data, y = hcst_data),
+      covariance <- Apply(data = list(x = obs_ensmean, y = hcst_ensmean),
                           target_dims = time_dim,
-                          fun = function(x,y){cov(as.vector(x),as.vector(y),
+                          fun = function(x,y) {cov(as.vector(x),as.vector(y),
                                           use = "everything",
                                           method = "pearson")})$output1
 
       statistics[[ stat ]] <- covariance
     } ## close if on covariance
-
     if (stat %in% c('std', 'standard_deviation')) {
-
       ## Calculate standard deviation
-      std_hcst <- Apply(data = hcst_data,
+      std_hcst <- Apply(data = hcst_ensmean,
                         target_dims = c(time_dim),
                         fun = 'sd')$output1
 
-      std_obs <- Apply(data = obs_data,
+      std_obs <- Apply(data = obs_ensmean,
                        target_dims = c(time_dim),
                        fun = 'sd')$output1
 
@@ -53,30 +56,26 @@ Statistics <- function(recipe, data, agg = 'global'){
       statistics[['std_hcst']] <- std_hcst
       statistics[['std_obs']] <- std_obs
     } ## close if on std
-
     if (stat %in% c('var', 'variance')) {
-
       ## Calculate variance
-      var_hcst <- (Apply(data = hcst_data,
+      var_hcst <- (Apply(data = hcst_ensmean,
                          target_dims = c(time_dim),
                          fun = 'sd')$output1)^2
 
-      var_obs <- (Apply(data = obs_data,
+      var_obs <- (Apply(data = obs_ensmean,
                         target_dims = c(time_dim),
                         fun = 'sd')$output1)^2
 
       statistics[['var_hcst']] <- var_hcst
       statistics[['var_obs']] <- var_obs
-    } ## close if on variance
-
+    } ## close if on variance
     if (stat == 'n_eff') {
-
       ## Calculate degrees of freedom
-      n_eff <- s2dv::Eno(data = obs_data, time_dim = time_dim, na.action = na.pass, ncores = ncores)
-
+      n_eff <- s2dv::Eno(data = obs_ensmean,
+                         time_dim = time_dim,
+                         na.action = na.pass,
+                         ncores = ncores)
       statistics[['n_eff']] <- n_eff
-    } ## close on n_eff
   }
-- 
GitLab
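
A minimal standalone sketch of the two statistics reformatted above, not part of any patch in this series. The toy array, its dimension names and its values are invented for illustration, and the multiApply and s2dv packages are assumed to be installed:

library(multiApply)
set.seed(1)
hcst <- array(rnorm(40), dim = c(syear = 10, time = 4))  # toy ensemble means
obs  <- array(rnorm(40), dim = c(syear = 10, time = 4))
# Covariance of the two series along the start-date dimension, one value per time step
cov_map <- Apply(data = list(x = obs, y = hcst),
                 target_dims = 'syear',
                 fun = function(x, y) cov(as.vector(x), as.vector(y)))$output1
# Effective number of independent start dates, as used for the 'n_eff' statistic
n_eff <- s2dv::Eno(data = obs, time_dim = 'syear')
dim(cov_map)  # time = 4
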
From b7e7dd87d56dda96c7257f31ff9a50b68964e4b6 Mon Sep 17 00:00:00 2001
From: vagudets
Date: Wed, 13 Mar 2024 12:42:02 +0100
Subject: [PATCH 36/43] Formatting

---
 modules/Skill/R/CRPS_clim.R | 19 +++++++++++++------
 modules/Skill/R/RPS_clim.R  | 15 +++++++++++----
 2 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/modules/Skill/R/CRPS_clim.R b/modules/Skill/R/CRPS_clim.R
index b66cab78..bede13e4 100644
--- a/modules/Skill/R/CRPS_clim.R
+++ b/modules/Skill/R/CRPS_clim.R
@@ -3,10 +3,14 @@ CRPS_clim <- function(obs, memb_dim ='ensemble', return_mean = TRUE, clim.cross.
   time_dim <- names(dim(obs))
   obs_time_len <- dim(obs)[time_dim]
-  
-  if (isFALSE(clim.cross.val)) { ## Without cross-validation
-    ref <- array(data = rep(obs, each = obs_time_len), dim = c(obs_time_len, obs_time_len))
-  } else if (isTRUE(clim.cross.val)) { ## With cross-validation (excluding the value of that year to create ref for that year)
-    ref <- array(data = NA, dim = c(obs_time_len, obs_time_len - 1))
+
+  if (isFALSE(clim.cross.val)) {
+    # Without cross-validation
+    ref <- array(data = rep(obs, each = obs_time_len),
+                 dim = c(obs_time_len, obs_time_len))
+  } else if (isTRUE(clim.cross.val)) {
+    # With cross-validation (excluding the value of that year to create ref for that year)
+    ref <- array(data = NA,
+                 dim = c(obs_time_len, obs_time_len - 1))
     for (i in 1:obs_time_len) {
       ref[i, ] <- obs[-i]
     }
@@ -15,8 +19,11 @@ CRPS_clim <- function(obs, memb_dim ='ensemble', return_mean = TRUE, clim.cross.
   names(dim(ref)) <- c(time_dim, memb_dim)
   # ref: [sdate, memb]
   # obs: [sdate]
-  crps_ref <- s2dv:::.CRPS(exp = ref, obs = obs, time_dim = time_dim, memb_dim = memb_dim,
-                           dat_dim = NULL, Fair = FALSE)
+  crps_ref <- s2dv:::.CRPS(exp = ref, obs = obs,
+                           time_dim = time_dim,
+                           memb_dim = memb_dim,
+                           dat_dim = NULL,
+                           Fair = FALSE)
 
   # crps_ref should be [sdate]
   if (return_mean == TRUE) {
diff --git a/modules/Skill/R/RPS_clim.R b/modules/Skill/R/RPS_clim.R
index adde5fe2..4b309c00 100644
--- a/modules/Skill/R/RPS_clim.R
+++ b/modules/Skill/R/RPS_clim.R
@@ -1,15 +1,22 @@
 # RPS version for climatology
-RPS_clim <- function(obs, indices_for_clim = NULL, prob_thresholds = c(1/3, 2/3), cross.val = TRUE, return_mean = TRUE) {
+RPS_clim <- function(obs, indices_for_clim = NULL,
+                     prob_thresholds = c(1/3, 2/3),
+                     cross.val = TRUE,
+                     return_mean = TRUE) {
 
   if (is.null(indices_for_clim)){
     indices_for_clim <- 1:length(obs)
   }
 
-  obs_probs <- .GetProbs(data = obs, indices_for_quantiles = indices_for_clim, ## temporarily removed s2dv:::
-                         prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val)
+  obs_probs <- .GetProbs(data = obs,
+                         indices_for_quantiles = indices_for_clim, ## temporarily removed s2dv:::
+                         prob_thresholds = prob_thresholds,
+                         weights = NULL,
+                         cross.val = cross.val)
 
   # clim_probs: [bin, sdate]
-  clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), 1 - prob_thresholds[length(prob_thresholds)])
+  clim_probs <- c(prob_thresholds[1],
+                  diff(prob_thresholds), 1 - prob_thresholds[length(prob_thresholds)])
   clim_probs <- array(clim_probs, dim = dim(obs_probs))
 
   # Calculate RPS for each time step
-- 
GitLab

From 1b5fd8ace55bc238e724543768ab1da5ba57b869 Mon Sep 17 00:00:00 2001
From: vagudets
Date: Wed, 13 Mar 2024 12:43:32 +0100
Subject: [PATCH 37/43] Add a TODO

---
 modules/Skill/Skill.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R
index aad44951..175d0218 100644
--- a/modules/Skill/Skill.R
+++ b/modules/Skill/Skill.R
@@ -363,6 +363,7 @@ Skill <- function(recipe, data, agg = 'global') {
   recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "/outputs/Skill/")
 
   # Separate 'corr' from the rest of the metrics because of extra 'ensemble' dim
+  ## TODO: merge save_metrics() and save_metrics_scorecards()
   if (recipe$Analysis$Workflow$Skill$save == 'all') {
     corr_metric_names <- grep("^corr_individual_members", names(skill_metrics))
     if (length(corr_metric_names) == 0) {
-- 
GitLab
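
For reference, and not part of any patch here: a base-R sketch of the cross-validated climatological reference that CRPS_clim builds when clim.cross.val is TRUE, with invented toy values. Row i of ref treats all observed years except year i as an ensemble of equiprobable "forecasts" for year i:

obs <- c(2.1, 1.8, 2.5, 2.0, 1.6)  # toy observed series, one value per start date
n <- length(obs)
ref <- array(NA, dim = c(n, n - 1))
for (i in 1:n) {
  ref[i, ] <- obs[-i]  # leave-one-out climatology for year i
}
names(dim(ref)) <- c('syear', 'ensemble')  # ref: [sdate, memb], as in CRPS_clim
# For RPS_clim with the default thresholds c(1/3, 2/3), each of the three
# categories gets climatological probability 1/3:
c(1/3, diff(c(1/3, 2/3)), 1 - 2/3)
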
From 4eff27398d003a3315103ee1184fe9d58615c1ca Mon Sep 17 00:00:00 2001
From: vagudets
Date: Wed, 13 Mar 2024 12:43:54 +0100
Subject: [PATCH 38/43] Update RPS() and CRPS() with version in master branch
 of s2dv

---
 modules/Skill/R/tmp/CRPS.R | 32 +++++++++-------
 modules/Skill/R/tmp/RPS.R  | 64 +++++++++++++++++++++-----------
 2 files changed, 55 insertions(+), 41 deletions(-)

diff --git a/modules/Skill/R/tmp/CRPS.R b/modules/Skill/R/tmp/CRPS.R
index 9f91be34..bb63095c 100644
--- a/modules/Skill/R/tmp/CRPS.R
+++ b/modules/Skill/R/tmp/CRPS.R
@@ -25,9 +25,9 @@
 #'@param Fair A logical indicating whether to compute the FairCRPS (the
 #'  potential CRPS that the forecast would have with an infinite ensemble size).
 #'  The default value is FALSE.
-#'@return_mean A logical idicating whether to return the temporal mean of CRPS
-#'  or not. When TRUE the temporal mean is calculated, when FALSE the time
-#'  dimension is not aggregated. The default is TRUE.
+#'@param return_mean A logical indicating whether to return the temporal mean
+#'  of the CRPS or not. If TRUE, the temporal mean is calculated along time_dim,
+#'  if FALSE the time dimension is not aggregated. The default is TRUE.
 #'@param ncores An integer indicating the number of cores to use for parallel
 #'  computation. The default value is NULL.
 #'
@@ -57,8 +57,8 @@ CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NU
     stop("Parameter 'exp' must be a numeric array.")
   if (!is.array(obs) | !is.numeric(obs))
     stop("Parameter 'obs' must be a numeric array.")
-  if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) |
-     any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) {
+  if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) |
+      any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) {
    stop("Parameter 'exp' and 'obs' must have dimension names.")
   }
   ## time_dim
@@ -89,7 +89,8 @@ CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NU
     if (identical(as.numeric(dim(obs)[memb_dim]), 1)) {
       obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected')
     } else {
-      stop("Not implemented for observations with members ('obs' can have 'memb_dim', but it should be of length = 1).")
+      stop("Not implemented for observations with members ",
+           "('obs' can have 'memb_dim', but it should be of length = 1).")
     }
   }
   name_exp <- sort(names(dim(exp)))
@@ -101,13 +102,17 @@ CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NU
   }
   if (!identical(length(name_exp), length(name_obs)) |
       !identical(dim(exp)[name_exp], dim(obs)[name_obs])) {
-    stop(paste0("Parameter 'exp' and 'obs' must have same length of ",
-                "all dimensions except 'memb_dim' and 'dat_dim'."))
+    stop("Parameter 'exp' and 'obs' must have same length of ",
+         "all dimensions except 'memb_dim' and 'dat_dim'.")
   }
   ## Fair
-  if (!is.logical(Fair) | length(Fair) > 1) {
+  if (!is.logical(Fair) | length(Fair) > 1) {
     stop("Parameter 'Fair' must be either TRUE or FALSE.")
   }
+  ## return_mean
+  if (!is.logical(return_mean) | length(return_mean) > 1) {
+    stop("Parameter 'return_mean' must be either TRUE or FALSE.")
+  }
   ## ncores
   if (!is.null(ncores)) {
     if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) {
@@ -126,7 +131,7 @@ CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NU
                 Fair = Fair,
                 ncores = ncores)$output1
 
-  if (isTRUE(return_mean)) {
+  if (return_mean) {
     crps <- MeanDims(crps, time_dim, na.rm = FALSE)
   } else {
     crps <- crps
@@ -137,6 +142,7 @@ CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NU
 
 .CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim =
'member', dat_dim = NULL, Fair = FALSE) { + # exp: [sdate, memb, (dat_dim)] # obs: [sdate, (dat_dim)] @@ -158,14 +164,14 @@ CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NU for (i in 1:nexp) { for (j in 1:nobs) { - exp_data <- exp[ , , i] - obs_data <- obs[ , j] + exp_data <- exp[, , i] + obs_data <- obs[, j] if (is.null(dim(exp_data))) dim(exp_data) <- c(dim(exp)[1:2]) if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1]) crps <- SpecsVerification::enscrps_cpp(ens = exp_data, obs = obs_data, R_new = R_new) - CRPS[ , i, j] <- crps + CRPS[, i, j] <- crps } } diff --git a/modules/Skill/R/tmp/RPS.R b/modules/Skill/R/tmp/RPS.R index e15a1754..59b2d01a 100644 --- a/modules/Skill/R/tmp/RPS.R +++ b/modules/Skill/R/tmp/RPS.R @@ -52,9 +52,9 @@ #'@param cross.val A logical indicating whether to compute the thresholds #' between probabilistic categories in cross-validation. The default value is #' FALSE. -#'@return_mean A logical idicating whether to return the temporal mean of CRPS -#' or not. When TRUE the temporal mean is calculated, when FALSE the time -#' dimension is not aggregated. The default is TRUE. +#'@param return_mean A logical indicating whether to return the temporal mean +#' of the RPS or not. If TRUE, the temporal mean is calculated along time_dim, +#' if FALSE the time dimension is not aggregated. The default is TRUE. #'@param na.rm A logical or numeric value between 0 and 1. If it is numeric, it #' means the lower limit for the fraction of the non-NA values. 1 is equal to #' FALSE (no NA is acceptable), 0 is equal to TRUE (all NAs are acceptable). @@ -97,8 +97,8 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL stop('Parameter "exp" must be a numeric array.') if (!is.array(obs) | !is.numeric(obs)) stop('Parameter "obs" must be a numeric array.') - if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp' and 'obs' must have dimension names.") } ## time_dim @@ -154,8 +154,8 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL } if (!identical(length(name_exp), length(name_obs)) | !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions except 'memb_dim' and 'dat_dim'.")) + stop("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'memb_dim' and 'dat_dim'.") } ## prob_thresholds if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | @@ -164,7 +164,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL } ## indices_for_clim if (is.null(indices_for_clim)) { - indices_for_clim <- 1:dim(obs)[time_dim] + indices_for_clim <- seq_len(dim(obs)[time_dim]) } else { if (!is.numeric(indices_for_clim) | !is.vector(indices_for_clim)) { stop("Parameter 'indices_for_clim' must be NULL or a numeric vector.") @@ -175,9 +175,13 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL } } ## Fair - if (!is.logical(Fair) | length(Fair) > 1) { + if (!is.logical(Fair) | length(Fair) > 1) { stop("Parameter 'Fair' must be either TRUE or FALSE.") } + ## return_mean + if (!is.logical(return_mean) | length(return_mean) > 1) { + stop("Parameter 'return_mean' must be either TRUE or FALSE.") + } 
## cross.val if (!is.logical(cross.val) | length(cross.val) > 1) { stop("Parameter 'cross.val' must be either TRUE or FALSE.") @@ -187,23 +191,25 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL if (!is.array(weights) | !is.numeric(weights)) stop("Parameter 'weights' must be a named numeric array.") if (is.null(dat_dim)) { - if (length(dim(weights)) != 2 | any(!names(dim(weights)) %in% c(memb_dim, time_dim))) - stop("Parameter 'weights' must have two dimensions with the names of 'memb_dim' and 'time_dim'.") + if (length(dim(weights)) != 2 | !all(names(dim(weights)) %in% c(memb_dim, time_dim))) + stop("Parameter 'weights' must have two dimensions with the names of ", + "'memb_dim' and 'time_dim'.") if (dim(weights)[memb_dim] != dim(exp)[memb_dim] | dim(weights)[time_dim] != dim(exp)[time_dim]) { - stop(paste0("Parameter 'weights' must have the same dimension lengths ", - "as 'memb_dim' and 'time_dim' in 'exp'.")) + stop("Parameter 'weights' must have the same dimension lengths ", + "as 'memb_dim' and 'time_dim' in 'exp'.") } weights <- Reorder(weights, c(time_dim, memb_dim)) } else { - if (length(dim(weights)) != 3 | any(!names(dim(weights)) %in% c(memb_dim, time_dim, dat_dim))) - stop("Parameter 'weights' must have three dimensions with the names of 'memb_dim', 'time_dim' and 'dat_dim'.") + if (length(dim(weights)) != 3 | !all(names(dim(weights)) %in% c(memb_dim, time_dim, dat_dim))) + stop("Parameter 'weights' must have three dimensions with the names of ", + "'memb_dim', 'time_dim' and 'dat_dim'.") if (dim(weights)[memb_dim] != dim(exp)[memb_dim] | dim(weights)[time_dim] != dim(exp)[time_dim] | dim(weights)[dat_dim] != dim(exp)[dat_dim]) { - stop(paste0("Parameter 'weights' must have the same dimension lengths ", - "as 'memb_dim', 'time_dim' and 'dat_dim' in 'exp'.")) + stop("Parameter 'weights' must have the same dimension lengths ", + "as 'memb_dim', 'time_dim' and 'dat_dim' in 'exp'.") } weights <- Reorder(weights, c(time_dim, memb_dim, dat_dim)) @@ -252,12 +258,12 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL weights = weights, cross.val = cross.val, na.rm = na.rm, ncores = ncores)$output1 - if (isTRUE(return_mean)) { + if (return_mean) { rps <- MeanDims(rps, time_dim, na.rm = TRUE) } else { rps <- rps } - + return(rps) } @@ -295,8 +301,8 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL for (i in 1:nexp) { for (j in 1:nobs) { - exp_data <- exp[ , , i] - obs_data <- obs[ , , j] + exp_data <- exp[, , i] + obs_data <- obs[, , j] if (is.null(dim(exp_data))) dim(exp_data) <- c(dim(exp)[1:2]) if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1:2]) @@ -323,7 +329,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL # If the data inputs are forecast/observation, calculate probabilities if (is.null(cat_dim)) { if (!is.null(weights)) { - weights_data <- weights[which(good_values) , , i] + weights_data <- weights[which(good_values), , i] if (is.null(dim(weights_data))) dim(weights_data) <- c(dim(weights)[1:2]) } else { weights_data <- weights #NULL @@ -334,10 +340,12 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL good_indices_for_clim <- dum[!is.na(dum)] exp_probs <- .GetProbs(data = exp_data, indices_for_quantiles = good_indices_for_clim, - prob_thresholds = prob_thresholds, weights = weights_data, cross.val = cross.val) + prob_thresholds = prob_thresholds, weights = weights_data, + cross.val = cross.val) # 
exp_probs: [bin, sdate] obs_probs <- .GetProbs(data = obs_data, indices_for_quantiles = good_indices_for_clim, - prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val) + prob_thresholds = prob_thresholds, weights = NULL, + cross.val = cross.val) # obs_probs: [bin, sdate] } else { # inputs are probabilities already @@ -352,17 +360,17 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL rps [good_values, i, j] <- colSums((probs_exp_cumsum - probs_obs_cumsum)^2) if (Fair) { # FairRPS - ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps] + ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) + ## [formula taken from SpecsVerification::EnsRps] R <- dim(exp)[2] #memb - R_new <- Inf adjustment <- (-1) / (R - 1) * probs_exp_cumsum * (1 - probs_exp_cumsum) adjustment <- colSums(adjustment) - rps[ , i, j] <- rps[ , i, j] + adjustment + rps[, i, j] <- rps[, i, j] + adjustment } } else { ## not enough values different from NA - rps[ , i, j] <- as.numeric(NA) + rps[, i, j] <- NA_real_ } -- GitLab From b7e7dd87d56dda96c7257f31ff9a50b68964e4b6 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 20 Mar 2024 12:24:45 +0100 Subject: [PATCH 39/43] Rename plot_skill_metrics() to plot_metrics(), generalize function and add statistics parameter --- .../{plot_skill_metrics.R => plot_metrics.R} | 80 +++++++++---------- modules/Visualization/Visualization.R | 23 +++--- .../test-seasonal_monthly_statistics.R | 2 +- 3 files changed, 54 insertions(+), 51 deletions(-) rename modules/Visualization/R/{plot_skill_metrics.R => plot_metrics.R} (83%) diff --git a/modules/Visualization/R/plot_skill_metrics.R b/modules/Visualization/R/plot_metrics.R similarity index 83% rename from modules/Visualization/R/plot_skill_metrics.R rename to modules/Visualization/R/plot_metrics.R index 26fb4a73..9b93f432 100644 --- a/modules/Visualization/R/plot_skill_metrics.R +++ b/modules/Visualization/R/plot_metrics.R @@ -1,11 +1,11 @@ library(stringr) -plot_skill_metrics <- function(recipe, data_cube, skill_metrics, - outdir, significance = F, output_conf) { +plot_metrics <- function(recipe, data_cube, metrics, + outdir, significance = F, output_conf) { # recipe: Auto-S2S recipe # archive: Auto-S2S archive # data_cube: s2dv_cube object with the corresponding hindcast data - # skill_metrics: list of named skill metrics arrays + # metrics: list of named metric arrays with named dimensions # outdir: output directory # significance: T/F, whether to display the significance dots in the plots @@ -15,9 +15,9 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, for daily data.") stop() } - # Abort if skill_metrics is not list - if (!is.list(skill_metrics) || is.null(names(skill_metrics))) { - stop("The element 'skill_metrics' must be a list of named arrays.") + # Abort if metrics is not list + if (!is.list(metrics) || is.null(names(metrics))) { + stop("The element 'metrics' must be a list of named arrays.") } latitude <- data_cube$coords$lat @@ -62,20 +62,20 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, # Loop over variables and assign colorbar and plot parameters to each metric for (var in 1:data_cube$dims[['var']]) { - var_name <- data_cube$attrs$Variable$varName[[var]] ## Need to include for statistics plotting to work - var_skill <- lapply(skill_metrics, function(x) { + var_name <- data_cube$attrs$Variable$varName[[var]] ## For statistics + var_metric <- 
lapply(metrics, function(x) { ClimProjDiags::Subset(x, along = 'var', indices = var, drop = 'selected')}) for (name in c(skill_scores, scores, statistics, "mean_bias", "enssprerr")) { - if (name %in% names(skill_metrics)) { + if (name %in% names(metrics)) { units <- NULL # Define plot characteristics and metric name to display in plot if (name %in% c("rpss", "bss90", "bss10", "frpss", "crpss", - "rpss_specs", "bss90_specs", "bss10_specs", + "rpss_specs", "bss90_specs", "bss10_specs", "rmsss", "msss")) { display_name <- toupper(strsplit(name, "_")[[1]][1]) - skill <- var_skill[[name]] + metric <- var_metric[[name]] brks <- seq(-1, 1, by = 0.2) colorbar <- clim.colors(length(brks) + 1, diverging_palette) cols <- colorbar[2:(length(colorbar) - 1)] @@ -83,7 +83,7 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, col_sup <- NULL } else if (name == "mean_bias_ss") { display_name <- "Mean Bias Skill Score" - skill <- var_skill[[name]] + metric <- var_metric[[name]] brks <- seq(-1, 1, by = 0.2) colorbar <- clim.colors(length(brks) + 1, diverging_palette) cols <- colorbar[2:(length(colorbar) - 1)] @@ -91,13 +91,13 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, col_sup <- NULL } else if (name %in% c("enscorr", "enscorr_specs")) { display_name <- "Ensemble Mean Correlation" - skill <- var_skill[[name]] + metric <- var_metric[[name]] brks <- seq(-1, 1, by = 0.2) cols <- clim.colors(length(brks) - 1, diverging_palette) col_inf <- NULL col_sup <- NULL } else if (name %in% scores) { - skill <- var_skill[[name]] + metric <- var_metric[[name]] display_name <- toupper(strsplit(name, "_")[[1]][1]) brks <- seq(0, 1, by = 0.1) colorbar <- grDevices::hcl.colors(length(brks), sequential_palette) @@ -105,7 +105,7 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, col_inf <- NULL col_sup <- colorbar[length(colorbar)] } else if (name == "enssprerr") { - skill <- var_skill[[name]] + metric <- var_metric[[name]] display_name <- "Spread-to-Error Ratio" brks <- c(0, 0.6, 0.7, 0.8, 0.9, 1, 1.2, 1.4, 1.6, 1.8, 2) colorbar <- clim.colors(length(brks), diverging_palette) @@ -113,10 +113,10 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, col_inf <- NULL col_sup <- colorbar[length(colorbar)] } else if (name %in% "mean_bias") { - skill <- var_skill[[name]] + metric <- var_metric[[name]] display_name <- "Mean Bias" - max_value <- max(abs(quantile(skill, 0.02, na.rm = T)), - abs(quantile(skill, 0.98, na.rm = T))) + max_value <- max(abs(quantile(metric, 0.02, na.rm = T)), + abs(quantile(metric, 0.98, na.rm = T))) brks <- max_value * seq(-1, 1, by = 0.2) colorbar <- clim.colors(length(brks) + 1, diverging_palette) cols <- colorbar[2:(length(colorbar) - 1)] @@ -124,10 +124,10 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, col_sup <- colorbar[length(colorbar)] units <- data_cube$attrs$Variable$metadata[[var_name]]$units } else if (name %in% "cov") { - skill <- var_skill[[name]] + metric <- var_metric[[name]] display_name <- "Covariance" - max_value <- max(abs(quantile(skill, 0.02, na.rm = T)), - abs(quantile(skill, 0.98, na.rm = T))) + max_value <- max(abs(quantile(metric, 0.02, na.rm = T)), + abs(quantile(metric, 0.98, na.rm = T))) brks <- max_value * seq(-1, 1, by = 0.2) colorbar <- clim.colors(length(brks) + 1, diverging_palette) cols <- colorbar[2:(length(colorbar) - 1)] @@ -135,10 +135,10 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, col_sup <- colorbar[length(colorbar)] units <-
paste0(data_cube$attrs$Variable$metadata[[var_name]]$units, "²") } else if (name %in% "std_hcst") { - skill <- var_skill[[name]] + metric <- var_metric[[name]] display_name <- "Hindcast Standard Deviation" - max_value <- max(abs(quantile(skill, 0.02, na.rm = T)), - abs(quantile(skill, 0.98, na.rm = T))) + max_value <- max(abs(quantile(metric, 0.02, na.rm = T)), + abs(quantile(metric, 0.98, na.rm = T))) brks <- max_value * seq(-1, 1, by = 0.2) colorbar <- clim.colors(length(brks) + 1, diverging_palette) cols <- colorbar[2:(length(colorbar) - 1)] @@ -146,10 +146,10 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, col_sup <- colorbar[length(colorbar)] units <- data_cube$attrs$Variable$metadata[[var_name]]$units } else if (name %in% "std_obs") { - skill <- var_skill[[name]] + metric <- var_metric[[name]] display_name <- "Observation Standard Deviation" - max_value <- max(abs(quantile(skill, 0.02, na.rm = T)), - abs(quantile(skill, 0.98, na.rm = T))) + max_value <- max(abs(quantile(metric, 0.02, na.rm = T)), + abs(quantile(metric, 0.98, na.rm = T))) brks <- max_value * seq(-1, 1, by = 0.2) colorbar <- clim.colors(length(brks) + 1, diverging_palette) cols <- colorbar[2:(length(colorbar) - 1)] @@ -160,23 +160,23 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, # Reorder dimensions - skill <- Reorder(skill, c("time", "longitude", "latitude")) + metric <- Reorder(metric, c("time", "longitude", "latitude")) # If the significance has been requested and the variable has it, # retrieve it and reorder its dimensions. significance_name <- paste0(name, "_significance") - if ((significance) && (significance_name %in% names(skill_metrics))) { - skill_significance <- var_skill[[significance_name]] - skill_significance <- Reorder(skill_significance, c("time", + if ((significance) && (significance_name %in% names(metrics))) { + metric_significance <- var_metric[[significance_name]] + metric_significance <- Reorder(metric_significance, c("time", "longitude", "latitude")) - # Split skill significance into list of lists, along the time dimension + # Split significance into list of lists, along the time dimension # This allows for plotting the significance dots correctly. - skill_significance <- ClimProjDiags::ArrayToList(skill_significance, + metric_significance <- ClimProjDiags::ArrayToList(metric_significance, dim = "time", level = "sublist", names = "dots") } else { - skill_significance <- NULL + metric_significance <- NULL } # Define output file name and titles if (tolower(recipe$Analysis$Horizon) == "seasonal") { @@ -196,9 +196,9 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, ## TODO: Combine PlotLayout with PlotRobinson? 
suppressWarnings( PlotLayout(PlotEquiMap, c('longitude', 'latitude'), - asplit(skill, MARGIN=1), # Splitting array into a list + asplit(metric, MARGIN=1), # Splitting array into a list longitude, latitude, - special_args = skill_significance, + special_args = metric_significance, dot_symbol = 20, toptitle = toptitle, title_scale = 0.6, @@ -249,7 +249,7 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, col_inf = col_inf, col_sup = col_sup) } # Loop over forecast times - for (i in 1:dim(skill)[['time']]) { + for (i in 1:dim(metric)[['time']]) { # Get forecast time label forecast_time <- match(months[i], month.name) - init_month + 1 @@ -263,9 +263,9 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, "\n", display_name, "/", months[i], "/", hcst_period) # Modify base arguments - base_args[[1]] <- skill[i, , ] - if (!is.null(skill_significance)) { - base_args[[2]] <- skill_significance[[i]][[1]] + base_args[[1]] <- metric[i, , ] + if (!is.null(metric_significance)) { + base_args[[2]] <- metric_significance[[i]][[1]] significance_caption <- "alpha = 0.05" } else { significance_caption <- NULL diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 0ce32bae..a9badff6 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -4,7 +4,7 @@ ## TODO: Add param 'raw'? ## TODO: Decadal plot names -source("modules/Visualization/R/plot_skill_metrics.R") +source("modules/Visualization/R/plot_metrics.R") source("modules/Visualization/R/get_proj_code.R") ## TODO: Remove after the next s2dv release source("modules/Visualization/R/tmp/PlotRobinson.R") @@ -16,6 +16,7 @@ source("modules/Visualization/plot_data.R") Visualization <- function(recipe, data, skill_metrics = NULL, + statistics = NULL, probabilities = NULL, significance = F, output_conf = NULL) { @@ -62,10 +63,12 @@ Visualization <- function(recipe, dir.create(directory, showWarnings = FALSE, recursive = TRUE) } - if ((is.null(skill_metrics)) && (is.null(data$fcst))) { - error(recipe$Run$logger, "The Visualization module has been called, - but there is no fcst in 'data', and 'skill_metrics' is NULL - so there is no data that can be plotted.") + if ((is.null(skill_metrics)) && (is.null(statistics)) && + (is.null(data$fcst))) { + error(recipe$Run$logger, + paste0("The Visualization module has been called, but there is no ", + "fcst in 'data', and 'skill_metrics' and 'statistics' are ", + "NULL, so there is no data that can be plotted.")) stop() } # Set default single-panel plots if not specified @@ -75,8 +78,8 @@ Visualization <- function(recipe, # Plot skill metrics if ("skill_metrics" %in% plots) { if (!is.null(skill_metrics)) { - plot_skill_metrics(recipe, data$hcst, skill_metrics, outdir, - significance, output_conf = output_conf) + plot_metrics(recipe, data$hcst, skill_metrics, outdir, + significance, output_conf = output_conf) } else { error(recipe$Run$logger, paste0("The skill metric plots have been requested, but the ", @@ -87,12 +90,12 @@ Visualization <- function(recipe, # Plot statistics if ("statistics" %in% plots) { if (!is.null(statistics)) { - plot_skill_metrics(recipe, data$hcst, statistics, outdir, - significance, output_conf = output_conf) + plot_metrics(recipe, data$hcst, statistics, outdir, + significance, output_conf = output_conf) } else { error(recipe$Run$logger, paste0("The statistics plots have been requested, but the ", - "parameter 'skill_metrics' is NULL")) + "parameter 'statistics' is NULL")) } } diff --git 
a/tests/testthat/test-seasonal_monthly_statistics.R b/tests/testthat/test-seasonal_monthly_statistics.R index 2db006dc..d45b5ed6 100644 --- a/tests/testthat/test-seasonal_monthly_statistics.R +++ b/tests/testthat/test-seasonal_monthly_statistics.R @@ -28,7 +28,7 @@ Saving(recipe = recipe, data = data, # Plotting suppressWarnings({invisible(capture.output( Visualization(recipe = recipe, data = data, - skill_metrics = statistics, + statistics = statistics, significance = T) ))}) outdir <- get_dir(recipe = recipe, variable = data$hcst$attrs$Variable$varName) -- GitLab From b33248ed7d3f333f7527d7c59e516436e885c415 Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 22 Mar 2024 12:46:01 +0100 Subject: [PATCH 40/43] Merge save_metrics() and save_metrics_scorecards() --- modules/Saving/R/save_metrics.R | 126 +++++++++++++-------- modules/Saving/R/save_metrics_scorecards.R | 79 ------------- modules/Statistics/Statistics.R | 10 +- 3 files changed, 80 insertions(+), 135 deletions(-) delete mode 100644 modules/Saving/R/save_metrics_scorecards.R diff --git a/modules/Saving/R/save_metrics.R b/modules/Saving/R/save_metrics.R index 2a59f2c2..5942ef7e 100644 --- a/modules/Saving/R/save_metrics.R +++ b/modules/Saving/R/save_metrics.R @@ -3,7 +3,8 @@ save_metrics <- function(recipe, dictionary = NULL, data_cube, agg = "global", - outdir = NULL) { + outdir = NULL, + module = "skill") { # This function adds metadata to the skill metrics in 'skill' # and exports them to a netCDF file inside 'outdir'. # Define grid dimensions and names @@ -89,59 +90,86 @@ save_metrics <- function(recipe, if (!dir.exists(outdir)) { dir.create(outdir, recursive = T) } - outfile <- get_filename(outdir, recipe, variable, - fcst.sdate, agg, "skill") - # Remove singleton dimensions and rearrange lon, lat and time dims - if (tolower(agg) == "global") { - subset_skill <- lapply(subset_skill, function(x) { - Reorder(x, c(lalo, 'time'))}) - } - attr(subset_skill[[1]], 'global_attrs') <- global_attributes + if (recipe$Analysis$Output_format == "scorecards") { + for (i in 1:length(subset_skill)) { + if (any('syear' %in% names(dim(subset_skill[[i]])))) { + sdate_dim_save = 'syear' + dates <- data_cube$attrs$Dates + } else { + sdate_dim_save = NULL + dates <- Subset(data_cube$attrs$Dates, along = 'syear', indices = 1) + } + ## TODO: Maybe 'scorecards' condition could go here to further simplify + ## the code + extra_string <- get_filename(NULL, recipe, variable, + fcst.sdate, agg, names(subset_skill)[[i]]) + SaveExp(data = subset_skill[[i]], destination = outdir, + Dates = dates, + coords = c(data_cube$coords['longitude'], + data_cube$coords['latitude']), + varname = names(subset_skill)[[i]], + metadata = data_cube$attrs$Variable$metadata, Datasets = NULL, + startdates = NULL, dat_dim = NULL, sdate_dim = sdate_dim_save, + ftime_dim = 'time', var_dim = NULL, memb_dim = NULL, + drop_dims = NULL, single_file = TRUE, + extra_string = extra_string) + } + } else { + outfile <- get_filename(outdir, recipe, variable, + fcst.sdate, agg, module) + # Remove singleton dimensions and rearrange lon, lat and time dims + if (tolower(agg) == "global") { + subset_skill <- lapply(subset_skill, function(x) { + Reorder(x, c(lalo, 'time'))}) + } + attr(subset_skill[[1]], 'global_attrs') <- global_attributes - for (i in 1:length(subset_skill)) { - metric <- names(subset_skill[i]) - long_name <- dictionary$metrics[[metric]]$long_name - missing_val <- -9.e+33 - subset_skill[[i]][is.na(subset_skill[[i]])] <- missing_val - if (tolower(agg) == "country") { - sdname <- 
paste0(metric, " region-aggregated metric") - dims <- c('Country', 'time') + for (i in 1:length(subset_skill)) { + metric <- names(subset_skill[i]) + long_name <- dictionary$metrics[[metric]]$long_name + missing_val <- -9.e+33 + subset_skill[[i]][is.na(subset_skill[[i]])] <- missing_val + if (tolower(agg) == "country") { + sdname <- paste0(metric, " region-aggregated metric") + dims <- c('Country', 'time') + } else if (tolower(agg) == "region") { + sdname <- paste0(metric, " region-aggregated metric") + dims <- c('region', 'time') + } else { + sdname <- paste0(metric) + dims <- c(lalo, 'time') + } + metadata <- list(metric = list(name = metric, + standard_name = sdname, + long_name = long_name, + missing_value = missing_val)) + attr(subset_skill[[i]], 'variables') <- metadata + names(dim(subset_skill[[i]])) <- dims + } + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, subset_skill), outfile) } else if (tolower(agg) == "region") { - sdname <- paste0(metric, " region-aggregated metric") - dims <- c('region', 'time') + region <- array(1:dim(skill[[1]])['region'], c(dim(skill[[1]])['region'])) + # TODO: check metadata when more than 1 region is store in the data array + metadata <- list(region = list(long_name = data_cube$attrs$Variable$metadata$region$name)) + attr(region, 'variables') <- metadata + vars <- list(region, time) + vars <- c(vars, subset_skill) + ArrayToNc(vars, outfile) } else { - sdname <- paste0(metric) - dims <- c(lalo, 'time') + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, subset_skill) + ArrayToNc(vars, outfile) } - metadata <- list(metric = list(name = metric, - standard_name = sdname, - long_name = long_name, - missing_value = missing_val)) - attr(subset_skill[[i]], 'variables') <- metadata - names(dim(subset_skill[[i]])) <- dims - } - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, subset_skill), outfile) - } else if (tolower(agg) == "region") { - region <- array(1:dim(skill[[1]])['region'], c(dim(skill[[1]])['region'])) - # TODO: check metadata when more than 1 region is store in the data array - metadata <- list(region = list(long_name = data_cube$attrs$Variable$metadata$region$name)) - attr(region, 'variables') <- metadata - vars <- list(region, time) - vars <- c(vars, subset_skill) - ArrayToNc(vars, outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- .get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, subset_skill) - ArrayToNc(vars, outfile) } } - info(recipe$Run$logger, "##### SKILL METRICS SAVED TO NETCDF FILE #####") + info(recipe$Run$logger, + paste("#####", toupper(module), " METRICS SAVED TO NETCDF FILE #####")) } diff --git a/modules/Saving/R/save_metrics_scorecards.R b/modules/Saving/R/save_metrics_scorecards.R deleted file mode 100644 index 9c133395..00000000 --- a/modules/Saving/R/save_metrics_scorecards.R +++ /dev/null @@ -1,79 +0,0 @@ -save_metrics_scorecards <- function(recipe, - skill, - 
data_cube, - agg = "global", - outdir = NULL) { - # Time indices and metadata - fcst.horizon <- tolower(recipe$Analysis$Horizon) - store.freq <- recipe$Analysis$Variables$freq - - # archive <- get_archive(recipe) - # global_attributes <- .get_global_attributes(recipe, archive) - # ## TODO: Sort out the logic once default behavior is decided - # if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && - # (recipe$Analysis$Workflow$Anomalies$compute)) { - # global_attributes <- c(list(from_anomalies = "Yes"), - # global_attributes) - # } else { - # global_attributes <- c(list(from_anomalies = "No"), - # global_attributes) - # } - - # Select start date - # If a fcst is provided, use that as the ref. year. Otherwise use 1970. - if (fcst.horizon == 'decadal') { - if (!is.null(recipe$Analysis$Time$fcst_year)) { - #PROBLEM: May be more than one fcst_year - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], - sprintf('%02d', init_month), '01') - } else { - fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') - } - } else { - if (!is.null(recipe$Analysis$Time$fcst_year)) { - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) - } else { - fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) - } - } - - # This needs to be developed: - coords <- c(data_cube$coords['longitude'], - data_cube$coords['latitude']) - # Loop over variable dimension - for (var in 1:data_cube$dims[['var']]) { - # Subset skill arrays - subset_skill <- lapply(skill, function(x) { - ClimProjDiags::Subset(x, along = 'var', - indices = var, - drop = 'selected')}) - variable <- data_cube$attrs$Variable$varName[[var]] - outdir <- get_dir(recipe = recipe, variable = variable) - if (!dir.exists(outdir)) { - dir.create(outdir, recursive = T) - } - - for (i in 1:length(subset_skill)) { - if (any('syear' %in% names(dim(subset_skill[[i]])))) { - sdate_dim_save = 'syear' - dates <- data_cube$attrs$Dates - } else { - sdate_dim_save = NULL - dates <- Subset(data_cube$attrs$Dates, along = 'syear', indices = 1) - } - extra_string <- get_filename(NULL, recipe, variable, - fcst.sdate, agg, names(subset_skill)[[i]]) - SaveExp(data = subset_skill[[i]], destination = outdir, - Dates = dates, - coords = coords, - varname = names(subset_skill)[[i]], - metadata = data_cube$attrs$Variable$metadata, Datasets = NULL, - startdates = NULL, dat_dim = NULL, sdate_dim = sdate_dim_save, - ftime_dim = 'time', var_dim = NULL, memb_dim = NULL, - drop_dims = NULL, single_file = TRUE, - extra_string = extra_string) - } - } - info(recipe$Run$logger, "##### SKILL METRICS SAVED TO NETCDF FILE #####") -} \ No newline at end of file diff --git a/modules/Statistics/Statistics.R b/modules/Statistics/Statistics.R index db907f04..23d0df6a 100644 --- a/modules/Statistics/Statistics.R +++ b/modules/Statistics/Statistics.R @@ -91,13 +91,9 @@ Statistics <- function(recipe, data, agg = 'global') { if (recipe$Analysis$Workflow$Statistics$save == 'all') { # Save all statistics - if (tolower(recipe$Analysis$Output_format) == 'scorecards') { - save_metrics_scorecards(recipe = recipe, skill = statistics, - data_cube = data$hcst, agg = agg) - } else { - save_metrics(recipe = recipe, skill = statistics, - data_cube = data$hcst, agg = agg) - } + save_metrics(recipe = recipe, skill = statistics, + data_cube = data$hcst, agg = agg, + module = "statistics") } # Return results return(statistics) -- GitLab From 5af936bac6875755c2706e4f55efdb8068cbaba6 Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 22 Mar 2024 13:03:20 +0100 Subject: 
[PATCH 41/43] Fix pipeline --- modules/Saving/Saving.R | 1 - modules/Skill/Skill.R | 18 ++++-------------- modules/Statistics/Statistics.R | 2 -- tools/check_recipe.R | 3 ++- 4 files changed, 6 insertions(+), 18 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 60e886b9..4faaa65a 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -14,7 +14,6 @@ source("modules/Saving/R/get_time.R") source("modules/Saving/R/get_latlon.R") source("modules/Saving/R/get_global_attributes.R") source("modules/Saving/R/drop_dims.R") -source("modules/Saving/R/save_metrics_scorecards.R") source("modules/Saving/R/tmp/CST_SaveExp.R") Saving <- function(recipe, data, diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 175d0218..d3da374f 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -367,13 +367,8 @@ Skill <- function(recipe, data, agg = 'global') { if (recipe$Analysis$Workflow$Skill$save == 'all') { corr_metric_names <- grep("^corr_individual_members", names(skill_metrics)) if (length(corr_metric_names) == 0) { - if (tolower(recipe$Analysis$Output_format) == 'scorecards') { - save_metrics_scorecards(recipe = recipe, skill = skill_metrics, - data_cube = data$hcst, agg = agg) - } else { - save_metrics(recipe = recipe, skill = skill_metrics, - data_cube = data$hcst, agg = agg) - } + save_metrics(recipe = recipe, skill = skill_metrics, + data_cube = data$hcst, agg = agg, module = "skill") } else { # Save corr if (length(skill_metrics[corr_metric_names]) > 0) { @@ -382,13 +377,8 @@ Skill <- function(recipe, data, agg = 'global') { } # Save other skill metrics if (length(skill_metrics[-corr_metric_names]) > 0) { - if (tolower(recipe$Analysis$Output_format) == 'scorecards') { - save_metrics_scorecards(recipe = recipe, skill = skill_metrics[-corr_metric_names], - data_cube = data$hcst, agg = agg) - } else { - save_metrics(recipe = recipe, skill = skill_metrics[-corr_metric_names], - data_cube = data$hcst, agg = agg) - } + save_metrics(recipe = recipe, skill = skill_metrics[-corr_metric_names], + data_cube = data$hcst, agg = agg) } } } diff --git a/modules/Statistics/Statistics.R b/modules/Statistics/Statistics.R index 23d0df6a..6f4d61a2 100644 --- a/modules/Statistics/Statistics.R +++ b/modules/Statistics/Statistics.R @@ -1,5 +1,3 @@ - - Statistics <- function(recipe, data, agg = 'global') { # data$hcst: s2dv_cube containing the hindcast # data$obs: s2dv_cube containing the observations diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 7ec50085..4d583ed4 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -73,10 +73,11 @@ check_recipe <- function(recipe) { if (("tas-tos" %in% recipe_variables) && (!is.null(recipe$Analysis$Variables$sic_threshold))) { if (!is.numeric(recipe$Analysis$Variables$sic_threshold) || - dplyr::between(recipe$Analysis$Variables$sic_threshold, 0, 1)) { + !dplyr::between(recipe$Analysis$Variables$sic_threshold, 0, 1)) { error(recipe$Run$logger, paste("The element Analysis:Variables:sic_threshold must be a", "numeric value between 0 and 1.")) + error_status <- TRUE } } # Check system names -- GitLab From f89a59cdbd7614997781efc1d75a2c2299f1faeb Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 22 Mar 2024 14:43:47 +0100 Subject: [PATCH 42/43] Change 'skill' parameter name to 'metrics' --- modules/Saving/R/save_metrics.R | 36 ++++++++++++++++----------------- modules/Saving/Saving.R | 2 +- modules/Skill/Skill.R | 4 ++-- modules/Statistics/Statistics.R | 2 +- 4 files changed, 22 insertions(+), 22 
deletions(-) diff --git a/modules/Saving/R/save_metrics.R b/modules/Saving/R/save_metrics.R index 5942ef7e..9cd1fb0d 100644 --- a/modules/Saving/R/save_metrics.R +++ b/modules/Saving/R/save_metrics.R @@ -1,5 +1,5 @@ save_metrics <- function(recipe, - skill, + metrics, dictionary = NULL, data_cube, agg = "global", @@ -80,8 +80,8 @@ save_metrics <- function(recipe, # Loop over variable dimension for (var in 1:data_cube$dims[['var']]) { # Subset skill arrays - subset_skill <- lapply(skill, function(x) { - ClimProjDiags::Subset(x, along = 'var', + subset_metric <- lapply(metrics, function(x) { + ClimProjDiags::Subset(x, along = 'var', indices = var, drop = 'selected')}) # Generate name of output file @@ -91,8 +91,8 @@ save_metrics <- function(recipe, dir.create(outdir, recursive = T) } if (recipe$Analysis$Output_format == "scorecards") { - for (i in 1:length(subset_skill)) { - if (any('syear' %in% names(dim(subset_skill[[i]])))) { + for (i in 1:length(subset_metric)) { + if (any('syear' %in% names(dim(subset_metric[[i]])))) { sdate_dim_save = 'syear' dates <- data_cube$attrs$Dates } else { @@ -102,12 +102,12 @@ save_metrics <- function(recipe, ## TODO: Maybe 'scorecards' condition could go here to further simplify ## the code extra_string <- get_filename(NULL, recipe, variable, - fcst.sdate, agg, names(subset_skill)[[i]]) - SaveExp(data = subset_skill[[i]], destination = outdir, + fcst.sdate, agg, names(subset_metric)[[i]]) + SaveExp(data = subset_metric[[i]], destination = outdir, Dates = dates, coords = c(data_cube$coords['longitude'], data_cube$coords['latitude']), - varname = names(subset_skill)[[i]], + varname = names(subset_metric)[[i]], metadata = data_cube$attrs$Variable$metadata, Datasets = NULL, startdates = NULL, dat_dim = NULL, sdate_dim = sdate_dim_save, ftime_dim = 'time', var_dim = NULL, memb_dim = NULL, @@ -119,16 +119,16 @@ save_metrics <- function(recipe, fcst.sdate, agg, module) # Remove singleton dimensions and rearrange lon, lat and time dims if (tolower(agg) == "global") { - subset_skill <- lapply(subset_skill, function(x) { + subset_metric <- lapply(subset_metric, function(x) { Reorder(x, c(lalo, 'time'))}) } - attr(subset_skill[[1]], 'global_attrs') <- global_attributes + attr(subset_metric[[1]], 'global_attrs') <- global_attributes - for (i in 1:length(subset_skill)) { - metric <- names(subset_skill[i]) + for (i in 1:length(subset_metric)) { + metric <- names(subset_metric[i]) long_name <- dictionary$metrics[[metric]]$long_name missing_val <- -9.e+33 - subset_skill[[i]][is.na(subset_skill[[i]])] <- missing_val + subset_metric[[i]][is.na(subset_metric[[i]])] <- missing_val if (tolower(agg) == "country") { sdname <- paste0(metric, " region-aggregated metric") dims <- c('Country', 'time') @@ -143,20 +143,20 @@ save_metrics <- function(recipe, standard_name = sdname, long_name = long_name, missing_value = missing_val)) - attr(subset_skill[[i]], 'variables') <- metadata - names(dim(subset_skill[[i]])) <- dims + attr(subset_metric[[i]], 'variables') <- metadata + names(dim(subset_metric[[i]])) <- dims } # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { country <- get_countries(grid) - ArrayToNc(append(country, time, subset_skill), outfile) + ArrayToNc(append(country, time, subset_metric), outfile) } else if (tolower(agg) == "region") { region <- array(1:dim(skill[[1]])['region'], c(dim(skill[[1]])['region'])) # TODO: check metadata when more than 1 region is store in the data array metadata <- list(region = list(long_name = 
data_cube$attrs$Variable$metadata$region$name)) attr(region, 'variables') <- metadata vars <- list(region, time) - vars <- c(vars, subset_skill) + vars <- c(vars, subset_metric) ArrayToNc(vars, outfile) } else { latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] @@ -164,7 +164,7 @@ save_metrics <- function(recipe, latlon <- .get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, subset_skill) + vars <- c(vars, subset_metric) ArrayToNc(vars, outfile) } } diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 4faaa65a..1995c4d5 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -74,7 +74,7 @@ Saving <- function(recipe, data, # Export skill metrics if (!is.null(skill_metrics)) { save_metrics(recipe = recipe, - skill = skill_metrics, + metrics = skill_metrics, data_cube = data$hcst, agg = agg, outdir = outdir[var]) } diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index d3da374f..3dbbd5f6 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -367,7 +367,7 @@ Skill <- function(recipe, data, agg = 'global') { if (recipe$Analysis$Workflow$Skill$save == 'all') { corr_metric_names <- grep("^corr_individual_members", names(skill_metrics)) if (length(corr_metric_names) == 0) { - save_metrics(recipe = recipe, skill = skill_metrics, + save_metrics(recipe = recipe, metrics = skill_metrics, data_cube = data$hcst, agg = agg, module = "skill") } else { # Save corr @@ -377,7 +377,7 @@ Skill <- function(recipe, data, agg = 'global') { } # Save other skill metrics if (length(skill_metrics[-corr_metric_names]) > 0) { - save_metrics(recipe = recipe, skill = skill_metrics[-corr_metric_names], + save_metrics(recipe = recipe, metrics = skill_metrics[-corr_metric_names], data_cube = data$hcst, agg = agg) } } diff --git a/modules/Statistics/Statistics.R b/modules/Statistics/Statistics.R index 6f4d61a2..085bcdc5 100644 --- a/modules/Statistics/Statistics.R +++ b/modules/Statistics/Statistics.R @@ -89,7 +89,7 @@ Statistics <- function(recipe, data, agg = 'global') { if (recipe$Analysis$Workflow$Statistics$save == 'all') { # Save all statistics - save_metrics(recipe = recipe, skill = statistics, + save_metrics(recipe = recipe, metrics = statistics, data_cube = data$hcst, agg = agg, module = "statistics") } -- GitLab From 338306b33bb81a98223ac6009367d123a6448cf7 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Mon, 25 Mar 2024 14:38:54 +0100 Subject: [PATCH 43/43] Included plot characteristics for statistic n_eff in visualization --- modules/Visualization/R/plot_metrics.R | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/modules/Visualization/R/plot_metrics.R b/modules/Visualization/R/plot_metrics.R index 8741e319..8fc84e6f 100644 --- a/modules/Visualization/R/plot_metrics.R +++ b/modules/Visualization/R/plot_metrics.R @@ -161,6 +161,15 @@ plot_metrics <- function(recipe, data_cube, metrics, col_inf <- colorbar[1] col_sup <- colorbar[length(colorbar)] units <- data_cube$attrs$Variable$metadata[[var_name]]$units + } else if (name %in% "n_eff") { + metric <- var_metric[[name]] + display_name <- "Effective Sample Size" + max_value <- max(metric, na.rm = T) + brks <- max_value * seq(0, 1, by = 0.1) + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- NULL + col_sup <- NULL } -- GitLab
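
Editor's note: below is a minimal, self-contained R sketch of the fair (unbiased) RPS adjustment applied in the RPS.R hunk of PATCH 38/43 above. It is illustrative only: the toy data, the tercile thresholds and the get_probs() helper are assumptions made for this example and are not part of the codebase; only the adjustment formula itself is taken from the patch, which in turn credits SpecsVerification::EnsRps.

# --- Illustrative sketch, not part of the patch series ---------------------
# Toy data: 30 start dates, an ensemble of R = 5 members. Base R only.
set.seed(1)
R <- 5
n_sdate <- 30
exp <- matrix(rnorm(n_sdate * R), nrow = n_sdate)  # [sdate, member]
obs <- rnorm(n_sdate)                              # [sdate]

# Tercile thresholds from the observed sample (no cross-validation here,
# to keep the sketch short; in the suite, RPS() handles that via cross.val).
thresholds <- quantile(obs, c(1/3, 2/3))

# Hypothetical helper: category probabilities as a [bin, sdate] matrix,
# mirroring the exp_probs/obs_probs orientation in the patch.
get_probs <- function(x, thresholds) {
  if (is.null(dim(x))) {            # observations: one-hot category
    sapply(findInterval(x, thresholds) + 1,
           function(b) tabulate(b, nbins = 3))
  } else {                          # ensemble: fraction of members per bin
    apply(x, 1, function(members)
      tabulate(findInterval(members, thresholds) + 1, nbins = 3) / length(members))
  }
}

probs_exp_cumsum <- apply(get_probs(exp, thresholds), 2, cumsum)
probs_obs_cumsum <- apply(get_probs(obs, thresholds), 2, cumsum)

# Plain RPS per start date: squared cumulative-probability differences,
# summed over bins (matches the colSums() line in the patch).
rps <- colSums((probs_exp_cumsum - probs_obs_cumsum)^2)

# Fair adjustment exactly as in the hunk above: removes the finite-ensemble
# bias (the R_new = Inf case of the SpecsVerification::EnsRps formula).
adjustment <- (-1) / (R - 1) * probs_exp_cumsum * (1 - probs_exp_cumsum)
rps_fair <- rps + colSums(adjustment)

# return_mean = TRUE would collapse the time dimension, as in RPS():
c(rps = mean(rps), fair_rps = mean(rps_fair))

The adjustment term is always non-positive, so the fair score is never larger than the raw one; it compensates for the overconfidence of small ensembles, which is why the patch drops the unused R_new <- Inf line and keeps only the closed-form correction.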