diff --git a/example_scripts/example_full_crossval_anomalies.R b/example_scripts/example_full_crossval_anomalies.R new file mode 100644 index 0000000000000000000000000000000000000000..a1db8fb87f79aeeb5001f6ac9d84207c304f729f --- /dev/null +++ b/example_scripts/example_full_crossval_anomalies.R @@ -0,0 +1,30 @@ +source("modules/Loading/Loading.R") +source("modules/Saving/Saving.R") +source("modules/Units/Units.R") +source("modules/Visualization/Visualization.R") + +# OPTION 1. If running on terminal, use this: +args = commandArgs(trailingOnly = TRUE) +recipe_file <- "recipes/examples/recipe_full_crossval_anomalies.yml" +recipe <- prepare_outputs(recipe_file) + +# OPTION 2. If using launch_SUNSET.sh, use this: +# args = commandArgs(trailingOnly = TRUE) +# recipe_file <- args[1] +# recipe <- read_atomic_recipe(recipe_file) + +# Load datasets +data <- Loading(recipe) +data <- Units(recipe, data) + +# Compute anomalies and thresholds in full cross-validation +source("modules/Crossval/Crossval_anomalies.R") +data <- Crossval_anomalies(recipe = recipe, data = data) + +# Skill metrics +source("modules/Crossval/Crossval_metrics.R") +skill_metrics <- Crossval_metrics(recipe = recipe, data_crossval = data, + fair = FALSE, nmemb = NULL, nmemb_ref = NULL) + +Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, + significance = TRUE, probabilities = data$probs) diff --git a/modules/Crossval/R/tmp/RandomWalkTest.R b/modules/Crossval/R/tmp/RandomWalkTest.R index 16d89f6d8b34bf824188677dd4f1728823725eca..7b9ad8b879f6df2c599daf2d9044565e1dc51e87 100644 --- a/modules/Crossval/R/tmp/RandomWalkTest.R +++ b/modules/Crossval/R/tmp/RandomWalkTest.R @@ -28,6 +28,11 @@ #' significance test. The default value is TRUE. #'@param sign A logical value indicating whether to return the statistical #' significance of the test based on 'alpha'. The default value is FALSE. +#'@param N.eff Effective sample size to be used in the statistical significance +#' test. It can be FALSE (and the length of the time series will be used), a +#' numeric (which is used for all cases), or an array with the same dimensions +#' as "skill_A" except "time_dim" (for a particular N.eff to be used for each +#' case). The default value is FALSE. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -87,7 +92,7 @@ #'@export RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', test.type = 'two.sided.approx', alpha = 0.05, pval = TRUE, - sign = FALSE, ncores = NULL) { + sign = FALSE, N.eff = FALSE, ncores = NULL) { # Check inputs ## skill_A and skill_B @@ -126,6 +131,21 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', } sign <- TRUE } + ## N.eff + if (is.array(N.eff)) { + if (!is.numeric(N.eff)) stop("Parameter 'N.eff' must be numeric.") + if (!all(names(dim(N.eff)) %in% names(dim(skill_A))) | + any(dim(skill_A)[match(names(dim(N.eff)), names(dim(skill_A)))] != dim(N.eff))) { + stop('If parameter "N.eff" is provided with an array, it must ', + 'have the same dimensions as "skill_A" except "time_dim".') + } + } else if (any((!isFALSE(N.eff) & !is.numeric(N.eff)) | length(N.eff) != 1)) { + stop('Parameter "N.eff" must be FALSE, a numeric, or an array with ', + 'the same dimensions as "skill_A" except "time_dim".') + } + if (!isFALSE(N.eff) & test.type=='two.sided.approx'){ + warning('"N.eff" will not be used if "test.type" is "two.sided.approx".') + } ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { @@ -134,23 +154,34 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', } ## Compute the Random Walk Test - res <- Apply(data = list(skill_A = skill_A, - skill_B = skill_B), - target_dims = list(skill_A = time_dim, - skill_B = time_dim), - fun = .RandomWalkTest, - test.type = test.type, - alpha = alpha, pval = pval, sign = sign, - ncores = ncores) + if (is.array(N.eff)) { + res <- Apply(data = list(skill_A = skill_A, + skill_B = skill_B, + N.eff = N.eff), + target_dims = list(skill_A = time_dim, + skill_B = time_dim, + N.eff = NULL), + fun = .RandomWalkTest, + test.type = test.type, + alpha = alpha, pval = pval, sign = sign, + ncores = ncores) + } else { + res <- Apply(data = list(skill_A = skill_A, + skill_B = skill_B), + target_dims = list(skill_A = time_dim, + skill_B = time_dim), + fun = .RandomWalkTest, + test.type = test.type, + alpha = alpha, pval = pval, sign = sign, + N.eff = N.eff, ncores = ncores) + } return(res) } .RandomWalkTest <- function(skill_A, skill_B, test.type = 'two.sided.approx', - alpha = 0.05, pval = TRUE, sign = FALSE) { + alpha = 0.05, pval = TRUE, N.eff = FALSE, sign = FALSE) { #skill_A and skill_B: [sdate] - - N.eff <- length(skill_A) A_better <- sum(skill_B > skill_A) B_better <- sum(skill_B < skill_A) @@ -159,14 +190,16 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', output$score <- A_better - B_better if (test.type == 'two.sided.approx') { - output$sign <- abs(output$score) > (2 * sqrt(N.eff)) + output$sign <- abs(output$score) > (2 * sqrt(length(skill_A))) } else { - if (!is.na(output$score)) { - p.val <- binom.test(x = A_better, n = floor(N.eff), p = 0.5, conf.level = 1 - alpha, + if (isFALSE(N.eff)){N.eff <- length(skill_A)} + + if (!is.na(output$score) & isTRUE(N.eff >= A_better)) { + p.val <- binom.test(x = A_better, n = floor(N.eff), p = 0.5, + conf.level = 1 - alpha, alternative = test.type)$p.value - } else { p.val <- NA } diff --git a/recipe_template.yml b/recipe_template.yml index d3882ba7bb81b79bcec167cf9c2cc88470da1d11..b73c62135d800333dddc5af90297f935a426b726 100644 --- a/recipe_template.yml +++ b/recipe_template.yml @@ -133,9 +133,14 @@ Analysis: metric: cov std var n_eff # List of statistics separated by spaces or commas. (Mandatory, str) save: 'all' # Options: 'all', 'none' (Mandatory, str) Probabilities: - percentiles: [[1/3, 2/3], [1/10], [9/10], [1/4, 2/4, 3/4]] # Thresholds + percentiles: # Thresholds + Terciles: [1/3, 2/3] + P10: [1/10] + P90: [9/10] + Quartiles: [1/4, 2/4, 3/4] # for quantiles and probability categories. Each set of thresholds should be - # enclosed within brackets. For now, they are INDEPENDENT from skill metrics. (Optional) + # named and enclosed within brackets. For now, they are INDEPENDENT from skill metrics, + # except in the Crossval_metrics() function. (Optional) save: 'percentiles_only' # Options: 'all', 'none', 'bins_only', 'percentiles_only' (Mandatory, str) Visualization: plots: skill_metrics, most_likely_terciles, forecast_map, extreme_probabilities # Types of plots to generate (Optional, str) @@ -169,6 +174,7 @@ Analysis: col1_width: NULL # Optional, int: to adjust width of first column in scorecards table col2_width: NULL # Optional, int: to adjust width of second column in scorecards table calculate_diff: False # Mandatory, bool: True/False + cross.method: 'leave-one-out' # Cross-validation method for full crossval work ncores: 10 # Number of cores to be used in parallel computation. # If left empty, defaults to 1. (Optional, int) remove_NAs: yes # Whether to remove NAs. diff --git a/recipes/examples/recipe_full_crossval_anomalies.yml b/recipes/examples/recipe_full_crossval_anomalies.yml new file mode 100644 index 0000000000000000000000000000000000000000..dc66fb7e667bebf7dc8f337148821164b561b361 --- /dev/null +++ b/recipes/examples/recipe_full_crossval_anomalies.yml @@ -0,0 +1,110 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + - {name: 'prlr', freq: 'monthly_mean', units: 'mm', flux: yes} + Datasets: + System: + #- {name: 'Meteo-France-System8'} + #- {name: 'CMCC-SPS3.5'} + - {name: 'ECMWF-SEAS5.1'} + #- {name: 'UK-MetOffice-Glosea603'} + #- {name: 'NCEP-CFSv2'} + #- {name: 'DWD-GCFS2.1'} + #- {name: 'ECCC-GEM5.2-NEMO'} + Multimodel: + execute: no + approach: pooled # Mandatory, bool: Either yes/true or no/false + createFrom: Anomalies + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '1201' + fcst_year: '2024' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + - {name: 'Global', latmin: -90, latmax: 90, lonmin: -180, lonmax: 179.9} + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. + type: to_system + cross.method: 'leave-one-out' + Workflow: + Anomalies: + compute: yes + cross_validation: yes + save: none + Time_aggregation: + execute: yes + method: average + ini: [1, 2, 3, 4] + end: [3, 4, 5, 6] + Calibration: + method: raw #crps_min #evmos # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: rpss + save: 'all' + cross_validation: yes + Probabilities: + percentiles: + Terciles: [1/3, 2/3] + P10: [1/10] + P90: [9/10] + save: 'all' + Indicators: + index: no + Visualization: + plots: skill_metrics forecast_map most_likely_terciles + forecast_method: median + multi_panel: no + mask_terciles: 'both' + mask_ens: 'both' + projection: Robinson + file_format: 'PNG' + # Scorecards: + # execute: no # yes/no + # regions: + # Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + # Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + # Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + # start_months: NULL + # metric: mean_bias enscorr rpss crpss EnsSprErr + # metric_aggregation: 'skill' + # #inf_to_na: yes + # table_label: NULL + # fileout_label: NULL + # col1_width: NULL + # col2_width: NULL + # calculate_diff: FALSE + ncores: 30 # Optional, int: number of cores, defaults to 1 + remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE + alpha: 0.05 + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: esarchive + output_dir: /home/Earth/vagudets/sunset-outputs/ # replace with the directory where you want to save the outputs + code_dir: /home/bsc/bsc032339/sunset/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: example_scripts/example_full_crossval_anomalies.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails +