From b80a86cfc9f57bebdb27f4c8a1d0e28db92beb7a Mon Sep 17 00:00:00 2001
From: nperez
Date: Tue, 7 May 2024 18:08:59 +0200
Subject: [PATCH 01/78] temporal agg

---
 modules/Aggregation/Aggregation.R             | 67 +++++++++++++++++++
 .../ex0_1_sample_dataset/ex0_1-recipe.yml     |  6 ++
 2 files changed, 73 insertions(+)
 create mode 100644 modules/Aggregation/Aggregation.R

diff --git a/modules/Aggregation/Aggregation.R b/modules/Aggregation/Aggregation.R
new file mode 100644
index 00000000..40018a7d
--- /dev/null
+++ b/modules/Aggregation/Aggregation.R
@@ -0,0 +1,67 @@
+# recipe
+## Time_aggregation$execute YES/NO
+## Time_aggregation$method accumulated or average
+## Time_aggregation$set for decadal
+## Time_aggregation$ini based on forecast time
+## Time_aggregation$end based on forecast time
+## ini = 2, end = 3 with monthly freq would be a 2 months agg
+Aggregation(recipe, data) {
+
+  ncores <- recipe$Analysis$ncores # is it already checked? NULL or number
+  na.rm <- recipe$Analysis$remove_NAs # is it already checked? TRUE/FALSE
+  if (recipe$Analysis$Workflow$Time_aggregation$execute) {
+    # original freq
+    raw_freq <- tolower(recipe$Analysis$Variable$freq)
+    custom <- recipe$Analysis$Workflow$Time_aggregation$set
+    ini <- recipe$Analysis$Workflow$Time_aggregation$ini
+    end <- recipe$Analysis$Workflow$Time_aggregation$end
+    method <- tolower(recipe$Analysis$Workflow$Time_aggregation$method)
+## Instead of raw freq may be better to use ini/end
+## if (raw_freq %in% c('daily', 'daily_mean', 'weekly', 'weekly_mean')) {
+    if (!is.null(ini) && !is.null(end)) {
+      # calculate aggregations from fday1 to fday2
+      # calculate aggregations from fweek1 to fweek2
+      # other cases could be indicators or other original freq
+      res <- sapply(1:length(data), function(x) {
+        if (!is.null(data[[x]])) {
+          result <- agg_ini_end(data[x], method = method,
+                                na.rm = na.rm, ncores = ncores,
+                                ini = ini, end = end)
+        } else {
+          result <- data[x]
+        }}, simplify = TRUE)
+
+    } else if (!is.null(custom)) {
+      # calculate aggregations from fmonth1 to fmonth2
+      # for decadal, it makes sense to get all seasons and annual means too
+
+    } else {
+      # higher frequencies are typically used to compute indicators
+      # lower frequencies are not typically stored in esarchive
+    }
+  }
+}
+
+agg_ini_end <- function(x, ini, end, method, na.rm , ncores) {
+  if (method == 'average') {
+    x[[1]]$data <- Apply(x[[1]]$data, target_dim = 'time',
+                         function(y) {
+                           mean(y[ini:end], na.rm = na.rm)},
+                         ncores = ncores)
+  } else if (method == 'accumulated') {
+    x[[1]]$data <- Apply(x[[1]]$data, target_dim = 'time',
+                         function(y) {
+                           sum(y[ini:end], na.rm = na.rm)},
+                         ncores = ncores)
+  } else {
+    stop("Unknown method")
+  }
+  dims <- x[[1]]$dims
+  dims['time'] <- length(ini:end)
+  x[[1]]$dims <- dims
+  x[[1]]$coords$time <- ini:end
+  attr(x[[1]]$coords$time, "indices") <- TRUE
+  x[[1]]$attrs$Dates <- Subset(x[[1]]$attrs$Dates, along = 'time',
+                               indices = ini:end)
+  return(x)
+}
diff --git a/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml b/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml
index 662e75cc..bedb1983 100644
--- a/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml
+++ b/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml
@@ -28,6 +28,12 @@ Analysis:
     method: bilinear
     type: to_system
   Workflow:
+    Time_aggregation:
+      execute: yes
+      set: no
+      method: average
+      ini: 2
+      end: 3
     Anomalies:
       compute: yes
       cross_validation: yes
--
GitLab
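To make the ini/end convention of this first patch concrete, a minimal standalone sketch (plain base R; the values are illustrative, not taken from any dataset above). With monthly frequency, ini = 2 and end = 3 aggregate forecast months 2 and 3, exactly as the recipe comment says:

  # Hypothetical monthly values for forecast months 1..4
  monthly <- c(m1 = 10.2, m2 = 11.5, m3 = 12.1, m4 = 13.0)
  ini <- 2
  end <- 3
  # 'average' method: (11.5 + 12.1) / 2 = 11.8
  agg_average <- mean(monthly[ini:end], na.rm = TRUE)
  # 'accumulated' method: 11.5 + 12.1 = 23.6
  agg_accumulated <- sum(monthly[ini:end], na.rm = TRUE)

The module applies the same reduction over the 'time' dimension of the full s2dv_cube array via Apply().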
From 2ceaaca30ca423f1a24f09978c92ec2d56b3b2a9 Mon Sep 17 00:00:00 2001
From: nperez
Date: Wed, 8 May 2024 18:16:11 +0200
Subject: [PATCH 02/78] ini and end and list of indices

---
 modules/Aggregation/Aggregation.R             | 86 +++++++++++++------
 .../ex0_1_sample_dataset/ex0_1-recipe.yml     |  8 +-
 2 files changed, 63 insertions(+), 31 deletions(-)

diff --git a/modules/Aggregation/Aggregation.R b/modules/Aggregation/Aggregation.R
index 40018a7d..6a026e57 100644
--- a/modules/Aggregation/Aggregation.R
+++ b/modules/Aggregation/Aggregation.R
@@ -1,18 +1,21 @@
 # recipe
 ## Time_aggregation$execute YES/NO
 ## Time_aggregation$method accumulated or average
-## Time_aggregation$set for decadal
+## Time_aggregation$user_def for decadal
 ## Time_aggregation$ini based on forecast time
 ## Time_aggregation$end based on forecast time
+# For date definition of period aggregation consider using Indicators module
 ## ini = 2, end = 3 with monthly freq would be a 2 months agg
-Aggregation(recipe, data) {
+Aggregation <- function(recipe, data) {
 
+## Do we need these checks here?
   ncores <- recipe$Analysis$ncores # is it already checked? NULL or number
-  na.rm <- recipe$Analysis$remove_NAs # is it already checked? TRUE/FALSE
+  na.rm <- ifelse(is.null(recipe$Analysis$remove_NAs),
+                  TRUE, recipe$Analysis$remove_NAs) # is it already checked? TRUE/FALSE
   if (recipe$Analysis$Workflow$Time_aggregation$execute) {
     # original freq
     raw_freq <- tolower(recipe$Analysis$Variable$freq)
-    custom <- recipe$Analysis$Workflow$Time_aggregation$set
+    custom <- recipe$Analysis$Workflow$Time_aggregation$user_def
     ini <- recipe$Analysis$Workflow$Time_aggregation$ini
     end <- recipe$Analysis$Workflow$Time_aggregation$end
     method <- tolower(recipe$Analysis$Workflow$Time_aggregation$method)
@@ -22,46 +25,73 @@ Aggregation(recipe, data) {
       # calculate aggregations from fday1 to fday2
       # calculate aggregations from fweek1 to fweek2
       # other cases could be indicators or other original freq
-      res <- sapply(1:length(data), function(x) {
-        if (!is.null(data[[x]])) {
-          result <- agg_ini_end(data[x], method = method,
-                                na.rm = na.rm, ncores = ncores,
-                                ini = ini, end = end)
-        } else {
-          result <- data[x]
-        }}, simplify = TRUE)
-
-    } else if (!is.null(custom)) {
+      if (length(ini) == length(end)) {
+        res <- sapply(1:length(data), function(x) {
+          if (!is.null(data[[x]])) {
+            result <- agg_ini_end(data[x], method = method,
+                                  na.rm = na.rm, ncores = ncores,
+                                  ini = ini, end = end, indices = NULL)
+          } else {
+            result <- data[x]
+          }}, simplify = TRUE)
+      }
+    } else if (!is.ll(custom)) {
       # calculate aggregations from fmonth1 to fmonth2
       # for decadal, it makes sense to get all seasons and annual means too
-
+      ## Ex: January, February, March for decadal simulations aggregation
+      ## custom <- sort(c(seq(1,120, 12), seq(2,120,13), seq(3, 120, 14)))
+      if (is.list(custom)) {
+        res <- sapply(1:length(data), function(x) {
+          if (!is.null(data[[x]])) {
+            result <- agg_ini_end(data[x], method, na.rm, ncores,
+                                  indices = custom, ini = NULL, end = NULL)
+          } else {
+            result <- data[x]}}, simplify = TRUE)
+      } else {
+        stop("Why Time_aggregation$user_def is not a list?")
+      }
     } else {
       # higher frequencies are typically used to compute indicators
      # lower frequencies are not typically stored in esarchive
+      stop("Temporal aggregation not implemented yet")
     }
   }
 }
 
-agg_ini_end <- function(x, ini, end, method, na.rm , ncores) {
+agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm , ncores) {
+  # to make the code work with both options:
+  if (!is.null(ini) && is.null(indices)) {
+    # list of vectors for the indices from ini to end pairs
+    indices <- lapply(1:length(ini), function(x) {
+                      ini[x]:end[x]})
+  } else {
+    # take the first and last element of each indices list for time_bounds saving
+    ini <- unlist(lapply(indices, function(x){x[1]}))
+    end <- unlist(lapply(indices, function(x){x[length(x)]}))
+  }
   if (method == 'average') {
     x[[1]]$data <- Apply(x[[1]]$data, target_dim = 'time',
-                         function(y) {
-                           mean(y[ini:end], na.rm = na.rm)},
-                         ncores = ncores)
+                         function(y, ind) {
+                           sapply(1:length(indices), function(z) {
+                             mean(y[indices[[z]]], na.rm = na.rm)})},
+                         ind = indices, output_dims = 'time',
+                         ncores = ncores)$output1
   } else if (method == 'accumulated') {
     x[[1]]$data <- Apply(x[[1]]$data, target_dim = 'time',
-                         function(y) {
-                           sum(y[ini:end], na.rm = na.rm)},
-                         ncores = ncores)
+                         function(y, ind) {
+                           sapply(1:length(indices), function(z) {
+                             sum(y[indices[[z]]], na.rm = na.rm)})},
+                         ind = indices, output_dims = 'time',
+                         ncores = ncores)$output1
   } else {
     stop("Unknown method")
-  }
-  dims <- x[[1]]$dims
-  dims['time'] <- length(ini:end)
-  x[[1]]$dims <- dims
-  x[[1]]$coords$time <- ini:end
+  }
+  x[[1]]$dims <- dim(x[[1]]$data)
+  x[[1]]$coords$time <- 1:length(ini)
   attr(x[[1]]$coords$time, "indices") <- TRUE
+  x[[1]]$attrs$time_bounds$start <- x[[1]]$attrs$Dates
+  x[[1]]$attrs$time_bounds$end <- Subset(x[[1]]$attrs$Dates, along = 'time',
+                                         indices = end)
   x[[1]]$attrs$Dates <- Subset(x[[1]]$attrs$Dates, along = 'time',
-                               indices = ini:end)
+                               indices = ini)
+
   return(x)
 }
diff --git a/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml b/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml
index bedb1983..7c8fcb2d 100644
--- a/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml
+++ b/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml
@@ -30,10 +30,12 @@ Analysis:
     Workflow:
       Time_aggregation:
         execute: yes
-        set: no
+        user_def:
+          - [1, 3]
+          - !expr sort(c(seq(1,120, 12), seq(2,120,13), seq(3, 120, 14)))
         method: average
-        ini: 2
-        end: 3
+        ini: [1, 2, 1]
+        end: [2, 3, 3]
     Anomalies:
       compute: yes
       cross_validation: yes
--
GitLab
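This patch generalizes the single ini/end pair to vectors of pairs and to arbitrary user-defined index lists. A minimal standalone sketch (plain R, hypothetical names) of how a named user_def list expands into the ini/end vectors that agg_ini_end() derives from it:

  # For a 120-month (10-year) decadal run, January-March of every year
  # is picked with a step of 12; note the comment in the patch uses
  # steps 13 and 14 (seq(2,120,13), seq(3,120,14)), which would drift
  # away from February/March after the first year.
  user_def <- list(
    JFM = sort(c(seq(1, 120, 12), seq(2, 120, 12), seq(3, 120, 12))),
    ftimes_1_3 = 1:3
  )
  indices <- user_def
  ini <- unlist(lapply(indices, function(x) x[1]))          # first index of each period
  end <- unlist(lapply(indices, function(x) x[length(x)]))  # last index of each period
  ini   # JFM = 1, ftimes_1_3 = 1
  end   # JFM = 111, ftimes_1_3 = 3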
From e156dfa8b29cd595353dc290c701194105639c67 Mon Sep 17 00:00:00 2001
From: nperez
Date: Thu, 9 May 2024 17:27:43 +0200
Subject: [PATCH 03/78] fix attrs and add example

---
 example_scripts/exec_timeagg.R                | 41 +++++++++++
 modules/Aggregation/Aggregation.R             |  9 +--
 modules/Visualization/R/plot_metrics.R        |  9 ++-
 .../examples/recipe_tas_seasonal_timeagg.yml  | 70 +++++++++++++++++++
 4 files changed, 124 insertions(+), 5 deletions(-)
 create mode 100644 example_scripts/exec_timeagg.R
 create mode 100644 recipes/examples/recipe_tas_seasonal_timeagg.yml

diff --git a/example_scripts/exec_timeagg.R b/example_scripts/exec_timeagg.R
new file mode 100644
index 00000000..1f34193c
--- /dev/null
+++ b/example_scripts/exec_timeagg.R
@@ -0,0 +1,41 @@
+rm(list=ls())
+gc()
+setwd("/esarchive/scratch/nperez/git/auto-s2s")
+
+source("modules/Loading/Loading.R")
+source("modules/Calibration/Calibration.R")
+source("modules/Anomalies/Anomalies.R")
+source("modules/Skill/Skill.R")
+source("modules/Saving/Saving.R")
+source("modules/Visualization/Visualization.R")
+source("tools/prepare_outputs.R")
+source("modules/Aggregtation/Aggregation.")
+
+# Read recipe
+#args = commandArgs(trailingOnly = TRUE)
+#recipe_file <- args[1]
+#recipe <- read_atomic_recipe(recipe_file)
+## to test a single recipe:
+ # recipe_file <- "recipes/examples/recipe_tas_seasonal_timeagg.yml"
+ # recipe_file <- "recipes/examples/recipe_prlr_seasonal_units.yml"
+
+recipe <- prepare_outputs(recipe_file)
+
+# Load datasets
+data <- Loading(recipe)
+# Units transformation
+source("modules/Units/Units.R")
+test <- Units(recipe, data)
+# Temporal aggregation
+test <- Aggregation(recipe = recipe, data = data)
+# Calibrate datasets
+data <- Calibration(recipe, test)
+# Compute skill metrics
+skill_metrics <- Skill(recipe, data)
+# Compute percentiles and probability bins
+probabilities <- Probabilities(recipe, data)
+# Export all data to netCDF
+## TODO: Fix plotting
+# save_data(recipe, data, skill_metrics, probabilities)
+# Plot data
+Visualization(recipe, data, skill_metrics, probabilities, significance = T)
diff --git a/modules/Aggregation/Aggregation.R b/modules/Aggregation/Aggregation.R
index 6a026e57..445d1d55 100644
--- a/modules/Aggregation/Aggregation.R
+++ b/modules/Aggregation/Aggregation.R
@@ -86,12 +86,13 @@ agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm , ncores) {
   }
   x[[1]]$dims <- dim(x[[1]]$data)
   x[[1]]$coords$time <- 1:length(ini)
-  attr(x[[1]]$coords$time, "indices") <- TRUE
-  x[[1]]$attrs$time_bounds$start <- x[[1]]$attrs$Dates
+  attr(x[[1]]$coords$time, "indices") <- TRUE
+  tmp_dates <- Subset(x[[1]]$attrs$Dates, along = 'time',
+                      indices = ini)
+  x[[1]]$attrs$time_bounds$start <- tmp_dates
   x[[1]]$attrs$time_bounds$end <- Subset(x[[1]]$attrs$Dates, along = 'time',
                                          indices = end)
-  x[[1]]$attrs$Dates <- Subset(x[[1]]$attrs$Dates, along = 'time',
-                               indices = ini)
+  x[[1]]$attrs$Dates <- tmp_dates
   return(x)
 }
diff --git a/modules/Visualization/R/plot_metrics.R b/modules/Visualization/R/plot_metrics.R
index 92b707fa..e909a1f9 100644
--- a/modules/Visualization/R/plot_metrics.R
+++ b/modules/Visualization/R/plot_metrics.R
@@ -206,7 +206,14 @@ plot_metrics <- function(recipe, data_cube, metrics,
     # Define titles
     toptitle <- paste0(system_name, " / ", str_to_title(var_long_name),
                        "\n", display_name, " / ", hcst_period)
-    titles <- as.vector(months)
+    ## time_bounds in data_cube to know if Time_aggregation was applied
+    if (!is.null(data_cube$attrs$time_bounds$start)) {
+      info(logger, paste("Time_bounds found in data_cube",
+                         "using them for titles"))
+      titles <- "What"
+    } else {
+      titles <- as.vector(months)
+    }
     ## TODO: Combine PlotLayout with PlotRobinson?
     suppressWarnings(
     PlotLayout(PlotEquiMap, c('longitude', 'latitude'),
diff --git a/recipes/examples/recipe_tas_seasonal_timeagg.yml b/recipes/examples/recipe_tas_seasonal_timeagg.yml
new file mode 100644
index 00000000..b62ac79a
--- /dev/null
+++ b/recipes/examples/recipe_tas_seasonal_timeagg.yml
@@ -0,0 +1,70 @@
+Description:
+  Author: nperez
+  Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas)
+
+Analysis:
+  Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal
+  Variables:
+    name: tas
+    freq: monthly_mean
+    units: K
+  Datasets:
+    System:
+      name: ECMWF-SEAS5.1 # Mandatory, str: system5c3s system21_m1 system35c3s
+    Multimodel: no # Mandatory, bool: Either yes/true or no/false
+    Reference:
+      name: ERA5 # Mandatory, str: Reference codename. See docu.
+  Time:
+    sdate: '0601' ## MMDD
+    fcst_year: '2023' # Optional, int: Forecast year 'YYYY'
+    hcst_start: '2009' # Mandatory, int: Hindcast start year 'YYYY'
+    hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY'
+    ftime_min: 1 # Mandatory, int: First leadtime time step in months
+    ftime_max: 6 # Mandatory, int: Last leadtime time step in months
+  Region:
+    latmin: 30 # Mandatory, int: minimum latitude
+    latmax: 50 # Mandatory, int: maximum latitude
+    lonmin: -10 # Mandatory, int: minimum longitude
+    lonmax: 10 # Mandatory, int: maximum longitude
+  Regrid:
+    method: bilinear # Mandatory, str: Interpolation method. See docu.
+    type: "to_system"
+    #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid.
+  Workflow:
+    Anomalies:
+      compute: no
+      cross_validation: no
+      save: none
+    Time_aggregation:
+      execute: yes
+      method: average
+      ini: [1, 3, 2]
+      end: [3, 6, 4]
+    Calibration:
+      execute: yes
+      method: evmos # Mandatory, str: Calibration method. See docu.
+      cross_validation: yes
+      save: none
+    Skill:
+      metric: mean_bias EnsCorr rpss crpss bss10 bss90
+      save: 'all'
+      cross_validation: yes
+    Probabilities:
+      percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds.
+      save: 'all'
+    Indicators:
+      index: no
+    Visualization:
+      plots: skill_metrics forecast_ensemble_mean most_likely_terciles
+      multi_panel: no
+      mask_terciles: both
+      file_format: PNG
+  ncores: 4 # Optional, int: number of cores, defaults to 1
+  remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE
+  Output_format: scorecards
+  logo: yes
+Run:
+  Loglevel: INFO
+  Terminal: TRUE
+  output_dir: /esarchive/scratch/nperez/cs_oper/
+  code_dir: /esarchive/scratch/nperez/git/s2s-suite/
--
GitLab


From f7baf09576063ceb6fcce9a40483e5ec95c650f6 Mon Sep 17 00:00:00 2001
From: nperez
Date: Thu, 9 May 2024 18:44:38 +0200
Subject: [PATCH 04/78] plotting attr in time_bound

---
 example_scripts/exec_timeagg.R                |  6 +--
 modules/Aggregation/Aggregation.R             |  6 ++-
 modules/Visualization/R/plot_metrics.R        | 52 ++++++++++++-----
 .../ex0_1_sample_dataset/ex0_1-recipe.yml     |  4 +-
 4 files changed, 47 insertions(+), 21 deletions(-)

diff --git a/example_scripts/exec_timeagg.R b/example_scripts/exec_timeagg.R
index 1f34193c..3b688985 100644
--- a/example_scripts/exec_timeagg.R
+++ b/example_scripts/exec_timeagg.R
@@ -25,11 +25,11 @@ recipe <- prepare_outputs(recipe_file)
 data <- Loading(recipe)
 # Units transformation
 source("modules/Units/Units.R")
-test <- Units(recipe, data)
+data <- Units(recipe, data)
 # Temporal aggregation
-test <- Aggregation(recipe = recipe, data = data)
+data <- Aggregation(recipe = recipe, data = data)
 # Calibrate datasets
-data <- Calibration(recipe, test)
+data <- Calibration(recipe, data)
 # Compute skill metrics
 skill_metrics <- Skill(recipe, data)
diff --git a/modules/Aggregation/Aggregation.R b/modules/Aggregation/Aggregation.R
index 445d1d55..c45370fd 100644
--- a/modules/Aggregation/Aggregation.R
+++ b/modules/Aggregation/Aggregation.R
@@ -64,10 +64,12 @@ agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm , ncores) {
     # list of vectors for the indices from ini to end pairs
     indices <- lapply(1:length(ini), function(x) {
                       ini[x]:end[x]})
+    plotting_attr <- list(ini_ftime = ini, end_ftime = end)
   } else {
     # take the first and last element of each indices list for time_bounds saving
     ini <- unlist(lapply(indices, function(x){x[1]}))
     end <- unlist(lapply(indices, function(x){x[length(x)]}))
+    plotting_attr <- list(names(indices))
   }
   if (method == 'average') {
     x[[1]]$data <- Apply(x[[1]]$data, target_dim = 'time',
@@ -92,7 +94,7 @@ agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm , ncores) {
   x[[1]]$attrs$time_bounds$start <- tmp_dates
   x[[1]]$attrs$time_bounds$end <- Subset(x[[1]]$attrs$Dates, along = 'time',
                                          indices = end)
-  x[[1]]$attrs$Dates <- tmp_dates
+  attributes(x[[1]]$attrs$time_bounds)$plotting_attr <- plotting_attr
+  x[[1]]$attrs$Dates <- tmp_dates
   return(x)
 }
diff --git a/modules/Visualization/R/plot_metrics.R b/modules/Visualization/R/plot_metrics.R
index e909a1f9..87baaebe 100644
--- a/modules/Visualization/R/plot_metrics.R
+++ b/modules/Visualization/R/plot_metrics.R
@@ -172,7 +172,6 @@ plot_metrics <- function(recipe, data_cube, metrics,
       col_sup <- NULL
     }
-
     # Reorder dimensions
     metric <- Reorder(metric, c("time", "longitude", "latitude"))
     # If the significance has been requested and the variable has it,
@@ -207,10 +206,15 @@ plot_metrics <- function(recipe, data_cube, metrics,
     toptitle <- paste0(system_name, " / ", str_to_title(var_long_name),
                        "\n", display_name, " / ", hcst_period)
     ## time_bounds in data_cube to know if Time_aggregation was applied
-    if (!is.null(data_cube$attrs$time_bounds$start)) {
-      info(logger, paste("Time_bounds found in data_cube",
-                         "using them for titles"))
-      titles <- "What"
+    if (!is.null(attributes(data_cube$attrs$time_bounds))) {
+      info(recipe$Run$logger, "Using plotting attrs from time_bounds.")
+      titles <- unlist(
+        lapply(1:length(attributes(data_cube$attrs$time_bounds)$plotting_attr$ini_ftime),
+               function(x) {
+                 paste("Forecast time",
+                       attributes(data_cube$attrs$time_bounds)$plotting_attr$ini_ftime[x],
+                       "to",
+                       attributes(data_cube$attrs$time_bounds)$plotting_attr$end_ftime[x])}))
     } else {
       titles <- as.vector(months)
     }
@@ -273,17 +277,37 @@ plot_metrics <- function(recipe, data_cube, metrics,
     # Loop over forecast times
     for (i in 1:dim(metric)[['time']]) {
       # Get forecast time label
-      forecast_time <- match(months[i], month.name) - init_month + 1
+      # Case without time aggregation:
+      if (is.null(attributes(data_cube$attrs$time_bounds))) {
+        forecast_time <- match(months[i], month.name) - init_month + 1
-      if (forecast_time < 1) {
-        forecast_time <- forecast_time + 12
+        if (forecast_time < 1) {
+          forecast_time <- forecast_time + 12
+        }
+        forecast_time <- sprintf("%02d", forecast_time)
+        # Define plot title
+        toptitle <- paste(system_name, "/",
+                          str_to_title(var_long_name),
+                          "\n", display_name, "/", months[i], "/",
+                          hcst_period)
+      } else {
+        forecast_time_ini <-attributes(data_cube$attrs$time_bounds)$plotting_attr$ini[i]
+        forecast_time_end <- attributes(data_cube$attrs$time_bounds)$plotting_attr$end[i]
+        # labels for file name:
+        forecast_time <- paste0(forecast_time_ini, "-", forecast_time_end)
+        # title names:
+        forecast_time_ini <- init_month + forecast_time_ini - 1
+        forecat_time_ini <- ifelse(forecast_time_ini > 12, forecast_time_ini - 12, forecast_time_ini)
+        forecast_time_ini <- month.name[forecast_time_ini]
+        forecast_time_end <- init_month + forecast_time_end - 1
+        forecat_time_end <- ifelse(forecast_time_end > 12, forecast_time_end - 12, forecast_time_end)
+        forecast_time_end <- month.name[forecast_time_end]
+        toptitle <- paste(system_name, "/",
+                          str_to_title(var_long_name),
+                          "\n", display_name, "/", forecast_time_ini, "to",
+                          forecast_time_end, "/",
+                          hcst_period)
       }
-      forecast_time <- sprintf("%02d", forecast_time)
-      # Define plot title
-      toptitle <- paste(system_name, "/",
-                        str_to_title(var_long_name),
-                        "\n", display_name, "/", months[i], "/",
-                        hcst_period)
       # Modify base arguments
       base_args[[1]] <- metric[i, , ]
       if (!is.null(metric_significance)) {
diff --git a/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml b/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml
index 7c8fcb2d..d36b0324 100644
--- a/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml
+++ b/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml
@@ -31,8 +31,8 @@ Analysis:
       Time_aggregation:
         execute: yes
         user_def:
-          - [1, 3]
-          - !expr sort(c(seq(1,120, 12), seq(2,120,13), seq(3, 120, 14)))
+          ftimes: [1, 3]
+          JJA: !expr sort(c(seq(1,120, 12), seq(2,120,13), seq(3, 120, 14)))
         method: average
         ini: [1, 2, 1]
         end: [2, 3, 3]
--
GitLab
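This patch stores the aggregation periods as a time_bounds entry plus a plotting_attr attribute that the plotting functions read back. A minimal sketch of the resulting metadata layout (plain R; the dates and periods are hypothetical, not from any run above):

  # Six monthly dates, aggregated into two periods: months 1-3 and 3-6
  dates <- seq(as.POSIXct("2023-06-01", tz = "UTC"), by = "month", length.out = 6)
  ini <- c(1, 3)
  end <- c(3, 6)
  time_bounds <- list(start = dates[ini], end = dates[end])
  attr(time_bounds, "plotting_attr") <- list(ini_ftime = ini, end_ftime = end)
  # The cube's Dates collapse to one date per aggregated period (the start):
  Dates <- dates[ini]
  attributes(time_bounds)$plotting_attr$ini_ftime   # 1 3

The presence of this attribute is what plot_metrics() tests with is.null(attributes(data_cube$attrs$time_bounds)) to decide between monthly labels and "Forecast time i to j" labels.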
From 36e11d219bab785da5843975eef9bf1b1f0d35b2 Mon Sep 17 00:00:00 2001
From: nperez
Date: Fri, 10 May 2024 11:41:00 +0200
Subject: [PATCH 05/78] working version for ini and end plots

---
 example_scripts/exec_timeagg.R                |  9 ++--
 modules/Visualization/R/plot_ensemble_mean.R  | 40 +++++++++++---
 .../R/plot_most_likely_terciles_map.R         | 46 +++++++++++++----
 3 files changed, 77 insertions(+), 18 deletions(-)

diff --git a/example_scripts/exec_timeagg.R b/example_scripts/exec_timeagg.R
index 3b688985..6444bc70 100644
--- a/example_scripts/exec_timeagg.R
+++ b/example_scripts/exec_timeagg.R
@@ -1,6 +1,5 @@
 rm(list=ls())
 gc()
-setwd("/esarchive/scratch/nperez/git/auto-s2s")
 
 source("modules/Loading/Loading.R")
 source("modules/Calibration/Calibration.R")
@@ -9,7 +8,8 @@
 source("modules/Skill/Skill.R")
 source("modules/Saving/Saving.R")
 source("modules/Visualization/Visualization.R")
 source("tools/prepare_outputs.R")
-source("modules/Aggregtation/Aggregation.")
+source("modules/Units/Units.R")
+source("modules/Aggregation/Aggregation.R")
 
 # Read recipe
@@ -24,7 +24,6 @@ recipe <- prepare_outputs(recipe_file)
 # Load datasets
 data <- Loading(recipe)
 # Units transformation
-source("modules/Units/Units.R")
 data <- Units(recipe, data)
 # Temporal aggregation
 data <- Aggregation(recipe = recipe, data = data)
@@ -38,4 +37,6 @@ probabilities <- Probabilities(recipe, data)
 ## TODO: Fix plotting
 # save_data(recipe, data, skill_metrics, probabilities)
 # Plot data
-Visualization(recipe, data, skill_metrics, probabilities, significance = T)
+Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics,
+              probabilities = probabilities, significance = T)
+
diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R
index 59881864..cd4d9bb7 100644
--- a/modules/Visualization/R/plot_ensemble_mean.R
+++ b/modules/Visualization/R/plot_ensemble_mean.R
@@ -118,8 +118,26 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o
     }
     toptitle <- paste0(system_name, " / ", str_to_title(var_long_name),
                        "\n", "Forecast Ensemble Mean / ", "Init.: ", i_syear)
-    months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ],
+    if (is.null(attributes(fcst$attrs$time_bounds))) {
+      months <- lubridate::month(
+                  fcst$attrs$Dates[1, 1, which(start_date == i_syear), ],
                                label = T, abb = F)
+    } else {
+      months <- unlist(
+        lapply(1:length(fcst$attrs$time_bounds$start), function(i) {
+          ftime_ini <- attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i]
+          ftime_end <- attributes(fcst$attrs$time_bounds)$plotting_attr$end[i]
+          # labels for file name:
+          ftime <- paste0(ftime_ini, "-", ftime_end)
+          # title names:
+          ftime_ini <- init_month + ftime_ini - 1
+          ftime_ini <- ifelse(ftime_ini > 12, ftime_ini - 12, ftime_ini)
+          ftime_ini <- month.name[ftime_ini]
+          ftime_end <- init_month + ftime_end - 1
+          ftime_end <- ifelse(ftime_end > 12, ftime_end - 12, ftime_end)
+          ftime_end <- month.name[ftime_end]
+          toptitle <- paste(ftime_ini, "to", ftime_end)}))
+    }
     years <- lubridate::year(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ])
     if (recipe$Analysis$Workflow$Visualization$multi_panel) {
@@ -174,11 +192,15 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o
       # Loop over forecast times
       for (i in 1:length(months)) {
         # Get forecast time label
-        forecast_time <- match(months[i], month.name) - init_month + 1
-        if (forecast_time < 1) {
-          forecast_time <- forecast_time + 12
+        if (is.null(attributes(fcst$attrs$time_bounds))) {
+          forecast_time <- match(months[i], month.name) - init_month + 1
+          if (forecast_time < 1) {
+            forecast_time <- forecast_time + 12
+          }
+          forecast_time <- sprintf("%02d", forecast_time)
+        } else {
+          forecast_time <- months
         }
-        forecast_time <- sprintf("%02d", forecast_time)
         # Get mask subset
         if (!is.null(mask)) {
           mask_i <- Subset(var_mask, along = 'time', indices = i, drop = TRUE)
@@ -209,7 +231,13 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o
         }
         # Modify base arguments
         base_args[[1]] <- i_var_ens_mean[i, , ]
-        fileout <- paste0(outfile, "_ft", sprintf("%02d", i), ".pdf")
+        if (is.null(attributes(fcst$attrs$time_bounds))) {
+          fileout <- paste0(outfile, "_ft", sprintf("%02d", i), ".pdf")
+        } else {
+          fileout <- paste0(outfile, "_ft",
+                            attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i], "-",
+                            attributes(fcst$attrs$time_bounds)$plotting_attr$end[i], ".pdf")
+        }
         base_args$mask <- mask_i
         base_args$dots <- dots_i
         # Plot
diff --git a/modules/Visualization/R/plot_most_likely_terciles_map.R b/modules/Visualization/R/plot_most_likely_terciles_map.R
index c9ce70f2..ea0f2b90 100644
--- a/modules/Visualization/R/plot_most_likely_terciles_map.R
+++ b/modules/Visualization/R/plot_most_likely_terciles_map.R
@@ -42,7 +42,6 @@ plot_most_likely_terciles <- function(recipe,
     ## TODO: Sort out decadal initial month (is it always January?)
     init_month <- 1
   }
-
   # Retrieve and rearrange probability bins for the forecast
   if (is.null(probabilities$probs_fcst$prob_b33) ||
       is.null(probabilities$probs_fcst$prob_33_to_66) ||
@@ -113,8 +112,27 @@ plot_most_likely_terciles <- function(recipe,
     toptitle <- paste0(system_name, " / ", str_to_title(var_long_name),
                       "\n", "Most Likely Tercile / Initialization: ", i_syear)
-    months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ],
-                               label = T, abb = F,locale = "en_GB")
+    if (is.null(attributes(fcst$attrs$time_bounds))) {
+      months <- lubridate::month(
+                  fcst$attrs$Dates[1, 1, which(start_date == i_syear), ],
+                  label = T, abb = F)
+    } else {
+      months <- unlist(
+        lapply(1:length(fcst$attrs$time_bounds$start), function(i) {
+          ftime_ini <- attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i]
+          ftime_end <- attributes(fcst$attrs$time_bounds)$plotting_attr$end[i]
+          # labels for file name:
+          ftime <- paste0(ftime_ini, "-", ftime_end)
+          # title names:
+          ftime_ini <- init_month + ftime_ini - 1
+          ftime_ini <- ifelse(ftime_ini > 12, ftime_ini - 12, ftime_ini)
+          ftime_ini <- month.name[ftime_ini]
+          ftime_end <- init_month + ftime_end - 1
+          ftime_end <- ifelse(ftime_end > 12, ftime_end - 12, ftime_end)
+          ftime_end <- month.name[ftime_end]
+          toptitle <- paste(ftime_ini, "to", ftime_end)}))
+    }
+
     years <- lubridate::year(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ])
     if (recipe$Analysis$Workflow$Visualization$multi_panel) {
       ## TODO: Ensure this works for daily and sub-daily cases
@@ -170,11 +188,16 @@ plot_most_likely_terciles <- function(recipe,
       base_args[names(output_configuration)] <- output_configuration
       for (i in 1:length(months)) {
         # Get forecast time label
-        forecast_time <- match(months[i], month.name) - init_month + 1
-        if (forecast_time < 1) {
-          forecast_time <- forecast_time + 12
+        if (is.null(attributes(fcst$attrs$time_bounds))) {
+          forecast_time <- match(months[i], month.name) - init_month + 1
+          if (forecast_time < 1) {
+            forecast_time <- forecast_time + 12
+          }
+          forecast_time <- sprintf("%02d", forecast_time)
+        } else {
+          forecast_time <- months
        }
-        forecast_time <- sprintf("%02d", forecast_time)
+
         # Get mask subset
         if (!is.null(mask)) {
           mask_i <- Subset(var_mask, along = 'time', indices = i, drop = TRUE)
@@ -196,7 +219,14 @@ plot_most_likely_terciles <- function(recipe,
                            format(as.Date(i_syear, format="%Y%m%d"),
                                   "%d-%m-%Y"))
         # Plot
-        fileout <- paste0(outfile, "_ft", forecast_time, ".pdf")
+        if (is.null(attributes(fcst$attrs$time_bounds))) {
+          fileout <- paste0(outfile, "_ft", sprintf("%02d", i), ".pdf")
+        } else {
+          fileout <- paste0(outfile, "_ft",
+                            attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i], "-",
+                            attributes(fcst$attrs$time_bounds)$plotting_attr$end[i], ".pdf")
+        }
+
         base_args$probs <- i_var_probs[i, , , ]
         base_args$mask <- mask_i
         base_args$dots <- dots_i
--
GitLab
From 27578fe3789f835dc9bcc8afc739ccdb97e0273e Mon Sep 17 00:00:00 2001
From: nperez
Date: Fri, 10 May 2024 15:54:00 +0200
Subject: [PATCH 06/78] testing decadal plots and user_def param

---
 example_scripts/exec_timeagg.R                |  2 +-
 example_scripts/test_decadal.R                | 11 +--
 modules/Aggregation/Aggregation.R             |  2 +-
 modules/Visualization/R/plot_ensemble_mean.R  | 69 ++++++++++++-------
 modules/Visualization/R/plot_metrics.R        | 47 ++++++++-----
 .../R/plot_most_likely_terciles_map.R         | 61 ++++++++++------
 recipes/atomic_recipes/recipe_decadal.yml     | 28 +++++---
 .../examples/recipe_prlr_seasonal_timeagg.yml | 63 +++++++++++++++++
 tools/prepare_outputs.R                       |  2 +-
 tools/read_atomic_recipe.R                    |  2 +-
 10 files changed, 205 insertions(+), 82 deletions(-)
 create mode 100644 recipes/examples/recipe_prlr_seasonal_timeagg.yml

diff --git a/example_scripts/exec_timeagg.R b/example_scripts/exec_timeagg.R
index 6444bc70..66df62b8 100644
--- a/example_scripts/exec_timeagg.R
+++ b/example_scripts/exec_timeagg.R
@@ -17,7 +17,7 @@
 #recipe <- read_atomic_recipe(recipe_file)
 ## to test a single recipe:
  # recipe_file <- "recipes/examples/recipe_tas_seasonal_timeagg.yml"
- # recipe_file <- "recipes/examples/recipe_prlr_seasonal_units.yml"
+ # recipe_file <- "recipes/examples/recipe_prlr_seasonal_timeagg.yml"
 
 recipe <- prepare_outputs(recipe_file)
 
diff --git a/example_scripts/test_decadal.R b/example_scripts/test_decadal.R
index 12daa540..de9eae6a 100644
--- a/example_scripts/test_decadal.R
+++ b/example_scripts/test_decadal.R
@@ -9,7 +9,7 @@ source("modules/Calibration/Calibration.R")
 source("modules/Skill/Skill.R")
 source("modules/Saving/Saving.R")
 source("modules/Visualization/Visualization.R")
-
+source("modules/Aggregation/Aggregation.R")
 recipe_file <- "recipes/atomic_recipes/recipe_decadal.yml"
 recipe <- prepare_outputs(recipe_file)
 # archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive
@@ -17,16 +17,19 @@ recipe <- prepare_outputs(recipe_file)
 
 # Load datasets
 data <- Loading(recipe)
+data <- Aggregation(recipe = recipe, data = data)
 
 # Calibrate datasets
 calibrated_data <- Calibration(recipe, data)
 
 # Compute skill metrics
-skill_metrics <- Skill(recipe, calibrated_data)
+skill_metrics <- Skill(recipe, data)
 
 # Compute percentiles and probability bins
-probabilities <- Probabilities(recipe, calibrated_data)
+probabilities <- Probabilities(recipe, data)
 
 # Plot data
-Visualization(recipe, calibrated_data, skill_metrics, probabilities,
+Visualization(recipe = recipe, data = data,
+              skill_metrics = skill_metrics,
+              probabilities = probabilities,
               significance = T)
 
diff --git a/modules/Aggregation/Aggregation.R b/modules/Aggregation/Aggregation.R
index c45370fd..cb23a0dd 100644
--- a/modules/Aggregation/Aggregation.R
+++ b/modules/Aggregation/Aggregation.R
@@ -35,7 +35,7 @@ Aggregation <- function(recipe, data) {
             result <- data[x]
           }}, simplify = TRUE)
       }
-    } else if (!is.ll(custom)) {
+    } else if (!is.null(custom)) {
       # calculate aggregations from fmonth1 to fmonth2
       # for decadal, it makes sense to get all seasons and annual means too
       ## Ex: January, February, March for decadal simulations aggregation
diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R
index cd4d9bb7..261dcc74 100644
--- a/modules/Visualization/R/plot_ensemble_mean.R
+++ b/modules/Visualization/R/plot_ensemble_mean.R
@@ -123,20 +123,24 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o
                   fcst$attrs$Dates[1, 1, which(start_date == i_syear), ],
                   label = T, abb = F)
     } else {
-      months <- unlist(
-        lapply(1:length(fcst$attrs$time_bounds$start), function(i) {
-          ftime_ini <- attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i]
-          ftime_end <- attributes(fcst$attrs$time_bounds)$plotting_attr$end[i]
-          # labels for file name:
-          ftime <- paste0(ftime_ini, "-", ftime_end)
-          # title names:
-          ftime_ini <- init_month + ftime_ini - 1
-          ftime_ini <- ifelse(ftime_ini > 12, ftime_ini - 12, ftime_ini)
-          ftime_ini <- month.name[ftime_ini]
-          ftime_end <- init_month + ftime_end - 1
-          ftime_end <- ifelse(ftime_end > 12, ftime_end - 12, ftime_end)
-          ftime_end <- month.name[ftime_end]
-          toptitle <- paste(ftime_ini, "to", ftime_end)}))
+      if (length(attributes(fcst$attrs$time_bounds)$plotting_attr) > 1) {
+        months <- unlist(
+          lapply(1:length(fcst$attrs$time_bounds$start), function(i) {
+            ftime_ini <- attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i]
+            ftime_end <- attributes(fcst$attrs$time_bounds)$plotting_attr$end[i]
+            # labels for file name:
+            ftime <- paste0(ftime_ini, "-", ftime_end)
+            # title names:
+            ftime_ini <- init_month + ftime_ini - 1
+            ftime_ini <- ifelse(ftime_ini > 12, ftime_ini - 12, ftime_ini)
+            ftime_ini <- month.name[ftime_ini]
+            ftime_end <- init_month + ftime_end - 1
+            ftime_end <- ifelse(ftime_end > 12, ftime_end - 12, ftime_end)
+            ftime_end <- month.name[ftime_end]
+            toptitle <- paste(ftime_ini, "to", ftime_end)}))
+      } else {
+        months <- attributes(fcst$attrs$time_bounds)$plotting_attr[[1]]
+      }
     }
     years <- lubridate::year(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ])
     if (recipe$Analysis$Workflow$Visualization$multi_panel) {
@@ -199,7 +203,7 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o
           }
           forecast_time <- sprintf("%02d", forecast_time)
         } else {
-          forecast_time <- months
+          forecast_time <- months
         }
         # Get mask subset
         if (!is.null(mask)) {
@@ -213,14 +217,23 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o
         } else {
           dots_i <- NULL
         }
-        # Define plot title
-        toptitle <- paste0(system_name, " / ",
-                           str_to_title(var_long_name),
-                           "\n", "Ensemble Mean / ",
-                           months[i], " ", years[i],
-                           " / Start date: ",
-                           format(as.Date(i_syear, format="%Y%m%d"),
-                                  "%d-%m-%Y"))
+        # Define plot title
+        if (tolower(recipe$Analysis$Horizon) == 'seasonal') {
+          toptitle <- paste0(system_name, " / ",
+                             str_to_title(var_long_name),
+                             "\n", "Ensemble Mean / ",
+                             months[i], " ", years[i],
+                             " / Start date: ",
+                             format(as.Date(i_syear, format="%Y%m%d"),
+                                    "%d-%m-%Y"))
+        } else {
+          toptitle <- paste0(system_name, " / ",
+                             str_to_title(var_long_name),
+                             "\n", "Ensemble Mean / ",
+                             months[i],
+                             " / Start date: ",
+                             i_syear)
+        }
         # Define caption
         if (identical(fun, PlotRobinson)) {
           ## TODO: Customize technical details
@@ -234,9 +247,13 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o
         if (is.null(attributes(fcst$attrs$time_bounds))) {
           fileout <- paste0(outfile, "_ft", sprintf("%02d", i), ".pdf")
         } else {
-          fileout <- paste0(outfile, "_ft",
-                            attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i], "-",
-                            attributes(fcst$attrs$time_bounds)$plotting_attr$end[i], ".pdf")
+          if (length(attributes(fcst$attrs$time_bounds)$plotting_attr) > 1) {
+            fileout <- paste0(outfile, "_ft",
+                              attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i], "-",
+                              attributes(fcst$attrs$time_bounds)$plotting_attr$end[i], ".pdf")
+          } else {
+            fileout <- paste0(outfile, "_ft", months[i], ".pdf")
+          }
         }
         base_args$mask <- mask_i
         base_args$dots <- dots_i
diff --git a/modules/Visualization/R/plot_metrics.R b/modules/Visualization/R/plot_metrics.R
index 87baaebe..2df06e8b 100644
--- a/modules/Visualization/R/plot_metrics.R
+++ b/modules/Visualization/R/plot_metrics.R
@@ -208,13 +208,17 @@ plot_metrics <- function(recipe, data_cube, metrics,
     ## time_bounds in data_cube to know if Time_aggregation was applied
     if (!is.null(attributes(data_cube$attrs$time_bounds))) {
       info(recipe$Run$logger, "Using plotting attrs from time_bounds.")
-      titles <- unlist(
+      if (length(attributes(data_cube$attrs$time_bounds)$plotting_attr) > 1) {
+        titles <- unlist(
           lapply(1:length(attributes(data_cube$attrs$time_bounds)$plotting_attr$ini_ftime),
                  function(x) {
                    paste("Forecast time",
                          attributes(data_cube$attrs$time_bounds)$plotting_attr$ini_ftime[x],
                          "to",
                          attributes(data_cube$attrs$time_bounds)$plotting_attr$end_ftime[x])}))
+      } else {
+        titles <- attributes(data_cube$attrs$time_bounds)$plotting_attr[[1]]
+      }
     } else {
       titles <- as.vector(months)
     }
@@ -291,22 +295,31 @@ plot_metrics <- function(recipe, data_cube, metrics,
                           "\n", display_name, "/", months[i], "/",
                           hcst_period)
       } else {
-        forecast_time_ini <-attributes(data_cube$attrs$time_bounds)$plotting_attr$ini[i]
-        forecast_time_end <- attributes(data_cube$attrs$time_bounds)$plotting_attr$end[i]
-        # labels for file name:
-        forecast_time <- paste0(forecast_time_ini, "-", forecast_time_end)
-        # title names:
-        forecast_time_ini <- init_month + forecast_time_ini - 1
-        forecat_time_ini <- ifelse(forecast_time_ini > 12, forecast_time_ini - 12, forecast_time_ini)
-        forecast_time_ini <- month.name[forecast_time_ini]
-        forecast_time_end <- init_month + forecast_time_end - 1
-        forecat_time_end <- ifelse(forecast_time_end > 12, forecast_time_end - 12, forecast_time_end)
-        forecast_time_end <- month.name[forecast_time_end]
-        toptitle <- paste(system_name, "/",
-                          str_to_title(var_long_name),
-                          "\n", display_name, "/", forecast_time_ini, "to",
-                          forecast_time_end, "/",
-                          hcst_period)
+        if (length(attributes(data_cube$attrs$time_bounds)$plotting_attr) > 1) {
+          forecast_time_ini <-attributes(data_cube$attrs$time_bounds)$plotting_attr$ini[i]
+          forecast_time_end <- attributes(data_cube$attrs$time_bounds)$plotting_attr$end[i]
+          # labels for file name:
+          forecast_time <- paste0(forecast_time_ini, "-", forecast_time_end)
+          # title names:
+          forecast_time_ini <- init_month + forecast_time_ini - 1
+          forecat_time_ini <- ifelse(forecast_time_ini > 12, forecast_time_ini - 12, forecast_time_ini)
+          forecast_time_ini <- month.name[forecast_time_ini]
+          forecast_time_end <- init_month + forecast_time_end - 1
+          forecat_time_end <- ifelse(forecast_time_end > 12, forecast_time_end - 12, forecast_time_end)
+          forecast_time_end <- month.name[forecast_time_end]
+          toptitle <- paste(system_name, "/",
+                            str_to_title(var_long_name),
+                            "\n", display_name, "/", forecast_time_ini, "to",
+                            forecast_time_end, "/",
+                            hcst_period)
+        } else {
+          forecast_time <- attributes(data_cube$attrs$time_bounds)$plotting_attr[[1]][i]
+          toptitle <- paste(system_name, "/",
+                            str_to_title(var_long_name),
+                            "\n", display_name, "/",
+                            forecast_time, "/",
+                            hcst_period)
+        }
       }
       # Modify base arguments
       base_args[[1]] <- metric[i, , ]
diff --git a/modules/Visualization/R/plot_most_likely_terciles_map.R b/modules/Visualization/R/plot_most_likely_terciles_map.R
index ea0f2b90..5455f0f6 100644
--- a/modules/Visualization/R/plot_most_likely_terciles_map.R
+++ b/modules/Visualization/R/plot_most_likely_terciles_map.R
@@ -117,20 +117,24 @@ plot_most_likely_terciles <- function(recipe,
                   fcst$attrs$Dates[1, 1, which(start_date == i_syear), ],
                   label = T, abb = F)
     } else {
-      months <- unlist(
-        lapply(1:length(fcst$attrs$time_bounds$start), function(i) {
-          ftime_ini <- attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i]
-          ftime_end <- attributes(fcst$attrs$time_bounds)$plotting_attr$end[i]
-          # labels for file name:
-          ftime <- paste0(ftime_ini, "-", ftime_end)
-          # title names:
-          ftime_ini <- init_month + ftime_ini - 1
-          ftime_ini <- ifelse(ftime_ini > 12, ftime_ini - 12, ftime_ini)
-          ftime_ini <- month.name[ftime_ini]
-          ftime_end <- init_month + ftime_end - 1
-          ftime_end <- ifelse(ftime_end > 12, ftime_end - 12, ftime_end)
-          ftime_end <- month.name[ftime_end]
-          toptitle <- paste(ftime_ini, "to", ftime_end)}))
+      if (length(attributes(fcst$attrs$time_bounds)$plotting_attr) > 1) {
+        months <- unlist(
+          lapply(1:length(fcst$attrs$time_bounds$start), function(i) {
+            ftime_ini <- attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i]
+            ftime_end <- attributes(fcst$attrs$time_bounds)$plotting_attr$end[i]
+            # labels for file name:
+            ftime <- paste0(ftime_ini, "-", ftime_end)
+            # title names:
+            ftime_ini <- init_month + ftime_ini - 1
+            ftime_ini <- ifelse(ftime_ini > 12, ftime_ini - 12, ftime_ini)
+            ftime_ini <- month.name[ftime_ini]
+            ftime_end <- init_month + ftime_end - 1
+            ftime_end <- ifelse(ftime_end > 12, ftime_end - 12, ftime_end)
+            ftime_end <- month.name[ftime_end]
+            toptitle <- paste(ftime_ini, "to", ftime_end)}))
+      } else {
+        months <- attributes(fcst$attrs$time_bounds)$plotting_attr[[1]]
+      }
     }
     years <- lubridate::year(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ])
     if (recipe$Analysis$Workflow$Visualization$multi_panel) {
@@ -211,20 +215,33 @@ plot_most_likely_terciles <- function(recipe,
           dots_i <- NULL
         }
         # Define plot title
-        toptitle <- paste0(system_name, " / ",
-                           str_to_title(var_long_name),
-                           "\n", "Most Likely Tercile / ",
-                           months[i], " ", years[i],
-                           " / Start date: ",
-                           format(as.Date(i_syear, format="%Y%m%d"),
-                                  "%d-%m-%Y"))
+        if (tolower(recipe$Analysis$Horizon) == 'seasonal') {
+          toptitle <- paste0(system_name, " / ",
+                             str_to_title(var_long_name),
+                             "\n", "Most Likely Tercile / ",
+                             months[i], " ", years[i],
+                             " / Start date: ",
+                             format(as.Date(i_syear, format="%Y%m%d"),
+                                    "%d-%m-%Y"))
+        } else {
+          toptitle <- paste0(system_name, " / ",
+                             str_to_title(var_long_name),
+                             "\n", "Most Likely Tercile / ",
+                             months[i],
+                             " / Start date: ",
+                             i_syear)
+        }
         # Plot
         if (is.null(attributes(fcst$attrs$time_bounds))) {
           fileout <- paste0(outfile, "_ft", sprintf("%02d", i), ".pdf")
         } else {
-          fileout <- paste0(outfile, "_ft",
+          if (length(attributes(fcst$attrs$time_bounds)$plotting_attr) > 1) {
+            fileout <- paste0(outfile, "_ft",
                             attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i], "-",
                             attributes(fcst$attrs$time_bounds)$plotting_attr$end[i], ".pdf")
+          } else {
+            fileout <- paste0(outfile, "_ft", months[i], ".pdf")
+          }
         }
         base_args$probs <- i_var_probs[i, , , ]
         base_args$mask <- mask_i
diff --git a/recipes/atomic_recipes/recipe_decadal.yml b/recipes/atomic_recipes/recipe_decadal.yml
index 26312b34..fe0f2479 100644
--- a/recipes/atomic_recipes/recipe_decadal.yml
+++ b/recipes/atomic_recipes/recipe_decadal.yml
@@ -18,7 +18,7 @@ Analysis:
     hcst_start: 1990
     hcst_end: 1993
 #    season: 'Annual'
-    ftime_min: 2
+    ftime_min: 1
     ftime_max: 24
   Region:
     latmin: 10 #-90
@@ -29,29 +29,39 @@ Analysis:
     method: bilinear
     type: to_system #to_reference
   Workflow:
+    Time_aggregation:
+      execute: yes
+      method: 'average'
+      user_def:
+        JJA: !expr sort(c(seq(6,24, 12), seq(7,24,12), seq(8, 24, 12)))
+        MAM: !expr sort(c(seq(3,24, 12), seq(4,24,12), seq(5, 24, 12)))
+        SON: !expr sort(c(seq(9,24, 12), seq(10,24,12), seq(11, 24, 12)))
+        DJF: !expr sort(c(seq(12,24, 12), seq(13,24,12), seq(2, 24, 12)))
+        Y1: !expr 1:12
     Anomalies:
       compute: no
       cross_validation:
       save:
     Calibration:
       method: bias
-      save: 'all'
+      save: 'none'
     Skill:
-      metric: RPSS Corr
-      save: 'all'
+      metric: RPSS EnsCorr Mean_Bias
+      save: 'none'
     Probabilities:
       percentiles: [[1/3, 2/3]]
-      save: 'all'
+      save: 'none'
     Indicators:
       index: FALSE
     Visualization:
       plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles
-  ncores:  # Optional, int: number of cores, defaults to 1
-  remove_NAs:  # Optional, bool: Whether NAs are removed, defaults to FALSE
+      file_format: PNG
+  ncores: 4  # Optional, int: number of cores, defaults to 1
+  remove_NAs: TRUE  # Optional, bool: Whether NAs are removed, defaults to FALSE
   Output_format: S2S4E
 Run:
   Loglevel: INFO
   Terminal: yes
-  output_dir: /esarchive/scratch/aho/git/auto-s2s/out-logs/
-  code_dir: /esarchive/scratch/aho/git/auto-s2s/
+  output_dir: /esarchive/scratch/nperez/
+  code_dir: /esarchive/scratch/nperez/git4/sunset/
diff --git a/recipes/examples/recipe_prlr_seasonal_timeagg.yml b/recipes/examples/recipe_prlr_seasonal_timeagg.yml
new file mode 100644
index 00000000..e03428ac
--- /dev/null
+++ b/recipes/examples/recipe_prlr_seasonal_timeagg.yml
@@ -0,0 +1,63 @@
+Description:
+  Author: nperez
+  Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas)
+
+Analysis:
+  Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal
+  Variables:
+    name: prlr
+    freq: monthly_mean
+    units: mm
+  Datasets:
+    System:
+      name: ECMWF-SEAS5.1 # Mandatory, str: system5c3s system21_m1 system35c3s
+    Multimodel: no # Mandatory, bool: Either yes/true or no/false
+    Reference:
+      name: ERA5 # Mandatory, str: Reference codename. See docu.
+  Time:
+    sdate: '0601' ## MMDD
+    fcst_year: # Optional, int: Forecast year 'YYYY'
+    hcst_start: '2014' # Mandatory, int: Hindcast start year 'YYYY'
+    hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY'
+    ftime_min: 1 # Mandatory, int: First leadtime time step in months
+    ftime_max: 6 # Mandatory, int: Last leadtime time step in months
+  Region:
+    latmin: 30 # Mandatory, int: minimum latitude
+    latmax: 50 # Mandatory, int: maximum latitude
+    lonmin: -10 # Mandatory, int: minimum longitude
+    lonmax: 10 # Mandatory, int: maximum longitude
+  Regrid:
+    method: bilinear # Mandatory, str: Interpolation method. See docu.
+    type: "to_system"
+    #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid.
+  Workflow:
+    Anomalies:
+      compute: no
+      cross_validation: no
+      save: none
+    Calibration:
+      method: evmos # Mandatory, str: Calibration method. See docu.
+      cross_validation: yes
+      save: none
+    Skill:
+      metric: mean_bias EnsCorr rpss crpss bss10 bss90
+      save: 'all'
+      cross_validation: yes
+    Probabilities:
+      percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds.
+      save: 'all'
+    Indicators:
+      index: no
+    Visualization:
+      plots: skill_metrics forecast_ensemble_mean most_likely_terciles
+      multi_panel: no
+      mask_terciles: both
+  ncores: 4 # Optional, int: number of cores, defaults to 1
+  remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE
+  Output_format: scorecards
+  logo: yes
+Run:
+  Loglevel: INFO
+  Terminal: TRUE
+  output_dir: /esarchive/scratch/nperez/cs_oper/
+  code_dir: /esarchive/scratch/nperez/git/s2s-suite/
diff --git a/tools/prepare_outputs.R b/tools/prepare_outputs.R
index c571a3b8..c545542a 100644
--- a/tools/prepare_outputs.R
+++ b/tools/prepare_outputs.R
@@ -28,7 +28,7 @@ prepare_outputs <- function(recipe_file,
 #  recipe_file: path to recipe YAML file
 #  disable_checks: If TRUE, does not perform checks on recipe
 #  disable_uniqueID: If TRUE, does not add a unique ID to output dir
-  recipe <- read_yaml(recipe_file)
+  recipe <- read_yaml(recipe_file, eval.exp = TRUE)
   recipe$recipe_path <- recipe_file
   recipe$name <- tools::file_path_sans_ext(basename(recipe_file))
 
diff --git a/tools/read_atomic_recipe.R b/tools/read_atomic_recipe.R
index fb26cb11..3cb11e7e 100644
--- a/tools/read_atomic_recipe.R
+++ b/tools/read_atomic_recipe.R
@@ -23,7 +23,7 @@ read_atomic_recipe <- function(recipe_file) {
 #  recipe_file: path to recipe YAML file
-  recipe <- read_yaml(recipe_file)
+  recipe <- read_yaml(recipe_file, eval.exp = TRUE)
   recipe$recipe_path <- recipe_file
   recipe$name <- tools::file_path_sans_ext(basename(recipe_file))
   # Create log file for atomic recipe
--
GitLab
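The !expr entries in the decadal recipe above are evaluated when the recipe is read (see the read_yaml(..., eval.exp = TRUE) change in this patch) and expand into plain index vectors over the 24 forecast months. A quick check of what the JJA entry evaluates to, in plain R:

  # June-August of both forecast years of a 24-month decadal run:
  sort(c(seq(6, 24, 12), seq(7, 24, 12), seq(8, 24, 12)))
  # [1]  6  7  8 18 19 20
  # The general pattern: a calendar month m of every year is reached
  # with seq(m, n_months, 12), so each of the three seq() calls must
  # step by 12 for the season to stay aligned across years.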
From 7857d1adfa4ff54727178a5dc69741733ce45512 Mon Sep 17 00:00:00 2001
From: nperez
Date: Fri, 10 May 2024 16:38:01 +0200
Subject: [PATCH 07/78] test saving

---
 recipes/atomic_recipes/recipe_decadal.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/recipes/atomic_recipes/recipe_decadal.yml b/recipes/atomic_recipes/recipe_decadal.yml
index fe0f2479..2028fc46 100644
--- a/recipes/atomic_recipes/recipe_decadal.yml
+++ b/recipes/atomic_recipes/recipe_decadal.yml
@@ -44,13 +44,13 @@ Analysis:
       save:
     Calibration:
       method: bias
-      save: 'none'
+      save: 'all'
     Skill:
       metric: RPSS EnsCorr Mean_Bias
-      save: 'none'
+      save: 'all'
     Probabilities:
       percentiles: [[1/3, 2/3]]
-      save: 'none'
+      save: 'all'
     Indicators:
       index: FALSE
     Visualization:
--
GitLab
From 8921318624fe2e6d8f4128bb0ce7928a36414fb8 Mon Sep 17 00:00:00 2001
From: nperez
Date: Tue, 14 May 2024 17:46:01 +0200
Subject: [PATCH 08/78] fix dimensions order

---
 modules/Aggregation/Aggregation.R             |   6 +
 .../recipe-seasonal_monthly_1_timeagg.yml     |  61 ++++++
 .../testthat/test-seasonal_monthly_timeagg.R  | 177 ++++++++++++++++++
 3 files changed, 244 insertions(+)
 create mode 100644 tests/recipes/recipe-seasonal_monthly_1_timeagg.yml
 create mode 100644 tests/testthat/test-seasonal_monthly_timeagg.R

diff --git a/modules/Aggregation/Aggregation.R b/modules/Aggregation/Aggregation.R
index cb23a0dd..5eab4aff 100644
--- a/modules/Aggregation/Aggregation.R
+++ b/modules/Aggregation/Aggregation.R
@@ -71,6 +71,7 @@ agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm , ncores) {
     end <- unlist(lapply(indices, function(x){x[length(x)]}))
     plotting_attr <- list(names(indices))
   }
+  original_dims <- names(dim(x[[1]]$data))
   if (method == 'average') {
     x[[1]]$data <- Apply(x[[1]]$data, target_dim = 'time',
@@ -86,6 +87,11 @@ agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm , ncores) {
   } else {
     stop("Unknown method")
   }
+  # Check in case only one aggregation is requested, to add the time dim back:
+  if (!('time' %in% names(dim(data[[1]]$data)))) {
+    dim(x[[1]]$data) <- c( dim(x[[1]]$data), time = 1)
+  }
+  x[[1]]$data <- Reorder(x[[1]]$data, original_dims)
   x[[1]]$dims <- dim(x[[1]]$data)
   x[[1]]$coords$time <- 1:length(ini)
   attr(x[[1]]$coords$time, "indices") <- TRUE
diff --git a/tests/recipes/recipe-seasonal_monthly_1_timeagg.yml b/tests/recipes/recipe-seasonal_monthly_1_timeagg.yml
new file mode 100644
index 00000000..0cd5746f
--- /dev/null
+++ b/tests/recipes/recipe-seasonal_monthly_1_timeagg.yml
@@ -0,0 +1,61 @@
+Description:
+  Author: V. Agudetse
+
+Analysis:
+  Horizon: Seasonal
+  Variables:
+    name: tas
+    freq: monthly_mean
+  Datasets:
+    System:
+      name: Meteo-France-System7
+    Multimodel: False
+    Reference:
+      name: ERA5
+  Time:
+    sdate: '1101'
+    fcst_year: '2020'
+    hcst_start: '1993'
+    hcst_end: '1996'
+    ftime_min: 1
+    ftime_max: 4
+  Region:
+    latmin: 17
+    latmax: 20
+    lonmin: 12
+    lonmax: 15
+  Regrid:
+    method: bilinear
+    type: to_system
+  Workflow:
+    # Anomalies:
+    #   compute: no
+    #   cross_validation:
+    #   save: 'none'
+    Time_aggregation:
+      execute: yes #do we need this?
+      method: average #accumulated
+      ini: [1, 2] # aggregate from 1 to 3 and from 2 to 4
+      end: [3, 4]
+
+    Calibration:
+      method: mse_min
+      save: 'all'
+    Skill:
+      metric: RPSS CRPSS EnsCorr Corr_individual_members Enscorr_specs
+      save: 'all'
+    Probabilities:
+      percentiles: [[1/3, 2/3], [1/10, 9/10]]
+      save: 'all'
+    Indicators:
+      index: no
+    Visualization:
+      plots: skill_metrics most_likely_terciles forecast_ensemble_mean
+      multi_panel: yes
+      projection: Robinson
+  Output_format: S2S4E
+Run:
+  Loglevel: INFO
+  Terminal: yes
+  output_dir: ./tests/out-logs/
+  code_dir: /esarchive/scratch/nperez/git4/sunset/
diff --git a/tests/testthat/test-seasonal_monthly_timeagg.R b/tests/testthat/test-seasonal_monthly_timeagg.R
new file mode 100644
index 00000000..2b08d388
--- /dev/null
+++ b/tests/testthat/test-seasonal_monthly_timeagg.R
@@ -0,0 +1,177 @@
+context("Seasonal monthly data")
+
+source("modules/Loading/Loading.R")
+source("modules/Calibration/Calibration.R")
+source("modules/Skill/Skill.R")
+source("modules/Saving/Saving.R")
+source("modules/Visualization/Visualization.R")
+source("modules/Aggregation/Aggregation.R")
+
+recipe_file <- "tests/recipes/recipe-seasonal_monthly_1_timeagg.yml"
+recipe <- prepare_outputs(recipe_file, disable_checks = F)
+archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive
+
+# Load datasets
+suppressWarnings({invisible(capture.output(
+data <- Loading(recipe)
+))})
+
+# Aggregated data
+suppressWarnings({invisible(capture.output(
+aggregated_data <- Aggregation(recipe, data)
+))})
+
+# Calibrate data
+suppressWarnings({invisible(capture.output(
+calibrated_data <- Calibration(recipe, aggregated_data)
+))})
+
+# Compute skill metrics
+suppressWarnings({invisible(capture.output(
+skill_metrics <- Skill(recipe, calibrated_data)
+))})
+
+suppressWarnings({invisible(capture.output(
+probs <- Probabilities(recipe, calibrated_data)
+))})
+
+# Saving
+suppressWarnings({invisible(capture.output(
+Saving(recipe = recipe, data = calibrated_data,
+       skill_metrics = skill_metrics, probabilities = probs)
+))})
+
+# Plotting
+suppressWarnings({invisible(capture.output(
+Visualization(recipe = recipe, data = calibrated_data,
+              skill_metrics = skill_metrics, probabilities = probs,
+              significance = T)
+))})
+outdir <- get_dir(recipe = recipe, variable = data$hcst$attrs$Variable$varName)
+
+# ------- TESTS --------
+
+test_that("1. Time Aggregation", {
+expect_equal(
+is.list(aggregated_data),
+TRUE
+)
+expect_equal(
+names(aggregated_data),
+c("hcst", "fcst", "obs")
+)
+expect_equal(
+class(aggregated_data$hcst),
+"s2dv_cube"
+)
+expect_equal(
+class(aggregated_data$fcst),
+"s2dv_cube"
+)
+expect_equal(
+dim(aggregated_data$hcst$data),
+c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 2, latitude = 3, longitude = 3, ensemble = 25)
+)
+expect_equal(
+dim(aggregated_data$fcst$data),
+c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 2, latitude = 3, longitude = 3, ensemble = 51)
+)
+expect_equal(
+mean(aggregated_data$fcst$data),
+291.6194,
+tolerance = 0.0001
+)
+expect_equal(
+mean(aggregated_data$hcst$data),
+290.6241,
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(drop(aggregated_data$hcst$data)[1, , 2, 3, 4]),
+c(290.5897, 290.2554),
+tolerance = 0.0001
+)
+
+expect_equal(
+range(aggregated_data$fcst$data),
+c(287.4032, 296.3530),
+tolerance = 0.0001
+)
+
+})
+
+
+#======================================
+test_that("3. Metrics", {
+
+expect_equal(
+is.list(skill_metrics),
+TRUE
+)
+expect_equal(
+names(skill_metrics),
+c("rpss", "rpss_significance", "crpss", "crpss_significance", "enscorr",
+  "enscorr_significance", "corr_individual_members", "corr_individual_members_significance",
+  "enscorr_specs")
+)
+expect_equal(
+class(skill_metrics$rpss),
+"array"
+)
+expect_equal(
+dim(skill_metrics$rpss),
+c(var = 1, time = 2, latitude = 3, longitude = 3)
+)
+expect_equal(
+dim(skill_metrics$rpss_significance),
+dim(skill_metrics$rpss)
+)
+expect_equal(
+as.vector(skill_metrics$rpss[, , 2, 3]),
+c(-1.384229, -1.147657),
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(skill_metrics$rpss_significance[, , 2, 3]),
+rep(FALSE, 2)
+)
+
+})
+
+test_that("4. Saving", {
+outputs <- paste0(recipe$Run$output_dir, "/outputs/")
+expect_equal(
+all(basename(list.files(outputs, recursive = T)) %in%
+c("tas_19931101.nc", "tas_19941101.nc", "tas_19951101.nc",
+  "tas_19961101.nc", "tas_20201101.nc", "tas-corr_month11.nc",
+  "tas-obs_19931101.nc", "tas-obs_19941101.nc", "tas-obs_19951101.nc",
+  "tas-obs_19961101.nc", "tas-percentiles_month11.nc", "tas-probs_19931101.nc",
+  "tas-probs_19941101.nc", "tas-probs_19951101.nc", "tas-probs_19961101.nc",
+  "tas-probs_20201101.nc", "tas-skill_month11.nc")),
+TRUE
+)
+expect_equal(
+length(list.files(outputs, recursive = T)),
+17
+)
+
+})
+
+test_that("5. Visualization", {
+plots <- paste0(recipe$Run$output_dir, "/plots/")
+expect_equal(
+all(basename(list.files(plots, recursive = T)) %in%
+c("crpss-november.pdf", "enscorr_specs-november.pdf", "enscorr-november.pdf",
+  "forecast_ensemble_mean-20201101.pdf", "forecast_most_likely_tercile-20201101.pdf",
+  "rpss-november.pdf")),
+TRUE
+)
+expect_equal(
+length(list.files(plots, recursive = T)),
+6
+)
+
+})
+
+# Delete files
+unlink(recipe$Run$output_dir, recursive = T)
--
GitLab


From 616121f3710422f905aa4d78ee36d1b1522b2f88 Mon Sep 17 00:00:00 2001
From: nperez
Date: Wed, 15 May 2024 12:48:58 +0200
Subject: [PATCH 09/78] test time_bounds attributes

---
 .../testthat/test-seasonal_monthly_timeagg.R  | 32 +++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/tests/testthat/test-seasonal_monthly_timeagg.R b/tests/testthat/test-seasonal_monthly_timeagg.R
index 2b08d388..21977d9c 100644
--- a/tests/testthat/test-seasonal_monthly_timeagg.R
+++ b/tests/testthat/test-seasonal_monthly_timeagg.R
@@ -57,6 +57,38 @@ is.list(aggregated_data),
 TRUE
 )
 expect_equal(
+"time_bounds" %in% names(aggregated_data$hcst$attrs),
+TRUE
+)
+expect_equal(
+is.list(aggregated_data$hcst$attrs$time_bounds),
+TRUE
+)
+expect_equal(
+names(aggregated_data$hcst$attrs$time_bounds),
+c("start", "end")
+)
+expect_equal(
+dim(aggregated_data$hcst$attrs$time_bounds$start),
+dim(aggregated_data$hcst$attrs$time_bounds$end)
+)
+expect_equal(
+dim(aggregated_data$hcst$attrs$time_bounds$start),
+dim(aggregated_data$hcst$attrs$Dates)
+)
+expect_equal(
+"plotting_attr" %in% names(attributes(aggregated_data$hcst$attrs$time_bounds)),
+TRUE
+)
+expect_equal(
+attributes(aggregated_data$hcst$attrs$time_bounds)$plotting_attr$ini_ftime,
+c(1,2)
+)
+expect_equal(
+attributes(aggregated_data$hcst$attrs$time_bounds)$plotting_attr$end_ftime,
+c(3,4)
+)
+expect_equal(
 names(aggregated_data),
 c("hcst", "fcst", "obs")
 )
--
GitLab


From 6472d20fe272fd59c4b6a00d86723b84284ecb33 Mon Sep 17 00:00:00 2001
From: nperez
Date: Wed, 15 May 2024 17:52:56 +0200
Subject: [PATCH 10/78] check recipe

---
 tools/check_recipe.R | 73 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 72 insertions(+), 1 deletion(-)

diff --git a/tools/check_recipe.R b/tools/check_recipe.R
index be38ae71..68637281 100644
--- a/tools/check_recipe.R
+++ b/tools/check_recipe.R
@@ -301,7 +301,78 @@ check_recipe <- function(recipe) {
   # ---------------------------------------------------------------------
   # WORKFLOW CHECKS
   # ---------------------------------------------------------------------
-
+  # Time_Aggregation
+  if ("Time_aggregation" %in% names(recipe$Analysis$Workflow)) {
+    if (!("execute" %in% names(recipe$Analysis$Workflow$Time_aggregation))) {
+      recipe$Analysis$Workflow$Time_aggregation$execute <- FALSE
+    } else {
+      if (recipe$Analysis$Workflow$Time_aggregation$execute) {
+        if (!("method" %in% names(recipe$Analysis$Workflow$Time_aggregation))) {
+          error(recipe$Run$logger,
+                "No Time_aggregation method defined in recipe.")
+        } else {
+          if (!tolower(recipe$Analysis$Workflow$Time_aggregation$method) %in%
+              c('average', 'accumulation')) {
+            error(recipe$Run$logger,
+                  "Time_aggregation method should be either average or accumulation.")
+          }
+        }
+        if ((any(!c('ini','end') %in% names(recipe$Analysis$Workflow$Time_aggregation)) &&
+             !'user_def' %in% names(recipe$Analysis$Workflow$Time_aggregation))) {
+          error(recipe$Run$logger,
+                paste("Missing parameters in Time aggregation section,",
+                      "add 'ini' and 'end' or 'user_def'."))
+        }
+        if (all(c('ini', 'end') %in%
+                tolower(names(recipe$Analysis$Workflow$Time_aggregation)))) {
+          if (!is.numeric(recipe$Analysis$Workflow$Time_aggregation$ini) ||
+              !is.numeric(recipe$Analysis$Workflow$Time_aggregation$end)) {
+            error(recipe$Run$logger,
+                  "ini and end in Time aggregation need to be numeric")
+          } else {
+            if (length(recipe$Analysis$Workflow$Time_aggregation$ini) !=
+                length(recipe$Analysis$Workflow$Time_aggregation$end)) {
+              error(recipe$Run$logger,
+                    paste("The number of ini and end parameters need to be",
+                          "the same for Time aggregation"))
+            }
+            max_ind <- max(c(recipe$Analysis$Workflow$Time_aggregation$ini,
+                             recipe$Analysis$Workflow$Time_aggregation$end))
+            if (recipe$Analysis$Time$ftime_max < max_ind) {
+              error(recipe$Run$logger,
+                    paste("Forecast time max required is smaller than indices",
+                          "requested for time aggregation"))
+            }
+          }
+        }
+        if ('user_def' %in% tolower(names(recipe$Analysis$Workflow$Time_aggregation))) {
+          if (c('ini', 'end') %in%
+              tolower(names(recipe$Analysis$Workflow$Time_aggregation))) {
+            recipe$Analysis$Workflow$Time_aggregation$user_def <- NULL
+            warn(recipe$Run$logger,
+                 "Only 'ini' and 'end' indices are used for time agg.")
+          }
+          if (is.list(recipe$Analysis$Workflow$Time_aggregation$user_def)) {
+            if (is.character(names(recipe$Analysis$Workflow$Time_aggregation$user_def))) {
+              if (!all(lapply(recipe$Analysis$Workflow$Time_aggregation$user_def,
+                              is.numeric))) {
+                error(recipe$Run$logger,
+                      "User_def indices in Time aggregation are expected",
+                      "to be numeric.")
+              }
+            } else {
+              warn(recipe$Run$logger,
+                   paste("No names provided for the list of user_def indices in",
+                         "Time aggregation, review plots and file names."))
+            }
+          } else {
+            error(recipe$Run$logger,
+                  "user_def in Time aggregation should be a named list")
+          }
+        }
+      }
+    }
+  }
   # Calibration
   # If 'method' is FALSE/no/'none' or NULL, set to 'raw'
   ## TODO: Review this check
--
GitLab
tolower(names(recipe$Analysis$Workflow$Time_aggregation)))) {
+          if (!is.numeric(recipe$Analysis$Workflow$Time_aggregation$ini) ||
+              !is.numeric(recipe$Analysis$Workflow$Time_aggregation$end)) {
+            error(recipe$Run$logger,
+                  "ini and end in Time aggregation need to be numeric")
+          } else {
+            if (length(recipe$Analysis$Workflow$Time_aggregation$ini) !=
+                length(recipe$Analysis$Workflow$Time_aggregation$end)) {
+              error(recipe$Run$logger,
+                    paste("The number of ini and end parameters needs to be",
+                          "the same for Time aggregation"))
+            }
+            max_ind <- max(c(recipe$Analysis$Workflow$Time_aggregation$ini,
+                             recipe$Analysis$Workflow$Time_aggregation$end))
+            if (recipe$Analysis$Time$ftime_max < max_ind) {
+              error(recipe$Run$logger,
+                    paste("Forecast time max required is smaller than indices",
+                          "requested for time aggregation"))
+            }
+          }
+        }
+        if ('user_def' %in% tolower(names(recipe$Analysis$Workflow$Time_aggregation))) {
+          if (c('ini', 'end') %in%
+              tolower(names(recipe$Analysis$Workflow$Time_aggregation))) {
+            recipe$Analysis$Workflow$Time_aggregation$user_def <- NULL
+            warn(recipe$Run$logger,
+                 "Only 'ini' and 'end' indices are used for time agg.")
+          }
+          if (is.list(recipe$Analysis$Workflow$Time_aggregation$user_def)) {
+            if (is.character(names(recipe$Analysis$Workflow$Time_aggregation$user_def))) {
+              if (!all(sapply(recipe$Analysis$Workflow$Time_aggregation$user_def,
+                              is.numeric))) {
+                error(recipe$Run$logger,
+                      paste("User_def indices in Time aggregation are expected",
+                            "to be numeric."))
+              }
+            } else {
+              warn(recipe$Run$logger,
+                   paste("No names provided in the list of user_def indices for",
+                         "Time aggregation, review plots and file names."))
+            }
+          } else {
+            error(recipe$Run$logger,
+                  "user_def in Time aggregation should be a named list")
+          }
+        }
+      }
+    }
+  }
   # Calibration
   # If 'method' is FALSE/no/'none' or NULL, set to 'raw'
   ## TODO: Review this check
-- GitLab


From 769cff64918dda33bb52e2c231b64f0010cb70e8 Mon Sep 17 00:00:00 2001
From: nperez
Date: Thu, 16 May 2024 12:17:43 +0200
Subject: [PATCH 11/78] fix if condition

---
 tools/check_recipe.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/check_recipe.R b/tools/check_recipe.R
index 68637281..0a412b61 100644
--- a/tools/check_recipe.R
+++ b/tools/check_recipe.R
@@ -346,8 +346,8 @@ check_recipe <- function(recipe) {
       }
     }
     if ('user_def' %in% tolower(names(recipe$Analysis$Workflow$Time_aggregation))) {
-      if (c('ini', 'end') %in%
-          tolower(names(recipe$Analysis$Workflow$Time_aggregation))) {
+      if (all(c('ini', 'end') %in%
+              tolower(names(recipe$Analysis$Workflow$Time_aggregation)))) {
         recipe$Analysis$Workflow$Time_aggregation$user_def <- NULL
         warn(recipe$Run$logger,
              "Only 'ini' and 'end' indices are used for time agg.")
-- GitLab


From 8921318624fe2e6d8f4128bb0ce7928a36414fb8 Mon Sep 17 00:00:00 2001
From: nperez
Date: Tue, 28 May 2024 12:45:27 +0200
Subject: [PATCH 12/78] split cross

---
 .gitignore          |   1 +
 Skill.R             | 153 ++++++++++++++++
 crossval.R          | 150 +++++++++++++++
 full_ecvs_mask.R    | 435 ++++++++++++++++++++++++++++++++++++++++++++
 full_ecvs_oper.R    | 294 ++++++++++++++++++++++++++++++
 recipe_tas_oper.yml |  98 ++++++++++
 6 files changed, 1131 insertions(+)
 create mode 100644 Skill.R
 create mode 100644 crossval.R
 create mode 100644 full_ecvs_mask.R
 create mode 100644 full_ecvs_oper.R
 create mode 100644 recipe_tas_oper.yml

diff --git a/.gitignore b/.gitignore
index 263c4e64..f05c28f4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@ out-logs/
 *.swp
 *.swo
 ecsbatch.log*
+*.nc
 modules/Loading/testing_recipes/recipe_decadal_calendartest.yml
 modules/Loading/testing_recipes/recipe_decadal_daily_calendartest.yml
 conf/vitigeoss-vars-dict.yml
diff --git a/Skill.R b/Skill.R
new file mode 100644
index 00000000..2c1f1d6f
--- /dev/null
+++ b/Skill.R
@@ -0,0 +1,153 @@
+fair <- TRUE
+
+## START SKILL ASSESSMENT:
+# RPS
+source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/GetProbs.R")
+cal_hcst_probs_ev <- GetProbs(res$cal_hcst_ev, time_dim = 'syear',
+                              prob_thresholds = NULL,
+                              bin_dim_abs = 'probs',
+                              indices_for_quantiles = NULL,
+                              memb_dim = 'ensemble', abs_thresholds = res$lims_cal_hcst_tr,
+                              ncores = recipe$Analysis$ncores)
+cal_obs_probs_ev <- GetProbs(data$obs$data, time_dim = 'syear',
+                             prob_thresholds = NULL,
+                             bin_dim_abs = 'probs',
+                             indices_for_quantiles = NULL,
+                             memb_dim = 'ensemble',
+                             abs_thresholds = res$lims_cal_obs_tr,
+                             ncores = recipe$Analysis$ncores)
+source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/modules/Skill/R/tmp/RPS.R")
+rps <- RPS(exp = cal_hcst_probs_ev, obs = cal_obs_probs_ev, memb_dim = NULL,
+           cat_dim = 'probs', cross.val = FALSE, time_dim = 'syear',
+           Fair = fair, nmemb = nmemb,
+           ncores = recipe$Analysis$ncores)
+source("modules/Skill/R/RPS_clim.R")
+rps_clim <- Apply(list(cal_obs_probs_ev),
+                  target_dims = c('probs', 'syear'),
+                  RPS_clim, bin_dim_abs = 'probs', Fair = fair,
+                  cross.val = FALSE, ncores = recipe$Analysis$ncores)$output1
+# RPSS
+source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/RPSS.R")
+source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/RandomWalkTest.R")
+rpss <- RPSS(exp = cal_hcst_probs_ev, obs = cal_obs_probs_ev,
+             time_dim = 'syear', memb_dim = NULL,
+             cat_dim = 'probs', Fair = fair, nmemb = nmemb,
+             # We should use a line like this
+             #abs_threshold = res$lims_ano_hcst_tr,
+             #prob_threshold = c(1/3, 2/3),
+             cross.val = FALSE,
+             ncores = recipe$Analysis$ncores)
+
+cal_fcst <- CST_Calibration(data$hcst, data$obs, data$fcst,
+                            sdate_dim = 'syear', memb_dim = 'ensemble')
+# CRPS
+crps <- CRPS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev,
+             time_dim = 'syear', memb_dim = 'ensemble',
+             Fair = fair,
+             ncores = recipe$Analysis$ncores)
+# Not sure how this one should be computed:
+# It cannot be computed here because it would include information from the other years
+#source("modules/Skill/R/CRPS_clim.R")
+# But if it is done with ano_obs_tr it can be computed here;
+# the result is the same as inside the loop.
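+# For reference, a minimal sketch of the ensemble CRPS estimator behind the
+# CRPS() calls in this script, for one grid point and one start date
+# (hypothetical helper, not used below; with fair = TRUE the spread term is
+# rescaled following Ferro et al., 2008):
+crps_ens_sketch <- function(ens, obs, fair = FALSE) {
+  m <- length(ens)
+  spread <- sum(abs(outer(ens, ens, "-")))   # sum over all member pairs
+  denom <- if (fair) 2 * m * (m - 1) else 2 * m^2
+  mean(abs(ens - obs)) - spread / denom
+}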
+crps_clim <- CRPS(exp = res$ano_obs_tr, obs = res$ano_obs_ev,
+                  time_dim = 'syear', memb_dim = 'sample.syear',
+                  Fair = fair,
+                  ncores = recipe$Analysis$ncores)
+
+
+# CRPSS
+ref <- res$ano_obs_tr
+dim(ref) <- c(ensemble = as.numeric(sdate_dim) - 1,
+              nftime, nlats, nlons, sdate_dim)
+crpss <- CRPSS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, ref = ref,
+               memb_dim = 'ensemble', Fair = fair,
+               time_dim = 'syear', clim.cross.val = FALSE,
+               ncores = recipe$Analysis$ncores)
+
+# Corr
+source("modules/Skill/R/tmp/Corr.R")
+enscorr <- Corr(res$ano_hcst_ev, res$ano_obs_ev,
+                dat_dim = NULL,
+                time_dim = 'syear',
+                method = 'pearson',
+                memb_dim = 'ensemble',
+                memb = F,
+                conf = F,
+                pval = F,
+                sign = T,
+                alpha = 0.05,
+                ncores = recipe$Analysis$ncores)
+
+# Mean Bias
+#mean_bias <- Bias(res$ano_hcst_ev, res$ano_obs_ev,
+mean_bias <- Bias(data$hcst$data, data$obs$data,
+                  time_dim = 'syear',
+                  memb_dim = 'ensemble',
+                  ncores = recipe$Analysis$ncores)
+
+mean_bias_sign <- Apply(list(data$hcst$data, data$obs$data),
+                        target_dims = list(c('syear', 'ensemble'), 'syear'),
+                        fun = function(x, y) {
+                          if (!(any(is.na(x)) || any(is.na(y)))) {
+                            res <- t.test(x = y,
+                                          y = apply(x, 1, mean, na.rm = T),
+                                          alternative = "two.sided")$p.value
+                          } else {
+                            res <- NA
+                          }
+                          return(res)},
+                        ncores = sdate_dim)$output1
+mean_bias_sign <- mean_bias_sign <= 0.05
+
+# Spread error ratio
+source("SprErr.R")
+enssprerr <- SprErr(exp = res$ano_hcst_ev, obs = res$ano_obs_ev,
+                    memb_dim = 'ensemble', dat_dim = NULL,
+                    time_dim = 'syear', pval = TRUE,
+                    ncores = recipe$Analysis$ncores)
+enssprerr_sign <- enssprerr$p.val
+enssprerr_sign <- enssprerr_sign <= 0.05
+enssprerr <- enssprerr$ratio
+
+# RMSE
+rms <- RMS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev,
+           memb_dim = 'ensemble', dat_dim = NULL,
+           time_dim = 'syear', alpha = 0.05,
+           ncores = recipe$Analysis$ncores)
+
+skill_metrics <- list(mean_bias = mean_bias,
+                      mean_bias_significance = mean_bias_sign,
+                      enscorr = enscorr$corr,
+                      enscorr_significance = enscorr$sign,
+                      enssprerr = enssprerr,
+                      enssprerr_significance = enssprerr_sign,
+                      rps = rps, rps_clim = rps_clim, crps = crps, crps_clim = crps_clim,
+                      rpss = rpss$rpss, rpss_significance = rpss$sign, #crps = crps,
+                      crpss = crpss$crpss, crpss_significance = crpss$sign,
+                      rms = rms$rms)
+skill_metrics <- lapply(skill_metrics, function(x) {
+                   InsertDim(drop(x), len = 1, pos = 1, name = 'var')})
+original <- recipe$Run$output_dir
+recipe$Run$output_dir <- paste0(original, "/outputs/Skill/")
+
+skill_metrics <- lapply(skill_metrics, function(x) {
+                   if (is.logical(x)) {
+                     dims <- dim(x)
+                     res <- as.numeric(x)
+                     dim(res) <- dims
+                   } else {
+                     res <- x
+                   }
+                   return(res)
+                 })
+# Compute and save metrics
+source("modules/Saving/Saving.R")
+#Saving <- Saving(recipe = recipe, data = data, skill = skill_metrics)
+save_metrics(recipe = recipe,
+             metrics = skill_metrics,
+             data_cube = data$hcst, agg = 'global',
+             outdir = recipe$Run$output_dir)
+
+recipe$Run$output_dir <- original
+
diff --git a/crossval.R b/crossval.R
new file mode 100644
index 00000000..4a725457
--- /dev/null
+++ b/crossval.R
@@ -0,0 +1,150 @@
+# Full-cross-val workflow
+## This code should be valid for individual months and temporal averages
+
+## data dimensions
+sdate_dim <- dim(data$hcst$data)['syear']
+nmemb <- dim(data$hcst$data)['ensemble']
+nftime <- dim(data$hcst$data)['time']
+
+cross <- CSTools:::.make.eval.train.dexes('leave-one-out', sdate_dim, NULL)
+outdim <- length(cross) # leave-one-out should be equal to sdate_dim
+
+# What do we need to return?
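+# The 'cross' list built above is, in effect, the following stand-in (an
+# assumption: the internal CSTools helper returns one fold per start date for
+# 'leave-one-out', with the remaining years as training indices):
+make_loo_sketch <- function(n) {
+  lapply(seq_len(n), function(t) {
+    list(eval.dexes = t, train.dexes = setdiff(seq_len(n), t))
+  })
+}
+# str(make_loo_sketch(4)[[1]])  # $eval.dexes: 1, $train.dexes: 2 3 4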
+ +if ('latitude' %in% names(dim(data$hcst$data))) { + nlats <- dim(data$hcst$data)['latitude'] + nlons <- dim(data$hcst$data)['longitude'] +} else if ('region' %in% names(dim(data$hcst$data))) { + nregions <- dim(data$hcst$data)['region'] +} +ano_hcst_ev_res <- NULL +ano_obs_ev_res <- NULL +ano_obs_tr_res <- NULL + +cal_hcst_ev_res <- NULL +cal_hcst_tr_res <- NULL +obs_ev_res <- NULL +obs_tr_res <- NULL + +lims_ano_hcst_tr_res <- NULL +lims_ano_obs_tr_res <- NULL + +lims_cal_hcst_tr_res <- NULL +lims_cal_obs_tr_res <- NULL + +for (t in 1:outdim) { + info(recipe$Run$logger, paste("crossval:", t)) + + # subset years: Subset works at BSC not at Athos + ## training indices + obs_tr <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$train.dexes) + hcst_tr <- Subset(data$hcst$data, along = 'syear', + indices = cross[[t]]$train.dexes) + ## evaluation indices + hcst_ev <- Subset(data$hcst$data, along = 'syear', + indices = cross[[t]]$eval.dexes) + obs_ev <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$eval.dexes) + if (anomalies_comp) { + # compute climatology: + clim_obs_tr <- MeanDims(obs_tr, 'syear') + clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble')) + # compute anomalies: + ano_obs_tr <- s2dv::Ano(obs_tr, clim_obs_tr, + ncores = recipe$Analysis$ncores) + ano_hcst_tr <- s2dv::Ano(hcst_tr, clim_hcst_tr, + ncores = recipe$Analysis$ncores) + ano_hcst_ev <- s2dv::Ano(hcst_ev, clim_hcst_tr, + ncores = recipe$Analysis$ncores) + ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr, + ncores = recipe$Analysis$ncores) + #rm("clim_obs_tr", "clim_hcst_tr", "obs_tr", "hcst_tr", "obs_ev", "hcst_ev") + #Category limits + lims_ano_hcst_tr <- Apply(ano_hcst_tr, target_dims = c('syear', 'ensemble'), + fun = function(x) { + quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)}, + output_dims = 'probs', + ncores = recipe$Analysis$ncores)$output1 + lims_ano_obs_tr <- Apply(ano_obs_tr, target_dims = c('syear'),#, 'ensemble'), + fun = function(x) { + quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)}, + output_dims = 'probs', + ncores = recipe$Analysis$ncores)$output1 + ano_hcst_ev_res <- abind(ano_hcst_ev_res, ano_hcst_ev, + along = length(dim(ano_hcst_ev)) + 1) + ano_obs_ev_res <- abind(ano_obs_ev_res, ano_obs_ev, + along = length(dim(ano_obs_ev)) + 1) + ano_obs_tr_res <- abind(ano_obs_tr_res, ano_obs_tr, + along = length(dim(ano_obs_tr)) + 1) + lims_ano_hcst_tr_res <- abind(lims_ano_hcst_tr_res, lims_ano_hcst_tr, + along = length(dim(lims_ano_hcst_tr)) + 1) + lims_ano_obs_tr_res <- abind(lims_ano_obs_tr_res, lims_ano_obs_tr, + along = length(dim(lims_ano_obs_tr)) + 1) + } + if (calibraion_comp) { + cal_hcst_tr <- Calibration(hcst_tr, obs_tr, exp_cor = NULL, + cal.method = "mse_min", eval.method = "in-sample", + multi.model = FALSE, na.fill = TRUE, + na.rm = TRUE, apply_to = NULL, alpha = NULL, + memb_dim = 'ensemble', sdate_dim = 'syear', + dat_dim = NULL, + ncores = recipe$Analysis$ncores) + cal_hcst_ev <- Calibration(hcst_tr, obs_tr, exp_cor = hcst_ev, + cal.method = "mse_min", + eval.method = "hindcast-vs-forecast", + multi.model = FALSE, na.fill = TRUE, + na.rm = TRUE, apply_to = NULL, alpha = NULL, + memb_dim = 'ensemble', sdate_dim = 'syear', + dat_dim = NULL, + ncores = recipe$Analysis$ncores) + #Category limits + lims_cal_hcst_tr <- Apply(cal_hcst_tr, target_dims = c('syear', 'ensemble'), + fun = function(x) { + quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)}, + output_dims = 'probs', + ncores = recipe$Analysis$ncores)$output1 + lims_cal_obs_tr <- Apply(obs_tr, target_dims 
= c('syear'),#, 'ensemble'), + fun = function(x) { + quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)}, + output_dims = 'probs', + ncores = recipe$Analysis$ncores)$output1 + cal_hcst_tr <- Subset(cal_hcst_tr, indices = 1, along = 'syear', drop = 'selected') + cal_hcst_tr_res <- abind(cal_hcst_tr_res, cal_hcst_tr, + along = length(dim(cal_hcst_tr)) + 1) + names(dim(cal_hcst_tr_res)) <- c(names(dim(cal_hcst_tr)), names(sdate_dim)) + cal_hcst_ev <- Subset(cal_hcst_ev, indices = 1, along = 'syear', drop = 'selected') + cal_hcst_ev_res <- abind(cal_hcst_ev_res, cal_hcst_ev, + along = length(dim(cal_hcst_ev)) + 1) + names(dim(cal_hcst_ev_res)) <- c(names(dim(cal_hcst_ev)), names(sdate_dim)) + + obs_tr_res <- abind(obs_tr_res, obs_tr, + along = length(dim(obs_tr)) + 1) + names(dim(obs_tr_res)) <- c(names(dim(obs_tr)), names(sdate_dim)) + lims_cal_hcst_tr_res <- abind(lims_cal_hcst_tr_res, lims_cal_hcst_tr, + along = length(dim(lims_cal_hcst_tr)) + 1) + names(dim(lims_cal_hcst_tr_res)) <- c(names(dim(lims_cal_hcst_tr)), names(sdate_dim)) + lims_cal_obs_tr <- Subset(lims_cal_obs_tr, indices = 1, along = 'ensemble', drop = 'selected') + lims_cal_obs_tr_res <- abind(lims_cal_obs_tr_res, lims_cal_obs_tr, + along = length(dim(lims_cal_obs_tr)) + 1) + names(dim(lims_cal_obs_tr_res)) <- c(names(dim(lims_cal_obs_tr)), names(sdate_dim)) + } + gc() + res <- list(ano_hcst_ev = ano_hcst_ev_res, + ano_obs_ev = ano_obs_ev_res, + ano_obs_tr = ano_obs_tr_res, #reference forecast for the CRPSS + lims_ano_hcst_tr = lims_ano_hcst_tr_res, + lims_ano_obs_tr = lims_ano_obs_tr_res, + cal_hcst_tr = cal_hcst_tr_res, + cal_hcst_ev = cal_hcst_ev_res, + obs_tr = obs_tr_res, + lims_cal_hcst_tr = lims_cal_hcst_tr_res, + lims_cal_obs_tr = lims_cal_obs_tr_res ) +} +info(recipe$Run$logger, + paste0("Cross-validation loop ended, returning elements:", + paste(names(res), collapse = " "))) + + + + diff --git a/full_ecvs_mask.R b/full_ecvs_mask.R new file mode 100644 index 00000000..45d7bb50 --- /dev/null +++ b/full_ecvs_mask.R @@ -0,0 +1,435 @@ + +source("modules/Loading/Loading.R") +source("modules/Saving/Saving.R") +source("modules/Units/Units.R") +source("modules/Visualization/Visualization.R") +args = commandArgs(trailingOnly = TRUE) +recipe_file <- args[1] +#recipe_file <- "recipe_tas_oper.yml" +recipe <- read_atomic_recipe(recipe_file) +#recipe <- prepare_outputs(recipe_file) +# Load datasets +data <- Loading(recipe) +data <- Units(recipe, data) +data_summary(data$hcst, recipe) +data_summary(data$obs, recipe) + +#file <- "/esarchive/scratch/nperez/NUTS3_ERA5Land-62_56E-22_81N.nc" +#file <- "/esarchive/scratch/nperez/git4/sunset/NUTS3_ERA5_-62_56E-22_81N.nc" +#mask <- nc_open(file) +#lat <- ncatt_get(mask, 'lat') +#lon <- ncatt_get(mask, 'lon') +#mask <- ncget_var(mask) + +dev.new() +source("/esarchive/scratch/nperez/git/s2dv/R/Bias.R") +bias <- Bias(exp = data$hcst$data, obs = data$obs$data, + memb_dim = 'ensemble', time_dim = 'syear', alpha = 0.05) +PlotEquiMap(bias$bias[1,1,1,1,1,,], lon = data$hcst$coord$lon, + lat = data$hcst$coords$lat, dots = bias$sig[1,1,1,1,1,,], + filled.c = F, toptitle = "singnif") +bias <- s2dv::Bias(exp = data$hcst$data, obs = data$obs$data, + memb_dim = 'ensemble', time_dim = 'syear') +dev.new() +PlotEquiMap(bias[1,1,1,1,1,,], lon = data$hcst$coord$lon, + lat = data$hcst$coords$lat, #dots = bias$sig[1,1,1,1,1,,], + filled.c = F, toptitle = "original") + + + +dev.new() +source("/esarchive/scratch/nperez/git/s2dv/R/Bias.R") +bias <- Bias(exp = data$hcst$data, obs = data$obs$data, abs = T, + 
memb_dim = 'ensemble', time_dim = 'syear', alpha = 0.05) +PlotEquiMap(bias$bias[1,1,1,1,1,,], lon = data$hcst$coord$lon, + lat = data$hcst$coords$lat, dots = bias$sig[1,1,1,1,1,,], + filled.c = F, toptitle = "singnif") +bias <- s2dv::Bias(exp = data$hcst$data, obs = data$obs$data, abs = T, + memb_dim = 'ensemble', time_dim = 'syear') +dev.new() +PlotEquiMap(bias[1,1,1,1,1,,], lon = data$hcst$coord$lon, + lat = data$hcst$coords$lat, #dots = bias$sig[1,1,1,1,1,,], + filled.c = F, toptitle = "original") + + + +source("/esarchive/scratch/nperez/git/Flor/NUTS3.R") +mask <- test +mask_lat <- lats +mask_lon <-lons + +# Region to subset +range(data$hcst$coords$longitude) +range(data$hcst$coords$latitude) + +mask <- SelBox(mask, lon = mask_lon, lat = mask_lat, + region = c(-20, 40, 20, 80)) +mask_lat <- mask$lat +mask_lon <- mask$lon +mask <- mask$data +all(data$hcst$coord$latitude == mask_lat) +all(data$hcst$coord$longitude == mask_lon) + +polys <- array(unique(as.vector(mask))[-1], + c(poly = length(unique(as.vector(mask))[-1]))) +## missing polys: +missing <- which(!(1:length(ids) %in% polys)) +tas_polys <- Apply(list(data$hcst$data, mask, polys), + target_dims = list(c('latitude', 'longitude'), + c('lat', 'lon'), NULL), + function(x, y, pol) { + mean(x[which(y == pol)], na.rm = TRUE)}, + ncores = 6)$output1 +data$hcst$data <- tas_polys +data$hcst$coord <- data$hcst$coord[-c(7,8)] +data$hcst$coord <- append(data$hcst$coord, list(polygons = polys)) + +polys_obs <- Apply(list(data$obs$data, mask, polys), + target_dims = list(c('latitude', 'longitude'), + c('lat', 'lon'), NULL), + function(x, y, pol) { + mean(x[which(y == pol)], na.rm = TRUE)}, + ncores = 4)$output1 +data$obs$data <- polys_obs +data$obs$coord <- data$obs$coord[-c(7,8)] +data$obs$coord <- append(data$obs$coord, list(polygons = polys)) + + +polys_fcst <- Apply(list(data$fcst$data, mask, polys), + target_dims = list(c('latitude', 'longitude'), + c('lat', 'lon'), NULL), + function(x, y, pol) { + mean(x[which(y == pol)], na.rm = TRUE)}, + ncores = 4)$output1 +data$fcst$data <- polys_fcst +data$fcst$coord <- data$fcst$coord[-c(7,8)] +data$fcst$coord <- append(data$fcst$coord, list(polygons = polys)) + + testp <- Subset(MeanDims(data$fcst$data, 'ensemble'), along = 'time', + indices = 1, drop = 'all') +data_pol <- data.frame(data = testp, NUTS_ID = ids[-missing]) +data_pol <- rbind(data_pol, data.frame(data =rep(0, length(missing)), NUTS_ID = ids[missing])) +shp <- rgdal::readOGR(shp_file) +shp <- subset(shp, LEVL_CODE == 2) + shp_data <- merge(shp, data_pol, by.x="NUTS_ID", by.y="NUTS_ID") + shp_data@data$id <- rownames(shp_data@data) + df.points <- fortify(shp_data, region = "id") + df.df <- plyr::join(df.points, shp_data@data, by = "id") + +gusa <- map_data("world") +plot_poly <- ggplot(data = df.df) + + geom_polygon(aes(long, lat, group = group),fill = "lightgrey", data = gusa) + + coord_map(projection = "stereographic", + xlim = c(-15, 40), + ylim = c(35, 75)) + + geom_polygon(aes(x = long, y = lat, fill = cut(data, breaks = seq(-16, 22, 2)), + group = group), color = "grey", size = 0.01) + + geom_polygon(aes(x = long, y = lat, group = group), fill = NA, + color = "grey", size = 0.01, data = shp) + + theme_bw() + + scale_fill_manual(values = c(rev(brewer.pal(9, 'Blues')), + brewer.pal(9, 'Reds'), "black"), + drop = FALSE, name = 'tas(ºC)') + + theme(panel.background = element_rect(fill = 'azure'), + text = element_text(family = "Times")) + # Colour of ocean + xlab('Longitude') + ylab('Latitude') + + scale_x_continuous(breaks = 
seq(-12, 45, 4),labels = waiver()) + + scale_y_continuous(breaks = seq(32, 70, 4),labels = waiver()) + + ggtitle("Forecast test") + +#reg <- SelBox(data$hcst$data, lon = as.vector(data$hcst$coords$longitude), +# lat = as.vector(data$hcst$coords$latitude), +# region = c(lonmin = 180, lonmax = 270, latmin = -30, latmax = 30), +# latdim = 'latitude', londim = 'longitude') +# +#data$hcst$data <- reg$data +#data$hcst$coords$longitude <- reg$lon +#data$hcst$coords$latitude <- reg$lat + +#reg <- SelBox(data$obs$data, lon = as.vector(data$obs$coords$longitude), +# lat = as.vector(data$obs$coords$latitude), +# region = c(lonmin = 180, lonmax = 270, latmin = -30, latmax = 30), +# latdim = 'latitude', londim = 'longitude') + +#data$obs$data <- reg$data +#data$obs$coords$longitude <- reg$lon +#data$obs$coords$latitude <- reg$lat + +# UKMO January 1993 is missing: +if (recipe$Analysis$Time$sdate == '0101') { + if (recipe$Analysis$Datasets$System$name == "UKMO-System602") { + if (1993 %in% recipe$Analysis$Time$hcst_start:recipe$Analysis$Time$hcst_end) { + info(recipe$Run$logger, + "UKMO January 1993 not available") + ind <- recipe$Analysis$Time$hcst_start:recipe$Analysis$Time$hcst_end + ind <- (1:length(ind))[-which(ind == 1993)] + data$hcst <- CST_Subset(data$hcst, along = 'syear', indices = ind) + data$obs <- CST_Subset(data$obs, along = 'syear', indices = ind) + sdate_dim <- dim(data$hcst$data)['syear'] + } + } +} + +calibraion_comp <- TRUE +anomalies_comp <- FALSE +source("crossval.R") + +## Define FAIR option: +fair <- TRUE + +## START SKILL ASSESSMENT: +# RPS +source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/GetProbs.R") +cal_hcst_probs_ev <- GetProbs(res$cal_hcst_ev, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'probs', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', abs_thresholds = res$lims_cal_hcst_tr, + ncores = recipe$Analysis$ncores) +cal_obs_probs_ev <- GetProbs(data$obs$data, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'probs', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = res$lims_cal_obs_tr, + ncores = recipe$Analysis$ncores) +source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/modules/Skill/R/tmp/RPS.R") +rps <- RPS(exp = ano_hcst_probs_ev, obs = ano_obs_probs_ev, memb_dim = NULL, + cat_dim = 'probs', cross.val = FALSE, time_dim = 'syear', + Fair = fair, nmemb = nmemb, + ncores = recipe$Analysis$ncores) +source("modules/Skill/R/RPS_clim.R") +rps_clim <- Apply(list(ano_obs_probs_ev), + target_dims = c('probs', 'syear'), + RPS_clim, bin_dim_abs = 'probs', Fair = fair, + cross.val = FALSE, ncores = recipe$Analysis$ncores)$output1 +# RPSS +source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/RPSS.R") +source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/RandomWalkTest.R") +rpss <- RPSS(exp = cal_hcst_probs_ev, obs = cal_obs_probs_ev, + time_dim = 'syear', memb_dim = NULL, + cat_dim = 'probs', Fair = fair, nmemb = nmemb, + # We should use a line like this + #abs_threshold = res$lims_ano_hcst_tr, + #prob_threshold = c(1/3, 2/3), + cross.val = FALSE, + ncores = recipe$Analysis$ncores) + +cal_fcst <- CST_Calibration(data$hcst, data$obs, data$fcst, + sdate_dim = 'syear', memb_dim = 'ensemble') + + +shp_file = "/esarchive/shapefiles/NUTS3/NUTS_RG_60M_2021_4326.shp" +shp <- sf::st_read(shp_file) # class sf +ids <- subset(shp, shp$LEVL_CODE == 3) + + +###### +library(sf) +library(ggplot2) +#library(ggplot2, lib.loc = .libPaths()[2]) +library(RColorBrewer) 
+library(rgeos)
+
+gusa <- map_data("world")
+
+pals <- brewer.pal(9, 'Reds')[3:9]
+pals <- c("#ffffb3", pals, '#525252')
+
+brks <- 0:5
+brks <- seq(258, 274, 2)
+
+plot_poly <- list()
+polys_data <- NULL
+
+
+## Now fill each polygon with the mean value
+  data_pol <- data.frame(data = apply(cal_fcst$data[1,1,1,1,1,1,,], 2, mean),
+                         LAU_ID = polys)
+  shp_data <- merge(shp, data_pol, by.x = "LAU_ID", by.y = "LAU_ID")
+
+# Magic lines below that convert a SpatialPolygonsDataFrame to a normal data frame
+  shp_data@data$id <- rownames(shp_data@data)
+  df.points <- fortify(shp_data, region = "id")
+  df.df <- plyr::join(df.points, shp_data@data, by = "id")
+ano <- df.df
+ano$data <- ano$data - fmi_clim_m$data
+
+ggplot(data = ano) + #df.df) +
+  geom_polygon(aes(long, lat, group = group), fill = "lightgrey", data = gusa) +
+  coord_map(projection = "stereographic",
+            xlim = c(lon_min, lon_max),
+            ylim = c(lat_min, lat_max)) +
+ # geom_polygon(aes(x = long, y = lat, fill = cut(data, breaks = seq(-14, 18, 2)),
+  geom_polygon(aes(x = long, y = lat, fill = cut(data,
+                                                 breaks = seq(0, 5, 0.5)),
+                   group = group), color = "grey", size = 0.08) +
+  geom_polygon(aes(x = long, y = lat, group = group), fill = NA,
+               color = "grey", size = 0.5, data = shp_herding) +
+  theme_bw() +
+  scale_fill_manual(values = c(#rev(brewer.pal(9, 'Blues')),
+                               'white', brewer.pal(9, 'Reds'), "black"),
+                    drop = FALSE, name = 'tas(ºC)') +
+  theme(panel.background = element_rect(fill = 'azure'),
+        text = element_text(family = "Times")) + # Colour of ocean
+  xlab('Longitude') + ylab('Latitude') +
+  scale_x_continuous(breaks = seq(-12, 45, 4), labels = waiver()) +
+  scale_y_continuous(breaks = seq(32, 70, 4), labels = waiver()) +
+  ggtitle(paste(month.name[mes], year))#
+
+
+######
+#PlotEquiMap(rps[1,,], lon = data$hcst$coords$longitude,
+#            lat = data$hcst$coords$latitude, filled.c = F,
+#            fileout = "test.png")
+# CRPS
+crps <- CRPS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev,
+             time_dim = 'syear', memb_dim = 'ensemble',
+             Fair = fair,
+             ncores = recipe$Analysis$ncores)
+# Not sure how this one should be computed:
+# It cannot be computed here because it would include information from the other years
+#source("modules/Skill/R/CRPS_clim.R")
+# But if it is done with ano_obs_tr it can be computed here;
+# the result is the same as inside the loop.
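+# Sketch of the climatological reference used for crps_clim below (assumed
+# shapes: obs_tr is a (nyears - 1) x nyears matrix of leave-one-out training
+# observations, obs_ev the length-nyears vector of held-out observations;
+# hypothetical helper, not used below):
+crps_clim_sketch <- function(obs_tr, obs_ev) {
+  crps_one <- function(ens, y) {
+    mean(abs(ens - y)) - 0.5 * mean(abs(outer(ens, ens, "-")))
+  }
+  # each held-out year is scored against its own training years as 'members'
+  mean(sapply(seq_along(obs_ev), function(t) crps_one(obs_tr[, t], obs_ev[t])))
+}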
+crps_clim <- CRPS(exp = res$ano_obs_tr, obs = res$ano_obs_ev,
+                  time_dim = 'syear', memb_dim = 'sample.syear',
+                  Fair = fair,
+                  ncores = recipe$Analysis$ncores)
+
+
+# CRPSS
+ref <- res$ano_obs_tr
+dim(ref) <- c(ensemble = as.numeric(sdate_dim) - 1,
+              nftime, nlats, nlons, sdate_dim)
+crpss <- CRPSS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, ref = ref,
+               memb_dim = 'ensemble', Fair = fair,
+               time_dim = 'syear', clim.cross.val = FALSE,
+               ncores = recipe$Analysis$ncores)
+
+
+# Corr
+source("modules/Skill/R/tmp/Corr.R")
+enscorr <- Corr(res$ano_hcst_ev, res$ano_obs_ev,
+                dat_dim = NULL,
+                time_dim = 'syear',
+                method = 'pearson',
+                memb_dim = 'ensemble',
+                memb = F,
+                conf = F,
+                pval = F,
+                sign = T,
+                alpha = 0.05,
+                ncores = recipe$Analysis$ncores)
+
+# Mean Bias
+#mean_bias <- Bias(res$ano_hcst_ev, res$ano_obs_ev,
+mean_bias <- Bias(data$hcst$data, data$obs$data,
+                  time_dim = 'syear',
+                  memb_dim = 'ensemble',
+                  ncores = recipe$Analysis$ncores)
+
+mean_bias_sign <- Apply(list(data$hcst$data, data$obs$data),
+                        target_dims = list(c('syear', 'ensemble'), 'syear'),
+                        fun = function(x, y) {
+                          if (!(any(is.na(x)) || any(is.na(y)))) {
+                            res <- t.test(x = y,
+                                          y = apply(x, 1, mean, na.rm = T),
+                                          alternative = "two.sided")$p.value
+                          } else {
+                            res <- NA
+                          }
+                          return(res)},
+                        ncores = sdate_dim)$output1
+mean_bias_sign <- mean_bias_sign <= 0.05
+#PlotEquiMap(mean_bias[1,1,1,1,1,,], lat = data$hcst$coords$latitude,
+#            lon = data$hcst$coords$longitude,
+#            dots = mean_bias_sign[1,1,1,1,1,,,1])
+
+# Spread error ratio
+source("SprErr.R")
+enssprerr <- SprErr(exp = res$ano_hcst_ev, obs = res$ano_obs_ev,
+                    memb_dim = 'ensemble', dat_dim = NULL,
+                    time_dim = 'syear', pval = TRUE,
+                    ncores = recipe$Analysis$ncores)
+enssprerr_sign <- enssprerr$p.val
+enssprerr_sign <- enssprerr_sign <= 0.05
+enssprerr <- enssprerr$ratio
+
+# RMSE
+rms <- RMS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev,
+           memb_dim = 'ensemble', dat_dim = NULL,
+           time_dim = 'syear', alpha = 0.05,
+           ncores = recipe$Analysis$ncores)
+
+#obs_noensdim <- ClimProjDiags::Subset(res$ano_obs_ev, "ensemble", 1,
+#                                      drop = "selected")
+
+#enssprerr <- easyVerification::veriApply(verifun = 'EnsSprErr',
+#                                         fcst = res$ano_hcst_ev,
+#                                         obs = obs_noensdim,
+#                                         tdim = which(names(dim(res$ano_hcst_ev))=='syear'),
+#                                         ensdim = which(names(dim(res$ano_hcst_ev))=='ensemble'),
+#                                         na.rm = FALSE,
+#                                         ncpus = recipe$Analysis$ncores)
+if (any(is.na(rpss$sign))) {
+  info(recipe$Run$logger,
+       "RPSS NA")
+
+  rpss$sign[is.na(rpss$sign)] <- FALSE
+}
+skill_metrics <- list(mean_bias = mean_bias,
+                      mean_bias_significance = mean_bias_sign,
+                      enscorr = enscorr$corr,
+                      enscorr_significance = enscorr$sign,
+                      enssprerr = enssprerr,
+                      enssprerr_significance = enssprerr_sign,
+                      rps = rps, rps_clim = rps_clim, crps = crps, crps_clim = crps_clim,
+                      rpss = rpss$rpss, rpss_significance = rpss$sign, #crps = crps,
+                      crpss = crpss$crpss, crpss_significance = crpss$sign,
+                      rms = rms$rms)
+skill_metrics <- lapply(skill_metrics, function(x) {
+                   InsertDim(drop(x), len = 1, pos = 1, name = 'var')})
+original <- recipe$Run$output_dir
+recipe$Run$output_dir <- paste0(original, "/outputs/Skill/")
+
+skill_metrics <- lapply(skill_metrics, function(x) {
+                   if (is.logical(x)) {
+                     dims <- dim(x)
+                     res <- as.numeric(x)
+                     dim(res) <- dims
+                   } else {
+                     res <- x
+                   }
+                   return(res)
+                 })
+# Compute and save metrics
+source("modules/Saving/Saving.R")
+#Saving <- Saving(recipe = recipe, data = data, skill = skill_metrics)
+ save_metrics(recipe = recipe,
+              metrics = skill_metrics,
+              data_cube = data$hcst, agg =
'global', + outdir = recipe$Run$output_dir) + +recipe$Run$output_dir <- original + +source("modules/Visualization/Visualization.R") +if (data$hcst$coords$longitude[1] != 0) { + skill_metrics <- lapply(skill_metrics, function(x) { + Subset(x, along = 'longitude', indices = c(182:360, 1:181)) + }) +} + info(recipe$Run$logger, + paste("lons:", data$hcst$coords$longitude)) + info(recipe$Run$logger, + paste("lons:", data$obs$coords$longitude)) + + +data$hcst$coords$longitude <- -179:180 + +Visualization(recipe, data, skill_metrics, significance = TRUE) + +source("tools/add_logo.R") +add_logo(recipe, "rsz_rsz_bsc_logo.png") + + diff --git a/full_ecvs_oper.R b/full_ecvs_oper.R new file mode 100644 index 00000000..ba65dbd5 --- /dev/null +++ b/full_ecvs_oper.R @@ -0,0 +1,294 @@ + +source("modules/Loading/Loading.R") +source("modules/Saving/Saving.R") +source("modules/Units/Units.R") +source("modules/Visualization/Visualization.R") +args = commandArgs(trailingOnly = TRUE) +recipe_file <- args[1] +#recipe_file <- "recipe_tas_oper.yml" +recipe <- read_atomic_recipe(recipe_file) +#recipe <- prepare_outputs(recipe_file) +# Load datasets +data <- Loading(recipe) +data <- Units(recipe, data) +data_summary(data$hcst, recipe) +data_summary(data$obs, recipe) + +# UKMO January 1993 is missing: +if (recipe$Analysis$Time$sdate == '0101') { + if (recipe$Analysis$Datasets$System$name == "UKMO-System602") { + if (1993 %in% recipe$Analysis$Time$hcst_start:recipe$Analysis$Time$hcst_end) { + info(recipe$Run$logger, + "UKMO January 1993 not available") + ind <- recipe$Analysis$Time$hcst_start:recipe$Analysis$Time$hcst_end + ind <- (1:length(ind))[-which(ind == 1993)] + data$hcst <- CST_Subset(data$hcst, along = 'syear', indices = ind) + data$obs <- CST_Subset(data$obs, along = 'syear', indices = ind) + sdate_dim <- dim(data$hcst$data)['syear'] + } + } +} + +calibraion_comp <- TRUE +anomalies_comp <- FALSE +source("crossval.R") + +## Define FAIR option: +fair <- TRUE + +## START SKILL ASSESSMENT: +# RPS +source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/GetProbs.R") +cal_hcst_probs_ev <- GetProbs(res$cal_hcst_ev, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'probs', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', abs_thresholds = res$lims_cal_hcst_tr, + ncores = recipe$Analysis$ncores) +cal_obs_probs_ev <- GetProbs(data$obs$data, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'probs', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = res$lims_cal_obs_tr, + ncores = recipe$Analysis$ncores) +source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/modules/Skill/R/tmp/RPS.R") +rps <- RPS(exp = ano_hcst_probs_ev, obs = ano_obs_probs_ev, memb_dim = NULL, + cat_dim = 'probs', cross.val = FALSE, time_dim = 'syear', + Fair = fair, nmemb = nmemb, + ncores = recipe$Analysis$ncores) +source("modules/Skill/R/RPS_clim.R") +rps_clim <- Apply(list(ano_obs_probs_ev), + target_dims = c('probs', 'syear'), + RPS_clim, bin_dim_abs = 'probs', Fair = fair, + cross.val = FALSE, ncores = recipe$Analysis$ncores)$output1 +# RPSS +source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/RPSS.R") +source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/RandomWalkTest.R") +rpss <- RPSS(exp = cal_hcst_probs_ev, obs = cal_obs_probs_ev, + time_dim = 'syear', memb_dim = NULL, + cat_dim = 'probs', Fair = fair, nmemb = nmemb, + # We should use a line like this + #abs_threshold = res$lims_ano_hcst_tr, + #prob_threshold = c(1/3, 2/3), + 
cross.val = FALSE,
+             ncores = recipe$Analysis$ncores)
+
+cal_fcst <- CST_Calibration(data$hcst, data$obs, data$fcst,
+                            sdate_dim = 'syear', memb_dim = 'ensemble')
+
+
+shp_file = "/esarchive/shapefiles/NUTS3/NUTS_RG_60M_2021_4326.shp"
+shp <- sf::st_read(shp_file) # class sf
+ids <- subset(shp, shp$LEVL_CODE == 3)
+
+
+######
+library(sf)
+library(ggplot2)
+#library(ggplot2, lib.loc = .libPaths()[2])
+library(RColorBrewer)
+library(rgeos)
+
+gusa <- map_data("world")
+
+pals <- brewer.pal(9, 'Reds')[3:9]
+pals <- c("#ffffb3", pals, '#525252')
+
+brks <- 0:5
+brks <- seq(258, 274, 2)
+
+plot_poly <- list()
+polys_data <- NULL
+
+
+## Now fill each polygon with the mean value
+  data_pol <- data.frame(data = apply(cal_fcst$data[1,1,1,1,1,1,,], 2, mean),
+                         LAU_ID = polys)
+  shp_data <- merge(shp, data_pol, by.x = "LAU_ID", by.y = "LAU_ID")
+
+# Magic lines below that convert a SpatialPolygonsDataFrame to a normal data frame
+  shp_data@data$id <- rownames(shp_data@data)
+  df.points <- fortify(shp_data, region = "id")
+  df.df <- plyr::join(df.points, shp_data@data, by = "id")
+ano <- df.df
+ano$data <- ano$data - fmi_clim_m$data
+
+ggplot(data = ano) + #df.df) +
+  geom_polygon(aes(long, lat, group = group), fill = "lightgrey", data = gusa) +
+  coord_map(projection = "stereographic",
+            xlim = c(lon_min, lon_max),
+            ylim = c(lat_min, lat_max)) +
+ # geom_polygon(aes(x = long, y = lat, fill = cut(data, breaks = seq(-14, 18, 2)),
+  geom_polygon(aes(x = long, y = lat, fill = cut(data,
+                                                 breaks = seq(0, 5, 0.5)),
+                   group = group), color = "grey", size = 0.08) +
+  geom_polygon(aes(x = long, y = lat, group = group), fill = NA,
+               color = "grey", size = 0.5, data = shp_herding) +
+  theme_bw() +
+  scale_fill_manual(values = c(#rev(brewer.pal(9, 'Blues')),
+                               'white', brewer.pal(9, 'Reds'), "black"),
+                    drop = FALSE, name = 'tas(ºC)') +
+  theme(panel.background = element_rect(fill = 'azure'),
+        text = element_text(family = "Times")) + # Colour of ocean
+  xlab('Longitude') + ylab('Latitude') +
+  scale_x_continuous(breaks = seq(-12, 45, 4), labels = waiver()) +
+  scale_y_continuous(breaks = seq(32, 70, 4), labels = waiver()) +
+  ggtitle(paste(month.name[mes], year))#
+
+
+######
+#PlotEquiMap(rps[1,,], lon = data$hcst$coords$longitude,
+#            lat = data$hcst$coords$latitude, filled.c = F,
+#            fileout = "test.png")
+# CRPS
+crps <- CRPS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev,
+             time_dim = 'syear', memb_dim = 'ensemble',
+             Fair = fair,
+             ncores = recipe$Analysis$ncores)
+# Not sure how this one should be computed:
+# It cannot be computed here because it would include information from the other years
+#source("modules/Skill/R/CRPS_clim.R")
+# But if it is done with ano_obs_tr it can be computed here;
+# the result is the same as inside the loop.
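+# For orientation, the spread-to-error ratio that SprErr() computes further
+# below is, schematically (hypothetical standalone sketch; ens is a
+# members x years matrix, obs a length-years vector):
+spread_error_sketch <- function(ens, obs) {
+  spread <- mean(apply(ens, 2, sd))            # mean intra-ensemble spread
+  rmse <- sqrt(mean((colMeans(ens) - obs)^2))  # error of the ensemble mean
+  spread / rmse
+}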
+crps_clim <- CRPS(exp = res$ano_obs_tr, obs = res$ano_obs_ev,
+                  time_dim = 'syear', memb_dim = 'sample.syear',
+                  Fair = fair,
+                  ncores = recipe$Analysis$ncores)
+
+
+# CRPSS
+ref <- res$ano_obs_tr
+dim(ref) <- c(ensemble = as.numeric(sdate_dim) - 1,
+              nftime, nlats, nlons, sdate_dim)
+crpss <- CRPSS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, ref = ref,
+               memb_dim = 'ensemble', Fair = fair,
+               time_dim = 'syear', clim.cross.val = FALSE,
+               ncores = recipe$Analysis$ncores)
+
+
+# Corr
+source("modules/Skill/R/tmp/Corr.R")
+enscorr <- Corr(res$ano_hcst_ev, res$ano_obs_ev,
+                dat_dim = NULL,
+                time_dim = 'syear',
+                method = 'pearson',
+                memb_dim = 'ensemble',
+                memb = F,
+                conf = F,
+                pval = F,
+                sign = T,
+                alpha = 0.05,
+                ncores = recipe$Analysis$ncores)
+
+# Mean Bias
+#mean_bias <- Bias(res$ano_hcst_ev, res$ano_obs_ev,
+mean_bias <- Bias(data$hcst$data, data$obs$data,
+                  time_dim = 'syear',
+                  memb_dim = 'ensemble',
+                  ncores = recipe$Analysis$ncores)
+
+mean_bias_sign <- Apply(list(data$hcst$data, data$obs$data),
+                        target_dims = list(c('syear', 'ensemble'), 'syear'),
+                        fun = function(x, y) {
+                          if (!(any(is.na(x)) || any(is.na(y)))) {
+                            res <- t.test(x = y,
+                                          y = apply(x, 1, mean, na.rm = T),
+                                          alternative = "two.sided")$p.value
+                          } else {
+                            res <- NA
+                          }
+                          return(res)},
+                        ncores = sdate_dim)$output1
+mean_bias_sign <- mean_bias_sign <= 0.05
+#PlotEquiMap(mean_bias[1,1,1,1,1,,], lat = data$hcst$coords$latitude,
+#            lon = data$hcst$coords$longitude,
+#            dots = mean_bias_sign[1,1,1,1,1,,,1])
+
+# Spread error ratio
+source("SprErr.R")
+enssprerr <- SprErr(exp = res$ano_hcst_ev, obs = res$ano_obs_ev,
+                    memb_dim = 'ensemble', dat_dim = NULL,
+                    time_dim = 'syear', pval = TRUE,
+                    ncores = recipe$Analysis$ncores)
+enssprerr_sign <- enssprerr$p.val
+enssprerr_sign <- enssprerr_sign <= 0.05
+enssprerr <- enssprerr$ratio
+
+# RMSE
+rms <- RMS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev,
+           memb_dim = 'ensemble', dat_dim = NULL,
+           time_dim = 'syear', alpha = 0.05,
+           ncores = recipe$Analysis$ncores)
+
+#obs_noensdim <- ClimProjDiags::Subset(res$ano_obs_ev, "ensemble", 1,
+#                                      drop = "selected")
+
+#enssprerr <- easyVerification::veriApply(verifun = 'EnsSprErr',
+#                                         fcst = res$ano_hcst_ev,
+#                                         obs = obs_noensdim,
+#                                         tdim = which(names(dim(res$ano_hcst_ev))=='syear'),
+#                                         ensdim = which(names(dim(res$ano_hcst_ev))=='ensemble'),
+#                                         na.rm = FALSE,
+#                                         ncpus = recipe$Analysis$ncores)
+if (any(is.na(rpss$sign))) {
+  info(recipe$Run$logger,
+       "RPSS NA")
+
+  rpss$sign[is.na(rpss$sign)] <- FALSE
+}
+skill_metrics <- list(mean_bias = mean_bias,
+                      mean_bias_significance = mean_bias_sign,
+                      enscorr = enscorr$corr,
+                      enscorr_significance = enscorr$sign,
+                      enssprerr = enssprerr,
+                      enssprerr_significance = enssprerr_sign,
+                      rps = rps, rps_clim = rps_clim, crps = crps, crps_clim = crps_clim,
+                      rpss = rpss$rpss, rpss_significance = rpss$sign, #crps = crps,
+                      crpss = crpss$crpss, crpss_significance = crpss$sign,
+                      rms = rms$rms)
+skill_metrics <- lapply(skill_metrics, function(x) {
+                   InsertDim(drop(x), len = 1, pos = 1, name = 'var')})
+original <- recipe$Run$output_dir
+recipe$Run$output_dir <- paste0(original, "/outputs/Skill/")
+
+skill_metrics <- lapply(skill_metrics, function(x) {
+                   if (is.logical(x)) {
+                     dims <- dim(x)
+                     res <- as.numeric(x)
+                     dim(res) <- dims
+                   } else {
+                     res <- x
+                   }
+                   return(res)
+                 })
+# Compute and save metrics
+source("modules/Saving/Saving.R")
+#Saving <- Saving(recipe = recipe, data = data, skill = skill_metrics)
+ save_metrics(recipe = recipe,
+              metrics = skill_metrics,
+              data_cube = data$hcst, agg =
'global', + outdir = recipe$Run$output_dir) + +recipe$Run$output_dir <- original + +source("modules/Visualization/Visualization.R") +if (data$hcst$coords$longitude[1] != 0) { + skill_metrics <- lapply(skill_metrics, function(x) { + Subset(x, along = 'longitude', indices = c(182:360, 1:181)) + }) +} + info(recipe$Run$logger, + paste("lons:", data$hcst$coords$longitude)) + info(recipe$Run$logger, + paste("lons:", data$obs$coords$longitude)) + + +data$hcst$coords$longitude <- -179:180 + +Visualization(recipe, data, skill_metrics, significance = TRUE) + +source("tools/add_logo.R") +add_logo(recipe, "rsz_rsz_bsc_logo.png") + + diff --git a/recipe_tas_oper.yml b/recipe_tas_oper.yml new file mode 100644 index 00000000..b7b7e0f6 --- /dev/null +++ b/recipe_tas_oper.yml @@ -0,0 +1,98 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: tas + freq: monthly_mean + units: C + flux: no + Datasets: + System: + name: ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + Multimodel: no # Mandatory, bool: Either yes/true or no/false + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0501' + fcst_year: '2024' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2000' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + latmin: 20 + latmax: 80 + lonmin: -20 + lonmax: 40 + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. + type: "to_reference" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: yes + cross_validation: no + save: none + Time_aggregation: + execute: no + Calibration: + method: envos # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss EnsSprErr + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. 
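+    # Note: each inner pair defines one set of category boundaries, e.g.
+    # [1/3, 2/3] gives terciles and [1/10, 9/10] the 10th/90th percentile
+    # extremes; the strings are evaluated to numbers downstream,
+    # e.g. eval(parse(text = "1/3")).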
+ save: none + Indicators: + index: no + Visualization: + plots: skill_metrics #forecast_ensemble_mean most_likely_terciles + multi_panel: no + dots: both + projection: Robinson + #projection: robinson + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: NULL + metric: mean_bias enscorr rpss crpss EnsSprErr + metric_aggregation: 'skill' + #inf_to_na: yes + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: esarchive + output_dir: /esarchive/scratch/nperez/cs_oper/ # replace with the directory where you want to save the outputs + code_dir: /esarchive/scratch/nperez/git4/sunset/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + -- GitLab From e6ecffe7c6e9fa1fb5bc3312e3347311fca2b310 Mon Sep 17 00:00:00 2001 From: nperez Date: Tue, 28 May 2024 18:26:54 +0200 Subject: [PATCH 13/78] fix pipeline and improve crossval --- crossval.R | 309 ++++++++++++++++++++++++++----------------- full_ecvs_oper.R | 3 +- recipe_tas_oper.yml | 10 +- tools/check_recipe.R | 1 - 4 files changed, 195 insertions(+), 128 deletions(-) diff --git a/crossval.R b/crossval.R index 4a725457..875b2a9c 100644 --- a/crossval.R +++ b/crossval.R @@ -1,52 +1,66 @@ # Full-cross-val workflow ## This code should be valid for individual months and temporal averages -## data dimensions -sdate_dim <- dim(data$hcst$data)['syear'] -nmemb <- dim(data$hcst$data)['ensemble'] -nftime <- dim(data$hcst$data)['time'] - -cross <- CSTools:::.make.eval.train.dexes('leave-one-out', sdate_dim, NULL) -outdim <- length(cross) # leave-one-out should be equal to sdate_dim - -# What we need to return? 
- -if ('latitude' %in% names(dim(data$hcst$data))) { - nlats <- dim(data$hcst$data)['latitude'] - nlons <- dim(data$hcst$data)['longitude'] -} else if ('region' %in% names(dim(data$hcst$data))) { - nregions <- dim(data$hcst$data)['region'] -} -ano_hcst_ev_res <- NULL -ano_obs_ev_res <- NULL -ano_obs_tr_res <- NULL +full_crossval_anomalies <- function(recipe, data) { + cross.method <- recipe$Analysis$Skill$cross.method + # TODO move check + if (is.null(cross.method)) { + cross.method <- 'leave-one-out' + } + # TODO a loop for pair of percentiles + probs <- recipe$Analysis$Workflow$Probabilities$percentiles + cal.method <- recipe$Analysis$Workflow$Calibration$method -cal_hcst_ev_res <- NULL -cal_hcst_tr_res <- NULL -obs_ev_res <- NULL -obs_tr_res <- NULL + ## data dimensions + sdate_dim <- dim(data$hcst$data)['syear'] + nmemb <- dim(data$hcst$data)['ensemble'] + nftime <- dim(data$hcst$data)['time'] + # spatial dims + if ('latitude' %in% names(dim(data$hcst$data))) { + nlats <- dim(data$hcst$data)['latitude'] + nlons <- dim(data$hcst$data)['longitude'] + ev_dim_names <- c('dat', 'var', 'sday', 'sweek', 'time', + 'latitude', 'longitude', 'ensemble', 'syear') + tr_dim_names <- c('dat', 'var', 'sday', 'sweek', 'ensemble', 'time', + 'latitude', 'longitude', 'unneeded', 'syear') + } else if ('region' %in% names(dim(data$hcst$data))) { + nregions <- dim(data$hcst$data)['region'] + ev_dim_names <- c('dat', 'var', 'sday', 'sweek', 'time', + 'region', 'ensemble', 'syear') + tr_dim_names <- c('dat', 'var', 'sday', 'sweek', 'ensemble', 'time', + 'region', 'unneeded', 'syear') + } -lims_ano_hcst_tr_res <- NULL -lims_ano_obs_tr_res <- NULL + # TODO fix it to use new version https://earth.bsc.es/gitlab/external/cstools/-/blob/dev-cross-indices/R/CST_Calibration.R#L570 + cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, + amt.points = sdate_dim, + amt.points_cor = NULL) # k = ? 
+ outdim <- length(cross) # leave-one-out should be equal to sdate_dim -lims_cal_hcst_tr_res <- NULL -lims_cal_obs_tr_res <- NULL + ## output objects + ano_hcst_ev_res <- NULL + ano_obs_ev_res <- NULL + ano_obs_tr_res <- NULL + # as long as probs requested in recipe: + lims_ano_hcst_tr_res <- lapply(probs, function(X) {NULL}) + lims_ano_obs_tr_res <- lapply(probs, function(X) {NULL}) + hcst_probs_ev <- lapply(probs, function(x){NULL}) + obs_probs_ev <- lapply(probs, function(x){NULL}) -for (t in 1:outdim) { - info(recipe$Run$logger, paste("crossval:", t)) + for (t in 1:outdim) { + info(recipe$Run$logger, paste("crossval:", t)) - # subset years: Subset works at BSC not at Athos - ## training indices - obs_tr <- Subset(data$obs$data, along = 'syear', - indices = cross[[t]]$train.dexes) - hcst_tr <- Subset(data$hcst$data, along = 'syear', - indices = cross[[t]]$train.dexes) - ## evaluation indices - hcst_ev <- Subset(data$hcst$data, along = 'syear', - indices = cross[[t]]$eval.dexes) - obs_ev <- Subset(data$obs$data, along = 'syear', - indices = cross[[t]]$eval.dexes) - if (anomalies_comp) { + # subset years: Subset works at BSC not at Athos + ## training indices + obs_tr <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$train.dexes) + hcst_tr <- Subset(data$hcst$data, along = 'syear', + indices = cross[[t]]$train.dexes) + ## evaluation indices + hcst_ev <- Subset(data$hcst$data, along = 'syear', + indices = cross[[t]]$eval.dexes, drop = 'selected') + obs_ev <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$eval.dexes, drop = 'selected') # compute climatology: clim_obs_tr <- MeanDims(obs_tr, 'syear') clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble')) @@ -59,92 +73,147 @@ for (t in 1:outdim) { ncores = recipe$Analysis$ncores) ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr, ncores = recipe$Analysis$ncores) - #rm("clim_obs_tr", "clim_hcst_tr", "obs_tr", "hcst_tr", "obs_ev", "hcst_ev") - #Category limits + # compute category limits lims_ano_hcst_tr <- Apply(ano_hcst_tr, target_dims = c('syear', 'ensemble'), - fun = function(x) { - quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)}, - output_dims = 'probs', - ncores = recipe$Analysis$ncores)$output1 + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + ps <- sapply(ps, function (x) eval(parse(text = x))) + quantile(as.vector(x), ps, na.rm = TRUE)})}, + output_dims = lapply(probs, function(x) {'cat'}), + prob_lims = probs, + ncores = recipe$Analysis$ncores) lims_ano_obs_tr <- Apply(ano_obs_tr, target_dims = c('syear'),#, 'ensemble'), - fun = function(x) { - quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)}, - output_dims = 'probs', - ncores = recipe$Analysis$ncores)$output1 + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + ps <- sapply(ps, function (x) eval(parse(text = x))) + quantile(as.vector(x), ps, na.rm = TRUE)})}, + output_dims = lapply(probs, function(x){'cat'}), + prob_lims = probs, + ncores = recipe$Analysis$ncores) + #store results ano_hcst_ev_res <- abind(ano_hcst_ev_res, ano_hcst_ev, - along = length(dim(ano_hcst_ev)) + 1) + along = length(dim(ano_hcst_ev)) + 1) ano_obs_ev_res <- abind(ano_obs_ev_res, ano_obs_ev, - along = length(dim(ano_obs_ev)) + 1) + along = length(dim(ano_obs_ev)) + 1) ano_obs_tr_res <- abind(ano_obs_tr_res, ano_obs_tr, - along = length(dim(ano_obs_tr)) + 1) - lims_ano_hcst_tr_res <- abind(lims_ano_hcst_tr_res, lims_ano_hcst_tr, - along = length(dim(lims_ano_hcst_tr)) + 1) - lims_ano_obs_tr_res <- abind(lims_ano_obs_tr_res, lims_ano_obs_tr, - 
along = length(dim(lims_ano_obs_tr)) + 1) + along = length(dim(ano_obs_tr)) + 1) + for(ps in 1:length(probs)) { + lims_ano_hcst_tr_res[[ps]] <- abind(lims_ano_hcst_tr_res[[ps]], lims_ano_hcst_tr[[ps]], + along = length(dim(lims_ano_hcst_tr[[ps]])) + 1) + lims_ano_obs_tr_res[[ps]] <- abind(lims_ano_obs_tr_res[[ps]], lims_ano_obs_tr[[ps]], + along = length(dim(lims_ano_obs_tr[[ps]])) + 1) + } } - if (calibraion_comp) { - cal_hcst_tr <- Calibration(hcst_tr, obs_tr, exp_cor = NULL, - cal.method = "mse_min", eval.method = "in-sample", - multi.model = FALSE, na.fill = TRUE, - na.rm = TRUE, apply_to = NULL, alpha = NULL, - memb_dim = 'ensemble', sdate_dim = 'syear', - dat_dim = NULL, - ncores = recipe$Analysis$ncores) - cal_hcst_ev <- Calibration(hcst_tr, obs_tr, exp_cor = hcst_ev, - cal.method = "mse_min", - eval.method = "hindcast-vs-forecast", - multi.model = FALSE, na.fill = TRUE, - na.rm = TRUE, apply_to = NULL, alpha = NULL, - memb_dim = 'ensemble', sdate_dim = 'syear', - dat_dim = NULL, - ncores = recipe$Analysis$ncores) - #Category limits - lims_cal_hcst_tr <- Apply(cal_hcst_tr, target_dims = c('syear', 'ensemble'), - fun = function(x) { - quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)}, - output_dims = 'probs', - ncores = recipe$Analysis$ncores)$output1 - lims_cal_obs_tr <- Apply(obs_tr, target_dims = c('syear'),#, 'ensemble'), - fun = function(x) { - quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)}, - output_dims = 'probs', - ncores = recipe$Analysis$ncores)$output1 - cal_hcst_tr <- Subset(cal_hcst_tr, indices = 1, along = 'syear', drop = 'selected') - cal_hcst_tr_res <- abind(cal_hcst_tr_res, cal_hcst_tr, - along = length(dim(cal_hcst_tr)) + 1) - names(dim(cal_hcst_tr_res)) <- c(names(dim(cal_hcst_tr)), names(sdate_dim)) - cal_hcst_ev <- Subset(cal_hcst_ev, indices = 1, along = 'syear', drop = 'selected') - cal_hcst_ev_res <- abind(cal_hcst_ev_res, cal_hcst_ev, - along = length(dim(cal_hcst_ev)) + 1) - names(dim(cal_hcst_ev_res)) <- c(names(dim(cal_hcst_ev)), names(sdate_dim)) + info(recipe$Run$logger, + "#### Anomalies Cross-validation loop ended #####") + gc() - obs_tr_res <- abind(obs_tr_res, obs_tr, - along = length(dim(obs_tr)) + 1) - names(dim(obs_tr_res)) <- c(names(dim(obs_tr)), names(sdate_dim)) - lims_cal_hcst_tr_res <- abind(lims_cal_hcst_tr_res, lims_cal_hcst_tr, - along = length(dim(lims_cal_hcst_tr)) + 1) - names(dim(lims_cal_hcst_tr_res)) <- c(names(dim(lims_cal_hcst_tr)), names(sdate_dim)) - lims_cal_obs_tr <- Subset(lims_cal_obs_tr, indices = 1, along = 'ensemble', drop = 'selected') - lims_cal_obs_tr_res <- abind(lims_cal_obs_tr_res, lims_cal_obs_tr, - along = length(dim(lims_cal_obs_tr)) + 1) - names(dim(lims_cal_obs_tr_res)) <- c(names(dim(lims_cal_obs_tr)), names(sdate_dim)) + # add dim names: + names(dim(ano_hcst_ev_res)) <- ev_dim_names + names(dim(ano_obs_ev_res)) <- ev_dim_names + names(dim(ano_obs_tr_res)) <- tr_dim_names + for(ps in 1:length(probs)) { + names(dim(lims_ano_hcst_tr_res[[ps]])) <- c('cat', 'dat', 'var', 'sday', 'sweek', + 'time', 'latitude', 'longitude', 'syear') + names(dim(lims_ano_obs_tr_res[[ps]])) <- c('cat', 'dat', 'var', 'sday', 'sweek', + 'time', 'latitude', 'longitude', 'extra', 'syear') + lims_ano_obs_tr_res[[ps]] <- Subset(lims_ano_obs_tr_res[[ps]], + along = 'extra', indices = 1, drop = 'selected') } - gc() - res <- list(ano_hcst_ev = ano_hcst_ev_res, - ano_obs_ev = ano_obs_ev_res, - ano_obs_tr = ano_obs_tr_res, #reference forecast for the CRPSS - lims_ano_hcst_tr = lims_ano_hcst_tr_res, - lims_ano_obs_tr = lims_ano_obs_tr_res, 
- cal_hcst_tr = cal_hcst_tr_res, - cal_hcst_ev = cal_hcst_ev_res, - obs_tr = obs_tr_res, - lims_cal_hcst_tr = lims_cal_hcst_tr_res, - lims_cal_obs_tr = lims_cal_obs_tr_res ) + # Compute Probabilities + source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/GetProbs.R") + for (ps in 1:length(probs)) { + hcst_probs_ev[[ps]] <- GetProbs(ano_hcst_ev_res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', abs_thresholds = lims_ano_hcst_tr[[ps]], + ncores = recipe$Analysis$ncores) + obs_probs_ev[[ps]] <- GetProbs(ano_obs_ev_res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_ano_obs_tr_res[[ps]], + ncores = recipe$Analysis$ncores) + } + # Convert to s2dv_cubes + ano_hcst <- data$hcst + ano_hcst$data <- ano_hcst_ev_res + ano_obs <- data$obs + ano_obs$data <- ano_obs_ev_res + if (is.null(data$fcst)) { + # Forecast anomalies: + clim_hcst <- Apply(data$hcst$data, + target_dims = c('syear', 'ensemble'), + mean, + na.rm = na.rm, + ncores = recipe$Analysis$ncores)$output1 + data$fcst$data <- Ano(data = data$fcst$data, clim = clim_hcst) + } + info(recipe$Run$logger, + "#### Anomalies and Probabilities Done #####") + if (recipe$Analysis$Workflow$Anomalies$save != 'none') { + info(recipe$Run$logger, "##### START SAVING ANOMALIES #####") + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Anomalies/") + # Save forecast + if ((recipe$Analysis$Workflow$Anomalies$save %in% + c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) { + save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') + } + # Save hindcast + if (recipe$Analysis$Workflow$Anomalies$save %in% + c('all', 'exp_only')) { + save_forecast(recipe = recipe, data_cube = ano_hcst, type = 'hcst') + } + # Save observation + if (recipe$Analysis$Workflow$Anomalies$save == 'all') { + save_observations(recipe = recipe, data_cube = ano_obs) + } + } + # Save probability bins + result <- list() + all_names <- NULL + for (ps in 1:length(probs)) { + for (perc in 1:(length(recipe$Analysis$Workflow$Probabilities$percentiles[[ps]]) + 1)) { +#ps <- 1 + if (perc == 1) { + name_elem <- paste0("below_", recipe$Analysis$Workflow$Probabilities$percentiles[[ps]][perc]) + } else if (perc == length(recipe$Analysis$Workflow$Probabilities$percentiles[[ps]]) + 1) { + name_elem <- paste0("above_", recipe$Analysis$Workflow$Probabilities$percentiles[[ps]][perc-1]) + } else { + name_elem <- paste0("between_", recipe$Analysis$Workflow$Probabilities$percentiles[[ps]][perc-1], + "_and_", recipe$Analysis$Workflow$Probabilities$percentiles[[ps]][perc]) + } + result <- append(list(Subset(hcst_probs_ev[[ps]], along = 'cat', indices = ps, drop = 'all')), result) + all_names <- c(all_names, name_elem) + } + } + names(result) <- all_names + if (!('var' %in% names(dim(result[[1]])))) { + result <- lapply(result, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } + if (recipe$Analysis$Workflow$Probabilities$save %in% + c('all', 'bins_only')) { + save_probabilities(recipe = recipe, probs = result, + data_cube = data$hcst, type = "hcst") + # TODO Forecast + probs_fcst <- NULL + if (!is.null(probs_fcst)) { + save_probabilities(recipe = recipe, probs = probs_fcst, + data_cube = data$fcst, type = "fcst") + } + } +# } + return(list(hcst = ano_hcst, obs = ano_obs_ev, fcst = data$fcst, + hcst.full_val = data$hcst, obs.full_val = data$obs, + cat_lims = list(hcst_tr = 
lims_ano_hcst_tr_res, + obs_tr = lims_ano_obs_tr_res), + probs = list(hcst_ev = hcst_probs_ev, + obs_ev = obs_probs_ev), + ref_obs_tr = ano_obs_tr_res)) } -info(recipe$Run$logger, - paste0("Cross-validation loop ended, returning elements:", - paste(names(res), collapse = " "))) - - - diff --git a/full_ecvs_oper.R b/full_ecvs_oper.R index ba65dbd5..e2976eba 100644 --- a/full_ecvs_oper.R +++ b/full_ecvs_oper.R @@ -29,9 +29,8 @@ if (recipe$Analysis$Time$sdate == '0101') { } } -calibraion_comp <- TRUE -anomalies_comp <- FALSE source("crossval.R") +res <- full_crossval_anomalies(recipe = recipe, data = data) ## Define FAIR option: fair <- TRUE diff --git a/recipe_tas_oper.yml b/recipe_tas_oper.yml index b7b7e0f6..2b39add3 100644 --- a/recipe_tas_oper.yml +++ b/recipe_tas_oper.yml @@ -21,12 +21,12 @@ Analysis: hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' hcst_end: '2000' # Mandatory, int: Hindcast end year 'YYYY' ftime_min: 1 # Mandatory, int: First leadtime time step in months - ftime_max: 6 # Mandatory, int: Last leadtime time step in months + ftime_max: 3 # Mandatory, int: Last leadtime time step in months Region: latmin: 20 - latmax: 80 + latmax: 10 lonmin: -20 - lonmax: 40 + lonmax: 10 Regrid: method: conservative # Mandatory, str: Interpolation method. See docu. type: "to_reference" @@ -35,7 +35,7 @@ Analysis: Anomalies: compute: yes cross_validation: no - save: none + save: all Time_aggregation: execute: no Calibration: @@ -48,7 +48,7 @@ Analysis: cross_validation: yes Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. - save: none + save: all Indicators: index: no Visualization: diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 0b9919b8..245e4e3d 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -118,7 +118,6 @@ check_recipe <- function(recipe) { error_status <- TRUE } if (!tolower(recipe$Analysis$Datasets$Multimodel$approach) %in% -ls/check_recipe.R MULTIMODEL_METHODS) { error(recipe$Run$logger, paste("The specified approach for the multimodel is not valid.", -- GitLab From e5b87804ec019fcef397ab1a42396fb8644777aa Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 29 May 2024 18:56:42 +0200 Subject: [PATCH 14/78] cross val saved --- example_scripts/exec_units.R | 8 +- crossval.R => full_crossval_anomalies.R | 142 +++++++----- full_ecvs_oper.R | 273 +----------------------- recipe_tas_oper.yml | 2 +- skill_full_crossval.R | 147 +++++++++++++ 5 files changed, 249 insertions(+), 323 deletions(-) rename crossval.R => full_crossval_anomalies.R (65%) create mode 100644 skill_full_crossval.R diff --git a/example_scripts/exec_units.R b/example_scripts/exec_units.R index 819121c9..081038d2 100644 --- a/example_scripts/exec_units.R +++ b/example_scripts/exec_units.R @@ -21,16 +21,16 @@ source("tools/prepare_outputs.R") recipe <- prepare_outputs(recipe_file) # Load datasets -data <- load_datasets(recipe) +data <- Loading(recipe) # Units transformation source("modules/Units/Units.R") test <- Units(recipe, data) # Calibrate datasets -data <- calibrate_datasets(recipe, test) +data <- Calibration(recipe, test) # Compute skill metrics -skill_metrics <- compute_skill_metrics(recipe, data) +skill_metrics <- Skill(recipe, data) # Compute percentiles and probability bins -probabilities <- compute_probabilities(recipe, data) +probabilities <- Probabilities(recipe, data) # Export all data to netCDF ## TODO: Fix plotting # save_data(recipe, data, skill_metrics, probabilities) diff --git a/crossval.R b/full_crossval_anomalies.R 
similarity index 65% rename from crossval.R rename to full_crossval_anomalies.R index 875b2a9c..649f841b 100644 --- a/crossval.R +++ b/full_crossval_anomalies.R @@ -1,5 +1,6 @@ # Full-cross-val workflow ## This code should be valid for individual months and temporal averages +source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/GetProbs.R") full_crossval_anomalies <- function(recipe, data) { cross.method <- recipe$Analysis$Skill$cross.method @@ -7,10 +8,15 @@ full_crossval_anomalies <- function(recipe, data) { if (is.null(cross.method)) { cross.method <- 'leave-one-out' } - # TODO a loop for pair of percentiles - probs <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + round(eval(parse(text = y)),2)})}) cal.method <- recipe$Analysis$Workflow$Calibration$method - + ncores <- recipe$Analysis$ncores + na.rm <- recipe$Analysis$remove_NAs + ## TODO remove if the recipe is checked: + na.rm <- TRUE ## data dimensions sdate_dim <- dim(data$hcst$data)['syear'] nmemb <- dim(data$hcst$data)['ensemble'] @@ -19,11 +25,13 @@ full_crossval_anomalies <- function(recipe, data) { if ('latitude' %in% names(dim(data$hcst$data))) { nlats <- dim(data$hcst$data)['latitude'] nlons <- dim(data$hcst$data)['longitude'] + agg = 'global' ev_dim_names <- c('dat', 'var', 'sday', 'sweek', 'time', 'latitude', 'longitude', 'ensemble', 'syear') tr_dim_names <- c('dat', 'var', 'sday', 'sweek', 'ensemble', 'time', 'latitude', 'longitude', 'unneeded', 'syear') } else if ('region' %in% names(dim(data$hcst$data))) { + agg = 'region' nregions <- dim(data$hcst$data)['region'] ev_dim_names <- c('dat', 'var', 'sday', 'sweek', 'time', 'region', 'ensemble', 'syear') @@ -42,10 +50,12 @@ full_crossval_anomalies <- function(recipe, data) { ano_obs_ev_res <- NULL ano_obs_tr_res <- NULL # as long as probs requested in recipe: - lims_ano_hcst_tr_res <- lapply(probs, function(X) {NULL}) - lims_ano_obs_tr_res <- lapply(probs, function(X) {NULL}) - hcst_probs_ev <- lapply(probs, function(x){NULL}) - obs_probs_ev <- lapply(probs, function(x){NULL}) + lims_ano_hcst_tr_res <- lapply(categories, function(X) {NULL}) + lims_ano_obs_tr_res <- lapply(categories, function(X) {NULL}) + + fcst_probs <- lapply(categories, function(x){NULL}) + hcst_probs_ev <- lapply(categories, function(x){NULL}) + obs_probs_ev <- lapply(categories, function(x){NULL}) for (t in 1:outdim) { info(recipe$Run$logger, paste("crossval:", t)) @@ -66,30 +76,28 @@ full_crossval_anomalies <- function(recipe, data) { clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble')) # compute anomalies: ano_obs_tr <- s2dv::Ano(obs_tr, clim_obs_tr, - ncores = recipe$Analysis$ncores) + ncores = ncores) ano_hcst_tr <- s2dv::Ano(hcst_tr, clim_hcst_tr, - ncores = recipe$Analysis$ncores) + ncores = ncores) ano_hcst_ev <- s2dv::Ano(hcst_ev, clim_hcst_tr, - ncores = recipe$Analysis$ncores) + ncores = ncores) ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr, - ncores = recipe$Analysis$ncores) + ncores = ncores) # compute category limits lims_ano_hcst_tr <- Apply(ano_hcst_tr, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - ps <- sapply(ps, function (x) eval(parse(text = x))) - quantile(as.vector(x), ps, na.rm = TRUE)})}, - output_dims = lapply(probs, function(x) {'cat'}), - prob_lims = probs, - ncores = recipe$Analysis$ncores) + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = 
lapply(categories, function(x) {'cat'}), + prob_lims = categories, + ncores = ncores) lims_ano_obs_tr <- Apply(ano_obs_tr, target_dims = c('syear'),#, 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - ps <- sapply(ps, function (x) eval(parse(text = x))) - quantile(as.vector(x), ps, na.rm = TRUE)})}, - output_dims = lapply(probs, function(x){'cat'}), - prob_lims = probs, - ncores = recipe$Analysis$ncores) + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x){'cat'}), + prob_lims = categories, + ncores = ncores) #store results ano_hcst_ev_res <- abind(ano_hcst_ev_res, ano_hcst_ev, along = length(dim(ano_hcst_ev)) + 1) @@ -97,7 +105,7 @@ full_crossval_anomalies <- function(recipe, data) { along = length(dim(ano_obs_ev)) + 1) ano_obs_tr_res <- abind(ano_obs_tr_res, ano_obs_tr, along = length(dim(ano_obs_tr)) + 1) - for(ps in 1:length(probs)) { + for(ps in 1:length(categories)) { lims_ano_hcst_tr_res[[ps]] <- abind(lims_ano_hcst_tr_res[[ps]], lims_ano_hcst_tr[[ps]], along = length(dim(lims_ano_hcst_tr[[ps]])) + 1) lims_ano_obs_tr_res[[ps]] <- abind(lims_ano_obs_tr_res[[ps]], lims_ano_obs_tr[[ps]], @@ -108,11 +116,13 @@ full_crossval_anomalies <- function(recipe, data) { "#### Anomalies Cross-validation loop ended #####") gc() - # add dim names: + # Add dim names: names(dim(ano_hcst_ev_res)) <- ev_dim_names names(dim(ano_obs_ev_res)) <- ev_dim_names names(dim(ano_obs_tr_res)) <- tr_dim_names - for(ps in 1:length(probs)) { + # To make crps_clim to work the reference forecast need to have same dims as obs: + ano_obs_tr_res <- Subset(ano_obs_tr_res, along = 'unneeded', indices = 1, drop = 'selected') + for(ps in 1:length(categories)) { names(dim(lims_ano_hcst_tr_res[[ps]])) <- c('cat', 'dat', 'var', 'sday', 'sweek', 'time', 'latitude', 'longitude', 'syear') names(dim(lims_ano_obs_tr_res[[ps]])) <- c('cat', 'dat', 'var', 'sday', 'sweek', @@ -120,37 +130,56 @@ full_crossval_anomalies <- function(recipe, data) { lims_ano_obs_tr_res[[ps]] <- Subset(lims_ano_obs_tr_res[[ps]], along = 'extra', indices = 1, drop = 'selected') } + + # Forecast anomalies: + if (!is.null(data$fcst)) { + clim_hcst <- Apply(ano_hcst_ev_res, + target_dims = c('syear', 'ensemble'), + mean, + na.rm = na.rm, + ncores = ncores)$output1 + data$fcst$data <- Ano(data = data$fcst$data, clim = clim_hcst) + # Terciles limits using the whole hindcast period: + lims_fcst <- Apply(ano_hcst_ev_res, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x) {'cat'}), + prob_lims = categories, + ncores = ncores) + } + # Compute Probabilities - source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/GetProbs.R") - for (ps in 1:length(probs)) { + for (ps in 1:length(categories)) { hcst_probs_ev[[ps]] <- GetProbs(ano_hcst_ev_res, time_dim = 'syear', prob_thresholds = NULL, bin_dim_abs = 'cat', indices_for_quantiles = NULL, memb_dim = 'ensemble', abs_thresholds = lims_ano_hcst_tr[[ps]], - ncores = recipe$Analysis$ncores) + ncores = ncores) obs_probs_ev[[ps]] <- GetProbs(ano_obs_ev_res, time_dim = 'syear', prob_thresholds = NULL, bin_dim_abs = 'cat', indices_for_quantiles = NULL, memb_dim = 'ensemble', abs_thresholds = lims_ano_obs_tr_res[[ps]], - ncores = recipe$Analysis$ncores) + ncores = ncores) + if (!is.null(data$fcst)) { + fcst_probs[[ps]] <- GetProbs(data$fcst$data, time_dim = 'syear', + prob_thresholds = NULL, + 
bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_fcst[[ps]], + ncores = ncores) + } } - # Convert to s2dv_cubes + # Convert to s2dv_cubes the resulting anomalies ano_hcst <- data$hcst ano_hcst$data <- ano_hcst_ev_res ano_obs <- data$obs ano_obs$data <- ano_obs_ev_res - if (is.null(data$fcst)) { - # Forecast anomalies: - clim_hcst <- Apply(data$hcst$data, - target_dims = c('syear', 'ensemble'), - mean, - na.rm = na.rm, - ncores = recipe$Analysis$ncores)$output1 - data$fcst$data <- Ano(data = data$fcst$data, clim = clim_hcst) - } + info(recipe$Run$logger, "#### Anomalies and Probabilities Done #####") if (recipe$Analysis$Workflow$Anomalies$save != 'none') { @@ -175,18 +204,19 @@ full_crossval_anomalies <- function(recipe, data) { # Save probability bins result <- list() all_names <- NULL - for (ps in 1:length(probs)) { - for (perc in 1:(length(recipe$Analysis$Workflow$Probabilities$percentiles[[ps]]) + 1)) { + for (ps in 1:length(categories)) { + for (perc in 1:(length(categories[[ps]]) + 1)) { #ps <- 1 if (perc == 1) { - name_elem <- paste0("below_", recipe$Analysis$Workflow$Probabilities$percentiles[[ps]][perc]) - } else if (perc == length(recipe$Analysis$Workflow$Probabilities$percentiles[[ps]]) + 1) { - name_elem <- paste0("above_", recipe$Analysis$Workflow$Probabilities$percentiles[[ps]][perc-1]) + name_elem <- paste0("below_", categories[[ps]][perc]) + } else if (perc == length(categories[[ps]]) + 1) { + name_elem <- paste0("above_", categories[[ps]][perc-1]) } else { - name_elem <- paste0("between_", recipe$Analysis$Workflow$Probabilities$percentiles[[ps]][perc-1], - "_and_", recipe$Analysis$Workflow$Probabilities$percentiles[[ps]][perc]) + name_elem <- paste0("between_", categories[[ps]][perc-1], + "_and_", categories[[ps]][perc]) } - result <- append(list(Subset(hcst_probs_ev[[ps]], along = 'cat', indices = ps, drop = 'all')), result) + result <- append(list(Subset(hcst_probs_ev[[ps]], + along = 'cat', indices = ps, drop = 'all')), result) all_names <- c(all_names, name_elem) } } @@ -199,16 +229,28 @@ full_crossval_anomalies <- function(recipe, data) { if (recipe$Analysis$Workflow$Probabilities$save %in% c('all', 'bins_only')) { save_probabilities(recipe = recipe, probs = result, - data_cube = data$hcst, type = "hcst") + data_cube = data$hcst, agg = agg, + type = "hcst") # TODO Forecast probs_fcst <- NULL if (!is.null(probs_fcst)) { save_probabilities(recipe = recipe, probs = probs_fcst, - data_cube = data$fcst, type = "fcst") + data_cube = data$fcst, agg = agg, + type = "fcst") } } -# } - return(list(hcst = ano_hcst, obs = ano_obs_ev, fcst = data$fcst, + # Save ensemble mean for multimodel option: + hcst_EM <- MeanDims(ano_hcst$data, 'ensemble', drop = T) + save_metrics(recipe = recipe, metrics = list(hcst_EM = hcst_EM), + data_cube = data$hcst, agg = agg, + module = "statistics") + if (!is.null(data$fcst)) { + fcst_EM <- MeanDims(data$fcst$data, 'ensemble', drop = T) + save_metrics(recipe = recipe, metrics = list(fcst_EM = fcst_EM), + data_cube = data$fcst, agg = agg, + module = "statistics") + } + return(list(hcst = ano_hcst, obs = ano_obs, fcst = data$fcst, hcst.full_val = data$hcst, obs.full_val = data$obs, cat_lims = list(hcst_tr = lims_ano_hcst_tr_res, obs_tr = lims_ano_obs_tr_res), diff --git a/full_ecvs_oper.R b/full_ecvs_oper.R index e2976eba..00050965 100644 --- a/full_ecvs_oper.R +++ b/full_ecvs_oper.R @@ -14,278 +14,15 @@ data <- Units(recipe, data) data_summary(data$hcst, recipe) data_summary(data$obs, recipe) -# 
UKMO January 1993 is missing: -if (recipe$Analysis$Time$sdate == '0101') { - if (recipe$Analysis$Datasets$System$name == "UKMO-System602") { - if (1993 %in% recipe$Analysis$Time$hcst_start:recipe$Analysis$Time$hcst_end) { - info(recipe$Run$logger, - "UKMO January 1993 not available") - ind <- recipe$Analysis$Time$hcst_start:recipe$Analysis$Time$hcst_end - ind <- (1:length(ind))[-which(ind == 1993)] - data$hcst <- CST_Subset(data$hcst, along = 'syear', indices = ind) - data$obs <- CST_Subset(data$obs, along = 'syear', indices = ind) - sdate_dim <- dim(data$hcst$data)['syear'] - } - } -} -source("crossval.R") +source("full_crossval_anomalies.R") res <- full_crossval_anomalies(recipe = recipe, data = data) -## Define FAIR option: -fair <- TRUE +source("skill_full_crossval.R") +skill_metrics <- skill_full_crossval(recipe = recipe, data_crossval = res, + fair = FALSE, nmemb = NULL, nmemb_ref = NULL) -## START SKILL ASSESSMENT: -# RPS -source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/GetProbs.R") -cal_hcst_probs_ev <- GetProbs(res$cal_hcst_ev, time_dim = 'syear', - prob_thresholds = NULL, - bin_dim_abs = 'probs', - indices_for_quantiles = NULL, - memb_dim = 'ensemble', abs_thresholds = res$lims_cal_hcst_tr, - ncores = recipe$Analysis$ncores) -cal_obs_probs_ev <- GetProbs(data$obs$data, time_dim = 'syear', - prob_thresholds = NULL, - bin_dim_abs = 'probs', - indices_for_quantiles = NULL, - memb_dim = 'ensemble', - abs_thresholds = res$lims_cal_obs_tr, - ncores = recipe$Analysis$ncores) -source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/modules/Skill/R/tmp/RPS.R") -rps <- RPS(exp = ano_hcst_probs_ev, obs = ano_obs_probs_ev, memb_dim = NULL, - cat_dim = 'probs', cross.val = FALSE, time_dim = 'syear', - Fair = fair, nmemb = nmemb, - ncores = recipe$Analysis$ncores) -source("modules/Skill/R/RPS_clim.R") -rps_clim <- Apply(list(ano_obs_probs_ev), - target_dims = c('probs', 'syear'), - RPS_clim, bin_dim_abs = 'probs', Fair = fair, - cross.val = FALSE, ncores = recipe$Analysis$ncores)$output1 -# RPSS -source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/RPSS.R") -source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/RandomWalkTest.R") -rpss <- RPSS(exp = cal_hcst_probs_ev, obs = cal_obs_probs_ev, - time_dim = 'syear', memb_dim = NULL, - cat_dim = 'probs', Fair = fair, nmemb = nmemb, - # We should use a line like this - #abs_threshold = res$lims_ano_hcst_tr, - #prob_threshold = c(1/3, 2/3), - cross.val = FALSE, - ncores = recipe$Analysis$ncores) - -cal_fcst <- CST_Calibration(data$hcst, data$obs, data$fcst, - sdate_dim = 'syear', memb_dim = 'ensemble') - - -shp_file = "/esarchive/shapefiles/NUTS3/NUTS_RG_60M_2021_4326.shp" -shp <- sf::st_read(shp_file) # class sf -ids <- subset(shp, shp$LEVL_CODE == 3) - - -###### -library(sf) -library(ggplot2) -#library(ggplot2, lib.loc = .libPaths()[2]) -library(RColorBrewer) -library(rgeos) - -gusa <- map_data("world") - -pals <- brewer.pal(9, 'Reds')[3:9] -pals <- c("#ffffb3",pals,'#525252') - -brks <- 0:5 -brks <- seq(258, 274, 2) - -plot_poly <- list() -polys_data <- NULL - - -## Now fill each polygon with the mean value - data_pol <- data.frame(data = apply(cal_fcst$data[1,1,1,1,1,1,,], 2, mean), - LAU_ID = polys) - shp_data <- merge(subset(shp, data_pol, by.x="LAU_ID", by.y="LAU_ID") - -# Magic lines below that converts SpatialPolygonsDataFrame to normal data frame - shp_data@data$id <- rownames(shp_data@data) - df.points <- fortify(shp_data, region = "id") - df.df <- plyr::join(df.points, 
shp_data@data, by = "id") -ano <- df.df -ano$data <- ano$data - fmi_clim_m$data - -ggplot(data = ano) + #df.df) + - geom_polygon(aes(long, lat, group = group),fill = "lightgrey", data = gusa) + - coord_map(projection = "stereographic", - xlim = c(lon_min, lon_max), - ylim = c(lat_min, lat_max)) + - # geom_polygon(aes(x = long, y = lat, fill = cut(data, breaks = seq(-14, 18, 2)), - geom_polygon(aes(x = long, y = lat, fill = cut(data, - breaks = seq(0, 5, 0.5)), - group = group), color = "grey",size = 0.08) + - geom_polygon(aes(x = long, y = lat, group = group), fill = NA, - color = "grey", size = 0.5, data = shp_herding) + - theme_bw() + - scale_fill_manual(values = c(#rev(brewer.pal(9, 'Blues')), - 'white', brewer.pal(9, 'Reds'), "black"), - drop = FALSE, name = 'tas(ºC)') + - theme(panel.background = element_rect(fill = 'azure'), - text = element_text(family = "Times")) + # Colour of ocean - xlab('Longitude') + ylab('Latitude') + - scale_x_continuous(breaks = seq(-12, 45, 4),labels = waiver()) + - scale_y_continuous(breaks = seq(32, 70, 4),labels = waiver()) + - ggtitle(paste(month.name[mes], year))# + - -###### -#PlotEquiMap(rps[1,,], lon = data$hcst$coords$longitude, -# lat = data$hcst$coords$latitude, filled.c = F, -# fileout = "test.png") -# CRPS -crps <- CRPS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, - time_dim = 'syear', memb_dim = 'ensemble', - Fair = fair, - ncores = recipe$Analysis$ncores) -# Este no sé como se calcula????: -# Aquí no se puede porque estaría incluyendo información de los otros años -#source("modules/Skill/R/CRPS_clim.R") -# Pero si lo hago con el ano_obs_tr si puedo hacerlo aquí -# el resultado es igual a dentro del bucle. -crps_clim <- CRPS(exp = res$ano_obs_tr, obs = res$ano_obs_ev, - time_dim = 'syear', memb_dim = 'sample.syear', - Fair = fair - ncores = recipe$Analysis$ncores) - - -# CRPSS -ref <- res$ano_obs_tr -dim(ref) <- c(ensemble = as.numeric(sdate_dim) -1, - nftime, nlats, nlons, sdate_dim) -crpss <- CRPSS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, ref = ref, - memb_dim = 'ensemble', Fair = fair, - time_dim = 'syear', clim.cross.val = FALSE, - ncores = recipe$Analysis$ncores) - - - -# Corr -source("modules/Skill/R/tmp/Corr.R") -enscorr <- Corr(res$ano_hcst_ev, res$ano_obs_ev, - dat_dim = NULL, - time_dim = 'syear', - method = 'pearson', - memb_dim = 'ensemble', - memb = F, - conf = F, - pval = F, - sign = T, - alpha = 0.05, - ncores = recipe$Analysis$ncores) - -# Mean Bias -#mean_bias <- Bias(res$ano_hcst_ev, res$ano_obs_ev, -mean_bias <- Bias(data$hcst$data, data$obs$data, - time_dim = 'syear', - memb_dim = 'ensemble', - ncores = recipe$Analysis$ncores) - -mean_bias_sign <- Apply(list(data$hcst$data, data$obs$data), - target_dims = list(c('syear', 'ensemble'), 'syear'), - fun = function(x,y) { - if (!(any(is.na(x)) || any(is.na(y)))) { - res <- t.test(x = y, - y = apply(x, 1, mean, na.rm = T), - alternative = "two.sided")$p.value - } else { - res <- NA - } - return(res)}, - ncores = sdate_dim)$output1 -mean_bias_sign <- mean_bias_sign <= 0.05 -#PlotEquiMap(mean_bias[1,1,1,1,1,,], lat = data$hcst$coords$latitude, -# lon = data$hcst$coords$longitude, -# dots = mean_bias_sign[1,1,1,1,1,,,1]) - -# Spread error ratio -source("SprErr.R") -enssprerr <- SprErr(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, - memb_dim = 'ensemble', dat_dim = NULL, - time_dim = 'syear', pval = TRUE, - ncores = recipe$Analysis$ncores) -enssprerr_sign <- enssprerr$p.val -enssprerr_sign <- enssprerr_sign <= 0.05 -enssprerr <- enssprerr$ratio - -# RMSE -rms <- RMS(exp = 
res$ano_hcst_ev, obs = res$ano_obs_ev, - memb_dim = 'ensemble', dat_dim = NULL, - time_dim = 'syear', alpha = 0.05, - ncores = recipe$Analysis$ncores) - -#obs_noensdim <- ClimProjDiags::Subset(res$ano_obs_ev, "ensemble", 1, -# drop = "selected") - -#enssprerr <- easyVerification::veriApply(verifun = 'EnsSprErr', -# fcst = res$ano_hcst_ev, -# obs = obs_noensdim, -# tdim = which(names(dim(res$ano_hcst_ev))=='syear'), -# ensdim = which(names(dim(res$ano_hcst_ev))=='ensemble'), -# na.rm = FALSE, -# ncpus = recipe$Analysis$ncores) -if (any(is.na(rpss$sing))) { - info(recipe$Run$logger, - "RPSS NA") - - rpss$sing[is.na(rpss$sign)] <- FALSE -} -skill_metrics <- list(mean_bias = mean_bias, - mean_bias_significance = mean_bias_sign, - enscorr = enscorr$corr, - enscorr_significance = enscorr$sign, - enssprerr = enssprerr, - enssprerr_significance = enssprerr_sign, - rps = rps, rps_clim = rps_clim, crps = crps, crps_clim = crps_clim, - rpss = rpss$rpss, rpss_significance = rpss$sign, #crps = crps, - crpss = crpss$crpss, crpss_significance = crpss$sign, - rms = rms$rms) -skill_metrics <- lapply(skill_metrics, function(x) { - InsertDim(drop(x), len = 1, pos = 1, name = 'var')}) -original <- recipe$Run$output_dir -recipe$Run$output_dir <- paste0(original, "/outputs/Skill/") - -skill_metrics <- lapply(skill_metrics, function(x) { - if (is.logical(x)) { - dims <- dim(x) - res <- as.numeric(x) - dim(res) <- dims - } else { - res <- x - } - return(res) - }) -# Compute save metrics -source("modules/Saving/Saving.R") -#Saving <- Saving(recipe = recipe, data = data, skill = skill_metrics) - save_metrics(recipe = recipe, - metrics = skill_metrics, - data_cube = data$hcst, agg = 'global', - outdir = recipe$Run$output_dir) - -recipe$Run$output_dir <- original - -source("modules/Visualization/Visualization.R") -if (data$hcst$coords$longitude[1] != 0) { - skill_metrics <- lapply(skill_metrics, function(x) { - Subset(x, along = 'longitude', indices = c(182:360, 1:181)) - }) -} - info(recipe$Run$logger, - paste("lons:", data$hcst$coords$longitude)) - info(recipe$Run$logger, - paste("lons:", data$obs$coords$longitude)) - - -data$hcst$coords$longitude <- -179:180 - -Visualization(recipe, data, skill_metrics, significance = TRUE) +Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, significance = TRUE) source("tools/add_logo.R") add_logo(recipe, "rsz_rsz_bsc_logo.png") diff --git a/recipe_tas_oper.yml b/recipe_tas_oper.yml index 2b39add3..34f82e63 100644 --- a/recipe_tas_oper.yml +++ b/recipe_tas_oper.yml @@ -39,7 +39,7 @@ Analysis: Time_aggregation: execute: no Calibration: - method: envos # Mandatory, str: Calibration method. See docu. + method: raw # Mandatory, str: Calibration method. See docu. 
      cross_validation: yes
      save: none
    Skill:
diff --git a/skill_full_crossval.R b/skill_full_crossval.R
new file mode 100644
index 00000000..27ab47aa
--- /dev/null
+++ b/skill_full_crossval.R
@@ -0,0 +1,147 @@
+
+source("modules/Saving/Saving.R")
+source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/modules/Skill/R/tmp/RPS.R")
+source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/modules/Skill/R/RPS_clim.R")
+source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/RPSS.R")
+source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/RandomWalkTest.R")
+source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/modules/Skill/R/tmp/Corr.R")
+source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/dev-sigBias/R/Bias.R")
+source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/dev-spread_error_ratio/R/SprErr.R")
+source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/dev-spread_error_ratio/R/Eno.R")
+
+## data_crossval is the result from function full_crossval_anomalies or similar.
+## It is a list with the required elements:
+  ## probs is a list with
+    ## probs$hcst_ev and probs$obs_ev
+    ## probs$hcst_ev has as many elements as there are $Probabilities$percentiles pairs
+    ## each element is an array with a 'cat' dimension
+    ## the same for probs$obs_ev
+  ## hcst is an s2dv_cube with the post-processed hindcast for the evaluation indices
+    ## in this case cross-validated anomalies
+  ## obs is an s2dv_cube with the post-processed obs
+    ## in this case cross-validated anomalies
+  ## fcst is an s2dv_cube with the post-processed fcst
+    ## in this case anomalies computed with the full hindcast period
+    ## this object is not required for skill assessment
+  ## hcst.full_val and obs.full_val are the original data to compute the mean bias
+  ## cat_lims are the limits used to compute the probabilities
+    ## this object is not required for skill assessment
+  ## ref_obs_tr is an array with the cross-validated observed anomalies
+    ## to be used as reference forecast in the CRPSS and CRPS_clim
+    ## it is computed from the training indices
+## the recipe could be used to read the Percentiles
+## if fair is TRUE, the nmemb used to compute the probabilities is needed
+  ## nmemb_ref is the number of years - 1 in case the climatological forecast is the reference
+skill_full_crossval <- function(recipe, data_crossval,
+                                fair = FALSE, nmemb = NULL, nmemb_ref = NULL) {
+  ## START SKILL ASSESSMENT:
+  # RPS
+  rps <- RPS(exp = data_crossval$probs$hcst_ev[[1]], obs = data_crossval$probs$obs_ev[[1]], memb_dim = NULL,
+             cat_dim = 'cat', cross.val = FALSE, time_dim = 'syear',
+             Fair = fair, nmemb = nmemb,
+             ncores = recipe$Analysis$ncores)
+  rps_clim <- Apply(list(data_crossval$probs$obs_ev[[1]]),
+                    target_dims = c('cat', 'syear'),
+                    RPS_clim, bin_dim_abs = 'cat', Fair = fair,
+                    cross.val = FALSE, ncores = recipe$Analysis$ncores)$output1
+  # RPSS
+  rpss <- RPSS(exp = data_crossval$probs$hcst_ev[[1]], obs = data_crossval$probs$obs_ev[[1]],
+               time_dim = 'syear', memb_dim = NULL,
+               cat_dim = 'cat', Fair = fair, nmemb = nmemb,
+               cross.val = FALSE,
+               ncores = recipe$Analysis$ncores)
+
+  # CRPS
+  crps <- CRPS(exp = data_crossval$hcst$data, obs = data_crossval$obs$data,
+               time_dim = 'syear', memb_dim = 'ensemble',
+               Fair = fair,
+               ncores = recipe$Analysis$ncores)
+  crps_clim <- CRPS(exp = data_crossval$ref_obs_tr, obs = data_crossval$obs$data,
+                    time_dim = 'syear', memb_dim = 'ensemble',
+                    Fair = fair,
+                    ncores = recipe$Analysis$ncores)
+
+  # CRPSS
+  crpss <- CRPSS(exp = data_crossval$hcst$data, obs = data_crossval$obs$data, ref = data_crossval$ref_obs_tr,
+                 memb_dim = 'ensemble', Fair = fair,
+                 time_dim = 'syear', clim.cross.val = FALSE,
+                 ncores = recipe$Analysis$ncores)
+
+  # Corr
+  enscorr <- Corr(data_crossval$hcst$data, data_crossval$obs$data,
+                  dat_dim = NULL,
+                  time_dim = 'syear',
+                  method = 'pearson',
+                  memb_dim = 'ensemble',
+                  memb = F,
+                  conf = F,
+                  pval = F,
+                  sign = T,
+                  alpha = 0.05,
+                  ncores = recipe$Analysis$ncores)
+
+  # Mean Bias
+  mean_bias <- Bias(data_crossval$hcst.full_val$data, data_crossval$obs.full_val$data,
+                    time_dim = 'syear',
+                    memb_dim = 'ensemble',
+                    alpha = 0.05,
+                    ncores = recipe$Analysis$ncores)
+
+  # Spread error ratio
+  enssprerr <- SprErr(exp = data_crossval$hcst$data, obs = data_crossval$obs$data,
+                      memb_dim = 'ensemble', dat_dim = NULL,
+                      time_dim = 'syear', pval = TRUE,
+                      ncores = recipe$Analysis$ncores)
+  enssprerr_sign <- enssprerr$p.val
+  enssprerr_sign <- enssprerr_sign <= 0.05
+  enssprerr <- enssprerr$ratio
+
+  # RMSE
+  rms <- RMS(exp = data_crossval$hcst$data, obs = data_crossval$obs$data,
+             memb_dim = 'ensemble', dat_dim = NULL,
+             time_dim = 'syear', alpha = 0.05,
+             ncores = recipe$Analysis$ncores)
+
+  # RMSSS
+  rmss <- RMSSS(exp = data_crossval$hcst$data, obs = data_crossval$obs$data,
+                ref = data_crossval$ref_obs_tr,
+                memb_dim = 'ensemble', dat_dim = NULL,
+                time_dim = 'syear', alpha = 0.05, sign = TRUE,
+                ncores = recipe$Analysis$ncores)
+
+  skill_metrics <- list(mean_bias = mean_bias$bias,
+                        mean_bias_significance = mean_bias$sig,
+                        enscorr = enscorr$corr,
+                        enscorr_significance = enscorr$sign,
+                        enssprerr = enssprerr,
+                        enssprerr_significance = enssprerr_sign,
+                        rps = rps, rps_clim = rps_clim, crps = crps, crps_clim = crps_clim,
+                        rpss = rpss$rpss, rpss_significance = rpss$sign,
+                        crpss = crpss$crpss, crpss_significance = crpss$sign,
+                        rms = rms$rms, rmsss = rmss$rmsss, rmsss_significance = rmss$sign)
+  original <- recipe$Run$output_dir
+  recipe$Run$output_dir <- paste0(original, "/outputs/Skill/")
+
+  # Significance arrays are logical; convert them to numeric for saving:
+  skill_metrics <- lapply(skill_metrics, function(x) {
+                            if (is.logical(x)) {
+                              dims <- dim(x)
+                              res <- as.numeric(x)
+                              dim(res) <- dims
+                            } else {
+                              res <- x
+                            }
+                            return(res)
+                          })
+  # Save metrics
+  save_metrics(recipe = recipe,
+               metrics = skill_metrics,
+               data_cube = data_crossval$hcst, agg = 'global',
+               outdir = recipe$Run$output_dir)
+
+  recipe$Run$output_dir <- original
+  # Reduce dimensions to work with the Visualization module:
+  skill_metrics <- lapply(skill_metrics, function(x) {drop(x)})
+  skill_metrics <- lapply(skill_metrics, function(x) {InsertDim(x, pos = 1, len = 1, name = 'var')})
+  return(skill_metrics)
+}
+
-- 
GitLab

From e5d7e658f436f9982a6c46b21cc536c5d0e7ace2 Mon Sep 17 00:00:00 2001
From: nperez
Date: Thu, 30 May 2024 11:02:34 +0200
Subject: [PATCH 15/78] cleaning code

---
 full_crossval_anomalies.R | 72 ++++++++++++++++++++++++---------------
 1 file changed, 45 insertions(+), 27 deletions(-)

diff --git a/full_crossval_anomalies.R b/full_crossval_anomalies.R
index 649f841b..d2eced11 100644
--- a/full_crossval_anomalies.R
+++ b/full_crossval_anomalies.R
@@ -1,9 +1,9 @@
 # Full-cross-val workflow
 ## This code should be valid for individual months and temporal averages
 source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/GetProbs.R")
-
+
 full_crossval_anomalies <- function(recipe, data) {
-  cross.method <- recipe$Analysis$Skill$cross.method
+  cross.method <- recipe$Analysis$cross.method
   # TODO move check
   if (is.null(cross.method)) {
     cross.method <- 'leave-one-out'
@@ -12,38 +12,34 @@ full_crossval_anomalies <- function(recipe, data) {
   categories <- recipe$Analysis$Workflow$Probabilities$percentiles
   categories <- lapply(categories, function (x) {
                        sapply(x, function(y) {
                        round(eval(parse(text = y)),2)})})
-  cal.method <- recipe$Analysis$Workflow$Calibration$method
   ncores <- recipe$Analysis$ncores
   na.rm <-
recipe$Analysis$remove_NAs ## TODO remove if the recipe is checked: na.rm <- TRUE ## data dimensions sdate_dim <- dim(data$hcst$data)['syear'] - nmemb <- dim(data$hcst$data)['ensemble'] - nftime <- dim(data$hcst$data)['time'] + orig_dims <- dim(data$hcst$data) # spatial dims if ('latitude' %in% names(dim(data$hcst$data))) { nlats <- dim(data$hcst$data)['latitude'] nlons <- dim(data$hcst$data)['longitude'] agg = 'global' - ev_dim_names <- c('dat', 'var', 'sday', 'sweek', 'time', - 'latitude', 'longitude', 'ensemble', 'syear') - tr_dim_names <- c('dat', 'var', 'sday', 'sweek', 'ensemble', 'time', - 'latitude', 'longitude', 'unneeded', 'syear') } else if ('region' %in% names(dim(data$hcst$data))) { agg = 'region' nregions <- dim(data$hcst$data)['region'] - ev_dim_names <- c('dat', 'var', 'sday', 'sweek', 'time', - 'region', 'ensemble', 'syear') - tr_dim_names <- c('dat', 'var', 'sday', 'sweek', 'ensemble', 'time', - 'region', 'unneeded', 'syear') } + # output_dims from loop base on original dimensions + ## ex: 'dat', 'var', 'sday', 'sweek', 'ensemble', 'time', + ## 'latitude', 'longitude', 'unneeded', 'syear' + ev_dim_names <- c(orig_dims[-which(names(orig_dims) %in% 'syear')], + sdate_dim) + tr_dim_names <-c(orig_dims[-which(names(orig_dims) %in% 'syear')], + unneeded = 1, sdate_dim) # TODO fix it to use new version https://earth.bsc.es/gitlab/external/cstools/-/blob/dev-cross-indices/R/CST_Calibration.R#L570 cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, amt.points = sdate_dim, amt.points_cor = NULL) # k = ? - outdim <- length(cross) # leave-one-out should be equal to sdate_dim ## output objects ano_hcst_ev_res <- NULL @@ -57,7 +53,7 @@ full_crossval_anomalies <- function(recipe, data) { hcst_probs_ev <- lapply(categories, function(x){NULL}) obs_probs_ev <- lapply(categories, function(x){NULL}) - for (t in 1:outdim) { + for (t in 1:length(cross)) { info(recipe$Run$logger, paste("crossval:", t)) # subset years: Subset works at BSC not at Athos @@ -117,18 +113,19 @@ full_crossval_anomalies <- function(recipe, data) { gc() # Add dim names: - names(dim(ano_hcst_ev_res)) <- ev_dim_names - names(dim(ano_obs_ev_res)) <- ev_dim_names - names(dim(ano_obs_tr_res)) <- tr_dim_names + names(dim(ano_hcst_ev_res)) <- names(ev_dim_names) + names(dim(ano_obs_ev_res)) <- names(ev_dim_names) + names(dim(ano_obs_tr_res)) <- names(tr_dim_names) # To make crps_clim to work the reference forecast need to have same dims as obs: - ano_obs_tr_res <- Subset(ano_obs_tr_res, along = 'unneeded', indices = 1, drop = 'selected') + ano_obs_tr_res <- Subset(ano_obs_tr_res, along = 'unneeded', + indices = 1, drop = 'selected') for(ps in 1:length(categories)) { - names(dim(lims_ano_hcst_tr_res[[ps]])) <- c('cat', 'dat', 'var', 'sday', 'sweek', - 'time', 'latitude', 'longitude', 'syear') - names(dim(lims_ano_obs_tr_res[[ps]])) <- c('cat', 'dat', 'var', 'sday', 'sweek', - 'time', 'latitude', 'longitude', 'extra', 'syear') + names(dim(lims_ano_hcst_tr_res[[ps]])) <- c('cat', + names(orig_dims[-which(names(orig_dims) %in% 'ensemble')])) + names(dim(lims_ano_obs_tr_res[[ps]])) <- c('cat', + names(tr_dim_names[-which(names(tr_dim_names) %in% 'ensemble')])) lims_ano_obs_tr_res[[ps]] <- Subset(lims_ano_obs_tr_res[[ps]], - along = 'extra', indices = 1, drop = 'selected') + along = 'unneeded', indices = 1, drop = 'selected') } # Forecast anomalies: @@ -206,14 +203,13 @@ full_crossval_anomalies <- function(recipe, data) { all_names <- NULL for (ps in 1:length(categories)) { for (perc in 1:(length(categories[[ps]]) 
+ 1)) { -#ps <- 1 if (perc == 1) { name_elem <- paste0("below_", categories[[ps]][perc]) } else if (perc == length(categories[[ps]]) + 1) { name_elem <- paste0("above_", categories[[ps]][perc-1]) } else { - name_elem <- paste0("between_", categories[[ps]][perc-1], - "_and_", categories[[ps]][perc]) + name_elem <- paste0("from_", categories[[ps]][perc-1], + "_to_", categories[[ps]][perc]) } result <- append(list(Subset(hcst_probs_ev[[ps]], along = 'cat', indices = ps, drop = 'all')), result) @@ -259,3 +255,25 @@ full_crossval_anomalies <- function(recipe, data) { ref_obs_tr = ano_obs_tr_res)) } + +## The result contains the inputs for Skill_full_crossval. +## this is a list with the required elements: + ## probs is a list with + ## probs$hcst_ev and probs$obs_ev + ## probs$hcst_ev will have as many elements in the $Probabilities$percentiles + ## each element will be an array with 'cat' dimension + ## the same for probs$obs_ev + ## hcst is a s2dv_cube for the post-processed hindcast for the evalutaion indices + ## in this case cross validated anomalies + ## obs is a s2dv_cube for the post-processed obs + ## in this case cross validated anomalies + ## fcst is a s2dv_cube for the post-processed fcst + ## in this case cross anomalies with the full hindcast period + ## this object is not required for skill assessment + ## hcst.full_val and obs.full_val are the original data to compute mean bias + ## cat_lims used to compute the probabilities + ## this object is not required for skill assessment + ## ref_obs_tr is an array with the cross-validate observed anomalies + ## to be used as reference forecast in the CRPSS and CRPS_clim + ## it is computed from the training indices + -- GitLab From bc457bea43820766105b1d78f320b311bfed2854 Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 30 May 2024 15:11:53 +0200 Subject: [PATCH 16/78] fixes and logo --- full_crossval_anomalies.R | 24 ++++++++++++------------ full_ecvs_oper.R | 1 + rsz_rsz_bsc_logo.png | Bin 0 -> 48040 bytes skill_full_crossval.R | 3 ++- tools/add_logo.R | 11 ++++++----- 5 files changed, 21 insertions(+), 18 deletions(-) create mode 100644 rsz_rsz_bsc_logo.png diff --git a/full_crossval_anomalies.R b/full_crossval_anomalies.R index d2eced11..f65d86a7 100644 --- a/full_crossval_anomalies.R +++ b/full_crossval_anomalies.R @@ -18,7 +18,7 @@ full_crossval_anomalies <- function(recipe, data) { na.rm <- TRUE ## data dimensions sdate_dim <- dim(data$hcst$data)['syear'] - orig_dims <- dim(data$hcst$data) + orig_dims <- names(dim(data$hcst$data)) # spatial dims if ('latitude' %in% names(dim(data$hcst$data))) { nlats <- dim(data$hcst$data)['latitude'] @@ -31,11 +31,12 @@ full_crossval_anomalies <- function(recipe, data) { # output_dims from loop base on original dimensions ## ex: 'dat', 'var', 'sday', 'sweek', 'ensemble', 'time', ## 'latitude', 'longitude', 'unneeded', 'syear' - ev_dim_names <- c(orig_dims[-which(names(orig_dims) %in% 'syear')], - sdate_dim) - tr_dim_names <-c(orig_dims[-which(names(orig_dims) %in% 'syear')], - unneeded = 1, sdate_dim) - + ev_dim_names <- c(orig_dims[-which(orig_dims %in% 'syear')], + names(sdate_dim)) + orig_dims[orig_dims %in% 'ensemble'] <- 'unneeded' + orig_dims[orig_dims %in% 'syear'] <- 'ensemble' + tr_dim_names <-c(orig_dims, + names(sdate_dim)) # TODO fix it to use new version https://earth.bsc.es/gitlab/external/cstools/-/blob/dev-cross-indices/R/CST_Calibration.R#L570 cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, amt.points = sdate_dim, @@ -111,19 +112,18 @@ 
full_crossval_anomalies <- function(recipe, data) { info(recipe$Run$logger, "#### Anomalies Cross-validation loop ended #####") gc() - # Add dim names: - names(dim(ano_hcst_ev_res)) <- names(ev_dim_names) - names(dim(ano_obs_ev_res)) <- names(ev_dim_names) - names(dim(ano_obs_tr_res)) <- names(tr_dim_names) + names(dim(ano_hcst_ev_res)) <- ev_dim_names + names(dim(ano_obs_ev_res)) <- ev_dim_names + names(dim(ano_obs_tr_res)) <- tr_dim_names # To make crps_clim to work the reference forecast need to have same dims as obs: ano_obs_tr_res <- Subset(ano_obs_tr_res, along = 'unneeded', indices = 1, drop = 'selected') for(ps in 1:length(categories)) { names(dim(lims_ano_hcst_tr_res[[ps]])) <- c('cat', - names(orig_dims[-which(names(orig_dims) %in% 'ensemble')])) + orig_dims[-which(orig_dims %in% c('ensemble', 'unneeded'))], 'syear') names(dim(lims_ano_obs_tr_res[[ps]])) <- c('cat', - names(tr_dim_names[-which(names(tr_dim_names) %in% 'ensemble')])) + tr_dim_names[-which(tr_dim_names %in% c('ensemble'))]) lims_ano_obs_tr_res[[ps]] <- Subset(lims_ano_obs_tr_res[[ps]], along = 'unneeded', indices = 1, drop = 'selected') } diff --git a/full_ecvs_oper.R b/full_ecvs_oper.R index 00050965..8a08fef0 100644 --- a/full_ecvs_oper.R +++ b/full_ecvs_oper.R @@ -24,6 +24,7 @@ skill_metrics <- skill_full_crossval(recipe = recipe, data_crossval = res, Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, significance = TRUE) +## Check logo size is appropiated for your maps: source("tools/add_logo.R") add_logo(recipe, "rsz_rsz_bsc_logo.png") diff --git a/rsz_rsz_bsc_logo.png b/rsz_rsz_bsc_logo.png new file mode 100644 index 0000000000000000000000000000000000000000..59406d7a5f242aa87eb276bea39d6544a802b986 GIT binary patch literal 48040 zcmV)lK%c*fP)004R>004l5008;`004mK004C`008P>0026e000+ooVrmw00006 zVoOIv0RI600RN!9r;`8x00(qQO+^Rj0U8N22T|E(7ytl(07*naRCwC#eRrH)McM!N znK|Y5yS*ma^xjA#2@rbkN>PxesGx{Qlir(!CWs<}bfotd2rUVagd~uVo^0y&c6V<* zW#;+)ad$Tf1jYANdG)v7k00EeJ9EyQnRCA9nWqxILHy`>59P-gSOXw>$<3tgV!ZtE zR!kVu4|dK$d{8wuHs{djxlq|G#`PNp*|r89X&q(S&xLzZpeu)t3-=g^YcVN+y zPNWpW6app1F|_#}Ov8Ykbm&v|`OjEKZO)TT2B9^c#Es7Nd zv}qcgNGAEHfot&BhYrD{BU&(UkC8AOAKp7xouZDl4}pg zE01ST8f{~+E$&}gZ6uvCBmgPZMQ8$xF*4=5AFfczgO_?Lm&F^-;aMft9F}R43DvMK7ZI^IXbZBH9ZI3J>Ed@{7yS%7&edm~LMvo%KNJn7+X|7

[... base85-encoded binary payload of rsz_rsz_bsc_logo.png (48040 bytes) omitted ...]
zA;F!Eis8xAsLCISM88MfN%vyU_Ffbf_^EACI~(H*HOpZq6xA+BCe)+MrSKl@lyCZ%?J%8}>z{{Qcp|eZKN0?X0Ty z9BM|q^{K{~-t^fm_L0{=MPGi^sabj3juhxqy1T5GElUjmkpr;&!QAfuZ4w3kRywct z`!Y$7mlju4n`d}KqPerl(YiLL93v`e?93{J16~Gz&+b3#?}|bJVE6&&(d2^-UbeQB zh8;G9o4;!D?%WxF4qz!Hy`poz!y&W6vDFotCZ4ryJYEnr^-OO@#n-ee)C5`ggoGY) zk@==xjh{V`cE;0`4|+&*yl$q|LwZp{1d2?!F1c*yrwYO^;@eZdfKQhtC2cC{RETV6 zMj25r`@*J3_2x+T8E&w`i`cf94A%HLnT4t>^i#skVOVVxUp?|Zm>(3rnXi6F%<8i9 zE5_b+#v4xI!E;1BnRg9=8ANnXdift`q4Fp#T;WskUA+o@J!Fg@D27d|R2x2P;6S-w zrrY|M{f6ST_d=g|y)-;Tc}qC~zqpK9cv5NW9z#kax3<+rMU>$)g*U8Ic5{zgI=Pmt zohy;6sH1$!DxbmaJaMjW8iJr!o3KaX84XNjBWgD7!T?+arlXIVu0;cut?N-^G zJ2~5(k&yvmmSn2Z(h#j!`Yj48D~MfJg$m4?j`rNUOnAr8?8O9v~uRLI#UGoM`V=it^;w6_lrhWI<4+cK1-UDl8P) zh?7$I;x~v)xJ1+)HAt=e-FldM65)}xqOU8j3`en>bVPVy5FMMF6%}d34eL?3@GJB_ zcnUa{XJ|q4k1izP4=X_^W>$}s7kJKW>a_A6UCGqm1lu+%Zp%<)N`)A8K$-e_Su6WY zDN^w?*wxkG<(Hy<`mbfQ(npz$f%^?LV*P9|c(3P>j0`-^Hlhr8_Bt5Ep;7Yag12!BA_f5C+n^Y*;f(vs;U*|omMf$7qenbm-! zI37#?hY0lI`(DPLqz`|5x(A)Dmi+LsE!ep;{(FG>GR@iJQ{Tnk(!7DMH7Q^9di66M zhtG?KwX%Y8pB-Pf(+y55Db!27{YOspXqjz$)TDE#ie9u}Sa~N#_VW{K*?wAOZ-K45 zZeK^55}VtgNjcfJiJ>#1)fz3l>>!b~({Al>zptdq6NrZLYSQ=sSa+*pU8$<+qjlE4 zEa&+XJ#^98V_5BtVcX6a`DRV#@a$J63gR_V((*^1YCTnqpTzmBtvOi-nl6Va<}G64 zDG-%K?1_?34vh{B&Ky+0Cb_v}8a|&CPc5l(%d34-^XUBpb8aee)^xfhWgg|O8jDL$ zv>!X?h^G5hvmsp8tjM&gh+g3vP&>f%cp~h2s!4P#x{~j^=VZ#dx(XKP(KOszZ;dYZ zHmp%~14E*5b+fAI??qQlK!P(ar_vBNx!7?S{b?a5X;w&A0 z$W-QSkFsJCeHn$$j{Gl4icfP%`<6t4mdBUv{xbV*`*{TDYgfb;96I9`)Qu_EWZ!%o zaL_QkI^kN{^u=ko;}hOCo;Y7;DA#rWImU6%f=Ub%li2PkuhmiGmN=)V^g)KC|ng= zHdQ4YVnZm=wxt)x92ix*-`-NG73$ST|D*)5rR#XYFhB9u$8qKSDw|@RJNH;Vn;FbLX z0|T>)i(_3$H(^9kOqNJ4-Oe{%FcM~vY8vM~Qt5nGX(w$vD~~$)3V!@Ihfu01t0GlFKZ=Y2;lWqw5!)SK+++euAy6L#=!Iq$@Qi7?eLxC*4O~2gbjgLEX2%svBr2EYy#U zC*5z6a`fVXA-8c|qblrQEPKA#&de`yWX`SUr_<_A$L1(NYQID2z82LVjOp;>Y50#t zmH3Z*cKdGyckzxjb6_5P*QJ24l z6L0?`Uc2Q!iq1JnWOI(I3pLYr_`Xr&qX(|~{d%kL;K7OtaURHgU4KYm>Odovw=KV4dsGv&E}M6sGCkrrPylg&@IE$3PrM8xbjq*#*ps_CxU!v_SDY4?VJ-+ZuPXR0Gu zEUs3}vmKBvnS-V&@3C$Aytu#!q;t;Y1l69(xCc54r4oomL;AV7oZAx$n5A8P+4~)! 
z5Fl5Biu--WwehsuBEV1Tnpho=<)73HsXh1L0q)N(|3cUa+YMLvPjwu1stwp0vl9+_ z-b|+KQ!hMw%rTEVw+0g8R+ll#RS+5>ubbLMDF>}pg{FA(rC+Fn=0B>%Gq&T==piBC zq~q2?*h&@_nRL)7!&$P;ni=uwds7hJY{ot}r}Tvpll4^AwmpVCRS0-H>f=wB4hY}q zxGJ5=yR%%7rZbChz*yKD%WcW!6hcws2<7tmHX|>Yi%eZUp2?~|89HkH9(hL2&^2Jsz`+GqcPH~{E0wRzrkq<`aIs=CbwXb#tY4YUxNRO& z8xQaiIm>Z!7EfW9%RB~MEF_K0*?hQiQLYKRmrB@on||^ZM}1eju1dwz_6!$7*O)a; zimREh$>%r5IXQbl_lC?(WfP-%I+wLI!r8hmZ9{YO-F@m#2D#w?z1g|C`}*+M;)5MW zUFCq%66_a*#i=h}xyL`}Bp6Z~=velxHd7D`Ll@XG`^bLResJ}dcH!LGyF6a~rC7?^ zEWu~HE+1>N=#x+tx-Oqdv}Im0hG#@LReY7EiN0jonrbVmlnQz%pr2>i*yS-vh=1<| zZdr6YPK{P#>VyIO=a;wAhRq2s8&{-&I+^+EmJ5A;@#KC2b@!7NfuLN6BYnNjJK=~n zC2!mI1HWz3`d3Jb6d7DMzSuP+X{ogHhvH)W5ZypM82h7IA`mOvLSzkEqZR$$uK=?`-E6Y5_oJ$+Ar^lz^J z*tH+M{sW=%{;n$mLrNV`SRn+$% zCQtwvQd&y4KKLN_b*3rTmxE$Dl8BbBS=E=1%pIf+J8Y=k_UTgY`t(=Mu!5+_rtOE~ zS*I~DByy+gVo1QRou~)=M=|hzGG*VFZMTrkVpckbU+Sj#BwAt~9S(XfbX@v*zk_O` zlcp5iBQGHSvLjQv5i0??p!SRI$R3FjR=ay!3 zYIeiQ)((H6xZPz;jF$OMG9tbOx~@HtE84GaXjIx^98MqQ$W=$G!voSWYsJBUFt zm2pq$Y0aXmFZ;Y=e3wh|;kiWqD3DC$@_c%(H36 zZCjlY44CI|fR6Tf#SOWY>vm{@ zw}1q>Drzc(L-eTYs(Id^_J>Hh=RjXndOxjCBkj;Pjd8a*owny%mNK$ENwjrlL^5U} zopp{#q#b)=9qEa*x>S*<_JGf{64sHB@{L&1{(Y{)LNKJyw=H#*u8Ye8QO|LPDKE@h z>YC~*Zy5<$Vv}~YkSH^~=1+qG&(hAG^lL)OFLLWP9+m6P%x6aV&K*4qTUK`?opfG; zpmo{h+m6XMCg&@r&RER8zpu|mJYi2tX82OW5Nk<_qaoyDIh#Hp34PS;^5a55{WPWM z!EFk0sno9HxHCbw8UTVuRNsf8F_c?E2DaWy5S+J)~>m>R?bmc0j-IiHhl%(7|U8nSWrF$I3Y` z2q`|u3IEZ#yqxbi82sgX4OamlAd^)`o$<4gJ9N-ljzed7!{#pnLyJx~bnP7x#8M@B6*-0A{uqf84J1v~^Q^cPAe( zr@##eM>7N3(#mSzG@oC*LE?LJJK}vrj9_1{^RmCj|H$McioGMIMx5Zpu*L@Au=xXA zA8A^{hpi3SulJl*Qf$of_~_3Nv=nfH$^rG~gKK#x|{ zc*dtYtQ$8iizQokW#z{!FLj^()0rw-TtFRZhriv>!tdUFCSJeo)c@42^O+cKx?}|W zKAoXSC|8Xzbov&Kf|a$^wiTTObaK`F+pu|WANit&6Yy&znQ7X9+VDl4pWKu^Z@@5Z z>_N3J5-2cfdPRwHdAyrk{{56S=(&jj{6ptk37%=L1#MMtIgh7=j)XME!Zgoig>+O>X7*O0shfqoogE zj8u-=ss}VkNiSw|?%7$_LHYd38YwA1dPL#wRF;!ozc{X^rDI!BrpKG^iB&3wb>N(u zuD(S6Rw2aW>9o7V*Ok=UdQ@szu?aeAmzzedN%8P zY&+b#aa(+pH(>U6EW5tzBliZ)6UfN{QAi2n9W?G@Y_}+8K?She|27Tb8=0 zyT|$5NgE4i&58;^fR%SANkQw$lt3)!K70REO$hQA6d21It;iI1)z03}j=7@mtBuLZ zNfihdXm2rKjha?e0mT@gm^YhI6Vep+$QflGCMpg`yo=!!f*AflKwn&Ra1jJ^EELp_i>=?Z zA!sFi9UbvfR?LUZt8V)E^OkFvu`CgH{^ZFg^^y>yI4W-}2=Et$hg>$91a0Z=N$Gh@ zUFh@Z=Qv5#-n+Wph__`of%vQ5-s}M*PkE@E9hHY`T{SdN-|aDVkiaf5yATp!!rvrO z09f_pg{ZIZMBcJlnv&viTNTVMW{f#cOSWYe27=;Bq3yE^-@iw>pGZ*@>vP`^`ou|2 zR@p7}ELGYoH*CmrV&&aVCYG0b-&)m``TBtqA`$I$%|HwGwRr9`cvNuHX?4;1HOUn_ zH>T$#`|?ijx){Frv|dJv4OMY)4Ll<&W&0<~SLRkMKc=`?o9gq6-)jc75<%=ERHFT7 zpw+%&6cqL;%kSIVr0*rFz(1tufD4cV{$5E61h6DY&<$)02IS2(wcfFrhU|q6OS|{@ zLQ+r(Jv=)t#VoG^)ZU&-M887Eg2>9@gBP_)U2BEw4SjdG!mM7Vmu#q})V0QzQjb zAfSD6>`h~E%8{R=sVOy%8Qj{EOC%HdiL5xkdt+bBR)FoOi9WCPoH4)$>)Xw695vf< z+}A7z&`h0O5J-|Bf}Z#jvu0yQtf4+8ckPK&Dz4Dk zYt3>Qy?tqSyQXWp3n*nd&<(*Pu%R#J%v4HML=L#&*$o}7FNJ4Meko(;e#=nViZas~ zGcJNeJPpfJ1Xpu?A+IpoK5S}BDd+x7#yX~~ByfV`@K2Y2*>`?sPZC0k5~;~8{(ue? z_?=(gwx++UP|)&icYQiz|3o*$TQvF=HYShOt*#7Io=t$Q_RegzYx77+d~Vohyt1dW z`wwBi_JWi=+~*bTuFGR3;q7q0AX3Y31%pOIi1&OEFN(^8s-mJ$?p>4K(Yi9R7R)mU z{AV+$p|C8#wgQAu5`@&=lvrnb7jeY4nGCAc*XYJF&V@5s`bIkj44&Bd8nOCm7A*&R|d%XG^fru9+^<^|6iN6={lq6yejfvU1LCd;#WqViE zS9^192@o)Z2wTmtfs(2a|6L>s039FQgnZUQsyE3#pH8~pRMmr{uJr3#GHE{;EzpkE zbm{=J-0vYzAh7GOzpu~zQ?SBw#|v)`<-SEXsca^z)f{st^~A|7m^&KADPq`;(hKI@qkXfa(^&Rc8Xdjr{+B*Uon{bx zrY>Ko8&+^-xWN2Xq|kFsS()$3sbfONw{+%9KxB1%+S>HS_3KP0uOKurl!Iqz@GFSsqi?3Mtl(9v2PgEcHB*+#K4Eo*u2}d zGFJ(R+oiyc*jKmgWW|#;Nvk>UB9*dLlv!KHH! 
zu3nw7+)=uTrzcJ=zBCv$uMZb^t|=?`UR*c8f6g1f+nn9ICMPyLx0PC7Tgq;hp6iV} z(~ap>9Mnvors>|c_H1LoXFO!vYQn(D(I_hp3>+NZ>{^bYNrtA;WPm$RM>EI-ZrAU!LI(oDhf}e(K;b0n+P}P0^qM!H(wj5DZgJk&?DQ`TUlk z>#BlL{q!x3+hbB~>xCw$3XDxMsDmK@rb$wY@0`vw6fgvgCNtzkIBZIh%E7U%Q4Az{ z(zffmTgsc){=C4k&dUeh7WI4gKE7+nXg*_U?4+mNT%!>O>h@V{4o8RrutIq zxHAV!Vo_IF>Hh{q6O_Uhqs$L)$YY;;Vy#rn{d|7y8%YeWA=KYZqQJg*jU!%kZ0h5v z&!9m=ig@0Ffo@UAr)RVF`~CWRrkGy(bYE(||6no^l!8ho-0ROhbLR(xkH4$dab2fn zcSeSNkk7nwDj#%6wQF0@wyo^yNoR5k%PP(BzJPp0>gWTY0|ezjW-|PX<|b=xAfVqi zbVgAb2f|KwuTt^m4h~j@N$65`Hzl;L3y1XTRi&?2jT!Ru1t*q`Dk;;a27}^f0l#*i z$BQ?mDZVicY&T7`YdX3#16>}IS`7o6HIMkx>%+^zki0Dzq@UK*c*b3E zZU_5@-b>zq2F zZdZx}zkPC5ZJ;RZ#4^fmIUs+0YmfEtK^M=ANI{}y$!S}pkR6uAQz00!?!3En(O$$7 z)|ISyBR|-6q29fHZkiS9q|bMMR(f_P@@B%a_(a!b zrBA*UmZfH}!X~K`q(+dMgr*6wlx?=7M!-Xe_u9^%Ufhnpgu2#s5PbJfd$N90>~(C^ zeEw-mqIpMeL*tgN=C;<{ZH-Ntsh)}uR&Q=2j42nWD+e+6GUFJlD~V)VD(~!!=W|VQ zWU|)r9*_2QERij;T=s2R)sVXAu8GzGheUmjL)U}xH5wccXq;(kwGs-wtywF-;zhWQ zT4?I>g|J_*SBzkLyXE>kf!*Eh*}a{u*{L0!`71Wnr=zaRp_UJx+12!IQttRDl{AEe znX`v`y(wh!%8DNJ;h3(tdh3HnE{-ce<#U+W(V0cp=S|jx8Re$qsEc$>JQqt^!=xtm zF8Zj;^3-3VN~e~Fl|o5jfp<;z^E2V*95}Wb@7ih`G!symC^b5ike`wC6i8EcXN8k0L>&(*)X zrKSD0m*F@#-FDO_jGPNWBHq)P8|85V$mWF=oPFEG`gZI2lkS}CS4>q!1)lYWAsTFl zhoi9x`IdxDVXyn?CynO=oZ9i;o<_x7=k>|2b)8{3Qq^0E6B-%p1hu;~1B#=YIKx#0UfPV5sb z(KH=B!Jxe6q8lonyKrtlw|eAK%~N7ZmEAqdqC@wBwou8dvuZRCToHPtNDn zDUO3tO0k66SH6-ub(*GlzNkq1L*uH{cK9Sg?`7eJWKsWsAO<@2B$W4=nQqCf{?ukZ zfwSI?;fZ;F#MSIYBr2%!@af;7-sE_<5K{zC^=9SaAYWM^24K76HqhI~EkmvWf;=vd3;1w(A z#(M_i(T@`%mULXNS6=NZbrKPMM8q*b3}VIOO;cVZO>JdyiRNqAoqm}J9RZV9C37^B zh=b`>J;wzn7d;~-eV$4=FFOfaKKIgOTzJ`rF95p8VzJ2p|H&oVN=8IxZ@vLpyeEPe5QOsYP2Ys zu%6d6q51s=VoBTad&O;z!`GFU=oPV~t7o$6V?*Qjbcs%AvOJfk`L0q+a*dfwgZ+J% z+72%WnEZ;RsLXLNKNK`ib!@vdn{#i@X4D}A1_VN_?b%nDxhdqus+2{Om{E}VYTXIp z+2f|V3b&{BCJxrh13H-PKtQ|5bul2>o4djnF`s7U;-Ci~dn@4xH zCwjnMLcu|;$ zTW^rSCcn;`QWhOTKtA>5EvFMLK-Do*UMh!{C-piA?L@p5AWbn2ARu+f zjG|LEZBLl3UDjt(@Je0sE@-m9CbXl{Y3pp^s6E1Cyrmhm!^ztXhDJ?k8}l{7T8GuG z*)|8k3gcE+VMf5j>lqu>O5xC=BH!_uRBobe^SpRh`kY8{z_6JtL&IUYynCa^;MWKw zayE|BH1UvQtJl3=?SM?yxnI}#9YazqW6=>(;!DfnyLICGgeK#Op6s2WLK9NZc+28n zr`OgWh-i&K>m4dQ`{s8%rg3XJlj|a>zpm-D$H_Z;O`V(47UoGQHUNBcb)`SLcW?S* zP4aSGM}teEmKDt+#W&<~?#HGn@60)Tpy{RC%L~108k;lk>pCws1e%o;6}C+WJBq){ zwK->nEA?Y6N8R8x@SvUXW~zoe7g<)wn_GKc(I==aQ!|X;?8V zN1?epP4B+99d^dyvVI}a-I)=>gi~GZuW4(|-IUAm>B?baKbbmUND?dzLoALKXxFuG zNv%Y0mSnXl%)$VMk10`2yL&}{QFD(Tgmf#9QM0yXj}Cr&AxUz$J-2g|Hh9*64DKL#JFtMIh5EnAdQB~ zGUJ_sAjLa+T-n;^xY4i?OvQ313%0A3Fm_ZF`(A?~^4VB&GJu_(J;}aP77WAIoxPs+ z&h(g~h<|;1cP{VuYi0GF_Q?UCzNFCSbX$%JB(wI`iqZn%*b>`z#Z)AsjdiX3kwj3^ z@P5))pIR+Tb@}sS2dH>onm6x`Q*TDHu+oztXir}<+Y|Je6bzb3#?lPZ3Q6$Dy0L}J z*Q~W1jqKreCVRY;WSd^k8&Xm}nRRC_JbBowH=p;F8=6yi2<+TUMY7N5Gv9C=yQwfp z)t2Q(ZN|p7R2U=ri_qTs_^E=OG9~qG6(ydxa=B#QqZ{S9yedoN?R8a!L77NqT_G{t z)MchGVeR$!!`_~r)M)}vKcRb8jvZ!x{_W=60oVuV^75~m8-m98|u>=#OH$dk|C(%8AX-`uINo7xLI+tVizJ0&cP%|DqE zee2O1{LY~l^pd85^La)#XHh7ivo9cho3>^az|9R|mX6Sk^~W6_`tZ3&I@u@!u2cKE&y==nz1+;cb66hT$vMvD!udz z4$hu<1i98EMKTtM_+Gd3`DVZ7)ETB!dmH+878UqJENxP*Dep#x`X{)#+~ELh-SoWQ z8+SGZ1EeK0juQ!*!<~G70qyIISsd??Ux&jz2>X4bZI^pp=*@~Pc)WTUDYv|-BfGA; zUmf{#d$bH2lN!J5`#LhQ$c{wbW9HouyWZ` zg(S=HcwY_}S)U_ihgZs2al{{7vom#yX^MtK%>6hR(g(4XIavx)n(2941E*3p&;H4z z$SZ$d9CN~9F-7IG^FShRdOUx&ENg2;ku1vOtlGwp_N)f4qeHL1$XLFt>qMdK!7Q}K zaM*mqZ@Mv$E~AN*HMsu$Tfd!h#si|IJFWzyzowC=p*glS9P*0M!-xFmNE86B{N4L_ z;KIW}M3{2&1GG10AyDYyfLEtITl-|PF{xlkRWjH=E)lb@wk#}AE}Q$)dXz zdZi$$KhV56wh11MWVb_ltye_Ke9G`h^z2Crr(U|b-kTX%gO%}3;>`2f`*n2W$GR?! 
zv@Cao>tc)~6thG^YT|33$t$Z%wdLm?TK?SyXB=l;a>t+Ow>MnCM_uwbEnWE~5~xE( zL`O-P2g$4rAq0XR!mizEBmh+A*I>x3Q?dBLpCT~mY@Be;k!Waa##C3w;@G2n0RJx0V24;o`*9Ixqn{rD>l~X2e zkp2d99rI*vPt=+h4;Wb#hbP2Ei zVlh4b*5kbB-OuRLw?D%-AKuSIgrP@2Ao6`S1K_esKV5O}I6m!!&*jLG5v7?L7iV`N zEemPuinlRo_;CI%d>~CJH}FM^g^oX*fgwlThh59t!M(p@fZ-qVGZLYGAVus3a)7&E zWro1IouioLqvC*4UCsG@2g?8o78wLlphi#g(Fv0U?*CJYT07zlfPK0{I$3OW~niy&ZliL zVq7_|e{TaLm!$CQiO6Lv_WMZm_2dDNV#O5kXdq2+wl6P6POCsmLm&6Va)7JY3J0LE zA}H1XvJHvHG)-(zrmTlqu@)!`xGA#OA7@!q8C3_3w((aNX4pqvYT_MKJ7Dl@&p3T+;v&V3-+-Eg28p9S9rKQ8?o=oN>-9 z+;!%mJYdE>2$uwC@9GBV8ga6?6tL`lwTWXamtuHYA3 zIk(^NSg&<<-r^Ho2m54E_FIk!4H?s*x55$q@f&Zi`R2ljXY5P=D}O2U%3@JmVzP38 z&gL|!xZj1FXZR$=R*c86GuNQ1K;t=MBi@(4&X#p|W{V4aZb5k=_Z>C4@VjuKnQmzZ z-M{c$KIyt=DI6}rrk#zbD@vm=??>L105NY}niaK?FC?IF)q~4e#^O-z(gC4FbGTl_pW+_P6w#&B5eB%6m*x8VzoqH3)&~=r{DJEvo zo^nOdr>oKskjc5QELVX*j6!&VVNS-HM6x#{iGXl{&u#l4pb%-MQHMs6NM_k4Nd#g; zgRmXmC#<0q1^galG8TJGjcmsiBSw{}&%WyB;vkFe&a~8Z!IhOk)f2a1DW>FSFT2bU zDmrAINX7F)GbNW5=qjCeRdLj$bjBv7l+yfR=$xabww!cwHu=InRZyTSWm!xBDJD|N zQM_L+$LH0BAc0iQQc=G_yPLAo&N~zgd(_bW9<{#Sk^v8?u!j&TD&Sn+;-aXJYD$8< zYJDT+ESGiRP-}-R?YvE%fT=1AB`cvJnaV1#KrohltWEx?PYoLsRRM#zz9CLK z-oBP!TeA_DU-uSy4QAo>Py6_QZyyAkE8wA73*&K}eROd%a6pcLqiS@_u&u;#PmVVPAlCg}ChUD^6 zpGsx&D(sg+2!YO?l=2!HwR8u8q{gNF(<0uNl|l*>7J1yJWJsYWL2v|D4q(ySm6b$5 zFzR!MeRwT59(fx>2;s5-Gb?ElOkE?}a;Tvrp~4kIX;M`hi@t2p?C*N*(3FY?rJ%R+ zj{3^fq>)YMIT-b-;FrHqd#Buhu$S3~wCL_mOQ{Ktgr)2EnIsI2*>YJ2Q+=-^4MTFQ zkKOWO61`n%DJ7w>&~#l_Lp-m<;?H$=@~8xt)&@kpDDM|ka)b(nniU}BFK)v0Q{^}>8En|G(QI~7Kc9-z87#-X4S+O|1% za5kroupOSGT+}GF&nJikLDR$!^&n~8+mHWWX*)6SxO*^mNFkcL5=g}o zbjYD|@%GCvV(;RoVGcWkHRPyl*aUDOh;pyOfDx7W;O*70IyS>oHkPffZDbY=L)YRx z&^_mpfXvU%FQ=2O(KUxnkZT!`w? z$Ks(gCvm831SX#IFf}fF61$eZj;W_Sh;@rzfWQAF^nUXcKoaFMeg;^CRO4#Y&NvMX z11peQyauxWbXFTaB4L!EaQt92KYuoe6plReP85$Ci5H(+0#n*B!V%85E+cQrFw_-0 zh?X$kfASg3IsRPqA2|)r-0~%e76FX!{EBUCS`(f|s#v*IMMe3xUi| zc>Fr6?ytz#Zyv@E>W`CdpTnnB4uu3jp3^b#@FC=~7p?0)hP8JQ-~I62bk`lPqiFgG z7}l=Y!g?_osLAtB*fJcYoPMeNu=DuWnL6()eH4!jPz;;>d-Rp3y42TF&};%$qbpx%sw z!H1p;qi+L_Jo~4(>!SN%_tnFQRzUiT(f8#Y(2FNS44;o;kBzpaufWYE0MxR7!fbLf z?Vy|uSu+vorPrXSydRFa>L$FlC=b2yEnKnbGrnv5xuo1GR1K@c?nQS&^le1#*o)B} z(BQ~A*zMmzmucp{RTLU@FoKyda%u^hRxiPzk!ND`OecURP&IS{4!iJStbG4v z6jcrQ?;+9eal$pvV&|qjDr&ueiqo3rBx=ZqdaVa;$`q-VEM30&PUn@=9->08pFM$m zaMzaJvDpkJxh{`(9E?ydk6?mV3Z!)%Uy~3k0$#DCvP9c-)a>MV;E(s8 zheNJ=9pA0{7H!EO1{Vs%S)*jqMbvaqR;X~qq(V+)lFSsL1I7VW*!|B+;MZ+gU zdPAg0u&o42%`6lHX;&wgCAY1d@?#>HuK>X5b>%*1^yd(pCaJzUp8|FP5Q zyX~8xOARBd%FyuLSFGy>3||;EvyO%mIsgVKmsrUCo}T*-=h|wYU_wBG#ty9HNAAB3 z(+@g=7A;?ix*-*qFk>SB_797wqPiM#^Cv8}e1wt-^J$k}jgpWbhr~W%_w1KIsYdF{ z=x{;2Wp@*5J3mCTcO*45HDk>DGZA0>2OhcLm&m31KupBMOkgNx=#nUE0^P|RcYOW_ zbcIJyOHUuFWeRD{!?Chy6p;?k88?tZ9-TEQ;J7Z%IA9PiKJ7T1_}!a$XZYdh>NlTS zd-^bD)^vafgu?Ds><&MfT3rpQUxW`FHVA=el$Jd4dk7^luOxxbe{~Zd z@X1zkZ5Q75wcNe_J!BGnhz&fNcGP#E@bCp_K7KqOyDo;p-VW-`x#(6F7g;{IrVRqf z<$rN}yY{G&_-`fA?{UI)PvNDz&W1r%_}MM@P_{6PrSGmrM^d7^#6)k8C9~}*#)2|; zz`+sPbLb}A&v(JI`VZo@mz>CsUD*=OShZ;jm9|TT0TYRV)t;>v9bLD|V`S3}ogP|v z#vy#oV;|Gf^)Z;pp(qS&=muI_;|Rb;QN+eEQ;YD#Ys=8PaU+lX$!vW0;j{l}4*Vzn ziT@h5{7297FLBsiK(kAb#0U{-Qo`v> zv9BUPzdLt4@9D|mj`NQCN6+#%91IP9w5lduFp8vK48%*;wL-njSdfAlo}0w>?}0+<2EQRMgQ z=xIwpdcE)lJWz_6mBQ>s^; z|HMD>PyGKJ{|_^He^}ERxX}Or03~!qSaf7zbY(hYa%Ew3WdJfTGBhnPF)cDSR4_R@ zH846gIV&(QIxsLR$YGQK001R)MObuXVRU6WZEs|0W_bWIFfuePFflDMHdHV Date: Thu, 30 May 2024 16:42:51 +0200 Subject: [PATCH 17/78] folder Crossval --- Skill.R | 153 ----- full_ecvs_oper.R => full_ecvs_anomalies.R | 10 +- full_ecvs_mask.R | 435 ------------ .../CrossVal/Crossval_anomalies.R | 6 +- 
.../CrossVal/Crossval_skill.R | 19 +- modules/CrossVal/R/CRPS_clim.R | 35 + modules/CrossVal/R/RPS_clim.R | 39 ++ modules/CrossVal/R/tmp/Bias.R | 216 ++++++ modules/CrossVal/R/tmp/Corr.R | 484 +++++++++++++ modules/CrossVal/R/tmp/Eno.R | 103 +++ modules/CrossVal/R/tmp/GetProbs.R | 353 ++++++++++ modules/CrossVal/R/tmp/RPS.R | 408 +++++++++++ modules/CrossVal/R/tmp/RPSS.R | 638 ++++++++++++++++++ modules/CrossVal/R/tmp/RandomWalkTest.R | 184 +++++ modules/CrossVal/R/tmp/SprErr.R | 227 +++++++ modules/CrossVal/recipe_crossval_ecvs.yml | 183 +++++ recipe_tas_oper.yml => recipe_tas.yml | 5 +- 17 files changed, 2890 insertions(+), 608 deletions(-) delete mode 100644 Skill.R rename full_ecvs_oper.R => full_ecvs_anomalies.R (74%) delete mode 100644 full_ecvs_mask.R rename full_crossval_anomalies.R => modules/CrossVal/Crossval_anomalies.R (98%) rename skill_full_crossval.R => modules/CrossVal/Crossval_skill.R (88%) create mode 100644 modules/CrossVal/R/CRPS_clim.R create mode 100644 modules/CrossVal/R/RPS_clim.R create mode 100644 modules/CrossVal/R/tmp/Bias.R create mode 100644 modules/CrossVal/R/tmp/Corr.R create mode 100644 modules/CrossVal/R/tmp/Eno.R create mode 100644 modules/CrossVal/R/tmp/GetProbs.R create mode 100644 modules/CrossVal/R/tmp/RPS.R create mode 100644 modules/CrossVal/R/tmp/RPSS.R create mode 100644 modules/CrossVal/R/tmp/RandomWalkTest.R create mode 100644 modules/CrossVal/R/tmp/SprErr.R create mode 100644 modules/CrossVal/recipe_crossval_ecvs.yml rename recipe_tas_oper.yml => recipe_tas.yml (95%) diff --git a/Skill.R b/Skill.R deleted file mode 100644 index 2c1f1d6f..00000000 --- a/Skill.R +++ /dev/null @@ -1,153 +0,0 @@ -fair <- TRUE - -## START SKILL ASSESSMENT: -# RPS -source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/GetProbs.R") -cal_hcst_probs_ev <- GetProbs(res$cal_hcst_ev, time_dim = 'syear', - prob_thresholds = NULL, - bin_dim_abs = 'probs', - indices_for_quantiles = NULL, - memb_dim = 'ensemble', abs_thresholds = res$lims_cal_hcst_tr, - ncores = recipe$Analysis$ncores) -cal_obs_probs_ev <- GetProbs(data$obs$data, time_dim = 'syear', - prob_thresholds = NULL, - bin_dim_abs = 'probs', - indices_for_quantiles = NULL, - memb_dim = 'ensemble', - abs_thresholds = res$lims_cal_obs_tr, - ncores = recipe$Analysis$ncores) -source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/modules/Skill/R/tmp/RPS.R") -rps <- RPS(exp = ano_hcst_probs_ev, obs = ano_obs_probs_ev, memb_dim = NULL, - cat_dim = 'probs', cross.val = FALSE, time_dim = 'syear', - Fair = fair, nmemb = nmemb, - ncores = recipe$Analysis$ncores) -source("modules/Skill/R/RPS_clim.R") -rps_clim <- Apply(list(ano_obs_probs_ev), - target_dims = c('probs', 'syear'), - RPS_clim, bin_dim_abs = 'probs', Fair = fair, - cross.val = FALSE, ncores = recipe$Analysis$ncores)$output1 -# RPSS -source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/RPSS.R") -source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/RandomWalkTest.R") -rpss <- RPSS(exp = cal_hcst_probs_ev, obs = cal_obs_probs_ev, - time_dim = 'syear', memb_dim = NULL, - cat_dim = 'probs', Fair = fair, nmemb = nmemb, - # We should use a line like this - #abs_threshold = res$lims_ano_hcst_tr, - #prob_threshold = c(1/3, 2/3), - cross.val = FALSE, - ncores = recipe$Analysis$ncores) - -cal_fcst <- CST_Calibration(data$hcst, data$obs, data$fcst, - sdate_dim = 'syear', memb_dim = 'ensemble') -RPS -crps <- CRPS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, - time_dim = 'syear', memb_dim = 'ensemble', - Fair = fair, - 
ncores = recipe$Analysis$ncores) -# Este no sé como se calcula????: -# Aquí no se puede porque estaría incluyendo información de los otros años -#source("modules/Skill/R/CRPS_clim.R") -# Pero si lo hago con el ano_obs_tr si puedo hacerlo aquí -# el resultado es igual a dentro del bucle. -crps_clim <- CRPS(exp = res$ano_obs_tr, obs = res$ano_obs_ev, - time_dim = 'syear', memb_dim = 'sample.syear', - Fair = fair - ncores = recipe$Analysis$ncores) - - -# CRPSS -ref <- res$ano_obs_tr -dim(ref) <- c(ensemble = as.numeric(sdate_dim) -1, - nftime, nlats, nlons, sdate_dim) -crpss <- CRPSS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, ref = ref, - memb_dim = 'ensemble', Fair = fair, - time_dim = 'syear', clim.cross.val = FALSE, - ncores = recipe$Analysis$ncores) - -# Corr -source("modules/Skill/R/tmp/Corr.R") -enscorr <- Corr(res$ano_hcst_ev, res$ano_obs_ev, - dat_dim = NULL, - time_dim = 'syear', - method = 'pearson', - memb_dim = 'ensemble', - memb = F, - conf = F, - pval = F, - sign = T, - alpha = 0.05, - ncores = recipe$Analysis$ncores) - -# Mean Bias -#mean_bias <- Bias(res$ano_hcst_ev, res$ano_obs_ev, -mean_bias <- Bias(data$hcst$data, data$obs$data, - time_dim = 'syear', - memb_dim = 'ensemble', - ncores = recipe$Analysis$ncores) - -mean_bias_sign <- Apply(list(data$hcst$data, data$obs$data), - target_dims = list(c('syear', 'ensemble'), 'syear'), - fun = function(x,y) { - if (!(any(is.na(x)) || any(is.na(y)))) { - res <- t.test(x = y, - y = apply(x, 1, mean, na.rm = T), - alternative = "two.sided")$p.value - } else { - res <- NA - } - return(res)}, - ncores = sdate_dim)$output1 -mean_bias_sign <- mean_bias_sign <= 0.05 - -# Spread error ratio -source("SprErr.R") -enssprerr <- SprErr(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, - memb_dim = 'ensemble', dat_dim = NULL, - time_dim = 'syear', pval = TRUE, - ncores = recipe$Analysis$ncores) -enssprerr_sign <- enssprerr$p.val -enssprerr_sign <- enssprerr_sign <= 0.05 -enssprerr <- enssprerr$ratio - -# RMSE -rms <- RMS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, - memb_dim = 'ensemble', dat_dim = NULL, - time_dim = 'syear', alpha = 0.05, - ncores = recipe$Analysis$ncores) - -skill_metrics <- list(mean_bias = mean_bias, - mean_bias_significance = mean_bias_sign, - enscorr = enscorr$corr, - enscorr_significance = enscorr$sign, - enssprerr = enssprerr, - enssprerr_significance = enssprerr_sign, - rps = rps, rps_clim = rps_clim, crps = crps, crps_clim = crps_clim, - rpss = rpss$rpss, rpss_significance = rpss$sign, #crps = crps, - crpss = crpss$crpss, crpss_significance = crpss$sign, - rms = rms$rms) -skill_metrics <- lapply(skill_metrics, function(x) { - InsertDim(drop(x), len = 1, pos = 1, name = 'var')}) -original <- recipe$Run$output_dir -recipe$Run$output_dir <- paste0(original, "/outputs/Skill/") - -skill_metrics <- lapply(skill_metrics, function(x) { - if (is.logical(x)) { - dims <- dim(x) - res <- as.numeric(x) - dim(res) <- dims - } else { - res <- x - } - return(res) - }) -# Compute save metrics -source("modules/Saving/Saving.R") -#Saving <- Saving(recipe = recipe, data = data, skill = skill_metrics) - save_metrics(recipe = recipe, - metrics = skill_metrics, - data_cube = data$hcst, agg = 'global', - outdir = recipe$Run$output_dir) - -recipe$Run$output_dir <- original - diff --git a/full_ecvs_oper.R b/full_ecvs_anomalies.R similarity index 74% rename from full_ecvs_oper.R rename to full_ecvs_anomalies.R index 8a08fef0..44c7e57d 100644 --- a/full_ecvs_oper.R +++ b/full_ecvs_anomalies.R @@ -5,7 +5,7 @@ source("modules/Units/Units.R") 
source("modules/Visualization/Visualization.R") args = commandArgs(trailingOnly = TRUE) recipe_file <- args[1] -#recipe_file <- "recipe_tas_oper.yml" +#recipe_file <- "recipe_tas.yml" recipe <- read_atomic_recipe(recipe_file) #recipe <- prepare_outputs(recipe_file) # Load datasets @@ -15,11 +15,11 @@ data_summary(data$hcst, recipe) data_summary(data$obs, recipe) -source("full_crossval_anomalies.R") -res <- full_crossval_anomalies(recipe = recipe, data = data) +source("modules/Crossval/Crossval_anomalies.R") +res <- Crossval_anomalies(recipe = recipe, data = data) -source("skill_full_crossval.R") -skill_metrics <- skill_full_crossval(recipe = recipe, data_crossval = res, +source("modules/Crossval/Crossval_skill.R") +skill_metrics <- Crossval_skill(recipe = recipe, data_crossval = res, fair = FALSE, nmemb = NULL, nmemb_ref = NULL) Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, significance = TRUE) diff --git a/full_ecvs_mask.R b/full_ecvs_mask.R deleted file mode 100644 index 45d7bb50..00000000 --- a/full_ecvs_mask.R +++ /dev/null @@ -1,435 +0,0 @@ - -source("modules/Loading/Loading.R") -source("modules/Saving/Saving.R") -source("modules/Units/Units.R") -source("modules/Visualization/Visualization.R") -args = commandArgs(trailingOnly = TRUE) -recipe_file <- args[1] -#recipe_file <- "recipe_tas_oper.yml" -recipe <- read_atomic_recipe(recipe_file) -#recipe <- prepare_outputs(recipe_file) -# Load datasets -data <- Loading(recipe) -data <- Units(recipe, data) -data_summary(data$hcst, recipe) -data_summary(data$obs, recipe) - -#file <- "/esarchive/scratch/nperez/NUTS3_ERA5Land-62_56E-22_81N.nc" -#file <- "/esarchive/scratch/nperez/git4/sunset/NUTS3_ERA5_-62_56E-22_81N.nc" -#mask <- nc_open(file) -#lat <- ncatt_get(mask, 'lat') -#lon <- ncatt_get(mask, 'lon') -#mask <- ncget_var(mask) - -dev.new() -source("/esarchive/scratch/nperez/git/s2dv/R/Bias.R") -bias <- Bias(exp = data$hcst$data, obs = data$obs$data, - memb_dim = 'ensemble', time_dim = 'syear', alpha = 0.05) -PlotEquiMap(bias$bias[1,1,1,1,1,,], lon = data$hcst$coord$lon, - lat = data$hcst$coords$lat, dots = bias$sig[1,1,1,1,1,,], - filled.c = F, toptitle = "singnif") -bias <- s2dv::Bias(exp = data$hcst$data, obs = data$obs$data, - memb_dim = 'ensemble', time_dim = 'syear') -dev.new() -PlotEquiMap(bias[1,1,1,1,1,,], lon = data$hcst$coord$lon, - lat = data$hcst$coords$lat, #dots = bias$sig[1,1,1,1,1,,], - filled.c = F, toptitle = "original") - - - -dev.new() -source("/esarchive/scratch/nperez/git/s2dv/R/Bias.R") -bias <- Bias(exp = data$hcst$data, obs = data$obs$data, abs = T, - memb_dim = 'ensemble', time_dim = 'syear', alpha = 0.05) -PlotEquiMap(bias$bias[1,1,1,1,1,,], lon = data$hcst$coord$lon, - lat = data$hcst$coords$lat, dots = bias$sig[1,1,1,1,1,,], - filled.c = F, toptitle = "singnif") -bias <- s2dv::Bias(exp = data$hcst$data, obs = data$obs$data, abs = T, - memb_dim = 'ensemble', time_dim = 'syear') -dev.new() -PlotEquiMap(bias[1,1,1,1,1,,], lon = data$hcst$coord$lon, - lat = data$hcst$coords$lat, #dots = bias$sig[1,1,1,1,1,,], - filled.c = F, toptitle = "original") - - - -source("/esarchive/scratch/nperez/git/Flor/NUTS3.R") -mask <- test -mask_lat <- lats -mask_lon <-lons - -# Region to subset -range(data$hcst$coords$longitude) -range(data$hcst$coords$latitude) - -mask <- SelBox(mask, lon = mask_lon, lat = mask_lat, - region = c(-20, 40, 20, 80)) -mask_lat <- mask$lat -mask_lon <- mask$lon -mask <- mask$data -all(data$hcst$coord$latitude == mask_lat) -all(data$hcst$coord$longitude == mask_lon) - -polys <- 
array(unique(as.vector(mask))[-1], - c(poly = length(unique(as.vector(mask))[-1]))) -## missing polys: -missing <- which(!(1:length(ids) %in% polys)) -tas_polys <- Apply(list(data$hcst$data, mask, polys), - target_dims = list(c('latitude', 'longitude'), - c('lat', 'lon'), NULL), - function(x, y, pol) { - mean(x[which(y == pol)], na.rm = TRUE)}, - ncores = 6)$output1 -data$hcst$data <- tas_polys -data$hcst$coord <- data$hcst$coord[-c(7,8)] -data$hcst$coord <- append(data$hcst$coord, list(polygons = polys)) - -polys_obs <- Apply(list(data$obs$data, mask, polys), - target_dims = list(c('latitude', 'longitude'), - c('lat', 'lon'), NULL), - function(x, y, pol) { - mean(x[which(y == pol)], na.rm = TRUE)}, - ncores = 4)$output1 -data$obs$data <- polys_obs -data$obs$coord <- data$obs$coord[-c(7,8)] -data$obs$coord <- append(data$obs$coord, list(polygons = polys)) - - -polys_fcst <- Apply(list(data$fcst$data, mask, polys), - target_dims = list(c('latitude', 'longitude'), - c('lat', 'lon'), NULL), - function(x, y, pol) { - mean(x[which(y == pol)], na.rm = TRUE)}, - ncores = 4)$output1 -data$fcst$data <- polys_fcst -data$fcst$coord <- data$fcst$coord[-c(7,8)] -data$fcst$coord <- append(data$fcst$coord, list(polygons = polys)) - - testp <- Subset(MeanDims(data$fcst$data, 'ensemble'), along = 'time', - indices = 1, drop = 'all') -data_pol <- data.frame(data = testp, NUTS_ID = ids[-missing]) -data_pol <- rbind(data_pol, data.frame(data =rep(0, length(missing)), NUTS_ID = ids[missing])) -shp <- rgdal::readOGR(shp_file) -shp <- subset(shp, LEVL_CODE == 2) - shp_data <- merge(shp, data_pol, by.x="NUTS_ID", by.y="NUTS_ID") - shp_data@data$id <- rownames(shp_data@data) - df.points <- fortify(shp_data, region = "id") - df.df <- plyr::join(df.points, shp_data@data, by = "id") - -gusa <- map_data("world") -plot_poly <- ggplot(data = df.df) + - geom_polygon(aes(long, lat, group = group),fill = "lightgrey", data = gusa) + - coord_map(projection = "stereographic", - xlim = c(-15, 40), - ylim = c(35, 75)) + - geom_polygon(aes(x = long, y = lat, fill = cut(data, breaks = seq(-16, 22, 2)), - group = group), color = "grey", size = 0.01) + - geom_polygon(aes(x = long, y = lat, group = group), fill = NA, - color = "grey", size = 0.01, data = shp) + - theme_bw() + - scale_fill_manual(values = c(rev(brewer.pal(9, 'Blues')), - brewer.pal(9, 'Reds'), "black"), - drop = FALSE, name = 'tas(ºC)') + - theme(panel.background = element_rect(fill = 'azure'), - text = element_text(family = "Times")) + # Colour of ocean - xlab('Longitude') + ylab('Latitude') + - scale_x_continuous(breaks = seq(-12, 45, 4),labels = waiver()) + - scale_y_continuous(breaks = seq(32, 70, 4),labels = waiver()) + - ggtitle("Forecast test") - -#reg <- SelBox(data$hcst$data, lon = as.vector(data$hcst$coords$longitude), -# lat = as.vector(data$hcst$coords$latitude), -# region = c(lonmin = 180, lonmax = 270, latmin = -30, latmax = 30), -# latdim = 'latitude', londim = 'longitude') -# -#data$hcst$data <- reg$data -#data$hcst$coords$longitude <- reg$lon -#data$hcst$coords$latitude <- reg$lat - -#reg <- SelBox(data$obs$data, lon = as.vector(data$obs$coords$longitude), -# lat = as.vector(data$obs$coords$latitude), -# region = c(lonmin = 180, lonmax = 270, latmin = -30, latmax = 30), -# latdim = 'latitude', londim = 'longitude') - -#data$obs$data <- reg$data -#data$obs$coords$longitude <- reg$lon -#data$obs$coords$latitude <- reg$lat - -# UKMO January 1993 is missing: -if (recipe$Analysis$Time$sdate == '0101') { - if (recipe$Analysis$Datasets$System$name == 
"UKMO-System602") { - if (1993 %in% recipe$Analysis$Time$hcst_start:recipe$Analysis$Time$hcst_end) { - info(recipe$Run$logger, - "UKMO January 1993 not available") - ind <- recipe$Analysis$Time$hcst_start:recipe$Analysis$Time$hcst_end - ind <- (1:length(ind))[-which(ind == 1993)] - data$hcst <- CST_Subset(data$hcst, along = 'syear', indices = ind) - data$obs <- CST_Subset(data$obs, along = 'syear', indices = ind) - sdate_dim <- dim(data$hcst$data)['syear'] - } - } -} - -calibraion_comp <- TRUE -anomalies_comp <- FALSE -source("crossval.R") - -## Define FAIR option: -fair <- TRUE - -## START SKILL ASSESSMENT: -# RPS -source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/GetProbs.R") -cal_hcst_probs_ev <- GetProbs(res$cal_hcst_ev, time_dim = 'syear', - prob_thresholds = NULL, - bin_dim_abs = 'probs', - indices_for_quantiles = NULL, - memb_dim = 'ensemble', abs_thresholds = res$lims_cal_hcst_tr, - ncores = recipe$Analysis$ncores) -cal_obs_probs_ev <- GetProbs(data$obs$data, time_dim = 'syear', - prob_thresholds = NULL, - bin_dim_abs = 'probs', - indices_for_quantiles = NULL, - memb_dim = 'ensemble', - abs_thresholds = res$lims_cal_obs_tr, - ncores = recipe$Analysis$ncores) -source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/modules/Skill/R/tmp/RPS.R") -rps <- RPS(exp = ano_hcst_probs_ev, obs = ano_obs_probs_ev, memb_dim = NULL, - cat_dim = 'probs', cross.val = FALSE, time_dim = 'syear', - Fair = fair, nmemb = nmemb, - ncores = recipe$Analysis$ncores) -source("modules/Skill/R/RPS_clim.R") -rps_clim <- Apply(list(ano_obs_probs_ev), - target_dims = c('probs', 'syear'), - RPS_clim, bin_dim_abs = 'probs', Fair = fair, - cross.val = FALSE, ncores = recipe$Analysis$ncores)$output1 -# RPSS -source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/RPSS.R") -source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/RandomWalkTest.R") -rpss <- RPSS(exp = cal_hcst_probs_ev, obs = cal_obs_probs_ev, - time_dim = 'syear', memb_dim = NULL, - cat_dim = 'probs', Fair = fair, nmemb = nmemb, - # We should use a line like this - #abs_threshold = res$lims_ano_hcst_tr, - #prob_threshold = c(1/3, 2/3), - cross.val = FALSE, - ncores = recipe$Analysis$ncores) - -cal_fcst <- CST_Calibration(data$hcst, data$obs, data$fcst, - sdate_dim = 'syear', memb_dim = 'ensemble') - - -shp_file = "/esarchive/shapefiles/NUTS3/NUTS_RG_60M_2021_4326.shp" -shp <- sf::st_read(shp_file) # class sf -ids <- subset(shp, shp$LEVL_CODE == 3) - - -###### -library(sf) -library(ggplot2) -#library(ggplot2, lib.loc = .libPaths()[2]) -library(RColorBrewer) -library(rgeos) - -gusa <- map_data("world") - -pals <- brewer.pal(9, 'Reds')[3:9] -pals <- c("#ffffb3",pals,'#525252') - -brks <- 0:5 -brks <- seq(258, 274, 2) - -plot_poly <- list() -polys_data <- NULL - - -## Now fill each polygon with the mean value - data_pol <- data.frame(data = apply(cal_fcst$data[1,1,1,1,1,1,,], 2, mean), - LAU_ID = polys) - shp_data <- merge(subset(shp, data_pol, by.x="LAU_ID", by.y="LAU_ID") - -# Magic lines below that converts SpatialPolygonsDataFrame to normal data frame - shp_data@data$id <- rownames(shp_data@data) - df.points <- fortify(shp_data, region = "id") - df.df <- plyr::join(df.points, shp_data@data, by = "id") -ano <- df.df -ano$data <- ano$data - fmi_clim_m$data - -ggplot(data = ano) + #df.df) + - geom_polygon(aes(long, lat, group = group),fill = "lightgrey", data = gusa) + - coord_map(projection = "stereographic", - xlim = c(lon_min, lon_max), - ylim = c(lat_min, lat_max)) + - # geom_polygon(aes(x = long, 
y = lat, fill = cut(data, breaks = seq(-14, 18, 2)), - geom_polygon(aes(x = long, y = lat, fill = cut(data, - breaks = seq(0, 5, 0.5)), - group = group), color = "grey",size = 0.08) + - geom_polygon(aes(x = long, y = lat, group = group), fill = NA, - color = "grey", size = 0.5, data = shp_herding) + - theme_bw() + - scale_fill_manual(values = c(#rev(brewer.pal(9, 'Blues')), - 'white', brewer.pal(9, 'Reds'), "black"), - drop = FALSE, name = 'tas(ºC)') + - theme(panel.background = element_rect(fill = 'azure'), - text = element_text(family = "Times")) + # Colour of ocean - xlab('Longitude') + ylab('Latitude') + - scale_x_continuous(breaks = seq(-12, 45, 4),labels = waiver()) + - scale_y_continuous(breaks = seq(32, 70, 4),labels = waiver()) + - ggtitle(paste(month.name[mes], year))# + - -###### -#PlotEquiMap(rps[1,,], lon = data$hcst$coords$longitude, -# lat = data$hcst$coords$latitude, filled.c = F, -# fileout = "test.png") -# CRPS -crps <- CRPS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, - time_dim = 'syear', memb_dim = 'ensemble', - Fair = fair, - ncores = recipe$Analysis$ncores) -# Este no sé como se calcula????: -# Aquí no se puede porque estaría incluyendo información de los otros años -#source("modules/Skill/R/CRPS_clim.R") -# Pero si lo hago con el ano_obs_tr si puedo hacerlo aquí -# el resultado es igual a dentro del bucle. -crps_clim <- CRPS(exp = res$ano_obs_tr, obs = res$ano_obs_ev, - time_dim = 'syear', memb_dim = 'sample.syear', - Fair = fair - ncores = recipe$Analysis$ncores) - - -# CRPSS -ref <- res$ano_obs_tr -dim(ref) <- c(ensemble = as.numeric(sdate_dim) -1, - nftime, nlats, nlons, sdate_dim) -crpss <- CRPSS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, ref = ref, - memb_dim = 'ensemble', Fair = fair, - time_dim = 'syear', clim.cross.val = FALSE, - ncores = recipe$Analysis$ncores) - - - -# Corr -source("modules/Skill/R/tmp/Corr.R") -enscorr <- Corr(res$ano_hcst_ev, res$ano_obs_ev, - dat_dim = NULL, - time_dim = 'syear', - method = 'pearson', - memb_dim = 'ensemble', - memb = F, - conf = F, - pval = F, - sign = T, - alpha = 0.05, - ncores = recipe$Analysis$ncores) - -# Mean Bias -#mean_bias <- Bias(res$ano_hcst_ev, res$ano_obs_ev, -mean_bias <- Bias(data$hcst$data, data$obs$data, - time_dim = 'syear', - memb_dim = 'ensemble', - ncores = recipe$Analysis$ncores) - -mean_bias_sign <- Apply(list(data$hcst$data, data$obs$data), - target_dims = list(c('syear', 'ensemble'), 'syear'), - fun = function(x,y) { - if (!(any(is.na(x)) || any(is.na(y)))) { - res <- t.test(x = y, - y = apply(x, 1, mean, na.rm = T), - alternative = "two.sided")$p.value - } else { - res <- NA - } - return(res)}, - ncores = sdate_dim)$output1 -mean_bias_sign <- mean_bias_sign <= 0.05 -#PlotEquiMap(mean_bias[1,1,1,1,1,,], lat = data$hcst$coords$latitude, -# lon = data$hcst$coords$longitude, -# dots = mean_bias_sign[1,1,1,1,1,,,1]) - -# Spread error ratio -source("SprErr.R") -enssprerr <- SprErr(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, - memb_dim = 'ensemble', dat_dim = NULL, - time_dim = 'syear', pval = TRUE, - ncores = recipe$Analysis$ncores) -enssprerr_sign <- enssprerr$p.val -enssprerr_sign <- enssprerr_sign <= 0.05 -enssprerr <- enssprerr$ratio - -# RMSE -rms <- RMS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, - memb_dim = 'ensemble', dat_dim = NULL, - time_dim = 'syear', alpha = 0.05, - ncores = recipe$Analysis$ncores) - -#obs_noensdim <- ClimProjDiags::Subset(res$ano_obs_ev, "ensemble", 1, -# drop = "selected") - -#enssprerr <- easyVerification::veriApply(verifun = 'EnsSprErr', -# fcst = 
res$ano_hcst_ev, -# obs = obs_noensdim, -# tdim = which(names(dim(res$ano_hcst_ev))=='syear'), -# ensdim = which(names(dim(res$ano_hcst_ev))=='ensemble'), -# na.rm = FALSE, -# ncpus = recipe$Analysis$ncores) -if (any(is.na(rpss$sing))) { - info(recipe$Run$logger, - "RPSS NA") - - rpss$sing[is.na(rpss$sign)] <- FALSE -} -skill_metrics <- list(mean_bias = mean_bias, - mean_bias_significance = mean_bias_sign, - enscorr = enscorr$corr, - enscorr_significance = enscorr$sign, - enssprerr = enssprerr, - enssprerr_significance = enssprerr_sign, - rps = rps, rps_clim = rps_clim, crps = crps, crps_clim = crps_clim, - rpss = rpss$rpss, rpss_significance = rpss$sign, #crps = crps, - crpss = crpss$crpss, crpss_significance = crpss$sign, - rms = rms$rms) -skill_metrics <- lapply(skill_metrics, function(x) { - InsertDim(drop(x), len = 1, pos = 1, name = 'var')}) -original <- recipe$Run$output_dir -recipe$Run$output_dir <- paste0(original, "/outputs/Skill/") - -skill_metrics <- lapply(skill_metrics, function(x) { - if (is.logical(x)) { - dims <- dim(x) - res <- as.numeric(x) - dim(res) <- dims - } else { - res <- x - } - return(res) - }) -# Compute save metrics -source("modules/Saving/Saving.R") -#Saving <- Saving(recipe = recipe, data = data, skill = skill_metrics) - save_metrics(recipe = recipe, - metrics = skill_metrics, - data_cube = data$hcst, agg = 'global', - outdir = recipe$Run$output_dir) - -recipe$Run$output_dir <- original - -source("modules/Visualization/Visualization.R") -if (data$hcst$coords$longitude[1] != 0) { - skill_metrics <- lapply(skill_metrics, function(x) { - Subset(x, along = 'longitude', indices = c(182:360, 1:181)) - }) -} - info(recipe$Run$logger, - paste("lons:", data$hcst$coords$longitude)) - info(recipe$Run$logger, - paste("lons:", data$obs$coords$longitude)) - - -data$hcst$coords$longitude <- -179:180 - -Visualization(recipe, data, skill_metrics, significance = TRUE) - -source("tools/add_logo.R") -add_logo(recipe, "rsz_rsz_bsc_logo.png") - - diff --git a/full_crossval_anomalies.R b/modules/CrossVal/Crossval_anomalies.R similarity index 98% rename from full_crossval_anomalies.R rename to modules/CrossVal/Crossval_anomalies.R index f65d86a7..b097faa6 100644 --- a/full_crossval_anomalies.R +++ b/modules/CrossVal/Crossval_anomalies.R @@ -1,8 +1,8 @@ # Full-cross-val workflow ## This code should be valid for individual months and temporal averages -source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/GetProbs.R") +source("modules/Crossval/R/tmp/GetProbs.R") -full_crossval_anomalies <- function(recipe, data) { +Crossval_anomalies <- function(recipe, data) { cross.method <- recipe$Analysis$cross.method # TODO move check if (is.null(cross.method)) { @@ -14,8 +14,6 @@ full_crossval_anomalies <- function(recipe, data) { round(eval(parse(text = y)),2)})}) ncores <- recipe$Analysis$ncores na.rm <- recipe$Analysis$remove_NAs - ## TODO remove if the recipe is checked: - na.rm <- TRUE ## data dimensions sdate_dim <- dim(data$hcst$data)['syear'] orig_dims <- names(dim(data$hcst$data)) diff --git a/skill_full_crossval.R b/modules/CrossVal/Crossval_skill.R similarity index 88% rename from skill_full_crossval.R rename to modules/CrossVal/Crossval_skill.R index 053192a8..c142b94f 100644 --- a/skill_full_crossval.R +++ b/modules/CrossVal/Crossval_skill.R @@ -1,13 +1,14 @@ source("modules/Saving/Saving.R") -source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/modules/Skill/R/tmp/RPS.R") 
-source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/modules/Skill/R/RPS_clim.R") -source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/RPSS.R") -source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/RandomWalkTest.R") -source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/modules/Skill/R/tmp/Corr.R") -source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/dev-sigBias/R/Bias.R") -source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/dev-spread_error_ratio/R/SprErr.R") -source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/dev-spread_error_ratio/R/Eno.R") +source("modules/Crossval/R/tmp/RPS.R") +source("modules/Crossval/R/RPS_clim.R") +source("modules/Crossval/R/CRPS_clim.R") +source("modules/Crossval/R/tmp/RPSS.R") +source("modules/Crossval/R/tmp/RandomWalkTest.R") +source("modules/Corssval/R/tmp/Corr.R") +source("modules/Crossval/R/tmp/Bias.R") +source("modules/Crossval/R/tmp/SprErr.R") +source("modules/Crossval/R/tmp/Eno.R") ## data_crossval is the result from function full_crossval_anomalies or similar. ## this is a list with the required elements: @@ -32,7 +33,7 @@ source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/dev-spread_error_ratio/R/Eno.R ## the recipe could be used to read the Percentiles ## if fair is TRUE, the nmemb used to compute the probabilities is needed ## nmemb_ref is the number of year - 1 in case climatological forecast is the reference -skill_full_crossval <- function(recipe, data_crossval, +Crossval_skill <- function(recipe, data_crossval, fair = FALSE, nmemb = NULL, nmemb_ref = NULL) { ## START SKILL ASSESSMENT: # RPS diff --git a/modules/CrossVal/R/CRPS_clim.R b/modules/CrossVal/R/CRPS_clim.R new file mode 100644 index 00000000..0e6bef65 --- /dev/null +++ b/modules/CrossVal/R/CRPS_clim.R @@ -0,0 +1,35 @@ +# CRPS version for climatology +CRPS_clim <- function(obs, memb_dim ='ensemble', return_mean = TRUE, clim.cross.val= TRUE){ + time_dim <- names(dim(obs)) + obs_time_len <- dim(obs)[time_dim] + + if (isFALSE(clim.cross.val)) { + # Without cross-validation + ref <- array(data = rep(obs, each = obs_time_len), + dim = c(obs_time_len, obs_time_len)) + } else if (isTRUE(clim.cross.val)) { + # With cross-validation (excluding the value of that year to create ref for that year) + ref <- array(data = NA, + dim = c(obs_time_len, obs_time_len - 1)) + for (i in 1:obs_time_len) { + ref[i, ] <- obs[-i] + } + } + + names(dim(ref)) <- c(time_dim, memb_dim) + # ref: [sdate, memb] + # obs: [sdate] + crps_ref <- s2dv:::.CRPS(exp = ref, obs = obs, + time_dim = time_dim, + memb_dim = memb_dim, + dat_dim = NULL, + Fair = FALSE) + + # crps_ref should be [sdate] + if (return_mean == TRUE) { + return(mean(crps_ref)) + } else { + return(crps_ref) + } +} + diff --git a/modules/CrossVal/R/RPS_clim.R b/modules/CrossVal/R/RPS_clim.R new file mode 100644 index 00000000..6deab3ec --- /dev/null +++ b/modules/CrossVal/R/RPS_clim.R @@ -0,0 +1,39 @@ +# RPS version for climatology +RPS_clim <- function(obs, indices_for_clim = NULL, + prob_thresholds = c(1/3, 2/3), cross.val = TRUE, + Fair = FALSE, bin_dim_abs = NULL, return_mean = TRUE) { + if (is.null(indices_for_clim)){ + indices_for_clim <- 1:length(obs) + } + if (is.null(bin_dim_abs)) { + obs_probs <- .GetProbs(data = obs, indices_for_quantiles = indices_for_clim, ## temporarily removed s2dv::: + prob_thresholds = prob_thresholds, weights = NULL, + cross.val = cross.val) + } else { + obs_probs <- obs + } + # clim_probs: [bin, sdate] + clim_probs <- c(prob_thresholds[1], + diff(prob_thresholds), 1 - 
prob_thresholds[length(prob_thresholds)]) + clim_probs <- array(clim_probs, dim = dim(obs_probs)) + + # Calculate RPS for each time step + probs_clim_cumsum <- apply(clim_probs, 2, cumsum) + probs_obs_cumsum <- apply(obs_probs, 2, cumsum) + rps_ref <- apply((probs_clim_cumsum - probs_obs_cumsum)^2, 2, sum) + if (Fair) { # FairRPS + ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) + ## [formula taken from SpecsVerification::EnsRps] + ## See explanation in https://freva.met.fu-berlin.de/about/problems/ + R <- dim(obs)[2] #years + adjustment <- (-1) / (R - 1) * probs_clim_cumsum * (1 - probs_clim_cumsum) + adjustment <- colSums(adjustment) + rps_ref <- rps_ref + adjustment + } + + if (return_mean == TRUE) { + return(mean(rps_ref)) + } else { + return(rps_ref) + } +} diff --git a/modules/CrossVal/R/tmp/Bias.R b/modules/CrossVal/R/tmp/Bias.R new file mode 100644 index 00000000..b9292cae --- /dev/null +++ b/modules/CrossVal/R/tmp/Bias.R @@ -0,0 +1,216 @@ +#'Compute the Mean Bias +#' +#'The Mean Bias or Mean Error (Wilks, 2011) is defined as the mean difference +#'between the ensemble mean forecast and the observations. It is a deterministic +#'metric. Positive values indicate that the forecasts are on average too high +#'and negative values indicate that the forecasts are on average too low. +#'It also allows to compute the Absolute Mean Bias or bias without temporal +#'mean. If there is more than one dataset, the result will be computed for each +#'pair of exp and obs data. +#' +#'@param exp A named numerical array of the forecast with at least time +#' dimension. +#'@param obs A named numerical array of the observation with at least time +#' dimension. The dimensions must be the same as 'exp' except 'memb_dim' and +#' 'dat_dim'. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param dat_dim A character string indicating the name of dataset dimension. +#' The length of this dimension can be different between 'exp' and 'obs'. +#' The default value is NULL. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the ensemble mean; it should be set to NULL if the parameter +#' 'exp' is already the ensemble mean. The default value is NULL. +#'@param na.rm A logical value indicating if NAs should be removed (TRUE) or +#' kept (FALSE) for computation. The default value is FALSE. +#'@param absolute A logical value indicating whether to compute the absolute +#' bias. The default value is FALSE. +#'@param time_mean A logical value indicating whether to compute the temporal +#' mean of the bias. The default value is TRUE. +#'@param alpha A numeric or NULL (default) to indicate the significance level using Weltch test. Only available when absolute is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A numerical array of bias with dimensions c(nexp, nobs, the rest dimensions of +#''exp' except 'time_dim' (if time_mean = T) and 'memb_dim'). nexp is the number +#'of experiment (i.e., 'dat_dim' in exp), and nobs is the number of observation +#'(i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted. If alpha is specified, and absolute is FALSE, the result is a list with two elements, the bias as describe above and the significance as logical array with the same dimensions. 
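+#'
+#'@details
+#'When 'alpha' is set and 'absolute' is FALSE, the significance of the bias is
+#'assessed per grid point with a two-sided Welch t-test that compares the
+#'observed series against the ensemble-mean forecast series (as implemented in
+#'the .Bias() kernel below), in essence:
+#'\preformatted{
+#'pval <- t.test(x = obs, y = exp, alternative = "two.sided")$p.value
+#'sig <- pval <= alpha
+#'}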
+#' +#'@references +#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +#' +#'@examples +#'exp <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, member = 10, sdate = 50)) +#'obs <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, sdate = 50)) +#'bias <- Bias(exp = exp, obs = obs, memb_dim = 'member') +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@export +Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, na.rm = FALSE, + absolute = FALSE, time_mean = TRUE, alpha = NULL, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (!is.array(exp) | !is.numeric(exp)) + stop("Parameter 'exp' must be a numeric array.") + if (!is.array(obs) | !is.numeric(obs)) + stop("Parameter 'obs' must be a numeric array.") + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop("Parameter 'time_dim' must be a character string.") + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (memb_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { + obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') + } else { + stop("Not implemented for observations with members ('obs' can have 'memb_dim', ", + "but it should be of length = 1).") + } + } + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + } + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'memb_dim' and 'dat_dim'.") + } + ## na.rm + if (!is.logical(na.rm) | length(na.rm) > 1) { + stop("Parameter 'na.rm' must be one logical value.") + } + ## absolute + if (!is.logical(absolute) | length(absolute) > 1) { + stop("Parameter 'absolute' must be one logical value.") + } + ## time_mean + if (!is.logical(time_mean) | length(time_mean) > 1) { + stop("Parameter 'time_mean' must be one logical value.") + } + ## alpha + if (!is.null(alpha)) { + if (!is.numeric(alpha) | length(alpha) > 1) { + stop("Parameter 'alpha' must be null or a numeric value.") + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + 
###############################
+
+  ## Ensemble mean
+  if (!is.null(memb_dim)) {
+    exp <- MeanDims(exp, memb_dim, na.rm = na.rm)
+  }
+
+  ## (Mean) Bias
+  bias <- Apply(data = list(exp, obs),
+                target_dims = c(time_dim, dat_dim),
+                fun = .Bias,
+                time_dim = time_dim,
+                dat_dim = dat_dim,
+                na.rm = na.rm,
+                absolute = absolute,
+                time_mean = time_mean,
+                alpha = alpha,
+                ncores = ncores)
+
+  if (is.null(alpha)) {
+    bias <- bias$output1
+  }
+  return(bias)
+}
+
+
+.Bias <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, na.rm = FALSE,
+                  absolute = FALSE, time_mean = TRUE, alpha = NULL) {
+  # exp and obs: [sdate, (dat)]
+  if (is.null(dat_dim)) {
+    bias <- exp - obs
+
+    if (isTRUE(absolute)) {
+      bias <- abs(bias)
+    }
+
+    if (isTRUE(time_mean)) {
+      bias <- mean(bias, na.rm = na.rm)
+    }
+
+    if (!is.null(alpha)) {
+      if (!absolute) {
+        pval <- t.test(x = obs, y = exp, alternative = "two.sided")$p.value
+        sig <- pval <= alpha
+      }
+    }
+  } else {
+    nexp <- as.numeric(dim(exp)[dat_dim])
+    nobs <- as.numeric(dim(obs)[dat_dim])
+    bias <- array(dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs))
+    pval <- array(dim = c(nexp = nexp, nobs = nobs))
+    sig <- array(dim = c(nexp = nexp, nobs = nobs))
+    for (i in 1:nexp) {
+      for (j in 1:nobs) {
+        bias[, i, j] <- exp[, i] - obs[, j]
+        if (!is.null(alpha)) {
+          if (!absolute) {
+            pval[i, j] <- t.test(x = obs[, j], y = exp[, i],
+                                 alternative = "two.sided")$p.value
+            sig[i, j] <- pval[i, j] <= alpha
+          }
+        }
+      }
+    }
+
+    if (isTRUE(absolute)) {
+      bias <- abs(bias)
+    }
+
+    if (isTRUE(time_mean)) {
+      bias <- MeanDims(bias, time_dim, na.rm = na.rm)
+    }
+  }
+  if (!is.null(alpha) && !absolute) {
+    return(list(bias = bias, sig = sig))
+  } else {
+    return(bias)
+  }
+}
diff --git a/modules/CrossVal/R/tmp/Corr.R b/modules/CrossVal/R/tmp/Corr.R
new file mode 100644
index 00000000..744ff109
--- /dev/null
+++ b/modules/CrossVal/R/tmp/Corr.R
@@ -0,0 +1,484 @@
+#'Compute the correlation coefficient between an array of forecasts and their
+#'corresponding observations
+#'
+#'Calculate the correlation coefficient (Pearson, Kendall or Spearman) for
+#'an array of forecasts and an array of observations. The correlations are
+#'computed along 'time_dim' that usually refers to the start date dimension. If
+#''comp_dim' is given, the correlations are computed only if obs along comp_dim
+#'dimension are complete between limits[1] and limits[2], i.e., there is no NA
+#'between limits[1] and limits[2]. This option can be activated if the user
+#'wants to account only for the forecasts for which the corresponding
+#'observations are available at all leadtimes.\cr
+#'The confidence interval is computed by the Fisher transformation and the
+#'significance level relies on a one-sided Student's t-distribution.\cr
+#'The function can calculate the ensemble mean before the correlation if
+#''memb_dim' is specified and 'memb = F'. If the ensemble mean is not
+#'calculated, the correlation is calculated for each member.
+#'If there is only one dataset for exp and obs, you can simply use cor() to
+#'compute the correlation.
+#'
+#'@param exp A named numeric array of experimental data, with at least dimension
+#' 'time_dim'.
+#'@param obs A named numeric array of observational data, same dimensions as
+#' parameter 'exp' except along 'dat_dim' and 'memb_dim'.
+#'@param time_dim A character string indicating the name of dimension along
+#' which the correlations are computed. The default value is 'sdate'.
+#'@param dat_dim A character string indicating the name of dataset (nobs/nexp)
+#' dimension. The default value is NULL (no dataset).
+#'@param comp_dim A character string indicating the name of dimension along which
+#' obs is taken into account only if it is complete. The default value
+#' is NULL.
+#'@param limits A vector of two integers indicating the range along comp_dim to
+#' be completed. The default is c(1, length(comp_dim dimension)).
+#'@param method A character string indicating the type of correlation:
+#' 'pearson', 'spearman', or 'kendall'. The default value is 'pearson'.
+#'@param memb_dim A character string indicating the name of the member
+#' dimension. It must be one dimension in 'exp' and 'obs'. If there is no
+#' member dimension, set NULL. The default value is NULL.
+#'@param memb A logical value indicating whether to keep the 'memb_dim'
+#' dimension (TRUE) or do ensemble mean over 'memb_dim' (FALSE). Only
+#' functional when 'memb_dim' is not NULL. The default value is TRUE.
+#'@param pval A logical value indicating whether to return or not the p-value
+#' of the test Ho: Corr = 0. The default value is TRUE.
+#'@param conf A logical value indicating whether to return or not the confidence
+#' intervals. The default value is TRUE.
+#'@param sign A logical value indicating whether to retrieve the statistical
+#' significance of the test Ho: Corr = 0 based on 'alpha'. The default value is
+#' FALSE.
+#'@param alpha A numeric indicating the significance level for the statistical
+#' significance test. The default value is 0.05.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#' computation. The default value is NULL.
+#'
+#'@return
+#'A list containing the numeric arrays with dimension:\cr
+#' c(nexp, nobs, exp_memb, obs_memb, all other dimensions of exp except
+#' time_dim and memb_dim).\cr
+#'nexp is the number of experiment (i.e., 'dat_dim' in exp), and nobs is the
+#'number of observation (i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and
+#'nobs are omitted. exp_memb is the number of member in experiment (i.e.,
+#''memb_dim' in exp) and obs_memb is the number of member in observation (i.e.,
+#''memb_dim' in obs). If memb = F, exp_memb and obs_memb are omitted.\cr\cr
+#'\item{$corr}{
+#' The correlation coefficient.
+#'}
+#'\item{$p.val}{
+#' The p-value. Only present if \code{pval = TRUE}.
+#'}
+#'\item{$conf.lower}{
+#' The lower confidence interval. Only present if \code{conf = TRUE}.
+#'}
+#'\item{$conf.upper}{
+#' The upper confidence interval. Only present if \code{conf = TRUE}.
+#'}
+#'\item{$sign}{
+#' The statistical significance. Only present if \code{sign = TRUE}.
+#'}
+#'
+#'@examples
+#'# Case 1: Load sample data as in Load() example:
+#'example(Load)
+#'clim <- Clim(sampleData$mod, sampleData$obs)
+#'ano_exp <- Ano(sampleData$mod, clim$clim_exp)
+#'ano_obs <- Ano(sampleData$obs, clim$clim_obs)
+#'runmean_months <- 12
+#'
+#'# Smooth along lead-times
+#'smooth_ano_exp <- Smoothing(ano_exp, runmeanlen = runmean_months)
+#'smooth_ano_obs <- Smoothing(ano_obs, runmeanlen = runmean_months)
+#'required_complete_row <- 3 # Discard start dates which contain any NA lead-times
+#'leadtimes_per_startdate <- 60
+#'corr <- Corr(MeanDims(smooth_ano_exp, 'member'),
+#'             MeanDims(smooth_ano_obs, 'member'),
+#'             comp_dim = 'ftime', dat_dim = 'dataset',
+#'             limits = c(ceiling((runmean_months + 1) / 2),
+#'                        leadtimes_per_startdate - floor(runmean_months / 2)))
+#'
+#'# Case 2: Keep member dimension
+#'corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member', dat_dim = 'dataset')
+#'# ensemble mean
+#'corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member', memb = FALSE,
+#'             dat_dim = 'dataset')
+#'
+#'@import multiApply
+#'@importFrom ClimProjDiags Subset
+#'@importFrom stats cor pt qnorm
+#'@export
+Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL,
+                 comp_dim = NULL, limits = NULL, method = 'pearson',
+                 memb_dim = NULL, memb = TRUE,
+                 pval = TRUE, conf = TRUE, sign = FALSE,
+                 alpha = 0.05, ncores = NULL) {
+
+  # Check inputs
+  ## exp and obs (1)
+  if (is.null(exp) | is.null(obs)) {
+    stop("Parameter 'exp' and 'obs' cannot be NULL.")
+  }
+  if (!is.numeric(exp) | !is.numeric(obs)) {
+    stop("Parameter 'exp' and 'obs' must be a numeric array.")
+  }
+  if (is.null(dim(exp)) | is.null(dim(obs))) {
+    stop(paste0("Parameter 'exp' and 'obs' must have at least two dimensions ",
+                "containing time_dim and dat_dim."))
+  }
+  if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) |
+      any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) {
+    stop("Parameter 'exp' and 'obs' must have dimension names.")
+  }
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) > 1) {
+    stop("Parameter 'time_dim' must be a character string.")
+  }
+  if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) {
+    stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.")
+  }
+  ## dat_dim
+  if (!is.null(dat_dim)) {
+    if (!is.character(dat_dim) | length(dat_dim) > 1) {
+      stop("Parameter 'dat_dim' must be a character string or NULL.")
+    }
+    if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) {
+      stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.",
+           " Set it as NULL if there is no dataset dimension.")
+    }
+  }
+  ## comp_dim
+  if (!is.null(comp_dim)) {
+    if (!is.character(comp_dim) | length(comp_dim) > 1) {
+      stop("Parameter 'comp_dim' must be a character string.")
+    }
+    if (!comp_dim %in% names(dim(exp)) | !comp_dim %in% names(dim(obs))) {
+      stop("Parameter 'comp_dim' is not found in 'exp' or 'obs' dimension.")
+    }
+  }
+  ## limits
+  if (!is.null(limits)) {
+    if (is.null(comp_dim)) {
+      stop("Parameter 'comp_dim' cannot be NULL if 'limits' is assigned.")
+    }
+    if (!is.numeric(limits) | any(limits %% 1 != 0) | any(limits < 0) |
+        length(limits) != 2 | any(limits > dim(exp)[comp_dim])) {
+      stop(paste0("Parameter 'limits' must be a vector of two positive ",
+                  "integers smaller than the length of parameter 'comp_dim'."))
+    }
+  }
+  ## method
+  if (!(method %in% c("kendall", "spearman", "pearson"))) {
+    stop("Parameter 'method' must be one of 'kendall', 'spearman' or 'pearson'.")
+  }
+  ## memb_dim
+  if
(!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) { + stop("Parameter 'memb_dim' is not found in 'exp' nor 'obs' dimension. Set it as NULL if there is no member dimension.") + } + # Add [member = 1] + if (memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) { + dim(obs) <- c(dim(obs), 1) + names(dim(obs))[length(dim(obs))] <- memb_dim + } + if (!memb_dim %in% names(dim(exp)) & memb_dim %in% names(dim(obs))) { + dim(exp) <- c(dim(exp), 1) + names(dim(exp))[length(dim(exp))] <- memb_dim + } + } + ## memb + if (!is.logical(memb) | length(memb) > 1) { + stop("Parameter 'memb' must be one logical value.") + } + ## pval + if (!is.logical(pval) | length(pval) > 1) { + stop("Parameter 'pval' must be one logical value.") + } + ## conf + if (!is.logical(conf) | length(conf) > 1) { + stop("Parameter 'conf' must be one logical value.") + } + ## sign + if (!is.logical(sign) | length(sign) > 1) { + stop("Parameter 'sign' must be one logical value.") + } + ## alpha + if (!is.numeric(alpha) | alpha < 0 | alpha > 1 | length(alpha) > 1) { + stop("Parameter 'alpha' must be a numeric number between 0 and 1.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + name_obs <- name_obs[-which(name_obs == memb_dim)] + } + if (!identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimension except 'dat_dim' and 'memb_dim'.")) + } + if (dim(exp)[time_dim] < 3) { + stop("The length of time_dim must be at least 3 to compute correlation.") + } + + + ############################### + # Sort dimension + name_exp <- names(dim(exp)) + name_obs <- names(dim(obs)) + order_obs <- match(name_exp, name_obs) + obs <- Reorder(obs, order_obs) + + + ############################### + # Calculate Corr + + # Remove data along comp_dim dim if there is at least one NA between limits + if (!is.null(comp_dim)) { + pos <- which(names(dim(obs)) == comp_dim) + if (is.null(limits)) { + obs_sub <- obs + } else { + obs_sub <- ClimProjDiags::Subset(obs, pos, list(limits[1]:limits[2])) + } + outrows <- is.na(MeanDims(obs_sub, pos, na.rm = FALSE)) + outrows <- InsertDim(outrows, pos, dim(obs)[comp_dim]) + obs[which(outrows)] <- NA + rm(obs_sub, outrows) + } + if (!is.null(memb_dim)) { + if (!memb) { #ensemble mean + exp <- MeanDims(exp, memb_dim, na.rm = TRUE) + obs <- MeanDims(obs, memb_dim, na.rm = TRUE) +# name_exp <- names(dim(exp)) +# margin_dims_ind <- c(1:length(name_exp))[-which(name_exp == memb_dim)] +# exp <- apply(exp, margin_dims_ind, mean, na.rm = TRUE) #NOTE: remove NAs here +# obs <- apply(obs, margin_dims_ind, mean, na.rm = TRUE) + memb_dim <- NULL + } + } + + res <- Apply(list(exp, obs), + target_dims = list(c(time_dim, dat_dim, memb_dim), + c(time_dim, dat_dim, memb_dim)), + fun = .Corr, + dat_dim = dat_dim, memb_dim = memb_dim, + time_dim = time_dim, method = method, + pval = pval, conf = conf, sign = sign, alpha = alpha, + ncores = ncores) + 
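+  # 'res' is a named list: $corr is always present; $p.val, $conf.lower,
+  # $conf.upper and $sign are appended by .Corr() depending on 'pval',
+  # 'conf' and 'sign'.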
+ return(res) +} + +.Corr <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', + time_dim = 'sdate', method = 'pearson', + conf = TRUE, pval = TRUE, sign = FALSE, alpha = 0.05) { + + if (is.null(dat_dim)) { + nexp <- 1 + nobs <- 1 + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + } + + if (is.null(memb_dim)) { + CORR <- array(dim = c(nexp = nexp, nobs = nobs)) + + if (is.null(dat_dim)) { + # exp: [sdate] + # obs: [sdate] + if (any(!is.na(exp)) && sum(!is.na(obs)) > 2) { + CORR[, ] <- cor(exp, obs, use = "pairwise.complete.obs", method = method) + } + } else { + # exp: [sdate, dat_exp] + # obs: [sdate, dat_obs] + for (j in 1:nobs) { + for (y in 1:nexp) { + if (any(!is.na(exp[, y])) && sum(!is.na(obs[, j])) > 2) { + CORR[y, j] <- cor(exp[, y], obs[, j], + use = "pairwise.complete.obs", + method = method) + } + } + } +#---------------------------------------- +# Same as above calculation. +#TODO: Compare which is faster. +# CORR <- sapply(1:nobs, function(i) { +# sapply(1:nexp, function (x) { +# if (any(!is.na(exp[, x])) && sum(!is.na(obs[, i])) > 2) { +# cor(exp[, x], obs[, i], +# use = "pairwise.complete.obs", +# method = method) +# } else { +# NA +# } +# }) +# }) +#----------------------------------------- + } + + } else { # memb_dim != NULL + exp_memb <- as.numeric(dim(exp)[memb_dim]) # memb_dim + obs_memb <- as.numeric(dim(obs)[memb_dim]) + + CORR <- array(dim = c(nexp = nexp, nobs = nobs, exp_memb = exp_memb, obs_memb = obs_memb)) + + if (is.null(dat_dim)) { + # exp: [sdate, memb_exp] + # obs: [sdate, memb_obs] + for (j in 1:obs_memb) { + for (y in 1:exp_memb) { + + if (any(!is.na(exp[,y])) && sum(!is.na(obs[, j])) > 2) { + CORR[, , y, j] <- cor(exp[, y], obs[, j], + use = "pairwise.complete.obs", + method = method) + } + + } + } + } else { + # exp: [sdate, dat_exp, memb_exp] + # obs: [sdate, dat_obs, memb_obs] + for (j in 1:obs_memb) { + for (y in 1:exp_memb) { + CORR[, , y, j] <- sapply(1:nobs, function(i) { + sapply(1:nexp, function (x) { + if (any(!is.na(exp[, x, y])) && sum(!is.na(obs[, i, j])) > 2) { + cor(exp[, x, y], obs[, i, j], + use = "pairwise.complete.obs", + method = method) + } else { + NA + } + }) + }) + + } + } + } + + } + + +# if (pval) { +# for (i in 1:nobs) { +# p.val[, i] <- try(sapply(1:nexp, +# function(x) {(cor.test(exp[, x], obs[, i], +# use = "pairwise.complete.obs", +# method = method)$p.value)/2}), silent = TRUE) +# if (class(p.val[, i]) == 'character') { +# p.val[, i] <- NA +# } +# } +# } + + if (pval || conf || sign) { + if (method == "kendall" | method == "spearman") { + if (!is.null(dat_dim) | !is.null(memb_dim)) { + tmp <- apply(obs, c(1:length(dim(obs)))[-1], rank) # for memb_dim = NULL, 2; for memb_dim, c(2, 3) + names(dim(tmp))[1] <- time_dim + eno <- Eno(tmp, time_dim) + } else { + tmp <- rank(obs) + tmp <- array(tmp) + names(dim(tmp)) <- time_dim + eno <- Eno(tmp, time_dim) + } + } else if (method == "pearson") { + eno <- Eno(obs, time_dim) + } + + if (is.null(memb_dim)) { + eno_expand <- array(dim = c(nexp = nexp, nobs = nobs)) + for (i in 1:nexp) { + eno_expand[i, ] <- eno + } + } else { #member + eno_expand <- array(dim = c(nexp = nexp, nobs = nobs, exp_memb = exp_memb, obs_memb = obs_memb)) + for (i in 1:nexp) { + for (j in 1:exp_memb) { + eno_expand[i, , j, ] <- eno + } + } + } + + } + +#############old################# +#This doesn't return error but it's diff from cor.test() when method is spearman and kendall + if (pval || sign) { + t <- sqrt(CORR * CORR * (eno_expand - 2) / (1 - (CORR ^ 2))) 
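+    # One-sided test of H0: corr = 0. The t statistic uses the effective
+    # sample size (eno_expand) instead of the nominal one, with
+    # eno_expand - 2 degrees of freedom.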
+    p.val <- pt(t, eno_expand - 2, lower.tail = FALSE)
+    if (sign) signif <- !is.na(p.val) & p.val <= alpha
+  }
+###################################
+  if (conf) {
+    conf.lower <- alpha / 2
+    conf.upper <- 1 - conf.lower
+    suppressWarnings({
+      conflow <- tanh(atanh(CORR) + qnorm(conf.lower) / sqrt(eno_expand - 3))
+      confhigh <- tanh(atanh(CORR) + qnorm(conf.upper) / sqrt(eno_expand - 3))
+    })
+  }
+
+###################################
+  # Remove nexp and nobs if dat_dim = NULL
+  if (is.null(dat_dim)) {
+#  if (is.null(dat_dim) & !is.null(memb_dim)) {
+
+    if (length(dim(CORR)) == 2) {
+      dim(CORR) <- NULL
+      if (pval) {
+        dim(p.val) <- NULL
+      }
+      if (conf) {
+        dim(conflow) <- NULL
+        dim(confhigh) <- NULL
+      }
+      if (sign) {
+        dim(signif) <- NULL
+      }
+    } else {
+      dim(CORR) <- dim(CORR)[3:length(dim(CORR))]
+      if (pval) {
+        dim(p.val) <- dim(p.val)[3:length(dim(p.val))]
+      }
+      if (conf) {
+        dim(conflow) <- dim(conflow)[3:length(dim(conflow))]
+        dim(confhigh) <- dim(confhigh)[3:length(dim(confhigh))]
+      }
+      if (sign) {
+        dim(signif) <- dim(signif)[3:length(dim(signif))]
+      }
+    }
+  }
+
+###################################
+
+  res <- list(corr = CORR)
+  if (pval) {
+    res <- c(res, list(p.val = p.val))
+  }
+  if (conf) {
+    res <- c(res, list(conf.lower = conflow, conf.upper = confhigh))
+  }
+  if (sign) {
+    res <- c(res, list(sign = signif))
+  }
+
+  return(res)
+
+}
diff --git a/modules/CrossVal/R/tmp/Eno.R b/modules/CrossVal/R/tmp/Eno.R
new file mode 100644
index 00000000..cb927602
--- /dev/null
+++ b/modules/CrossVal/R/tmp/Eno.R
@@ -0,0 +1,103 @@
+#'Compute effective sample size with classical method
+#'
+#'Compute the number of effective samples along one dimension of an array. This
+#'effective number of independent observations can be used in
+#'statistical/inference tests.\cr
+#'The calculation is based on the eno function by Caio Coelho from rclim.txt.
+#'
+#'@param data A numeric array with named dimensions.
+#'@param time_dim A character string indicating the dimension along which to
+#'  compute the effective sample size. The default value is 'sdate'.
+#'@param na.action A function. It can be na.pass (missing values are allowed)
+#'  or na.fail (no missing values are allowed). See details in stats::acf().
+#'  The default value is na.pass.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#'  computation. The default value is NULL.
+#'
+#'@return An array with the same dimension as parameter 'data' except the
+#'  time_dim dimension, which is removed after the computation. The array
+#'  indicates the number of effective samples along time_dim.
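+#'
+#'@details Following the implementation in .Eno(), the effective sample size
+#'  is estimated with the classical large-lag formula
+#'  \deqn{n_{eff} = \min\left(\frac{n}{1 + 2 \sum_{k=1}^{n-1} \frac{n-k}{n}
+#'  \rho_k}, \; n\right)}
+#'  where \eqn{n} is the number of non-missing values along 'time_dim' and
+#'  \eqn{\rho_k} is the lag-\eqn{k} autocorrelation estimated with
+#'  \code{stats::acf()}.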
+#'
+#'@examples
+#'set.seed(1)
+#'data <- array(rnorm(800), dim = c(dataset = 1, member = 2, sdate = 4,
+#'                                  ftime = 4, lat = 10, lon = 10))
+#'na <- floor(runif(40, min = 1, max = 800))
+#'data[na] <- NA
+#'res <- Eno(data)
+#'
+#'@importFrom stats acf na.pass na.fail
+#'@import multiApply
+#'@export
+Eno <- function(data, time_dim = 'sdate', na.action = na.pass, ncores = NULL) {
+
+  # Check inputs
+  ## data
+  if (is.null(data)) {
+    stop("Parameter 'data' cannot be NULL.")
+  }
+  if (!is.numeric(data)) {
+    stop("Parameter 'data' must be a numeric array.")
+  }
+  if (is.null(dim(data))) { #is vector
+    dim(data) <- c(length(data))
+    names(dim(data)) <- time_dim
+  }
+  if (any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) {
+    stop("Parameter 'data' must have dimension names.")
+  }
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) > 1) {
+    stop("Parameter 'time_dim' must be a character string.")
+  }
+  if (!time_dim %in% names(dim(data))) {
+    stop("Parameter 'time_dim' is not found in 'data' dimension.")
+  }
+  ## na.action
+  if (as.character(substitute(na.action)) != "na.pass" &
+      as.character(substitute(na.action)) != "na.fail") {
+    stop("Parameter 'na.action' must be either the function na.pass or na.fail.")
+  }
+  if (as.character(substitute(na.action)) == "na.fail" && anyNA(data)) {
+    stop("Calculation fails because NA is found in parameter 'data', ",
+         "which is not accepted when ",
+         "parameter 'na.action' = na.fail.")
+  }
+  ## ncores
+  if (!is.null(ncores)) {
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
+        length(ncores) > 1) {
+      stop("Parameter 'ncores' must be a positive integer.")
+    }
+  }
+
+  ###############################
+  # Calculate Eno
+
+  eno <- Apply(data = list(data),
+               target_dims = time_dim,
+               output_dims = NULL,
+               fun = .Eno,
+               na.action = na.action,
+               ncores = ncores)$output1
+
+  return(eno)
+}
+
+.Eno <- function(x, na.action) {
+  n <- length(sort(x))
+  if (n > 1) {
+    a <- acf(x, lag.max = n - 1, plot = FALSE,
+             na.action = na.action)$acf[2:n, 1, 1]
+    s <- 0
+    for (k in 1:(n - 1)) {
+      s <- s + (((n - k) / n) * a[k])
+    }
+    eno <- min(n / (1 + (2 * s)), n)
+  } else {
+    eno <- NA
+  }
+
+  return(eno)
+}
+
diff --git a/modules/CrossVal/R/tmp/GetProbs.R b/modules/CrossVal/R/tmp/GetProbs.R
new file mode 100644
index 00000000..2a538892
--- /dev/null
+++ b/modules/CrossVal/R/tmp/GetProbs.R
@@ -0,0 +1,353 @@
+#'Compute probabilistic forecasts or the corresponding observations
+#'
+#'Compute probabilistic forecasts from an ensemble based on the relative
+#'thresholds, or the probabilistic observations (i.e., which probabilistic
+#'category was observed). A reference period can be specified to calculate the
+#'absolute thresholds between each probabilistic category. The absolute
+#'thresholds can be computed in cross-validation mode. If data is an ensemble,
+#'the probabilities are calculated as the percentage of members that fall into
+#'each category. For observations (or forecasts without member dimension), 1
+#'means that the event happened, while 0 indicates that the event did not
+#'happen. Weighted probabilities can be computed if the weights are provided for
+#'each ensemble member and time step. The absolute thresholds can also be
+#'provided directly for the probability calculation.
+#'
+#'@param data A named numerical array of the forecasts or observations with, at
+#'  least, time dimension.
+#'@param time_dim A character string indicating the name of the time dimension.
+#'  The default value is 'sdate'.
+#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the probabilities of the forecast, or NULL if there is no member +#' dimension (e.g., for observations, or for forecast with only one ensemble +#' member). The default value is 'member'. +#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to +#' 1) between the categories. The default value is c(1/3, 2/3), which +#' corresponds to tercile equiprobable categories. +#'@param abs_thresholds A numeric array or vector of the absolute thresholds in +#' the same units as \code{data}. If an array is provided, it should have at +#' least 'bin_dim_abs' dimension. If it has more dimensions (e.g. different +#' thresholds for different locations, i.e. lon and lat dimensions), they +#' should match the dimensions of \code{data}, except the member dimension +#' which should not be included. The default value is NULL and, in this case, +#' 'prob_thresholds' is used for calculating the probabilities. +#'@param bin_dim_abs A character string of the dimension name of +#' 'abs_thresholds' array in which category limits are stored. It will also be +#' the probabilistic category dimension name in the output. The default value +#' is 'bin'. +#'@param indices_for_quantiles A vector of the indices to be taken along +#' 'time_dim' for computing the absolute thresholds between the probabilistic +#' categories. If NULL (default), the whole period is used. It is only used +#' when 'prob_thresholds' is provided. +#'@param weights A named numerical array of the weights for 'data' with +#' dimensions 'time_dim' and 'memb_dim' (if 'data' has them). The default value +#' is NULL. The ensemble should have at least 70 members or span at least 10 +#' time steps and have more than 45 members if consistency between the weighted +#' and unweighted methodologies is desired. +#'@param cross.val A logical indicating whether to compute the thresholds +#' between probabilistic categories in cross-validation mode. The default value +#' is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A numerical array of probabilities with dimensions c(bin_dim_abs, the rest +#'dimensions of 'data' except 'memb_dim'). 'bin' dimension has the length of +#'probabilistic categories, i.e., \code{length(prob_thresholds) + 1}. 
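+#'
+#'@details When 'weights' is provided, the category thresholds are not taken
+#'  from \code{quantile()} but from the weighted empirical CDF: the values are
+#'  sorted, their normalized weights accumulated, and the requested
+#'  probabilities are obtained by linear interpolation with \code{approx()}
+#'  (see the internal .sorted_distributions()).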
+#' +#'@examples +#'data <- array(rnorm(2000), dim = c(ensemble = 25, sdate = 20, time = 4)) +#'res <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', +#' indices_for_quantiles = 4:17) +#' +#'# abs_thresholds is provided +#'abs_thr1 <- c(-0.2, 0.3) +#'abs_thr2 <- array(c(-0.2, 0.3) + rnorm(40) * 0.1, dim = c(cat = 2, sdate = 20)) +#'res1 <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', +#' prob_thresholds = NULL, abs_thresholds = abs_thr1) +#'res2 <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', +#' prob_thresholds = NULL, abs_thresholds = abs_thr2, bin_dim_abs = 'cat') +#' +#'@import multiApply +#'@importFrom easyVerification convert2prob +#'@export +GetProbs <- function(data, time_dim = 'sdate', memb_dim = 'member', + indices_for_quantiles = NULL, + prob_thresholds = c(1/3, 2/3), abs_thresholds = NULL, + bin_dim_abs = 'bin', weights = NULL, cross.val = FALSE, ncores = NULL) { + + # Check inputs + ## data + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!is.numeric(data)) { + stop("Parameter 'data' must be a numeric array.") + } + if (any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) { + stop("Parameter 'data' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop('Parameter "time_dim" must be a character string.') + if (!time_dim %in% names(dim(data))) { + stop("Parameter 'time_dim' is not found in 'data' dimensions.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(data))) { + stop("Parameter 'memb_dim' is not found in 'data' dimensions. If no member ", + "dimension exists, set it as NULL.") + } + } + ## bin_dim_abs + if (!is.character(bin_dim_abs) | length(bin_dim_abs) != 1) { + stop('Parameter "bin_dim_abs" must be a character string.') + } + ## prob_thresholds, abs_thresholds + if (!is.null(abs_thresholds) & !is.null(prob_thresholds)) { + .warning(paste0("Parameters 'prob_thresholds' and 'abs_thresholds' are both provided. 
", + "Only the first one is used.")) + abs_thresholds <- NULL + } else if (is.null(abs_thresholds) & is.null(prob_thresholds)) { + stop("One of the parameters 'prob_thresholds' and 'abs_thresholds' must be provided.") + } + if (!is.null(prob_thresholds)) { + if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | + any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { + stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.") + } + ## indices_for_quantiles + if (is.null(indices_for_quantiles)) { + indices_for_quantiles <- seq_len(dim(data)[time_dim]) + } else { + if (!is.numeric(indices_for_quantiles) | !is.vector(indices_for_quantiles)) { + stop("Parameter 'indices_for_quantiles' must be NULL or a numeric vector.") + } else if (length(indices_for_quantiles) > dim(data)[time_dim] | + max(indices_for_quantiles) > dim(data)[time_dim] | + any(indices_for_quantiles < 1)) { + stop("Parameter 'indices_for_quantiles' should be the indices of 'time_dim'.") + } + } + + } else { # abs_thresholds + + if (is.null(dim(abs_thresholds))) { # a vector + dim(abs_thresholds) <- length(abs_thresholds) + names(dim(abs_thresholds)) <- bin_dim_abs + } + # bin_dim_abs + if (!(bin_dim_abs %in% names(dim(abs_thresholds)))) { + stop("Parameter abs_thresholds' can be a vector or array with 'bin_dim_abs' dimension.") + } + if (!is.null(memb_dim) && memb_dim %in% names(dim(abs_thresholds))) { + stop("Parameter abs_thresholds' cannot have member dimension.") + } + dim_name_abs <- names(dim(abs_thresholds))[which(names(dim(abs_thresholds)) != bin_dim_abs)] + if (!all(dim_name_abs %in% names(dim(data)))) { + stop("Parameter 'abs_thresholds' dimensions except 'bin_dim_abs' must be in 'data' as well.") + } else { + if (any(dim(abs_thresholds)[dim_name_abs] != dim(data)[dim_name_abs])) { + stop("Parameter 'abs_thresholds' dimensions must have the same length as 'data'.") + } + } + if (!is.null(indices_for_quantiles)) { + warning("Parameter 'indices_for_quantiles' is not used when 'abs_thresholds' are provided.") + } + abs_target_dims <- bin_dim_abs + if (time_dim %in% names(dim(abs_thresholds))) { + abs_target_dims <- c(bin_dim_abs, time_dim) + } + + } + + ## weights + if (!is.null(weights)) { + if (!is.array(weights) | !is.numeric(weights)) + stop("Parameter 'weights' must be a named numeric array.") + +# if (is.null(dat_dim)) { + if (!is.null(memb_dim)) { + lendim_weights <- 2 + namesdim_weights <- c(time_dim, memb_dim) + } else { + lendim_weights <- 1 + namesdim_weights <- c(time_dim) + } + if (length(dim(weights)) != lendim_weights | + !all(names(dim(weights)) %in% namesdim_weights)) { + stop("Parameter 'weights' must have dimension ", + paste0(namesdim_weights, collapse = ' and '), ".") + } + if (any(dim(weights)[namesdim_weights] != dim(data)[namesdim_weights])) { + stop("Parameter 'weights' must have the same dimension length as ", + paste0(namesdim_weights, collapse = ' and '), " dimension in 'data'.") + } + weights <- Reorder(weights, namesdim_weights) + +# } else { +# if (length(dim(weights)) != 3 | +# any(!names(dim(weights)) %in% c(memb_dim, time_dim, dat_dim))) +# stop("Parameter 'weights' must have three dimensions with the names of ", +# "'memb_dim', 'time_dim' and 'dat_dim'.") +# if (dim(weights)[memb_dim] != dim(exp)[memb_dim] | +# dim(weights)[time_dim] != dim(exp)[time_dim] | +# dim(weights)[dat_dim] != dim(exp)[dat_dim]) { +# stop(paste0("Parameter 'weights' must have the same dimension lengths ", +# "as 'memb_dim', 'time_dim' and 'dat_dim' in 'exp'.")) +# } +# weights <- 
Reorder(weights, c(time_dim, memb_dim, dat_dim)) +# } + } + ## cross.val + if (!is.logical(cross.val) | length(cross.val) > 1) { + stop("Parameter 'cross.val' must be either TRUE or FALSE.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + if (is.null(abs_thresholds)) { + res <- Apply(data = list(data = data), + target_dims = c(time_dim, memb_dim), + output_dims = c(bin_dim_abs, time_dim), + fun = .GetProbs, + prob_thresholds = prob_thresholds, + indices_for_quantiles = indices_for_quantiles, + weights = weights, cross.val = cross.val, ncores = ncores)$output1 + } else { + res <- Apply(data = list(data = data, abs_thresholds = abs_thresholds), + target_dims = list(c(time_dim, memb_dim), abs_target_dims), + output_dims = c(bin_dim_abs, time_dim), + fun = .GetProbs, + prob_thresholds = NULL, + indices_for_quantiles = NULL, + weights = NULL, cross.val = FALSE, ncores = ncores)$output1 + } + + return(res) +} + +.GetProbs <- function(data, indices_for_quantiles, + prob_thresholds = c(1/3, 2/3), abs_thresholds = NULL, + weights = NULL, cross.val = FALSE) { + # .GetProbs() is used in RPS, RPSS, ROCSS + # data + ## if data is exp: [sdate, memb] + ## if data is obs: [sdate, (memb)] + # weights: [sdate, (memb)], same as data + # if abs_thresholds is not NULL: [bin, (sdate)] + + # Add dim [memb = 1] to data if it doesn't have memb_dim + if (length(dim(data)) == 1) { + dim(data) <- c(dim(data), 1) + if (!is.null(weights)) dim(weights) <- c(dim(weights), 1) + } + + # Calculate absolute thresholds + if (is.null(abs_thresholds)) { + if (cross.val) { + quantiles <- array(NA, dim = c(bin = length(prob_thresholds), sdate = dim(data)[1])) + for (i_time in seq_len(dim(data)[1])) { + if (is.null(weights)) { + tmp <- which(indices_for_quantiles != i_time) + quantiles[, i_time] <- + quantile(x = as.vector(data[indices_for_quantiles[tmp], ]), + probs = prob_thresholds, type = 8, na.rm = TRUE) + } else { + # weights: [sdate, memb] + tmp <- which(indices_for_quantiles != i_time) + sorted_arrays <- + .sorted_distributions(data[indices_for_quantiles[tmp], ], + weights[indices_for_quantiles[tmp], ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + quantiles[, i_time] <- approx(cumulative_weights, sorted_data, + prob_thresholds, "linear")$y + } + } + + } else { + if (is.null(weights)) { + quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), + probs = prob_thresholds, type = 8, na.rm = TRUE) + } else { + # weights: [sdate, memb] + sorted_arrays <- .sorted_distributions(data[indices_for_quantiles, ], + weights[indices_for_quantiles, ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + quantiles <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y + } + quantiles <- array(rep(quantiles, dim(data)[1]), + dim = c(bin = length(quantiles), dim(data)[1])) + } + + } else { # abs_thresholds provided + quantiles <- abs_thresholds + if (length(dim(quantiles)) == 1) { + quantiles <- InsertDim(quantiles, lendim = dim(data)[1], + posdim = 2, name = names(dim(data))[1]) + } + } + # quantiles: [bin-1, sdate] + + # Probabilities + probs <- array(dim = c(dim(quantiles)[1] + 1, dim(data)[1])) # [bin, sdate] + for (i_time in seq_len(dim(data)[1])) { + if (anyNA(data[i_time, ])) { + probs[, i_time] <- rep(NA, dim = 
dim(quantiles)[1] + 1) + } else { + if (is.null(weights)) { + probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], + threshold = quantiles[, i_time])) + } else { + sorted_arrays <- .sorted_distributions(data[i_time, ], weights[i_time, ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + # find any quantiles that are outside the data range + integrated_probs <- array(dim = dim(quantiles)) + for (i_quant in seq_len(dim(quantiles)[1])) { + # for thresholds falling under the distribution + if (quantiles[i_quant, i_time] < min(sorted_data)) { + integrated_probs[i_quant, i_time] <- 0 + # for thresholds falling over the distribution + } else if (max(sorted_data) < quantiles[i_quant, i_time]) { + integrated_probs[i_quant, i_time] <- 1 + } else { + integrated_probs[i_quant, i_time] <- approx(sorted_data, cumulative_weights, + quantiles[i_quant, i_time], "linear")$y + } + } + probs[, i_time] <- append(integrated_probs[, i_time], 1) - + append(0, integrated_probs[, i_time]) + if (min(probs[, i_time]) < 0 | max(probs[, i_time]) > 1) { + stop("Probability in i_time = ", i_time, " is out of [0, 1].") + } + } + } + } + + return(probs) +} + +.sorted_distributions <- function(data_vector, weights_vector) { + weights_vector <- as.vector(weights_vector) + data_vector <- as.vector(data_vector) + weights_vector <- weights_vector / sum(weights_vector) # normalize to 1 + sorter <- order(data_vector) + sorted_weights <- weights_vector[sorter] + cumulative_weights <- cumsum(sorted_weights) - 0.5 * sorted_weights + cumulative_weights <- cumulative_weights - cumulative_weights[1] # fix the 0 + cumulative_weights <- cumulative_weights / + cumulative_weights[length(cumulative_weights)] # fix the 1 + return(list(data = data_vector[sorter], cumulative_weights = cumulative_weights)) +} + diff --git a/modules/CrossVal/R/tmp/RPS.R b/modules/CrossVal/R/tmp/RPS.R new file mode 100644 index 00000000..0ed599ac --- /dev/null +++ b/modules/CrossVal/R/tmp/RPS.R @@ -0,0 +1,408 @@ +#'Compute the Ranked Probability Score +#' +#'The Ranked Probability Score (RPS; Wilks, 2011) is defined as the sum of the +#'squared differences between the cumulative forecast probabilities (computed +#'from the ensemble members) and the observations (defined as 0% if the category +#'did not happen and 100% if it happened). It can be used to evaluate the skill +#'of multi-categorical probabilistic forecasts. The RPS ranges between 0 +#'(perfect forecast) and n-1 (worst possible forecast), where n is the number of +#'categories. In the case of a forecast divided into two categories (the lowest +#'number of categories that a probabilistic forecast can have), the RPS +#'corresponds to the Brier Score (BS; Wilks, 2011), therefore ranging between 0 +#'and 1.\cr +#'The function first calculates the probabilities for forecasts and observations, +#'then use them to calculate RPS. Or, the probabilities of exp and obs can be +#'provided directly to compute the score. If there is more than one dataset, RPS +#'will be computed for each pair of exp and obs data. The fraction of acceptable +#'NAs can be adjusted. +#' +#'@param exp A named numerical array of either the forecasts with at least time +#' and member dimensions, or the probabilities with at least time and category +#' dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. 
+#'@param obs A named numerical array of either the observation with at least
+#'  time dimension, or the probabilities with at least time and category
+#'  dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. The
+#'  dimensions must be the same as 'exp' except 'memb_dim' and 'dat_dim'.
+#'@param time_dim A character string indicating the name of the time dimension.
+#'  The default value is 'sdate'.
+#'@param memb_dim A character string indicating the name of the member dimension
+#'  to compute the probabilities of the forecast. The default value is 'member'.
+#'  If the data are probabilities, set memb_dim as NULL.
+#'@param cat_dim A character string indicating the name of the category
+#'  dimension that is needed when 'exp' and 'obs' are probabilities. The default
+#'  value is NULL, which means that the data are not probabilities.
+#'@param dat_dim A character string indicating the name of dataset dimension.
+#'  The length of this dimension can be different between 'exp' and 'obs'.
+#'  The default value is NULL.
+#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to
+#'  1) between the categories. The default value is c(1/3, 2/3), which
+#'  corresponds to tercile equiprobable categories.
+#'@param indices_for_clim A vector of the indices to be taken along 'time_dim'
+#'  for computing the thresholds between the probabilistic categories. If NULL,
+#'  the whole period is used. The default value is NULL.
+#'@param Fair A logical indicating whether to compute the FairRPS (the
+#'  potential RPS that the forecast would have with an infinite ensemble size).
+#'  The default value is FALSE.
+#'@param nmemb A numeric value indicating the number of members used to compute
+#'  the probabilities. This parameter is necessary when calculating FairRPS
+#'  from probabilities. The default value is NULL.
+#'@param weights A named numerical array of the weights for 'exp' probability
+#'  calculation. If 'dat_dim' is NULL, the dimensions should include 'memb_dim'
+#'  and 'time_dim'. Else, the dimension should also include 'dat_dim'. The
+#'  default value is NULL. The ensemble should have at least 70 members or span
+#'  at least 10 time steps and have more than 45 members if consistency between
+#'  the weighted and unweighted methodologies is desired.
+#'@param cross.val A logical indicating whether to compute the thresholds
+#'  between probabilistic categories in cross-validation. The default value is
+#'  FALSE.
+#'@param return_mean A logical indicating whether to return the temporal mean
+#'  of the RPS or not. If TRUE, the temporal mean is calculated along time_dim,
+#'  if FALSE the time dimension is not aggregated. The default is TRUE.
+#'@param na.rm A logical or numeric value between 0 and 1. If it is numeric, it
+#'  means the lower limit for the fraction of the non-NA values. 1 is equal to
+#'  FALSE (no NA is acceptable), 0 is equal to TRUE (all NAs are acceptable).
+#'  The function returns NA if the fraction of non-NA values in the data is less
+#'  than na.rm. Otherwise, RPS will be calculated. The default value is FALSE.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#'  computation. The default value is NULL.
+#'
+#'@return
+#'A numerical array of RPS with dimensions c(nexp, nobs, the rest dimensions of
+#''exp' except 'time_dim' and 'memb_dim' dimensions). nexp is the number of
+#'experiment (i.e., dat_dim in exp), and nobs is the number of observation
+#'(i.e., dat_dim in obs). If dat_dim is NULL, nexp and nobs are omitted.
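+#'
+#'@details For each time step, the score is computed from the cumulative
+#'  category probabilities as
+#'  \deqn{RPS = \sum_{k=1}^{K} \left(\sum_{j=1}^{k} p_{exp,j} -
+#'  \sum_{j=1}^{k} p_{obs,j}\right)^2}
+#'  where \eqn{K} is the number of categories and \eqn{p_{exp,j}},
+#'  \eqn{p_{obs,j}} are the forecast and observed probabilities of category
+#'  \eqn{j} (Wilks, 2011).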
+#' +#'@references +#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +#' +#'@examples +#'# Use synthetic data +#'exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#'obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +#'res <- RPS(exp = exp, obs = obs) +#'# Use probabilities as inputs +#'exp_probs <- GetProbs(exp, time_dim = 'sdate', memb_dim = 'member') +#'obs_probs <- GetProbs(obs, time_dim = 'sdate', memb_dim = NULL) +#'res2 <- RPS(exp = exp_probs, obs = obs_probs, memb_dim = NULL, cat_dim = 'bin') +#' +#' +#'@import multiApply +#'@importFrom easyVerification convert2prob +#'@export +RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, + dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, + Fair = FALSE, nmemb = NULL, weights = NULL, + cross.val = FALSE, return_mean = TRUE, + na.rm = FALSE, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (!is.array(exp) | !is.numeric(exp)) + stop('Parameter "exp" must be a numeric array.') + if (!is.array(obs) | !is.numeric(obs)) + stop('Parameter "obs" must be a numeric array.') + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop('Parameter "time_dim" must be a character string.') + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## memb_dim & cat_dim + if (is.null(memb_dim) + is.null(cat_dim) != 1) { + stop("Only one of the two parameters 'memb_dim' and 'cat_dim' can have value.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + } + ## cat_dim + if (!is.null(cat_dim)) { + if (!is.character(cat_dim) | length(cat_dim) > 1) { + stop("Parameter 'cat_dim' must be a character string.") + } + if (!cat_dim %in% names(dim(exp)) | !cat_dim %in% names(dim(obs))) { + stop("Parameter 'cat_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } + } + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'memb_dim' and 'dat_dim'.") + } + ## prob_thresholds + if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | + any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { + stop("Parameter 
'prob_thresholds' must be a numeric vector between 0 and 1.")
+  }
+  ## indices_for_clim
+  if (is.null(indices_for_clim)) {
+    indices_for_clim <- seq_len(dim(obs)[time_dim])
+  } else {
+    if (!is.numeric(indices_for_clim) | !is.vector(indices_for_clim)) {
+      stop("Parameter 'indices_for_clim' must be NULL or a numeric vector.")
+    } else if (length(indices_for_clim) > dim(obs)[time_dim] |
+               max(indices_for_clim) > dim(obs)[time_dim] |
+               any(indices_for_clim < 1)) {
+      stop("Parameter 'indices_for_clim' should be the indices of 'time_dim'.")
+    }
+  }
+  ## Fair
+  if (!is.logical(Fair) | length(Fair) > 1) {
+    stop("Parameter 'Fair' must be either TRUE or FALSE.")
+  }
+  if (Fair) {
+    if (!is.null(cat_dim)) {
+      if (cat_dim %in% names(dim(exp))) {
+        if (is.null(nmemb)) {
+          stop("Parameter 'nmemb' is necessary to compute the Fair ",
+               "score from probabilities.")
+        }
+      }
+    }
+  }
+  ## return_mean
+  if (!is.logical(return_mean) | length(return_mean) > 1) {
+    stop("Parameter 'return_mean' must be either TRUE or FALSE.")
+  }
+  ## cross.val
+  if (!is.logical(cross.val) | length(cross.val) > 1) {
+    stop("Parameter 'cross.val' must be either TRUE or FALSE.")
+  }
+  ## weights
+  if (!is.null(weights) & is.null(cat_dim)) {
+    if (!is.array(weights) | !is.numeric(weights))
+      stop("Parameter 'weights' must be a named numeric array.")
+    if (is.null(dat_dim)) {
+      if (length(dim(weights)) != 2 | !all(names(dim(weights)) %in% c(memb_dim, time_dim)))
+        stop("Parameter 'weights' must have two dimensions with the names of ",
+             "'memb_dim' and 'time_dim'.")
+      if (dim(weights)[memb_dim] != dim(exp)[memb_dim] |
+          dim(weights)[time_dim] != dim(exp)[time_dim]) {
+        stop("Parameter 'weights' must have the same dimension lengths ",
+             "as 'memb_dim' and 'time_dim' in 'exp'.")
+      }
+      weights <- Reorder(weights, c(time_dim, memb_dim))
+
+    } else {
+      if (length(dim(weights)) != 3 | !all(names(dim(weights)) %in% c(memb_dim, time_dim, dat_dim)))
+        stop("Parameter 'weights' must have three dimensions with the names of ",
+             "'memb_dim', 'time_dim' and 'dat_dim'.")
+      if (dim(weights)[memb_dim] != dim(exp)[memb_dim] |
+          dim(weights)[time_dim] != dim(exp)[time_dim] |
+          dim(weights)[dat_dim] != dim(exp)[dat_dim]) {
+        stop("Parameter 'weights' must have the same dimension lengths ",
+             "as 'memb_dim', 'time_dim' and 'dat_dim' in 'exp'.")
+      }
+      weights <- Reorder(weights, c(time_dim, memb_dim, dat_dim))
+
+    }
+  } else if (!is.null(weights) & !is.null(cat_dim)) {
+    .warning(paste0("Parameter 'exp' and 'obs' are probabilities already, so parameter ",
+                    "'weights' is not used. 
Change 'weights' to NULL.")) + weights <- NULL + } + ## na.rm + if (!isTRUE(na.rm) & !isFALSE(na.rm) & !(is.numeric(na.rm) & na.rm >= 0 & na.rm <= 1)) { + stop('"na.rm" should be TRUE, FALSE or a numeric between 0 and 1') + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + + # Compute RPS + + ## Decide target_dims + if (!is.null(memb_dim)) { + target_dims_exp <- c(time_dim, memb_dim, dat_dim) + if (!memb_dim %in% names(dim(obs))) { + target_dims_obs <- c(time_dim, dat_dim) + } else { + target_dims_obs <- c(time_dim, memb_dim, dat_dim) + } + } else { # cat_dim + target_dims_exp <- target_dims_obs <- c(time_dim, cat_dim, dat_dim) + } + + rps <- Apply(data = list(exp = exp, obs = obs), + target_dims = list(exp = target_dims_exp, + obs = target_dims_obs), + fun = .RPS, + dat_dim = dat_dim, time_dim = time_dim, + memb_dim = memb_dim, cat_dim = cat_dim, + prob_thresholds = prob_thresholds, nmemb = nmemb, + indices_for_clim = indices_for_clim, Fair = Fair, + weights = weights, cross.val = cross.val, + na.rm = na.rm, ncores = ncores)$output1 + + if (return_mean) { + rps <- MeanDims(rps, time_dim, na.rm = TRUE) + } else { + rps <- rps + } + + return(rps) +} + + +.RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, + dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, + Fair = FALSE, nmemb = NULL, weights = NULL, + cross.val = FALSE, na.rm = FALSE) { + #--- if memb_dim: + # exp: [sdate, memb, (dat)] + # obs: [sdate, (memb), (dat)] + # weights: NULL or same as exp + #--- if cat_dim: + # exp: [sdate, bin, (dat)] + # obs: [sdate, bin, (dat)] + + # Adjust dimensions to be [sdate, memb, dat] for both exp and obs + if (!is.null(memb_dim)) { + if (!memb_dim %in% names(dim(obs))) { + obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim) + } + } + + if (is.null(dat_dim)) { + nexp <- 1 + nobs <- 1 + dim(exp) <- c(dim(exp), nexp = nexp) + dim(obs) <- c(dim(obs), nobs = nobs) + if (!is.null(weights)) dim(weights) <- c(dim(weights), nexp = nexp) + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + } + + rps <- array(dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs)) + + for (i in 1:nexp) { + for (j in 1:nobs) { + exp_data <- exp[, , i] + obs_data <- obs[, , j] + + if (is.null(dim(exp_data))) dim(exp_data) <- c(dim(exp)[1:2]) + if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1:2]) + + # Find the fraction of NAs + ## If any member/bin is NA at this time step, it is not good value. 
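+      ## 'na.rm' is mapped onto f_NAs, the minimum fraction of non-NA
+      ## (exp, obs) pairs required along the time dimension:
+      ##   na.rm = TRUE  -> f_NAs = 0 (any number of NAs is tolerated)
+      ##   na.rm = FALSE -> f_NAs = 1 (no NA is tolerated)
+      ##   numeric na.rm -> f_NAs = na.rm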
+ exp_mean <- rowMeans(exp_data) + obs_mean <- rowMeans(obs_data) + good_values <- !is.na(exp_mean) & !is.na(obs_mean) + + if (isTRUE(na.rm)) { + f_NAs <- 0 + } else if (isFALSE(na.rm)) { + f_NAs <- 1 + } else { + f_NAs <- na.rm + } + + if (f_NAs <= sum(good_values) / length(obs_mean)) { + + exp_data <- exp_data[good_values, , drop = F] + obs_data <- obs_data[good_values, , drop = F] + + # If the data inputs are forecast/observation, calculate probabilities + if (is.null(cat_dim)) { + if (!is.null(weights)) { + weights_data <- weights[which(good_values), , i] + if (is.null(dim(weights_data))) dim(weights_data) <- c(dim(weights)[1:2]) + } else { + weights_data <- weights #NULL + } + + # Subset indices_for_clim + dum <- match(indices_for_clim, which(good_values)) + good_indices_for_clim <- dum[!is.na(dum)] + + exp_probs <- .GetProbs(data = exp_data, indices_for_quantiles = good_indices_for_clim, + prob_thresholds = prob_thresholds, weights = weights_data, + cross.val = cross.val) + # exp_probs: [bin, sdate] + obs_probs <- .GetProbs(data = obs_data, indices_for_quantiles = good_indices_for_clim, + prob_thresholds = prob_thresholds, weights = NULL, + cross.val = cross.val) + # obs_probs: [bin, sdate] + + } else { # inputs are probabilities already + if (all(names(dim(exp_data)) == c(time_dim, memb_dim)) || + all(names(dim(exp_data)) == c(time_dim, cat_dim))) { + exp_probs <- t(exp_data) + obs_probs <- t(obs_data) + } + } + + probs_exp_cumsum <- apply(exp_probs, 2, cumsum) + probs_obs_cumsum <- apply(obs_probs, 2, cumsum) + + # rps: [sdate, nexp, nobs] + rps [good_values, i, j] <- colSums((probs_exp_cumsum - probs_obs_cumsum)^2) + if (Fair) { # FairRPS + if (!is.null(memb_dim)) { + if (memb_dim %in% names(dim(exp))) { + ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) + ## [formula taken from SpecsVerification::EnsRps] + R <- dim(exp)[2] #memb + } + } else { + R <- nmemb + } + warning("Applying fair correction.") + adjustment <- (-1) / (R - 1) * probs_exp_cumsum * (1 - probs_exp_cumsum) + adjustment <- colSums(adjustment) + rps[, i, j] <- rps[, i, j] + adjustment + } + + } else { ## not enough values different from NA + + rps[, i, j] <- NA_real_ + + } + + } + } + + if (is.null(dat_dim)) { + dim(rps) <- dim(exp)[time_dim] + } + + return(rps) +} + diff --git a/modules/CrossVal/R/tmp/RPSS.R b/modules/CrossVal/R/tmp/RPSS.R new file mode 100644 index 00000000..fc9931ad --- /dev/null +++ b/modules/CrossVal/R/tmp/RPSS.R @@ -0,0 +1,638 @@ +#'Compute the Ranked Probability Skill Score +#' +#'The Ranked Probability Skill Score (RPSS; Wilks, 2011) is the skill score +#'based on the Ranked Probability Score (RPS; Wilks, 2011). It can be used to +#'assess whether a forecast presents an improvement or worsening with respect to +#'a reference forecast. The RPSS ranges between minus infinite and 1. If the +#'RPSS is positive, it indicates that the forecast has higher skill than the +#'reference forecast, while a negative value means that it has a lower skill.\cr +#'Examples of reference forecasts are the climatological forecast (same +#'probabilities for all categories for all time steps), persistence, a previous +#'model version, and another model. It is computed as +#'\code{RPSS = 1 - RPS_exp / RPS_ref}. The statistical significance is obtained +#'based on a Random Walk test at the specified confidence level (DelSole and +#'Tippett, 2016).\cr +#'The function accepts either the ensemble members or the probabilities of +#'each data as inputs. 
If there is more than one dataset, RPSS will be +#'computed for each pair of exp and obs data. The NA ratio of data will be +#'examined before the calculation. If the ratio is higher than the threshold +#'(assigned by parameter \code{na.rm}), NA will be returned directly. NAs are +#'counted by per-pair method, which means that only the time steps that all the +#'datasets have values count as non-NA values. +#' +#'@param exp A named numerical array of either the forecast with at least time +#' and member dimensions, or the probabilities with at least time and category +#' dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. +#'@param obs A named numerical array of either the observation with at least +#' time dimension, or the probabilities with at least time and category +#' dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. The +#' dimensions must be the same as 'exp' except 'memb_dim' and 'dat_dim'. +#'@param ref A named numerical array of either the reference forecast with at +#' least time and member dimensions, or the probabilities with at least time and +#' category dimensions. The probabilities can be generated by +#' \code{s2dv::GetProbs}. The dimensions must be the same as 'exp' except +#' 'memb_dim' and 'dat_dim'. If there is only one reference dataset, it should +#' not have dataset dimension. If there is corresponding reference for each +#' experiment, the dataset dimension must have the same length as in 'exp'. If +#' 'ref' is NULL, the climatological forecast is used as reference forecast. +#' The default value is NULL. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the probabilities of the forecast and the reference forecast. The +#' default value is 'member'. If the data are probabilities, set memb_dim as +#' NULL. +#'@param cat_dim A character string indicating the name of the category +#' dimension that is needed when exp, obs, and ref are probabilities. The +#' default value is NULL, which means that the data are not probabilities. +#'@param dat_dim A character string indicating the name of dataset dimension. +#' The length of this dimension can be different between 'exp' and 'obs'. +#' The default value is NULL. +#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to +#' 1) between the categories. The default value is c(1/3, 2/3), which +#' corresponds to tercile equiprobable categories. +#'@param indices_for_clim A vector of the indices to be taken along 'time_dim' +#' for computing the thresholds between the probabilistic categories. If NULL, +#' the whole period is used. The default value is NULL. +#'@param Fair A logical indicating whether to compute the FairRPSS (the +#' potential RPSS that the forecast would have with an infinite ensemble size). +#' The default value is FALSE. +#'@param weights_exp A named numerical array of the forecast ensemble weights +#' for probability calculation. The dimension should include 'memb_dim', +#' 'time_dim' and 'dat_dim' if there are multiple datasets. All dimension +#' lengths must be equal to 'exp' dimension lengths. The default value is NULL, +#' which means no weighting is applied. The ensemble should have at least 70 +#' members or span at least 10 time steps and have more than 45 members if +#' consistency between the weighted and unweighted methodologies is desired. 
+#'@param weights_ref Same as 'weights_exp' but for the reference forecast.
+#'@param nmemb A numeric value indicating the number of members used to compute
+#'  the probabilities. This parameter is necessary when calculating FairRPSS
+#'  from probabilities. The default value is NULL.
+#'@param nmemb_ref Same as 'nmemb' but for the reference forecast. The default
+#'  value is NULL.
+#'@param cross.val A logical indicating whether to compute the thresholds
+#'  between probabilistic categories in cross-validation. The default value is
+#'  FALSE.
+#'@param na.rm A logical or numeric value between 0 and 1. If it is numeric, it
+#'  means the lower limit for the fraction of the non-NA values. 1 is equal to
+#'  FALSE (no NA is acceptable), 0 is equal to TRUE (all NAs are acceptable).
+#'  The function returns NA if the fraction of non-NA values in the data is less
+#'  than na.rm. Otherwise, RPSS will be calculated. The default value is FALSE.
+#'@param sig_method.type A character string indicating the test type of the
+#'  significance method. Check \code{RandomWalkTest()} parameter
+#'  \code{test.type} for details. The default is 'two.sided.approx', which is
+#'  the default of \code{RandomWalkTest()}.
+#'@param alpha A numeric of the significance level to be used in the statistical
+#'  significance test. The default value is 0.05.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#'  computation. The default value is NULL.
+#'
+#'@return
+#'\item{$rpss}{
+#'  A numerical array of RPSS with dimensions c(nexp, nobs, the rest dimensions
+#'  of 'exp' except 'time_dim' and 'memb_dim' dimensions). nexp is the number of
+#'  experiment (i.e., dat_dim in exp), and nobs is the number of observation
+#'  (i.e., dat_dim in obs). If dat_dim is NULL, nexp and nobs are omitted.
+#'}
+#'\item{$sign}{
+#'  A logical array of the statistical significance of the RPSS with the same
+#'  dimensions as $rpss.
+#'}
+#'
+#'@references
+#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7
+#'DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1
+#'
+#'@examples
+#'set.seed(1)
+#'exp <- array(rnorm(3000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50))
+#'set.seed(2)
+#'obs <- array(rnorm(300), dim = c(lat = 3, lon = 2, sdate = 50))
+#'set.seed(3)
+#'ref <- array(rnorm(3000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50))
+#'weights <- sapply(1:dim(exp)['sdate'], function(i) {
+#'  n <- abs(rnorm(10))
+#'  n/sum(n)
+#'  })
+#'dim(weights) <- c(member = 10, sdate = 50)
+#'# Use data as input
+#'res <- RPSS(exp = exp, obs = obs) ## climatology as reference forecast
+#'res <- RPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast
+#'res <- RPSS(exp = exp, obs = obs, ref = ref, weights_exp = weights, weights_ref = weights)
+#'res <- RPSS(exp = exp, obs = obs, alpha = 0.01, sig_method.type = 'two.sided')
+#'
+#'# Use probs as input
+#'exp_probs <- GetProbs(exp, memb_dim = 'member')
+#'obs_probs <- GetProbs(obs, memb_dim = NULL)
+#'ref_probs <- GetProbs(ref, memb_dim = 'member')
+#'res <- RPSS(exp = exp_probs, obs = obs_probs, ref = ref_probs, memb_dim = NULL,
+#'            cat_dim = 'bin')
+#'
+#'@import multiApply
+#'@export
+RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL,
+                 dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL,
+                 Fair = FALSE, nmemb = NULL, nmemb_ref = NULL,
+                 weights_exp = NULL, weights_ref = NULL,
+                 cross.val = FALSE, na.rm = FALSE,
+                 sig_method.type = 'two.sided.approx', alpha = 0.05, ncores = NULL) {
+
+  # Check inputs
+  ## exp, obs, and ref (1)
+  if (!is.array(exp) | !is.numeric(exp)) {
+    stop("Parameter 'exp' must be a numeric array.")
+  }
+  if (!is.array(obs) | !is.numeric(obs)) {
+    stop("Parameter 'obs' must be a numeric array.")
+  }
+  if (any(is.null(names(dim(exp)))) | 
any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + if (!is.null(ref)) { + if (!is.array(ref) | !is.numeric(ref)) + stop("Parameter 'ref' must be a numeric array.") + if (any(is.null(names(dim(ref)))) | any(nchar(names(dim(ref))) == 0)) { + stop("Parameter 'ref' must have dimension names.") + } + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + if (!is.null(ref) & !time_dim %in% names(dim(ref))) { + stop("Parameter 'time_dim' is not found in 'ref' dimension.") + } + ## memb_dim & cat_dim + if (is.null(memb_dim) + is.null(cat_dim) != 1) { + stop("Only one of the two parameters 'memb_dim' and 'cat_dim' can have value.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (!is.null(ref) & !memb_dim %in% names(dim(ref))) { + stop("Parameter 'memb_dim' is not found in 'ref' dimension.") + } + } + ## cat_dim + if (!is.null(cat_dim)) { + if (!is.character(cat_dim) | length(cat_dim) > 1) { + stop("Parameter 'cat_dim' must be a character string.") + } + if (!cat_dim %in% names(dim(exp)) | !cat_dim %in% names(dim(obs)) | + (!is.null(ref) & !cat_dim %in% names(dim(ref)))) { + stop("Parameter 'cat_dim' is not found in 'exp', 'obs', or 'ref' dimension.") + } + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## exp, obs, and ref (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } + } + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'memb_dim' and 'dat_dim'.") + } + if (!is.null(ref)) { + name_ref <- sort(names(dim(ref))) + if (!is.null(memb_dim)) { + name_ref <- name_ref[-which(name_ref == memb_dim)] + } + if (!is.null(dat_dim)) { + if (dat_dim %in% name_ref) { + if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { + stop("If parameter 'ref' has dataset dimension, it must be", + " equal to dataset dimension of 'exp'.") + } + name_ref <- name_ref[-which(name_ref == dat_dim)] + } + } + if (!identical(length(name_exp), length(name_ref)) | + !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { + stop("Parameter 'exp' and 'ref' must have the same length of ", + "all dimensions except 'memb_dim' and 'dat_dim' if there is ", + "only one reference dataset.") + } + } + ## prob_thresholds + if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) 
|
+      any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) {
+    stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.")
+  }
+  ## indices_for_clim
+  if (is.null(indices_for_clim)) {
+    indices_for_clim <- seq_len(dim(obs)[time_dim])
+  } else {
+    if (!is.numeric(indices_for_clim) | !is.vector(indices_for_clim)) {
+      stop("Parameter 'indices_for_clim' must be NULL or a numeric vector.")
+    } else if (length(indices_for_clim) > dim(obs)[time_dim] |
+               max(indices_for_clim) > dim(obs)[time_dim] |
+               any(indices_for_clim < 1)) {
+      stop("Parameter 'indices_for_clim' should be the indices of 'time_dim'.")
+    }
+  }
+  ## Fair
+  if (!is.logical(Fair) | length(Fair) > 1) {
+    stop("Parameter 'Fair' must be either TRUE or FALSE.")
+  }
+  ## cross.val
+  if (!is.logical(cross.val) | length(cross.val) > 1) {
+    stop("Parameter 'cross.val' must be either TRUE or FALSE.")
+  }
+  ## weights_exp
+  if (!is.null(weights_exp) & is.null(cat_dim)) {
+    if (!is.array(weights_exp) | !is.numeric(weights_exp))
+      stop("Parameter 'weights_exp' must be a named numeric array.")
+
+    if (is.null(dat_dim)) {
+      if (length(dim(weights_exp)) != 2 |
+          !all(names(dim(weights_exp)) %in% c(memb_dim, time_dim))) {
+        stop("Parameter 'weights_exp' must have two dimensions with the names of ",
+             "'memb_dim' and 'time_dim'.")
+      }
+      if (dim(weights_exp)[memb_dim] != dim(exp)[memb_dim] |
+          dim(weights_exp)[time_dim] != dim(exp)[time_dim]) {
+        stop("Parameter 'weights_exp' must have the same dimension lengths as ",
+             "'memb_dim' and 'time_dim' in 'exp'.")
+      }
+      weights_exp <- Reorder(weights_exp, c(time_dim, memb_dim))
+
+    } else {
+      if (length(dim(weights_exp)) != 3 |
+          !all(names(dim(weights_exp)) %in% c(memb_dim, time_dim, dat_dim))) {
+        stop("Parameter 'weights_exp' must have three dimensions with the names of ",
+             "'memb_dim', 'time_dim' and 'dat_dim'.")
+      }
+      if (dim(weights_exp)[memb_dim] != dim(exp)[memb_dim] |
+          dim(weights_exp)[time_dim] != dim(exp)[time_dim] |
+          dim(weights_exp)[dat_dim] != dim(exp)[dat_dim]) {
+        stop("Parameter 'weights_exp' must have the same dimension lengths ",
+             "as 'memb_dim', 'time_dim' and 'dat_dim' in 'exp'.")
+      }
+      weights_exp <- Reorder(weights_exp, c(time_dim, memb_dim, dat_dim))
+    }
+  } else if (!is.null(weights_exp) & !is.null(cat_dim)) {
+    .warning(paste0("Parameter 'exp' is probability already, so parameter ",
+                    "'weights_exp' is not used. 
Change 'weights_exp' to NULL."))
+    weights_exp <- NULL
+  }
+  ## weights_ref
+  if (!is.null(weights_ref) & is.null(cat_dim)) {
+    if (!is.array(weights_ref) | !is.numeric(weights_ref))
+      stop("Parameter 'weights_ref' must be a named numeric array.")
+
+    if (is.null(dat_dim) | ((!is.null(dat_dim)) && (!dat_dim %in% names(dim(ref))))) {
+      if (length(dim(weights_ref)) != 2 |
+          !all(names(dim(weights_ref)) %in% c(memb_dim, time_dim))) {
+        stop("Parameter 'weights_ref' must have two dimensions with the names of ",
+             "'memb_dim' and 'time_dim'.")
+      }
+      if (dim(weights_ref)[memb_dim] != dim(exp)[memb_dim] |
+          dim(weights_ref)[time_dim] != dim(exp)[time_dim]) {
+        stop("Parameter 'weights_ref' must have the same dimension lengths as ",
+             "'memb_dim' and 'time_dim' in 'ref'.")
+      }
+      weights_ref <- Reorder(weights_ref, c(time_dim, memb_dim))
+
+    } else {
+      if (length(dim(weights_ref)) != 3 |
+          !all(names(dim(weights_ref)) %in% c(memb_dim, time_dim, dat_dim))) {
+        stop("Parameter 'weights_ref' must have three dimensions with the names of ",
+             "'memb_dim', 'time_dim' and 'dat_dim'.")
+      }
+      if (dim(weights_ref)[memb_dim] != dim(ref)[memb_dim] |
+          dim(weights_ref)[time_dim] != dim(ref)[time_dim] |
+          dim(weights_ref)[dat_dim] != dim(ref)[dat_dim]) {
+        stop("Parameter 'weights_ref' must have the same dimension lengths ",
+             "as 'memb_dim', 'time_dim' and 'dat_dim' in 'ref'.")
+      }
+      weights_ref <- Reorder(weights_ref, c(time_dim, memb_dim, dat_dim))
+    }
+  } else if (!is.null(weights_ref) & !is.null(cat_dim)) {
+    .warning(paste0("Parameter 'ref' is probability already, so parameter ",
+                    "'weights_ref' is not used. Change 'weights_ref' to NULL."))
+    weights_ref <- NULL
+  }
+  ## na.rm
+  if (!isTRUE(na.rm) & !isFALSE(na.rm) & !(is.numeric(na.rm) & na.rm >= 0 & na.rm <= 1)) {
+    stop('"na.rm" should be TRUE, FALSE or a numeric between 0 and 1')
+  }
+  ## alpha
+  if (any(!is.numeric(alpha) | alpha <= 0 | alpha >= 1 | length(alpha) > 1)) {
+    stop("Parameter 'alpha' must be a number between 0 and 1.")
+  }
+  ## sig_method.type
+  #NOTE: These are the types of RandomWalkTest()
+  if (!sig_method.type %in% c('two.sided.approx', 'two.sided', 'greater', 'less')) {
+    stop("Parameter 'sig_method.type' must be 'two.sided.approx', 'two.sided', ",
+         "'greater', or 'less'.")
+  }
+  if (sig_method.type == 'two.sided.approx' && alpha != 0.05) {
+    .warning("DelSole and Tippett (2016) approximation is valid for alpha ",
+             "= 0.05 only. 
Returning the significance at the 0.05 significance level.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + + # Compute RPSS + + ## Decide target_dims + if (!is.null(memb_dim)) { + target_dims_exp <- c(time_dim, memb_dim, dat_dim) + if (!memb_dim %in% names(dim(obs))) { + target_dims_obs <- c(time_dim, dat_dim) + } else { + target_dims_obs <- c(time_dim, memb_dim, dat_dim) + } + } else { # cat_dim + target_dims_exp <- target_dims_obs <- c(time_dim, cat_dim, dat_dim) + } + + if (!is.null(ref)) { # use "ref" as reference forecast + if (!is.null(memb_dim)) { + if (!is.null(dat_dim) && (dat_dim %in% names(dim(ref)))) { + target_dims_ref <- c(time_dim, memb_dim, dat_dim) + } else { + target_dims_ref <- c(time_dim, memb_dim) + } + } else { + target_dims_ref <- c(time_dim, cat_dim, dat_dim) + } + data <- list(exp = exp, obs = obs, ref = ref) + target_dims = list(exp = target_dims_exp, + obs = target_dims_obs, + ref = target_dims_ref) + } else { + data <- list(exp = exp, obs = obs) + target_dims = list(exp = target_dims_exp, + obs = target_dims_obs) + } + + output <- Apply(data, + target_dims = target_dims, + fun = .RPSS, + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, + prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, + nmemb = nmemb, nmemb_ref = nmemb_ref, + weights_exp = weights_exp, + weights_ref = weights_ref, + cross.val = cross.val, + na.rm = na.rm, sig_method.type = sig_method.type, alpha = alpha, + ncores = ncores) + + return(output) + +} + +.RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, + dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, + Fair = FALSE, nmemb = NULL, nmemb_ref = NULL, + weights_exp = NULL, weights_ref = NULL, cross.val = FALSE, + na.rm = FALSE, sig_method.type = 'two.sided.approx', alpha = 0.05) { + #--- if memb_dim: + # exp: [sdate, memb, (dat)] + # obs: [sdate, (memb), (dat)] + # ref: [sdate, memb, (dat)] or NULL + #--- if cat_dim: + # exp: [sdate, bin, (dat)] + # obs: [sdate, bin, (dat)] + # ref: [sdate, bin, (dat)] or NULL + + if (isTRUE(na.rm)) { + f_NAs <- 0 + } else if (isFALSE(na.rm)) { + f_NAs <- 1 + } else { + f_NAs <- na.rm + } + + if (is.null(dat_dim)) { + nexp <- 1 + nobs <- 1 + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + } + + # Calculate RPS + + if (!is.null(ref)) { + + # Adjust dimensions to be [sdate, memb, dat] for both exp, obs, and ref + ## Insert memb_dim in obs + if (!is.null(memb_dim)) { + if (!memb_dim %in% names(dim(obs))) { + obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim) + } + } + ## Insert dat_dim + if (is.null(dat_dim)) { + dim(obs) <- c(dim(obs), dat = nobs) + dim(exp) <- c(dim(exp), dat = nexp) + if (!is.null(weights_exp)) dim(weights_exp) <- c(dim(weights_exp), dat = nexp) + } + if (is.null(dat_dim) || (!is.null(dat_dim) && !dat_dim %in% names(dim(ref)))) { + nref <- 1 + dim(ref) <- c(dim(ref), dat = nref) + if (!is.null(weights_ref)) dim(weights_ref) <- c(dim(weights_ref), dat = nref) + } else { + nref <- as.numeric(dim(ref)[dat_dim]) # should be the same as nexp + } + + # Find good values then calculate RPS + rps_exp <- array(NA, dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs)) + rps_ref <- array(NA, dim = c(dim(exp)[time_dim], nexp 
= nexp, nobs = nobs)) + for (i in 1:nexp) { + for (j in 1:nobs) { + for (k in 1:nref) { + if (nref != 1 & k != i) { # if nref is 1 or equal to nexp, calculate rps + next + } + exp_data <- exp[, , i, drop = F] + obs_data <- obs[, , j, drop = F] + ref_data <- ref[, , k, drop = F] + exp_mean <- rowMeans(exp_data) + obs_mean <- rowMeans(obs_data) + ref_mean <- rowMeans(ref_data) + good_values <- !is.na(exp_mean) & !is.na(obs_mean) & !is.na(ref_mean) + dum <- match(indices_for_clim, which(good_values)) + good_indices_for_clim <- dum[!is.na(dum)] + + if (f_NAs <= sum(good_values) / length(good_values)) { + rps_exp[good_values, i, j] <- .RPS(exp = exp[good_values, , i], + obs = obs[good_values, , j], + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = NULL, + prob_thresholds = prob_thresholds, + indices_for_clim = good_indices_for_clim, + Fair = Fair, nmemb = nmemb, + weights = weights_exp[good_values, , i], + cross.val = cross.val, na.rm = na.rm) + rps_ref[good_values, i, j] <- .RPS(exp = ref[good_values, , k], + obs = obs[good_values, , j], + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = NULL, + prob_thresholds = prob_thresholds, + indices_for_clim = good_indices_for_clim, + Fair = Fair, nmemb = nmemb_ref, + weights = weights_ref[good_values, , k], + na.rm = na.rm, cross.val = cross.val) + } + } + } + } + + } else { # ref is NULL + rps_exp <- .RPS(exp = exp, obs = obs, time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, + nmemb = nmemb, weights = weights_exp, + cross.val = cross.val, na.rm = na.rm) + + # RPS of the reference forecast + if (!is.null(memb_dim)) { + if (!memb_dim %in% names(dim(obs))) { + obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim) + } + } + + rps_ref <- array(NA, dim = c(dim(obs)[time_dim], nexp = nexp, nobs = nobs)) + + if (is.null(dat_dim)) { + dim(obs) <- c(dim(obs), nobs = nobs) + dim(exp) <- c(dim(exp), nexp = nexp) + dim(rps_exp) <- dim(rps_ref) + } + + for (i in 1:nexp) { + for (j in 1:nobs) { + # Use good values only + good_values <- !is.na(rps_exp[, i, j]) + if (f_NAs <= sum(good_values) / length(good_values)) { + obs_data <- obs[good_values, , j] + if (is.null(dim(obs_data))) dim(obs_data) <- c(length(obs_data), 1) + + if (is.null(cat_dim)) { # calculate probs + # Subset indices_for_clim + dum <- match(indices_for_clim, which(good_values)) + good_indices_for_clim <- dum[!is.na(dum)] + obs_probs <- .GetProbs(data = obs_data, + indices_for_quantiles = good_indices_for_clim, + prob_thresholds = prob_thresholds, + weights = NULL, cross.val = cross.val) + } else { + obs_probs <- t(obs_data) + } + # obs_probs: [bin, sdate] + + clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), + 1 - prob_thresholds[length(prob_thresholds)]) + clim_probs <- array(clim_probs, dim = dim(obs_probs)) + # clim_probs: [bin, sdate] + + # Calculate RPS for each time step + probs_clim_cumsum <- apply(clim_probs, 2, cumsum) + probs_obs_cumsum <- apply(obs_probs, 2, cumsum) + rps_ref[good_values, i, j] <- colSums((probs_clim_cumsum - probs_obs_cumsum)^2) + } + if (Fair) { # FairRPS + if (!is.null(memb_dim)) { + if (memb_dim %in% names(dim(exp))) { + ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) + ## [formula taken from SpecsVerification::EnsRps] + R <- dim(obs)[1] #number of years + } + } else { + R <- nmemb_ref + } + adjustment <- (-1) / (R - 1) * probs_clim_cumsum * (1 - 
probs_clim_cumsum) + adjustment <- colSums(adjustment) + rps_ref[, i, j] <- rps_ref[, i, j] + adjustment + } + } + } + } + + if (is.null(dat_dim)) { + dim(rps_ref) <- dim(rps_exp) <- dim(exp)[time_dim] + } + +#---------------------------------------------- + # Calculate RPSS + + if (!is.null(dat_dim)) { + # rps_exp and rps_ref: [sdate, nexp, nobs] + rps_exp_mean <- colMeans(rps_exp, na.rm = TRUE) + rps_ref_mean <- colMeans(rps_ref, na.rm = TRUE) + rpss <- array(dim = c(nexp = nexp, nobs = nobs)) + sign <- array(dim = c(nexp = nexp, nobs = nobs)) + + if (!all(is.na(rps_exp_mean))) { + for (i in 1:nexp) { + for (j in 1:nobs) { + rpss[i, j] <- 1 - rps_exp_mean[i, j] / rps_ref_mean[i, j] + ind_nonNA <- !is.na(rps_exp[, i, j]) + if (!any(ind_nonNA)) { + sign[i, j] <- NA + } else { + sign[i, j] <- .RandomWalkTest(skill_A = rps_exp[ind_nonNA, i, j], + skill_B = rps_ref[ind_nonNA, i, j], + test.type = sig_method.type, alpha = alpha, + sign = T, pval = F)$sign + } + } + } + } + + # Turn NaN into NA + if (any(is.nan(rpss))) rpss[which(is.nan(rpss))] <- NA + + } else { # dat_dim is NULL + + ind_nonNA <- !is.na(rps_exp) + if (!any(ind_nonNA)) { + rpss <- NA + sign <- NA + } else { + # rps_exp and rps_ref: [sdate] + rpss <- 1 - mean(rps_exp, na.rm = TRUE) / mean(rps_ref, na.rm = TRUE) + sign <- .RandomWalkTest(skill_A = rps_exp[ind_nonNA], + skill_B = rps_ref[ind_nonNA], + test.type = sig_method.type, alpha = alpha, + sign = T, pval = F)$sign + } + } + + return(list(rpss = rpss, sign = sign)) +} diff --git a/modules/CrossVal/R/tmp/RandomWalkTest.R b/modules/CrossVal/R/tmp/RandomWalkTest.R new file mode 100644 index 00000000..16d89f6d --- /dev/null +++ b/modules/CrossVal/R/tmp/RandomWalkTest.R @@ -0,0 +1,184 @@ +#'Random Walk test for skill differences +#' +#'Forecast comparison of the skill obtained with 2 forecasts (with respect to a +#'common observational reference) based on Random Walks (DelSole and Tippett, +#'2016). +#' +#'@param skill_A A numerical array of the time series of the scores obtained +#' with the forecaster A. +#'@param skill_B A numerical array of the time series of the scores obtained +#' with the forecaster B. The dimensions should be identical as parameter +#' 'skill_A'. +#'@param time_dim A character string indicating the name of the dimension along +#' which the tests are computed. The default value is 'sdate'. +#'@param test.type A character string indicating the type of significance test. +#' It can be "two.sided.approx" (to assess whether forecaster A and forecaster +#' B are significantly different in terms of skill with a two-sided test using +#' the approximation of DelSole and Tippett, 2016), "two.sided" (to assess +#' whether forecaster A and forecaster B are significantly different in terms +#' of skill with an exact two-sided test), "greater" (to assess whether +#' forecaster A shows significantly better skill than forecaster B with a +#' one-sided test for negatively oriented scores), or "less" (to assess whether +#' forecaster A shows significantly better skill than forecaster B with a +#' one-sided test for positively oriented scores). The default value is +#' "two.sided.approx". +#'@param alpha A numeric of the significance level to be used in the statistical +#' significance test (output "sign"). The default value is 0.05. +#'@param pval A logical value indicating whether to return the p-value of the +#' significance test. The default value is TRUE. 
+#'@param sign A logical value indicating whether to return the statistical +#' significance of the test based on 'alpha'. The default value is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return A list with: +#'\item{$score}{ +#' A numerical array with the same dimensions as the input arrays except +#' 'time_dim'. The number of times that forecaster A has been better than +#' forecaster B minus the number of times that forecaster B has been better +#' than forecaster A (for skill negatively oriented, i.e., the lower the +#' better). If $score is positive, forecaster A has been better more times +#' than forecaster B. If $score is negative, forecaster B has been better more +#' times than forecaster A. +#'} +#'\item{$sign}{ +#' A logical array of the statistical significance with the same dimensions +#' as the input arrays except "time_dim". Returned only if "sign" is TRUE. +#'} +#'\item{$p.val}{ +#' A numeric array of the p-values with the same dimensions as the input arrays +#' except "time_dim". Returned only if "pval" is TRUE. +#'} +#' +#'@details +#' Null and alternative hypothesis for "two-sided" test (regardless of the +#' orientation of the scores):\cr +#' H0: forecaster A and forecaster B are not different in terms of skill\cr +#' H1: forecaster A and forecaster B are different in terms of skill +#' +#' Null and alternative hypothesis for one-sided "greater" (for negatively +#' oriented scores, i.e., the lower the better) and "less" (for positively +#' oriented scores, i.e., the higher the better) tests:\cr +#' H0: forecaster A is not better than forecaster B\cr +#' H1: forecaster A is better than forecaster B +#' +#' Examples of negatively oriented scores are the RPS, RMSE and the Error, while +#' the ROC score is a positively oriented score. +#' +#' DelSole and Tippett (2016) approximation for two-sided test at 95% confidence +#' level: significant if the difference between the number of times that +#' forecaster A has been better than forecaster B and forecaster B has been +#' better than forecaster A is above 2sqrt(N) or below -2sqrt(N). 
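+#'
+#' For instance, with N = 100 time steps, the approximated two-sided test
+#' flags a significant skill difference whenever the score is above
+#' 2*sqrt(100) = 20 or below -20.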
+#'
+#'@references
+#'DelSole and Tippett (2016): https://doi.org/10.1175/MWR-D-15-0218.1
+#'
+#'@examples
+#' fcst_A <- array(data = 11:50, dim = c(sdate = 10, lat = 2, lon = 2))
+#' fcst_B <- array(data = 21:60, dim = c(sdate = 10, lat = 2, lon = 2))
+#' reference <- array(data = 1:40, dim = c(sdate = 10, lat = 2, lon = 2))
+#' scores_A <- abs(fcst_A - reference)
+#' scores_B <- abs(fcst_B - reference)
+#' res1 <- RandomWalkTest(skill_A = scores_A, skill_B = scores_B, pval = FALSE, sign = TRUE)
+#' res2 <- RandomWalkTest(skill_A = scores_A, skill_B = scores_B, test.type = 'greater')
+#'
+#'@import multiApply
+#'@importFrom ClimProjDiags Subset
+#'@export
+RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate',
+                           test.type = 'two.sided.approx', alpha = 0.05, pval = TRUE,
+                           sign = FALSE, ncores = NULL) {
+
+  # Check inputs
+  ## skill_A and skill_B
+  if (is.null(skill_A) | is.null(skill_B)) {
+    stop("Parameters 'skill_A' and 'skill_B' cannot be NULL.")
+  }
+  if (!is.numeric(skill_A) | !is.numeric(skill_B)) {
+    stop("Parameters 'skill_A' and 'skill_B' must be a numerical array.")
+  }
+  if (!identical(dim(skill_A), dim(skill_B))) {
+    stop("Parameters 'skill_A' and 'skill_B' must have the same dimensions.")
+  }
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) != 1) {
+    stop("Parameter 'time_dim' must be a character string.")
+  }
+  if (!time_dim %in% names(dim(skill_A)) | !time_dim %in% names(dim(skill_B))) {
+    stop("Parameter 'time_dim' is not found in 'skill_A' or 'skill_B' dimensions.")
+  }
+  ## alpha
+  if (any(!is.numeric(alpha) | alpha <= 0 | alpha >= 1 | length(alpha) > 1)) {
+    stop("Parameter 'alpha' must be a number between 0 and 1.")
+  }
+  ## test.type
+  if (!test.type %in% c('two.sided.approx', 'two.sided', 'greater', 'less')) {
+    stop("Parameter 'test.type' must be 'two.sided.approx', 'two.sided', 'greater', or 'less'.")
+  }
+  if (test.type == 'two.sided.approx') {
+    if (alpha != 0.05) {
+      .warning("DelSole and Tippett (2016) approximation is valid for alpha ",
+               "= 0.05 only. Returning the significance at the 0.05 significance level.")
+    }
+    if (pval) {
+      .warning("p-value cannot be returned with the DelSole and Tippett (2016) ",
+               "approximation. 
Returning the significance at the 0.05 significance level.")
+    }
+    sign <- TRUE
+  }
+  ## ncores
+  if (!is.null(ncores)) {
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) {
+      stop("Parameter 'ncores' must be a positive integer.")
+    }
+  }
+
+  ## Compute the Random Walk Test
+  res <- Apply(data = list(skill_A = skill_A,
+                           skill_B = skill_B),
+               target_dims = list(skill_A = time_dim,
+                                  skill_B = time_dim),
+               fun = .RandomWalkTest,
+               test.type = test.type,
+               alpha = alpha, pval = pval, sign = sign,
+               ncores = ncores)
+
+  return(res)
+}
+
+.RandomWalkTest <- function(skill_A, skill_B, test.type = 'two.sided.approx',
+                            alpha = 0.05, pval = TRUE, sign = FALSE) {
+  #skill_A and skill_B: [sdate]
+
+  N.eff <- length(skill_A)
+
+  A_better <- sum(skill_B > skill_A)
+  B_better <- sum(skill_B < skill_A)
+
+  output <- NULL
+  output$score <- A_better - B_better
+
+  if (test.type == 'two.sided.approx') {
+    output$sign <- abs(output$score) > (2 * sqrt(N.eff))
+
+  } else {
+
+    if (!is.na(output$score)) {
+      p.val <- binom.test(x = A_better, n = floor(N.eff), p = 0.5, conf.level = 1 - alpha,
+                          alternative = test.type)$p.value
+
+    } else {
+      p.val <- NA
+    }
+
+    if (pval) {
+      output$p.val <- p.val
+    }
+    if (sign) {
+      output$sign <- !is.na(p.val) & p.val <= alpha
+    }
+
+  }
+
+  return(output)
+}
diff --git a/modules/CrossVal/R/tmp/SprErr.R b/modules/CrossVal/R/tmp/SprErr.R
new file mode 100644
index 00000000..33642eab
--- /dev/null
+++ b/modules/CrossVal/R/tmp/SprErr.R
@@ -0,0 +1,227 @@
+#'Compute the ratio between the ensemble spread and RMSE
+#'
+#'Compute the ratio between the spread of the members around the
+#'ensemble mean in experimental data and the RMSE between the ensemble mean of
+#'experimental and observational data. The p-value and/or the statistical
+#'significance is provided by a one-sided Fisher's test.
+#'
+#'@param exp A named numeric array of experimental data with at least two
+#'  dimensions 'memb_dim' and 'time_dim'.
+#'@param obs A named numeric array of observational data with at least two
+#'  dimensions 'memb_dim' and 'time_dim'. It should have the same dimensions as
+#'  parameter 'exp' except along 'dat_dim' and 'memb_dim'.
+#'@param dat_dim A character string indicating the name of dataset (nobs/nexp)
+#'  dimension. The default value is NULL (no dataset).
+#'@param memb_dim A character string indicating the name of the member
+#'  dimension. It must be one dimension in 'exp' and 'obs'. The default value
+#'  is 'member'.
+#'@param time_dim A character string indicating the name of dimension along
+#'  which the ratio is computed. The default value is 'sdate'.
+#'@param pval A logical value indicating whether to compute the p-value of the
+#'  test Ho: SD/RMSE = 1. The default value is TRUE.
+#'@param sign A logical value indicating whether to retrieve the statistical
+#'  significance of the test Ho: SD/RMSE = 1 based on 'alpha'. The default value
+#'  is FALSE.
+#'@param alpha A numeric indicating the significance level for the statistical
+#'  significance test. The default value is 0.05.
+#'@param na.rm A logical value indicating whether to remove NA values. The
+#'  default value is FALSE.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#'  computation. The default value is NULL.
+#'
+#'@return A list of arrays with dimensions c(nexp, nobs, the rest of
+#'  dimensions of 'exp' and 'obs' except memb_dim and time_dim), where nexp is
+#'  the length of dat_dim of 'exp' and nobs is the length of dat_dim of 'obs'. 
+#'  If dat_dim is NULL, nexp and nobs are omitted. \cr
+#'\item{$ratio}{
+#'  The ratio of the ensemble spread and RMSE.
+#'}
+#'\item{$p.val}{
+#'  The p-value of the one-sided Fisher's test with Ho: SD/RMSE = 1. Only present
+#'  if \code{pval = TRUE}.
+#'}
+#'\item{$sign}{
+#'  The statistical significance of the test based on 'alpha'. Only present if
+#'  \code{sign = TRUE}.
+#'}
+#'
+#'@examples
+#'# Load sample data as in Load() example:
+#'example(Load)
+#'rsdrms <- SprErr(sampleData$mod, sampleData$obs, dat_dim = 'dataset')
+#'# Reorder the data in order to plot it with PlotVsLTime
+#'rsdrms_plot <- array(dim = c(dim(rsdrms$ratio)[1:2], 4, dim(rsdrms$ratio)[3]))
+#'rsdrms_plot[, , 2, ] <- rsdrms$ratio
+#'rsdrms_plot[, , 4, ] <- rsdrms$p.val
+#'\dontrun{
+#'PlotVsLTime(rsdrms_plot, toptitle = "Ratio ensemble spread / RMSE", ytitle = "",
+#'            monini = 11, limits = c(-1, 1.3), listexp = c('CMIP5 IC3'),
+#'            listobs = c('ERSST'), biglab = FALSE, siglev = TRUE)
+#'}
+#'
+#'@import multiApply
+#'@export
+SprErr <- function(exp, obs, dat_dim = NULL, memb_dim = 'member',
+                   time_dim = 'sdate', pval = TRUE, sign = FALSE,
+                   alpha = 0.05, na.rm = FALSE, ncores = NULL) {
+
+  # Check inputs
+  ## exp and obs (1)
+  if (is.null(exp) | is.null(obs)) {
+    stop("Parameter 'exp' and 'obs' cannot be NULL.")
+  }
+  if (!is.numeric(exp) | !is.numeric(obs)) {
+    stop("Parameter 'exp' and 'obs' must be a numeric array.")
+  }
+  if (is.null(dim(exp)) | is.null(dim(obs))) {
+    stop(paste0("Parameter 'exp' and 'obs' must be arrays with at least two ",
                "dimensions memb_dim and time_dim."))
+  }
+  if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) |
+      any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) {
+    stop("Parameter 'exp' and 'obs' must have dimension names.")
+  }
+  ## dat_dim
+  if (!is.null(dat_dim)) {
+    if (!is.character(dat_dim) | length(dat_dim) > 1) {
+      stop("Parameter 'dat_dim' must be a character string.")
+    }
+    if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) {
+      stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.")
+    }
+  }
+  ## memb_dim
+  if (!is.character(memb_dim) | length(memb_dim) > 1) {
+    stop("Parameter 'memb_dim' must be a character string.")
+  }
+  if (!memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) {
+    stop("Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension.")
+  }
+  # Add [member = 1]
+  if (memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) {
+    dim(obs) <- c(dim(obs), 1)
+    names(dim(obs))[length(dim(obs))] <- memb_dim
+  }
+  if (!memb_dim %in% names(dim(exp)) & memb_dim %in% names(dim(obs))) {
+    dim(exp) <- c(dim(exp), 1)
+    names(dim(exp))[length(dim(exp))] <- memb_dim
+  }
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) > 1) {
+    stop("Parameter 'time_dim' must be a character string.")
+  }
+  if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) {
+    stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.")
+  }
+  ## exp and obs (2)
+  name_exp <- sort(names(dim(exp)))
+  name_obs <- sort(names(dim(obs)))
+  if (!is.null(dat_dim)) {
+    name_exp <- name_exp[-which(name_exp == dat_dim)]
+    name_obs <- name_obs[-which(name_obs == dat_dim)]
+  }
+  name_exp <- name_exp[-which(name_exp == memb_dim)]
+  name_obs <- name_obs[-which(name_obs == memb_dim)]
+  if (!identical(dim(exp)[name_exp], dim(obs)[name_obs])) {
+    stop(paste0("Parameter 'exp' and 'obs' must have same length of ",
+                "all the dimensions except 'dat_dim' and 'memb_dim'."))
+  }
+  ## pval
+  if (!is.logical(pval) | length(pval) > 1) {
+    stop("Parameter 'pval' must be one logical value.")
+  }
+  ## sign
+  if (!is.logical(sign) | length(sign) > 1) {
+    stop("Parameter 'sign' must be one logical value.")
+  }
+  # alpha
+  if (!is.numeric(alpha) | any(alpha < 0) | any(alpha > 1) | length(alpha) > 1) {
+    stop("Parameter 'alpha' must be a numeric number between 0 and 1.")
+  }
+  # na.rm
+  if (!na.rm %in% c(TRUE, FALSE)) {
+    stop("Parameter 'na.rm' must be TRUE or FALSE")
+  }
+  ## ncores
+  if (!is.null(ncores)) {
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
+        length(ncores) > 1) {
+      stop("Parameter 'ncores' must be a positive integer.")
+    }
+  }
+
+
+  ###############################
+  # Calculate RatioSDRMS
+
+  # If dat_dim = NULL, insert dat dim
+  remove_dat_dim <- FALSE
+  if (is.null(dat_dim)) {
+    dat_dim <- 'dataset'
+    exp <- InsertDim(exp, posdim = 1, lendim = 1, name = 'dataset')
+    obs <- InsertDim(obs, posdim = 1, lendim = 1, name = 'dataset')
+    remove_dat_dim <- TRUE
+  }
+
+  res <- Apply(list(exp, obs),
+               target_dims = list(c(dat_dim, memb_dim, time_dim),
+                                  c(dat_dim, memb_dim, time_dim)),
+               pval = pval,
+               sign = sign,
+               na.rm = na.rm,
+               fun = .SprErr,
+               ncores = ncores)
+
+  if (remove_dat_dim) {
+    if (length(dim(res[[1]])) > 2) {
+      res <- lapply(res, Subset, c('nexp', 'nobs'), list(1, 1), drop = 'selected')
+    } else {
+      res <- lapply(res, as.numeric)
+    }
+  }
+
+  return(res)
+}
+
+.SprErr <- function(exp, obs, pval = TRUE, sign = FALSE, alpha = 0.05, na.rm = FALSE) {
+
+  # exp: [dat_exp, member, sdate]
+  # obs: [dat_obs, member, sdate]
+  nexp <- dim(exp)[1]
+  nobs <- dim(obs)[1]
+
+  # ensemble mean
+  ens_exp <- MeanDims(exp, 2, na.rm = na.rm) # [dat, sdate]
+  ens_obs <- MeanDims(obs, 2, na.rm = na.rm)
+
+  # Create empty arrays
+  ratio <- array(dim = c(nexp = as.numeric(nexp), nobs = as.numeric(nobs))) # [nexp, nobs]
+  p.val <- array(dim = c(nexp = as.numeric(nexp), nobs = as.numeric(nobs))) # [nexp, nobs]
+
+  for (jexp in 1:nexp) {
+    for (jobs in 1:nobs) {
+
+      # spread and error
+      spread <- sqrt(mean(apply(exp[jexp,,], 2, var, na.rm = na.rm), na.rm = na.rm))
+      error <- sqrt(mean((ens_obs[jobs, ] - ens_exp[jexp, ])^2, na.rm = na.rm))
+      ratio[jexp, jobs] <- spread/error
+
+      # effective sample size
+      enospr <- sum(Eno(apply(exp[jexp,,], 2, var, na.rm = na.rm), names(dim(exp))[3]))
+      enodif <- 
.Eno((ens_exp[jexp, ] - ens_obs[jobs, ])^2, na.action = na.pass)
+      if (pval) {
+        F <- (enospr * spread^2 / (enospr - 1)) / (enodif * error^2 / (enodif - 1))
+        if (!is.na(F) & !is.na(enospr) & !is.na(enodif) & enospr > 2 & enodif > 2) {
+          p.val[jexp, jobs] <- pf(F, enospr - 1, enodif - 1)
+          p.val[jexp, jobs] <- 2 * min(p.val[jexp, jobs], 1 - p.val[jexp, jobs])
+        } else {
+          ratio[jexp, jobs] <- NA
+        }
+      }
+    }
+  }
+
+  res <- list(ratio = ratio)
+  if (pval) {res$p.val <- p.val}
+  if (sign) {res$sign <- p.val <= alpha}
+
+  return(res)
+}
+
diff --git a/modules/CrossVal/recipe_crossval_ecvs.yml b/modules/CrossVal/recipe_crossval_ecvs.yml
new file mode 100644
index 00000000..3ec15379
--- /dev/null
+++ b/modules/CrossVal/recipe_crossval_ecvs.yml
@@ -0,0 +1,183 @@
+# IMPORTANT: This recipe is not intended to represent a real workflow: it is only a template showcasing ALL available options.
+Description:
+  Author: N. Pérez-Zanón
+  Info: This recipe can be used to test Crossval_anomalies.R and Crossval_skill.R for single and multimodel.
+Analysis:
+  Horizon: seasonal # Mandatory, str: 'seasonal', or 'decadal'. Subseasonal is in development
+  Variables:
+    # name: variable name(s) in the /esarchive (Mandatory, str)
+    # freq: 'monthly_mean', 'daily' or 'daily_mean' (Mandatory, str)
+    # units: desired data units for each variable. Only available for temperature,
+    # precipitation, and pressure variables.
+    - {name: 'tas', freq: 'monthly_mean', units: 'K'}
+    # To request more variables to be divided in atomic recipes, add them this way:
+    # - {name: 'prlr', freq: 'monthly_mean', units: 'mm'}
+    # To request multiple variables *in the same* atomic recipe, add them this way:
+    # - {name: 'tas, prlr', freq: 'monthly_mean', units: {tas: 'C', prlr: 'mm'}}
+  Datasets:
+    System:
+    # name: System name (Mandatory, str)
+    # member: 'all' or individual members, separated by a comma and in quotes (decadal only, str)
+    - {name: 'ECMWF-SEAS5.1', member: 'all'}
+    - {name: 'Meteo-France-System7'}
+    # To request more Systems to be divided in atomic recipes, add them this way:
+    # - {name: 'Meteo-France-System7'}
+    Multimodel:
+      execute: no # Either yes/true or no/false (Mandatory, bool)
+      approach: pooled # Multimodel computation approach. 'pooled' currently the only option (str)
+      createFrom: Anomalies # Which module should the anomalies be created from (str)
+    Reference:
+    - {name: 'ERA5'} # Reference name (Mandatory, str)
+    # To request more References to be divided into atomic recipes, add them this way:
+    # - {name: 'ERA5Land'}
+  Time:
+    sdate:
+    - '0101'
+    - '0201'
+    - '0301'
+    - '0401'
+    - '0501'
+    - '0601'
+    - '0701'
+    - '0801'
+    - '0901'
+    - '1001'
+    - '1101'
+    - '1201' # Start date, 'mmdd' (Mandatory, int)
+    # To request more startdates to be divided into atomic recipes, add them this way:
+    # - '0101'
+    # - '0201'
+    # ...
+    fcst_year: '2020' # Forecast initialization year 'YYYY' (Optional, int)
+    hcst_start: '1993' # Hindcast initialization start year 'YYYY' (Mandatory, int)
+    hcst_end: '2016' # Hindcast initialization end year 'YYYY' (Mandatory, int)
+    ftime_min: 1 # First forecast time step in months. Starts at “1”. (Mandatory, int)
+    ftime_max: 6 # Last forecast time step in months. Starts at “1”. 
(Mandatory, int)
+  Region:
+  # latmin: minimum latitude (Mandatory, int)
+  # latmax: maximum latitude (Mandatory, int)
+  # lonmin: minimum longitude (Mandatory, int)
+  # lonmax: maximum longitude (Mandatory, int)
+  - {name: global, latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9}
+  # To request more regions to be divided in atomic recipes, add them this way:
+  # {name: "nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 60}
+  Regrid:
+    method: conservative # Interpolation method (Mandatory, str)
+    type: to_system # Interpolate to: 'to_system', 'to_reference', 'none',
+                    # or CDO-accepted grid. (Mandatory, str)
+  Workflow:
+    # This is the section of the recipe where the parameters for each module are specified
+    Calibration:
+      method: raw # Calibration method. (Mandatory, str)
+      save: 'none' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str)
+    Anomalies:
+      compute: yes # Either yes/true or no/false (Mandatory, bool)
+      cross_validation: no # Either yes/true or no/false (Mandatory if 'compute: yes', bool)
+      save: 'all' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str)
+    Downscaling:
+      # Assumption 1: leave-one-out cross-validation is always applied
+      # Assumption 2: for analogs, we select the best analog (minimum distance)
+      type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'.
+      int_method: conservative # regridding method accepted by CDO. (Mandatory, str)
+      bc_method: bias # If type=intbc. Options: 'bias', 'calibration', 'quantile_mapping', 'qm', 'evmos', 'mse_min', 'crps_min', 'rpc-based'.
+      lr_method: # If type=intlr. Options: 'basic', 'large_scale', '9nn'
+      log_reg_method: # If type=logreg. Options: 'ens_mean', 'ens_mean_sd', 'sorted_members'
+      target_grid: /esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # nc file or grid accepted by CDO
+      nanalogs: # If type=analogs. Number of analogs to be searched
+      save: 'all' # Options: 'all'/'none'/'exp_only' (Mandatory, str)
+    Time_aggregation:
+      execute: no # Either yes/true or no/false. Defaults to false. (Mandatory, bool)
+      method: average # Aggregation method. Available methods: 'average', 'accumulated'. (Mandatory, string)
+      # ini and end: lists of paired initial and final time steps to aggregate.
+      # In this example, aggregate from 1 to 2; from 2 to 3 and from 1 to 3
+      ini: [1, 2, 1]
+      end: [2, 3, 3]
+      # user_def: List of lists. Custom user-defined forecast times to aggregate.
+      # Elements should be named; the names can be chosen by the user.
+      # An R expression can be entered using '!expr'; it will be evaluated by the code.
+      # If both ini/end and user_def are defined, ini/end takes preference.
+      user_def:
+        DJF_Y1: [1, 3] # aggregate from 1 to 3 forecast times
+        DJF: !expr sort(c(seq(1, 120, 12), seq(2, 120, 13), seq(3, 120, 14))) # aggregate 1,2,3,13,14,15,...
+    Indices:
+      ## Indices available: - NAO (for psl and/or z500);
+      #                     - Nino1+2, Nino3, Nino3.4, Nino4 (for tos)
+      ## Each index can only be computed if its area is within the selected region.
+      # obsproj: NAO computation method (see s2dv::NAO()). Default is yes/true. (Optional, bool)
+      # save: What to save. Options: 'all'/'none'. Default is 'all'.
+      # plot_ts: Generate time series plot? Default is yes/true. (Optional, bool)
+      # plot_sp: Generate spatial pattern plot? Default is yes/true. (Optional, bool)
+      # alpha: Significance threshold. 
Default value is 0.05 (Optional, numeric) + #Nino1+2: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05} + #Nino3: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05} + #Nino3.4: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05} + #Nino4: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05} + # Also available if variable is psl and/or z500: + # NAO: {obsproj: yes, save: 'all', plot_ts: yes, plot_sp: yes} + Skill: + metric: mean_bias enscorr rpss crpss enssprerr # List of skill metrics separated by spaces or commas. (Mandatory, str) + save: 'all' # Options: 'all', 'none' (Mandatory, str) + Statistics: + metric: cov std var n_eff # List of statistics separated by spaces or commas. (Mandatory, str) + save: 'all' # Options: 'all', 'none' (Mandatory, str) + Probabilities: + percentiles: [[1/3, 2/3]] # Thresholds + # for quantiles and probability categories. Each set of thresholds should be + # enclosed within brackets. For now, they are INDEPENDENT from skill metrics. (Optional) + save: 'percentiles_only' # Options: 'all', 'none', 'bins_only', 'percentiles_only' (Mandatory, str) + Visualization: + plots: skill_metrics, most_likely_terciles, forecast_ensemble_mean # Types of plots to generate (Optional, str) + multi_panel: no # Multi-panel plot or single-panel plots. Default is 'no/false'. (Optional, bool) + projection: 'robinson' # Options: 'cylindrical_equidistant', 'robinson', 'lambert_europe'. Default is cylindrical equidistant. (Optional, str) + mask_terciles: no # Whether to mask the non-significant points by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + dots_terciles: yes # Whether to dot the non-significant by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + mask_ens: no # Whether to mask the non-significant points by rpss in the forecast ensemble mean plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + file_format: 'PNG' # Final file format of the plots. Formats available: PNG, JPG, JPEG, EPS. Defaults to PDF. + Scorecards: + execute: yes # yes/no + regions: + # Mandatory: Define regions for which the spatial aggregation will be performed. + # The regions must be included within the area defined in the 'Analysis:Region' section. + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: 1, 2, 3 # Mandatory, int: start months to visualise in scorecard table. Options: 'all' or a sequence of numbers. + metric: mean_bias enscorr rpss crpss enssprerr # Mandatory: metrics to visualise in scorecard table + metric_aggregation: 'score' # Mandatory, str: level of aggregation for skill scores. Options: 'score' or 'skill' + inf_to_na: True # Optional, bool: set inf values in data to NA, default is no/False + table_label: NULL # Optional, str: extra information to add in scorecard table title + fileout_label: NULL # Optional, str: extra information to add in scorecard output filename + col1_width: NULL # Optional, int: to adjust width of first column in scorecards table + col2_width: NULL # Optional, int: to adjust width of second column in scorecards table + calculate_diff: False # Mandatory, bool: True/False + cross.method: 'leave-one-out' # Default 'leave-one-out' + ncores: 16 # Number of cores to be used in parallel computation. + # If left empty, defaults to 1. 
(Optional, int) + remove_NAs: yes # Whether to remove NAs. + # If left empty, defaults to no/false. (Optional, bool) + Output_format: 'Scorecards' # 'S2S4E' or 'Scorecards'. Determines the format of the output. Default is 'S2S4E'. +Run: + filesystem: esarchive # Name of the filesystem as defined in the archive configuration file + Loglevel: INFO # Minimum category of log messages to display: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. + # Default value is 'INFO'. (Optional, str) + Terminal: yes # Optional, bool: Whether to display log messages in the terminal. + # Default is yes/true. + output_dir: /esarchive/scratch/nperez/git4/ # Output directory. Must have write permissions. (Mandatory, str) + code_dir: /esarchive/scratch/nperez/git4/sunset/ # Directory where the code is stored. Is used when launching jobs (not running interactively) + autosubmit: no # Whether or not to run with Autosubmit. Only for non-atomic recipes (not running interactively) + # fill only if using autosubmit + auto_conf: + script: ./example_scripts/multimodel_seasonal.R # replace with the path to your script + expid: a6wq # replace with your EXPID + hpc_user: bsc032762 # replace with your hpc username + wallclock: 01:00 # wallclock for single-model jobs, hh:mm + wallclock_multimodel: 02:00 # wallclock for multi-model jobs, hh:mm. If empty, 'wallclock' will be used. + processors_per_job: 4 # processors to request for each single-model job. + processors_multimodel: 16 # processors to request for each multi-model job. If empty, 'processors_per_job' will be used. + custom_directives: ['#SBATCH --exclusive'] # custom scheduler directives for single-model jobs. + custom_directives_multimodel: ['#SBATCH --exclusive', '#SBATCH --constraint=highmem'] # custom scheduler directives for multi-model jobs. If empty, 'custom_directives' will be used. + platform: nord3v2 # platform (for now, only nord3v2 is available) + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: victoria.agudetse@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails diff --git a/recipe_tas_oper.yml b/recipe_tas.yml similarity index 95% rename from recipe_tas_oper.yml rename to recipe_tas.yml index 34f82e63..e9f7e028 100644 --- a/recipe_tas_oper.yml +++ b/recipe_tas.yml @@ -47,7 +47,7 @@ Analysis: save: 'all' cross_validation: yes Probabilities: - percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. 
save: all Indicators: index: no @@ -56,6 +56,7 @@ Analysis: multi_panel: no dots: both projection: Robinson + file_format: 'PNG' #projection: robinson Scorecards: execute: no # yes/no @@ -73,7 +74,7 @@ Analysis: col2_width: NULL calculate_diff: FALSE ncores: 4 # Optional, int: number of cores, defaults to 1 - remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE Output_format: scorecards logo: yes Run: -- GitLab From a3a49538bf509007d92cb5532f4d5c902988f006 Mon Sep 17 00:00:00 2001 From: nperez Date: Fri, 31 May 2024 12:32:57 +0200 Subject: [PATCH 18/78] consistent names and rmss --- modules/CrossVal/Crossval_skill.R | 149 -------------- .../Crossval_anomalies.R | 0 modules/Crossval/Crossval_skill.R | 171 ++++++++++++++++ modules/{CrossVal => Crossval}/R/CRPS_clim.R | 0 modules/{CrossVal => Crossval}/R/RPS_clim.R | 0 modules/{CrossVal => Crossval}/R/tmp/Bias.R | 0 modules/{CrossVal => Crossval}/R/tmp/Corr.R | 0 modules/{CrossVal => Crossval}/R/tmp/Eno.R | 0 .../{CrossVal => Crossval}/R/tmp/GetProbs.R | 0 modules/{CrossVal => Crossval}/R/tmp/RPS.R | 0 modules/{CrossVal => Crossval}/R/tmp/RPSS.R | 0 .../R/tmp/RandomWalkTest.R | 0 modules/{CrossVal => Crossval}/R/tmp/SprErr.R | 0 .../recipe_crossval_ecvs.yml | 5 +- .../Crossval/recipe_crossval_ecvs_global.yml | 184 ++++++++++++++++++ tools/check_recipe.R | 2 +- 16 files changed, 359 insertions(+), 152 deletions(-) delete mode 100644 modules/CrossVal/Crossval_skill.R rename modules/{CrossVal => Crossval}/Crossval_anomalies.R (100%) create mode 100644 modules/Crossval/Crossval_skill.R rename modules/{CrossVal => Crossval}/R/CRPS_clim.R (100%) rename modules/{CrossVal => Crossval}/R/RPS_clim.R (100%) rename modules/{CrossVal => Crossval}/R/tmp/Bias.R (100%) rename modules/{CrossVal => Crossval}/R/tmp/Corr.R (100%) rename modules/{CrossVal => Crossval}/R/tmp/Eno.R (100%) rename modules/{CrossVal => Crossval}/R/tmp/GetProbs.R (100%) rename modules/{CrossVal => Crossval}/R/tmp/RPS.R (100%) rename modules/{CrossVal => Crossval}/R/tmp/RPSS.R (100%) rename modules/{CrossVal => Crossval}/R/tmp/RandomWalkTest.R (100%) rename modules/{CrossVal => Crossval}/R/tmp/SprErr.R (100%) rename modules/{CrossVal => Crossval}/recipe_crossval_ecvs.yml (98%) create mode 100644 modules/Crossval/recipe_crossval_ecvs_global.yml diff --git a/modules/CrossVal/Crossval_skill.R b/modules/CrossVal/Crossval_skill.R deleted file mode 100644 index c142b94f..00000000 --- a/modules/CrossVal/Crossval_skill.R +++ /dev/null @@ -1,149 +0,0 @@ - -source("modules/Saving/Saving.R") -source("modules/Crossval/R/tmp/RPS.R") -source("modules/Crossval/R/RPS_clim.R") -source("modules/Crossval/R/CRPS_clim.R") -source("modules/Crossval/R/tmp/RPSS.R") -source("modules/Crossval/R/tmp/RandomWalkTest.R") -source("modules/Corssval/R/tmp/Corr.R") -source("modules/Crossval/R/tmp/Bias.R") -source("modules/Crossval/R/tmp/SprErr.R") -source("modules/Crossval/R/tmp/Eno.R") - -## data_crossval is the result from function full_crossval_anomalies or similar. 
-## this is a list with the required elements: - ## probs is a list with - ## probs$hcst_ev and probs$obs_ev - ## probs$hcst_ev will have as many elements in the $Probabilities$percentiles - ## each element will be an array with 'cat' dimension - ## the same for probs$obs_ev - ## hcst is a s2dv_cube for the post-processed hindcast for the evalutaion indices - ## in this case cross validated anomalies - ## obs is a s2dv_cube for the post-processed obs - ## in this case cross validated anomalies - ## fcst is a s2dv_cube for the post-processed fcst - ## in this case cross anomalies with the full hindcast period - ## this object is not required for skill assessment - ## hcst.full_val and obs.full_val are the original data to compute mean bias - ## cat_lims used to compute the probabilities - ## this object is not required for skill assessment - ## ref_obs_tr is an array with the cross-validate observed anomalies - ## to be used as reference forecast in the CRPSS and CRPS_clim - ## it is computed from the training indices -## the recipe could be used to read the Percentiles -## if fair is TRUE, the nmemb used to compute the probabilities is needed - ## nmemb_ref is the number of year - 1 in case climatological forecast is the reference -Crossval_skill <- function(recipe, data_crossval, - fair = FALSE, nmemb = NULL, nmemb_ref = NULL) { - ## START SKILL ASSESSMENT: - # RPS - rps <- RPS(exp = res$probs$hcst_ev[[1]], obs = res$probs$obs_ev[[1]], memb_dim = NULL, - cat_dim = 'cat', cross.val = FALSE, time_dim = 'syear', - Fair = fair, nmemb = nmemb, - ncores = recipe$Analysis$ncores) - rps_clim <- Apply(list(res$probs$obs_ev[[1]]), - target_dims = c('cat', 'syear'), - RPS_clim, bin_dim_abs = 'cat', Fair = fair, - cross.val = FALSE, ncores = recipe$Analysis$ncores)$output1 - # RPSS - rpss <- RPSS(exp = res$probs$hcst_ev[[1]], obs = res$probs$obs_ev[[1]], - time_dim = 'syear', memb_dim = NULL, - cat_dim = 'cat', Fair = fair, nmemb = nmemb, - cross.val = FALSE, - ncores = recipe$Analysis$ncores) - - # CRPS - crps <- CRPS(exp = res$hcst$data, obs = res$obs$data, - time_dim = 'syear', memb_dim = 'ensemble', - Fair = fair, - ncores = recipe$Analysis$ncores) - crps_clim <- CRPS(exp = res$ref_obs_tr, obs = res$obs$data, - time_dim = 'syear', memb_dim = 'ensemble', - Fair = fair, - ncores = recipe$Analysis$ncores) - - # CRPSS - crpss <- CRPSS(exp = res$hcst$data, obs = res$obs$data, ref = res$ref_obs_tr, - memb_dim = 'ensemble', Fair = fair, - time_dim = 'syear', clim.cross.val = FALSE, - ncores = recipe$Analysis$ncores) - - # Corr - enscorr <- Corr(res$hcst$data, res$obs$data, - dat_dim = NULL, - time_dim = 'syear', - method = 'pearson', - memb_dim = 'ensemble', - memb = F, - conf = F, - pval = F, - sign = T, - alpha = 0.05, - ncores = recipe$Analysis$ncores) - - # Mean Bias - mean_bias <- Bias(res$hcst.full_val$data, res$obs.full_val$data, - time_dim = 'syear', - memb_dim = 'ensemble', - alpha = 0.05, - ncores = recipe$Analysis$ncores) - - # Spread error ratio - enssprerr <- SprErr(exp = res$hcst$data, obs = res$obs$data, - memb_dim = 'ensemble', dat_dim = NULL, - time_dim = 'syear', pval = TRUE, - ncores = recipe$Analysis$ncores) - enssprerr_sign <- enssprerr$p.val - enssprerr_sign <- enssprerr_sign <= 0.05 - enssprerr <- enssprerr$ratio - - # RMSE - rms <- RMS(exp = res$hcst$data, obs = res$obs$data, - memb_dim = 'ensemble', dat_dim = NULL, - time_dim = 'syear', alpha = 0.05, - ncores = recipe$Analysis$ncores) - - # RMSS - rmss <- RMSSS(exp = res$hcst$data, obs = res$obs$data, - ref = res$ref_obs_tr, - 
memb_dim = 'ensemble', dat_dim = NULL, - time_dim = 'syear', alpha = 0.05, sign = TRUE, - ncores = recipe$Analysis$ncores) - - skill_metrics <- list(mean_bias = mean_bias$bias, - mean_bias_significance = mean_bias$sig, - enscorr = enscorr$corr, - enscorr_significance = enscorr$sign, - enssprerr = enssprerr, - enssprerr_significance = enssprerr_sign, - rps = rps, rps_clim = rps_clim, crps = crps, crps_clim = crps_clim, - rpss = rpss$rpss, rpss_significance = rpss$sign, #crps = crps, - crpss = crpss$crpss, crpss_significance = crpss$sign, - rms = rms$rms, rmsss = rmss$rmsss, rmsss_significance = rmss$sign) - original <- recipe$Run$output_dir - recipe$Run$output_dir <- paste0(original, "/outputs/Skill/") - - skill_metrics <- lapply(skill_metrics, function(x) { - if (is.logical(x)) { - dims <- dim(x) - res <- as.numeric(x) - dim(res) <- dims - } else { - res <- x - } - return(res) - }) - # Save metrics - save_metrics(recipe = recipe, - metrics = skill_metrics, - data_cube = data$hcst, agg = 'global', - outdir = recipe$Run$output_dir) - - recipe$Run$output_dir <- original - # reduce dimension to work with Visualization module: - skill_metrics <- lapply(skill_metrics, function(x) {drop(x)}) - skill_metrics <- lapply(skill_metrics, function(x){ - InsertDim(x, pos = 1, len = 1, name = 'var')}) - return(skill_metrics) -} - diff --git a/modules/CrossVal/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R similarity index 100% rename from modules/CrossVal/Crossval_anomalies.R rename to modules/Crossval/Crossval_anomalies.R diff --git a/modules/Crossval/Crossval_skill.R b/modules/Crossval/Crossval_skill.R new file mode 100644 index 00000000..58007a5a --- /dev/null +++ b/modules/Crossval/Crossval_skill.R @@ -0,0 +1,171 @@ + +source("modules/Saving/Saving.R") +source("modules/Crossval/R/tmp/RPS.R") +source("modules/Crossval/R/RPS_clim.R") +source("modules/Crossval/R/CRPS_clim.R") +source("modules/Crossval/R/tmp/RPSS.R") +source("modules/Crossval/R/tmp/RandomWalkTest.R") +source("modules/Crossval/R/tmp/Corr.R") +source("modules/Crossval/R/tmp/Bias.R") +source("modules/Crossval/R/tmp/SprErr.R") +source("modules/Crossval/R/tmp/Eno.R") + +## data_crossval is the result from function full_crossval_anomalies or similar. 
+## this is a list with the required elements:
+  ## probs is a list with
+  ## probs$hcst_ev and probs$obs_ev
+  ## probs$hcst_ev will have as many elements as sets in $Probabilities$percentiles
+  ## each element will be an array with a 'cat' dimension
+  ## the same for probs$obs_ev
+  ## hcst is a s2dv_cube for the post-processed hindcast for the evaluation indices
+  ## in this case cross-validated anomalies
+  ## obs is a s2dv_cube for the post-processed obs
+  ## in this case cross-validated anomalies
+  ## fcst is a s2dv_cube for the post-processed fcst
+  ## in this case cross-validated anomalies computed with the full hindcast period
+  ## this object is not required for skill assessment
+  ## hcst.full_val and obs.full_val are the original data to compute mean bias
+  ## cat_lims used to compute the probabilities
+  ## this object is not required for skill assessment
+  ## ref_obs_tr is an array with the cross-validated observed anomalies
+  ## to be used as reference forecast in the CRPSS and CRPS_clim
+  ## it is computed from the training indices
+## the recipe could be used to read the Percentiles
+## if fair is TRUE, the nmemb used to compute the probabilities is needed
+  ## nmemb_ref is the number of years - 1 in case the climatological forecast is the reference
+Crossval_skill <- function(recipe, data_crossval,
+                           fair = FALSE, nmemb = NULL, nmemb_ref = NULL) {
+  res <- data_crossval # the list described above, used as 'res' below
+  ncores <- recipe$Analysis$ncores
+  alpha <- recipe$Analysis$Skill$alpha
+  na.rm <- recipe$Analysis$remove_NAs
+  if (is.null(alpha)) {
+    alpha <- 0.05
+  }
+  ## START SKILL ASSESSMENT:
+  skill_metrics <- list()
+  requested_metrics <- strsplit(recipe$Analysis$Workflow$Skill$metric,
+                                ", | |,")[[1]]
+
+  if ('rps' %in% requested_metrics) {
+    rps <- RPS(exp = res$probs$hcst_ev[[1]],
+               obs = res$probs$obs_ev[[1]], memb_dim = NULL,
+               cat_dim = 'cat', cross.val = FALSE, time_dim = 'syear',
+               Fair = fair, nmemb = nmemb,
+               ncores = ncores)
+    rps_clim <- Apply(list(res$probs$obs_ev[[1]]),
+                      target_dims = c('cat', 'syear'),
+                      RPS_clim, bin_dim_abs = 'cat', Fair = fair,
+                      cross.val = FALSE, ncores = ncores)$output1
+    skill_metrics$rps <- rps
+    skill_metrics$rps_clim <- rps_clim
+  }
+  if ('rpss' %in% requested_metrics) {
+    rpss <- RPSS(exp = res$probs$hcst_ev[[1]], obs = res$probs$obs_ev[[1]],
+                 ref = NULL, # ref is 1/3 by default if terciles
+                 time_dim = 'syear', memb_dim = NULL,
+                 cat_dim = 'cat', nmemb = nmemb,
+                 dat_dim = NULL,
+                 prob_thresholds = 0.1, # unused param when providing probs
+                 indices_for_clim = NULL,
+                 Fair = fair, weights_exp = NULL, weights_ref = NULL,
+                 cross.val = FALSE, na.rm = na.rm,
+                 sig_method.type = 'two.sided.approx', alpha = alpha,
+                 ncores = ncores)
+    skill_metrics$rpss <- rpss$rpss
+    skill_metrics$rpss_significance <- rpss$sign
+  }
+
+  if ('crps' %in% requested_metrics) {
+    crps <- CRPS(exp = res$hcst$data, obs = res$obs$data,
+                 time_dim = 'syear', memb_dim = 'ensemble',
+                 Fair = fair,
+                 ncores = ncores)
+    skill_metrics$crps <- crps
+    crps_clim <- CRPS(exp = res$ref_obs_tr, obs = res$obs$data,
+                      time_dim = 'syear', memb_dim = 'ensemble',
+                      Fair = fair, ncores = ncores)
+    skill_metrics$crps_clim <- crps_clim
+  }
+  if ('crpss' %in% requested_metrics) {
+    crpss <- CRPSS(exp = res$hcst$data, obs = res$obs$data, ref = res$ref_obs_tr,
+                   memb_dim = 'ensemble', Fair = fair,
+                   time_dim = 'syear', clim.cross.val = FALSE,
+                   ncores = ncores)
+    skill_metrics$crpss <- crpss$crpss
+    skill_metrics$crpss_significance <- crpss$sign
+  }
+
+  if ('enscorr' %in% requested_metrics) {
+    enscorr <- Corr(res$hcst$data, res$obs$data,
+                    dat_dim = NULL,
+                    time_dim = 
'syear', + method = 'pearson', + memb_dim = 'ensemble', + memb = F, + conf = F, + pval = F, + sign = T, + alpha = alpha, + ncores = ncores) + skill_metrics$enscorr <- enscorr$corr + skill_metrics$enscorr_significance <- enscorr$sign + } + if ('mean_bias' %in% requested_metrics) { + mean_bias <- Bias(res$hcst.full_val$data, res$obs.full_val$data, + time_dim = 'syear', + memb_dim = 'ensemble', + alpha = alpha, + ncores = ncores) + skill_metrics$mean_bias <- mean_bias$bias + skill_metrics$mean_bias_significance <- mean_bias$sig + } + if ('enssprerr' %in% requested_metrics) { + enssprerr <- SprErr(exp = res$hcst$data, obs = res$obs$data, + memb_dim = 'ensemble', dat_dim = NULL, + time_dim = 'syear', pval = TRUE, + ncores = ncores) + skill_metrics$enssprerr <- enssprerr$ratio + skill_metrics$enssprerr_significance <- enssprerr$p.val <= alpha + } + if ('rms' %in% requested_metrics) { + rms <- RMS(exp = res$hcst$data, obs = res$obs$data, + memb_dim = 'ensemble', dat_dim = NULL, + time_dim = 'syear', alpha = alpha, + ncores = ncores) + skill_metrics$rms <- rms$rms + } + if ('rmss' %in% requested_metrics) { + rmss <- RMSSS(exp = res$hcst$data, obs = res$obs$data, + ref = res$ref_obs_tr, + memb_dim = 'ensemble', dat_dim = NULL, + time_dim = 'syear', alpha = alpha, sign = TRUE, + ncores = ncores) + skill_metrics$rmss <- rmss$rmsss + skill_metrics$rmss_significance <- rmss$sign + } + original <- recipe$Run$output_dir + recipe$Run$output_dir <- paste0(original, "/outputs/Skill/") + + skill_metrics <- lapply(skill_metrics, function(x) { + if (is.logical(x)) { + dims <- dim(x) + res <- as.numeric(x) + dim(res) <- dims + } else { + res <- x + } + return(res) + }) + # Save metrics + save_metrics(recipe = recipe, + metrics = skill_metrics, + data_cube = res$hcst, agg = 'global', + outdir = recipe$Run$output_dir) + + recipe$Run$output_dir <- original + # reduce dimension to work with Visualization module: + skill_metrics <- lapply(skill_metrics, function(x) {drop(x)}) + skill_metrics <- lapply(skill_metrics, function(x){ + InsertDim(x, pos = 1, len = 1, name = 'var')}) + return(skill_metrics) +} + diff --git a/modules/CrossVal/R/CRPS_clim.R b/modules/Crossval/R/CRPS_clim.R similarity index 100% rename from modules/CrossVal/R/CRPS_clim.R rename to modules/Crossval/R/CRPS_clim.R diff --git a/modules/CrossVal/R/RPS_clim.R b/modules/Crossval/R/RPS_clim.R similarity index 100% rename from modules/CrossVal/R/RPS_clim.R rename to modules/Crossval/R/RPS_clim.R diff --git a/modules/CrossVal/R/tmp/Bias.R b/modules/Crossval/R/tmp/Bias.R similarity index 100% rename from modules/CrossVal/R/tmp/Bias.R rename to modules/Crossval/R/tmp/Bias.R diff --git a/modules/CrossVal/R/tmp/Corr.R b/modules/Crossval/R/tmp/Corr.R similarity index 100% rename from modules/CrossVal/R/tmp/Corr.R rename to modules/Crossval/R/tmp/Corr.R diff --git a/modules/CrossVal/R/tmp/Eno.R b/modules/Crossval/R/tmp/Eno.R similarity index 100% rename from modules/CrossVal/R/tmp/Eno.R rename to modules/Crossval/R/tmp/Eno.R diff --git a/modules/CrossVal/R/tmp/GetProbs.R b/modules/Crossval/R/tmp/GetProbs.R similarity index 100% rename from modules/CrossVal/R/tmp/GetProbs.R rename to modules/Crossval/R/tmp/GetProbs.R diff --git a/modules/CrossVal/R/tmp/RPS.R b/modules/Crossval/R/tmp/RPS.R similarity index 100% rename from modules/CrossVal/R/tmp/RPS.R rename to modules/Crossval/R/tmp/RPS.R diff --git a/modules/CrossVal/R/tmp/RPSS.R b/modules/Crossval/R/tmp/RPSS.R similarity index 100% rename from modules/CrossVal/R/tmp/RPSS.R rename to modules/Crossval/R/tmp/RPSS.R diff --git a/modules/CrossVal/R/tmp/RandomWalkTest.R
b/modules/Crossval/R/tmp/RandomWalkTest.R similarity index 100% rename from modules/CrossVal/R/tmp/RandomWalkTest.R rename to modules/Crossval/R/tmp/RandomWalkTest.R diff --git a/modules/CrossVal/R/tmp/SprErr.R b/modules/Crossval/R/tmp/SprErr.R similarity index 100% rename from modules/CrossVal/R/tmp/SprErr.R rename to modules/Crossval/R/tmp/SprErr.R diff --git a/modules/CrossVal/recipe_crossval_ecvs.yml b/modules/Crossval/recipe_crossval_ecvs.yml similarity index 98% rename from modules/CrossVal/recipe_crossval_ecvs.yml rename to modules/Crossval/recipe_crossval_ecvs.yml index 3ec15379..2cc339c3 100644 --- a/modules/CrossVal/recipe_crossval_ecvs.yml +++ b/modules/Crossval/recipe_crossval_ecvs.yml @@ -116,6 +116,7 @@ Analysis: # NAO: {obsproj: yes, save: 'all', plot_ts: yes, plot_sp: yes} Skill: metric: mean_bias enscorr rpss crpss enssprerr # List of skill metrics separated by spaces or commas. (Mandatory, str) + alpha: 0.05 save: 'all' # Options: 'all', 'none' (Mandatory, str) Statistics: metric: cov std var n_eff # List of statistics separated by spaces or commas. (Mandatory, str) @@ -169,7 +170,7 @@ Run: auto_conf: script: ./example_scripts/multimodel_seasonal.R # replace with the path to your script expid: a6wq # replace with your EXPID - hpc_user: bsc032762 # replace with your hpc username + hpc_user: bsc032339 # replace with your hpc username wallclock: 01:00 # wallclock for single-model jobs, hh:mm wallclock_multimodel: 02:00 # wallclock for multi-model jobs, hh:mm. If empty, 'wallclock' will be used. processors_per_job: 4 # processors to request for each single-model job. @@ -178,6 +179,6 @@ Run: custom_directives_multimodel: ['#SBATCH --exclusive', '#SBATCH --constraint=highmem'] # custom scheduler directives for multi-model jobs. If empty, 'custom_directives' will be used. platform: nord3v2 # platform (for now, only nord3v2 is available) email_notifications: yes # enable/disable email notifications. Change it if you want to. - email_address: victoria.agudetse@bsc.es # replace with your email address + email_address: nuria.perez@bsc.es # replace with your email address notify_completed: yes # notify me by email when a job finishes notify_failed: yes # notify me by email when a job fails diff --git a/modules/Crossval/recipe_crossval_ecvs_global.yml b/modules/Crossval/recipe_crossval_ecvs_global.yml new file mode 100644 index 00000000..830225af --- /dev/null +++ b/modules/Crossval/recipe_crossval_ecvs_global.yml @@ -0,0 +1,184 @@ +# IMPORTANT: This recipe is not intended to represent a real workflow: it is only a template showcasing ALL available options. +Description: + Author: N. Pérez-Zanón + Info: This recipe can be used to test Crossval_anomalies.R and Crossval_skill.R for single and multimodel. +Analysis: + Horizon: seasonal # Mandatory, str: 'seasonal', or 'decadal'. Subseasonal is in development + Variables: + # name: variable name(s) in the /esarchive (Mandatory, str) + # freq: 'monthly_mean', 'daily' or 'daily_mean' (Mandatory, str) + # units: desired data units for each variable. Only available for temperature, + # precipitation, and pressure variables.
+ - {name: 'tas', freq: 'monthly_mean', units: 'K'} + # To request more variables to be divided in atomic recipes, add them this way: + # - {name: 'prlr', freq: 'monthly_mean', units: 'mm'} + # To request multiple variables *in the same* atomic recipe, add them this way: + # - {name: 'tas, prlr', freq: 'monthly_mean', units: {tas: 'C', prlr: 'mm'}} + Datasets: + System: + # name: System name (Mandatory, str) + # member: 'all' or individual members, separated by a comma and in quotes (decadal only, str) + - {name: 'ECMWF-SEAS5.1', member: 'all'} + - {name: 'Meteo-France-System7'} + # To request more Systems to be divided in atomic recipes, add them this way: + # - {name: 'Meteo-France-System7'} + Multimodel: + execute: no # Either yes/true or no/false (Mandatory, bool) + approach: pooled # Multimodel computation approach. 'pooled' currently the only option (str) + createFrom: Anomalies # Which module should the anomalies be created from (str) + Reference: + - {name: 'ERA5'} # Reference name (Mandatory, str) + # To request more References to be divided into atomic recipes, add them this way: + # - {name: 'ERA5Land'} + Time: + sdate: + - '0101' + - '0201' + - '0301' + - '0401' + - '0501' + - '0601' + - '0701' + - '0801' + - '0901' + - '1001' + - '1101' + - '1201' # Start date, 'mmdd' (Mandatory, int) + # To request more startdates to be divided into atomic recipes, add them this way: + # - '0101' + # - '0201' + # ... + fcst_year: '2020' # Forecast initialization year 'YYYY' (Optional, int) + hcst_start: '1993' # Hindcast initialization start year 'YYYY' (Mandatory, int) + hcst_end: '2016' # Hindcast initialization end year 'YYYY' (Mandatory, int) + ftime_min: 1 # First forecast time step in months. Starts at “1”. (Mandatory, int) + ftime_max: 6 # Last forecast time step in months. Starts at “1”. (Mandatory, int) + Region: + # latmin: minimum latitude (Mandatory, int) + # latmax: maximum latitude (Mandatory, int) + # lonmin: # minimum longitude (Mandatory, int) + # lonmax: # maximum longitude (Mandatory, int) + - {name: global, latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9} + # To request more regions to be divided in atomic recipes, add them this way: + # {name: "nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 60} + Regrid: + method: conservative # Interpolation method (Mandatory, str) + type: to_system # Interpolate to: 'to_system', 'to_reference', 'none', + # or CDO-accepted grid. (Mandatory, str) + Workflow: + # This is the section of the recipe where the parameters for each module are specified + Calibration: + method: raw # Calibration method. (Mandatory, str) + save: 'none' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str) + Anomalies: + compute: yes # Either yes/true or no/false (Mandatory, bool) + cross_validation: no # Either yes/true or no/false (Mandatory if 'compute: yes', bool) + save: 'all' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str) + Downscaling: + # Assumption 1: leave-one-out cross-validation is always applied + # Assumption 2: for analogs, we select the best analog (minimum distance) + type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'. + int_method: conservative # regridding method accepted by CDO. (Mandatory, str) + bc_method: bias # If type=intbc. Options: 'bias', 'calibration', 'quantile_mapping', 'qm', 'evmos', 'mse_min', 'crps_min', 'rpc-based'. + lr_method: # If type=intlr. Options: 'basic', 'large_scale', '9nn' + log_reg_method: # If type=logreg. 
Options: 'ens_mean', 'ens_mean_sd', 'sorted_members' + target_grid: /esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # nc file or grid accepted by CDO + nanalogs: # If type=analogs. Number of analogs to be searched + save: 'all' # Options: 'all'/'none'/'exp_only' (Mandatory, str) + Time_aggregation: + execute: no # Either yes/true or no/false. Defaults to false. (Mandatory, bool) + method: average # Aggregation method. Available methods: 'average', 'accumulated'. (Mandatory, string) + # ini and end: lists of paired initial and final time steps to aggregate. + # In this example, aggregate from 1 to 2; from 2 to 3 and from 1 to 3 + ini: [1, 2, 1] + end: [2, 3, 3] + # user_def: List of lists. Custom user-defined forecast times to aggregate. + # Elements should be named; the names can be chosen by the user. + # An R expression can be entered using '!expr'; it will be evaluated by the code. + # If both ini/end and user_def are defined, ini/end takes preference. + user_def: + DJF_Y1: [1, 3] # aggregate from 1 to 3 forecast times + DJF: !expr sort(c(seq(1, 120, 12), seq(2, 120, 13), seq(3, 120, 14))) # aggregate 1,2,3,13,14,15,... + Indices: + ## Indices available: - NAO (for psl and/or z500); + # - Nino1+2, Nino3, Nino3.4, Nino4 (for tos) + ## Each index can only be computed if its area is within the selected region. + # obsproj: NAO computation method (see s2dv::NAO()). Default is yes/true. (Optional, bool) + # save: What to save. Options: 'all'/'none'. Default is 'all'. + # plot_ts: Generate time series plot? Default is yes/true. (Optional, bool) + # plot_sp: Generate spatial pattern plot? Default is yes/true. (Optional, bool) + # alpha: Significance threshold. Default value is 0.05 (Optional, numeric) + #Nino1+2: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05} + #Nino3: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05} + #Nino3.4: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05} + #Nino4: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05} + # Also available if variable is psl and/or z500: + # NAO: {obsproj: yes, save: 'all', plot_ts: yes, plot_sp: yes} + Skill: + metric: mean_bias enscorr rpss crpss enssprerr rps crps rms rmss # List of skill metrics separated by spaces or commas. (Mandatory, str) + alpha: 0.05 + save: 'all' # Options: 'all', 'none' (Mandatory, str) + Statistics: + metric: cov std var n_eff # List of statistics separated by spaces or commas. (Mandatory, str) + save: 'all' # Options: 'all', 'none' (Mandatory, str) + Probabilities: + percentiles: [[1/3, 2/3]] # Thresholds + # for quantiles and probability categories. Each set of thresholds should be + # enclosed within brackets. For now, they are INDEPENDENT from skill metrics. (Optional) + save: 'percentiles_only' # Options: 'all', 'none', 'bins_only', 'percentiles_only' (Mandatory, str) + Visualization: + plots: skill_metrics, most_likely_terciles, forecast_ensemble_mean # Types of plots to generate (Optional, str) + multi_panel: no # Multi-panel plot or single-panel plots. Default is 'no/false'. (Optional, bool) + projection: 'robinson' # Options: 'cylindrical_equidistant', 'robinson', 'lambert_europe'. Default is cylindrical equidistant. (Optional, str) + mask_terciles: no # Whether to mask the non-significant points by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + dots_terciles: yes # Whether to dot the non-significant points by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false.
(Optional, str) + mask_ens: no # Whether to mask the non-significant points by rpss in the forecast ensemble mean plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + file_format: 'PNG' # Final file format of the plots. Formats available: PNG, JPG, JPEG, EPS. Defaults to PDF. + Scorecards: + execute: yes # yes/no + regions: + # Mandatory: Define regions for which the spatial aggregation will be performed. + # The regions must be included within the area defined in the 'Analysis:Region' section. + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: 1, 2, 3 # Mandatory, int: start months to visualise in scorecard table. Options: 'all' or a sequence of numbers. + metric: mean_bias enscorr rpss crpss enssprerr # Mandatory: metrics to visualise in scorecard table + metric_aggregation: 'score' # Mandatory, str: level of aggregation for skill scores. Options: 'score' or 'skill' + inf_to_na: True # Optional, bool: set inf values in data to NA, default is no/False + table_label: NULL # Optional, str: extra information to add in scorecard table title + fileout_label: NULL # Optional, str: extra information to add in scorecard output filename + col1_width: NULL # Optional, int: to adjust width of first column in scorecards table + col2_width: NULL # Optional, int: to adjust width of second column in scorecards table + calculate_diff: False # Mandatory, bool: True/False + cross.method: 'leave-one-out' # Default 'leave-one-out' + ncores: 16 # Number of cores to be used in parallel computation. + # If left empty, defaults to 1. (Optional, int) + remove_NAs: yes # Whether to remove NAs. + # If left empty, defaults to no/false. (Optional, bool) + Output_format: 'Scorecards' # 'S2S4E' or 'Scorecards'. Determines the format of the output. Default is 'S2S4E'. +Run: + filesystem: esarchive # Name of the filesystem as defined in the archive configuration file + Loglevel: INFO # Minimum category of log messages to display: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. + # Default value is 'INFO'. (Optional, str) + Terminal: yes # Optional, bool: Whether to display log messages in the terminal. + # Default is yes/true. + output_dir: /esarchive/scratch/nperez/git4/ # Output directory. Must have write permissions. (Mandatory, str) + code_dir: /esarchive/scratch/nperez/git4/sunset/ # Directory where the code is stored. Is used when launching jobs (not running interactively) + autosubmit: no # Whether or not to run with Autosubmit. Only for non-atomic recipes (not running interactively) + # fill only if using autosubmit + auto_conf: + script: ./example_scripts/multimodel_seasonal.R # replace with the path to your script + expid: a6wq # replace with your EXPID + hpc_user: bsc032339 # replace with your hpc username + wallclock: 01:00 # wallclock for single-model jobs, hh:mm + wallclock_multimodel: 02:00 # wallclock for multi-model jobs, hh:mm. If empty, 'wallclock' will be used. + processors_per_job: 4 # processors to request for each single-model job. + processors_multimodel: 16 # processors to request for each multi-model job. If empty, 'processors_per_job' will be used. + custom_directives: ['#SBATCH --exclusive'] # custom scheduler directives for single-model jobs. + custom_directives_multimodel: ['#SBATCH --exclusive', '#SBATCH --constraint=highmem'] # custom scheduler directives for multi-model jobs. 
If empty, 'custom_directives' will be used. + platform: nord3v2 # platform (for now, only nord3v2 is available) + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 245e4e3d..5c5ab721 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -596,7 +596,7 @@ check_recipe <- function(recipe) { "mean_bias", "mean_bias_ss", "enssprerr", "rps_clim", "rps_clim_syear", "crps_clim", "crps_clim_syear", "enscorr_specs", "frps_specs", "rpss_specs", - "frpss_specs", "bss10_specs", "bss90_specs", "rms") + "frpss_specs", "bss10_specs", "bss90_specs", "rms", "rmss") if ("Skill" %in% names(recipe$Analysis$Workflow)) { if (is.null(recipe$Analysis$Workflow$Skill$metric)) { error(recipe$Run$logger, -- GitLab From 34b136ab231da67d4b18995ae08de4a5950e7016 Mon Sep 17 00:00:00 2001 From: nperez Date: Tue, 4 Jun 2024 14:34:51 +0200 Subject: [PATCH 19/78] multimodel pool and crps --- modules/Crossval/Crossval_anomalies.R | 10 +- modules/Crossval/recipe_crossval_ecvs.yml | 2 +- modules/Multimodel/Multimodel.R | 103 +++++++-- modules/Multimodel/dims_multimodel.R | 6 +- modules/Multimodel/load_multimodel.R | 146 ++++++------ modules/Multimodel/load_multimodel_mean.R | 260 ++++++++++++++++++++++ recipe_tas.yml | 13 +- testing_multimodel.R | 137 ++++++++++++ 8 files changed, 560 insertions(+), 117 deletions(-) create mode 100644 modules/Multimodel/load_multimodel_mean.R create mode 100644 testing_multimodel.R diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R index b097faa6..fa421f22 100644 --- a/modules/Crossval/Crossval_anomalies.R +++ b/modules/Crossval/Crossval_anomalies.R @@ -234,13 +234,17 @@ Crossval_anomalies <- function(recipe, data) { } } # Save ensemble mean for multimodel option: - hcst_EM <- MeanDims(ano_hcst$data, 'ensemble', drop = T) - save_metrics(recipe = recipe, metrics = list(hcst_EM = hcst_EM), + hcst_EM <- MeanDims(ano_hcst$data, 'ensemble', drop = T) + save_metrics(recipe = recipe, + metrics = list(hcst_EM = + Subset(hcst_EM, along = 'dat', indices = 1, drop = 'selected')), data_cube = data$hcst, agg = agg, module = "statistics") if (!is.null(data$fcst)) { fcst_EM <- MeanDims(data$fcst$data, 'ensemble', drop = T) - save_metrics(recipe = recipe, metrics = list(fcst_EM = fcst_EM), + save_metrics(recipe = recipe, + metrics = list(fcst_EM = + Subset(fcst_EM, along = 'dat', indices = 1, drop = 'selected')), data_cube = data$fcst, agg = agg, module = "statistics") } diff --git a/modules/Crossval/recipe_crossval_ecvs.yml b/modules/Crossval/recipe_crossval_ecvs.yml index 2cc339c3..e23495ec 100644 --- a/modules/Crossval/recipe_crossval_ecvs.yml +++ b/modules/Crossval/recipe_crossval_ecvs.yml @@ -50,7 +50,7 @@ Analysis: # ... fcst_year: '2020' # Forecast initialization year 'YYYY' (Optional, int) hcst_start: '1993' # Hindcast initialization start year 'YYYY' (Mandatory, int) - hcst_end: '2016' # Hindcast initialization end year 'YYYY' (Mandatory, int) + hcst_end: '1996' # Hindcast initialization end year 'YYYY' (Mandatory, int) ftime_min: 1 # First forecast time step in months. Starts at “1”. (Mandatory, int) ftime_max: 6 # Last forecast time step in months. Starts at “1”. 
(Mandatory, int) Region: diff --git a/modules/Multimodel/Multimodel.R b/modules/Multimodel/Multimodel.R index 192e9292..7a9eb670 100644 --- a/modules/Multimodel/Multimodel.R +++ b/modules/Multimodel/Multimodel.R @@ -13,32 +13,93 @@ source('modules/Multimodel/load_multimodel_splitted.R') source('modules/Multimodel/dims_multimodel.R') source('modules/Multimodel/build_multimodel.R') source('modules/Multimodel/clean_multimodel.R') +library(startR) +library(CSTools) +library(yaml) +recipe <- read_yaml("recipe_tas.yml") +source("modules/Saving/R/get_dir.R") +source("modules/Loading/Loading.R") +recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "recipe_tas_20240604104736/") +#recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "recipe_tas_20240531165840/") +# DON'T KNOW HOW TO SOLVE LOGGER: +recipe$Run$Loglevel <- 'INFO' +recipe$Run$logfile <- "/esarchive/scratch/nperez/git4//recipe_tas_20240531165840/logs/main.log" +recipe$Run$logger <- logger <- log4r::logger(threshold = recipe$Run$Loglevel, + appenders = list(console_appender(layout = default_log_layout()), + file_appender(logfile, append = TRUE, + layout = default_log_layout()))) Multimodel <- function(recipe) { # recipe: auto-s2s recipe as provided by read_yaml - - # Loading data saved in the jobs for individual models - if (tolower(recipe$Analysis$Datasets$Multimodel$split_loading) %in% c('true','yes')){ - # Load data splitting by system - data_aux <- load_multimodel_splitted(recipe) - - data <- data_aux$data - - files_hcst <- data_aux$files_hcst - files_fcst <- data_aux$files_fcst - files_obs <- data_aux$files_obs - - rm(data_aux) - - } else { - # Load data without splitting - data <- load_multimodel(recipe) - files_hcst <- data$hcst$attrs$source_files - files_fcst <- data$fcst$attrs$source_files - files_obs <- data$obs$attrs$source_files + ncores <- recipe$Analysis$ncores + cross.method <- recipe$Analysis$cross.method + if (is.null(cross.method)) { + cross.method <- 'leave-one-out' + } + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + round(eval(parse(text = y)),2)})}) + na.rm <- recipe$Analysis$remove_NAs + skill_metrics <- list() + requested_metrics <- strsplit(recipe$Analysis$Workflow$Skill$metric, + ", | |,")[[1]] + # Load data splitting by system individual members: + if (recipe$Analysis$Datasets$Multimodel$approach == 'pooled') { + data_aux <- load_multimodel(recipe) + datos <- append(list(obs = data_aux$obs$data), lapply(data_aux$hcst, function(x) x$data)) + names(datos)[-1] <- sapply(recipe$Analysis$Datasets$System, '[[', 'name') + # Compute crps /crpss and return forecast for visualization purposes + if (any(c('crps', 'crpss') %in% requested_metrics)) { + crps <- Apply(datos, target_dims = c('syear', 'ensemble'), + fun = function(obs, ...) 
{ + res <- abind(..., along = 2) + names(dim(res)) <- names(dim(obs)) + obs <- Subset(obs, along = 'ensemble', indices = 1, drop = 'selected') + s2dv:::.CRPS(exp = res, obs = obs, dat_dim = NULL, + time_dim = 'syear', memb_dim = 'ensemble')}, + ncores = ncores)$output1 + skill_metrics$crps <- crps + # Build the reference forecast: + sdate_dim <- dim(datos[[1]])['syear'] + cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, + amt.points = sdate_dim, + amt.points_cor = NULL) + tmp <- array(1:length(cross), c(syear = length(cross))) + ref_clim <- Apply(list(datos$obs, tmp), + target_dims = list(c('ensemble', 'syear'), NULL), + function(x, y) { + Subset(x, along = 'syear', indices = cross[[y]]$train.dexes, drop = T)}, + ncores = ncores, output_dims = 'ensemble')$output1 + datos <- append(list(ref = ref_clim), datos) + crps_clim <- Apply(datos[1:2], target_dims = c('syear', 'ensemble'), + fun = function(ref, obs) { + obs <- Subset(obs, along = 'ensemble', indices = 1, drop = 'selected') + s2dv:::.CRPS(exp = ref, obs = obs, dat_dim = NULL, + time_dim = 'syear', memb_dim = 'ensemble')}, + ncores = ncores)$output1 + skill_metrics$crps_clim <- crps_clim + + + crpss <- Apply(datos, target_dims = c('syear', 'ensemble'), + fun = function(ref, obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- names(dim(obs)) + obs <- Subset(obs, along = 'ensemble', indices = 1, drop = 'selected') + CRPSS(exp = res, obs = obs, ref = ref, + memb_dim = 'ensemble', Fair = FALSE, + time_dim = 'syear', clim.cross.val = FALSE)}, + ncores = ncores) + skill_metrics$crpss <- crpss$crpss + skill_metrics$crpss_significance <- crpss$sign + } + # What about spread or spread-to error ratio? + rm(datos) } - + # Always load ensemble mean and probabilities: + source("modules/Multimodel/load_multimodel_mean.R") + datos <- load_multimodel_mean(recipe) # Building the multi-model multimodel_aux <- build_multimodel(data, recipe) data <- multimodel_aux$data diff --git a/modules/Multimodel/dims_multimodel.R b/modules/Multimodel/dims_multimodel.R index f9076153..4b581379 100644 --- a/modules/Multimodel/dims_multimodel.R +++ b/modules/Multimodel/dims_multimodel.R @@ -7,11 +7,7 @@ dims_multimodel <- function(recipe) { archive <- read_yaml("conf/archive.yml")$esarchive ref.name <- recipe$Analysis$Datasets$Reference$name - if (tolower(recipe$Analysis$Horizon) == 'seasonal') { - exp.name <- recipe$Analysis$Datasets$System$models - } else if (tolower(recipe$Analysis$Horizon) == 'decadal') { - exp.name <- sapply(recipe$Analysis$Datasets$System$models, '[[', 'name') - } else {stop('Multimodel not implemented for this horizon.')} + exp.name <- sapply(recipe$Analysis$Datasets$System, '[[', 'name') store.freq <- recipe$Analysis$Variables$freq variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] exp_descrip <- archive$System[[exp.name[1]]] diff --git a/modules/Multimodel/load_multimodel.R b/modules/Multimodel/load_multimodel.R index c6bd4585..b4cf8c25 100644 --- a/modules/Multimodel/load_multimodel.R +++ b/modules/Multimodel/load_multimodel.R @@ -2,16 +2,13 @@ source("modules/Loading/R/dates2load.R") source("modules/Loading/R/get_timeidx.R") source("modules/Loading/R/check_latlon.R") - +source("modules/Saving/Saving.R") +library(abind) load_multimodel <- function(recipe) { archive <- read_yaml("conf/archive.yml")$esarchive ref.name <- recipe$Analysis$Datasets$Reference$name - if (tolower(recipe$Analysis$Horizon) == 'seasonal') { - exp.name <- recipe$Analysis$Datasets$System$models - } else if 
(tolower(recipe$Analysis$Horizon) == 'decadal') { - exp.name <- sapply(recipe$Analysis$Datasets$System$models, '[[', 'name') - } else {stop('Multimodel not implemented for this horizon.')} + exp.name <- sapply(recipe$Analysis$Datasets$System, '[[', 'name') store.freq <- recipe$Analysis$Variables$freq variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] exp_descrip <- archive$System[[exp.name[1]]] @@ -29,7 +26,7 @@ load_multimodel <- function(recipe) { } else { split_multiselected_dims = FALSE } - + # Find the saved data directory recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", recipe$Analysis$Datasets$Multimodel$createFrom) @@ -37,42 +34,44 @@ load_multimodel <- function(recipe) { hcst_start <- recipe$Analysis$Time$hcst_start hcst_end <- recipe$Analysis$Time$hcst_end shortdate <- substr(recipe$Analysis$Time$sdate, start = 1, stop = 2) - filename <- paste0("scorecards_$model$_", ref.name, "_$var$_$file_date$_", + filename <- paste0("scorecards_$model$_", ref.name, "_$var$__$file_date$__", hcst_start, "-", hcst_end, "_s", shortdate, ".nc") } else { filename <- "$var$_$file_date$.nc" } + hcst.path <- file.path(recipe$Run$output_dir, "$model$", ref.name, + recipe$Analysis$Workflow$Calibration$method, + "$var$", filename) - hcst.path <- file.path(get_dir(recipe = recipe, variable = variable[1]), filename) - hcst.path <- gsub(variable[1], "$var$", hcst.path) - hcst.path <- gsub('Multimodel', "$model$", hcst.path) fcst.path <- obs.path <- hcst.path obs.path <- gsub("_$file_date$", "-obs_$file_date$", obs.path, fixed = T) obs.path <- gsub("$model$", gsub('\\.','',exp.name[1]), obs.path, fixed = T) - # Load hindcast #------------------------------------------------------------------- - hcst <- Start(dat = hcst.path, - var = variable, - file_date = sdates$hcst, - model = gsub('\\.','',exp.name), - time = 'all', - latitude = 'all', - latitude_reorder = Sort(), - longitude = 'all', - longitude_reorder = circularsort, - synonims = list(latitude = c('lat', 'latitude'), - longitude = c('lon', 'longitude'), - ensemble = c('member', 'ensemble')), - ensemble = 'all', - metadata_dims = 'var', - largest_dims_length = TRUE, - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = split_multiselected_dims, - retrieve = TRUE) - + hcst <- list() + for (sys in exp.name) { + aux <- Start(dat = hcst.path, + var = variable, + file_date = sdates$hcst, + model = gsub('\\.','', sys), + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = 'all', + metadata_dims = 'var', + largest_dims_length = TRUE, + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + hcst <- append(hcst, list(as.s2dv_cube(aux))) + } ############################# #NOTE: NOT TESTED YET if (store.freq %in% c("daily_mean", "daily")) { @@ -94,37 +93,35 @@ load_multimodel <- function(recipe) { } ############################### - # Convert hcst to s2dv_cube object - hcst <- as.s2dv_cube(hcst) - # Adjust dates for models where the time stamp goes into the next month - if (recipe$Analysis$Variables$freq == "monthly_mean") { - hcst$attrs$Dates[] <- hcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) - } + # Load forecast 
#------------------------------------------------------------------- if (!is.null(recipe$Analysis$Time$fcst_year)) { - fcst <- Start(dat = fcst.path, - var = variable, - file_date = sdates$fcst, - model = gsub('\\.','',exp.name), - time = 'all', - latitude = 'all', - latitude_reorder = Sort(), - longitude = 'all', - longitude_reorder = circularsort, - synonims = list(latitude = c('lat', 'latitude'), - longitude = c('lon', 'longitude'), - ensemble = c('member', 'ensemble')), - ensemble = 'all', - metadata_dims = 'var', - largest_dims_length = TRUE, - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = split_multiselected_dims, - retrieve = TRUE) - + fcst <- list() + for (sys in exp.name) { + aux <- Start(dat = fcst.path, + var = variable, + file_date = sdates$fcst, + model = gsub('\\.','', sys), + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = 'all', + metadata_dims = 'var', + largest_dims_length = TRUE, + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + fcst <- append(fcst, list(as.s2dv_cube(aux))) + } ############################# #NOTE: NOT TESTED YET if (store.freq %in% c("daily_mean", "daily")) { @@ -145,15 +142,6 @@ load_multimodel <- function(recipe) { dim(attr(fcst, "Variables")$common$time) <- default_time_dims } ############################# - - # Convert fcst to s2dv_cube - fcst <- as.s2dv_cube(fcst) - # Adjust dates for models where the time stamp goes into the next month - if (recipe$Analysis$Variables$freq == "monthly_mean") { - fcst$attrs$Dates[] <- - fcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) - } - } else { fcst <- NULL } @@ -234,7 +222,7 @@ load_multimodel <- function(recipe) { obs <- as.s2dv_cube(obs) # Check for consistency between hcst and obs grid - if (!isTRUE(all.equal(as.vector(hcst$coords$latitude), as.vector(obs$coords$latitude)))) { + if (!isTRUE(all.equal(as.vector(hcst[[1]]$coords$latitude), as.vector(obs$coords$latitude)))) { lat_error_msg <- paste("Latitude mismatch between hcst and obs.", "Please check the original grids and the", "regrid parameters in your recipe.") @@ -247,7 +235,7 @@ load_multimodel <- function(recipe) { info(recipe$Run$logger, obs_lat_msg) stop("hcst and obs don't share the same latitudes.") } - if (!isTRUE(all.equal(as.vector(hcst$coords$longitude), as.vector(obs$coords$longitude)))) { + if (!isTRUE(all.equal(as.vector(hcst[[1]]$coords$longitude), as.vector(obs$coords$longitude)))) { lon_error_msg <- paste("Longitude mismatch between hcst and obs.", "Please check the original grids and the", "regrid parameters in your recipe.") @@ -260,18 +248,10 @@ load_multimodel <- function(recipe) { info(recipe$Run$logger, obs_lon_msg) stop("hcst and obs don't share the same longitudes.") } - - # Print a summary of the loaded data for the user, for each object - if (recipe$Run$logger$threshold <= 2) { - data_summary(hcst, recipe) - data_summary(obs, recipe) - if (!is.null(fcst)) { - data_summary(fcst, recipe) - } - } - - info(recipe$Run$logger, - "##### DATA LOADING COMPLETED SUCCESSFULLY #####") + + +# info(recipe$Run$logger, +# "##### DATA LOADING INDIVIDUAL ENS COMPLETED SUCCESSFULLY #####") return(list(hcst = hcst, fcst = fcst, obs = obs)) } diff --git 
a/modules/Multimodel/load_multimodel_mean.R b/modules/Multimodel/load_multimodel_mean.R new file mode 100644 index 00000000..7021438b --- /dev/null +++ b/modules/Multimodel/load_multimodel_mean.R @@ -0,0 +1,260 @@ + +source("modules/Loading/R/dates2load.R") +source("modules/Loading/R/get_timeidx.R") +source("modules/Loading/R/check_latlon.R") +source("modules/Saving/Saving.R") +library(abind) +load_multimodel_mean <- function(recipe) { + + archive <- read_yaml("conf/archive.yml")$esarchive + ref.name <- recipe$Analysis$Datasets$Reference$name + exp.name <- sapply(recipe$Analysis$Datasets$System, '[[', 'name') + store.freq <- recipe$Analysis$Variables$freq + variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] + exp_descrip <- archive$System[[exp.name[1]]] + reference_descrip <- archive$Reference[[ref.name]] + sdates <- dates2load(recipe, recipe$Run$logger) + + lats.min <- recipe$Analysis$Region$latmin + lats.max <- recipe$Analysis$Region$latmax + lons.min <- recipe$Analysis$Region$lonmin + lons.max <- recipe$Analysis$Region$lonmax + circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) + + if (recipe$Analysis$Variables$freq == "monthly_mean") { + split_multiselected_dims = TRUE + } else { + split_multiselected_dims = FALSE + } + + # Find the saved data directory + recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", + recipe$Analysis$Datasets$Multimodel$createFrom) + if (tolower(recipe$Analysis$Output_format) == "scorecards") { + hcst_start <- recipe$Analysis$Time$hcst_start + hcst_end <- recipe$Analysis$Time$hcst_end + shortdate <- substr(recipe$Analysis$Time$sdate, start = 1, stop = 2) + filename <- paste0("scorecards_$model$_", ref.name, "_", variable, "_hcst_EM_", + hcst_start, "-", hcst_end, "_s", shortdate, ".nc") + } else { + filename <- "$var$_$file_date$.nc" + } + hcst.path <- file.path(recipe$Run$output_dir, "$model$", ref.name, + recipe$Analysis$Workflow$Calibration$method, + variable, filename) + + fcst.path <- obs.path <- hcst.path + fcst.path <- gsub("hcst", "fcst", fcst.path, fixed = T) + obs.path <- file.path(recipe$Run$output_dir, gsub('\\.','',exp.name[1]), ref.name, + recipe$Analysis$Workflow$Calibration$method, + variable, + paste0("scorecards_", + gsub('\\.','',exp.name[1]), "_", ref.name, + "_$var$_-obs_$file_date$__", hcst_start, "-", + hcst_end, "_s", shortdate, ".nc")) + # Load hindcast + #------------------------------------------------------------------- + hcst <- list() + for (sys in exp.name) { + aux <- Start(dat = hcst.path, + var = 'hcst_EM', + model = gsub('\\.','', sys), + sday = 'all', + sweek = 'all', + syear = 'all', + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), +# metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'dat'), + retrieve = TRUE) + hcst <- append(hcst, list(as.s2dv_cube(aux))) + names(hcst)[length(hcst)] <- sys + } + ############################# + #NOTE: NOT TESTED YET + if (store.freq %in% c("daily_mean", "daily")) { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(hcst))] <- dim(hcst) + dim(hcst) <- default_dims + # Change time attribute 
dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(hcst, "Variables")$common$time))[which(names( + dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(hcst, "Variables")$common$time))] <- + dim(attr(hcst, "Variables")$common$time) + dim(attr(hcst, "Variables")$common$time) <- default_time_dims + } + ############################### + + + + # Load forecast + #------------------------------------------------------------------- + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst <- list() + for (sys in exp.name) { + aux <- Start(dat = fcst.path, + var = 'fcst_EM', + model = gsub('\\.','', sys), + sday = 'all', + sweek = 'all', + syear = 'all', + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'dat'), + retrieve = TRUE) + fcst <- append(fcst, list(as.s2dv_cube(aux))) + names(fcst)[length(fcst)] <- sys + } + ############################# + #NOTE: NOT TESTED YET + if (store.freq %in% c("daily_mean", "daily")) { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(fcst))] <- dim(fcst) + dim(fcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(fcst, "Variables")$common$time))[which(names( + dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(fcst, "Variables")$common$time))] <- + dim(attr(fcst, "Variables")$common$time) + dim(attr(fcst, "Variables")$common$time) <- default_time_dims + } + ############################# + } else { + fcst <- NULL + } + + # Load reference + #------------------------------------------------------------------- + + if (store.freq == "monthly_mean") { + + obs <- Start(dat = obs.path, + var = variable, + file_date = sdates$hcst, + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + + } else if (store.freq %in% c("daily_mean", "daily")) { + + ############################# + #NOTE: NOT TESTED YET + + # Obtain dates and date dimensions from the loaded hcst data to make sure + # the corresponding observations are loaded correctly. 
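+ # Illustrative note (assumed monthly file layout, not verified here): with + # format '%Y%m', dates_file defined below holds strings such as "199305", + # one per (syear, time) combination, used to select the observation files + # along the 'file_date' dimension.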
+ dates <- hcst$attrs$Dates + dim(dates) <- hcst$dims[c("sday", "sweek", "syear", "time")] + + # Get year and month for file_date + dates_file <- sapply(dates, format, '%Y%m') + dim(dates_file) <- dim(dates) + # Set hour to 12:00 to ensure correct date retrieval for daily data + lubridate::hour(dates) <- 12 + lubridate::minute(dates) <- 00 + # Restore correct dimensions + dim(dates) <- dim(dates_file) + + obs <- Start(dat = obs.path, + var = variable, + file_date = sort(unique(dates_file)), + time = dates, + time_var = 'time', + time_across = 'file_date', + merge_across_dims = TRUE, + merge_across_dims_narm = TRUE, + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + ############################# + + } + + # Adds ensemble dim to obs (for consistency with hcst/fcst) + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(obs))] <- dim(obs) + dim(obs) <- default_dims + + # Convert obs to s2dv_cube + obs <- as.s2dv_cube(obs) + + # Check for consistency between hcst and obs grid + if (!isTRUE(all.equal(as.vector(hcst[[1]]$coords$latitude), as.vector(obs$coords$latitude)))) { + lat_error_msg <- paste("Latitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lat_error_msg) + hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], + "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) + info(recipe$Run$logger, hcst_lat_msg) + obs_lat_msg <- paste0("First obs lat: ", obs$lat[1], + "; Last obs lat: ", obs$lat[length(obs$lat)]) + info(recipe$Run$logger, obs_lat_msg) + stop("hcst and obs don't share the same latitudes.") + } + if (!isTRUE(all.equal(as.vector(hcst[[1]]$coords$longitude), as.vector(obs$coords$longitude)))) { + lon_error_msg <- paste("Longitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lon_error_msg) + hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], + "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) + info(recipe$Run$logger, hcst_lon_msg) + obs_lon_msg <- paste0("First obs lon: ", obs$lon[1], + "; Last obs lon: ", obs$lon[length(obs$lon)]) + info(recipe$Run$logger, obs_lon_msg) + stop("hcst and obs don't share the same longitudes.") + } + + +# info(recipe$Run$logger, +# "##### DATA LOADING INDIVIDUAL ENS COMPLETED SUCCESSFULLY #####") + + return(list(hcst = hcst, fcst = fcst, obs = obs)) +} diff --git a/recipe_tas.yml b/recipe_tas.yml index e9f7e028..9416e669 100644 --- a/recipe_tas.yml +++ b/recipe_tas.yml @@ -11,13 +11,18 @@ Analysis: flux: no Datasets: System: - name: ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 - Multimodel: no # Mandatory, bool: Either yes/true or no/false + - {name: 'ECMWF-SEAS5.1'} + - {name: 'Meteo-France-System7'} + #name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + Multimodel: + execute: yes + approach: pooled # Mandatory, bool: Either yes/true or no/false + createFrom: Anomalies Reference: name: ERA5 # Mandatory, str: Reference codename. See docu. 
Time: sdate: '0501' - fcst_year: '2024' + fcst_year: '2021' hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' hcst_end: '2000' # Mandatory, int: Hindcast end year 'YYYY' ftime_min: 1 # Mandatory, int: First leadtime time step in months @@ -81,7 +86,7 @@ Run: Loglevel: INFO Terminal: yes filesystem: esarchive - output_dir: /esarchive/scratch/nperez/cs_oper/ # replace with the directory where you want to save the outputs + output_dir: /esarchive/scratch/nperez/git4/ # replace with the directory where you want to save the outputs code_dir: /esarchive/scratch/nperez/git4/sunset/ # replace with the directory where your code is autosubmit: no # fill only if using autosubmit diff --git a/testing_multimodel.R b/testing_multimodel.R new file mode 100644 index 00000000..b1859cde --- /dev/null +++ b/testing_multimodel.R @@ -0,0 +1,137 @@ +# From Multimodel +source("modules/Loading/R/dates2load.R") +source("modules/Loading/R/get_timeidx.R") +source("modules/Loading/R/check_latlon.R") +source('modules/Multimodel/load_multimodel.R') +source('modules/Multimodel/load_multimodel_splitted.R') +source('modules/Multimodel/dims_multimodel.R') +source('modules/Multimodel/build_multimodel.R') +source('modules/Multimodel/clean_multimodel.R') + +recipe <- "recipe_tas.yml" # I have included multimodel yes +library(yaml) +recipe <- read_yaml(recipe) + +# (tolower(recipe$Analysis$Datasets$Multimodel$split_loading) %in% c('true','yes')) ## Fails because I don't know where split_loading comes from + +# I guess we need this: +# From load_multimodel + +archive <- read_yaml("conf/archive.yml")$esarchive +ref.name <- recipe$Analysis$Datasets$Reference$name + +# let's try creating a vector +## I don't know where this comes from +## recipe$Analysis$Datasets$System$models +exp.name <- c("ECMWF-SEAS5.1", "Meteo-France-System7") + +library(startR) + store.freq <- recipe$Analysis$Variables$freq + variable <- "tas" #strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] + exp_descrip <- archive$System[[exp.name[1]]] + reference_descrip <- archive$Reference[[ref.name]] + sdates <- dates2load(recipe, recipe$Run$logger) + lats.min <- recipe$Analysis$Region$latmin + lats.max <- recipe$Analysis$Region$latmax + lons.min <- recipe$Analysis$Region$lonmin + lons.max <- recipe$Analysis$Region$lonmax + circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) + +split_multiselected_dims <- TRUE # monthly_mean case; needed by the Start() calls below + +recipe$Run$output_dir <- file.path("/esarchive/scratch/nperez/git4/recipe_tas_20240531165840", "outputs", + recipe$Analysis$Datasets$Multimodel$createFrom) + +hcst_start <- recipe$Analysis$Time$hcst_start + hcst_end <- recipe$Analysis$Time$hcst_end + shortdate <- substr(recipe$Analysis$Time$sdate, start = 1, stop = 2) + filename <- paste0("scorecards_$model$_", ref.name, "_$var$__$file_date$__", + hcst_start, "-", hcst_end, "_s", shortdate, ".nc") + +source("modules/Saving/Saving.R") + hcst.path <- file.path(recipe$Run$output_dir, "$model$", ref.name, + recipe$Analysis$Workflow$Calibration$method, + "$var$", filename) +# hcst.path <- gsub(variable[1], "$var$", hcst.path) +# hcst.path <- gsub('Multimodel', "$model$", hcst.path) + fcst.path <- obs.path <- hcst.path + obs.path <- gsub("_$file_date$", "-obs_$file_date$", obs.path, fixed = T) + obs.path <- gsub("$model$", gsub('\\.','',exp.name[1]), obs.path, fixed = T) + + hcst <- Start(dat = hcst.path, + var = variable, + file_date = sdates$hcst, + model = gsub('\\.','',exp.name), + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort,
synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = 'all', + metadata_dims = 'var', + largest_dims_length = TRUE, + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + + hcst <- as.s2dv_cube(hcst) + # Adjust dates for models where the time stamp goes into the next month + # No need to check the dates if the files are saved by SUNSET: + #if (recipe$Analysis$Variables$freq == "monthly_mean") { + # hcst$attrs$Dates[] <- hcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) + #} + + fcst <- Start(dat = fcst.path, + var = variable, + file_date = sdates$fcst, + model = gsub('\\.','',exp.name), + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = 'all', + metadata_dims = 'var', + largest_dims_length = TRUE, + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + +fcst <- as.s2dv_cube(fcst) + + obs <- Start(dat = obs.path, + var = variable, + file_date = sdates$hcst, + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(obs))] <- dim(obs) + dim(obs) <- default_dims + + # Convert obs to s2dv_cube + obs <- as.s2dv_cube(obs) + +datos <- list(hcst = hcst, fcst = fcst, obs = obs) -- GitLab From 6717090c31d036e99355dc99e66d6abebe8602e7 Mon Sep 17 00:00:00 2001 From: nperez Date: Tue, 4 Jun 2024 15:08:08 +0200 Subject: [PATCH 20/78] save fcst probs --- modules/Crossval/Crossval_anomalies.R | 20 +++++++++++++++++++- recipe_tas.yml | 6 +++--- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R index fa421f22..e011d059 100644 --- a/modules/Crossval/Crossval_anomalies.R +++ b/modules/Crossval/Crossval_anomalies.R @@ -198,6 +198,7 @@ Crossval_anomalies <- function(recipe, data) { } # Save probability bins result <- list() + probs_fcst <- list() all_names <- NULL for (ps in 1:length(categories)) { for (perc in 1:(length(categories[[ps]]) + 1)) { @@ -211,6 +212,10 @@ Crossval_anomalies <- function(recipe, data) { } result <- append(list(Subset(hcst_probs_ev[[ps]], along = 'cat', indices = ps, drop = 'all')), result) + if (!is.null(data$fcst)) { + probs_fcst <- append(list(Subset(fcst_probs[[ps]], + along = 'cat', indices = ps, drop = 'all')), probs_fcst) + } all_names <- c(all_names, name_elem) } } @@ -220,13 +225,26 @@ Crossval_anomalies <- function(recipe, data) { dim(x) <- c(var = 1, dim(x)) return(x)}) } + + if (!is.null(data$fcst)) { + names(probs_fcst) <- all_names + if (!('var' %in% names(dim(probs_fcst[[1]])))) { + probs_fcst <- lapply(probs_fcst, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } + if (!('syear' %in% names(dim(probs_fcst[[1]]))))
{ + probs_fcst <- lapply(probs_fcst, function(x) { + dim(x) <- c(syear = 1, dim(x)) + return(x)}) + } + } if (recipe$Analysis$Workflow$Probabilities$save %in% c('all', 'bins_only')) { save_probabilities(recipe = recipe, probs = result, data_cube = data$hcst, agg = agg, type = "hcst") # TODO Forecast - probs_fcst <- NULL if (!is.null(probs_fcst)) { save_probabilities(recipe = recipe, probs = probs_fcst, data_cube = data$fcst, agg = agg, diff --git a/recipe_tas.yml b/recipe_tas.yml index 9416e669..90890cae 100644 --- a/recipe_tas.yml +++ b/recipe_tas.yml @@ -11,9 +11,9 @@ Analysis: flux: no Datasets: System: - - {name: 'ECMWF-SEAS5.1'} - - {name: 'Meteo-France-System7'} - #name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + #- {name: 'ECMWF-SEAS5.1'} + #- {name: 'Meteo-France-System7'} + name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 Multimodel: execute: yes approach: pooled # Mandatory, bool: Either yes/true or no/false -- GitLab From d829eb86d802fcaf11ccc0592dbe30efbf0040b4 Mon Sep 17 00:00:00 2001 From: nperez Date: Tue, 4 Jun 2024 17:37:30 +0200 Subject: [PATCH 21/78] load probs --- modules/Crossval/Crossval_anomalies.R | 12 + modules/Multimodel/Multimodel.R | 66 +--- modules/Multimodel/dims_multimodel.R | 103 ------ modules/Multimodel/load_multimodel_probs.R | 294 ++++++++++++++++++ modules/Multimodel/load_multimodel_splitted.R | 58 ---- modules/Multimodel/multimodel_metrics.R | 79 +++++ modules/Saving/R/save_probabilities.R | 9 +- recipe_tas.yml | 6 +- 8 files changed, 410 insertions(+), 217 deletions(-) delete mode 100644 modules/Multimodel/dims_multimodel.R create mode 100644 modules/Multimodel/load_multimodel_probs.R delete mode 100644 modules/Multimodel/load_multimodel_splitted.R create mode 100644 modules/Multimodel/multimodel_metrics.R diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R index e011d059..30f05184 100644 --- a/modules/Crossval/Crossval_anomalies.R +++ b/modules/Crossval/Crossval_anomalies.R @@ -199,6 +199,7 @@ Crossval_anomalies <- function(recipe, data) { # Save probability bins result <- list() probs_fcst <- list() + probs_obs <- list() all_names <- NULL for (ps in 1:length(categories)) { for (perc in 1:(length(categories[[ps]]) + 1)) { @@ -212,6 +213,8 @@ Crossval_anomalies <- function(recipe, data) { } result <- append(list(Subset(hcst_probs_ev[[ps]], along = 'cat', indices = ps, drop = 'all')), result) + probs_obs <- append(list(Subset(obs_probs_ev[[ps]], + along = 'cat', indices = ps, drop = 'all')), probs_obs) if (!is.null(data$fcst)) { probs_fcst <- append(list(Subset(fcst_probs[[ps]], along = 'cat', indices = ps, drop = 'all')), probs_fcst) @@ -225,6 +228,12 @@ Crossval_anomalies <- function(recipe, data) { dim(x) <- c(var = 1, dim(x)) return(x)}) } + names(probs_obs) <- all_names + if (!('var' %in% names(dim(probs_obs[[1]])))) { + probs_obs <- lapply(probs_obs, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } if (!is.null(data$fcst)) { names(probs_fcst) <- all_names @@ -244,6 +253,9 @@ Crossval_anomalies <- function(recipe, data) { save_probabilities(recipe = recipe, probs = result, data_cube = data$hcst, agg = agg, type = "hcst") + save_probabilities(recipe = recipe, probs = probs_obs, + data_cube = data$hcst, agg = agg, + type = "obs") # TODO Forecast if (!is.null(probs_fcst)) { save_probabilities(recipe = recipe, probs = probs_fcst, diff --git a/modules/Multimodel/Multimodel.R b/modules/Multimodel/Multimodel.R index 
7a9eb670..2390395c 100644 --- a/modules/Multimodel/Multimodel.R +++ b/modules/Multimodel/Multimodel.R @@ -19,8 +19,7 @@ library(yaml) recipe <- read_yaml("recipe_tas.yml") source("modules/Saving/R/get_dir.R") source("modules/Loading/Loading.R") -recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "recipe_tas_20240604104736/") -#recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "recipe_tas_20240531165840/") +recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "recipe_tas_20240604162251/") # DON'T KNOW HOW TO SOLVE LOGGER: recipe$Run$Loglevel <- 'INFO' recipe$Run$logfile <- "/esarchive/scratch/nperez/git4//recipe_tas_20240531165840/logs/main.log" @@ -47,59 +46,22 @@ Multimodel <- function(recipe) { ", | |,")[[1]] # Load data splitting by system individual members: if (recipe$Analysis$Datasets$Multimodel$approach == 'pooled') { - data_aux <- load_multimodel(recipe) - datos <- append(list(obs = data_aux$obs$data), lapply(data_aux$hcst, function(x) x$data)) - names(datos)[-1] <- sapply(recipe$Analysis$Datasets$System, '[[', 'name') - # Compute crps /crpss and return forecast for visualization purposes - if (any(c('crps', 'crpss') %in% requested_metrics)) { - crps <- Apply(datos, target_dims = c('syear', 'ensemble'), - fun = function(obs, ...) { - res <- abind(..., along = 2) - names(dim(res)) <- names(dim(obs)) - obs <- Subset(obs, along = 'ensemble', indices = 1, drop = 'selected') - s2dv:::.CRPS(exp = res, obs = obs, dat_dim = NULL, - time_dim = 'syear', memb_dim = 'ensemble')}, - ncores = ncores)$output1 - skill_metrics$crps <- crps - # Build the reference forecast: - sdate_dim <- dim(datos[[1]])['syear'] - cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, - amt.points = sdate_dim, - amt.points_cor = NULL) - tmp <- array(1:length(cross), c(syear = length(cross))) - ref_clim <- Apply(list(datos$obs, tmp), - target_dims = list(c('ensemble', 'syear'), NULL), - function(x, y) { - Subset(x, along = 'syear', indices = cross[[y]]$train.dexes, drop = T)}, - ncores = ncores, output_dims = 'ensemble')$output1 - datos <- append(list(ref = ref_clim), datos) - crps_clim <- Apply(datos[1:2], target_dims = c('syear', 'ensemble'), - fun = function(ref, obs) { - obs <- Subset(obs, along = 'ensemble', indices = 1, drop = 'selected') - s2dv:::.CRPS(exp = ref, obs = obs, dat_dim = NULL, - time_dim = 'syear', memb_dim = 'ensemble')}, - ncores = ncores)$output1 - skill_metrics$crps_clim <- crps_clim - - - crpss <- Apply(datos, target_dims = c('syear', 'ensemble'), - fun = function(ref, obs, ...) { - res <- abind(..., along = 2) - names(dim(res)) <- names(dim(obs)) - obs <- Subset(obs, along = 'ensemble', indices = 1, drop = 'selected') - CRPSS(exp = res, obs = obs, ref = ref, - memb_dim = 'ensemble', Fair = FALSE, - time_dim = 'syear', clim.cross.val = FALSE)}, - ncores = ncores) - skill_metrics$crpss <- crpss$crpss - skill_metrics$crpss_significance <- crpss$sign - } - # What about spread or spread-to error ratio? 
- rm(datos) + individual_memb <- load_multimodel(recipe) } # Always load ensemble mean and probabilities: source("modules/Multimodel/load_multimodel_mean.R") - datos <- load_multimodel_mean(recipe) + ensmean <- load_multimodel_mean(recipe) + + source("modules/Multimodel/load_multimodel_probs.R") + probs <- load_multimodel_probs(recipe) + + + + + + + +############# TO REMOVE: # Building the multi-model multimodel_aux <- build_multimodel(data, recipe) data <- multimodel_aux$data diff --git a/modules/Multimodel/dims_multimodel.R b/modules/Multimodel/dims_multimodel.R deleted file mode 100644 index 4b581379..00000000 --- a/modules/Multimodel/dims_multimodel.R +++ /dev/null @@ -1,103 +0,0 @@ - -source("modules/Loading/R/dates2load.R") -source("modules/Loading/R/get_timeidx.R") -source("modules/Loading/R/check_latlon.R") - -dims_multimodel <- function(recipe) { - - archive <- read_yaml("conf/archive.yml")$esarchive - ref.name <- recipe$Analysis$Datasets$Reference$name - exp.name <- sapply(recipe$Analysis$Datasets$System, '[[', 'name') - store.freq <- recipe$Analysis$Variables$freq - variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] - exp_descrip <- archive$System[[exp.name[1]]] - reference_descrip <- archive$Reference[[ref.name]] - sdates <- dates2load(recipe, recipe$Run$logger) - - lats.min <- recipe$Analysis$Region$latmin - lats.max <- recipe$Analysis$Region$latmax - lons.min <- recipe$Analysis$Region$lonmin - lons.max <- recipe$Analysis$Region$lonmax - circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) - - if (recipe$Analysis$Variables$freq == "monthly_mean") { - split_multiselected_dims = TRUE - } else { - split_multiselected_dims = FALSE - } - - # Find the saved data directory - recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", - recipe$Analysis$Datasets$Multimodel$createFrom) - if (tolower(recipe$Analysis$Output_format) == "scorecards") { - hcst_start <- recipe$Analysis$Time$hcst_start - hcst_end <- recipe$Analysis$Time$hcst_end - shortdate <- substr(recipe$Analysis$Time$sdate, start = 1, stop = 2) - filename <- paste0("scorecards_$model$_", ref.name, "_$var$_$file_date$_", - hcst_start, "-", hcst_end, "_s", shortdate, ".nc") - } else { - filename <- "$var$_$file_date$.nc" - } - - hcst.path <- file.path(get_dir(recipe = recipe, variable = variable[1]), filename) - hcst.path <- gsub(variable[1], "$var$", hcst.path) - hcst.path <- gsub('Multimodel', "$model$", hcst.path) - fcst.path <- obs.path <- hcst.path - - # Load hindcast - #------------------------------------------------------------------- - hcst <- Start(dat = hcst.path, - var = variable, - file_date = sdates$hcst, - model = gsub('\\.','',exp.name), - time = 'all', - latitude = 'all', - latitude_reorder = Sort(), - longitude = 'all', - longitude_reorder = circularsort, - synonims = list(latitude = c('lat', 'latitude'), - longitude = c('lon', 'longitude'), - ensemble = c('member', 'ensemble')), - ensemble = 'all', - metadata_dims = 'var', - largest_dims_length = TRUE, - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = split_multiselected_dims, - retrieve = FALSE) - - dim.hcst <- attr(hcst,'Dimensions') - - # Load forecast - #------------------------------------------------------------------- - if (!is.null(recipe$Analysis$Time$fcst_year)) { - fcst <- Start(dat = fcst.path, - var = variable, - file_date = sdates$fcst, - model = gsub('\\.','',exp.name), - time = 'all', - latitude = 'all', - latitude_reorder = Sort(), - 
longitude = 'all', - longitude_reorder = circularsort, - synonims = list(latitude = c('lat', 'latitude'), - longitude = c('lon', 'longitude'), - ensemble = c('member', 'ensemble')), - ensemble = 'all', - metadata_dims = 'var', - largest_dims_length = TRUE, - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = split_multiselected_dims, - retrieve = FALSE) - - dim.fcst <- attr(fcst,'Dimensions') - - } else { - dim.fcst <- NULL - } - - return(list(dim.hcst = dim.hcst, dim.fcst = dim.fcst)) -} diff --git a/modules/Multimodel/load_multimodel_probs.R b/modules/Multimodel/load_multimodel_probs.R new file mode 100644 index 00000000..4b0eb358 --- /dev/null +++ b/modules/Multimodel/load_multimodel_probs.R @@ -0,0 +1,294 @@ + +source("modules/Loading/R/dates2load.R") +source("modules/Loading/R/get_timeidx.R") +source("modules/Loading/R/check_latlon.R") +source("modules/Saving/Saving.R") +library(abind) +load_multimodel_probs <- function(recipe) { + + archive <- read_yaml("conf/archive.yml")$esarchive + ref.name <- recipe$Analysis$Datasets$Reference$name + exp.name <- sapply(recipe$Analysis$Datasets$System, '[[', 'name') + store.freq <- recipe$Analysis$Variables$freq + variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] + exp_descrip <- archive$System[[exp.name[1]]] + reference_descrip <- archive$Reference[[ref.name]] + sdates <- dates2load(recipe, recipe$Run$logger) + + lats.min <- recipe$Analysis$Region$latmin + lats.max <- recipe$Analysis$Region$latmax + lons.min <- recipe$Analysis$Region$lonmin + lons.max <- recipe$Analysis$Region$lonmax + circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) + + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + round(eval(parse(text = y)),2)})}) + + if (recipe$Analysis$Variables$freq == "monthly_mean") { + split_multiselected_dims = TRUE + } else { + split_multiselected_dims = FALSE + } + + # Find the saved data directory + recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", + recipe$Analysis$Datasets$Multimodel$createFrom) + if (tolower(recipe$Analysis$Output_format) == "scorecards") { + hcst_start <- recipe$Analysis$Time$hcst_start + hcst_end <- recipe$Analysis$Time$hcst_end + shortdate <- substr(recipe$Analysis$Time$sdate, start = 1, stop = 2) + filename <- paste0("scorecards_$model$_", ref.name, "_", variable, + "_-probs_$file_date$__", + hcst_start, "-", hcst_end, "_s", shortdate, ".nc") + } else { + filename <- "$var$_$file_date$.nc" + } + hcst.path <- file.path(recipe$Run$output_dir, "$model$", ref.name, + recipe$Analysis$Workflow$Calibration$method, + variable, filename) + + fcst.path <- hcst.path + obs.path <- file.path(recipe$Run$output_dir, gsub('\\.','', exp.name), + ref.name, + recipe$Analysis$Workflow$Calibration$method, + variable, paste0("scorecards__", ref.name, "_", variable, + "_-probs_$file_date$__", + hcst_start, "-", hcst_end, "_s", shortdate, ".nc")) + + ## Generate probs names inner files variables: + prob_names <- NULL + all_names <- NULL + for (ps in 1:length(categories)) { + for (perc in 1:(length(categories[[ps]]) + 1)) { + if (perc == 1) { + name_elem <- paste0("below_", categories[[ps]][perc]) + } else if (perc == length(categories[[ps]]) + 1) { + name_elem <- paste0("above_", categories[[ps]][perc-1]) + } else { + name_elem <- paste0("from_", categories[[ps]][perc-1], + "_to_", categories[[ps]][perc]) + } + all_names <- c(all_names, 
name_elem) + } + prob_names <- append(prob_names, list(all_names)) + } + + # Load hindcast + #------------------------------------------------------------------- + hcst <- list() + probs <- list() + for (ps in 1:length(prob_names)) { + for (sys in exp.name) { + aux <- Start(dat = hcst.path, + var = prob_names[[ps]], + file_date = sdates$hcst, + model = gsub('\\.','', sys), + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + largest_dims_length = TRUE, + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + names(dim(aux))[2] <- 'cat' + probs <- append(probs, list(as.s2dv_cube(aux))) + names(probs)[length(probs)] <- sys + } + hcst <- append(hcst, list(probs)) + } + ############################# + #NOTE: NOT TESTED YET + if (store.freq %in% c("daily_mean", "daily")) { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(hcst))] <- dim(hcst) + dim(hcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(hcst, "Variables")$common$time))[which(names( + dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(hcst, "Variables")$common$time))] <- + dim(attr(hcst, "Variables")$common$time) + dim(attr(hcst, "Variables")$common$time) <- default_time_dims + } + ############################### + + + + # Load forecast + #------------------------------------------------------------------- + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst <- list() + probs <- list() + for (ps in 1:length(prob_names)) { + for (sys in exp.name) { + aux <- Start(dat = fcst.path, + var = prob_names[[ps]], + file_date = sdates$fcst, + model = gsub('\\.','', sys), + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + largest_dims_length = TRUE, + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + names(dim(aux))[[2]] <- 'cat' + probs <- append(probs, list(as.s2dv_cube(aux))) + names(probs)[length(probs)] <- sys + } + fcst <- append(fcst, list(probs)) + } + + ############################# + #NOTE: NOT TESTED YET + if (store.freq %in% c("daily_mean", "daily")) { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(fcst))] <- dim(fcst) + dim(fcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(fcst, "Variables")$common$time))[which(names( + dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(fcst, "Variables")$common$time))] <- + 
dim(attr(fcst, "Variables")$common$time) + dim(attr(fcst, "Variables")$common$time) <- default_time_dims + } + ############################# + } else { + fcst <- NULL + } + + # Load reference + #------------------------------------------------------------------- + obs <- list() + + if (store.freq == "monthly_mean") { + for (ps in 1:length(prob_names)) { + aux <- Start(dat = obs.path, + var = prob_names[[ps]], + file_date = sdates$hcst, + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + names(dim(aux))[[2]] <- 'cat' + obs <- append(obs, list(as.s2dv_cube(aux))) + } + } else if (store.freq %in% c("daily_mean", "daily")) { + + ############################# + #NOTE: NOT TESTED YET + + # Obtain dates and date dimensions from the loaded hcst data to make sure + # the corresponding observations are loaded correctly. + dates <- hcst$attrs$Dates + dim(dates) <- hcst$dims[c("sday", "sweek", "syear", "time")] + + # Get year and month for file_date + dates_file <- sapply(dates, format, '%Y%m') + dim(dates_file) <- dim(dates) + # Set hour to 12:00 to ensure correct date retrieval for daily data + lubridate::hour(dates) <- 12 + lubridate::minute(dates) <- 00 + # Restore correct dimensions + dim(dates) <- dim(dates_file) + + obs <- Start(dat = obs.path, + var = variable, + file_date = sort(unique(dates_file)), + time = dates, + time_var = 'time', + time_across = 'file_date', + merge_across_dims = TRUE, + merge_across_dims_narm = TRUE, + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + ############################# + + } + # Adds ensemble dim to obs (for consistency with hcst/fcst) + #default_dims <- c(dat = 1, var = 1, sday = 1, + # sweek = 1, syear = 1, time = 1, + # latitude = 1, longitude = 1, ensemble = 1) + #default_dims[names(dim(obs))] <- dim(obs) + #dim(obs) <- default_dims + + # Convert obs to s2dv_cube + #obs <- as.s2dv_cube(obs) + + # Check for consistency between hcst and obs grid + if (!isTRUE(all.equal(as.vector(hcst[[1]]$coords$latitude), as.vector(obs$coords$latitude)))) { + lat_error_msg <- paste("Latitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lat_error_msg) + hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], + "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) + info(recipe$Run$logger, hcst_lat_msg) + obs_lat_msg <- paste0("First obs lat: ", obs$lat[1], + "; Last obs lat: ", obs$lat[length(obs$lat)]) + info(recipe$Run$logger, obs_lat_msg) + stop("hcst and obs don't share the same latitudes.") + } + if (!isTRUE(all.equal(as.vector(hcst[[1]]$coords$longitude), as.vector(obs$coords$longitude)))) { + lon_error_msg <- paste("Longitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lon_error_msg) + hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], + "; Last hcst lon: ", 
hcst$lon[length(hcst$lon)])
+    info(recipe$Run$logger, hcst_lon_msg)
+    obs_lon_msg <- paste0("First obs lon: ", obs$lon[1],
+                          "; Last obs lon: ", obs$lon[length(obs$lon)])
+    info(recipe$Run$logger, obs_lon_msg)
+    stop("hcst and obs don't share the same longitudes.")
+  }
+
+
+#  info(recipe$Run$logger,
+#       "##### DATA LOADING INDIVIDUAL ENS COMPLETED SUCCESSFULLY #####")
+
+  return(list(hcst = hcst, fcst = fcst, obs = obs))
+}
diff --git a/modules/Multimodel/load_multimodel_splitted.R b/modules/Multimodel/load_multimodel_splitted.R
deleted file mode 100644
index 5b9a642f..00000000
--- a/modules/Multimodel/load_multimodel_splitted.R
+++ /dev/null
@@ -1,58 +0,0 @@
-
-load_multimodel_splitted <- function(recipe){
-
-  # Retrieve data dimension only without loading data
-  dims <- dims_multimodel(recipe)
-  data_order <- names(dims$dim.hcst)
-
-  # Create empty array with desired dimensions
-  data <- list(hcst = NULL, fcst = NULL, obs = NULL)
-  data$hcst$data <- array(data = NA, dim = dims$dim.hcst)
-  if (!is.null(recipe$Analysis$Time$fcst_year)) {
-    data$fcst$data <- array(data = NA, dim = dims$dim.fcst)
-  }
-  files_hcst <- list()
-  files_fcst <- list()
-  files_obs <- list()
-
-
-  # Loop over system to load hindcast and forecast data
-  for (sys in 1:length(recipe$Analysis$Datasets$System$models)){
-
-    recipe_aux <- recipe
-    system_load <- recipe$Analysis$Datasets$System$models[sys]
-    recipe_aux$Analysis$Datasets$System$models <- system_load
-
-    data_aux <- load_multimodel(recipe_aux)
-
-    data$hcst$data[,,,,,which(recipe$Analysis$Datasets$System$models == system_load),,,,1:dim(data_aux$hcst$data)['ensemble']] <- s2dv::Reorder(data = data_aux$hcst$data, order = data_order)
-
-    if(!is.null(recipe$Analysis$Time$fcst_year)){
-      data$fcst$data[,,,,,which(recipe$Analysis$Datasets$System$models == system_load),,,,1:dim(data_aux$fcst$data)['ensemble']] <- s2dv::Reorder(data = data_aux$fcst$data, order = data_order)
-    }
-
-    files_hcst <- append(files_hcst, data_aux$hcst$attrs$source_files, after = length(files_hcst))
-    files_fcst <- append(files_fcst, data_aux$fcst$attrs$source_files, after = length(files_fcst))
-    files_obs <- append(files_obs, data_aux$obs$attrs$source_files, after = length(files_obs))
-
-  } # close loop on sys
-
-  # Define obs data
-  data$obs$data <- data_aux$obs$data
-
-  # Include data attributes
-  data$hcst$attrs <- data_aux$hcst$attrs
-  data$fcst$attrs <- data_aux$fcst$attrs
-  data$obs$attrs <- data_aux$obs$attrs
-  data$hcst$coords <- data_aux$hcst$coords
-  data$fcst$coords <- data_aux$fcst$coords
-  data$obs$coords <- data_aux$obs$coords
-
-  # Remove temporary data_aux
-  rm(data_aux)
-
-  return(list(data = data,
-              files_hcst = files_hcst,
-              files_fcst = files_fcst,
-              files_obs = files_obs))
-}
diff --git a/modules/Multimodel/multimodel_metrics.R b/modules/Multimodel/multimodel_metrics.R
new file mode 100644
index 00000000..0eb0f7ac
--- /dev/null
+++ b/modules/Multimodel/multimodel_metrics.R
@@ -0,0 +1,79 @@
+# One function to compute all the possible metrics or statistics
+# indv_members is the list returned by load_multimodel
+  ## this is needed to compute crps/crpss but it will not always fit in memory
+# ensmean is a list returned by load_multimodel_mean
+  ## this reduces memory use and can be used to compute several metrics
+# probs is a list returned by load_multimodel_probs
+  ## this is needed to compute rps/rpss
+
+multimodel_metrics <- function(recipe,
+                               indv_membs = NULL,
+                               ensmean = NULL,
+                               probs = NULL) {
+  ncores <- recipe$Analysis$ncores
+  na.rm <- recipe$Analysis$remove_NAs
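+  ## Ex (illustrative): 'indv_membs' follows the structure returned by
+  ## load_multimodel, i.e. list(hcst = <one s2dv_cube per system>,
+  ## fcst = ..., obs = <s2dv_cube>), where every $data array keeps the
+  ## 'syear' and 'ensemble' dimensions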
+  cross.method <- recipe$Analysis$cross.method
+  if (is.null(cross.method)) {
+    cross.method <- 'leave-one-out'
+  }
+  requested_metrics <- strsplit(recipe$Analysis$Workflow$Skill$metric,
+                                ", | |,")[[1]]
+
+  if (!is.null(indv_membs)) {
+    # convert the $data elements to a list to use multiApply:
+    indv_membs <- append(list(obs = indv_membs$obs$data),
+                         lapply(indv_membs$hcst, function(x) x$data))
+    names(indv_membs)[-1] <- sapply(recipe$Analysis$Datasets$System,
+                                    '[[', 'name')
+    if (any(c('crps', 'crpss') %in% requested_metrics)) {
+      crps <- Apply(indv_membs, target_dims = c('syear', 'ensemble'),
+                    fun = function(obs, ...) {
+                      res <- abind(..., along = 2)
+                      names(dim(res)) <- names(dim(obs))
+                      obs <- Subset(obs, along = 'ensemble',
+                                    indices = 1, drop = 'selected')
+                      s2dv:::.CRPS(exp = res, obs = obs, dat_dim = NULL,
+                                   time_dim = 'syear',
+                                   memb_dim = 'ensemble')},
+                    ncores = ncores)$output1
+      skill_metrics$crps <- crps
+      # Build the reference forecast:
+      sdate_dim <- dim(indv_membs[[1]])['syear']
+      cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method,
+                                                amt.points = sdate_dim,
+                                                amt.points_cor = NULL)
+      tmp <- array(1:length(cross), c(syear = length(cross)))
+      ref_clim <- Apply(list(indv_membs$obs, tmp),
+                        target_dims = list(c('ensemble', 'syear'), NULL),
+                        function(x, y) {
+                          Subset(x, along = 'syear',
+                                 indices = cross[[y]]$train.dexes, drop = T)},
+                        ncores = ncores, output_dims = 'ensemble')$output1
+      indv_membs <- append(list(ref = ref_clim), indv_membs)
+      crps_clim <- Apply(indv_membs[1:2], target_dims = c('syear', 'ensemble'),
+                         fun = function(ref, obs) {
+                           obs <- Subset(obs, along = 'ensemble',
+                                         indices = 1, drop = 'selected')
+                           s2dv:::.CRPS(exp = ref, obs = obs, dat_dim = NULL,
+                                        time_dim = 'syear', memb_dim = 'ensemble')},
+                         ncores = ncores)$output1
+      skill_metrics$crps_clim <- crps_clim
+
+
+      crpss <- Apply(indv_membs, target_dims = c('syear', 'ensemble'),
+                     fun = function(ref, obs, ...) 
{ + res <- abind(..., along = 2) + names(dim(res)) <- names(dim(obs)) + obs <- Subset(obs, along = 'ensemble', + indices = 1, drop = 'selected') + CRPSS(exp = res, obs = obs, ref = ref, + memb_dim = 'ensemble', Fair = FALSE, + time_dim = 'syear', clim.cross.val = FALSE)}, + ncores = ncores) + skill_metrics$crpss <- crpss$crpss + skill_metrics$crpss_significance <- crpss$sign + + + } + } +} diff --git a/modules/Saving/R/save_probabilities.R b/modules/Saving/R/save_probabilities.R index a9ddc977..c6d5f222 100644 --- a/modules/Saving/R/save_probabilities.R +++ b/modules/Saving/R/save_probabilities.R @@ -117,7 +117,14 @@ save_probabilities <- function(recipe, # Generate name of output file outfile <- get_filename(outdir, recipe, variable, fcst.sdate, agg, "probs") - + if (type == 'obs') { + filename <- strsplit(outfile, "/") + filename <- filename[[1]][length(filename[[1]])] + outfile <- file.path(outdir, + sub(gsub('\\.','', + recipe$Analysis$Datasets$System$name), "", + filename)) + } # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { country <- get_countries(grid) diff --git a/recipe_tas.yml b/recipe_tas.yml index 90890cae..7fd02265 100644 --- a/recipe_tas.yml +++ b/recipe_tas.yml @@ -11,9 +11,9 @@ Analysis: flux: no Datasets: System: - #- {name: 'ECMWF-SEAS5.1'} - #- {name: 'Meteo-France-System7'} - name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + - {name: 'ECMWF-SEAS5.1'} + - {name: 'Meteo-France-System7'} + #name: ECMWF-SEAS5.1 #Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 Multimodel: execute: yes approach: pooled # Mandatory, bool: Either yes/true or no/false -- GitLab From 9c2fe38e0e797333dc8cf9fcc0f7da4c553de241 Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 5 Jun 2024 10:59:34 +0200 Subject: [PATCH 22/78] make crossval_skill to work with proper param --- modules/Crossval/Crossval_skill.R | 38 +++++++++++++++++++------------ recipe_tas.yml | 6 ++--- 2 files changed, 27 insertions(+), 17 deletions(-) diff --git a/modules/Crossval/Crossval_skill.R b/modules/Crossval/Crossval_skill.R index 58007a5a..fa210302 100644 --- a/modules/Crossval/Crossval_skill.R +++ b/modules/Crossval/Crossval_skill.R @@ -34,7 +34,7 @@ source("modules/Crossval/R/tmp/Eno.R") ## if fair is TRUE, the nmemb used to compute the probabilities is needed ## nmemb_ref is the number of year - 1 in case climatological forecast is the reference Crossval_skill <- function(recipe, data_crossval, - fair = FALSE, nmemb = NULL, nmemb_ref = NULL) { + fair = FALSE, nmemb = NULL, nmemb_ref = NULL) { ncores <- recipe$Analysis$ncores alpha <- recipe$Analysis$Skill$alpha na.rm <- recipe$Analysis$remove_NAs @@ -47,12 +47,12 @@ Crossval_skill <- function(recipe, data_crossval, ", | |,")[[1]] if ('rps' %in% requested_metrics) { - rps <- RPS(exp = res$probs$hcst_ev[[1]], - obs = res$probs$obs_ev[[1]], memb_dim = NULL, + rps <- RPS(exp = data_crossval$probs$hcst_ev[[1]], + obs = data_crossval$probs$obs_ev[[1]], memb_dim = NULL, cat_dim = 'cat', cross.val = FALSE, time_dim = 'syear', Fair = fair, nmemb = nmemb, ncores = ncores) - rps_clim <- Apply(list(res$probs$obs_ev[[1]]), + rps_clim <- Apply(list(data_crossval$probs$obs_ev[[1]]), target_dims = c('cat', 'syear'), RPS_clim, bin_dim_abs = 'cat', Fair = fair, cross.val = FALSE, ncores = ncores)$output1 @@ -60,7 +60,8 @@ Crossval_skill <- function(recipe, data_crossval, skill_metrics$rps_clim <- rps_clim } if ('rpss' %in% requested_metrics) { - rpss <- RPSS(exp = 
res$probs$hcst_ev[[1]], obs = res$probs$obs_ev[[1]], + rpss <- RPSS(exp = data_crossval$probs$hcst_ev[[1]], + obs = data_crossval$probs$obs_ev[[1]], ref = NULL, # ref is 1/3 by default if terciles time_dim = 'syear', memb_dim = NULL, cat_dim = 'cat', nmemb = nmemb, @@ -76,18 +77,22 @@ Crossval_skill <- function(recipe, data_crossval, } if ('crps' %in% requested_metrics) { - crps <- CRPS(exp = res$hcst$data, obs = res$obs$data, + crps <- CRPS(exp = data_crossval$hcst$data, + obs = data_crossval$obs$data, time_dim = 'syear', memb_dim = 'ensemble', Fair = fair, ncores = ncores) skill_metrics$crps <- crps - crps_clim <- CRPS(exp = res$ref_obs_tr, obs = res$obs$data, + crps_clim <- CRPS(exp = data_crossval$ref_obs_tr, + obs = data_crossval$obs$data, time_dim = 'syear', memb_dim = 'ensemble', Fair = fair, ncores = ncores) skill_metrics$crps_clim <- crps_clim } if ('crpss' %in% requested_metrics) { - crpss <- CRPSS(exp = res$hcst$data, obs = res$obs$data, ref = res$ref_obs_tr, + crpss <- CRPSS(exp = data_crossval$hcst$data, + obs = data_crossval$obs$data, + ref = data_crossval$ref_obs_tr, memb_dim = 'ensemble', Fair = fair, time_dim = 'syear', clim.cross.val = FALSE, ncores = ncores) @@ -96,7 +101,8 @@ Crossval_skill <- function(recipe, data_crossval, } if ('enscorr' %in% requested_metrics) { - enscorr <- Corr(res$hcst$data, res$obs$data, + enscorr <- Corr(exp = data_crossval$hcst$data, + obs = data_crossval$obs$data, dat_dim = NULL, time_dim = 'syear', method = 'pearson', @@ -110,7 +116,8 @@ Crossval_skill <- function(recipe, data_crossval, skill_metrics$enscorr <- enscorr } if ('mean_bias' %in% requested_metrics) { - mean_bias <- Bias(res$hcst.full_val$data, res$obs.full_val$data, + mean_bias <- Bias(exp = data_crossval$hcst.full_val$data, + obs = data_crossval$obs.full_val$data, time_dim = 'syear', memb_dim = 'ensemble', alpha = alpha, @@ -119,7 +126,8 @@ Crossval_skill <- function(recipe, data_crossval, skill_metrics$mean_bias_significance <- mean_bias$sig } if ('enssprerr' %in% requested_metrics) { - enssprerr <- SprErr(exp = res$hcst$data, obs = res$obs$data, + enssprerr <- SprErr(exp = data_crossval$hcst$data, + obs = data_crossval$obs$data, memb_dim = 'ensemble', dat_dim = NULL, time_dim = 'syear', pval = TRUE, ncores = ncores) @@ -127,14 +135,16 @@ Crossval_skill <- function(recipe, data_crossval, skill_metrics$SprErr_significance <- enssprerr$p.val <= alpha } if ('rms' %in% requested_metrics) { - rms <- RMS(exp = res$hcst$data, obs = res$obs$data, + rms <- RMS(exp = data_crossval$hcst$data, + obs = data_crossval$obs$data, memb_dim = 'ensemble', dat_dim = NULL, time_dim = 'syear', alpha = alpha, ncores = ncores) skill_metrics$rms <- rms$rms } if ('rmss' %in% requested_metrics) { - rmss <- RMSSS(exp = res$hcst$data, obs = res$obs$data, + rmss <- RMSSS(exp = data_crossval$hcst$data, + obs = data_crossval$obs$data, ref = res$ref_obs_tr, memb_dim = 'ensemble', dat_dim = NULL, time_dim = 'syear', alpha = alpha, sign = TRUE, @@ -158,7 +168,7 @@ Crossval_skill <- function(recipe, data_crossval, # Save metrics save_metrics(recipe = recipe, metrics = skill_metrics, - data_cube = data$hcst, agg = 'global', + data_cube = data_crossval$hcst, agg = 'global', outdir = recipe$Run$output_dir) recipe$Run$output_dir <- original diff --git a/recipe_tas.yml b/recipe_tas.yml index 7fd02265..5dfe25a9 100644 --- a/recipe_tas.yml +++ b/recipe_tas.yml @@ -11,9 +11,9 @@ Analysis: flux: no Datasets: System: - - {name: 'ECMWF-SEAS5.1'} - - {name: 'Meteo-France-System7'} - #name: ECMWF-SEAS5.1 #Meteo-France-System7 
#ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + # - {name: 'ECMWF-SEAS5.1'} + # - {name: 'Meteo-France-System7'} + name: ECMWF-SEAS5.1 #Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 Multimodel: execute: yes approach: pooled # Mandatory, bool: Either yes/true or no/false -- GitLab From 373c944d4e14974c6fa0ef56c0910261971332df Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 5 Jun 2024 12:31:57 +0200 Subject: [PATCH 23/78] fix probs list --- modules/Crossval/Crossval_anomalies.R | 21 ++++--- modules/Crossval/Crossval_skill.R | 82 +++++++++++++++++---------- recipe_tas.yml | 2 +- 3 files changed, 66 insertions(+), 39 deletions(-) diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R index 30f05184..ada64002 100644 --- a/modules/Crossval/Crossval_anomalies.R +++ b/modules/Crossval/Crossval_anomalies.R @@ -197,7 +197,7 @@ Crossval_anomalies <- function(recipe, data) { } } # Save probability bins - result <- list() + probs_hcst <- list() probs_fcst <- list() probs_obs <- list() all_names <- NULL @@ -211,20 +211,23 @@ Crossval_anomalies <- function(recipe, data) { name_elem <- paste0("from_", categories[[ps]][perc-1], "_to_", categories[[ps]][perc]) } - result <- append(list(Subset(hcst_probs_ev[[ps]], - along = 'cat', indices = ps, drop = 'all')), result) + probs_hcst <- append(list(Subset(hcst_probs_ev[[ps]], + along = 'cat', indices = perc, drop = 'all')), + probs_hcst) probs_obs <- append(list(Subset(obs_probs_ev[[ps]], - along = 'cat', indices = ps, drop = 'all')), probs_obs) + along = 'cat', indices = perc, drop = 'all')), + probs_obs) if (!is.null(data$fcst)) { probs_fcst <- append(list(Subset(fcst_probs[[ps]], - along = 'cat', indices = ps, drop = 'all')), probs_fcst) + along = 'cat', indices = perc, drop = 'all')), + probs_fcst) } all_names <- c(all_names, name_elem) } } - names(result) <- all_names - if (!('var' %in% names(dim(result[[1]])))) { - result <- lapply(result, function(x) { + names(probs_hcst) <- all_names + if (!('var' %in% names(dim(probs_hcst[[1]])))) { + probs_hcst <- lapply(probs_hcst, function(x) { dim(x) <- c(var = 1, dim(x)) return(x)}) } @@ -250,7 +253,7 @@ Crossval_anomalies <- function(recipe, data) { } if (recipe$Analysis$Workflow$Probabilities$save %in% c('all', 'bins_only')) { - save_probabilities(recipe = recipe, probs = result, + save_probabilities(recipe = recipe, probs = probs_hcst, data_cube = data$hcst, agg = agg, type = "hcst") save_probabilities(recipe = recipe, probs = probs_obs, diff --git a/modules/Crossval/Crossval_skill.R b/modules/Crossval/Crossval_skill.R index fa210302..d8696890 100644 --- a/modules/Crossval/Crossval_skill.R +++ b/modules/Crossval/Crossval_skill.R @@ -38,6 +38,19 @@ Crossval_skill <- function(recipe, data_crossval, ncores <- recipe$Analysis$ncores alpha <- recipe$Analysis$Skill$alpha na.rm <- recipe$Analysis$remove_NAs + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + round(eval(parse(text = y)),2)})}) + + # TODO: distinguish between rpss and bss + # if 1 percentile -> bss + # if more than 1 -> rpss + exe_rps <- unlist(lapply(categories, function(x) { + if (length(x) > 1) { + x <- x[1] *100 + } + return(x)})) if (is.null(alpha)) { alpha <- 0.05 } @@ -45,37 +58,48 @@ Crossval_skill <- function(recipe, data_crossval, skill_metrics <- list() requested_metrics <- strsplit(recipe$Analysis$Workflow$Skill$metric, ", | |,")[[1]] - - if ('rps' %in% 
requested_metrics) {
-    rps <- RPS(exp = data_crossval$probs$hcst_ev[[1]],
-               obs = data_crossval$probs$obs_ev[[1]], memb_dim = NULL,
-               cat_dim = 'cat', cross.val = FALSE, time_dim = 'syear',
-               Fair = fair, nmemb = nmemb,
-               ncores = ncores)
-    rps_clim <- Apply(list(data_crossval$probs$obs_ev[[1]]),
-                      target_dims = c('cat', 'syear'),
-                      RPS_clim, bin_dim_abs = 'cat', Fair = fair,
-                      cross.val = FALSE, ncores = ncores)$output1
-    skill_metrics$rps <- rps
-    skill_metrics$rps_clim <- rps_clim
-  }
-  if ('rpss' %in% requested_metrics) {
-    rpss <- RPSS(exp = data_crossval$probs$hcst_ev[[1]],
-                 obs = data_crossval$probs$obs_ev[[1]],
-                 ref = NULL, # ref is 1/3 by default if terciles
-                 time_dim = 'syear', memb_dim = NULL,
-                 cat_dim = 'cat', nmemb = nmemb,
-                 dat_dim = NULL,
-                 prob_thresholds = 0.1, # un use param when providing probs
-                 indices_for_clim = NULL,
-                 Fair = fair, weights_exp = NULL, weights_ref = NULL,
-                 cross.val = FALSE, na.rm = na.rm,
-                 sig_method.type = 'two.sided.approx', alpha = alpha,
+  # The recipe allows requesting more than only terciles:
+  for (ps in 1:length(exe_rps)) {
+    if ('rps' %in% requested_metrics) {
+      rps <- RPS(exp = data_crossval$probs$hcst_ev[[ps]],
+                 obs = data_crossval$probs$obs_ev[[1]], memb_dim = NULL,
+                 cat_dim = 'cat', cross.val = FALSE, time_dim = 'syear',
+                 Fair = fair, nmemb = nmemb,
+                 ncores = ncores)
+      rps_clim <- Apply(list(data_crossval$probs$obs_ev[[1]]),
+                        target_dims = c('cat', 'syear'),
+                        RPS_clim, bin_dim_abs = 'cat', Fair = fair,
+                        cross.val = FALSE, ncores = ncores)$output1
+      skill_metrics$rps <- rps
+      skill_metrics$rps_clim <- rps_clim
+      # names based on the categories:
+      # To use it when visualization works for more rps
+      #skill_metrics[[paste0('rps', exe_rps[ps])]] <- rps
+      #skill_metrics[[paste0('rps_clim',
+      #                      exe_rps[ps])]] <- rps_clim
+    }
+    if ('rpss' %in% requested_metrics) {
+      rpss <- RPSS(exp = data_crossval$probs$hcst_ev[[1]],
+                   obs = data_crossval$probs$obs_ev[[1]],
+                   ref = NULL, # ref is 1/3 by default if terciles
+                   time_dim = 'syear', memb_dim = NULL,
+                   cat_dim = 'cat', nmemb = nmemb,
+                   dat_dim = NULL,
+                   prob_thresholds = 0.1, # unused param when providing probs
+                   indices_for_clim = NULL,
+                   Fair = fair, weights_exp = NULL, weights_ref = NULL,
+                   cross.val = FALSE, na.rm = na.rm,
+                   sig_method.type = 'two.sided.approx', alpha = alpha,
+                   ncores = ncores)
+      skill_metrics$rpss <- rpss$rpss
+      skill_metrics$rpss_significance <- rpss$sign
+      # To use it when visualization works for more rpss
+      #skill_metrics[[paste0('rpss', exe_rps[ps])]] <- rpss$rpss
+      #skill_metrics[[paste0('rpss',
+      #                      exe_rps[ps],
+      #                      "_significance")]] <- rpss$sign
+    }
+  }
-
   if ('crps' %in% requested_metrics) {
     crps <- CRPS(exp = data_crossval$hcst$data,
                  obs = data_crossval$obs$data,
diff --git a/recipe_tas.yml b/recipe_tas.yml
index 82f5ad07..0c9dfb8c 100644
--- a/recipe_tas.yml
+++ b/recipe_tas.yml
@@ -48,7 +48,7 @@ Analysis:
       cross_validation: yes
       save: none
     Skill:
-      metric: mean_bias EnsCorr rpss crpss EnsSprErr
+      metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov std neff
       save: 'all'
      cross_validation: yes
     Probabilities:
--
GitLab

From 750d354b76a8b0c1a67a93a92d664801fb3786aa Mon Sep 17 00:00:00 2001
From: nperez
Date: Wed, 5 Jun 2024 15:53:07 +0200
Subject: [PATCH 24/78] statistics in crossval_skill

---
 modules/Crossval/Crossval_anomalies.R | 2 ++
 modules/Crossval/Crossval_skill.R | 44 +++++++++++++++++++++++++++
 recipe_tas.yml | 4 +--
 tools/check_recipe.R | 4 ++-
 4
files changed, 51 insertions(+), 3 deletions(-) diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R index ada64002..829850d2 100644 --- a/modules/Crossval/Crossval_anomalies.R +++ b/modules/Crossval/Crossval_anomalies.R @@ -273,6 +273,7 @@ Crossval_anomalies <- function(recipe, data) { Subset(hcst_EM, along = 'dat', indices = 1, drop = 'selected')), data_cube = data$hcst, agg = agg, module = "statistics") + fcst_EM <- NULL if (!is.null(data$fcst)) { fcst_EM <- MeanDims(data$fcst$data, 'ensemble', drop = T) save_metrics(recipe = recipe, @@ -283,6 +284,7 @@ Crossval_anomalies <- function(recipe, data) { } return(list(hcst = ano_hcst, obs = ano_obs, fcst = data$fcst, hcst.full_val = data$hcst, obs.full_val = data$obs, + hcst_EM = hcst_EM, fcst_EM = fcst_EM, cat_lims = list(hcst_tr = lims_ano_hcst_tr_res, obs_tr = lims_ano_obs_tr_res), probs = list(hcst_ev = hcst_probs_ev, diff --git a/modules/Crossval/Crossval_skill.R b/modules/Crossval/Crossval_skill.R index d8696890..f1356f3b 100644 --- a/modules/Crossval/Crossval_skill.R +++ b/modules/Crossval/Crossval_skill.R @@ -176,6 +176,50 @@ Crossval_skill <- function(recipe, data_crossval, skill_metrics$rmss <- rmss$rmss skill_metrics$rmss_significance <- rmss$sign } + if (any(c('std', 'standard_deviation') %in% requested_metrics)) { + std_hcst <- Apply(data = data_crossval$hcst_EM, + target_dims = 'syear', + fun = 'sd')$output1 + + std_obs <- Apply(data = data_crossval$obs$data, + target_dims = 'syear', + fun = 'sd')$output1 + + skill_metrics[['std_hcst']] <- std_hcst + skill_metrics[['std_obs']] <- std_obs + } + if (any(c('var', 'variance') %in% requested_metrics)) { + ## Calculate variance + var_hcst <- Apply(data = data_crossval$hcst_EM, + target_dims = 'syear', + fun = 'sd')$output1 ^ 2 + + var_obs <- Apply(data = data_crossval$obs$data, + target_dims = 'syear', + fun = 'sd')$output1 ^ 2 + + skill_metrics[['var_hcst']] <- var_hcst + skill_metrics[['var_obs']] <- var_obs + } ## close if on variance + if ('n_eff' %in% requested_metrics) { + ## Calculate degrees of freedom + n_eff <- s2dv::Eno(data = data_crossval$obs$data, + time_dim = 'syear', + na.action = na.pass, + ncores = ncores) + skill_metrics[['n_eff']] <- n_eff + } ## close on n_eff + + if (any(c('cov', 'covariance') %in% requested_metrics)) { + covariance <- Apply(data = list(x = data_crossval$obs$data, + y = data_crossval$hcst_EM), + target_dims = 'syear', + fun = function(x, y) { + cov(as.vector(x), as.vector(y), + use = "everything", + method = "pearson")})$output1 + skill_metrics$covariance <- covariance + } original <- recipe$Run$output_dir recipe$Run$output_dir <- paste0(original, "/outputs/Skill/") diff --git a/recipe_tas.yml b/recipe_tas.yml index 82f5ad07..0c9dfb8c 100644 --- a/recipe_tas.yml +++ b/recipe_tas.yml @@ -13,7 +13,7 @@ Analysis: System: # - {name: 'ECMWF-SEAS5.1'} # - {name: 'Meteo-France-System7'} - name: ECMWF-SEAS5.1 #Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 Multimodel: execute: yes approach: pooled # Mandatory, bool: Either yes/true or no/false @@ -48,7 +48,7 @@ Analysis: cross_validation: yes save: none Skill: - metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov std neff + metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov std n_eff save: 'all' cross_validation: yes Probabilities: diff --git a/tools/check_recipe.R 
b/tools/check_recipe.R index 5c5ab721..b2449295 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -590,7 +590,9 @@ check_recipe <- function(recipe) { } # Skill - AVAILABLE_METRICS <- c("enscorr", "corr_individual_members", "rps", "rps_syear", + AVAILABLE_METRICS <- c("enscorr", "corr_individual_members", "rps", + "rps_syear", "cov", "covariance", "std", + "standard_deviation", "n_eff", "spread", "rpss", "frps", "frpss", "crps", "crps_syear", "crpss", "bss10", "bss90", "mean_bias", "mean_bias_ss", "enssprerr", "rps_clim", -- GitLab From f3ed21ecc50747ad979265c114244e83e8afc364 Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 5 Jun 2024 16:38:59 +0200 Subject: [PATCH 25/78] conf includes gpfs --- conf/archive.yml | 117 +++++++++++++++++- full_ecvs_anomalies.R | 2 + .../{ => old_modules}/Anomalies/Anomalies.R | 0 .../Calibration/Calibration.R | 0 .../Downscaling/Downscaling.R | 0 .../Downscaling/tmp/Analogs.R | 0 .../{ => old_modules}/Downscaling/tmp/Intbc.R | 0 .../Downscaling/tmp/Interpolation.R | 0 .../{ => old_modules}/Downscaling/tmp/Intlr.R | 0 .../Downscaling/tmp/LogisticReg.R | 0 .../{ => old_modules}/Downscaling/tmp/Utils.R | 0 modules/{ => old_modules}/Indices/Indices.R | 0 .../{ => old_modules}/Indices/R/compute_nao.R | 0 .../Indices/R/compute_nino.R | 0 .../Indices/R/correlation_eno.R | 0 .../Indices/R/drop_indices_dims.R | 0 .../Indices/R/plot_deterministic_forecast.R | 0 .../{ => old_modules}/Statistics/Statistics.R | 0 18 files changed, 118 insertions(+), 1 deletion(-) rename modules/{ => old_modules}/Anomalies/Anomalies.R (100%) rename modules/{ => old_modules}/Calibration/Calibration.R (100%) rename modules/{ => old_modules}/Downscaling/Downscaling.R (100%) rename modules/{ => old_modules}/Downscaling/tmp/Analogs.R (100%) rename modules/{ => old_modules}/Downscaling/tmp/Intbc.R (100%) rename modules/{ => old_modules}/Downscaling/tmp/Interpolation.R (100%) rename modules/{ => old_modules}/Downscaling/tmp/Intlr.R (100%) rename modules/{ => old_modules}/Downscaling/tmp/LogisticReg.R (100%) rename modules/{ => old_modules}/Downscaling/tmp/Utils.R (100%) rename modules/{ => old_modules}/Indices/Indices.R (100%) rename modules/{ => old_modules}/Indices/R/compute_nao.R (100%) rename modules/{ => old_modules}/Indices/R/compute_nino.R (100%) rename modules/{ => old_modules}/Indices/R/correlation_eno.R (100%) rename modules/{ => old_modules}/Indices/R/drop_indices_dims.R (100%) rename modules/{ => old_modules}/Indices/R/plot_deterministic_forecast.R (100%) rename modules/{ => old_modules}/Statistics/Statistics.R (100%) diff --git a/conf/archive.yml b/conf/archive.yml index 61f62be2..c892f815 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -1,3 +1,118 @@ +gpfs: + src: "/gpfs/projects/bsc32/esarchive_cache/" + System: + ECMWF-SEAS5.1: + name: "ECMWF SEAS5 (v5.1)" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/ecmwf/system51c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 51 + hcst: 25 + calendar: "proleptic_gregorian" + time_stamp_lag: "0" + reference_grid: "/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/system51c3s/monthly_mean/tas_f6h/tas_20180501.nc" + land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/system51c3s/constant/lsm/lsm.nc" + CMCC-SPS3.5: + name: "CMCC-SPS3.5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/cmcc/system35c3s/" + monthly_mean: 
{"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/sfcWind_f6h/"} + nmember: + fcst: 50 + hcst: 40 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_system35c3s.txt" + Meteo-France-System8: + name: "Meteo-France System 8" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/meteofrance/system8c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_f24h/", + "sfcWind": "monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 51 + hcst: 25 + time_stamp_lag: "+1" + calendar: "proleptic_gregorian" + reference_grid: "conf/grid_description/griddes_system7c3s.txt" + UK-MetOffice-Glosea601: + name: "UK MetOffice GloSea 6 (v6.01)" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/ukmo/glosea6_system601-c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 62 + hcst: 28 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_ukmo600.txt" + NCEP-CFSv2: + name: "NCEP CFSv2" + institution: "NOAA NCEP" #? + src: "exp/ncep/cfs-v2/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 124 + hcst: 24 + calendar: "gregorian" + time_stamp_lag: "0" + reference_grid: "conf/grid_description/griddes_ncep-cfsv2.txt" + DWD-GCFS2.1: + name: "DWD GCFS 2.1" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/dwd/system21_m1/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + nmember: + fcst: 50 + hcst: 30 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_system21_m1.txt" + ECCC-CanCM4i: + name: "ECCC CanCM4i (v3)" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/eccc/eccc3/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 10 + hcst: 10 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_eccc1.txt" + Reference: + ERA5: + name: "ERA5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "recon/ecmwf/era5/" + monthly_mean: {"tas":"monthly_mean/tas_f1h-r1440x721cds/", + "psl":"monthly_mean/psl_f1h-r1440x721cds/", + "prlr":"monthly_mean/prlr_f1h-r1440x721cds/", + "sfcWind":"monthly_mean/sfcWind_f1h-r1440x721cds/"} + calendar: "standard" + reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" + land_sea_mask: "/esarchive/recon/ecmwf/era5/constant/lsm-r1440x721cds/sftof.nc" + +######################################################################### esarchive: src: "/esarchive/" System: @@ -181,7 +296,7 @@ esarchive: "tasmin":"daily/tasmin/", "tasmax":"daily/tasmax/"} monthly_mean: {"tas":"monthly_mean/tas_f1h/","tasmin":"monthly_mean/tasmin_f24h/", "tasmax":"monthly_mean/tasmax_f24h/", "prlr":"monthly_mean/prlr_f1h/", - "sfcWind":"monthly_mean/sfcWind_f1h/", "rsds":"monthly_mean/rsds_f1h/", + c "sfcWind":"monthly_mean/sfcWind_f1h/", 
"rsds":"monthly_mean/rsds_f1h/", "tdps":"monthly_mean/tdps_f1h/"} calendar: "proleptic_gregorian" reference_grid: "/esarchive/recon/ecmwf/era5land/daily_mean/tas_f1h/tas_201805.nc" diff --git a/full_ecvs_anomalies.R b/full_ecvs_anomalies.R index 44c7e57d..1c873a0b 100644 --- a/full_ecvs_anomalies.R +++ b/full_ecvs_anomalies.R @@ -28,4 +28,6 @@ Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, signi source("tools/add_logo.R") add_logo(recipe, "rsz_rsz_bsc_logo.png") +# scorecards computation: + diff --git a/modules/Anomalies/Anomalies.R b/modules/old_modules/Anomalies/Anomalies.R similarity index 100% rename from modules/Anomalies/Anomalies.R rename to modules/old_modules/Anomalies/Anomalies.R diff --git a/modules/Calibration/Calibration.R b/modules/old_modules/Calibration/Calibration.R similarity index 100% rename from modules/Calibration/Calibration.R rename to modules/old_modules/Calibration/Calibration.R diff --git a/modules/Downscaling/Downscaling.R b/modules/old_modules/Downscaling/Downscaling.R similarity index 100% rename from modules/Downscaling/Downscaling.R rename to modules/old_modules/Downscaling/Downscaling.R diff --git a/modules/Downscaling/tmp/Analogs.R b/modules/old_modules/Downscaling/tmp/Analogs.R similarity index 100% rename from modules/Downscaling/tmp/Analogs.R rename to modules/old_modules/Downscaling/tmp/Analogs.R diff --git a/modules/Downscaling/tmp/Intbc.R b/modules/old_modules/Downscaling/tmp/Intbc.R similarity index 100% rename from modules/Downscaling/tmp/Intbc.R rename to modules/old_modules/Downscaling/tmp/Intbc.R diff --git a/modules/Downscaling/tmp/Interpolation.R b/modules/old_modules/Downscaling/tmp/Interpolation.R similarity index 100% rename from modules/Downscaling/tmp/Interpolation.R rename to modules/old_modules/Downscaling/tmp/Interpolation.R diff --git a/modules/Downscaling/tmp/Intlr.R b/modules/old_modules/Downscaling/tmp/Intlr.R similarity index 100% rename from modules/Downscaling/tmp/Intlr.R rename to modules/old_modules/Downscaling/tmp/Intlr.R diff --git a/modules/Downscaling/tmp/LogisticReg.R b/modules/old_modules/Downscaling/tmp/LogisticReg.R similarity index 100% rename from modules/Downscaling/tmp/LogisticReg.R rename to modules/old_modules/Downscaling/tmp/LogisticReg.R diff --git a/modules/Downscaling/tmp/Utils.R b/modules/old_modules/Downscaling/tmp/Utils.R similarity index 100% rename from modules/Downscaling/tmp/Utils.R rename to modules/old_modules/Downscaling/tmp/Utils.R diff --git a/modules/Indices/Indices.R b/modules/old_modules/Indices/Indices.R similarity index 100% rename from modules/Indices/Indices.R rename to modules/old_modules/Indices/Indices.R diff --git a/modules/Indices/R/compute_nao.R b/modules/old_modules/Indices/R/compute_nao.R similarity index 100% rename from modules/Indices/R/compute_nao.R rename to modules/old_modules/Indices/R/compute_nao.R diff --git a/modules/Indices/R/compute_nino.R b/modules/old_modules/Indices/R/compute_nino.R similarity index 100% rename from modules/Indices/R/compute_nino.R rename to modules/old_modules/Indices/R/compute_nino.R diff --git a/modules/Indices/R/correlation_eno.R b/modules/old_modules/Indices/R/correlation_eno.R similarity index 100% rename from modules/Indices/R/correlation_eno.R rename to modules/old_modules/Indices/R/correlation_eno.R diff --git a/modules/Indices/R/drop_indices_dims.R b/modules/old_modules/Indices/R/drop_indices_dims.R similarity index 100% rename from modules/Indices/R/drop_indices_dims.R rename to 
modules/old_modules/Indices/R/drop_indices_dims.R diff --git a/modules/Indices/R/plot_deterministic_forecast.R b/modules/old_modules/Indices/R/plot_deterministic_forecast.R similarity index 100% rename from modules/Indices/R/plot_deterministic_forecast.R rename to modules/old_modules/Indices/R/plot_deterministic_forecast.R diff --git a/modules/Statistics/Statistics.R b/modules/old_modules/Statistics/Statistics.R similarity index 100% rename from modules/Statistics/Statistics.R rename to modules/old_modules/Statistics/Statistics.R -- GitLab From 5072d5c897356960ffc1146674de7aafd3b109c0 Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 5 Jun 2024 16:55:30 +0200 Subject: [PATCH 26/78] fix conf gpfs --- conf/archive.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/conf/archive.yml b/conf/archive.yml index c892f815..2dc9142b 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -48,10 +48,10 @@ gpfs: name: "UK MetOffice GloSea 6 (v6.01)" institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/ukmo/glosea6_system601-c3s/" - monthly_mean: {"tas":"monthly_mean/tas_f6h/", - "prlr":"monthly_mean/prlr_f24h/", - "sfcWind":"monthly_mean/sfcWind_f6h/", - "psl":"monthly_mean/psl_f6h/"} + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} nmember: fcst: 62 hcst: 28 @@ -79,6 +79,7 @@ gpfs: monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f24h/", "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} nmember: fcst: 50 hcst: 30 -- GitLab From 4aa5456b2293080be84862d31e12838153a8aada Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 5 Jun 2024 17:32:46 +0200 Subject: [PATCH 27/78] fix typo archive and include multimodel metrics --- conf/archive.yml | 2 +- modules/Multimodel/Multimodel.R | 30 +++++++++++++++++++----------- recipe_tas.yml | 6 +++--- 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/conf/archive.yml b/conf/archive.yml index 2dc9142b..b2237b46 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -297,7 +297,7 @@ esarchive: "tasmin":"daily/tasmin/", "tasmax":"daily/tasmax/"} monthly_mean: {"tas":"monthly_mean/tas_f1h/","tasmin":"monthly_mean/tasmin_f24h/", "tasmax":"monthly_mean/tasmax_f24h/", "prlr":"monthly_mean/prlr_f1h/", - c "sfcWind":"monthly_mean/sfcWind_f1h/", "rsds":"monthly_mean/rsds_f1h/", + "sfcWind":"monthly_mean/sfcWind_f1h/", "rsds":"monthly_mean/rsds_f1h/", "tdps":"monthly_mean/tdps_f1h/"} calendar: "proleptic_gregorian" reference_grid: "/esarchive/recon/ecmwf/era5land/daily_mean/tas_f1h/tas_201805.nc" diff --git a/modules/Multimodel/Multimodel.R b/modules/Multimodel/Multimodel.R index 2390395c..8b178033 100644 --- a/modules/Multimodel/Multimodel.R +++ b/modules/Multimodel/Multimodel.R @@ -19,7 +19,7 @@ library(yaml) recipe <- read_yaml("recipe_tas.yml") source("modules/Saving/R/get_dir.R") source("modules/Loading/Loading.R") -recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "recipe_tas_20240604162251/") +recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "recipe_tas_20240605141443/") # DON'T KNOW HOW TO SOLVE LOGGER: recipe$Run$Loglevel <- 'INFO' recipe$Run$logfile <- "/esarchive/scratch/nperez/git4//recipe_tas_20240531165840/logs/main.log" @@ -47,26 +47,34 @@ Multimodel <- function(recipe) { # Load data splitting by system individual members: if (recipe$Analysis$Datasets$Multimodel$approach == 'pooled') { individual_memb <- load_multimodel(recipe) - } + } else { 
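+    # pooling is the only approach that needs every individual member
+    # in memory; the other approaches reuse the saved outputs loaded below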
+ individual_memb <- NULL + } # Always load ensemble mean and probabilities: source("modules/Multimodel/load_multimodel_mean.R") ensmean <- load_multimodel_mean(recipe) source("modules/Multimodel/load_multimodel_probs.R") probs <- load_multimodel_probs(recipe) - - - + if (recipe$Analysis$Datasets$Multimodel$approac == "weighted") { + stop("Implement the weighting method here") + # I imagine the probs and the ensmean are modified and returned. + multimodel_aux <- build_multimodel(data, recipe) + ensmean <- multimodel_aux$ensmean + probs <- multimodel_aux$prob + rm(multimodel_aux) + } + + # assessment: + source("modules/Multimodel/multimodel_metrics.R") + skill_metrics <- multimodel_metrics(recipe = recipe, + indv_membs = individual_memb, + ensmean = ensmean, + probs = probs) -############# TO REMOVE: - # Building the multi-model - multimodel_aux <- build_multimodel(data, recipe) - data <- multimodel_aux$data - prob <- multimodel_aux$prob - rm(multimodel_aux) # Saving multimodel if (recipe$Analysis$Workflow[[recipe$Analysis$Datasets$Multimodel$createFrom]]$save != 'none') { diff --git a/recipe_tas.yml b/recipe_tas.yml index 0c9dfb8c..64d71f64 100644 --- a/recipe_tas.yml +++ b/recipe_tas.yml @@ -11,9 +11,9 @@ Analysis: flux: no Datasets: System: - # - {name: 'ECMWF-SEAS5.1'} - # - {name: 'Meteo-France-System7'} - name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + - {name: 'ECMWF-SEAS5.1'} + - {name: 'Meteo-France-System7'} + #name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 Multimodel: execute: yes approach: pooled # Mandatory, bool: Either yes/true or no/false -- GitLab From 0c1bf345ceadb5918374826f46a4a6f73bc0cd64 Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 6 Jun 2024 16:59:29 +0200 Subject: [PATCH 28/78] modify cat lims and improve multimodel --- modules/Crossval/Crossval_anomalies.R | 8 +- modules/Multimodel/Multimodel.R | 54 ++---- modules/Multimodel/load_multimodel.R | 17 +- modules/Multimodel/load_multimodel_mean.R | 36 ++-- modules/Multimodel/multimodel_metrics.R | 210 +++++++++++++++++++--- 5 files changed, 247 insertions(+), 78 deletions(-) diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R index 829850d2..8fe9fa01 100644 --- a/modules/Crossval/Crossval_anomalies.R +++ b/modules/Crossval/Crossval_anomalies.R @@ -11,7 +11,7 @@ Crossval_anomalies <- function(recipe, data) { categories <- recipe$Analysis$Workflow$Probabilities$percentiles categories <- lapply(categories, function (x) { sapply(x, function(y) { - round(eval(parse(text = y)),2)})}) + eval(parse(text = y))})}) ncores <- recipe$Analysis$ncores na.rm <- recipe$Analysis$remove_NAs ## data dimensions @@ -201,6 +201,12 @@ Crossval_anomalies <- function(recipe, data) { probs_fcst <- list() probs_obs <- list() all_names <- NULL + # Make categories rounded number to use as names: + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + round(eval(parse(text = y)),2)})}) + for (ps in 1:length(categories)) { for (perc in 1:(length(categories[[ps]]) + 1)) { if (perc == 1) { diff --git a/modules/Multimodel/Multimodel.R b/modules/Multimodel/Multimodel.R index 8b178033..b52e7047 100644 --- a/modules/Multimodel/Multimodel.R +++ b/modules/Multimodel/Multimodel.R @@ -9,8 +9,6 @@ source("modules/Loading/R/dates2load.R") source("modules/Loading/R/get_timeidx.R") source("modules/Loading/R/check_latlon.R") 
source('modules/Multimodel/load_multimodel.R') -source('modules/Multimodel/load_multimodel_splitted.R') -source('modules/Multimodel/dims_multimodel.R') source('modules/Multimodel/build_multimodel.R') source('modules/Multimodel/clean_multimodel.R') library(startR) @@ -24,45 +22,29 @@ recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "recipe_tas_2024060514144 recipe$Run$Loglevel <- 'INFO' recipe$Run$logfile <- "/esarchive/scratch/nperez/git4//recipe_tas_20240531165840/logs/main.log" recipe$Run$logger <- logger <- log4r::logger(threshold = recipe$Run$Loglevel, - appenders = list(console_appender(layout = default_log_layout()), - file_appender(logfile, append = TRUE, - layout = default_log_layout()))) + appenders = list(console_appender(layout = default_log_layout()), + file_appender(logfile, append = TRUE, + layout = default_log_layout()))) Multimodel <- function(recipe) { - # recipe: auto-s2s recipe as provided by read_yaml - ncores <- recipe$Analysis$ncores - cross.method <- recipe$Analysis$cross.method - if (is.null(cross.method)) { - cross.method <- 'leave-one-out' - } - categories <- recipe$Analysis$Workflow$Probabilities$percentiles - categories <- lapply(categories, function (x) { - sapply(x, function(y) { - round(eval(parse(text = y)),2)})}) - na.rm <- recipe$Analysis$remove_NAs - skill_metrics <- list() - requested_metrics <- strsplit(recipe$Analysis$Workflow$Skill$metric, - ", | |,")[[1]] # Load data splitting by system individual members: if (recipe$Analysis$Datasets$Multimodel$approach == 'pooled') { - individual_memb <- load_multimodel(recipe) - } else { - individual_memb <- NULL - } - # Always load ensemble mean and probabilities: - source("modules/Multimodel/load_multimodel_mean.R") - ensmean <- load_multimodel_mean(recipe) - - source("modules/Multimodel/load_multimodel_probs.R") - probs <- load_multimodel_probs(recipe) - if (recipe$Analysis$Datasets$Multimodel$approac == "weighted") { - stop("Implement the weighting method here") - # I imagine the probs and the ensmean are modified and returned. - multimodel_aux <- build_multimodel(data, recipe) - ensmean <- multimodel_aux$ensmean - probs <- multimodel_aux$prob - rm(multimodel_aux) + datos <- load_multimodel(recipe) + } else if (recipe$Analysis$Datasets$Multimodel$approach == 'mean') { + # In this case the ensemble dim is 1 for each model + # The functions in skill can still work + source("modules/Multimodel/load_multimodel_mean.R") + datos <- load_multimodel_mean(recipe) + source("modules/Multimodel/load_multimodel_probs.R") + probs <- load_multimodel_probs(recipe) + } else { #(recipe$Analysis$Datasets$Multimodel$approac == "weighted") + stop("Implement the weighting method here") + # I imagine the probs and the ensmean are modified and returned. 
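+    ## Ex (sketch; 'skill' and 'datos' names are illustrative, one score
+    ## per system is assumed):
+    ## w <- skill / sum(skill)
+    ## hcst_w <- Reduce('+', Map('*', lapply(datos$hcst, '[[', 'data'), w))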
+ multimodel_aux <- build_multimodel(data, recipe) + ensmean <- multimodel_aux$ensmean + probs <- multimodel_aux$prob + rm(multimodel_aux) } # assessment: diff --git a/modules/Multimodel/load_multimodel.R b/modules/Multimodel/load_multimodel.R index b4cf8c25..22ad60c6 100644 --- a/modules/Multimodel/load_multimodel.R +++ b/modules/Multimodel/load_multimodel.R @@ -29,7 +29,7 @@ load_multimodel <- function(recipe) { # Find the saved data directory recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", - recipe$Analysis$Datasets$Multimodel$createFrom) + recipe$Analysis$Datasets$Multimodel$createFrom) if (tolower(recipe$Analysis$Output_format) == "scorecards") { hcst_start <- recipe$Analysis$Time$hcst_start hcst_end <- recipe$Analysis$Time$hcst_end @@ -50,10 +50,10 @@ load_multimodel <- function(recipe) { #------------------------------------------------------------------- hcst <- list() for (sys in exp.name) { - aux <- Start(dat = hcst.path, + hcst.path_aux <- gsub("$model$", gsub('\\.','', sys), hcst.path, fixed = T) + aux <- Start(dat = hcst.path_aux, var = variable, file_date = sdates$hcst, - model = gsub('\\.','', sys), time = 'all', latitude = 'all', latitude_reorder = Sort(), @@ -71,6 +71,7 @@ load_multimodel <- function(recipe) { split_multiselected_dims = split_multiselected_dims, retrieve = TRUE) hcst <- append(hcst, list(as.s2dv_cube(aux))) + names(hcst)[length(hcst)] <- gsub('\\.','', sys) } ############################# #NOTE: NOT TESTED YET @@ -100,10 +101,11 @@ load_multimodel <- function(recipe) { if (!is.null(recipe$Analysis$Time$fcst_year)) { fcst <- list() for (sys in exp.name) { - aux <- Start(dat = fcst.path, + fcst.path_aux <- gsub("$model$", gsub('\\.','', sys), + fcst.path, fixed = T) + aux <- Start(dat = fcst.path_aux, var = variable, file_date = sdates$fcst, - model = gsub('\\.','', sys), time = 'all', latitude = 'all', latitude_reorder = Sort(), @@ -121,6 +123,7 @@ load_multimodel <- function(recipe) { split_multiselected_dims = split_multiselected_dims, retrieve = TRUE) fcst <- append(fcst, list(as.s2dv_cube(aux))) + names(fcst)[length(fcst)] <- gsub('\\.','', sys) } ############################# #NOTE: NOT TESTED YET @@ -175,8 +178,8 @@ load_multimodel <- function(recipe) { # Obtain dates and date dimensions from the loaded hcst data to make sure # the corresponding observations are loaded correctly. 
- dates <- hcst$attrs$Dates - dim(dates) <- hcst$dims[c("sday", "sweek", "syear", "time")] + dates <- hcst$attrs$Dates + dim(dates) <- hcst$dims[c("sday", "sweek", "syear", "time")] # Get year and month for file_date dates_file <- sapply(dates, format, '%Y%m') diff --git a/modules/Multimodel/load_multimodel_mean.R b/modules/Multimodel/load_multimodel_mean.R index 7021438b..d23f9879 100644 --- a/modules/Multimodel/load_multimodel_mean.R +++ b/modules/Multimodel/load_multimodel_mean.R @@ -34,7 +34,8 @@ load_multimodel_mean <- function(recipe) { hcst_start <- recipe$Analysis$Time$hcst_start hcst_end <- recipe$Analysis$Time$hcst_end shortdate <- substr(recipe$Analysis$Time$sdate, start = 1, stop = 2) - filename <- paste0("scorecards_$model$_", ref.name, "_", variable, "_hcst_EM_", + filename <- paste0("scorecards_$model$_", ref.name, "_", + variable, "_hcst_EM_", hcst_start, "-", hcst_end, "_s", shortdate, ".nc") } else { filename <- "$var$_$file_date$.nc" @@ -45,9 +46,10 @@ load_multimodel_mean <- function(recipe) { fcst.path <- obs.path <- hcst.path fcst.path <- gsub("hcst", "fcst", fcst.path, fixed = T) - obs.path <- file.path(recipe$Run$output_dir, gsub('\\.','',exp.name[1]), ref.name, - recipe$Analysis$Workflow$Calibration$method, - variable, + obs.path <- file.path(recipe$Run$output_dir, + gsub('\\.','',exp.name[1]), ref.name, + recipe$Analysis$Workflow$Calibration$method, + variable, paste0("scorecards_", gsub('\\.','',exp.name[1]), "_", ref.name, "_$var$_-obs_$file_date$__", hcst_start, "-", @@ -56,9 +58,9 @@ load_multimodel_mean <- function(recipe) { #------------------------------------------------------------------- hcst <- list() for (sys in exp.name) { - aux <- Start(dat = hcst.path, + hcst.path_aux <- gsub("$model$", gsub('\\.','', sys), hcst.path, fixed = T) + aux <- Start(dat = hcst.path_aux, var = 'hcst_EM', - model = gsub('\\.','', sys), sday = 'all', sweek = 'all', syear = 'all', @@ -69,11 +71,17 @@ load_multimodel_mean <- function(recipe) { longitude_reorder = circularsort, synonims = list(latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude')), -# metadata_dims = 'var', return_vars = list(latitude = 'dat', longitude = 'dat', time = 'dat'), retrieve = TRUE) + # Adds ensemble dim to obs (for consistency with hcst/fcst) + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(aux))] <- dim(aux) + dim(aux) <- default_dims + # Convert obs to s2dv_cube hcst <- append(hcst, list(as.s2dv_cube(aux))) names(hcst)[length(hcst)] <- sys } @@ -105,9 +113,10 @@ load_multimodel_mean <- function(recipe) { if (!is.null(recipe$Analysis$Time$fcst_year)) { fcst <- list() for (sys in exp.name) { - aux <- Start(dat = fcst.path, + fcst.path_aux <- gsub("$model$", gsub('\\.','', sys), + fcst.path, fixed = T) + aux <- Start(dat = fcst.path_aux, var = 'fcst_EM', - model = gsub('\\.','', sys), sday = 'all', sweek = 'all', syear = 'all', @@ -122,6 +131,13 @@ load_multimodel_mean <- function(recipe) { longitude = 'dat', time = 'dat'), retrieve = TRUE) + # Adds ensemble dim to obs (for consistency with hcst/fcst) + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(aux))] <- dim(aux) + dim(aux) <- default_dims + # Convert obs to s2dv_cube fcst <- append(fcst, list(as.s2dv_cube(aux))) names(fcst)[length(fcst)] <- sys } @@ -156,7 +172,7 @@ load_multimodel_mean <- function(recipe) { obs <- Start(dat = 
obs.path, var = variable, - file_date = sdates$hcst, + file_date = sdates$hcst, time = 'all', latitude = 'all', latitude_reorder = Sort(), diff --git a/modules/Multimodel/multimodel_metrics.R b/modules/Multimodel/multimodel_metrics.R index 0eb0f7ac..3128f117 100644 --- a/modules/Multimodel/multimodel_metrics.R +++ b/modules/Multimodel/multimodel_metrics.R @@ -1,32 +1,45 @@ # One function to compute all the possible metrics or statistics -# indv_members is the list returned by load_multimodel - ## this is needed to compute crps/crpss but it not always will fit in mem -# ensmean is a list returned by load_multimodel_mean +# datos is a list returned by load_multimodel or load_multimodel_mean ## this reduce memory and it can be used to compute several metrics + ## in the mean the ensemble dim is length 1 for each model # probs is a list returned by load_multimodel_probs ## this is needed to compute rps/rpss - +source("modules/Saving/Saving.R") +source("modules/Crossval/R/tmp/RPS.R") +source("modules/Crossval/R/RPS_clim.R") +source("modules/Crossval/R/CRPS_clim.R") +source("modules/Crossval/R/tmp/RPSS.R") +source("modules/Crossval/R/tmp/RandomWalkTest.R") +source("modules/Crossval/R/tmp/Corr.R") +source("modules/Crossval/R/tmp/Bias.R") +source("modules/Crossval/R/tmp/SprErr.R") +source("modules/Crossval/R/tmp/Eno.R") multimodel_metrics <- function(recipe, - indv_membs = NULL, - ensmean = NULL, + datos = NULL, probs = NULL) { ncores <- recipe$Analysis$ncores na.rm <- recipe$Analysis$remove_NAs cross.method <- recipe$Analysis$cross.method + # Prepare indices for crossval + sdate_dim <- dim(datos[[1]])['syear'] + cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, + amt.points = sdate_dim, + amt.points_cor = NULL) + tmp <- array(1:length(cross), c(syear = length(cross))) + alpha <- recipe$Analysis$alpha if (is.null(cross.method)) { cross.method <- 'leave-one-out' } requested_metrics <- strsplit(recipe$Analysis$Workflow$Skill$metric, ", | |,")[[1]] - - if (!is.null(indv_membs)) { - # conver $data elemenst to list to use multiApply: - indv_membs <- append(list(obs = indv_membs$obs$data), - lapply(indv_membs$hcst, function(x) x$data)) - names(indv_membs)[-1] <- sapply(recipe$Analysis$Datasets$System, - '[[', 'name') + skill_metrics <- list() + if (!is.null(datos)) { + # conver $data elements to list to use multiApply: + datos <- append(list(obs = datos$obs$data), + lapply(datos$hcst, function(x) x$data)) + ## CRPS metrics only make sense in pool method: if (any(c('crps', 'crpss') %in% requested_metrics)) { - crps <- Apply(indv_membs, target_dims = c('syear', 'ensemble'), + crps <- Apply(datos, target_dims = c('syear', 'eCnsemble'), fun = function(obs, ...) 
{ res <- abind(..., along = 2) names(dim(res)) <- names(dim(obs)) @@ -38,19 +51,14 @@ multimodel_metrics <- function(recipe, ncores = ncores)$output1 skill_metrics$crps <- crps # Build the reference forecast: - sdate_dim <- dim(indv_membs[[1]])['syear'] - cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, - amt.points = sdate_dim, - amt.points_cor = NULL) - tmp <- array(1:length(cross), c(syear = length(cross))) - ref_clim <- Apply(list(indv_membs$obs, tmp), + ref_clim <- Apply(list(datos$obs, tmp), target_dims = list(c('ensemble', 'syear'), NULL), function(x, y) { Subset(x, along = 'syear', indices = cross[[y]]$train.dexes, drop = T)}, ncores = ncores, output_dims = 'ensemble')$output1 - indv_membs <- append(list(ref = ref_clim), indv_membs) - crps_clim <- Apply(indv_membs[1:2], target_dims = c('syear', 'ensemble'), + datos <- append(list(ref = ref_clim), datos) + crps_clim <- Apply(datos[1:2], target_dims = c('syear', 'ensemble'), fun = function(ref, obs) { obs <- Subset(obs, along = 'ensemble', indices = 1, drop = 'selected') @@ -60,7 +68,7 @@ multimodel_metrics <- function(recipe, skill_metrics$crps_clim <- crps_clim - crpss <- Apply(indv_membs, target_dims = c('syear', 'ensemble'), + crpss <- Apply(datos, target_dims = c('syear', 'ensemble'), fun = function(ref, obs, ...) { res <- abind(..., along = 2) names(dim(res)) <- names(dim(obs)) @@ -72,8 +80,162 @@ multimodel_metrics <- function(recipe, ncores = ncores) skill_metrics$crpss <- crpss$crpss skill_metrics$crpss_significance <- crpss$sign + datos <- datos[-1] + } + ## Deterministic metrics using ensemble mean: + if ('enscorr' %in% requested_metrics) { + enscorr <- Apply(datos, target_dims = c('syear', 'ensemble'), + function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + Corr(exp = res, + obs = obs, + dat_dim = NULL, + time_dim = 'syear', + method = 'pearson', + memb_dim = 'ensemble', + memb = F, + conf = F, + pval = F, + sign = T, + alpha = alpha)}, ncores = ncores) + skill_metrics$enscorr <- enscorr$corr + skill_metrics$enscorr_significance <- enscorr$sign + } + if ('mean_bias' %in% requested_metrics) { + ## Mean Bias can make sense for non-anomalies (e.g. calibrated data) + mean_bias <- Apply(datos, target_dims = c('syear', 'ensemble'), + function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + Bias(exp = res, + obs = obs, + time_dim = 'syear', + memb_dim = 'ensemble', + alpha = alpha)}, + ncores = ncores) + skill_metrics$mean_bias <- mean_bias$bias + skill_metrics$mean_bias_significance <- mean_bias$sig + } + if ('rms' %in% requested_metrics) { + rms <- Apply(datos, target_dims = c('syear', 'ensemble'), + function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + RMS(exp = res, + obs = obs, + memb_dim = 'ensemble', dat_dim = NULL, + time_dim = 'syear', alpha = alpha)}, + ncores = ncores) + skill_metrics$rms <- rms$rms + } + ####### Do we need reference fcst to compute rmss?? + if ('rmss' %in% requested_metrics) { + # if (is.null(res$ref_obs_tr)) { + # } + rmss <- Apply(datos, target_dims = c('syear','ensemble'), + function(obs, ...) 
{ + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + RMSSS(exp = res, + obs = obs, + #ref = res$ref_obs_tr, + memb_dim = 'ensemble', dat_dim = NULL, + time_dim = 'syear', alpha = alpha, sign = TRUE)}, + ncores = ncores) + skill_metrics$rmss <- rmss$rmss + skill_metrics$rmss_significance <- rmss$sign + } + if (any(c('std', 'standard_deviation') %in% requested_metrics)) { + # This are used to compute correlation for the scorecards + # It's the temporal standard deviation of the ensmean + std_hcst <- Apply(datos[-1], target_dims = c('syear', 'ensemble'), + function(...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + res <- MeanDims(res, 'ensemble') + sd(as.vector(res))}, + ncores = ncores)$output1 + + std_obs <- Apply(datos[1], target_dims = c('syear', 'ensemble'), + fun = 'sd')$output1 + skill_metrics[['std_hcst']] <- std_hcst + skill_metrics[['std_obs']] <- std_obs + } - + if (any(c('var', 'variance') %in% requested_metrics)) { + ## Calculate variance + var_hcst <- Apply(datos[-1], target_dims = c('syear', 'ensemble'), + fun = function(...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + res <- MeanDims(res, 'ensemble') + variance <- var(as.vector(res))}, + ncores = ncores)$output1 + + var_obs <- Apply(datos[1], target_dims = c('syear', 'ensemble'), + fun = function(x) {var(as.vector(x))}, + ncores = ncores)$output1 + skill_metrics[['var_hcst']] <- var_hcst + skill_metrics[['var_obs']] <- var_obs } + if ('n_eff' %in% requested_metrics) { + ## Calculate degrees of freedom + n_eff <- s2dv::Eno(datos[[1]], time_dim = 'syear', + na.action = na.pass, + ncores = ncores) + skill_metrics[['n_eff']] <- n_eff + } + if (any(c('cov', 'covariance') %in% requested_metrics)) { + # The covariance of the ensemble mean + covariance <- Apply(datos, target_dims = c('syear', 'ensemble'), + fun = function(obs, ...) 
{
+                        res <- abind(..., along = 2)
+                        names(dim(res)) <- c('syear', 'ensemble')
+                        res <- MeanDims(res, 'ensemble')
+                        cov(as.vector(obs), as.vector(res),
+                            use = "everything",
+                            method = "pearson")},
+                      ncores = ncores)$output1
+    skill_metrics$covariance <- covariance
+  }
+  }
+  # Probabilistic metrics for categories
+  categories <- recipe$Analysis$Workflow$Probabilities$percentiles
+  categories <- lapply(categories, function (x) {
+                       sapply(x, function(y) {
+                       eval(parse(text = y))})})
+  if (is.null(probs)) {
+    # calculate probs from the ensemble distribution
+    # First compute tercile limits in crossval
+    lims <- list()
+    syears_ind <- list(t = tmp)
+    lims$hcst <- Apply(append(syears_ind, datos[-1]),
+                  target_dims = append(list(NULL),
+                       lapply(1:length(datos[-1]),
+                              function(x){c('syear', 'ensemble')})),
+                  fun = function(t, ..., crossin, prob_lims) {
+                    res <- abind(..., along = 2)
+                    names(dim(res)) <- c('syear', 'ensemble')
+                    res <- Subset(res, along = 'syear',
+                                  indices = crossin[[t]]$train.dexes, drop =T)
+                    lapply(prob_lims, function(ps) {
+                       quantile(as.vector(res), ps, na.rm = na.rm)})
+                  }, output_dims = lapply(categories, function(x){'cat'}),
+                  prob_lims = categories,
+                  crossin = cross, ncores = ncores)
+    lims$obs <- Apply(append(syears_ind, datos[1]),
+                  target_dims = list(NULL, c('syear', 'ensemble')),
+                  fun = function(t, obs, crossin, prob_lims) {
+                    res <- Subset(obs, along = 'syear',
+                                  indices = crossin[[t]]$train.dexes, drop =T)
+                    lapply(prob_lims, function(ps) {
+                       quantile(as.vector(res), ps, na.rm = na.rm)})
+                  }, output_dims = lapply(categories, function(x){'cat'}),
+                  prob_lims = categories,
+                  crossin = cross, ncores = ncores)
 
-  }
+  }
   # Compute rps
   return(skill_metrics)
 }
-- GitLab
From 44d88a98c1ce6eeab12ee04ea2aa84eae4935ce5 Mon Sep 17 00:00:00 2001
From: nperez
Date: Thu, 6 Jun 2024 18:22:31 +0200
Subject: [PATCH 30/78] calculating multimodel crossval probs

---
 modules/Multimodel/multimodel_metrics.R | 113 +++++++++++++++++++-----
 1 file changed, 90 insertions(+), 23 deletions(-)

diff --git a/modules/Multimodel/multimodel_metrics.R b/modules/Multimodel/multimodel_metrics.R
index 3128f117..02488f76 100644
--- a/modules/Multimodel/multimodel_metrics.R
+++ b/modules/Multimodel/multimodel_metrics.R
@@ -14,6 +14,8 @@
source("modules/Crossval/R/tmp/Bias.R") source("modules/Crossval/R/tmp/SprErr.R") source("modules/Crossval/R/tmp/Eno.R") +source("modules/Crossval/R/tmp/GetProbs.R") +source("modules/Visualization/Visualization.R") # to load .warning from utils multimodel_metrics <- function(recipe, datos = NULL, probs = NULL) { @@ -205,37 +207,102 @@ multimodel_metrics <- function(recipe, categories <- lapply(categories, function (x) { sapply(x, function(y) { eval(parse(text = y))})}) + # For the pool method: if (is.null(probs)) { # calculate probs from the ensemble distribution # Firs compure tercile limits in crossval lims <- list() syears_ind <- list(t = tmp) lims$hcst <- Apply(append(syears_ind, datos[-1]), - target_dims = append(list(NULL), - lapply(1:length(datos[-1]), - function(x){c('syear', 'ensemble')})), - fun = function(t, ..., crossin, prob_lims) { - res <- abind(..., along = 2) - names(dim(res)) <- c('syear', 'ensemble') - res <- Subset(res, along = 'syear', - indices = crossin[[t]]$train.dexes, drop =T) - lapply(prob_lims, function(ps) { - quantile(as.vector(res), ps, na.rm = na.rm)}) - }, output_dims = lapply(categories, function(x){'cat'}), - prob_lims = categories, - crossin = cross, ncores = ncores) + target_dims = append(list(NULL), + lapply(1:length(datos[-1]), + function(x){c('syear', 'ensemble')})), + fun = function(t, ..., crossin, prob_lims) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + res <- Subset(res, along = 'syear', + indices = crossin[[t]]$train.dexes, drop =T) + lapply(prob_lims, function(ps) { + limits <- quantile(as.vector(res), + ps, na.rm = na.rm) + limits <- array(limits, c(cat = length(limits))) + }) + }, prob_lims = categories, + crossin = cross, ncores = ncores) lims$obs <- Apply(append(syears_ind, datos[1]), - target_dims = list(NULL, c('syear', 'ensemble')), - fun = function(t, obs, crossin, prob_lims) { - res <- Subset(obs, along = 'syear', - indices = crossin[[t]]$train.dexes, drop =T) - lapply(prob_lims, function(ps) { - quantile(as.vector(res), ps, na.rm = na.rm)}) - }, output_dims = lapply(categories, function(x){'cat'}), - prob_lims = categories, - crossin = cross, ncores = ncores) + target_dims = list(NULL, c('syear', 'ensemble')), + fun = function(t, obs, crossin, prob_lims) { + res <- Subset(obs, along = 'syear', + indices = crossin[[t]]$train.dexes, drop =T) + lapply(prob_lims, function(ps) { + limits <- quantile(as.vector(res), + ps, na.rm = na.rm) + limits <- array(limits, c(cat = length(limits))) + }) + }, prob_lims = categories, + crossin = cross, ncores = ncores) + probs <- list() + probs$hcst <- list() + probs$fcst <- list(NULL) + probs$obs <- list() + for (ps in 1:length(categories)) { + lim <- lims$hcst[ps] + probs$hcst <- append(probs$hcst, + list(Apply(append(lim, datos[-1]), + target_dims = append(list(c('syear', 'cat')), + lapply(1:length(datos[-1]), + function(x){c('syear', 'ensemble')})), + fun = function(lim, ...) 
{ + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + GetProbs(data = res, time_dim = 'syear', + memb_dim = 'ensemble', + indices_for_quantiles = NULL, + prob_thresholds = NULL, + abs_thresholds = lim, + bin_dim_abs = 'cat', + weights = NULL, cross.val = FALSE)}, + ncores = ncores)$output1)) + lim <- lims$obs[ps] + probs$obs <- append(probs$obs, + list(Apply(append(lim, datos[1]), + target_dims = list(c('syear', 'cat'), + c('syear', 'ensemble')), + fun = function(lim, obs) { + GetProbs(data = obs, time_dim = 'syear', + memb_dim = 'ensemble', + indices_for_quantiles = NULL, + prob_thresholds = NULL, + abs_thresholds = lim, + bin_dim_abs = 'cat', + weights = NULL, cross.val = FALSE)}, + ncores = ncores)$output1)) - } +# if (!is.null(datos$fcst)) { +## NEEDS TO COMPUTE LIMS FOR THE FULL HINDCAST PERIOD +# lim <- lims$fcst[ps] +# probs$hcst <- append(probs$fcst, +# list(Apply(append(lim, datos[-1]), +# target_dims = append(list(c('syear', 'cat')), +# lapply(1:length(datos[-1]), +# function(x){c('syear', 'ensemble')})), +# fun = function(lim, ...) { +# res <- abind(..., along = 2) +# names(dim(res)) <- c('syear', 'ensemble') +# GetProbs(data = res, time_dim = 'syear', +# memb_dim = 'ensemble', +# indices_for_quantiles = NULL, +# prob_thresholds = NULL, +# abs_thresholds = lim, +# bin_dim_abs = 'cat', +# weights = NULL, cross.val = FALSE)}, +# ncores = ncores)$output1)) +# } + } + # At this point all method can have probs calculated: + # RPS/RPS_CLIM/RPSS + + } # Compute rps return(skill_metrics) } -- GitLab From 33bb3c81fd8f6c2763621e9bb2515571d627d40b Mon Sep 17 00:00:00 2001 From: nperez Date: Fri, 7 Jun 2024 11:29:12 +0200 Subject: [PATCH 31/78] multimodel rps --- modules/Multimodel/Multimodel.R | 9 +- modules/Multimodel/multimodel_metrics.R | 127 ++++++++++++++++++------ 2 files changed, 104 insertions(+), 32 deletions(-) diff --git a/modules/Multimodel/Multimodel.R b/modules/Multimodel/Multimodel.R index b52e7047..7b5a9d71 100644 --- a/modules/Multimodel/Multimodel.R +++ b/modules/Multimodel/Multimodel.R @@ -48,11 +48,14 @@ Multimodel <- function(recipe) { } # assessment: + # IF Fair is TRUE, the number of nmembs used to compute the probabilities + # need to be passed + source("modules/Multimodel/multimodel_metrics.R") skill_metrics <- multimodel_metrics(recipe = recipe, - indv_membs = individual_memb, - ensmean = ensmean, - probs = probs) + datos = datos, + probs = probs, + Fair = FALSE) diff --git a/modules/Multimodel/multimodel_metrics.R b/modules/Multimodel/multimodel_metrics.R index 02488f76..7a612311 100644 --- a/modules/Multimodel/multimodel_metrics.R +++ b/modules/Multimodel/multimodel_metrics.R @@ -18,20 +18,25 @@ source("modules/Crossval/R/tmp/GetProbs.R") source("modules/Visualization/Visualization.R") # to load .warning from utils multimodel_metrics <- function(recipe, datos = NULL, - probs = NULL) { + probs = NULL, + Fair = FALSE) { ncores <- recipe$Analysis$ncores na.rm <- recipe$Analysis$remove_NAs cross.method <- recipe$Analysis$cross.method + if (is.null(cross.method)) { + cross.method <- 'leave-one-out' + } # Prepare indices for crossval - sdate_dim <- dim(datos[[1]])['syear'] + sdate_dim <- dim(datos$hcst[[1]]$data)['syear'] cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, amt.points = sdate_dim, amt.points_cor = NULL) tmp <- array(1:length(cross), c(syear = length(cross))) alpha <- recipe$Analysis$alpha - if (is.null(cross.method)) { - cross.method <- 'leave-one-out' + if (is.null(alpha)) { + alpha <- 0.05 } + 
requested_metrics <- strsplit(recipe$Analysis$Workflow$Skill$metric, ", | |,")[[1]] skill_metrics <- list() @@ -41,7 +46,7 @@ multimodel_metrics <- function(recipe, lapply(datos$hcst, function(x) x$data)) ## CRPS metrics only make sense in pool method: if (any(c('crps', 'crpss') %in% requested_metrics)) { - crps <- Apply(datos, target_dims = c('syear', 'eCnsemble'), + crps <- Apply(datos, target_dims = c('syear', 'ensemble'), fun = function(obs, ...) { res <- abind(..., along = 2) names(dim(res)) <- names(dim(obs)) @@ -243,7 +248,7 @@ multimodel_metrics <- function(recipe, crossin = cross, ncores = ncores) probs <- list() probs$hcst <- list() - probs$fcst <- list(NULL) + probs$fcst <- list() probs$obs <- list() for (ps in 1:length(categories)) { lim <- lims$hcst[ps] @@ -278,31 +283,95 @@ multimodel_metrics <- function(recipe, weights = NULL, cross.val = FALSE)}, ncores = ncores)$output1)) -# if (!is.null(datos$fcst)) { -## NEEDS TO COMPUTE LIMS FOR THE FULL HINDCAST PERIOD -# lim <- lims$fcst[ps] -# probs$hcst <- append(probs$fcst, -# list(Apply(append(lim, datos[-1]), -# target_dims = append(list(c('syear', 'cat')), -# lapply(1:length(datos[-1]), -# function(x){c('syear', 'ensemble')})), -# fun = function(lim, ...) { -# res <- abind(..., along = 2) -# names(dim(res)) <- c('syear', 'ensemble') -# GetProbs(data = res, time_dim = 'syear', -# memb_dim = 'ensemble', -# indices_for_quantiles = NULL, -# prob_thresholds = NULL, -# abs_thresholds = lim, -# bin_dim_abs = 'cat', -# weights = NULL, cross.val = FALSE)}, -# ncores = ncores)$output1)) -# } - } - # At this point all method can have probs calculated: - # RPS/RPS_CLIM/RPSS + if (!is.null(datos$fcst)) { + ## NEEDS TO COMPUTE LIMS FOR THE FULL HINDCAST PERIOD + lims$fcst[ps] <- Apply(datos[-1], + target_dims = c('syear', 'ensemble'), + fun = function(..., prob_lims) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + lapply(prob_lims, function(ps) { + limits <- quantile(as.vector(res), + ps, na.rm = na.rm) + limits <- array(limits, c(cat = length(limits))) + }) + }, prob_lims = categories, + ncores = ncores) + probs$fcst <- append(probs$fcst, + list(Apply(append(lims$fcst[ps], datos[-1]), + target_dims = append(list('cat'), + lapply(1:length(datos[-1]), + function(x){c('syear', 'ensemble')})), + fun = function(lim, ...) 
{ + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + GetProbs(data = res, time_dim = 'syear', + memb_dim = 'ensemble', + indices_for_quantiles = NULL, + prob_thresholds = NULL, + abs_thresholds = lim, + bin_dim_abs = 'cat', + weights = NULL, cross.val = FALSE)}, + ncores = ncores)$output1)) + } + } + # At this point all method can have probs calculated } + # TODO: distinguish between rpss and bss + # if 1 percentile -> bss + # if more than 1 -> rpss + exe_rps <- unlist(lapply(categories, function(x) { + if (length(x) > 1) { + x <- x[1] *100 + } + return(x)})) + if (!is.null(datos)) { + if (Fair) { + nmemb <- sum(unlist(lapply(datos[-1], function(x) dim(x)['ensemble']))) + } else { + nmemb <- NULL + } + } else { + nmemb <- NULL + } + # Compute rps + for (ps in 1:length(exe_rps)) { + if ('rps' %in% requested_metrics) { + rps <- RPS(exp = probs$hcst[[ps]], + obs = probs$obs[[ps]], memb_dim = NULL, + cat_dim = 'cat', cross.val = FALSE, time_dim = 'syear', + Fair = Fair, nmemb = nmemb, + ncores = ncores) + rps_clim <- Apply(list(probs$obs[[ps]]), + target_dims = c('cat', 'syear'), + RPS_clim, bin_dim_abs = 'cat', Fair = Fair, + cross.val = FALSE, ncores = ncores)$output1 + skill_metrics$rps <- rps + skill_metrics$rps_clim <- rps_clim + # names based on the categories: + # To use it when visualization works for more rps + #skill_metrics[[paste0('rps', exe_rps[ps])]] <- rps + #skill_metrics[[paste0('rps_clim', + # exe_rps[ps])]] <- rps_clim + } + if ('rpss' %in% requested_metrics) { + rpss <- RPSS(exp = probs$hcst[[ps]], + obs = probs$obs[[ps]], + ref = NULL, # ref is 1/3 by default if terciles + time_dim = 'syear', memb_dim = NULL, + cat_dim = 'cat', nmemb = nmemb, + dat_dim = NULL, + prob_thresholds = 0.1, # un use param when providing probs + indices_for_clim = NULL, + Fair = Fair, weights_exp = NULL, weights_ref = NULL, + cross.val = FALSE, na.rm = na.rm, + sig_method.type = 'two.sided.approx', alpha = alpha, + ncores = ncores) + skill_metrics$rpss <- rpss$rpss + skill_metrics$rpss_significance <- rpss$sign + } + } return(skill_metrics) } -- GitLab From ede7caf17cdcf60f2da0afa654cfe9f79dcf44f3 Mon Sep 17 00:00:00 2001 From: nperez Date: Fri, 7 Jun 2024 12:07:58 +0200 Subject: [PATCH 32/78] fix rpss --- modules/Crossval/Crossval_skill.R | 4 ++-- modules/Multimodel/Multimodel.R | 2 +- .../Multimodel/{multimodel_metrics.R => Multimodel_skill.R} | 2 +- recipe_tas.yml | 6 +++--- 4 files changed, 7 insertions(+), 7 deletions(-) rename modules/Multimodel/{multimodel_metrics.R => Multimodel_skill.R} (99%) diff --git a/modules/Crossval/Crossval_skill.R b/modules/Crossval/Crossval_skill.R index f1356f3b..417ee33b 100644 --- a/modules/Crossval/Crossval_skill.R +++ b/modules/Crossval/Crossval_skill.R @@ -41,7 +41,7 @@ Crossval_skill <- function(recipe, data_crossval, categories <- recipe$Analysis$Workflow$Probabilities$percentiles categories <- lapply(categories, function (x) { sapply(x, function(y) { - round(eval(parse(text = y)),2)})}) + eval(parse(text = y))})}) # TODO: distinguish between rpss and bss # if 1 percentile -> bss @@ -85,7 +85,7 @@ Crossval_skill <- function(recipe, data_crossval, time_dim = 'syear', memb_dim = NULL, cat_dim = 'cat', nmemb = nmemb, dat_dim = NULL, - prob_thresholds = 0.1, # un use param when providing probs + prob_thresholds = categories[[ps]], indices_for_clim = NULL, Fair = fair, weights_exp = NULL, weights_ref = NULL, cross.val = FALSE, na.rm = na.rm, diff --git a/modules/Multimodel/Multimodel.R b/modules/Multimodel/Multimodel.R index 
7b5a9d71..4aa48924 100644 --- a/modules/Multimodel/Multimodel.R +++ b/modules/Multimodel/Multimodel.R @@ -51,7 +51,7 @@ Multimodel <- function(recipe) { # IF Fair is TRUE, the number of nmembs used to compute the probabilities # need to be passed - source("modules/Multimodel/multimodel_metrics.R") + source("modules/Multimodel/Multimodel_skill.R") skill_metrics <- multimodel_metrics(recipe = recipe, datos = datos, probs = probs, diff --git a/modules/Multimodel/multimodel_metrics.R b/modules/Multimodel/Multimodel_skill.R similarity index 99% rename from modules/Multimodel/multimodel_metrics.R rename to modules/Multimodel/Multimodel_skill.R index 7a612311..242398dc 100644 --- a/modules/Multimodel/multimodel_metrics.R +++ b/modules/Multimodel/Multimodel_skill.R @@ -363,7 +363,7 @@ multimodel_metrics <- function(recipe, time_dim = 'syear', memb_dim = NULL, cat_dim = 'cat', nmemb = nmemb, dat_dim = NULL, - prob_thresholds = 0.1, # un use param when providing probs + prob_thresholds = categories[[ps]], # un use param when providing probs indices_for_clim = NULL, Fair = Fair, weights_exp = NULL, weights_ref = NULL, cross.val = FALSE, na.rm = na.rm, diff --git a/recipe_tas.yml b/recipe_tas.yml index 64d71f64..4b6a3686 100644 --- a/recipe_tas.yml +++ b/recipe_tas.yml @@ -11,9 +11,9 @@ Analysis: flux: no Datasets: System: - - {name: 'ECMWF-SEAS5.1'} - - {name: 'Meteo-France-System7'} - #name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + #- {name: 'ECMWF-SEAS5.1'} + #- {name: 'Meteo-France-System7'} + name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 Multimodel: execute: yes approach: pooled # Mandatory, bool: Either yes/true or no/false -- GitLab From 289fc152390564b2ab8c71db99a5875f8743870c Mon Sep 17 00:00:00 2001 From: nperez Date: Fri, 7 Jun 2024 17:52:58 +0200 Subject: [PATCH 33/78] crossval multimodel --- full_ecvs_multimodel_anomalies.R | 47 +++ .../Crossval/Crossval_multimodel_anomalies.R | 339 ++++++++++++++++++ modules/Loading/R/load_seasonal.R | 3 +- recipe_tas.yml | 6 +- 4 files changed, 390 insertions(+), 5 deletions(-) create mode 100644 full_ecvs_multimodel_anomalies.R create mode 100644 modules/Crossval/Crossval_multimodel_anomalies.R diff --git a/full_ecvs_multimodel_anomalies.R b/full_ecvs_multimodel_anomalies.R new file mode 100644 index 00000000..a6b07220 --- /dev/null +++ b/full_ecvs_multimodel_anomalies.R @@ -0,0 +1,47 @@ + +source("modules/Loading/Loading.R") +source("modules/Saving/Saving.R") +source("modules/Units/Units.R") +source("modules/Visualization/Visualization.R") +#args = commandArgs(trailingOnly = TRUE) +#recipe_file <- args[1] +recipe_file <- "recipe_tas.yml" +# Will it work with a single job? 
+#recipe <- read_atomic_recipe(recipe_file)
+original_recipe <- prepare_outputs(recipe_file, disable_checks = TRUE)
+# Load datasets
+models <- unlist(original_recipe$Analysis$Datasets$System)
+recipe_aux <- original_recipe
+datos <- list()
+datos$hcst <- list()
+datos$fcst <- list()
+for (sys in models) {
+  recipe_aux$Analysis$Datasets$System <- NULL
+  recipe_aux$Analysis$Datasets$System$name <- as.vector(sys)
+  data <- Loading(recipe = recipe_aux)
+  data <- Units(recipe_aux, data)
+  datos$hcst <- append(datos$hcst, list(data$hcst))
+  datos$fcst <- append(datos$fcst, list(data$fcst))
+  names(datos$hcst)[length(datos$hcst)] <- gsub('\\.','', sys)
+  names(datos$fcst)[length(datos$fcst)] <- gsub('\\.','', sys)
+}
+datos$obs <- data$obs
+data <- datos
+rm(list=list('datos'))
+
+source("modules/Crossval/Crossval_anomalies.R")
+res <- Crossval_anomalies(recipe = recipe, data = data)
+
+source("modules/Crossval/Crossval_skill.R")
+skill_metrics <- Crossval_skill(recipe = recipe, data_crossval = res,
+                                fair = FALSE, nmemb = NULL, nmemb_ref = NULL)
+
+Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, significance = TRUE)
+
+## Check logo size is appropriate for your maps:
+source("tools/add_logo.R")
+add_logo(recipe, "rsz_rsz_bsc_logo.png")
+
+# scorecards computation:
+
+
diff --git a/modules/Crossval/Crossval_multimodel_anomalies.R b/modules/Crossval/Crossval_multimodel_anomalies.R
new file mode 100644
index 00000000..7548fd5c
--- /dev/null
+++ b/modules/Crossval/Crossval_multimodel_anomalies.R
@@ -0,0 +1,339 @@
+# Full-cross-val workflow
+## This code should be valid for individual months and temporal averages
+source("modules/Crossval/R/tmp/GetProbs.R")
+
+Crossval_multimodel_anomalies <- function(recipe, data) {
+  cross.method <- recipe$Analysis$cross.method
+  # TODO move check
+  if (is.null(cross.method)) {
+    cross.method <- 'leave-one-out'
+  }
+  categories <- recipe$Analysis$Workflow$Probabilities$percentiles
+  categories <- lapply(categories, function (x) {
+                       sapply(x, function(y) {
+                       eval(parse(text = y))})})
+  ncores <- recipe$Analysis$ncores
+  na.rm <- recipe$Analysis$remove_NAs
+  ## data dimensions
+  sdate_dim <- dim(data$hcst[[1]]$data)['syear']
+  orig_dims <- names(dim(data$hcst[[1]]$data))
+  # spatial dims
+  if ('latitude' %in% names(dim(data$hcst[[1]]$data))) {
+    nlats <- dim(data$hcst[[1]]$data)['latitude']
+    nlons <- dim(data$hcst[[1]]$data)['longitude']
+    agg = 'global'
+  } else if ('region' %in% names(dim(data$hcst[[1]]$data))) {
+    agg = 'region'
+    nregions <- dim(data$hcst[[1]]$data)['region']
+  }
+  # output_dims from loop based on original dimensions
+  ## ex: 'dat', 'var', 'sday', 'sweek', 'ensemble', 'time',
+  ## 'latitude', 'longitude', 'unneeded', 'syear'
+  ev_dim_names <- c(orig_dims[-which(orig_dims %in% 'syear')],
+                    names(sdate_dim))
+  orig_dims[orig_dims %in% 'ensemble'] <- 'unneeded'
+  orig_dims[orig_dims %in% 'syear'] <- 'ensemble'
+  tr_dim_names <- c(orig_dims,
+                   names(sdate_dim))
+  # TODO fix it to use new version https://earth.bsc.es/gitlab/external/cstools/-/blob/dev-cross-indices/R/CST_Calibration.R#L570
+  cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method,
                                            amt.points = sdate_dim,
                                            amt.points_cor = NULL) # k = ?
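+  # For reference, with cross.method = 'leave-one-out' and e.g. 3 start
+  # dates, 'cross' is a list of index pairs along the lines of:
+  # list(list(eval.dexes = 1, train.dexes = c(2, 3)),
+  #      list(eval.dexes = 2, train.dexes = c(1, 3)),
+  #      list(eval.dexes = 3, train.dexes = c(1, 2)))
+  # (exact structure depends on the CSTools internal, hence the TODO above)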
+ ## output objects + ano_obs_ev_res <- NULL + ano_hcst_ev_res <- lapply(data$hcst, function(x) {NULL}) + ano_obs_tr_res <- NULL + # as long as probs requested in recipe: + lims_ano_hcst_tr_res <- lapply(categories, function(X) {NULL}) + lims_ano_obs_tr_res <- lapply(categories, function(X) {NULL}) + + fcst_probs <- lapply(categories, function(x){NULL}) + hcst_probs_ev <- lapply(categories, function(x){NULL}) + obs_probs_ev <- lapply(categories, function(x){NULL}) + hcst_res <- list() + ano_hcst_tr <- ano_hcst_ev <- ano_fcst <- list() + + for (t in 1:length(cross)) { + info(recipe$Run$logger, paste("crossval:", t)) + + # Observations + obs_tr <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$train.dexes) + obs_ev <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$eval.dexes, drop = 'selected') + clim_obs_tr <- MeanDims(obs_tr, 'syear') + ano_obs_tr <- s2dv::Ano(obs_tr, clim_obs_tr, + ncores = ncores) + ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr, + ncores = ncores) + # Store cross validation loops: + ano_obs_ev_res <- abind(ano_obs_ev_res, ano_obs_ev, + along = length(dim(ano_obs_ev)) + 1) + ano_obs_tr_res <- abind(ano_obs_tr_res, ano_obs_tr, + along = length(dim(ano_obs_tr)) + 1) + + # Anomalies of individual models + for (sys in 1:length(data$hcst)) { + hcst_tr <- Subset(data$hcst[[sys]]$data, along = 'syear', + indices = cross[[t]]$train.dexes) + ## evaluation indices + hcst_ev <- Subset(data$hcst[[sys]]$data, along = 'syear', + indices = cross[[t]]$eval.dexes, drop = 'selected') + # compute climatology: + clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble')) + # compute anomalies: + ano_hcst_tr <- append(ano_hcst_tr, + list(s2dv::Ano(hcst_tr, clim_hcst_tr, + ncores = ncores))) + ano_hcst_ev <- append(ano_hcst_ev, + list(s2dv::Ano(hcst_ev, clim_hcst_tr, + ncores = ncores))) + ano_hcst_ev_res[[sys]] <- abind(ano_hcst_ev_res[[sys]], + ano_hcst_ev[[sys]], + along = length(dim(ano_hcst_ev[[sys]])) + 1) + } + # compute category limits + lims_ano_hcst_tr <- Apply(ano_hcst_tr, + target_dims = c('syear', 'ensemble'), + fun = function(..., prob_lims) { + res <- abind(..., along = 2) + lapply(prob_lims, function(ps) { + quantile(as.vector(res), + ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x) {'cat'}), + prob_lims = categories, + ncores = ncores) + lims_ano_obs_tr <- Apply(ano_obs_tr, + target_dims = c('syear'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x){'cat'}), + prob_lims = categories, + ncores = ncores) + + #store results + for(ps in 1:length(categories)) { + lims_ano_hcst_tr_res[[ps]] <- abind(lims_ano_hcst_tr_res[[ps]], lims_ano_hcst_tr[[ps]], + along = length(dim(lims_ano_hcst_tr[[ps]])) + 1) + lims_ano_obs_tr_res[[ps]] <- abind(lims_ano_obs_tr_res[[ps]], lims_ano_obs_tr[[ps]], + along = length(dim(lims_ano_obs_tr[[ps]])) + 1) + } + } + info(recipe$Run$logger, + "#### Anomalies Cross-validation loop ended #####") + gc() + # Add dim names: + ano_hcst_ev_res <- lapply(ano_hcst_ev_res, function(x) { + names(dim(x)) <- ev_dim_names + return(x)}) + names(dim(ano_obs_ev_res)) <- ev_dim_names + names(dim(ano_obs_tr_res)) <- tr_dim_names + # To make crps_clim to work the reference forecast need to have same dims as obs: + ano_obs_tr_res <- Subset(ano_obs_tr_res, along = 'unneeded', + indices = 1, drop = 'selected') + for(ps in 1:length(categories)) { + names(dim(lims_ano_hcst_tr_res[[ps]])) <- c('cat', + 
orig_dims[-which(orig_dims %in% c('ensemble', 'unneeded'))], 'syear') + names(dim(lims_ano_obs_tr_res[[ps]])) <- c('cat', + tr_dim_names[-which(tr_dim_names %in% c('ensemble'))]) + lims_ano_obs_tr_res[[ps]] <- Subset(lims_ano_obs_tr_res[[ps]], + along = 'unneeded', indices = 1, drop = 'selected') + } + + # Forecast anomalies: + if (!is.null(data$fcst)) { + clim_fcst <- Apply(ano_hcst_ev_res, + target_dims = c('syear', 'ensemble'), + function(x) { + res <- abind(..., along = 2) + res <- mean(as.vector(res), na.rm = na.rm)} + ncores = ncores)$output1 +browser() + ano_fcst <- Ano(data = data$fcst$data, clim = clim_hcst) + # Terciles limits using the whole hindcast period: + lims_fcst <- Apply(ano_hcst_ev_res, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x) {'cat'}), + prob_lims = categories, + ncores = ncores) + } + + # Compute Probabilities + for (ps in 1:length(categories)) { + hcst_probs_ev[[ps]] <- GetProbs(ano_hcst_ev_res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', abs_thresholds = lims_ano_hcst_tr[[ps]], + ncores = ncores) + obs_probs_ev[[ps]] <- GetProbs(ano_obs_ev_res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_ano_obs_tr_res[[ps]], + ncores = ncores) + if (!is.null(data$fcst)) { + fcst_probs[[ps]] <- GetProbs(data$fcst$data, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_fcst[[ps]], + ncores = ncores) + } + } + # Convert to s2dv_cubes the resulting anomalies + ano_hcst <- data$hcst + ano_hcst$data <- ano_hcst_ev_res + ano_obs <- data$obs + ano_obs$data <- ano_obs_ev_res + + info(recipe$Run$logger, + "#### Anomalies and Probabilities Done #####") + if (recipe$Analysis$Workflow$Anomalies$save != 'none') { + info(recipe$Run$logger, "##### START SAVING ANOMALIES #####") + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Anomalies/") + # Save forecast + if ((recipe$Analysis$Workflow$Anomalies$save %in% + c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) { + save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') + } + # Save hindcast + if (recipe$Analysis$Workflow$Anomalies$save %in% + c('all', 'exp_only')) { + save_forecast(recipe = recipe, data_cube = ano_hcst, type = 'hcst') + } + # Save observation + if (recipe$Analysis$Workflow$Anomalies$save == 'all') { + save_observations(recipe = recipe, data_cube = ano_obs) + } + } + # Save probability bins + probs_hcst <- list() + probs_fcst <- list() + probs_obs <- list() + all_names <- NULL + # Make categories rounded number to use as names: + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + round(eval(parse(text = y)),2)})}) + + for (ps in 1:length(categories)) { + for (perc in 1:(length(categories[[ps]]) + 1)) { + if (perc == 1) { + name_elem <- paste0("below_", categories[[ps]][perc]) + } else if (perc == length(categories[[ps]]) + 1) { + name_elem <- paste0("above_", categories[[ps]][perc-1]) + } else { + name_elem <- paste0("from_", categories[[ps]][perc-1], + "_to_", categories[[ps]][perc]) + } + probs_hcst <- append(list(Subset(hcst_probs_ev[[ps]], + along = 'cat', 
indices = perc, drop = 'all')), + probs_hcst) + probs_obs <- append(list(Subset(obs_probs_ev[[ps]], + along = 'cat', indices = perc, drop = 'all')), + probs_obs) + if (!is.null(data$fcst)) { + probs_fcst <- append(list(Subset(fcst_probs[[ps]], + along = 'cat', indices = perc, drop = 'all')), + probs_fcst) + } + all_names <- c(all_names, name_elem) + } + } + names(probs_hcst) <- all_names + if (!('var' %in% names(dim(probs_hcst[[1]])))) { + probs_hcst <- lapply(probs_hcst, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } + names(probs_obs) <- all_names + if (!('var' %in% names(dim(probs_obs[[1]])))) { + probs_obs <- lapply(probs_obs, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } + + if (!is.null(data$fcst)) { + names(probs_fcst) <- all_names + if (!('var' %in% names(dim(probs_fcst[[1]])))) { + probs_fcst <- lapply(probs_fcst, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } + if (!('syear' %in% names(dim(probs_fcst[[1]])))) { + probs_fcst <- lapply(probs_fcst, function(x) { + dim(x) <- c(syear = 1, dim(x)) + return(x)}) + } + } + if (recipe$Analysis$Workflow$Probabilities$save %in% + c('all', 'bins_only')) { + save_probabilities(recipe = recipe, probs = probs_hcst, + data_cube = data$hcst, agg = agg, + type = "hcst") + save_probabilities(recipe = recipe, probs = probs_obs, + data_cube = data$hcst, agg = agg, + type = "obs") + # TODO Forecast + if (!is.null(probs_fcst)) { + save_probabilities(recipe = recipe, probs = probs_fcst, + data_cube = data$fcst, agg = agg, + type = "fcst") + } + } + # Save ensemble mean for multimodel option: + hcst_EM <- MeanDims(ano_hcst$data, 'ensemble', drop = T) + save_metrics(recipe = recipe, + metrics = list(hcst_EM = + Subset(hcst_EM, along = 'dat', indices = 1, drop = 'selected')), + data_cube = data$hcst, agg = agg, + module = "statistics") + fcst_EM <- NULL + if (!is.null(data$fcst)) { + fcst_EM <- MeanDims(data$fcst$data, 'ensemble', drop = T) + save_metrics(recipe = recipe, + metrics = list(fcst_EM = + Subset(fcst_EM, along = 'dat', indices = 1, drop = 'selected')), + data_cube = data$fcst, agg = agg, + module = "statistics") + } + return(list(hcst = ano_hcst, obs = ano_obs, fcst = data$fcst, + hcst.full_val = data$hcst, obs.full_val = data$obs, + hcst_EM = hcst_EM, fcst_EM = fcst_EM, + cat_lims = list(hcst_tr = lims_ano_hcst_tr_res, + obs_tr = lims_ano_obs_tr_res), + probs = list(hcst_ev = hcst_probs_ev, + obs_ev = obs_probs_ev), + ref_obs_tr = ano_obs_tr_res)) +} + + +## The result contains the inputs for Skill_full_crossval. 
+## this is a list with the required elements:
+  ## probs is a list with
+  ## probs$hcst_ev and probs$obs_ev
+  ## probs$hcst_ev will have as many elements as in the $Probabilities$percentiles
+  ## each element will be an array with 'cat' dimension
+  ## the same for probs$obs_ev
+  ## hcst is a s2dv_cube for the post-processed hindcast for the evaluation indices
+  ## in this case cross validated anomalies
+  ## obs is a s2dv_cube for the post-processed obs
+  ## in this case cross validated anomalies
+  ## fcst is a s2dv_cube for the post-processed fcst
+  ## in this case anomalies computed against the full hindcast period
+  ## this object is not required for skill assessment
+  ## hcst.full_val and obs.full_val are the original data to compute mean bias
+  ## cat_lims used to compute the probabilities
+  ## this object is not required for skill assessment
+  ## ref_obs_tr is an array with the cross-validated observed anomalies
+  ## to be used as reference forecast in the CRPSS and CRPS_clim
+  ## it is computed from the training indices
+
diff --git a/modules/Loading/R/load_seasonal.R b/modules/Loading/R/load_seasonal.R
index 42b74b16..414c88f3 100644
--- a/modules/Loading/R/load_seasonal.R
+++ b/modules/Loading/R/load_seasonal.R
@@ -18,7 +18,6 @@ load_seasonal <- function(recipe) {
   lons.max <- recipe$Analysis$Region$lonmax
   ref.name <- recipe$Analysis$Datasets$Reference$name
   exp.name <- recipe$Analysis$Datasets$System$name
-
   variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]]
   store.freq <- recipe$Analysis$Variables$freq
@@ -386,7 +385,7 @@ load_seasonal <- function(recipe) {
 ############################################################################
 ############################################################################
 
-  .log_memory_usage(recipe$Run$logger, when = "After loading")
+#  .log_memory_usage(recipe$Run$logger, when = "After loading")
   return(list(hcst = hcst,
               fcst = fcst,
               obs = obs))
 }
diff --git a/recipe_tas.yml b/recipe_tas.yml
index 4b6a3686..5af89ec1 100644
--- a/recipe_tas.yml
+++ b/recipe_tas.yml
@@ -11,9 +11,9 @@ Analysis:
     flux: no
   Datasets:
     System:
-      #- {name: 'ECMWF-SEAS5.1'}
-      #- {name: 'Meteo-France-System7'}
-      name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8
+      - {name: 'ECMWF-SEAS5.1'}
+      - {name: 'Meteo-France-System7'}
+     # name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8
     Multimodel:
       execute: yes
       approach: pooled # Mandatory, bool: Either yes/true or no/false
-- GitLab
From dd802e92b92858b27081279a35ede53dcc024198 Mon Sep 17 00:00:00 2001
From: nperez
Date: Mon, 10 Jun 2024 14:48:09 +0200
Subject: [PATCH 34/78] multimodel version

---
 full_ecvs_multimodel_anomalies.R              |  17 +-
 .../Crossval/Crossval_multimodel_anomalies.R  | 310 ++++++++++--------
 modules/Crossval/Crossval_multimodel_skill.R  | 269 +++++++++++++++
 3 files changed, 449 insertions(+), 147 deletions(-)
 create mode 100644 modules/Crossval/Crossval_multimodel_skill.R

diff --git a/full_ecvs_multimodel_anomalies.R b/full_ecvs_multimodel_anomalies.R
index a6b07220..73970fa8 100644
--- a/full_ecvs_multimodel_anomalies.R
+++ b/full_ecvs_multimodel_anomalies.R
@@ -25,18 +25,21 @@ for (sys in models) {
   names(datos$hcst)[length(datos$hcst)] <- gsub('\\.','', sys)
   names(datos$fcst)[length(datos$fcst)] <- gsub('\\.','', sys)
 }
+data_aux <- data
 datos$obs <- data$obs
 data <- datos
-rm(list=list('datos'))
+rm(list = 'datos')
 
-source("modules/Crossval/Crossval_anomalies.R")
-res <- Crossval_anomalies(recipe = recipe, data = data)
+source("modules/Crossval/Crossval_multimodel_anomalies.R") +res <- Crossval_multimodel_anomalies(recipe = original_recipe, data = data) -source("modules/Crossval/Crossval_skill.R") -skill_metrics <- Crossval_skill(recipe = recipe, data_crossval = res, - fair = FALSE, nmemb = NULL, nmemb_ref = NULL) +source("modules/Crossval/Crossval_multimodel_skill.R") +skill_metrics <- multimodel_metrics(recipe = original_recipe, + data = res, Fair = FALSE) + + +Visualization(recipe = recipe_aux, data = data_aux, skill_metrics = skill_metrics, significance = TRUE) -Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, significance = TRUE) ## Check logo size is appropiated for your maps: source("tools/add_logo.R") diff --git a/modules/Crossval/Crossval_multimodel_anomalies.R b/modules/Crossval/Crossval_multimodel_anomalies.R index 7548fd5c..89afff40 100644 --- a/modules/Crossval/Crossval_multimodel_anomalies.R +++ b/modules/Crossval/Crossval_multimodel_anomalies.R @@ -140,22 +140,23 @@ Crossval_multimodel_anomalies <- function(recipe, data) { lims_ano_obs_tr_res[[ps]] <- Subset(lims_ano_obs_tr_res[[ps]], along = 'unneeded', indices = 1, drop = 'selected') } - # Forecast anomalies: + ano_fcst <- lims_fcst <- NULL if (!is.null(data$fcst)) { - clim_fcst <- Apply(ano_hcst_ev_res, + ano_fcst <- Apply(ano_hcst_ev_res, target_dims = c('syear', 'ensemble'), - function(x) { + function(...) { res <- abind(..., along = 2) - res <- mean(as.vector(res), na.rm = na.rm)} - ncores = ncores)$output1 -browser() - ano_fcst <- Ano(data = data$fcst$data, clim = clim_hcst) + clim <- mean(as.vector(res), na.rm = na.rm) + res <- res - clim}, + ncores = ncores, + output_dims = c('syear', 'ensemble'))$output1 # Terciles limits using the whole hindcast period: lims_fcst <- Apply(ano_hcst_ev_res, target_dims = c('syear', 'ensemble'), - fun = function(x, prob_lims) { + fun = function(..., prob_lims) { + res <- abind(..., along = 2) lapply(prob_lims, function(ps) { - quantile(as.vector(x), ps, na.rm = na.rm)})}, + quantile(as.vector(res), ps, na.rm = na.rm)})}, output_dims = lapply(categories, function(x) {'cat'}), prob_lims = categories, ncores = ncores) @@ -163,12 +164,23 @@ browser() # Compute Probabilities for (ps in 1:length(categories)) { - hcst_probs_ev[[ps]] <- GetProbs(ano_hcst_ev_res, time_dim = 'syear', - prob_thresholds = NULL, - bin_dim_abs = 'cat', - indices_for_quantiles = NULL, - memb_dim = 'ensemble', abs_thresholds = lims_ano_hcst_tr[[ps]], - ncores = ncores) + # create a list of unknown length of systems and limits: + target_dims_list <- append(list(lims = 'cat'), + lapply(ano_hcst_ev_res, function(x) { + c('syear', 'ensemble')})) + hcst_probs_ev[[ps]] <- Apply(append(list(lims = lims_ano_hcst_tr[[ps]]), + ano_hcst_ev_res), + target_dims = target_dims_list, + function(lims, ...) 
{ + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + GetProbs(res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims)}, + ncores = ncores)$output1 obs_probs_ev[[ps]] <- GetProbs(ano_obs_ev_res, time_dim = 'syear', prob_thresholds = NULL, bin_dim_abs = 'cat', @@ -176,144 +188,162 @@ browser() memb_dim = 'ensemble', abs_thresholds = lims_ano_obs_tr_res[[ps]], ncores = ncores) +fcst_probs <- NULL if (!is.null(data$fcst)) { - fcst_probs[[ps]] <- GetProbs(data$fcst$data, time_dim = 'syear', - prob_thresholds = NULL, - bin_dim_abs = 'cat', - indices_for_quantiles = NULL, - memb_dim = 'ensemble', - abs_thresholds = lims_fcst[[ps]], - ncores = ncores) +# fcst_probs[[ps]] <- Apply(append(list(lims = lims_fcst[[ps]]), +# ano_fcst), +# target_dims = list('cat', +# c('syear', 'ensemble')), +# function(lims, fcst) { + fcst_probs[[ps]] <- GetProbs(ano_fcst, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_fcst[[ps]], +#}, + ncores = ncores) } } + return(list(hcst = ano_hcst_ev_res, obs = ano_obs_ev_res, fcst = ano_fcst, + hcst.full_val = data$hcst, obs.full_val = data$obs, + #hcst_EM = hcst_EM, fcst_EM = fcst_EM, + cat_lims = list(hcst_tr = lims_ano_hcst_tr_res, + obs_tr = lims_ano_obs_tr_res, + fcst = lims_fcst), + probs = list(hcst_ev = hcst_probs_ev, + obs_ev = obs_probs_ev, fcst_probs), + ref_obs_tr = ano_obs_tr_res)) +} +##### TO SAVE edit the lines below: # Convert to s2dv_cubes the resulting anomalies - ano_hcst <- data$hcst - ano_hcst$data <- ano_hcst_ev_res - ano_obs <- data$obs - ano_obs$data <- ano_obs_ev_res +# ano_hcst <- data$hcst +# ano_hcst$data <- ano_hcst_ev_res +# ano_obs <- data$obs +# ano_obs$data <- ano_obs_ev_res - info(recipe$Run$logger, - "#### Anomalies and Probabilities Done #####") - if (recipe$Analysis$Workflow$Anomalies$save != 'none') { - info(recipe$Run$logger, "##### START SAVING ANOMALIES #####") - recipe$Run$output_dir <- paste0(recipe$Run$output_dir, - "/outputs/Anomalies/") +# info(recipe$Run$logger, +# "#### Anomalies and Probabilities Done #####") +# if (recipe$Analysis$Workflow$Anomalies$save != 'none') { +# info(recipe$Run$logger, "##### START SAVING ANOMALIES #####") +# recipe$Run$output_dir <- paste0(recipe$Run$output_dir, +# "/outputs/Anomalies/") # Save forecast - if ((recipe$Analysis$Workflow$Anomalies$save %in% - c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) { - save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') - } +# if ((recipe$Analysis$Workflow$Anomalies$save %in% +# c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) { +# save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') +# } # Save hindcast - if (recipe$Analysis$Workflow$Anomalies$save %in% - c('all', 'exp_only')) { - save_forecast(recipe = recipe, data_cube = ano_hcst, type = 'hcst') - } - # Save observation - if (recipe$Analysis$Workflow$Anomalies$save == 'all') { - save_observations(recipe = recipe, data_cube = ano_obs) - } - } +# if (recipe$Analysis$Workflow$Anomalies$save %in% +# c('all', 'exp_only')) { +# save_forecast(recipe = recipe, data_cube = ano_hcst, type = 'hcst') +# } +# # Save observation +# if (recipe$Analysis$Workflow$Anomalies$save == 'all') { +# save_observations(recipe = recipe, data_cube = ano_obs) +# } +# } # Save probability bins - probs_hcst <- list() - probs_fcst <- list() - probs_obs <- list() - 
all_names <- NULL +# probs_hcst <- list() +# probs_fcst <- list() +# probs_obs <- list() +# all_names <- NULL # Make categories rounded number to use as names: - categories <- recipe$Analysis$Workflow$Probabilities$percentiles - categories <- lapply(categories, function (x) { - sapply(x, function(y) { - round(eval(parse(text = y)),2)})}) +# categories <- recipe$Analysis$Workflow$Probabilities$percentiles +# categories <- lapply(categories, function (x) { +# sapply(x, function(y) { +# round(eval(parse(text = y)),2)})}) - for (ps in 1:length(categories)) { - for (perc in 1:(length(categories[[ps]]) + 1)) { - if (perc == 1) { - name_elem <- paste0("below_", categories[[ps]][perc]) - } else if (perc == length(categories[[ps]]) + 1) { - name_elem <- paste0("above_", categories[[ps]][perc-1]) - } else { - name_elem <- paste0("from_", categories[[ps]][perc-1], - "_to_", categories[[ps]][perc]) - } - probs_hcst <- append(list(Subset(hcst_probs_ev[[ps]], - along = 'cat', indices = perc, drop = 'all')), - probs_hcst) - probs_obs <- append(list(Subset(obs_probs_ev[[ps]], - along = 'cat', indices = perc, drop = 'all')), - probs_obs) - if (!is.null(data$fcst)) { - probs_fcst <- append(list(Subset(fcst_probs[[ps]], - along = 'cat', indices = perc, drop = 'all')), - probs_fcst) - } - all_names <- c(all_names, name_elem) - } - } - names(probs_hcst) <- all_names - if (!('var' %in% names(dim(probs_hcst[[1]])))) { - probs_hcst <- lapply(probs_hcst, function(x) { - dim(x) <- c(var = 1, dim(x)) - return(x)}) - } - names(probs_obs) <- all_names - if (!('var' %in% names(dim(probs_obs[[1]])))) { - probs_obs <- lapply(probs_obs, function(x) { - dim(x) <- c(var = 1, dim(x)) - return(x)}) - } + # for (ps in 1:length(categories)) { + # for (perc in 1:(length(categories[[ps]]) + 1)) { + # if (perc == 1) { + # name_elem <- paste0("below_", categories[[ps]][perc]) + # } else if (perc == length(categories[[ps]]) + 1) { + # name_elem <- paste0("above_", categories[[ps]][perc-1]) + # } else { + # name_elem <- paste0("from_", categories[[ps]][perc-1], + # "_to_", categories[[ps]][perc]) + # } + # probs_hcst <- append(list(Subset(hcst_probs_ev[[ps]], + # along = 'cat', indices = perc, drop = 'all')), + # probs_hcst) + # probs_obs <- append(list(Subset(obs_probs_ev[[ps]], + # along = 'cat', indices = perc, drop = 'all')), + # probs_obs) + # if (!is.null(data$fcst)) { + # probs_fcst <- append(list(Subset(fcst_probs[[ps]], + # along = 'cat', indices = perc, drop = 'all')), + # probs_fcst) + # } + # all_names <- c(all_names, name_elem) + # } + # } + # names(probs_hcst) <- all_names + # if (!('var' %in% names(dim(probs_hcst[[1]])))) { + # probs_hcst <- lapply(probs_hcst, function(x) { + # dim(x) <- c(var = 1, dim(x)) + # return(x)}) + # } + # names(probs_obs) <- all_names + # if (!('var' %in% names(dim(probs_obs[[1]])))) { + # probs_obs <- lapply(probs_obs, function(x) { + # dim(x) <- c(var = 1, dim(x)) + # return(x)}) + # } - if (!is.null(data$fcst)) { - names(probs_fcst) <- all_names - if (!('var' %in% names(dim(probs_fcst[[1]])))) { - probs_fcst <- lapply(probs_fcst, function(x) { - dim(x) <- c(var = 1, dim(x)) - return(x)}) - } - if (!('syear' %in% names(dim(probs_fcst[[1]])))) { - probs_fcst <- lapply(probs_fcst, function(x) { - dim(x) <- c(syear = 1, dim(x)) - return(x)}) - } - } - if (recipe$Analysis$Workflow$Probabilities$save %in% - c('all', 'bins_only')) { - save_probabilities(recipe = recipe, probs = probs_hcst, - data_cube = data$hcst, agg = agg, - type = "hcst") - save_probabilities(recipe = recipe, probs = 
probs_obs, - data_cube = data$hcst, agg = agg, - type = "obs") + # if (!is.null(data$fcst)) { + # names(probs_fcst) <- all_names + # if (!('var' %in% names(dim(probs_fcst[[1]])))) { + # probs_fcst <- lapply(probs_fcst, function(x) { + # dim(x) <- c(var = 1, dim(x)) + # return(x)}) + # } + # if (!('syear' %in% names(dim(probs_fcst[[1]])))) { + # probs_fcst <- lapply(probs_fcst, function(x) { + # dim(x) <- c(syear = 1, dim(x)) + # return(x)}) + # } + # } + # if (recipe$Analysis$Workflow$Probabilities$save %in% + # c('all', 'bins_only')) { + # save_probabilities(recipe = recipe, probs = probs_hcst, + # data_cube = data$hcst, agg = agg, + # type = "hcst") + # save_probabilities(recipe = recipe, probs = probs_obs, + # data_cube = data$hcst, agg = agg, + # type = "obs") # TODO Forecast - if (!is.null(probs_fcst)) { - save_probabilities(recipe = recipe, probs = probs_fcst, - data_cube = data$fcst, agg = agg, - type = "fcst") - } - } + # if (!is.null(probs_fcst)) { + # save_probabilities(recipe = recipe, probs = probs_fcst, + # data_cube = data$fcst, agg = agg, + # type = "fcst") + # } + # } # Save ensemble mean for multimodel option: - hcst_EM <- MeanDims(ano_hcst$data, 'ensemble', drop = T) - save_metrics(recipe = recipe, - metrics = list(hcst_EM = - Subset(hcst_EM, along = 'dat', indices = 1, drop = 'selected')), - data_cube = data$hcst, agg = agg, - module = "statistics") - fcst_EM <- NULL - if (!is.null(data$fcst)) { - fcst_EM <- MeanDims(data$fcst$data, 'ensemble', drop = T) - save_metrics(recipe = recipe, - metrics = list(fcst_EM = - Subset(fcst_EM, along = 'dat', indices = 1, drop = 'selected')), - data_cube = data$fcst, agg = agg, - module = "statistics") - } - return(list(hcst = ano_hcst, obs = ano_obs, fcst = data$fcst, - hcst.full_val = data$hcst, obs.full_val = data$obs, - hcst_EM = hcst_EM, fcst_EM = fcst_EM, - cat_lims = list(hcst_tr = lims_ano_hcst_tr_res, - obs_tr = lims_ano_obs_tr_res), - probs = list(hcst_ev = hcst_probs_ev, - obs_ev = obs_probs_ev), - ref_obs_tr = ano_obs_tr_res)) -} + # hcst_EM <- MeanDims(ano_hcst$data, 'ensemble', drop = T) + # save_metrics(recipe = recipe, + # metrics = list(hcst_EM = + # Subset(hcst_EM, along = 'dat', indices = 1, drop = 'selected')), + # data_cube = data$hcst, agg = agg, + # module = "statistics") + #fcst_EM <- NULL + #if (!is.null(data$fcst)) { + # fcst_EM <- MeanDims(data$fcst$data, 'ensemble', drop = T) + # save_metrics(recipe = recipe, + # metrics = list(fcst_EM = + # Subset(fcst_EM, along = 'dat', indices = 1, drop = 'selected')), + # data_cube = data$fcst, agg = agg, + # module = "statistics") + #} + #return(list(hcst = ano_hcst, obs = ano_obs, fcst = data$fcst, + # hcst.full_val = data$hcst, obs.full_val = data$obs, + # hcst_EM = hcst_EM, fcst_EM = fcst_EM, + # cat_lims = list(hcst_tr = lims_ano_hcst_tr_res, + # obs_tr = lims_ano_obs_tr_res), + # probs = list(hcst_ev = hcst_probs_ev, + # obs_ev = obs_probs_ev), + # ref_obs_tr = ano_obs_tr_res)) +#} ## The result contains the inputs for Skill_full_crossval. 
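[Editor's note] The probability step kept above hinges on GetProbs() being driven by
pre-computed absolute thresholds (abs_thresholds) rather than quantiles estimated inside
the call. A minimal sketch of that usage, assuming the tmp/GetProbs.R sourced by this
module and the dimension names used in the patch ('syear', 'ensemble', 'cat'); the data
and threshold values are toys, not taken from the patch:

  set.seed(1)
  ano <- array(rnorm(8 * 5), c(syear = 8, ensemble = 5))
  # one pair of tercile bounds per left-out year, as built in the training loop
  lims <- array(rep(c(-0.43, 0.43), 8), c(cat = 2, syear = 8))
  probs <- GetProbs(ano, time_dim = 'syear', memb_dim = 'ensemble',
                    prob_thresholds = NULL, indices_for_quantiles = NULL,
                    bin_dim_abs = 'cat', abs_thresholds = lims)
  dim(probs)  # 'cat' becomes length 3: below, between and above the bounds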
diff --git a/modules/Crossval/Crossval_multimodel_skill.R b/modules/Crossval/Crossval_multimodel_skill.R
new file mode 100644
index 00000000..326b9c39
--- /dev/null
+++ b/modules/Crossval/Crossval_multimodel_skill.R
@@ -0,0 +1,269 @@
+# One function to compute all the possible metrics or statistics
+# datos is a list returned by load_multimodel or load_multimodel_mean
+ ## this reduces memory and can be used to compute several metrics
+ ## in the mean the ensemble dim is length 1 for each model
+# probs is a list returned by load_multimodel_probs
+ ## this is needed to compute rps/rpss
+source("modules/Saving/Saving.R")
+source("modules/Crossval/R/tmp/RPS.R")
+source("modules/Crossval/R/RPS_clim.R")
+source("modules/Crossval/R/CRPS_clim.R")
+source("modules/Crossval/R/tmp/RPSS.R")
+source("modules/Crossval/R/tmp/RandomWalkTest.R")
+source("modules/Crossval/R/tmp/Corr.R")
+source("modules/Crossval/R/tmp/Bias.R")
+source("modules/Crossval/R/tmp/SprErr.R")
+source("modules/Crossval/R/tmp/Eno.R")
+source("modules/Crossval/R/tmp/GetProbs.R")
+source("modules/Visualization/Visualization.R") # to load .warning from utils
+multimodel_metrics <- function(recipe,
+                               data = NULL,
+                               Fair = FALSE) {
+  ncores <- recipe$Analysis$ncores
+  na.rm <- recipe$Analysis$remove_NAs
+  cross.method <- recipe$Analysis$cross.method
+  if (is.null(cross.method)) {
+    cross.method <- 'leave-one-out'
+  }
+  # Prepare indices for crossval
+  sdate_dim <- dim(data$hcst[[1]])['syear']
+  cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method,
+                                            amt.points = sdate_dim,
+                                            amt.points_cor = NULL)
+  tmp <- array(1:length(cross), c(syear = length(cross)))
+  alpha <- recipe$Analysis$alpha
+  if (is.null(alpha)) {
+    alpha <- 0.05
+  }
+
+  requested_metrics <- strsplit(recipe$Analysis$Workflow$Skill$metric,
+                                ", | |,")[[1]]
+  skill_metrics <- list()
+  if (!is.null(data)) {
+    # convert $data elements to a list to use multiApply:
+    datos <- append(list(obs = data$obs), data$hcst)
+    ## CRPS metrics only make sense in pool method:
+    if (any(c('crps', 'crpss') %in% requested_metrics)) {
+      crps <- Apply(datos, target_dims = c('syear', 'ensemble'),
+                    fun = function(obs, ...) {
+                      res <- abind(..., along = 2)
+                      names(dim(res)) <- names(dim(obs))
+                      obs <- Subset(obs, along = 'ensemble',
+                                    indices = 1, drop = 'selected')
+                      s2dv:::.CRPS(exp = res, obs = obs, dat_dim = NULL,
+                                   time_dim = 'syear',
+                                   memb_dim = 'ensemble')},
+                    ncores = ncores)$output1
+      skill_metrics$crps <- crps
+      # Build the reference forecast:
+      ref_clim <- Apply(list(datos$obs, tmp),
+                        target_dims = list(c('ensemble', 'syear'), NULL),
+                        function(x, y) {
+                          Subset(x, along = 'syear',
+                                 indices = cross[[y]]$train.dexes, drop = T)},
+                        ncores = ncores, output_dims = 'ensemble')$output1
+      datos <- append(list(ref = ref_clim), datos)
+      crps_clim <- Apply(datos[1:2], target_dims = c('syear', 'ensemble'),
+                         fun = function(ref, obs) {
+                           obs <- Subset(obs, along = 'ensemble',
+                                         indices = 1, drop = 'selected')
+                           s2dv:::.CRPS(exp = ref, obs = obs, dat_dim = NULL,
+                                        time_dim = 'syear', memb_dim = 'ensemble')},
+                         ncores = ncores)$output1
+      skill_metrics$crps_clim <- crps_clim
+
+
+      crpss <- Apply(datos, target_dims = c('syear', 'ensemble'),
+                     fun = function(ref, obs, ...)
{
+                       res <- abind(..., along = 2)
+                       names(dim(res)) <- names(dim(obs))
+                       obs <- Subset(obs, along = 'ensemble',
+                                     indices = 1, drop = 'selected')
+                       CRPSS(exp = res, obs = obs, ref = ref,
+                             memb_dim = 'ensemble', Fair = FALSE,
+                             time_dim = 'syear', clim.cross.val = FALSE)},
+                     ncores = ncores)
+      skill_metrics$crpss <- crpss$crpss
+      skill_metrics$crpss_significance <- crpss$sign
+      datos <- datos[-1]
+    }
+    ## Deterministic metrics using ensemble mean:
+    if ('enscorr' %in% requested_metrics) {
+      enscorr <- Apply(datos, target_dims = c('syear', 'ensemble'),
+                       function(obs, ...) {
+                         res <- abind(..., along = 2)
+                         names(dim(res)) <- c('syear', 'ensemble')
+                         Corr(exp = res,
+                              obs = obs,
+                              dat_dim = NULL,
+                              time_dim = 'syear',
+                              method = 'pearson',
+                              memb_dim = 'ensemble',
+                              memb = F,
+                              conf = F,
+                              pval = F,
+                              sign = T,
+                              alpha = alpha)}, ncores = ncores)
+      skill_metrics$enscorr <- enscorr$corr
+      skill_metrics$enscorr_significance <- enscorr$sign
+    }
+    if ('mean_bias' %in% requested_metrics) {
+      ## Mean Bias can make sense for non-anomalies (e.g. calibrated data)
+      mean_bias <- Apply(datos, target_dims = c('syear', 'ensemble'),
+                         function(obs, ...) {
+                           res <- abind(..., along = 2)
+                           names(dim(res)) <- c('syear', 'ensemble')
+                           Bias(exp = res,
+                                obs = obs,
+                                time_dim = 'syear',
+                                memb_dim = 'ensemble',
+                                alpha = alpha)},
+                         ncores = ncores)
+      skill_metrics$mean_bias <- mean_bias$bias
+      skill_metrics$mean_bias_significance <- mean_bias$sig
+    }
+    if ('rms' %in% requested_metrics) {
+      rms <- Apply(datos, target_dims = c('syear', 'ensemble'),
+                   function(obs, ...) {
+                     res <- abind(..., along = 2)
+                     names(dim(res)) <- c('syear', 'ensemble')
+                     RMS(exp = res,
+                         obs = obs,
+                         memb_dim = 'ensemble', dat_dim = NULL,
+                         time_dim = 'syear', alpha = alpha)},
+                   ncores = ncores)
+      skill_metrics$rms <- rms$rms
+    }
+    ####### Do we need reference fcst to compute rmss??
+    if ('rmss' %in% requested_metrics) {
+      # if (is.null(res$ref_obs_tr)) {
+      # }
+      rmss <- Apply(datos, target_dims = c('syear','ensemble'),
+                    function(obs, ...) {
+                      res <- abind(..., along = 2)
+                      names(dim(res)) <- c('syear', 'ensemble')
+                      RMSSS(exp = res,
+                            obs = obs,
+                            #ref = res$ref_obs_tr,
+                            memb_dim = 'ensemble', dat_dim = NULL,
+                            time_dim = 'syear', alpha = alpha, sign = TRUE)},
+                    ncores = ncores)
+      skill_metrics$rmss <- rmss$rmss
+      skill_metrics$rmss_significance <- rmss$sign
+    }
+    if (any(c('std', 'standard_deviation') %in% requested_metrics)) {
+      # These are used to compute correlation for the scorecards
+      # It's the temporal standard deviation of the ensmean
+      std_hcst <- Apply(datos[-1], target_dims = c('syear', 'ensemble'),
+                        function(...) {
+                          res <- abind(..., along = 2)
+                          names(dim(res)) <- c('syear', 'ensemble')
+                          res <- MeanDims(res, 'ensemble')
+                          sd(as.vector(res))},
+                        ncores = ncores)$output1
+
+      std_obs <- Apply(datos[1], target_dims = c('syear', 'ensemble'),
+                       fun = 'sd')$output1
+      skill_metrics[['std_hcst']] <- std_hcst
+      skill_metrics[['std_obs']] <- std_obs
+    }
+
+    if (any(c('var', 'variance') %in% requested_metrics)) {
+      ## Calculate variance
+      var_hcst <- Apply(datos[-1], target_dims = c('syear', 'ensemble'),
+                        fun = function(...)
{
+                          res <- abind(..., along = 2)
+                          names(dim(res)) <- c('syear', 'ensemble')
+                          res <- MeanDims(res, 'ensemble')
+                          variance <- var(as.vector(res))},
+                        ncores = ncores)$output1
+
+      var_obs <- Apply(datos[1], target_dims = c('syear', 'ensemble'),
+                       fun = function(x) {var(as.vector(x))},
+                       ncores = ncores)$output1
+      skill_metrics[['var_hcst']] <- var_hcst
+      skill_metrics[['var_obs']] <- var_obs
+    }
+    if ('n_eff' %in% requested_metrics) {
+      ## Calculate degrees of freedom
+      n_eff <- s2dv::Eno(datos[[1]], time_dim = 'syear',
+                         na.action = na.pass,
+                         ncores = ncores)
+      skill_metrics[['n_eff']] <- n_eff
+    }
+    if (any(c('cov', 'covariance') %in% requested_metrics)) {
+      # The covariance of the ensemble mean
+      covariance <- Apply(datos, target_dims = c('syear', 'ensemble'),
+                          fun = function(obs, ...) {
+                            res <- abind(..., along = 2)
+                            names(dim(res)) <- c('syear', 'ensemble')
+                            res <- MeanDims(res, 'ensemble')
+                            cov(as.vector(obs), as.vector(res),
+                                use = "everything",
+                                method = "pearson")},
+                          ncores = ncores)$output1
+      skill_metrics$covariance <- covariance
+    }
+  }
+  # Probabilistic metrics for categories
+  categories <- recipe$Analysis$Workflow$Probabilities$percentiles
+  categories <- lapply(categories, function (x) {
+                   sapply(x, function(y) {
+                     eval(parse(text = y))})})
+  # TODO: distinguish between rpss and bss
+  # if 1 percentile -> bss
+  # if more than 1 -> rpss
+  exe_rps <- unlist(lapply(categories, function(x) {
+                      if (length(x) > 1) {
+                        x <- x[1] *100
+                      }
+                      return(x)}))
+  if (!is.null(datos)) {
+    if (Fair) {
+      nmemb <- sum(unlist(lapply(datos[-1], function(x) dim(x)['ensemble'])))
+    } else {
+      nmemb <- NULL
+    }
+  } else {
+    nmemb <- NULL
+  }
+
+  # Compute rps
+  for (ps in 1:length(exe_rps)) {
+    if ('rps' %in% requested_metrics) {
+      rps <- RPS(exp = data$probs$hcst[[ps]],
+                 obs = data$probs$obs[[ps]], memb_dim = NULL,
+                 cat_dim = 'cat', cross.val = FALSE, time_dim = 'syear',
+                 Fair = Fair, nmemb = nmemb,
+                 ncores = ncores)
+      rps_clim <- Apply(list(data$probs$obs[[ps]]),
+                        target_dims = c('cat', 'syear'),
+                        RPS_clim, bin_dim_abs = 'cat', Fair = Fair,
+                        cross.val = FALSE, ncores = ncores)$output1
+      skill_metrics$rps <- rps
+      skill_metrics$rps_clim <- rps_clim
+      # names based on the categories:
+      # To use it when visualization works for more rps
+      #skill_metrics[[paste0('rps', exe_rps[ps])]] <- rps
+      #skill_metrics[[paste0('rps_clim',
+      #                      exe_rps[ps])]] <- rps_clim
+    }
+    if ('rpss' %in% requested_metrics) {
+      rpss <- RPSS(exp = data$probs$hcst[[ps]],
+                   obs = data$probs$obs[[ps]],
+                   ref = NULL, # ref is 1/3 by default if terciles
+                   time_dim = 'syear', memb_dim = NULL,
+                   cat_dim = 'cat', nmemb = nmemb,
+                   dat_dim = NULL,
+                   prob_thresholds = categories[[ps]], # unused param when providing probs
+                   indices_for_clim = NULL,
+                   Fair = Fair, weights_exp = NULL, weights_ref = NULL,
+                   cross.val = FALSE, na.rm = na.rm,
+                   sig_method.type = 'two.sided.approx', alpha = alpha,
+                   ncores = ncores)
+      skill_metrics$rpss <- rpss$rpss
+      skill_metrics$rpss_significance <- rpss$sign
+    }
+  }
+  return(skill_metrics)
+}
--
GitLab

From f6d8b7c0ead927f63c6a49db24107609e73755a4 Mon Sep 17 00:00:00 2001
From: nperez
Date: Mon, 10 Jun 2024 15:12:41 +0200
Subject: [PATCH 35/78] fixes and renames

---
 full_ecvs_anomalies.R                              |  4 ++--
 full_ecvs_multimodel_anomalies.R                   |  4 ++--
 .../{Crossval_skill.R => Crossval_metris.R}        |  2 +-
 ...model_skill.R => Crossval_multimodel_metrics.R} | 14 +++++++++-----
 modules/Visualization/R/plot_metrics.R             |  1 +
 modules/{ => old_modules}/Multimodel/Multimodel.R  |  0
 .../Multimodel/Multimodel_skill.R                  |  0
.../Multimodel/build_multimodel.R | 0 .../Multimodel/clean_multimodel.R | 0 .../{ => old_modules}/Multimodel/load_multimodel.R | 0 .../Multimodel/load_multimodel_mean.R | 0 .../Multimodel/load_multimodel_probs.R | 0 12 files changed, 15 insertions(+), 10 deletions(-) rename modules/Crossval/{Crossval_skill.R => Crossval_metris.R} (99%) rename modules/Crossval/{Crossval_multimodel_skill.R => Crossval_multimodel_metrics.R} (96%) rename modules/{ => old_modules}/Multimodel/Multimodel.R (100%) rename modules/{ => old_modules}/Multimodel/Multimodel_skill.R (100%) rename modules/{ => old_modules}/Multimodel/build_multimodel.R (100%) rename modules/{ => old_modules}/Multimodel/clean_multimodel.R (100%) rename modules/{ => old_modules}/Multimodel/load_multimodel.R (100%) rename modules/{ => old_modules}/Multimodel/load_multimodel_mean.R (100%) rename modules/{ => old_modules}/Multimodel/load_multimodel_probs.R (100%) diff --git a/full_ecvs_anomalies.R b/full_ecvs_anomalies.R index 1c873a0b..769db0e4 100644 --- a/full_ecvs_anomalies.R +++ b/full_ecvs_anomalies.R @@ -18,8 +18,8 @@ data_summary(data$obs, recipe) source("modules/Crossval/Crossval_anomalies.R") res <- Crossval_anomalies(recipe = recipe, data = data) -source("modules/Crossval/Crossval_skill.R") -skill_metrics <- Crossval_skill(recipe = recipe, data_crossval = res, +source("modules/Crossval/Crossval_metrics.R") +skill_metrics <- Crossval_metrics(recipe = recipe, data_crossval = res, fair = FALSE, nmemb = NULL, nmemb_ref = NULL) Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, significance = TRUE) diff --git a/full_ecvs_multimodel_anomalies.R b/full_ecvs_multimodel_anomalies.R index 73970fa8..d1c535ec 100644 --- a/full_ecvs_multimodel_anomalies.R +++ b/full_ecvs_multimodel_anomalies.R @@ -33,8 +33,8 @@ rm(list = 'datos') source("modules/Crossval/Crossval_multimodel_anomalies.R") res <- Crossval_multimodel_anomalies(recipe = original_recipe, data = data) -source("modules/Crossval/Crossval_multimodel_skill.R") -skill_metrics <- multimodel_metrics(recipe = original_recipe, +source("modules/Crossval/Crossval_multimodel_metrics.R") +skill_metrics <- Crossval_multimodel_metrics(recipe = original_recipe, data = res, Fair = FALSE) diff --git a/modules/Crossval/Crossval_skill.R b/modules/Crossval/Crossval_metris.R similarity index 99% rename from modules/Crossval/Crossval_skill.R rename to modules/Crossval/Crossval_metris.R index 417ee33b..dd31d23a 100644 --- a/modules/Crossval/Crossval_skill.R +++ b/modules/Crossval/Crossval_metris.R @@ -33,7 +33,7 @@ source("modules/Crossval/R/tmp/Eno.R") ## the recipe could be used to read the Percentiles ## if fair is TRUE, the nmemb used to compute the probabilities is needed ## nmemb_ref is the number of year - 1 in case climatological forecast is the reference -Crossval_skill <- function(recipe, data_crossval, +Crossval_metrics <- function(recipe, data_crossval, fair = FALSE, nmemb = NULL, nmemb_ref = NULL) { ncores <- recipe$Analysis$ncores alpha <- recipe$Analysis$Skill$alpha diff --git a/modules/Crossval/Crossval_multimodel_skill.R b/modules/Crossval/Crossval_multimodel_metrics.R similarity index 96% rename from modules/Crossval/Crossval_multimodel_skill.R rename to modules/Crossval/Crossval_multimodel_metrics.R index 326b9c39..7f6a7292 100644 --- a/modules/Crossval/Crossval_multimodel_skill.R +++ b/modules/Crossval/Crossval_multimodel_metrics.R @@ -16,7 +16,7 @@ source("modules/Crossval/R/tmp/SprErr.R") source("modules/Crossval/R/tmp/Eno.R") 
source("modules/Crossval/R/tmp/GetProbs.R") source("modules/Visualization/Visualization.R") # to load .warning from utils -multimodel_metrics <- function(recipe, +Crossval_multimodel_metrics <- function(recipe, data = NULL, Fair = FALSE) { ncores <- recipe$Analysis$ncores @@ -50,9 +50,9 @@ multimodel_metrics <- function(recipe, names(dim(res)) <- names(dim(obs)) obs <- Subset(obs, along = 'ensemble', indices = 1, drop = 'selected') - s2dv:::.CRPS(exp = res, obs = obs, dat_dim = NULL, + mean(s2dv:::.CRPS(exp = res, obs = obs, dat_dim = NULL, time_dim = 'syear', - memb_dim = 'ensemble')}, + memb_dim = 'ensemble'))}, ncores = ncores)$output1 skill_metrics$crps <- crps # Build the reference forecast: @@ -67,8 +67,9 @@ multimodel_metrics <- function(recipe, fun = function(ref, obs) { obs <- Subset(obs, along = 'ensemble', indices = 1, drop = 'selected') - s2dv:::.CRPS(exp = ref, obs = obs, dat_dim = NULL, - time_dim = 'syear', memb_dim = 'ensemble')}, + mean(s2dv:::.CRPS(exp = ref, obs = obs, + dat_dim = NULL, + time_dim = 'syear', memb_dim = 'ensemble'))}, ncores = ncores)$output1 skill_metrics$crps_clim <- crps_clim @@ -265,5 +266,8 @@ multimodel_metrics <- function(recipe, skill_metrics$rpss_significance <- rpss$sign } } + skill_metrics <- lapply(skill_metrics, function(x) {drop(x)}) + skill_metrics <- lapply(skill_metrics, function(x){ + InsertDim(x, pos = 1, len = 1, name = 'var')}) return(skill_metrics) } diff --git a/modules/Visualization/R/plot_metrics.R b/modules/Visualization/R/plot_metrics.R index 2df06e8b..3ade1a08 100644 --- a/modules/Visualization/R/plot_metrics.R +++ b/modules/Visualization/R/plot_metrics.R @@ -173,6 +173,7 @@ plot_metrics <- function(recipe, data_cube, metrics, } # Reorder dimensions +browser() metric <- Reorder(metric, c("time", "longitude", "latitude")) # If the significance has been requested and the variable has it, # retrieve it and reorder its dimensions. 
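[Editor's note] The browser() added to plot_metrics.R above is a leftover
interactive-debugging call; PATCH 37 below removes it again. The two lapply()
calls appended to the multimodel metrics function deserve a small sketch: they
drop length-1 dimensions and then restore a leading 'var' dimension, presumably
so every metric array matches the shape the Saving and Visualization modules
expect. A toy version (s2dv attached, as in the module; the array and its
dimension names are invented for illustration):

  library(s2dv)
  m <- array(rnorm(6), c(dat = 1, time = 2, region = 3))
  m <- drop(m)                                       # -> time x region
  m <- InsertDim(m, pos = 1, len = 1, name = 'var')  # -> var x time x region
  dim(m)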
diff --git a/modules/Multimodel/Multimodel.R b/modules/old_modules/Multimodel/Multimodel.R similarity index 100% rename from modules/Multimodel/Multimodel.R rename to modules/old_modules/Multimodel/Multimodel.R diff --git a/modules/Multimodel/Multimodel_skill.R b/modules/old_modules/Multimodel/Multimodel_skill.R similarity index 100% rename from modules/Multimodel/Multimodel_skill.R rename to modules/old_modules/Multimodel/Multimodel_skill.R diff --git a/modules/Multimodel/build_multimodel.R b/modules/old_modules/Multimodel/build_multimodel.R similarity index 100% rename from modules/Multimodel/build_multimodel.R rename to modules/old_modules/Multimodel/build_multimodel.R diff --git a/modules/Multimodel/clean_multimodel.R b/modules/old_modules/Multimodel/clean_multimodel.R similarity index 100% rename from modules/Multimodel/clean_multimodel.R rename to modules/old_modules/Multimodel/clean_multimodel.R diff --git a/modules/Multimodel/load_multimodel.R b/modules/old_modules/Multimodel/load_multimodel.R similarity index 100% rename from modules/Multimodel/load_multimodel.R rename to modules/old_modules/Multimodel/load_multimodel.R diff --git a/modules/Multimodel/load_multimodel_mean.R b/modules/old_modules/Multimodel/load_multimodel_mean.R similarity index 100% rename from modules/Multimodel/load_multimodel_mean.R rename to modules/old_modules/Multimodel/load_multimodel_mean.R diff --git a/modules/Multimodel/load_multimodel_probs.R b/modules/old_modules/Multimodel/load_multimodel_probs.R similarity index 100% rename from modules/Multimodel/load_multimodel_probs.R rename to modules/old_modules/Multimodel/load_multimodel_probs.R -- GitLab From fb05eb5263c4b1702c7f5da7f957244ceea9ad7b Mon Sep 17 00:00:00 2001 From: Nuria Perez Zanon Date: Tue, 11 Jun 2024 11:54:41 +0200 Subject: [PATCH 36/78] use grid type from conf --- .gitignore | 2 + conf/archive.yml | 4 +- full_ecvs_anomalies.R | 2 +- recipe_tas.yml | 21 +++++---- recipe_tas_singl.yml | 105 ++++++++++++++++++++++++++++++++++++++++++ sunset.sh | 14 ++++++ 6 files changed, 135 insertions(+), 13 deletions(-) create mode 100644 recipe_tas_singl.yml create mode 100644 sunset.sh diff --git a/.gitignore b/.gitignore index f05c28f4..9244f19d 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,5 @@ ecsbatch.log* modules/Loading/testing_recipes/recipe_decadal_calendartest.yml modules/Loading/testing_recipes/recipe_decadal_daily_calendartest.yml conf/vitigeoss-vars-dict.yml +*.err +*.out diff --git a/conf/archive.yml b/conf/archive.yml index 22986cd2..6c5c7379 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -110,8 +110,8 @@ gpfs: "prlr":"monthly_mean/prlr_f1h-r1440x721cds/", "sfcWind":"monthly_mean/sfcWind_f1h-r1440x721cds/"} calendar: "standard" - reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" - land_sea_mask: "/esarchive/recon/ecmwf/era5/constant/lsm-r1440x721cds/sftof.nc" + reference_grid: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" + land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/constant/lsm-r1440x721cds/sftof.nc" ######################################################################### esarchive: diff --git a/full_ecvs_anomalies.R b/full_ecvs_anomalies.R index 769db0e4..dc83ebb7 100644 --- a/full_ecvs_anomalies.R +++ b/full_ecvs_anomalies.R @@ -5,7 +5,7 @@ source("modules/Units/Units.R") source("modules/Visualization/Visualization.R") args = commandArgs(trailingOnly = TRUE) recipe_file <- args[1] -#recipe_file 
<- "recipe_tas.yml" +#recipe_file <- "recipe_tas_singl.yml" recipe <- read_atomic_recipe(recipe_file) #recipe <- prepare_outputs(recipe_file) # Load datasets diff --git a/recipe_tas.yml b/recipe_tas.yml index 5af89ec1..2f995c9a 100644 --- a/recipe_tas.yml +++ b/recipe_tas.yml @@ -11,8 +11,9 @@ Analysis: flux: no Datasets: System: + - {name: 'Meteo-France-System8'} + - {name: 'CMCC-SPS3.5'} - {name: 'ECMWF-SEAS5.1'} - - {name: 'Meteo-France-System7'} # name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 Multimodel: execute: yes @@ -26,15 +27,15 @@ Analysis: hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' hcst_end: '2000' # Mandatory, int: Hindcast end year 'YYYY' ftime_min: 1 # Mandatory, int: First leadtime time step in months - ftime_max: 3 # Mandatory, int: Last leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months Region: - latmin: 20 - latmax: 10 - lonmin: -20 - lonmax: 10 + latmin: -90 + latmax: 90 + lonmin: 0 + lonmax: 359.9 Regrid: method: conservative # Mandatory, str: Interpolation method. See docu. - type: "to_reference" + type: "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_reference" #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. Workflow: Anomalies: @@ -85,9 +86,9 @@ Analysis: Run: Loglevel: INFO Terminal: yes - filesystem: esarchive - output_dir: /esarchive/scratch/nperez/git4/ # replace with the directory where you want to save the outputs - code_dir: /esarchive/scratch/nperez/git4/sunset/ # replace with the directory where your code is + filesystem: gpfs + output_dir: /home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /home/bsc/bsc032339/sunset/ # replace with the directory where your code is autosubmit: no # fill only if using autosubmit auto_conf: diff --git a/recipe_tas_singl.yml b/recipe_tas_singl.yml new file mode 100644 index 00000000..6c509521 --- /dev/null +++ b/recipe_tas_singl.yml @@ -0,0 +1,105 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: tas + freq: monthly_mean + units: C + flux: no + Datasets: + System: + #- {name: 'ECMWF-SEAS5.1'} + #- {name: 'Meteo-France-System8'} + #- {name: 'CMCC-SPS3.5'} + name: ECMWF-SEAS5.1 #Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + Multimodel: + execute: no + approach: pooled # Mandatory, bool: Either yes/true or no/false + createFrom: Anomalies + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0501' + fcst_year: '2021' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2000' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + latmin: -90 + latmax: 90 + lonmin: 0 + lonmax: 359.9 + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. 
+ type: "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_system" #"to_reference" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: yes + cross_validation: no + save: all + Time_aggregation: + execute: no + Calibration: + method: raw # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov std n_eff + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. + save: all + Indicators: + index: no + Visualization: + plots: skill_metrics #forecast_ensemble_mean most_likely_terciles + multi_panel: no + dots: both + projection: Robinson + file_format: 'PNG' + #projection: robinson + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: NULL + metric: mean_bias enscorr rpss crpss EnsSprErr + metric_aggregation: 'skill' + #inf_to_na: yes + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: gpfs + output_dir: /home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /home/bsc/bsc032339/sunset/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. 
+ email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + diff --git a/sunset.sh b/sunset.sh new file mode 100644 index 00000000..7b2c3214 --- /dev/null +++ b/sunset.sh @@ -0,0 +1,14 @@ +#!/bin/bash +#SBATCH -n 8 +#SBATCH -t 01:00:00 +#SBATCH -J sunset_multimodel +#SBATCH -o sunset_multimodel-%J.out +#SBATCH -e sunset_multimodel-%J.err +#SBATCH --account=bsc32 +#SBATCH --qos=acc_bsces + +source /gpfs/projects/bsc32/software/suselinux/11/software/Miniconda3/4.7.10/etc/profile.d/conda.sh +conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/SUNSET-env_2.0.0 + +Rscript /home/bsc/bsc032339/sunset/full_ecvs_multimodel_anomalies.R + -- GitLab From 642d5da27f7acc4dd31fd006e1085d7b73d124e9 Mon Sep 17 00:00:00 2001 From: Nuria Perez Zanon Date: Wed, 12 Jun 2024 17:28:21 +0200 Subject: [PATCH 37/78] full-crossval in MN5 --- full_ecvs_multimodel_anomalies.R | 1 + modules/Visualization/R/plot_metrics.R | 1 - recipe_tas.yml | 6 +++++- sunset.sh | 2 +- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/full_ecvs_multimodel_anomalies.R b/full_ecvs_multimodel_anomalies.R index d1c535ec..00e94036 100644 --- a/full_ecvs_multimodel_anomalies.R +++ b/full_ecvs_multimodel_anomalies.R @@ -30,6 +30,7 @@ datos$obs <- data$obs data <- datos rm(list = 'datos') +original_recipe$Analysis$ncores <- 32 source("modules/Crossval/Crossval_multimodel_anomalies.R") res <- Crossval_multimodel_anomalies(recipe = original_recipe, data = data) diff --git a/modules/Visualization/R/plot_metrics.R b/modules/Visualization/R/plot_metrics.R index 3ade1a08..2df06e8b 100644 --- a/modules/Visualization/R/plot_metrics.R +++ b/modules/Visualization/R/plot_metrics.R @@ -173,7 +173,6 @@ plot_metrics <- function(recipe, data_cube, metrics, } # Reorder dimensions -browser() metric <- Reorder(metric, c("time", "longitude", "latitude")) # If the significance has been requested and the variable has it, # retrieve it and reorder its dimensions. diff --git a/recipe_tas.yml b/recipe_tas.yml index 2f995c9a..ede20ab1 100644 --- a/recipe_tas.yml +++ b/recipe_tas.yml @@ -14,6 +14,10 @@ Analysis: - {name: 'Meteo-France-System8'} - {name: 'CMCC-SPS3.5'} - {name: 'ECMWF-SEAS5.1'} + #- {name: 'UK-MetOffice-Glosea601'} + #- {name: 'NCEP-CFSv2'} + #- {name: 'DWD-GCFS2.1'} + #- {name: 'ECCC-CanCM4i'} # name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 Multimodel: execute: yes @@ -23,7 +27,7 @@ Analysis: name: ERA5 # Mandatory, str: Reference codename. See docu. 
Time: sdate: '0501' - fcst_year: '2021' + fcst_year: #'2021' hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' hcst_end: '2000' # Mandatory, int: Hindcast end year 'YYYY' ftime_min: 1 # Mandatory, int: First leadtime time step in months diff --git a/sunset.sh b/sunset.sh index 7b2c3214..c37bc9bb 100644 --- a/sunset.sh +++ b/sunset.sh @@ -1,5 +1,5 @@ #!/bin/bash -#SBATCH -n 8 +#SBATCH -n 16 #SBATCH -t 01:00:00 #SBATCH -J sunset_multimodel #SBATCH -o sunset_multimodel-%J.out -- GitLab From 4fb5d08213c21bfd05861d0ba4d7d7353ac44ce4 Mon Sep 17 00:00:00 2001 From: Nuria Perez Zanon Date: Thu, 13 Jun 2024 17:54:12 +0200 Subject: [PATCH 38/78] run for individual and multimodel --- full_ecvs_multimodel_anomalies.R | 13 ++++++++++++- .../{Crossval_metris.R => Crossval_metrics.R} | 0 recipe_tas.yml | 11 ++++++----- 3 files changed, 18 insertions(+), 6 deletions(-) rename modules/Crossval/{Crossval_metris.R => Crossval_metrics.R} (100%) diff --git a/full_ecvs_multimodel_anomalies.R b/full_ecvs_multimodel_anomalies.R index 00e94036..1416b72a 100644 --- a/full_ecvs_multimodel_anomalies.R +++ b/full_ecvs_multimodel_anomalies.R @@ -11,15 +11,27 @@ recipe_file <- "recipe_tas.yml" original_recipe <- prepare_outputs(recipe_file, disable_checks = TRUE) # Load datasets models <- unlist(original_recipe$Analysis$Datasets$System) + +original_recipe$Analysis$ncores <- 32 + recipe_aux <- original_recipe datos <- list() datos$hcst <- list() datos$fcst <- list() +source("modules/Crossval/Crossval_metrics.R") +source("modules/Crossval/Crossval_anomalies.R") for (sys in models) { recipe_aux$Analysis$Datasets$System <- NULL recipe_aux$Analysis$Datasets$System$name <- as.vector(sys) data <- Loading(recipe = recipe_aux) data <- Units(recipe_aux, data) + # verification individual models + product <- Crossval_anomalies(recipe = recipe_aux, data = data) + skill_model <- Crossval_metrics(recipe = recipe_aux, data_crossval = product, + fair = FALSE, nmemb = NULL, nmemb_ref = NULL) + Visualization(recipe = recipe_aux, data = data, + skill_metrics = skill_model, significance = TRUE) + ## end ver datos$hcst <- append(datos$hcst, list(data$hcst)) datos$fcst <- append(datos$fcst, list(data$fcst)) names(datos$hcst)[length(datos$hcst)] <- gsub('\\.','', sys) @@ -30,7 +42,6 @@ datos$obs <- data$obs data <- datos rm(list = 'datos') -original_recipe$Analysis$ncores <- 32 source("modules/Crossval/Crossval_multimodel_anomalies.R") res <- Crossval_multimodel_anomalies(recipe = original_recipe, data = data) diff --git a/modules/Crossval/Crossval_metris.R b/modules/Crossval/Crossval_metrics.R similarity index 100% rename from modules/Crossval/Crossval_metris.R rename to modules/Crossval/Crossval_metrics.R diff --git a/recipe_tas.yml b/recipe_tas.yml index ede20ab1..32862c16 100644 --- a/recipe_tas.yml +++ b/recipe_tas.yml @@ -13,7 +13,7 @@ Analysis: System: - {name: 'Meteo-France-System8'} - {name: 'CMCC-SPS3.5'} - - {name: 'ECMWF-SEAS5.1'} + #- {name: 'ECMWF-SEAS5.1'} #- {name: 'UK-MetOffice-Glosea601'} #- {name: 'NCEP-CFSv2'} #- {name: 'DWD-GCFS2.1'} @@ -29,7 +29,7 @@ Analysis: sdate: '0501' fcst_year: #'2021' hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' - hcst_end: '2000' # Mandatory, int: Hindcast end year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' ftime_min: 1 # Mandatory, int: First leadtime time step in months ftime_max: 6 # Mandatory, int: Last leadtime time step in months Region: @@ -45,7 +45,7 @@ Analysis: Anomalies: compute: yes cross_validation: no - save: all + save: 
none Time_aggregation: execute: no Calibration: @@ -58,13 +58,13 @@ Analysis: cross_validation: yes Probabilities: percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. - save: all + save: none Indicators: index: no Visualization: plots: skill_metrics #forecast_ensemble_mean most_likely_terciles multi_panel: no - dots: both + #dots: both projection: Robinson file_format: 'PNG' #projection: robinson @@ -85,6 +85,7 @@ Analysis: calculate_diff: FALSE ncores: 4 # Optional, int: number of cores, defaults to 1 remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE + alpha: 0.05 Output_format: scorecards logo: yes Run: -- GitLab From 8cdde729e11bdbae8fe7519aef7ea75c300b2d22 Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 26 Jun 2024 17:37:33 +0200 Subject: [PATCH 39/78] pNAO calculation --- Crossval_NAO.R | 322 +++++ full_nao.R | 26 + modules/Crossval/Crossval_NAO.R | 374 +++++ modules/Crossval/Crossval_anomalies.R | 5 +- modules/Crossval/Crossval_metrics.R | 26 +- modules/Crossval/R/tmp/EOF.R | 293 ++++ modules/Crossval/R/tmp/GetProbs.R | 2 - modules/Crossval/R/tmp/NAO.R | 574 ++++++++ modules/Crossval/R/tmp/ProjectField.R | 272 ++++ modules/Crossval/R/tmp/Utils.R | 1885 +++++++++++++++++++++++++ recipe_nao_single.yml | 108 ++ recipe_tas_singl.yml | 8 +- 12 files changed, 3879 insertions(+), 16 deletions(-) create mode 100644 Crossval_NAO.R create mode 100644 full_nao.R create mode 100644 modules/Crossval/Crossval_NAO.R create mode 100644 modules/Crossval/R/tmp/EOF.R create mode 100644 modules/Crossval/R/tmp/NAO.R create mode 100644 modules/Crossval/R/tmp/ProjectField.R create mode 100644 modules/Crossval/R/tmp/Utils.R create mode 100644 recipe_nao_single.yml diff --git a/Crossval_NAO.R b/Crossval_NAO.R new file mode 100644 index 00000000..8fe9fa01 --- /dev/null +++ b/Crossval_NAO.R @@ -0,0 +1,322 @@ +# Full-cross-val workflow +## This code should be valid for individual months and temporal averages +source("modules/Crossval/R/tmp/GetProbs.R") + +Crossval_anomalies <- function(recipe, data) { + cross.method <- recipe$Analysis$cross.method + # TODO move check + if (is.null(cross.method)) { + cross.method <- 'leave-one-out' + } + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + eval(parse(text = y))})}) + ncores <- recipe$Analysis$ncores + na.rm <- recipe$Analysis$remove_NAs + ## data dimensions + sdate_dim <- dim(data$hcst$data)['syear'] + orig_dims <- names(dim(data$hcst$data)) + # spatial dims + if ('latitude' %in% names(dim(data$hcst$data))) { + nlats <- dim(data$hcst$data)['latitude'] + nlons <- dim(data$hcst$data)['longitude'] + agg = 'global' + } else if ('region' %in% names(dim(data$hcst$data))) { + agg = 'region' + nregions <- dim(data$hcst$data)['region'] + } + # output_dims from loop base on original dimensions + ## ex: 'dat', 'var', 'sday', 'sweek', 'ensemble', 'time', + ## 'latitude', 'longitude', 'unneeded', 'syear' + ev_dim_names <- c(orig_dims[-which(orig_dims %in% 'syear')], + names(sdate_dim)) + orig_dims[orig_dims %in% 'ensemble'] <- 'unneeded' + orig_dims[orig_dims %in% 'syear'] <- 'ensemble' + tr_dim_names <-c(orig_dims, + names(sdate_dim)) + # TODO fix it to use new version https://earth.bsc.es/gitlab/external/cstools/-/blob/dev-cross-indices/R/CST_Calibration.R#L570 + cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, + amt.points = sdate_dim, + amt.points_cor = NULL) # k = ? 
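# [Editor's note] A sketch of the cross-validation object built just above.
# The two fields are taken from how the loop below consumes it
# (cross[[t]]$train.dexes and cross[[t]]$eval.dexes); the printed structure
# is an assumption, since .make.eval.train.dexes is a CSTools internal:
#   cross <- CSTools:::.make.eval.train.dexes(eval.method = 'leave-one-out',
#                                             amt.points = 8,
#                                             amt.points_cor = NULL)
#   str(cross[[1]])
#   # List of 2
#   #  $ eval.dexes : int 1
#   #  $ train.dexes: int [1:7] 2 3 4 5 6 7 8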
+ + ## output objects + ano_hcst_ev_res <- NULL + ano_obs_ev_res <- NULL + ano_obs_tr_res <- NULL + # as long as probs requested in recipe: + lims_ano_hcst_tr_res <- lapply(categories, function(X) {NULL}) + lims_ano_obs_tr_res <- lapply(categories, function(X) {NULL}) + + fcst_probs <- lapply(categories, function(x){NULL}) + hcst_probs_ev <- lapply(categories, function(x){NULL}) + obs_probs_ev <- lapply(categories, function(x){NULL}) + + for (t in 1:length(cross)) { + info(recipe$Run$logger, paste("crossval:", t)) + + # subset years: Subset works at BSC not at Athos + ## training indices + obs_tr <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$train.dexes) + hcst_tr <- Subset(data$hcst$data, along = 'syear', + indices = cross[[t]]$train.dexes) + ## evaluation indices + hcst_ev <- Subset(data$hcst$data, along = 'syear', + indices = cross[[t]]$eval.dexes, drop = 'selected') + obs_ev <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$eval.dexes, drop = 'selected') + # compute climatology: + clim_obs_tr <- MeanDims(obs_tr, 'syear') + clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble')) + # compute anomalies: + ano_obs_tr <- s2dv::Ano(obs_tr, clim_obs_tr, + ncores = ncores) + ano_hcst_tr <- s2dv::Ano(hcst_tr, clim_hcst_tr, + ncores = ncores) + ano_hcst_ev <- s2dv::Ano(hcst_ev, clim_hcst_tr, + ncores = ncores) + ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr, + ncores = ncores) + # compute category limits + lims_ano_hcst_tr <- Apply(ano_hcst_tr, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x) {'cat'}), + prob_lims = categories, + ncores = ncores) + lims_ano_obs_tr <- Apply(ano_obs_tr, target_dims = c('syear'),#, 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x){'cat'}), + prob_lims = categories, + ncores = ncores) + #store results + ano_hcst_ev_res <- abind(ano_hcst_ev_res, ano_hcst_ev, + along = length(dim(ano_hcst_ev)) + 1) + ano_obs_ev_res <- abind(ano_obs_ev_res, ano_obs_ev, + along = length(dim(ano_obs_ev)) + 1) + ano_obs_tr_res <- abind(ano_obs_tr_res, ano_obs_tr, + along = length(dim(ano_obs_tr)) + 1) + for(ps in 1:length(categories)) { + lims_ano_hcst_tr_res[[ps]] <- abind(lims_ano_hcst_tr_res[[ps]], lims_ano_hcst_tr[[ps]], + along = length(dim(lims_ano_hcst_tr[[ps]])) + 1) + lims_ano_obs_tr_res[[ps]] <- abind(lims_ano_obs_tr_res[[ps]], lims_ano_obs_tr[[ps]], + along = length(dim(lims_ano_obs_tr[[ps]])) + 1) + } + } + info(recipe$Run$logger, + "#### Anomalies Cross-validation loop ended #####") + gc() + # Add dim names: + names(dim(ano_hcst_ev_res)) <- ev_dim_names + names(dim(ano_obs_ev_res)) <- ev_dim_names + names(dim(ano_obs_tr_res)) <- tr_dim_names + # To make crps_clim to work the reference forecast need to have same dims as obs: + ano_obs_tr_res <- Subset(ano_obs_tr_res, along = 'unneeded', + indices = 1, drop = 'selected') + for(ps in 1:length(categories)) { + names(dim(lims_ano_hcst_tr_res[[ps]])) <- c('cat', + orig_dims[-which(orig_dims %in% c('ensemble', 'unneeded'))], 'syear') + names(dim(lims_ano_obs_tr_res[[ps]])) <- c('cat', + tr_dim_names[-which(tr_dim_names %in% c('ensemble'))]) + lims_ano_obs_tr_res[[ps]] <- Subset(lims_ano_obs_tr_res[[ps]], + along = 'unneeded', indices = 1, drop = 'selected') + } + + # Forecast anomalies: + if (!is.null(data$fcst)) { + clim_hcst 
<- Apply(ano_hcst_ev_res, + target_dims = c('syear', 'ensemble'), + mean, + na.rm = na.rm, + ncores = ncores)$output1 + data$fcst$data <- Ano(data = data$fcst$data, clim = clim_hcst) + # Terciles limits using the whole hindcast period: + lims_fcst <- Apply(ano_hcst_ev_res, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x) {'cat'}), + prob_lims = categories, + ncores = ncores) + } + + # Compute Probabilities + for (ps in 1:length(categories)) { + hcst_probs_ev[[ps]] <- GetProbs(ano_hcst_ev_res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', abs_thresholds = lims_ano_hcst_tr[[ps]], + ncores = ncores) + obs_probs_ev[[ps]] <- GetProbs(ano_obs_ev_res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_ano_obs_tr_res[[ps]], + ncores = ncores) + if (!is.null(data$fcst)) { + fcst_probs[[ps]] <- GetProbs(data$fcst$data, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_fcst[[ps]], + ncores = ncores) + } + } + # Convert to s2dv_cubes the resulting anomalies + ano_hcst <- data$hcst + ano_hcst$data <- ano_hcst_ev_res + ano_obs <- data$obs + ano_obs$data <- ano_obs_ev_res + + info(recipe$Run$logger, + "#### Anomalies and Probabilities Done #####") + if (recipe$Analysis$Workflow$Anomalies$save != 'none') { + info(recipe$Run$logger, "##### START SAVING ANOMALIES #####") + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Anomalies/") + # Save forecast + if ((recipe$Analysis$Workflow$Anomalies$save %in% + c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) { + save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') + } + # Save hindcast + if (recipe$Analysis$Workflow$Anomalies$save %in% + c('all', 'exp_only')) { + save_forecast(recipe = recipe, data_cube = ano_hcst, type = 'hcst') + } + # Save observation + if (recipe$Analysis$Workflow$Anomalies$save == 'all') { + save_observations(recipe = recipe, data_cube = ano_obs) + } + } + # Save probability bins + probs_hcst <- list() + probs_fcst <- list() + probs_obs <- list() + all_names <- NULL + # Make categories rounded number to use as names: + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + round(eval(parse(text = y)),2)})}) + + for (ps in 1:length(categories)) { + for (perc in 1:(length(categories[[ps]]) + 1)) { + if (perc == 1) { + name_elem <- paste0("below_", categories[[ps]][perc]) + } else if (perc == length(categories[[ps]]) + 1) { + name_elem <- paste0("above_", categories[[ps]][perc-1]) + } else { + name_elem <- paste0("from_", categories[[ps]][perc-1], + "_to_", categories[[ps]][perc]) + } + probs_hcst <- append(list(Subset(hcst_probs_ev[[ps]], + along = 'cat', indices = perc, drop = 'all')), + probs_hcst) + probs_obs <- append(list(Subset(obs_probs_ev[[ps]], + along = 'cat', indices = perc, drop = 'all')), + probs_obs) + if (!is.null(data$fcst)) { + probs_fcst <- append(list(Subset(fcst_probs[[ps]], + along = 'cat', indices = perc, drop = 'all')), + probs_fcst) + } + all_names <- c(all_names, name_elem) + } + } + names(probs_hcst) <- all_names + if (!('var' %in% names(dim(probs_hcst[[1]])))) { + 
probs_hcst <- lapply(probs_hcst, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } + names(probs_obs) <- all_names + if (!('var' %in% names(dim(probs_obs[[1]])))) { + probs_obs <- lapply(probs_obs, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } + + if (!is.null(data$fcst)) { + names(probs_fcst) <- all_names + if (!('var' %in% names(dim(probs_fcst[[1]])))) { + probs_fcst <- lapply(probs_fcst, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } + if (!('syear' %in% names(dim(probs_fcst[[1]])))) { + probs_fcst <- lapply(probs_fcst, function(x) { + dim(x) <- c(syear = 1, dim(x)) + return(x)}) + } + } + if (recipe$Analysis$Workflow$Probabilities$save %in% + c('all', 'bins_only')) { + save_probabilities(recipe = recipe, probs = probs_hcst, + data_cube = data$hcst, agg = agg, + type = "hcst") + save_probabilities(recipe = recipe, probs = probs_obs, + data_cube = data$hcst, agg = agg, + type = "obs") + # TODO Forecast + if (!is.null(probs_fcst)) { + save_probabilities(recipe = recipe, probs = probs_fcst, + data_cube = data$fcst, agg = agg, + type = "fcst") + } + } + # Save ensemble mean for multimodel option: + hcst_EM <- MeanDims(ano_hcst$data, 'ensemble', drop = T) + save_metrics(recipe = recipe, + metrics = list(hcst_EM = + Subset(hcst_EM, along = 'dat', indices = 1, drop = 'selected')), + data_cube = data$hcst, agg = agg, + module = "statistics") + fcst_EM <- NULL + if (!is.null(data$fcst)) { + fcst_EM <- MeanDims(data$fcst$data, 'ensemble', drop = T) + save_metrics(recipe = recipe, + metrics = list(fcst_EM = + Subset(fcst_EM, along = 'dat', indices = 1, drop = 'selected')), + data_cube = data$fcst, agg = agg, + module = "statistics") + } + return(list(hcst = ano_hcst, obs = ano_obs, fcst = data$fcst, + hcst.full_val = data$hcst, obs.full_val = data$obs, + hcst_EM = hcst_EM, fcst_EM = fcst_EM, + cat_lims = list(hcst_tr = lims_ano_hcst_tr_res, + obs_tr = lims_ano_obs_tr_res), + probs = list(hcst_ev = hcst_probs_ev, + obs_ev = obs_probs_ev), + ref_obs_tr = ano_obs_tr_res)) +} + + +## The result contains the inputs for Skill_full_crossval. 
+## this is a list with the required elements:
+  ## probs is a list with
+  ## probs$hcst_ev and probs$obs_ev
+  ## probs$hcst_ev will have as many elements in the $Probabilities$percentiles
+  ## each element will be an array with 'cat' dimension
+  ## the same for probs$obs_ev
+  ## hcst is a s2dv_cube for the post-processed hindcast for the evaluation indices
+  ## in this case cross-validated anomalies
+  ## obs is a s2dv_cube for the post-processed obs
+  ## in this case cross-validated anomalies
+  ## fcst is a s2dv_cube for the post-processed fcst
+  ## in this case anomalies computed with the full hindcast period
+  ## this object is not required for skill assessment
+  ## hcst.full_val and obs.full_val are the original data to compute mean bias
+  ## cat_lims used to compute the probabilities
+  ## this object is not required for skill assessment
+  ## ref_obs_tr is an array with the cross-validated observed anomalies
+  ## to be used as reference forecast in the CRPSS and CRPS_clim
+  ## it is computed from the training indices
+
diff --git a/full_nao.R b/full_nao.R
new file mode 100644
index 00000000..43b1e2e3
--- /dev/null
+++ b/full_nao.R
@@ -0,0 +1,26 @@
+
+source("modules/Loading/Loading.R")
+source("modules/Saving/Saving.R")
+source("modules/Units/Units.R")
+source("modules/Visualization/Visualization.R")
+args = commandArgs(trailingOnly = TRUE)
+recipe_file <- args[1]
+#recipe_file <- "recipe_nao_single.yml"
+recipe <- read_atomic_recipe(recipe_file)
+#recipe <- prepare_outputs(recipe_file)
+# Load datasets
+data <- Loading(recipe)
+data <- Units(recipe, data)
+data_summary(data$hcst, recipe)
+data_summary(data$obs, recipe)
+
+
+source("modules/Crossval/Crossval_NAO.R")
+res <- Crossval_NAO(recipe = recipe, data = data)
+
+source("modules/Crossval/Crossval_metrics.R")
+skill_metrics <- Crossval_metrics(recipe = recipe, data_crossval = res,
+                                  fair = FALSE, nmemb = NULL, nmemb_ref = NULL)
+
+# No visualisation available for region-aggregated data
+
diff --git a/modules/Crossval/Crossval_NAO.R b/modules/Crossval/Crossval_NAO.R
new file mode 100644
index 00000000..9fb31918
--- /dev/null
+++ b/modules/Crossval/Crossval_NAO.R
@@ -0,0 +1,374 @@
+# Full-cross-val workflow
+## This code should be valid for individual months and temporal averages
+source("modules/Crossval/R/tmp/GetProbs.R")
+source("modules/Crossval/R/tmp/NAO.R")
+source("modules/Crossval/R/tmp/ProjectField.R")
+source("modules/Crossval/R/tmp/EOF.R")
+source("modules/Crossval/R/tmp/Utils.R")
+
+Crossval_NAO <- function(recipe, data) {
+  obsproj <- recipe$Analysis$Workflow$Indices$NAO$obsproj
+  if (is.null(obsproj)) {
+    obsproj <- TRUE
+  }
+  cross.method <- recipe$Analysis$cross.method
+  # TODO move check
+  if (is.null(cross.method)) {
+    cross.method <- 'leave-one-out'
+  }
+  categories <- recipe$Analysis$Workflow$Probabilities$percentiles
+  categories <- lapply(categories, function (x) {
+                   sapply(x, function(y) {
+                     eval(parse(text = y))})})
+  ncores <- recipe$Analysis$ncores
+  na.rm <- recipe$Analysis$remove_NAs
+  ## data dimensions
+  sdate_dim <- dim(data$hcst$data)['syear']
+  orig_dims <- names(dim(data$hcst$data))
+  # spatial dims
+  nlats <- dim(data$hcst$data)['latitude']
+  nlons <- dim(data$hcst$data)['longitude']
+  agg = 'region'
+
+  # TODO fix it to use new version https://earth.bsc.es/gitlab/external/cstools/-/blob/dev-cross-indices/R/CST_Calibration.R#L570
+  cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method,
+                                            amt.points = sdate_dim,
+                                            amt.points_cor = NULL) # k = ?
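# [Editor's note] The NAO() used in the loop below is the tmp/NAO.R version
# sourced above; judging from how its output is consumed, it is assumed to
# return a list with the hindcast index (nao$exp), the observed index
# (nao$obs) and, when exp_cor is supplied, the index of the projected
# left-out data (nao$exp_cor); hence the observational call keeping only
# $exp_cor. The projection itself amounts to regressing each anomaly map
# onto the leading EOF pattern; a toy pseudo-PC under that assumption:
#   pseudo_pc <- function(ano_map, eof_map) {
#     sum(ano_map * eof_map) / sum(eof_map^2)
#   }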
+
+  ## output objects
+  nao_hcst_ev_res <- NULL
+  nao_obs_ev_res <- NULL
+  nao_obs_tr_res <- NULL
+  # as long as probs requested in recipe:
+  lims_nao_hcst_tr_res <- lapply(categories, function(X) {NULL})
+  lims_nao_obs_tr_res <- lapply(categories, function(X) {NULL})
+
+  fcst_probs <- lapply(categories, function(x){NULL})
+  hcst_probs_ev <- lapply(categories, function(x){NULL})
+  obs_probs_ev <- lapply(categories, function(x){NULL})
+
+  for (t in 1:length(cross)) {
+    info(recipe$Run$logger, paste("crossval:", t))
+
+    # subset years: Subset works at BSC not at Athos
+    ## training indices
+    obs_tr <- Subset(data$obs$data, along = 'syear',
+                     indices = cross[[t]]$train.dexes)
+    hcst_tr <- Subset(data$hcst$data, along = 'syear',
+                      indices = cross[[t]]$train.dexes)
+    ## evaluation indices
+    hcst_ev <- Subset(data$hcst$data, along = 'syear',
+                      indices = cross[[t]]$eval.dexes)
+    obs_ev <- Subset(data$obs$data, along = 'syear',
+                     indices = cross[[t]]$eval.dexes)
+    # compute climatology:
+    clim_obs_tr <- MeanDims(obs_tr, 'syear')
+    clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble'))
+    # compute anomalies:
+    ano_obs_tr <- s2dv::Ano(obs_tr, clim_obs_tr,
+                            ncores = ncores)
+    ano_hcst_tr <- s2dv::Ano(hcst_tr, clim_hcst_tr,
+                             ncores = ncores)
+    ano_hcst_ev <- s2dv::Ano(hcst_ev, clim_hcst_tr,
+                             ncores = ncores)
+    ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr,
+                            ncores = ncores)
+    # compute NAO:
+    nao <- NAO(exp = ano_hcst_tr, obs = ano_obs_tr, exp_cor = ano_hcst_ev,
+               ftime_avg = NULL, time_dim = 'syear',
+               memb_dim = 'ensemble',
+               space_dim = c('latitude', 'longitude'),
+               ftime_dim = 'time', obsproj = obsproj,
+               lat = data$obs$coords$latitude,
+               lon = data$obs$coords$longitude,
+               ncores = recipe$Analysis$ncores)
+
+    nao_obs_ev <- NAO(exp = ano_hcst_tr, obs = ano_obs_tr, exp_cor = ano_obs_ev,
+                      ftime_avg = NULL, time_dim = 'syear',
+                      memb_dim = 'ensemble',
+                      space_dim = c('latitude', 'longitude'),
+                      ftime_dim = 'time', obsproj = obsproj,
+                      lat = data$obs$coords$latitude,
+                      lon = data$obs$coords$longitude,
+                      ncores = recipe$Analysis$ncores)$exp_cor
+    # Standardisation:
+    # Need the nao_hcst (for the train.dexes) to standardise the eval.dexes?
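# [Editor's note] The four Apply() calls that follow all implement the same
# standardisation with the training-period statistics:
#   nao_std = (nao - mean(nao_train)) / sd(nao_train)
# A one-dimensional toy of what each anonymous function computes (toy values,
# not part of the module):
#   x_tr <- rnorm(20)                      # training NAO index
#   x_ev <- rnorm(2)                       # left-out values to standardise
#   (x_ev - mean(x_tr)) / sqrt(var(x_tr))  # same centring and scaling as below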
+ nao_hcst_ev <- Apply(list(nao$exp, nao$exp_cor), + target_dims = c('syear', 'ensemble'), + fun = function(x, y) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(y, c(1,2), function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores)$output1 + nao_obs_ev <- Apply(list(nao$obs, nao_obs_ev), + target_dims = c('syear','ensemble'), + fun = function(x, y) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(y, c(1,2), function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores)$output1 + nao_obs_tr <- Apply(list(nao$obs), target_dims = 'syear', + fun = function(x) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(x, 1, function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores, + output_dims = 'syear')$output1 + nao_hcst_tr <- Apply(list(nao$exp), target_dims = c('syear', 'ensemble'), + fun = function(x) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(x, c(1,2), function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores)$output1 + + # compute category limits + lims_nao_hcst_tr <- Apply(nao_hcst_tr, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x) {'cat'}), + prob_lims = categories, + ncores = ncores) + lims_nao_obs_tr <- Apply(nao_obs_tr, target_dims = c('syear'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x){'cat'}), + prob_lims = categories, + ncores = ncores) + #store results + nao_hcst_ev_res <- abind(nao_hcst_ev_res, nao_hcst_ev, + along = length(dim(nao_hcst_ev)) + 1) + nao_obs_ev_res <- abind(nao_obs_ev_res, nao_obs_ev, + along = length(dim(nao_obs_ev)) + 1) + nao_obs_tr_res <- abind(nao_obs_tr_res, nao_obs_tr, + along = length(dim(nao_obs_tr)) + 1) + for(ps in 1:length(categories)) { + lims_nao_hcst_tr_res[[ps]] <- abind(lims_nao_hcst_tr_res[[ps]], + lims_nao_hcst_tr[[ps]], + along = length(dim(lims_nao_hcst_tr[[ps]])) + 1) + lims_nao_obs_tr_res[[ps]] <- abind(lims_nao_obs_tr_res[[ps]], + lims_nao_obs_tr[[ps]], + along = length(dim(lims_nao_obs_tr[[ps]])) + 1) + } + } + info(recipe$Run$logger, + "#### Anomalies Cross-validation loop ended #####") + gc() + # Add dim names: + names(dim(nao_hcst_ev_res)) <- c('unneeded', 'ensemble', 'dat', 'var', + 'sday', 'sweek', 'time', 'syear') + names(dim(nao_obs_ev_res)) <- c('unneeded', 'ensemble', 'dat', 'var', + 'sday', 'sweek', 'time', 'syear') + names(dim(nao_obs_tr_res)) <- c('ensemble', 'unneeded', 'dat', 'var', + 'sday', 'sweek', 'time', 'syear') + # To make crps_clim to work the reference forecast need to have same dims as obs: + nao_hcst_ev_res <- Subset(nao_hcst_ev_res, along = 'unneeded', + indices = 1, drop = 'selected') + nao_obs_ev_res <- Subset(nao_obs_ev_res, along = 'unneeded', + indices = 1, drop = 'selected') + nao_obs_tr_res <- Subset(nao_obs_tr_res, along = 'unneeded', + indices = 1, drop = 'selected') + for(ps in 1:length(categories)) { + names(dim(lims_nao_hcst_tr_res[[ps]])) <- c('cat', 'dat', 'var', 'sday', + 'sweek', 'time', 'syear') + names(dim(lims_nao_obs_tr_res[[ps]])) <- c('cat', 'unneeded', 'dat', 'var', + 'sday', 'sweek', 'time', 'syear') + lims_nao_obs_tr_res[[ps]] <- Subset(lims_nao_obs_tr_res[[ps]], + along 
= 'unneeded', indices = 1, drop = 'selected') + } + # Forecast anomalies: + if (!is.null(data$fcst)) { + clim_hcst <- Apply(data$hcst$data, + target_dims = c('syear', 'ensemble'), + mean, + na.rm = na.rm, + ncores = ncores)$output1 + clim_obs <- Apply(data$obs$data, + target_dims = c('syear', 'ensemble'), + mean, + na.rm = na.rm, + ncores = ncores)$output1 + ano_fcst <- Ano(data = data$fcst$data, clim = clim_hcst) + ano_hcst <- Ano(data = data$hcst$data, clim = clim_hcst) + ano_obs <- Ano(data= data$obs$data, clim = clim_obs) + nao_fcst <- NAO(exp = ano_hcst, obs = ano_obs, exp_cor = ano_fcst, + ftime_avg = NULL, time_dim = 'syear', + memb_dim = 'ensemble', + space_dim = c('latitude', 'longitude'), + ftime_dim = 'time', obsproj = obsproj, + lat = data$obs$coords$latitude, + lon = data$obs$coords$longitude, + ncores = recipe$Analysis$ncores) + # Terciles limits using the whole hindcast period: + lims_fcst <- Apply(nao_fcst$exp, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x) {'cat'}), + prob_lims = categories, + ncores = ncores) + } + + # Compute Probabilities + for (ps in 1:length(categories)) { + hcst_probs_ev[[ps]] <- GetProbs(nao_hcst_ev_res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_nao_hcst_tr_res[[ps]], + ncores = ncores) + obs_probs_ev[[ps]] <- GetProbs(nao_obs_ev_res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_nao_obs_tr_res[[ps]], + ncores = ncores) + if (!is.null(data$fcst)) { + fcst_probs[[ps]] <- GetProbs(nao_fcst$exp_cor, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_fcst[[ps]], + ncores = ncores) + } + } + # Convert to s2dv_cubes + nao_hcst <- data$hcst + nao_hcst$data <- nao_hcst_ev_res + nao_obs <- data$obs + nao_obs$data <- nao_obs_ev_res + + info(recipe$Run$logger, + "#### NAO and Probabilities Done #####") + if (recipe$Analysis$Workflow$Indices$NAO$save != 'none') { + info(recipe$Run$logger, "##### START SAVING NAO #####") + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Anomalies/") + # Save forecast + if ((recipe$Analysis$Workflow$Indices$NAO$save %in% + c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) { + save_forecast(recipe = recipe, data_cube = nao_fcst, type = 'fcst') + } + # Save hindcast + if (recipe$Analysis$Workflow$Indices$save %in% + c('all', 'exp_only')) { + save_forecast(recipe = recipe, data_cube = nao_hcst, type = 'hcst') + } + # Save observation + if (recipe$Analysis$Workflow$Indices$NAO$save == 'all') { + save_observations(recipe = recipe, data_cube = nao_obs) + } + } + # Save probability bins + probs_hcst <- list() + probs_fcst <- list() + probs_obs <- list() + all_names <- NULL + # Make categories rounded number to use as names: + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + round(eval(parse(text = y)),2)})}) + + for (ps in 1:length(categories)) { + for (perc in 1:(length(categories[[ps]]) + 1)) { + if (perc == 1) { + name_elem <- paste0("below_", categories[[ps]][perc]) + } else if (perc == length(categories[[ps]]) + 1) { + name_elem <- paste0("above_", 
+  # Convert to s2dv_cubes
+  nao_hcst <- data$hcst
+  nao_hcst$data <- nao_hcst_ev_res
+  nao_obs <- data$obs
+  nao_obs$data <- nao_obs_ev_res
+
+  info(recipe$Run$logger,
+       "#### NAO and Probabilities Done #####")
+  if (recipe$Analysis$Workflow$Indices$NAO$save != 'none') {
+    info(recipe$Run$logger, "##### START SAVING NAO #####")
+    recipe$Run$output_dir <- paste0(recipe$Run$output_dir,
+                                    "/outputs/Anomalies/")
+    # Save forecast
+    if ((recipe$Analysis$Workflow$Indices$NAO$save %in%
+         c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) {
+      save_forecast(recipe = recipe, data_cube = nao_fcst, type = 'fcst')
+    }
+    # Save hindcast
+    if (recipe$Analysis$Workflow$Indices$NAO$save %in%
+        c('all', 'exp_only')) {
+      save_forecast(recipe = recipe, data_cube = nao_hcst, type = 'hcst')
+    }
+    # Save observation
+    if (recipe$Analysis$Workflow$Indices$NAO$save == 'all') {
+      save_observations(recipe = recipe, data_cube = nao_obs)
+    }
+  }
+  # Save probability bins
+  probs_hcst <- list()
+  probs_fcst <- list()
+  probs_obs <- list()
+  all_names <- NULL
+  # Make categories rounded numbers to use as names:
+  categories <- recipe$Analysis$Workflow$Probabilities$percentiles
+  categories <- lapply(categories, function(x) {
+                  sapply(x, function(y) {
+                    round(eval(parse(text = y)), 2)})})
+
+  for (ps in 1:length(categories)) {
+    for (perc in 1:(length(categories[[ps]]) + 1)) {
+      if (perc == 1) {
+        name_elem <- paste0("below_", categories[[ps]][perc])
+      } else if (perc == length(categories[[ps]]) + 1) {
+        name_elem <- paste0("above_", categories[[ps]][perc - 1])
+      } else {
+        name_elem <- paste0("from_", categories[[ps]][perc - 1],
+                            "_to_", categories[[ps]][perc])
+      }
+      # Append in the same order as all_names so the names match the bins:
+      probs_hcst <- append(probs_hcst,
+                           list(Subset(hcst_probs_ev[[ps]],
+                                       along = 'cat', indices = perc, drop = 'all')))
+      probs_obs <- append(probs_obs,
+                          list(Subset(obs_probs_ev[[ps]],
+                                      along = 'cat', indices = perc, drop = 'all')))
+      if (!is.null(data$fcst)) {
+        probs_fcst <- append(probs_fcst,
+                             list(Subset(fcst_probs[[ps]],
+                                         along = 'cat', indices = perc, drop = 'all')))
+      }
+      all_names <- c(all_names, name_elem)
+    }
+  }
+  names(probs_hcst) <- all_names
+  if (!('var' %in% names(dim(probs_hcst[[1]])))) {
+    probs_hcst <- lapply(probs_hcst, function(x) {
+                    dim(x) <- c(var = 1, dim(x))
+                    return(x)})
+  }
+  names(probs_obs) <- all_names
+  if (!('var' %in% names(dim(probs_obs[[1]])))) {
+    probs_obs <- lapply(probs_obs, function(x) {
+                   dim(x) <- c(var = 1, dim(x))
+                   return(x)})
+  }
+
+  if (!is.null(data$fcst)) {
+    names(probs_fcst) <- all_names
+    if (!('var' %in% names(dim(probs_fcst[[1]])))) {
+      probs_fcst <- lapply(probs_fcst, function(x) {
+                      dim(x) <- c(var = 1, dim(x))
+                      return(x)})
+    }
+    if (!('syear' %in% names(dim(probs_fcst[[1]])))) {
+      probs_fcst <- lapply(probs_fcst, function(x) {
+                      dim(x) <- c(syear = 1, dim(x))
+                      return(x)})
+    }
+  }
+  if (recipe$Analysis$Workflow$Probabilities$save %in%
+      c('all', 'bins_only')) {
+    save_probabilities(recipe = recipe, probs = probs_hcst,
+                       data_cube = data$hcst, agg = agg,
+                       type = "hcst")
+    save_probabilities(recipe = recipe, probs = probs_obs,
+                       data_cube = data$hcst, agg = agg,
+                       type = "obs")
+    if (!is.null(data$fcst)) {
+      save_probabilities(recipe = recipe, probs = probs_fcst,
+                         data_cube = data$fcst, agg = agg,
+                         type = "fcst")
+    }
+  }
+  return(list(hcst = nao_hcst, obs = nao_obs, fcst = nao_fcst,
+              # Mean bias for the NAO doesn't make sense, so the full
+              # values are not returned:
+              #hcst.full_val = data$hcst, obs.full_val = data$obs,
+              cat_lims = list(hcst_tr = lims_nao_hcst_tr_res,
+                              obs_tr = lims_nao_obs_tr_res),
+              probs = list(hcst_ev = hcst_probs_ev,
+                           obs_ev = obs_probs_ev),
+              ref_obs_tr = nao_obs_tr_res))
+}
+
+
+## The result contains the inputs for Skill_full_crossval.
+## This is a list with the required elements:
+  ## probs is a list with
+  ## probs$hcst_ev and probs$obs_ev
+  ## probs$hcst_ev will have as many elements as $Probabilities$percentiles
+  ## each element will be an array with a 'cat' dimension
+  ## the same for probs$obs_ev
+  ## hcst is an s2dv_cube of the post-processed hindcast for the evaluation indices
+  ## in this case cross-validated anomalies
+  ## obs is an s2dv_cube of the post-processed obs
+  ## in this case cross-validated anomalies
+  ## fcst is an s2dv_cube of the post-processed fcst
+  ## in this case anomalies with respect to the full hindcast period
+  ## this object is not required for skill assessment
+  ## hcst.full_val and obs.full_val are the original data to compute mean bias
+  ## cat_lims are the limits used to compute the probabilities
+  ## this object is not required for skill assessment
+  ## ref_obs_tr is an array with the cross-validated observed anomalies
+  ## to be used as reference forecast in the CRPSS and CRPS_clim
+  ## it is computed from the training indices
+
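+## Minimal usage sketch (function name and downstream signature are assumed,
+## for orientation only):
+##   res <- Crossval_NAO(recipe, data)  # hypothetical call to this module
+##   res$probs$hcst_ev[[1]]             # array with a 'cat' dimension
+##   res$ref_obs_tr                     # reference for CRPSS / CRPS_clim
+##   Skill_full_crossval(recipe, res)   # as referenced above; signature assumed
+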
diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R
index 8fe9fa01..3ff0f50e 100644
--- a/modules/Crossval/Crossval_anomalies.R
+++ b/modules/Crossval/Crossval_anomalies.R
@@ -150,7 +150,8 @@ Crossval_anomalies <- function(recipe, data) {
                                     prob_thresholds = NULL,
                                     bin_dim_abs = 'cat',
                                     indices_for_quantiles = NULL,
-                                    memb_dim = 'ensemble', abs_thresholds = lims_ano_hcst_tr[[ps]],
+                                    memb_dim = 'ensemble',
+                                    abs_thresholds = lims_ano_hcst_tr_res[[ps]],
                                     ncores = ncores)
     obs_probs_ev[[ps]] <- GetProbs(ano_obs_ev_res, time_dim = 'syear',
                                    prob_thresholds = NULL,
diff --git a/modules/Crossval/Crossval_metrics.R b/modules/Crossval/Crossval_metrics.R
index dd31d23a..1c592b15 100644
--- a/modules/Crossval/Crossval_metrics.R
+++ b/modules/Crossval/Crossval_metrics.R
@@ -140,14 +140,19 @@ Crossval_metrics <- function(recipe, data_crossval,
     skill_metrics$enscorr <- enscorr
   }
   if ('mean_bias' %in% requested_metrics) {
-    mean_bias <- Bias(exp = data_crossval$hcst.full_val$data,
-                      obs = data_crossval$obs.full_val$data,
-                      time_dim = 'syear',
-                      memb_dim = 'ensemble',
-                      alpha = alpha,
-                      ncores = ncores)
-    skill_metrics$mean_bias <- mean_bias$bias
-    skill_metrics$mean_bias_significance <- mean_bias$sig
+    if (!is.null(data_crossval$hcst.full_val$data)) {
+      mean_bias <- Bias(exp = data_crossval$hcst.full_val$data,
+                        obs = data_crossval$obs.full_val$data,
+                        time_dim = 'syear',
+                        memb_dim = 'ensemble',
+                        alpha = alpha,
+                        ncores = ncores)
+      skill_metrics$mean_bias <- mean_bias$bias
+      skill_metrics$mean_bias_significance <- mean_bias$sig
+    } else {
+      info(recipe$Run$logger,
+           "Full values not available; mean bias cannot be computed")
+    }
   }
   if ('enssprerr' %in% requested_metrics) {
     enssprerr <- SprErr(exp = data_crossval$hcst$data,
@@ -176,6 +181,11 @@ Crossval_metrics <- function(recipe, data_crossval,
     skill_metrics$rmss <- rmss$rmss
     skill_metrics$rmss_significance <- rmss$sign
   }
+  if (is.null(data_crossval$hcst_EM)) {
+    data_crossval$hcst_EM <- MeanDims(data_crossval$hcst$data,
+                                      dims = 'ensemble',
+                                      drop = TRUE)
+  }
   if (any(c('std', 'standard_deviation') %in% requested_metrics)) {
     std_hcst <- Apply(data = data_crossval$hcst_EM,
                       target_dims = 'syear',
diff --git a/modules/Crossval/R/tmp/EOF.R 
b/modules/Crossval/R/tmp/EOF.R new file mode 100644 index 00000000..87795b66 --- /dev/null +++ b/modules/Crossval/R/tmp/EOF.R @@ -0,0 +1,293 @@ +#'Area-weighted empirical orthogonal function analysis using SVD +#' +#'Perform an area-weighted EOF analysis using single value decomposition (SVD) +#'based on a covariance matrix or a correlation matrix if parameter 'corr' is +#'set to TRUE. +#' +#'@param ano A numerical array of anomalies with named dimensions to calculate +#' EOF. The dimensions must have at least 'time_dim' and 'space_dim'. NAs +#' could exist but it should be consistent along time_dim. That is, if one grid +#' point has NAs, all the time steps at this point should be NAs. +#'@param lat A vector of the latitudes of 'ano'. +#'@param lon A vector of the longitudes of 'ano'. +#'@param time_dim A character string indicating the name of the time dimension +#' of 'ano'. The default value is 'sdate'. +#'@param space_dim A vector of two character strings. The first is the dimension +#' name of latitude of 'ano' and the second is the dimension name of longitude +#' of 'ano'. The default value is c('lat', 'lon'). +#'@param neofs A positive integer of the modes to be kept. The default value is +#' 15. If time length or the product of the length of space_dim is smaller than +#' neofs, neofs will be changed to the minimum of the three values. +#'@param corr A logical value indicating whether to base on a correlation (TRUE) +#' or on a covariance matrix (FALSE). The default value is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list containing: +#'\item{EOFs}{ +#' An array of EOF patterns normalized to 1 (unitless) with dimensions +#' (number of modes, rest of the dimensions of 'ano' except 'time_dim'). +#' Multiplying \code{EOFs} by \code{PCs} gives the original reconstructed +#' field. +#'} +#'\item{PCs}{ +#' An array of principal components with the units of the original field to +#' the power of 2, with dimensions (time_dim, number of modes, rest of the +#' dimensions of 'ano' except 'space_dim'). +#' 'PCs' contains already the percentage of explained variance so, +#' to reconstruct the original field it's only needed to multiply 'EOFs' +#' by 'PCs'. +#'} +#'\item{var}{ +#' An array of the percentage (%) of variance fraction of total variance +#' explained by each mode (number of modes). The dimensions are (number of +#' modes, rest of the dimensions of 'ano' except 'time_dim' and 'space_dim'). +#'} +#'\item{mask}{ +#' An array of the mask with dimensions (space_dim, rest of the dimensions of +#' 'ano' except 'time_dim'). It is made from 'ano', 1 for the positions that +#' 'ano' has value and NA for the positions that 'ano' has NA. It is used to +#' replace NAs with 0s for EOF calculation and mask the result with NAs again +#' after the calculation. +#'} +#'\item{wght}{ +#' An array of the area weighting with dimensions 'space_dim'. It is calculated +#' by cosine of 'lat' and used to compute the fraction of variance explained by +#' each EOFs. +#'} +#'\item{tot_var}{ +#' A number or a numeric array of the total variance explained by all the modes. +#' The dimensions are same as 'ano' except 'time_dim' and 'space_dim'. +#'} +#' +#'@seealso ProjectField, NAO, PlotBoxWhisker +#'@examples +#'# This example computes the EOFs along forecast horizons and plots the one +#'# that explains the greatest amount of variability. 
The example data has low +#'# resolution so the result may not be explanatory, but it displays how to +#'# use this function. +#'\dontshow{ +#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), +#' c('observation'), startDates, +#' leadtimemin = 1, +#' leadtimemax = 4, +#' output = 'lonlat', +#' latmin = 27, latmax = 48, +#' lonmin = -12, lonmax = 40) +#'} +#'ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) +#'tmp <- MeanDims(ano$exp, c('dataset', 'member')) +#'ano <- tmp[1, , ,] +#'names(dim(ano)) <- names(dim(tmp))[-2] +#'eof <- EOF(ano, sampleData$lat, sampleData$lon) +#'\dontrun{ +#'PlotEquiMap(eof$EOFs[1, , ], sampleData$lon, sampleData$lat) +#'} +#' +#'@import multiApply +#'@importFrom stats sd +#'@export +EOF <- function(ano, lat, lon, time_dim = 'sdate', space_dim = c('lat', 'lon'), + neofs = 15, corr = FALSE, ncores = NULL) { + + # Check inputs + ## ano + if (is.null(ano)) { + stop("Parameter 'ano' cannot be NULL.") + } + if (!is.numeric(ano)) { + stop("Parameter 'ano' must be a numeric array.") + } + if (any(is.null(names(dim(ano)))) | any(nchar(names(dim(ano))) == 0)) { + stop("Parameter 'ano' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(ano))) { + stop("Parameter 'time_dim' is not found in 'ano' dimension.") + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (!all(space_dim %in% names(dim(ano)))) { + stop("Parameter 'space_dim' is not found in 'ano' dimension.") + } + ## lat + if (!is.numeric(lat) | length(lat) != dim(ano)[space_dim[1]]) { + stop("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'ano'.") + } + if (any(lat > 90 | lat < -90)) { + stop("Parameter 'lat' must contain values within the range [-90, 90].") + } + ## lon + if (!is.numeric(lon) | length(lon) != dim(ano)[space_dim[2]]) { + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'ano'.") + } + if (any(lon > 360 | lon < -360)) { + .warning("Some 'lon' is out of the range [-360, 360].") + } + ## neofs + if (!is.numeric(neofs) | neofs %% 1 != 0 | neofs <= 0 | length(neofs) > 1) { + stop("Parameter 'neofs' must be a positive integer.") + } + ## corr + if (!is.logical(corr) | length(corr) > 1) { + stop("Parameter 'corr' must be one logical value.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + ############################### + # Calculate EOF + +# # Replace mask of NAs with 0s for EOF analysis. +# ano[!is.finite(ano)] <- 0 + + # Area weighting. Weights for EOF; needed to compute the + # fraction of variance explained by each EOFs + space_ind <- sapply(space_dim, function(a) which(names(dim(ano)) == a)) + wght <- array(cos(lat * pi / 180), dim = dim(ano)[space_ind]) + + # We want the covariance matrix to be weigthed by the grid + # cell area so the anomaly field is weighted by its square + # root since the covariance matrix equals transpose(ano) + # times ano. 
+ wght <- sqrt(wght) + + # neofs is bounded + if (neofs != min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), neofs)) { + neofs <- min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), neofs) + .warning(paste0("Parameter 'neofs' is changed to ", neofs, ", the minimum among ", + "the length of time_dim, the production of the length of space_dim, ", + "and neofs.")) + } + + res <- Apply(ano, + target_dims = c(time_dim, space_dim), + output_dims = list(EOFs = c('mode', space_dim), + PCs = c(time_dim, 'mode'), + var = 'mode', + tot_var = NULL, + mask = space_dim), + fun = .EOF, + corr = corr, neofs = neofs, + wght = wght, + ncores = ncores) + + return(c(res, wght = list(wght))) + +} + +.EOF <- function(ano, neofs = 15, corr = FALSE, wght = wght) { + # ano: [time, lat, lon] + + # Dimensions + nt <- dim(ano)[1] + ny <- dim(ano)[2] + nx <- dim(ano)[3] + + # Check if all the time steps at one grid point are NA-consistent. + # The grid point should have all NAs or no NA along time dim. + if (anyNA(ano)) { + ano_latlon <- array(ano, dim = c(nt, ny * nx)) # [time, lat*lon] + na_ind <- which(is.na(ano_latlon), arr.ind = T) + if (dim(na_ind)[1] != nt * length(unique(na_ind[, 2]))) { + stop("Detect certain grid points have NAs but not consistent across time ", + "dimension. If the grid point is NA, it should have NA at all time step.") + } + } + + # Build the mask + mask <- ano[1, , ] + mask[!is.finite(mask)] <- NA + mask[is.finite(mask)] <- 1 + dim(mask) <- c(ny, nx) + + # Replace mask of NAs with 0s for EOF analysis. + ano[!is.finite(ano)] <- 0 + + ano <- ano * InsertDim(wght, 1, nt) + + # The use of the correlation matrix is done under the option corr. + if (corr) { + stdv <- apply(ano, c(2, 3), sd, na.rm = T) + ano <- ano / InsertDim(stdv, 1, nt) + } + + # Time/space matrix for SVD + dim(ano) <- c(nt, ny * nx) + dim.dat <- dim(ano) + + # 'transpose' means the array needs to be transposed before + # calling La.svd for computational efficiency because the + # spatial dimension is larger than the time dimension. This + # goes with transposing the outputs of LA.svd also. + if (dim.dat[2] > dim.dat[1]) { + transpose <- TRUE + } else { + transpose <- FALSE + } + if (transpose) { + pca <- La.svd(t(ano)) + } else { + pca <- La.svd(ano) + } + + # La.svd conventions: decomposition X = U D t(V) La.svd$u + # returns U La.svd$d returns diagonal values of D La.svd$v + # returns t(V) !! The usual convention is PC=U and EOF=V. + # If La.svd is called for ano (transpose=FALSE case): EOFs: + # $v PCs: $u If La.svd is called for t(ano) (transposed=TRUE + # case): EOFs: t($u) PCs: t($v) + + if (transpose) { + pca.EOFs <- t(pca$u) + pca.PCs <- t(pca$v) + } else { + pca.EOFs <- pca$v + pca.PCs <- pca$u + } + + # The numbers of transposition is limited to neofs + PC <- pca.PCs[, 1:neofs] + EOF <- pca.EOFs[1:neofs, ] + dim(EOF) <- c(neofs, ny, nx) + + # To sort out crash when neofs=1. 
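+  # (when only one mode is kept, subsetting 'pca.PCs' drops the result to a
+  #  plain vector, so a second dimension of length 1 is re-inserted below)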
+ if (neofs == 1) { + PC <- InsertDim(PC, 2, 1, name = 'new') + } + + # Computation of the % of variance associated with each mode + W <- pca$d[1:neofs] + tot.var <- sum(pca$d^2) + var.eof <- 100 * pca$d[1:neofs]^2 / tot.var + + for (e in 1:neofs) { + # Set all masked grid points to NA in the EOFs + # Divide patterns by area weights so that EOF * PC gives unweigthed (original) data + EOF[e, , ] <- EOF[e, , ] * mask / wght + # PC is multiplied by the explained variance, + # so that the reconstruction is only EOF * PC + PC[, e] <- PC[, e] * W[e] + } + + if (neofs == 1) { + var.eof <- as.array(var.eof) + } + + return(invisible(list(EOFs = EOF, PCs = PC, var = var.eof, tot_var = tot.var, mask = mask))) +} + diff --git a/modules/Crossval/R/tmp/GetProbs.R b/modules/Crossval/R/tmp/GetProbs.R index 2a538892..fb2cda0c 100644 --- a/modules/Crossval/R/tmp/GetProbs.R +++ b/modules/Crossval/R/tmp/GetProbs.R @@ -212,7 +212,6 @@ GetProbs <- function(data, time_dim = 'sdate', memb_dim = 'member', stop("Parameter 'ncores' must be either NULL or a positive integer.") } } - ############################### if (is.null(abs_thresholds)) { res <- Apply(data = list(data = data), @@ -298,7 +297,6 @@ GetProbs <- function(data, time_dim = 'sdate', memb_dim = 'member', } } # quantiles: [bin-1, sdate] - # Probabilities probs <- array(dim = c(dim(quantiles)[1] + 1, dim(data)[1])) # [bin, sdate] for (i_time in seq_len(dim(data)[1])) { diff --git a/modules/Crossval/R/tmp/NAO.R b/modules/Crossval/R/tmp/NAO.R new file mode 100644 index 00000000..255e2a9f --- /dev/null +++ b/modules/Crossval/R/tmp/NAO.R @@ -0,0 +1,574 @@ +#'Compute the North Atlantic Oscillation (NAO) Index +#' +#'Compute the North Atlantic Oscillation (NAO) index based on the leading EOF +#'of the sea level pressure (SLP) anomalies over the north Atlantic region +#'(20N-80N, 80W-40E). The PCs are obtained by projecting the forecast and +#'observed anomalies onto the observed EOF pattern or the forecast +#'anomalies onto the EOF pattern of the other years of the forecast. +#'By default (ftime_avg = 2:4), NAO() computes the NAO index for 1-month +#'lead seasonal forecasts that can be plotted with PlotBoxWhisker(). It returns +#'cross-validated PCs of the NAO index for hindcast (exp) and observations +#'(obs) based on the leading EOF pattern, or, if forecast (exp_cor) is provided, +#'the NAO index for forecast and the corresponding data (exp and obs). +#' +#'@param exp A named numeric array of North Atlantic SLP (20N-80N, 80W-40E) +#' hindcast anomalies from \code{Ano()} or \code{Ano_CrossValid()} with +#' dimensions 'time_dim', 'memb_dim', 'ftime_dim', and 'space_dim' at least. +#' If only NAO of observational data needs to be computed, this parameter can +#' be left to NULL. The default value is NULL. +#'@param obs A named numeric array of North Atlantic SLP (20N-80N, 80W-40E) +#' observed anomalies from \code{Ano()} or \code{Ano_CrossValid()} with +#' dimensions 'time_dim', 'ftime_dim', and 'space_dim' at least. +#' If only NAO of experimental data needs to be computed, this parameter can +#' be left to NULL. The default value is NULL. +#'@param exp_cor A named numeric array of the Nort Atlantic SLP (20-80N, 80W-40E) +#' forecast anomalies from \code{Ano()} or \code{Ano_CrossValid()} with +#' dimension 'time_dim' of length 1 (as in the case of an operational +#' forecast), 'memb_dim', 'ftime_dim', and 'space_dim' at least. +#' If only NAO of reference period needs to be computed, this parameter can +#' be left to NULL. The default value is NULL. 
+#'@param lat A vector of the latitudes of 'exp' and 'obs'. +#'@param lon A vector of the longitudes of 'exp' and 'obs'. +#'@param time_dim A character string indicating the name of the time dimension +#' of 'exp' and 'obs'. The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member +#' dimension of 'exp' (and 'obs', optional). If 'obs' has memb_dim, the length +#' must be 1. The default value is 'member'. +#'@param space_dim A vector of two character strings. The first is the dimension +#' name of latitude of 'ano' and the second is the dimension name of longitude +#' of 'ano'. The default value is c('lat', 'lon'). +#'@param ftime_dim A character string indicating the name of the forecast time +#' dimension of 'exp' and 'obs'. The default value is 'ftime'. +#'@param ftime_avg A numeric vector of the forecast time steps to average +#' across the target period. If average is not needed, set NULL. The default +#' value is 2:4, i.e., from 2nd to 4th forecast time steps. +#'@param obsproj A logical value indicating whether to compute the NAO index by +#' projecting the forecast anomalies onto the leading EOF of observational +#' reference (TRUE, default) or compute the NAO by first computing the leading +#' EOF of the forecast anomalies (in cross-validation mode, i.e. leave the +#' evaluated year out), then projecting forecast anomalies onto this EOF +#' (FALSE). If 'exp_cor' is provided, 'obs' will be used when obsproj is TRUE +#' and 'exp' will be used when obsproj is FALSE, and no cross-validation is +#' applied. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list which contains some of the following items depending on the data inputs: +#'\item{exp}{ +#' A numeric array of hindcast NAO index in verification format with the same +#' dimensions as 'exp' except space_dim and ftime_dim. If ftime_avg is NULL, +#' ftime_dim remains. +#' } +#'\item{obs}{ +#' A numeric array of observation NAO index in verification format with the same +#' dimensions as 'obs' except space_dim and ftime_dim. If ftime_avg is NULL, +#' ftime_dim remains. +#'} +#'\item{exp_cor}{ +#' A numeric array of forecast NAO index in verification format with the same +#' dimensions as 'exp_cor' except space_dim and ftime_dim. If ftime_avg is NULL, +#' ftime_dim remains. +#' } +#' +#'@references +#'Doblas-Reyes, F.J., Pavan, V. and Stephenson, D. (2003). The skill of +#' multi-model seasonal forecasts of the wintertime North Atlantic +#' Oscillation. Climate Dynamics, 21, 501-514. +#' DOI: 10.1007/s00382-003-0350-4 +#' +#'@examples +#'# Make up synthetic data +#'set.seed(1) +#'exp <- array(rnorm(1620), dim = c(member = 2, sdate = 3, ftime = 5, lat = 6, lon = 9)) +#'set.seed(2) +#'obs <- array(rnorm(1620), dim = c(member = 1, sdate = 3, ftime = 5, lat = 6, lon = 9)) +#'lat <- seq(20, 80, length.out = 6) +#'lon <- seq(-80, 40, length.out = 9) +#'nao <- NAO(exp = exp, obs = obs, lat = lat, lon = lon) +#' +#'exp_cor <- array(rnorm(540), dim = c(member = 2, sdate = 1, ftime = 5, lat = 6, lon = 9)) +#'nao <- NAO(exp = exp, obs = obs, exp_cor = exp_cor, lat = lat, lon = lon, obsproj = TRUE) +#'# plot the NAO index +#' \dontrun{ +#'nao$exp <- Reorder(nao$exp, c(2, 1)) +#'nao$obs <- Reorder(nao$obs, c(2, 1)) +#'PlotBoxWhisker(nao$exp, nao$obs, "NAO index, DJF", "NAO index (PC1) TOS", +#' monini = 12, yearini = 1985, freq = 1, "Exp. A", "Obs. 
X") +#' } +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@export +NAO <- function(exp = NULL, obs = NULL, exp_cor = NULL, lat, lon, time_dim = 'sdate', + memb_dim = 'member', space_dim = c('lat', 'lon'), + ftime_dim = 'ftime', ftime_avg = 2:4, + obsproj = TRUE, ncores = NULL) { + # Check inputs + ## exp, obs, and exp_cor (1) + if (is.null(obs) & is.null(exp)) { + stop("Parameter 'exp' and 'obs' cannot both be NULL.") + } + if (!is.null(exp)) { + if (!is.numeric(exp)) { + stop("Parameter 'exp' must be a numeric array.") + } + if (is.null(dim(exp))) { + stop("Parameter 'exp' must have at least dimensions ", + "time_dim, memb_dim, space_dim, and ftime_dim.") + } + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0)) { + stop("Parameter 'exp' must have dimension names.") + } + } + if (!is.null(obs)) { + if (!is.numeric(obs)) { + stop("Parameter 'obs' must be a numeric array.") + } + if (is.null(dim(obs))) { + stop("Parameter 'obs' must have at least dimensions ", + "time_dim, space_dim, and ftime_dim.") + } + if (any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'obs' must have dimension names.") + } + } + if (!is.null(exp_cor)) { + if (!is.numeric(exp_cor)) { + stop("Parameter 'exp_cor' must be a numeric array.") + } + if (is.null(dim(exp_cor))) { + stop(paste0("Parameter 'exp_cor' must have at least dimensions ", + "time_dim, memb_dim, space_dim, and ftime_dim.")) + } + if (any(is.null(names(dim(exp_cor)))) | any(nchar(names(dim(exp_cor))) == 0)) { + stop("Parameter 'exp_cor' must have dimension names.") + } + if (is.null(exp) || is.null(obs)) { + stop("Parameters 'exp' and 'obs' are required when 'exp_cor' is not provided.") + } + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!is.null(exp)) { + if (!time_dim %in% names(dim(exp))) { + stop("Parameter 'time_dim' is not found in 'exp' dimension.") + } + } + if (!is.null(obs)) { + if (!time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'obs' dimension.") + } + } + if (!is.null(exp_cor)) { + if (!time_dim %in% names(dim(exp_cor))) { + stop("Parameter 'time_dim' is not found in 'exp_cor' dimension.") + } + if (dim(exp_cor)[time_dim] > 1) { + stop("Parameter 'exp_cor' is expected to have length 1 in ", + time_dim, "dimension.") + } + } + + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!is.null(exp)) { + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + } + add_member_back <- FALSE + if (!is.null(obs)) { + if (memb_dim %in% names(dim(obs))) { + if (dim(obs)[memb_dim] != 1) { + stop("The length of parameter 'memb_dim' in 'obs' must be 1.") + } else { + add_member_back <- TRUE + obs <- ClimProjDiags::Subset(obs, memb_dim, 1, drop = 'selected') + } + } + } + if (!is.null(exp_cor)) { + if (!memb_dim %in% names(dim(exp_cor))) { + stop("Parameter 'memb_dim' is not found in 'exp_cor' dimension.") + } + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (!is.null(exp)) { + if (!all(space_dim %in% names(dim(exp)))) { + stop("Parameter 'space_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(obs)) { + if (!all(space_dim %in% names(dim(obs)))) { + stop("Parameter 'space_dim' is not found in 'exp' or 'obs' 
dimension.") + } + } + if (!is.null(exp_cor)) { + if (any(!space_dim %in% names(dim(exp_cor)))) { + stop("Parameter 'space_dim' is not found in 'exp_cor' dimensions.") + } + } + ## ftime_dim + if (!is.character(ftime_dim) | length(ftime_dim) > 1) { + stop("Parameter 'ftime_dim' must be a character string.") + } + if (!is.null(exp)) { + if (!ftime_dim %in% names(dim(exp))) { + stop("Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(obs)) { + if (!ftime_dim %in% names(dim(obs))) { + stop("Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(exp_cor)) { + if (!ftime_dim %in% names(dim(exp_cor))) { + stop("Parameter 'ftime_dim' is not found in 'exp_cor' dimensions.") + } + } + ## exp and obs (2) + #TODO: Add checks for exp_cor + if (!is.null(exp) & !is.null(obs)) { + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + throw_error <- FALSE + if (length(name_exp) != length(name_obs)) { + throw_error <- TRUE + } else if (any(name_exp != name_obs)) { + throw_error <- TRUE + } else if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + throw_error <- TRUE + } + if (throw_error) { + stop("Parameter 'exp' and 'obs' must have the same names and lengths ", + "of all the dimensions except 'memb_dim'.") + } + } + ## ftime_avg + if (!is.null(ftime_avg)) { + if (!is.vector(ftime_avg) | !is.numeric(ftime_avg)) { + stop("Parameter 'ftime_avg' must be an integer vector.") + } + if (!is.null(exp)) { + if (max(ftime_avg) > dim(exp)[ftime_dim] | min(ftime_avg) < 1) { + stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.") + } + } + if (!is.null(obs)) { + if (max(ftime_avg) > dim(obs)[ftime_dim] | min(ftime_avg) < 1) { + stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.") + } + } + if (!is.null(exp_cor)) { + if (max(ftime_avg) > dim(exp_cor)[ftime_dim] | min(ftime_avg) < 1) { + stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.") + } + } + } + ## sdate >= 2 + if (!is.null(exp)) { + if (dim(exp)[time_dim] < 2) { + stop("The length of time_dim must be at least 2.") + } + } else { + if (dim(obs)[time_dim] < 2) { + stop("The length of time_dim must be at least 2.") + } + } + ## lat and lon + if (!is.null(exp)) { + if (!is.numeric(lat) | length(lat) != dim(exp)[space_dim[1]]) { + stop("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp' and 'obs'.") + } + if (!is.numeric(lon) | length(lon) != dim(exp)[space_dim[2]]) { + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'exp' and 'obs'.") + } + } + if (!is.null(obs)) { + if (!is.numeric(lat) | length(lat) != dim(obs)[space_dim[1]]) { + stop("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp' and 'obs'.") + } + if (!is.numeric(lon) | length(lon) != dim(obs)[space_dim[2]]) { + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'exp' and 'obs'.") + } + } + if (!is.null(exp_cor)) { + if (!is.numeric(lat) | length(lat) != dim(exp_cor)[space_dim[1]]) { + stop("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp_cor'.") + } + if (!is.numeric(lon) | length(lon) != dim(exp_cor)[space_dim[2]]) { + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 
'exp_cor'.") + } + } + stop_needed <- FALSE + if (max(lat) > 80 | min(lat) < 20) { + stop_needed <- TRUE + } + #NOTE: different from s2dverification + # lon is not used in the calculation actually. EOF only uses lat to do the + # weight. So we just need to ensure the data is in this region, regardless + # the order. + if (any(lon < 0)) { #[-180, 180] + if (!(min(lon) > -90 & min(lon) < -70 & max(lon) < 50 & max(lon) > 30)) { + stop_needed <- TRUE + } + } else { #[0, 360] + if (any(lon >= 50 & lon <= 270)) { + stop_needed <- TRUE + } else { + lon_E <- lon[which(lon < 50)] + lon_W <- lon[-which(lon < 50)] + if (max(lon_E) < 30 | min(lon_W) > 290) { + stop_needed <- TRUE + } + } + } + if (stop_needed) { + stop("The typical domain used to compute the NAO is 20N-80N, ", + "80W-40E. 'lat' or 'lon' is out of range.") + } + ## obsproj + if (!is.logical(obsproj) | length(obsproj) > 1) { + stop("Parameter 'obsproj' must be either TRUE or FALSE.") + } + if (obsproj) { + if (is.null(obs)) { + stop("Parameter 'obsproj' set to TRUE but no 'obs' provided.") + } + if (is.null(exp) & is.null(exp_cor)) { + .warning("parameter 'obsproj' set to TRUE but no 'exp' nor 'exp_cor' provided.") + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores == 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + # Average ftime + if (!is.null(ftime_avg)) { + if (!is.null(exp)) { + exp_sub <- ClimProjDiags::Subset(exp, ftime_dim, ftime_avg, drop = FALSE) + exp <- MeanDims(exp_sub, ftime_dim, na.rm = TRUE) + ## Cross-validated PCs. Fabian. This should be extended to + ## nmod and nlt by simple loops. Virginie + } + if (!is.null(obs)) { + obs_sub <- ClimProjDiags::Subset(obs, ftime_dim, ftime_avg, drop = FALSE) + obs <- MeanDims(obs_sub, ftime_dim, na.rm = TRUE) + } + if (!is.null(exp_cor)) { + exp_cor_sub <- ClimProjDiags::Subset(exp_cor, ftime_dim, ftime_avg, drop = FALSE) + exp_cor <- MeanDims(exp_cor_sub, ftime_dim, na.rm = TRUE) + } + } + + # wght + wght <- array(sqrt(cos(lat * pi / 180)), dim = c(length(lat), length(lon))) + if (is.null(exp_cor)) { + if (!is.null(exp) & !is.null(obs)) { + res <- Apply(list(exp, obs), + target_dims = list(exp = c(memb_dim, time_dim, space_dim), + obs = c(time_dim, space_dim)), + fun = .NAO, + lat = lat, wght = wght, + obsproj = obsproj, add_member_back = add_member_back, + ncores = ncores) + } else if (!is.null(exp)) { + res <- Apply(list(exp = exp), + target_dims = list(exp = c(memb_dim, time_dim, space_dim)), + fun = .NAO, + lat = lat, wght = wght, obs = NULL, + obsproj = obsproj, add_member_back = FALSE, + ncores = ncores) + } else if (!is.null(obs)) { + if (add_member_back) { + output_dims <- list(obs = c(time_dim, memb_dim)) + } else { + output_dims <- list(obs = time_dim) + } + res <- Apply(list(obs = obs), + target_dims = list(obs = c(time_dim, space_dim)), + output_dims = output_dims, + fun = .NAO, + lat = lat, wght = wght, exp = NULL, + obsproj = obsproj, add_member_back = add_member_back, + ncores = ncores) + } + } else { # exp_cor provided + res <- Apply(list(exp = exp, obs = obs, exp_cor = exp_cor), + target_dims = list(exp = c(memb_dim, time_dim, space_dim), + obs = c(time_dim, space_dim), + exp_cor = c(memb_dim, time_dim, space_dim)), + fun = .NAO, + lat = lat, wght = wght, + obsproj = obsproj, add_member_back = add_member_back, + ncores = ncores) + } + + return(res) +} + +.NAO <- function(exp = NULL, obs = NULL, exp_cor = NULL, lat, wght, obsproj = TRUE, + add_member_back = FALSE) { + # 
exp: [memb_exp, sdate, lat, lon] + # obs: [sdate, lat, lon] + # exp_cor: [memb, sdate = 1, lat, lon] + # wght: [lat, lon] + + if (!is.null(exp)) { + ntime <- dim(exp)[2] + nlat <- dim(exp)[3] + nlon <- dim(exp)[4] + nmemb_exp <- dim(exp)[1] + } else { + ntime <- dim(obs)[1] + nlat <- dim(obs)[2] + nlon <- dim(obs)[3] + } + if (!is.null(exp_cor)) { + ntime_exp_cor <- dim(exp_cor)[2] # should be 1 + nmemb_exp_cor <- dim(exp_cor)[1] + } + + if (!is.null(obs)) nao_obs <- array(NA, dim = ntime) + if (!is.null(exp)) nao_exp <- array(NA, dim = c(ntime, nmemb_exp)) + if (!is.null(exp_cor)) { + nao_exp_cor <- array(NA, dim = c(ntime_exp_cor, nmemb_exp_cor)) + #NOTE: The dimensions are flipped to fill in data correctly. Need to flip it back later. + } + + if (is.null(exp_cor)) { + + for (tt in 1:ntime) { # cross-validation + + if (!is.null(obs)) { + ## Calculate observation EOF. Excluding one forecast start year. + obs_sub <- obs[(1:ntime)[-tt], , , drop = FALSE] + EOF_obs <- .EOF(obs_sub, neofs = 1, wght = wght)$EOFs # [mode = 1, lat, lon] + ## Correct polarity of pattern + # EOF_obs: [mode = 1, lat, lon] + if (0 < mean(EOF_obs[1, which.min(abs(lat - 65)), ], na.rm = T)) { + EOF_obs <- EOF_obs * (-1) + } + ## Project observed anomalies. + PF <- .ProjectField(obs, eof_mode = EOF_obs[1, , ], wght = wght) # [sdate] + ## Keep PCs of excluded forecast start year. Fabian. + nao_obs[tt] <- PF[tt] + } + + if (!is.null(exp)) { + if (!obsproj) { + exp_sub <- exp[, (1:ntime)[-tt], , , drop = FALSE] + # Combine 'memb' and 'sdate' to calculate EOF + dim(exp_sub) <- c(nmemb_exp * (ntime - 1), nlat, nlon) + EOF_exp <- .EOF(exp_sub, neofs = 1, wght = wght)$EOFs # [mode = 1, lat, lon] + + ## Correct polarity of pattern + ##NOTE: different from s2dverification, which doesn't use mean(). +# if (0 < EOF_exp[1, which.min(abs(lat - 65)), ]) { + if (0 < mean(EOF_exp[1, which.min(abs(lat - 65)), ], na.rm = T)) { + EOF_exp <- EOF_exp * (-1) + } + + ### Lines below could be simplified further by computing + ### ProjectField() only on the year of interest... (though this is + ### not vital). Lauriane + for (imemb in 1:nmemb_exp) { + PF <- .ProjectField(exp[imemb, , , ], eof_mode = EOF_exp[1, , ], wght = wght) # [sdate, memb] + nao_exp[tt, imemb] <- PF[tt] + } + } else { + ## Project forecast anomalies on obs EOF + for (imemb in 1:nmemb_exp) { + PF <- .ProjectField(exp[imemb, , , ], eof_mode = EOF_obs[1, , ], wght = wght) # [sdate] + nao_exp[tt, imemb] <- PF[tt] + } + } + } + + } # for loop sdate + + } else { # exp_cor provided + + ## Calculate observation EOF. 
Without cross-validation + EOF_obs <- .EOF(obs, neofs = 1, wght = wght)$EOFs # [mode = 1, lat, lon] + ## Correct polarity of pattern + # EOF_obs: [mode, lat, lon] + if (0 < mean(EOF_obs[1, which.min(abs(lat - 65)), ], na.rm = T)) { + EOF_obs <- EOF_obs * (-1) + } + ## Project observed anomalies + PF <- .ProjectField(obs, eof_mode = EOF_obs, wght = wght) # [mode = 1, sdate] + nao_obs[] <- PF[1, ] + + if (!obsproj) { + # Calculate EOF_exp + tmp <- array(exp, dim = c(nmemb_exp * ntime, nlat, nlon)) + EOF_exp <- .EOF(tmp, neofs = 1, wght = wght)$EOFs # [mode = 1, lat, lon] + ## Correct polarity of pattern + if (0 < mean(EOF_exp[1, which.min(abs(lat - 65)), ], na.rm = T)) { + EOF_exp <- EOF_exp * (-1) + } + eof_mode_input <- EOF_exp[1, , ] + } else { + eof_mode_input <- EOF_obs[1, , ] + } + + # Calculate NAO_exp + for (imemb in 1:dim(exp)[1]) { + exp_sub <- ClimProjDiags::Subset(exp, along = 1, indices = imemb, + drop = 'selected') + PF <- .ProjectField(exp_sub, eof_mode = eof_mode_input, wght = wght) # [sdate] + nao_exp[ , imemb] <- PF + } + + # Calculate NAO_exp_cor + for (imemb in 1:dim(exp_cor)[1]) { + exp_sub <- ClimProjDiags::Subset(exp_cor, along = 1, indices = imemb, + drop = 'selected') + PF <- .ProjectField(exp_sub, eof_mode = eof_mode_input, wght = wght) # [sdate] + nao_exp_cor[, imemb] <- PF + } + + } + # add_member_back + if (add_member_back) { + memb_dim_name <- ifelse(!is.null(names(dim(exp))[1]), names(dim(exp))[1], 'member') + nao_obs <- InsertDim(nao_obs, 2, 1, name = memb_dim_name) + } + + # Return results + if (is.null(exp_cor)) { + res <- NULL + if (!is.null(exp)) { + res <- c(res, list(exp = nao_exp)) + } + if (!is.null(obs)) { + res <- c(res, list(obs = nao_obs)) + } + return(res) + + } else { + return(list(exp = nao_exp, obs = nao_obs, exp_cor = nao_exp_cor)) + } +} + diff --git a/modules/Crossval/R/tmp/ProjectField.R b/modules/Crossval/R/tmp/ProjectField.R new file mode 100644 index 00000000..efa35dc3 --- /dev/null +++ b/modules/Crossval/R/tmp/ProjectField.R @@ -0,0 +1,272 @@ +#'Project anomalies onto modes of variability +#' +#'Project anomalies onto modes of variability to get the temporal evolution of +#'the EOF mode selected. It returns principal components (PCs) by area-weighted +#'projection onto EOF pattern (from \code{EOF()}) or REOF pattern (from +#'\code{REOF()} or \code{EuroAtlanticTC()}). The calculation removes NA and +#'returns NA if the whole spatial pattern is NA. +#' +#'@param ano A numerical array of anomalies with named dimensions. The +#' dimensions must have at least 'time_dim' and 'space_dim'. It can be +#' generated by Ano(). +#'@param eof A list that contains at least 'EOFs' or 'REOFs' and 'wght', which +#' are both arrays. 'EOFs' or 'REOFs' must have dimensions 'mode' and +#' 'space_dim' at least. 'wght' has dimensions space_dim. It can be generated +#' by EOF() or REOF(). +#'@param time_dim A character string indicating the name of the time dimension +#' of 'ano'. The default value is 'sdate'. +#'@param space_dim A vector of two character strings. The first is the dimension +#' name of latitude of 'ano' and the second is the dimension name of longitude +#' of 'ano'. The default value is c('lat', 'lon'). +#'@param mode An integer of the variability mode number in the EOF to be +#' projected on. The default value is NULL, which means all the modes of 'eof' +#' is calculated. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. 
+#' +#'@return A numerical array of the principal components in the verification +#' format. The dimensions are the same as 'ano' except 'space_dim'. +#' +#'@seealso EOF, NAO, PlotBoxWhisker +#'@examples +#'\dontshow{ +#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), +#' c('observation'), startDates, +#' leadtimemin = 1, +#' leadtimemax = 4, +#' output = 'lonlat', +#' latmin = 27, latmax = 48, +#' lonmin = -12, lonmax = 40) +#'} +#'ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) +#'eof_exp <- EOF(ano$exp, sampleData$lat, sampleData$lon) +#'eof_obs <- EOF(ano$obs, sampleData$lat, sampleData$lon) +#'mode1_exp <- ProjectField(ano$exp, eof_exp, mode = 1) +#'mode1_obs <- ProjectField(ano$obs, eof_obs, mode = 1) +#' +#'\dontrun{ +#' # Plot the forecast and the observation of the first mode for the last year +#' # of forecast +#' sdate_dim_length <- dim(mode1_obs)['sdate'] +#' plot(mode1_obs[sdate_dim_length, 1, 1, ], type = "l", ylim = c(-1, 1), +#' lwd = 2) +#' for (i in 1:dim(mode1_exp)['member']) { +#' par(new = TRUE) +#' plot(mode1_exp[sdate_dim_length, 1, i, ], type = "l", col = rainbow(10)[i], +#' ylim = c(-15000, 15000)) +#' } +#'} +#' +#'@import multiApply +#'@export +ProjectField <- function(ano, eof, time_dim = 'sdate', space_dim = c('lat', 'lon'), + mode = NULL, ncores = NULL) { + + # Check inputs + ## ano (1) + if (is.null(ano)) { + stop("Parameter 'ano' cannot be NULL.") + } + if (!is.numeric(ano)) { + stop("Parameter 'ano' must be a numeric array.") + } + if (any(is.null(names(dim(ano)))) | any(nchar(names(dim(ano))) == 0)) { + stop("Parameter 'ano' must have dimension names.") + } + ## eof (1) + if (is.null(eof)) { + stop("Parameter 'eof' cannot be NULL.") + } + if (!is.list(eof)) { + stop("Parameter 'eof' must be a list generated by EOF() or REOF().") + } + if ('EOFs' %in% names(eof)) { + EOFs <- "EOFs" + } else if ('REOFs' %in% names(eof)) { + EOFs <- "REOFs" + } else if ('patterns' %in% names(eof)) { + EOFs <- "patterns" + } else { + stop("Parameter 'eof' must be a list that contains 'EOFs', 'REOFs', ", + "or 'patterns'. It can be generated by EOF(), REOF(), or EuroAtlanticTC().") + } + if (!'wght' %in% names(eof)) { + stop("Parameter 'eof' must be a list that contains 'wght'. 
", + "It can be generated by EOF() or REOF().") + } + if (!is.numeric(eof[[EOFs]]) || !is.array(eof[[EOFs]])) { + stop("The component 'EOFs' or 'REOFs' of parameter 'eof' must be a numeric array.") + } + if (!is.numeric(eof$wght) || !is.array(eof$wght)) { + stop("The component 'wght' of parameter 'eof' must be a numeric array.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(ano))) { + stop("Parameter 'time_dim' is not found in 'ano' dimension.") + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (!all(space_dim %in% names(dim(ano)))) { + stop("Parameter 'space_dim' is not found in 'ano' dimension.") + } + ## ano (2) + if (!all(space_dim %in% names(dim(ano))) | !time_dim %in% names(dim(ano))) { + stop("Parameter 'ano' must be an array with dimensions named as ", + "parameter 'space_dim' and 'time_dim'.") + } + ## eof (2) + if (!all(space_dim %in% names(dim(eof[[EOFs]]))) | + !'mode' %in% names(dim(eof[[EOFs]]))) { + stop("The component 'EOFs' or 'REOFs' of parameter 'eof' must be an array ", + "with dimensions named as parameter 'space_dim' and 'mode'.") + } + if (length(dim(eof$wght)) != 2 | !all(names(dim(eof$wght)) %in% space_dim)) { + stop("The component 'wght' of parameter 'eof' must be an array ", + "with dimensions named as parameter 'space_dim'.") + } + ## mode + if (!is.null(mode)) { + if (!is.numeric(mode) | mode %% 1 != 0 | mode < 0 | length(mode) > 1) { + stop("Parameter 'mode' must be NULL or a positive integer.") + } + if (mode > dim(eof[[EOFs]])['mode']) { + stop("Parameter 'mode' is greater than the number of available ", + "modes in 'eof'.") + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + +#------------------------------------------------------- + + # Keep the chosen mode + if (!is.null(mode)) { + eof_mode <- ClimProjDiags::Subset(eof[[EOFs]], 'mode', mode, drop = 'selected') + } else { + eof_mode <- eof[[EOFs]] + } + + if ('mode' %in% names(dim(eof_mode))) { + dimnames_without_mode <- names(dim(eof_mode))[-which(names(dim(eof_mode)) == 'mode')] + } else { + dimnames_without_mode <- names(dim(eof_mode)) + } + + if (all(dimnames_without_mode %in% space_dim)) { # eof_mode: [lat, lon] or [mode, lat, lon] + if ('mode' %in% names(dim(eof_mode))) { + eof_mode_target <- c('mode', space_dim) + output_dims <- c('mode', time_dim) + } else { + eof_mode_target <- space_dim + output_dims <- time_dim + } + res <- Apply(list(ano, eof_mode), + target_dims = list(c(time_dim, space_dim), + eof_mode_target), + output_dims = output_dims, + wght = eof$wght, + fun = .ProjectField, + ncores = ncores)$output1 + + } else { + + if (!all(dimnames_without_mode %in% names(dim(ano)))) { + stop("The array 'EOF' in parameter 'eof' has dimension not in parameter ", + "'ano'. Check if 'ano' and 'eof' are compatible.") + } + + common_dim_ano <- dim(ano)[which(names(dim(ano)) %in% dimnames_without_mode)] + if (any(common_dim_ano[match(dimnames_without_mode, names(common_dim_ano))] != + dim(eof_mode)[dimnames_without_mode])) { + stop("Found paramter 'ano' and 'EOF' in parameter 'eof' have common dimensions ", + "with different length. Check if 'ano' and 'eof' are compatible.") + } + + # Enlarge eof/ano is needed. 
The margin_dims of Apply() must be consistent + # between ano and eof. + additional_dims <- dim(ano)[-which(names(dim(ano)) %in% names(dim(eof_mode)))] + additional_dims <- additional_dims[-which(names(additional_dims) == time_dim)] + if (length(additional_dims) != 0) { + for (i in seq_along(additional_dims)) { + eof_mode <- InsertDim(eof_mode, posdim = (length(dim(eof_mode)) + 1), + lendim = additional_dims[i], name = names(additional_dims)[i]) + } + } + if ('mode' %in% names(dim(eof_mode))) { + eof_mode_target <- c('mode', space_dim) + output_dims <- c('mode', time_dim) + } else { + eof_mode_target <- space_dim + output_dims <- time_dim + } + res <- Apply(list(ano, eof_mode), + target_dims = list(c(time_dim, space_dim), + eof_mode_target), + output_dims = output_dims, + wght = eof$wght, + fun = .ProjectField, + ncores = ncores)$output1 + } + + return(res) +} + + +.ProjectField <- function(ano, eof_mode, wght) { + # ano: [sdate, lat, lon] + # eof_mode: [lat, lon] or [mode, lat, lon] + # wght: [lat, lon] + + ntime <- dim(ano)[1] + if (length(dim(eof_mode)) == 2) { # mode != NULL + # Initialization of pc.ver. + pc.ver <- array(NA, dim = ntime) #[sdate] + + # Weight + e.1 <- eof_mode * wght + ano <- ano * InsertDim(wght, 1, ntime) + #ano <- aaply(ano, 1, '*', wght) # much heavier + + na <- rowMeans(ano, na.rm = TRUE) # if [lat, lon] all NA, it's NA + #na <- apply(ano, 1, mean, na.rm = TRUE) # much heavier + tmp <- ano * InsertDim(e.1, 1, ntime) # [sdate, lat, lon] + rm(ano) + #pc.ver <- apply(tmp, 1, sum, na.rm = TRUE) # much heavier + pc.ver <- rowSums(tmp, na.rm = TRUE) + pc.ver[which(is.na(na))] <- NA + + } else { # mode = NULL + # Weight + e.1 <- eof_mode * InsertDim(wght, 1, dim(eof_mode)[1]) + dim(e.1) <- c(dim(eof_mode)[1], prod(dim(eof_mode)[2:3])) # [mode, lat*lon] + ano <- ano * InsertDim(wght, 1, ntime) + dim(ano) <- c(ntime, prod(dim(ano)[2:3])) # [sdate, lat*lon] + + na <- rowMeans(ano, na.rm = TRUE) # if [lat, lon] all NA, it's NA + na <- aperm(array(na, dim = c(ntime, dim(e.1)[1])), c(2, 1)) + + # Matrix multiplication e.1 [mode, lat*lon] by ano [lat*lon, sdate] + # Result: [mode, sdate] + pc.ver <- e.1 %*% t(ano) + pc.ver[which(is.na(na))] <- NA + +# # Change back dimensions to feet original input +# dim(projection) <- c(moredims, mode = unname(neofs)) +# return(projection) + } + + return(pc.ver) +} + + diff --git a/modules/Crossval/R/tmp/Utils.R b/modules/Crossval/R/tmp/Utils.R new file mode 100644 index 00000000..cd7a1e10 --- /dev/null +++ b/modules/Crossval/R/tmp/Utils.R @@ -0,0 +1,1885 @@ +#'@importFrom abind abind +#'@import plyr ncdf4 +#'@importFrom grDevices png jpeg pdf svg bmp tiff +#'@importFrom easyVerification convert2prob + +## Function to tell if a regexpr() match is a complete match to a specified name +.IsFullMatch <- function(x, name) { + x > 0 && attributes(x)$match.length == nchar(name) +} + +.ConfigReplaceVariablesInString <- function(string, replace_values, + allow_undefined_key_vars = FALSE) { + # This function replaces all the occurrences of a variable in a string by + # their corresponding string stored in the replace_values. 
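+  # e.g. (illustrative): .ConfigReplaceVariablesInString("$VAR$_day.nc",
+  #                                                      list(VAR = "tas"))
+  # returns "tas_day.nc"; with allow_undefined_key_vars = TRUE, unknown
+  # $KEY$ tokens are kept verbatim instead of raising an error.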
+ if (length(strsplit(string, "\\$")[[1]]) > 1) { + parts <- strsplit(string, "\\$")[[1]] + output <- "" + i <- 0 + for (part in parts) { + if (i %% 2 == 0) { + output <- paste0(output, part) + } else { + if (part %in% names(replace_values)) { + output <- paste0(output, + .ConfigReplaceVariablesInString(replace_values[[part]], + replace_values, + allow_undefined_key_vars)) + } else if (allow_undefined_key_vars) { + output <- paste0(output, "$", part, "$") + } else { + stop('Error: The variable $', part, + '$ was not defined in the configuration file.', sep = '') + } + } + i <- i + 1 + } + output + } else { + string + } +} + +.KnownLonNames <- function() { + known_lon_names <- c('lon', 'longitude', 'x', 'i', 'nav_lon') + return(known_lon_names) +} + +.KnownLatNames <- function() { + known_lat_names <- c('lat', 'latitude', 'y', 'j', 'nav_lat') + return(known_lat_names) +} + +.t2nlatlon <- function(t) { + ## As seen in cdo's griddes.c: ntr2nlat() + nlats <- (t * 3 + 1) / 2 + if ((nlats > 0) && (nlats - trunc(nlats) >= 0.5)) { + nlats <- ceiling(nlats) + } else { + nlats <- round(nlats) + } + if (nlats %% 2 > 0) { + nlats <- nlats + 1 + } + ## As seen in cdo's griddes.c: compNlon(), and as specified in ECMWF + nlons <- 2 * nlats + keep_going <- TRUE + while (keep_going) { + n <- nlons + if (n %% 8 == 0) n <- trunc(n / 8) + while (n %% 6 == 0) n <- trunc(n / 6) + while (n %% 5 == 0) n <- trunc(n / 5) + while (n %% 4 == 0) n <- trunc(n / 4) + while (n %% 3 == 0) n <- trunc(n / 3) + if (n %% 2 == 0) n <- trunc(n / 2) + if (n <= 8) { + keep_going <- FALSE + } else { + nlons <- nlons + 2 + if (nlons > 9999) { + stop("Error: pick another gaussian grid truncation. ", + "It doesn't fulfill the standards to apply FFT.") + } + } + } + c(nlats, nlons) +} + +.nlat2t <- function(nlats) { + trunc((nlats * 2 - 1) / 3) +} + +.LoadDataFile <- function(work_piece, explore_dims = FALSE, silent = FALSE) { + # The purpose, working modes, inputs and outputs of this function are + # explained in ?LoadDataFile + #suppressPackageStartupMessages({library(ncdf4)}) + #suppressPackageStartupMessages({library(bigmemory)}) + #suppressPackageStartupMessages({library(plyr)}) + # Auxiliar function to convert array indices to lineal indices + arrayIndex2VectorIndex <- function(indices, dims) { + if (length(indices) > length(dims)) { + stop("Error: indices do not match dimensions in arrayIndex2VectorIndex.") + } + position <- 1 + dims <- rev(dims) + indices <- rev(indices) + for (i in seq_along(indices)) { + position <- position + (indices[i] - 1) * prod(dims[-(1:i)]) + } + position + } + + found_file <- NULL + dims <- NULL + grid_name <- units <- var_long_name <- NULL + is_2d_var <- array_across_gw <- NULL + data_across_gw <- NULL + + filename <- work_piece[['filename']] + namevar <- work_piece[['namevar']] + output <- work_piece[['output']] + # The names of all data files in the directory of the repository that match + # the pattern are obtained. + if (any(grep("^http", filename))) { + is_url <- TRUE + files <- filename + ## TODO: Check that the user is not using shell globbing exps. + } else { + is_url <- FALSE + files <- Sys.glob(filename) + } + + # If we don't find any, we leave the flag 'found_file' with a NULL value. + if (length(files) > 0) { + # The first file that matches the pattern is chosen and read. 
+ filename <- head(files, 1) + filein <- filename + found_file <- filename + mask <- work_piece[['mask']] + + if (!silent && explore_dims) { + .message(paste("Exploring dimensions...", filename)) + ##} else { + ## cat(paste("* Reading & processing data...", filename, '\n')) + ##} + } + + # We will fill in 'expected_dims' with the names of the expected dimensions of + # the data array we'll retrieve from the file. + expected_dims <- NULL + remap_needed <- FALSE + # But first we open the file and work out whether the requested variable is 2d + fnc <- nc_open(filein) + if (!(namevar %in% names(fnc$var))) { + stop("Error: The variable", namevar, "is not defined in the file", filename) + } + var_long_name <- fnc$var[[namevar]]$longname + units <- fnc$var[[namevar]]$units + file_dimnames <- unlist(lapply(fnc$var[[namevar]][['dim']], '[[', 'name')) + # The following two 'ifs' are to allow for 'lon'/'lat' by default, instead of + # 'longitude'/'latitude'. + if (!(work_piece[['dimnames']][['lon']] %in% file_dimnames) && + (work_piece[['dimnames']][['lon']] == 'longitude') && + ('lon' %in% file_dimnames)) { + work_piece[['dimnames']][['lon']] <- 'lon' + } + if (!(work_piece[['dimnames']][['lat']] %in% file_dimnames) && + (work_piece[['dimnames']][['lat']] == 'latitude') && + ('lat' %in% file_dimnames)) { + work_piece[['dimnames']][['lat']] <- 'lat' + } + if (is.null(work_piece[['is_2d_var']])) { + is_2d_var <- all(c(work_piece[['dimnames']][['lon']], + work_piece[['dimnames']][['lat']]) %in% + unlist(lapply(fnc$var[[namevar]][['dim']], + '[[', 'name'))) + } else { + is_2d_var <- work_piece[['is_2d_var']] + } + if ((is_2d_var || work_piece[['is_file_per_dataset']])) { + if (Sys.which("cdo")[[1]] == "") { + stop("Error: CDO libraries not available") + } + + cdo_version <- + strsplit(suppressWarnings( + system2("cdo", args = '-V', stderr = TRUE))[[1]], ' ')[[1]][5] + + cdo_version <- + as.numeric_version(unlist(strsplit(cdo_version, "[A-Za-z]", fixed = FALSE))[[1]]) + + } + # If the variable to load is 2-d, we need to determine whether: + # - interpolation is needed + # - subsetting is requested + if (is_2d_var) { + ## We read the longitudes and latitudes from the file. + lon <- ncvar_get(fnc, work_piece[['dimnames']][['lon']]) + lat <- ncvar_get(fnc, work_piece[['dimnames']][['lat']]) + first_lon_in_original_file <- lon[1] + # If a common grid is requested or we are exploring the file dimensions + # we need to read the grid type and size of the file to finally work out the + # CDO grid name. 
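+      # (the grid is identified by parsing the 'cdo griddes' output below:
+      #  each gridID's gridtype/xsize/ysize entries are matched against the
+      #  lengths of the lon/lat vectors read from the file)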
+ if (!is.null(work_piece[['grid']]) || explore_dims) { + # Here we read the grid type and its number of longitudes and latitudes + file_info <- system(paste('cdo -s griddes', filein, '2> /dev/null'), intern = TRUE) + grids_positions <- grep('# gridID', file_info) + if (length(grids_positions) < 1) { + stop("The grid should be defined in the files.") + } + grids_first_lines <- grids_positions + 2 + grids_last_lines <- c((grids_positions - 2)[-1], length(file_info)) + grids_info <- as.list(seq_along(grids_positions)) + grids_info <- lapply(grids_info, + function (x) file_info[grids_first_lines[x]:grids_last_lines[x]]) + grids_info <- lapply(grids_info, function (x) gsub(" *", " ", x)) + grids_info <- lapply(grids_info, function (x) gsub("^ | $", "", x)) + grids_info <- lapply(grids_info, function (x) unlist(strsplit(x, " | = "))) + grids_types <- unlist(lapply(grids_info, function (x) x[grep('gridtype', x) + 1])) + grids_matches <- unlist(lapply(grids_info, function (x) { + nlons <- if (any(grep('xsize', x))) { + as.numeric(x[grep('xsize', x) + 1]) + } else { + NA + } + nlats <- if (any(grep('ysize', x))) { + as.numeric(x[grep('ysize', x) + 1]) + } else { + NA + } + result <- FALSE + if (!anyNA(c(nlons, nlats))) { + if ((nlons == length(lon)) && + (nlats == length(lat))) { + result <- TRUE + } + } + result + })) + grids_matches <- grids_matches[which(grids_types %in% c('gaussian', 'lonlat'))] + grids_info <- grids_info[which(grids_types %in% c('gaussian', 'lonlat'))] + grids_types <- grids_types[which(grids_types %in% c('gaussian', 'lonlat'))] + if (length(grids_matches) == 0) { + stop("Error: Only 'gaussian' and 'lonlat' grids supported. See e.g: cdo sinfo ", filename) + } + if (sum(grids_matches) > 1) { + if ((all(grids_types[which(grids_matches)] == 'gaussian') || + all(grids_types[which(grids_matches)] == 'lonlat')) && + all(unlist(lapply(grids_info[which(grids_matches)], identical, + grids_info[which(grids_matches)][[1]])))) { + grid_type <- grids_types[which(grids_matches)][1] + } else { + stop("Error: Load() can't disambiguate: ", + "More than one lonlat/gaussian grids with the same size as ", + "the requested variable defined in ", filename) + } + } else if (sum(grids_matches) == 1) { + grid_type <- grids_types[which(grids_matches)] + } else { + stop("Unexpected error.") + } + grid_lons <- length(lon) + grid_lats <- length(lat) + # Convert to CDO grid name as seen in cdo's griddes.c: nlat2ntr() + if (grid_type == 'lonlat') { + grid_name <- paste0('r', grid_lons, 'x', grid_lats) + } else { + grid_name <- paste0('t', .nlat2t(grid_lats), 'grid') + } + if (is.null(work_piece[['grid']])) { + .warning(paste0("Detect the grid type to be '", grid_name, "'. ", + "If it is not expected, assign parameter 'grid' to avoid wrong result.")) + } + } + # If a common grid is requested, we will also calculate its size which we will use + # later on. 
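+      # For reference (illustrative values): 'r360x180' denotes a regular
+      # 1-degree lonlat grid, while 't106grid' denotes a gaussian grid with
+      # triangular truncation T106.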
+ if (!is.null(work_piece[['grid']])) { + # Now we calculate the common grid type and its lons and lats + if (any(grep('^t\\d{1,+}grid$', work_piece[['grid']]))) { + common_grid_type <- 'gaussian' + common_grid_res <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][2]) + nlonlat <- .t2nlatlon(common_grid_res) + common_grid_lats <- nlonlat[1] + common_grid_lons <- nlonlat[2] + } else if (any(grep('^r\\d{1,+}x\\d{1,+}$', work_piece[['grid']]))) { + common_grid_type <- 'lonlat' + common_grid_lons <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][2]) + common_grid_lats <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][3]) + } else { + stop("Error: Only supported grid types in parameter 'grid' are tgrid and rx") + } + } else { + ## If no 'grid' is specified, there is no common grid. + ## But these variables are filled in for consistency in the code. + common_grid_lons <- length(lon) + common_grid_lats <- length(lat) + } + first_common_grid_lon <- 0 + last_common_grid_lon <- 360 - 360 / common_grid_lons + ## This is not true for gaussian grids or for some regular grids, but + ## is a safe estimation + first_common_grid_lat <- -90 + last_common_grid_lat <- 90 + # And finally determine whether interpolation is needed or not + remove_shift <- FALSE + if (!is.null(work_piece[['grid']])) { + if ((grid_lons != common_grid_lons) || + (grid_lats != common_grid_lats) || + (grid_type != common_grid_type) || + (lon[1] != first_common_grid_lon)) { + if (grid_lons == common_grid_lons && grid_lats == common_grid_lats && + grid_type == common_grid_type && lon[1] != first_common_grid_lon) { + remove_shift <- TRUE + } + remap_needed <- TRUE + common_grid_name <- work_piece[['grid']] + } + } else if ((lon[1] != first_common_grid_lon) && explore_dims && + !work_piece[['single_dataset']]) { + remap_needed <- TRUE + common_grid_name <- grid_name + remove_shift <- TRUE + } + if (remap_needed && (work_piece[['remap']] == 'con') && + (cdo_version >= as.numeric_version('1.7.0'))) { + work_piece[['remap']] <- 'ycon' + } + if (remove_shift && !explore_dims) { + if (!is.null(work_piece[['progress_amount']])) { + cat("\n") + } + .warning(paste0("The dataset with index ", + tail(work_piece[['indices']], 1), " in '", + work_piece[['dataset_type']], + "' doesn't start at longitude 0 and will be re-interpolated in order ", + "to align its longitudes with the standard CDO grids definable with ", + "the names 'tgrid' or 'rx', which are by definition ", + "starting at the longitude 0.\n")) + if (!is.null(mask)) { + .warning(paste0("A mask was provided for the dataset with index ", + tail(work_piece[['indices']], 1), " in '", + work_piece[['dataset_type']], + "'. This dataset has been re-interpolated to align its longitudes to ", + "start at 0. You must re-interpolate the corresponding mask to align ", + "its longitudes to start at 0 as well, if you haven't done so yet. ", + "Running cdo remapcon,", common_grid_name, + " original_mask_file.nc new_mask_file.nc will fix it.\n")) + } + } + if (remap_needed && (grid_lons < common_grid_lons || grid_lats < common_grid_lats)) { + if (!is.null(work_piece[['progress_amount']])) { + cat("\n") + } + if (!explore_dims) { + .warning(paste0("The dataset with index ", tail(work_piece[['indices']], 1), + " in '", work_piece[['dataset_type']], "' is originally on ", + "a grid coarser than the common grid and it has been ", + "extrapolated. Check the results carefully. 
It is ",
+                        "recommended to specify as common grid the coarsest grid ",
+                        "among all requested datasets via the parameter 'grid'.\n"))
+      }
+    }
+    # Now calculate whether the user requests a lonlat subset or the
+    # entire field
+    lonmin <- work_piece[['lon_limits']][1]
+    lonmax <- work_piece[['lon_limits']][2]
+    latmin <- work_piece[['lat_limits']][1]
+    latmax <- work_piece[['lat_limits']][2]
+    lon_subsetting_requested <- FALSE
+    lonlat_subsetting_requested <- FALSE
+    if (lonmin <= lonmax) {
+      if ((lonmin > first_common_grid_lon) || (lonmax < last_common_grid_lon)) {
+        lon_subsetting_requested <- TRUE
+      }
+    } else {
+      if ((lonmin - lonmax) > 360 / common_grid_lons) {
+        lon_subsetting_requested <- TRUE
+      } else {
+        gap_width <- floor(lonmin / (360 / common_grid_lons)) -
+          floor(lonmax / (360 / common_grid_lons))
+        if (gap_width > 0) {
+          if (!(gap_width == 1 && (lonmin %% (360 / common_grid_lons) == 0) &&
+                (lonmax %% (360 / common_grid_lons) == 0))) {
+            lon_subsetting_requested <- TRUE
+          }
+        }
+      }
+    }
+    if ((latmin > first_common_grid_lat) || (latmax < last_common_grid_lat)
+        || (lon_subsetting_requested)) {
+      lonlat_subsetting_requested <- TRUE
+    }
+    # Now that we know if subsetting was requested, we can say if final data
+    # will go across greenwich
+    if (lonmax < lonmin) {
+      data_across_gw <- TRUE
+    } else {
+      data_across_gw <- !lon_subsetting_requested
+    }
+
+    # When remap is needed but no subsetting, the file is copied locally
+    # so that cdo works faster, and then interpolated.
+    # Otherwise the file is kept as is and the subset will have to be
+    # interpolated still.
+    if (!lonlat_subsetting_requested && remap_needed) {
+      nc_close(fnc)
+      filecopy <- tempfile(pattern = "load", fileext = ".nc")
+      file.copy(filein, filecopy)
+      filein <- tempfile(pattern = "loadRegridded", fileext = ".nc")
+      # "-L" is to serialize I/O accesses. It prevents potential segmentation fault in the
+      # underlying hdf5 library.
+      system(paste0("cdo -L -s remap", work_piece[['remap']], ",",
+                    common_grid_name,
+                    " -selname,", namevar, " ", filecopy, " ", filein,
+                    " 2>/dev/null"))
+      file.remove(filecopy)
+      work_piece[['dimnames']][['lon']] <- 'lon'
+      work_piece[['dimnames']][['lat']] <- 'lat'
+      fnc <- nc_open(filein)
+      lon <- ncvar_get(fnc, work_piece[['dimnames']][['lon']])
+      lat <- ncvar_get(fnc, work_piece[['dimnames']][['lat']])
+    }
+
+    # Read and check also the mask
+    if (!is.null(mask)) {
+      ###mask_file <- tempfile(pattern = 'loadMask', fileext = '.nc')
+      if (is.list(mask)) {
+        if (!file.exists(mask[['path']])) {
+          stop("Error: Couldn't find the mask file ", mask[['path']])
+        }
+        mask_file <- mask[['path']]
+        ###file.copy(work_piece[['mask']][['path']], mask_file)
+        fnc_mask <- nc_open(mask_file)
+        vars_in_mask <- sapply(fnc_mask$var, '[[', 'name')
+        if ('nc_var_name' %in% names(mask)) {
+          if (!(mask[['nc_var_name']] %in%
+                vars_in_mask)) {
+            stop("Error: couldn't find variable ", mask[['nc_var_name']],
+                 " in the mask file ", mask[['path']])
+          }
+        } else {
+          if (length(vars_in_mask) != 1) {
+            stop("Error: one and only one non-coordinate variable should be ",
+                 "defined in the mask file ",
+                 mask[['path']],
+                 " if the component 'nc_var_name' is not specified. 
", + "Currently found: ", + toString(vars_in_mask), ".") + } else { + mask[['nc_var_name']] <- vars_in_mask + } + } + if (sum(fnc_mask$var[[mask[['nc_var_name']]]]$size > 1) != 2) { + stop("Error: the variable '", + mask[['nc_var_name']], + "' must be defined only over the dimensions '", + work_piece[['dimnames']][['lon']], "' and '", + work_piece[['dimnames']][['lat']], + "' in the mask file ", + mask[['path']]) + } + mask <- ncvar_get(fnc_mask, mask[['nc_var_name']], collapse_degen = TRUE) + nc_close(fnc_mask) + ### mask_lon <- ncvar_get(fnc_mask, work_piece[['dimnames']][['lon']]) + ### mask_lat <- ncvar_get(fnc_mask, work_piece[['dimnames']][['lat']]) + ###} else { + ### dim_longitudes <- ncdim_def(work_piece[['dimnames']][['lon']], "degrees_east", lon) + ### dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], "degrees_north", lat) + ### ncdf_var <- ncvar_def('LSM', "", list(dim_longitudes, dim_latitudes), NA, 'double') + ### fnc_mask <- nc_create(mask_file, list(ncdf_var)) + ### ncvar_put(fnc_mask, ncdf_var, work_piece[['mask']]) + ### nc_close(fnc_mask) + ### fnc_mask <- nc_open(mask_file) + ### work_piece[['mask']] <- list(path = mask_file, nc_var_name = 'LSM') + ### mask_lon <- lon + ### mask_lat <- lat + ###} + ###} + ### Now ready to check that the mask is right + ##if (!(lonlat_subsetting_requested && remap_needed)) { + ### if ((dim(mask)[2] != length(lon)) || (dim(mask)[1] != length(lat))) { + ### stop(paste("Error: the mask of the dataset with index ", + ### tail(work_piece[['indices']], 1), " in '", + ### work_piece[['dataset_type']], "' is wrong. ", + ### "It must be on the common grid if the selected output type is 'lonlat', ", + ### "'lon' or 'lat', or 'areave' and 'grid' has been specified. It must be on ", + ### "the grid of the corresponding dataset if the selected output type is ", + ### "'areave' and no 'grid' has been specified. For more information ", + ### "check ?Load and see help on parameters 'grid', 'maskmod' and ", + ### "'maskobs'.", sep = "")) + ### } + ###if (!(identical(mask_lon, lon) && identical(mask_lat, lat))) { + ### stop(paste0("Error: the longitudes and latitudes in the masks must be ", + ### "identical to the ones in the corresponding data files if output = 'areave' ", + ### " or, if the selected output is 'lon', 'lat' or 'lonlat', the longitudes in ", + ### "the mask file must start by 0 and the latitudes must be ordered from ", + ### "highest to lowest. See\n ", + ### work_piece[['mask']][['path']], " and ", filein)) + ###} + } + } + + lon_indices <- seq_along(lon) + if (!(lonlat_subsetting_requested && remap_needed)) { + lon[which(lon < 0)] <- lon[which(lon < 0)] + 360 + } + if (lonmax >= lonmin) { + lon_indices <- lon_indices[which(((lon %% 360) >= lonmin) & ((lon %% 360) <= lonmax))] + } else if (!remap_needed) { + lon_indices <- lon_indices[which(((lon %% 360) <= lonmax) | ((lon %% 360) >= lonmin))] + } + lat_indices <- which(lat >= latmin & lat <= latmax) + ## In most of the cases the latitudes are ordered from -90 to 90. + ## We will reorder them to be in the order from 90 to -90, so mostly + ## always the latitudes are reordered. + ## TODO: This could be avoided in future. 
+ if (lat[1] < lat[length(lat)]) { + lat_indices <- lat_indices[rev(seq_along(lat_indices))] + } + if (!is.null(mask) && !(lonlat_subsetting_requested && remap_needed)) { + if ((dim(mask)[1] != length(lon)) || (dim(mask)[2] != length(lat))) { + stop("Error: the mask of the dataset with index ", tail(work_piece[['indices']], 1), + " in '", work_piece[['dataset_type']], "' is wrong. It must be on the ", + "common grid if the selected output type is 'lonlat', 'lon' or 'lat', ", + "or 'areave' and 'grid' has been specified. It must be on the grid of ", + "the corresponding dataset if the selected output type is 'areave' and ", + "no 'grid' has been specified. For more information check ?Load and see ", + "help on parameters 'grid', 'maskmod' and 'maskobs'.") + } + mask <- mask[lon_indices, lat_indices] + } + ## If the user requests subsetting, we must extend the lon and lat limits if possible + ## so that the interpolation after is done properly + maximum_extra_points <- work_piece[['remapcells']] + if (lonlat_subsetting_requested && remap_needed) { + if ((maximum_extra_points > (head(lon_indices, 1) - 1)) || + (maximum_extra_points > (length(lon) - tail(lon_indices, 1)))) { + ## if the requested number of points goes beyond the left or right + ## sides of the map, we need to take the entire map so that the + ## interpolation works properly + lon_indices <- seq_along(lon) + } else { + extra_points <- min(maximum_extra_points, head(lon_indices, 1) - 1) + if (extra_points > 0) { + lon_indices <- + c((head(lon_indices, 1) - extra_points):(head(lon_indices, 1) - 1), lon_indices) + } + extra_points <- min(maximum_extra_points, length(lon) - tail(lon_indices, 1)) + if (extra_points > 0) { + lon_indices <- c(lon_indices, + (tail(lon_indices, 1) + 1):(tail(lon_indices, 1) + extra_points)) + } + } + min_lat_ind <- min(lat_indices) + max_lat_ind <- max(lat_indices) + extra_points <- min(maximum_extra_points, min_lat_ind - 1) + if (extra_points > 0) { + if (lat[1] < tail(lat, 1)) { + lat_indices <- c(lat_indices, (min_lat_ind - 1):(min_lat_ind - extra_points)) + } else { + lat_indices <- c((min_lat_ind - extra_points):(min_lat_ind - 1), lat_indices) + } + } + extra_points <- min(maximum_extra_points, length(lat) - max_lat_ind) + if (extra_points > 0) { + if (lat[1] < tail(lat, 1)) { + lat_indices <- c((max_lat_ind + extra_points):(max_lat_ind + 1), lat_indices) + } else { + lat_indices <- c(lat_indices, (max_lat_ind + 1):(max_lat_ind + extra_points)) + } + } + } + lon <- lon[lon_indices] + lat <- lat[lat_indices] + expected_dims <- c(work_piece[['dimnames']][['lon']], + work_piece[['dimnames']][['lat']]) + } else { + lon <- 0 + lat <- 0 + } + # We keep on filling the expected dimensions + var_dimnames <- unlist(lapply(fnc$var[[namevar]][['dim']], '[[', 'name')) + nmemb <- nltime <- NULL + ## Sometimes CDO renames 'members' dimension to 'lev' + old_members_dimname <- NULL + if (('lev' %in% var_dimnames) && !(work_piece[['dimnames']][['member']] %in% var_dimnames)) { + old_members_dimname <- work_piece[['dimnames']][['member']] + work_piece[['dimnames']][['member']] <- 'lev' + } + if (work_piece[['dimnames']][['member']] %in% var_dimnames) { + nmemb <- fnc$var[[namevar]][['dim']][[match(work_piece[['dimnames']][['member']], + var_dimnames)]]$len + expected_dims <- c(expected_dims, work_piece[['dimnames']][['member']]) + } else { + nmemb <- 1 + } + if (length(expected_dims) > 0) { + dim_matches <- match(expected_dims, var_dimnames) + if (anyNA(dim_matches)) { + if (!is.null(old_members_dimname)) { + 
expected_dims[which(expected_dims == 'lev')] <- old_members_dimname
+        }
+        stop("Error: the expected dimension(s) ",
+             toString(expected_dims[which(is.na(dim_matches))]),
+             " were not found in ", filename)
+      }
+      time_dimname <- var_dimnames[-dim_matches]
+    } else {
+      time_dimname <- var_dimnames
+    }
+    if (length(time_dimname) > 0) {
+      if (length(time_dimname) == 1) {
+        nltime <- fnc$var[[namevar]][['dim']][[match(time_dimname, var_dimnames)]]$len
+        expected_dims <- c(expected_dims, time_dimname)
+        dim_matches <- match(expected_dims, var_dimnames)
+      } else {
+        if (!is.null(old_members_dimname)) {
+          expected_dims[which(expected_dims == 'lev')] <- old_members_dimname
+        }
+        stop("Error: the variable ", namevar,
+             " is defined over more dimensions than the expected (",
+             toString(c(expected_dims, 'time')),
+             "). It could also be that the members, longitude or latitude ",
+             "dimensions are named incorrectly. In that case, either rename ",
+             "the dimensions in the file or adjust Load() to recognize the actual ",
+             "name with the parameter 'dimnames'. See file ", filename)
+      }
+    } else {
+      nltime <- 1
+    }
+
+    # Now we must retrieve the data from the file, but only the asked indices.
+    # So we build up the indices to retrieve.
+    # Longitudes or latitudes have been retrieved already.
+    if (explore_dims) {
+      # If we're exploring the file we only want one time step from one member,
+      # to regrid it and work out the number of longitudes and latitudes.
+      # We don't need more.
+      members <- 1
+      ltimes_list <- list(1)
+    } else {
+      # The data is arranged in the array 'tmp' with the dimensions in a
+      # common order:
+      # 1) Longitudes
+      # 2) Latitudes
+      # 3) Members (even if it is not a file-per-member experiment)
+      # 4) Lead-times
+      if (work_piece[['is_file_per_dataset']]) {
+        time_indices <- 1:nltime
+        mons <- strsplit(system(paste('cdo showmon ', filein,
+                                      ' 2>/dev/null'), intern = TRUE), split = ' ')
+        years <- strsplit(system(paste('cdo showyear ', filein,
+                                       ' 2>/dev/null'), intern = TRUE), split = ' ')
+        mons <- as.numeric(mons[[1]][which(mons[[1]] != "")])
+        years <- as.numeric(years[[1]][which(years[[1]] != "")])
+        time_indices <- ts(time_indices, start = c(years[1], mons[1]),
+                           end = c(years[length(years)], mons[length(mons)]),
+                           frequency = 12)
+        ltimes_list <- list()
+        for (sdate in work_piece[['startdates']]) {
+          selected_time_indices <- window(time_indices, start = c(as.numeric(
+                                          substr(sdate, 1, 4)), as.numeric(substr(sdate, 5, 6))),
+                                          end = c(3000, 12), frequency = 12, extend = TRUE)
+          selected_time_indices <- selected_time_indices[work_piece[['leadtimes']]]
+          ltimes_list <- c(ltimes_list, list(selected_time_indices))
+        }
+      } else {
+        ltimes <- work_piece[['leadtimes']]
+        #if (work_piece[['dataset_type']] == 'exp') {
+        ltimes_list <- list(ltimes[which(ltimes <= nltime)])
+        #}
+      }
+      ## TODO: Put, when reading matrices, this kind of warning
+      # if (nmember < nmemb) {
+      #   cat("Warning:
+      members <- 1:work_piece[['nmember']]
+      members <- members[which(members <= nmemb)]
+    }
+
+    # Now, for each list of leadtimes to load (usually only one list with all leadtimes),
+    # we'll join the indices and retrieve data
+    found_disordered_dims <- FALSE
+    for (ltimes in ltimes_list) {
+      if (is_2d_var) {
+        start <- c(min(lon_indices), min(lat_indices))
+        end <- c(max(lon_indices), max(lat_indices))
+        if (lonlat_subsetting_requested && remap_needed) {
+          subset_indices <- list(min(lon_indices):max(lon_indices) - min(lon_indices) + 1,
+                                 lat_indices - min(lat_indices) + 1)
+          dim_longitudes <- 
ncdim_def(work_piece[['dimnames']][['lon']], "degrees_east", lon) + dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], "degrees_north", lat) + ncdf_dims <- list(dim_longitudes, dim_latitudes) + } else { + subset_indices <- list(lon_indices - min(lon_indices) + 1, + lat_indices - min(lat_indices) + 1) + ncdf_dims <- list() + } + final_dims <- c(length(subset_indices[[1]]), length(subset_indices[[2]]), 1, 1) + } else { + start <- end <- NULL + subset_indices <- list() + ncdf_dims <- list() + final_dims <- c(1, 1, 1, 1) + } + + if (work_piece[['dimnames']][['member']] %in% expected_dims) { + start <- c(start, head(members, 1)) + end <- c(end, tail(members, 1)) + subset_indices <- c(subset_indices, list(members - head(members, 1) + 1)) + dim_members <- ncdim_def(work_piece[['dimnames']][['member']], "", members) + ncdf_dims <- c(ncdf_dims, list(dim_members)) + final_dims[3] <- length(members) + } + if (time_dimname %in% expected_dims) { + if (!all(is.na(ltimes))) { + start <- c(start, head(ltimes[which(!is.na(ltimes))], 1)) + end <- c(end, tail(ltimes[which(!is.na(ltimes))], 1)) + subset_indices <- c(subset_indices, + list(ltimes - head(ltimes[which(!is.na(ltimes))], 1) + 1)) + } else { + start <- c(start, NA) + end <- c(end, NA) + subset_indices <- c(subset_indices, list(ltimes)) + } + dim_time <- ncdim_def(time_dimname, "", seq_along(ltimes), unlim = TRUE) + ncdf_dims <- c(ncdf_dims, list(dim_time)) + final_dims[4] <- length(ltimes) + } + count <- end - start + 1 + start <- start[dim_matches] + count <- count[dim_matches] + subset_indices <- subset_indices[dim_matches] + # Now that we have the indices to retrieve, we retrieve the data + if (prod(final_dims) > 0) { + tmp <- take(ncvar_get(fnc, namevar, start, count, + collapse_degen = FALSE), + seq_along(subset_indices), subset_indices) + # The data is regridded if it corresponds to an atmospheric variable. When + # the chosen output type is 'areave' the data is not regridded to not + # waste computing time unless the user specified a common grid. 
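+      # Hypothetical illustration of the call assembled below (the remap
+      # method, grid and region values are made up): with remap = 'bil', a
+      # common grid of 'r360x180' and a 0-40E, 30N-70N region, the system
+      # command is roughly
+      #   cdo -L -s -sellonlatbox,0,40,30,70 -remapbil,r360x180 \
+      #     loadRegridded2.nc loadRegridded.nc 2>/dev/null
+      # where the two file names stand for the temporary files created below.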
+ if (is_2d_var) { + ###if (!is.null(work_piece[['mask']]) && !(lonlat_subsetting_requested && remap_needed)) { + ### mask <- take(ncvar_get(fnc_mask, work_piece[['mask']][['nc_var_name']], + ### start[dim_matches[1:2]], count[dim_matches[1:2]], + ### collapse_degen = FALSE), 1:2, subset_indices[dim_matches[1:2]]) + ###} + if (lonlat_subsetting_requested && remap_needed) { + filein <- tempfile(pattern = "loadRegridded", fileext = ".nc") + filein2 <- tempfile(pattern = "loadRegridded2", fileext = ".nc") + ncdf_var <- ncvar_def(namevar, "", ncdf_dims[dim_matches], + fnc$var[[namevar]]$missval, + prec = if (fnc$var[[namevar]]$prec == 'int') { + 'integer' + } else { + fnc$var[[namevar]]$prec + }) + scale_factor <- ifelse(fnc$var[[namevar]]$hasScaleFact, fnc$var[[namevar]]$scaleFact, 1) + add_offset <- ifelse(fnc$var[[namevar]]$hasAddOffset, fnc$var[[namevar]]$addOffset, 0) + if (fnc$var[[namevar]]$hasScaleFact || fnc$var[[namevar]]$hasAddOffset) { + tmp <- (tmp - add_offset) / scale_factor + } + #nc_close(fnc) + fnc2 <- nc_create(filein2, list(ncdf_var)) + ncvar_put(fnc2, ncdf_var, tmp) + if (add_offset != 0) { + ncatt_put(fnc2, ncdf_var, 'add_offset', add_offset) + } + if (scale_factor != 1) { + ncatt_put(fnc2, ncdf_var, 'scale_factor', scale_factor) + } + nc_close(fnc2) + system(paste0("cdo -L -s -sellonlatbox,", if (lonmin > lonmax) { + "0,360," + } else { + paste0(lonmin, ",", lonmax, ",") + }, latmin, ",", latmax, + " -remap", work_piece[['remap']], ",", common_grid_name, + " ", filein2, " ", filein, " 2>/dev/null")) + file.remove(filein2) + fnc2 <- nc_open(filein) + sub_lon <- ncvar_get(fnc2, 'lon') + sub_lat <- ncvar_get(fnc2, 'lat') + ## We read the longitudes and latitudes from the file. + ## In principle cdo should put in order the longitudes + ## and slice them properly unless data is across greenwich + sub_lon[which(sub_lon < 0)] <- sub_lon[which(sub_lon < 0)] + 360 + sub_lon_indices <- seq_along(sub_lon) + if (lonmax < lonmin) { + sub_lon_indices <- sub_lon_indices[which((sub_lon <= lonmax) | (sub_lon >= lonmin))] + } + sub_lat_indices <- seq_along(sub_lat) + ## In principle cdo should put in order the latitudes + if (sub_lat[1] < sub_lat[length(sub_lat)]) { + sub_lat_indices <- rev(seq_along(sub_lat)) + } + final_dims[c(1, 2)] <- c(length(sub_lon_indices), length(sub_lat_indices)) + subset_indices[[dim_matches[1]]] <- sub_lon_indices + subset_indices[[dim_matches[2]]] <- sub_lat_indices + + tmp <- take(ncvar_get(fnc2, namevar, collapse_degen = FALSE), + seq_along(subset_indices), subset_indices) + + if (!is.null(mask)) { + ## We create a very simple 2d netcdf file that is then interpolated to the common + ## grid to know what are the lons and lats of our slice of data + mask_file <- tempfile(pattern = 'loadMask', fileext = '.nc') + mask_file_remap <- tempfile(pattern = 'loadMask', fileext = '.nc') + dim_longitudes <- ncdim_def(work_piece[['dimnames']][['lon']], + "degrees_east", c(0, 360)) + dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], + "degrees_north", c(-90, 90)) + ncdf_var <- ncvar_def('LSM', "", list(dim_longitudes, dim_latitudes), NA, 'double') + fnc_mask <- nc_create(mask_file, list(ncdf_var)) + ncvar_put(fnc_mask, ncdf_var, array(rep(0, 4), dim = c(2, 2))) + nc_close(fnc_mask) + system(paste0("cdo -L -s remap", work_piece[['remap']], ",", + common_grid_name, + " ", mask_file, " ", mask_file_remap, " 2>/dev/null")) + fnc_mask <- nc_open(mask_file_remap) + mask_lons <- ncvar_get(fnc_mask, 'lon') + mask_lats <- ncvar_get(fnc_mask, 'lat') + nc_close(fnc_mask) 
+          file.remove(mask_file, mask_file_remap)
+          if ((dim(mask)[1] != common_grid_lons) || (dim(mask)[2] != common_grid_lats)) {
+            stop("Error: the mask of the dataset with index ",
+                 tail(work_piece[['indices']], 1), " in '",
+                 work_piece[['dataset_type']],
+                 "' is wrong. It must be on the common grid if the ",
+                 "selected output type is 'lonlat', 'lon' or 'lat', ",
+                 "or 'areave' and 'grid' has been specified. It must ",
+                 "be on the grid of the corresponding dataset if the ",
+                 "selected output type is 'areave' and no 'grid' has been ",
+                 "specified. For more information check ?Load and see help ",
+                 "on parameters 'grid', 'maskmod' and 'maskobs'.")
+          }
+          mask_lons[which(mask_lons < 0)] <- mask_lons[which(mask_lons < 0)] + 360
+          if (lonmax >= lonmin) {
+            mask_lon_indices <- which((mask_lons >= lonmin) & (mask_lons <= lonmax))
+          } else {
+            mask_lon_indices <- which((mask_lons >= lonmin) | (mask_lons <= lonmax))
+          }
+          mask_lat_indices <- which((mask_lats >= latmin) & (mask_lats <= latmax))
+          if (sub_lat[1] < sub_lat[length(sub_lat)]) {
+            mask_lat_indices <- mask_lat_indices[rev(seq_along(mask_lat_indices))]
+          }
+          mask <- mask[mask_lon_indices, mask_lat_indices]
+        }
+        sub_lon <- sub_lon[sub_lon_indices]
+        sub_lat <- sub_lat[sub_lat_indices]
+        ###    nc_close(fnc_mask)
+        ###    system(paste0("cdo -s -sellonlatbox,", if (lonmin > lonmax) {
+        ###      "0,360,"
+        ###    } else {
+        ###      paste0(lonmin, ",", lonmax, ",")
+        ###    }, latmin, ",", latmax,
+        ###    " -remap", work_piece[['remap']], ",", common_grid_name,
+        ###This is wrong: same files
+        ###    " ", mask_file, " ", mask_file, " 2>/dev/null", sep = ""))
+        ###    fnc_mask <- nc_open(mask_file)
+        ###    mask <- take(ncvar_get(fnc_mask, work_piece[['mask']][['nc_var_name']],
+        ###                           collapse_degen = FALSE), 1:2, subset_indices[dim_matches[1:2]])
+        ###}
+      }
+    }
+    if (is.unsorted(dim_matches)) {
+      if (!found_disordered_dims &&
+          rev(work_piece[['indices']])[2] == 1 &&
+          rev(work_piece[['indices']])[3] == 1) {
+        found_disordered_dims <- TRUE
+        .warning(paste0("The dimensions for the variable ", namevar,
+                        " in the files of the experiment with index ",
+                        tail(work_piece[['indices']], 1),
+                        " are not in the optimal order for loading with Load(). ",
+                        "The optimal order would be '",
+                        toString(expected_dims),
+                        "'. One of the files of the dataset is stored in ", filename))
+      }
+      tmp <- aperm(tmp, dim_matches)
+    }
+    dim(tmp) <- final_dims
+    # If we are exploring the file we don't need to process and arrange
+    # the retrieved data. We only need to keep the dimension sizes.
+    if (is_2d_var && lonlat_subsetting_requested && remap_needed) {
+      final_lons <- sub_lon
+      final_lats <- sub_lat
+    } else {
+      final_lons <- lon
+      final_lats <- lat
+    }
+    if (explore_dims) {
+      if (work_piece[['is_file_per_member']]) {
+        ## TODO: When the exp_full_path contains asterisks and is file_per_member
+        ##       members from different datasets may be accounted.
+        ##       Also if one file member is missing the accounting will be wrong.
+        ##       Should parse the file name and extract number of members.
+        if (is_url) {
+          nmemb <- NULL
+        } else {
+          nmemb <- length(files)
+        }
+      }
+      dims <- list(member = nmemb, ftime = nltime, lon = final_lons, lat = final_lats)
+    } else {
+      # If we are not exploring, then we have to process the retrieved data
+      if (is_2d_var) {
+        tmp <- apply(tmp, c(3, 4), function(x) {
+          # Discard values outside the requested variable limits and mask out
+          # grid points where the mask (if provided) is below 0.5.
+          if (!is.na(work_piece[['var_limits']][2])) {
+            x[which(x > work_piece[['var_limits']][2])] <- NA
+          }
+          if (!is.na(work_piece[['var_limits']][1])) {
+            x[which(x < work_piece[['var_limits']][1])] <- NA
+          }
+          if (!is.null(mask)) {
+            x[which(mask < 0.5)] <- NA
+          }
+
+          if (output == 'areave' || output == 'lon') {
+            weights <- InsertDim(cos(final_lats * pi / 180), 1,
+                                 length(final_lons), name = 'lon')
+            weights[which(is.na(x))] <- NA
+            if (output == 'areave') {
+              weights <- weights / mean(weights, na.rm = TRUE)
+              mean(x * weights, na.rm = TRUE)
+            } else {
+              weights <- weights / InsertDim(MeanDims(weights, 2, na.rm = TRUE), 2,
+                                             length(final_lats), name = 'lat')
+              MeanDims(x * weights, 2, na.rm = TRUE)
+            }
+          } else if (output == 'lat') {
+            MeanDims(x, 1, na.rm = TRUE)
+          } else if (output == 'lonlat') {
+            signif(x, 5)
+          }
+        })
+        if (output == 'areave') {
+          dim(tmp) <- c(1, 1, final_dims[3:4])
+        } else if (output == 'lon') {
+          dim(tmp) <- c(final_dims[1], 1, final_dims[3:4])
+        } else if (output == 'lat') {
+          dim(tmp) <- c(1, final_dims[c(2, 3, 4)])
+        } else if (output == 'lonlat') {
+          dim(tmp) <- final_dims
+        }
+      }
+      var_data <- attach.big.matrix(work_piece[['out_pointer']])
+      if (work_piece[['dims']][['member']] > 1 && nmemb > 1 &&
+          work_piece[['dims']][['ftime']] > 1 &&
+          nltime < work_piece[['dims']][['ftime']]) {
+        work_piece[['indices']][2] <- work_piece[['indices']][2] - 1
+        for (jmemb in members) {
+          work_piece[['indices']][2] <- work_piece[['indices']][2] + 1
+          out_position <- arrayIndex2VectorIndex(work_piece[['indices']], work_piece[['dims']])
+          out_indices <- out_position:(out_position + length(tmp[, , jmemb, ]) - 1)
+          var_data[out_indices] <- as.vector(tmp[, , jmemb, ])
+        }
+        work_piece[['indices']][2] <- work_piece[['indices']][2] - tail(members, 1) + 1
+      } else {
+        out_position <- arrayIndex2VectorIndex(work_piece[['indices']], work_piece[['dims']])
+        out_indices <- out_position:(out_position + length(tmp) - 1)
+        var_data[out_indices] <- as.vector(aperm(tmp, c(1, 2, 4, 3)))
+      }
+      work_piece[['indices']][3] <- work_piece[['indices']][3] + 1
+    }
+  }
+  }
+  nc_close(fnc)
+  if (is_2d_var) {
+    if (remap_needed) {
+      array_across_gw <- FALSE
+      file.remove(filein)
+      ###if (!is.null(mask) && lonlat_subsetting_requested) {
+      ###  file.remove(mask_file)
+      ###}
+    } else {
+      if (first_lon_in_original_file < 0) {
+        array_across_gw <- data_across_gw
+      } else {
+        array_across_gw <- FALSE
+      }
+    }
+  }
+  }
+  if (explore_dims) {
+    list(dims = dims, is_2d_var = is_2d_var, grid = grid_name,
+         units = units, var_long_name = var_long_name,
+         data_across_gw = data_across_gw, array_across_gw = array_across_gw)
+  } else {
+    ###if (!silent && !is.null(progress_connection) && !is.null(work_piece[['progress_amount']])) {
+    ###  foobar <- writeBin(work_piece[['progress_amount']], progress_connection)
+    ###}
+    if (!silent && !is.null(work_piece[['progress_amount']])) {
+      message(work_piece[['progress_amount']], appendLF = FALSE)
+    }
+    found_file
+  }
+}
+
+.LoadSampleData <- function(var, exp = NULL, obs = NULL, sdates,
+                            nmember = NULL, nmemberobs = NULL,
+                            nleadtime = NULL, leadtimemin = 1,
+                            leadtimemax = NULL, storefreq = 'monthly',
+                            sampleperiod = 1, lonmin = 0, lonmax = 360,
+                            latmin = -90, latmax = 90, output = 'areave',
+                            method = 'conservative', grid = NULL,
+                            maskmod = vector("list", 15),
+                            maskobs = vector("list", 15),
+                            configfile = NULL, suffixexp = NULL,
+                            suffixobs = NULL, varmin = NULL, varmax = NULL,
+                            silent = FALSE, nprocs = NULL) {
+  ## This 
function loads and selects sample data stored in sampleMap and + ## sampleTimeSeries and is used in the examples instead of Load() so as + ## to avoid nco and cdo system calls and computation time in the stage + ## of running examples in the CHECK process on CRAN. + selected_start_dates <- match(sdates, c('19851101', '19901101', '19951101', + '20001101', '20051101')) + start_dates_position <- 3 + lead_times_position <- 4 + + if (output == 'lonlat') { + sampleData <- s2dv::sampleMap + if (is.null(leadtimemax)) { + leadtimemax <- dim(sampleData$mod)[lead_times_position] + } + selected_lead_times <- leadtimemin:leadtimemax + + dataOut <- sampleData + dataOut$mod <- sampleData$mod[, , selected_start_dates, selected_lead_times, , ] + dataOut$obs <- sampleData$obs[, , selected_start_dates, selected_lead_times, , ] + } else if (output == 'areave') { + sampleData <- s2dv::sampleTimeSeries + if (is.null(leadtimemax)) { + leadtimemax <- dim(sampleData$mod)[lead_times_position] + } + selected_lead_times <- leadtimemin:leadtimemax + + dataOut <- sampleData + dataOut$mod <- sampleData$mod[, , selected_start_dates, selected_lead_times] + dataOut$obs <- sampleData$obs[, , selected_start_dates, selected_lead_times] + } + + dims_out <- dim(sampleData$mod) + dims_out[start_dates_position] <- length(selected_start_dates) + dims_out[lead_times_position] <- length(selected_lead_times) + dim(dataOut$mod) <- dims_out + + dims_out <- dim(sampleData$obs) + dims_out[start_dates_position] <- length(selected_start_dates) + dims_out[lead_times_position] <- length(selected_lead_times) + dim(dataOut$obs) <- dims_out + + invisible(list(mod = dataOut$mod, obs = dataOut$obs, + lat = dataOut$lat, lon = dataOut$lon)) +} + +.ConfigGetDatasetInfo <- function(matching_entries, table_name) { + # This function obtains the information of a dataset and variable pair, + # applying all the entries that match in the configuration file. + if (table_name == 'experiments') { + id <- 'EXP' + } else { + id <- 'OBS' + } + defaults <- c(paste0('$DEFAULT_', id, '_MAIN_PATH$'), + paste0('$DEFAULT_', id, '_FILE_PATH$'), + '$DEFAULT_NC_VAR_NAME$', '$DEFAULT_SUFFIX$', + '$DEFAULT_VAR_MIN$', '$DEFAULT_VAR_MAX$') + info <- NULL + + for (entry in matching_entries) { + if (is.null(info)) { + info <- entry[-1:-2] + info[which(info == '*')] <- defaults[which(info == '*')] + } else { + info[which(entry[-1:-2] != '*')] <- entry[-1:-2][which(entry[-1:-2] != '*')] + } + } + + info <- as.list(info) + names(info) <- c('main_path', 'file_path', 'nc_var_name', 'suffix', 'var_min', 'var_max') + info +} + +.ReplaceGlobExpressions <- function(path_with_globs, actual_path, + replace_values, tags_to_keep, + dataset_name, permissive) { + # The goal of this function is to replace the shell globbing expressions in + # a path pattern (that may contain shell globbing expressions and Load() + # tags) by the corresponding part of the real existing path. + # What is done actually is to replace all the values of the tags in the + # actual path by the corresponding $TAG$ + # + # It takes mainly two inputs. 
The path with expressions and tags, e.g.:
+  #   /data/experiments/*/$EXP_NAME$/$VAR_NAME$/$VAR_NAME$_*$START_DATE$*.nc
+  # and a complete known path to one of the matching files, e.g.:
+  #   /data/experiments/ecearth/i00k/tos/tos_fc0-1_19901101_199011-199110.nc
+  # and it returns the path pattern but without shell globbing expressions:
+  #   /data/experiments/ecearth/$EXP_NAME$/$VAR_NAME$/$VAR_NAME$_fc0-1_$START_DATE$_199011-199110.nc
+  #
+  # To do that, it also needs as input the list of replace values (the
+  # association of each tag to its value).
+  #
+  # All the tags not present in the parameter tags_to_keep will be replaced.
+  #
+  # Not all cases can be resolved with the implemented algorithm. In an
+  # unsolvable case a warning is given and one possible guess is returned.
+  #
+  # In some cases it is interesting to replace only the expressions in the
+  # path to the file, but not the ones in the file name itself. To keep the
+  # expressions in the file name, the parameter permissive can be set to
+  # TRUE. To replace all the expressions it can be set to FALSE.
+  clean <- function(x) {
+    if (nchar(x) > 0) {
+      x <- gsub('\\\\', '', x)
+      x <- gsub('\\^', '', x)
+      x <- gsub('\\$', '', x)
+      x <- unname(sapply(strsplit(x, '[', fixed = TRUE)[[1]], function(y) gsub('.*]', '.', y)))
+      do.call(paste0, as.list(x))
+    } else {
+      x
+    }
+  }
+
+  strReverse <- function(x) sapply(lapply(strsplit(x, NULL), rev), paste, collapse = "")
+
+  if (permissive) {
+    actual_path_chunks <- strsplit(actual_path, '/')[[1]]
+    actual_path <- paste(actual_path_chunks[-length(actual_path_chunks)], collapse = '/')
+    file_name <- tail(actual_path_chunks, 1)
+    if (length(actual_path_chunks) > 1) {
+      file_name <- paste0('/', file_name)
+    }
+    path_with_globs_chunks <- strsplit(path_with_globs, '/')[[1]]
+    path_with_globs <- paste(path_with_globs_chunks[-length(path_with_globs_chunks)],
+                             collapse = '/')
+    path_with_globs <- .ConfigReplaceVariablesInString(path_with_globs, replace_values)
+    file_name_with_globs <- tail(path_with_globs_chunks, 1)
+    if (length(path_with_globs_chunks) > 1) {
+      file_name_with_globs <- paste0('/', file_name_with_globs)
+    }
+    right_known <- head(strsplit(file_name_with_globs, '*', fixed = TRUE)[[1]], 1)
+    right_known_no_tags <- .ConfigReplaceVariablesInString(right_known, replace_values)
+    path_with_globs_rx <- utils::glob2rx(paste0(path_with_globs, right_known_no_tags))
+    match <- regexpr(gsub('$', '', path_with_globs_rx, fixed = TRUE),
+                     paste0(actual_path, file_name))
+    if (match != 1) {
+      stop("Incorrect parameters to replace glob expressions. 
", + "The path with expressions does not match the actual path.") + } + if (attr(match, 'match.length') - nchar(right_known_no_tags) < nchar(actual_path)) { + path_with_globs <- paste0(path_with_globs, right_known_no_tags, '*') + file_name_with_globs <- sub(right_known, '/*', file_name_with_globs) + } + } + path_with_globs_rx <- utils::glob2rx(path_with_globs) + values_to_replace <- NULL + tags_to_replace_starts <- NULL + tags_to_replace_ends <- NULL + give_warning <- FALSE + for (tag in tags_to_keep) { + matches <- gregexpr(paste0('$', tag, '$'), path_with_globs_rx, fixed = TRUE)[[1]] + lengths <- attr(matches, 'match.length') + if (!(length(matches) == 1 && matches[1] == -1)) { + for (i in seq_along(matches)) { + left <- NULL + if (matches[i] > 1) { + left <- + .ConfigReplaceVariablesInString(substr(path_with_globs_rx, 1, + matches[i] - 1), replace_values) + left_known <- + strReverse(head(strsplit(strReverse(left), + strReverse('.*'), fixed = TRUE)[[1]], 1)) + } + right <- NULL + if ((matches[i] + lengths[i] - 1) < nchar(path_with_globs_rx)) { + right <- + .ConfigReplaceVariablesInString(substr(path_with_globs_rx, + matches[i] + lengths[i], + nchar(path_with_globs_rx)), + replace_values) + right_known <- head(strsplit(right, '.*', fixed = TRUE)[[1]], 1) + } + match_limits <- NULL + if (!is.null(left)) { + left_match <- regexpr(paste0(left, replace_values[[tag]], right_known), actual_path) + match_len <- attr(left_match, 'match.length') + left_match_limits <- + c(left_match + match_len - 1 - nchar(clean(right_known)) - + nchar(replace_values[[tag]]) + 1, + left_match + match_len - 1 - nchar(clean(right_known))) + if (!(left_match < 1)) { + match_limits <- left_match_limits + } + } + right_match <- NULL + if (!is.null(right)) { + right_match <- regexpr(paste0(left_known, replace_values[[tag]], right), actual_path) + match_len <- attr(right_match, 'match.length') + right_match_limits <- + c(right_match + nchar(clean(left_known)), + right_match + nchar(clean(left_known)) + + nchar(replace_values[[tag]]) - 1) + if (is.null(match_limits) && !(right_match < 1)) { + match_limits <- right_match_limits + } + } + if (!is.null(right_match) && !is.null(left_match)) { + if (!identical(right_match_limits, left_match_limits)) { + give_warning <- TRUE + } + } + if (is.null(match_limits)) { + stop("Too complex path pattern specified for ", dataset_name, + ". 
Specify a simpler path pattern for this dataset.") + } + values_to_replace <- c(values_to_replace, tag) + tags_to_replace_starts <- c(tags_to_replace_starts, match_limits[1]) + tags_to_replace_ends <- c(tags_to_replace_ends, match_limits[2]) + } + } + } + + if (length(tags_to_replace_starts) > 0) { + reorder <- sort(tags_to_replace_starts, index.return = TRUE) + tags_to_replace_starts <- reorder$x + values_to_replace <- values_to_replace[reorder$ix] + tags_to_replace_ends <- tags_to_replace_ends[reorder$ix] + while (length(values_to_replace) > 0) { + actual_path <- paste0(substr(actual_path, 1, head(tags_to_replace_starts, 1) - 1), + '$', head(values_to_replace, 1), '$', + substr(actual_path, head(tags_to_replace_ends, 1) + 1, + nchar(actual_path))) + extra_chars <- nchar(head(values_to_replace, 1)) + 2 - + (head(tags_to_replace_ends, 1) - + head(tags_to_replace_starts, 1) + 1) + values_to_replace <- values_to_replace[-1] + tags_to_replace_starts <- tags_to_replace_starts[-1] + tags_to_replace_ends <- tags_to_replace_ends[-1] + tags_to_replace_starts <- tags_to_replace_starts + extra_chars + tags_to_replace_ends <- tags_to_replace_ends + extra_chars + } + } + + if (give_warning) { + .warning(paste0("Too complex path pattern specified for ", dataset_name, + ". Double check carefully the '$Files' fetched for this dataset ", + "or specify a simpler path pattern.")) + } + + if (permissive) { + paste0(actual_path, file_name_with_globs) + } else { + actual_path + } +} + +.FindTagValue <- function(path_with_globs_and_tag, actual_path, tag) { + tag <- paste0('\\$', tag, '\\$') + path_with_globs_and_tag <- paste0('^', path_with_globs_and_tag, '$') + parts <- strsplit(path_with_globs_and_tag, '*', fixed = TRUE)[[1]] + parts <- as.list(grep(tag, parts, value = TRUE)) + longest_couples <- NULL + pos_longest_couples <- NULL + found_value <- NULL + for (i in seq_along(parts)) { + parts[[i]] <- strsplit(parts[[i]], tag)[[1]] + if (length(parts[[i]]) == 1) { + parts[[i]] <- c(parts[[i]], '') + } + len_parts <- sapply(parts[[i]], nchar) + len_couples <- len_parts[-length(len_parts)] + len_parts[2:length(len_parts)] + pos_longest_couples <- c(pos_longest_couples, which.max(len_couples)) + longest_couples <- c(longest_couples, max(len_couples)) + } + chosen_part <- which.max(longest_couples) + parts[[chosen_part]] <- + parts[[chosen_part]][pos_longest_couples[chosen_part]:(pos_longest_couples[chosen_part] + 1)] + if (nchar(parts[[chosen_part]][1]) >= nchar(parts[[chosen_part]][2])) { + if (nchar(parts[[chosen_part]][1]) > 0) { + matches <- gregexpr(parts[[chosen_part]][1], actual_path)[[1]] + if (length(matches) == 1) { + match_left <- matches + actual_path <- + substr(actual_path, match_left + attr(match_left, 'match.length'), nchar(actual_path)) + } + } + if (nchar(parts[[chosen_part]][2]) > 0) { + matches <- gregexpr(parts[[chosen_part]][2], actual_path)[[1]] + if (length(matches) == 1) { + match_right <- matches + found_value <- substr(actual_path, 0, match_right - 1) + } + } + } else { + if (nchar(parts[[chosen_part]][2]) > 0) { + matches <- gregexpr(parts[[chosen_part]][2], actual_path)[[1]] + if (length(matches) == 1) { + match_right <- matches + actual_path <- substr(actual_path, 0, match_right - 1) + } + } + if (nchar(parts[[chosen_part]][1]) > 0) { + matches <- gregexpr(parts[[chosen_part]][1], actual_path)[[1]] + if (length(matches) == 1) { + match_left <- matches + found_value <- + substr(actual_path, match_left + attr(match_left, 'match.length'), + nchar(actual_path)) + } + } + } + found_value 
+}
+
+.FilterUserGraphicArgs <- function(excludedArgs, ...) {
+  # This function filters the extra graphical parameters passed by the user in
+  # a plot function, excluding the ones that the plot function uses by default.
+  # Each plot function has a different set of arguments that are not allowed to
+  # be modified.
+  args <- list(...)
+  userArgs <- list()
+  for (name in names(args)) {
+    if ((name != "") & !is.element(name, excludedArgs)) {
+      # If the argument has a name and it is not in the list of excluded
+      # arguments, then it is added to the list that will be used
+      userArgs[[name]] <- args[[name]]
+    } else {
+      .warning(paste0("the argument '", name, "' cannot be modified ",
+                      "and the new value will be ignored"))
+    }
+  }
+  userArgs
+}
+
+.SelectDevice <- function(fileout, width, height, units, res) {
+  # This function is used in the plot functions to check the extension of the
+  # files where the graphics will be stored and select the right R device to
+  # save them.
+  # If the vector of filenames ('fileout') has files with different
+  # extensions, then it will only accept the first one, changing all the rest
+  # of the filenames to use that extension.
+
+  # We extract the extension of the filenames: '.png', '.pdf', ...
+  ext <- regmatches(fileout, regexpr("\\.[a-zA-Z0-9]*$", fileout))
+
+  if (length(ext) != 0) {
+    # If there is an extension specified, select the correct device
+    ## units of width and height set to accept inches
+    if (ext[1] == ".png") {
+      saveToFile <- function(fileout) {
+        png(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else if (ext[1] == ".jpeg") {
+      saveToFile <- function(fileout) {
+        jpeg(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else if (ext[1] %in% c(".eps", ".ps")) {
+      saveToFile <- function(fileout) {
+        postscript(file = fileout, width = width, height = height)
+      }
+    } else if (ext[1] == ".pdf") {
+      saveToFile <- function(fileout) {
+        pdf(file = fileout, width = width, height = height)
+      }
+    } else if (ext[1] == ".svg") {
+      saveToFile <- function(fileout) {
+        svg(filename = fileout, width = width, height = height)
+      }
+    } else if (ext[1] == ".bmp") {
+      saveToFile <- function(fileout) {
+        bmp(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else if (ext[1] == ".tiff") {
+      saveToFile <- function(fileout) {
+        tiff(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else {
+      .warning("file extension not supported; '.eps' will be used by default.")
+      ## In case there is only one filename
+      fileout[1] <- sub("\\.[a-zA-Z0-9]*$", ".eps", fileout[1])
+      ext[1] <- ".eps"
+      saveToFile <- function(fileout) {
+        postscript(file = fileout, width = width, height = height)
+      }
+    }
+    # Change filenames when necessary
+    if (any(ext != ext[1])) {
+      .warning(paste0("some extensions of the filenames provided in 'fileout' ",
+                      "are not ", ext[1],
+                      ". The extensions are being converted to ", ext[1], "."))
+      fileout <- sub("\\.[a-zA-Z0-9]*$", ext[1], fileout)
+    }
+  } else {
+    # Default filenames when there is no specification
+    .warning("no extension specified in the filenames; defaulting to '.eps'")
+    fileout <- paste0(fileout, ".eps")
+    saveToFile <- postscript
+  }
+
+  # return the correct function with the graphical device, and the correct
+  # filenames
+  list(fun = saveToFile, files = fileout)
+}
+
+.message <- function(...) 
{ + # Function to use the 'message' R function with our custom settings + # Default: new line at end of message, indent to 0, exdent to 3, + # collapse to \n* + args <- list(...) + + ## In case we need to specify message arguments + if (!is.null(args[["appendLF"]])) { + appendLF <- args[["appendLF"]] + } else { + ## Default value in message function + appendLF <- TRUE + } + if (!is.null(args[["domain"]])) { + domain <- args[["domain"]] + } else { + ## Default value in message function + domain <- NULL + } + args[["appendLF"]] <- NULL + args[["domain"]] <- NULL + + ## To modify strwrap indent and exdent arguments + if (!is.null(args[["indent"]])) { + indent <- args[["indent"]] + } else { + indent <- 0 + } + if (!is.null(args[["exdent"]])) { + exdent <- args[["exdent"]] + } else { + exdent <- 3 + } + args[["indent"]] <- NULL + args[["exdent"]] <- NULL + + ## To modify paste collapse argument + if (!is.null(args[["collapse"]])) { + collapse <- args[["collapse"]] + } else { + collapse <- "\n*" + } + args[["collapse"]] <- NULL + + ## Message tag + if (!is.null(args[["tag"]])) { + tag <- args[["tag"]] + } else { + tag <- "* " + } + args[["tag"]] <- NULL + + tmp <- paste0(tag, + paste(strwrap(args, indent = indent, exdent = exdent), collapse = collapse)) + message(tmp, appendLF = appendLF, domain = domain) +} + +.warning <- function(...) { + # Function to use the 'warning' R function with our custom settings + # Default: no call information, indent to 0, exdent to 3, + # collapse to \n + args <- list(...) + + ## In case we need to specify warning arguments + if (!is.null(args[["call."]])) { + call <- args[["call."]] + } else { + ## Default: don't show info about the call where the warning came up + call <- FALSE + } + if (!is.null(args[["immediate."]])) { + immediate <- args[["immediate."]] + } else { + ## Default value in warning function + immediate <- FALSE + } + if (!is.null(args[["noBreaks."]])) { + noBreaks <- args[["noBreaks."]] + } else { + ## Default value warning function + noBreaks <- FALSE + } + if (!is.null(args[["domain"]])) { + domain <- args[["domain"]] + } else { + ## Default value warning function + domain <- NULL + } + args[["call."]] <- NULL + args[["immediate."]] <- NULL + args[["noBreaks."]] <- NULL + args[["domain"]] <- NULL + + ## To modify strwrap indent and exdent arguments + if (!is.null(args[["indent"]])) { + indent <- args[["indent"]] + } else { + indent <- 0 + } + if (!is.null(args[["exdent"]])) { + exdent <- args[["exdent"]] + } else { + exdent <- 3 + } + args[["indent"]] <- NULL + args[["exdent"]] <- NULL + + ## To modify paste collapse argument + if (!is.null(args[["collapse"]])) { + collapse <- args[["collapse"]] + } else { + collapse <- "\n!" + } + args[["collapse"]] <- NULL + + ## Warning tag + if (!is.null(args[["tag"]])) { + tag <- args[["tag"]] + } else { + tag <- "! Warning: " + } + args[["tag"]] <- NULL + + tmp <- paste0(tag, + paste(strwrap(args, indent = indent, exdent = exdent), collapse = collapse)) + warning(tmp, call. = call, immediate. = immediate, + noBreaks. = noBreaks, domain = domain) +} + +.IsColor <- function(x) { + res <- try(col2rgb(x), silent = TRUE) + return(!is(res, "try-error")) +} + +# This function switches to a specified figure at position (row, col) in a layout. +# This overcomes the bug in par(mfg = ...). However the mode par(new = TRUE) is +# activated, i.e., all drawn elements will be superimposed. 
Additionally, after
+# using this function, the automatic advance to the next figure in the layout
+# will be broken: once the last figure in the layout is drawn, the pointer won't
+# move to the first figure in the layout.
+# Only figures with numbers other than 0 (when creating the layout) will be
+# accessible.
+# Inputs: either row and col, or n and mat
+.SwitchToFigure <- function(row = NULL, col = NULL, n = NULL, mat = NULL) {
+  if (!is.null(n) && !is.null(mat)) {
+    if (!is.numeric(n) || length(n) != 1) {
+      stop("Parameter 'n' must be a single numeric value.")
+    }
+    n <- round(n)
+    if (!is.array(mat)) {
+      stop("Parameter 'mat' must be an array.")
+    }
+    target <- which(mat == n, arr.ind = TRUE)[1, ]
+    row <- target[1]
+    col <- target[2]
+  } else if (!is.null(row) && !is.null(col)) {
+    if (!is.numeric(row) || length(row) != 1) {
+      stop("Parameter 'row' must be a single numeric value.")
+    }
+    row <- round(row)
+    if (!is.numeric(col) || length(col) != 1) {
+      stop("Parameter 'col' must be a single numeric value.")
+    }
+    col <- round(col)
+  } else {
+    stop("Either 'row' and 'col' or 'n' and 'mat' must be provided.")
+  }
+  next_attempt <- c(row, col)
+  par(mfg = next_attempt)
+  i <- 1
+  layout_size <- par('mfrow')
+  layout_cells <- matrix(1:prod(layout_size), layout_size[1], layout_size[2],
+                         byrow = TRUE)
+  while (any((par('mfg')[1:2] != c(row, col)))) {
+    next_attempt <- which(layout_cells == i, arr.ind = TRUE)[1, ]
+    par(mfg = next_attempt)
+    i <- i + 1
+    if (i > prod(layout_size)) {
+      stop("Figure not accessible.")
+    }
+  }
+  plot(0, type = 'n', axes = FALSE, ann = FALSE)
+  par(mfg = next_attempt)
+}
+
+# Function to permute arrays of non-atomic elements (e.g. POSIXct)
+.aperm2 <- function(x, new_order) {
+  old_dims <- dim(x)
+  attr_bk <- attributes(x)
+  if ('dim' %in% names(attr_bk)) {
+    attr_bk[['dim']] <- NULL
+  }
+  if (is.numeric(x)) {
+    x <- aperm(x, new_order)
+  } else {
+    y <- array(seq_along(x), dim = dim(x))
+    y <- aperm(y, new_order)
+    x <- x[as.vector(y)]
+  }
+  dim(x) <- old_dims[new_order]
+  attributes(x) <- c(attributes(x), attr_bk)
+  x
+}
+
+# This function is a helper for the function .MergeArrays.
+# It expects as inputs two named numeric vectors, and it extends them
+# with dimensions of length 1 until an ordered common dimension
+# format is reached.
+# The first output is dims1 extended with 1s.
+# The second output is dims2 extended with 1s.
+# The third output is a merged dimension vector. If dimensions with
+# the same name are found in the two inputs, and they have a different
+# length, the maximum is taken.
+.MergeArrayDims <- function(dims1, dims2) {
+  new_dims1 <- NULL
+  new_dims2 <- NULL
+  while (length(dims1) > 0) {
+    if (names(dims1)[1] %in% names(dims2)) {
+      pos <- which(names(dims2) == names(dims1)[1])
+      dims_to_add <- rep(1, pos - 1)
+      if (length(dims_to_add) > 0) {
+        names(dims_to_add) <- names(dims2[1:(pos - 1)])
+      }
+      new_dims1 <- c(new_dims1, dims_to_add, dims1[1])
+      new_dims2 <- c(new_dims2, dims2[1:pos])
+      dims1 <- dims1[-1]
+      dims2 <- dims2[-(1:pos)]
+    } else {
+      new_dims1 <- c(new_dims1, dims1[1])
+      new_dims2 <- c(new_dims2, 1)
+      names(new_dims2)[length(new_dims2)] <- names(dims1)[1]
+      dims1 <- dims1[-1]
+    }
+  }
+  if (length(dims2) > 0) {
+    dims_to_add <- rep(1, length(dims2))
+    names(dims_to_add) <- names(dims2)
+    new_dims1 <- c(new_dims1, dims_to_add)
+    new_dims2 <- c(new_dims2, dims2)
+  }
+  list(new_dims1, new_dims2, pmax(new_dims1, new_dims2))
+}
+
+# This function takes two named arrays and merges them, filling with
+# NA where needed. 
+# dim(array1) +# 'b' 'c' 'e' 'f' +# 1 3 7 9 +# dim(array2) +# 'a' 'b' 'd' 'f' 'g' +# 2 3 5 9 11 +# dim(.MergeArrays(array1, array2, 'b')) +# 'a' 'b' 'c' 'e' 'd' 'f' 'g' +# 2 4 3 7 5 9 11 +.MergeArrays <- function(array1, array2, along) { + if (!(is.null(array1) || is.null(array2))) { + if (!(identical(names(dim(array1)), names(dim(array2))) && + identical(dim(array1)[-which(names(dim(array1)) == along)], + dim(array2)[-which(names(dim(array2)) == along)]))) { + new_dims <- .MergeArrayDims(dim(array1), dim(array2)) + dim(array1) <- new_dims[[1]] + dim(array2) <- new_dims[[2]] + for (j in seq_along(dim(array1))) { + if (names(dim(array1))[j] != along) { + if (dim(array1)[j] != dim(array2)[j]) { + if (which.max(c(dim(array1)[j], dim(array2)[j])) == 1) { + na_array_dims <- dim(array2) + na_array_dims[j] <- dim(array1)[j] - dim(array2)[j] + na_array <- array(dim = na_array_dims) + array2 <- abind(array2, na_array, along = j) + names(dim(array2)) <- names(na_array_dims) + } else { + na_array_dims <- dim(array1) + na_array_dims[j] <- dim(array2)[j] - dim(array1)[j] + na_array <- array(dim = na_array_dims) + array1 <- abind(array1, na_array, along = j) + names(dim(array1)) <- names(na_array_dims) + } + } + } + } + } + if (!(along %in% names(dim(array2)))) { + stop("The dimension specified in 'along' is not present in the ", + "provided arrays.") + } + array1 <- abind(array1, array2, along = which(names(dim(array1)) == along)) + names(dim(array1)) <- names(dim(array2)) + } else if (is.null(array1)) { + array1 <- array2 + } + array1 +} + +# only can be used in Trend(). Needs generalization or be replaced by other function. +.reorder <- function(output, time_dim, dim_names) { + # Add dim name back + if (is.null(dim(output))) { + dim(output) <- c(stats = length(output)) + } else { #is an array + if (length(dim(output)) == 1) { + if (!is.null(names(dim(output)))) { + dim(output) <- c(1, dim(output)) + names(dim(output))[1] <- time_dim + } else { + names(dim(output)) <- time_dim + } + } else { # more than one dim + if (names(dim(output))[1] != "") { + dim(output) <- c(1, dim(output)) + names(dim(output))[1] <- time_dim + } else { #regular case + names(dim(output))[1] <- time_dim + } + } + } + # reorder + pos <- match(dim_names, names(dim(output))) + output <- aperm(output, pos) + names(dim(output)) <- dim_names + names(dim(output))[names(dim(output)) == time_dim] <- 'stats' + return(output) +} + +# to be used in AMV.R, TPI.R, SPOD.R, GSAT.R and GMST.R +.Indices <- function(data, type, monini, indices_for_clim, + fmonth_dim, sdate_dim, year_dim, month_dim, na.rm) { + + if (type == 'dcpp') { + + fyear_dim <- 'fyear' + data <- Season(data = data, time_dim = fmonth_dim, + monini = monini, moninf = 1, monsup = 12, + method = mean, na.rm = na.rm) + names(dim(data))[which(names(dim(data)) == fmonth_dim)] <- fyear_dim + + if (identical(indices_for_clim, FALSE)) { ## data is already anomalies + + anom <- data + + } else { ## Different indices_for_clim for each forecast year (to use the same calendar years) + + n_fyears <- as.numeric(dim(data)[fyear_dim]) + n_sdates <- as.numeric(dim(data)[sdate_dim]) + + if (is.null(indices_for_clim)) { ## climatology over the whole (common) period + first_years_for_clim <- n_fyears : 1 + last_years_for_clim <- n_sdates : (n_sdates - n_fyears + 1) + } else { ## indices_for_clim specified as a numeric vector + first_years_for_clim <- seq(from = indices_for_clim[1], by = -1, length.out = n_fyears) + last_years_for_clim <- + seq(from = indices_for_clim[length(indices_for_clim)], + 
by = -1, length.out = n_fyears) + } + + data <- s2dv::Reorder(data = data, order = c(fyear_dim, sdate_dim)) + anom <- array(data = NA, dim = dim(data)) + for (i in 1:n_fyears) { + clim <- mean(data[i, first_years_for_clim[i]:last_years_for_clim[i]], na.rm = na.rm) + anom[i, ] <- data[i, ] - clim + } + } + + } else if (type %in% c('obs', 'hist')) { + + data <- multiApply::Apply(data = data, target_dims = month_dim, + fun = mean, na.rm = na.rm)$output1 + + if (identical(indices_for_clim, FALSE)) { ## data is already anomalies + clim <- 0 + } else if (is.null(indices_for_clim)) { + ## climatology over the whole period + clim <- multiApply::Apply(data = data, target_dims = year_dim, fun = mean, + na.rm = na.rm)$output1 + } else { + ## indices_for_clim specified as a numeric vector + clim <- multiApply::Apply(data = ClimProjDiags::Subset(x = data, along = year_dim, + indices = indices_for_clim), + target_dims = year_dim, fun = mean, na.rm = na.rm)$output1 + } + + anom <- data - clim + + } else { + stop('type must be dcpp, hist or obs') + } + + return(anom) +} + +#TODO: Remove from s2dv when PlotLayout can get colorbar info from plotting function directly. +# The function is temporarily here because PlotLayout() needs to draw the colorbars of +# PlotMostLikelyQuantileMap(). +#Draws Color Bars for Categories +#A wrapper of s2dv::ColorBar to generate multiple color bars for different +#categories, and each category has different color set. +GradientCatsColorBar <- function(nmap, brks = NULL, cols = NULL, vertical = TRUE, subsampleg = NULL, + bar_limits, var_limits = NULL, + triangle_ends = NULL, plot = TRUE, + draw_separators = FALSE, + bar_titles = NULL, title_scale = 1, + label_scale = 1, extra_margin = rep(0, 4), + ...) { + # bar_limits + if (!is.numeric(bar_limits) || length(bar_limits) != 2) { + stop("Parameter 'bar_limits' must be a numeric vector of length 2.") + } + + # Check brks + if (is.null(brks) || (is.numeric(brks) && length(brks) == 1)) { + num_brks <- 5 + if (is.numeric(brks)) { + num_brks <- brks + } + brks <- seq(from = bar_limits[1], to = bar_limits[2], length.out = num_brks) + } + if (!is.numeric(brks)) { + stop("Parameter 'brks' must be a numeric vector.") + } + # Check cols + col_sets <- list(c("#A1D99B", "#74C476", "#41AB5D", "#238B45"), + c("#6BAED6FF", "#4292C6FF", "#2171B5FF", "#08519CFF"), + c("#FFEDA0FF", "#FED976FF", "#FEB24CFF", "#FD8D3CFF"), + c("#FC4E2AFF", "#E31A1CFF", "#BD0026FF", "#800026FF"), + c("#FCC5C0", "#FA9FB5", "#F768A1", "#DD3497")) + if (is.null(cols)) { + if (length(col_sets) >= nmap) { + chosen_sets <- 1:nmap + chosen_sets <- chosen_sets + floor((length(col_sets) - length(chosen_sets)) / 2) + } else { + chosen_sets <- array(seq_along(col_sets), nmap) + } + cols <- col_sets[chosen_sets] + } else { + if (!is.list(cols)) { + stop("Parameter 'cols' must be a list of character vectors.") + } + if (!all(sapply(cols, is.character))) { + stop("Parameter 'cols' must be a list of character vectors.") + } + if (length(cols) != nmap) { + stop("Parameter 'cols' must be a list of the same length as the number of ", + "maps in 'maps'.") + } + } + for (i in seq_along(cols)) { + if (length(cols[[i]]) != (length(brks) - 1)) { + cols[[i]] <- grDevices::colorRampPalette(cols[[i]])(length(brks) - 1) + } + } + + # Check bar_titles + if (is.null(bar_titles)) { + if (nmap == 3) { + bar_titles <- c("Below normal (%)", "Normal (%)", "Above normal (%)") + } else if (nmap == 5) { + bar_titles <- c("Low (%)", "Below normal (%)", + "Normal (%)", "Above normal (%)", "High (%)") + } 
else { + bar_titles <- paste0("Cat. ", 1:nmap, " (%)") + } + } + + if (plot) { + for (k in 1:nmap) { + s2dv::ColorBar(brks = brks, cols = cols[[k]], vertical = FALSE, subsampleg = subsampleg, +# bar_limits = bar_limits, var_limits = var_limits, + triangle_ends = triangle_ends, plot = TRUE, + draw_separators = draw_separators, + title = bar_titles[[k]], title_scale = title_scale, + label_scale = label_scale, extra_margin = extra_margin) + } + } else { + #TODO: col_inf and col_sup + return(list(brks = brks, cols = cols)) + } + +} + + diff --git a/recipe_nao_single.yml b/recipe_nao_single.yml new file mode 100644 index 00000000..a2b3cb32 --- /dev/null +++ b/recipe_nao_single.yml @@ -0,0 +1,108 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: psl + freq: monthly_mean + units: hPa + flux: no + Datasets: + System: + #- {name: 'ECMWF-SEAS5.1'} + #- {name: 'Meteo-France-System8'} + #- {name: 'CMCC-SPS3.5'} + name: ECMWF-SEAS5.1 #Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + Multimodel: + execute: no + approach: pooled # Mandatory, bool: Either yes/true or no/false + createFrom: Anomalies + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0501' + fcst_year: '2023' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2000' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 #6 # Mandatory, int: Last leadtime time step in months + Region: + latmin: 20 + latmax: 80 + lonmin: -80 + lonmax: 40 + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. + type: /esarchive/scratch/nperez/git4/sunset/conf/grid_description/griddes_system51c3s.txt + # type: "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_system" #"to_reference" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: yes + cross_validation: no + save: all + Time_aggregation: + execute: no + Indices: + NAO: {obsproj: yes, save: 'none'} + Calibration: + method: raw # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov std n_eff + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. 
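+      # note: each inner list is one set of category thresholds; the Crossval
+      # modules loop over every set, so e.g. adding [1/10, 9/10] as a second
+      # list would also produce extreme-decile probabilities (assumed, based on
+      # how 'categories' is iterated in the Crossval code)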
+ save: none + Indicators: + index: no + Visualization: + plots: skill_metrics #forecast_ensemble_mean most_likely_terciles + multi_panel: no + dots: both + projection: Robinson + file_format: 'PNG' + #projection: robinson + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: NULL + metric: enscorr rmss rpss crpss EnsSprErr + metric_aggregation: 'skill' + #inf_to_na: yes + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: esarchive #gpfs + output_dir: /esarchive/scratch/nperez/git4/ #/home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /esarchive/scratch/nperez/git4/sunset/ #/home/bsc/bsc032339/sunset/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + diff --git a/recipe_tas_singl.yml b/recipe_tas_singl.yml index 6c509521..9dc99cbc 100644 --- a/recipe_tas_singl.yml +++ b/recipe_tas_singl.yml @@ -35,7 +35,7 @@ Analysis: lonmax: 359.9 Regrid: method: conservative # Mandatory, str: Interpolation method. See docu. - type: "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_system" #"to_reference" + type: /esarchive/scratch/nperez/git4/sunset/conf/grid_description/griddes_system51c3s.txt # "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_system" #"to_reference" #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. 
Workflow: Anomalies: @@ -86,9 +86,9 @@ Analysis: Run: Loglevel: INFO Terminal: yes - filesystem: gpfs - output_dir: /home/bsc/bsc032339/ # replace with the directory where you want to save the outputs - code_dir: /home/bsc/bsc032339/sunset/ # replace with the directory where your code is + filesystem: esarchive #gpfs + output_dir: /esarchive/scratch/nperez/git4/ #/home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /esarchive/scratch/nperez/git4/sunset/ #/home/bsc/bsc032339/sunset/ # replace with the directory where your code is autosubmit: no # fill only if using autosubmit auto_conf: -- GitLab From 333bcd545c6dda8be20a882f36b62a8c5855fe1f Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 27 Jun 2024 09:10:49 +0200 Subject: [PATCH 40/78] capitals for NAO --- full_nao.R => full_NAO.R | 2 +- recipe_nao_single.yml => recipe_NAO_single.yml | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename full_nao.R => full_NAO.R (95%) rename recipe_nao_single.yml => recipe_NAO_single.yml (100%) diff --git a/full_nao.R b/full_NAO.R similarity index 95% rename from full_nao.R rename to full_NAO.R index 43b1e2e3..481cbc0c 100644 --- a/full_nao.R +++ b/full_NAO.R @@ -5,7 +5,7 @@ source("modules/Units/Units.R") source("modules/Visualization/Visualization.R") args = commandArgs(trailingOnly = TRUE) recipe_file <- args[1] -#recipe_file <- "recipe_nao_single.yml" +#recipe_file <- "recipe_NAO_single.yml" recipe <- read_atomic_recipe(recipe_file) #recipe <- prepare_outputs(recipe_file) # Load datasets diff --git a/recipe_nao_single.yml b/recipe_NAO_single.yml similarity index 100% rename from recipe_nao_single.yml rename to recipe_NAO_single.yml -- GitLab From 368c5d8e0895241eaaae3fdb63fbe517c0ade492 Mon Sep 17 00:00:00 2001 From: Nuria Perez Zanon Date: Thu, 27 Jun 2024 17:34:30 +0200 Subject: [PATCH 41/78] NAO working on MN5 trying plot_NAO and Calibration --- full_NAO.R | 13 +- modules/Crossval/Crossval_Calibration.R | 97 ++++++++ modules/Crossval/Crossval_NAO.R | 1 + plot_NAO.R | 307 ++++++++++++++++++++++++ recipe_NAO_single.yml | 20 +- sunset.sh | 2 +- 6 files changed, 423 insertions(+), 17 deletions(-) create mode 100644 modules/Crossval/Crossval_Calibration.R create mode 100644 plot_NAO.R diff --git a/full_NAO.R b/full_NAO.R index 481cbc0c..b0db00df 100644 --- a/full_NAO.R +++ b/full_NAO.R @@ -3,11 +3,11 @@ source("modules/Loading/Loading.R") source("modules/Saving/Saving.R") source("modules/Units/Units.R") source("modules/Visualization/Visualization.R") -args = commandArgs(trailingOnly = TRUE) -recipe_file <- args[1] -#recipe_file <- "recipe_NAO_single.yml" -recipe <- read_atomic_recipe(recipe_file) -#recipe <- prepare_outputs(recipe_file) +#args = commandArgs(trailingOnly = TRUE) +#recipe_file <- args[1] +recipe_file <- "recipe_NAO_single.yml" +#recipe <- read_atomic_recipe(recipe_file) +recipe <- prepare_outputs(recipe_file, disable_checks = TRUE) # Load datasets data <- Loading(recipe) data <- Units(recipe, data) @@ -18,6 +18,9 @@ data_summary(data$obs, recipe) source("modules/Crossval/Crossval_NAO.R") res <- Crossval_NAO(recipe = recipe, data = data) +source("plot_NAO.R") +plot_NAO(recipe = recipe, nao = res, data = data) + source("modules/Crossval/Crossval_metrics.R") skill_metrics <- Crossval_metrics(recipe = recipe, data_crossval = res, fair = FALSE, nmemb = NULL, nmemb_ref = NULL) diff --git a/modules/Crossval/Crossval_Calibration.R b/modules/Crossval/Crossval_Calibration.R new file mode 100644 index 00000000..c425a84c --- /dev/null +++ 
b/modules/Crossval/Crossval_Calibration.R @@ -0,0 +1,97 @@ +# take the output of Flor/s2s/subseasonal_loading.R + +source("../git/sunset/modules/Crossval/R/tmp/GetProbs.R") + +Crossval_calibration <- function(recipe, data) { + + cross.method <- recipe$Analysis$cross.method + # TODO move check + if (is.null(cross.method)) { + cross.method <- 'leave-one-out' + } + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + eval(parse(text = y))})}) + ncores <- recipe$Analysis$ncores + na.rm <- recipe$Analysis$remove_NAs + ## data dimensions + sdate_dim <- dim(data$hcst$data)['syear'] + orig_dims <- names(dim(data$hcst$data)) + # spatial dims + if ('latitude' %in% names(dim(data$hcst$data))) { + nlats <- dim(data$hcst$data)['latitude'] + nlons <- dim(data$hcst$data)['longitude'] + agg = 'global' + } else if ('region' %in% names(dim(data$hcst$data))) { + agg = 'region' + nregions <- dim(data$hcst$data)['region'] + } + + + cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, + amt.points = sdate_dim, + amt.points_cor = NULL) # k = ? + + cal_hcst_ev_res <- NULL + cal_obs_ev_res <- NULL + cal_obs_tr_res <- NULL + # as long as probs requested in recipe: + lims_cal_hcst_tr_res <- lapply(categories, function(X) {NULL}) + lims_cal_obs_tr_res <- lapply(categories, function(X) {NULL}) + + fcst_probs <- lapply(categories, function(x){NULL}) + hcst_probs_ev <- lapply(categories, function(x){NULL}) + obs_probs_ev <- lapply(categories, function(x){NULL}) + + for (t in 1:length(cross)) { + info(recipe$Run$logger, paste("crossval:", t)) + + # subset years: Subset works at BSC not at Athos + ## training indices + obs_tr <- CST_Subset(data$obs, along = 'syear', + indices = cross[[t]]$train.dexes) + hcst_tr <- CST_Subset(data$hcst, along = 'syear', + indices = cross[[t]]$train.dexes) + ## evaluation indices + hcst_ev <- CST_Subset(data$hcst, along = 'syear', + indices = cross[[t]]$eval.dexes) + obs_ev <- CST_Subset(data$obs, along = 'syear', + indices = cross[[t]]$eval.dexes) + + if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { +source("https://earth.bsc.es/gitlab/external/cstools/-/raw/d6914a40c11d09168b9b4a191e9a0362b56a5f0c/R/CST_MergeDims.R") + + hcst_tr <- CST_MergeDims(hcst_tr, merge_dims = c('sday', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + obs_tr <- CST_MergeDims(obs_tr, merge_dims = c('sday', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + hcst_ev <- CST_MergeDims(hcst_ev, merge_dims = c('sday', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + } + cal_hcst_tr <- CST_Calibration(exp = hcst_tr, obs = obs_tr, + memb_dim = 'ensemble', sdate_dim = 'syear', + ncores = ncores) + cal_hcst_ev <- CST_Calibration(exp = hcst_tr, obs = obs_tr, exp_cor = hcst_ev, + memb_dim = 'ensemble', sdate_dim = 'syear', + ncores = ncores) + + lims_cal_hcst_tr <- Apply(cal_hcst_tr$data, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x) {'cat'}), + prob_lims = categories, + ncores = ncores) + lims_cal_obs_tr <- Apply(obs_tr$data, target_dims = c('syear'),#, 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x){'cat'}), + prob_lims = categories, + ncores = ncores) + + + + +} diff --git a/modules/Crossval/Crossval_NAO.R 
b/modules/Crossval/Crossval_NAO.R index 9fb31918..d4882f3e 100644 --- a/modules/Crossval/Crossval_NAO.R +++ b/modules/Crossval/Crossval_NAO.R @@ -178,6 +178,7 @@ Crossval_NAO <- function(recipe, data) { along = 'unneeded', indices = 1, drop = 'selected') } # Forecast anomalies: + nao_fcst <- NULL if (!is.null(data$fcst)) { clim_hcst <- Apply(data$hcst$data, target_dims = c('syear', 'ensemble'), diff --git a/plot_NAO.R b/plot_NAO.R new file mode 100644 index 00000000..8f01c45a --- /dev/null +++ b/plot_NAO.R @@ -0,0 +1,307 @@ +plot_NAO <- function(recipe, nao, data) { + +# Read variable long_name to plot it + conf <- yaml::read_yaml("conf/variable-dictionary.yml") + var_name <- conf$vars[[which(names(conf$vars) == + recipe$Analysis$Variables$name)]]$long + plot_ts <- TRUE + plot_sp <- TRUE + alpha <- 0.05 + lons <- data$hcst$coords$longitude + lats <- data$hcst$coords$latitude + nao_region <- c(lonmin = -80, lonmax = 40, + latmin = 20, latmax = 80) + + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + # Use startdates param from SaveExp to correctly name the files: + if (length(data$hcst$attrs$source_files) == dim(data$hcst$data)['syear']) { + file_dates <- Apply(data$hcst$attrs$source_files, target_dim = NULL, + fun = function(x) { + pos <- which(strsplit(x, "")[[1]] == ".") + res <- substr(x, pos-8, pos-1) + })$output1 + } + } else if (tolower(recipe$Analysis$Horizon) == "decadal"){ + file_dates <- paste0('s', + recipe$Analysis$Time$hcst_start : recipe$Analysis$Time$hcst_end) + } + + if (plot_ts) { + dir.create(paste0(recipe$Run$output_dir, "/plots/Indices/"), + showWarnings = F, recursive = T) + source("modules/old_modules/Indices/R/plot_deterministic_forecast.R") + for (tstep in 1:dim(nao$hcst$data)['time']) { + mes <- as.numeric(substr(recipe$Analysis$Time$sdate, 1,2)) + + (tstep - 1) + (recipe$Analysis$Time$ftime_min - 1) + mes <- ifelse(mes > 12, mes - 12, mes) + fmonth <- sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min) + obs <- Subset(nao$obs$data, along = 'time', ind = tstep) + exp <- Subset(nao$hcst$data, along = 'time', ind = tstep) + if (gsub(".", "", recipe$Analysis$Datasets$System$name) == "") { + system <- recipe$Analysis$Datasets$System$name + } else { + system <-gsub(".", "", recipe$Analysis$Datasets$System$name) + } + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + toptitle <- paste("NAO Index\n", + month.abb[mes], + "/", recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_", + system, "_", recipe$Analysis$Datasets$Reference$name, + "_s", recipe$Analysis$Time$sdate, "_ftime", + sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min), ".pdf") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Nominal start date: 1st of ", + month.name[as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))], + "\n", + "Forecast month: ", fmonth, "\n") + xlabs <- as.numeric(substr(file_dates, 1, 4)) + } else if (tolower(recipe$Analysis$Horizon) == "decadal"){ + toptitle <- paste("NAO Index\n", + "Lead time", fmonth, + " / Start dates", recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_", + system, "_", recipe$Analysis$Datasets$Reference$name, + "_ftime", + sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min), ".pdf") + caption <- paste0("NAO method: ", + 
ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Start date month: ", + month.name[get_archive(recipe)$System[[recipe$Analysis$Datasets$System$name]]$initial_month], + "\n", + "Lead time: ", fmonth, "\n") + xlabs <- file_dates + } else { + toptitle <- NULL + warning("The plot title is not defined for this time horizon. ", + "The plots will not have a title.") + } + plot_deterministic_forecast(obs, exp, + time_dim = 'syear', + member_dim = 'ensemble', style = 'boxplot', + xlabs = xlabs, + title = toptitle, fileout = plotfile, + caption = caption, caption_line = 6.5, + legend_text = c( + recipe$Analysis$Datasets$Reference$name, + recipe$Analysis$Datasets$System$name)) + } + } + if (plot_sp) { + ## TODO: To be removed when s2dv is released: + source("modules/Visualization/R/tmp/PlotRobinson.R") + source("modules/old_modules/Indices/R/correlation_eno.R") + source("modules/Visualization/R/get_proj_code.R") + dir.create(paste0(recipe$Run$output_dir, "/plots/Indices/"), + showWarnings = F, recursive = T) + # Get correct code for stereographic projection + projection_code <- get_proj_code(proj_name = "stereographic") + correl_obs <- Apply(list(data$obs$data, nao$obs$data), + target_dims = 'syear', fun = .correlation_eno, + time_dim = 'syear', method = 'pearson', alpha = alpha, + test.type = 'two-sided', pval = FALSE, + ncores = recipe$Analysis$ncores) + correl_hcst <- Apply(list(data$hcst$data, nao$hcst$data), + target_dims = c('syear', 'ensemble'), + fun = function(x, y) { + x <- apply(x, 1, mean, na.rm = TRUE) + y <- apply(y, 1, mean, na.rm = TRUE) + dim(y) <- c(syear = length(y)) + dim(x) <- c(syear = length(x)) + res <- .correlation_eno(x, y, + time_dim = 'syear', method = 'pearson', alpha = alpha, + test.type = 'two-sided', pval = FALSE)}, + ncores = recipe$Analysis$ncores) + correl_hcst_full <- Apply(list(data$hcst$data, nao$hcst$data), + target_dims = c('syear', 'ensemble'), + fun = function(x,y) { + dim(y) <- c(syear = length(y)) + dim(x) <- c(syear = length(x)) + res <- .correlation_eno(x, y, + time_dim = 'syear', method = 'pearson', alpha = alpha, + test.type = 'two-sided', pval = FALSE)}, + ncores = recipe$Analysis$ncores) + + for (tstep in 1:dim(nao$obs$data)['time']) { + fmonth <- sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min) + ## Observations + map <- drop(Subset(correl_obs$r, along = 'time', ind = tstep)) + sig <- drop(Subset(correl_obs$sign, along = 'time', ind = tstep)) + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + mes <- as.numeric(substr(recipe$Analysis$Time$sdate, 1,2)) + + (tstep - 1) + (recipe$Analysis$Time$ftime_min - 1) + mes <- ifelse(mes > 12, mes - 12, mes) + toptitle <- paste(recipe$Analysis$Datasets$Reference$name, "\n", + "NAO Index -",var_name, "\n", + " Correlation /", month.abb[mes], + "/", recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", + recipe$Analysis$Variable$name, "_", + recipe$Analysis$Datasets$Reference$name, + "_s", recipe$Analysis$Time$sdate, + "_ftime", fmonth, ".pdf") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Nominal start date: 1st of ", + month.name[as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation; alpha = ", alpha) + } else if (tolower(recipe$Analysis$Horizon) == "decadal") { + toptitle <- 
paste(recipe$Analysis$Datasets$Reference$name, "\n", + "NAO Index -",var_name, "\n", + " Correlation / Start dates ", + recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", + recipe$Analysis$Variable$name, "_", + recipe$Analysis$Datasets$Reference$name, + "_ftime", fmonth, ".pdf") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Start date: month ", + month.name[get_archive(recipe)$System[[recipe$Analysis$Datasets$System$name]]$initial_month], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation; alpha = ", alpha) + } else { + toptitle <- NULL + warning("The plot title is not defined for this time horizon. ", + "The plots will not have a title.") + } + if (gsub(".", "", recipe$Analysis$Datasets$System$name) == "") { + system <- recipe$Analysis$Datasets$System$name + } else { + system <- gsub(".", "", recipe$Analysis$Datasets$System$name) + } + + PlotRobinson(map, lon = lons, lat = lats, target_proj = projection_code, + lat_dim = 'latitude', lon_dim = 'longitude', + legend = 's2dv', style = 'polygon', + toptitle = toptitle, crop_coastlines = nao_region, + caption = caption, mask = sig, bar_extra_margin = c(4,0,4,0), + fileout = plotfile, width = 8, height = 6, + brks = seq(-1, 1, 0.2), cols = brewer.pal(10, 'PuOr')) + ## Ensemble-mean + map <- drop(Subset(correl_hcst$r, along = 'time', ind = tstep)) + sig <- drop(Subset(correl_hcst$sign, along = 'time', ind = tstep)) + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + toptitle <- paste(recipe$Analysis$Datasets$System$name, "\n", + "NAO Index -", var_name, "\n", + " Correlation /", month.abb[mes], + "/", recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", + recipe$Analysis$Variable$name, "_ensmean_", + system, + "_s", recipe$Analysis$Time$sdate, + "_ftime", fmonth, ".pdf") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Correlation ensemble mean\n", + "Nominal start date: 1st of ", + month.name[as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation; alpha = ", alpha) + } else if (tolower(recipe$Analysis$Horizon) == "decadal"){ + toptitle <- paste(recipe$Analysis$Datasets$System$name,"\n", + "NAO Index -",var_name, "\n", + " Correlation / Start dates ", + recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", + recipe$Analysis$Variable$name, "_ensmean_", + system, + recipe$Analysis$Datasets$Reference$name, + "_ftime", fmonth, ".pdf") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Correlation ensemble mean\n", + "Start date month: ", + month.name[get_archive(recipe)$System[[recipe$Analysis$Datasets$System$name]]$initial_month], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation; alpha = ", alpha) + } else { + toptitle <- NULL + warning("The plot title is not defined for this time horizon. 
", + "The plots will not have a title.") + } + + PlotRobinson(map, lon = lons, lat = lats, target_proj = projection_code, + lat_dim = 'latitude', lon_dim = 'longitude', + legend = 's2dv', bar_extra_margin = c(4,0,4,0), + toptitle = toptitle, style = 'polygon', + caption = caption, mask = sig, crop_coastline = nao_region, + fileout = plotfile, width = 8, height = 6, + brks = seq(-1, 1, 0.2), cols = brewer.pal(10, 'PuOr')) + + # Full hcst corr + map <- drop(Subset(correl_hcst_full$r, along = 'time', ind = tstep)) + sig <- drop(Subset(correl_hcst_full$sign, along = 'time', ind = tstep)) + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + toptitle <- paste(recipe$Analysis$Datasets$System$name,"\n", + "NAO Index -",var_name, "\n", + " Correlation /", month.abb[mes], + "/", recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", + recipe$Analysis$Variable$name, "_member_", + system, + "_s", recipe$Analysis$Time$sdate, + "_ftime", fmonth, ".pdf") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Correlation all members\n", + "Nominal start date: 1st of ", + month.name[as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation; alpha = ", alpha) + } else if (tolower(recipe$Analysis$Horizon) == "decadal"){ + toptitle <- paste(recipe$Analysis$Datasets$System$name,"\n", + "NAO Index -",var_name, "\n", + " Correlation / Start dates ", + recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", + recipe$Analysis$Variable$name, "_member_", + system, + recipe$Analysis$Datasets$Reference$name, + "_ftime", fmonth, ".pdf") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Correlation all members\n", + "Start date month: ", + month.name[get_archive(recipe)$System[[recipe$Analysis$Datasets$System$name]]$initial_month], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation; alpha = ", alpha) + } else { + toptitle <- NULL + warning("The plot title is not defined for this time horizon. ", + "The plots will not have a title.") + } + PlotRobinson(map, lon = lons, lat = lats, target_proj = projection_code, + lat_dim = 'latitude', lon_dim = 'longitude', + legend = 's2dv', bar_extra_margin = c(4,0,4,0), + toptitle = toptitle, style = 'polygon', + caption = caption, mask = sig, crop_coastline = nao_region, + fileout = plotfile, width = 8, height = 6, + brks = seq(-1, 1, 0.2), cols = brewer.pal(10, 'PuOr')) + } # end tstep loop + } +} diff --git a/recipe_NAO_single.yml b/recipe_NAO_single.yml index a2b3cb32..aaf166e2 100644 --- a/recipe_NAO_single.yml +++ b/recipe_NAO_single.yml @@ -23,9 +23,9 @@ Analysis: name: ERA5 # Mandatory, str: Reference codename. See docu. Time: sdate: '0501' - fcst_year: '2023' + fcst_year: #'2023' hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' - hcst_end: '2000' # Mandatory, int: Hindcast end year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' ftime_min: 1 # Mandatory, int: First leadtime time step in months ftime_max: 6 #6 # Mandatory, int: Last leadtime time step in months Region: @@ -35,14 +35,12 @@ Analysis: lonmax: 40 Regrid: method: conservative # Mandatory, str: Interpolation method. See docu. 
- type: /esarchive/scratch/nperez/git4/sunset/conf/grid_description/griddes_system51c3s.txt - # type: "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_system" #"to_reference" - #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + type: /home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt #"to_system" #"to_reference" /esarchive/scratch/nperez/git4/sunset/conf/grid_description/griddes_system51c3s.txt Workflow: Anomalies: compute: yes cross_validation: no - save: all + save: none Time_aggregation: execute: no Indices: @@ -52,7 +50,7 @@ Analysis: cross_validation: yes save: none Skill: - metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov std n_eff + metric: EnsCorr rmsss rpss crpss EnsSprErr rps crps rps_syear crps_syear cov std n_eff save: 'all' cross_validation: yes Probabilities: @@ -82,16 +80,16 @@ Analysis: col1_width: NULL col2_width: NULL calculate_diff: FALSE - ncores: 4 # Optional, int: number of cores, defaults to 1 + ncores: 16 # Optional, int: number of cores, defaults to 1 remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE Output_format: scorecards logo: yes Run: Loglevel: INFO Terminal: yes - filesystem: esarchive #gpfs - output_dir: /esarchive/scratch/nperez/git4/ #/home/bsc/bsc032339/ # replace with the directory where you want to save the outputs - code_dir: /esarchive/scratch/nperez/git4/sunset/ #/home/bsc/bsc032339/sunset/ # replace with the directory where your code is + filesystem: gpfs #esarchive #gpfs + output_dir: /home/bsc/bsc032339/ #/esarchive/scratch/nperez/git4/ #/home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /home/bsc/bsc032339/sunset/ #/esarchive/scratch/nperez/git4/sunset/ #/home/bsc/bsc032339/sunset/ # replace with the directory where your code is autosubmit: no # fill only if using autosubmit auto_conf: diff --git a/sunset.sh b/sunset.sh index c37bc9bb..7f913f15 100644 --- a/sunset.sh +++ b/sunset.sh @@ -10,5 +10,5 @@ source /gpfs/projects/bsc32/software/suselinux/11/software/Miniconda3/4.7.10/etc/profile.d/conda.sh conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/SUNSET-env_2.0.0 -Rscript /home/bsc/bsc032339/sunset/full_ecvs_multimodel_anomalies.R +Rscript /home/bsc/bsc032339/sunset/full_NAO.R #ecvs_multimodel_anomalies.R -- GitLab From d9a4b27f881f4eab878211a32cb7e543c869c69d Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 24 Jul 2024 14:40:57 +0200 Subject: [PATCH 42/78] fix folder names and add fcst categories saving --- modules/Crossval/Crossval_anomalies.R | 56 +++++++++++++++++++++------ 1 file changed, 44 insertions(+), 12 deletions(-) diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R index 3ff0f50e..cc41ffae 100644 --- a/modules/Crossval/Crossval_anomalies.R +++ b/modules/Crossval/Crossval_anomalies.R @@ -125,23 +125,60 @@ Crossval_anomalies <- function(recipe, data) { lims_ano_obs_tr_res[[ps]] <- Subset(lims_ano_obs_tr_res[[ps]], along = 'unneeded', indices = 1, drop = 'selected') } -browser() + # Make categories rounded number to use as names: + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + round(eval(parse(text = y)),2)})}) + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Anomalies/") + # Forecast anomalies: if 
(!is.null(data$fcst)) { - clim_hcst <- Apply(ano_hcst_ev_res, + clim_hcst <- Apply(data$hcst$data, target_dims = c('syear', 'ensemble'), mean, na.rm = na.rm, ncores = ncores)$output1 data$fcst$data <- Ano(data = data$fcst$data, clim = clim_hcst) + hcst_ano <- Ano(data = data$hcst$data, clim = clim_hcst) # Terciles limits using the whole hindcast period: - lims_fcst <- Apply(ano_hcst_ev_res, target_dims = c('syear', 'ensemble'), + lims_fcst <- Apply(hcst_ano, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { quantile(as.vector(x), ps, na.rm = na.rm)})}, output_dims = lapply(categories, function(x) {'cat'}), prob_lims = categories, ncores = ncores) + clim_obs <- Apply(data$obs$data, + target_dims = c('syear', 'ensemble'), + mean, + na.rm = na.rm, + ncores = ncores)$output1 + obs_ano <- Ano(data = data$obs$data, clim = clim_obs) + lims <- Apply(obs_ano, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x) {'cat'}), + prob_lims = categories, + ncores = ncores) + tmp_lims2 <- list() + + for(ps in 1:length(categories)) { + tmp_lims3 <- drop(lims[[ps]]) + for (l in 1:dim(lims[[ps]])['cat']) { + tmp_lims <- tmp_lims3[l,,,] + if (!('var' %in% names(dim(tmp_lims)))) { + dim(tmp_lims) <- c(var = 1, dim(tmp_lims)) + } + tmp_lims2 <- append(tmp_lims2, list(tmp_lims)) + names(tmp_lims2)[length(tmp_lims2)] <- as.character(categories[[ps]][l]) + } + save_percentiles(recipe = recipe, percentiles = tmp_lims2, + data_cube = data$obs, + agg = "global", outdir = NULL) + } } # Compute Probabilities @@ -180,8 +217,8 @@ browser() "#### Anomalies and Probabilities Done #####") if (recipe$Analysis$Workflow$Anomalies$save != 'none') { info(recipe$Run$logger, "##### START SAVING ANOMALIES #####") - recipe$Run$output_dir <- paste0(recipe$Run$output_dir, - "/outputs/Anomalies/") +# recipe$Run$output_dir <- paste0(recipe$Run$output_dir, +# "/outputs/Anomalies/") # Save forecast if ((recipe$Analysis$Workflow$Anomalies$save %in% c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) { @@ -202,11 +239,6 @@ browser() probs_fcst <- list() probs_obs <- list() all_names <- NULL - # Make categories rounded number to use as names: - categories <- recipe$Analysis$Workflow$Probabilities$percentiles - categories <- lapply(categories, function (x) { - sapply(x, function(y) { - round(eval(parse(text = y)),2)})}) for (ps in 1:length(categories)) { for (perc in 1:(length(categories[[ps]]) + 1)) { @@ -292,8 +324,8 @@ browser() return(list(hcst = ano_hcst, obs = ano_obs, fcst = data$fcst, hcst.full_val = data$hcst, obs.full_val = data$obs, hcst_EM = hcst_EM, fcst_EM = fcst_EM, - cat_lims = list(hcst_tr = lims_ano_hcst_tr_res, - obs_tr = lims_ano_obs_tr_res), + #cat_lims = list(hcst_tr = lims_ano_hcst_tr_res, + # obs_tr = lims_ano_obs_tr_res), probs = list(hcst_ev = hcst_probs_ev, obs_ev = obs_probs_ev), ref_obs_tr = ano_obs_tr_res)) -- GitLab From 5c4f0d91999dad3cb98963db833e6409a9be2d03 Mon Sep 17 00:00:00 2001 From: Nuria Perez Zanon Date: Mon, 29 Jul 2024 15:55:25 +0200 Subject: [PATCH 43/78] UKMO to test, NAO multimodel under dev --- full_ecvs_multimodel_anomalies.R | 11 +- full_multimodel_NAO.R | 49 ++++++++ modules/Crossval/Crossval_Calibration.R | 4 +- modules/Crossval/Crossval_multimodel_NAO.R | 139 +++++++++++++++++++++ modules/Loading/R/dates2load.R | 19 ++- recipe_NAO.yml | 110 ++++++++++++++++ recipe_tas.yml | 16 +-- 
recipe_tas_decadal.yml | 116 +++++++++++++++++ sunset.sh | 12 +- 9 files changed, 456 insertions(+), 20 deletions(-) create mode 100644 full_multimodel_NAO.R create mode 100644 modules/Crossval/Crossval_multimodel_NAO.R create mode 100644 recipe_NAO.yml create mode 100644 recipe_tas_decadal.yml diff --git a/full_ecvs_multimodel_anomalies.R b/full_ecvs_multimodel_anomalies.R index 1416b72a..4f423764 100644 --- a/full_ecvs_multimodel_anomalies.R +++ b/full_ecvs_multimodel_anomalies.R @@ -1,4 +1,4 @@ - +library(pryr) source("modules/Loading/Loading.R") source("modules/Saving/Saving.R") source("modules/Units/Units.R") @@ -12,7 +12,6 @@ original_recipe <- prepare_outputs(recipe_file, disable_checks = TRUE) # Load datasets models <- unlist(original_recipe$Analysis$Datasets$System) -original_recipe$Analysis$ncores <- 32 recipe_aux <- original_recipe datos <- list() @@ -20,9 +19,11 @@ datos$hcst <- list() datos$fcst <- list() source("modules/Crossval/Crossval_metrics.R") source("modules/Crossval/Crossval_anomalies.R") +source("modules/Aggregation/Aggregation.R") for (sys in models) { recipe_aux$Analysis$Datasets$System <- NULL recipe_aux$Analysis$Datasets$System$name <- as.vector(sys) + # recipe_aux$Analysis$Datasets$System$member <- read_yaml("conf/archive_decadal.yml")$esarchive$System[[sys]]$member data <- Loading(recipe = recipe_aux) data <- Units(recipe_aux, data) # verification individual models @@ -36,6 +37,8 @@ for (sys in models) { datos$fcst <- append(datos$fcst, list(data$fcst)) names(datos$hcst)[length(datos$hcst)] <- gsub('\\.','', sys) names(datos$fcst)[length(datos$fcst)] <- gsub('\\.','', sys) + gc() + print(mem_used()) } data_aux <- data datos$obs <- data$obs @@ -49,13 +52,13 @@ source("modules/Crossval/Crossval_multimodel_metrics.R") skill_metrics <- Crossval_multimodel_metrics(recipe = original_recipe, data = res, Fair = FALSE) - +recipe_aux$Analysis$Datasets$System$name <- 'Multimodel' Visualization(recipe = recipe_aux, data = data_aux, skill_metrics = skill_metrics, significance = TRUE) ## Check logo size is appropiated for your maps: source("tools/add_logo.R") -add_logo(recipe, "rsz_rsz_bsc_logo.png") +add_logo(recipe_aux, "rsz_rsz_bsc_logo.png") # scorecards computation: diff --git a/full_multimodel_NAO.R b/full_multimodel_NAO.R new file mode 100644 index 00000000..e53cd199 --- /dev/null +++ b/full_multimodel_NAO.R @@ -0,0 +1,49 @@ + +source("modules/Loading/Loading.R") +source("modules/Saving/Saving.R") +source("modules/Units/Units.R") +source("modules/Visualization/Visualization.R") +#args = commandArgs(trailingOnly = TRUE) +#recipe_file <- args[1] +recipe_file <- "recipe_NAO.yml" +#recipe <- read_atomic_recipe(recipe_file) +original_recipe <- prepare_outputs(recipe_file, disable_checks = TRUE) +models <- unlist(original_recipe$Analysis$Datasets$System) + +recipe_aux <- original_recipe +datos <- list() +datos$hcst <- list() +datos$fcst <- list() +NAO_ind <- list() +source("modules/Crossval/Crossval_metrics.R") +source("modules/Crossval/Crossval_NAO.R") +for (sys in models[1:2]) { + recipe_aux$Analysis$Datasets$System <- NULL + recipe_aux$Analysis$Datasets$System$name <- as.vector(sys) + # Load datasets + data <- Loading(recipe = recipe_aux) + data <- Units(recipe = recipe_aux, data = data) + # Verification individual models + product <- Crossval_NAO(recipe = recipe_aux, data = data) + source("plot_NAO.R") + plot_NAO(recipe = recipe_aux, nao = product, data = data) + + skill_model <- Crossval_metrics(recipe = recipe_aux, + data_crossval = product, + fair = FALSE, nmemb = 
NULL, nmemb_ref = NULL) + datos$hcst <- append(datos$hcst, list(data$hcst)) + datos$fcst <- append(datos$fcst, list(data$fcst)) + names(datos$hcst)[length(datos$hcst)] <- gsub('\\.','', sys) + names(datos$fcst)[length(datos$fcst)] <- gsub('\\.','', sys) + NAO_ind <- append(NAO_ind, list(product)) + names(NAO_ind)[length(NAO_ind)] <- gsub('\\.','', sys) +} +# No visualisation available for region aggregated +data_aux <- data +datos$obs <- data$obs +data <- datos +rm(list = 'datos') +source("modules/Crossval/Crossval_multimodel_NAO.R") +res <- Crossval_multimodel_NAO(recipe = original_recipe, data = data) + + diff --git a/modules/Crossval/Crossval_Calibration.R b/modules/Crossval/Crossval_Calibration.R index c425a84c..203b1448 100644 --- a/modules/Crossval/Crossval_Calibration.R +++ b/modules/Crossval/Crossval_Calibration.R @@ -71,9 +71,11 @@ source("https://earth.bsc.es/gitlab/external/cstools/-/raw/d6914a40c11d09168b9b4 } cal_hcst_tr <- CST_Calibration(exp = hcst_tr, obs = obs_tr, memb_dim = 'ensemble', sdate_dim = 'syear', - ncores = ncores) + eval.method = 'in-sample', + ncores = ncores) cal_hcst_ev <- CST_Calibration(exp = hcst_tr, obs = obs_tr, exp_cor = hcst_ev, memb_dim = 'ensemble', sdate_dim = 'syear', + eval.method = 'in-sample', ncores = ncores) lims_cal_hcst_tr <- Apply(cal_hcst_tr$data, target_dims = c('syear', 'ensemble'), diff --git a/modules/Crossval/Crossval_multimodel_NAO.R b/modules/Crossval/Crossval_multimodel_NAO.R new file mode 100644 index 00000000..b82aea16 --- /dev/null +++ b/modules/Crossval/Crossval_multimodel_NAO.R @@ -0,0 +1,139 @@ +source("modules/Crossval/R/tmp/GetProbs.R") + +Crossval_multimodel_NAO <- function(recipe, data) { + cross.method <- recipe$Analysis$cross.method + # TODO move check + obsproj <- recipe$Analysis$Workflow$Indices$NAO$obsproj + if (is.null(obsproj)) { + obsproj <- TRUE + } + if (is.null(cross.method)) { + cross.method <- 'leave-one-out' + } + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + eval(parse(text = y))})}) + ncores <- recipe$Analysis$ncores + na.rm <- recipe$Analysis$remove_NAs + ## data dimensions + sdate_dim <- dim(data$hcst[[1]]$data)['syear'] + orig_dims <- names(dim(data$hcst[[1]]$data)) + cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, + amt.points = sdate_dim, + amt.points_cor = NULL) # k = ? 
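+  # note (assumption): .make.eval.train.dexes is an internal CSTools helper;
+  # each element of 'cross' is expected to carry $train.dexes and $eval.dexes,
+  # e.g. 'leave-one-out' with 24 start dates gives cross[[1]]$eval.dexes == 1
+  # and cross[[1]]$train.dexes == 2:24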
+
+  # Define output dims and names:
+  nao_hcst_tr_aux <- list()
+  nao_hcst_ev_aux <- list()
+
+  nao_obs_tr_res <- list()
+  nao_hcst_ev_res <- list()
+  # Cross-val loop starts
+  for (t in 1:length(cross)) {
+    info(recipe$Run$logger, paste("crossval:", t))
+    nao_hcst_tr_res <- list()
+    nao_hcst_ev_res <- list()
+
+    # Observations
+    obs_tr <- Subset(data$obs$data, along = 'syear',
+                     indices = cross[[t]]$train.dexes)
+    obs_ev <- Subset(data$obs$data, along = 'syear',
+                     indices = cross[[t]]$eval.dexes)
+    clim_obs_tr <- MeanDims(obs_tr, 'syear')
+    ano_obs_tr <- s2dv::Ano(obs_tr, clim_obs_tr,
+                            ncores = ncores)
+    ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr,
+                            ncores = ncores)
+    # NAO for individual models
+    for (sys in 1:length(data$hcst)) {
+      hcst_tr <- Subset(data$hcst[[sys]]$data, along = 'syear',
+                        indices = cross[[t]]$train.dexes)
+      ## evaluation indices
+      hcst_ev <- Subset(data$hcst[[sys]]$data, along = 'syear',
+                        indices = cross[[t]]$eval.dexes)
+      # compute climatology:
+      clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble'))
+      ano_hcst_tr <- s2dv::Ano(hcst_tr, clim_hcst_tr,
+                               ncores = ncores)
+      ano_hcst_ev <- s2dv::Ano(hcst_ev, clim_hcst_tr,
+                               ncores = ncores)
+      # compute NAO:
+      nao <- NAO(exp = ano_hcst_tr, obs = ano_obs_tr, exp_cor = ano_hcst_ev,
+                 ftime_avg = NULL, time_dim = 'syear',
+                 memb_dim = 'ensemble',
+                 space_dim = c('latitude', 'longitude'),
+                 ftime_dim = 'time', obsproj = obsproj,
+                 lat = data$obs$coords$latitude,
+                 lon = data$obs$coords$longitude,
+                 ncores = recipe$Analysis$ncores)
+
+      nao_obs_ev <- NAO(exp = ano_hcst_tr, obs = ano_obs_tr,
+                        exp_cor = ano_obs_ev,
+                        ftime_avg = NULL, time_dim = 'syear',
+                        memb_dim = 'ensemble',
+                        space_dim = c('latitude', 'longitude'),
+                        ftime_dim = 'time', obsproj = obsproj,
+                        lat = data$obs$coords$latitude,
+                        lon = data$obs$coords$longitude,
+                        ncores = recipe$Analysis$ncores)$exp_cor
+      # Standardization:
+      # Need the nao_hcst (for the train.dexes) to standardize the eval.dexes?
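+      # the standardization below uses training-period statistics only,
+      # i.e. z' = (z - mean(train)) / sd(train), so the left-out year never
+      # informs its own climatology or spread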
+ nao_hcst_ev <- Apply(list(nao$exp, nao$exp_cor), + target_dims = c('syear', 'ensemble'), + fun = function(x, y) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(y, c(1,2), function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores)$output1 + nao_obs_ev <- Apply(list(nao$obs, nao_obs_ev), + target_dims = c('syear','ensemble'), + fun = function(x, y) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(y, c(1,2), function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores)$output1 + nao_obs_tr <- Apply(list(nao$obs), target_dims = 'syear', + fun = function(x) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(x, 1, function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores, + output_dims = 'syear')$output1 + nao_hcst_tr <- Apply(list(nao$exp), target_dims = c('syear', 'ensemble'), + fun = function(x) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(x, c(1,2), function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores)$output1 + # store results: + nao_hcst_tr_res <- append(nao_hcst_tr_res, list(nao_hcst_tr)) + names(nao_hcst_tr_res)[length(nao_hcst_tr_res)] <- names(data$hcst)[[sys]] + nao_hcst_ev_res <- append(nao_hcst_ev_res, list(nao_hcst_ev)) + names(nao_hcst_ev_res)[length(nao_hcst_ev_res)] <- names(data$hcst)[[sys]] + } + if (t == 1) { + nao_hcst_ev_aux <- nao_hcst_ev_res + } else { + for (sys in 1:length(data$hcst)) { + nao_hcst_ev_aux[[sys]] <- abind(nao_hcst_ev_aux[[sys]], + nao_hcst_ev_res[[sys]], + along = length(dim(nao_hcst_ev_res[[sys]])) + 1) + nao_hcst_tr_aux[[sys]] <- abind(nao_hcst_tr_aux[[sys]], + nao_hcst_tr_res[[sys]], + along = length(dim(nao_hcst_tr_res[[sys]])) + 1) + } + } + } + nao_hcst_ev_aux <- lapply(1:length(nao_hcst_ev_aux), function(x) { + names(dim(nao_hcst_ev_aux[[x]])) <- c(names(dim(nao_hcst_ev_res[[x]])), 'sample') + return(nao_hcst_ev_aux[[x]])}) + nao_hcst_tr_aux <- lapply(1:length(nao_hcst_tr_aux), function(x) { + names(dim(nao_hcst_tr_aux[[x]])) <- c(names(dim(nao_hcst_tr_res[[x]])), 'sample') + return(nao_hcst_tr_aux[[x]])}) + + # Observed NAO should be the same for all models. + nao_obs_ev_res <- append(nao_obs_ev_res, list(nao_obs_ev)) + names(nao_obs_ev_res)[length(nao_obs_ev_res)] <- names(data$hcst)[[1]] + + diff --git a/modules/Loading/R/dates2load.R b/modules/Loading/R/dates2load.R index f084ce62..15242c0a 100644 --- a/modules/Loading/R/dates2load.R +++ b/modules/Loading/R/dates2load.R @@ -18,15 +18,28 @@ library(lubridate) dates2load <- function(recipe, logger) { temp_freq <- recipe$Analysis$Variables$freq + system_name <- recipe$Analysis$Datasets$System$name recipe <- recipe$Analysis$Time # hcst dates file_dates <- paste0(strtoi(recipe$hcst_start):strtoi(recipe$hcst_end), recipe$sdate) - + + # Note: UKMO models are always missing the start date of January 1993. + # This piece of code removes that date from the start dates. 
+ UKMO_MODELS <- c("UK-MetOffice-Glosea600", "UK-MetOffice-Glosea601", + "UKMO-System602") + if ((system_name %in% UKMO_MODELS) && ("19930101" %in% file_dates)) { + file_dates <- file_dates[-(which(file_dates == '19930101'))] + warn(logger, + paste("January 1993 start date is not available for", system_name, + "and has been removed from the list of start dates.")) + } + + # Add dimensions to the vector if (temp_freq == "monthly_mean") { file_dates <- .add_dims(file_dates) } - # fcst dates (if fcst_year empty it creates an empty object) + # fcst dates (if fcst_year empty it creates an empty object) if (! is.null(recipe$fcst_year)) { file_dates.fcst <- paste0(recipe$fcst_year, recipe$sdate) if (temp_freq == "monthly_mean") { @@ -38,7 +51,6 @@ dates2load <- function(recipe, logger) { paste("fcst_year empty in the recipe, creating empty fcst object...")) } return(list(hcst = file_dates, fcst = file_dates.fcst)) - ## TODO: document header of fun } # adds the correspondent dims to each sdate array @@ -49,3 +61,4 @@ dates2load <- function(recipe, logger) { dim(data) <- default_dims return(data) } + diff --git a/recipe_NAO.yml b/recipe_NAO.yml new file mode 100644 index 00000000..27687ac8 --- /dev/null +++ b/recipe_NAO.yml @@ -0,0 +1,110 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: psl + freq: monthly_mean + units: hPa + flux: no + Datasets: + System: + - {name: 'ECMWF-SEAS5.1'} + - {name: 'Meteo-France-System8'} + - {name: 'CMCC-SPS3.5'} + - {name: 'UK-MetOffice-Glosea601'} + - {name: 'NCEP-CFSv2'} + - {name: 'DWD-GCFS2.1'} + - {name: 'ECCC-CanCM4i'} + #name: ECMWF-SEAS5.1 #Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + Multimodel: + execute: no + approach: pooled # Mandatory, bool: Either yes/true or no/false + createFrom: Anomalies + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0501' + fcst_year: #'2023' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 #6 # Mandatory, int: Last leadtime time step in months + Region: + latmin: 20 + latmax: 80 + lonmin: -80 + lonmax: 40 + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. + type: /home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt #"to_system" #"to_reference" /esarchive/scratch/nperez/git4/sunset/conf/grid_description/griddes_system51c3s.txt + Workflow: + Anomalies: + compute: yes + cross_validation: no + save: none + Time_aggregation: + execute: no + Indices: + NAO: {obsproj: yes, save: 'none'} + Calibration: + method: raw # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: EnsCorr rmsss rpss crpss EnsSprErr rps crps rps_syear crps_syear cov std n_eff + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. 
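+      # [[1/3, 2/3]] yields tercile categories (below normal / normal / above normal)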
+ save: none + Indicators: + index: no + Visualization: + plots: skill_metrics #forecast_ensemble_mean most_likely_terciles + multi_panel: no + dots: both + projection: Robinson + file_format: 'PNG' + #projection: robinson + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: NULL + metric: enscorr rmss rpss crpss EnsSprErr + metric_aggregation: 'skill' + #inf_to_na: yes + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 16 # Optional, int: number of cores, defaults to 1 + remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: gpfs #esarchive #gpfs + output_dir: /home/bsc/bsc032339/ #/esarchive/scratch/nperez/git4/ #/home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /home/bsc/bsc032339/sunset/ #/esarchive/scratch/nperez/git4/sunset/ #/home/bsc/bsc032339/sunset/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + diff --git a/recipe_tas.yml b/recipe_tas.yml index 32862c16..f3e2e7c3 100644 --- a/recipe_tas.yml +++ b/recipe_tas.yml @@ -13,11 +13,11 @@ Analysis: System: - {name: 'Meteo-France-System8'} - {name: 'CMCC-SPS3.5'} - #- {name: 'ECMWF-SEAS5.1'} - #- {name: 'UK-MetOffice-Glosea601'} - #- {name: 'NCEP-CFSv2'} - #- {name: 'DWD-GCFS2.1'} - #- {name: 'ECCC-CanCM4i'} + - {name: 'ECMWF-SEAS5.1'} + - {name: 'UK-MetOffice-Glosea601'} + - {name: 'NCEP-CFSv2'} + - {name: 'DWD-GCFS2.1'} + - {name: 'ECCC-CanCM4i'} # name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 Multimodel: execute: yes @@ -26,7 +26,7 @@ Analysis: Reference: name: ERA5 # Mandatory, str: Reference codename. See docu. 
   Time:
-    sdate: '0501'
+    sdate: '0301'
     fcst_year: #'2021'
     hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY'
     hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY'
@@ -53,7 +53,7 @@ Analysis:
       cross_validation: yes
       save: none
     Skill:
-      metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov std n_eff
+      metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov n_eff
      save: 'all'
      cross_validation: yes
     Probabilities:
@@ -83,7 +83,7 @@ Analysis:
     col1_width: NULL
     col2_width: NULL
     calculate_diff: FALSE
-  ncores: 4 # Optional, int: number of cores, defaults to 1
+  ncores: 100 # Optional, int: number of cores, defaults to 1
   remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE
   alpha: 0.05
   Output_format: scorecards
diff --git a/recipe_tas_decadal.yml b/recipe_tas_decadal.yml
new file mode 100644
index 00000000..09f88567
--- /dev/null
+++ b/recipe_tas_decadal.yml
@@ -0,0 +1,116 @@
+Description:
+  Author: nperez
+  Info: Test decadal 25-07-2024
+
+Analysis:
+  Horizon: decadal # Mandatory, str: either subseasonal, seasonal, or decadal
+  Variables:
+    name: tas
+    freq: monthly_mean
+    units: C
+    flux: no
+  Datasets:
+    System:
+    - {name: 'EC-Earth3-i4'}
+    - {name: 'HadGEM3-GC31-MM'}
+    - {name: 'BCC-CSM2-MR'}
+    - {name: 'CanESM5'}
+    - {name: 'CMCC-CM2-SR5'}
+    - {name: 'FGOALS-f3-L'}
+    - {name: 'IPSL-CM6A-LR'}
+   # name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8
+    Multimodel:
+      execute: yes
+      approach: pooled # Mandatory, bool: Either yes/true or no/false
+      createFrom: Anomalies
+    Reference:
+      name: ERA5 # Mandatory, str: Reference codename. See docu.
+  Time:
+    sdate: '0501'
+    fcst_year: #'2021'
+    hcst_start: '1980' # Mandatory, int: Hindcast start year 'YYYY'
+    hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY'
+    ftime_min: 1 # Mandatory, int: First leadtime time step in months
+    ftime_max: 120 # Mandatory, int: Last leadtime time step in months
+  Region:
+    latmin: -90
+    latmax: 90
+    lonmin: 0
+    lonmax: 359.9
+  Regrid:
+    method: conservative # Mandatory, str: Interpolation method. See docu.
+    type: "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_reference"
+    #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid.
+  Workflow:
+    Time_aggregation:
+      execute: yes
+      method: average
+      user_def:
+        Y1: [1, 12] # aggregate forecast months 1 to 12 (first year)
+        Y2: [13, 24]
+        Y2-Y5: [13, 60]
+        Y5-Y10: [49, 120]
+    Anomalies:
+      compute: yes
+      cross_validation: no
+      save: none
+    Calibration:
+      method: raw # Mandatory, str: Calibration method. See docu.
+      cross_validation: yes
+      save: none
+    Skill:
+      metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov n_eff
+      save: 'all'
+      cross_validation: yes
+    Probabilities:
+      percentiles: [[1/3, 2/3]] # frac: Quantile thresholds.
+ save: none + Indicators: + index: no + Visualization: + plots: skill_metrics #forecast_ensemble_mean most_likely_terciles + multi_panel: no + #dots: both + projection: Robinson + file_format: 'PNG' + #projection: robinson + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: NULL + metric: mean_bias enscorr rpss crpss EnsSprErr + metric_aggregation: 'skill' + #inf_to_na: yes + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 100 # Optional, int: number of cores, defaults to 1 + remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE + alpha: 0.05 + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: gpfs + output_dir: /home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /home/bsc/bsc032339/sunset/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + diff --git a/sunset.sh b/sunset.sh index 7f913f15..34b7d05f 100644 --- a/sunset.sh +++ b/sunset.sh @@ -1,14 +1,18 @@ #!/bin/bash -#SBATCH -n 16 -#SBATCH -t 01:00:00 +#SBATCH -n 112 +#SBATCH -N 1 +#SBATCH -t 24:00:00 #SBATCH -J sunset_multimodel #SBATCH -o sunset_multimodel-%J.out #SBATCH -e sunset_multimodel-%J.err #SBATCH --account=bsc32 -#SBATCH --qos=acc_bsces +#SBATCH --qos=gp_bsces +#SBATCH --constraint=highmem + +#### --qos=acc_bsces source /gpfs/projects/bsc32/software/suselinux/11/software/Miniconda3/4.7.10/etc/profile.d/conda.sh conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/SUNSET-env_2.0.0 -Rscript /home/bsc/bsc032339/sunset/full_NAO.R #ecvs_multimodel_anomalies.R +Rscript /home/bsc/bsc032339/sunset/full_ecvs_multimodel_anomalies.R #full_NAO.R #ecvs_multimodel_anomalies.R -- GitLab From c3e04411dd3c49f86f5f087c46e853db69cb4eb4 Mon Sep 17 00:00:00 2001 From: Nuria Perez Zanon Date: Mon, 29 Jul 2024 18:33:22 +0200 Subject: [PATCH 44/78] revert UKMO 0101 --- modules/Loading/R/dates2load.R | 19 ++++--------------- recipe_tas.yml | 2 +- recipe_tas_singl.yml | 20 ++++++++++---------- 3 files changed, 15 insertions(+), 26 deletions(-) diff --git a/modules/Loading/R/dates2load.R b/modules/Loading/R/dates2load.R index 15242c0a..69c4daa3 100644 --- a/modules/Loading/R/dates2load.R +++ b/modules/Loading/R/dates2load.R @@ -18,28 +18,15 @@ library(lubridate) dates2load <- function(recipe, logger) { temp_freq <- recipe$Analysis$Variables$freq - system_name <- recipe$Analysis$Datasets$System$name recipe <- recipe$Analysis$Time # hcst dates file_dates <- paste0(strtoi(recipe$hcst_start):strtoi(recipe$hcst_end), recipe$sdate) - - # Note: UKMO models are always missing the start date of January 1993. 
- # This piece of code removes that date from the start dates. - UKMO_MODELS <- c("UK-MetOffice-Glosea600", "UK-MetOffice-Glosea601", - "UKMO-System602") - if ((system_name %in% UKMO_MODELS) && ("19930101" %in% file_dates)) { - file_dates <- file_dates[-(which(file_dates == '19930101'))] - warn(logger, - paste("January 1993 start date is not available for", system_name, - "and has been removed from the list of start dates.")) - } - - # Add dimensions to the vector + if (temp_freq == "monthly_mean") { file_dates <- .add_dims(file_dates) } - # fcst dates (if fcst_year empty it creates an empty object) + # fcst dates (if fcst_year empty it creates an empty object) if (! is.null(recipe$fcst_year)) { file_dates.fcst <- paste0(recipe$fcst_year, recipe$sdate) if (temp_freq == "monthly_mean") { @@ -51,6 +38,8 @@ dates2load <- function(recipe, logger) { paste("fcst_year empty in the recipe, creating empty fcst object...")) } return(list(hcst = file_dates, fcst = file_dates.fcst)) + ## TODO: document header of fun + } # adds the correspondent dims to each sdate array diff --git a/recipe_tas.yml b/recipe_tas.yml index f3e2e7c3..4ec92a78 100644 --- a/recipe_tas.yml +++ b/recipe_tas.yml @@ -26,7 +26,7 @@ Analysis: Reference: name: ERA5 # Mandatory, str: Reference codename. See docu. Time: - sdate: '0301' + sdate: '0501' fcst_year: #'2021' hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' diff --git a/recipe_tas_singl.yml b/recipe_tas_singl.yml index 9dc99cbc..be20a311 100644 --- a/recipe_tas_singl.yml +++ b/recipe_tas_singl.yml @@ -14,7 +14,7 @@ Analysis: #- {name: 'ECMWF-SEAS5.1'} #- {name: 'Meteo-France-System8'} #- {name: 'CMCC-SPS3.5'} - name: ECMWF-SEAS5.1 #Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + name: UK-MetOffice-Glosea601 #Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 Multimodel: execute: no approach: pooled # Mandatory, bool: Either yes/true or no/false @@ -22,20 +22,20 @@ Analysis: Reference: name: ERA5 # Mandatory, str: Reference codename. See docu. Time: - sdate: '0501' - fcst_year: '2021' + sdate: '0101' + fcst_year: #'2021' hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' hcst_end: '2000' # Mandatory, int: Hindcast end year 'YYYY' ftime_min: 1 # Mandatory, int: First leadtime time step in months ftime_max: 6 # Mandatory, int: Last leadtime time step in months Region: - latmin: -90 - latmax: 90 + latmin: -10 + latmax: 10 lonmin: 0 - lonmax: 359.9 + lonmax: 40 #359.9 Regrid: method: conservative # Mandatory, str: Interpolation method. See docu. - type: /esarchive/scratch/nperez/git4/sunset/conf/grid_description/griddes_system51c3s.txt # "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_system" #"to_reference" + type: /home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt #/esarchive/scratch/nperez/git4/sunset/conf/grid_description/griddes_system51c3s.txt # "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_system" #"to_reference" #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. 
Workflow: Anomalies: @@ -86,9 +86,9 @@ Analysis: Run: Loglevel: INFO Terminal: yes - filesystem: esarchive #gpfs - output_dir: /esarchive/scratch/nperez/git4/ #/home/bsc/bsc032339/ # replace with the directory where you want to save the outputs - code_dir: /esarchive/scratch/nperez/git4/sunset/ #/home/bsc/bsc032339/sunset/ # replace with the directory where your code is + filesystem: gpfs #esarchive #gpfs + output_dir: /home/bsc/bsc032339/ #/esarchive/scratch/nperez/git4/ #/home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /home/bsc/bsc032339/sunset/ # /esarchive/scratch/nperez/git4/sunset/ #/home/bsc/bsc032339/sunset/ # replace with the directory where your code is autosubmit: no # fill only if using autosubmit auto_conf: -- GitLab From c89ef132e2d030afb72769bdb784d084582839d0 Mon Sep 17 00:00:00 2001 From: Nuria Perez Zanon Date: Wed, 31 Jul 2024 18:18:08 +0200 Subject: [PATCH 45/78] decadal conf and test --- conf/archive_decadal.yml | 51 ++++++++++++++++++++++++++++++++ full_ecvs_multimodel_anomalies.R | 5 +++- recipe_tas_decadal.yml | 14 ++++----- 3 files changed, 62 insertions(+), 8 deletions(-) diff --git a/conf/archive_decadal.yml b/conf/archive_decadal.yml index 0697fc3f..d06f336c 100644 --- a/conf/archive_decadal.yml +++ b/conf/archive_decadal.yml @@ -1,3 +1,54 @@ +gpfs: + src: "/gpfs/scratch/bsc32/MN4/bsc32/bsc32693/data_amd/" + System: +# ---- + EC-Earth3-i4: + name: "EC-Earth3-i4" + institution: "EC-Earth-Consortium" + src: + hcst: "exp/CMIP6/dcppA-hindcast/EC-Earth3-i4/DCPP/EC-Earth-Consortium/EC-Earth3-i4/dcppA-hindcast/" + fcst: + first_dcppB_syear: 2021 + monthly_mean: + table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "clt":"Amon", "hfls":"Amon", + "hurs":"Amon", "huss":"Amon", "rsds":"Amon", "rsut":"Amon", "ta":"Amon", + "tasmax":"Amon", "tosa":"Amon", "ua":"Amon", "va":"Amon", "zg":"Amon", + "evspsbl":"Amon", "hfss":"Amon", "hursmin":"Amon", "rlut":"Amon", + "rsdt":"Amon", "sfcWind":"Amon", "tasmin":"Amon", "ts":"Amon", "uas":"Amon", + "vas":"Amon"} + grid: {"tas":"gr", "pr":"gr", "psl":"gr", "clt":"gr", "hfls":"gr", + "hurs":"gr", "huss":"gr", "rsds":"gr", "rsut":"gr", "ta":"gr", + "tasmax":"gr", "tosa":"gr", "ua":"gr", "va":"gr", "zg":"gr", + "vas":"gr"} + version: {"tas":"v20210910", "pr":"v20210910", "psl":"v20210910", "clt":"v20210910", + "hurs":"v20210910", "huss":"v20210910", "rsds":"v20210910", "rsut":"v20210910", "ta":"v20210910", + "tasmax":"v20210910", "tosa":"v20210910", "ua":"v20210910", "va":"v20210910", "zg":"v20210910", + "evspsbl":"v20210910", "hfss":"v20210910", "hursmin":"v20210910", "rlut":"v20210910", + "rsdt":"v20210910", "sfcWind":"v20210910", "tasmin":"v20210910", "ts":"v20210910", "uas":"v20210910", + "vas":"v20210910"} + daily_mean: + grid: {"tas":"gr", "pr":"gr", "psl":"gr"} + version: {"tas":"v20210910", "pr":"v20210910", "psl":"v20210910"} + calendar: "proleptic_gregorian" + member: r1i4p1f1,r2i4p1f1,r3i4p1f1,r4i4p1f1,r5i4p1f1,r6i4p1f1,r7i4p1f1,r8i4p1f1,r9i4p1f1,r10i4p1f1 + initial_month: 11 + sdate_add: 0 + reference_grid: "/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/r1i4p1f1/Amon/tas/gr/v20210910/tas_Amon_EC-Earth3_dcppA-hindcast_s1960-r1i4p1f1_gr_196011-196110.nc" + + Reference: + ERA5: + name: "ERA5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "recon/era5/" + monthly_mean: {"tas":"_f1h-r1440x721cds/", + "psl":"monthly_mean/psl_f1h-r1440x721cds/", + "prlr":"monthly_mean/prlr_f1h-r1440x721cds/", + 
"sfcWind":"monthly_mean/sfcWind_f1h-r1440x721cds/"} + calendar: "standard" + reference_grid: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" + land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/constant/lsm-r1440x721cds/sftof.nc" + + esarchive: src: "/esarchive/" System: diff --git a/full_ecvs_multimodel_anomalies.R b/full_ecvs_multimodel_anomalies.R index 4f423764..4c7e16ab 100644 --- a/full_ecvs_multimodel_anomalies.R +++ b/full_ecvs_multimodel_anomalies.R @@ -6,6 +6,7 @@ source("modules/Visualization/Visualization.R") #args = commandArgs(trailingOnly = TRUE) #recipe_file <- args[1] recipe_file <- "recipe_tas.yml" +# recipe_file <- "recipe_tas_decadal.yml" # Will it work with a single job? #recipe <- read_atomic_recipe(recipe_file) original_recipe <- prepare_outputs(recipe_file, disable_checks = TRUE) @@ -20,12 +21,14 @@ datos$fcst <- list() source("modules/Crossval/Crossval_metrics.R") source("modules/Crossval/Crossval_anomalies.R") source("modules/Aggregation/Aggregation.R") +# sys <- models[1] for (sys in models) { recipe_aux$Analysis$Datasets$System <- NULL recipe_aux$Analysis$Datasets$System$name <- as.vector(sys) # recipe_aux$Analysis$Datasets$System$member <- read_yaml("conf/archive_decadal.yml")$esarchive$System[[sys]]$member data <- Loading(recipe = recipe_aux) - data <- Units(recipe_aux, data) + data <- Units(recipe = recipe_aux, data = data) + # data <- Aggregation(recipe = recipe_aux, data = data) # verification individual models product <- Crossval_anomalies(recipe = recipe_aux, data = data) skill_model <- Crossval_metrics(recipe = recipe_aux, data_crossval = product, diff --git a/recipe_tas_decadal.yml b/recipe_tas_decadal.yml index 09f88567..e5169dbe 100644 --- a/recipe_tas_decadal.yml +++ b/recipe_tas_decadal.yml @@ -26,12 +26,12 @@ Analysis: Reference: name: ERA5 # Mandatory, str: Reference codename. See docu. 
Time: - sdate: '0501' + # sdate: '0501' fcst_year: #'2021' - hcst_start: '1980' # Mandatory, int: Hindcast start year 'YYYY' - hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + hcst_start: '1981' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2010' # Mandatory, int: Hindcast end year 'YYYY' ftime_min: 1 # Mandatory, int: First leadtime time step in months - ftime_max: 120 # Mandatory, int: Last leadtime time step in months + ftime_max: 12 # Mandatory, int: Last leadtime time step in months Region: latmin: -90 latmax: 90 @@ -47,9 +47,9 @@ Analysis: method: average user_def: Y1: [1, 12] # aggregate from 1 to 3 forecast times - Y2: [13, 24] - Y2-Y5: [13, 60] - Y5-Y10: [49, 120] + Y1-JJA: [6, 8] + #Y2-Y5: [13, 60] + #Y5-Y10: [49, 120] Anomalies: compute: yes cross_validation: no -- GitLab From 30b8e5c82d4c992d679e4ca1eb5df86fb42e48ec Mon Sep 17 00:00:00 2001 From: Nuria Perez Zanon Date: Thu, 1 Aug 2024 17:33:58 +0200 Subject: [PATCH 46/78] avoid memory line an save probs --- modules/Crossval/Crossval_anomalies.R | 8 +++++--- modules/Loading/R/load_decadal.R | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R index cc41ffae..d743c834 100644 --- a/modules/Crossval/Crossval_anomalies.R +++ b/modules/Crossval/Crossval_anomalies.R @@ -175,9 +175,11 @@ Crossval_anomalies <- function(recipe, data) { tmp_lims2 <- append(tmp_lims2, list(tmp_lims)) names(tmp_lims2)[length(tmp_lims2)] <- as.character(categories[[ps]][l]) } - save_percentiles(recipe = recipe, percentiles = tmp_lims2, - data_cube = data$obs, - agg = "global", outdir = NULL) + if (recipe$Analysis$Workflow$Probabilities$save == 'yes') { + save_percentiles(recipe = recipe, percentiles = tmp_lims2, + data_cube = data$obs, + agg = "global", outdir = NULL) + } } } diff --git a/modules/Loading/R/load_decadal.R b/modules/Loading/R/load_decadal.R index d3b4f439..1e9c332a 100644 --- a/modules/Loading/R/load_decadal.R +++ b/modules/Loading/R/load_decadal.R @@ -480,6 +480,6 @@ load_decadal <- function(recipe) { info(recipe$Run$logger, "##### DATA LOADING COMPLETED SUCCESSFULLY #####") - .log_memory_usage(recipe$Run$logger, when = "After loading") +# .log_memory_usage(recipe$Run$logger, when = "After loading") return(list(hcst = hcst, fcst = fcst, obs = obs)) } -- GitLab From d87721aa2802fd004b73c8bde2cbec4f8f3b81f4 Mon Sep 17 00:00:00 2001 From: Nuria Perez Zanon Date: Thu, 1 Aug 2024 17:35:56 +0200 Subject: [PATCH 47/78] add fix aggregation --- modules/Aggregation/R/agg_ini_end.R | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/modules/Aggregation/R/agg_ini_end.R b/modules/Aggregation/R/agg_ini_end.R index 17c6940a..c58a7ebe 100644 --- a/modules/Aggregation/R/agg_ini_end.R +++ b/modules/Aggregation/R/agg_ini_end.R @@ -9,6 +9,8 @@ agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm ,ncores) { # take the first and last element of each indices list for time_bounds saving ini <- unlist(lapply(indices, function(x){x[1]})) end <- unlist(lapply(indices, function(x){x[length(x)]})) + indices <- lapply(1:length(ini), function(x) { + ini[x]:end[x]}) plotting_attr <- list(names(indices)) } original_dims <- names(dim(x[[1]]$data)) @@ -16,8 +18,13 @@ agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm ,ncores) { x[[1]]$data <- Apply(x[[1]]$data, target_dim = 'time', function(y, ind) { - sapply(1:length(indices), - function(z) {mean(y[indices[[z]]], na.rm = na.rm)}) + res <- 
sapply(1:length(indices),
+                                function(z) {
+                                  mean(y[indices[[z]]], na.rm = na.rm)})
+                       if (is.null(dim(res))) {
+                         dim(res) <- c(length(res))
+                       }
+                       return(res)
                      }, ind = indices,
                      output_dims = 'time',
@@ -26,8 +33,13 @@ agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm ,ncores) {
     x[[1]]$data <- Apply(x[[1]]$data,
                          target_dim = 'time',
                          function(y, ind) {
-                       sapply(1:length(indices),
-                              function(z) {sum(y[indices[[z]]], na.rm = na.rm)})
+                       res <- sapply(1:length(indices),
+                                function(z) {
+                                  sum(y[indices[[z]]], na.rm = na.rm)})
+                       if (is.null(dim(res))) {
+                         dim(res) <- c(length(res))
+                       }
+                       return(res)
                      }, ind = indices,
                      output_dims = 'time',
@@ -54,3 +66,4 @@ agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm ,ncores) {
   x[[1]]$attrs$Dates <- tmp_dates
   return(x)
 }
+
-- 
GitLab


From 57842eeb7dd5f340005af01606ad9c28cf5ccd72 Mon Sep 17 00:00:00 2001
From: Nuria Perez Zanon
Date: Fri, 13 Sep 2024 17:40:43 +0200
Subject: [PATCH 48/78] briefing

---
 Rplots.pdf                                    | Bin 0 -> 3611 bytes
 conf/archive.yml                              |   2 +-
 full_ecvs_anomalies.R                         |  26 +-
 modules/Aggregation/R/agg_ini_end.R           |  14 +-
 modules/Crossval/Crossval_anomalies.R         |  35 +--
 modules/Crossval/Crossval_metrics.R           |  15 +-
 modules/Visualization/R/plot_ensemble_mean.R  | 237 +++++++++++-------
 modules/Visualization/R/plot_metrics.R        |   8 +-
 .../R/plot_most_likely_terciles_map.R         | 220 ++++++++++------
 modules/Visualization/output_size.yml         |   4 +
 recipe_prlr.yml                               | 116 +++++++++
 recipe_tas.yml                                |  27 +-
 sunset.sh                                     |   2 +-
 13 files changed, 492 insertions(+), 214 deletions(-)
 create mode 100644 Rplots.pdf
 create mode 100644 recipe_prlr.yml

diff --git a/Rplots.pdf b/Rplots.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..2610e8a00b147a0801d6aad8aec8c2fd21a5a476
GIT binary patch
literal 3611
[3611 bytes of base85-encoded binary data omitted; the hunks for conf/archive.yml,
full_ecvs_anomalies.R, modules/Aggregation and modules/Crossval listed in the
diffstat above did not survive extraction, up to the plot_ensemble_mean.R hunk below]

     if (length(attributes(fcst$attrs$time_bounds)$plotting_attr) > 1) {
-      months <- unlist(
-        lapply(1:length(fcst$attrs$time_bounds$start), function(i) {
-          ftime_ini <- attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i]
-          ftime_end <- attributes(fcst$attrs$time_bounds)$plotting_attr$end[i]
-          # labels for file name:
-          ftime <- paste0(ftime_ini, "-", ftime_end)
-          # title names:
-          ftime_ini <- init_month + ftime_ini - 1
-          ftime_ini <- ifelse(ftime_ini > 12, ftime_ini - 12, ftime_ini)
-          ftime_ini <- month.name[ftime_ini]
-          ftime_end <- init_month + ftime_end - 1
-          ftime_end <- ifelse(ftime_end > 12, ftime_end - 12, ftime_end)
-          ftime_end <- month.name[ftime_end]
-          toptitle <- paste(ftime_ini, "to", ftime_end)}))
+      years <- NULL
+      time_labels <-
+        unlist(lapply(1:length(fcst$attrs$time_bounds$start), function(i) {
+          ftime_ini <- attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i]
+          ftime_end <- attributes(fcst$attrs$time_bounds)$plotting_attr$end[i]
+          # labels for file name:
+          ftime <- paste0(ftime_ini, "-", ftime_end)
+          # title names:
+          if (tolower(recipe$Analysis$Horizon) == "seasonal") {
+            year_ini <- year(fcst$attrs$time_bounds$start[i])
+            month_ini <- month.name[month(fcst$attrs$time_bounds$start[i])]
+            year_end <- year(fcst$attrs$time_bounds$end[i])
+            month_end <- month.name[month(fcst$attrs$time_bounds$end[i])]
+            ## TODO: Rename 'toptitle'?
+ if (year_ini == year_end) { + toptitle <- paste(month_ini, "to", month_end, year_end) + } else { + toptitle <- paste(month_ini, year_ini, "to", + month_end, year_end) + } + } else if (tolower(recipe$Analysis$Horizon) == "subseasonal") { + ## set to make ftime_ini be a Monday + ## if init_week always Thursday, the 2nd addend can be simplified to +4 + ftime_ini <- ymd(init_date) + + ((8 - wday(ymd(init_date), week_start = 1)) %% 7) + + weeks(ftime_ini) + ## set to make ftime_end be a Sunday + ftime_end <- ymd(init_date) + + ((8 - wday(ymd(init_date), week_start = 1)) %% 7) + + weeks(ftime_end) + 6 + toptitle <- paste("Valid from", ftime_ini, "to", ftime_end) + } + })) } else { - months <- attributes(fcst$attrs$time_bounds)$plotting_attr[[1]] + time_labels <- attributes(fcst$attrs$time_bounds)$plotting_attr[[1]] } } - years <- lubridate::year(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ]) - + if (recipe$Analysis$Workflow$Visualization$multi_panel) { # Define name of output file and titles - titles <- as.vector(months) + if (tolower(recipe$Analysis$Horizon) %in% c("seasonal", "subseasonal")) { + titles <- as.vector(time_labels) + } else { + titles <- NULL + } + if (tolower(recipe$Analysis$Horizon) == "subseasonal") { + toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), + "\n", "Forecast Ensemble ", method, " / ", + "Issued on ", + format(ymd(start_date), "%d-%m-%Y")) + } else { + toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), + "\n", "Forecast Ensemble ", method, " / ", + "Init.: ", i_syear) + } # Plots - PlotLayout(PlotEquiMap, c('longitude', 'latitude'), - i_var_ens_mean, longitude, latitude, - mask = mask, - dots = dots, - filled.continents = F, - toptitle = toptitle, - title_scale = 0.7, - subtitle_scale = 0.8, - subtitle_margin_scale = 2, - titles = titles, - units = units, - cols = cols, - brks = brks, - fileout = paste0(outfile, ".pdf"), - bar_label_digits = 4, - extra_margin = rep(1, 4), - bar_label_scale = 1.5, - axes_label_scale = 1.1) + output_configuration <- output_conf$Multipanel$forecast_ensemble_mean + base_args <- list(fun = "PlotEquiMap", + plot_dims = c('longitude', 'latitude'), + var = i_var_ens_mean, lon = longitude, + lat = latitude, mask = mask, dots = dots, + filled.continents = FALSE, toptitle = toptitle, + title_scale = 0.7, subtitle_scale = 0.8, + subtitle_margin_scale = 2, titles = titles, + units = units, cols = cols, brks = brks, + fileout = paste0(outfile, ".pdf"), + bar_label_digits = 4, extra_margin = rep(1, 4), + bar_label_scale = 1.5, axes_label_scale = 1.1) + base_args[names(output_configuration)] <- output_configuration + do.call(PlotLayout, base_args) } else { # Define function and parameters depending on projection if (projection == 'cylindrical_equidistant') { @@ -174,7 +225,7 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o base_args <- list(var = NULL, dots = NULL, mask = NULL, lon = longitude, lat = latitude, dot_symbol = 20, title_scale = 0.6, - font.main = 2, + font.main = 2, margin_scale = c(1, 5, 5, 5), filled.continents = F, brks = brks, cols = cols, bar_label_digits = 4, bar_label_scale = 1.5, axes_label_scale = 1, units = units) @@ -187,24 +238,21 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o } else { target_proj <- projection } + output_configuration <- output_conf$PlotRobinson$forecast_ensemble_mean base_args <- list(data = NULL, mask = NULL, dots = NULL, lon = longitude, lat = latitude, lon_dim = 'longitude', lat_dim = 
'latitude', target_proj = target_proj, legend = 's2dv', - style = 'point', brks = brks, cols = cols) + style = 'point', brks = brks, cols = cols, + bar_extra_margin = c(3.5, 0, 3.5, 0), + point_size = "auto", title_size = 10, + dots_size = 0.2, + width = 8, height = 5, + units = units) + base_args[names(output_configuration)] <- output_configuration } # Loop over forecast times - for (i in 1:length(months)) { - # Get forecast time label - if (is.null(attributes(fcst$attrs$time_bounds))) { - forecast_time <- match(months[i], month.name) - init_month + 1 - if (forecast_time < 1) { - forecast_time <- forecast_time + 12 - } - forecast_time <- sprintf("%02d", forecast_time) - } else { - forecast_time <- months - } + for (i in 1:length(time_labels)) { # Get mask subset if (!is.null(mask)) { mask_i <- Subset(var_mask, along = 'time', indices = i, drop = TRUE) @@ -217,20 +265,27 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o } else { dots_i <- NULL } - # Define plot title + # Define plot title if (tolower(recipe$Analysis$Horizon) == 'seasonal') { toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), - "\n", "Ensemble Mean / ", - months[i], " ", years[i], + "\n", "Ensemble ", method, " / ", + time_labels[i], " ", years[i], " / Start date: ", - format(as.Date(i_syear, format="%Y%m%d"), - "%d-%m-%Y")) + format(as.Date(i_syear, format = "%Y%m%d"), + "%d-%m-%Y")) + } else if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { + toptitle <- paste0(system_name, " / ", + str_to_title(var_long_name), + "\n", "Ensemble ", method, " / ", + "Issued on ", + format(ymd(start_date), "%d-%m-%Y"), + "\n", time_labels[i], years[i]) } else { toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), - "\n", "Ensemble Mean / ", - months[i], + "\n", "Ensemble ", method, "/ ", + time_labels[i], " ", years[i], " / Start date: ", i_syear) } @@ -238,8 +293,9 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o if (identical(fun, PlotRobinson)) { ## TODO: Customize technical details base_args[['caption']] <- - paste0("Nominal start date: ", start_date, "\n", - "Forecast month: ", sprintf("%02d", i), "\n", + paste0("Nominal start date: ", format(ymd(start_date), + "%d-%m-%Y"), "\n", + "Forecast week: ", sprintf("%02d", i), "\n", "Reference: ", recipe$Analysis$Datasets$Reference) } # Modify base arguments @@ -249,8 +305,8 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o } else { if (length(attributes(fcst$attrs$time_bounds)$plotting_attr) > 1) { fileout <- paste0(outfile, "_ft", - attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i], "-", - attributes(fcst$attrs$time_bounds)$plotting_attr$end[i], ".pdf") + attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i], "-", + attributes(fcst$attrs$time_bounds)$plotting_attr$end[i], ".pdf") } else { fileout <- paste0(outfile, "_ft", months[i], ".pdf") } @@ -262,10 +318,11 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o args = c(base_args, list(toptitle = toptitle, fileout = fileout))) - } + } } } } info(recipe$Run$logger, "##### FORECAST ENSEMBLE MEAN PLOTS SAVED TO OUTPUT DIRECTORY #####") } + diff --git a/modules/Visualization/R/plot_metrics.R b/modules/Visualization/R/plot_metrics.R index 2df06e8b..455e1c28 100644 --- a/modules/Visualization/R/plot_metrics.R +++ b/modules/Visualization/R/plot_metrics.R @@ -301,11 +301,15 @@ plot_metrics <- function(recipe, data_cube, metrics, # labels for file name: forecast_time <- 
paste0(forecast_time_ini, "-", forecast_time_end)
       # title names:
+      ## TODO: There's probably a better way to do this, like using
+      ## time_bounds$start and time_bounds$end directly.
       forecast_time_ini <- init_month + forecast_time_ini - 1
-      forecat_time_ini <- ifelse(forecast_time_ini > 12, forecast_time_ini - 12, forecast_time_ini)
+      forecast_time_ini <- ifelse(forecast_time_ini > 12,
+                                  forecast_time_ini - 12, forecast_time_ini)
       forecast_time_ini <- month.name[forecast_time_ini]
       forecast_time_end <- init_month + forecast_time_end - 1
-      forecat_time_end <- ifelse(forecast_time_end > 12, forecast_time_end - 12, forecast_time_end)
+      forecast_time_end <- ifelse(forecast_time_end > 12,
+                                  forecast_time_end - 12, forecast_time_end)
       forecast_time_end <- month.name[forecast_time_end]
       toptitle <- paste(system_name, "/",
                         str_to_title(var_long_name),
diff --git a/modules/Visualization/R/plot_most_likely_terciles_map.R b/modules/Visualization/R/plot_most_likely_terciles_map.R
index 739f2d10..e15928d9 100644
--- a/modules/Visualization/R/plot_most_likely_terciles_map.R
+++ b/modules/Visualization/R/plot_most_likely_terciles_map.R
@@ -17,49 +17,56 @@ plot_most_likely_terciles <- function(recipe,
                                       dots,
                                       outdir,
                                       output_conf) {
-
+  ## TODO: Add 'anomaly' to plot title
   # Abort if frequency is daily
   if (recipe$Analysis$Variables$freq %in% c("daily", "daily_mean")) {
     stop("Visualization functions not yet implemented for daily data.")
   }
-
+
   latitude <- fcst$coords$lat
   longitude <- fcst$coords$lon
   archive <- get_archive(recipe)
-  if (recipe$Analysis$Datasets$System$name == 'Multimodel'){
+  if (recipe$Analysis$Datasets$System$name == 'Multimodel') {
     system_name <- paste0('Multimodel-',
                           recipe$Analysis$Datasets$Multimodel$approach)
   } else {
     system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name
   }
-  start_date <- paste0(recipe$Analysis$Time$fcst_year,
-                       recipe$Analysis$Time$sdate)
-  if (tolower(recipe$Analysis$Horizon) == "seasonal") {
-    init_month <- as.numeric(substr(recipe$Analysis$Time$sdate,
+  # Initialization dates
+  if (tolower(recipe$Analysis$Horizon) == "seasonal") {
+    start_date <- paste0(recipe$Analysis$Time$fcst_year,
+                         recipe$Analysis$Time$sdate)
+    init_date <- as.numeric(substr(recipe$Analysis$Time$sdate,
                                    start = 1, stop = 2))
+  } else if (tolower(recipe$Analysis$Horizon) == "subseasonal") {
+    start_date <- recipe$Analysis$Time$sdate
+    init_date <- start_date ## start_date and init_date are the same for subseasonal
  } else {
    ## TODO: Sort out decadal initial month (is it always January?)
-    init_month <- 1
+    start_date <- paste0(recipe$Analysis$Time$fcst_year,
+                         recipe$Analysis$Time$sdate)
+    init_date <- 1
  }
  # Retrieve and rearrange probability bins for the forecast
-  if (is.null(probabilities$probs_fcst$prob_b33) ||
-      is.null(probabilities$probs_fcst$prob_33_to_66) ||
-      is.null(probabilities$probs_fcst$prob_a66)) {
-    stop("The forecast tercile probability bins are not present inside ",
-         "'probabilities', the most likely tercile map cannot be plotted.")
-  }
-
-  probs_fcst <- abind(probabilities$probs_fcst$prob_b33,
-                      probabilities$probs_fcst$prob_33_to_66,
-                      probabilities$probs_fcst$prob_a66,
-                      along = 0)
-  names(dim(probs_fcst)) <- c("bin",
-                              names(dim(probabilities$probs_fcst$prob_b33)))
-
+#  if (is.null(probabilities$probs_fcst$prob_b33) ||
+#      is.null(probabilities$probs_fcst$prob_33_to_66) ||
+#      is.null(probabilities$probs_fcst$prob_a66)) {
+#    stop("The forecast tercile probability bins are not present inside ",
+#         "'probabilities', the most likely tercile map cannot be plotted.")
+#  }
+
+#  probs_fcst <- abind(probabilities$probs_fcst$prob_b33,
+#                      probabilities$probs_fcst$prob_33_to_66,
+#                      probabilities$probs_fcst$prob_a66,
+#                      along = 0)
+#  names(dim(probs_fcst)) <- c("bin",
+#                              names(dim(probabilities$probs_fcst$prob_b33)))
+  probs_fcst <- probabilities$probs_fcst
  ## TODO: Improve this section
  # Drop extra dims, add time dim if missing:
  for (var in 1:fcst$dims[['var']]) {
+    ## NOTE: Variables starting with var_* represent the variable counter.
    variable <- fcst$attrs$Variable$varName[[var]]
    var_long_name <- fcst$attrs$Variable$metadata[[variable]]$long_name
    # Choose colors depending on the variable
@@ -80,7 +87,9 @@ plot_most_likely_terciles <- function(recipe,
                                   drop = 'selected')
    var_probs <- Reorder(var_probs, c("syear", "time",
                                      "bin", "longitude", "latitude"))
+
    for (i_syear in start_date) {
+      ## NOTE: Variables that start with i_* represent the forecast year counter.
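## EDITOR'S NOTE: illustrative sketch, not part of this commit. The map built
## below shades each grid point by the tercile bin with the highest forecast
## probability along the rearranged 'bin' dimension, e.g. for a single point:
probs <- c(below = 0.2, normal = 0.3, above = 0.5)  # hypothetical tercile probabilities
names(probs)[which.max(probs)]                      # "above" is the bin that gets plotted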
# Define name of output file and titles i_var_probs <- ClimProjDiags::Subset(var_probs, along = c("syear"), @@ -97,6 +106,8 @@ plot_most_likely_terciles <- function(recipe, dim_mask <- dim(var_mask) var_mask <- as.numeric(var_mask <= 0) dim(var_mask) <- dim_mask + } else { + var_mask <- NULL } # Dots if (!is.null(dots)) { @@ -108,48 +119,85 @@ plot_most_likely_terciles <- function(recipe, dim_dots <- dim(var_dots) var_dots <- as.numeric(var_dots <= 0) dim(var_dots) <- dim_dots + } else { + var_dots <- NULL } - toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), - "\n", "Most Likely Tercile / Initialization: ", - i_syear) + + # Define labels for forecast times + years <- lubridate::year(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ]) if (is.null(attributes(fcst$attrs$time_bounds))) { - months <- lubridate::month( - fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], - label = T, abb = F) + if (tolower(recipe$Analysis$Horizon == "subseasonal")) { + time_labels <- fcst$attrs$Dates[1, 1, which(start_date == i_syear), ] + monday <- ymd_hms(time_labels) - days(wday(ymd_hms(time_labels), week_start = 1) - 1) + sunday <- monday + days(6) + time_labels <- paste0("Valid from ", format(monday,"%d-%m"), " to ", + format(sunday, "%d-%m"), " of ") + } else { + time_labels <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], + label = T, abb = F) + } } else { if (length(attributes(fcst$attrs$time_bounds)$plotting_attr) > 1) { - months <- unlist( - lapply(1:length(fcst$attrs$time_bounds$start), function(i) { - ftime_ini <- attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i] - ftime_end <- attributes(fcst$attrs$time_bounds)$plotting_attr$end[i] - # labels for file name: - ftime <- paste0(ftime_ini, "-", ftime_end) - # title names: - ftime_ini <- init_month + ftime_ini - 1 - ftime_ini <- ifelse(ftime_ini > 12, ftime_ini - 12, ftime_ini) - ftime_ini <- month.name[ftime_ini] - ftime_end <- init_month + ftime_end - 1 - ftime_end <- ifelse(ftime_end > 12, ftime_end - 12, ftime_end) - ftime_end <- month.name[ftime_end] - toptitle <- paste(ftime_ini, "to", ftime_end)})) + years <- NULL + time_labels <- + unlist(lapply(1:length(fcst$attrs$time_bounds$start), function(i) { + ftime_ini <- attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i] + ftime_end <- attributes(fcst$attrs$time_bounds)$plotting_attr$end[i] + # labels for file name: + ftime <- paste0(ftime_ini, "-", ftime_end) + # title names: + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + year_ini <- year(fcst$attrs$time_bounds$start[i]) + month_ini <- month.name[month(fcst$attrs$time_bounds$start[i])] + year_end <- year(fcst$attrs$time_bounds$end[i]) + month_end <- month.name[month(fcst$attrs$time_bounds$end[i])] + ## TODO: Rename 'toptitle'? 
+ if (year_ini == year_end) { + toptitle <- paste(month_ini, "to", month_end, year_end) + } else { + toptitle <- paste(month_ini, year_ini, "to", + month_end, year_end) + } + } else if (tolower(recipe$Analysis$Horizon) == "subseasonal") { + ## set to make ftime_ini be a Monday + ## if init_date always Thursday, the 2nd addend can be simplified to +4 + ftime_ini <- ymd(init_date) + + ((8 - wday(ymd(init_date), week_start = 1)) %% 7) + + weeks(ftime_ini) + ## set to make ftime_end be a Sunday + ftime_end <- ymd(init_date) + + ((8 - wday(ymd(init_date), week_start = 1)) %% 7) + + weeks(ftime_end) + 6 + toptitle <- paste("Valid from", ftime_ini, "to", ftime_end) + } + })) } else { - months <- attributes(fcst$attrs$time_bounds)$plotting_attr[[1]] + time_labels <- attributes(fcst$attrs$time_bounds)$plotting_attr[[1]] } } - - years <- lubridate::year(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ]) + if (recipe$Analysis$Workflow$Visualization$multi_panel) { ## TODO: Ensure this works for daily and sub-daily cases - titles <- as.vector(months) + titles <- as.vector(time_labels) + # Define top title: system name, variable, forecast date + if (tolower(recipe$Analysis$Horizon) == "subseasonal") { + toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), + "\n", "Most Likely Tercile / ", "Issued on ", + format(ymd(start_date), "%d-%m-%Y")) + } else { + toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), + "\n", "Most Likely Tercile / ", "Initialization: ", + i_syear) + } # Plots ## NOTE: PlotLayout() and PlotMostLikelyQuantileMap() are still being worked - ## on. + ## on. This option does not work with mask or dots for now. suppressWarnings( PlotLayout(PlotMostLikelyQuantileMap, c('bin', 'longitude', 'latitude'), cat_dim = 'bin', i_var_probs, longitude, latitude, - mask = mask, - dots = dots, + mask = var_mask, + dots = var_dots, coast_width = 1.5, title_scale = 0.6, title_margin_scale = 0.7, @@ -178,59 +226,78 @@ plot_most_likely_terciles <- function(recipe, col_mask = 'antiquewhite', cols = cols, col_sup = col_sup, - title_scale = 1, - legend_scale = 0.8, + title_scale = 0.9, + legend_scale = 0.7, cex_bar_titles = 0.9, bar_label_digits = 2, bar_label_scale = 0.7, bar_limits = list(c(40, 85), c(40, 85), c(40, 85)), - brks = list(4,2,4), + brks = list(4, 2, 4), axes_label_scale = 1.1, - plot_margin = c(5.1, 4.1, 4.1, 2.1), + plot_margin = c(1, 5, 5, 5), + # plot_margin = c(5.1, 4.1, 4.1, 2.1), return_leg = T, - triangle_ends = c(F, T) , width = 10, height = 8) + triangle_ends = c(F, T), width = 10, height = 8) base_args[names(output_configuration)] <- output_configuration - for (i in 1:length(months)) { + for (i in 1:length(time_labels)) { + ## variables ending in *_i represent each forecast time # Get forecast time label - if (is.null(attributes(fcst$attrs$time_bounds))) { - forecast_time <- match(months[i], month.name) - init_month + 1 - if (forecast_time < 1) { - forecast_time <- forecast_time + 12 + if (tolower(recipe$Analysis$Horizon) %in% c("seasonal", "decadal")) { + if (is.null(attributes(fcst$attrs$time_bounds))) { + forecast_time <- match(time_labels[i], month.name) - init_date + 1 + # Months should range from 1 to 12 + if (forecast_time < 1) { + forecast_time <- forecast_time + 12 + } + forecast_time <- sprintf("%02d", forecast_time) + } else { + ## TODO: Check this + # forecast_time <- sprintf("%02d", time_labels[i]) + forecast_time <- time_labels[i] } - forecast_time <- sprintf("%02d", forecast_time) - } else { - forecast_time <- months + } else if 
(tolower(recipe$Analysis$Horizon) == "subseasonal") { + forecast_time <- sprintf("%02d", i) } - # Get mask subset if (!is.null(mask)) { - mask_i <- Subset(var_mask, along = 'time', indices = i, drop = TRUE) + mask_i <- Subset(var_mask, along = 'time', + indices = i, drop = TRUE) } else { mask_i <- NULL } # Get dots subset if (!is.null(dots)) { - dots_i <- Subset(var_dots, along = 'time', indices = i, drop = TRUE) + dots_i <- Subset(var_dots, along = 'time', + indices = i, drop = TRUE) } else { dots_i <- NULL } + # Define plot title if (tolower(recipe$Analysis$Horizon) == 'seasonal') { toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), "\n", "Most Likely Tercile / ", - months[i], " ", years[i], + time_labels[i], " ", years[i], " / Start date: ", - format(as.Date(i_syear, format="%Y%m%d"), - "%d-%m-%Y")) + format(as.Date(i_syear, format = "%Y%m%d"), + "%d-%m-%Y")) + } else if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { + toptitle <- paste0(system_name, " / ", + str_to_title(var_long_name), + "\n", "Most Likely Tercile / ", + "Issued on ", + format(ymd(start_date), "%d-%m-%Y"), + "\n", time_labels[i], years[i]) } else { toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), "\n", "Most Likely Tercile / ", - months[i], + time_labels[i], " / Start date: ", - i_syear) - } + i_syear) + } + # Plot if (is.null(attributes(fcst$attrs$time_bounds))) { fileout <- paste0(outfile, "_ft", forecast_time, ".pdf") @@ -240,7 +307,7 @@ plot_most_likely_terciles <- function(recipe, attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i], "-", attributes(fcst$attrs$time_bounds)$plotting_attr$end[i], ".pdf") } else { - fileout <- paste0(outfile, "_ft", months[i], ".pdf") + fileout <- paste0(outfile, "_ft", time_labels[i], ".pdf") } } @@ -263,10 +330,10 @@ plot_most_likely_terciles <- function(recipe, tmp$nmap <- NULL tmp$var_limits <- NULL if (length(cb_info$brks[[i_bar]]) > 2) { - # plot colorbar as normal + # plot colorbar as normal do.call(ColorBar, tmp) } else { - # plot a square + # plot a square tmp$brks <- 4 tmp$draw_ticks <- F tmp$box_label <- "> 40" @@ -280,6 +347,7 @@ plot_most_likely_terciles <- function(recipe, } } } - info(recipe$Run$logger, - "##### MOST LIKELY TERCILE PLOTS SAVED TO OUTPUT DIRECTORY #####") +info(recipe$Run$logger, + "##### MOST LIKELY TERCILE PLOTS SAVED TO OUTPUT DIRECTORY #####") } + diff --git a/modules/Visualization/output_size.yml b/modules/Visualization/output_size.yml index 0cd945be..c9183380 100644 --- a/modules/Visualization/output_size.yml +++ b/modules/Visualization/output_size.yml @@ -30,4 +30,8 @@ region: #units inches NA-EU: #Norht Atlantic European region Mediterranean: Global: + Robinson: + forecast_ensemble_mean: + width: 8.5 + height: 5 # Add other regions diff --git a/recipe_prlr.yml b/recipe_prlr.yml new file mode 100644 index 00000000..b0ad1fad --- /dev/null +++ b/recipe_prlr.yml @@ -0,0 +1,116 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: prlr + freq: monthly_mean + units: mm + flux: no + Datasets: + System: + #- {name: 'Meteo-France-System8'} + #- {name: 'CMCC-SPS3.5'} + - {name: 'ECMWF-SEAS5.1'} + #- {name: 'UK-MetOffice-Glosea601'} + #- {name: 'NCEP-CFSv2'} + #- {name: 'DWD-GCFS2.1'} + #- {name: 'ECCC-CanCM4i'} + # name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + Multimodel: + execute: yes + 
approach: pooled # Mandatory, bool: Either yes/true or no/false + createFrom: Anomalies + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0901' + fcst_year: '2024' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + name: 'Global' + latmin: -90 + latmax: 90 + lonmin: 0 #-179.9 + lonmax: 359.9 #180 + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. + type: "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_reference" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: yes + cross_validation: no + save: none + Time_aggregation: + execute: yes + method: average + ini: [1, 2, 3, 4] + end: [3, 4, 5, 6] + Calibration: + method: raw # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias enscorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov n_eff + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. + save: none + Indicators: + index: no + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles + multi_panel: no + mask_terciles: 'both' + mask_ens: 'both' + #dots: both + projection: Robinson + file_format: 'PNG' + #projection: robinson + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: NULL + metric: mean_bias enscorr rpss crpss EnsSprErr + metric_aggregation: 'skill' + #inf_to_na: yes + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 100 # Optional, int: number of cores, defaults to 1 + remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE + alpha: 0.05 + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: gpfs + output_dir: /home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /home/bsc/bsc032339/sunset/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. 
+ email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + diff --git a/recipe_tas.yml b/recipe_tas.yml index 4ec92a78..be625355 100644 --- a/recipe_tas.yml +++ b/recipe_tas.yml @@ -11,13 +11,13 @@ Analysis: flux: no Datasets: System: - - {name: 'Meteo-France-System8'} - - {name: 'CMCC-SPS3.5'} + #- {name: 'Meteo-France-System8'} + #- {name: 'CMCC-SPS3.5'} - {name: 'ECMWF-SEAS5.1'} - - {name: 'UK-MetOffice-Glosea601'} - - {name: 'NCEP-CFSv2'} - - {name: 'DWD-GCFS2.1'} - - {name: 'ECCC-CanCM4i'} + #- {name: 'UK-MetOffice-Glosea601'} + #- {name: 'NCEP-CFSv2'} + #- {name: 'DWD-GCFS2.1'} + #- {name: 'ECCC-CanCM4i'} # name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 Multimodel: execute: yes @@ -26,8 +26,8 @@ Analysis: Reference: name: ERA5 # Mandatory, str: Reference codename. See docu. Time: - sdate: '0501' - fcst_year: #'2021' + sdate: '0901' + fcst_year: '2024' hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' ftime_min: 1 # Mandatory, int: First leadtime time step in months @@ -35,8 +35,8 @@ Analysis: Region: latmin: -90 latmax: 90 - lonmin: 0 - lonmax: 359.9 + lonmin: 0 #-179.9 + lonmax: 359.9 #180 Regrid: method: conservative # Mandatory, str: Interpolation method. See docu. type: "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_reference" @@ -47,7 +47,10 @@ Analysis: cross_validation: no save: none Time_aggregation: - execute: no + execute: yes + method: average + ini: [1, 2, 3, 4] + end: [3, 4, 5, 6] Calibration: method: raw # Mandatory, str: Calibration method. See docu. 
cross_validation: yes @@ -62,7 +65,7 @@ Analysis: Indicators: index: no Visualization: - plots: skill_metrics #forecast_ensemble_mean most_likely_terciles + plots: skill_metrics forecast_ensemble_mean most_likely_terciles multi_panel: no #dots: both projection: Robinson diff --git a/sunset.sh b/sunset.sh index 34b7d05f..43441ada 100644 --- a/sunset.sh +++ b/sunset.sh @@ -14,5 +14,5 @@ source /gpfs/projects/bsc32/software/suselinux/11/software/Miniconda3/4.7.10/etc/profile.d/conda.sh conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/SUNSET-env_2.0.0 -Rscript /home/bsc/bsc032339/sunset/full_ecvs_multimodel_anomalies.R #full_NAO.R #ecvs_multimodel_anomalies.R +Rscript /home/bsc/bsc032339/sunset/full_ecvs_anomalies.R /home/bsc/bsc032339/sunset/recipe_tas.yml #full_ecvs_multimodel_anomalies.R #full_NAO.R #ecvs_multimodel_anomalies.R -- GitLab From d8d9a046a950058a8b0b3116915f174f7b139ac0 Mon Sep 17 00:00:00 2001 From: Nuria Perez Zanon Date: Tue, 17 Sep 2024 11:31:35 +0200 Subject: [PATCH 49/78] building oper --- modules/Visualization/R/plot_metrics.R | 4 + modules/Visualization/output_size.yml | 8 +- recipe_seas5_oper.yml | 114 +++++++++++++++++++++++++ recipe_tas.yml | 8 +- 4 files changed, 128 insertions(+), 6 deletions(-) create mode 100644 recipe_seas5_oper.yml diff --git a/modules/Visualization/R/plot_metrics.R b/modules/Visualization/R/plot_metrics.R index 455e1c28..89e08af9 100644 --- a/modules/Visualization/R/plot_metrics.R +++ b/modules/Visualization/R/plot_metrics.R @@ -263,6 +263,7 @@ plot_metrics <- function(recipe, data_cube, metrics, base_args[names(output_configuration)] <- output_configuration } else { fun <- PlotRobinson + output_configuration <- output_conf$PlotRobinson$skill_metric common_projections <- c("robinson", "stereographic", "lambert_europe") if (projection %in% common_projections) { target_proj <- get_proj_code(projection) @@ -275,8 +276,11 @@ plot_metrics <- function(recipe, data_cube, metrics, lon_dim = 'longitude', lat_dim = 'latitude', target_proj = target_proj, legend = 's2dv', style = 'point', brks = brks, cols = cols, + bar_extra_margin = c(3.5, 0, 3.5, 0), + title_size = 10, dots_size = 0.2, col_inf = col_inf, col_sup = col_sup, units = units) + base_args[names(output_configuration)] <- output_configuration } # Loop over forecast times for (i in 1:dim(metric)[['time']]) { diff --git a/modules/Visualization/output_size.yml b/modules/Visualization/output_size.yml index c9183380..76631e4b 100644 --- a/modules/Visualization/output_size.yml +++ b/modules/Visualization/output_size.yml @@ -30,8 +30,12 @@ region: #units inches NA-EU: #Norht Atlantic European region Mediterranean: Global: - Robinson: + PlotRobinson: forecast_ensemble_mean: - width: 8.5 + width: 8 + height: 5 + skill_metrics: + width: 8 height: 5 + # Add other regions diff --git a/recipe_seas5_oper.yml b/recipe_seas5_oper.yml new file mode 100644 index 00000000..b3f501af --- /dev/null +++ b/recipe_seas5_oper.yml @@ -0,0 +1,114 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + - {name: prlr, freq: monthly_mean, units: mm, flux: no} + - {name: tas, freq: monthly_mean, units: C} + Datasets: + System: + #- {name: 'Meteo-France-System8'} + #- {name: 'CMCC-SPS3.5'} + - {name: 'ECMWF-SEAS5.1'} + #- {name: 'UK-MetOffice-Glosea601'} + #- {name: 'NCEP-CFSv2'} + #- {name: 'DWD-GCFS2.1'} + #- {name: 'ECCC-CanCM4i'} + # name: 
Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + Multimodel: + execute: no + approach: pooled # Mandatory, bool: Either yes/true or no/false + createFrom: Anomalies + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0901' + fcst_year: '2024' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + name: 'Global' + latmin: -90 + latmax: 90 + lonmin: 0 #-179.9 + lonmax: 359.9 #180 + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. + type: "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_reference" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: yes + cross_validation: no + save: none + Time_aggregation: + execute: yes + method: average + ini: [1, 2, 3, 4] + end: [3, 4, 5, 6] + Calibration: + method: raw # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias enscorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov n_eff + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. + save: none + Indicators: + index: no + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles + multi_panel: no + mask_terciles: 'both' + mask_ens: 'both' + #dots: both + projection: Robinson + file_format: 'PNG' + #projection: robinson + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: NULL + metric: mean_bias enscorr rpss crpss EnsSprErr + metric_aggregation: 'skill' + #inf_to_na: yes + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 32 # Optional, int: number of cores, defaults to 1 + remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE + alpha: 0.05 + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: gpfs + output_dir: /home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /home/bsc/bsc032339/sunset/ # replace with the directory where your code is + autosubmit: yes + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc032339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 32 + platform: MN5 + email_notifications: yes # enable/disable email notifications. Change it if you want to. 
+ email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + diff --git a/recipe_tas.yml b/recipe_tas.yml index be625355..98821be3 100644 --- a/recipe_tas.yml +++ b/recipe_tas.yml @@ -35,8 +35,8 @@ Analysis: Region: latmin: -90 latmax: 90 - lonmin: 0 #-179.9 - lonmax: 359.9 #180 + lonmin: -180 #0 #-179.9 + lonmax: -179.9 #359.9 #180 Regrid: method: conservative # Mandatory, str: Interpolation method. See docu. type: "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_reference" @@ -67,10 +67,10 @@ Analysis: Visualization: plots: skill_metrics forecast_ensemble_mean most_likely_terciles multi_panel: no - #dots: both + mask_terciles: 'both' + mask_ens: 'both' projection: Robinson file_format: 'PNG' - #projection: robinson Scorecards: execute: no # yes/no regions: -- GitLab From 8d4b7e1ff0f27dd614a05f1ee6ec16df56a0c6c9 Mon Sep 17 00:00:00 2001 From: Nuria Perez Zanon Date: Wed, 13 Nov 2024 11:34:29 +0100 Subject: [PATCH 50/78] logo and multimod --- conf/archive.yml | 30 +- conf/archive_decadal.yml | 2 +- full_ecvs_anomalies.R | 17 +- full_ecvs_calibration.R | 45 ++ full_ecvs_multimodel_calibrated.R | 89 ++++ modules/Aggregation/R/agg_ini_end.R | 2 +- modules/Crossval/Crossval_Calibration.R | 289 ++++++++++++- modules/Crossval/Crossval_anomalies.R | 26 +- .../Crossval_multimodel_Calibration.R | 389 ++++++++++++++++++ .../Crossval/Crossval_multimodel_anomalies.R | 18 +- .../Crossval/Crossval_multimodel_metrics.R | 12 +- modules/Loading/R/load_decadal.R | 1 - .../R/plot_most_likely_terciles_map.R | 32 +- modules/Visualization/Visualization.R | 1 - modules/Visualization/output_size.yml | 9 +- recipe_prlr.yml | 2 +- recipe_seas5_oper.yml | 2 +- recipe_tas.yml | 18 +- recipe_tas_decadal.yml | 14 +- recipe_tas_singl.yml | 15 +- recipe_tasv2.yml | 111 +++++ sunset.sh | 4 +- sunsetv2.sh | 21 + tools/add_logo.R | 41 +- 24 files changed, 1102 insertions(+), 88 deletions(-) create mode 100644 full_ecvs_calibration.R create mode 100644 full_ecvs_multimodel_calibrated.R create mode 100644 modules/Crossval/Crossval_multimodel_Calibration.R create mode 100644 recipe_tasv2.yml create mode 100644 sunsetv2.sh diff --git a/conf/archive.yml b/conf/archive.yml index 3c550fdd..d371b212 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -44,6 +44,20 @@ gpfs: time_stamp_lag: "+1" calendar: "proleptic_gregorian" reference_grid: "conf/grid_description/griddes_system7c3s.txt" + UK-MetOffice-Glosea603: + name: "UK MetOffice GloSea 6 (v6.03)" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/ukmo/glosea6_system603-c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 62 + hcst: 28 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_ukmo600.txt" UK-MetOffice-Glosea601: name: "UK MetOffice GloSea 6 (v6.01)" institution: "European Centre for Medium-Range Weather Forecasts" @@ -75,7 +89,7 @@ gpfs: DWD-GCFS2.1: name: "DWD GCFS 2.1" institution: "European Centre for Medium-Range Weather Forecasts" - src: "exp/dwd/system21_m1/" + src: "exp/dwd/system21c3s/" monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f24h/", "sfcWind":"monthly_mean/sfcWind_f6h/", @@ -86,6 +100,20 @@ gpfs: calendar: "proleptic_gregorian" time_stamp_lag: "+1" 
      reference_grid: "conf/grid_description/griddes_system21_m1.txt"
+    ECCC-GEM5.2-NEMO:
+      name: "ECCC GEM5.2-NEMO"
+      institution: "European Centre for Medium-Range Weather Forecasts"
+      src: "exp/eccc/eccc5/"
+      monthly_mean: {"tas":"monthly_mean/tas_f6h/",
+                     "prlr":"monthly_mean/prlr_s0-24h/",
+                     "sfcWind":"monthly_mean/sfcWind_f6h/",
+                     "psl":"monthly_mean/psl_f6h/"}
+      nmember:
+        fcst: 20
+        hcst: 20
+      calendar: "proleptic_gregorian"
+      time_stamp_lag: "+1"
+      reference_grid: "conf/grid_description/griddes_eccc1.txt"
     ECCC-CanCM4i:
       name: "ECCC CanCM4i (v3)"
       institution: "European Centre for Medium-Range Weather Forecasts"
diff --git a/conf/archive_decadal.yml b/conf/archive_decadal.yml
index d06f336c..a917725a 100644
--- a/conf/archive_decadal.yml
+++ b/conf/archive_decadal.yml
@@ -1,5 +1,5 @@
 gpfs:
-  src: "/gpfs/scratch/bsc32/MN4/bsc32/bsc32693/data_amd/"
+  src: "/gpfs/projects/bsc32/esarchive_cache/"
   System:
 # ----
   EC-Earth3-i4:
diff --git a/full_ecvs_anomalies.R b/full_ecvs_anomalies.R
index 4ab3b375..ed0dcc53 100644
--- a/full_ecvs_anomalies.R
+++ b/full_ecvs_anomalies.R
@@ -7,8 +7,8 @@ source("modules/Aggregation/Aggregation.R")
 args = commandArgs(trailingOnly = TRUE)
 recipe_file <- args[1]
 #recipe_file <- "recipe_tas_singl.yml"
-recipe <- read_atomic_recipe(recipe_file)
-#recipe <- prepare_outputs(recipe_file)
+#recipe <- read_atomic_recipe(recipe_file)
+recipe <- prepare_outputs(recipe_file)
 # Load datasets
 data <- Loading(recipe)
 data <- Units(recipe, data)
@@ -16,6 +16,19 @@ data_summary(data$hcst, recipe)
 data_summary(data$obs, recipe)
 data_summary(data$fcst, recipe)
 
+  #### Reorder longitudes for MostLikely to plot EU-centered:
+  data$hcst <- CST_Subset(data$hcst, along = 'longitude',
+                          indices = c(181:360, 1:180))
+  data$fcst <- CST_Subset(data$fcst, along = 'longitude',
+                          indices = c(181:360, 1:180))
+  data$obs <- CST_Subset(data$obs, along = 'longitude',
+                         indices = c(181:360, 1:180))
+  data$hcst$coords$longitude[1:180] <- data$hcst$coords$longitude[1:180] - 360
+  data$fcst$coords$longitude[1:180] <- data$fcst$coords$longitude[1:180] - 360
+  data$obs$coords$longitude[1:180] <- data$obs$coords$longitude[1:180] - 360
+  ####
+
+
 data_agg <- Aggregation(recipe = recipe, data = data)
 data_summary(data_agg$hcst, recipe)
 data_summary(data_agg$obs, recipe)
diff --git a/full_ecvs_calibration.R b/full_ecvs_calibration.R
new file mode 100644
index 00000000..87b21574
--- /dev/null
+++ b/full_ecvs_calibration.R
@@ -0,0 +1,45 @@
+
+source("modules/Loading/Loading.R")
+source("modules/Saving/Saving.R")
+source("modules/Units/Units.R")
+source("modules/Visualization/Visualization.R")
+source("modules/Aggregation/Aggregation.R")
+args = commandArgs(trailingOnly = TRUE)
+recipe_file <- args[1]
+#recipe_file <- "recipe_tas_singl.yml"
+#recipe <- read_atomic_recipe(recipe_file)
+recipe <- prepare_outputs(recipe_file)
+# Load datasets
+data <- Loading(recipe)
+data <- Units(recipe, data)
+data_summary(data$hcst, recipe)
+data_summary(data$obs, recipe)
+data_summary(data$fcst, recipe)
+
+data_agg <- Aggregation(recipe = recipe, data = data)
+data_summary(data_agg$hcst, recipe)
+data_summary(data_agg$obs, recipe)
+data_summary(data_agg$fcst, recipe)
+
+source("modules/Crossval/Crossval_Calibration.R")
+res <- Crossval_Calibration(recipe = recipe, data = data_agg)
+
+source("modules/Crossval/Crossval_metrics.R")
+skill_metrics <- Crossval_metrics(recipe = recipe, data_crossval = res,
+                                  fair = FALSE, nmemb = NULL, nmemb_ref = NULL)
+
+# Required to plot a forecast:
+data_agg$fcst <- res$fcst
+tmp_probs <- list(probs_fcst = res$probs$fcst[[1]])
+dim(tmp_probs$probs_fcst) <- c(bin = 3, syear = 1, var = 1, time = 4, latitude = 180, longitude = 360)
+
+
+Visualization(recipe = recipe, data = data_agg, skill_metrics = skill_metrics, significance = TRUE, probabilities = tmp_probs)
+
+## Check that the logo size is appropriate for your maps:
+source("tools/add_logo.R")
+add_logo(recipe, "tools/BSC_logo_95.jpg") #"rsz_rsz_bsc_logo.png")
+
+# scorecards computation:
+
+
diff --git a/full_ecvs_multimodel_calibrated.R b/full_ecvs_multimodel_calibrated.R
new file mode 100644
index 00000000..0a62d09f
--- /dev/null
+++ b/full_ecvs_multimodel_calibrated.R
@@ -0,0 +1,89 @@
+library(pryr)
+source("modules/Loading/Loading.R")
+source("modules/Saving/Saving.R")
+source("modules/Units/Units.R")
+source("modules/Visualization/Visualization.R")
+#args = commandArgs(trailingOnly = TRUE)
+#recipe_file <- args[1]
+recipe_file <- "recipe_tas.yml"
+# recipe_file <- "recipe_tas_decadal.yml"
+# Will it work with a single job?
+#recipe <- read_atomic_recipe(recipe_file)
+original_recipe <- prepare_outputs(recipe_file, disable_checks = TRUE)
+# Load datasets
+models <- unlist(original_recipe$Analysis$Datasets$System)
+
+
+recipe_aux <- original_recipe
+datos <- list()
+datos$hcst <- list()
+datos$fcst <- list()
+source("modules/Crossval/Crossval_metrics.R")
+source("modules/Crossval/Crossval_Calibration.R")
+source("modules/Aggregation/Aggregation.R")
+# sys <- models[1]
+for (sys in models) {
+  recipe_aux$Analysis$Datasets$System <- NULL
+  recipe_aux$Analysis$Datasets$System$name <- as.vector(sys)
+  # recipe_aux$Analysis$Datasets$System$member <- read_yaml("conf/archive_decadal.yml")$esarchive$System[[sys]]$member
+  data <- Loading(recipe = recipe_aux)
+  #### Reorder longitudes for MostLikely to plot EU-centered:
+  data$hcst <- CST_Subset(data$hcst, along = 'longitude',
+                          indices = c(181:360, 1:180))
+  data$fcst <- CST_Subset(data$fcst, along = 'longitude',
+                          indices = c(181:360, 1:180))
+  data$obs <- CST_Subset(data$obs, along = 'longitude',
+                         indices = c(181:360, 1:180))
+  data$hcst$coords$longitude[1:180] <- data$hcst$coords$longitude[1:180] - 360
+  data$fcst$coords$longitude[1:180] <- data$fcst$coords$longitude[1:180] - 360
+  data$obs$coords$longitude[1:180] <- data$obs$coords$longitude[1:180] - 360
+  ####
+  data <- Units(recipe = recipe_aux, data = data)
+  data <- Aggregation(recipe = recipe_aux, data = data)
+  # verification of individual models
+  product <- Crossval_Calibration(recipe = recipe_aux, data = data)
+  skill_model <- Crossval_metrics(recipe = recipe_aux, data_crossval = product,
+                                  fair = FALSE, nmemb = NULL, nmemb_ref = NULL)
+
+  tmp_probs <- list(probs_fcst = product$probs$fcst[[1]])
+  lats <- length(data$hcst$coords$latitude)
+  lons <- length(data$hcst$coords$longitude)
+  timesteps <- as.numeric(dim(data$hcst$data)['time'])
+  dim(tmp_probs$probs_fcst) <- c(bin = 3, syear = 1, var = 1, time = timesteps,
+                                 #latitude = 20, longitude = 20)
+                                 latitude = lats, longitude = lons)
+
+  Visualization(recipe = recipe_aux, data = data, probabilities = tmp_probs,
+                skill_metrics = skill_model, significance = TRUE)
+  ## end ver
+  datos$hcst <- append(datos$hcst, list(data$hcst))
+  datos$fcst <- append(datos$fcst, list(data$fcst))
+  names(datos$hcst)[length(datos$hcst)] <- gsub('\\.', '', sys)
+  names(datos$fcst)[length(datos$fcst)] <- gsub('\\.', '', sys)
+  gc()
+  print(mem_used())
+}
+data_aux <- data
+datos$obs <- data$obs
+data <- datos
+rm(list = 'datos')
+
+source("modules/Crossval/Crossval_multimodel_Calibration.R")
+res <- 
+
+source("modules/Crossval/Crossval_multimodel_metrics.R")
+skill_metrics <- Crossval_multimodel_metrics(recipe = original_recipe,
+                                             data = res, Fair = FALSE)
+
+recipe_aux$Analysis$Datasets$System$name <- 'Multimodel'
+tmp_probs <- list(probs_fcst = res$probs$probs_fcst[[1]][[1]])
+Visualization(recipe = recipe_aux, data = data_aux, probabilities = tmp_probs, skill_metrics = skill_metrics, significance = TRUE)
+
+
+## Check that the logo size is appropriate for your maps:
+source("tools/add_logo.R")
+add_logo(recipe_aux, "rsz_rsz_bsc_logo.png")
+
+# scorecards computation:
+
+
diff --git a/modules/Aggregation/R/agg_ini_end.R b/modules/Aggregation/R/agg_ini_end.R
index e81cc227..06662722 100644
--- a/modules/Aggregation/R/agg_ini_end.R
+++ b/modules/Aggregation/R/agg_ini_end.R
@@ -14,7 +14,7 @@ agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm ,ncores) {
     plotting_attr <- list(names(indices))
   }
   original_dims <- names(dim(x[[1]]$data))
-  if (method == 'average') {
+  if (method == 'average') {
     x[[1]]$data <- Apply(x[[1]]$data,
                          target_dim = 'time',
                          function(y, ind) {
diff --git a/modules/Crossval/Crossval_Calibration.R b/modules/Crossval/Crossval_Calibration.R
index 203b1448..64e44701 100644
--- a/modules/Crossval/Crossval_Calibration.R
+++ b/modules/Crossval/Crossval_Calibration.R
@@ -1,9 +1,10 @@
 # take the output of Flor/s2s/subseasonal_loading.R
-source("../git/sunset/modules/Crossval/R/tmp/GetProbs.R")
-
-Crossval_calibration <- function(recipe, data) {
+#source("../git/sunset/modules/Crossval/R/tmp/GetProbs.R")
+source("modules/Crossval/R/tmp/GetProbs.R")
 
+Crossval_Calibration <- function(recipe, data) {
+
+  cal_method <- recipe$Analysis$Workflow$Calibration$method
   cross.method <- recipe$Analysis$cross.method
   # TODO move check
   if (is.null(cross.method)) {
     cross.method <- 'leave-one-out'
   }
@@ -34,29 +35,29 @@
                                            amt.points_cor = NULL) # k = ?
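 # Sketch of the fold structure built above, inferred from how cross[[t]]
 # is indexed below (an assumption, not the internal CSTools code): for
 # 'leave-one-out' with N start dates it behaves like
 # cross <- lapply(seq_len(N), function(t)
 #   list(eval.dexes = t, train.dexes = setdiff(seq_len(N), t)))
 # i.e. each fold evaluates one start date and trains on all the others.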
cal_hcst_ev_res <- NULL - cal_obs_ev_res <- NULL - cal_obs_tr_res <- NULL + cal_hcst_tr_res <- NULL + #cal_obs_ev_res <- NULL + obs_tr_res <- NULL # as long as probs requested in recipe: lims_cal_hcst_tr_res <- lapply(categories, function(X) {NULL}) - lims_cal_obs_tr_res <- lapply(categories, function(X) {NULL}) + lims_obs_tr_res <- lapply(categories, function(X) {NULL}) fcst_probs <- lapply(categories, function(x){NULL}) hcst_probs_ev <- lapply(categories, function(x){NULL}) obs_probs_ev <- lapply(categories, function(x){NULL}) - for (t in 1:length(cross)) { info(recipe$Run$logger, paste("crossval:", t)) # subset years: Subset works at BSC not at Athos ## training indices - obs_tr <- CST_Subset(data$obs, along = 'syear', + obs_tr <- Subset(data$obs$data, along = 'syear', indices = cross[[t]]$train.dexes) - hcst_tr <- CST_Subset(data$hcst, along = 'syear', + hcst_tr <- Subset(data$hcst$data, along = 'syear', indices = cross[[t]]$train.dexes) ## evaluation indices - hcst_ev <- CST_Subset(data$hcst, along = 'syear', + hcst_ev <- Subset(data$hcst$data, along = 'syear', indices = cross[[t]]$eval.dexes) - obs_ev <- CST_Subset(data$obs, along = 'syear', + obs_ev <- Subset(data$obs$data, along = 'syear', indices = cross[[t]]$eval.dexes) if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { @@ -69,23 +70,31 @@ source("https://earth.bsc.es/gitlab/external/cstools/-/raw/d6914a40c11d09168b9b4 hcst_ev <- CST_MergeDims(hcst_ev, merge_dims = c('sday', 'syear'), rename_dim = 'syear', na.rm = FALSE) } - cal_hcst_tr <- CST_Calibration(exp = hcst_tr, obs = obs_tr, - memb_dim = 'ensemble', sdate_dim = 'syear', - eval.method = 'in-sample', - ncores = ncores) - cal_hcst_ev <- CST_Calibration(exp = hcst_tr, obs = obs_tr, exp_cor = hcst_ev, - memb_dim = 'ensemble', sdate_dim = 'syear', - eval.method = 'in-sample', - ncores = ncores) + cal_hcst_tr <- Calibration(exp = hcst_tr, obs = obs_tr, + cal.method = cal_method, + memb_dim = 'ensemble', sdate_dim = 'syear', + eval.method = 'in-sample', + na.fill = FALSE, na.rm = TRUE, + apply_to = NULL, + alpha = NULL, ncores = ncores) + + cal_hcst_ev <- Calibration(exp = hcst_tr, obs = obs_tr, exp_cor = hcst_ev, + cal.method = cal_method, + memb_dim = 'ensemble', sdate_dim = 'syear', + eval.method = 'in-sample', + na.fill = FALSE, na.rm = TRUE, + apply_to = NULL, + alpha = NULL, ncores = ncores) + - lims_cal_hcst_tr <- Apply(cal_hcst_tr$data, target_dims = c('syear', 'ensemble'), + lims_cal_hcst_tr <- Apply(cal_hcst_tr, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { quantile(as.vector(x), ps, na.rm = na.rm)})}, output_dims = lapply(categories, function(x) {'cat'}), prob_lims = categories, ncores = ncores) - lims_cal_obs_tr <- Apply(obs_tr$data, target_dims = c('syear'),#, 'ensemble'), + lims_obs_tr <- Apply(obs_tr, target_dims = c('syear'),#, 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { quantile(as.vector(x), ps, na.rm = na.rm)})}, @@ -93,7 +102,245 @@ source("https://earth.bsc.es/gitlab/external/cstools/-/raw/d6914a40c11d09168b9b4 prob_lims = categories, ncores = ncores) + #store results + cal_hcst_ev_res <- abind(cal_hcst_ev_res, cal_hcst_ev, + along = length(dim(cal_hcst_ev)) + 1) + cal_hcst_tr_res <- abind(cal_hcst_tr_res, cal_hcst_tr, + along = length(dim(cal_hcst_tr)) + 1) + + # Obs ev are original obs +# cal_obs_ev_res <- abind(ano_obs_ev_res, ano_obs_ev, +# along = length(dim(ano_obs_ev)) + 1) + obs_tr_res <- abind(obs_tr_res, obs_tr, + along = length(dim(obs_tr)) + 1) + for(ps 
in 1:length(categories)) {
+      lims_cal_hcst_tr_res[[ps]] <- abind(lims_cal_hcst_tr_res[[ps]],
+                                          lims_cal_hcst_tr[[ps]],
+                            along = length(dim(lims_cal_hcst_tr[[ps]])) + 1)
+      lims_obs_tr_res[[ps]] <- abind(lims_obs_tr_res[[ps]],
+                                     lims_obs_tr[[ps]],
+                            along = length(dim(lims_obs_tr[[ps]])) + 1)
+    }
+  }
+  info(recipe$Run$logger,
+       "#### Calibration Cross-validation loop ended #####")
+  gc()
+  names(dim(cal_hcst_ev_res)) <- c('dat', 'var', 'sday', 'sweek', 'unneeded', 'time',
+                                   'latitude', 'longitude', 'ensemble', 'syear')
+  names(dim(cal_hcst_tr_res)) <- c('dat', 'var', 'sday', 'sweek', 'loop', 'time',
+                                   'latitude', 'longitude', 'ensemble', 'syear')
+  names(dim(obs_tr_res)) <- c('dat', 'var', 'sday', 'sweek', 'ensemble', 'time',
+                              'latitude', 'longitude', 'unneeded', 'syear')
+  obs_tr_res <- Subset(obs_tr_res, along = 'unneeded',
+                       indices = 1, drop = 'selected')
+  cal_hcst_ev_res <- Subset(cal_hcst_ev_res, along = 'unneeded',
+                            indices = 1, drop = 'selected')
+  for(ps in 1:length(categories)) {
+    names(dim(lims_cal_hcst_tr_res[[ps]])) <- c('cat', 'dat', 'var', 'sday', 'sweek',
+                                                'time', 'latitude', 'longitude',
+                                                'syear')
+    names(dim(lims_obs_tr_res[[ps]])) <- c('cat', 'dat', 'var', 'sday', 'sweek',
+                                           'time', 'latitude', 'longitude',
+                                           'unneeded', 'syear')
+    lims_obs_tr_res[[ps]] <- Subset(lims_obs_tr_res[[ps]],
+                                    along = 'unneeded', indices = 1,
+                                    drop = 'selected')
+  }
+
+  # Round the category thresholds to use them as names:
+  categories <- recipe$Analysis$Workflow$Probabilities$percentiles
+  categories <- lapply(categories, function (x) {
+                  sapply(x, function(y) {
+                    round(eval(parse(text = y)), 2)})})
+  recipe$Run$output_dir <- paste0(recipe$Run$output_dir,
+                                  "/outputs/Calibration/")
+
+  # Forecast calibration:
+  if (!is.null(data$fcst)) {
+    data$fcst$data <- Calibration(exp = data$hcst$data,
+                                  obs = data$obs$data,
+                                  exp_cor = data$fcst$data,
+                                  cal.method = cal_method,
+                                  multi.model = FALSE,
+                                  na.fill = TRUE, na.rm = TRUE,
+                                  apply_to = NULL,
+                                  alpha = NULL, memb_dim = 'ensemble',
+                                  sdate_dim = 'syear',
+                                  dat_dim = NULL, ncores = ncores)
+    hcst_cal <- Calibration(exp = data$hcst$data,
+                            obs = data$obs$data,
+                            cal.method = cal_method,
+                            eval.method = 'in-sample',
+                            multi.model = FALSE,
+                            na.fill = TRUE, na.rm = TRUE,
+                            apply_to = NULL,
+                            alpha = NULL, memb_dim = 'ensemble',
+                            sdate_dim = 'syear',
+                            dat_dim = NULL, ncores = ncores)
+
+    # Tercile limits using the whole hindcast period:
+    lims_fcst <- Apply(hcst_cal, target_dims = c('syear', 'ensemble'),
+                       fun = function(x, prob_lims) {
+                         lapply(prob_lims, function(ps) {
+                           quantile(as.vector(x), ps, na.rm = na.rm)})},
+                       output_dims = lapply(categories, function(x) {'cat'}),
+                       prob_lims = categories,
+                       ncores = ncores)
+    lims <- Apply(data$obs$data, target_dims = c('syear', 'ensemble'),
+                  fun = function(x, prob_lims) {
+                    lapply(prob_lims, function(ps) {
+                      quantile(as.vector(x), ps, na.rm = na.rm)})},
+                  output_dims = lapply(categories, function(x) {'cat'}),
+                  prob_lims = categories,
+                  ncores = ncores)
+    tmp_lims2 <- list()
+# What to save: hcst category limits or obs category limits?
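+    # Example of what the loop below unpacks, assuming terciles
+    # (percentiles: [[1/3, 2/3]]): lims[[1]] has a 'cat' dimension of
+    # length 2 holding the 1/3 and 2/3 thresholds at each grid point;
+    # every threshold map is appended to tmp_lims2 under its rounded
+    # percentile name so that save_percentiles() can write it out.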
+ for(ps in 1:length(categories)) { + tmp_lims3 <- drop(lims[[ps]]) + for (l in 1:dim(lims[[ps]])['cat']) { + tmp_lims <- tmp_lims3[l,,,] + if (!('var' %in% names(dim(tmp_lims)))) { + dim(tmp_lims) <- c(var = 1, dim(tmp_lims)) + } + tmp_lims2 <- append(tmp_lims2, list(tmp_lims)) + names(tmp_lims2)[length(tmp_lims2)] <- as.character(categories[[ps]][l]) + } + if (recipe$Analysis$Workflow$Probabilities$save == 'yes') { + save_percentiles(recipe = recipe, percentiles = tmp_lims2, + data_cube = data$obs, + agg = "global", outdir = NULL) + } + } + } + + # Compute Probabilities + for (ps in 1:length(categories)) { + hcst_probs_ev[[ps]] <- GetProbs(cal_hcst_ev_res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_cal_hcst_tr_res[[ps]], + ncores = ncores) + obs_probs_ev[[ps]] <- GetProbs(data$obs$data, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_obs_tr_res[[ps]], + ncores = ncores) + if (!is.null(data$fcst)) { + fcst_probs[[ps]] <- GetProbs(data$fcst$data, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_fcst[[ps]], + ncores = ncores) + } + } + # Convert to s2dv_cubes the resulting calibrated + hcst <- data$hcst + hcst$data <- cal_hcst_ev_res + + info(recipe$Run$logger, + "#### Calibrated and Probabilities Done #####") + if (recipe$Analysis$Workflow$Anomalies$save != 'none') { + info(recipe$Run$logger, "##### START SAVING CALIBRATED #####") +# recipe$Run$output_dir <- paste0(recipe$Run$output_dir, +# "/outputs/Anomalies/") + # Save forecast + if ((recipe$Analysis$Workflow$Calibration$save %in% + c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) { + save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') + } + # Save hindcast + if (recipe$Analysis$Workflow$Anomalies$save %in% + c('all', 'exp_only')) { + save_forecast(recipe = recipe, data_cube = hcst, type = 'hcst') + } + } + # Save probability bins + probs_hcst <- list() + probs_fcst <- list() + probs_obs <- list() + all_names <- NULL + + for (ps in 1:length(categories)) { + for (perc in 1:(length(categories[[ps]]) + 1)) { + if (perc == 1) { + name_elem <- paste0("below_", categories[[ps]][perc]) + } else if (perc == length(categories[[ps]]) + 1) { + name_elem <- paste0("above_", categories[[ps]][perc-1]) + } else { + name_elem <- paste0("from_", categories[[ps]][perc-1], + "_to_", categories[[ps]][perc]) + } + probs_hcst <- append(list(Subset(hcst_probs_ev[[ps]], + along = 'cat', indices = perc, drop = 'all')), + probs_hcst) + probs_obs <- append(list(Subset(obs_probs_ev[[ps]], + along = 'cat', indices = perc, drop = 'all')), + probs_obs) + if (!is.null(data$fcst)) { + probs_fcst <- append(list(Subset(fcst_probs[[ps]], + along = 'cat', indices = perc, drop = 'all')), + probs_fcst) + } + all_names <- c(all_names, name_elem) + } + } + names(probs_hcst) <- all_names + if (!('var' %in% names(dim(probs_hcst[[1]])))) { + probs_hcst <- lapply(probs_hcst, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } + names(probs_obs) <- all_names + if (!('var' %in% names(dim(probs_obs[[1]])))) { + probs_obs <- lapply(probs_obs, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } + if (!is.null(data$fcst)) { + names(probs_fcst) <- all_names + if (!('var' %in% names(dim(probs_fcst[[1]])))) { + probs_fcst <- 
lapply(probs_fcst, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } + if (!('syear' %in% names(dim(probs_fcst[[1]])))) { + probs_fcst <- lapply(probs_fcst, function(x) { + dim(x) <- c(syear = 1, dim(x)) + return(x)}) + } + } + if (recipe$Analysis$Workflow$Probabilities$save %in% + c('all', 'bins_only')) { + save_probabilities(recipe = recipe, probs = probs_hcst, + data_cube = data$hcst, agg = agg, + type = "hcst") + save_probabilities(recipe = recipe, probs = probs_obs, + data_cube = data$hcst, agg = agg, + type = "obs") + save_probabilities(recipe = recipe, probs = probs_fcst, + data_cube = data$fcst, agg = agg, + type = "fcst") + } + hcst_EM <- MeanDims(hcst$data, 'ensemble', drop = T) + if (!is.null(data$fcst)) { + fcst_EM <- MeanDims(data$fcst$data, 'ensemble', drop = T) + } + return(list(hcst = hcst, obs = data$obs, fcst = data$fcst, + hcst.full_val = data$hcst, obs.full_val = data$obs, + hcst_EM = hcst_EM, fcst_EM = fcst_EM, + #cat_lims = list(hcst_tr = lims_ano_hcst_tr_res, + # obs_tr = lims_ano_obs_tr_res), + probs = list(hcst_ev = hcst_probs_ev, + obs_ev = obs_probs_ev, + fcst = fcst_probs), + ref_obs_tr = obs_tr_res)) } diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R index 84fe9e92..c99d38bf 100644 --- a/modules/Crossval/Crossval_anomalies.R +++ b/modules/Crossval/Crossval_anomalies.R @@ -83,14 +83,14 @@ Crossval_anomalies <- function(recipe, data) { fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { quantile(as.vector(x), ps, na.rm = na.rm)})}, - output_dims = lapply(categories, function(x) {'cat'}), + output_dims = lapply(categories, function(x) {'bin'}), prob_lims = categories, ncores = ncores) lims_ano_obs_tr <- Apply(ano_obs_tr, target_dims = c('syear'),#, 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { quantile(as.vector(x), ps, na.rm = na.rm)})}, - output_dims = lapply(categories, function(x){'cat'}), + output_dims = lapply(categories, function(x){'bin'}), prob_lims = categories, ncores = ncores) #store results @@ -118,9 +118,9 @@ Crossval_anomalies <- function(recipe, data) { ano_obs_tr_res <- Subset(ano_obs_tr_res, along = 'unneeded', indices = 1, drop = 'selected') for(ps in 1:length(categories)) { - names(dim(lims_ano_hcst_tr_res[[ps]])) <- c('cat', + names(dim(lims_ano_hcst_tr_res[[ps]])) <- c('bin', orig_dims[-which(orig_dims %in% c('ensemble', 'unneeded'))], 'syear') - names(dim(lims_ano_obs_tr_res[[ps]])) <- c('cat', + names(dim(lims_ano_obs_tr_res[[ps]])) <- c('bin', tr_dim_names[-which(tr_dim_names %in% c('ensemble'))]) lims_ano_obs_tr_res[[ps]] <- Subset(lims_ano_obs_tr_res[[ps]], along = 'unneeded', indices = 1, drop = 'selected') @@ -147,7 +147,7 @@ Crossval_anomalies <- function(recipe, data) { fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { quantile(as.vector(x), ps, na.rm = na.rm)})}, - output_dims = lapply(categories, function(x) {'cat'}), + output_dims = lapply(categories, function(x) {'bin'}), prob_lims = categories, ncores = ncores) clim_obs <- Apply(data$obs$data, @@ -160,14 +160,14 @@ Crossval_anomalies <- function(recipe, data) { fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { quantile(as.vector(x), ps, na.rm = na.rm)})}, - output_dims = lapply(categories, function(x) {'cat'}), + output_dims = lapply(categories, function(x) {'bin'}), prob_lims = categories, ncores = ncores) tmp_lims2 <- list() for(ps in 1:length(categories)) { tmp_lims3 <- drop(lims[[ps]]) - for (l in 1:dim(lims[[ps]])['cat']) { + for (l in 
1:dim(lims[[ps]])['bin']) { tmp_lims <- tmp_lims3[l,,,] if (!('var' %in% names(dim(tmp_lims)))) { dim(tmp_lims) <- c(var = 1, dim(tmp_lims)) @@ -187,14 +187,14 @@ Crossval_anomalies <- function(recipe, data) { for (ps in 1:length(categories)) { hcst_probs_ev[[ps]] <- GetProbs(ano_hcst_ev_res, time_dim = 'syear', prob_thresholds = NULL, - bin_dim_abs = 'cat', + bin_dim_abs = 'bin', indices_for_quantiles = NULL, memb_dim = 'ensemble', abs_thresholds = lims_ano_hcst_tr_res[[ps]], ncores = ncores) obs_probs_ev[[ps]] <- GetProbs(ano_obs_ev_res, time_dim = 'syear', prob_thresholds = NULL, - bin_dim_abs = 'cat', + bin_dim_abs = 'bin', indices_for_quantiles = NULL, memb_dim = 'ensemble', abs_thresholds = lims_ano_obs_tr_res[[ps]], @@ -202,7 +202,7 @@ Crossval_anomalies <- function(recipe, data) { if (!is.null(data$fcst)) { fcst_probs[[ps]] <- GetProbs(data$fcst$data, time_dim = 'syear', prob_thresholds = NULL, - bin_dim_abs = 'cat', + bin_dim_abs = 'bin', indices_for_quantiles = NULL, memb_dim = 'ensemble', abs_thresholds = lims_fcst[[ps]], @@ -253,14 +253,14 @@ Crossval_anomalies <- function(recipe, data) { "_to_", categories[[ps]][perc]) } probs_hcst <- append(list(Subset(hcst_probs_ev[[ps]], - along = 'cat', indices = perc, drop = 'all')), + along = 'bin', indices = perc, drop = 'all')), probs_hcst) probs_obs <- append(list(Subset(obs_probs_ev[[ps]], - along = 'cat', indices = perc, drop = 'all')), + along = 'bin', indices = perc, drop = 'all')), probs_obs) if (!is.null(data$fcst)) { probs_fcst <- append(list(Subset(fcst_probs[[ps]], - along = 'cat', indices = perc, drop = 'all')), + along = 'bin', indices = perc, drop = 'all')), probs_fcst) } all_names <- c(all_names, name_elem) diff --git a/modules/Crossval/Crossval_multimodel_Calibration.R b/modules/Crossval/Crossval_multimodel_Calibration.R new file mode 100644 index 00000000..229abd0a --- /dev/null +++ b/modules/Crossval/Crossval_multimodel_Calibration.R @@ -0,0 +1,389 @@ +# Full-cross-val workflow +## This code should be valid for individual months and temporal averages +source("modules/Crossval/R/tmp/GetProbs.R") + +Crossval_multimodel_Calibration <- function(recipe, data) { + cal_method <- recipe$Analysis$Workflow$Calibration$method + cross.method <- recipe$Analysis$cross.method + # TODO move check + if (is.null(cross.method)) { + cross.method <- 'leave-one-out' + } + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + eval(parse(text = y))})}) + ncores <- recipe$Analysis$ncores + na.rm <- recipe$Analysis$remove_NAs + ## data dimensions + sdate_dim <- dim(data$hcst[[1]]$data)['syear'] + orig_dims <- names(dim(data$hcst[[1]]$data)) + # spatial dims + if ('latitude' %in% names(dim(data$hcst$data))) { + nlats <- dim(data$hcst[[1]]$data)['latitude'] + nlons <- dim(data$hcst[[1]]$data)['longitude'] + agg = 'global' + } else if ('region' %in% names(dim(data$hcst[[1]]$data))) { + agg = 'region' + nregions <- dim(data$hcst[[1]]$data)['region'] + } + # output_dims from loop base on original dimensions + ## ex: 'dat', 'var', 'sday', 'sweek', 'ensemble', 'time', + ## 'latitude', 'longitude', 'unneeded', 'syear' + ev_dim_names <- c(orig_dims[-which(orig_dims %in% 'syear')], + names(sdate_dim)) + orig_dims[orig_dims %in% 'ensemble'] <- 'unneeded' + orig_dims[orig_dims %in% 'syear'] <- 'ensemble' + tr_dim_names <-c(orig_dims, + names(sdate_dim)) + # TODO fix it to use new version 
https://earth.bsc.es/gitlab/external/cstools/-/blob/dev-cross-indices/R/CST_Calibration.R#L570 + cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, + amt.points = sdate_dim, + amt.points_cor = NULL) # k = ? + ## output objects + obs_ev_res <- NULL + cal_hcst_ev_res <- lapply(data$hcst, function(x) {NULL}) + cal_hcst_tr_res <- lapply(data$hcst, function(x) {NULL}) + obs_tr_res <- NULL + # as long as probs requested in recipe: + lims_cal_hcst_tr_res <- lapply(categories, function(X) {NULL}) + lims_obs_tr_res <- lapply(categories, function(X) {NULL}) + + fcst_probs <- lapply(categories, function(x){NULL}) + hcst_probs_ev <- lapply(categories, function(x){NULL}) + obs_probs_ev <- lapply(categories, function(x){NULL}) + hcst_res <- list() + + for (t in 1:length(cross)) { + info(recipe$Run$logger, paste("crossval:", t)) + # Observations + obs_tr <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$train.dexes) + obs_ev <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$eval.dexes, drop = 'selected') + # Store cross validation loops: + obs_ev_res <- abind(obs_ev_res, obs_ev, + along = length(dim(obs_ev)) + 1) + obs_tr_res <- abind(obs_tr_res, obs_tr, + along = length(dim(obs_tr)) + 1) + + # Calibrate individual models + cal_hcst_tr <- list() + for (sys in 1:length(data$hcst)) { + hcst_tr <- Subset(data$hcst[[sys]]$data, along = 'syear', + indices = cross[[t]]$train.dexes) + ## evaluation indices + hcst_ev <- Subset(data$hcst[[sys]]$data, along = 'syear', + indices = cross[[t]]$eval.dexes) + # calibrate: + cal_hcst_tr <- append(cal_hcst_tr, + list( + Calibration(exp = hcst_tr, + obs = obs_tr, + cal.method = cal_method, + memb_dim = 'ensemble', sdate_dim = 'syear', + eval.method = 'in-sample', + na.fill = FALSE, na.rm = TRUE, + apply_to = NULL, + alpha = NULL, ncores = ncores))) + cal_hcst_ev <- #append(cal_hcst_ev, + #list( + Calibration(exp = hcst_tr, + obs = obs_tr, + exp_cor = hcst_ev, + cal.method = cal_method, + memb_dim = 'ensemble', sdate_dim = 'syear', + eval.method = 'in-sample', + na.fill = FALSE, na.rm = TRUE, + apply_to = NULL, + alpha = NULL, ncores = ncores)#)#) + + cal_hcst_ev_res[[sys]] <- abind(cal_hcst_ev_res[[sys]], + cal_hcst_ev, + along = length(dim(cal_hcst_ev)) + 1) + # cal_hcst_tr_res[[sys]] <- abind(cal_hcst_tr_res[[sys]], + # cal_hcst_tr, +# along = length(dim(cal_hcst_tr)) + 1) + } + # compute category limits + lims_cal_hcst_tr <- Apply(cal_hcst_tr, + target_dims = c('syear', 'ensemble'), + fun = function(..., prob_lims) { + res <- abind(..., along = 2) + lapply(prob_lims, function(ps) { + quantile(as.vector(res), + ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x) {'bin'}), + prob_lims = categories, + ncores = ncores) + lims_obs_tr <- Apply(obs_tr, + target_dims = c('syear'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x){'bin'}), + prob_lims = categories, + ncores = ncores) + + #store results + for (ps in 1:length(categories)) { + lims_cal_hcst_tr_res[[ps]] <- abind(lims_cal_hcst_tr_res[[ps]], + lims_cal_hcst_tr[[ps]], + along = length(dim(lims_cal_hcst_tr[[ps]])) + 1) + lims_obs_tr_res[[ps]] <- abind(lims_obs_tr_res[[ps]], + lims_obs_tr[[ps]], + along = length(dim(lims_obs_tr[[ps]])) + 1) + } + } + info(recipe$Run$logger, + "#### Calibration Cross-validation loop ended #####") + gc() + # Add dim names: + cal_hcst_ev_res <- lapply(cal_hcst_ev_res, function(x) { + dim(x) <- dim(x)[-1] + 
names(dim(x)) <- ev_dim_names + return(x)}) + names(dim(obs_ev_res)) <- ev_dim_names + names(dim(obs_tr_res)) <- tr_dim_names + # To make crps_clim to work the reference forecast need to have same dims as obs: + obs_tr_res <- Subset(obs_tr_res, along = 'unneeded', + indices = 1, drop = 'selected') + for(ps in 1:length(categories)) { + names(dim(lims_cal_hcst_tr_res[[ps]])) <- c('bin', + orig_dims[-which(orig_dims %in% c('ensemble', 'unneeded'))], 'syear') + names(dim(lims_obs_tr_res[[ps]])) <- c('bin', + tr_dim_names[-which(tr_dim_names %in% c('ensemble'))]) + lims_obs_tr_res[[ps]] <- Subset(lims_obs_tr_res[[ps]], + along = 'unneeded', indices = 1, drop = 'selected') + } + # Forecast calibration: + cal_fcst <- lims_fcst <- NULL + if (!is.null(data$fcst)) { + for (sys in 1:length(data$hcst)) { + cal_fcst <- append(cal_fcst, + list(Calibration(exp = data$hcst[[sys]]$data, + obs = data$obs$data, + exp_cor = data$fcst[[sys]]$data, + cal.method = cal_method, + memb_dim = 'ensemble', sdate_dim = 'syear', + na.fill = FALSE, na.rm = TRUE, + apply_to = NULL, + alpha = NULL, ncores = ncores))) + } + names(cal_fcst) <- names(data$hcst) + # Terciles limits using the whole hindcast period: + lims_fcst <- Apply(cal_hcst_ev_res, target_dims = c('syear', 'ensemble'), + fun = function(..., prob_lims) { + res <- abind(..., along = 2) + lapply(prob_lims, function(ps) { + quantile(as.vector(res), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x) {'bin'}), + prob_lims = categories, + ncores = ncores) + } + + # Compute Probabilities + for (ps in 1:length(categories)) { + # create a list of unknown length of systems and limits: + target_dims_list <- append(list(lims = 'bin'), + lapply(cal_hcst_ev_res, function(x) { + c('syear', 'ensemble')})) + hcst_probs_ev[[ps]] <- Apply(append(list(lims = lims_cal_hcst_tr[[ps]]), + cal_hcst_ev_res), + target_dims = target_dims_list, + function(lims, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + GetProbs(res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'bin', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims)}, + ncores = ncores)$output1 + obs_probs_ev[[ps]] <- GetProbs(obs_ev_res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'bin', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_obs_tr_res[[ps]], + ncores = ncores) +fcst_probs <- NULL + if (!is.null(data$fcst)) { + fcst_probs[[ps]] <- Apply(append(list(lims = lims_fcst[[ps]]), + cal_fcst), + target_dims = target_dims_list, + fun = function(lims, ...) 
{ + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + GetProbs(res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'bin', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims)}, + ncores = ncores) + } + } + return(list(hcst = cal_hcst_ev_res, obs = obs_ev_res, fcst = cal_fcst, + hcst.full_val = data$hcst, obs.full_val = data$obs, + #hcst_EM = hcst_EM, fcst_EM = fcst_EM, + cat_lims = list(hcst_tr = lims_cal_hcst_tr_res, + obs_tr = lims_obs_tr_res, + fcst = lims_fcst), + probs = list(hcst_probs = hcst_probs_ev, + obs_probs = obs_probs_ev, probs_fcst = fcst_probs), + ref_obs_tr = obs_tr_res)) +} +##### TO SAVE edit the lines below: + # Convert to s2dv_cubes the resulting anomalies +# ano_hcst <- data$hcst +# ano_hcst$data <- ano_hcst_ev_res +# ano_obs <- data$obs +# ano_obs$data <- ano_obs_ev_res + +# info(recipe$Run$logger, +# "#### Anomalies and Probabilities Done #####") +# if (recipe$Analysis$Workflow$Anomalies$save != 'none') { +# info(recipe$Run$logger, "##### START SAVING ANOMALIES #####") +# recipe$Run$output_dir <- paste0(recipe$Run$output_dir, +# "/outputs/Anomalies/") + # Save forecast +# if ((recipe$Analysis$Workflow$Anomalies$save %in% +# c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) { +# save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') +# } + # Save hindcast +# if (recipe$Analysis$Workflow$Anomalies$save %in% +# c('all', 'exp_only')) { +# save_forecast(recipe = recipe, data_cube = ano_hcst, type = 'hcst') +# } +# # Save observation +# if (recipe$Analysis$Workflow$Anomalies$save == 'all') { +# save_observations(recipe = recipe, data_cube = ano_obs) +# } +# } + # Save probability bins +# probs_hcst <- list() +# probs_fcst <- list() +# probs_obs <- list() +# all_names <- NULL + # Make categories rounded number to use as names: +# categories <- recipe$Analysis$Workflow$Probabilities$percentiles +# categories <- lapply(categories, function (x) { +# sapply(x, function(y) { +# round(eval(parse(text = y)),2)})}) + + # for (ps in 1:length(categories)) { + # for (perc in 1:(length(categories[[ps]]) + 1)) { + # if (perc == 1) { + # name_elem <- paste0("below_", categories[[ps]][perc]) + # } else if (perc == length(categories[[ps]]) + 1) { + # name_elem <- paste0("above_", categories[[ps]][perc-1]) + # } else { + # name_elem <- paste0("from_", categories[[ps]][perc-1], + # "_to_", categories[[ps]][perc]) + # } + # probs_hcst <- append(list(Subset(hcst_probs_ev[[ps]], + # along = 'cat', indices = perc, drop = 'all')), + # probs_hcst) + # probs_obs <- append(list(Subset(obs_probs_ev[[ps]], + # along = 'cat', indices = perc, drop = 'all')), + # probs_obs) + # if (!is.null(data$fcst)) { + # probs_fcst <- append(list(Subset(fcst_probs[[ps]], + # along = 'cat', indices = perc, drop = 'all')), + # probs_fcst) + # } + # all_names <- c(all_names, name_elem) + # } + # } + # names(probs_hcst) <- all_names + # if (!('var' %in% names(dim(probs_hcst[[1]])))) { + # probs_hcst <- lapply(probs_hcst, function(x) { + # dim(x) <- c(var = 1, dim(x)) + # return(x)}) + # } + # names(probs_obs) <- all_names + # if (!('var' %in% names(dim(probs_obs[[1]])))) { + # probs_obs <- lapply(probs_obs, function(x) { + # dim(x) <- c(var = 1, dim(x)) + # return(x)}) + # } + + # if (!is.null(data$fcst)) { + # names(probs_fcst) <- all_names + # if (!('var' %in% names(dim(probs_fcst[[1]])))) { + # probs_fcst <- lapply(probs_fcst, function(x) { + # dim(x) <- c(var = 1, dim(x)) + # return(x)}) + # } + # if (!('syear' %in% 
names(dim(probs_fcst[[1]])))) { + # probs_fcst <- lapply(probs_fcst, function(x) { + # dim(x) <- c(syear = 1, dim(x)) + # return(x)}) + # } + # } + # if (recipe$Analysis$Workflow$Probabilities$save %in% + # c('all', 'bins_only')) { + # save_probabilities(recipe = recipe, probs = probs_hcst, + # data_cube = data$hcst, agg = agg, + # type = "hcst") + # save_probabilities(recipe = recipe, probs = probs_obs, + # data_cube = data$hcst, agg = agg, + # type = "obs") + # TODO Forecast + # if (!is.null(probs_fcst)) { + # save_probabilities(recipe = recipe, probs = probs_fcst, + # data_cube = data$fcst, agg = agg, + # type = "fcst") + # } + # } + # Save ensemble mean for multimodel option: + # hcst_EM <- MeanDims(ano_hcst$data, 'ensemble', drop = T) + # save_metrics(recipe = recipe, + # metrics = list(hcst_EM = + # Subset(hcst_EM, along = 'dat', indices = 1, drop = 'selected')), + # data_cube = data$hcst, agg = agg, + # module = "statistics") + #fcst_EM <- NULL + #if (!is.null(data$fcst)) { + # fcst_EM <- MeanDims(data$fcst$data, 'ensemble', drop = T) + # save_metrics(recipe = recipe, + # metrics = list(fcst_EM = + # Subset(fcst_EM, along = 'dat', indices = 1, drop = 'selected')), + # data_cube = data$fcst, agg = agg, + # module = "statistics") + #} + #return(list(hcst = ano_hcst, obs = ano_obs, fcst = data$fcst, + # hcst.full_val = data$hcst, obs.full_val = data$obs, + # hcst_EM = hcst_EM, fcst_EM = fcst_EM, + # cat_lims = list(hcst_tr = lims_ano_hcst_tr_res, + # obs_tr = lims_ano_obs_tr_res), + # probs = list(hcst_ev = hcst_probs_ev, + # obs_ev = obs_probs_ev), + # ref_obs_tr = ano_obs_tr_res)) +#} + + +## The result contains the inputs for Skill_full_crossval. +## this is a list with the required elements: + ## probs is a list with + ## probs$hcst_ev and probs$obs_ev + ## probs$hcst_ev will have as many elements in the $Probabilities$percentiles + ## each element will be an array with 'cat' dimension + ## the same for probs$obs_ev + ## hcst is a s2dv_cube for the post-processed hindcast for the evalutaion indices + ## in this case cross validated anomalies + ## obs is a s2dv_cube for the post-processed obs + ## in this case cross validated anomalies + ## fcst is a s2dv_cube for the post-processed fcst + ## in this case cross anomalies with the full hindcast period + ## this object is not required for skill assessment + ## hcst.full_val and obs.full_val are the original data to compute mean bias + ## cat_lims used to compute the probabilities + ## this object is not required for skill assessment + ## ref_obs_tr is an array with the cross-validate observed anomalies + ## to be used as reference forecast in the CRPSS and CRPS_clim + ## it is computed from the training indices + diff --git a/modules/Crossval/Crossval_multimodel_anomalies.R b/modules/Crossval/Crossval_multimodel_anomalies.R index 89afff40..fd7fd0fe 100644 --- a/modules/Crossval/Crossval_multimodel_anomalies.R +++ b/modules/Crossval/Crossval_multimodel_anomalies.R @@ -100,7 +100,7 @@ Crossval_multimodel_anomalies <- function(recipe, data) { lapply(prob_lims, function(ps) { quantile(as.vector(res), ps, na.rm = na.rm)})}, - output_dims = lapply(categories, function(x) {'cat'}), + output_dims = lapply(categories, function(x) {'bin'}), prob_lims = categories, ncores = ncores) lims_ano_obs_tr <- Apply(ano_obs_tr, @@ -108,7 +108,7 @@ Crossval_multimodel_anomalies <- function(recipe, data) { fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { quantile(as.vector(x), ps, na.rm = na.rm)})}, - output_dims = lapply(categories, 
function(x){'cat'}), + output_dims = lapply(categories, function(x){'bin'}), prob_lims = categories, ncores = ncores) @@ -133,9 +133,9 @@ Crossval_multimodel_anomalies <- function(recipe, data) { ano_obs_tr_res <- Subset(ano_obs_tr_res, along = 'unneeded', indices = 1, drop = 'selected') for(ps in 1:length(categories)) { - names(dim(lims_ano_hcst_tr_res[[ps]])) <- c('cat', + names(dim(lims_ano_hcst_tr_res[[ps]])) <- c('bin', orig_dims[-which(orig_dims %in% c('ensemble', 'unneeded'))], 'syear') - names(dim(lims_ano_obs_tr_res[[ps]])) <- c('cat', + names(dim(lims_ano_obs_tr_res[[ps]])) <- c('bin', tr_dim_names[-which(tr_dim_names %in% c('ensemble'))]) lims_ano_obs_tr_res[[ps]] <- Subset(lims_ano_obs_tr_res[[ps]], along = 'unneeded', indices = 1, drop = 'selected') @@ -157,7 +157,7 @@ Crossval_multimodel_anomalies <- function(recipe, data) { res <- abind(..., along = 2) lapply(prob_lims, function(ps) { quantile(as.vector(res), ps, na.rm = na.rm)})}, - output_dims = lapply(categories, function(x) {'cat'}), + output_dims = lapply(categories, function(x) {'bin'}), prob_lims = categories, ncores = ncores) } @@ -165,7 +165,7 @@ Crossval_multimodel_anomalies <- function(recipe, data) { # Compute Probabilities for (ps in 1:length(categories)) { # create a list of unknown length of systems and limits: - target_dims_list <- append(list(lims = 'cat'), + target_dims_list <- append(list(lims = 'bin'), lapply(ano_hcst_ev_res, function(x) { c('syear', 'ensemble')})) hcst_probs_ev[[ps]] <- Apply(append(list(lims = lims_ano_hcst_tr[[ps]]), @@ -176,14 +176,14 @@ Crossval_multimodel_anomalies <- function(recipe, data) { names(dim(res)) <- c('syear', 'ensemble') GetProbs(res, time_dim = 'syear', prob_thresholds = NULL, - bin_dim_abs = 'cat', + bin_dim_abs = 'bin', indices_for_quantiles = NULL, memb_dim = 'ensemble', abs_thresholds = lims)}, ncores = ncores)$output1 obs_probs_ev[[ps]] <- GetProbs(ano_obs_ev_res, time_dim = 'syear', prob_thresholds = NULL, - bin_dim_abs = 'cat', + bin_dim_abs = 'bin', indices_for_quantiles = NULL, memb_dim = 'ensemble', abs_thresholds = lims_ano_obs_tr_res[[ps]], @@ -197,7 +197,7 @@ fcst_probs <- NULL # function(lims, fcst) { fcst_probs[[ps]] <- GetProbs(ano_fcst, time_dim = 'syear', prob_thresholds = NULL, - bin_dim_abs = 'cat', + bin_dim_abs = 'bin', indices_for_quantiles = NULL, memb_dim = 'ensemble', abs_thresholds = lims_fcst[[ps]], diff --git a/modules/Crossval/Crossval_multimodel_metrics.R b/modules/Crossval/Crossval_multimodel_metrics.R index 7f6a7292..2adfb361 100644 --- a/modules/Crossval/Crossval_multimodel_metrics.R +++ b/modules/Crossval/Crossval_multimodel_metrics.R @@ -36,8 +36,8 @@ Crossval_multimodel_metrics <- function(recipe, alpha <- 0.05 } - requested_metrics <- strsplit(recipe$Analysis$Workflow$Skill$metric, - ", | |,")[[1]] + requested_metrics <- tolower(strsplit(recipe$Analysis$Workflow$Skill$metric, + ", | |,")[[1]]) skill_metrics <- list() if (!is.null(data)) { # conver $data elements to list to use multiApply: @@ -234,12 +234,12 @@ Crossval_multimodel_metrics <- function(recipe, if ('rps' %in% requested_metrics) { rps <- RPS(exp = data$probs$hcst[[ps]], obs = data$probs$obs[[ps]], memb_dim = NULL, - cat_dim = 'cat', cross.val = FALSE, time_dim = 'syear', + cat_dim = 'bin', cross.val = FALSE, time_dim = 'syear', Fair = Fair, nmemb = nmemb, ncores = ncores) rps_clim <- Apply(list(data$probs$obs[[ps]]), - target_dims = c('cat', 'syear'), - RPS_clim, bin_dim_abs = 'cat', Fair = Fair, + target_dims = c('bin', 'syear'), + RPS_clim, bin_dim_abs = 'bin', Fair = 
Fair, cross.val = FALSE, ncores = ncores)$output1 skill_metrics$rps <- rps skill_metrics$rps_clim <- rps_clim @@ -254,7 +254,7 @@ Crossval_multimodel_metrics <- function(recipe, obs = data$probs$obs[[ps]], ref = NULL, # ref is 1/3 by default if terciles time_dim = 'syear', memb_dim = NULL, - cat_dim = 'cat', nmemb = nmemb, + cat_dim = 'bin', nmemb = nmemb, dat_dim = NULL, prob_thresholds = categories[[ps]], # un use param when providing probs indices_for_clim = NULL, diff --git a/modules/Loading/R/load_decadal.R b/modules/Loading/R/load_decadal.R index 1e9c332a..e4e6e4ba 100644 --- a/modules/Loading/R/load_decadal.R +++ b/modules/Loading/R/load_decadal.R @@ -66,7 +66,6 @@ load_decadal <- function(recipe) { if (identical(member, 'all')) { member <- strsplit(archive$System[[exp.name]]$member, ',')[[1]] } - #------------------------- # derived from above: #------------------------- diff --git a/modules/Visualization/R/plot_most_likely_terciles_map.R b/modules/Visualization/R/plot_most_likely_terciles_map.R index e15928d9..07ecda9c 100644 --- a/modules/Visualization/R/plot_most_likely_terciles_map.R +++ b/modules/Visualization/R/plot_most_likely_terciles_map.R @@ -17,7 +17,6 @@ plot_most_likely_terciles <- function(recipe, dots, outdir, output_conf) { - ## TODO: Add 'anomaly' to plot title # Abort if frequency is daily if (recipe$Analysis$Variables$freq %in% c("daily", "daily_mean")) { @@ -85,6 +84,37 @@ plot_most_likely_terciles <- function(recipe, along = c("var"), indices = var, drop = 'selected') + if ('sday' %in% names(dim(var_probs))) { + if (dim(var_probs)['sday'] == 1) { + var_probs <- ClimProjDiags::Subset(var_probs, + along = "sday", + indices = 1, + drop = 'selected') + } else { + stop("'sday' dimension present in fcst probs") + } + } + if ('sweek' %in% names(dim(var_probs))) { + if (dim(var_probs)['sweek'] == 1) { + var_probs <- ClimProjDiags::Subset(var_probs, + along = "sweek", + indices = 1, + drop = 'selected') + } else { + stop("'sweek' dimension present in fcst probs") + } + } + if ('dat' %in% names(dim(var_probs))) { + if (dim(var_probs)['dat'] == 1) { + var_probs <- ClimProjDiags::Subset(var_probs, + along = "dat", + indices = 1, + drop = 'selected') + } else { + stop("'dat' dimension present in fcst probs") + } + } + var_probs <- Reorder(var_probs, c("syear", "time", "bin", "longitude", "latitude")) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 3750baea..b86affa3 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -25,7 +25,6 @@ Visualization <- function(recipe, # s2dv_cube objects # skill_metrics: list of arrays containing the computed skill metrics # significance: Bool. 
Whether to include significance dots where applicable - # Try to set default configuration if not specified by user if (is.null(output_conf) && !is.null(recipe$Analysis$Region$name)) { output_conf <- read_yaml("modules/Visualization/output_size.yml", diff --git a/modules/Visualization/output_size.yml b/modules/Visualization/output_size.yml index 76631e4b..c282679d 100644 --- a/modules/Visualization/output_size.yml +++ b/modules/Visualization/output_size.yml @@ -37,5 +37,12 @@ region: #units inches skill_metrics: width: 8 height: 5 - + PlotEquiMap: + most_likely_terciles: + width: 8.5 + height: 8.5 + dot_size: 2 + # xlonshft: 180 + plot_margin: !expr c(0, 4.1, 4.1, 2.1) + # Add other regions diff --git a/recipe_prlr.yml b/recipe_prlr.yml index b0ad1fad..706d8850 100644 --- a/recipe_prlr.yml +++ b/recipe_prlr.yml @@ -26,7 +26,7 @@ Analysis: Reference: name: ERA5 # Mandatory, str: Reference codename. See docu. Time: - sdate: '0901' + sdate: '1101' fcst_year: '2024' hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' diff --git a/recipe_seas5_oper.yml b/recipe_seas5_oper.yml index b3f501af..0ca42408 100644 --- a/recipe_seas5_oper.yml +++ b/recipe_seas5_oper.yml @@ -24,7 +24,7 @@ Analysis: Reference: name: ERA5 # Mandatory, str: Reference codename. See docu. Time: - sdate: '0901' + sdate: '1001' fcst_year: '2024' hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' diff --git a/recipe_tas.yml b/recipe_tas.yml index 98821be3..1c579002 100644 --- a/recipe_tas.yml +++ b/recipe_tas.yml @@ -14,10 +14,10 @@ Analysis: #- {name: 'Meteo-France-System8'} #- {name: 'CMCC-SPS3.5'} - {name: 'ECMWF-SEAS5.1'} - #- {name: 'UK-MetOffice-Glosea601'} - #- {name: 'NCEP-CFSv2'} + #- {name: 'UK-MetOffice-Glosea603'} + ##- {name: 'NCEP-CFSv2'} #- {name: 'DWD-GCFS2.1'} - #- {name: 'ECCC-CanCM4i'} + #- {name: 'ECCC-GEM5.2-NEMO'} # name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 Multimodel: execute: yes @@ -26,24 +26,22 @@ Analysis: Reference: name: ERA5 # Mandatory, str: Reference codename. See docu. Time: - sdate: '0901' + sdate: '1101' fcst_year: '2024' hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' ftime_min: 1 # Mandatory, int: First leadtime time step in months ftime_max: 6 # Mandatory, int: Last leadtime time step in months Region: - latmin: -90 - latmax: 90 - lonmin: -180 #0 #-179.9 - lonmax: -179.9 #359.9 #180 + - {name: 'Global', latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9} + #- {name: 'Global', latmin: 0, latmax: 10, lonmin: 0, lonmax: 10} Regrid: method: conservative # Mandatory, str: Interpolation method. See docu. type: "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_reference" #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. Workflow: Anomalies: - compute: yes + compute: no cross_validation: no save: none Time_aggregation: @@ -52,7 +50,7 @@ Analysis: ini: [1, 2, 3, 4] end: [3, 4, 5, 6] Calibration: - method: raw # Mandatory, str: Calibration method. See docu. + method: evmos #crps_min #evmos # Mandatory, str: Calibration method. See docu. 
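+      # evmos applies the same linear rescaling to every member so that the
+      # calibrated ensemble variance matches the observed one; the other
+      # cal.method options accepted by CSTools are bias, mse_min, crps_min
+      # and rpc-based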
cross_validation: yes save: none Skill: diff --git a/recipe_tas_decadal.yml b/recipe_tas_decadal.yml index e5169dbe..3edfa9c9 100644 --- a/recipe_tas_decadal.yml +++ b/recipe_tas_decadal.yml @@ -11,13 +11,13 @@ Analysis: flux: no Datasets: System: - - {name: 'EC-Earth3-i4'} - - {name: 'HadGEM3-GC31-MM'} - - {name: 'BCC-CSM2-MR'} - - {name: 'CanESM5'} - - {name: 'CMCC-CM2-SR5'} - - {name: 'FGOALS-f3-L'} - - {name: 'IPSL-CM6A-LR'} + - {name: 'EC-Earth3-i4', member: 'all'} + # - {name: 'HadGEM3-GC31-MM'} + #- {name: 'BCC-CSM2-MR'} + #- {name: 'CanESM5'} + #- {name: 'CMCC-CM2-SR5'} + #- {name: 'FGOALS-f3-L'} + #- {name: 'IPSL-CM6A-LR'} # name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 Multimodel: execute: yes diff --git a/recipe_tas_singl.yml b/recipe_tas_singl.yml index be20a311..695d7fd3 100644 --- a/recipe_tas_singl.yml +++ b/recipe_tas_singl.yml @@ -11,10 +11,10 @@ Analysis: flux: no Datasets: System: - #- {name: 'ECMWF-SEAS5.1'} + - {name: 'ECMWF-SEAS5.1'} #- {name: 'Meteo-France-System8'} #- {name: 'CMCC-SPS3.5'} - name: UK-MetOffice-Glosea601 #Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + # name: UK-MetOffice-Glosea601 #Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 Multimodel: execute: no approach: pooled # Mandatory, bool: Either yes/true or no/false @@ -22,8 +22,8 @@ Analysis: Reference: name: ERA5 # Mandatory, str: Reference codename. See docu. Time: - sdate: '0101' - fcst_year: #'2021' + sdate: '0901' + fcst_year: '2024' hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' hcst_end: '2000' # Mandatory, int: Hindcast end year 'YYYY' ftime_min: 1 # Mandatory, int: First leadtime time step in months @@ -43,9 +43,12 @@ Analysis: cross_validation: no save: all Time_aggregation: - execute: no + execute: yes + method: average + ini: [1,2,3,4] + end: [3,4,5,6] Calibration: - method: raw # Mandatory, str: Calibration method. See docu. + method: mse_min # Mandatory, str: Calibration method. See docu. cross_validation: yes save: none Skill: diff --git a/recipe_tasv2.yml b/recipe_tasv2.yml new file mode 100644 index 00000000..9350a407 --- /dev/null +++ b/recipe_tasv2.yml @@ -0,0 +1,111 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: tas + freq: monthly_mean + units: C + flux: no + Datasets: + System: + #- {name: 'Meteo-France-System8'} + #- {name: 'CMCC-SPS3.5'} + - {name: 'ECMWF-SEAS5.1'} + #- {name: 'UK-MetOffice-Glosea603'} + ##- {name: 'NCEP-CFSv2'} + #- {name: 'DWD-GCFS2.1'} + #- {name: 'ECCC-GEM5.2-NEMO'} + # name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + Multimodel: + execute: yes + approach: pooled # Mandatory, bool: Either yes/true or no/false + createFrom: Anomalies + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. 
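+  # Note: sdate is the start date given as 'MMDD'; together with fcst_year
+  # and hcst_start/hcst_end it defines the initialisation dates to load
+  # (e.g. '1101' with fcst_year '2024' is the 1 November 2024 forecast)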
+ Time: + sdate: '1101' + fcst_year: '2024' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + - {name: 'Global', latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9} + #- {name: 'Global', latmin: 0, latmax: 10, lonmin: 0, lonmax: 10} + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. + type: "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_reference" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: no + cross_validation: no + save: none + Time_aggregation: + execute: yes + method: average + ini: [1, 2, 3, 4] + end: [3, 4, 5, 6] + Calibration: + method: evmos #crps_min #evmos # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov n_eff + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. + save: none + Indicators: + index: no + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles + multi_panel: no + mask_terciles: 'both' + mask_ens: 'both' + projection: Robinson + file_format: 'PNG' + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: NULL + metric: mean_bias enscorr rpss crpss EnsSprErr + metric_aggregation: 'skill' + #inf_to_na: yes + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 20 # Optional, int: number of cores, defaults to 1 + remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE + alpha: 0.05 + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: gpfs + output_dir: /home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /home/bsc/bsc032339/sunset/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. 
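+    # these auto_conf settings are only read when autosubmit above is set
+    # to yes (see "fill only if using autosubmit")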
+ email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + diff --git a/sunset.sh b/sunset.sh index 43441ada..b47c3d88 100644 --- a/sunset.sh +++ b/sunset.sh @@ -14,5 +14,7 @@ source /gpfs/projects/bsc32/software/suselinux/11/software/Miniconda3/4.7.10/etc/profile.d/conda.sh conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/SUNSET-env_2.0.0 -Rscript /home/bsc/bsc032339/sunset/full_ecvs_anomalies.R /home/bsc/bsc032339/sunset/recipe_tas.yml #full_ecvs_multimodel_anomalies.R #full_NAO.R #ecvs_multimodel_anomalies.R +Rscript /home/bsc/bsc032339/sunset/full_ecvs_anomalies.R /home/bsc/bsc032339/sunset/recipe_tas.yml + +#Rscript /home/bsc/bsc032339/sunset/full_ecvs_multimodel_calibrated.R /home/bsc/bsc032339/sunset/recipe_tas.yml #full_ecvs_multimodel_anomalies.R #full_NAO.R #ecvs_multimodel_anomalies.R diff --git a/sunsetv2.sh b/sunsetv2.sh new file mode 100644 index 00000000..963be381 --- /dev/null +++ b/sunsetv2.sh @@ -0,0 +1,21 @@ +#!/bin/bash +#SBATCH -n 112 +#SBATCH -N 1 +#SBATCH -t 24:00:00 +#SBATCH -J sunset_multimodel +#SBATCH -o sunset_multimodel-%J.out +#SBATCH -e sunset_multimodel-%J.err +#SBATCH --account=bsc32 +#SBATCH --qos=gp_bsces +#SBATCH --constraint=lowmem + + +#### --qos=acc_bsces + +source /gpfs/projects/bsc32/software/suselinux/11/software/Miniconda3/4.7.10/etc/profile.d/conda.sh +conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/SUNSET-env_2.0.0 + +Rscript /home/bsc/bsc032339/sunset/full_ecvs_anomalies.R /home/bsc/bsc032339/sunset/recipe_tasv2.yml + +#Rscript /home/bsc/bsc032339/sunset/full_ecvs_multimodel_calibrated.R /home/bsc/bsc032339/sunset/recipe_tas.yml #full_ecvs_multimodel_anomalies.R #full_NAO.R #ecvs_multimodel_anomalies.R + diff --git a/tools/add_logo.R b/tools/add_logo.R index ad96cf1a..2a44275f 100644 --- a/tools/add_logo.R +++ b/tools/add_logo.R @@ -1,4 +1,4 @@ -add_logo <- function(recipe, logo) { +add_logo <- function(recipe, logo, logo_resize_percentage = 0.25) { # recipe: SUNSET recipe # logo: URL to the logo system <- list.files(paste0(recipe$Run$output_dir, "/plots/")) @@ -10,7 +10,40 @@ add_logo <- function(recipe, logo) { full.names = TRUE) })[[1]] dim(files) <- c(file = length(files)) - Apply(list(files), target_dims = NULL, function(x) { - system(paste("composite -gravity southeast -geometry +10+10", - logo, x, x))}, ncores = recipe$Analysis$ncores) + + Apply(list(files), target_dims = NULL, + fun = function(x, logo, logo_resize_percentage) { + fig_width <- as.numeric(system(paste("identify -format '%w'", x), + intern = TRUE)) + logo_height <- as.numeric(system(paste("convert", logo, + "-resize", fig_width * 0.1, "-format '%h' info:", sep=" "), + intern = TRUE)) + system(paste0("convert ", x, + " -gravity south -background white -splice 0x", + logo_height, " extended_fig.png")) + system(paste0("convert extended_fig.png \\( ", + logo, " -resize ", fig_width * logo_resize_percentage, + " \\) -gravity southeast -composite ", + x)) + }, + logo = logo, + logo_resize_percentage = logo_resize_percentage, + ncores = 1) + file.remove("extended_fig.png") } + + +#fig <- "/home/bsc/bsc032339/recipe_tas_20241106160039/plots/Meto-France-System8/ERA5/evmos/tas/forecast_most_likely_tercile-20240801_ft01.png" + +#fig <- "/home/bsc/bsc032339/recipe_tas_20241106160039/plots/Meto-France-System8/ERA5/evmos/tas/forecast_ensemble_median-20240801_ft01.png" + +#fig <- 
"/home/bsc/bsc032339/recipe_tas_20241106160039/plots/Meto-France-System8/ERA5/evmos/tas/rpss-august_ft01.png" + +#fig_width <- as.numeric(system(paste("identify -format '%w'", fg), intern = TRUE)) + +#logo <- "rsz_rsz_bsc_logo.png" +#logo_height <- as.numeric(system(paste("convert", logo, "-resiz", fig_width * 0.1, "-format '%h' info:", sep=" "), intern = TRUE)) + +#system(paste0("convert ", fig, " -gravity south -background whie -splice 0x", logo_height, " extended_fig.png")) + +#system(paste0("convert extended_fig.png \\( ", logo, " -resize , map_width * 0.25, " \\) -gravity southeast -composite extended_fig.png")) -- GitLab From efde6615f8e44b36f7ffa6766f0b62cc60f2463d Mon Sep 17 00:00:00 2001 From: Nuria Perez Zanon Date: Thu, 14 Nov 2024 10:22:25 +0100 Subject: [PATCH 51/78] name 'bin' dimension --- modules/Crossval/Crossval_metrics.R | 8 +- recipe_prlrv2.yml | 116 ++++++++++++++++++++++++++++ sunsetv2.sh | 2 +- 3 files changed, 121 insertions(+), 5 deletions(-) create mode 100644 recipe_prlrv2.yml diff --git a/modules/Crossval/Crossval_metrics.R b/modules/Crossval/Crossval_metrics.R index cac2adad..d2a52820 100644 --- a/modules/Crossval/Crossval_metrics.R +++ b/modules/Crossval/Crossval_metrics.R @@ -63,12 +63,12 @@ Crossval_metrics <- function(recipe, data_crossval, if ('rps' %in% requested_metrics) { rps <- RPS(exp = data_crossval$probs$hcst_ev[[ps]], obs = data_crossval$probs$obs_ev[[1]], memb_dim = NULL, - cat_dim = 'cat', cross.val = FALSE, time_dim = 'syear', + cat_dim = 'bin', cross.val = FALSE, time_dim = 'syear', Fair = fair, nmemb = nmemb, ncores = ncores) rps_clim <- Apply(list(data_crossval$probs$obs_ev[[1]]), - target_dims = c('cat', 'syear'), - RPS_clim, bin_dim_abs = 'cat', Fair = fair, + target_dims = c('bin', 'syear'), + RPS_clim, bin_dim_abs = 'bin', Fair = fair, cross.val = FALSE, ncores = ncores)$output1 skill_metrics$rps <- rps skill_metrics$rps_clim <- rps_clim @@ -83,7 +83,7 @@ Crossval_metrics <- function(recipe, data_crossval, obs = data_crossval$probs$obs_ev[[1]], ref = NULL, # ref is 1/3 by default if terciles time_dim = 'syear', memb_dim = NULL, - cat_dim = 'cat', nmemb = nmemb, + cat_dim = 'bin', nmemb = nmemb, dat_dim = NULL, prob_thresholds = categories[[ps]], indices_for_clim = NULL, diff --git a/recipe_prlrv2.yml b/recipe_prlrv2.yml new file mode 100644 index 00000000..47e0989f --- /dev/null +++ b/recipe_prlrv2.yml @@ -0,0 +1,116 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: prlr + freq: monthly_mean + units: mm + flux: no + Datasets: + System: + #- {name: 'Meteo-France-System8'} + #- {name: 'CMCC-SPS3.5'} + - {name: 'ECMWF-SEAS5.1'} + #- {name: 'UK-MetOffice-Glosea601'} + #- {name: 'NCEP-CFSv2'} + #- {name: 'DWD-GCFS2.1'} + #- {name: 'ECCC-CanCM4i'} + # name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + Multimodel: + execute: yes + approach: pooled # Mandatory, bool: Either yes/true or no/false + createFrom: Anomalies + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. 
+ Time: + sdate: '1101' + fcst_year: '2024' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + name: 'Global' + latmin: -90 + latmax: 90 + lonmin: 0 #-179.9 + lonmax: 359.9 #180 + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. + type: "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_reference" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: yes + cross_validation: no + save: none + Time_aggregation: + execute: yes + method: average + ini: [1, 2, 3, 4] + end: [3, 4, 5, 6] + Calibration: + method: raw # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias enscorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov n_eff + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. + save: none + Indicators: + index: no + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles + multi_panel: no + mask_terciles: 'both' + mask_ens: 'both' + #dots: both + projection: Robinson + file_format: 'PNG' + #projection: robinson + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: NULL + metric: mean_bias enscorr rpss crpss EnsSprErr + metric_aggregation: 'skill' + #inf_to_na: yes + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 20 # Optional, int: number of cores, defaults to 1 + remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE + alpha: 0.05 + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: gpfs + output_dir: /home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /home/bsc/bsc032339/sunset/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. 
+ email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + diff --git a/sunsetv2.sh b/sunsetv2.sh index 963be381..7150d7e6 100644 --- a/sunsetv2.sh +++ b/sunsetv2.sh @@ -15,7 +15,7 @@ source /gpfs/projects/bsc32/software/suselinux/11/software/Miniconda3/4.7.10/etc/profile.d/conda.sh conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/SUNSET-env_2.0.0 -Rscript /home/bsc/bsc032339/sunset/full_ecvs_anomalies.R /home/bsc/bsc032339/sunset/recipe_tasv2.yml +Rscript /home/bsc/bsc032339/sunset/full_ecvs_anomalies.R /home/bsc/bsc032339/sunset/recipe_prlrv2.yml #Rscript /home/bsc/bsc032339/sunset/full_ecvs_multimodel_calibrated.R /home/bsc/bsc032339/sunset/recipe_tas.yml #full_ecvs_multimodel_anomalies.R #full_NAO.R #ecvs_multimodel_anomalies.R -- GitLab From a4ab37f25bf1cc7270b3173fc346eacc9e50fc42 Mon Sep 17 00:00:00 2001 From: Nuria Perez Zanon Date: Tue, 26 Nov 2024 18:11:11 +0100 Subject: [PATCH 52/78] bin and save --- full_ecvs_calibration.R | 14 +++++++++++++ modules/Crossval/Crossval_Calibration.R | 28 +++++++++++++------------ modules/Crossval/Crossval_anomalies.R | 14 +++++++++---- modules/Crossval/Crossval_metrics.R | 5 +++-- recipe_prlrv2.yml | 2 +- recipe_tas.yml | 10 ++++----- recipe_tasv2.yml | 2 +- sunset.sh | 4 ++-- sunsetv2.sh | 2 +- 9 files changed, 52 insertions(+), 29 deletions(-) diff --git a/full_ecvs_calibration.R b/full_ecvs_calibration.R index 87b21574..455316c9 100644 --- a/full_ecvs_calibration.R +++ b/full_ecvs_calibration.R @@ -16,6 +16,20 @@ data_summary(data$hcst, recipe) data_summary(data$obs, recipe) data_summary(data$fcst, recipe) +#### Reorde for MostLikely to plot EU centered: + data$hcst <- CST_Subset(data$hcst, along = 'longitude', + indices=c(181:360, 1:180)) + data$fcst <- CST_Subset(data$fcst, along = 'longitude', + indices=c(181:360, 1:180)) + data$obs <- CST_Subset(data$obs, along = 'longitude', + indices=c(181:360, 1:180)) + data$hcst$coords$longitude[1:180] <- data$hcst$coords$longitude[1:180] - 360 + data$fcst$coords$longitude[1:180] <- data$fcst$coords$longitude[1:180] - 360 + data$obs$coords$longitude[1:180] <- data$obs$coords$longitude[1:180] - 360 + #### + + + data_agg <- Aggregation(recipe = recipe, data = data) data_summary(data_agg$hcst, recipe) data_summary(data_agg$obs, recipe) diff --git a/modules/Crossval/Crossval_Calibration.R b/modules/Crossval/Crossval_Calibration.R index 64e44701..b3c5f37d 100644 --- a/modules/Crossval/Crossval_Calibration.R +++ b/modules/Crossval/Crossval_Calibration.R @@ -91,14 +91,16 @@ source("https://earth.bsc.es/gitlab/external/cstools/-/raw/d6914a40c11d09168b9b4 fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { quantile(as.vector(x), ps, na.rm = na.rm)})}, - output_dims = lapply(categories, function(x) {'cat'}), + output_dims = lapply(categories, + function(x) {'bin'}), prob_lims = categories, ncores = ncores) lims_obs_tr <- Apply(obs_tr, target_dims = c('syear'),#, 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { quantile(as.vector(x), ps, na.rm = na.rm)})}, - output_dims = lapply(categories, function(x){'cat'}), + output_dims = lapply(categories, + function(x){'bin'}), prob_lims = categories, ncores = ncores) @@ -136,10 +138,10 @@ source("https://earth.bsc.es/gitlab/external/cstools/-/raw/d6914a40c11d09168b9b4 cal_hcst_ev_res <- Subset(cal_hcst_ev_res, along = 'unneeded', indices = 1, drop = 'selected') for(ps in 
1:length(categories)) { - names(dim(lims_cal_hcst_tr_res[[ps]])) <- c('cat', 'dat', 'var', 'sday', 'sweek', + names(dim(lims_cal_hcst_tr_res[[ps]])) <- c('bin', 'dat', 'var', 'sday', 'sweek', 'time', 'latitude', 'longitude', 'syear') - names(dim(lims_obs_tr_res[[ps]])) <- c('cat', 'dat', 'var', 'sday', 'sweek', + names(dim(lims_obs_tr_res[[ps]])) <- c('bin', 'dat', 'var', 'sday', 'sweek', 'time', 'latitude', 'longitude', 'unneeded', 'syear') lims_obs_tr_res[[ps]] <- Subset(lims_obs_tr_res[[ps]], @@ -183,21 +185,21 @@ source("https://earth.bsc.es/gitlab/external/cstools/-/raw/d6914a40c11d09168b9b4 fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { quantile(as.vector(x), ps, na.rm = na.rm)})}, - output_dims = lapply(categories, function(x) {'cat'}), + output_dims = lapply(categories, function(x) {'bin'}), prob_lims = categories, ncores = ncores) lims <- Apply(data$obs$data, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { quantile(as.vector(x), ps, na.rm = na.rm)})}, - output_dims = lapply(categories, function(x) {'cat'}), + output_dims = lapply(categories, function(x) {'bin'}), prob_lims = categories, ncores = ncores) tmp_lims2 <- list() # What to save hcst category limits or obs category limits? for(ps in 1:length(categories)) { tmp_lims3 <- drop(lims[[ps]]) - for (l in 1:dim(lims[[ps]])['cat']) { + for (l in 1:dim(lims[[ps]])['bin']) { tmp_lims <- tmp_lims3[l,,,] if (!('var' %in% names(dim(tmp_lims)))) { dim(tmp_lims) <- c(var = 1, dim(tmp_lims)) @@ -217,14 +219,14 @@ source("https://earth.bsc.es/gitlab/external/cstools/-/raw/d6914a40c11d09168b9b4 for (ps in 1:length(categories)) { hcst_probs_ev[[ps]] <- GetProbs(cal_hcst_ev_res, time_dim = 'syear', prob_thresholds = NULL, - bin_dim_abs = 'cat', + bin_dim_abs = 'bin', indices_for_quantiles = NULL, memb_dim = 'ensemble', abs_thresholds = lims_cal_hcst_tr_res[[ps]], ncores = ncores) obs_probs_ev[[ps]] <- GetProbs(data$obs$data, time_dim = 'syear', prob_thresholds = NULL, - bin_dim_abs = 'cat', + bin_dim_abs = 'bin', indices_for_quantiles = NULL, memb_dim = 'ensemble', abs_thresholds = lims_obs_tr_res[[ps]], @@ -232,7 +234,7 @@ source("https://earth.bsc.es/gitlab/external/cstools/-/raw/d6914a40c11d09168b9b4 if (!is.null(data$fcst)) { fcst_probs[[ps]] <- GetProbs(data$fcst$data, time_dim = 'syear', prob_thresholds = NULL, - bin_dim_abs = 'cat', + bin_dim_abs = 'bin', indices_for_quantiles = NULL, memb_dim = 'ensemble', abs_thresholds = lims_fcst[[ps]], @@ -277,14 +279,14 @@ source("https://earth.bsc.es/gitlab/external/cstools/-/raw/d6914a40c11d09168b9b4 "_to_", categories[[ps]][perc]) } probs_hcst <- append(list(Subset(hcst_probs_ev[[ps]], - along = 'cat', indices = perc, drop = 'all')), + along = 'bin', indices = perc, drop = 'all')), probs_hcst) probs_obs <- append(list(Subset(obs_probs_ev[[ps]], - along = 'cat', indices = perc, drop = 'all')), + along = 'bin', indices = perc, drop = 'all')), probs_obs) if (!is.null(data$fcst)) { probs_fcst <- append(list(Subset(fcst_probs[[ps]], - along = 'cat', indices = perc, drop = 'all')), + along = 'bin', indices = perc, drop = 'all')), probs_fcst) } all_names <- c(all_names, name_elem) diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R index c99d38bf..606368b7 100644 --- a/modules/Crossval/Crossval_anomalies.R +++ b/modules/Crossval/Crossval_anomalies.R @@ -82,15 +82,21 @@ Crossval_anomalies <- function(recipe, data) { lims_ano_hcst_tr <- Apply(ano_hcst_tr, target_dims = c('syear', 'ensemble'), 
fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - quantile(as.vector(x), ps, na.rm = na.rm)})}, - output_dims = lapply(categories, function(x) {'bin'}), + res <- quantile(as.vector(x), + ps, na.rm = na.rm) + dim(res) <- c(bin = length(res)) + return(res) + })}, prob_lims = categories, ncores = ncores) lims_ano_obs_tr <- Apply(ano_obs_tr, target_dims = c('syear'),#, 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - quantile(as.vector(x), ps, na.rm = na.rm)})}, - output_dims = lapply(categories, function(x){'bin'}), + res <- quantile(as.vector(x), ps, + na.rm = na.rm) + dim(res) <- c(bin = length(res)) + return(res) + })}, prob_lims = categories, ncores = ncores) #store results diff --git a/modules/Crossval/Crossval_metrics.R b/modules/Crossval/Crossval_metrics.R index d2a52820..addbb49b 100644 --- a/modules/Crossval/Crossval_metrics.R +++ b/modules/Crossval/Crossval_metrics.R @@ -245,11 +245,12 @@ Crossval_metrics <- function(recipe, data_crossval, return(res) }) # Save metrics - save_metrics(recipe = recipe, + if (recipe$Analysis$Workflow$Skill$save == TRUE) { + save_metrics(recipe = recipe, metrics = skill_metrics, data_cube = data_crossval$hcst, agg = 'global', outdir = recipe$Run$output_dir) - + } recipe$Run$output_dir <- original # reduce dimension to work with Visualization module: skill_metrics <- lapply(skill_metrics, function(x) {drop(x)}) diff --git a/recipe_prlrv2.yml b/recipe_prlrv2.yml index 47e0989f..acfa708b 100644 --- a/recipe_prlrv2.yml +++ b/recipe_prlrv2.yml @@ -53,7 +53,7 @@ Analysis: ini: [1, 2, 3, 4] end: [3, 4, 5, 6] Calibration: - method: raw # Mandatory, str: Calibration method. See docu. + method: evmos # Mandatory, str: Calibration method. See docu. cross_validation: yes save: none Skill: diff --git a/recipe_tas.yml b/recipe_tas.yml index 1c579002..c6589a06 100644 --- a/recipe_tas.yml +++ b/recipe_tas.yml @@ -11,13 +11,13 @@ Analysis: flux: no Datasets: System: - #- {name: 'Meteo-France-System8'} - #- {name: 'CMCC-SPS3.5'} + - {name: 'Meteo-France-System8'} + - {name: 'CMCC-SPS3.5'} - {name: 'ECMWF-SEAS5.1'} #- {name: 'UK-MetOffice-Glosea603'} ##- {name: 'NCEP-CFSv2'} - #- {name: 'DWD-GCFS2.1'} - #- {name: 'ECCC-GEM5.2-NEMO'} + - {name: 'DWD-GCFS2.1'} + - {name: 'ECCC-GEM5.2-NEMO'} # name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 Multimodel: execute: yes @@ -84,7 +84,7 @@ Analysis: col1_width: NULL col2_width: NULL calculate_diff: FALSE - ncores: 100 # Optional, int: number of cores, defaults to 1 + ncores: 30 # Optional, int: number of cores, defaults to 1 remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE alpha: 0.05 Output_format: scorecards diff --git a/recipe_tasv2.yml b/recipe_tasv2.yml index 9350a407..db0d2a07 100644 --- a/recipe_tasv2.yml +++ b/recipe_tasv2.yml @@ -50,7 +50,7 @@ Analysis: ini: [1, 2, 3, 4] end: [3, 4, 5, 6] Calibration: - method: evmos #crps_min #evmos # Mandatory, str: Calibration method. See docu. + method: mse_min #crps_min #evmos # Mandatory, str: Calibration method. See docu. 
cross_validation: yes save: none Skill: diff --git a/sunset.sh b/sunset.sh index b47c3d88..fcf712a0 100644 --- a/sunset.sh +++ b/sunset.sh @@ -14,7 +14,7 @@ source /gpfs/projects/bsc32/software/suselinux/11/software/Miniconda3/4.7.10/etc/profile.d/conda.sh conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/SUNSET-env_2.0.0 -Rscript /home/bsc/bsc032339/sunset/full_ecvs_anomalies.R /home/bsc/bsc032339/sunset/recipe_tas.yml +#Rscript /home/bsc/bsc032339/sunset/full_ecvs_anomalies.R /home/bsc/bsc032339/sunset/recipe_tas.yml -#Rscript /home/bsc/bsc032339/sunset/full_ecvs_multimodel_calibrated.R /home/bsc/bsc032339/sunset/recipe_tas.yml #full_ecvs_multimodel_anomalies.R #full_NAO.R #ecvs_multimodel_anomalies.R +Rscript /home/bsc/bsc032339/sunset/full_ecvs_multimodel_calibrated.R /home/bsc/bsc032339/sunset/recipe_tas.yml #full_ecvs_multimodel_anomalies.R #full_NAO.R #ecvs_multimodel_anomalies.R diff --git a/sunsetv2.sh b/sunsetv2.sh index 7150d7e6..80f91312 100644 --- a/sunsetv2.sh +++ b/sunsetv2.sh @@ -15,7 +15,7 @@ source /gpfs/projects/bsc32/software/suselinux/11/software/Miniconda3/4.7.10/etc/profile.d/conda.sh conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/SUNSET-env_2.0.0 -Rscript /home/bsc/bsc032339/sunset/full_ecvs_anomalies.R /home/bsc/bsc032339/sunset/recipe_prlrv2.yml +Rscript /home/bsc/bsc032339/sunset/full_ecvs_calibration.R /home/bsc/bsc032339/sunset/recipe_tasv2.yml #Rscript /home/bsc/bsc032339/sunset/full_ecvs_multimodel_calibrated.R /home/bsc/bsc032339/sunset/recipe_tas.yml #full_ecvs_multimodel_anomalies.R #full_NAO.R #ecvs_multimodel_anomalies.R -- GitLab From 78471d72a685013b2edf24fa8443618a6d3413bc Mon Sep 17 00:00:00 2001 From: Nuria Perez Zanon Date: Wed, 4 Dec 2024 14:39:44 +0100 Subject: [PATCH 53/78] Decadal recipe updated --- recipe_tas_decadal.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/recipe_tas_decadal.yml b/recipe_tas_decadal.yml index 3edfa9c9..86bcec08 100644 --- a/recipe_tas_decadal.yml +++ b/recipe_tas_decadal.yml @@ -47,7 +47,9 @@ Analysis: method: average user_def: Y1: [1, 12] # aggregate from 1 to 3 forecast times - Y1-JJA: [6, 8] + Y1-Y5: [1, 60] + Y6-Y10: [61, 120] + #Y1-JJA: [6, 8] #Y2-Y5: [13, 60] #Y5-Y10: [49, 120] Anomalies: -- GitLab From 22e3ac129723ffefa2f93e45c1bfcb2287c8c3b4 Mon Sep 17 00:00:00 2001 From: Nuria Perez Zanon Date: Fri, 6 Dec 2024 11:14:07 +0100 Subject: [PATCH 54/78] working calibration --- full_ecvs_anomalies.R | 36 +-- full_ecvs_calibration.R | 33 ++- .../{old_modules => }/Anomalies/Anomalies.R | 0 .../Calibration/Calibration.R | 0 modules/Crossval/Crossval_Calibration.R | 235 +++++++++++++----- modules/Crossval/Crossval_metrics.R | 32 +++ .../Downscaling/Downscaling.R | 0 .../Downscaling/tmp/Analogs.R | 0 .../{old_modules => }/Downscaling/tmp/Intbc.R | 0 .../Downscaling/tmp/Interpolation.R | 0 .../{old_modules => }/Downscaling/tmp/Intlr.R | 0 .../Downscaling/tmp/LogisticReg.R | 0 .../{old_modules => }/Downscaling/tmp/Utils.R | 0 modules/{old_modules => }/Indices/Indices.R | 0 .../{old_modules => }/Indices/R/compute_nao.R | 0 .../Indices/R/compute_nino.R | 0 .../Indices/R/correlation_eno.R | 0 .../Indices/R/drop_indices_dims.R | 0 .../Indices/R/plot_deterministic_forecast.R | 0 .../{old_modules => }/Multimodel/Multimodel.R | 0 .../Multimodel/Multimodel_skill.R | 0 .../Multimodel/build_multimodel.R | 0 .../Multimodel/clean_multimodel.R | 0 .../Multimodel/load_multimodel.R | 0 .../Multimodel/load_multimodel_mean.R | 0 .../Multimodel/load_multimodel_probs.R | 0 .../{old_modules 
=> }/Statistics/Statistics.R | 0 recipe_ecvs_ano_seas.yml | 109 ++++++++ recipe_tas.yml => recipe_ecvs_cal_seas.yml | 14 +- recipe_subseasonal_ecvs.yml | 139 +++++++++++ recipe_tas_singl_ano_seas.yml | 106 ++++++++ ...singl.yml => recipe_tas_singl_cal_seas.yml | 8 +- sunset.sh | 6 +- sunsetv2.sh | 4 +- 34 files changed, 610 insertions(+), 112 deletions(-) rename modules/{old_modules => }/Anomalies/Anomalies.R (100%) rename modules/{old_modules => }/Calibration/Calibration.R (100%) rename modules/{old_modules => }/Downscaling/Downscaling.R (100%) rename modules/{old_modules => }/Downscaling/tmp/Analogs.R (100%) rename modules/{old_modules => }/Downscaling/tmp/Intbc.R (100%) rename modules/{old_modules => }/Downscaling/tmp/Interpolation.R (100%) rename modules/{old_modules => }/Downscaling/tmp/Intlr.R (100%) rename modules/{old_modules => }/Downscaling/tmp/LogisticReg.R (100%) rename modules/{old_modules => }/Downscaling/tmp/Utils.R (100%) rename modules/{old_modules => }/Indices/Indices.R (100%) rename modules/{old_modules => }/Indices/R/compute_nao.R (100%) rename modules/{old_modules => }/Indices/R/compute_nino.R (100%) rename modules/{old_modules => }/Indices/R/correlation_eno.R (100%) rename modules/{old_modules => }/Indices/R/drop_indices_dims.R (100%) rename modules/{old_modules => }/Indices/R/plot_deterministic_forecast.R (100%) rename modules/{old_modules => }/Multimodel/Multimodel.R (100%) rename modules/{old_modules => }/Multimodel/Multimodel_skill.R (100%) rename modules/{old_modules => }/Multimodel/build_multimodel.R (100%) rename modules/{old_modules => }/Multimodel/clean_multimodel.R (100%) rename modules/{old_modules => }/Multimodel/load_multimodel.R (100%) rename modules/{old_modules => }/Multimodel/load_multimodel_mean.R (100%) rename modules/{old_modules => }/Multimodel/load_multimodel_probs.R (100%) rename modules/{old_modules => }/Statistics/Statistics.R (100%) create mode 100644 recipe_ecvs_ano_seas.yml rename recipe_tas.yml => recipe_ecvs_cal_seas.yml (93%) create mode 100644 recipe_subseasonal_ecvs.yml create mode 100644 recipe_tas_singl_ano_seas.yml rename recipe_tas_singl.yml => recipe_tas_singl_cal_seas.yml (96%) diff --git a/full_ecvs_anomalies.R b/full_ecvs_anomalies.R index ed0dcc53..97bcd115 100644 --- a/full_ecvs_anomalies.R +++ b/full_ecvs_anomalies.R @@ -6,7 +6,7 @@ source("modules/Visualization/Visualization.R") source("modules/Aggregation/Aggregation.R") args = commandArgs(trailingOnly = TRUE) recipe_file <- args[1] -#recipe_file <- "recipe_tas_singl.yml" +#recipe_file <- "recipe_tas_singl_ano_seas.yml" #recipe <- read_atomic_recipe(recipe_file) recipe <- prepare_outputs(recipe_file) # Load datasets @@ -17,15 +17,17 @@ data_summary(data$obs, recipe) data_summary(data$fcst, recipe) #### Reorde for MostLikely to plot EU centered: - data$hcst <- CST_Subset(data$hcst, along = 'longitude', - indices=c(181:360, 1:180)) - data$fcst <- CST_Subset(data$fcst, along = 'longitude', - indices=c(181:360, 1:180)) - data$obs <- CST_Subset(data$obs, along = 'longitude', - indices=c(181:360, 1:180)) - data$hcst$coords$longitude[1:180] <- data$hcst$coords$longitude[1:180] - 360 - data$fcst$coords$longitude[1:180] <- data$fcst$coords$longitude[1:180] - 360 - data$obs$coords$longitude[1:180] <- data$obs$coords$longitude[1:180] - 360 + if (tolower(recipe$Analysis$Region$name) == 'global') { + data$hcst <- CST_Subset(data$hcst, along = 'longitude', + indices=c(181:360, 1:180)) + data$fcst <- CST_Subset(data$fcst, along = 'longitude', + indices=c(181:360, 1:180)) + data$obs <- 
CST_Subset(data$obs, along = 'longitude', + indices=c(181:360, 1:180)) + data$hcst$coords$longitude[1:180] <- data$hcst$coords$longitude[1:180] - 360 + data$fcst$coords$longitude[1:180] <- data$fcst$coords$longitude[1:180] - 360 + data$obs$coords$longitude[1:180] <- data$obs$coords$longitude[1:180] - 360 + } #### @@ -44,15 +46,13 @@ skill_metrics <- Crossval_metrics(recipe = recipe, data_crossval = res, # Required to plot a forecast: data_agg$fcst <- res$fcst tmp_probs <- list(probs_fcst = res$probs$fcst[[1]]) -dim(tmp_probs$probs_fcst) <- c(bin = 3, syear = 1, var = 1, time = 4, latitude = 180, longitude = 360) +nlats <- as.numeric(dim(data_agg$hcst$data)['latitude']) +nlons <- as.numeric(dim(data_agg$hcst$data)['longitude']) +ntimes <- as.numeric(dim(data_agg$hcst$data)['time']) -#tmp_probs$probs_fcst <- list() -#tmp_probs$probs_fcst$prob_b33 <- res$probs$fcst[[1]][1,1,1,1,1,1,,,] -#tmp_probs$probs_fcst$prob_33_to_66 <- res$probs$fcst[[1]][2,1,1,1,1,1,,,] -#tmp_probs$probs_fcst$prob_a66 <- res$probs$fcst[[1]][3,1,1,1,1,1,,,] -#tmp_probs$probs_fcst$prob_b33 <- InsertDim(tmp_probs$probs_fcst$prob_b33, len = 1, pos = 1, name = 'var') -#tmp_probs$probs_fcst$prob_33_to_66 <- InsertDim(tmp_probs$probs_fcst$prob_33_to_66, len = 1, pos = 1, name = 'var') -#tmp_probs$probs_fcst$prob_a66 <- InsertDim(tmp_probs$probs_fcst$prob_a66, len = 1, pos = 1, name = 'var') +# TODO: consider more than terciles: +dim(tmp_probs$probs_fcst) <- c(bin = 3, syear = 1, var = 1, time = ntimes, + latitude = nlats, longitude = nlons) Visualization(recipe = recipe, data = data_agg, skill_metrics = skill_metrics, significance = TRUE, probabilities = tmp_probs) diff --git a/full_ecvs_calibration.R b/full_ecvs_calibration.R index 455316c9..64f8d78b 100644 --- a/full_ecvs_calibration.R +++ b/full_ecvs_calibration.R @@ -6,7 +6,7 @@ source("modules/Visualization/Visualization.R") source("modules/Aggregation/Aggregation.R") args = commandArgs(trailingOnly = TRUE) recipe_file <- args[1] -#recipe_file <- "recipe_tas_singl.yml" +#recipe_file <- "recipe_tas_singl_cal_seas.yml" #recipe <- read_atomic_recipe(recipe_file) recipe <- prepare_outputs(recipe_file) # Load datasets @@ -16,20 +16,21 @@ data_summary(data$hcst, recipe) data_summary(data$obs, recipe) data_summary(data$fcst, recipe) -#### Reorde for MostLikely to plot EU centered: - data$hcst <- CST_Subset(data$hcst, along = 'longitude', - indices=c(181:360, 1:180)) - data$fcst <- CST_Subset(data$fcst, along = 'longitude', - indices=c(181:360, 1:180)) - data$obs <- CST_Subset(data$obs, along = 'longitude', - indices=c(181:360, 1:180)) - data$hcst$coords$longitude[1:180] <- data$hcst$coords$longitude[1:180] - 360 - data$fcst$coords$longitude[1:180] <- data$fcst$coords$longitude[1:180] - 360 - data$obs$coords$longitude[1:180] <- data$obs$coords$longitude[1:180] - 360 + #### Reorde for MostLikely to plot EU centered: + if (tolower(recipe$Analysis$Region$name) == 'global') { + data$hcst <- CST_Subset(data$hcst, along = 'longitude', + indices=c(181:360, 1:180)) + data$fcst <- CST_Subset(data$fcst, along = 'longitude', + indices=c(181:360, 1:180)) + data$obs <- CST_Subset(data$obs, along = 'longitude', + indices=c(181:360, 1:180)) + data$hcst$coords$longitude[1:180] <- data$hcst$coords$longitude[1:180] - 360 + data$fcst$coords$longitude[1:180] <- data$fcst$coords$longitude[1:180] - 360 + data$obs$coords$longitude[1:180] <- data$obs$coords$longitude[1:180] - 360 + } #### - data_agg <- Aggregation(recipe = recipe, data = data) data_summary(data_agg$hcst, recipe) 
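# A minimal stand-alone sketch (illustration only, not code from the module;
# toy dimension sizes are assumptions) of what the Aggregation() call above
# does with the recipe settings ini: [1, 2, 3, 4] and end: [3, 4, 5, 6]:
# each ini/end pair selects one window along the 'time' (forecast month)
# dimension, so a 6-month hindcast is reduced to four overlapping
# 3-month averages when method is 'average'.
ini <- c(1, 2, 3, 4)
end <- c(3, 4, 5, 6)
monthly <- array(rnorm(6 * 24), dim = c(time = 6, syear = 24)) # toy hindcast
agg <- t(sapply(seq_along(ini), function(w) {
  colMeans(monthly[ini[w]:end[w], , drop = FALSE]) # the 'average' method
}))
dim(agg) # 4 aggregated windows x 24 start years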
data_summary(data_agg$obs, recipe) @@ -45,7 +46,13 @@ skill_metrics <- Crossval_metrics(recipe = recipe, data_crossval = res, # Required to plot a forecast: data_agg$fcst <- res$fcst tmp_probs <- list(probs_fcst = res$probs$fcst[[1]]) -dim(tmp_probs$probs_fcst) <- c(bin = 3, syear = 1, var = 1, time = 4, latitude = 180, longitude = 360) +nlats <- as.numeric(dim(data_agg$hcst$data)['latitude']) +nlons <- as.numeric(dim(data_agg$hcst$data)['longitude']) +ntimes <- as.numeric(dim(data_agg$hcst$data)['time']) + +# TODO: consider more than terciles +dim(tmp_probs$probs_fcst) <- c(bin = 3, syear = 1, var = 1, time = ntimes, + latitude = nlats, longitude = nlons) Visualization(recipe = recipe, data = data_agg, skill_metrics = skill_metrics, significance = TRUE, probabilities = tmp_probs) diff --git a/modules/old_modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R similarity index 100% rename from modules/old_modules/Anomalies/Anomalies.R rename to modules/Anomalies/Anomalies.R diff --git a/modules/old_modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R similarity index 100% rename from modules/old_modules/Calibration/Calibration.R rename to modules/Calibration/Calibration.R diff --git a/modules/Crossval/Crossval_Calibration.R b/modules/Crossval/Crossval_Calibration.R index b3c5f37d..a8fea870 100644 --- a/modules/Crossval/Crossval_Calibration.R +++ b/modules/Crossval/Crossval_Calibration.R @@ -1,6 +1,5 @@ # take the output of Flor/s2s/subseasonal_loading.R -#source("../git/sunset/modules/Crossval/R/tmp/GetProbs.R") source("modules/Crossval/R/tmp/GetProbs.R") Crossval_Calibration <- function(recipe, data) { @@ -59,16 +58,17 @@ Crossval_Calibration <- function(recipe, data) { indices = cross[[t]]$eval.dexes) obs_ev <- Subset(data$obs$data, along = 'syear', indices = cross[[t]]$eval.dexes) - if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { -source("https://earth.bsc.es/gitlab/external/cstools/-/raw/d6914a40c11d09168b9b4a191e9a0362b56a5f0c/R/CST_MergeDims.R") - hcst_tr <- CST_MergeDims(hcst_tr, merge_dims = c('sday', 'syear'), + hcst_tr <- MergeDims(hcst_tr, merge_dims = c('sday', 'syear'), rename_dim = 'syear', na.rm = FALSE) - obs_tr <- CST_MergeDims(obs_tr, merge_dims = c('sday', 'syear'), + + obs_tr <- MergeDims(obs_tr, merge_dims = c('sday', 'syear'), rename_dim = 'syear', na.rm = FALSE) - hcst_ev <- CST_MergeDims(hcst_ev, merge_dims = c('sday', 'syear'), + + hcst_ev <- MergeDims(hcst_ev, merge_dims = c('sday', 'syear'), rename_dim = 'syear', na.rm = FALSE) + } cal_hcst_tr <- Calibration(exp = hcst_tr, obs = obs_tr, cal.method = cal_method, @@ -86,13 +86,12 @@ source("https://earth.bsc.es/gitlab/external/cstools/-/raw/d6914a40c11d09168b9b4 apply_to = NULL, alpha = NULL, ncores = ncores) - lims_cal_hcst_tr <- Apply(cal_hcst_tr, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { quantile(as.vector(x), ps, na.rm = na.rm)})}, output_dims = lapply(categories, - function(x) {'bin'}), + function(x) {'bin'}), prob_lims = categories, ncores = ncores) lims_obs_tr <- Apply(obs_tr, target_dims = c('syear'),#, 'ensemble'), @@ -100,10 +99,25 @@ source("https://earth.bsc.es/gitlab/external/cstools/-/raw/d6914a40c11d09168b9b4 lapply(prob_lims, function(ps) { quantile(as.vector(x), ps, na.rm = na.rm)})}, output_dims = lapply(categories, - function(x){'bin'}), + function(x){'bin'}), prob_lims = categories, ncores = ncores) + if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { + cal_hcst_tr <- SplitDim(cal_hcst_tr, split_dim 
= 'syear', + new_dim_name = 'sday', + indices = rep(1:dim(data$hcst$data)['sday'], + length(cross[[t]]$train.dexes))) + cal_hcst_ev <- SplitDim(cal_hcst_ev, split_dim = 'syear', + new_dim_name = 'sday', + indices = rep(1:dim(data$hcst$data)['sday'], + length(cross[[t]]$eval.dexes))) + obs_tr <- SplitDim(obs_tr, split_dim = 'syear', new_dim_name = 'sday', + indices = rep(1:dim(data$hcst$data)['sday'], + length(cross[[t]]$train.dexes))) + + } + #store results cal_hcst_ev_res <- abind(cal_hcst_ev_res, cal_hcst_ev, along = length(dim(cal_hcst_ev)) + 1) @@ -127,26 +141,57 @@ source("https://earth.bsc.es/gitlab/external/cstools/-/raw/d6914a40c11d09168b9b4 info(recipe$Run$logger, "#### Anomalies Cross-validation loop ended #####") gc() - names(dim(cal_hcst_ev_res)) <- c('dat', 'var', 'sday', 'sweek', 'unneeded', 'time', - 'latitude', 'longitude', 'ensemble', 'syear') - names(dim(cal_hcst_tr_res)) <- c('dat', 'var', 'sday', 'sweek', 'loop', 'time', - 'latitude', 'longitude', 'ensemble', 'syear') - names(dim(obs_tr_res)) <- c('dat', 'var', 'sday', 'sweek', 'ensemble', 'time', - 'latitude', 'longitude', 'unneeded', 'syear') - obs_tr_res <- Subset(obs_tr_res, along = 'unneeded', - indices = 1, drop = 'selected') - cal_hcst_ev_res <- Subset(cal_hcst_ev_res, along = 'unneeded', - indices = 1, drop = 'selected') - for(ps in 1:length(categories)) { - names(dim(lims_cal_hcst_tr_res[[ps]])) <- c('bin', 'dat', 'var', 'sday', 'sweek', - 'time', 'latitude', 'longitude', - 'syear') - names(dim(lims_obs_tr_res[[ps]])) <- c('bin', 'dat', 'var', 'sday', 'sweek', - 'time', 'latitude', 'longitude', - 'unneeded', 'syear') - lims_obs_tr_res[[ps]] <- Subset(lims_obs_tr_res[[ps]], - along = 'unneeded', indices = 1, - drop = 'selected') + + if (tolower(recipe$Analysis$Horizon) %in% c('seasonal', 'decadal')) { + names(dim(cal_hcst_ev_res)) <- c('dat', 'var', 'sday', 'sweek', + 'unneeded', 'time', + 'latitude', 'longitude', 'ensemble', 'syear') + names(dim(cal_hcst_tr_res)) <- c('dat', 'var', 'sday', 'sweek', 'loop', 'time', + 'latitude', 'longitude', 'ensemble', 'syear') + names(dim(obs_tr_res)) <- c('dat', 'var', 'sday', 'sweek', 'ensemble', 'time', + 'latitude', 'longitude', 'unneeded', 'syear') + obs_tr_res <- Subset(obs_tr_res, along = 'unneeded', + indices = 1, drop = 'selected') + cal_hcst_ev_res <- Subset(cal_hcst_ev_res, along = 'unneeded', + indices = 1, drop = 'selected') + for(ps in 1:length(categories)) { + names(dim(lims_cal_hcst_tr_res[[ps]])) <- c('bin', 'dat', 'var', 'sday', + 'sweek', + 'time', 'latitude', 'longitude', + 'syear') + names(dim(lims_obs_tr_res[[ps]])) <- c('bin', 'dat', 'var', 'sday', 'sweek', + 'time', 'latitude', 'longitude', + 'unneeded', 'syear') + lims_obs_tr_res[[ps]] <- Subset(lims_obs_tr_res[[ps]], + along = 'unneeded', indices = 1, + drop = 'selected') + } + } else { #if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { + names(dim(cal_hcst_ev_res)) <- c('dat', 'var', 'unneeded', 'sweek', + 'time', 'latitude', 'longitude', 'ensemble', + 'sday', 'syear') + names(dim(cal_hcst_tr_res)) <- c('dat', 'var', 'loop', 'sweek', 'time', + 'latitude', 'longitude', 'ensemble', 'sday', + 'syear') + names(dim(obs_tr_res)) <- c('dat', 'var', 'ensemble', 'sweek', 'time', + 'latitude', 'longitude', 'unneeded', + 'sday', 'syear') + obs_tr_res <- Subset(obs_tr_res, along = 'unneeded', + indices = 1, drop = 'selected') + cal_hcst_ev_res <- Subset(cal_hcst_ev_res, along = 'unneeded', + indices = 1, drop = 'selected') + + for (ps in 1:length(categories)) { + names(dim(lims_cal_hcst_tr_res[[ps]])) <- 
c('bin', 'dat', 'var', 'sweek', + 'time', 'latitude', 'longitude', + 'syear') + names(dim(lims_obs_tr_res[[ps]])) <- c('bin', 'dat', 'var', 'sweek', + 'time', 'latitude', 'longitude', + 'unneeded', 'syear') + lims_obs_tr_res[[ps]] <- Subset(lims_obs_tr_res[[ps]], + along = 'unneeded', indices = 1, + drop = 'selected') + } } # Make categories rounded number to use as names: @@ -159,26 +204,62 @@ source("https://earth.bsc.es/gitlab/external/cstools/-/raw/d6914a40c11d09168b9b4 # Forecast calibration: if (!is.null(data$fcst)) { - data$fcst$data <- Calibration(exp = data$hcst$data, - obs = data$obs$data, - exp_cor = data$fcst$data, - cal.method = cal_method, - multi.model = FALSE, - na.fill = TRUE, na.rm = TRUE, - apply_to = NULL, - alpha = NULL, memb_dim = 'ensemble', - sdate_dim = 'syear', - dat_dim = NULL, ncores = ncores) - hcst_cal <- Calibration(exp = data$hcst$data, - obs = data$obs$data, - cal.method = cal_method, - eval.method = 'in-sample', - multi.model = FALSE, - na.fill = TRUE, na.rm = TRUE, - apply_to = NULL, - alpha = NULL, memb_dim = 'ensemble', - sdate_dim = 'syear', - dat_dim = NULL, ncores = ncores) + if (tolower(recipe$Analysis$Horizon) %in% c('seasonal', 'decadal')) { + data$fcst$data <- Calibration(exp = data$hcst$data, + obs = data$obs$data, + exp_cor = data$fcst$data, + cal.method = cal_method, + multi.model = FALSE, + na.fill = TRUE, na.rm = TRUE, + apply_to = NULL, + alpha = NULL, memb_dim = 'ensemble', + sdate_dim = 'syear', + dat_dim = NULL, ncores = ncores) + hcst_cal <- Calibration(exp = data$hcst$data, + obs = data$obs$data, + cal.method = cal_method, + eval.method = 'in-sample', + multi.model = FALSE, + na.fill = TRUE, na.rm = TRUE, + apply_to = NULL, + alpha = NULL, memb_dim = 'ensemble', + sdate_dim = 'syear', + dat_dim = NULL, ncores = ncores) + } else { # if subseasonal + # merge sample dimensions and select central week + hcst <- MergeDims(data$hcst$data, merge_dims = c('sday', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + hcst <- Subset(hcst, along = 'sweek', + indices = (dim(data$hcst$data)['sweek'] + 1) / 2) + obs <- MergeDims(data$obs$data, merge_dims = c('sday', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + obs <- Subset(obs, along = 'sweek', + indices = (dim(data$obs$data)['sweek'] + 1) / 2) + fcst <- Subset(data$fcst$data, along = 'sday', indices = 1, + drop = 'selected') + fcst_cal <- Calibration(exp = hcst, + obs = obs, + exp_cor = fcst, + cal.method = cal_method, + multi.model = FALSE, + na.fill = TRUE, na.rm = TRUE, + apply_to = NULL, + alpha = NULL, memb_dim = 'ensemble', + sdate_dim = 'syear', + dat_dim = NULL, ncores = ncores) + hcst_cal <- Calibration(exp = hcst, + obs = obs, + cal.method = cal_method, + eval.method = 'in-sample', + multi.model = FALSE, + na.fill = TRUE, na.rm = TRUE, + apply_to = NULL, + alpha = NULL, memb_dim = 'ensemble', + sdate_dim = 'syear', + dat_dim = NULL, ncores = ncores) + + + } # Terciles limits using the whole hindcast period: lims_fcst <- Apply(hcst_cal, target_dims = c('syear', 'ensemble'), @@ -197,6 +278,9 @@ source("https://earth.bsc.es/gitlab/external/cstools/-/raw/d6914a40c11d09168b9b4 ncores = ncores) tmp_lims2 <- list() # What to save hcst category limits or obs category limits? 
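# A minimal sketch (illustration only, with toy sizes assumed) of how the
# 'bin'-named category limits computed above are consumed by GetProbs()
# further down; GetProbs() is the copy sourced from
# modules/Crossval/R/tmp/GetProbs.R (also available in s2dv). Two absolute
# thresholds per start year yield three category probabilities.
set.seed(1)
toy <- array(rnorm(24 * 10), dim = c(syear = 24, ensemble = 10))
lims <- array(rep(quantile(toy, c(1/3, 2/3)), 24), dim = c(bin = 2, syear = 24))
probs <- GetProbs(toy, time_dim = 'syear', memb_dim = 'ensemble',
                  prob_thresholds = NULL, indices_for_quantiles = NULL,
                  abs_thresholds = lims, bin_dim_abs = 'bin')
dim(probs) # 3 categories (below / normal / above) across the 24 start years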
+# TODO saving: +recipe$Analysis$Workflow$Probabilities$save <- FALSE +if (recipe$Analysis$Workflow$Probabilities$save) { for(ps in 1:length(categories)) { tmp_lims3 <- drop(lims[[ps]]) for (l in 1:dim(lims[[ps]])['bin']) { @@ -214,23 +298,27 @@ source("https://earth.bsc.es/gitlab/external/cstools/-/raw/d6914a40c11d09168b9b4 } } } - +} # Compute Probabilities for (ps in 1:length(categories)) { - hcst_probs_ev[[ps]] <- GetProbs(cal_hcst_ev_res, time_dim = 'syear', + # Get only the probabilities of the central day in sday + central_day <- (dim(cal_hcst_ev_res)['sday'] + 1)/2 + tmp <- Subset(cal_hcst_ev_res, along = 'sday', indices = central_day) + hcst_probs_ev[[ps]] <- GetProbs(tmp, time_dim = 'syear', prob_thresholds = NULL, bin_dim_abs = 'bin', indices_for_quantiles = NULL, memb_dim = 'ensemble', abs_thresholds = lims_cal_hcst_tr_res[[ps]], ncores = ncores) - obs_probs_ev[[ps]] <- GetProbs(data$obs$data, time_dim = 'syear', - prob_thresholds = NULL, - bin_dim_abs = 'bin', - indices_for_quantiles = NULL, - memb_dim = 'ensemble', - abs_thresholds = lims_obs_tr_res[[ps]], - ncores = ncores) + tmp <- Subset(data$obs$data, along = 'sday', indices = central_day) + obs_probs_ev[[ps]] <- GetProbs(tmp, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'bin', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_obs_tr_res[[ps]], + ncores = ncores) if (!is.null(data$fcst)) { fcst_probs[[ps]] <- GetProbs(data$fcst$data, time_dim = 'syear', prob_thresholds = NULL, @@ -245,9 +333,30 @@ source("https://earth.bsc.es/gitlab/external/cstools/-/raw/d6914a40c11d09168b9b4 hcst <- data$hcst hcst$data <- cal_hcst_ev_res + if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { + # Keep only de central sday position from the dimension + if (dim(hcst$data)['sday'] > 1) { + hcst <- CST_Subset(hcst, along = 'sday', + indices = (dim(hcst$data)['sday'] + 1) / 2) + } + if (dim(data$obs$data)['sday'] > 1) { + data$obs <- CST_Subset(data$obs, along = 'sday', + indices = (dim(data$obs$data)['sday'] + 1) / 2) + } + if (dim(data$hcst$data)['sday'] > 1) { + data$hcst <- CST_Subset(data$hcst, along = 'sday', + indices = (dim(data$hcst$data)['sday'] + 1) / 2) + } + if (dim(obs_tr_res)['sday'] > 1) { + obs_tr_res <- Subset(obs_tr_res, along = 'sday', + indices = (dim(obs_tr_res)['sday'] + 1) / 2) + } + } info(recipe$Run$logger, - "#### Calibrated and Probabilities Done #####") - if (recipe$Analysis$Workflow$Anomalies$save != 'none') { + "#### Calibrated and Probabilities Done #####") +# TODO saving: +recipe$Analysis$Workflow$Calibration$save <- FALSE + if (recipe$Analysis$Workflow$Calibration$save != FALSE) { info(recipe$Run$logger, "##### START SAVING CALIBRATED #####") # recipe$Run$output_dir <- paste0(recipe$Run$output_dir, # "/outputs/Anomalies/") @@ -330,14 +439,9 @@ source("https://earth.bsc.es/gitlab/external/cstools/-/raw/d6914a40c11d09168b9b4 data_cube = data$fcst, agg = agg, type = "fcst") } - hcst_EM <- MeanDims(hcst$data, 'ensemble', drop = T) - if (!is.null(data$fcst)) { - fcst_EM <- MeanDims(data$fcst$data, 'ensemble', drop = T) - } return(list(hcst = hcst, obs = data$obs, fcst = data$fcst, hcst.full_val = data$hcst, obs.full_val = data$obs, - hcst_EM = hcst_EM, fcst_EM = fcst_EM, #cat_lims = list(hcst_tr = lims_ano_hcst_tr_res, # obs_tr = lims_ano_obs_tr_res), probs = list(hcst_ev = hcst_probs_ev, @@ -346,3 +450,4 @@ source("https://earth.bsc.es/gitlab/external/cstools/-/raw/d6914a40c11d09168b9b4 ref_obs_tr = obs_tr_res)) } + diff --git 
a/modules/Crossval/Crossval_metrics.R b/modules/Crossval/Crossval_metrics.R index addbb49b..2aa7b4b6 100644 --- a/modules/Crossval/Crossval_metrics.R +++ b/modules/Crossval/Crossval_metrics.R @@ -46,6 +46,7 @@ Crossval_metrics <- function(recipe, data_crossval, # TODO: distinguish between rpss and bss # if 1 percentile -> bss # if more than 1 -> rpss + # TODO: for subseasonal check if the dimension sday is 1 exe_rps <- unlist(lapply(categories, function(x) { if (length(x) > 1) { x <- x[1] *100 @@ -60,6 +61,15 @@ Crossval_metrics <- function(recipe, data_crossval, ", | |,")[[1]]) # The recipe allows to requset more than only terciles: for (ps in 1:length(exe_rps)) { + if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { + data_crossval$probs$hcst_ev[[ps]] <- MergeDims(data_crossval$probs$hcst_ev[[ps]], + merge_dims = c('sweek', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + data_crossval$probs$obs_ev[[ps]] <- MergeDims(data_crossval$probs$obs_ev[[ps]], + merge_dims = c('sweek', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + } + if ('rps' %in% requested_metrics) { rps <- RPS(exp = data_crossval$probs$hcst_ev[[ps]], obs = data_crossval$probs$obs_ev[[1]], memb_dim = NULL, @@ -100,6 +110,25 @@ Crossval_metrics <- function(recipe, data_crossval, # "_significance")]] <- rpss$sign } } + if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { + # The evaluation of all metrics are done with extra sample + data_crossval$hcst$data <- MergeDims(data_crossval$hcst$data, + merge_dims = c('sweek', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + data_crossval$obs$data <- MergeDims(data_crossval$obs$data, + merge_dims = c('sweek', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + data_crossval$ref_obs_tr <- MergeDims(data_crossval$ref_obs_tr, + merge_dims = c('sweek', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + data_crossval$hcst.full_val$data <- MergeDims(data_crossval$hcst.full_val$data, + merge_dims = c('sweek', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + data_crossval$obs.full_val$data <- MergeDims(data_crossval$obs.full_val$data, + merge_dims = c('sweek', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + } + if ('crps' %in% requested_metrics) { crps <- CRPS(exp = data_crossval$hcst$data, obs = data_crossval$obs$data, @@ -147,6 +176,7 @@ Crossval_metrics <- function(recipe, data_crossval, time_dim = 'syear', memb_dim = 'ensemble', alpha = alpha, + na.rm = na.rm, ncores = ncores) skill_metrics$mean_bias <- mean_bias$bias skill_metrics$mean_bias_significance <- mean_bias$sig @@ -160,6 +190,7 @@ Crossval_metrics <- function(recipe, data_crossval, obs = data_crossval$obs$data, memb_dim = 'ensemble', dat_dim = NULL, time_dim = 'syear', pval = TRUE, + na.rm = na.rm, ncores = ncores) skill_metrics$SprErr <- enssprerr$ratio skill_metrics$SprErr_significance <- enssprerr$p.val <= alpha @@ -259,3 +290,4 @@ Crossval_metrics <- function(recipe, data_crossval, return(skill_metrics) } + diff --git a/modules/old_modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R similarity index 100% rename from modules/old_modules/Downscaling/Downscaling.R rename to modules/Downscaling/Downscaling.R diff --git a/modules/old_modules/Downscaling/tmp/Analogs.R b/modules/Downscaling/tmp/Analogs.R similarity index 100% rename from modules/old_modules/Downscaling/tmp/Analogs.R rename to modules/Downscaling/tmp/Analogs.R diff --git a/modules/old_modules/Downscaling/tmp/Intbc.R b/modules/Downscaling/tmp/Intbc.R similarity index 100% rename from modules/old_modules/Downscaling/tmp/Intbc.R 
rename to modules/Downscaling/tmp/Intbc.R diff --git a/modules/old_modules/Downscaling/tmp/Interpolation.R b/modules/Downscaling/tmp/Interpolation.R similarity index 100% rename from modules/old_modules/Downscaling/tmp/Interpolation.R rename to modules/Downscaling/tmp/Interpolation.R diff --git a/modules/old_modules/Downscaling/tmp/Intlr.R b/modules/Downscaling/tmp/Intlr.R similarity index 100% rename from modules/old_modules/Downscaling/tmp/Intlr.R rename to modules/Downscaling/tmp/Intlr.R diff --git a/modules/old_modules/Downscaling/tmp/LogisticReg.R b/modules/Downscaling/tmp/LogisticReg.R similarity index 100% rename from modules/old_modules/Downscaling/tmp/LogisticReg.R rename to modules/Downscaling/tmp/LogisticReg.R diff --git a/modules/old_modules/Downscaling/tmp/Utils.R b/modules/Downscaling/tmp/Utils.R similarity index 100% rename from modules/old_modules/Downscaling/tmp/Utils.R rename to modules/Downscaling/tmp/Utils.R diff --git a/modules/old_modules/Indices/Indices.R b/modules/Indices/Indices.R similarity index 100% rename from modules/old_modules/Indices/Indices.R rename to modules/Indices/Indices.R diff --git a/modules/old_modules/Indices/R/compute_nao.R b/modules/Indices/R/compute_nao.R similarity index 100% rename from modules/old_modules/Indices/R/compute_nao.R rename to modules/Indices/R/compute_nao.R diff --git a/modules/old_modules/Indices/R/compute_nino.R b/modules/Indices/R/compute_nino.R similarity index 100% rename from modules/old_modules/Indices/R/compute_nino.R rename to modules/Indices/R/compute_nino.R diff --git a/modules/old_modules/Indices/R/correlation_eno.R b/modules/Indices/R/correlation_eno.R similarity index 100% rename from modules/old_modules/Indices/R/correlation_eno.R rename to modules/Indices/R/correlation_eno.R diff --git a/modules/old_modules/Indices/R/drop_indices_dims.R b/modules/Indices/R/drop_indices_dims.R similarity index 100% rename from modules/old_modules/Indices/R/drop_indices_dims.R rename to modules/Indices/R/drop_indices_dims.R diff --git a/modules/old_modules/Indices/R/plot_deterministic_forecast.R b/modules/Indices/R/plot_deterministic_forecast.R similarity index 100% rename from modules/old_modules/Indices/R/plot_deterministic_forecast.R rename to modules/Indices/R/plot_deterministic_forecast.R diff --git a/modules/old_modules/Multimodel/Multimodel.R b/modules/Multimodel/Multimodel.R similarity index 100% rename from modules/old_modules/Multimodel/Multimodel.R rename to modules/Multimodel/Multimodel.R diff --git a/modules/old_modules/Multimodel/Multimodel_skill.R b/modules/Multimodel/Multimodel_skill.R similarity index 100% rename from modules/old_modules/Multimodel/Multimodel_skill.R rename to modules/Multimodel/Multimodel_skill.R diff --git a/modules/old_modules/Multimodel/build_multimodel.R b/modules/Multimodel/build_multimodel.R similarity index 100% rename from modules/old_modules/Multimodel/build_multimodel.R rename to modules/Multimodel/build_multimodel.R diff --git a/modules/old_modules/Multimodel/clean_multimodel.R b/modules/Multimodel/clean_multimodel.R similarity index 100% rename from modules/old_modules/Multimodel/clean_multimodel.R rename to modules/Multimodel/clean_multimodel.R diff --git a/modules/old_modules/Multimodel/load_multimodel.R b/modules/Multimodel/load_multimodel.R similarity index 100% rename from modules/old_modules/Multimodel/load_multimodel.R rename to modules/Multimodel/load_multimodel.R diff --git a/modules/old_modules/Multimodel/load_multimodel_mean.R b/modules/Multimodel/load_multimodel_mean.R 
similarity index 100% rename from modules/old_modules/Multimodel/load_multimodel_mean.R rename to modules/Multimodel/load_multimodel_mean.R diff --git a/modules/old_modules/Multimodel/load_multimodel_probs.R b/modules/Multimodel/load_multimodel_probs.R similarity index 100% rename from modules/old_modules/Multimodel/load_multimodel_probs.R rename to modules/Multimodel/load_multimodel_probs.R diff --git a/modules/old_modules/Statistics/Statistics.R b/modules/Statistics/Statistics.R similarity index 100% rename from modules/old_modules/Statistics/Statistics.R rename to modules/Statistics/Statistics.R diff --git a/recipe_ecvs_ano_seas.yml b/recipe_ecvs_ano_seas.yml new file mode 100644 index 00000000..d41dd657 --- /dev/null +++ b/recipe_ecvs_ano_seas.yml @@ -0,0 +1,109 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + - {name: 'tas', freq: 'monthly_mean', units: 'C'} + #- {name: 'prlr', freq: 'monthly_mean', units: 'mm', flux: yes} + Datasets: + System: + #- {name: 'Meteo-France-System8'} + #- {name: 'CMCC-SPS3.5'} + - {name: 'ECMWF-SEAS5.1'} + #- {name: 'UK-MetOffice-Glosea603'} + ##- {name: 'NCEP-CFSv2'} + #- {name: 'DWD-GCFS2.1'} + #- {name: 'ECCC-GEM5.2-NEMO'} + # name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + Multimodel: + execute: yes + approach: pooled # Mandatory, bool: Either yes/true or no/false + createFrom: Anomalies + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '1101' + fcst_year: '2024' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + - {name: 'Global', latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9} + #- {name: 'Global', latmin: 0, latmax: 10, lonmin: 0, lonmax: 10} + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. + type: "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_reference" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: yes + cross_validation: no + save: none + Time_aggregation: + execute: yes + method: average + ini: [1, 2, 3, 4] + end: [3, 4, 5, 6] + Calibration: + method: raw #crps_min #evmos # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov n_eff + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. 
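      # Note (illustrative, mirroring the commented example in
      # recipe_subseasonal_ecvs.yml further down): several percentile sets
      # can be requested at once, e.g.
      #   percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]]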
+ save: none + Indicators: + index: no + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles + multi_panel: no + mask_terciles: 'both' + mask_ens: 'both' + projection: Robinson + file_format: 'PNG' + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: NULL + metric: mean_bias enscorr rpss crpss EnsSprErr + metric_aggregation: 'skill' + #inf_to_na: yes + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 30 # Optional, int: number of cores, defaults to 1 + remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE + alpha: 0.05 + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: gpfs + output_dir: /home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /home/bsc/bsc032339/sunset/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + diff --git a/recipe_tas.yml b/recipe_ecvs_cal_seas.yml similarity index 93% rename from recipe_tas.yml rename to recipe_ecvs_cal_seas.yml index c6589a06..f94795e7 100644 --- a/recipe_tas.yml +++ b/recipe_ecvs_cal_seas.yml @@ -5,19 +5,17 @@ Description: Analysis: Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal Variables: - name: tas - freq: monthly_mean - units: C - flux: no + - {name: 'tas', freq: 'monthly_mean', units: 'C'} + #- {name: 'prlr', freq: 'monthly_mean', units: 'mm', flux: yes} Datasets: System: - - {name: 'Meteo-France-System8'} - - {name: 'CMCC-SPS3.5'} + #- {name: 'Meteo-France-System8'} + #- {name: 'CMCC-SPS3.5'} - {name: 'ECMWF-SEAS5.1'} #- {name: 'UK-MetOffice-Glosea603'} ##- {name: 'NCEP-CFSv2'} - - {name: 'DWD-GCFS2.1'} - - {name: 'ECCC-GEM5.2-NEMO'} + #- {name: 'DWD-GCFS2.1'} + #- {name: 'ECCC-GEM5.2-NEMO'} # name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 Multimodel: execute: yes diff --git a/recipe_subseasonal_ecvs.yml b/recipe_subseasonal_ecvs.yml new file mode 100644 index 00000000..3d7b0e64 --- /dev/null +++ b/recipe_subseasonal_ecvs.yml @@ -0,0 +1,139 @@ +# IMPORTANT: This is recipe is not intended to represent a real workflow: it is only a template showcasing ALL available options. +Description: + Author: N.Pérez-Zanón + Info: # Complete recipe containing all possible fields. +Analysis: + Horizon: subseasonal # Mandatory, str: 'seasonal', or 'decadal'. Subseasonal is in development + Variables: + # name: variable name(s) in the /esarchive (Mandatory, str) + # freq: 'monthly_mean', 'daily' or 'daily_mean' (Mandatory, str) + # units: desired data units for each variable. Only available for temperature, + # precipitation, and pressure variables. 
+    - {name: 'tas', freq: 'weekly_mean', units: 'C'}
+#    name: 'tas'
+#    freq: 'weekly_mean'
+#    units: 'C'
+    # To request more variables to be divided into atomic recipes, add them this way:
+#    - {name: 'prlr', freq: 'weekly_mean', units: 'mm'}
+#    - {name: 'sfcWind', freq: 'weekly_mean', units: 'm s-1'}
+#    - {name: 'rsds', freq: 'weekly_mean', units: 'W m-2'}
+    # To request multiple variables *in the same* atomic recipe, add them this way:
+    # - {name: 'tas, prlr, sfcWind, rsds', freq: 'weekly_mean', units: {tas: 'C', prlr: 'mm', sfcWind: 'm s-1', rsds:'W m-2'}}
+  Datasets:
+    System:
+      # name: System name (Mandatory, str)
+      # member: 'all' or individual members, separated by a comma and in quotes (decadal only, str)
+      - {name: 'NCEP-CFSv2', member: 'all'}
+      # To request more Systems to be divided into atomic recipes, add them this way:
+      # - {name: 'Meteo-France-System7'}
+    Multimodel:
+      execute: no # Either yes/true or no/false (Mandatory, bool)
+      approach: pooled # Multimodel computation approach. 'pooled' currently the only option (str)
+      createFrom: Anomalies # Which module should the anomalies be created from (str)
+    Reference:
+      - {name: 'ERA5'} # Reference name (Mandatory, str)
+      # To request more References to be divided into atomic recipes, add them this way:
+      # - {name: 'ERA5Land'}
+  Time:
+    sdate: 20241031 #%Y%m%d # Change to 2023
+    #- '1201' # Start date, 'mmdd' (Mandatory, int)
+    # To request more startdates to be divided into atomic recipes, add them this way:
+    # - '0101'
+    # - '0201'
+    # ...
+    fcst_year: 20241031 # Forecast initialization year 'YYYY' (Optional, int)
+    hcst_start: '1999' # Hindcast initialization start year 'YYYY' (Mandatory, int)
+    hcst_end: '2016' # Hindcast initialization end year 'YYYY' (Mandatory, int)
+    ftime_min: 1 # First forecast time step in months. Starts at “1”. (Mandatory, int)
+    ftime_max: 4 # Last forecast time step in months. Starts at “1”. (Mandatory, int)
+    week_day: Thursday
+    sweek_window: 9
+    sday_window: 3
+  Region:
+    # latmin: minimum latitude (Mandatory, int)
+    # latmax: maximum latitude (Mandatory, int)
+    # lonmin: # minimum longitude (Mandatory, int)
+    # lonmax: # maximum longitude (Mandatory, int)
+    #- {name: global, latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9}
+    # To request more regions to be divided into atomic recipes, add them this way:
+    # {name: "Nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 60}
+    - {name: "Iberia", latmin: 36, latmax: 44, lonmin: -10, lonmax: 5}
+    # - {name: "EU", latmin: 20, latmax: 80, lonmin: -20, lonmax: 40}
+  Regrid:
+    method: bilinear # Interpolation method (Mandatory, str)
+    type: to_system # Interpolate to: 'to_system', 'to_reference', 'none',
+                    # or CDO-accepted grid. (Mandatory, str)
+  Workflow:
+    # This is the section of the recipe where the parameters for each module are specified
+    Calibration:
+      method: evmos # Calibration method. (Mandatory, str)
+      save: 'all' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str)
+    Skill:
+      metric: mean_bias enscorr rpss enssprerr # List of skill metrics separated by spaces or commas. (Mandatory, str)
+      save: 'all' # Options: 'all', 'none' (Mandatory, str)
+    Statistics:
+      metric: cov std var n_eff # List of statistics separated by spaces or commas. (Mandatory, str)
+      save: 'all' # Options: 'all', 'none' (Mandatory, str)
+    Probabilities:
+      # percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Thresholds
+      percentiles: [[1/3, 2/3]]
+      # for quantiles and probability categories.
Each set of thresholds should be + # enclosed within brackets. For now, they are INDEPENDENT from skill metrics. (Optional) + save: 'percentiles_only' # Options: 'all', 'none', 'bins_only', 'percentiles_only' (Mandatory, str) + Visualization: + plots: skill_metrics # Types of plots to generate (Optional, str) + multi_panel: no # Multi-panel plot or single-panel plots. Default is 'no/false'. (Optional, bool) + projection: 'cylindrical_equidistant' # Options: 'cylindrical_equidistant', 'robinson', 'lambert_europe'. Default is cylindrical equidistant. (Optional, str) + mask_terciles: no # Whether to mask the non-significant points by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + dots_terciles: yes # Whether to dot the non-significant by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + mask_ens: no # Whether to mask the non-significant points by rpss in the forecast ensemble mean plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + file_format: 'PNG' # Final file format of the plots. Formats available: PNG, JPG, JPEG, EPS. Defaults to PDF. + Scorecards: + execute: no # yes/no + regions: + # Mandatory: Define regions for which the spatial aggregation will be performed. + # The regions must be included within the area defined in the 'Analysis:Region' section. + Iberia: {lon.min: -10, lon.max: 5, lat.min: 36, lat.max: 44} + #Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + #Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + #Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: 1, 2, 3 # Mandatory, int: start months to visualise in scorecard table. Options: 'all' or a sequence of numbers. + metric: mean_bias enscorr rpss crpss enssprerr # Mandatory: metrics to visualise in scorecard table + metric_aggregation: 'score' # Mandatory, str: level of aggregation for skill scores. Options: 'score' or 'skill' + inf_to_na: True # Optional, bool: set inf values in data to NA, default is no/False + table_label: NULL # Optional, str: extra information to add in scorecard table title + fileout_label: NULL # Optional, str: extra information to add in scorecard output filename + col1_width: NULL # Optional, int: to adjust width of first column in scorecards table + col2_width: NULL # Optional, int: to adjust width of second column in scorecards table + calculate_diff: False # Mandatory, bool: True/False + ncores: 16 # Number of cores to be used in parallel computation. + # If left empty, defaults to 1. (Optional, int) + remove_NAs: yes # Whether to remove NAs. + # If left empty, defaults to no/false. (Optional, bool) + Output_format: 'Scorecards' # 'S2S4E' or 'Scorecards'. Determines the format of the output. Default is 'S2S4E'. +Run: + filesystem: esarchive # Name of the filesystem as defined in the archive configuration file + Loglevel: INFO # Minimum category of log messages to display: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. + # Default value is 'INFO'. (Optional, str) + Terminal: yes # Optional, bool: Whether to display log messages in the terminal. + # Default is yes/true. + output_dir: /esarchive/scratch/nperez/git/case_s2s/ # Output directory. Must have write permissions. (Mandatory, str) + code_dir: /esarchive/scratch/nperez/git/sunset/ # Directory where the code is stored. 
Is used when launching jobs (not running interactively) + autosubmit: no # Whether or not to run with Autosubmit. Only for non-atomic recipes (not running interactively) + # fill only if using autosubmit + auto_conf: + script: ./example_scripts/multimodel_seasonal.R # replace with the path to your script + expid: a6wq # replace with your EXPID + hpc_user: bsc032762 # replace with your hpc username + wallclock: 01:00 # wallclock for single-model jobs, hh:mm + wallclock_multimodel: 02:00 # wallclock for multi-model jobs, hh:mm. If empty, 'wallclock' will be used. + processors_per_job: 4 # processors to request for each single-model job. + processors_multimodel: 16 # processors to request for each multi-model job. If empty, 'processors_per_job' will be used. + custom_directives: ['#SBATCH --exclusive'] # custom scheduler directives for single-model jobs. + custom_directives_multimodel: ['#SBATCH --exclusive', '#SBATCH --constraint=highmem'] # custom scheduler directives for multi-model jobs. If empty, 'custom_directives' will be used. + platform: nord3v2 # platform (for now, only nord3v2 is available) + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + diff --git a/recipe_tas_singl_ano_seas.yml b/recipe_tas_singl_ano_seas.yml new file mode 100644 index 00000000..3d2f6a9e --- /dev/null +++ b/recipe_tas_singl_ano_seas.yml @@ -0,0 +1,106 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: tas + freq: monthly_mean + units: C + flux: no + Datasets: + System: + - {name: 'ECMWF-SEAS5.1'} + #- {name: 'Meteo-France-System8'} + #- {name: 'CMCC-SPS3.5'} + # name: UK-MetOffice-Glosea601 #Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + Multimodel: + execute: no + approach: pooled # Mandatory, bool: Either yes/true or no/false + createFrom: Anomalies + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0901' + fcst_year: '2024' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2000' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + - {name: test, latmin: -10, latmax: 10, lonmin: 0, lonmax: 40} + #- {name: Global, latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9} + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. + type: /home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt #/esarchive/scratch/nperez/git4/sunset/conf/grid_description/griddes_system51c3s.txt # "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_system" #"to_reference" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: yes + cross_validation: no + save: all + Time_aggregation: + execute: yes + method: average + ini: [1,2,3,4] + end: [3,4,5,6] + Calibration: + method: raw # Mandatory, str: Calibration method. See docu. 
+ cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov std n_eff + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. + save: all + Indicators: + index: no + Visualization: + plots: skill_metrics #forecast_ensemble_mean most_likely_terciles + multi_panel: no + dots: both + projection: Robinson + file_format: 'PNG' + #projection: robinson + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: NULL + metric: mean_bias enscorr rpss crpss EnsSprErr + metric_aggregation: 'skill' + #inf_to_na: yes + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: gpfs #esarchive #gpfs + output_dir: /home/bsc/bsc032339/ #/esarchive/scratch/nperez/git4/ #/home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /home/bsc/bsc032339/sunset/ # /esarchive/scratch/nperez/git4/sunset/ #/home/bsc/bsc032339/sunset/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + diff --git a/recipe_tas_singl.yml b/recipe_tas_singl_cal_seas.yml similarity index 96% rename from recipe_tas_singl.yml rename to recipe_tas_singl_cal_seas.yml index 695d7fd3..729bf1ec 100644 --- a/recipe_tas_singl.yml +++ b/recipe_tas_singl_cal_seas.yml @@ -29,17 +29,15 @@ Analysis: ftime_min: 1 # Mandatory, int: First leadtime time step in months ftime_max: 6 # Mandatory, int: Last leadtime time step in months Region: - latmin: -10 - latmax: 10 - lonmin: 0 - lonmax: 40 #359.9 + - {name: test, latmin: -10, latmax: 10, lonmin: 0, lonmax: 40} + # - {name: Global, latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9} Regrid: method: conservative # Mandatory, str: Interpolation method. See docu. type: /home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt #/esarchive/scratch/nperez/git4/sunset/conf/grid_description/griddes_system51c3s.txt # "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_system" #"to_reference" #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. 
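  # Note on the Time_aggregation settings used in these recipes: each
  # ini[i]/end[i] pair is interpreted as one aggregation window over the
  # forecast times, with both ends included, so ini: [1,2,3,4] with
  # end: [3,4,5,6] should yield four rolling 3-month averages
  # (forecast months 1-3, 2-4, 3-5 and 4-6).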
Workflow: Anomalies: - compute: yes + compute: no cross_validation: no save: all Time_aggregation: diff --git a/sunset.sh b/sunset.sh index fcf712a0..786ce663 100644 --- a/sunset.sh +++ b/sunset.sh @@ -7,7 +7,7 @@ #SBATCH -e sunset_multimodel-%J.err #SBATCH --account=bsc32 #SBATCH --qos=gp_bsces -#SBATCH --constraint=highmem +#SBATCH --constraint=lowmem #### --qos=acc_bsces @@ -16,5 +16,7 @@ conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/SUNSET-env_2.0.0 #Rscript /home/bsc/bsc032339/sunset/full_ecvs_anomalies.R /home/bsc/bsc032339/sunset/recipe_tas.yml -Rscript /home/bsc/bsc032339/sunset/full_ecvs_multimodel_calibrated.R /home/bsc/bsc032339/sunset/recipe_tas.yml #full_ecvs_multimodel_anomalies.R #full_NAO.R #ecvs_multimodel_anomalies.R +Rscript /home/bsc/bsc032339/sunset/full_ecvs_calibration.R /home/bsc/bsc032339/sunset/recipe_ecvs_cal_seas.yml + +#Rscript /home/bsc/bsc032339/sunset/full_ecvs_multimodel_calibrated.R /home/bsc/bsc032339/sunset/recipe_tas.yml #full_ecvs_multimodel_anomalies.R #full_NAO.R #ecvs_multimodel_anomalies.R diff --git a/sunsetv2.sh b/sunsetv2.sh index 80f91312..ef5a9bd6 100644 --- a/sunsetv2.sh +++ b/sunsetv2.sh @@ -15,7 +15,9 @@ source /gpfs/projects/bsc32/software/suselinux/11/software/Miniconda3/4.7.10/etc/profile.d/conda.sh conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/SUNSET-env_2.0.0 -Rscript /home/bsc/bsc032339/sunset/full_ecvs_calibration.R /home/bsc/bsc032339/sunset/recipe_tasv2.yml +#Rscript /home/bsc/bsc032339/sunset/full_ecvs_calibration.R /home/bsc/bsc032339/sunset/recipe_tasv2.yml + +Rscript /home/bsc/bsc032339/sunset/full_ecvs_anomalies.R /home/bsc/bsc032339/sunset/recipe_ecvs_ano_seas.yml #Rscript /home/bsc/bsc032339/sunset/full_ecvs_multimodel_calibrated.R /home/bsc/bsc032339/sunset/recipe_tas.yml #full_ecvs_multimodel_anomalies.R #full_NAO.R #ecvs_multimodel_anomalies.R -- GitLab From 15c98ae647c1a9f3019d392ea87f568efba2f3f6 Mon Sep 17 00:00:00 2001 From: Nuria Perez Zanon Date: Tue, 17 Dec 2024 10:59:25 +0100 Subject: [PATCH 55/78] merging changes --- amdsunset.sh | 20 + conf/archive_decadal.yml | 797 ++++++++---------- conf/archive_reference.yml | 220 +++++ conf/archive_seasonal.yml | 326 +++++++ full_ecvs_multimodel_calibrated.R | 7 + modules/Loading/R/load_subseasonal.R | 9 +- .../R/plot_most_likely_terciles_map.R | 2 + recipe_ecvs_cal_mul_seas.yml | 2 +- sunset.sh | 2 +- 9 files changed, 928 insertions(+), 457 deletions(-) create mode 100644 amdsunset.sh create mode 100644 conf/archive_reference.yml create mode 100644 conf/archive_seasonal.yml diff --git a/amdsunset.sh b/amdsunset.sh new file mode 100644 index 00000000..c7ba0628 --- /dev/null +++ b/amdsunset.sh @@ -0,0 +1,20 @@ +#!/bin/bash +#SBATCH -n 50 +#SBATCH -N 1 +#SBATCH -t 4:00:00 +#SBATCH -J sunset_multimodel +#SBATCH -o sunset_multimodel-%J.out +#SBATCH -e sunset_multimodel-%J.err +#SBATCH --account=bsc32 + +#### --qos=acc_bsces + +source /gpfs/projects/bsc32/software/suselinux/11/software/Miniconda3/4.7.10/etc/profile.d/conda.sh +conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/SUNSET-env_2.0.0 + +#Rscript /home/bsc/bsc032339/sunset/full_ecvs_anomalies.R /home/bsc/bsc032339/sunset/recipe_tas.yml + +#Rscript /home/bsc/bsc032339/sunset/full_ecvs_calibration.R /home/bsc/bsc032339/sunset/recipe_ecvs_cal_seas.yml + +Rscript /home/bsc/bsc032339/sunset/full_ecvs_multimodel_calibrated.R /home/bsc/bsc032339/sunset/recipe_ecvs_cal_mul_seas.yml #full_ecvs_multimodel_anomalies.R #full_NAO.R #ecvs_multimodel_anomalies.R + diff --git 
a/conf/archive_decadal.yml b/conf/archive_decadal.yml index 12ade341..bc25c717 100644 --- a/conf/archive_decadal.yml +++ b/conf/archive_decadal.yml @@ -1,5 +1,5 @@ gpfs: - src: "/gpfs/projects/bsc32/esarchive_cache/" + src_sys: "/gpfs/projects/bsc32/esarchive_cache/" System: # ---- EC-Earth3-i4: @@ -8,6 +8,7 @@ gpfs: src: hcst: "exp/CMIP6/dcppA-hindcast/EC-Earth3-i4/DCPP/EC-Earth-Consortium/EC-Earth3-i4/dcppA-hindcast/" fcst: + startR: "exp/CMIP6/$dcpp$/EC-Earth3-i4/DCPP/EC-Earth-Consortium/EC-Earth3-i4/$dcpp$/" first_dcppB_syear: 2021 monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "clt":"Amon", "hfls":"Amon", @@ -33,489 +34,383 @@ gpfs: member: r1i4p1f1,r2i4p1f1,r3i4p1f1,r4i4p1f1,r5i4p1f1,r6i4p1f1,r7i4p1f1,r8i4p1f1,r9i4p1f1,r10i4p1f1 initial_month: 11 sdate_add: 0 - reference_grid: "/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/r1i4p1f1/Amon/tas/gr/v20210910/tas_Amon_EC-Earth3_dcppA-hindcast_s1960-r1i4p1f1_gr_196011-196110.nc" - - Reference: - ERA5: - name: "ERA5" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "recon/era5/" - monthly_mean: {"tas":"_f1h-r1440x721cds/", - "psl":"monthly_mean/psl_f1h-r1440x721cds/", - "prlr":"monthly_mean/prlr_f1h-r1440x721cds/", - "sfcWind":"monthly_mean/sfcWind_f1h-r1440x721cds/"} - calendar: "standard" - reference_grid: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" - land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/constant/lsm-r1440x721cds/sftof.nc" - + reference_grid: "/gpfs/projects/bsc32/esarchive_cache/exp/CMIP6/dcppA-hindcast/EC-Earth3-i4/DCPP/EC-Earth-Consortium/EC-Earth3-i4/dcppA-hindcast/r1i4p1f1/Amon/tas/gr/v20210910/tas_Amon_EC-Earth3_dcppA-hindcast_s1960-r1i4p1f1_gr_196011-196110.nc" esarchive: - src: "/esarchive/" - System: + src_sys: "/esarchive/" + System: # ---- - EC-Earth3-i1: - name: "EC-Earth3-i1" - institution: "EC-Earth-Consortium" - src: - hcst: "exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/" - fcst: - startR: "exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/$dcpp$/" - monthly_mean: - #NOTE: tos is under both Amon and Omon --> wait to be changed - table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "tos":["Amon", "Omon"]} - grid: {"tas":"gr"} - version: {"tas":"v20190713"} - daily_mean: - grid: {"clt":"gr", "hurs":"gr", "hursmin":"gr", "pr":"gr", "psl":"gr", "rsds":"gr", "sfcWind":"gr", "sfcWindmax":"gr", "tas":"gr", "tasmax":"gr", "tasmin":"gr"} - version: {"clt":"v20190713", "hurs":"v20190713", "hursmin":"v20190713", "pr":"v20190713", "psl":"v20190713", "rsds":"v20190713", "sfcWind":"v20190713", "sfcWindmax":"v20190713", "tas":"v20190713", "tasmax":"v20190713", "tasmin":"v20190713"} - calendar: "proleptic_gregorian" - member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1,r9i1p1f1,r10i1p1f1 - initial_month: 11 - sdate_add: 0 - reference_grid: "/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/r1i1p1f1/Amon/tas/gr/v20190713/tas_Amon_EC-Earth3_dcppA-hindcast_s1960-r1i1p1f1_gr_196011-196110.nc" #'r512x256' + EC-Earth3-i1: + name: "EC-Earth3-i1" + institution: "EC-Earth-Consortium" + src: + hcst: "exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/" + fcst: + startR: "exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/$dcpp$/" + monthly_mean: + #NOTE: tos is under both Amon and Omon --> wait to be changed + table: 
{"tas":"Amon", "pr":"Amon", "psl":"Amon", "tos":["Amon", "Omon"]} + grid: {"tas":"gr"} + version: {"tas":"v20190713"} + daily_mean: + grid: {"clt":"gr", "hurs":"gr", "hursmin":"gr", "pr":"gr", "psl":"gr", + "rsds":"gr", "sfcWind":"gr", "sfcWindmax":"gr", "tas":"gr", + "tasmax":"gr", "tasmin":"gr"} + version: {"clt":"v20190713", "hurs":"v20190713", "hursmin":"v20190713", + "pr":"v20190713", "psl":"v20190713", "rsds":"v20190713", + "sfcWind":"v20190713", "sfcWindmax":"v20190713", + "tas":"v20190713", "tasmax":"v20190713", "tasmin":"v20190713"} + calendar: "proleptic_gregorian" + member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1,r9i1p1f1,r10i1p1f1 + initial_month: 11 + sdate_add: 0 + reference_grid: "/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/r1i1p1f1/Amon/tas/gr/v20190713/tas_Amon_EC-Earth3_dcppA-hindcast_s1960-r1i1p1f1_gr_196011-196110.nc" #'r512x256' # ---- - #NOTE: EC-Earth3-i2 the first file of each sdate has 2 time step only (Nov-Dec). - # The rest files are Jan to Dec. - EC-Earth3-i2: - name: "EC-Earth3-i2" - institution: "EC-Earth-Consortium" - src: - hcst: "exp/CMIP6/dcppA-hindcast/ec-earth3/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/" - fcst: - startR: "exp/CMIP6/$dcpp$/ec-earth3/DCPP/EC-Earth-Consortium/EC-Earth3/$dcpp$/" - monthly_mean: - table: {"tas":"Amon"} - grid: {"tas":"gr"} - version: {"tas":"v20200730"} - daily_mean: - grid: {"pr":"gr", "tas":"gr", "tasmax":"gr", "tasmin":"gr"} - version: {"pr":"v20200508", "tas":"v20200731", "tasmax":"v20200730", "tasmin":"v20200730"} - calendar: "proleptic_gregorian" - #NOTE:There are many members but not all of them are available on ESGF (only r6-10 available). Then, we might have some variables for the rest of the members (r1-5 and r11-15), but not for all the variables. That's why i'm only using r6-10 - member: r6i2p1f1,r7i2p1f1,r8i2p1f1,r9i2p1f1,r10i2p1f1 - initial_month: 11 - sdate_add: 0 - reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/ec-earth3/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/r6i2p1f1/Amon/tas/gr/v20200730/tas_Amon_EC-Earth3_dcppA-hindcast_s1960-r6i2p1f1_gr_196011-196012.nc" #'r512x256' + #NOTE: EC-Earth3-i2 the first file of each sdate has 2 time step only (Nov-Dec). + # The rest files are Jan to Dec. + EC-Earth3-i2: + name: "EC-Earth3-i2" + institution: "EC-Earth-Consortium" + src: + hcst: "exp/CMIP6/dcppA-hindcast/ec-earth3/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/" + fcst: + startR: "exp/CMIP6/$dcpp$/ec-earth3/DCPP/EC-Earth-Consortium/EC-Earth3/$dcpp$/" + monthly_mean: + table: {"tas":"Amon"} + grid: {"tas":"gr"} + version: {"tas":"v20200730"} + daily_mean: + grid: {"pr":"gr", "tas":"gr", "tasmax":"gr", "tasmin":"gr"} + version: {"pr":"v20200508", "tas":"v20200731", "tasmax":"v20200730", "tasmin":"v20200730"} + calendar: "proleptic_gregorian" + #NOTE:There are many members but not all of them are available on ESGF (only r6-10 available). Then, we might have some variables for the rest of the members (r1-5 and r11-15), but not for all the variables. 
That's why i'm only using r6-10 + member: r6i2p1f1,r7i2p1f1,r8i2p1f1,r9i2p1f1,r10i2p1f1 + initial_month: 11 + sdate_add: 0 + reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/ec-earth3/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/r6i2p1f1/Amon/tas/gr/v20200730/tas_Amon_EC-Earth3_dcppA-hindcast_s1960-r6i2p1f1_gr_196011-196012.nc" #'r512x256' # ---- - EC-Earth3-i4: - name: "EC-Earth3-i4" - institution: "EC-Earth-Consortium" - src: - hcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/" - fcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppB-forecast/" - startR: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/$dcpp$/" - first_dcppB_syear: 2021 -# hcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/" -# fcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppB-forecast/" -# src: {"1960:2020": "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/", -# "2021:2021": "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppB-forecast/"} - monthly_mean: - table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "clt":"Amon", "hfls":"Amon", - "hurs":"Amon", "huss":"Amon", "rsds":"Amon", "rsut":"Amon", "ta":"Amon", - "tasmax":"Amon", "tosa":"Amon", "ua":"Amon", "va":"Amon", "zg":"Amon", - "evspsbl":"Amon", "hfss":"Amon", "hursmin":"Amon", "rlut":"Amon", - "rsdt":"Amon", "sfcWind":"Amon", "tasmin":"Amon", "ts":"Amon", "uas":"Amon", - "vas":"Amon"} - grid: {"tas":"gr", "pr":"gr", "psl":"gr", "clt":"gr", "hfls":"gr", - "hurs":"gr", "huss":"gr", "rsds":"gr", "rsut":"gr", "ta":"gr", - "tasmax":"gr", "tosa":"gr", "ua":"gr", "va":"gr", "zg":"gr", - - "vas":"gr"} - version: {"tas":"v20210910", "pr":"v20210910", "psl":"v20210910", "clt":"v20210910", - "hurs":"v20210910", "huss":"v20210910", "rsds":"v20210910", "rsut":"v20210910", "ta":"v20210910", - "tasmax":"v20210910", "tosa":"v20210910", "ua":"v20210910", "va":"v20210910", "zg":"v20210910", - "evspsbl":"v20210910", "hfss":"v20210910", "hursmin":"v20210910", "rlut":"v20210910", - "rsdt":"v20210910", "sfcWind":"v20210910", "tasmin":"v20210910", "ts":"v20210910", "uas":"v20210910", - "vas":"v20210910"} - daily_mean: - grid: {"tas":"gr", "pr":"gr", "psl":"gr"} - version: {"tas":"v20210910", "pr":"v20210910", "psl":"v20210910"} - calendar: "proleptic_gregorian" - member: r1i4p1f1,r2i4p1f1,r3i4p1f1,r4i4p1f1,r5i4p1f1,r6i4p1f1,r7i4p1f1,r8i4p1f1,r9i4p1f1,r10i4p1f1 - initial_month: 11 - sdate_add: 0 - reference_grid: "/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/r1i4p1f1/Amon/tas/gr/v20210910/tas_Amon_EC-Earth3_dcppA-hindcast_s1960-r1i4p1f1_gr_196011-196110.nc" + EC-Earth3-i4: + name: "EC-Earth3-i4" + institution: "EC-Earth-Consortium" + src: + hcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/" + fcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppB-forecast/" + startR: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/$dcpp$/" + first_dcppB_syear: 2021 + # hcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/" + # fcst: "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppB-forecast/" + # src: {"1960:2020": 
"exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/", + # "2021:2021": "exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppB-forecast/"} + monthly_mean: + table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "clt":"Amon", "hfls":"Amon", + "hurs":"Amon", "huss":"Amon", "rsds":"Amon", "rsut":"Amon", "ta":"Amon", + "tasmax":"Amon", "tosa":"Amon", "ua":"Amon", "va":"Amon", "zg":"Amon", + "evspsbl":"Amon", "hfss":"Amon", "hursmin":"Amon", "rlut":"Amon", + "rsdt":"Amon", "sfcWind":"Amon", "tasmin":"Amon", "ts":"Amon", "uas":"Amon", + "vas":"Amon"} + grid: {"tas":"gr", "pr":"gr", "psl":"gr", "clt":"gr", "hfls":"gr", + "hurs":"gr", "huss":"gr", "rsds":"gr", "rsut":"gr", "ta":"gr", + "tasmax":"gr", "tosa":"gr", "ua":"gr", "va":"gr", "zg":"gr", + "vas":"gr"} + version: {"tas":"v20210910", "pr":"v20210910", "psl":"v20210910", "clt":"v20210910", + "hurs":"v20210910", "huss":"v20210910", "rsds":"v20210910", "rsut":"v20210910", "ta":"v20210910", + "tasmax":"v20210910", "tosa":"v20210910", "ua":"v20210910", "va":"v20210910", "zg":"v20210910", + "evspsbl":"v20210910", "hfss":"v20210910", "hursmin":"v20210910", "rlut":"v20210910", + "rsdt":"v20210910", "sfcWind":"v20210910", "tasmin":"v20210910", "ts":"v20210910", "uas":"v20210910", + "vas":"v20210910"} + daily_mean: + grid: {"tas":"gr", "pr":"gr", "psl":"gr"} + version: {"tas":"v20210910", "pr":"v20210910", "psl":"v20210910"} + calendar: "proleptic_gregorian" + member: r1i4p1f1,r2i4p1f1,r3i4p1f1,r4i4p1f1,r5i4p1f1,r6i4p1f1,r7i4p1f1,r8i4p1f1,r9i4p1f1,r10i4p1f1 + initial_month: 11 + sdate_add: 0 + reference_grid: "/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/r1i4p1f1/Amon/tas/gr/v20210910/tas_Amon_EC-Earth3_dcppA-hindcast_s1960-r1i4p1f1_gr_196011-196110.nc" # ---- - HadGEM3-GC31-MM: - name: "HadGEM3-GC31-MM" - institution: "Met Office Hadley Centre" - src: - hcst: "exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/" - fcst: "exp/CMIP6/dcppB-forecast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppB-forecast/" - startR: "exp/CMIP6/$dcpp$/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/$dcpp$/" - first_dcppB_syear: 2019 - monthly_mean: - table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "ts":"Amon", "tos":"Omon"} - grid: {"tas":"gn", "psl":"gr", "pr":"gn", "ts":"gr", "tos":"gr"} - #version depends on member and variable - version: {"tas":"v20200417", "psl":"v20200316", "pr":"v20200417", "ts":"v20200316", "tos":"v20200417"} - daily_mean: - grid: {"tasmin":"gn", "tasmax":"gn", "pr":"gn"} - version: {"tasmin":"v20200417", "tasmax":"v20200417", "pr":"v20200417"} - calendar: "360_day" - member: r1i1p1f2,r2i1p1f2,r3i1p1f2,r4i1p1f2,r5i1p1f2,r6i1p1f2,r7i1p1f2,r8i1p1f2,r9i1p1f2,r10i1p1f2 - initial_month: 11 - sdate_add: 0 - reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Amon/tas/gn/v20200417/tas_Amon_HadGEM3-GC31-MM_dcppA-hindcast_s1960-r1i1p1f2_gn_196011-196012.nc" #'r432x324' + HadGEM3-GC31-MM: + name: "HadGEM3-GC31-MM" + institution: "Met Office Hadley Centre" + src: + hcst: "exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/" + fcst: "exp/CMIP6/dcppB-forecast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppB-forecast/" + startR: "exp/CMIP6/$dcpp$/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/$dcpp$/" + first_dcppB_syear: 2019 + monthly_mean: + table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "ts":"Amon", "tos":"Omon"} + 
grid: {"tas":"gn", "psl":"gr", "pr":"gn", "ts":"gr", "tos":"gr"} + #version depends on member and variable + version: {"tas":"v20200417", "psl":"v20200316", "pr":"v20200417", "ts":"v20200316", "tos":"v20200417"} + daily_mean: + grid: {"tasmin":"gn", "tasmax":"gn", "pr":"gn"} + version: {"tasmin":"v20200417", "tasmax":"v20200417", "pr":"v20200417"} + calendar: "360_day" + member: r1i1p1f2,r2i1p1f2,r3i1p1f2,r4i1p1f2,r5i1p1f2,r6i1p1f2,r7i1p1f2,r8i1p1f2,r9i1p1f2,r10i1p1f2 + initial_month: 11 + sdate_add: 0 + reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Amon/tas/gn/v20200417/tas_Amon_HadGEM3-GC31-MM_dcppA-hindcast_s1960-r1i1p1f2_gn_196011-196012.nc" #'r432x324' # ---- - BCC-CSM2-MR: - name: "BCC-CSM2-MR" - institution: "Beijing Climate Center, Beijing 100081, China" - src: - hcst: "exp/CMIP6/dcppA-hindcast/BCC-CSM2-MR/DCPP/BCC/BCC-CSM2-MR/dcppA-hindcast/" - fcst: - startR: "exp/CMIP6/$dcpp$/BCC-CSM2-MR/DCPP/BCC/BCC-CSM2-MR/$dcpp$/" - monthly_mean: - table: {"tas":"Amon", "pr":"Amon", "psl":"Amon"} - grid: {"tas":"gn", "pr":"gn", "psl":"gn"} - #NOTE: It should be the lastest version with all the files inside - version: {"tas":"v20200101"} -# version: {"tas":[v20191126, v20191213, v20191219, v20200110, v20200114, v20200504, v20191209, v20191218, v20200101, v20200113, v20200401]} - daily_mean: - grid: {"pr":"gn", "sfcWind":"gn", "tas":"gn", "tasmax":"gn", "tasmin":"gn", "uas":"gn", "vas":"gn"} - version: {"pr":"v20200114", "sfcWind":"v20200101", "tas":"v20200408", "tasmax":"v20200114", "tasmin":"v20200114", "uas":"v20200114", "vas":"v20200114"} - calendar: "365_day" - member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1 - initial_month: 1 - sdate_add: 1 - reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/BCC-CSM2-MR/DCPP/BCC/BCC-CSM2-MR/dcppA-hindcast/r8i1p1f1/Amon/tas/gn/v20200101/tas_Amon_BCC-CSM2-MR_dcppA-hindcast_s2008-r8i1p1f1_gn_200801-201712.nc" + BCC-CSM2-MR: + name: "BCC-CSM2-MR" + institution: "Beijing Climate Center, Beijing 100081, China" + src: + hcst: "exp/CMIP6/dcppA-hindcast/BCC-CSM2-MR/DCPP/BCC/BCC-CSM2-MR/dcppA-hindcast/" + fcst: + startR: "exp/CMIP6/$dcpp$/BCC-CSM2-MR/DCPP/BCC/BCC-CSM2-MR/$dcpp$/" + monthly_mean: + table: {"tas":"Amon", "pr":"Amon", "psl":"Amon"} + grid: {"tas":"gn", "pr":"gn", "psl":"gn"} + #NOTE: It should be the lastest version with all the files inside + version: {"tas":"v20200101"} +# version: {"tas":[v20191126, v20191213, v20191219, v20200110, v20200114, v20200504, v20191209, v20191218, v20200101, v20200113, v20200401]} + daily_mean: + grid: {"pr":"gn", "sfcWind":"gn", "tas":"gn", "tasmax":"gn", "tasmin":"gn", "uas":"gn", "vas":"gn"} + version: {"pr":"v20200114", "sfcWind":"v20200101", "tas":"v20200408", "tasmax":"v20200114", "tasmin":"v20200114", "uas":"v20200114", "vas":"v20200114"} + calendar: "365_day" + member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1 + initial_month: 1 + sdate_add: 1 + reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/BCC-CSM2-MR/DCPP/BCC/BCC-CSM2-MR/dcppA-hindcast/r8i1p1f1/Amon/tas/gn/v20200101/tas_Amon_BCC-CSM2-MR_dcppA-hindcast_s2008-r8i1p1f1_gn_200801-201712.nc" # ---- - CanESM5: - name: "CanESM5" - institution: - src: - hcst: "exp/canesm5/cmip6-dcppA-hindcast/original_files/cmorfiles/DCPP/CCCma/CanESM5/dcppA-hindcast/" - fcst: "exp/canesm5/cmip6-dcppB-forecast_i1p2/original_files/cmorfiles/DCPP/CCCma/CanESM5/dcppB-forecast/" - startR: 
"exp/canesm5/cmip6-$dcpp$/original_files/cmorfiles/DCPP/CCCma/CanESM5/$dcpp$/" - first_dcppB_syear: 2020 - monthly_mean: - table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "tasmin":"Amon", "tasmax":"Amon", "tos":"Omon"} - - grid: {"tas":"gn", "pr":"gn", "psl":"gn", "tasmin":"gn", "tasmax":"gn", "tos":"gr"} - version: {"tas":"v20190429", "pr":"v20190429", "psl":"v20190429", "tasmin":"v20190429", "tasmax":"v20190429", "tos":"v20190429"} - daily_mean: - grid: {"pr":"gn", "tas":"gn", "tasmax":"gn", "tasmin":"gn"} - version: {"pr":"v20190429", "tas":"v20190429", "tasmax":"v20190429", "tasmin":"v20190429"} - calendar: "365_day" - member: r1i1p2f1,r2i1p2f1,r3i1p2f1,r4i1p2f1,r5i1p2f1,r6i1p2f1,r7i1p2f1,r8i1p2f1, r9i1p2f1, r10i1p2f1, r11i1p2f1,r12i1p2f1,r13i1p2f1,r14i1p2f1,r15i1p2f1,r16i1p2f1,r17i1p2f1,r18i1p2f1, r19i1p2f1, r20i1p2f1,r21i1p2f1,r22i1p2f1,r23i1p2f1,r24i1p2f1,r25i1p2f1,r26i1p2f1,r27i1p2f1,r28i1p2f1, r29i1p2f1, r30i1p2f1, r31i1p2f1,r32i1p2f1,r33i1p2f1,r34i1p2f1,r35i1p2f1,r36i1p2f1,r37i1p2f1,r38i1p2f1, r39i1p2f1, r40i1p2f1 - initial_month: 1 #next year Jan - sdate_add: 0 - reference_grid: "/esarchive/exp/canesm5/cmip6-dcppA-hindcast/original_files/cmorfiles/DCPP/CCCma/CanESM5/dcppA-hindcast/r1i1p2f1/Amon/tas/gn/v20190429/tas_Amon_CanESM5_dcppA-hindcast_s2008-r1i1p2f1_gn_200901-201812.nc" + CanESM5: + name: "CanESM5" + institution: + src: + hcst: "exp/canesm5/cmip6-dcppA-hindcast/original_files/cmorfiles/DCPP/CCCma/CanESM5/dcppA-hindcast/" + fcst: "exp/canesm5/cmip6-dcppB-forecast_i1p2/original_files/cmorfiles/DCPP/CCCma/CanESM5/dcppB-forecast/" + startR: "exp/canesm5/cmip6-$dcpp$/original_files/cmorfiles/DCPP/CCCma/CanESM5/$dcpp$/" + first_dcppB_syear: 2020 + monthly_mean: + table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "tasmin":"Amon", "tasmax":"Amon", "tos":"Omon"} + grid: {"tas":"gn", "pr":"gn", "psl":"gn", "tasmin":"gn", "tasmax":"gn", "tos":"gr"} + version: {"tas":"v20190429", "pr":"v20190429", "psl":"v20190429", "tasmin":"v20190429", "tasmax":"v20190429", "tos":"v20190429"} + daily_mean: + grid: {"pr":"gn", "tas":"gn", "tasmax":"gn", "tasmin":"gn"} + version: {"pr":"v20190429", "tas":"v20190429", "tasmax":"v20190429", "tasmin":"v20190429"} + calendar: "365_day" + member: r1i1p2f1,r2i1p2f1,r3i1p2f1,r4i1p2f1,r5i1p2f1,r6i1p2f1,r7i1p2f1,r8i1p2f1, r9i1p2f1, r10i1p2f1, r11i1p2f1,r12i1p2f1,r13i1p2f1,r14i1p2f1,r15i1p2f1,r16i1p2f1,r17i1p2f1,r18i1p2f1, r19i1p2f1, r20i1p2f1,r21i1p2f1,r22i1p2f1,r23i1p2f1,r24i1p2f1,r25i1p2f1,r26i1p2f1,r27i1p2f1,r28i1p2f1, r29i1p2f1, r30i1p2f1, r31i1p2f1,r32i1p2f1,r33i1p2f1,r34i1p2f1,r35i1p2f1,r36i1p2f1,r37i1p2f1,r38i1p2f1, r39i1p2f1, r40i1p2f1 + initial_month: 1 #next year Jan + sdate_add: 0 + reference_grid: "/esarchive/exp/canesm5/cmip6-dcppA-hindcast/original_files/cmorfiles/DCPP/CCCma/CanESM5/dcppA-hindcast/r1i1p2f1/Amon/tas/gn/v20190429/tas_Amon_CanESM5_dcppA-hindcast_s2008-r1i1p2f1_gn_200901-201812.nc" # ---- #NOTE: no data there - CESM1-1-CAM5-CMIP5: - name: "CESM1-1-CAM5-CMIP5" - institution: "National Center for Atmospheric Research" - src: - hcst: "exp/ncar/cesm-dple-dcppA-hindcast/cmorfiles/DCPP/NCAR/CESM1-1-CAM5-CMIP5/dcppA-hindcast" - fcst: - startR: "exp/ncar/cesm-dple-$dcpp$/cmorfiles/DCPP/NCAR/CESM1-1-CAM5-CMIP5/$dcpp$/" - monthly_mean: - table: {"tas":"Amon", "pr":"Amon"} - grid: {"tas":"gn", "pr":"gn"} - version: {"tas":"v20200101", "pr":"v20200101"} -#Prepared - daily_mean: - grid: - version: - calendar: "365_day" - member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1, r9i1p1f1, r10i1p1f1, 
r11i1p1f1,r12i1p1f1,r13i1p1f1,r14i1p1f1,r15i1p1f1,r16i1p1f1,r17i1p1f1,r18i1p1f1, r19i1p1f1, r20i1p1f1,r21i1p1f1,r22i1p1f1,r23i1p1f1,r24i1p1f1,r25i1p1f1,r26i1p1f1,r27i1p1f1,r28i1p1f1, r29i1p1f1, r30i1p1f1, r31i1p1f1,r32i1p1f1,r33i1p1f1,r34i1p1f1,r35i1p1f1,r36i1p1f1,r37i1p1f1,r38i1p1f1, r39i1p1f1, r40i1p1f1
-    initial_month: 11
-    sdate_add: 0
-    reference_grid: "/esarchive/exp/ncar/cesm-dple-dcppA-hindcast/cmorfiles/DCPP/NCAR/CESM1-1-CAM5-CMIP5/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20200101/tas_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s2008-r1i1p1f1_gn_200811-201812.nc"
+  CESM1-1-CAM5-CMIP5:
+    name: "CESM1-1-CAM5-CMIP5"
+    institution: "National Center for Atmospheric Research"
+    src:
+      hcst: "exp/ncar/cesm-dple-dcppA-hindcast/cmorfiles/DCPP/NCAR/CESM1-1-CAM5-CMIP5/dcppA-hindcast"
+      fcst:
+      startR: "exp/ncar/cesm-dple-$dcpp$/cmorfiles/DCPP/NCAR/CESM1-1-CAM5-CMIP5/$dcpp$/"
+    monthly_mean:
+      table: {"tas":"Amon", "pr":"Amon"}
+      grid: {"tas":"gn", "pr":"gn"}
+      version: {"tas":"v20200101", "pr":"v20200101"}
+#Prepared
+    daily_mean:
+      grid:
+      version:
+    calendar: "365_day"
+    member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1, r9i1p1f1, r10i1p1f1, r11i1p1f1,r12i1p1f1,r13i1p1f1,r14i1p1f1,r15i1p1f1,r16i1p1f1,r17i1p1f1,r18i1p1f1, r19i1p1f1, r20i1p1f1,r21i1p1f1,r22i1p1f1,r23i1p1f1,r24i1p1f1,r25i1p1f1,r26i1p1f1,r27i1p1f1,r28i1p1f1, r29i1p1f1, r30i1p1f1, r31i1p1f1,r32i1p1f1,r33i1p1f1,r34i1p1f1,r35i1p1f1,r36i1p1f1,r37i1p1f1,r38i1p1f1, r39i1p1f1, r40i1p1f1
+    initial_month: 11
+    sdate_add: 0
+    reference_grid: "/esarchive/exp/ncar/cesm-dple-dcppA-hindcast/cmorfiles/DCPP/NCAR/CESM1-1-CAM5-CMIP5/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20200101/tas_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s2008-r1i1p1f1_gn_200811-201812.nc"
# ----
#NOTE: in tapes
-  CMCC-CM2-SR5:
-    name: "CMCC-CM2-SR5"
-    institution: "Euro-Mediterranean Center on Climate Change"
-    src:
-      hcst: "exp/CMIP6/dcppA-hindcast/CMCC-CM2-SR5/DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/"
-      fcst: "exp/CMIP6/dcppB-forecast/CMCC-CM2-SR5/DCPP/CMCC/CMCC-CM2-SR5/dcppB-forecast/"
-      startR: "exp/CMIP6/$dcpp$/CMCC-CM2-SR5/DCPP/CMCC/CMCC-CM2-SR5/$dcpp$/"
-      first_dcppB_syear: 2020
-    monthly_mean:
-      table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "prc":"Amon", "ts":"Amon"}
-      grid: {"tas":"gn", "pr":"gn", "psl":"gn", "prc":"gn", "ts":"gn"}
-      version: {"tas":"v20210312", "pr":"v20210312", "psl":"v20210312", "prc":"v20200101", "ts":"v20200101"}
-    daily_mean:
-      grid: {"pr":"gn", "tasmax":"gn", "tasmin":"gn"}
-      version: {"pr":"v20210909", "tasmax":"v20210909", "tasmin":"v20210909"}
-    calendar: "365_day"
-    member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1,r9i1p1f1,r10i1p1f1
-    initial_month: 11
-    sdate_add: 0
-    reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/CMCC-CM2-SR5/DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20210312/tas_Amon_CMCC-CM2-SR5_dcppA-hindcast_s2008-r1i1p1f1_gn_200811-201812.nc"
-
-# ----
-  FGOALS-f3-L:
-    name: "FGOALS-f3-L"
-    institution: "Chinese Academy of Sciences, Beijing 100029, China"
-    src:
-      hcst: "exp/CMIP6/dcppA-hindcast/FGOALS-f3-L/DCPP/CAS/FGOALS-f3-L/dcppA-hindcast/"
-      fcst: "exp/CMIP6/dcppB-forecast/FGOALS-f3-L/DCPP/CAS/FGOALS-f3-L/dcppB-forecast/"
-      startR: "exp/CMIP6/$dcpp$/FGOALS-f3-L/DCPP/CAS/FGOALS-f3-L/$dcpp$/"
-      first_dcppB_syear: 2017
-    monthly_mean:
-      table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "tos":"Omon"}
-      grid: {"tas":"gr", "pr":"gr", "psl":"gr","tos":"gn"}
-      version: {"tas":"v20220212", "pr":"v20220212", "psl":"v20220212", "tos":"v20220214"}
-#Prepared
-    
daily_mean: - grid: - version: - calendar: "365_day" - member: r1i1p1f1,r2i1p1f1,r3i1p1f1 - initial_month: 11 - sdate_add: 0 - reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/FGOALS-f3-L/DCPP/CAS/FGOALS-f3-L/dcppA-hindcast/r1i1p1f1/Amon/tas/gr/v20220212/tas_Amon_FGOALS-f3-L_dcppA-hindcast_s1960-r1i1p1f1_gr_196011-197012.nc" - -# ---- - IPSL-CM6A-LR: - name: "IPSL-CM6A-LR" - institution: "Institut Pierre-Simon Laplace" - src: - hcst: "exp/CMIP6/dcppA-hindcast/IPSL-CM6A-LR/DCPP/IPSL/IPSL-CM6A-LR/dcppA-hindcast/" - fcst: - startR: "exp/CMIP6/$dcpp$/IPSL-CM6A-LR/DCPP/IPSL/IPSL-CM6A-LR/$dcpp$/" - monthly_mean: - table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "sfcWind":"Amon"} - grid: {"tas":"gr", "pr":"gr", "psl":"gr", "sfcWind":"gr"} - version: {"tas":"v20200504", "pr":"v20200504", "psl":"v20200504", "sfcWind":"v20200504"} - daily_mean: - grid: {"pr":"gr", "tas":"gr", "tasmax":"gr", "tasmin":"gr"} - version: {"pr":"v20200108", "tas":"v20200504", "tasmax":"v20200504", "tasmin":"v20200504"} - calendar: "gregorian" - member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1,r9i1p1f1,r10i1p1f1 - initial_month: 1 - sdate_add: 0 - reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/IPSL-CM6A-LR/DCPP/IPSL/IPSL-CM6A-LR/dcppA-hindcast/r1i1p1f1/Amon/tas/gr/v20200504/tas_Amon_IPSL-CM6A-LR_dcppA-hindcast_s2008-r1i1p1f1_gr_200901-201812.nc" - -# ---- - MIROC6: - name: "MIROC6" - institution: "Model for Interdisciplinary Research on Climate" - src: - hcst: "exp/CMIP6/dcppA-hindcast/MIROC6/DCPP/MIROC/MIROC6/dcppA-hindcast/" - fcst: "exp/CMIP6/dcppA-hindcast/MIROC6/DCPP/MIROC/MIROC6/dcppA-hindcast/" - startR: "exp/CMIP6/$dcpp$/MIROC6/DCPP/MIROC/MIROC6/$dcpp$/" - monthly_mean: - table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "tasmin":"Amon", "tasmax":"Amon"} - grid: {"tas":"gn", "pr":"gn", "psl":"gn", "tasmin":"gn", "tasmax":"gn"} - version: {"tas":"v20200417", "pr":"v20200504", "psl":"v20200504", "tasmin":"v20200417", "tasmax":"v20200504"} - daily_mean: - grid: {"pr":"gn", "tas":"gn", "tasmax":"gn", "tasmin":"gn"} - version: {"pr":"v20191217", "tas":"v20200416", "tasmax":"v20200416", "tasmin":"v20200416"} - calendar: "standard" - member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1,r9i1p1f1,r10i1p1f1 - initial_month: 11 - sdate_add: 0 - reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/MIROC6/DCPP/MIROC/MIROC6/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20200417/tas_Amon_MIROC6_dcppA-hindcast_s2008-r1i1p1f1_gn_200811-201812.nc" - -# ---- - MPI-ESM1.2-HR: - name: "MPI-ESM1.2-HR" - institution: "Max-Planck-Institute for Meteorology" - src: - hcst: "exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-HR/DCPP/MPI-M/MPI-ESM1-2-HR/dcppA-hindcast/" - fcst: - startR: "exp/CMIP6/$dcpp$/MPI-ESM1-2-HR/DCPP/MPI-M/MPI-ESM1-2-HR/$dcpp$/" - monthly_mean: - table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "tasmin":"Amon", "tasmax":"Amon"} - grid: {"tas":"gn", "pr":"gn", "psl":"gn", "tasmin":"gn", "tasmax":"gn"} - version: {"tas":"v20200320", "pr":"v20200320", "psl":"v20200320", "sfcWind":"v20200320"} - daily_mean: - grid: {"pr":"gn", "tas":"gn", "tasmax":"gn", "tasmin":"gn"} - version: {"pr":"v20210128", "tas":"v20200320", "tasmax":"v20200101", "tasmin":"v20200101"} - calendar: "standard" - member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1,r9i1p1f1,r10i1p1f1 - initial_month: 11 - sdate_add: 0 - reference_grid: 
"/esarchive/exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-HR/DCPP/MPI-M/MPI-ESM1-2-HR/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20200320/tas_Amon_MPI-ESM1-2-HR_dcppA-hindcast_s2008-r1i1p1f1_gn_200811-201812.nc" - -# ---- - MPI-ESM1.2-LR: - name: "MPI-ESM1.2-LR" - institution: "Max-Planck-Institute for Meteorology" - src: - hcst: "exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-LR/DCPP/MPI-M/MPI-ESM1-2-LR/dcppA-hindcast/" - fcst: - startR: "exp/CMIP6/$dcpp$/MPI-ESM1-2-LR/DCPP/MPI-M/MPI-ESM1-2-LR/$dcpp$/" - monthly_mean: - table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "ts":"Amon"} - grid: {"tas":"gn", "pr":"gn", "psl":"gn", "ts":"gn"} - version: {"tas":"v20200101", "pr":"v20200101", "psl":"v20200101", "ts":"v20200101"} - daily_mean: - grid: - version: - calendar: "proleptic_gregorian" - member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1,r9i1p1f1,r10i1p1f1,r11i1p1f1,r12i1p1f1,r13i1p1f1,r14i1p1f1,r15i1p1f1,r16i1p1f1 - initial_month: 11 - sdate_add: 0 - reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-LR/DCPP/MPI-M/MPI-ESM1-2-LR/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20200101/tas_Amon_MPI-ESM1-2-LR_dcppA-hindcast_s2008-r1i1p1f1_gn_200811-201812.nc" - -# ---- - MRI-ESM2-0: - name: "MRI-ESM2-0" - institution: "Meteorological Research Institute, Japan" - src: - hcst: "exp/CMIP6/dcppA-hindcast/MRI-ESM2-0/DCPP/MRI/MRI-ESM2-0/dcppA-hindcast/" - fcst: - startR: "exp/CMIP6/$dcpp$/MRI-ESM2-0/DCPP/MRI/MRI-ESM2-0/$dcpp$/" - monthly_mean: - table: {"tas":"Amon", "pr":"Amon", "psl":"Amon"} - grid: {"tas":"gn", "pr":"gn", "psl":"gn"} - version: {"tas":"v20200101", "pr":"v20200101", "psl":"v20200101"} - daily_mean: - grid: {"pr":"gn", "tasmax":"gn", "tasmin":"gn"} - version: {"pr":"v20210122", "tasmax":"v20210122", "tasmin":"v20210122"} - calendar: "proleptic_gregorian" - member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1,r9i1p1f1,r10i1p1f1 - initial_month: 11 - sdate_add: 0 - reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/MRI-ESM2-0/DCPP/MRI/MRI-ESM2-0/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20200101/tas_Amon_MRI-ESM2-0_dcppA-hindcast_s2008-r1i1p1f1_gn_200811-201312.nc" + CMCC-CM2-SR5: + name: "CMCC-CM2-SR5" + institution: "Euro-Mediterranean Center on Climate Change" + src: + hcst: "exp/CMIP6/dcppA-hindcast/CMCC-CM2-SR5/DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/" + fcst: "exp/CMIP6/dcppB-forecast/CMCC-CM2-SR5/DCPP/CMCC/CMCC-CM2-SR5/dcppB-forecast/" + startR: "exp/CMIP6/$dcpp$/CMCC-CM2-SR5/DCPP/CMCC/CMCC-CM2-SR5/$dcpp$/" + first_dcppB_syear: 2020 + monthly_mean: + table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "prc":"Amon", "ts":"Amon"} + grid: {"tas":"gn", "pr":"gn", "psl":"gn", "prc":"gn", "ts":"gn"} + version: {"tas":"v20210312", "pr":"v20210312", "psl":"v20210312", "prc":"v20200101", "ts":"v20200101"} + daily_mean: + grid: {"pr":"gn", "tasmax":"gn", "tasmin":"gn"} + version: {"pr":"v20210909", "tasmax":"v20210909", "tasmin":"v20210909"} + calendar: "365_day" + member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1,r9i1p1f1,r10i1p1f1 + initial_month: 11 + sdate_add: 0 + reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/CMCC-CM2-SR5/DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20210312/tas_Amon_CMCC-CM2-SR5_dcppA-hindcast_s2008-r1i1p1f1_gn_200811-201812.nc" # ---- - #NOTE: NorCPM1-i1 and i2 are under the same directory - NorCPM1-i1: - name: "NorCPM1-i1" - institution: "NorESM Climate modeling Consortium" - src: - hcst: "exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/" - fcst: - startR: 
"exp/CMIP6/$dcpp$/NorCPM1/DCPP/NCC/NorCPM1/$dcpp$/" - monthly_mean: - table: {"tas":"Amon", "pr":"Amon", "psl":"Amon"} - grid: {"tas":"gn", "pr":"gn", "psl":"gn"} - version: {"tas":"v20200320", "pr":"v20200320", "psl":"v20200320"} - daily_mean: - grid: {"pr":"gn", "tas":"gn", "tasmax":"gn", "tasmin":"gn"} - version: {"pr":"v20191005", "tas":"v20191029", "tasmax":"v20191005", "tasmin":"v20191005"} - calendar: "noleap" - member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1,r9i1p1f1,r10i1p1f1 - initial_month: 10 - sdate_add: 0 - reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20191029/tas_Amon_NorCPM1_dcppA-hindcast_s2008-r1i1p1f1_gn_200810-201812.nc" + FGOALS-f3-L: + name: "FGOALS-f3-L" + institution: "Chinese Academy of Sciences, Beijing 100029, China" + src: + hcst: "exp/CMIP6/dcppA-hindcast/FGOALS-f3-L/DCPP/CAS/FGOALS-f3-L/dcppA-hindcast/" + fcst: "exp/CMIP6/dcppB-forecast/FGOALS-f3-L/DCPP/CAS/FGOALS-f3-L/dcppB-forecast/" + startR: "exp/CMIP6/$dcpp$/FGOALS-f3-L/DCPP/CAS/FGOALS-f3-L/$dcpp$/" + first_dcppB_syear: 2017 + monthly_mean: + table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "tos":"Omon"} + grid: {"tas":"gr", "pr":"gr", "psl":"gr","tos":"gn"} + version: {"tas":"v20220212", "pr":"v20220212", "psl":"v20220212", "tos":"v20220214"} + # Prepared + daily_mean: + grid: + version: + calendar: "365_day" + member: r1i1p1f1,r2i1p1f1,r3i1p1f1 + initial_month: 11 + sdate_add: 0 + reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/FGOALS-f3-L/DCPP/CAS/FGOALS-f3-L/dcppA-hindcast/r1i1p1f1/Amon/tas/gr/v20220212/tas_Amon_FGOALS-f3-L_dcppA-hindcast_s1960-r1i1p1f1_gr_196011-197012.nc" # ---- - NorCPM1-i2: - name: "NorCPM1-i2" - institution: "NorESM Climate modeling Consortium" - src: - hcst: "exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/" - fcst: - startR: "exp/CMIP6/$dcpp$/NorCPM1/DCPP/NCC/NorCPM1/$dcpp$/" - monthly_mean: - table: {"pr":"Amon", "psl":"Amon"} - grid: {"pr":"gn", "psl":"gn"} - version: {"pr":"v20200101", "psl":"v20200101"} - daily_mean: - grid: {"pr":"gn", "tas":"gn", "tasmax":"gn", "tasmin":"gn"} - version: {"pr":"v20191005", "tas":"v20200101", "tasmax":"v20191005", "tasmin":"v20191005"} - calendar: "noleap" - member: r1i2p1f1,r2i2p1f1,r3i2p1f1,r4i2p1f1,r5i2p1f1,r6i2p1f1,r7i2p1f1,r8i2p1f1,r9i2p1f1,r10i2p1f1 - initial_month: 10 - sdate_add: 0 - reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/r1i2p1f1/Amon/pr/gn/v20200101/pr_Amon_NorCPM1_dcppA-hindcast_s2008-r1i2p1f1_gn_200810-201812.nc" - - -# ===================================== + IPSL-CM6A-LR: + name: "IPSL-CM6A-LR" + institution: "Institut Pierre-Simon Laplace" + src: + hcst: "exp/CMIP6/dcppA-hindcast/IPSL-CM6A-LR/DCPP/IPSL/IPSL-CM6A-LR/dcppA-hindcast/" + fcst: + startR: "exp/CMIP6/$dcpp$/IPSL-CM6A-LR/DCPP/IPSL/IPSL-CM6A-LR/$dcpp$/" + monthly_mean: + table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "sfcWind":"Amon"} + grid: {"tas":"gr", "pr":"gr", "psl":"gr", "sfcWind":"gr"} + version: {"tas":"v20200504", "pr":"v20200504", "psl":"v20200504", "sfcWind":"v20200504"} + daily_mean: + grid: {"pr":"gr", "tas":"gr", "tasmax":"gr", "tasmin":"gr"} + version: {"pr":"v20200108", "tas":"v20200504", "tasmax":"v20200504", "tasmin":"v20200504"} + calendar: "gregorian" + member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1,r9i1p1f1,r10i1p1f1 + initial_month: 1 + sdate_add: 0 + reference_grid: 
"/esarchive/exp/CMIP6/dcppA-hindcast/IPSL-CM6A-LR/DCPP/IPSL/IPSL-CM6A-LR/dcppA-hindcast/r1i1p1f1/Amon/tas/gr/v20200504/tas_Amon_IPSL-CM6A-LR_dcppA-hindcast_s2008-r1i1p1f1_gr_200901-201812.nc" - Reference: - GHCNv4: - name: - institution: - src: "obs/noaa/ghcn_v4/" - monthly_mean: {"tas":"", "tasanomaly":""} - daily_mean: - calendar: "gregorian" - reference_grid: "/esarchive/obs/noaa/ghcn_v4/monthly_mean/tasanomaly/tasanomaly_201811.nc" # ---- - ERA5: - name: "ERA5" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "recon/ecmwf/era5/" - daily_mean: {"tas":"daily_mean/tas_f1h-r1440x721cds/", - "rsds":"daily_mean/rsds_f1h-r1440x721cds/", - "prlr":"daily_mean/prlr_f1h-r1440x721cds/", - "g300":"daily_mean/g300_f1h-r1440x721cds/", - "g500":"daily_mean/g500_f1h-r1440x721cds/", - "g850":"daily_mean/g850_f1h-r1440x721cds/", - "sfcWind":"daily_mean/sfcWind_f1h-r1440x721cds/", - "tasmax":"daily/tasmax-r1440x721cds/", - "tasmin":"daily/tasmin-r1440x721cds/", - "ta300":"daily_mean/ta300_f1h-r1440x721cds/", - "ta500":"daily_mean/ta500_f1h-r1440x721cds/", - "ta850":"daily_mean/ta850_f1h-r1440x721cds/", - "hurs":"daily_mean/hurs_f1h-r1440x721cds/"} - monthly_mean: {"tas":"monthly_mean/tas_f1h-r1440x721cds/", - "psl":"monthly_mean/psl_f1h-r1440x721cds/", - "prlr":"monthly_mean/prlr_f1h-r1440x721cds/", - "rsds":"monthly_mean/rsds_f1h-r1440x721cds/", - "g300":"monthly_mean/g300_f1h-r1440x721cds/", - "g500":"monthly_mean/g500_f1h-r1440x721cds/", - "g850":"monthly_mean/g850_f1h-r1440x721cds/", - "sfcWind":"monthly_mean/sfcWind_f1h-r1440x721cds/", - "tasmax":"monthly_mean/tasmax_f1h-r1440x721cds/", - "tasmin":"monthly_mean/tasmin_f1h-r1440x721cds/", - "ta300":"montly_mean/ta300_f1h-r1440x721cds/", - "ta500":"monthly_mean/ta500_f1h-r1440x721cds/", - "ta850":"monthly_mean/ta850_f1h-r1440x721cds/", - "tos":"monthly_mean/tos_f1h-r1440x721cds/", - "sic":"monthly_mean/sic_f1h-r1440x721cds/"} - calendar: "gregorian" - reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" + MIROC6: + name: "MIROC6" + institution: "Model for Interdisciplinary Research on Climate" + src: + hcst: "exp/CMIP6/dcppA-hindcast/MIROC6/DCPP/MIROC/MIROC6/dcppA-hindcast/" + fcst: "exp/CMIP6/dcppA-hindcast/MIROC6/DCPP/MIROC/MIROC6/dcppA-hindcast/" + startR: "exp/CMIP6/$dcpp$/MIROC6/DCPP/MIROC/MIROC6/$dcpp$/" + monthly_mean: + table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "tasmin":"Amon", "tasmax":"Amon"} + grid: {"tas":"gn", "pr":"gn", "psl":"gn", "tasmin":"gn", "tasmax":"gn"} + version: {"tas":"v20200417", "pr":"v20200504", "psl":"v20200504", "tasmin":"v20200417", "tasmax":"v20200504"} + daily_mean: + grid: {"pr":"gn", "tas":"gn", "tasmax":"gn", "tasmin":"gn"} + version: {"pr":"v20191217", "tas":"v20200416", "tasmax":"v20200416", "tasmin":"v20200416"} + calendar: "standard" + member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1,r9i1p1f1,r10i1p1f1 + initial_month: 11 + sdate_add: 0 + reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/MIROC6/DCPP/MIROC/MIROC6/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20200417/tas_Amon_MIROC6_dcppA-hindcast_s2008-r1i1p1f1_gn_200811-201812.nc" # ---- -# missing info in script - NCEP1: + MPI-ESM1.2-HR: + name: "MPI-ESM1.2-HR" + institution: "Max-Planck-Institute for Meteorology" + src: + hcst: "exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-HR/DCPP/MPI-M/MPI-ESM1-2-HR/dcppA-hindcast/" + fcst: + startR: "exp/CMIP6/$dcpp$/MPI-ESM1-2-HR/DCPP/MPI-M/MPI-ESM1-2-HR/$dcpp$/" + monthly_mean: + table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", 
"tasmin":"Amon", "tasmax":"Amon"} + grid: {"tas":"gn", "pr":"gn", "psl":"gn", "tasmin":"gn", "tasmax":"gn"} + version: {"tas":"v20200320", "pr":"v20200320", "psl":"v20200320", "sfcWind":"v20200320"} + daily_mean: + grid: {"pr":"gn", "tas":"gn", "tasmax":"gn", "tasmin":"gn"} + version: {"pr":"v20210128", "tas":"v20200320", "tasmax":"v20200101", "tasmin":"v20200101"} + calendar: "standard" + member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1,r9i1p1f1,r10i1p1f1 + initial_month: 11 + sdate_add: 0 + reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-HR/DCPP/MPI-M/MPI-ESM1-2-HR/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20200320/tas_Amon_MPI-ESM1-2-HR_dcppA-hindcast_s2008-r1i1p1f1_gn_200811-201812.nc" # ---- - JRA-55: - name: "JRA-55" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "recon/jma/jra55/" - monthly_mean: {"tas":"monthly_mean/tas_f6h", "psl":"monthly_mean/psl_f6h", - "tos":"", "pr":"monthly_mean/pr_s0-3h", - "prlr":"monthly_mean/prlr_s0-3h"} - daily_mean: {"tas":"daily_mean/tas_f6h", "psl":"daily_mean/psl_f6h", - "prlr":"daily_mean/prlr_s0-3h", "sfcWind":"daily_mean/sfcWind_f6h"} - calendar: "proleptic_gregorian" - reference_grid: "/esarchive/recon/jma/jra55/monthly_mean/tas_f6h/tas_200811.nc" + MPI-ESM1.2-LR: + name: "MPI-ESM1.2-LR" + institution: "Max-Planck-Institute for Meteorology" + src: + hcst: "exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-LR/DCPP/MPI-M/MPI-ESM1-2-LR/dcppA-hindcast/" + fcst: + startR: "exp/CMIP6/$dcpp$/MPI-ESM1-2-LR/DCPP/MPI-M/MPI-ESM1-2-LR/$dcpp$/" + monthly_mean: + table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "ts":"Amon"} + grid: {"tas":"gn", "pr":"gn", "psl":"gn", "ts":"gn"} + version: {"tas":"v20200101", "pr":"v20200101", "psl":"v20200101", "ts":"v20200101"} + daily_mean: + grid: + version: + calendar: "proleptic_gregorian" + member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1,r9i1p1f1,r10i1p1f1,r11i1p1f1,r12i1p1f1,r13i1p1f1,r14i1p1f1,r15i1p1f1,r16i1p1f1 + initial_month: 11 + sdate_add: 0 + reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-LR/DCPP/MPI-M/MPI-ESM1-2-LR/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20200101/tas_Amon_MPI-ESM1-2-LR_dcppA-hindcast_s2008-r1i1p1f1_gn_200811-201812.nc" # ---- - GISTEMPv4: - name: "GISTEMPv4" - institution: "NASA Goddard Institute for Space Studies" - src: "obs/noaa-nasa/ghcnersstgiss/" - monthly_mean: {"tasanomaly":""} - daily_mean: - calendar: "standard" - reference_grid: "/esarchive/obs/noaa-nasa/ghcnersstgiss/monthly_mean/tasanomaly/tasanomaly_200811.nc" + MRI-ESM2-0: + name: "MRI-ESM2-0" + institution: "Meteorological Research Institute, Japan" + src: + hcst: "exp/CMIP6/dcppA-hindcast/MRI-ESM2-0/DCPP/MRI/MRI-ESM2-0/dcppA-hindcast/" + fcst: + startR: "exp/CMIP6/$dcpp$/MRI-ESM2-0/DCPP/MRI/MRI-ESM2-0/$dcpp$/" + monthly_mean: + table: {"tas":"Amon", "pr":"Amon", "psl":"Amon"} + grid: {"tas":"gn", "pr":"gn", "psl":"gn"} + version: {"tas":"v20200101", "pr":"v20200101", "psl":"v20200101"} + daily_mean: + grid: {"pr":"gn", "tasmax":"gn", "tasmin":"gn"} + version: {"pr":"v20210122", "tasmax":"v20210122", "tasmin":"v20210122"} + calendar: "proleptic_gregorian" + member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1,r9i1p1f1,r10i1p1f1 + initial_month: 11 + sdate_add: 0 + reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/MRI-ESM2-0/DCPP/MRI/MRI-ESM2-0/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20200101/tas_Amon_MRI-ESM2-0_dcppA-hindcast_s2008-r1i1p1f1_gn_200811-201312.nc" # ---- - HadCRUT4: - name: "HadCRUT4" - 
institution: "Met Office Hadley Centre / Climatic Research Unit, University of East Anglia" - src: "obs/ukmo/hadcrut_v4.6/" - monthly_mean: {"tasanomaly":""} - daily_mean: - calendar: "standard" - reference_grid: "/esarchive/obs/ukmo/hadcrut_v4.6/monthly_mean/tasanomaly/tasanomaly_200811.nc" + #NOTE: NorCPM1-i1 and i2 are under the same directory + NorCPM1-i1: + name: "NorCPM1-i1" + institution: "NorESM Climate modeling Consortium" + src: + hcst: "exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/" + fcst: + startR: "exp/CMIP6/$dcpp$/NorCPM1/DCPP/NCC/NorCPM1/$dcpp$/" + monthly_mean: + table: {"tas":"Amon", "pr":"Amon", "psl":"Amon"} + grid: {"tas":"gn", "pr":"gn", "psl":"gn"} + version: {"tas":"v20200320", "pr":"v20200320", "psl":"v20200320"} + daily_mean: + grid: {"pr":"gn", "tas":"gn", "tasmax":"gn", "tasmin":"gn"} + version: {"pr":"v20191005", "tas":"v20191029", "tasmax":"v20191005", "tasmin":"v20191005"} + calendar: "noleap" + member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1,r9i1p1f1,r10i1p1f1 + initial_month: 10 + sdate_add: 0 + reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20191029/tas_Amon_NorCPM1_dcppA-hindcast_s2008-r1i1p1f1_gn_200810-201812.nc" # ---- - HadSLP2: - name: "HadSLP2" - institution: - src: "obs/ukmo/hadslp_v2/" - monthly_mean: {"psl":""} - daily_mean: - calendar: "proleptic_gregorian" - reference_grid: "/esarchive/obs/ukmo/hadslp_v2/monthly_mean/psl/psl_200811.nc" + NorCPM1-i2: + name: "NorCPM1-i2" + institution: "NorESM Climate modeling Consortium" + src: + hcst: "exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/" + fcst: + startR: "exp/CMIP6/$dcpp$/NorCPM1/DCPP/NCC/NorCPM1/$dcpp$/" + monthly_mean: + table: {"pr":"Amon", "psl":"Amon"} + grid: {"pr":"gn", "psl":"gn"} + version: {"pr":"v20200101", "psl":"v20200101"} + daily_mean: + grid: {"pr":"gn", "tas":"gn", "tasmax":"gn", "tasmin":"gn"} + version: {"pr":"v20191005", "tas":"v20200101", "tasmax":"v20191005", "tasmin":"v20191005"} + calendar: "noleap" + member: r1i2p1f1,r2i2p1f1,r3i2p1f1,r4i2p1f1,r5i2p1f1,r6i2p1f1,r7i2p1f1,r8i2p1f1,r9i2p1f1,r10i2p1f1 + initial_month: 10 + sdate_add: 0 + reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/r1i2p1f1/Amon/pr/gn/v20200101/pr_Amon_NorCPM1_dcppA-hindcast_s2008-r1i2p1f1_gn_200810-201812.nc" diff --git a/conf/archive_reference.yml b/conf/archive_reference.yml new file mode 100644 index 00000000..62267d07 --- /dev/null +++ b/conf/archive_reference.yml @@ -0,0 +1,220 @@ +gpfs: + src_ref: "/gpfs/projects/bsc32/esarchive_cache/" + Reference: + ERA5: + name: "ERA5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "recon/ecmwf/era5/" + monthly_mean: {"tas":"monthly_mean/tas_f1h-r1440x721cds/", + "psl":"monthly_mean/psl_f1h-r1440x721cds/", + "prlr":"monthly_mean/prlr_f1h-r1440x721cds/", + "sfcWind":"monthly_mean/sfcWind_f1h-r1440x721cds/"} + calendar: "standard" + reference_grid: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" + land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/constant/lsm-r1440x721cds/sftof.nc" + +esarchive: + src_ref: "/esarchive/" + Reference: + ERA5: + name: "ERA5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "recon/ecmwf/era5/" + weekly_mean: {"tas":"weekly_mean/tas_f1h-r1440x721cds/", + "prlr":"weekly_mean/prlr_f1h-r1440x721cds/"} + daily_mean: 
{"tas":"daily_mean/tas_f1h-r1440x721cds/", + "rsds":"daily_mean/rsds_f1h-r1440x721cds/", + "prlr":"daily_mean/prlr_f1h-r1440x721cds/", + "g300":"daily_mean/g300_f1h-r1440x721cds/", + "g500":"daily_mean/g500_f1h-r1440x721cds/", + "g850":"daily_mean/g850_f1h-r1440x721cds/", + "sfcWind":"daily_mean/sfcWind_f1h-r1440x721cds/", + "tasmax":"daily/tasmax-r1440x721cds/", + "tasmin":"daily/tasmin-r1440x721cds/", + "ta300":"daily_mean/ta300_f1h-r1440x721cds/", + "ta500":"daily_mean/ta500_f1h-r1440x721cds/", + "ta850":"daily_mean/ta850_f1h-r1440x721cds/", + "hurs":"daily_mean/hurs_f1h-r1440x721cds/"} + monthly_mean: {"tas":"monthly_mean/tas_f1h-r1440x721cds/", + "psl":"monthly_mean/psl_f1h-r1440x721cds/", + "prlr":"monthly_mean/prlr_f1h-r1440x721cds/", + "rsds":"monthly_mean/rsds_f1h-r1440x721cds/", + "g300":"monthly_mean/g300_f1h-r1440x721cds/", + "g500":"monthly_mean/g500_f1h-r1440x721cds/", + "g850":"monthly_mean/g850_f1h-r1440x721cds/", + "sfcWind":"monthly_mean/sfcWind_f1h-r1440x721cds/", + "tasmax":"monthly_mean/tasmax_f1h-r1440x721cds/", + "tasmin":"monthly_mean/tasmin_f1h-r1440x721cds/", + "ta300":"montly_mean/ta300_f1h-r1440x721cds/", + "ta500":"monthly_mean/ta500_f1h-r1440x721cds/", + "ta850":"monthly_mean/ta850_f1h-r1440x721cds/", + "tos":"monthly_mean/tos_f1h-r1440x721cds/", + "sic":"monthly_mean/sic_f1h-r1440x721cds/"} + calendar: "gregorian" + reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" + land_sea_mask: "/esarchive/recon/ecmwf/era5/constant/lsm-r1440x721cds/sftof.nc" + ERA5-Land: + name: "ERA5-Land" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "recon/ecmwf/era5land/" + daily_mean: {"tas":"daily_mean/tas_f1h/", "rsds":"daily_mean/rsds_f1h/", + "prlr":"daily_mean/prlr_f1h/", "sfcWind":"daily_mean/sfcWind_f1h/", + "tasmin":"daily/tasmin/", "tasmax":"daily/tasmax/"} + monthly_mean: {"tas":"monthly_mean/tas_f1h/","tasmin":"monthly_mean/tasmin_f24h/", + "tasmax":"monthly_mean/tasmax_f24h/", "prlr":"monthly_mean/prlr_f1h/", + "sfcWind":"monthly_mean/sfcWind_f1h/", "rsds":"monthly_mean/rsds_f1h/", + "tdps":"monthly_mean/tdps_f1h/"} + calendar: "proleptic_gregorian" + reference_grid: "/esarchive/recon/ecmwf/era5land/daily_mean/tas_f1h/tas_201805.nc" + UERRA: + name: "ECMWF UERRA" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "recon/ecmwf/uerra_mescan/" + daily_mean: {"tas":"daily_mean/tas_f6h/"} + monthly_mean: {"tas":"monthly_mean/tas_f6h/"} + calendar: "proleptic_gregorian" + reference_grid: "/esarchive/recon/ecmwf/uerra_mescan/daily_mean/tas_f6h/tas_201805.nc" + CERRA: + name: "ECMWF CERRA" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "recon/ecmwf/cerra/" + daily_mean: {"hurs":"daily_mean/hurs_f3h-r2631x1113/", "ps":"daily_mean/ps_f3h-r2631x1113/", + "sfcWind":"daily_mean/sfcWind_f3h-r2631x1113/", + "tas":"daily_mean/tas_f3h-r2631x1113/", "winddir":"daily_mean/tas_f3h-r2631x1113/"} + monthly_mean: {"hurs":"monthly_mean/hurs_f3h-r2631x1113/", "ps":"monthly_mean/ps_f3h-r2631x1113/", + "sfcWind":"monthly_mean/sfcWind_f3h-r2631x1113/", + "tas":"monthly_mean/tas_f3h-r2631x1113/", + "winddir":"monthly_mean/winddir_f3h-r2631x1113/", + "tasmin":"monthly_mean/tasmin_f24h-r2631x1113/", + "tasmax":"monthly_mean/tasmax_f24h-r2631x1113/"} + calendar: "proleptic_gregorian" + reference_grid: "/esarchive/recon/ecmwf/cerra/monthly_mean/tas_f3h-r2631x1113/tas_200506.nc" + CERRA-Land: + name: "ECMWF CERRA-Land" + institution: "European Centre for Medium-Range Weather 
Forecasts" + src: "recon/ecmwf/cerraland/" + daily_mean: {"prlr":"daily_mean/prlr_f6h-r2631x1113/"} + monthly_mean: {"prlr":"monthly_mean/prlr_f6h-r2631x1113/"} + calendar: "proleptic_gregorian" + reference_grid: "/esarchive/recon/ecmwf/cerraland/monthly_mean/prlr_f6h-r2631x1113/prlr_200412.nc" + HadCRUT5: + name: "HadCRUT5" + institution: "Met Office" + src: "obs/ukmo/hadcrut_v5.0_analysis/" + monthly_mean: {"tasanomaly":"monthly_mean/tasanomaly/"} + calendar: "proleptic_gregorian" + reference_grid: "/esarchive/obs/ukmo/hadcrut_v5.0_analysis/monthly_mean/tasanomaly/tasanomaly_202001.nc" + BEST: + name: "BEST" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "obs/berkeleyearth/berkeleyearth/" + daily_mean: {"tas":"daily_mean/tas/"} + monthly_mean: {"tas":"monthly_mean/tas/"} + calendar: "proleptic_gregorian" + reference_grid: "/esarchive/obs/berkeleyearth/berkeleyearth/monthly_mean/tas/tas_201805.nc" + GHCNv4: + name: + institution: + src: "obs/noaa/ghcn_v4/" + monthly_mean: {"tas":"", "tasanomaly":""} + daily_mean: + calendar: "gregorian" + reference_grid: "/esarchive/obs/noaa/ghcn_v4/monthly_mean/tasanomaly/tasanomaly_201811.nc" + +# ---- +# missing info in script + NCEP1: + +# ---- + JRA-55: + name: "JRA-55" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "recon/jma/jra55/" + monthly_mean: {"tas":"monthly_mean/tas_f6h", "psl":"monthly_mean/psl_f6h", + "tos":"", "pr":"monthly_mean/pr_s0-3h", + "prlr":"monthly_mean/prlr_s0-3h"} + daily_mean: {"tas":"daily_mean/tas_f6h", "psl":"daily_mean/psl_f6h", + "prlr":"daily_mean/prlr_s0-3h", "sfcWind":"daily_mean/sfcWind_f6h"} + calendar: "proleptic_gregorian" + reference_grid: "/esarchive/recon/jma/jra55/monthly_mean/tas_f6h/tas_200811.nc" + +# ---- + GISTEMPv4: + name: "GISTEMPv4" + institution: "NASA Goddard Institute for Space Studies" + src: "obs/noaa-nasa/ghcnersstgiss/" + monthly_mean: {"tasanomaly":""} + daily_mean: + calendar: "gregorian" + reference_grid: "/esarchive/obs/noaa-nasa/ghcnersstgiss/monthly_mean/tasanomaly/tasanomaly_200811.nc" + +# ---- + HadCRUT4: + name: "HadCRUT4" + institution: "Met Office Hadley Centre / Climatic Research Unit, University of East Anglia" + src: "obs/ukmo/hadcrut_v4.6/" + monthly_mean: {"tasanomaly":""} + daily_mean: + calendar: "gregorian" + reference_grid: "/esarchive/obs/ukmo/hadcrut_v4.6/monthly_mean/tasanomaly/tasanomaly_200811.nc" + +# ---- + HadSLP2: + name: "HadSLP2" + institution: + src: "obs/ukmo/hadslp_v2/" + monthly_mean: {"psl":""} + daily_mean: + calendar: "proleptic_gregorian" + reference_grid: "/esarchive/obs/ukmo/hadslp_v2/monthly_mean/psl/psl_200811.nc" + +# ------------------------- +mars: + src_ref: "/esarchive/scratch/aho/tmp/GRIB/" #"/mars/" + Reference: + ERA5: + name: "ERA5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "GRIB_era5_tas/" + monthly_mean: {"tas":""} + calendar: "gregorian" + reference_grid: "conf/grid_description/griddes_GRIB_system5_m1.txt" + + +sample: + src_ref: + Reference: + ERA5: + name: "ERA5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "GRIB_era5_tas/" + monthly_mean: {"tas":"", "prlr":""} + calendar: "gregorian" + reference_grid: "conf/grid_description/griddes_GRIB_system5_m1.txt" + + +IFCA: + src_ref: "/home/jovyan/IMPETUS4CHANGE/data/BSC/" + Reference: + ERA5: + name: "ERA5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "ERA5/" + daily_mean: {"psl":"psl_f1h-r1440x721cds/"} + calendar: "gregorian" + reference_grid: 
"/home/jovyan/IMPETUS4CHANGE/data/BSC/ERA5/daily_mean/psl_f1h-r1440x721cds/psl_201805.nc" + CERRA: + name: "ECMWF CERRA" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "CERRA/" + daily_mean: {"hurs":"daily_mean/hurs_f3h-r2631x1113/", "prlr":"daily_mean/prlr_f6h-r2631x1113/", + "sfcWind":"daily_mean/sfcWind_f3h-r2631x1113/", "tas":"daily_mean/tas_f3h-r2631x1113/", + "tasmax":"daily_mean/tasmax-r2631x1113/", "tasmin":"daily_mean/tasmin-r2631x1113/"} + monthly_mean: {"hurs":"monthly_mean/hurs_f3h-r2631x1113/", "prlr":"monthly_mean/prlr_f6h-r2631x1113/", + "sfcWind":"monthly_mean/_f3h-r2631x1113/", "tas":"monthly_mean/tas_f3h-r2631x1113/", + "tasmin":"monthly_mean/tasmin_f24h-r2631x1113/","tasmax":"monthly_mean/tasmax_f24h-r2631x1113/"} + calendar: "proleptic_gregorian" + reference_grid: "/home/jovyan/IMPETUS4CHANGE/data/BSC/CERRA/monthly_mean/tas_f3h-r2631x1113/tas_200506.nc" + + + diff --git a/conf/archive_seasonal.yml b/conf/archive_seasonal.yml new file mode 100644 index 00000000..52bb62bc --- /dev/null +++ b/conf/archive_seasonal.yml @@ -0,0 +1,326 @@ +## Structure description: +# filesystem: +# src: # main folders +# System_name: +# name: name of the system +# institution: +# src: system folder +# monthly_mean: {"variable_name1":"monthly_mean_variable1_path", +# "variable_name2":"monthly_mean_variable2_path"} +# daily_mean: {"variable_name1":"daily_mean_variable1_path"} +# nmember: +# fcst: # number of forecast members +# hcst: # number of hindcast members +# calendar: # calendar type +# reference_grid: .nc or .txt file with grid information +# land_sea_mask: path to the land-sea mask file +# time_stamp_lag: +1 when the time stamps in the file point to the month +# *after* the real date. 0 when the time stamps are correct. 
+
+gpfs:
+  src_sys: "/gpfs/projects/bsc32/esarchive_cache/"
+  System:
+    ECMWF-SEAS5.1:
+      name: "ECMWF SEAS5 (v5.1)"
+      institution: "European Centre for Medium-Range Weather Forecasts"
+      src: "exp/ecmwf/system51c3s/"
+      monthly_mean: {"tas":"monthly_mean/tas_f6h/",
+                     "prlr":"monthly_mean/prlr_f24h/",
+                     "sfcWind":"monthly_mean/sfcWind_f6h/",
+                     "psl":"monthly_mean/psl_f6h/"}
+      nmember:
+        fcst: 51
+        hcst: 25
+      calendar: "proleptic_gregorian"
+      time_stamp_lag: "0"
+      reference_grid: "conf/grid_description/griddes_system51c3s.txt"
+      land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/system51c3s/constant/lsm/lsm.nc"
+    CMCC-SPS3.5:
+      name: "CMCC-SPS3.5"
+      institution: "European Centre for Medium-Range Weather Forecasts"
+      src: "exp/cmcc/system35c3s/"
+      monthly_mean: {"tas":"monthly_mean/tas_f6h/",
+                     "prlr":"monthly_mean/prlr_f24h/",
+                     "sfcWind":"monthly_mean/sfcWind_f6h/",
+                     "psl":"monthly_mean/psl_f6h/"}
+      nmember:
+        fcst: 50
+        hcst: 40
+      calendar: "proleptic_gregorian"
+      time_stamp_lag: "+1"
+      reference_grid: "conf/grid_description/griddes_system35c3s.txt"
+    Meteo-France-System8:
+      name: "Meteo-France System 8"
+      institution: "European Centre for Medium-Range Weather Forecasts"
+      src: "exp/meteofrance/system8c3s/"
+      monthly_mean: {"tas":"monthly_mean/tas_f6h/",
+                     "prlr":"monthly_mean/prlr_s0-24h/",
+                     "sfcWind": "monthly_mean/sfcWind_f6h/",
+                     "psl":"monthly_mean/psl_f6h/"}
+      nmember:
+        fcst: 51
+        hcst: 25
+      time_stamp_lag: "+1"
+      calendar: "proleptic_gregorian"
+      reference_grid: "conf/grid_description/griddes_system7c3s.txt"
+    UK-MetOffice-Glosea601:
+      name: "UK MetOffice GloSea 6 (v6.01)"
+      institution: "European Centre for Medium-Range Weather Forecasts"
+      src: "exp/ukmo/glosea6_system601-c3s/"
+      monthly_mean: {"tas":"monthly_mean/tas_f6h/",
+                     "prlr":"monthly_mean/prlr_f24h/",
+                     "sfcWind":"monthly_mean/sfcWind_f6h/",
+                     "psl":"monthly_mean/psl_f6h/"}
+      nmember:
+        fcst: 62
+        hcst: 28
+      calendar: "proleptic_gregorian"
+      time_stamp_lag: "+1"
+      reference_grid: "conf/grid_description/griddes_ukmo600.txt"
+    NCEP-CFSv2:
+      name: "NCEP CFSv2"
+      institution: "NOAA NCEP" #?
+ src: "exp/ncep/cfs-v2/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 124 + hcst: 24 + calendar: "gregorian" + time_stamp_lag: "0" + reference_grid: "conf/grid_description/griddes_ncep-cfsv2.txt" + DWD-GCFS2.1: + name: "DWD GCFS 2.1" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/dwd/system21c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_s0-24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/", + "tasmin":"monthly_mean/tasmin_s0-24h/", + "tasmax":"monthly_mean/tasmax_s0-24h/" } + nmember: + fcst: 50 + hcst: 30 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_system21_m1.txt" + ECCC-CanCM4i: + name: "ECCC CanCM4i (v3)" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/eccc/eccc3/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_s0-24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 10 + hcst: 10 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_eccc1.txt" + Reference: + ERA5: + name: "ERA5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "recon/ecmwf/era5/" + monthly_mean: {"tas":"monthly_mean/tas_f1h-r1440x721cds/", + "psl":"monthly_mean/psl_f1h-r1440x721cds/", + "prlr":"monthly_mean/prlr_f1h-r1440x721cds/", + "sfcWind":"monthly_mean/sfcWind_f1h-r1440x721cds/"} + calendar: "standard" + reference_grid: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" + land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/constant/lsm-r1440x721cds/sftof.nc" + +######################################################################### +esarchive: + src_sys: "/esarchive/" + System: + ECMWF-SEAS5: + name: "ECMWF SEAS5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/ecmwf/system5c3s/" + daily_mean: {"tas":"daily_mean/tas_f6h/", "rsds":"daily/rsds_s0-24h/", + "prlr":"daily/prlr_s0-24h/", "tasmin":"daily/tasmin/", + "tasmax":"daily/tasmax/", "sfcWind":"daily_mean/sfcWind_f6h/", + "ta300":"daily_mean/ta300_f12h/", "ta500":"daily_mean/ta500_f12h/", + "ta850":"daily_mean/ta850_f12h/", "g300":"daily_mean/g300_f12h/", + "g500":"daily_mean/g500_f12h/", "g850":"daily_mean/g850_f12h/", + "tdps":"daily_mean/tdps_f6h/", "hurs":"daily_mean/hurs_f6h/"} + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "rsds":"monthly_mean/rsds_s0-24h/", + "prlr":"monthly_mean/prlr_s0-24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "tasmin":"monthly_mean/tasmin_f24h/", + "tasmax":"monthly_mean/tasmax_f24h/", + "ta300":"monthly_mean/ta300_f12h/", "ta500":"monthly_mean/ta500_f12h/", + "ta850":"monthly_mean/ta850_f12h/", "g300":"monthly_mean/g300_f12h/", + "g500":"monthly_mean/g500_f12h/", "g850":"monthly_mean/g500_f12h/", + "tdps":"monthly_mean/tdps_f6h/", "psl":"monthly_mean/psl_f6h/", + "tos":"monthly_mean/tos_f6h/", "sic":"monthly_mean/sic_f24h/"} + nmember: + fcst: 51 + hcst: 25 + calendar: "proleptic_gregorian" + time_stamp_lag: "0" + reference_grid: "/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20180501.nc" + land_sea_mask: "/esarchive/exp/ecmwf/system5c3s/constant/lsm/lsm.nc" + ECMWF-SEAS5.1: + name: "ECMWF SEAS5 (v5.1)" + institution: "European Centre for 
Medium-Range Weather Forecasts" + src: "exp/ecmwf/system51c3s/" + daily_mean: {"tas":"daily_mean/tas_f6h/", "prlr":"daily/prlr_s0-24h/", + "sfcWind":"daily_mean/sfcWind_f6h/", + "uas":"daily_mean/uas_f6h/", "vas":"daily_mean/vas_f6h/", + "psl":"daily_mean/psl_f6h/", "tdps":"daily_mean/tdps_f6h/"} + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "rsds":"monthly_mean/rsds_s0-24h/", + "prlr":"monthly_mean/prlr_s0-24h/", "sfcWind":"monthly_mean/sfcWind_f6h/", + "tasmin":"monthly_mean/tasmin_f24h/", "tasmax":"monthly_mean/tasmax_f24h/", + "uas":"monthly_mean/uas_f6h/", "vas":"monthly_mean/vas_f6h/", + "psl":"monthly_mean/psl_f6h/", "tdps":"monthly_mean/tdps_f6h/"} + nmember: + fcst: 51 + hcst: 25 + calendar: "proleptic_gregorian" + time_stamp_lag: "0" + reference_grid: "conf/grid_description/griddes_system51c3s.txt" + Meteo-France-System7: + name: "Meteo-France System 7" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/meteofrance/system7c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "g500":"monthly_mean/g500_f12h/", + "prlr":"monthly_mean/prlr_f24h/", "sfcWind": "monthly_mean/sfcWind_f6h/", + "tasmax":"monthly_mean/tasmax_f6h/", "tasmin": "monthly_mean/tasmin_f6h/", + "tos":"monthly_mean/tos_f6h/"} + nmember: + fcst: 51 + hcst: 25 + time_stamp_lag: "+1" + calendar: "proleptic_gregorian" + reference_grid: "conf/grid_description/griddes_system7c3s.txt" + DWD-GCFS2.1: + name: "DWD GCFS 2.1" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/dwd/system21c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_s0-24h/", + "g500":"monthly_mean/g500_f12h/", "sfcWind":"monthly_mean/sfcWind_f6h/", + "tasmin":"monthly_mean/tasmin_s0-24h/", "tasmax":"monthly_mean/tasmax_s0-24h/"} + nmember: + fcst: 50 + hcst: 30 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_system21_m1.txt" + CMCC-SPS3.5: + name: "CMCC-SPS3.5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/cmcc/system35c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f24h/", + "g500":"monthly_mean/g500_f12h/", "sfcWind":"monthly_mean/sfcWind_f6h/", + "tasmin":"monthly_mean/tasmin_f24h/", "tasmax":"monthly_mean/tasmax_f24h/"} + nmember: + fcst: 50 + hcst: 40 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_system35c3s.txt" + JMA-CPS2: + name: "JMA System 2" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/jma/system2c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f6h/", + "tasmax":"monthly_mean/tasmax_f6h/", "tasmin":"monthly_mean/tasmin_f6h/"} + nmember: + fcst: 10 + hcst: 10 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_system2c3s.txt" + ECCC-CanCM4i: + name: "ECCC CanCM4i" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/eccc/eccc1/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f6h/", + "tasmax":"monthly_mean/tasmax_f6h/", "tasmin":"monthly_mean/tasmin_f6h/"} + nmember: + fcst: 10 + hcst: 10 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_eccc1.txt" + UK-MetOffice-Glosea600: + name: "UK MetOffice GloSea 6 (v6.0)" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/ukmo/glosea6_system600-c3s/" + monthly_mean: 
{"tas":"monthly_mean/tas_f6h/", "tasmin":"monthly_mean/tasmin_f24h/", + "tasmax":"monthly_mean/tasmax_f24h/", "prlr":"monthly_mean/prlr_f24h/"} + nmember: + fcst: 62 + hcst: 28 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_ukmo600.txt" + NCEP-CFSv2: + name: "NCEP CFSv2" + institution: "NOAA NCEP" #? + src: "exp/ncep/cfs-v2/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f6h/", + "tasmax":"monthly_mean/tasmax_f6h/", "tasmin":"monthly_mean/tasmin_f6h/"} + nmember: + fcst: 20 + hcst: 20 + calendar: "gregorian" + time_stamp_lag: "0" + reference_grid: "conf/grid_description/griddes_ncep-cfsv2.txt" +mars: + src_sys: "/esarchive/scratch/aho/tmp/GRIB/" #"/mars/" + System: + ECMWF-SEAS5: + name: "ECMWF SEAS5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "GRIB_system5_tas_CORRECTED/" + monthly_mean: {"tas":""} + nmember: + fcst: 51 + hcst: 51 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_GRIB_system5_m1.txt" + +sample: + src_sys: + System: + ECMWF-SEAS5.1: + name: "ECMWF SEAS5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: + monthly_mean: {"tas":"", "prlr":""} + nmember: + fcst: 15 + hcst: 15 + calendar: "proleptic_gregorian" + time_stamp_lag: "0" + reference_grid: "conf/grid_description/griddes_GRIB_system51_m1.txt" + +IFCA: + src_sys: "/home/jovyan/IMPETUS4CHANGE/data/BSC/" + System: + ECMWF-SEAS5: + name: "ECMWF SEAS5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "ECMWF-SEAS5/" + daily_mean: {"tas":"daily_mean/tas_f6h/", "psl":"daily_mean/psl_f6h/", + "prlr":"daily_mean/prlr_s0-24h/", "sfcWind":"daily_mean/sfcWind_f6h/", + "tasmin":"daily_mean/tasmin/", "tasmax":"daily_mean/tasmax/", + "hurs":"daily_mean/hurs_f6h/"} + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "hurs":"monthly_mean/hurs_f6h/", + "prlr":"monthly_mean/prlr_s0-24h/", "sfcWind":"monthly_mean/sfcWind_f6h/", + "tasmin":"monthly_mean/tasmin_f24h/", "tasmax":"monthly_mean/tasmax_f24h/"} + nmember: + fcst: 51 + hcst: 25 + calendar: "proleptic_gregorian" + time_stamp_lag: "0" + reference_grid: "/home/jovyan/IMPETUS4CHANGE/data/BSC/ECMWF-SEAS5/monthly_mean/tas_f6h/tas_20180501.nc" + diff --git a/full_ecvs_multimodel_calibrated.R b/full_ecvs_multimodel_calibrated.R index 757af6aa..763eed56 100644 --- a/full_ecvs_multimodel_calibrated.R +++ b/full_ecvs_multimodel_calibrated.R @@ -80,6 +80,13 @@ skill_metrics <- Crossval_multimodel_metrics(recipe = original_recipe, recipe_aux$Analysis$Datasets$System$name <- 'Multimodel' tmp_probs <- list(probs_fcst = res$probs$probs_fcst[[1]][[1]]) + + +dim(tmp_probs$probs_fcst) <- c(bin = 3, syear = 1, var = 1, time = timesteps, + #latitude = 20, longitude = 20) + latitude = lats, longitude = lons) +info(recipe_aux$Run$logger,dim(tmp_probs$probs_fcst)) + Visualization(recipe = recipe_aux, data = data_aux, probabilities = tmp_probs, skill_metrics = skill_metrics, significance = TRUE) diff --git a/modules/Loading/R/load_subseasonal.R b/modules/Loading/R/load_subseasonal.R index 793b46ec..50628834 100644 --- a/modules/Loading/R/load_subseasonal.R +++ b/modules/Loading/R/load_subseasonal.R @@ -33,7 +33,7 @@ load_subseasonal <- function(recipe) { ##fcst.name <- recipe$Analysis$Datasets$System[[sys]]$name # get datasets dict: - archive <- read_yaml("conf/archive_subseasonal.yml")[[recipe$Run$filesystem]] + archive <-get_archive(recipe) # 
read_yaml("conf/archive_subseasonal.yml")[[recipe$Run$filesystem]] exp_descrip <- archive$System[[exp.name]] freq.hcst <- unlist(exp_descrip[[store.freq]][variable[1]]) @@ -64,13 +64,13 @@ load_subseasonal <- function(recipe) { var_dir_exp <- exp_descrip[[frequency]][variable] # ----------- - obs.path <- paste0(archive$src, obs.dir, "$var_dir$", + obs.path <- paste0(archive$src_ref, obs.dir, "$var_dir$", "$var$_$file_date$.nc") - hcst.path <- paste0(archive$src, hcst.dir, "$var_dir$", + hcst.path <- paste0(archive$src_sys, hcst.dir, "$var_dir$", "$var$_$file_date$.nc") - fcst.path <- paste0(archive$src, hcst.dir, "$var_dir$", + fcst.path <- paste0(archive$src_sys, hcst.dir, "$var_dir$", "$var$_$file_date$.nc") # Define regrid parameters: @@ -384,3 +384,4 @@ load_subseasonal <- function(recipe) { } + diff --git a/modules/Visualization/R/plot_most_likely_terciles_map.R b/modules/Visualization/R/plot_most_likely_terciles_map.R index 0c5dcfe3..ee91b9ff 100644 --- a/modules/Visualization/R/plot_most_likely_terciles_map.R +++ b/modules/Visualization/R/plot_most_likely_terciles_map.R @@ -85,6 +85,8 @@ plot_most_likely_terciles <- function(recipe, along = c("var"), indices = var, drop = 'selected') + info(recipe$Run$logger,"HERE") + info(recipe$Run$logger, str(var_probs)) var_probs <- Reorder(var_probs, c("syear", "time", "bin", "longitude", "latitude")) diff --git a/recipe_ecvs_cal_mul_seas.yml b/recipe_ecvs_cal_mul_seas.yml index 52c2fe00..c2c1e371 100644 --- a/recipe_ecvs_cal_mul_seas.yml +++ b/recipe_ecvs_cal_mul_seas.yml @@ -82,7 +82,7 @@ Analysis: col1_width: NULL col2_width: NULL calculate_diff: FALSE - ncores: 30 # Optional, int: number of cores, defaults to 1 + ncores: 20 # Optional, int: number of cores, defaults to 1 remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE alpha: 0.05 Output_format: scorecards diff --git a/sunset.sh b/sunset.sh index bdbbedbf..6577a9ae 100644 --- a/sunset.sh +++ b/sunset.sh @@ -7,7 +7,7 @@ #SBATCH -e sunset_multimodel-%J.err #SBATCH --account=bsc32 #SBATCH --qos=gp_bsces -#SBATCH --constraint=lowmem +#SBATCH --constraint=highmem #### --qos=acc_bsces -- GitLab From 060bf1e10011d4e354acd67e873d75286f411ffc Mon Sep 17 00:00:00 2001 From: Nuria Perez Zanon Date: Wed, 18 Dec 2024 13:53:53 +0100 Subject: [PATCH 56/78] conf and save --- conf/archive.yml | 464 ------------------------ conf/archive_reference.yml | 4 + conf/archive_subseasonal.yml | 20 + full_ecvs_calibration.R | 1 + modules/Crossval/Crossval_Calibration.R | 4 +- modules/Crossval/Crossval_metrics.R | 2 +- modules/Visualization/output_size.yml | 10 +- recipe_ecvs_ano_mul_seas.yml | 4 +- recipe_ecvs_ano_seas.yml | 6 +- recipe_subseasonal_ecvs.yml | 36 +- recipe_tas_singl_cal_seas.yml | 2 +- subsunset.sh | 23 ++ sunset.sh | 5 +- 13 files changed, 80 insertions(+), 501 deletions(-) delete mode 100644 conf/archive.yml create mode 100644 subsunset.sh diff --git a/conf/archive.yml b/conf/archive.yml deleted file mode 100644 index fe2e043f..00000000 --- a/conf/archive.yml +++ /dev/null @@ -1,464 +0,0 @@ -gpfs: - src: "/gpfs/projects/bsc32/esarchive_cache/" - System: - ECMWF-SEAS5.1: - name: "ECMWF SEAS5 (v5.1)" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "exp/ecmwf/system51c3s/" - monthly_mean: {"tas":"monthly_mean/tas_f6h/", - "prlr":"monthly_mean/prlr_s0-24h/", - "sfcWind":"monthly_mean/sfcWind_f6h/", - "psl":"monthly_mean/psl_f6h/"} - nmember: - fcst: 51 - hcst: 25 - calendar: "proleptic_gregorian" - time_stamp_lag: "0" - reference_grid: 
"/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/system51c3s/monthly_mean/tas_f6h/tas_20180501.nc" - land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/system51c3s/constant/lsm/lsm.nc" - CMCC-SPS3.5: - name: "CMCC-SPS3.5" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "exp/cmcc/system35c3s/" - monthly_mean: {"tas":"monthly_mean/tas_f6h/", - "prlr":"monthly_mean/prlr_f24h/", - "sfcWind":"monthly_mean/sfcWind_f6h/", - "psl":"monthly_mean/psl_f6h/"} - nmember: - fcst: 50 - hcst: 40 - calendar: "proleptic_gregorian" - time_stamp_lag: "+1" - reference_grid: "conf/grid_description/griddes_system35c3s.txt" - Meteo-France-System8: - name: "Meteo-France System 8" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "exp/meteofrance/system8c3s/" - monthly_mean: {"tas":"monthly_mean/tas_f6h/", - "prlr":"monthly_mean/prlr_s0-24h/", - "sfcWind": "monthly_mean/sfcWind_f6h/", - "psl":"monthly_mean/psl_f6h/"} - nmember: - fcst: 51 - hcst: 25 - time_stamp_lag: "+1" - calendar: "proleptic_gregorian" - reference_grid: "conf/grid_description/griddes_system7c3s.txt" - UK-MetOffice-Glosea603: - name: "UK MetOffice GloSea 6 (v6.03)" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "exp/ukmo/glosea6_system603-c3s/" - monthly_mean: {"tas":"monthly_mean/tas_f6h/", - "prlr":"monthly_mean/prlr_f24h/", - "sfcWind":"monthly_mean/sfcWind_f6h/", - "psl":"monthly_mean/psl_f6h/"} - nmember: - fcst: 62 - hcst: 28 - calendar: "proleptic_gregorian" - time_stamp_lag: "+1" - reference_grid: "conf/grid_description/griddes_ukmo600.txt" - UK-MetOffice-Glosea601: - name: "UK MetOffice GloSea 6 (v6.01)" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "exp/ukmo/glosea6_system601-c3s/" - monthly_mean: {"tas":"monthly_mean/tas_f6h/", - "prlr":"monthly_mean/prlr_f24h/", - "sfcWind":"monthly_mean/sfcWind_f6h/", - "psl":"monthly_mean/psl_f6h/"} - nmember: - fcst: 62 - hcst: 28 - calendar: "proleptic_gregorian" - time_stamp_lag: "+1" - reference_grid: "conf/grid_description/griddes_ukmo600.txt" - NCEP-CFSv2: - name: "NCEP CFSv2" - institution: "NOAA NCEP" #? 
- src: "exp/ncep/cfs-v2/" - monthly_mean: {"tas":"monthly_mean/tas_f6h/", - "prlr":"monthly_mean/prlr_f24h/", - "sfcWind":"monthly_mean/sfcWind_f6h/", - "psl":"monthly_mean/psl_f6h/"} - nmember: - fcst: 124 - hcst: 24 - calendar: "gregorian" - time_stamp_lag: "0" - reference_grid: "conf/grid_description/griddes_ncep-cfsv2.txt" - DWD-GCFS2.1: - name: "DWD GCFS 2.1" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "exp/dwd/system21c3s/" - monthly_mean: {"tas":"monthly_mean/tas_f6h/", - "prlr":"monthly_mean/prlr_f24h/", - "sfcWind":"monthly_mean/sfcWind_f6h/", - "psl":"monthly_mean/psl_f6h/"} - nmember: - fcst: 50 - hcst: 30 - calendar: "proleptic_gregorian" - time_stamp_lag: "+1" - reference_grid: "conf/grid_description/griddes_system21_m1.txt" - ECCC-GEM5.2-NEMO: - name: "ECCC GEM5.2-NEMO" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "exp/eccc/eccc5/" - monthly_mean: {"tas":"monthly_mean/tas_f6h/", - "prlr":"monthly_mean/prlr_s0-24h/", - "sfcWind":"monthly_mean/sfcWind_f6h/", - "psl":"monthly_mean/psl_f6h/"} - nmember: - fcst: 20 - hcst: 20 - calendar: "proleptic_gregorian" - time_stamp_lag: "+1" - reference_grid: "conf/grid_description/griddes_eccc1.txt" - ECCC-CanCM4i: - name: "ECCC CanCM4i (v3)" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "exp/eccc/eccc3/" - monthly_mean: {"tas":"monthly_mean/tas_f6h/", - "prlr":"monthly_mean/prlr_s0-24h/", - "sfcWind":"monthly_mean/sfcWind_f6h/", - "psl":"monthly_mean/psl_f6h/"} - nmember: - fcst: 10 - hcst: 10 - calendar: "proleptic_gregorian" - time_stamp_lag: "+1" - reference_grid: "conf/grid_description/griddes_eccc1.txt" - Reference: - ERA5: - name: "ERA5" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "recon/ecmwf/era5/" - monthly_mean: {"tas":"monthly_mean/tas_f1h-r1440x721cds/", - "psl":"monthly_mean/psl_f1h-r1440x721cds/", - "prlr":"monthly_mean/prlr_f1h-r1440x721cds/", - "sfcWind":"monthly_mean/sfcWind_f1h-r1440x721cds/"} - calendar: "standard" - reference_grid: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" - land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/constant/lsm-r1440x721cds/sftof.nc" - -######################################################################### -esarchive: - src: "/esarchive/" - System: - ECMWF-SEAS5: - name: "ECMWF SEAS5" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "exp/ecmwf/system5c3s/" - daily_mean: {"tas":"daily_mean/tas_f6h/", "rsds":"daily/rsds_s0-24h/", - "prlr":"daily/prlr_s0-24h/", "tasmin":"daily/tasmin/", - "tasmax":"daily/tasmax/", "sfcWind":"daily_mean/sfcWind_f6h/", - "ta300":"daily_mean/ta300_f12h/", "ta500":"daily_mean/ta500_f12h/", - "ta850":"daily_mean/ta850_f12h/", "g300":"daily_mean/g300_f12h/", - "g500":"daily_mean/g500_f12h/", "g850":"daily_mean/g850_f12h/", - "tdps":"daily_mean/tdps_f6h/", "hurs":"daily_mean/hurs_f6h/"} - monthly_mean: {"tas":"monthly_mean/tas_f6h/", "rsds":"monthly_mean/rsds_s0-24h/", - "prlr":"monthly_mean/prlr_s0-24h/", - "sfcWind":"monthly_mean/sfcWind_f6h/", - "tasmin":"monthly_mean/tasmin_f24h/", - "tasmax":"monthly_mean/tasmax_f24h/", - "ta300":"monthly_mean/ta300_f12h/", "ta500":"monthly_mean/ta500_f12h/", - "ta850":"monthly_mean/ta850_f12h/", "g300":"monthly_mean/g300_f12h/", - "g500":"monthly_mean/g500_f12h/", "g850":"monthly_mean/g500_f12h/", - "tdps":"monthly_mean/tdps_f6h/", "psl":"monthly_mean/psl_f6h/", - "tos":"monthly_mean/tos_f6h/", 
"sic":"monthly_mean/sic_f24h/"} - nmember: - fcst: 51 - hcst: 25 - calendar: "proleptic_gregorian" - time_stamp_lag: "0" - reference_grid: "/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20180501.nc" - land_sea_mask: "/esarchive/exp/ecmwf/system5c3s/constant/lsm/lsm.nc" - ECMWF-SEAS5.1: - name: "ECMWF SEAS5 (v5.1)" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "exp/ecmwf/system51c3s/" - daily_mean: {"tas":"daily_mean/tas_f6h/", "prlr":"daily/prlr_s0-24h/", - "sfcWind":"daily_mean/sfcWind_f6h/", - "uas":"daily_mean/uas_f6h/", "vas":"daily_mean/vas_f6h/", - "psl":"daily_mean/psl_f6h/", "tdps":"daily_mean/tdps_f6h/"} - monthly_mean: {"tas":"monthly_mean/tas_f6h/", "rsds":"monthly_mean/rsds_s0-24h/", - "prlr":"monthly_mean/prlr_s0-24h/", "sfcWind":"monthly_mean/sfcWind_f6h/", - "tasmin":"monthly_mean/tasmin_f24h/", "tasmax":"monthly_mean/tasmax_f24h/", - "uas":"monthly_mean/uas_f6h/", "vas":"monthly_mean/vas_f6h/", - "psl":"monthly_mean/psl_f6h/", "tdps":"monthly_mean/tdps_f6h/"} - nmember: - fcst: 51 - hcst: 25 - calendar: "proleptic_gregorian" - time_stamp_lag: "0" - reference_grid: "conf/grid_description/griddes_system51c3s.txt" - Meteo-France-System7: - name: "Meteo-France System 7" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "exp/meteofrance/system7c3s/" - monthly_mean: {"tas":"monthly_mean/tas_f6h/", "g500":"monthly_mean/g500_f12h/", - "prlr":"monthly_mean/prlr_f24h/", "sfcWind": "monthly_mean/sfcWind_f6h/", - "tasmax":"monthly_mean/tasmax_f6h/", "tasmin": "monthly_mean/tasmin_f6h/", - "tos":"monthly_mean/tos_f6h/"} - nmember: - fcst: 51 - hcst: 25 - time_stamp_lag: "+1" - calendar: "proleptic_gregorian" - reference_grid: "conf/grid_description/griddes_system7c3s.txt" - DWD-GCFS2.1: - name: "DWD GCFS 2.1" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "exp/dwd/system21_m1/" - monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f24h/", - "g500":"monthly_mean/g500_f12h/", "sfcWind":"monthly_mean/sfcWind_f6h/", - "tasmin":"monthly_mean/tasmin_f24h/", "tasmax":"monthly_mean/tasmax_f24h/"} - nmember: - fcst: 50 - hcst: 30 - calendar: "proleptic_gregorian" - time_stamp_lag: "+1" - reference_grid: "conf/grid_description/griddes_system21_m1.txt" - CMCC-SPS3.5: - name: "CMCC-SPS3.5" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "exp/cmcc/system35c3s/" - monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f24h/", - "g500":"monthly_mean/g500_f12h/", "sfcWind":"monthly_mean/sfcWind_f6h/", - "tasmin":"monthly_mean/tasmin_f24h/", "tasmax":"monthly_mean/tasmax_f24h/"} - nmember: - fcst: 50 - hcst: 40 - calendar: "proleptic_gregorian" - time_stamp_lag: "+1" - reference_grid: "conf/grid_description/griddes_system35c3s.txt" - JMA-CPS2: - name: "JMA System 2" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "exp/jma/system2c3s/" - monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f24h/", - "tasmax":"monthly_mean/tasmax_f6h/", "tasmin":"monthly_mean/tasmin_f6h/"} - nmember: - fcst: 10 - hcst: 10 - calendar: "proleptic_gregorian" - time_stamp_lag: "+1" - reference_grid: "conf/grid_description/griddes_system2c3s.txt" - ECCC-CanCM4i: - name: "ECCC CanCM4i" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "exp/eccc/eccc1/" - monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f6h/", - "tasmax":"monthly_mean/tasmax_f6h/", 
"tasmin":"monthly_mean/tasmin_f6h/"} - nmember: - fcst: 10 - hcst: 10 - calendar: "proleptic_gregorian" - time_stamp_lag: "+1" - reference_grid: "conf/grid_description/griddes_eccc1.txt" - UK-MetOffice-Glosea600: - name: "UK MetOffice GloSea 6 (v6.0)" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "exp/ukmo/glosea6_system600-c3s/" - monthly_mean: {"tas":"monthly_mean/tas_f6h/", "tasmin":"monthly_mean/tasmin_f24h/", - "tasmax":"monthly_mean/tasmax_f24h/", "prlr":"monthly_mean/prlr_s0-24h/"} - nmember: - fcst: 62 - hcst: 28 - calendar: "proleptic_gregorian" - time_stamp_lag: "+1" - reference_grid: "conf/grid_description/griddes_ukmo600.txt" - NCEP-CFSv2: - name: "NCEP CFSv2" - institution: "NOAA NCEP" #? - src: "exp/ncep/cfs-v2/" - monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f24h/", - "tasmax":"monthly_mean/tasmax_f6h/", "tasmin":"monthly_mean/tasmin_f6h/"} - nmember: - fcst: 20 - hcst: 20 - calendar: "gregorian" - time_stamp_lag: "0" - reference_grid: "conf/grid_description/griddes_ncep-cfsv2.txt" - Reference: - ERA5: - name: "ERA5" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "recon/ecmwf/era5/" - daily_mean: {"tas":"daily_mean/tas_f1h-r1440x721cds/", - "rsds":"daily_mean/rsds_f1h-r1440x721cds/", - "prlr":"daily_mean/prlr_f1h-r1440x721cds/", - "g300":"daily_mean/g300_f1h-r1440x721cds/", - "g500":"daily_mean/g500_f1h-r1440x721cds/", - "g850":"daily_mean/g850_f1h-r1440x721cds/", - "sfcWind":"daily_mean/sfcWind_f1h-r1440x721cds/", - "tasmax":"daily/tasmax_f1h-r1440x721cds/", - "tasmin":"daily/tasmin_f1h-r1440x721cds/", - "ta300":"daily_mean/ta300_f1h-r1440x721cds/", - "ta500":"daily_mean/ta500_f1h-r1440x721cds/", - "ta850":"daily_mean/ta850_f1h-r1440x721cds/", - "hurs":"daily_mean/hurs_f1h-r1440x721cds/"} - monthly_mean: {"tas":"monthly_mean/tas_f1h-r1440x721cds/", - "psl":"monthly_mean/psl_f1h-r1440x721cds/", - "prlr":"monthly_mean/prlr_f1h-r1440x721cds/", - "rsds":"monthly_mean/rsds_f1h-r1440x721cds/", - "g300":"monthly_mean/g300_f1h-r1440x721cds/", - "g500":"monthly_mean/g500_f1h-r1440x721cds/", - "g850":"monthly_mean/g850_f1h-r1440x721cds/", - "sfcWind":"monthly_mean/sfcWind_f1h-r1440x721cds/", - "tasmax":"monthly_mean/tasmax_f1h-r1440x721cds/", - "tasmin":"monthly_mean/tasmin_f1h-r1440x721cds/", - "ta300":"montly_mean/ta300_f1h-r1440x721cds/", - "ta500":"monthly_mean/ta500_f1h-r1440x721cds/", - "ta850":"monthly_mean/ta850_f1h-r1440x721cds/", - "tos":"monthly_mean/tos_f1h-r1440x721cds/", - "sic":"monthly_mean/sic_f1h-r1440x721cds/"} - calendar: "standard" - reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" - land_sea_mask: "/esarchive/recon/ecmwf/era5/constant/lsm-r1440x721cds/sftof.nc" - ERA5-Land: - name: "ERA5-Land" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "recon/ecmwf/era5land/" - daily_mean: {"tas":"daily_mean/tas_f1h/", "rsds":"daily_mean/rsds_f1h/", - "prlr":"daily_mean/prlr_f1h/", "sfcWind":"daily_mean/sfcWind_f1h/", - "tasmin":"daily/tasmin/", "tasmax":"daily/tasmax/"} - monthly_mean: {"tas":"monthly_mean/tas_f1h/","tasmin":"monthly_mean/tasmin_f24h/", - "tasmax":"monthly_mean/tasmax_f24h/", "prlr":"monthly_mean/prlr_f1h/", - "sfcWind":"monthly_mean/sfcWind_f1h/", "rsds":"monthly_mean/rsds_f1h/", - "tdps":"monthly_mean/tdps_f1h/"} - calendar: "proleptic_gregorian" - reference_grid: "/esarchive/recon/ecmwf/era5land/daily_mean/tas_f1h/tas_201805.nc" - UERRA: - name: "ECMWF UERRA" - institution: "European Centre for 
Medium-Range Weather Forecasts" - src: "recon/ecmwf/uerra_mescan/" - daily_mean: {"tas":"daily_mean/tas_f6h/"} - monthly_mean: {"tas":"monthly_mean/tas_f6h/"} - calendar: "proleptic_gregorian" - reference_grid: "/esarchive/recon/ecmwf/uerra_mescan/daily_mean/tas_f6h/tas_201805.nc" - CERRA: - name: "ECMWF CERRA" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "recon/ecmwf/cerra/" - daily_mean: {"hurs":"daily_mean/hurs_f3h-r2631x1113/", "ps":"daily_mean/ps_f3h-r2631x1113/", - "sfcWind":"daily_mean/sfcWind_f3h-r2631x1113/", - "tas":"daily_mean/tas_f3h-r2631x1113/", "winddir":"daily_mean/tas_f3h-r2631x1113/"} - monthly_mean: {"hurs":"monthly_mean/hurs_f3h-r2631x1113/", "ps":"monthly_mean/ps_f3h-r2631x1113/", - "sfcWind":"monthly_mean/sfcWind_f3h-r2631x1113/", - "tas":"monthly_mean/tas_f3h-r2631x1113/", - "winddir":"monthly_mean/winddir_f3h-r2631x1113/", - "tasmin":"monthly_mean/tasmin_f24h-r2631x1113/", - "tasmax":"monthly_mean/tasmax_f24h-r2631x1113/"} - calendar: "proleptic_gregorian" - reference_grid: "/esarchive/recon/ecmwf/cerra/monthly_mean/tas_f3h-r2631x1113/tas_200506.nc" - CERRA-Land: - name: "ECMWF CERRA-Land" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "recon/ecmwf/cerraland/" - daily_mean: {"prlr":"daily_mean/prlr_f6h-r2631x1113/"} - monthly_mean: {"prlr":"monthly_mean/prlr_f6h-r2631x1113/"} - calendar: "proleptic_gregorian" - reference_grid: "/esarchive/recon/ecmwf/cerraland/monthly_mean/prlr_f6h-r2631x1113/prlr_200412.nc" - HadCRUT5: - name: "HadCRUT5" - institution: "Met Office" - src: "obs/ukmo/hadcrut_v5.0_analysis/" - monthly_mean: {"tasanomaly":"monthly_mean/tasanomaly/"} - calendar: "proleptic_gregorian" - reference_grid: "/esarchive/obs/ukmo/hadcrut_v5.0_analysis/monthly_mean/tasanomaly/tasanomaly_202001.nc" - BEST: - name: "BEST" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "obs/berkeleyearth/berkeleyearth/" - daily_mean: {"tas":"daily_mean/tas/"} - monthly_mean: {"tas":"monthly_mean/tas/"} - calendar: "proleptic_gregorian" - reference_grid: "/esarchive/obs/berkeleyearth/berkeleyearth/monthly_mean/tas/tas_201805.nc" - -mars: - src: "/esarchive/scratch/aho/tmp/GRIB/" #"/mars/" - System: - ECMWF-SEAS5: - name: "ECMWF SEAS5" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "GRIB_system5_tas_CORRECTED/" - monthly_mean: {"tas":""} - nmember: - fcst: 51 - hcst: 51 - calendar: "proleptic_gregorian" - time_stamp_lag: "+1" - reference_grid: "conf/grid_description/griddes_GRIB_system5_m1.txt" - Reference: - ERA5: - name: "ERA5" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "GRIB_era5_tas/" - monthly_mean: {"tas":""} - calendar: "standard" - reference_grid: "conf/grid_description/griddes_GRIB_system5_m1.txt" - -sample: - src: - System: - ECMWF-SEAS5.1: - name: "ECMWF SEAS5" - institution: "European Centre for Medium-Range Weather Forecasts" - src: - monthly_mean: {"tas":"", "prlr":""} - nmember: - fcst: 15 - hcst: 15 - calendar: "proleptic_gregorian" - time_stamp_lag: "0" - reference_grid: "conf/grid_description/griddes_GRIB_system51_m1.txt" - Reference: - ERA5: - name: "ERA5" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "GRIB_era5_tas/" - monthly_mean: {"tas":"", "prlr":""} - calendar: "standard" - reference_grid: "conf/grid_description/griddes_GRIB_system5_m1.txt" - -IFCA: - src: "/home/jovyan/IMPETUS4CHANGE/data/BSC/" - System: - ECMWF-SEAS5: - name: "ECMWF SEAS5" - institution: "European Centre for 
Medium-Range Weather Forecasts" - src: "ECMWF-SEAS5/" - daily_mean: {"tas":"daily_mean/tas_f6h/", "psl":"daily_mean/psl_f6h/", - "prlr":"daily_mean/prlr_s0-24h/", "sfcWind":"daily_mean/sfcWind_f6h/", - "tasmin":"daily_mean/tasmin/", "tasmax":"daily_mean/tasmax/", - "hurs":"daily_mean/hurs_f6h/"} - monthly_mean: {"tas":"monthly_mean/tas_f6h/", "hurs":"monthly_mean/hurs_f6h/", - "prlr":"monthly_mean/prlr_s0-24h/", "sfcWind":"monthly_mean/sfcWind_f6h/", - "tasmin":"monthly_mean/tasmin_f24h/", "tasmax":"monthly_mean/tasmax_f24h/"} - nmember: - fcst: 51 - hcst: 25 - calendar: "proleptic_gregorian" - time_stamp_lag: "0" - reference_grid: "/home/jovyan/IMPETUS4CHANGE/data/BSC/ECMWF-SEAS5/monthly_mean/tas_f6h/tas_20180501.nc" - Reference: - ERA5: - name: "ERA5" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "ERA5/" - daily_mean: {"psl":"psl_f1h-r1440x721cds/"} - calendar: "standard" - reference_grid: "/home/jovyan/IMPETUS4CHANGE/data/BSC/ERA5/daily_mean/psl_f1h-r1440x721cds/psl_201805.nc" - CERRA: - name: "ECMWF CERRA" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "CERRA/" - daily_mean: {"hurs":"daily_mean/hurs_f3h-r2631x1113/", "prlr":"daily_mean/prlr_f6h-r2631x1113/", - "sfcWind":"daily_mean/sfcWind_f3h-r2631x1113/", "tas":"daily_mean/tas_f3h-r2631x1113/", - "tasmax":"daily_mean/tasmax-r2631x1113/", "tasmin":"daily_mean/tasmin-r2631x1113/"} - monthly_mean: {"hurs":"monthly_mean/hurs_f3h-r2631x1113/", "prlr":"monthly_mean/prlr_f6h-r2631x1113/", - "sfcWind":"monthly_mean/_f3h-r2631x1113/", "tas":"monthly_mean/tas_f3h-r2631x1113/", - "tasmin":"monthly_mean/tasmin_f24h-r2631x1113/","tasmax":"monthly_mean/tasmax_f24h-r2631x1113/"} - calendar: "proleptic_gregorian" - reference_grid: "/home/jovyan/IMPETUS4CHANGE/data/BSC/CERRA/monthly_mean/tas_f3h-r2631x1113/tas_200506.nc" diff --git a/conf/archive_reference.yml b/conf/archive_reference.yml index 063ed470..67ff6d20 100644 --- a/conf/archive_reference.yml +++ b/conf/archive_reference.yml @@ -5,6 +5,10 @@ gpfs: name: "ERA5" institution: "European Centre for Medium-Range Weather Forecasts" src: "recon/ecmwf/era5/" + weekly_mean: {"tas":"weekly_mean/tas_f1h-r1440x721cds/", + "psl":"weekly_mean/psl_f1h-r1440x721cds/", + "prlr":"weekly_mean/prlr_f1h-r1440x721cds/", + "sfcWind":"weekly_mean/sfcWind_f1h-r1440x721cds/"} monthly_mean: {"tas":"monthly_mean/tas_f1h-r1440x721cds/", "psl":"monthly_mean/psl_f1h-r1440x721cds/", "prlr":"monthly_mean/prlr_f1h-r1440x721cds/", diff --git a/conf/archive_subseasonal.yml b/conf/archive_subseasonal.yml index 300884dc..14a4ebb9 100644 --- a/conf/archive_subseasonal.yml +++ b/conf/archive_subseasonal.yml @@ -1,3 +1,23 @@ +gpfs: + src_sys: "/gpfs/projects/bsc32/esarchive_cache/" + System: + NCEP-CFSv2: + name: "NCEP CFSv2" + institution: "NOAA NCEP" #? + src: "exp/ncep/cfs-v2/" + weekly_mean: {"tas":"weekly_mean/s2s/tas_f24h/", + "prlr":"weekly_mean/s2s/prlr_f24h/", + "tasmax":"weekly_mean/s2s/tasmax_f24h/", + "tasmin":"weekly_mean/s2s/tasmin_f24h/", + "sfcWind":"weekly_mean/s2s/sfcWind_f24h/", + "rsds":"weekly_mean/s2s/rsds_f24h/"} + daily_mean: {"tas":"daily_mean/s2s/tas_f6h"} + nmember: + fcst: 48 + hcst: 12 + calendar: "proleptic_gregorian" + time_stamp_lag: "0" # Do we need it for subseasonal? + reference_grid: "/esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20040624.nc" # is it the same as seasonal? 
 esarchive:
   src_sys: "/esarchive/"
   System:
diff --git a/full_ecvs_calibration.R b/full_ecvs_calibration.R
index 64f8d78b..3c3f2c6e 100644
--- a/full_ecvs_calibration.R
+++ b/full_ecvs_calibration.R
@@ -7,6 +7,7 @@ source("modules/Aggregation/Aggregation.R")
 args = commandArgs(trailingOnly = TRUE)
 recipe_file <- args[1]
 #recipe_file <- "recipe_tas_singl_cal_seas.yml"
+#recipe_file <- "recipe_subseasonal_ecvs.yml"
 #recipe <- read_atomic_recipe(recipe_file)
 recipe <- prepare_outputs(recipe_file)
 # Load datasets
diff --git a/modules/Crossval/Crossval_Calibration.R b/modules/Crossval/Crossval_Calibration.R
index a8fea870..5051f12f 100644
--- a/modules/Crossval/Crossval_Calibration.R
+++ b/modules/Crossval/Crossval_Calibration.R
@@ -279,8 +279,8 @@ Crossval_Calibration <- function(recipe, data) {
   tmp_lims2 <- list()
 # What to save hcst category limits or obs category limits?
 # TODO saving:
-recipe$Analysis$Workflow$Probabilities$save <- FALSE
-if (recipe$Analysis$Workflow$Probabilities$save) {
+#recipe$Analysis$Workflow$Probabilities$save <- FALSE
+if (recipe$Analysis$Workflow$Probabilities$save == 'all') {
   for(ps in 1:length(categories)) {
     tmp_lims3 <- drop(lims[[ps]])
     for (l in 1:dim(lims[[ps]])['bin']) {
diff --git a/modules/Crossval/Crossval_metrics.R b/modules/Crossval/Crossval_metrics.R
index 2aa7b4b6..c4ad978a 100644
--- a/modules/Crossval/Crossval_metrics.R
+++ b/modules/Crossval/Crossval_metrics.R
@@ -276,7 +276,7 @@ Crossval_metrics <- function(recipe, data_crossval,
       return(res)
     })
   # Save metrics
-  if (recipe$Analysis$Workflow$Skill$save == TRUE) {
+  if (tolower(recipe$Analysis$Workflow$Skill$save) == 'all') {
     save_metrics(recipe = recipe,
                  metrics = skill_metrics,
                  data_cube = data_crossval$hcst, agg = 'global',
diff --git a/modules/Visualization/output_size.yml b/modules/Visualization/output_size.yml
index 3b02a190..6d7e5b60 100644
--- a/modules/Visualization/output_size.yml
+++ b/modules/Visualization/output_size.yml
@@ -118,5 +118,13 @@ region: #units inches
       dot_size: 2
      # xlonshft: 180
       plot_margin: !expr c(0, 4.1, 4.1, 2.1)
-
+  Kuwait:
+    cylindrical_equidistant:
+      skill_metrics:
+        intxlon: 1
+        intylat: 1
+        country.borders: TRUE
+        width: 5
+        bar_label_scale: 0.8
+        axes_label_scale: 0.8
 # Add other regions
diff --git a/recipe_ecvs_ano_mul_seas.yml b/recipe_ecvs_ano_mul_seas.yml
index 42b32c2e..fe43221c 100644
--- a/recipe_ecvs_ano_mul_seas.yml
+++ b/recipe_ecvs_ano_mul_seas.yml
@@ -41,7 +41,7 @@ Analysis:
     Anomalies:
       compute: yes
       cross_validation: no
-      save: none
+      save: no
     Time_aggregation:
       execute: yes
       method: average
@@ -57,7 +57,7 @@ Analysis:
       cross_validation: yes
     Probabilities:
       percentiles: [[1/3, 2/3]] # frac: Quantile thresholds.
-      save: none
+      save: 'all'
     Indicators:
       index: no
     Visualization:
diff --git a/recipe_ecvs_ano_seas.yml b/recipe_ecvs_ano_seas.yml
index 5d349bbf..d86672de 100644
--- a/recipe_ecvs_ano_seas.yml
+++ b/recipe_ecvs_ano_seas.yml
@@ -5,7 +5,7 @@ Description:
 Analysis:
   Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal
   Variables:
-  - {name: 'tas', freq: 'monthly_mean', units: 'C'}
+  - {name: 'prlr', freq: 'monthly_mean', units: 'mm', flux: yes}
   #- {name: 'prlr', freq: 'monthly_mean', units: 'mm', flux: yes}
   Datasets:
     System:
@@ -24,7 +24,7 @@ Analysis:
     Reference:
       name: ERA5 # Mandatory, str: Reference codename. See docu.
   Time:
-    sdate: '1101'
+    sdate: '1201'
     fcst_year: '2024'
     hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY'
     hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY'
@@ -57,7 +57,7 @@ Analysis:
       cross_validation: yes
     Probabilities:
       percentiles: [[1/3, 2/3]] # frac: Quantile thresholds.
-      save: none
+      save: 'all'
     Indicators:
       index: no
     Visualization:
diff --git a/recipe_subseasonal_ecvs.yml b/recipe_subseasonal_ecvs.yml
index 3d7b0e64..bdeb0992 100644
--- a/recipe_subseasonal_ecvs.yml
+++ b/recipe_subseasonal_ecvs.yml
@@ -35,13 +35,13 @@ Analysis:
     # To request more References to be divided into atomic recipes, add them this way:
     # - {name: 'ERA5Land'}
   Time:
-    sdate: 20241031 #%Y%m%d # Cambiar a 2023
+    sdate: 20231207 #%Y%m%d # Change to 2023
     #- '1201' # Start date, 'mmdd' (Mandatory, int)
     # To request more startdates to be divided into atomic recipes, add them this way:
     # - '0101'
    # - '0201'
    # ...
-    fcst_year: 20241031 # Forecast initialization year 'YYYY' (Optional, int)
+    fcst_year: 20231207 # Forecast initialization year 'YYYY' (Optional, int)
     hcst_start: '1999' # Hindcast initialization start year 'YYYY' (Mandatory, int)
     hcst_end: '2016' # Hindcast initialization end year 'YYYY' (Mandatory, int)
     ftime_min: 1 # First forecast time step in months. Starts at “1”. (Mandatory, int)
    #- {name: global, latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9}
    # To request more regions to be divided in atomic recipes, add them this way:
    # {name: "Nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 60}
-    - {name: "Iberia", latmin: 36, latmax: 44, lonmin: -10, lonmax: 5}
+    - {name: "Kuwait", latmin: 28, latmax: 31, lonmin: 46, lonmax: 49}
+    #- {name: "Iberia", latmin: 36, latmax: 44, lonmin: -10, lonmax: 5}
    # - {name: "EU", latmin: 20, latmax: 80, lonmin: -20, lonmax: 40}
   Regrid:
     method: bilinear # Interpolation method (Mandatory, str)
-    type: to_system # Interpolate to: 'to_system', 'to_reference', 'none',
+    type: "conf/grid_description/griddes_system51c3s.txt" # Interpolate to: 'to_system', 'to_reference', 'none',
                     # or CDO-accepted grid. (Mandatory, str)
   Workflow:
     # This is the section of the recipe where the parameters for each module are specified
     Calibration:
       method: evmos # Calibration method. (Mandatory, str)
       save: 'all' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str)
     Skill:
-      metric: mean_bias enscorr rpss enssprerr # List of skill metrics separated by spaces or commas. (Mandatory, str)
-      save: 'all' # Options: 'all', 'none' (Mandatory, str)
+      metric: mean_bias enscorr rpss # List of skill metrics separated by spaces or commas. (Mandatory, str)
+      save: 'none' # Options: 'all', 'none' (Mandatory, str)
     Statistics:
       metric: cov std var n_eff # List of statistics separated by spaces or commas. (Mandatory, str)
       save: 'all' # Options: 'all', 'none' (Mandatory, str)
       col1_width: NULL # Optional, int: to adjust width of first column in scorecards table
       col2_width: NULL # Optional, int: to adjust width of second column in scorecards table
       calculate_diff: False # Mandatory, bool: True/False
-  ncores: 16 # Number of cores to be used in parallel computation.
+  ncores: 6 # Number of cores to be used in parallel computation.
            # If left empty, defaults to 1. (Optional, int)
   remove_NAs: yes # Whether to remove NAs.
                  # If left empty, defaults to no/false. (Optional, bool)
   Output_format: 'Scorecards' # 'S2S4E' or 'Scorecards'. Determines the format of the output. Default is 'S2S4E'.
 Run:
-  filesystem: esarchive # Name of the filesystem as defined in the archive configuration file
+  filesystem: gpfs # Name of the filesystem as defined in the archive configuration file
   Loglevel: INFO # Minimum category of log messages to display: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'.
                  # Default value is 'INFO'. (Optional, str)
   Terminal: yes # Optional, bool: Whether to display log messages in the terminal.
                 # Default is yes/true.
-  output_dir: /esarchive/scratch/nperez/git/case_s2s/ # Output directory. Must have write permissions. (Mandatory, str)
-  code_dir: /esarchive/scratch/nperez/git/sunset/ # Directory where the code is stored. Is used when launching jobs (not running interactively)
+  output_dir: /home/bsc/bsc032339/ # Output directory. Must have write permissions. (Mandatory, str)
+  code_dir: /home/bsc/bsc032339/sunset/ # Directory where the code is stored. Is used when launching jobs (not running interactively)
   autosubmit: no # Whether or not to run with Autosubmit. Only for non-atomic recipes (not running interactively)
   # fill only if using autosubmit
-  auto_conf:
-    script: ./example_scripts/multimodel_seasonal.R # replace with the path to your script
-    expid: a6wq # replace with your EXPID
-    hpc_user: bsc032762 # replace with your hpc username
-    wallclock: 01:00 # wallclock for single-model jobs, hh:mm
-    wallclock_multimodel: 02:00 # wallclock for multi-model jobs, hh:mm. If empty, 'wallclock' will be used.
-    processors_per_job: 4 # processors to request for each single-model job.
-    processors_multimodel: 16 # processors to request for each multi-model job. If empty, 'processors_per_job' will be used.
-    custom_directives: ['#SBATCH --exclusive'] # custom scheduler directives for single-model jobs.
-    custom_directives_multimodel: ['#SBATCH --exclusive', '#SBATCH --constraint=highmem'] # custom scheduler directives for multi-model jobs. If empty, 'custom_directives' will be used.
-    platform: nord3v2 # platform (for now, only nord3v2 is available)
-    email_notifications: yes # enable/disable email notifications. Change it if you want to.
-    email_address: nuria.perez@bsc.es # replace with your email address
-    notify_completed: yes # notify me by email when a job finishes
-    notify_failed: yes # notify me by email when a job fails
diff --git a/recipe_tas_singl_cal_seas.yml b/recipe_tas_singl_cal_seas.yml
index 79dcf27d..8eec7e19 100644
--- a/recipe_tas_singl_cal_seas.yml
+++ b/recipe_tas_singl_cal_seas.yml
@@ -5,7 +5,7 @@ Description:
 Analysis:
   Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal
   Variables:
-  - {name: prlr, freq: monthly_mean, units: mm, flux: yes}
+  - {name: tas, freq: monthly_mean, units: C, flux: yes}
   Datasets:
     System:
     - {name: 'ECMWF-SEAS5.1'}
diff --git a/subsunset.sh b/subsunset.sh
new file mode 100644
index 00000000..fd276aa1
--- /dev/null
+++ b/subsunset.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+#SBATCH -n 64
+#SBATCH -N 1
+#SBATCH -t 10:00:00
+#SBATCH -J sunset_sub
+#SBATCH -o sunset_sub-%J.out
+#SBATCH -e sunset_sub-%J.err
+#SBATCH --account=bsc32
+#SBATCH --qos=gp_bsces
+#SBATCH --constraint=lowmem
+
+#### --qos=acc_bsces
+
+source /gpfs/projects/bsc32/software/suselinux/11/software/Miniconda3/4.7.10/etc/profile.d/conda.sh
+conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/SUNSET-env_2.0.0
+
+#Rscript /home/bsc/bsc032339/sunset/full_ecvs_anomalies.R /home/bsc/bsc032339/sunset/recipe_tas.yml
+
+#Rscript /home/bsc/bsc032339/sunset/full_ecvs_calibration.R /home/bsc/bsc032339/sunset/recipe_ecvs_cal_seas.yml
+
+Rscript /home/bsc/bsc032339/sunset/full_ecvs_calibration.R /home/bsc/bsc032339/sunset/recipe_subseasonal_ecvs.yml
+
+
diff --git a/sunset.sh b/sunset.sh
index 6577a9ae..bd6f1d55 100644
--- a/sunset.sh
+++ b/sunset.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 #SBATCH -n 112
 #SBATCH -N 1
-#SBATCH -t 4:00:00
+#SBATCH -t 10:00:00
 #SBATCH -J sunset_multimodel
 #SBATCH -o sunset_multimodel-%J.out
 #SBATCH -e sunset_multimodel-%J.err
@@ -18,5 +18,6 @@ conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/SUNSET-env_2.0.0
 
 #Rscript /home/bsc/bsc032339/sunset/full_ecvs_calibration.R /home/bsc/bsc032339/sunset/recipe_ecvs_cal_seas.yml
 
-Rscript /home/bsc/bsc032339/sunset/full_ecvs_multimodel_calibrated.R /home/bsc/bsc032339/sunset/recipe_ecvs_cal_mul_seas.yml #full_ecvs_multimodel_anomalies.R #full_NAO.R #ecvs_multimodel_anomalies.R
+Rscript /home/bsc/bsc032339/sunset/full_ecvs_multimodel_anomalies.R /home/bsc/bsc032339/sunset/recipe_ecvs_ano_mul_seas.yml
+
--
GitLab


From ff9ce34cc64c11ab971f4cefa6a25cfee3ed73cb Mon Sep 17 00:00:00 2001
From: Nuria Perez Zanon
Date: Wed, 18 Dec 2024 13:55:52 +0100
Subject: [PATCH 57/78] remove unneeded logger

---
 modules/Visualization/R/plot_most_likely_terciles_map.R | 2 --
 1 file changed, 2 deletions(-)

diff --git a/modules/Visualization/R/plot_most_likely_terciles_map.R b/modules/Visualization/R/plot_most_likely_terciles_map.R
index ee91b9ff..0c5dcfe3 100644
--- a/modules/Visualization/R/plot_most_likely_terciles_map.R
+++ b/modules/Visualization/R/plot_most_likely_terciles_map.R
@@ -85,8 +85,6 @@ plot_most_likely_terciles <- function(recipe,
                        along = c("var"),
                        indices = var,
                        drop = 'selected')
-    info(recipe$Run$logger,"HERE")
-    info(recipe$Run$logger, str(var_probs))
     var_probs <- Reorder(var_probs,
                          c("syear", "time", "bin", "longitude", "latitude"))
--
GitLab


From 2ef28b3d32258eba703262c5c241d1d992e071eb Mon Sep 17 00:00:00 2001
From: Nuria Perez Zanon
Date: Wed, 18 Dec 2024 17:05:07 +0100
Subject: [PATCH 58/78] reorder removed

---
 full_ecvs_anomalies.R             | 36 ++++++++++---------------------
 full_ecvs_calibration.R           | 35 ++++++++++--------------------
 full_ecvs_multimodel_anomalies.R  |  4 +++-
 full_ecvs_multimodel_calibrated.R | 17 +++-------------
 recipe_ecvs_ano_mul_seas.yml      |  2 +-
 recipe_ecvs_ano_seas.yml          |  2 +-
 recipe_ecvs_cal_mul_seas.yml      |  2 +-
 recipe_ecvs_cal_seas.yml          |  2 +-
 recipe_tas_singl_cal_seas.yml     | 10 ++++-----
 9 files changed, 37 insertions(+), 73 deletions(-)

diff --git a/full_ecvs_anomalies.R b/full_ecvs_anomalies.R
index 97bcd115..a4c46693 100644
--- a/full_ecvs_anomalies.R
+++ b/full_ecvs_anomalies.R
@@ -16,46 +16,32 @@ data_summary(data$hcst, recipe)
 data_summary(data$obs, recipe)
 data_summary(data$fcst, recipe)
 
-  #### Reorde for MostLikely to plot EU centered:
-  if (tolower(recipe$Analysis$Region$name) == 'global') {
-    data$hcst <- CST_Subset(data$hcst, along = 'longitude',
-                            indices=c(181:360, 1:180))
-    data$fcst <- CST_Subset(data$fcst, along = 'longitude',
-                            indices=c(181:360, 1:180))
-    data$obs <- CST_Subset(data$obs, along = 'longitude',
-                           indices=c(181:360, 1:180))
-    data$hcst$coords$longitude[1:180] <- data$hcst$coords$longitude[1:180] - 360
-    data$fcst$coords$longitude[1:180] <- data$fcst$coords$longitude[1:180] - 360
-    data$obs$coords$longitude[1:180] <- data$obs$coords$longitude[1:180] - 360
+  if (recipe$Analysis$Workflow$Time_aggregation$execute) {
+    data <- Aggregation(recipe = recipe, data = data)
+    data_summary(data$hcst, recipe)
+    data_summary(data$obs, recipe)
+    data_summary(data$fcst, recipe)
   }
-  ####
-
-
-data_agg <- Aggregation(recipe = recipe, data = data)
-data_summary(data_agg$hcst, recipe)
-data_summary(data_agg$obs, recipe)
-data_summary(data_agg$fcst, recipe)
 source("modules/Crossval/Crossval_anomalies.R")
-res <- Crossval_anomalies(recipe = recipe, data = data_agg)
+res <- Crossval_anomalies(recipe = recipe, data = data)
 source("modules/Crossval/Crossval_metrics.R")
 skill_metrics <- Crossval_metrics(recipe = recipe, data_crossval = res,
                                   fair = FALSE, nmemb = NULL, nmemb_ref = NULL)
 # Required to plot a forecast:
-data_agg$fcst <- res$fcst
+data$fcst <- res$fcst
 tmp_probs <- list(probs_fcst = res$probs$fcst[[1]])
-nlats <- as.numeric(dim(data_agg$hcst$data)['latitude'])
-nlons <- as.numeric(dim(data_agg$hcst$data)['longitude'])
-ntimes <- as.numeric(dim(data_agg$hcst$data)['time'])
+nlats <- as.numeric(dim(data$hcst$data)['latitude'])
+nlons <- as.numeric(dim(data$hcst$data)['longitude'])
+ntimes <- as.numeric(dim(data$hcst$data)['time'])
 # TODO: consider more than terciles:
 dim(tmp_probs$probs_fcst) <- c(bin = 3, syear = 1, var = 1, time = ntimes,
                                latitude = nlats, longitude = nlons)
-Visualization(recipe = recipe, data = data_agg, skill_metrics = skill_metrics, significance = TRUE, probabilities = tmp_probs)
+Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, significance = TRUE, probabilities = tmp_probs)
 ## Check logo size is appropiated for your maps:
 source("tools/add_logo.R")
diff --git a/full_ecvs_calibration.R b/full_ecvs_calibration.R
index 3c3f2c6e..3f673f29 100644
--- a/full_ecvs_calibration.R
+++ b/full_ecvs_calibration.R
@@ -17,46 +17,33 @@ data_summary(data$hcst, recipe)
 data_summary(data$obs, recipe)
 data_summary(data$fcst, recipe)
 
-  #### Reorde for MostLikely to plot EU centered:
-  if (tolower(recipe$Analysis$Region$name) == 'global') {
-    data$hcst <- CST_Subset(data$hcst, along = 'longitude',
-                            indices=c(181:360, 1:180))
-    data$fcst <- CST_Subset(data$fcst, along = 'longitude',
-                            indices=c(181:360, 1:180))
-    data$obs <- CST_Subset(data$obs, along = 'longitude',
-                           indices=c(181:360, 1:180))
-    data$hcst$coords$longitude[1:180] <- data$hcst$coords$longitude[1:180] - 360
-    data$fcst$coords$longitude[1:180] <- data$fcst$coords$longitude[1:180] - 360
-    data$obs$coords$longitude[1:180] <- data$obs$coords$longitude[1:180] - 360
+  if (recipe$Analysis$Workflow$Time_aggregation$execute) {
+    data <- Aggregation(recipe = recipe, data = data)
+    data_summary(data$hcst, recipe)
+    data_summary(data$obs, recipe)
+    data_summary(data$fcst, recipe)
   }
 source("modules/Crossval/Crossval_Calibration.R")
-res <- Crossval_Calibration(recipe = recipe, data = data_agg)
+res <- Crossval_Calibration(recipe = recipe, data = data)
 source("modules/Crossval/Crossval_metrics.R")
 skill_metrics <- Crossval_metrics(recipe = recipe, data_crossval = res,
                                   fair = FALSE, nmemb = NULL, nmemb_ref = NULL)
 # Required to plot a forecast:
-data_agg$fcst <- res$fcst
+data$fcst <- res$fcst
 tmp_probs <- list(probs_fcst = res$probs$fcst[[1]])
-nlats <- as.numeric(dim(data_agg$hcst$data)['latitude'])
-nlons <- as.numeric(dim(data_agg$hcst$data)['longitude'])
-ntimes <- as.numeric(dim(data_agg$hcst$data)['time'])
+nlats <- as.numeric(dim(data$hcst$data)['latitude'])
+nlons <- as.numeric(dim(data$hcst$data)['longitude'])
+ntimes <- as.numeric(dim(data$hcst$data)['time'])
 # TODO: consider more than terciles
 dim(tmp_probs$probs_fcst) <- c(bin = 3, syear = 1, var = 1, time = ntimes,
                                latitude = nlats, longitude = nlons)
-Visualization(recipe = recipe, data = data_agg, skill_metrics = skill_metrics, significance = TRUE, probabilities = tmp_probs)
+Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, significance = TRUE, probabilities = tmp_probs)
 ## Check logo size is appropiated for your maps:
 source("tools/add_logo.R")
diff --git a/full_ecvs_multimodel_anomalies.R b/full_ecvs_multimodel_anomalies.R
index 4c7e16ab..a343e8bc 100644
--- a/full_ecvs_multimodel_anomalies.R
+++ b/full_ecvs_multimodel_anomalies.R
@@ -28,7 +28,9 @@ for (sys in models) {
 #  recipe_aux$Analysis$Datasets$System$member <- read_yaml("conf/archive_decadal.yml")$esarchive$System[[sys]]$member
   data <- Loading(recipe = recipe_aux)
   data <- Units(recipe = recipe_aux, data = data)
-  # data <- Aggregation(recipe = recipe_aux, data = data)
+  if (recipe$Analysis$Workflow$Time_aggregation$execute) {
+    data <- Aggregation(recipe = recipe_aux, data = data)
+  }
   # verification individual models
   product <- Crossval_anomalies(recipe = recipe_aux, data = data)
   skill_model <- Crossval_metrics(recipe = recipe_aux, data_crossval = product,
diff --git a/full_ecvs_multimodel_calibrated.R b/full_ecvs_multimodel_calibrated.R
index 763eed56..21d2f9d3 100644
--- a/full_ecvs_multimodel_calibrated.R
+++ b/full_ecvs_multimodel_calibrated.R
@@ -28,21 +28,10 @@ for (sys in models) {
   recipe_aux$Analysis$Datasets$System$name <- as.vector(sys)
 #  recipe_aux$Analysis$Datasets$System$member <- read_yaml("conf/archive_decadal.yml")$esarchive$System[[sys]]$member
   data <- Loading(recipe = recipe_aux)
-  #### Reorde for MostLikely to plot EU centered:
-  if (tolower(recipe_aux$Analysis$Region$name) == 'global') {
-    data$hcst <- CST_Subset(data$hcst, along = 'longitude',
-                            indices=c(181:360, 1:180))
-    data$fcst <- CST_Subset(data$fcst, along = 'longitude',
-                            indices=c(181:360, 1:180))
-    data$obs <- CST_Subset(data$obs, along = 'longitude',
-                           indices=c(181:360, 1:180))
-    data$hcst$coords$longitude[1:180] <- data$hcst$coords$longitude[1:180]-360
data$fcst$coords$longitude[1:180] <- data$fcst$coords$longitude[1:180]-360 - data$obs$coords$longitude[1:180] <- data$obs$coords$longitude[1:180]-360 - } - #### data <- Units(recipe = recipe_aux, data = data) - data <- Aggregation(recipe = recipe_aux, data = data) + if (recipe$Analysis$Workflow$Time_aggregation$execute) { + data <- Aggregation(recipe = recipe_aux, data = data) + } # verification individual models product <- Crossval_Calibration(recipe = recipe_aux, data = data) skill_model <- Crossval_metrics(recipe = recipe_aux, data_crossval = product, diff --git a/recipe_ecvs_ano_mul_seas.yml b/recipe_ecvs_ano_mul_seas.yml index fe43221c..5be9fce8 100644 --- a/recipe_ecvs_ano_mul_seas.yml +++ b/recipe_ecvs_ano_mul_seas.yml @@ -31,7 +31,7 @@ Analysis: ftime_min: 1 # Mandatory, int: First leadtime time step in months ftime_max: 6 # Mandatory, int: Last leadtime time step in months Region: - - {name: 'Global', latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9} + - {name: 'Global', latmin: -90, latmax: 90, lonmin: -180, lonmax: 179.9} #- {name: 'Global', latmin: 0, latmax: 10, lonmin: 0, lonmax: 10} Regrid: method: conservative # Mandatory, str: Interpolation method. See docu. diff --git a/recipe_ecvs_ano_seas.yml b/recipe_ecvs_ano_seas.yml index d86672de..1ad587c6 100644 --- a/recipe_ecvs_ano_seas.yml +++ b/recipe_ecvs_ano_seas.yml @@ -31,7 +31,7 @@ Analysis: ftime_min: 1 # Mandatory, int: First leadtime time step in months ftime_max: 6 # Mandatory, int: Last leadtime time step in months Region: - - {name: 'Global', latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9} + - {name: 'Global', latmin: -90, latmax: 90, lonmin: -180, lonmax: 179.9} #- {name: 'Global', latmin: 0, latmax: 10, lonmin: 0, lonmax: 10} Regrid: method: conservative # Mandatory, str: Interpolation method. See docu. diff --git a/recipe_ecvs_cal_mul_seas.yml b/recipe_ecvs_cal_mul_seas.yml index c2c1e371..89e781f8 100644 --- a/recipe_ecvs_cal_mul_seas.yml +++ b/recipe_ecvs_cal_mul_seas.yml @@ -31,7 +31,7 @@ Analysis: ftime_min: 1 # Mandatory, int: First leadtime time step in months ftime_max: 6 # Mandatory, int: Last leadtime time step in months Region: - - {name: 'Global', latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9} + - {name: 'Global', latmin: -90, latmax: 90, lonmin: -180, lonmax: 179.9} #- {name: 'Global', latmin: 0, latmax: 10, lonmin: 0, lonmax: 10} Regrid: method: conservative # Mandatory, str: Interpolation method. See docu. diff --git a/recipe_ecvs_cal_seas.yml b/recipe_ecvs_cal_seas.yml index f887042d..01a9dba5 100644 --- a/recipe_ecvs_cal_seas.yml +++ b/recipe_ecvs_cal_seas.yml @@ -31,7 +31,7 @@ Analysis: ftime_min: 1 # Mandatory, int: First leadtime time step in months ftime_max: 6 # Mandatory, int: Last leadtime time step in months Region: - - {name: 'Global', latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9} + - {name: 'Global', latmin: -90, latmax: 90, lonmin: -180, lonmax: 179.9} #- {name: 'Global', latmin: 0, latmax: 10, lonmin: 0, lonmax: 10} Regrid: method: conservative # Mandatory, str: Interpolation method. See docu. 
diff --git a/recipe_tas_singl_cal_seas.yml b/recipe_tas_singl_cal_seas.yml index 8eec7e19..d649e082 100644 --- a/recipe_tas_singl_cal_seas.yml +++ b/recipe_tas_singl_cal_seas.yml @@ -21,13 +21,13 @@ Analysis: Time: sdate: '1201' fcst_year: '2024' - hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_start: '1996' # Mandatory, int: Hindcast start year 'YYYY' hcst_end: '2000' # Mandatory, int: Hindcast end year 'YYYY' ftime_min: 1 # Mandatory, int: First leadtime time step in months - ftime_max: 6 # Mandatory, int: Last leadtime time step in months + ftime_max: 3 # Mandatory, int: Last leadtime time step in months Region: - - {name: test, latmin: -10, latmax: 10, lonmin: 0, lonmax: 40} - # - {name: Global, latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9} + #- {name: test, latmin: -10, latmax: 10, lonmin: 0, lonmax: 40} + - {name: Global, latmin: -90, latmax: 90, lonmin: -180, lonmax: 179.9} Regrid: method: conservative # Mandatory, str: Interpolation method. See docu. type: /home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt #/esarchive/scratch/nperez/git4/sunset/conf/grid_description/griddes_system51c3s.txt # "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_system" #"to_reference" @@ -38,7 +38,7 @@ Analysis: cross_validation: no save: all Time_aggregation: - execute: yes + execute: no #yes method: average ini: [1,2,3,4] end: [3,4,5,6] -- GitLab From 281d632c4aad30452c1bf08088020a2f5c645cac Mon Sep 17 00:00:00 2001 From: Nuria Perez Zanon Date: Fri, 20 Dec 2024 14:18:45 +0100 Subject: [PATCH 59/78] Fix enscor typo and archive --- conf/archive_reference.yml | 8 ++++-- full_ecvs_calibration.R | 39 ++++++++++++++++----------- full_ecvs_multimodel_anomalies.R | 5 ++-- modules/Crossval/Crossval_metrics.R | 2 +- modules/Visualization/output_size.yml | 6 ++--- recipe_ecvs_cal_seas.yml | 2 +- recipe_subseasonal_ecvs.yml | 8 +++--- subsunset.sh | 2 +- 8 files changed, 44 insertions(+), 28 deletions(-) diff --git a/conf/archive_reference.yml b/conf/archive_reference.yml index 67ff6d20..a1c7bf5b 100644 --- a/conf/archive_reference.yml +++ b/conf/archive_reference.yml @@ -24,8 +24,12 @@ esarchive: name: "ERA5" institution: "European Centre for Medium-Range Weather Forecasts" src: "recon/ecmwf/era5/" - weekly_mean: {"tas":"weekly_mean/tas_f1h-r1440x721cds/", - "prlr":"weekly_mean/prlr_f1h-r1440x721cds/"} + weekly_mean: {"tas":"weekly_mean/tas_f1h-r1440x721cds/", + "prlr":"weekly_mean/prlr_f1h-r1440x721cds/", + "tasmax":"weekly_mean/tasmax_f24h-r1440x721cds/", + "tasmin":"weekly_mean/tasmin_f24h-r1440x721cds/", + "rsds":"weekly_mean/rsds_f1h-r1440x721cds/", + "sfcWind":"weekly_mean/sfcWind_f1h-r1440x721cds/"} daily_mean: {"tas":"daily_mean/tas_f1h-r1440x721cds/", "rsds":"daily_mean/rsds_f1h-r1440x721cds/", "prlr":"daily_mean/prlr_f1h-r1440x721cds/", diff --git a/full_ecvs_calibration.R b/full_ecvs_calibration.R index 3f673f29..bdf10d5a 100644 --- a/full_ecvs_calibration.R +++ b/full_ecvs_calibration.R @@ -8,20 +8,25 @@ args = commandArgs(trailingOnly = TRUE) recipe_file <- args[1] #recipe_file <- "recipe_tas_singl_cal_seas.yml" #recipe_file <- "recipe_subseasonal_ecvs.yml" -#recipe <- read_atomic_recipe(recipe_file) -recipe <- prepare_outputs(recipe_file) + +recipe <- read_atomic_recipe(recipe_file) +#recipe <- prepare_outputs(recipe_file) + # Load datasets data <- Loading(recipe) data <- Units(recipe, data) data_summary(data$hcst, recipe) data_summary(data$obs, recipe) -data_summary(data$fcst, recipe) - +if (!is.null(data$fcst)) { + 
data_summary(data$fcst, recipe) +} if (recipe$Analysis$Workflow$Time_aggregation$execute) { data <- Aggregation(recipe = recipe, data = data) data_summary(data$hcst, recipe) data_summary(data$obs, recipe) - data_summary(data$fcst, recipe) + if (!is.null(data$fcst)) { + data_summary(data$fcst, recipe) + } } source("modules/Crossval/Crossval_Calibration.R") @@ -31,19 +36,23 @@ source("modules/Crossval/Crossval_metrics.R") skill_metrics <- Crossval_metrics(recipe = recipe, data_crossval = res, fair = FALSE, nmemb = NULL, nmemb_ref = NULL) -# Required to plot a forecast: -data$fcst <- res$fcst -tmp_probs <- list(probs_fcst = res$probs$fcst[[1]]) -nlats <- as.numeric(dim(data$hcst$data)['latitude']) -nlons <- as.numeric(dim(data$hcst$data)['longitude']) -ntimes <- as.numeric(dim(data$hcst$data)['time']) +if (!is.null(data$fcst)) { + # Required to plot a forecast: + data$fcst <- res$fcst + tmp_probs <- list(probs_fcst = res$probs$fcst[[1]]) + nlats <- as.numeric(dim(data$hcst$data)['latitude']) + nlons <- as.numeric(dim(data$hcst$data)['longitude']) + ntimes <- as.numeric(dim(data$hcst$data)['time']) -# TODO: consider more than terciles -dim(tmp_probs$probs_fcst) <- c(bin = 3, syear = 1, var = 1, time = ntimes, - latitude = nlats, longitude = nlons) + # TODO: consider more than terciles + dim(tmp_probs$probs_fcst) <- c(bin = 3, syear = 1, var = 1, time = ntimes, + latitude = nlats, longitude = nlons) -Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, significance = TRUE, probabilities = tmp_probs) + Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, significance = TRUE, probabilities = tmp_probs) +} else { + Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, significance = TRUE) +} ## Check logo size is appropiated for your maps: source("tools/add_logo.R") diff --git a/full_ecvs_multimodel_anomalies.R b/full_ecvs_multimodel_anomalies.R index a343e8bc..e92a143c 100644 --- a/full_ecvs_multimodel_anomalies.R +++ b/full_ecvs_multimodel_anomalies.R @@ -5,7 +5,8 @@ source("modules/Units/Units.R") source("modules/Visualization/Visualization.R") #args = commandArgs(trailingOnly = TRUE) #recipe_file <- args[1] -recipe_file <- "recipe_tas.yml" +#recipe_file <- "recipe_tas.yml" +recipe_file <- "recipe_ecvs_ano_mul_seas.yml" # recipe_file <- "recipe_tas_decadal.yml" # Will it work with a single job? 
#recipe <- read_atomic_recipe(recipe_file) @@ -28,7 +29,7 @@ for (sys in models) { # recipe_aux$Analysis$Datasets$System$member <- read_yaml("conf/archive_decadal.yml")$esarchive$System[[sys]]$member data <- Loading(recipe = recipe_aux) data <- Units(recipe = recipe_aux, data = data) - if (recipe$Analysis$Workflow$Time_aggregation$execute) { + if (recipe_aux$Analysis$Workflow$Time_aggregation$execute) { data <- Aggregation(recipe = recipe_aux, data = data) } # verification individual models diff --git a/modules/Crossval/Crossval_metrics.R b/modules/Crossval/Crossval_metrics.R index c4ad978a..605e1f2b 100644 --- a/modules/Crossval/Crossval_metrics.R +++ b/modules/Crossval/Crossval_metrics.R @@ -167,7 +167,7 @@ Crossval_metrics <- function(recipe, data_crossval, alpha = alpha, ncores = ncores) skill_metrics$enscorr <- enscorr$corr - skill_metrics$enscorr_signigicance <- enscorr$sign + skill_metrics$enscorr_significance <- enscorr$sign } if ('mean_bias' %in% requested_metrics) { if (!is.null(data_crossval$hcst.full_val$data)) { diff --git a/modules/Visualization/output_size.yml b/modules/Visualization/output_size.yml index 6d7e5b60..c4a054f0 100644 --- a/modules/Visualization/output_size.yml +++ b/modules/Visualization/output_size.yml @@ -113,10 +113,10 @@ region: #units inches height: 5 cylindrical_equidistant: most_likely_terciles: - width: 8.5 - height: 8.5 + width: 10 + height: 8 dot_size: 2 - # xlonshft: 180 + col_mask: 'cornsilk3' plot_margin: !expr c(0, 4.1, 4.1, 2.1) Kuwait: cylindrical_equidistant: skill_metrics: diff --git a/recipe_ecvs_cal_seas.yml b/recipe_ecvs_cal_seas.yml index 01a9dba5..dd7cae44 100644 --- a/recipe_ecvs_cal_seas.yml +++ b/recipe_ecvs_cal_seas.yml @@ -5,7 +5,7 @@ Description: Analysis: Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal Variables: - #- {name: 'tas', freq: 'monthly_mean', units: 'C'} + - {name: 'tas', freq: 'monthly_mean', units: 'C'} - {name: 'prlr', freq: 'monthly_mean', units: 'mm', flux: yes} Datasets: System: diff --git a/recipe_subseasonal_ecvs.yml b/recipe_subseasonal_ecvs.yml index bdeb0992..d46c6a8a 100644 --- a/recipe_subseasonal_ecvs.yml +++ b/recipe_subseasonal_ecvs.yml @@ -35,13 +35,13 @@ Analysis: # To request more References to be divided into atomic recipes, add them this way: # - {name: 'ERA5Land'} Time: - sdate: 20231207 #%Y%m%d # Cambiar a 2023 + sdate: 2023 #%Y%m%d # Change to 2023 #- '1201' # Start date, 'mmdd' (Mandatory, int) # To request more startdates to be divided into atomic recipes, add them this way: # - '0101' # - '0201' # ... - fcst_year: 20231207 # Forecast initialization year 'YYYY' (Optional, int) + fcst_year: #20231207 # Forecast initialization year 'YYYY' (Optional, int) hcst_start: '1999' # Hindcast initialization start year 'YYYY' (Mandatory, int) hcst_end: '2016' # Hindcast initialization end year 'YYYY' (Mandatory, int) ftime_min: 1 # First forecast time step in months. Starts at “1”. (Mandatory, int) @@ -66,6 +66,8 @@ Analysis: # or CDO-accepted grid. (Mandatory, str) Workflow: # This is the section of the recipe where the parameters for each module are specified + Time_aggregation: + execute: no Calibration: method: evmos # Calibration method. 
(Mandatory, str) save: 'all' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str) @@ -107,7 +109,7 @@ Analysis: col1_width: NULL # Optional, int: to adjust width of first column in scorecards table col2_width: NULL # Optional, int: to adjust width of second column in scorecards table calculate_diff: False # Mandatory, bool: True/False - ncores: 6 # Number of cores to be used in parallel computation. + ncores: 9 # Number of cores to be used in parallel computation. # If left empty, defaults to 1. (Optional, int) remove_NAs: yes # Whether to remove NAs. # If left empty, defaults to no/false. (Optional, bool) diff --git a/subsunset.sh b/subsunset.sh index fd276aa1..6e75546f 100644 --- a/subsunset.sh +++ b/subsunset.sh @@ -18,6 +18,6 @@ conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/SUNSET-env_2.0.0 #Rscript /home/bsc/bsc032339/sunset/full_ecvs_calibration.R /home/bsc/bsc032339/sunset/recipe_ecvs_cal_seas.yml -Rscript /home/bsc/bsc032339/sunset/full_ecvs_calibration.R /home/bsc/bsc032339/sunset/recipe_subseasonal_ecv.yml +Rscript /home/bsc/bsc032339/sunset/full_ecvs_calibration.R /home/bsc/bsc032339/sunset/recipe_subseasonal_ecvs.yml -- GitLab From 2bcb1ea03489059defc961d2a0947aa50f67454d Mon Sep 17 00:00:00 2001 From: Nuria Perez Zanon Date: Tue, 14 Jan 2025 16:19:14 +0100 Subject: [PATCH 60/78] config and logo loop --- conf/archive_seasonal.yml | 27 +++- conf/archive_subseasonal.yml | 2 +- conf/grid_description/griddes_eccc5.txt | 22 +++ full_ecvs_calibration.R | 12 +- modules/Visualization/R/plot_ensemble_mean.R | 19 ++- modules/Visualization/R/plot_metrics.R | 22 ++- .../R/plot_most_likely_terciles_map.R | 27 +++- modules/Visualization/Visualization.R | 61 +++++---- modules/Visualization/output_size.yml | 8 +- recipe_ecvs_cal_mul_seas.yml | 8 +- recipe_ecvs_cal_seas.yml | 6 +- recipe_ecvs_cal_subseas.yml | 128 ++++++++++++++++++ recipe_subseasonal_ecvs.yml | 2 +- tools/add_logo.R | 38 ++---- 14 files changed, 298 insertions(+), 84 deletions(-) create mode 100644 conf/grid_description/griddes_eccc5.txt create mode 100644 recipe_ecvs_cal_subseas.yml diff --git a/conf/archive_seasonal.yml b/conf/archive_seasonal.yml index 9bbe5040..6324871f 100644 --- a/conf/archive_seasonal.yml +++ b/conf/archive_seasonal.yml @@ -80,7 +80,7 @@ gpfs: NCEP-CFSv2: name: "NCEP CFSv2" institution: "NOAA NCEP" #? 
- src: "exp/ncep/cfs-v2/" + src: "exp/ncep/system2c3s/" monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f24h/", "sfcWind":"monthly_mean/sfcWind_f6h/", @@ -121,6 +121,18 @@ gpfs: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_eccc1.txt" + ECCC-GEM5.2-NEMO: + name: "ECCC GEM5.2-NEMO" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/eccc/eccc5/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_s0-24h/"} + nmember: + fcst: 20 + hcst: 20 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_eccc5.txt" Reference: ERA5: name: "ERA5" @@ -237,6 +249,19 @@ esarchive: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_system2c3s.txt" + JMA-MRI-CPS3: + name: "JMA MRI-CPS3" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/jma/system3c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_s0-24h/", + "tasmax":"monthly_mean/tasmax_f6h/", "tasmin":"monthly_mean/tasmin_f6h/"} + nmember: + fcst: 155 + hcst: 10 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_eccc1.txt" + ECCC-CanCM4i: name: "ECCC CanCM4i" institution: "European Centre for Medium-Range Weather Forecasts" diff --git a/conf/archive_subseasonal.yml b/conf/archive_subseasonal.yml index 14a4ebb9..0fccdf4f 100644 --- a/conf/archive_subseasonal.yml +++ b/conf/archive_subseasonal.yml @@ -17,7 +17,7 @@ gpfs: hcst: 12 calendar: "proleptic_gregorian" time_stamp_lag: "0" # Do we need it for subseasonal? - reference_grid: "/esarchive/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20040624.nc" # is it the same as seasonal? + reference_grid: "/gpfs/projects/bsc32/esarchive_cache/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20050109.nc" # is it the same as seasonal? 
esarchive: src_sys: "/esarchive/" System: diff --git a/conf/grid_description/griddes_eccc5.txt b/conf/grid_description/griddes_eccc5.txt new file mode 100644 index 00000000..782a1b4d --- /dev/null +++ b/conf/grid_description/griddes_eccc5.txt @@ -0,0 +1,22 @@ +# +# gridID 1 +# +gridtype = generic +gridsize = 1 +# +# gridID 2 +# +gridtype = lonlat +gridsize = 64800 +xsize = 360 +ysize = 180 +xname = lon +xlongname = "longitude" +xunits = "degrees_east" +yname = lat +ylongname = "latitude" +yunits = "degrees_north" +xfirst = 0.5 +xinc = 1 +yfirst = 89.5 +yinc = -1 diff --git a/full_ecvs_calibration.R b/full_ecvs_calibration.R index bdf10d5a..5409d41f 100644 --- a/full_ecvs_calibration.R +++ b/full_ecvs_calibration.R @@ -49,15 +49,13 @@ if (!is.null(data$fcst)) { latitude = nlats, longitude = nlons) - Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, significance = TRUE, probabilities = tmp_probs) + Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, + significance = TRUE, probabilities = tmp_probs, + logo = "tools/BSC_logo_95.jpg") } else { - Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, significance = TRUE) + Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, + significance = TRUE, logo = "tools/BSC_logo_95.jpg") } -## Check logo size is appropiated for your maps: -source("tools/add_logo.R") -add_logo(recipe, "tools/BSC_logo_95.jpg") #"rsz_rsz_bsc_logo.png") - -# scorecards computation: diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R index 1a3c6c68..cd633131 100644 --- a/modules/Visualization/R/plot_ensemble_mean.R +++ b/modules/Visualization/R/plot_ensemble_mean.R @@ -1,6 +1,7 @@ +source("tools/add_logo.R") plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, output_conf, - method = 'median') { + method = 'median', logo = NULL) { ## TODO: Add 'anomaly' to plot title # Abort if frequency is daily if (recipe$Analysis$Variables$freq %in% c("daily", "daily_mean")) { @@ -270,6 +271,22 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, args = c(base_args, list(toptitle = toptitle, fileout = fileout))) + # Convert plots to user-chosen format + if (!is.null(recipe$Analysis$Workflow$Visualization$file_format) && + tolower(recipe$Analysis$Workflow$Visualization$file_format) != "pdf") { + extension <- tolower(recipe$Analysis$Workflow$Visualization$file_format) + system_command <- paste("convert -density 300", fileout, + "-resize 40% -alpha remove", + paste0(tools::file_path_sans_ext(fileout), ".", + extension)) + system(system_command) + unlink(fileout) + fileout <- paste0(tools::file_path_sans_ext(fileout), ".", + extension) + } + if (!is.null(logo)) { + add_logo(file = fileout, logo = logo) + } } } } diff --git a/modules/Visualization/R/plot_metrics.R b/modules/Visualization/R/plot_metrics.R index 49523be6..89b6edda 100644 --- a/modules/Visualization/R/plot_metrics.R +++ b/modules/Visualization/R/plot_metrics.R @@ -1,8 +1,8 @@ library(stringr) library(lubridate) - +source("tools/add_logo.R") plot_metrics <- function(recipe, data_cube, metrics, - outdir, significance = F, output_conf) { + outdir, significance = F, output_conf, logo = NULL) { # recipe: Auto-S2S recipe # archive: Auto-S2S archive # data_cube: s2dv_cube object with the corresponding hindcast data @@ -93,7 +93,7 @@ plot_metrics <- function(recipe, data_cube, metrics, units <- NULL # Define plot characteristics and metric name to display in 
plot if (name %in% c("rpss", "bss90", "bss10", "frpss", "crpss", - "rpss_specs", "bss90:_specs", "bss10_specs", + "rpss_specs", "bss90_specs", "bss10_specs", "rmsss", "msss")) { display_name <- toupper(strsplit(name, "_")[[1]][1]) metric <- var_metric[[name]] @@ -462,6 +462,22 @@ plot_metrics <- function(recipe, data_cube, metrics, args = c(base_args, list(toptitle = toptitle, fileout = fileout))) + # Convert plots to user-chosen format + if (!is.null(recipe$Analysis$Workflow$Visualization$file_format) && + tolower(recipe$Analysis$Workflow$Visualization$file_format) != "pdf") { + extension <- tolower(recipe$Analysis$Workflow$Visualization$file_format) + system_command <- paste("convert -density 300", fileout, + "-resize 40% -alpha remove", + paste0(tools::file_path_sans_ext(fileout), ".", + extension)) + system(system_command) + unlink(fileout) + fileout <- paste0(tools::file_path_sans_ext(fileout), ".", + extension) + } + if (!is.null(logo)) { + add_logo(file = fileout, logo = logo) + } } } } diff --git a/modules/Visualization/R/plot_most_likely_terciles_map.R b/modules/Visualization/R/plot_most_likely_terciles_map.R index 0c5dcfe3..39f8f29c 100644 --- a/modules/Visualization/R/plot_most_likely_terciles_map.R +++ b/modules/Visualization/R/plot_most_likely_terciles_map.R @@ -8,7 +8,7 @@ source("modules/Visualization/R/tmp/Utils.R") source("modules/Visualization/R/tmp/PlotEquiMap.R") source("modules/Visualization/R/tmp/ColorBar_onebox.R") source("modules/Visualization/R/tmp/GradientCatsColorBar.R") - +source("tools/add_logo.R") ## TODO: Change name plot_most_likely_terciles <- function(recipe, fcst, @@ -16,7 +16,8 @@ plot_most_likely_terciles <- function(recipe, mask, dots, outdir, - output_conf) { + output_conf, + logo = NULL) { ## TODO: Add 'anomaly' to plot title # Abort if frequency is daily @@ -235,10 +236,10 @@ plot_most_likely_terciles <- function(recipe, toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), "\n", "Most Likely Tercile / ", - time_labels[i], " ", years[i], - " / Start date: ", + "Start date: ", format(as.Date(i_syear, format = "%Y%m%d"), - "%d-%m-%Y")) + "%d-%m-%Y"), "\n Valid time: ", + time_labels[i], " ", years[i]) } else if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), @@ -300,6 +301,22 @@ plot_most_likely_terciles <- function(recipe, } } dev.off() + # Convert plots to user-chosen format + if (!is.null(recipe$Analysis$Workflow$Visualization$file_format) && + tolower(recipe$Analysis$Workflow$Visualization$file_format) != "pdf") { + extension <- tolower(recipe$Analysis$Workflow$Visualization$file_format) + system_command <- paste("convert -density 300", fileout, + "-resize 40% -alpha remove", + paste0(tools::file_path_sans_ext(fileout), ".", + extension)) + system(system_command) + unlink(fileout) + fileout <- paste0(tools::file_path_sans_ext(fileout), ".", + extension) + } + if (!is.null(logo)) { + add_logo(file = fileout, logo = logo) + } } } } diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index bf800f0e..0d52983a 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -18,7 +18,8 @@ Visualization <- function(recipe, statistics = NULL, probabilities = NULL, significance = F, - output_conf = NULL) { + output_conf = NULL, + logo = NULL) { # Try to produce and save several basic plots. 
# recipe: the auto-s2s recipe as read by read_yaml() # data: list containing the hcst, obs and (optional) fcst s2dv_cube objects @@ -96,18 +97,21 @@ Visualization <- function(recipe, if (is.logical(significance)) { plot_metrics(recipe = recipe, data_cube = data$hcst, metrics = skill_metrics, outdir = outdir, - significance = significance, output_conf = output_conf) + significance = significance, output_conf = output_conf, + logo = logo) info(recipe$Run$logger, paste("Skill metrics significance set as", significance)) } else { if (significance %in% c('both', 'dots')) { plot_metrics(recipe, data$hcst, skill_metrics, outdir, - significance = 'dots', output_conf = output_conf) + significance = 'dots', output_conf = output_conf, + logo = logo) info(recipe$Run$logger, "Skill metrics significance as dots") } if (significance %in% c('both', 'mask')) { plot_metrics(recipe, data$hcst, skill_metrics, outdir, - significance = 'mask', output_conf = output_conf) + significance = 'mask', output_conf = output_conf, + logo = logo) info(recipe$Run$logger, "Skill metrics significance as mask") } } @@ -122,7 +126,7 @@ Visualization <- function(recipe, if ("statistics" %in% plots) { if (!is.null(statistics)) { plot_metrics(recipe, data$hcst, statistics, outdir, - significance, output_conf = output_conf) + significance, output_conf = output_conf, logo = logo) } else { error(recipe$Run$logger, paste0("The statistics plots have been requested, but the ", @@ -152,7 +156,7 @@ plot_ensemble_mean(recipe, data$fcst, outdir, mask = NULL, dots = NULL, output_conf = output_conf, - method = method) + method = method, logo = logo) } # Plots with masked if (recipe$Analysis$Workflow$Visualization$mask_ens %in% @@ -170,7 +174,7 @@ plot_ensemble_mean(recipe, data$fcst, mask = skill_metrics$enscorr, dots = NULL, outdir, output_conf = output_conf, - method = method) + method = method, logo = logo) } } # Plots with dotted negative correlated in ens-mean-fcst @@ -188,7 +192,7 @@ plot_ensemble_mean(recipe, data$fcst, mask = NULL, dots = skill_metrics$enscorr, outdir, output_conf = output_conf, - method = method) + method = method, logo = logo) } } @@ -217,7 +221,8 @@ plot_most_likely_terciles(recipe, data$fcst, probabilities, mask = NULL, dots = NULL, - outdir, output_conf = output_conf) + outdir, output_conf = output_conf, + logo = logo) } # Plots with masked terciles if (recipe$Analysis$Workflow$Visualization$mask_terciles %in% @@ -235,7 +240,8 @@ plot_most_likely_terciles(recipe, data$fcst, probabilities, mask = skill_metrics$rpss, dots = NULL, - outdir, output_conf = output_conf) + outdir, output_conf = output_conf, + logo = logo) } } # Plots with dotted terciles @@ -253,7 +259,8 @@ plot_most_likely_terciles(recipe, data$fcst, probabilities, mask = NULL, dots = skill_metrics$rpss, - outdir, output_conf = output_conf) + outdir, output_conf = output_conf, + logo = logo) } } } else { @@ -264,21 +271,21 @@ } # Convert plots to user-chosen format - if (!is.null(recipe$Analysis$Workflow$Visualization$file_format) && - tolower(recipe$Analysis$Workflow$Visualization$file_format) != "pdf") { - extension <- tolower(recipe$Analysis$Workflow$Visualization$file_format) - plot_files <- list.files(path = recipe$Run$output_dir, pattern = "\\.pdf$", - recursive = TRUE, full.names = TRUE) - for (file in plot_files) { - system_command <- paste("convert -density 300", file, - "-resize 40% -alpha remove", - paste0(tools::file_path_sans_ext(file), ".", - extension)) - system(system_command) - } - unlink(plot_files) - 
info(recipe$Run$logger, - paste0("##### PLOT FILES CONVERTED TO ", toupper(extension), " #####")) - } +# if (!is.null(recipe$Analysis$Workflow$Visualization$file_format) && +# tolower(recipe$Analysis$Workflow$Visualization$file_format) != "pdf") { +# extension <- tolower(recipe$Analysis$Workflow$Visualization$file_format) +# plot_files <- list.files(path = recipe$Run$output_dir, pattern = "\\.pdf$", +# recursive = TRUE, full.names = TRUE) +# for (file in plot_files) { +# system_command <- paste("convert -density 300", file, +# "-resize 40% -alpha remove", +# paste0(tools::file_path_sans_ext(file), ".", +# extension)) +# system(system_command) +# } +# unlink(plot_files) +# info(recipe$Run$logger, +# paste0("##### PLOT FILES CONVERTED TO ", toupper(extension), " #####")) +# } } diff --git a/modules/Visualization/output_size.yml b/modules/Visualization/output_size.yml index c4a054f0..71452489 100644 --- a/modules/Visualization/output_size.yml +++ b/modules/Visualization/output_size.yml @@ -113,11 +113,11 @@ region: #units inches height: 5 cylindrical_equidistant: most_likely_terciles: - width: 10 - height: 8 + width: 12 + height: 9 dot_size: 2 - col_mask: 'cornsilk3' - plot_margin: !expr c(0, 4.1, 4.1, 2.1) + col_mask: 'white' + plot_margin: !expr c(0, 4.1, 5.5, 2.1) Kuwait: cylindrical_equidistant: skill_metrics: diff --git a/recipe_ecvs_cal_mul_seas.yml b/recipe_ecvs_cal_mul_seas.yml index 89e781f8..a4576dc1 100644 --- a/recipe_ecvs_cal_mul_seas.yml +++ b/recipe_ecvs_cal_mul_seas.yml @@ -12,9 +12,9 @@ Analysis: - {name: 'Meteo-France-System8'} - {name: 'CMCC-SPS3.5'} - {name: 'ECMWF-SEAS5.1'} - #- {name: 'UK-MetOffice-Glosea603'} + - {name: 'UK-MetOffice-Glosea603'} ##- {name: 'NCEP-CFSv2'} - #- {name: 'DWD-GCFS2.1'} + - {name: 'DWD-GCFS2.1'} #- {name: 'ECCC-GEM5.2-NEMO'} # name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 Multimodel: @@ -24,8 +24,8 @@ Analysis: Reference: name: ERA5 # Mandatory, str: Reference codename. See docu. Time: - sdate: '1201' - fcst_year: '2024' + sdate: '0101' + fcst_year: '2025' hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' ftime_min: 1 # Mandatory, int: First leadtime time step in months diff --git a/recipe_ecvs_cal_seas.yml b/recipe_ecvs_cal_seas.yml index dd7cae44..3417d2d9 100644 --- a/recipe_ecvs_cal_seas.yml +++ b/recipe_ecvs_cal_seas.yml @@ -24,8 +24,8 @@ Analysis: Reference: name: ERA5 # Mandatory, str: Reference codename. See docu. Time: - sdate: '1201' - fcst_year: '2024' + sdate: '0101' + fcst_year: '2025' hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' ftime_min: 1 # Mandatory, int: First leadtime time step in months @@ -82,7 +82,7 @@ Analysis: col1_width: NULL col2_width: NULL calculate_diff: FALSE - ncores: 30 # Optional, int: number of cores, defaults to 1 + ncores: 16 # Optional, int: number of cores, defaults to 1 remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE alpha: 0.05 Output_format: scorecards diff --git a/recipe_ecvs_cal_subseas.yml b/recipe_ecvs_cal_subseas.yml new file mode 100644 index 00000000..01b60087 --- /dev/null +++ b/recipe_ecvs_cal_subseas.yml @@ -0,0 +1,128 @@ +# IMPORTANT: This recipe is not intended to represent a real workflow: it is only a template showcasing ALL available options. +Description: + Author: N.Pérez-Zanón + Info: # Complete recipe containing all possible fields. 
+Analysis: + Horizon: subseasonal # Mandatory, str: 'seasonal', or 'decadal'. Subseasonal is in development + Variables: + # name: variable name(s) in the /esarchive (Mandatory, str) + # freq: 'monthly_mean', 'daily' or 'daily_mean' (Mandatory, str) + # units: desired data units for each variable. Only available for temperature, + # precipitation, and pressure variables. + - {name: 'tas', freq: 'weekly_mean', units: 'C'} +# name: 'tas' +# freq: 'weekly_mean' +# units: 'C' + # To request more variables to be divided in atomic recipes, add them this way: +# - {name: 'prlr', freq: 'weekly_mean', units: 'mm'} +# - {name: 'sfcWind', freq: 'weekly_mean', units: 'm s-1'} +# - {name: 'rsds', freq: 'weekly_mean', units: 'W m-2'} + # To request multiple variables *in the same* atomic recipe, add them this way: + # - {name: 'tas, prlr, sfcWind, rsds', freq: 'weekly_mean', units: {tas: 'C', prlr: 'mm', sfcWind: 'm s-1', rsds:'W m-2'}} + Datasets: + System: + # name: System name (Mandatory, str) + # member: 'all' or individual members, separated by a comma and in quotes (decadal only, str) + - {name: 'NCEP-CFSv2', member: 'all'} + # To request more Systems to be divided in atomic recipes, add them this way: + # - {name: 'Meteo-France-System7'} + Multimodel: + execute: no # Either yes/true or no/false (Mandatory, bool) + approach: pooled # Multimodel computation approach. 'pooled' currently the only option (str) + createFrom: Anomalies # Which module should the anomalies be created from (str) + Reference: + - {name: 'ERA5'} # Reference name (Mandatory, str) + # To request more References to be divided into atomic recipes, add them this way: + # - {name: 'ERA5Land'} + Time: + sdate: 20250109 #%Y%m%d # Change to 2023 + #- '1201' # Start date, 'mmdd' (Mandatory, int) + # To request more startdates to be divided into atomic recipes, add them this way: + # - '0101' + # - '0201' + # ... + fcst_year: 20250109 # Forecast initialization year 'YYYY' (Optional, int) + hcst_start: '1999' # Hindcast initialization start year 'YYYY' (Mandatory, int) + hcst_end: '2016' # Hindcast initialization end year 'YYYY' (Mandatory, int) + ftime_min: 1 # First forecast time step in months. Starts at “1”. (Mandatory, int) + ftime_max: 4 # Last forecast time step in months. Starts at “1”. (Mandatory, int) + week_day: Thursday + sweek_window: 9 + sday_window: 3 + Region: + # latmin: minimum latitude (Mandatory, int) + # latmax: maximum latitude (Mandatory, int) + # lonmin: # minimum longitude (Mandatory, int) + # lonmax: # maximum longitude (Mandatory, int) + # To request more regions to be divided in atomic recipes, add them this way: + # {name: "Nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 60} + #- {name: "Kuwait", latmin: 28, latmax: 31, lonmin: 46, lonmax: 49} + #- {name: "Iberia", latmin: 36, latmax: 44, lonmin: -10, lonmax: 5} + # - {name: "EU", latmin: 20, latmax: 80, lonmin: -20, lonmax: 40} + - {name: 'Global', latmin: -90, latmax: 90, lonmin: -180, lonmax: 179.9} + Regrid: + method: bilinear # Interpolation method (Mandatory, str) + type: "to_system" #"conf/grid_description/griddes_system51c3s.txt" # Interpolate to: 'to_system', 'to_reference', 'none', + # or CDO-accepted grid. (Mandatory, str) + Workflow: + # This is the section of the recipe where the parameters for each module are specified + Time_aggregation: + execute: no + Calibration: + method: evmos # Calibration method. 
(Mandatory, str) + save: 'none' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str) + Skill: + metric: mean_bias rpss enscorr # List of skill metrics separated by spaces or commas. (Mandatory, str) + save: 'all' # Options: 'all', 'none' (Mandatory, str) + Statistics: + metric: cov std var n_eff # List of statistics separated by spaces or commas. (Mandatory, str) + save: 'all' # Options: 'all', 'none' (Mandatory, str) + Probabilities: + # percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Thresholds + percentiles: [[1/3, 2/3]] + # for quantiles and probability categories. Each set of thresholds should be + # enclosed within brackets. For now, they are INDEPENDENT from skill metrics. (Optional) + save: 'percentiles_only' # Options: 'all', 'none', 'bins_only', 'percentiles_only' (Mandatory, str) + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles + multi_panel: no + mask_terciles: 'both' + mask_ens: 'both' + projection: Robinson + file_format: 'PNG' + dots_terciles: yes # Whether to dot the non-significant by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + # mask_ens: no # Whether to mask the non-significant points by rpss in the forecast ensemble mean plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + Scorecards: + execute: no # yes/no + regions: + # Mandatory: Define regions for which the spatial aggregation will be performed. + # The regions must be included within the area defined in the 'Analysis:Region' section. + Iberia: {lon.min: -10, lon.max: 5, lat.min: 36, lat.max: 44} + #Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + #Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + #Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: 1, 2, 3 # Mandatory, int: start months to visualise in scorecard table. Options: 'all' or a sequence of numbers. + metric: mean_bias enscorr rpss crpss enssprerr # Mandatory: metrics to visualise in scorecard table + metric_aggregation: 'score' # Mandatory, str: level of aggregation for skill scores. Options: 'score' or 'skill' + inf_to_na: True # Optional, bool: set inf values in data to NA, default is no/False + table_label: NULL # Optional, str: extra information to add in scorecard table title + fileout_label: NULL # Optional, str: extra information to add in scorecard output filename + col1_width: NULL # Optional, int: to adjust width of first column in scorecards table + col2_width: NULL # Optional, int: to adjust width of second column in scorecards table + calculate_diff: False # Mandatory, bool: True/False + ncores: 16 # Number of cores to be used in parallel computation. + # If left empty, defaults to 1. (Optional, int) + remove_NAs: yes # Whether to remove NAs. + # If left empty, defaults to no/false. (Optional, bool) + Output_format: 'Scorecards' # 'S2S4E' or 'Scorecards'. Determines the format of the output. Default is 'S2S4E'. +Run: + filesystem: gpfs # Name of the filesystem as defined in the archive configuration file + Loglevel: INFO # Minimum category of log messages to display: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. + # Default value is 'INFO'. (Optional, str) + Terminal: yes # Optional, bool: Whether to display log messages in the terminal. + # Default is yes/true. + output_dir: /home/bsc/bsc032339/ # Output directory. Must have write permissions. 
(Mandatory, str) + code_dir: /home/bsc/bsc032339/sunset/ # Directory where the code is stored. Is used when launching jobs (not running interactively) + autosubmit: no # Whether or not to run with Autosubmit. Only for non-atomic recipes (not running interactively) + # fill only if using autosubmit + diff --git a/recipe_subseasonal_ecvs.yml b/recipe_subseasonal_ecvs.yml index d46c6a8a..2be485a0 100644 --- a/recipe_subseasonal_ecvs.yml +++ b/recipe_subseasonal_ecvs.yml @@ -72,7 +72,7 @@ Analysis: method: evmos # Calibration method. (Mandatory, str) save: 'all' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str) Skill: - metric: mean_bias enscorr rpss # List of skill metrics separated by spaces or commas. (Mandatory, str) + metric: mean_bias rpss enscorr # List of skill metrics separated by spaces or commas. (Mandatory, str) save: 'none' # Options: 'all', 'none' (Mandatory, str) Statistics: metric: cov std var n_eff # List of statistics separated by spaces or commas. (Mandatory, str) diff --git a/tools/add_logo.R b/tools/add_logo.R index 2a44275f..92259a2d 100644 --- a/tools/add_logo.R +++ b/tools/add_logo.R @@ -1,34 +1,18 @@ -add_logo <- function(recipe, logo, logo_resize_percentage = 0.25) { - # recipe: SUNSET recipe +add_logo <- function(file, logo, logo_resize_percentage = 0.25) { + # file: path to the figure file to modify # logo: URL to the logo - system <- list.files(paste0(recipe$Run$output_dir, "/plots/")) - - files <- lapply(system, function(x) { - f <- list.files(paste0(recipe$Run$output_dir, "/plots/", - system, "/"), - recursive = TRUE, - full.names = TRUE) - })[[1]] - dim(files) <- c(file = length(files)) - - Apply(list(files), target_dims = NULL, - fun = function(x, logo, logo_resize_percentage) { - fig_width <- as.numeric(system(paste("identify -format '%w'", x), + fig_width <- as.numeric(system(paste("identify -format '%w'", file), intern = TRUE)) - logo_height <- as.numeric(system(paste("convert", logo, + logo_height <- as.numeric(system(paste("convert", logo, "-resize", fig_width * 0.1, "-format '%h' info:", sep=" "), intern = TRUE)) - system(paste0("convert ", x, - " -gravity south -background white -splice 0x", - logo_height, " extended_fig.png")) - system(paste0("convert extended_fig.png \\( ", - logo, " -resize ", fig_width * logo_resize_percentage, - " \\) -gravity southeast -composite ", - x)) - }, - logo = logo, - logo_resize_percentage = logo_resize_percentage, - ncores = 1) + system(paste0("convert ", file, + " -gravity south -background white -splice 0x", + logo_height, " extended_fig.png")) + system(paste0("convert extended_fig.png \\( ", + logo, " -resize ", fig_width * logo_resize_percentage, + " \\) -gravity southeast -composite ", + file)) file.remove("extended_fig.png") } -- GitLab From 4f00464484c2ef6312d938e6b83d04967ee096ce Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 15 Jan 2025 12:43:01 +0100 Subject: [PATCH 61/78] Use .drop_dims() in Crossval_anomalies() and save probabilities --- modules/Crossval/Crossval_anomalies.R | 51 +++++++++++---------------- modules/Saving/R/drop_dims.R | 12 ++++--- modules/Saving/R/save_probabilities.R | 1 - 3 files changed, 28 insertions(+), 36 deletions(-) diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R index 606368b7..74a1e900 100644 --- a/modules/Crossval/Crossval_anomalies.R +++ b/modules/Crossval/Crossval_anomalies.R @@ -171,13 +171,14 @@ Crossval_anomalies <- function(recipe, data) { ncores = ncores) tmp_lims2 <- list() - 
for (ps in 1:length(categories)) { + ## TODO: use .drop_dims() + tmp_lims3 <- .drop_dims(lims[[ps]]) for (l in 1:dim(lims[[ps]])['bin']) { - tmp_lims <- tmp_lims3[l,,,] - if (!('var' %in% names(dim(tmp_lims)))) { - dim(tmp_lims) <- c(var = 1, dim(tmp_lims)) - } + ## TODO: Use Subset + tmp_lims <- ClimProjDiags::Subset(x = tmp_lims3, + along = "bin", + indices = l) tmp_lims2 <- append(tmp_lims2, list(tmp_lims)) names(tmp_lims2)[length(tmp_lims2)] <- as.character(categories[[ps]][l]) } @@ -248,6 +249,7 @@ Crossval_anomalies <- function(recipe, data) { probs_obs <- list() all_names <- NULL + ## TODO: Add .drop_dims() here for (ps in 1:length(categories)) { for (perc in 1:(length(categories[[ps]]) + 1)) { if (perc == 1) { @@ -259,45 +261,34 @@ Crossval_anomalies <- function(recipe, data) { "_to_", categories[[ps]][perc]) } probs_hcst <- append(list(Subset(hcst_probs_ev[[ps]], - along = 'bin', indices = perc, drop = 'all')), + along = 'bin', + indices = perc, + drop = 'selected')), probs_hcst) probs_obs <- append(list(Subset(obs_probs_ev[[ps]], - along = 'bin', indices = perc, drop = 'all')), + along = 'bin', + indices = perc, + drop = 'selected')), probs_obs) if (!is.null(data$fcst)) { probs_fcst <- append(list(Subset(fcst_probs[[ps]], - along = 'bin', indices = perc, drop = 'all')), + along = 'bin', + indices = perc, + drop = 'selected')), probs_fcst) } all_names <- c(all_names, name_elem) } } names(probs_hcst) <- all_names - if (!('var' %in% names(dim(probs_hcst[[1]])))) { - probs_hcst <- lapply(probs_hcst, function(x) { - dim(x) <- c(var = 1, dim(x)) - return(x)}) - } + probs_hcst <- lapply(probs_hcst, .drop_dims) names(probs_obs) <- all_names - if (!('var' %in% names(dim(probs_obs[[1]])))) { - probs_obs <- lapply(probs_obs, function(x) { - dim(x) <- c(var = 1, dim(x)) - return(x)}) - } - + probs_obs <- lapply(probs_obs, .drop_dims) if (!is.null(data$fcst)) { names(probs_fcst) <- all_names - if (!('var' %in% names(dim(probs_fcst[[1]])))) { - probs_fcst <- lapply(probs_fcst, function(x) { - dim(x) <- c(var = 1, dim(x)) - return(x)}) - } - if (!('syear' %in% names(dim(probs_fcst[[1]])))) { - probs_fcst <- lapply(probs_fcst, function(x) { - dim(x) <- c(syear = 1, dim(x)) - return(x)}) - } + probs_fcst <- lapply(probs_fcst, .drop_dims) } + if (recipe$Analysis$Workflow$Probabilities$save %in% c('all', 'bins_only')) { save_probabilities(recipe = recipe, probs = probs_hcst, diff --git a/modules/Saving/R/drop_dims.R b/modules/Saving/R/drop_dims.R index 7361faa8..be68a333 100644 --- a/modules/Saving/R/drop_dims.R +++ b/modules/Saving/R/drop_dims.R @@ -1,8 +1,14 @@ # version victoria https://earth.bsc.es/gitlab/es/auto-s2s/-/blob/dev-Loading-multivar/modules/Skill/Skill.R +# This function takes an array and drops dimensions of length = 1 that are not +# needed by the saving or plotting functions, aka 'droppable_dims'. +# The essential dimensions are: 'var', 'bin', 'time', and 'latitude'/'longitude' +# or 'region'. +# The 'droppable' dimensions are: 'dat', 'sday', 'sweek', 'ensemble', 'nobs', +# 'nexp', 'exp_memb', 'obs_memb'. 
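+# A minimal usage sketch (hypothetical dimensions, not taken from a real run): +# an input array of dim c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 24, +# latitude = 90, longitude = 180) would be returned with +# dim c(var = 1, syear = 24, latitude = 90, longitude = 180): the length-1 +# 'dat', 'sday' and 'sweek' dimensions are dropped, while 'var' is kept even +# at length 1 because the saving and plotting functions expect it.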
.drop_dims <- function(metric_array) { # Define dimensions that are not essential for saving droppable_dims <- c("dat", "sday", "sweek", "ensemble", "nobs", - "nexp", "exp_memb", "obs_memb", "bin") + "nexp", "exp_memb", "obs_memb") # , "bin") # Select non-essential dimensions of length 1 dims_to_drop <- intersect(names(which(dim(metric_array) == 1)), droppable_dims) @@ -18,10 +24,6 @@ return(metric_array) } - - - - ## TODO: Replace with ClimProjDiags::Subset and add var and dat dimensions #.drop_dims <- function(metric_array) { # # Drop all singleton dimensions diff --git a/modules/Saving/R/save_probabilities.R b/modules/Saving/R/save_probabilities.R index c6d5f222..e283c8fd 100644 --- a/modules/Saving/R/save_probabilities.R +++ b/modules/Saving/R/save_probabilities.R @@ -59,7 +59,6 @@ save_probabilities <- function(recipe, syears <- seq(1:dim(data_cube$data)['syear'][[1]]) ## expect dim = [sday = 1, sweek = 1, syear, time] syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) - # Loop over variable dimension for (var in 1:data_cube$dims[['var']]) { subset_probs <- lapply(probs, function(x) { -- GitLab From 88b53fc51d9793f02b4d097d9727e6586260f60e Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 15 Jan 2025 15:25:55 +0100 Subject: [PATCH 62/78] Fix dimensions and add TODO --- modules/Crossval/Crossval_anomalies.R | 13 +++++++------ modules/Crossval/Crossval_metrics.R | 7 +++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R index 74a1e900..0366ad55 100644 --- a/modules/Crossval/Crossval_anomalies.R +++ b/modules/Crossval/Crossval_anomalies.R @@ -280,6 +280,8 @@ Crossval_anomalies <- function(recipe, data) { all_names <- c(all_names, name_elem) } } + ## TODO: Apply .drop_dims() directly to hcst_probs_ev etc; and move + ## reorganizing and renaming to save_probabilities() names(probs_hcst) <- all_names probs_hcst <- lapply(probs_hcst, .drop_dims) names(probs_obs) <- all_names @@ -297,12 +299,11 @@ Crossval_anomalies <- function(recipe, data) { save_probabilities(recipe = recipe, probs = probs_obs, data_cube = data$hcst, agg = agg, type = "obs") - # TODO Forecast -# if (!is.null(probs_fcst)) { -# save_probabilities(recipe = recipe, probs = probs_fcst, -# data_cube = data$fcst, agg = agg, -# type = "fcst") -# } + if (!is.null(probs_fcst)) { + save_probabilities(recipe = recipe, probs = probs_fcst, + data_cube = data$fcst, agg = agg, + type = "fcst") + } } # Save ensemble mean for multimodel option: hcst_EM <- MeanDims(ano_hcst$data, 'ensemble', drop = T) diff --git a/modules/Crossval/Crossval_metrics.R b/modules/Crossval/Crossval_metrics.R index 605e1f2b..db24913f 100644 --- a/modules/Crossval/Crossval_metrics.R +++ b/modules/Crossval/Crossval_metrics.R @@ -276,6 +276,9 @@ Crossval_metrics <- function(recipe, data_crossval, return(res) }) # Save metrics + # reduce dimension to work with Visualization module: + skill_metrics <- lapply(skill_metrics, function(x) {.drop_dims(x)}) + if (tolower(recipe$Analysis$Workflow$Skill$save) == 'all') { save_metrics(recipe = recipe, metrics = skill_metrics, @@ -283,10 +286,6 @@ Crossval_metrics <- function(recipe, data_crossval, outdir = recipe$Run$output_dir) } recipe$Run$output_dir <- original - # reduce dimension to work with Visualization module: - skill_metrics <- lapply(skill_metrics, function(x) {drop(x)}) - skill_metrics <- lapply(skill_metrics, function(x){ - InsertDim(x, pos = 1, len = 1, name = 'var')}) return(skill_metrics) } -- GitLab From 
4bde6b50d1b47b0fb7cbdf75176f5b8030c01708 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 15 Jan 2025 16:46:51 +0100 Subject: [PATCH 63/78] Adjust probability dimensions in Crossval_Calibration --- modules/Crossval/Crossval_Calibration.R | 100 +++++++++---------- recipe_bigpredidata_oper_subseasonal_tas.yml | 83 +++++++++++++++ 2 files changed, 129 insertions(+), 54 deletions(-) create mode 100644 recipe_bigpredidata_oper_subseasonal_tas.yml diff --git a/modules/Crossval/Crossval_Calibration.R b/modules/Crossval/Crossval_Calibration.R index 5051f12f..de8460b5 100644 --- a/modules/Crossval/Crossval_Calibration.R +++ b/modules/Crossval/Crossval_Calibration.R @@ -277,17 +277,17 @@ Crossval_Calibration <- function(recipe, data) { prob_lims = categories, ncores = ncores) tmp_lims2 <- list() -# What to save hcst category limits or obs category limits? -# TODO saving: -#recipe$Analysis$Workflow$Probabilities$save <- FALSE -if (recipe$Analysis$Workflow$Probabilities$save == 'all') { - for(ps in 1:length(categories)) { - tmp_lims3 <- drop(lims[[ps]]) + # What to save hcst category limits or obs category limits? + # TODO saving: + #recipe$Analysis$Workflow$Probabilities$save <- FALSE + if (recipe$Analysis$Workflow$Probabilities$save == 'all') { + for (ps in 1:length(categories)) { + ## TODO: use .drop_dims() + tmp_lims3 <- .drop_dims(lims[[ps]]) for (l in 1:dim(lims[[ps]])['bin']) { - tmp_lims <- tmp_lims3[l,,,] - if (!('var' %in% names(dim(tmp_lims)))) { - dim(tmp_lims) <- c(var = 1, dim(tmp_lims)) - } + tmp_lims <- ClimProjDiags::Subset(x = tmp_lims3, + along = "bin", + indices = l) tmp_lims2 <- append(tmp_lims2, list(tmp_lims)) names(tmp_lims2)[length(tmp_lims2)] <- as.character(categories[[ps]][l]) } @@ -301,24 +301,24 @@ if (recipe$Analysis$Workflow$Probabilities$save == 'all') { } # Compute Probabilities for (ps in 1:length(categories)) { - # Get only the probabilities of the central day in sday - central_day <- (dim(cal_hcst_ev_res)['sday'] + 1)/2 - tmp <- Subset(cal_hcst_ev_res, along = 'sday', indices = central_day) - hcst_probs_ev[[ps]] <- GetProbs(tmp, time_dim = 'syear', - prob_thresholds = NULL, - bin_dim_abs = 'bin', - indices_for_quantiles = NULL, - memb_dim = 'ensemble', - abs_thresholds = lims_cal_hcst_tr_res[[ps]], - ncores = ncores) - tmp <- Subset(data$obs$data, along = 'sday', indices = central_day) - obs_probs_ev[[ps]] <- GetProbs(tmp, time_dim = 'syear', - prob_thresholds = NULL, - bin_dim_abs = 'bin', - indices_for_quantiles = NULL, - memb_dim = 'ensemble', - abs_thresholds = lims_obs_tr_res[[ps]], - ncores = ncores) + # Get only the probabilities of the central day in sday + central_day <- (dim(cal_hcst_ev_res)['sday'] + 1)/2 + tmp <- Subset(cal_hcst_ev_res, along = 'sday', indices = central_day) + hcst_probs_ev[[ps]] <- GetProbs(tmp, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'bin', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_cal_hcst_tr_res[[ps]], + ncores = ncores) + tmp <- Subset(data$obs$data, along = 'sday', indices = central_day) + obs_probs_ev[[ps]] <- GetProbs(tmp, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'bin', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_obs_tr_res[[ps]], + ncores = ncores) if (!is.null(data$fcst)) { fcst_probs[[ps]] <- GetProbs(data$fcst$data, time_dim = 'syear', prob_thresholds = NULL, @@ -355,7 +355,7 @@ if (recipe$Analysis$Workflow$Probabilities$save == 'all') { info(recipe$Run$logger, "#### Calibrated and Probabilities Done 
#####") # TODO saving: -recipe$Analysis$Workflow$Calibration$save <- FALSE + recipe$Analysis$Workflow$Calibration$save <- FALSE if (recipe$Analysis$Workflow$Calibration$save != FALSE) { info(recipe$Run$logger, "##### START SAVING CALIBRATED #####") # recipe$Run$output_dir <- paste0(recipe$Run$output_dir, @@ -377,6 +377,7 @@ recipe$Analysis$Workflow$Calibration$save <- FALSE probs_obs <- list() all_names <- NULL + ## TODO: Add .drop_dims() here for (ps in 1:length(categories)) { for (perc in 1:(length(categories[[ps]]) + 1)) { if (perc == 1) { @@ -384,49 +385,40 @@ recipe$Analysis$Workflow$Calibration$save <- FALSE } else if (perc == length(categories[[ps]]) + 1) { name_elem <- paste0("above_", categories[[ps]][perc-1]) } else { - name_elem <- paste0("from_", categories[[ps]][perc-1], + name_elem <- paste0("from_", categories[[ps]][perc-1], "_to_", categories[[ps]][perc]) } probs_hcst <- append(list(Subset(hcst_probs_ev[[ps]], - along = 'bin', indices = perc, drop = 'all')), + along = 'bin', + indices = perc, + drop = 'selected')), probs_hcst) probs_obs <- append(list(Subset(obs_probs_ev[[ps]], - along = 'bin', indices = perc, drop = 'all')), + along = 'bin', + indices = perc, + drop = 'selected')), probs_obs) if (!is.null(data$fcst)) { probs_fcst <- append(list(Subset(fcst_probs[[ps]], - along = 'bin', indices = perc, drop = 'all')), + along = 'bin', + indices = perc, + drop = 'selected')), probs_fcst) } all_names <- c(all_names, name_elem) } } + ## TODO: Apply .drop_dims() directly to hcst_probs_ev etc; and move + ## reorganizing and renaming to save_probabilities() names(probs_hcst) <- all_names - if (!('var' %in% names(dim(probs_hcst[[1]])))) { - probs_hcst <- lapply(probs_hcst, function(x) { - dim(x) <- c(var = 1, dim(x)) - return(x)}) - } + probs_hcst <- lapply(probs_hcst, .drop_dims) names(probs_obs) <- all_names - if (!('var' %in% names(dim(probs_obs[[1]])))) { - probs_obs <- lapply(probs_obs, function(x) { - dim(x) <- c(var = 1, dim(x)) - return(x)}) - } - + probs_obs <- lapply(probs_obs, .drop_dims) if (!is.null(data$fcst)) { names(probs_fcst) <- all_names - if (!('var' %in% names(dim(probs_fcst[[1]])))) { - probs_fcst <- lapply(probs_fcst, function(x) { - dim(x) <- c(var = 1, dim(x)) - return(x)}) - } - if (!('syear' %in% names(dim(probs_fcst[[1]])))) { - probs_fcst <- lapply(probs_fcst, function(x) { - dim(x) <- c(syear = 1, dim(x)) - return(x)}) - } + probs_fcst <- lapply(probs_fcst, .drop_dims) } + browser() if (recipe$Analysis$Workflow$Probabilities$save %in% c('all', 'bins_only')) { save_probabilities(recipe = recipe, probs = probs_hcst, diff --git a/recipe_bigpredidata_oper_subseasonal_tas.yml b/recipe_bigpredidata_oper_subseasonal_tas.yml new file mode 100644 index 00000000..322811ff --- /dev/null +++ b/recipe_bigpredidata_oper_subseasonal_tas.yml @@ -0,0 +1,83 @@ + +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: subseasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + - {name: tas, freq: weekly_mean, units: C} + # - {name: prlr, freq: monthly_mean, units: mm, flux: no} + # - {name: sfcWind, freq: monthly_mean} + Datasets: + System: + - {name: 'NCEP-CFSv2', member: 'all'} + Multimodel: no # Mandatory, bool: Either yes/true or no/false + Reference: + - {name: ERA5} # Mandatory, str: Reference codename. See docu. 
+ Time: + sdate: 20250102 + fcst_year: '2025' # Optional, int: Forecast year 'YYYY' + hcst_start: '2011' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 4 # Mandatory, int: Last leadtime time step in months + week_day: Thursday + sweek_window: 9 + sday_window: 3 + Region: + - {name: "Iberia", latmin: 36, latmax: 44, lonmin: -10, lonmax: 5} + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. + type: "to_system" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: no + cross_validation: no + save: none + Calibration: + method: bias # Mandatory, str: bias, evmos, mse_min, crps_min, rpc_based + cross_validation: yes + save: all + Skill: + metric: mean_bias rpss + save: none + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. This avoids saving the probs of 10 to 90 + save: all + Indicators: + index: no + Visualization: + plots: skill_metrics, most_likely_terciles, forecast_ensemble_mean + NA_color: white + multi_panel: no + dots: no + mask_terciles: yes + shapefile: /esarchive/scratch/cdelgado/aspect_outputs/casestudy-wine/shp_spain/recintos_provinciales_inspire_peninbal_etrs89/recintos_provinciales_inspire_peninbal_etrs89.shp + file_format: PNG # Final file format of the plots. Formats available: PNG, JPG, JPEG, EPS. Defaults to PDF. + ncores: 8 # Optional, int: number of cores, defaults to 1 + remove_NAs: TRUE # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: esarchive + output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ # replace with the directory where you want to save the outputs + code_dir: /esarchive/scratch/ptrascas/R/dev-test_bigpredidata/sunset/sunset # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/ptrascas/R/sunset/bigpredidata_fqa.R # replace with the path to your script + expid: a78g # replace with your EXPID + hpc_user: bsc032413 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 16 + custom_directives: ['#SBATCH --constraint=medmem'] + platform: nord3v2 + email_notifications: no # enable/disable email notifications. Change it if you want to. 
+ email_address: paloma.trascasa@bsc.es # replace with your email address + notify_completed: no # notify me by email when a job finishes + notify_failed: no # notify me by email when a job fails -- GitLab From 7680000dc6a6fd0a9eda13cbb14497071c6c7ef9 Mon Sep 17 00:00:00 2001 From: vagudets Date: Thu, 16 Jan 2025 16:10:03 +0100 Subject: [PATCH 64/78] Bugfix: typo in get_times() for subseasonal case --- modules/Saving/R/get_times.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/Saving/R/get_times.R b/modules/Saving/R/get_times.R index ec36b102..bcb6dfd8 100644 --- a/modules/Saving/R/get_times.R +++ b/modules/Saving/R/get_times.R @@ -17,7 +17,7 @@ switch(fcst.horizon, "seasonal" = {time <- leadtimes; ref <- 'hours since '; stdname <- paste(strtoi(leadtimes), collapse=", ")}, - "subseasonal" = {len <- leadtimes; ref <- 'hours since '; + "subseasonal" = {time <- leadtimes; ref <- 'hours since '; stdname <- ''}, "decadal" = {time <- leadtimes; ref <- 'hours since '; stdname <- paste(strtoi(leadtimes), collapse=", ")}) -- GitLab From ebdd30506dda7ac390426d47476b5e0ead638e17 Mon Sep 17 00:00:00 2001 From: vagudets Date: Mon, 20 Jan 2025 11:35:06 +0100 Subject: [PATCH 65/78] Bugfix: use correct s2dv_cube --- modules/Crossval/Crossval_Calibration.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/Crossval/Crossval_Calibration.R b/modules/Crossval/Crossval_Calibration.R index de8460b5..83b92731 100644 --- a/modules/Crossval/Crossval_Calibration.R +++ b/modules/Crossval/Crossval_Calibration.R @@ -291,6 +291,7 @@ Crossval_Calibration <- function(recipe, data) { tmp_lims2 <- append(tmp_lims2, list(tmp_lims)) names(tmp_lims2)[length(tmp_lims2)] <- as.character(categories[[ps]][l]) } + ## TODO: Fix logic if (recipe$Analysis$Workflow$Probabilities$save == 'yes') { save_percentiles(recipe = recipe, percentiles = tmp_lims2, data_cube = data$obs, @@ -418,16 +419,15 @@ Crossval_Calibration <- function(recipe, data) { names(probs_fcst) <- all_names probs_fcst <- lapply(probs_fcst, .drop_dims) } - browser() if (recipe$Analysis$Workflow$Probabilities$save %in% c('all', 'bins_only')) { save_probabilities(recipe = recipe, probs = probs_hcst, data_cube = data$hcst, agg = agg, type = "hcst") save_probabilities(recipe = recipe, probs = probs_obs, - data_cube = data$hcst, agg = agg, + data_cube = data$obs, agg = agg, type = "obs") - save_probabilities(recipe = recipe, probs = probs_fcst, + save_probabilities(recipe = recipe, probs = probs_fcst, data_cube = data$fcst, agg = agg, type = "fcst") } -- GitLab From 53e97564f50ecce393077a1751f0a70b3eb6ac68 Mon Sep 17 00:00:00 2001 From: vagudets Date: Mon, 20 Jan 2025 16:05:45 +0100 Subject: [PATCH 66/78] Bugfix: compute quantiles for 1 category --- modules/Crossval/Crossval_Calibration.R | 18 +++++++++++++----- modules/Crossval/Crossval_anomalies.R | 8 ++++++-- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/modules/Crossval/Crossval_Calibration.R b/modules/Crossval/Crossval_Calibration.R index 83b92731..04138eb9 100644 --- a/modules/Crossval/Crossval_Calibration.R +++ b/modules/Crossval/Crossval_Calibration.R @@ -86,10 +86,12 @@ Crossval_Calibration <- function(recipe, data) { apply_to = NULL, alpha = NULL, ncores = ncores) - lims_cal_hcst_tr <- Apply(cal_hcst_tr, target_dims = c('syear', 'ensemble'), + lims_cal_hcst_tr <- Apply(cal_hcst_tr, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - quantile(as.vector(x), ps, na.rm = na.rm)})}, + res <- 
quantile(as.vector(x), ps, na.rm = na.rm) + dim(res) <- c(bin = length(ps)) + return(res)})}, output_dims = lapply(categories, function(x) {'bin'}), prob_lims = categories, @@ -97,7 +99,9 @@ Crossval_Calibration <- function(recipe, data) { lims_obs_tr <- Apply(obs_tr, target_dims = c('syear'),#, 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - quantile(as.vector(x), ps, na.rm = na.rm)})}, + res <- quantile(as.vector(x), ps, na.rm = na.rm) + dim(res) <- c(bin = length(ps)) + return(res)})}, output_dims = lapply(categories, function(x){'bin'}), prob_lims = categories, @@ -265,14 +269,18 @@ Crossval_Calibration <- function(recipe, data) { lims_fcst <- Apply(hcst_cal, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - quantile(as.vector(x), ps, na.rm = na.rm)})}, + res <- quantile(as.vector(x), ps, na.rm = na.rm) + dim(res) <- c(bin = length(ps)) + return(res)})}, output_dims = lapply(categories, function(x) {'bin'}), prob_lims = categories, ncores = ncores) lims <- Apply(data$obs$data, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - quantile(as.vector(x), ps, na.rm = na.rm)})}, + res <- quantile(as.vector(x), ps, na.rm = na.rm) + dim(res) <- c(bin = length(ps)) + return(res)})}, output_dims = lapply(categories, function(x) {'bin'}), prob_lims = categories, ncores = ncores) diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R index 0366ad55..7303cf98 100644 --- a/modules/Crossval/Crossval_anomalies.R +++ b/modules/Crossval/Crossval_anomalies.R @@ -152,7 +152,9 @@ Crossval_anomalies <- function(recipe, data) { lims_fcst <- Apply(hcst_ano, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - quantile(as.vector(x), ps, na.rm = na.rm)})}, + res <- quantile(as.vector(x), ps, na.rm = na.rm) + dim(res) <- c(bin = length(ps)) + return(res)})}, output_dims = lapply(categories, function(x) {'bin'}), prob_lims = categories, ncores = ncores) @@ -165,7 +167,9 @@ Crossval_anomalies <- function(recipe, data) { lims <- Apply(obs_ano, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - quantile(as.vector(x), ps, na.rm = na.rm)})}, + res <- quantile(as.vector(x), ps, na.rm = na.rm) + dim(res) <- c(bin = length(ps)) + return(res)})}, output_dims = lapply(categories, function(x) {'bin'}), prob_lims = categories, ncores = ncores) -- GitLab From 4490b63d43d233abf34c0e5ca8abfe830476347b Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 21 Jan 2025 14:56:04 +0100 Subject: [PATCH 67/78] Adapt percentiles check to the case where only one set of percentiles is requested --- tools/check_recipe.R | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 9339c7a3..3dd186c7 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -659,10 +659,15 @@ check_recipe <- function(recipe) { "Parameter 'percentiles' must be defined under 'Probabilities'.") error_status <- TRUE } else if (!is.list(recipe$Analysis$Workflow$Probabilities$percentiles)) { - error(recipe$Run$logger, - paste("Parameter 'Probabilities:percentiles' expects a list.", - "See documentation in the wiki for examples.")) - error_status <- TRUE + if (length(recipe$Analysis$Workflow$Probabilities$percentiles) == 1) { + recipe$Analysis$Workflow$Probabilities$percentiles <- + 
list(recipe$Analysis$Workflow$Probabilities$percentiles)
+    } else {
+      error(recipe$Run$logger,
+            paste("Parameter 'Probabilities:percentiles' expects a list.",
+                  "See documentation in the wiki for examples."))
+      error_status <- TRUE
+    }
   }
   # Saving checks
   SAVING_OPTIONS_PROBS <- c("all", "none", "bins_only", "percentiles_only")
-- 
GitLab

From 3c36db2a1ea606a31fe4eba1538b3b51aea0aea7 Mon Sep 17 00:00:00 2001
From: vagudets
Date: Wed, 22 Jan 2025 12:37:59 +0100
Subject: [PATCH 68/78] Adjustments for subseasonal data saving

---
 modules/Saving/R/get_filename.R     | 34 ++++++++++++++---------------
 modules/Saving/R/get_times.R        | 10 +++++++++
 modules/Saving/R/save_percentiles.R | 21 ++++++++++--------
 3 files changed, 39 insertions(+), 26 deletions(-)

diff --git a/modules/Saving/R/get_filename.R b/modules/Saving/R/get_filename.R
index b2345691..c3d30d27 100644
--- a/modules/Saving/R/get_filename.R
+++ b/modules/Saving/R/get_filename.R
@@ -17,12 +17,12 @@ get_filename <- function(dir, recipe, var, date, agg, file.type) {
 
   if (tolower(recipe$Analysis$Horizon) == "decadal") {
     # to not save the month and day in the filename (needed for the multimodel)
-    date <- substr(date,1,4)
+    date <- substr(date, 1, 4)
     # for the models initialised in January - it may be better to do this in save_* functions
     archive <- read_yaml(paste0("conf/archive_decadal.yml"))$esarchive
     exp.name <- recipe$Analysis$Datasets$System$name
-    if (exp.name != 'Multimodel' && archive$System[[exp.name]]$initial_month == 1){
-      date <- as.character(as.numeric(date)-1)
+    if (exp.name != 'Multimodel' && archive$System[[exp.name]]$initial_month == 1) {
+      date <- as.character(as.numeric(date) - 1)
     }
   }
 
@@ -41,24 +41,24 @@ get_filename <- function(dir, recipe, var, date, agg, file.type) {
     hcst_end <- recipe$Analysis$Time$hcst_end
 
     switch(tolower(file.type),
-           "skill" = {type_info <- "-skill_"},
-           "corr" = {type_info <- "-corr_"},
-           "exp" = {type_info <- paste0("_", date, "_")},
-           "obs" = {type_info <- paste0("-obs_", date, "_")},
-           "percentiles" = {type_info <- "-percentiles_"},
-           "probs" = {type_info <- paste0("-probs_", date, "_")},
-           "bias" = {type_info <- paste0("-bias_", date, "_")},
-           "rps_syear" = {type_info <- "rps_syear"},
-           "rps_clim_syear" = {type_info <- "rps_clim_syear"},
-           "crps_syear" = {type_info <- "crps_syear"},
-           "crps_clim_syear" = {type_info <- "crps_clim_syear"},
-           "crps" = {type_info <- "crps"},
-           "mean_bias" = {type_info <- "mean_bias"},
+           "skill" = {type_info <- "-skill"},
+           "corr" = {type_info <- "-corr"},
+           "exp" = {type_info <- paste0("_", date)},
+           "obs" = {type_info <- paste0("-obs_", date)},
+           "percentiles" = {type_info <- "-percentiles"},
+           "probs" = {type_info <- paste0("-probs_", date)},
+           "bias" = {type_info <- paste0("-bias_", date)},
+           "rps_syear" = {type_info <- "_rps_syear"},
+           "rps_clim_syear" = {type_info <- "_rps_clim_syear"},
+           "crps_syear" = {type_info <- "_crps_syear"},
+           "crps_clim_syear" = {type_info <- "_crps_clim_syear"},
+           "crps" = {type_info <- "_crps"},
+           "mean_bias" = {type_info <- "_mean_bias"},
            {type_info <- paste0(file.type)})
 
     # Build file name
     filename <- paste0("scorecards_", system, "_", reference, "_", var,
-                       "_", type_info, "_", hcst_start, "-", hcst_end,
+                       type_info, "_", hcst_start, "-", hcst_end,
                        "_s", shortdate)
   } else {
     if (tolower(recipe$Analysis$Horizon) == "decadal") {
diff --git a/modules/Saving/R/get_times.R b/modules/Saving/R/get_times.R
index bcb6dfd8..c997e6e0 100644
--- a/modules/Saving/R/get_times.R
+++ b/modules/Saving/R/get_times.R
@@ -10,6 +10,16 @@
   # Compute initial 
date fcst.horizon <- tolower(recipe$Analysis$Horizon) # Generate time dimensions and the corresponding metadata. + ## TODO: This addresses subseasonal case, but does not work well + ## when there is missing data. + if (any(c("sweek", "sday") %in% names(dim(data_cube$attrs$Dates)))) { + central_day <- (dim(data_cube$attrs$Dates)[["sday"]] + 1) / 2 + central_week <- (dim(data_cube$attrs$Dates)[["sweek"]] + 1) / 2 + data_cube$attrs$Dates <- Subset(data_cube$attrs$Dates, + along = c("sday", "sweek"), + indices = list(sday = central_day, + sweek = central_week)) + } dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), cal = calendar) leadtimes <- as.numeric(dates - init_date)/3600 diff --git a/modules/Saving/R/save_percentiles.R b/modules/Saving/R/save_percentiles.R index d5dfae16..0071eb88 100644 --- a/modules/Saving/R/save_percentiles.R +++ b/modules/Saving/R/save_percentiles.R @@ -5,6 +5,11 @@ save_percentiles <- function(recipe, outdir = NULL) { # This function adds metadata to the percentiles # and exports them to a netCDF file inside 'outdir'. + + # percentiles <- lapply(percentiles, + # function(x) { + # .drop_dims(x) + # }) archive <- get_archive(recipe) # Define grid dimensions and names lalo <- c('longitude', 'latitude') @@ -35,16 +40,12 @@ save_percentiles <- function(recipe, # Generate vector containing leadtimes if (fcst.horizon == 'decadal') { - # if (global_attributes$system == 'Multimodel') { - # init_month <- 11 #TODO: put as if init_month is January - # } else { - # init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - # } - # init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - # sprintf('%02d', init_month), '-01'), - # cal = calendar) - init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$hcst_start)+1, + init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$hcst_start) + 1, '-01-01'), cal = calendar) + } else if (fcst.horizon == 'subseasonal') { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + substr(recipe$Analysis$Time$sdate, 5, 8)), + format = '%Y%m%d', cal = calendar) } else { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), @@ -62,6 +63,8 @@ save_percentiles <- function(recipe, } else { fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') } + } else if (fcst.horizon == 'subseasonal') { + fcst.sdate <- as.character(recipe$Analysis$Time$sdate) } else { if (!is.null(recipe$Analysis$Time$fcst_year)) { fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, -- GitLab From d4f2129ba65d45c3b9b21d6e3dad039cb0cbb68d Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 24 Jan 2025 15:48:33 +0100 Subject: [PATCH 69/78] Use CST_MergeDims() instead of MergeDims(); fix subseasonal saving issues, add metadata to metrics --- modules/Crossval/Crossval_Calibration.R | 80 ++++++------ modules/Crossval/Crossval_metrics.R | 28 ++-- modules/Crossval/R/tmp/CST_MergeDims.R | 162 ++++++++++++++++++++++++ modules/Saving/R/get_filename.R | 14 +- modules/Saving/R/get_times.R | 2 +- modules/Saving/R/save_metrics.R | 25 +++- tools/libs.R | 3 + 7 files changed, 248 insertions(+), 66 deletions(-) create mode 100644 modules/Crossval/R/tmp/CST_MergeDims.R diff --git a/modules/Crossval/Crossval_Calibration.R b/modules/Crossval/Crossval_Calibration.R index 04138eb9..ec24fd6d 100644 --- a/modules/Crossval/Crossval_Calibration.R +++ b/modules/Crossval/Crossval_Calibration.R @@ -275,39 +275,38 @@ Crossval_Calibration <- function(recipe, data) { output_dims = 
lapply(categories, function(x) {'bin'}), prob_lims = categories, ncores = ncores) - lims <- Apply(data$obs$data, target_dims = c('syear', 'ensemble'), - fun = function(x, prob_lims) { - lapply(prob_lims, function(ps) { - res <- quantile(as.vector(x), ps, na.rm = na.rm) - dim(res) <- c(bin = length(ps)) - return(res)})}, - output_dims = lapply(categories, function(x) {'bin'}), - prob_lims = categories, - ncores = ncores) + lims <- Apply(obs, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + res <- quantile(as.vector(x), ps, na.rm = na.rm) + dim(res) <- c(bin = length(ps)) + return(res)})}, + output_dims = lapply(categories, function(x) {'bin'}), + prob_lims = categories, + ncores = ncores) tmp_lims2 <- list() - # What to save hcst category limits or obs category limits? - # TODO saving: - #recipe$Analysis$Workflow$Probabilities$save <- FALSE - if (recipe$Analysis$Workflow$Probabilities$save == 'all') { - for (ps in 1:length(categories)) { - ## TODO: use .drop_dims() - tmp_lims3 <- .drop_dims(lims[[ps]]) - for (l in 1:dim(lims[[ps]])['bin']) { - tmp_lims <- ClimProjDiags::Subset(x = tmp_lims3, - along = "bin", - indices = l) - tmp_lims2 <- append(tmp_lims2, list(tmp_lims)) - names(tmp_lims2)[length(tmp_lims2)] <- as.character(categories[[ps]][l]) - } - ## TODO: Fix logic - if (recipe$Analysis$Workflow$Probabilities$save == 'yes') { - save_percentiles(recipe = recipe, percentiles = tmp_lims2, - data_cube = data$obs, - agg = "global", outdir = NULL) + + # What to save hcst category limits or obs category limits? + # TODO saving: + #recipe$Analysis$Workflow$Probabilities$save <- FALSE + if (recipe$Analysis$Workflow$Probabilities$save == 'all') { + for (ps in 1:length(categories)) { + ## TODO: use .drop_dims() + tmp_lims3 <- .drop_dims(lims[[ps]]) + for (l in 1:dim(lims[[ps]])['bin']) { + tmp_lims <- ClimProjDiags::Subset(x = tmp_lims3, + along = "bin", + indices = l, + drop = "selected") + tmp_lims2 <- append(tmp_lims2, list(tmp_lims)) + names(tmp_lims2)[length(tmp_lims2)] <- paste0("p_", as.character(categories[[ps]][l]*100)) + } } + save_percentiles(recipe = recipe, percentiles = tmp_lims2, + data_cube = data$obs, + agg = "global", outdir = NULL) } } -} # Compute Probabilities for (ps in 1:length(categories)) { # Get only the probabilities of the central day in sday @@ -390,12 +389,12 @@ Crossval_Calibration <- function(recipe, data) { for (ps in 1:length(categories)) { for (perc in 1:(length(categories[[ps]]) + 1)) { if (perc == 1) { - name_elem <- paste0("below_", categories[[ps]][perc]) + name_elem <- paste0("below_", categories[[ps]][perc]*100) } else if (perc == length(categories[[ps]]) + 1) { - name_elem <- paste0("above_", categories[[ps]][perc-1]) + name_elem <- paste0("above_", categories[[ps]][perc-1]*100) } else { - name_elem <- paste0("from_", categories[[ps]][perc-1], - "_to_", categories[[ps]][perc]) + name_elem <- paste0("from_", categories[[ps]][perc-1]*100, + "_to_", categories[[ps]][perc]*100) } probs_hcst <- append(list(Subset(hcst_probs_ev[[ps]], along = 'bin', @@ -428,13 +427,14 @@ Crossval_Calibration <- function(recipe, data) { probs_fcst <- lapply(probs_fcst, .drop_dims) } if (recipe$Analysis$Workflow$Probabilities$save %in% - c('all', 'bins_only')) { - save_probabilities(recipe = recipe, probs = probs_hcst, - data_cube = data$hcst, agg = agg, - type = "hcst") - save_probabilities(recipe = recipe, probs = probs_obs, - data_cube = data$obs, agg = agg, - type = "obs") + c('all', 'bins_only')) { + ## Saving only 
forecast probabilities for now + # save_probabilities(recipe = recipe, probs = probs_hcst, + # data_cube = data$hcst, agg = agg, + # type = "hcst") + # save_probabilities(recipe = recipe, probs = probs_obs, + # data_cube = data$obs, agg = agg, + # type = "obs") save_probabilities(recipe = recipe, probs = probs_fcst, data_cube = data$fcst, agg = agg, type = "fcst") diff --git a/modules/Crossval/Crossval_metrics.R b/modules/Crossval/Crossval_metrics.R index db24913f..6475f478 100644 --- a/modules/Crossval/Crossval_metrics.R +++ b/modules/Crossval/Crossval_metrics.R @@ -112,21 +112,21 @@ Crossval_metrics <- function(recipe, data_crossval, } if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { # The evaluation of all metrics are done with extra sample - data_crossval$hcst$data <- MergeDims(data_crossval$hcst$data, - merge_dims = c('sweek', 'syear'), - rename_dim = 'syear', na.rm = FALSE) - data_crossval$obs$data <- MergeDims(data_crossval$obs$data, - merge_dims = c('sweek', 'syear'), - rename_dim = 'syear', na.rm = FALSE) + data_crossval$hcst <- CST_MergeDims(data_crossval$hcst, + merge_dims = c('sweek', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + data_crossval$obs <- CST_MergeDims(data_crossval$obs, + merge_dims = c('sweek', 'syear'), + rename_dim = 'syear', na.rm = FALSE) data_crossval$ref_obs_tr <- MergeDims(data_crossval$ref_obs_tr, - merge_dims = c('sweek', 'syear'), - rename_dim = 'syear', na.rm = FALSE) - data_crossval$hcst.full_val$data <- MergeDims(data_crossval$hcst.full_val$data, - merge_dims = c('sweek', 'syear'), - rename_dim = 'syear', na.rm = FALSE) - data_crossval$obs.full_val$data <- MergeDims(data_crossval$obs.full_val$data, - merge_dims = c('sweek', 'syear'), - rename_dim = 'syear', na.rm = FALSE) + merge_dims = c('sweek', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + data_crossval$hcst.full_val <- CST_MergeDims(data_crossval$hcst.full_val, + merge_dims = c('sweek', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + data_crossval$obs.full_val <- CST_MergeDims(data_crossval$obs.full_val, + merge_dims = c('sweek', 'syear'), + rename_dim = 'syear', na.rm = FALSE) } if ('crps' %in% requested_metrics) { diff --git a/modules/Crossval/R/tmp/CST_MergeDims.R b/modules/Crossval/R/tmp/CST_MergeDims.R new file mode 100644 index 00000000..bb06bf60 --- /dev/null +++ b/modules/Crossval/R/tmp/CST_MergeDims.R @@ -0,0 +1,162 @@ +#'Function to Merge Dimensions +#' +#'@author Nuria Perez-Zanon, \email{nuria.perez@bsc.es} +#' +#'@description This function merges two dimensions of the array \code{data} in a +#''s2dv_cube' object into one. The user can select the dimensions to merge and +#'provide the final name of the dimension. The user can select to remove NA +#'values or keep them. +#' +#'@param data An 's2dv_cube' object +#'@param merge_dims A character vector indicating the names of the dimensions to +#' merge. +#'@param rename_dim a character string indicating the name of the output +#' dimension. If left at NULL, the first dimension name provided in parameter +#' \code{merge_dims} will be used. +#'@param na.rm A logical indicating if the NA values should be removed or not. 
+#' +#'@examples +#'data <- 1 : c(2 * 3 * 4 * 5 * 6 * 7) +#'dim(data) <- c(time = 7, lat = 2, lon = 3, monthly = 4, member = 6, +#' dataset = 5, var = 1) +#'data[2,,,,,,] <- NA +#'data[c(3,27)] <- NA +#'data <- list(data = data) +#'class(data) <- 's2dv_cube' +#'new_data <- CST_MergeDims(data, merge_dims = c('time', 'monthly')) +#'new_data <- CST_MergeDims(data, merge_dims = c('lon', 'lat'), rename_dim = 'grid') +#'new_data <- CST_MergeDims(data, merge_dims = c('time', 'monthly'), na.rm = TRUE) +#'@export +CST_MergeDims <- function(data, merge_dims = c('ftime', 'monthly'), + rename_dim = NULL, na.rm = FALSE) { + # Check 's2dv_cube' + if (!inherits(data, 's2dv_cube')) { + stop("Parameter 'data' must be of the class 's2dv_cube'.") + } + if (is.null(rename_dim)) { + rename_dim <- merge_dims[1] + } + # data + data$data <- MergeDims(data$data, merge_dims = merge_dims, + rename_dim = rename_dim, na.rm = na.rm) + # dims + data$dims <- dim(data$data) + + # rename_dim + if (length(rename_dim) > 1) { + rename_dim <- as.character(rename_dim[1]) + } + # coords + data$coords[merge_dims] <- NULL + data$coords[[rename_dim]] <- 1:dim(data$data)[rename_dim] + attr(data$coords[[rename_dim]], 'indices') <- TRUE + + # attrs + if (all(merge_dims %in% names(dim(data$attrs$Dates)))) { + original_timezone <- attr(data$attrs$Dates[1], "tzone") + data$attrs$Dates <- MergeDims(data$attrs$Dates, merge_dims = merge_dims, + rename_dim = rename_dim, na.rm = na.rm) + # Transform dates back to POSIXct + data$attrs$Dates <- as.POSIXct(data$attrs$Dates, + origin = "1970-01-01", + tz = original_timezone) + + } else if (any(merge_dims %in% names(dim(data$attrs$Dates)))) { + warning("The dimensions of 'Dates' array will be different from ", + "the temporal dimensions in 'data'. Parameter 'merge_dims' ", + "only includes one temporal dimension of 'Dates'.") + } + return(data) +} +#'Function to Split Dimension +#' +#'@author Nuria Perez-Zanon, \email{nuria.perez@bsc.es} +#' +#'@description This function merges two dimensions of an array into one. The +#'user can select the dimensions to merge and provide the final name of the +#'dimension. The user can select to remove NA values or keep them. +#' +#'@param data An n-dimensional array with named dimensions +#'@param merge_dims A character vector indicating the names of the dimensions to +#' merge. +#'@param rename_dim A character string indicating the name of the output +#' dimension. If left at NULL, the first dimension name provided in parameter +#' \code{merge_dims} will be used. +#'@param na.rm A logical indicating if the NA values should be removed or not. 
+#' +#'@examples +#'data <- 1 : 20 +#'dim(data) <- c(time = 10, lat = 2) +#'new_data <- MergeDims(data, merge_dims = c('time', 'lat')) +#'@import abind +#'@importFrom ClimProjDiags Subset +#'@export +MergeDims <- function(data, merge_dims = c('time', 'monthly'), + rename_dim = NULL, na.rm = FALSE) { + # check data + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (is.null(dim(data))) { + stop("Parameter 'data' must have dimensions.") + } + if (is.null(names(dim(data)))) { + stop("Parameter 'data' must have dimension names.") + } + dims <- dim(data) + # check merge_dims + if (is.null(merge_dims)) { + stop("Parameter 'merge_dims' cannot be NULL.") + } + if (!is.character(merge_dims)) { + stop("Parameter 'merge_dims' must be a character vector ", + "indicating the names of the dimensions to be merged.") + } + if (length(merge_dims) > 2) { + warning("Only two dimensions can be merge, only the first two ", + "dimension will be used. To merge further dimensions ", + "consider to use this function multiple times.") + merge_dims <- merge_dims[1 : 2] + } else if (length(merge_dims) < 2) { + stop("Parameter 'merge_dims' must be of length two.") + } + if (is.null(rename_dim)) { + rename_dim <- merge_dims[1] + } + if (length(rename_dim) > 1) { + warning("Parameter 'rename_dim' has length greater than 1 ", + "and only the first element will be used.") + rename_dim <- as.character(rename_dim[1]) + } + if (!any(names(dims) %in% merge_dims)) { + stop("Parameter 'merge_dims' must match with dimension ", + "names in parameter 'data'.") + } + pos1 <- which(names(dims) == merge_dims[1]) + pos2 <- which(names(dims) == merge_dims[2]) + if (length(pos1) == 0 | length(pos2) == 0) { + stop("Parameter 'merge_dims' must match with dimension ", + "names in parameter 'data'.") + } + if (pos1 > pos2) { + pos1 <- pos1 - 1 + } + data <- lapply(1:dims[pos2], function(x) {Subset(data, along = pos2, + indices = x, drop = 'selected')}) + data <- abind(data, along = pos1) + names(dim(data)) <- names(dims)[-pos2] + if (!is.null(rename_dim)) { + names(dim(data))[pos1] <- rename_dim + } + if (na.rm) { + nas <- which(is.na(Subset(data, along = -pos1, indices = 1))) + if (length(nas) != 0) { + nas <- unlist(lapply(nas, function(x) { + if(all(is.na(Subset(data, along = pos1, + indices = x)))) { + return(x)}})) + data <- Subset(data, along = pos1, indices = -nas) + } + } + return(data) +} diff --git a/modules/Saving/R/get_filename.R b/modules/Saving/R/get_filename.R index c3d30d27..2766c17a 100644 --- a/modules/Saving/R/get_filename.R +++ b/modules/Saving/R/get_filename.R @@ -48,13 +48,13 @@ get_filename <- function(dir, recipe, var, date, agg, file.type) { "percentiles" = {type_info <- "-percentiles"}, "probs" = {type_info <- paste0("-probs_", date)}, "bias" = {type_info <- paste0("-bias_", date)}, - "rps_syear" = {type_info <- "_rps_syear"}, - "rps_clim_syear" = {type_info <- "_rps_clim_syear"}, - "crps_syear" = {type_info <- "_crps_syear"}, - "crps_clim_syear" = {type_info <- "_crps_clim_syear"}, - "crps" = {type_info <- "_crps"}, - "mean_bias" = {type_info <- "_mean_bias"}, - {type_info <- paste0(file.type)}) + "rps_syear" = {type_info <- "-rps_syear"}, + "rps_clim_syear" = {type_info <- "-rps_clim_syear"}, + "crps_syear" = {type_info <- "-crps_syear"}, + "crps_clim_syear" = {type_info <- "-crps_clim_syear"}, + "crps" = {type_info <- "-crps"}, + "mean_bias" = {type_info <- "-mean_bias"}, + {type_info <- paste0("-", file.type)}) # Build file name filename <- paste0("scorecards_", system, "_", reference, 
"_", var, diff --git a/modules/Saving/R/get_times.R b/modules/Saving/R/get_times.R index c997e6e0..81069234 100644 --- a/modules/Saving/R/get_times.R +++ b/modules/Saving/R/get_times.R @@ -12,7 +12,7 @@ # Generate time dimensions and the corresponding metadata. ## TODO: This addresses subseasonal case, but does not work well ## when there is missing data. - if (any(c("sweek", "sday") %in% names(dim(data_cube$attrs$Dates)))) { + if (all(c("sweek", "sday") %in% names(dim(data_cube$attrs$Dates)))) { central_day <- (dim(data_cube$attrs$Dates)[["sday"]] + 1) / 2 central_week <- (dim(data_cube$attrs$Dates)[["sweek"]] + 1) / 2 data_cube$attrs$Dates <- Subset(data_cube$attrs$Dates, diff --git a/modules/Saving/R/save_metrics.R b/modules/Saving/R/save_metrics.R index db7ceeca..d0b8dc4f 100644 --- a/modules/Saving/R/save_metrics.R +++ b/modules/Saving/R/save_metrics.R @@ -5,8 +5,14 @@ save_metrics <- function(recipe, agg = "global", outdir = NULL, module = "skill") { + # This function adds metadata to the skill metrics in 'skill' # and exports them to a netCDF file inside 'outdir'. + + # Sanity checks + if (!is.list(metrics) || is.null(names(metrics))) { + stop("'metrics' should be a named list.") + } # Define grid dimensions and names lalo <- c('longitude', 'latitude') archive <- get_archive(recipe) @@ -36,12 +42,12 @@ save_metrics <- function(recipe, calendar <- archive$System[[global_attributes$system]]$calendar } if (fcst.horizon == 'decadal') { - # init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - # init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - # sprintf('%02d', init_month), '-01'), - # cal = calendar) init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$hcst_start)+1, '-01-01'), cal = calendar) + } else if (fcst.horizon == 'subseasonal') { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + substr(recipe$Analysis$Time$sdate, 5, 8)), + format = '%Y%m%d', cal = calendar) } else { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), @@ -56,6 +62,8 @@ save_metrics <- function(recipe, } else { fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') } + } else if (fcst.horizon == 'subseasonal') { + fcst.sdate <- as.character(recipe$Analysis$Time$sdate) } else { if (!is.null(recipe$Analysis$Time$fcst_year)) { fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, @@ -95,6 +103,15 @@ save_metrics <- function(recipe, }) } } + metric <- names(subset_metric[i]) + long_name <- dictionary$metrics[[metric]]$long_name + missing_val <- -9.e+33 + subset_metric[[i]][is.na(subset_metric[[i]])] <- missing_val + metadata <- list(metric = list(name = metric, + long_name = long_name, + missing_value = missing_val)) + data_cube$attrs$Variable$metadata[metric] <- metadata + attr(subset_metric[[i]], 'variables') <- metadata ## TODO: Maybe 'scorecards' condition could go here to further simplify ## the code extra_string <- get_filename(NULL, recipe, variable, diff --git a/tools/libs.R b/tools/libs.R index 40146786..5973f219 100644 --- a/tools/libs.R +++ b/tools/libs.R @@ -37,6 +37,9 @@ source("tools/get_archive.R") source("tools/Utils.R") source("tools/restructure_recipe.R") # source("tools/add_dims.R") # Not sure if necessary yet +## To be removed after next release of CSTools: +source("modules/Crossval/R/tmp/CST_MergeDims.R") + # Settings options(bitmapType = 'cairo') -- GitLab From c0d3fabc3b4ff208959c441a2b578718e0017423 Mon Sep 17 00:00:00 2001 From: Nuria Perez Zanon Date: Mon, 27 Jan 2025 
14:36:10 +0100 Subject: [PATCH 70/78] test calibration --- conf/archive_seasonal.yml | 14 ++ conf/grid_description/griddes_ncep-cfsv2.txt | 58 +++++--- conf/grid_description/griddes_ukmo603.txt | 22 +++ full_ecvs_calibration.R | 14 +- full_ecvs_multimodel_calibrated.R | 2 +- modules/Crossval/Crossval_Calibration.R | 8 +- .../Units/R/transform_units_precipitation.R | 2 + recipe_ecvs_ano_mul_seas.yml | 2 +- recipe_ecvs_cal_mul_seas.yml | 8 +- recipe_ecvs_cal_subseas.yml | 17 +-- recipe_prlr_cal_subseas.yml | 129 ++++++++++++++++++ recipe_tas_singl_cal_seas.yml | 8 +- sunset.sh | 4 +- 13 files changed, 245 insertions(+), 43 deletions(-) create mode 100644 conf/grid_description/griddes_ukmo603.txt create mode 100644 recipe_prlr_cal_subseas.yml diff --git a/conf/archive_seasonal.yml b/conf/archive_seasonal.yml index 6324871f..9a8bc54a 100644 --- a/conf/archive_seasonal.yml +++ b/conf/archive_seasonal.yml @@ -77,6 +77,20 @@ gpfs: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_ukmo600.txt" + UK-MetOffice-Glosea603: + name: "UK MetOffice GloSea 6 (v6.03)" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/ukmo/glosea6_system603-c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_s0-24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 62 + hcst: 28 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_ukmo603.txt" NCEP-CFSv2: name: "NCEP CFSv2" institution: "NOAA NCEP" #? diff --git a/conf/grid_description/griddes_ncep-cfsv2.txt b/conf/grid_description/griddes_ncep-cfsv2.txt index 6d8abe86..320b2342 100644 --- a/conf/grid_description/griddes_ncep-cfsv2.txt +++ b/conf/grid_description/griddes_ncep-cfsv2.txt @@ -1,18 +1,44 @@ # -# Grid description file for NCEP CFSv2 -# -gridtype = lonlat -gridsize = 64800 -xname = lon -xlongname = Longitude -xunits = degrees_east -yname = lat -ylongname = Latitude -yunits = degrees_north -xsize = 360 -ysize = 180 -xfirst = 0.5 -xinc = 1 -yfirst = 89.5 -yinc = -1 +# gridID 1 # +gridtype = gaussian +gridsize = 72960 +datatype = float +xsize = 384 +ysize = 190 +xname = longitude +xlongname = "longitude" +xunits = "degrees_east" +yname = latitude +ylongname = "latitude" +yunits = "degrees_north" +numLPE = 95 +xfirst = 0 +xinc = 0.938 +yvals = 89.27671 88.33975 87.39726 86.45352 85.5093 84.56486 83.62029 82.67563 + 81.73093 80.78619 79.84142 78.89663 77.95183 77.00702 76.06219 75.11737 + 74.17253 73.22769 72.28284 71.338 70.39315 69.4483 68.50343 67.55858 + 66.61372 65.66885 64.72399 63.77913 62.83426 61.88939 60.94453 59.99966 + 59.05479 58.10992 57.16505 56.22018 55.27531 54.33043 53.38556 52.44069 + 51.49582 50.55094 49.60607 48.66119 47.71632 46.77144 45.82657 44.88169 + 43.93682 42.99194 42.04707 41.10219 40.15731 39.21244 38.26756 37.32269 + 36.37781 35.43293 34.48805 33.54317 32.5983 31.65342 30.70854 29.76367 + 28.81879 27.87391 26.92903 25.98416 25.03928 24.0944 23.14952 22.20464 + 21.25977 20.31489 19.37001 18.42513 17.48025 16.53537 15.5905 14.64562 + 13.70074 12.75586 11.81098 10.8661 9.921226 8.976347 8.031468 7.08659 + 6.141711 5.196833 4.251954 3.307075 2.362197 1.417318 0.4724393 -0.4724393 + -1.417318 -2.362197 -3.307075 -4.251954 -5.196833 -6.141711 -7.08659 + -8.031468 -8.976347 -9.921226 -10.8661 -11.81098 -12.75586 -13.70074 + -14.64562 -15.5905 -16.53537 -17.48025 -18.42513 -19.37001 -20.31489 + -21.25977 -22.20464 -23.14952 -24.0944 
-25.03928 -25.98416 -26.92903 + -27.87391 -28.81879 -29.76367 -30.70854 -31.65342 -32.5983 -33.54317 + -34.48805 -35.43293 -36.37781 -37.32269 -38.26756 -39.21244 -40.15731 + -41.10219 -42.04707 -42.99194 -43.93682 -44.88169 -45.82657 -46.77144 + -47.71632 -48.66119 -49.60607 -50.55094 -51.49582 -52.44069 -53.38556 + -54.33043 -55.27531 -56.22018 -57.16505 -58.10992 -59.05479 -59.99966 + -60.94453 -61.88939 -62.83426 -63.77913 -64.72399 -65.66885 -66.61372 + -67.55858 -68.50343 -69.4483 -70.39315 -71.338 -72.28284 -73.22769 -74.17253 + -75.11737 -76.06219 -77.00702 -77.95183 -78.89663 -79.84142 -80.78619 + -81.73093 -82.67563 -83.62029 -84.56486 -85.5093 -86.45352 -87.39726 + -88.33975 -89.27671 + diff --git a/conf/grid_description/griddes_ukmo603.txt b/conf/grid_description/griddes_ukmo603.txt new file mode 100644 index 00000000..26b33631 --- /dev/null +++ b/conf/grid_description/griddes_ukmo603.txt @@ -0,0 +1,22 @@ +# gridID 1 +# +gridtype = generic +gridsize = 1 +# +# gridID 2 +# +gridtype = lonlat +gridsize = 64800 +xsize = 360 +ysize = 180 +xname = lon +xlongname = "longitude" +xunits = "degrees_east" +yname = lat +ylongname = "latitude" +yunits = "degrees_north" +xfirst = 0.5 +xinc = 1 +yfirst = 89.5 +yinc = -1 + diff --git a/full_ecvs_calibration.R b/full_ecvs_calibration.R index 5409d41f..3b521f22 100644 --- a/full_ecvs_calibration.R +++ b/full_ecvs_calibration.R @@ -8,6 +8,7 @@ args = commandArgs(trailingOnly = TRUE) recipe_file <- args[1] #recipe_file <- "recipe_tas_singl_cal_seas.yml" #recipe_file <- "recipe_subseasonal_ecvs.yml" +#recipe_file <- "recipe_prlr_cal_subseas.yml" recipe <- read_atomic_recipe(recipe_file) #recipe <- prepare_outputs(recipe_file) @@ -51,11 +52,18 @@ if (!is.null(data$fcst)) { Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, significance = TRUE, probabilities = tmp_probs, - logo = "tools/BSC_logo_95.jpg") + logo = NULL) + #logo = "tools/BSC_logo_95.jpg") } else { Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, significance = TRUE, log = "tools/BSC_logo_95.jpg") } - - +saveRDS(object = list(product = res, + metrics = skill_metrics, + fcst_probs = tmp_probs), + file = paste0(recipe$Run$output_dir, "/outputs/", + recipe$Analysis$Datasets$System$name, "_", + recipe$Analysis$Variables$name, "_", + recipe$Analysis$Time$fcst_year, ".RDS")) +recipe$Run$output_dir diff --git a/full_ecvs_multimodel_calibrated.R b/full_ecvs_multimodel_calibrated.R index 21d2f9d3..2a3c8854 100644 --- a/full_ecvs_multimodel_calibrated.R +++ b/full_ecvs_multimodel_calibrated.R @@ -29,7 +29,7 @@ for (sys in models) { # recipe_aux$Analysis$Datasets$System$member <- read_yaml("conf/archive_decadal.yml")$esarchive$System[[sys]]$member data <- Loading(recipe = recipe_aux) data <- Units(recipe = recipe_aux, data = data) - if (recipe$Analysis$Workflow$Time_aggregation$execute) { + if (recipe_aux$Analysis$Workflow$Time_aggregation$execute) { data <- Aggregation(recipe = recipe_aux, data = data) } # verification individual models diff --git a/modules/Crossval/Crossval_Calibration.R b/modules/Crossval/Crossval_Calibration.R index 5051f12f..cfcaa197 100644 --- a/modules/Crossval/Crossval_Calibration.R +++ b/modules/Crossval/Crossval_Calibration.R @@ -89,7 +89,7 @@ Crossval_Calibration <- function(recipe, data) { lims_cal_hcst_tr <- Apply(cal_hcst_tr, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - quantile(as.vector(x), ps, na.rm = na.rm)})}, + as.array(quantile(as.vector(x), ps, na.rm = 
na.rm))})}, output_dims = lapply(categories, function(x) {'bin'}), prob_lims = categories, @@ -97,7 +97,7 @@ Crossval_Calibration <- function(recipe, data) { lims_obs_tr <- Apply(obs_tr, target_dims = c('syear'),#, 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - quantile(as.vector(x), ps, na.rm = na.rm)})}, + as.array(quantile(as.vector(x), ps, na.rm = na.rm))})}, output_dims = lapply(categories, function(x){'bin'}), prob_lims = categories, @@ -265,14 +265,14 @@ Crossval_Calibration <- function(recipe, data) { lims_fcst <- Apply(hcst_cal, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - quantile(as.vector(x), ps, na.rm = na.rm)})}, + as.array(quantile(as.vector(x), ps, na.rm = na.rm))})}, output_dims = lapply(categories, function(x) {'bin'}), prob_lims = categories, ncores = ncores) lims <- Apply(data$obs$data, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - quantile(as.vector(x), ps, na.rm = na.rm)})}, + as.array(quantile(as.vector(x), ps, na.rm = na.rm))})}, output_dims = lapply(categories, function(x) {'bin'}), prob_lims = categories, ncores = ncores) diff --git a/modules/Units/R/transform_units_precipitation.R b/modules/Units/R/transform_units_precipitation.R index d0dd7ffd..987b5e7c 100644 --- a/modules/Units/R/transform_units_precipitation.R +++ b/modules/Units/R/transform_units_precipitation.R @@ -83,6 +83,8 @@ transform_units_precipitation <- function(data, original_units, new_units, num_days <- .days_in_month(date, cal = .cal) res <- x * num_days }, ncores = ncores)$output1 + } else if (freq == "weekly_mean") { + data_list[[var_index]] <- data_list[[var_index]] * 7 } } diff --git a/recipe_ecvs_ano_mul_seas.yml b/recipe_ecvs_ano_mul_seas.yml index 5be9fce8..3df9d133 100644 --- a/recipe_ecvs_ano_mul_seas.yml +++ b/recipe_ecvs_ano_mul_seas.yml @@ -15,7 +15,7 @@ Analysis: - {name: 'UK-MetOffice-Glosea603'} ##- {name: 'NCEP-CFSv2'} - {name: 'DWD-GCFS2.1'} - - {name: 'ECCC-GEM5.2-NEMO'} + #- {name: 'ECCC-GEM5.2-NEMO'} # name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 Multimodel: execute: no diff --git a/recipe_ecvs_cal_mul_seas.yml b/recipe_ecvs_cal_mul_seas.yml index a4576dc1..c70b883b 100644 --- a/recipe_ecvs_cal_mul_seas.yml +++ b/recipe_ecvs_cal_mul_seas.yml @@ -5,16 +5,16 @@ Description: Analysis: Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal Variables: - - {name: 'tas', freq: 'monthly_mean', units: 'C'} - #- {name: 'prlr', freq: 'monthly_mean', units: 'mm', flux: yes} + #- {name: 'tas', freq: 'monthly_mean', units: 'C'} + - {name: 'prlr', freq: 'monthly_mean', units: 'mm', flux: yes} Datasets: System: - {name: 'Meteo-France-System8'} - {name: 'CMCC-SPS3.5'} - {name: 'ECMWF-SEAS5.1'} - - {name: 'UK-MetOffice-Glosea603'} + #- {name: 'UK-MetOffice-Glosea603'} ##- {name: 'NCEP-CFSv2'} - - {name: 'DWD-GCFS2.1'} + #- {name: 'DWD-GCFS2.1'} #- {name: 'ECCC-GEM5.2-NEMO'} # name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 Multimodel: diff --git a/recipe_ecvs_cal_subseas.yml b/recipe_ecvs_cal_subseas.yml index 01b60087..51610f2b 100644 --- a/recipe_ecvs_cal_subseas.yml +++ b/recipe_ecvs_cal_subseas.yml @@ -9,7 +9,8 @@ Analysis: # freq: 'monthly_mean', 'daily' or 'daily_mean' (Mandatory, str) # units: desired data units for each variable. Only available for temperature, # precipitation, and pressure variables. 
- - {name: 'tas', freq: 'weekly_mean', units: 'C'} + #- {name: 'tas', freq: 'weekly_mean', units: 'C'} + - {name: 'prlr', freq: 'weekly_mean', units: 'mm', flux: yes} # name: 'tas' # freq: 'weekly_mean' # units: 'C' @@ -35,13 +36,13 @@ Analysis: # To request more References to be divided into atomic recipes, add them this way: # - {name: 'ERA5Land'} Time: - sdate: 20250109 #%Y%m%d # Cambiar a 2023 + sdate: 20250123 #%Y%m%d # Cambiar a 2023 #- '1201' # Start date, 'mmdd' (Mandatory, int) # To request more startdates to be divided into atomic recipes, add them this way: # - '0101' # - '0201' # ... - fcst_year: 20250109 # Forecast initialization year 'YYYY' (Optional, int) + fcst_year: 20250123 # Forecast initialization year 'YYYY' (Optional, int) hcst_start: '1999' # Hindcast initialization start year 'YYYY' (Mandatory, int) hcst_end: '2016' # Hindcast initialization end year 'YYYY' (Mandatory, int) ftime_min: 1 # First forecast time step in months. Starts at “1”. (Mandatory, int) @@ -61,7 +62,7 @@ Analysis: # - {name: "EU", latmin: 20, latmax: 80, lonmin: -20, lonmax: 40} - {name: 'Global', latmin: -90, latmax: 90, lonmin: -180, lonmax: 179.9} Regrid: - method: bilinear # Interpolation method (Mandatory, str) + method: conservative # Interpolation method (Mandatory, str) type: "to_system" #"conf/grid_description/griddes_system51c3s.txt" # Interpolate to: 'to_system', 'to_reference', 'none', # or CDO-accepted grid. (Mandatory, str) Workflow: @@ -73,16 +74,16 @@ Analysis: save: 'none' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str) Skill: metric: mean_bias rpss enscorr # List of skill metrics separated by spaces or commas. (Mandatory, str) - save: 'all' # Options: 'all', 'none' (Mandatory, str) + save: 'none' # Options: 'all', 'none' (Mandatory, str) Statistics: metric: cov std var n_eff # List of statistics separated by spaces or commas. (Mandatory, str) - save: 'all' # Options: 'all', 'none' (Mandatory, str) + save: 'none' # Options: 'all', 'none' (Mandatory, str) Probabilities: # percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Thresholds percentiles: [[1/3, 2/3]] # for quantiles and probability categories. Each set of thresholds should be # enclosed within brackets. For now, they are INDEPENDENT from skill metrics. (Optional) - save: 'percentiles_only' # Options: 'all', 'none', 'bins_only', 'percentiles_only' (Mandatory, str) + save: 'none' # Options: 'all', 'none', 'bins_only', 'percentiles_only' (Mandatory, str) Visualization: plots: skill_metrics forecast_ensemble_mean most_likely_terciles multi_panel: no @@ -110,7 +111,7 @@ Analysis: col1_width: NULL # Optional, int: to adjust width of first column in scorecards table col2_width: NULL # Optional, int: to adjust width of second column in scorecards table calculate_diff: False # Mandatory, bool: True/False - ncores: 16 # Number of cores to be used in parallel computation. + ncores: 32 # Number of cores to be used in parallel computation. # If left empty, defaults to 1. (Optional, int) remove_NAs: yes # Whether to remove NAs. # If left empty, defaults to no/false. (Optional, bool) diff --git a/recipe_prlr_cal_subseas.yml b/recipe_prlr_cal_subseas.yml new file mode 100644 index 00000000..55f0516e --- /dev/null +++ b/recipe_prlr_cal_subseas.yml @@ -0,0 +1,129 @@ +# IMPORTANT: This is recipe is not intended to represent a real workflow: it is only a template showcasing ALL available options. +Description: + Author: N.Pérez-Zanón + Info: # Complete recipe containing all possible fields. 
+Analysis: + Horizon: subseasonal # Mandatory, str: 'seasonal', or 'decadal'. Subseasonal is in development + Variables: + # name: variable name(s) in the /esarchive (Mandatory, str) + # freq: 'monthly_mean', 'daily' or 'daily_mean' (Mandatory, str) + # units: desired data units for each variable. Only available for temperature, + # precipitation, and pressure variables. + #- {name: 'tas', freq: 'weekly_mean', units: 'C'} + - {name: 'prlr', freq: 'weekly_mean', units: 'mm', flux: FALSE} +# name: 'tas' +# freq: 'weekly_mean' +# units: 'C' + # To request more variables to be divided in atomic recipes, add them this way: +# - {name: 'prlr', freq: 'weekly_mean', units: 'mm'} +# - {name: 'sfcWind', freq: 'weekly_mean', units: 'm s-1'} +# - {name: 'rsds', freq: 'weekly_mean', units: 'W m-2'} + # To request multiple variables *in the same* atomic recipe, add them this way: + # - {name: 'tas, prlr, sfcWind, rsds', freq: 'weekly_mean', units: {tas: 'C', prlr: 'mm', sfcWind: 'm s-1', rsds:'W m-2'}} + Datasets: + System: + # name: System name (Mandatory, str) + # member: 'all' or individual members, separated by a comma and in quotes (decadal only, str) + - {name: 'NCEP-CFSv2', member: 'all'} + # To request more Systems to be divided in atomic recipes, add them this way: + # - {name: 'Meteo-France-System7'} + Multimodel: + execute: no # Either yes/true or no/false (Mandatory, bool) + approach: pooled # Multimodel computation approach. 'pooled' currently the only option (str) + createFrom: Anomalies # Which module should the anomalies be created from (str) + Reference: + - {name: 'ERA5'} # Reference name (Mandatory, str) + # To request more References to be divided into atomic recipes, add them this way: + # - {name: 'ERA5Land'} + Time: + sdate: 20241024 #%Y%m%d # Cambiar a 2023 + #- '1201' # Start date, 'mmdd' (Mandatory, int) + # To request more startdates to be divided into atomic recipes, add them this way: + # - '0101' + # - '0201' + # ... + fcst_year: 20241024 # Forecast initialization year 'YYYY' (Optional, int) + hcst_start: '1999' # Hindcast initialization start year 'YYYY' (Mandatory, int) + hcst_end: '2016' # Hindcast initialization end year 'YYYY' (Mandatory, int) + ftime_min: 1 # First forecast time step in months. Starts at “1”. (Mandatory, int) + ftime_max: 4 # Last forecast time step in months. Starts at “1”. (Mandatory, int) + week_day: Thursday + sweek_window: 9 + sday_window: 3 + Region: + # latmin: minimum latitude (Mandatory, int) + # latmax: maximum latitude (Mandatory, int) + # lonmin: # minimum longitude (Mandatory, int) + # lonmax: # maximum longitude (Mandatory, int) + # To request more regions to be divided in atomic recipes, add them this way: + # {name: "Nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 60} + #- {name: "Kuwait", latmin: 28, latmax: 31, lonmin: 46, lonmax: 49} + - {name: "Iberia", latmin: 36, latmax: 44, lonmin: -10, lonmax: 5} + #- {name: "EU", latmin: 20, latmax: 80, lonmin: -20, lonmax: 40} + #- {name: 'Global', latmin: -90, latmax: 90, lonmin: -180, lonmax: 179.9} + Regrid: + method: conservative # Interpolation method (Mandatory, str) + type: "to_system" #"conf/grid_description/griddes_system51c3s.txt" # Interpolate to: 'to_system', 'to_reference', 'none', + # or CDO-accepted grid. (Mandatory, str) + Workflow: + # This is the section of the recipe where the parameters for each module are specified + Time_aggregation: + execute: no + Calibration: + method: "bias" #evmos # Calibration method. 
(Mandatory, str) + save: 'none' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str) + Skill: + metric: mean_bias rpss enscorr # List of skill metrics separated by spaces or commas. (Mandatory, str) + save: 'none' # Options: 'all', 'none' (Mandatory, str) + Statistics: + metric: cov std var n_eff # List of statistics separated by spaces or commas. (Mandatory, str) + save: 'none' # Options: 'all', 'none' (Mandatory, str) + Probabilities: + # percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Thresholds + percentiles: [[1/3, 2/3], [1/10], [9/10]] + # for quantiles and probability categories. Each set of thresholds should be + # enclosed within brackets. For now, they are INDEPENDENT from skill metrics. (Optional) + save: 'none' # Options: 'all', 'none', 'bins_only', 'percentiles_only' (Mandatory, str) + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles + multi_panel: no + mask_terciles: 'both' + mask_ens: 'both' + projection: Robinson + file_format: 'PNG' + dots_terciles: no # Whether to dot the non-significant by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + # mask_ens: no # Whether to mask the non-significant points by rpss in the forecast ensemble mean plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + Scorecards: + execute: no # yes/no + regions: + # Mandatory: Define regions for which the spatial aggregation will be performed. + # The regions must be included within the area defined in the 'Analysis:Region' section. + Iberia: {lon.min: -10, lon.max: 5, lat.min: 36, lat.max: 44} + #Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + #Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + #Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: 1, 2, 3 # Mandatory, int: start months to visualise in scorecard table. Options: 'all' or a sequence of numbers. + metric: mean_bias enscorr rpss crpss enssprerr # Mandatory: metrics to visualise in scorecard table + metric_aggregation: 'score' # Mandatory, str: level of aggregation for skill scores. Options: 'score' or 'skill' + inf_to_na: True # Optional, bool: set inf values in data to NA, default is no/False + table_label: NULL # Optional, str: extra information to add in scorecard table title + fileout_label: NULL # Optional, str: extra information to add in scorecard output filename + col1_width: NULL # Optional, int: to adjust width of first column in scorecards table + col2_width: NULL # Optional, int: to adjust width of second column in scorecards table + calculate_diff: False # Mandatory, bool: True/False + ncores: 32 # Number of cores to be used in parallel computation. + # If left empty, defaults to 1. (Optional, int) + remove_NAs: yes # Whether to remove NAs. + # If left empty, defaults to no/false. (Optional, bool) + Output_format: 'Scorecards' # 'S2S4E' or 'Scorecards'. Determines the format of the output. Default is 'S2S4E'. +Run: + filesystem: gpfs # Name of the filesystem as defined in the archive configuration file + Loglevel: INFO # Minimum category of log messages to display: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. + # Default value is 'INFO'. (Optional, str) + Terminal: yes # Optional, bool: Whether to display log messages in the terminal. + # Default is yes/true. + output_dir: /home/bsc/bsc032339/ # Output directory. Must have write permissions. 
(Mandatory, str) + code_dir: /home/bsc/bsc032339/sunset/ # Directory where the code is stored. Is used when launching jobs (not running interactively) + autosubmit: no # Whether or not to run with Autosubmit. Only for non-atomic recipes (not running interactively) + # fill only if using autosubmit + diff --git a/recipe_tas_singl_cal_seas.yml b/recipe_tas_singl_cal_seas.yml index d649e082..3605b43c 100644 --- a/recipe_tas_singl_cal_seas.yml +++ b/recipe_tas_singl_cal_seas.yml @@ -26,8 +26,8 @@ Analysis: ftime_min: 1 # Mandatory, int: First leadtime time step in months ftime_max: 3 # Mandatory, int: Last leadtime time step in months Region: - #- {name: test, latmin: -10, latmax: 10, lonmin: 0, lonmax: 40} - - {name: Global, latmin: -90, latmax: 90, lonmin: -180, lonmax: 179.9} + - {name: test, latmin: -10, latmax: 10, lonmin: 0, lonmax: 40} + #- {name: Global, latmin: -90, latmax: 90, lonmin: -180, lonmax: 179.9} Regrid: method: conservative # Mandatory, str: Interpolation method. See docu. type: /home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt #/esarchive/scratch/nperez/git4/sunset/conf/grid_description/griddes_system51c3s.txt # "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_system" #"to_reference" @@ -51,8 +51,8 @@ Analysis: save: 'all' cross_validation: yes Probabilities: - percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. - save: all + percentiles: [[1/3, 2/3], [1/10], [9/10]] + save: none Indicators: index: no Visualization: diff --git a/sunset.sh b/sunset.sh index bd6f1d55..8d29b8c3 100644 --- a/sunset.sh +++ b/sunset.sh @@ -16,8 +16,8 @@ conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/SUNSET-env_2.0.0 #Rscript /home/bsc/bsc032339/sunset/full_ecvs_anomalies.R /home/bsc/bsc032339/sunset/recipe_tas.yml -#Rscript /home/bsc/bsc032339/sunset/full_ecvs_calibration.R /home/bsc/bsc032339/sunset/recipe_ecvs_cal_seas.yml +Rscript /home/bsc/bsc032339/sunset/full_ecvs_multimodel_calibrated.R /home/bsc/bsc032339/sunset/recipe_ecvs_cal_mul_seas.yml -Rscript /home/bsc/bsc032339/sunset/full_ecvs_multimodel_anomalies.R /home/bsc/bsc032339/sunset/recipe_ecvs_ano_mul_seas.yml +#Rscript /home/bsc/bsc032339/sunset/full_ecvs_multimodel_anomalies.R /home/bsc/bsc032339/sunset/recipe_ecvs_ano_mul_seas.yml -- GitLab From dfd3c9625405446f7c4514df8ce11f83c19321be Mon Sep 17 00:00:00 2001 From: vagudets Date: Mon, 27 Jan 2025 15:39:44 +0100 Subject: [PATCH 71/78] Add as.array() to quantile() computation to avoid issue with 'bin' dimension --- modules/Crossval/Crossval_Calibration.R | 25 +++++++++++++------------ modules/Crossval/Crossval_anomalies.R | 24 ++++++++---------------- 2 files changed, 21 insertions(+), 28 deletions(-) diff --git a/modules/Crossval/Crossval_Calibration.R b/modules/Crossval/Crossval_Calibration.R index ec24fd6d..86bc565f 100644 --- a/modules/Crossval/Crossval_Calibration.R +++ b/modules/Crossval/Crossval_Calibration.R @@ -1,4 +1,9 @@ # take the output of Flor/s2s/subseasonal_loading.R +nteractive mode + +Edit inline +View file @ d4f2129 + source("modules/Crossval/R/tmp/GetProbs.R") Crossval_Calibration <- function(recipe, data) { @@ -89,9 +94,8 @@ Crossval_Calibration <- function(recipe, data) { lims_cal_hcst_tr <- Apply(cal_hcst_tr, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - res <- quantile(as.vector(x), ps, na.rm = na.rm) - dim(res) <- c(bin = length(ps)) - return(res)})}, + as.array(quantile(as.vector(x), ps, + na.rm = na.rm))})}, output_dims = 
lapply(categories, function(x) {'bin'}), prob_lims = categories, @@ -99,9 +103,8 @@ Crossval_Calibration <- function(recipe, data) { lims_obs_tr <- Apply(obs_tr, target_dims = c('syear'),#, 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - res <- quantile(as.vector(x), ps, na.rm = na.rm) - dim(res) <- c(bin = length(ps)) - return(res)})}, + as.array(quantile(as.vector(x), ps, + na.rm = na.rm))})}, output_dims = lapply(categories, function(x){'bin'}), prob_lims = categories, @@ -269,18 +272,16 @@ Crossval_Calibration <- function(recipe, data) { lims_fcst <- Apply(hcst_cal, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - res <- quantile(as.vector(x), ps, na.rm = na.rm) - dim(res) <- c(bin = length(ps)) - return(res)})}, + as.array(quantile(as.vector(x), ps, + na.rm = na.rm))})}, output_dims = lapply(categories, function(x) {'bin'}), prob_lims = categories, ncores = ncores) lims <- Apply(obs, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - res <- quantile(as.vector(x), ps, na.rm = na.rm) - dim(res) <- c(bin = length(ps)) - return(res)})}, + as.array(quantile(as.vector(x), ps, + na.rm = na.rm))})}, output_dims = lapply(categories, function(x) {'bin'}), prob_lims = categories, ncores = ncores) diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R index 7303cf98..a08efc31 100644 --- a/modules/Crossval/Crossval_anomalies.R +++ b/modules/Crossval/Crossval_anomalies.R @@ -82,21 +82,15 @@ Crossval_anomalies <- function(recipe, data) { lims_ano_hcst_tr <- Apply(ano_hcst_tr, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - res <- quantile(as.vector(x), - ps, na.rm = na.rm) - dim(res) <- c(bin = length(res)) - return(res) - })}, + as.array(quantile(as.vector(x), ps, + na.rm = na.rm))})}, prob_lims = categories, ncores = ncores) lims_ano_obs_tr <- Apply(ano_obs_tr, target_dims = c('syear'),#, 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - res <- quantile(as.vector(x), ps, - na.rm = na.rm) - dim(res) <- c(bin = length(res)) - return(res) - })}, + as.array(quantile(as.vector(x), ps, + na.rm = na.rm))})}, prob_lims = categories, ncores = ncores) #store results @@ -152,9 +146,8 @@ Crossval_anomalies <- function(recipe, data) { lims_fcst <- Apply(hcst_ano, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - res <- quantile(as.vector(x), ps, na.rm = na.rm) - dim(res) <- c(bin = length(ps)) - return(res)})}, + as.array(quantile(as.vector(x), ps, + na.rm = na.rm))})}, output_dims = lapply(categories, function(x) {'bin'}), prob_lims = categories, ncores = ncores) @@ -167,9 +160,8 @@ Crossval_anomalies <- function(recipe, data) { lims <- Apply(obs_ano, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - res <- quantile(as.vector(x), ps, na.rm = na.rm) - dim(res) <- c(bin = length(ps)) - return(res)})}, + as.array(quantile(as.vector(x), ps, + na.rm = na.rm))})}, output_dims = lapply(categories, function(x) {'bin'}), prob_lims = categories, ncores = ncores) -- GitLab From a2dcb84596066cac7eb54df9c8ac73f75d7e39da Mon Sep 17 00:00:00 2001 From: vagudets Date: Mon, 27 Jan 2025 15:42:35 +0100 Subject: [PATCH 72/78] Add as.array() to quantile() calls in multimodel scripts --- modules/Crossval/Crossval_multimodel_Calibration.R | 10 ++++++---- 
modules/Crossval/Crossval_multimodel_anomalies.R | 10 ++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/modules/Crossval/Crossval_multimodel_Calibration.R b/modules/Crossval/Crossval_multimodel_Calibration.R index 229abd0a..3f7788e0 100644 --- a/modules/Crossval/Crossval_multimodel_Calibration.R +++ b/modules/Crossval/Crossval_multimodel_Calibration.R @@ -111,8 +111,8 @@ Crossval_multimodel_Calibration <- function(recipe, data) { fun = function(..., prob_lims) { res <- abind(..., along = 2) lapply(prob_lims, function(ps) { - quantile(as.vector(res), - ps, na.rm = na.rm)})}, + as.array(quantile(as.vector(res), + ps, na.rm = na.rm))})}, output_dims = lapply(categories, function(x) {'bin'}), prob_lims = categories, ncores = ncores) @@ -120,7 +120,8 @@ Crossval_multimodel_Calibration <- function(recipe, data) { target_dims = c('syear'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - quantile(as.vector(x), ps, na.rm = na.rm)})}, + as.array(quantile(as.vector(x), + ps, na.rm = na.rm))})}, output_dims = lapply(categories, function(x){'bin'}), prob_lims = categories, ncores = ncores) @@ -176,7 +177,8 @@ Crossval_multimodel_Calibration <- function(recipe, data) { fun = function(..., prob_lims) { res <- abind(..., along = 2) lapply(prob_lims, function(ps) { - quantile(as.vector(res), ps, na.rm = na.rm)})}, + as.array(quantile(as.vector(res), ps, + na.rm = na.rm))})}, output_dims = lapply(categories, function(x) {'bin'}), prob_lims = categories, ncores = ncores) diff --git a/modules/Crossval/Crossval_multimodel_anomalies.R b/modules/Crossval/Crossval_multimodel_anomalies.R index fd7fd0fe..4032a02e 100644 --- a/modules/Crossval/Crossval_multimodel_anomalies.R +++ b/modules/Crossval/Crossval_multimodel_anomalies.R @@ -98,8 +98,8 @@ Crossval_multimodel_anomalies <- function(recipe, data) { fun = function(..., prob_lims) { res <- abind(..., along = 2) lapply(prob_lims, function(ps) { - quantile(as.vector(res), - ps, na.rm = na.rm)})}, + as.array(quantile(as.vector(res), + ps, na.rm = na.rm))})}, output_dims = lapply(categories, function(x) {'bin'}), prob_lims = categories, ncores = ncores) @@ -107,7 +107,8 @@ Crossval_multimodel_anomalies <- function(recipe, data) { target_dims = c('syear'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - quantile(as.vector(x), ps, na.rm = na.rm)})}, + as.array(quantile(as.vector(x), ps, + na.rm = na.rm))})}, output_dims = lapply(categories, function(x){'bin'}), prob_lims = categories, ncores = ncores) @@ -156,7 +157,8 @@ Crossval_multimodel_anomalies <- function(recipe, data) { fun = function(..., prob_lims) { res <- abind(..., along = 2) lapply(prob_lims, function(ps) { - quantile(as.vector(res), ps, na.rm = na.rm)})}, + as.array(quantile(as.vector(res), ps, + na.rm = na.rm))})}, output_dims = lapply(categories, function(x) {'bin'}), prob_lims = categories, ncores = ncores) -- GitLab From ff2a75e6cf5f1e994960206210f25c5a6e1b96b9 Mon Sep 17 00:00:00 2001 From: vagudets Date: Mon, 27 Jan 2025 15:43:48 +0100 Subject: [PATCH 73/78] Remove editor lines --- modules/Crossval/Crossval_Calibration.R | 5 ----- 1 file changed, 5 deletions(-) diff --git a/modules/Crossval/Crossval_Calibration.R b/modules/Crossval/Crossval_Calibration.R index 86bc565f..fe9d85f6 100644 --- a/modules/Crossval/Crossval_Calibration.R +++ b/modules/Crossval/Crossval_Calibration.R @@ -1,9 +1,4 @@ # take the output of Flor/s2s/subseasonal_loading.R -nteractive mode - -Edit inline -View file @ d4f2129 - source("modules/Crossval/R/tmp/GetProbs.R") 
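# Note on the as.array() change in patches 71-72 above: quantile() returns a
# named vector with no dim attribute, which is presumably the 'bin' dimension
# issue named in those commit messages; as.array() gives the result an explicit
# one-dimensional shape before Apply() labels it 'bin'. A minimal
# self-contained illustration (example values only, not project code):
ps <- c(1/3, 2/3)
res <- quantile(as.vector(rnorm(100)), ps, na.rm = TRUE)
dim(res)            # NULL: a plain named numeric vector
dim(as.array(res))  # 2: an explicit 1-d array that Apply() can name 'bin'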
Crossval_Calibration <- function(recipe, data) { -- GitLab From 2f26401331a881b307238cdccb0015f3a7c58fa6 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 5 Feb 2025 17:13:46 +0100 Subject: [PATCH 74/78] Bugfix: Reverse order of append() arguments when transforming probs to list --- modules/Crossval/Crossval_Calibration.R | 19 +++++++++---------- modules/Crossval/Crossval_anomalies.R | 18 +++++++++--------- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/modules/Crossval/Crossval_Calibration.R b/modules/Crossval/Crossval_Calibration.R index fe9d85f6..22530405 100644 --- a/modules/Crossval/Crossval_Calibration.R +++ b/modules/Crossval/Crossval_Calibration.R @@ -381,7 +381,6 @@ Crossval_Calibration <- function(recipe, data) { probs_obs <- list() all_names <- NULL - ## TODO: Add .drop_dims() here for (ps in 1:length(categories)) { for (perc in 1:(length(categories[[ps]]) + 1)) { if (perc == 1) { @@ -392,22 +391,22 @@ Crossval_Calibration <- function(recipe, data) { name_elem <- paste0("from_", categories[[ps]][perc-1]*100, "_to_", categories[[ps]][perc]*100) } - probs_hcst <- append(list(Subset(hcst_probs_ev[[ps]], + probs_hcst <- append(probs_hcst, + list(Subset(hcst_probs_ev[[ps]], along = 'bin', indices = perc, - drop = 'selected')), - probs_hcst) - probs_obs <- append(list(Subset(obs_probs_ev[[ps]], + drop = 'selected'))) + probs_obs <- append(probs_obs, + list(Subset(obs_probs_ev[[ps]], along = 'bin', indices = perc, - drop = 'selected')), - probs_obs) + drop = 'selected'))) if (!is.null(data$fcst)) { - probs_fcst <- append(list(Subset(fcst_probs[[ps]], + probs_fcst <- append(probs_fcst, + list(Subset(fcst_probs[[ps]], along = 'bin', indices = perc, - drop = 'selected')), - probs_fcst) + drop = 'selected'))) } all_names <- c(all_names, name_elem) } diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R index a08efc31..43b8e6e1 100644 --- a/modules/Crossval/Crossval_anomalies.R +++ b/modules/Crossval/Crossval_anomalies.R @@ -256,22 +256,22 @@ Crossval_anomalies <- function(recipe, data) { name_elem <- paste0("from_", categories[[ps]][perc-1], "_to_", categories[[ps]][perc]) } - probs_hcst <- append(list(Subset(hcst_probs_ev[[ps]], + probs_hcst <- append(probs_hcst, + list(Subset(hcst_probs_ev[[ps]], along = 'bin', indices = perc, - drop = 'selected')), - probs_hcst) - probs_obs <- append(list(Subset(obs_probs_ev[[ps]], + drop = 'selected'))) + probs_obs <- append(probs_obs, + list(Subset(obs_probs_ev[[ps]], along = 'bin', indices = perc, - drop = 'selected')), - probs_obs) + drop = 'selected'))) if (!is.null(data$fcst)) { - probs_fcst <- append(list(Subset(fcst_probs[[ps]], + probs_fcst <- append(probs_fcst, + list(Subset(fcst_probs[[ps]], along = 'bin', indices = perc, - drop = 'selected')), - probs_fcst) + drop = 'selected'))) } all_names <- c(all_names, name_elem) } -- GitLab From c9ea25c853bb6da483bb06847e663e0b81e2ad6b Mon Sep 17 00:00:00 2001 From: Nuria Perez Zanon Date: Fri, 7 Feb 2025 13:52:58 +0100 Subject: [PATCH 75/78] multimodel anomalies --- conf/archive_subseasonal.yml | 3 +- modules/Crossval/Crossval_Calibration.R | 46 ++++--- modules/Crossval/Crossval_anomalies.R | 22 +++ .../Crossval/Crossval_multimodel_anomalies.R | 1 + modules/Loading/R/dates2load.R | 5 +- modules/Loading/R/load_subseasonal.R | 23 ++++ modules/Saving/R/get_filename.R | 1 + recipe_daily_prlr_cal_subseas.yml | 129 ++++++++++++++++++ recipe_prlr_cal_subseas.yml | 6 +- subsunset.sh | 13 +- 10 files changed, 217 insertions(+), 32 deletions(-) create mode 
100644 recipe_daily_prlr_cal_subseas.yml diff --git a/conf/archive_subseasonal.yml b/conf/archive_subseasonal.yml index 0fccdf4f..c6b595be 100644 --- a/conf/archive_subseasonal.yml +++ b/conf/archive_subseasonal.yml @@ -11,7 +11,8 @@ gpfs: "tasmin":"weekly_mean/s2s/tasmin_f24h/", "sfcWind":"weekly_mean/s2s/sfcWind_f24h/", "rsds":"weekly_mean/s2s/rsds_f24h/"} - daily_mean: {"tas":"daily_mean/s2s/tas_f6h"} + daily_mean: {"tas":"daily_mean/s2s/tas_f6h", + "prlr":"daily_mean/s2s/prlr_f6h/"} nmember: fcst: 48 hcst: 12 diff --git a/modules/Crossval/Crossval_Calibration.R b/modules/Crossval/Crossval_Calibration.R index fe9d85f6..91063390 100644 --- a/modules/Crossval/Crossval_Calibration.R +++ b/modules/Crossval/Crossval_Calibration.R @@ -1,7 +1,7 @@ # take the output of Flor/s2s/subseasonal_loading.R source("modules/Crossval/R/tmp/GetProbs.R") -Crossval_Calibration <- function(recipe, data) { +Crossval_Calibration <- function(recipe, data, correct_negative = FALSE) { cal_method <- recipe$Analysis$Workflow$Calibration$method cross.method <- recipe$Analysis$cross.method @@ -77,7 +77,6 @@ Crossval_Calibration <- function(recipe, data) { na.fill = FALSE, na.rm = TRUE, apply_to = NULL, alpha = NULL, ncores = ncores) - cal_hcst_ev <- Calibration(exp = hcst_tr, obs = obs_tr, exp_cor = hcst_ev, cal.method = cal_method, memb_dim = 'ensemble', sdate_dim = 'syear', @@ -85,7 +84,10 @@ Crossval_Calibration <- function(recipe, data) { na.fill = FALSE, na.rm = TRUE, apply_to = NULL, alpha = NULL, ncores = ncores) - + if (correct_negative) { + cal_hcst_tr[cal_hcst_tr < 0] <- 0 + cal_hcst_ev[cal_hcst_ev < 0] <- 0 + } lims_cal_hcst_tr <- Apply(cal_hcst_tr, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { @@ -141,7 +143,7 @@ Crossval_Calibration <- function(recipe, data) { } } info(recipe$Run$logger, - "#### Anomalies Cross-validation loop ended #####") + "#### Calibration Cross-validation loop ended #####") gc() if (tolower(recipe$Analysis$Horizon) %in% c('seasonal', 'decadal')) { @@ -227,6 +229,8 @@ Crossval_Calibration <- function(recipe, data) { alpha = NULL, memb_dim = 'ensemble', sdate_dim = 'syear', dat_dim = NULL, ncores = ncores) + # For compatibility with subseasonal: + obs <- data$obs$data } else { # if subseasonal # merge sample dimensions and select central week hcst <- MergeDims(data$hcst$data, merge_dims = c('sday', 'syear'), @@ -239,7 +243,7 @@ Crossval_Calibration <- function(recipe, data) { indices = (dim(data$obs$data)['sweek'] + 1) / 2) fcst <- Subset(data$fcst$data, along = 'sday', indices = 1, drop = 'selected') - fcst_cal <- Calibration(exp = hcst, + data$fcst$data <- Calibration(exp = hcst, obs = obs, exp_cor = fcst, cal.method = cal_method, @@ -259,10 +263,15 @@ Crossval_Calibration <- function(recipe, data) { alpha = NULL, memb_dim = 'ensemble', sdate_dim = 'syear', dat_dim = NULL, ncores = ncores) - - + if (!('sday' %in% names(dim(data$fcst$data)))) { + data$fcst$data <- InsertDim(data$fcst$data, + len = 1, pos = 3, name = 'sday') + } + } # end condition subseasonal + if (correct_negative) { + hcst_cal[hcst_cal < 0] <- 0 + data$fcst$data[data$fcst$data < 0] <- 0 } - # Terciles limits using the whole hindcast period: lims_fcst <- Apply(hcst_cal, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { @@ -282,9 +291,8 @@ Crossval_Calibration <- function(recipe, data) { ncores = ncores) tmp_lims2 <- list() - # What to save hcst category limits or obs category limits? 
+ # SAVING 'lims' which are the observed category limits: # TODO saving: - #recipe$Analysis$Workflow$Probabilities$save <- FALSE if (recipe$Analysis$Workflow$Probabilities$save == 'all') { for (ps in 1:length(categories)) { ## TODO: use .drop_dims() @@ -295,9 +303,12 @@ Crossval_Calibration <- function(recipe, data) { indices = l, drop = "selected") tmp_lims2 <- append(tmp_lims2, list(tmp_lims)) - names(tmp_lims2)[length(tmp_lims2)] <- paste0("p_", as.character(categories[[ps]][l]*100)) + names(tmp_lims2)[length(tmp_lims2)] <- + paste0("p_", as.character(categories[[ps]][l]*100)) } } + info(recipe$Run$logger, + "SAVING OBSERVED CATEGORY LIMITS") save_percentiles(recipe = recipe, percentiles = tmp_lims2, data_cube = data$obs, agg = "global", outdir = NULL) @@ -358,22 +369,18 @@ Crossval_Calibration <- function(recipe, data) { } info(recipe$Run$logger, "#### Calibrated and Probabilities Done #####") -# TODO saving: - recipe$Analysis$Workflow$Calibration$save <- FALSE if (recipe$Analysis$Workflow$Calibration$save != FALSE) { info(recipe$Run$logger, "##### START SAVING CALIBRATED #####") -# recipe$Run$output_dir <- paste0(recipe$Run$output_dir, -# "/outputs/Anomalies/") # Save forecast if ((recipe$Analysis$Workflow$Calibration$save %in% c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) { save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') } # Save hindcast - if (recipe$Analysis$Workflow$Anomalies$save %in% - c('all', 'exp_only')) { - save_forecast(recipe = recipe, data_cube = hcst, type = 'hcst') - } + #if (recipe$Analysis$Workflow$Anomalies$save %in% + # c('all', 'exp_only')) { + # save_forecast(recipe = recipe, data_cube = hcst, type = 'hcst') + #} } # Save probability bins probs_hcst <- list() @@ -438,6 +445,7 @@ Crossval_Calibration <- function(recipe, data) { return(list(hcst = hcst, obs = data$obs, fcst = data$fcst, hcst.full_val = data$hcst, obs.full_val = data$obs, + cat_lims = list(obs_lims = lims, hcst_lims = lims_fcst), #cat_lims = list(hcst_tr = lims_ano_hcst_tr_res, # obs_tr = lims_ano_obs_tr_res), probs = list(hcst_ev = hcst_probs_ev, diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R index a08efc31..0cae57d5 100644 --- a/modules/Crossval/Crossval_anomalies.R +++ b/modules/Crossval/Crossval_anomalies.R @@ -66,6 +66,17 @@ Crossval_anomalies <- function(recipe, data) { indices = cross[[t]]$eval.dexes, drop = 'selected') obs_ev <- Subset(data$obs$data, along = 'syear', indices = cross[[t]]$eval.dexes, drop = 'selected') + if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { + central_day <- (dim(data$hcst$data)['sday'] + 1)/2 + hcst_tr <- MergeDims(hcst_tr, merge_dims = c('sday', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + + obs_tr <- MergeDims(obs_tr, merge_dims = c('sday', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + # 'sday' dim to select the central day + hcst_ev <- Subset(hcst_ev, along = 'sday', indices = central_day) + + } # compute climatology: clim_obs_tr <- MeanDims(obs_tr, 'syear') clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble')) @@ -93,6 +104,16 @@ Crossval_anomalies <- function(recipe, data) { na.rm = na.rm))})}, prob_lims = categories, ncores = ncores) + if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { + ano_hcst_tr <- SplitDim(ano_hcst_tr, split_dim = 'syear', + new_dim_name = 'sday', + indices = rep(1:dim(data$hcst$data)['sday'], + length(cross[[t]]$train.dexes))) + ano_obs_tr <- SplitDim(obs_tr, split_dim = 'syear', new_dim_name = 'sday', + indices = 
rep(1:dim(data$hcst$data)['sday'], + length(cross[[t]]$train.dexes))) + } + #store results ano_hcst_ev_res <- abind(ano_hcst_ev_res, ano_hcst_ev, along = length(dim(ano_hcst_ev)) + 1) @@ -110,6 +131,7 @@ Crossval_anomalies <- function(recipe, data) { info(recipe$Run$logger, "#### Anomalies Cross-validation loop ended #####") gc() + browser() # Add dim names: names(dim(ano_hcst_ev_res)) <- ev_dim_names names(dim(ano_obs_ev_res)) <- ev_dim_names diff --git a/modules/Crossval/Crossval_multimodel_anomalies.R b/modules/Crossval/Crossval_multimodel_anomalies.R index 4032a02e..3b92f17d 100644 --- a/modules/Crossval/Crossval_multimodel_anomalies.R +++ b/modules/Crossval/Crossval_multimodel_anomalies.R @@ -73,6 +73,7 @@ Crossval_multimodel_anomalies <- function(recipe, data) { along = length(dim(ano_obs_tr)) + 1) # Anomalies of individual models + ano_hcst_ev <- ano_hcst_tr <- NULL for (sys in 1:length(data$hcst)) { hcst_tr <- Subset(data$hcst[[sys]]$data, along = 'syear', indices = cross[[t]]$train.dexes) diff --git a/modules/Loading/R/dates2load.R b/modules/Loading/R/dates2load.R index 08a15bb8..2c0b9301 100644 --- a/modules/Loading/R/dates2load.R +++ b/modules/Loading/R/dates2load.R @@ -27,7 +27,7 @@ dates2load <- function(recipe, logger) { file_dates <- paste0(strtoi(recipe$hcst_start):strtoi(recipe$hcst_end), recipe$sdate) file_dates <- .add_dims(file_dates) - } else if (temp_freq == "weekly_mean") { + } else if (temp_freq %in% c("weekly_mean", "daily")) { sday <- recipe$sday_window if (is.null(sday)) { sday <- 3 @@ -57,10 +57,11 @@ dates2load <- function(recipe, logger) { hcst.end = as.numeric(recipe$hcst_end), ftime_min = recipe$ftime_min, ftime_max = recipe$ftime_max, out = 'hcst') - } else { + } else { file_dates <- paste0(strtoi(recipe$hcst_start):strtoi(recipe$hcst_end), recipe$sdate) } + # fcst dates (if fcst_year empty it creates an empty object) if (! 
is.null(recipe$fcst_year)) { if (temp_freq == "monthly_mean") { diff --git a/modules/Loading/R/load_subseasonal.R b/modules/Loading/R/load_subseasonal.R index cb1fa7f3..329be209 100644 --- a/modules/Loading/R/load_subseasonal.R +++ b/modules/Loading/R/load_subseasonal.R @@ -231,6 +231,29 @@ load_subseasonal <- function(recipe) { dim(dates) <- hcst$dims[c("sday", "sweek", "syear", "time")] # Separate Start() call for monthly vs daily data if (store.freq == "weekly_mean") { + # ---------------------------------------------------- + # Code to fill missing dates from hindcast attributes: + # It fails if all years have an NA + y <- 1 + while (any(is.na(dates[,,y,]))) { + y <- y + 1 + } + # y is a year without missing time steps + complete_year <- dates[,,y,] + # Loop over years to replace missing dates + for (years in 1:dim(dates)['syear']) { + that_year <- dates[,,years,] + if (any(is.na(that_year))) { + pos_missed <- which(is.na(that_year)) + actual_year <- that_year + actual_year <- format(actual_year[which(!is.na(actual_year))[1]], + "%Y") + that_year[pos_missed] <- as.POSIXct(as.Date(paste0(actual_year, + format(complete_year[pos_missed], "-%m-%d")))) + dates[,,years,] <- that_year + } + } + # ----------------------------------------------------- dates_file <- format(as.Date(dates, '%Y%m%d'), "%Y%m%d") dim(dates_file) <- dim(dates) obs <- Start(dat = obs.path, diff --git a/modules/Saving/R/get_filename.R b/modules/Saving/R/get_filename.R index 2766c17a..7266ac70 100644 --- a/modules/Saving/R/get_filename.R +++ b/modules/Saving/R/get_filename.R @@ -10,6 +10,7 @@ get_filename <- function(dir, recipe, var, date, agg, file.type) { if (tolower(recipe$Analysis$Horizon) == "subseasonal") { shortdate <- format(as.Date(as.character(date), "%Y%m%d"), "%V") dd <- "week" + shortdate <- date } else { shortdate <- format(as.Date(as.character(date), "%Y%m%d"), "%m") dd <- "month" } diff --git a/recipe_daily_prlr_cal_subseas.yml b/recipe_daily_prlr_cal_subseas.yml new file mode 100644 index 00000000..0bb7a905 --- /dev/null +++ b/recipe_daily_prlr_cal_subseas.yml @@ -0,0 +1,129 @@ +# IMPORTANT: This recipe is not intended to represent a real workflow: it is only a template showcasing ALL available options. +Description: + Author: N.Pérez-Zanón + Info: # Complete recipe containing all possible fields. +Analysis: + Horizon: subseasonal # Mandatory, str: 'seasonal', or 'decadal'. Subseasonal is in development + Variables: + # name: variable name(s) in the /esarchive (Mandatory, str) + # freq: 'monthly_mean', 'daily' or 'daily_mean' (Mandatory, str) + # units: desired data units for each variable. Only available for temperature, + # precipitation, and pressure variables.
+ #- {name: 'tas', freq: 'weekly_mean', units: 'C'} + - {name: 'prlr', freq: 'daily', units: 'mm', flux: FALSE} +# name: 'tas' +# freq: 'weekly_mean' +# units: 'C' + # To request more variables to be divided in atomic recipes, add them this way: +# - {name: 'prlr', freq: 'weekly_mean', units: 'mm'} +# - {name: 'sfcWind', freq: 'weekly_mean', units: 'm s-1'} +# - {name: 'rsds', freq: 'weekly_mean', units: 'W m-2'} + # To request multiple variables *in the same* atomic recipe, add them this way: + # - {name: 'tas, prlr, sfcWind, rsds', freq: 'weekly_mean', units: {tas: 'C', prlr: 'mm', sfcWind: 'm s-1', rsds:'W m-2'}} + Datasets: + System: + # name: System name (Mandatory, str) + # member: 'all' or individual members, separated by a comma and in quotes (decadal only, str) + - {name: 'NCEP-CFSv2', member: 'all'} + # To request more Systems to be divided in atomic recipes, add them this way: + # - {name: 'Meteo-France-System7'} + Multimodel: + execute: no # Either yes/true or no/false (Mandatory, bool) + approach: pooled # Multimodel computation approach. 'pooled' currently the only option (str) + createFrom: Anomalies # Which module should the anomalies be created from (str) + Reference: + - {name: 'ERA5'} # Reference name (Mandatory, str) + # To request more References to be divided into atomic recipes, add them this way: + # - {name: 'ERA5Land'} + Time: + sdate: 20241024 #%Y%m%d # Change to 2023 + #- '1201' # Start date, 'mmdd' (Mandatory, int) + # To request more startdates to be divided into atomic recipes, add them this way: + # - '0101' + # - '0201' + # ... + fcst_year: 20241024 # Forecast initialization year 'YYYY' (Optional, int) + hcst_start: '1999' # Hindcast initialization start year 'YYYY' (Mandatory, int) + hcst_end: '2016' # Hindcast initialization end year 'YYYY' (Mandatory, int) + ftime_min: 1 # First forecast time step in months. Starts at “1”. (Mandatory, int) + ftime_max: 4 # Last forecast time step in months. Starts at “1”. (Mandatory, int) + week_day: Thursday + sweek_window: 9 + sday_window: 3 + Region: + # latmin: minimum latitude (Mandatory, int) + # latmax: maximum latitude (Mandatory, int) + # lonmin: # minimum longitude (Mandatory, int) + # lonmax: # maximum longitude (Mandatory, int) + # To request more regions to be divided in atomic recipes, add them this way: + # {name: "Nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 60} + #- {name: "Kuwait", latmin: 28, latmax: 31, lonmin: 46, lonmax: 49} + - {name: "Iberia", latmin: 36, latmax: 44, lonmin: -10, lonmax: 5} + #- {name: "EU", latmin: 20, latmax: 80, lonmin: -20, lonmax: 40} + #- {name: 'Global', latmin: -90, latmax: 90, lonmin: -180, lonmax: 179.9} + Regrid: + method: conservative # Interpolation method (Mandatory, str) + type: "to_system" #"conf/grid_description/griddes_system51c3s.txt" # Interpolate to: 'to_system', 'to_reference', 'none', + # or CDO-accepted grid. (Mandatory, str) + Workflow: + # This is the section of the recipe where the parameters for each module are specified + Time_aggregation: + execute: no + Calibration: + method: "bias" #evmos # Calibration method. (Mandatory, str) + save: 'all' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str) + Skill: + metric: mean_bias rpss enscorr # List of skill metrics separated by spaces or commas. (Mandatory, str) + save: 'all' # Options: 'all', 'none' (Mandatory, str) + Statistics: + metric: cov std var n_eff # List of statistics separated by spaces or commas.
(Mandatory, str) + save: 'none' # Options: 'all', 'none' (Mandatory, str) + Probabilities: + # percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Thresholds + percentiles: [[1/3, 2/3], [1/10], [9/10]] + # for quantiles and probability categories. Each set of thresholds should be + # enclosed within brackets. For now, they are INDEPENDENT from skill metrics. (Optional) + save: 'all' # Options: 'all', 'none', 'bins_only', 'percentiles_only' (Mandatory, str) + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles + multi_panel: no + mask_terciles: 'both' + mask_ens: 'both' + projection: Robinson + file_format: 'PNG' + dots_terciles: no # Whether to dot the non-significant by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + # mask_ens: no # Whether to mask the non-significant points by rpss in the forecast ensemble mean plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + Scorecards: + execute: no # yes/no + regions: + # Mandatory: Define regions for which the spatial aggregation will be performed. + # The regions must be included within the area defined in the 'Analysis:Region' section. + Iberia: {lon.min: -10, lon.max: 5, lat.min: 36, lat.max: 44} + #Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + #Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + #Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: 1, 2, 3 # Mandatory, int: start months to visualise in scorecard table. Options: 'all' or a sequence of numbers. + metric: mean_bias enscorr rpss crpss enssprerr # Mandatory: metrics to visualise in scorecard table + metric_aggregation: 'score' # Mandatory, str: level of aggregation for skill scores. Options: 'score' or 'skill' + inf_to_na: True # Optional, bool: set inf values in data to NA, default is no/False + table_label: NULL # Optional, str: extra information to add in scorecard table title + fileout_label: NULL # Optional, str: extra information to add in scorecard output filename + col1_width: NULL # Optional, int: to adjust width of first column in scorecards table + col2_width: NULL # Optional, int: to adjust width of second column in scorecards table + calculate_diff: False # Mandatory, bool: True/False + ncores: 32 # Number of cores to be used in parallel computation. + # If left empty, defaults to 1. (Optional, int) + remove_NAs: yes # Whether to remove NAs. + # If left empty, defaults to no/false. (Optional, bool) + Output_format: 'Scorecards' # 'S2S4E' or 'Scorecards'. Determines the format of the output. Default is 'S2S4E'. +Run: + filesystem: gpfs # Name of the filesystem as defined in the archive configuration file + Loglevel: INFO # Minimum category of log messages to display: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. + # Default value is 'INFO'. (Optional, str) + Terminal: yes # Optional, bool: Whether to display log messages in the terminal. + # Default is yes/true. + output_dir: /home/bsc/bsc032339/ # Output directory. Must have write permissions. (Mandatory, str) + code_dir: /home/bsc/bsc032339/sunset/ # Directory where the code is stored. Is used when launching jobs (not running interactively) + autosubmit: no # Whether or not to run with Autosubmit. 
Only for non-atomic recipes (not running interactively) + # fill only if using autosubmit + diff --git a/recipe_prlr_cal_subseas.yml b/recipe_prlr_cal_subseas.yml index 55f0516e..c3a62c37 100644 --- a/recipe_prlr_cal_subseas.yml +++ b/recipe_prlr_cal_subseas.yml @@ -71,10 +71,10 @@ Analysis: execute: no Calibration: method: "bias" #evmos # Calibration method. (Mandatory, str) - save: 'none' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str) + save: 'all' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str) Skill: metric: mean_bias rpss enscorr # List of skill metrics separated by spaces or commas. (Mandatory, str) - save: 'none' # Options: 'all', 'none' (Mandatory, str) + save: 'all' # Options: 'all', 'none' (Mandatory, str) Statistics: metric: cov std var n_eff # List of statistics separated by spaces or commas. (Mandatory, str) save: 'none' # Options: 'all', 'none' (Mandatory, str) @@ -83,7 +83,7 @@ Analysis: percentiles: [[1/3, 2/3], [1/10], [9/10]] # for quantiles and probability categories. Each set of thresholds should be # enclosed within brackets. For now, they are INDEPENDENT from skill metrics. (Optional) - save: 'none' # Options: 'all', 'none', 'bins_only', 'percentiles_only' (Mandatory, str) + save: 'all' # Options: 'all', 'none', 'bins_only', 'percentiles_only' (Mandatory, str) Visualization: plots: skill_metrics forecast_ensemble_mean most_likely_terciles multi_panel: no diff --git a/subsunset.sh b/subsunset.sh index 6e75546f..598a76f3 100644 --- a/subsunset.sh +++ b/subsunset.sh @@ -1,10 +1,10 @@ #!/bin/bash -#SBATCH -n 64 +#SBATCH -n 64 #SBATCH -N 1 #SBATCH -t 10:00:00 -#SBATCH -J sunset_sub -#SBATCH -o sunset_sub-%J.out -#SBATCH -e sunset_sub-%J.err +#SBATCH -J sunset_subprlr +#SBATCH -o sunset_subprlr-%J.out +#SBATCH -e sunset_subprlr-%J.err #SBATCH --account=bsc32 #SBATCH --qos=gp_bsces #SBATCH --constraint=lowmem @@ -16,8 +16,7 @@ conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/SUNSET-env_2.0.0 #Rscript /home/bsc/bsc032339/sunset/full_ecvs_anomalies.R /home/bsc/bsc032339/sunset/recipe_tas.yml -Rscript /home/bsc/bsc032339/full_test.R recipe_prlr_cal_subseas.yml +#Rscript /home/bsc/bsc032339/sunset/full_ecvs_multimodel_anomalies.R /home/bsc/bsc032339/sunset/recipe_ecvs_ano_mul_seas.yml -- GitLab From 81be2646fc039ebb43a97b72abe587f60f686eaf Mon Sep 17 00:00:00 2001 From: Nuria Perez Zanon Date: Thu, 20 Feb 2025 14:56:08 +0100 Subject: [PATCH 76/78] QM archive ECMWF-S2S --- conf/archive_subseasonal.yml | 18 +- modules/Crossval/Crossval_Calibration.R | 210 +++++++++++++++--------- modules/Loading/R/dates2load.R | 27 ++- modules/Loading/R/load_subseasonal.R | 64 ++++++-- modules/Loading/R/subseas_file_dates.R | 10 +- recipe_daily_subseas.yml | 131 +++++++++++++++ recipe_ecvs_cal_seas.yml | 2 +- recipe_ecvs_cal_subseas.yml | 6 +- recipe_prlr_cal_subseas.yml | 7 +- 9 files changed, 366 insertions(+), 109 deletions(-) create mode 100644 recipe_daily_subseas.yml diff --git a/conf/archive_subseasonal.yml b/conf/archive_subseasonal.yml index c6b595be..a1540879 100644 --- a/conf/archive_subseasonal.yml +++ b/conf/archive_subseasonal.yml @@ -1,10 +1,23 @@ gpfs: src_sys: "/gpfs/projects/bsc32/esarchive_cache/" System: + ECMWF-ENS-EXT: + name: "ECMWF-ENS-EXT" + institution: "ECMWF" + srchc:
"exp/ecmwf/s2s-monthly_ensforhc/" + srcfc: "exp/ecmwf/s2s-monthly_ensfor/" + weekly_mean: {"prlr":"weekly_mean/prlr_s0-6h/"} + nmember: + fcst: 51 + hcst: 11 + calendar: "gregorian" + time_stamp_lag: "0" + reference_grid: "/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/s2s-monthly_ensforhc/weekly_mean/prlr_s0-6h/prlr_20221205.nc" NCEP-CFSv2: name: "NCEP CFSv2" institution: "NOAA NCEP" #? - src: "exp/ncep/cfs-v2/" + srchc: "exp/ncep/cfs-v2/" + srcfc: "exp/ncep/cfs-v2/" weekly_mean: {"tas":"weekly_mean/s2s/tas_f24h/", "prlr":"weekly_mean/s2s/prlr_f24h/", "tasmax":"weekly_mean/s2s/tasmax_f24h/", @@ -12,7 +25,8 @@ gpfs: "sfcWind":"weekly_mean/s2s/sfcWind_f24h/", "rsds":"weekly_mean/s2s/rsds_f24h/"} daily_mean: {"tas":"daily_mean/s2s/tas_f6h", - "prlr":"daily_mean/s2s/prlr_f6h/"} + "prlr":"daily_mean/s2s/prlr_f6h/", + "psl":"daily_mean/s2s/psl_f6h/"} nmember: fcst: 48 hcst: 12 diff --git a/modules/Crossval/Crossval_Calibration.R b/modules/Crossval/Crossval_Calibration.R index 91063390..e357853b 100644 --- a/modules/Crossval/Crossval_Calibration.R +++ b/modules/Crossval/Crossval_Calibration.R @@ -46,7 +46,6 @@ Crossval_Calibration <- function(recipe, data, correct_negative = FALSE) { obs_probs_ev <- lapply(categories, function(x){NULL}) for (t in 1:length(cross)) { info(recipe$Run$logger, paste("crossval:", t)) - # subset years: Subset works at BSC not at Athos ## training indices obs_tr <- Subset(data$obs$data, along = 'syear', @@ -70,20 +69,39 @@ Crossval_Calibration <- function(recipe, data, correct_negative = FALSE) { rename_dim = 'syear', na.rm = FALSE) } - cal_hcst_tr <- Calibration(exp = hcst_tr, obs = obs_tr, - cal.method = cal_method, - memb_dim = 'ensemble', sdate_dim = 'syear', - eval.method = 'in-sample', - na.fill = FALSE, na.rm = TRUE, - apply_to = NULL, - alpha = NULL, ncores = ncores) - cal_hcst_ev <- Calibration(exp = hcst_tr, obs = obs_tr, exp_cor = hcst_ev, - cal.method = cal_method, - memb_dim = 'ensemble', sdate_dim = 'syear', - eval.method = 'in-sample', - na.fill = FALSE, na.rm = TRUE, - apply_to = NULL, - alpha = NULL, ncores = ncores) + if (cal_method %in% c('PTF', 'DIST', 'RQUANT', 'QUANT', 'SSPLIN')) { + cal_hcst_tr <- QuantileMapping(exp = hcst_tr, obs = obs_tr, + method = cal_method, + memb_dim = 'ensemble', + sdate_dim = 'syear', + window_dim = NULL, + na.rm = TRUE, qstep = 0.1, + ncores = ncores) + cal_hcst_ev <- QuantileMapping(exp = hcst_tr, obs = obs_tr, + exp_cor = hcst_ev, + method = cal_method, + memb_dim = 'ensemble', + sdate_dim = 'syear', + window_dim = NULL, + na.rm = TRUE, qstep = 0.1, + ncores = ncores) + } else { + cal_hcst_tr <- Calibration(exp = hcst_tr, obs = obs_tr, + cal.method = cal_method, + memb_dim = 'ensemble', sdate_dim = 'syear', + eval.method = 'in-sample', + na.fill = FALSE, na.rm = TRUE, + apply_to = NULL, + alpha = NULL, ncores = ncores) + cal_hcst_ev <- Calibration(exp = hcst_tr, obs = obs_tr, exp_cor = hcst_ev, + cal.method = cal_method, + memb_dim = 'ensemble', sdate_dim = 'syear', + eval.method = 'in-sample', + na.fill = FALSE, na.rm = TRUE, + apply_to = NULL, + alpha = NULL, ncores = ncores) + } + # If precipitation is negative: if (correct_negative) { cal_hcst_tr[cal_hcst_tr < 0] <- 0 cal_hcst_ev[cal_hcst_ev < 0] <- 0 @@ -118,10 +136,26 @@ Crossval_Calibration <- function(recipe, data, correct_negative = FALSE) { length(cross[[t]]$eval.dexes))) obs_tr <- SplitDim(obs_tr, split_dim = 'syear', new_dim_name = 'sday', indices = rep(1:dim(data$hcst$data)['sday'], - length(cross[[t]]$train.dexes))) - + length(cross[[t]]$train.dexes))) + 
for (ps in 1:length(categories)) { + lims_cal_hcst_tr[[ps]] <- InsertDim(lims_cal_hcst_tr[[ps]], + pos = 1, len = 1, name = 'sday') + lims_obs_tr[[ps]] <- InsertDim(lims_obs_tr[[ps]], + pos = 1, len = 1, name = 'sday') + } } - + # check order before storing: + cal_hcst_ev <- Reorder(cal_hcst_ev, + names(dim(data$hcst$data))) + cal_hcst_tr <- Reorder(cal_hcst_tr, + names(dim(data$hcst$data))) + obs_tr <- Reorder(obs_tr, names(dim(data$obs$data))) + for (ps in 1:length(categories)) { + lims_cal_hcst_tr[[ps]] <- Reorder(lims_cal_hcst_tr[[ps]], + c('bin', names(dim(data$obs$data))[-c(5,9)])) + lims_obs_tr[[ps]] <- Reorder(lims_obs_tr[[ps]], + c('bin', names(dim(data$obs$data))[-5])) + } #store results cal_hcst_ev_res <- abind(cal_hcst_ev_res, cal_hcst_ev, along = length(dim(cal_hcst_ev)) + 1) @@ -145,72 +179,56 @@ Crossval_Calibration <- function(recipe, data, correct_negative = FALSE) { info(recipe$Run$logger, "#### Calibration Cross-validation loop ended #####") gc() - - if (tolower(recipe$Analysis$Horizon) %in% c('seasonal', 'decadal')) { - names(dim(cal_hcst_ev_res)) <- c('dat', 'var', 'sday', 'sweek', + names(dim(cal_hcst_ev_res)) <- c('dat', 'var', 'sday', 'sweek', 'unneeded', 'time', - 'latitude', 'longitude', 'ensemble', 'syear') - names(dim(cal_hcst_tr_res)) <- c('dat', 'var', 'sday', 'sweek', 'loop', 'time', - 'latitude', 'longitude', 'ensemble', 'syear') - names(dim(obs_tr_res)) <- c('dat', 'var', 'sday', 'sweek', 'ensemble', 'time', - 'latitude', 'longitude', 'unneeded', 'syear') - obs_tr_res <- Subset(obs_tr_res, along = 'unneeded', - indices = 1, drop = 'selected') - cal_hcst_ev_res <- Subset(cal_hcst_ev_res, along = 'unneeded', - indices = 1, drop = 'selected') - for(ps in 1:length(categories)) { - names(dim(lims_cal_hcst_tr_res[[ps]])) <- c('bin', 'dat', 'var', 'sday', - 'sweek', - 'time', 'latitude', 'longitude', - 'syear') - names(dim(lims_obs_tr_res[[ps]])) <- c('bin', 'dat', 'var', 'sday', 'sweek', - 'time', 'latitude', 'longitude', - 'unneeded', 'syear') - lims_obs_tr_res[[ps]] <- Subset(lims_obs_tr_res[[ps]], - along = 'unneeded', indices = 1, - drop = 'selected') - } - } else { #if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { - names(dim(cal_hcst_ev_res)) <- c('dat', 'var', 'unneeded', 'sweek', - 'time', 'latitude', 'longitude', 'ensemble', - 'sday', 'syear') - names(dim(cal_hcst_tr_res)) <- c('dat', 'var', 'loop', 'sweek', 'time', - 'latitude', 'longitude', 'ensemble', 'sday', - 'syear') - names(dim(obs_tr_res)) <- c('dat', 'var', 'ensemble', 'sweek', 'time', - 'latitude', 'longitude', 'unneeded', - 'sday', 'syear') - obs_tr_res <- Subset(obs_tr_res, along = 'unneeded', - indices = 1, drop = 'selected') - cal_hcst_ev_res <- Subset(cal_hcst_ev_res, along = 'unneeded', - indices = 1, drop = 'selected') - - for (ps in 1:length(categories)) { - names(dim(lims_cal_hcst_tr_res[[ps]])) <- c('bin', 'dat', 'var', 'sweek', - 'time', 'latitude', 'longitude', - 'syear') - names(dim(lims_obs_tr_res[[ps]])) <- c('bin', 'dat', 'var', 'sweek', - 'time', 'latitude', 'longitude', - 'unneeded', 'syear') - lims_obs_tr_res[[ps]] <- Subset(lims_obs_tr_res[[ps]], - along = 'unneeded', indices = 1, - drop = 'selected') - } + 'latitude', 'longitude', + 'ensemble', 'syear') + names(dim(cal_hcst_tr_res)) <- c('dat', 'var', 'sday', 'sweek', + 'loop', 'time', + 'latitude', 'longitude', 'ensemble', 'syear') + names(dim(obs_tr_res)) <- c('dat', 'var', 'sday', 'sweek', 'ensemble', 'time', + 'latitude', 'longitude', 'unneeded', 'syear') + obs_tr_res <- Subset(obs_tr_res, along = 'unneeded', + 
indices = 1, drop = 'selected') + cal_hcst_ev_res <- Subset(cal_hcst_ev_res, along = 'unneeded', + indices = 1, drop = 'selected') + for(ps in 1:length(categories)) { + names(dim(lims_cal_hcst_tr_res[[ps]])) <- c('bin', 'dat', 'var', 'sday', + 'sweek', 'time', + 'latitude', 'longitude', + 'syear') + names(dim(lims_obs_tr_res[[ps]])) <- c('bin', 'dat', 'var', 'sday', 'sweek', + 'time', 'latitude', 'longitude', + 'unneeded', 'syear') + lims_obs_tr_res[[ps]] <- Subset(lims_obs_tr_res[[ps]], + along = 'unneeded', indices = 1, + drop = 'selected') } - # Make categories rounded number to use as names: - categories <- recipe$Analysis$Workflow$Probabilities$percentiles - categories <- lapply(categories, function (x) { - sapply(x, function(y) { - round(eval(parse(text = y)),2)})}) - recipe$Run$output_dir <- paste0(recipe$Run$output_dir, - "/outputs/Calibration/") - # Forecast calibration: if (!is.null(data$fcst)) { if (tolower(recipe$Analysis$Horizon) %in% c('seasonal', 'decadal')) { - data$fcst$data <- Calibration(exp = data$hcst$data, - obs = data$obs$data, + if (cal_method %in% c('PTF', 'DIST', 'RQUANT', 'QUANT', 'SSPLIN')) { + data$fcst$data <- QuantileMapping(exp = data$hcst$data, + obs = data$obs$data, + exp_cor = data$fcst$data, + method = cal_method, + memb_dim = 'ensemble', + sdate_dim = 'syear', + window_dim = NULL, + na.rm = TRUE, qstep = 0.1, + ncores = ncores) + hcst_cal <- QuantileMapping(exp = data$hcst$data, + obs = data$obs$data, + method = cal_method, + memb_dim = 'ensemble', + sdate_dim = 'syear', + window_dim = NULL, + na.rm = TRUE, qstep = 0.1, + ncores = ncores) + } else { + data$fcst$data <- Calibration(exp = data$hcst$data, + obs = data$obs$data, exp_cor = data$fcst$data, cal.method = cal_method, multi.model = FALSE, @@ -219,7 +237,7 @@ Crossval_Calibration <- function(recipe, data, correct_negative = FALSE) { alpha = NULL, memb_dim = 'ensemble', sdate_dim = 'syear', dat_dim = NULL, ncores = ncores) - hcst_cal <- Calibration(exp = data$hcst$data, + hcst_cal <- Calibration(exp = data$hcst$data, obs = data$obs$data, cal.method = cal_method, eval.method = 'in-sample', @@ -229,6 +247,7 @@ Crossval_Calibration <- function(recipe, data, correct_negative = FALSE) { alpha = NULL, memb_dim = 'ensemble', sdate_dim = 'syear', dat_dim = NULL, ncores = ncores) + } # For compatibility with subseasonal: obs <- data$obs$data } else { # if subseasonal @@ -243,7 +262,26 @@ Crossval_Calibration <- function(recipe, data, correct_negative = FALSE) { indices = (dim(data$obs$data)['sweek'] + 1) / 2) fcst <- Subset(data$fcst$data, along = 'sday', indices = 1, drop = 'selected') - data$fcst$data <- Calibration(exp = hcst, + if (cal_method %in% c('PTF', 'DIST', 'RQUANT', 'QUANT', 'SSPLIN')) { + data$fcst$data <- QuantileMapping(exp = hcst, + obs = obs, + exp_cor = fcst, + method = cal_method, + memb_dim = 'ensemble', + sdate_dim = 'syear', + window_dim = NULL, + na.rm = TRUE, qstep = 0.1, + ncores = ncores) + hcst_cal <- QuantileMapping(exp = hcst, + obs = obs, + method = cal_method, + memb_dim = 'ensemble', + sdate_dim = 'syear', + window_dim = NULL, + na.rm = TRUE, qstep = 0.1, + ncores = ncores) + } else { + data$fcst$data <- Calibration(exp = hcst, obs = obs, exp_cor = fcst, cal.method = cal_method, @@ -253,7 +291,7 @@ Crossval_Calibration <- function(recipe, data, correct_negative = FALSE) { alpha = NULL, memb_dim = 'ensemble', sdate_dim = 'syear', dat_dim = NULL, ncores = ncores) - hcst_cal <- Calibration(exp = hcst, + hcst_cal <- Calibration(exp = hcst, obs = obs, cal.method = cal_method, 
eval.method = 'in-sample', @@ -263,6 +301,7 @@ Crossval_Calibration <- function(recipe, data, correct_negative = FALSE) { alpha = NULL, memb_dim = 'ensemble', sdate_dim = 'syear', dat_dim = NULL, ncores = ncores) + } if (!('sday' %in% names(dim(data$fcst$data)))) { data$fcst$data <- InsertDim(data$fcst$data, len = 1, pos = 3, name = 'sday') @@ -293,6 +332,15 @@ Crossval_Calibration <- function(recipe, data, correct_negative = FALSE) { # SAVING 'lims' which are the observed category limits: # TODO saving: + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Calibration/") + + # Make categories rounded number to use as names: + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + round(eval(parse(text = y)),2)})}) + if (recipe$Analysis$Workflow$Probabilities$save == 'all') { for (ps in 1:length(categories)) { ## TODO: use .drop_dims() diff --git a/modules/Loading/R/dates2load.R b/modules/Loading/R/dates2load.R index 2c0b9301..a371984e 100644 --- a/modules/Loading/R/dates2load.R +++ b/modules/Loading/R/dates2load.R @@ -82,9 +82,34 @@ dates2load <- function(recipe, logger) { # if no fcst year is requested: file_dates.fcst <- NULL } + #----------------------------------------------------------------- + if (FALSE) { + #if (recipe$Analysis$Datasets$System$name == 'ECMWF-ENS-EXT') { + if (recipe$Analysis$Variables$freq =='weekly_mean') { + info(recipe$Run$logger, + "hcst period defined in recipe not used for ECMWF-ENS-EXT model.") + sd <- as.Date(as.character(recipe$Analysis$Time$sdate), format ="%Y%m%d") + if (tolower(weekdays(sd)) != tolower(recipe$Analysis$Time$week_day)) { + info(recipe$Run$logger, + paste("sdate is not a", recipe$Analysis$Time$week_day)) + } + year <- as.numeric(format(sd, "%Y")) + fcst_sweek_ind <- (recipe$Analysis$Time$sweek_window + 1)/2 + fcst.sdate_to_start <- as.character(format(sd + + 7 *((fcst_sweek_ind - 1)/2), "%Y%m%d")) + + file_dates <- subseas_file_dates(fcst.sdate_to_start, + recipe$Analysis$Time$sweek_window, + recipe$Analysis$Time$sday_window, + year - 20, model = 'ECMWF-ENS-EXT', + var = recipe$Analysis$Variables$name, + year - 1, recipe$Analysis$Time$ftime_min, + recipe$Analysis$Time$ftime_max, "hcst") + file_dates.fcst <- recipe$Analysis$Time$fcst_year + } + } return(list(hcst = file_dates, fcst = file_dates.fcst)) ## TODO: document header of fun - } # adds the correspondent dims to each sdate array diff --git a/modules/Loading/R/load_subseasonal.R b/modules/Loading/R/load_subseasonal.R index 329be209..65d2cffa 100644 --- a/modules/Loading/R/load_subseasonal.R +++ b/modules/Loading/R/load_subseasonal.R @@ -35,13 +35,12 @@ load_subseasonal <- function(recipe) { # get datasets dict: archive <-get_archive(recipe) # read_yaml("conf/archive_subseasonal.yml")[[recipe$Run$filesystem]] exp_descrip <- archive$System[[exp.name]] - freq.hcst <- unlist(exp_descrip[[store.freq]][variable[1]]) reference_descrip <- archive$Reference[[ref.name]] freq.obs <- unlist(reference_descrip[[store.freq]][variable[1]]) obs.dir <- reference_descrip$src - fcst.dir <- exp_descrip$src - hcst.dir <- exp_descrip$src + fcst.dir <- exp_descrip$srcfc + hcst.dir <- exp_descrip$srchc fcst.nmember <- exp_descrip$nmember$fcst hcst.nmember <- exp_descrip$nmember$hcst @@ -91,7 +90,8 @@ load_subseasonal <- function(recipe) { #------------------------------------------------------------------- ## NOTE: metadata_dims has to be specified as 'file_date' to be able to get ## the metadata when the
first file is missing. However, when retrieving two - ## variables, it must be 'var'. Start() does not admit both. + ## variables, it must be 'var'. Start() does not admit both. + if (recipe$Analysis$Datasets$System$name == "NCEP-CFSv2") { hcst <- Start(dat = hcst.path, var = variable, var_dir = var_dir_exp, @@ -117,7 +117,34 @@ load_subseasonal <- function(recipe) { time = 'file_date'), split_multiselected_dims = split_multiselected_dims, retrieve = TRUE) - + } else { + hcst.path <- paste0(archive$src_sys, hcst.dir, "$var_dir$", + "$file_date$.nc") + hcst <- Start(dat = hcst.path, + var = variable, + var_dir = var_dir_exp, + file_date = sdates$hcst, + time = recipe$Analysis$Time$ftime_min:recipe$Analysis$Time$ftime_max, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$fcst.transform, + transform_params = list(grid = regrid_params$fcst.gridtype, + method = regrid_params$fcst.gridmethod, + print_sys_msg = TRUE), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble', 'lev')), + ensemble = indices(1:hcst.nmember), + metadata_dims = c('file_date'), # change? + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + } # Remove var_dir dimension if ("var_dir" %in% names(dim(hcst))) { hcst <- Subset(hcst, along = "var_dir", indices = 1, drop = "selected") @@ -156,7 +183,6 @@ load_subseasonal <- function(recipe) { # the call uses file_date instead of fcst_syear so that it can work # with the daily case and the current version of startR not allowing # multiple dims split - fcst <- Start(dat = fcst.path, var = variable, var_dir = var_dir_exp, @@ -243,15 +269,23 @@ load_subseasonal <- function(recipe) { # Loop over years to replace missing dates for (years in 1:dim(dates)['syear']) { that_year <- dates[,,years,] - if (any(is.na(that_year))) { - pos_missed <- which(is.na(that_year)) - actual_year <- that_year - actual_year <- format(actual_year[which(!is.na(actual_year))[1]], - "%Y") - that_year[pos_missed] <- as.POSIXct(as.Date(paste0(actual_year, - format(complete_year[pos_missed], "-%m-%d")))) - dates[,,years,] <- that_year - } + if (!all(is.na(that_year))) { + if (any(is.na(that_year))) { + pos_missed <- which(is.na(that_year)) + actual_year <- that_year + actual_year <- format(actual_year[which(!is.na(actual_year))[1]], + "%Y") + that_year[pos_missed] <- as.POSIXct(as.Date(paste0(actual_year, + format(complete_year[pos_missed], "-%m-%d")))) + dates[,,years,] <- that_year + } + } else { # if one year is totally empty + if ( years != 1) { + actual_year <- as.numeric(format(dates[1,1,years-1,1], "%Y")) + 1 + dates[,,years,] <- as.POSIXct(as.Date(paste0(actual_year, + format(complete_year, "-%m-%d")))) + } + } } # ----------------------------------------------------- dates_file <- format(as.Date(dates, '%Y%m%d'), "%Y%m%d") diff --git a/modules/Loading/R/subseas_file_dates.R b/modules/Loading/R/subseas_file_dates.R index eca66c55..bc8301aa 100644 --- a/modules/Loading/R/subseas_file_dates.R +++ b/modules/Loading/R/subseas_file_dates.R @@ -7,7 +7,8 @@ subseas_file_dates <- function(startdate, n.skill.weeks, n.days, hcst.start, hcst.end, - ftime_min, ftime_max, out) { + ftime_min, ftime_max, out, + model = "NCEP-CFSv2", var = NULL) { # Generate the 
sday_window vectors: ## Only for Thursdays and Mondays ## Create a diagonal matrix of 3 and 4 days matching Mondays/Thursdays @@ -37,7 +38,7 @@ subseas_file_dates <- function(startdate, n.skill.weeks, n.days, monday_win <- c(rev(next_sday) * -1, 0, prev_sday) } else { thrusday_win <- monday_win <- 0 - } + } #### END Generation sday_window vectors ftime_min <- as.numeric(substr(as.character(startdate), 1, 4)) - hcst.end @@ -81,7 +82,10 @@ subseas_file_dates <- function(startdate, n.skill.weeks, n.days, (strtoi(sday.year) - ftime_min), sep = "") sday.dates <- apply(expand.grid(sday.years, sday.mday), 1, paste, collapse = "") - sdays.file_dates <- abind(sdays.file_dates, sday.dates, along = 1) + if (model == 'ECMWF-ENS-EXT') { + sday.dates <- paste0(sday.sdate,"/", var, "_", sday.dates) + } + sdays.file_dates <- abind(sdays.file_dates, sday.dates, along = 1) } names(dim(sdays.file_dates)) <- c('sday','syear') diff --git a/recipe_daily_subseas.yml b/recipe_daily_subseas.yml new file mode 100644 index 00000000..e2ad2bd2 --- /dev/null +++ b/recipe_daily_subseas.yml @@ -0,0 +1,131 @@ +# IMPORTANT: This recipe is not intended to represent a real workflow: it is only a template showcasing ALL available options. +Description: + Author: N.Pérez-Zanón + Info: # Complete recipe containing all possible fields. +Analysis: + Horizon: subseasonal # Mandatory, str: 'seasonal', or 'decadal'. Subseasonal is in development + Variables: + # name: variable name(s) in the /esarchive (Mandatory, str) + # freq: 'monthly_mean', 'daily' or 'daily_mean' (Mandatory, str) + # units: desired data units for each variable. Only available for temperature, + # precipitation, and pressure variables. + #- {name: 'tas', freq: 'weekly_mean', units: 'C'} + #- {name: 'prlr', freq: 'weekly_mean', units: 'mm', flux: FALSE} + - {name: 'psl', freq: 'daily_mean', units: 'hPa', flux: FALSE} +# name: 'tas' +# freq: 'weekly_mean' +# units: 'C' + # To request more variables to be divided in atomic recipes, add them this way: +# - {name: 'prlr', freq: 'weekly_mean', units: 'mm'} +# - {name: 'sfcWind', freq: 'weekly_mean', units: 'm s-1'} +# - {name: 'rsds', freq: 'weekly_mean', units: 'W m-2'} + # To request multiple variables *in the same* atomic recipe, add them this way: + # - {name: 'tas, prlr, sfcWind, rsds', freq: 'weekly_mean', units: {tas: 'C', prlr: 'mm', sfcWind: 'm s-1', rsds:'W m-2'}} + Datasets: + System: + # name: System name (Mandatory, str) + # member: 'all' or individual members, separated by a comma and in quotes (decadal only, str) + - {name: 'NCEP-CFSv2', member: 'all'} + # - {name: 'ECMWF-ENS-EXT', member: 'all'} + # To request more Systems to be divided in atomic recipes, add them this way: + # - {name: 'Meteo-France-System7'} + Multimodel: + execute: no # Either yes/true or no/false (Mandatory, bool) + approach: pooled # Multimodel computation approach. 'pooled' currently the only option (str) + createFrom: Anomalies # Which module should the anomalies be created from (str) + Reference: + - {name: 'ERA5'} # Reference name (Mandatory, str) + # To request more References to be divided into atomic recipes, add them this way: + # - {name: 'ERA5Land'} + Time: + sdate: 20241024 #20241024 #%Y%m%d # Change to 2023 + #- '1201' # Start date, 'mmdd' (Mandatory, int) + # To request more startdates to be divided into atomic recipes, add them this way: + # - '0101' + # - '0201' + # ...
+ fcst_year: 20241024 #20241024 # Forecast initialization year 'YYYY' (Optional, int) + hcst_start: '1999' # Hindcast initialization start year 'YYYY' (Mandatory, int) + hcst_end: '2016' # Hindcast initialization end year 'YYYY' (Mandatory, int) + ftime_min: 5 # First forecast time step in months. Starts at “1”. (Mandatory, int) + ftime_max: 11 # Last forecast time step in months. Starts at “1”. (Mandatory, int) + week_day: Thursday + sweek_window: 9 + sday_window: 3 + Region: + # latmin: minimum latitude (Mandatory, int) + # latmax: maximum latitude (Mandatory, int) + # lonmin: # minimum longitude (Mandatory, int) + # lonmax: # maximum longitude (Mandatory, int) + # To request more regions to be divided in atomic recipes, add them this way: + # {name: "Nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 60} + #- {name: "Kuwait", latmin: 28, latmax: 31, lonmin: 46, lonmax: 49} + - {name: "Iberia", latmin: 36, latmax: 44, lonmin: -10, lonmax: 5} + #- {name: "EU", latmin: 20, latmax: 80, lonmin: -20, lonmax: 40} + #- {name: 'Global', latmin: -90, latmax: 90, lonmin: -180, lonmax: 179.9} + Regrid: + method: conservative # Interpolation method (Mandatory, str) + type: "to_system" #"conf/grid_description/griddes_system51c3s.txt" # Interpolate to: 'to_system', 'to_reference', 'none', + # or CDO-accepted grid. (Mandatory, str) + Workflow: + # This is the section of the recipe where the parameters for each module are specified + Time_aggregation: + execute: no + Calibration: + method: "bias" #evmos # Calibration method. (Mandatory, str) + save: 'all' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str) + Skill: + metric: mean_bias rpss enscorr # List of skill metrics separated by spaces or commas. (Mandatory, str) + save: 'all' # Options: 'all', 'none' (Mandatory, str) + Statistics: + metric: cov std var n_eff # List of statistics separated by spaces or commas. (Mandatory, str) + save: 'none' # Options: 'all', 'none' (Mandatory, str) + Probabilities: + # percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Thresholds + percentiles: [[1/3, 2/3], [1/10], [9/10]] + # for quantiles and probability categories. Each set of thresholds should be + # enclosed within brackets. For now, they are INDEPENDENT from skill metrics. (Optional) + save: 'all' # Options: 'all', 'none', 'bins_only', 'percentiles_only' (Mandatory, str) + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles + multi_panel: no + mask_terciles: 'both' + mask_ens: 'both' + projection: Robinson + file_format: 'PNG' + dots_terciles: no # Whether to dot the non-significant by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + # mask_ens: no # Whether to mask the non-significant points by rpss in the forecast ensemble mean plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + Scorecards: + execute: no # yes/no + regions: + # Mandatory: Define regions for which the spatial aggregation will be performed. + # The regions must be included within the area defined in the 'Analysis:Region' section. + Iberia: {lon.min: -10, lon.max: 5, lat.min: 36, lat.max: 44} + #Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + #Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + #Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: 1, 2, 3 # Mandatory, int: start months to visualise in scorecard table. Options: 'all' or a sequence of numbers. 
+ metric: mean_bias enscorr rpss crpss enssprerr # Mandatory: metrics to visualise in scorecard table + metric_aggregation: 'score' # Mandatory, str: level of aggregation for skill scores. Options: 'score' or 'skill' + inf_to_na: True # Optional, bool: set inf values in data to NA, default is no/False + table_label: NULL # Optional, str: extra information to add in scorecard table title + fileout_label: NULL # Optional, str: extra information to add in scorecard output filename + col1_width: NULL # Optional, int: to adjust width of first column in scorecards table + col2_width: NULL # Optional, int: to adjust width of second column in scorecards table + calculate_diff: False # Mandatory, bool: True/False + ncores: 32 # Number of cores to be used in parallel computation. + # If left empty, defaults to 1. (Optional, int) + remove_NAs: yes # Whether to remove NAs. + # If left empty, defaults to no/false. (Optional, bool) + Output_format: 'Scorecards' # 'S2S4E' or 'Scorecards'. Determines the format of the output. Default is 'S2S4E'. +Run: + filesystem: gpfs # Name of the filesystem as defined in the archive configuration file + Loglevel: INFO # Minimum category of log messages to display: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. + # Default value is 'INFO'. (Optional, str) + Terminal: yes # Optional, bool: Whether to display log messages in the terminal. + # Default is yes/true. + output_dir: /home/bsc/bsc032339/ # Output directory. Must have write permissions. (Mandatory, str) + code_dir: /home/bsc/bsc032339/sunset/ # Directory where the code is stored. Is used when launching jobs (not running interactively) + autosubmit: no # Whether or not to run with Autosubmit. Only for non-atomic recipes (not running interactively) + # fill only if using autosubmit + diff --git a/recipe_ecvs_cal_seas.yml b/recipe_ecvs_cal_seas.yml index 3417d2d9..0bcf9f20 100644 --- a/recipe_ecvs_cal_seas.yml +++ b/recipe_ecvs_cal_seas.yml @@ -24,7 +24,7 @@ Analysis: Reference: name: ERA5 # Mandatory, str: Reference codename. See docu. Time: - sdate: '0101' + sdate: '0201' fcst_year: '2025' hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' diff --git a/recipe_ecvs_cal_subseas.yml b/recipe_ecvs_cal_subseas.yml index 51610f2b..abd91f05 100644 --- a/recipe_ecvs_cal_subseas.yml +++ b/recipe_ecvs_cal_subseas.yml @@ -9,7 +9,7 @@ Analysis: # freq: 'monthly_mean', 'daily' or 'daily_mean' (Mandatory, str) # units: desired data units for each variable. Only available for temperature, # precipitation, and pressure variables. - #- {name: 'tas', freq: 'weekly_mean', units: 'C'} + - {name: 'tas', freq: 'weekly_mean', units: 'C'} - {name: 'prlr', freq: 'weekly_mean', units: 'mm', flux: yes} # name: 'tas' # freq: 'weekly_mean' @@ -36,13 +36,13 @@ Analysis: # To request more References to be divided into atomic recipes, add them this way: # - {name: 'ERA5Land'} Time: - sdate: 20250123 #%Y%m%d # Change to 2023 + sdate: 20250213 #%Y%m%d # Change to 2023 #- '1201' # Start date, 'mmdd' (Mandatory, int) # To request more startdates to be divided into atomic recipes, add them this way: # - '0101' # - '0201' # ... - fcst_year: 20250123 # Forecast initialization year 'YYYY' (Optional, int) + fcst_year: 20250213 # Forecast initialization year 'YYYY' (Optional, int) hcst_start: '1999' # Hindcast initialization start year 'YYYY' (Mandatory, int) hcst_end: '2016' # Hindcast initialization end year 'YYYY' (Mandatory, int) ftime_min: 1 # First forecast time step in months.
From 07f2f6b65d1a34eecd9d880b5dd10f39314a9a17 Mon Sep 17 00:00:00 2001
From: Nuria Perez Zanon
Date: Thu, 29 May 2025 16:29:48 +0200
Subject: [PATCH 77/78] Fixes cross_calibration

---
 modules/Crossval/Crossval_Calibration.R | 68 +++++++++++++++++++------
 1 file changed, 53 insertions(+), 15 deletions(-)

diff --git a/modules/Crossval/Crossval_Calibration.R b/modules/Crossval/Crossval_Calibration.R
index e357853b..bb7773e1 100644
--- a/modules/Crossval/Crossval_Calibration.R
+++ b/modules/Crossval/Crossval_Calibration.R
@@ -60,13 +60,13 @@ Crossval_Calibration <- function(recipe, data, correct_negative = FALSE) {
 
   if (tolower(recipe$Analysis$Horizon) == 'subseasonal') {
     hcst_tr <- MergeDims(hcst_tr, merge_dims = c('sday', 'syear'),
-                         rename_dim = 'syear', na.rm = FALSE)
+                         rename_dim = 'syear', na.rm = na.rm)
     obs_tr <- MergeDims(obs_tr, merge_dims = c('sday', 'syear'),
-                        rename_dim = 'syear', na.rm = FALSE)
+                        rename_dim = 'syear', na.rm = na.rm)
     hcst_ev <- MergeDims(hcst_ev, merge_dims = c('sday', 'syear'),
-                         rename_dim = 'syear', na.rm = FALSE)
+                         rename_dim = 'syear', na.rm = na.rm)
   }
   if (cal_method %in% c('PTF', 'DIST', 'RQUANT', 'QUANT', 'SSPLIN')) {
@@ -75,7 +75,8 @@ Crossval_Calibration <- function(recipe, data, correct_negative = FALSE) {
                                    memb_dim = 'ensemble',
                                    sdate_dim = 'syear',
                                    window_dim = NULL,
-                                   na.rm = TRUE, qstep = 0.1,
+                                   na.rm = na.rm, qstep = 0.1,
+                                   wet.day = FALSE,
                                    ncores = ncores)
     cal_hcst_ev <- QuantileMapping(exp = hcst_tr, obs = obs_tr,
                                    exp_cor = hcst_ev,
@@ -83,21 +84,52 @@ Crossval_Calibration <- function(recipe, data, correct_negative = FALSE) {
                                    memb_dim = 'ensemble',
                                    sdate_dim = 'syear',
                                    window_dim = NULL,
-                                   na.rm = TRUE, qstep = 0.1,
+                                   wet.day = FALSE,
+                                   na.rm = na.rm, qstep = 0.1,
                                    ncores = ncores)
+# Try to plot quantile-quantile:
+# browser()
+# test <- quantile(cal_hcst_tr[,,1,1,5,1,10,26], seq(0,1,0.1), na.rm = T)
+# test2 <- quantile(obs_tr[1,1,,5,1,10,26,1], seq(0,1,0.1), na.rm = T)
+
+# plot(test2, test)
+# test1 <- cal_hcst_tr[1,,1,1,5,1,14,1]
+# test2 <- hcst_tr[1,1,,5,1,1,14,1]
+# pos <- order(test2, na.last=NA)
+# test2 <- sort(test2)
+# test1 <- test1[pos]
+# plot(test1, test2, xlim = c(990,1030),ylim=c(990,1030),
+#      xlab = "Original hindcast - training indices (hPa)",
+#      ylab = "Calibrated hindcast - training indices (hPa)",
+#      main = "One ensemble member")
+# lines(x=c(920,1030), y=c(920,1030), type = "l", col = 'blue')
+
+# test3 <- MeanDims(cal_hcst_tr, 'ensemble')
+# test3 <- test3[,1,1,5,1,14,1]
+# test4 <- MeanDims(hcst_tr, 'ensemble')
+# test4 <- test4[1,1,,5,1,14,1]
+# pos <- order(test4, na.last =NA)
+# test4 <- sort(test4)
+# test3 <- test3[pos]
+# plot(test4, test3, xlim = c(1000,1030),ylim=c(1000,1030),
+#      xlab = "Original hindcast - training indices (hPa)",
+#      ylab = "Calibrated hindcast - training indices (hPa)",
+#      main = "Ensemble mean")
+# lines(x=c(1000,1030), y=c(1000,1030), type = "l", col = 'blue')
+
   } else {
     cal_hcst_tr <- Calibration(exp = hcst_tr, obs = obs_tr,
                                cal.method = cal_method,
                                memb_dim = 'ensemble', sdate_dim = 'syear',
                                eval.method = 'in-sample',
-                               na.fill = FALSE, na.rm = TRUE,
+                               na.fill = TRUE, na.rm = na.rm,
                                apply_to = NULL, alpha = NULL,
                                ncores = ncores)
     cal_hcst_ev <- Calibration(exp = hcst_tr, obs = obs_tr, exp_cor = hcst_ev,
                                cal.method = cal_method,
                                memb_dim = 'ensemble', sdate_dim = 'syear',
                                eval.method = 'in-sample',
-                               na.fill = FALSE, na.rm = TRUE,
+                               na.fill = TRUE, na.rm = na.rm,
                                apply_to = NULL, alpha = NULL,
                                ncores = ncores)
   }
@@ -126,6 +158,7 @@ Crossval_Calibration <- function(recipe, data, correct_negative = FALSE) {
                        ncores = ncores)
   if (tolower(recipe$Analysis$Horizon) == 'subseasonal') {
+    browser()
     cal_hcst_tr <- SplitDim(cal_hcst_tr, split_dim = 'syear',
                             new_dim_name = 'sday',
                             indices = rep(1:dim(data$hcst$data)['sday'],
@@ -216,7 +249,8 @@ Crossval_Calibration <- function(recipe, data, correct_negative = FALSE) {
                               memb_dim = 'ensemble',
                               sdate_dim = 'syear',
                               window_dim = NULL,
-                              na.rm = TRUE, qstep = 0.1,
+                              na.rm = na.rm, qstep = 0.1,
+                              wet.day = FALSE,
                               ncores = ncores)
       hcst_cal <- QuantileMapping(exp = data$hcst$data,
                                   obs = data$obs$data,
@@ -224,7 +258,8 @@ Crossval_Calibration <- function(recipe, data, correct_negative = FALSE) {
                                   memb_dim = 'ensemble',
                                   sdate_dim = 'syear',
                                   window_dim = NULL,
-                                  na.rm = TRUE, qstep = 0.1,
+                                  wet.day = FALSE,
+                                  na.rm = na.rm, qstep = 0.1,
                                   ncores = ncores)
     } else {
       data$fcst$data <- Calibration(exp = data$hcst$data,
@@ -232,7 +267,7 @@ Crossval_Calibration <- function(recipe, data, correct_negative = FALSE) {
                                     exp_cor = data$fcst$data,
                                     cal.method = cal_method,
                                     multi.model = FALSE,
-                                    na.fill = TRUE, na.rm = TRUE,
+                                    na.fill = TRUE, na.rm = na.rm,
                                     apply_to = NULL, alpha = NULL,
                                     memb_dim = 'ensemble',
                                     sdate_dim = 'syear',
@@ -242,7 +277,7 @@ Crossval_Calibration <- function(recipe, data, correct_negative = FALSE) {
                               cal.method = cal_method,
                               eval.method = 'in-sample',
                               multi.model = FALSE,
-                              na.fill = TRUE, na.rm = TRUE,
+                              na.fill = TRUE, na.rm = na.rm,
                               apply_to = NULL, alpha = NULL,
                               memb_dim = 'ensemble',
                               sdate_dim = 'syear',
@@ -270,7 +305,8 @@ Crossval_Calibration <- function(recipe, data, correct_negative = FALSE) {
                               memb_dim = 'ensemble',
                               sdate_dim = 'syear',
                               window_dim = NULL,
-                              na.rm = TRUE, qstep = 0.1,
+                              na.rm = na.rm, qstep = 0.1,
+                              wet.day = FALSE,
                               ncores = ncores)
       hcst_cal <- QuantileMapping(exp = hcst,
                                   obs = obs,
@@ -278,7 +314,9 @@ Crossval_Calibration <- function(recipe, data, correct_negative = FALSE) {
                                   memb_dim = 'ensemble',
                                   sdate_dim = 'syear',
                                   window_dim = NULL,
-                                  na.rm = TRUE, qstep = 0.1,
+                                  na.rm = na.rm,
+                                  wet.day = FALSE,
+                                  qstep = 0.1,
                                   ncores = ncores)
     } else {
       data$fcst$data <- Calibration(exp = hcst,
@@ -286,7 +324,7 @@ Crossval_Calibration <- function(recipe, data, correct_negative = FALSE) {
                                     exp_cor = fcst,
                                     cal.method = cal_method,
                                     multi.model = FALSE,
-                                    na.fill = TRUE, na.rm = TRUE,
+                                    na.fill = TRUE, na.rm = na.rm,
                                     apply_to = NULL, alpha = NULL,
                                     memb_dim = 'ensemble',
                                     sdate_dim = 'syear',
@@ -296,7 +334,7 @@ Crossval_Calibration <- function(recipe, data, correct_negative = FALSE) {
                               cal.method = cal_method,
                               eval.method = 'in-sample',
                               multi.model = FALSE,
-                              na.fill = TRUE, na.rm = TRUE,
+                              na.fill = TRUE, na.rm = na.rm,
                               apply_to = NULL, alpha = NULL,
                               memb_dim = 'ensemble',
                               sdate_dim = 'syear',
--
GitLab
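Patch 77 keeps a commented-out quantile-quantile diagnostic next to the QuantileMapping calls. For reference, here is a self-contained sketch of the same check on synthetic values; the numbers and the linear relation below are invented, whereas the real diagnostic indexed specific positions of hcst_tr and cal_hcst_tr from the author's test case.

set.seed(1)
raw <- rnorm(24, mean = 1010, sd = 6)       # stand-in for one hindcast member (hPa)
cal <- 0.9 * raw + 101 + rnorm(24, sd = 1)  # stand-in for its calibrated counterpart
pos <- order(raw, na.last = NA)             # pair calibrated values with sorted raw ones
plot(sort(raw), cal[pos],
     xlab = "Original hindcast - training indices (hPa)",
     ylab = "Calibrated hindcast - training indices (hPa)",
     main = "One ensemble member")
lines(x = c(990, 1030), y = c(990, 1030), type = "l", col = "blue")  # 1:1 reference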
From 51796d995a137a89a785bbaa06c587335bef8be9 Mon Sep 17 00:00:00 2001
From: Nuria Perez Zanon
Date: Thu, 29 May 2025 17:12:18 +0200
Subject: [PATCH 78/78] quantiles with na.rm = TRUE

---
 modules/Crossval/Crossval_Calibration.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/modules/Crossval/Crossval_Calibration.R b/modules/Crossval/Crossval_Calibration.R
index bb7773e1..0853f12c 100644
--- a/modules/Crossval/Crossval_Calibration.R
+++ b/modules/Crossval/Crossval_Calibration.R
@@ -142,7 +142,7 @@ Crossval_Calibration <- function(recipe, data, correct_negative = FALSE) {
                          fun = function(x, prob_lims) {
                            lapply(prob_lims, function(ps) {
                              as.array(quantile(as.vector(x), ps,
-                                               na.rm = na.rm))})},
+                                               na.rm = TRUE))})},
                          output_dims = lapply(categories,
                                               function(x) {'bin'}),
                          prob_lims = categories,
@@ -151,7 +151,7 @@ Crossval_Calibration <- function(recipe, data, correct_negative = FALSE) {
                          fun = function(x, prob_lims) {
                            lapply(prob_lims, function(ps) {
                              as.array(quantile(as.vector(x), ps,
-                                               na.rm = na.rm))})},
+                                               na.rm = TRUE))})},
                          output_dims = lapply(categories,
                                               function(x){'bin'}),
                          prob_lims = categories,
@@ -354,7 +354,7 @@ Crossval_Calibration <- function(recipe, data, correct_negative = FALSE) {
                     fun = function(x, prob_lims) {
                       lapply(prob_lims, function(ps) {
                         as.array(quantile(as.vector(x), ps,
-                                          na.rm = na.rm))})},
+                                          na.rm = TRUE))})},
                     output_dims = lapply(categories, function(x) {'bin'}),
                     prob_lims = categories, ncores = ncores)
@@ -362,7 +362,7 @@ Crossval_Calibration <- function(recipe, data, correct_negative = FALSE) {
                     fun = function(x, prob_lims) {
                       lapply(prob_lims, function(ps) {
                         as.array(quantile(as.vector(x), ps,
-                                          na.rm = na.rm))})},
+                                          na.rm = TRUE))})},
                     output_dims = lapply(categories, function(x) {'bin'}),
                     prob_lims = categories, ncores = ncores)
--
GitLab
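Patch 78 hard-codes na.rm = TRUE in the quantile() calls that derive the category limits. The reason: with the default na.rm = FALSE, base R's quantile() stops with an error as soon as the series contains a missing value, so a single NA at one grid point would abort the computation of the probability bins. Below is a standalone sketch of that Apply/quantile pattern on an invented array; the dimension names follow the syear/ensemble convention of the calibration calls above, while the array sizes and the tercile limits are for illustration only.

library(multiApply)

set.seed(1)
hcst <- array(rnorm(250), dim = c(syear = 25, ensemble = 10))
hcst[3, 7] <- NA                 # one missing value is enough to matter
categories <- list(c(1/3, 2/3))  # example: tercile limits

lims <- Apply(hcst, target_dims = c('syear', 'ensemble'),
              fun = function(x, prob_lims) {
                lapply(prob_lims, function(ps) {
                  as.array(quantile(as.vector(x), ps, na.rm = TRUE))})},
              output_dims = lapply(categories, function(x) {'bin'}),
              prob_lims = categories, ncores = 1)
print(lims[[1]])  # finite tercile limits; with na.rm = FALSE quantile() would error on the NA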