From b80a86cfc9f57bebdb27f4c8a1d0e28db92beb7a Mon Sep 17 00:00:00 2001 From: nperez Date: Tue, 7 May 2024 18:08:59 +0200 Subject: [PATCH 01/53] temporal agg --- modules/Aggregation/Aggregation.R | 67 +++++++++++++++++++ .../ex0_1_sample_dataset/ex0_1-recipe.yml | 6 ++ 2 files changed, 73 insertions(+) create mode 100644 modules/Aggregation/Aggregation.R diff --git a/modules/Aggregation/Aggregation.R b/modules/Aggregation/Aggregation.R new file mode 100644 index 00000000..40018a7d --- /dev/null +++ b/modules/Aggregation/Aggregation.R @@ -0,0 +1,67 @@ +# recipe +## Time_aggregation$execute YES/NO +## Time_aggregation$method accumulated or average +## Time_aggregation$set for decadal +## Time_aggregation$ini based on forecast time +## Time_aggregation$end based on forecast time +## ini = 2, end = 3 with monthly freq would be a 2 months agg +Aggregation(recipe, data) { + + ncores <- recipe$Analysis$ncores # is it already checked? NULL or number + na.rm <- recipe$Analysis$remove_NAs # is it already checked? TRUE/FALSE + if (recipe$Analysis$Workflow$Time_aggregation$execute) { + # original freq + raw_freq <- tolower(recipe$Analysis$Variable$freq) + custom <- recipe$Analysis$Workflow$Time_aggregation$set + ini <- recipe$Analysis$Workflow$Time_aggregation$ini + end <- recipe$Analysis$Workflow$Time_aggregation$end + method <- tolower(recipe$Analysis$Workflow$Time_aggregation$method) +## Instead of raw freq may be better to use ini/end +## if (raw_freq %in% c('daily', 'daily_mean', 'weekly', 'weekly_mean')) { + if (!is.null(ini) && !is.null(end)) { + # calculate aggregations from fday1 to fday2 + # calculate aggregations from fweek1 to fweek2 + # other cases could be indicators or other original freq + res <- sapply(1:length(data), function(x) { + if (!is.null(data[[x]])) { + result <- agg_ini_end(data[x], method = method, + na.rm = na.rm, ncores = ncores, + ini = ini, end = end) + } else { + result <- data[x] + }}, simplify = TRUE) + + } else if (!is.null(custom)) { + # caluclate aggreagtions from fmonth1 to ftmonth2 + # for decadal, it makes sense to get all seasons and annual means too + + } else { + # higher frequencies are typically used to compute indicators + # lower frequencies are not typically stored in esarchive + } + } +} + +agg_ini_end <- function(x, ini, end, method, na.rm , ncores) { + if (method == 'average') { + x[[1]]$data <- Apply(x[[1]]$data, target_dim = 'time', + function(y) { + mean(y[ini:end], na.rm = na.rm)}, + ncores = ncores) + } else if (method == 'accumulated') { + x[[1]]$data <- Apply(x[[1]]$data, target_dim = 'time', + function(y) { + sum(y[ini:end], na.rm = na.rm)}, + ncores = ncores) + } else { + stop("Unknown method") + } + dims <- x[[1]]$dims + dims['time'] <- length(ini:end) + x[[1]]$dims <- dims + x[[1]]$coords$time <- ini:end + attr(x[[1]]$coords$time, "indices") <- TRUE + x[[1]]$attrs$Dates <- Subset(x[[1]]$attrs$Dates, along = 'time', + indices = ini:end) + return(x) +} diff --git a/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml b/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml index 662e75cc..bedb1983 100644 --- a/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml +++ b/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml @@ -28,6 +28,12 @@ Analysis: method: bilinear type: to_system Workflow: + Time_aggregation: + execute: yes + set: no + method: average + ini: 2 + end: 3 Anomalies: compute: yes cross_validation: yes -- GitLab From 2ceaaca30ca423f1a24f09978c92ec2d56b3b2a9 Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 8 May 2024 18:16:11 +0200 Subject: [PATCH 
02/53] ini and end and list of indices --- modules/Aggregation/Aggregation.R | 86 +++++++++++++------ .../ex0_1_sample_dataset/ex0_1-recipe.yml | 8 +- 2 files changed, 63 insertions(+), 31 deletions(-) diff --git a/modules/Aggregation/Aggregation.R b/modules/Aggregation/Aggregation.R index 40018a7d..6a026e57 100644 --- a/modules/Aggregation/Aggregation.R +++ b/modules/Aggregation/Aggregation.R @@ -1,18 +1,21 @@ # recipe ## Time_aggregation$execute YES/NO ## Time_aggregation$method accumulated or average -## Time_aggregation$set for decadal +## Time_aggregation$user_def for decadal ## Time_aggregation$ini based on forecast time ## Time_aggregation$end based on forecast time +# For date definition of period aggregation consider using Indicators module ## ini = 2, end = 3 with monthly freq would be a 2 months agg -Aggregation(recipe, data) { +Aggregation <- function(recipe, data) { +## Do we need these checks here? ncores <- recipe$Analysis$ncores # is it already checked? NULL or number - na.rm <- recipe$Analysis$remove_NAs # is it already checked? TRUE/FALSE + na.rm <- ifelse(is.null(recipe$Analysis$remove_NAs), + TRUE, recipe$Analysis$remove_NAs) # is it already checked? TRUE/FALSE if (recipe$Analysis$Workflow$Time_aggregation$execute) { # original freq raw_freq <- tolower(recipe$Analysis$Variable$freq) - custom <- recipe$Analysis$Workflow$Time_aggregation$set + custom <- recipe$Analysis$Workflow$Time_aggregation$user_def ini <- recipe$Analysis$Workflow$Time_aggregation$ini end <- recipe$Analysis$Workflow$Time_aggregation$end method <- tolower(recipe$Analysis$Workflow$Time_aggregation$method) @@ -22,46 +25,73 @@ Aggregation(recipe, data) { # calculate aggregations from fday1 to fday2 # calculate aggregations from fweek1 to fweek2 # other cases could be indicators or other original freq - res <- sapply(1:length(data), function(x) { - if (!is.null(data[[x]])) { - result <- agg_ini_end(data[x], method = method, - na.rm = na.rm, ncores = ncores, - ini = ini, end = end) - } else { - result <- data[x] - }}, simplify = TRUE) - - } else if (!is.null(custom)) { + if (length(ini) == length(end)) { + res <- sapply(1:length(data), function(x) { + if (!is.null(data[[x]])) { + result <- agg_ini_end(data[x], method = method, + na.rm = na.rm, ncores = ncores, + ini = ini, end = end, indices = NULL) + } else { + result <- data[x] }}, simplify = TRUE) + } + } else if (!is.ll(custom)) { # caluclate aggreagtions from fmonth1 to ftmonth2 # for decadal, it makes sense to get all seasons and annual means too - + ## Ex: January, February, March for decadal simulations aggregation + ## custom <- sort(c(seq(1,120, 12), seq(2,120,13), seq(3, 120, 14))) + if (is.list(custom)) { + res <- sapply(1:length(data), function(x) { + if (!is.null(data[[x]])) { + result <- agg_ini_end(data[x], method, na.rm, ncores, + indices = custom, ini = NULL, end = NULL) + } else { + result <- data[x]}}, simplify = TRUE) + } else { + stop("Time_aggregation$user_def must be a list") + } } else { # higher frequencies are typically used to compute indicators # lower frequencies are not typically stored in esarchive + stop("Temporal aggregation not implemented yet") } } } -agg_ini_end <- function(x, ini, end, method, na.rm , ncores) { +agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm , ncores) { + # to make the code work with both options: + if (!is.null(ini) && is.null(indices)) { + # list of vectors for the indices from ini to end pairs + indices <- lapply(1:length(ini), function(x) { + ini[x]:end[x]}) + } else { + # 
take the first and last element of each indices list for time_bounds saving + ini <- unlist(lapply(indices, function(x){x[1]})) + end <- unlist(lapply(indices, function(x){x[length(x)]})) + } if (method == 'average') { x[[1]]$data <- Apply(x[[1]]$data, target_dim = 'time', - function(y) { - mean(y[ini:end], na.rm = na.rm)}, - ncores = ncores) + function(y, ind) { + sapply(1:length(indices), function(z) { + mean(y[indices[[z]]], na.rm = na.rm)})}, ind = indices, output_dims = 'time', + ncores = ncores)$output1 } else if (method == 'accumulated') { x[[1]]$data <- Apply(x[[1]]$data, target_dim = 'time', - function(y) { - sum(y[ini:end], na.rm = na.rm)}, - ncores = ncores) + function(y, ind) { + sapply(1:length(indices), function(z) { + sum(y[indices[[z]]], na.rm = na.rm)})}, ind = indices, output_dims = 'time', + ncores = ncores)$output1 } else { stop("Unknown method") - } - dims <- x[[1]]$dims - dims['time'] <- length(ini:end) - x[[1]]$dims <- dims - x[[1]]$coords$time <- ini:end + } + x[[1]]$dims <- dim(x[[1]]$data) + x[[1]]$coords$time <- 1:length(ini) attr(x[[1]]$coords$time, "indices") <- TRUE + x[[1]]$attrs$time_bounds$start <- x[[1]]$attrs$Dates + x[[1]]$attrs$time_bounds$end <- Subset(x[[1]]$attrs$Dates, along = 'time', + indices = end) x[[1]]$attrs$Dates <- Subset(x[[1]]$attrs$Dates, along = 'time', - indices = ini:end) + indices = ini) + return(x) } diff --git a/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml b/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml index bedb1983..7c8fcb2d 100644 --- a/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml +++ b/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml @@ -30,10 +30,12 @@ Analysis: Workflow: Time_aggregation: execute: yes - set: no + user_def: + - [1, 3] + - !expr sort(c(seq(1,120, 12), seq(2,120,13), seq(3, 120, 14))) method: average - ini: 2 - end: 3 + ini: [1, 2, 1] + end: [2, 3, 3] Anomalies: compute: yes cross_validation: yes -- GitLab From e156dfa8b29cd595353dc290c701194105639c67 Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 9 May 2024 17:27:43 +0200 Subject: [PATCH 03/53] fix attrs and add example --- example_scripts/exec_timeagg.R | 41 +++++++++++ modules/Aggregation/Aggregation.R | 9 +-- modules/Visualization/R/plot_metrics.R | 9 ++- .../examples/recipe_tas_seasonal_timeagg.yml | 70 +++++++++++++++++++ 4 files changed, 124 insertions(+), 5 deletions(-) create mode 100644 example_scripts/exec_timeagg.R create mode 100644 recipes/examples/recipe_tas_seasonal_timeagg.yml diff --git a/example_scripts/exec_timeagg.R b/example_scripts/exec_timeagg.R new file mode 100644 index 00000000..1f34193c --- /dev/null +++ b/example_scripts/exec_timeagg.R @@ -0,0 +1,41 @@ +rm(list=ls()) +gc() +setwd("/esarchive/scratch/nperez/git/auto-s2s") + +source("modules/Loading/Loading.R") +source("modules/Calibration/Calibration.R") +source("modules/Anomalies/Anomalies.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") +source("tools/prepare_outputs.R") +source("modules/Aggregtation/Aggregation.") + +# Read recipe +#args = commandArgs(trailingOnly = TRUE) +#recipe_file <- args[1] +#recipe <- read_atomic_recipe(recipe_file) +## to test a single recipe: + # recipe_file <- "recipes/examples/recipe_tas_seasonal_timeagg.yml" + # recipe_file <- "recipes/examples/recipe_prlr_seasonal_units.yml" + +recipe <- prepare_outputs(recipe_file) + +# Load datasets +data <- Loading(recipe) +# Units transformation +source("modules/Units/Units.R") +test <- Units(recipe, data) +# Temporal aggregation +test 
<- Aggregation(recipe = recipe, data = data) +# Calibrate datasets +data <- Calibration(recipe, test) +# Compute skill metrics +skill_metrics <- Skill(recipe, data) +# Compute percentiles and probability bins +probabilities <- Probabilities(recipe, data) +# Export all data to netCDF +## TODO: Fix plotting +# save_data(recipe, data, skill_metrics, probabilities) +# Plot data +Visualization(recipe, data, skill_metrics, probabilities, significance = T) diff --git a/modules/Aggregation/Aggregation.R b/modules/Aggregation/Aggregation.R index 6a026e57..445d1d55 100644 --- a/modules/Aggregation/Aggregation.R +++ b/modules/Aggregation/Aggregation.R @@ -86,12 +86,13 @@ agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm , ncores) { } x[[1]]$dims <- dim(x[[1]]$data) x[[1]]$coords$time <- 1:length(ini) - attr(x[[1]]$coords$time, "indices") <- TRUE - x[[1]]$attrs$time_bounds$start <- x[[1]]$attrs$Dates + attr(x[[1]]$coords$time, "indices") <- TRUE + tmp_dates <- Subset(x[[1]]$attrs$Dates, along = 'time', + indices = ini) + x[[1]]$attrs$time_bounds$start <- tmp_dates x[[1]]$attrs$time_bounds$end <- Subset(x[[1]]$attrs$Dates, along = 'time', indices = end) - x[[1]]$attrs$Dates <- Subset(x[[1]]$attrs$Dates, along = 'time', - indices = ini) + x[[1]]$attrs$Dates <- tmp_dates return(x) } diff --git a/modules/Visualization/R/plot_metrics.R b/modules/Visualization/R/plot_metrics.R index 92b707fa..e909a1f9 100644 --- a/modules/Visualization/R/plot_metrics.R +++ b/modules/Visualization/R/plot_metrics.R @@ -206,7 +206,14 @@ plot_metrics <- function(recipe, data_cube, metrics, # Define titles toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), "\n", display_name, " / ", hcst_period) - titles <- as.vector(months) + ## time_bounds in data_cube to know if Time_aggregation was applied + if (!is.null(data_cube$attrs$time_bounds$start)) { + info(logger, paste("Time_bounds found in data_cube", + "using them for titles")) + titles <- "What" + } else { + titles <- as.vector(months) + } ## TODO: Combine PlotLayout with PlotRobinson? suppressWarnings( PlotLayout(PlotEquiMap, c('longitude', 'latitude'), diff --git a/recipes/examples/recipe_tas_seasonal_timeagg.yml b/recipes/examples/recipe_tas_seasonal_timeagg.yml new file mode 100644 index 00000000..b62ac79a --- /dev/null +++ b/recipes/examples/recipe_tas_seasonal_timeagg.yml @@ -0,0 +1,70 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: tas + freq: monthly_mean + units: K + Datasets: + System: + name: ECMWF-SEAS5.1 # Mandatory, str: system5c3s system21_m1 system35c3s + Multimodel: no # Mandatory, bool: Either yes/true or no/false + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0601' ## MMDD + fcst_year: '2023' # Optional, int: Forecast year 'YYYY' + hcst_start: '2009' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + latmin: 30 # Mandatory, int: minimum latitude + latmax: 50 # Mandatory, int: maximum latitude + lonmin: -10 # Mandatory, int: minimum longitude + lonmax: 10 # Mandatory, int: maximum longitude + Regrid: + method: bilinear # Mandatory, str: Interpolation method. See docu. 
+ type: "to_system" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: no + cross_validation: no + save: none + Time_aggregation: + execute: yes + method: average + ini: [1, 3, 2] + end: [3, 6, 4] + Calibration: + execute: yes + method: evmos # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss bss10 bss90 + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. + save: 'all' + Indicators: + index: no + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles + multi_panel: no + mask_terciles: both + file_format: PNG + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: TRUE + output_dir: /esarchive/scratch/nperez/cs_oper/ + code_dir: /esarchive/scratch/nperez/git/s2s-suite/ -- GitLab From f7baf09576063ceb6fcce9a40483e5ec95c650f6 Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 9 May 2024 18:44:38 +0200 Subject: [PATCH 04/53] plotting attr in time_bound --- example_scripts/exec_timeagg.R | 6 +-- modules/Aggregation/Aggregation.R | 6 ++- modules/Visualization/R/plot_metrics.R | 52 ++++++++++++++----- .../ex0_1_sample_dataset/ex0_1-recipe.yml | 4 +- 4 files changed, 47 insertions(+), 21 deletions(-) diff --git a/example_scripts/exec_timeagg.R b/example_scripts/exec_timeagg.R index 1f34193c..3b688985 100644 --- a/example_scripts/exec_timeagg.R +++ b/example_scripts/exec_timeagg.R @@ -25,11 +25,11 @@ recipe <- prepare_outputs(recipe_file) data <- Loading(recipe) # Units transformation source("modules/Units/Units.R") -test <- Units(recipe, data) +data <- Units(recipe, data) # Temporal aggregation -test <- Aggregation(recipe = recipe, data = data) +data <- Aggregation(recipe = recipe, data = data) # Calibrate datasets -data <- Calibration(recipe, test) +data <- Calibration(recipe, data) # Compute skill metrics skill_metrics <- Skill(recipe, data) # Compute percentiles and probability bins diff --git a/modules/Aggregation/Aggregation.R b/modules/Aggregation/Aggregation.R index 445d1d55..c45370fd 100644 --- a/modules/Aggregation/Aggregation.R +++ b/modules/Aggregation/Aggregation.R @@ -64,10 +64,12 @@ agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm , ncores) { # list of vectors for the indices from ini to end pairs indices <- lapply(1:length(ini), function(x) { ini[x]:end[x]}) + plotting_attr <- list(ini_ftime = ini, end_ftime = end) } else { # take the firs and las element of each indices list for time_bounds saving ini <- unlist(lapply(indices, function(x){x[1]})) end <- unlist(lapply(indices, function(x){x[length(x)]})) + plotting_attr <- list(names(indices)) } if (method == 'average') { x[[1]]$data <- Apply(x[[1]]$data, target_dim = 'time', @@ -92,7 +94,7 @@ agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm , ncores) { x[[1]]$attrs$time_bounds$start <- tmp_dates x[[1]]$attrs$time_bounds$end <- Subset(x[[1]]$attrs$Dates, along = 'time', indices = end) - x[[1]]$attrs$Dates <- tmp_dates - + attributes(x[[1]]$attrs$time_bounds)$plotting_attr <- plotting_attr + x[[1]]$attrs$Dates <- tmp_dates return(x) } diff --git a/modules/Visualization/R/plot_metrics.R 
b/modules/Visualization/R/plot_metrics.R index e909a1f9..87baaebe 100644 --- a/modules/Visualization/R/plot_metrics.R +++ b/modules/Visualization/R/plot_metrics.R @@ -172,7 +172,6 @@ plot_metrics <- function(recipe, data_cube, metrics, col_sup <- NULL } - # Reorder dimensions metric <- Reorder(metric, c("time", "longitude", "latitude")) # If the significance has been requested and the variable has it, @@ -207,10 +206,15 @@ plot_metrics <- function(recipe, data_cube, metrics, toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), "\n", display_name, " / ", hcst_period) ## time_bounds in data_cube to know if Time_aggregation was applied - if (!is.null(data_cube$attrs$time_bounds$start)) { - info(logger, paste("Time_bounds found in data_cube", - "using them for titles")) - titles <- "What" + if (!is.null(attributes(data_cube$attrs$time_bounds))) { + info(recipe$Run$logger, "Using plotting attrs from time_bounds.") + titles <- unlist( + lapply(1:length(attributes(data_cube$attrs$time_bounds)$plotting_attr$ini_ftime), + function(x) { + paste("Forecast time", + attributes(data_cube$attrs$time_bounds)$plotting_attr$ini_ftime[x], + "to", + attributes(data_cube$attrs$time_bounds)$plotting_attr$end_ftime[x])})) } else { titles <- as.vector(months) } @@ -273,17 +277,37 @@ plot_metrics <- function(recipe, data_cube, metrics, # Loop over forecast times for (i in 1:dim(metric)[['time']]) { # Get forecast time label - forecast_time <- match(months[i], month.name) - init_month + 1 + # Case without time aggregation: + if (is.null(attributes(data_cube$attrs$time_bounds))) { + forecast_time <- match(months[i], month.name) - init_month + 1 - if (forecast_time < 1) { - forecast_time <- forecast_time + 12 + if (forecast_time < 1) { + forecast_time <- forecast_time + 12 + } + forecast_time <- sprintf("%02d", forecast_time) + # Define plot title + toptitle <- paste(system_name, "/", + str_to_title(var_long_name), + "\n", display_name, "/", months[i], "/", + hcst_period) + } else { + forecast_time_ini <-attributes(data_cube$attrs$time_bounds)$plotting_attr$ini[i] + forecast_time_end <- attributes(data_cube$attrs$time_bounds)$plotting_attr$end[i] + # labels for file name: + forecast_time <- paste0(forecast_time_ini, "-", forecast_time_end) + # title names: + forecast_time_ini <- init_month + forecast_time_ini - 1 + forecat_time_ini <- ifelse(forecast_time_ini > 12, forecast_time_ini - 12, forecast_time_ini) + forecast_time_ini <- month.name[forecast_time_ini] + forecast_time_end <- init_month + forecast_time_end - 1 + forecat_time_end <- ifelse(forecast_time_end > 12, forecast_time_end - 12, forecast_time_end) + forecast_time_end <- month.name[forecast_time_end] + toptitle <- paste(system_name, "/", + str_to_title(var_long_name), + "\n", display_name, "/", forecast_time_ini, "to", + forecast_time_end, "/", + hcst_period) } - forecast_time <- sprintf("%02d", forecast_time) - # Define plot title - toptitle <- paste(system_name, "/", - str_to_title(var_long_name), - "\n", display_name, "/", months[i], "/", - hcst_period) # Modify base arguments base_args[[1]] <- metric[i, , ] if (!is.null(metric_significance)) { diff --git a/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml b/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml index 7c8fcb2d..d36b0324 100644 --- a/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml +++ b/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml @@ -31,8 +31,8 @@ Analysis: Time_aggregation: execute: yes user_def: - - [1, 3] - - !expr sort(c(seq(1,120, 12), seq(2,120,13), seq(3, 120, 14))) + 
ftimes: [1, 3] + JJA: !expr sort(c(seq(1,120, 12), seq(2,120,13), seq(3, 120, 14))) method: average ini: [1, 2, 1] end: [2, 3, 3] -- GitLab From 36e11d219bab785da5843975eef9bf1b1f0d35b2 Mon Sep 17 00:00:00 2001 From: nperez Date: Fri, 10 May 2024 11:41:00 +0200 Subject: [PATCH 05/53] working version for ini and end plots --- example_scripts/exec_timeagg.R | 9 ++-- modules/Visualization/R/plot_ensemble_mean.R | 40 +++++++++++++--- .../R/plot_most_likely_terciles_map.R | 46 +++++++++++++++---- 3 files changed, 77 insertions(+), 18 deletions(-) diff --git a/example_scripts/exec_timeagg.R b/example_scripts/exec_timeagg.R index 3b688985..6444bc70 100644 --- a/example_scripts/exec_timeagg.R +++ b/example_scripts/exec_timeagg.R @@ -1,6 +1,5 @@ rm(list=ls()) gc() -setwd("/esarchive/scratch/nperez/git/auto-s2s") source("modules/Loading/Loading.R") source("modules/Calibration/Calibration.R") @@ -9,7 +8,8 @@ source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") source("tools/prepare_outputs.R") -source("modules/Aggregtation/Aggregation.") +source("modules/Units/Units.R") +source("modules/Aggregation/Aggregation.R") # Read recipe #args = commandArgs(trailingOnly = TRUE) @@ -24,7 +24,6 @@ recipe <- prepare_outputs(recipe_file) # Load datasets data <- Loading(recipe) # Units transformation -source("modules/Units/Units.R") data <- Units(recipe, data) # Temporal aggregation data <- Aggregation(recipe = recipe, data = data) @@ -38,4 +37,6 @@ probabilities <- Probabilities(recipe, data) ## TODO: Fix plotting # save_data(recipe, data, skill_metrics, probabilities) # Plot data -Visualization(recipe, data, skill_metrics, probabilities, significance = T) +Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, + probabilities = probabilities, significance = T) + diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R index 59881864..cd4d9bb7 100644 --- a/modules/Visualization/R/plot_ensemble_mean.R +++ b/modules/Visualization/R/plot_ensemble_mean.R @@ -118,8 +118,26 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o } toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), "\n", "Forecast Ensemble Mean / ", "Init.: ", i_syear) - months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], + if (is.null(attributes(fcst$attrs$time_bounds))) { + months <- lubridate::month( + fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], label = T, abb = F) + } else { + months <- unlist( + lapply(1:length(fcst$attrs$time_bounds$start), function(i) { + ftime_ini <- attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i] + ftime_end <- attributes(fcst$attrs$time_bounds)$plotting_attr$end[i] + # labels for file name: + ftime <- paste0(ftime_ini, "-", ftime_end) + # title names: + ftime_ini <- init_month + ftime_ini - 1 + ftime_ini <- ifelse(ftime_ini > 12, ftime_ini - 12, ftime_ini) + ftime_ini <- month.name[ftime_ini] + ftime_end <- init_month + ftime_end - 1 + ftime_end <- ifelse(ftime_end > 12, ftime_end - 12, ftime_end) + ftime_end <- month.name[ftime_end] + toptitle <- paste(ftime_ini, "to", ftime_end)})) + } years <- lubridate::year(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ]) if (recipe$Analysis$Workflow$Visualization$multi_panel) { @@ -174,11 +192,15 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o # Loop over forecast times for (i in 1:length(months)) { # Get forecast time label 
- forecast_time <- match(months[i], month.name) - init_month + 1 - if (forecast_time < 1) { - forecast_time <- forecast_time + 12 + if (is.null(attributes(fcst$attrs$time_bounds))) { + forecast_time <- match(months[i], month.name) - init_month + 1 + if (forecast_time < 1) { + forecast_time <- forecast_time + 12 + } + forecast_time <- sprintf("%02d", forecast_time) + } else { + forecast_time <- months } - forecast_time <- sprintf("%02d", forecast_time) # Get mask subset if (!is.null(mask)) { mask_i <- Subset(var_mask, along = 'time', indices = i, drop = TRUE) @@ -209,7 +231,13 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o } # Modify base arguments base_args[[1]] <- i_var_ens_mean[i, , ] - fileout <- paste0(outfile, "_ft", sprintf("%02d", i), ".pdf") + if (is.null(attributes(fcst$attrs$time_bounds))) { + fileout <- paste0(outfile, "_ft", sprintf("%02d", i), ".pdf") + } else { + fileout <- paste0(outfile, "_ft", + attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i], "-", + attributes(fcst$attrs$time_bounds)$plotting_attr$end[i], ".pdf") + } base_args$mask <- mask_i base_args$dots <- dots_i # Plot diff --git a/modules/Visualization/R/plot_most_likely_terciles_map.R b/modules/Visualization/R/plot_most_likely_terciles_map.R index c9ce70f2..ea0f2b90 100644 --- a/modules/Visualization/R/plot_most_likely_terciles_map.R +++ b/modules/Visualization/R/plot_most_likely_terciles_map.R @@ -42,7 +42,6 @@ plot_most_likely_terciles <- function(recipe, ## TODO: Sort out decadal initial month (is it always January?) init_month <- 1 } - # Retrieve and rearrange probability bins for the forecast if (is.null(probabilities$probs_fcst$prob_b33) || is.null(probabilities$probs_fcst$prob_33_to_66) || @@ -113,8 +112,27 @@ plot_most_likely_terciles <- function(recipe, toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), "\n", "Most Likely Tercile / Initialization: ", i_syear) - months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], - label = T, abb = F,locale = "en_GB") + if (is.null(attributes(fcst$attrs$time_bounds))) { + months <- lubridate::month( + fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], + label = T, abb = F) + } else { + months <- unlist( + lapply(1:length(fcst$attrs$time_bounds$start), function(i) { + ftime_ini <- attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i] + ftime_end <- attributes(fcst$attrs$time_bounds)$plotting_attr$end[i] + # labels for file name: + ftime <- paste0(ftime_ini, "-", ftime_end) + # title names: + ftime_ini <- init_month + ftime_ini - 1 + ftime_ini <- ifelse(ftime_ini > 12, ftime_ini - 12, ftime_ini) + ftime_ini <- month.name[ftime_ini] + ftime_end <- init_month + ftime_end - 1 + ftime_end <- ifelse(ftime_end > 12, ftime_end - 12, ftime_end) + ftime_end <- month.name[ftime_end] + toptitle <- paste(ftime_ini, "to", ftime_end)})) + } + years <- lubridate::year(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ]) if (recipe$Analysis$Workflow$Visualization$multi_panel) { ## TODO: Ensure this works for daily and sub-daily cases @@ -170,11 +188,16 @@ plot_most_likely_terciles <- function(recipe, base_args[names(output_configuration)] <- output_configuration for (i in 1:length(months)) { # Get forecast time label - forecast_time <- match(months[i], month.name) - init_month + 1 - if (forecast_time < 1) { - forecast_time <- forecast_time + 12 + if (is.null(attributes(fcst$attrs$time_bounds))) { + forecast_time <- match(months[i], month.name) - init_month + 1 + if (forecast_time < 1) { + 
forecast_time <- forecast_time + 12 + } + forecast_time <- sprintf("%02d", forecast_time) + } else { + forecast_time <- months } - forecast_time <- sprintf("%02d", forecast_time) + # Get mask subset if (!is.null(mask)) { mask_i <- Subset(var_mask, along = 'time', indices = i, drop = TRUE) @@ -196,7 +219,14 @@ plot_most_likely_terciles <- function(recipe, format(as.Date(i_syear, format="%Y%m%d"), "%d-%m-%Y")) # Plot - fileout <- paste0(outfile, "_ft", forecast_time, ".pdf") + if (is.null(attributes(fcst$attrs$time_bounds))) { + fileout <- paste0(outfile, "_ft", sprintf("%02d", i), ".pdf") + } else { + fileout <- paste0(outfile, "_ft", + attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i], "-", + attributes(fcst$attrs$time_bounds)$plotting_attr$end[i], ".pdf") + } + base_args$probs <- i_var_probs[i, , , ] base_args$mask <- mask_i base_args$dots <- dots_i -- GitLab From 27578fe3789f835dc9bcc8afc739ccdb97e0273e Mon Sep 17 00:00:00 2001 From: nperez Date: Fri, 10 May 2024 15:54:00 +0200 Subject: [PATCH 06/53] testing decadal plots and user_def param --- example_scripts/exec_timeagg.R | 2 +- example_scripts/test_decadal.R | 11 +-- modules/Aggregation/Aggregation.R | 2 +- modules/Visualization/R/plot_ensemble_mean.R | 69 ++++++++++++------- modules/Visualization/R/plot_metrics.R | 47 ++++++++----- .../R/plot_most_likely_terciles_map.R | 61 ++++++++++------ recipes/atomic_recipes/recipe_decadal.yml | 28 +++++--- .../examples/recipe_prlr_seasonal_timeagg.yml | 63 +++++++++++++++++ tools/prepare_outputs.R | 2 +- tools/read_atomic_recipe.R | 2 +- 10 files changed, 205 insertions(+), 82 deletions(-) create mode 100644 recipes/examples/recipe_prlr_seasonal_timeagg.yml diff --git a/example_scripts/exec_timeagg.R b/example_scripts/exec_timeagg.R index 6444bc70..66df62b8 100644 --- a/example_scripts/exec_timeagg.R +++ b/example_scripts/exec_timeagg.R @@ -17,7 +17,7 @@ #recipe <- read_atomic_recipe(recipe_file) ## to test a single recipe: # recipe_file <- "recipes/examples/recipe_tas_seasonal_timeagg.yml" - # recipe_file <- "recipes/examples/recipe_prlr_seasonal_units.yml" + # recipe_file <- "recipes/examples/recipe_prlr_seasonal_timeagg.yml" recipe <- prepare_outputs(recipe_file) diff --git a/example_scripts/test_decadal.R b/example_scripts/test_decadal.R index 12daa540..de9eae6a 100644 --- a/example_scripts/test_decadal.R +++ b/example_scripts/test_decadal.R @@ -9,7 +9,7 @@ source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") - +source("modules/Aggregation/Aggregation.R") recipe_file <- "recipes/atomic_recipes/recipe_decadal.yml" recipe <- prepare_outputs(recipe_file) # archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive @@ -17,16 +17,19 @@ recipe <- prepare_outputs(recipe_file) # Load datasets data <- Loading(recipe) +data <- Aggregation(recipe = recipe, data = data) # Calibrate datasets calibrated_data <- Calibration(recipe, data) # Compute skill metrics -skill_metrics <- Skill(recipe, calibrated_data) +skill_metrics <- Skill(recipe, data) # Compute percentiles and probability bins -probabilities <- Probabilities(recipe, calibrated_data) +probabilities <- Probabilities(recipe, data) # Plot data -Visualization(recipe, calibrated_data, skill_metrics, probabilities, +Visualization(recipe = recipe, data = data, + skill_metrics = skill_metrics, + probabilities = probabilities, significance = T) diff --git 
a/modules/Aggregation/Aggregation.R b/modules/Aggregation/Aggregation.R index c45370fd..cb23a0dd 100644 --- a/modules/Aggregation/Aggregation.R +++ b/modules/Aggregation/Aggregation.R @@ -35,7 +35,7 @@ Aggregation <- function(recipe, data) { result <- data[x] }}, simplify = TRUE) } - } else if (!is.ll(custom)) { + } else if (!is.null(custom)) { # caluclate aggreagtions from fmonth1 to ftmonth2 # for decadal, it makes sense to get all seasons and annual means too ## Ex: January, February, March for decadal simulations aggregation diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R index cd4d9bb7..261dcc74 100644 --- a/modules/Visualization/R/plot_ensemble_mean.R +++ b/modules/Visualization/R/plot_ensemble_mean.R @@ -123,20 +123,24 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], label = T, abb = F) } else { - months <- unlist( - lapply(1:length(fcst$attrs$time_bounds$start), function(i) { - ftime_ini <- attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i] - ftime_end <- attributes(fcst$attrs$time_bounds)$plotting_attr$end[i] - # labels for file name: - ftime <- paste0(ftime_ini, "-", ftime_end) - # title names: - ftime_ini <- init_month + ftime_ini - 1 - ftime_ini <- ifelse(ftime_ini > 12, ftime_ini - 12, ftime_ini) - ftime_ini <- month.name[ftime_ini] - ftime_end <- init_month + ftime_end - 1 - ftime_end <- ifelse(ftime_end > 12, ftime_end - 12, ftime_end) - ftime_end <- month.name[ftime_end] - toptitle <- paste(ftime_ini, "to", ftime_end)})) + if (length(attributes(fcst$attrs$time_bounds)$plotting_attr) > 1) { + months <- unlist( + lapply(1:length(fcst$attrs$time_bounds$start), function(i) { + ftime_ini <- attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i] + ftime_end <- attributes(fcst$attrs$time_bounds)$plotting_attr$end[i] + # labels for file name: + ftime <- paste0(ftime_ini, "-", ftime_end) + # title names: + ftime_ini <- init_month + ftime_ini - 1 + ftime_ini <- ifelse(ftime_ini > 12, ftime_ini - 12, ftime_ini) + ftime_ini <- month.name[ftime_ini] + ftime_end <- init_month + ftime_end - 1 + ftime_end <- ifelse(ftime_end > 12, ftime_end - 12, ftime_end) + ftime_end <- month.name[ftime_end] + toptitle <- paste(ftime_ini, "to", ftime_end)})) + } else { + months <- attributes(fcst$attrs$time_bounds)$plotting_attr[[1]] + } } years <- lubridate::year(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ]) @@ -199,7 +203,7 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o } forecast_time <- sprintf("%02d", forecast_time) } else { - forecast_time <- months + forecast_time <- months } # Get mask subset if (!is.null(mask)) { @@ -213,14 +217,23 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o } else { dots_i <- NULL } - # Define plot title - toptitle <- paste0(system_name, " / ", - str_to_title(var_long_name), - "\n", "Ensemble Mean / ", - months[i], " ", years[i], - " / Start date: ", - format(as.Date(i_syear, format="%Y%m%d"), - "%d-%m-%Y")) + # Define plot title + if (tolower(recipe$Analysis$Horizon) == 'seasonal') { + toptitle <- paste0(system_name, " / ", + str_to_title(var_long_name), + "\n", "Ensemble Mean / ", + months[i], " ", years[i], + " / Start date: ", + format(as.Date(i_syear, format="%Y%m%d"), + "%d-%m-%Y")) + } else { + toptitle <- paste0(system_name, " / ", + str_to_title(var_long_name), + "\n", "Ensemble Mean / ", + months[i], + " / Start date: ", + i_syear) 
+ } # Define caption if (identical(fun, PlotRobinson)) { ## TODO: Customize technical details @@ -234,9 +247,13 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o if (is.null(attributes(fcst$attrs$time_bounds))) { fileout <- paste0(outfile, "_ft", sprintf("%02d", i), ".pdf") } else { - fileout <- paste0(outfile, "_ft", - attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i], "-", - attributes(fcst$attrs$time_bounds)$plotting_attr$end[i], ".pdf") + if (length(attributes(fcst$attrs$time_bounds)$plotting_attr) > 1) { + fileout <- paste0(outfile, "_ft", + attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i], "-", + attributes(fcst$attrs$time_bounds)$plotting_attr$end[i], ".pdf") + } else { + fileout <- paste0(outfile, "_ft", months[i], ".pdf") + } } base_args$mask <- mask_i base_args$dots <- dots_i diff --git a/modules/Visualization/R/plot_metrics.R b/modules/Visualization/R/plot_metrics.R index 87baaebe..2df06e8b 100644 --- a/modules/Visualization/R/plot_metrics.R +++ b/modules/Visualization/R/plot_metrics.R @@ -208,13 +208,17 @@ plot_metrics <- function(recipe, data_cube, metrics, ## time_bounds in data_cube to know if Time_aggregation was applied if (!is.null(attributes(data_cube$attrs$time_bounds))) { info(recipe$Run$logger, "Using plotting attrs from time_bounds.") - titles <- unlist( + if (length(attributes(data_cube$attrs$time_bounds)$plotting_attr) > 1) { + titles <- unlist( lapply(1:length(attributes(data_cube$attrs$time_bounds)$plotting_attr$ini_ftime), function(x) { paste("Forecast time", attributes(data_cube$attrs$time_bounds)$plotting_attr$ini_ftime[x], "to", attributes(data_cube$attrs$time_bounds)$plotting_attr$end_ftime[x])})) + } else { + titles <- attributes(data_cube$attrs$time_bounds)$plotting_attr[[1]] + } } else { titles <- as.vector(months) } @@ -291,22 +295,31 @@ plot_metrics <- function(recipe, data_cube, metrics, "\n", display_name, "/", months[i], "/", hcst_period) } else { - forecast_time_ini <-attributes(data_cube$attrs$time_bounds)$plotting_attr$ini[i] - forecast_time_end <- attributes(data_cube$attrs$time_bounds)$plotting_attr$end[i] - # labels for file name: - forecast_time <- paste0(forecast_time_ini, "-", forecast_time_end) - # title names: - forecast_time_ini <- init_month + forecast_time_ini - 1 - forecat_time_ini <- ifelse(forecast_time_ini > 12, forecast_time_ini - 12, forecast_time_ini) - forecast_time_ini <- month.name[forecast_time_ini] - forecast_time_end <- init_month + forecast_time_end - 1 - forecat_time_end <- ifelse(forecast_time_end > 12, forecast_time_end - 12, forecast_time_end) - forecast_time_end <- month.name[forecast_time_end] - toptitle <- paste(system_name, "/", - str_to_title(var_long_name), - "\n", display_name, "/", forecast_time_ini, "to", - forecast_time_end, "/", - hcst_period) + if (length(attributes(data_cube$attrs$time_bounds)$plotting_attr) > 1) { + forecast_time_ini <-attributes(data_cube$attrs$time_bounds)$plotting_attr$ini[i] + forecast_time_end <- attributes(data_cube$attrs$time_bounds)$plotting_attr$end[i] + # labels for file name: + forecast_time <- paste0(forecast_time_ini, "-", forecast_time_end) + # title names: + forecast_time_ini <- init_month + forecast_time_ini - 1 + forecat_time_ini <- ifelse(forecast_time_ini > 12, forecast_time_ini - 12, forecast_time_ini) + forecast_time_ini <- month.name[forecast_time_ini] + forecast_time_end <- init_month + forecast_time_end - 1 + forecat_time_end <- ifelse(forecast_time_end > 12, forecast_time_end - 12, forecast_time_end) + 
forecast_time_end <- month.name[forecast_time_end] + toptitle <- paste(system_name, "/", + str_to_title(var_long_name), + "\n", display_name, "/", forecast_time_ini, "to", + forecast_time_end, "/", + hcst_period) + } else { + forecast_time <- attributes(data_cube$attrs$time_bounds)$plotting_attr[[1]][i] + toptitle <- paste(system_name, "/", + str_to_title(var_long_name), + "\n", display_name, "/", + forecast_time, "/", + hcst_period) + } } # Modify base arguments base_args[[1]] <- metric[i, , ] diff --git a/modules/Visualization/R/plot_most_likely_terciles_map.R b/modules/Visualization/R/plot_most_likely_terciles_map.R index ea0f2b90..5455f0f6 100644 --- a/modules/Visualization/R/plot_most_likely_terciles_map.R +++ b/modules/Visualization/R/plot_most_likely_terciles_map.R @@ -117,20 +117,24 @@ plot_most_likely_terciles <- function(recipe, fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], label = T, abb = F) } else { - months <- unlist( - lapply(1:length(fcst$attrs$time_bounds$start), function(i) { - ftime_ini <- attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i] - ftime_end <- attributes(fcst$attrs$time_bounds)$plotting_attr$end[i] - # labels for file name: - ftime <- paste0(ftime_ini, "-", ftime_end) - # title names: - ftime_ini <- init_month + ftime_ini - 1 - ftime_ini <- ifelse(ftime_ini > 12, ftime_ini - 12, ftime_ini) - ftime_ini <- month.name[ftime_ini] - ftime_end <- init_month + ftime_end - 1 - ftime_end <- ifelse(ftime_end > 12, ftime_end - 12, ftime_end) - ftime_end <- month.name[ftime_end] - toptitle <- paste(ftime_ini, "to", ftime_end)})) + if (length(attributes(fcst$attrs$time_bounds)$plotting_attr) > 1) { + months <- unlist( + lapply(1:length(fcst$attrs$time_bounds$start), function(i) { + ftime_ini <- attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i] + ftime_end <- attributes(fcst$attrs$time_bounds)$plotting_attr$end[i] + # labels for file name: + ftime <- paste0(ftime_ini, "-", ftime_end) + # title names: + ftime_ini <- init_month + ftime_ini - 1 + ftime_ini <- ifelse(ftime_ini > 12, ftime_ini - 12, ftime_ini) + ftime_ini <- month.name[ftime_ini] + ftime_end <- init_month + ftime_end - 1 + ftime_end <- ifelse(ftime_end > 12, ftime_end - 12, ftime_end) + ftime_end <- month.name[ftime_end] + toptitle <- paste(ftime_ini, "to", ftime_end)})) + } else { + months <- attributes(fcst$attrs$time_bounds)$plotting_attr[[1]] + } } years <- lubridate::year(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ]) @@ -211,20 +215,33 @@ plot_most_likely_terciles <- function(recipe, dots_i <- NULL } # Define plot title - toptitle <- paste0(system_name, " / ", - str_to_title(var_long_name), - "\n", "Most Likely Tercile / ", - months[i], " ", years[i], - " / Start date: ", - format(as.Date(i_syear, format="%Y%m%d"), - "%d-%m-%Y")) + if (tolower(recipe$Analysis$Horizon) == 'seasonal') { + toptitle <- paste0(system_name, " / ", + str_to_title(var_long_name), + "\n", "Most Likely Tercile / ", + months[i], " ", years[i], + " / Start date: ", + format(as.Date(i_syear, format="%Y%m%d"), + "%d-%m-%Y")) + } else { + toptitle <- paste0(system_name, " / ", + str_to_title(var_long_name), + "\n", "Most Likely Tercile / ", + months[i], + " / Start date: ", + i_syear) + } # Plot if (is.null(attributes(fcst$attrs$time_bounds))) { fileout <- paste0(outfile, "_ft", sprintf("%02d", i), ".pdf") } else { - fileout <- paste0(outfile, "_ft", + if (length(attributes(fcst$attrs$time_bounds)$plotting_attr) > 1) { + fileout <- paste0(outfile, "_ft", 
attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i], "-", attributes(fcst$attrs$time_bounds)$plotting_attr$end[i], ".pdf") + } else { + fileout <- paste0(outfile, "_ft", months[i], ".pdf") + } } base_args$probs <- i_var_probs[i, , , ] base_args$mask <- mask_i base_args$dots <- dots_i diff --git a/recipes/atomic_recipes/recipe_decadal.yml b/recipes/atomic_recipes/recipe_decadal.yml index 26312b34..fe0f2479 100644 --- a/recipes/atomic_recipes/recipe_decadal.yml +++ b/recipes/atomic_recipes/recipe_decadal.yml @@ -18,7 +18,7 @@ Analysis: hcst_start: 1990 hcst_end: 1993 # season: 'Annual' - ftime_min: 2 + ftime_min: 1 ftime_max: 24 Region: latmin: 10 #-90 @@ -29,29 +29,39 @@ Analysis: method: bilinear type: to_system #to_reference Workflow: + Time_aggregation: + execute: yes + method: 'average' + user_def: + JJA: !expr sort(c(seq(6,24, 12), seq(7,24,12), seq(8, 24, 12))) + MAM: !expr sort(c(seq(3,24, 12), seq(4,24,12), seq(5, 24, 12))) + SON: !expr sort(c(seq(9,24, 12), seq(10,24,12), seq(11, 24, 12))) + DJF: !expr sort(c(seq(12,24, 12), seq(13,24,12), seq(2, 24, 12))) + Y1: !expr 1:12 Anomalies: compute: no cross_validation: save: Calibration: method: bias - save: 'all' + save: 'none' Skill: - metric: RPSS Corr - save: 'all' + metric: RPSS EnsCorr Mean_Bias + save: 'none' Probabilities: percentiles: [[1/3, 2/3]] - save: 'all' + save: 'none' Indicators: index: FALSE Visualization: plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles + file_format: PNG - ncores: # Optional, int: number of cores, defaults to 1 - remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: TRUE # Optional, bool: Whether NAs are removed, defaults to FALSE Output_format: S2S4E Run: Loglevel: INFO Terminal: yes - output_dir: /esarchive/scratch/aho/git/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/aho/git/auto-s2s/ + output_dir: /esarchive/scratch/nperez/ + code_dir: /esarchive/scratch/nperez/git4/sunset/ diff --git a/recipes/examples/recipe_prlr_seasonal_timeagg.yml b/recipes/examples/recipe_prlr_seasonal_timeagg.yml new file mode 100644 index 00000000..e03428ac --- /dev/null +++ b/recipes/examples/recipe_prlr_seasonal_timeagg.yml @@ -0,0 +1,63 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, prlr) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: prlr + freq: monthly_mean + units: mm + Datasets: + System: + name: ECMWF-SEAS5.1 # Mandatory, str: system5c3s system21_m1 system35c3s + Multimodel: no # Mandatory, bool: Either yes/true or no/false + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0601' ## MMDD + fcst_year: # Optional, int: Forecast year 'YYYY' + hcst_start: '2014' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + latmin: 30 # Mandatory, int: minimum latitude + latmax: 50 # Mandatory, int: maximum latitude + lonmin: -10 # Mandatory, int: minimum longitude + lonmax: 10 # Mandatory, int: maximum longitude + Regrid: + method: bilinear # Mandatory, str: Interpolation method. See docu. 
+ Workflow: + Anomalies: + compute: no + cross_validation: no + save: none + Calibration: + method: evmos # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss bss10 bss90 + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. + save: 'all' + Indicators: + index: no + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles + multi_panel: no + mask_terciles: both + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: TRUE + output_dir: /esarchive/scratch/nperez/cs_oper/ + code_dir: /esarchive/scratch/nperez/git/s2s-suite/ diff --git a/tools/prepare_outputs.R b/tools/prepare_outputs.R index c571a3b8..c545542a 100644 --- a/tools/prepare_outputs.R +++ b/tools/prepare_outputs.R @@ -28,7 +28,7 @@ prepare_outputs <- function(recipe_file, # recipe_file: path to recipe YAML file # disable_checks: If TRUE, does not perform checks on recipe # disable_uniqueID: If TRUE, does not add a unique ID to output dir - recipe <- read_yaml(recipe_file) + recipe <- read_yaml(recipe_file, eval.exp = TRUE) recipe$recipe_path <- recipe_file recipe$name <- tools::file_path_sans_ext(basename(recipe_file)) diff --git a/tools/read_atomic_recipe.R b/tools/read_atomic_recipe.R index fb26cb11..3cb11e7e 100644 --- a/tools/read_atomic_recipe.R +++ b/tools/read_atomic_recipe.R @@ -23,7 +23,7 @@ read_atomic_recipe <- function(recipe_file) { # recipe_file: path to recipe YAML file - recipe <- read_yaml(recipe_file) + recipe <- read_yaml(recipe_file, eval.exp = TRUE) recipe$recipe_path <- recipe_file recipe$name <- tools::file_path_sans_ext(basename(recipe_file)) # Create log file for atomic recipe -- GitLab From 7857d1adfa4ff54727178a5dc69741733ce45512 Mon Sep 17 00:00:00 2001 From: nperez Date: Fri, 10 May 2024 16:38:01 +0200 Subject: [PATCH 07/53] test saving --- recipes/atomic_recipes/recipe_decadal.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/recipes/atomic_recipes/recipe_decadal.yml b/recipes/atomic_recipes/recipe_decadal.yml index fe0f2479..2028fc46 100644 --- a/recipes/atomic_recipes/recipe_decadal.yml +++ b/recipes/atomic_recipes/recipe_decadal.yml @@ -44,13 +44,13 @@ Analysis: save: Calibration: method: bias - save: 'none' + save: 'all' Skill: metric: RPSS EnsCorr Mean_Bias - save: 'none' + save: 'all' Probabilities: percentiles: [[1/3, 2/3]] - save: 'none' + save: 'all' Indicators: index: FALSE Visualization: -- GitLab From 9863ac0850e01eba8c4725f58e4cee0ac90a6f2c Mon Sep 17 00:00:00 2001 From: nperez Date: Tue, 14 May 2024 17:46:01 +0200 Subject: [PATCH 08/53] fix dimensions order --- modules/Aggregation/Aggregation.R | 6 + .../recipe-seasonal_monthly_1_timeagg.yml | 61 ++++++ .../testthat/test-seasonal_monthly_timeagg.R | 177 ++++++++++++++++++ 3 files changed, 244 insertions(+) create mode 100644 tests/recipes/recipe-seasonal_monthly_1_timeagg.yml create mode 100644 tests/testthat/test-seasonal_monthly_timeagg.R diff --git a/modules/Aggregation/Aggregation.R b/modules/Aggregation/Aggregation.R index cb23a0dd..5eab4aff 100644 --- a/modules/Aggregation/Aggregation.R +++ b/modules/Aggregation/Aggregation.R @@ -71,6 +71,7 @@ agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm , ncores) { end <- unlist(lapply(indices, function(x){x[length(x)]})) 
plotting_attr <- list(names(indices)) } + original_dims <- names(dim(x[[1]]$data)) if (method == 'average') { x[[1]]$data <- Apply(x[[1]]$data, target_dim = 'time', function(y, ind) { @@ -86,6 +87,11 @@ agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm , ncores) { } else { stop("Unknown method") } + # Check in case only 1 aggregation only is requested to add time dim back: + if (!('time' %in% names(dim(data[[1]]$data)))) { + dim(x[[1]]$data) <- c( dim(x[[1]]$data), time = 1) + } + x[[1]]$data <- Reorder(x[[1]]$data, original_dims) x[[1]]$dims <- dim(x[[1]]$data) x[[1]]$coords$time <- 1:length(ini) attr(x[[1]]$coords$time, "indices") <- TRUE diff --git a/tests/recipes/recipe-seasonal_monthly_1_timeagg.yml b/tests/recipes/recipe-seasonal_monthly_1_timeagg.yml new file mode 100644 index 00000000..0cd5746f --- /dev/null +++ b/tests/recipes/recipe-seasonal_monthly_1_timeagg.yml @@ -0,0 +1,61 @@ +Description: + Author: V. Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: Meteo-France-System7 + Multimodel: False + Reference: + name: ERA5 + Time: + sdate: '1101' + fcst_year: '2020' + hcst_start: '1993' + hcst_end: '1996' + ftime_min: 1 + ftime_max: 4 + Region: + latmin: 17 + latmax: 20 + lonmin: 12 + lonmax: 15 + Regrid: + method: bilinear + type: to_system + Workflow: + # Anomalies: + # compute: no + # cross_validation: + # save: 'none' + Time_aggregation: + execute: yes #do we need this? + method: average #accumulated + ini: [1, 2] # aggregate from 1 to 2; from 2 to 3 and from 1 to 3 + end: [3, 4] + + Calibration: + method: mse_min + save: 'all' + Skill: + metric: RPSS CRPSS EnsCorr Corr_individual_members Enscorr_specs + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] + save: 'all' + Indicators: + index: no + Visualization: + plots: skill_metrics most_likely_terciles forecast_ensemble_mean + multi_panel: yes + projection: Robinson + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: ./tests/out-logs/ + code_dir: /esarchive/scratch/nperez/git4/sunset/ diff --git a/tests/testthat/test-seasonal_monthly_timeagg.R b/tests/testthat/test-seasonal_monthly_timeagg.R new file mode 100644 index 00000000..2b08d388 --- /dev/null +++ b/tests/testthat/test-seasonal_monthly_timeagg.R @@ -0,0 +1,177 @@ +context("Seasonal monthly data") + +source("modules/Loading/Loading.R") +source("modules/Calibration/Calibration.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") +source("modules/Aggregation/Aggregation.R") + +recipe_file <- "tests/recipes/recipe-seasonal_monthly_1_timeagg.yml" +recipe <- prepare_outputs(recipe_file, disable_checks = F) +archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive + +# Load datasets +suppressWarnings({invisible(capture.output( +data <- Loading(recipe) +))}) + +# Aggregated data +suppressWarnings({invisible(capture.output( +aggregated_data <- Aggregation(recipe, data) +))}) + +# Calibrate data +suppressWarnings({invisible(capture.output( +calibrated_data <- Calibration(recipe, aggregated_data) +))}) + +# Compute skill metrics +suppressWarnings({invisible(capture.output( +skill_metrics <- Skill(recipe, calibrated_data) +))}) + +suppressWarnings({invisible(capture.output( +probs <- Probabilities(recipe, calibrated_data) +))}) + +# Saving +suppressWarnings({invisible(capture.output( +Saving(recipe = recipe, data = calibrated_data, + skill_metrics = skill_metrics, 
probabilities = probs) +))}) + +# Plotting +suppressWarnings({invisible(capture.output( +Visualization(recipe = recipe, data = calibrated_data, + skill_metrics = skill_metrics, probabilities = probs, + significance = T) +))}) +outdir <- get_dir(recipe = recipe, variable = data$hcst$attrs$Variable$varName) + +# ------- TESTS -------- + +test_that("1. Time Aggregation", { +expect_equal( +is.list(aggregated_data), +TRUE +) +expect_equal( +names(aggregated_data), +c("hcst", "fcst", "obs") +) +expect_equal( +class(aggregated_data$hcst), +"s2dv_cube" +) +expect_equal( +class(aggregated_data$fcst), +"s2dv_cube" +) +expect_equal( +dim(aggregated_data$hcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 2, latitude = 3, longitude = 3, ensemble = 25) +) +expect_equal( +dim(aggregated_data$fcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 2, latitude = 3, longitude = 3, ensemble = 51) +) +expect_equal( +mean(aggregated_data$fcst$data), +291.6194, +tolerance = 0.0001 +) +expect_equal( +mean(aggregated_data$hcst$data), +290.6241, +tolerance = 0.0001 +) +expect_equal( +as.vector(drop(aggregated_data$hcst$data)[1, , 2, 3, 4]), +c(290.5897, 290.2554), +tolerance = 0.0001 +) + +expect_equal( +range(aggregated_data$fcst$data), +c(287.4032, 296.3530), +tolerance = 0.0001 +) + +}) + + +#====================================== +test_that("3. Metrics", { + +expect_equal( +is.list(skill_metrics), +TRUE +) +expect_equal( +names(skill_metrics), +c("rpss", "rpss_significance", "crpss", "crpss_significance", "enscorr", + "enscorr_significance", "corr_individual_members", "corr_individual_members_significance", + "enscorr_specs") +) +expect_equal( +class(skill_metrics$rpss), +"array" +) +expect_equal( +dim(skill_metrics$rpss), +c(var = 1, time = 2, latitude = 3, longitude = 3) +) +expect_equal( +dim(skill_metrics$rpss_significance), +dim(skill_metrics$rpss) +) +expect_equal( +as.vector(skill_metrics$rpss[, , 2, 3]), +c(-1.384229, -1.147657), +tolerance = 0.0001 +) +expect_equal( +as.vector(skill_metrics$rpss_significance[, , 2, 3]), +rep(FALSE, 2) +) + +}) + +test_that("4. Saving", { +outputs <- paste0(recipe$Run$output_dir, "/outputs/") +expect_equal( +all(basename(list.files(outputs, recursive = T)) %in% +c("tas_19931101.nc", "tas_19941101.nc", "tas_19951101.nc", + "tas_19961101.nc", "tas_20201101.nc", "tas-corr_month11.nc", + "tas-obs_19931101.nc", "tas-obs_19941101.nc", "tas-obs_19951101.nc", + "tas-obs_19961101.nc", "tas-percentiles_month11.nc", "tas-probs_19931101.nc", + "tas-probs_19941101.nc", "tas-probs_19951101.nc", "tas-probs_19961101.nc", + "tas-probs_20201101.nc", "tas-skill_month11.nc")), +TRUE +) +expect_equal( +length(list.files(outputs, recursive = T)), +17 +) + +}) + +test_that("5. 
Visualization", { +plots <- paste0(recipe$Run$output_dir, "/plots/") +expect_equal( +all(basename(list.files(plots, recursive = T)) %in% +c("crpss-november.pdf", "enscorr_specs-november.pdf", "enscorr-november.pdf", + "forecast_ensemble_mean-20201101.pdf", "forecast_most_likely_tercile-20201101.pdf", + "rpss-november.pdf")), +TRUE +) +expect_equal( +length(list.files(plots, recursive = T)), +6 +) + +}) + +# Delete files +unlink(recipe$Run$output_dir, recursive = T) -- GitLab From 616121f3710422f905aa4d78ee36d1b1522b2f88 Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 15 May 2024 12:48:58 +0200 Subject: [PATCH 09/53] test time_bounds attributes --- .../testthat/test-seasonal_monthly_timeagg.R | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tests/testthat/test-seasonal_monthly_timeagg.R b/tests/testthat/test-seasonal_monthly_timeagg.R index 2b08d388..21977d9c 100644 --- a/tests/testthat/test-seasonal_monthly_timeagg.R +++ b/tests/testthat/test-seasonal_monthly_timeagg.R @@ -57,6 +57,38 @@ is.list(aggregated_data), TRUE ) expect_equal( +"time_bounds" %in% names(aggregated_data$hcst$attrs), +TRUE +) +expect_equal( +is.list(aggregated_data$hcst$attrs$time_bounds), +TRUE +) +expect_equal( +names(aggregated_data$hcst$attrs$time_bounds), +c("start", "end") +) +expect_equal( +dim(aggregated_data$hcst$attrs$time_bounds$start), +dim(aggregated_data$hcst$attrs$time_bounds$end) +) +expect_equal( +dim(aggregated_data$hcst$attrs$time_bounds$start), +dim(aggregated_data$hcst$attrs$Dates) +) +expect_equal( +"plotting_attr" %in% names(attributes(aggregated_data$hcst$attrs$time_bounds)), +TRUE +) +expect_equal( +attributes(aggregated_data$hcst$attrs$time_bounds)$plotting_attr$ini_ftime, +c(1,2) +) +expect_equal( +attributes(aggregated_data$hcst$attrs$time_bounds)$plotting_attr$end_ftime, +c(3,4) +) +expect_equal( names(aggregated_data), c("hcst", "fcst", "obs") ) -- GitLab From 6472d20fe272fd59c4b6a00d86723b84284ecb33 Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 15 May 2024 17:52:56 +0200 Subject: [PATCH 10/53] check recipe --- tools/check_recipe.R | 73 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 72 insertions(+), 1 deletion(-) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index be38ae71..68637281 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -301,7 +301,78 @@ check_recipe <- function(recipe) { # --------------------------------------------------------------------- # WORKFLOW CHECKS # --------------------------------------------------------------------- - + # Time_Aggregation + if ("Time_aggregation" %in% names(recipe$Analysis$Workflow)) { + if (!("execute" %in% names(recipe$Analysis$Workflow$Time_aggregation))) { + recipe$Analysis$Workflow$Time_aggregation$execute <- FALSE + } else { + if (recipe$Analysis$Workflow$Time_aggregation$execute) { + if (!("method" %in% names(recipe$Analysis$Workflow$Time_aggregation))) { + error(recipe$Run$logger, + "No Time_aggregation method defined in recipe.") + } else { + if (!tolower(recipe$Analysis$Workflow$Time_aggregation$method) %in% + c('average', 'accumulation')) { + error(recipe$Run$logger, + "Time_aggregation method should be either average or accumulation.") + } + } + if ((any(!c('ini','end') %in% names(recipe$Analysis$Workflow$Time_aggregation)) && + !'user_def' %in% names(recipe$Analysis$Workflow$Time_aggregation))) { + error(recipe$Run$logger, + paste("Missing parameters in Time aggregation section", + "add 'ini' and 'end' or 'user_def'.")) + } + if (all(c('ini', 'end') %in% + 
tolower(names(recipe$Analysis$Workflow$Time_aggregation)))) {
+        if (!is.numeric(recipe$Analysis$Workflow$Time_aggregation$ini) ||
+            !is.numeric(recipe$Analysis$Workflow$Time_aggregation$end)) {
+          error(recipe$Run$logger,
+                "ini and end in Time aggregation need to be numeric")
+        } else {
+          if (length(recipe$Analysis$Workflow$Time_aggregation$ini) !=
+              length(recipe$Analysis$Workflow$Time_aggregation$end)) {
+            error(recipe$Run$logger,
+                  paste("The number of 'ini' and 'end' parameters needs to be",
+                        "the same for Time aggregation"))
+          }
+          max_ind <- max(c(recipe$Analysis$Workflow$Time_aggregation$ini,
+                           recipe$Analysis$Workflow$Time_aggregation$end))
+          if (recipe$Analysis$Time$ftime_max < max_ind) {
+            error(recipe$Run$logger,
+                  paste("The requested forecast time max is smaller than the",
+                        "indices requested for time aggregation"))
+          }
+        }
+      }
+      if ('user_def' %in% tolower(names(recipe$Analysis$Workflow$Time_aggregation))) {
+        if (c('ini', 'end') %in%
+            tolower(names(recipe$Analysis$Workflow$Time_aggregation))) {
+          recipe$Analysis$Workflow$Time_aggregation$user_def <- NULL
+          warn(recipe$Run$logger,
+               "Only 'ini' and 'end' indices are used for time agg.")
+        }
+        if (is.list(recipe$Analysis$Workflow$Time_aggregation$user_def)) {
+          if (is.character(names(recipe$Analysis$Workflow$Time_aggregation$user_def))) {
+            if (!all(sapply(recipe$Analysis$Workflow$Time_aggregation$user_def,
+                            is.numeric))) {
+              error(recipe$Run$logger,
+                    paste("User_def indices in Time aggregation are expected",
+                          "to be numeric."))
+            }
+          } else {
+            warn(recipe$Run$logger,
+                 paste("No names provided for the list of user_def indices in",
+                       "Time aggregation; review plots and file names."))
+          }
+        } else {
+          error(recipe$Run$logger,
+                "user_def in Time aggregation should be a named list")
+        }
+      }
+    }
+  }
+ }
  # Calibration
  # If 'method' is FALSE/no/'none' or NULL, set to 'raw'
  ## TODO: Review this check
-- GitLab


From 769cff64918dda33bb52e2c231b64f0010cb70e8 Mon Sep 17 00:00:00 2001
From: nperez
Date: Thu, 16 May 2024 12:17:43 +0200
Subject: [PATCH 11/53] fix if condition

---
 tools/check_recipe.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/check_recipe.R b/tools/check_recipe.R
index 68637281..0a412b61 100644
--- a/tools/check_recipe.R
+++ b/tools/check_recipe.R
@@ -346,8 +346,8 @@ check_recipe <- function(recipe) {
       }
     }
     if ('user_def' %in% tolower(names(recipe$Analysis$Workflow$Time_aggregation))) {
-      if (c('ini', 'end') %in%
-          tolower(names(recipe$Analysis$Workflow$Time_aggregation))) {
+      if (all(c('ini', 'end') %in%
+          tolower(names(recipe$Analysis$Workflow$Time_aggregation)))) {
         recipe$Analysis$Workflow$Time_aggregation$user_def <- NULL
         warn(recipe$Run$logger,
              "Only 'ini' and 'end' indices are used for time agg.")
-- GitLab


From 8921318624fe2e6d8f4128bb0ce7928a36414fb8 Mon Sep 17 00:00:00 2001
From: nperez
Date: Tue, 28 May 2024 12:45:27 +0200
Subject: [PATCH 12/53] split cross

---
 .gitignore          |   1 +
 Skill.R             | 153 ++++++++++++++++
 crossval.R          | 150 +++++++++++++++
 full_ecvs_mask.R    | 435 ++++++++++++++++++++++++++++++++++++++++++++
 full_ecvs_oper.R    | 294 ++++++++++++++++++++++++++++++
 recipe_tas_oper.yml |  98 ++++++++++
 6 files changed, 1131 insertions(+)
 create mode 100644 Skill.R
 create mode 100644 crossval.R
 create mode 100644 full_ecvs_mask.R
 create mode 100644 full_ecvs_oper.R
 create mode 100644 recipe_tas_oper.yml

diff --git a/.gitignore b/.gitignore
index 263c4e64..f05c28f4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@ out-logs/
 *.swp
 *.swo
 ecsbatch.log*
+*.nc
 modules/Loading/testing_recipes/recipe_decadal_calendartest.yml
 modules/Loading/testing_recipes/recipe_decadal_daily_calendartest.yml
 conf/vitigeoss-vars-dict.yml
diff --git a/Skill.R b/Skill.R
new file mode 100644
index 00000000..2c1f1d6f
--- /dev/null
+++ b/Skill.R
@@ -0,0 +1,153 @@
+fair <- TRUE
+
+## START SKILL ASSESSMENT:
+# RPS
+source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/GetProbs.R")
+cal_hcst_probs_ev <- GetProbs(res$cal_hcst_ev, time_dim = 'syear',
+                              prob_thresholds = NULL,
+                              bin_dim_abs = 'probs',
+                              indices_for_quantiles = NULL,
+                              memb_dim = 'ensemble',
+                              abs_thresholds = res$lims_cal_hcst_tr,
+                              ncores = recipe$Analysis$ncores)
+cal_obs_probs_ev <- GetProbs(data$obs$data, time_dim = 'syear',
+                             prob_thresholds = NULL,
+                             bin_dim_abs = 'probs',
+                             indices_for_quantiles = NULL,
+                             memb_dim = 'ensemble',
+                             abs_thresholds = res$lims_cal_obs_tr,
+                             ncores = recipe$Analysis$ncores)
+source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/modules/Skill/R/tmp/RPS.R")
+rps <- RPS(exp = cal_hcst_probs_ev, obs = cal_obs_probs_ev, memb_dim = NULL,
+           cat_dim = 'probs', cross.val = FALSE, time_dim = 'syear',
+           Fair = fair, nmemb = nmemb,
+           ncores = recipe$Analysis$ncores)
+source("modules/Skill/R/RPS_clim.R")
+rps_clim <- Apply(list(cal_obs_probs_ev),
+                  target_dims = c('probs', 'syear'),
+                  RPS_clim, bin_dim_abs = 'probs', Fair = fair,
+                  cross.val = FALSE, ncores = recipe$Analysis$ncores)$output1
+# RPSS
+source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/RPSS.R")
+source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/RandomWalkTest.R")
+rpss <- RPSS(exp = cal_hcst_probs_ev, obs = cal_obs_probs_ev,
+             time_dim = 'syear', memb_dim = NULL,
+             cat_dim = 'probs', Fair = fair, nmemb = nmemb,
+             # We should use a line like this
+             #abs_threshold = res$lims_ano_hcst_tr,
+             #prob_threshold = c(1/3, 2/3),
+             cross.val = FALSE,
+             ncores = recipe$Analysis$ncores)
+
+cal_fcst <- CST_Calibration(data$hcst, data$obs, data$fcst,
+                            sdate_dim = 'syear', memb_dim = 'ensemble')
+# CRPS
+crps <- CRPS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev,
+             time_dim = 'syear', memb_dim = 'ensemble',
+             Fair = fair,
+             ncores = recipe$Analysis$ncores)
+# Not sure how this one should be computed:
+# It cannot be done here directly because it would mix in information from
+# the other years.
+#source("modules/Skill/R/CRPS_clim.R")
+# But computing it from ano_obs_tr does work here;
+# the result is the same as inside the loop.
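+# A hedged reading of the note above: under leave-one-out, the climatological
+# reference forecast for each held-out year is the set of training-period
+# observed anomalies, so applying CRPS() to res$ano_obs_tr (stacked along the
+# 'sample.syear' member dimension assumed below) against the held-out
+# observations reproduces the cross-validated CRPS_clim without leaking
+# information from the evaluation year.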
+crps_clim <- CRPS(exp = res$ano_obs_tr, obs = res$ano_obs_ev,
+                  time_dim = 'syear', memb_dim = 'sample.syear',
+                  Fair = fair,
+                  ncores = recipe$Analysis$ncores)
+
+
+# CRPSS
+ref <- res$ano_obs_tr
+dim(ref) <- c(ensemble = as.numeric(sdate_dim) - 1,
+              nftime, nlats, nlons, sdate_dim)
+crpss <- CRPSS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, ref = ref,
+               memb_dim = 'ensemble', Fair = fair,
+               time_dim = 'syear', clim.cross.val = FALSE,
+               ncores = recipe$Analysis$ncores)
+
+# Corr
+source("modules/Skill/R/tmp/Corr.R")
+enscorr <- Corr(res$ano_hcst_ev, res$ano_obs_ev,
+                dat_dim = NULL,
+                time_dim = 'syear',
+                method = 'pearson',
+                memb_dim = 'ensemble',
+                memb = F,
+                conf = F,
+                pval = F,
+                sign = T,
+                alpha = 0.05,
+                ncores = recipe$Analysis$ncores)
+
+# Mean Bias
+#mean_bias <- Bias(res$ano_hcst_ev, res$ano_obs_ev,
+mean_bias <- Bias(data$hcst$data, data$obs$data,
+                  time_dim = 'syear',
+                  memb_dim = 'ensemble',
+                  ncores = recipe$Analysis$ncores)
+
+mean_bias_sign <- Apply(list(data$hcst$data, data$obs$data),
+                        target_dims = list(c('syear', 'ensemble'), 'syear'),
+                        fun = function(x, y) {
+                          if (!(any(is.na(x)) || any(is.na(y)))) {
+                            res <- t.test(x = y,
+                                          y = apply(x, 1, mean, na.rm = T),
+                                          alternative = "two.sided")$p.value
+                          } else {
+                            res <- NA
+                          }
+                          return(res)},
+                        ncores = sdate_dim)$output1
+mean_bias_sign <- mean_bias_sign <= 0.05
+
+# Spread error ratio
+source("SprErr.R")
+enssprerr <- SprErr(exp = res$ano_hcst_ev, obs = res$ano_obs_ev,
+                    memb_dim = 'ensemble', dat_dim = NULL,
+                    time_dim = 'syear', pval = TRUE,
+                    ncores = recipe$Analysis$ncores)
+enssprerr_sign <- enssprerr$p.val
+enssprerr_sign <- enssprerr_sign <= 0.05
+enssprerr <- enssprerr$ratio
+
+# RMSE
+rms <- RMS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev,
+           memb_dim = 'ensemble', dat_dim = NULL,
+           time_dim = 'syear', alpha = 0.05,
+           ncores = recipe$Analysis$ncores)
+
+skill_metrics <- list(mean_bias = mean_bias,
+                      mean_bias_significance = mean_bias_sign,
+                      enscorr = enscorr$corr,
+                      enscorr_significance = enscorr$sign,
+                      enssprerr = enssprerr,
+                      enssprerr_significance = enssprerr_sign,
+                      rps = rps, rps_clim = rps_clim, crps = crps, crps_clim = crps_clim,
+                      rpss = rpss$rpss, rpss_significance = rpss$sign, #crps = crps,
+                      crpss = crpss$crpss, crpss_significance = crpss$sign,
+                      rms = rms$rms)
+skill_metrics <- lapply(skill_metrics, function(x) {
+                   InsertDim(drop(x), len = 1, pos = 1, name = 'var')})
+original <- recipe$Run$output_dir
+recipe$Run$output_dir <- paste0(original, "/outputs/Skill/")
+
+skill_metrics <- lapply(skill_metrics, function(x) {
+                    if (is.logical(x)) {
+                      dims <- dim(x)
+                      res <- as.numeric(x)
+                      dim(res) <- dims
+                    } else {
+                      res <- x
+                    }
+                    return(res)
+                  })
+# Compute and save metrics
+source("modules/Saving/Saving.R")
+#Saving <- Saving(recipe = recipe, data = data, skill = skill_metrics)
+ save_metrics(recipe = recipe,
+              metrics = skill_metrics,
+              data_cube = data$hcst, agg = 'global',
+              outdir = recipe$Run$output_dir)
+
+recipe$Run$output_dir <- original
+
diff --git a/crossval.R b/crossval.R
new file mode 100644
index 00000000..4a725457
--- /dev/null
+++ b/crossval.R
@@ -0,0 +1,150 @@
+# Full-cross-val workflow
+## This code should be valid for individual months and temporal averages
+
+## data dimensions
+sdate_dim <- dim(data$hcst$data)['syear']
+nmemb <- dim(data$hcst$data)['ensemble']
+nftime <- dim(data$hcst$data)['time']
+
+cross <- CSTools:::.make.eval.train.dexes('leave-one-out', sdate_dim, NULL)
+outdim <- length(cross) # leave-one-out should be equal to sdate_dim
+
+# What we need to return?
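+# Based on how 'cross' is indexed below (cross[[t]]$train.dexes and
+# cross[[t]]$eval.dexes), each element is assumed to look like this for,
+# e.g., 4 start dates under leave-one-out:
+## str(cross[[1]])
+## List of 2
+##  $ eval.dexes : int 1
+##  $ train.dexes: int [1:3] 2 3 4
+# So, per fold, we need the evaluation anomalies (hcst and obs), the training
+# observed anomalies (the reference forecast for CRPSS) and the category
+# limits computed on the training years only.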
+ +if ('latitude' %in% names(dim(data$hcst$data))) { + nlats <- dim(data$hcst$data)['latitude'] + nlons <- dim(data$hcst$data)['longitude'] +} else if ('region' %in% names(dim(data$hcst$data))) { + nregions <- dim(data$hcst$data)['region'] +} +ano_hcst_ev_res <- NULL +ano_obs_ev_res <- NULL +ano_obs_tr_res <- NULL + +cal_hcst_ev_res <- NULL +cal_hcst_tr_res <- NULL +obs_ev_res <- NULL +obs_tr_res <- NULL + +lims_ano_hcst_tr_res <- NULL +lims_ano_obs_tr_res <- NULL + +lims_cal_hcst_tr_res <- NULL +lims_cal_obs_tr_res <- NULL + +for (t in 1:outdim) { + info(recipe$Run$logger, paste("crossval:", t)) + + # subset years: Subset works at BSC not at Athos + ## training indices + obs_tr <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$train.dexes) + hcst_tr <- Subset(data$hcst$data, along = 'syear', + indices = cross[[t]]$train.dexes) + ## evaluation indices + hcst_ev <- Subset(data$hcst$data, along = 'syear', + indices = cross[[t]]$eval.dexes) + obs_ev <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$eval.dexes) + if (anomalies_comp) { + # compute climatology: + clim_obs_tr <- MeanDims(obs_tr, 'syear') + clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble')) + # compute anomalies: + ano_obs_tr <- s2dv::Ano(obs_tr, clim_obs_tr, + ncores = recipe$Analysis$ncores) + ano_hcst_tr <- s2dv::Ano(hcst_tr, clim_hcst_tr, + ncores = recipe$Analysis$ncores) + ano_hcst_ev <- s2dv::Ano(hcst_ev, clim_hcst_tr, + ncores = recipe$Analysis$ncores) + ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr, + ncores = recipe$Analysis$ncores) + #rm("clim_obs_tr", "clim_hcst_tr", "obs_tr", "hcst_tr", "obs_ev", "hcst_ev") + #Category limits + lims_ano_hcst_tr <- Apply(ano_hcst_tr, target_dims = c('syear', 'ensemble'), + fun = function(x) { + quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)}, + output_dims = 'probs', + ncores = recipe$Analysis$ncores)$output1 + lims_ano_obs_tr <- Apply(ano_obs_tr, target_dims = c('syear'),#, 'ensemble'), + fun = function(x) { + quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)}, + output_dims = 'probs', + ncores = recipe$Analysis$ncores)$output1 + ano_hcst_ev_res <- abind(ano_hcst_ev_res, ano_hcst_ev, + along = length(dim(ano_hcst_ev)) + 1) + ano_obs_ev_res <- abind(ano_obs_ev_res, ano_obs_ev, + along = length(dim(ano_obs_ev)) + 1) + ano_obs_tr_res <- abind(ano_obs_tr_res, ano_obs_tr, + along = length(dim(ano_obs_tr)) + 1) + lims_ano_hcst_tr_res <- abind(lims_ano_hcst_tr_res, lims_ano_hcst_tr, + along = length(dim(lims_ano_hcst_tr)) + 1) + lims_ano_obs_tr_res <- abind(lims_ano_obs_tr_res, lims_ano_obs_tr, + along = length(dim(lims_ano_obs_tr)) + 1) + } + if (calibraion_comp) { + cal_hcst_tr <- Calibration(hcst_tr, obs_tr, exp_cor = NULL, + cal.method = "mse_min", eval.method = "in-sample", + multi.model = FALSE, na.fill = TRUE, + na.rm = TRUE, apply_to = NULL, alpha = NULL, + memb_dim = 'ensemble', sdate_dim = 'syear', + dat_dim = NULL, + ncores = recipe$Analysis$ncores) + cal_hcst_ev <- Calibration(hcst_tr, obs_tr, exp_cor = hcst_ev, + cal.method = "mse_min", + eval.method = "hindcast-vs-forecast", + multi.model = FALSE, na.fill = TRUE, + na.rm = TRUE, apply_to = NULL, alpha = NULL, + memb_dim = 'ensemble', sdate_dim = 'syear', + dat_dim = NULL, + ncores = recipe$Analysis$ncores) + #Category limits + lims_cal_hcst_tr <- Apply(cal_hcst_tr, target_dims = c('syear', 'ensemble'), + fun = function(x) { + quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)}, + output_dims = 'probs', + ncores = recipe$Analysis$ncores)$output1 + lims_cal_obs_tr <- Apply(obs_tr, target_dims 
= c('syear'),#, 'ensemble'), + fun = function(x) { + quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)}, + output_dims = 'probs', + ncores = recipe$Analysis$ncores)$output1 + cal_hcst_tr <- Subset(cal_hcst_tr, indices = 1, along = 'syear', drop = 'selected') + cal_hcst_tr_res <- abind(cal_hcst_tr_res, cal_hcst_tr, + along = length(dim(cal_hcst_tr)) + 1) + names(dim(cal_hcst_tr_res)) <- c(names(dim(cal_hcst_tr)), names(sdate_dim)) + cal_hcst_ev <- Subset(cal_hcst_ev, indices = 1, along = 'syear', drop = 'selected') + cal_hcst_ev_res <- abind(cal_hcst_ev_res, cal_hcst_ev, + along = length(dim(cal_hcst_ev)) + 1) + names(dim(cal_hcst_ev_res)) <- c(names(dim(cal_hcst_ev)), names(sdate_dim)) + + obs_tr_res <- abind(obs_tr_res, obs_tr, + along = length(dim(obs_tr)) + 1) + names(dim(obs_tr_res)) <- c(names(dim(obs_tr)), names(sdate_dim)) + lims_cal_hcst_tr_res <- abind(lims_cal_hcst_tr_res, lims_cal_hcst_tr, + along = length(dim(lims_cal_hcst_tr)) + 1) + names(dim(lims_cal_hcst_tr_res)) <- c(names(dim(lims_cal_hcst_tr)), names(sdate_dim)) + lims_cal_obs_tr <- Subset(lims_cal_obs_tr, indices = 1, along = 'ensemble', drop = 'selected') + lims_cal_obs_tr_res <- abind(lims_cal_obs_tr_res, lims_cal_obs_tr, + along = length(dim(lims_cal_obs_tr)) + 1) + names(dim(lims_cal_obs_tr_res)) <- c(names(dim(lims_cal_obs_tr)), names(sdate_dim)) + } + gc() + res <- list(ano_hcst_ev = ano_hcst_ev_res, + ano_obs_ev = ano_obs_ev_res, + ano_obs_tr = ano_obs_tr_res, #reference forecast for the CRPSS + lims_ano_hcst_tr = lims_ano_hcst_tr_res, + lims_ano_obs_tr = lims_ano_obs_tr_res, + cal_hcst_tr = cal_hcst_tr_res, + cal_hcst_ev = cal_hcst_ev_res, + obs_tr = obs_tr_res, + lims_cal_hcst_tr = lims_cal_hcst_tr_res, + lims_cal_obs_tr = lims_cal_obs_tr_res ) +} +info(recipe$Run$logger, + paste0("Cross-validation loop ended, returning elements:", + paste(names(res), collapse = " "))) + + + + diff --git a/full_ecvs_mask.R b/full_ecvs_mask.R new file mode 100644 index 00000000..45d7bb50 --- /dev/null +++ b/full_ecvs_mask.R @@ -0,0 +1,435 @@ + +source("modules/Loading/Loading.R") +source("modules/Saving/Saving.R") +source("modules/Units/Units.R") +source("modules/Visualization/Visualization.R") +args = commandArgs(trailingOnly = TRUE) +recipe_file <- args[1] +#recipe_file <- "recipe_tas_oper.yml" +recipe <- read_atomic_recipe(recipe_file) +#recipe <- prepare_outputs(recipe_file) +# Load datasets +data <- Loading(recipe) +data <- Units(recipe, data) +data_summary(data$hcst, recipe) +data_summary(data$obs, recipe) + +#file <- "/esarchive/scratch/nperez/NUTS3_ERA5Land-62_56E-22_81N.nc" +#file <- "/esarchive/scratch/nperez/git4/sunset/NUTS3_ERA5_-62_56E-22_81N.nc" +#mask <- nc_open(file) +#lat <- ncatt_get(mask, 'lat') +#lon <- ncatt_get(mask, 'lon') +#mask <- ncget_var(mask) + +dev.new() +source("/esarchive/scratch/nperez/git/s2dv/R/Bias.R") +bias <- Bias(exp = data$hcst$data, obs = data$obs$data, + memb_dim = 'ensemble', time_dim = 'syear', alpha = 0.05) +PlotEquiMap(bias$bias[1,1,1,1,1,,], lon = data$hcst$coord$lon, + lat = data$hcst$coords$lat, dots = bias$sig[1,1,1,1,1,,], + filled.c = F, toptitle = "singnif") +bias <- s2dv::Bias(exp = data$hcst$data, obs = data$obs$data, + memb_dim = 'ensemble', time_dim = 'syear') +dev.new() +PlotEquiMap(bias[1,1,1,1,1,,], lon = data$hcst$coord$lon, + lat = data$hcst$coords$lat, #dots = bias$sig[1,1,1,1,1,,], + filled.c = F, toptitle = "original") + + + +dev.new() +source("/esarchive/scratch/nperez/git/s2dv/R/Bias.R") +bias <- Bias(exp = data$hcst$data, obs = data$obs$data, abs = T, + 
memb_dim = 'ensemble', time_dim = 'syear', alpha = 0.05) +PlotEquiMap(bias$bias[1,1,1,1,1,,], lon = data$hcst$coord$lon, + lat = data$hcst$coords$lat, dots = bias$sig[1,1,1,1,1,,], + filled.c = F, toptitle = "singnif") +bias <- s2dv::Bias(exp = data$hcst$data, obs = data$obs$data, abs = T, + memb_dim = 'ensemble', time_dim = 'syear') +dev.new() +PlotEquiMap(bias[1,1,1,1,1,,], lon = data$hcst$coord$lon, + lat = data$hcst$coords$lat, #dots = bias$sig[1,1,1,1,1,,], + filled.c = F, toptitle = "original") + + + +source("/esarchive/scratch/nperez/git/Flor/NUTS3.R") +mask <- test +mask_lat <- lats +mask_lon <-lons + +# Region to subset +range(data$hcst$coords$longitude) +range(data$hcst$coords$latitude) + +mask <- SelBox(mask, lon = mask_lon, lat = mask_lat, + region = c(-20, 40, 20, 80)) +mask_lat <- mask$lat +mask_lon <- mask$lon +mask <- mask$data +all(data$hcst$coord$latitude == mask_lat) +all(data$hcst$coord$longitude == mask_lon) + +polys <- array(unique(as.vector(mask))[-1], + c(poly = length(unique(as.vector(mask))[-1]))) +## missing polys: +missing <- which(!(1:length(ids) %in% polys)) +tas_polys <- Apply(list(data$hcst$data, mask, polys), + target_dims = list(c('latitude', 'longitude'), + c('lat', 'lon'), NULL), + function(x, y, pol) { + mean(x[which(y == pol)], na.rm = TRUE)}, + ncores = 6)$output1 +data$hcst$data <- tas_polys +data$hcst$coord <- data$hcst$coord[-c(7,8)] +data$hcst$coord <- append(data$hcst$coord, list(polygons = polys)) + +polys_obs <- Apply(list(data$obs$data, mask, polys), + target_dims = list(c('latitude', 'longitude'), + c('lat', 'lon'), NULL), + function(x, y, pol) { + mean(x[which(y == pol)], na.rm = TRUE)}, + ncores = 4)$output1 +data$obs$data <- polys_obs +data$obs$coord <- data$obs$coord[-c(7,8)] +data$obs$coord <- append(data$obs$coord, list(polygons = polys)) + + +polys_fcst <- Apply(list(data$fcst$data, mask, polys), + target_dims = list(c('latitude', 'longitude'), + c('lat', 'lon'), NULL), + function(x, y, pol) { + mean(x[which(y == pol)], na.rm = TRUE)}, + ncores = 4)$output1 +data$fcst$data <- polys_fcst +data$fcst$coord <- data$fcst$coord[-c(7,8)] +data$fcst$coord <- append(data$fcst$coord, list(polygons = polys)) + + testp <- Subset(MeanDims(data$fcst$data, 'ensemble'), along = 'time', + indices = 1, drop = 'all') +data_pol <- data.frame(data = testp, NUTS_ID = ids[-missing]) +data_pol <- rbind(data_pol, data.frame(data =rep(0, length(missing)), NUTS_ID = ids[missing])) +shp <- rgdal::readOGR(shp_file) +shp <- subset(shp, LEVL_CODE == 2) + shp_data <- merge(shp, data_pol, by.x="NUTS_ID", by.y="NUTS_ID") + shp_data@data$id <- rownames(shp_data@data) + df.points <- fortify(shp_data, region = "id") + df.df <- plyr::join(df.points, shp_data@data, by = "id") + +gusa <- map_data("world") +plot_poly <- ggplot(data = df.df) + + geom_polygon(aes(long, lat, group = group),fill = "lightgrey", data = gusa) + + coord_map(projection = "stereographic", + xlim = c(-15, 40), + ylim = c(35, 75)) + + geom_polygon(aes(x = long, y = lat, fill = cut(data, breaks = seq(-16, 22, 2)), + group = group), color = "grey", size = 0.01) + + geom_polygon(aes(x = long, y = lat, group = group), fill = NA, + color = "grey", size = 0.01, data = shp) + + theme_bw() + + scale_fill_manual(values = c(rev(brewer.pal(9, 'Blues')), + brewer.pal(9, 'Reds'), "black"), + drop = FALSE, name = 'tas(ºC)') + + theme(panel.background = element_rect(fill = 'azure'), + text = element_text(family = "Times")) + # Colour of ocean + xlab('Longitude') + ylab('Latitude') + + scale_x_continuous(breaks = 
seq(-12, 45, 4),labels = waiver()) + + scale_y_continuous(breaks = seq(32, 70, 4),labels = waiver()) + + ggtitle("Forecast test") + +#reg <- SelBox(data$hcst$data, lon = as.vector(data$hcst$coords$longitude), +# lat = as.vector(data$hcst$coords$latitude), +# region = c(lonmin = 180, lonmax = 270, latmin = -30, latmax = 30), +# latdim = 'latitude', londim = 'longitude') +# +#data$hcst$data <- reg$data +#data$hcst$coords$longitude <- reg$lon +#data$hcst$coords$latitude <- reg$lat + +#reg <- SelBox(data$obs$data, lon = as.vector(data$obs$coords$longitude), +# lat = as.vector(data$obs$coords$latitude), +# region = c(lonmin = 180, lonmax = 270, latmin = -30, latmax = 30), +# latdim = 'latitude', londim = 'longitude') + +#data$obs$data <- reg$data +#data$obs$coords$longitude <- reg$lon +#data$obs$coords$latitude <- reg$lat + +# UKMO January 1993 is missing: +if (recipe$Analysis$Time$sdate == '0101') { + if (recipe$Analysis$Datasets$System$name == "UKMO-System602") { + if (1993 %in% recipe$Analysis$Time$hcst_start:recipe$Analysis$Time$hcst_end) { + info(recipe$Run$logger, + "UKMO January 1993 not available") + ind <- recipe$Analysis$Time$hcst_start:recipe$Analysis$Time$hcst_end + ind <- (1:length(ind))[-which(ind == 1993)] + data$hcst <- CST_Subset(data$hcst, along = 'syear', indices = ind) + data$obs <- CST_Subset(data$obs, along = 'syear', indices = ind) + sdate_dim <- dim(data$hcst$data)['syear'] + } + } +} + +calibraion_comp <- TRUE +anomalies_comp <- FALSE +source("crossval.R") + +## Define FAIR option: +fair <- TRUE + +## START SKILL ASSESSMENT: +# RPS +source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/GetProbs.R") +cal_hcst_probs_ev <- GetProbs(res$cal_hcst_ev, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'probs', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', abs_thresholds = res$lims_cal_hcst_tr, + ncores = recipe$Analysis$ncores) +cal_obs_probs_ev <- GetProbs(data$obs$data, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'probs', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = res$lims_cal_obs_tr, + ncores = recipe$Analysis$ncores) +source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/modules/Skill/R/tmp/RPS.R") +rps <- RPS(exp = ano_hcst_probs_ev, obs = ano_obs_probs_ev, memb_dim = NULL, + cat_dim = 'probs', cross.val = FALSE, time_dim = 'syear', + Fair = fair, nmemb = nmemb, + ncores = recipe$Analysis$ncores) +source("modules/Skill/R/RPS_clim.R") +rps_clim <- Apply(list(ano_obs_probs_ev), + target_dims = c('probs', 'syear'), + RPS_clim, bin_dim_abs = 'probs', Fair = fair, + cross.val = FALSE, ncores = recipe$Analysis$ncores)$output1 +# RPSS +source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/RPSS.R") +source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/RandomWalkTest.R") +rpss <- RPSS(exp = cal_hcst_probs_ev, obs = cal_obs_probs_ev, + time_dim = 'syear', memb_dim = NULL, + cat_dim = 'probs', Fair = fair, nmemb = nmemb, + # We should use a line like this + #abs_threshold = res$lims_ano_hcst_tr, + #prob_threshold = c(1/3, 2/3), + cross.val = FALSE, + ncores = recipe$Analysis$ncores) + +cal_fcst <- CST_Calibration(data$hcst, data$obs, data$fcst, + sdate_dim = 'syear', memb_dim = 'ensemble') + + +shp_file = "/esarchive/shapefiles/NUTS3/NUTS_RG_60M_2021_4326.shp" +shp <- sf::st_read(shp_file) # class sf +ids <- subset(shp, shp$LEVL_CODE == 3) + + +###### +library(sf) +library(ggplot2) +#library(ggplot2, lib.loc = .libPaths()[2]) +library(RColorBrewer) 
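+# Note (environment assumption, not from the original workflow): rgeos and
+# rgdal were retired from CRAN in 2023, and 'shp' is read with sf::st_read(),
+# so the fortify()/@data steps further down assume an sp
+# SpatialPolygonsDataFrame. A hedged sf-native sketch of the same
+# fill-by-polygon map, reusing the 'data_pol' built above:
+## shp_nuts <- merge(shp, data_pol, by = "NUTS_ID")
+## ggplot(shp_nuts) + geom_sf(aes(fill = data)) + theme_bw()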
+library(rgeos)
+
+gusa <- map_data("world")
+
+pals <- brewer.pal(9, 'Reds')[3:9]
+pals <- c("#ffffb3", pals, '#525252')
+
+brks <- 0:5
+brks <- seq(258, 274, 2)
+
+plot_poly <- list()
+polys_data <- NULL
+
+
+## Now fill each polygon with the mean value
+  data_pol <- data.frame(data = apply(cal_fcst$data[1,1,1,1,1,1,,], 2, mean),
+                         LAU_ID = polys)
+  shp_data <- merge(shp, data_pol, by.x = "LAU_ID", by.y = "LAU_ID")
+
+# Magic lines below that convert a SpatialPolygonsDataFrame to a normal data frame
+  shp_data@data$id <- rownames(shp_data@data)
+  df.points <- fortify(shp_data, region = "id")
+  df.df <- plyr::join(df.points, shp_data@data, by = "id")
+ano <- df.df
+ano$data <- ano$data - fmi_clim_m$data
+
+ggplot(data = ano) + #df.df) +
+  geom_polygon(aes(long, lat, group = group), fill = "lightgrey", data = gusa) +
+  coord_map(projection = "stereographic",
+            xlim = c(lon_min, lon_max),
+            ylim = c(lat_min, lat_max)) +
+ # geom_polygon(aes(x = long, y = lat, fill = cut(data, breaks = seq(-14, 18, 2)),
+  geom_polygon(aes(x = long, y = lat, fill = cut(data,
+                                                 breaks = seq(0, 5, 0.5)),
+                   group = group), color = "grey", size = 0.08) +
+  geom_polygon(aes(x = long, y = lat, group = group), fill = NA,
+               color = "grey", size = 0.5, data = shp_herding) +
+  theme_bw() +
+  scale_fill_manual(values = c(#rev(brewer.pal(9, 'Blues')),
+                               'white', brewer.pal(9, 'Reds'), "black"),
+                    drop = FALSE, name = 'tas(ºC)') +
+  theme(panel.background = element_rect(fill = 'azure'),
+        text = element_text(family = "Times")) + # Colour of ocean
+  xlab('Longitude') + ylab('Latitude') +
+  scale_x_continuous(breaks = seq(-12, 45, 4), labels = waiver()) +
+  scale_y_continuous(breaks = seq(32, 70, 4), labels = waiver()) +
+  ggtitle(paste(month.name[mes], year)) # +
+
+
+######
+#PlotEquiMap(rps[1,,], lon = data$hcst$coords$longitude,
+#            lat = data$hcst$coords$latitude, filled.c = F,
+#            fileout = "test.png")
+# CRPS
+crps <- CRPS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev,
+             time_dim = 'syear', memb_dim = 'ensemble',
+             Fair = fair,
+             ncores = recipe$Analysis$ncores)
+# Not sure how this one should be computed:
+# It cannot be done here directly because it would mix in information from
+# the other years.
+#source("modules/Skill/R/CRPS_clim.R")
+# But computing it from ano_obs_tr does work here;
+# the result is the same as inside the loop.
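+# Interpretation aid (a sketch, not part of the original workflow): the
+# CRPSS computed further down is roughly equivalent to comparing the mean
+# CRPS against the mean climatological CRPS:
+## 1 - mean(crps, na.rm = TRUE) / mean(crps_clim, na.rm = TRUE)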
+crps_clim <- CRPS(exp = res$ano_obs_tr, obs = res$ano_obs_ev,
+                  time_dim = 'syear', memb_dim = 'sample.syear',
+                  Fair = fair,
+                  ncores = recipe$Analysis$ncores)
+
+
+# CRPSS
+ref <- res$ano_obs_tr
+dim(ref) <- c(ensemble = as.numeric(sdate_dim) - 1,
+              nftime, nlats, nlons, sdate_dim)
+crpss <- CRPSS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, ref = ref,
+               memb_dim = 'ensemble', Fair = fair,
+               time_dim = 'syear', clim.cross.val = FALSE,
+               ncores = recipe$Analysis$ncores)
+
+
+
+# Corr
+source("modules/Skill/R/tmp/Corr.R")
+enscorr <- Corr(res$ano_hcst_ev, res$ano_obs_ev,
+                dat_dim = NULL,
+                time_dim = 'syear',
+                method = 'pearson',
+                memb_dim = 'ensemble',
+                memb = F,
+                conf = F,
+                pval = F,
+                sign = T,
+                alpha = 0.05,
+                ncores = recipe$Analysis$ncores)
+
+# Mean Bias
+#mean_bias <- Bias(res$ano_hcst_ev, res$ano_obs_ev,
+mean_bias <- Bias(data$hcst$data, data$obs$data,
+                  time_dim = 'syear',
+                  memb_dim = 'ensemble',
+                  ncores = recipe$Analysis$ncores)
+
+mean_bias_sign <- Apply(list(data$hcst$data, data$obs$data),
+                        target_dims = list(c('syear', 'ensemble'), 'syear'),
+                        fun = function(x, y) {
+                          if (!(any(is.na(x)) || any(is.na(y)))) {
+                            res <- t.test(x = y,
+                                          y = apply(x, 1, mean, na.rm = T),
+                                          alternative = "two.sided")$p.value
+                          } else {
+                            res <- NA
+                          }
+                          return(res)},
+                        ncores = sdate_dim)$output1
+mean_bias_sign <- mean_bias_sign <= 0.05
+#PlotEquiMap(mean_bias[1,1,1,1,1,,], lat = data$hcst$coords$latitude,
+#            lon = data$hcst$coords$longitude,
+#            dots = mean_bias_sign[1,1,1,1,1,,,1])
+
+# Spread error ratio
+source("SprErr.R")
+enssprerr <- SprErr(exp = res$ano_hcst_ev, obs = res$ano_obs_ev,
+                    memb_dim = 'ensemble', dat_dim = NULL,
+                    time_dim = 'syear', pval = TRUE,
+                    ncores = recipe$Analysis$ncores)
+enssprerr_sign <- enssprerr$p.val
+enssprerr_sign <- enssprerr_sign <= 0.05
+enssprerr <- enssprerr$ratio
+
+# RMSE
+rms <- RMS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev,
+           memb_dim = 'ensemble', dat_dim = NULL,
+           time_dim = 'syear', alpha = 0.05,
+           ncores = recipe$Analysis$ncores)
+
+#obs_noensdim <- ClimProjDiags::Subset(res$ano_obs_ev, "ensemble", 1,
+#                                      drop = "selected")
+
+#enssprerr <- easyVerification::veriApply(verifun = 'EnsSprErr',
+#                                         fcst = res$ano_hcst_ev,
+#                                         obs = obs_noensdim,
+#                                         tdim = which(names(dim(res$ano_hcst_ev))=='syear'),
+#                                         ensdim = which(names(dim(res$ano_hcst_ev))=='ensemble'),
+#                                         na.rm = FALSE,
+#                                         ncpus = recipe$Analysis$ncores)
+if (any(is.na(rpss$sign))) {
+  info(recipe$Run$logger,
+       "RPSS significance contains NAs; setting them to FALSE")
+  rpss$sign[is.na(rpss$sign)] <- FALSE
+}
+skill_metrics <- list(mean_bias = mean_bias,
+                      mean_bias_significance = mean_bias_sign,
+                      enscorr = enscorr$corr,
+                      enscorr_significance = enscorr$sign,
+                      enssprerr = enssprerr,
+                      enssprerr_significance = enssprerr_sign,
+                      rps = rps, rps_clim = rps_clim, crps = crps, crps_clim = crps_clim,
+                      rpss = rpss$rpss, rpss_significance = rpss$sign, #crps = crps,
+                      crpss = crpss$crpss, crpss_significance = crpss$sign,
+                      rms = rms$rms)
+skill_metrics <- lapply(skill_metrics, function(x) {
+                   InsertDim(drop(x), len = 1, pos = 1, name = 'var')})
+original <- recipe$Run$output_dir
+recipe$Run$output_dir <- paste0(original, "/outputs/Skill/")
+
+skill_metrics <- lapply(skill_metrics, function(x) {
+                    if (is.logical(x)) {
+                      dims <- dim(x)
+                      res <- as.numeric(x)
+                      dim(res) <- dims
+                    } else {
+                      res <- x
+                    }
+                    return(res)
+                  })
+# Compute and save metrics
+source("modules/Saving/Saving.R")
+#Saving <- Saving(recipe = recipe, data = data, skill = skill_metrics)
+ save_metrics(recipe = recipe,
+              metrics = skill_metrics,
+              data_cube = data$hcst, agg =
'global', + outdir = recipe$Run$output_dir) + +recipe$Run$output_dir <- original + +source("modules/Visualization/Visualization.R") +if (data$hcst$coords$longitude[1] != 0) { + skill_metrics <- lapply(skill_metrics, function(x) { + Subset(x, along = 'longitude', indices = c(182:360, 1:181)) + }) +} + info(recipe$Run$logger, + paste("lons:", data$hcst$coords$longitude)) + info(recipe$Run$logger, + paste("lons:", data$obs$coords$longitude)) + + +data$hcst$coords$longitude <- -179:180 + +Visualization(recipe, data, skill_metrics, significance = TRUE) + +source("tools/add_logo.R") +add_logo(recipe, "rsz_rsz_bsc_logo.png") + + diff --git a/full_ecvs_oper.R b/full_ecvs_oper.R new file mode 100644 index 00000000..ba65dbd5 --- /dev/null +++ b/full_ecvs_oper.R @@ -0,0 +1,294 @@ + +source("modules/Loading/Loading.R") +source("modules/Saving/Saving.R") +source("modules/Units/Units.R") +source("modules/Visualization/Visualization.R") +args = commandArgs(trailingOnly = TRUE) +recipe_file <- args[1] +#recipe_file <- "recipe_tas_oper.yml" +recipe <- read_atomic_recipe(recipe_file) +#recipe <- prepare_outputs(recipe_file) +# Load datasets +data <- Loading(recipe) +data <- Units(recipe, data) +data_summary(data$hcst, recipe) +data_summary(data$obs, recipe) + +# UKMO January 1993 is missing: +if (recipe$Analysis$Time$sdate == '0101') { + if (recipe$Analysis$Datasets$System$name == "UKMO-System602") { + if (1993 %in% recipe$Analysis$Time$hcst_start:recipe$Analysis$Time$hcst_end) { + info(recipe$Run$logger, + "UKMO January 1993 not available") + ind <- recipe$Analysis$Time$hcst_start:recipe$Analysis$Time$hcst_end + ind <- (1:length(ind))[-which(ind == 1993)] + data$hcst <- CST_Subset(data$hcst, along = 'syear', indices = ind) + data$obs <- CST_Subset(data$obs, along = 'syear', indices = ind) + sdate_dim <- dim(data$hcst$data)['syear'] + } + } +} + +calibraion_comp <- TRUE +anomalies_comp <- FALSE +source("crossval.R") + +## Define FAIR option: +fair <- TRUE + +## START SKILL ASSESSMENT: +# RPS +source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/GetProbs.R") +cal_hcst_probs_ev <- GetProbs(res$cal_hcst_ev, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'probs', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', abs_thresholds = res$lims_cal_hcst_tr, + ncores = recipe$Analysis$ncores) +cal_obs_probs_ev <- GetProbs(data$obs$data, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'probs', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = res$lims_cal_obs_tr, + ncores = recipe$Analysis$ncores) +source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/modules/Skill/R/tmp/RPS.R") +rps <- RPS(exp = ano_hcst_probs_ev, obs = ano_obs_probs_ev, memb_dim = NULL, + cat_dim = 'probs', cross.val = FALSE, time_dim = 'syear', + Fair = fair, nmemb = nmemb, + ncores = recipe$Analysis$ncores) +source("modules/Skill/R/RPS_clim.R") +rps_clim <- Apply(list(ano_obs_probs_ev), + target_dims = c('probs', 'syear'), + RPS_clim, bin_dim_abs = 'probs', Fair = fair, + cross.val = FALSE, ncores = recipe$Analysis$ncores)$output1 +# RPSS +source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/RPSS.R") +source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/RandomWalkTest.R") +rpss <- RPSS(exp = cal_hcst_probs_ev, obs = cal_obs_probs_ev, + time_dim = 'syear', memb_dim = NULL, + cat_dim = 'probs', Fair = fair, nmemb = nmemb, + # We should use a line like this + #abs_threshold = res$lims_ano_hcst_tr, + #prob_threshold = c(1/3, 2/3), + 
cross.val = FALSE,
+             ncores = recipe$Analysis$ncores)
+
+cal_fcst <- CST_Calibration(data$hcst, data$obs, data$fcst,
+                            sdate_dim = 'syear', memb_dim = 'ensemble')
+
+
+shp_file = "/esarchive/shapefiles/NUTS3/NUTS_RG_60M_2021_4326.shp"
+shp <- sf::st_read(shp_file) # class sf
+ids <- subset(shp, shp$LEVL_CODE == 3)
+
+
+######
+library(sf)
+library(ggplot2)
+#library(ggplot2, lib.loc = .libPaths()[2])
+library(RColorBrewer)
+library(rgeos)
+
+gusa <- map_data("world")
+
+pals <- brewer.pal(9, 'Reds')[3:9]
+pals <- c("#ffffb3", pals, '#525252')
+
+brks <- 0:5
+brks <- seq(258, 274, 2)
+
+plot_poly <- list()
+polys_data <- NULL
+
+
+## Now fill each polygon with the mean value
+  data_pol <- data.frame(data = apply(cal_fcst$data[1,1,1,1,1,1,,], 2, mean),
+                         LAU_ID = polys)
+  shp_data <- merge(shp, data_pol, by.x = "LAU_ID", by.y = "LAU_ID")
+
+# Magic lines below that convert a SpatialPolygonsDataFrame to a normal data frame
+  shp_data@data$id <- rownames(shp_data@data)
+  df.points <- fortify(shp_data, region = "id")
+  df.df <- plyr::join(df.points, shp_data@data, by = "id")
+ano <- df.df
+ano$data <- ano$data - fmi_clim_m$data
+
+ggplot(data = ano) + #df.df) +
+  geom_polygon(aes(long, lat, group = group), fill = "lightgrey", data = gusa) +
+  coord_map(projection = "stereographic",
+            xlim = c(lon_min, lon_max),
+            ylim = c(lat_min, lat_max)) +
+ # geom_polygon(aes(x = long, y = lat, fill = cut(data, breaks = seq(-14, 18, 2)),
+  geom_polygon(aes(x = long, y = lat, fill = cut(data,
+                                                 breaks = seq(0, 5, 0.5)),
+                   group = group), color = "grey", size = 0.08) +
+  geom_polygon(aes(x = long, y = lat, group = group), fill = NA,
+               color = "grey", size = 0.5, data = shp_herding) +
+  theme_bw() +
+  scale_fill_manual(values = c(#rev(brewer.pal(9, 'Blues')),
+                               'white', brewer.pal(9, 'Reds'), "black"),
+                    drop = FALSE, name = 'tas(ºC)') +
+  theme(panel.background = element_rect(fill = 'azure'),
+        text = element_text(family = "Times")) + # Colour of ocean
+  xlab('Longitude') + ylab('Latitude') +
+  scale_x_continuous(breaks = seq(-12, 45, 4), labels = waiver()) +
+  scale_y_continuous(breaks = seq(32, 70, 4), labels = waiver()) +
+  ggtitle(paste(month.name[mes], year)) # +
+
+
+######
+#PlotEquiMap(rps[1,,], lon = data$hcst$coords$longitude,
+#            lat = data$hcst$coords$latitude, filled.c = F,
+#            fileout = "test.png")
+# CRPS
+crps <- CRPS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev,
+             time_dim = 'syear', memb_dim = 'ensemble',
+             Fair = fair,
+             ncores = recipe$Analysis$ncores)
+# Not sure how this one should be computed:
+# It cannot be done here directly because it would mix in information from
+# the other years.
+#source("modules/Skill/R/CRPS_clim.R")
+# But computing it from ano_obs_tr does work here;
+# the result is the same as inside the loop.
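+# Reminder on the Fair option used throughout (a general fact about fair
+# scores, not specific to this script): with Fair = TRUE the (C)RPS is
+# adjusted to its expected value for an infinite ensemble (Ferro, 2014),
+# which makes systems with different ensemble sizes comparable; this is why
+# nmemb is passed alongside the probabilities in the RPS/RPSS calls above.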
+crps_clim <- CRPS(exp = res$ano_obs_tr, obs = res$ano_obs_ev,
+                  time_dim = 'syear', memb_dim = 'sample.syear',
+                  Fair = fair,
+                  ncores = recipe$Analysis$ncores)
+
+
+# CRPSS
+ref <- res$ano_obs_tr
+dim(ref) <- c(ensemble = as.numeric(sdate_dim) - 1,
+              nftime, nlats, nlons, sdate_dim)
+crpss <- CRPSS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, ref = ref,
+               memb_dim = 'ensemble', Fair = fair,
+               time_dim = 'syear', clim.cross.val = FALSE,
+               ncores = recipe$Analysis$ncores)
+
+
+
+# Corr
+source("modules/Skill/R/tmp/Corr.R")
+enscorr <- Corr(res$ano_hcst_ev, res$ano_obs_ev,
+                dat_dim = NULL,
+                time_dim = 'syear',
+                method = 'pearson',
+                memb_dim = 'ensemble',
+                memb = F,
+                conf = F,
+                pval = F,
+                sign = T,
+                alpha = 0.05,
+                ncores = recipe$Analysis$ncores)
+
+# Mean Bias
+#mean_bias <- Bias(res$ano_hcst_ev, res$ano_obs_ev,
+mean_bias <- Bias(data$hcst$data, data$obs$data,
+                  time_dim = 'syear',
+                  memb_dim = 'ensemble',
+                  ncores = recipe$Analysis$ncores)
+
+mean_bias_sign <- Apply(list(data$hcst$data, data$obs$data),
+                        target_dims = list(c('syear', 'ensemble'), 'syear'),
+                        fun = function(x, y) {
+                          if (!(any(is.na(x)) || any(is.na(y)))) {
+                            res <- t.test(x = y,
+                                          y = apply(x, 1, mean, na.rm = T),
+                                          alternative = "two.sided")$p.value
+                          } else {
+                            res <- NA
+                          }
+                          return(res)},
+                        ncores = sdate_dim)$output1
+mean_bias_sign <- mean_bias_sign <= 0.05
+#PlotEquiMap(mean_bias[1,1,1,1,1,,], lat = data$hcst$coords$latitude,
+#            lon = data$hcst$coords$longitude,
+#            dots = mean_bias_sign[1,1,1,1,1,,,1])
+
+# Spread error ratio
+source("SprErr.R")
+enssprerr <- SprErr(exp = res$ano_hcst_ev, obs = res$ano_obs_ev,
+                    memb_dim = 'ensemble', dat_dim = NULL,
+                    time_dim = 'syear', pval = TRUE,
+                    ncores = recipe$Analysis$ncores)
+enssprerr_sign <- enssprerr$p.val
+enssprerr_sign <- enssprerr_sign <= 0.05
+enssprerr <- enssprerr$ratio
+
+# RMSE
+rms <- RMS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev,
+           memb_dim = 'ensemble', dat_dim = NULL,
+           time_dim = 'syear', alpha = 0.05,
+           ncores = recipe$Analysis$ncores)
+
+#obs_noensdim <- ClimProjDiags::Subset(res$ano_obs_ev, "ensemble", 1,
+#                                      drop = "selected")
+
+#enssprerr <- easyVerification::veriApply(verifun = 'EnsSprErr',
+#                                         fcst = res$ano_hcst_ev,
+#                                         obs = obs_noensdim,
+#                                         tdim = which(names(dim(res$ano_hcst_ev))=='syear'),
+#                                         ensdim = which(names(dim(res$ano_hcst_ev))=='ensemble'),
+#                                         na.rm = FALSE,
+#                                         ncpus = recipe$Analysis$ncores)
+if (any(is.na(rpss$sign))) {
+  info(recipe$Run$logger,
+       "RPSS significance contains NAs; setting them to FALSE")
+  rpss$sign[is.na(rpss$sign)] <- FALSE
+}
+skill_metrics <- list(mean_bias = mean_bias,
+                      mean_bias_significance = mean_bias_sign,
+                      enscorr = enscorr$corr,
+                      enscorr_significance = enscorr$sign,
+                      enssprerr = enssprerr,
+                      enssprerr_significance = enssprerr_sign,
+                      rps = rps, rps_clim = rps_clim, crps = crps, crps_clim = crps_clim,
+                      rpss = rpss$rpss, rpss_significance = rpss$sign, #crps = crps,
+                      crpss = crpss$crpss, crpss_significance = crpss$sign,
+                      rms = rms$rms)
+skill_metrics <- lapply(skill_metrics, function(x) {
+                   InsertDim(drop(x), len = 1, pos = 1, name = 'var')})
+original <- recipe$Run$output_dir
+recipe$Run$output_dir <- paste0(original, "/outputs/Skill/")
+
+skill_metrics <- lapply(skill_metrics, function(x) {
+                    if (is.logical(x)) {
+                      dims <- dim(x)
+                      res <- as.numeric(x)
+                      dim(res) <- dims
+                    } else {
+                      res <- x
+                    }
+                    return(res)
+                  })
+# Compute and save metrics
+source("modules/Saving/Saving.R")
+#Saving <- Saving(recipe = recipe, data = data, skill = skill_metrics)
+ save_metrics(recipe = recipe,
+              metrics = skill_metrics,
+              data_cube = data$hcst, agg =
'global', + outdir = recipe$Run$output_dir) + +recipe$Run$output_dir <- original + +source("modules/Visualization/Visualization.R") +if (data$hcst$coords$longitude[1] != 0) { + skill_metrics <- lapply(skill_metrics, function(x) { + Subset(x, along = 'longitude', indices = c(182:360, 1:181)) + }) +} + info(recipe$Run$logger, + paste("lons:", data$hcst$coords$longitude)) + info(recipe$Run$logger, + paste("lons:", data$obs$coords$longitude)) + + +data$hcst$coords$longitude <- -179:180 + +Visualization(recipe, data, skill_metrics, significance = TRUE) + +source("tools/add_logo.R") +add_logo(recipe, "rsz_rsz_bsc_logo.png") + + diff --git a/recipe_tas_oper.yml b/recipe_tas_oper.yml new file mode 100644 index 00000000..b7b7e0f6 --- /dev/null +++ b/recipe_tas_oper.yml @@ -0,0 +1,98 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: tas + freq: monthly_mean + units: C + flux: no + Datasets: + System: + name: ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + Multimodel: no # Mandatory, bool: Either yes/true or no/false + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0501' + fcst_year: '2024' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2000' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + latmin: 20 + latmax: 80 + lonmin: -20 + lonmax: 40 + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. + type: "to_reference" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: yes + cross_validation: no + save: none + Time_aggregation: + execute: no + Calibration: + method: envos # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss EnsSprErr + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. 
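+      # Each inner list defines one set of category limits: [1/3, 2/3] gives
+      # terciles and [1/10, 9/10] the outer deciles; the fractions are
+      # evaluated as R expressions by the cross-validation code.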
+ save: none + Indicators: + index: no + Visualization: + plots: skill_metrics #forecast_ensemble_mean most_likely_terciles + multi_panel: no + dots: both + projection: Robinson + #projection: robinson + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: NULL + metric: mean_bias enscorr rpss crpss EnsSprErr + metric_aggregation: 'skill' + #inf_to_na: yes + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: esarchive + output_dir: /esarchive/scratch/nperez/cs_oper/ # replace with the directory where you want to save the outputs + code_dir: /esarchive/scratch/nperez/git4/sunset/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + -- GitLab From e6ecffe7c6e9fa1fb5bc3312e3347311fca2b310 Mon Sep 17 00:00:00 2001 From: nperez Date: Tue, 28 May 2024 18:26:54 +0200 Subject: [PATCH 13/53] fix pipeline and improve crossval --- crossval.R | 309 ++++++++++++++++++++++++++----------------- full_ecvs_oper.R | 3 +- recipe_tas_oper.yml | 10 +- tools/check_recipe.R | 1 - 4 files changed, 195 insertions(+), 128 deletions(-) diff --git a/crossval.R b/crossval.R index 4a725457..875b2a9c 100644 --- a/crossval.R +++ b/crossval.R @@ -1,52 +1,66 @@ # Full-cross-val workflow ## This code should be valid for individual months and temporal averages -## data dimensions -sdate_dim <- dim(data$hcst$data)['syear'] -nmemb <- dim(data$hcst$data)['ensemble'] -nftime <- dim(data$hcst$data)['time'] - -cross <- CSTools:::.make.eval.train.dexes('leave-one-out', sdate_dim, NULL) -outdim <- length(cross) # leave-one-out should be equal to sdate_dim - -# What we need to return? 
- -if ('latitude' %in% names(dim(data$hcst$data))) { - nlats <- dim(data$hcst$data)['latitude'] - nlons <- dim(data$hcst$data)['longitude'] -} else if ('region' %in% names(dim(data$hcst$data))) { - nregions <- dim(data$hcst$data)['region'] -} -ano_hcst_ev_res <- NULL -ano_obs_ev_res <- NULL -ano_obs_tr_res <- NULL +full_crossval_anomalies <- function(recipe, data) { + cross.method <- recipe$Analysis$Skill$cross.method + # TODO move check + if (is.null(cross.method)) { + cross.method <- 'leave-one-out' + } + # TODO a loop for pair of percentiles + probs <- recipe$Analysis$Workflow$Probabilities$percentiles + cal.method <- recipe$Analysis$Workflow$Calibration$method -cal_hcst_ev_res <- NULL -cal_hcst_tr_res <- NULL -obs_ev_res <- NULL -obs_tr_res <- NULL + ## data dimensions + sdate_dim <- dim(data$hcst$data)['syear'] + nmemb <- dim(data$hcst$data)['ensemble'] + nftime <- dim(data$hcst$data)['time'] + # spatial dims + if ('latitude' %in% names(dim(data$hcst$data))) { + nlats <- dim(data$hcst$data)['latitude'] + nlons <- dim(data$hcst$data)['longitude'] + ev_dim_names <- c('dat', 'var', 'sday', 'sweek', 'time', + 'latitude', 'longitude', 'ensemble', 'syear') + tr_dim_names <- c('dat', 'var', 'sday', 'sweek', 'ensemble', 'time', + 'latitude', 'longitude', 'unneeded', 'syear') + } else if ('region' %in% names(dim(data$hcst$data))) { + nregions <- dim(data$hcst$data)['region'] + ev_dim_names <- c('dat', 'var', 'sday', 'sweek', 'time', + 'region', 'ensemble', 'syear') + tr_dim_names <- c('dat', 'var', 'sday', 'sweek', 'ensemble', 'time', + 'region', 'unneeded', 'syear') + } -lims_ano_hcst_tr_res <- NULL -lims_ano_obs_tr_res <- NULL + # TODO fix it to use new version https://earth.bsc.es/gitlab/external/cstools/-/blob/dev-cross-indices/R/CST_Calibration.R#L570 + cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, + amt.points = sdate_dim, + amt.points_cor = NULL) # k = ? 
+ outdim <- length(cross) # leave-one-out should be equal to sdate_dim -lims_cal_hcst_tr_res <- NULL -lims_cal_obs_tr_res <- NULL + ## output objects + ano_hcst_ev_res <- NULL + ano_obs_ev_res <- NULL + ano_obs_tr_res <- NULL + # as long as probs requested in recipe: + lims_ano_hcst_tr_res <- lapply(probs, function(X) {NULL}) + lims_ano_obs_tr_res <- lapply(probs, function(X) {NULL}) + hcst_probs_ev <- lapply(probs, function(x){NULL}) + obs_probs_ev <- lapply(probs, function(x){NULL}) -for (t in 1:outdim) { - info(recipe$Run$logger, paste("crossval:", t)) + for (t in 1:outdim) { + info(recipe$Run$logger, paste("crossval:", t)) - # subset years: Subset works at BSC not at Athos - ## training indices - obs_tr <- Subset(data$obs$data, along = 'syear', - indices = cross[[t]]$train.dexes) - hcst_tr <- Subset(data$hcst$data, along = 'syear', - indices = cross[[t]]$train.dexes) - ## evaluation indices - hcst_ev <- Subset(data$hcst$data, along = 'syear', - indices = cross[[t]]$eval.dexes) - obs_ev <- Subset(data$obs$data, along = 'syear', - indices = cross[[t]]$eval.dexes) - if (anomalies_comp) { + # subset years: Subset works at BSC not at Athos + ## training indices + obs_tr <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$train.dexes) + hcst_tr <- Subset(data$hcst$data, along = 'syear', + indices = cross[[t]]$train.dexes) + ## evaluation indices + hcst_ev <- Subset(data$hcst$data, along = 'syear', + indices = cross[[t]]$eval.dexes, drop = 'selected') + obs_ev <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$eval.dexes, drop = 'selected') # compute climatology: clim_obs_tr <- MeanDims(obs_tr, 'syear') clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble')) @@ -59,92 +73,147 @@ for (t in 1:outdim) { ncores = recipe$Analysis$ncores) ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr, ncores = recipe$Analysis$ncores) - #rm("clim_obs_tr", "clim_hcst_tr", "obs_tr", "hcst_tr", "obs_ev", "hcst_ev") - #Category limits + # compute category limits lims_ano_hcst_tr <- Apply(ano_hcst_tr, target_dims = c('syear', 'ensemble'), - fun = function(x) { - quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)}, - output_dims = 'probs', - ncores = recipe$Analysis$ncores)$output1 + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + ps <- sapply(ps, function (x) eval(parse(text = x))) + quantile(as.vector(x), ps, na.rm = TRUE)})}, + output_dims = lapply(probs, function(x) {'cat'}), + prob_lims = probs, + ncores = recipe$Analysis$ncores) lims_ano_obs_tr <- Apply(ano_obs_tr, target_dims = c('syear'),#, 'ensemble'), - fun = function(x) { - quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)}, - output_dims = 'probs', - ncores = recipe$Analysis$ncores)$output1 + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + ps <- sapply(ps, function (x) eval(parse(text = x))) + quantile(as.vector(x), ps, na.rm = TRUE)})}, + output_dims = lapply(probs, function(x){'cat'}), + prob_lims = probs, + ncores = recipe$Analysis$ncores) + #store results ano_hcst_ev_res <- abind(ano_hcst_ev_res, ano_hcst_ev, - along = length(dim(ano_hcst_ev)) + 1) + along = length(dim(ano_hcst_ev)) + 1) ano_obs_ev_res <- abind(ano_obs_ev_res, ano_obs_ev, - along = length(dim(ano_obs_ev)) + 1) + along = length(dim(ano_obs_ev)) + 1) ano_obs_tr_res <- abind(ano_obs_tr_res, ano_obs_tr, - along = length(dim(ano_obs_tr)) + 1) - lims_ano_hcst_tr_res <- abind(lims_ano_hcst_tr_res, lims_ano_hcst_tr, - along = length(dim(lims_ano_hcst_tr)) + 1) - lims_ano_obs_tr_res <- abind(lims_ano_obs_tr_res, lims_ano_obs_tr, - 
along = length(dim(lims_ano_obs_tr)) + 1) + along = length(dim(ano_obs_tr)) + 1) + for(ps in 1:length(probs)) { + lims_ano_hcst_tr_res[[ps]] <- abind(lims_ano_hcst_tr_res[[ps]], lims_ano_hcst_tr[[ps]], + along = length(dim(lims_ano_hcst_tr[[ps]])) + 1) + lims_ano_obs_tr_res[[ps]] <- abind(lims_ano_obs_tr_res[[ps]], lims_ano_obs_tr[[ps]], + along = length(dim(lims_ano_obs_tr[[ps]])) + 1) + } } - if (calibraion_comp) { - cal_hcst_tr <- Calibration(hcst_tr, obs_tr, exp_cor = NULL, - cal.method = "mse_min", eval.method = "in-sample", - multi.model = FALSE, na.fill = TRUE, - na.rm = TRUE, apply_to = NULL, alpha = NULL, - memb_dim = 'ensemble', sdate_dim = 'syear', - dat_dim = NULL, - ncores = recipe$Analysis$ncores) - cal_hcst_ev <- Calibration(hcst_tr, obs_tr, exp_cor = hcst_ev, - cal.method = "mse_min", - eval.method = "hindcast-vs-forecast", - multi.model = FALSE, na.fill = TRUE, - na.rm = TRUE, apply_to = NULL, alpha = NULL, - memb_dim = 'ensemble', sdate_dim = 'syear', - dat_dim = NULL, - ncores = recipe$Analysis$ncores) - #Category limits - lims_cal_hcst_tr <- Apply(cal_hcst_tr, target_dims = c('syear', 'ensemble'), - fun = function(x) { - quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)}, - output_dims = 'probs', - ncores = recipe$Analysis$ncores)$output1 - lims_cal_obs_tr <- Apply(obs_tr, target_dims = c('syear'),#, 'ensemble'), - fun = function(x) { - quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)}, - output_dims = 'probs', - ncores = recipe$Analysis$ncores)$output1 - cal_hcst_tr <- Subset(cal_hcst_tr, indices = 1, along = 'syear', drop = 'selected') - cal_hcst_tr_res <- abind(cal_hcst_tr_res, cal_hcst_tr, - along = length(dim(cal_hcst_tr)) + 1) - names(dim(cal_hcst_tr_res)) <- c(names(dim(cal_hcst_tr)), names(sdate_dim)) - cal_hcst_ev <- Subset(cal_hcst_ev, indices = 1, along = 'syear', drop = 'selected') - cal_hcst_ev_res <- abind(cal_hcst_ev_res, cal_hcst_ev, - along = length(dim(cal_hcst_ev)) + 1) - names(dim(cal_hcst_ev_res)) <- c(names(dim(cal_hcst_ev)), names(sdate_dim)) + info(recipe$Run$logger, + "#### Anomalies Cross-validation loop ended #####") + gc() - obs_tr_res <- abind(obs_tr_res, obs_tr, - along = length(dim(obs_tr)) + 1) - names(dim(obs_tr_res)) <- c(names(dim(obs_tr)), names(sdate_dim)) - lims_cal_hcst_tr_res <- abind(lims_cal_hcst_tr_res, lims_cal_hcst_tr, - along = length(dim(lims_cal_hcst_tr)) + 1) - names(dim(lims_cal_hcst_tr_res)) <- c(names(dim(lims_cal_hcst_tr)), names(sdate_dim)) - lims_cal_obs_tr <- Subset(lims_cal_obs_tr, indices = 1, along = 'ensemble', drop = 'selected') - lims_cal_obs_tr_res <- abind(lims_cal_obs_tr_res, lims_cal_obs_tr, - along = length(dim(lims_cal_obs_tr)) + 1) - names(dim(lims_cal_obs_tr_res)) <- c(names(dim(lims_cal_obs_tr)), names(sdate_dim)) + # add dim names: + names(dim(ano_hcst_ev_res)) <- ev_dim_names + names(dim(ano_obs_ev_res)) <- ev_dim_names + names(dim(ano_obs_tr_res)) <- tr_dim_names + for(ps in 1:length(probs)) { + names(dim(lims_ano_hcst_tr_res[[ps]])) <- c('cat', 'dat', 'var', 'sday', 'sweek', + 'time', 'latitude', 'longitude', 'syear') + names(dim(lims_ano_obs_tr_res[[ps]])) <- c('cat', 'dat', 'var', 'sday', 'sweek', + 'time', 'latitude', 'longitude', 'extra', 'syear') + lims_ano_obs_tr_res[[ps]] <- Subset(lims_ano_obs_tr_res[[ps]], + along = 'extra', indices = 1, drop = 'selected') } - gc() - res <- list(ano_hcst_ev = ano_hcst_ev_res, - ano_obs_ev = ano_obs_ev_res, - ano_obs_tr = ano_obs_tr_res, #reference forecast for the CRPSS - lims_ano_hcst_tr = lims_ano_hcst_tr_res, - lims_ano_obs_tr = lims_ano_obs_tr_res, 
- cal_hcst_tr = cal_hcst_tr_res, - cal_hcst_ev = cal_hcst_ev_res, - obs_tr = obs_tr_res, - lims_cal_hcst_tr = lims_cal_hcst_tr_res, - lims_cal_obs_tr = lims_cal_obs_tr_res ) + # Compute Probabilities + source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/GetProbs.R") + for (ps in 1:length(probs)) { + hcst_probs_ev[[ps]] <- GetProbs(ano_hcst_ev_res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', abs_thresholds = lims_ano_hcst_tr[[ps]], + ncores = recipe$Analysis$ncores) + obs_probs_ev[[ps]] <- GetProbs(ano_obs_ev_res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_ano_obs_tr_res[[ps]], + ncores = recipe$Analysis$ncores) + } + # Convert to s2dv_cubes + ano_hcst <- data$hcst + ano_hcst$data <- ano_hcst_ev_res + ano_obs <- data$obs + ano_obs$data <- ano_obs_ev_res + if (is.null(data$fcst)) { + # Forecast anomalies: + clim_hcst <- Apply(data$hcst$data, + target_dims = c('syear', 'ensemble'), + mean, + na.rm = na.rm, + ncores = recipe$Analysis$ncores)$output1 + data$fcst$data <- Ano(data = data$fcst$data, clim = clim_hcst) + } + info(recipe$Run$logger, + "#### Anomalies and Probabilities Done #####") + if (recipe$Analysis$Workflow$Anomalies$save != 'none') { + info(recipe$Run$logger, "##### START SAVING ANOMALIES #####") + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Anomalies/") + # Save forecast + if ((recipe$Analysis$Workflow$Anomalies$save %in% + c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) { + save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') + } + # Save hindcast + if (recipe$Analysis$Workflow$Anomalies$save %in% + c('all', 'exp_only')) { + save_forecast(recipe = recipe, data_cube = ano_hcst, type = 'hcst') + } + # Save observation + if (recipe$Analysis$Workflow$Anomalies$save == 'all') { + save_observations(recipe = recipe, data_cube = ano_obs) + } + } + # Save probability bins + result <- list() + all_names <- NULL + for (ps in 1:length(probs)) { + for (perc in 1:(length(recipe$Analysis$Workflow$Probabilities$percentiles[[ps]]) + 1)) { +#ps <- 1 + if (perc == 1) { + name_elem <- paste0("below_", recipe$Analysis$Workflow$Probabilities$percentiles[[ps]][perc]) + } else if (perc == length(recipe$Analysis$Workflow$Probabilities$percentiles[[ps]]) + 1) { + name_elem <- paste0("above_", recipe$Analysis$Workflow$Probabilities$percentiles[[ps]][perc-1]) + } else { + name_elem <- paste0("between_", recipe$Analysis$Workflow$Probabilities$percentiles[[ps]][perc-1], + "_and_", recipe$Analysis$Workflow$Probabilities$percentiles[[ps]][perc]) + } + result <- append(list(Subset(hcst_probs_ev[[ps]], along = 'cat', indices = ps, drop = 'all')), result) + all_names <- c(all_names, name_elem) + } + } + names(result) <- all_names + if (!('var' %in% names(dim(result[[1]])))) { + result <- lapply(result, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } + if (recipe$Analysis$Workflow$Probabilities$save %in% + c('all', 'bins_only')) { + save_probabilities(recipe = recipe, probs = result, + data_cube = data$hcst, type = "hcst") + # TODO Forecast + probs_fcst <- NULL + if (!is.null(probs_fcst)) { + save_probabilities(recipe = recipe, probs = probs_fcst, + data_cube = data$fcst, type = "fcst") + } + } +# } + return(list(hcst = ano_hcst, obs = ano_obs_ev, fcst = data$fcst, + hcst.full_val = data$hcst, obs.full_val = data$obs, + cat_lims = list(hcst_tr = 
lims_ano_hcst_tr_res, + obs_tr = lims_ano_obs_tr_res), + probs = list(hcst_ev = hcst_probs_ev, + obs_ev = obs_probs_ev), + ref_obs_tr = ano_obs_tr_res)) } -info(recipe$Run$logger, - paste0("Cross-validation loop ended, returning elements:", - paste(names(res), collapse = " "))) - - - diff --git a/full_ecvs_oper.R b/full_ecvs_oper.R index ba65dbd5..e2976eba 100644 --- a/full_ecvs_oper.R +++ b/full_ecvs_oper.R @@ -29,9 +29,8 @@ if (recipe$Analysis$Time$sdate == '0101') { } } -calibraion_comp <- TRUE -anomalies_comp <- FALSE source("crossval.R") +res <- full_crossval_anomalies(recipe = recipe, data = data) ## Define FAIR option: fair <- TRUE diff --git a/recipe_tas_oper.yml b/recipe_tas_oper.yml index b7b7e0f6..2b39add3 100644 --- a/recipe_tas_oper.yml +++ b/recipe_tas_oper.yml @@ -21,12 +21,12 @@ Analysis: hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' hcst_end: '2000' # Mandatory, int: Hindcast end year 'YYYY' ftime_min: 1 # Mandatory, int: First leadtime time step in months - ftime_max: 6 # Mandatory, int: Last leadtime time step in months + ftime_max: 3 # Mandatory, int: Last leadtime time step in months Region: latmin: 20 - latmax: 80 + latmax: 10 lonmin: -20 - lonmax: 40 + lonmax: 10 Regrid: method: conservative # Mandatory, str: Interpolation method. See docu. type: "to_reference" @@ -35,7 +35,7 @@ Analysis: Anomalies: compute: yes cross_validation: no - save: none + save: all Time_aggregation: execute: no Calibration: @@ -48,7 +48,7 @@ Analysis: cross_validation: yes Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. - save: none + save: all Indicators: index: no Visualization: diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 0b9919b8..245e4e3d 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -118,7 +118,6 @@ check_recipe <- function(recipe) { error_status <- TRUE } if (!tolower(recipe$Analysis$Datasets$Multimodel$approach) %in% -ls/check_recipe.R MULTIMODEL_METHODS) { error(recipe$Run$logger, paste("The specified approach for the multimodel is not valid.", -- GitLab From e5b87804ec019fcef397ab1a42396fb8644777aa Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 29 May 2024 18:56:42 +0200 Subject: [PATCH 14/53] cross val saved --- example_scripts/exec_units.R | 8 +- crossval.R => full_crossval_anomalies.R | 142 +++++++----- full_ecvs_oper.R | 273 +----------------------- recipe_tas_oper.yml | 2 +- skill_full_crossval.R | 147 +++++++++++++ 5 files changed, 249 insertions(+), 323 deletions(-) rename crossval.R => full_crossval_anomalies.R (65%) create mode 100644 skill_full_crossval.R diff --git a/example_scripts/exec_units.R b/example_scripts/exec_units.R index 819121c9..081038d2 100644 --- a/example_scripts/exec_units.R +++ b/example_scripts/exec_units.R @@ -21,16 +21,16 @@ source("tools/prepare_outputs.R") recipe <- prepare_outputs(recipe_file) # Load datasets -data <- load_datasets(recipe) +data <- Loading(recipe) # Units transformation source("modules/Units/Units.R") test <- Units(recipe, data) # Calibrate datasets -data <- calibrate_datasets(recipe, test) +data <- Calibration(recipe, test) # Compute skill metrics -skill_metrics <- compute_skill_metrics(recipe, data) +skill_metrics <- Skill(recipe, data) # Compute percentiles and probability bins -probabilities <- compute_probabilities(recipe, data) +probabilities <- Probabilities(recipe, data) # Export all data to netCDF ## TODO: Fix plotting # save_data(recipe, data, skill_metrics, probabilities) diff --git a/crossval.R b/full_crossval_anomalies.R 
similarity index 65% rename from crossval.R rename to full_crossval_anomalies.R index 875b2a9c..649f841b 100644 --- a/crossval.R +++ b/full_crossval_anomalies.R @@ -1,5 +1,6 @@ # Full-cross-val workflow ## This code should be valid for individual months and temporal averages +source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/GetProbs.R") full_crossval_anomalies <- function(recipe, data) { cross.method <- recipe$Analysis$Skill$cross.method @@ -7,10 +8,15 @@ full_crossval_anomalies <- function(recipe, data) { if (is.null(cross.method)) { cross.method <- 'leave-one-out' } - # TODO a loop for pair of percentiles - probs <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + round(eval(parse(text = y)),2)})}) cal.method <- recipe$Analysis$Workflow$Calibration$method - + ncores <- recipe$Analysis$ncores + na.rm <- recipe$Analysis$remove_NAs + ## TODO remove if the recipe is checked: + na.rm <- TRUE ## data dimensions sdate_dim <- dim(data$hcst$data)['syear'] nmemb <- dim(data$hcst$data)['ensemble'] @@ -19,11 +25,13 @@ full_crossval_anomalies <- function(recipe, data) { if ('latitude' %in% names(dim(data$hcst$data))) { nlats <- dim(data$hcst$data)['latitude'] nlons <- dim(data$hcst$data)['longitude'] + agg = 'global' ev_dim_names <- c('dat', 'var', 'sday', 'sweek', 'time', 'latitude', 'longitude', 'ensemble', 'syear') tr_dim_names <- c('dat', 'var', 'sday', 'sweek', 'ensemble', 'time', 'latitude', 'longitude', 'unneeded', 'syear') } else if ('region' %in% names(dim(data$hcst$data))) { + agg = 'region' nregions <- dim(data$hcst$data)['region'] ev_dim_names <- c('dat', 'var', 'sday', 'sweek', 'time', 'region', 'ensemble', 'syear') @@ -42,10 +50,12 @@ full_crossval_anomalies <- function(recipe, data) { ano_obs_ev_res <- NULL ano_obs_tr_res <- NULL # as long as probs requested in recipe: - lims_ano_hcst_tr_res <- lapply(probs, function(X) {NULL}) - lims_ano_obs_tr_res <- lapply(probs, function(X) {NULL}) - hcst_probs_ev <- lapply(probs, function(x){NULL}) - obs_probs_ev <- lapply(probs, function(x){NULL}) + lims_ano_hcst_tr_res <- lapply(categories, function(X) {NULL}) + lims_ano_obs_tr_res <- lapply(categories, function(X) {NULL}) + + fcst_probs <- lapply(categories, function(x){NULL}) + hcst_probs_ev <- lapply(categories, function(x){NULL}) + obs_probs_ev <- lapply(categories, function(x){NULL}) for (t in 1:outdim) { info(recipe$Run$logger, paste("crossval:", t)) @@ -66,30 +76,28 @@ full_crossval_anomalies <- function(recipe, data) { clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble')) # compute anomalies: ano_obs_tr <- s2dv::Ano(obs_tr, clim_obs_tr, - ncores = recipe$Analysis$ncores) + ncores = ncores) ano_hcst_tr <- s2dv::Ano(hcst_tr, clim_hcst_tr, - ncores = recipe$Analysis$ncores) + ncores = ncores) ano_hcst_ev <- s2dv::Ano(hcst_ev, clim_hcst_tr, - ncores = recipe$Analysis$ncores) + ncores = ncores) ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr, - ncores = recipe$Analysis$ncores) + ncores = ncores) # compute category limits lims_ano_hcst_tr <- Apply(ano_hcst_tr, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - ps <- sapply(ps, function (x) eval(parse(text = x))) - quantile(as.vector(x), ps, na.rm = TRUE)})}, - output_dims = lapply(probs, function(x) {'cat'}), - prob_lims = probs, - ncores = recipe$Analysis$ncores) + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = 
lapply(categories, function(x) {'cat'}), + prob_lims = categories, + ncores = ncores) lims_ano_obs_tr <- Apply(ano_obs_tr, target_dims = c('syear'),#, 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - ps <- sapply(ps, function (x) eval(parse(text = x))) - quantile(as.vector(x), ps, na.rm = TRUE)})}, - output_dims = lapply(probs, function(x){'cat'}), - prob_lims = probs, - ncores = recipe$Analysis$ncores) + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x){'cat'}), + prob_lims = categories, + ncores = ncores) #store results ano_hcst_ev_res <- abind(ano_hcst_ev_res, ano_hcst_ev, along = length(dim(ano_hcst_ev)) + 1) @@ -97,7 +105,7 @@ full_crossval_anomalies <- function(recipe, data) { along = length(dim(ano_obs_ev)) + 1) ano_obs_tr_res <- abind(ano_obs_tr_res, ano_obs_tr, along = length(dim(ano_obs_tr)) + 1) - for(ps in 1:length(probs)) { + for(ps in 1:length(categories)) { lims_ano_hcst_tr_res[[ps]] <- abind(lims_ano_hcst_tr_res[[ps]], lims_ano_hcst_tr[[ps]], along = length(dim(lims_ano_hcst_tr[[ps]])) + 1) lims_ano_obs_tr_res[[ps]] <- abind(lims_ano_obs_tr_res[[ps]], lims_ano_obs_tr[[ps]], @@ -108,11 +116,13 @@ full_crossval_anomalies <- function(recipe, data) { "#### Anomalies Cross-validation loop ended #####") gc() - # add dim names: + # Add dim names: names(dim(ano_hcst_ev_res)) <- ev_dim_names names(dim(ano_obs_ev_res)) <- ev_dim_names names(dim(ano_obs_tr_res)) <- tr_dim_names - for(ps in 1:length(probs)) { + # To make crps_clim to work the reference forecast need to have same dims as obs: + ano_obs_tr_res <- Subset(ano_obs_tr_res, along = 'unneeded', indices = 1, drop = 'selected') + for(ps in 1:length(categories)) { names(dim(lims_ano_hcst_tr_res[[ps]])) <- c('cat', 'dat', 'var', 'sday', 'sweek', 'time', 'latitude', 'longitude', 'syear') names(dim(lims_ano_obs_tr_res[[ps]])) <- c('cat', 'dat', 'var', 'sday', 'sweek', @@ -120,37 +130,56 @@ full_crossval_anomalies <- function(recipe, data) { lims_ano_obs_tr_res[[ps]] <- Subset(lims_ano_obs_tr_res[[ps]], along = 'extra', indices = 1, drop = 'selected') } + + # Forecast anomalies: + if (!is.null(data$fcst)) { + clim_hcst <- Apply(ano_hcst_ev_res, + target_dims = c('syear', 'ensemble'), + mean, + na.rm = na.rm, + ncores = ncores)$output1 + data$fcst$data <- Ano(data = data$fcst$data, clim = clim_hcst) + # Terciles limits using the whole hindcast period: + lims_fcst <- Apply(ano_hcst_ev_res, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x) {'cat'}), + prob_lims = categories, + ncores = ncores) + } + # Compute Probabilities - source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/GetProbs.R") - for (ps in 1:length(probs)) { + for (ps in 1:length(categories)) { hcst_probs_ev[[ps]] <- GetProbs(ano_hcst_ev_res, time_dim = 'syear', prob_thresholds = NULL, bin_dim_abs = 'cat', indices_for_quantiles = NULL, memb_dim = 'ensemble', abs_thresholds = lims_ano_hcst_tr[[ps]], - ncores = recipe$Analysis$ncores) + ncores = ncores) obs_probs_ev[[ps]] <- GetProbs(ano_obs_ev_res, time_dim = 'syear', prob_thresholds = NULL, bin_dim_abs = 'cat', indices_for_quantiles = NULL, memb_dim = 'ensemble', abs_thresholds = lims_ano_obs_tr_res[[ps]], - ncores = recipe$Analysis$ncores) + ncores = ncores) + if (!is.null(data$fcst)) { + fcst_probs[[ps]] <- GetProbs(data$fcst$data, time_dim = 'syear', + prob_thresholds = NULL, + 
bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_fcst[[ps]], + ncores = ncores) + } } - # Convert to s2dv_cubes + # Convert to s2dv_cubes the resulting anomalies ano_hcst <- data$hcst ano_hcst$data <- ano_hcst_ev_res ano_obs <- data$obs ano_obs$data <- ano_obs_ev_res - if (is.null(data$fcst)) { - # Forecast anomalies: - clim_hcst <- Apply(data$hcst$data, - target_dims = c('syear', 'ensemble'), - mean, - na.rm = na.rm, - ncores = recipe$Analysis$ncores)$output1 - data$fcst$data <- Ano(data = data$fcst$data, clim = clim_hcst) - } + info(recipe$Run$logger, "#### Anomalies and Probabilities Done #####") if (recipe$Analysis$Workflow$Anomalies$save != 'none') { @@ -175,18 +204,19 @@ full_crossval_anomalies <- function(recipe, data) { # Save probability bins result <- list() all_names <- NULL - for (ps in 1:length(probs)) { - for (perc in 1:(length(recipe$Analysis$Workflow$Probabilities$percentiles[[ps]]) + 1)) { + for (ps in 1:length(categories)) { + for (perc in 1:(length(categories[[ps]]) + 1)) { #ps <- 1 if (perc == 1) { - name_elem <- paste0("below_", recipe$Analysis$Workflow$Probabilities$percentiles[[ps]][perc]) - } else if (perc == length(recipe$Analysis$Workflow$Probabilities$percentiles[[ps]]) + 1) { - name_elem <- paste0("above_", recipe$Analysis$Workflow$Probabilities$percentiles[[ps]][perc-1]) + name_elem <- paste0("below_", categories[[ps]][perc]) + } else if (perc == length(categories[[ps]]) + 1) { + name_elem <- paste0("above_", categories[[ps]][perc-1]) } else { - name_elem <- paste0("between_", recipe$Analysis$Workflow$Probabilities$percentiles[[ps]][perc-1], - "_and_", recipe$Analysis$Workflow$Probabilities$percentiles[[ps]][perc]) + name_elem <- paste0("between_", categories[[ps]][perc-1], + "_and_", categories[[ps]][perc]) } - result <- append(list(Subset(hcst_probs_ev[[ps]], along = 'cat', indices = ps, drop = 'all')), result) + result <- append(list(Subset(hcst_probs_ev[[ps]], + along = 'cat', indices = ps, drop = 'all')), result) all_names <- c(all_names, name_elem) } } @@ -199,16 +229,28 @@ full_crossval_anomalies <- function(recipe, data) { if (recipe$Analysis$Workflow$Probabilities$save %in% c('all', 'bins_only')) { save_probabilities(recipe = recipe, probs = result, - data_cube = data$hcst, type = "hcst") + data_cube = data$hcst, agg = agg, + type = "hcst") # TODO Forecast probs_fcst <- NULL if (!is.null(probs_fcst)) { save_probabilities(recipe = recipe, probs = probs_fcst, - data_cube = data$fcst, type = "fcst") + data_cube = data$fcst, agg = agg, + type = "fcst") } } -# } - return(list(hcst = ano_hcst, obs = ano_obs_ev, fcst = data$fcst, + # Save ensemble mean for multimodel option: + hcst_EM <- MeanDims(ano_hcst$data, 'ensemble', drop = T) + save_metrics(recipe = recipe, metrics = list(hcst_EM = hcst_EM), + data_cube = data$hcst, agg = agg, + module = "statistics") + if (!is.null(data$fcst)) { + fcst_EM <- MeanDims(data$fcst$data, 'ensemble', drop = T) + save_metrics(recipe = recipe, metrics = list(fcst_EM = fcst_EM), + data_cube = data$fcst, agg = agg, + module = "statistics") + } + return(list(hcst = ano_hcst, obs = ano_obs, fcst = data$fcst, hcst.full_val = data$hcst, obs.full_val = data$obs, cat_lims = list(hcst_tr = lims_ano_hcst_tr_res, obs_tr = lims_ano_obs_tr_res), diff --git a/full_ecvs_oper.R b/full_ecvs_oper.R index e2976eba..00050965 100644 --- a/full_ecvs_oper.R +++ b/full_ecvs_oper.R @@ -14,278 +14,15 @@ data <- Units(recipe, data) data_summary(data$hcst, recipe) data_summary(data$obs, recipe) -# 
UKMO January 1993 is missing: -if (recipe$Analysis$Time$sdate == '0101') { - if (recipe$Analysis$Datasets$System$name == "UKMO-System602") { - if (1993 %in% recipe$Analysis$Time$hcst_start:recipe$Analysis$Time$hcst_end) { - info(recipe$Run$logger, - "UKMO January 1993 not available") - ind <- recipe$Analysis$Time$hcst_start:recipe$Analysis$Time$hcst_end - ind <- (1:length(ind))[-which(ind == 1993)] - data$hcst <- CST_Subset(data$hcst, along = 'syear', indices = ind) - data$obs <- CST_Subset(data$obs, along = 'syear', indices = ind) - sdate_dim <- dim(data$hcst$data)['syear'] - } - } -} -source("crossval.R") +source("full_crossval_anomalies.R") res <- full_crossval_anomalies(recipe = recipe, data = data) -## Define FAIR option: -fair <- TRUE +source("skill_full_crossval.R") +skill_metrics <- skill_full_crossval(recipe = recipe, data_crossval = res, + fair = FALSE, nmemb = NULL, nmemb_ref = NULL) -## START SKILL ASSESSMENT: -# RPS -source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/GetProbs.R") -cal_hcst_probs_ev <- GetProbs(res$cal_hcst_ev, time_dim = 'syear', - prob_thresholds = NULL, - bin_dim_abs = 'probs', - indices_for_quantiles = NULL, - memb_dim = 'ensemble', abs_thresholds = res$lims_cal_hcst_tr, - ncores = recipe$Analysis$ncores) -cal_obs_probs_ev <- GetProbs(data$obs$data, time_dim = 'syear', - prob_thresholds = NULL, - bin_dim_abs = 'probs', - indices_for_quantiles = NULL, - memb_dim = 'ensemble', - abs_thresholds = res$lims_cal_obs_tr, - ncores = recipe$Analysis$ncores) -source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/modules/Skill/R/tmp/RPS.R") -rps <- RPS(exp = ano_hcst_probs_ev, obs = ano_obs_probs_ev, memb_dim = NULL, - cat_dim = 'probs', cross.val = FALSE, time_dim = 'syear', - Fair = fair, nmemb = nmemb, - ncores = recipe$Analysis$ncores) -source("modules/Skill/R/RPS_clim.R") -rps_clim <- Apply(list(ano_obs_probs_ev), - target_dims = c('probs', 'syear'), - RPS_clim, bin_dim_abs = 'probs', Fair = fair, - cross.val = FALSE, ncores = recipe$Analysis$ncores)$output1 -# RPSS -source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/RPSS.R") -source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/RandomWalkTest.R") -rpss <- RPSS(exp = cal_hcst_probs_ev, obs = cal_obs_probs_ev, - time_dim = 'syear', memb_dim = NULL, - cat_dim = 'probs', Fair = fair, nmemb = nmemb, - # We should use a line like this - #abs_threshold = res$lims_ano_hcst_tr, - #prob_threshold = c(1/3, 2/3), - cross.val = FALSE, - ncores = recipe$Analysis$ncores) - -cal_fcst <- CST_Calibration(data$hcst, data$obs, data$fcst, - sdate_dim = 'syear', memb_dim = 'ensemble') - - -shp_file = "/esarchive/shapefiles/NUTS3/NUTS_RG_60M_2021_4326.shp" -shp <- sf::st_read(shp_file) # class sf -ids <- subset(shp, shp$LEVL_CODE == 3) - - -###### -library(sf) -library(ggplot2) -#library(ggplot2, lib.loc = .libPaths()[2]) -library(RColorBrewer) -library(rgeos) - -gusa <- map_data("world") - -pals <- brewer.pal(9, 'Reds')[3:9] -pals <- c("#ffffb3",pals,'#525252') - -brks <- 0:5 -brks <- seq(258, 274, 2) - -plot_poly <- list() -polys_data <- NULL - - -## Now fill each polygon with the mean value - data_pol <- data.frame(data = apply(cal_fcst$data[1,1,1,1,1,1,,], 2, mean), - LAU_ID = polys) - shp_data <- merge(subset(shp, data_pol, by.x="LAU_ID", by.y="LAU_ID") - -# Magic lines below that converts SpatialPolygonsDataFrame to normal data frame - shp_data@data$id <- rownames(shp_data@data) - df.points <- fortify(shp_data, region = "id") - df.df <- plyr::join(df.points, 
shp_data@data, by = "id") -ano <- df.df -ano$data <- ano$data - fmi_clim_m$data - -ggplot(data = ano) + #df.df) + - geom_polygon(aes(long, lat, group = group),fill = "lightgrey", data = gusa) + - coord_map(projection = "stereographic", - xlim = c(lon_min, lon_max), - ylim = c(lat_min, lat_max)) + - # geom_polygon(aes(x = long, y = lat, fill = cut(data, breaks = seq(-14, 18, 2)), - geom_polygon(aes(x = long, y = lat, fill = cut(data, - breaks = seq(0, 5, 0.5)), - group = group), color = "grey",size = 0.08) + - geom_polygon(aes(x = long, y = lat, group = group), fill = NA, - color = "grey", size = 0.5, data = shp_herding) + - theme_bw() + - scale_fill_manual(values = c(#rev(brewer.pal(9, 'Blues')), - 'white', brewer.pal(9, 'Reds'), "black"), - drop = FALSE, name = 'tas(ºC)') + - theme(panel.background = element_rect(fill = 'azure'), - text = element_text(family = "Times")) + # Colour of ocean - xlab('Longitude') + ylab('Latitude') + - scale_x_continuous(breaks = seq(-12, 45, 4),labels = waiver()) + - scale_y_continuous(breaks = seq(32, 70, 4),labels = waiver()) + - ggtitle(paste(month.name[mes], year))# + - -###### -#PlotEquiMap(rps[1,,], lon = data$hcst$coords$longitude, -# lat = data$hcst$coords$latitude, filled.c = F, -# fileout = "test.png") -# CRPS -crps <- CRPS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, - time_dim = 'syear', memb_dim = 'ensemble', - Fair = fair, - ncores = recipe$Analysis$ncores) -# Este no sé como se calcula????: -# Aquí no se puede porque estaría incluyendo información de los otros años -#source("modules/Skill/R/CRPS_clim.R") -# Pero si lo hago con el ano_obs_tr si puedo hacerlo aquí -# el resultado es igual a dentro del bucle. -crps_clim <- CRPS(exp = res$ano_obs_tr, obs = res$ano_obs_ev, - time_dim = 'syear', memb_dim = 'sample.syear', - Fair = fair - ncores = recipe$Analysis$ncores) - - -# CRPSS -ref <- res$ano_obs_tr -dim(ref) <- c(ensemble = as.numeric(sdate_dim) -1, - nftime, nlats, nlons, sdate_dim) -crpss <- CRPSS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, ref = ref, - memb_dim = 'ensemble', Fair = fair, - time_dim = 'syear', clim.cross.val = FALSE, - ncores = recipe$Analysis$ncores) - - - -# Corr -source("modules/Skill/R/tmp/Corr.R") -enscorr <- Corr(res$ano_hcst_ev, res$ano_obs_ev, - dat_dim = NULL, - time_dim = 'syear', - method = 'pearson', - memb_dim = 'ensemble', - memb = F, - conf = F, - pval = F, - sign = T, - alpha = 0.05, - ncores = recipe$Analysis$ncores) - -# Mean Bias -#mean_bias <- Bias(res$ano_hcst_ev, res$ano_obs_ev, -mean_bias <- Bias(data$hcst$data, data$obs$data, - time_dim = 'syear', - memb_dim = 'ensemble', - ncores = recipe$Analysis$ncores) - -mean_bias_sign <- Apply(list(data$hcst$data, data$obs$data), - target_dims = list(c('syear', 'ensemble'), 'syear'), - fun = function(x,y) { - if (!(any(is.na(x)) || any(is.na(y)))) { - res <- t.test(x = y, - y = apply(x, 1, mean, na.rm = T), - alternative = "two.sided")$p.value - } else { - res <- NA - } - return(res)}, - ncores = sdate_dim)$output1 -mean_bias_sign <- mean_bias_sign <= 0.05 -#PlotEquiMap(mean_bias[1,1,1,1,1,,], lat = data$hcst$coords$latitude, -# lon = data$hcst$coords$longitude, -# dots = mean_bias_sign[1,1,1,1,1,,,1]) - -# Spread error ratio -source("SprErr.R") -enssprerr <- SprErr(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, - memb_dim = 'ensemble', dat_dim = NULL, - time_dim = 'syear', pval = TRUE, - ncores = recipe$Analysis$ncores) -enssprerr_sign <- enssprerr$p.val -enssprerr_sign <- enssprerr_sign <= 0.05 -enssprerr <- enssprerr$ratio - -# RMSE -rms <- RMS(exp = 
res$ano_hcst_ev, obs = res$ano_obs_ev, - memb_dim = 'ensemble', dat_dim = NULL, - time_dim = 'syear', alpha = 0.05, - ncores = recipe$Analysis$ncores) - -#obs_noensdim <- ClimProjDiags::Subset(res$ano_obs_ev, "ensemble", 1, -# drop = "selected") - -#enssprerr <- easyVerification::veriApply(verifun = 'EnsSprErr', -# fcst = res$ano_hcst_ev, -# obs = obs_noensdim, -# tdim = which(names(dim(res$ano_hcst_ev))=='syear'), -# ensdim = which(names(dim(res$ano_hcst_ev))=='ensemble'), -# na.rm = FALSE, -# ncpus = recipe$Analysis$ncores) -if (any(is.na(rpss$sing))) { - info(recipe$Run$logger, - "RPSS NA") - - rpss$sing[is.na(rpss$sign)] <- FALSE -} -skill_metrics <- list(mean_bias = mean_bias, - mean_bias_significance = mean_bias_sign, - enscorr = enscorr$corr, - enscorr_significance = enscorr$sign, - enssprerr = enssprerr, - enssprerr_significance = enssprerr_sign, - rps = rps, rps_clim = rps_clim, crps = crps, crps_clim = crps_clim, - rpss = rpss$rpss, rpss_significance = rpss$sign, #crps = crps, - crpss = crpss$crpss, crpss_significance = crpss$sign, - rms = rms$rms) -skill_metrics <- lapply(skill_metrics, function(x) { - InsertDim(drop(x), len = 1, pos = 1, name = 'var')}) -original <- recipe$Run$output_dir -recipe$Run$output_dir <- paste0(original, "/outputs/Skill/") - -skill_metrics <- lapply(skill_metrics, function(x) { - if (is.logical(x)) { - dims <- dim(x) - res <- as.numeric(x) - dim(res) <- dims - } else { - res <- x - } - return(res) - }) -# Compute save metrics -source("modules/Saving/Saving.R") -#Saving <- Saving(recipe = recipe, data = data, skill = skill_metrics) - save_metrics(recipe = recipe, - metrics = skill_metrics, - data_cube = data$hcst, agg = 'global', - outdir = recipe$Run$output_dir) - -recipe$Run$output_dir <- original - -source("modules/Visualization/Visualization.R") -if (data$hcst$coords$longitude[1] != 0) { - skill_metrics <- lapply(skill_metrics, function(x) { - Subset(x, along = 'longitude', indices = c(182:360, 1:181)) - }) -} - info(recipe$Run$logger, - paste("lons:", data$hcst$coords$longitude)) - info(recipe$Run$logger, - paste("lons:", data$obs$coords$longitude)) - - -data$hcst$coords$longitude <- -179:180 - -Visualization(recipe, data, skill_metrics, significance = TRUE) +Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, significance = TRUE) source("tools/add_logo.R") add_logo(recipe, "rsz_rsz_bsc_logo.png") diff --git a/recipe_tas_oper.yml b/recipe_tas_oper.yml index 2b39add3..34f82e63 100644 --- a/recipe_tas_oper.yml +++ b/recipe_tas_oper.yml @@ -39,7 +39,7 @@ Analysis: Time_aggregation: execute: no Calibration: - method: envos # Mandatory, str: Calibration method. See docu. + method: raw # Mandatory, str: Calibration method. See docu. 
    cross_validation: yes
    save: none
  Skill:
diff --git a/skill_full_crossval.R b/skill_full_crossval.R
new file mode 100644
index 00000000..27ab47aa
--- /dev/null
+++ b/skill_full_crossval.R
@@ -0,0 +1,147 @@
+
+source("modules/Saving/Saving.R")
+source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/modules/Skill/R/tmp/RPS.R")
+source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/modules/Skill/R/RPS_clim.R")
+source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/RPSS.R")
+source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/RandomWalkTest.R")
+source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/modules/Skill/R/tmp/Corr.R")
+source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/dev-sigBias/R/Bias.R")
+source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/dev-spread_error_ratio/R/SprErr.R")
+source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/dev-spread_error_ratio/R/Eno.R")
+
+## data_crossval is the result from function full_crossval_anomalies or similar.
+## this is a list with the required elements:
+  ## probs is a list with
+  ## probs$hcst_ev and probs$obs_ev
+  ## probs$hcst_ev will have as many elements as $Probabilities$percentiles
+  ## each element will be an array with 'cat' dimension
+  ## the same for probs$obs_ev
+  ## hcst is a s2dv_cube for the post-processed hindcast for the evaluation indices
+  ## in this case cross-validated anomalies
+  ## obs is a s2dv_cube for the post-processed obs
+  ## in this case cross-validated anomalies
+  ## fcst is a s2dv_cube for the post-processed fcst
+  ## in this case anomalies computed with the full hindcast period
+  ## this object is not required for skill assessment
+  ## hcst.full_val and obs.full_val are the original data to compute mean bias
+  ## cat_lims used to compute the probabilities
+  ## this object is not required for skill assessment
+  ## ref_obs_tr is an array with the cross-validated observed anomalies
+  ## to be used as reference forecast in the CRPSS and CRPS_clim
+  ## it is computed from the training indices
+## the recipe could be used to read the Percentiles
+## if fair is TRUE, the nmemb used to compute the probabilities is needed
+  ## nmemb_ref is the number of years - 1 in case climatological forecast is the reference
+skill_full_crossval <- function(recipe, data_crossval,
+                                fair = FALSE, nmemb = NULL, nmemb_ref = NULL) {
+  ## START SKILL ASSESSMENT:
+  # RPS
+  rps <- RPS(exp = data_crossval$probs$hcst_ev[[1]],
+             obs = data_crossval$probs$obs_ev[[1]], memb_dim = NULL,
+             cat_dim = 'cat', cross.val = FALSE, time_dim = 'syear',
+             Fair = fair, nmemb = nmemb,
+             ncores = recipe$Analysis$ncores)
+  rps_clim <- Apply(list(data_crossval$probs$obs_ev[[1]]),
+                    target_dims = c('cat', 'syear'),
+                    RPS_clim, bin_dim_abs = 'cat', Fair = fair,
+                    cross.val = FALSE, ncores = recipe$Analysis$ncores)$output1
+  # RPSS
+  rpss <- RPSS(exp = data_crossval$probs$hcst_ev[[1]],
+               obs = data_crossval$probs$obs_ev[[1]],
+               time_dim = 'syear', memb_dim = NULL,
+               cat_dim = 'cat', Fair = fair, nmemb = nmemb,
+               cross.val = FALSE,
+               ncores = recipe$Analysis$ncores)
+
+  # CRPS
+  crps <- CRPS(exp = data_crossval$hcst$data, obs = data_crossval$obs$data,
+               time_dim = 'syear', memb_dim = 'ensemble',
+               Fair = fair,
+               ncores = recipe$Analysis$ncores)
+  crps_clim <- CRPS(exp = data_crossval$ref_obs_tr, obs = data_crossval$obs$data,
+                    time_dim = 'syear', memb_dim = 'ensemble',
+                    Fair = fair,
+                    ncores = recipe$Analysis$ncores)
+
+  # CRPSS
+  crpss <- CRPSS(exp = data_crossval$hcst$data, obs = data_crossval$obs$data,
+                 ref = data_crossval$ref_obs_tr,
+                 memb_dim = 'ensemble', Fair = fair,
+                 time_dim = 'syear', clim.cross.val = FALSE,
+                 ncores = recipe$Analysis$ncores)
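A note on how this new function is meant to be driven: full_ecvs_oper.R (patched above) calls it with fair = FALSE and NULL member counts, and the header comments state that the Fair versions of the scores need explicit ensemble sizes. The following is a minimal usage sketch, not part of the patch; taking nmemb from the hindcast ensemble dimension and nmemb_ref as the number of hindcast years minus one is an assumption based on those comments:

    res <- full_crossval_anomalies(recipe = recipe, data = data)
    # Illustrative Fair-score call; member counts derived from the data
    # dimensions, following the header comments of skill_full_crossval():
    skill_metrics <- skill_full_crossval(recipe = recipe, data_crossval = res,
                                         fair = TRUE,
                                         nmemb = as.numeric(dim(data$hcst$data)['ensemble']),
                                         nmemb_ref = as.numeric(dim(data$hcst$data)['syear']) - 1)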
+
+  # Corr
+  enscorr <- Corr(data_crossval$hcst$data, data_crossval$obs$data,
+                  dat_dim = NULL,
+                  time_dim = 'syear',
+                  method = 'pearson',
+                  memb_dim = 'ensemble',
+                  memb = F,
+                  conf = F,
+                  pval = F,
+                  sign = T,
+                  alpha = 0.05,
+                  ncores = recipe$Analysis$ncores)
+
+  # Mean Bias
+  mean_bias <- Bias(data_crossval$hcst.full_val$data, data_crossval$obs.full_val$data,
+                    time_dim = 'syear',
+                    memb_dim = 'ensemble',
+                    alpha = 0.05,
+                    ncores = recipe$Analysis$ncores)
+
+  # Spread error ratio
+  enssprerr <- SprErr(exp = data_crossval$hcst$data, obs = data_crossval$obs$data,
+                      memb_dim = 'ensemble', dat_dim = NULL,
+                      time_dim = 'syear', pval = TRUE,
+                      ncores = recipe$Analysis$ncores)
+  enssprerr_sign <- enssprerr$p.val
+  enssprerr_sign <- enssprerr_sign <= 0.05
+  enssprerr <- enssprerr$ratio
+
+  # RMSE
+  rms <- RMS(exp = data_crossval$hcst$data, obs = data_crossval$obs$data,
+             memb_dim = 'ensemble', dat_dim = NULL,
+             time_dim = 'syear', alpha = 0.05,
+             ncores = recipe$Analysis$ncores)
+
+  # RMSSS
+  rmss <- RMSSS(exp = data_crossval$hcst$data, obs = data_crossval$obs$data,
+                ref = data_crossval$ref_obs_tr,
+                memb_dim = 'ensemble', dat_dim = NULL,
+                time_dim = 'syear', alpha = 0.05, sign = TRUE,
+                ncores = recipe$Analysis$ncores)
+
+  skill_metrics <- list(mean_bias = mean_bias$bias,
+                        mean_bias_significance = mean_bias$sig,
+                        enscorr = enscorr$corr,
+                        enscorr_significance = enscorr$sign,
+                        enssprerr = enssprerr,
+                        enssprerr_significance = enssprerr_sign,
+                        rps = rps, rps_clim = rps_clim, crps = crps, crps_clim = crps_clim,
+                        rpss = rpss$rpss, rpss_significance = rpss$sign,
+                        crpss = crpss$crpss, crpss_significance = crpss$sign,
+                        rms = rms$rms, rmsss = rmss$rmsss, rmsss_significance = rmss$sign)
+  original <- recipe$Run$output_dir
+  recipe$Run$output_dir <- paste0(original, "/outputs/Skill/")
+
+  skill_metrics <- lapply(skill_metrics, function(x) {
+    if (is.logical(x)) {
+      dims <- dim(x)
+      res <- as.numeric(x)
+      dim(res) <- dims
+    } else {
+      res <- x
+    }
+    return(res)
+  })
+  # Save metrics
+  save_metrics(recipe = recipe,
+               metrics = skill_metrics,
+               data_cube = data_crossval$hcst.full_val, agg = 'global',
+               outdir = recipe$Run$output_dir)
+
+  recipe$Run$output_dir <- original
+  # reduce dimension to work with Visualization module:
+  skill_metrics <- lapply(skill_metrics, function(x) {drop(x)})
+  skill_metrics <- lapply(skill_metrics, function(x){InsertDim(x, pos = 1, len = 1, name = 'var')})
+  return(skill_metrics)
+}
+
-- 
GitLab


From e5d7e658f436f9982a6c46b21cc536c5d0e7ace2 Mon Sep 17 00:00:00 2001
From: nperez
Date: Thu, 30 May 2024 11:02:34 +0200
Subject: [PATCH 15/53] cleaning code

---
 full_crossval_anomalies.R | 72 ++++++++++++++++++++++++---------------
 1 file changed, 45 insertions(+), 27 deletions(-)

diff --git a/full_crossval_anomalies.R b/full_crossval_anomalies.R
index 649f841b..d2eced11 100644
--- a/full_crossval_anomalies.R
+++ b/full_crossval_anomalies.R
@@ -1,9 +1,9 @@
 # Full-cross-val workflow
 ## This code should be valid for individual months and temporal averages
 source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/GetProbs.R")
-
+
 full_crossval_anomalies <- function(recipe, data) {
-  cross.method <- recipe$Analysis$Skill$cross.method
+  cross.method <- recipe$Analysis$cross.method
   # TODO move check
   if (is.null(cross.method)) {
     cross.method <- 'leave-one-out'
   }
@@ -12,38 +12,34 @@ full_crossval_anomalies <- function(recipe, data) {
   categories <- lapply(categories, function (x) {
                   sapply(x, function(y) {
                     round(eval(parse(text = y)),2)})})
   ncores <- recipe$Analysis$ncores
   na.rm <- 
recipe$Analysis$remove_NAs ## TODO remove if the recipe is checked: na.rm <- TRUE ## data dimensions sdate_dim <- dim(data$hcst$data)['syear'] - nmemb <- dim(data$hcst$data)['ensemble'] - nftime <- dim(data$hcst$data)['time'] + orig_dims <- dim(data$hcst$data) # spatial dims if ('latitude' %in% names(dim(data$hcst$data))) { nlats <- dim(data$hcst$data)['latitude'] nlons <- dim(data$hcst$data)['longitude'] agg = 'global' - ev_dim_names <- c('dat', 'var', 'sday', 'sweek', 'time', - 'latitude', 'longitude', 'ensemble', 'syear') - tr_dim_names <- c('dat', 'var', 'sday', 'sweek', 'ensemble', 'time', - 'latitude', 'longitude', 'unneeded', 'syear') } else if ('region' %in% names(dim(data$hcst$data))) { agg = 'region' nregions <- dim(data$hcst$data)['region'] - ev_dim_names <- c('dat', 'var', 'sday', 'sweek', 'time', - 'region', 'ensemble', 'syear') - tr_dim_names <- c('dat', 'var', 'sday', 'sweek', 'ensemble', 'time', - 'region', 'unneeded', 'syear') } + # output_dims from loop base on original dimensions + ## ex: 'dat', 'var', 'sday', 'sweek', 'ensemble', 'time', + ## 'latitude', 'longitude', 'unneeded', 'syear' + ev_dim_names <- c(orig_dims[-which(names(orig_dims) %in% 'syear')], + sdate_dim) + tr_dim_names <-c(orig_dims[-which(names(orig_dims) %in% 'syear')], + unneeded = 1, sdate_dim) # TODO fix it to use new version https://earth.bsc.es/gitlab/external/cstools/-/blob/dev-cross-indices/R/CST_Calibration.R#L570 cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, amt.points = sdate_dim, amt.points_cor = NULL) # k = ? - outdim <- length(cross) # leave-one-out should be equal to sdate_dim ## output objects ano_hcst_ev_res <- NULL @@ -57,7 +53,7 @@ full_crossval_anomalies <- function(recipe, data) { hcst_probs_ev <- lapply(categories, function(x){NULL}) obs_probs_ev <- lapply(categories, function(x){NULL}) - for (t in 1:outdim) { + for (t in 1:length(cross)) { info(recipe$Run$logger, paste("crossval:", t)) # subset years: Subset works at BSC not at Athos @@ -117,18 +113,19 @@ full_crossval_anomalies <- function(recipe, data) { gc() # Add dim names: - names(dim(ano_hcst_ev_res)) <- ev_dim_names - names(dim(ano_obs_ev_res)) <- ev_dim_names - names(dim(ano_obs_tr_res)) <- tr_dim_names + names(dim(ano_hcst_ev_res)) <- names(ev_dim_names) + names(dim(ano_obs_ev_res)) <- names(ev_dim_names) + names(dim(ano_obs_tr_res)) <- names(tr_dim_names) # To make crps_clim to work the reference forecast need to have same dims as obs: - ano_obs_tr_res <- Subset(ano_obs_tr_res, along = 'unneeded', indices = 1, drop = 'selected') + ano_obs_tr_res <- Subset(ano_obs_tr_res, along = 'unneeded', + indices = 1, drop = 'selected') for(ps in 1:length(categories)) { - names(dim(lims_ano_hcst_tr_res[[ps]])) <- c('cat', 'dat', 'var', 'sday', 'sweek', - 'time', 'latitude', 'longitude', 'syear') - names(dim(lims_ano_obs_tr_res[[ps]])) <- c('cat', 'dat', 'var', 'sday', 'sweek', - 'time', 'latitude', 'longitude', 'extra', 'syear') + names(dim(lims_ano_hcst_tr_res[[ps]])) <- c('cat', + names(orig_dims[-which(names(orig_dims) %in% 'ensemble')])) + names(dim(lims_ano_obs_tr_res[[ps]])) <- c('cat', + names(tr_dim_names[-which(names(tr_dim_names) %in% 'ensemble')])) lims_ano_obs_tr_res[[ps]] <- Subset(lims_ano_obs_tr_res[[ps]], - along = 'extra', indices = 1, drop = 'selected') + along = 'unneeded', indices = 1, drop = 'selected') } # Forecast anomalies: @@ -206,14 +203,13 @@ full_crossval_anomalies <- function(recipe, data) { all_names <- NULL for (ps in 1:length(categories)) { for (perc in 1:(length(categories[[ps]]) 
+ 1)) {
-#ps <- 1
       if (perc == 1) {
         name_elem <- paste0("below_", categories[[ps]][perc])
       } else if (perc == length(categories[[ps]]) + 1) {
         name_elem <- paste0("above_", categories[[ps]][perc-1])
       } else {
-        name_elem <- paste0("between_", categories[[ps]][perc-1],
-                            "_and_", categories[[ps]][perc])
+        name_elem <- paste0("from_", categories[[ps]][perc-1],
+                            "_to_", categories[[ps]][perc])
       }
       result <- append(list(Subset(hcst_probs_ev[[ps]],
                               along = 'cat', indices = ps, drop = 'all')), result)
       all_names <- c(all_names, name_elem)
     }
   }
@@ -259,3 +255,25 @@ full_crossval_anomalies <- function(recipe, data) {
       ref_obs_tr = ano_obs_tr_res))
 }
+
+
+## The result contains the inputs for skill_full_crossval.
+## this is a list with the required elements:
+  ## probs is a list with
+  ## probs$hcst_ev and probs$obs_ev
+  ## probs$hcst_ev will have as many elements as $Probabilities$percentiles
+  ## each element will be an array with 'cat' dimension
+  ## the same for probs$obs_ev
+  ## hcst is a s2dv_cube for the post-processed hindcast for the evaluation indices
+  ## in this case cross-validated anomalies
+  ## obs is a s2dv_cube for the post-processed obs
+  ## in this case cross-validated anomalies
+  ## fcst is a s2dv_cube for the post-processed fcst
+  ## in this case anomalies computed with the full hindcast period
+  ## this object is not required for skill assessment
+  ## hcst.full_val and obs.full_val are the original data to compute mean bias
+  ## cat_lims used to compute the probabilities
+  ## this object is not required for skill assessment
+  ## ref_obs_tr is an array with the cross-validated observed anomalies
+  ## to be used as reference forecast in the CRPSS and CRPS_clim
+  ## it is computed from the training indices
+
-- 
GitLab


From bc457bea43820766105b1d78f320b311bfed2854 Mon Sep 17 00:00:00 2001
From: nperez
Date: Thu, 30 May 2024 15:11:53 +0200
Subject: [PATCH 16/53] fixes and logo

---
 full_crossval_anomalies.R | 24 ++++++++++++------------
 full_ecvs_oper.R          |  1 +
 rsz_rsz_bsc_logo.png      | Bin 0 -> 48040 bytes
 skill_full_crossval.R     |  3 ++-
 tools/add_logo.R          | 11 ++++++-----
 5 files changed, 21 insertions(+), 18 deletions(-)
 create mode 100644 rsz_rsz_bsc_logo.png

diff --git a/full_crossval_anomalies.R b/full_crossval_anomalies.R
index d2eced11..f65d86a7 100644
--- a/full_crossval_anomalies.R
+++ b/full_crossval_anomalies.R
@@ -18,7 +18,7 @@ full_crossval_anomalies <- function(recipe, data) {
   na.rm <- TRUE
   ## data dimensions
   sdate_dim <- dim(data$hcst$data)['syear']
-  orig_dims <- dim(data$hcst$data)
+  orig_dims <- names(dim(data$hcst$data))
   # spatial dims
   if ('latitude' %in% names(dim(data$hcst$data))) {
     nlats <- dim(data$hcst$data)['latitude']
@@ -31,11 +31,12 @@ full_crossval_anomalies <- function(recipe, data) {
   # output_dims from loop base on original dimensions
   ## ex: 'dat', 'var', 'sday', 'sweek', 'ensemble', 'time',
   ##     'latitude', 'longitude', 'unneeded', 'syear'
-  ev_dim_names <- c(orig_dims[-which(names(orig_dims) %in% 'syear')],
-                    sdate_dim)
-  tr_dim_names <-c(orig_dims[-which(names(orig_dims) %in% 'syear')],
-                   unneeded = 1, sdate_dim)
+  ev_dim_names <- c(orig_dims[-which(orig_dims %in% 'syear')],
+                    names(sdate_dim))
+  orig_dims[orig_dims %in% 'ensemble'] <- 'unneeded'
+  orig_dims[orig_dims %in% 'syear'] <- 'ensemble'
+  tr_dim_names <- c(orig_dims,
+                    names(sdate_dim))
   # TODO fix it to use new version https://earth.bsc.es/gitlab/external/cstools/-/blob/dev-cross-indices/R/CST_Calibration.R#L570
   cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method,
                                             amt.points = sdate_dim,
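Two details of the probability-bin handling from the previous commit are easy to check interactively: the percentile strings from the recipe are parsed into numeric thresholds with eval(parse()), and those numbers then drive the below_/from_/to_/above_ labels used when saving the bins. A small self-contained sketch, using the percentiles from recipe_tas_oper.yml:

    # recipe strings such as "1/3" become numeric thresholds:
    categories <- list(c("1/3", "2/3"), c("1/10", "9/10"))
    categories <- lapply(categories, function(x) {
      sapply(x, function(y) round(eval(parse(text = y)), 2))
    })
    # categories[[1]] is now c(0.33, 0.67); categories[[2]] is c(0.1, 0.9)
    ps <- 1
    for (perc in 1:(length(categories[[ps]]) + 1)) {
      if (perc == 1) {
        name_elem <- paste0("below_", categories[[ps]][perc])
      } else if (perc == length(categories[[ps]]) + 1) {
        name_elem <- paste0("above_", categories[[ps]][perc - 1])
      } else {
        name_elem <- paste0("from_", categories[[ps]][perc - 1],
                            "_to_", categories[[ps]][perc])
      }
      print(name_elem)  # "below_0.33", "from_0.33_to_0.67", "above_0.67"
    }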
@@ -111,19 +112,18 @@ full_crossval_anomalies <- function(recipe, data) {
     info(recipe$Run$logger,
          "#### Anomalies Cross-validation loop ended #####")
   gc()
-  # Add dim names:
-  names(dim(ano_hcst_ev_res)) <- names(ev_dim_names)
-  names(dim(ano_obs_ev_res)) <- names(ev_dim_names)
-  names(dim(ano_obs_tr_res)) <- names(tr_dim_names)
+  names(dim(ano_hcst_ev_res)) <- ev_dim_names
+  names(dim(ano_obs_ev_res)) <- ev_dim_names
+  names(dim(ano_obs_tr_res)) <- tr_dim_names
   # To make crps_clim to work the reference forecast need to have same dims as obs:
   ano_obs_tr_res <- Subset(ano_obs_tr_res, along = 'unneeded',
                            indices = 1, drop = 'selected')
   for(ps in 1:length(categories)) {
     names(dim(lims_ano_hcst_tr_res[[ps]])) <- c('cat',
-      names(orig_dims[-which(names(orig_dims) %in% 'ensemble')]))
+      orig_dims[-which(orig_dims %in% c('ensemble', 'unneeded'))], 'syear')
     names(dim(lims_ano_obs_tr_res[[ps]])) <- c('cat',
-      names(tr_dim_names[-which(names(tr_dim_names) %in% 'ensemble')]))
+      tr_dim_names[-which(tr_dim_names %in% c('ensemble'))])
     lims_ano_obs_tr_res[[ps]] <- Subset(lims_ano_obs_tr_res[[ps]],
                                         along = 'unneeded', indices = 1, drop = 'selected')
   }
diff --git a/full_ecvs_oper.R b/full_ecvs_oper.R
index 00050965..8a08fef0 100644
--- a/full_ecvs_oper.R
+++ b/full_ecvs_oper.R
@@ -24,6 +24,7 @@ skill_metrics <- skill_full_crossval(recipe = recipe, data_crossval = res,
 
 Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, significance = TRUE)
 
+## Check the logo size is appropriate for your maps:
 source("tools/add_logo.R")
 add_logo(recipe, "rsz_rsz_bsc_logo.png")
 
diff --git a/rsz_rsz_bsc_logo.png b/rsz_rsz_bsc_logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..59406d7a5f242aa87eb276bea39d6544a802b986
GIT binary patch
literal 48040

g2m;UskP*ZknyAh)unB@zTNYLi98$P))niMV9L+TAhC{KUb0!xQ6p-7r0teOK zh9BMX1gc|#7kxY*M;|!ro1XJN^q~*`Mfj(Y=$=1&03}7Gc;S{yFyW*hk?Xiv`PhX_ zL?}G)T9Me472i!5>aphQc{x51haNe1bZ5dp$n$w$v7#F+M5?>GgY0s7}9n*OmIp2VTUFj-1A?y!`<=2=NIAPW`6m zy$^lp!+#0>DI|K}MGs?-WB0-O6}7bColo%T`Zi=YHF5RsLq*fZF5&BdG7B^huCdGO z>fPg92PgU-PuCjePJ~HN$kU_-Ai!(@1Q3IWASAMacq<99**4JRX=)>Zq(S7nzS0bY z3<(MZslB*Bu0CzQ!Sfo@UVEY~OOO5LRqTH10fA`b;z&nm<>~YtTZ4mp4Wkqq^SXZA_IoIQi4=#%tXv%1o6v8C63UAN( z6s7wejIm<}p&z?=?wvNIKD!L({$W0+go!}~Ca(ICJ#mFO-!Ub1uHqY&f$z#rt0x#GIQ~0BB)Lj1lhOkVFI$84qCAaHYoP zMG7z(feJ$ILEd@9Pf}h_P)4YDnNSoKks^u0LRnnVpOTb_d>2Bhr?N=rZh5!Mki7mD*{!&t6JhEj|c7yg!!snCNEPPo|O}UJf zTCB;SWJ?a-M%za=%SDr86f6&8Kt&}F-V4_Hx7U`fZ*a@J9LH_#hczQ8h)6+nB0y~; z`rCB}-f~p)nBoEInLocmPyg^cEW^Jbl~xQW8g^I-9XPX$Uwm^m7sRXR#w#x7W2O)K zrq`$sedxpgVf@1+I_U>@0RWWCw572F9SN693Id|NH7f)eesQ@uA<^#LQS0842&|R5dgy?Fo6I9fdS2cBMBJ@G)TgF)0As8^onrU zSZ$eD-_q2cbU(S>FBo?roqPVF;LWe+rR4Zfy8gT$^FKOIedt3U{{O%~M505Ey%_*d z6bsSvHEnQGJ`WjQEZ4ue6-aMWvAxHhlTPVhDW5|iQUC;yXR#fwq`)kNLD^7H+&y%7 zYtFk()3X_UsORy4O7Uo| z5oBh7zjPaZ_Z20Wd!*j{|0w@&AqbLmhatsQ64Y#4w7jH9&fkAxeD1RgRyEfYRno$T zufz@a`~_G3^!t4z+J`>$VV5wzZo|RH-UJ4tuqZ-{KG+6iG*^r*GLHIrssBL^#guTtobM< zl0G!N6o0UEMNM;a?j+aI-%&osYRw`~nhbz62;5E642A)KKx_~Zgg{ORT4NYuwIE(i z2G$kD%zC!Wx=?YfgIl+_MlOu@G#QTP+3YLwJe6sFbqJdEv?pXz71A-uvS@-}Kt`p$~odKY*_#iH<+@4gi1@nwBnT1hP3U99U??NDj+20!1*{RNT96muQ#JUN&TRqTin9A!^fxN^Ek_Z5!_w76lge z8x&~x;J4@Hh{oV&i*CYQQ;*rDR4Gb+N|r5ID<-QjCQ^c_pxj1bJfOA8@P#j4!-}Es1(1)*uze}PA zo&OgAKvP{at=XJLt|QCQ(x72SEk7v4u*MeWRnOsJAou_q-K3fCoZrSPFr;CkH5!wr zbu{{~L6Q+KHaPL_m6{5l=fX)VWv+|iv!}|dts(8L-ldMiCo7)=-9zlQ=;z52Jvvn| zkpQG2wpb>fkA%gu!$$@e&%5jW3?hT*v*fXuK>PQSCNpqfqarF7Tuu;weDFCtUb{uO zrU7^SApfPK4|f6ZwGC_OnyL|8GV}K&T#wo7pf)!n0i27gNGELt1bcUf(D;>DI%4lpp}E3uaz(is7wkm)rhwi`Pg&MerRpW z!Z0KQz>yTUt**PH9Vr)7n+a z4_^ykOA;M$24H2kit=JKK#C7K;MZf3wwcW`Lsiu8Hel>Vji5ks%k$pTE2k5cM(hhi3F z5KI#v27~h6QKOF1QuN#?uF|t-683h3r*$m}sI|Vx$Fo0;Kt&>X|8G{OGWJw3F zo_qf{J={L@;qT&alIZADZo{d6IS;?v|7uFKX1QQskzw1)_goBXZt`Zk4hBL%0g%0& z80dDOS71mxK#K+p2pnSsjQWli&m1TVCDN{86hb4m_YRVAD+51Inwt? 
z7=l?dl}{T)jUQd7fe?-R!S7%uw{e>zDC-+2v^)$fi827BU4?lMUDSJuy?^{WELu?y zLrM%AP)Lo5EKKRLr-35Vhh{>jA0RLgGqTJ86b!6={(9{3gFCQpTRj9ZDvOJ$zQaXf z*uao}6$}8OQilvFSZbg4nm7&%)@)!O0eW`r6!zAgG=Mcq*E|S0RY-Pi!#26Ld!${ z;{5$PK4b2K-@KIGhyP6cZAr9b7N+iZJUPDNj!ar2>?o$?r#{R8 zbrWci77QI7JF@@O(w2`NSz~m5_@R%>KDt4pXiwE(jc;BCQ( z`BKi&>tj|i=!~oj4=oQ7$3d- z5<=xe(6D(EszMo*j~D~rC#+dGAF)9bKp=!-L6r9^;*Q3)__q4&g`UT8DPdHU1nz1} zJP|_%l;OC8Mq}f~M%?@}hnFnbg6X?ohKeaC zlN1I9PMwL3AG{1FP>psu6#Gw!VcYUo(VDJ87G0RU_e_MW0=TVf@$R4hisEszP%&{5 zhE>O4heK?b1|GZQ=zss4*oXgE?2<$eIOk8;{MvhPh76?X7mVc3pY9MV?|+<+`}G-t zSDtHlJ(uOZw{PKhl5W%M!3yj|znBPr5$=~GDfp^&(HU`^m!Rw}V+LzlsZB{S_w58<$`tO!g@bf}A5 zS0Aes2fMBJ-46cUZdT}Vh2k&Vd3PXFe(c+S=iV_ewuht&AW!hpKx;4{{!&_O|1Onr zwsp4p(hdrA&A(Oy;87>vM(wQ~3;@luZ}?v=(ycFij0=vro{A2fA$+Bj0qm7Xs1LN_ z{Frx6*uo%?X^NJjBIAH;j+X^(TLi)yjrCdKv^#+3GlDh+3T+G;98t}U^&BcJL#`uB zohc=KA21A{WnueB8Q^>VVz~t1rbBEG-rk1;R0Y-MK<~ki0bx^4$Z!x&ebl^ zw2aFS-&?dcWQA(aGQuXs%R{{XFw)Q6`vhY9epdjjY)Mnv+ueoAXOafIfNSFlbnbs$JP8{njiS#X+j9 ziR#WoM(5HFnHIRLyhzk-OA5E$0eqif=kI^W;HcVITgSE?pyQ9tjvYXb@=Bg7hMtqDFTSF3U?1zVj?@6ch*YDcj4^6vfBy8Nt%wY&;7nc|Gzm6xee-h=>aVap{|$ZfQ&k zR#1gS)@g}^{~&9?wCVHjojhRozx@35T=9$%Xxo_82pfbAzvj||uOomtlOj1{WL)nx zXe6F_^J93aB4(YVmd`HgU99p$eobJ_0LaU^Xm4XKDsn|fGQh}3V31yN1sBY^i8M2V zLD4W#-J9=gU_G63!!3B^isS!J=#TsGpMhPH=w7GZg7xhVH7sf7K>uPRmt()6$UMC> z;s1p-D-h+k@Lwb%2Bd*ZtiU+fXEo1;Fk9dG+{56(2$Y%z5&0#h)`Z4J?`5yYAcY;s zcsp+?5M&@guM8L%{DF{Y-nKqthT{e|&2G}bVjbJ((9<$${cGRVWxf8QfZU!cZx>{_ ztH1x=@4bJ2X}a>+rkDi?WLaW)tjIX0b$Q1J2$NAVut0Zox;hp%X~ghi*a4FP;Qc#( z^bajie7}n@dCWj6l*G@@sh7wo6)d;^m`$r61At+Q%>ydTbA?i!ty$Wwv(x_#=R<2{ zC`7mBTzR4NgQ8}*SKiv>9;h^ivk;b0eo7c}bwz*kEhpu4v}Xm1Eu*MC!-x4klOecc zV3nM^ajSQnX4^5R0VW_>+5>yoN+ z_x>Z&Dfa+|7y#DEfGs{a|Cg2XF8%f_GId8EsVWjkZR{aK-7Ivld!3+^FOtg+38vS7T=k4Fs z5ThjYhT>xT$+nhUb9qHn);D#i@`^}VTeEwRVvJ;xme5|Bgj`cSAn;aeJGI$vmq>43 zubDynoqRMud(C&h>36da{~`DiiQdbMyHB{DYW5h%OW$21wPYoIHM+6ho$Grj%1fI& z>MVPiPZNs0ANxi zAj4sC*GsQY{KbCJr?_PAVM0Npa$wlcq*ZZqoqKmSr@zA*+)bc6lls>t#(%Kw7m+7d z29pLLg#kYt5;t~zaO8Cqc?N?HD5j08Ta~Z{?KNvK9N%RC`1p@M`G*$im}?%RkS08S z-CqH`!`8G57P^j}3IKtSd@K9m4-Ny?aoHgs9n#j~Jg+@Ar9~Vsln1)5o)fT*8Ht4V zjPG&P_OXLW5SbP}uc{6l*3_8W8V;G??&$EIQ9dIWHfp8BnpBrMR0yEF!rI%(aYa|s z`?Ie&+MS7uNe~$ZHpPpK;{y@nlSB3n8h?Gd^XhCyU#JuUJ#!}n5D16|4?MW?tQW6- z$^)|6+Wo+}ZcgtB0wNK4mB-@xScIl`c6j%B9*^aoY+po>5(tLGb*Z^;TpiluPy}u9 zt&W8EtX7Ob*k~f4A)8hcdp^toTYNNmV(j28_1R?3;Yo>h?>Wy$O|R~w`~EG9R`>50 zIJl`Ly~Va{(YjzQUvm3Ne9M`KepBycAO6Fz6EmFptC#T3oAasND#gGEW93JU0(Fa3 zVE0*Xh-m`CBBC;01y%xQ*sxBgs%|OJ_2?~B%(3#!>)w;%q_o}XkB^9 z^7+}UopjO$ZL{|bOy2$EnHL?FY!`e=c1 zV@IohA8QyOz_jo=YYO|G9s~yLpjhNA*fK4=N6poljJ}2yAT6x4ZSkz@@FBjZr&69Q zFPDbAsiMrP-mo$AvFGT1BnidE=D}meL_YrT!;aOSgGvw}4QvyVx;&RhLxZIODkw7c zPA1g{ra?EQQ|eL>xd1F7pqas}K>#Zu$&5zjo}G&_^TQJZA7!(8IuXIJ=wUNxzMV^Y zSNR?f1p%>`ac);f?k+NfQCt%6O;e!ZwadQgce4-wA?R^l$CGy~Ye?Fr;Zx?XY2?i}$>G7)FdK)J3H+E-WgLg+%~2|y_Jc9=u*Q1lUDx4J zpl+QkgL;W)%oRgWq=&I`K;LSo3SOc!d$0R;(M15m+fKuEK1W18&X{r6`qE8a%dLCDTz=nyo zsw(S{Z=KX{hGEef5J+jj&G9U^$$|3GljzklOPe})v~^*q*w6bM*fI~uwVeb`Q-;ed&YE_;OD zyW@S>K^9{s#QkxTi!0J8bp>nAr$Yj5Clu;7v@jq95RJ-tu=3qD)3O5EXOHUM z*uXRQFSNIA$v);fJi0q6Nw*}sBMFfrViApsYrEEFe*+=RX~zsvTkEyRW?VlUGLB2M z`Oo?u7jE|5)Kwtz&WPj_3i`ajG%9SjEF*C>$7Nwr2L| z?9v0YMimH3DMUA{PrWWDPvULcGS6t8R{$_m6uHkqB(`tY1vajH{Gv9`#bMprAu`g`{eznkeuuVn zc>UJbxu>kyr?0XJ_dG^->!^M3Lvrr^nCj}EzhcUEr*OO^;?@- zXvN|ViJD6N_NW2~G8mG7>iQ&k4ahZy?_24wSkfqysjMF>Fpuf# zRF5eifgN0y{_i3$SQ{WH-_{zfc-dB-_riSawZ|kn^Y+*N_NkZ= zHyxaF%y5KFi;)GKoSM|lCsG<>^{hk7pW5{LpXb%pW@lR#&F{`uN?JV~sx0>&Hpsb`Xs8p?_T>r3{#c9d3J0T84FM`PymR;7V$n^N%7ZZQ~9 
z01!kO(yBgY*%&fBR9=JB9{nwM#_;j60aA;?ZuwYBLu`T}X0w#DwGht;OFSlQ`HV2} zEGl=07+m;To9-#pFYqp z8&EB)#*E;P7OlX2=N*bEr{4KDE=joY(f4q~#rIO@NAGCfbQ5XC6ME)AX}iDXR#~VxpObi82TxQ-5>Ih(PT&_rbajyj!2p?`-bSXOMx$ zo@p{oTB8=MIO<#Hj6STU#`<+>vAnmmMBWn#>f6y!53PAZ&r%weSc(B7At}Gb1_O@c z&*++SC2iiA{i*VK2mls>G9~54+sTwSN$Y%?Lm{wr=)mYkXKMo5=iPd8Hy8%Z8~vRj zRgS|avLY4+yuuRmKw;2y)506vog~MQv}xmu_ikDD^jfjUgkl~)c>tci`jl_x8SBG; zBnSY?4mlgcDhAX1#0a2vp+vjuM`}X1rqb&CeE4R!F=@x`E|CIQhQjjqne~}-P-+^Z zPbpP;iAzI<8oI7Qm91Q!obS3A+T%a^1*d!lGZ`k;SN5~_)=I4hYzg1xaMaLBqh{;o z+^YKhzN2kv4BEUYvqIm zys$8C&aSV|EmJ-;Nst{7e*!q?bP&%R*t})=XBP4`u{{;?3OZu0zHhSy8xZRpuk#)g$tTD*M<-(EV&{9oH%!8Ui8^ zq`wG4doIU6)e05>5e(5Evt9b^P{cB>{@GqWnFcLfvxT3z>ZE`5yVr;R639a@e-x9Z zjlxTRSx5u>DJm&5RMoI(zotg#4_Z+O#02VQhMjeS!Z0XVR%QLbsxaH)aU01%e{7js zE7vW~=%mNL&gFEEMPKA!_nnM6ECf-k!1zH+lQT~%!#wz)koxcs8|aY*2h02J-te^R z=t(3ENGJ_JC%uXxy!MJe0M<=$#cx<+%; z?fmUCU&qYDPQ})x9|6dG>GB=%aai`!W#kv{Dbq=%m8Sp3^GlzE)p|NPiOd3<@3y3#y?v#U>MY1Tx8u+QfVa)KD2!4+O^3- zrLj9RWIhoMOKFf3ipXD2oo-&U>G}2As4lolDIP!s1Z;Xz6x;78PmRxJ)y0}YhJ-5( z{&mxwwl%?;@FZXJ$b8xXoI^-k)zeY0rxm7#Ac{`nP{0GD!iDp^*4PW^L*xsIU!DU80JS77DCL z^YlcUKVPYQ{iIz)3MqhPFeo0&E=+t6!{hRRlSk>f&n=UVEBsiIc|}*3zP`uleusWv zzJsqizN_E=UfX}E9(!9hs@fF za_hBpM{a(;|~x7RS+^a z#EJs*e*VOTiR<>e9N}oNGMn@!ux4diGCh9Jehc4yY@^C%GIZ>X!?Ez`_6dnjcShFN zk%FLX3`fk5w!ZZ0Is~T3pWHl&UV44=?w+GUBp~8(dwFLfQ(sydDs63bCTWH?Ov;rM zmBM7^z>?2f5ARHSBVu7|bHD2FEA0)P)yl&F5I7Px*LNhG4MGsLY_8+MWBQ}^ zwQK(guT~%ae*x|Li~G>DDMd}04BFnj9B1GCka_>ZwI92V-Ys8CV;2Ucd}NYWQEu;^ zb9kv~ko3crZhfy#pk1lJ^r9DYS$$LwS@y=P?~rHIFws&|6PRHa2R9_@l0s>NT;Ep` znA*|q&+FEM?r=^1dfV-9&QJ93wbfu2rX|+S+^cXJo1%05@}$^%>S(=oV*@RI_$OZ? z(&<0^4WdOsn6^Ppn>w)g;Y*pBL1kwEEf4OzkjCjGh?spvMSBjBL#NfKEuU{f$La*x zl~JA9kRZ1uldr1UWBvjNWx?K29{IiT6poq<5M|RTgd-t-=i^p#J?3zTM5d`3R$;M* z*3_`Lr8msCaCls7`{)`Srtfj#^r5n2ZD&3-))0iC-@W1xh1D@?pMSk3Iu+k}=m@mW zZ9pV!G1-!yeZP)}4Tw_Rx(-oR6H{Q&rk5_^!UJz0t--0JCu{mgbnUYX7Uj!TOOweFS!k+2OcXNS1ZfJ0qyPHTRSJW0D4`D z2pS9#2#S|;A1^%$6{BR~*szvD2+vbW8e-o>oA=K4{@9s@85+bQ5;rbQZOhyu3vBbK zBP#souU$dF`#LcF+p}|7ePHi4zIUWwNEGt-NUrDjKT{|A_m2IowmW3E3P;2(>8+Vd zDNY72-{cQGVmO^NzL0Oa>HIG(&{5a@1@F9GOC-Rdut~{ISM18RriFm8B*I}!H@|iX zXvld8_b=pV)S}LISAYa*rMRfrVmIv~o$^UaLMSY`<p z2rvi2mhRY^;NprP%z#13ge$ZH43p4rNL-ihU(74tXhK;mNSju+3$P?91zguIz>mgal#{p_P@%}lw4-(?#81hDGHfAMSAhyNzf%(MSUAAYtRNES*Y96#7UbXPX5 z&*}B)>_v}rJeUM18k65upugItFxu-~1t`8rKI8j}7F{;;1T!!H17g{fhA1ov-q! 
zZ|^}~Rw1FYXx-2O5e}d<7DRb001@=z1>y*+EHWtttr)?uiJC!0cx%pj9=XR5qP7fD z85bKiY=z^Sh(&G0>>&Jb0MmA>g6!(Rrn(dyPeBkcZd5fIp0BBa|#t>sxiD@0a8v1$t@XJmOw0GAm_TsW|>Vh?-8-=xvTzt z&qg2q<3X{be@biP(ek;FoonxRoOOuIhyH70|pHyB)qD7PI zYI0mp2Td`)vC&gMUc$hCe`>$|8h3&c<)i;;)~aP=sJ2z ze(T$#PyIXiXUD_;tFU9TYCwiVbboqr=b0#z#wGV0te(94J$NCPw>|&cU3QP`eF>D( z$gbRq(!(c{{&XvJeJf7<`2oD!lyb6ygjLI0_>sHbBUMxc9$JdF$FF0JG*)bG0k)^H z!kC)U|Isb1*xc0n-0W~y7*kW;+h*m~RscH$y>HJ5gVFu}{&kmaYTo5F#t$s}pN)A- z%Z}&kmEp$>_@B+e*TwjgZ@{t*j7h_YavmX_)6g0STEOH%L6i&{jpGlRfl(zEO5&m3 zanZl?ne4+q1q6JTDh?havc6Wf#2&4!{#@OY-IKqtZ|vTVhQi|h%(9L%Q5iHUW|gVN zPueAtt{<)r-wj(<_D(VgvTz`*d7WS6sw$dJE&=p6Of?_Z*k zRsHzxpX{^KDa&@xCw8tHR*6kpI!OdfE-0{R%Z4t=u4J{Uju8w(3@RqWR+u=oRCjeb z?4%W$oaTb!Vl2G>=l|wM>;GR|_UG3D0L#{HqYc|yxVRud$+U)Mf*~}lR0^Lrfqh?* z+oc36Kv)8iLego=n0R zUT1R$y)5pKY=6Z)*-(b=Zb&FfcCgBI9((v!o zNOtJ{)5~<($P%^ky)D$znx%{{xWft4!j~TWrk}I_uW-|uhvJ1-7gB7RW)mKL_@^!S z?4fm7{_s5Zsv1~Ma`3}LN7Kmi2zbTK3af8cdfAPt>;0QCBc7wSmw(C3?MNn5-}J5Y z;cI{foqjiMd}5V=lT-Hee$P2see9R@q1|AHVN(0hp}}#9tg08j5XvXYIli(?%xY`% z-qUKQ(jGhY6_QX|X3fgv^xROulv-&h&B`<|y`{ySs}&o&w0L*-|37*r^*KFaF&pZr|U4HaVUx(<>vryc>i2SU_zHN&Ehm6ooA8!sP zI@Nb_Idw2JRWg``z_vipc&T57e&+M#Ayr-Dq3-=neAbmm(jP89l2^0?>71r@o3>)+ z$O;T9vYCi*>7QO9!;~03q7toL4(_}43T$j0iR7xa2=*Juf;y>k++LWlTOr67O##e{t>!iu(7b93_PJmbURihwhn8GKqo$0I))Fg!0;hu5K*z7VyRG{oi? z{>1$cI2v6Y3Z0AZVCyZ0`GZqP#7yilqXg;XSbV;58IoJu;7@sw@e)uv{XD9iFcuq< z9T+;Wid))}fM(R+*O#ULUkQrtb1fBDN-m8B!%G);e(rl1*qbwSdo%Pz4F!Vo?cB%L z90;5zN)IXFmTg&ytnwq3!Ru2g|B9ZB^gBlZw>8?BlcHBCDQGJWANxZ%P`SLmb-gbVczWBm<|Axh$0Dv?R zM7+>AE}ik84F@fwOCNimZu#CmJDv2~mp)96>+`a8O~PkiOCc(oo1EvJ9Ck-{ zcJ}V;FbSq$NIcRt@5s{)x`rcrPL{cxf)oPLnADEP8tjpig=8jJra*P6RLz|}vRRf& z0k%b4SRUlMEm^@HBM>!sw+Y2MHn@UU&0kNq-v3=*@WQ)v-=kYaXNN=ZiOFOtEDh-P z*=sZg7g70y{>Jz*f# z?g-XEFlKUgeyu9nf0*cK%L!Hjfr#L-6HE1hXV&n|lOIQL#!%^dOhkmr%8+ho%8;8< zWCSHDszNH6@!3l|u!9DrlfGmIEKBgw4cvIfHMD6cg z!cV-r7ANgD;@`n5|6hUn9sVof`@Rl^j4_)xrlYU3V@76pJX1tjF z&Udo_Ze@G-tKV~SdhahK3CSB&@_oa`uTss&+SGY2B3FnMBV+qmPrfSEhs&Gzn(`yPrw4R42I;R z+0Q#pL3zOV$t4rig85BYyrG`)!Ebt-f9Lwg>D_hf(ey!1U`s{?cQ1GPc!{P+%OFb@a2ONoFT~qHzxcRYiQx`A5>4EuCoV=;We!gxyl`<+?a^Bgi`K7d2_yhM)QeXOqC(8BDyBH85;=aa`!X6}aodki=f zpRu19IkH@z`GawozhE0ijf(Rx4?6>L(vJxMspgEvwiH1Gx$59b$x1SH`)t;IxT?F{ z2&i;?Khg5xYBl`)eQ49`>xH{&qXq?hiUj1{5z{j+)c8PDOU{YJ3oDB5SQ5j=Q65gl~MEm&8Z0$h)|>5q8f zqYZTIX@8RNv>&R7KAOq;Cj&q*DE^dfc7GfjBtc9Ebtd$AU{a=q&F;rP`Z>`>`tbAj zz^k|2!@&67C)chiAX#R!VTZ*5lWktGHb`yF4Un;Z8VpjNvy%_3WeU3Bd;Pxn^yBI? 
zPyHVeMIXKzRCdr!Mk?$2mJr8ucKOe<27|t6m1ls7Nq}N;d4jL-v~5X43f7URp+jM# zV$G`5O5f9k-3c&uW)jYaB^DQ(2WMP8I})&@kQ!OnR{>K_ZE1GrYsKc5d_i|Q$1V&( zU&W!lI#p`Jt z7M5;U-}#Q`aSaj7KmfO767IGA`-czkJRKt<5emy?IajT z?AcwNeoHW9%eHn$#X^|mdOVoHR2Y$qTeH5*Wa)UVI1mlfo6U9U`IY@6!@4s17^Nr{ z4)TXT{rS+@*PZ?jqivP0IsC9a9f!*a!0^H0g`cfYPRQmk)3#{C@cyBv+Z1)RZfwKA z!Np~5&6&NlhLQ%niptP?EzP+!SnuX}7{UN-i#A3=MqMhaXVy2nw|K4|1_I)7^A9Bv zdL~+AEoy6ahA_AS1dc|;=C*`h9WrU0K$|9 zx%8V)f2SAU_6O_LSDO#ZIM~e)+*ML&JS#zFHlrpoL)$iO-Mr0PIATaKt$yz3{OsHf zblj|=-%tjh|6Y(a?P}Bam1hTx^RgLzdvB=cPL(mKTc;`fngPLynT)qW`4EO>P%7st zTZ;YKJG?opfSr;p=ZUAsD&IV6RCIJ(SGLX1NzvS#74WqmDX`8>rPQ6!-O)|EBu{L2 zmtqD<${7aL^DffjbvF^Lxlk)a^P62J^k^r#qije3(!fU3pjAS1BN1$uslQS@M)_O} zrhFmd?H)Sl@pC1?kdn3-2CX)L1^|Y~=2a!%t)vG zj~S4++;gQq@3eD(W%uT<%#7HqtEsFgOp&mm9ZwBhUz=L!`&sgMi#k7O`hC%U6#!$8WS2kB^BIAlQE%B|epjda?YtVpIx%R`y0&)rDbK^u?WI5% zhDoilnDGM;UTS~)H5BY~=vk?hx=|@E%D2mZ&<%q&6&0B$gkxe+?bFK>;w#pC9$J3GCfD8*F(X(1rOVR?a+xNXenNc@vex;}DUJxU0I z6*Sgn)83H0S2QRPpt&PPg^$|2F_$3XA9Z!Ad$nfRwpe%6w+2jn>X(Z%!Re*%=5o9b z5hD^cu7GBlOsQ)*pE6PcU}z@Orfo+YU$xt7&osQ`x_Sl)AQ~|)@7$8PDO4GFC70EQ zg9xFh@nJUY4^#?O=#Hm|M2!nvSKT&xT)bfA^3Erm93R4ro@lt$wkfD;X1QFZdW!L#PJn|7;dFsqQ68$GbxNQ#WW*0)x!0i<~d$L;e>}>OIz7S|Es|at6 z6kA{+IMI<4*)9)`qepaaY}L+gO}&Bhkdi)I{>Vjb_3yE0P9+&>VysO;`IrJt+czS> zF0AgogsuQWFd(1(Xu-jwhm4HQ7(XRGb^MfqspF>^yzJmqy*?xL@xoKKeLQI2frA4R!x4F#5ZF$jyK>d)&FD{B zGmvxi?!J~V4U-O^Xz+JW+;@lOgaKt5<0kj#&z@K(RzJOLlVQ;W~~kPj-3710;l?PANFu+~U6MIy{s$V3^oqTG#+4_&F{~ zcKUxf;IOh_eBVRA+tsE1;JYYfCRi5MN&{`I!Jf+l6AAC`4NqN4Lrs6a3)pmbK zT_?wT)o#dqFVW#5o7K~lLf&jZSoGoxb(z7(_N(xG3}g*32*MDH43pXnORNJ(b|)`S zha&R*KtTLq`6mfd8Y5T}0|d|4^RVex@DzqP1H&Xj zqhA-RKTS2Iy42;7n7o7p=fhM%J)y4e3><9lSz0WgSn*+^3+00?Y-yFK8f#XLSuHor zZ`ziwOI{F(%8N-LKW|^@iq%c*0Fm+dz*TzV4uCtY^uwESWL{~*iIAKr*AXW-7*X5eheMdi@K*EqOiG=n6#xMW1jK93!s|vB7Mf=X z2?ALAK98?D?1q}w#%w7ot^r7sfDDG{;(qr(p|rno85;f>Xri$EYDsBPr%=&GW9uOFlp4+b$4!3c%q zZR5rUYAg%$d&kS%KJ~28<++?bxjTALh9mMOXJKM|ywLo4x3=-2%; zQg1)*-PY&RwGpqj2SiX%9^{b8H2&0EaortFrJC~@%D-^E?dRzAyUdT52#Cea&!nj_ zu%-~ZpLVxE^mN#zX9A&Pz|0tN+V82TGKB2MFVKXURqqRf8o?xmB*bFY#qAxsp`p@}vUbXd(1#lxqJ$*!tZl(hsts}mC^7X48gvwiO7-=;6P{4*?H(*OW?>hhB?{9C`m_l}st$IKc>@7;Tr9x*sdwawwPn+C%1hdw_8FOs{y9FIk4#QYHTzGVr9?efeYrgFQrL^sbrU*Prl6z zR=ODLl2AU6?|v>sC?p?9f7W$=*PIuh`|jx_Z#$0Oy~i6O7?OWlwK7w;;JtMHzPlTH zPT$QK?JkftthCk&y8&P!fJn@^t?1C3SP-oif>K}z00E_h$$1h4z``)7_JI9Me_v4@ ztrdg}m_dRO3W`5}`dM-d5gL0`DnwX?$^GEwylFj;tReA6}1g+7dqG_1ESLOQTk}i0zp58;arbVA2R0vHboSdH2UFHQs zV9S!{>smB0No!bOkd$bw=^wc5(~rCAr9sa!Ia_8(6l{YAxk7ms;pnS3p>V#Bl7nEOb~%J7ae;3vE^Fv*xsNA$%_KjAv83) zBelkWZuN|3uv#+U=+bJX(GSQs4cpc#*JFIXDpi>;iboJc2nFR9_H_TA)shCSu`Kba zWr;H&A<+lq-DuZ(3pm=uv^bZJQ$Fxt+Ft@1s_8*4eZhD zn3h=E-sTO_i~1>`gXV?@_~=~F z1kdBiJ@1;7_!L88aC2%ZcN;~8z{-tVP&a+g;sWJkEV|SBTDGy+cU2s{PY;}msz9o~ zHjPZDbAZX8NtXlKrQSEhWNS0PYkNw`>{guz(*PcFHd-`zS<73g*4%Oj_!hNs9icgxx zYsD2{f&ir}N^I}Fla9q(ceGM@MJ^=`dRdxOuM~${o4q%+>wLPcE&5S#cfH8-^>lz> z81%_Ldl$T(n)^CsGg?17YaUiFPu`eLsY~1(*W|K#&s0)h)cC|&9Jc>piB+8tq}Pxj zfd6K)(;msaq_aH}YTpca4pFFcB>8p534&ZclMfM_@fQ_7%FbQnAw!!945XmNk3 zGMT~U2bYZZJUz5`rUcbyi43iIaL@92J`d|RFfzz-_|m*2$q<4zw!U{wZL$mFSR)R= zXAPus-rgN^9zn88ovjpNAcmB*cGF9jtpLI8S!FvQo)ePx8#+98@h@{1{N;3YjLO2JiW&|&9voW1Ppk- z9;bRpNz&HVx364PcF>T1O7Q?7Uv$qfu<)-hPB*p2upSv8=jf^R@1J259XR7C&(r%N zZ`46S+2((hWUxjw(XPf8S4EC=UHz>duMpD`OI+7SdzT)h6c0u>TWY2%%fg_O zFBuupnn(0_t(dm47!4U{&3PgxhH1&AxojF6wmKC`;T(cgnGBr z`Pashq6+~Dn1ozs26vvd2uoJAkXG96?YLvV9uVcMh}|J936c_SPLWUo+UGcWI(7?f zUkabxelG5QAy-h<1Bybf&{+@2d0t_ZepwW!vx^FiGvdY8+2x0g#>DXjx~Rk&oJy-( zw904J_q85WpHP+6C)|sY{ff=zx)PObpmPTgws$Klv-UDA@qn*IKgSW1vsp1k`6%dq 
zc;~$EVrwos>u|GlK*S$1xp1(X)8Ef~D(2mpvN?TJaC*h!w-;{vFt}Uc;#^kW*8QSy zpVyt{X1miWGa_V1L!Lv>zDCK+y#N4zIPY7Sbn0FhJvyutb2d`z+gHuAESdvAfXT^b z^!FPZX=!VV|FM(fL!iMz0(Ow@P%g4W6v)pj`Cbk`-moTB>*V+#5L|#DC9Orj3*mV@ zMd#<9n1QZN|5{_}NvpGIeKs@{2|^$s{v4QoH6k(d4Iuz%Ae&Yfu;z!>uIiksNPvR<|wr2Y}(_RGw@Y5P~*sef5W1J7#aD=pGZ~vwyA6m^QuM?RBBjX?>wF^~lxf zwEBtfalUl1P4{nnzJ9e~(jtIKD^O>r|M2ZsEnnf}^eix77~~Zf*;h|IV_IBm4Cs0M zguo}#5O8v8a*r}a5|-2CtF*7>P_0qk?XuTaUS_WN*)LZO)2dsoM1YgygUhGgyDHJ< zJ)#vP0AX9=ovpK*KVgO9?!|PqMlhLBueG>>MgP=4@x>XK}HN13} ze|ILW?_avAt3dfY7)(qAX5U|O<*x^sN?~x1#3>~$HB14eu)X#b(YF2r?YgEnp8U)X zh!;U*u!c@|`In@U>MmxA0<47qEXkh*N(@8_Ol*GcJlcEOkUpc*zX&2a_&@?Y_O)`i z8+N*X-9d=TtK7&XgJ9VPiX$OR8?+lHkJwuXl148n?5qaJOOM2$ z;4ue#D&g)_KBVW`JIC!4fgmL>4GfGR8Z}ra(lmBlS<}9I_rF)SW$#UI_8+u-)i{1q z9Pw(SxTzuch?~Vz5w-0dFEw7_sk+#>pSJ&Fnspba^ebL@~ z$A<(%^f<(J;uRtDS(LAPe&UUfB;xkJUjQ{mUheJ=Sc#27cYioh|gj%b1hg~LNW}&OHdYq=cy^( zX_{p?D(4yoT76fKU}h;KWGE!=yWpDPx8*+GDq3c*c{vmk*GLmeYmVl!`c%*5834nw z#r$}I`5lZahVs=c0z*iKA$bLjy<6E+t7T!4)RXosK)-71+F+1g5`rZ&hw|4WLzyj715HO91Aq2w?$VC!>^87uzb);knY+F62quKG*6y5z1w`{RF6qX+;R}a;SBScKL zO)E+&?6V4I6(C+Z*PL463j{wk%pi1f$1} zXb61R)#;7|xPXKvLvqVrdlW6d@9~Z?fF25=VMye{0j#!c0nhdJB;<9vrsUP1yqIVQ z3TfoH@CnhVINP@IrX3J36_uFhNQq7`1Q8Mi#n$Tb$^c5sgBtNOpw!oiAL+yY1V*x9 zD~J}dIc;$I%l@WCmMWI}mGjnRgUBcfEUclaA-7JRj+tVrEUs(+HYE z=6cky+oOo`J$fC~doL|W5Mr^wXX%VWJZ$kQtBlLGwiUE@B3wPXko^F4YEP#dH|FHF z7Pq;)GW@+Qo6_(3o*s}-uSb41!^hjx|6Cl4%j-u?ELczw@v|#d6j4)aPP%58cHg74 zbLjA*A3XTLrlGFG(};keZMG;M$Yed}ln39E{kwHnJ6gg71bun@yZZYJ^F$0-3c}QR zotow6#iu7PKM4n(d>y~NWCIz7(EY1Q#lT_Jo1)R=L3P`B4>zZF&uJ<*Nv8vrSRRg> z@3Q8m&=KRLf~SH(UGC)#+s&CS5gvsE}KdXu`F3Fh+vyy!J1bOHwNzh(x|-u zEnr!6cX4rGQAd0BL@>)>(ENDxn815iAG?63pEi)!*9JxF+*Q|B96I<}H>dV+eW%1A zBW(oC6;nn<=Du-fivv81qQg)6UCz-@2qFpxXhkxrP#CiGph8=(*qEfc*RM^&}y_~z7QhvxV?It^9yxUaUzHZ>LBSg@)knmbpju;u+qkfySpW`~CBtaXdy*^q* zAJ}Dq=C+m;;t;Dz%WrGV%C<%Nu*1i0T2?OYtq(tr31zQo{ zXWvU!El&>=l5?e%!8;o3GqatHIzuTg01*sHD>ps=+*+9Xi(!Yw`LOf%!@|$Ee$!m@ zUkgIy9Ep^pLNoh4o=N-P?M*(ib99XYAfpj^c{1VL1S!lw-1LX(GY@##b@e0$azdiDyv#l&lkwMfv^kc}k{d3yu1lxXl^`fVkUg3KA)tk% zm8L;+!V&pg(I|J`ny0dC4G5cVyWnhvNdQP;+J};Tn zw*znh7(|dk3$KE?6@UeT>nPeo`52lXJ8w6d?IM^!NJ3R*WI|g97J}*+MJVAKTLEEl@s(q(Qp4#G33W{?zkW+LCnTuwvdg ze}k+ZTd6%yGfAShmaGVcO{Eo6(;I)}=&T=+@)fIOvnZ)7rn=eJDQ3nM_rF20!a@c> z9(j;T^22|fPl=?%!hq;lou=5Jpkf9!y)z#`I|DAorXMbfs)9Pxra2G^3eGDwH376` zOQJV4G!VDMU+zCZe|PN9aq8@w@$h$FXSCh%rHA~de|%^A^A6yxcbA~NL}Ko}n+5!p z%9=W3uIuPML4-(5UY^S6o8V~~B&`v)7yyMWms;Pe)0NYzMf1{DW&&#xgpj>`UOQ6! 
z;0vc>!HOpO!{s+{|D#VAsgwiG8#XC1=hB^|l+&+ynn1+b_eh#H0B~86LH#Ri20+d2 z8Prt9`2Eiss4?dj^+OvLmZ1IEXlsXLngro*x&|LQUAmyV#NT8rley^j$c`OvP zjAIWP>03cb5B&1@zU})+@-LD z8Lo-?>DShQ`2hr{U;9BWr}pfrBeXM#GMOgT4ILgDok)3I?M)s9LzeWF_9ghc$)tK* zYiLk+^wF+KRD&U`0KM%lxb{GlUMQ<~k1ETOgSKpPKT|$O0b(IRC1u7zuFG#Z4%^vw z$B9&g&q^iSKiDC@r>rFWSI5;w!H{fjZ*@*gcKSaD;0Xyc6sGf?9N$jhfb00V;^N?` z?Fs)OW*`=qXXaeo9}Wsj7~~4@)b=*-L*>H&0L!MOhaOQj1Az15!*v2(zBUI2KkyvA z*G_Y?OA-a7L0(0_@YrP1S*v}B2@^_r{#|EvPs9uWKrk9YV9Z1+=wHIIh)rwPbc$?R z6SQU!$P7r7mIZYEhJ+56hAAAC9156Jzcnq~oFZvSl$Ql{SE7p?k4Q=(l_n<^vySTO z^q*E5j%8uvs9~Xf=RP#9y?Dj~)5-cAikLiga+&T-s~kj!N@F@TT_op!V?irBR2(UW=5kDj7fJivuhAC9mEO()Y;}r ztr$!Q@?NmGv;;B8ynTE1;-?mf>#l@zdx zi|nJ?+B36B2;0plWdtNDDgvsrE62&kc0x*W;izJfXmbT?AP_Qm@BmA1*qF+HpapYD zxv4F;8Y$hzhSZ|1-4&k4FioH^YG}*UXl?g|uNkH!&VK5s>ir9-tt~6H2819kDm8We z#tavh*dm?LLMesPk}xv9>sORU%eHJz-{-n~05n3~+chN&Y7B?OuR7mfbr)h4;u2%aKfH@RUeN+4>tL?~hS8_nfOtuk5-S@( z2J2bJF6IxeT}tWcr()nfd$4TX%JmId<+vJ!CH6cs62HNLj`RSjg7rf9nP=I%*O=Uz$dotweD-9j!EA zNJep3Z5gu1IHb2UxS#&{Jotf=kW|&EHs(^v9w(r-HAh{WTbM=AA8_JfXnX7dN~VV( zx8Oz}02XfTARz^&4mpc2{Og7E`0dZJx8Mb3${;*71Aa*uZHZ|(%4iiH3 zH!MTAnOsM4S#ZOuwb{1#jKN}Xq2!0Id7sC|K${2u1amGp;+y80|5{M#K{pxcoa_s{NhU z?tHg~ay-Zg%Pm`8d~QQ%`cdZj4~+HRx@S|t(gj@$To=Ouz%bA{ba-f*g1ND?Eo*w6 zuIgBkWTb9qYuYscLj*%4JKyinz$40@e$LBImXGL3ANV(tLgn)cShRlV;J`E$@;cfZ z1ckDMt?ys)rR~><6K;X9O-NScdL|0|R&2G0@otqVTC~|i*e3j7pJH73!&5Nwdv~I# zr45mw!M-9iynH19py=?+AVW6D5af5HG5U$~*sW{F!Xtk}5X5%SA}8nN$3`HmyT5lF zCtddl9>3}o0KopookI)PIS3T@2a$kg4X0r}zIDJPymr^UYz-QTx{c3&)6e%VVHn@; z&E?gz4$OoI`n&=b0@w3@CG0#xo|{GbZm2o zh6NO22Jxa4kW~!DWcCk;(R-9>-v_&$9<6)g*9o&WSi0qI!L`a~j%ATId z_zMha$Z$wnjg z>9jt#CsgzcYLQ*fYe39%KE7ra(OER?gu(19VB@pD+=)bs54ZvVU|A+LtnUI$gDZ!W zini9Qa91=kYR*!BpQq?`6ThV@1X|tCFNaDCZ^hKw zQx>NkdXfM|V#c=}Up*HO2aJ~HjCy_PVZ3qa2KnOqTb|Wb}_bkH?^!ntR3(2-0JP1;(u-hJ@F=d)N6H z0L08R{*+q^>l)lWQz?Cg>+qD`em!FV$hPUdSX^AW+o-}Nv+q2&r!RIp{Wk^M>T}b& zy40nvqxa62g{B>Hs_rrs9?)HjDj1RvXKRzEAz~U&J~CZB@^EaLjAk6U@7fPoiqyKRJEI$2*N6=vpzGkPrjH>TYo(z`I)<3>2iiSrt83qT0 zNxwMvc>c+?uj7FmzD<95=_5=U+7HXN&gM_%Bq-UI0~jy@0@Z^fXsOF#pZ%uu(4m8A z>gd6EV)gh0r~Z>4`{Mz83cyBi{C?y}6WY}fhJcM$Osr1e@>gHrBZzRoFYd+LxBvKG{T}{Z zoO{PpD5;90z9~s7A6+6-sIA5k(A+`9TmDw;MDho5^C-g;sltVn>vK?|!M z{OLD6^#6KL@wa|QZEvn3q%+#yec%gDP9OcH#L7VTwj8;k!(w9-iR=aeLj)`dW>3fC z!KxLjI#>9-fL1EyK)yvf4-gp#rr@6_{T5XfC5p-pMNf__F?x z;0e+l*rnDre_3761^Fz(47V^Uwt$mhbq~XhFEI|t`k5;F$#T+ATXA0E1zgSxd}l;qVj^AOSc6~7z{L%2#ZG^ zcxd>`CwzMy^&en!?bE;H!G~Q#$&My?!*++`8LWjx{hc}l-DYH}hhftj@1P(aMAC}G zX9IR5go+{y87<*9Hn9;fA$^w`=Pt%AFI|F5j(Q%aUr-@d)^kbD)!d!EsAFw|k3}fHphpKnq*(SU5iJq5A zsoV2qMRv$1y3@9|`~R7~#w`KeHSZ~3cA39$66m}O{SL;g9h_0S)e55bx1LOZy(Ln1 z)fYX)%f0vhRSBlI7pu#wIs{NuZ0wQF_#Z;Cyzk@d^!Mms0J9kY?s$4GmaJ~3hp&17 z;8(eLzw2cx<)~=XJS5Sj{^EP6$ft?!9uwtrZH6QAH>tU`7o&fr-0jE#I@Og$-R86~ zge2F~BnhnH@zgyA>yA{Km*4YKy6TOW@#`Cw2+w5_#N5AsgxlLZa&n3+OTse29SKK; zLNHh8v4*ENzIOtb^uHgm zAz>DhD3gKkeUN1duBV(KK>UOl{nn&?F=c-E$}}y5T$D^t;lBKI{?%fGQ8Y zS#)M{Drm~R+uQy3SnZI}u9shW%7chsrn=l;!xSdpKHR6X*GS})AE^x9m`bV3wkLJ& zxk6s=XWF#*$Ri78e(Q=c?wNakLI8rCU|BtOxieLkUd2S z-JWRCB5q6G^7_t3r5|7SFr{)nzqP1dCbOQ3M9eAeE$;iuLkI*P1Vb7~vPM8F7TxY% zM!d*8B%SfzEQ$rq*18TIhz67ZsB2Rr|KNqoX#M-2^TG$`$o{(x(rX{R2eIjgLis+Y zKY0znwCzs-5zV-FGN1p$0d&rOtEsKa6TYwgc!706XTtw2m|+FPNAALBX95Z8f9Q`y z^Xe|PD#MzXscXSafc6;xRUCYss2EhK`}rxn`N<}DE7miGBynd?9vZ>GqX1@5MR7Tr zJ6f1PP=mW)8p$SAR~xt=gV`^*c^h?+YTI2u3I@t|=%mAIW6Z z=&la`5#@6M0EEKEBbiN!Q$f*B3#QbyE(jVtMwJ{PQE1AUJZdUE-D->7;)x`_ti(6mnT2p#(smyp?syG@fP9*h_ zo)5z^kQg#RzP-NI{f_7H)RF>uZ(FCg(2ykhSJ-;R)0cl!??@l|@HasdPQHyeU}D*O zTP4~ZKQyZFzD!!5*;7dEi;nY#Nt-52jEzsEbIC*t$T9^EKHlVxZARd=mphhvE|2`O 
zUX(Qn5|OAl&GGo*XxKK|-o8v>{FM?(&kt4xew$6J3%;^c^M4~4ngol20`ohmtp8*< zU>P0nUhZH1;2gT?j01Wl(M#@q4Y&OGmsGgliNf=hvJC8zNT?6AVhw^33W+~w)@6Q( zkT4LHenGMQ+l-?R0f4YA=5>77aMrM6s-pFE>aJ{#Pt$sPk{KZhp^*G-a_&_ZANQ*V zc+KY>Rjbz|7yGWRkcRLBLAf-Y@+JW!yUS&({{4az6A5=jqSJqdH7O8ePx2UP5{in= z2_?m%u6CpQtn2XJc^z|4$~SVu5pizU(vEwrkugNV=Fy2x{{`(c?69%XG_Wq!Ic`c}sS;rg{Y_g=l#F89aDLr}5$g4LQpaFLw9Qy<6M7^u^2!8k?`;k%10Mcv?OHgIeyT^&PUgb zTmHGkls>fN9a(XcSjUlU8d5q|@rc!u_s0<;_>OVcXW!aILrk zAYb_`vuS@fDJh41v3J+^AzNE=-!_9r%CL?3B=YM_0=^ZNZ<_(}>ybkP^|iI`OHMXV zpjH6SM#8vO2x?RwTj`YkUHQz?-5CVe;*)Pn6LnDaA+sWZRPpSe;a9!Si%J+tpjG9^ChwLb|cBFJitr9J>Z<}uz zG_SkI&i1Fi|x_IA+(zhu!Ca3ls!s*lAL$l zwV|-_A_PGxSgE9cpx5HU^La}5gJlSTY&mIlO)D4s{F@@o^MFmwtQy0MA>6>1^^;3;JNzHZ=YOd`JY_l zhxWYzPBNoAwrJ7t`nn~iMQ`-vpxr?#BLG?{RIOXzdB~PcIi$KA@#(@FIaVVDP5BwZ zA@OJ0u{hk9^ND`>@9rV(lnuWJyE^v2ZTrRXe6ii%YTGjn`FoR6Qm(AbexR_QiAY@P zH{aS91ywPeb;sK~Es#=#Wox%~PYOY4nA<;X#4H3GyH}v&(*(ssW<+KA5X&w;Bp5W0 z96hS2%yW6V(l9_!zQ>gt*Cs!alP7a3;pHp4=Ii$rq1nx6ThBY+l5L7N+O}lhEQ$p; z3!)6V;~;EXF7teXTk6U2$Y?{0;JJGn zpZV(pTQBq+OhC6qc&Cwm70LDdy7SZ2U z+Zr>cr&Ib)X7&Yh{$7bL|Ebjbk;-b;SNico=oskA_-Qj(UOASFE**?zD$EYk~6MGcHUNCYGFmUhl4OI@jh5LG6<%D zG!gj#Vv&gbTI}G%2e-AVetF$2ltKE1XrT>umH6}1wK7@}(k;(l#x&y!8qzP;rw{GJ z|5=EY_x=QR4H-7ZMqw9(xnqlxbuYJlZd&w0cbk0C+wBRPl~#-ubC#z>F%I+=qmt%@AE;Gho3yXXlh(5Uy8BA%lPD(i;PGAxH%UN ziQnd>QToq*X}-!6D^H|S8k8w1vu`aqs1hZk3i$2Y*W>gw8YIRXr{5<1>it}YVUYr-`a`$ zR+nhS!@6%4ny9p*!kp30XGlTRn!i^@AShmz27RZb%>HS(B=A==9(XETWIxdQcEdYL z$=$Su4PppMn}-gMZy7Nzh~m5CHJV6~|%9 z%u(3%*w0biKY-}o1qg=a-9m!%#-8pieV!ZyKyW>M+{pb)D@*!G-*3s%Yk%F7D|agp zvHgc46sBJbiJpA*+mWyTEjrrWv)dP>(6p|T+QLM`heQ;A+9W#Oz4|fBruVwXrN6XI z{!#@{+V!^ktG4a`o3>xIZQs@LT^-BModOX80rAJJi`qYHtxFiKZ(pH?>=(iPFShZ$ z$4&dv0#y}75t=#yGxw;}+0CA4o3r{o+otCM7y$XckMdNPz9y5>=P89C04Ys!V+F?f zh5L_2C@N=5NjYdjI;~C))`VZH+v>g3wk`K=S7UZgHj%rd>D8-pT}cdO<`N)Z4A3-q zy=h6fuAYLP;%5S%fB*1z{jP*tz#8Fh*IF-|^nX-W=e~ISh4Vt;i1Cz=02Gkw^sf@2 z=Uba{^E>LZvr|o(w{r>SC+HVJHtkRD(ey|`wM*__-oEI*&GN`YYA|F}H3ohACMF^f z6@Jt2NFVy}Hz5Gv%-dhZx>cLF{9CufGDs&I9NF=9)4aSin$K0ZgA+OdP`;0{hHbf% zo7QE~*pQYZX?H}Uq`K-n*|E5Dwq?_EJ#Dwg)90h0JeR*)wpVOw+3<+Jb#b#8KXn+F zA3TT(_q%~35qX)EP}`F`@8F;LkF>>6XKCDw!V4GE{AsOIY(mjuu`;b?%;moLl|(>C{$KhKMaQGrF`9a z7TtPG2+3sHSXoqJ9^SgR{f$JsYqmFgOCnKumSG`5AS;{Ehq{iQN&svd^9$m}x8&5T zSZO_7cdK25q}qnpuiUcfqhk%$JQz$5aEE*o!PO?jqZHkM{LLFNDy~BnadZlAJrd=%D5b| zS4?ks>PogJUSc>7`O(tA4aqM5@;rG%_LA`5(oOF8_dlW&1$KSjuR8v>j{U8+3@s!? zyigwJx%%0lZ5rieVV}Ts#_41E{DThKXp3c!UUvbPZsLPVPe8kx3N9m-W-nuCxJKnuhMR)s&$P|WjWt94*ySy1pvVvIW zgw$&f-LLxNM=zV}A{gQD)RK|uEcXKg;h=0xZ*2X1%6|RqW$V&YT#qdhkYy$Ir%j)? 
zwDRUVao|OFT8o!-PUBp*R7=YZ#jI70om4Bbj?{~5Rn_4mM`W^UDj?Ka`PEmB z{Pdi??$B4yzKCwVcG(oyQ4ta>!Vz;-sySH)L(<9L*%yypa_E1yI;Ri+U!nJn-{a?h zf@s`^P1!hA4;A?M^;U_dtREf~`$IaV&+3jx-_a3X0}!#8xUj1+cdHe*Om{=V!x48P zxb9jb+vWPPV*C7L(!Z58@=n>?LpoUqqF8}EHs|UWq5)}68d>PI=CZW-{slPup#!W3 z?rEIuMilY758$U88DOdyJOP(J_cXSxuH*X8KA_6V zAU7w{wEn%#NcIb3{kkqHDY11)d5i`OE@h=P{lE6!Gs>>w+8Uj!YKN0U=d4z2<(v~j z7?8*!nGD7znH)qgIY$FFHrYgj$zXC43Lu0~mQW6@&bj;KutTl)LxOvKKYZ;Q-u3<7 z^Bkkc=&}2Z+SRqso@1?AwdVADBv!51gO*)ABER9D{cW`iNSi(iU}CaKMv6`DN~IN& z4o#j?hilH7%|t}kU-~Gnz5PtK@(#{9a@;@qz4#~oiN70v5qeF%_!%IZq;H;Hje*CG zpn?cgCWne0J2ES58#O=3)$PN6|0J{x7#x^VP#I`y-O(!=-(AUvTzM2e{q=fc{vEv2 zZuvNuRR{l@ZlryFK!(^hVQP5B5f_#8zIjiNC=Tjs)WmdM-tJldlnO~sQIKYRZy{PyYf zw5!Pxxnzcf>47I`sI==<*@kYy*4w!J@ViA(L$PSt3K;P=wmcz7O-JK%|9Ele z|1aRE+unxlFic(0w$?6eYwZMVVsyj-A{td!hG@t@C;;qeN^#I9Vd@&be&W&}9q%vR z_5&_@3}XgX;J23_O#s+G?N%+5bDVHQKO-KqpJ)DFBJYPiP8T5Q^NA-i+fwJF%%jcz ziPr^1cu##oDmbpD$pLLmxuv#+Di9R#gG`(YK>B?2hs?(0FEA*e4IE&)yBF`|z=&GS zc3meD)(`6Laz5C1)aw5*!Ta%E`TrI8UaVx_c7$$VcU7(DAQ=reBpUlnwBp?Xz2#lIH5YuOl#PVfZv1`d1DAL$!{+6o7&cc`L zp1{=Uze1|tNLU+}V_?XG;j`;7SZ~1Z-&~FIF$bflIEaRJh1M^=1v~&+KLmN1bp6tssHJx_r#yS{QlQ}EyJ^K z+@ojx#b@t$@xy=p+`pGkWPotP>4*RIn+(1X*wqejFDeh6O)Z~v0(gv1x&3MKXb!Gi z(1n99Okwl@8<8O=A{32q+s+ku@7|yNw|qnYZ^J1U--=|G|6rgwS3ZNiYwBsQVW3J9n)hY}8gi~@jQ_P< z1}E%G>-k@$=3ps7p{RC%&ik(Wz3b(ExnqVB z0wNOC&hG8bKkGGhJ@f6&4$&O;O`fjhT*C>4pU&^=kOf0(2HFY= zw0W*l8$FV=mhJ5hOroEkIR}s?|KaMR|K@`)oxTtqUvC9K2uvG8GT`Hg7u1tK3e9e4%+LRVxkrHn%M z<$K5!0&R^tBSm!R+zfy6=uLn37v3y59r=L;5Qq{4v8F4{l!BAr)w^;~HE~BT)ZRT@ zcGPT2r7T%k6>=M%zl>*61iR0|8GX4zS*}C16&mKibN_)N($G?xRg)03?g8| zt$+7?|1Ace{|xQ?^D+os=i*TVDVMe6*wLl#sE5u)2;i@8{u3>^?UWzp*#GB`p?!NR zW&6?`EDcMKsd3k;J<2=0nr0t5hDk9#zVX<Tuq&%10GQsi?M#XrBn)#pD*2b@2FLqgDpH@q+MyT4ONf5hhdn88F+ zLn;)JheF_M&0)Q3OOyS;Z+=2|JYFvosjL$SYNw?V?z75)_Csn9{Lhl;U#s5#hzY>= z1u@M)TR2Qd2uW*MXK4fK0T^}mFOb~37FFR2_=S!So;sW7 zUHUvVZ`*~G<-k*1fo-o{%*QPJ1DU4a60e8KiUX|cl9sI4i*Z!}-1X{Kn0gQ8d;Mx! zKSoQMX!5Kf_Ou^v@$i=KI_dC}N8p|3wt?dvut|qe zQjDJ*I|I*Mb2pYOeVQBUTj`z~Ucq6fOvi)Iu0U-thyHa#&=oeYeQg7Vj;%r4wl;Xf z0eG?wlI`^fmiNcDKVOa7Bk#iA_s-*KXTD19EiDj^i;}7etXuvK*A1_ zQYR^C{{tqPbL=!6e%K7$|L})s+r1kHA2u1=w(i2{iFL?la{R?xOUNuPK^`IKb^;{> z2ji?WXW)jL|G>%Jdw|ZRD4BFM#kg1#7;#2zJnRmL;v9|@f%A?=NVfgHk-}1qiTu6P13|iW{;qw=QV;vw(L-B|a zs0j(QY~6$Hdpl4)v=$R5mE)@~cOn_LQPIB)En6BuRvez&|AFK0oI+?n31uV@VD_1hp-1U`Iy}+Lk_vua>XGteMvW_=ZOueLto9nqYW~F@4S$ z967BJ7u@|B4mz?QFMGF*RJ;o$bO?_Z&5NJsNMt<4V$A@)<%tV!r_~><2P~6mLZ!U; zjKh`WKDlJ|PIv8#3qeFU^r~mDZDT8t?ttShLu&7H7(eAWSef3xu5x_Lb$`VBU#%f$ zyUmedC0a6LImN}E1H0O-Ph1!Jze%rMFr?2fp3AIB{R|bP4W3fq?)cNS{ELTQpch}> zBdw(61dGiZQYm%&zCshr{l}*E*(w{3^WXRAYb~be)<@ksMct%|ki=WJLxK2^O ziBV8zZ^-}vI(tb(K~!NHKCf4Z%+jZ?Q?t+g4UMh#;+fAIVE9e+B{EzUmeig$h?p?A zl=<8%R`Kud7)-nBi}{!5){!9Mm)_nY`g$|U2*|T^DZ0{q_6Nl!9vLY1tL~mYE(!)h z-Lc$pkwWhQu{uu0Yr3wbQ^_24$6N}S;Pa0x!hM&mM`(5_(wd*ELk>0dC^B@x^Tzw} z;qn+MhVuKgvCL{vJng(uT^tfEy?rcoADPnP)OwrY&(Z;7j^KE7BTeh?#S33{Ag^?) zD$#jwN1V$d0g?fSgG!^_tu_HbRe@sN8^VtIcCIan2I6V!_`$W|*Q6#hUQ?s?&IGpX zN&st`xoF;C>h86uqSQy7y=m9)(dOy`Zg*dPsjiVQd?x2@rm7;1n_GLhBuG?Z_Htvq zjF`c~Fa-&z3|;h#C#{R5P~`+-hvnYta?is}-PxW(pCO_l57*V^kWLq(qbJF_F35F( zjq7*u@Ueqv%0M4pT;79hPElnOU?-;lVe-l>}i*F7rv+-R6>FLZK(Y9Z8pcrk`e=Ud^qa_t1_On_LDe!-^X= z=lJIP&c?FO7f~vmK`z&a9i2K!DLJz^%@xDjsH>nBQrBqO=l};II=_3%g*5GF52LP1 z$J^h;kj^ocMVVU~dil9K567#Ec2iGp25s>yA2wq+z4OsGJZfkSwWP8bRa%H-Z#!iT z-P^Aunky_Rc7J{0kyLWn?dVA`c?{-xBZ0TxyBQVL*C1Qs=W5-ct!)koB8;jPyt7#$ zmvhz~w*{tLGL*~^%zwBsgNIf4a@X4AgQ`MD4uyk`L`Z7d? 
zA;F!Eis8xAsLCISM88MfN%vyU_Ffbf_^EACI~(H*HOpZq6xA+BCe)+MrSKl@lyCZ%?J%8}>z{{Qcp|eZKN0?X0Ty z9BM|q^{K{~-t^fm_L0{=MPGi^sabj3juhxqy1T5GElUjmkpr;&!QAfuZ4w3kRywct z`!Y$7mlju4n`d}KqPerl(YiLL93v`e?93{J16~Gz&+b3#?}|bJVE6&&(d2^-UbeQB zh8;G9o4;!D?%WxF4qz!Hy`poz!y&W6vDFotCZ4ryJYEnr^-OO@#n-ee)C5`ggoGY) zk@==xjh{V`cE;0`4|+&*yl$q|LwZp{1d2?!F1c*yrwYO^;@eZdfKQhtC2cC{RETV6 zMj25r`@*J3_2x+T8E&w`i`cf94A%HLnT4t>^i#skVOVVxUp?|Zm>(3rnXi6F%<8i9 zE5_b+#v4xI!E;1BnRg9=8ANnXdift`q4Fp#T;WskUA+o@J!Fg@D27d|R2x2P;6S-w zrrY|M{f6ST_d=g|y)-;Tc}qC~zqpK9cv5NW9z#kax3<+rMU>$)g*U8Ic5{zgI=Pmt zohy;6sH1$!DxbmaJaMjW8iJr!o3KaX84XNjBWgD7!T?+arlXIVu0;cut?N-^G zJ2~5(k&yvmmSn2Z(h#j!`Yj48D~MfJg$m4?j`rNUOnAr8?8O9v~uRLI#UGoM`V=it^;w6_lrhWI<4+cK1-UDl8P) zh?7$I;x~v)xJ1+)HAt=e-FldM65)}xqOU8j3`en>bVPVy5FMMF6%}d34eL?3@GJB_ zcnUa{XJ|q4k1izP4=X_^W>$}s7kJKW>a_A6UCGqm1lu+%Zp%<)N`)A8K$-e_Su6WY zDN^w?*wxkG<(Hy<`mbfQ(npz$f%^?LV*P9|c(3P>j0`-^Hlhr8_Bt5Ep;7Yag12!BA_f5C+n^Y*;f(vs;U*|omMf$7qenbm-! zI37#?hY0lI`(DPLqz`|5x(A)Dmi+LsE!ep;{(FG>GR@iJQ{Tnk(!7DMH7Q^9di66M zhtG?KwX%Y8pB-Pf(+y55Db!27{YOspXqjz$)TDE#ie9u}Sa~N#_VW{K*?wAOZ-K45 zZeK^55}VtgNjcfJiJ>#1)fz3l>>!b~({Al>zptdq6NrZLYSQ=sSa+*pU8$<+qjlE4 zEa&+XJ#^98V_5BtVcX6a`DRV#@a$J63gR_V((*^1YCTnqpTzmBtvOi-nl6Va<}G64 zDG-%K?1_?34vh{B&Ky+0Cb_v}8a|&CPc5l(%d34-^XUBpb8aee)^xfhWgg|O8jDL$ zv>!X?h^G5hvmsp8tjM&gh+g3vP&>f%cp~h2s!4P#x{~j^=VZ#dx(XKP(KOszZ;dYZ zHmp%~14E*5b+fAI??qQlK!P(ar_vBNx!7?S{b?a5X;w&A0 z$W-QSkFsJCeHn$$j{Gl4icfP%`<6t4mdBUv{xbV*`*{TDYgfb;96I9`)Qu_EWZ!%o zaL_QkI^kN{^u=ko;}hOCo;Y7;DA#rWImU6%f=Ub%li2PkuhmiGmN=)V^g)KC|ng= zHdQ4YVnZm=wxt)x92ix*-`-NG73$ST|D*)5rR#XYFhB9u$8qKSDw|@RJNH;Vn;FbLX z0|T>)i(_3$H(^9kOqNJ4-Oe{%FcM~vY8vM~Qt5nGX(w$vD~~$)3V!@Ihfu01t0GlFKZ=Y2;lWqw5!)SK+++euAy6L#=!Iq$@Qi7?eLxC*4O~2gbjgLEX2%svBr2EYy#U zC*5z6a`fVXA-8c|qblrQEPKA#&de`yWX`SUr_<_A$L1(NYQID2z82LVjOp;>Y50#t zmH3Z*cKdGyckzxjb6_5P*QJ24l z6L0?`Uc2Q!iq1JnWOI(I3pLYr_`Xr&qX(|~{d%kL;K7OtaURHgU4KYm>Odovw=KV4dsGv&E}M6sGCkrrPylg&@IE$3PrM8xbjq*#*ps_CxU!v_SDY4?VJ-+ZuPXR0Gu zEUs3}vmKBvnS-V&@3C$Aytu#!q;t;Y1l69(xCc54r4oomL;AV7oZAx$n5A8P+4~)! 
z5Fl5Biu--WwehsuBEV1Tnpho=<)73HsXh1L0q)N(|3cUa+YMLvPjwu1stwp0vl9+_ z-b|+KQ!hMw%rTEVw+0g8R+ll#RS+5>ubbLMDF>}pg{FA(rC+Fn=0B>%Gq&T==piBC zq~q2?*h&@_nRL)7!&$P;ni=uwds7hJY{ot}r}Tvpll4^AwmpVCRS0-H>f=wB4hY}q zxGJ5=yR%%7rZbChz*yKD%WcW!6hcws2<7tmHX|>Yi%eZUp2?~|89HkH9(hL2&^2Jsz`+GqcPH~{E0wRzrkq<`aIs=CbwXb#tY4YUxNRO& z8xQaiIm>Z!7EfW9%RB~MEF_K0*?hQiQLYKRmrB@on||^ZM}1eju1dwz_6!$7*O)a; zimREh$>%r5IXQbl_lC?(WfP-%I+wLI!r8hmZ9{YO-F@m#2D#w?z1g|C`}*+M;)5MW zUFCq%66_a*#i=h}xyL`}Bp6Z~=velxHd7D`Ll@XG`^bLResJ}dcH!LGyF6a~rC7?^ zEWu~HE+1>N=#x+tx-Oqdv}Im0hG#@LReY7EiN0jonrbVmlnQz%pr2>i*yS-vh=1<| zZdr6YPK{P#>VyIO=a;wAhRq2s8&{-&I+^+EmJ5A;@#KC2b@!7NfuLN6BYnNjJK=~n zC2!mI1HWz3`d3Jb6d7DMzSuP+X{ogHhvH)W5ZypM82h7IA`mOvLSzkEqZR$$uK=?`-E6Y5_oJ$+Ar^lz^J z*tH+M{sW=%{;n$mLrNV`SRn+$% zCQtwvQd&y4KKLN_b*3rTmxE$Dl8BbBS=E=1%pIf+J8Y=k_UTgY`t(=Mu!5+_rtOE~ zS*I~DByy+gVo1QRou~)=M=|hzGG*VFZMTrkVpckbU+Sj#BwAt~9S(XfbX@v*zk_O` zlcp5iBQGHSvLjQv5i0??p!SRI$R3FjR=ay!3 zYIeiQ)((H6xZPz;jF$OMG9tbOx~@HtE84GaXjIx^98MqQ$W=$G!voSWYsJBUFt zm2pq$Y0aXmFZ;Y=e3wh|;kiWqD3DC$@_c%(H36 zZCjlY44CI|fR6Tf#SOWY>vm{@ zw}1q>Drzc(L-eTYs(Id^_J>Hh=RjXndOxjCBkj;Pjd8a*owny%mNK$ENwjrlL^5U} zopp{#q#b)=9qEa*x>S*<_JGf{64sHB@{L&1{(Y{)LNKJyw=H#*u8Ye8QO|LPDKE@h z>YC~*Zy5<$Vv}~YkSH^~=1+qG&(hAG^lL)OFLLWP9+m6P%x6aV&K*4qTUK`?opfG; zpmo{h+m6XMCg&@r&RER8zpu|mJYi2tX82OW5Nk<_qaoyDIh#Hp34PS;^5a55{WPWM z!EFk0sno9HxHCbw8UTVuRNsf8F_c?E2DaWy5S+J)~>m>R?bmc0j-IiHhl%(7|U8nSWrF$I3Y` z2q`|u3IEZ#yqxbi82sgX4OamlAd^)`o$<4gJ9N-ljzed7!{#pnLyJx~bnP7x#8M@B6*-0A{uqf84J1v~^Q^cPAe( zr@##eM>7N3(#mSzG@oC*LE?LJJK}vrj9_1{^RmCj|H$McioGMIMx5Zpu*L@Au=xXA zA8A^{hpi3SulJl*Qf$of_~_3Nv=nfH$^rG~gKK#x|{ zc*dtYtQ$8iizQokW#z{!FLj^()0rw-TtFRZhriv>!tdUFCSJeo)c@42^O+cKx?}|W zKAoXSC|8Xzbov&Kf|a$^wiTTObaK`F+pu|WANit&6Yy&znQ7X9+VDl4pWKu^Z@@5Z z>_N3J5-2cfdPRwHdAyrk{{56S=(&jj{6ptk37%=L1#MMtIgh7=j)XME!Zgoig>+O>X7*O0shfqoogE zj8u-=ss}VkNiSw|?%7$_LHYd38YwA1dPL#wRF;!ozc{X^rDI!BrpKG^iB&3wb>N(u zuD(S6Rw2aW>9o7V*Ok=UdQ@szu?aeAmzzedN%8P zY&+b#aa(+pH(>U6EW5tzBliZ)6UfN{QAi2n9W?G@Y_}+8K?She|27Tb8=0 zyT|$5NgE4i&58;^fR%SANkQw$lt3)!K70REO$hQA6d21It;iI1)z03}j=7@mtBuLZ zNfihdXm2rKjha?e0mT@gm^YhI6Vep+$QflGCMpg`yo=!!f*AflKwn&Ra1jJ^EELp_i>=?Z zA!sFi9UbvfR?LUZt8V)E^OkFvu`CgH{^ZFg^^y>yI4W-}2=Et$hg>$91a0Z=N$Gh@ zUFh@Z=Qv5#-n+Wph__`of%vQ5-s}M*PkE@E9hHY`T{SdN-|aDVkiaf5yATp!!rvrO z09f_pg{ZIZMBcJlnv&viTNTVMW{f#cOSWYe27=;Bq3yE^-@iw>pGZ*@>vP`^`ou|2 zR@p7}ELGYoH*CmrV&&aVCYG0b-&)m``TBtqA`$I$%|HwGwRr9`cvNuHX?4;1HOUn_ zH>T$#`|?ijx){Frv|dJv4OMY)4Ll<&W&0<~SLRkMKc=`?o9gq6-)jc75<%=ERHFT7 zpw+%&6cqL;%kSIVr0*rFz(1tufD4cV{$5E61h6DY&<$)02IS2(wcfFrhU|q6OS|{@ zLQ+r(Jv=)t#VoG^)ZU&-M887Eg2>9@gBP_)U2BEw4SjdG!mM7Vmu#q})V0QzQjb zAfSD6>`h~E%8{R=sVOy%8Qj{EOC%HdiL5xkdt+bBR)FoOi9WCPoH4)$>)Xw695vf< z+}A7z&`h0O5J-|Bf}Z#jvu0yQtf4+8ckPK&Dz4Dk zYt3>Qy?tqSyQXWp3n*nd&<(*Pu%R#J%v4HML=L#&*$o}7FNJ4Meko(;e#=nViZas~ zGcJNeJPpfJ1Xpu?A+IpoK5S}BDd+x7#yX~~ByfV`@K2Y2*>`?sPZC0k5~;~8{(ue? z_?=(gwx++UP|)&icYQiz|3o*$TQvF=HYShOt*#7Io=t$Q_RegzYx77+d~Vohyt1dW z`wwBi_JWi=+~*bTuFGR3;q7q0AX3Y31%pOIi1&OEFN(^8s-mJ$?p>4K(Yi9R7R)mU z{AV+$p|C8#wgQAu5`@&=lvrnb7jeY4nGCAc*XYJF&V@5s`bIkj44&Bd8nOCm7A*&R|d%XG^fru9+^<^|6iN6={lq6yejfvU1LCd;#WqViE zS9^192@o)Z2wTmtfs(2a|6L>s039FQgnZUQsyE3#pH8~pRMmr{uJr3#GHE{;EzpkE zbm{=J-0vYzAh7GOzpu~zQ?SBw#|v)`<-SEXsca^z)f{st^~A|7m^&KADPq`;(hKI@qkXfa(^&Rc8Xdjr{+B*Uon{bx zrY>Ko8&+^-xWN2Xq|kFsS()$3sbfONw{+%9KxB1%+S>HS_3KP0uOKurl!Iqz@GFSsqi?3Mtl(9v2PgEcHB*+#K4Eo*u2}d zGFJ(R+oiyc*jKmgWW|#;Nvk>UB9*dLlv!KHH! 
zu3nw7+)=uTrzcJ=zBCv$uMZb^t|=?`UR*c8f6g1f+nn9ICMPyLx0PC7Tgq;hp6iV} z(~ap>9Mnvors>|c_H1LoXFO!vYQn(D(I_hp3>+NZ>{^bYNrtA;WPm$RM>EI-ZrAU!LI(oDhf}e(K;b0n+P}P0^qM!H(wj5DZgJk&?DQ`TUlk z>#BlL{q!x3+hbB~>xCw$3XDxMsDmK@rb$wY@0`vw6fgvgCNtzkIBZIh%E7U%Q4Az{ z(zffmTgsc){=C4k&dUeh7WI4gKE7+nXg*_U?4+mNT%!>O>h@V{4o8RrutIq zxHAV!Vo_IF>Hh{q6O_Uhqs$L)$YY;;Vy#rn{d|7y8%YeWA=KYZqQJg*jU!%kZ0h5v z&!9m=ig@0Ffo@UAr)RVF`~CWRrkGy(bYE(||6no^l!8ho-0ROhbLR(xkH4$dab2fn zcSeSNkk7nwDj#%6wQF0@wyo^yNoR5k%PP(BzJPp0>gWTY0|ezjW-|PX<|b=xAfVqi zbVgAb2f|KwuTt^m4h~j@N$65`Hzl;L3y1XTRi&?2jT!Ru1t*q`Dk;;a27}^f0l#*i z$BQ?mDZVicY&T7`YdX3#16>}IS`7o6HIMkx>%+^zki0Dzq@UK*c*b3E zZU_5@-b>zq2F zZdZx}zkPC5ZJ;RZ#4^fmIUs+0YmfEtK^M=ANI{}y$!S}pkR6uAQz00!?!3En(O$$7 z)|ISyBR|-6q29fHZkiS9q|bMMR(f_P@@B%a_(a!b zrBA*UmZfH}!X~K`q(+dMgr*6wlx?=7M!-Xe_u9^%Ufhnpgu2#s5PbJfd$N90>~(C^ zeEw-mqIpMeL*tgN=C;<{ZH-Ntsh)}uR&Q=2j42nWD+e+6GUFJlD~V)VD(~!!=W|VQ zWU|)r9*_2QERij;T=s2R)sVXAu8GzGheUmjL)U}xH5wccXq;(kwGs-wtywF-;zhWQ zT4?I>g|J_*SBzkLyXE>kf!*Eh*}a{u*{L0!`71Wnr=zaRp_UJx+12!IQttRDl{AEe znX`v`y(wh!%8DNJ;h3(tdh3HnE{-ce<#U+W(V0cp=S|jx8Re$qsEc$>JQqt^!=xtm zF8Zj;^3-3VN~e~Fl|o5jfp<;z^E2V*95}Wb@7ih`G!symC^b5ike`wC6i8EcXN8k0L>&(*)X zrKSD0m*F@#-FDO_jGPNWBHq)P8|85V$mWF=oPFEG`gZI2lkS}CS4>q!1)lYWAsTFl zhoi9x`IdxDVXyn?CynO=oZ9i;o<_x7=k>|2b)8{3Qq^0E6B-%p1hu;~1B#=YIKx#0UfPV5sb z(KH=B!Jxe6q8lonyKrtlw|eAK%~N7ZmEAqdqC@wBwou8dvuZRCToHPtNDn zDUO3tO0k66SH6-ub(*GlzNkq1L*uH{cK9Sg?`7eJWKsWsAO<@2B$W4=nQqCf{?ukZ zfwSI?;fZ;F#MSIYBr2%!@af;7-sE_<5K{zC^=9SaAYWM^24K76HqhI~EkmvWf;=vd3;1w(A z#(M_i(T@`%mULXNS6=NZbrKPMM8q*b3}VIOO;cVZO>JdyiRNqAoqm}J9RZV9C37^B zh=b`>J;wzn7d;~-eV$4=FFOfaKKIgOTzJ`rF95p8VzJ2p|H&oVN=8IxZ@vLpyeEPe5QOsYP2Ys zu%6d6q51s=VoBTad&O;z!`GFU=oPV~t7o$6V?*Qjbcs%AvOJfk`L0q+a*dfwgZ+J% z+72%WnEZ;RsLXLNKNK`ib!@vdn{#i@X4D}A1_VN_?b%nDxhdqus+2{Om{E}VYTXIp z+2f|V3b&{BCJxrh13H-PKtQ|5bul2>o4djnF`s7U;-Ci~dn@4xH zCwjnMLcu|;$ zTW^rSCcn;`QWhOTKtA>5EvFMLK-Do*UMh!{C-piA?L@p5AWbn2ARu+f zjG|LEZBLl3UDjt(@Je0sE@-m9CbXl{Y3pp^s6E1Cyrmhm!^ztXhDJ?k8}l{7T8GuG z*)|8k3gcE+VMf5j>lqu>O5xC=BH!_uRBobe^SpRh`kY8{z_6JtL&IUYynCa^;MWKw zayE|BH1UvQtJl3=?SM?yxnI}#9YazqW6=>(;!DfnyLICGgeK#Op6s2WLK9NZc+28n zr`OgWh-i&K>m4dQ`{s8%rg3XJlj|a>zpm-D$H_Z;O`V(47UoGQHUNBcb)`SLcW?S* zP4aSGM}teEmKDt+#W&<~?#HGn@60)Tpy{RC%L~108k;lk>pCws1e%o;6}C+WJBq){ zwK->nEA?Y6N8R8x@SvUXW~zoe7g<)wn_GKc(I==aQ!|X;?8V zN1?epP4B+99d^dyvVI}a-I)=>gi~GZuW4(|-IUAm>B?baKbbmUND?dzLoALKXxFuG zNv%Y0mSnXl%)$VMk10`2yL&}{QFD(Tgmf#9QM0yXj}Cr&AxUz$J-2g|Hh9*64DKL#JFtMIh5EnAdQB~ zGUJ_sAjLa+T-n;^xY4i?OvQ313%0A3Fm_ZF`(A?~^4VB&GJu_(J;}aP77WAIoxPs+ z&h(g~h<|;1cP{VuYi0GF_Q?UCzNFCSbX$%JB(wI`iqZn%*b>`z#Z)AsjdiX3kwj3^ z@P5))pIR+Tb@}sS2dH>onm6x`Q*TDHu+oztXir}<+Y|Je6bzb3#?lPZ3Q6$Dy0L}J z*Q~W1jqKreCVRY;WSd^k8&Xm}nRRC_JbBowH=p;F8=6yi2<+TUMY7N5Gv9C=yQwfp z)t2Q(ZN|p7R2U=ri_qTs_^E=OG9~qG6(ydxa=B#QqZ{S9yedoN?R8a!L77NqT_G{t z)MchGVeR$!!`_~r)M)}vKcRb8jvZ!x{_W=60oVuV^75~m8-m98|u>=#OH$dk|C(%8AX-`uINo7xLI+tVizJ0&cP%|DqE zee2O1{LY~l^pd85^La)#XHh7ivo9cho3>^az|9R|mX6Sk^~W6_`tZ3&I@u@!u2cKE&y==nz1+;cb66hT$vMvD!udz z4$hu<1i98EMKTtM_+Gd3`DVZ7)ETB!dmH+878UqJENxP*Dep#x`X{)#+~ELh-SoWQ z8+SGZ1EeK0juQ!*!<~G70qyIISsd??Ux&jz2>X4bZI^pp=*@~Pc)WTUDYv|-BfGA; zUmf{#d$bH2lN!J5`#LhQ$c{wbW9HouyWZ` zg(S=HcwY_}S)U_ihgZs2al{{7vom#yX^MtK%>6hR(g(4XIavx)n(2941E*3p&;H4z z$SZ$d9CN~9F-7IG^FShRdOUx&ENg2;ku1vOtlGwp_N)f4qeHL1$XLFt>qMdK!7Q}K zaM*mqZ@Mv$E~AN*HMsu$Tfd!h#si|IJFWzyzowC=p*glS9P*0M!-xFmNE86B{N4L_ z;KIW}M3{2&1GG10AyDYyfLEtITl-|PF{xlkRWjH=E)lb@wk#}AE}Q$)dXz zdZi$$KhV56wh11MWVb_ltye_Ke9G`h^z2Crr(U|b-kTX%gO%}3;>`2f`*n2W$GR?! 
zv@Cao>tc)~6thG^YT|33$t$Z%wdLm?TK?SyXB=l;a>t+Ow>MnCM_uwbEnWE~5~xE( zL`O-P2g$4rAq0XR!mizEBmh+A*I>x3Q?dBLpCT~mY@Be;k!Waa##C3w;@G2n0RJx0V24;o`*9Ixqn{rD>l~X2e zkp2d99rI*vPt=+h4;Wb#hbP2Ei zVlh4b*5kbB-OuRLw?D%-AKuSIgrP@2Ao6`S1K_esKV5O}I6m!!&*jLG5v7?L7iV`N zEemPuinlRo_;CI%d>~CJH}FM^g^oX*fgwlThh59t!M(p@fZ-qVGZLYGAVus3a)7&E zWro1IouioLqvC*4UCsG@2g?8o78wLlphi#g(Fv0U?*CJYT07zlfPK0{I$3OW~niy&ZliL zVq7_|e{TaLm!$CQiO6Lv_WMZm_2dDNV#O5kXdq2+wl6P6POCsmLm&6Va)7JY3J0LE zA}H1XvJHvHG)-(zrmTlqu@)!`xGA#OA7@!q8C3_3w((aNX4pqvYT_MKJ7Dl@&p3T+;v&V3-+-Eg28p9S9rKQ8?o=oN>-9 z+;!%mJYdE>2$uwC@9GBV8ga6?6tL`lwTWXamtuHYA3 zIk(^NSg&<<-r^Ho2m54E_FIk!4H?s*x55$q@f&Zi`R2ljXY5P=D}O2U%3@JmVzP38 z&gL|!xZj1FXZR$=R*c86GuNQ1K;t=MBi@(4&X#p|W{V4aZb5k=_Z>C4@VjuKnQmzZ z-M{c$KIyt=DI6}rrk#zbD@vm=??>L105NY}niaK?FC?IF)q~4e#^O-z(gC4FbGTl_pW+_P6w#&B5eB%6m*x8VzoqH3)&~=r{DJEvo zo^nOdr>oKskjc5QELVX*j6!&VVNS-HM6x#{iGXl{&u#l4pb%-MQHMs6NM_k4Nd#g; zgRmXmC#<0q1^galG8TJGjcmsiBSw{}&%WyB;vkFe&a~8Z!IhOk)f2a1DW>FSFT2bU zDmrAINX7F)GbNW5=qjCeRdLj$bjBv7l+yfR=$xabww!cwHu=InRZyTSWm!xBDJD|N zQM_L+$LH0BAc0iQQc=G_yPLAo&N~zgd(_bW9<{#Sk^v8?u!j&TD&Sn+;-aXJYD$8< zYJDT+ESGiRP-}-R?YvE%fT=1AB`cvJnaV1#KrohltWEx?PYoLsRRM#zz9CLK z-oBP!TeA_DU-uSy4QAo>Py6_QZyyAkE8wA73*&K}eROd%a6pcLqiS@_u&u;#PmVVPAlCg}ChUD^6 zpGsx&D(sg+2!YO?l=2!HwR8u8q{gNF(<0uNl|l*>7J1yJWJsYWL2v|D4q(ySm6b$5 zFzR!MeRwT59(fx>2;s5-Gb?ElOkE?}a;Tvrp~4kIX;M`hi@t2p?C*N*(3FY?rJ%R+ zj{3^fq>)YMIT-b-;FrHqd#Buhu$S3~wCL_mOQ{Ktgr)2EnIsI2*>YJ2Q+=-^4MTFQ zkKOWO61`n%DJ7w>&~#l_Lp-m<;?H$=@~8xt)&@kpDDM|ka)b(nniU}BFK)v0Q{^}>8En|G(QI~7Kc9-z87#-X4S+O|1% za5kroupOSGT+}GF&nJikLDR$!^&n~8+mHWWX*)6SxO*^mNFkcL5=g}o zbjYD|@%GCvV(;RoVGcWkHRPyl*aUDOh;pyOfDx7W;O*70IyS>oHkPffZDbY=L)YRx z&^_mpfXvU%FQ=2O(KUxnkZT!`w? z$Ks(gCvm831SX#IFf}fF61$eZj;W_Sh;@rzfWQAF^nUXcKoaFMeg;^CRO4#Y&NvMX z11peQyauxWbXFTaB4L!EaQt92KYuoe6plReP85$Ci5H(+0#n*B!V%85E+cQrFw_-0 zh?X$kfASg3IsRPqA2|)r-0~%e76FX!{EBUCS`(f|s#v*IMMe3xUi| zc>Fr6?ytz#Zyv@E>W`CdpTnnB4uu3jp3^b#@FC=~7p?0)hP8JQ-~I62bk`lPqiFgG z7}l=Y!g?_osLAtB*fJcYoPMeNu=DuWnL6()eH4!jPz;;>d-Rp3y42TF&};%$qbpx%sw z!H1p;qi+L_Jo~4(>!SN%_tnFQRzUiT(f8#Y(2FNS44;o;kBzpaufWYE0MxR7!fbLf z?Vy|uSu+vorPrXSydRFa>L$FlC=b2yEnKnbGrnv5xuo1GR1K@c?nQS&^le1#*o)B} z(BQ~A*zMmzmucp{RTLU@FoKyda%u^hRxiPzk!ND`OecURP&IS{4!iJStbG4v z6jcrQ?;+9eal$pvV&|qjDr&ueiqo3rBx=ZqdaVa;$`q-VEM30&PUn@=9->08pFM$m zaMzaJvDpkJxh{`(9E?ydk6?mV3Z!)%Uy~3k0$#DCvP9c-)a>MV;E(s8 zheNJ=9pA0{7H!EO1{Vs%S)*jqMbvaqR;X~qq(V+)lFSsL1I7VW*!|B+;MZ+gU zdPAg0u&o42%`6lHX;&wgCAY1d@?#>HuK>X5b>%*1^yd(pCaJzUp8|FP5Q zyX~8xOARBd%FyuLSFGy>3||;EvyO%mIsgVKmsrUCo}T*-=h|wYU_wBG#ty9HNAAB3 z(+@g=7A;?ix*-*qFk>SB_797wqPiM#^Cv8}e1wt-^J$k}jgpWbhr~W%_w1KIsYdF{ z=x{;2Wp@*5J3mCTcO*45HDk>DGZA0>2OhcLm&m31KupBMOkgNx=#nUE0^P|RcYOW_ zbcIJyOHUuFWeRD{!?Chy6p;?k88?tZ9-TEQ;J7Z%IA9PiKJ7T1_}!a$XZYdh>NlTS zd-^bD)^vafgu?Ds><&MfT3rpQUxW`FHVA=el$Jd4dk7^luOxxbe{~Zd z@X1zkZ5Q75wcNe_J!BGnhz&fNcGP#E@bCp_K7KqOyDo;p-VW-`x#(6F7g;{IrVRqf z<$rN}yY{G&_-`fA?{UI)PvNDz&W1r%_}MM@P_{6PrSGmrM^d7^#6)k8C9~}*#)2|; zz`+sPbLb}A&v(JI`VZo@mz>CsUD*=OShZ;jm9|TT0TYRV)t;>v9bLD|V`S3}ogP|v z#vy#oV;|Gf^)Z;pp(qS&=muI_;|Rb;QN+eEQ;YD#Ys=8PaU+lX$!vW0;j{l}4*Vzn ziT@h5{7297FLBsiK(kAb#0U{-Qo`v> zv9BUPzdLt4@9D|mj`NQCN6+#%91IP9w5lduFp8vK48%*;wL-njSdfAlo}0w>?}0+<2EQRMgQ z=xIwpdcE)lJWz_6mBQ>s^; z|HMD>PyGKJ{|_^He^}ERxX}Or03~!qSaf7zbY(hYa%Ew3WdJfTGBhnPF)cDSR4_R@ zH846gIV&(QIxsLR$YGQK001R)MObuXVRU6WZEs|0W_bWIFfuePFflDMHdHV Date: Thu, 30 May 2024 16:42:51 +0200 Subject: [PATCH 17/53] folder Crossval --- Skill.R | 153 ----- full_ecvs_oper.R => full_ecvs_anomalies.R | 10 +- full_ecvs_mask.R | 435 ------------ .../CrossVal/Crossval_anomalies.R | 6 +- 
 .../CrossVal/Crossval_skill.R             |  19 +-
 modules/CrossVal/R/CRPS_clim.R            |  35 +
 modules/CrossVal/R/RPS_clim.R             |  39 ++
 modules/CrossVal/R/tmp/Bias.R             | 216 ++++++
 modules/CrossVal/R/tmp/Corr.R             | 484 +++++++++++++
 modules/CrossVal/R/tmp/Eno.R              | 103 +++
 modules/CrossVal/R/tmp/GetProbs.R         | 353 ++++++++++
 modules/CrossVal/R/tmp/RPS.R              | 408 +++++++++++
 modules/CrossVal/R/tmp/RPSS.R             | 638 ++++++++++++++++++
 modules/CrossVal/R/tmp/RandomWalkTest.R   | 184 +++++
 modules/CrossVal/R/tmp/SprErr.R           | 227 +++++++
 modules/CrossVal/recipe_crossval_ecvs.yml | 183 +++++
 recipe_tas_oper.yml => recipe_tas.yml     |   5 +-
 17 files changed, 2890 insertions(+), 608 deletions(-)
 delete mode 100644 Skill.R
 rename full_ecvs_oper.R => full_ecvs_anomalies.R (74%)
 delete mode 100644 full_ecvs_mask.R
 rename full_crossval_anomalies.R => modules/CrossVal/Crossval_anomalies.R (98%)
 rename skill_full_crossval.R => modules/CrossVal/Crossval_skill.R (88%)
 create mode 100644 modules/CrossVal/R/CRPS_clim.R
 create mode 100644 modules/CrossVal/R/RPS_clim.R
 create mode 100644 modules/CrossVal/R/tmp/Bias.R
 create mode 100644 modules/CrossVal/R/tmp/Corr.R
 create mode 100644 modules/CrossVal/R/tmp/Eno.R
 create mode 100644 modules/CrossVal/R/tmp/GetProbs.R
 create mode 100644 modules/CrossVal/R/tmp/RPS.R
 create mode 100644 modules/CrossVal/R/tmp/RPSS.R
 create mode 100644 modules/CrossVal/R/tmp/RandomWalkTest.R
 create mode 100644 modules/CrossVal/R/tmp/SprErr.R
 create mode 100644 modules/CrossVal/recipe_crossval_ecvs.yml
 rename recipe_tas_oper.yml => recipe_tas.yml (95%)

diff --git a/Skill.R b/Skill.R
deleted file mode 100644
index 2c1f1d6f..00000000
--- a/Skill.R
+++ /dev/null
@@ -1,153 +0,0 @@
-fair <- TRUE
-
-## START SKILL ASSESSMENT:
-# RPS
-source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/GetProbs.R")
-cal_hcst_probs_ev <- GetProbs(res$cal_hcst_ev, time_dim = 'syear',
-                              prob_thresholds = NULL,
-                              bin_dim_abs = 'probs',
-                              indices_for_quantiles = NULL,
-                              memb_dim = 'ensemble', abs_thresholds = res$lims_cal_hcst_tr,
-                              ncores = recipe$Analysis$ncores)
-cal_obs_probs_ev <- GetProbs(data$obs$data, time_dim = 'syear',
-                             prob_thresholds = NULL,
-                             bin_dim_abs = 'probs',
-                             indices_for_quantiles = NULL,
-                             memb_dim = 'ensemble',
-                             abs_thresholds = res$lims_cal_obs_tr,
-                             ncores = recipe$Analysis$ncores)
-source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/modules/Skill/R/tmp/RPS.R")
-rps <- RPS(exp = ano_hcst_probs_ev, obs = ano_obs_probs_ev, memb_dim = NULL,
-           cat_dim = 'probs', cross.val = FALSE, time_dim = 'syear',
-           Fair = fair, nmemb = nmemb,
-           ncores = recipe$Analysis$ncores)
-source("modules/Skill/R/RPS_clim.R")
-rps_clim <- Apply(list(ano_obs_probs_ev),
-                  target_dims = c('probs', 'syear'),
-                  RPS_clim, bin_dim_abs = 'probs', Fair = fair,
-                  cross.val = FALSE, ncores = recipe$Analysis$ncores)$output1
-# RPSS
-source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/RPSS.R")
-source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/RandomWalkTest.R")
-rpss <- RPSS(exp = cal_hcst_probs_ev, obs = cal_obs_probs_ev,
-             time_dim = 'syear', memb_dim = NULL,
-             cat_dim = 'probs', Fair = fair, nmemb = nmemb,
-             # We should use a line like this
-             #abs_threshold = res$lims_ano_hcst_tr,
-             #prob_threshold = c(1/3, 2/3),
-             cross.val = FALSE,
-             ncores = recipe$Analysis$ncores)
-
-cal_fcst <- CST_Calibration(data$hcst, data$obs, data$fcst,
-                            sdate_dim = 'syear', memb_dim = 'ensemble')
-# CRPS
-crps <- CRPS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev,
-             time_dim = 'syear', memb_dim = 'ensemble',
-             Fair = fair,
-
ncores = recipe$Analysis$ncores) -# Este no sé como se calcula????: -# Aquí no se puede porque estaría incluyendo información de los otros años -#source("modules/Skill/R/CRPS_clim.R") -# Pero si lo hago con el ano_obs_tr si puedo hacerlo aquí -# el resultado es igual a dentro del bucle. -crps_clim <- CRPS(exp = res$ano_obs_tr, obs = res$ano_obs_ev, - time_dim = 'syear', memb_dim = 'sample.syear', - Fair = fair - ncores = recipe$Analysis$ncores) - - -# CRPSS -ref <- res$ano_obs_tr -dim(ref) <- c(ensemble = as.numeric(sdate_dim) -1, - nftime, nlats, nlons, sdate_dim) -crpss <- CRPSS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, ref = ref, - memb_dim = 'ensemble', Fair = fair, - time_dim = 'syear', clim.cross.val = FALSE, - ncores = recipe$Analysis$ncores) - -# Corr -source("modules/Skill/R/tmp/Corr.R") -enscorr <- Corr(res$ano_hcst_ev, res$ano_obs_ev, - dat_dim = NULL, - time_dim = 'syear', - method = 'pearson', - memb_dim = 'ensemble', - memb = F, - conf = F, - pval = F, - sign = T, - alpha = 0.05, - ncores = recipe$Analysis$ncores) - -# Mean Bias -#mean_bias <- Bias(res$ano_hcst_ev, res$ano_obs_ev, -mean_bias <- Bias(data$hcst$data, data$obs$data, - time_dim = 'syear', - memb_dim = 'ensemble', - ncores = recipe$Analysis$ncores) - -mean_bias_sign <- Apply(list(data$hcst$data, data$obs$data), - target_dims = list(c('syear', 'ensemble'), 'syear'), - fun = function(x,y) { - if (!(any(is.na(x)) || any(is.na(y)))) { - res <- t.test(x = y, - y = apply(x, 1, mean, na.rm = T), - alternative = "two.sided")$p.value - } else { - res <- NA - } - return(res)}, - ncores = sdate_dim)$output1 -mean_bias_sign <- mean_bias_sign <= 0.05 - -# Spread error ratio -source("SprErr.R") -enssprerr <- SprErr(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, - memb_dim = 'ensemble', dat_dim = NULL, - time_dim = 'syear', pval = TRUE, - ncores = recipe$Analysis$ncores) -enssprerr_sign <- enssprerr$p.val -enssprerr_sign <- enssprerr_sign <= 0.05 -enssprerr <- enssprerr$ratio - -# RMSE -rms <- RMS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, - memb_dim = 'ensemble', dat_dim = NULL, - time_dim = 'syear', alpha = 0.05, - ncores = recipe$Analysis$ncores) - -skill_metrics <- list(mean_bias = mean_bias, - mean_bias_significance = mean_bias_sign, - enscorr = enscorr$corr, - enscorr_significance = enscorr$sign, - enssprerr = enssprerr, - enssprerr_significance = enssprerr_sign, - rps = rps, rps_clim = rps_clim, crps = crps, crps_clim = crps_clim, - rpss = rpss$rpss, rpss_significance = rpss$sign, #crps = crps, - crpss = crpss$crpss, crpss_significance = crpss$sign, - rms = rms$rms) -skill_metrics <- lapply(skill_metrics, function(x) { - InsertDim(drop(x), len = 1, pos = 1, name = 'var')}) -original <- recipe$Run$output_dir -recipe$Run$output_dir <- paste0(original, "/outputs/Skill/") - -skill_metrics <- lapply(skill_metrics, function(x) { - if (is.logical(x)) { - dims <- dim(x) - res <- as.numeric(x) - dim(res) <- dims - } else { - res <- x - } - return(res) - }) -# Compute save metrics -source("modules/Saving/Saving.R") -#Saving <- Saving(recipe = recipe, data = data, skill = skill_metrics) - save_metrics(recipe = recipe, - metrics = skill_metrics, - data_cube = data$hcst, agg = 'global', - outdir = recipe$Run$output_dir) - -recipe$Run$output_dir <- original - diff --git a/full_ecvs_oper.R b/full_ecvs_anomalies.R similarity index 74% rename from full_ecvs_oper.R rename to full_ecvs_anomalies.R index 8a08fef0..44c7e57d 100644 --- a/full_ecvs_oper.R +++ b/full_ecvs_anomalies.R @@ -5,7 +5,7 @@ source("modules/Units/Units.R") 
source("modules/Visualization/Visualization.R") args = commandArgs(trailingOnly = TRUE) recipe_file <- args[1] -#recipe_file <- "recipe_tas_oper.yml" +#recipe_file <- "recipe_tas.yml" recipe <- read_atomic_recipe(recipe_file) #recipe <- prepare_outputs(recipe_file) # Load datasets @@ -15,11 +15,11 @@ data_summary(data$hcst, recipe) data_summary(data$obs, recipe) -source("full_crossval_anomalies.R") -res <- full_crossval_anomalies(recipe = recipe, data = data) +source("modules/Crossval/Crossval_anomalies.R") +res <- Crossval_anomalies(recipe = recipe, data = data) -source("skill_full_crossval.R") -skill_metrics <- skill_full_crossval(recipe = recipe, data_crossval = res, +source("modules/Crossval/Crossval_skill.R") +skill_metrics <- Crossval_skill(recipe = recipe, data_crossval = res, fair = FALSE, nmemb = NULL, nmemb_ref = NULL) Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, significance = TRUE) diff --git a/full_ecvs_mask.R b/full_ecvs_mask.R deleted file mode 100644 index 45d7bb50..00000000 --- a/full_ecvs_mask.R +++ /dev/null @@ -1,435 +0,0 @@ - -source("modules/Loading/Loading.R") -source("modules/Saving/Saving.R") -source("modules/Units/Units.R") -source("modules/Visualization/Visualization.R") -args = commandArgs(trailingOnly = TRUE) -recipe_file <- args[1] -#recipe_file <- "recipe_tas_oper.yml" -recipe <- read_atomic_recipe(recipe_file) -#recipe <- prepare_outputs(recipe_file) -# Load datasets -data <- Loading(recipe) -data <- Units(recipe, data) -data_summary(data$hcst, recipe) -data_summary(data$obs, recipe) - -#file <- "/esarchive/scratch/nperez/NUTS3_ERA5Land-62_56E-22_81N.nc" -#file <- "/esarchive/scratch/nperez/git4/sunset/NUTS3_ERA5_-62_56E-22_81N.nc" -#mask <- nc_open(file) -#lat <- ncatt_get(mask, 'lat') -#lon <- ncatt_get(mask, 'lon') -#mask <- ncget_var(mask) - -dev.new() -source("/esarchive/scratch/nperez/git/s2dv/R/Bias.R") -bias <- Bias(exp = data$hcst$data, obs = data$obs$data, - memb_dim = 'ensemble', time_dim = 'syear', alpha = 0.05) -PlotEquiMap(bias$bias[1,1,1,1,1,,], lon = data$hcst$coord$lon, - lat = data$hcst$coords$lat, dots = bias$sig[1,1,1,1,1,,], - filled.c = F, toptitle = "singnif") -bias <- s2dv::Bias(exp = data$hcst$data, obs = data$obs$data, - memb_dim = 'ensemble', time_dim = 'syear') -dev.new() -PlotEquiMap(bias[1,1,1,1,1,,], lon = data$hcst$coord$lon, - lat = data$hcst$coords$lat, #dots = bias$sig[1,1,1,1,1,,], - filled.c = F, toptitle = "original") - - - -dev.new() -source("/esarchive/scratch/nperez/git/s2dv/R/Bias.R") -bias <- Bias(exp = data$hcst$data, obs = data$obs$data, abs = T, - memb_dim = 'ensemble', time_dim = 'syear', alpha = 0.05) -PlotEquiMap(bias$bias[1,1,1,1,1,,], lon = data$hcst$coord$lon, - lat = data$hcst$coords$lat, dots = bias$sig[1,1,1,1,1,,], - filled.c = F, toptitle = "singnif") -bias <- s2dv::Bias(exp = data$hcst$data, obs = data$obs$data, abs = T, - memb_dim = 'ensemble', time_dim = 'syear') -dev.new() -PlotEquiMap(bias[1,1,1,1,1,,], lon = data$hcst$coord$lon, - lat = data$hcst$coords$lat, #dots = bias$sig[1,1,1,1,1,,], - filled.c = F, toptitle = "original") - - - -source("/esarchive/scratch/nperez/git/Flor/NUTS3.R") -mask <- test -mask_lat <- lats -mask_lon <-lons - -# Region to subset -range(data$hcst$coords$longitude) -range(data$hcst$coords$latitude) - -mask <- SelBox(mask, lon = mask_lon, lat = mask_lat, - region = c(-20, 40, 20, 80)) -mask_lat <- mask$lat -mask_lon <- mask$lon -mask <- mask$data -all(data$hcst$coord$latitude == mask_lat) -all(data$hcst$coord$longitude == mask_lon) - -polys <- 
array(unique(as.vector(mask))[-1], - c(poly = length(unique(as.vector(mask))[-1]))) -## missing polys: -missing <- which(!(1:length(ids) %in% polys)) -tas_polys <- Apply(list(data$hcst$data, mask, polys), - target_dims = list(c('latitude', 'longitude'), - c('lat', 'lon'), NULL), - function(x, y, pol) { - mean(x[which(y == pol)], na.rm = TRUE)}, - ncores = 6)$output1 -data$hcst$data <- tas_polys -data$hcst$coord <- data$hcst$coord[-c(7,8)] -data$hcst$coord <- append(data$hcst$coord, list(polygons = polys)) - -polys_obs <- Apply(list(data$obs$data, mask, polys), - target_dims = list(c('latitude', 'longitude'), - c('lat', 'lon'), NULL), - function(x, y, pol) { - mean(x[which(y == pol)], na.rm = TRUE)}, - ncores = 4)$output1 -data$obs$data <- polys_obs -data$obs$coord <- data$obs$coord[-c(7,8)] -data$obs$coord <- append(data$obs$coord, list(polygons = polys)) - - -polys_fcst <- Apply(list(data$fcst$data, mask, polys), - target_dims = list(c('latitude', 'longitude'), - c('lat', 'lon'), NULL), - function(x, y, pol) { - mean(x[which(y == pol)], na.rm = TRUE)}, - ncores = 4)$output1 -data$fcst$data <- polys_fcst -data$fcst$coord <- data$fcst$coord[-c(7,8)] -data$fcst$coord <- append(data$fcst$coord, list(polygons = polys)) - - testp <- Subset(MeanDims(data$fcst$data, 'ensemble'), along = 'time', - indices = 1, drop = 'all') -data_pol <- data.frame(data = testp, NUTS_ID = ids[-missing]) -data_pol <- rbind(data_pol, data.frame(data =rep(0, length(missing)), NUTS_ID = ids[missing])) -shp <- rgdal::readOGR(shp_file) -shp <- subset(shp, LEVL_CODE == 2) - shp_data <- merge(shp, data_pol, by.x="NUTS_ID", by.y="NUTS_ID") - shp_data@data$id <- rownames(shp_data@data) - df.points <- fortify(shp_data, region = "id") - df.df <- plyr::join(df.points, shp_data@data, by = "id") - -gusa <- map_data("world") -plot_poly <- ggplot(data = df.df) + - geom_polygon(aes(long, lat, group = group),fill = "lightgrey", data = gusa) + - coord_map(projection = "stereographic", - xlim = c(-15, 40), - ylim = c(35, 75)) + - geom_polygon(aes(x = long, y = lat, fill = cut(data, breaks = seq(-16, 22, 2)), - group = group), color = "grey", size = 0.01) + - geom_polygon(aes(x = long, y = lat, group = group), fill = NA, - color = "grey", size = 0.01, data = shp) + - theme_bw() + - scale_fill_manual(values = c(rev(brewer.pal(9, 'Blues')), - brewer.pal(9, 'Reds'), "black"), - drop = FALSE, name = 'tas(ºC)') + - theme(panel.background = element_rect(fill = 'azure'), - text = element_text(family = "Times")) + # Colour of ocean - xlab('Longitude') + ylab('Latitude') + - scale_x_continuous(breaks = seq(-12, 45, 4),labels = waiver()) + - scale_y_continuous(breaks = seq(32, 70, 4),labels = waiver()) + - ggtitle("Forecast test") - -#reg <- SelBox(data$hcst$data, lon = as.vector(data$hcst$coords$longitude), -# lat = as.vector(data$hcst$coords$latitude), -# region = c(lonmin = 180, lonmax = 270, latmin = -30, latmax = 30), -# latdim = 'latitude', londim = 'longitude') -# -#data$hcst$data <- reg$data -#data$hcst$coords$longitude <- reg$lon -#data$hcst$coords$latitude <- reg$lat - -#reg <- SelBox(data$obs$data, lon = as.vector(data$obs$coords$longitude), -# lat = as.vector(data$obs$coords$latitude), -# region = c(lonmin = 180, lonmax = 270, latmin = -30, latmax = 30), -# latdim = 'latitude', londim = 'longitude') - -#data$obs$data <- reg$data -#data$obs$coords$longitude <- reg$lon -#data$obs$coords$latitude <- reg$lat - -# UKMO January 1993 is missing: -if (recipe$Analysis$Time$sdate == '0101') { - if (recipe$Analysis$Datasets$System$name == 
"UKMO-System602") { - if (1993 %in% recipe$Analysis$Time$hcst_start:recipe$Analysis$Time$hcst_end) { - info(recipe$Run$logger, - "UKMO January 1993 not available") - ind <- recipe$Analysis$Time$hcst_start:recipe$Analysis$Time$hcst_end - ind <- (1:length(ind))[-which(ind == 1993)] - data$hcst <- CST_Subset(data$hcst, along = 'syear', indices = ind) - data$obs <- CST_Subset(data$obs, along = 'syear', indices = ind) - sdate_dim <- dim(data$hcst$data)['syear'] - } - } -} - -calibraion_comp <- TRUE -anomalies_comp <- FALSE -source("crossval.R") - -## Define FAIR option: -fair <- TRUE - -## START SKILL ASSESSMENT: -# RPS -source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/GetProbs.R") -cal_hcst_probs_ev <- GetProbs(res$cal_hcst_ev, time_dim = 'syear', - prob_thresholds = NULL, - bin_dim_abs = 'probs', - indices_for_quantiles = NULL, - memb_dim = 'ensemble', abs_thresholds = res$lims_cal_hcst_tr, - ncores = recipe$Analysis$ncores) -cal_obs_probs_ev <- GetProbs(data$obs$data, time_dim = 'syear', - prob_thresholds = NULL, - bin_dim_abs = 'probs', - indices_for_quantiles = NULL, - memb_dim = 'ensemble', - abs_thresholds = res$lims_cal_obs_tr, - ncores = recipe$Analysis$ncores) -source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/modules/Skill/R/tmp/RPS.R") -rps <- RPS(exp = ano_hcst_probs_ev, obs = ano_obs_probs_ev, memb_dim = NULL, - cat_dim = 'probs', cross.val = FALSE, time_dim = 'syear', - Fair = fair, nmemb = nmemb, - ncores = recipe$Analysis$ncores) -source("modules/Skill/R/RPS_clim.R") -rps_clim <- Apply(list(ano_obs_probs_ev), - target_dims = c('probs', 'syear'), - RPS_clim, bin_dim_abs = 'probs', Fair = fair, - cross.val = FALSE, ncores = recipe$Analysis$ncores)$output1 -# RPSS -source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/RPSS.R") -source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/RandomWalkTest.R") -rpss <- RPSS(exp = cal_hcst_probs_ev, obs = cal_obs_probs_ev, - time_dim = 'syear', memb_dim = NULL, - cat_dim = 'probs', Fair = fair, nmemb = nmemb, - # We should use a line like this - #abs_threshold = res$lims_ano_hcst_tr, - #prob_threshold = c(1/3, 2/3), - cross.val = FALSE, - ncores = recipe$Analysis$ncores) - -cal_fcst <- CST_Calibration(data$hcst, data$obs, data$fcst, - sdate_dim = 'syear', memb_dim = 'ensemble') - - -shp_file = "/esarchive/shapefiles/NUTS3/NUTS_RG_60M_2021_4326.shp" -shp <- sf::st_read(shp_file) # class sf -ids <- subset(shp, shp$LEVL_CODE == 3) - - -###### -library(sf) -library(ggplot2) -#library(ggplot2, lib.loc = .libPaths()[2]) -library(RColorBrewer) -library(rgeos) - -gusa <- map_data("world") - -pals <- brewer.pal(9, 'Reds')[3:9] -pals <- c("#ffffb3",pals,'#525252') - -brks <- 0:5 -brks <- seq(258, 274, 2) - -plot_poly <- list() -polys_data <- NULL - - -## Now fill each polygon with the mean value - data_pol <- data.frame(data = apply(cal_fcst$data[1,1,1,1,1,1,,], 2, mean), - LAU_ID = polys) - shp_data <- merge(subset(shp, data_pol, by.x="LAU_ID", by.y="LAU_ID") - -# Magic lines below that converts SpatialPolygonsDataFrame to normal data frame - shp_data@data$id <- rownames(shp_data@data) - df.points <- fortify(shp_data, region = "id") - df.df <- plyr::join(df.points, shp_data@data, by = "id") -ano <- df.df -ano$data <- ano$data - fmi_clim_m$data - -ggplot(data = ano) + #df.df) + - geom_polygon(aes(long, lat, group = group),fill = "lightgrey", data = gusa) + - coord_map(projection = "stereographic", - xlim = c(lon_min, lon_max), - ylim = c(lat_min, lat_max)) + - # geom_polygon(aes(x = long, 
y = lat, fill = cut(data, breaks = seq(-14, 18, 2)), - geom_polygon(aes(x = long, y = lat, fill = cut(data, - breaks = seq(0, 5, 0.5)), - group = group), color = "grey",size = 0.08) + - geom_polygon(aes(x = long, y = lat, group = group), fill = NA, - color = "grey", size = 0.5, data = shp_herding) + - theme_bw() + - scale_fill_manual(values = c(#rev(brewer.pal(9, 'Blues')), - 'white', brewer.pal(9, 'Reds'), "black"), - drop = FALSE, name = 'tas(ºC)') + - theme(panel.background = element_rect(fill = 'azure'), - text = element_text(family = "Times")) + # Colour of ocean - xlab('Longitude') + ylab('Latitude') + - scale_x_continuous(breaks = seq(-12, 45, 4),labels = waiver()) + - scale_y_continuous(breaks = seq(32, 70, 4),labels = waiver()) + - ggtitle(paste(month.name[mes], year))# + - -###### -#PlotEquiMap(rps[1,,], lon = data$hcst$coords$longitude, -# lat = data$hcst$coords$latitude, filled.c = F, -# fileout = "test.png") -# CRPS -crps <- CRPS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, - time_dim = 'syear', memb_dim = 'ensemble', - Fair = fair, - ncores = recipe$Analysis$ncores) -# Este no sé como se calcula????: -# Aquí no se puede porque estaría incluyendo información de los otros años -#source("modules/Skill/R/CRPS_clim.R") -# Pero si lo hago con el ano_obs_tr si puedo hacerlo aquí -# el resultado es igual a dentro del bucle. -crps_clim <- CRPS(exp = res$ano_obs_tr, obs = res$ano_obs_ev, - time_dim = 'syear', memb_dim = 'sample.syear', - Fair = fair - ncores = recipe$Analysis$ncores) - - -# CRPSS -ref <- res$ano_obs_tr -dim(ref) <- c(ensemble = as.numeric(sdate_dim) -1, - nftime, nlats, nlons, sdate_dim) -crpss <- CRPSS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, ref = ref, - memb_dim = 'ensemble', Fair = fair, - time_dim = 'syear', clim.cross.val = FALSE, - ncores = recipe$Analysis$ncores) - - - -# Corr -source("modules/Skill/R/tmp/Corr.R") -enscorr <- Corr(res$ano_hcst_ev, res$ano_obs_ev, - dat_dim = NULL, - time_dim = 'syear', - method = 'pearson', - memb_dim = 'ensemble', - memb = F, - conf = F, - pval = F, - sign = T, - alpha = 0.05, - ncores = recipe$Analysis$ncores) - -# Mean Bias -#mean_bias <- Bias(res$ano_hcst_ev, res$ano_obs_ev, -mean_bias <- Bias(data$hcst$data, data$obs$data, - time_dim = 'syear', - memb_dim = 'ensemble', - ncores = recipe$Analysis$ncores) - -mean_bias_sign <- Apply(list(data$hcst$data, data$obs$data), - target_dims = list(c('syear', 'ensemble'), 'syear'), - fun = function(x,y) { - if (!(any(is.na(x)) || any(is.na(y)))) { - res <- t.test(x = y, - y = apply(x, 1, mean, na.rm = T), - alternative = "two.sided")$p.value - } else { - res <- NA - } - return(res)}, - ncores = sdate_dim)$output1 -mean_bias_sign <- mean_bias_sign <= 0.05 -#PlotEquiMap(mean_bias[1,1,1,1,1,,], lat = data$hcst$coords$latitude, -# lon = data$hcst$coords$longitude, -# dots = mean_bias_sign[1,1,1,1,1,,,1]) - -# Spread error ratio -source("SprErr.R") -enssprerr <- SprErr(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, - memb_dim = 'ensemble', dat_dim = NULL, - time_dim = 'syear', pval = TRUE, - ncores = recipe$Analysis$ncores) -enssprerr_sign <- enssprerr$p.val -enssprerr_sign <- enssprerr_sign <= 0.05 -enssprerr <- enssprerr$ratio - -# RMSE -rms <- RMS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, - memb_dim = 'ensemble', dat_dim = NULL, - time_dim = 'syear', alpha = 0.05, - ncores = recipe$Analysis$ncores) - -#obs_noensdim <- ClimProjDiags::Subset(res$ano_obs_ev, "ensemble", 1, -# drop = "selected") - -#enssprerr <- easyVerification::veriApply(verifun = 'EnsSprErr', -# fcst = 
res$ano_hcst_ev,
-#                                         obs = obs_noensdim,
-#                                         tdim = which(names(dim(res$ano_hcst_ev))=='syear'),
-#                                         ensdim = which(names(dim(res$ano_hcst_ev))=='ensemble'),
-#                                         na.rm = FALSE,
-#                                         ncpus = recipe$Analysis$ncores)
-if (any(is.na(rpss$sing))) {
-  info(recipe$Run$logger,
-       "RPSS NA")
-
-  rpss$sing[is.na(rpss$sign)] <- FALSE
-}
-skill_metrics <- list(mean_bias = mean_bias,
-                      mean_bias_significance = mean_bias_sign,
-                      enscorr = enscorr$corr,
-                      enscorr_significance = enscorr$sign,
-                      enssprerr = enssprerr,
-                      enssprerr_significance = enssprerr_sign,
-                      rps = rps, rps_clim = rps_clim, crps = crps, crps_clim = crps_clim,
-                      rpss = rpss$rpss, rpss_significance = rpss$sign, #crps = crps,
-                      crpss = crpss$crpss, crpss_significance = crpss$sign,
-                      rms = rms$rms)
-skill_metrics <- lapply(skill_metrics, function(x) {
-                   InsertDim(drop(x), len = 1, pos = 1, name = 'var')})
-original <- recipe$Run$output_dir
-recipe$Run$output_dir <- paste0(original, "/outputs/Skill/")
-
-skill_metrics <- lapply(skill_metrics, function(x) {
-                   if (is.logical(x)) {
-                     dims <- dim(x)
-                     res <- as.numeric(x)
-                     dim(res) <- dims
-                   } else {
-                     res <- x
-                   }
-                   return(res)
-                 })
-# Compute save metrics
-source("modules/Saving/Saving.R")
-#Saving <- Saving(recipe = recipe, data = data, skill = skill_metrics)
- save_metrics(recipe = recipe,
-              metrics = skill_metrics,
-              data_cube = data$hcst, agg = 'global',
-              outdir = recipe$Run$output_dir)
-
-recipe$Run$output_dir <- original
-
-source("modules/Visualization/Visualization.R")
-if (data$hcst$coords$longitude[1] != 0) {
-  skill_metrics <- lapply(skill_metrics, function(x) {
-    Subset(x, along = 'longitude', indices = c(182:360, 1:181))
-  })
-}
- info(recipe$Run$logger,
-      paste("lons:", data$hcst$coords$longitude))
- info(recipe$Run$logger,
-      paste("lons:", data$obs$coords$longitude))
-
-
-data$hcst$coords$longitude <- -179:180
-
-Visualization(recipe, data, skill_metrics, significance = TRUE)
-
-source("tools/add_logo.R")
-add_logo(recipe, "rsz_rsz_bsc_logo.png")
-
-
diff --git a/full_crossval_anomalies.R b/modules/CrossVal/Crossval_anomalies.R
similarity index 98%
rename from full_crossval_anomalies.R
rename to modules/CrossVal/Crossval_anomalies.R
index f65d86a7..b097faa6 100644
--- a/full_crossval_anomalies.R
+++ b/modules/CrossVal/Crossval_anomalies.R
@@ -1,8 +1,8 @@
 # Full-cross-val workflow
 ## This code should be valid for individual months and temporal averages
-source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/GetProbs.R")
+source("modules/Crossval/R/tmp/GetProbs.R")
 
-full_crossval_anomalies <- function(recipe, data) {
+Crossval_anomalies <- function(recipe, data) {
   cross.method <- recipe$Analysis$cross.method
   # TODO move check
   if (is.null(cross.method)) {
@@ -14,8 +14,6 @@
                    round(eval(parse(text = y)),2)})})
   ncores <- recipe$Analysis$ncores
   na.rm <- recipe$Analysis$remove_NAs
-  ## TODO remove if the recipe is checked:
-  na.rm <- TRUE
   ## data dimensions
   sdate_dim <- dim(data$hcst$data)['syear']
   orig_dims <- names(dim(data$hcst$data))
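The two renames above turn the standalone cross-validation scripts into module
functions. As a rough end-to-end sketch (not part of the patch; it mirrors the
calls shown in full_ecvs_anomalies.R earlier in this commit, and the recipe
path is illustrative), the relocated workflow is driven like this:

    source("modules/Loading/Loading.R")
    source("modules/Units/Units.R")
    source("modules/Visualization/Visualization.R")
    source("modules/Crossval/Crossval_anomalies.R")
    source("modules/Crossval/Crossval_skill.R")

    recipe <- read_atomic_recipe("recipe_tas.yml")  # illustrative recipe file
    data <- Units(recipe, Loading(recipe))          # load datasets, convert units
    res <- Crossval_anomalies(recipe = recipe, data = data)
    skill_metrics <- Crossval_skill(recipe = recipe, data_crossval = res,
                                    fair = FALSE, nmemb = NULL, nmemb_ref = NULL)
    Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics,
                  significance = TRUE)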
-source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/modules/Skill/R/RPS_clim.R") -source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/RPSS.R") -source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/RandomWalkTest.R") -source("https://earth.bsc.es/gitlab/es/sunset/-/raw/dev-test_CERISE/modules/Skill/R/tmp/Corr.R") -source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/dev-sigBias/R/Bias.R") -source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/dev-spread_error_ratio/R/SprErr.R") -source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/dev-spread_error_ratio/R/Eno.R") +source("modules/Crossval/R/tmp/RPS.R") +source("modules/Crossval/R/RPS_clim.R") +source("modules/Crossval/R/CRPS_clim.R") +source("modules/Crossval/R/tmp/RPSS.R") +source("modules/Crossval/R/tmp/RandomWalkTest.R") +source("modules/Corssval/R/tmp/Corr.R") +source("modules/Crossval/R/tmp/Bias.R") +source("modules/Crossval/R/tmp/SprErr.R") +source("modules/Crossval/R/tmp/Eno.R") ## data_crossval is the result from function full_crossval_anomalies or similar. ## this is a list with the required elements: @@ -32,7 +33,7 @@ source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/dev-spread_error_ratio/R/Eno.R ## the recipe could be used to read the Percentiles ## if fair is TRUE, the nmemb used to compute the probabilities is needed ## nmemb_ref is the number of year - 1 in case climatological forecast is the reference -skill_full_crossval <- function(recipe, data_crossval, +Crossval_skill <- function(recipe, data_crossval, fair = FALSE, nmemb = NULL, nmemb_ref = NULL) { ## START SKILL ASSESSMENT: # RPS diff --git a/modules/CrossVal/R/CRPS_clim.R b/modules/CrossVal/R/CRPS_clim.R new file mode 100644 index 00000000..0e6bef65 --- /dev/null +++ b/modules/CrossVal/R/CRPS_clim.R @@ -0,0 +1,35 @@ +# CRPS version for climatology +CRPS_clim <- function(obs, memb_dim ='ensemble', return_mean = TRUE, clim.cross.val= TRUE){ + time_dim <- names(dim(obs)) + obs_time_len <- dim(obs)[time_dim] + + if (isFALSE(clim.cross.val)) { + # Without cross-validation + ref <- array(data = rep(obs, each = obs_time_len), + dim = c(obs_time_len, obs_time_len)) + } else if (isTRUE(clim.cross.val)) { + # With cross-validation (excluding the value of that year to create ref for that year) + ref <- array(data = NA, + dim = c(obs_time_len, obs_time_len - 1)) + for (i in 1:obs_time_len) { + ref[i, ] <- obs[-i] + } + } + + names(dim(ref)) <- c(time_dim, memb_dim) + # ref: [sdate, memb] + # obs: [sdate] + crps_ref <- s2dv:::.CRPS(exp = ref, obs = obs, + time_dim = time_dim, + memb_dim = memb_dim, + dat_dim = NULL, + Fair = FALSE) + + # crps_ref should be [sdate] + if (return_mean == TRUE) { + return(mean(crps_ref)) + } else { + return(crps_ref) + } +} + diff --git a/modules/CrossVal/R/RPS_clim.R b/modules/CrossVal/R/RPS_clim.R new file mode 100644 index 00000000..6deab3ec --- /dev/null +++ b/modules/CrossVal/R/RPS_clim.R @@ -0,0 +1,39 @@ +# RPS version for climatology +RPS_clim <- function(obs, indices_for_clim = NULL, + prob_thresholds = c(1/3, 2/3), cross.val = TRUE, + Fair = FALSE, bin_dim_abs = NULL, return_mean = TRUE) { + if (is.null(indices_for_clim)){ + indices_for_clim <- 1:length(obs) + } + if (is.null(bin_dim_abs)) { + obs_probs <- .GetProbs(data = obs, indices_for_quantiles = indices_for_clim, ## temporarily removed s2dv::: + prob_thresholds = prob_thresholds, weights = NULL, + cross.val = cross.val) + } else { + obs_probs <- obs + } + # clim_probs: [bin, sdate] + clim_probs <- c(prob_thresholds[1], + diff(prob_thresholds), 1 - 
diff --git a/modules/CrossVal/R/RPS_clim.R b/modules/CrossVal/R/RPS_clim.R
new file mode 100644
index 00000000..6deab3ec
--- /dev/null
+++ b/modules/CrossVal/R/RPS_clim.R
@@ -0,0 +1,39 @@
+# RPS version for climatology
+RPS_clim <- function(obs, indices_for_clim = NULL,
+                     prob_thresholds = c(1/3, 2/3), cross.val = TRUE,
+                     Fair = FALSE, bin_dim_abs = NULL, return_mean = TRUE) {
+  if (is.null(indices_for_clim)) {
+    indices_for_clim <- 1:length(obs)
+  }
+  if (is.null(bin_dim_abs)) {
+    obs_probs <- .GetProbs(data = obs, indices_for_quantiles = indices_for_clim, ## temporarily removed s2dv:::
+                           prob_thresholds = prob_thresholds, weights = NULL,
+                           cross.val = cross.val)
+  } else {
+    obs_probs <- obs
+  }
+  # clim_probs: [bin, sdate]
+  clim_probs <- c(prob_thresholds[1],
+                  diff(prob_thresholds), 1 - prob_thresholds[length(prob_thresholds)])
+  clim_probs <- array(clim_probs, dim = dim(obs_probs))
+
+  # Calculate RPS for each time step
+  probs_clim_cumsum <- apply(clim_probs, 2, cumsum)
+  probs_obs_cumsum <- apply(obs_probs, 2, cumsum)
+  rps_ref <- apply((probs_clim_cumsum - probs_obs_cumsum)^2, 2, sum)
+  if (Fair) { # FairRPS
+    ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1))
+    ## [formula taken from SpecsVerification::EnsRps]
+    ## See explanation in https://freva.met.fu-berlin.de/about/problems/
+    R <- dim(obs)[2] #years
+    adjustment <- (-1) / (R - 1) * probs_clim_cumsum * (1 - probs_clim_cumsum)
+    adjustment <- colSums(adjustment)
+    rps_ref <- rps_ref + adjustment
+  }
+
+  if (return_mean == TRUE) {
+    return(mean(rps_ref))
+  } else {
+    return(rps_ref)
+  }
+}
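RPS_clim() can be checked the same way on toy tercile probabilities; when
bin_dim_abs is set, the input is treated as ready-made probabilities, so the
.GetProbs dependency is not exercised (a sketch with synthetic values, not part
of the patch):

    source("modules/CrossVal/R/RPS_clim.R")
    # One observed category per start date, as [bin = 3, syear = 10] probabilities:
    obs_probs <- array(0, dim = c(bin = 3, syear = 10))
    obs_probs[cbind(sample(1:3, 10, replace = TRUE), 1:10)] <- 1
    rps_ref <- RPS_clim(obs_probs, bin_dim_abs = 'bin', Fair = FALSE)  # mean RPS of climatology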
+#' +#'@references +#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +#' +#'@examples +#'exp <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, member = 10, sdate = 50)) +#'obs <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, sdate = 50)) +#'bias <- Bias(exp = exp, obs = obs, memb_dim = 'member') +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@export +Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, na.rm = FALSE, + absolute = FALSE, time_mean = TRUE, alpha = NULL, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (!is.array(exp) | !is.numeric(exp)) + stop("Parameter 'exp' must be a numeric array.") + if (!is.array(obs) | !is.numeric(obs)) + stop("Parameter 'obs' must be a numeric array.") + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop("Parameter 'time_dim' must be a character string.") + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (memb_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { + obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') + } else { + stop("Not implemented for observations with members ('obs' can have 'memb_dim', ", + "but it should be of length = 1).") + } + } + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + } + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'memb_dim' and 'dat_dim'.") + } + ## na.rm + if (!is.logical(na.rm) | length(na.rm) > 1) { + stop("Parameter 'na.rm' must be one logical value.") + } + ## absolute + if (!is.logical(absolute) | length(absolute) > 1) { + stop("Parameter 'absolute' must be one logical value.") + } + ## time_mean + if (!is.logical(time_mean) | length(time_mean) > 1) { + stop("Parameter 'time_mean' must be one logical value.") + } + ## alpha + if (!is.null(alpha)) { + if (!is.numeric(alpha) | length(alpha) > 1) { + stop("Parameter 'alpha' must be null or a numeric value.") + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + 
###############################
+
+  ## Ensemble mean
+  if (!is.null(memb_dim)) {
+    exp <- MeanDims(exp, memb_dim, na.rm = na.rm)
+  }
+
+  ## (Mean) Bias
+  bias <- Apply(data = list(exp, obs),
+                target_dims = c(time_dim, dat_dim),
+                fun = .Bias,
+                time_dim = time_dim,
+                dat_dim = dat_dim,
+                na.rm = na.rm,
+                absolute = absolute,
+                time_mean = time_mean,
+                alpha = alpha,
+                ncores = ncores)
+
+  if (is.null(alpha)) {
+    bias <- bias$output1
+  }
+  return(bias)
+}
+
+
+.Bias <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, na.rm = FALSE,
+                  absolute = FALSE, time_mean = TRUE, alpha = NULL) {
+  # exp and obs: [sdate, (dat)]
+  if (is.null(dat_dim)) {
+    bias <- exp - obs
+
+    if (isTRUE(absolute)) {
+      bias <- abs(bias)
+    }
+
+    if (isTRUE(time_mean)) {
+      bias <- mean(bias, na.rm = na.rm)
+    }
+
+    if (!is.null(alpha)) {
+      if (!absolute) {
+        pval <- t.test(x = obs, y = exp, alternative = "two.sided")$p.value
+        sig <- pval <= alpha
+      }
+    }
+  } else {
+    nexp <- as.numeric(dim(exp)[dat_dim])
+    nobs <- as.numeric(dim(obs)[dat_dim])
+    bias <- array(dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs))
+    pval <- array(dim = c(nexp = nexp, nobs = nobs))
+    sig <- array(dim = c(nexp = nexp, nobs = nobs))
+    for (i in 1:nexp) {
+      for (j in 1:nobs) {
+        bias[, i, j] <- exp[, i] - obs[, j]
+        if (!is.null(alpha)) {
+          if (!absolute) {
+            pval[i, j] <- t.test(x = obs[, j], y = exp[, i],
+                                 alternative = "two.sided")$p.value
+            # Compare this pair's p-value (not the whole matrix) to alpha
+            sig[i, j] <- pval[i, j] <= alpha
+          }
+        }
+      }
+    }
+
+    if (isTRUE(absolute)) {
+      bias <- abs(bias)
+    }
+
+    if (isTRUE(time_mean)) {
+      bias <- MeanDims(bias, time_dim, na.rm = na.rm)
+    }
+  }
+  if (!is.null(alpha) && !absolute) {
+    return(list(bias = bias, sig = sig))
+  } else {
+    return(bias)
+  }
+}
diff --git a/modules/CrossVal/R/tmp/Corr.R b/modules/CrossVal/R/tmp/Corr.R
new file mode 100644
index 00000000..744ff109
--- /dev/null
+++ b/modules/CrossVal/R/tmp/Corr.R
@@ -0,0 +1,484 @@
+#'Compute the correlation coefficient between an array of forecasts and their corresponding observations
+#'
+#'Calculate the correlation coefficient (Pearson, Kendall or Spearman) for
+#'an array of forecasts and an array of observations. The correlations are
+#'computed along 'time_dim', which usually refers to the start date dimension.
+#'If 'comp_dim' is given, the correlations are computed only if obs along the
+#'comp_dim dimension are complete between limits[1] and limits[2], i.e., there
+#'is no NA between limits[1] and limits[2]. This option can be activated if the
+#'user wants to account only for the forecasts whose corresponding observations
+#'are available at all leadtimes.\cr
+#'The confidence interval is computed by the Fisher transformation and the
+#'significance level relies on a one-sided Student's t-distribution.\cr
+#'The function can calculate the ensemble mean before computing the correlation
+#'if 'memb_dim' is specified and 'memb = FALSE'. If the ensemble mean is not
+#'calculated, the correlation is calculated for each member.
+#'If there is only one dataset for exp and obs, you can simply use cor() to
+#'compute the correlation.
+#'
+#'@param exp A named numeric array of experimental data, with at least dimension
+#'  'time_dim'.
+#'@param obs A named numeric array of observational data, same dimensions as
+#'  parameter 'exp' except along 'dat_dim' and 'memb_dim'.
+#'@param time_dim A character string indicating the name of dimension along
+#'  which the correlations are computed. The default value is 'sdate'.
+#'@param dat_dim A character string indicating the name of dataset (nobs/nexp)
+#'  dimension. The default value is NULL (no dataset).
+#'@param comp_dim A character string indicating the name of dimension along which
+#'  obs is taken into account only if it is complete. The default value
+#'  is NULL.
+#'@param limits A vector of two integers indicating the range along comp_dim to
+#'  be completed. The default is c(1, length of the comp_dim dimension).
+#'@param method A character string indicating the type of correlation:
+#'  'pearson', 'spearman', or 'kendall'. The default value is 'pearson'.
+#'@param memb_dim A character string indicating the name of the member
+#'  dimension. It must be one dimension in 'exp' and 'obs'. If there is no
+#'  member dimension, set NULL. The default value is NULL.
+#'@param memb A logical value indicating whether to keep the 'memb_dim'
+#'  dimension (TRUE) or compute the ensemble mean over 'memb_dim' (FALSE). Only
+#'  functional when 'memb_dim' is not NULL. The default value is TRUE.
+#'@param pval A logical value indicating whether or not to return the p-value
+#'  of the test Ho: Corr = 0. The default value is TRUE.
+#'@param conf A logical value indicating whether or not to return the confidence
+#'  intervals. The default value is TRUE.
+#'@param sign A logical value indicating whether to retrieve the statistical
+#'  significance of the test Ho: Corr = 0 based on 'alpha'. The default value is
+#'  FALSE.
+#'@param alpha A numeric indicating the significance level for the statistical
+#'  significance test. The default value is 0.05.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#'  computation. The default value is NULL.
+#'
+#'@return
+#'A list containing the numeric arrays with dimension:\cr
+#'  c(nexp, nobs, exp_memb, obs_memb, all other dimensions of exp except
+#'  time_dim and memb_dim).\cr
+#'nexp is the number of experiment (i.e., 'dat_dim' in exp), and nobs is the
+#'number of observation (i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and
+#'nobs are omitted. exp_memb is the number of member in experiment (i.e.,
+#''memb_dim' in exp) and obs_memb is the number of member in observation (i.e.,
+#''memb_dim' in obs). If memb = FALSE, exp_memb and obs_memb are omitted.\cr\cr
+#'\item{$corr}{
+#'  The correlation coefficient.
+#'}
+#'\item{$p.val}{
+#'  The p-value. Only present if \code{pval = TRUE}.
+#'}
+#'\item{$conf.lower}{
+#'  The lower confidence interval. Only present if \code{conf = TRUE}.
+#'}
+#'\item{$conf.upper}{
+#'  The upper confidence interval. Only present if \code{conf = TRUE}.
+#'}
+#'\item{$sign}{
+#'  The statistical significance. Only present if \code{sign = TRUE}.
+#'}
+#'
+#'@examples
+#'# Case 1: Load sample data as in Load() example:
+#'example(Load)
+#'clim <- Clim(sampleData$mod, sampleData$obs)
+#'ano_exp <- Ano(sampleData$mod, clim$clim_exp)
+#'ano_obs <- Ano(sampleData$obs, clim$clim_obs)
+#'runmean_months <- 12
+#'
+#'# Smooth along lead-times
+#'smooth_ano_exp <- Smoothing(ano_exp, runmeanlen = runmean_months)
+#'smooth_ano_obs <- Smoothing(ano_obs, runmeanlen = runmean_months)
+#'required_complete_row <- 3 # Discard start dates which contain any NA lead-times
+#'leadtimes_per_startdate <- 60
+#'corr <- Corr(MeanDims(smooth_ano_exp, 'member'),
+#'             MeanDims(smooth_ano_obs, 'member'),
+#'             comp_dim = 'ftime', dat_dim = 'dataset',
+#'             limits = c(ceiling((runmean_months + 1) / 2),
+#'                        leadtimes_per_startdate - floor(runmean_months / 2)))
+#'
+#'# Case 2: Keep member dimension
+#'corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member', dat_dim = 'dataset')
+#'# ensemble mean
+#'corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member', memb = FALSE,
+#'             dat_dim = 'dataset')
+#'
+#'@import multiApply
+#'@importFrom ClimProjDiags Subset
+#'@importFrom stats cor pt qnorm
+#'@export
+Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL,
+                 comp_dim = NULL, limits = NULL, method = 'pearson',
+                 memb_dim = NULL, memb = TRUE,
+                 pval = TRUE, conf = TRUE, sign = FALSE,
+                 alpha = 0.05, ncores = NULL) {
+
+  # Check inputs
+  ## exp and obs (1)
+  if (is.null(exp) | is.null(obs)) {
+    stop("Parameter 'exp' and 'obs' cannot be NULL.")
+  }
+  if (!is.numeric(exp) | !is.numeric(obs)) {
+    stop("Parameter 'exp' and 'obs' must be a numeric array.")
+  }
+  if (is.null(dim(exp)) | is.null(dim(obs))) {
+    stop(paste0("Parameter 'exp' and 'obs' must have at least two dimensions ",
+                "containing time_dim and dat_dim."))
+  }
+  if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) |
+      any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) {
+    stop("Parameter 'exp' and 'obs' must have dimension names.")
+  }
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) > 1) {
+    stop("Parameter 'time_dim' must be a character string.")
+  }
+  if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) {
+    stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.")
+  }
+  ## dat_dim
+  if (!is.null(dat_dim)) {
+    if (!is.character(dat_dim) | length(dat_dim) > 1) {
+      stop("Parameter 'dat_dim' must be a character string or NULL.")
+    }
+    if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) {
+      stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.",
+           " Set it as NULL if there is no dataset dimension.")
+    }
+  }
+  ## comp_dim
+  if (!is.null(comp_dim)) {
+    if (!is.character(comp_dim) | length(comp_dim) > 1) {
+      stop("Parameter 'comp_dim' must be a character string.")
+    }
+    if (!comp_dim %in% names(dim(exp)) | !comp_dim %in% names(dim(obs))) {
+      stop("Parameter 'comp_dim' is not found in 'exp' or 'obs' dimension.")
+    }
+  }
+  ## limits
+  if (!is.null(limits)) {
+    if (is.null(comp_dim)) {
+      stop("Parameter 'comp_dim' cannot be NULL if 'limits' is assigned.")
+    }
+    if (!is.numeric(limits) | any(limits %% 1 != 0) | any(limits < 0) |
+        length(limits) != 2 | any(limits > dim(exp)[comp_dim])) {
+      stop(paste0("Parameter 'limits' must be a vector of two positive ",
+                  "integers smaller than the length of parameter 'comp_dim'."))
+    }
+  }
+  ## method
+  if (!(method %in% c("kendall", "spearman", "pearson"))) {
+    stop("Parameter 'method' must be one of 'kendall', 'spearman' or 'pearson'.")
+  }
+  ## memb_dim
+  if
(!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) { + stop("Parameter 'memb_dim' is not found in 'exp' nor 'obs' dimension. Set it as NULL if there is no member dimension.") + } + # Add [member = 1] + if (memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) { + dim(obs) <- c(dim(obs), 1) + names(dim(obs))[length(dim(obs))] <- memb_dim + } + if (!memb_dim %in% names(dim(exp)) & memb_dim %in% names(dim(obs))) { + dim(exp) <- c(dim(exp), 1) + names(dim(exp))[length(dim(exp))] <- memb_dim + } + } + ## memb + if (!is.logical(memb) | length(memb) > 1) { + stop("Parameter 'memb' must be one logical value.") + } + ## pval + if (!is.logical(pval) | length(pval) > 1) { + stop("Parameter 'pval' must be one logical value.") + } + ## conf + if (!is.logical(conf) | length(conf) > 1) { + stop("Parameter 'conf' must be one logical value.") + } + ## sign + if (!is.logical(sign) | length(sign) > 1) { + stop("Parameter 'sign' must be one logical value.") + } + ## alpha + if (!is.numeric(alpha) | alpha < 0 | alpha > 1 | length(alpha) > 1) { + stop("Parameter 'alpha' must be a numeric number between 0 and 1.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + name_obs <- name_obs[-which(name_obs == memb_dim)] + } + if (!identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimension except 'dat_dim' and 'memb_dim'.")) + } + if (dim(exp)[time_dim] < 3) { + stop("The length of time_dim must be at least 3 to compute correlation.") + } + + + ############################### + # Sort dimension + name_exp <- names(dim(exp)) + name_obs <- names(dim(obs)) + order_obs <- match(name_exp, name_obs) + obs <- Reorder(obs, order_obs) + + + ############################### + # Calculate Corr + + # Remove data along comp_dim dim if there is at least one NA between limits + if (!is.null(comp_dim)) { + pos <- which(names(dim(obs)) == comp_dim) + if (is.null(limits)) { + obs_sub <- obs + } else { + obs_sub <- ClimProjDiags::Subset(obs, pos, list(limits[1]:limits[2])) + } + outrows <- is.na(MeanDims(obs_sub, pos, na.rm = FALSE)) + outrows <- InsertDim(outrows, pos, dim(obs)[comp_dim]) + obs[which(outrows)] <- NA + rm(obs_sub, outrows) + } + if (!is.null(memb_dim)) { + if (!memb) { #ensemble mean + exp <- MeanDims(exp, memb_dim, na.rm = TRUE) + obs <- MeanDims(obs, memb_dim, na.rm = TRUE) +# name_exp <- names(dim(exp)) +# margin_dims_ind <- c(1:length(name_exp))[-which(name_exp == memb_dim)] +# exp <- apply(exp, margin_dims_ind, mean, na.rm = TRUE) #NOTE: remove NAs here +# obs <- apply(obs, margin_dims_ind, mean, na.rm = TRUE) + memb_dim <- NULL + } + } + + res <- Apply(list(exp, obs), + target_dims = list(c(time_dim, dat_dim, memb_dim), + c(time_dim, dat_dim, memb_dim)), + fun = .Corr, + dat_dim = dat_dim, memb_dim = memb_dim, + time_dim = time_dim, method = method, + pval = pval, conf = conf, sign = sign, alpha = alpha, + ncores = ncores) + 
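+  # 'res' is a named list coming from Apply(): $corr is always present, while
+  # $p.val, $conf.lower/$conf.upper and $sign are included depending on the
+  # 'pval', 'conf' and 'sign' arguments (see .Corr below).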
+ return(res) +} + +.Corr <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', + time_dim = 'sdate', method = 'pearson', + conf = TRUE, pval = TRUE, sign = FALSE, alpha = 0.05) { + + if (is.null(dat_dim)) { + nexp <- 1 + nobs <- 1 + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + } + + if (is.null(memb_dim)) { + CORR <- array(dim = c(nexp = nexp, nobs = nobs)) + + if (is.null(dat_dim)) { + # exp: [sdate] + # obs: [sdate] + if (any(!is.na(exp)) && sum(!is.na(obs)) > 2) { + CORR[, ] <- cor(exp, obs, use = "pairwise.complete.obs", method = method) + } + } else { + # exp: [sdate, dat_exp] + # obs: [sdate, dat_obs] + for (j in 1:nobs) { + for (y in 1:nexp) { + if (any(!is.na(exp[, y])) && sum(!is.na(obs[, j])) > 2) { + CORR[y, j] <- cor(exp[, y], obs[, j], + use = "pairwise.complete.obs", + method = method) + } + } + } +#---------------------------------------- +# Same as above calculation. +#TODO: Compare which is faster. +# CORR <- sapply(1:nobs, function(i) { +# sapply(1:nexp, function (x) { +# if (any(!is.na(exp[, x])) && sum(!is.na(obs[, i])) > 2) { +# cor(exp[, x], obs[, i], +# use = "pairwise.complete.obs", +# method = method) +# } else { +# NA +# } +# }) +# }) +#----------------------------------------- + } + + } else { # memb_dim != NULL + exp_memb <- as.numeric(dim(exp)[memb_dim]) # memb_dim + obs_memb <- as.numeric(dim(obs)[memb_dim]) + + CORR <- array(dim = c(nexp = nexp, nobs = nobs, exp_memb = exp_memb, obs_memb = obs_memb)) + + if (is.null(dat_dim)) { + # exp: [sdate, memb_exp] + # obs: [sdate, memb_obs] + for (j in 1:obs_memb) { + for (y in 1:exp_memb) { + + if (any(!is.na(exp[,y])) && sum(!is.na(obs[, j])) > 2) { + CORR[, , y, j] <- cor(exp[, y], obs[, j], + use = "pairwise.complete.obs", + method = method) + } + + } + } + } else { + # exp: [sdate, dat_exp, memb_exp] + # obs: [sdate, dat_obs, memb_obs] + for (j in 1:obs_memb) { + for (y in 1:exp_memb) { + CORR[, , y, j] <- sapply(1:nobs, function(i) { + sapply(1:nexp, function (x) { + if (any(!is.na(exp[, x, y])) && sum(!is.na(obs[, i, j])) > 2) { + cor(exp[, x, y], obs[, i, j], + use = "pairwise.complete.obs", + method = method) + } else { + NA + } + }) + }) + + } + } + } + + } + + +# if (pval) { +# for (i in 1:nobs) { +# p.val[, i] <- try(sapply(1:nexp, +# function(x) {(cor.test(exp[, x], obs[, i], +# use = "pairwise.complete.obs", +# method = method)$p.value)/2}), silent = TRUE) +# if (class(p.val[, i]) == 'character') { +# p.val[, i] <- NA +# } +# } +# } + + if (pval || conf || sign) { + if (method == "kendall" | method == "spearman") { + if (!is.null(dat_dim) | !is.null(memb_dim)) { + tmp <- apply(obs, c(1:length(dim(obs)))[-1], rank) # for memb_dim = NULL, 2; for memb_dim, c(2, 3) + names(dim(tmp))[1] <- time_dim + eno <- Eno(tmp, time_dim) + } else { + tmp <- rank(obs) + tmp <- array(tmp) + names(dim(tmp)) <- time_dim + eno <- Eno(tmp, time_dim) + } + } else if (method == "pearson") { + eno <- Eno(obs, time_dim) + } + + if (is.null(memb_dim)) { + eno_expand <- array(dim = c(nexp = nexp, nobs = nobs)) + for (i in 1:nexp) { + eno_expand[i, ] <- eno + } + } else { #member + eno_expand <- array(dim = c(nexp = nexp, nobs = nobs, exp_memb = exp_memb, obs_memb = obs_memb)) + for (i in 1:nexp) { + for (j in 1:exp_memb) { + eno_expand[i, , j, ] <- eno + } + } + } + + } + +#############old################# +#This doesn't return error but it's diff from cor.test() when method is spearman and kendall + if (pval || sign) { + t <- sqrt(CORR * CORR * (eno_expand - 2) / (1 - (CORR ^ 2))) 
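+    # One-sided test of H0: rho = 0 using the effective sample size:
+    # t = |r| * sqrt((n_eff - 2) / (1 - r^2)) follows a Student-t
+    # distribution with n_eff - 2 degrees of freedom under H0.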
+    p.val <- pt(t, eno_expand - 2, lower.tail = FALSE)
+    if (sign) signif <- !is.na(p.val) & p.val <= alpha
+  }
+###################################
+  if (conf) {
+    conf.lower <- alpha / 2
+    conf.upper <- 1 - conf.lower
+    suppressWarnings({
+      conflow <- tanh(atanh(CORR) + qnorm(conf.lower) / sqrt(eno_expand - 3))
+      confhigh <- tanh(atanh(CORR) + qnorm(conf.upper) / sqrt(eno_expand - 3))
+    })
+  }
+
+###################################
+  # Remove nexp and nobs if dat_dim = NULL
+  if (is.null(dat_dim)) {
+#  if (is.null(dat_dim) & !is.null(memb_dim)) {
+
+    if (length(dim(CORR)) == 2) {
+      dim(CORR) <- NULL
+      if (pval) {
+        dim(p.val) <- NULL
+      }
+      if (conf) {
+        dim(conflow) <- NULL
+        dim(confhigh) <- NULL
+      }
+      if (sign) {
+        dim(signif) <- NULL
+      }
+    } else {
+      dim(CORR) <- dim(CORR)[3:length(dim(CORR))]
+      if (pval) {
+        dim(p.val) <- dim(p.val)[3:length(dim(p.val))]
+      }
+      if (conf) {
+        dim(conflow) <- dim(conflow)[3:length(dim(conflow))]
+        dim(confhigh) <- dim(confhigh)[3:length(dim(confhigh))]
+      }
+      if (sign) {
+        dim(signif) <- dim(signif)[3:length(dim(signif))]
+      }
+    }
+  }
+
+###################################
+
+  res <- list(corr = CORR)
+  if (pval) {
+    res <- c(res, list(p.val = p.val))
+  }
+  if (conf) {
+    res <- c(res, list(conf.lower = conflow, conf.upper = confhigh))
+  }
+  if (sign) {
+    res <- c(res, list(sign = signif))
+  }
+
+  return(res)
+
+}
diff --git a/modules/CrossVal/R/tmp/Eno.R b/modules/CrossVal/R/tmp/Eno.R
new file mode 100644
index 00000000..cb927602
--- /dev/null
+++ b/modules/CrossVal/R/tmp/Eno.R
@@ -0,0 +1,103 @@
+#'Compute effective sample size with classical method
+#'
+#'Compute the number of effective samples along one dimension of an array. This
+#'effective number of independent observations can be used in
+#'statistical/inference tests.\cr
+#'The calculation is based on the 'eno' function by Caio Coelho from rclim.txt.
+#'
+#'@param data A numeric array with named dimensions.
+#'@param time_dim A character string indicating the dimension along which to
+#'  compute the effective sample size. The default value is 'sdate'.
+#'@param na.action A function. It can be na.pass (missing values are allowed)
+#'  or na.fail (no missing values are allowed). See details in stats::acf().
+#'  The default value is na.pass.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#'  computation. The default value is NULL.
+#'
+#'@return An array with the same dimension as parameter 'data' except the
+#'  time_dim dimension, which is removed after the computation. The array
+#'  indicates the number of effective samples along time_dim.
+#'
+#'@examples
+#'set.seed(1)
+#'data <- array(rnorm(800), dim = c(dataset = 1, member = 2, sdate = 4,
+#'                                  ftime = 4, lat = 10, lon = 10))
+#'na <- floor(runif(40, min = 1, max = 800))
+#'data[na] <- NA
+#'res <- Eno(data)
+#'
+#'@importFrom stats acf na.pass na.fail
+#'@import multiApply
+#'@export
+Eno <- function(data, time_dim = 'sdate', na.action = na.pass, ncores = NULL) {

+  # Check inputs
+  ## data
+  if (is.null(data)) {
+    stop("Parameter 'data' cannot be NULL.")
+  }
+  if (!is.numeric(data)) {
+    stop("Parameter 'data' must be a numeric array.")
+  }
+  if (is.null(dim(data))) {  # is vector
+    dim(data) <- c(length(data))
+    names(dim(data)) <- time_dim
+  }
+  if (any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) {
+    stop("Parameter 'data' must have dimension names.")
+  }
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) > 1) {
+    stop("Parameter 'time_dim' must be a character string.")
+  }
+  if (!time_dim %in% names(dim(data))) {
+    stop("Parameter 'time_dim' is not found in 'data' dimension.")
+  }
+  ## na.action
+  if (as.character(substitute(na.action)) != "na.pass" &
+      as.character(substitute(na.action)) != "na.fail") {
+    stop("Parameter 'na.action' must be either the function na.pass or na.fail.")
+  }
+  if (as.character(substitute(na.action)) == "na.fail" && anyNA(data)) {
+    stop("Calculation fails because NA is found in parameter 'data', ",
+         "which is not accepted when ",
+         "parameter 'na.action' = na.fail.")
+  }
+  ## ncores
+  if (!is.null(ncores)) {
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
+        length(ncores) > 1) {
+      stop("Parameter 'ncores' must be a positive integer.")
+    }
+  }
+
+  ###############################
+  # Calculate Eno
+
+  eno <- Apply(data = list(data),
+               target_dims = time_dim,
+               output_dims = NULL,
+               fun = .Eno,
+               na.action = na.action,
+               ncores = ncores)$output1
+
+  return(eno)
+}
+
+.Eno <- function(x, na.action) {
+  # sort() drops NAs, so 'n' is the number of non-NA values
+  n <- length(sort(x))
+  if (n > 1) {
+    a <- acf(x, lag.max = n - 1, plot = FALSE,
+             na.action = na.action)$acf[2:n, 1, 1]
+    s <- 0
+    for (k in 1:(n - 1)) {
+      s <- s + (((n - k) / n) * a[k])
+    }
+    eno <- min(n / (1 + (2 * s)), n)
+  } else {
+    eno <- NA
+  }
+
+  return(eno)
+}
+
diff --git a/modules/CrossVal/R/tmp/GetProbs.R b/modules/CrossVal/R/tmp/GetProbs.R
new file mode 100644
index 00000000..2a538892
--- /dev/null
+++ b/modules/CrossVal/R/tmp/GetProbs.R
@@ -0,0 +1,353 @@
+#'Compute probabilistic forecasts or the corresponding observations
+#'
+#'Compute probabilistic forecasts from an ensemble based on the relative
+#'thresholds, or the probabilistic observations (i.e., which probabilistic
+#'category was observed). A reference period can be specified to calculate the
+#'absolute thresholds between each probabilistic category. The absolute
+#'thresholds can be computed in cross-validation mode. If data is an ensemble,
+#'the probabilities are calculated as the percentage of members that fall into
+#'each category. For observations (or forecasts without member dimension), 1
+#'means that the event happened, while 0 indicates that the event did not
+#'happen. Weighted probabilities can be computed if the weights are provided for
+#'each ensemble member and time step. The absolute thresholds can also be
+#'provided directly for probabilities calculation.
+#'
+#'@param data A named numerical array of the forecasts or observations with, at
+#'  least, time dimension.
+#'@param time_dim A character string indicating the name of the time dimension.
+#'  The default value is 'sdate'.
+#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the probabilities of the forecast, or NULL if there is no member +#' dimension (e.g., for observations, or for forecast with only one ensemble +#' member). The default value is 'member'. +#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to +#' 1) between the categories. The default value is c(1/3, 2/3), which +#' corresponds to tercile equiprobable categories. +#'@param abs_thresholds A numeric array or vector of the absolute thresholds in +#' the same units as \code{data}. If an array is provided, it should have at +#' least 'bin_dim_abs' dimension. If it has more dimensions (e.g. different +#' thresholds for different locations, i.e. lon and lat dimensions), they +#' should match the dimensions of \code{data}, except the member dimension +#' which should not be included. The default value is NULL and, in this case, +#' 'prob_thresholds' is used for calculating the probabilities. +#'@param bin_dim_abs A character string of the dimension name of +#' 'abs_thresholds' array in which category limits are stored. It will also be +#' the probabilistic category dimension name in the output. The default value +#' is 'bin'. +#'@param indices_for_quantiles A vector of the indices to be taken along +#' 'time_dim' for computing the absolute thresholds between the probabilistic +#' categories. If NULL (default), the whole period is used. It is only used +#' when 'prob_thresholds' is provided. +#'@param weights A named numerical array of the weights for 'data' with +#' dimensions 'time_dim' and 'memb_dim' (if 'data' has them). The default value +#' is NULL. The ensemble should have at least 70 members or span at least 10 +#' time steps and have more than 45 members if consistency between the weighted +#' and unweighted methodologies is desired. +#'@param cross.val A logical indicating whether to compute the thresholds +#' between probabilistic categories in cross-validation mode. The default value +#' is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A numerical array of probabilities with dimensions c(bin_dim_abs, the rest +#'dimensions of 'data' except 'memb_dim'). 'bin' dimension has the length of +#'probabilistic categories, i.e., \code{length(prob_thresholds) + 1}. 
+#' +#'@examples +#'data <- array(rnorm(2000), dim = c(ensemble = 25, sdate = 20, time = 4)) +#'res <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', +#' indices_for_quantiles = 4:17) +#' +#'# abs_thresholds is provided +#'abs_thr1 <- c(-0.2, 0.3) +#'abs_thr2 <- array(c(-0.2, 0.3) + rnorm(40) * 0.1, dim = c(cat = 2, sdate = 20)) +#'res1 <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', +#' prob_thresholds = NULL, abs_thresholds = abs_thr1) +#'res2 <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', +#' prob_thresholds = NULL, abs_thresholds = abs_thr2, bin_dim_abs = 'cat') +#' +#'@import multiApply +#'@importFrom easyVerification convert2prob +#'@export +GetProbs <- function(data, time_dim = 'sdate', memb_dim = 'member', + indices_for_quantiles = NULL, + prob_thresholds = c(1/3, 2/3), abs_thresholds = NULL, + bin_dim_abs = 'bin', weights = NULL, cross.val = FALSE, ncores = NULL) { + + # Check inputs + ## data + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!is.numeric(data)) { + stop("Parameter 'data' must be a numeric array.") + } + if (any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) { + stop("Parameter 'data' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop('Parameter "time_dim" must be a character string.') + if (!time_dim %in% names(dim(data))) { + stop("Parameter 'time_dim' is not found in 'data' dimensions.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(data))) { + stop("Parameter 'memb_dim' is not found in 'data' dimensions. If no member ", + "dimension exists, set it as NULL.") + } + } + ## bin_dim_abs + if (!is.character(bin_dim_abs) | length(bin_dim_abs) != 1) { + stop('Parameter "bin_dim_abs" must be a character string.') + } + ## prob_thresholds, abs_thresholds + if (!is.null(abs_thresholds) & !is.null(prob_thresholds)) { + .warning(paste0("Parameters 'prob_thresholds' and 'abs_thresholds' are both provided. 
", + "Only the first one is used.")) + abs_thresholds <- NULL + } else if (is.null(abs_thresholds) & is.null(prob_thresholds)) { + stop("One of the parameters 'prob_thresholds' and 'abs_thresholds' must be provided.") + } + if (!is.null(prob_thresholds)) { + if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | + any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { + stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.") + } + ## indices_for_quantiles + if (is.null(indices_for_quantiles)) { + indices_for_quantiles <- seq_len(dim(data)[time_dim]) + } else { + if (!is.numeric(indices_for_quantiles) | !is.vector(indices_for_quantiles)) { + stop("Parameter 'indices_for_quantiles' must be NULL or a numeric vector.") + } else if (length(indices_for_quantiles) > dim(data)[time_dim] | + max(indices_for_quantiles) > dim(data)[time_dim] | + any(indices_for_quantiles < 1)) { + stop("Parameter 'indices_for_quantiles' should be the indices of 'time_dim'.") + } + } + + } else { # abs_thresholds + + if (is.null(dim(abs_thresholds))) { # a vector + dim(abs_thresholds) <- length(abs_thresholds) + names(dim(abs_thresholds)) <- bin_dim_abs + } + # bin_dim_abs + if (!(bin_dim_abs %in% names(dim(abs_thresholds)))) { + stop("Parameter abs_thresholds' can be a vector or array with 'bin_dim_abs' dimension.") + } + if (!is.null(memb_dim) && memb_dim %in% names(dim(abs_thresholds))) { + stop("Parameter abs_thresholds' cannot have member dimension.") + } + dim_name_abs <- names(dim(abs_thresholds))[which(names(dim(abs_thresholds)) != bin_dim_abs)] + if (!all(dim_name_abs %in% names(dim(data)))) { + stop("Parameter 'abs_thresholds' dimensions except 'bin_dim_abs' must be in 'data' as well.") + } else { + if (any(dim(abs_thresholds)[dim_name_abs] != dim(data)[dim_name_abs])) { + stop("Parameter 'abs_thresholds' dimensions must have the same length as 'data'.") + } + } + if (!is.null(indices_for_quantiles)) { + warning("Parameter 'indices_for_quantiles' is not used when 'abs_thresholds' are provided.") + } + abs_target_dims <- bin_dim_abs + if (time_dim %in% names(dim(abs_thresholds))) { + abs_target_dims <- c(bin_dim_abs, time_dim) + } + + } + + ## weights + if (!is.null(weights)) { + if (!is.array(weights) | !is.numeric(weights)) + stop("Parameter 'weights' must be a named numeric array.") + +# if (is.null(dat_dim)) { + if (!is.null(memb_dim)) { + lendim_weights <- 2 + namesdim_weights <- c(time_dim, memb_dim) + } else { + lendim_weights <- 1 + namesdim_weights <- c(time_dim) + } + if (length(dim(weights)) != lendim_weights | + !all(names(dim(weights)) %in% namesdim_weights)) { + stop("Parameter 'weights' must have dimension ", + paste0(namesdim_weights, collapse = ' and '), ".") + } + if (any(dim(weights)[namesdim_weights] != dim(data)[namesdim_weights])) { + stop("Parameter 'weights' must have the same dimension length as ", + paste0(namesdim_weights, collapse = ' and '), " dimension in 'data'.") + } + weights <- Reorder(weights, namesdim_weights) + +# } else { +# if (length(dim(weights)) != 3 | +# any(!names(dim(weights)) %in% c(memb_dim, time_dim, dat_dim))) +# stop("Parameter 'weights' must have three dimensions with the names of ", +# "'memb_dim', 'time_dim' and 'dat_dim'.") +# if (dim(weights)[memb_dim] != dim(exp)[memb_dim] | +# dim(weights)[time_dim] != dim(exp)[time_dim] | +# dim(weights)[dat_dim] != dim(exp)[dat_dim]) { +# stop(paste0("Parameter 'weights' must have the same dimension lengths ", +# "as 'memb_dim', 'time_dim' and 'dat_dim' in 'exp'.")) +# } +# weights <- 
Reorder(weights, c(time_dim, memb_dim, dat_dim)) +# } + } + ## cross.val + if (!is.logical(cross.val) | length(cross.val) > 1) { + stop("Parameter 'cross.val' must be either TRUE or FALSE.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + if (is.null(abs_thresholds)) { + res <- Apply(data = list(data = data), + target_dims = c(time_dim, memb_dim), + output_dims = c(bin_dim_abs, time_dim), + fun = .GetProbs, + prob_thresholds = prob_thresholds, + indices_for_quantiles = indices_for_quantiles, + weights = weights, cross.val = cross.val, ncores = ncores)$output1 + } else { + res <- Apply(data = list(data = data, abs_thresholds = abs_thresholds), + target_dims = list(c(time_dim, memb_dim), abs_target_dims), + output_dims = c(bin_dim_abs, time_dim), + fun = .GetProbs, + prob_thresholds = NULL, + indices_for_quantiles = NULL, + weights = NULL, cross.val = FALSE, ncores = ncores)$output1 + } + + return(res) +} + +.GetProbs <- function(data, indices_for_quantiles, + prob_thresholds = c(1/3, 2/3), abs_thresholds = NULL, + weights = NULL, cross.val = FALSE) { + # .GetProbs() is used in RPS, RPSS, ROCSS + # data + ## if data is exp: [sdate, memb] + ## if data is obs: [sdate, (memb)] + # weights: [sdate, (memb)], same as data + # if abs_thresholds is not NULL: [bin, (sdate)] + + # Add dim [memb = 1] to data if it doesn't have memb_dim + if (length(dim(data)) == 1) { + dim(data) <- c(dim(data), 1) + if (!is.null(weights)) dim(weights) <- c(dim(weights), 1) + } + + # Calculate absolute thresholds + if (is.null(abs_thresholds)) { + if (cross.val) { + quantiles <- array(NA, dim = c(bin = length(prob_thresholds), sdate = dim(data)[1])) + for (i_time in seq_len(dim(data)[1])) { + if (is.null(weights)) { + tmp <- which(indices_for_quantiles != i_time) + quantiles[, i_time] <- + quantile(x = as.vector(data[indices_for_quantiles[tmp], ]), + probs = prob_thresholds, type = 8, na.rm = TRUE) + } else { + # weights: [sdate, memb] + tmp <- which(indices_for_quantiles != i_time) + sorted_arrays <- + .sorted_distributions(data[indices_for_quantiles[tmp], ], + weights[indices_for_quantiles[tmp], ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + quantiles[, i_time] <- approx(cumulative_weights, sorted_data, + prob_thresholds, "linear")$y + } + } + + } else { + if (is.null(weights)) { + quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), + probs = prob_thresholds, type = 8, na.rm = TRUE) + } else { + # weights: [sdate, memb] + sorted_arrays <- .sorted_distributions(data[indices_for_quantiles, ], + weights[indices_for_quantiles, ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + quantiles <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y + } + quantiles <- array(rep(quantiles, dim(data)[1]), + dim = c(bin = length(quantiles), dim(data)[1])) + } + + } else { # abs_thresholds provided + quantiles <- abs_thresholds + if (length(dim(quantiles)) == 1) { + quantiles <- InsertDim(quantiles, lendim = dim(data)[1], + posdim = 2, name = names(dim(data))[1]) + } + } + # quantiles: [bin-1, sdate] + + # Probabilities + probs <- array(dim = c(dim(quantiles)[1] + 1, dim(data)[1])) # [bin, sdate] + for (i_time in seq_len(dim(data)[1])) { + if (anyNA(data[i_time, ])) { + probs[, i_time] <- rep(NA, dim = 
dim(quantiles)[1] + 1) + } else { + if (is.null(weights)) { + probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], + threshold = quantiles[, i_time])) + } else { + sorted_arrays <- .sorted_distributions(data[i_time, ], weights[i_time, ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + # find any quantiles that are outside the data range + integrated_probs <- array(dim = dim(quantiles)) + for (i_quant in seq_len(dim(quantiles)[1])) { + # for thresholds falling under the distribution + if (quantiles[i_quant, i_time] < min(sorted_data)) { + integrated_probs[i_quant, i_time] <- 0 + # for thresholds falling over the distribution + } else if (max(sorted_data) < quantiles[i_quant, i_time]) { + integrated_probs[i_quant, i_time] <- 1 + } else { + integrated_probs[i_quant, i_time] <- approx(sorted_data, cumulative_weights, + quantiles[i_quant, i_time], "linear")$y + } + } + probs[, i_time] <- append(integrated_probs[, i_time], 1) - + append(0, integrated_probs[, i_time]) + if (min(probs[, i_time]) < 0 | max(probs[, i_time]) > 1) { + stop("Probability in i_time = ", i_time, " is out of [0, 1].") + } + } + } + } + + return(probs) +} + +.sorted_distributions <- function(data_vector, weights_vector) { + weights_vector <- as.vector(weights_vector) + data_vector <- as.vector(data_vector) + weights_vector <- weights_vector / sum(weights_vector) # normalize to 1 + sorter <- order(data_vector) + sorted_weights <- weights_vector[sorter] + cumulative_weights <- cumsum(sorted_weights) - 0.5 * sorted_weights + cumulative_weights <- cumulative_weights - cumulative_weights[1] # fix the 0 + cumulative_weights <- cumulative_weights / + cumulative_weights[length(cumulative_weights)] # fix the 1 + return(list(data = data_vector[sorter], cumulative_weights = cumulative_weights)) +} + diff --git a/modules/CrossVal/R/tmp/RPS.R b/modules/CrossVal/R/tmp/RPS.R new file mode 100644 index 00000000..0ed599ac --- /dev/null +++ b/modules/CrossVal/R/tmp/RPS.R @@ -0,0 +1,408 @@ +#'Compute the Ranked Probability Score +#' +#'The Ranked Probability Score (RPS; Wilks, 2011) is defined as the sum of the +#'squared differences between the cumulative forecast probabilities (computed +#'from the ensemble members) and the observations (defined as 0% if the category +#'did not happen and 100% if it happened). It can be used to evaluate the skill +#'of multi-categorical probabilistic forecasts. The RPS ranges between 0 +#'(perfect forecast) and n-1 (worst possible forecast), where n is the number of +#'categories. In the case of a forecast divided into two categories (the lowest +#'number of categories that a probabilistic forecast can have), the RPS +#'corresponds to the Brier Score (BS; Wilks, 2011), therefore ranging between 0 +#'and 1.\cr +#'The function first calculates the probabilities for forecasts and observations, +#'then use them to calculate RPS. Or, the probabilities of exp and obs can be +#'provided directly to compute the score. If there is more than one dataset, RPS +#'will be computed for each pair of exp and obs data. The fraction of acceptable +#'NAs can be adjusted. +#' +#'@param exp A named numerical array of either the forecasts with at least time +#' and member dimensions, or the probabilities with at least time and category +#' dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. 
+#'@param obs A named numerical array of either the observation with at least
+#'  time dimension, or the probabilities with at least time and category
+#'  dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. The
+#'  dimensions must be the same as 'exp' except 'memb_dim' and 'dat_dim'.
+#'@param time_dim A character string indicating the name of the time dimension.
+#'  The default value is 'sdate'.
+#'@param memb_dim A character string indicating the name of the member dimension
+#'  to compute the probabilities of the forecast. The default value is 'member'.
+#'  If the data are probabilities, set memb_dim as NULL.
+#'@param cat_dim A character string indicating the name of the category
+#'  dimension that is needed when the exp and obs are probabilities. The default
+#'  value is NULL, which means that the data are not probabilities.
+#'@param dat_dim A character string indicating the name of dataset dimension.
+#'  The length of this dimension can be different between 'exp' and 'obs'.
+#'  The default value is NULL.
+#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to
+#'  1) between the categories. The default value is c(1/3, 2/3), which
+#'  corresponds to tercile equiprobable categories.
+#'@param indices_for_clim A vector of the indices to be taken along 'time_dim'
+#'  for computing the thresholds between the probabilistic categories. If NULL,
+#'  the whole period is used. The default value is NULL.
+#'@param Fair A logical indicating whether to compute the FairRPS (the
+#'  potential RPS that the forecast would have with an infinite ensemble size).
+#'  The default value is FALSE.
+#'@param nmemb A numeric value indicating the number of members used to compute
+#'  the probabilities. This parameter is necessary when calculating FairRPS from
+#'  probabilities. The default value is NULL.
+#'@param weights A named numerical array of the weights for 'exp' probability
+#'  calculation. If 'dat_dim' is NULL, the dimensions should include 'memb_dim'
+#'  and 'time_dim'. Else, the dimension should also include 'dat_dim'. The
+#'  default value is NULL. The ensemble should have at least 70 members or span
+#'  at least 10 time steps and have more than 45 members if consistency between
+#'  the weighted and unweighted methodologies is desired.
+#'@param cross.val A logical indicating whether to compute the thresholds
+#'  between probabilistic categories in cross-validation. The default value is
+#'  FALSE.
+#'@param return_mean A logical indicating whether to return the temporal mean
+#'  of the RPS or not. If TRUE, the temporal mean is calculated along time_dim,
+#'  if FALSE the time dimension is not aggregated. The default is TRUE.
+#'@param na.rm A logical or numeric value between 0 and 1. If it is numeric, it
+#'  means the lower limit for the fraction of the non-NA values. 1 is equal to
+#'  FALSE (no NA is acceptable), 0 is equal to TRUE (all NAs are acceptable).
+#'  The function returns NA if the fraction of non-NA values in the data is less
+#'  than na.rm. Otherwise, RPS will be calculated. The default value is FALSE.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#'  computation. The default value is NULL.
+#'
+#'@return
+#'A numerical array of RPS with dimensions c(nexp, nobs, the rest dimensions of
+#''exp' except 'time_dim' and 'memb_dim' dimensions). nexp is the number of
+#'experiment (i.e., dat_dim in exp), and nobs is the number of observation
+#'(i.e., dat_dim in obs). If dat_dim is NULL, nexp and nobs are omitted.
+#' +#'@references +#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +#' +#'@examples +#'# Use synthetic data +#'exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#'obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +#'res <- RPS(exp = exp, obs = obs) +#'# Use probabilities as inputs +#'exp_probs <- GetProbs(exp, time_dim = 'sdate', memb_dim = 'member') +#'obs_probs <- GetProbs(obs, time_dim = 'sdate', memb_dim = NULL) +#'res2 <- RPS(exp = exp_probs, obs = obs_probs, memb_dim = NULL, cat_dim = 'bin') +#' +#' +#'@import multiApply +#'@importFrom easyVerification convert2prob +#'@export +RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, + dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, + Fair = FALSE, nmemb = NULL, weights = NULL, + cross.val = FALSE, return_mean = TRUE, + na.rm = FALSE, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (!is.array(exp) | !is.numeric(exp)) + stop('Parameter "exp" must be a numeric array.') + if (!is.array(obs) | !is.numeric(obs)) + stop('Parameter "obs" must be a numeric array.') + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop('Parameter "time_dim" must be a character string.') + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## memb_dim & cat_dim + if (is.null(memb_dim) + is.null(cat_dim) != 1) { + stop("Only one of the two parameters 'memb_dim' and 'cat_dim' can have value.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + } + ## cat_dim + if (!is.null(cat_dim)) { + if (!is.character(cat_dim) | length(cat_dim) > 1) { + stop("Parameter 'cat_dim' must be a character string.") + } + if (!cat_dim %in% names(dim(exp)) | !cat_dim %in% names(dim(obs))) { + stop("Parameter 'cat_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } + } + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'memb_dim' and 'dat_dim'.") + } + ## prob_thresholds + if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | + any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { + stop("Parameter 
'prob_thresholds' must be a numeric vector between 0 and 1.")
+  }
+  ## indices_for_clim
+  if (is.null(indices_for_clim)) {
+    indices_for_clim <- seq_len(dim(obs)[time_dim])
+  } else {
+    if (!is.numeric(indices_for_clim) | !is.vector(indices_for_clim)) {
+      stop("Parameter 'indices_for_clim' must be NULL or a numeric vector.")
+    } else if (length(indices_for_clim) > dim(obs)[time_dim] |
+               max(indices_for_clim) > dim(obs)[time_dim] |
+               any(indices_for_clim < 1)) {
+      stop("Parameter 'indices_for_clim' should be the indices of 'time_dim'.")
+    }
+  }
+  ## Fair
+  if (!is.logical(Fair) | length(Fair) > 1) {
+    stop("Parameter 'Fair' must be either TRUE or FALSE.")
+  }
+  if (Fair) {
+    if (!is.null(cat_dim)) {
+      if (cat_dim %in% names(dim(exp))) {
+        if (is.null(nmemb)) {
+          stop("Parameter 'nmemb' is necessary to compute the Fair ",
+               "score from probabilities.")
+        }
+      }
+    }
+  }
+  ## return_mean
+  if (!is.logical(return_mean) | length(return_mean) > 1) {
+    stop("Parameter 'return_mean' must be either TRUE or FALSE.")
+  }
+  ## cross.val
+  if (!is.logical(cross.val) | length(cross.val) > 1) {
+    stop("Parameter 'cross.val' must be either TRUE or FALSE.")
+  }
+  ## weights
+  if (!is.null(weights) & is.null(cat_dim)) {
+    if (!is.array(weights) | !is.numeric(weights))
+      stop("Parameter 'weights' must be a named numeric array.")
+    if (is.null(dat_dim)) {
+      if (length(dim(weights)) != 2 | !all(names(dim(weights)) %in% c(memb_dim, time_dim)))
+        stop("Parameter 'weights' must have two dimensions with the names of ",
+             "'memb_dim' and 'time_dim'.")
+      if (dim(weights)[memb_dim] != dim(exp)[memb_dim] |
+          dim(weights)[time_dim] != dim(exp)[time_dim]) {
+        stop("Parameter 'weights' must have the same dimension lengths ",
+             "as 'memb_dim' and 'time_dim' in 'exp'.")
+      }
+      weights <- Reorder(weights, c(time_dim, memb_dim))
+
+    } else {
+      if (length(dim(weights)) != 3 | !all(names(dim(weights)) %in% c(memb_dim, time_dim, dat_dim)))
+        stop("Parameter 'weights' must have three dimensions with the names of ",
+             "'memb_dim', 'time_dim' and 'dat_dim'.")
+      if (dim(weights)[memb_dim] != dim(exp)[memb_dim] |
+          dim(weights)[time_dim] != dim(exp)[time_dim] |
+          dim(weights)[dat_dim] != dim(exp)[dat_dim]) {
+        stop("Parameter 'weights' must have the same dimension lengths ",
+             "as 'memb_dim', 'time_dim' and 'dat_dim' in 'exp'.")
+      }
+      weights <- Reorder(weights, c(time_dim, memb_dim, dat_dim))
+
+    }
+  } else if (!is.null(weights) & !is.null(cat_dim)) {
+    .warning(paste0("Parameter 'exp' and 'obs' are probabilities already, so parameter ",
+                    "'weights' is not used. 
Change 'weights' to NULL.")) + weights <- NULL + } + ## na.rm + if (!isTRUE(na.rm) & !isFALSE(na.rm) & !(is.numeric(na.rm) & na.rm >= 0 & na.rm <= 1)) { + stop('"na.rm" should be TRUE, FALSE or a numeric between 0 and 1') + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + + # Compute RPS + + ## Decide target_dims + if (!is.null(memb_dim)) { + target_dims_exp <- c(time_dim, memb_dim, dat_dim) + if (!memb_dim %in% names(dim(obs))) { + target_dims_obs <- c(time_dim, dat_dim) + } else { + target_dims_obs <- c(time_dim, memb_dim, dat_dim) + } + } else { # cat_dim + target_dims_exp <- target_dims_obs <- c(time_dim, cat_dim, dat_dim) + } + + rps <- Apply(data = list(exp = exp, obs = obs), + target_dims = list(exp = target_dims_exp, + obs = target_dims_obs), + fun = .RPS, + dat_dim = dat_dim, time_dim = time_dim, + memb_dim = memb_dim, cat_dim = cat_dim, + prob_thresholds = prob_thresholds, nmemb = nmemb, + indices_for_clim = indices_for_clim, Fair = Fair, + weights = weights, cross.val = cross.val, + na.rm = na.rm, ncores = ncores)$output1 + + if (return_mean) { + rps <- MeanDims(rps, time_dim, na.rm = TRUE) + } else { + rps <- rps + } + + return(rps) +} + + +.RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, + dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, + Fair = FALSE, nmemb = NULL, weights = NULL, + cross.val = FALSE, na.rm = FALSE) { + #--- if memb_dim: + # exp: [sdate, memb, (dat)] + # obs: [sdate, (memb), (dat)] + # weights: NULL or same as exp + #--- if cat_dim: + # exp: [sdate, bin, (dat)] + # obs: [sdate, bin, (dat)] + + # Adjust dimensions to be [sdate, memb, dat] for both exp and obs + if (!is.null(memb_dim)) { + if (!memb_dim %in% names(dim(obs))) { + obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim) + } + } + + if (is.null(dat_dim)) { + nexp <- 1 + nobs <- 1 + dim(exp) <- c(dim(exp), nexp = nexp) + dim(obs) <- c(dim(obs), nobs = nobs) + if (!is.null(weights)) dim(weights) <- c(dim(weights), nexp = nexp) + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + } + + rps <- array(dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs)) + + for (i in 1:nexp) { + for (j in 1:nobs) { + exp_data <- exp[, , i] + obs_data <- obs[, , j] + + if (is.null(dim(exp_data))) dim(exp_data) <- c(dim(exp)[1:2]) + if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1:2]) + + # Find the fraction of NAs + ## If any member/bin is NA at this time step, it is not good value. 
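+      # A sketch of the 'na.rm' logic below (values are hypothetical): with
+      # na.rm = 0.9 and 50 start dates of which 46 are NA-free in every
+      # member/bin, 46/50 = 0.92 >= 0.9, so the RPS is computed on those 46
+      # start dates; with na.rm = 0.95 the result would be NA instead.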
+ exp_mean <- rowMeans(exp_data) + obs_mean <- rowMeans(obs_data) + good_values <- !is.na(exp_mean) & !is.na(obs_mean) + + if (isTRUE(na.rm)) { + f_NAs <- 0 + } else if (isFALSE(na.rm)) { + f_NAs <- 1 + } else { + f_NAs <- na.rm + } + + if (f_NAs <= sum(good_values) / length(obs_mean)) { + + exp_data <- exp_data[good_values, , drop = F] + obs_data <- obs_data[good_values, , drop = F] + + # If the data inputs are forecast/observation, calculate probabilities + if (is.null(cat_dim)) { + if (!is.null(weights)) { + weights_data <- weights[which(good_values), , i] + if (is.null(dim(weights_data))) dim(weights_data) <- c(dim(weights)[1:2]) + } else { + weights_data <- weights #NULL + } + + # Subset indices_for_clim + dum <- match(indices_for_clim, which(good_values)) + good_indices_for_clim <- dum[!is.na(dum)] + + exp_probs <- .GetProbs(data = exp_data, indices_for_quantiles = good_indices_for_clim, + prob_thresholds = prob_thresholds, weights = weights_data, + cross.val = cross.val) + # exp_probs: [bin, sdate] + obs_probs <- .GetProbs(data = obs_data, indices_for_quantiles = good_indices_for_clim, + prob_thresholds = prob_thresholds, weights = NULL, + cross.val = cross.val) + # obs_probs: [bin, sdate] + + } else { # inputs are probabilities already + if (all(names(dim(exp_data)) == c(time_dim, memb_dim)) || + all(names(dim(exp_data)) == c(time_dim, cat_dim))) { + exp_probs <- t(exp_data) + obs_probs <- t(obs_data) + } + } + + probs_exp_cumsum <- apply(exp_probs, 2, cumsum) + probs_obs_cumsum <- apply(obs_probs, 2, cumsum) + + # rps: [sdate, nexp, nobs] + rps [good_values, i, j] <- colSums((probs_exp_cumsum - probs_obs_cumsum)^2) + if (Fair) { # FairRPS + if (!is.null(memb_dim)) { + if (memb_dim %in% names(dim(exp))) { + ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) + ## [formula taken from SpecsVerification::EnsRps] + R <- dim(exp)[2] #memb + } + } else { + R <- nmemb + } + warning("Applying fair correction.") + adjustment <- (-1) / (R - 1) * probs_exp_cumsum * (1 - probs_exp_cumsum) + adjustment <- colSums(adjustment) + rps[, i, j] <- rps[, i, j] + adjustment + } + + } else { ## not enough values different from NA + + rps[, i, j] <- NA_real_ + + } + + } + } + + if (is.null(dat_dim)) { + dim(rps) <- dim(exp)[time_dim] + } + + return(rps) +} + diff --git a/modules/CrossVal/R/tmp/RPSS.R b/modules/CrossVal/R/tmp/RPSS.R new file mode 100644 index 00000000..fc9931ad --- /dev/null +++ b/modules/CrossVal/R/tmp/RPSS.R @@ -0,0 +1,638 @@ +#'Compute the Ranked Probability Skill Score +#' +#'The Ranked Probability Skill Score (RPSS; Wilks, 2011) is the skill score +#'based on the Ranked Probability Score (RPS; Wilks, 2011). It can be used to +#'assess whether a forecast presents an improvement or worsening with respect to +#'a reference forecast. The RPSS ranges between minus infinite and 1. If the +#'RPSS is positive, it indicates that the forecast has higher skill than the +#'reference forecast, while a negative value means that it has a lower skill.\cr +#'Examples of reference forecasts are the climatological forecast (same +#'probabilities for all categories for all time steps), persistence, a previous +#'model version, and another model. It is computed as +#'\code{RPSS = 1 - RPS_exp / RPS_ref}. The statistical significance is obtained +#'based on a Random Walk test at the specified confidence level (DelSole and +#'Tippett, 2016).\cr +#'The function accepts either the ensemble members or the probabilities of +#'each data as inputs. 
If there is more than one dataset, RPSS will be +#'computed for each pair of exp and obs data. The NA ratio of data will be +#'examined before the calculation. If the ratio is higher than the threshold +#'(assigned by parameter \code{na.rm}), NA will be returned directly. NAs are +#'counted by per-pair method, which means that only the time steps that all the +#'datasets have values count as non-NA values. +#' +#'@param exp A named numerical array of either the forecast with at least time +#' and member dimensions, or the probabilities with at least time and category +#' dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. +#'@param obs A named numerical array of either the observation with at least +#' time dimension, or the probabilities with at least time and category +#' dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. The +#' dimensions must be the same as 'exp' except 'memb_dim' and 'dat_dim'. +#'@param ref A named numerical array of either the reference forecast with at +#' least time and member dimensions, or the probabilities with at least time and +#' category dimensions. The probabilities can be generated by +#' \code{s2dv::GetProbs}. The dimensions must be the same as 'exp' except +#' 'memb_dim' and 'dat_dim'. If there is only one reference dataset, it should +#' not have dataset dimension. If there is corresponding reference for each +#' experiment, the dataset dimension must have the same length as in 'exp'. If +#' 'ref' is NULL, the climatological forecast is used as reference forecast. +#' The default value is NULL. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the probabilities of the forecast and the reference forecast. The +#' default value is 'member'. If the data are probabilities, set memb_dim as +#' NULL. +#'@param cat_dim A character string indicating the name of the category +#' dimension that is needed when exp, obs, and ref are probabilities. The +#' default value is NULL, which means that the data are not probabilities. +#'@param dat_dim A character string indicating the name of dataset dimension. +#' The length of this dimension can be different between 'exp' and 'obs'. +#' The default value is NULL. +#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to +#' 1) between the categories. The default value is c(1/3, 2/3), which +#' corresponds to tercile equiprobable categories. +#'@param indices_for_clim A vector of the indices to be taken along 'time_dim' +#' for computing the thresholds between the probabilistic categories. If NULL, +#' the whole period is used. The default value is NULL. +#'@param Fair A logical indicating whether to compute the FairRPSS (the +#' potential RPSS that the forecast would have with an infinite ensemble size). +#' The default value is FALSE. +#'@param weights_exp A named numerical array of the forecast ensemble weights +#' for probability calculation. The dimension should include 'memb_dim', +#' 'time_dim' and 'dat_dim' if there are multiple datasets. All dimension +#' lengths must be equal to 'exp' dimension lengths. The default value is NULL, +#' which means no weighting is applied. The ensemble should have at least 70 +#' members or span at least 10 time steps and have more than 45 members if +#' consistency between the weighted and unweighted methodologies is desired. 
+#'@param weights_ref Same as 'weights_exp' but for the reference forecast.
+#'@param cross.val A logical indicating whether to compute the thresholds
+#'  between probabilistic categories in cross-validation. The default value is
+#'  FALSE.
+#'@param na.rm A logical or numeric value between 0 and 1. If it is numeric, it
+#'  means the lower limit for the fraction of the non-NA values. 1 is equal to
+#'  FALSE (no NA is acceptable), 0 is equal to TRUE (all NAs are acceptable).
+#'  The function returns NA if the fraction of non-NA values in the data is less
+#'  than na.rm. Otherwise, RPSS will be calculated. The default value is FALSE.
+#'@param sig_method.type A character string indicating the test type of the
+#'  significance method. Check \code{RandomWalkTest()} parameter
+#'  \code{test.type} for details. The default is 'two.sided.approx', which is
+#'  the default of \code{RandomWalkTest()}.
+#'@param alpha A numeric of the significance level to be used in the statistical
+#'  significance test. The default value is 0.05.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#'  computation. The default value is NULL.
+#'
+#'@return
+#'\item{$rpss}{
+#'  A numerical array of RPSS with dimensions c(nexp, nobs, the rest dimensions
+#'  of 'exp' except 'time_dim' and 'memb_dim' dimensions). nexp is the number of
+#'  experiment (i.e., dat_dim in exp), and nobs is the number of observation
+#'  (i.e., dat_dim in obs). If dat_dim is NULL, nexp and nobs are omitted.
+#'}
+#'\item{$sign}{
+#'  A logical array of the statistical significance of the RPSS with the same
+#'  dimensions as $rpss.
+#'}
+#'
+#'@references
+#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7
+#'DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1
+#'
+#'@examples
+#'set.seed(1)
+#'exp <- array(rnorm(3000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50))
+#'set.seed(2)
+#'obs <- array(rnorm(300), dim = c(lat = 3, lon = 2, sdate = 50))
+#'set.seed(3)
+#'ref <- array(rnorm(3000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50))
+#'weights <- sapply(1:dim(exp)['sdate'], function(i) {
+#'             n <- abs(rnorm(10))
+#'             n/sum(n)
+#'           })
+#'dim(weights) <- c(member = 10, sdate = 50)
+#'# Use data as input
+#'res <- RPSS(exp = exp, obs = obs) ## climatology as reference forecast
+#'res <- RPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast
+#'res <- RPSS(exp = exp, obs = obs, ref = ref, weights_exp = weights, weights_ref = weights)
+#'res <- RPSS(exp = exp, obs = obs, alpha = 0.01, sig_method.type = 'two.sided')
+#'
+#'# Use probs as input
+#'exp_probs <- GetProbs(exp, memb_dim = 'member')
+#'obs_probs <- GetProbs(obs, memb_dim = NULL)
+#'ref_probs <- GetProbs(ref, memb_dim = 'member')
+#'res <- RPSS(exp = exp_probs, obs = obs_probs, ref = ref_probs, memb_dim = NULL,
+#'            cat_dim = 'bin')
+#'
+#'@import multiApply
+#'@export
+RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL,
+                 dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL,
+                 Fair = FALSE, nmemb = NULL, nmemb_ref = NULL,
+                 weights_exp = NULL, weights_ref = NULL,
+                 cross.val = FALSE, na.rm = FALSE,
+                 sig_method.type = 'two.sided.approx', alpha = 0.05, ncores = NULL) {
+
+  # Check inputs
+  ## exp, obs, and ref (1)
+  if (!is.array(exp) | !is.numeric(exp)) {
+    stop("Parameter 'exp' must be a numeric array.")
+  }
+  if (!is.array(obs) | !is.numeric(obs)) {
+    stop("Parameter 'obs' must be a numeric array.")
+  }
+  if (any(is.null(names(dim(exp)))) |
any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + if (!is.null(ref)) { + if (!is.array(ref) | !is.numeric(ref)) + stop("Parameter 'ref' must be a numeric array.") + if (any(is.null(names(dim(ref)))) | any(nchar(names(dim(ref))) == 0)) { + stop("Parameter 'ref' must have dimension names.") + } + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + if (!is.null(ref) & !time_dim %in% names(dim(ref))) { + stop("Parameter 'time_dim' is not found in 'ref' dimension.") + } + ## memb_dim & cat_dim + if (is.null(memb_dim) + is.null(cat_dim) != 1) { + stop("Only one of the two parameters 'memb_dim' and 'cat_dim' can have value.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (!is.null(ref) & !memb_dim %in% names(dim(ref))) { + stop("Parameter 'memb_dim' is not found in 'ref' dimension.") + } + } + ## cat_dim + if (!is.null(cat_dim)) { + if (!is.character(cat_dim) | length(cat_dim) > 1) { + stop("Parameter 'cat_dim' must be a character string.") + } + if (!cat_dim %in% names(dim(exp)) | !cat_dim %in% names(dim(obs)) | + (!is.null(ref) & !cat_dim %in% names(dim(ref)))) { + stop("Parameter 'cat_dim' is not found in 'exp', 'obs', or 'ref' dimension.") + } + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## exp, obs, and ref (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } + } + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'memb_dim' and 'dat_dim'.") + } + if (!is.null(ref)) { + name_ref <- sort(names(dim(ref))) + if (!is.null(memb_dim)) { + name_ref <- name_ref[-which(name_ref == memb_dim)] + } + if (!is.null(dat_dim)) { + if (dat_dim %in% name_ref) { + if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { + stop("If parameter 'ref' has dataset dimension, it must be", + " equal to dataset dimension of 'exp'.") + } + name_ref <- name_ref[-which(name_ref == dat_dim)] + } + } + if (!identical(length(name_exp), length(name_ref)) | + !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { + stop("Parameter 'exp' and 'ref' must have the same length of ", + "all dimensions except 'memb_dim' and 'dat_dim' if there is ", + "only one reference dataset.") + } + } + ## prob_thresholds + if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) 
|
+      any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) {
+    stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.")
+  }
+  ## indices_for_clim
+  if (is.null(indices_for_clim)) {
+    indices_for_clim <- seq_len(dim(obs)[time_dim])
+  } else {
+    if (!is.numeric(indices_for_clim) | !is.vector(indices_for_clim)) {
+      stop("Parameter 'indices_for_clim' must be NULL or a numeric vector.")
+    } else if (length(indices_for_clim) > dim(obs)[time_dim] |
+               max(indices_for_clim) > dim(obs)[time_dim] |
+               any(indices_for_clim < 1)) {
+      stop("Parameter 'indices_for_clim' should be the indices of 'time_dim'.")
+    }
+  }
+  ## Fair
+  if (!is.logical(Fair) | length(Fair) > 1) {
+    stop("Parameter 'Fair' must be either TRUE or FALSE.")
+  }
+  ## cross.val
+  if (!is.logical(cross.val) | length(cross.val) > 1) {
+    stop("Parameter 'cross.val' must be either TRUE or FALSE.")
+  }
+  ## weights_exp
+  if (!is.null(weights_exp) & is.null(cat_dim)) {
+    if (!is.array(weights_exp) | !is.numeric(weights_exp))
+      stop("Parameter 'weights_exp' must be a named numeric array.")
+
+    if (is.null(dat_dim)) {
+      if (length(dim(weights_exp)) != 2 |
+          !all(names(dim(weights_exp)) %in% c(memb_dim, time_dim))) {
+        stop("Parameter 'weights_exp' must have two dimensions with the names of ",
+             "'memb_dim' and 'time_dim'.")
+      }
+      if (dim(weights_exp)[memb_dim] != dim(exp)[memb_dim] |
+          dim(weights_exp)[time_dim] != dim(exp)[time_dim]) {
+        stop("Parameter 'weights_exp' must have the same dimension lengths as ",
+             "'memb_dim' and 'time_dim' in 'exp'.")
+      }
+      weights_exp <- Reorder(weights_exp, c(time_dim, memb_dim))
+
+    } else {
+      if (length(dim(weights_exp)) != 3 |
+          !all(names(dim(weights_exp)) %in% c(memb_dim, time_dim, dat_dim))) {
+        stop("Parameter 'weights_exp' must have three dimensions with the names of ",
+             "'memb_dim', 'time_dim' and 'dat_dim'.")
+      }
+      if (dim(weights_exp)[memb_dim] != dim(exp)[memb_dim] |
+          dim(weights_exp)[time_dim] != dim(exp)[time_dim] |
+          dim(weights_exp)[dat_dim] != dim(exp)[dat_dim]) {
+        stop("Parameter 'weights_exp' must have the same dimension lengths ",
+             "as 'memb_dim', 'time_dim' and 'dat_dim' in 'exp'.")
+      }
+      weights_exp <- Reorder(weights_exp, c(time_dim, memb_dim, dat_dim))
+    }
+  } else if (!is.null(weights_exp) & !is.null(cat_dim)) {
+    .warning(paste0("Parameter 'exp' is already probabilities, so parameter ",
+                    "'weights_exp' is not used.
Change 'weights_exp' to NULL."))
+    weights_exp <- NULL
+  }
+  ## weights_ref
+  if (!is.null(weights_ref) & is.null(cat_dim)) {
+    if (!is.array(weights_ref) | !is.numeric(weights_ref))
+      stop("Parameter 'weights_ref' must be a named numeric array.")
+
+    if (is.null(dat_dim) | ((!is.null(dat_dim)) && (!dat_dim %in% names(dim(ref))))) {
+      if (length(dim(weights_ref)) != 2 |
+          !all(names(dim(weights_ref)) %in% c(memb_dim, time_dim))) {
+        stop("Parameter 'weights_ref' must have two dimensions with the names of ",
+             "'memb_dim' and 'time_dim'.")
+      }
+      if (dim(weights_ref)[memb_dim] != dim(ref)[memb_dim] |
+          dim(weights_ref)[time_dim] != dim(ref)[time_dim]) {
+        stop("Parameter 'weights_ref' must have the same dimension lengths as ",
+             "'memb_dim' and 'time_dim' in 'ref'.")
+      }
+      weights_ref <- Reorder(weights_ref, c(time_dim, memb_dim))
+
+    } else {
+      if (length(dim(weights_ref)) != 3 |
+          !all(names(dim(weights_ref)) %in% c(memb_dim, time_dim, dat_dim))) {
+        stop("Parameter 'weights_ref' must have three dimensions with the names of ",
+             "'memb_dim', 'time_dim' and 'dat_dim'.")
+      }
+      if (dim(weights_ref)[memb_dim] != dim(ref)[memb_dim] |
+          dim(weights_ref)[time_dim] != dim(ref)[time_dim] |
+          dim(weights_ref)[dat_dim] != dim(ref)[dat_dim]) {
+        stop("Parameter 'weights_ref' must have the same dimension lengths ",
+             "as 'memb_dim', 'time_dim' and 'dat_dim' in 'ref'.")
+      }
+      weights_ref <- Reorder(weights_ref, c(time_dim, memb_dim, dat_dim))
+    }
+  } else if (!is.null(weights_ref) & !is.null(cat_dim)) {
+    .warning(paste0("Parameter 'ref' is already probabilities, so parameter ",
+                    "'weights_ref' is not used. Change 'weights_ref' to NULL."))
+    weights_ref <- NULL
+  }
+  ## na.rm
+  if (!isTRUE(na.rm) & !isFALSE(na.rm) & !(is.numeric(na.rm) & na.rm >= 0 & na.rm <= 1)) {
+    stop('"na.rm" should be TRUE, FALSE or a numeric between 0 and 1')
+  }
+  ## alpha
+  if (any(!is.numeric(alpha) | alpha <= 0 | alpha >= 1 | length(alpha) > 1)) {
+    stop("Parameter 'alpha' must be a number between 0 and 1.")
+  }
+  ## sig_method.type
+  #NOTE: These are the types of RandomWalkTest()
+  if (!sig_method.type %in% c('two.sided.approx', 'two.sided', 'greater', 'less')) {
+    stop("Parameter 'sig_method.type' must be 'two.sided.approx', 'two.sided', ",
+         "'greater', or 'less'.")
+  }
+  if (sig_method.type == 'two.sided.approx' && alpha != 0.05) {
+    .warning("DelSole and Tippett (2016) approximation is valid for alpha ",
+             "= 0.05 only.
Returning the significance at the 0.05 significance level.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + + # Compute RPSS + + ## Decide target_dims + if (!is.null(memb_dim)) { + target_dims_exp <- c(time_dim, memb_dim, dat_dim) + if (!memb_dim %in% names(dim(obs))) { + target_dims_obs <- c(time_dim, dat_dim) + } else { + target_dims_obs <- c(time_dim, memb_dim, dat_dim) + } + } else { # cat_dim + target_dims_exp <- target_dims_obs <- c(time_dim, cat_dim, dat_dim) + } + + if (!is.null(ref)) { # use "ref" as reference forecast + if (!is.null(memb_dim)) { + if (!is.null(dat_dim) && (dat_dim %in% names(dim(ref)))) { + target_dims_ref <- c(time_dim, memb_dim, dat_dim) + } else { + target_dims_ref <- c(time_dim, memb_dim) + } + } else { + target_dims_ref <- c(time_dim, cat_dim, dat_dim) + } + data <- list(exp = exp, obs = obs, ref = ref) + target_dims = list(exp = target_dims_exp, + obs = target_dims_obs, + ref = target_dims_ref) + } else { + data <- list(exp = exp, obs = obs) + target_dims = list(exp = target_dims_exp, + obs = target_dims_obs) + } + + output <- Apply(data, + target_dims = target_dims, + fun = .RPSS, + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, + prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, + nmemb = nmemb, nmemb_ref = nmemb_ref, + weights_exp = weights_exp, + weights_ref = weights_ref, + cross.val = cross.val, + na.rm = na.rm, sig_method.type = sig_method.type, alpha = alpha, + ncores = ncores) + + return(output) + +} + +.RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, + dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, + Fair = FALSE, nmemb = NULL, nmemb_ref = NULL, + weights_exp = NULL, weights_ref = NULL, cross.val = FALSE, + na.rm = FALSE, sig_method.type = 'two.sided.approx', alpha = 0.05) { + #--- if memb_dim: + # exp: [sdate, memb, (dat)] + # obs: [sdate, (memb), (dat)] + # ref: [sdate, memb, (dat)] or NULL + #--- if cat_dim: + # exp: [sdate, bin, (dat)] + # obs: [sdate, bin, (dat)] + # ref: [sdate, bin, (dat)] or NULL + + if (isTRUE(na.rm)) { + f_NAs <- 0 + } else if (isFALSE(na.rm)) { + f_NAs <- 1 + } else { + f_NAs <- na.rm + } + + if (is.null(dat_dim)) { + nexp <- 1 + nobs <- 1 + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + } + + # Calculate RPS + + if (!is.null(ref)) { + + # Adjust dimensions to be [sdate, memb, dat] for both exp, obs, and ref + ## Insert memb_dim in obs + if (!is.null(memb_dim)) { + if (!memb_dim %in% names(dim(obs))) { + obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim) + } + } + ## Insert dat_dim + if (is.null(dat_dim)) { + dim(obs) <- c(dim(obs), dat = nobs) + dim(exp) <- c(dim(exp), dat = nexp) + if (!is.null(weights_exp)) dim(weights_exp) <- c(dim(weights_exp), dat = nexp) + } + if (is.null(dat_dim) || (!is.null(dat_dim) && !dat_dim %in% names(dim(ref)))) { + nref <- 1 + dim(ref) <- c(dim(ref), dat = nref) + if (!is.null(weights_ref)) dim(weights_ref) <- c(dim(weights_ref), dat = nref) + } else { + nref <- as.numeric(dim(ref)[dat_dim]) # should be the same as nexp + } + + # Find good values then calculate RPS + rps_exp <- array(NA, dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs)) + rps_ref <- array(NA, dim = c(dim(exp)[time_dim], nexp 
= nexp, nobs = nobs)) + for (i in 1:nexp) { + for (j in 1:nobs) { + for (k in 1:nref) { + if (nref != 1 & k != i) { # if nref is 1 or equal to nexp, calculate rps + next + } + exp_data <- exp[, , i, drop = F] + obs_data <- obs[, , j, drop = F] + ref_data <- ref[, , k, drop = F] + exp_mean <- rowMeans(exp_data) + obs_mean <- rowMeans(obs_data) + ref_mean <- rowMeans(ref_data) + good_values <- !is.na(exp_mean) & !is.na(obs_mean) & !is.na(ref_mean) + dum <- match(indices_for_clim, which(good_values)) + good_indices_for_clim <- dum[!is.na(dum)] + + if (f_NAs <= sum(good_values) / length(good_values)) { + rps_exp[good_values, i, j] <- .RPS(exp = exp[good_values, , i], + obs = obs[good_values, , j], + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = NULL, + prob_thresholds = prob_thresholds, + indices_for_clim = good_indices_for_clim, + Fair = Fair, nmemb = nmemb, + weights = weights_exp[good_values, , i], + cross.val = cross.val, na.rm = na.rm) + rps_ref[good_values, i, j] <- .RPS(exp = ref[good_values, , k], + obs = obs[good_values, , j], + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = NULL, + prob_thresholds = prob_thresholds, + indices_for_clim = good_indices_for_clim, + Fair = Fair, nmemb = nmemb_ref, + weights = weights_ref[good_values, , k], + na.rm = na.rm, cross.val = cross.val) + } + } + } + } + + } else { # ref is NULL + rps_exp <- .RPS(exp = exp, obs = obs, time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, + nmemb = nmemb, weights = weights_exp, + cross.val = cross.val, na.rm = na.rm) + + # RPS of the reference forecast + if (!is.null(memb_dim)) { + if (!memb_dim %in% names(dim(obs))) { + obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim) + } + } + + rps_ref <- array(NA, dim = c(dim(obs)[time_dim], nexp = nexp, nobs = nobs)) + + if (is.null(dat_dim)) { + dim(obs) <- c(dim(obs), nobs = nobs) + dim(exp) <- c(dim(exp), nexp = nexp) + dim(rps_exp) <- dim(rps_ref) + } + + for (i in 1:nexp) { + for (j in 1:nobs) { + # Use good values only + good_values <- !is.na(rps_exp[, i, j]) + if (f_NAs <= sum(good_values) / length(good_values)) { + obs_data <- obs[good_values, , j] + if (is.null(dim(obs_data))) dim(obs_data) <- c(length(obs_data), 1) + + if (is.null(cat_dim)) { # calculate probs + # Subset indices_for_clim + dum <- match(indices_for_clim, which(good_values)) + good_indices_for_clim <- dum[!is.na(dum)] + obs_probs <- .GetProbs(data = obs_data, + indices_for_quantiles = good_indices_for_clim, + prob_thresholds = prob_thresholds, + weights = NULL, cross.val = cross.val) + } else { + obs_probs <- t(obs_data) + } + # obs_probs: [bin, sdate] + + clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), + 1 - prob_thresholds[length(prob_thresholds)]) + clim_probs <- array(clim_probs, dim = dim(obs_probs)) + # clim_probs: [bin, sdate] + + # Calculate RPS for each time step + probs_clim_cumsum <- apply(clim_probs, 2, cumsum) + probs_obs_cumsum <- apply(obs_probs, 2, cumsum) + rps_ref[good_values, i, j] <- colSums((probs_clim_cumsum - probs_obs_cumsum)^2) + } + if (Fair) { # FairRPS + if (!is.null(memb_dim)) { + if (memb_dim %in% names(dim(exp))) { + ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) + ## [formula taken from SpecsVerification::EnsRps] + R <- dim(obs)[1] #number of years + } + } else { + R <- nmemb_ref + } + adjustment <- (-1) / (R - 1) * probs_clim_cumsum * (1 - 
probs_clim_cumsum)
+          adjustment <- colSums(adjustment)
+          rps_ref[, i, j] <- rps_ref[, i, j] + adjustment
+        }
+      }
+    }
+  }
+
+  if (is.null(dat_dim)) {
+    dim(rps_ref) <- dim(rps_exp) <- dim(exp)[time_dim]
+  }
+
+#----------------------------------------------
+  # Calculate RPSS
+
+  if (!is.null(dat_dim)) {
+    # rps_exp and rps_ref: [sdate, nexp, nobs]
+    rps_exp_mean <- colMeans(rps_exp, na.rm = TRUE)
+    rps_ref_mean <- colMeans(rps_ref, na.rm = TRUE)
+    rpss <- array(dim = c(nexp = nexp, nobs = nobs))
+    sign <- array(dim = c(nexp = nexp, nobs = nobs))
+
+    if (!all(is.na(rps_exp_mean))) {
+      for (i in 1:nexp) {
+        for (j in 1:nobs) {
+          rpss[i, j] <- 1 - rps_exp_mean[i, j] / rps_ref_mean[i, j]
+          ind_nonNA <- !is.na(rps_exp[, i, j])
+          if (!any(ind_nonNA)) {
+            sign[i, j] <- NA
+          } else {
+            sign[i, j] <- .RandomWalkTest(skill_A = rps_exp[ind_nonNA, i, j],
+                                          skill_B = rps_ref[ind_nonNA, i, j],
+                                          test.type = sig_method.type, alpha = alpha,
+                                          sign = T, pval = F)$sign
+          }
+        }
+      }
+    }
+
+    # Turn NaN into NA
+    if (any(is.nan(rpss))) rpss[which(is.nan(rpss))] <- NA
+
+  } else { # dat_dim is NULL
+
+    ind_nonNA <- !is.na(rps_exp)
+    if (!any(ind_nonNA)) {
+      rpss <- NA
+      sign <- NA
+    } else {
+      # rps_exp and rps_ref: [sdate]
+      rpss <- 1 - mean(rps_exp, na.rm = TRUE) / mean(rps_ref, na.rm = TRUE)
+      sign <- .RandomWalkTest(skill_A = rps_exp[ind_nonNA],
+                              skill_B = rps_ref[ind_nonNA],
+                              test.type = sig_method.type, alpha = alpha,
+                              sign = T, pval = F)$sign
+    }
+  }
+
+  return(list(rpss = rpss, sign = sign))
+}
diff --git a/modules/CrossVal/R/tmp/RandomWalkTest.R b/modules/CrossVal/R/tmp/RandomWalkTest.R
new file mode 100644
index 00000000..16d89f6d
--- /dev/null
+++ b/modules/CrossVal/R/tmp/RandomWalkTest.R
@@ -0,0 +1,184 @@
+#'Random Walk test for skill differences
+#'
+#'Forecast comparison of the skill obtained with 2 forecasts (with respect to a
+#'common observational reference) based on Random Walks (DelSole and Tippett,
+#'2016).
+#'
+#'@param skill_A A numerical array of the time series of the scores obtained
+#' with forecaster A.
+#'@param skill_B A numerical array of the time series of the scores obtained
+#' with forecaster B. The dimensions should be identical to those of parameter
+#' 'skill_A'.
+#'@param time_dim A character string indicating the name of the dimension along
+#' which the tests are computed. The default value is 'sdate'.
+#'@param test.type A character string indicating the type of significance test.
+#' It can be "two.sided.approx" (to assess whether forecaster A and forecaster
+#' B are significantly different in terms of skill with a two-sided test using
+#' the approximation of DelSole and Tippett, 2016), "two.sided" (to assess
+#' whether forecaster A and forecaster B are significantly different in terms
+#' of skill with an exact two-sided test), "greater" (to assess whether
+#' forecaster A shows significantly better skill than forecaster B with a
+#' one-sided test for negatively oriented scores), or "less" (to assess whether
+#' forecaster A shows significantly better skill than forecaster B with a
+#' one-sided test for positively oriented scores). The default value is
+#' "two.sided.approx".
+#'@param alpha A numeric of the significance level to be used in the statistical
+#' significance test (output "sign"). The default value is 0.05.
+#'@param pval A logical value indicating whether to return the p-value of the
+#' significance test. The default value is TRUE.
+#'@param sign A logical value indicating whether to return the statistical +#' significance of the test based on 'alpha'. The default value is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return A list with: +#'\item{$score}{ +#' A numerical array with the same dimensions as the input arrays except +#' 'time_dim'. The number of times that forecaster A has been better than +#' forecaster B minus the number of times that forecaster B has been better +#' than forecaster A (for skill negatively oriented, i.e., the lower the +#' better). If $score is positive, forecaster A has been better more times +#' than forecaster B. If $score is negative, forecaster B has been better more +#' times than forecaster A. +#'} +#'\item{$sign}{ +#' A logical array of the statistical significance with the same dimensions +#' as the input arrays except "time_dim". Returned only if "sign" is TRUE. +#'} +#'\item{$p.val}{ +#' A numeric array of the p-values with the same dimensions as the input arrays +#' except "time_dim". Returned only if "pval" is TRUE. +#'} +#' +#'@details +#' Null and alternative hypothesis for "two-sided" test (regardless of the +#' orientation of the scores):\cr +#' H0: forecaster A and forecaster B are not different in terms of skill\cr +#' H1: forecaster A and forecaster B are different in terms of skill +#' +#' Null and alternative hypothesis for one-sided "greater" (for negatively +#' oriented scores, i.e., the lower the better) and "less" (for positively +#' oriented scores, i.e., the higher the better) tests:\cr +#' H0: forecaster A is not better than forecaster B\cr +#' H1: forecaster A is better than forecaster B +#' +#' Examples of negatively oriented scores are the RPS, RMSE and the Error, while +#' the ROC score is a positively oriented score. +#' +#' DelSole and Tippett (2016) approximation for two-sided test at 95% confidence +#' level: significant if the difference between the number of times that +#' forecaster A has been better than forecaster B and forecaster B has been +#' better than forecaster A is above 2sqrt(N) or below -2sqrt(N). 
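For intuition, the decision rule described in the paragraph above can be reproduced in a few lines of base R. The snippet below is a minimal standalone sketch with hypothetical toy score series (it is not part of the package API): it counts the time steps won by each forecaster and flags significance once the accumulated score exceeds 2*sqrt(N) in absolute value.

# Toy illustration of the DelSole and Tippett (2016) two-sided approximation.
# The scores are negatively oriented here (lower is better), e.g. RPS values.
set.seed(1)
N <- 100                                 # number of time steps
scores_A <- abs(rnorm(N))                # hypothetical scores of forecaster A
scores_B <- abs(rnorm(N, mean = 0.2))    # hypothetical scores of forecaster B
score <- sum(scores_B > scores_A) - sum(scores_B < scores_A)
significant <- abs(score) > 2 * sqrt(N)  # TRUE if the skill difference is significant

With N = 100 the threshold is 2*sqrt(100) = 20, so the net number of wins of one forecaster over the other must exceed 20 in absolute value for the difference to be flagged as significant.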
+#'
+#'@references
+#'DelSole and Tippett (2016): https://doi.org/10.1175/MWR-D-15-0218.1
+#'
+#'@examples
+#' fcst_A <- array(data = 11:50, dim = c(sdate = 10, lat = 2, lon = 2))
+#' fcst_B <- array(data = 21:60, dim = c(sdate = 10, lat = 2, lon = 2))
+#' reference <- array(data = 1:40, dim = c(sdate = 10, lat = 2, lon = 2))
+#' scores_A <- abs(fcst_A - reference)
+#' scores_B <- abs(fcst_B - reference)
+#' res1 <- RandomWalkTest(skill_A = scores_A, skill_B = scores_B, pval = FALSE, sign = TRUE)
+#' res2 <- RandomWalkTest(skill_A = scores_A, skill_B = scores_B, test.type = 'greater')
+#'
+#'@import multiApply
+#'@importFrom ClimProjDiags Subset
+#'@export
+RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate',
+                           test.type = 'two.sided.approx', alpha = 0.05, pval = TRUE,
+                           sign = FALSE, ncores = NULL) {
+
+  # Check inputs
+  ## skill_A and skill_B
+  if (is.null(skill_A) | is.null(skill_B)) {
+    stop("Parameters 'skill_A' and 'skill_B' cannot be NULL.")
+  }
+  if (!is.numeric(skill_A) | !is.numeric(skill_B)) {
+    stop("Parameters 'skill_A' and 'skill_B' must be a numerical array.")
+  }
+  if (!identical(dim(skill_A), dim(skill_B))) {
+    stop("Parameters 'skill_A' and 'skill_B' must have the same dimensions.")
+  }
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) != 1) {
+    stop("Parameter 'time_dim' must be a character string.")
+  }
+  if (!time_dim %in% names(dim(skill_A)) | !time_dim %in% names(dim(skill_B))) {
+    stop("Parameter 'time_dim' is not found in 'skill_A' or 'skill_B' dimensions.")
+  }
+  ## alpha
+  if (any(!is.numeric(alpha) | alpha <= 0 | alpha >= 1 | length(alpha) > 1)) {
+    stop("Parameter 'alpha' must be a number between 0 and 1.")
+  }
+  ## test.type
+  if (!test.type %in% c('two.sided.approx', 'two.sided', 'greater', 'less')) {
+    stop("Parameter 'test.type' must be 'two.sided.approx', 'two.sided', 'greater', or 'less'.")
+  }
+  if (test.type == 'two.sided.approx') {
+    if (alpha != 0.05) {
+      .warning("DelSole and Tippett (2016) approximation is valid for alpha ",
+               "= 0.05 only. Returning the significance at the 0.05 significance level.")
+    }
+    if (pval) {
+      .warning("p-value cannot be returned with the DelSole and Tippett (2016) ",
+               "approximation. Returning the significance at the 0.05 significance level.")
+    }
+    sign <- TRUE
+  }
+  ## ncores
+  if (!is.null(ncores)) {
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) {
+      stop("Parameter 'ncores' must be a positive integer.")
+    }
+  }
+
+  ## Compute the Random Walk Test
+  res <- Apply(data = list(skill_A = skill_A,
+                           skill_B = skill_B),
+               target_dims = list(skill_A = time_dim,
+                                  skill_B = time_dim),
+               fun = .RandomWalkTest,
+               test.type = test.type,
+               alpha = alpha, pval = pval, sign = sign,
+               ncores = ncores)
+
+  return(res)
+}
+
+.RandomWalkTest <- function(skill_A, skill_B, test.type = 'two.sided.approx',
+                            alpha = 0.05, pval = TRUE, sign = FALSE) {
+  #skill_A and skill_B: [sdate]
+
+  N.eff <- length(skill_A)
+
+  A_better <- sum(skill_B > skill_A)
+  B_better <- sum(skill_B < skill_A)
+
+  output <- NULL
+  output$score <- A_better - B_better
+
+  if (test.type == 'two.sided.approx') {
+    output$sign <- abs(output$score) > (2 * sqrt(N.eff))
+
+  } else {
+
+    if (!is.na(output$score)) {
+      p.val <- binom.test(x = A_better, n = floor(N.eff), p = 0.5, conf.level = 1 - alpha,
+                          alternative = test.type)$p.value
+
+    } else {
+      p.val <- NA
+    }
+
+    if (pval) {
+      output$p.val <- p.val
+    }
+    if (sign) {
+      output$sign <- !is.na(p.val) & p.val <= alpha
+    }
+
+  }
+
+  return(output)
+}
diff --git a/modules/CrossVal/R/tmp/SprErr.R b/modules/CrossVal/R/tmp/SprErr.R
new file mode 100644
index 00000000..33642eab
--- /dev/null
+++ b/modules/CrossVal/R/tmp/SprErr.R
@@ -0,0 +1,227 @@
+#'Compute the ratio between the ensemble spread and RMSE
+#'
+#'Compute the ratio between the spread of the members around the
+#'ensemble mean in experimental data and the RMSE between the ensemble mean of
+#'experimental and observational data. The p-value and/or the statistical
+#'significance is provided by a one-sided Fisher's test.
+#'
+#'@param exp A named numeric array of experimental data with at least two
+#' dimensions 'memb_dim' and 'time_dim'.
+#'@param obs A named numeric array of observational data with at least two
+#' dimensions 'memb_dim' and 'time_dim'. It should have the same dimensions as
+#' parameter 'exp' except along 'dat_dim' and 'memb_dim'.
+#'@param dat_dim A character string indicating the name of dataset (nobs/nexp)
+#' dimension. The default value is NULL (no dataset).
+#'@param memb_dim A character string indicating the name of the member
+#' dimension. It must be one dimension in 'exp' and 'obs'. The default value
+#' is 'member'.
+#'@param time_dim A character string indicating the name of dimension along
+#' which the ratio is computed. The default value is 'sdate'.
+#'@param pval A logical value indicating whether to compute the p-value
+#' of the test Ho: SD/RMSE = 1. The default value is TRUE.
+#'@param sign A logical value indicating whether to retrieve the statistical
+#' significance of the test Ho: SD/RMSE = 1 based on 'alpha'. The default value
+#' is FALSE.
+#'@param alpha A numeric indicating the significance level for the statistical
+#' significance test. The default value is 0.05.
+#'@param na.rm A logical value indicating whether to remove NA values. The default
+#' value is FALSE.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#' computation. The default value is NULL.
+#'
+#'@return A list of two arrays with dimensions c(nexp, nobs, the rest of
+#' dimensions of 'exp' and 'obs' except memb_dim and time_dim), where nexp is
+#' the length of dat_dim of 'exp' and nobs is the length of dat_dim of 'obs'.
+#' If dat_dim is NULL, nexp and nobs are omitted. \cr
+#'\item{$ratio}{
+#'  The ratio of the ensemble spread and RMSE.
+#'}
+#'\item{$p.val}{
+#'  The p-value of the one-sided Fisher's test with Ho: SD/RMSE = 1. Only present
+#'  if \code{pval = TRUE}.
+#'}
+#'
+#'@examples
+#'# Load sample data as in Load() example:
+#'example(Load)
+#'rsdrms <- SprErr(sampleData$mod, sampleData$obs, dat_dim = 'dataset')
+#'# Reorder the data in order to plot it with PlotVsLTime
+#'rsdrms_plot <- array(dim = c(dim(rsdrms$ratio)[1:2], 4, dim(rsdrms$ratio)[3]))
+#'rsdrms_plot[, , 2, ] <- rsdrms$ratio
+#'rsdrms_plot[, , 4, ] <- rsdrms$p.val
+#'\dontrun{
+#'PlotVsLTime(rsdrms_plot, toptitle = "Ratio ensemble spread / RMSE", ytitle = "",
+#'            monini = 11, limits = c(-1, 1.3), listexp = c('CMIP5 IC3'),
+#'            listobs = c('ERSST'), biglab = FALSE, siglev = TRUE)
+#'}
+#'
+#'@import multiApply
+#'@export
+SprErr <- function(exp, obs, dat_dim = NULL, memb_dim = 'member',
+                   time_dim = 'sdate', pval = TRUE, sign = FALSE,
+                   alpha = 0.05, na.rm = FALSE, ncores = NULL) {
+
+  # Check inputs
+  ## exp and obs (1)
+  if (is.null(exp) | is.null(obs)) {
+    stop("Parameter 'exp' and 'obs' cannot be NULL.")
+  }
+  if (!is.numeric(exp) | !is.numeric(obs)) {
+    stop("Parameter 'exp' and 'obs' must be a numeric array.")
+  }
+  if (is.null(dim(exp)) | is.null(dim(obs))) {
+    stop(paste0("Parameter 'exp' and 'obs' must be array with at least two ",
+                "dimensions memb_dim and time_dim."))
+  }
+  if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) |
+      any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) {
+    stop("Parameter 'exp' and 'obs' must have dimension names.")
+  }
+  ## dat_dim
+  if (!is.null(dat_dim)) {
+    if (!is.character(dat_dim) | length(dat_dim) > 1) {
+      stop("Parameter 'dat_dim' must be a character string.")
+    }
+    if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) {
+      stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.")
+    }
+  }
+  ## memb_dim
+  if (!is.character(memb_dim) | length(memb_dim) > 1) {
+    stop("Parameter 'memb_dim' must be a character string.")
+  }
+  if (!memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) {
+    stop("Parameter 'memb_dim' is not found in 'exp' nor 'obs' dimension.
",
+         "Set it as NULL if there is no member dimension.")
+  }
+  # Add [member = 1]
+  if (memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) {
+    dim(obs) <- c(dim(obs), 1)
+    names(dim(obs))[length(dim(obs))] <- memb_dim
+  }
+  if (!memb_dim %in% names(dim(exp)) & memb_dim %in% names(dim(obs))) {
+    dim(exp) <- c(dim(exp), 1)
+    names(dim(exp))[length(dim(exp))] <- memb_dim
+  }
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) > 1) {
+    stop("Parameter 'time_dim' must be a character string.")
+  }
+  if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) {
+    stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.")
+  }
+  ## exp and obs (2)
+  name_exp <- sort(names(dim(exp)))
+  name_obs <- sort(names(dim(obs)))
+  if (!is.null(dat_dim)) {
+    name_exp <- name_exp[-which(name_exp == dat_dim)]
+    name_obs <- name_obs[-which(name_obs == dat_dim)]
+  }
+  name_exp <- name_exp[-which(name_exp == memb_dim)]
+  name_obs <- name_obs[-which(name_obs == memb_dim)]
+  if (!identical(dim(exp)[name_exp], dim(obs)[name_obs])) {
+    stop(paste0("Parameter 'exp' and 'obs' must have same length of ",
+                "all the dimensions except 'dat_dim' and 'memb_dim'."))
+  }
+  ## pval
+  if (!is.logical(pval) | length(pval) > 1) {
+    stop("Parameter 'pval' must be one logical value.")
+  }
+  ## sign
+  if (!is.logical(sign) | length(sign) > 1) {
+    stop("Parameter 'sign' must be one logical value.")
+  }
+  # alpha
+  if (!is.numeric(alpha) | any(alpha < 0) | any(alpha > 1) | length(alpha) > 1) {
+    stop("Parameter 'alpha' must be a number between 0 and 1.")
+  }
+  # na.rm
+  if (!na.rm %in% c(TRUE, FALSE)) {
+    stop("Parameter 'na.rm' must be TRUE or FALSE.")
+  }
+  ## ncores
+  if (!is.null(ncores)) {
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
+        length(ncores) > 1) {
+      stop("Parameter 'ncores' must be a positive integer.")
+    }
+  }
+
+
+  ###############################
+  # Calculate the spread-to-error ratio
+
+  # If dat_dim = NULL, insert dat dim
+  remove_dat_dim <- FALSE
+  if (is.null(dat_dim)) {
+    dat_dim <- 'dataset'
+    exp <- InsertDim(exp, posdim = 1, lendim = 1, name = 'dataset')
+    obs <- InsertDim(obs, posdim = 1, lendim = 1, name = 'dataset')
+    remove_dat_dim <- TRUE
+  }
+
+  res <- Apply(list(exp, obs),
+               target_dims = list(c(dat_dim, memb_dim, time_dim),
+                                  c(dat_dim, memb_dim, time_dim)),
+               pval = pval,
+               sign = sign,
+               na.rm = na.rm,
+               fun = .SprErr,
+               ncores = ncores)
+
+  if (remove_dat_dim) {
+    if (length(dim(res[[1]])) > 2) {
+      res <- lapply(res, Subset, c('nexp', 'nobs'), list(1, 1), drop = 'selected')
+    } else {
+      res <- lapply(res, as.numeric)
+    }
+  }
+
+  return(res)
+}
+
+.SprErr <- function(exp, obs, pval = TRUE, sign = FALSE, alpha = 0.05, na.rm = FALSE) {
+
+  # exp: [dat_exp, member, sdate]
+  # obs: [dat_obs, member, sdate]
+  nexp <- dim(exp)[1]
+  nobs <- dim(obs)[1]
+
+  # ensemble mean
+  ens_exp <- MeanDims(exp, 2, na.rm = na.rm) # [dat, sdate]
+  ens_obs <- MeanDims(obs, 2, na.rm = na.rm)
+
+  # Create empty arrays
+  ratio <- array(dim = c(nexp = as.numeric(nexp), nobs = as.numeric(nobs))) # [nexp, nobs]
+  p.val <- array(dim = c(nexp = as.numeric(nexp), nobs = as.numeric(nobs))) # [nexp, nobs]
+
+  for (jexp in 1:nexp) {
+    for (jobs in 1:nobs) {
+
+      # spread and error
+      spread <- sqrt(mean(apply(exp[jexp,,], 2, var, na.rm = na.rm), na.rm = na.rm))
+      error <- sqrt(mean((ens_obs[jobs, ] - ens_exp[jexp, ])^2, na.rm = na.rm))
+      ratio[jexp, jobs] <- spread/error
+
+      # effective sample size
+      enospr <- sum(Eno(apply(exp[jexp,,], 2, var, na.rm = na.rm), names(dim(exp))[3]))
+      enodif <-
.Eno((ens_exp[jexp, ] - ens_obs[jobs, ])^2, na.action = na.pass)
+      if (pval | sign) { # the p-value is also needed to compute 'sign'
+        # one-sided Fisher's test on the spread/error variance ratio
+        F.stat <- (enospr * spread^2 / (enospr - 1)) / (enodif * error^2 / (enodif - 1))
+        if (!is.na(F.stat) & !is.na(enospr) & !is.na(enodif) & enospr > 2 & enodif > 2) {
+          p.val[jexp, jobs] <- pf(F.stat, enospr - 1, enodif - 1)
+          p.val[jexp, jobs] <- 2 * min(p.val[jexp, jobs], 1 - p.val[jexp, jobs])
+        } else {
+          ratio[jexp, jobs] <- NA
+        }
+      }
+    }
+  }
+
+  res <- list(ratio = ratio)
+  if (pval) {res$p.val <- p.val}
+  if (sign) {res$sign <- p.val <= alpha}
+
+  return(res)
+}
+
diff --git a/modules/CrossVal/recipe_crossval_ecvs.yml b/modules/CrossVal/recipe_crossval_ecvs.yml
new file mode 100644
index 00000000..3ec15379
--- /dev/null
+++ b/modules/CrossVal/recipe_crossval_ecvs.yml
@@ -0,0 +1,183 @@
+# IMPORTANT: This recipe is not intended to represent a real workflow: it is only a template showcasing ALL available options.
+Description:
+  Author: N. Pérez-Zanón
+  Info: This recipe can be used to test Crossval_anomalies.R and Crossval_skill.R for single and multimodel.
+Analysis:
+  Horizon: seasonal # Mandatory, str: 'seasonal', or 'decadal'. Subseasonal is in development
+  Variables:
+    # name: variable name(s) in the /esarchive (Mandatory, str)
+    # freq: 'monthly_mean', 'daily' or 'daily_mean' (Mandatory, str)
+    # units: desired data units for each variable. Only available for temperature,
+    #        precipitation, and pressure variables.
+    - {name: 'tas', freq: 'monthly_mean', units: 'K'}
+    # To request more variables to be divided in atomic recipes, add them this way:
+    # - {name: 'prlr', freq: 'monthly_mean', units: 'mm'}
+    # To request multiple variables *in the same* atomic recipe, add them this way:
+    # - {name: 'tas, prlr', freq: 'monthly_mean', units: {tas: 'C', prlr: 'mm'}}
+  Datasets:
+    System:
+      # name: System name (Mandatory, str)
+      # member: 'all' or individual members, separated by a comma and in quotes (decadal only, str)
+      - {name: 'ECMWF-SEAS5.1', member: 'all'}
+      - {name: 'Meteo-France-System7'}
+      # To request more Systems to be divided in atomic recipes, add them this way:
+      # - {name: 'Meteo-France-System7'}
+    Multimodel:
+      execute: no # Either yes/true or no/false (Mandatory, bool)
+      approach: pooled # Multimodel computation approach. 'pooled' currently the only option (str)
+      createFrom: Anomalies # Which module should the anomalies be created from (str)
+    Reference:
+      - {name: 'ERA5'} # Reference name (Mandatory, str)
+      # To request more References to be divided into atomic recipes, add them this way:
+      # - {name: 'ERA5Land'}
+    Time:
+      sdate:
+        - '0101'
+        - '0201'
+        - '0301'
+        - '0401'
+        - '0501'
+        - '0601'
+        - '0701'
+        - '0801'
+        - '0901'
+        - '1001'
+        - '1101'
+        - '1201' # Start date, 'mmdd' (Mandatory, int)
+      # To request more startdates to be divided into atomic recipes, add them this way:
+      # - '0101'
+      # - '0201'
+      # ...
+      fcst_year: '2020' # Forecast initialization year 'YYYY' (Optional, int)
+      hcst_start: '1993' # Hindcast initialization start year 'YYYY' (Mandatory, int)
+      hcst_end: '2016' # Hindcast initialization end year 'YYYY' (Mandatory, int)
+      ftime_min: 1 # First forecast time step in months. Starts at “1”. (Mandatory, int)
+      ftime_max: 6 # Last forecast time step in months. Starts at “1”.
(Mandatory, int)
+  Region:
+    # latmin: minimum latitude (Mandatory, int)
+    # latmax: maximum latitude (Mandatory, int)
+    # lonmin: # minimum longitude (Mandatory, int)
+    # lonmax: # maximum longitude (Mandatory, int)
+    - {name: global, latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9}
+    # To request more regions to be divided in atomic recipes, add them this way:
+    # {name: "nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 60}
+  Regrid:
+    method: conservative # Interpolation method (Mandatory, str)
+    type: to_system # Interpolate to: 'to_system', 'to_reference', 'none',
+                    # or CDO-accepted grid. (Mandatory, str)
+  Workflow:
+    # This is the section of the recipe where the parameters for each module are specified
+    Calibration:
+      method: raw # Calibration method. (Mandatory, str)
+      save: 'none' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str)
+    Anomalies:
+      compute: yes # Either yes/true or no/false (Mandatory, bool)
+      cross_validation: no # Either yes/true or no/false (Mandatory if 'compute: yes', bool)
+      save: 'all' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str)
+    Downscaling:
+      # Assumption 1: leave-one-out cross-validation is always applied
+      # Assumption 2: for analogs, we select the best analog (minimum distance)
+      type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'.
+      int_method: conservative # regridding method accepted by CDO. (Mandatory, str)
+      bc_method: bias # If type=intbc. Options: 'bias', 'calibration', 'quantile_mapping', 'qm', 'evmos', 'mse_min', 'crps_min', 'rpc-based'.
+      lr_method: # If type=intlr. Options: 'basic', 'large_scale', '9nn'
+      log_reg_method: # If type=logreg. Options: 'ens_mean', 'ens_mean_sd', 'sorted_members'
+      target_grid: /esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # nc file or grid accepted by CDO
+      nanalogs: # If type=analogs. Number of analogs to be searched
+      save: 'all' # Options: 'all'/'none'/'exp_only' (Mandatory, str)
+    Time_aggregation:
+      execute: no # Either yes/true or no/false. Defaults to false. (Mandatory, bool)
+      method: average # Aggregation method. Available methods: 'average', 'accumulated'. (Mandatory, string)
+      # ini and end: list, pairs of initial and final time steps to aggregate.
+      # In this example, aggregate from 1 to 2; from 2 to 3 and from 1 to 3
+      ini: [1, 2, 1]
+      end: [2, 3, 3]
+      # user_def: List of lists, custom user-defined forecast times to aggregate.
+      # Elements should be named; the names can be chosen by the user.
+      # An R expression can be entered using '!expr'; it will be evaluated by the code.
+      # If both ini/end and user_def are defined, ini/end takes preference.
+      user_def:
+        DJF_Y1: [1, 3] # aggregate from 1 to 3 forecast times
+        DJF: !expr sort(c(seq(1, 120, 12), seq(2, 120, 13), seq(3, 120, 14))) # aggregate 1,2,3,13,14,15,...
+    Indices:
+      ## Indices available: - NAO (for psl and/or z500);
+      #                     - Nino1+2, Nino3, Nino3.4, Nino4 (for tos)
+      ## Each index can only be computed if its area is within the selected region.
+      # obsproj: NAO computation method (see s2dv::NAO()) Default is yes/true. (Optional, bool)
+      # save: What to save. Options: 'all'/'none'. Default is 'all'.
+      # plot_ts: Generate time series plot? Default is yes/true. (Optional, bool)
+      # plot_sp: Generate spatial pattern plot? Default is yes/true. (Optional, bool)
+      # alpha: Significance threshold.
Default value is 0.05 (Optional, numeric)
+      #Nino1+2: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05}
+      #Nino3: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05}
+      #Nino3.4: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05}
+      #Nino4: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05}
+      # Also available if variable is psl and/or z500:
+      # NAO: {obsproj: yes, save: 'all', plot_ts: yes, plot_sp: yes}
+    Skill:
+      metric: mean_bias enscorr rpss crpss enssprerr # List of skill metrics separated by spaces or commas. (Mandatory, str)
+      save: 'all' # Options: 'all', 'none' (Mandatory, str)
+    Statistics:
+      metric: cov std var n_eff # List of statistics separated by spaces or commas. (Mandatory, str)
+      save: 'all' # Options: 'all', 'none' (Mandatory, str)
+    Probabilities:
+      percentiles: [[1/3, 2/3]] # Thresholds
+      # for quantiles and probability categories. Each set of thresholds should be
+      # enclosed within brackets. For now, they are INDEPENDENT from skill metrics. (Optional)
+      save: 'percentiles_only' # Options: 'all', 'none', 'bins_only', 'percentiles_only' (Mandatory, str)
+    Visualization:
+      plots: skill_metrics, most_likely_terciles, forecast_ensemble_mean # Types of plots to generate (Optional, str)
+      multi_panel: no # Multi-panel plot or single-panel plots. Default is 'no/false'. (Optional, bool)
+      projection: 'robinson' # Options: 'cylindrical_equidistant', 'robinson', 'lambert_europe'. Default is cylindrical equidistant. (Optional, str)
+      mask_terciles: no # Whether to mask the non-significant points by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str)
+      dots_terciles: yes # Whether to dot the non-significant points by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str)
+      mask_ens: no # Whether to mask the non-significant points by rpss in the forecast ensemble mean plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str)
+      file_format: 'PNG' # Final file format of the plots. Formats available: PNG, JPG, JPEG, EPS. Defaults to PDF.
+    Scorecards:
+      execute: yes # yes/no
+      regions:
+        # Mandatory: Define regions for which the spatial aggregation will be performed.
+        # The regions must be included within the area defined in the 'Analysis:Region' section.
+        Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90}
+        Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30}
+        Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30}
+      start_months: 1, 2, 3 # Mandatory, int: start months to visualise in scorecard table. Options: 'all' or a sequence of numbers.
+      metric: mean_bias enscorr rpss crpss enssprerr # Mandatory: metrics to visualise in scorecard table
+      metric_aggregation: 'score' # Mandatory, str: level of aggregation for skill scores. Options: 'score' or 'skill'
+      inf_to_na: True # Optional, bool: set inf values in data to NA, default is no/False
+      table_label: NULL # Optional, str: extra information to add in scorecard table title
+      fileout_label: NULL # Optional, str: extra information to add in scorecard output filename
+      col1_width: NULL # Optional, int: to adjust width of first column in scorecards table
+      col2_width: NULL # Optional, int: to adjust width of second column in scorecards table
+      calculate_diff: False # Mandatory, bool: True/False
+  cross.method: 'leave-one-out' # Default 'leave-one-out'
+  ncores: 16 # Number of cores to be used in parallel computation.
+             # If left empty, defaults to 1.
(Optional, int) + remove_NAs: yes # Whether to remove NAs. + # If left empty, defaults to no/false. (Optional, bool) + Output_format: 'Scorecards' # 'S2S4E' or 'Scorecards'. Determines the format of the output. Default is 'S2S4E'. +Run: + filesystem: esarchive # Name of the filesystem as defined in the archive configuration file + Loglevel: INFO # Minimum category of log messages to display: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. + # Default value is 'INFO'. (Optional, str) + Terminal: yes # Optional, bool: Whether to display log messages in the terminal. + # Default is yes/true. + output_dir: /esarchive/scratch/nperez/git4/ # Output directory. Must have write permissions. (Mandatory, str) + code_dir: /esarchive/scratch/nperez/git4/sunset/ # Directory where the code is stored. Is used when launching jobs (not running interactively) + autosubmit: no # Whether or not to run with Autosubmit. Only for non-atomic recipes (not running interactively) + # fill only if using autosubmit + auto_conf: + script: ./example_scripts/multimodel_seasonal.R # replace with the path to your script + expid: a6wq # replace with your EXPID + hpc_user: bsc032762 # replace with your hpc username + wallclock: 01:00 # wallclock for single-model jobs, hh:mm + wallclock_multimodel: 02:00 # wallclock for multi-model jobs, hh:mm. If empty, 'wallclock' will be used. + processors_per_job: 4 # processors to request for each single-model job. + processors_multimodel: 16 # processors to request for each multi-model job. If empty, 'processors_per_job' will be used. + custom_directives: ['#SBATCH --exclusive'] # custom scheduler directives for single-model jobs. + custom_directives_multimodel: ['#SBATCH --exclusive', '#SBATCH --constraint=highmem'] # custom scheduler directives for multi-model jobs. If empty, 'custom_directives' will be used. + platform: nord3v2 # platform (for now, only nord3v2 is available) + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: victoria.agudetse@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails diff --git a/recipe_tas_oper.yml b/recipe_tas.yml similarity index 95% rename from recipe_tas_oper.yml rename to recipe_tas.yml index 34f82e63..e9f7e028 100644 --- a/recipe_tas_oper.yml +++ b/recipe_tas.yml @@ -47,7 +47,7 @@ Analysis: save: 'all' cross_validation: yes Probabilities: - percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. 
save: all Indicators: index: no @@ -56,6 +56,7 @@ Analysis: multi_panel: no dots: both projection: Robinson + file_format: 'PNG' #projection: robinson Scorecards: execute: no # yes/no @@ -73,7 +74,7 @@ Analysis: col2_width: NULL calculate_diff: FALSE ncores: 4 # Optional, int: number of cores, defaults to 1 - remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE Output_format: scorecards logo: yes Run: -- GitLab From a3a49538bf509007d92cb5532f4d5c902988f006 Mon Sep 17 00:00:00 2001 From: nperez Date: Fri, 31 May 2024 12:32:57 +0200 Subject: [PATCH 18/53] consistent names and rmss --- modules/CrossVal/Crossval_skill.R | 149 -------------- .../Crossval_anomalies.R | 0 modules/Crossval/Crossval_skill.R | 171 ++++++++++++++++ modules/{CrossVal => Crossval}/R/CRPS_clim.R | 0 modules/{CrossVal => Crossval}/R/RPS_clim.R | 0 modules/{CrossVal => Crossval}/R/tmp/Bias.R | 0 modules/{CrossVal => Crossval}/R/tmp/Corr.R | 0 modules/{CrossVal => Crossval}/R/tmp/Eno.R | 0 .../{CrossVal => Crossval}/R/tmp/GetProbs.R | 0 modules/{CrossVal => Crossval}/R/tmp/RPS.R | 0 modules/{CrossVal => Crossval}/R/tmp/RPSS.R | 0 .../R/tmp/RandomWalkTest.R | 0 modules/{CrossVal => Crossval}/R/tmp/SprErr.R | 0 .../recipe_crossval_ecvs.yml | 5 +- .../Crossval/recipe_crossval_ecvs_global.yml | 184 ++++++++++++++++++ tools/check_recipe.R | 2 +- 16 files changed, 359 insertions(+), 152 deletions(-) delete mode 100644 modules/CrossVal/Crossval_skill.R rename modules/{CrossVal => Crossval}/Crossval_anomalies.R (100%) create mode 100644 modules/Crossval/Crossval_skill.R rename modules/{CrossVal => Crossval}/R/CRPS_clim.R (100%) rename modules/{CrossVal => Crossval}/R/RPS_clim.R (100%) rename modules/{CrossVal => Crossval}/R/tmp/Bias.R (100%) rename modules/{CrossVal => Crossval}/R/tmp/Corr.R (100%) rename modules/{CrossVal => Crossval}/R/tmp/Eno.R (100%) rename modules/{CrossVal => Crossval}/R/tmp/GetProbs.R (100%) rename modules/{CrossVal => Crossval}/R/tmp/RPS.R (100%) rename modules/{CrossVal => Crossval}/R/tmp/RPSS.R (100%) rename modules/{CrossVal => Crossval}/R/tmp/RandomWalkTest.R (100%) rename modules/{CrossVal => Crossval}/R/tmp/SprErr.R (100%) rename modules/{CrossVal => Crossval}/recipe_crossval_ecvs.yml (98%) create mode 100644 modules/Crossval/recipe_crossval_ecvs_global.yml diff --git a/modules/CrossVal/Crossval_skill.R b/modules/CrossVal/Crossval_skill.R deleted file mode 100644 index c142b94f..00000000 --- a/modules/CrossVal/Crossval_skill.R +++ /dev/null @@ -1,149 +0,0 @@ - -source("modules/Saving/Saving.R") -source("modules/Crossval/R/tmp/RPS.R") -source("modules/Crossval/R/RPS_clim.R") -source("modules/Crossval/R/CRPS_clim.R") -source("modules/Crossval/R/tmp/RPSS.R") -source("modules/Crossval/R/tmp/RandomWalkTest.R") -source("modules/Corssval/R/tmp/Corr.R") -source("modules/Crossval/R/tmp/Bias.R") -source("modules/Crossval/R/tmp/SprErr.R") -source("modules/Crossval/R/tmp/Eno.R") - -## data_crossval is the result from function full_crossval_anomalies or similar. 
-## this is a list with the required elements: - ## probs is a list with - ## probs$hcst_ev and probs$obs_ev - ## probs$hcst_ev will have as many elements in the $Probabilities$percentiles - ## each element will be an array with 'cat' dimension - ## the same for probs$obs_ev - ## hcst is a s2dv_cube for the post-processed hindcast for the evalutaion indices - ## in this case cross validated anomalies - ## obs is a s2dv_cube for the post-processed obs - ## in this case cross validated anomalies - ## fcst is a s2dv_cube for the post-processed fcst - ## in this case cross anomalies with the full hindcast period - ## this object is not required for skill assessment - ## hcst.full_val and obs.full_val are the original data to compute mean bias - ## cat_lims used to compute the probabilities - ## this object is not required for skill assessment - ## ref_obs_tr is an array with the cross-validate observed anomalies - ## to be used as reference forecast in the CRPSS and CRPS_clim - ## it is computed from the training indices -## the recipe could be used to read the Percentiles -## if fair is TRUE, the nmemb used to compute the probabilities is needed - ## nmemb_ref is the number of year - 1 in case climatological forecast is the reference -Crossval_skill <- function(recipe, data_crossval, - fair = FALSE, nmemb = NULL, nmemb_ref = NULL) { - ## START SKILL ASSESSMENT: - # RPS - rps <- RPS(exp = res$probs$hcst_ev[[1]], obs = res$probs$obs_ev[[1]], memb_dim = NULL, - cat_dim = 'cat', cross.val = FALSE, time_dim = 'syear', - Fair = fair, nmemb = nmemb, - ncores = recipe$Analysis$ncores) - rps_clim <- Apply(list(res$probs$obs_ev[[1]]), - target_dims = c('cat', 'syear'), - RPS_clim, bin_dim_abs = 'cat', Fair = fair, - cross.val = FALSE, ncores = recipe$Analysis$ncores)$output1 - # RPSS - rpss <- RPSS(exp = res$probs$hcst_ev[[1]], obs = res$probs$obs_ev[[1]], - time_dim = 'syear', memb_dim = NULL, - cat_dim = 'cat', Fair = fair, nmemb = nmemb, - cross.val = FALSE, - ncores = recipe$Analysis$ncores) - - # CRPS - crps <- CRPS(exp = res$hcst$data, obs = res$obs$data, - time_dim = 'syear', memb_dim = 'ensemble', - Fair = fair, - ncores = recipe$Analysis$ncores) - crps_clim <- CRPS(exp = res$ref_obs_tr, obs = res$obs$data, - time_dim = 'syear', memb_dim = 'ensemble', - Fair = fair, - ncores = recipe$Analysis$ncores) - - # CRPSS - crpss <- CRPSS(exp = res$hcst$data, obs = res$obs$data, ref = res$ref_obs_tr, - memb_dim = 'ensemble', Fair = fair, - time_dim = 'syear', clim.cross.val = FALSE, - ncores = recipe$Analysis$ncores) - - # Corr - enscorr <- Corr(res$hcst$data, res$obs$data, - dat_dim = NULL, - time_dim = 'syear', - method = 'pearson', - memb_dim = 'ensemble', - memb = F, - conf = F, - pval = F, - sign = T, - alpha = 0.05, - ncores = recipe$Analysis$ncores) - - # Mean Bias - mean_bias <- Bias(res$hcst.full_val$data, res$obs.full_val$data, - time_dim = 'syear', - memb_dim = 'ensemble', - alpha = 0.05, - ncores = recipe$Analysis$ncores) - - # Spread error ratio - enssprerr <- SprErr(exp = res$hcst$data, obs = res$obs$data, - memb_dim = 'ensemble', dat_dim = NULL, - time_dim = 'syear', pval = TRUE, - ncores = recipe$Analysis$ncores) - enssprerr_sign <- enssprerr$p.val - enssprerr_sign <- enssprerr_sign <= 0.05 - enssprerr <- enssprerr$ratio - - # RMSE - rms <- RMS(exp = res$hcst$data, obs = res$obs$data, - memb_dim = 'ensemble', dat_dim = NULL, - time_dim = 'syear', alpha = 0.05, - ncores = recipe$Analysis$ncores) - - # RMSS - rmss <- RMSSS(exp = res$hcst$data, obs = res$obs$data, - ref = res$ref_obs_tr, - 
memb_dim = 'ensemble', dat_dim = NULL, - time_dim = 'syear', alpha = 0.05, sign = TRUE, - ncores = recipe$Analysis$ncores) - - skill_metrics <- list(mean_bias = mean_bias$bias, - mean_bias_significance = mean_bias$sig, - enscorr = enscorr$corr, - enscorr_significance = enscorr$sign, - enssprerr = enssprerr, - enssprerr_significance = enssprerr_sign, - rps = rps, rps_clim = rps_clim, crps = crps, crps_clim = crps_clim, - rpss = rpss$rpss, rpss_significance = rpss$sign, #crps = crps, - crpss = crpss$crpss, crpss_significance = crpss$sign, - rms = rms$rms, rmsss = rmss$rmsss, rmsss_significance = rmss$sign) - original <- recipe$Run$output_dir - recipe$Run$output_dir <- paste0(original, "/outputs/Skill/") - - skill_metrics <- lapply(skill_metrics, function(x) { - if (is.logical(x)) { - dims <- dim(x) - res <- as.numeric(x) - dim(res) <- dims - } else { - res <- x - } - return(res) - }) - # Save metrics - save_metrics(recipe = recipe, - metrics = skill_metrics, - data_cube = data$hcst, agg = 'global', - outdir = recipe$Run$output_dir) - - recipe$Run$output_dir <- original - # reduce dimension to work with Visualization module: - skill_metrics <- lapply(skill_metrics, function(x) {drop(x)}) - skill_metrics <- lapply(skill_metrics, function(x){ - InsertDim(x, pos = 1, len = 1, name = 'var')}) - return(skill_metrics) -} - diff --git a/modules/CrossVal/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R similarity index 100% rename from modules/CrossVal/Crossval_anomalies.R rename to modules/Crossval/Crossval_anomalies.R diff --git a/modules/Crossval/Crossval_skill.R b/modules/Crossval/Crossval_skill.R new file mode 100644 index 00000000..58007a5a --- /dev/null +++ b/modules/Crossval/Crossval_skill.R @@ -0,0 +1,171 @@ + +source("modules/Saving/Saving.R") +source("modules/Crossval/R/tmp/RPS.R") +source("modules/Crossval/R/RPS_clim.R") +source("modules/Crossval/R/CRPS_clim.R") +source("modules/Crossval/R/tmp/RPSS.R") +source("modules/Crossval/R/tmp/RandomWalkTest.R") +source("modules/Crossval/R/tmp/Corr.R") +source("modules/Crossval/R/tmp/Bias.R") +source("modules/Crossval/R/tmp/SprErr.R") +source("modules/Crossval/R/tmp/Eno.R") + +## data_crossval is the result from function full_crossval_anomalies or similar. 
+## this is a list with the required elements: + ## probs is a list with + ## probs$hcst_ev and probs$obs_ev + ## probs$hcst_ev will have as many elements in the $Probabilities$percentiles + ## each element will be an array with 'cat' dimension + ## the same for probs$obs_ev + ## hcst is a s2dv_cube for the post-processed hindcast for the evalutaion indices + ## in this case cross validated anomalies + ## obs is a s2dv_cube for the post-processed obs + ## in this case cross validated anomalies + ## fcst is a s2dv_cube for the post-processed fcst + ## in this case cross anomalies with the full hindcast period + ## this object is not required for skill assessment + ## hcst.full_val and obs.full_val are the original data to compute mean bias + ## cat_lims used to compute the probabilities + ## this object is not required for skill assessment + ## ref_obs_tr is an array with the cross-validate observed anomalies + ## to be used as reference forecast in the CRPSS and CRPS_clim + ## it is computed from the training indices +## the recipe could be used to read the Percentiles +## if fair is TRUE, the nmemb used to compute the probabilities is needed + ## nmemb_ref is the number of year - 1 in case climatological forecast is the reference +Crossval_skill <- function(recipe, data_crossval, + fair = FALSE, nmemb = NULL, nmemb_ref = NULL) { + ncores <- recipe$Analysis$ncores + alpha <- recipe$Analysis$Skill$alpha + na.rm <- recipe$Analysis$remove_NAs + if (is.null(alpha)) { + alpha <- 0.05 + } + ## START SKILL ASSESSMENT: + skill_metrics <- list() + requested_metrics <- strsplit(recipe$Analysis$Workflow$Skill$metric, + ", | |,")[[1]] + + if ('rps' %in% requested_metrics) { + rps <- RPS(exp = res$probs$hcst_ev[[1]], + obs = res$probs$obs_ev[[1]], memb_dim = NULL, + cat_dim = 'cat', cross.val = FALSE, time_dim = 'syear', + Fair = fair, nmemb = nmemb, + ncores = ncores) + rps_clim <- Apply(list(res$probs$obs_ev[[1]]), + target_dims = c('cat', 'syear'), + RPS_clim, bin_dim_abs = 'cat', Fair = fair, + cross.val = FALSE, ncores = ncores)$output1 + skill_metrics$rps <- rps + skill_metrics$rps_clim <- rps_clim + } + if ('rpss' %in% requested_metrics) { + rpss <- RPSS(exp = res$probs$hcst_ev[[1]], obs = res$probs$obs_ev[[1]], + ref = NULL, # ref is 1/3 by default if terciles + time_dim = 'syear', memb_dim = NULL, + cat_dim = 'cat', nmemb = nmemb, + dat_dim = NULL, + prob_thresholds = 0.1, # un use param when providing probs + indices_for_clim = NULL, + Fair = fair, weights_exp = NULL, weights_ref = NULL, + cross.val = FALSE, na.rm = na.rm, + sig_method.type = 'two.sided.approx', alpha = alpha, + ncores = ncores) + skill_metrics$rpss <- rpss$rpss + skill_metrics$rpss_significance <- rpss$sign + } + + if ('crps' %in% requested_metrics) { + crps <- CRPS(exp = res$hcst$data, obs = res$obs$data, + time_dim = 'syear', memb_dim = 'ensemble', + Fair = fair, + ncores = ncores) + skill_metrics$crps <- crps + crps_clim <- CRPS(exp = res$ref_obs_tr, obs = res$obs$data, + time_dim = 'syear', memb_dim = 'ensemble', + Fair = fair, ncores = ncores) + skill_metrics$crps_clim <- crps_clim + } + if ('crpss' %in% requested_metrics) { + crpss <- CRPSS(exp = res$hcst$data, obs = res$obs$data, ref = res$ref_obs_tr, + memb_dim = 'ensemble', Fair = fair, + time_dim = 'syear', clim.cross.val = FALSE, + ncores = ncores) + skill_metrics$crpss <- crpss$crpss + skill_metrics$crpss_significance <- crpss$sign + } + + if ('enscorr' %in% requested_metrics) { + enscorr <- Corr(res$hcst$data, res$obs$data, + dat_dim = NULL, + time_dim = 
'syear',
+                    method = 'pearson',
+                    memb_dim = 'ensemble',
+                    memb = FALSE,
+                    conf = FALSE,
+                    pval = FALSE,
+                    sign = TRUE,
+                    alpha = alpha,
+                    ncores = ncores)
+    skill_metrics$enscorr <- enscorr$corr
+    skill_metrics$enscorr_significance <- enscorr$sign
+  }
+  if ('mean_bias' %in% requested_metrics) {
+    mean_bias <- Bias(res$hcst.full_val$data, res$obs.full_val$data,
+                      time_dim = 'syear',
+                      memb_dim = 'ensemble',
+                      alpha = alpha,
+                      ncores = ncores)
+    skill_metrics$mean_bias <- mean_bias$bias
+    skill_metrics$mean_bias_significance <- mean_bias$sig
+  }
+  if ('enssprerr' %in% requested_metrics) {
+    enssprerr <- SprErr(exp = res$hcst$data, obs = res$obs$data,
+                        memb_dim = 'ensemble', dat_dim = NULL,
+                        time_dim = 'syear', pval = TRUE,
+                        ncores = ncores)
+    skill_metrics$enssprerr <- enssprerr$ratio
+    skill_metrics$enssprerr_significance <- enssprerr$p.val <= alpha
+  }
+  if ('rms' %in% requested_metrics) {
+    rms <- RMS(exp = res$hcst$data, obs = res$obs$data,
+               memb_dim = 'ensemble', dat_dim = NULL,
+               time_dim = 'syear', alpha = alpha,
+               ncores = ncores)
+    skill_metrics$rms <- rms$rms
+  }
+  if ('rmss' %in% requested_metrics) {
+    rmss <- RMSSS(exp = res$hcst$data, obs = res$obs$data,
+                  ref = res$ref_obs_tr,
+                  memb_dim = 'ensemble', dat_dim = NULL,
+                  time_dim = 'syear', alpha = alpha, sign = TRUE,
+                  ncores = ncores)
+    skill_metrics$rmss <- rmss$rmsss
+    skill_metrics$rmss_significance <- rmss$sign
+  }
+  original <- recipe$Run$output_dir
+  recipe$Run$output_dir <- paste0(original, "/outputs/Skill/")
+
+  skill_metrics <- lapply(skill_metrics, function(x) {
+    if (is.logical(x)) {
+      dims <- dim(x)
+      res <- as.numeric(x)
+      dim(res) <- dims
+    } else {
+      res <- x
+    }
+    return(res)
+  })
+  # Save metrics
+  save_metrics(recipe = recipe,
+               metrics = skill_metrics,
+               data_cube = res$hcst, agg = 'global',
+               outdir = recipe$Run$output_dir)
+
+  recipe$Run$output_dir <- original
+  # reduce dimension to work with Visualization module:
+  skill_metrics <- lapply(skill_metrics, function(x) {drop(x)})
+  skill_metrics <- lapply(skill_metrics, function(x) {
+    InsertDim(x, pos = 1, len = 1, name = 'var')})
+  return(skill_metrics)
+}
+
diff --git a/modules/CrossVal/R/CRPS_clim.R b/modules/Crossval/R/CRPS_clim.R
similarity index 100%
rename from modules/CrossVal/R/CRPS_clim.R
rename to modules/Crossval/R/CRPS_clim.R
diff --git a/modules/CrossVal/R/RPS_clim.R b/modules/Crossval/R/RPS_clim.R
similarity index 100%
rename from modules/CrossVal/R/RPS_clim.R
rename to modules/Crossval/R/RPS_clim.R
diff --git a/modules/CrossVal/R/tmp/Bias.R b/modules/Crossval/R/tmp/Bias.R
similarity index 100%
rename from modules/CrossVal/R/tmp/Bias.R
rename to modules/Crossval/R/tmp/Bias.R
diff --git a/modules/CrossVal/R/tmp/Corr.R b/modules/Crossval/R/tmp/Corr.R
similarity index 100%
rename from modules/CrossVal/R/tmp/Corr.R
rename to modules/Crossval/R/tmp/Corr.R
diff --git a/modules/CrossVal/R/tmp/Eno.R b/modules/Crossval/R/tmp/Eno.R
similarity index 100%
rename from modules/CrossVal/R/tmp/Eno.R
rename to modules/Crossval/R/tmp/Eno.R
diff --git a/modules/CrossVal/R/tmp/GetProbs.R b/modules/Crossval/R/tmp/GetProbs.R
similarity index 100%
rename from modules/CrossVal/R/tmp/GetProbs.R
rename to modules/Crossval/R/tmp/GetProbs.R
diff --git a/modules/CrossVal/R/tmp/RPS.R b/modules/Crossval/R/tmp/RPS.R
similarity index 100%
rename from modules/CrossVal/R/tmp/RPS.R
rename to modules/Crossval/R/tmp/RPS.R
diff --git a/modules/CrossVal/R/tmp/RPSS.R b/modules/Crossval/R/tmp/RPSS.R
similarity index 100%
rename from modules/CrossVal/R/tmp/RPSS.R
rename to modules/Crossval/R/tmp/RPSS.R
diff --git a/modules/CrossVal/R/tmp/RandomWalkTest.R
b/modules/Crossval/R/tmp/RandomWalkTest.R
similarity index 100%
rename from modules/CrossVal/R/tmp/RandomWalkTest.R
rename to modules/Crossval/R/tmp/RandomWalkTest.R
diff --git a/modules/CrossVal/R/tmp/SprErr.R b/modules/Crossval/R/tmp/SprErr.R
similarity index 100%
rename from modules/CrossVal/R/tmp/SprErr.R
rename to modules/Crossval/R/tmp/SprErr.R
diff --git a/modules/CrossVal/recipe_crossval_ecvs.yml b/modules/Crossval/recipe_crossval_ecvs.yml
similarity index 98%
rename from modules/CrossVal/recipe_crossval_ecvs.yml
rename to modules/Crossval/recipe_crossval_ecvs.yml
index 3ec15379..2cc339c3 100644
--- a/modules/CrossVal/recipe_crossval_ecvs.yml
+++ b/modules/Crossval/recipe_crossval_ecvs.yml
@@ -116,6 +116,7 @@ Analysis:
   #  NAO: {obsproj: yes, save: 'all', plot_ts: yes, plot_sp: yes}
     Skill:
       metric: mean_bias enscorr rpss crpss enssprerr # List of skill metrics separated by spaces or commas. (Mandatory, str)
+      alpha: 0.05
       save: 'all' # Options: 'all', 'none' (Mandatory, str)
     Statistics:
       metric: cov std var n_eff # List of statistics separated by spaces or commas. (Mandatory, str)
@@ -169,7 +170,7 @@ Run:
   auto_conf:
     script: ./example_scripts/multimodel_seasonal.R # replace with the path to your script
     expid: a6wq # replace with your EXPID
-    hpc_user: bsc032762 # replace with your hpc username
+    hpc_user: bsc032339 # replace with your hpc username
     wallclock: 01:00 # wallclock for single-model jobs, hh:mm
     wallclock_multimodel: 02:00 # wallclock for multi-model jobs, hh:mm. If empty, 'wallclock' will be used.
     processors_per_job: 4 # processors to request for each single-model job.
@@ -178,6 +179,6 @@ Run:
     custom_directives_multimodel: ['#SBATCH --exclusive', '#SBATCH --constraint=highmem'] # custom scheduler directives for multi-model jobs. If empty, 'custom_directives' will be used.
     platform: nord3v2 # platform (for now, only nord3v2 is available)
     email_notifications: yes # enable/disable email notifications. Change it if you want to.
-    email_address: victoria.agudetse@bsc.es # replace with your email address
+    email_address: nuria.perez@bsc.es # replace with your email address
     notify_completed: yes # notify me by email when a job finishes
     notify_failed: yes # notify me by email when a job fails
diff --git a/modules/Crossval/recipe_crossval_ecvs_global.yml b/modules/Crossval/recipe_crossval_ecvs_global.yml
new file mode 100644
index 00000000..830225af
--- /dev/null
+++ b/modules/Crossval/recipe_crossval_ecvs_global.yml
@@ -0,0 +1,184 @@
+# IMPORTANT: This recipe is not intended to represent a real workflow: it is only a template showcasing ALL available options.
+Description:
+  Author: N. Pérez-Zanón
+  Info: This recipe can be used to test Crossval_anomalies.R and Crossval_skill.R for single-model and multimodel runs.
+Analysis:
+  Horizon: seasonal # Mandatory, str: 'seasonal' or 'decadal'. Subseasonal is in development
+  Variables:
+    # name: variable name(s) in the /esarchive (Mandatory, str)
+    # freq: 'monthly_mean', 'daily' or 'daily_mean' (Mandatory, str)
+    # units: desired data units for each variable. Only available for temperature,
+    # precipitation, and pressure variables.
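+    # For instance, 'K' below keeps temperature in Kelvin, while 'C' would
+    # request a conversion to Celsius at loading time (illustrative reading of
+    # the units option; see also the multi-variable example further down):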
+ - {name: 'tas', freq: 'monthly_mean', units: 'K'} + # To request more variables to be divided in atomic recipes, add them this way: + # - {name: 'prlr', freq: 'monthly_mean', units: 'mm'} + # To request multiple variables *in the same* atomic recipe, add them this way: + # - {name: 'tas, prlr', freq: 'monthly_mean', units: {tas: 'C', prlr: 'mm'}} + Datasets: + System: + # name: System name (Mandatory, str) + # member: 'all' or individual members, separated by a comma and in quotes (decadal only, str) + - {name: 'ECMWF-SEAS5.1', member: 'all'} + - {name: 'Meteo-France-System7'} + # To request more Systems to be divided in atomic recipes, add them this way: + # - {name: 'Meteo-France-System7'} + Multimodel: + execute: no # Either yes/true or no/false (Mandatory, bool) + approach: pooled # Multimodel computation approach. 'pooled' currently the only option (str) + createFrom: Anomalies # Which module should the anomalies be created from (str) + Reference: + - {name: 'ERA5'} # Reference name (Mandatory, str) + # To request more References to be divided into atomic recipes, add them this way: + # - {name: 'ERA5Land'} + Time: + sdate: + - '0101' + - '0201' + - '0301' + - '0401' + - '0501' + - '0601' + - '0701' + - '0801' + - '0901' + - '1001' + - '1101' + - '1201' # Start date, 'mmdd' (Mandatory, int) + # To request more startdates to be divided into atomic recipes, add them this way: + # - '0101' + # - '0201' + # ... + fcst_year: '2020' # Forecast initialization year 'YYYY' (Optional, int) + hcst_start: '1993' # Hindcast initialization start year 'YYYY' (Mandatory, int) + hcst_end: '2016' # Hindcast initialization end year 'YYYY' (Mandatory, int) + ftime_min: 1 # First forecast time step in months. Starts at “1”. (Mandatory, int) + ftime_max: 6 # Last forecast time step in months. Starts at “1”. (Mandatory, int) + Region: + # latmin: minimum latitude (Mandatory, int) + # latmax: maximum latitude (Mandatory, int) + # lonmin: # minimum longitude (Mandatory, int) + # lonmax: # maximum longitude (Mandatory, int) + - {name: global, latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9} + # To request more regions to be divided in atomic recipes, add them this way: + # {name: "nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 60} + Regrid: + method: conservative # Interpolation method (Mandatory, str) + type: to_system # Interpolate to: 'to_system', 'to_reference', 'none', + # or CDO-accepted grid. (Mandatory, str) + Workflow: + # This is the section of the recipe where the parameters for each module are specified + Calibration: + method: raw # Calibration method. (Mandatory, str) + save: 'none' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str) + Anomalies: + compute: yes # Either yes/true or no/false (Mandatory, bool) + cross_validation: no # Either yes/true or no/false (Mandatory if 'compute: yes', bool) + save: 'all' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str) + Downscaling: + # Assumption 1: leave-one-out cross-validation is always applied + # Assumption 2: for analogs, we select the best analog (minimum distance) + type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'. + int_method: conservative # regridding method accepted by CDO. (Mandatory, str) + bc_method: bias # If type=intbc. Options: 'bias', 'calibration', 'quantile_mapping', 'qm', 'evmos', 'mse_min', 'crps_min', 'rpc-based'. + lr_method: # If type=intlr. Options: 'basic', 'large_scale', '9nn' + log_reg_method: # If type=logreg. 
Options: 'ens_mean', 'ens_mean_sd', 'sorted_members'
+      target_grid: /esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # nc file or grid accepted by CDO
+      nanalogs: # If type analogs. Number of analogs to be searched
+      save: 'all' # Options: 'all'/'none'/'exp_only' (Mandatory, str)
+    Time_aggregation:
+      execute: no # Either yes/true or no/false. Defaults to false. (Mandatory, bool)
+      method: average # Aggregation method. Available methods: 'average', 'accumulated'. (Mandatory, string)
+      # ini and end: lists of paired initial and final time steps to aggregate.
+      # In this example, aggregate from 1 to 2, from 2 to 3 and from 1 to 3
+      ini: [1, 2, 1]
+      end: [2, 3, 3]
+      # user_def: List of lists. Custom user-defined forecast times to aggregate.
+      # Elements should be named; names can be chosen by the user.
+      # An R expression can be entered using '!expr'; it will be evaluated by the code.
+      # If both ini/end and user_def are defined, ini/end takes preference.
+      user_def:
+        DJF_Y1: [1, 3] # aggregate from 1 to 3 forecast times
+        DJF: !expr sort(c(seq(1, 120, 12), seq(2, 120, 13), seq(3, 120, 14))) # aggregate 1,2,3,13,14,15,...
+    Indices:
+      ## Indices available: - NAO (for psl and/or z500);
+      # - Nino1+2, Nino3, Nino3.4, Nino4 (for tos)
+      ## Each index can only be computed if its area is within the selected region.
+      # obsproj: NAO computation method (see s2dv::NAO()). Default is yes/true. (Optional, bool)
+      # save: What to save. Options: 'all'/'none'. Default is 'all'.
+      # plot_ts: Generate time series plot? Default is yes/true. (Optional, bool)
+      # plot_sp: Generate spatial pattern plot? Default is yes/true. (Optional, bool)
+      # alpha: Significance threshold. Default value is 0.05 (Optional, numeric)
+      #Nino1+2: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05}
+      #Nino3: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05}
+      #Nino3.4: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05}
+      #Nino4: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05}
+      # Also available if variable is psl and/or z500:
+      # NAO: {obsproj: yes, save: 'all', plot_ts: yes, plot_sp: yes}
+    Skill:
+      metric: mean_bias enscorr rpss crpss enssprerr rps crps rms rmss # List of skill metrics separated by spaces or commas. (Mandatory, str)
+      alpha: 0.05
+      save: 'all' # Options: 'all', 'none' (Mandatory, str)
+    Statistics:
+      metric: cov std var n_eff # List of statistics separated by spaces or commas. (Mandatory, str)
+      save: 'all' # Options: 'all', 'none' (Mandatory, str)
+    Probabilities:
+      percentiles: [[1/3, 2/3]] # Thresholds
+      # for quantiles and probability categories. Each set of thresholds should be
+      # enclosed within brackets. For now, they are INDEPENDENT from skill metrics. (Optional)
+      save: 'percentiles_only' # Options: 'all', 'none', 'bins_only', 'percentiles_only' (Mandatory, str)
+    Visualization:
+      plots: skill_metrics, most_likely_terciles, forecast_ensemble_mean # Types of plots to generate (Optional, str)
+      multi_panel: no # Multi-panel plot or single-panel plots. Default is 'no/false'. (Optional, bool)
+      projection: 'robinson' # Options: 'cylindrical_equidistant', 'robinson', 'lambert_europe'. Default is cylindrical equidistant. (Optional, str)
+      mask_terciles: no # Whether to mask the non-significant points by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str)
+      dots_terciles: yes # Whether to dot the non-significant points by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false.
(Optional, str) + mask_ens: no # Whether to mask the non-significant points by rpss in the forecast ensemble mean plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + file_format: 'PNG' # Final file format of the plots. Formats available: PNG, JPG, JPEG, EPS. Defaults to PDF. + Scorecards: + execute: yes # yes/no + regions: + # Mandatory: Define regions for which the spatial aggregation will be performed. + # The regions must be included within the area defined in the 'Analysis:Region' section. + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: 1, 2, 3 # Mandatory, int: start months to visualise in scorecard table. Options: 'all' or a sequence of numbers. + metric: mean_bias enscorr rpss crpss enssprerr # Mandatory: metrics to visualise in scorecard table + metric_aggregation: 'score' # Mandatory, str: level of aggregation for skill scores. Options: 'score' or 'skill' + inf_to_na: True # Optional, bool: set inf values in data to NA, default is no/False + table_label: NULL # Optional, str: extra information to add in scorecard table title + fileout_label: NULL # Optional, str: extra information to add in scorecard output filename + col1_width: NULL # Optional, int: to adjust width of first column in scorecards table + col2_width: NULL # Optional, int: to adjust width of second column in scorecards table + calculate_diff: False # Mandatory, bool: True/False + cross.method: 'leave-one-out' # Default 'leave-one-out' + ncores: 16 # Number of cores to be used in parallel computation. + # If left empty, defaults to 1. (Optional, int) + remove_NAs: yes # Whether to remove NAs. + # If left empty, defaults to no/false. (Optional, bool) + Output_format: 'Scorecards' # 'S2S4E' or 'Scorecards'. Determines the format of the output. Default is 'S2S4E'. +Run: + filesystem: esarchive # Name of the filesystem as defined in the archive configuration file + Loglevel: INFO # Minimum category of log messages to display: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. + # Default value is 'INFO'. (Optional, str) + Terminal: yes # Optional, bool: Whether to display log messages in the terminal. + # Default is yes/true. + output_dir: /esarchive/scratch/nperez/git4/ # Output directory. Must have write permissions. (Mandatory, str) + code_dir: /esarchive/scratch/nperez/git4/sunset/ # Directory where the code is stored. Is used when launching jobs (not running interactively) + autosubmit: no # Whether or not to run with Autosubmit. Only for non-atomic recipes (not running interactively) + # fill only if using autosubmit + auto_conf: + script: ./example_scripts/multimodel_seasonal.R # replace with the path to your script + expid: a6wq # replace with your EXPID + hpc_user: bsc032339 # replace with your hpc username + wallclock: 01:00 # wallclock for single-model jobs, hh:mm + wallclock_multimodel: 02:00 # wallclock for multi-model jobs, hh:mm. If empty, 'wallclock' will be used. + processors_per_job: 4 # processors to request for each single-model job. + processors_multimodel: 16 # processors to request for each multi-model job. If empty, 'processors_per_job' will be used. + custom_directives: ['#SBATCH --exclusive'] # custom scheduler directives for single-model jobs. + custom_directives_multimodel: ['#SBATCH --exclusive', '#SBATCH --constraint=highmem'] # custom scheduler directives for multi-model jobs. 
If empty, 'custom_directives' will be used. + platform: nord3v2 # platform (for now, only nord3v2 is available) + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 245e4e3d..5c5ab721 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -596,7 +596,7 @@ check_recipe <- function(recipe) { "mean_bias", "mean_bias_ss", "enssprerr", "rps_clim", "rps_clim_syear", "crps_clim", "crps_clim_syear", "enscorr_specs", "frps_specs", "rpss_specs", - "frpss_specs", "bss10_specs", "bss90_specs", "rms") + "frpss_specs", "bss10_specs", "bss90_specs", "rms", "rmss") if ("Skill" %in% names(recipe$Analysis$Workflow)) { if (is.null(recipe$Analysis$Workflow$Skill$metric)) { error(recipe$Run$logger, -- GitLab From 34b136ab231da67d4b18995ae08de4a5950e7016 Mon Sep 17 00:00:00 2001 From: nperez Date: Tue, 4 Jun 2024 14:34:51 +0200 Subject: [PATCH 19/53] multimodel pool and crps --- modules/Crossval/Crossval_anomalies.R | 10 +- modules/Crossval/recipe_crossval_ecvs.yml | 2 +- modules/Multimodel/Multimodel.R | 103 +++++++-- modules/Multimodel/dims_multimodel.R | 6 +- modules/Multimodel/load_multimodel.R | 146 ++++++------ modules/Multimodel/load_multimodel_mean.R | 260 ++++++++++++++++++++++ recipe_tas.yml | 13 +- testing_multimodel.R | 137 ++++++++++++ 8 files changed, 560 insertions(+), 117 deletions(-) create mode 100644 modules/Multimodel/load_multimodel_mean.R create mode 100644 testing_multimodel.R diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R index b097faa6..fa421f22 100644 --- a/modules/Crossval/Crossval_anomalies.R +++ b/modules/Crossval/Crossval_anomalies.R @@ -234,13 +234,17 @@ Crossval_anomalies <- function(recipe, data) { } } # Save ensemble mean for multimodel option: - hcst_EM <- MeanDims(ano_hcst$data, 'ensemble', drop = T) - save_metrics(recipe = recipe, metrics = list(hcst_EM = hcst_EM), + hcst_EM <- MeanDims(ano_hcst$data, 'ensemble', drop = T) + save_metrics(recipe = recipe, + metrics = list(hcst_EM = + Subset(hcst_EM, along = 'dat', indices = 1, drop = 'selected')), data_cube = data$hcst, agg = agg, module = "statistics") if (!is.null(data$fcst)) { fcst_EM <- MeanDims(data$fcst$data, 'ensemble', drop = T) - save_metrics(recipe = recipe, metrics = list(fcst_EM = fcst_EM), + save_metrics(recipe = recipe, + metrics = list(fcst_EM = + Subset(fcst_EM, along = 'dat', indices = 1, drop = 'selected')), data_cube = data$fcst, agg = agg, module = "statistics") } diff --git a/modules/Crossval/recipe_crossval_ecvs.yml b/modules/Crossval/recipe_crossval_ecvs.yml index 2cc339c3..e23495ec 100644 --- a/modules/Crossval/recipe_crossval_ecvs.yml +++ b/modules/Crossval/recipe_crossval_ecvs.yml @@ -50,7 +50,7 @@ Analysis: # ... fcst_year: '2020' # Forecast initialization year 'YYYY' (Optional, int) hcst_start: '1993' # Hindcast initialization start year 'YYYY' (Mandatory, int) - hcst_end: '2016' # Hindcast initialization end year 'YYYY' (Mandatory, int) + hcst_end: '1996' # Hindcast initialization end year 'YYYY' (Mandatory, int) ftime_min: 1 # First forecast time step in months. Starts at “1”. (Mandatory, int) ftime_max: 6 # Last forecast time step in months. Starts at “1”. 
(Mandatory, int) Region: diff --git a/modules/Multimodel/Multimodel.R b/modules/Multimodel/Multimodel.R index 192e9292..7a9eb670 100644 --- a/modules/Multimodel/Multimodel.R +++ b/modules/Multimodel/Multimodel.R @@ -13,32 +13,93 @@ source('modules/Multimodel/load_multimodel_splitted.R') source('modules/Multimodel/dims_multimodel.R') source('modules/Multimodel/build_multimodel.R') source('modules/Multimodel/clean_multimodel.R') +library(startR) +library(CSTools) +library(yaml) +recipe <- read_yaml("recipe_tas.yml") +source("modules/Saving/R/get_dir.R") +source("modules/Loading/Loading.R") +recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "recipe_tas_20240604104736/") +#recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "recipe_tas_20240531165840/") +# DON'T KNOW HOW TO SOLVE LOGGER: +recipe$Run$Loglevel <- 'INFO' +recipe$Run$logfile <- "/esarchive/scratch/nperez/git4//recipe_tas_20240531165840/logs/main.log" +recipe$Run$logger <- logger <- log4r::logger(threshold = recipe$Run$Loglevel, + appenders = list(console_appender(layout = default_log_layout()), + file_appender(logfile, append = TRUE, + layout = default_log_layout()))) Multimodel <- function(recipe) { # recipe: auto-s2s recipe as provided by read_yaml - - # Loading data saved in the jobs for individual models - if (tolower(recipe$Analysis$Datasets$Multimodel$split_loading) %in% c('true','yes')){ - # Load data splitting by system - data_aux <- load_multimodel_splitted(recipe) - - data <- data_aux$data - - files_hcst <- data_aux$files_hcst - files_fcst <- data_aux$files_fcst - files_obs <- data_aux$files_obs - - rm(data_aux) - - } else { - # Load data without splitting - data <- load_multimodel(recipe) - files_hcst <- data$hcst$attrs$source_files - files_fcst <- data$fcst$attrs$source_files - files_obs <- data$obs$attrs$source_files + ncores <- recipe$Analysis$ncores + cross.method <- recipe$Analysis$cross.method + if (is.null(cross.method)) { + cross.method <- 'leave-one-out' + } + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + round(eval(parse(text = y)),2)})}) + na.rm <- recipe$Analysis$remove_NAs + skill_metrics <- list() + requested_metrics <- strsplit(recipe$Analysis$Workflow$Skill$metric, + ", | |,")[[1]] + # Load data splitting by system individual members: + if (recipe$Analysis$Datasets$Multimodel$approach == 'pooled') { + data_aux <- load_multimodel(recipe) + datos <- append(list(obs = data_aux$obs$data), lapply(data_aux$hcst, function(x) x$data)) + names(datos)[-1] <- sapply(recipe$Analysis$Datasets$System, '[[', 'name') + # Compute crps /crpss and return forecast for visualization purposes + if (any(c('crps', 'crpss') %in% requested_metrics)) { + crps <- Apply(datos, target_dims = c('syear', 'ensemble'), + fun = function(obs, ...) 
{ + res <- abind(..., along = 2) + names(dim(res)) <- names(dim(obs)) + obs <- Subset(obs, along = 'ensemble', indices = 1, drop = 'selected') + s2dv:::.CRPS(exp = res, obs = obs, dat_dim = NULL, + time_dim = 'syear', memb_dim = 'ensemble')}, + ncores = ncores)$output1 + skill_metrics$crps <- crps + # Build the reference forecast: + sdate_dim <- dim(datos[[1]])['syear'] + cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, + amt.points = sdate_dim, + amt.points_cor = NULL) + tmp <- array(1:length(cross), c(syear = length(cross))) + ref_clim <- Apply(list(datos$obs, tmp), + target_dims = list(c('ensemble', 'syear'), NULL), + function(x, y) { + Subset(x, along = 'syear', indices = cross[[y]]$train.dexes, drop = T)}, + ncores = ncores, output_dims = 'ensemble')$output1 + datos <- append(list(ref = ref_clim), datos) + crps_clim <- Apply(datos[1:2], target_dims = c('syear', 'ensemble'), + fun = function(ref, obs) { + obs <- Subset(obs, along = 'ensemble', indices = 1, drop = 'selected') + s2dv:::.CRPS(exp = ref, obs = obs, dat_dim = NULL, + time_dim = 'syear', memb_dim = 'ensemble')}, + ncores = ncores)$output1 + skill_metrics$crps_clim <- crps_clim + + + crpss <- Apply(datos, target_dims = c('syear', 'ensemble'), + fun = function(ref, obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- names(dim(obs)) + obs <- Subset(obs, along = 'ensemble', indices = 1, drop = 'selected') + CRPSS(exp = res, obs = obs, ref = ref, + memb_dim = 'ensemble', Fair = FALSE, + time_dim = 'syear', clim.cross.val = FALSE)}, + ncores = ncores) + skill_metrics$crpss <- crpss$crpss + skill_metrics$crpss_significance <- crpss$sign + } + # What about spread or spread-to error ratio? + rm(datos) } - + # Always load ensemble mean and probabilities: + source("modules/Multimodel/load_multimodel_mean.R") + datos <- load_multimodel_mean(recipe) # Building the multi-model multimodel_aux <- build_multimodel(data, recipe) data <- multimodel_aux$data diff --git a/modules/Multimodel/dims_multimodel.R b/modules/Multimodel/dims_multimodel.R index f9076153..4b581379 100644 --- a/modules/Multimodel/dims_multimodel.R +++ b/modules/Multimodel/dims_multimodel.R @@ -7,11 +7,7 @@ dims_multimodel <- function(recipe) { archive <- read_yaml("conf/archive.yml")$esarchive ref.name <- recipe$Analysis$Datasets$Reference$name - if (tolower(recipe$Analysis$Horizon) == 'seasonal') { - exp.name <- recipe$Analysis$Datasets$System$models - } else if (tolower(recipe$Analysis$Horizon) == 'decadal') { - exp.name <- sapply(recipe$Analysis$Datasets$System$models, '[[', 'name') - } else {stop('Multimodel not implemented for this horizon.')} + exp.name <- sapply(recipe$Analysis$Datasets$System, '[[', 'name') store.freq <- recipe$Analysis$Variables$freq variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] exp_descrip <- archive$System[[exp.name[1]]] diff --git a/modules/Multimodel/load_multimodel.R b/modules/Multimodel/load_multimodel.R index c6bd4585..b4cf8c25 100644 --- a/modules/Multimodel/load_multimodel.R +++ b/modules/Multimodel/load_multimodel.R @@ -2,16 +2,13 @@ source("modules/Loading/R/dates2load.R") source("modules/Loading/R/get_timeidx.R") source("modules/Loading/R/check_latlon.R") - +source("modules/Saving/Saving.R") +library(abind) load_multimodel <- function(recipe) { archive <- read_yaml("conf/archive.yml")$esarchive ref.name <- recipe$Analysis$Datasets$Reference$name - if (tolower(recipe$Analysis$Horizon) == 'seasonal') { - exp.name <- recipe$Analysis$Datasets$System$models - } else if 
(tolower(recipe$Analysis$Horizon) == 'decadal') { - exp.name <- sapply(recipe$Analysis$Datasets$System$models, '[[', 'name') - } else {stop('Multimodel not implemented for this horizon.')} + exp.name <- sapply(recipe$Analysis$Datasets$System, '[[', 'name') store.freq <- recipe$Analysis$Variables$freq variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] exp_descrip <- archive$System[[exp.name[1]]] @@ -29,7 +26,7 @@ load_multimodel <- function(recipe) { } else { split_multiselected_dims = FALSE } - + # Find the saved data directory recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", recipe$Analysis$Datasets$Multimodel$createFrom) @@ -37,42 +34,44 @@ load_multimodel <- function(recipe) { hcst_start <- recipe$Analysis$Time$hcst_start hcst_end <- recipe$Analysis$Time$hcst_end shortdate <- substr(recipe$Analysis$Time$sdate, start = 1, stop = 2) - filename <- paste0("scorecards_$model$_", ref.name, "_$var$_$file_date$_", + filename <- paste0("scorecards_$model$_", ref.name, "_$var$__$file_date$__", hcst_start, "-", hcst_end, "_s", shortdate, ".nc") } else { filename <- "$var$_$file_date$.nc" } + hcst.path <- file.path(recipe$Run$output_dir, "$model$", ref.name, + recipe$Analysis$Workflow$Calibration$method, + "$var$", filename) - hcst.path <- file.path(get_dir(recipe = recipe, variable = variable[1]), filename) - hcst.path <- gsub(variable[1], "$var$", hcst.path) - hcst.path <- gsub('Multimodel', "$model$", hcst.path) fcst.path <- obs.path <- hcst.path obs.path <- gsub("_$file_date$", "-obs_$file_date$", obs.path, fixed = T) obs.path <- gsub("$model$", gsub('\\.','',exp.name[1]), obs.path, fixed = T) - # Load hindcast #------------------------------------------------------------------- - hcst <- Start(dat = hcst.path, - var = variable, - file_date = sdates$hcst, - model = gsub('\\.','',exp.name), - time = 'all', - latitude = 'all', - latitude_reorder = Sort(), - longitude = 'all', - longitude_reorder = circularsort, - synonims = list(latitude = c('lat', 'latitude'), - longitude = c('lon', 'longitude'), - ensemble = c('member', 'ensemble')), - ensemble = 'all', - metadata_dims = 'var', - largest_dims_length = TRUE, - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = split_multiselected_dims, - retrieve = TRUE) - + hcst <- list() + for (sys in exp.name) { + aux <- Start(dat = hcst.path, + var = variable, + file_date = sdates$hcst, + model = gsub('\\.','', sys), + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = 'all', + metadata_dims = 'var', + largest_dims_length = TRUE, + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + hcst <- append(hcst, list(as.s2dv_cube(aux))) + } ############################# #NOTE: NOT TESTED YET if (store.freq %in% c("daily_mean", "daily")) { @@ -94,37 +93,35 @@ load_multimodel <- function(recipe) { } ############################### - # Convert hcst to s2dv_cube object - hcst <- as.s2dv_cube(hcst) - # Adjust dates for models where the time stamp goes into the next month - if (recipe$Analysis$Variables$freq == "monthly_mean") { - hcst$attrs$Dates[] <- hcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) - } + # Load forecast 
#------------------------------------------------------------------- if (!is.null(recipe$Analysis$Time$fcst_year)) { - fcst <- Start(dat = fcst.path, - var = variable, - file_date = sdates$fcst, - model = gsub('\\.','',exp.name), - time = 'all', - latitude = 'all', - latitude_reorder = Sort(), - longitude = 'all', - longitude_reorder = circularsort, - synonims = list(latitude = c('lat', 'latitude'), - longitude = c('lon', 'longitude'), - ensemble = c('member', 'ensemble')), - ensemble = 'all', - metadata_dims = 'var', - largest_dims_length = TRUE, - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = split_multiselected_dims, - retrieve = TRUE) - + fcst <- list() + for (sys in exp.name) { + aux <- Start(dat = fcst.path, + var = variable, + file_date = sdates$fcst, + model = gsub('\\.','', sys), + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = 'all', + metadata_dims = 'var', + largest_dims_length = TRUE, + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + fcst <- append(fcst, list(as.s2dv_cube(aux))) + } ############################# #NOTE: NOT TESTED YET if (store.freq %in% c("daily_mean", "daily")) { @@ -145,15 +142,6 @@ load_multimodel <- function(recipe) { dim(attr(fcst, "Variables")$common$time) <- default_time_dims } ############################# - - # Convert fcst to s2dv_cube - fcst <- as.s2dv_cube(fcst) - # Adjust dates for models where the time stamp goes into the next month - if (recipe$Analysis$Variables$freq == "monthly_mean") { - fcst$attrs$Dates[] <- - fcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) - } - } else { fcst <- NULL } @@ -234,7 +222,7 @@ load_multimodel <- function(recipe) { obs <- as.s2dv_cube(obs) # Check for consistency between hcst and obs grid - if (!isTRUE(all.equal(as.vector(hcst$coords$latitude), as.vector(obs$coords$latitude)))) { + if (!isTRUE(all.equal(as.vector(hcst[[1]]$coords$latitude), as.vector(obs$coords$latitude)))) { lat_error_msg <- paste("Latitude mismatch between hcst and obs.", "Please check the original grids and the", "regrid parameters in your recipe.") @@ -247,7 +235,7 @@ load_multimodel <- function(recipe) { info(recipe$Run$logger, obs_lat_msg) stop("hcst and obs don't share the same latitudes.") } - if (!isTRUE(all.equal(as.vector(hcst$coords$longitude), as.vector(obs$coords$longitude)))) { + if (!isTRUE(all.equal(as.vector(hcst[[1]]$coords$longitude), as.vector(obs$coords$longitude)))) { lon_error_msg <- paste("Longitude mismatch between hcst and obs.", "Please check the original grids and the", "regrid parameters in your recipe.") @@ -260,18 +248,10 @@ load_multimodel <- function(recipe) { info(recipe$Run$logger, obs_lon_msg) stop("hcst and obs don't share the same longitudes.") } - - # Print a summary of the loaded data for the user, for each object - if (recipe$Run$logger$threshold <= 2) { - data_summary(hcst, recipe) - data_summary(obs, recipe) - if (!is.null(fcst)) { - data_summary(fcst, recipe) - } - } - - info(recipe$Run$logger, - "##### DATA LOADING COMPLETED SUCCESSFULLY #####") + + +# info(recipe$Run$logger, +# "##### DATA LOADING INDIVIDUAL ENS COMPLETED SUCCESSFULLY #####") return(list(hcst = hcst, fcst = fcst, obs = obs)) } diff --git 
a/modules/Multimodel/load_multimodel_mean.R b/modules/Multimodel/load_multimodel_mean.R new file mode 100644 index 00000000..7021438b --- /dev/null +++ b/modules/Multimodel/load_multimodel_mean.R @@ -0,0 +1,260 @@ + +source("modules/Loading/R/dates2load.R") +source("modules/Loading/R/get_timeidx.R") +source("modules/Loading/R/check_latlon.R") +source("modules/Saving/Saving.R") +library(abind) +load_multimodel_mean <- function(recipe) { + + archive <- read_yaml("conf/archive.yml")$esarchive + ref.name <- recipe$Analysis$Datasets$Reference$name + exp.name <- sapply(recipe$Analysis$Datasets$System, '[[', 'name') + store.freq <- recipe$Analysis$Variables$freq + variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] + exp_descrip <- archive$System[[exp.name[1]]] + reference_descrip <- archive$Reference[[ref.name]] + sdates <- dates2load(recipe, recipe$Run$logger) + + lats.min <- recipe$Analysis$Region$latmin + lats.max <- recipe$Analysis$Region$latmax + lons.min <- recipe$Analysis$Region$lonmin + lons.max <- recipe$Analysis$Region$lonmax + circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) + + if (recipe$Analysis$Variables$freq == "monthly_mean") { + split_multiselected_dims = TRUE + } else { + split_multiselected_dims = FALSE + } + + # Find the saved data directory + recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", + recipe$Analysis$Datasets$Multimodel$createFrom) + if (tolower(recipe$Analysis$Output_format) == "scorecards") { + hcst_start <- recipe$Analysis$Time$hcst_start + hcst_end <- recipe$Analysis$Time$hcst_end + shortdate <- substr(recipe$Analysis$Time$sdate, start = 1, stop = 2) + filename <- paste0("scorecards_$model$_", ref.name, "_", variable, "_hcst_EM_", + hcst_start, "-", hcst_end, "_s", shortdate, ".nc") + } else { + filename <- "$var$_$file_date$.nc" + } + hcst.path <- file.path(recipe$Run$output_dir, "$model$", ref.name, + recipe$Analysis$Workflow$Calibration$method, + variable, filename) + + fcst.path <- obs.path <- hcst.path + fcst.path <- gsub("hcst", "fcst", fcst.path, fixed = T) + obs.path <- file.path(recipe$Run$output_dir, gsub('\\.','',exp.name[1]), ref.name, + recipe$Analysis$Workflow$Calibration$method, + variable, + paste0("scorecards_", + gsub('\\.','',exp.name[1]), "_", ref.name, + "_$var$_-obs_$file_date$__", hcst_start, "-", + hcst_end, "_s", shortdate, ".nc")) + # Load hindcast + #------------------------------------------------------------------- + hcst <- list() + for (sys in exp.name) { + aux <- Start(dat = hcst.path, + var = 'hcst_EM', + model = gsub('\\.','', sys), + sday = 'all', + sweek = 'all', + syear = 'all', + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), +# metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'dat'), + retrieve = TRUE) + hcst <- append(hcst, list(as.s2dv_cube(aux))) + names(hcst)[length(hcst)] <- sys + } + ############################# + #NOTE: NOT TESTED YET + if (store.freq %in% c("daily_mean", "daily")) { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(hcst))] <- dim(hcst) + dim(hcst) <- default_dims + # Change time attribute 
dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(hcst, "Variables")$common$time))[which(names( + dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(hcst, "Variables")$common$time))] <- + dim(attr(hcst, "Variables")$common$time) + dim(attr(hcst, "Variables")$common$time) <- default_time_dims + } + ############################### + + + + # Load forecast + #------------------------------------------------------------------- + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst <- list() + for (sys in exp.name) { + aux <- Start(dat = fcst.path, + var = 'fcst_EM', + model = gsub('\\.','', sys), + sday = 'all', + sweek = 'all', + syear = 'all', + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'dat'), + retrieve = TRUE) + fcst <- append(fcst, list(as.s2dv_cube(aux))) + names(fcst)[length(fcst)] <- sys + } + ############################# + #NOTE: NOT TESTED YET + if (store.freq %in% c("daily_mean", "daily")) { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(fcst))] <- dim(fcst) + dim(fcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(fcst, "Variables")$common$time))[which(names( + dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(fcst, "Variables")$common$time))] <- + dim(attr(fcst, "Variables")$common$time) + dim(attr(fcst, "Variables")$common$time) <- default_time_dims + } + ############################# + } else { + fcst <- NULL + } + + # Load reference + #------------------------------------------------------------------- + + if (store.freq == "monthly_mean") { + + obs <- Start(dat = obs.path, + var = variable, + file_date = sdates$hcst, + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + + } else if (store.freq %in% c("daily_mean", "daily")) { + + ############################# + #NOTE: NOT TESTED YET + + # Obtain dates and date dimensions from the loaded hcst data to make sure + # the corresponding observations are loaded correctly. 
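+      # Note (assumption): 'hcst' is built above as a list of s2dv_cube objects,
+      # so this untested branch would presumably need the dates of a single cube,
+      # e.g. hcst[[1]]$attrs$Dates, rather than hcst$attrs$Dates as written below.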
+ dates <- hcst$attrs$Dates + dim(dates) <- hcst$dims[c("sday", "sweek", "syear", "time")] + + # Get year and month for file_date + dates_file <- sapply(dates, format, '%Y%m') + dim(dates_file) <- dim(dates) + # Set hour to 12:00 to ensure correct date retrieval for daily data + lubridate::hour(dates) <- 12 + lubridate::minute(dates) <- 00 + # Restore correct dimensions + dim(dates) <- dim(dates_file) + + obs <- Start(dat = obs.path, + var = variable, + file_date = sort(unique(dates_file)), + time = dates, + time_var = 'time', + time_across = 'file_date', + merge_across_dims = TRUE, + merge_across_dims_narm = TRUE, + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + ############################# + + } + + # Adds ensemble dim to obs (for consistency with hcst/fcst) + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(obs))] <- dim(obs) + dim(obs) <- default_dims + + # Convert obs to s2dv_cube + obs <- as.s2dv_cube(obs) + + # Check for consistency between hcst and obs grid + if (!isTRUE(all.equal(as.vector(hcst[[1]]$coords$latitude), as.vector(obs$coords$latitude)))) { + lat_error_msg <- paste("Latitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lat_error_msg) + hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], + "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) + info(recipe$Run$logger, hcst_lat_msg) + obs_lat_msg <- paste0("First obs lat: ", obs$lat[1], + "; Last obs lat: ", obs$lat[length(obs$lat)]) + info(recipe$Run$logger, obs_lat_msg) + stop("hcst and obs don't share the same latitudes.") + } + if (!isTRUE(all.equal(as.vector(hcst[[1]]$coords$longitude), as.vector(obs$coords$longitude)))) { + lon_error_msg <- paste("Longitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lon_error_msg) + hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], + "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) + info(recipe$Run$logger, hcst_lon_msg) + obs_lon_msg <- paste0("First obs lon: ", obs$lon[1], + "; Last obs lon: ", obs$lon[length(obs$lon)]) + info(recipe$Run$logger, obs_lon_msg) + stop("hcst and obs don't share the same longitudes.") + } + + +# info(recipe$Run$logger, +# "##### DATA LOADING INDIVIDUAL ENS COMPLETED SUCCESSFULLY #####") + + return(list(hcst = hcst, fcst = fcst, obs = obs)) +} diff --git a/recipe_tas.yml b/recipe_tas.yml index e9f7e028..9416e669 100644 --- a/recipe_tas.yml +++ b/recipe_tas.yml @@ -11,13 +11,18 @@ Analysis: flux: no Datasets: System: - name: ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 - Multimodel: no # Mandatory, bool: Either yes/true or no/false + - {name: 'ECMWF-SEAS5.1'} + - {name: 'Meteo-France-System7'} + #name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + Multimodel: + execute: yes + approach: pooled # Mandatory, bool: Either yes/true or no/false + createFrom: Anomalies Reference: name: ERA5 # Mandatory, str: Reference codename. See docu. 
Time: sdate: '0501' - fcst_year: '2024' + fcst_year: '2021' hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' hcst_end: '2000' # Mandatory, int: Hindcast end year 'YYYY' ftime_min: 1 # Mandatory, int: First leadtime time step in months @@ -81,7 +86,7 @@ Run: Loglevel: INFO Terminal: yes filesystem: esarchive - output_dir: /esarchive/scratch/nperez/cs_oper/ # replace with the directory where you want to save the outputs + output_dir: /esarchive/scratch/nperez/git4/ # replace with the directory where you want to save the outputs code_dir: /esarchive/scratch/nperez/git4/sunset/ # replace with the directory where your code is autosubmit: no # fill only if using autosubmit diff --git a/testing_multimodel.R b/testing_multimodel.R new file mode 100644 index 00000000..b1859cde --- /dev/null +++ b/testing_multimodel.R @@ -0,0 +1,137 @@ +# From Multimodel +source("modules/Loading/R/dates2load.R") +source("modules/Loading/R/get_timeidx.R") +source("modules/Loading/R/check_latlon.R") +source('modules/Multimodel/load_multimodel.R') +source('modules/Multimodel/load_multimodel_splitted.R') +source('modules/Multimodel/dims_multimodel.R') +source('modules/Multimodel/build_multimodel.R') +source('modules/Multimodel/clean_multimodel.R') + +# recipe <- "recipe_tas.yml" # I have included multimodel yes +library(yaml) +recipe <- read_yaml(recipe) + +# (tolower(recipe$Analysis$Datasets$Multimodel$split_loading) %in% c('true','yes')) ## Fails becuase I don't know where split_loading comes from + +# I guess we need this: +# From load_multmodel + +archive <- read_yaml("conf/archive.yml")$esarchive +ref.name <- recipe$Analysis$Datasets$Reference$name + +# lets try creating a vector +## I don't know where this comes from +## recipe$Analysis$Datasets$System$models +exp.name <- c("ECMWF-SEAS5.1", "Meteo-France-System7") + +library(startR) + store.freq <- recipe$Analysis$Variables$freq + variable <- "tas" #strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] + exp_descrip <- archive$System[[exp.name[1]]] + reference_descrip <- archive$Reference[[ref.name]] + sdates <- dates2load(recipe, recipe$Run$logger) + lats.min <- recipe$Analysis$Region$latmin + lats.max <- recipe$Analysis$Region$latmax + lons.min <- recipe$Analysis$Region$lonmin + lons.max <- recipe$Analysis$Region$lonmax + circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) + + +recipe$Run$output_dir <- file.path("/esarchive/scratch/nperez/git4/recipe_tas_20240531165840", "outputs", + recipe$Analysis$Datasets$Multimodel$createFrom) + +hcst_start <- recipe$Analysis$Time$hcst_start + hcst_end <- recipe$Analysis$Time$hcst_end + shortdate <- substr(recipe$Analysis$Time$sdate, start = 1, stop = 2) + filename <- paste0("scorecards_$model$_", ref.name, "_$var$__$file_date$__", + hcst_start, "-", hcst_end, "_s", shortdate, ".nc") + +source("modules/Saving/Saving.R") + hcst.path <- file.path(recipe$Run$output_dir, "$model$", ref.name, + recipe$Analysis$Workflow$Calibration$method, + "$var$", filename) +# hcst.path <- gsub(variable[1], "$var$", hcst.path) +# hcst.path <- gsub('Multimodel', "$model$", hcst.path) + fcst.path <- obs.path <- hcst.path + obs.path <- gsub("_$file_date$", "-obs_$file_date$", obs.path, fixed = T) + obs.path <- gsub("$model$", gsub('\\.','',exp.name[1]), obs.path, fixed = T) + + hcst <- Start(dat = hcst.path, + var = variable, + file_date = sdates$hcst, + model = gsub('\\.','',exp.name), + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + 
synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = 'all', + metadata_dims = 'var', + largest_dims_length = TRUE, + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + + hcst <- as.s2dv_cube(hcst) + # Adjust dates for models where the time stamp goes into the next month + # No need to check the dates if the files are saved by SUNSET: + #if (recipe$Analysis$Variables$freq == "monthly_mean") { + # hcst$attrs$Dates[] <- hcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) + #} + +fcst <- as.s2dv_cube(fcst) + + fcst <- Start(dat = fcst.path, + var = variable, + file_date = sdates$fcst, + model = gsub('\\.','',exp.name), + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = 'all', + metadata_dims = 'var', + largest_dims_length = TRUE, + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + +datos <- list(hcst = hcst, fcst = fcst, obs = obs) + + obs <- Start(dat = obs.path, + var = variable, + file_date = sdates$hcst, + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(obs))] <- dim(obs) + dim(obs) <- default_dims + + # Convert obs to s2dv_cube + obs <- as.s2dv_cube(obs) -- GitLab From 6717090c31d036e99355dc99e66d6abebe8602e7 Mon Sep 17 00:00:00 2001 From: nperez Date: Tue, 4 Jun 2024 15:08:08 +0200 Subject: [PATCH 20/53] save fcst probs --- modules/Crossval/Crossval_anomalies.R | 20 +++++++++++++++++++- recipe_tas.yml | 6 +++--- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R index fa421f22..e011d059 100644 --- a/modules/Crossval/Crossval_anomalies.R +++ b/modules/Crossval/Crossval_anomalies.R @@ -198,6 +198,7 @@ Crossval_anomalies <- function(recipe, data) { } # Save probability bins result <- list() + probs_fcst <- list() all_names <- NULL for (ps in 1:length(categories)) { for (perc in 1:(length(categories[[ps]]) + 1)) { @@ -211,6 +212,10 @@ Crossval_anomalies <- function(recipe, data) { } result <- append(list(Subset(hcst_probs_ev[[ps]], along = 'cat', indices = ps, drop = 'all')), result) + if (!is.null(data$fcst)) { + probs_fcst <- append(list(Subset(fcst_probs[[ps]], + along = 'cat', indices = ps, drop = 'all')), probs_fcst) + } all_names <- c(all_names, name_elem) } } @@ -220,13 +225,26 @@ Crossval_anomalies <- function(recipe, data) { dim(x) <- c(var = 1, dim(x)) return(x)}) } + + if (!is.null(data$fcst)) { + names(probs_fcst) <- all_names + if (!('var' %in% names(dim(probs_fcst[[1]])))) { + probs_fcst <- lapply(probs_fcst, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } + if (!('syear' %in% names(dim(probs_fcst[[1]])))) 
{ + probs_fcst <- lapply(probs_fcst, function(x) { + dim(x) <- c(syear = 1, dim(x)) + return(x)}) + } + } if (recipe$Analysis$Workflow$Probabilities$save %in% c('all', 'bins_only')) { save_probabilities(recipe = recipe, probs = result, data_cube = data$hcst, agg = agg, type = "hcst") # TODO Forecast - probs_fcst <- NULL if (!is.null(probs_fcst)) { save_probabilities(recipe = recipe, probs = probs_fcst, data_cube = data$fcst, agg = agg, diff --git a/recipe_tas.yml b/recipe_tas.yml index 9416e669..90890cae 100644 --- a/recipe_tas.yml +++ b/recipe_tas.yml @@ -11,9 +11,9 @@ Analysis: flux: no Datasets: System: - - {name: 'ECMWF-SEAS5.1'} - - {name: 'Meteo-France-System7'} - #name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + #- {name: 'ECMWF-SEAS5.1'} + #- {name: 'Meteo-France-System7'} + name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 Multimodel: execute: yes approach: pooled # Mandatory, bool: Either yes/true or no/false -- GitLab From d829eb86d802fcaf11ccc0592dbe30efbf0040b4 Mon Sep 17 00:00:00 2001 From: nperez Date: Tue, 4 Jun 2024 17:37:30 +0200 Subject: [PATCH 21/53] load probs --- modules/Crossval/Crossval_anomalies.R | 12 + modules/Multimodel/Multimodel.R | 66 +--- modules/Multimodel/dims_multimodel.R | 103 ------ modules/Multimodel/load_multimodel_probs.R | 294 ++++++++++++++++++ modules/Multimodel/load_multimodel_splitted.R | 58 ---- modules/Multimodel/multimodel_metrics.R | 79 +++++ modules/Saving/R/save_probabilities.R | 9 +- recipe_tas.yml | 6 +- 8 files changed, 410 insertions(+), 217 deletions(-) delete mode 100644 modules/Multimodel/dims_multimodel.R create mode 100644 modules/Multimodel/load_multimodel_probs.R delete mode 100644 modules/Multimodel/load_multimodel_splitted.R create mode 100644 modules/Multimodel/multimodel_metrics.R diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R index e011d059..30f05184 100644 --- a/modules/Crossval/Crossval_anomalies.R +++ b/modules/Crossval/Crossval_anomalies.R @@ -199,6 +199,7 @@ Crossval_anomalies <- function(recipe, data) { # Save probability bins result <- list() probs_fcst <- list() + probs_obs <- list() all_names <- NULL for (ps in 1:length(categories)) { for (perc in 1:(length(categories[[ps]]) + 1)) { @@ -212,6 +213,8 @@ Crossval_anomalies <- function(recipe, data) { } result <- append(list(Subset(hcst_probs_ev[[ps]], along = 'cat', indices = ps, drop = 'all')), result) + probs_obs <- append(list(Subset(obs_probs_ev[[ps]], + along = 'cat', indices = ps, drop = 'all')), probs_obs) if (!is.null(data$fcst)) { probs_fcst <- append(list(Subset(fcst_probs[[ps]], along = 'cat', indices = ps, drop = 'all')), probs_fcst) @@ -225,6 +228,12 @@ Crossval_anomalies <- function(recipe, data) { dim(x) <- c(var = 1, dim(x)) return(x)}) } + names(probs_obs) <- all_names + if (!('var' %in% names(dim(probs_obs[[1]])))) { + probs_obs <- lapply(probs_obs, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } if (!is.null(data$fcst)) { names(probs_fcst) <- all_names @@ -244,6 +253,9 @@ Crossval_anomalies <- function(recipe, data) { save_probabilities(recipe = recipe, probs = result, data_cube = data$hcst, agg = agg, type = "hcst") + save_probabilities(recipe = recipe, probs = probs_obs, + data_cube = data$hcst, agg = agg, + type = "obs") # TODO Forecast if (!is.null(probs_fcst)) { save_probabilities(recipe = recipe, probs = probs_fcst, diff --git a/modules/Multimodel/Multimodel.R b/modules/Multimodel/Multimodel.R index 
7a9eb670..2390395c 100644 --- a/modules/Multimodel/Multimodel.R +++ b/modules/Multimodel/Multimodel.R @@ -19,8 +19,7 @@ library(yaml) recipe <- read_yaml("recipe_tas.yml") source("modules/Saving/R/get_dir.R") source("modules/Loading/Loading.R") -recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "recipe_tas_20240604104736/") -#recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "recipe_tas_20240531165840/") +recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "recipe_tas_20240604162251/") # DON'T KNOW HOW TO SOLVE LOGGER: recipe$Run$Loglevel <- 'INFO' recipe$Run$logfile <- "/esarchive/scratch/nperez/git4//recipe_tas_20240531165840/logs/main.log" @@ -47,59 +46,22 @@ Multimodel <- function(recipe) { ", | |,")[[1]] # Load data splitting by system individual members: if (recipe$Analysis$Datasets$Multimodel$approach == 'pooled') { - data_aux <- load_multimodel(recipe) - datos <- append(list(obs = data_aux$obs$data), lapply(data_aux$hcst, function(x) x$data)) - names(datos)[-1] <- sapply(recipe$Analysis$Datasets$System, '[[', 'name') - # Compute crps /crpss and return forecast for visualization purposes - if (any(c('crps', 'crpss') %in% requested_metrics)) { - crps <- Apply(datos, target_dims = c('syear', 'ensemble'), - fun = function(obs, ...) { - res <- abind(..., along = 2) - names(dim(res)) <- names(dim(obs)) - obs <- Subset(obs, along = 'ensemble', indices = 1, drop = 'selected') - s2dv:::.CRPS(exp = res, obs = obs, dat_dim = NULL, - time_dim = 'syear', memb_dim = 'ensemble')}, - ncores = ncores)$output1 - skill_metrics$crps <- crps - # Build the reference forecast: - sdate_dim <- dim(datos[[1]])['syear'] - cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, - amt.points = sdate_dim, - amt.points_cor = NULL) - tmp <- array(1:length(cross), c(syear = length(cross))) - ref_clim <- Apply(list(datos$obs, tmp), - target_dims = list(c('ensemble', 'syear'), NULL), - function(x, y) { - Subset(x, along = 'syear', indices = cross[[y]]$train.dexes, drop = T)}, - ncores = ncores, output_dims = 'ensemble')$output1 - datos <- append(list(ref = ref_clim), datos) - crps_clim <- Apply(datos[1:2], target_dims = c('syear', 'ensemble'), - fun = function(ref, obs) { - obs <- Subset(obs, along = 'ensemble', indices = 1, drop = 'selected') - s2dv:::.CRPS(exp = ref, obs = obs, dat_dim = NULL, - time_dim = 'syear', memb_dim = 'ensemble')}, - ncores = ncores)$output1 - skill_metrics$crps_clim <- crps_clim - - - crpss <- Apply(datos, target_dims = c('syear', 'ensemble'), - fun = function(ref, obs, ...) { - res <- abind(..., along = 2) - names(dim(res)) <- names(dim(obs)) - obs <- Subset(obs, along = 'ensemble', indices = 1, drop = 'selected') - CRPSS(exp = res, obs = obs, ref = ref, - memb_dim = 'ensemble', Fair = FALSE, - time_dim = 'syear', clim.cross.val = FALSE)}, - ncores = ncores) - skill_metrics$crpss <- crpss$crpss - skill_metrics$crpss_significance <- crpss$sign - } - # What about spread or spread-to error ratio? 
- rm(datos) + individual_memb <- load_multimodel(recipe) } # Always load ensemble mean and probabilities: source("modules/Multimodel/load_multimodel_mean.R") - datos <- load_multimodel_mean(recipe) + ensmean <- load_multimodel_mean(recipe) + + source("modules/Multimodel/load_multimodel_probs.R") + probs <- load_multimodel_probs(recipe) + + + + + + + +############# TO REMOVE: # Building the multi-model multimodel_aux <- build_multimodel(data, recipe) data <- multimodel_aux$data diff --git a/modules/Multimodel/dims_multimodel.R b/modules/Multimodel/dims_multimodel.R deleted file mode 100644 index 4b581379..00000000 --- a/modules/Multimodel/dims_multimodel.R +++ /dev/null @@ -1,103 +0,0 @@ - -source("modules/Loading/R/dates2load.R") -source("modules/Loading/R/get_timeidx.R") -source("modules/Loading/R/check_latlon.R") - -dims_multimodel <- function(recipe) { - - archive <- read_yaml("conf/archive.yml")$esarchive - ref.name <- recipe$Analysis$Datasets$Reference$name - exp.name <- sapply(recipe$Analysis$Datasets$System, '[[', 'name') - store.freq <- recipe$Analysis$Variables$freq - variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] - exp_descrip <- archive$System[[exp.name[1]]] - reference_descrip <- archive$Reference[[ref.name]] - sdates <- dates2load(recipe, recipe$Run$logger) - - lats.min <- recipe$Analysis$Region$latmin - lats.max <- recipe$Analysis$Region$latmax - lons.min <- recipe$Analysis$Region$lonmin - lons.max <- recipe$Analysis$Region$lonmax - circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) - - if (recipe$Analysis$Variables$freq == "monthly_mean") { - split_multiselected_dims = TRUE - } else { - split_multiselected_dims = FALSE - } - - # Find the saved data directory - recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", - recipe$Analysis$Datasets$Multimodel$createFrom) - if (tolower(recipe$Analysis$Output_format) == "scorecards") { - hcst_start <- recipe$Analysis$Time$hcst_start - hcst_end <- recipe$Analysis$Time$hcst_end - shortdate <- substr(recipe$Analysis$Time$sdate, start = 1, stop = 2) - filename <- paste0("scorecards_$model$_", ref.name, "_$var$_$file_date$_", - hcst_start, "-", hcst_end, "_s", shortdate, ".nc") - } else { - filename <- "$var$_$file_date$.nc" - } - - hcst.path <- file.path(get_dir(recipe = recipe, variable = variable[1]), filename) - hcst.path <- gsub(variable[1], "$var$", hcst.path) - hcst.path <- gsub('Multimodel', "$model$", hcst.path) - fcst.path <- obs.path <- hcst.path - - # Load hindcast - #------------------------------------------------------------------- - hcst <- Start(dat = hcst.path, - var = variable, - file_date = sdates$hcst, - model = gsub('\\.','',exp.name), - time = 'all', - latitude = 'all', - latitude_reorder = Sort(), - longitude = 'all', - longitude_reorder = circularsort, - synonims = list(latitude = c('lat', 'latitude'), - longitude = c('lon', 'longitude'), - ensemble = c('member', 'ensemble')), - ensemble = 'all', - metadata_dims = 'var', - largest_dims_length = TRUE, - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = split_multiselected_dims, - retrieve = FALSE) - - dim.hcst <- attr(hcst,'Dimensions') - - # Load forecast - #------------------------------------------------------------------- - if (!is.null(recipe$Analysis$Time$fcst_year)) { - fcst <- Start(dat = fcst.path, - var = variable, - file_date = sdates$fcst, - model = gsub('\\.','',exp.name), - time = 'all', - latitude = 'all', - latitude_reorder = Sort(), - 
longitude = 'all', - longitude_reorder = circularsort, - synonims = list(latitude = c('lat', 'latitude'), - longitude = c('lon', 'longitude'), - ensemble = c('member', 'ensemble')), - ensemble = 'all', - metadata_dims = 'var', - largest_dims_length = TRUE, - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = split_multiselected_dims, - retrieve = FALSE) - - dim.fcst <- attr(fcst,'Dimensions') - - } else { - dim.fcst <- NULL - } - - return(list(dim.hcst = dim.hcst, dim.fcst = dim.fcst)) -} diff --git a/modules/Multimodel/load_multimodel_probs.R b/modules/Multimodel/load_multimodel_probs.R new file mode 100644 index 00000000..4b0eb358 --- /dev/null +++ b/modules/Multimodel/load_multimodel_probs.R @@ -0,0 +1,294 @@ + +source("modules/Loading/R/dates2load.R") +source("modules/Loading/R/get_timeidx.R") +source("modules/Loading/R/check_latlon.R") +source("modules/Saving/Saving.R") +library(abind) +load_multimodel_probs <- function(recipe) { + + archive <- read_yaml("conf/archive.yml")$esarchive + ref.name <- recipe$Analysis$Datasets$Reference$name + exp.name <- sapply(recipe$Analysis$Datasets$System, '[[', 'name') + store.freq <- recipe$Analysis$Variables$freq + variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] + exp_descrip <- archive$System[[exp.name[1]]] + reference_descrip <- archive$Reference[[ref.name]] + sdates <- dates2load(recipe, recipe$Run$logger) + + lats.min <- recipe$Analysis$Region$latmin + lats.max <- recipe$Analysis$Region$latmax + lons.min <- recipe$Analysis$Region$lonmin + lons.max <- recipe$Analysis$Region$lonmax + circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) + + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + round(eval(parse(text = y)),2)})}) + + if (recipe$Analysis$Variables$freq == "monthly_mean") { + split_multiselected_dims = TRUE + } else { + split_multiselected_dims = FALSE + } + + # Find the saved data directory + recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", + recipe$Analysis$Datasets$Multimodel$createFrom) + if (tolower(recipe$Analysis$Output_format) == "scorecards") { + hcst_start <- recipe$Analysis$Time$hcst_start + hcst_end <- recipe$Analysis$Time$hcst_end + shortdate <- substr(recipe$Analysis$Time$sdate, start = 1, stop = 2) + filename <- paste0("scorecards_$model$_", ref.name, "_", variable, + "_-probs_$file_date$__", + hcst_start, "-", hcst_end, "_s", shortdate, ".nc") + } else { + filename <- "$var$_$file_date$.nc" + } + hcst.path <- file.path(recipe$Run$output_dir, "$model$", ref.name, + recipe$Analysis$Workflow$Calibration$method, + variable, filename) + + fcst.path <- hcst.path + obs.path <- file.path(recipe$Run$output_dir, gsub('\\.','', exp.name), + ref.name, + recipe$Analysis$Workflow$Calibration$method, + variable, paste0("scorecards__", ref.name, "_", variable, + "_-probs_$file_date$__", + hcst_start, "-", hcst_end, "_s", shortdate, ".nc")) + + ## Generate probs names inner files variables: + prob_names <- NULL + all_names <- NULL + for (ps in 1:length(categories)) { + for (perc in 1:(length(categories[[ps]]) + 1)) { + if (perc == 1) { + name_elem <- paste0("below_", categories[[ps]][perc]) + } else if (perc == length(categories[[ps]]) + 1) { + name_elem <- paste0("above_", categories[[ps]][perc-1]) + } else { + name_elem <- paste0("from_", categories[[ps]][perc-1], + "_to_", categories[[ps]][perc]) + } + all_names <- c(all_names, 
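+        # e.g. with tercile limits this loop yields the inner-file variable
+        # names "below_0.33", "from_0.33_to_0.67" and "above_0.67"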
name_elem) + } + prob_names <- append(prob_names, list(all_names)) + } + + # Load hindcast + #------------------------------------------------------------------- + hcst <- list() + probs <- list() + for (ps in 1:length(prob_names)) { + for (sys in exp.name) { + aux <- Start(dat = hcst.path, + var = prob_names[[ps]], + file_date = sdates$hcst, + model = gsub('\\.','', sys), + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + largest_dims_length = TRUE, + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + names(dim(aux))[2] <- 'cat' + probs <- append(probs, list(as.s2dv_cube(aux))) + names(probs)[length(probs)] <- sys + } + hcst <- append(hcst, list(probs)) + } + ############################# + #NOTE: NOT TESTED YET + if (store.freq %in% c("daily_mean", "daily")) { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(hcst))] <- dim(hcst) + dim(hcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(hcst, "Variables")$common$time))[which(names( + dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(hcst, "Variables")$common$time))] <- + dim(attr(hcst, "Variables")$common$time) + dim(attr(hcst, "Variables")$common$time) <- default_time_dims + } + ############################### + + + + # Load forecast + #------------------------------------------------------------------- + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst <- list() + probs <- list() + for (ps in 1:length(prob_names)) { + for (sys in exp.name) { + aux <- Start(dat = fcst.path, + var = prob_names[[ps]], + file_date = sdates$fcst, + model = gsub('\\.','', sys), + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + largest_dims_length = TRUE, + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + names(dim(aux))[[2]] <- 'cat' + probs <- append(probs, list(as.s2dv_cube(aux))) + names(probs)[length(probs)] <- sys + } + fcst <- append(fcst, list(probs)) + } + + ############################# + #NOTE: NOT TESTED YET + if (store.freq %in% c("daily_mean", "daily")) { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(fcst))] <- dim(fcst) + dim(fcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(fcst, "Variables")$common$time))[which(names( + dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(fcst, "Variables")$common$time))] <- + 
dim(attr(fcst, "Variables")$common$time) + dim(attr(fcst, "Variables")$common$time) <- default_time_dims + } + ############################# + } else { + fcst <- NULL + } + + # Load reference + #------------------------------------------------------------------- + obs <- list() + + if (store.freq == "monthly_mean") { + for (ps in 1:length(prob_names)) { + aux <- Start(dat = obs.path, + var = prob_names[[ps]], + file_date = sdates$hcst, + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + names(dim(aux))[[2]] <- 'cat' + obs <- append(obs, list(as.s2dv_cube(aux))) + } + } else if (store.freq %in% c("daily_mean", "daily")) { + + ############################# + #NOTE: NOT TESTED YET + + # Obtain dates and date dimensions from the loaded hcst data to make sure + # the corresponding observations are loaded correctly. + dates <- hcst$attrs$Dates + dim(dates) <- hcst$dims[c("sday", "sweek", "syear", "time")] + + # Get year and month for file_date + dates_file <- sapply(dates, format, '%Y%m') + dim(dates_file) <- dim(dates) + # Set hour to 12:00 to ensure correct date retrieval for daily data + lubridate::hour(dates) <- 12 + lubridate::minute(dates) <- 00 + # Restore correct dimensions + dim(dates) <- dim(dates_file) + + obs <- Start(dat = obs.path, + var = variable, + file_date = sort(unique(dates_file)), + time = dates, + time_var = 'time', + time_across = 'file_date', + merge_across_dims = TRUE, + merge_across_dims_narm = TRUE, + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + ############################# + + } + # Adds ensemble dim to obs (for consistency with hcst/fcst) + #default_dims <- c(dat = 1, var = 1, sday = 1, + # sweek = 1, syear = 1, time = 1, + # latitude = 1, longitude = 1, ensemble = 1) + #default_dims[names(dim(obs))] <- dim(obs) + #dim(obs) <- default_dims + + # Convert obs to s2dv_cube + #obs <- as.s2dv_cube(obs) + + # Check for consistency between hcst and obs grid + if (!isTRUE(all.equal(as.vector(hcst[[1]]$coords$latitude), as.vector(obs$coords$latitude)))) { + lat_error_msg <- paste("Latitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lat_error_msg) + hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], + "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) + info(recipe$Run$logger, hcst_lat_msg) + obs_lat_msg <- paste0("First obs lat: ", obs$lat[1], + "; Last obs lat: ", obs$lat[length(obs$lat)]) + info(recipe$Run$logger, obs_lat_msg) + stop("hcst and obs don't share the same latitudes.") + } + if (!isTRUE(all.equal(as.vector(hcst[[1]]$coords$longitude), as.vector(obs$coords$longitude)))) { + lon_error_msg <- paste("Longitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lon_error_msg) + hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], + "; Last hcst lon: ", 
hcst$lon[length(hcst$lon)])
+    info(recipe$Run$logger, hcst_lon_msg)
+    obs_lon_msg <- paste0("First obs lon: ", obs$lon[1],
+                          "; Last obs lon: ", obs$lon[length(obs$lon)])
+    info(recipe$Run$logger, obs_lon_msg)
+    stop("hcst and obs don't share the same longitudes.")
+  }
+
+
+#  info(recipe$Run$logger,
+#       "##### DATA LOADING INDIVIDUAL ENS COMPLETED SUCCESSFULLY #####")
+
+  return(list(hcst = hcst, fcst = fcst, obs = obs))
+}
diff --git a/modules/Multimodel/load_multimodel_splitted.R b/modules/Multimodel/load_multimodel_splitted.R
deleted file mode 100644
index 5b9a642f..00000000
--- a/modules/Multimodel/load_multimodel_splitted.R
+++ /dev/null
@@ -1,58 +0,0 @@
-
-load_multimodel_splitted <- function(recipe){
-
-  # Retrieve data dimension only without loading data
-  dims <- dims_multimodel(recipe)
-  data_order <- names(dims$dim.hcst)
-
-  # Create empty array with desired dimensions
-  data <- list(hcst = NULL, fcst = NULL, obs = NULL)
-  data$hcst$data <- array(data = NA, dim = dims$dim.hcst)
-  if (!is.null(recipe$Analysis$Time$fcst_year)) {
-    data$fcst$data <- array(data = NA, dim = dims$dim.fcst)
-  }
-  files_hcst <- list()
-  files_fcst <- list()
-  files_obs <- list()
-
-
-  # Loop over system to load hindcast and forecast data
-  for (sys in 1:length(recipe$Analysis$Datasets$System$models)){
-
-    recipe_aux <- recipe
-    system_load <- recipe$Analysis$Datasets$System$models[sys]
-    recipe_aux$Analysis$Datasets$System$models <- system_load
-
-    data_aux <- load_multimodel(recipe_aux)
-
-    data$hcst$data[,,,,,which(recipe$Analysis$Datasets$System$models == system_load),,,,1:dim(data_aux$hcst$data)['ensemble']] <- s2dv::Reorder(data = data_aux$hcst$data, order = data_order)
-
-    if(!is.null(recipe$Analysis$Time$fcst_year)){
-      data$fcst$data[,,,,,which(recipe$Analysis$Datasets$System$models == system_load),,,,1:dim(data_aux$fcst$data)['ensemble']] <- s2dv::Reorder(data = data_aux$fcst$data, order = data_order)
-    }
-
-    files_hcst <- append(files_hcst, data_aux$hcst$attrs$source_files, after = length(files_hcst))
-    files_fcst <- append(files_fcst, data_aux$fcst$attrs$source_files, after = length(files_fcst))
-    files_obs <- append(files_obs, data_aux$obs$attrs$source_files, after = length(files_obs))
-
-  } # close loop on sys
-
-  # Define obs data
-  data$obs$data <- data_aux$obs$data
-
-  # Include data attributes
-  data$hcst$attrs <- data_aux$hcst$attrs
-  data$fcst$attrs <- data_aux$fcst$attrs
-  data$obs$attrs <- data_aux$obs$attrs
-  data$hcst$coords <- data_aux$hcst$coords
-  data$fcst$coords <- data_aux$fcst$coords
-  data$obs$coords <- data_aux$obs$coords
-
-  # Remove temporary data_aux
-  rm(data_aux)
-
-  return(list(data = data,
-              files_hcst = files_hcst,
-              files_fcst = files_fcst,
-              files_obs = files_obs))
-}
diff --git a/modules/Multimodel/multimodel_metrics.R b/modules/Multimodel/multimodel_metrics.R
new file mode 100644
index 00000000..0eb0f7ac
--- /dev/null
+++ b/modules/Multimodel/multimodel_metrics.R
@@ -0,0 +1,79 @@
+# One function to compute all the possible metrics or statistics
+# indv_membs is the list returned by load_multimodel
+  ## needed to compute crps/crpss, but it will not always fit in memory
+# ensmean is a list returned by load_multimodel_mean
+  ## this reduces memory use and can be used to compute several metrics
+# probs is a list returned by load_multimodel_probs
+  ## this is needed to compute rps/rpss
+multimodel_metrics <- function(recipe,
+                               indv_membs = NULL,
+                               ensmean = NULL,
+                               probs = NULL) {
+  ncores <- recipe$Analysis$ncores
+  na.rm <- recipe$Analysis$remove_NAs
+  cross.method <- recipe$Analysis$cross.method
+  if (is.null(cross.method)) {
+    cross.method <- 'leave-one-out'
+  }
+  requested_metrics <- strsplit(recipe$Analysis$Workflow$Skill$metric,
+                                ", | |,")[[1]]
+
+  if (!is.null(indv_membs)) {
+    # convert the $data elements to a list to use multiApply:
+    indv_membs <- append(list(obs = indv_membs$obs$data),
+                         lapply(indv_membs$hcst, function(x) x$data))
+    names(indv_membs)[-1] <- sapply(recipe$Analysis$Datasets$System,
+                                    '[[', 'name')
+    if (any(c('crps', 'crpss') %in% requested_metrics)) {
+      crps <- Apply(indv_membs, target_dims = c('syear', 'ensemble'),
+                    fun = function(obs, ...) {
+                      res <- abind(..., along = 2)
+                      names(dim(res)) <- names(dim(obs))
+                      obs <- Subset(obs, along = 'ensemble',
+                                    indices = 1, drop = 'selected')
+                      s2dv:::.CRPS(exp = res, obs = obs, dat_dim = NULL,
+                                   time_dim = 'syear',
+                                   memb_dim = 'ensemble')},
+                    ncores = ncores)$output1
+      skill_metrics$crps <- crps
+      # Build the reference forecast:
+      sdate_dim <- dim(indv_membs[[1]])['syear']
+      cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method,
+                                                amt.points = sdate_dim,
+                                                amt.points_cor = NULL)
+      tmp <- array(1:length(cross), c(syear = length(cross)))
+      ref_clim <- Apply(list(indv_membs$obs, tmp),
+                        target_dims = list(c('ensemble', 'syear'), NULL),
+                        function(x, y) {
+                          Subset(x, along = 'syear',
+                                 indices = cross[[y]]$train.dexes, drop = T)},
+                        ncores = ncores, output_dims = 'ensemble')$output1
+      indv_membs <- append(list(ref = ref_clim), indv_membs)
+      crps_clim <- Apply(indv_membs[1:2], target_dims = c('syear', 'ensemble'),
+                         fun = function(ref, obs) {
+                           obs <- Subset(obs, along = 'ensemble',
+                                         indices = 1, drop = 'selected')
+                           s2dv:::.CRPS(exp = ref, obs = obs, dat_dim = NULL,
+                                        time_dim = 'syear', memb_dim = 'ensemble')},
+                         ncores = ncores)$output1
+      skill_metrics$crps_clim <- crps_clim
+
+
+      crpss <- Apply(indv_membs, target_dims = c('syear', 'ensemble'),
+                     fun = function(ref, obs, ...) 
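+                       # (the first two list elements bind to the named
+                       #  'ref' and 'obs' arguments; the remaining per-system
+                       #  member arrays arrive through '...')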
{ + res <- abind(..., along = 2) + names(dim(res)) <- names(dim(obs)) + obs <- Subset(obs, along = 'ensemble', + indices = 1, drop = 'selected') + CRPSS(exp = res, obs = obs, ref = ref, + memb_dim = 'ensemble', Fair = FALSE, + time_dim = 'syear', clim.cross.val = FALSE)}, + ncores = ncores) + skill_metrics$crpss <- crpss$crpss + skill_metrics$crpss_significance <- crpss$sign + + + } + } +} diff --git a/modules/Saving/R/save_probabilities.R b/modules/Saving/R/save_probabilities.R index a9ddc977..c6d5f222 100644 --- a/modules/Saving/R/save_probabilities.R +++ b/modules/Saving/R/save_probabilities.R @@ -117,7 +117,14 @@ save_probabilities <- function(recipe, # Generate name of output file outfile <- get_filename(outdir, recipe, variable, fcst.sdate, agg, "probs") - + if (type == 'obs') { + filename <- strsplit(outfile, "/") + filename <- filename[[1]][length(filename[[1]])] + outfile <- file.path(outdir, + sub(gsub('\\.','', + recipe$Analysis$Datasets$System$name), "", + filename)) + } # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { country <- get_countries(grid) diff --git a/recipe_tas.yml b/recipe_tas.yml index 90890cae..7fd02265 100644 --- a/recipe_tas.yml +++ b/recipe_tas.yml @@ -11,9 +11,9 @@ Analysis: flux: no Datasets: System: - #- {name: 'ECMWF-SEAS5.1'} - #- {name: 'Meteo-France-System7'} - name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + - {name: 'ECMWF-SEAS5.1'} + - {name: 'Meteo-France-System7'} + #name: ECMWF-SEAS5.1 #Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 Multimodel: execute: yes approach: pooled # Mandatory, bool: Either yes/true or no/false -- GitLab From 9c2fe38e0e797333dc8cf9fcc0f7da4c553de241 Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 5 Jun 2024 10:59:34 +0200 Subject: [PATCH 22/53] make crossval_skill to work with proper param --- modules/Crossval/Crossval_skill.R | 38 +++++++++++++++++++------------ recipe_tas.yml | 6 ++--- 2 files changed, 27 insertions(+), 17 deletions(-) diff --git a/modules/Crossval/Crossval_skill.R b/modules/Crossval/Crossval_skill.R index 58007a5a..fa210302 100644 --- a/modules/Crossval/Crossval_skill.R +++ b/modules/Crossval/Crossval_skill.R @@ -34,7 +34,7 @@ source("modules/Crossval/R/tmp/Eno.R") ## if fair is TRUE, the nmemb used to compute the probabilities is needed ## nmemb_ref is the number of year - 1 in case climatological forecast is the reference Crossval_skill <- function(recipe, data_crossval, - fair = FALSE, nmemb = NULL, nmemb_ref = NULL) { + fair = FALSE, nmemb = NULL, nmemb_ref = NULL) { ncores <- recipe$Analysis$ncores alpha <- recipe$Analysis$Skill$alpha na.rm <- recipe$Analysis$remove_NAs @@ -47,12 +47,12 @@ Crossval_skill <- function(recipe, data_crossval, ", | |,")[[1]] if ('rps' %in% requested_metrics) { - rps <- RPS(exp = res$probs$hcst_ev[[1]], - obs = res$probs$obs_ev[[1]], memb_dim = NULL, + rps <- RPS(exp = data_crossval$probs$hcst_ev[[1]], + obs = data_crossval$probs$obs_ev[[1]], memb_dim = NULL, cat_dim = 'cat', cross.val = FALSE, time_dim = 'syear', Fair = fair, nmemb = nmemb, ncores = ncores) - rps_clim <- Apply(list(res$probs$obs_ev[[1]]), + rps_clim <- Apply(list(data_crossval$probs$obs_ev[[1]]), target_dims = c('cat', 'syear'), RPS_clim, bin_dim_abs = 'cat', Fair = fair, cross.val = FALSE, ncores = ncores)$output1 @@ -60,7 +60,8 @@ Crossval_skill <- function(recipe, data_crossval, skill_metrics$rps_clim <- rps_clim } if ('rpss' %in% requested_metrics) { - rpss <- RPSS(exp = 
res$probs$hcst_ev[[1]], obs = res$probs$obs_ev[[1]], + rpss <- RPSS(exp = data_crossval$probs$hcst_ev[[1]], + obs = data_crossval$probs$obs_ev[[1]], ref = NULL, # ref is 1/3 by default if terciles time_dim = 'syear', memb_dim = NULL, cat_dim = 'cat', nmemb = nmemb, @@ -76,18 +77,22 @@ Crossval_skill <- function(recipe, data_crossval, } if ('crps' %in% requested_metrics) { - crps <- CRPS(exp = res$hcst$data, obs = res$obs$data, + crps <- CRPS(exp = data_crossval$hcst$data, + obs = data_crossval$obs$data, time_dim = 'syear', memb_dim = 'ensemble', Fair = fair, ncores = ncores) skill_metrics$crps <- crps - crps_clim <- CRPS(exp = res$ref_obs_tr, obs = res$obs$data, + crps_clim <- CRPS(exp = data_crossval$ref_obs_tr, + obs = data_crossval$obs$data, time_dim = 'syear', memb_dim = 'ensemble', Fair = fair, ncores = ncores) skill_metrics$crps_clim <- crps_clim } if ('crpss' %in% requested_metrics) { - crpss <- CRPSS(exp = res$hcst$data, obs = res$obs$data, ref = res$ref_obs_tr, + crpss <- CRPSS(exp = data_crossval$hcst$data, + obs = data_crossval$obs$data, + ref = data_crossval$ref_obs_tr, memb_dim = 'ensemble', Fair = fair, time_dim = 'syear', clim.cross.val = FALSE, ncores = ncores) @@ -96,7 +101,8 @@ Crossval_skill <- function(recipe, data_crossval, } if ('enscorr' %in% requested_metrics) { - enscorr <- Corr(res$hcst$data, res$obs$data, + enscorr <- Corr(exp = data_crossval$hcst$data, + obs = data_crossval$obs$data, dat_dim = NULL, time_dim = 'syear', method = 'pearson', @@ -110,7 +116,8 @@ Crossval_skill <- function(recipe, data_crossval, skill_metrics$enscorr <- enscorr } if ('mean_bias' %in% requested_metrics) { - mean_bias <- Bias(res$hcst.full_val$data, res$obs.full_val$data, + mean_bias <- Bias(exp = data_crossval$hcst.full_val$data, + obs = data_crossval$obs.full_val$data, time_dim = 'syear', memb_dim = 'ensemble', alpha = alpha, @@ -119,7 +126,8 @@ Crossval_skill <- function(recipe, data_crossval, skill_metrics$mean_bias_significance <- mean_bias$sig } if ('enssprerr' %in% requested_metrics) { - enssprerr <- SprErr(exp = res$hcst$data, obs = res$obs$data, + enssprerr <- SprErr(exp = data_crossval$hcst$data, + obs = data_crossval$obs$data, memb_dim = 'ensemble', dat_dim = NULL, time_dim = 'syear', pval = TRUE, ncores = ncores) @@ -127,14 +135,16 @@ Crossval_skill <- function(recipe, data_crossval, skill_metrics$SprErr_significance <- enssprerr$p.val <= alpha } if ('rms' %in% requested_metrics) { - rms <- RMS(exp = res$hcst$data, obs = res$obs$data, + rms <- RMS(exp = data_crossval$hcst$data, + obs = data_crossval$obs$data, memb_dim = 'ensemble', dat_dim = NULL, time_dim = 'syear', alpha = alpha, ncores = ncores) skill_metrics$rms <- rms$rms } if ('rmss' %in% requested_metrics) { - rmss <- RMSSS(exp = res$hcst$data, obs = res$obs$data, + rmss <- RMSSS(exp = data_crossval$hcst$data, + obs = data_crossval$obs$data, ref = res$ref_obs_tr, memb_dim = 'ensemble', dat_dim = NULL, time_dim = 'syear', alpha = alpha, sign = TRUE, @@ -158,7 +168,7 @@ Crossval_skill <- function(recipe, data_crossval, # Save metrics save_metrics(recipe = recipe, metrics = skill_metrics, - data_cube = data$hcst, agg = 'global', + data_cube = data_crossval$hcst, agg = 'global', outdir = recipe$Run$output_dir) recipe$Run$output_dir <- original diff --git a/recipe_tas.yml b/recipe_tas.yml index 7fd02265..5dfe25a9 100644 --- a/recipe_tas.yml +++ b/recipe_tas.yml @@ -11,9 +11,9 @@ Analysis: flux: no Datasets: System: - - {name: 'ECMWF-SEAS5.1'} - - {name: 'Meteo-France-System7'} - #name: ECMWF-SEAS5.1 #Meteo-France-System7 
#ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + # - {name: 'ECMWF-SEAS5.1'} + # - {name: 'Meteo-France-System7'} + name: ECMWF-SEAS5.1 #Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 Multimodel: execute: yes approach: pooled # Mandatory, bool: Either yes/true or no/false -- GitLab From 373c944d4e14974c6fa0ef56c0910261971332df Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 5 Jun 2024 12:31:57 +0200 Subject: [PATCH 23/53] fix probs list --- modules/Crossval/Crossval_anomalies.R | 21 ++++--- modules/Crossval/Crossval_skill.R | 82 +++++++++++++++++---------- recipe_tas.yml | 2 +- 3 files changed, 66 insertions(+), 39 deletions(-) diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R index 30f05184..ada64002 100644 --- a/modules/Crossval/Crossval_anomalies.R +++ b/modules/Crossval/Crossval_anomalies.R @@ -197,7 +197,7 @@ Crossval_anomalies <- function(recipe, data) { } } # Save probability bins - result <- list() + probs_hcst <- list() probs_fcst <- list() probs_obs <- list() all_names <- NULL @@ -211,20 +211,23 @@ Crossval_anomalies <- function(recipe, data) { name_elem <- paste0("from_", categories[[ps]][perc-1], "_to_", categories[[ps]][perc]) } - result <- append(list(Subset(hcst_probs_ev[[ps]], - along = 'cat', indices = ps, drop = 'all')), result) + probs_hcst <- append(list(Subset(hcst_probs_ev[[ps]], + along = 'cat', indices = perc, drop = 'all')), + probs_hcst) probs_obs <- append(list(Subset(obs_probs_ev[[ps]], - along = 'cat', indices = ps, drop = 'all')), probs_obs) + along = 'cat', indices = perc, drop = 'all')), + probs_obs) if (!is.null(data$fcst)) { probs_fcst <- append(list(Subset(fcst_probs[[ps]], - along = 'cat', indices = ps, drop = 'all')), probs_fcst) + along = 'cat', indices = perc, drop = 'all')), + probs_fcst) } all_names <- c(all_names, name_elem) } } - names(result) <- all_names - if (!('var' %in% names(dim(result[[1]])))) { - result <- lapply(result, function(x) { + names(probs_hcst) <- all_names + if (!('var' %in% names(dim(probs_hcst[[1]])))) { + probs_hcst <- lapply(probs_hcst, function(x) { dim(x) <- c(var = 1, dim(x)) return(x)}) } @@ -250,7 +253,7 @@ Crossval_anomalies <- function(recipe, data) { } if (recipe$Analysis$Workflow$Probabilities$save %in% c('all', 'bins_only')) { - save_probabilities(recipe = recipe, probs = result, + save_probabilities(recipe = recipe, probs = probs_hcst, data_cube = data$hcst, agg = agg, type = "hcst") save_probabilities(recipe = recipe, probs = probs_obs, diff --git a/modules/Crossval/Crossval_skill.R b/modules/Crossval/Crossval_skill.R index fa210302..d8696890 100644 --- a/modules/Crossval/Crossval_skill.R +++ b/modules/Crossval/Crossval_skill.R @@ -38,6 +38,19 @@ Crossval_skill <- function(recipe, data_crossval, ncores <- recipe$Analysis$ncores alpha <- recipe$Analysis$Skill$alpha na.rm <- recipe$Analysis$remove_NAs + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + round(eval(parse(text = y)),2)})}) + + # TODO: distinguish between rpss and bss + # if 1 percentile -> bss + # if more than 1 -> rpss + exe_rps <- unlist(lapply(categories, function(x) { + if (length(x) > 1) { + x <- x[1] *100 + } + return(x)})) if (is.null(alpha)) { alpha <- 0.05 } @@ -45,37 +58,48 @@ Crossval_skill <- function(recipe, data_crossval, skill_metrics <- list() requested_metrics <- strsplit(recipe$Analysis$Workflow$Skill$metric, ", | |,")[[1]] - - if ('rps' %in% 
requested_metrics) {
+      rps <- RPS(exp = data_crossval$probs$hcst_ev[[ps]],
+                 obs = data_crossval$probs$obs_ev[[1]], memb_dim = NULL,
+                 cat_dim = 'cat', cross.val = FALSE, time_dim = 'syear',
+                 Fair = fair, nmemb = nmemb,
+                 ncores = ncores)
+      rps_clim <- Apply(list(data_crossval$probs$obs_ev[[1]]),
+                        target_dims = c('cat', 'syear'),
+                        RPS_clim, bin_dim_abs = 'cat', Fair = fair,
+                        cross.val = FALSE, ncores = ncores)$output1
+      skill_metrics$rps <- rps
+      skill_metrics$rps_clim <- rps_clim
+      # names based on the categories:
+      # To use it when visualization works for more rps
+      #skill_metrics[[paste0('rps', exe_rps[ps])]] <- rps
+      #skill_metrics[[paste0('rps_clim',
+      #                      exe_rps[ps])]] <- rps_clim
+    }
+    if ('rpss' %in% requested_metrics) {
+      rpss <- RPSS(exp = data_crossval$probs$hcst_ev[[1]],
+                   obs = data_crossval$probs$obs_ev[[1]],
+                   ref = NULL, # ref is 1/3 by default if terciles
+                   time_dim = 'syear', memb_dim = NULL,
+                   cat_dim = 'cat', nmemb = nmemb,
+                   dat_dim = NULL,
+                   prob_thresholds = 0.1, # unused param when providing probs
+                   indices_for_clim = NULL,
+                   Fair = fair, weights_exp = NULL, weights_ref = NULL,
+                   cross.val = FALSE, na.rm = na.rm,
+                   sig_method.type = 'two.sided.approx', alpha = alpha,
+                   ncores = ncores)
+      skill_metrics$rpss <- rpss$rpss
+      skill_metrics$rpss_significance <- rpss$sign
+      # TO USE IT when visualization works for more rpss
+      #skill_metrics[[paste0('rpss', exe_rps[ps])]] <- rpss$rpss
+      #skill_metrics[[paste0('rpss',
+      #                      exe_rps[ps],
+      #                      "_significance")]] <- rpss$sign
+    }
+  }
-
   if ('crps' %in% requested_metrics) {
     crps <- CRPS(exp = data_crossval$hcst$data,
                  obs = data_crossval$obs$data,
diff --git a/recipe_tas.yml b/recipe_tas.yml
index 5dfe25a9..82f5ad07 100644
--- a/recipe_tas.yml
+++ b/recipe_tas.yml
@@ -48,7 +48,7 @@ Analysis:
       cross_validation: yes
       save: none
     Skill:
-      metric: mean_bias EnsCorr rpss crpss EnsSprErr
+      metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov std neff
       save: 'all'
       cross_validation: yes
     Probabilities:
-- GitLab


From 750d354b76a8b0c1a67a93a92d664801fb3786aa Mon Sep 17 00:00:00 2001
From: nperez
Date: Wed, 5 Jun 2024 15:53:07 +0200
Subject: [PATCH 24/53] statistics in crossval_skill

---
 modules/Crossval/Crossval_anomalies.R |  2 ++
 modules/Crossval/Crossval_skill.R     | 44 +++++++++++++++++++++++++++
 recipe_tas.yml                        |  4 +--
 tools/check_recipe.R                  |  4 ++-
 4
files changed, 51 insertions(+), 3 deletions(-) diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R index ada64002..829850d2 100644 --- a/modules/Crossval/Crossval_anomalies.R +++ b/modules/Crossval/Crossval_anomalies.R @@ -273,6 +273,7 @@ Crossval_anomalies <- function(recipe, data) { Subset(hcst_EM, along = 'dat', indices = 1, drop = 'selected')), data_cube = data$hcst, agg = agg, module = "statistics") + fcst_EM <- NULL if (!is.null(data$fcst)) { fcst_EM <- MeanDims(data$fcst$data, 'ensemble', drop = T) save_metrics(recipe = recipe, @@ -283,6 +284,7 @@ Crossval_anomalies <- function(recipe, data) { } return(list(hcst = ano_hcst, obs = ano_obs, fcst = data$fcst, hcst.full_val = data$hcst, obs.full_val = data$obs, + hcst_EM = hcst_EM, fcst_EM = fcst_EM, cat_lims = list(hcst_tr = lims_ano_hcst_tr_res, obs_tr = lims_ano_obs_tr_res), probs = list(hcst_ev = hcst_probs_ev, diff --git a/modules/Crossval/Crossval_skill.R b/modules/Crossval/Crossval_skill.R index d8696890..f1356f3b 100644 --- a/modules/Crossval/Crossval_skill.R +++ b/modules/Crossval/Crossval_skill.R @@ -176,6 +176,50 @@ Crossval_skill <- function(recipe, data_crossval, skill_metrics$rmss <- rmss$rmss skill_metrics$rmss_significance <- rmss$sign } + if (any(c('std', 'standard_deviation') %in% requested_metrics)) { + std_hcst <- Apply(data = data_crossval$hcst_EM, + target_dims = 'syear', + fun = 'sd')$output1 + + std_obs <- Apply(data = data_crossval$obs$data, + target_dims = 'syear', + fun = 'sd')$output1 + + skill_metrics[['std_hcst']] <- std_hcst + skill_metrics[['std_obs']] <- std_obs + } + if (any(c('var', 'variance') %in% requested_metrics)) { + ## Calculate variance + var_hcst <- Apply(data = data_crossval$hcst_EM, + target_dims = 'syear', + fun = 'sd')$output1 ^ 2 + + var_obs <- Apply(data = data_crossval$obs$data, + target_dims = 'syear', + fun = 'sd')$output1 ^ 2 + + skill_metrics[['var_hcst']] <- var_hcst + skill_metrics[['var_obs']] <- var_obs + } ## close if on variance + if ('n_eff' %in% requested_metrics) { + ## Calculate degrees of freedom + n_eff <- s2dv::Eno(data = data_crossval$obs$data, + time_dim = 'syear', + na.action = na.pass, + ncores = ncores) + skill_metrics[['n_eff']] <- n_eff + } ## close on n_eff + + if (any(c('cov', 'covariance') %in% requested_metrics)) { + covariance <- Apply(data = list(x = data_crossval$obs$data, + y = data_crossval$hcst_EM), + target_dims = 'syear', + fun = function(x, y) { + cov(as.vector(x), as.vector(y), + use = "everything", + method = "pearson")})$output1 + skill_metrics$covariance <- covariance + } original <- recipe$Run$output_dir recipe$Run$output_dir <- paste0(original, "/outputs/Skill/") diff --git a/recipe_tas.yml b/recipe_tas.yml index 82f5ad07..0c9dfb8c 100644 --- a/recipe_tas.yml +++ b/recipe_tas.yml @@ -13,7 +13,7 @@ Analysis: System: # - {name: 'ECMWF-SEAS5.1'} # - {name: 'Meteo-France-System7'} - name: ECMWF-SEAS5.1 #Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 Multimodel: execute: yes approach: pooled # Mandatory, bool: Either yes/true or no/false @@ -48,7 +48,7 @@ Analysis: cross_validation: yes save: none Skill: - metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov std neff + metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov std n_eff save: 'all' cross_validation: yes Probabilities: diff --git a/tools/check_recipe.R 
b/tools/check_recipe.R index 5c5ab721..b2449295 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -590,7 +590,9 @@ check_recipe <- function(recipe) { } # Skill - AVAILABLE_METRICS <- c("enscorr", "corr_individual_members", "rps", "rps_syear", + AVAILABLE_METRICS <- c("enscorr", "corr_individual_members", "rps", + "rps_syear", "cov", "covariance", "std", + "standard_deviation", "n_eff", "spread", "rpss", "frps", "frpss", "crps", "crps_syear", "crpss", "bss10", "bss90", "mean_bias", "mean_bias_ss", "enssprerr", "rps_clim", -- GitLab From f3ed21ecc50747ad979265c114244e83e8afc364 Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 5 Jun 2024 16:38:59 +0200 Subject: [PATCH 25/53] conf includes gpfs --- conf/archive.yml | 117 +++++++++++++++++- full_ecvs_anomalies.R | 2 + .../{ => old_modules}/Anomalies/Anomalies.R | 0 .../Calibration/Calibration.R | 0 .../Downscaling/Downscaling.R | 0 .../Downscaling/tmp/Analogs.R | 0 .../{ => old_modules}/Downscaling/tmp/Intbc.R | 0 .../Downscaling/tmp/Interpolation.R | 0 .../{ => old_modules}/Downscaling/tmp/Intlr.R | 0 .../Downscaling/tmp/LogisticReg.R | 0 .../{ => old_modules}/Downscaling/tmp/Utils.R | 0 modules/{ => old_modules}/Indices/Indices.R | 0 .../{ => old_modules}/Indices/R/compute_nao.R | 0 .../Indices/R/compute_nino.R | 0 .../Indices/R/correlation_eno.R | 0 .../Indices/R/drop_indices_dims.R | 0 .../Indices/R/plot_deterministic_forecast.R | 0 .../{ => old_modules}/Statistics/Statistics.R | 0 18 files changed, 118 insertions(+), 1 deletion(-) rename modules/{ => old_modules}/Anomalies/Anomalies.R (100%) rename modules/{ => old_modules}/Calibration/Calibration.R (100%) rename modules/{ => old_modules}/Downscaling/Downscaling.R (100%) rename modules/{ => old_modules}/Downscaling/tmp/Analogs.R (100%) rename modules/{ => old_modules}/Downscaling/tmp/Intbc.R (100%) rename modules/{ => old_modules}/Downscaling/tmp/Interpolation.R (100%) rename modules/{ => old_modules}/Downscaling/tmp/Intlr.R (100%) rename modules/{ => old_modules}/Downscaling/tmp/LogisticReg.R (100%) rename modules/{ => old_modules}/Downscaling/tmp/Utils.R (100%) rename modules/{ => old_modules}/Indices/Indices.R (100%) rename modules/{ => old_modules}/Indices/R/compute_nao.R (100%) rename modules/{ => old_modules}/Indices/R/compute_nino.R (100%) rename modules/{ => old_modules}/Indices/R/correlation_eno.R (100%) rename modules/{ => old_modules}/Indices/R/drop_indices_dims.R (100%) rename modules/{ => old_modules}/Indices/R/plot_deterministic_forecast.R (100%) rename modules/{ => old_modules}/Statistics/Statistics.R (100%) diff --git a/conf/archive.yml b/conf/archive.yml index 61f62be2..c892f815 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -1,3 +1,118 @@ +gpfs: + src: "/gpfs/projects/bsc32/esarchive_cache/" + System: + ECMWF-SEAS5.1: + name: "ECMWF SEAS5 (v5.1)" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/ecmwf/system51c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 51 + hcst: 25 + calendar: "proleptic_gregorian" + time_stamp_lag: "0" + reference_grid: "/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/system51c3s/monthly_mean/tas_f6h/tas_20180501.nc" + land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/system51c3s/constant/lsm/lsm.nc" + CMCC-SPS3.5: + name: "CMCC-SPS3.5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/cmcc/system35c3s/" + monthly_mean: 
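+      # (variable paths are appended to the dataset 'src' entries above,
+      #  mirroring the esarchive section further down in this file)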
{"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/sfcWind_f6h/"} + nmember: + fcst: 50 + hcst: 40 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_system35c3s.txt" + Meteo-France-System8: + name: "Meteo-France System 8" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/meteofrance/system8c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_f24h/", + "sfcWind": "monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 51 + hcst: 25 + time_stamp_lag: "+1" + calendar: "proleptic_gregorian" + reference_grid: "conf/grid_description/griddes_system7c3s.txt" + UK-MetOffice-Glosea601: + name: "UK MetOffice GloSea 6 (v6.01)" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/ukmo/glosea6_system601-c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 62 + hcst: 28 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_ukmo600.txt" + NCEP-CFSv2: + name: "NCEP CFSv2" + institution: "NOAA NCEP" #? + src: "exp/ncep/cfs-v2/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 124 + hcst: 24 + calendar: "gregorian" + time_stamp_lag: "0" + reference_grid: "conf/grid_description/griddes_ncep-cfsv2.txt" + DWD-GCFS2.1: + name: "DWD GCFS 2.1" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/dwd/system21_m1/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + nmember: + fcst: 50 + hcst: 30 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_system21_m1.txt" + ECCC-CanCM4i: + name: "ECCC CanCM4i (v3)" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/eccc/eccc3/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 10 + hcst: 10 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_eccc1.txt" + Reference: + ERA5: + name: "ERA5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "recon/ecmwf/era5/" + monthly_mean: {"tas":"monthly_mean/tas_f1h-r1440x721cds/", + "psl":"monthly_mean/psl_f1h-r1440x721cds/", + "prlr":"monthly_mean/prlr_f1h-r1440x721cds/", + "sfcWind":"monthly_mean/sfcWind_f1h-r1440x721cds/"} + calendar: "standard" + reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" + land_sea_mask: "/esarchive/recon/ecmwf/era5/constant/lsm-r1440x721cds/sftof.nc" + +######################################################################### esarchive: src: "/esarchive/" System: @@ -181,7 +296,7 @@ esarchive: "tasmin":"daily/tasmin/", "tasmax":"daily/tasmax/"} monthly_mean: {"tas":"monthly_mean/tas_f1h/","tasmin":"monthly_mean/tasmin_f24h/", "tasmax":"monthly_mean/tasmax_f24h/", "prlr":"monthly_mean/prlr_f1h/", - "sfcWind":"monthly_mean/sfcWind_f1h/", "rsds":"monthly_mean/rsds_f1h/", + c "sfcWind":"monthly_mean/sfcWind_f1h/", 
"rsds":"monthly_mean/rsds_f1h/", "tdps":"monthly_mean/tdps_f1h/"} calendar: "proleptic_gregorian" reference_grid: "/esarchive/recon/ecmwf/era5land/daily_mean/tas_f1h/tas_201805.nc" diff --git a/full_ecvs_anomalies.R b/full_ecvs_anomalies.R index 44c7e57d..1c873a0b 100644 --- a/full_ecvs_anomalies.R +++ b/full_ecvs_anomalies.R @@ -28,4 +28,6 @@ Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, signi source("tools/add_logo.R") add_logo(recipe, "rsz_rsz_bsc_logo.png") +# scorecards computation: + diff --git a/modules/Anomalies/Anomalies.R b/modules/old_modules/Anomalies/Anomalies.R similarity index 100% rename from modules/Anomalies/Anomalies.R rename to modules/old_modules/Anomalies/Anomalies.R diff --git a/modules/Calibration/Calibration.R b/modules/old_modules/Calibration/Calibration.R similarity index 100% rename from modules/Calibration/Calibration.R rename to modules/old_modules/Calibration/Calibration.R diff --git a/modules/Downscaling/Downscaling.R b/modules/old_modules/Downscaling/Downscaling.R similarity index 100% rename from modules/Downscaling/Downscaling.R rename to modules/old_modules/Downscaling/Downscaling.R diff --git a/modules/Downscaling/tmp/Analogs.R b/modules/old_modules/Downscaling/tmp/Analogs.R similarity index 100% rename from modules/Downscaling/tmp/Analogs.R rename to modules/old_modules/Downscaling/tmp/Analogs.R diff --git a/modules/Downscaling/tmp/Intbc.R b/modules/old_modules/Downscaling/tmp/Intbc.R similarity index 100% rename from modules/Downscaling/tmp/Intbc.R rename to modules/old_modules/Downscaling/tmp/Intbc.R diff --git a/modules/Downscaling/tmp/Interpolation.R b/modules/old_modules/Downscaling/tmp/Interpolation.R similarity index 100% rename from modules/Downscaling/tmp/Interpolation.R rename to modules/old_modules/Downscaling/tmp/Interpolation.R diff --git a/modules/Downscaling/tmp/Intlr.R b/modules/old_modules/Downscaling/tmp/Intlr.R similarity index 100% rename from modules/Downscaling/tmp/Intlr.R rename to modules/old_modules/Downscaling/tmp/Intlr.R diff --git a/modules/Downscaling/tmp/LogisticReg.R b/modules/old_modules/Downscaling/tmp/LogisticReg.R similarity index 100% rename from modules/Downscaling/tmp/LogisticReg.R rename to modules/old_modules/Downscaling/tmp/LogisticReg.R diff --git a/modules/Downscaling/tmp/Utils.R b/modules/old_modules/Downscaling/tmp/Utils.R similarity index 100% rename from modules/Downscaling/tmp/Utils.R rename to modules/old_modules/Downscaling/tmp/Utils.R diff --git a/modules/Indices/Indices.R b/modules/old_modules/Indices/Indices.R similarity index 100% rename from modules/Indices/Indices.R rename to modules/old_modules/Indices/Indices.R diff --git a/modules/Indices/R/compute_nao.R b/modules/old_modules/Indices/R/compute_nao.R similarity index 100% rename from modules/Indices/R/compute_nao.R rename to modules/old_modules/Indices/R/compute_nao.R diff --git a/modules/Indices/R/compute_nino.R b/modules/old_modules/Indices/R/compute_nino.R similarity index 100% rename from modules/Indices/R/compute_nino.R rename to modules/old_modules/Indices/R/compute_nino.R diff --git a/modules/Indices/R/correlation_eno.R b/modules/old_modules/Indices/R/correlation_eno.R similarity index 100% rename from modules/Indices/R/correlation_eno.R rename to modules/old_modules/Indices/R/correlation_eno.R diff --git a/modules/Indices/R/drop_indices_dims.R b/modules/old_modules/Indices/R/drop_indices_dims.R similarity index 100% rename from modules/Indices/R/drop_indices_dims.R rename to 
modules/old_modules/Indices/R/drop_indices_dims.R diff --git a/modules/Indices/R/plot_deterministic_forecast.R b/modules/old_modules/Indices/R/plot_deterministic_forecast.R similarity index 100% rename from modules/Indices/R/plot_deterministic_forecast.R rename to modules/old_modules/Indices/R/plot_deterministic_forecast.R diff --git a/modules/Statistics/Statistics.R b/modules/old_modules/Statistics/Statistics.R similarity index 100% rename from modules/Statistics/Statistics.R rename to modules/old_modules/Statistics/Statistics.R -- GitLab From 5072d5c897356960ffc1146674de7aafd3b109c0 Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 5 Jun 2024 16:55:30 +0200 Subject: [PATCH 26/53] fix conf gpfs --- conf/archive.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/conf/archive.yml b/conf/archive.yml index c892f815..2dc9142b 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -48,10 +48,10 @@ gpfs: name: "UK MetOffice GloSea 6 (v6.01)" institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/ukmo/glosea6_system601-c3s/" - monthly_mean: {"tas":"monthly_mean/tas_f6h/", - "prlr":"monthly_mean/prlr_f24h/", - "sfcWind":"monthly_mean/sfcWind_f6h/", - "psl":"monthly_mean/psl_f6h/"} + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_f24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} nmember: fcst: 62 hcst: 28 @@ -79,6 +79,7 @@ gpfs: monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f24h/", "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} nmember: fcst: 50 hcst: 30 -- GitLab From 4aa5456b2293080be84862d31e12838153a8aada Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 5 Jun 2024 17:32:46 +0200 Subject: [PATCH 27/53] fix typo archive and include multimodel metrics --- conf/archive.yml | 2 +- modules/Multimodel/Multimodel.R | 30 +++++++++++++++++++----------- recipe_tas.yml | 6 +++--- 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/conf/archive.yml b/conf/archive.yml index 2dc9142b..b2237b46 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -297,7 +297,7 @@ esarchive: "tasmin":"daily/tasmin/", "tasmax":"daily/tasmax/"} monthly_mean: {"tas":"monthly_mean/tas_f1h/","tasmin":"monthly_mean/tasmin_f24h/", "tasmax":"monthly_mean/tasmax_f24h/", "prlr":"monthly_mean/prlr_f1h/", - c "sfcWind":"monthly_mean/sfcWind_f1h/", "rsds":"monthly_mean/rsds_f1h/", + "sfcWind":"monthly_mean/sfcWind_f1h/", "rsds":"monthly_mean/rsds_f1h/", "tdps":"monthly_mean/tdps_f1h/"} calendar: "proleptic_gregorian" reference_grid: "/esarchive/recon/ecmwf/era5land/daily_mean/tas_f1h/tas_201805.nc" diff --git a/modules/Multimodel/Multimodel.R b/modules/Multimodel/Multimodel.R index 2390395c..8b178033 100644 --- a/modules/Multimodel/Multimodel.R +++ b/modules/Multimodel/Multimodel.R @@ -19,7 +19,7 @@ library(yaml) recipe <- read_yaml("recipe_tas.yml") source("modules/Saving/R/get_dir.R") source("modules/Loading/Loading.R") -recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "recipe_tas_20240604162251/") +recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "recipe_tas_20240605141443/") # DON'T KNOW HOW TO SOLVE LOGGER: recipe$Run$Loglevel <- 'INFO' recipe$Run$logfile <- "/esarchive/scratch/nperez/git4//recipe_tas_20240531165840/logs/main.log" @@ -47,26 +47,34 @@ Multimodel <- function(recipe) { # Load data splitting by system individual members: if (recipe$Analysis$Datasets$Multimodel$approach == 'pooled') { individual_memb <- load_multimodel(recipe) - } + } else { 
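+    # Only the pooled approach needs every individual member in memory;
+    # the other approaches fall back to the lighter ensemble-mean and
+    # probability files loaded below.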
+ individual_memb <- NULL + } # Always load ensemble mean and probabilities: source("modules/Multimodel/load_multimodel_mean.R") ensmean <- load_multimodel_mean(recipe) source("modules/Multimodel/load_multimodel_probs.R") probs <- load_multimodel_probs(recipe) - - - + if (recipe$Analysis$Datasets$Multimodel$approac == "weighted") { + stop("Implement the weighting method here") + # I imagine the probs and the ensmean are modified and returned. + multimodel_aux <- build_multimodel(data, recipe) + ensmean <- multimodel_aux$ensmean + probs <- multimodel_aux$prob + rm(multimodel_aux) + } + + # assessment: + source("modules/Multimodel/multimodel_metrics.R") + skill_metrics <- multimodel_metrics(recipe = recipe, + indv_membs = individual_memb, + ensmean = ensmean, + probs = probs) -############# TO REMOVE: - # Building the multi-model - multimodel_aux <- build_multimodel(data, recipe) - data <- multimodel_aux$data - prob <- multimodel_aux$prob - rm(multimodel_aux) # Saving multimodel if (recipe$Analysis$Workflow[[recipe$Analysis$Datasets$Multimodel$createFrom]]$save != 'none') { diff --git a/recipe_tas.yml b/recipe_tas.yml index 0c9dfb8c..64d71f64 100644 --- a/recipe_tas.yml +++ b/recipe_tas.yml @@ -11,9 +11,9 @@ Analysis: flux: no Datasets: System: - # - {name: 'ECMWF-SEAS5.1'} - # - {name: 'Meteo-France-System7'} - name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + - {name: 'ECMWF-SEAS5.1'} + - {name: 'Meteo-France-System7'} + #name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 Multimodel: execute: yes approach: pooled # Mandatory, bool: Either yes/true or no/false -- GitLab From 0c1bf345ceadb5918374826f46a4a6f73bc0cd64 Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 6 Jun 2024 16:59:29 +0200 Subject: [PATCH 28/53] modify cat lims and improve multimodel --- modules/Crossval/Crossval_anomalies.R | 8 +- modules/Multimodel/Multimodel.R | 54 ++---- modules/Multimodel/load_multimodel.R | 17 +- modules/Multimodel/load_multimodel_mean.R | 36 ++-- modules/Multimodel/multimodel_metrics.R | 210 +++++++++++++++++++--- 5 files changed, 247 insertions(+), 78 deletions(-) diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R index 829850d2..8fe9fa01 100644 --- a/modules/Crossval/Crossval_anomalies.R +++ b/modules/Crossval/Crossval_anomalies.R @@ -11,7 +11,7 @@ Crossval_anomalies <- function(recipe, data) { categories <- recipe$Analysis$Workflow$Probabilities$percentiles categories <- lapply(categories, function (x) { sapply(x, function(y) { - round(eval(parse(text = y)),2)})}) + eval(parse(text = y))})}) ncores <- recipe$Analysis$ncores na.rm <- recipe$Analysis$remove_NAs ## data dimensions @@ -201,6 +201,12 @@ Crossval_anomalies <- function(recipe, data) { probs_fcst <- list() probs_obs <- list() all_names <- NULL + # Make categories rounded number to use as names: + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + round(eval(parse(text = y)),2)})}) + for (ps in 1:length(categories)) { for (perc in 1:(length(categories[[ps]]) + 1)) { if (perc == 1) { diff --git a/modules/Multimodel/Multimodel.R b/modules/Multimodel/Multimodel.R index 8b178033..b52e7047 100644 --- a/modules/Multimodel/Multimodel.R +++ b/modules/Multimodel/Multimodel.R @@ -9,8 +9,6 @@ source("modules/Loading/R/dates2load.R") source("modules/Loading/R/get_timeidx.R") source("modules/Loading/R/check_latlon.R") 
source('modules/Multimodel/load_multimodel.R') -source('modules/Multimodel/load_multimodel_splitted.R') -source('modules/Multimodel/dims_multimodel.R') source('modules/Multimodel/build_multimodel.R') source('modules/Multimodel/clean_multimodel.R') library(startR) @@ -24,45 +22,29 @@ recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "recipe_tas_2024060514144 recipe$Run$Loglevel <- 'INFO' recipe$Run$logfile <- "/esarchive/scratch/nperez/git4//recipe_tas_20240531165840/logs/main.log" recipe$Run$logger <- logger <- log4r::logger(threshold = recipe$Run$Loglevel, - appenders = list(console_appender(layout = default_log_layout()), - file_appender(logfile, append = TRUE, - layout = default_log_layout()))) + appenders = list(console_appender(layout = default_log_layout()), + file_appender(logfile, append = TRUE, + layout = default_log_layout()))) Multimodel <- function(recipe) { - # recipe: auto-s2s recipe as provided by read_yaml - ncores <- recipe$Analysis$ncores - cross.method <- recipe$Analysis$cross.method - if (is.null(cross.method)) { - cross.method <- 'leave-one-out' - } - categories <- recipe$Analysis$Workflow$Probabilities$percentiles - categories <- lapply(categories, function (x) { - sapply(x, function(y) { - round(eval(parse(text = y)),2)})}) - na.rm <- recipe$Analysis$remove_NAs - skill_metrics <- list() - requested_metrics <- strsplit(recipe$Analysis$Workflow$Skill$metric, - ", | |,")[[1]] # Load data splitting by system individual members: if (recipe$Analysis$Datasets$Multimodel$approach == 'pooled') { - individual_memb <- load_multimodel(recipe) - } else { - individual_memb <- NULL - } - # Always load ensemble mean and probabilities: - source("modules/Multimodel/load_multimodel_mean.R") - ensmean <- load_multimodel_mean(recipe) - - source("modules/Multimodel/load_multimodel_probs.R") - probs <- load_multimodel_probs(recipe) - if (recipe$Analysis$Datasets$Multimodel$approac == "weighted") { - stop("Implement the weighting method here") - # I imagine the probs and the ensmean are modified and returned. - multimodel_aux <- build_multimodel(data, recipe) - ensmean <- multimodel_aux$ensmean - probs <- multimodel_aux$prob - rm(multimodel_aux) + datos <- load_multimodel(recipe) + } else if (recipe$Analysis$Datasets$Multimodel$approach == 'mean') { + # In this case the ensemble dim is 1 for each model + # The functions in skill can still work + source("modules/Multimodel/load_multimodel_mean.R") + datos <- load_multimodel_mean(recipe) + source("modules/Multimodel/load_multimodel_probs.R") + probs <- load_multimodel_probs(recipe) + } else { #(recipe$Analysis$Datasets$Multimodel$approac == "weighted") + stop("Implement the weighting method here") + # I imagine the probs and the ensmean are modified and returned. 
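+      # A conceivable scheme (sketch only, names hypothetical): weight each
+      # system's probability list by its normalised hindcast skill, e.g.
+      #   w <- w / sum(w)                      # one weight per system
+      #   probs_mm <- Reduce('+', Map(function(p, wi) p * wi, probs, w))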
+ multimodel_aux <- build_multimodel(data, recipe) + ensmean <- multimodel_aux$ensmean + probs <- multimodel_aux$prob + rm(multimodel_aux) } # assessment: diff --git a/modules/Multimodel/load_multimodel.R b/modules/Multimodel/load_multimodel.R index b4cf8c25..22ad60c6 100644 --- a/modules/Multimodel/load_multimodel.R +++ b/modules/Multimodel/load_multimodel.R @@ -29,7 +29,7 @@ load_multimodel <- function(recipe) { # Find the saved data directory recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", - recipe$Analysis$Datasets$Multimodel$createFrom) + recipe$Analysis$Datasets$Multimodel$createFrom) if (tolower(recipe$Analysis$Output_format) == "scorecards") { hcst_start <- recipe$Analysis$Time$hcst_start hcst_end <- recipe$Analysis$Time$hcst_end @@ -50,10 +50,10 @@ load_multimodel <- function(recipe) { #------------------------------------------------------------------- hcst <- list() for (sys in exp.name) { - aux <- Start(dat = hcst.path, + hcst.path_aux <- gsub("$model$", gsub('\\.','', sys), hcst.path, fixed = T) + aux <- Start(dat = hcst.path_aux, var = variable, file_date = sdates$hcst, - model = gsub('\\.','', sys), time = 'all', latitude = 'all', latitude_reorder = Sort(), @@ -71,6 +71,7 @@ load_multimodel <- function(recipe) { split_multiselected_dims = split_multiselected_dims, retrieve = TRUE) hcst <- append(hcst, list(as.s2dv_cube(aux))) + names(hcst)[length(hcst)] <- gsub('\\.','', sys) } ############################# #NOTE: NOT TESTED YET @@ -100,10 +101,11 @@ load_multimodel <- function(recipe) { if (!is.null(recipe$Analysis$Time$fcst_year)) { fcst <- list() for (sys in exp.name) { - aux <- Start(dat = fcst.path, + fcst.path_aux <- gsub("$model$", gsub('\\.','', sys), + fcst.path, fixed = T) + aux <- Start(dat = fcst.path_aux, var = variable, file_date = sdates$fcst, - model = gsub('\\.','', sys), time = 'all', latitude = 'all', latitude_reorder = Sort(), @@ -121,6 +123,7 @@ load_multimodel <- function(recipe) { split_multiselected_dims = split_multiselected_dims, retrieve = TRUE) fcst <- append(fcst, list(as.s2dv_cube(aux))) + names(fcst)[length(fcst)] <- gsub('\\.','', sys) } ############################# #NOTE: NOT TESTED YET @@ -175,8 +178,8 @@ load_multimodel <- function(recipe) { # Obtain dates and date dimensions from the loaded hcst data to make sure # the corresponding observations are loaded correctly. 
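# An aside on the path handling introduced above: with 'model' no longer
# a Start() dimension, the $model$ placeholder is resolved per system;
# fixed = TRUE matters because '$' is a regex metacharacter. A standalone
# example (the path itself is made up):
sys <- 'ECMWF-SEAS5.1'
path <- "/output/$model$/$var$_$file_date$.nc"
gsub("$model$", gsub('\\.', '', sys), path, fixed = TRUE)
# [1] "/output/ECMWF-SEAS51/$var$_$file_date$.nc"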
- dates <- hcst$attrs$Dates - dim(dates) <- hcst$dims[c("sday", "sweek", "syear", "time")] + dates <- hcst$attrs$Dates + dim(dates) <- hcst$dims[c("sday", "sweek", "syear", "time")] # Get year and month for file_date dates_file <- sapply(dates, format, '%Y%m') diff --git a/modules/Multimodel/load_multimodel_mean.R b/modules/Multimodel/load_multimodel_mean.R index 7021438b..d23f9879 100644 --- a/modules/Multimodel/load_multimodel_mean.R +++ b/modules/Multimodel/load_multimodel_mean.R @@ -34,7 +34,8 @@ load_multimodel_mean <- function(recipe) { hcst_start <- recipe$Analysis$Time$hcst_start hcst_end <- recipe$Analysis$Time$hcst_end shortdate <- substr(recipe$Analysis$Time$sdate, start = 1, stop = 2) - filename <- paste0("scorecards_$model$_", ref.name, "_", variable, "_hcst_EM_", + filename <- paste0("scorecards_$model$_", ref.name, "_", + variable, "_hcst_EM_", hcst_start, "-", hcst_end, "_s", shortdate, ".nc") } else { filename <- "$var$_$file_date$.nc" @@ -45,9 +46,10 @@ load_multimodel_mean <- function(recipe) { fcst.path <- obs.path <- hcst.path fcst.path <- gsub("hcst", "fcst", fcst.path, fixed = T) - obs.path <- file.path(recipe$Run$output_dir, gsub('\\.','',exp.name[1]), ref.name, - recipe$Analysis$Workflow$Calibration$method, - variable, + obs.path <- file.path(recipe$Run$output_dir, + gsub('\\.','',exp.name[1]), ref.name, + recipe$Analysis$Workflow$Calibration$method, + variable, paste0("scorecards_", gsub('\\.','',exp.name[1]), "_", ref.name, "_$var$_-obs_$file_date$__", hcst_start, "-", @@ -56,9 +58,9 @@ load_multimodel_mean <- function(recipe) { #------------------------------------------------------------------- hcst <- list() for (sys in exp.name) { - aux <- Start(dat = hcst.path, + hcst.path_aux <- gsub("$model$", gsub('\\.','', sys), hcst.path, fixed = T) + aux <- Start(dat = hcst.path_aux, var = 'hcst_EM', - model = gsub('\\.','', sys), sday = 'all', sweek = 'all', syear = 'all', @@ -69,11 +71,17 @@ load_multimodel_mean <- function(recipe) { longitude_reorder = circularsort, synonims = list(latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude')), -# metadata_dims = 'var', return_vars = list(latitude = 'dat', longitude = 'dat', time = 'dat'), retrieve = TRUE) + # Adds ensemble dim to obs (for consistency with hcst/fcst) + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(aux))] <- dim(aux) + dim(aux) <- default_dims + # Convert obs to s2dv_cube hcst <- append(hcst, list(as.s2dv_cube(aux))) names(hcst)[length(hcst)] <- sys } @@ -105,9 +113,10 @@ load_multimodel_mean <- function(recipe) { if (!is.null(recipe$Analysis$Time$fcst_year)) { fcst <- list() for (sys in exp.name) { - aux <- Start(dat = fcst.path, + fcst.path_aux <- gsub("$model$", gsub('\\.','', sys), + fcst.path, fixed = T) + aux <- Start(dat = fcst.path_aux, var = 'fcst_EM', - model = gsub('\\.','', sys), sday = 'all', sweek = 'all', syear = 'all', @@ -122,6 +131,13 @@ load_multimodel_mean <- function(recipe) { longitude = 'dat', time = 'dat'), retrieve = TRUE) + # Adds ensemble dim to obs (for consistency with hcst/fcst) + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(aux))] <- dim(aux) + dim(aux) <- default_dims + # Convert obs to s2dv_cube fcst <- append(fcst, list(as.s2dv_cube(aux))) names(fcst)[length(fcst)] <- sys } @@ -156,7 +172,7 @@ load_multimodel_mean <- function(recipe) { obs <- Start(dat = 
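# (Note on the padding idiom added above: missing singleton dimensions
# are restored by name so all datasets expose the same dimension set:
#   aux <- array(1:6, c(syear = 3, latitude = 2))
#   default_dims <- c(dat = 1, syear = 1, latitude = 1, ensemble = 1)
#   default_dims[names(dim(aux))] <- dim(aux)
#   dim(aux) <- default_dims  # c(dat = 1, syear = 3, latitude = 2, ensemble = 1)
# The same block is repeated for the fcst ensemble means above.)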
obs.path, var = variable, - file_date = sdates$hcst, + file_date = sdates$hcst, time = 'all', latitude = 'all', latitude_reorder = Sort(), diff --git a/modules/Multimodel/multimodel_metrics.R b/modules/Multimodel/multimodel_metrics.R index 0eb0f7ac..3128f117 100644 --- a/modules/Multimodel/multimodel_metrics.R +++ b/modules/Multimodel/multimodel_metrics.R @@ -1,32 +1,45 @@ # One function to compute all the possible metrics or statistics -# indv_members is the list returned by load_multimodel - ## this is needed to compute crps/crpss but it not always will fit in mem -# ensmean is a list returned by load_multimodel_mean +# datos is a list returned by load_multimodel or load_multimodel_mean ## this reduce memory and it can be used to compute several metrics + ## in the mean the ensemble dim is length 1 for each model # probs is a list returned by load_multimodel_probs ## this is needed to compute rps/rpss - +source("modules/Saving/Saving.R") +source("modules/Crossval/R/tmp/RPS.R") +source("modules/Crossval/R/RPS_clim.R") +source("modules/Crossval/R/CRPS_clim.R") +source("modules/Crossval/R/tmp/RPSS.R") +source("modules/Crossval/R/tmp/RandomWalkTest.R") +source("modules/Crossval/R/tmp/Corr.R") +source("modules/Crossval/R/tmp/Bias.R") +source("modules/Crossval/R/tmp/SprErr.R") +source("modules/Crossval/R/tmp/Eno.R") multimodel_metrics <- function(recipe, - indv_membs = NULL, - ensmean = NULL, + datos = NULL, probs = NULL) { ncores <- recipe$Analysis$ncores na.rm <- recipe$Analysis$remove_NAs cross.method <- recipe$Analysis$cross.method + # Prepare indices for crossval + sdate_dim <- dim(datos[[1]])['syear'] + cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, + amt.points = sdate_dim, + amt.points_cor = NULL) + tmp <- array(1:length(cross), c(syear = length(cross))) + alpha <- recipe$Analysis$alpha if (is.null(cross.method)) { cross.method <- 'leave-one-out' } requested_metrics <- strsplit(recipe$Analysis$Workflow$Skill$metric, ", | |,")[[1]] - - if (!is.null(indv_membs)) { - # conver $data elemenst to list to use multiApply: - indv_membs <- append(list(obs = indv_membs$obs$data), - lapply(indv_membs$hcst, function(x) x$data)) - names(indv_membs)[-1] <- sapply(recipe$Analysis$Datasets$System, - '[[', 'name') + skill_metrics <- list() + if (!is.null(datos)) { + # conver $data elements to list to use multiApply: + datos <- append(list(obs = datos$obs$data), + lapply(datos$hcst, function(x) x$data)) + ## CRPS metrics only make sense in pool method: if (any(c('crps', 'crpss') %in% requested_metrics)) { - crps <- Apply(indv_membs, target_dims = c('syear', 'ensemble'), + crps <- Apply(datos, target_dims = c('syear', 'eCnsemble'), fun = function(obs, ...) 
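# (Two notes on the call above and the body below: 'eCnsemble' is a typo
# for 'ensemble' and is corrected later in this series (PATCH 31/53);
# and each pooled model enters through '...', with abind(..., along = 2)
# stacking them on the member dimension, e.g. with made-up sizes:
#   m1 <- array(rnorm(24 * 51), c(syear = 24, ensemble = 51))
#   m2 <- array(rnorm(24 * 9),  c(syear = 24, ensemble = 9))
#   dim(abind::abind(m1, m2, along = 2))   # 24 60
# )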
{ res <- abind(..., along = 2) names(dim(res)) <- names(dim(obs)) @@ -38,19 +51,14 @@ multimodel_metrics <- function(recipe, ncores = ncores)$output1 skill_metrics$crps <- crps # Build the reference forecast: - sdate_dim <- dim(indv_membs[[1]])['syear'] - cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, - amt.points = sdate_dim, - amt.points_cor = NULL) - tmp <- array(1:length(cross), c(syear = length(cross))) - ref_clim <- Apply(list(indv_membs$obs, tmp), + ref_clim <- Apply(list(datos$obs, tmp), target_dims = list(c('ensemble', 'syear'), NULL), function(x, y) { Subset(x, along = 'syear', indices = cross[[y]]$train.dexes, drop = T)}, ncores = ncores, output_dims = 'ensemble')$output1 - indv_membs <- append(list(ref = ref_clim), indv_membs) - crps_clim <- Apply(indv_membs[1:2], target_dims = c('syear', 'ensemble'), + datos <- append(list(ref = ref_clim), datos) + crps_clim <- Apply(datos[1:2], target_dims = c('syear', 'ensemble'), fun = function(ref, obs) { obs <- Subset(obs, along = 'ensemble', indices = 1, drop = 'selected') @@ -60,7 +68,7 @@ multimodel_metrics <- function(recipe, skill_metrics$crps_clim <- crps_clim - crpss <- Apply(indv_membs, target_dims = c('syear', 'ensemble'), + crpss <- Apply(datos, target_dims = c('syear', 'ensemble'), fun = function(ref, obs, ...) { res <- abind(..., along = 2) names(dim(res)) <- names(dim(obs)) @@ -72,8 +80,162 @@ multimodel_metrics <- function(recipe, ncores = ncores) skill_metrics$crpss <- crpss$crpss skill_metrics$crpss_significance <- crpss$sign + datos <- datos[-1] + } + ## Deterministic metrics using ensemble mean: + if ('enscorr' %in% requested_metrics) { + enscorr <- Apply(datos, target_dims = c('syear', 'ensemble'), + function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + Corr(exp = res, + obs = obs, + dat_dim = NULL, + time_dim = 'syear', + method = 'pearson', + memb_dim = 'ensemble', + memb = F, + conf = F, + pval = F, + sign = T, + alpha = alpha)}, ncores = ncores) + skill_metrics$enscorr <- enscorr$corr + skill_metrics$enscorr_significance <- enscorr$sign + } + if ('mean_bias' %in% requested_metrics) { + ## Mean Bias can make sense for non-anomalies (e.g. calibrated data) + mean_bias <- Apply(datos, target_dims = c('syear', 'ensemble'), + function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + Bias(exp = res, + obs = obs, + time_dim = 'syear', + memb_dim = 'ensemble', + alpha = alpha)}, + ncores = ncores) + skill_metrics$mean_bias <- mean_bias$bias + skill_metrics$mean_bias_significance <- mean_bias$sig + } + if ('rms' %in% requested_metrics) { + rms <- Apply(datos, target_dims = c('syear', 'ensemble'), + function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + RMS(exp = res, + obs = obs, + memb_dim = 'ensemble', dat_dim = NULL, + time_dim = 'syear', alpha = alpha)}, + ncores = ncores) + skill_metrics$rms <- rms$rms + } + ####### Do we need reference fcst to compute rmss?? + if ('rmss' %in% requested_metrics) { + # if (is.null(res$ref_obs_tr)) { + # } + rmss <- Apply(datos, target_dims = c('syear','ensemble'), + function(obs, ...) 
{ + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + RMSSS(exp = res, + obs = obs, + #ref = res$ref_obs_tr, + memb_dim = 'ensemble', dat_dim = NULL, + time_dim = 'syear', alpha = alpha, sign = TRUE)}, + ncores = ncores) + skill_metrics$rmss <- rmss$rmss + skill_metrics$rmss_significance <- rmss$sign + } + if (any(c('std', 'standard_deviation') %in% requested_metrics)) { + # This are used to compute correlation for the scorecards + # It's the temporal standard deviation of the ensmean + std_hcst <- Apply(datos[-1], target_dims = c('syear', 'ensemble'), + function(...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + res <- MeanDims(res, 'ensemble') + sd(as.vector(res))}, + ncores = ncores)$output1 + + std_obs <- Apply(datos[1], target_dims = c('syear', 'ensemble'), + fun = 'sd')$output1 + skill_metrics[['std_hcst']] <- std_hcst + skill_metrics[['std_obs']] <- std_obs + } - + if (any(c('var', 'variance') %in% requested_metrics)) { + ## Calculate variance + var_hcst <- Apply(datos[-1], target_dims = c('syear', 'ensemble'), + fun = function(...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + res <- MeanDims(res, 'ensemble') + variance <- var(as.vector(res))}, + ncores = ncores)$output1 + + var_obs <- Apply(datos[1], target_dims = c('syear', 'ensemble'), + fun = function(x) {var(as.vector(x))}, + ncores = ncores)$output1 + skill_metrics[['var_hcst']] <- var_hcst + skill_metrics[['var_obs']] <- var_obs } + if ('n_eff' %in% requested_metrics) { + ## Calculate degrees of freedom + n_eff <- s2dv::Eno(datos[[1]], time_dim = 'syear', + na.action = na.pass, + ncores = ncores) + skill_metrics[['n_eff']] <- n_eff + } + if (any(c('cov', 'covariance') %in% requested_metrics)) { + # The covariance of the ensemble mean + covariance <- Apply(datos, target_dims = c('syear', 'ensemble'), + fun = function(obs, ...) 
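# (Note on the body below: the pooled members are averaged first via
# MeanDims over 'ensemble', so this is the covariance between the
# multi-model ensemble mean and the observations, flattened to vectors.)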
{ + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + res <- MeanDims(res, 'ensemble') + cov(as.vector(obs), as.vector(res), + use = "everything", + method = "pearson")}, + ncores = ncores)$output1 + skill_metrics$covariance <- covariance + } + } + # Probabilistic metrics for categories + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + eval(parse(text = y))})}) + if (is.null(probs)) { + # calculate probs from the ensemble distribution + # Firs compure tercile limits in crossval + lims <- list() + syears_ind <- list(t = tmp) + lims$hcst <- Apply(append(syears_ind, datos[-1]), + target_dims = append(list(NULL), + lapply(1:length(datos[-1]), + function(x){c('syear', 'ensemble')})), + fun = function(t, ..., crossin, prob_lims) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + res <- Subset(res, along = 'syear', + indices = crossin[[t]]$train.dexes, drop =T) + lapply(prob_lims, function(ps) { + quantile(as.vector(res), ps, na.rm = na.rm)}) + }, output_dims = lapply(categories, function(x){'cat'}), + prob_lims = categories, + crossin = cross, ncores = ncores) + lims$obs <- Apply(append(syears_ind, datos[1]), + target_dims = list(NULL, c('syear', 'ensemble')), + fun = function(t, obs, crossin, prob_lims) { + res <- Subset(obs, along = 'syear', + indices = crossin[[t]]$train.dexes, drop =T) + lapply(prob_lims, function(ps) { + quantile(as.vector(res), ps, na.rm = na.rm)}) + }, output_dims = lapply(categories, function(x){'cat'}), + prob_lims = categories, + crossin = cross, ncores = ncores) + } + # Compute rps + return(skill_metrics) } -- GitLab From dd802e92b92858b27081279a35ede53dcc024198 Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 6 Jun 2024 17:02:15 +0200 Subject: [PATCH 29/53] update conf archive --- conf/archive.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/conf/archive.yml b/conf/archive.yml index b2237b46..22986cd2 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -23,7 +23,7 @@ gpfs: monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f24h/", "sfcWind":"monthly_mean/sfcWind_f6h/", - "psl":"monthly_mean/sfcWind_f6h/"} + "psl":"monthly_mean/psl_f6h/"} nmember: fcst: 50 hcst: 40 @@ -35,7 +35,7 @@ gpfs: institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/meteofrance/system8c3s/" monthly_mean: {"tas":"monthly_mean/tas_f6h/", - "prlr":"monthly_mean/prlr_f24h/", + "prlr":"monthly_mean/prlr_s0-24h/", "sfcWind": "monthly_mean/sfcWind_f6h/", "psl":"monthly_mean/psl_f6h/"} nmember: @@ -91,7 +91,7 @@ gpfs: institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/eccc/eccc3/" monthly_mean: {"tas":"monthly_mean/tas_f6h/", - "prlr":"monthly_mean/prlr_f24h/", + "prlr":"monthly_mean/prlr_s0-24h/", "sfcWind":"monthly_mean/sfcWind_f6h/", "psl":"monthly_mean/psl_f6h/"} nmember: -- GitLab From 44d88a98c1ce6eeab12ee04ea2aa84eae4935ce5 Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 6 Jun 2024 18:22:31 +0200 Subject: [PATCH 30/53] calculating multimodle crossval probs --- modules/Multimodel/multimodel_metrics.R | 113 +++++++++++++++++++----- 1 file changed, 90 insertions(+), 23 deletions(-) diff --git a/modules/Multimodel/multimodel_metrics.R b/modules/Multimodel/multimodel_metrics.R index 3128f117..02488f76 100644 --- a/modules/Multimodel/multimodel_metrics.R +++ b/modules/Multimodel/multimodel_metrics.R @@ -14,6 +14,8 @@ source("modules/Crossval/R/tmp/Corr.R") 
source("modules/Crossval/R/tmp/Bias.R") source("modules/Crossval/R/tmp/SprErr.R") source("modules/Crossval/R/tmp/Eno.R") +source("modules/Crossval/R/tmp/GetProbs.R") +source("modules/Visualization/Visualization.R") # to load .warning from utils multimodel_metrics <- function(recipe, datos = NULL, probs = NULL) { @@ -205,37 +207,102 @@ multimodel_metrics <- function(recipe, categories <- lapply(categories, function (x) { sapply(x, function(y) { eval(parse(text = y))})}) + # For the pool method: if (is.null(probs)) { # calculate probs from the ensemble distribution # Firs compure tercile limits in crossval lims <- list() syears_ind <- list(t = tmp) lims$hcst <- Apply(append(syears_ind, datos[-1]), - target_dims = append(list(NULL), - lapply(1:length(datos[-1]), - function(x){c('syear', 'ensemble')})), - fun = function(t, ..., crossin, prob_lims) { - res <- abind(..., along = 2) - names(dim(res)) <- c('syear', 'ensemble') - res <- Subset(res, along = 'syear', - indices = crossin[[t]]$train.dexes, drop =T) - lapply(prob_lims, function(ps) { - quantile(as.vector(res), ps, na.rm = na.rm)}) - }, output_dims = lapply(categories, function(x){'cat'}), - prob_lims = categories, - crossin = cross, ncores = ncores) + target_dims = append(list(NULL), + lapply(1:length(datos[-1]), + function(x){c('syear', 'ensemble')})), + fun = function(t, ..., crossin, prob_lims) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + res <- Subset(res, along = 'syear', + indices = crossin[[t]]$train.dexes, drop =T) + lapply(prob_lims, function(ps) { + limits <- quantile(as.vector(res), + ps, na.rm = na.rm) + limits <- array(limits, c(cat = length(limits))) + }) + }, prob_lims = categories, + crossin = cross, ncores = ncores) lims$obs <- Apply(append(syears_ind, datos[1]), - target_dims = list(NULL, c('syear', 'ensemble')), - fun = function(t, obs, crossin, prob_lims) { - res <- Subset(obs, along = 'syear', - indices = crossin[[t]]$train.dexes, drop =T) - lapply(prob_lims, function(ps) { - quantile(as.vector(res), ps, na.rm = na.rm)}) - }, output_dims = lapply(categories, function(x){'cat'}), - prob_lims = categories, - crossin = cross, ncores = ncores) + target_dims = list(NULL, c('syear', 'ensemble')), + fun = function(t, obs, crossin, prob_lims) { + res <- Subset(obs, along = 'syear', + indices = crossin[[t]]$train.dexes, drop =T) + lapply(prob_lims, function(ps) { + limits <- quantile(as.vector(res), + ps, na.rm = na.rm) + limits <- array(limits, c(cat = length(limits))) + }) + }, prob_lims = categories, + crossin = cross, ncores = ncores) + probs <- list() + probs$hcst <- list() + probs$fcst <- list(NULL) + probs$obs <- list() + for (ps in 1:length(categories)) { + lim <- lims$hcst[ps] + probs$hcst <- append(probs$hcst, + list(Apply(append(lim, datos[-1]), + target_dims = append(list(c('syear', 'cat')), + lapply(1:length(datos[-1]), + function(x){c('syear', 'ensemble')})), + fun = function(lim, ...) 
{ + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + GetProbs(data = res, time_dim = 'syear', + memb_dim = 'ensemble', + indices_for_quantiles = NULL, + prob_thresholds = NULL, + abs_thresholds = lim, + bin_dim_abs = 'cat', + weights = NULL, cross.val = FALSE)}, + ncores = ncores)$output1)) + lim <- lims$obs[ps] + probs$obs <- append(probs$obs, + list(Apply(append(lim, datos[1]), + target_dims = list(c('syear', 'cat'), + c('syear', 'ensemble')), + fun = function(lim, obs) { + GetProbs(data = obs, time_dim = 'syear', + memb_dim = 'ensemble', + indices_for_quantiles = NULL, + prob_thresholds = NULL, + abs_thresholds = lim, + bin_dim_abs = 'cat', + weights = NULL, cross.val = FALSE)}, + ncores = ncores)$output1)) - } +# if (!is.null(datos$fcst)) { +## NEEDS TO COMPUTE LIMS FOR THE FULL HINDCAST PERIOD +# lim <- lims$fcst[ps] +# probs$hcst <- append(probs$fcst, +# list(Apply(append(lim, datos[-1]), +# target_dims = append(list(c('syear', 'cat')), +# lapply(1:length(datos[-1]), +# function(x){c('syear', 'ensemble')})), +# fun = function(lim, ...) { +# res <- abind(..., along = 2) +# names(dim(res)) <- c('syear', 'ensemble') +# GetProbs(data = res, time_dim = 'syear', +# memb_dim = 'ensemble', +# indices_for_quantiles = NULL, +# prob_thresholds = NULL, +# abs_thresholds = lim, +# bin_dim_abs = 'cat', +# weights = NULL, cross.val = FALSE)}, +# ncores = ncores)$output1)) +# } + } + # At this point all method can have probs calculated: + # RPS/RPS_CLIM/RPSS + + } # Compute rps return(skill_metrics) } -- GitLab From 33bb3c81fd8f6c2763621e9bb2515571d627d40b Mon Sep 17 00:00:00 2001 From: nperez Date: Fri, 7 Jun 2024 11:29:12 +0200 Subject: [PATCH 31/53] multimodel rps --- modules/Multimodel/Multimodel.R | 9 +- modules/Multimodel/multimodel_metrics.R | 127 ++++++++++++++++++------ 2 files changed, 104 insertions(+), 32 deletions(-) diff --git a/modules/Multimodel/Multimodel.R b/modules/Multimodel/Multimodel.R index b52e7047..7b5a9d71 100644 --- a/modules/Multimodel/Multimodel.R +++ b/modules/Multimodel/Multimodel.R @@ -48,11 +48,14 @@ Multimodel <- function(recipe) { } # assessment: + # IF Fair is TRUE, the number of nmembs used to compute the probabilities + # need to be passed + source("modules/Multimodel/multimodel_metrics.R") skill_metrics <- multimodel_metrics(recipe = recipe, - indv_membs = individual_memb, - ensmean = ensmean, - probs = probs) + datos = datos, + probs = probs, + Fair = FALSE) diff --git a/modules/Multimodel/multimodel_metrics.R b/modules/Multimodel/multimodel_metrics.R index 02488f76..7a612311 100644 --- a/modules/Multimodel/multimodel_metrics.R +++ b/modules/Multimodel/multimodel_metrics.R @@ -18,20 +18,25 @@ source("modules/Crossval/R/tmp/GetProbs.R") source("modules/Visualization/Visualization.R") # to load .warning from utils multimodel_metrics <- function(recipe, datos = NULL, - probs = NULL) { + probs = NULL, + Fair = FALSE) { ncores <- recipe$Analysis$ncores na.rm <- recipe$Analysis$remove_NAs cross.method <- recipe$Analysis$cross.method + if (is.null(cross.method)) { + cross.method <- 'leave-one-out' + } # Prepare indices for crossval - sdate_dim <- dim(datos[[1]])['syear'] + sdate_dim <- dim(datos$hcst[[1]]$data)['syear'] cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, amt.points = sdate_dim, amt.points_cor = NULL) tmp <- array(1:length(cross), c(syear = length(cross))) alpha <- recipe$Analysis$alpha - if (is.null(cross.method)) { - cross.method <- 'leave-one-out' + if (is.null(alpha)) { + alpha <- 0.05 } + 
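# A standalone sketch of the cross-validated category limits computed in
# the hunks above: for each held-out start date, the thresholds are
# estimated from the remaining dates only.
vals <- rnorm(20)                      # one grid point, 20 start dates
lims <- sapply(seq_along(vals), function(t)
  quantile(vals[-t], c(1/3, 2/3), na.rm = TRUE))
dim(lims)                              # 2 thresholds x 20 folds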
requested_metrics <- strsplit(recipe$Analysis$Workflow$Skill$metric, ", | |,")[[1]] skill_metrics <- list() @@ -41,7 +46,7 @@ multimodel_metrics <- function(recipe, lapply(datos$hcst, function(x) x$data)) ## CRPS metrics only make sense in pool method: if (any(c('crps', 'crpss') %in% requested_metrics)) { - crps <- Apply(datos, target_dims = c('syear', 'eCnsemble'), + crps <- Apply(datos, target_dims = c('syear', 'ensemble'), fun = function(obs, ...) { res <- abind(..., along = 2) names(dim(res)) <- names(dim(obs)) @@ -243,7 +248,7 @@ multimodel_metrics <- function(recipe, crossin = cross, ncores = ncores) probs <- list() probs$hcst <- list() - probs$fcst <- list(NULL) + probs$fcst <- list() probs$obs <- list() for (ps in 1:length(categories)) { lim <- lims$hcst[ps] @@ -278,31 +283,95 @@ multimodel_metrics <- function(recipe, weights = NULL, cross.val = FALSE)}, ncores = ncores)$output1)) -# if (!is.null(datos$fcst)) { -## NEEDS TO COMPUTE LIMS FOR THE FULL HINDCAST PERIOD -# lim <- lims$fcst[ps] -# probs$hcst <- append(probs$fcst, -# list(Apply(append(lim, datos[-1]), -# target_dims = append(list(c('syear', 'cat')), -# lapply(1:length(datos[-1]), -# function(x){c('syear', 'ensemble')})), -# fun = function(lim, ...) { -# res <- abind(..., along = 2) -# names(dim(res)) <- c('syear', 'ensemble') -# GetProbs(data = res, time_dim = 'syear', -# memb_dim = 'ensemble', -# indices_for_quantiles = NULL, -# prob_thresholds = NULL, -# abs_thresholds = lim, -# bin_dim_abs = 'cat', -# weights = NULL, cross.val = FALSE)}, -# ncores = ncores)$output1)) -# } - } - # At this point all method can have probs calculated: - # RPS/RPS_CLIM/RPSS + if (!is.null(datos$fcst)) { + ## NEEDS TO COMPUTE LIMS FOR THE FULL HINDCAST PERIOD + lims$fcst[ps] <- Apply(datos[-1], + target_dims = c('syear', 'ensemble'), + fun = function(..., prob_lims) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + lapply(prob_lims, function(ps) { + limits <- quantile(as.vector(res), + ps, na.rm = na.rm) + limits <- array(limits, c(cat = length(limits))) + }) + }, prob_lims = categories, + ncores = ncores) + probs$fcst <- append(probs$fcst, + list(Apply(append(lims$fcst[ps], datos[-1]), + target_dims = append(list('cat'), + lapply(1:length(datos[-1]), + function(x){c('syear', 'ensemble')})), + fun = function(lim, ...) 
{ + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + GetProbs(data = res, time_dim = 'syear', + memb_dim = 'ensemble', + indices_for_quantiles = NULL, + prob_thresholds = NULL, + abs_thresholds = lim, + bin_dim_abs = 'cat', + weights = NULL, cross.val = FALSE)}, + ncores = ncores)$output1)) + } + } + # At this point all method can have probs calculated } + # TODO: distinguish between rpss and bss + # if 1 percentile -> bss + # if more than 1 -> rpss + exe_rps <- unlist(lapply(categories, function(x) { + if (length(x) > 1) { + x <- x[1] *100 + } + return(x)})) + if (!is.null(datos)) { + if (Fair) { + nmemb <- sum(unlist(lapply(datos[-1], function(x) dim(x)['ensemble']))) + } else { + nmemb <- NULL + } + } else { + nmemb <- NULL + } + # Compute rps + for (ps in 1:length(exe_rps)) { + if ('rps' %in% requested_metrics) { + rps <- RPS(exp = probs$hcst[[ps]], + obs = probs$obs[[ps]], memb_dim = NULL, + cat_dim = 'cat', cross.val = FALSE, time_dim = 'syear', + Fair = Fair, nmemb = nmemb, + ncores = ncores) + rps_clim <- Apply(list(probs$obs[[ps]]), + target_dims = c('cat', 'syear'), + RPS_clim, bin_dim_abs = 'cat', Fair = Fair, + cross.val = FALSE, ncores = ncores)$output1 + skill_metrics$rps <- rps + skill_metrics$rps_clim <- rps_clim + # names based on the categories: + # To use it when visualization works for more rps + #skill_metrics[[paste0('rps', exe_rps[ps])]] <- rps + #skill_metrics[[paste0('rps_clim', + # exe_rps[ps])]] <- rps_clim + } + if ('rpss' %in% requested_metrics) { + rpss <- RPSS(exp = probs$hcst[[ps]], + obs = probs$obs[[ps]], + ref = NULL, # ref is 1/3 by default if terciles + time_dim = 'syear', memb_dim = NULL, + cat_dim = 'cat', nmemb = nmemb, + dat_dim = NULL, + prob_thresholds = 0.1, # un use param when providing probs + indices_for_clim = NULL, + Fair = Fair, weights_exp = NULL, weights_ref = NULL, + cross.val = FALSE, na.rm = na.rm, + sig_method.type = 'two.sided.approx', alpha = alpha, + ncores = ncores) + skill_metrics$rpss <- rpss$rpss + skill_metrics$rpss_significance <- rpss$sign + } + } return(skill_metrics) } -- GitLab From ede7caf17cdcf60f2da0afa654cfe9f79dcf44f3 Mon Sep 17 00:00:00 2001 From: nperez Date: Fri, 7 Jun 2024 12:07:58 +0200 Subject: [PATCH 32/53] fix rpss --- modules/Crossval/Crossval_skill.R | 4 ++-- modules/Multimodel/Multimodel.R | 2 +- .../Multimodel/{multimodel_metrics.R => Multimodel_skill.R} | 2 +- recipe_tas.yml | 6 +++--- 4 files changed, 7 insertions(+), 7 deletions(-) rename modules/Multimodel/{multimodel_metrics.R => Multimodel_skill.R} (99%) diff --git a/modules/Crossval/Crossval_skill.R b/modules/Crossval/Crossval_skill.R index f1356f3b..417ee33b 100644 --- a/modules/Crossval/Crossval_skill.R +++ b/modules/Crossval/Crossval_skill.R @@ -41,7 +41,7 @@ Crossval_skill <- function(recipe, data_crossval, categories <- recipe$Analysis$Workflow$Probabilities$percentiles categories <- lapply(categories, function (x) { sapply(x, function(y) { - round(eval(parse(text = y)),2)})}) + eval(parse(text = y))})}) # TODO: distinguish between rpss and bss # if 1 percentile -> bss @@ -85,7 +85,7 @@ Crossval_skill <- function(recipe, data_crossval, time_dim = 'syear', memb_dim = NULL, cat_dim = 'cat', nmemb = nmemb, dat_dim = NULL, - prob_thresholds = 0.1, # un use param when providing probs + prob_thresholds = categories[[ps]], indices_for_clim = NULL, Fair = fair, weights_exp = NULL, weights_ref = NULL, cross.val = FALSE, na.rm = na.rm, diff --git a/modules/Multimodel/Multimodel.R b/modules/Multimodel/Multimodel.R index 
7b5a9d71..4aa48924 100644 --- a/modules/Multimodel/Multimodel.R +++ b/modules/Multimodel/Multimodel.R @@ -51,7 +51,7 @@ Multimodel <- function(recipe) { # IF Fair is TRUE, the number of nmembs used to compute the probabilities # need to be passed - source("modules/Multimodel/multimodel_metrics.R") + source("modules/Multimodel/Multimodel_skill.R") skill_metrics <- multimodel_metrics(recipe = recipe, datos = datos, probs = probs, diff --git a/modules/Multimodel/multimodel_metrics.R b/modules/Multimodel/Multimodel_skill.R similarity index 99% rename from modules/Multimodel/multimodel_metrics.R rename to modules/Multimodel/Multimodel_skill.R index 7a612311..242398dc 100644 --- a/modules/Multimodel/multimodel_metrics.R +++ b/modules/Multimodel/Multimodel_skill.R @@ -363,7 +363,7 @@ multimodel_metrics <- function(recipe, time_dim = 'syear', memb_dim = NULL, cat_dim = 'cat', nmemb = nmemb, dat_dim = NULL, - prob_thresholds = 0.1, # un use param when providing probs + prob_thresholds = categories[[ps]], # un use param when providing probs indices_for_clim = NULL, Fair = Fair, weights_exp = NULL, weights_ref = NULL, cross.val = FALSE, na.rm = na.rm, diff --git a/recipe_tas.yml b/recipe_tas.yml index 64d71f64..4b6a3686 100644 --- a/recipe_tas.yml +++ b/recipe_tas.yml @@ -11,9 +11,9 @@ Analysis: flux: no Datasets: System: - - {name: 'ECMWF-SEAS5.1'} - - {name: 'Meteo-France-System7'} - #name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + #- {name: 'ECMWF-SEAS5.1'} + #- {name: 'Meteo-France-System7'} + name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 Multimodel: execute: yes approach: pooled # Mandatory, bool: Either yes/true or no/false -- GitLab From 289fc152390564b2ab8c71db99a5875f8743870c Mon Sep 17 00:00:00 2001 From: nperez Date: Fri, 7 Jun 2024 17:52:58 +0200 Subject: [PATCH 33/53] crossval multimodel --- full_ecvs_multimodel_anomalies.R | 47 +++ .../Crossval/Crossval_multimodel_anomalies.R | 339 ++++++++++++++++++ modules/Loading/R/load_seasonal.R | 3 +- recipe_tas.yml | 6 +- 4 files changed, 390 insertions(+), 5 deletions(-) create mode 100644 full_ecvs_multimodel_anomalies.R create mode 100644 modules/Crossval/Crossval_multimodel_anomalies.R diff --git a/full_ecvs_multimodel_anomalies.R b/full_ecvs_multimodel_anomalies.R new file mode 100644 index 00000000..a6b07220 --- /dev/null +++ b/full_ecvs_multimodel_anomalies.R @@ -0,0 +1,47 @@ + +source("modules/Loading/Loading.R") +source("modules/Saving/Saving.R") +source("modules/Units/Units.R") +source("modules/Visualization/Visualization.R") +#args = commandArgs(trailingOnly = TRUE) +#recipe_file <- args[1] +recipe_file <- "recipe_tas.yml" +# Will it work with a single job? 
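# On the prob_thresholds fix above: even when probabilities are passed
# in, RPSS appears to fall back on prob_thresholds to build the
# climatological reference when ref is NULL (the "ref is 1/3 by default
# if terciles" comment), so the edges must match the categories:
prob_thresholds <- c(1/3, 2/3)
diff(c(0, prob_thresholds, 1))   # 1/3 1/3 1/3 reference probabilities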
+#recipe <- read_atomic_recipe(recipe_file) +original_recipe <- prepare_outputs(recipe_file, disable_checks = TRUE) +# Load datasets +models <- unlist(original_recipe$Analysis$Datasets$System) +recipe_aux <- original_recipe +datos <- list() +datos$hcst <- list() +datos$fcst <- list() +for (sys in models) { + recipe_aux$Analysis$Datasets$System <- NULL + recipe_aux$Analysis$Datasets$System$name <- as.vector(sys) + data <- Loading(recipe = recipe_aux) + data <- Units(recipe_aux, data) + datos$hcst <- append(datos$hcst, list(data$hcst)) + datos$fcst <- append(datos$fcst, list(data$fcst)) + names(datos$hcst)[length(datos$hcst)] <- gsub('\\.','', sys) + names(datos$fcst)[length(datos$fcst)] <- gsub('\\.','', sys) +} +datos$obs <- data$obs +data <- datos +rm(list=list('datos')) + +source("modules/Crossval/Crossval_anomalies.R") +res <- Crossval_anomalies(recipe = recipe, data = data) + +source("modules/Crossval/Crossval_skill.R") +skill_metrics <- Crossval_skill(recipe = recipe, data_crossval = res, + fair = FALSE, nmemb = NULL, nmemb_ref = NULL) + +Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, significance = TRUE) + +## Check logo size is appropiated for your maps: +source("tools/add_logo.R") +add_logo(recipe, "rsz_rsz_bsc_logo.png") + +# scorecards computation: + + diff --git a/modules/Crossval/Crossval_multimodel_anomalies.R b/modules/Crossval/Crossval_multimodel_anomalies.R new file mode 100644 index 00000000..7548fd5c --- /dev/null +++ b/modules/Crossval/Crossval_multimodel_anomalies.R @@ -0,0 +1,339 @@ +# Full-cross-val workflow +## This code should be valid for individual months and temporal averages +source("modules/Crossval/R/tmp/GetProbs.R") + +Crossval_multimodel_anomalies <- function(recipe, data) { + cross.method <- recipe$Analysis$cross.method + # TODO move check + if (is.null(cross.method)) { + cross.method <- 'leave-one-out' + } + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + eval(parse(text = y))})}) + ncores <- recipe$Analysis$ncores + na.rm <- recipe$Analysis$remove_NAs + ## data dimensions + sdate_dim <- dim(data$hcst[[1]]$data)['syear'] + orig_dims <- names(dim(data$hcst[[1]]$data)) + # spatial dims + if ('latitude' %in% names(dim(data$hcst$data))) { + nlats <- dim(data$hcst[[1]]$data)['latitude'] + nlons <- dim(data$hcst[[1]]$data)['longitude'] + agg = 'global' + } else if ('region' %in% names(dim(data$hcst[[1]]$data))) { + agg = 'region' + nregions <- dim(data$hcst[[1]]$data)['region'] + } + # output_dims from loop base on original dimensions + ## ex: 'dat', 'var', 'sday', 'sweek', 'ensemble', 'time', + ## 'latitude', 'longitude', 'unneeded', 'syear' + ev_dim_names <- c(orig_dims[-which(orig_dims %in% 'syear')], + names(sdate_dim)) + orig_dims[orig_dims %in% 'ensemble'] <- 'unneeded' + orig_dims[orig_dims %in% 'syear'] <- 'ensemble' + tr_dim_names <-c(orig_dims, + names(sdate_dim)) + # TODO fix it to use new version https://earth.bsc.es/gitlab/external/cstools/-/blob/dev-cross-indices/R/CST_Calibration.R#L570 + cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, + amt.points = sdate_dim, + amt.points_cor = NULL) # k = ? 
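# The .make.eval.train.dexes helper above is internal to CSTools;
# assuming classic leave-one-out behaviour, each element pairs one
# evaluation index with the remaining training indices, roughly:
#   cross <- lapply(seq_len(sdate_dim), function(t) {
#     list(eval.dexes = t, train.dexes = setdiff(seq_len(sdate_dim), t))})
#   cross[[1]]$train.dexes   # 2 3 4 ... sdate_dim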
+ ## output objects + ano_obs_ev_res <- NULL + ano_hcst_ev_res <- lapply(data$hcst, function(x) {NULL}) + ano_obs_tr_res <- NULL + # as long as probs requested in recipe: + lims_ano_hcst_tr_res <- lapply(categories, function(X) {NULL}) + lims_ano_obs_tr_res <- lapply(categories, function(X) {NULL}) + + fcst_probs <- lapply(categories, function(x){NULL}) + hcst_probs_ev <- lapply(categories, function(x){NULL}) + obs_probs_ev <- lapply(categories, function(x){NULL}) + hcst_res <- list() + ano_hcst_tr <- ano_hcst_ev <- ano_fcst <- list() + + for (t in 1:length(cross)) { + info(recipe$Run$logger, paste("crossval:", t)) + + # Observations + obs_tr <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$train.dexes) + obs_ev <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$eval.dexes, drop = 'selected') + clim_obs_tr <- MeanDims(obs_tr, 'syear') + ano_obs_tr <- s2dv::Ano(obs_tr, clim_obs_tr, + ncores = ncores) + ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr, + ncores = ncores) + # Store cross validation loops: + ano_obs_ev_res <- abind(ano_obs_ev_res, ano_obs_ev, + along = length(dim(ano_obs_ev)) + 1) + ano_obs_tr_res <- abind(ano_obs_tr_res, ano_obs_tr, + along = length(dim(ano_obs_tr)) + 1) + + # Anomalies of individual models + for (sys in 1:length(data$hcst)) { + hcst_tr <- Subset(data$hcst[[sys]]$data, along = 'syear', + indices = cross[[t]]$train.dexes) + ## evaluation indices + hcst_ev <- Subset(data$hcst[[sys]]$data, along = 'syear', + indices = cross[[t]]$eval.dexes, drop = 'selected') + # compute climatology: + clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble')) + # compute anomalies: + ano_hcst_tr <- append(ano_hcst_tr, + list(s2dv::Ano(hcst_tr, clim_hcst_tr, + ncores = ncores))) + ano_hcst_ev <- append(ano_hcst_ev, + list(s2dv::Ano(hcst_ev, clim_hcst_tr, + ncores = ncores))) + ano_hcst_ev_res[[sys]] <- abind(ano_hcst_ev_res[[sys]], + ano_hcst_ev[[sys]], + along = length(dim(ano_hcst_ev[[sys]])) + 1) + } + # compute category limits + lims_ano_hcst_tr <- Apply(ano_hcst_tr, + target_dims = c('syear', 'ensemble'), + fun = function(..., prob_lims) { + res <- abind(..., along = 2) + lapply(prob_lims, function(ps) { + quantile(as.vector(res), + ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x) {'cat'}), + prob_lims = categories, + ncores = ncores) + lims_ano_obs_tr <- Apply(ano_obs_tr, + target_dims = c('syear'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x){'cat'}), + prob_lims = categories, + ncores = ncores) + + #store results + for(ps in 1:length(categories)) { + lims_ano_hcst_tr_res[[ps]] <- abind(lims_ano_hcst_tr_res[[ps]], lims_ano_hcst_tr[[ps]], + along = length(dim(lims_ano_hcst_tr[[ps]])) + 1) + lims_ano_obs_tr_res[[ps]] <- abind(lims_ano_obs_tr_res[[ps]], lims_ano_obs_tr[[ps]], + along = length(dim(lims_ano_obs_tr[[ps]])) + 1) + } + } + info(recipe$Run$logger, + "#### Anomalies Cross-validation loop ended #####") + gc() + # Add dim names: + ano_hcst_ev_res <- lapply(ano_hcst_ev_res, function(x) { + names(dim(x)) <- ev_dim_names + return(x)}) + names(dim(ano_obs_ev_res)) <- ev_dim_names + names(dim(ano_obs_tr_res)) <- tr_dim_names + # To make crps_clim to work the reference forecast need to have same dims as obs: + ano_obs_tr_res <- Subset(ano_obs_tr_res, along = 'unneeded', + indices = 1, drop = 'selected') + for(ps in 1:length(categories)) { + names(dim(lims_ano_hcst_tr_res[[ps]])) <- c('cat', + 
orig_dims[-which(orig_dims %in% c('ensemble', 'unneeded'))], 'syear') + names(dim(lims_ano_obs_tr_res[[ps]])) <- c('cat', + tr_dim_names[-which(tr_dim_names %in% c('ensemble'))]) + lims_ano_obs_tr_res[[ps]] <- Subset(lims_ano_obs_tr_res[[ps]], + along = 'unneeded', indices = 1, drop = 'selected') + } + + # Forecast anomalies: + if (!is.null(data$fcst)) { + clim_fcst <- Apply(ano_hcst_ev_res, + target_dims = c('syear', 'ensemble'), + function(x) { + res <- abind(..., along = 2) + res <- mean(as.vector(res), na.rm = na.rm)} + ncores = ncores)$output1 +browser() + ano_fcst <- Ano(data = data$fcst$data, clim = clim_hcst) + # Terciles limits using the whole hindcast period: + lims_fcst <- Apply(ano_hcst_ev_res, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x) {'cat'}), + prob_lims = categories, + ncores = ncores) + } + + # Compute Probabilities + for (ps in 1:length(categories)) { + hcst_probs_ev[[ps]] <- GetProbs(ano_hcst_ev_res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', abs_thresholds = lims_ano_hcst_tr[[ps]], + ncores = ncores) + obs_probs_ev[[ps]] <- GetProbs(ano_obs_ev_res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_ano_obs_tr_res[[ps]], + ncores = ncores) + if (!is.null(data$fcst)) { + fcst_probs[[ps]] <- GetProbs(data$fcst$data, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_fcst[[ps]], + ncores = ncores) + } + } + # Convert to s2dv_cubes the resulting anomalies + ano_hcst <- data$hcst + ano_hcst$data <- ano_hcst_ev_res + ano_obs <- data$obs + ano_obs$data <- ano_obs_ev_res + + info(recipe$Run$logger, + "#### Anomalies and Probabilities Done #####") + if (recipe$Analysis$Workflow$Anomalies$save != 'none') { + info(recipe$Run$logger, "##### START SAVING ANOMALIES #####") + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Anomalies/") + # Save forecast + if ((recipe$Analysis$Workflow$Anomalies$save %in% + c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) { + save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') + } + # Save hindcast + if (recipe$Analysis$Workflow$Anomalies$save %in% + c('all', 'exp_only')) { + save_forecast(recipe = recipe, data_cube = ano_hcst, type = 'hcst') + } + # Save observation + if (recipe$Analysis$Workflow$Anomalies$save == 'all') { + save_observations(recipe = recipe, data_cube = ano_obs) + } + } + # Save probability bins + probs_hcst <- list() + probs_fcst <- list() + probs_obs <- list() + all_names <- NULL + # Make categories rounded number to use as names: + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + round(eval(parse(text = y)),2)})}) + + for (ps in 1:length(categories)) { + for (perc in 1:(length(categories[[ps]]) + 1)) { + if (perc == 1) { + name_elem <- paste0("below_", categories[[ps]][perc]) + } else if (perc == length(categories[[ps]]) + 1) { + name_elem <- paste0("above_", categories[[ps]][perc-1]) + } else { + name_elem <- paste0("from_", categories[[ps]][perc-1], + "_to_", categories[[ps]][perc]) + } + probs_hcst <- append(list(Subset(hcst_probs_ev[[ps]], + along = 'cat', 
indices = perc, drop = 'all')), + probs_hcst) + probs_obs <- append(list(Subset(obs_probs_ev[[ps]], + along = 'cat', indices = perc, drop = 'all')), + probs_obs) + if (!is.null(data$fcst)) { + probs_fcst <- append(list(Subset(fcst_probs[[ps]], + along = 'cat', indices = perc, drop = 'all')), + probs_fcst) + } + all_names <- c(all_names, name_elem) + } + } + names(probs_hcst) <- all_names + if (!('var' %in% names(dim(probs_hcst[[1]])))) { + probs_hcst <- lapply(probs_hcst, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } + names(probs_obs) <- all_names + if (!('var' %in% names(dim(probs_obs[[1]])))) { + probs_obs <- lapply(probs_obs, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } + + if (!is.null(data$fcst)) { + names(probs_fcst) <- all_names + if (!('var' %in% names(dim(probs_fcst[[1]])))) { + probs_fcst <- lapply(probs_fcst, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } + if (!('syear' %in% names(dim(probs_fcst[[1]])))) { + probs_fcst <- lapply(probs_fcst, function(x) { + dim(x) <- c(syear = 1, dim(x)) + return(x)}) + } + } + if (recipe$Analysis$Workflow$Probabilities$save %in% + c('all', 'bins_only')) { + save_probabilities(recipe = recipe, probs = probs_hcst, + data_cube = data$hcst, agg = agg, + type = "hcst") + save_probabilities(recipe = recipe, probs = probs_obs, + data_cube = data$hcst, agg = agg, + type = "obs") + # TODO Forecast + if (!is.null(probs_fcst)) { + save_probabilities(recipe = recipe, probs = probs_fcst, + data_cube = data$fcst, agg = agg, + type = "fcst") + } + } + # Save ensemble mean for multimodel option: + hcst_EM <- MeanDims(ano_hcst$data, 'ensemble', drop = T) + save_metrics(recipe = recipe, + metrics = list(hcst_EM = + Subset(hcst_EM, along = 'dat', indices = 1, drop = 'selected')), + data_cube = data$hcst, agg = agg, + module = "statistics") + fcst_EM <- NULL + if (!is.null(data$fcst)) { + fcst_EM <- MeanDims(data$fcst$data, 'ensemble', drop = T) + save_metrics(recipe = recipe, + metrics = list(fcst_EM = + Subset(fcst_EM, along = 'dat', indices = 1, drop = 'selected')), + data_cube = data$fcst, agg = agg, + module = "statistics") + } + return(list(hcst = ano_hcst, obs = ano_obs, fcst = data$fcst, + hcst.full_val = data$hcst, obs.full_val = data$obs, + hcst_EM = hcst_EM, fcst_EM = fcst_EM, + cat_lims = list(hcst_tr = lims_ano_hcst_tr_res, + obs_tr = lims_ano_obs_tr_res), + probs = list(hcst_ev = hcst_probs_ev, + obs_ev = obs_probs_ev), + ref_obs_tr = ano_obs_tr_res)) +} + + +## The result contains the inputs for Skill_full_crossval. 
+## this is a list with the required elements: + ## probs is a list with + ## probs$hcst_ev and probs$obs_ev + ## probs$hcst_ev will have as many elements in the $Probabilities$percentiles + ## each element will be an array with 'cat' dimension + ## the same for probs$obs_ev + ## hcst is a s2dv_cube for the post-processed hindcast for the evalutaion indices + ## in this case cross validated anomalies + ## obs is a s2dv_cube for the post-processed obs + ## in this case cross validated anomalies + ## fcst is a s2dv_cube for the post-processed fcst + ## in this case cross anomalies with the full hindcast period + ## this object is not required for skill assessment + ## hcst.full_val and obs.full_val are the original data to compute mean bias + ## cat_lims used to compute the probabilities + ## this object is not required for skill assessment + ## ref_obs_tr is an array with the cross-validate observed anomalies + ## to be used as reference forecast in the CRPSS and CRPS_clim + ## it is computed from the training indices + diff --git a/modules/Loading/R/load_seasonal.R b/modules/Loading/R/load_seasonal.R index 42b74b16..414c88f3 100644 --- a/modules/Loading/R/load_seasonal.R +++ b/modules/Loading/R/load_seasonal.R @@ -18,7 +18,6 @@ load_seasonal <- function(recipe) { lons.max <- recipe$Analysis$Region$lonmax ref.name <- recipe$Analysis$Datasets$Reference$name exp.name <- recipe$Analysis$Datasets$System$name - variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] store.freq <- recipe$Analysis$Variables$freq @@ -386,7 +385,7 @@ load_seasonal <- function(recipe) { ############################################################################ ############################################################################ - .log_memory_usage(recipe$Run$logger, when = "After loading") +# .log_memory_usage(recipe$Run$logger, when = "After loading") return(list(hcst = hcst, fcst = fcst, obs = obs)) } diff --git a/recipe_tas.yml b/recipe_tas.yml index 4b6a3686..5af89ec1 100644 --- a/recipe_tas.yml +++ b/recipe_tas.yml @@ -11,9 +11,9 @@ Analysis: flux: no Datasets: System: - #- {name: 'ECMWF-SEAS5.1'} - #- {name: 'Meteo-France-System7'} - name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + - {name: 'ECMWF-SEAS5.1'} + - {name: 'Meteo-France-System7'} + # name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 Multimodel: execute: yes approach: pooled # Mandatory, bool: Either yes/true or no/false -- GitLab From 3f9f49f7df7fe1b9264d48dcfa48c137d6a9835e Mon Sep 17 00:00:00 2001 From: nperez Date: Mon, 10 Jun 2024 14:48:09 +0200 Subject: [PATCH 34/53] multimodel version --- full_ecvs_multimodel_anomalies.R | 17 +- .../Crossval/Crossval_multimodel_anomalies.R | 310 ++++++++++-------- modules/Crossval/Crossval_multimodel_skill.R | 269 +++++++++++++++ 3 files changed, 449 insertions(+), 147 deletions(-) create mode 100644 modules/Crossval/Crossval_multimodel_skill.R diff --git a/full_ecvs_multimodel_anomalies.R b/full_ecvs_multimodel_anomalies.R index a6b07220..73970fa8 100644 --- a/full_ecvs_multimodel_anomalies.R +++ b/full_ecvs_multimodel_anomalies.R @@ -25,18 +25,21 @@ for (sys in models) { names(datos$hcst)[length(datos$hcst)] <- gsub('\\.','', sys) names(datos$fcst)[length(datos$fcst)] <- gsub('\\.','', sys) } +data_aux <- data datos$obs <- data$obs data <- datos -rm(list=list('datos')) +rm(list = 'datos') -source("modules/Crossval/Crossval_anomalies.R") -res <- Crossval_anomalies(recipe = recipe, data = data) 
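# A quick standalone check of the split pattern used in load_seasonal.R
# above (and for requested_metrics elsewhere): it accepts ", ", " " or ",".
strsplit("tas, prlr,psl", ", | |,")[[1]]
# [1] "tas"  "prlr" "psl"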
+source("modules/Crossval/Crossval_multimodel_anomalies.R") +res <- Crossval_multimodel_anomalies(recipe = original_recipe, data = data) -source("modules/Crossval/Crossval_skill.R") -skill_metrics <- Crossval_skill(recipe = recipe, data_crossval = res, - fair = FALSE, nmemb = NULL, nmemb_ref = NULL) +source("modules/Crossval/Crossval_multimodel_skill.R") +skill_metrics <- multimodel_metrics(recipe = original_recipe, + data = res, Fair = FALSE) + + +Visualization(recipe = recipe_aux, data = data_aux, skill_metrics = skill_metrics, significance = TRUE) -Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, significance = TRUE) ## Check logo size is appropiated for your maps: source("tools/add_logo.R") diff --git a/modules/Crossval/Crossval_multimodel_anomalies.R b/modules/Crossval/Crossval_multimodel_anomalies.R index 7548fd5c..89afff40 100644 --- a/modules/Crossval/Crossval_multimodel_anomalies.R +++ b/modules/Crossval/Crossval_multimodel_anomalies.R @@ -140,22 +140,23 @@ Crossval_multimodel_anomalies <- function(recipe, data) { lims_ano_obs_tr_res[[ps]] <- Subset(lims_ano_obs_tr_res[[ps]], along = 'unneeded', indices = 1, drop = 'selected') } - # Forecast anomalies: + ano_fcst <- lims_fcst <- NULL if (!is.null(data$fcst)) { - clim_fcst <- Apply(ano_hcst_ev_res, + ano_fcst <- Apply(ano_hcst_ev_res, target_dims = c('syear', 'ensemble'), - function(x) { + function(...) { res <- abind(..., along = 2) - res <- mean(as.vector(res), na.rm = na.rm)} - ncores = ncores)$output1 -browser() - ano_fcst <- Ano(data = data$fcst$data, clim = clim_hcst) + clim <- mean(as.vector(res), na.rm = na.rm) + res <- res - clim}, + ncores = ncores, + output_dims = c('syear', 'ensemble'))$output1 # Terciles limits using the whole hindcast period: lims_fcst <- Apply(ano_hcst_ev_res, target_dims = c('syear', 'ensemble'), - fun = function(x, prob_lims) { + fun = function(..., prob_lims) { + res <- abind(..., along = 2) lapply(prob_lims, function(ps) { - quantile(as.vector(x), ps, na.rm = na.rm)})}, + quantile(as.vector(res), ps, na.rm = na.rm)})}, output_dims = lapply(categories, function(x) {'cat'}), prob_lims = categories, ncores = ncores) @@ -163,12 +164,23 @@ browser() # Compute Probabilities for (ps in 1:length(categories)) { - hcst_probs_ev[[ps]] <- GetProbs(ano_hcst_ev_res, time_dim = 'syear', - prob_thresholds = NULL, - bin_dim_abs = 'cat', - indices_for_quantiles = NULL, - memb_dim = 'ensemble', abs_thresholds = lims_ano_hcst_tr[[ps]], - ncores = ncores) + # create a list of unknown length of systems and limits: + target_dims_list <- append(list(lims = 'cat'), + lapply(ano_hcst_ev_res, function(x) { + c('syear', 'ensemble')})) + hcst_probs_ev[[ps]] <- Apply(append(list(lims = lims_ano_hcst_tr[[ps]]), + ano_hcst_ev_res), + target_dims = target_dims_list, + function(lims, ...) 
{ + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + GetProbs(res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims)}, + ncores = ncores)$output1 obs_probs_ev[[ps]] <- GetProbs(ano_obs_ev_res, time_dim = 'syear', prob_thresholds = NULL, bin_dim_abs = 'cat', @@ -176,144 +188,162 @@ browser() memb_dim = 'ensemble', abs_thresholds = lims_ano_obs_tr_res[[ps]], ncores = ncores) +fcst_probs <- NULL if (!is.null(data$fcst)) { - fcst_probs[[ps]] <- GetProbs(data$fcst$data, time_dim = 'syear', - prob_thresholds = NULL, - bin_dim_abs = 'cat', - indices_for_quantiles = NULL, - memb_dim = 'ensemble', - abs_thresholds = lims_fcst[[ps]], - ncores = ncores) +# fcst_probs[[ps]] <- Apply(append(list(lims = lims_fcst[[ps]]), +# ano_fcst), +# target_dims = list('cat', +# c('syear', 'ensemble')), +# function(lims, fcst) { + fcst_probs[[ps]] <- GetProbs(ano_fcst, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_fcst[[ps]], +#}, + ncores = ncores) } } + return(list(hcst = ano_hcst_ev_res, obs = ano_obs_ev_res, fcst = ano_fcst, + hcst.full_val = data$hcst, obs.full_val = data$obs, + #hcst_EM = hcst_EM, fcst_EM = fcst_EM, + cat_lims = list(hcst_tr = lims_ano_hcst_tr_res, + obs_tr = lims_ano_obs_tr_res, + fcst = lims_fcst), + probs = list(hcst_ev = hcst_probs_ev, + obs_ev = obs_probs_ev, fcst_probs), + ref_obs_tr = ano_obs_tr_res)) +} +##### TO SAVE edit the lines below: # Convert to s2dv_cubes the resulting anomalies - ano_hcst <- data$hcst - ano_hcst$data <- ano_hcst_ev_res - ano_obs <- data$obs - ano_obs$data <- ano_obs_ev_res +# ano_hcst <- data$hcst +# ano_hcst$data <- ano_hcst_ev_res +# ano_obs <- data$obs +# ano_obs$data <- ano_obs_ev_res - info(recipe$Run$logger, - "#### Anomalies and Probabilities Done #####") - if (recipe$Analysis$Workflow$Anomalies$save != 'none') { - info(recipe$Run$logger, "##### START SAVING ANOMALIES #####") - recipe$Run$output_dir <- paste0(recipe$Run$output_dir, - "/outputs/Anomalies/") +# info(recipe$Run$logger, +# "#### Anomalies and Probabilities Done #####") +# if (recipe$Analysis$Workflow$Anomalies$save != 'none') { +# info(recipe$Run$logger, "##### START SAVING ANOMALIES #####") +# recipe$Run$output_dir <- paste0(recipe$Run$output_dir, +# "/outputs/Anomalies/") # Save forecast - if ((recipe$Analysis$Workflow$Anomalies$save %in% - c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) { - save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') - } +# if ((recipe$Analysis$Workflow$Anomalies$save %in% +# c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) { +# save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') +# } # Save hindcast - if (recipe$Analysis$Workflow$Anomalies$save %in% - c('all', 'exp_only')) { - save_forecast(recipe = recipe, data_cube = ano_hcst, type = 'hcst') - } - # Save observation - if (recipe$Analysis$Workflow$Anomalies$save == 'all') { - save_observations(recipe = recipe, data_cube = ano_obs) - } - } +# if (recipe$Analysis$Workflow$Anomalies$save %in% +# c('all', 'exp_only')) { +# save_forecast(recipe = recipe, data_cube = ano_hcst, type = 'hcst') +# } +# # Save observation +# if (recipe$Analysis$Workflow$Anomalies$save == 'all') { +# save_observations(recipe = recipe, data_cube = ano_obs) +# } +# } # Save probability bins - probs_hcst <- list() - probs_fcst <- list() - probs_obs <- list() - 
all_names <- NULL +# probs_hcst <- list() +# probs_fcst <- list() +# probs_obs <- list() +# all_names <- NULL # Make categories rounded number to use as names: - categories <- recipe$Analysis$Workflow$Probabilities$percentiles - categories <- lapply(categories, function (x) { - sapply(x, function(y) { - round(eval(parse(text = y)),2)})}) +# categories <- recipe$Analysis$Workflow$Probabilities$percentiles +# categories <- lapply(categories, function (x) { +# sapply(x, function(y) { +# round(eval(parse(text = y)),2)})}) - for (ps in 1:length(categories)) { - for (perc in 1:(length(categories[[ps]]) + 1)) { - if (perc == 1) { - name_elem <- paste0("below_", categories[[ps]][perc]) - } else if (perc == length(categories[[ps]]) + 1) { - name_elem <- paste0("above_", categories[[ps]][perc-1]) - } else { - name_elem <- paste0("from_", categories[[ps]][perc-1], - "_to_", categories[[ps]][perc]) - } - probs_hcst <- append(list(Subset(hcst_probs_ev[[ps]], - along = 'cat', indices = perc, drop = 'all')), - probs_hcst) - probs_obs <- append(list(Subset(obs_probs_ev[[ps]], - along = 'cat', indices = perc, drop = 'all')), - probs_obs) - if (!is.null(data$fcst)) { - probs_fcst <- append(list(Subset(fcst_probs[[ps]], - along = 'cat', indices = perc, drop = 'all')), - probs_fcst) - } - all_names <- c(all_names, name_elem) - } - } - names(probs_hcst) <- all_names - if (!('var' %in% names(dim(probs_hcst[[1]])))) { - probs_hcst <- lapply(probs_hcst, function(x) { - dim(x) <- c(var = 1, dim(x)) - return(x)}) - } - names(probs_obs) <- all_names - if (!('var' %in% names(dim(probs_obs[[1]])))) { - probs_obs <- lapply(probs_obs, function(x) { - dim(x) <- c(var = 1, dim(x)) - return(x)}) - } + # for (ps in 1:length(categories)) { + # for (perc in 1:(length(categories[[ps]]) + 1)) { + # if (perc == 1) { + # name_elem <- paste0("below_", categories[[ps]][perc]) + # } else if (perc == length(categories[[ps]]) + 1) { + # name_elem <- paste0("above_", categories[[ps]][perc-1]) + # } else { + # name_elem <- paste0("from_", categories[[ps]][perc-1], + # "_to_", categories[[ps]][perc]) + # } + # probs_hcst <- append(list(Subset(hcst_probs_ev[[ps]], + # along = 'cat', indices = perc, drop = 'all')), + # probs_hcst) + # probs_obs <- append(list(Subset(obs_probs_ev[[ps]], + # along = 'cat', indices = perc, drop = 'all')), + # probs_obs) + # if (!is.null(data$fcst)) { + # probs_fcst <- append(list(Subset(fcst_probs[[ps]], + # along = 'cat', indices = perc, drop = 'all')), + # probs_fcst) + # } + # all_names <- c(all_names, name_elem) + # } + # } + # names(probs_hcst) <- all_names + # if (!('var' %in% names(dim(probs_hcst[[1]])))) { + # probs_hcst <- lapply(probs_hcst, function(x) { + # dim(x) <- c(var = 1, dim(x)) + # return(x)}) + # } + # names(probs_obs) <- all_names + # if (!('var' %in% names(dim(probs_obs[[1]])))) { + # probs_obs <- lapply(probs_obs, function(x) { + # dim(x) <- c(var = 1, dim(x)) + # return(x)}) + # } - if (!is.null(data$fcst)) { - names(probs_fcst) <- all_names - if (!('var' %in% names(dim(probs_fcst[[1]])))) { - probs_fcst <- lapply(probs_fcst, function(x) { - dim(x) <- c(var = 1, dim(x)) - return(x)}) - } - if (!('syear' %in% names(dim(probs_fcst[[1]])))) { - probs_fcst <- lapply(probs_fcst, function(x) { - dim(x) <- c(syear = 1, dim(x)) - return(x)}) - } - } - if (recipe$Analysis$Workflow$Probabilities$save %in% - c('all', 'bins_only')) { - save_probabilities(recipe = recipe, probs = probs_hcst, - data_cube = data$hcst, agg = agg, - type = "hcst") - save_probabilities(recipe = recipe, probs = 
probs_obs, - data_cube = data$hcst, agg = agg, - type = "obs") + # if (!is.null(data$fcst)) { + # names(probs_fcst) <- all_names + # if (!('var' %in% names(dim(probs_fcst[[1]])))) { + # probs_fcst <- lapply(probs_fcst, function(x) { + # dim(x) <- c(var = 1, dim(x)) + # return(x)}) + # } + # if (!('syear' %in% names(dim(probs_fcst[[1]])))) { + # probs_fcst <- lapply(probs_fcst, function(x) { + # dim(x) <- c(syear = 1, dim(x)) + # return(x)}) + # } + # } + # if (recipe$Analysis$Workflow$Probabilities$save %in% + # c('all', 'bins_only')) { + # save_probabilities(recipe = recipe, probs = probs_hcst, + # data_cube = data$hcst, agg = agg, + # type = "hcst") + # save_probabilities(recipe = recipe, probs = probs_obs, + # data_cube = data$hcst, agg = agg, + # type = "obs") # TODO Forecast - if (!is.null(probs_fcst)) { - save_probabilities(recipe = recipe, probs = probs_fcst, - data_cube = data$fcst, agg = agg, - type = "fcst") - } - } + # if (!is.null(probs_fcst)) { + # save_probabilities(recipe = recipe, probs = probs_fcst, + # data_cube = data$fcst, agg = agg, + # type = "fcst") + # } + # } # Save ensemble mean for multimodel option: - hcst_EM <- MeanDims(ano_hcst$data, 'ensemble', drop = T) - save_metrics(recipe = recipe, - metrics = list(hcst_EM = - Subset(hcst_EM, along = 'dat', indices = 1, drop = 'selected')), - data_cube = data$hcst, agg = agg, - module = "statistics") - fcst_EM <- NULL - if (!is.null(data$fcst)) { - fcst_EM <- MeanDims(data$fcst$data, 'ensemble', drop = T) - save_metrics(recipe = recipe, - metrics = list(fcst_EM = - Subset(fcst_EM, along = 'dat', indices = 1, drop = 'selected')), - data_cube = data$fcst, agg = agg, - module = "statistics") - } - return(list(hcst = ano_hcst, obs = ano_obs, fcst = data$fcst, - hcst.full_val = data$hcst, obs.full_val = data$obs, - hcst_EM = hcst_EM, fcst_EM = fcst_EM, - cat_lims = list(hcst_tr = lims_ano_hcst_tr_res, - obs_tr = lims_ano_obs_tr_res), - probs = list(hcst_ev = hcst_probs_ev, - obs_ev = obs_probs_ev), - ref_obs_tr = ano_obs_tr_res)) -} + # hcst_EM <- MeanDims(ano_hcst$data, 'ensemble', drop = T) + # save_metrics(recipe = recipe, + # metrics = list(hcst_EM = + # Subset(hcst_EM, along = 'dat', indices = 1, drop = 'selected')), + # data_cube = data$hcst, agg = agg, + # module = "statistics") + #fcst_EM <- NULL + #if (!is.null(data$fcst)) { + # fcst_EM <- MeanDims(data$fcst$data, 'ensemble', drop = T) + # save_metrics(recipe = recipe, + # metrics = list(fcst_EM = + # Subset(fcst_EM, along = 'dat', indices = 1, drop = 'selected')), + # data_cube = data$fcst, agg = agg, + # module = "statistics") + #} + #return(list(hcst = ano_hcst, obs = ano_obs, fcst = data$fcst, + # hcst.full_val = data$hcst, obs.full_val = data$obs, + # hcst_EM = hcst_EM, fcst_EM = fcst_EM, + # cat_lims = list(hcst_tr = lims_ano_hcst_tr_res, + # obs_tr = lims_ano_obs_tr_res), + # probs = list(hcst_ev = hcst_probs_ev, + # obs_ev = obs_probs_ev), + # ref_obs_tr = ano_obs_tr_res)) +#} ## The result contains the inputs for Skill_full_crossval. 
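The list built by the return() call above is the interface the skill step consumes. A minimal sketch of how it can be traversed (`res` stands for the returned list; the element names are taken from that return() call, everything else is illustrative):

    dim(res$hcst)                # cross-validated hindcast anomalies
    length(res$probs$hcst_ev)    # one array per percentile set in the recipe
    dim(res$probs$hcst_ev[[1]])  # each array carries a 'cat' dimension with the bins
    res$probs[[3]]               # fcst probabilities; note they are added unnamed in the return() above
    dim(res$ref_obs_tr)          # training obs anomalies, the reference for CRPS_clim/CRPSS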
diff --git a/modules/Crossval/Crossval_multimodel_skill.R b/modules/Crossval/Crossval_multimodel_skill.R
new file mode 100644
index 00000000..326b9c39
--- /dev/null
+++ b/modules/Crossval/Crossval_multimodel_skill.R
@@ -0,0 +1,269 @@
+# One function to compute all the possible metrics or statistics
+# datos is a list returned by load_multimodel or load_multimodel_mean
+  ## this reduces memory and can be used to compute several metrics
+  ## for the ensemble mean, the ensemble dim is length 1 for each model
+# probs is a list returned by load_multimodel_probs
+  ## this is needed to compute rps/rpss
+source("modules/Saving/Saving.R")
+source("modules/Crossval/R/tmp/RPS.R")
+source("modules/Crossval/R/RPS_clim.R")
+source("modules/Crossval/R/CRPS_clim.R")
+source("modules/Crossval/R/tmp/RPSS.R")
+source("modules/Crossval/R/tmp/RandomWalkTest.R")
+source("modules/Crossval/R/tmp/Corr.R")
+source("modules/Crossval/R/tmp/Bias.R")
+source("modules/Crossval/R/tmp/SprErr.R")
+source("modules/Crossval/R/tmp/Eno.R")
+source("modules/Crossval/R/tmp/GetProbs.R")
+source("modules/Visualization/Visualization.R") # to load .warning from utils
+multimodel_metrics <- function(recipe,
+                               data = NULL,
+                               Fair = FALSE) {
+  ncores <- recipe$Analysis$ncores
+  na.rm <- recipe$Analysis$remove_NAs
+  cross.method <- recipe$Analysis$cross.method
+  if (is.null(cross.method)) {
+    cross.method <- 'leave-one-out'
+  }
+  # Prepare indices for crossval
+  sdate_dim <- dim(data$hcst[[1]])['syear']
+  cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method,
+                                            amt.points = sdate_dim,
+                                            amt.points_cor = NULL)
+  tmp <- array(1:length(cross), c(syear = length(cross)))
+  alpha <- recipe$Analysis$alpha
+  if (is.null(alpha)) {
+    alpha <- 0.05
+  }
+
+  requested_metrics <- strsplit(recipe$Analysis$Workflow$Skill$metric,
+                                ", | |,")[[1]]
+  skill_metrics <- list()
+  if (!is.null(data)) {
+    # convert $data elements to a list to use multiApply:
+    datos <- append(list(obs = data$obs), data$hcst)
+    ## CRPS metrics only make sense with the pooled method:
+    if (any(c('crps', 'crpss') %in% requested_metrics)) {
+      crps <- Apply(datos, target_dims = c('syear', 'ensemble'),
+                    fun = function(obs, ...) {
+                      res <- abind(..., along = 2)
+                      names(dim(res)) <- names(dim(obs))
+                      obs <- Subset(obs, along = 'ensemble',
+                                    indices = 1, drop = 'selected')
+                      s2dv:::.CRPS(exp = res, obs = obs, dat_dim = NULL,
+                                   time_dim = 'syear',
+                                   memb_dim = 'ensemble')},
+                    ncores = ncores)$output1
+      skill_metrics$crps <- crps
+      # Build the reference forecast:
+      ref_clim <- Apply(list(datos$obs, tmp),
+                        target_dims = list(c('ensemble', 'syear'), NULL),
+                        function(x, y) {
+                          Subset(x, along = 'syear',
+                                 indices = cross[[y]]$train.dexes, drop = T)},
+                        ncores = ncores, output_dims = 'ensemble')$output1
+      datos <- append(list(ref = ref_clim), datos)
+      crps_clim <- Apply(datos[1:2], target_dims = c('syear', 'ensemble'),
+                         fun = function(ref, obs) {
+                           obs <- Subset(obs, along = 'ensemble',
+                                         indices = 1, drop = 'selected')
+                           s2dv:::.CRPS(exp = ref, obs = obs, dat_dim = NULL,
+                                        time_dim = 'syear', memb_dim = 'ensemble')},
+                         ncores = ncores)$output1
+      skill_metrics$crps_clim <- crps_clim
+
+
+      crpss <- Apply(datos, target_dims = c('syear', 'ensemble'),
+                     fun = function(ref, obs, ...)
{
+                       res <- abind(..., along = 2)
+                       names(dim(res)) <- names(dim(obs))
+                       obs <- Subset(obs, along = 'ensemble',
+                                     indices = 1, drop = 'selected')
+                       CRPSS(exp = res, obs = obs, ref = ref,
+                             memb_dim = 'ensemble', Fair = FALSE,
+                             time_dim = 'syear', clim.cross.val = FALSE)},
+                     ncores = ncores)
+      skill_metrics$crpss <- crpss$crpss
+      skill_metrics$crpss_significance <- crpss$sign
+      datos <- datos[-1]
+    }
+    ## Deterministic metrics using ensemble mean:
+    if ('enscorr' %in% requested_metrics) {
+      enscorr <- Apply(datos, target_dims = c('syear', 'ensemble'),
+                       function(obs, ...) {
+                         res <- abind(..., along = 2)
+                         names(dim(res)) <- c('syear', 'ensemble')
+                         Corr(exp = res,
+                              obs = obs,
+                              dat_dim = NULL,
+                              time_dim = 'syear',
+                              method = 'pearson',
+                              memb_dim = 'ensemble',
+                              memb = F,
+                              conf = F,
+                              pval = F,
+                              sign = T,
+                              alpha = alpha)}, ncores = ncores)
+      skill_metrics$enscorr <- enscorr$corr
+      skill_metrics$enscorr_significance <- enscorr$sign
+    }
+    if ('mean_bias' %in% requested_metrics) {
+      ## Mean Bias can make sense for non-anomalies (e.g. calibrated data)
+      mean_bias <- Apply(datos, target_dims = c('syear', 'ensemble'),
+                         function(obs, ...) {
+                           res <- abind(..., along = 2)
+                           names(dim(res)) <- c('syear', 'ensemble')
+                           Bias(exp = res,
+                                obs = obs,
+                                time_dim = 'syear',
+                                memb_dim = 'ensemble',
+                                alpha = alpha)},
+                         ncores = ncores)
+      skill_metrics$mean_bias <- mean_bias$bias
+      skill_metrics$mean_bias_significance <- mean_bias$sig
+    }
+    if ('rms' %in% requested_metrics) {
+      rms <- Apply(datos, target_dims = c('syear', 'ensemble'),
+                   function(obs, ...) {
+                     res <- abind(..., along = 2)
+                     names(dim(res)) <- c('syear', 'ensemble')
+                     RMS(exp = res,
+                         obs = obs,
+                         memb_dim = 'ensemble', dat_dim = NULL,
+                         time_dim = 'syear', alpha = alpha)},
+                   ncores = ncores)
+      skill_metrics$rms <- rms$rms
+    }
+    ####### Do we need reference fcst to compute rmss??
+    if ('rmss' %in% requested_metrics) {
+      # if (is.null(res$ref_obs_tr)) {
+      # }
+      rmss <- Apply(datos, target_dims = c('syear','ensemble'),
+                    function(obs, ...) {
+                      res <- abind(..., along = 2)
+                      names(dim(res)) <- c('syear', 'ensemble')
+                      RMSSS(exp = res,
+                            obs = obs,
+                            #ref = res$ref_obs_tr,
+                            memb_dim = 'ensemble', dat_dim = NULL,
+                            time_dim = 'syear', alpha = alpha, sign = TRUE)},
+                    ncores = ncores)
+      skill_metrics$rmss <- rmss$rmss
+      skill_metrics$rmss_significance <- rmss$sign
+    }
+    if (any(c('std', 'standard_deviation') %in% requested_metrics)) {
+      # These are used to compute the correlation for the scorecards
+      # It's the temporal standard deviation of the ensemble mean
+      std_hcst <- Apply(datos[-1], target_dims = c('syear', 'ensemble'),
+                        function(...) {
+                          res <- abind(..., along = 2)
+                          names(dim(res)) <- c('syear', 'ensemble')
+                          res <- MeanDims(res, 'ensemble')
+                          sd(as.vector(res))},
+                        ncores = ncores)$output1
+
+      std_obs <- Apply(datos[1], target_dims = c('syear', 'ensemble'),
+                       fun = 'sd')$output1
+      skill_metrics[['std_hcst']] <- std_hcst
+      skill_metrics[['std_obs']] <- std_obs
+    }
+
+    if (any(c('var', 'variance') %in% requested_metrics)) {
+      ## Calculate variance
+      var_hcst <- Apply(datos[-1], target_dims = c('syear', 'ensemble'),
+                        fun = function(...)
{ + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + res <- MeanDims(res, 'ensemble') + variance <- var(as.vector(res))}, + ncores = ncores)$output1 + + var_obs <- Apply(datos[1], target_dims = c('syear', 'ensemble'), + fun = function(x) {var(as.vector(x))}, + ncores = ncores)$output1 + skill_metrics[['var_hcst']] <- var_hcst + skill_metrics[['var_obs']] <- var_obs + } + if ('n_eff' %in% requested_metrics) { + ## Calculate degrees of freedom + n_eff <- s2dv::Eno(datos[[1]], time_dim = 'syear', + na.action = na.pass, + ncores = ncores) + skill_metrics[['n_eff']] <- n_eff + } + if (any(c('cov', 'covariance') %in% requested_metrics)) { + # The covariance of the ensemble mean + covariance <- Apply(datos, target_dims = c('syear', 'ensemble'), + fun = function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + res <- MeanDims(res, 'ensemble') + cov(as.vector(obs), as.vector(res), + use = "everything", + method = "pearson")}, + ncores = ncores)$output1 + skill_metrics$covariance <- covariance + } + } + # Probabilistic metrics for categories + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + eval(parse(text = y))})}) + # TODO: distinguish between rpss and bss + # if 1 percentile -> bss + # if more than 1 -> rpss + exe_rps <- unlist(lapply(categories, function(x) { + if (length(x) > 1) { + x <- x[1] *100 + } + return(x)})) + if (!is.null(datos)) { + if (Fair) { + nmemb <- sum(unlist(lapply(datos[-1], function(x) dim(x)['ensemble']))) + } else { + nmemb <- NULL + } + } else { + nmemb <- NULL + } + + # Compute rps + for (ps in 1:length(exe_rps)) { + if ('rps' %in% requested_metrics) { + rps <- RPS(exp = data$probs$hcst[[ps]], + obs = data$probs$obs[[ps]], memb_dim = NULL, + cat_dim = 'cat', cross.val = FALSE, time_dim = 'syear', + Fair = Fair, nmemb = nmemb, + ncores = ncores) + rps_clim <- Apply(list(data$probs$obs[[ps]]), + target_dims = c('cat', 'syear'), + RPS_clim, bin_dim_abs = 'cat', Fair = Fair, + cross.val = FALSE, ncores = ncores)$output1 + skill_metrics$rps <- rps + skill_metrics$rps_clim <- rps_clim + # names based on the categories: + # To use it when visualization works for more rps + #skill_metrics[[paste0('rps', exe_rps[ps])]] <- rps + #skill_metrics[[paste0('rps_clim', + # exe_rps[ps])]] <- rps_clim + } + if ('rpss' %in% requested_metrics) { + rpss <- RPSS(exp = data$probs$hcst[[ps]], + obs = data$probs$obs[[ps]], + ref = NULL, # ref is 1/3 by default if terciles + time_dim = 'syear', memb_dim = NULL, + cat_dim = 'cat', nmemb = nmemb, + dat_dim = NULL, + prob_thresholds = categories[[ps]], # un use param when providing probs + indices_for_clim = NULL, + Fair = Fair, weights_exp = NULL, weights_ref = NULL, + cross.val = FALSE, na.rm = na.rm, + sig_method.type = 'two.sided.approx', alpha = alpha, + ncores = ncores) + skill_metrics$rpss <- rpss$rpss + skill_metrics$rpss_significance <- rpss$sign + } + } + return(skill_metrics) +} -- GitLab From f6d8b7c0ead927f63c6a49db24107609e73755a4 Mon Sep 17 00:00:00 2001 From: nperez Date: Mon, 10 Jun 2024 15:12:41 +0200 Subject: [PATCH 35/53] fixes and renames --- full_ecvs_anomalies.R | 4 ++-- full_ecvs_multimodel_anomalies.R | 4 ++-- .../{Crossval_skill.R => Crossval_metris.R} | 2 +- ...model_skill.R => Crossval_multimodel_metrics.R} | 14 +++++++++----- modules/Visualization/R/plot_metrics.R | 1 + modules/{ => old_modules}/Multimodel/Multimodel.R | 0 .../Multimodel/Multimodel_skill.R | 0 
.../Multimodel/build_multimodel.R | 0 .../Multimodel/clean_multimodel.R | 0 .../{ => old_modules}/Multimodel/load_multimodel.R | 0 .../Multimodel/load_multimodel_mean.R | 0 .../Multimodel/load_multimodel_probs.R | 0 12 files changed, 15 insertions(+), 10 deletions(-) rename modules/Crossval/{Crossval_skill.R => Crossval_metris.R} (99%) rename modules/Crossval/{Crossval_multimodel_skill.R => Crossval_multimodel_metrics.R} (96%) rename modules/{ => old_modules}/Multimodel/Multimodel.R (100%) rename modules/{ => old_modules}/Multimodel/Multimodel_skill.R (100%) rename modules/{ => old_modules}/Multimodel/build_multimodel.R (100%) rename modules/{ => old_modules}/Multimodel/clean_multimodel.R (100%) rename modules/{ => old_modules}/Multimodel/load_multimodel.R (100%) rename modules/{ => old_modules}/Multimodel/load_multimodel_mean.R (100%) rename modules/{ => old_modules}/Multimodel/load_multimodel_probs.R (100%) diff --git a/full_ecvs_anomalies.R b/full_ecvs_anomalies.R index 1c873a0b..769db0e4 100644 --- a/full_ecvs_anomalies.R +++ b/full_ecvs_anomalies.R @@ -18,8 +18,8 @@ data_summary(data$obs, recipe) source("modules/Crossval/Crossval_anomalies.R") res <- Crossval_anomalies(recipe = recipe, data = data) -source("modules/Crossval/Crossval_skill.R") -skill_metrics <- Crossval_skill(recipe = recipe, data_crossval = res, +source("modules/Crossval/Crossval_metrics.R") +skill_metrics <- Crossval_metrics(recipe = recipe, data_crossval = res, fair = FALSE, nmemb = NULL, nmemb_ref = NULL) Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, significance = TRUE) diff --git a/full_ecvs_multimodel_anomalies.R b/full_ecvs_multimodel_anomalies.R index 73970fa8..d1c535ec 100644 --- a/full_ecvs_multimodel_anomalies.R +++ b/full_ecvs_multimodel_anomalies.R @@ -33,8 +33,8 @@ rm(list = 'datos') source("modules/Crossval/Crossval_multimodel_anomalies.R") res <- Crossval_multimodel_anomalies(recipe = original_recipe, data = data) -source("modules/Crossval/Crossval_multimodel_skill.R") -skill_metrics <- multimodel_metrics(recipe = original_recipe, +source("modules/Crossval/Crossval_multimodel_metrics.R") +skill_metrics <- Crossval_multimodel_metrics(recipe = original_recipe, data = res, Fair = FALSE) diff --git a/modules/Crossval/Crossval_skill.R b/modules/Crossval/Crossval_metris.R similarity index 99% rename from modules/Crossval/Crossval_skill.R rename to modules/Crossval/Crossval_metris.R index 417ee33b..dd31d23a 100644 --- a/modules/Crossval/Crossval_skill.R +++ b/modules/Crossval/Crossval_metris.R @@ -33,7 +33,7 @@ source("modules/Crossval/R/tmp/Eno.R") ## the recipe could be used to read the Percentiles ## if fair is TRUE, the nmemb used to compute the probabilities is needed ## nmemb_ref is the number of year - 1 in case climatological forecast is the reference -Crossval_skill <- function(recipe, data_crossval, +Crossval_metrics <- function(recipe, data_crossval, fair = FALSE, nmemb = NULL, nmemb_ref = NULL) { ncores <- recipe$Analysis$ncores alpha <- recipe$Analysis$Skill$alpha diff --git a/modules/Crossval/Crossval_multimodel_skill.R b/modules/Crossval/Crossval_multimodel_metrics.R similarity index 96% rename from modules/Crossval/Crossval_multimodel_skill.R rename to modules/Crossval/Crossval_multimodel_metrics.R index 326b9c39..7f6a7292 100644 --- a/modules/Crossval/Crossval_multimodel_skill.R +++ b/modules/Crossval/Crossval_multimodel_metrics.R @@ -16,7 +16,7 @@ source("modules/Crossval/R/tmp/SprErr.R") source("modules/Crossval/R/tmp/Eno.R") 
source("modules/Crossval/R/tmp/GetProbs.R") source("modules/Visualization/Visualization.R") # to load .warning from utils -multimodel_metrics <- function(recipe, +Crossval_multimodel_metrics <- function(recipe, data = NULL, Fair = FALSE) { ncores <- recipe$Analysis$ncores @@ -50,9 +50,9 @@ multimodel_metrics <- function(recipe, names(dim(res)) <- names(dim(obs)) obs <- Subset(obs, along = 'ensemble', indices = 1, drop = 'selected') - s2dv:::.CRPS(exp = res, obs = obs, dat_dim = NULL, + mean(s2dv:::.CRPS(exp = res, obs = obs, dat_dim = NULL, time_dim = 'syear', - memb_dim = 'ensemble')}, + memb_dim = 'ensemble'))}, ncores = ncores)$output1 skill_metrics$crps <- crps # Build the reference forecast: @@ -67,8 +67,9 @@ multimodel_metrics <- function(recipe, fun = function(ref, obs) { obs <- Subset(obs, along = 'ensemble', indices = 1, drop = 'selected') - s2dv:::.CRPS(exp = ref, obs = obs, dat_dim = NULL, - time_dim = 'syear', memb_dim = 'ensemble')}, + mean(s2dv:::.CRPS(exp = ref, obs = obs, + dat_dim = NULL, + time_dim = 'syear', memb_dim = 'ensemble'))}, ncores = ncores)$output1 skill_metrics$crps_clim <- crps_clim @@ -265,5 +266,8 @@ multimodel_metrics <- function(recipe, skill_metrics$rpss_significance <- rpss$sign } } + skill_metrics <- lapply(skill_metrics, function(x) {drop(x)}) + skill_metrics <- lapply(skill_metrics, function(x){ + InsertDim(x, pos = 1, len = 1, name = 'var')}) return(skill_metrics) } diff --git a/modules/Visualization/R/plot_metrics.R b/modules/Visualization/R/plot_metrics.R index 2df06e8b..3ade1a08 100644 --- a/modules/Visualization/R/plot_metrics.R +++ b/modules/Visualization/R/plot_metrics.R @@ -173,6 +173,7 @@ plot_metrics <- function(recipe, data_cube, metrics, } # Reorder dimensions +browser() metric <- Reorder(metric, c("time", "longitude", "latitude")) # If the significance has been requested and the variable has it, # retrieve it and reorder its dimensions. 
diff --git a/modules/Multimodel/Multimodel.R b/modules/old_modules/Multimodel/Multimodel.R similarity index 100% rename from modules/Multimodel/Multimodel.R rename to modules/old_modules/Multimodel/Multimodel.R diff --git a/modules/Multimodel/Multimodel_skill.R b/modules/old_modules/Multimodel/Multimodel_skill.R similarity index 100% rename from modules/Multimodel/Multimodel_skill.R rename to modules/old_modules/Multimodel/Multimodel_skill.R diff --git a/modules/Multimodel/build_multimodel.R b/modules/old_modules/Multimodel/build_multimodel.R similarity index 100% rename from modules/Multimodel/build_multimodel.R rename to modules/old_modules/Multimodel/build_multimodel.R diff --git a/modules/Multimodel/clean_multimodel.R b/modules/old_modules/Multimodel/clean_multimodel.R similarity index 100% rename from modules/Multimodel/clean_multimodel.R rename to modules/old_modules/Multimodel/clean_multimodel.R diff --git a/modules/Multimodel/load_multimodel.R b/modules/old_modules/Multimodel/load_multimodel.R similarity index 100% rename from modules/Multimodel/load_multimodel.R rename to modules/old_modules/Multimodel/load_multimodel.R diff --git a/modules/Multimodel/load_multimodel_mean.R b/modules/old_modules/Multimodel/load_multimodel_mean.R similarity index 100% rename from modules/Multimodel/load_multimodel_mean.R rename to modules/old_modules/Multimodel/load_multimodel_mean.R diff --git a/modules/Multimodel/load_multimodel_probs.R b/modules/old_modules/Multimodel/load_multimodel_probs.R similarity index 100% rename from modules/Multimodel/load_multimodel_probs.R rename to modules/old_modules/Multimodel/load_multimodel_probs.R -- GitLab From fb05eb5263c4b1702c7f5da7f957244ceea9ad7b Mon Sep 17 00:00:00 2001 From: Nuria Perez Zanon Date: Tue, 11 Jun 2024 11:54:41 +0200 Subject: [PATCH 36/53] use grid type from conf --- .gitignore | 2 + conf/archive.yml | 4 +- full_ecvs_anomalies.R | 2 +- recipe_tas.yml | 21 +++++---- recipe_tas_singl.yml | 105 ++++++++++++++++++++++++++++++++++++++++++ sunset.sh | 14 ++++++ 6 files changed, 135 insertions(+), 13 deletions(-) create mode 100644 recipe_tas_singl.yml create mode 100644 sunset.sh diff --git a/.gitignore b/.gitignore index f05c28f4..9244f19d 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,5 @@ ecsbatch.log* modules/Loading/testing_recipes/recipe_decadal_calendartest.yml modules/Loading/testing_recipes/recipe_decadal_daily_calendartest.yml conf/vitigeoss-vars-dict.yml +*.err +*.out diff --git a/conf/archive.yml b/conf/archive.yml index 22986cd2..6c5c7379 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -110,8 +110,8 @@ gpfs: "prlr":"monthly_mean/prlr_f1h-r1440x721cds/", "sfcWind":"monthly_mean/sfcWind_f1h-r1440x721cds/"} calendar: "standard" - reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" - land_sea_mask: "/esarchive/recon/ecmwf/era5/constant/lsm-r1440x721cds/sftof.nc" + reference_grid: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" + land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/constant/lsm-r1440x721cds/sftof.nc" ######################################################################### esarchive: diff --git a/full_ecvs_anomalies.R b/full_ecvs_anomalies.R index 769db0e4..dc83ebb7 100644 --- a/full_ecvs_anomalies.R +++ b/full_ecvs_anomalies.R @@ -5,7 +5,7 @@ source("modules/Units/Units.R") source("modules/Visualization/Visualization.R") args = commandArgs(trailingOnly = TRUE) recipe_file <- args[1] -#recipe_file 
<- "recipe_tas.yml" +#recipe_file <- "recipe_tas_singl.yml" recipe <- read_atomic_recipe(recipe_file) #recipe <- prepare_outputs(recipe_file) # Load datasets diff --git a/recipe_tas.yml b/recipe_tas.yml index 5af89ec1..2f995c9a 100644 --- a/recipe_tas.yml +++ b/recipe_tas.yml @@ -11,8 +11,9 @@ Analysis: flux: no Datasets: System: + - {name: 'Meteo-France-System8'} + - {name: 'CMCC-SPS3.5'} - {name: 'ECMWF-SEAS5.1'} - - {name: 'Meteo-France-System7'} # name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 Multimodel: execute: yes @@ -26,15 +27,15 @@ Analysis: hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' hcst_end: '2000' # Mandatory, int: Hindcast end year 'YYYY' ftime_min: 1 # Mandatory, int: First leadtime time step in months - ftime_max: 3 # Mandatory, int: Last leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months Region: - latmin: 20 - latmax: 10 - lonmin: -20 - lonmax: 10 + latmin: -90 + latmax: 90 + lonmin: 0 + lonmax: 359.9 Regrid: method: conservative # Mandatory, str: Interpolation method. See docu. - type: "to_reference" + type: "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_reference" #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. Workflow: Anomalies: @@ -85,9 +86,9 @@ Analysis: Run: Loglevel: INFO Terminal: yes - filesystem: esarchive - output_dir: /esarchive/scratch/nperez/git4/ # replace with the directory where you want to save the outputs - code_dir: /esarchive/scratch/nperez/git4/sunset/ # replace with the directory where your code is + filesystem: gpfs + output_dir: /home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /home/bsc/bsc032339/sunset/ # replace with the directory where your code is autosubmit: no # fill only if using autosubmit auto_conf: diff --git a/recipe_tas_singl.yml b/recipe_tas_singl.yml new file mode 100644 index 00000000..6c509521 --- /dev/null +++ b/recipe_tas_singl.yml @@ -0,0 +1,105 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: tas + freq: monthly_mean + units: C + flux: no + Datasets: + System: + #- {name: 'ECMWF-SEAS5.1'} + #- {name: 'Meteo-France-System8'} + #- {name: 'CMCC-SPS3.5'} + name: ECMWF-SEAS5.1 #Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + Multimodel: + execute: no + approach: pooled # Mandatory, bool: Either yes/true or no/false + createFrom: Anomalies + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0501' + fcst_year: '2021' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2000' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + latmin: -90 + latmax: 90 + lonmin: 0 + lonmax: 359.9 + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. 
+ type: "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_system" #"to_reference" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: yes + cross_validation: no + save: all + Time_aggregation: + execute: no + Calibration: + method: raw # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov std n_eff + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. + save: all + Indicators: + index: no + Visualization: + plots: skill_metrics #forecast_ensemble_mean most_likely_terciles + multi_panel: no + dots: both + projection: Robinson + file_format: 'PNG' + #projection: robinson + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: NULL + metric: mean_bias enscorr rpss crpss EnsSprErr + metric_aggregation: 'skill' + #inf_to_na: yes + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: gpfs + output_dir: /home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /home/bsc/bsc032339/sunset/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. 
+ email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + diff --git a/sunset.sh b/sunset.sh new file mode 100644 index 00000000..7b2c3214 --- /dev/null +++ b/sunset.sh @@ -0,0 +1,14 @@ +#!/bin/bash +#SBATCH -n 8 +#SBATCH -t 01:00:00 +#SBATCH -J sunset_multimodel +#SBATCH -o sunset_multimodel-%J.out +#SBATCH -e sunset_multimodel-%J.err +#SBATCH --account=bsc32 +#SBATCH --qos=acc_bsces + +source /gpfs/projects/bsc32/software/suselinux/11/software/Miniconda3/4.7.10/etc/profile.d/conda.sh +conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/SUNSET-env_2.0.0 + +Rscript /home/bsc/bsc032339/sunset/full_ecvs_multimodel_anomalies.R + -- GitLab From 642d5da27f7acc4dd31fd006e1085d7b73d124e9 Mon Sep 17 00:00:00 2001 From: Nuria Perez Zanon Date: Wed, 12 Jun 2024 17:28:21 +0200 Subject: [PATCH 37/53] full-crossval in MN5 --- full_ecvs_multimodel_anomalies.R | 1 + modules/Visualization/R/plot_metrics.R | 1 - recipe_tas.yml | 6 +++++- sunset.sh | 2 +- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/full_ecvs_multimodel_anomalies.R b/full_ecvs_multimodel_anomalies.R index d1c535ec..00e94036 100644 --- a/full_ecvs_multimodel_anomalies.R +++ b/full_ecvs_multimodel_anomalies.R @@ -30,6 +30,7 @@ datos$obs <- data$obs data <- datos rm(list = 'datos') +original_recipe$Analysis$ncores <- 32 source("modules/Crossval/Crossval_multimodel_anomalies.R") res <- Crossval_multimodel_anomalies(recipe = original_recipe, data = data) diff --git a/modules/Visualization/R/plot_metrics.R b/modules/Visualization/R/plot_metrics.R index 3ade1a08..2df06e8b 100644 --- a/modules/Visualization/R/plot_metrics.R +++ b/modules/Visualization/R/plot_metrics.R @@ -173,7 +173,6 @@ plot_metrics <- function(recipe, data_cube, metrics, } # Reorder dimensions -browser() metric <- Reorder(metric, c("time", "longitude", "latitude")) # If the significance has been requested and the variable has it, # retrieve it and reorder its dimensions. diff --git a/recipe_tas.yml b/recipe_tas.yml index 2f995c9a..ede20ab1 100644 --- a/recipe_tas.yml +++ b/recipe_tas.yml @@ -14,6 +14,10 @@ Analysis: - {name: 'Meteo-France-System8'} - {name: 'CMCC-SPS3.5'} - {name: 'ECMWF-SEAS5.1'} + #- {name: 'UK-MetOffice-Glosea601'} + #- {name: 'NCEP-CFSv2'} + #- {name: 'DWD-GCFS2.1'} + #- {name: 'ECCC-CanCM4i'} # name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 Multimodel: execute: yes @@ -23,7 +27,7 @@ Analysis: name: ERA5 # Mandatory, str: Reference codename. See docu. 
Time: sdate: '0501' - fcst_year: '2021' + fcst_year: #'2021' hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' hcst_end: '2000' # Mandatory, int: Hindcast end year 'YYYY' ftime_min: 1 # Mandatory, int: First leadtime time step in months diff --git a/sunset.sh b/sunset.sh index 7b2c3214..c37bc9bb 100644 --- a/sunset.sh +++ b/sunset.sh @@ -1,5 +1,5 @@ #!/bin/bash -#SBATCH -n 8 +#SBATCH -n 16 #SBATCH -t 01:00:00 #SBATCH -J sunset_multimodel #SBATCH -o sunset_multimodel-%J.out -- GitLab From 4fb5d08213c21bfd05861d0ba4d7d7353ac44ce4 Mon Sep 17 00:00:00 2001 From: Nuria Perez Zanon Date: Thu, 13 Jun 2024 17:54:12 +0200 Subject: [PATCH 38/53] run for individual and multimodel --- full_ecvs_multimodel_anomalies.R | 13 ++++++++++++- .../{Crossval_metris.R => Crossval_metrics.R} | 0 recipe_tas.yml | 11 ++++++----- 3 files changed, 18 insertions(+), 6 deletions(-) rename modules/Crossval/{Crossval_metris.R => Crossval_metrics.R} (100%) diff --git a/full_ecvs_multimodel_anomalies.R b/full_ecvs_multimodel_anomalies.R index 00e94036..1416b72a 100644 --- a/full_ecvs_multimodel_anomalies.R +++ b/full_ecvs_multimodel_anomalies.R @@ -11,15 +11,27 @@ recipe_file <- "recipe_tas.yml" original_recipe <- prepare_outputs(recipe_file, disable_checks = TRUE) # Load datasets models <- unlist(original_recipe$Analysis$Datasets$System) + +original_recipe$Analysis$ncores <- 32 + recipe_aux <- original_recipe datos <- list() datos$hcst <- list() datos$fcst <- list() +source("modules/Crossval/Crossval_metrics.R") +source("modules/Crossval/Crossval_anomalies.R") for (sys in models) { recipe_aux$Analysis$Datasets$System <- NULL recipe_aux$Analysis$Datasets$System$name <- as.vector(sys) data <- Loading(recipe = recipe_aux) data <- Units(recipe_aux, data) + # verification individual models + product <- Crossval_anomalies(recipe = recipe_aux, data = data) + skill_model <- Crossval_metrics(recipe = recipe_aux, data_crossval = product, + fair = FALSE, nmemb = NULL, nmemb_ref = NULL) + Visualization(recipe = recipe_aux, data = data, + skill_metrics = skill_model, significance = TRUE) + ## end ver datos$hcst <- append(datos$hcst, list(data$hcst)) datos$fcst <- append(datos$fcst, list(data$fcst)) names(datos$hcst)[length(datos$hcst)] <- gsub('\\.','', sys) @@ -30,7 +42,6 @@ datos$obs <- data$obs data <- datos rm(list = 'datos') -original_recipe$Analysis$ncores <- 32 source("modules/Crossval/Crossval_multimodel_anomalies.R") res <- Crossval_multimodel_anomalies(recipe = original_recipe, data = data) diff --git a/modules/Crossval/Crossval_metris.R b/modules/Crossval/Crossval_metrics.R similarity index 100% rename from modules/Crossval/Crossval_metris.R rename to modules/Crossval/Crossval_metrics.R diff --git a/recipe_tas.yml b/recipe_tas.yml index ede20ab1..32862c16 100644 --- a/recipe_tas.yml +++ b/recipe_tas.yml @@ -13,7 +13,7 @@ Analysis: System: - {name: 'Meteo-France-System8'} - {name: 'CMCC-SPS3.5'} - - {name: 'ECMWF-SEAS5.1'} + #- {name: 'ECMWF-SEAS5.1'} #- {name: 'UK-MetOffice-Glosea601'} #- {name: 'NCEP-CFSv2'} #- {name: 'DWD-GCFS2.1'} @@ -29,7 +29,7 @@ Analysis: sdate: '0501' fcst_year: #'2021' hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' - hcst_end: '2000' # Mandatory, int: Hindcast end year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' ftime_min: 1 # Mandatory, int: First leadtime time step in months ftime_max: 6 # Mandatory, int: Last leadtime time step in months Region: @@ -45,7 +45,7 @@ Analysis: Anomalies: compute: yes cross_validation: no - save: all + save: 
none Time_aggregation: execute: no Calibration: @@ -58,13 +58,13 @@ Analysis: cross_validation: yes Probabilities: percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. - save: all + save: none Indicators: index: no Visualization: plots: skill_metrics #forecast_ensemble_mean most_likely_terciles multi_panel: no - dots: both + #dots: both projection: Robinson file_format: 'PNG' #projection: robinson @@ -85,6 +85,7 @@ Analysis: calculate_diff: FALSE ncores: 4 # Optional, int: number of cores, defaults to 1 remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE + alpha: 0.05 Output_format: scorecards logo: yes Run: -- GitLab From 8cdde729e11bdbae8fe7519aef7ea75c300b2d22 Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 26 Jun 2024 17:37:33 +0200 Subject: [PATCH 39/53] pNAO calculation --- Crossval_NAO.R | 322 +++++ full_nao.R | 26 + modules/Crossval/Crossval_NAO.R | 374 +++++ modules/Crossval/Crossval_anomalies.R | 5 +- modules/Crossval/Crossval_metrics.R | 26 +- modules/Crossval/R/tmp/EOF.R | 293 ++++ modules/Crossval/R/tmp/GetProbs.R | 2 - modules/Crossval/R/tmp/NAO.R | 574 ++++++++ modules/Crossval/R/tmp/ProjectField.R | 272 ++++ modules/Crossval/R/tmp/Utils.R | 1885 +++++++++++++++++++++++++ recipe_nao_single.yml | 108 ++ recipe_tas_singl.yml | 8 +- 12 files changed, 3879 insertions(+), 16 deletions(-) create mode 100644 Crossval_NAO.R create mode 100644 full_nao.R create mode 100644 modules/Crossval/Crossval_NAO.R create mode 100644 modules/Crossval/R/tmp/EOF.R create mode 100644 modules/Crossval/R/tmp/NAO.R create mode 100644 modules/Crossval/R/tmp/ProjectField.R create mode 100644 modules/Crossval/R/tmp/Utils.R create mode 100644 recipe_nao_single.yml diff --git a/Crossval_NAO.R b/Crossval_NAO.R new file mode 100644 index 00000000..8fe9fa01 --- /dev/null +++ b/Crossval_NAO.R @@ -0,0 +1,322 @@ +# Full-cross-val workflow +## This code should be valid for individual months and temporal averages +source("modules/Crossval/R/tmp/GetProbs.R") + +Crossval_anomalies <- function(recipe, data) { + cross.method <- recipe$Analysis$cross.method + # TODO move check + if (is.null(cross.method)) { + cross.method <- 'leave-one-out' + } + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + eval(parse(text = y))})}) + ncores <- recipe$Analysis$ncores + na.rm <- recipe$Analysis$remove_NAs + ## data dimensions + sdate_dim <- dim(data$hcst$data)['syear'] + orig_dims <- names(dim(data$hcst$data)) + # spatial dims + if ('latitude' %in% names(dim(data$hcst$data))) { + nlats <- dim(data$hcst$data)['latitude'] + nlons <- dim(data$hcst$data)['longitude'] + agg = 'global' + } else if ('region' %in% names(dim(data$hcst$data))) { + agg = 'region' + nregions <- dim(data$hcst$data)['region'] + } + # output_dims from loop base on original dimensions + ## ex: 'dat', 'var', 'sday', 'sweek', 'ensemble', 'time', + ## 'latitude', 'longitude', 'unneeded', 'syear' + ev_dim_names <- c(orig_dims[-which(orig_dims %in% 'syear')], + names(sdate_dim)) + orig_dims[orig_dims %in% 'ensemble'] <- 'unneeded' + orig_dims[orig_dims %in% 'syear'] <- 'ensemble' + tr_dim_names <-c(orig_dims, + names(sdate_dim)) + # TODO fix it to use new version https://earth.bsc.es/gitlab/external/cstools/-/blob/dev-cross-indices/R/CST_Calibration.R#L570 + cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, + amt.points = sdate_dim, + amt.points_cor = NULL) # k = ? 
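+  ## 'cross' is a list with one element per fold; each element carries the
+  ## train/eval indices used below (e.g. with leave-one-out and 8 start
+  ## dates, cross[[1]]$eval.dexes is 1 and cross[[1]]$train.dexes is 2:8,
+  ## an illustration, not a value from the recipe), so each year is
+  ## evaluated with climatologies and limits computed without it.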
+ + ## output objects + ano_hcst_ev_res <- NULL + ano_obs_ev_res <- NULL + ano_obs_tr_res <- NULL + # as long as probs requested in recipe: + lims_ano_hcst_tr_res <- lapply(categories, function(X) {NULL}) + lims_ano_obs_tr_res <- lapply(categories, function(X) {NULL}) + + fcst_probs <- lapply(categories, function(x){NULL}) + hcst_probs_ev <- lapply(categories, function(x){NULL}) + obs_probs_ev <- lapply(categories, function(x){NULL}) + + for (t in 1:length(cross)) { + info(recipe$Run$logger, paste("crossval:", t)) + + # subset years: Subset works at BSC not at Athos + ## training indices + obs_tr <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$train.dexes) + hcst_tr <- Subset(data$hcst$data, along = 'syear', + indices = cross[[t]]$train.dexes) + ## evaluation indices + hcst_ev <- Subset(data$hcst$data, along = 'syear', + indices = cross[[t]]$eval.dexes, drop = 'selected') + obs_ev <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$eval.dexes, drop = 'selected') + # compute climatology: + clim_obs_tr <- MeanDims(obs_tr, 'syear') + clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble')) + # compute anomalies: + ano_obs_tr <- s2dv::Ano(obs_tr, clim_obs_tr, + ncores = ncores) + ano_hcst_tr <- s2dv::Ano(hcst_tr, clim_hcst_tr, + ncores = ncores) + ano_hcst_ev <- s2dv::Ano(hcst_ev, clim_hcst_tr, + ncores = ncores) + ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr, + ncores = ncores) + # compute category limits + lims_ano_hcst_tr <- Apply(ano_hcst_tr, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x) {'cat'}), + prob_lims = categories, + ncores = ncores) + lims_ano_obs_tr <- Apply(ano_obs_tr, target_dims = c('syear'),#, 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x){'cat'}), + prob_lims = categories, + ncores = ncores) + #store results + ano_hcst_ev_res <- abind(ano_hcst_ev_res, ano_hcst_ev, + along = length(dim(ano_hcst_ev)) + 1) + ano_obs_ev_res <- abind(ano_obs_ev_res, ano_obs_ev, + along = length(dim(ano_obs_ev)) + 1) + ano_obs_tr_res <- abind(ano_obs_tr_res, ano_obs_tr, + along = length(dim(ano_obs_tr)) + 1) + for(ps in 1:length(categories)) { + lims_ano_hcst_tr_res[[ps]] <- abind(lims_ano_hcst_tr_res[[ps]], lims_ano_hcst_tr[[ps]], + along = length(dim(lims_ano_hcst_tr[[ps]])) + 1) + lims_ano_obs_tr_res[[ps]] <- abind(lims_ano_obs_tr_res[[ps]], lims_ano_obs_tr[[ps]], + along = length(dim(lims_ano_obs_tr[[ps]])) + 1) + } + } + info(recipe$Run$logger, + "#### Anomalies Cross-validation loop ended #####") + gc() + # Add dim names: + names(dim(ano_hcst_ev_res)) <- ev_dim_names + names(dim(ano_obs_ev_res)) <- ev_dim_names + names(dim(ano_obs_tr_res)) <- tr_dim_names + # To make crps_clim to work the reference forecast need to have same dims as obs: + ano_obs_tr_res <- Subset(ano_obs_tr_res, along = 'unneeded', + indices = 1, drop = 'selected') + for(ps in 1:length(categories)) { + names(dim(lims_ano_hcst_tr_res[[ps]])) <- c('cat', + orig_dims[-which(orig_dims %in% c('ensemble', 'unneeded'))], 'syear') + names(dim(lims_ano_obs_tr_res[[ps]])) <- c('cat', + tr_dim_names[-which(tr_dim_names %in% c('ensemble'))]) + lims_ano_obs_tr_res[[ps]] <- Subset(lims_ano_obs_tr_res[[ps]], + along = 'unneeded', indices = 1, drop = 'selected') + } + + # Forecast anomalies: + if (!is.null(data$fcst)) { + clim_hcst 
<- Apply(ano_hcst_ev_res, + target_dims = c('syear', 'ensemble'), + mean, + na.rm = na.rm, + ncores = ncores)$output1 + data$fcst$data <- Ano(data = data$fcst$data, clim = clim_hcst) + # Terciles limits using the whole hindcast period: + lims_fcst <- Apply(ano_hcst_ev_res, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x) {'cat'}), + prob_lims = categories, + ncores = ncores) + } + + # Compute Probabilities + for (ps in 1:length(categories)) { + hcst_probs_ev[[ps]] <- GetProbs(ano_hcst_ev_res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', abs_thresholds = lims_ano_hcst_tr[[ps]], + ncores = ncores) + obs_probs_ev[[ps]] <- GetProbs(ano_obs_ev_res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_ano_obs_tr_res[[ps]], + ncores = ncores) + if (!is.null(data$fcst)) { + fcst_probs[[ps]] <- GetProbs(data$fcst$data, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_fcst[[ps]], + ncores = ncores) + } + } + # Convert to s2dv_cubes the resulting anomalies + ano_hcst <- data$hcst + ano_hcst$data <- ano_hcst_ev_res + ano_obs <- data$obs + ano_obs$data <- ano_obs_ev_res + + info(recipe$Run$logger, + "#### Anomalies and Probabilities Done #####") + if (recipe$Analysis$Workflow$Anomalies$save != 'none') { + info(recipe$Run$logger, "##### START SAVING ANOMALIES #####") + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Anomalies/") + # Save forecast + if ((recipe$Analysis$Workflow$Anomalies$save %in% + c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) { + save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') + } + # Save hindcast + if (recipe$Analysis$Workflow$Anomalies$save %in% + c('all', 'exp_only')) { + save_forecast(recipe = recipe, data_cube = ano_hcst, type = 'hcst') + } + # Save observation + if (recipe$Analysis$Workflow$Anomalies$save == 'all') { + save_observations(recipe = recipe, data_cube = ano_obs) + } + } + # Save probability bins + probs_hcst <- list() + probs_fcst <- list() + probs_obs <- list() + all_names <- NULL + # Make categories rounded number to use as names: + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + round(eval(parse(text = y)),2)})}) + + for (ps in 1:length(categories)) { + for (perc in 1:(length(categories[[ps]]) + 1)) { + if (perc == 1) { + name_elem <- paste0("below_", categories[[ps]][perc]) + } else if (perc == length(categories[[ps]]) + 1) { + name_elem <- paste0("above_", categories[[ps]][perc-1]) + } else { + name_elem <- paste0("from_", categories[[ps]][perc-1], + "_to_", categories[[ps]][perc]) + } + probs_hcst <- append(list(Subset(hcst_probs_ev[[ps]], + along = 'cat', indices = perc, drop = 'all')), + probs_hcst) + probs_obs <- append(list(Subset(obs_probs_ev[[ps]], + along = 'cat', indices = perc, drop = 'all')), + probs_obs) + if (!is.null(data$fcst)) { + probs_fcst <- append(list(Subset(fcst_probs[[ps]], + along = 'cat', indices = perc, drop = 'all')), + probs_fcst) + } + all_names <- c(all_names, name_elem) + } + } + names(probs_hcst) <- all_names + if (!('var' %in% names(dim(probs_hcst[[1]])))) { + 
probs_hcst <- lapply(probs_hcst, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } + names(probs_obs) <- all_names + if (!('var' %in% names(dim(probs_obs[[1]])))) { + probs_obs <- lapply(probs_obs, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } + + if (!is.null(data$fcst)) { + names(probs_fcst) <- all_names + if (!('var' %in% names(dim(probs_fcst[[1]])))) { + probs_fcst <- lapply(probs_fcst, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } + if (!('syear' %in% names(dim(probs_fcst[[1]])))) { + probs_fcst <- lapply(probs_fcst, function(x) { + dim(x) <- c(syear = 1, dim(x)) + return(x)}) + } + } + if (recipe$Analysis$Workflow$Probabilities$save %in% + c('all', 'bins_only')) { + save_probabilities(recipe = recipe, probs = probs_hcst, + data_cube = data$hcst, agg = agg, + type = "hcst") + save_probabilities(recipe = recipe, probs = probs_obs, + data_cube = data$hcst, agg = agg, + type = "obs") + # TODO Forecast + if (!is.null(probs_fcst)) { + save_probabilities(recipe = recipe, probs = probs_fcst, + data_cube = data$fcst, agg = agg, + type = "fcst") + } + } + # Save ensemble mean for multimodel option: + hcst_EM <- MeanDims(ano_hcst$data, 'ensemble', drop = T) + save_metrics(recipe = recipe, + metrics = list(hcst_EM = + Subset(hcst_EM, along = 'dat', indices = 1, drop = 'selected')), + data_cube = data$hcst, agg = agg, + module = "statistics") + fcst_EM <- NULL + if (!is.null(data$fcst)) { + fcst_EM <- MeanDims(data$fcst$data, 'ensemble', drop = T) + save_metrics(recipe = recipe, + metrics = list(fcst_EM = + Subset(fcst_EM, along = 'dat', indices = 1, drop = 'selected')), + data_cube = data$fcst, agg = agg, + module = "statistics") + } + return(list(hcst = ano_hcst, obs = ano_obs, fcst = data$fcst, + hcst.full_val = data$hcst, obs.full_val = data$obs, + hcst_EM = hcst_EM, fcst_EM = fcst_EM, + cat_lims = list(hcst_tr = lims_ano_hcst_tr_res, + obs_tr = lims_ano_obs_tr_res), + probs = list(hcst_ev = hcst_probs_ev, + obs_ev = obs_probs_ev), + ref_obs_tr = ano_obs_tr_res)) +} + + +## The result contains the inputs for Skill_full_crossval. 
+## this is a list with the required elements:
+  ## probs is a list with
+  ## probs$hcst_ev and probs$obs_ev
+  ## probs$hcst_ev will have as many elements as percentile sets in $Probabilities$percentiles
+  ## each element will be an array with a 'cat' dimension
+  ## the same for probs$obs_ev
+  ## hcst is a s2dv_cube for the post-processed hindcast for the evaluation indices
+  ## in this case cross-validated anomalies
+  ## obs is a s2dv_cube for the post-processed obs
+  ## in this case cross-validated anomalies
+  ## fcst is a s2dv_cube for the post-processed fcst
+  ## in this case anomalies computed with the full hindcast period
+  ## this object is not required for skill assessment
+  ## hcst.full_val and obs.full_val are the original data to compute mean bias
+  ## cat_lims used to compute the probabilities
+  ## this object is not required for skill assessment
+  ## ref_obs_tr is an array with the cross-validated observed anomalies
+  ## to be used as reference forecast in the CRPSS and CRPS_clim
+  ## it is computed from the training indices
+
diff --git a/full_nao.R b/full_nao.R
new file mode 100644
index 00000000..43b1e2e3
--- /dev/null
+++ b/full_nao.R
@@ -0,0 +1,26 @@
+
+source("modules/Loading/Loading.R")
+source("modules/Saving/Saving.R")
+source("modules/Units/Units.R")
+source("modules/Visualization/Visualization.R")
+args = commandArgs(trailingOnly = TRUE)
+recipe_file <- args[1]
+#recipe_file <- "recipe_nao_single.yml"
+recipe <- read_atomic_recipe(recipe_file)
+#recipe <- prepare_outputs(recipe_file)
+# Load datasets
+data <- Loading(recipe)
+data <- Units(recipe, data)
+data_summary(data$hcst, recipe)
+data_summary(data$obs, recipe)
+
+
+source("modules/Crossval/Crossval_NAO.R")
+res <- Crossval_NAO(recipe = recipe, data = data)
+
+source("modules/Crossval/Crossval_metrics.R")
+skill_metrics <- Crossval_metrics(recipe = recipe, data_crossval = res,
+                                  fair = FALSE, nmemb = NULL, nmemb_ref = NULL)
+
+# No visualisation available for region-aggregated data
+
diff --git a/modules/Crossval/Crossval_NAO.R b/modules/Crossval/Crossval_NAO.R
new file mode 100644
index 00000000..9fb31918
--- /dev/null
+++ b/modules/Crossval/Crossval_NAO.R
@@ -0,0 +1,374 @@
+# Full-cross-val workflow
+## This code should be valid for individual months and temporal averages
+source("modules/Crossval/R/tmp/GetProbs.R")
+source("modules/Crossval/R/tmp/NAO.R")
+source("modules/Crossval/R/tmp/ProjectField.R")
+source("modules/Crossval/R/tmp/EOF.R")
+source("modules/Crossval/R/tmp/Utils.R")
+
+Crossval_NAO <- function(recipe, data) {
+  obsproj <- recipe$Analysis$Workflow$Indices$NAO$obsproj
+  if (is.null(obsproj)) {
+    obsproj <- TRUE
+  }
+  cross.method <- recipe$Analysis$cross.method
+  # TODO move check
+  if (is.null(cross.method)) {
+    cross.method <- 'leave-one-out'
+  }
+  categories <- recipe$Analysis$Workflow$Probabilities$percentiles
+  categories <- lapply(categories, function (x) {
+                   sapply(x, function(y) {
+                     eval(parse(text = y))})})
+  ncores <- recipe$Analysis$ncores
+  na.rm <- recipe$Analysis$remove_NAs
+  ## data dimensions
+  sdate_dim <- dim(data$hcst$data)['syear']
+  orig_dims <- names(dim(data$hcst$data))
+  # spatial dims
+  nlats <- dim(data$hcst$data)['latitude']
+  nlons <- dim(data$hcst$data)['longitude']
+  agg = 'region'
+
+  # TODO fix it to use new version https://earth.bsc.es/gitlab/external/cstools/-/blob/dev-cross-indices/R/CST_Calibration.R#L570
+  cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method,
+                                            amt.points = sdate_dim,
+                                            amt.points_cor = NULL) # k = ?
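+  ## The loop below computes the index per fold: NAO() projects the
+  ## anomalies onto the leading EOF of the training years (with
+  ## obsproj = TRUE the observed EOF is used for the projection; see the
+  ## local copy in modules/Crossval/R/tmp/NAO.R), and the raw indices are
+  ## then standardised with the mean and sd of the training period only.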
+
+  ## output objects
+  nao_hcst_ev_res <- NULL
+  nao_obs_ev_res <- NULL
+  nao_obs_tr_res <- NULL
+  # as long as probs requested in recipe:
+  lims_nao_hcst_tr_res <- lapply(categories, function(X) {NULL})
+  lims_nao_obs_tr_res <- lapply(categories, function(X) {NULL})
+
+  fcst_probs <- lapply(categories, function(x){NULL})
+  hcst_probs_ev <- lapply(categories, function(x){NULL})
+  obs_probs_ev <- lapply(categories, function(x){NULL})
+
+  for (t in 1:length(cross)) {
+    info(recipe$Run$logger, paste("crossval:", t))
+
+    # subset years: Subset works at BSC not at Athos
+    ## training indices
+    obs_tr <- Subset(data$obs$data, along = 'syear',
+                     indices = cross[[t]]$train.dexes)
+    hcst_tr <- Subset(data$hcst$data, along = 'syear',
+                      indices = cross[[t]]$train.dexes)
+    ## evaluation indices
+    hcst_ev <- Subset(data$hcst$data, along = 'syear',
+                      indices = cross[[t]]$eval.dexes)
+    obs_ev <- Subset(data$obs$data, along = 'syear',
+                     indices = cross[[t]]$eval.dexes)
+    # compute climatology:
+    clim_obs_tr <- MeanDims(obs_tr, 'syear')
+    clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble'))
+    # compute anomalies:
+    ano_obs_tr <- s2dv::Ano(obs_tr, clim_obs_tr,
+                            ncores = ncores)
+    ano_hcst_tr <- s2dv::Ano(hcst_tr, clim_hcst_tr,
+                             ncores = ncores)
+    ano_hcst_ev <- s2dv::Ano(hcst_ev, clim_hcst_tr,
+                             ncores = ncores)
+    ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr,
+                            ncores = ncores)
+    # compute NAO:
+    nao <- NAO(exp = ano_hcst_tr, obs = ano_obs_tr, exp_cor = ano_hcst_ev,
+               ftime_avg = NULL, time_dim = 'syear',
+               memb_dim = 'ensemble',
+               space_dim = c('latitude', 'longitude'),
+               ftime_dim = 'time', obsproj = obsproj,
+               lat = data$obs$coords$latitude,
+               lon = data$obs$coords$longitude,
+               ncores = recipe$Analysis$ncores)
+
+    nao_obs_ev <- NAO(exp = ano_hcst_tr, obs = ano_obs_tr, exp_cor = ano_obs_ev,
+                      ftime_avg = NULL, time_dim = 'syear',
+                      memb_dim = 'ensemble',
+                      space_dim = c('latitude', 'longitude'),
+                      ftime_dim = 'time', obsproj = obsproj,
+                      lat = data$obs$coords$latitude,
+                      lon = data$obs$coords$longitude,
+                      ncores = recipe$Analysis$ncores)$exp_cor
+    # Standardisation:
+    # Need the nao_hcst (for the train.dexes) to standardise the eval.dexes?
+ nao_hcst_ev <- Apply(list(nao$exp, nao$exp_cor), + target_dims = c('syear', 'ensemble'), + fun = function(x, y) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(y, c(1,2), function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores)$output1 + nao_obs_ev <- Apply(list(nao$obs, nao_obs_ev), + target_dims = c('syear','ensemble'), + fun = function(x, y) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(y, c(1,2), function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores)$output1 + nao_obs_tr <- Apply(list(nao$obs), target_dims = 'syear', + fun = function(x) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(x, 1, function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores, + output_dims = 'syear')$output1 + nao_hcst_tr <- Apply(list(nao$exp), target_dims = c('syear', 'ensemble'), + fun = function(x) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(x, c(1,2), function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores)$output1 + + # compute category limits + lims_nao_hcst_tr <- Apply(nao_hcst_tr, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x) {'cat'}), + prob_lims = categories, + ncores = ncores) + lims_nao_obs_tr <- Apply(nao_obs_tr, target_dims = c('syear'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x){'cat'}), + prob_lims = categories, + ncores = ncores) + #store results + nao_hcst_ev_res <- abind(nao_hcst_ev_res, nao_hcst_ev, + along = length(dim(nao_hcst_ev)) + 1) + nao_obs_ev_res <- abind(nao_obs_ev_res, nao_obs_ev, + along = length(dim(nao_obs_ev)) + 1) + nao_obs_tr_res <- abind(nao_obs_tr_res, nao_obs_tr, + along = length(dim(nao_obs_tr)) + 1) + for(ps in 1:length(categories)) { + lims_nao_hcst_tr_res[[ps]] <- abind(lims_nao_hcst_tr_res[[ps]], + lims_nao_hcst_tr[[ps]], + along = length(dim(lims_nao_hcst_tr[[ps]])) + 1) + lims_nao_obs_tr_res[[ps]] <- abind(lims_nao_obs_tr_res[[ps]], + lims_nao_obs_tr[[ps]], + along = length(dim(lims_nao_obs_tr[[ps]])) + 1) + } + } + info(recipe$Run$logger, + "#### Anomalies Cross-validation loop ended #####") + gc() + # Add dim names: + names(dim(nao_hcst_ev_res)) <- c('unneeded', 'ensemble', 'dat', 'var', + 'sday', 'sweek', 'time', 'syear') + names(dim(nao_obs_ev_res)) <- c('unneeded', 'ensemble', 'dat', 'var', + 'sday', 'sweek', 'time', 'syear') + names(dim(nao_obs_tr_res)) <- c('ensemble', 'unneeded', 'dat', 'var', + 'sday', 'sweek', 'time', 'syear') + # To make crps_clim to work the reference forecast need to have same dims as obs: + nao_hcst_ev_res <- Subset(nao_hcst_ev_res, along = 'unneeded', + indices = 1, drop = 'selected') + nao_obs_ev_res <- Subset(nao_obs_ev_res, along = 'unneeded', + indices = 1, drop = 'selected') + nao_obs_tr_res <- Subset(nao_obs_tr_res, along = 'unneeded', + indices = 1, drop = 'selected') + for(ps in 1:length(categories)) { + names(dim(lims_nao_hcst_tr_res[[ps]])) <- c('cat', 'dat', 'var', 'sday', + 'sweek', 'time', 'syear') + names(dim(lims_nao_obs_tr_res[[ps]])) <- c('cat', 'unneeded', 'dat', 'var', + 'sday', 'sweek', 'time', 'syear') + lims_nao_obs_tr_res[[ps]] <- Subset(lims_nao_obs_tr_res[[ps]], + along 
= 'unneeded', indices = 1, drop = 'selected') + } + # Forecast anomalies: + if (!is.null(data$fcst)) { + clim_hcst <- Apply(data$hcst$data, + target_dims = c('syear', 'ensemble'), + mean, + na.rm = na.rm, + ncores = ncores)$output1 + clim_obs <- Apply(data$obs$data, + target_dims = c('syear', 'ensemble'), + mean, + na.rm = na.rm, + ncores = ncores)$output1 + ano_fcst <- Ano(data = data$fcst$data, clim = clim_hcst) + ano_hcst <- Ano(data = data$hcst$data, clim = clim_hcst) + ano_obs <- Ano(data= data$obs$data, clim = clim_obs) + nao_fcst <- NAO(exp = ano_hcst, obs = ano_obs, exp_cor = ano_fcst, + ftime_avg = NULL, time_dim = 'syear', + memb_dim = 'ensemble', + space_dim = c('latitude', 'longitude'), + ftime_dim = 'time', obsproj = obsproj, + lat = data$obs$coords$latitude, + lon = data$obs$coords$longitude, + ncores = recipe$Analysis$ncores) + # Terciles limits using the whole hindcast period: + lims_fcst <- Apply(nao_fcst$exp, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x) {'cat'}), + prob_lims = categories, + ncores = ncores) + } + + # Compute Probabilities + for (ps in 1:length(categories)) { + hcst_probs_ev[[ps]] <- GetProbs(nao_hcst_ev_res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_nao_hcst_tr_res[[ps]], + ncores = ncores) + obs_probs_ev[[ps]] <- GetProbs(nao_obs_ev_res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_nao_obs_tr_res[[ps]], + ncores = ncores) + if (!is.null(data$fcst)) { + fcst_probs[[ps]] <- GetProbs(nao_fcst$exp_cor, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_fcst[[ps]], + ncores = ncores) + } + } + # Convert to s2dv_cubes + nao_hcst <- data$hcst + nao_hcst$data <- nao_hcst_ev_res + nao_obs <- data$obs + nao_obs$data <- nao_obs_ev_res + + info(recipe$Run$logger, + "#### NAO and Probabilities Done #####") + if (recipe$Analysis$Workflow$Indices$NAO$save != 'none') { + info(recipe$Run$logger, "##### START SAVING NAO #####") + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Anomalies/") + # Save forecast + if ((recipe$Analysis$Workflow$Indices$NAO$save %in% + c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) { + save_forecast(recipe = recipe, data_cube = nao_fcst, type = 'fcst') + } + # Save hindcast + if (recipe$Analysis$Workflow$Indices$save %in% + c('all', 'exp_only')) { + save_forecast(recipe = recipe, data_cube = nao_hcst, type = 'hcst') + } + # Save observation + if (recipe$Analysis$Workflow$Indices$NAO$save == 'all') { + save_observations(recipe = recipe, data_cube = nao_obs) + } + } + # Save probability bins + probs_hcst <- list() + probs_fcst <- list() + probs_obs <- list() + all_names <- NULL + # Make categories rounded number to use as names: + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + round(eval(parse(text = y)),2)})}) + + for (ps in 1:length(categories)) { + for (perc in 1:(length(categories[[ps]]) + 1)) { + if (perc == 1) { + name_elem <- paste0("below_", categories[[ps]][perc]) + } else if (perc == length(categories[[ps]]) + 1) { + name_elem <- paste0("above_", 
+  # Convert to s2dv_cubes
+  nao_hcst <- data$hcst
+  nao_hcst$data <- nao_hcst_ev_res
+  nao_obs <- data$obs
+  nao_obs$data <- nao_obs_ev_res
+
+  info(recipe$Run$logger,
+       "#### NAO and Probabilities Done ####")
+  if (recipe$Analysis$Workflow$Indices$NAO$save != 'none') {
+    info(recipe$Run$logger, "##### START SAVING NAO #####")
+    recipe$Run$output_dir <- paste0(recipe$Run$output_dir,
+                                    "/outputs/Anomalies/")
+    # Save forecast
+    if ((recipe$Analysis$Workflow$Indices$NAO$save %in%
+         c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) {
+      save_forecast(recipe = recipe, data_cube = nao_fcst, type = 'fcst')
+    }
+    # Save hindcast
+    if (recipe$Analysis$Workflow$Indices$NAO$save %in%
+        c('all', 'exp_only')) {
+      save_forecast(recipe = recipe, data_cube = nao_hcst, type = 'hcst')
+    }
+    # Save observation
+    if (recipe$Analysis$Workflow$Indices$NAO$save == 'all') {
+      save_observations(recipe = recipe, data_cube = nao_obs)
+    }
+  }
+  # Save probability bins
+  probs_hcst <- list()
+  probs_fcst <- list()
+  probs_obs <- list()
+  all_names <- NULL
+  # Make categories rounded numbers to use as names:
+  categories <- recipe$Analysis$Workflow$Probabilities$percentiles
+  categories <- lapply(categories, function (x) {
+    sapply(x, function(y) {
+      round(eval(parse(text = y)), 2)})})
+
+  for (ps in 1:length(categories)) {
+    for (perc in 1:(length(categories[[ps]]) + 1)) {
+      if (perc == 1) {
+        name_elem <- paste0("below_", categories[[ps]][perc])
+      } else if (perc == length(categories[[ps]]) + 1) {
+        name_elem <- paste0("above_", categories[[ps]][perc-1])
+      } else {
+        name_elem <- paste0("from_", categories[[ps]][perc-1],
+                            "_to_", categories[[ps]][perc])
+      }
+      probs_hcst <- append(list(Subset(hcst_probs_ev[[ps]],
+                                       along = 'cat', indices = perc, drop = 'all')),
+                           probs_hcst)
+      probs_obs <- append(list(Subset(obs_probs_ev[[ps]],
+                                      along = 'cat', indices = perc, drop = 'all')),
+                          probs_obs)
+      if (!is.null(data$fcst)) {
+        probs_fcst <- append(list(Subset(fcst_probs[[ps]],
+                                         along = 'cat', indices = perc, drop = 'all')),
+                             probs_fcst)
+      }
+      all_names <- c(all_names, name_elem)
+    }
+  }
+  names(probs_hcst) <- all_names
+  if (!('var' %in% names(dim(probs_hcst[[1]])))) {
+    probs_hcst <- lapply(probs_hcst, function(x) {
+      dim(x) <- c(var = 1, dim(x))
+      return(x)})
+  }
+  names(probs_obs) <- all_names
+  if (!('var' %in% names(dim(probs_obs[[1]])))) {
+    probs_obs <- lapply(probs_obs, function(x) {
+      dim(x) <- c(var = 1, dim(x))
+      return(x)})
+  }
+
+  if (!is.null(data$fcst)) {
+    names(probs_fcst) <- all_names
+    if (!('var' %in% names(dim(probs_fcst[[1]])))) {
+      probs_fcst <- lapply(probs_fcst, function(x) {
+        dim(x) <- c(var = 1, dim(x))
+        return(x)})
+    }
+    if (!('syear' %in% names(dim(probs_fcst[[1]])))) {
+      probs_fcst <- lapply(probs_fcst, function(x) {
+        dim(x) <- c(syear = 1, dim(x))
+        return(x)})
+    }
+  }
+  if (recipe$Analysis$Workflow$Probabilities$save %in%
+      c('all', 'bins_only')) {
+    save_probabilities(recipe = recipe, probs = probs_hcst,
+                       data_cube = data$hcst, agg = agg,
+                       type = "hcst")
+    save_probabilities(recipe = recipe, probs = probs_obs,
+                       data_cube = data$hcst, agg = agg,
+                       type = "obs")
+    # TODO Forecast
+    if (!is.null(data$fcst)) {
+      save_probabilities(recipe = recipe, probs = probs_fcst,
+                         data_cube = data$fcst, agg = agg,
+                         type = "fcst")
+    }
+  }
+  return(list(hcst = nao_hcst, obs = nao_obs, fcst = nao_fcst,
+              # Mean Bias for the NAO doesn't make sense, so
+              # not returning fullvals
+              #hcst.full_val = data$hcst, obs.full_val = data$obs,
+              cat_lims = list(hcst_tr = lims_nao_hcst_tr_res,
+                              obs_tr = lims_nao_obs_tr_res),
+              probs = list(hcst_ev = hcst_probs_ev,
+                           obs_ev = obs_probs_ev),
+              ref_obs_tr = nao_obs_tr_res))
+}
+
+
+## The result contains the inputs for Skill_full_crossval.
+## This is a list with the required elements:
+  ## probs is a list with
+  ## probs$hcst_ev and probs$obs_ev
+  ## probs$hcst_ev will have as many elements as $Probabilities$percentiles
+  ## each element will be an array with a 'cat' dimension
+  ## the same for probs$obs_ev
+  ## hcst is an s2dv_cube of the post-processed hindcast for the evaluation indices
+  ## in this case cross-validated anomalies
+  ## obs is an s2dv_cube of the post-processed obs
+  ## in this case cross-validated anomalies
+  ## fcst is an s2dv_cube of the post-processed fcst
+  ## in this case anomalies computed with the full hindcast period
+  ## this object is not required for skill assessment
+  ## hcst.full_val and obs.full_val are the original data to compute mean bias
+  ## cat_lims used to compute the probabilities
+  ## this object is not required for skill assessment
+  ## ref_obs_tr is an array with the cross-validated observed anomalies
+  ## to be used as reference forecast in the CRPSS and CRPS_clim
+  ## it is computed from the training indices
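+## Minimal consumer sketch (illustrative; the function and object names below
+## are assumptions based on the file naming in this series, not a confirmed API):
+## res <- Crossval_NAO(recipe, data)
+## res$probs$hcst_ev[[1]]  # one array per percentile set, with a 'cat' dimension
+## res$ref_obs_tr          # reference forecast for CRPS_clim and the CRPSS
+## skill <- Crossval_metrics(recipe, data_crossval = res)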
+
diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R
index 8fe9fa01..3ff0f50e 100644
--- a/modules/Crossval/Crossval_anomalies.R
+++ b/modules/Crossval/Crossval_anomalies.R
@@ -125,7 +125,7 @@ Crossval_anomalies <- function(recipe, data) {
     lims_ano_obs_tr_res[[ps]] <- Subset(lims_ano_obs_tr_res[[ps]],
                                         along = 'unneeded', indices = 1, drop = 'selected')
   }
-
+browser()
   # Forecast anomalies:
   if (!is.null(data$fcst)) {
     clim_hcst <- Apply(ano_hcst_ev_res,
@@ -150,7 +150,8 @@ Crossval_anomalies <- function(recipe, data) {
                                     prob_thresholds = NULL,
                                     bin_dim_abs = 'cat',
                                     indices_for_quantiles = NULL,
-                                    memb_dim = 'ensemble', abs_thresholds = lims_ano_hcst_tr[[ps]],
+                                    memb_dim = 'ensemble',
+                                    abs_thresholds = lims_ano_hcst_tr_res[[ps]],
                                     ncores = ncores)
     obs_probs_ev[[ps]] <- GetProbs(ano_obs_ev_res, time_dim = 'syear',
                                    prob_thresholds = NULL,
diff --git a/modules/Crossval/Crossval_metrics.R b/modules/Crossval/Crossval_metrics.R
index dd31d23a..1c592b15 100644
--- a/modules/Crossval/Crossval_metrics.R
+++ b/modules/Crossval/Crossval_metrics.R
@@ -140,14 +140,19 @@ Crossval_metrics <- function(recipe, data_crossval,
     skill_metrics$enscorr <- enscorr
   }
   if ('mean_bias' %in% requested_metrics) {
-    mean_bias <- Bias(exp = data_crossval$hcst.full_val$data,
-                      obs = data_crossval$obs.full_val$data,
-                      time_dim = 'syear',
-                      memb_dim = 'ensemble',
-                      alpha = alpha,
-                      ncores = ncores)
-    skill_metrics$mean_bias <- mean_bias$bias
-    skill_metrics$mean_bias_significance <- mean_bias$sig
+    if (!is.null(data_crossval$hcst.full_val$data)) {
+      mean_bias <- Bias(exp = data_crossval$hcst.full_val$data,
+                        obs = data_crossval$obs.full_val$data,
+                        time_dim = 'syear',
+                        memb_dim = 'ensemble',
+                        alpha = alpha,
+                        ncores = ncores)
+      skill_metrics$mean_bias <- mean_bias$bias
+      skill_metrics$mean_bias_significance <- mean_bias$sig
+    } else {
+      info(recipe$Run$logger,
+           "Full values not available")
+    }
   }
   if ('enssprerr' %in% requested_metrics) {
     enssprerr <- SprErr(exp = data_crossval$hcst$data,
@@ -176,6 +181,11 @@
     skill_metrics$rmss <- rmss$rmss
     skill_metrics$rmss_significance <- rmss$sign
   }
+  if (is.null(data_crossval$hcst_EM)) {
+    data_crossval$hcst_EM <- MeanDims(data_crossval$hcst$data,
+                                      dims = 'ensemble',
+                                      drop = TRUE)
+  }
   if (any(c('std', 'standard_deviation') %in% requested_metrics)) {
     std_hcst <- Apply(data = data_crossval$hcst_EM,
                       target_dims = 'syear',
diff --git a/modules/Crossval/R/tmp/EOF.R 
b/modules/Crossval/R/tmp/EOF.R new file mode 100644 index 00000000..87795b66 --- /dev/null +++ b/modules/Crossval/R/tmp/EOF.R @@ -0,0 +1,293 @@ +#'Area-weighted empirical orthogonal function analysis using SVD +#' +#'Perform an area-weighted EOF analysis using single value decomposition (SVD) +#'based on a covariance matrix or a correlation matrix if parameter 'corr' is +#'set to TRUE. +#' +#'@param ano A numerical array of anomalies with named dimensions to calculate +#' EOF. The dimensions must have at least 'time_dim' and 'space_dim'. NAs +#' could exist but it should be consistent along time_dim. That is, if one grid +#' point has NAs, all the time steps at this point should be NAs. +#'@param lat A vector of the latitudes of 'ano'. +#'@param lon A vector of the longitudes of 'ano'. +#'@param time_dim A character string indicating the name of the time dimension +#' of 'ano'. The default value is 'sdate'. +#'@param space_dim A vector of two character strings. The first is the dimension +#' name of latitude of 'ano' and the second is the dimension name of longitude +#' of 'ano'. The default value is c('lat', 'lon'). +#'@param neofs A positive integer of the modes to be kept. The default value is +#' 15. If time length or the product of the length of space_dim is smaller than +#' neofs, neofs will be changed to the minimum of the three values. +#'@param corr A logical value indicating whether to base on a correlation (TRUE) +#' or on a covariance matrix (FALSE). The default value is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list containing: +#'\item{EOFs}{ +#' An array of EOF patterns normalized to 1 (unitless) with dimensions +#' (number of modes, rest of the dimensions of 'ano' except 'time_dim'). +#' Multiplying \code{EOFs} by \code{PCs} gives the original reconstructed +#' field. +#'} +#'\item{PCs}{ +#' An array of principal components with the units of the original field to +#' the power of 2, with dimensions (time_dim, number of modes, rest of the +#' dimensions of 'ano' except 'space_dim'). +#' 'PCs' contains already the percentage of explained variance so, +#' to reconstruct the original field it's only needed to multiply 'EOFs' +#' by 'PCs'. +#'} +#'\item{var}{ +#' An array of the percentage (%) of variance fraction of total variance +#' explained by each mode (number of modes). The dimensions are (number of +#' modes, rest of the dimensions of 'ano' except 'time_dim' and 'space_dim'). +#'} +#'\item{mask}{ +#' An array of the mask with dimensions (space_dim, rest of the dimensions of +#' 'ano' except 'time_dim'). It is made from 'ano', 1 for the positions that +#' 'ano' has value and NA for the positions that 'ano' has NA. It is used to +#' replace NAs with 0s for EOF calculation and mask the result with NAs again +#' after the calculation. +#'} +#'\item{wght}{ +#' An array of the area weighting with dimensions 'space_dim'. It is calculated +#' by cosine of 'lat' and used to compute the fraction of variance explained by +#' each EOFs. +#'} +#'\item{tot_var}{ +#' A number or a numeric array of the total variance explained by all the modes. +#' The dimensions are same as 'ano' except 'time_dim' and 'space_dim'. +#'} +#' +#'@seealso ProjectField, NAO, PlotBoxWhisker +#'@examples +#'# This example computes the EOFs along forecast horizons and plots the one +#'# that explains the greatest amount of variability. 
The example data has low +#'# resolution so the result may not be explanatory, but it displays how to +#'# use this function. +#'\dontshow{ +#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), +#' c('observation'), startDates, +#' leadtimemin = 1, +#' leadtimemax = 4, +#' output = 'lonlat', +#' latmin = 27, latmax = 48, +#' lonmin = -12, lonmax = 40) +#'} +#'ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) +#'tmp <- MeanDims(ano$exp, c('dataset', 'member')) +#'ano <- tmp[1, , ,] +#'names(dim(ano)) <- names(dim(tmp))[-2] +#'eof <- EOF(ano, sampleData$lat, sampleData$lon) +#'\dontrun{ +#'PlotEquiMap(eof$EOFs[1, , ], sampleData$lon, sampleData$lat) +#'} +#' +#'@import multiApply +#'@importFrom stats sd +#'@export +EOF <- function(ano, lat, lon, time_dim = 'sdate', space_dim = c('lat', 'lon'), + neofs = 15, corr = FALSE, ncores = NULL) { + + # Check inputs + ## ano + if (is.null(ano)) { + stop("Parameter 'ano' cannot be NULL.") + } + if (!is.numeric(ano)) { + stop("Parameter 'ano' must be a numeric array.") + } + if (any(is.null(names(dim(ano)))) | any(nchar(names(dim(ano))) == 0)) { + stop("Parameter 'ano' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(ano))) { + stop("Parameter 'time_dim' is not found in 'ano' dimension.") + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (!all(space_dim %in% names(dim(ano)))) { + stop("Parameter 'space_dim' is not found in 'ano' dimension.") + } + ## lat + if (!is.numeric(lat) | length(lat) != dim(ano)[space_dim[1]]) { + stop("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'ano'.") + } + if (any(lat > 90 | lat < -90)) { + stop("Parameter 'lat' must contain values within the range [-90, 90].") + } + ## lon + if (!is.numeric(lon) | length(lon) != dim(ano)[space_dim[2]]) { + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'ano'.") + } + if (any(lon > 360 | lon < -360)) { + .warning("Some 'lon' is out of the range [-360, 360].") + } + ## neofs + if (!is.numeric(neofs) | neofs %% 1 != 0 | neofs <= 0 | length(neofs) > 1) { + stop("Parameter 'neofs' must be a positive integer.") + } + ## corr + if (!is.logical(corr) | length(corr) > 1) { + stop("Parameter 'corr' must be one logical value.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + ############################### + # Calculate EOF + +# # Replace mask of NAs with 0s for EOF analysis. +# ano[!is.finite(ano)] <- 0 + + # Area weighting. Weights for EOF; needed to compute the + # fraction of variance explained by each EOFs + space_ind <- sapply(space_dim, function(a) which(names(dim(ano)) == a)) + wght <- array(cos(lat * pi / 180), dim = dim(ano)[space_ind]) + + # We want the covariance matrix to be weigthed by the grid + # cell area so the anomaly field is weighted by its square + # root since the covariance matrix equals transpose(ano) + # times ano. 
+ wght <- sqrt(wght) + + # neofs is bounded + if (neofs != min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), neofs)) { + neofs <- min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), neofs) + .warning(paste0("Parameter 'neofs' is changed to ", neofs, ", the minimum among ", + "the length of time_dim, the production of the length of space_dim, ", + "and neofs.")) + } + + res <- Apply(ano, + target_dims = c(time_dim, space_dim), + output_dims = list(EOFs = c('mode', space_dim), + PCs = c(time_dim, 'mode'), + var = 'mode', + tot_var = NULL, + mask = space_dim), + fun = .EOF, + corr = corr, neofs = neofs, + wght = wght, + ncores = ncores) + + return(c(res, wght = list(wght))) + +} + +.EOF <- function(ano, neofs = 15, corr = FALSE, wght = wght) { + # ano: [time, lat, lon] + + # Dimensions + nt <- dim(ano)[1] + ny <- dim(ano)[2] + nx <- dim(ano)[3] + + # Check if all the time steps at one grid point are NA-consistent. + # The grid point should have all NAs or no NA along time dim. + if (anyNA(ano)) { + ano_latlon <- array(ano, dim = c(nt, ny * nx)) # [time, lat*lon] + na_ind <- which(is.na(ano_latlon), arr.ind = T) + if (dim(na_ind)[1] != nt * length(unique(na_ind[, 2]))) { + stop("Detect certain grid points have NAs but not consistent across time ", + "dimension. If the grid point is NA, it should have NA at all time step.") + } + } + + # Build the mask + mask <- ano[1, , ] + mask[!is.finite(mask)] <- NA + mask[is.finite(mask)] <- 1 + dim(mask) <- c(ny, nx) + + # Replace mask of NAs with 0s for EOF analysis. + ano[!is.finite(ano)] <- 0 + + ano <- ano * InsertDim(wght, 1, nt) + + # The use of the correlation matrix is done under the option corr. + if (corr) { + stdv <- apply(ano, c(2, 3), sd, na.rm = T) + ano <- ano / InsertDim(stdv, 1, nt) + } + + # Time/space matrix for SVD + dim(ano) <- c(nt, ny * nx) + dim.dat <- dim(ano) + + # 'transpose' means the array needs to be transposed before + # calling La.svd for computational efficiency because the + # spatial dimension is larger than the time dimension. This + # goes with transposing the outputs of LA.svd also. + if (dim.dat[2] > dim.dat[1]) { + transpose <- TRUE + } else { + transpose <- FALSE + } + if (transpose) { + pca <- La.svd(t(ano)) + } else { + pca <- La.svd(ano) + } + + # La.svd conventions: decomposition X = U D t(V) La.svd$u + # returns U La.svd$d returns diagonal values of D La.svd$v + # returns t(V) !! The usual convention is PC=U and EOF=V. + # If La.svd is called for ano (transpose=FALSE case): EOFs: + # $v PCs: $u If La.svd is called for t(ano) (transposed=TRUE + # case): EOFs: t($u) PCs: t($v) + + if (transpose) { + pca.EOFs <- t(pca$u) + pca.PCs <- t(pca$v) + } else { + pca.EOFs <- pca$v + pca.PCs <- pca$u + } + + # The numbers of transposition is limited to neofs + PC <- pca.PCs[, 1:neofs] + EOF <- pca.EOFs[1:neofs, ] + dim(EOF) <- c(neofs, ny, nx) + + # To sort out crash when neofs=1. 
+  if (neofs == 1) {
+    PC <- InsertDim(PC, 2, 1, name = 'new')
+  }
+
+  # Computation of the % of variance associated with each mode
+  W <- pca$d[1:neofs]
+  tot.var <- sum(pca$d^2)
+  var.eof <- 100 * pca$d[1:neofs]^2 / tot.var
+
+  for (e in 1:neofs) {
+    # Set all masked grid points to NA in the EOFs
+    # Divide patterns by area weights so that EOF * PC gives unweighted (original) data
+    EOF[e, , ] <- EOF[e, , ] * mask / wght
+    # PC is multiplied by the explained variance,
+    # so that the reconstruction is only EOF * PC
+    PC[, e] <- PC[, e] * W[e]
+  }
+
+  if (neofs == 1) {
+    var.eof <- as.array(var.eof)
+  }
+
+  return(invisible(list(EOFs = EOF, PCs = PC, var = var.eof, tot_var = tot.var, mask = mask)))
+}
+
diff --git a/modules/Crossval/R/tmp/GetProbs.R b/modules/Crossval/R/tmp/GetProbs.R
index 2a538892..fb2cda0c 100644
--- a/modules/Crossval/R/tmp/GetProbs.R
+++ b/modules/Crossval/R/tmp/GetProbs.R
@@ -212,7 +212,6 @@ GetProbs <- function(data, time_dim = 'sdate', memb_dim = 'member',
       stop("Parameter 'ncores' must be either NULL or a positive integer.")
     }
   }
-
   ###############################
   if (is.null(abs_thresholds)) {
     res <- Apply(data = list(data = data),
@@ -298,7 +297,6 @@ GetProbs <- function(data, time_dim = 'sdate', memb_dim = 'member',
     }
   }
   # quantiles: [bin-1, sdate]
-
   # Probabilities
   probs <- array(dim = c(dim(quantiles)[1] + 1, dim(data)[1]))  # [bin, sdate]
   for (i_time in seq_len(dim(data)[1])) {
diff --git a/modules/Crossval/R/tmp/NAO.R b/modules/Crossval/R/tmp/NAO.R
new file mode 100644
index 00000000..255e2a9f
--- /dev/null
+++ b/modules/Crossval/R/tmp/NAO.R
@@ -0,0 +1,574 @@
+#'Compute the North Atlantic Oscillation (NAO) Index
+#'
+#'Compute the North Atlantic Oscillation (NAO) index based on the leading EOF
+#'of the sea level pressure (SLP) anomalies over the north Atlantic region
+#'(20N-80N, 80W-40E). The PCs are obtained by projecting the forecast and
+#'observed anomalies onto the observed EOF pattern or the forecast
+#'anomalies onto the EOF pattern of the other years of the forecast.
+#'By default (ftime_avg = 2:4), NAO() computes the NAO index for 1-month
+#'lead seasonal forecasts that can be plotted with PlotBoxWhisker(). It returns
+#'cross-validated PCs of the NAO index for hindcast (exp) and observations
+#'(obs) based on the leading EOF pattern, or, if forecast (exp_cor) is provided,
+#'the NAO index for forecast and the corresponding data (exp and obs).
+#'
+#'@param exp A named numeric array of North Atlantic SLP (20N-80N, 80W-40E)
+#'  hindcast anomalies from \code{Ano()} or \code{Ano_CrossValid()} with
+#'  dimensions 'time_dim', 'memb_dim', 'ftime_dim', and 'space_dim' at least.
+#'  If only NAO of observational data needs to be computed, this parameter can
+#'  be left to NULL. The default value is NULL.
+#'@param obs A named numeric array of North Atlantic SLP (20N-80N, 80W-40E)
+#'  observed anomalies from \code{Ano()} or \code{Ano_CrossValid()} with
+#'  dimensions 'time_dim', 'ftime_dim', and 'space_dim' at least.
+#'  If only NAO of experimental data needs to be computed, this parameter can
+#'  be left to NULL. The default value is NULL.
+#'@param exp_cor A named numeric array of the North Atlantic SLP (20N-80N, 80W-40E)
+#'  forecast anomalies from \code{Ano()} or \code{Ano_CrossValid()} with
+#'  dimension 'time_dim' of length 1 (as in the case of an operational
+#'  forecast), 'memb_dim', 'ftime_dim', and 'space_dim' at least.
+#'  If only NAO of reference period needs to be computed, this parameter can
+#'  be left to NULL. The default value is NULL.
+#'@param lat A vector of the latitudes of 'exp' and 'obs'. +#'@param lon A vector of the longitudes of 'exp' and 'obs'. +#'@param time_dim A character string indicating the name of the time dimension +#' of 'exp' and 'obs'. The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member +#' dimension of 'exp' (and 'obs', optional). If 'obs' has memb_dim, the length +#' must be 1. The default value is 'member'. +#'@param space_dim A vector of two character strings. The first is the dimension +#' name of latitude of 'ano' and the second is the dimension name of longitude +#' of 'ano'. The default value is c('lat', 'lon'). +#'@param ftime_dim A character string indicating the name of the forecast time +#' dimension of 'exp' and 'obs'. The default value is 'ftime'. +#'@param ftime_avg A numeric vector of the forecast time steps to average +#' across the target period. If average is not needed, set NULL. The default +#' value is 2:4, i.e., from 2nd to 4th forecast time steps. +#'@param obsproj A logical value indicating whether to compute the NAO index by +#' projecting the forecast anomalies onto the leading EOF of observational +#' reference (TRUE, default) or compute the NAO by first computing the leading +#' EOF of the forecast anomalies (in cross-validation mode, i.e. leave the +#' evaluated year out), then projecting forecast anomalies onto this EOF +#' (FALSE). If 'exp_cor' is provided, 'obs' will be used when obsproj is TRUE +#' and 'exp' will be used when obsproj is FALSE, and no cross-validation is +#' applied. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list which contains some of the following items depending on the data inputs: +#'\item{exp}{ +#' A numeric array of hindcast NAO index in verification format with the same +#' dimensions as 'exp' except space_dim and ftime_dim. If ftime_avg is NULL, +#' ftime_dim remains. +#' } +#'\item{obs}{ +#' A numeric array of observation NAO index in verification format with the same +#' dimensions as 'obs' except space_dim and ftime_dim. If ftime_avg is NULL, +#' ftime_dim remains. +#'} +#'\item{exp_cor}{ +#' A numeric array of forecast NAO index in verification format with the same +#' dimensions as 'exp_cor' except space_dim and ftime_dim. If ftime_avg is NULL, +#' ftime_dim remains. +#' } +#' +#'@references +#'Doblas-Reyes, F.J., Pavan, V. and Stephenson, D. (2003). The skill of +#' multi-model seasonal forecasts of the wintertime North Atlantic +#' Oscillation. Climate Dynamics, 21, 501-514. +#' DOI: 10.1007/s00382-003-0350-4 +#' +#'@examples +#'# Make up synthetic data +#'set.seed(1) +#'exp <- array(rnorm(1620), dim = c(member = 2, sdate = 3, ftime = 5, lat = 6, lon = 9)) +#'set.seed(2) +#'obs <- array(rnorm(1620), dim = c(member = 1, sdate = 3, ftime = 5, lat = 6, lon = 9)) +#'lat <- seq(20, 80, length.out = 6) +#'lon <- seq(-80, 40, length.out = 9) +#'nao <- NAO(exp = exp, obs = obs, lat = lat, lon = lon) +#' +#'exp_cor <- array(rnorm(540), dim = c(member = 2, sdate = 1, ftime = 5, lat = 6, lon = 9)) +#'nao <- NAO(exp = exp, obs = obs, exp_cor = exp_cor, lat = lat, lon = lon, obsproj = TRUE) +#'# plot the NAO index +#' \dontrun{ +#'nao$exp <- Reorder(nao$exp, c(2, 1)) +#'nao$obs <- Reorder(nao$obs, c(2, 1)) +#'PlotBoxWhisker(nao$exp, nao$obs, "NAO index, DJF", "NAO index (PC1) TOS", +#' monini = 12, yearini = 1985, freq = 1, "Exp. A", "Obs. 
X") +#' } +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@export +NAO <- function(exp = NULL, obs = NULL, exp_cor = NULL, lat, lon, time_dim = 'sdate', + memb_dim = 'member', space_dim = c('lat', 'lon'), + ftime_dim = 'ftime', ftime_avg = 2:4, + obsproj = TRUE, ncores = NULL) { + # Check inputs + ## exp, obs, and exp_cor (1) + if (is.null(obs) & is.null(exp)) { + stop("Parameter 'exp' and 'obs' cannot both be NULL.") + } + if (!is.null(exp)) { + if (!is.numeric(exp)) { + stop("Parameter 'exp' must be a numeric array.") + } + if (is.null(dim(exp))) { + stop("Parameter 'exp' must have at least dimensions ", + "time_dim, memb_dim, space_dim, and ftime_dim.") + } + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0)) { + stop("Parameter 'exp' must have dimension names.") + } + } + if (!is.null(obs)) { + if (!is.numeric(obs)) { + stop("Parameter 'obs' must be a numeric array.") + } + if (is.null(dim(obs))) { + stop("Parameter 'obs' must have at least dimensions ", + "time_dim, space_dim, and ftime_dim.") + } + if (any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'obs' must have dimension names.") + } + } + if (!is.null(exp_cor)) { + if (!is.numeric(exp_cor)) { + stop("Parameter 'exp_cor' must be a numeric array.") + } + if (is.null(dim(exp_cor))) { + stop(paste0("Parameter 'exp_cor' must have at least dimensions ", + "time_dim, memb_dim, space_dim, and ftime_dim.")) + } + if (any(is.null(names(dim(exp_cor)))) | any(nchar(names(dim(exp_cor))) == 0)) { + stop("Parameter 'exp_cor' must have dimension names.") + } + if (is.null(exp) || is.null(obs)) { + stop("Parameters 'exp' and 'obs' are required when 'exp_cor' is not provided.") + } + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!is.null(exp)) { + if (!time_dim %in% names(dim(exp))) { + stop("Parameter 'time_dim' is not found in 'exp' dimension.") + } + } + if (!is.null(obs)) { + if (!time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'obs' dimension.") + } + } + if (!is.null(exp_cor)) { + if (!time_dim %in% names(dim(exp_cor))) { + stop("Parameter 'time_dim' is not found in 'exp_cor' dimension.") + } + if (dim(exp_cor)[time_dim] > 1) { + stop("Parameter 'exp_cor' is expected to have length 1 in ", + time_dim, "dimension.") + } + } + + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!is.null(exp)) { + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + } + add_member_back <- FALSE + if (!is.null(obs)) { + if (memb_dim %in% names(dim(obs))) { + if (dim(obs)[memb_dim] != 1) { + stop("The length of parameter 'memb_dim' in 'obs' must be 1.") + } else { + add_member_back <- TRUE + obs <- ClimProjDiags::Subset(obs, memb_dim, 1, drop = 'selected') + } + } + } + if (!is.null(exp_cor)) { + if (!memb_dim %in% names(dim(exp_cor))) { + stop("Parameter 'memb_dim' is not found in 'exp_cor' dimension.") + } + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (!is.null(exp)) { + if (!all(space_dim %in% names(dim(exp)))) { + stop("Parameter 'space_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(obs)) { + if (!all(space_dim %in% names(dim(obs)))) { + stop("Parameter 'space_dim' is not found in 'exp' or 'obs' 
dimension.") + } + } + if (!is.null(exp_cor)) { + if (any(!space_dim %in% names(dim(exp_cor)))) { + stop("Parameter 'space_dim' is not found in 'exp_cor' dimensions.") + } + } + ## ftime_dim + if (!is.character(ftime_dim) | length(ftime_dim) > 1) { + stop("Parameter 'ftime_dim' must be a character string.") + } + if (!is.null(exp)) { + if (!ftime_dim %in% names(dim(exp))) { + stop("Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(obs)) { + if (!ftime_dim %in% names(dim(obs))) { + stop("Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(exp_cor)) { + if (!ftime_dim %in% names(dim(exp_cor))) { + stop("Parameter 'ftime_dim' is not found in 'exp_cor' dimensions.") + } + } + ## exp and obs (2) + #TODO: Add checks for exp_cor + if (!is.null(exp) & !is.null(obs)) { + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + throw_error <- FALSE + if (length(name_exp) != length(name_obs)) { + throw_error <- TRUE + } else if (any(name_exp != name_obs)) { + throw_error <- TRUE + } else if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + throw_error <- TRUE + } + if (throw_error) { + stop("Parameter 'exp' and 'obs' must have the same names and lengths ", + "of all the dimensions except 'memb_dim'.") + } + } + ## ftime_avg + if (!is.null(ftime_avg)) { + if (!is.vector(ftime_avg) | !is.numeric(ftime_avg)) { + stop("Parameter 'ftime_avg' must be an integer vector.") + } + if (!is.null(exp)) { + if (max(ftime_avg) > dim(exp)[ftime_dim] | min(ftime_avg) < 1) { + stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.") + } + } + if (!is.null(obs)) { + if (max(ftime_avg) > dim(obs)[ftime_dim] | min(ftime_avg) < 1) { + stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.") + } + } + if (!is.null(exp_cor)) { + if (max(ftime_avg) > dim(exp_cor)[ftime_dim] | min(ftime_avg) < 1) { + stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.") + } + } + } + ## sdate >= 2 + if (!is.null(exp)) { + if (dim(exp)[time_dim] < 2) { + stop("The length of time_dim must be at least 2.") + } + } else { + if (dim(obs)[time_dim] < 2) { + stop("The length of time_dim must be at least 2.") + } + } + ## lat and lon + if (!is.null(exp)) { + if (!is.numeric(lat) | length(lat) != dim(exp)[space_dim[1]]) { + stop("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp' and 'obs'.") + } + if (!is.numeric(lon) | length(lon) != dim(exp)[space_dim[2]]) { + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'exp' and 'obs'.") + } + } + if (!is.null(obs)) { + if (!is.numeric(lat) | length(lat) != dim(obs)[space_dim[1]]) { + stop("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp' and 'obs'.") + } + if (!is.numeric(lon) | length(lon) != dim(obs)[space_dim[2]]) { + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'exp' and 'obs'.") + } + } + if (!is.null(exp_cor)) { + if (!is.numeric(lat) | length(lat) != dim(exp_cor)[space_dim[1]]) { + stop("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp_cor'.") + } + if (!is.numeric(lon) | length(lon) != dim(exp_cor)[space_dim[2]]) { + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 
'exp_cor'.") + } + } + stop_needed <- FALSE + if (max(lat) > 80 | min(lat) < 20) { + stop_needed <- TRUE + } + #NOTE: different from s2dverification + # lon is not used in the calculation actually. EOF only uses lat to do the + # weight. So we just need to ensure the data is in this region, regardless + # the order. + if (any(lon < 0)) { #[-180, 180] + if (!(min(lon) > -90 & min(lon) < -70 & max(lon) < 50 & max(lon) > 30)) { + stop_needed <- TRUE + } + } else { #[0, 360] + if (any(lon >= 50 & lon <= 270)) { + stop_needed <- TRUE + } else { + lon_E <- lon[which(lon < 50)] + lon_W <- lon[-which(lon < 50)] + if (max(lon_E) < 30 | min(lon_W) > 290) { + stop_needed <- TRUE + } + } + } + if (stop_needed) { + stop("The typical domain used to compute the NAO is 20N-80N, ", + "80W-40E. 'lat' or 'lon' is out of range.") + } + ## obsproj + if (!is.logical(obsproj) | length(obsproj) > 1) { + stop("Parameter 'obsproj' must be either TRUE or FALSE.") + } + if (obsproj) { + if (is.null(obs)) { + stop("Parameter 'obsproj' set to TRUE but no 'obs' provided.") + } + if (is.null(exp) & is.null(exp_cor)) { + .warning("parameter 'obsproj' set to TRUE but no 'exp' nor 'exp_cor' provided.") + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores == 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + # Average ftime + if (!is.null(ftime_avg)) { + if (!is.null(exp)) { + exp_sub <- ClimProjDiags::Subset(exp, ftime_dim, ftime_avg, drop = FALSE) + exp <- MeanDims(exp_sub, ftime_dim, na.rm = TRUE) + ## Cross-validated PCs. Fabian. This should be extended to + ## nmod and nlt by simple loops. Virginie + } + if (!is.null(obs)) { + obs_sub <- ClimProjDiags::Subset(obs, ftime_dim, ftime_avg, drop = FALSE) + obs <- MeanDims(obs_sub, ftime_dim, na.rm = TRUE) + } + if (!is.null(exp_cor)) { + exp_cor_sub <- ClimProjDiags::Subset(exp_cor, ftime_dim, ftime_avg, drop = FALSE) + exp_cor <- MeanDims(exp_cor_sub, ftime_dim, na.rm = TRUE) + } + } + + # wght + wght <- array(sqrt(cos(lat * pi / 180)), dim = c(length(lat), length(lon))) + if (is.null(exp_cor)) { + if (!is.null(exp) & !is.null(obs)) { + res <- Apply(list(exp, obs), + target_dims = list(exp = c(memb_dim, time_dim, space_dim), + obs = c(time_dim, space_dim)), + fun = .NAO, + lat = lat, wght = wght, + obsproj = obsproj, add_member_back = add_member_back, + ncores = ncores) + } else if (!is.null(exp)) { + res <- Apply(list(exp = exp), + target_dims = list(exp = c(memb_dim, time_dim, space_dim)), + fun = .NAO, + lat = lat, wght = wght, obs = NULL, + obsproj = obsproj, add_member_back = FALSE, + ncores = ncores) + } else if (!is.null(obs)) { + if (add_member_back) { + output_dims <- list(obs = c(time_dim, memb_dim)) + } else { + output_dims <- list(obs = time_dim) + } + res <- Apply(list(obs = obs), + target_dims = list(obs = c(time_dim, space_dim)), + output_dims = output_dims, + fun = .NAO, + lat = lat, wght = wght, exp = NULL, + obsproj = obsproj, add_member_back = add_member_back, + ncores = ncores) + } + } else { # exp_cor provided + res <- Apply(list(exp = exp, obs = obs, exp_cor = exp_cor), + target_dims = list(exp = c(memb_dim, time_dim, space_dim), + obs = c(time_dim, space_dim), + exp_cor = c(memb_dim, time_dim, space_dim)), + fun = .NAO, + lat = lat, wght = wght, + obsproj = obsproj, add_member_back = add_member_back, + ncores = ncores) + } + + return(res) +} + +.NAO <- function(exp = NULL, obs = NULL, exp_cor = NULL, lat, wght, obsproj = TRUE, + add_member_back = FALSE) { + # 
exp: [memb_exp, sdate, lat, lon] + # obs: [sdate, lat, lon] + # exp_cor: [memb, sdate = 1, lat, lon] + # wght: [lat, lon] + + if (!is.null(exp)) { + ntime <- dim(exp)[2] + nlat <- dim(exp)[3] + nlon <- dim(exp)[4] + nmemb_exp <- dim(exp)[1] + } else { + ntime <- dim(obs)[1] + nlat <- dim(obs)[2] + nlon <- dim(obs)[3] + } + if (!is.null(exp_cor)) { + ntime_exp_cor <- dim(exp_cor)[2] # should be 1 + nmemb_exp_cor <- dim(exp_cor)[1] + } + + if (!is.null(obs)) nao_obs <- array(NA, dim = ntime) + if (!is.null(exp)) nao_exp <- array(NA, dim = c(ntime, nmemb_exp)) + if (!is.null(exp_cor)) { + nao_exp_cor <- array(NA, dim = c(ntime_exp_cor, nmemb_exp_cor)) + #NOTE: The dimensions are flipped to fill in data correctly. Need to flip it back later. + } + + if (is.null(exp_cor)) { + + for (tt in 1:ntime) { # cross-validation + + if (!is.null(obs)) { + ## Calculate observation EOF. Excluding one forecast start year. + obs_sub <- obs[(1:ntime)[-tt], , , drop = FALSE] + EOF_obs <- .EOF(obs_sub, neofs = 1, wght = wght)$EOFs # [mode = 1, lat, lon] + ## Correct polarity of pattern + # EOF_obs: [mode = 1, lat, lon] + if (0 < mean(EOF_obs[1, which.min(abs(lat - 65)), ], na.rm = T)) { + EOF_obs <- EOF_obs * (-1) + } + ## Project observed anomalies. + PF <- .ProjectField(obs, eof_mode = EOF_obs[1, , ], wght = wght) # [sdate] + ## Keep PCs of excluded forecast start year. Fabian. + nao_obs[tt] <- PF[tt] + } + + if (!is.null(exp)) { + if (!obsproj) { + exp_sub <- exp[, (1:ntime)[-tt], , , drop = FALSE] + # Combine 'memb' and 'sdate' to calculate EOF + dim(exp_sub) <- c(nmemb_exp * (ntime - 1), nlat, nlon) + EOF_exp <- .EOF(exp_sub, neofs = 1, wght = wght)$EOFs # [mode = 1, lat, lon] + + ## Correct polarity of pattern + ##NOTE: different from s2dverification, which doesn't use mean(). +# if (0 < EOF_exp[1, which.min(abs(lat - 65)), ]) { + if (0 < mean(EOF_exp[1, which.min(abs(lat - 65)), ], na.rm = T)) { + EOF_exp <- EOF_exp * (-1) + } + + ### Lines below could be simplified further by computing + ### ProjectField() only on the year of interest... (though this is + ### not vital). Lauriane + for (imemb in 1:nmemb_exp) { + PF <- .ProjectField(exp[imemb, , , ], eof_mode = EOF_exp[1, , ], wght = wght) # [sdate, memb] + nao_exp[tt, imemb] <- PF[tt] + } + } else { + ## Project forecast anomalies on obs EOF + for (imemb in 1:nmemb_exp) { + PF <- .ProjectField(exp[imemb, , , ], eof_mode = EOF_obs[1, , ], wght = wght) # [sdate] + nao_exp[tt, imemb] <- PF[tt] + } + } + } + + } # for loop sdate + + } else { # exp_cor provided + + ## Calculate observation EOF. 
Without cross-validation + EOF_obs <- .EOF(obs, neofs = 1, wght = wght)$EOFs # [mode = 1, lat, lon] + ## Correct polarity of pattern + # EOF_obs: [mode, lat, lon] + if (0 < mean(EOF_obs[1, which.min(abs(lat - 65)), ], na.rm = T)) { + EOF_obs <- EOF_obs * (-1) + } + ## Project observed anomalies + PF <- .ProjectField(obs, eof_mode = EOF_obs, wght = wght) # [mode = 1, sdate] + nao_obs[] <- PF[1, ] + + if (!obsproj) { + # Calculate EOF_exp + tmp <- array(exp, dim = c(nmemb_exp * ntime, nlat, nlon)) + EOF_exp <- .EOF(tmp, neofs = 1, wght = wght)$EOFs # [mode = 1, lat, lon] + ## Correct polarity of pattern + if (0 < mean(EOF_exp[1, which.min(abs(lat - 65)), ], na.rm = T)) { + EOF_exp <- EOF_exp * (-1) + } + eof_mode_input <- EOF_exp[1, , ] + } else { + eof_mode_input <- EOF_obs[1, , ] + } + + # Calculate NAO_exp + for (imemb in 1:dim(exp)[1]) { + exp_sub <- ClimProjDiags::Subset(exp, along = 1, indices = imemb, + drop = 'selected') + PF <- .ProjectField(exp_sub, eof_mode = eof_mode_input, wght = wght) # [sdate] + nao_exp[ , imemb] <- PF + } + + # Calculate NAO_exp_cor + for (imemb in 1:dim(exp_cor)[1]) { + exp_sub <- ClimProjDiags::Subset(exp_cor, along = 1, indices = imemb, + drop = 'selected') + PF <- .ProjectField(exp_sub, eof_mode = eof_mode_input, wght = wght) # [sdate] + nao_exp_cor[, imemb] <- PF + } + + } + # add_member_back + if (add_member_back) { + memb_dim_name <- ifelse(!is.null(names(dim(exp))[1]), names(dim(exp))[1], 'member') + nao_obs <- InsertDim(nao_obs, 2, 1, name = memb_dim_name) + } + + # Return results + if (is.null(exp_cor)) { + res <- NULL + if (!is.null(exp)) { + res <- c(res, list(exp = nao_exp)) + } + if (!is.null(obs)) { + res <- c(res, list(obs = nao_obs)) + } + return(res) + + } else { + return(list(exp = nao_exp, obs = nao_obs, exp_cor = nao_exp_cor)) + } +} + diff --git a/modules/Crossval/R/tmp/ProjectField.R b/modules/Crossval/R/tmp/ProjectField.R new file mode 100644 index 00000000..efa35dc3 --- /dev/null +++ b/modules/Crossval/R/tmp/ProjectField.R @@ -0,0 +1,272 @@ +#'Project anomalies onto modes of variability +#' +#'Project anomalies onto modes of variability to get the temporal evolution of +#'the EOF mode selected. It returns principal components (PCs) by area-weighted +#'projection onto EOF pattern (from \code{EOF()}) or REOF pattern (from +#'\code{REOF()} or \code{EuroAtlanticTC()}). The calculation removes NA and +#'returns NA if the whole spatial pattern is NA. +#' +#'@param ano A numerical array of anomalies with named dimensions. The +#' dimensions must have at least 'time_dim' and 'space_dim'. It can be +#' generated by Ano(). +#'@param eof A list that contains at least 'EOFs' or 'REOFs' and 'wght', which +#' are both arrays. 'EOFs' or 'REOFs' must have dimensions 'mode' and +#' 'space_dim' at least. 'wght' has dimensions space_dim. It can be generated +#' by EOF() or REOF(). +#'@param time_dim A character string indicating the name of the time dimension +#' of 'ano'. The default value is 'sdate'. +#'@param space_dim A vector of two character strings. The first is the dimension +#' name of latitude of 'ano' and the second is the dimension name of longitude +#' of 'ano'. The default value is c('lat', 'lon'). +#'@param mode An integer of the variability mode number in the EOF to be +#' projected on. The default value is NULL, which means all the modes of 'eof' +#' is calculated. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. 
+#' +#'@return A numerical array of the principal components in the verification +#' format. The dimensions are the same as 'ano' except 'space_dim'. +#' +#'@seealso EOF, NAO, PlotBoxWhisker +#'@examples +#'\dontshow{ +#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), +#' c('observation'), startDates, +#' leadtimemin = 1, +#' leadtimemax = 4, +#' output = 'lonlat', +#' latmin = 27, latmax = 48, +#' lonmin = -12, lonmax = 40) +#'} +#'ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) +#'eof_exp <- EOF(ano$exp, sampleData$lat, sampleData$lon) +#'eof_obs <- EOF(ano$obs, sampleData$lat, sampleData$lon) +#'mode1_exp <- ProjectField(ano$exp, eof_exp, mode = 1) +#'mode1_obs <- ProjectField(ano$obs, eof_obs, mode = 1) +#' +#'\dontrun{ +#' # Plot the forecast and the observation of the first mode for the last year +#' # of forecast +#' sdate_dim_length <- dim(mode1_obs)['sdate'] +#' plot(mode1_obs[sdate_dim_length, 1, 1, ], type = "l", ylim = c(-1, 1), +#' lwd = 2) +#' for (i in 1:dim(mode1_exp)['member']) { +#' par(new = TRUE) +#' plot(mode1_exp[sdate_dim_length, 1, i, ], type = "l", col = rainbow(10)[i], +#' ylim = c(-15000, 15000)) +#' } +#'} +#' +#'@import multiApply +#'@export +ProjectField <- function(ano, eof, time_dim = 'sdate', space_dim = c('lat', 'lon'), + mode = NULL, ncores = NULL) { + + # Check inputs + ## ano (1) + if (is.null(ano)) { + stop("Parameter 'ano' cannot be NULL.") + } + if (!is.numeric(ano)) { + stop("Parameter 'ano' must be a numeric array.") + } + if (any(is.null(names(dim(ano)))) | any(nchar(names(dim(ano))) == 0)) { + stop("Parameter 'ano' must have dimension names.") + } + ## eof (1) + if (is.null(eof)) { + stop("Parameter 'eof' cannot be NULL.") + } + if (!is.list(eof)) { + stop("Parameter 'eof' must be a list generated by EOF() or REOF().") + } + if ('EOFs' %in% names(eof)) { + EOFs <- "EOFs" + } else if ('REOFs' %in% names(eof)) { + EOFs <- "REOFs" + } else if ('patterns' %in% names(eof)) { + EOFs <- "patterns" + } else { + stop("Parameter 'eof' must be a list that contains 'EOFs', 'REOFs', ", + "or 'patterns'. It can be generated by EOF(), REOF(), or EuroAtlanticTC().") + } + if (!'wght' %in% names(eof)) { + stop("Parameter 'eof' must be a list that contains 'wght'. 
", + "It can be generated by EOF() or REOF().") + } + if (!is.numeric(eof[[EOFs]]) || !is.array(eof[[EOFs]])) { + stop("The component 'EOFs' or 'REOFs' of parameter 'eof' must be a numeric array.") + } + if (!is.numeric(eof$wght) || !is.array(eof$wght)) { + stop("The component 'wght' of parameter 'eof' must be a numeric array.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(ano))) { + stop("Parameter 'time_dim' is not found in 'ano' dimension.") + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (!all(space_dim %in% names(dim(ano)))) { + stop("Parameter 'space_dim' is not found in 'ano' dimension.") + } + ## ano (2) + if (!all(space_dim %in% names(dim(ano))) | !time_dim %in% names(dim(ano))) { + stop("Parameter 'ano' must be an array with dimensions named as ", + "parameter 'space_dim' and 'time_dim'.") + } + ## eof (2) + if (!all(space_dim %in% names(dim(eof[[EOFs]]))) | + !'mode' %in% names(dim(eof[[EOFs]]))) { + stop("The component 'EOFs' or 'REOFs' of parameter 'eof' must be an array ", + "with dimensions named as parameter 'space_dim' and 'mode'.") + } + if (length(dim(eof$wght)) != 2 | !all(names(dim(eof$wght)) %in% space_dim)) { + stop("The component 'wght' of parameter 'eof' must be an array ", + "with dimensions named as parameter 'space_dim'.") + } + ## mode + if (!is.null(mode)) { + if (!is.numeric(mode) | mode %% 1 != 0 | mode < 0 | length(mode) > 1) { + stop("Parameter 'mode' must be NULL or a positive integer.") + } + if (mode > dim(eof[[EOFs]])['mode']) { + stop("Parameter 'mode' is greater than the number of available ", + "modes in 'eof'.") + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + +#------------------------------------------------------- + + # Keep the chosen mode + if (!is.null(mode)) { + eof_mode <- ClimProjDiags::Subset(eof[[EOFs]], 'mode', mode, drop = 'selected') + } else { + eof_mode <- eof[[EOFs]] + } + + if ('mode' %in% names(dim(eof_mode))) { + dimnames_without_mode <- names(dim(eof_mode))[-which(names(dim(eof_mode)) == 'mode')] + } else { + dimnames_without_mode <- names(dim(eof_mode)) + } + + if (all(dimnames_without_mode %in% space_dim)) { # eof_mode: [lat, lon] or [mode, lat, lon] + if ('mode' %in% names(dim(eof_mode))) { + eof_mode_target <- c('mode', space_dim) + output_dims <- c('mode', time_dim) + } else { + eof_mode_target <- space_dim + output_dims <- time_dim + } + res <- Apply(list(ano, eof_mode), + target_dims = list(c(time_dim, space_dim), + eof_mode_target), + output_dims = output_dims, + wght = eof$wght, + fun = .ProjectField, + ncores = ncores)$output1 + + } else { + + if (!all(dimnames_without_mode %in% names(dim(ano)))) { + stop("The array 'EOF' in parameter 'eof' has dimension not in parameter ", + "'ano'. Check if 'ano' and 'eof' are compatible.") + } + + common_dim_ano <- dim(ano)[which(names(dim(ano)) %in% dimnames_without_mode)] + if (any(common_dim_ano[match(dimnames_without_mode, names(common_dim_ano))] != + dim(eof_mode)[dimnames_without_mode])) { + stop("Found paramter 'ano' and 'EOF' in parameter 'eof' have common dimensions ", + "with different length. Check if 'ano' and 'eof' are compatible.") + } + + # Enlarge eof/ano is needed. 
The margin_dims of Apply() must be consistent + # between ano and eof. + additional_dims <- dim(ano)[-which(names(dim(ano)) %in% names(dim(eof_mode)))] + additional_dims <- additional_dims[-which(names(additional_dims) == time_dim)] + if (length(additional_dims) != 0) { + for (i in seq_along(additional_dims)) { + eof_mode <- InsertDim(eof_mode, posdim = (length(dim(eof_mode)) + 1), + lendim = additional_dims[i], name = names(additional_dims)[i]) + } + } + if ('mode' %in% names(dim(eof_mode))) { + eof_mode_target <- c('mode', space_dim) + output_dims <- c('mode', time_dim) + } else { + eof_mode_target <- space_dim + output_dims <- time_dim + } + res <- Apply(list(ano, eof_mode), + target_dims = list(c(time_dim, space_dim), + eof_mode_target), + output_dims = output_dims, + wght = eof$wght, + fun = .ProjectField, + ncores = ncores)$output1 + } + + return(res) +} + + +.ProjectField <- function(ano, eof_mode, wght) { + # ano: [sdate, lat, lon] + # eof_mode: [lat, lon] or [mode, lat, lon] + # wght: [lat, lon] + + ntime <- dim(ano)[1] + if (length(dim(eof_mode)) == 2) { # mode != NULL + # Initialization of pc.ver. + pc.ver <- array(NA, dim = ntime) #[sdate] + + # Weight + e.1 <- eof_mode * wght + ano <- ano * InsertDim(wght, 1, ntime) + #ano <- aaply(ano, 1, '*', wght) # much heavier + + na <- rowMeans(ano, na.rm = TRUE) # if [lat, lon] all NA, it's NA + #na <- apply(ano, 1, mean, na.rm = TRUE) # much heavier + tmp <- ano * InsertDim(e.1, 1, ntime) # [sdate, lat, lon] + rm(ano) + #pc.ver <- apply(tmp, 1, sum, na.rm = TRUE) # much heavier + pc.ver <- rowSums(tmp, na.rm = TRUE) + pc.ver[which(is.na(na))] <- NA + + } else { # mode = NULL + # Weight + e.1 <- eof_mode * InsertDim(wght, 1, dim(eof_mode)[1]) + dim(e.1) <- c(dim(eof_mode)[1], prod(dim(eof_mode)[2:3])) # [mode, lat*lon] + ano <- ano * InsertDim(wght, 1, ntime) + dim(ano) <- c(ntime, prod(dim(ano)[2:3])) # [sdate, lat*lon] + + na <- rowMeans(ano, na.rm = TRUE) # if [lat, lon] all NA, it's NA + na <- aperm(array(na, dim = c(ntime, dim(e.1)[1])), c(2, 1)) + + # Matrix multiplication e.1 [mode, lat*lon] by ano [lat*lon, sdate] + # Result: [mode, sdate] + pc.ver <- e.1 %*% t(ano) + pc.ver[which(is.na(na))] <- NA + +# # Change back dimensions to feet original input +# dim(projection) <- c(moredims, mode = unname(neofs)) +# return(projection) + } + + return(pc.ver) +} + + diff --git a/modules/Crossval/R/tmp/Utils.R b/modules/Crossval/R/tmp/Utils.R new file mode 100644 index 00000000..cd7a1e10 --- /dev/null +++ b/modules/Crossval/R/tmp/Utils.R @@ -0,0 +1,1885 @@ +#'@importFrom abind abind +#'@import plyr ncdf4 +#'@importFrom grDevices png jpeg pdf svg bmp tiff +#'@importFrom easyVerification convert2prob + +## Function to tell if a regexpr() match is a complete match to a specified name +.IsFullMatch <- function(x, name) { + x > 0 && attributes(x)$match.length == nchar(name) +} + +.ConfigReplaceVariablesInString <- function(string, replace_values, + allow_undefined_key_vars = FALSE) { + # This function replaces all the occurrences of a variable in a string by + # their corresponding string stored in the replace_values. 
+ if (length(strsplit(string, "\\$")[[1]]) > 1) { + parts <- strsplit(string, "\\$")[[1]] + output <- "" + i <- 0 + for (part in parts) { + if (i %% 2 == 0) { + output <- paste0(output, part) + } else { + if (part %in% names(replace_values)) { + output <- paste0(output, + .ConfigReplaceVariablesInString(replace_values[[part]], + replace_values, + allow_undefined_key_vars)) + } else if (allow_undefined_key_vars) { + output <- paste0(output, "$", part, "$") + } else { + stop('Error: The variable $', part, + '$ was not defined in the configuration file.', sep = '') + } + } + i <- i + 1 + } + output + } else { + string + } +} + +.KnownLonNames <- function() { + known_lon_names <- c('lon', 'longitude', 'x', 'i', 'nav_lon') + return(known_lon_names) +} + +.KnownLatNames <- function() { + known_lat_names <- c('lat', 'latitude', 'y', 'j', 'nav_lat') + return(known_lat_names) +} + +.t2nlatlon <- function(t) { + ## As seen in cdo's griddes.c: ntr2nlat() + nlats <- (t * 3 + 1) / 2 + if ((nlats > 0) && (nlats - trunc(nlats) >= 0.5)) { + nlats <- ceiling(nlats) + } else { + nlats <- round(nlats) + } + if (nlats %% 2 > 0) { + nlats <- nlats + 1 + } + ## As seen in cdo's griddes.c: compNlon(), and as specified in ECMWF + nlons <- 2 * nlats + keep_going <- TRUE + while (keep_going) { + n <- nlons + if (n %% 8 == 0) n <- trunc(n / 8) + while (n %% 6 == 0) n <- trunc(n / 6) + while (n %% 5 == 0) n <- trunc(n / 5) + while (n %% 4 == 0) n <- trunc(n / 4) + while (n %% 3 == 0) n <- trunc(n / 3) + if (n %% 2 == 0) n <- trunc(n / 2) + if (n <= 8) { + keep_going <- FALSE + } else { + nlons <- nlons + 2 + if (nlons > 9999) { + stop("Error: pick another gaussian grid truncation. ", + "It doesn't fulfill the standards to apply FFT.") + } + } + } + c(nlats, nlons) +} + +.nlat2t <- function(nlats) { + trunc((nlats * 2 - 1) / 3) +} + +.LoadDataFile <- function(work_piece, explore_dims = FALSE, silent = FALSE) { + # The purpose, working modes, inputs and outputs of this function are + # explained in ?LoadDataFile + #suppressPackageStartupMessages({library(ncdf4)}) + #suppressPackageStartupMessages({library(bigmemory)}) + #suppressPackageStartupMessages({library(plyr)}) + # Auxiliar function to convert array indices to lineal indices + arrayIndex2VectorIndex <- function(indices, dims) { + if (length(indices) > length(dims)) { + stop("Error: indices do not match dimensions in arrayIndex2VectorIndex.") + } + position <- 1 + dims <- rev(dims) + indices <- rev(indices) + for (i in seq_along(indices)) { + position <- position + (indices[i] - 1) * prod(dims[-(1:i)]) + } + position + } + + found_file <- NULL + dims <- NULL + grid_name <- units <- var_long_name <- NULL + is_2d_var <- array_across_gw <- NULL + data_across_gw <- NULL + + filename <- work_piece[['filename']] + namevar <- work_piece[['namevar']] + output <- work_piece[['output']] + # The names of all data files in the directory of the repository that match + # the pattern are obtained. + if (any(grep("^http", filename))) { + is_url <- TRUE + files <- filename + ## TODO: Check that the user is not using shell globbing exps. + } else { + is_url <- FALSE + files <- Sys.glob(filename) + } + + # If we don't find any, we leave the flag 'found_file' with a NULL value. + if (length(files) > 0) { + # The first file that matches the pattern is chosen and read. 
+ filename <- head(files, 1) + filein <- filename + found_file <- filename + mask <- work_piece[['mask']] + + if (!silent && explore_dims) { + .message(paste("Exploring dimensions...", filename)) + ##} else { + ## cat(paste("* Reading & processing data...", filename, '\n')) + ##} + } + + # We will fill in 'expected_dims' with the names of the expected dimensions of + # the data array we'll retrieve from the file. + expected_dims <- NULL + remap_needed <- FALSE + # But first we open the file and work out whether the requested variable is 2d + fnc <- nc_open(filein) + if (!(namevar %in% names(fnc$var))) { + stop("Error: The variable", namevar, "is not defined in the file", filename) + } + var_long_name <- fnc$var[[namevar]]$longname + units <- fnc$var[[namevar]]$units + file_dimnames <- unlist(lapply(fnc$var[[namevar]][['dim']], '[[', 'name')) + # The following two 'ifs' are to allow for 'lon'/'lat' by default, instead of + # 'longitude'/'latitude'. + if (!(work_piece[['dimnames']][['lon']] %in% file_dimnames) && + (work_piece[['dimnames']][['lon']] == 'longitude') && + ('lon' %in% file_dimnames)) { + work_piece[['dimnames']][['lon']] <- 'lon' + } + if (!(work_piece[['dimnames']][['lat']] %in% file_dimnames) && + (work_piece[['dimnames']][['lat']] == 'latitude') && + ('lat' %in% file_dimnames)) { + work_piece[['dimnames']][['lat']] <- 'lat' + } + if (is.null(work_piece[['is_2d_var']])) { + is_2d_var <- all(c(work_piece[['dimnames']][['lon']], + work_piece[['dimnames']][['lat']]) %in% + unlist(lapply(fnc$var[[namevar]][['dim']], + '[[', 'name'))) + } else { + is_2d_var <- work_piece[['is_2d_var']] + } + if ((is_2d_var || work_piece[['is_file_per_dataset']])) { + if (Sys.which("cdo")[[1]] == "") { + stop("Error: CDO libraries not available") + } + + cdo_version <- + strsplit(suppressWarnings( + system2("cdo", args = '-V', stderr = TRUE))[[1]], ' ')[[1]][5] + + cdo_version <- + as.numeric_version(unlist(strsplit(cdo_version, "[A-Za-z]", fixed = FALSE))[[1]]) + + } + # If the variable to load is 2-d, we need to determine whether: + # - interpolation is needed + # - subsetting is requested + if (is_2d_var) { + ## We read the longitudes and latitudes from the file. + lon <- ncvar_get(fnc, work_piece[['dimnames']][['lon']]) + lat <- ncvar_get(fnc, work_piece[['dimnames']][['lat']]) + first_lon_in_original_file <- lon[1] + # If a common grid is requested or we are exploring the file dimensions + # we need to read the grid type and size of the file to finally work out the + # CDO grid name. 
+ if (!is.null(work_piece[['grid']]) || explore_dims) { + # Here we read the grid type and its number of longitudes and latitudes + file_info <- system(paste('cdo -s griddes', filein, '2> /dev/null'), intern = TRUE) + grids_positions <- grep('# gridID', file_info) + if (length(grids_positions) < 1) { + stop("The grid should be defined in the files.") + } + grids_first_lines <- grids_positions + 2 + grids_last_lines <- c((grids_positions - 2)[-1], length(file_info)) + grids_info <- as.list(seq_along(grids_positions)) + grids_info <- lapply(grids_info, + function (x) file_info[grids_first_lines[x]:grids_last_lines[x]]) + grids_info <- lapply(grids_info, function (x) gsub(" *", " ", x)) + grids_info <- lapply(grids_info, function (x) gsub("^ | $", "", x)) + grids_info <- lapply(grids_info, function (x) unlist(strsplit(x, " | = "))) + grids_types <- unlist(lapply(grids_info, function (x) x[grep('gridtype', x) + 1])) + grids_matches <- unlist(lapply(grids_info, function (x) { + nlons <- if (any(grep('xsize', x))) { + as.numeric(x[grep('xsize', x) + 1]) + } else { + NA + } + nlats <- if (any(grep('ysize', x))) { + as.numeric(x[grep('ysize', x) + 1]) + } else { + NA + } + result <- FALSE + if (!anyNA(c(nlons, nlats))) { + if ((nlons == length(lon)) && + (nlats == length(lat))) { + result <- TRUE + } + } + result + })) + grids_matches <- grids_matches[which(grids_types %in% c('gaussian', 'lonlat'))] + grids_info <- grids_info[which(grids_types %in% c('gaussian', 'lonlat'))] + grids_types <- grids_types[which(grids_types %in% c('gaussian', 'lonlat'))] + if (length(grids_matches) == 0) { + stop("Error: Only 'gaussian' and 'lonlat' grids supported. See e.g: cdo sinfo ", filename) + } + if (sum(grids_matches) > 1) { + if ((all(grids_types[which(grids_matches)] == 'gaussian') || + all(grids_types[which(grids_matches)] == 'lonlat')) && + all(unlist(lapply(grids_info[which(grids_matches)], identical, + grids_info[which(grids_matches)][[1]])))) { + grid_type <- grids_types[which(grids_matches)][1] + } else { + stop("Error: Load() can't disambiguate: ", + "More than one lonlat/gaussian grids with the same size as ", + "the requested variable defined in ", filename) + } + } else if (sum(grids_matches) == 1) { + grid_type <- grids_types[which(grids_matches)] + } else { + stop("Unexpected error.") + } + grid_lons <- length(lon) + grid_lats <- length(lat) + # Convert to CDO grid name as seen in cdo's griddes.c: nlat2ntr() + if (grid_type == 'lonlat') { + grid_name <- paste0('r', grid_lons, 'x', grid_lats) + } else { + grid_name <- paste0('t', .nlat2t(grid_lats), 'grid') + } + if (is.null(work_piece[['grid']])) { + .warning(paste0("Detect the grid type to be '", grid_name, "'. ", + "If it is not expected, assign parameter 'grid' to avoid wrong result.")) + } + } + # If a common grid is requested, we will also calculate its size which we will use + # later on. 
+ if (!is.null(work_piece[['grid']])) { + # Now we calculate the common grid type and its lons and lats + if (any(grep('^t\\d{1,+}grid$', work_piece[['grid']]))) { + common_grid_type <- 'gaussian' + common_grid_res <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][2]) + nlonlat <- .t2nlatlon(common_grid_res) + common_grid_lats <- nlonlat[1] + common_grid_lons <- nlonlat[2] + } else if (any(grep('^r\\d{1,+}x\\d{1,+}$', work_piece[['grid']]))) { + common_grid_type <- 'lonlat' + common_grid_lons <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][2]) + common_grid_lats <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][3]) + } else { + stop("Error: Only supported grid types in parameter 'grid' are tgrid and rx") + } + } else { + ## If no 'grid' is specified, there is no common grid. + ## But these variables are filled in for consistency in the code. + common_grid_lons <- length(lon) + common_grid_lats <- length(lat) + } + first_common_grid_lon <- 0 + last_common_grid_lon <- 360 - 360 / common_grid_lons + ## This is not true for gaussian grids or for some regular grids, but + ## is a safe estimation + first_common_grid_lat <- -90 + last_common_grid_lat <- 90 + # And finally determine whether interpolation is needed or not + remove_shift <- FALSE + if (!is.null(work_piece[['grid']])) { + if ((grid_lons != common_grid_lons) || + (grid_lats != common_grid_lats) || + (grid_type != common_grid_type) || + (lon[1] != first_common_grid_lon)) { + if (grid_lons == common_grid_lons && grid_lats == common_grid_lats && + grid_type == common_grid_type && lon[1] != first_common_grid_lon) { + remove_shift <- TRUE + } + remap_needed <- TRUE + common_grid_name <- work_piece[['grid']] + } + } else if ((lon[1] != first_common_grid_lon) && explore_dims && + !work_piece[['single_dataset']]) { + remap_needed <- TRUE + common_grid_name <- grid_name + remove_shift <- TRUE + } + if (remap_needed && (work_piece[['remap']] == 'con') && + (cdo_version >= as.numeric_version('1.7.0'))) { + work_piece[['remap']] <- 'ycon' + } + if (remove_shift && !explore_dims) { + if (!is.null(work_piece[['progress_amount']])) { + cat("\n") + } + .warning(paste0("The dataset with index ", + tail(work_piece[['indices']], 1), " in '", + work_piece[['dataset_type']], + "' doesn't start at longitude 0 and will be re-interpolated in order ", + "to align its longitudes with the standard CDO grids definable with ", + "the names 'tgrid' or 'rx', which are by definition ", + "starting at the longitude 0.\n")) + if (!is.null(mask)) { + .warning(paste0("A mask was provided for the dataset with index ", + tail(work_piece[['indices']], 1), " in '", + work_piece[['dataset_type']], + "'. This dataset has been re-interpolated to align its longitudes to ", + "start at 0. You must re-interpolate the corresponding mask to align ", + "its longitudes to start at 0 as well, if you haven't done so yet. ", + "Running cdo remapcon,", common_grid_name, + " original_mask_file.nc new_mask_file.nc will fix it.\n")) + } + } + if (remap_needed && (grid_lons < common_grid_lons || grid_lats < common_grid_lats)) { + if (!is.null(work_piece[['progress_amount']])) { + cat("\n") + } + if (!explore_dims) { + .warning(paste0("The dataset with index ", tail(work_piece[['indices']], 1), + " in '", work_piece[['dataset_type']], "' is originally on ", + "a grid coarser than the common grid and it has been ", + "extrapolated. Check the results carefully. 
It is ", + "recommended to specify as common grid the coarsest grid ", + "among all requested datasets via the parameter 'grid'.\n")) + } + } + # Now calculate if the user requests for a lonlat subset or for the + # entire field + lonmin <- work_piece[['lon_limits']][1] + lonmax <- work_piece[['lon_limits']][2] + latmin <- work_piece[['lat_limits']][1] + latmax <- work_piece[['lat_limits']][2] + lon_subsetting_requested <- FALSE + lonlat_subsetting_requested <- FALSE + if (lonmin <= lonmax) { + if ((lonmin > first_common_grid_lon) || (lonmax < last_common_grid_lon)) { + lon_subsetting_requested <- TRUE + } + } else { + if ((lonmin - lonmax) > 360 / common_grid_lons) { + lon_subsetting_requested <- TRUE + } else { + gap_width <- floor(lonmin / (360 / common_grid_lons)) - + floor(lonmax / (360 / common_grid_lons)) + if (gap_width > 0) { + if (!(gap_width == 1 && (lonmin %% (360 / common_grid_lons) == 0) && + (lonmax %% (360 / common_grid_lons) == 0))) { + lon_subsetting_requested <- TRUE + } + } + } + } + if ((latmin > first_common_grid_lat) || (latmax < last_common_grid_lat) + || (lon_subsetting_requested)) { + lonlat_subsetting_requested <- TRUE + } + # Now that we know if subsetting was requested, we can say if final data + # will go across greenwich + if (lonmax < lonmin) { + data_across_gw <- TRUE + } else { + data_across_gw <- !lon_subsetting_requested + } + + # When remap is needed but no subsetting, the file is copied locally + # so that cdo works faster, and then interpolated. + # Otherwise the file is kept as is and the subset will have to be + # interpolated still. + if (!lonlat_subsetting_requested && remap_needed) { + nc_close(fnc) + filecopy <- tempfile(pattern = "load", fileext = ".nc") + file.copy(filein, filecopy) + filein <- tempfile(pattern = "loadRegridded", fileext = ".nc") + # "-L" is to serialize I/O accesses. It prevents potential segmentation fault in the + # underlying hdf5 library. + system(paste0("cdo -L -s remap", work_piece[['remap']], ",", + common_grid_name, + " -selname,", namevar, " ", filecopy, " ", filein, + " 2>/dev/null")) + file.remove(filecopy) + work_piece[['dimnames']][['lon']] <- 'lon' + work_piece[['dimnames']][['lat']] <- 'lat' + fnc <- nc_open(filein) + lon <- ncvar_get(fnc, work_piece[['dimnames']][['lon']]) + lat <- ncvar_get(fnc, work_piece[['dimnames']][['lat']]) + } + + # Read and check also the mask + if (!is.null(mask)) { + ###mask_file <- tempfile(pattern = 'loadMask', fileext = '.nc') + if (is.list(mask)) { + if (!file.exists(mask[['path']])) { + stop("Error: Couldn't find the mask file", mask[['path']]) + } + mask_file <- mask[['path']] + ###file.copy(work_piece[['mask']][['path']], mask_file) + fnc_mask <- nc_open(mask_file) + vars_in_mask <- sapply(fnc_mask$var, '[[', 'name') + if ('nc_var_name' %in% names(mask)) { + if (!(mask[['nc_var_name']] %in% + vars_in_mask)) { + stop("Error: couldn't find variable", mask[['nc_var_name']], + "in the mask file", mask[['path']]) + } + } else { + if (length(vars_in_mask) != 1) { + stop("Error: one and only one non-coordinate variable should be ", + "defined in the mask file", + mask[['path']], + "if the component 'nc_var_name' is not specified. 
", + "Currently found: ", + toString(vars_in_mask), ".") + } else { + mask[['nc_var_name']] <- vars_in_mask + } + } + if (sum(fnc_mask$var[[mask[['nc_var_name']]]]$size > 1) != 2) { + stop("Error: the variable '", + mask[['nc_var_name']], + "' must be defined only over the dimensions '", + work_piece[['dimnames']][['lon']], "' and '", + work_piece[['dimnames']][['lat']], + "' in the mask file ", + mask[['path']]) + } + mask <- ncvar_get(fnc_mask, mask[['nc_var_name']], collapse_degen = TRUE) + nc_close(fnc_mask) + ### mask_lon <- ncvar_get(fnc_mask, work_piece[['dimnames']][['lon']]) + ### mask_lat <- ncvar_get(fnc_mask, work_piece[['dimnames']][['lat']]) + ###} else { + ### dim_longitudes <- ncdim_def(work_piece[['dimnames']][['lon']], "degrees_east", lon) + ### dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], "degrees_north", lat) + ### ncdf_var <- ncvar_def('LSM', "", list(dim_longitudes, dim_latitudes), NA, 'double') + ### fnc_mask <- nc_create(mask_file, list(ncdf_var)) + ### ncvar_put(fnc_mask, ncdf_var, work_piece[['mask']]) + ### nc_close(fnc_mask) + ### fnc_mask <- nc_open(mask_file) + ### work_piece[['mask']] <- list(path = mask_file, nc_var_name = 'LSM') + ### mask_lon <- lon + ### mask_lat <- lat + ###} + ###} + ### Now ready to check that the mask is right + ##if (!(lonlat_subsetting_requested && remap_needed)) { + ### if ((dim(mask)[2] != length(lon)) || (dim(mask)[1] != length(lat))) { + ### stop(paste("Error: the mask of the dataset with index ", + ### tail(work_piece[['indices']], 1), " in '", + ### work_piece[['dataset_type']], "' is wrong. ", + ### "It must be on the common grid if the selected output type is 'lonlat', ", + ### "'lon' or 'lat', or 'areave' and 'grid' has been specified. It must be on ", + ### "the grid of the corresponding dataset if the selected output type is ", + ### "'areave' and no 'grid' has been specified. For more information ", + ### "check ?Load and see help on parameters 'grid', 'maskmod' and ", + ### "'maskobs'.", sep = "")) + ### } + ###if (!(identical(mask_lon, lon) && identical(mask_lat, lat))) { + ### stop(paste0("Error: the longitudes and latitudes in the masks must be ", + ### "identical to the ones in the corresponding data files if output = 'areave' ", + ### " or, if the selected output is 'lon', 'lat' or 'lonlat', the longitudes in ", + ### "the mask file must start by 0 and the latitudes must be ordered from ", + ### "highest to lowest. See\n ", + ### work_piece[['mask']][['path']], " and ", filein)) + ###} + } + } + + lon_indices <- seq_along(lon) + if (!(lonlat_subsetting_requested && remap_needed)) { + lon[which(lon < 0)] <- lon[which(lon < 0)] + 360 + } + if (lonmax >= lonmin) { + lon_indices <- lon_indices[which(((lon %% 360) >= lonmin) & ((lon %% 360) <= lonmax))] + } else if (!remap_needed) { + lon_indices <- lon_indices[which(((lon %% 360) <= lonmax) | ((lon %% 360) >= lonmin))] + } + lat_indices <- which(lat >= latmin & lat <= latmax) + ## In most of the cases the latitudes are ordered from -90 to 90. + ## We will reorder them to be in the order from 90 to -90, so mostly + ## always the latitudes are reordered. + ## TODO: This could be avoided in future. 
+ if (lat[1] < lat[length(lat)]) { + lat_indices <- lat_indices[rev(seq_along(lat_indices))] + } + if (!is.null(mask) && !(lonlat_subsetting_requested && remap_needed)) { + if ((dim(mask)[1] != length(lon)) || (dim(mask)[2] != length(lat))) { + stop("Error: the mask of the dataset with index ", tail(work_piece[['indices']], 1), + " in '", work_piece[['dataset_type']], "' is wrong. It must be on the ", + "common grid if the selected output type is 'lonlat', 'lon' or 'lat', ", + "or 'areave' and 'grid' has been specified. It must be on the grid of ", + "the corresponding dataset if the selected output type is 'areave' and ", + "no 'grid' has been specified. For more information check ?Load and see ", + "help on parameters 'grid', 'maskmod' and 'maskobs'.") + } + mask <- mask[lon_indices, lat_indices] + } + ## If the user requests subsetting, we must extend the lon and lat limits if possible + ## so that the interpolation after is done properly + maximum_extra_points <- work_piece[['remapcells']] + if (lonlat_subsetting_requested && remap_needed) { + if ((maximum_extra_points > (head(lon_indices, 1) - 1)) || + (maximum_extra_points > (length(lon) - tail(lon_indices, 1)))) { + ## if the requested number of points goes beyond the left or right + ## sides of the map, we need to take the entire map so that the + ## interpolation works properly + lon_indices <- seq_along(lon) + } else { + extra_points <- min(maximum_extra_points, head(lon_indices, 1) - 1) + if (extra_points > 0) { + lon_indices <- + c((head(lon_indices, 1) - extra_points):(head(lon_indices, 1) - 1), lon_indices) + } + extra_points <- min(maximum_extra_points, length(lon) - tail(lon_indices, 1)) + if (extra_points > 0) { + lon_indices <- c(lon_indices, + (tail(lon_indices, 1) + 1):(tail(lon_indices, 1) + extra_points)) + } + } + min_lat_ind <- min(lat_indices) + max_lat_ind <- max(lat_indices) + extra_points <- min(maximum_extra_points, min_lat_ind - 1) + if (extra_points > 0) { + if (lat[1] < tail(lat, 1)) { + lat_indices <- c(lat_indices, (min_lat_ind - 1):(min_lat_ind - extra_points)) + } else { + lat_indices <- c((min_lat_ind - extra_points):(min_lat_ind - 1), lat_indices) + } + } + extra_points <- min(maximum_extra_points, length(lat) - max_lat_ind) + if (extra_points > 0) { + if (lat[1] < tail(lat, 1)) { + lat_indices <- c((max_lat_ind + extra_points):(max_lat_ind + 1), lat_indices) + } else { + lat_indices <- c(lat_indices, (max_lat_ind + 1):(max_lat_ind + extra_points)) + } + } + } + lon <- lon[lon_indices] + lat <- lat[lat_indices] + expected_dims <- c(work_piece[['dimnames']][['lon']], + work_piece[['dimnames']][['lat']]) + } else { + lon <- 0 + lat <- 0 + } + # We keep on filling the expected dimensions + var_dimnames <- unlist(lapply(fnc$var[[namevar]][['dim']], '[[', 'name')) + nmemb <- nltime <- NULL + ## Sometimes CDO renames 'members' dimension to 'lev' + old_members_dimname <- NULL + if (('lev' %in% var_dimnames) && !(work_piece[['dimnames']][['member']] %in% var_dimnames)) { + old_members_dimname <- work_piece[['dimnames']][['member']] + work_piece[['dimnames']][['member']] <- 'lev' + } + if (work_piece[['dimnames']][['member']] %in% var_dimnames) { + nmemb <- fnc$var[[namevar]][['dim']][[match(work_piece[['dimnames']][['member']], + var_dimnames)]]$len + expected_dims <- c(expected_dims, work_piece[['dimnames']][['member']]) + } else { + nmemb <- 1 + } + if (length(expected_dims) > 0) { + dim_matches <- match(expected_dims, var_dimnames) + if (anyNA(dim_matches)) { + if (!is.null(old_members_dimname)) { + 
expected_dims[which(expected_dims == 'lev')] <- old_members_dimname + } + stop("Error: the expected dimension(s)", + toString(expected_dims[which(is.na(dim_matches))]), + "were not found in", filename) + } + time_dimname <- var_dimnames[-dim_matches] + } else { + time_dimname <- var_dimnames + } + if (length(time_dimname) > 0) { + if (length(time_dimname) == 1) { + nltime <- fnc$var[[namevar]][['dim']][[match(time_dimname, var_dimnames)]]$len + expected_dims <- c(expected_dims, time_dimname) + dim_matches <- match(expected_dims, var_dimnames) + } else { + if (!is.null(old_members_dimname)) { + expected_dims[which(expected_dims == 'lev')] <- old_members_dimname + } + stop("Error: the variable ", namevar, + " is defined over more dimensions than the expected (", + toString(c(expected_dims, 'time')), + "). It could also be that the members, longitude or latitude ", + "dimensions are named incorrectly. In that case, either rename ", + "the dimensions in the file or adjust Load() to recognize the actual ", + "name with the parameter 'dimnames'. See file ", filename) + } + } else { + nltime <- 1 + } + + # Now we must retrieve the data from the file, but only the asked indices. + # So we build up the indices to retrieve. + # Longitudes or latitudes have been retrieved already. + if (explore_dims) { + # If we're exploring the file we only want one time step from one member, + # to regrid it and work out the number of longitudes and latitudes. + # We don't need more. + members <- 1 + ltimes_list <- list(1) + } else { + # The data is arranged in the array 'tmp' with the dimensions in a + # common order: + # 1) Longitudes + # 2) Latitudes + # 3) Members (even if is not a file per member experiment) + # 4) Lead-times + if (work_piece[['is_file_per_dataset']]) { + time_indices <- 1:nltime + mons <- strsplit(system(paste('cdo showmon ', filein, + ' 2>/dev/null'), intern = TRUE), split = ' ') + years <- strsplit(system(paste('cdo showyear ', filein, + ' 2>/dev/null'), intern = TRUE), split = ' ') + mons <- as.numeric(mons[[1]][which(mons[[1]] != "")]) + years <- as.numeric(years[[1]][which(years[[1]] != "")]) + time_indices <- ts(time_indices, start = c(years[1], mons[1]), + end = c(years[length(years)], mons[length(mons)]), + frequency = 12) + ltimes_list <- list() + for (sdate in work_piece[['startdates']]) { + selected_time_indices <- window(time_indices, start = c(as.numeric( + substr(sdate, 1, 4)), as.numeric(substr(sdate, 5, 6))), + end = c(3000, 12), frequency = 12, extend = TRUE) + selected_time_indices <- selected_time_indices[work_piece[['leadtimes']]] + ltimes_list <- c(ltimes_list, list(selected_time_indices)) + } + } else { + ltimes <- work_piece[['leadtimes']] + #if (work_piece[['dataset_type']] == 'exp') { + ltimes_list <- list(ltimes[which(ltimes <= nltime)]) + #} + } + ## TODO: Put, when reading matrices, this kind of warnings + # if (nmember < nmemb) { + # cat("Warning: + members <- 1:work_piece[['nmember']] + members <- members[which(members <= nmemb)] + } + + # Now, for each list of leadtimes to load (usually only one list with all leadtimes), + # we'll join the indices and retrieve data + found_disordered_dims <- FALSE + for (ltimes in ltimes_list) { + if (is_2d_var) { + start <- c(min(lon_indices), min(lat_indices)) + end <- c(max(lon_indices), max(lat_indices)) + if (lonlat_subsetting_requested && remap_needed) { + subset_indices <- list(min(lon_indices):max(lon_indices) - min(lon_indices) + 1, + lat_indices - min(lat_indices) + 1) + dim_longitudes <- 
ncdim_def(work_piece[['dimnames']][['lon']], "degrees_east", lon) + dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], "degrees_north", lat) + ncdf_dims <- list(dim_longitudes, dim_latitudes) + } else { + subset_indices <- list(lon_indices - min(lon_indices) + 1, + lat_indices - min(lat_indices) + 1) + ncdf_dims <- list() + } + final_dims <- c(length(subset_indices[[1]]), length(subset_indices[[2]]), 1, 1) + } else { + start <- end <- NULL + subset_indices <- list() + ncdf_dims <- list() + final_dims <- c(1, 1, 1, 1) + } + + if (work_piece[['dimnames']][['member']] %in% expected_dims) { + start <- c(start, head(members, 1)) + end <- c(end, tail(members, 1)) + subset_indices <- c(subset_indices, list(members - head(members, 1) + 1)) + dim_members <- ncdim_def(work_piece[['dimnames']][['member']], "", members) + ncdf_dims <- c(ncdf_dims, list(dim_members)) + final_dims[3] <- length(members) + } + if (time_dimname %in% expected_dims) { + if (!all(is.na(ltimes))) { + start <- c(start, head(ltimes[which(!is.na(ltimes))], 1)) + end <- c(end, tail(ltimes[which(!is.na(ltimes))], 1)) + subset_indices <- c(subset_indices, + list(ltimes - head(ltimes[which(!is.na(ltimes))], 1) + 1)) + } else { + start <- c(start, NA) + end <- c(end, NA) + subset_indices <- c(subset_indices, list(ltimes)) + } + dim_time <- ncdim_def(time_dimname, "", seq_along(ltimes), unlim = TRUE) + ncdf_dims <- c(ncdf_dims, list(dim_time)) + final_dims[4] <- length(ltimes) + } + count <- end - start + 1 + start <- start[dim_matches] + count <- count[dim_matches] + subset_indices <- subset_indices[dim_matches] + # Now that we have the indices to retrieve, we retrieve the data + if (prod(final_dims) > 0) { + tmp <- take(ncvar_get(fnc, namevar, start, count, + collapse_degen = FALSE), + seq_along(subset_indices), subset_indices) + # The data is regridded if it corresponds to an atmospheric variable. When + # the chosen output type is 'areave' the data is not regridded to not + # waste computing time unless the user specified a common grid. 
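As a side note, the start/count retrieval above is plain ncdf4; a minimal self-contained sketch, assuming a hypothetical file.nc holding a variable tas on (lon, lat, time), where a count of -1 reads the whole dimension:

library(ncdf4)
fnc <- nc_open("file.nc")
# longitudes 5..14, every latitude, the first two time steps, in one read
slab <- ncvar_get(fnc, "tas", start = c(5, 1, 1), count = c(10, -1, 2),
                  collapse_degen = FALSE)
nc_close(fnc)
dim(slab)   # 10 x nlat x 2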
+ if (is_2d_var) { + ###if (!is.null(work_piece[['mask']]) && !(lonlat_subsetting_requested && remap_needed)) { + ### mask <- take(ncvar_get(fnc_mask, work_piece[['mask']][['nc_var_name']], + ### start[dim_matches[1:2]], count[dim_matches[1:2]], + ### collapse_degen = FALSE), 1:2, subset_indices[dim_matches[1:2]]) + ###} + if (lonlat_subsetting_requested && remap_needed) { + filein <- tempfile(pattern = "loadRegridded", fileext = ".nc") + filein2 <- tempfile(pattern = "loadRegridded2", fileext = ".nc") + ncdf_var <- ncvar_def(namevar, "", ncdf_dims[dim_matches], + fnc$var[[namevar]]$missval, + prec = if (fnc$var[[namevar]]$prec == 'int') { + 'integer' + } else { + fnc$var[[namevar]]$prec + }) + scale_factor <- ifelse(fnc$var[[namevar]]$hasScaleFact, fnc$var[[namevar]]$scaleFact, 1) + add_offset <- ifelse(fnc$var[[namevar]]$hasAddOffset, fnc$var[[namevar]]$addOffset, 0) + if (fnc$var[[namevar]]$hasScaleFact || fnc$var[[namevar]]$hasAddOffset) { + tmp <- (tmp - add_offset) / scale_factor + } + #nc_close(fnc) + fnc2 <- nc_create(filein2, list(ncdf_var)) + ncvar_put(fnc2, ncdf_var, tmp) + if (add_offset != 0) { + ncatt_put(fnc2, ncdf_var, 'add_offset', add_offset) + } + if (scale_factor != 1) { + ncatt_put(fnc2, ncdf_var, 'scale_factor', scale_factor) + } + nc_close(fnc2) + system(paste0("cdo -L -s -sellonlatbox,", if (lonmin > lonmax) { + "0,360," + } else { + paste0(lonmin, ",", lonmax, ",") + }, latmin, ",", latmax, + " -remap", work_piece[['remap']], ",", common_grid_name, + " ", filein2, " ", filein, " 2>/dev/null")) + file.remove(filein2) + fnc2 <- nc_open(filein) + sub_lon <- ncvar_get(fnc2, 'lon') + sub_lat <- ncvar_get(fnc2, 'lat') + ## We read the longitudes and latitudes from the file. + ## In principle cdo should put in order the longitudes + ## and slice them properly unless data is across greenwich + sub_lon[which(sub_lon < 0)] <- sub_lon[which(sub_lon < 0)] + 360 + sub_lon_indices <- seq_along(sub_lon) + if (lonmax < lonmin) { + sub_lon_indices <- sub_lon_indices[which((sub_lon <= lonmax) | (sub_lon >= lonmin))] + } + sub_lat_indices <- seq_along(sub_lat) + ## In principle cdo should put in order the latitudes + if (sub_lat[1] < sub_lat[length(sub_lat)]) { + sub_lat_indices <- rev(seq_along(sub_lat)) + } + final_dims[c(1, 2)] <- c(length(sub_lon_indices), length(sub_lat_indices)) + subset_indices[[dim_matches[1]]] <- sub_lon_indices + subset_indices[[dim_matches[2]]] <- sub_lat_indices + + tmp <- take(ncvar_get(fnc2, namevar, collapse_degen = FALSE), + seq_along(subset_indices), subset_indices) + + if (!is.null(mask)) { + ## We create a very simple 2d netcdf file that is then interpolated to the common + ## grid to know what are the lons and lats of our slice of data + mask_file <- tempfile(pattern = 'loadMask', fileext = '.nc') + mask_file_remap <- tempfile(pattern = 'loadMask', fileext = '.nc') + dim_longitudes <- ncdim_def(work_piece[['dimnames']][['lon']], + "degrees_east", c(0, 360)) + dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], + "degrees_north", c(-90, 90)) + ncdf_var <- ncvar_def('LSM', "", list(dim_longitudes, dim_latitudes), NA, 'double') + fnc_mask <- nc_create(mask_file, list(ncdf_var)) + ncvar_put(fnc_mask, ncdf_var, array(rep(0, 4), dim = c(2, 2))) + nc_close(fnc_mask) + system(paste0("cdo -L -s remap", work_piece[['remap']], ",", + common_grid_name, + " ", mask_file, " ", mask_file_remap, " 2>/dev/null")) + fnc_mask <- nc_open(mask_file_remap) + mask_lons <- ncvar_get(fnc_mask, 'lon') + mask_lats <- ncvar_get(fnc_mask, 'lat') + nc_close(fnc_mask) 
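+            # (The 2x2 dummy field above is only a cheap way of asking CDO
+            # for the lon/lat axes of the common grid; its values are never
+            # read back.)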
+ file.remove(mask_file, mask_file_remap) + if ((dim(mask)[1] != common_grid_lons) || (dim(mask)[2] != common_grid_lats)) { + stop("Error: the mask of the dataset with index ", + tail(work_piece[['indices']], 1), " in '", + work_piece[['dataset_type']], + "' is wrong. It must be on the common grid if the ", + "selected output type is 'lonlat', 'lon' or 'lat', ", + "or 'areave' and 'grid' has been specified. It must ", + "be on the grid of the corresponding dataset if the ", + "selected output type is 'areave' and no 'grid' has been ", + "specified. For more information check ?Load and see help ", + "on parameters 'grid', 'maskmod' and 'maskobs'.") + } + mask_lons[which(mask_lons < 0)] <- mask_lons[which(mask_lons < 0)] + 360 + if (lonmax >= lonmin) { + mask_lon_indices <- which((mask_lons >= lonmin) & (mask_lons <= lonmax)) + } else { + mask_lon_indices <- which((mask_lons >= lonmin) | (mask_lons <= lonmax)) + } + mask_lat_indices <- which((mask_lats >= latmin) & (mask_lats <= latmax)) + if (sub_lat[1] < sub_lat[length(sub_lat)]) { + mask_lat_indices <- mask_lat_indices[rev(seq_along(mask_lat_indices))] + } + mask <- mask[mask_lon_indices, mask_lat_indices] + } + sub_lon <- sub_lon[sub_lon_indices] + sub_lat <- sub_lat[sub_lat_indices] + ### nc_close(fnc_mask) + ### system(paste0("cdo -s -sellonlatbox,", if (lonmin > lonmax) { + ### "0,360," + ### } else { + ### paste0(lonmin, ",", lonmax, ",") + ### }, latmin, ",", latmax, + ### " -remap", work_piece[['remap']], ",", common_grid_name, + ###This is wrong: same files + ### " ", mask_file, " ", mask_file, " 2>/dev/null", sep = "")) + ### fnc_mask <- nc_open(mask_file) + ### mask <- take(ncvar_get(fnc_mask, work_piece[['mask']][['nc_var_name']], + ### collapse_degen = FALSE), 1:2, subset_indices[dim_matches[1:2]]) + ###} + } + } + if (is.unsorted(dim_matches)) { + if (!found_disordered_dims && + rev(work_piece[['indices']])[2] == 1 && + rev(work_piece[['indices']])[3] == 1) { + found_disordered_dims <- TRUE + .warning(paste0("The dimensions for the variable ", namevar, + " in the files of the experiment with index ", + tail(work_piece[['indices']], 1), + " are not in the optimal order for loading with Load(). ", + "The optimal order would be '", + toString(expected_dims), + "'. One of the files of the dataset is stored in ", filename)) + } + tmp <- aperm(tmp, dim_matches) + } + dim(tmp) <- final_dims + # If we are exploring the file we don't need to process and arrange + # the retrieved data. We only need to keep the dimension sizes. + if (is_2d_var && lonlat_subsetting_requested && remap_needed) { + final_lons <- sub_lon + final_lats <- sub_lat + } else { + final_lons <- lon + final_lats <- lat + } + if (explore_dims) { + if (work_piece[['is_file_per_member']]) { + ## TODO: When the exp_full_path contains asterisks and is file_per_member + ## members from different datasets may be accounted. + ## Also if one file member is missing the accounting will be wrong. + ## Should parse the file name and extract number of members. + if (is_url) { + nmemb <- NULL + } else { + nmemb <- length(files) + } + } + dims <- list(member = nmemb, ftime = nltime, lon = final_lons, lat = final_lats) + } else { + # If we are not exploring, then we have to process the retrieved data + if (is_2d_var) { + tmp <- apply(tmp, c(3, 4), function(x) { + # Disable of large values. 
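+            # 'x' is a single lon x lat field here (apply() loops over the
+            # member and lead-time dimensions); out-of-range values and masked
+            # points are set to NA before any spatial aggregation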
+ if (!is.na(work_piece[['var_limits']][2])) { + x[which(x > work_piece[['var_limits']][2])] <- NA + } + if (!is.na(work_piece[['var_limits']][1])) { + x[which(x < work_piece[['var_limits']][1])] <- NA + } + if (!is.null(mask)) { + x[which(mask < 0.5)] <- NA + } + + if (output == 'areave' || output == 'lon') { + weights <- InsertDim(cos(final_lats * pi / 180), 1, + length(final_lons), name = 'lon') + weights[which(is.na(x))] <- NA + if (output == 'areave') { + weights <- weights / mean(weights, na.rm = TRUE) + mean(x * weights, na.rm = TRUE) + } else { + weights <- weights / InsertDim(MeanDims(weights, 2, na.rm = TRUE), 2, + length(final_lats), name = 'lat') + MeanDims(x * weights, 2, na.rm = TRUE) + } + } else if (output == 'lat') { + MeanDims(x, 1, na.rm = TRUE) + } else if (output == 'lonlat') { + signif(x, 5) + } + }) + if (output == 'areave') { + dim(tmp) <- c(1, 1, final_dims[3:4]) + } else if (output == 'lon') { + dim(tmp) <- c(final_dims[1], 1, final_dims[3:4]) + } else if (output == 'lat') { + dim(tmp) <- c(1, final_dims[c(2, 3, 4)]) + } else if (output == 'lonlat') { + dim(tmp) <- final_dims + } + } + var_data <- attach.big.matrix(work_piece[['out_pointer']]) + if (work_piece[['dims']][['member']] > 1 && nmemb > 1 && + work_piece[['dims']][['ftime']] > 1 && + nltime < work_piece[['dims']][['ftime']]) { + work_piece[['indices']][2] <- work_piece[['indices']][2] - 1 + for (jmemb in members) { + work_piece[['indices']][2] <- work_piece[['indices']][2] + 1 + out_position <- arrayIndex2VectorIndex(work_piece[['indices']], work_piece[['dims']]) + out_indices <- out_position:(out_position + length(tmp[, , jmemb, ]) - 1) + var_data[out_indices] <- as.vector(tmp[, , jmemb, ]) + } + work_piece[['indices']][2] <- work_piece[['indices']][2] - tail(members, 1) + 1 + } else { + out_position <- arrayIndex2VectorIndex(work_piece[['indices']], work_piece[['dims']]) + out_indices <- out_position:(out_position + length(tmp) - 1) + a <- aperm(tmp, c(1, 2, 4, 3)) + as.vector(a) + var_data[out_indices] <- as.vector(aperm(tmp, c(1, 2, 4, 3))) + } + work_piece[['indices']][3] <- work_piece[['indices']][3] + 1 + } + } + } + nc_close(fnc) + if (is_2d_var) { + if (remap_needed) { + array_across_gw <- FALSE + file.remove(filein) + ###if (!is.null(mask) && lonlat_subsetting_requested) { + ### file.remove(mask_file) + ###} + } else { + if (first_lon_in_original_file < 0) { + array_across_gw <- data_across_gw + } else { + array_across_gw <- FALSE + } + } + } + } + if (explore_dims) { + list(dims = dims, is_2d_var = is_2d_var, grid = grid_name, + units = units, var_long_name = var_long_name, + data_across_gw = data_across_gw, array_across_gw = array_across_gw) + } else { + ###if (!silent && !is.null(progress_connection) && !is.null(work_piece[['progress_amount']])) { + ### foobar <- writeBin(work_piece[['progress_amount']], progress_connection) + ###} + if (!silent && !is.null(work_piece[['progress_amount']])) { + message(work_piece[['progress_amount']], appendLF = FALSE) + } + found_file + } +} + +.LoadSampleData <- function(var, exp = NULL, obs = NULL, sdates, + nmember = NULL, nmemberobs = NULL, + nleadtime = NULL, leadtimemin = 1, + leadtimemax = NULL, storefreq = 'monthly', + sampleperiod = 1, lonmin = 0, lonmax = 360, + latmin = -90, latmax = 90, output = 'areave', + method = 'conservative', grid = NULL, + maskmod = vector("list", 15), + maskobs = vector("list", 15), + configfile = NULL, suffixexp = NULL, + suffixobs = NULL, varmin = NULL, varmax = NULL, + silent = FALSE, nprocs = NULL) { + ## This 
function loads and selects sample data stored in sampleMap and + ## sampleTimeSeries and is used in the examples instead of Load() so as + ## to avoid nco and cdo system calls and computation time in the stage + ## of running examples in the CHECK process on CRAN. + selected_start_dates <- match(sdates, c('19851101', '19901101', '19951101', + '20001101', '20051101')) + start_dates_position <- 3 + lead_times_position <- 4 + + if (output == 'lonlat') { + sampleData <- s2dv::sampleMap + if (is.null(leadtimemax)) { + leadtimemax <- dim(sampleData$mod)[lead_times_position] + } + selected_lead_times <- leadtimemin:leadtimemax + + dataOut <- sampleData + dataOut$mod <- sampleData$mod[, , selected_start_dates, selected_lead_times, , ] + dataOut$obs <- sampleData$obs[, , selected_start_dates, selected_lead_times, , ] + } else if (output == 'areave') { + sampleData <- s2dv::sampleTimeSeries + if (is.null(leadtimemax)) { + leadtimemax <- dim(sampleData$mod)[lead_times_position] + } + selected_lead_times <- leadtimemin:leadtimemax + + dataOut <- sampleData + dataOut$mod <- sampleData$mod[, , selected_start_dates, selected_lead_times] + dataOut$obs <- sampleData$obs[, , selected_start_dates, selected_lead_times] + } + + dims_out <- dim(sampleData$mod) + dims_out[start_dates_position] <- length(selected_start_dates) + dims_out[lead_times_position] <- length(selected_lead_times) + dim(dataOut$mod) <- dims_out + + dims_out <- dim(sampleData$obs) + dims_out[start_dates_position] <- length(selected_start_dates) + dims_out[lead_times_position] <- length(selected_lead_times) + dim(dataOut$obs) <- dims_out + + invisible(list(mod = dataOut$mod, obs = dataOut$obs, + lat = dataOut$lat, lon = dataOut$lon)) +} + +.ConfigGetDatasetInfo <- function(matching_entries, table_name) { + # This function obtains the information of a dataset and variable pair, + # applying all the entries that match in the configuration file. + if (table_name == 'experiments') { + id <- 'EXP' + } else { + id <- 'OBS' + } + defaults <- c(paste0('$DEFAULT_', id, '_MAIN_PATH$'), + paste0('$DEFAULT_', id, '_FILE_PATH$'), + '$DEFAULT_NC_VAR_NAME$', '$DEFAULT_SUFFIX$', + '$DEFAULT_VAR_MIN$', '$DEFAULT_VAR_MAX$') + info <- NULL + + for (entry in matching_entries) { + if (is.null(info)) { + info <- entry[-1:-2] + info[which(info == '*')] <- defaults[which(info == '*')] + } else { + info[which(entry[-1:-2] != '*')] <- entry[-1:-2][which(entry[-1:-2] != '*')] + } + } + + info <- as.list(info) + names(info) <- c('main_path', 'file_path', 'nc_var_name', 'suffix', 'var_min', 'var_max') + info +} + +.ReplaceGlobExpressions <- function(path_with_globs, actual_path, + replace_values, tags_to_keep, + dataset_name, permissive) { + # The goal of this function is to replace the shell globbing expressions in + # a path pattern (that may contain shell globbing expressions and Load() + # tags) by the corresponding part of the real existing path. + # What is done actually is to replace all the values of the tags in the + # actual path by the corresponding $TAG$ + # + # It takes mainly two inputs. 
The path with expressions and tags, e.g.:
+  # /data/experiments/*/$EXP_NAME$/$VAR_NAME$/$VAR_NAME$_*$START_DATE$*.nc
+  # and a complete known path to one of the matching files, e.g.:
+  # /data/experiments/ecearth/i00k/tos/tos_fc0-1_19901101_199011-199110.nc
+  # and it returns the path pattern but without shell globbing expressions:
+  # /data/experiments/ecearth/$EXP_NAME$/$VAR_NAME$/$VAR_NAME$_fc0-1_$START_DATE$_199011-199110.nc
+  #
+  # To do that, it also needs as inputs the list of replace values (the
+  # association of each tag to their value).
+  #
+  # All the tags not present in the parameter tags_to_keep will be replaced.
+  #
+  # Not all cases can be resolved with the implemented algorithm. In an
+  # unsolvable case a warning is given and one possible guess is returned.
+  #
+  # In some cases it is interesting to replace only the expressions in the
+  # path to the file, but not the ones in the file name itself. To keep the
+  # expressions in the file name, the parameter permissive can be set to
+  # TRUE. To replace all the expressions it can be set to FALSE.
+  clean <- function(x) {
+    if (nchar(x) > 0) {
+      x <- gsub('\\\\', '', x)
+      x <- gsub('\\^', '', x)
+      x <- gsub('\\$', '', x)
+      x <- unname(sapply(strsplit(x, '[', fixed = TRUE)[[1]], function(y) gsub('.*]', '.', y)))
+      do.call(paste0, as.list(x))
+    } else {
+      x
+    }
+  }
+
+  strReverse <- function(x) sapply(lapply(strsplit(x, NULL), rev), paste, collapse = "")
+
+  if (permissive) {
+    actual_path_chunks <- strsplit(actual_path, '/')[[1]]
+    actual_path <- paste(actual_path_chunks[-length(actual_path_chunks)], collapse = '/')
+    file_name <- tail(actual_path_chunks, 1)
+    if (length(actual_path_chunks) > 1) {
+      file_name <- paste0('/', file_name)
+    }
+    path_with_globs_chunks <- strsplit(path_with_globs, '/')[[1]]
+    path_with_globs <- paste(path_with_globs_chunks[-length(path_with_globs_chunks)],
+                             collapse = '/')
+    path_with_globs <- .ConfigReplaceVariablesInString(path_with_globs, replace_values)
+    file_name_with_globs <- tail(path_with_globs_chunks, 1)
+    if (length(path_with_globs_chunks) > 1) {
+      file_name_with_globs <- paste0('/', file_name_with_globs)
+    }
+    right_known <- head(strsplit(file_name_with_globs, '*', fixed = TRUE)[[1]], 1)
+    right_known_no_tags <- .ConfigReplaceVariablesInString(right_known, replace_values)
+    path_with_globs_rx <- utils::glob2rx(paste0(path_with_globs, right_known_no_tags))
+    match <- regexpr(gsub('$', '', path_with_globs_rx, fixed = TRUE),
+                     paste0(actual_path, file_name))
+    if (match != 1) {
+      stop("Incorrect parameters to replace glob expressions. 
", + "The path with expressions does not match the actual path.") + } + if (attr(match, 'match.length') - nchar(right_known_no_tags) < nchar(actual_path)) { + path_with_globs <- paste0(path_with_globs, right_known_no_tags, '*') + file_name_with_globs <- sub(right_known, '/*', file_name_with_globs) + } + } + path_with_globs_rx <- utils::glob2rx(path_with_globs) + values_to_replace <- NULL + tags_to_replace_starts <- NULL + tags_to_replace_ends <- NULL + give_warning <- FALSE + for (tag in tags_to_keep) { + matches <- gregexpr(paste0('$', tag, '$'), path_with_globs_rx, fixed = TRUE)[[1]] + lengths <- attr(matches, 'match.length') + if (!(length(matches) == 1 && matches[1] == -1)) { + for (i in seq_along(matches)) { + left <- NULL + if (matches[i] > 1) { + left <- + .ConfigReplaceVariablesInString(substr(path_with_globs_rx, 1, + matches[i] - 1), replace_values) + left_known <- + strReverse(head(strsplit(strReverse(left), + strReverse('.*'), fixed = TRUE)[[1]], 1)) + } + right <- NULL + if ((matches[i] + lengths[i] - 1) < nchar(path_with_globs_rx)) { + right <- + .ConfigReplaceVariablesInString(substr(path_with_globs_rx, + matches[i] + lengths[i], + nchar(path_with_globs_rx)), + replace_values) + right_known <- head(strsplit(right, '.*', fixed = TRUE)[[1]], 1) + } + match_limits <- NULL + if (!is.null(left)) { + left_match <- regexpr(paste0(left, replace_values[[tag]], right_known), actual_path) + match_len <- attr(left_match, 'match.length') + left_match_limits <- + c(left_match + match_len - 1 - nchar(clean(right_known)) - + nchar(replace_values[[tag]]) + 1, + left_match + match_len - 1 - nchar(clean(right_known))) + if (!(left_match < 1)) { + match_limits <- left_match_limits + } + } + right_match <- NULL + if (!is.null(right)) { + right_match <- regexpr(paste0(left_known, replace_values[[tag]], right), actual_path) + match_len <- attr(right_match, 'match.length') + right_match_limits <- + c(right_match + nchar(clean(left_known)), + right_match + nchar(clean(left_known)) + + nchar(replace_values[[tag]]) - 1) + if (is.null(match_limits) && !(right_match < 1)) { + match_limits <- right_match_limits + } + } + if (!is.null(right_match) && !is.null(left_match)) { + if (!identical(right_match_limits, left_match_limits)) { + give_warning <- TRUE + } + } + if (is.null(match_limits)) { + stop("Too complex path pattern specified for ", dataset_name, + ". 
Specify a simpler path pattern for this dataset.") + } + values_to_replace <- c(values_to_replace, tag) + tags_to_replace_starts <- c(tags_to_replace_starts, match_limits[1]) + tags_to_replace_ends <- c(tags_to_replace_ends, match_limits[2]) + } + } + } + + if (length(tags_to_replace_starts) > 0) { + reorder <- sort(tags_to_replace_starts, index.return = TRUE) + tags_to_replace_starts <- reorder$x + values_to_replace <- values_to_replace[reorder$ix] + tags_to_replace_ends <- tags_to_replace_ends[reorder$ix] + while (length(values_to_replace) > 0) { + actual_path <- paste0(substr(actual_path, 1, head(tags_to_replace_starts, 1) - 1), + '$', head(values_to_replace, 1), '$', + substr(actual_path, head(tags_to_replace_ends, 1) + 1, + nchar(actual_path))) + extra_chars <- nchar(head(values_to_replace, 1)) + 2 - + (head(tags_to_replace_ends, 1) - + head(tags_to_replace_starts, 1) + 1) + values_to_replace <- values_to_replace[-1] + tags_to_replace_starts <- tags_to_replace_starts[-1] + tags_to_replace_ends <- tags_to_replace_ends[-1] + tags_to_replace_starts <- tags_to_replace_starts + extra_chars + tags_to_replace_ends <- tags_to_replace_ends + extra_chars + } + } + + if (give_warning) { + .warning(paste0("Too complex path pattern specified for ", dataset_name, + ". Double check carefully the '$Files' fetched for this dataset ", + "or specify a simpler path pattern.")) + } + + if (permissive) { + paste0(actual_path, file_name_with_globs) + } else { + actual_path + } +} + +.FindTagValue <- function(path_with_globs_and_tag, actual_path, tag) { + tag <- paste0('\\$', tag, '\\$') + path_with_globs_and_tag <- paste0('^', path_with_globs_and_tag, '$') + parts <- strsplit(path_with_globs_and_tag, '*', fixed = TRUE)[[1]] + parts <- as.list(grep(tag, parts, value = TRUE)) + longest_couples <- NULL + pos_longest_couples <- NULL + found_value <- NULL + for (i in seq_along(parts)) { + parts[[i]] <- strsplit(parts[[i]], tag)[[1]] + if (length(parts[[i]]) == 1) { + parts[[i]] <- c(parts[[i]], '') + } + len_parts <- sapply(parts[[i]], nchar) + len_couples <- len_parts[-length(len_parts)] + len_parts[2:length(len_parts)] + pos_longest_couples <- c(pos_longest_couples, which.max(len_couples)) + longest_couples <- c(longest_couples, max(len_couples)) + } + chosen_part <- which.max(longest_couples) + parts[[chosen_part]] <- + parts[[chosen_part]][pos_longest_couples[chosen_part]:(pos_longest_couples[chosen_part] + 1)] + if (nchar(parts[[chosen_part]][1]) >= nchar(parts[[chosen_part]][2])) { + if (nchar(parts[[chosen_part]][1]) > 0) { + matches <- gregexpr(parts[[chosen_part]][1], actual_path)[[1]] + if (length(matches) == 1) { + match_left <- matches + actual_path <- + substr(actual_path, match_left + attr(match_left, 'match.length'), nchar(actual_path)) + } + } + if (nchar(parts[[chosen_part]][2]) > 0) { + matches <- gregexpr(parts[[chosen_part]][2], actual_path)[[1]] + if (length(matches) == 1) { + match_right <- matches + found_value <- substr(actual_path, 0, match_right - 1) + } + } + } else { + if (nchar(parts[[chosen_part]][2]) > 0) { + matches <- gregexpr(parts[[chosen_part]][2], actual_path)[[1]] + if (length(matches) == 1) { + match_right <- matches + actual_path <- substr(actual_path, 0, match_right - 1) + } + } + if (nchar(parts[[chosen_part]][1]) > 0) { + matches <- gregexpr(parts[[chosen_part]][1], actual_path)[[1]] + if (length(matches) == 1) { + match_left <- matches + found_value <- + substr(actual_path, match_left + attr(match_left, 'match.length'), + nchar(actual_path)) + } + } + } + found_value 
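+  # (found_value stays NULL when the fixed text around the tag does not pin
+  # down a unique match in 'actual_path')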
+}
+
+.FilterUserGraphicArgs <- function(excludedArgs, ...) {
+  # This function filters the extra graphical parameters passed by the user in
+  # a plot function, excluding the ones that the plot function uses by default.
+  # Each plot function has a different set of arguments that are not allowed to
+  # be modified.
+  args <- list(...)
+  userArgs <- list()
+  for (name in names(args)) {
+    if ((name != "") & !is.element(name, excludedArgs)) {
+      # If the argument has a name and it is not in the list of excluded
+      # arguments, then it is added to the list that will be used
+      userArgs[[name]] <- args[[name]]
+    } else {
+      .warning(paste0("the argument '", name, "' cannot be modified ",
+                      "and the new value will be ignored"))
+    }
+  }
+  userArgs
+}
+
+.SelectDevice <- function(fileout, width, height, units, res) {
+  # This function is used in the plot functions to check the extension of the
+  # files where the graphics will be stored and select the right R device to
+  # save them.
+  # If the vector of filenames ('fileout') has files with different
+  # extensions, then it will only accept the first one, changing all the rest
+  # of the filenames to use that extension.
+
+  # We extract the extension of the filenames: '.png', '.pdf', ...
+  ext <- regmatches(fileout, regexpr("\\.[a-zA-Z0-9]*$", fileout))
+
+  if (length(ext) != 0) {
+    # If there is an extension specified, select the correct device
+    ## units of width and height set to accept inches
+    if (ext[1] == ".png") {
+      saveToFile <- function(fileout) {
+        png(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else if (ext[1] == ".jpeg") {
+      saveToFile <- function(fileout) {
+        jpeg(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else if (ext[1] %in% c(".eps", ".ps")) {
+      saveToFile <- function(fileout) {
+        postscript(file = fileout, width = width, height = height)
+      }
+    } else if (ext[1] == ".pdf") {
+      saveToFile <- function(fileout) {
+        pdf(file = fileout, width = width, height = height)
+      }
+    } else if (ext[1] == ".svg") {
+      saveToFile <- function(fileout) {
+        svg(filename = fileout, width = width, height = height)
+      }
+    } else if (ext[1] == ".bmp") {
+      saveToFile <- function(fileout) {
+        bmp(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else if (ext[1] == ".tiff") {
+      saveToFile <- function(fileout) {
+        tiff(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else {
+      .warning("file extension not supported; '.eps' will be used by default.")
+      ## In case there is only one filename
+      fileout[1] <- sub("\\.[a-zA-Z0-9]*$", ".eps", fileout[1])
+      ext[1] <- ".eps"
+      saveToFile <- function(fileout) {
+        postscript(file = fileout, width = width, height = height)
+      }
+    }
+    # Change filenames when necessary
+    if (any(ext != ext[1])) {
+      .warning(paste0("some extensions of the filenames provided in 'fileout' ",
+                      "are not ", ext[1],
+                      ". The extensions are being converted to ", ext[1], "."))
+      fileout <- sub("\\.[a-zA-Z0-9]*$", ext[1], fileout)
+    }
+  } else {
+    # Default filenames when there is no specification
+    .warning("there are no extensions specified in the filenames; defaulting to '.eps'")
+    fileout <- paste0(fileout, ".eps")
+    saveToFile <- postscript
+  }
+
+  # return the correct function with the graphical device, and the correct
+  # filenames
+  list(fun = saveToFile, files = fileout)
+}
+
+.message <- function(...) 
{ + # Function to use the 'message' R function with our custom settings + # Default: new line at end of message, indent to 0, exdent to 3, + # collapse to \n* + args <- list(...) + + ## In case we need to specify message arguments + if (!is.null(args[["appendLF"]])) { + appendLF <- args[["appendLF"]] + } else { + ## Default value in message function + appendLF <- TRUE + } + if (!is.null(args[["domain"]])) { + domain <- args[["domain"]] + } else { + ## Default value in message function + domain <- NULL + } + args[["appendLF"]] <- NULL + args[["domain"]] <- NULL + + ## To modify strwrap indent and exdent arguments + if (!is.null(args[["indent"]])) { + indent <- args[["indent"]] + } else { + indent <- 0 + } + if (!is.null(args[["exdent"]])) { + exdent <- args[["exdent"]] + } else { + exdent <- 3 + } + args[["indent"]] <- NULL + args[["exdent"]] <- NULL + + ## To modify paste collapse argument + if (!is.null(args[["collapse"]])) { + collapse <- args[["collapse"]] + } else { + collapse <- "\n*" + } + args[["collapse"]] <- NULL + + ## Message tag + if (!is.null(args[["tag"]])) { + tag <- args[["tag"]] + } else { + tag <- "* " + } + args[["tag"]] <- NULL + + tmp <- paste0(tag, + paste(strwrap(args, indent = indent, exdent = exdent), collapse = collapse)) + message(tmp, appendLF = appendLF, domain = domain) +} + +.warning <- function(...) { + # Function to use the 'warning' R function with our custom settings + # Default: no call information, indent to 0, exdent to 3, + # collapse to \n + args <- list(...) + + ## In case we need to specify warning arguments + if (!is.null(args[["call."]])) { + call <- args[["call."]] + } else { + ## Default: don't show info about the call where the warning came up + call <- FALSE + } + if (!is.null(args[["immediate."]])) { + immediate <- args[["immediate."]] + } else { + ## Default value in warning function + immediate <- FALSE + } + if (!is.null(args[["noBreaks."]])) { + noBreaks <- args[["noBreaks."]] + } else { + ## Default value warning function + noBreaks <- FALSE + } + if (!is.null(args[["domain"]])) { + domain <- args[["domain"]] + } else { + ## Default value warning function + domain <- NULL + } + args[["call."]] <- NULL + args[["immediate."]] <- NULL + args[["noBreaks."]] <- NULL + args[["domain"]] <- NULL + + ## To modify strwrap indent and exdent arguments + if (!is.null(args[["indent"]])) { + indent <- args[["indent"]] + } else { + indent <- 0 + } + if (!is.null(args[["exdent"]])) { + exdent <- args[["exdent"]] + } else { + exdent <- 3 + } + args[["indent"]] <- NULL + args[["exdent"]] <- NULL + + ## To modify paste collapse argument + if (!is.null(args[["collapse"]])) { + collapse <- args[["collapse"]] + } else { + collapse <- "\n!" + } + args[["collapse"]] <- NULL + + ## Warning tag + if (!is.null(args[["tag"]])) { + tag <- args[["tag"]] + } else { + tag <- "! Warning: " + } + args[["tag"]] <- NULL + + tmp <- paste0(tag, + paste(strwrap(args, indent = indent, exdent = exdent), collapse = collapse)) + warning(tmp, call. = call, immediate. = immediate, + noBreaks. = noBreaks, domain = domain) +} + +.IsColor <- function(x) { + res <- try(col2rgb(x), silent = TRUE) + return(!is(res, "try-error")) +} + +# This function switches to a specified figure at position (row, col) in a layout. +# This overcomes the bug in par(mfg = ...). However the mode par(new = TRUE) is +# activated, i.e., all drawn elements will be superimposed. 
Additionally, after +# using this function, the automatical pointing to the next figure in the layout +# will be spoiled: once the last figure in the layout is drawn, the pointer won't +# move to the first figure in the layout. +# Only figures with numbers other than 0 (when creating the layout) will be +# accessible. +# Inputs: either row and col, or n and mat +.SwitchToFigure <- function(row = NULL, col = NULL, n = NULL, mat = NULL) { + if (!is.null(n) && !is.null(mat)) { + if (!is.numeric(n) || length(n) != 1) { + stop("Parameter 'n' must be a single numeric value.") + } + n <- round(n) + if (!is.array(mat)) { + stop("Parameter 'mat' must be an array.") + } + target <- which(mat == n, arr.ind = TRUE)[1, ] + row <- target[1] + col <- target[2] + } else if (!is.null(row) && !is.null(col)) { + if (!is.numeric(row) || length(row) != 1) { + stop("Parameter 'row' must be a single numeric value.") + } + row <- round(row) + if (!is.numeric(col) || length(col) != 1) { + stop("Parameter 'col' must be a single numeric value.") + } + col <- round(col) + } else { + stop("Either 'row' and 'col' or 'n' and 'mat' must be provided.") + } + next_attempt <- c(row, col) + par(mfg = next_attempt) + i <- 1 + layout_size <- par('mfrow') + layout_cells <- matrix(1:prod(layout_size), layout_size[1], layout_size[2], + byrow = TRUE) + while (any((par('mfg')[1:2] != c(row, col)))) { + next_attempt <- which(layout_cells == i, arr.ind = TRUE)[1, ] + par(mfg = next_attempt) + i <- i + 1 + if (i > prod(layout_size)) { + stop("Figure not accessible.") + } + } + plot(0, type = 'n', axes = FALSE, ann = FALSE) + par(mfg = next_attempt) +} + +# Function to permute arrays of non-atomic elements (e.g. POSIXct) +.aperm2 <- function(x, new_order) { + old_dims <- dim(x) + attr_bk <- attributes(x) + if ('dim' %in% names(attr_bk)) { + attr_bk[['dim']] <- NULL + } + if (is.numeric(x)) { + x <- aperm(x, new_order) + } else { + y <- array(seq_along(x), dim = dim(x)) + y <- aperm(y, new_order) + x <- x[as.vector(y)] + } + dim(x) <- old_dims[new_order] + attributes(x) <- c(attributes(x), attr_bk) + x +} + +# This function is a helper for the function .MergeArrays. +# It expects as inputs two named numeric vectors, and it extends them +# with dimensions of length 1 until an ordered common dimension +# format is reached. +# The first output is dims1 extended with 1s. +# The second output is dims2 extended with 1s. +# The third output is a merged dimension vector. If dimensions with +# the same name are found in the two inputs, and they have a different +# length, the maximum is taken. +.MergeArrayDims <- function(dims1, dims2) { + new_dims1 <- NULL + new_dims2 <- NULL + while (length(dims1) > 0) { + if (names(dims1)[1] %in% names(dims2)) { + pos <- which(names(dims2) == names(dims1)[1]) + dims_to_add <- rep(1, pos - 1) + if (length(dims_to_add) > 0) { + names(dims_to_add) <- names(dims2[1:(pos - 1)]) + } + new_dims1 <- c(new_dims1, dims_to_add, dims1[1]) + new_dims2 <- c(new_dims2, dims2[1:pos]) + dims1 <- dims1[-1] + dims2 <- dims2[-(1:pos)] + } else { + new_dims1 <- c(new_dims1, dims1[1]) + new_dims2 <- c(new_dims2, 1) + names(new_dims2)[length(new_dims2)] <- names(dims1)[1] + dims1 <- dims1[-1] + } + } + if (length(dims2) > 0) { + dims_to_add <- rep(1, length(dims2)) + names(dims_to_add) <- names(dims2) + new_dims1 <- c(new_dims1, dims_to_add) + new_dims2 <- c(new_dims2, dims2) + } + list(new_dims1, new_dims2, pmax(new_dims1, new_dims2)) +} + +# This function takes two named arrays and merges them, filling with +# NA where needed. 
+# dim(array1) +# 'b' 'c' 'e' 'f' +# 1 3 7 9 +# dim(array2) +# 'a' 'b' 'd' 'f' 'g' +# 2 3 5 9 11 +# dim(.MergeArrays(array1, array2, 'b')) +# 'a' 'b' 'c' 'e' 'd' 'f' 'g' +# 2 4 3 7 5 9 11 +.MergeArrays <- function(array1, array2, along) { + if (!(is.null(array1) || is.null(array2))) { + if (!(identical(names(dim(array1)), names(dim(array2))) && + identical(dim(array1)[-which(names(dim(array1)) == along)], + dim(array2)[-which(names(dim(array2)) == along)]))) { + new_dims <- .MergeArrayDims(dim(array1), dim(array2)) + dim(array1) <- new_dims[[1]] + dim(array2) <- new_dims[[2]] + for (j in seq_along(dim(array1))) { + if (names(dim(array1))[j] != along) { + if (dim(array1)[j] != dim(array2)[j]) { + if (which.max(c(dim(array1)[j], dim(array2)[j])) == 1) { + na_array_dims <- dim(array2) + na_array_dims[j] <- dim(array1)[j] - dim(array2)[j] + na_array <- array(dim = na_array_dims) + array2 <- abind(array2, na_array, along = j) + names(dim(array2)) <- names(na_array_dims) + } else { + na_array_dims <- dim(array1) + na_array_dims[j] <- dim(array2)[j] - dim(array1)[j] + na_array <- array(dim = na_array_dims) + array1 <- abind(array1, na_array, along = j) + names(dim(array1)) <- names(na_array_dims) + } + } + } + } + } + if (!(along %in% names(dim(array2)))) { + stop("The dimension specified in 'along' is not present in the ", + "provided arrays.") + } + array1 <- abind(array1, array2, along = which(names(dim(array1)) == along)) + names(dim(array1)) <- names(dim(array2)) + } else if (is.null(array1)) { + array1 <- array2 + } + array1 +} + +# only can be used in Trend(). Needs generalization or be replaced by other function. +.reorder <- function(output, time_dim, dim_names) { + # Add dim name back + if (is.null(dim(output))) { + dim(output) <- c(stats = length(output)) + } else { #is an array + if (length(dim(output)) == 1) { + if (!is.null(names(dim(output)))) { + dim(output) <- c(1, dim(output)) + names(dim(output))[1] <- time_dim + } else { + names(dim(output)) <- time_dim + } + } else { # more than one dim + if (names(dim(output))[1] != "") { + dim(output) <- c(1, dim(output)) + names(dim(output))[1] <- time_dim + } else { #regular case + names(dim(output))[1] <- time_dim + } + } + } + # reorder + pos <- match(dim_names, names(dim(output))) + output <- aperm(output, pos) + names(dim(output)) <- dim_names + names(dim(output))[names(dim(output)) == time_dim] <- 'stats' + return(output) +} + +# to be used in AMV.R, TPI.R, SPOD.R, GSAT.R and GMST.R +.Indices <- function(data, type, monini, indices_for_clim, + fmonth_dim, sdate_dim, year_dim, month_dim, na.rm) { + + if (type == 'dcpp') { + + fyear_dim <- 'fyear' + data <- Season(data = data, time_dim = fmonth_dim, + monini = monini, moninf = 1, monsup = 12, + method = mean, na.rm = na.rm) + names(dim(data))[which(names(dim(data)) == fmonth_dim)] <- fyear_dim + + if (identical(indices_for_clim, FALSE)) { ## data is already anomalies + + anom <- data + + } else { ## Different indices_for_clim for each forecast year (to use the same calendar years) + + n_fyears <- as.numeric(dim(data)[fyear_dim]) + n_sdates <- as.numeric(dim(data)[sdate_dim]) + + if (is.null(indices_for_clim)) { ## climatology over the whole (common) period + first_years_for_clim <- n_fyears : 1 + last_years_for_clim <- n_sdates : (n_sdates - n_fyears + 1) + } else { ## indices_for_clim specified as a numeric vector + first_years_for_clim <- seq(from = indices_for_clim[1], by = -1, length.out = n_fyears) + last_years_for_clim <- + seq(from = indices_for_clim[length(indices_for_clim)], + 
by = -1, length.out = n_fyears) + } + + data <- s2dv::Reorder(data = data, order = c(fyear_dim, sdate_dim)) + anom <- array(data = NA, dim = dim(data)) + for (i in 1:n_fyears) { + clim <- mean(data[i, first_years_for_clim[i]:last_years_for_clim[i]], na.rm = na.rm) + anom[i, ] <- data[i, ] - clim + } + } + + } else if (type %in% c('obs', 'hist')) { + + data <- multiApply::Apply(data = data, target_dims = month_dim, + fun = mean, na.rm = na.rm)$output1 + + if (identical(indices_for_clim, FALSE)) { ## data is already anomalies + clim <- 0 + } else if (is.null(indices_for_clim)) { + ## climatology over the whole period + clim <- multiApply::Apply(data = data, target_dims = year_dim, fun = mean, + na.rm = na.rm)$output1 + } else { + ## indices_for_clim specified as a numeric vector + clim <- multiApply::Apply(data = ClimProjDiags::Subset(x = data, along = year_dim, + indices = indices_for_clim), + target_dims = year_dim, fun = mean, na.rm = na.rm)$output1 + } + + anom <- data - clim + + } else { + stop('type must be dcpp, hist or obs') + } + + return(anom) +} + +#TODO: Remove from s2dv when PlotLayout can get colorbar info from plotting function directly. +# The function is temporarily here because PlotLayout() needs to draw the colorbars of +# PlotMostLikelyQuantileMap(). +#Draws Color Bars for Categories +#A wrapper of s2dv::ColorBar to generate multiple color bars for different +#categories, and each category has different color set. +GradientCatsColorBar <- function(nmap, brks = NULL, cols = NULL, vertical = TRUE, subsampleg = NULL, + bar_limits, var_limits = NULL, + triangle_ends = NULL, plot = TRUE, + draw_separators = FALSE, + bar_titles = NULL, title_scale = 1, + label_scale = 1, extra_margin = rep(0, 4), + ...) { + # bar_limits + if (!is.numeric(bar_limits) || length(bar_limits) != 2) { + stop("Parameter 'bar_limits' must be a numeric vector of length 2.") + } + + # Check brks + if (is.null(brks) || (is.numeric(brks) && length(brks) == 1)) { + num_brks <- 5 + if (is.numeric(brks)) { + num_brks <- brks + } + brks <- seq(from = bar_limits[1], to = bar_limits[2], length.out = num_brks) + } + if (!is.numeric(brks)) { + stop("Parameter 'brks' must be a numeric vector.") + } + # Check cols + col_sets <- list(c("#A1D99B", "#74C476", "#41AB5D", "#238B45"), + c("#6BAED6FF", "#4292C6FF", "#2171B5FF", "#08519CFF"), + c("#FFEDA0FF", "#FED976FF", "#FEB24CFF", "#FD8D3CFF"), + c("#FC4E2AFF", "#E31A1CFF", "#BD0026FF", "#800026FF"), + c("#FCC5C0", "#FA9FB5", "#F768A1", "#DD3497")) + if (is.null(cols)) { + if (length(col_sets) >= nmap) { + chosen_sets <- 1:nmap + chosen_sets <- chosen_sets + floor((length(col_sets) - length(chosen_sets)) / 2) + } else { + chosen_sets <- array(seq_along(col_sets), nmap) + } + cols <- col_sets[chosen_sets] + } else { + if (!is.list(cols)) { + stop("Parameter 'cols' must be a list of character vectors.") + } + if (!all(sapply(cols, is.character))) { + stop("Parameter 'cols' must be a list of character vectors.") + } + if (length(cols) != nmap) { + stop("Parameter 'cols' must be a list of the same length as the number of ", + "maps in 'maps'.") + } + } + for (i in seq_along(cols)) { + if (length(cols[[i]]) != (length(brks) - 1)) { + cols[[i]] <- grDevices::colorRampPalette(cols[[i]])(length(brks) - 1) + } + } + + # Check bar_titles + if (is.null(bar_titles)) { + if (nmap == 3) { + bar_titles <- c("Below normal (%)", "Normal (%)", "Above normal (%)") + } else if (nmap == 5) { + bar_titles <- c("Low (%)", "Below normal (%)", + "Normal (%)", "Above normal (%)", "High (%)") + } 
else { + bar_titles <- paste0("Cat. ", 1:nmap, " (%)") + } + } + + if (plot) { + for (k in 1:nmap) { + s2dv::ColorBar(brks = brks, cols = cols[[k]], vertical = FALSE, subsampleg = subsampleg, +# bar_limits = bar_limits, var_limits = var_limits, + triangle_ends = triangle_ends, plot = TRUE, + draw_separators = draw_separators, + title = bar_titles[[k]], title_scale = title_scale, + label_scale = label_scale, extra_margin = extra_margin) + } + } else { + #TODO: col_inf and col_sup + return(list(brks = brks, cols = cols)) + } + +} + + diff --git a/recipe_nao_single.yml b/recipe_nao_single.yml new file mode 100644 index 00000000..a2b3cb32 --- /dev/null +++ b/recipe_nao_single.yml @@ -0,0 +1,108 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, psl) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: psl + freq: monthly_mean + units: hPa + flux: no + Datasets: + System: + #- {name: 'ECMWF-SEAS5.1'} + #- {name: 'Meteo-France-System8'} + #- {name: 'CMCC-SPS3.5'} + name: ECMWF-SEAS5.1 #Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + Multimodel: + execute: no + approach: pooled # Mandatory, bool: Either yes/true or no/false + createFrom: Anomalies + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0501' + fcst_year: '2023' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2000' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 #6 # Mandatory, int: Last leadtime time step in months + Region: + latmin: 20 + latmax: 80 + lonmin: -80 + lonmax: 40 + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. + type: /esarchive/scratch/nperez/git4/sunset/conf/grid_description/griddes_system51c3s.txt + # type: "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_system" #"to_reference" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: yes + cross_validation: yes + save: all + Time_aggregation: + execute: no + Indices: + NAO: {obsproj: yes, save: 'none'} + Calibration: + method: raw # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov std n_eff + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds.
+ save: none + Indicators: + index: no + Visualization: + plots: skill_metrics #forecast_ensemble_mean most_likely_terciles + multi_panel: no + dots: both + projection: Robinson + file_format: 'PNG' + #projection: robinson + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: NULL + metric: enscorr rmss rpss crpss EnsSprErr + metric_aggregation: 'skill' + #inf_to_na: yes + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: esarchive #gpfs + output_dir: /esarchive/scratch/nperez/git4/ #/home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /esarchive/scratch/nperez/git4/sunset/ #/home/bsc/bsc032339/sunset/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + diff --git a/recipe_tas_singl.yml b/recipe_tas_singl.yml index 6c509521..9dc99cbc 100644 --- a/recipe_tas_singl.yml +++ b/recipe_tas_singl.yml @@ -35,7 +35,7 @@ Analysis: lonmax: 359.9 Regrid: method: conservative # Mandatory, str: Interpolation method. See docu. - type: "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_system" #"to_reference" + type: /esarchive/scratch/nperez/git4/sunset/conf/grid_description/griddes_system51c3s.txt # "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_system" #"to_reference" #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. 
Workflow: Anomalies: @@ -86,9 +86,9 @@ Analysis: Run: Loglevel: INFO Terminal: yes - filesystem: gpfs - output_dir: /home/bsc/bsc032339/ # replace with the directory where you want to save the outputs - code_dir: /home/bsc/bsc032339/sunset/ # replace with the directory where your code is + filesystem: esarchive #gpfs + output_dir: /esarchive/scratch/nperez/git4/ #/home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /esarchive/scratch/nperez/git4/sunset/ #/home/bsc/bsc032339/sunset/ # replace with the directory where your code is autosubmit: no # fill only if using autosubmit auto_conf: -- GitLab From 333bcd545c6dda8be20a882f36b62a8c5855fe1f Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 27 Jun 2024 09:10:49 +0200 Subject: [PATCH 40/53] capitals for NAO --- full_nao.R => full_NAO.R | 2 +- recipe_nao_single.yml => recipe_NAO_single.yml | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename full_nao.R => full_NAO.R (95%) rename recipe_nao_single.yml => recipe_NAO_single.yml (100%) diff --git a/full_nao.R b/full_NAO.R similarity index 95% rename from full_nao.R rename to full_NAO.R index 43b1e2e3..481cbc0c 100644 --- a/full_nao.R +++ b/full_NAO.R @@ -5,7 +5,7 @@ source("modules/Units/Units.R") source("modules/Visualization/Visualization.R") args = commandArgs(trailingOnly = TRUE) recipe_file <- args[1] -#recipe_file <- "recipe_nao_single.yml" +#recipe_file <- "recipe_NAO_single.yml" recipe <- read_atomic_recipe(recipe_file) #recipe <- prepare_outputs(recipe_file) # Load datasets diff --git a/recipe_nao_single.yml b/recipe_NAO_single.yml similarity index 100% rename from recipe_nao_single.yml rename to recipe_NAO_single.yml -- GitLab From 368c5d8e0895241eaaae3fdb63fbe517c0ade492 Mon Sep 17 00:00:00 2001 From: Nuria Perez Zanon Date: Thu, 27 Jun 2024 17:34:30 +0200 Subject: [PATCH 41/53] NAO working on MN5 trying plot_NAO and Calibration --- full_NAO.R | 13 +- modules/Crossval/Crossval_Calibration.R | 97 ++++++++ modules/Crossval/Crossval_NAO.R | 1 + plot_NAO.R | 307 ++++++++++++++++++++++++ recipe_NAO_single.yml | 20 +- sunset.sh | 2 +- 6 files changed, 423 insertions(+), 17 deletions(-) create mode 100644 modules/Crossval/Crossval_Calibration.R create mode 100644 plot_NAO.R diff --git a/full_NAO.R b/full_NAO.R index 481cbc0c..b0db00df 100644 --- a/full_NAO.R +++ b/full_NAO.R @@ -3,11 +3,11 @@ source("modules/Loading/Loading.R") source("modules/Saving/Saving.R") source("modules/Units/Units.R") source("modules/Visualization/Visualization.R") -args = commandArgs(trailingOnly = TRUE) -recipe_file <- args[1] -#recipe_file <- "recipe_NAO_single.yml" -recipe <- read_atomic_recipe(recipe_file) -#recipe <- prepare_outputs(recipe_file) +#args = commandArgs(trailingOnly = TRUE) +#recipe_file <- args[1] +recipe_file <- "recipe_NAO_single.yml" +#recipe <- read_atomic_recipe(recipe_file) +recipe <- prepare_outputs(recipe_file, disable_checks = TRUE) # Load datasets data <- Loading(recipe) data <- Units(recipe, data) @@ -18,6 +18,9 @@ data_summary(data$obs, recipe) source("modules/Crossval/Crossval_NAO.R") res <- Crossval_NAO(recipe = recipe, data = data) +source("plot_NAO.R") +plot_NAO(recipe = recipe, nao = res, data = data) + source("modules/Crossval/Crossval_metrics.R") skill_metrics <- Crossval_metrics(recipe = recipe, data_crossval = res, fair = FALSE, nmemb = NULL, nmemb_ref = NULL) diff --git a/modules/Crossval/Crossval_Calibration.R b/modules/Crossval/Crossval_Calibration.R new file mode 100644 index 00000000..c425a84c --- /dev/null +++ 
b/modules/Crossval/Crossval_Calibration.R @@ -0,0 +1,97 @@ +# take the output of Flor/s2s/subseasonal_loading.R + +source("../git/sunset/modules/Crossval/R/tmp/GetProbs.R") + +Crossval_calibration <- function(recipe, data) { + + cross.method <- recipe$Analysis$cross.method + # TODO move check + if (is.null(cross.method)) { + cross.method <- 'leave-one-out' + } + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + eval(parse(text = y))})}) + ncores <- recipe$Analysis$ncores + na.rm <- recipe$Analysis$remove_NAs + ## data dimensions + sdate_dim <- dim(data$hcst$data)['syear'] + orig_dims <- names(dim(data$hcst$data)) + # spatial dims + if ('latitude' %in% names(dim(data$hcst$data))) { + nlats <- dim(data$hcst$data)['latitude'] + nlons <- dim(data$hcst$data)['longitude'] + agg = 'global' + } else if ('region' %in% names(dim(data$hcst$data))) { + agg = 'region' + nregions <- dim(data$hcst$data)['region'] + } + + + cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, + amt.points = sdate_dim, + amt.points_cor = NULL) # k = ? + + cal_hcst_ev_res <- NULL + cal_obs_ev_res <- NULL + cal_obs_tr_res <- NULL + # as long as probs requested in recipe: + lims_cal_hcst_tr_res <- lapply(categories, function(X) {NULL}) + lims_cal_obs_tr_res <- lapply(categories, function(X) {NULL}) + + fcst_probs <- lapply(categories, function(x){NULL}) + hcst_probs_ev <- lapply(categories, function(x){NULL}) + obs_probs_ev <- lapply(categories, function(x){NULL}) + + for (t in 1:length(cross)) { + info(recipe$Run$logger, paste("crossval:", t)) + + # subset years: Subset works at BSC not at Athos + ## training indices + obs_tr <- CST_Subset(data$obs, along = 'syear', + indices = cross[[t]]$train.dexes) + hcst_tr <- CST_Subset(data$hcst, along = 'syear', + indices = cross[[t]]$train.dexes) + ## evaluation indices + hcst_ev <- CST_Subset(data$hcst, along = 'syear', + indices = cross[[t]]$eval.dexes) + obs_ev <- CST_Subset(data$obs, along = 'syear', + indices = cross[[t]]$eval.dexes) + + if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { +source("https://earth.bsc.es/gitlab/external/cstools/-/raw/d6914a40c11d09168b9b4a191e9a0362b56a5f0c/R/CST_MergeDims.R") + + hcst_tr <- CST_MergeDims(hcst_tr, merge_dims = c('sday', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + obs_tr <- CST_MergeDims(obs_tr, merge_dims = c('sday', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + hcst_ev <- CST_MergeDims(hcst_ev, merge_dims = c('sday', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + } + cal_hcst_tr <- CST_Calibration(exp = hcst_tr, obs = obs_tr, + memb_dim = 'ensemble', sdate_dim = 'syear', + ncores = ncores) + cal_hcst_ev <- CST_Calibration(exp = hcst_tr, obs = obs_tr, exp_cor = hcst_ev, + memb_dim = 'ensemble', sdate_dim = 'syear', + ncores = ncores) + + lims_cal_hcst_tr <- Apply(cal_hcst_tr$data, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x) {'cat'}), + prob_lims = categories, + ncores = ncores) + lims_cal_obs_tr <- Apply(obs_tr$data, target_dims = c('syear'),#, 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x){'cat'}), + prob_lims = categories, + ncores = ncores) + + + + +} diff --git a/modules/Crossval/Crossval_NAO.R 
b/modules/Crossval/Crossval_NAO.R index 9fb31918..d4882f3e 100644 --- a/modules/Crossval/Crossval_NAO.R +++ b/modules/Crossval/Crossval_NAO.R @@ -178,6 +178,7 @@ Crossval_NAO <- function(recipe, data) { along = 'unneeded', indices = 1, drop = 'selected') } # Forecast anomalies: + nao_fcst <- NULL if (!is.null(data$fcst)) { clim_hcst <- Apply(data$hcst$data, target_dims = c('syear', 'ensemble'), diff --git a/plot_NAO.R b/plot_NAO.R new file mode 100644 index 00000000..8f01c45a --- /dev/null +++ b/plot_NAO.R @@ -0,0 +1,307 @@ +plot_NAO <- function(recipe, nao, data) { + +# Read variable long_name to plot it + conf <- yaml::read_yaml("conf/variable-dictionary.yml") + var_name <- conf$vars[[which(names(conf$vars) == + recipe$Analysis$Variables$name)]]$long + plot_ts <- TRUE + plot_sp <- TRUE + alpha <- 0.05 + lons <- data$hcst$coords$longitude + lats <- data$hcst$coords$latitude + nao_region <- c(lonmin = -80, lonmax = 40, + latmin = 20, latmax = 80) + + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + # Use startdates param from SaveExp to correctly name the files: + if (length(data$hcst$attrs$source_files) == dim(data$hcst$data)['syear']) { + file_dates <- Apply(data$hcst$attrs$source_files, target_dim = NULL, + fun = function(x) { + pos <- which(strsplit(x, "")[[1]] == ".") + res <- substr(x, pos-8, pos-1) + })$output1 + } + } else if (tolower(recipe$Analysis$Horizon) == "decadal"){ + file_dates <- paste0('s', + recipe$Analysis$Time$hcst_start : recipe$Analysis$Time$hcst_end) + } + + if (plot_ts) { + dir.create(paste0(recipe$Run$output_dir, "/plots/Indices/"), + showWarnings = F, recursive = T) + source("modules/old_modules/Indices/R/plot_deterministic_forecast.R") + for (tstep in 1:dim(nao$hcst$data)['time']) { + mes <- as.numeric(substr(recipe$Analysis$Time$sdate, 1,2)) + + (tstep - 1) + (recipe$Analysis$Time$ftime_min - 1) + mes <- ifelse(mes > 12, mes - 12, mes) + fmonth <- sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min) + obs <- Subset(nao$obs$data, along = 'time', ind = tstep) + exp <- Subset(nao$hcst$data, along = 'time', ind = tstep) + if (!grepl("\\.", recipe$Analysis$Datasets$System$name)) { + system <- recipe$Analysis$Datasets$System$name + } else { + system <- gsub("\\.", "", recipe$Analysis$Datasets$System$name) + } + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + toptitle <- paste("NAO Index\n", + month.abb[mes], + "/", recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_", + system, "_", recipe$Analysis$Datasets$Reference$name, + "_s", recipe$Analysis$Time$sdate, "_ftime", + sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min), ".pdf") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Nominal start date: 1st of ", + month.name[as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))], + "\n", + "Forecast month: ", fmonth, "\n") + xlabs <- as.numeric(substr(file_dates, 1, 4)) + } else if (tolower(recipe$Analysis$Horizon) == "decadal"){ + toptitle <- paste("NAO Index\n", + "Lead time", fmonth, + " / Start dates", recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_", + system, "_", recipe$Analysis$Datasets$Reference$name, + "_ftime", + sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min), ".pdf") + caption <- paste0("NAO method: ",
ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Start date month: ", + month.name[get_archive(recipe)$System[[recipe$Analysis$Datasets$System$name]]$initial_month], + "\n", + "Lead time: ", fmonth, "\n") + xlabs <- file_dates + } else { + toptitle <- NULL + warning("The plot title is not defined for this time horizon. ", + "The plots will not have a title.") + } + plot_deterministic_forecast(obs, exp, + time_dim = 'syear', + member_dim = 'ensemble', style = 'boxplot', + xlabs = xlabs, + title = toptitle, fileout = plotfile, + caption = caption, caption_line = 6.5, + legend_text = c( + recipe$Analysis$Datasets$Reference$name, + recipe$Analysis$Datasets$System$name)) + } + } + if (plot_sp) { + ## TODO: To be removed when s2dv is released: + source("modules/Visualization/R/tmp/PlotRobinson.R") + source("modules/old_modules/Indices/R/correlation_eno.R") + source("modules/Visualization/R/get_proj_code.R") + dir.create(paste0(recipe$Run$output_dir, "/plots/Indices/"), + showWarnings = F, recursive = T) + # Get correct code for stereographic projection + projection_code <- get_proj_code(proj_name = "stereographic") + correl_obs <- Apply(list(data$obs$data, nao$obs$data), + target_dims = 'syear', fun = .correlation_eno, + time_dim = 'syear', method = 'pearson', alpha = alpha, + test.type = 'two-sided', pval = FALSE, + ncores = recipe$Analysis$ncores) + correl_hcst <- Apply(list(data$hcst$data, nao$hcst$data), + target_dims = c('syear', 'ensemble'), + fun = function(x, y) { + x <- apply(x, 1, mean, na.rm = TRUE) + y <- apply(y, 1, mean, na.rm = TRUE) + dim(y) <- c(syear = length(y)) + dim(x) <- c(syear = length(x)) + res <- .correlation_eno(x, y, + time_dim = 'syear', method = 'pearson', alpha = alpha, + test.type = 'two-sided', pval = FALSE)}, + ncores = recipe$Analysis$ncores) + correl_hcst_full <- Apply(list(data$hcst$data, nao$hcst$data), + target_dims = c('syear', 'ensemble'), + fun = function(x,y) { + dim(y) <- c(syear = length(y)) + dim(x) <- c(syear = length(x)) + res <- .correlation_eno(x, y, + time_dim = 'syear', method = 'pearson', alpha = alpha, + test.type = 'two-sided', pval = FALSE)}, + ncores = recipe$Analysis$ncores) + + for (tstep in 1:dim(nao$obs$data)['time']) { + fmonth <- sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min) + ## Observations + map <- drop(Subset(correl_obs$r, along = 'time', ind = tstep)) + sig <- drop(Subset(correl_obs$sign, along = 'time', ind = tstep)) + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + mes <- as.numeric(substr(recipe$Analysis$Time$sdate, 1,2)) + + (tstep - 1) + (recipe$Analysis$Time$ftime_min - 1) + mes <- ifelse(mes > 12, mes - 12, mes) + toptitle <- paste(recipe$Analysis$Datasets$Reference$name, "\n", + "NAO Index -",var_name, "\n", + " Correlation /", month.abb[mes], + "/", recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", + recipe$Analysis$Variable$name, "_", + recipe$Analysis$Datasets$Reference$name, + "_s", recipe$Analysis$Time$sdate, + "_ftime", fmonth, ".pdf") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Nominal start date: 1st of ", + month.name[as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation; alpha = ", alpha) + } else if (tolower(recipe$Analysis$Horizon) == "decadal") { + toptitle <- 
paste(recipe$Analysis$Datasets$Reference$name, "\n", + "NAO Index -",var_name, "\n", + " Correlation / Start dates ", + recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", + recipe$Analysis$Variable$name, "_", + recipe$Analysis$Datasets$Reference$name, + "_ftime", fmonth, ".pdf") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Start date: month ", + month.name[get_archive(recipe)$System[[recipe$Analysis$Datasets$System$name]]$initial_month], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation; alpha = ", alpha) + } else { + toptitle <- NULL + warning("The plot title is not defined for this time horizon. ", + "The plots will not have a title.") + } + if (!grepl("\\.", recipe$Analysis$Datasets$System$name)) { + system <- recipe$Analysis$Datasets$System$name + } else { + system <- gsub("\\.", "", recipe$Analysis$Datasets$System$name) + } + + PlotRobinson(map, lon = lons, lat = lats, target_proj = projection_code, + lat_dim = 'latitude', lon_dim = 'longitude', + legend = 's2dv', style = 'polygon', + toptitle = toptitle, crop_coastlines = nao_region, + caption = caption, mask = sig, bar_extra_margin = c(4,0,4,0), + fileout = plotfile, width = 8, height = 6, + brks = seq(-1, 1, 0.2), cols = brewer.pal(10, 'PuOr')) + ## Ensemble-mean + map <- drop(Subset(correl_hcst$r, along = 'time', ind = tstep)) + sig <- drop(Subset(correl_hcst$sign, along = 'time', ind = tstep)) + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + toptitle <- paste(recipe$Analysis$Datasets$System$name, "\n", + "NAO Index -", var_name, "\n", + " Correlation /", month.abb[mes], + "/", recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", + recipe$Analysis$Variable$name, "_ensmean_", + system, + "_s", recipe$Analysis$Time$sdate, + "_ftime", fmonth, ".pdf") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Correlation ensemble mean\n", + "Nominal start date: 1st of ", + month.name[as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation; alpha = ", alpha) + } else if (tolower(recipe$Analysis$Horizon) == "decadal"){ + toptitle <- paste(recipe$Analysis$Datasets$System$name,"\n", + "NAO Index -",var_name, "\n", + " Correlation / Start dates ", + recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", + recipe$Analysis$Variable$name, "_ensmean_", + system, + recipe$Analysis$Datasets$Reference$name, + "_ftime", fmonth, ".pdf") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Correlation ensemble mean\n", + "Start date month: ", + month.name[get_archive(recipe)$System[[recipe$Analysis$Datasets$System$name]]$initial_month], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation; alpha = ", alpha) + } else { + toptitle <- NULL + warning("The plot title is not defined for this time horizon. 
", + "The plots will not have a title.") + } + + PlotRobinson(map, lon = lons, lat = lats, target_proj = projection_code, + lat_dim = 'latitude', lon_dim = 'longitude', + legend = 's2dv', bar_extra_margin = c(4,0,4,0), + toptitle = toptitle, style = 'polygon', + caption = caption, mask = sig, crop_coastline = nao_region, + fileout = plotfile, width = 8, height = 6, + brks = seq(-1, 1, 0.2), cols = brewer.pal(10, 'PuOr')) + + # Full hcst corr + map <- drop(Subset(correl_hcst_full$r, along = 'time', ind = tstep)) + sig <- drop(Subset(correl_hcst_full$sign, along = 'time', ind = tstep)) + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + toptitle <- paste(recipe$Analysis$Datasets$System$name,"\n", + "NAO Index -",var_name, "\n", + " Correlation /", month.abb[mes], + "/", recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", + recipe$Analysis$Variable$name, "_member_", + system, + "_s", recipe$Analysis$Time$sdate, + "_ftime", fmonth, ".pdf") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Correlation all members\n", + "Nominal start date: 1st of ", + month.name[as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation; alpha = ", alpha) + } else if (tolower(recipe$Analysis$Horizon) == "decadal"){ + toptitle <- paste(recipe$Analysis$Datasets$System$name,"\n", + "NAO Index -",var_name, "\n", + " Correlation / Start dates ", + recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", + recipe$Analysis$Variable$name, "_member_", + system, + recipe$Analysis$Datasets$Reference$name, + "_ftime", fmonth, ".pdf") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Correlation all members\n", + "Start date month: ", + month.name[get_archive(recipe)$System[[recipe$Analysis$Datasets$System$name]]$initial_month], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation; alpha = ", alpha) + } else { + toptitle <- NULL + warning("The plot title is not defined for this time horizon. ", + "The plots will not have a title.") + } + PlotRobinson(map, lon = lons, lat = lats, target_proj = projection_code, + lat_dim = 'latitude', lon_dim = 'longitude', + legend = 's2dv', bar_extra_margin = c(4,0,4,0), + toptitle = toptitle, style = 'polygon', + caption = caption, mask = sig, crop_coastline = nao_region, + fileout = plotfile, width = 8, height = 6, + brks = seq(-1, 1, 0.2), cols = brewer.pal(10, 'PuOr')) + } # end tstep loop + } +} diff --git a/recipe_NAO_single.yml b/recipe_NAO_single.yml index a2b3cb32..aaf166e2 100644 --- a/recipe_NAO_single.yml +++ b/recipe_NAO_single.yml @@ -23,9 +23,9 @@ Analysis: name: ERA5 # Mandatory, str: Reference codename. See docu. Time: sdate: '0501' - fcst_year: '2023' + fcst_year: #'2023' hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' - hcst_end: '2000' # Mandatory, int: Hindcast end year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' ftime_min: 1 # Mandatory, int: First leadtime time step in months ftime_max: 6 #6 # Mandatory, int: Last leadtime time step in months Region: @@ -35,14 +35,12 @@ Analysis: lonmax: 40 Regrid: method: conservative # Mandatory, str: Interpolation method. See docu. 
- type: /esarchive/scratch/nperez/git4/sunset/conf/grid_description/griddes_system51c3s.txt - # type: "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_system" #"to_reference" - #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + type: /home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt #"to_system" #"to_reference" /esarchive/scratch/nperez/git4/sunset/conf/grid_description/griddes_system51c3s.txt Workflow: Anomalies: compute: yes cross_validation: no - save: all + save: none Time_aggregation: execute: no Indices: @@ -52,7 +50,7 @@ Analysis: cross_validation: yes save: none Skill: - metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov std n_eff + metric: EnsCorr rmsss rpss crpss EnsSprErr rps crps rps_syear crps_syear cov std n_eff save: 'all' cross_validation: yes Probabilities: @@ -82,16 +80,16 @@ Analysis: col1_width: NULL col2_width: NULL calculate_diff: FALSE - ncores: 4 # Optional, int: number of cores, defaults to 1 + ncores: 16 # Optional, int: number of cores, defaults to 1 remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE Output_format: scorecards logo: yes Run: Loglevel: INFO Terminal: yes - filesystem: esarchive #gpfs - output_dir: /esarchive/scratch/nperez/git4/ #/home/bsc/bsc032339/ # replace with the directory where you want to save the outputs - code_dir: /esarchive/scratch/nperez/git4/sunset/ #/home/bsc/bsc032339/sunset/ # replace with the directory where your code is + filesystem: gpfs #esarchive #gpfs + output_dir: /home/bsc/bsc032339/ #/esarchive/scratch/nperez/git4/ #/home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /home/bsc/bsc032339/sunset/ #/esarchive/scratch/nperez/git4/sunset/ #/home/bsc/bsc032339/sunset/ # replace with the directory where your code is autosubmit: no # fill only if using autosubmit auto_conf: diff --git a/sunset.sh b/sunset.sh index c37bc9bb..7f913f15 100644 --- a/sunset.sh +++ b/sunset.sh @@ -10,5 +10,5 @@ source /gpfs/projects/bsc32/software/suselinux/11/software/Miniconda3/4.7.10/etc/profile.d/conda.sh conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/SUNSET-env_2.0.0 -Rscript /home/bsc/bsc032339/sunset/full_ecvs_multimodel_anomalies.R +Rscript /home/bsc/bsc032339/sunset/full_NAO.R #ecvs_multimodel_anomalies.R -- GitLab From d9a4b27f881f4eab878211a32cb7e543c869c69d Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 24 Jul 2024 14:40:57 +0200 Subject: [PATCH 42/53] fix folder names and add fcst categories saving --- modules/Crossval/Crossval_anomalies.R | 56 +++++++++++++++++++++------ 1 file changed, 44 insertions(+), 12 deletions(-) diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R index 3ff0f50e..cc41ffae 100644 --- a/modules/Crossval/Crossval_anomalies.R +++ b/modules/Crossval/Crossval_anomalies.R @@ -125,23 +125,60 @@ Crossval_anomalies <- function(recipe, data) { lims_ano_obs_tr_res[[ps]] <- Subset(lims_ano_obs_tr_res[[ps]], along = 'unneeded', indices = 1, drop = 'selected') } -browser() + # Make categories rounded number to use as names: + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + round(eval(parse(text = y)),2)})}) + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Anomalies/") + # Forecast anomalies: if 
(!is.null(data$fcst)) { - clim_hcst <- Apply(ano_hcst_ev_res, + clim_hcst <- Apply(data$hcst$data, target_dims = c('syear', 'ensemble'), mean, na.rm = na.rm, ncores = ncores)$output1 data$fcst$data <- Ano(data = data$fcst$data, clim = clim_hcst) + hcst_ano <- Ano(data = data$hcst$data, clim = clim_hcst) # Terciles limits using the whole hindcast period: - lims_fcst <- Apply(ano_hcst_ev_res, target_dims = c('syear', 'ensemble'), + lims_fcst <- Apply(hcst_ano, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { quantile(as.vector(x), ps, na.rm = na.rm)})}, output_dims = lapply(categories, function(x) {'cat'}), prob_lims = categories, ncores = ncores) + clim_obs <- Apply(data$obs$data, + target_dims = c('syear', 'ensemble'), + mean, + na.rm = na.rm, + ncores = ncores)$output1 + obs_ano <- Ano(data = data$obs$data, clim = clim_obs) + lims <- Apply(obs_ano, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x) {'cat'}), + prob_lims = categories, + ncores = ncores) + tmp_lims2 <- list() + + for(ps in 1:length(categories)) { + tmp_lims3 <- drop(lims[[ps]]) + for (l in 1:dim(lims[[ps]])['cat']) { + tmp_lims <- tmp_lims3[l,,,] + if (!('var' %in% names(dim(tmp_lims)))) { + dim(tmp_lims) <- c(var = 1, dim(tmp_lims)) + } + tmp_lims2 <- append(tmp_lims2, list(tmp_lims)) + names(tmp_lims2)[length(tmp_lims2)] <- as.character(categories[[ps]][l]) + } + save_percentiles(recipe = recipe, percentiles = tmp_lims2, + data_cube = data$obs, + agg = "global", outdir = NULL) + } } # Compute Probabilities @@ -180,8 +217,8 @@ browser() "#### Anomalies and Probabilities Done #####") if (recipe$Analysis$Workflow$Anomalies$save != 'none') { info(recipe$Run$logger, "##### START SAVING ANOMALIES #####") - recipe$Run$output_dir <- paste0(recipe$Run$output_dir, - "/outputs/Anomalies/") +# recipe$Run$output_dir <- paste0(recipe$Run$output_dir, +# "/outputs/Anomalies/") # Save forecast if ((recipe$Analysis$Workflow$Anomalies$save %in% c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) { @@ -202,11 +239,6 @@ browser() probs_fcst <- list() probs_obs <- list() all_names <- NULL - # Make categories rounded number to use as names: - categories <- recipe$Analysis$Workflow$Probabilities$percentiles - categories <- lapply(categories, function (x) { - sapply(x, function(y) { - round(eval(parse(text = y)),2)})}) for (ps in 1:length(categories)) { for (perc in 1:(length(categories[[ps]]) + 1)) { @@ -292,8 +324,8 @@ browser() return(list(hcst = ano_hcst, obs = ano_obs, fcst = data$fcst, hcst.full_val = data$hcst, obs.full_val = data$obs, hcst_EM = hcst_EM, fcst_EM = fcst_EM, - cat_lims = list(hcst_tr = lims_ano_hcst_tr_res, - obs_tr = lims_ano_obs_tr_res), + #cat_lims = list(hcst_tr = lims_ano_hcst_tr_res, + # obs_tr = lims_ano_obs_tr_res), probs = list(hcst_ev = hcst_probs_ev, obs_ev = obs_probs_ev), ref_obs_tr = ano_obs_tr_res)) -- GitLab From 5c4f0d91999dad3cb98963db833e6409a9be2d03 Mon Sep 17 00:00:00 2001 From: Nuria Perez Zanon Date: Mon, 29 Jul 2024 15:55:25 +0200 Subject: [PATCH 43/53] UKMO to test, NAO multimodel under dev --- full_ecvs_multimodel_anomalies.R | 11 +- full_multimodel_NAO.R | 49 ++++++++ modules/Crossval/Crossval_Calibration.R | 4 +- modules/Crossval/Crossval_multimodel_NAO.R | 139 +++++++++++++++++++++ modules/Loading/R/dates2load.R | 19 ++- recipe_NAO.yml | 110 ++++++++++++++++ recipe_tas.yml | 16 +-- 
recipe_tas_decadal.yml | 116 +++++++++++++++++ sunset.sh | 12 +- 9 files changed, 456 insertions(+), 20 deletions(-) create mode 100644 full_multimodel_NAO.R create mode 100644 modules/Crossval/Crossval_multimodel_NAO.R create mode 100644 recipe_NAO.yml create mode 100644 recipe_tas_decadal.yml diff --git a/full_ecvs_multimodel_anomalies.R b/full_ecvs_multimodel_anomalies.R index 1416b72a..4f423764 100644 --- a/full_ecvs_multimodel_anomalies.R +++ b/full_ecvs_multimodel_anomalies.R @@ -1,4 +1,4 @@ - +library(pryr) source("modules/Loading/Loading.R") source("modules/Saving/Saving.R") source("modules/Units/Units.R") @@ -12,7 +12,6 @@ original_recipe <- prepare_outputs(recipe_file, disable_checks = TRUE) # Load datasets models <- unlist(original_recipe$Analysis$Datasets$System) -original_recipe$Analysis$ncores <- 32 recipe_aux <- original_recipe datos <- list() @@ -20,9 +19,11 @@ datos$fcst <- list() source("modules/Crossval/Crossval_metrics.R") source("modules/Crossval/Crossval_anomalies.R") +source("modules/Aggregation/Aggregation.R") for (sys in models) { recipe_aux$Analysis$Datasets$System <- NULL recipe_aux$Analysis$Datasets$System$name <- as.vector(sys) + # recipe_aux$Analysis$Datasets$System$member <- read_yaml("conf/archive_decadal.yml")$esarchive$System[[sys]]$member data <- Loading(recipe = recipe_aux) data <- Units(recipe_aux, data) # verification individual models @@ -36,6 +37,8 @@ for (sys in models) { datos$fcst <- append(datos$fcst, list(data$fcst)) names(datos$hcst)[length(datos$hcst)] <- gsub('\\.','', sys) names(datos$fcst)[length(datos$fcst)] <- gsub('\\.','', sys) + gc() + print(mem_used()) } data_aux <- data datos$obs <- data$obs @@ -49,13 +52,13 @@ source("modules/Crossval/Crossval_multimodel_metrics.R") skill_metrics <- Crossval_multimodel_metrics(recipe = original_recipe, data = res, Fair = FALSE) - +recipe_aux$Analysis$Datasets$System$name <- 'Multimodel' Visualization(recipe = recipe_aux, data = data_aux, skill_metrics = skill_metrics, significance = TRUE) ## Check logo size is appropriate for your maps: source("tools/add_logo.R") -add_logo(recipe, "rsz_rsz_bsc_logo.png") +add_logo(recipe_aux, "rsz_rsz_bsc_logo.png") # scorecards computation: diff --git a/full_multimodel_NAO.R b/full_multimodel_NAO.R new file mode 100644 index 00000000..e53cd199 --- /dev/null +++ b/full_multimodel_NAO.R @@ -0,0 +1,49 @@ + +source("modules/Loading/Loading.R") +source("modules/Saving/Saving.R") +source("modules/Units/Units.R") +source("modules/Visualization/Visualization.R") +#args = commandArgs(trailingOnly = TRUE) +#recipe_file <- args[1] +recipe_file <- "recipe_NAO.yml" +#recipe <- read_atomic_recipe(recipe_file) +original_recipe <- prepare_outputs(recipe_file, disable_checks = TRUE) +models <- unlist(original_recipe$Analysis$Datasets$System) + +recipe_aux <- original_recipe +datos <- list() +datos$hcst <- list() +datos$fcst <- list() +NAO_ind <- list() +source("modules/Crossval/Crossval_metrics.R") +source("modules/Crossval/Crossval_NAO.R") +for (sys in models[1:2]) { + recipe_aux$Analysis$Datasets$System <- NULL + recipe_aux$Analysis$Datasets$System$name <- as.vector(sys) + # Load datasets + data <- Loading(recipe = recipe_aux) + data <- Units(recipe = recipe_aux, data = data) + # Verification of individual models + product <- Crossval_NAO(recipe = recipe_aux, data = data) + source("plot_NAO.R") + plot_NAO(recipe = recipe_aux, nao = product, data = data) + + skill_model <- Crossval_metrics(recipe = recipe_aux, + data_crossval = product, + fair = FALSE, nmemb =
NULL, nmemb_ref = NULL) + datos$hcst <- append(datos$hcst, list(data$hcst)) + datos$fcst <- append(datos$fcst, list(data$fcst)) + names(datos$hcst)[length(datos$hcst)] <- gsub('\\.','', sys) + names(datos$fcst)[length(datos$fcst)] <- gsub('\\.','', sys) + NAO_ind <- append(NAO_ind, list(product)) + names(NAO_ind)[length(NAO_ind)] <- gsub('\\.','', sys) +} +# No visualisation is available for region-aggregated data +data_aux <- data +datos$obs <- data$obs +data <- datos +rm(list = 'datos') +source("modules/Crossval/Crossval_multimodel_NAO.R") +res <- Crossval_multimodel_NAO(recipe = original_recipe, data = data) + + diff --git a/modules/Crossval/Crossval_Calibration.R b/modules/Crossval/Crossval_Calibration.R index c425a84c..203b1448 100644 --- a/modules/Crossval/Crossval_Calibration.R +++ b/modules/Crossval/Crossval_Calibration.R @@ -71,9 +71,11 @@ source("https://earth.bsc.es/gitlab/external/cstools/-/raw/d6914a40c11d09168b9b4 } cal_hcst_tr <- CST_Calibration(exp = hcst_tr, obs = obs_tr, memb_dim = 'ensemble', sdate_dim = 'syear', - ncores = ncores) + eval.method = 'in-sample', + ncores = ncores) cal_hcst_ev <- CST_Calibration(exp = hcst_tr, obs = obs_tr, exp_cor = hcst_ev, memb_dim = 'ensemble', sdate_dim = 'syear', + eval.method = 'in-sample', ncores = ncores) lims_cal_hcst_tr <- Apply(cal_hcst_tr$data, target_dims = c('syear', 'ensemble'), diff --git a/modules/Crossval/Crossval_multimodel_NAO.R b/modules/Crossval/Crossval_multimodel_NAO.R new file mode 100644 index 00000000..b82aea16 --- /dev/null +++ b/modules/Crossval/Crossval_multimodel_NAO.R @@ -0,0 +1,139 @@ +source("modules/Crossval/R/tmp/GetProbs.R") + +Crossval_multimodel_NAO <- function(recipe, data) { + cross.method <- recipe$Analysis$cross.method + # TODO move check + obsproj <- recipe$Analysis$Workflow$Indices$NAO$obsproj + if (is.null(obsproj)) { + obsproj <- TRUE + } + if (is.null(cross.method)) { + cross.method <- 'leave-one-out' + } + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + eval(parse(text = y))})}) + ncores <- recipe$Analysis$ncores + na.rm <- recipe$Analysis$remove_NAs + ## data dimensions + sdate_dim <- dim(data$hcst[[1]]$data)['syear'] + orig_dims <- names(dim(data$hcst[[1]]$data)) + cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, + amt.points = sdate_dim, + amt.points_cor = NULL) # k = ?
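# [Illustrative sketch, not part of the patch] The `cross` object built above is
# a list with one element per fold, each carrying `train.dexes` and `eval.dexes`,
# which the loop below consumes. Assuming the leave-one-out method, an equivalent
# hand-rolled construction would be:
loo_split <- function(n_sdates) {
  lapply(seq_len(n_sdates), function(t) {
    list(eval.dexes = t,                               # the held-out start date
         train.dexes = setdiff(seq_len(n_sdates), t))  # all remaining start dates
  })
}
# e.g. loo_split(3)[[1]] gives eval.dexes = 1 and train.dexes = c(2, 3).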
+ + # Define output dims and names: + nao_hcst_tr_aux <- list() + nao_hcst_ev_aux <- list() + + nao_obs_tr_res <- list() + nao_hcst_ev_res <- list() + # Cross-val loop starts + for (t in 1:length(cross)) { + info(recipe$Run$logger, paste("crossval:", t)) + nao_hcst_tr_res <- list() + nao_hcst_ev_res <- list() + + # Observations + obs_tr <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$train.dexes) + obs_ev <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$eval.dexes) + clim_obs_tr <- MeanDims(obs_tr, 'syear') + ano_obs_tr <- s2dv::Ano(obs_tr, clim_obs_tr, + ncores = ncores) + ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr, + ncores = ncores) + # NAO for individual models + for (sys in 1:length(data$hcst)) { + hcst_tr <- Subset(data$hcst[[sys]]$data, along = 'syear', + indices = cross[[t]]$train.dexes) + ## evaluation indices + hcst_ev <- Subset(data$hcst[[sys]]$data, along = 'syear', + indices = cross[[t]]$eval.dexes) + # compute climatology: + clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble')) + ano_hcst_tr <- s2dv::Ano(hcst_tr, clim_hcst_tr, + ncores = ncores) + ano_hcst_ev <- s2dv::Ano(hcst_ev, clim_hcst_tr, + ncores = ncores) + # compute NAO: + nao <- NAO(exp = ano_hcst_tr, obs = ano_obs_tr, exp_cor = ano_hcst_ev, + ftime_avg = NULL, time_dim = 'syear', + memb_dim = 'ensemble', + space_dim = c('latitude', 'longitude'), + ftime_dim = 'time', obsproj = obsproj, + lat = data$obs$coords$latitude, + lon = data$obs$coords$longitude, + ncores = recipe$Analysis$ncores) + + nao_obs_ev <- NAO(exp = ano_hcst_tr, obs = ano_obs_tr, + exp_cor = ano_obs_ev, + ftime_avg = NULL, time_dim = 'syear', + memb_dim = 'ensemble', + space_dim = c('latitude', 'longitude'), + ftime_dim = 'time', obsproj = obsproj, + lat = data$obs$coords$latitude, + lon = data$obs$coords$longitude, + ncores = recipe$Analysis$ncores)$exp_cor + # Standardisation: + # Use the nao_hcst training statistics (train.dexes) to standardise the eval.dexes
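# [Illustrative sketch, not part of the patch] The Apply() calls below z-score the
# evaluation NAO with the mean and standard deviation of the training-period NAO,
# so the held-out year never informs its own scaling. On plain vectors:
standardise_eval <- function(train, x_eval) {
  # mean and sd are estimated from the training sample only
  (x_eval - mean(train, na.rm = TRUE)) / sd(train, na.rm = TRUE)
}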
+ nao_hcst_ev <- Apply(list(nao$exp, nao$exp_cor), + target_dims = c('syear', 'ensemble'), + fun = function(x, y) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(y, c(1,2), function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores)$output1 + nao_obs_ev <- Apply(list(nao$obs, nao_obs_ev), + target_dims = c('syear','ensemble'), + fun = function(x, y) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(y, c(1,2), function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores)$output1 + nao_obs_tr <- Apply(list(nao$obs), target_dims = 'syear', + fun = function(x) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(x, 1, function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores, + output_dims = 'syear')$output1 + nao_hcst_tr <- Apply(list(nao$exp), target_dims = c('syear', 'ensemble'), + fun = function(x) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(x, c(1,2), function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores)$output1 + # store results: + nao_hcst_tr_res <- append(nao_hcst_tr_res, list(nao_hcst_tr)) + names(nao_hcst_tr_res)[length(nao_hcst_tr_res)] <- names(data$hcst)[[sys]] + nao_hcst_ev_res <- append(nao_hcst_ev_res, list(nao_hcst_ev)) + names(nao_hcst_ev_res)[length(nao_hcst_ev_res)] <- names(data$hcst)[[sys]] + } + if (t == 1) { + nao_hcst_ev_aux <- nao_hcst_ev_res + } else { + for (sys in 1:length(data$hcst)) { + nao_hcst_ev_aux[[sys]] <- abind(nao_hcst_ev_aux[[sys]], + nao_hcst_ev_res[[sys]], + along = length(dim(nao_hcst_ev_res[[sys]])) + 1) + nao_hcst_tr_aux[[sys]] <- abind(nao_hcst_tr_aux[[sys]], + nao_hcst_tr_res[[sys]], + along = length(dim(nao_hcst_tr_res[[sys]])) + 1) + } + } + } + nao_hcst_ev_aux <- lapply(1:length(nao_hcst_ev_aux), function(x) { + names(dim(nao_hcst_ev_aux[[x]])) <- c(names(dim(nao_hcst_ev_res[[x]])), 'sample') + return(nao_hcst_ev_aux[[x]])}) + nao_hcst_tr_aux <- lapply(1:length(nao_hcst_tr_aux), function(x) { + names(dim(nao_hcst_tr_aux[[x]])) <- c(names(dim(nao_hcst_tr_res[[x]])), 'sample') + return(nao_hcst_tr_aux[[x]])}) + + # Observed NAO should be the same for all models. + nao_obs_ev_res <- append(nao_obs_ev_res, list(nao_obs_ev)) + names(nao_obs_ev_res)[length(nao_obs_ev_res)] <- names(data$hcst)[[1]] + + diff --git a/modules/Loading/R/dates2load.R b/modules/Loading/R/dates2load.R index f084ce62..15242c0a 100644 --- a/modules/Loading/R/dates2load.R +++ b/modules/Loading/R/dates2load.R @@ -18,15 +18,28 @@ library(lubridate) dates2load <- function(recipe, logger) { temp_freq <- recipe$Analysis$Variables$freq + system_name <- recipe$Analysis$Datasets$System$name recipe <- recipe$Analysis$Time # hcst dates file_dates <- paste0(strtoi(recipe$hcst_start):strtoi(recipe$hcst_end), recipe$sdate) - + + # Note: UKMO models are always missing the start date of January 1993. + # This piece of code removes that date from the start dates. 
+ UKMO_MODELS <- c("UK-MetOffice-Glosea600", "UK-MetOffice-Glosea601", + "UKMO-System602") + if ((system_name %in% UKMO_MODELS) && ("19930101" %in% file_dates)) { + file_dates <- file_dates[-(which(file_dates == '19930101'))] + warn(logger, + paste("January 1993 start date is not available for", system_name, + "and has been removed from the list of start dates.")) + } + + # Add dimensions to the vector if (temp_freq == "monthly_mean") { file_dates <- .add_dims(file_dates) } - # fcst dates (if fcst_year empty it creates an empty object) + # fcst dates (if fcst_year empty it creates an empty object) if (! is.null(recipe$fcst_year)) { file_dates.fcst <- paste0(recipe$fcst_year, recipe$sdate) if (temp_freq == "monthly_mean") { @@ -38,7 +51,6 @@ dates2load <- function(recipe, logger) { paste("fcst_year empty in the recipe, creating empty fcst object...")) } return(list(hcst = file_dates, fcst = file_dates.fcst)) - ## TODO: document header of fun } # adds the correspondent dims to each sdate array @@ -49,3 +61,4 @@ dates2load <- function(recipe, logger) { dim(data) <- default_dims return(data) } + diff --git a/recipe_NAO.yml b/recipe_NAO.yml new file mode 100644 index 00000000..27687ac8 --- /dev/null +++ b/recipe_NAO.yml @@ -0,0 +1,110 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: psl + freq: monthly_mean + units: hPa + flux: no + Datasets: + System: + - {name: 'ECMWF-SEAS5.1'} + - {name: 'Meteo-France-System8'} + - {name: 'CMCC-SPS3.5'} + - {name: 'UK-MetOffice-Glosea601'} + - {name: 'NCEP-CFSv2'} + - {name: 'DWD-GCFS2.1'} + - {name: 'ECCC-CanCM4i'} + #name: ECMWF-SEAS5.1 #Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + Multimodel: + execute: no + approach: pooled # Mandatory, bool: Either yes/true or no/false + createFrom: Anomalies + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0501' + fcst_year: #'2023' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 #6 # Mandatory, int: Last leadtime time step in months + Region: + latmin: 20 + latmax: 80 + lonmin: -80 + lonmax: 40 + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. + type: /home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt #"to_system" #"to_reference" /esarchive/scratch/nperez/git4/sunset/conf/grid_description/griddes_system51c3s.txt + Workflow: + Anomalies: + compute: yes + cross_validation: no + save: none + Time_aggregation: + execute: no + Indices: + NAO: {obsproj: yes, save: 'none'} + Calibration: + method: raw # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: EnsCorr rmsss rpss crpss EnsSprErr rps crps rps_syear crps_syear cov std n_eff + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. 
+ save: none + Indicators: + index: no + Visualization: + plots: skill_metrics #forecast_ensemble_mean most_likely_terciles + multi_panel: no + dots: both + projection: Robinson + file_format: 'PNG' + #projection: robinson + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: NULL + metric: enscorr rmss rpss crpss EnsSprErr + metric_aggregation: 'skill' + #inf_to_na: yes + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 16 # Optional, int: number of cores, defaults to 1 + remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: gpfs #esarchive #gpfs + output_dir: /home/bsc/bsc032339/ #/esarchive/scratch/nperez/git4/ #/home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /home/bsc/bsc032339/sunset/ #/esarchive/scratch/nperez/git4/sunset/ #/home/bsc/bsc032339/sunset/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + diff --git a/recipe_tas.yml b/recipe_tas.yml index 32862c16..f3e2e7c3 100644 --- a/recipe_tas.yml +++ b/recipe_tas.yml @@ -13,11 +13,11 @@ Analysis: System: - {name: 'Meteo-France-System8'} - {name: 'CMCC-SPS3.5'} - #- {name: 'ECMWF-SEAS5.1'} - #- {name: 'UK-MetOffice-Glosea601'} - #- {name: 'NCEP-CFSv2'} - #- {name: 'DWD-GCFS2.1'} - #- {name: 'ECCC-CanCM4i'} + - {name: 'ECMWF-SEAS5.1'} + - {name: 'UK-MetOffice-Glosea601'} + - {name: 'NCEP-CFSv2'} + - {name: 'DWD-GCFS2.1'} + - {name: 'ECCC-CanCM4i'} # name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 Multimodel: execute: yes @@ -26,7 +26,7 @@ Analysis: Reference: name: ERA5 # Mandatory, str: Reference codename. See docu. 
Time: - sdate: '0501' + sdate: '0301' fcst_year: #'2021' hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' @@ -53,7 +53,7 @@ Analysis: cross_validation: yes save: none Skill: - metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov std n_eff + metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov n_eff save: 'all' cross_validation: yes Probabilities: @@ -83,7 +83,7 @@ Analysis: col1_width: NULL col2_width: NULL calculate_diff: FALSE - ncores: 4 # Optional, int: number of cores, defaults to 1 + ncores: 100 # Optional, int: number of cores, defaults to 1 remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE alpha: 0.05 Output_format: scorecards diff --git a/recipe_tas_decadal.yml b/recipe_tas_decadal.yml new file mode 100644 index 00000000..09f88567 --- /dev/null +++ b/recipe_tas_decadal.yml @@ -0,0 +1,116 @@ +Description: + Author: nperez + Info: Test decadal 25-07-2024 + +Analysis: + Horizon: decadal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: tas + freq: monthly_mean + units: C + flux: no + Datasets: + System: + - {name: 'EC-Earth3-i4'} + - {name: 'HadGEM3-GC31-MM'} + - {name: 'BCC-CSM2-MR'} + - {name: 'CanESM5'} + - {name: 'CMCC-CM2-SR5'} + - {name: 'FGOALS-f3-L'} + - {name: 'IPSL-CM6A-LR'} + # name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + Multimodel: + execute: yes + approach: pooled # Mandatory, bool: Either yes/true or no/false + createFrom: Anomalies + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0501' + fcst_year: #'2021' + hcst_start: '1980' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 120 # Mandatory, int: Last leadtime time step in months + Region: + latmin: -90 + latmax: 90 + lonmin: 0 + lonmax: 359.9 + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. + type: "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_reference" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Time_aggregation: + execute: yes + method: average + user_def: + Y1: [1, 12] # aggregate from 1 to 3 forecast times + Y2: [13, 24] + Y2-Y5: [13, 60] + Y5-Y10: [49, 120] + Anomalies: + compute: yes + cross_validation: no + save: none + Calibration: + method: raw # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov n_eff + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. 
+ save: none + Indicators: + index: no + Visualization: + plots: skill_metrics #forecast_ensemble_mean most_likely_terciles + multi_panel: no + #dots: both + projection: Robinson + file_format: 'PNG' + #projection: robinson + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: NULL + metric: mean_bias enscorr rpss crpss EnsSprErr + metric_aggregation: 'skill' + #inf_to_na: yes + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 100 # Optional, int: number of cores, defaults to 1 + remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE + alpha: 0.05 + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: gpfs + output_dir: /home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /home/bsc/bsc032339/sunset/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + diff --git a/sunset.sh b/sunset.sh index 7f913f15..34b7d05f 100644 --- a/sunset.sh +++ b/sunset.sh @@ -1,14 +1,18 @@ #!/bin/bash -#SBATCH -n 16 -#SBATCH -t 01:00:00 +#SBATCH -n 112 +#SBATCH -N 1 +#SBATCH -t 24:00:00 #SBATCH -J sunset_multimodel #SBATCH -o sunset_multimodel-%J.out #SBATCH -e sunset_multimodel-%J.err #SBATCH --account=bsc32 -#SBATCH --qos=acc_bsces +#SBATCH --qos=gp_bsces +#SBATCH --constraint=highmem + +#### --qos=acc_bsces source /gpfs/projects/bsc32/software/suselinux/11/software/Miniconda3/4.7.10/etc/profile.d/conda.sh conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/SUNSET-env_2.0.0 -Rscript /home/bsc/bsc032339/sunset/full_NAO.R #ecvs_multimodel_anomalies.R +Rscript /home/bsc/bsc032339/sunset/full_ecvs_multimodel_anomalies.R #full_NAO.R #ecvs_multimodel_anomalies.R -- GitLab From c3e04411dd3c49f86f5f087c46e853db69cb4eb4 Mon Sep 17 00:00:00 2001 From: Nuria Perez Zanon Date: Mon, 29 Jul 2024 18:33:22 +0200 Subject: [PATCH 44/53] revert UKMO 0101 --- modules/Loading/R/dates2load.R | 19 ++++--------------- recipe_tas.yml | 2 +- recipe_tas_singl.yml | 20 ++++++++++---------- 3 files changed, 15 insertions(+), 26 deletions(-) diff --git a/modules/Loading/R/dates2load.R b/modules/Loading/R/dates2load.R index 15242c0a..69c4daa3 100644 --- a/modules/Loading/R/dates2load.R +++ b/modules/Loading/R/dates2load.R @@ -18,28 +18,15 @@ library(lubridate) dates2load <- function(recipe, logger) { temp_freq <- recipe$Analysis$Variables$freq - system_name <- recipe$Analysis$Datasets$System$name recipe <- recipe$Analysis$Time # hcst dates file_dates <- paste0(strtoi(recipe$hcst_start):strtoi(recipe$hcst_end), recipe$sdate) - - # Note: UKMO models are always missing the start date of January 1993. 
- # This piece of code removes that date from the start dates. - UKMO_MODELS <- c("UK-MetOffice-Glosea600", "UK-MetOffice-Glosea601", - "UKMO-System602") - if ((system_name %in% UKMO_MODELS) && ("19930101" %in% file_dates)) { - file_dates <- file_dates[-(which(file_dates == '19930101'))] - warn(logger, - paste("January 1993 start date is not available for", system_name, - "and has been removed from the list of start dates.")) - } - - # Add dimensions to the vector + if (temp_freq == "monthly_mean") { file_dates <- .add_dims(file_dates) } - # fcst dates (if fcst_year empty it creates an empty object) + # fcst dates (if fcst_year empty it creates an empty object) if (! is.null(recipe$fcst_year)) { file_dates.fcst <- paste0(recipe$fcst_year, recipe$sdate) if (temp_freq == "monthly_mean") { @@ -51,6 +38,8 @@ dates2load <- function(recipe, logger) { paste("fcst_year empty in the recipe, creating empty fcst object...")) } return(list(hcst = file_dates, fcst = file_dates.fcst)) + ## TODO: document header of fun + } # adds the correspondent dims to each sdate array diff --git a/recipe_tas.yml b/recipe_tas.yml index f3e2e7c3..4ec92a78 100644 --- a/recipe_tas.yml +++ b/recipe_tas.yml @@ -26,7 +26,7 @@ Analysis: Reference: name: ERA5 # Mandatory, str: Reference codename. See docu. Time: - sdate: '0301' + sdate: '0501' fcst_year: #'2021' hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' diff --git a/recipe_tas_singl.yml b/recipe_tas_singl.yml index 9dc99cbc..be20a311 100644 --- a/recipe_tas_singl.yml +++ b/recipe_tas_singl.yml @@ -14,7 +14,7 @@ Analysis: #- {name: 'ECMWF-SEAS5.1'} #- {name: 'Meteo-France-System8'} #- {name: 'CMCC-SPS3.5'} - name: ECMWF-SEAS5.1 #Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + name: UK-MetOffice-Glosea601 #Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 Multimodel: execute: no approach: pooled # Mandatory, bool: Either yes/true or no/false @@ -22,20 +22,20 @@ Analysis: Reference: name: ERA5 # Mandatory, str: Reference codename. See docu. Time: - sdate: '0501' - fcst_year: '2021' + sdate: '0101' + fcst_year: #'2021' hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' hcst_end: '2000' # Mandatory, int: Hindcast end year 'YYYY' ftime_min: 1 # Mandatory, int: First leadtime time step in months ftime_max: 6 # Mandatory, int: Last leadtime time step in months Region: - latmin: -90 - latmax: 90 + latmin: -10 + latmax: 10 lonmin: 0 - lonmax: 359.9 + lonmax: 40 #359.9 Regrid: method: conservative # Mandatory, str: Interpolation method. See docu. - type: /esarchive/scratch/nperez/git4/sunset/conf/grid_description/griddes_system51c3s.txt # "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_system" #"to_reference" + type: /home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt #/esarchive/scratch/nperez/git4/sunset/conf/grid_description/griddes_system51c3s.txt # "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_system" #"to_reference" #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. 
Workflow: Anomalies: @@ -86,9 +86,9 @@ Analysis: Run: Loglevel: INFO Terminal: yes - filesystem: esarchive #gpfs - output_dir: /esarchive/scratch/nperez/git4/ #/home/bsc/bsc032339/ # replace with the directory where you want to save the outputs - code_dir: /esarchive/scratch/nperez/git4/sunset/ #/home/bsc/bsc032339/sunset/ # replace with the directory where your code is + filesystem: gpfs #esarchive #gpfs + output_dir: /home/bsc/bsc032339/ #/esarchive/scratch/nperez/git4/ #/home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /home/bsc/bsc032339/sunset/ # /esarchive/scratch/nperez/git4/sunset/ #/home/bsc/bsc032339/sunset/ # replace with the directory where your code is autosubmit: no # fill only if using autosubmit auto_conf: -- GitLab From c89ef132e2d030afb72769bdb784d084582839d0 Mon Sep 17 00:00:00 2001 From: Nuria Perez Zanon Date: Wed, 31 Jul 2024 18:18:08 +0200 Subject: [PATCH 45/53] decadal conf and test --- conf/archive_decadal.yml | 51 ++++++++++++++++++++++++++++++++ full_ecvs_multimodel_anomalies.R | 5 +++- recipe_tas_decadal.yml | 14 ++++----- 3 files changed, 62 insertions(+), 8 deletions(-) diff --git a/conf/archive_decadal.yml b/conf/archive_decadal.yml index 0697fc3f..d06f336c 100644 --- a/conf/archive_decadal.yml +++ b/conf/archive_decadal.yml @@ -1,3 +1,54 @@ +gpfs: + src: "/gpfs/scratch/bsc32/MN4/bsc32/bsc32693/data_amd/" + System: +# ---- + EC-Earth3-i4: + name: "EC-Earth3-i4" + institution: "EC-Earth-Consortium" + src: + hcst: "exp/CMIP6/dcppA-hindcast/EC-Earth3-i4/DCPP/EC-Earth-Consortium/EC-Earth3-i4/dcppA-hindcast/" + fcst: + first_dcppB_syear: 2021 + monthly_mean: + table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "clt":"Amon", "hfls":"Amon", + "hurs":"Amon", "huss":"Amon", "rsds":"Amon", "rsut":"Amon", "ta":"Amon", + "tasmax":"Amon", "tosa":"Amon", "ua":"Amon", "va":"Amon", "zg":"Amon", + "evspsbl":"Amon", "hfss":"Amon", "hursmin":"Amon", "rlut":"Amon", + "rsdt":"Amon", "sfcWind":"Amon", "tasmin":"Amon", "ts":"Amon", "uas":"Amon", + "vas":"Amon"} + grid: {"tas":"gr", "pr":"gr", "psl":"gr", "clt":"gr", "hfls":"gr", + "hurs":"gr", "huss":"gr", "rsds":"gr", "rsut":"gr", "ta":"gr", + "tasmax":"gr", "tosa":"gr", "ua":"gr", "va":"gr", "zg":"gr", + "vas":"gr"} + version: {"tas":"v20210910", "pr":"v20210910", "psl":"v20210910", "clt":"v20210910", + "hurs":"v20210910", "huss":"v20210910", "rsds":"v20210910", "rsut":"v20210910", "ta":"v20210910", + "tasmax":"v20210910", "tosa":"v20210910", "ua":"v20210910", "va":"v20210910", "zg":"v20210910", + "evspsbl":"v20210910", "hfss":"v20210910", "hursmin":"v20210910", "rlut":"v20210910", + "rsdt":"v20210910", "sfcWind":"v20210910", "tasmin":"v20210910", "ts":"v20210910", "uas":"v20210910", + "vas":"v20210910"} + daily_mean: + grid: {"tas":"gr", "pr":"gr", "psl":"gr"} + version: {"tas":"v20210910", "pr":"v20210910", "psl":"v20210910"} + calendar: "proleptic_gregorian" + member: r1i4p1f1,r2i4p1f1,r3i4p1f1,r4i4p1f1,r5i4p1f1,r6i4p1f1,r7i4p1f1,r8i4p1f1,r9i4p1f1,r10i4p1f1 + initial_month: 11 + sdate_add: 0 + reference_grid: "/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/r1i4p1f1/Amon/tas/gr/v20210910/tas_Amon_EC-Earth3_dcppA-hindcast_s1960-r1i4p1f1_gr_196011-196110.nc" + + Reference: + ERA5: + name: "ERA5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "recon/era5/" + monthly_mean: {"tas":"_f1h-r1440x721cds/", + "psl":"monthly_mean/psl_f1h-r1440x721cds/", + "prlr":"monthly_mean/prlr_f1h-r1440x721cds/", + 
"sfcWind":"monthly_mean/sfcWind_f1h-r1440x721cds/"} + calendar: "standard" + reference_grid: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" + land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/constant/lsm-r1440x721cds/sftof.nc" + + esarchive: src: "/esarchive/" System: diff --git a/full_ecvs_multimodel_anomalies.R b/full_ecvs_multimodel_anomalies.R index 4f423764..4c7e16ab 100644 --- a/full_ecvs_multimodel_anomalies.R +++ b/full_ecvs_multimodel_anomalies.R @@ -6,6 +6,7 @@ source("modules/Visualization/Visualization.R") #args = commandArgs(trailingOnly = TRUE) #recipe_file <- args[1] recipe_file <- "recipe_tas.yml" +# recipe_file <- "recipe_tas_decadal.yml" # Will it work with a single job? #recipe <- read_atomic_recipe(recipe_file) original_recipe <- prepare_outputs(recipe_file, disable_checks = TRUE) @@ -20,12 +21,14 @@ datos$fcst <- list() source("modules/Crossval/Crossval_metrics.R") source("modules/Crossval/Crossval_anomalies.R") source("modules/Aggregation/Aggregation.R") +# sys <- models[1] for (sys in models) { recipe_aux$Analysis$Datasets$System <- NULL recipe_aux$Analysis$Datasets$System$name <- as.vector(sys) # recipe_aux$Analysis$Datasets$System$member <- read_yaml("conf/archive_decadal.yml")$esarchive$System[[sys]]$member data <- Loading(recipe = recipe_aux) - data <- Units(recipe_aux, data) + data <- Units(recipe = recipe_aux, data = data) + # data <- Aggregation(recipe = recipe_aux, data = data) # verification individual models product <- Crossval_anomalies(recipe = recipe_aux, data = data) skill_model <- Crossval_metrics(recipe = recipe_aux, data_crossval = product, diff --git a/recipe_tas_decadal.yml b/recipe_tas_decadal.yml index 09f88567..e5169dbe 100644 --- a/recipe_tas_decadal.yml +++ b/recipe_tas_decadal.yml @@ -26,12 +26,12 @@ Analysis: Reference: name: ERA5 # Mandatory, str: Reference codename. See docu. 
   Time:
-    sdate: '0501'
+    # sdate: '0501'
     fcst_year: #'2021'
-    hcst_start: '1980' # Mandatory, int: Hindcast start year 'YYYY'
-    hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY'
+    hcst_start: '1981' # Mandatory, int: Hindcast start year 'YYYY'
+    hcst_end: '2010' # Mandatory, int: Hindcast end year 'YYYY'
     ftime_min: 1 # Mandatory, int: First leadtime time step in months
-    ftime_max: 120 # Mandatory, int: Last leadtime time step in months
+    ftime_max: 12 # Mandatory, int: Last leadtime time step in months
   Region:
     latmin: -90
     latmax: 90
@@ -47,9 +47,9 @@ Analysis:
       method: average
       user_def:
         Y1: [1, 12] # aggregate forecast time steps 1 to 12
-        Y2: [13, 24]
-        Y2-Y5: [13, 60]
-        Y5-Y10: [49, 120]
+        Y1-JJA: [6, 8]
+        #Y2-Y5: [13, 60]
+        #Y5-Y10: [49, 120]
   Anomalies:
     compute: yes
     cross_validation: no
-- 
GitLab


From 30b8e5c82d4c992d679e4ca1eb5df86fb42e48ec Mon Sep 17 00:00:00 2001
From: Nuria Perez Zanon
Date: Thu, 1 Aug 2024 17:33:58 +0200
Subject: [PATCH 46/53] avoid memory line and save probs

---
 modules/Crossval/Crossval_anomalies.R | 8 +++++---
 modules/Loading/R/load_decadal.R      | 2 +-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R
index cc41ffae..d743c834 100644
--- a/modules/Crossval/Crossval_anomalies.R
+++ b/modules/Crossval/Crossval_anomalies.R
@@ -175,9 +175,11 @@ Crossval_anomalies <- function(recipe, data) {
       tmp_lims2 <- append(tmp_lims2, list(tmp_lims))
       names(tmp_lims2)[length(tmp_lims2)] <- as.character(categories[[ps]][l])
     }
-    save_percentiles(recipe = recipe, percentiles = tmp_lims2,
-                     data_cube = data$obs,
-                     agg = "global", outdir = NULL)
+    if (recipe$Analysis$Workflow$Probabilities$save == 'yes') {
+      save_percentiles(recipe = recipe, percentiles = tmp_lims2,
+                       data_cube = data$obs,
+                       agg = "global", outdir = NULL)
+    }
   }
 }
diff --git a/modules/Loading/R/load_decadal.R b/modules/Loading/R/load_decadal.R
index d3b4f439..1e9c332a 100644
--- a/modules/Loading/R/load_decadal.R
+++ b/modules/Loading/R/load_decadal.R
@@ -480,6 +480,6 @@ load_decadal <- function(recipe) {
   info(recipe$Run$logger,
        "##### DATA LOADING COMPLETED SUCCESSFULLY #####")
-  .log_memory_usage(recipe$Run$logger, when = "After loading")
+#  .log_memory_usage(recipe$Run$logger, when = "After loading")
   return(list(hcst = hcst, fcst = fcst, obs = obs))
 }
-- 
GitLab


From d87721aa2802fd004b73c8bde2cbec4f8f3b81f4 Mon Sep 17 00:00:00 2001
From: Nuria Perez Zanon
Date: Thu, 1 Aug 2024 17:35:56 +0200
Subject: [PATCH 47/53] add fix aggregation

---
 modules/Aggregation/R/agg_ini_end.R | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/modules/Aggregation/R/agg_ini_end.R b/modules/Aggregation/R/agg_ini_end.R
index 17c6940a..c58a7ebe 100644
--- a/modules/Aggregation/R/agg_ini_end.R
+++ b/modules/Aggregation/R/agg_ini_end.R
@@ -9,6 +9,8 @@ agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm ,ncores) {
     # take the first and last element of each indices list for time_bounds saving
     ini <- unlist(lapply(indices, function(x){x[1]}))
     end <- unlist(lapply(indices, function(x){x[length(x)]}))
+    indices <- lapply(1:length(ini), function(x) {
+                 ini[x]:end[x]})
     plotting_attr <- list(names(indices))
   }
   original_dims <- names(dim(x[[1]]$data))
   if (method == 'average') {
     x[[1]]$data <- Apply(x[[1]]$data,
                          target_dim = 'time',
                          function(y, ind) {
-                           sapply(1:length(indices),
-                             function(z) {mean(y[indices[[z]]], na.rm = na.rm)})
+                           res <- 
sapply(1:length(indices), + function(z) { + mean(y[indices[[z]]], na.rm = na.rm)}) + if (is.null(dim(res))) { + dim(res) <- c(length(res)) + } + return(res) }, ind = indices, output_dims = 'time', @@ -26,8 +33,13 @@ agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm ,ncores) { x[[1]]$data <- Apply(x[[1]]$data, target_dim = 'time', function(y, ind) { - sapply(1:length(indices), - function(z) {sum(y[indices[[z]]], na.rm = na.rm)}) + res <- sapply(1:length(indices), + function(z) { + sum(y[indices[[z]]], na.rm = na.rm)}) + if (is.null(dim(res))) { + dim(res) <- c(length(res)) + } + return(res) }, ind = indices, output_dims = 'time', @@ -54,3 +66,4 @@ agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm ,ncores) { x[[1]]$attrs$Dates <- tmp_dates return(x) } + -- GitLab From f6bdee6aef142d1b5a3bf5fecd684011c36a1ca4 Mon Sep 17 00:00:00 2001 From: abatalla Date: Fri, 9 Aug 2024 11:50:01 +0200 Subject: [PATCH 48/53] Delete GetProbs.R (s2dv master branch duplicate) --- modules/Crossval/R/tmp/GetProbs.R | 351 ------------------------------ 1 file changed, 351 deletions(-) delete mode 100644 modules/Crossval/R/tmp/GetProbs.R diff --git a/modules/Crossval/R/tmp/GetProbs.R b/modules/Crossval/R/tmp/GetProbs.R deleted file mode 100644 index fb2cda0c..00000000 --- a/modules/Crossval/R/tmp/GetProbs.R +++ /dev/null @@ -1,351 +0,0 @@ -#'Compute probabilistic forecasts or the corresponding observations -#' -#'Compute probabilistic forecasts from an ensemble based on the relative -#'thresholds, or the probabilistic observations (i.e., which probabilistic -#'category was observed). A reference period can be specified to calculate the -#'absolute thresholds between each probabilistic category. The absolute -#'thresholds can be computed in cross-validation mode. If data is an ensemble, -#'the probabilities are calculated as the percentage of members that fall into -#'each category. For observations (or forecast without member dimension), 1 -#'means that the event happened, while 0 indicates that the event did not -#'happen. Weighted probabilities can be computed if the weights are provided for -#'each ensemble member and time step. The absolute thresholds can also be -#'provided directly for probabilities calculation. -#' -#'@param data A named numerical array of the forecasts or observations with, at -#' least, time dimension. -#'@param time_dim A character string indicating the name of the time dimension. -#' The default value is 'sdate'. -#'@param memb_dim A character string indicating the name of the member dimension -#' to compute the probabilities of the forecast, or NULL if there is no member -#' dimension (e.g., for observations, or for forecast with only one ensemble -#' member). The default value is 'member'. -#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to -#' 1) between the categories. The default value is c(1/3, 2/3), which -#' corresponds to tercile equiprobable categories. -#'@param abs_thresholds A numeric array or vector of the absolute thresholds in -#' the same units as \code{data}. If an array is provided, it should have at -#' least 'bin_dim_abs' dimension. If it has more dimensions (e.g. different -#' thresholds for different locations, i.e. lon and lat dimensions), they -#' should match the dimensions of \code{data}, except the member dimension -#' which should not be included. The default value is NULL and, in this case, -#' 'prob_thresholds' is used for calculating the probabilities. 
-#'@param bin_dim_abs A character string of the dimension name of -#' 'abs_thresholds' array in which category limits are stored. It will also be -#' the probabilistic category dimension name in the output. The default value -#' is 'bin'. -#'@param indices_for_quantiles A vector of the indices to be taken along -#' 'time_dim' for computing the absolute thresholds between the probabilistic -#' categories. If NULL (default), the whole period is used. It is only used -#' when 'prob_thresholds' is provided. -#'@param weights A named numerical array of the weights for 'data' with -#' dimensions 'time_dim' and 'memb_dim' (if 'data' has them). The default value -#' is NULL. The ensemble should have at least 70 members or span at least 10 -#' time steps and have more than 45 members if consistency between the weighted -#' and unweighted methodologies is desired. -#'@param cross.val A logical indicating whether to compute the thresholds -#' between probabilistic categories in cross-validation mode. The default value -#' is FALSE. -#'@param ncores An integer indicating the number of cores to use for parallel -#' computation. The default value is NULL. -#' -#'@return -#'A numerical array of probabilities with dimensions c(bin_dim_abs, the rest -#'dimensions of 'data' except 'memb_dim'). 'bin' dimension has the length of -#'probabilistic categories, i.e., \code{length(prob_thresholds) + 1}. -#' -#'@examples -#'data <- array(rnorm(2000), dim = c(ensemble = 25, sdate = 20, time = 4)) -#'res <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', -#' indices_for_quantiles = 4:17) -#' -#'# abs_thresholds is provided -#'abs_thr1 <- c(-0.2, 0.3) -#'abs_thr2 <- array(c(-0.2, 0.3) + rnorm(40) * 0.1, dim = c(cat = 2, sdate = 20)) -#'res1 <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', -#' prob_thresholds = NULL, abs_thresholds = abs_thr1) -#'res2 <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', -#' prob_thresholds = NULL, abs_thresholds = abs_thr2, bin_dim_abs = 'cat') -#' -#'@import multiApply -#'@importFrom easyVerification convert2prob -#'@export -GetProbs <- function(data, time_dim = 'sdate', memb_dim = 'member', - indices_for_quantiles = NULL, - prob_thresholds = c(1/3, 2/3), abs_thresholds = NULL, - bin_dim_abs = 'bin', weights = NULL, cross.val = FALSE, ncores = NULL) { - - # Check inputs - ## data - if (is.null(data)) { - stop("Parameter 'data' cannot be NULL.") - } - if (!is.numeric(data)) { - stop("Parameter 'data' must be a numeric array.") - } - if (any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) { - stop("Parameter 'data' must have dimension names.") - } - ## time_dim - if (!is.character(time_dim) | length(time_dim) != 1) - stop('Parameter "time_dim" must be a character string.') - if (!time_dim %in% names(dim(data))) { - stop("Parameter 'time_dim' is not found in 'data' dimensions.") - } - ## memb_dim - if (!is.null(memb_dim)) { - if (!is.character(memb_dim) | length(memb_dim) > 1) { - stop("Parameter 'memb_dim' must be a character string.") - } - if (!memb_dim %in% names(dim(data))) { - stop("Parameter 'memb_dim' is not found in 'data' dimensions. 
If no member ", - "dimension exists, set it as NULL.") - } - } - ## bin_dim_abs - if (!is.character(bin_dim_abs) | length(bin_dim_abs) != 1) { - stop('Parameter "bin_dim_abs" must be a character string.') - } - ## prob_thresholds, abs_thresholds - if (!is.null(abs_thresholds) & !is.null(prob_thresholds)) { - .warning(paste0("Parameters 'prob_thresholds' and 'abs_thresholds' are both provided. ", - "Only the first one is used.")) - abs_thresholds <- NULL - } else if (is.null(abs_thresholds) & is.null(prob_thresholds)) { - stop("One of the parameters 'prob_thresholds' and 'abs_thresholds' must be provided.") - } - if (!is.null(prob_thresholds)) { - if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | - any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { - stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.") - } - ## indices_for_quantiles - if (is.null(indices_for_quantiles)) { - indices_for_quantiles <- seq_len(dim(data)[time_dim]) - } else { - if (!is.numeric(indices_for_quantiles) | !is.vector(indices_for_quantiles)) { - stop("Parameter 'indices_for_quantiles' must be NULL or a numeric vector.") - } else if (length(indices_for_quantiles) > dim(data)[time_dim] | - max(indices_for_quantiles) > dim(data)[time_dim] | - any(indices_for_quantiles < 1)) { - stop("Parameter 'indices_for_quantiles' should be the indices of 'time_dim'.") - } - } - - } else { # abs_thresholds - - if (is.null(dim(abs_thresholds))) { # a vector - dim(abs_thresholds) <- length(abs_thresholds) - names(dim(abs_thresholds)) <- bin_dim_abs - } - # bin_dim_abs - if (!(bin_dim_abs %in% names(dim(abs_thresholds)))) { - stop("Parameter abs_thresholds' can be a vector or array with 'bin_dim_abs' dimension.") - } - if (!is.null(memb_dim) && memb_dim %in% names(dim(abs_thresholds))) { - stop("Parameter abs_thresholds' cannot have member dimension.") - } - dim_name_abs <- names(dim(abs_thresholds))[which(names(dim(abs_thresholds)) != bin_dim_abs)] - if (!all(dim_name_abs %in% names(dim(data)))) { - stop("Parameter 'abs_thresholds' dimensions except 'bin_dim_abs' must be in 'data' as well.") - } else { - if (any(dim(abs_thresholds)[dim_name_abs] != dim(data)[dim_name_abs])) { - stop("Parameter 'abs_thresholds' dimensions must have the same length as 'data'.") - } - } - if (!is.null(indices_for_quantiles)) { - warning("Parameter 'indices_for_quantiles' is not used when 'abs_thresholds' are provided.") - } - abs_target_dims <- bin_dim_abs - if (time_dim %in% names(dim(abs_thresholds))) { - abs_target_dims <- c(bin_dim_abs, time_dim) - } - - } - - ## weights - if (!is.null(weights)) { - if (!is.array(weights) | !is.numeric(weights)) - stop("Parameter 'weights' must be a named numeric array.") - -# if (is.null(dat_dim)) { - if (!is.null(memb_dim)) { - lendim_weights <- 2 - namesdim_weights <- c(time_dim, memb_dim) - } else { - lendim_weights <- 1 - namesdim_weights <- c(time_dim) - } - if (length(dim(weights)) != lendim_weights | - !all(names(dim(weights)) %in% namesdim_weights)) { - stop("Parameter 'weights' must have dimension ", - paste0(namesdim_weights, collapse = ' and '), ".") - } - if (any(dim(weights)[namesdim_weights] != dim(data)[namesdim_weights])) { - stop("Parameter 'weights' must have the same dimension length as ", - paste0(namesdim_weights, collapse = ' and '), " dimension in 'data'.") - } - weights <- Reorder(weights, namesdim_weights) - -# } else { -# if (length(dim(weights)) != 3 | -# any(!names(dim(weights)) %in% c(memb_dim, time_dim, dat_dim))) -# stop("Parameter 'weights' 
must have three dimensions with the names of ", -# "'memb_dim', 'time_dim' and 'dat_dim'.") -# if (dim(weights)[memb_dim] != dim(exp)[memb_dim] | -# dim(weights)[time_dim] != dim(exp)[time_dim] | -# dim(weights)[dat_dim] != dim(exp)[dat_dim]) { -# stop(paste0("Parameter 'weights' must have the same dimension lengths ", -# "as 'memb_dim', 'time_dim' and 'dat_dim' in 'exp'.")) -# } -# weights <- Reorder(weights, c(time_dim, memb_dim, dat_dim)) -# } - } - ## cross.val - if (!is.logical(cross.val) | length(cross.val) > 1) { - stop("Parameter 'cross.val' must be either TRUE or FALSE.") - } - ## ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { - stop("Parameter 'ncores' must be either NULL or a positive integer.") - } - } - ############################### - if (is.null(abs_thresholds)) { - res <- Apply(data = list(data = data), - target_dims = c(time_dim, memb_dim), - output_dims = c(bin_dim_abs, time_dim), - fun = .GetProbs, - prob_thresholds = prob_thresholds, - indices_for_quantiles = indices_for_quantiles, - weights = weights, cross.val = cross.val, ncores = ncores)$output1 - } else { - res <- Apply(data = list(data = data, abs_thresholds = abs_thresholds), - target_dims = list(c(time_dim, memb_dim), abs_target_dims), - output_dims = c(bin_dim_abs, time_dim), - fun = .GetProbs, - prob_thresholds = NULL, - indices_for_quantiles = NULL, - weights = NULL, cross.val = FALSE, ncores = ncores)$output1 - } - - return(res) -} - -.GetProbs <- function(data, indices_for_quantiles, - prob_thresholds = c(1/3, 2/3), abs_thresholds = NULL, - weights = NULL, cross.val = FALSE) { - # .GetProbs() is used in RPS, RPSS, ROCSS - # data - ## if data is exp: [sdate, memb] - ## if data is obs: [sdate, (memb)] - # weights: [sdate, (memb)], same as data - # if abs_thresholds is not NULL: [bin, (sdate)] - - # Add dim [memb = 1] to data if it doesn't have memb_dim - if (length(dim(data)) == 1) { - dim(data) <- c(dim(data), 1) - if (!is.null(weights)) dim(weights) <- c(dim(weights), 1) - } - - # Calculate absolute thresholds - if (is.null(abs_thresholds)) { - if (cross.val) { - quantiles <- array(NA, dim = c(bin = length(prob_thresholds), sdate = dim(data)[1])) - for (i_time in seq_len(dim(data)[1])) { - if (is.null(weights)) { - tmp <- which(indices_for_quantiles != i_time) - quantiles[, i_time] <- - quantile(x = as.vector(data[indices_for_quantiles[tmp], ]), - probs = prob_thresholds, type = 8, na.rm = TRUE) - } else { - # weights: [sdate, memb] - tmp <- which(indices_for_quantiles != i_time) - sorted_arrays <- - .sorted_distributions(data[indices_for_quantiles[tmp], ], - weights[indices_for_quantiles[tmp], ]) - sorted_data <- sorted_arrays$data - cumulative_weights <- sorted_arrays$cumulative_weights - quantiles[, i_time] <- approx(cumulative_weights, sorted_data, - prob_thresholds, "linear")$y - } - } - - } else { - if (is.null(weights)) { - quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), - probs = prob_thresholds, type = 8, na.rm = TRUE) - } else { - # weights: [sdate, memb] - sorted_arrays <- .sorted_distributions(data[indices_for_quantiles, ], - weights[indices_for_quantiles, ]) - sorted_data <- sorted_arrays$data - cumulative_weights <- sorted_arrays$cumulative_weights - quantiles <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y - } - quantiles <- array(rep(quantiles, dim(data)[1]), - dim = c(bin = length(quantiles), dim(data)[1])) - } - - } else { # abs_thresholds provided - quantiles <- 
abs_thresholds - if (length(dim(quantiles)) == 1) { - quantiles <- InsertDim(quantiles, lendim = dim(data)[1], - posdim = 2, name = names(dim(data))[1]) - } - } - # quantiles: [bin-1, sdate] - # Probabilities - probs <- array(dim = c(dim(quantiles)[1] + 1, dim(data)[1])) # [bin, sdate] - for (i_time in seq_len(dim(data)[1])) { - if (anyNA(data[i_time, ])) { - probs[, i_time] <- rep(NA, dim = dim(quantiles)[1] + 1) - } else { - if (is.null(weights)) { - probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], - threshold = quantiles[, i_time])) - } else { - sorted_arrays <- .sorted_distributions(data[i_time, ], weights[i_time, ]) - sorted_data <- sorted_arrays$data - cumulative_weights <- sorted_arrays$cumulative_weights - # find any quantiles that are outside the data range - integrated_probs <- array(dim = dim(quantiles)) - for (i_quant in seq_len(dim(quantiles)[1])) { - # for thresholds falling under the distribution - if (quantiles[i_quant, i_time] < min(sorted_data)) { - integrated_probs[i_quant, i_time] <- 0 - # for thresholds falling over the distribution - } else if (max(sorted_data) < quantiles[i_quant, i_time]) { - integrated_probs[i_quant, i_time] <- 1 - } else { - integrated_probs[i_quant, i_time] <- approx(sorted_data, cumulative_weights, - quantiles[i_quant, i_time], "linear")$y - } - } - probs[, i_time] <- append(integrated_probs[, i_time], 1) - - append(0, integrated_probs[, i_time]) - if (min(probs[, i_time]) < 0 | max(probs[, i_time]) > 1) { - stop("Probability in i_time = ", i_time, " is out of [0, 1].") - } - } - } - } - - return(probs) -} - -.sorted_distributions <- function(data_vector, weights_vector) { - weights_vector <- as.vector(weights_vector) - data_vector <- as.vector(data_vector) - weights_vector <- weights_vector / sum(weights_vector) # normalize to 1 - sorter <- order(data_vector) - sorted_weights <- weights_vector[sorter] - cumulative_weights <- cumsum(sorted_weights) - 0.5 * sorted_weights - cumulative_weights <- cumulative_weights - cumulative_weights[1] # fix the 0 - cumulative_weights <- cumulative_weights / - cumulative_weights[length(cumulative_weights)] # fix the 1 - return(list(data = data_vector[sorter], cumulative_weights = cumulative_weights)) -} - -- GitLab From 4d203f8a5055da825baa5a883b95335b444f858c Mon Sep 17 00:00:00 2001 From: abatalla Date: Fri, 9 Aug 2024 11:51:44 +0200 Subject: [PATCH 49/53] Delete RandomWalkTest.R (s2dv master branch duplicate) --- modules/Crossval/R/tmp/RandomWalkTest.R | 184 ------------------------ 1 file changed, 184 deletions(-) delete mode 100644 modules/Crossval/R/tmp/RandomWalkTest.R diff --git a/modules/Crossval/R/tmp/RandomWalkTest.R b/modules/Crossval/R/tmp/RandomWalkTest.R deleted file mode 100644 index 16d89f6d..00000000 --- a/modules/Crossval/R/tmp/RandomWalkTest.R +++ /dev/null @@ -1,184 +0,0 @@ -#'Random Walk test for skill differences -#' -#'Forecast comparison of the skill obtained with 2 forecasts (with respect to a -#'common observational reference) based on Random Walks (DelSole and Tippett, -#'2016). -#' -#'@param skill_A A numerical array of the time series of the scores obtained -#' with the forecaster A. -#'@param skill_B A numerical array of the time series of the scores obtained -#' with the forecaster B. The dimensions should be identical as parameter -#' 'skill_A'. -#'@param time_dim A character string indicating the name of the dimension along -#' which the tests are computed. The default value is 'sdate'. 
-#'@param test.type A character string indicating the type of significance test. -#' It can be "two.sided.approx" (to assess whether forecaster A and forecaster -#' B are significantly different in terms of skill with a two-sided test using -#' the approximation of DelSole and Tippett, 2016), "two.sided" (to assess -#' whether forecaster A and forecaster B are significantly different in terms -#' of skill with an exact two-sided test), "greater" (to assess whether -#' forecaster A shows significantly better skill than forecaster B with a -#' one-sided test for negatively oriented scores), or "less" (to assess whether -#' forecaster A shows significantly better skill than forecaster B with a -#' one-sided test for positively oriented scores). The default value is -#' "two.sided.approx". -#'@param alpha A numeric of the significance level to be used in the statistical -#' significance test (output "sign"). The default value is 0.05. -#'@param pval A logical value indicating whether to return the p-value of the -#' significance test. The default value is TRUE. -#'@param sign A logical value indicating whether to return the statistical -#' significance of the test based on 'alpha'. The default value is FALSE. -#'@param ncores An integer indicating the number of cores to use for parallel -#' computation. The default value is NULL. -#' -#'@return A list with: -#'\item{$score}{ -#' A numerical array with the same dimensions as the input arrays except -#' 'time_dim'. The number of times that forecaster A has been better than -#' forecaster B minus the number of times that forecaster B has been better -#' than forecaster A (for skill negatively oriented, i.e., the lower the -#' better). If $score is positive, forecaster A has been better more times -#' than forecaster B. If $score is negative, forecaster B has been better more -#' times than forecaster A. -#'} -#'\item{$sign}{ -#' A logical array of the statistical significance with the same dimensions -#' as the input arrays except "time_dim". Returned only if "sign" is TRUE. -#'} -#'\item{$p.val}{ -#' A numeric array of the p-values with the same dimensions as the input arrays -#' except "time_dim". Returned only if "pval" is TRUE. -#'} -#' -#'@details -#' Null and alternative hypothesis for "two-sided" test (regardless of the -#' orientation of the scores):\cr -#' H0: forecaster A and forecaster B are not different in terms of skill\cr -#' H1: forecaster A and forecaster B are different in terms of skill -#' -#' Null and alternative hypothesis for one-sided "greater" (for negatively -#' oriented scores, i.e., the lower the better) and "less" (for positively -#' oriented scores, i.e., the higher the better) tests:\cr -#' H0: forecaster A is not better than forecaster B\cr -#' H1: forecaster A is better than forecaster B -#' -#' Examples of negatively oriented scores are the RPS, RMSE and the Error, while -#' the ROC score is a positively oriented score. -#' -#' DelSole and Tippett (2016) approximation for two-sided test at 95% confidence -#' level: significant if the difference between the number of times that -#' forecaster A has been better than forecaster B and forecaster B has been -#' better than forecaster A is above 2sqrt(N) or below -2sqrt(N). 
-#' -#'@references -#'DelSole and Tippett (2016): https://doi.org/10.1175/MWR-D-15-0218.1 -#' -#'@examples -#' fcst_A <- array(data = 11:50, dim = c(sdate = 10, lat = 2, lon = 2)) -#' fcst_B <- array(data = 21:60, dim = c(sdate = 10, lat = 2, lon = 2)) -#' reference <- array(data = 1:40, dim = c(sdate = 10, lat = 2, lon = 2)) -#' scores_A <- abs(fcst_A - reference) -#' scores_B <- abs(fcst_B - reference) -#' res1 <- RandomWalkTest(skill_A = scores_A, skill_B = scores_B, pval = FALSE, sign = TRUE) -#' res2 <- RandomWalkTest(skill_A = scores_A, skill_B = scores_B, test.type = 'greater') -#' -#'@import multiApply -#'@importFrom ClimProjDiags Subset -#'@export -RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', - test.type = 'two.sided.approx', alpha = 0.05, pval = TRUE, - sign = FALSE, ncores = NULL) { - - # Check inputs - ## skill_A and skill_B - if (is.null(skill_A) | is.null(skill_B)) { - stop("Parameters 'skill_A' and 'skill_B' cannot be NULL.") - } - if (!is.numeric(skill_A) | !is.numeric(skill_B)) { - stop("Parameters 'skill_A' and 'skill_B' must be a numerical array.") - } - if (!identical(dim(skill_A), dim(skill_B))) { - stop("Parameters 'skill_A' and 'skill_B' must have the same dimensions.") - } - ## time_dim - if (!is.character(time_dim) | length(time_dim) != 1) { - stop("Parameter 'time_dim' must be a character string.") - } - if (!time_dim %in% names(dim(skill_A)) | !time_dim %in% names(dim(skill_B))) { - stop("Parameter 'time_dim' is not found in 'skill_A' or 'skill_B' dimensions.") - } - ## alpha - if (any(!is.numeric(alpha) | alpha <= 0 | alpha >= 1 | length(alpha) > 1)) { - stop("Parameter 'alpha' must be a number between 0 and 1.") - } - ## test.type - if (!test.type %in% c('two.sided.approx', 'two.sided', 'greater', 'less')) { - stop("Parameter 'test.type' must be 'two.sided.approx', 'two.sided', 'greater', or 'less'.") - } - if (test.type == 'two.sided.approx') { - if (alpha != 0.05) { - .warning("DelSole and Tippett (2016) aproximation is valid for alpha ", - "= 0.05 only. Returning the significance at the 0.05 significance level.") - } - if (pval) { - .warning("p-value cannot be returned with the DelSole and Tippett (2016) ", - "aproximation. 
Returning the significance at the 0.05 significance level.") - } - sign <- TRUE - } - ## ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { - stop("Parameter 'ncores' must be a positive integer.") - } - } - - ## Compute the Random Walk Test - res <- Apply(data = list(skill_A = skill_A, - skill_B = skill_B), - target_dims = list(skill_A = time_dim, - skill_B = time_dim), - fun = .RandomWalkTest, - test.type = test.type, - alpha = alpha, pval = pval, sign = sign, - ncores = ncores) - - return(res) -} - -.RandomWalkTest <- function(skill_A, skill_B, test.type = 'two.sided.approx', - alpha = 0.05, pval = TRUE, sign = FALSE) { - #skill_A and skill_B: [sdate] - - N.eff <- length(skill_A) - - A_better <- sum(skill_B > skill_A) - B_better <- sum(skill_B < skill_A) - - output <- NULL - output$score <- A_better - B_better - - if (test.type == 'two.sided.approx') { - output$sign <- abs(output$score) > (2 * sqrt(N.eff)) - - } else { - - if (!is.na(output$score)) { - p.val <- binom.test(x = A_better, n = floor(N.eff), p = 0.5, conf.level = 1 - alpha, - alternative = test.type)$p.value - - } else { - p.val <- NA - } - - if (pval) { - output$p.val <- p.val - } - if (sign) { - output$sign <- !is.na(p.val) & p.val <= alpha - } - - } - - return(output) -} -- GitLab From 70f44e3facec7274b25468083017608608b2ada7 Mon Sep 17 00:00:00 2001 From: abatalla Date: Fri, 9 Aug 2024 11:52:23 +0200 Subject: [PATCH 50/53] Delete EOF.R (s2dv master branch duplicate) --- modules/Crossval/R/tmp/EOF.R | 293 ----------------------------------- 1 file changed, 293 deletions(-) delete mode 100644 modules/Crossval/R/tmp/EOF.R diff --git a/modules/Crossval/R/tmp/EOF.R b/modules/Crossval/R/tmp/EOF.R deleted file mode 100644 index 87795b66..00000000 --- a/modules/Crossval/R/tmp/EOF.R +++ /dev/null @@ -1,293 +0,0 @@ -#'Area-weighted empirical orthogonal function analysis using SVD -#' -#'Perform an area-weighted EOF analysis using single value decomposition (SVD) -#'based on a covariance matrix or a correlation matrix if parameter 'corr' is -#'set to TRUE. -#' -#'@param ano A numerical array of anomalies with named dimensions to calculate -#' EOF. The dimensions must have at least 'time_dim' and 'space_dim'. NAs -#' could exist but it should be consistent along time_dim. That is, if one grid -#' point has NAs, all the time steps at this point should be NAs. -#'@param lat A vector of the latitudes of 'ano'. -#'@param lon A vector of the longitudes of 'ano'. -#'@param time_dim A character string indicating the name of the time dimension -#' of 'ano'. The default value is 'sdate'. -#'@param space_dim A vector of two character strings. The first is the dimension -#' name of latitude of 'ano' and the second is the dimension name of longitude -#' of 'ano'. The default value is c('lat', 'lon'). -#'@param neofs A positive integer of the modes to be kept. The default value is -#' 15. If time length or the product of the length of space_dim is smaller than -#' neofs, neofs will be changed to the minimum of the three values. -#'@param corr A logical value indicating whether to base on a correlation (TRUE) -#' or on a covariance matrix (FALSE). The default value is FALSE. -#'@param ncores An integer indicating the number of cores to use for parallel -#' computation. The default value is NULL. 
-#' -#'@return -#'A list containing: -#'\item{EOFs}{ -#' An array of EOF patterns normalized to 1 (unitless) with dimensions -#' (number of modes, rest of the dimensions of 'ano' except 'time_dim'). -#' Multiplying \code{EOFs} by \code{PCs} gives the original reconstructed -#' field. -#'} -#'\item{PCs}{ -#' An array of principal components with the units of the original field to -#' the power of 2, with dimensions (time_dim, number of modes, rest of the -#' dimensions of 'ano' except 'space_dim'). -#' 'PCs' contains already the percentage of explained variance so, -#' to reconstruct the original field it's only needed to multiply 'EOFs' -#' by 'PCs'. -#'} -#'\item{var}{ -#' An array of the percentage (%) of variance fraction of total variance -#' explained by each mode (number of modes). The dimensions are (number of -#' modes, rest of the dimensions of 'ano' except 'time_dim' and 'space_dim'). -#'} -#'\item{mask}{ -#' An array of the mask with dimensions (space_dim, rest of the dimensions of -#' 'ano' except 'time_dim'). It is made from 'ano', 1 for the positions that -#' 'ano' has value and NA for the positions that 'ano' has NA. It is used to -#' replace NAs with 0s for EOF calculation and mask the result with NAs again -#' after the calculation. -#'} -#'\item{wght}{ -#' An array of the area weighting with dimensions 'space_dim'. It is calculated -#' by cosine of 'lat' and used to compute the fraction of variance explained by -#' each EOFs. -#'} -#'\item{tot_var}{ -#' A number or a numeric array of the total variance explained by all the modes. -#' The dimensions are same as 'ano' except 'time_dim' and 'space_dim'. -#'} -#' -#'@seealso ProjectField, NAO, PlotBoxWhisker -#'@examples -#'# This example computes the EOFs along forecast horizons and plots the one -#'# that explains the greatest amount of variability. The example data has low -#'# resolution so the result may not be explanatory, but it displays how to -#'# use this function. 
-#'\dontshow{ -#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') -#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), -#' c('observation'), startDates, -#' leadtimemin = 1, -#' leadtimemax = 4, -#' output = 'lonlat', -#' latmin = 27, latmax = 48, -#' lonmin = -12, lonmax = 40) -#'} -#'ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) -#'tmp <- MeanDims(ano$exp, c('dataset', 'member')) -#'ano <- tmp[1, , ,] -#'names(dim(ano)) <- names(dim(tmp))[-2] -#'eof <- EOF(ano, sampleData$lat, sampleData$lon) -#'\dontrun{ -#'PlotEquiMap(eof$EOFs[1, , ], sampleData$lon, sampleData$lat) -#'} -#' -#'@import multiApply -#'@importFrom stats sd -#'@export -EOF <- function(ano, lat, lon, time_dim = 'sdate', space_dim = c('lat', 'lon'), - neofs = 15, corr = FALSE, ncores = NULL) { - - # Check inputs - ## ano - if (is.null(ano)) { - stop("Parameter 'ano' cannot be NULL.") - } - if (!is.numeric(ano)) { - stop("Parameter 'ano' must be a numeric array.") - } - if (any(is.null(names(dim(ano)))) | any(nchar(names(dim(ano))) == 0)) { - stop("Parameter 'ano' must have dimension names.") - } - ## time_dim - if (!is.character(time_dim) | length(time_dim) > 1) { - stop("Parameter 'time_dim' must be a character string.") - } - if (!time_dim %in% names(dim(ano))) { - stop("Parameter 'time_dim' is not found in 'ano' dimension.") - } - ## space_dim - if (!is.character(space_dim) | length(space_dim) != 2) { - stop("Parameter 'space_dim' must be a character vector of 2.") - } - if (!all(space_dim %in% names(dim(ano)))) { - stop("Parameter 'space_dim' is not found in 'ano' dimension.") - } - ## lat - if (!is.numeric(lat) | length(lat) != dim(ano)[space_dim[1]]) { - stop("Parameter 'lat' must be a numeric vector with the same ", - "length as the latitude dimension of 'ano'.") - } - if (any(lat > 90 | lat < -90)) { - stop("Parameter 'lat' must contain values within the range [-90, 90].") - } - ## lon - if (!is.numeric(lon) | length(lon) != dim(ano)[space_dim[2]]) { - stop("Parameter 'lon' must be a numeric vector with the same ", - "length as the longitude dimension of 'ano'.") - } - if (any(lon > 360 | lon < -360)) { - .warning("Some 'lon' is out of the range [-360, 360].") - } - ## neofs - if (!is.numeric(neofs) | neofs %% 1 != 0 | neofs <= 0 | length(neofs) > 1) { - stop("Parameter 'neofs' must be a positive integer.") - } - ## corr - if (!is.logical(corr) | length(corr) > 1) { - stop("Parameter 'corr' must be one logical value.") - } - ## ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { - stop("Parameter 'ncores' must be a positive integer.") - } - } - - ############################### - # Calculate EOF - -# # Replace mask of NAs with 0s for EOF analysis. -# ano[!is.finite(ano)] <- 0 - - # Area weighting. Weights for EOF; needed to compute the - # fraction of variance explained by each EOFs - space_ind <- sapply(space_dim, function(a) which(names(dim(ano)) == a)) - wght <- array(cos(lat * pi / 180), dim = dim(ano)[space_ind]) - - # We want the covariance matrix to be weigthed by the grid - # cell area so the anomaly field is weighted by its square - # root since the covariance matrix equals transpose(ano) - # times ano. 
- wght <- sqrt(wght) - - # neofs is bounded - if (neofs != min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), neofs)) { - neofs <- min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), neofs) - .warning(paste0("Parameter 'neofs' is changed to ", neofs, ", the minimum among ", - "the length of time_dim, the production of the length of space_dim, ", - "and neofs.")) - } - - res <- Apply(ano, - target_dims = c(time_dim, space_dim), - output_dims = list(EOFs = c('mode', space_dim), - PCs = c(time_dim, 'mode'), - var = 'mode', - tot_var = NULL, - mask = space_dim), - fun = .EOF, - corr = corr, neofs = neofs, - wght = wght, - ncores = ncores) - - return(c(res, wght = list(wght))) - -} - -.EOF <- function(ano, neofs = 15, corr = FALSE, wght = wght) { - # ano: [time, lat, lon] - - # Dimensions - nt <- dim(ano)[1] - ny <- dim(ano)[2] - nx <- dim(ano)[3] - - # Check if all the time steps at one grid point are NA-consistent. - # The grid point should have all NAs or no NA along time dim. - if (anyNA(ano)) { - ano_latlon <- array(ano, dim = c(nt, ny * nx)) # [time, lat*lon] - na_ind <- which(is.na(ano_latlon), arr.ind = T) - if (dim(na_ind)[1] != nt * length(unique(na_ind[, 2]))) { - stop("Detect certain grid points have NAs but not consistent across time ", - "dimension. If the grid point is NA, it should have NA at all time step.") - } - } - - # Build the mask - mask <- ano[1, , ] - mask[!is.finite(mask)] <- NA - mask[is.finite(mask)] <- 1 - dim(mask) <- c(ny, nx) - - # Replace mask of NAs with 0s for EOF analysis. - ano[!is.finite(ano)] <- 0 - - ano <- ano * InsertDim(wght, 1, nt) - - # The use of the correlation matrix is done under the option corr. - if (corr) { - stdv <- apply(ano, c(2, 3), sd, na.rm = T) - ano <- ano / InsertDim(stdv, 1, nt) - } - - # Time/space matrix for SVD - dim(ano) <- c(nt, ny * nx) - dim.dat <- dim(ano) - - # 'transpose' means the array needs to be transposed before - # calling La.svd for computational efficiency because the - # spatial dimension is larger than the time dimension. This - # goes with transposing the outputs of LA.svd also. - if (dim.dat[2] > dim.dat[1]) { - transpose <- TRUE - } else { - transpose <- FALSE - } - if (transpose) { - pca <- La.svd(t(ano)) - } else { - pca <- La.svd(ano) - } - - # La.svd conventions: decomposition X = U D t(V) La.svd$u - # returns U La.svd$d returns diagonal values of D La.svd$v - # returns t(V) !! The usual convention is PC=U and EOF=V. - # If La.svd is called for ano (transpose=FALSE case): EOFs: - # $v PCs: $u If La.svd is called for t(ano) (transposed=TRUE - # case): EOFs: t($u) PCs: t($v) - - if (transpose) { - pca.EOFs <- t(pca$u) - pca.PCs <- t(pca$v) - } else { - pca.EOFs <- pca$v - pca.PCs <- pca$u - } - - # The numbers of transposition is limited to neofs - PC <- pca.PCs[, 1:neofs] - EOF <- pca.EOFs[1:neofs, ] - dim(EOF) <- c(neofs, ny, nx) - - # To sort out crash when neofs=1. 
- if (neofs == 1) { - PC <- InsertDim(PC, 2, 1, name = 'new') - } - - # Computation of the % of variance associated with each mode - W <- pca$d[1:neofs] - tot.var <- sum(pca$d^2) - var.eof <- 100 * pca$d[1:neofs]^2 / tot.var - - for (e in 1:neofs) { - # Set all masked grid points to NA in the EOFs - # Divide patterns by area weights so that EOF * PC gives unweigthed (original) data - EOF[e, , ] <- EOF[e, , ] * mask / wght - # PC is multiplied by the explained variance, - # so that the reconstruction is only EOF * PC - PC[, e] <- PC[, e] * W[e] - } - - if (neofs == 1) { - var.eof <- as.array(var.eof) - } - - return(invisible(list(EOFs = EOF, PCs = PC, var = var.eof, tot_var = tot.var, mask = mask))) -} - -- GitLab From 4e54a7e9a7dc965146c70d1243c21201a30c7261 Mon Sep 17 00:00:00 2001 From: abatalla Date: Fri, 9 Aug 2024 11:52:37 +0200 Subject: [PATCH 51/53] Delete Eno.R (s2dv master branch duplicate) --- modules/Crossval/R/tmp/Eno.R | 103 ----------------------------------- 1 file changed, 103 deletions(-) delete mode 100644 modules/Crossval/R/tmp/Eno.R diff --git a/modules/Crossval/R/tmp/Eno.R b/modules/Crossval/R/tmp/Eno.R deleted file mode 100644 index cb927602..00000000 --- a/modules/Crossval/R/tmp/Eno.R +++ /dev/null @@ -1,103 +0,0 @@ -#'Compute effective sample size with classical method -#' -#'Compute the number of effective samples along one dimension of an array. This -#'effective number of independent observations can be used in -#'statistical/inference tests.\cr -#'The calculation is based on eno function from Caio Coelho from rclim.txt. -#' -#'@param data A numeric array with named dimensions. -#'@param time_dim A function indicating the dimension along which to compute -#' the effective sample size. The default value is 'sdate'. -#'@param na.action A function. It can be na.pass (missing values are allowed) -#' or na.fail (no missing values are allowed). See details in stats::acf(). -#' The default value is na.pass. -#'@param ncores An integer indicating the number of cores to use for parallel -#' computation. The default value is NULL. -#' -#'@return An array with the same dimension as parameter 'data' except the -#' time_dim dimension, which is removed after the computation. The array -#' indicates the number of effective sample along time_dim. 
-#' -#'@examples -#'set.seed(1) -#'data <- array(rnorm(800), dim = c(dataset = 1, member = 2, sdate = 4, -#' ftime = 4, lat = 10, lon = 10)) -#'na <- floor(runif(40, min = 1, max = 800)) -#'data[na] <- NA -#'res <- Eno(data) -#' -#'@importFrom stats acf na.pass na.fail -#'@import multiApply -#'@export -Eno <- function(data, time_dim = 'sdate', na.action = na.pass, ncores = NULL) { - - # Check inputs - ## data - if (is.null(data)) { - stop("Parameter 'data' cannot be NULL.") - } - if (!is.numeric(data)) { - stop("Parameter 'data' must be a numeric array.") - } - if (is.null(dim(data))) { #is vector - dim(data) <- c(length(data)) - names(dim(data)) <- time_dim - } - if (any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) { - stop("Parameter 'data' must have dimension names.") - } - ## time_dim - if (!is.character(time_dim) | length(time_dim) > 1) { - stop("Parameter 'time_dim' must be a character string.") - } - if (!time_dim %in% names(dim(data))) { - stop("Parameter 'time_dim' is not found in 'data' dimension.") - } - ## na.action - if (as.character(substitute(na.action)) != "na.pass" & - as.character(substitute(na.action)) != "na.fail") { - stop("Parameter 'na.action' must be a function either na.pass or na.fail.") - } - if (as.character(substitute(na.action)) == "na.fail" && anyNA(data)) { - stop("Calculation fails because NA is found in paratemter 'data', ", - "which is not accepted when ", - "parameter 'na.action' = na.fail.") - } - ## ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { - stop("Parameter 'ncores' must be a positive integer.") - } - } - - ############################### - # Calculate Eno - - eno <- Apply(data = list(data), - target_dims = time_dim, - output_dims = NULL, - fun = .Eno, - na.action = na.action, - ncores = ncores)$output1 - - return(eno) -} - -.Eno <- function(x, na.action) { - n <- length(sort(x)) - if (n > 1) { - a <- acf(x, lag.max = n - 1, plot = FALSE, - na.action = na.action)$acf[2:n, 1, 1] - s <- 0 - for (k in 1:(n - 1)) { - s <- s + (((n - k) / n) * a[k]) - } - eno <- min(n / (1 + (2 * s)), n) - } else { - eno <- NA - } - - return(eno) -} - -- GitLab From e7289ff61f606bf2bee1b07331190d603a134893 Mon Sep 17 00:00:00 2001 From: abatalla Date: Fri, 9 Aug 2024 11:53:24 +0200 Subject: [PATCH 52/53] Delete ProjectField.R (s2dv master branch duplicate) --- modules/Crossval/R/tmp/ProjectField.R | 272 -------------------------- 1 file changed, 272 deletions(-) delete mode 100644 modules/Crossval/R/tmp/ProjectField.R diff --git a/modules/Crossval/R/tmp/ProjectField.R b/modules/Crossval/R/tmp/ProjectField.R deleted file mode 100644 index efa35dc3..00000000 --- a/modules/Crossval/R/tmp/ProjectField.R +++ /dev/null @@ -1,272 +0,0 @@ -#'Project anomalies onto modes of variability -#' -#'Project anomalies onto modes of variability to get the temporal evolution of -#'the EOF mode selected. It returns principal components (PCs) by area-weighted -#'projection onto EOF pattern (from \code{EOF()}) or REOF pattern (from -#'\code{REOF()} or \code{EuroAtlanticTC()}). The calculation removes NA and -#'returns NA if the whole spatial pattern is NA. -#' -#'@param ano A numerical array of anomalies with named dimensions. The -#' dimensions must have at least 'time_dim' and 'space_dim'. It can be -#' generated by Ano(). -#'@param eof A list that contains at least 'EOFs' or 'REOFs' and 'wght', which -#' are both arrays. 'EOFs' or 'REOFs' must have dimensions 'mode' and -#' 'space_dim' at least. 
'wght' has dimensions space_dim. It can be generated -#' by EOF() or REOF(). -#'@param time_dim A character string indicating the name of the time dimension -#' of 'ano'. The default value is 'sdate'. -#'@param space_dim A vector of two character strings. The first is the dimension -#' name of latitude of 'ano' and the second is the dimension name of longitude -#' of 'ano'. The default value is c('lat', 'lon'). -#'@param mode An integer of the variability mode number in the EOF to be -#' projected on. The default value is NULL, which means all the modes of 'eof' -#' is calculated. -#'@param ncores An integer indicating the number of cores to use for parallel -#' computation. The default value is NULL. -#' -#'@return A numerical array of the principal components in the verification -#' format. The dimensions are the same as 'ano' except 'space_dim'. -#' -#'@seealso EOF, NAO, PlotBoxWhisker -#'@examples -#'\dontshow{ -#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') -#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), -#' c('observation'), startDates, -#' leadtimemin = 1, -#' leadtimemax = 4, -#' output = 'lonlat', -#' latmin = 27, latmax = 48, -#' lonmin = -12, lonmax = 40) -#'} -#'ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) -#'eof_exp <- EOF(ano$exp, sampleData$lat, sampleData$lon) -#'eof_obs <- EOF(ano$obs, sampleData$lat, sampleData$lon) -#'mode1_exp <- ProjectField(ano$exp, eof_exp, mode = 1) -#'mode1_obs <- ProjectField(ano$obs, eof_obs, mode = 1) -#' -#'\dontrun{ -#' # Plot the forecast and the observation of the first mode for the last year -#' # of forecast -#' sdate_dim_length <- dim(mode1_obs)['sdate'] -#' plot(mode1_obs[sdate_dim_length, 1, 1, ], type = "l", ylim = c(-1, 1), -#' lwd = 2) -#' for (i in 1:dim(mode1_exp)['member']) { -#' par(new = TRUE) -#' plot(mode1_exp[sdate_dim_length, 1, i, ], type = "l", col = rainbow(10)[i], -#' ylim = c(-15000, 15000)) -#' } -#'} -#' -#'@import multiApply -#'@export -ProjectField <- function(ano, eof, time_dim = 'sdate', space_dim = c('lat', 'lon'), - mode = NULL, ncores = NULL) { - - # Check inputs - ## ano (1) - if (is.null(ano)) { - stop("Parameter 'ano' cannot be NULL.") - } - if (!is.numeric(ano)) { - stop("Parameter 'ano' must be a numeric array.") - } - if (any(is.null(names(dim(ano)))) | any(nchar(names(dim(ano))) == 0)) { - stop("Parameter 'ano' must have dimension names.") - } - ## eof (1) - if (is.null(eof)) { - stop("Parameter 'eof' cannot be NULL.") - } - if (!is.list(eof)) { - stop("Parameter 'eof' must be a list generated by EOF() or REOF().") - } - if ('EOFs' %in% names(eof)) { - EOFs <- "EOFs" - } else if ('REOFs' %in% names(eof)) { - EOFs <- "REOFs" - } else if ('patterns' %in% names(eof)) { - EOFs <- "patterns" - } else { - stop("Parameter 'eof' must be a list that contains 'EOFs', 'REOFs', ", - "or 'patterns'. It can be generated by EOF(), REOF(), or EuroAtlanticTC().") - } - if (!'wght' %in% names(eof)) { - stop("Parameter 'eof' must be a list that contains 'wght'. 
", - "It can be generated by EOF() or REOF().") - } - if (!is.numeric(eof[[EOFs]]) || !is.array(eof[[EOFs]])) { - stop("The component 'EOFs' or 'REOFs' of parameter 'eof' must be a numeric array.") - } - if (!is.numeric(eof$wght) || !is.array(eof$wght)) { - stop("The component 'wght' of parameter 'eof' must be a numeric array.") - } - ## time_dim - if (!is.character(time_dim) | length(time_dim) > 1) { - stop("Parameter 'time_dim' must be a character string.") - } - if (!time_dim %in% names(dim(ano))) { - stop("Parameter 'time_dim' is not found in 'ano' dimension.") - } - ## space_dim - if (!is.character(space_dim) | length(space_dim) != 2) { - stop("Parameter 'space_dim' must be a character vector of 2.") - } - if (!all(space_dim %in% names(dim(ano)))) { - stop("Parameter 'space_dim' is not found in 'ano' dimension.") - } - ## ano (2) - if (!all(space_dim %in% names(dim(ano))) | !time_dim %in% names(dim(ano))) { - stop("Parameter 'ano' must be an array with dimensions named as ", - "parameter 'space_dim' and 'time_dim'.") - } - ## eof (2) - if (!all(space_dim %in% names(dim(eof[[EOFs]]))) | - !'mode' %in% names(dim(eof[[EOFs]]))) { - stop("The component 'EOFs' or 'REOFs' of parameter 'eof' must be an array ", - "with dimensions named as parameter 'space_dim' and 'mode'.") - } - if (length(dim(eof$wght)) != 2 | !all(names(dim(eof$wght)) %in% space_dim)) { - stop("The component 'wght' of parameter 'eof' must be an array ", - "with dimensions named as parameter 'space_dim'.") - } - ## mode - if (!is.null(mode)) { - if (!is.numeric(mode) | mode %% 1 != 0 | mode < 0 | length(mode) > 1) { - stop("Parameter 'mode' must be NULL or a positive integer.") - } - if (mode > dim(eof[[EOFs]])['mode']) { - stop("Parameter 'mode' is greater than the number of available ", - "modes in 'eof'.") - } - } - ## ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { - stop("Parameter 'ncores' must be a positive integer.") - } - } - -#------------------------------------------------------- - - # Keep the chosen mode - if (!is.null(mode)) { - eof_mode <- ClimProjDiags::Subset(eof[[EOFs]], 'mode', mode, drop = 'selected') - } else { - eof_mode <- eof[[EOFs]] - } - - if ('mode' %in% names(dim(eof_mode))) { - dimnames_without_mode <- names(dim(eof_mode))[-which(names(dim(eof_mode)) == 'mode')] - } else { - dimnames_without_mode <- names(dim(eof_mode)) - } - - if (all(dimnames_without_mode %in% space_dim)) { # eof_mode: [lat, lon] or [mode, lat, lon] - if ('mode' %in% names(dim(eof_mode))) { - eof_mode_target <- c('mode', space_dim) - output_dims <- c('mode', time_dim) - } else { - eof_mode_target <- space_dim - output_dims <- time_dim - } - res <- Apply(list(ano, eof_mode), - target_dims = list(c(time_dim, space_dim), - eof_mode_target), - output_dims = output_dims, - wght = eof$wght, - fun = .ProjectField, - ncores = ncores)$output1 - - } else { - - if (!all(dimnames_without_mode %in% names(dim(ano)))) { - stop("The array 'EOF' in parameter 'eof' has dimension not in parameter ", - "'ano'. Check if 'ano' and 'eof' are compatible.") - } - - common_dim_ano <- dim(ano)[which(names(dim(ano)) %in% dimnames_without_mode)] - if (any(common_dim_ano[match(dimnames_without_mode, names(common_dim_ano))] != - dim(eof_mode)[dimnames_without_mode])) { - stop("Found paramter 'ano' and 'EOF' in parameter 'eof' have common dimensions ", - "with different length. Check if 'ano' and 'eof' are compatible.") - } - - # Enlarge eof/ano is needed. 
The margin_dims of Apply() must be consistent - # between ano and eof. - additional_dims <- dim(ano)[-which(names(dim(ano)) %in% names(dim(eof_mode)))] - additional_dims <- additional_dims[-which(names(additional_dims) == time_dim)] - if (length(additional_dims) != 0) { - for (i in seq_along(additional_dims)) { - eof_mode <- InsertDim(eof_mode, posdim = (length(dim(eof_mode)) + 1), - lendim = additional_dims[i], name = names(additional_dims)[i]) - } - } - if ('mode' %in% names(dim(eof_mode))) { - eof_mode_target <- c('mode', space_dim) - output_dims <- c('mode', time_dim) - } else { - eof_mode_target <- space_dim - output_dims <- time_dim - } - res <- Apply(list(ano, eof_mode), - target_dims = list(c(time_dim, space_dim), - eof_mode_target), - output_dims = output_dims, - wght = eof$wght, - fun = .ProjectField, - ncores = ncores)$output1 - } - - return(res) -} - - -.ProjectField <- function(ano, eof_mode, wght) { - # ano: [sdate, lat, lon] - # eof_mode: [lat, lon] or [mode, lat, lon] - # wght: [lat, lon] - - ntime <- dim(ano)[1] - if (length(dim(eof_mode)) == 2) { # mode != NULL - # Initialization of pc.ver. - pc.ver <- array(NA, dim = ntime) #[sdate] - - # Weight - e.1 <- eof_mode * wght - ano <- ano * InsertDim(wght, 1, ntime) - #ano <- aaply(ano, 1, '*', wght) # much heavier - - na <- rowMeans(ano, na.rm = TRUE) # if [lat, lon] all NA, it's NA - #na <- apply(ano, 1, mean, na.rm = TRUE) # much heavier - tmp <- ano * InsertDim(e.1, 1, ntime) # [sdate, lat, lon] - rm(ano) - #pc.ver <- apply(tmp, 1, sum, na.rm = TRUE) # much heavier - pc.ver <- rowSums(tmp, na.rm = TRUE) - pc.ver[which(is.na(na))] <- NA - - } else { # mode = NULL - # Weight - e.1 <- eof_mode * InsertDim(wght, 1, dim(eof_mode)[1]) - dim(e.1) <- c(dim(eof_mode)[1], prod(dim(eof_mode)[2:3])) # [mode, lat*lon] - ano <- ano * InsertDim(wght, 1, ntime) - dim(ano) <- c(ntime, prod(dim(ano)[2:3])) # [sdate, lat*lon] - - na <- rowMeans(ano, na.rm = TRUE) # if [lat, lon] all NA, it's NA - na <- aperm(array(na, dim = c(ntime, dim(e.1)[1])), c(2, 1)) - - # Matrix multiplication e.1 [mode, lat*lon] by ano [lat*lon, sdate] - # Result: [mode, sdate] - pc.ver <- e.1 %*% t(ano) - pc.ver[which(is.na(na))] <- NA - -# # Change back dimensions to feet original input -# dim(projection) <- c(moredims, mode = unname(neofs)) -# return(projection) - } - - return(pc.ver) -} - - -- GitLab From 4a96668bf6e9944250ce347e2768940a1ab61556 Mon Sep 17 00:00:00 2001 From: abatalla Date: Fri, 9 Aug 2024 11:53:39 +0200 Subject: [PATCH 53/53] Delete Utils.R (s2dv master branch duplicate) --- modules/Crossval/R/tmp/Utils.R | 1885 -------------------------------- 1 file changed, 1885 deletions(-) delete mode 100644 modules/Crossval/R/tmp/Utils.R diff --git a/modules/Crossval/R/tmp/Utils.R b/modules/Crossval/R/tmp/Utils.R deleted file mode 100644 index cd7a1e10..00000000 --- a/modules/Crossval/R/tmp/Utils.R +++ /dev/null @@ -1,1885 +0,0 @@ -#'@importFrom abind abind -#'@import plyr ncdf4 -#'@importFrom grDevices png jpeg pdf svg bmp tiff -#'@importFrom easyVerification convert2prob - -## Function to tell if a regexpr() match is a complete match to a specified name -.IsFullMatch <- function(x, name) { - x > 0 && attributes(x)$match.length == nchar(name) -} - -.ConfigReplaceVariablesInString <- function(string, replace_values, - allow_undefined_key_vars = FALSE) { - # This function replaces all the occurrences of a variable in a string by - # their corresponding string stored in the replace_values. 
-  if (length(strsplit(string, "\\$")[[1]]) > 1) {
-    parts <- strsplit(string, "\\$")[[1]]
-    output <- ""
-    i <- 0
-    for (part in parts) {
-      if (i %% 2 == 0) {
-        output <- paste0(output, part)
-      } else {
-        if (part %in% names(replace_values)) {
-          output <- paste0(output,
-                           .ConfigReplaceVariablesInString(replace_values[[part]],
-                                                           replace_values,
-                                                           allow_undefined_key_vars))
-        } else if (allow_undefined_key_vars) {
-          output <- paste0(output, "$", part, "$")
-        } else {
-          stop('Error: The variable $', part,
-               '$ was not defined in the configuration file.')
-        }
-      }
-      i <- i + 1
-    }
-    output
-  } else {
-    string
-  }
-}
-
-.KnownLonNames <- function() {
-  known_lon_names <- c('lon', 'longitude', 'x', 'i', 'nav_lon')
-  return(known_lon_names)
-}
-
-.KnownLatNames <- function() {
-  known_lat_names <- c('lat', 'latitude', 'y', 'j', 'nav_lat')
-  return(known_lat_names)
-}
-
-.t2nlatlon <- function(t) {
-  ## As seen in cdo's griddes.c: ntr2nlat()
-  nlats <- (t * 3 + 1) / 2
-  if ((nlats > 0) && (nlats - trunc(nlats) >= 0.5)) {
-    nlats <- ceiling(nlats)
-  } else {
-    nlats <- round(nlats)
-  }
-  if (nlats %% 2 > 0) {
-    nlats <- nlats + 1
-  }
-  ## As seen in cdo's griddes.c: compNlon(), and as specified in ECMWF
-  nlons <- 2 * nlats
-  keep_going <- TRUE
-  while (keep_going) {
-    n <- nlons
-    if (n %% 8 == 0) n <- trunc(n / 8)
-    while (n %% 6 == 0) n <- trunc(n / 6)
-    while (n %% 5 == 0) n <- trunc(n / 5)
-    while (n %% 4 == 0) n <- trunc(n / 4)
-    while (n %% 3 == 0) n <- trunc(n / 3)
-    if (n %% 2 == 0) n <- trunc(n / 2)
-    if (n <= 8) {
-      keep_going <- FALSE
-    } else {
-      nlons <- nlons + 2
-      if (nlons > 9999) {
-        stop("Error: pick another gaussian grid truncation. ",
-             "It doesn't fulfill the standards to apply FFT.")
-      }
-    }
-  }
-  c(nlats, nlons)
-}
-
-.nlat2t <- function(nlats) {
-  trunc((nlats * 2 - 1) / 3)
-}
-
-.LoadDataFile <- function(work_piece, explore_dims = FALSE, silent = FALSE) {
-  # The purpose, working modes, inputs and outputs of this function are
-  # explained in ?LoadDataFile
-  #suppressPackageStartupMessages({library(ncdf4)})
-  #suppressPackageStartupMessages({library(bigmemory)})
-  #suppressPackageStartupMessages({library(plyr)})
-  # Auxiliary function to convert array indices to linear indices
-  arrayIndex2VectorIndex <- function(indices, dims) {
-    if (length(indices) > length(dims)) {
-      stop("Error: indices do not match dimensions in arrayIndex2VectorIndex.")
-    }
-    position <- 1
-    dims <- rev(dims)
-    indices <- rev(indices)
-    for (i in seq_along(indices)) {
-      position <- position + (indices[i] - 1) * prod(dims[-(1:i)])
-    }
-    position
-  }
-
-  found_file <- NULL
-  dims <- NULL
-  grid_name <- units <- var_long_name <- NULL
-  is_2d_var <- array_across_gw <- NULL
-  data_across_gw <- NULL
-
-  filename <- work_piece[['filename']]
-  namevar <- work_piece[['namevar']]
-  output <- work_piece[['output']]
-  # The names of all data files in the directory of the repository that match
-  # the pattern are obtained.
-  if (any(grep("^http", filename))) {
-    is_url <- TRUE
-    files <- filename
-    ## TODO: Check that the user is not using shell globbing exps.
-  } else {
-    is_url <- FALSE
-    files <- Sys.glob(filename)
-  }
-
-  # If we don't find any, we leave the flag 'found_file' with a NULL value.
-  if (length(files) > 0) {
-    # The first file that matches the pattern is chosen and read.
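(For illustration, a hypothetical pattern such as "/esarchive/exp/*/tas_*.nc" may expand to several paths through Sys.glob() above; only head(files, 1) is opened below.)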
-    filename <- head(files, 1)
-    filein <- filename
-    found_file <- filename
-    mask <- work_piece[['mask']]
-
-    if (!silent && explore_dims) {
-      .message(paste("Exploring dimensions...", filename))
-    ##} else {
-    ##  cat(paste("* Reading & processing data...", filename, '\n'))
-    ##}
-    }
-
-    # We will fill in 'expected_dims' with the names of the expected dimensions of
-    # the data array we'll retrieve from the file.
-    expected_dims <- NULL
-    remap_needed <- FALSE
-    # But first we open the file and work out whether the requested variable is 2d
-    fnc <- nc_open(filein)
-    if (!(namevar %in% names(fnc$var))) {
-      stop("Error: The variable ", namevar, " is not defined in the file ", filename)
-    }
-    var_long_name <- fnc$var[[namevar]]$longname
-    units <- fnc$var[[namevar]]$units
-    file_dimnames <- unlist(lapply(fnc$var[[namevar]][['dim']], '[[', 'name'))
-    # The following two 'ifs' are to allow for 'lon'/'lat' by default, instead of
-    # 'longitude'/'latitude'.
-    if (!(work_piece[['dimnames']][['lon']] %in% file_dimnames) &&
-        (work_piece[['dimnames']][['lon']] == 'longitude') &&
-        ('lon' %in% file_dimnames)) {
-      work_piece[['dimnames']][['lon']] <- 'lon'
-    }
-    if (!(work_piece[['dimnames']][['lat']] %in% file_dimnames) &&
-        (work_piece[['dimnames']][['lat']] == 'latitude') &&
-        ('lat' %in% file_dimnames)) {
-      work_piece[['dimnames']][['lat']] <- 'lat'
-    }
-    if (is.null(work_piece[['is_2d_var']])) {
-      is_2d_var <- all(c(work_piece[['dimnames']][['lon']],
-                         work_piece[['dimnames']][['lat']]) %in%
-                       unlist(lapply(fnc$var[[namevar]][['dim']],
-                                     '[[', 'name')))
-    } else {
-      is_2d_var <- work_piece[['is_2d_var']]
-    }
-    if ((is_2d_var || work_piece[['is_file_per_dataset']])) {
-      if (Sys.which("cdo")[[1]] == "") {
-        stop("Error: CDO libraries not available")
-      }
-
-      cdo_version <-
-        strsplit(suppressWarnings(
-          system2("cdo", args = '-V', stderr = TRUE))[[1]], ' ')[[1]][5]
-
-      cdo_version <-
-        as.numeric_version(unlist(strsplit(cdo_version, "[A-Za-z]", fixed = FALSE))[[1]])
-
-    }
-    # If the variable to load is 2-d, we need to determine whether:
-    # - interpolation is needed
-    # - subsetting is requested
-    if (is_2d_var) {
-      ## We read the longitudes and latitudes from the file.
-      lon <- ncvar_get(fnc, work_piece[['dimnames']][['lon']])
-      lat <- ncvar_get(fnc, work_piece[['dimnames']][['lat']])
-      first_lon_in_original_file <- lon[1]
-      # If a common grid is requested or we are exploring the file dimensions
-      # we need to read the grid type and size of the file to finally work out the
-      # CDO grid name.
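As a sketch of the naming worked out below (grid sizes assumed for illustration): a regular 360x180 lonlat grid yields grid_name 'r360x180', while a gaussian grid with 128 latitudes yields 't85grid', since:

    .nlat2t(128)  # trunc((128 * 2 - 1) / 3) = 85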
- if (!is.null(work_piece[['grid']]) || explore_dims) { - # Here we read the grid type and its number of longitudes and latitudes - file_info <- system(paste('cdo -s griddes', filein, '2> /dev/null'), intern = TRUE) - grids_positions <- grep('# gridID', file_info) - if (length(grids_positions) < 1) { - stop("The grid should be defined in the files.") - } - grids_first_lines <- grids_positions + 2 - grids_last_lines <- c((grids_positions - 2)[-1], length(file_info)) - grids_info <- as.list(seq_along(grids_positions)) - grids_info <- lapply(grids_info, - function (x) file_info[grids_first_lines[x]:grids_last_lines[x]]) - grids_info <- lapply(grids_info, function (x) gsub(" *", " ", x)) - grids_info <- lapply(grids_info, function (x) gsub("^ | $", "", x)) - grids_info <- lapply(grids_info, function (x) unlist(strsplit(x, " | = "))) - grids_types <- unlist(lapply(grids_info, function (x) x[grep('gridtype', x) + 1])) - grids_matches <- unlist(lapply(grids_info, function (x) { - nlons <- if (any(grep('xsize', x))) { - as.numeric(x[grep('xsize', x) + 1]) - } else { - NA - } - nlats <- if (any(grep('ysize', x))) { - as.numeric(x[grep('ysize', x) + 1]) - } else { - NA - } - result <- FALSE - if (!anyNA(c(nlons, nlats))) { - if ((nlons == length(lon)) && - (nlats == length(lat))) { - result <- TRUE - } - } - result - })) - grids_matches <- grids_matches[which(grids_types %in% c('gaussian', 'lonlat'))] - grids_info <- grids_info[which(grids_types %in% c('gaussian', 'lonlat'))] - grids_types <- grids_types[which(grids_types %in% c('gaussian', 'lonlat'))] - if (length(grids_matches) == 0) { - stop("Error: Only 'gaussian' and 'lonlat' grids supported. See e.g: cdo sinfo ", filename) - } - if (sum(grids_matches) > 1) { - if ((all(grids_types[which(grids_matches)] == 'gaussian') || - all(grids_types[which(grids_matches)] == 'lonlat')) && - all(unlist(lapply(grids_info[which(grids_matches)], identical, - grids_info[which(grids_matches)][[1]])))) { - grid_type <- grids_types[which(grids_matches)][1] - } else { - stop("Error: Load() can't disambiguate: ", - "More than one lonlat/gaussian grids with the same size as ", - "the requested variable defined in ", filename) - } - } else if (sum(grids_matches) == 1) { - grid_type <- grids_types[which(grids_matches)] - } else { - stop("Unexpected error.") - } - grid_lons <- length(lon) - grid_lats <- length(lat) - # Convert to CDO grid name as seen in cdo's griddes.c: nlat2ntr() - if (grid_type == 'lonlat') { - grid_name <- paste0('r', grid_lons, 'x', grid_lats) - } else { - grid_name <- paste0('t', .nlat2t(grid_lats), 'grid') - } - if (is.null(work_piece[['grid']])) { - .warning(paste0("Detect the grid type to be '", grid_name, "'. ", - "If it is not expected, assign parameter 'grid' to avoid wrong result.")) - } - } - # If a common grid is requested, we will also calculate its size which we will use - # later on. 
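Sketch of the inverse mapping applied below, with assumed grid names: 't106grid' is parsed as gaussian, and 'r256x128' as lonlat with 256 longitudes and 128 latitudes:

    .t2nlatlon(106)  # c(160, 320): 160 gaussian latitudes, 320 longitudes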
- if (!is.null(work_piece[['grid']])) { - # Now we calculate the common grid type and its lons and lats - if (any(grep('^t\\d{1,+}grid$', work_piece[['grid']]))) { - common_grid_type <- 'gaussian' - common_grid_res <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][2]) - nlonlat <- .t2nlatlon(common_grid_res) - common_grid_lats <- nlonlat[1] - common_grid_lons <- nlonlat[2] - } else if (any(grep('^r\\d{1,+}x\\d{1,+}$', work_piece[['grid']]))) { - common_grid_type <- 'lonlat' - common_grid_lons <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][2]) - common_grid_lats <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][3]) - } else { - stop("Error: Only supported grid types in parameter 'grid' are tgrid and rx") - } - } else { - ## If no 'grid' is specified, there is no common grid. - ## But these variables are filled in for consistency in the code. - common_grid_lons <- length(lon) - common_grid_lats <- length(lat) - } - first_common_grid_lon <- 0 - last_common_grid_lon <- 360 - 360 / common_grid_lons - ## This is not true for gaussian grids or for some regular grids, but - ## is a safe estimation - first_common_grid_lat <- -90 - last_common_grid_lat <- 90 - # And finally determine whether interpolation is needed or not - remove_shift <- FALSE - if (!is.null(work_piece[['grid']])) { - if ((grid_lons != common_grid_lons) || - (grid_lats != common_grid_lats) || - (grid_type != common_grid_type) || - (lon[1] != first_common_grid_lon)) { - if (grid_lons == common_grid_lons && grid_lats == common_grid_lats && - grid_type == common_grid_type && lon[1] != first_common_grid_lon) { - remove_shift <- TRUE - } - remap_needed <- TRUE - common_grid_name <- work_piece[['grid']] - } - } else if ((lon[1] != first_common_grid_lon) && explore_dims && - !work_piece[['single_dataset']]) { - remap_needed <- TRUE - common_grid_name <- grid_name - remove_shift <- TRUE - } - if (remap_needed && (work_piece[['remap']] == 'con') && - (cdo_version >= as.numeric_version('1.7.0'))) { - work_piece[['remap']] <- 'ycon' - } - if (remove_shift && !explore_dims) { - if (!is.null(work_piece[['progress_amount']])) { - cat("\n") - } - .warning(paste0("The dataset with index ", - tail(work_piece[['indices']], 1), " in '", - work_piece[['dataset_type']], - "' doesn't start at longitude 0 and will be re-interpolated in order ", - "to align its longitudes with the standard CDO grids definable with ", - "the names 'tgrid' or 'rx', which are by definition ", - "starting at the longitude 0.\n")) - if (!is.null(mask)) { - .warning(paste0("A mask was provided for the dataset with index ", - tail(work_piece[['indices']], 1), " in '", - work_piece[['dataset_type']], - "'. This dataset has been re-interpolated to align its longitudes to ", - "start at 0. You must re-interpolate the corresponding mask to align ", - "its longitudes to start at 0 as well, if you haven't done so yet. ", - "Running cdo remapcon,", common_grid_name, - " original_mask_file.nc new_mask_file.nc will fix it.\n")) - } - } - if (remap_needed && (grid_lons < common_grid_lons || grid_lats < common_grid_lats)) { - if (!is.null(work_piece[['progress_amount']])) { - cat("\n") - } - if (!explore_dims) { - .warning(paste0("The dataset with index ", tail(work_piece[['indices']], 1), - " in '", work_piece[['dataset_type']], "' is originally on ", - "a grid coarser than the common grid and it has been ", - "extrapolated. Check the results carefully. 
It is ", - "recommended to specify as common grid the coarsest grid ", - "among all requested datasets via the parameter 'grid'.\n")) - } - } - # Now calculate if the user requests for a lonlat subset or for the - # entire field - lonmin <- work_piece[['lon_limits']][1] - lonmax <- work_piece[['lon_limits']][2] - latmin <- work_piece[['lat_limits']][1] - latmax <- work_piece[['lat_limits']][2] - lon_subsetting_requested <- FALSE - lonlat_subsetting_requested <- FALSE - if (lonmin <= lonmax) { - if ((lonmin > first_common_grid_lon) || (lonmax < last_common_grid_lon)) { - lon_subsetting_requested <- TRUE - } - } else { - if ((lonmin - lonmax) > 360 / common_grid_lons) { - lon_subsetting_requested <- TRUE - } else { - gap_width <- floor(lonmin / (360 / common_grid_lons)) - - floor(lonmax / (360 / common_grid_lons)) - if (gap_width > 0) { - if (!(gap_width == 1 && (lonmin %% (360 / common_grid_lons) == 0) && - (lonmax %% (360 / common_grid_lons) == 0))) { - lon_subsetting_requested <- TRUE - } - } - } - } - if ((latmin > first_common_grid_lat) || (latmax < last_common_grid_lat) - || (lon_subsetting_requested)) { - lonlat_subsetting_requested <- TRUE - } - # Now that we know if subsetting was requested, we can say if final data - # will go across greenwich - if (lonmax < lonmin) { - data_across_gw <- TRUE - } else { - data_across_gw <- !lon_subsetting_requested - } - - # When remap is needed but no subsetting, the file is copied locally - # so that cdo works faster, and then interpolated. - # Otherwise the file is kept as is and the subset will have to be - # interpolated still. - if (!lonlat_subsetting_requested && remap_needed) { - nc_close(fnc) - filecopy <- tempfile(pattern = "load", fileext = ".nc") - file.copy(filein, filecopy) - filein <- tempfile(pattern = "loadRegridded", fileext = ".nc") - # "-L" is to serialize I/O accesses. It prevents potential segmentation fault in the - # underlying hdf5 library. - system(paste0("cdo -L -s remap", work_piece[['remap']], ",", - common_grid_name, - " -selname,", namevar, " ", filecopy, " ", filein, - " 2>/dev/null")) - file.remove(filecopy) - work_piece[['dimnames']][['lon']] <- 'lon' - work_piece[['dimnames']][['lat']] <- 'lat' - fnc <- nc_open(filein) - lon <- ncvar_get(fnc, work_piece[['dimnames']][['lon']]) - lat <- ncvar_get(fnc, work_piece[['dimnames']][['lat']]) - } - - # Read and check also the mask - if (!is.null(mask)) { - ###mask_file <- tempfile(pattern = 'loadMask', fileext = '.nc') - if (is.list(mask)) { - if (!file.exists(mask[['path']])) { - stop("Error: Couldn't find the mask file", mask[['path']]) - } - mask_file <- mask[['path']] - ###file.copy(work_piece[['mask']][['path']], mask_file) - fnc_mask <- nc_open(mask_file) - vars_in_mask <- sapply(fnc_mask$var, '[[', 'name') - if ('nc_var_name' %in% names(mask)) { - if (!(mask[['nc_var_name']] %in% - vars_in_mask)) { - stop("Error: couldn't find variable", mask[['nc_var_name']], - "in the mask file", mask[['path']]) - } - } else { - if (length(vars_in_mask) != 1) { - stop("Error: one and only one non-coordinate variable should be ", - "defined in the mask file", - mask[['path']], - "if the component 'nc_var_name' is not specified. 
", - "Currently found: ", - toString(vars_in_mask), ".") - } else { - mask[['nc_var_name']] <- vars_in_mask - } - } - if (sum(fnc_mask$var[[mask[['nc_var_name']]]]$size > 1) != 2) { - stop("Error: the variable '", - mask[['nc_var_name']], - "' must be defined only over the dimensions '", - work_piece[['dimnames']][['lon']], "' and '", - work_piece[['dimnames']][['lat']], - "' in the mask file ", - mask[['path']]) - } - mask <- ncvar_get(fnc_mask, mask[['nc_var_name']], collapse_degen = TRUE) - nc_close(fnc_mask) - ### mask_lon <- ncvar_get(fnc_mask, work_piece[['dimnames']][['lon']]) - ### mask_lat <- ncvar_get(fnc_mask, work_piece[['dimnames']][['lat']]) - ###} else { - ### dim_longitudes <- ncdim_def(work_piece[['dimnames']][['lon']], "degrees_east", lon) - ### dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], "degrees_north", lat) - ### ncdf_var <- ncvar_def('LSM', "", list(dim_longitudes, dim_latitudes), NA, 'double') - ### fnc_mask <- nc_create(mask_file, list(ncdf_var)) - ### ncvar_put(fnc_mask, ncdf_var, work_piece[['mask']]) - ### nc_close(fnc_mask) - ### fnc_mask <- nc_open(mask_file) - ### work_piece[['mask']] <- list(path = mask_file, nc_var_name = 'LSM') - ### mask_lon <- lon - ### mask_lat <- lat - ###} - ###} - ### Now ready to check that the mask is right - ##if (!(lonlat_subsetting_requested && remap_needed)) { - ### if ((dim(mask)[2] != length(lon)) || (dim(mask)[1] != length(lat))) { - ### stop(paste("Error: the mask of the dataset with index ", - ### tail(work_piece[['indices']], 1), " in '", - ### work_piece[['dataset_type']], "' is wrong. ", - ### "It must be on the common grid if the selected output type is 'lonlat', ", - ### "'lon' or 'lat', or 'areave' and 'grid' has been specified. It must be on ", - ### "the grid of the corresponding dataset if the selected output type is ", - ### "'areave' and no 'grid' has been specified. For more information ", - ### "check ?Load and see help on parameters 'grid', 'maskmod' and ", - ### "'maskobs'.", sep = "")) - ### } - ###if (!(identical(mask_lon, lon) && identical(mask_lat, lat))) { - ### stop(paste0("Error: the longitudes and latitudes in the masks must be ", - ### "identical to the ones in the corresponding data files if output = 'areave' ", - ### " or, if the selected output is 'lon', 'lat' or 'lonlat', the longitudes in ", - ### "the mask file must start by 0 and the latitudes must be ordered from ", - ### "highest to lowest. See\n ", - ### work_piece[['mask']][['path']], " and ", filein)) - ###} - } - } - - lon_indices <- seq_along(lon) - if (!(lonlat_subsetting_requested && remap_needed)) { - lon[which(lon < 0)] <- lon[which(lon < 0)] + 360 - } - if (lonmax >= lonmin) { - lon_indices <- lon_indices[which(((lon %% 360) >= lonmin) & ((lon %% 360) <= lonmax))] - } else if (!remap_needed) { - lon_indices <- lon_indices[which(((lon %% 360) <= lonmax) | ((lon %% 360) >= lonmin))] - } - lat_indices <- which(lat >= latmin & lat <= latmax) - ## In most of the cases the latitudes are ordered from -90 to 90. - ## We will reorder them to be in the order from 90 to -90, so mostly - ## always the latitudes are reordered. - ## TODO: This could be avoided in future. 
- if (lat[1] < lat[length(lat)]) { - lat_indices <- lat_indices[rev(seq_along(lat_indices))] - } - if (!is.null(mask) && !(lonlat_subsetting_requested && remap_needed)) { - if ((dim(mask)[1] != length(lon)) || (dim(mask)[2] != length(lat))) { - stop("Error: the mask of the dataset with index ", tail(work_piece[['indices']], 1), - " in '", work_piece[['dataset_type']], "' is wrong. It must be on the ", - "common grid if the selected output type is 'lonlat', 'lon' or 'lat', ", - "or 'areave' and 'grid' has been specified. It must be on the grid of ", - "the corresponding dataset if the selected output type is 'areave' and ", - "no 'grid' has been specified. For more information check ?Load and see ", - "help on parameters 'grid', 'maskmod' and 'maskobs'.") - } - mask <- mask[lon_indices, lat_indices] - } - ## If the user requests subsetting, we must extend the lon and lat limits if possible - ## so that the interpolation after is done properly - maximum_extra_points <- work_piece[['remapcells']] - if (lonlat_subsetting_requested && remap_needed) { - if ((maximum_extra_points > (head(lon_indices, 1) - 1)) || - (maximum_extra_points > (length(lon) - tail(lon_indices, 1)))) { - ## if the requested number of points goes beyond the left or right - ## sides of the map, we need to take the entire map so that the - ## interpolation works properly - lon_indices <- seq_along(lon) - } else { - extra_points <- min(maximum_extra_points, head(lon_indices, 1) - 1) - if (extra_points > 0) { - lon_indices <- - c((head(lon_indices, 1) - extra_points):(head(lon_indices, 1) - 1), lon_indices) - } - extra_points <- min(maximum_extra_points, length(lon) - tail(lon_indices, 1)) - if (extra_points > 0) { - lon_indices <- c(lon_indices, - (tail(lon_indices, 1) + 1):(tail(lon_indices, 1) + extra_points)) - } - } - min_lat_ind <- min(lat_indices) - max_lat_ind <- max(lat_indices) - extra_points <- min(maximum_extra_points, min_lat_ind - 1) - if (extra_points > 0) { - if (lat[1] < tail(lat, 1)) { - lat_indices <- c(lat_indices, (min_lat_ind - 1):(min_lat_ind - extra_points)) - } else { - lat_indices <- c((min_lat_ind - extra_points):(min_lat_ind - 1), lat_indices) - } - } - extra_points <- min(maximum_extra_points, length(lat) - max_lat_ind) - if (extra_points > 0) { - if (lat[1] < tail(lat, 1)) { - lat_indices <- c((max_lat_ind + extra_points):(max_lat_ind + 1), lat_indices) - } else { - lat_indices <- c(lat_indices, (max_lat_ind + 1):(max_lat_ind + extra_points)) - } - } - } - lon <- lon[lon_indices] - lat <- lat[lat_indices] - expected_dims <- c(work_piece[['dimnames']][['lon']], - work_piece[['dimnames']][['lat']]) - } else { - lon <- 0 - lat <- 0 - } - # We keep on filling the expected dimensions - var_dimnames <- unlist(lapply(fnc$var[[namevar]][['dim']], '[[', 'name')) - nmemb <- nltime <- NULL - ## Sometimes CDO renames 'members' dimension to 'lev' - old_members_dimname <- NULL - if (('lev' %in% var_dimnames) && !(work_piece[['dimnames']][['member']] %in% var_dimnames)) { - old_members_dimname <- work_piece[['dimnames']][['member']] - work_piece[['dimnames']][['member']] <- 'lev' - } - if (work_piece[['dimnames']][['member']] %in% var_dimnames) { - nmemb <- fnc$var[[namevar]][['dim']][[match(work_piece[['dimnames']][['member']], - var_dimnames)]]$len - expected_dims <- c(expected_dims, work_piece[['dimnames']][['member']]) - } else { - nmemb <- 1 - } - if (length(expected_dims) > 0) { - dim_matches <- match(expected_dims, var_dimnames) - if (anyNA(dim_matches)) { - if (!is.null(old_members_dimname)) { - 
expected_dims[which(expected_dims == 'lev')] <- old_members_dimname - } - stop("Error: the expected dimension(s)", - toString(expected_dims[which(is.na(dim_matches))]), - "were not found in", filename) - } - time_dimname <- var_dimnames[-dim_matches] - } else { - time_dimname <- var_dimnames - } - if (length(time_dimname) > 0) { - if (length(time_dimname) == 1) { - nltime <- fnc$var[[namevar]][['dim']][[match(time_dimname, var_dimnames)]]$len - expected_dims <- c(expected_dims, time_dimname) - dim_matches <- match(expected_dims, var_dimnames) - } else { - if (!is.null(old_members_dimname)) { - expected_dims[which(expected_dims == 'lev')] <- old_members_dimname - } - stop("Error: the variable ", namevar, - " is defined over more dimensions than the expected (", - toString(c(expected_dims, 'time')), - "). It could also be that the members, longitude or latitude ", - "dimensions are named incorrectly. In that case, either rename ", - "the dimensions in the file or adjust Load() to recognize the actual ", - "name with the parameter 'dimnames'. See file ", filename) - } - } else { - nltime <- 1 - } - - # Now we must retrieve the data from the file, but only the asked indices. - # So we build up the indices to retrieve. - # Longitudes or latitudes have been retrieved already. - if (explore_dims) { - # If we're exploring the file we only want one time step from one member, - # to regrid it and work out the number of longitudes and latitudes. - # We don't need more. - members <- 1 - ltimes_list <- list(1) - } else { - # The data is arranged in the array 'tmp' with the dimensions in a - # common order: - # 1) Longitudes - # 2) Latitudes - # 3) Members (even if is not a file per member experiment) - # 4) Lead-times - if (work_piece[['is_file_per_dataset']]) { - time_indices <- 1:nltime - mons <- strsplit(system(paste('cdo showmon ', filein, - ' 2>/dev/null'), intern = TRUE), split = ' ') - years <- strsplit(system(paste('cdo showyear ', filein, - ' 2>/dev/null'), intern = TRUE), split = ' ') - mons <- as.numeric(mons[[1]][which(mons[[1]] != "")]) - years <- as.numeric(years[[1]][which(years[[1]] != "")]) - time_indices <- ts(time_indices, start = c(years[1], mons[1]), - end = c(years[length(years)], mons[length(mons)]), - frequency = 12) - ltimes_list <- list() - for (sdate in work_piece[['startdates']]) { - selected_time_indices <- window(time_indices, start = c(as.numeric( - substr(sdate, 1, 4)), as.numeric(substr(sdate, 5, 6))), - end = c(3000, 12), frequency = 12, extend = TRUE) - selected_time_indices <- selected_time_indices[work_piece[['leadtimes']]] - ltimes_list <- c(ltimes_list, list(selected_time_indices)) - } - } else { - ltimes <- work_piece[['leadtimes']] - #if (work_piece[['dataset_type']] == 'exp') { - ltimes_list <- list(ltimes[which(ltimes <= nltime)]) - #} - } - ## TODO: Put, when reading matrices, this kind of warnings - # if (nmember < nmemb) { - # cat("Warning: - members <- 1:work_piece[['nmember']] - members <- members[which(members <= nmemb)] - } - - # Now, for each list of leadtimes to load (usually only one list with all leadtimes), - # we'll join the indices and retrieve data - found_disordered_dims <- FALSE - for (ltimes in ltimes_list) { - if (is_2d_var) { - start <- c(min(lon_indices), min(lat_indices)) - end <- c(max(lon_indices), max(lat_indices)) - if (lonlat_subsetting_requested && remap_needed) { - subset_indices <- list(min(lon_indices):max(lon_indices) - min(lon_indices) + 1, - lat_indices - min(lat_indices) + 1) - dim_longitudes <- 
ncdim_def(work_piece[['dimnames']][['lon']], "degrees_east", lon) - dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], "degrees_north", lat) - ncdf_dims <- list(dim_longitudes, dim_latitudes) - } else { - subset_indices <- list(lon_indices - min(lon_indices) + 1, - lat_indices - min(lat_indices) + 1) - ncdf_dims <- list() - } - final_dims <- c(length(subset_indices[[1]]), length(subset_indices[[2]]), 1, 1) - } else { - start <- end <- NULL - subset_indices <- list() - ncdf_dims <- list() - final_dims <- c(1, 1, 1, 1) - } - - if (work_piece[['dimnames']][['member']] %in% expected_dims) { - start <- c(start, head(members, 1)) - end <- c(end, tail(members, 1)) - subset_indices <- c(subset_indices, list(members - head(members, 1) + 1)) - dim_members <- ncdim_def(work_piece[['dimnames']][['member']], "", members) - ncdf_dims <- c(ncdf_dims, list(dim_members)) - final_dims[3] <- length(members) - } - if (time_dimname %in% expected_dims) { - if (!all(is.na(ltimes))) { - start <- c(start, head(ltimes[which(!is.na(ltimes))], 1)) - end <- c(end, tail(ltimes[which(!is.na(ltimes))], 1)) - subset_indices <- c(subset_indices, - list(ltimes - head(ltimes[which(!is.na(ltimes))], 1) + 1)) - } else { - start <- c(start, NA) - end <- c(end, NA) - subset_indices <- c(subset_indices, list(ltimes)) - } - dim_time <- ncdim_def(time_dimname, "", seq_along(ltimes), unlim = TRUE) - ncdf_dims <- c(ncdf_dims, list(dim_time)) - final_dims[4] <- length(ltimes) - } - count <- end - start + 1 - start <- start[dim_matches] - count <- count[dim_matches] - subset_indices <- subset_indices[dim_matches] - # Now that we have the indices to retrieve, we retrieve the data - if (prod(final_dims) > 0) { - tmp <- take(ncvar_get(fnc, namevar, start, count, - collapse_degen = FALSE), - seq_along(subset_indices), subset_indices) - # The data is regridded if it corresponds to an atmospheric variable. When - # the chosen output type is 'areave' the data is not regridded to not - # waste computing time unless the user specified a common grid. 
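A toy illustration of the 'areave' weighting applied further down (latitudes and field values assumed): for final_lats = c(60, 0) the cos-latitude weights are c(0.5, 1), normalised by their mean 0.75 to c(2/3, 4/3), so:

    mean(c(1, 2) * c(2/3, 4/3))  # = 5/3, instead of the unweighted mean 1.5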
- if (is_2d_var) { - ###if (!is.null(work_piece[['mask']]) && !(lonlat_subsetting_requested && remap_needed)) { - ### mask <- take(ncvar_get(fnc_mask, work_piece[['mask']][['nc_var_name']], - ### start[dim_matches[1:2]], count[dim_matches[1:2]], - ### collapse_degen = FALSE), 1:2, subset_indices[dim_matches[1:2]]) - ###} - if (lonlat_subsetting_requested && remap_needed) { - filein <- tempfile(pattern = "loadRegridded", fileext = ".nc") - filein2 <- tempfile(pattern = "loadRegridded2", fileext = ".nc") - ncdf_var <- ncvar_def(namevar, "", ncdf_dims[dim_matches], - fnc$var[[namevar]]$missval, - prec = if (fnc$var[[namevar]]$prec == 'int') { - 'integer' - } else { - fnc$var[[namevar]]$prec - }) - scale_factor <- ifelse(fnc$var[[namevar]]$hasScaleFact, fnc$var[[namevar]]$scaleFact, 1) - add_offset <- ifelse(fnc$var[[namevar]]$hasAddOffset, fnc$var[[namevar]]$addOffset, 0) - if (fnc$var[[namevar]]$hasScaleFact || fnc$var[[namevar]]$hasAddOffset) { - tmp <- (tmp - add_offset) / scale_factor - } - #nc_close(fnc) - fnc2 <- nc_create(filein2, list(ncdf_var)) - ncvar_put(fnc2, ncdf_var, tmp) - if (add_offset != 0) { - ncatt_put(fnc2, ncdf_var, 'add_offset', add_offset) - } - if (scale_factor != 1) { - ncatt_put(fnc2, ncdf_var, 'scale_factor', scale_factor) - } - nc_close(fnc2) - system(paste0("cdo -L -s -sellonlatbox,", if (lonmin > lonmax) { - "0,360," - } else { - paste0(lonmin, ",", lonmax, ",") - }, latmin, ",", latmax, - " -remap", work_piece[['remap']], ",", common_grid_name, - " ", filein2, " ", filein, " 2>/dev/null")) - file.remove(filein2) - fnc2 <- nc_open(filein) - sub_lon <- ncvar_get(fnc2, 'lon') - sub_lat <- ncvar_get(fnc2, 'lat') - ## We read the longitudes and latitudes from the file. - ## In principle cdo should put in order the longitudes - ## and slice them properly unless data is across greenwich - sub_lon[which(sub_lon < 0)] <- sub_lon[which(sub_lon < 0)] + 360 - sub_lon_indices <- seq_along(sub_lon) - if (lonmax < lonmin) { - sub_lon_indices <- sub_lon_indices[which((sub_lon <= lonmax) | (sub_lon >= lonmin))] - } - sub_lat_indices <- seq_along(sub_lat) - ## In principle cdo should put in order the latitudes - if (sub_lat[1] < sub_lat[length(sub_lat)]) { - sub_lat_indices <- rev(seq_along(sub_lat)) - } - final_dims[c(1, 2)] <- c(length(sub_lon_indices), length(sub_lat_indices)) - subset_indices[[dim_matches[1]]] <- sub_lon_indices - subset_indices[[dim_matches[2]]] <- sub_lat_indices - - tmp <- take(ncvar_get(fnc2, namevar, collapse_degen = FALSE), - seq_along(subset_indices), subset_indices) - - if (!is.null(mask)) { - ## We create a very simple 2d netcdf file that is then interpolated to the common - ## grid to know what are the lons and lats of our slice of data - mask_file <- tempfile(pattern = 'loadMask', fileext = '.nc') - mask_file_remap <- tempfile(pattern = 'loadMask', fileext = '.nc') - dim_longitudes <- ncdim_def(work_piece[['dimnames']][['lon']], - "degrees_east", c(0, 360)) - dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], - "degrees_north", c(-90, 90)) - ncdf_var <- ncvar_def('LSM', "", list(dim_longitudes, dim_latitudes), NA, 'double') - fnc_mask <- nc_create(mask_file, list(ncdf_var)) - ncvar_put(fnc_mask, ncdf_var, array(rep(0, 4), dim = c(2, 2))) - nc_close(fnc_mask) - system(paste0("cdo -L -s remap", work_piece[['remap']], ",", - common_grid_name, - " ", mask_file, " ", mask_file_remap, " 2>/dev/null")) - fnc_mask <- nc_open(mask_file_remap) - mask_lons <- ncvar_get(fnc_mask, 'lon') - mask_lats <- ncvar_get(fnc_mask, 'lat') - nc_close(fnc_mask) 
-              file.remove(mask_file, mask_file_remap)
-              if ((dim(mask)[1] != common_grid_lons) || (dim(mask)[2] != common_grid_lats)) {
-                stop("Error: the mask of the dataset with index ",
-                     tail(work_piece[['indices']], 1), " in '",
-                     work_piece[['dataset_type']],
-                     "' is wrong. It must be on the common grid if the ",
-                     "selected output type is 'lonlat', 'lon' or 'lat', ",
-                     "or 'areave' and 'grid' has been specified. It must ",
-                     "be on the grid of the corresponding dataset if the ",
-                     "selected output type is 'areave' and no 'grid' has been ",
-                     "specified. For more information check ?Load and see help ",
-                     "on parameters 'grid', 'maskmod' and 'maskobs'.")
-              }
-              mask_lons[which(mask_lons < 0)] <- mask_lons[which(mask_lons < 0)] + 360
-              if (lonmax >= lonmin) {
-                mask_lon_indices <- which((mask_lons >= lonmin) & (mask_lons <= lonmax))
-              } else {
-                mask_lon_indices <- which((mask_lons >= lonmin) | (mask_lons <= lonmax))
-              }
-              mask_lat_indices <- which((mask_lats >= latmin) & (mask_lats <= latmax))
-              if (sub_lat[1] < sub_lat[length(sub_lat)]) {
-                mask_lat_indices <- mask_lat_indices[rev(seq_along(mask_lat_indices))]
-              }
-              mask <- mask[mask_lon_indices, mask_lat_indices]
-            }
-            sub_lon <- sub_lon[sub_lon_indices]
-            sub_lat <- sub_lat[sub_lat_indices]
-            ### nc_close(fnc_mask)
-            ### system(paste0("cdo -s -sellonlatbox,", if (lonmin > lonmax) {
-            ###   "0,360,"
-            ### } else {
-            ###   paste0(lonmin, ",", lonmax, ",")
-            ### }, latmin, ",", latmax,
-            ### " -remap", work_piece[['remap']], ",", common_grid_name,
-            ###This is wrong: same files
-            ### " ", mask_file, " ", mask_file, " 2>/dev/null", sep = ""))
-            ### fnc_mask <- nc_open(mask_file)
-            ### mask <- take(ncvar_get(fnc_mask, work_piece[['mask']][['nc_var_name']],
-            ###                        collapse_degen = FALSE), 1:2, subset_indices[dim_matches[1:2]])
-            ###}
-          }
-        }
-        if (is.unsorted(dim_matches)) {
-          if (!found_disordered_dims &&
-              rev(work_piece[['indices']])[2] == 1 &&
-              rev(work_piece[['indices']])[3] == 1) {
-            found_disordered_dims <- TRUE
-            .warning(paste0("The dimensions for the variable ", namevar,
-                            " in the files of the experiment with index ",
-                            tail(work_piece[['indices']], 1),
-                            " are not in the optimal order for loading with Load(). ",
-                            "The optimal order would be '",
-                            toString(expected_dims),
-                            "'. One of the files of the dataset is stored in ", filename))
-          }
-          tmp <- aperm(tmp, dim_matches)
-        }
-        dim(tmp) <- final_dims
-        # If we are exploring the file we don't need to process and arrange
-        # the retrieved data. We only need to keep the dimension sizes.
-        if (is_2d_var && lonlat_subsetting_requested && remap_needed) {
-          final_lons <- sub_lon
-          final_lats <- sub_lat
-        } else {
-          final_lons <- lon
-          final_lats <- lat
-        }
-        if (explore_dims) {
-          if (work_piece[['is_file_per_member']]) {
-            ## TODO: When the exp_full_path contains asterisks and is file_per_member
-            ##       members from different datasets may be accounted.
-            ##       Also if one file member is missing the accounting will be wrong.
-            ##       Should parse the file name and extract number of members.
-            if (is_url) {
-              nmemb <- NULL
-            } else {
-              nmemb <- length(files)
-            }
-          }
-          dims <- list(member = nmemb, ftime = nltime, lon = final_lons, lat = final_lats)
-        } else {
-          # If we are not exploring, then we have to process the retrieved data
-          if (is_2d_var) {
-            tmp <- apply(tmp, c(3, 4), function(x) {
-              # Discard values outside the requested limits.
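(For example, with hypothetical limits var_limits = c(0, 400), any value outside [0, 400] is set to NA below, as are grid points where the mask is below 0.5.)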
- if (!is.na(work_piece[['var_limits']][2])) { - x[which(x > work_piece[['var_limits']][2])] <- NA - } - if (!is.na(work_piece[['var_limits']][1])) { - x[which(x < work_piece[['var_limits']][1])] <- NA - } - if (!is.null(mask)) { - x[which(mask < 0.5)] <- NA - } - - if (output == 'areave' || output == 'lon') { - weights <- InsertDim(cos(final_lats * pi / 180), 1, - length(final_lons), name = 'lon') - weights[which(is.na(x))] <- NA - if (output == 'areave') { - weights <- weights / mean(weights, na.rm = TRUE) - mean(x * weights, na.rm = TRUE) - } else { - weights <- weights / InsertDim(MeanDims(weights, 2, na.rm = TRUE), 2, - length(final_lats), name = 'lat') - MeanDims(x * weights, 2, na.rm = TRUE) - } - } else if (output == 'lat') { - MeanDims(x, 1, na.rm = TRUE) - } else if (output == 'lonlat') { - signif(x, 5) - } - }) - if (output == 'areave') { - dim(tmp) <- c(1, 1, final_dims[3:4]) - } else if (output == 'lon') { - dim(tmp) <- c(final_dims[1], 1, final_dims[3:4]) - } else if (output == 'lat') { - dim(tmp) <- c(1, final_dims[c(2, 3, 4)]) - } else if (output == 'lonlat') { - dim(tmp) <- final_dims - } - } - var_data <- attach.big.matrix(work_piece[['out_pointer']]) - if (work_piece[['dims']][['member']] > 1 && nmemb > 1 && - work_piece[['dims']][['ftime']] > 1 && - nltime < work_piece[['dims']][['ftime']]) { - work_piece[['indices']][2] <- work_piece[['indices']][2] - 1 - for (jmemb in members) { - work_piece[['indices']][2] <- work_piece[['indices']][2] + 1 - out_position <- arrayIndex2VectorIndex(work_piece[['indices']], work_piece[['dims']]) - out_indices <- out_position:(out_position + length(tmp[, , jmemb, ]) - 1) - var_data[out_indices] <- as.vector(tmp[, , jmemb, ]) - } - work_piece[['indices']][2] <- work_piece[['indices']][2] - tail(members, 1) + 1 - } else { - out_position <- arrayIndex2VectorIndex(work_piece[['indices']], work_piece[['dims']]) - out_indices <- out_position:(out_position + length(tmp) - 1) - a <- aperm(tmp, c(1, 2, 4, 3)) - as.vector(a) - var_data[out_indices] <- as.vector(aperm(tmp, c(1, 2, 4, 3))) - } - work_piece[['indices']][3] <- work_piece[['indices']][3] + 1 - } - } - } - nc_close(fnc) - if (is_2d_var) { - if (remap_needed) { - array_across_gw <- FALSE - file.remove(filein) - ###if (!is.null(mask) && lonlat_subsetting_requested) { - ### file.remove(mask_file) - ###} - } else { - if (first_lon_in_original_file < 0) { - array_across_gw <- data_across_gw - } else { - array_across_gw <- FALSE - } - } - } - } - if (explore_dims) { - list(dims = dims, is_2d_var = is_2d_var, grid = grid_name, - units = units, var_long_name = var_long_name, - data_across_gw = data_across_gw, array_across_gw = array_across_gw) - } else { - ###if (!silent && !is.null(progress_connection) && !is.null(work_piece[['progress_amount']])) { - ### foobar <- writeBin(work_piece[['progress_amount']], progress_connection) - ###} - if (!silent && !is.null(work_piece[['progress_amount']])) { - message(work_piece[['progress_amount']], appendLF = FALSE) - } - found_file - } -} - -.LoadSampleData <- function(var, exp = NULL, obs = NULL, sdates, - nmember = NULL, nmemberobs = NULL, - nleadtime = NULL, leadtimemin = 1, - leadtimemax = NULL, storefreq = 'monthly', - sampleperiod = 1, lonmin = 0, lonmax = 360, - latmin = -90, latmax = 90, output = 'areave', - method = 'conservative', grid = NULL, - maskmod = vector("list", 15), - maskobs = vector("list", 15), - configfile = NULL, suffixexp = NULL, - suffixobs = NULL, varmin = NULL, varmax = NULL, - silent = FALSE, nprocs = NULL) { - ## This 
function loads and selects sample data stored in sampleMap and - ## sampleTimeSeries and is used in the examples instead of Load() so as - ## to avoid nco and cdo system calls and computation time in the stage - ## of running examples in the CHECK process on CRAN. - selected_start_dates <- match(sdates, c('19851101', '19901101', '19951101', - '20001101', '20051101')) - start_dates_position <- 3 - lead_times_position <- 4 - - if (output == 'lonlat') { - sampleData <- s2dv::sampleMap - if (is.null(leadtimemax)) { - leadtimemax <- dim(sampleData$mod)[lead_times_position] - } - selected_lead_times <- leadtimemin:leadtimemax - - dataOut <- sampleData - dataOut$mod <- sampleData$mod[, , selected_start_dates, selected_lead_times, , ] - dataOut$obs <- sampleData$obs[, , selected_start_dates, selected_lead_times, , ] - } else if (output == 'areave') { - sampleData <- s2dv::sampleTimeSeries - if (is.null(leadtimemax)) { - leadtimemax <- dim(sampleData$mod)[lead_times_position] - } - selected_lead_times <- leadtimemin:leadtimemax - - dataOut <- sampleData - dataOut$mod <- sampleData$mod[, , selected_start_dates, selected_lead_times] - dataOut$obs <- sampleData$obs[, , selected_start_dates, selected_lead_times] - } - - dims_out <- dim(sampleData$mod) - dims_out[start_dates_position] <- length(selected_start_dates) - dims_out[lead_times_position] <- length(selected_lead_times) - dim(dataOut$mod) <- dims_out - - dims_out <- dim(sampleData$obs) - dims_out[start_dates_position] <- length(selected_start_dates) - dims_out[lead_times_position] <- length(selected_lead_times) - dim(dataOut$obs) <- dims_out - - invisible(list(mod = dataOut$mod, obs = dataOut$obs, - lat = dataOut$lat, lon = dataOut$lon)) -} - -.ConfigGetDatasetInfo <- function(matching_entries, table_name) { - # This function obtains the information of a dataset and variable pair, - # applying all the entries that match in the configuration file. - if (table_name == 'experiments') { - id <- 'EXP' - } else { - id <- 'OBS' - } - defaults <- c(paste0('$DEFAULT_', id, '_MAIN_PATH$'), - paste0('$DEFAULT_', id, '_FILE_PATH$'), - '$DEFAULT_NC_VAR_NAME$', '$DEFAULT_SUFFIX$', - '$DEFAULT_VAR_MIN$', '$DEFAULT_VAR_MAX$') - info <- NULL - - for (entry in matching_entries) { - if (is.null(info)) { - info <- entry[-1:-2] - info[which(info == '*')] <- defaults[which(info == '*')] - } else { - info[which(entry[-1:-2] != '*')] <- entry[-1:-2][which(entry[-1:-2] != '*')] - } - } - - info <- as.list(info) - names(info) <- c('main_path', 'file_path', 'nc_var_name', 'suffix', 'var_min', 'var_max') - info -} - -.ReplaceGlobExpressions <- function(path_with_globs, actual_path, - replace_values, tags_to_keep, - dataset_name, permissive) { - # The goal of this function is to replace the shell globbing expressions in - # a path pattern (that may contain shell globbing expressions and Load() - # tags) by the corresponding part of the real existing path. - # What is done actually is to replace all the values of the tags in the - # actual path by the corresponding $TAG$ - # - # It takes mainly two inputs. 
The path with expressions and tags, e.g.:
-  #   /data/experiments/*/$EXP_NAME$/$VAR_NAME$/$VAR_NAME$_*$START_DATE$*.nc
-  # and a complete known path to one of the matching files, e.g.:
-  #   /data/experiments/ecearth/i00k/tos/tos_fc0-1_19901101_199011-199110.nc
-  # and it returns the path pattern but without shell globbing expressions:
-  #   /data/experiments/ecearth/$EXP_NAME$/$VAR_NAME$/$VAR_NAME$_fc0-1_$START_DATE$_199011-199110.nc
-  #
-  # To do that, it also needs as input the list of replace values (the
-  # association of each tag to its value).
-  #
-  # All the tags not present in the parameter tags_to_keep will be replaced.
-  #
-  # Not all cases can be resolved with the implemented algorithm. In an
-  # unsolvable case a warning is given and one possible guess is returned.
-  #
-  # In some cases it is interesting to replace only the expressions in the
-  # path to the file, but not the ones in the file name itself. To keep the
-  # expressions in the file name, the parameter permissive can be set to
-  # TRUE. To replace all the expressions it can be set to FALSE.
-  clean <- function(x) {
-    if (nchar(x) > 0) {
-      x <- gsub('\\\\', '', x)
-      x <- gsub('\\^', '', x)
-      x <- gsub('\\$', '', x)
-      x <- unname(sapply(strsplit(x, '[', fixed = TRUE)[[1]], function(y) gsub('.*]', '.', y)))
-      do.call(paste0, as.list(x))
-    } else {
-      x
-    }
-  }
-
-  strReverse <- function(x) sapply(lapply(strsplit(x, NULL), rev), paste, collapse = "")
-
-  if (permissive) {
-    actual_path_chunks <- strsplit(actual_path, '/')[[1]]
-    actual_path <- paste(actual_path_chunks[-length(actual_path_chunks)], collapse = '/')
-    file_name <- tail(actual_path_chunks, 1)
-    if (length(actual_path_chunks) > 1) {
-      file_name <- paste0('/', file_name)
-    }
-    path_with_globs_chunks <- strsplit(path_with_globs, '/')[[1]]
-    path_with_globs <- paste(path_with_globs_chunks[-length(path_with_globs_chunks)],
-                             collapse = '/')
-    path_with_globs <- .ConfigReplaceVariablesInString(path_with_globs, replace_values)
-    file_name_with_globs <- tail(path_with_globs_chunks, 1)
-    if (length(path_with_globs_chunks) > 1) {
-      file_name_with_globs <- paste0('/', file_name_with_globs)
-    }
-    right_known <- head(strsplit(file_name_with_globs, '*', fixed = TRUE)[[1]], 1)
-    right_known_no_tags <- .ConfigReplaceVariablesInString(right_known, replace_values)
-    path_with_globs_rx <- utils::glob2rx(paste0(path_with_globs, right_known_no_tags))
-    match <- regexpr(gsub('$', '', path_with_globs_rx, fixed = TRUE),
-                     paste0(actual_path, file_name))
-    if (match != 1) {
-      stop("Incorrect parameters to replace glob expressions. 
", - "The path with expressions does not match the actual path.") - } - if (attr(match, 'match.length') - nchar(right_known_no_tags) < nchar(actual_path)) { - path_with_globs <- paste0(path_with_globs, right_known_no_tags, '*') - file_name_with_globs <- sub(right_known, '/*', file_name_with_globs) - } - } - path_with_globs_rx <- utils::glob2rx(path_with_globs) - values_to_replace <- NULL - tags_to_replace_starts <- NULL - tags_to_replace_ends <- NULL - give_warning <- FALSE - for (tag in tags_to_keep) { - matches <- gregexpr(paste0('$', tag, '$'), path_with_globs_rx, fixed = TRUE)[[1]] - lengths <- attr(matches, 'match.length') - if (!(length(matches) == 1 && matches[1] == -1)) { - for (i in seq_along(matches)) { - left <- NULL - if (matches[i] > 1) { - left <- - .ConfigReplaceVariablesInString(substr(path_with_globs_rx, 1, - matches[i] - 1), replace_values) - left_known <- - strReverse(head(strsplit(strReverse(left), - strReverse('.*'), fixed = TRUE)[[1]], 1)) - } - right <- NULL - if ((matches[i] + lengths[i] - 1) < nchar(path_with_globs_rx)) { - right <- - .ConfigReplaceVariablesInString(substr(path_with_globs_rx, - matches[i] + lengths[i], - nchar(path_with_globs_rx)), - replace_values) - right_known <- head(strsplit(right, '.*', fixed = TRUE)[[1]], 1) - } - match_limits <- NULL - if (!is.null(left)) { - left_match <- regexpr(paste0(left, replace_values[[tag]], right_known), actual_path) - match_len <- attr(left_match, 'match.length') - left_match_limits <- - c(left_match + match_len - 1 - nchar(clean(right_known)) - - nchar(replace_values[[tag]]) + 1, - left_match + match_len - 1 - nchar(clean(right_known))) - if (!(left_match < 1)) { - match_limits <- left_match_limits - } - } - right_match <- NULL - if (!is.null(right)) { - right_match <- regexpr(paste0(left_known, replace_values[[tag]], right), actual_path) - match_len <- attr(right_match, 'match.length') - right_match_limits <- - c(right_match + nchar(clean(left_known)), - right_match + nchar(clean(left_known)) + - nchar(replace_values[[tag]]) - 1) - if (is.null(match_limits) && !(right_match < 1)) { - match_limits <- right_match_limits - } - } - if (!is.null(right_match) && !is.null(left_match)) { - if (!identical(right_match_limits, left_match_limits)) { - give_warning <- TRUE - } - } - if (is.null(match_limits)) { - stop("Too complex path pattern specified for ", dataset_name, - ". 
Specify a simpler path pattern for this dataset.") - } - values_to_replace <- c(values_to_replace, tag) - tags_to_replace_starts <- c(tags_to_replace_starts, match_limits[1]) - tags_to_replace_ends <- c(tags_to_replace_ends, match_limits[2]) - } - } - } - - if (length(tags_to_replace_starts) > 0) { - reorder <- sort(tags_to_replace_starts, index.return = TRUE) - tags_to_replace_starts <- reorder$x - values_to_replace <- values_to_replace[reorder$ix] - tags_to_replace_ends <- tags_to_replace_ends[reorder$ix] - while (length(values_to_replace) > 0) { - actual_path <- paste0(substr(actual_path, 1, head(tags_to_replace_starts, 1) - 1), - '$', head(values_to_replace, 1), '$', - substr(actual_path, head(tags_to_replace_ends, 1) + 1, - nchar(actual_path))) - extra_chars <- nchar(head(values_to_replace, 1)) + 2 - - (head(tags_to_replace_ends, 1) - - head(tags_to_replace_starts, 1) + 1) - values_to_replace <- values_to_replace[-1] - tags_to_replace_starts <- tags_to_replace_starts[-1] - tags_to_replace_ends <- tags_to_replace_ends[-1] - tags_to_replace_starts <- tags_to_replace_starts + extra_chars - tags_to_replace_ends <- tags_to_replace_ends + extra_chars - } - } - - if (give_warning) { - .warning(paste0("Too complex path pattern specified for ", dataset_name, - ". Double check carefully the '$Files' fetched for this dataset ", - "or specify a simpler path pattern.")) - } - - if (permissive) { - paste0(actual_path, file_name_with_globs) - } else { - actual_path - } -} - -.FindTagValue <- function(path_with_globs_and_tag, actual_path, tag) { - tag <- paste0('\\$', tag, '\\$') - path_with_globs_and_tag <- paste0('^', path_with_globs_and_tag, '$') - parts <- strsplit(path_with_globs_and_tag, '*', fixed = TRUE)[[1]] - parts <- as.list(grep(tag, parts, value = TRUE)) - longest_couples <- NULL - pos_longest_couples <- NULL - found_value <- NULL - for (i in seq_along(parts)) { - parts[[i]] <- strsplit(parts[[i]], tag)[[1]] - if (length(parts[[i]]) == 1) { - parts[[i]] <- c(parts[[i]], '') - } - len_parts <- sapply(parts[[i]], nchar) - len_couples <- len_parts[-length(len_parts)] + len_parts[2:length(len_parts)] - pos_longest_couples <- c(pos_longest_couples, which.max(len_couples)) - longest_couples <- c(longest_couples, max(len_couples)) - } - chosen_part <- which.max(longest_couples) - parts[[chosen_part]] <- - parts[[chosen_part]][pos_longest_couples[chosen_part]:(pos_longest_couples[chosen_part] + 1)] - if (nchar(parts[[chosen_part]][1]) >= nchar(parts[[chosen_part]][2])) { - if (nchar(parts[[chosen_part]][1]) > 0) { - matches <- gregexpr(parts[[chosen_part]][1], actual_path)[[1]] - if (length(matches) == 1) { - match_left <- matches - actual_path <- - substr(actual_path, match_left + attr(match_left, 'match.length'), nchar(actual_path)) - } - } - if (nchar(parts[[chosen_part]][2]) > 0) { - matches <- gregexpr(parts[[chosen_part]][2], actual_path)[[1]] - if (length(matches) == 1) { - match_right <- matches - found_value <- substr(actual_path, 0, match_right - 1) - } - } - } else { - if (nchar(parts[[chosen_part]][2]) > 0) { - matches <- gregexpr(parts[[chosen_part]][2], actual_path)[[1]] - if (length(matches) == 1) { - match_right <- matches - actual_path <- substr(actual_path, 0, match_right - 1) - } - } - if (nchar(parts[[chosen_part]][1]) > 0) { - matches <- gregexpr(parts[[chosen_part]][1], actual_path)[[1]] - if (length(matches) == 1) { - match_left <- matches - found_value <- - substr(actual_path, match_left + attr(match_left, 'match.length'), - nchar(actual_path)) - } - } - } - found_value 
-}
-
-.FilterUserGraphicArgs <- function(excludedArgs, ...) {
-  # This function filters the extra graphical parameters passed by the user in
-  # a plot function, excluding the ones that the plot function uses by default.
-  # Each plot function has a different set of arguments that are not allowed to
-  # be modified.
-  args <- list(...)
-  userArgs <- list()
-  for (name in names(args)) {
-    if ((name != "") & !is.element(name, excludedArgs)) {
-      # If the argument has a name and it is not in the list of excluded
-      # arguments, then it is added to the list that will be used
-      userArgs[[name]] <- args[[name]]
-    } else {
-      .warning(paste0("the argument '", name, "' cannot be modified ",
-                      "and the new value will be ignored"))
-    }
-  }
-  userArgs
-}
-
-.SelectDevice <- function(fileout, width, height, units, res) {
-  # This function is used in the plot functions to check the extension of the
-  # files where the graphics will be stored and select the right R device to
-  # save them.
-  # If the vector of filenames ('fileout') has files with different
-  # extensions, then it will only accept the first one, changing all the rest
-  # of the filenames to use that extension.
-
-  # We extract the extension of the filenames: '.png', '.pdf', ...
-  ext <- regmatches(fileout, regexpr("\\.[a-zA-Z0-9]*$", fileout))
-
-  if (length(ext) != 0) {
-    # If there is an extension specified, select the correct device
-    ## units of width and height set to accept inches
-    if (ext[1] == ".png") {
-      saveToFile <- function(fileout) {
-        png(filename = fileout, width = width, height = height, res = res, units = units)
-      }
-    } else if (ext[1] == ".jpeg") {
-      saveToFile <- function(fileout) {
-        jpeg(filename = fileout, width = width, height = height, res = res, units = units)
-      }
-    } else if (ext[1] %in% c(".eps", ".ps")) {
-      saveToFile <- function(fileout) {
-        postscript(file = fileout, width = width, height = height)
-      }
-    } else if (ext[1] == ".pdf") {
-      saveToFile <- function(fileout) {
-        pdf(file = fileout, width = width, height = height)
-      }
-    } else if (ext[1] == ".svg") {
-      saveToFile <- function(fileout) {
-        svg(filename = fileout, width = width, height = height)
-      }
-    } else if (ext[1] == ".bmp") {
-      saveToFile <- function(fileout) {
-        bmp(filename = fileout, width = width, height = height, res = res, units = units)
-      }
-    } else if (ext[1] == ".tiff") {
-      saveToFile <- function(fileout) {
-        tiff(filename = fileout, width = width, height = height, res = res, units = units)
-      }
-    } else {
-      .warning("file extension not supported; '.eps' will be used by default.")
-      ## In case there is only one filename
-      fileout[1] <- sub("\\.[a-zA-Z0-9]*$", ".eps", fileout[1])
-      ext[1] <- ".eps"
-      saveToFile <- function(fileout) {
-        postscript(file = fileout, width = width, height = height)
-      }
-    }
-    # Change filenames when necessary
-    if (any(ext != ext[1])) {
-      .warning(paste0("some extensions of the filenames provided in 'fileout' ",
-                      "are not ", ext[1],
-                      ". The extensions are being converted to ", ext[1], "."))
-      fileout <- sub("\\.[a-zA-Z0-9]*$", ext[1], fileout)
-    }
-  } else {
-    # Default filenames when there is no specification
-    .warning("no extension specified in the filenames; defaulting to '.eps'.")
-    fileout <- paste0(fileout, ".eps")
-    saveToFile <- postscript
-  }
-
-  # return the correct function with the graphical device, and the correct
-  # filenames
-  list(fun = saveToFile, files = fileout)
-}
-
-.message <- function(...) 
{ - # Function to use the 'message' R function with our custom settings - # Default: new line at end of message, indent to 0, exdent to 3, - # collapse to \n* - args <- list(...) - - ## In case we need to specify message arguments - if (!is.null(args[["appendLF"]])) { - appendLF <- args[["appendLF"]] - } else { - ## Default value in message function - appendLF <- TRUE - } - if (!is.null(args[["domain"]])) { - domain <- args[["domain"]] - } else { - ## Default value in message function - domain <- NULL - } - args[["appendLF"]] <- NULL - args[["domain"]] <- NULL - - ## To modify strwrap indent and exdent arguments - if (!is.null(args[["indent"]])) { - indent <- args[["indent"]] - } else { - indent <- 0 - } - if (!is.null(args[["exdent"]])) { - exdent <- args[["exdent"]] - } else { - exdent <- 3 - } - args[["indent"]] <- NULL - args[["exdent"]] <- NULL - - ## To modify paste collapse argument - if (!is.null(args[["collapse"]])) { - collapse <- args[["collapse"]] - } else { - collapse <- "\n*" - } - args[["collapse"]] <- NULL - - ## Message tag - if (!is.null(args[["tag"]])) { - tag <- args[["tag"]] - } else { - tag <- "* " - } - args[["tag"]] <- NULL - - tmp <- paste0(tag, - paste(strwrap(args, indent = indent, exdent = exdent), collapse = collapse)) - message(tmp, appendLF = appendLF, domain = domain) -} - -.warning <- function(...) { - # Function to use the 'warning' R function with our custom settings - # Default: no call information, indent to 0, exdent to 3, - # collapse to \n - args <- list(...) - - ## In case we need to specify warning arguments - if (!is.null(args[["call."]])) { - call <- args[["call."]] - } else { - ## Default: don't show info about the call where the warning came up - call <- FALSE - } - if (!is.null(args[["immediate."]])) { - immediate <- args[["immediate."]] - } else { - ## Default value in warning function - immediate <- FALSE - } - if (!is.null(args[["noBreaks."]])) { - noBreaks <- args[["noBreaks."]] - } else { - ## Default value warning function - noBreaks <- FALSE - } - if (!is.null(args[["domain"]])) { - domain <- args[["domain"]] - } else { - ## Default value warning function - domain <- NULL - } - args[["call."]] <- NULL - args[["immediate."]] <- NULL - args[["noBreaks."]] <- NULL - args[["domain"]] <- NULL - - ## To modify strwrap indent and exdent arguments - if (!is.null(args[["indent"]])) { - indent <- args[["indent"]] - } else { - indent <- 0 - } - if (!is.null(args[["exdent"]])) { - exdent <- args[["exdent"]] - } else { - exdent <- 3 - } - args[["indent"]] <- NULL - args[["exdent"]] <- NULL - - ## To modify paste collapse argument - if (!is.null(args[["collapse"]])) { - collapse <- args[["collapse"]] - } else { - collapse <- "\n!" - } - args[["collapse"]] <- NULL - - ## Warning tag - if (!is.null(args[["tag"]])) { - tag <- args[["tag"]] - } else { - tag <- "! Warning: " - } - args[["tag"]] <- NULL - - tmp <- paste0(tag, - paste(strwrap(args, indent = indent, exdent = exdent), collapse = collapse)) - warning(tmp, call. = call, immediate. = immediate, - noBreaks. = noBreaks, domain = domain) -} - -.IsColor <- function(x) { - res <- try(col2rgb(x), silent = TRUE) - return(!is(res, "try-error")) -} - -# This function switches to a specified figure at position (row, col) in a layout. -# This overcomes the bug in par(mfg = ...). However the mode par(new = TRUE) is -# activated, i.e., all drawn elements will be superimposed. 
Additionally, after
-# using this function, the automatic pointing to the next figure in the layout
-# will be spoiled: once the last figure in the layout is drawn, the pointer won't
-# move to the first figure in the layout.
-# Only figures with numbers other than 0 (when creating the layout) will be
-# accessible.
-# Inputs: either row and col, or n and mat
-.SwitchToFigure <- function(row = NULL, col = NULL, n = NULL, mat = NULL) {
-  if (!is.null(n) && !is.null(mat)) {
-    if (!is.numeric(n) || length(n) != 1) {
-      stop("Parameter 'n' must be a single numeric value.")
-    }
-    n <- round(n)
-    if (!is.array(mat)) {
-      stop("Parameter 'mat' must be an array.")
-    }
-    target <- which(mat == n, arr.ind = TRUE)[1, ]
-    row <- target[1]
-    col <- target[2]
-  } else if (!is.null(row) && !is.null(col)) {
-    if (!is.numeric(row) || length(row) != 1) {
-      stop("Parameter 'row' must be a single numeric value.")
-    }
-    row <- round(row)
-    if (!is.numeric(col) || length(col) != 1) {
-      stop("Parameter 'col' must be a single numeric value.")
-    }
-    col <- round(col)
-  } else {
-    stop("Either 'row' and 'col' or 'n' and 'mat' must be provided.")
-  }
-  next_attempt <- c(row, col)
-  par(mfg = next_attempt)
-  i <- 1
-  layout_size <- par('mfrow')
-  layout_cells <- matrix(1:prod(layout_size), layout_size[1], layout_size[2],
-                         byrow = TRUE)
-  while (any((par('mfg')[1:2] != c(row, col)))) {
-    next_attempt <- which(layout_cells == i, arr.ind = TRUE)[1, ]
-    par(mfg = next_attempt)
-    i <- i + 1
-    if (i > prod(layout_size)) {
-      stop("Figure not accessible.")
-    }
-  }
-  plot(0, type = 'n', axes = FALSE, ann = FALSE)
-  par(mfg = next_attempt)
-}
-
-# Function to permute arrays of non-atomic elements (e.g. POSIXct)
-.aperm2 <- function(x, new_order) {
-  old_dims <- dim(x)
-  attr_bk <- attributes(x)
-  if ('dim' %in% names(attr_bk)) {
-    attr_bk[['dim']] <- NULL
-  }
-  if (is.numeric(x)) {
-    x <- aperm(x, new_order)
-  } else {
-    y <- array(seq_along(x), dim = dim(x))
-    y <- aperm(y, new_order)
-    x <- x[as.vector(y)]
-  }
-  dim(x) <- old_dims[new_order]
-  attributes(x) <- c(attributes(x), attr_bk)
-  x
-}
-
-# This function is a helper for the function .MergeArrays.
-# It expects as inputs two named numeric vectors, and it extends them
-# with dimensions of length 1 until an ordered common dimension
-# format is reached.
-# The first output is dims1 extended with 1s.
-# The second output is dims2 extended with 1s.
-# The third output is a merged dimension vector. If dimensions with
-# the same name are found in the two inputs, and they have a different
-# length, the maximum is taken.
-.MergeArrayDims <- function(dims1, dims2) {
-  new_dims1 <- NULL
-  new_dims2 <- NULL
-  while (length(dims1) > 0) {
-    if (names(dims1)[1] %in% names(dims2)) {
-      pos <- which(names(dims2) == names(dims1)[1])
-      dims_to_add <- rep(1, pos - 1)
-      if (length(dims_to_add) > 0) {
-        names(dims_to_add) <- names(dims2[1:(pos - 1)])
-      }
-      new_dims1 <- c(new_dims1, dims_to_add, dims1[1])
-      new_dims2 <- c(new_dims2, dims2[1:pos])
-      dims1 <- dims1[-1]
-      dims2 <- dims2[-(1:pos)]
-    } else {
-      new_dims1 <- c(new_dims1, dims1[1])
-      new_dims2 <- c(new_dims2, 1)
-      names(new_dims2)[length(new_dims2)] <- names(dims1)[1]
-      dims1 <- dims1[-1]
-    }
-  }
-  if (length(dims2) > 0) {
-    dims_to_add <- rep(1, length(dims2))
-    names(dims_to_add) <- names(dims2)
-    new_dims1 <- c(new_dims1, dims_to_add)
-    new_dims2 <- c(new_dims2, dims2)
-  }
-  list(new_dims1, new_dims2, pmax(new_dims1, new_dims2))
-}
-
-# This function takes two named arrays and merges them, filling with
-# NA where needed.
-
-# This function takes two named arrays and merges them, filling with
-# NA where needed.
-# dim(array1)
-#  'b' 'c' 'e' 'f'
-#   1   3   7   9
-# dim(array2)
-#  'a' 'b' 'd' 'f' 'g'
-#   2   3   5   9  11
-# dim(.MergeArrays(array1, array2, 'b'))
-#  'a' 'b' 'c' 'e' 'd' 'f' 'g'
-#   2   4   3   7   5   9  11
-.MergeArrays <- function(array1, array2, along) {
-  if (!(is.null(array1) || is.null(array2))) {
-    if (!(identical(names(dim(array1)), names(dim(array2))) &&
-          identical(dim(array1)[-which(names(dim(array1)) == along)],
-                    dim(array2)[-which(names(dim(array2)) == along)]))) {
-      new_dims <- .MergeArrayDims(dim(array1), dim(array2))
-      dim(array1) <- new_dims[[1]]
-      dim(array2) <- new_dims[[2]]
-      for (j in seq_along(dim(array1))) {
-        if (names(dim(array1))[j] != along) {
-          if (dim(array1)[j] != dim(array2)[j]) {
-            if (which.max(c(dim(array1)[j], dim(array2)[j])) == 1) {
-              na_array_dims <- dim(array2)
-              na_array_dims[j] <- dim(array1)[j] - dim(array2)[j]
-              na_array <- array(dim = na_array_dims)
-              array2 <- abind(array2, na_array, along = j)
-              names(dim(array2)) <- names(na_array_dims)
-            } else {
-              na_array_dims <- dim(array1)
-              na_array_dims[j] <- dim(array2)[j] - dim(array1)[j]
-              na_array <- array(dim = na_array_dims)
-              array1 <- abind(array1, na_array, along = j)
-              names(dim(array1)) <- names(na_array_dims)
-            }
-          }
-        }
-      }
-    }
-    if (!(along %in% names(dim(array2)))) {
-      stop("The dimension specified in 'along' is not present in the ",
-           "provided arrays.")
-    }
-    array1 <- abind(array1, array2, along = which(names(dim(array1)) == along))
-    names(dim(array1)) <- names(dim(array2))
-  } else if (is.null(array1)) {
-    array1 <- array2
-  }
-  array1
-}
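[Note: a minimal sketch of .MergeArrays in the simple case where the two arrays differ only along the merge dimension; the array contents are invented, and the abind package must be attached since the function calls abind() without a namespace prefix:

    library(abind)
    a1 <- array(1:6, dim = c(sdate = 2, time = 3))
    a2 <- array(7:10, dim = c(sdate = 2, time = 2))
    merged <- .MergeArrays(a1, a2, along = 'time')
    dim(merged)  # sdate=2 time=5

When the non-merge dimensions differ, the NA-padding branch above first expands the smaller array before binding.]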
-
-# Only to be used in Trend(); needs generalization or replacement by another function.
-.reorder <- function(output, time_dim, dim_names) {
-  # Add dim name back
-  if (is.null(dim(output))) {
-    dim(output) <- c(stats = length(output))
-  } else {  # is an array
-    if (length(dim(output)) == 1) {
-      if (!is.null(names(dim(output)))) {
-        dim(output) <- c(1, dim(output))
-        names(dim(output))[1] <- time_dim
-      } else {
-        names(dim(output)) <- time_dim
-      }
-    } else {  # more than one dim
-      if (names(dim(output))[1] != "") {
-        dim(output) <- c(1, dim(output))
-        names(dim(output))[1] <- time_dim
-      } else {  # regular case
-        names(dim(output))[1] <- time_dim
-      }
-    }
-  }
-  # Reorder
-  pos <- match(dim_names, names(dim(output)))
-  output <- aperm(output, pos)
-  names(dim(output)) <- dim_names
-  names(dim(output))[names(dim(output)) == time_dim] <- 'stats'
-  return(output)
-}
-
-# To be used in AMV.R, TPI.R, SPOD.R, GSAT.R and GMST.R.
-.Indices <- function(data, type, monini, indices_for_clim,
-                     fmonth_dim, sdate_dim, year_dim, month_dim, na.rm) {
-
-  if (type == 'dcpp') {
-
-    fyear_dim <- 'fyear'
-    data <- Season(data = data, time_dim = fmonth_dim,
-                   monini = monini, moninf = 1, monsup = 12,
-                   method = mean, na.rm = na.rm)
-    names(dim(data))[which(names(dim(data)) == fmonth_dim)] <- fyear_dim
-
-    if (identical(indices_for_clim, FALSE)) {  ## data is already anomalies
-
-      anom <- data
-
-    } else {  ## Different indices_for_clim for each forecast year (to use the same calendar years)
-
-      n_fyears <- as.numeric(dim(data)[fyear_dim])
-      n_sdates <- as.numeric(dim(data)[sdate_dim])
-
-      if (is.null(indices_for_clim)) {  ## climatology over the whole (common) period
-        first_years_for_clim <- n_fyears:1
-        last_years_for_clim <- n_sdates:(n_sdates - n_fyears + 1)
-      } else {  ## indices_for_clim specified as a numeric vector
-        first_years_for_clim <- seq(from = indices_for_clim[1], by = -1, length.out = n_fyears)
-        last_years_for_clim <- seq(from = indices_for_clim[length(indices_for_clim)],
-                                   by = -1, length.out = n_fyears)
-      }
-
-      data <- s2dv::Reorder(data = data, order = c(fyear_dim, sdate_dim))
-      anom <- array(data = NA, dim = dim(data))
-      for (i in 1:n_fyears) {
-        clim <- mean(data[i, first_years_for_clim[i]:last_years_for_clim[i]], na.rm = na.rm)
-        anom[i, ] <- data[i, ] - clim
-      }
-    }
-
-  } else if (type %in% c('obs', 'hist')) {
-
-    data <- multiApply::Apply(data = data, target_dims = month_dim,
-                              fun = mean, na.rm = na.rm)$output1
-
-    if (identical(indices_for_clim, FALSE)) {  ## data is already anomalies
-      clim <- 0
-    } else if (is.null(indices_for_clim)) {
-      ## climatology over the whole period
-      clim <- multiApply::Apply(data = data, target_dims = year_dim, fun = mean,
-                                na.rm = na.rm)$output1
-    } else {
-      ## indices_for_clim specified as a numeric vector
-      clim <- multiApply::Apply(data = ClimProjDiags::Subset(x = data, along = year_dim,
-                                                             indices = indices_for_clim),
-                                target_dims = year_dim, fun = mean, na.rm = na.rm)$output1
-    }
-
-    anom <- data - clim
-
-  } else {
-    stop("Parameter 'type' must be 'dcpp', 'hist' or 'obs'.")
-  }
-
-  return(anom)
-}
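[Note: for the 'obs'/'hist' branch, a hypothetical call could look as follows; the input dimensions, values and climatological period are invented, and the multiApply and ClimProjDiags packages must be installed since .Indices() calls them with namespace prefixes:

    set.seed(1)
    obs <- array(rnorm(12 * 30), dim = c(month = 12, year = 30))
    anom <- .Indices(data = obs, type = 'obs', monini = 1,
                     indices_for_clim = 1:20,  # climatology over the first 20 years
                     fmonth_dim = 'fmonth', sdate_dim = 'sdate',
                     year_dim = 'year', month_dim = 'month', na.rm = TRUE)
    dim(anom)  # year=30: annual-mean anomalies w.r.t. the chosen climatology

The months are first averaged into annual means; the climatology is then taken over the requested years and subtracted from every year.]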
-
-# TODO: Remove from s2dv when PlotLayout can get colorbar info from the plotting
-# function directly. The function is temporarily here because PlotLayout()
-# needs to draw the colorbars of PlotMostLikelyQuantileMap().
-# Draws color bars for categories.
-# A wrapper of s2dv::ColorBar to generate multiple color bars for different
-# categories; each category has a different color set.
-GradientCatsColorBar <- function(nmap, brks = NULL, cols = NULL, vertical = TRUE, subsampleg = NULL,
-                                 bar_limits, var_limits = NULL,
-                                 triangle_ends = NULL, plot = TRUE,
-                                 draw_separators = FALSE,
-                                 bar_titles = NULL, title_scale = 1,
-                                 label_scale = 1, extra_margin = rep(0, 4),
-                                 ...) {
-  # Check bar_limits
-  if (!is.numeric(bar_limits) || length(bar_limits) != 2) {
-    stop("Parameter 'bar_limits' must be a numeric vector of length 2.")
-  }
-
-  # Check brks
-  if (is.null(brks) || (is.numeric(brks) && length(brks) == 1)) {
-    num_brks <- 5
-    if (is.numeric(brks)) {
-      num_brks <- brks
-    }
-    brks <- seq(from = bar_limits[1], to = bar_limits[2], length.out = num_brks)
-  }
-  if (!is.numeric(brks)) {
-    stop("Parameter 'brks' must be a numeric vector.")
-  }
-
-  # Check cols
-  col_sets <- list(c("#A1D99B", "#74C476", "#41AB5D", "#238B45"),
-                   c("#6BAED6FF", "#4292C6FF", "#2171B5FF", "#08519CFF"),
-                   c("#FFEDA0FF", "#FED976FF", "#FEB24CFF", "#FD8D3CFF"),
-                   c("#FC4E2AFF", "#E31A1CFF", "#BD0026FF", "#800026FF"),
-                   c("#FCC5C0", "#FA9FB5", "#F768A1", "#DD3497"))
-  if (is.null(cols)) {
-    if (length(col_sets) >= nmap) {
-      chosen_sets <- 1:nmap
-      chosen_sets <- chosen_sets + floor((length(col_sets) - length(chosen_sets)) / 2)
-    } else {
-      chosen_sets <- array(seq_along(col_sets), nmap)
-    }
-    cols <- col_sets[chosen_sets]
-  } else {
-    if (!is.list(cols)) {
-      stop("Parameter 'cols' must be a list of character vectors.")
-    }
-    if (!all(sapply(cols, is.character))) {
-      stop("Parameter 'cols' must be a list of character vectors.")
-    }
-    if (length(cols) != nmap) {
-      stop("Parameter 'cols' must be a list of the same length as the number of ",
-           "maps in 'maps'.")
-    }
-  }
-  for (i in seq_along(cols)) {
-    if (length(cols[[i]]) != (length(brks) - 1)) {
-      cols[[i]] <- grDevices::colorRampPalette(cols[[i]])(length(brks) - 1)
-    }
-  }
-
-  # Check bar_titles
-  if (is.null(bar_titles)) {
-    if (nmap == 3) {
-      bar_titles <- c("Below normal (%)", "Normal (%)", "Above normal (%)")
-    } else if (nmap == 5) {
-      bar_titles <- c("Low (%)", "Below normal (%)",
-                      "Normal (%)", "Above normal (%)", "High (%)")
-    } else {
-      bar_titles <- paste0("Cat. ", 1:nmap, " (%)")
-    }
-  }
-
-  if (plot) {
-    for (k in 1:nmap) {
-      s2dv::ColorBar(brks = brks, cols = cols[[k]], vertical = FALSE, subsampleg = subsampleg,
-                     # bar_limits = bar_limits, var_limits = var_limits,
-                     triangle_ends = triangle_ends, plot = TRUE,
-                     draw_separators = draw_separators,
-                     title = bar_titles[[k]], title_scale = title_scale,
-                     label_scale = label_scale, extra_margin = extra_margin)
-    }
-  } else {
-    # TODO: col_inf and col_sup
-    return(list(brks = brks, cols = cols))
-  }
-
-}
-
--
GitLab
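[Note: when called with plot = FALSE, the function above only returns the breaks and the per-category color vectors, which is what PlotLayout() retrieves; a minimal sketch with illustrative argument values:

    info <- GradientCatsColorBar(nmap = 3, brks = 11, bar_limits = c(0, 100),
                                 plot = FALSE)
    length(info$brks)   # 11 breaks spanning 0..100
    lengths(info$cols)  # 10 10 10: one color per interval for each category

Each 4-color set is expanded with grDevices::colorRampPalette() so that every category gets exactly length(brks) - 1 colors.]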