From b80a86cfc9f57bebdb27f4c8a1d0e28db92beb7a Mon Sep 17 00:00:00 2001 From: nperez Date: Tue, 7 May 2024 18:08:59 +0200 Subject: [PATCH 01/25] temporal agg --- modules/Aggregation/Aggregation.R | 67 +++++++++++++++++++ .../ex0_1_sample_dataset/ex0_1-recipe.yml | 6 ++ 2 files changed, 73 insertions(+) create mode 100644 modules/Aggregation/Aggregation.R diff --git a/modules/Aggregation/Aggregation.R b/modules/Aggregation/Aggregation.R new file mode 100644 index 00000000..40018a7d --- /dev/null +++ b/modules/Aggregation/Aggregation.R @@ -0,0 +1,67 @@ +# recipe +## Time_aggregation$execute YES/NO +## Time_aggregation$method accumulated or average +## Time_aggregation$set for decadal +## Time_aggregation$ini based on forecast time +## Time_aggregation$end based on forecast time +## ini = 2, end = 3 with monthly freq would be a 2 months agg +Aggregation(recipe, data) { + + ncores <- recipe$Analysis$ncores # is it already checked? NULL or number + na.rm <- recipe$Analysis$remove_NAs # is it already checked? TRUE/FALSE + if (recipe$Analysis$Workflow$Time_aggregation$execute) { + # original freq + raw_freq <- tolower(recipe$Analysis$Variable$freq) + custom <- recipe$Analysis$Workflow$Time_aggregation$set + ini <- recipe$Analysis$Workflow$Time_aggregation$ini + end <- recipe$Analysis$Workflow$Time_aggregation$end + method <- tolower(recipe$Analysis$Workflow$Time_aggregation$method) +## Instead of raw freq may be better to use ini/end +## if (raw_freq %in% c('daily', 'daily_mean', 'weekly', 'weekly_mean')) { + if (!is.null(ini) && !is.null(end)) { + # calculate aggregations from fday1 to fday2 + # calculate aggregations from fweek1 to fweek2 + # other cases could be indicators or other original freq + res <- sapply(1:length(data), function(x) { + if (!is.null(data[[x]])) { + result <- agg_ini_end(data[x], method = method, + na.rm = na.rm, ncores = ncores, + ini = ini, end = end) + } else { + result <- data[x] + }}, simplify = TRUE) + + } else if (!is.null(custom)) { + # caluclate aggreagtions from fmonth1 to ftmonth2 + # for decadal, it makes sense to get all seasons and annual means too + + } else { + # higher frequencies are typically used to compute indicators + # lower frequencies are not typically stored in esarchive + } + } +} + +agg_ini_end <- function(x, ini, end, method, na.rm , ncores) { + if (method == 'average') { + x[[1]]$data <- Apply(x[[1]]$data, target_dim = 'time', + function(y) { + mean(y[ini:end], na.rm = na.rm)}, + ncores = ncores) + } else if (method == 'accumulated') { + x[[1]]$data <- Apply(x[[1]]$data, target_dim = 'time', + function(y) { + sum(y[ini:end], na.rm = na.rm)}, + ncores = ncores) + } else { + stop("Unknown method") + } + dims <- x[[1]]$dims + dims['time'] <- length(ini:end) + x[[1]]$dims <- dims + x[[1]]$coords$time <- ini:end + attr(x[[1]]$coords$time, "indices") <- TRUE + x[[1]]$attrs$Dates <- Subset(x[[1]]$attrs$Dates, along = 'time', + indices = ini:end) + return(x) +} diff --git a/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml b/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml index 662e75cc..bedb1983 100644 --- a/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml +++ b/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml @@ -28,6 +28,12 @@ Analysis: method: bilinear type: to_system Workflow: + Time_aggregation: + execute: yes + set: no + method: average + ini: 2 + end: 3 Anomalies: compute: yes cross_validation: yes -- GitLab From 2ceaaca30ca423f1a24f09978c92ec2d56b3b2a9 Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 8 May 2024 18:16:11 +0200 Subject: [PATCH 02/25] ini and end and list of indices --- modules/Aggregation/Aggregation.R | 86 +++++++++++++------ .../ex0_1_sample_dataset/ex0_1-recipe.yml | 8 +- 2 files changed, 63 insertions(+), 31 deletions(-) diff --git a/modules/Aggregation/Aggregation.R b/modules/Aggregation/Aggregation.R index 40018a7d..6a026e57 100644 --- a/modules/Aggregation/Aggregation.R +++ b/modules/Aggregation/Aggregation.R @@ -1,18 +1,21 @@ # recipe ## Time_aggregation$execute YES/NO ## Time_aggregation$method accumulated or average -## Time_aggregation$set for decadal +## Time_aggregation$user_def for decadal ## Time_aggregation$ini based on forecast time ## Time_aggregation$end based on forecast time +# For date definition of period aggregation consider using Indicators module ## ini = 2, end = 3 with monthly freq would be a 2 months agg -Aggregation(recipe, data) { +Aggregation <- function(recipe, data) { +## Do we need this checks here? ncores <- recipe$Analysis$ncores # is it already checked? NULL or number - na.rm <- recipe$Analysis$remove_NAs # is it already checked? TRUE/FALSE + na.rm <- ifelse(is.null(recipe$Analysis$remove_NAs), + TRUE, recipe$Analysis$remove_NAs) # is it already checked? TRUE/FALSE if (recipe$Analysis$Workflow$Time_aggregation$execute) { # original freq raw_freq <- tolower(recipe$Analysis$Variable$freq) - custom <- recipe$Analysis$Workflow$Time_aggregation$set + custom <- recipe$Analysis$Workflow$Time_aggregation$user_def ini <- recipe$Analysis$Workflow$Time_aggregation$ini end <- recipe$Analysis$Workflow$Time_aggregation$end method <- tolower(recipe$Analysis$Workflow$Time_aggregation$method) @@ -22,46 +25,73 @@ Aggregation(recipe, data) { # calculate aggregations from fday1 to fday2 # calculate aggregations from fweek1 to fweek2 # other cases could be indicators or other original freq - res <- sapply(1:length(data), function(x) { - if (!is.null(data[[x]])) { - result <- agg_ini_end(data[x], method = method, - na.rm = na.rm, ncores = ncores, - ini = ini, end = end) - } else { - result <- data[x] - }}, simplify = TRUE) - - } else if (!is.null(custom)) { + if (length(ini) == length(end)) { + res <- sapply(1:length(data), function(x) { + if (!is.null(data[[x]])) { + result <- agg_ini_end(data[x], method = method, + na.rm = na.rm, ncores = ncores, + ini = ini, end = end, indices = NULL) + } else { + result <- data[x] + }}, simplify = TRUE) + } + } else if (!is.ll(custom)) { # caluclate aggreagtions from fmonth1 to ftmonth2 # for decadal, it makes sense to get all seasons and annual means too - + ## Ex: January, February, March for decadal simulations aggregation + ## custom <- sort(c(seq(1,120, 12), seq(2,120,13), seq(3, 120, 14))) + if (is.list(custom)) { + res <- sapply(1:length(data), function(x) { + if (!is.null(data[[x]])) { + result <- agg_ini_end(data[x], method, na.rm, ncores, + indices = custom, ini = NULL, end = NULL) + } else { + result <- data[x]}}, simplify = TRUE) + } else { + stop("Why Time_aggregation$user_def is not a list?") + } } else { # higher frequencies are typically used to compute indicators # lower frequencies are not typically stored in esarchive + stop("Temporal aggregation not implemented yet") } } } -agg_ini_end <- function(x, ini, end, method, na.rm , ncores) { +agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm , ncores) { + # to make the code work with both options: + if (!is.null(ini) && is.null(indices)) { + # list of vectors for the indices from ini to end pairs + indices <- lapply(1:length(ini), function(x) { + ini[x]:end[x]}) + } else { + # take the firs and las element of each indices list for time_bounds saving + ini <- unlist(lapply(indices, function(x){x[1]})) + end <- unlist(lapply(indices, function(x){x[length(x)]})) + } if (method == 'average') { x[[1]]$data <- Apply(x[[1]]$data, target_dim = 'time', - function(y) { - mean(y[ini:end], na.rm = na.rm)}, - ncores = ncores) + function(y, ind) { + sapply(1:length(indices), function(z) { + mean(y[indices[[z]]], na.rm = na.rm)})}, ind = indices, output_dims = 'time', + ncores = ncores)$output1 } else if (method == 'accumulated') { x[[1]]$data <- Apply(x[[1]]$data, target_dim = 'time', - function(y) { - sum(y[ini:end], na.rm = na.rm)}, - ncores = ncores) + function(y, ind) { + sapply(1:length(indices), function(z) { + sum(y[indices[[z]]], na.rm = na.rm)})}, ind = indices, output_dims = 'time', + ncores = ncores)$output1 } else { stop("Unknown method") - } - dims <- x[[1]]$dims - dims['time'] <- length(ini:end) - x[[1]]$dims <- dims - x[[1]]$coords$time <- ini:end + } + x[[1]]$dims <- dim(x[[1]]$data) + x[[1]]$coords$time <- 1:length(ini) attr(x[[1]]$coords$time, "indices") <- TRUE + x[[1]]$attrs$time_bounds$start <- x[[1]]$attrs$Dates + x[[1]]$attrs$time_bounds$end <- Subset(x[[1]]$attrs$Dates, along = 'time', + indices = end) x[[1]]$attrs$Dates <- Subset(x[[1]]$attrs$Dates, along = 'time', - indices = ini:end) + indices = ini) + return(x) } diff --git a/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml b/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml index bedb1983..7c8fcb2d 100644 --- a/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml +++ b/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml @@ -30,10 +30,12 @@ Analysis: Workflow: Time_aggregation: execute: yes - set: no + user_def: + - [1, 3] + - !expr sort(c(seq(1,120, 12), seq(2,120,13), seq(3, 120, 14))) method: average - ini: 2 - end: 3 + ini: [1, 2, 1] + end: [2, 3, 3] Anomalies: compute: yes cross_validation: yes -- GitLab From e156dfa8b29cd595353dc290c701194105639c67 Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 9 May 2024 17:27:43 +0200 Subject: [PATCH 03/25] fix attrs and add example --- example_scripts/exec_timeagg.R | 41 +++++++++++ modules/Aggregation/Aggregation.R | 9 +-- modules/Visualization/R/plot_metrics.R | 9 ++- .../examples/recipe_tas_seasonal_timeagg.yml | 70 +++++++++++++++++++ 4 files changed, 124 insertions(+), 5 deletions(-) create mode 100644 example_scripts/exec_timeagg.R create mode 100644 recipes/examples/recipe_tas_seasonal_timeagg.yml diff --git a/example_scripts/exec_timeagg.R b/example_scripts/exec_timeagg.R new file mode 100644 index 00000000..1f34193c --- /dev/null +++ b/example_scripts/exec_timeagg.R @@ -0,0 +1,41 @@ +rm(list=ls()) +gc() +setwd("/esarchive/scratch/nperez/git/auto-s2s") + +source("modules/Loading/Loading.R") +source("modules/Calibration/Calibration.R") +source("modules/Anomalies/Anomalies.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") +source("tools/prepare_outputs.R") +source("modules/Aggregtation/Aggregation.") + +# Read recipe +#args = commandArgs(trailingOnly = TRUE) +#recipe_file <- args[1] +#recipe <- read_atomic_recipe(recipe_file) +## to test a single recipe: + # recipe_file <- "recipes/examples/recipe_tas_seasonal_timeagg.yml" + # recipe_file <- "recipes/examples/recipe_prlr_seasonal_units.yml" + +recipe <- prepare_outputs(recipe_file) + +# Load datasets +data <- Loading(recipe) +# Units transformation +source("modules/Units/Units.R") +test <- Units(recipe, data) +# Temporal aggregation +test <- Aggregation(recipe = recipe, data = data) +# Calibrate datasets +data <- Calibration(recipe, test) +# Compute skill metrics +skill_metrics <- Skill(recipe, data) +# Compute percentiles and probability bins +probabilities <- Probabilities(recipe, data) +# Export all data to netCDF +## TODO: Fix plotting +# save_data(recipe, data, skill_metrics, probabilities) +# Plot data +Visualization(recipe, data, skill_metrics, probabilities, significance = T) diff --git a/modules/Aggregation/Aggregation.R b/modules/Aggregation/Aggregation.R index 6a026e57..445d1d55 100644 --- a/modules/Aggregation/Aggregation.R +++ b/modules/Aggregation/Aggregation.R @@ -86,12 +86,13 @@ agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm , ncores) { } x[[1]]$dims <- dim(x[[1]]$data) x[[1]]$coords$time <- 1:length(ini) - attr(x[[1]]$coords$time, "indices") <- TRUE - x[[1]]$attrs$time_bounds$start <- x[[1]]$attrs$Dates + attr(x[[1]]$coords$time, "indices") <- TRUE + tmp_dates <- Subset(x[[1]]$attrs$Dates, along = 'time', + indices = ini) + x[[1]]$attrs$time_bounds$start <- tmp_dates x[[1]]$attrs$time_bounds$end <- Subset(x[[1]]$attrs$Dates, along = 'time', indices = end) - x[[1]]$attrs$Dates <- Subset(x[[1]]$attrs$Dates, along = 'time', - indices = ini) + x[[1]]$attrs$Dates <- tmp_dates return(x) } diff --git a/modules/Visualization/R/plot_metrics.R b/modules/Visualization/R/plot_metrics.R index 92b707fa..e909a1f9 100644 --- a/modules/Visualization/R/plot_metrics.R +++ b/modules/Visualization/R/plot_metrics.R @@ -206,7 +206,14 @@ plot_metrics <- function(recipe, data_cube, metrics, # Define titles toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), "\n", display_name, " / ", hcst_period) - titles <- as.vector(months) + ## time_bounds in data_cube to know if Time_aggregation was applied + if (!is.null(data_cube$attrs$time_bounds$start)) { + info(logger, paste("Time_bounds found in data_cube", + "using them for titles")) + titles <- "What" + } else { + titles <- as.vector(months) + } ## TODO: Combine PlotLayout with PlotRobinson? suppressWarnings( PlotLayout(PlotEquiMap, c('longitude', 'latitude'), diff --git a/recipes/examples/recipe_tas_seasonal_timeagg.yml b/recipes/examples/recipe_tas_seasonal_timeagg.yml new file mode 100644 index 00000000..b62ac79a --- /dev/null +++ b/recipes/examples/recipe_tas_seasonal_timeagg.yml @@ -0,0 +1,70 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: tas + freq: monthly_mean + units: K + Datasets: + System: + name: ECMWF-SEAS5.1 # Mandatory, str: system5c3s system21_m1 system35c3s + Multimodel: no # Mandatory, bool: Either yes/true or no/false + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0601' ## MMDD + fcst_year: '2023' # Optional, int: Forecast year 'YYYY' + hcst_start: '2009' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + latmin: 30 # Mandatory, int: minimum latitude + latmax: 50 # Mandatory, int: maximum latitude + lonmin: -10 # Mandatory, int: minimum longitude + lonmax: 10 # Mandatory, int: maximum longitude + Regrid: + method: bilinear # Mandatory, str: Interpolation method. See docu. + type: "to_system" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: no + cross_validation: no + save: none + Time_aggregation: + execute: yes + method: average + ini: [1, 3, 2] + end: [3, 6, 4] + Calibration: + execute: yes + method: evmos # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss bss10 bss90 + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. + save: 'all' + Indicators: + index: no + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles + multi_panel: no + mask_terciles: both + file_format: PNG + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: TRUE + output_dir: /esarchive/scratch/nperez/cs_oper/ + code_dir: /esarchive/scratch/nperez/git/s2s-suite/ -- GitLab From f7baf09576063ceb6fcce9a40483e5ec95c650f6 Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 9 May 2024 18:44:38 +0200 Subject: [PATCH 04/25] plotting attr in time_bound --- example_scripts/exec_timeagg.R | 6 +-- modules/Aggregation/Aggregation.R | 6 ++- modules/Visualization/R/plot_metrics.R | 52 ++++++++++++++----- .../ex0_1_sample_dataset/ex0_1-recipe.yml | 4 +- 4 files changed, 47 insertions(+), 21 deletions(-) diff --git a/example_scripts/exec_timeagg.R b/example_scripts/exec_timeagg.R index 1f34193c..3b688985 100644 --- a/example_scripts/exec_timeagg.R +++ b/example_scripts/exec_timeagg.R @@ -25,11 +25,11 @@ recipe <- prepare_outputs(recipe_file) data <- Loading(recipe) # Units transformation source("modules/Units/Units.R") -test <- Units(recipe, data) +data <- Units(recipe, data) # Temporal aggregation -test <- Aggregation(recipe = recipe, data = data) +data <- Aggregation(recipe = recipe, data = data) # Calibrate datasets -data <- Calibration(recipe, test) +data <- Calibration(recipe, data) # Compute skill metrics skill_metrics <- Skill(recipe, data) # Compute percentiles and probability bins diff --git a/modules/Aggregation/Aggregation.R b/modules/Aggregation/Aggregation.R index 445d1d55..c45370fd 100644 --- a/modules/Aggregation/Aggregation.R +++ b/modules/Aggregation/Aggregation.R @@ -64,10 +64,12 @@ agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm , ncores) { # list of vectors for the indices from ini to end pairs indices <- lapply(1:length(ini), function(x) { ini[x]:end[x]}) + plotting_attr <- list(ini_ftime = ini, end_ftime = end) } else { # take the firs and las element of each indices list for time_bounds saving ini <- unlist(lapply(indices, function(x){x[1]})) end <- unlist(lapply(indices, function(x){x[length(x)]})) + plotting_attr <- list(names(indices)) } if (method == 'average') { x[[1]]$data <- Apply(x[[1]]$data, target_dim = 'time', @@ -92,7 +94,7 @@ agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm , ncores) { x[[1]]$attrs$time_bounds$start <- tmp_dates x[[1]]$attrs$time_bounds$end <- Subset(x[[1]]$attrs$Dates, along = 'time', indices = end) - x[[1]]$attrs$Dates <- tmp_dates - + attributes(x[[1]]$attrs$time_bounds)$plotting_attr <- plotting_attr + x[[1]]$attrs$Dates <- tmp_dates return(x) } diff --git a/modules/Visualization/R/plot_metrics.R b/modules/Visualization/R/plot_metrics.R index e909a1f9..87baaebe 100644 --- a/modules/Visualization/R/plot_metrics.R +++ b/modules/Visualization/R/plot_metrics.R @@ -172,7 +172,6 @@ plot_metrics <- function(recipe, data_cube, metrics, col_sup <- NULL } - # Reorder dimensions metric <- Reorder(metric, c("time", "longitude", "latitude")) # If the significance has been requested and the variable has it, @@ -207,10 +206,15 @@ plot_metrics <- function(recipe, data_cube, metrics, toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), "\n", display_name, " / ", hcst_period) ## time_bounds in data_cube to know if Time_aggregation was applied - if (!is.null(data_cube$attrs$time_bounds$start)) { - info(logger, paste("Time_bounds found in data_cube", - "using them for titles")) - titles <- "What" + if (!is.null(attributes(data_cube$attrs$time_bounds))) { + info(recipe$Run$logger, "Using plotting attrs from time_bounds.") + titles <- unlist( + lapply(1:length(attributes(data_cube$attrs$time_bounds)$plotting_attr$ini_ftime), + function(x) { + paste("Forecast time", + attributes(data_cube$attrs$time_bounds)$plotting_attr$ini_ftime[x], + "to", + attributes(data_cube$attrs$time_bounds)$plotting_attr$end_ftime[x])})) } else { titles <- as.vector(months) } @@ -273,17 +277,37 @@ plot_metrics <- function(recipe, data_cube, metrics, # Loop over forecast times for (i in 1:dim(metric)[['time']]) { # Get forecast time label - forecast_time <- match(months[i], month.name) - init_month + 1 + # Case without time aggregation: + if (is.null(attributes(data_cube$attrs$time_bounds))) { + forecast_time <- match(months[i], month.name) - init_month + 1 - if (forecast_time < 1) { - forecast_time <- forecast_time + 12 + if (forecast_time < 1) { + forecast_time <- forecast_time + 12 + } + forecast_time <- sprintf("%02d", forecast_time) + # Define plot title + toptitle <- paste(system_name, "/", + str_to_title(var_long_name), + "\n", display_name, "/", months[i], "/", + hcst_period) + } else { + forecast_time_ini <-attributes(data_cube$attrs$time_bounds)$plotting_attr$ini[i] + forecast_time_end <- attributes(data_cube$attrs$time_bounds)$plotting_attr$end[i] + # labels for file name: + forecast_time <- paste0(forecast_time_ini, "-", forecast_time_end) + # title names: + forecast_time_ini <- init_month + forecast_time_ini - 1 + forecat_time_ini <- ifelse(forecast_time_ini > 12, forecast_time_ini - 12, forecast_time_ini) + forecast_time_ini <- month.name[forecast_time_ini] + forecast_time_end <- init_month + forecast_time_end - 1 + forecat_time_end <- ifelse(forecast_time_end > 12, forecast_time_end - 12, forecast_time_end) + forecast_time_end <- month.name[forecast_time_end] + toptitle <- paste(system_name, "/", + str_to_title(var_long_name), + "\n", display_name, "/", forecast_time_ini, "to", + forecast_time_end, "/", + hcst_period) } - forecast_time <- sprintf("%02d", forecast_time) - # Define plot title - toptitle <- paste(system_name, "/", - str_to_title(var_long_name), - "\n", display_name, "/", months[i], "/", - hcst_period) # Modify base arguments base_args[[1]] <- metric[i, , ] if (!is.null(metric_significance)) { diff --git a/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml b/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml index 7c8fcb2d..d36b0324 100644 --- a/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml +++ b/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml @@ -31,8 +31,8 @@ Analysis: Time_aggregation: execute: yes user_def: - - [1, 3] - - !expr sort(c(seq(1,120, 12), seq(2,120,13), seq(3, 120, 14))) + ftimes: [1, 3] + JJA: !expr sort(c(seq(1,120, 12), seq(2,120,13), seq(3, 120, 14))) method: average ini: [1, 2, 1] end: [2, 3, 3] -- GitLab From 36e11d219bab785da5843975eef9bf1b1f0d35b2 Mon Sep 17 00:00:00 2001 From: nperez Date: Fri, 10 May 2024 11:41:00 +0200 Subject: [PATCH 05/25] working version for ini and end plots --- example_scripts/exec_timeagg.R | 9 ++-- modules/Visualization/R/plot_ensemble_mean.R | 40 +++++++++++++--- .../R/plot_most_likely_terciles_map.R | 46 +++++++++++++++---- 3 files changed, 77 insertions(+), 18 deletions(-) diff --git a/example_scripts/exec_timeagg.R b/example_scripts/exec_timeagg.R index 3b688985..6444bc70 100644 --- a/example_scripts/exec_timeagg.R +++ b/example_scripts/exec_timeagg.R @@ -1,6 +1,5 @@ rm(list=ls()) gc() -setwd("/esarchive/scratch/nperez/git/auto-s2s") source("modules/Loading/Loading.R") source("modules/Calibration/Calibration.R") @@ -9,7 +8,8 @@ source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") source("tools/prepare_outputs.R") -source("modules/Aggregtation/Aggregation.") +source("modules/Units/Units.R") +source("modules/Aggregation/Aggregation.R") # Read recipe #args = commandArgs(trailingOnly = TRUE) @@ -24,7 +24,6 @@ recipe <- prepare_outputs(recipe_file) # Load datasets data <- Loading(recipe) # Units transformation -source("modules/Units/Units.R") data <- Units(recipe, data) # Temporal aggregation data <- Aggregation(recipe = recipe, data = data) @@ -38,4 +37,6 @@ probabilities <- Probabilities(recipe, data) ## TODO: Fix plotting # save_data(recipe, data, skill_metrics, probabilities) # Plot data -Visualization(recipe, data, skill_metrics, probabilities, significance = T) +Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, + probabilities = probabilities, significance = T) + diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R index 59881864..cd4d9bb7 100644 --- a/modules/Visualization/R/plot_ensemble_mean.R +++ b/modules/Visualization/R/plot_ensemble_mean.R @@ -118,8 +118,26 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o } toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), "\n", "Forecast Ensemble Mean / ", "Init.: ", i_syear) - months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], + if (is.null(attributes(fcst$attrs$time_bounds))) { + months <- lubridate::month( + fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], label = T, abb = F) + } else { + months <- unlist( + lapply(1:length(fcst$attrs$time_bounds$start), function(i) { + ftime_ini <- attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i] + ftime_end <- attributes(fcst$attrs$time_bounds)$plotting_attr$end[i] + # labels for file name: + ftime <- paste0(ftime_ini, "-", ftime_end) + # title names: + ftime_ini <- init_month + ftime_ini - 1 + ftime_ini <- ifelse(ftime_ini > 12, ftime_ini - 12, ftime_ini) + ftime_ini <- month.name[ftime_ini] + ftime_end <- init_month + ftime_end - 1 + ftime_end <- ifelse(ftime_end > 12, ftime_end - 12, ftime_end) + ftime_end <- month.name[ftime_end] + toptitle <- paste(ftime_ini, "to", ftime_end)})) + } years <- lubridate::year(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ]) if (recipe$Analysis$Workflow$Visualization$multi_panel) { @@ -174,11 +192,15 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o # Loop over forecast times for (i in 1:length(months)) { # Get forecast time label - forecast_time <- match(months[i], month.name) - init_month + 1 - if (forecast_time < 1) { - forecast_time <- forecast_time + 12 + if (is.null(attributes(fcst$attrs$time_bounds))) { + forecast_time <- match(months[i], month.name) - init_month + 1 + if (forecast_time < 1) { + forecast_time <- forecast_time + 12 + } + forecast_time <- sprintf("%02d", forecast_time) + } else { + forecast_time <- months } - forecast_time <- sprintf("%02d", forecast_time) # Get mask subset if (!is.null(mask)) { mask_i <- Subset(var_mask, along = 'time', indices = i, drop = TRUE) @@ -209,7 +231,13 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o } # Modify base arguments base_args[[1]] <- i_var_ens_mean[i, , ] - fileout <- paste0(outfile, "_ft", sprintf("%02d", i), ".pdf") + if (is.null(attributes(fcst$attrs$time_bounds))) { + fileout <- paste0(outfile, "_ft", sprintf("%02d", i), ".pdf") + } else { + fileout <- paste0(outfile, "_ft", + attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i], "-", + attributes(fcst$attrs$time_bounds)$plotting_attr$end[i], ".pdf") + } base_args$mask <- mask_i base_args$dots <- dots_i # Plot diff --git a/modules/Visualization/R/plot_most_likely_terciles_map.R b/modules/Visualization/R/plot_most_likely_terciles_map.R index c9ce70f2..ea0f2b90 100644 --- a/modules/Visualization/R/plot_most_likely_terciles_map.R +++ b/modules/Visualization/R/plot_most_likely_terciles_map.R @@ -42,7 +42,6 @@ plot_most_likely_terciles <- function(recipe, ## TODO: Sort out decadal initial month (is it always January?) init_month <- 1 } - # Retrieve and rearrange probability bins for the forecast if (is.null(probabilities$probs_fcst$prob_b33) || is.null(probabilities$probs_fcst$prob_33_to_66) || @@ -113,8 +112,27 @@ plot_most_likely_terciles <- function(recipe, toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), "\n", "Most Likely Tercile / Initialization: ", i_syear) - months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], - label = T, abb = F,locale = "en_GB") + if (is.null(attributes(fcst$attrs$time_bounds))) { + months <- lubridate::month( + fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], + label = T, abb = F) + } else { + months <- unlist( + lapply(1:length(fcst$attrs$time_bounds$start), function(i) { + ftime_ini <- attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i] + ftime_end <- attributes(fcst$attrs$time_bounds)$plotting_attr$end[i] + # labels for file name: + ftime <- paste0(ftime_ini, "-", ftime_end) + # title names: + ftime_ini <- init_month + ftime_ini - 1 + ftime_ini <- ifelse(ftime_ini > 12, ftime_ini - 12, ftime_ini) + ftime_ini <- month.name[ftime_ini] + ftime_end <- init_month + ftime_end - 1 + ftime_end <- ifelse(ftime_end > 12, ftime_end - 12, ftime_end) + ftime_end <- month.name[ftime_end] + toptitle <- paste(ftime_ini, "to", ftime_end)})) + } + years <- lubridate::year(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ]) if (recipe$Analysis$Workflow$Visualization$multi_panel) { ## TODO: Ensure this works for daily and sub-daily cases @@ -170,11 +188,16 @@ plot_most_likely_terciles <- function(recipe, base_args[names(output_configuration)] <- output_configuration for (i in 1:length(months)) { # Get forecast time label - forecast_time <- match(months[i], month.name) - init_month + 1 - if (forecast_time < 1) { - forecast_time <- forecast_time + 12 + if (is.null(attributes(fcst$attrs$time_bounds))) { + forecast_time <- match(months[i], month.name) - init_month + 1 + if (forecast_time < 1) { + forecast_time <- forecast_time + 12 + } + forecast_time <- sprintf("%02d", forecast_time) + } else { + forecast_time <- months } - forecast_time <- sprintf("%02d", forecast_time) + # Get mask subset if (!is.null(mask)) { mask_i <- Subset(var_mask, along = 'time', indices = i, drop = TRUE) @@ -196,7 +219,14 @@ plot_most_likely_terciles <- function(recipe, format(as.Date(i_syear, format="%Y%m%d"), "%d-%m-%Y")) # Plot - fileout <- paste0(outfile, "_ft", forecast_time, ".pdf") + if (is.null(attributes(fcst$attrs$time_bounds))) { + fileout <- paste0(outfile, "_ft", sprintf("%02d", i), ".pdf") + } else { + fileout <- paste0(outfile, "_ft", + attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i], "-", + attributes(fcst$attrs$time_bounds)$plotting_attr$end[i], ".pdf") + } + base_args$probs <- i_var_probs[i, , , ] base_args$mask <- mask_i base_args$dots <- dots_i -- GitLab From 27578fe3789f835dc9bcc8afc739ccdb97e0273e Mon Sep 17 00:00:00 2001 From: nperez Date: Fri, 10 May 2024 15:54:00 +0200 Subject: [PATCH 06/25] tesing decadal plots and user_def param --- example_scripts/exec_timeagg.R | 2 +- example_scripts/test_decadal.R | 11 +-- modules/Aggregation/Aggregation.R | 2 +- modules/Visualization/R/plot_ensemble_mean.R | 69 ++++++++++++------- modules/Visualization/R/plot_metrics.R | 47 ++++++++----- .../R/plot_most_likely_terciles_map.R | 61 ++++++++++------ recipes/atomic_recipes/recipe_decadal.yml | 28 +++++--- .../examples/recipe_prlr_seasonal_timeagg.yml | 63 +++++++++++++++++ tools/prepare_outputs.R | 2 +- tools/read_atomic_recipe.R | 2 +- 10 files changed, 205 insertions(+), 82 deletions(-) create mode 100644 recipes/examples/recipe_prlr_seasonal_timeagg.yml diff --git a/example_scripts/exec_timeagg.R b/example_scripts/exec_timeagg.R index 6444bc70..66df62b8 100644 --- a/example_scripts/exec_timeagg.R +++ b/example_scripts/exec_timeagg.R @@ -17,7 +17,7 @@ source("modules/Aggregation/Aggregation.R") #recipe <- read_atomic_recipe(recipe_file) ## to test a single recipe: # recipe_file <- "recipes/examples/recipe_tas_seasonal_timeagg.yml" - # recipe_file <- "recipes/examples/recipe_prlr_seasonal_units.yml" + # recipe_file <- "recipes/examples/recipe_prlr_seasonal_timeagg.yml" recipe <- prepare_outputs(recipe_file) diff --git a/example_scripts/test_decadal.R b/example_scripts/test_decadal.R index 12daa540..de9eae6a 100644 --- a/example_scripts/test_decadal.R +++ b/example_scripts/test_decadal.R @@ -9,7 +9,7 @@ source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") - +source("modules/Aggregation/Aggregation.R") recipe_file <- "recipes/atomic_recipes/recipe_decadal.yml" recipe <- prepare_outputs(recipe_file) # archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive @@ -17,16 +17,19 @@ recipe <- prepare_outputs(recipe_file) # Load datasets data <- Loading(recipe) +data <- Aggregation(recipe = recipe, data = data) # Calibrate datasets calibrated_data <- Calibration(recipe, data) # Compute skill metrics -skill_metrics <- Skill(recipe, calibrated_data) +skill_metrics <- Skill(recipe, data) # Compute percentiles and probability bins -probabilities <- Probabilities(recipe, calibrated_data) +probabilities <- Probabilities(recipe, data) # Plot data -Visualization(recipe, calibrated_data, skill_metrics, probabilities, +Visualization(recipe = recipe, data = data, + skill_metrics = skill_metrics, + probabilities = probabilities, significance = T) diff --git a/modules/Aggregation/Aggregation.R b/modules/Aggregation/Aggregation.R index c45370fd..cb23a0dd 100644 --- a/modules/Aggregation/Aggregation.R +++ b/modules/Aggregation/Aggregation.R @@ -35,7 +35,7 @@ Aggregation <- function(recipe, data) { result <- data[x] }}, simplify = TRUE) } - } else if (!is.ll(custom)) { + } else if (!is.null(custom)) { # caluclate aggreagtions from fmonth1 to ftmonth2 # for decadal, it makes sense to get all seasons and annual means too ## Ex: January, February, March for decadal simulations aggregation diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R index cd4d9bb7..261dcc74 100644 --- a/modules/Visualization/R/plot_ensemble_mean.R +++ b/modules/Visualization/R/plot_ensemble_mean.R @@ -123,20 +123,24 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], label = T, abb = F) } else { - months <- unlist( - lapply(1:length(fcst$attrs$time_bounds$start), function(i) { - ftime_ini <- attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i] - ftime_end <- attributes(fcst$attrs$time_bounds)$plotting_attr$end[i] - # labels for file name: - ftime <- paste0(ftime_ini, "-", ftime_end) - # title names: - ftime_ini <- init_month + ftime_ini - 1 - ftime_ini <- ifelse(ftime_ini > 12, ftime_ini - 12, ftime_ini) - ftime_ini <- month.name[ftime_ini] - ftime_end <- init_month + ftime_end - 1 - ftime_end <- ifelse(ftime_end > 12, ftime_end - 12, ftime_end) - ftime_end <- month.name[ftime_end] - toptitle <- paste(ftime_ini, "to", ftime_end)})) + if (length(attributes(fcst$attrs$time_bounds)$plotting_attr) > 1) { + months <- unlist( + lapply(1:length(fcst$attrs$time_bounds$start), function(i) { + ftime_ini <- attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i] + ftime_end <- attributes(fcst$attrs$time_bounds)$plotting_attr$end[i] + # labels for file name: + ftime <- paste0(ftime_ini, "-", ftime_end) + # title names: + ftime_ini <- init_month + ftime_ini - 1 + ftime_ini <- ifelse(ftime_ini > 12, ftime_ini - 12, ftime_ini) + ftime_ini <- month.name[ftime_ini] + ftime_end <- init_month + ftime_end - 1 + ftime_end <- ifelse(ftime_end > 12, ftime_end - 12, ftime_end) + ftime_end <- month.name[ftime_end] + toptitle <- paste(ftime_ini, "to", ftime_end)})) + } else { + months <- attributes(fcst$attrs$time_bounds)$plotting_attr[[1]] + } } years <- lubridate::year(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ]) @@ -199,7 +203,7 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o } forecast_time <- sprintf("%02d", forecast_time) } else { - forecast_time <- months + forecast_time <- months } # Get mask subset if (!is.null(mask)) { @@ -213,14 +217,23 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o } else { dots_i <- NULL } - # Define plot title - toptitle <- paste0(system_name, " / ", - str_to_title(var_long_name), - "\n", "Ensemble Mean / ", - months[i], " ", years[i], - " / Start date: ", - format(as.Date(i_syear, format="%Y%m%d"), - "%d-%m-%Y")) + # Define plot title + if (tolower(recipe$Analysis$Horizon) == 'seasonal') { + toptitle <- paste0(system_name, " / ", + str_to_title(var_long_name), + "\n", "Ensemble Mean / ", + months[i], " ", years[i], + " / Start date: ", + format(as.Date(i_syear, format="%Y%m%d"), + "%d-%m-%Y")) + } else { + toptitle <- paste0(system_name, " / ", + str_to_title(var_long_name), + "\n", "Ensemble Mean / ", + months[i], + " / Start date: ", + i_syear) + } # Define caption if (identical(fun, PlotRobinson)) { ## TODO: Customize technical details @@ -234,9 +247,13 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o if (is.null(attributes(fcst$attrs$time_bounds))) { fileout <- paste0(outfile, "_ft", sprintf("%02d", i), ".pdf") } else { - fileout <- paste0(outfile, "_ft", - attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i], "-", - attributes(fcst$attrs$time_bounds)$plotting_attr$end[i], ".pdf") + if (length(attributes(fcst$attrs$time_bounds)$plotting_attr) > 1) { + fileout <- paste0(outfile, "_ft", + attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i], "-", + attributes(fcst$attrs$time_bounds)$plotting_attr$end[i], ".pdf") + } else { + fileout <- paste0(outfile, "_ft", months[i], ".pdf") + } } base_args$mask <- mask_i base_args$dots <- dots_i diff --git a/modules/Visualization/R/plot_metrics.R b/modules/Visualization/R/plot_metrics.R index 87baaebe..2df06e8b 100644 --- a/modules/Visualization/R/plot_metrics.R +++ b/modules/Visualization/R/plot_metrics.R @@ -208,13 +208,17 @@ plot_metrics <- function(recipe, data_cube, metrics, ## time_bounds in data_cube to know if Time_aggregation was applied if (!is.null(attributes(data_cube$attrs$time_bounds))) { info(recipe$Run$logger, "Using plotting attrs from time_bounds.") - titles <- unlist( + if (length(attributes(data_cube$attrs$time_bounds)$plotting_attr) > 1) { + titles <- unlist( lapply(1:length(attributes(data_cube$attrs$time_bounds)$plotting_attr$ini_ftime), function(x) { paste("Forecast time", attributes(data_cube$attrs$time_bounds)$plotting_attr$ini_ftime[x], "to", attributes(data_cube$attrs$time_bounds)$plotting_attr$end_ftime[x])})) + } else { + titles <- attributes(data_cube$attrs$time_bounds)$plotting_attr[[1]] + } } else { titles <- as.vector(months) } @@ -291,22 +295,31 @@ plot_metrics <- function(recipe, data_cube, metrics, "\n", display_name, "/", months[i], "/", hcst_period) } else { - forecast_time_ini <-attributes(data_cube$attrs$time_bounds)$plotting_attr$ini[i] - forecast_time_end <- attributes(data_cube$attrs$time_bounds)$plotting_attr$end[i] - # labels for file name: - forecast_time <- paste0(forecast_time_ini, "-", forecast_time_end) - # title names: - forecast_time_ini <- init_month + forecast_time_ini - 1 - forecat_time_ini <- ifelse(forecast_time_ini > 12, forecast_time_ini - 12, forecast_time_ini) - forecast_time_ini <- month.name[forecast_time_ini] - forecast_time_end <- init_month + forecast_time_end - 1 - forecat_time_end <- ifelse(forecast_time_end > 12, forecast_time_end - 12, forecast_time_end) - forecast_time_end <- month.name[forecast_time_end] - toptitle <- paste(system_name, "/", - str_to_title(var_long_name), - "\n", display_name, "/", forecast_time_ini, "to", - forecast_time_end, "/", - hcst_period) + if (length(attributes(data_cube$attrs$time_bounds)$plotting_attr) > 1) { + forecast_time_ini <-attributes(data_cube$attrs$time_bounds)$plotting_attr$ini[i] + forecast_time_end <- attributes(data_cube$attrs$time_bounds)$plotting_attr$end[i] + # labels for file name: + forecast_time <- paste0(forecast_time_ini, "-", forecast_time_end) + # title names: + forecast_time_ini <- init_month + forecast_time_ini - 1 + forecat_time_ini <- ifelse(forecast_time_ini > 12, forecast_time_ini - 12, forecast_time_ini) + forecast_time_ini <- month.name[forecast_time_ini] + forecast_time_end <- init_month + forecast_time_end - 1 + forecat_time_end <- ifelse(forecast_time_end > 12, forecast_time_end - 12, forecast_time_end) + forecast_time_end <- month.name[forecast_time_end] + toptitle <- paste(system_name, "/", + str_to_title(var_long_name), + "\n", display_name, "/", forecast_time_ini, "to", + forecast_time_end, "/", + hcst_period) + } else { + forecast_time <- attributes(data_cube$attrs$time_bounds)$plotting_attr[[1]][i] + toptitle <- paste(system_name, "/", + str_to_title(var_long_name), + "\n", display_name, "/", + forecast_time, "/", + hcst_period) + } } # Modify base arguments base_args[[1]] <- metric[i, , ] diff --git a/modules/Visualization/R/plot_most_likely_terciles_map.R b/modules/Visualization/R/plot_most_likely_terciles_map.R index ea0f2b90..5455f0f6 100644 --- a/modules/Visualization/R/plot_most_likely_terciles_map.R +++ b/modules/Visualization/R/plot_most_likely_terciles_map.R @@ -117,20 +117,24 @@ plot_most_likely_terciles <- function(recipe, fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], label = T, abb = F) } else { - months <- unlist( - lapply(1:length(fcst$attrs$time_bounds$start), function(i) { - ftime_ini <- attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i] - ftime_end <- attributes(fcst$attrs$time_bounds)$plotting_attr$end[i] - # labels for file name: - ftime <- paste0(ftime_ini, "-", ftime_end) - # title names: - ftime_ini <- init_month + ftime_ini - 1 - ftime_ini <- ifelse(ftime_ini > 12, ftime_ini - 12, ftime_ini) - ftime_ini <- month.name[ftime_ini] - ftime_end <- init_month + ftime_end - 1 - ftime_end <- ifelse(ftime_end > 12, ftime_end - 12, ftime_end) - ftime_end <- month.name[ftime_end] - toptitle <- paste(ftime_ini, "to", ftime_end)})) + if (length(attributes(fcst$attrs$time_bounds)$plotting_attr) > 1) { + months <- unlist( + lapply(1:length(fcst$attrs$time_bounds$start), function(i) { + ftime_ini <- attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i] + ftime_end <- attributes(fcst$attrs$time_bounds)$plotting_attr$end[i] + # labels for file name: + ftime <- paste0(ftime_ini, "-", ftime_end) + # title names: + ftime_ini <- init_month + ftime_ini - 1 + ftime_ini <- ifelse(ftime_ini > 12, ftime_ini - 12, ftime_ini) + ftime_ini <- month.name[ftime_ini] + ftime_end <- init_month + ftime_end - 1 + ftime_end <- ifelse(ftime_end > 12, ftime_end - 12, ftime_end) + ftime_end <- month.name[ftime_end] + toptitle <- paste(ftime_ini, "to", ftime_end)})) + } else { + months <- attributes(fcst$attrs$time_bounds)$plotting_attr[[1]] + } } years <- lubridate::year(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ]) @@ -211,20 +215,33 @@ plot_most_likely_terciles <- function(recipe, dots_i <- NULL } # Define plot title - toptitle <- paste0(system_name, " / ", - str_to_title(var_long_name), - "\n", "Most Likely Tercile / ", - months[i], " ", years[i], - " / Start date: ", - format(as.Date(i_syear, format="%Y%m%d"), - "%d-%m-%Y")) + if (tolower(recipe$Analysis$Horizon) == 'seasonal') { + toptitle <- paste0(system_name, " / ", + str_to_title(var_long_name), + "\n", "Most Likely Tercile / ", + months[i], " ", years[i], + " / Start date: ", + format(as.Date(i_syear, format="%Y%m%d"), + "%d-%m-%Y")) + } else { + toptitle <- paste0(system_name, " / ", + str_to_title(var_long_name), + "\n", "Most Likely Tercile / ", + months[i], + " / Start date: ", + i_syear) + } # Plot if (is.null(attributes(fcst$attrs$time_bounds))) { fileout <- paste0(outfile, "_ft", sprintf("%02d", i), ".pdf") } else { - fileout <- paste0(outfile, "_ft", + if (length(attributes(fcst$attrs$time_bounds)$plotting_attr) > 1) { + fileout <- paste0(outfile, "_ft", attributes(fcst$attrs$time_bounds)$plotting_attr$ini[i], "-", attributes(fcst$attrs$time_bounds)$plotting_attr$end[i], ".pdf") + } else { + fileout <- paste0(outfile, "_ft", months[i], ".pdf") + } } base_args$probs <- i_var_probs[i, , , ] diff --git a/recipes/atomic_recipes/recipe_decadal.yml b/recipes/atomic_recipes/recipe_decadal.yml index 26312b34..fe0f2479 100644 --- a/recipes/atomic_recipes/recipe_decadal.yml +++ b/recipes/atomic_recipes/recipe_decadal.yml @@ -18,7 +18,7 @@ Analysis: hcst_start: 1990 hcst_end: 1993 # season: 'Annual' - ftime_min: 2 + ftime_min: 1 ftime_max: 24 Region: latmin: 10 #-90 @@ -29,29 +29,39 @@ Analysis: method: bilinear type: to_system #to_reference Workflow: + Time_aggregation: + execute: yes + method: 'average' + user_def: + JJA: !expr sort(c(seq(6,24, 12), seq(7,24,12), seq(8, 24, 12))) + MMA: !expr sort(c(seq(3,24, 12), seq(4,24,12), seq(5, 24, 12))) + SON: !expr sort(c(seq(9,24, 12), seq(10,24,12), seq(11, 24, 12))) + DJF: !expr sort(c(seq(12,24, 12), seq(13,24,12), seq(2, 24, 12))) + Y1: !expr 1:12 Anomalies: compute: no cross_validation: save: Calibration: method: bias - save: 'all' + save: 'none' Skill: - metric: RPSS Corr - save: 'all' + metric: RPSS EnsCorr Mean_Bias + save: 'none' Probabilities: percentiles: [[1/3, 2/3]] - save: 'all' + save: 'none' Indicators: index: FALSE Visualization: plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles - ncores: # Optional, int: number of cores, defaults to 1 - remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + file_format: PNG + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: TRUE # Optional, bool: Whether NAs are removed, defaults to FALSE Output_format: S2S4E Run: Loglevel: INFO Terminal: yes - output_dir: /esarchive/scratch/aho/git/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/aho/git/auto-s2s/ + output_dir: /esarchive/scratch/nperez/ + code_dir: /esarchive/scratch/nperez/git4/sunset/ diff --git a/recipes/examples/recipe_prlr_seasonal_timeagg.yml b/recipes/examples/recipe_prlr_seasonal_timeagg.yml new file mode 100644 index 00000000..e03428ac --- /dev/null +++ b/recipes/examples/recipe_prlr_seasonal_timeagg.yml @@ -0,0 +1,63 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: prlr + freq: monthly_mean + units: mm + Datasets: + System: + name: ECMWF-SEAS5.1 # Mandatory, str: system5c3s system21_m1 system35c3s + Multimodel: no # Mandatory, bool: Either yes/true or no/false + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0601' ## MMDD + fcst_year: # Optional, int: Forecast year 'YYYY' + hcst_start: '2014' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + latmin: 30 # Mandatory, int: minimum latitude + latmax: 50 # Mandatory, int: maximum latitude + lonmin: -10 # Mandatory, int: minimum longitude + lonmax: 10 # Mandatory, int: maximum longitude + Regrid: + method: bilinear # Mandatory, str: Interpolation method. See docu. + type: "to_system" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: no + cross_validation: no + save: none + Calibration: + method: evmos # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss bss10 bss90 + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. + save: 'all' + Indicators: + index: no + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles + multi_panel: no + mask_terciles: both + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: TRUE + output_dir: /esarchive/scratch/nperez/cs_oper/ + code_dir: /esarchive/scratch/nperez/git/s2s-suite/ diff --git a/tools/prepare_outputs.R b/tools/prepare_outputs.R index c571a3b8..c545542a 100644 --- a/tools/prepare_outputs.R +++ b/tools/prepare_outputs.R @@ -28,7 +28,7 @@ prepare_outputs <- function(recipe_file, # recipe_file: path to recipe YAML file # disable_checks: If TRUE, does not perform checks on recipe # disable_uniqueID: If TRUE, does not add a unique ID to output dir - recipe <- read_yaml(recipe_file) + recipe <- read_yaml(recipe_file, eval.exp = TRUE) recipe$recipe_path <- recipe_file recipe$name <- tools::file_path_sans_ext(basename(recipe_file)) diff --git a/tools/read_atomic_recipe.R b/tools/read_atomic_recipe.R index fb26cb11..3cb11e7e 100644 --- a/tools/read_atomic_recipe.R +++ b/tools/read_atomic_recipe.R @@ -23,7 +23,7 @@ read_atomic_recipe <- function(recipe_file) { # recipe_file: path to recipe YAML file - recipe <- read_yaml(recipe_file) + recipe <- read_yaml(recipe_file, eval.exp = TRUE) recipe$recipe_path <- recipe_file recipe$name <- tools::file_path_sans_ext(basename(recipe_file)) # Create log file for atomic recipe -- GitLab From 7857d1adfa4ff54727178a5dc69741733ce45512 Mon Sep 17 00:00:00 2001 From: nperez Date: Fri, 10 May 2024 16:38:01 +0200 Subject: [PATCH 07/25] test saving --- recipes/atomic_recipes/recipe_decadal.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/recipes/atomic_recipes/recipe_decadal.yml b/recipes/atomic_recipes/recipe_decadal.yml index fe0f2479..2028fc46 100644 --- a/recipes/atomic_recipes/recipe_decadal.yml +++ b/recipes/atomic_recipes/recipe_decadal.yml @@ -44,13 +44,13 @@ Analysis: save: Calibration: method: bias - save: 'none' + save: 'all' Skill: metric: RPSS EnsCorr Mean_Bias - save: 'none' + save: 'all' Probabilities: percentiles: [[1/3, 2/3]] - save: 'none' + save: 'all' Indicators: index: FALSE Visualization: -- GitLab From 9863ac0850e01eba8c4725f58e4cee0ac90a6f2c Mon Sep 17 00:00:00 2001 From: nperez Date: Tue, 14 May 2024 17:46:01 +0200 Subject: [PATCH 08/25] fix dimensions order --- modules/Aggregation/Aggregation.R | 6 + .../recipe-seasonal_monthly_1_timeagg.yml | 61 ++++++ .../testthat/test-seasonal_monthly_timeagg.R | 177 ++++++++++++++++++ 3 files changed, 244 insertions(+) create mode 100644 tests/recipes/recipe-seasonal_monthly_1_timeagg.yml create mode 100644 tests/testthat/test-seasonal_monthly_timeagg.R diff --git a/modules/Aggregation/Aggregation.R b/modules/Aggregation/Aggregation.R index cb23a0dd..5eab4aff 100644 --- a/modules/Aggregation/Aggregation.R +++ b/modules/Aggregation/Aggregation.R @@ -71,6 +71,7 @@ agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm , ncores) { end <- unlist(lapply(indices, function(x){x[length(x)]})) plotting_attr <- list(names(indices)) } + original_dims <- names(dim(x[[1]]$data)) if (method == 'average') { x[[1]]$data <- Apply(x[[1]]$data, target_dim = 'time', function(y, ind) { @@ -86,6 +87,11 @@ agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm , ncores) { } else { stop("Unknown method") } + # Check in case only 1 aggregation only is requested to add time dim back: + if (!('time' %in% names(dim(data[[1]]$data)))) { + dim(x[[1]]$data) <- c( dim(x[[1]]$data), time = 1) + } + x[[1]]$data <- Reorder(x[[1]]$data, original_dims) x[[1]]$dims <- dim(x[[1]]$data) x[[1]]$coords$time <- 1:length(ini) attr(x[[1]]$coords$time, "indices") <- TRUE diff --git a/tests/recipes/recipe-seasonal_monthly_1_timeagg.yml b/tests/recipes/recipe-seasonal_monthly_1_timeagg.yml new file mode 100644 index 00000000..0cd5746f --- /dev/null +++ b/tests/recipes/recipe-seasonal_monthly_1_timeagg.yml @@ -0,0 +1,61 @@ +Description: + Author: V. Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: Meteo-France-System7 + Multimodel: False + Reference: + name: ERA5 + Time: + sdate: '1101' + fcst_year: '2020' + hcst_start: '1993' + hcst_end: '1996' + ftime_min: 1 + ftime_max: 4 + Region: + latmin: 17 + latmax: 20 + lonmin: 12 + lonmax: 15 + Regrid: + method: bilinear + type: to_system + Workflow: + # Anomalies: + # compute: no + # cross_validation: + # save: 'none' + Time_aggregation: + execute: yes #do we need this? + method: average #accumulated + ini: [1, 2] # aggregate from 1 to 2; from 2 to 3 and from 1 to 3 + end: [3, 4] + + Calibration: + method: mse_min + save: 'all' + Skill: + metric: RPSS CRPSS EnsCorr Corr_individual_members Enscorr_specs + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] + save: 'all' + Indicators: + index: no + Visualization: + plots: skill_metrics most_likely_terciles forecast_ensemble_mean + multi_panel: yes + projection: Robinson + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: ./tests/out-logs/ + code_dir: /esarchive/scratch/nperez/git4/sunset/ diff --git a/tests/testthat/test-seasonal_monthly_timeagg.R b/tests/testthat/test-seasonal_monthly_timeagg.R new file mode 100644 index 00000000..2b08d388 --- /dev/null +++ b/tests/testthat/test-seasonal_monthly_timeagg.R @@ -0,0 +1,177 @@ +context("Seasonal monthly data") + +source("modules/Loading/Loading.R") +source("modules/Calibration/Calibration.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") +source("modules/Aggregation/Aggregation.R") + +recipe_file <- "tests/recipes/recipe-seasonal_monthly_1_timeagg.yml" +recipe <- prepare_outputs(recipe_file, disable_checks = F) +archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive + +# Load datasets +suppressWarnings({invisible(capture.output( +data <- Loading(recipe) +))}) + +# Aggregated data +suppressWarnings({invisible(capture.output( +aggregated_data <- Aggregation(recipe, data) +))}) + +# Calibrate data +suppressWarnings({invisible(capture.output( +calibrated_data <- Calibration(recipe, aggregated_data) +))}) + +# Compute skill metrics +suppressWarnings({invisible(capture.output( +skill_metrics <- Skill(recipe, calibrated_data) +))}) + +suppressWarnings({invisible(capture.output( +probs <- Probabilities(recipe, calibrated_data) +))}) + +# Saving +suppressWarnings({invisible(capture.output( +Saving(recipe = recipe, data = calibrated_data, + skill_metrics = skill_metrics, probabilities = probs) +))}) + +# Plotting +suppressWarnings({invisible(capture.output( +Visualization(recipe = recipe, data = calibrated_data, + skill_metrics = skill_metrics, probabilities = probs, + significance = T) +))}) +outdir <- get_dir(recipe = recipe, variable = data$hcst$attrs$Variable$varName) + +# ------- TESTS -------- + +test_that("1. Time Aggregation", { +expect_equal( +is.list(aggregated_data), +TRUE +) +expect_equal( +names(aggregated_data), +c("hcst", "fcst", "obs") +) +expect_equal( +class(aggregated_data$hcst), +"s2dv_cube" +) +expect_equal( +class(aggregated_data$fcst), +"s2dv_cube" +) +expect_equal( +dim(aggregated_data$hcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 2, latitude = 3, longitude = 3, ensemble = 25) +) +expect_equal( +dim(aggregated_data$fcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 2, latitude = 3, longitude = 3, ensemble = 51) +) +expect_equal( +mean(aggregated_data$fcst$data), +291.6194, +tolerance = 0.0001 +) +expect_equal( +mean(aggregated_data$hcst$data), +290.6241, +tolerance = 0.0001 +) +expect_equal( +as.vector(drop(aggregated_data$hcst$data)[1, , 2, 3, 4]), +c(290.5897, 290.2554), +tolerance = 0.0001 +) + +expect_equal( +range(aggregated_data$fcst$data), +c(287.4032, 296.3530), +tolerance = 0.0001 +) + +}) + + +#====================================== +test_that("3. Metrics", { + +expect_equal( +is.list(skill_metrics), +TRUE +) +expect_equal( +names(skill_metrics), +c("rpss", "rpss_significance", "crpss", "crpss_significance", "enscorr", + "enscorr_significance", "corr_individual_members", "corr_individual_members_significance", + "enscorr_specs") +) +expect_equal( +class(skill_metrics$rpss), +"array" +) +expect_equal( +dim(skill_metrics$rpss), +c(var = 1, time = 2, latitude = 3, longitude = 3) +) +expect_equal( +dim(skill_metrics$rpss_significance), +dim(skill_metrics$rpss) +) +expect_equal( +as.vector(skill_metrics$rpss[, , 2, 3]), +c(-1.384229, -1.147657), +tolerance = 0.0001 +) +expect_equal( +as.vector(skill_metrics$rpss_significance[, , 2, 3]), +rep(FALSE, 2) +) + +}) + +test_that("4. Saving", { +outputs <- paste0(recipe$Run$output_dir, "/outputs/") +expect_equal( +all(basename(list.files(outputs, recursive = T)) %in% +c("tas_19931101.nc", "tas_19941101.nc", "tas_19951101.nc", + "tas_19961101.nc", "tas_20201101.nc", "tas-corr_month11.nc", + "tas-obs_19931101.nc", "tas-obs_19941101.nc", "tas-obs_19951101.nc", + "tas-obs_19961101.nc", "tas-percentiles_month11.nc", "tas-probs_19931101.nc", + "tas-probs_19941101.nc", "tas-probs_19951101.nc", "tas-probs_19961101.nc", + "tas-probs_20201101.nc", "tas-skill_month11.nc")), +TRUE +) +expect_equal( +length(list.files(outputs, recursive = T)), +17 +) + +}) + +test_that("5. Visualization", { +plots <- paste0(recipe$Run$output_dir, "/plots/") +expect_equal( +all(basename(list.files(plots, recursive = T)) %in% +c("crpss-november.pdf", "enscorr_specs-november.pdf", "enscorr-november.pdf", + "forecast_ensemble_mean-20201101.pdf", "forecast_most_likely_tercile-20201101.pdf", + "rpss-november.pdf")), +TRUE +) +expect_equal( +length(list.files(plots, recursive = T)), +6 +) + +}) + +# Delete files +unlink(recipe$Run$output_dir, recursive = T) -- GitLab From 616121f3710422f905aa4d78ee36d1b1522b2f88 Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 15 May 2024 12:48:58 +0200 Subject: [PATCH 09/25] test time_bounds attributes --- .../testthat/test-seasonal_monthly_timeagg.R | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tests/testthat/test-seasonal_monthly_timeagg.R b/tests/testthat/test-seasonal_monthly_timeagg.R index 2b08d388..21977d9c 100644 --- a/tests/testthat/test-seasonal_monthly_timeagg.R +++ b/tests/testthat/test-seasonal_monthly_timeagg.R @@ -57,6 +57,38 @@ is.list(aggregated_data), TRUE ) expect_equal( +"time_bounds" %in% names(aggregated_data$hcst$attrs), +TRUE +) +expect_equal( +is.list(aggregated_data$hcst$attrs$time_bounds), +TRUE +) +expect_equal( +names(aggregated_data$hcst$attrs$time_bounds), +c("start", "end") +) +expect_equal( +dim(aggregated_data$hcst$attrs$time_bounds$start), +dim(aggregated_data$hcst$attrs$time_bounds$end) +) +expect_equal( +dim(aggregated_data$hcst$attrs$time_bounds$start), +dim(aggregated_data$hcst$attrs$Dates) +) +expect_equal( +"plotting_attr" %in% names(attributes(aggregated_data$hcst$attrs$time_bounds)), +TRUE +) +expect_equal( +attributes(aggregated_data$hcst$attrs$time_bounds)$plotting_attr$ini_ftime, +c(1,2) +) +expect_equal( +attributes(aggregated_data$hcst$attrs$time_bounds)$plotting_attr$end_ftime, +c(3,4) +) +expect_equal( names(aggregated_data), c("hcst", "fcst", "obs") ) -- GitLab From 6472d20fe272fd59c4b6a00d86723b84284ecb33 Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 15 May 2024 17:52:56 +0200 Subject: [PATCH 10/25] check recipe --- tools/check_recipe.R | 73 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 72 insertions(+), 1 deletion(-) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index be38ae71..68637281 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -301,7 +301,78 @@ check_recipe <- function(recipe) { # --------------------------------------------------------------------- # WORKFLOW CHECKS # --------------------------------------------------------------------- - + # Time_Aggregation + if ("Time_aggregation" %in% names(recipe$Analysis$Workflow)) { + if (!("execute" %in% names(recipe$Analysis$Workflow$Time_aggregation))) { + recipe$Analysis$Workflow$Time_aggregation$execute <- FALSE + } else { + if (recipe$Analysis$Workflow$Time_aggregation$execute) { + if (!("method" %in% names(recipe$Analysis$Workflow$Time_aggregation))) { + error(recipe$Run$logger, + "No Time_aggregation method defined in recipe.") + } else { + if (!tolower(recipe$Analysis$Workflow$Time_aggregation$method) %in% + c('average', 'accumulation')) { + error(recipe$Run$logger, + "Time_aggregation method should be either average or accumulation.") + } + } + if ((any(!c('ini','end') %in% names(recipe$Analysis$Workflow$Time_aggregation)) && + !'user_def' %in% names(recipe$Analysis$Workflow$Time_aggregation))) { + error(recipe$Run$logger, + paste("Missing parameters in Time aggregation section", + "add 'ini' and 'end' or 'user_def'.")) + } + if (all(c('ini', 'end') %in% + tolower(names(recipe$Analysis$Workflow$Time_aggregation)))) { + if (!is.numeric(recipe$Analysis$Workflow$Time_aggregation$ini) || + !is.numeric(recipe$Analysis$Workflow$Time_aggregation$end)) { + error(recipe$Run$logger, + "ini and end in Time aggregation need to be numeric") + } else { + if (length(recipe$Analysis$Workflow$Time_aggregation$ini) != + length(recipe$Analysis$Workflow$Time_aggregation$end)) { + error(recipe$Run$logger, + paste("The numeber of ini and end parameters need to be", + "the same for Time aggregation")) + } + max_ind <- max(c(recipe$Analysis$Workflow$Time_aggregation$ini, + recipe$Analysis$Workflow$Time_aggregation$end)) + if (recipe$Analysis$Time$ftime_max < max_ind) { + error(recipe$Run$logger, + paste("Forecast time max required is smaller than indices", + "requested for time aggregation")) + } + } + } + if ('user_def' %in% tolower(names(recipe$Analysis$Workflow$Time_aggregation))) { + if (c('ini', 'end') %in% + tolower(names(recipe$Analysis$Workflow$Time_aggregation))) { + recipe$Analysis$Workflow$Time_aggregation$user_def <- NULL + warn(recipe$Run$logger, + "Only 'ini' and 'end' indices are used for time agg.") + } + if (is.list(recipe$Analysis$Workflow$Time_aggregation$user_def)) { + if (is.character(names(recipe$Analysis$Workflow$Time_aggregation$user_def))) { + if (!all(lapply(recipe$Analysis$Workflow$Time_aggregation$user_def, + is.numeric))) { + error(recipe$Run$logger, + "User_def indices in Time aggregation are expected", + "to be numeric.") + } + } else { + warn(recipe$Run$logger, + paste("Not names provided to the list of user_def indices", + "Time aggregation, review plots and file names.")) + } + } else { + error(recipe$Run$logger, + "user_def in Time aggregation should be a named list") + } + } + } + } + } # Calibration # If 'method' is FALSE/no/'none' or NULL, set to 'raw' ## TODO: Review this check -- GitLab From 769cff64918dda33bb52e2c231b64f0010cb70e8 Mon Sep 17 00:00:00 2001 From: nperez Date: Thu, 16 May 2024 12:17:43 +0200 Subject: [PATCH 11/25] fix if condition --- tools/check_recipe.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 68637281..0a412b61 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -346,8 +346,8 @@ check_recipe <- function(recipe) { } } if ('user_def' %in% tolower(names(recipe$Analysis$Workflow$Time_aggregation))) { - if (c('ini', 'end') %in% - tolower(names(recipe$Analysis$Workflow$Time_aggregation))) { + if (all(c('ini', 'end') %in% + tolower(names(recipe$Analysis$Workflow$Time_aggregation)))) { recipe$Analysis$Workflow$Time_aggregation$user_def <- NULL warn(recipe$Run$logger, "Only 'ini' and 'end' indices are used for time agg.") -- GitLab From 02a96204fafc2c3f223a4fc4e802f15c61f064bf Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 17 May 2024 14:49:41 +0200 Subject: [PATCH 12/25] Create file for agg_ini_end(); formatting --- modules/Aggregation/Aggregation.R | 57 ++++------------------------- modules/Aggregation/R/agg_ini_end.R | 56 ++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 50 deletions(-) create mode 100644 modules/Aggregation/R/agg_ini_end.R diff --git a/modules/Aggregation/Aggregation.R b/modules/Aggregation/Aggregation.R index 5eab4aff..82d911ed 100644 --- a/modules/Aggregation/Aggregation.R +++ b/modules/Aggregation/Aggregation.R @@ -6,9 +6,11 @@ ## Time_aggregation$end based on forecast time # For date definition of period aggregation consider using Indicators module ## ini = 2, end = 3 with monthly freq would be a 2 months agg +source("modules/Aggregation/R/agg_ini_end.R") + Aggregation <- function(recipe, data) { -## Do we need this checks here? + ## TODO: Move checks to recipe checker ncores <- recipe$Analysis$ncores # is it already checked? NULL or number na.rm <- ifelse(is.null(recipe$Analysis$remove_NAs), TRUE, recipe$Analysis$remove_NAs) # is it already checked? TRUE/FALSE @@ -19,8 +21,8 @@ Aggregation <- function(recipe, data) { ini <- recipe$Analysis$Workflow$Time_aggregation$ini end <- recipe$Analysis$Workflow$Time_aggregation$end method <- tolower(recipe$Analysis$Workflow$Time_aggregation$method) -## Instead of raw freq may be better to use ini/end -## if (raw_freq %in% c('daily', 'daily_mean', 'weekly', 'weekly_mean')) { + ## Instead of raw freq may be better to use ini/end + ## if (raw_freq %in% c('daily', 'daily_mean', 'weekly', 'weekly_mean')) { if (!is.null(ini) && !is.null(end)) { # calculate aggregations from fday1 to fday2 # calculate aggregations from fweek1 to fweek2 @@ -56,51 +58,6 @@ Aggregation <- function(recipe, data) { stop("Temporal aggregation not implemented yet") } } -} - -agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm , ncores) { - # to make the code work with both options: - if (!is.null(ini) && is.null(indices)) { - # list of vectors for the indices from ini to end pairs - indices <- lapply(1:length(ini), function(x) { - ini[x]:end[x]}) - plotting_attr <- list(ini_ftime = ini, end_ftime = end) - } else { - # take the firs and las element of each indices list for time_bounds saving - ini <- unlist(lapply(indices, function(x){x[1]})) - end <- unlist(lapply(indices, function(x){x[length(x)]})) - plotting_attr <- list(names(indices)) - } - original_dims <- names(dim(x[[1]]$data)) - if (method == 'average') { - x[[1]]$data <- Apply(x[[1]]$data, target_dim = 'time', - function(y, ind) { - sapply(1:length(indices), function(z) { - mean(y[indices[[z]]], na.rm = na.rm)})}, ind = indices, output_dims = 'time', - ncores = ncores)$output1 - } else if (method == 'accumulated') { - x[[1]]$data <- Apply(x[[1]]$data, target_dim = 'time', - function(y, ind) { - sapply(1:length(indices), function(z) { - sum(y[indices[[z]]], na.rm = na.rm)})}, ind = indices, output_dims = 'time', - ncores = ncores)$output1 - } else { - stop("Unknown method") - } - # Check in case only 1 aggregation only is requested to add time dim back: - if (!('time' %in% names(dim(data[[1]]$data)))) { - dim(x[[1]]$data) <- c( dim(x[[1]]$data), time = 1) - } - x[[1]]$data <- Reorder(x[[1]]$data, original_dims) - x[[1]]$dims <- dim(x[[1]]$data) - x[[1]]$coords$time <- 1:length(ini) - attr(x[[1]]$coords$time, "indices") <- TRUE - tmp_dates <- Subset(x[[1]]$attrs$Dates, along = 'time', - indices = ini) - x[[1]]$attrs$time_bounds$start <- tmp_dates - x[[1]]$attrs$time_bounds$end <- Subset(x[[1]]$attrs$Dates, along = 'time', - indices = end) - attributes(x[[1]]$attrs$time_bounds)$plotting_attr <- plotting_attr - x[[1]]$attrs$Dates <- tmp_dates - return(x) + info(recipe$Run$logger, + "##### TIME AGGREGATION COMPLETE #####") } diff --git a/modules/Aggregation/R/agg_ini_end.R b/modules/Aggregation/R/agg_ini_end.R new file mode 100644 index 00000000..17c6940a --- /dev/null +++ b/modules/Aggregation/R/agg_ini_end.R @@ -0,0 +1,56 @@ +agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm ,ncores) { + # to make the code work with both options: + if (!is.null(ini) && is.null(indices)) { + # list of vectors for the indices from ini to end pairs + indices <- lapply(1:length(ini), function(x) { + ini[x]:end[x]}) + plotting_attr <- list(ini_ftime = ini, end_ftime = end) + } else { + # take the first and last element of each indices list for time_bounds saving + ini <- unlist(lapply(indices, function(x){x[1]})) + end <- unlist(lapply(indices, function(x){x[length(x)]})) + plotting_attr <- list(names(indices)) + } + original_dims <- names(dim(x[[1]]$data)) + if (method == 'average') { + x[[1]]$data <- Apply(x[[1]]$data, + target_dim = 'time', + function(y, ind) { + sapply(1:length(indices), + function(z) {mean(y[indices[[z]]], na.rm = na.rm)}) + }, + ind = indices, + output_dims = 'time', + ncores = ncores)$output1 + } else if (method == 'accumulated') { + x[[1]]$data <- Apply(x[[1]]$data, + target_dim = 'time', + function(y, ind) { + sapply(1:length(indices), + function(z) {sum(y[indices[[z]]], na.rm = na.rm)}) + }, + ind = indices, + output_dims = 'time', + ncores = ncores)$output1 + } else { + stop("Unknown method") + } + # Check in case only 1 aggregation only is requested to add time dim back: + if (!('time' %in% names(dim(data[[1]]$data)))) { + dim(x[[1]]$data) <- c(dim(x[[1]]$data), time = 1) + } + x[[1]]$data <- Reorder(x[[1]]$data, original_dims) + x[[1]]$dims <- dim(x[[1]]$data) + x[[1]]$coords$time <- 1:length(ini) + attr(x[[1]]$coords$time, "indices") <- TRUE + tmp_dates <- Subset(x[[1]]$attrs$Dates, + along = 'time', + indices = ini) + x[[1]]$attrs$time_bounds$start <- tmp_dates + x[[1]]$attrs$time_bounds$end <- Subset(x[[1]]$attrs$Dates, + along = 'time', + indices = end) + attributes(x[[1]]$attrs$time_bounds)$plotting_attr <- plotting_attr + x[[1]]$attrs$Dates <- tmp_dates + return(x) +} -- GitLab From 152b91bc9329cc0e1f5da5d65449432b2cf67741 Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 17 May 2024 14:53:51 +0200 Subject: [PATCH 13/25] Indentation --- modules/Visualization/R/plot_ensemble_mean.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R index 261dcc74..ff91adf4 100644 --- a/modules/Visualization/R/plot_ensemble_mean.R +++ b/modules/Visualization/R/plot_ensemble_mean.R @@ -121,7 +121,7 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o if (is.null(attributes(fcst$attrs$time_bounds))) { months <- lubridate::month( fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], - label = T, abb = F) + label = T, abb = F) } else { if (length(attributes(fcst$attrs$time_bounds)$plotting_attr) > 1) { months <- unlist( @@ -203,7 +203,7 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o } forecast_time <- sprintf("%02d", forecast_time) } else { - forecast_time <- months + forecast_time <- months } # Get mask subset if (!is.null(mask)) { -- GitLab From cb0c369b83b519023ae2187c263834d364d6e68a Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 17 May 2024 16:21:39 +0200 Subject: [PATCH 14/25] Correct name of most likely terciles plots, indentation --- .../Visualization/R/plot_most_likely_terciles_map.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/Visualization/R/plot_most_likely_terciles_map.R b/modules/Visualization/R/plot_most_likely_terciles_map.R index 5455f0f6..739f2d10 100644 --- a/modules/Visualization/R/plot_most_likely_terciles_map.R +++ b/modules/Visualization/R/plot_most_likely_terciles_map.R @@ -115,7 +115,7 @@ plot_most_likely_terciles <- function(recipe, if (is.null(attributes(fcst$attrs$time_bounds))) { months <- lubridate::month( fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], - label = T, abb = F) + label = T, abb = F) } else { if (length(attributes(fcst$attrs$time_bounds)$plotting_attr) > 1) { months <- unlist( @@ -233,7 +233,7 @@ plot_most_likely_terciles <- function(recipe, } # Plot if (is.null(attributes(fcst$attrs$time_bounds))) { - fileout <- paste0(outfile, "_ft", sprintf("%02d", i), ".pdf") + fileout <- paste0(outfile, "_ft", forecast_time, ".pdf") } else { if (length(attributes(fcst$attrs$time_bounds)$plotting_attr) > 1) { fileout <- paste0(outfile, "_ft", @@ -248,9 +248,9 @@ plot_most_likely_terciles <- function(recipe, base_args$mask <- mask_i base_args$dots <- dots_i cb_info <- do.call(PlotMostLikelyQuantileMap, - args = c(base_args, - list(toptitle = toptitle, - fileout = fileout))) + args = c(base_args, + list(toptitle = toptitle, + fileout = fileout))) # Add color bars with 1 range for normal category: for (i_bar in 1:cb_info$nmap) { tmp <- cb_info -- GitLab From 6dae4ca6fc5f4b5aabdb9fcad1f3197f4210a72c Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 17 May 2024 16:21:55 +0200 Subject: [PATCH 15/25] Add return statement to Aggregation() --- modules/Aggregation/Aggregation.R | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/Aggregation/Aggregation.R b/modules/Aggregation/Aggregation.R index 82d911ed..6be8ed39 100644 --- a/modules/Aggregation/Aggregation.R +++ b/modules/Aggregation/Aggregation.R @@ -58,6 +58,7 @@ Aggregation <- function(recipe, data) { stop("Temporal aggregation not implemented yet") } } + return(res) info(recipe$Run$logger, "##### TIME AGGREGATION COMPLETE #####") } -- GitLab From 5ce79e339e119bd48d4255df0e7c10f519d3c9af Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 17 May 2024 17:07:06 +0200 Subject: [PATCH 16/25] Add error_status to TimeAggregation recipe checks --- tools/check_recipe.R | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 0a412b61..a995a1ca 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -302,26 +302,31 @@ check_recipe <- function(recipe) { # WORKFLOW CHECKS # --------------------------------------------------------------------- # Time_Aggregation - if ("Time_aggregation" %in% names(recipe$Analysis$Workflow)) { + if ("Time_aggregation" %in% names(recipe$Analysis$Workflow)) { + TIME_AGG_METHODS <- c("average", "accumulation") if (!("execute" %in% names(recipe$Analysis$Workflow$Time_aggregation))) { recipe$Analysis$Workflow$Time_aggregation$execute <- FALSE } else { if (recipe$Analysis$Workflow$Time_aggregation$execute) { if (!("method" %in% names(recipe$Analysis$Workflow$Time_aggregation))) { error(recipe$Run$logger, - "No Time_aggregation method defined in recipe.") + "'Workflow:Time_aggregation:method' is not defined in the recipe.") + error_status <- TRUE } else { if (!tolower(recipe$Analysis$Workflow$Time_aggregation$method) %in% - c('average', 'accumulation')) { + TIME_AGG_METHODS) { error(recipe$Run$logger, - "Time_aggregation method should be either average or accumulation.") + paste0("Time_aggregation method should be one of: ", + paste(TIME_AGG_METHODS, collapse = ", "), ".")) + error_status <- TRUE } } - if ((any(!c('ini','end') %in% names(recipe$Analysis$Workflow$Time_aggregation)) && + if ((any(!c('ini', 'end') %in% names(recipe$Analysis$Workflow$Time_aggregation)) && !'user_def' %in% names(recipe$Analysis$Workflow$Time_aggregation))) { - error(recipe$Run$logger, - paste("Missing parameters in Time aggregation section", - "add 'ini' and 'end' or 'user_def'.")) + error(recipe$Run$logger, + paste("Missing parameters in Time_aggregation section; please", + "add 'ini' and 'end' or 'user_def'.")) + error_status <- TRUE } if (all(c('ini', 'end') %in% tolower(names(recipe$Analysis$Workflow$Time_aggregation)))) { @@ -329,19 +334,22 @@ check_recipe <- function(recipe) { !is.numeric(recipe$Analysis$Workflow$Time_aggregation$end)) { error(recipe$Run$logger, "ini and end in Time aggregation need to be numeric") + error_status <- TRUE } else { if (length(recipe$Analysis$Workflow$Time_aggregation$ini) != length(recipe$Analysis$Workflow$Time_aggregation$end)) { error(recipe$Run$logger, paste("The numeber of ini and end parameters need to be", "the same for Time aggregation")) + error_status <- TRUE } max_ind <- max(c(recipe$Analysis$Workflow$Time_aggregation$ini, recipe$Analysis$Workflow$Time_aggregation$end)) if (recipe$Analysis$Time$ftime_max < max_ind) { error(recipe$Run$logger, - paste("Forecast time max required is smaller than indices", + paste("The requested max forecast time is smaller than indices", "requested for time aggregation")) + error_status <- TRUE } } } @@ -350,24 +358,28 @@ check_recipe <- function(recipe) { tolower(names(recipe$Analysis$Workflow$Time_aggregation)))) { recipe$Analysis$Workflow$Time_aggregation$user_def <- NULL warn(recipe$Run$logger, - "Only 'ini' and 'end' indices are used for time agg.") + paste("Both 'ini' and 'end' and 'user_def' are defined under", + "'Time_aggregation'. Only 'ini' and 'end' indices will", + "be used.")) } if (is.list(recipe$Analysis$Workflow$Time_aggregation$user_def)) { if (is.character(names(recipe$Analysis$Workflow$Time_aggregation$user_def))) { if (!all(lapply(recipe$Analysis$Workflow$Time_aggregation$user_def, is.numeric))) { error(recipe$Run$logger, - "User_def indices in Time aggregation are expected", - "to be numeric.") + "'Time_aggregation:user_def' are expected to be numeric.") + error_status <- TRUE } } else { warn(recipe$Run$logger, - paste("Not names provided to the list of user_def indices", - "Time aggregation, review plots and file names.")) + paste("No names provided to the list of 'Time_aggregation:user_def'", + "indices. Review plots and file names to make sure they", + "are correct.")) } } else { error(recipe$Run$logger, - "user_def in Time aggregation should be a named list") + "'Time_aggregation:user_def' should be a named list.") + error_status <- TRUE } } } -- GitLab From 5f686a05f8474e5816f352d730e061b205462a4f Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 21 May 2024 11:30:13 +0200 Subject: [PATCH 17/25] Add time_bounds to SaveExp() --- modules/Saving/R/save_metrics.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/modules/Saving/R/save_metrics.R b/modules/Saving/R/save_metrics.R index 800004d7..68b76ad3 100644 --- a/modules/Saving/R/save_metrics.R +++ b/modules/Saving/R/save_metrics.R @@ -95,9 +95,14 @@ save_metrics <- function(recipe, if (any('syear' %in% names(dim(subset_metric[[i]])))) { sdate_dim_save = 'syear' dates <- data_cube$attrs$Dates + time_bounds <- data_cube$attrs$time_bounds } else { sdate_dim_save = NULL dates <- Subset(data_cube$attrs$Dates, along = 'syear', indices = 1) + time_bounds <- lapply(data_cube$attrs$time_bounds, + FUN = function (x) { + Subset(x, along = 'syear', indices = 1) + }) } ## TODO: Maybe 'scorecards' condition could go here to further simplify ## the code @@ -106,7 +111,8 @@ save_metrics <- function(recipe, SaveExp(data = subset_metric[[i]], destination = outdir, Dates = dates, coords = c(data_cube$coords['longitude'], - data_cube$coords['latitude']), + data_cube$coords['latitude']), + time_bounds = time_bounds, varname = names(subset_metric)[[i]], metadata = data_cube$attrs$Variable$metadata, Datasets = NULL, startdates = NULL, dat_dim = NULL, sdate_dim = sdate_dim_save, -- GitLab From 99ff79bf00b3d4ff7f73a6b607ded2c15755d399 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 21 May 2024 12:07:39 +0200 Subject: [PATCH 18/25] Fix pipeline --- modules/Saving/R/save_metrics.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/modules/Saving/R/save_metrics.R b/modules/Saving/R/save_metrics.R index 68b76ad3..d8ebf9c6 100644 --- a/modules/Saving/R/save_metrics.R +++ b/modules/Saving/R/save_metrics.R @@ -13,6 +13,7 @@ save_metrics <- function(recipe, dictionary <- read_yaml("conf/variable-dictionary.yml") global_attributes <- .get_global_attributes(recipe, archive) + time_bounds <- NULL ## TODO: Sort out the logic once default behavior is decided if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && (recipe$Analysis$Workflow$Anomalies$compute)) { @@ -99,10 +100,12 @@ save_metrics <- function(recipe, } else { sdate_dim_save = NULL dates <- Subset(data_cube$attrs$Dates, along = 'syear', indices = 1) - time_bounds <- lapply(data_cube$attrs$time_bounds, - FUN = function (x) { - Subset(x, along = 'syear', indices = 1) - }) + if (!is.null(data_cube$attrs$time_bounds)) { + time_bounds <- lapply(data_cube$attrs$time_bounds, + FUN = function (x) { + Subset(x, along = 'syear', indices = 1) + }) + } } ## TODO: Maybe 'scorecards' condition could go here to further simplify ## the code -- GitLab From 8702ecd0b636fb150377c6819ece7d99ee683665 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 21 May 2024 16:49:07 +0200 Subject: [PATCH 19/25] Expand get_times() to get metadata for time bounds, refactor saving functions --- modules/Saving/R/get_time.R | 30 --------------- modules/Saving/R/get_times.R | 55 +++++++++++++++++++++++++++ modules/Saving/R/save_corr.R | 14 ++----- modules/Saving/R/save_forecast.R | 37 +++++------------- modules/Saving/R/save_metrics.R | 36 ++++++------------ modules/Saving/R/save_observations.R | 13 ++----- modules/Saving/R/save_percentiles.R | 11 +----- modules/Saving/R/save_probabilities.R | 15 ++------ modules/Saving/Saving.R | 2 +- 9 files changed, 89 insertions(+), 124 deletions(-) delete mode 100644 modules/Saving/R/get_time.R create mode 100644 modules/Saving/R/get_times.R diff --git a/modules/Saving/R/get_time.R b/modules/Saving/R/get_time.R deleted file mode 100644 index 5426c177..00000000 --- a/modules/Saving/R/get_time.R +++ /dev/null @@ -1,30 +0,0 @@ -get_times <- function(store.freq, fcst.horizon, leadtimes, sdate, calendar) { - # Generates time dimensions and the corresponding metadata. - ## TODO: Subseasonal - - switch(fcst.horizon, - "seasonal" = {time <- leadtimes; ref <- 'hours since '; - stdname <- paste(strtoi(leadtimes), collapse=", ")}, - "subseasonal" = {len <- 4; ref <- 'hours since '; - stdname <- ''}, - "decadal" = {time <- leadtimes; ref <- 'hours since '; - stdname <- paste(strtoi(leadtimes), collapse=", ")}) - - dim(time) <- length(time) - sdate <- as.Date(sdate, format = '%Y%m%d') # reformatting - metadata <- list(time = list(units = paste0(ref, sdate, 'T00:00:00'), - calendar = calendar)) - attr(time, 'variables') <- metadata - names(dim(time)) <- 'time' - - sdate <- 1:length(sdate) - dim(sdate) <- length(sdate) - metadata <- list(sdate = list(standard_name = paste(strtoi(sdate), - collapse=", "), - units = paste0('Init date'))) - attr(sdate, 'variables') <- metadata - names(dim(sdate)) <- 'sdate' - - return(list(time=time)) -} - diff --git a/modules/Saving/R/get_times.R b/modules/Saving/R/get_times.R new file mode 100644 index 00000000..3d261307 --- /dev/null +++ b/modules/Saving/R/get_times.R @@ -0,0 +1,55 @@ +# leadtimes: list 2 arrays, 'start' and 'end', containing the time bounds for +# each leadtime +# sdate: start date in POSIXt/POSIXct format +# calendar: name of the calendar in string format + +.get_times <- function(recipe, data_cube, sdate, calendar, init_date) { + + # Define list to contain metadata for time and time_bnds + times <- list() + # Compute initial date + fcst.horizon <- tolower(recipe$Analysis$Horizon) + # Generate time dimensions and the corresponding metadata. + dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), + cal = calendar) + leadtimes <- as.numeric(dates - init_date)/3600 + + switch(fcst.horizon, + "seasonal" = {time <- leadtimes; ref <- 'hours since '; + stdname <- paste(strtoi(leadtimes), collapse=", ")}, + "subseasonal" = {len <- leadtimes; ref <- 'hours since '; + stdname <- ''}, + "decadal" = {time <- leadtimes; ref <- 'hours since '; + stdname <- paste(strtoi(leadtimes), collapse=", ")}) + dim(time) <- length(time) + sdate <- as.Date(sdate, format = '%Y%m%d') # reformatting + metadata <- list(time = list(units = paste0(ref, sdate, 'T00:00:00'), + calendar = calendar)) + attr(time, 'variables') <- metadata + names(dim(time)) <- 'time' + times$time <- time + # Generate time_bnds dimensions and the corresponding metadata. + if (!is.null(data_cube$attrs$time_bounds)) { + time_bounds <- lapply(data_cube$attrs$time_bounds, + function(x) { + y <- as.PCICt(ClimProjDiags::Subset(x, + along = 'syear', + indices = 1, + drop = 'non-selected'), + cal = calendar) + y <- as.numeric(y - init_date)/3600 + return(y) + }) + # Generates time_bnds dimensions and the corresponding metadata. + time_bnds <- abind(time_bounds, along = 0) + names(dim(time_bnds)) <- c("bnds", "time") + sdate <- as.Date(sdate, format = '%Y%m%d') # reformatting + metadata <- list(time_bnds = list(units = paste0(ref, sdate, 'T00:00:00'), + calendar = calendar, + long_name = "time bounds")) + attr(time_bnds, 'variables') <- metadata + times$time_bnds <- time_bnds + } + return(times) +} + diff --git a/modules/Saving/R/save_corr.R b/modules/Saving/R/save_corr.R index 1eb58b93..47310bf0 100644 --- a/modules/Saving/R/save_corr.R +++ b/modules/Saving/R/save_corr.R @@ -31,9 +31,6 @@ save_corr <- function(recipe, calendar <- archive$System[[global_attributes$system]]$calendar } - # Generate vector containing leadtimes - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) if (fcst.horizon == 'decadal') { init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', @@ -45,9 +42,6 @@ save_corr <- function(recipe, format = '%Y%m%d', cal = calendar) } - # Get time difference in hours - leadtimes <- as.numeric(dates - init_date)/3600 - # Select start date # If a fcst is provided, use that as the ref. year. Otherwise use 1970. if (fcst.horizon == 'decadal') { @@ -67,8 +61,7 @@ save_corr <- function(recipe, } } - times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time + times <- .get_times(recipe, data_cube, fcst.sdate, calendar, init_date) # Loop over variable dimension for (var in 1:data_cube$dims[['var']]) { @@ -115,14 +108,13 @@ save_corr <- function(recipe, # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { country <- .get_countries(grid) - ArrayToNc(append(country, time, subset_skill), outfile) + ArrayToNc(append(country, times, subset_skill), outfile) } else { latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] latlon <- .get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, subset_skill) + vars <- c(latlon, times, subset_skill) ArrayToNc(vars, outfile) } } diff --git a/modules/Saving/R/save_forecast.R b/modules/Saving/R/save_forecast.R index f7afa73c..6e3bc906 100644 --- a/modules/Saving/R/save_forecast.R +++ b/modules/Saving/R/save_forecast.R @@ -25,43 +25,28 @@ save_forecast <- function(recipe, } else { calendar <- archive$System[[global_attributes$system]]$calendar } - - # Generate vector containing leadtimes - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) + + # Compute initial date if (fcst.horizon == 'decadal') { - ## Method 1: Use the first date as init_date. But it may be better to use - ## the real initialized date (ask users) - # init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') - ## Method 2: use initial month - init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month if (type == 'hcst') { - # init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - # sprintf('%02d', init_month), '-01'), - # cal = calendar) - init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$hcst_start)+1, + init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$hcst_start) + 1, '-01-01'), cal = calendar) } else if (type == 'fcst') { - # init_date <- as.PCICt(paste0(recipe$Analysis$Time$fcst_year[1], '-', - # sprintf('%02d', init_month), '-01'), - # cal = calendar) - init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$fcst_year)+1, + init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$fcst_year) + 1, '-01-01'), cal = calendar) } } else { if (type == 'hcst') { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), + recipe$Analysis$Time$sdate), format = '%Y%m%d', cal = calendar) } else if (type == 'fcst') { init_date <- as.PCICt(paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate), + recipe$Analysis$Time$sdate), format = '%Y%m%d', cal = calendar) } } - # Get time difference in hours - leadtimes <- as.numeric(dates - init_date)/3600 - + syears <- seq(1:dim(data_cube$data)['syear'][[1]]) # expect dim = [sday = 1, sweek = 1, syear, time] syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) @@ -132,8 +117,7 @@ save_forecast <- function(recipe, } # Get time dimension values and metadata - times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time + times <- .get_times(recipe, data_cube, fcst.sdate, calendar, init_date) # Generate name of output file outfile <- get_filename(outdir, recipe, variable, fcst.sdate, @@ -142,14 +126,13 @@ save_forecast <- function(recipe, # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { country <- get_countries(grid) - ArrayToNc(append(country, time, fcst), outfile) + ArrayToNc(append(country, times, fcst), outfile) } else { latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] latlon <- .get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, fcst) + vars <- c(latlon, times, fcst) ArrayToNc(vars, outfile) } } diff --git a/modules/Saving/R/save_metrics.R b/modules/Saving/R/save_metrics.R index d8ebf9c6..db7ceeca 100644 --- a/modules/Saving/R/save_metrics.R +++ b/modules/Saving/R/save_metrics.R @@ -26,7 +26,7 @@ save_metrics <- function(recipe, # Time indices and metadata fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq - if (global_attributes$system == 'Multimodel'){ + if (global_attributes$system == 'Multimodel') { if (fcst.horizon == 'decadal'){ calendar <- archive$System[[recipe$Analysis$Datasets$System$models[[1]]$name]]$calendar } else { @@ -35,34 +35,23 @@ save_metrics <- function(recipe, } else { calendar <- archive$System[[global_attributes$system]]$calendar } - - # Generate vector containing leadtimes - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) - - if (fcst.horizon == 'decadal') { + if (fcst.horizon == 'decadal') { # init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month # init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', # sprintf('%02d', init_month), '-01'), # cal = calendar) - init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$hcst_start)+1, + init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$hcst_start)+1, '-01-01'), cal = calendar) } else { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), format = '%Y%m%d', cal = calendar) } - - # Get time difference in hours - leadtimes <- as.numeric(dates - init_date)/3600 - # Select start date # If a fcst is provided, use that as the ref. year. Otherwise use 1970. if (fcst.horizon == 'decadal') { if (!is.null(recipe$Analysis$Time$fcst_year)) { #PROBLEM: May be more than one fcst_year - # fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], - # sprintf('%02d', init_month), '01') fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1]) } else { fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') @@ -76,15 +65,14 @@ save_metrics <- function(recipe, } } - times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time + times <- .get_times(recipe, data_cube, fcst.sdate, calendar, init_date) # Loop over variable dimension for (var in 1:data_cube$dims[['var']]) { # Subset skill arrays subset_metric <- lapply(metrics, function(x) { ClimProjDiags::Subset(x, along = 'var', - indices = var, - drop = 'selected')}) + indices = var, + drop = 'selected')}) # Generate name of output file variable <- data_cube$attrs$Variable$varName[[var]] outdir <- get_dir(recipe = recipe, variable = variable) @@ -158,12 +146,13 @@ save_metrics <- function(recipe, # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { country <- get_countries(grid) - ArrayToNc(append(country, time, subset_metric), outfile) + ArrayToNc(append(country, times$time, subset_metric), outfile) } else if (tolower(agg) == "region") { - region <- array(1:dim(metrics[[1]])['region'], c(dim(metrics[[1]])['region'])) + region <- list(region = array(1:dim(metrics[[1]])['region'], + c(dim(metrics[[1]])['region']))) ## TODO: check metadata when more than 1 region is store in the data array attr(region, 'variables') <- data_cube$attrs$Variable$metadata['region'] - vars <- list(region, time) + vars <- c(region, times) vars <- c(vars, subset_metric) ArrayToNc(vars, outfile) } else { @@ -171,13 +160,12 @@ save_metrics <- function(recipe, longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] latlon <- .get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, subset_metric) + vars <- c(latlon, times, subset_metric) ArrayToNc(vars, outfile) } } } info(recipe$Run$logger, - paste("#####", toupper(module), " METRICS SAVED TO NETCDF FILE #####")) + paste("#####", toupper(module), "METRICS SAVED TO NETCDF FILE #####")) } diff --git a/modules/Saving/R/save_observations.R b/modules/Saving/R/save_observations.R index 0f4d70a1..79469719 100644 --- a/modules/Saving/R/save_observations.R +++ b/modules/Saving/R/save_observations.R @@ -17,10 +17,7 @@ save_observations <- function(recipe, store.freq <- recipe$Analysis$Variables$freq calendar <- archive$Reference[[global_attributes$reference]]$calendar - # Generate vector containing leadtimes ## TODO: Move to a separate function? - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) if (fcst.horizon == 'decadal') { ## Method 1: Use the first date as init_date. But it may be better to use ## the real initialized date (ask users) @@ -37,8 +34,6 @@ save_observations <- function(recipe, recipe$Analysis$Time$sdate), format = '%Y%m%d', cal = calendar) } - # Get time difference in hours - leadtimes <- as.numeric(dates - init_date)/3600 syears <- seq(1:dim(data_cube$data)['syear'][[1]]) ## expect dim = [sday = 1, sweek = 1, syear, time] @@ -120,8 +115,7 @@ save_observations <- function(recipe, fcst.sdate <- format(fcst.sdate, format = '%Y%m%d') # Get time dimension values and metadata - times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time + times <- .get_times(recipe, data_cube, fcst.sdate, calendar, init_date) # Generate name of output file outfile <- get_filename(outdir, recipe, variable, @@ -130,14 +124,13 @@ save_observations <- function(recipe, # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { country <- get_countries(grid) - ArrayToNc(append(country, time, fcst), outfile) + ArrayToNc(append(country, times$time, fcst), outfile) } else { latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] latlon <- .get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, fcst) + vars <- c(latlon, times, fcst) ArrayToNc(vars, outfile) } } diff --git a/modules/Saving/R/save_percentiles.R b/modules/Saving/R/save_percentiles.R index 7b68b2c2..d5dfae16 100644 --- a/modules/Saving/R/save_percentiles.R +++ b/modules/Saving/R/save_percentiles.R @@ -34,8 +34,6 @@ save_percentiles <- function(recipe, } # Generate vector containing leadtimes - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) if (fcst.horizon == 'decadal') { # if (global_attributes$system == 'Multimodel') { # init_month <- 11 #TODO: put as if init_month is January @@ -53,9 +51,6 @@ save_percentiles <- function(recipe, format = '%Y%m%d', cal = calendar) } - # Get time difference in hours - leadtimes <- as.numeric(dates - init_date)/3600 - # Select start date # If a fcst is provided, use that as the ref. year. Otherwise use 1970. if (fcst.horizon == 'decadal') { @@ -75,8 +70,7 @@ save_percentiles <- function(recipe, fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) } } - times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time + times <- .get_times(recipe, data_cube, fcst.sdate, calendar, init_date) for (var in 1:data_cube$dims[['var']]) { # Subset arrays @@ -124,8 +118,7 @@ save_percentiles <- function(recipe, longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] latlon <- .get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, subset_percentiles) + vars <- c(latlon, times, subset_percentiles) ArrayToNc(vars, outfile) } } diff --git a/modules/Saving/R/save_probabilities.R b/modules/Saving/R/save_probabilities.R index 76a5a54c..a9ddc977 100644 --- a/modules/Saving/R/save_probabilities.R +++ b/modules/Saving/R/save_probabilities.R @@ -39,10 +39,6 @@ save_probabilities <- function(recipe, calendar <- archive$System[[global_attributes$system]]$calendar } - # Generate vector containing leadtimes - ## TODO: Move to a separate function? - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) if (fcst.horizon == 'decadal') { # if (global_attributes$system == 'Multimodel') { # init_month <- 11 #TODO: put as if init_month is January @@ -60,9 +56,6 @@ save_probabilities <- function(recipe, format = '%Y%m%d', cal = calendar) } - # Get time difference in hours - leadtimes <- as.numeric(dates - init_date)/3600 - syears <- seq(1:dim(data_cube$data)['syear'][[1]]) ## expect dim = [sday = 1, sweek = 1, syear, time] syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) @@ -119,8 +112,7 @@ save_probabilities <- function(recipe, } # Get time dimension values and metadata - times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time + times <- .get_times(recipe, data_cube, fcst.sdate, calendar, init_date) # Generate name of output file outfile <- get_filename(outdir, recipe, variable, fcst.sdate, agg, @@ -129,14 +121,13 @@ save_probabilities <- function(recipe, # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { country <- get_countries(grid) - ArrayToNc(append(country, time, probs_syear), outfile) + ArrayToNc(append(country, times, probs_syear), outfile) } else { latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] latlon <- .get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, probs_syear) + vars <- c(latlon, times, probs_syear) ArrayToNc(vars, outfile) } } diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 5ce98575..e9e4a5b4 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -10,7 +10,7 @@ source("modules/Saving/R/save_metrics.R") source("modules/Saving/R/save_corr.R") source("modules/Saving/R/save_probabilities.R") source("modules/Saving/R/save_percentiles.R") -source("modules/Saving/R/get_time.R") +source("modules/Saving/R/get_times.R") source("modules/Saving/R/get_latlon.R") source("modules/Saving/R/get_global_attributes.R") source("modules/Saving/R/drop_dims.R") -- GitLab From b5d3147a0791f297a5b9750961350311644f3549 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 21 May 2024 17:13:59 +0200 Subject: [PATCH 20/25] Add 'bounds' attribute to time in get_times() --- modules/Saving/R/get_times.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/Saving/R/get_times.R b/modules/Saving/R/get_times.R index 3d261307..ec36b102 100644 --- a/modules/Saving/R/get_times.R +++ b/modules/Saving/R/get_times.R @@ -24,7 +24,7 @@ dim(time) <- length(time) sdate <- as.Date(sdate, format = '%Y%m%d') # reformatting metadata <- list(time = list(units = paste0(ref, sdate, 'T00:00:00'), - calendar = calendar)) + calendar = calendar)) attr(time, 'variables') <- metadata names(dim(time)) <- 'time' times$time <- time @@ -48,6 +48,7 @@ calendar = calendar, long_name = "time bounds")) attr(time_bnds, 'variables') <- metadata + attr(time, 'variables')$bounds <- "time_bounds" times$time_bnds <- time_bnds } return(times) -- GitLab From 1c19cb91a4bb6583aa2f316451f25217b5d580cd Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 22 May 2024 16:22:09 +0200 Subject: [PATCH 21/25] Improve check, add a TODO --- modules/Aggregation/Aggregation.R | 1 + tools/check_recipe.R | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/Aggregation/Aggregation.R b/modules/Aggregation/Aggregation.R index 6be8ed39..e6846df6 100644 --- a/modules/Aggregation/Aggregation.R +++ b/modules/Aggregation/Aggregation.R @@ -42,6 +42,7 @@ Aggregation <- function(recipe, data) { # for decadal, it makes sense to get all seasons and annual means too ## Ex: January, February, March for decadal simulations aggregation ## custom <- sort(c(seq(1,120, 12), seq(2,120,13), seq(3, 120, 14))) + ## TODO: Remove this check? Already in check_recipe() if (is.list(custom)) { res <- sapply(1:length(data), function(x) { if (!is.null(data[[x]])) { diff --git a/tools/check_recipe.R b/tools/check_recipe.R index a995a1ca..9d607027 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -367,7 +367,8 @@ check_recipe <- function(recipe) { if (!all(lapply(recipe$Analysis$Workflow$Time_aggregation$user_def, is.numeric))) { error(recipe$Run$logger, - "'Time_aggregation:user_def' are expected to be numeric.") + paste0("'Time_aggregation:user_def' elements are ", + "expected to be numeric.") error_status <- TRUE } } else { -- GitLab From 3ad0563a529b348880b76108af60e62224245ade Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 22 May 2024 16:24:28 +0200 Subject: [PATCH 22/25] Fix pipeline --- tools/check_recipe.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 9d607027..7553f919 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -368,7 +368,7 @@ check_recipe <- function(recipe) { is.numeric))) { error(recipe$Run$logger, paste0("'Time_aggregation:user_def' elements are ", - "expected to be numeric.") + "expected to be numeric.")) error_status <- TRUE } } else { -- GitLab From 13a455a5ef0eaea021e0bc268223fd9730e2603a Mon Sep 17 00:00:00 2001 From: vagudets Date: Thu, 23 May 2024 09:04:35 +0200 Subject: [PATCH 23/25] Remove else if for 'Temporal aggregation not implemented yet' --- modules/Aggregation/Aggregation.R | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/modules/Aggregation/Aggregation.R b/modules/Aggregation/Aggregation.R index e6846df6..d5c09fac 100644 --- a/modules/Aggregation/Aggregation.R +++ b/modules/Aggregation/Aggregation.R @@ -38,7 +38,7 @@ Aggregation <- function(recipe, data) { }}, simplify = TRUE) } } else if (!is.null(custom)) { - # caluclate aggreagtions from fmonth1 to ftmonth2 + # calCUlate aggregations from fmonth1 to ftmonth2 # for decadal, it makes sense to get all seasons and annual means too ## Ex: January, February, March for decadal simulations aggregation ## custom <- sort(c(seq(1,120, 12), seq(2,120,13), seq(3, 120, 14))) @@ -47,16 +47,14 @@ Aggregation <- function(recipe, data) { res <- sapply(1:length(data), function(x) { if (!is.null(data[[x]])) { result <- agg_ini_end(data[x], method, na.rm, ncores, - indices = custom, ini = NULL, end = NULL) + indices = custom, + ini = NULL, end = NULL) } else { - result <- data[x]}}, simplify = TRUE) + result <- data[x] + }}, simplify = TRUE) } else { - stop("Why Time_aggregation$user_def is not a list?") + stop("Time_aggregation$user_def is not a list") } - } else { - # higher frequencies are typically used to compute indicators - # lower frequencies are not typically stored in esarchive - stop("Temporal aggregation not implemented yet") } } return(res) -- GitLab From f5fe7528d95ee96d25c6aaec85168c184e88f78d Mon Sep 17 00:00:00 2001 From: vagudets Date: Thu, 23 May 2024 09:44:38 +0200 Subject: [PATCH 24/25] Change lapply() to sapply() to avoid 'coercing list into logical' warning --- tools/check_recipe.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 7553f919..dabd5633 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -364,7 +364,7 @@ check_recipe <- function(recipe) { } if (is.list(recipe$Analysis$Workflow$Time_aggregation$user_def)) { if (is.character(names(recipe$Analysis$Workflow$Time_aggregation$user_def))) { - if (!all(lapply(recipe$Analysis$Workflow$Time_aggregation$user_def, + if (!all(sapply(recipe$Analysis$Workflow$Time_aggregation$user_def, is.numeric))) { error(recipe$Run$logger, paste0("'Time_aggregation:user_def' elements are ", -- GitLab From 068b050d73627906b97a57c16792fbb91b80b3cb Mon Sep 17 00:00:00 2001 From: vagudets Date: Thu, 23 May 2024 14:16:39 +0200 Subject: [PATCH 25/25] Restore example 0.1 recipe --- use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml b/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml index d36b0324..662e75cc 100644 --- a/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml +++ b/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml @@ -28,14 +28,6 @@ Analysis: method: bilinear type: to_system Workflow: - Time_aggregation: - execute: yes - user_def: - ftimes: [1, 3] - JJA: !expr sort(c(seq(1,120, 12), seq(2,120,13), seq(3, 120, 14))) - method: average - ini: [1, 2, 1] - end: [2, 3, 3] Anomalies: compute: yes cross_validation: yes -- GitLab