From 47662c371753407f6d6ac80996df47bceb688992 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 15 Jul 2025 14:48:59 +0200 Subject: [PATCH 01/17] Same as bigpredidata --- .../bigpredidata/script_bigpredidata_oper.R | 15 ++++++-------- .../recipe_bigpredidata_oper_subseasonal.yml | 20 +++++++++++-------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/operationals/bigpredidata/script_bigpredidata_oper.R b/operationals/bigpredidata/script_bigpredidata_oper.R index 4ce1cb59..5249a8a0 100644 --- a/operationals/bigpredidata/script_bigpredidata_oper.R +++ b/operationals/bigpredidata/script_bigpredidata_oper.R @@ -9,6 +9,12 @@ recipe_file <- args[1] #recipe_file <- "/esarchive/scratch/ptrascas/R/dev-test_bigpredidata/sunset/sunset/recipes/recipe_bigpredidata_oper_test_sfcWind.yml" #recipe_file <- "/esarchive/scratch/ptrascas/R/dev-test_bigpredidata/sunset/sunset/recipes/recipe_bigpredidata_oper.yml" recipe <- read_atomic_recipe(recipe_file) + +if (recipe$Analysis$Region$name == "EU") { + recipe$Analysis$Regrid$type <- + "/gpfs/projects/bsc32/esarchive_cache/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20050109.nc" +} + #recipe <- prepare_outputs(recipe_file) # Load datasets data <- Loading(recipe) @@ -25,13 +31,4 @@ skill_metrics <- Crossval_metrics(recipe = recipe, data = data, Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, significance = TRUE, probabilities = data$probs) -#if (!is.null(data$fcst)) { -# # Required to plot a forecast: -# data$fcst <- res$fcst -# Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, -# significance = TRUE, probabilities = res$probs) -#} else { -# Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, -# significance = TRUE) -#} diff --git a/operationals/test/recipe_bigpredidata_oper_subseasonal.yml b/operationals/test/recipe_bigpredidata_oper_subseasonal.yml index 700c420a..45a2342a 100755 --- a/operationals/test/recipe_bigpredidata_oper_subseasonal.yml +++ b/operationals/test/recipe_bigpredidata_oper_subseasonal.yml @@ -6,9 +6,12 @@ Description: Analysis: Horizon: subseasonal # Mandatory, str: either subseasonal, seasonal, or decadal Variables: - - {name: tas, freq: weekly_mean, units: C} - - {name: prlr, freq: weekly_mean, units: mm, flux: no} - - {name: sfcWind, freq: weekly_mean} + - {name: 'tasmin', freq: 'weekly_mean', units: 'C'} + - {name: 'tasmax', freq: 'weekly_mean', units: 'C'} + - {name: 'tas', freq: 'weekly_mean', units: 'C'} + - {name: 'prlr', freq: 'weekly_mean', units: 'mm', flux: no} + - {name: 'sfcWind', freq: 'weekly_mean'} + - {name: 'rsds', freq: 'weekly_mean'} Datasets: System: - {name: 'NCEP-CFSv2', member: 'all'} @@ -26,10 +29,11 @@ Analysis: sweek_window: 9 sday_window: 3 Region: - - {name: "Iberia", latmin: 36, latmax: 44, lonmin: -10, lonmax: 5} + - {name: "Iberia", latmin: 36, latmax: 44, lonmin: -10, lonmax: 5} + - {name: "EU", latmin: 20, latmax: 80, lonmin: -20, lonmax: 40} Regrid: method: bilinear # Mandatory, str: Interpolation method. See docu. - type: "to_system" + type: "to_reference" #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. Workflow: Anomalies: @@ -59,7 +63,7 @@ Analysis: file_format: PNG # Final file format of the plots. Formats available: PNG, JPG, JPEG, EPS. Defaults to PDF. 
ncores: 16 # Optional, int: number of cores, defaults to 1 remove_NAs: TRUE # Optional, bool: Whether NAs are removed, defaults to FALSE - Output_format: scorecards + Output_format: S2S4E logo: yes Run: Loglevel: INFO @@ -70,10 +74,10 @@ Run: autosubmit: yes # fill only if using autosubmit auto_conf: - script: scripts/script_bigpredidata_oper.R # replace with the path to your script + script: operationals/bigpredidata/script_bigpredidata_oper.R # replace with the path to your script expid: a6wq # replace with your EXPID hpc_user: bsc032762 # replace with your hpc username - wallclock: 02:00 # hh:mm + wallclock: 03:00 # hh:mm processors_per_job: 24 custom_directives: platform: amd -- GitLab From 07e1ab52e7f09c49d9d2e776fe0f5973f0e4fdb9 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 16 Jul 2025 14:00:29 +0200 Subject: [PATCH 02/17] Use na.rm = FALSE for MergeDims() --- modules/Crossval/Crossval_calibration.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/Crossval/Crossval_calibration.R b/modules/Crossval/Crossval_calibration.R index 75765e75..48f250c9 100644 --- a/modules/Crossval/Crossval_calibration.R +++ b/modules/Crossval/Crossval_calibration.R @@ -56,10 +56,10 @@ Crossval_calibration <- function(recipe, data, correct_negative = FALSE) { if (horizon == 'subseasonal') { central_day <- (dim(exp)['sday'] + 1)/2 hcst_tr <- MergeDims(hcst_tr, merge_dims = c('sday', 'syear'), - rename_dim = 'syear', na.rm = na.rm) + rename_dim = 'syear', na.rm = FALSE) obs_tr <- MergeDims(obs_tr, merge_dims = c('sday', 'syear'), - rename_dim = 'syear', na.rm = na.rm) + rename_dim = 'syear', na.rm = FALSE) # 'sday' dim to select the central day hcst_ev <- Subset(hcst_ev, along = 'sday', indices = central_day, drop = 'selected') -- GitLab From e48741396bba3542be2adcf70b184a8d7bb29092 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 16 Jul 2025 14:00:39 +0200 Subject: [PATCH 03/17] Add missing weekly variables --- conf/archive_reference.yml | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/conf/archive_reference.yml b/conf/archive_reference.yml index b3a75502..fa58832e 100644 --- a/conf/archive_reference.yml +++ b/conf/archive_reference.yml @@ -13,14 +13,20 @@ gpfs: "tasmin":"daily/tasmin-r1440x721cds/", "ta500":"daily_mean/ta500_f1h-r1440x721cds/"} weekly_mean: {"tas":"weekly_mean/tas_f1h-r1440x721cds/", + "tasmax":"weekly_mean/tasmax_f24h-r1440x721cds/", + "tasmin":"weekly_mean/tasmin_f24h-r1440x721cds/", "psl":"weekly_mean/psl_f1h-r1440x721cds/", "prlr":"weekly_mean/prlr_f1h-r1440x721cds/", - "sfcWind":"weekly_mean/sfcWind_f1h-r1440x721cds/"} + "sfcWind":"weekly_mean/sfcWind_f1h-r1440x721cds/", + "rsds": "weekly_mean/rsds_f1h-r1440x721cds/"} monthly_mean: {"tas":"monthly_mean/tas_f1h-r1440x721cds/", + "tasmax":"monthly_mean/tasmax_f24h-r1440x721cds/", + "tasmin":"monthly_mean/tasmin_f24h-r1440x721cds/", "tos":"monthly_mean/tos_f1h-r1440x721cds/", "psl":"monthly_mean/psl_f1h-r1440x721cds/", "prlr":"monthly_mean/prlr_f1h-r1440x721cds/", - "sfcWind":"monthly_mean/sfcWind_f1h-r1440x721cds/"} + "sfcWind":"monthly_mean/sfcWind_f1h-r1440x721cds/", + "rsds":"monthly_mean/rsds_f1h-r1440x721cds/"} calendar: "standard" reference_grid: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" land_sea_mask: "/gpfs/projects/bsc32/esarchive_cache/recon/ecmwf/era5/constant/lsm-r1440x721cds/sftof.nc" @@ -67,6 +73,7 @@ esarchive: "g300":"monthly_mean/g300_f1h-r1440x721cds/", 
"g500":"monthly_mean/g500_f1h-r1440x721cds/", "g850":"monthly_mean/g850_f1h-r1440x721cds/", + "hurs":"monthly_mean/hurs_f1h-r1440x721cds/", "sfcWind":"monthly_mean/sfcWind_f1h-r1440x721cds/", "tasmax":"monthly_mean/tasmax_f24h-r1440x721cds/", "tasmin":"monthly_mean/tasmin_f24h-r1440x721cds/", -- GitLab From a29f21fa4419f5399e41b4ef85f5885f29e5380d Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 16 Jul 2025 14:01:03 +0200 Subject: [PATCH 04/17] Bugfix: include region folder in S2S4E format --- modules/Saving/R/get_dir.R | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/Saving/R/get_dir.R b/modules/Saving/R/get_dir.R index 97208b3e..be775617 100644 --- a/modules/Saving/R/get_dir.R +++ b/modules/Saving/R/get_dir.R @@ -47,6 +47,7 @@ get_dir <- function(recipe, variable, agg = "global") { } } store.freq <- recipe$Analysis$Variables$freq + outdir <- paste0(outdir, "/", region) switch(tolower(agg), "region" = {dir <- paste0(outdir, "/", system, "/", calib.method, "-", store.freq, "/", variable, -- GitLab From cac6a38f091f34d471e461e3dfdb1b9e8c620bf5 Mon Sep 17 00:00:00 2001 From: vagudets Date: Thu, 17 Jul 2025 09:20:39 +0200 Subject: [PATCH 05/17] Save only forecast probabilities --- modules/Crossval/Crossval_anomalies.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R index c22464e0..d33aab80 100644 --- a/modules/Crossval/Crossval_anomalies.R +++ b/modules/Crossval/Crossval_anomalies.R @@ -420,12 +420,12 @@ Crossval_anomalies <- function(recipe, data) { if (recipe$Analysis$Workflow$Probabilities$save %in% c('all', 'bins_only')) { - save_probabilities(recipe = recipe, probs = probs_hcst, - data_cube = data$hcst, agg = agg, - type = "hcst") - save_probabilities(recipe = recipe, probs = probs_obs, - data_cube = data$hcst, agg = agg, - type = "obs") + # save_probabilities(recipe = recipe, probs = probs_hcst, + # data_cube = data$hcst, agg = agg, + # type = "hcst") + # save_probabilities(recipe = recipe, probs = probs_obs, + # data_cube = data$hcst, agg = agg, + # type = "obs") if (length(probs_fcst) > 0) { save_probabilities(recipe = recipe, probs = probs_fcst, data_cube = data$fcst, agg = agg, -- GitLab From d6a8837f250322bbc117c9956949e98f2edaf7f9 Mon Sep 17 00:00:00 2001 From: vagudets Date: Thu, 17 Jul 2025 16:12:36 +0200 Subject: [PATCH 06/17] Use VizMostLikelyQuantileMap and VizCombinedMap --- .../R/plot_most_likely_terciles_map.R | 9 +- .../{PlotCombinedMap.R => VizCombinedMap.R} | 121 ++++++++++++------ ...antileMap.R => VizMostLikelyQuantileMap.R} | 99 +++++--------- 3 files changed, 115 insertions(+), 114 deletions(-) rename modules/Visualization/R/tmp/{PlotCombinedMap.R => VizCombinedMap.R} (80%) rename modules/Visualization/R/tmp/{PlotMostLikelyQuantileMap.R => VizMostLikelyQuantileMap.R} (67%) diff --git a/modules/Visualization/R/plot_most_likely_terciles_map.R b/modules/Visualization/R/plot_most_likely_terciles_map.R index 67802cf1..d2d92865 100644 --- a/modules/Visualization/R/plot_most_likely_terciles_map.R +++ b/modules/Visualization/R/plot_most_likely_terciles_map.R @@ -1,11 +1,10 @@ ## Functions required for normal cat and triangles end. 
## See https://earth.bsc.es/gitlab/external/cstools/-/issues/125 -source("modules/Visualization/R/tmp/PlotMostLikelyQuantileMap.R") -source("modules/Visualization/R/tmp/PlotCombinedMap.R") +source("modules/Visualization/R/tmp/VizMostLikelyQuantileMap.R") +source("modules/Visualization/R/tmp/VizCombinedMap.R") source("modules/Visualization/R/tmp/ColorBar.R") source("modules/Visualization/R/tmp/clim.palette.R") source("modules/Visualization/R/tmp/Utils.R") -source("modules/Visualization/R/tmp/PlotEquiMap.R") source("modules/Visualization/R/tmp/ColorBar_onebox.R") source("modules/Visualization/R/tmp/GradientCatsColorBar.R") @@ -192,7 +191,7 @@ plot_most_likely_terciles <- function(recipe, ## NOTE: PlotLayout() and PlotMostLikelyQuantileMap() are still being worked ## on. This option does not work with mask or dots for now. suppressWarnings( - PlotLayout(PlotMostLikelyQuantileMap, c('bin', 'longitude', 'latitude'), + PlotLayout(VizMostLikelyQuantileMap, c('bin', 'longitude', 'latitude'), cat_dim = 'bin', i_var_probs, longitude, latitude, mask = var_mask, @@ -333,7 +332,7 @@ plot_most_likely_terciles <- function(recipe, base_args$probs <- i_var_probs[i, , , ] base_args$mask <- mask_i base_args$dots <- dots_i - cb_info <- do.call(PlotMostLikelyQuantileMap, + cb_info <- do.call(VizMostLikelyQuantileMap, args = c(base_args, list(toptitle = toptitle, fileout = fileout))) diff --git a/modules/Visualization/R/tmp/PlotCombinedMap.R b/modules/Visualization/R/tmp/VizCombinedMap.R similarity index 80% rename from modules/Visualization/R/tmp/PlotCombinedMap.R rename to modules/Visualization/R/tmp/VizCombinedMap.R index e4d6d7f3..50baf418 100644 --- a/modules/Visualization/R/tmp/PlotCombinedMap.R +++ b/modules/Visualization/R/tmp/VizCombinedMap.R @@ -27,14 +27,23 @@ #'@param map_dim Optional name for the dimension of 'maps' along which the #' multiple maps are arranged. Only applies when 'maps' is provided as a #' 3-dimensional array. Takes the value 'map' by default. -#'@param brks Colour levels to be sent to PlotEquiMap. This parameter is +#'@param brks Colour levels to be sent to VizEquiMap. This parameter is #' optional and adjusted automatically by the function. -#'@param cols List of vectors of colours to be sent to PlotEquiMap for the +#'@param cols List of vectors of colours to be sent to VizEquiMap for the #' colour bar of each map. This parameter is optional and adjusted #' automatically by the function (up to 5 maps). The colours provided for each #' colour bar will be automatically interpolated to match the number of breaks. #' Each item in this list can be named, and the name will be used as title for #' the corresponding colour bar (equivalent to the parameter 'bar_titles'). +#'@param bar_limits A numeric vector of 2 indicating the range of color bar. +#' The default is NULL, and the function will decide the range automatically. +#'@param triangle_ends A logical vector of two indicating if the lower and upper +#' triangles of the color bar should be plotted. The default is +#' c(FALSE, FALSE). +#'@param col_inf A character string of recognized color name or code indicating +#' the color of the lower triangle of the color bar. The default is NULL. +#'@param col_sup A character string of recognized color name or code indicating +#' the color of the upper triangle of the color bar. The default is NULL. #'@param col_unknown_map Colour to use to paint the grid cells for which a map #' is not possible to be chosen according to 'map_select_fun' or for those #' values that go beyond 'display_range'. 
Takes the value 'white' by default. @@ -59,7 +68,10 @@ #'@param plot_margin Numeric vector of length 4 for the margin sizes in the #' following order: bottom, left, top, and right. If not specified, use the #' default of par("mar"), c(5.1, 4.1, 4.1, 2.1). Used as 'margin_scale' in -#' s2dv::PlotEquiMap. +#' VizEquiMap. +#'@param bar_extra_margin A numeric vector of 4 indicating the extra margins to +#' be added around the color bar, in the format c(y1, x1, y2, x2). The units +#' are margin lines. The default values are c(2, 0, 2, 0). #'@param fileout File where to save the plot. If not specified (default) a #' graphics device will pop up. Extensions allowed: eps/ps, jpeg, png, pdf, bmp #' and tiff @@ -81,10 +93,10 @@ #'@param return_leg A logical value indicating if the color bars information #' should be returned by the function. If TRUE, the function doesn't plot the #' color bars but still creates the layout with color bar areas, and the -#' arguments for GradientCatsColorBar() or ColorBar() will be returned. It is -#' convenient for users to adjust the color bars manually. The default is -#' FALSE, the color bars will be plotted directly. -#'@param ... Additional parameters to be passed on to \code{PlotEquiMap}. +#' arguments for GradientCatsColorBar() or ColorBarContinuous() will be +#' returned. It is convenient for users to adjust the color bars manually. The +#' default is FALSE, the color bars will be plotted directly. +#'@param ... Additional parameters to be passed on to \code{VizEquiMap}. #' #'@examples #'# Simple example @@ -95,7 +107,7 @@ #'lons <- seq(0, 359.5, length = 20) #'lats <- seq(-89.5, 89.5, length = 10) #'\dontrun{ -#'PlotCombinedMap(list(a, b, c), lons, lats, +#'VizCombinedMap(list(a, b, c), lons, lats, #' toptitle = 'Maximum map', #' map_select_fun = max, #' display_range = c(0, 1), @@ -110,39 +122,41 @@ #'mask <- sample(c(0,1), replace = TRUE, size = 51 * 26) #'dim(mask) <- c(lat = 26, lon = 51) #'\dontrun{ -#'PlotCombinedMap(data, lon = Lon, lat = Lat, map_select_fun = max, +#'VizCombinedMap(data, lon = Lon, lat = Lat, map_select_fun = max, #' display_range = range(data), mask = mask, #' width = 14, height = 10) #'} #' -#'@seealso \code{PlotCombinedMap} and \code{PlotEquiMap} -#' -#'@importFrom s2dv PlotEquiMap ColorBar +#'@seealso \code{VizCombinedMap} and \code{VizEquiMap} +#' +#'@import utils #'@importFrom maps map #'@importFrom graphics box image layout mtext par plot.new #'@importFrom grDevices adjustcolor bmp colorRampPalette dev.cur dev.new dev.off #' hcl jpeg pdf png postscript svg tiff #'@export -PlotCombinedMap <- function(maps, lon, lat, - map_select_fun, display_range, - map_dim = 'map', - brks = NULL, cols = NULL, - bar_limits = NULL, triangle_ends = c(F, F), col_inf = NULL, col_sup = NULL, - col_unknown_map = 'white', - mask = NULL, col_mask = 'grey', - dots = NULL, - bar_titles = NULL, legend_scale = 1, - cex_bar_titles = 1.5, - plot_margin = NULL, bar_extra_margin = c(2, 0, 2, 0), - fileout = NULL, width = 8, height = 5, - size_units = 'in', res = 100, drawleg = T, return_leg = FALSE, - ...) 
{ +VizCombinedMap <- function(maps, lon, lat, + map_select_fun, display_range, + map_dim = 'map', + brks = NULL, cols = NULL, + bar_limits = NULL, triangle_ends = c(FALSE, FALSE), + col_inf = NULL, col_sup = NULL, + col_unknown_map = 'white', + mask = NULL, col_mask = 'grey', + dots = NULL, + bar_titles = NULL, legend_scale = 1, + cex_bar_titles = 1.5, + plot_margin = NULL, bar_extra_margin = c(2, 0, 2, 0), + fileout = NULL, width = 8, height = 5, + size_units = 'in', res = 100, drawleg = T, return_leg = FALSE, + ...) { args <- list(...) - + country.borders <- if (!is.null(args$country.borders)) args$country.borders else FALSE + args$country.borders <- NULL # Prevent duplication + # If there is any filenames to store the graphics, process them # to select the right device if (!is.null(fileout)) { - .SelectDevice <- utils::getFromNamespace(".SelectDevice", "s2dv") deviceInfo <- .SelectDevice(fileout = fileout, width = width, height = height, units = size_units, res = res) saveToFile <- deviceInfo$fun @@ -315,6 +329,10 @@ PlotCombinedMap <- function(maps, lon, lat, #---------------------- # Identify the most likely map #---------------------- + #TODO: Consider col_inf + if (!is.null(colorbar$col_inf[[1]])) { + warning("Lower triangle is not supported now. Please contact maintainer if you have this need.") + } if (!is.null(colorbar$col_sup[[1]])) { brks_norm <- vector('list', length = nmap) @@ -355,11 +373,12 @@ PlotCombinedMap <- function(maps, lon, lat, } else { brks_norm <- vector('list', length = nmap) - range_width <- display_range[2] - display_range[1] #vector('list', length = nmap) + range_width <- vector('list', length = nmap) slightly_tune_val <- vector('list', length = nmap) for (ii in 1:nmap) { brks_norm[[ii]] <- seq(0, 1, length.out = length(colorbar$brks[[ii]])) slightly_tune_val[[ii]] <- brks_norm[[ii]][2] / (length(brks_norm[[ii]]) * 2) + range_width[[ii]] <- diff(range(colorbar$brks[[ii]])) } ml_map <- apply(maps, c(lat_dim, lon_dim), function(x) { if (any(is.na(x))) { @@ -367,15 +386,16 @@ PlotCombinedMap <- function(maps, lon, lat, } else { res <- which(x == map_select_fun(x)) if (length(res) > 0) { - res <- res[1] + res <- res_ind <- res[1] if (map_select_fun(x) < display_range[1] || map_select_fun(x) > display_range[2]) { res <- -0.5 } else { - res <- res + (map_select_fun(x) - display_range[1]) / range_width - if (map_select_fun(x) == display_range[1]) { - res <- res + slightly_tune_val - } + res <- res + ((map_select_fun(x) - colorbar$brks[[res_ind]][1]) / + range_width[[res_ind]]) + if (map_select_fun(x) == colorbar$brks[[res_ind]][1]) { + res <- res + slightly_tune_val[[res_ind]] + } } } else { res <- -0.5 @@ -422,7 +442,7 @@ PlotCombinedMap <- function(maps, lon, lat, } #---------------------- - # Set colors and breaks and then PlotEquiMap + # Set colors and breaks and then VizEquiMap #---------------------- if (!is.null(colorbar$col_sup[[1]])) { tcols <- c(col_unknown_map, colorbar$cols[[1]], colorbar$col_sup[[1]]) @@ -443,10 +463,15 @@ PlotCombinedMap <- function(maps, lon, lat, if (is.null(plot_margin)) { plot_margin <- c(5, 4, 4, 2) + 0.1 # default of par()$mar } - - PlotEquiMap(var = ml_map, lon = lon, lat = lat, - brks = tbrks, cols = tcols, drawleg = FALSE, - filled.continents = FALSE, dots = dots, margin_scale = plot_margin, ...) 
+ + country.borders.VizEquiMap <- if (!is.null(mask)) FALSE else country.borders + + do.call(VizEquiMap, c(list( + var = ml_map, lon = lon, lat = lat, + brks = tbrks, cols = tcols, drawleg = FALSE, + filled.continents = FALSE, country.borders = country.borders.VizEquiMap, + dots = dots, margin_scale = plot_margin + ), args)) #---------------------- # Add overplot on top @@ -471,9 +496,22 @@ PlotCombinedMap <- function(maps, lon, lat, coast_color <- 'black' } if (min(lon) < 0) { - map('world', interior = FALSE, add = TRUE, lwd = 1, col = coast_color) # Low resolution world map (lon -180 to 180). + map('world', interior = country.borders, add = TRUE, lwd = 1, col = coast_color) # Low resolution world map (lon -180 to 180). } else { - map('world2', interior = FALSE, add = TRUE, lwd = 1, col = coast_color) # Low resolution world map (lon 0 to 360). + map('world2', interior = country.borders, add = TRUE, lwd = 1, col = coast_color) # Low resolution world map (lon 0 to 360). + } + if (!is.null(args$shapefile)) { + if (is.list(args$shapefile)) { + shape <- args$shapefile + } else { + shape <- readRDS(file = args$shapefile) + } + shapefile_color <- if (!is.null(args$shapefile_color)) args$shapefile_color else "black" + shapefile_lwd <- if (!is.null(args$shapefile_lwd)) args$shapefile_lwd else 1 + filled.continents <- if (!is.null(args$filled.continents)) args$filled.continents else FALSE + maps::map(shape, interior = country.borders, #wrap = wrap_vec, + fill = filled.continents, add = TRUE, plot = TRUE, + lwd = shapefile_lwd, col = shapefile_color) } box() } @@ -516,6 +554,7 @@ PlotCombinedMap <- function(maps, lon, lat, plot = TRUE, draw_separators = TRUE, bar_titles = bar_titles, title_scale = cex_bar_titles, label_scale = legend_scale * 1.5, extra_margin = bar_extra_margin) + warning("The device is not off yet. Use dev.off() after plotting the color bars.") return(tmp) #NOTE: The device is not off! Can keep plotting the color bars. } diff --git a/modules/Visualization/R/tmp/PlotMostLikelyQuantileMap.R b/modules/Visualization/R/tmp/VizMostLikelyQuantileMap.R similarity index 67% rename from modules/Visualization/R/tmp/PlotMostLikelyQuantileMap.R rename to modules/Visualization/R/tmp/VizMostLikelyQuantileMap.R index a81515e9..3bd75035 100644 --- a/modules/Visualization/R/tmp/PlotMostLikelyQuantileMap.R +++ b/modules/Visualization/R/tmp/VizMostLikelyQuantileMap.R @@ -38,9 +38,9 @@ #' 'down', 'd', 'D', 'bottom', 'b', 'B', 'south', 's', 'S' (default)\cr #' 'right', 'r', 'R', 'east', 'e', 'E'\cr #' 'left', 'l', 'L', 'west', 'w', 'W' -#'@param ... Additional parameters to be sent to \code{PlotCombinedMap} and -#' \code{PlotEquiMap}. -#'@seealso \code{PlotCombinedMap} and \code{PlotEquiMap} +#'@param ... Additional parameters to be sent to \code{VizCombinedMap} and +#' \code{VizEquiMap}. 
+#'@seealso \code{VizCombinedMap} and \code{VizEquiMap}
 #'@examples
 #'# Simple example
 #'x <- array(1:(20 * 10), dim = c(lat = 10, lon = 20)) / 200
 #'a <- x * 0.6
 #'b <- (1 - x) * 0.6
 #'c <- 1 - (a + b)
 #'lons <- seq(0, 359.5, length = 20)
 #'lats <- seq(-89.5, 89.5, length = 10)
 #'\dontrun{
-#'PlotMostLikelyQuantileMap(list(a, b, c), lons, lats,
-#'                          toptitle = 'Most likely tercile map',
-#'                          bar_titles = paste('% of belonging to', c('a', 'b', 'c')),
-#'                          brks = 20, width = 10, height = 8)
+#'VizMostLikelyQuantileMap(list(a, b, c), lons, lats,
+#'                         toptitle = 'Most likely tercile map',
+#'                         bar_titles = paste('% of belonging to', c('a', 'b', 'c')),
+#'                         brks = 20, width = 10, height = 8)
 #'}
 #'
 #'# More complex example
-#'n_lons <- 40
-#'n_lats <- 20
-#'n_timesteps <- 100
-#'n_bins <- 4
 #'
 #'# 1. Generation of sample data
-#'lons <- seq(0, 359.5, length = n_lons)
-#'lats <- seq(-89.5, 89.5, length = n_lats)
-#'
-#'# This function builds a 3-D gaussian at a specified point in the map.
-#'make_gaussian <- function(lon, sd_lon, lat, sd_lat) {
-#'  w <- outer(lons, lats, function(x, y) dnorm(x, lon, sd_lon) * dnorm(y, lat, sd_lat))
-#'  min_w <- min(w)
-#'  w <- w - min_w
-#'  w <- w / max(w)
-#'  w <- t(w)
-#'  names(dim(w)) <- c('lat', 'lon')
-#'  w
-#'}
-#'
-#'# This function generates random time series (with values ranging 1 to 5)
-#'# according to 2 input weights.
-#'gen_data <- function(w1, w2, n) {
-#'  r <- sample(1:5, n,
-#'              prob = c(.05, .9 * w1, .05, .05, .9 * w2),
-#'              replace = TRUE)
-#'  r <- r + runif(n, -0.5, 0.5)
-#'  dim(r) <- c(time = n)
-#'  r
-#'}
-#'
-#'# We build two 3-D gaussians.
-#'w1 <- make_gaussian(120, 80, 20, 30)
-#'w2 <- make_gaussian(260, 60, -10, 40)
+#'lons <- seq(0, 359.5, length = 40)
+#'lats <- seq(-89.5, 89.5, length = 20)
 #'
-#'# We generate sample data (with dimensions time, lat, lon) according
-#'# to the generated gaussians
-#'sample_data <- multiApply::Apply(list(w1, w2), NULL,
-#'                                 gen_data, n = n_timesteps)$output1
+#'# Generate sample data (with dimensions time, lat, lon)
+#'sample_data <- sample(1:5, 50 * 20 * 40, replace = TRUE)
+#'dim(sample_data) <- c(time = 50, lat = 20, lon = 40)
 #'
 #'# 2. Binning sample data
+#'n_bins <- 4
 #'prob_thresholds <- 1:n_bins / n_bins
 #'prob_thresholds <- prob_thresholds[1:(n_bins - 1)]
 #'thresholds <- quantile(sample_data, prob_thresholds)
@@ -105,7 +75,6 @@
 #'binning <- function(x, thresholds) {
 #'  n_samples <- length(x)
 #'  n_bins <- length(thresholds) + 1
-#'
 #'  thresholds <- c(thresholds, max(x))
 #'  result <- 1:n_bins
 #'  lower_threshold <- min(x) - 1
@@ -113,29 +82,26 @@
 #'    result[i] <- sum(x > lower_threshold & x <= thresholds[i]) / n_samples
 #'    lower_threshold <- thresholds[i]
 #'  }
-#'
 #'  dim(result) <- c(bin = n_bins)
 #'  result
 #'}
+#'bins <- apply(sample_data, 2:3, binning, thresholds)
+#'names(dim(bins))[1] <- "bin"
 #'
-#'bins <- multiApply::Apply(sample_data, 'time', binning, thresholds)$output1
-#'
-#'# 3. Plotting most likely quantile/bin
 #'\dontrun{
-#'PlotMostLikelyQuantileMap(bins, lons, lats,
-#'                          toptitle = 'Most likely quantile map',
-#'                          bar_titles = paste('% of belonging to', letters[1:n_bins]),
-#'                          mask = 1 - (w1 + w2 / max(c(w1, w2))),
-#'                          brks = 20, width = 10, height = 8)
+#'VizMostLikelyQuantileMap(bins, lons, lats,
+#'                         toptitle = 'Most likely quantile map',
+#'                         bar_titles = paste('% of belonging to', letters[1:n_bins]),
+#'                         brks = 20, width = 10, height = 10)
 #'}
 #'@importFrom maps map
 #'@importFrom graphics box image layout mtext par plot.new
 #'@importFrom grDevices adjustcolor bmp colorRampPalette dev.cur dev.new dev.off hcl jpeg pdf png postscript svg tiff
 #'@export
-PlotMostLikelyQuantileMap <- function(probs, lon, lat, cat_dim = 'bin',
-                                      bar_titles = NULL,
-                                      col_unknown_cat = 'white', drawleg = T,
-                                      ...) {
+VizMostLikelyQuantileMap <- function(probs, lon, lat, cat_dim = 'bin',
+                                     bar_titles = NULL,
+                                     col_unknown_cat = 'white', drawleg = T,
+                                     ...) {
   # Check probs
   error <- FALSE
   if (is.list(probs)) {
@@ -202,22 +168,19 @@ PlotMostLikelyQuantileMap <- function(probs, lon, lat, cat_dim = 'bin',
     bar_titles <- c("Below normal (%)", "Normal (%)", "Above normal (%)")
   } else if (nprobs == 5) {
     bar_titles <- c("Low (%)", "Below normal (%)",
-                  "Normal (%)", "Above normal (%)", "High (%)")
+                    "Normal (%)", "Above normal (%)", "High (%)")
   } else {
     bar_titles <- paste0("Cat. ", 1:nprobs, " (%)")
   }
 }
 minimum_value <- ceiling(1 / nprobs * 10 * 1.1) * 10
-
-  # By now, the PlotCombinedMap function is included below in this file.
-  # In the future, PlotCombinedMap will be part of s2dverification and will
+
+  # By now, the VizCombinedMap function is included below in this file.
+  # In the future, VizCombinedMap will be part of s2dverification and will
   # be properly imported.
-  PlotCombinedMap(probs * 100, lon, lat, map_select_fun = max,
-                  display_range = c(minimum_value, 100),
-                  map_dim = cat_dim,
-                  bar_titles = bar_titles,
-                  col_unknown_map = col_unknown_cat,
-                  drawleg = drawleg, ...)
+  VizCombinedMap(probs * 100, lon, lat, map_select_fun = max,
+                 display_range = c(minimum_value, 100), map_dim = cat_dim,
+                 bar_titles = bar_titles, col_unknown_map = col_unknown_cat,
+                 drawleg = drawleg, ...)
 }
-
--
GitLab
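
A quick gloss on the display floor used by the patched function above: minimum_value starts the colour bars where a winning category is at all possible, since the largest of nprobs probabilities summing to 1 is always at least 1/nprobs. The arithmetic, as a standalone R sketch (the nprobs values are illustrative):

    nprobs <- 3                          # tercile maps
    ceiling(1 / nprobs * 10 * 1.1) * 10  # ceiling(3.67) * 10 = 40 -> bars span 40-100 %
    nprobs <- 5                          # quintile maps
    ceiling(1 / nprobs * 10 * 1.1) * 10  # ceiling(2.2) * 10 = 30 -> bars span 30-100 %
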
From 15aa6082278f4763907e02c87647b14c36102b46 Mon Sep 17 00:00:00 2001
From: vagudets
Date: Thu, 17 Jul 2025 16:12:49 +0200
Subject: [PATCH 07/17] Bigpredidata changes

---
 modules/Visualization/output_size.yml | 28 ++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/modules/Visualization/output_size.yml b/modules/Visualization/output_size.yml
index 8d7edead..74e3f154 100644
--- a/modules/Visualization/output_size.yml
+++ b/modules/Visualization/output_size.yml
@@ -51,9 +51,10 @@ region: #units inches
       dot_size: 2
       font.main: 1
       bar_label_scale: 1
-      plot_margin: !expr c(0,1 ,7.1,1)
+      plot_margin: !expr c(0, 1, 7.1, 1)
       bar_extra_margin: !expr c(4,0,0,0)
       colNA: "white"
+      col_mask: "white"
       country.borders: TRUE
     extreme_probabilities:
       width: 7
@@ -83,8 +84,8 @@ region: #units inches
       axes_label_scale: 0.8
       bar_label_scale: 1.2
       bar_extra_margin: !expr c(2, 1, 0.5, 1)
-      dot_size: 1.7
-      dot_symbol: 4
+      dot_size: 1
+      dot_symbol: 16
       font.main: 1
       colNA: "white"
     forecast_map:
@@ -95,19 +96,22 @@ region: #units inches
       axes_label_scale: 0.8
       bar_label_scale: 1.2
       bar_extra_margin: !expr c(2, 1, 0.5, 1)
-      dot_symbol: 4
-      dot_size: 1.7
+      dot_symbol: 16
+      dot_size: 1
       font.main: 1
       colNA: "white"
     most_likely_terciles:
-      width: 7.5
-      height: 8
+      width: 8
+      height: 7
       intylat: 2
       intxlon: 2
-      dot_size: 2
+      dot_size: 1
+      dot_symbol: 16
       plot_margin: !expr c(2, 5, 7.5, 5)
       bar_extra_margin: !expr c(2.5, 1, 0.5, 1) # colorbar proportions
       colNA: "white"
+      col_mask: "white"
+      country.borders: FALSE
     extreme_probabilities:
       width: 8
       height: 7.5
@@ -115,11 +119,13 @@ region: #units inches
       intxlon: 2
       axes_label_scale: 0.8
       bar_label_scale: 1.2
-      bar_extra_margin: !expr c(2, 1, 0.5, 1)
-      dot_symbol: 4
-      dot_size: 1.7
+      plot_margin: !expr c(2, 5, 7.5, 5)
+      bar_extra_margin: !expr c(2.5, 1, 0.5, 1)
+      dot_symbol: 16
+      dot_size: 1
       font.main: 1
       colNA: "white"
+      country.borders: FALSE
 multipanel:
   forecast_map:
     width: 8.5
--
GitLab
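
Judging by the parameter names, the dot_symbol and dot_size values swapped above feed base R's pch and cex graphics parameters downstream, so this patch trades the large crosses (pch = 4) for smaller filled circles (pch = 16) in the dotted significance layers. A one-line preview of the two glyphs, assuming only base graphics:

    plot(1:2, pch = c(4, 16), cex = c(1.7, 1), xlim = c(0, 3))  # old vs new dot style
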
From 25530ec47ef7ef0bf1cd2f710495195a3a3d3041 Mon Sep 17 00:00:00 2001
From: vagudets
Date: Thu, 17 Jul 2025 16:13:20 +0200
Subject: [PATCH 08/17] Bigpredidata testing

---
 operationals/bigpredidata/script_bigpredidata_oper.R       | 4 ++--
 operationals/test/recipe_bigpredidata_oper_subseasonal.yml | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/operationals/bigpredidata/script_bigpredidata_oper.R b/operationals/bigpredidata/script_bigpredidata_oper.R
index 5249a8a0..cec71a8b 100644
--- a/operationals/bigpredidata/script_bigpredidata_oper.R
+++ b/operationals/bigpredidata/script_bigpredidata_oper.R
@@ -11,8 +11,8 @@ recipe_file <- args[1]
 recipe <- read_atomic_recipe(recipe_file)
 
 if (recipe$Analysis$Region$name == "EU") {
-  recipe$Analysis$Regrid$type <-
-    "/gpfs/projects/bsc32/esarchive_cache/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20050109.nc"
+  recipe$Analysis$Regrid$type <- "to_system"
+  # "/gpfs/projects/bsc32/esarchive_cache/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20050109.nc"
 }
 
 #recipe <- prepare_outputs(recipe_file)
diff --git a/operationals/test/recipe_bigpredidata_oper_subseasonal.yml b/operationals/test/recipe_bigpredidata_oper_subseasonal.yml
index 45a2342a..8ae47fdf 100755
--- a/operationals/test/recipe_bigpredidata_oper_subseasonal.yml
+++ b/operationals/test/recipe_bigpredidata_oper_subseasonal.yml
@@ -59,7 +59,7 @@ Analysis:
     multi_panel: no
     dots: no
     mask_terciles: yes
-    shapefile: shapefiles/recintos_provinciales_inspire_peninbal_etrs89.shp
+    shapefile: shapefiles/bigpredidata/recintos_provinciales_inspire_peninbal_etrs89.shp
     file_format: PNG # Final file format of the plots. Formats available: PNG, JPG, JPEG, EPS. Defaults to PDF.
     ncores: 16 # Optional, int: number of cores, defaults to 1
     remove_NAs: TRUE # Optional, bool: Whether NAs are removed, defaults to FALSE
--
GitLab
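
This undoes the hard-coded grid file from patch 01: for the "EU" region the script now regrids with "to_system" (onto the model grid), while the recipe default stays "to_reference" — both values the recipe file itself lists as valid Regrid types. A compact equivalent of the override, as a hypothetical sketch (the switch() form is illustrative, not what the script uses):

    recipe$Analysis$Regrid$type <- switch(recipe$Analysis$Region$name,
                                          "EU" = "to_system",
                                          recipe$Analysis$Regrid$type)
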
From fe3b8defa89227811f249e4c7511fee96304b7fd Mon Sep 17 00:00:00 2001
From: vagudets
Date: Tue, 22 Jul 2025 15:13:41 +0200
Subject: [PATCH 09/17] Save subseasonal observations!

---
 modules/Saving/R/save_observations.R | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/modules/Saving/R/save_observations.R b/modules/Saving/R/save_observations.R
index b6ed4141..1ca2fc26 100644
--- a/modules/Saving/R/save_observations.R
+++ b/modules/Saving/R/save_observations.R
@@ -29,15 +29,22 @@ save_observations <- function(recipe,
     #                       cal = calendar)
     init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$hcst_start)+1,
                                  '-01-01'), cal = calendar)
-  } else {
+  } else if (fcst.horizon == 'seasonal') {
     init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start,
                                  recipe$Analysis$Time$sdate),
                           format = '%Y%m%d', cal = calendar)
+  } else if (fcst.horizon == 'subseasonal') {
+    init_date <- as.PCICt(recipe$Analysis$Time$sdate,
+                          format = '%Y%m%d', cal = calendar)
   }
   syears <- seq(1:dim(data_cube$data)['syear'][[1]])
   ## expect dim = [sday = 1, sweek = 1, syear, time]
-  syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1])
+  if (fcst.horizon == "subseasonal") {
+    syears_val <- recipe$Analysis$Time$hcst_start:recipe$Analysis$Time$hcst_end
+  } else {
+    syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1])
+  }
 
   # Loop over variables
   for (var in 1:data_cube$dims[['var']]) {
@@ -99,12 +106,11 @@ save_observations <- function(recipe,
   # the same name pattern.
   ## Because observations are loaded differently in the daily vs. monthly
   ## cases, different approaches are necessary.
-  if (fcst.horizon == 'decadal') {
+  if (fcst.horizon %in% c('subseasonal', 'decadal')) {
     # init_date is like "1990-11-01"
    init_date <- as.POSIXct(init_date)
    fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date))
   } else {
-
     if (store.freq == "monthly_mean") {
       fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i]
       fcst.sdate <- as.Date(paste0(fcst.sdate, "01"), '%Y%m%d')
--
GitLab
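
For subseasonal horizons, save_observations() now builds init_date directly from the recipe's sdate string and takes the year labels from the hindcast range instead of the Dates attribute. A minimal sketch of the two new expressions, with hypothetical recipe values (sdate "20250715", hindcast 2005-2024):

    library(PCICt)
    init_date  <- as.PCICt("20250715", format = '%Y%m%d', cal = 'standard')
    syears_val <- 2005:2024   # recipe$Analysis$Time$hcst_start:hcst_end
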
From bf58a880cade4bd72b22785c2e62867cc8ad5d7b Mon Sep 17 00:00:00 2001
From: vagudets
Date: Wed, 23 Jul 2025 10:46:47 +0200
Subject: [PATCH 10/17] Formatting

---
 modules/Crossval/Crossval_metrics.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/modules/Crossval/Crossval_metrics.R b/modules/Crossval/Crossval_metrics.R
index 0026dc0c..762bc7cd 100644
--- a/modules/Crossval/Crossval_metrics.R
+++ b/modules/Crossval/Crossval_metrics.R
@@ -279,11 +279,11 @@ Crossval_metrics <- function(recipe,
   for (ps in 1:length(categories)) {
     if (horizon == 'subseasonal') {
       data$probs$hcst[[ps]] <- MergeDims(data$probs$hcst[[ps]],
-                                   merge_dims = c('sweek', 'syear'),
-                                   rename_dim = 'syear', na.rm = FALSE)
+                                         merge_dims = c('sweek', 'syear'),
+                                         rename_dim = 'syear', na.rm = FALSE)
       data$probs$obs[[ps]] <- MergeDims(data$probs$obs[[ps]],
-                                  merge_dims = c('sweek', 'syear'),
-                                  rename_dim = 'syear', na.rm = FALSE)
+                                        merge_dims = c('sweek', 'syear'),
+                                        rename_dim = 'syear', na.rm = FALSE)
     }
     if ('rps' %in% requested_metrics) {
       rps <- RPS(exp = data$probs$hcst[[ps]],
--
GitLab
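
Indentation aside, the calls being re-aligned here do real work: MergeDims() concatenates the 'sweek' and 'syear' dimensions into a single 'syear' sample dimension, and na.rm = FALSE (the same choice as in patch 02) keeps NA placeholders so every start week contributes the same number of entries. A shape-only sketch, assuming CSTools is loaded:

    library(CSTools)
    x <- array(rnorm(3 * 10 * 2), dim = c(sweek = 3, syear = 10, ensemble = 2))
    y <- MergeDims(x, merge_dims = c('sweek', 'syear'),
                   rename_dim = 'syear', na.rm = FALSE)
    dim(y)   # the two sample dimensions become one: syear = 30
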
From 1c6dbe2a1ab98c956caf892fde33c70dd5f25c88 Mon Sep 17 00:00:00 2001
From: vagudets
Date: Wed, 23 Jul 2025 10:47:10 +0200
Subject: [PATCH 11/17] Function to save cross-validation workflow outputs

---
 modules/Saving/Saving_crossval.R | 138 +++++++++++++++++++++++++++++++
 1 file changed, 138 insertions(+)
 create mode 100644 modules/Saving/Saving_crossval.R

diff --git a/modules/Saving/Saving_crossval.R b/modules/Saving/Saving_crossval.R
new file mode 100644
index 00000000..b5b0c777
--- /dev/null
+++ b/modules/Saving/Saving_crossval.R
@@ -0,0 +1,138 @@
+source("modules/Saving/R/get_dir.R")
+source("modules/Saving/R/get_filename.R")
+source("modules/Saving/R/Utils.R")
+source("modules/Saving/R/save_forecast.R")
+source("modules/Saving/R/save_observations.R")
+source("modules/Saving/R/save_metrics.R")
+source("modules/Saving/R/save_probabilities.R")
+source("modules/Saving/R/save_percentiles.R")
+source("modules/Saving/R/get_times.R")
+source("modules/Saving/R/get_latlon.R")
+source("modules/Saving/R/get_global_attributes.R")
+source("modules/Saving/R/drop_dims.R")
+## TODO: Remove when packages are released
+source("modules/Saving/R/tmp/ArrayToNc.R")
+source("modules/Saving/R/tmp/CST_SaveExp.R")
+
+
+
+Saving <- function(recipe, data, module, agg = "global") {
+
+  recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", module)
+
+  # Make categories rounded number to use as names:
+  categories <- recipe$Analysis$Workflow$Probabilities$percentiles
+  categories <- lapply(categories, function (x) {
+    sapply(x, function(y) {
+      round(eval(parse(text = y)), 2)})})
+
+  # 1. Save percentiles
+  limits <- list()
+  ## TODO: Review this if condition
+  if (recipe$Analysis$Workflow$Probabilities$save == 'all') {
+    for (ps in 1:length(categories)) {
+      limits_ps <- .drop_dims(data$cat_lims$obs[[ps]])
+      for (l in 1:dim(data$cat_lims$obs[[ps]])['bin']) {
+        limits_subset <- ClimProjDiags::Subset(x = limits_ps,
+                                               along = "bin",
+                                               indices = l,
+                                               drop = "selected")
+        limits <- append(limits, list(limits_subset))
+        names(limits)[length(limits)] <-
+          paste0("p_", as.character(categories[[ps]][l]*100))
+      }
+    }
+    info(recipe$Run$logger,
+         "SAVING OBSERVED CATEGORY LIMITS")
+    save_percentiles(recipe = recipe, percentiles = limits,
+                     data_cube = data$obs,
+                     agg = agg, outdir = NULL)
+  }
+
+  # 2. Save probabilities
+  probs_hcst <- list()
+  probs_fcst <- list()
+  probs_obs <- list()
+  all_names <- NULL
+
+  for (ps in 1:length(categories)) {
+    for (perc in 1:(length(categories[[ps]]) + 1)) {
+      if (perc == 1) {
+        name_elem <- paste0("below_", categories[[ps]][perc]*100)
+      } else if (perc == length(categories[[ps]]) + 1) {
+        name_elem <- paste0("above_", categories[[ps]][perc-1]*100)
+      } else {
+        name_elem <- paste0("from_", categories[[ps]][perc-1]*100,
+                            "_to_", categories[[ps]][perc]*100)
+      }
+      probs_hcst <- append(probs_hcst,
+                           list(Subset(data$probs$hcst[[ps]],
+                                       along = 'bin',
+                                       indices = perc,
+                                       drop = 'selected')))
+      probs_obs <- append(probs_obs,
+                          list(Subset(data$probs$obs[[ps]],
+                                      along = 'bin',
+                                      indices = perc,
+                                      drop = 'selected')))
+      if (!is.null(data$fcst)) {
+        probs_fcst <- append(probs_fcst,
+                             list(Subset(data$probs$fcst[[ps]],
+                                         along = 'bin',
+                                         indices = perc,
+                                         drop = 'selected')))
+      }
+      all_names <- c(all_names, name_elem)
+    }
+  }
+  names(probs_hcst) <- all_names
+  probs_hcst <- lapply(probs_hcst, .drop_dims)
+  names(probs_obs) <- all_names
+  probs_obs <- lapply(probs_obs, .drop_dims)
+  if (!is.null(data$fcst)) {
+    names(probs_fcst) <- all_names
+    probs_fcst <- lapply(probs_fcst, .drop_dims)
+  }
+  if (recipe$Analysis$Workflow$Probabilities$save %in%
+      c('all', 'bins_only')) {
+    ## TODO: Add hcst and obs
+    ## Saving only forecast probabilities for now
+    # save_probabilities(recipe = recipe, probs = probs_hcst,
+    #                    data_cube = data$hcst, agg = agg,
+    #                    type = "hcst")
+    # save_probabilities(recipe = recipe, probs = probs_obs,
+    #                    data_cube = data$obs, agg = agg,
+    #                    type = "obs")
+    if (!is.null(data$fcst)) {
+      save_probabilities(recipe = recipe, probs = probs_fcst,
+                         data_cube = data$fcst, agg = agg,
+                         type = "fcst")
+    }
+  }
+
+  # 3. Save hindcast, fcst and obs
+  if (recipe$Analysis$Workflow[[module]]$save != "none") {
+    # Save forecast
+    if ((recipe$Analysis$Workflow[[module]]$save %in%
+        c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) {
+      save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst')
+    }
+    # Save hindcast
+    if (recipe$Analysis$Workflow[[module]]$save %in%
+        c('all', 'exp_only')) {
+      if (dim(data$hcst$data)['sweek'] > 1) {
+        data$hcst <- CST_Subset(data$hcst, along = 'sweek',
                                indices = (dim(data$hcst$data)['sweek'] + 1) / 2)
+      }
+      save_forecast(recipe = recipe, data_cube = data$hcst, type = 'hcst')
+    }
+    # Save observations
+    if (recipe$Analysis$Workflow[[module]]$save == 'all') {
+      if (dim(data$obs$data)['sweek'] > 1) {
+        data$obs <- CST_Subset(data$obs, along = 'sweek',
+                               indices = (dim(data$obs$data)['sweek'] + 1) / 2)
+      }
+      save_observations(recipe = recipe, data_cube = data$obs)
+    }
+  }
+}
--
GitLab
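
The naming loop in the new Saving() function turns each pair of percentile thresholds into readable bin labels for the output files. Tracing it with the usual tercile thresholds (the percentile list itself comes from the recipe, so this value is an assumption):

    categories <- list(round(c(1/3, 2/3), 2))  # 0.33, 0.67
    # perc = 1 -> "below_33"; perc = 2 -> "from_33_to_67"; perc = 3 -> "above_67"
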
From 95e787b9df1ab6c25964357aa66e78ca97 Mon Sep 17 00:00:00 2001
From: vagudets
Date: Wed, 23 Jul 2025 10:47:38 +0200
Subject: [PATCH 12/17] Standardize outputs, compute limits even without
 forecast and use new saving function

---
 modules/Crossval/Crossval_anomalies.R   | 203 ++++---------
 modules/Crossval/Crossval_calibration.R | 387 ++----------------------
 2 files changed, 91 insertions(+), 499 deletions(-)

diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R
index d33aab80..95fe4701 100644
--- a/modules/Crossval/Crossval_anomalies.R
+++ b/modules/Crossval/Crossval_anomalies.R
@@ -48,14 +48,14 @@ Crossval_anomalies <- function(recipe, data) {
     cross <- cross[[indices]]
     print(paste("Crossval Index:", cross$eval.dexes))
     obs_tr <- Subset(obs, along = 'syear',
-                      indices = cross$train.dexes)
+                     indices = cross$train.dexes)
     hcst_tr <- Subset(exp, along = 'syear',
-                      indices = cross$train.dexes)
+                     indices = cross$train.dexes)
     ## evaluation indices
     hcst_ev <- Subset(exp, along = 'syear',
-                      indices = cross$eval.dexes, drop = 'selected')
+                     indices = cross$eval.dexes, drop = 'selected')
     obs_ev <- Subset(obs, along = 'syear',
-                      indices = cross$eval.dexes, drop = 'selected')
+                     indices = cross$eval.dexes, drop = 'selected')
     if (horizon == 'subseasonal') {
       central_day <- (dim(exp)['sday'] + 1)/2
       hcst_tr <- MergeDims(hcst_tr, merge_dims = c('sday', 'syear'),
                            rename_dim = 'syear', na.rm = FALSE)
@@ -235,30 +235,39 @@ Crossval_anomalies <- function(recipe, data) {
   res$lims_ano_obs_tr <- lims_tmp_obs
   rm(lims_tmp_hcst, lims_tmp_obs)
   gc()
-
-  recipe$Run$output_dir <- paste0(recipe$Run$output_dir,
-                                  "/", "outputs/Anomalies/")
+  # Subseasonal: merge sample dimensions and select central week
+  if (horizon %in% c('subseasonal')) {
+    hcst <- MergeDims(data$hcst$data, merge_dims = c('sday', 'syear'),
+                      rename_dim = 'syear', na.rm = FALSE)
+    hcst <- Subset(hcst, along = 'sweek',
+                   indices = (dim(data$hcst$data)['sweek'] + 1) / 2)
+    obs <- MergeDims(data$obs$data, merge_dims = c('sday', 'syear'),
+                     rename_dim = 'syear', na.rm = FALSE)
+    obs <- Subset(obs, along = 'sweek',
+                  indices = (dim(data$obs$data)['sweek'] + 1) / 2)
+    fcst <- Subset(data$fcst$data, along = 'sday', indices = 1,
+                   drop = 'selected')
+  } else {
+    # we need to keep original objects (to later compute bias)
+    hcst <- data$hcst$data
+    obs <- data$obs$data
+    fcst <- data$fcst$data
+  }
+
+  # Limits calculation with full hindcast period
+  lims <- list()
+  lims <- Apply(obs, target_dims = c('syear', 'ensemble'),
+                fun = function(x, prob_lims) {
+                  lapply(prob_lims, function(ps) {
+                    as.array(quantile(as.vector(x), ps,
+                                      na.rm = TRUE))})},
output_dims = lapply(categories, function(x) {'bin'}), + prob_lims = categories, + ncores = ncores) # Forecast anomalies: if (!is.null(data$fcst)) { - if (horizon %in% c('subseasonal')) { - # merge sample dimensions and select central week - hcst <- MergeDims(data$hcst$data, merge_dims = c('sday', 'syear'), - rename_dim = 'syear', na.rm = FALSE) - hcst <- Subset(hcst, along = 'sweek', - indices = (dim(data$hcst$data)['sweek'] + 1) / 2) - obs <- MergeDims(data$obs$data, merge_dims = c('sday', 'syear'), - rename_dim = 'syear', na.rm = FALSE) - obs <- Subset(obs, along = 'sweek', - indices = (dim(data$obs$data)['sweek'] + 1) / 2) - fcst <- Subset(data$fcst$data, along = 'sday', indices = 1) - } else { - # we need to keep original objects (to later compute bias) - hcst <- data$hcst$data - obs <- data$obs$data - fcst <- data$fcst$data - } clim_hcst <- Apply(hcst, target_dims = c('syear', 'ensemble'), mean, @@ -281,36 +290,17 @@ Crossval_anomalies <- function(recipe, data) { na.rm = na.rm, ncores = ncores)$output1 obs_ano <- Ano(data = obs, clim = clim_obs) - lims <- Apply(obs_ano, target_dims = c('syear', 'ensemble'), - fun = function(x, prob_lims) { - lapply(prob_lims, function(ps) { - as.array(quantile(as.vector(x), ps, - na.rm = TRUE))})}, - output_dims = lapply(categories, function(x) {'bin'}), - prob_lims = categories, - ncores = ncores) - tmp_lims2 <- list() - for (ps in 1:length(categories)) { - tmp_lims3 <- .drop_dims(lims[[ps]]) - for (l in 1:dim(lims[[ps]])['bin']) { - tmp_lims <- ClimProjDiags::Subset(x = tmp_lims3, - along = "bin", - indices = l, - drop = "selected") - tmp_lims2 <- append(tmp_lims2, list(tmp_lims)) - names(tmp_lims2)[length(tmp_lims2)] <- as.character(categories[[ps]][l]) - } - if (recipe$Analysis$Workflow$Probabilities$save %in% c("all", "percentiles_only")) { - save_percentiles(recipe = recipe, percentiles = tmp_lims2, - data_cube = data$obs, - agg = "global", outdir = NULL) - } - } - } - ## TODO: Should this happen here? 
- # Make categories rounded number to use as names: - categories <- lapply(categories, round, digits = 2) + # Percentile limits for the forecast: + lims_fcst <- Apply(hcst_cal, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + as.array(quantile(as.vector(x), ps, + na.rm = TRUE))})}, + output_dims = lapply(categories, function(x) {'bin'}), + prob_lims = categories, + ncores = ncores) + } # Compute Probabilities fcst_probs <- lapply(categories, function(x){NULL}) @@ -348,103 +338,28 @@ Crossval_anomalies <- function(recipe, data) { ano_obs <- data$obs ano_obs$data <- res$ano_obs_ev - info(recipe$Run$logger, - "#### Anomalies and Probabilities Done #####") - if (recipe$Analysis$Workflow$Anomalies$save != 'none') { - info(recipe$Run$logger, "##### START SAVING ANOMALIES #####") - # recipe$Run$output_dir <- paste0(recipe$Run$output_dir, - # "/", "outputs/Anomalies/") - # Save forecast - if ((recipe$Analysis$Workflow$Anomalies$save %in% - c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) { - save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst', - agg = agg) - } - # Save hindcast - if (recipe$Analysis$Workflow$Anomalies$save %in% - c('all', 'exp_only')) { - save_forecast(recipe = recipe, data_cube = ano_hcst, type = 'hcst', - agg = agg) - } - # Save observation - if (recipe$Analysis$Workflow$Anomalies$save == 'all') { - save_observations(recipe = recipe, data_cube = ano_obs, agg = agg) - } - } - # Save probability bins - probs_hcst <- list() - probs_fcst <- list() - probs_obs <- list() - all_names <- NULL - - for (ps in 1:length(categories)) { - for (perc in 1:(length(categories[[ps]]) + 1)) { - if (perc == 1) { - name_elem <- paste0("below_", categories[[ps]][perc]) - } else if (perc == length(categories[[ps]]) + 1) { - name_elem <- paste0("above_", categories[[ps]][perc-1]) - } else { - name_elem <- paste0("from_", categories[[ps]][perc-1], - "_to_", categories[[ps]][perc]) - } - probs_hcst <- append(probs_hcst, - list(Subset(hcst_probs_ev[[ps]], - along = 'bin', - indices = perc, - drop = 'selected'))) - probs_obs <- append(probs_obs, - list(Subset(obs_probs_ev[[ps]], - along = 'bin', - indices = perc, - drop = 'selected'))) - if (!is.null(data$fcst)) { - probs_fcst <- append(probs_fcst, - list(Subset(fcst_probs[[ps]], - along = 'bin', - indices = perc, - drop = 'selected'))) - } - all_names <- c(all_names, name_elem) - } - } - ## TODO: Apply .drop_dims() directly to hcst_probs_ev etc; and move - ## reorganizing and renaming to save_probabilities() - names(probs_hcst) <- all_names - probs_hcst <- lapply(probs_hcst, .drop_dims) - names(probs_obs) <- all_names - probs_obs <- lapply(probs_obs, .drop_dims) - if (!is.null(data$fcst)) { - names(probs_fcst) <- all_names - probs_fcst <- lapply(probs_fcst, .drop_dims) - } - - if (recipe$Analysis$Workflow$Probabilities$save %in% - c('all', 'bins_only')) { - # save_probabilities(recipe = recipe, probs = probs_hcst, - # data_cube = data$hcst, agg = agg, - # type = "hcst") - # save_probabilities(recipe = recipe, probs = probs_obs, - # data_cube = data$hcst, agg = agg, - # type = "obs") - if (length(probs_fcst) > 0) { - save_probabilities(recipe = recipe, probs = probs_fcst, - data_cube = data$fcst, agg = agg, - type = "fcst") - } - } # Subset dimension 'sday' for subseasonal for mean bias: if (horizon == 'subseasonal') { central_day <- (dim(data$hcst$data)['sday'] + 1)/2 data$hcst <- CST_Subset(data$hcst, along = 'sday', indices = central_day) data$obs <- CST_Subset(data$obs, 
along = 'sday', indices = central_day) + ano_hcst <- CST_Subset(ano_hcst, along = 'sday', indices = central_day) + ano_obs <- CST_Subset(ano_obs, along = 'sday', indices = central_day) } - return(list(hcst = ano_hcst, obs = ano_obs, fcst = data$fcst, - hcst.full_val = data$hcst, obs.full_val = data$obs, - cat_lims = list(obs = res$lims_ano_obs_tr), - probs = list(hcst = hcst_probs_ev, - obs = obs_probs_ev, - fcst = fcst_probs), - ref_obs = res$ano_obs_tr)) + + data <- list(hcst = ano_hcst, obs = ano_obs, fcst = data$fcst, + hcst.full_val = data$hcst, obs.full_val = data$obs, + cat_lims = list(obs = lims), + probs = list(hcst = hcst_probs_ev, + obs = obs_probs_ev, + fcst = fcst_probs), + ref_obs = res$ano_obs_tr) + + # Save Crossval module outputs + source("modules/Saving/Saving_crossval.R") + Saving(recipe, data, module = "Anomalies", agg = agg) + + return(data) } diff --git a/modules/Crossval/Crossval_calibration.R b/modules/Crossval/Crossval_calibration.R index 48f250c9..67598e1f 100644 --- a/modules/Crossval/Crossval_calibration.R +++ b/modules/Crossval/Crossval_calibration.R @@ -44,14 +44,14 @@ Crossval_calibration <- function(recipe, data, correct_negative = FALSE) { cross <- cross[[indices]] print(paste("Crossval Index:", cross$eval.dexes)) obs_tr <- Subset(obs, along = 'syear', - indices = cross$train.dexes) + indices = cross$train.dexes) hcst_tr <- Subset(exp, along = 'syear', - indices = cross$train.dexes) + indices = cross$train.dexes) ## evaluation indices hcst_ev <- Subset(exp, along = 'syear', - indices = cross$eval.dexes) + indices = cross$eval.dexes) obs_ev <- Subset(obs, along = 'syear', - indices = cross$eval.dexes) + indices = cross$eval.dexes) if (horizon == 'subseasonal') { central_day <- (dim(exp)['sday'] + 1)/2 @@ -67,248 +67,21 @@ Crossval_calibration <- function(recipe, data, correct_negative = FALSE) { } if (cal_method %in% c('PTF', 'DIST', 'RQUANT', 'QUANT', 'SSPLIN')) { cal_hcst_tr <- QuantileMapping(exp = hcst_tr, obs = obs_tr, - method = cal_method, - memb_dim = 'ensemble', - sdate_dim = 'syear', - window_dim = NULL, - na.rm = na.rm, qstep = 0.1, - wet.day = FALSE, - ncores = ncores) - cal_hcst_ev <- QuantileMapping(exp = hcst_tr, obs = obs_tr, - exp_cor = hcst_ev, - method = cal_method, - memb_dim = 'ensemble', + method = cal_method, + memb_dim = 'ensemble', sdate_dim = 'syear', window_dim = NULL, - wet.day = FALSE, na.rm = na.rm, qstep = 0.1, + wet.day = FALSE, ncores = ncores) - } else { - cal_hcst_tr <- CSTools::Calibration(exp = hcst_tr, obs = obs_tr, - cal.method = cal_method, - memb_dim = 'ensemble', sdate_dim = 'syear', - eval.method = 'in-sample', - na.fill = TRUE, na.rm = na.rm, - apply_to = NULL, - alpha = NULL, ncores = ncores) - cal_hcst_ev <- CSTools::Calibration(exp = hcst_tr, obs = obs_tr, exp_cor = hcst_ev, - cal.method = cal_method, - memb_dim = 'ensemble', sdate_dim = 'syear', - eval.method = 'in-sample', - na.fill = TRUE, na.rm = na.rm, - apply_to = NULL, - alpha = NULL, ncores = ncores) - } - # If precipitation is negative: - if (correct_negative) { - cal_hcst_tr[cal_hcst_tr < 0] <- 0 - cal_hcst_ev[cal_hcst_ev < 0] <- 0 - } - lims_cal_hcst_tr <- Apply(cal_hcst_tr, target_dims = c('syear', 'ensemble'), - fun = function(x, prob_lims) { - as.array(quantile(as.vector(x), prob_lims, - na.rm = TRUE))}, - prob_lims = unlist(categories), - ncores = ncores)$output1 - names(dim(lims_cal_hcst_tr))[1] <- 'bin' - lims_obs_tr <- Apply(obs_tr, target_dims = c('syear'),#, 'ensemble'), - fun = function(x, prob_lims) { - as.array(quantile(as.vector(x), 
prob_lims, - na.rm = TRUE))}, - prob_lims = unlist(categories), - ncores = ncores)$output1 - names(dim(lims_obs_tr))[1] <- 'bin' - if (horizon == 'subseasonal') { - cal_hcst_tr <- SplitDim(cal_hcst_tr, split_dim = 'syear', - new_dim_name = 'sday', - indices = rep(1:dim(data$hcst$data)['sday'], - length(cross$train.dexes))) - obs_tr <- SplitDim(obs_tr, split_dim = 'syear', new_dim_name = 'sday', - indices = rep(1:dim(data$hcst$data)['sday'], - length(cross$train.dexes))) - } - # check order before storing: - # Add back 'sday' dim if needed or select central 'sday' - if ('sday' %in% names(dim(cal_hcst_ev))) { - if (dim(cal_hcst_ev)['sday'] != 1) { - # To fail because it should not appear - } - } else { - cal_hcst_ev <- InsertDim(cal_hcst_ev, 'sday', len = 1, pos = 1) - } - # remove syear dim from ev objects - if ('syear' %in% names(dim(cal_hcst_ev))) { - if (dim(cal_hcst_ev)['syear'] == 1) { - cal_hcst_ev <- Subset(cal_hcst_ev, along = 'syear', indices = 1, - drop = 'selected') - } - } - # ---- - if ('sday' %in% names(dim(obs_tr))) { - if (dim(obs_tr)['sday'] > 1) { - # 'sday' dim to select the central day - obs_tr <- Subset(obs_tr, along = 'sday', - indices = central_day) - } - } else { - obs_tr <- InsertDim(obs_tr, 'sday', len = 1, pos = 1) - } - # rename dimension to be ensemble - if ('ensemble' %in% names(dim(obs_tr))) { - if (dim(obs_tr)['ensemble'] == 1) { - obs_tr <- Subset(obs_tr, along = 'ensemble', indices = 1, - drop = 'selected') - } else { - stop("why it is longer than 1?") - } - } - if ('syear' %in% names(dim(obs_tr))) { # should be true - names(dim(obs_tr))[which(names(dim(obs_tr)) == 'syear')] <- 'ensemble' - } - # ---- - if ('sday' %in% names(dim(lims_obs_tr))) { - if (dim(lims_obs_tr)['sday'] > 1) { - # 'sday' dim to select the central day - lims_obs_tr <- Subset(lims_obs_tr, along = 'sday', - indices = central_day) - } - } else { - lims_obs_tr <- InsertDim(lims_obs_tr, 'sday', len = 1, pos = 1) - } - # lims cannot have ensemble dim: - if ('ensemble' %in% names(dim(lims_obs_tr))) { - if (dim(lims_obs_tr)['ensemble'] == 1) { - lims_obs_tr <- Subset(lims_obs_tr, along = 'ensemble', - indices = 1, drop = 'selected') - } else { - # error: - stop("Why it has memb_dim") - } - } - # ---- - if ('sday' %in% names(dim(lims_cal_hcst_tr))) { - if (dim(lims_cal_hcst_tr)['sday'] > 1) { - # 'sday' dim to select the central day - lims_cal_hcst_tr <- Subset(lims_cal_hcst_tr, along = 'sday', - indices = central_day) - } - } else { - lims_cal_hcst_tr <- InsertDim(lims_cal_hcst_tr, 'sday', len = 1, pos = 1) - } - # lims cannot have ensemble dim: - if ('ensemble' %in% names(dim(lims_cal_hcst_tr))) { - if (dim(lims_cal_hcst_tr)['ensemble'] == 1) { - lims_cal_hcst_tr <- Subset(lims_cal_hcst_tr, along = 'ensemble', - indices = 1, drop = 'selected') - } else { - # error: - stop("Why it has memb_dim") - } - } - return(list(cal_hcst_ev = cal_hcst_ev, - obs_tr = obs_tr, - lims_cal_hcst_tr = lims_cal_hcst_tr, - lims_obs_tr = lims_obs_tr)) - } - - indices <- array(1:length(cross), dim = c(syear = length(cross))) - res <- Apply(data = list(indices = indices, - exp = data$hcst$data, - obs = data$obs$data), - target_dims = list(indices = NULL, - obs = names(dim(data$obs$data)), - exp = names(dim(data$hcst$data))), - fun = .cross_calibration, - ncores = 1, ## TODO: Explore options for core distribution - cross = cross, - categories = categories, - recipe = recipe) - info(recipe$Run$logger, - "#### Calibration Cross-validation loop ended #####") - # Rearrange limits into a list: - lims_tmp_hcst <- list() - 
lims_tmp_obs <- list() - last_index <- 0 - for (ps in 1:length(categories)) { - indices <- last_index + 1:length(categories[[ps]]) - lims_category_hcst <- Subset(res$lims_cal_hcst_tr, along = "bin", - indices = list(indices), - drop = FALSE) - lims_tmp_hcst[[ps]] <- lims_category_hcst - lims_category_obs <- Subset(res$lims_obs_tr, along = "bin", - indices = list(indices), - drop = FALSE) - lims_tmp_obs[[ps]] <- lims_category_obs - last_index <- indices[length(indices)] - } - res$lims_cal_hcst_tr <- lims_tmp_hcst - res$lims_obs_tr <- lims_tmp_obs - rm(lims_tmp_hcst, lims_tmp_obs) - gc() - - # Forecast calibration: - lims <- list() - if (!is.null(data$fcst)) { - if (horizon %in% c('subseasonal')) { - # merge sample dimensions and select central week - hcst <- MergeDims(data$hcst$data, merge_dims = c('sday', 'syear'), - rename_dim = 'syear', na.rm = FALSE) - hcst <- Subset(hcst, along = 'sweek', - indices = (dim(data$hcst$data)['sweek'] + 1) / 2) - obs <- MergeDims(data$obs$data, merge_dims = c('sday', 'syear'), - rename_dim = 'syear', na.rm = FALSE) - obs <- Subset(obs, along = 'sweek', - indices = (dim(data$obs$data)['sweek'] + 1) / 2) - fcst <- Subset(data$fcst$data, along = 'sday', indices = 1, - drop = 'selected') - } else { - # we need to keep original objects (to later compute bias) - hcst <- data$hcst$data - obs <- data$obs$data - fcst <- data$fcst$data - } - if (cal_method %in% c('PTF', 'DIST', 'RQUANT', 'QUANT', 'SSPLIN')) { - data$fcst$data <- QuantileMapping(exp = hcst, - obs = obs, - exp_cor = fcst, - method = cal_method, - memb_dim = 'ensemble', - sdate_dim = 'syear', - window_dim = NULL, - na.rm = na.rm, qstep = 0.1, - wet.day = FALSE, - ncores = ncores) - hcst_cal <- QuantileMapping(exp = hcst, - obs = obs, - method = cal_method, - memb_dim = 'ensemble', - sdate_dim = 'syear', - window_dim = NULL, - wet.day = FALSE, - na.rm = na.rm, qstep = 0.1, - ncores = ncores) - } else { - data$fcst$data <- CSTools::Calibration(exp = hcst, - obs = obs, - exp_cor = fcst, - cal.method = cal_method, - multi.model = FALSE, - na.fill = TRUE, na.rm = na.rm, - apply_to = NULL, - alpha = NULL, memb_dim = 'ensemble', - sdate_dim = 'syear', - dat_dim = NULL, ncores = ncores) - hcst_cal <- CSTools::Calibration(exp = hcst, - obs = obs, - cal.method = cal_method, - eval.method = 'in-sample', - multi.model = FALSE, - na.fill = TRUE, na.rm = na.rm, - apply_to = NULL, - alpha = NULL, memb_dim = 'ensemble', - sdate_dim = 'syear', - dat_dim = NULL, ncores = ncores) - } + cal_hcst_ev <- QuantileMapping(exp = hcst_tr, obs = obs_tr, + exp_cor = hcst_ev, + method = cal_method, + memb_dim = 'ensemble', + sdate_dim = 'syear', +???LINES MISSING +???LINES MISSING +???LINES MISSING if (!('sday' %in% names(dim(data$fcst$data)))) { data$fcst$data <- InsertDim(data$fcst$data, len = 1, pos = 3, name = 'sday') @@ -317,7 +90,7 @@ Crossval_calibration <- function(recipe, data, correct_negative = FALSE) { hcst_cal[hcst_cal < 0] <- 0 data$fcst$data[data$fcst$data < 0] <- 0 } - # Terciles limits using the whole hindcast period: + # Percentile limits for the forecast: lims_fcst <- Apply(hcst_cal, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { @@ -326,32 +99,14 @@ Crossval_calibration <- function(recipe, data, correct_negative = FALSE) { output_dims = lapply(categories, function(x) {'bin'}), prob_lims = categories, ncores = ncores) - lims <- Apply(obs, target_dims = c('syear', 'ensemble'), - fun = function(x, prob_lims) { - lapply(prob_lims, function(ps) { - 
as.array(quantile(as.vector(x), ps, - na.rm = TRUE))})}, - output_dims = lapply(categories, function(x) {'bin'}), - prob_lims = categories, - ncores = ncores) } - tmp_lims2 <- list() - # SAVING 'lims' which are the observed category limits: - # TODO saving: - recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "/", - "outputs/Calibration/") - - # Make categories rounded number to use as names: - categories <- recipe$Analysis$Workflow$Probabilities$percentiles - categories <- lapply(categories, function (x) { - sapply(x, function(y) { - round(eval(parse(text = y)),2)})}) - + ## TODO: Function? # Compute Probabilities hcst_probs_ev <- list() obs_probs_ev <- list() fcst_probs <- list() + for (ps in 1:length(categories)) { hcst_probs_ev[[ps]] <- GetProbs(res$cal_hcst_ev, time_dim = 'syear', prob_thresholds = NULL, @@ -379,118 +134,40 @@ Crossval_calibration <- function(recipe, data, correct_negative = FALSE) { ncores = ncores) } } - if (recipe$Analysis$Workflow$Probabilities$save == 'all') { - for (ps in 1:length(categories)) { - ## TODO: use .drop_dims() - tmp_lims3 <- .drop_dims(lims[[ps]]) - for (l in 1:dim(lims[[ps]])['bin']) { - tmp_lims <- ClimProjDiags::Subset(x = tmp_lims3, - along = "bin", - indices = l, - drop = "selected") - tmp_lims2 <- append(tmp_lims2, list(tmp_lims)) - names(tmp_lims2)[length(tmp_lims2)] <- - paste0("p_", as.character(categories[[ps]][l]*100)) - } - } - info(recipe$Run$logger, - "SAVING OBSERVED CATEGORY LIMITS") - save_percentiles(recipe = recipe, percentiles = tmp_lims2, - data_cube = data$obs, - agg = "global", outdir = NULL) - #} - } # Convert to s2dv_cubes the resulting calibrated hcst <- data$hcst hcst$data <- res$cal_hcst_ev + # Subseasonal case: keep only the central sday position from the dimension if (horizon == 'subseasonal') { - # Keep only de central sday position from the dimension if (dim(hcst$data)['sday'] > 1) { hcst <- CST_Subset(hcst, along = 'sday', indices = (dim(hcst$data)['sday'] + 1) / 2) } if (dim(data$obs$data)['sday'] > 1) { data$obs <- CST_Subset(data$obs, along = 'sday', - indices = (dim(data$obs$data)['sday'] + 1) / 2) + indices = (dim(data$obs$data)['sday'] + 1) / 2) } if (dim(data$hcst$data)['sday'] > 1) { data$hcst <- CST_Subset(data$hcst, along = 'sday', - indices = (dim(data$hcst$data)['sday'] + 1) / 2) + indices = (dim(data$hcst$data)['sday'] + 1) / 2) } } - info(recipe$Run$logger, - "#### Calibrated and Probabilities Done #####") - if (recipe$Analysis$Workflow$Calibration$save != FALSE) { - info(recipe$Run$logger, "##### START SAVING CALIBRATED #####") - # Save forecast - if ((recipe$Analysis$Workflow$Calibration$save %in% - c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) { - save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') - } - } - # Save probability bins - probs_hcst <- list() - probs_fcst <- list() - probs_obs <- list() - all_names <- NULL - for (ps in 1:length(categories)) { - for (perc in 1:(length(categories[[ps]]) + 1)) { - if (perc == 1) { - name_elem <- paste0("below_", categories[[ps]][perc]*100) - } else if (perc == length(categories[[ps]]) + 1) { - name_elem <- paste0("above_", categories[[ps]][perc-1]*100) - } else { - name_elem <- paste0("from_", categories[[ps]][perc-1]*100, - "_to_", categories[[ps]][perc]*100) - } - probs_hcst <- append(probs_hcst, - list(Subset(hcst_probs_ev[[ps]], - along = 'bin', - indices = perc, - drop = 'selected'))) - probs_obs <- append(probs_obs, - list(Subset(obs_probs_ev[[ps]], - along = 'bin', - indices = perc, - drop = 'selected'))) - if 
(!is.null(data$fcst)) { - probs_fcst <- append(probs_fcst, - list(Subset(fcst_probs[[ps]], - along = 'bin', - indices = perc, - drop = 'selected'))) - } - all_names <- c(all_names, name_elem) - } - } - ## TODO: Apply .drop_dims() directly to hcst_probs_ev etc; and move - ## reorganizing and renaming to save_probabilities() - names(probs_hcst) <- all_names - probs_hcst <- lapply(probs_hcst, .drop_dims) - names(probs_obs) <- all_names - probs_obs <- lapply(probs_obs, .drop_dims) - if (!is.null(data$fcst)) { - names(probs_fcst) <- all_names - probs_fcst <- lapply(probs_fcst, .drop_dims) - } - if (recipe$Analysis$Workflow$Probabilities$save %in% - c('all', 'bins_only')) { - save_probabilities(recipe = recipe, probs = probs_fcst, - data_cube = data$fcst, agg = agg, - type = "fcst") - } + data <- list(hcst = hcst, obs = data$obs, fcst = data$fcst, + hcst.full_val = data$hcst, obs.full_val = data$obs, + cat_lims = list(obs = lims), + probs = list(hcst = hcst_probs_ev, + obs = obs_probs_ev, + fcst = fcst_probs), + ref_obs = res$obs_tr) - return(list(hcst = hcst, obs = data$obs, fcst = data$fcst, - hcst.full_val = data$hcst, obs.full_val = data$obs, - cat_lims = list(obs = lims), - probs = list(hcst = hcst_probs_ev, - obs = obs_probs_ev, - fcst = fcst_probs), - ref_obs = res$obs_tr)) + # Save Crossval module outputs + source("modules/Saving/Saving_crossval.R") + Saving(recipe, data, module = "Calibration", agg = agg) + return(data) } # Outputs: -- GitLab From a850538c4bf5ee24fb8924b026aaa08bc2de8e2b Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 23 Jul 2025 12:02:47 +0200 Subject: [PATCH 13/17] Restore Crossval_calibration() --- modules/Crossval/Crossval_calibration.R | 245 +++++++++++++++++++++++- 1 file changed, 242 insertions(+), 3 deletions(-) diff --git a/modules/Crossval/Crossval_calibration.R b/modules/Crossval/Crossval_calibration.R index 67598e1f..2aef46fc 100644 --- a/modules/Crossval/Crossval_calibration.R +++ b/modules/Crossval/Crossval_calibration.R @@ -79,9 +79,248 @@ Crossval_calibration <- function(recipe, data, correct_negative = FALSE) { method = cal_method, memb_dim = 'ensemble', sdate_dim = 'syear', -???LINES MISSING -???LINES MISSING -???LINES MISSING + window_dim = NULL, + wet.day = FALSE, + na.rm = na.rm, qstep = 0.1, + ncores = ncores) + } else { + cal_hcst_tr <- CSTools::Calibration(exp = hcst_tr, obs = obs_tr, + cal.method = cal_method, + memb_dim = 'ensemble', sdate_dim = 'syear', + eval.method = 'in-sample', + na.fill = TRUE, na.rm = na.rm, + apply_to = NULL, + alpha = NULL, ncores = ncores) + cal_hcst_ev <- CSTools::Calibration(exp = hcst_tr, obs = obs_tr, exp_cor = hcst_ev, + cal.method = cal_method, + memb_dim = 'ensemble', sdate_dim = 'syear', + eval.method = 'in-sample', + na.fill = TRUE, na.rm = na.rm, + apply_to = NULL, + alpha = NULL, ncores = ncores) + } + # If precipitation is negative: + if (correct_negative) { + cal_hcst_tr[cal_hcst_tr < 0] <- 0 + cal_hcst_ev[cal_hcst_ev < 0] <- 0 + } + lims_cal_hcst_tr <- Apply(cal_hcst_tr, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + as.array(quantile(as.vector(x), prob_lims, + na.rm = TRUE))}, + prob_lims = unlist(categories), + ncores = ncores)$output1 + names(dim(lims_cal_hcst_tr))[1] <- 'bin' + lims_obs_tr <- Apply(obs_tr, target_dims = c('syear'),#, 'ensemble'), + fun = function(x, prob_lims) { + as.array(quantile(as.vector(x), prob_lims, + na.rm = TRUE))}, + prob_lims = unlist(categories), + ncores = ncores)$output1 + names(dim(lims_obs_tr))[1] <- 'bin' + if (horizon == 'subseasonal') { 
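+ # Split the merged ('sday' x 'syear') training dimension back into 'sday',
+ # so that the central start day can be selected from 'obs_tr' further down.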
+ cal_hcst_tr <- SplitDim(cal_hcst_tr, split_dim = 'syear',
+ new_dim_name = 'sday',
+ indices = rep(1:dim(data$hcst$data)['sday'],
+ length(cross$train.dexes)))
+ obs_tr <- SplitDim(obs_tr, split_dim = 'syear', new_dim_name = 'sday',
+ indices = rep(1:dim(data$hcst$data)['sday'],
+ length(cross$train.dexes)))
+ }
+ # Check order before storing:
+ # Add back 'sday' dim if needed or select central 'sday'
+ if ('sday' %in% names(dim(cal_hcst_ev))) {
+ if (dim(cal_hcst_ev)['sday'] != 1) {
+ # Should not happen at this point; fail with an informative error
+ stop("Unexpected 'sday' dimension of length > 1 in 'cal_hcst_ev'.")
+ }
+ } else {
+ cal_hcst_ev <- InsertDim(cal_hcst_ev, 'sday', len = 1, pos = 1)
+ }
+ # remove syear dim from ev objects
+ if ('syear' %in% names(dim(cal_hcst_ev))) {
+ if (dim(cal_hcst_ev)['syear'] == 1) {
+ cal_hcst_ev <- Subset(cal_hcst_ev, along = 'syear', indices = 1,
+ drop = 'selected')
+ }
+ }
+ # ----
+ if ('sday' %in% names(dim(obs_tr))) {
+ if (dim(obs_tr)['sday'] > 1) {
+ # 'sday' dim to select the central day
+ obs_tr <- Subset(obs_tr, along = 'sday',
+ indices = central_day)
+ }
+ } else {
+ obs_tr <- InsertDim(obs_tr, 'sday', len = 1, pos = 1)
+ }
+ # rename dimension to be ensemble
+ if ('ensemble' %in% names(dim(obs_tr))) {
+ if (dim(obs_tr)['ensemble'] == 1) {
+ obs_tr <- Subset(obs_tr, along = 'ensemble', indices = 1,
+ drop = 'selected')
+ } else {
+ stop("Unexpected 'ensemble' dimension of length > 1 in 'obs_tr'.")
+ }
+ }
+ if ('syear' %in% names(dim(obs_tr))) { # should be true
+ names(dim(obs_tr))[which(names(dim(obs_tr)) == 'syear')] <- 'ensemble'
+ }
+ # ----
+ if ('sday' %in% names(dim(lims_obs_tr))) {
+ if (dim(lims_obs_tr)['sday'] > 1) {
+ # 'sday' dim to select the central day
+ lims_obs_tr <- Subset(lims_obs_tr, along = 'sday',
+ indices = central_day)
+ }
+ } else {
+ lims_obs_tr <- InsertDim(lims_obs_tr, 'sday', len = 1, pos = 1)
+ }
+ # lims cannot have ensemble dim:
+ if ('ensemble' %in% names(dim(lims_obs_tr))) {
+ if (dim(lims_obs_tr)['ensemble'] == 1) {
+ lims_obs_tr <- Subset(lims_obs_tr, along = 'ensemble',
+ indices = 1, drop = 'selected')
+ } else {
+ stop("Unexpected 'ensemble' dimension in 'lims_obs_tr'.")
+ }
+ }
+ # ----
+ if ('sday' %in% names(dim(lims_cal_hcst_tr))) {
+ if (dim(lims_cal_hcst_tr)['sday'] > 1) {
+ # 'sday' dim to select the central day
+ lims_cal_hcst_tr <- Subset(lims_cal_hcst_tr, along = 'sday',
+ indices = central_day)
+ }
+ } else {
+ lims_cal_hcst_tr <- InsertDim(lims_cal_hcst_tr, 'sday', len = 1, pos = 1)
+ }
+ # lims cannot have ensemble dim:
+ if ('ensemble' %in% names(dim(lims_cal_hcst_tr))) {
+ if (dim(lims_cal_hcst_tr)['ensemble'] == 1) {
+ lims_cal_hcst_tr <- Subset(lims_cal_hcst_tr, along = 'ensemble',
+ indices = 1, drop = 'selected')
+ } else {
+ stop("Unexpected 'ensemble' dimension in 'lims_cal_hcst_tr'.")
+ }
+ }
+ return(list(cal_hcst_ev = cal_hcst_ev,
+ obs_tr = obs_tr,
+ lims_cal_hcst_tr = lims_cal_hcst_tr,
+ lims_obs_tr = lims_obs_tr))
+ }
+
+ indices <- array(1:length(cross), dim = c(syear = length(cross)))
+ res <- Apply(data = list(indices = indices,
+ exp = data$hcst$data,
+ obs = data$obs$data),
+ target_dims = list(indices = NULL,
+ obs = names(dim(data$obs$data)),
+ exp = names(dim(data$hcst$data))),
+ fun = .cross_calibration,
+ ncores = 1, ## TODO: Explore options for core distribution
+ cross = cross,
+ categories = categories,
+ recipe = recipe)
+ info(recipe$Run$logger,
+ "#### Calibration Cross-validation loop ended #####")
+ # Rearrange limits into a list:
+ lims_tmp_hcst <- list()
+ lims_tmp_obs <- list()
+ last_index <- 0
+ for (ps in 1:length(categories)) {
+ indices <- last_index + 1:length(categories[[ps]])
+ lims_category_hcst <-
Subset(res$lims_cal_hcst_tr, along = "bin", + indices = list(indices), + drop = FALSE) + lims_tmp_hcst[[ps]] <- lims_category_hcst + lims_category_obs <- Subset(res$lims_obs_tr, along = "bin", + indices = list(indices), + drop = FALSE) + lims_tmp_obs[[ps]] <- lims_category_obs + last_index <- indices[length(indices)] + } + res$lims_cal_hcst_tr <- lims_tmp_hcst + res$lims_obs_tr <- lims_tmp_obs + rm(lims_tmp_hcst, lims_tmp_obs) + gc() + + if (horizon %in% c('subseasonal')) { + # merge sample dimensions and select central week + hcst <- MergeDims(data$hcst$data, merge_dims = c('sday', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + hcst <- Subset(hcst, along = 'sweek', + indices = (dim(data$hcst$data)['sweek'] + 1) / 2) + obs <- MergeDims(data$obs$data, merge_dims = c('sday', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + obs <- Subset(obs, along = 'sweek', + indices = (dim(data$obs$data)['sweek'] + 1) / 2) + fcst <- Subset(data$fcst$data, along = 'sday', indices = 1, + drop = 'selected') + } else { + # we need to keep original objects (to later compute bias) + hcst <- data$hcst$data + obs <- data$obs$data + fcst <- data$fcst$data + } + + # Limits calculation with full hindcast period + lims <- list() + lims <- Apply(obs, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + as.array(quantile(as.vector(x), ps, + na.rm = TRUE))})}, + output_dims = lapply(categories, function(x) {'bin'}), + prob_lims = categories, + ncores = ncores) + + # Forecast calibration + if (!is.null(data$fcst)) { + if (cal_method %in% c('PTF', 'DIST', 'RQUANT', 'QUANT', 'SSPLIN')) { + data$fcst$data <- QuantileMapping(exp = hcst, + obs = obs, + exp_cor = fcst, + method = cal_method, + memb_dim = 'ensemble', + sdate_dim = 'syear', + window_dim = NULL, + na.rm = na.rm, qstep = 0.1, + wet.day = FALSE, + ncores = ncores) + hcst_cal <- QuantileMapping(exp = hcst, + obs = obs, + method = cal_method, + memb_dim = 'ensemble', + sdate_dim = 'syear', + window_dim = NULL, + wet.day = FALSE, + na.rm = na.rm, qstep = 0.1, + ncores = ncores) + } else { + data$fcst$data <- CSTools::Calibration(exp = hcst, + obs = obs, + exp_cor = fcst, + cal.method = cal_method, + multi.model = FALSE, + na.fill = TRUE, na.rm = na.rm, + apply_to = NULL, + alpha = NULL, memb_dim = 'ensemble', + sdate_dim = 'syear', + dat_dim = NULL, ncores = ncores) + hcst_cal <- CSTools::Calibration(exp = hcst, + obs = obs, + cal.method = cal_method, + eval.method = 'in-sample', + multi.model = FALSE, + na.fill = TRUE, na.rm = na.rm, + apply_to = NULL, + alpha = NULL, memb_dim = 'ensemble', + sdate_dim = 'syear', + dat_dim = NULL, ncores = ncores) + } + if (!('sday' %in% names(dim(data$fcst$data)))) { data$fcst$data <- InsertDim(data$fcst$data, len = 1, pos = 3, name = 'sday') -- GitLab From 05803d25459590f3aeaecd37a9664e266bde51cf Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 23 Jul 2025 12:03:15 +0200 Subject: [PATCH 14/17] Add saving_metrics() --- modules/Saving/Saving_crossval.R | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/modules/Saving/Saving_crossval.R b/modules/Saving/Saving_crossval.R index b5b0c777..eb025f2a 100644 --- a/modules/Saving/Saving_crossval.R +++ b/modules/Saving/Saving_crossval.R @@ -16,8 +16,9 @@ source("modules/Saving/R/tmp/CST_SaveExp.R") -Saving <- function(recipe, data, module, agg = "global") { +Saving <- function(recipe, data, module, metrics = NULL, agg = "global") { + original_output_dir <- recipe$Run$output_dir recipe$Run$output_dir <- 
file.path(recipe$Run$output_dir, "outputs", module)
 
 # Make categories rounded number to use as names:
@@ -55,6 +56,7 @@ Saving <- function(recipe, data, module, agg = "global") {
 probs_obs <- list()
 all_names <- NULL
 
+ # Convert arrays to named list of lists
 for (ps in 1:length(categories)) {
 for (perc in 1:(length(categories[[ps]]) + 1)) {
 if (perc == 1) {
@@ -135,4 +137,13 @@ Saving <- function(recipe, data, module, agg = "global") {
 save_observations(recipe = recipe, data_cube = data$obs)
 }
 }
+
+ # 4. Save metrics
+ if (!is.null(metrics)) {
+ recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", "Skill")
+ save_metrics(recipe = recipe,
+ metrics = metrics,
+ data_cube = data$obs, agg = agg,
+ outdir = recipe$Run$output_dir)
+ recipe$Run$output_dir <- original_output_dir
 }
-- GitLab

From 3f6ea086af948bd56480a88cff9b420280f906af Mon Sep 17 00:00:00 2001
From: vagudets
Date: Wed, 23 Jul 2025 12:59:22 +0200
Subject: [PATCH 15/17] Fix pipeline

---
 modules/Saving/R/save_observations.R | 2 +-
 modules/Saving/Saving_crossval.R | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/modules/Saving/R/save_observations.R b/modules/Saving/R/save_observations.R
index 1ca2fc26..301f7b76 100644
--- a/modules/Saving/R/save_observations.R
+++ b/modules/Saving/R/save_observations.R
@@ -34,7 +34,7 @@ save_observations <- function(recipe,
 recipe$Analysis$Time$sdate),
 format = '%Y%m%d', cal = calendar)
 } else if (fcst.horizon == 'subseasonal') {
- init_date <- as.PCICt(recipe$Analysis$Time$sdate,
+ init_date <- as.PCICt(as.character(recipe$Analysis$Time$sdate),
 format = '%Y%m%d', cal = calendar)
 }
 
diff --git a/modules/Saving/Saving_crossval.R b/modules/Saving/Saving_crossval.R
index eb025f2a..693c698a 100644
--- a/modules/Saving/Saving_crossval.R
+++ b/modules/Saving/Saving_crossval.R
@@ -146,4 +146,5 @@ Saving <- function(recipe, data, module, metrics = NULL, agg = "global") {
 data_cube = data$obs, agg = agg,
 outdir = recipe$Run$output_dir)
 recipe$Run$output_dir <- original_output_dir
+ }
 }
-- GitLab

From 54caedbbc4507071d9b30746aaa86c5307635967 Mon Sep 17 00:00:00 2001
From: vagudets
Date: Fri, 25 Jul 2025 12:42:21 +0200
Subject: [PATCH 16/17] Bugfix in saving_crossval

---
 modules/Saving/Saving_crossval.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/Saving/Saving_crossval.R b/modules/Saving/Saving_crossval.R
index 693c698a..edd48351 100644
--- a/modules/Saving/Saving_crossval.R
+++ b/modules/Saving/Saving_crossval.R
@@ -140,11 +140,11 @@ Saving <- function(recipe, data, module, metrics = NULL, agg = "global") {
 
 # 4.
Save metrics
 if (!is.null(metrics)) {
- recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", "Skill")
+ recipe$Run$output_dir <- file.path(original_output_dir, "outputs", "Skill")
 save_metrics(recipe = recipe,
 metrics = metrics,
 data_cube = data$obs, agg = agg,
 outdir = recipe$Run$output_dir)
- recipe$Run$output_dir <- original_output_dir
 }
+ recipe$Run$output_dir <- original_output_dir
 }
-- GitLab

From e2a347b4262452aaf2b4e2fbdfd7c817d158265c Mon Sep 17 00:00:00 2001
From: vagudets
Date: Fri, 25 Jul 2025 12:44:39 +0200
Subject: [PATCH 17/17] Save data dimensions always in (time, latlon) order

---
 modules/Saving/R/save_forecast.R | 4 ++--
 modules/Saving/R/save_metrics.R | 4 ++--
 modules/Saving/R/save_observations.R | 4 ++--
 modules/Saving/R/save_percentiles.R | 4 ++--
 modules/Saving/R/save_probabilities.R | 4 ++--
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/modules/Saving/R/save_forecast.R b/modules/Saving/R/save_forecast.R
index d7fe542d..807a6bcb 100644
--- a/modules/Saving/R/save_forecast.R
+++ b/modules/Saving/R/save_forecast.R
@@ -141,14 +141,14 @@ save_forecast <- function(recipe,
 ArrayToNc(append(country, times, fcst), outfile)
 } else if (tolower(agg) == "region") {
 region <- .get_region(data_cube)
- vars <- c(region, times, fcst)
+ vars <- c(times, region, fcst)
 ArrayToNc(vars, outfile)
 } else {
 latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)]
 longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)]
 latlon <- .get_latlon(latitude, longitude)
 # Compile variables into a list and export to netCDF
- vars <- c(latlon, times, fcst)
+ vars <- c(times, latlon, fcst)
 ArrayToNc(vars, outfile)
 }
 }
diff --git a/modules/Saving/R/save_metrics.R b/modules/Saving/R/save_metrics.R
index d5fe91bd..42aa5a67 100644
--- a/modules/Saving/R/save_metrics.R
+++ b/modules/Saving/R/save_metrics.R
@@ -162,14 +162,14 @@ save_metrics <- function(recipe,
 ArrayToNc(append(country, times$time, subset_metric), outfile)
 } else if (tolower(agg) == "region") {
 region <- .get_region(data_cube)
- vars <- c(region, times, subset_metric)
+ vars <- c(times, region, subset_metric)
 ArrayToNc(vars, outfile)
 } else {
 latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)]
 longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)]
 latlon <- .get_latlon(latitude, longitude)
 # Compile variables into a list and export to netCDF
- vars <- c(latlon, times, subset_metric)
+ vars <- c(times, latlon, subset_metric)
 ArrayToNc(vars, outfile)
 }
 }
diff --git a/modules/Saving/R/save_observations.R b/modules/Saving/R/save_observations.R
index 301f7b76..b357d5c3 100644
--- a/modules/Saving/R/save_observations.R
+++ b/modules/Saving/R/save_observations.R
@@ -141,14 +141,14 @@ save_observations <- function(recipe,
 ArrayToNc(append(country, times$time, fcst), outfile)
 } else if (tolower(agg) == "region") {
 region <- .get_region(data_cube)
- vars <- c(region, times, fcst)
+ vars <- c(times, region, fcst)
 ArrayToNc(vars, outfile)
 } else {
 latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)]
 longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)]
 latlon <- .get_latlon(latitude, longitude)
 # Compile variables into a list and export to netCDF
- vars <- c(latlon, times, fcst)
+ vars <- c(times, latlon, fcst)
 ArrayToNc(vars, outfile)
 }
 }
diff --git a/modules/Saving/R/save_percentiles.R b/modules/Saving/R/save_percentiles.R
index d5b4ae61..905be2fb 100644
--- a/modules/Saving/R/save_percentiles.R
+++ b/modules/Saving/R/save_percentiles.R
@@ -120,14
+120,14 @@ save_percentiles <- function(recipe, ArrayToNc(append(country, time, subset_percentiles), outfile) } else if (tolower(agg) == "region") { region <- .get_region(data_cube) - vars <- c(region, times, subset_percentiles) + vars <- c(times, region, subset_percentiles) ArrayToNc(vars, outfile) } else { latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] latlon <- .get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF - vars <- c(latlon, times, subset_percentiles) + vars <- c(times, latlon, subset_percentiles) ArrayToNc(vars, outfile) } } diff --git a/modules/Saving/R/save_probabilities.R b/modules/Saving/R/save_probabilities.R index e3df5a1e..1f6a597a 100644 --- a/modules/Saving/R/save_probabilities.R +++ b/modules/Saving/R/save_probabilities.R @@ -124,14 +124,14 @@ save_probabilities <- function(recipe, ## TODO: Move this segment outside of the loop if (tolower(agg) == "region") { region <- .get_region(data_cube) - vars <- c(region, times, probs_syear) + vars <- c(times, region, probs_syear) ArrayToNc(vars, outfile) } else { latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] latlon <- .get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF - vars <- c(latlon, times, probs_syear) + vars <- c(times, latlon, probs_syear) ArrayToNc(vars, outfile) } } -- GitLab
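
A note on the category-limit computation that recurs in the
Crossval_calibration() patches above: the percentile (e.g. tercile) limits
are obtained by pooling the hindcast over start years and ensemble members
and taking sample quantiles at each grid point. Below is a minimal,
self-contained R sketch of that Apply() + quantile() pattern. It is not part
of the patch series; it assumes the multiApply package (which provides
Apply()), and the array names and sizes are invented for illustration.

  # Toy hindcast: 10 start years x 25 members on a 4 x 5 grid (invented data)
  library(multiApply)
  set.seed(1)
  hcst <- array(rnorm(10 * 25 * 4 * 5),
                dim = c(syear = 10, ensemble = 25,
                        latitude = 4, longitude = 5))
  # Tercile limits pooled over years and members, one pair per grid point
  lims <- Apply(hcst, target_dims = c('syear', 'ensemble'),
                fun = function(x, prob_lims) {
                  as.array(quantile(as.vector(x), prob_lims, na.rm = TRUE))
                },
                prob_lims = c(1/3, 2/3))$output1
  names(dim(lims))[1] <- 'bin'
  dim(lims)  # bin (2) x latitude (4) x longitude (5)

In the patches themselves the same pattern runs once per fold inside the
cross-validation loop (training years only) and once more over the full
hindcast period to derive the limits applied to the forecast.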