From 9ad2ee0f3dc8f12de4c849f42cb22ebd60f3f73c Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 20 Jun 2025 12:58:10 +0200 Subject: [PATCH 1/8] Fix region dim order --- modules/Saving/R/save_metrics.R | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/modules/Saving/R/save_metrics.R b/modules/Saving/R/save_metrics.R index 725135f9..5b06d335 100644 --- a/modules/Saving/R/save_metrics.R +++ b/modules/Saving/R/save_metrics.R @@ -99,7 +99,7 @@ save_metrics <- function(recipe, ## the code extra_string <- get_filename("", recipe, variable, fcst.sdate, agg, names(subset_metric)[[i]]) - SaveExp(data = subset_metric[[i]], destination = outdir, + SaveExp(data = subset_metric[[i]], destination = outdir, Dates = dates, coords = list(latitude = data_cube$coords[['latitude']], longitude = data_cube$coords[['longitude']]), @@ -117,13 +117,19 @@ save_metrics <- function(recipe, # Remove singleton dimensions and rearrange lon, lat and time dims if (tolower(agg) == "global") { subset_metric <- lapply(subset_metric, function(x) { - Reorder(x, c(lalo, 'time'))}) + Reorder(x, c(lalo, 'time'))}) + } else { + subset_metric <- lapply(subset_metric, function(x) { + Reorder(x, c("region", "time"))}) } attr(subset_metric[[1]], 'global_attrs') <- global_attributes for (i in 1:length(subset_metric)) { metric <- names(subset_metric[i]) - long_name <- dictionary$metrics[[metric]]$long_name + long_name <- tryCatch( + expr = {dictionary$metrics[[metric]]$long_name}, + error = function(e) {NULL} + ) missing_val <- -9.e+33 subset_metric[[i]][is.na(subset_metric[[i]])] <- missing_val if (tolower(agg) == "country") { -- GitLab From 191c0967abc36fa4a7232b0c82fcc971d1d0a62a Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 20 Jun 2025 14:29:38 +0200 Subject: [PATCH 2/8] Improve scorecards code (WIP) --- modules/Scorecards/R/tmp/LoadMetrics.R | 36 +++--- modules/Scorecards/R/tmp/LoadMetrics2.R | 109 +++++++++++++++++++ modules/Scorecards/Scorecards_calculations.R | 88 +++++++++------ modules/Scorecards/Scorecards_plotting.R | 32 ++++-- recipe_bigpredidata_oper.yml | 84 ++++++++++++++ recipe_ecvs_ano_seas.yml | 14 +-- script_bigpredidata_oper.R | 37 +++++++ 7 files changed, 337 insertions(+), 63 deletions(-) create mode 100644 modules/Scorecards/R/tmp/LoadMetrics2.R create mode 100644 recipe_bigpredidata_oper.yml create mode 100644 script_bigpredidata_oper.R diff --git a/modules/Scorecards/R/tmp/LoadMetrics.R b/modules/Scorecards/R/tmp/LoadMetrics.R index e49132f6..aee359cb 100644 --- a/modules/Scorecards/R/tmp/LoadMetrics.R +++ b/modules/Scorecards/R/tmp/LoadMetrics.R @@ -105,8 +105,8 @@ LoadMetrics <- function(input_path, system, reference, var, period, data_type, start_months = start_months, calib_method = calib_method, data_type = data_type) - - if (sys == 1 && ref == 1){ + + if (sys == 1 && ref == 1) { all_metrics <- array(data = NA, dim = c('system'= length(system), 'reference'=length(reference), @@ -154,20 +154,28 @@ LoadMetrics <- function(input_path, system, reference, var, period, data_type, # Loop for file dim(allfiles) <- c(dat = 1, sdate = length(allfiles)) - - array_met_by_sdate <- Apply(data = allfiles, target_dims = 'dat', fun = function(x) { - if (file.exists(x)) { - res <- easyNCDF::NcToArray(x, vars_to_read = data_type, unlist = T, - drop_var_dim = T) - names(dim(res)) <- NULL - } else { - res <- array(dim = c(length(data_type), allfiledims[-1,1])) - names(dim(res)) <- NULL - } - res})$output1 - + + array_met_by_sdate <- Apply(data = allfiles, target_dims = 'dat', + fun = function(x) { + if (file.exists(x)) { + res <- easyNCDF::NcToArray(x, vars_to_read = data_type, unlist = T, + drop_var_dim = T) + names(dim(res)) <- NULL + } else { + res <- array(dim = c(length(data_type), allfiledims[-1,1])) + names(dim(res)) <- NULL + } + res + })$output1 + + # Retrieve list of units and metrics + file <- nc_open(allfiles[1]) + metrics <- ncatt_get(nc = file, varid = "metric", attname = "names")$value + regions <- ncatt_get(nc = file, varid = "region", attname = "names")$value dim(array_met_by_sdate) <- c(allfiledims[-1,1], sdate = length(allfiles)) + attributes(array_met_by_sdate)$metrics <- strsplit(metrics, ", ")[[1]] + attributes(array_met_by_sdate)$regions <- strsplit(regions, ", ")[[1]] return(array_met_by_sdate) } diff --git a/modules/Scorecards/R/tmp/LoadMetrics2.R b/modules/Scorecards/R/tmp/LoadMetrics2.R new file mode 100644 index 00000000..eb7ae360 --- /dev/null +++ b/modules/Scorecards/R/tmp/LoadMetrics2.R @@ -0,0 +1,109 @@ +#' Scorecards load metrics from verification suite +#' +#'@description Scorecards function to load saved data files +#' +#'@param system A vector of character strings defining the names of the +#' system names following the archive.yml format from verification suite. +#' Accepted system names: 'ECMWF-SEAS5', 'DWD-GFCS2.1', 'CMCC-SPS3.5', +#' 'ecmwfs5','Meteo-France-System 7', 'UK-MetOffice-GloSea600', 'NCEP-CFSv2'. +#'@param reference A vector of character strings defining the names of +#' the references following the archive.yml format from verification suite +#' Pending to be test with more than one. The accepted names are: 'era5'. +#'@param var A character string following the format from +#' variable-dictionary.yml from verification suite (TO DO: multiple variables). +#' The accepted names are: 'psl', 'tas', 'sfcWind', 'prlr'. +#'@param period A character string indicating the start and end years of the +#' reference period (e.g. '1993-203') +#'@param start_months A vector indicating the numbers of the start months +#'@param input_path A character string indicating the path where metrics output +#' files from verification suite are saved (or any other compatible files) +#' +#'@return A is a list by system and reference containing an array of with +#' the following dimensions: longitude, latitude, forecast months, metrics, +#' start dates. + +#'@examples +#'\dontrun{ +#'loaded_metrics <- LoadMetrics(system = c('ECMWF-SEAS5','DWD-GFCS2.1'), +#' reference. = 'ERA5', +#' var = 'tas', +#' period = '1993-2016' +#' start_months = sprintf("%02d", 1:12), +#' calib_method = 'raw', +#' input_path = './scorecards_data/input_data') +#'} +#'@import startR +#'@export + +LoadMetrics2 <- function(input_path, system, reference, var, period, data_type, + start_months, calib_method = NULL) { + + # Initial checks + ## system + if (!is.character(system)) { + stop("Parameter 'system' must be a character vector with the system names.") + } + ## reference + if (!is.character(reference)) { + stop("Parameter 'reference' must be a character vector with the reference ", + "names.") + } + ## var + if (!is.character(var)) { + stop("Parameter 'var' must be a character vector with the var ", + "names.") + } + if (length(var) > 1) { + warning("Parameter 'var' must be of length one. Only the first value ", + "will be used.") + var <- var[1] + } + ## start_months + if (is.character(start_months)) { + warning("Parameter 'start_months' must be a numeric vector indicating ", + "the starting months.") + start_months <- as.numeric(start_months) + } + if (!is.numeric(start_months)) { + stop("Parameter 'start_months' must be a numeric vector indicating ", + "the starting months.") + } + start_months <- sprintf("%02d", start_months) + ## input_path + if (!is.character(input_path)) { + stop("Parameter 'input_path must be a character string.") + } + if (length(input_path) > 1) { + input_path <- input_path[1] + warning("Parameter 'input_path' has length greater than 1 and only the ", + "first element will be used.") + } + + ## Remove . from names + system <- gsub('.','', system, fixed = T) + reference <- gsub('.','', reference, fixed = T) + + # Load data + ## Outer (file) dimensions: var, sdate (month), system, reference + ## Inner dimensions: time, region + path_pattern <- file.path("$system$", "$reference$", calib_method, var, + paste0("scorecards_", "$system$", "_", + "$reference$", "_", var, "_$var$_", + period, "_s$sdate$.nc")) + metrics <- Start(dat = file.path(input_path, path_pattern), + var = c("rpss", "rpss-Terciles"), + system = system, + reference = reference, + sdate = start_months, + time = "all", + region = "all", + metadata_dims = "var", + return_vars = list(time = NULL, + region = NULL), + retrieve = TRUE) + + metrics <- Subset(metrics, along = "dat", + indices = 1, drop = "selected") + return(metrics) + +} diff --git a/modules/Scorecards/Scorecards_calculations.R b/modules/Scorecards/Scorecards_calculations.R index fbd03154..f87b849b 100644 --- a/modules/Scorecards/Scorecards_calculations.R +++ b/modules/Scorecards/Scorecards_calculations.R @@ -18,6 +18,15 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, } else if (!is.list(skill_metrics)) { stop("'skill_metrics' should be a named list with metric arrays") } + # skill_metrics$ + if (!is.null(statistics)) { + warning("Parameter 'statistics' will be deprecated, use 'skill_metrics' ", + "as a single list instead.") + skill_metrics <- append(skill_metrics, statistics) + } + metrics_to_remove <- names(skill_metrics)[grep("_significance|metadata|_individual_members", + names(skill_metrics))] + skill_metrics <- skill_metrics[!names(skill_metrics) %in% metrics_to_remove] # Assign parameters ncores <- recipe$Analysis$ncores if (is.null(recipe$Analysis$Workflow$Scorecards$signif_alpha)) { @@ -77,12 +86,9 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, names(dim(aggr_metrics)) <- c("time", "region", "metric") } else { ## Define arrays to filled with data - aggr_metrics <- array(data = NA, - dim = c(var = length(var), - time = length(forecast.months), - region = length(regions), - metric = length(metrics))) - + aggr_metrics <- list() + aggr_metrics_names <- c() + lon_dim <- 'longitude' lat_dim <- 'latitude' time_dim <- 'syear' @@ -93,31 +99,41 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, ## Skill aggregation if (metric.aggregation == 'skill') { ## Calculate weighted mean of spatial aggregation - for (met in metrics) { - result <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = skill_metrics[[met]], - region = regions[[X]], - lon = lon, londim = lon_dim, - lat = lat, latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - names(dim(result))[length(dim(result))] <- 'region' + for (met in (names(skill_metrics))) { + if (strsplit(met, split = "-", fixed = TRUE)[[1]][1] %in% metrics) { + result <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = skill_metrics[[met]], + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + names(dim(result))[length(dim(result))] <- 'region' - if (met =='crpss' && inf.to.na == TRUE) { - result[result == -Inf] <- NA + if (met == 'crpss' && inf.to.na == TRUE) { + result[result == -Inf] <- NA + } + aggr_metrics <- append(aggr_metrics, + list(Reorder(data = result, + order = c('var', 'time', 'region')))) + aggr_metrics_names <- c(aggr_metrics_names, met) + names(aggr_metrics) <- aggr_metrics_names } - aggr_metrics[,,,which(metrics == met)] <- Reorder(data = result, - order = c('var','time', 'region')) } ## close on met + ## TODO + # aggr_metrics <- abind(aggr_metrics, along = 0) + # names(dim(aggr_metrics)) <- c("metric", "var", "time", "region") + # metrics <- aggr_metrics_names } ## close if on skill + ## TODO: Review and modify ## Score Aggregation if (metric.aggregation == 'score') { ## Spatially aggregate data for each metric - for (met in metrics) { - if (met == 'rpss') { + for (met in metrics) { + if (met == "rpss" || grep('rpss-', met)) { rps_syear <- sapply(X = 1:length(regions), FUN = function(X) { WeightedMean(data = skill_metrics$rps_syear, @@ -214,7 +230,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, } else if (met == 'enscorr') { cov <- sapply(X = 1:length(regions), FUN = function(X) { - WeightedMean(data = statistics$cov, + WeightedMean(data = skill_metrics$cov, region = regions[[X]], lon = lon, londim = lon_dim, lat = lat, latdim = lat_dim, @@ -222,7 +238,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, }, simplify = 'array') n_eff <- sapply(X = 1:length(regions), FUN = function(X) { - WeightedMean(data = statistics$n_eff, + WeightedMean(data = skill_metrics$n_eff, region = regions[[X]], lon = lon, londim = lon_dim, lat = lat, latdim = lat_dim, @@ -230,8 +246,8 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, }, simplify = 'array') ## Calculate variance from standard deviation - var_hcst <- (statistics$std_hcst)^2 - var_obs <- (statistics$std_obs)^2 + var_hcst <- (skill_metrics$std_hcst)^2 + var_obs <- (skill_metrics$std_obs)^2 var_hcst <- sapply(X = 1:length(regions), FUN = function(X) { @@ -369,7 +385,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, ## Calculate metric spread <- sapply(X = 1:length(regions), FUN = function(X) { - WeightedMean(data = statistics$spread, + WeightedMean(data = skill_metrics$spread, region = regions[[X]], lon = lon, londim = lon_dim, lat = lat, latdim = lat_dim, @@ -439,13 +455,21 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, attributes(aggr_metrics)$system.name <- system attributes(aggr_metrics)$reference.name <- reference - aggr_scorecards <- list(aggr_metrics = aggr_metrics, - aggr_significance = aggr_significance) - + # aggr_scorecards <- list(aggr_metrics = aggr_metrics, + # aggr_significance = aggr_significance) + # Metadata for saving + ## TODO: Significance + data$hcst$attrs$Variable$metadata$region <- names(regions) + attributes(data$hcst$attrs$Variable$metadata$region)$variables <- + list(region = list(names = paste(names(regions), collapse = ", "))) + # data$hcst$attrs$Variable$metadata$metric <- metrics + # attributes(data$hcst$attrs$Variable$metadata$metric)$variables <- + # list(metric = list(names = paste(metrics, collapse = ", "))) + ## Save metric data arrays recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", "Scorecards/") - save_metrics(recipe = recipe, metrics = aggr_scorecards, + save_metrics(recipe = recipe, metrics = aggr_metrics, data_cube = data$hcst, agg = 'region', module = "scorecards") } diff --git a/modules/Scorecards/Scorecards_plotting.R b/modules/Scorecards/Scorecards_plotting.R index 0e801644..730a6cef 100644 --- a/modules/Scorecards/Scorecards_plotting.R +++ b/modules/Scorecards/Scorecards_plotting.R @@ -129,13 +129,24 @@ Scorecards_plotting <- function(recipe) { ####### Load data files ####### aggregated_metrics <- LoadMetrics(input_path = input.path, - system = system, - reference = reference, - var = var, - data_type = "aggr_metrics", - period = period, - start_months = as.numeric(start.months), - calib_method = calib.method) + system = system, + reference = reference, + var = var, + data_type = "aggr_metrics", + period = period, + start_months = as.numeric(start.months), + calib_method = calib.method) + + ## source("modules/Scorecards/R/tmp/LoadMetrics2.R") + ## aggregated_metrics2 <- LoadMetrics2(input_path = input.path, + ## system = system, + ## reference = reference, + ## var = var, + ## data_type = "aggr_metrics", + ## period = period, + ## start_months = as.numeric(start.months), + ## calib_method = calib.method) + ## attributes(aggregated_metrics2)$metrics <- attr(aggregated_metrics2, "FileSelectors")$dat1$var[[1]] attributes(aggregated_metrics)$metrics <- metrics @@ -151,6 +162,7 @@ Scorecards_plotting <- function(recipe) { aggregated_significance <- aggregated_significance == 1 + ## TODO: Separate significance and normal metrics ####### PLOT SCORECARDS ########## ## Create simple scorecard tables @@ -166,7 +178,7 @@ Scorecards_plotting <- function(recipe) { end.year = end.year, start.months = start.months, forecast.months = forecast.months, - region.names = names(regions), + region.names = names(regions), ## TODO: Change this metrics = metrics, table.label = table.label, fileout.label = fileout.label, @@ -197,7 +209,7 @@ Scorecards_plotting <- function(recipe) { end.year = end.year, start.months = start.months, forecast.months = forecast.months, - region.names = names(regions), + region.names = names(regions), ## TODO: Change this metrics = metrics, table.label = table.label, fileout.label = fileout.label, @@ -226,7 +238,7 @@ Scorecards_plotting <- function(recipe) { end.year = end.year, start.months = start.months, forecast.months = forecast.months, - region.names = attributes(regions)$names, + region.names = attributes(regions)$names, ## TODO: Change this metrics = metrics, table.label = table.label, fileout.label = fileout.label, diff --git a/recipe_bigpredidata_oper.yml b/recipe_bigpredidata_oper.yml new file mode 100644 index 00000000..04ba65da --- /dev/null +++ b/recipe_bigpredidata_oper.yml @@ -0,0 +1,84 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + - {name: tas, freq: monthly_mean, units: C} + # - {name: prlr, freq: monthly_mean, units: mm, flux: no} + # - {name: sfcWind, freq: monthly_mean} + # - {name: rsds, freq: monthly_mean} + Datasets: + System: + - {name: ECMWF-SEAS5.1} + Multimodel: no # Mandatory, bool: Either yes/true or no/false + Reference: + - {name: ERA5} # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0501' + fcst_year: 2025 # Optional, int: Forecast year 'YYYY' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + - {name: "Iberia", latmin: 36, latmax: 44, lonmin: -10, lonmax: 5} + # - {name: "EU", latmin: 20, latmax: 80, lonmin: -20, lonmax: 40} + Regrid: + method: bilinear # Mandatory, str: Interpolation method. See docu. + type: "to_reference" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: yes + cross_validation: yes + save: none + Calibration: + method: bias # Mandatory, str: bias, evmos, mse_min, crps_min, rpc_based + cross_validation: yes + save: none + Skill: + metric: crpss rpss + save: all + cross_validation: yes + Probabilities: + percentiles: + Terciles: [1/3, 2/3] + P10: [1/10] + P90: [9/10] # frac: Quantile thresholds. + save: all + Indicators: + index: no + Visualization: + plots: skill_metrics, most_likely_terciles, extreme_probabilities + NA_color: white + multi_panel: no + dots: no + mask_terciles: yes + shapefile: /esarchive/scratch/cdelgado/aspect_outputs/casestudy-wine/shp_spain/recintos_provinciales_inspire_peninbal_etrs89/recintos_provinciales_inspire_peninbal_etrs89.shp + file_format: PNG # Final file format of the plots. Formats available: PNG, JPG, JPEG, EPS. Defaults to PDF. + ncores: 16 # Optional, int: number of cores, defaults to 1 + remove_NAs: TRUE # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: esarchive + output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ # replace with the directory where you want to save the outputs + code_dir: /home/Earth/vagudets/oldhome/git/sunset/ # replace with the directory where your code is + autosubmit: yes + # fill only if using autosubmit + auto_conf: + script: script_bigpredidata_oper.R # replace with the path to your script + expid: a6wq # replace with your EXPID + hpc_user: bsc032762 # replace with your hpc username + wallclock: 04:00 # hh:mm + processors_per_job: 32 + custom_directives: ['#SBATCH --exclusive'] + platform: nord4 + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: victoria.agudetse@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails diff --git a/recipe_ecvs_ano_seas.yml b/recipe_ecvs_ano_seas.yml index 890f5366..ff288f46 100644 --- a/recipe_ecvs_ano_seas.yml +++ b/recipe_ecvs_ano_seas.yml @@ -11,8 +11,8 @@ Analysis: System: #- {name: 'Meteo-France-System8'} #- {name: 'CMCC-SPS3.5'} - - {name: 'ECMWF-SEAS5.1'} - #- {name: 'UK-MetOffice-Glosea603'} + - {name: 'ECMWF-SEAS5.1'} + # - {name: 'UK-MetOffice-Glosea602'} ##- {name: 'NCEP-CFSv2'} #- {name: 'DWD-GCFS2.1'} #- {name: 'ECCC-GEM5.2-NEMO'} @@ -24,15 +24,15 @@ Analysis: Reference: name: ERA5 # Mandatory, str: Reference codename. See docu. Time: - sdate: '1201' - fcst_year: '2024' + sdate: '0101' + fcst_year: # '2024' hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' - hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + hcst_end: '1996' # Mandatory, int: Hindcast end year 'YYYY' ftime_min: 1 # Mandatory, int: First leadtime time step in months ftime_max: 6 # Mandatory, int: Last leadtime time step in months Region: - {name: 'Global', latmin: -90, latmax: 90, lonmin: -180, lonmax: 179.9} - #- {name: 'Global', latmin: 0, latmax: 10, lonmin: 0, lonmax: 10} + # - {name: 'Global', latmin: 0, latmax: 10, lonmin: 0, lonmax: 10} Regrid: method: conservative # Mandatory, str: Interpolation method. See docu. type: to_system @@ -73,7 +73,7 @@ Analysis: Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} start_months: NULL - metric: mean_bias enscorr rpss crpss EnsSprErr + metric: rpss metric_aggregation: 'skill' #inf_to_na: yes table_label: NULL diff --git a/script_bigpredidata_oper.R b/script_bigpredidata_oper.R new file mode 100644 index 00000000..3756a442 --- /dev/null +++ b/script_bigpredidata_oper.R @@ -0,0 +1,37 @@ +source("modules/Loading/Loading.R") +source("modules/Saving/Saving.R") +source("modules/Units/Units.R") +source("modules/Visualization/Visualization.R") +source("modules/Aggregation/Aggregation.R") + +args = commandArgs(trailingOnly = TRUE) +recipe_file <- args[1] +#recipe_file <- "/esarchive/scratch/ptrascas/R/dev-test_bigpredidata/sunset/sunset/recipes/recipe_bigpredidata_oper_test_sfcWind.yml" +#recipe_file <- "/esarchive/scratch/ptrascas/R/dev-test_bigpredidata/sunset/sunset/recipes/recipe_bigpredidata_oper.yml" +recipe <- read_atomic_recipe(recipe_file) +#recipe <- prepare_outputs(recipe_file) +# Load datasets +data <- Loading(recipe) +data <- Units(recipe, data) + +source("modules/Crossval/Crossval_anomalies.R") +data <- Crossval_anomalies(recipe = recipe, data = data) + +# Save calibrated hindcast, forecast and probabilities + +source("modules/Crossval/Crossval_metrics.R") +skill_metrics <- Crossval_metrics(recipe = recipe, data_crossval = data, + fair = FALSE, nmemb = NULL, nmemb_ref = NULL) + +Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, + significance = TRUE, probabilities = data$probs) +#if (!is.null(data$fcst)) { +# # Required to plot a forecast: +# data$fcst <- res$fcst +# Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, +# significance = TRUE, probabilities = res$probs) +#} else { +# Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, +# significance = TRUE) +#} + -- GitLab From ba33a06d5e616e0c2e2ced96374a7180fcf370a7 Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 20 Jun 2025 17:15:24 +0200 Subject: [PATCH 3/8] Use Start() in LoadMetrics(); modify Scorecards plotting functions accordingly, save significance in Scorecards_calculations() --- modules/Scorecards/R/tmp/LoadMetrics.R | 124 ++++-------------- modules/Scorecards/R/tmp/LoadMetrics2.R | 109 --------------- modules/Scorecards/R/tmp/ScorecardsMulti.R | 28 ++-- modules/Scorecards/R/tmp/ScorecardsSingle.R | 48 +++---- .../Scorecards/R/tmp/ScorecardsSystemDiff.R | 24 ++-- modules/Scorecards/Scorecards_calculations.R | 67 +++++----- modules/Scorecards/Scorecards_plotting.R | 22 +--- 7 files changed, 115 insertions(+), 307 deletions(-) delete mode 100644 modules/Scorecards/R/tmp/LoadMetrics2.R diff --git a/modules/Scorecards/R/tmp/LoadMetrics.R b/modules/Scorecards/R/tmp/LoadMetrics.R index aee359cb..cad2bb51 100644 --- a/modules/Scorecards/R/tmp/LoadMetrics.R +++ b/modules/Scorecards/R/tmp/LoadMetrics.R @@ -26,17 +26,17 @@ #'\dontrun{ #'loaded_metrics <- LoadMetrics(system = c('ECMWF-SEAS5','DWD-GFCS2.1'), #' reference. = 'ERA5', -#' var = 'tas', +#' var = 'tas', #' period = '1993-2016' +#' metric_names = c('rpss', 'crpss'), #' start_months = sprintf("%02d", 1:12), #' calib_method = 'raw', #' input_path = './scorecards_data/input_data') #'} -#'@import easyNCDF -#'@import multiApply +#'@import startR #'@export -LoadMetrics <- function(input_path, system, reference, var, period, data_type, +LoadMetrics <- function(input_path, system, reference, var, period, metric_names, start_months, calib_method = NULL) { # Initial checks @@ -70,12 +70,6 @@ LoadMetrics <- function(input_path, system, reference, var, period, data_type, "the starting months.") } start_months <- sprintf("%02d", start_months) - ## Check if sdates are continuous or discrete - if (all(diff(as.numeric(start_months)) == 1)) { - consecutive_start_months <- TRUE - } else { - consecutive_start_months <- FALSE - } ## input_path if (!is.character(input_path)) { stop("Parameter 'input_path must be a character string.") @@ -90,94 +84,28 @@ LoadMetrics <- function(input_path, system, reference, var, period, data_type, system <- gsub('.','', system, fixed = T) reference <- gsub('.','', reference, fixed = T) - ## Load data for each system - - all_metrics <- NULL - - for (sys in 1:length(system)){ - for (ref in 1:length(reference)){ - - result <- .loadmetrics(input_path = input_path, - system = system[sys], - reference = reference[ref], - var = var, - period = period, - start_months = start_months, - calib_method = calib_method, - data_type = data_type) - - if (sys == 1 && ref == 1) { - all_metrics <- array(data = NA, - dim = c('system'= length(system), - 'reference'=length(reference), - attributes(result)$dim)) - } - all_metrics[sys,ref,,,,] <- result - - } - } - - attributes(all_metrics)$start_months <- start_months + # Load data + ## Outer (file) dimensions: var, sdate (month), system, reference + ## Inner dimensions: time, region + path_pattern <- file.path("$system$", "$reference$", calib_method, var, + paste0("scorecards_", "$system$", "_", + "$reference$", "_", var, "_$var$_", + period, "_s$sdate$.nc")) + metrics <- Start(dat = file.path(input_path, path_pattern), + var = metric_names, + system = system, + reference = reference, + sdate = start_months, + time = "all", + region = "all", + metadata_dims = "var", + return_vars = list(time = NULL, + region = NULL), + pattern_dim = 'dat', + retrieve = TRUE) - return(all_metrics) -} ## close function + metrics <- Subset(metrics, along = "dat", + indices = 1, drop = "selected") + return(metrics) -########################################################### - -.loadmetrics <- function(input_path, system, reference, - var, period, start_months, - calib_method, data_type) { - - ## Load data for each start date - allfiles <- sapply(start_months, function(m) { - paste0(input_path, "/", system, "/", reference, "/", calib_method, "/", - var, "/scorecards_", system, "_", reference, "_", - var, "_", data_type, "_", period, "_s", m, - ".nc")}) - allfiles_exist <- sapply(allfiles, file.exists) - - # Check dims - files_exist_by_month <- seq(1:length(allfiles))[allfiles_exist] - allfiledims <- sapply(allfiles[allfiles_exist], easyNCDF::NcReadDims) - if (length(files_exist_by_month) == 0) { - stop("No files are found.") - } - - num_dims <- numeric(dim(allfiledims)[1]) - for (i in 1:dim(allfiledims)[1]) { - if (length(unique(allfiledims[i,])) > 1) { - warning(paste0("Dimensions of system ", system," with var ", var, - " don't match.")) - } - num_dims[i] <- max(allfiledims[i,]) # We take the largest dimension - } - - # Loop for file - dim(allfiles) <- c(dat = 1, sdate = length(allfiles)) - - array_met_by_sdate <- Apply(data = allfiles, target_dims = 'dat', - fun = function(x) { - if (file.exists(x)) { - res <- easyNCDF::NcToArray(x, vars_to_read = data_type, unlist = T, - drop_var_dim = T) - names(dim(res)) <- NULL - } else { - res <- array(dim = c(length(data_type), allfiledims[-1,1])) - names(dim(res)) <- NULL - } - res - })$output1 - - # Retrieve list of units and metrics - file <- nc_open(allfiles[1]) - metrics <- ncatt_get(nc = file, varid = "metric", attname = "names")$value - regions <- ncatt_get(nc = file, varid = "region", attname = "names")$value - dim(array_met_by_sdate) <- c(allfiledims[-1,1], - sdate = length(allfiles)) - attributes(array_met_by_sdate)$metrics <- strsplit(metrics, ", ")[[1]] - attributes(array_met_by_sdate)$regions <- strsplit(regions, ", ")[[1]] - - return(array_met_by_sdate) } - - diff --git a/modules/Scorecards/R/tmp/LoadMetrics2.R b/modules/Scorecards/R/tmp/LoadMetrics2.R deleted file mode 100644 index eb7ae360..00000000 --- a/modules/Scorecards/R/tmp/LoadMetrics2.R +++ /dev/null @@ -1,109 +0,0 @@ -#' Scorecards load metrics from verification suite -#' -#'@description Scorecards function to load saved data files -#' -#'@param system A vector of character strings defining the names of the -#' system names following the archive.yml format from verification suite. -#' Accepted system names: 'ECMWF-SEAS5', 'DWD-GFCS2.1', 'CMCC-SPS3.5', -#' 'ecmwfs5','Meteo-France-System 7', 'UK-MetOffice-GloSea600', 'NCEP-CFSv2'. -#'@param reference A vector of character strings defining the names of -#' the references following the archive.yml format from verification suite -#' Pending to be test with more than one. The accepted names are: 'era5'. -#'@param var A character string following the format from -#' variable-dictionary.yml from verification suite (TO DO: multiple variables). -#' The accepted names are: 'psl', 'tas', 'sfcWind', 'prlr'. -#'@param period A character string indicating the start and end years of the -#' reference period (e.g. '1993-203') -#'@param start_months A vector indicating the numbers of the start months -#'@param input_path A character string indicating the path where metrics output -#' files from verification suite are saved (or any other compatible files) -#' -#'@return A is a list by system and reference containing an array of with -#' the following dimensions: longitude, latitude, forecast months, metrics, -#' start dates. - -#'@examples -#'\dontrun{ -#'loaded_metrics <- LoadMetrics(system = c('ECMWF-SEAS5','DWD-GFCS2.1'), -#' reference. = 'ERA5', -#' var = 'tas', -#' period = '1993-2016' -#' start_months = sprintf("%02d", 1:12), -#' calib_method = 'raw', -#' input_path = './scorecards_data/input_data') -#'} -#'@import startR -#'@export - -LoadMetrics2 <- function(input_path, system, reference, var, period, data_type, - start_months, calib_method = NULL) { - - # Initial checks - ## system - if (!is.character(system)) { - stop("Parameter 'system' must be a character vector with the system names.") - } - ## reference - if (!is.character(reference)) { - stop("Parameter 'reference' must be a character vector with the reference ", - "names.") - } - ## var - if (!is.character(var)) { - stop("Parameter 'var' must be a character vector with the var ", - "names.") - } - if (length(var) > 1) { - warning("Parameter 'var' must be of length one. Only the first value ", - "will be used.") - var <- var[1] - } - ## start_months - if (is.character(start_months)) { - warning("Parameter 'start_months' must be a numeric vector indicating ", - "the starting months.") - start_months <- as.numeric(start_months) - } - if (!is.numeric(start_months)) { - stop("Parameter 'start_months' must be a numeric vector indicating ", - "the starting months.") - } - start_months <- sprintf("%02d", start_months) - ## input_path - if (!is.character(input_path)) { - stop("Parameter 'input_path must be a character string.") - } - if (length(input_path) > 1) { - input_path <- input_path[1] - warning("Parameter 'input_path' has length greater than 1 and only the ", - "first element will be used.") - } - - ## Remove . from names - system <- gsub('.','', system, fixed = T) - reference <- gsub('.','', reference, fixed = T) - - # Load data - ## Outer (file) dimensions: var, sdate (month), system, reference - ## Inner dimensions: time, region - path_pattern <- file.path("$system$", "$reference$", calib_method, var, - paste0("scorecards_", "$system$", "_", - "$reference$", "_", var, "_$var$_", - period, "_s$sdate$.nc")) - metrics <- Start(dat = file.path(input_path, path_pattern), - var = c("rpss", "rpss-Terciles"), - system = system, - reference = reference, - sdate = start_months, - time = "all", - region = "all", - metadata_dims = "var", - return_vars = list(time = NULL, - region = NULL), - retrieve = TRUE) - - metrics <- Subset(metrics, along = "dat", - indices = 1, drop = "selected") - return(metrics) - -} diff --git a/modules/Scorecards/R/tmp/ScorecardsMulti.R b/modules/Scorecards/R/tmp/ScorecardsMulti.R index 85630239..3c3a721b 100644 --- a/modules/Scorecards/R/tmp/ScorecardsMulti.R +++ b/modules/Scorecards/R/tmp/ScorecardsMulti.R @@ -93,19 +93,19 @@ ScorecardsMulti <- function(data, sign, system, reference, var, var.units, } ## Make sure data is in correct order for using in functions: - data_order <- c('system','reference','metric','time','sdate','region') + data_order <- c('system','reference','var','time','sdate','region') data <- Reorder(data, data_order) ## Identify metrics loaded metrics_loaded <- attributes(data)$metrics ## Select only the metrics to visualize from data - data <- Subset(data, along = 'metric', indices = match(metrics, metrics_loaded)) + data <- Subset(data, along = 'var', indices = match(metrics, metrics_loaded)) attributes(data)$metrics <- metrics if(!is.null(sign)){ sign <- Reorder(sign, data_order) - sign <- Subset(sign, along = 'metric', indices = match(metrics, metrics_loaded)) + sign <- Subset(sign, along = 'var', indices = match(metrics, metrics_loaded)) attributes(sign)$metrics <- metrics } @@ -210,7 +210,7 @@ ScorecardsMulti <- function(data, sign, system, reference, var, var.units, if(var == 'psl'){ data[,,pos_bias,,,] <- data[,,pos_bias,,,]/100 ## temporary } - breaks_bias <- .SCBiasBreaks(Subset(data, along = c('metric','region'), + breaks_bias <- .SCBiasBreaks(Subset(data, along = c('var','region'), indices = list(pos_bias,reg))) } @@ -249,9 +249,9 @@ ScorecardsMulti <- function(data, sign, system, reference, var, var.units, sign = sign_sc_9, row_dim = model, subrow_dim = 'time', - col_dim = 'metric', + col_dim = 'var', subcol_dim = 'sdate', - legend_dim = 'metric', + legend_dim = 'var', row_names = model.name, subrow_names = forecast.months, col_names = metric.names, @@ -285,7 +285,7 @@ ScorecardsMulti <- function(data, sign, system, reference, var, var.units, region = sub(" ", "-", region.names[reg]), fileout.label = fileout.label, output.path = output.path) - new_order <- c('system', 'reference', 'metric', 'region','sdate', 'time') + new_order <- c('system', 'reference', 'var', 'region','sdate', 'time') if(model == 'system'){ data_sc_10 <- Subset(Reorder(data, new_order), c('reference','region'), list(1, reg), drop = 'selected') @@ -306,9 +306,9 @@ ScorecardsMulti <- function(data, sign, system, reference, var, var.units, sign = sign_sc_10, row_dim = 'time', subrow_dim = model, - col_dim = 'metric', + col_dim = 'var', subcol_dim = 'sdate', - legend_dim = 'metric', + legend_dim = 'var', row_names = forecast.months, subrow_names = model.name, col_names = metric.names, @@ -364,9 +364,9 @@ ScorecardsMulti <- function(data, sign, system, reference, var, var.units, sign = sign_sc_11, row_dim = model, subrow_dim = 'time', - col_dim = 'metric', + col_dim = 'var', subcol_dim = 'sdate', - legend_dim = 'metric', + legend_dim = 'var', row_names = model.name, subrow_names = forecast.months, col_names = metric.names, @@ -401,7 +401,7 @@ ScorecardsMulti <- function(data, sign, system, reference, var, var.units, region = sub(" ", "-", region.names[reg]), fileout.label = fileout.label, output.path = output.path) - new_order <- c('system', 'reference', 'metric', 'region','sdate', 'time') + new_order <- c('system', 'reference', 'var', 'region','sdate', 'time') if(model == 'system'){ data_sc_12 <- Subset(Reorder(transformed_data, new_order), c('reference','region'), list(1, reg), drop = 'selected') @@ -423,9 +423,9 @@ ScorecardsMulti <- function(data, sign, system, reference, var, var.units, sign = sign_sc_12, row_dim = 'time', subrow_dim = model, - col_dim = 'metric', + col_dim = 'var', subcol_dim = 'sdate', - legend_dim = 'metric', + legend_dim = 'var', row_names = forecast.months, subrow_names = model.name, col_names = metric.names, diff --git a/modules/Scorecards/R/tmp/ScorecardsSingle.R b/modules/Scorecards/R/tmp/ScorecardsSingle.R index 9f00549b..eca8f9b9 100644 --- a/modules/Scorecards/R/tmp/ScorecardsSingle.R +++ b/modules/Scorecards/R/tmp/ScorecardsSingle.R @@ -93,11 +93,11 @@ ScorecardsSingle <- function(data, sign, system, reference, var, var.units, if (is.null(names(dim(data)))) { stop("Parameter 'data' must have dimenision names.") } - if (!all(c('system','reference','metric','time','sdate','region') %in% - names(dim(data)))) { - stop("Dimension names of 'data' must be: 'system','reference','metric', - 'time','sdate','region'.") - } + # if (!all(c('system','reference','var','time','sdate','region') %in% + # names(dim(data)))) { + # stop("Dimension names of 'data' must be: 'system','reference','var', + # 'time','sdate','region'.") + # } if (is.null(table.label)){ table.label <- "" } @@ -106,19 +106,19 @@ ScorecardsSingle <- function(data, sign, system, reference, var, var.units, } ## Make sure data is in correct order for using in functions: - data_order <- c('system', 'reference', 'metric', 'time', 'sdate', 'region') + data_order <- c('var', 'system', 'reference', 'time', 'sdate', 'region') data <- Reorder(data, data_order) ## Identify metrics loaded metrics_loaded <- attributes(data)$metrics ## Select only the metrics to visualize from data - data <- Subset(data, along = 'metric', indices = match(metrics, metrics_loaded)) + data <- Subset(data, along = 'var', indices = match(metrics, metrics_loaded)) attributes(data)$metrics <- metrics - if(!is.null(sign)){ + if (!is.null(sign)) { sign <- Reorder(sign, data_order) - sign <- Subset(sign, along = 'metric', indices = match(metrics, metrics_loaded)) + sign <- Subset(sign, along = 'var', indices = match(metrics, metrics_loaded)) attributes(sign)$metrics <- metrics } @@ -184,18 +184,18 @@ ScorecardsSingle <- function(data, sign, system, reference, var, var.units, for (ref in 1:dim(data)['reference']) { ## TO DO: Apply check to each scorecard function - ## check dimension 'metric' exists: - if (!("metric" %in% names(dim(data)))) { + ## check dimension 'var' exists: + if (!("var" %in% names(dim(data)))) { dim(data) <- c(metric = 1, dim(data)) } ## Find position of mean bias metric to calculate breaks breaks_bias <- NULL if ('mean_bias' %in% metrics){ - stopifnot(identical(names(dim(Subset(data, c('system', 'reference'), list(sys, ref), drop = 'selected'))), c('metric','time','sdate','region'))) + stopifnot(identical(names(dim(Subset(data, c('system', 'reference'), list(sys, ref), drop = 'selected'))), c('var','time','sdate','region'))) temp_data <- Subset(data, c('system', 'reference'), list(sys, ref), drop = 'selected') pos_bias <- which(metrics == 'mean_bias') - breaks_bias <- .SCBiasBreaks(Subset(temp_data, along = 'metric', + breaks_bias <- .SCBiasBreaks(Subset(temp_data, along = 'var', indices = pos_bias)) } @@ -233,9 +233,9 @@ ScorecardsSingle <- function(data, sign, system, reference, var, var.units, sign = sign_sc_1, row_dim = 'region', subrow_dim = 'time', - col_dim = 'metric', + col_dim = 'var', subcol_dim = 'sdate', - legend_dim = 'metric', + legend_dim = 'var', row_names = region.names, subrow_names = forecast.months, col_names = metric.names, @@ -266,13 +266,13 @@ ScorecardsSingle <- function(data, sign, system, reference, var, var.units, ## (reorder only) ## Scorecard type 2 is same as type 1 for only one region, therefore is ## only plotted if more that one region is requested - if(dim(data)['region'] > 1) { + if (dim(data)['region'] > 1) { fileout <- .Filename(system = system[sys], reference = reference[ref], var = var, start.year = start.year, end.year = end.year, scorecard.type = 2, fileout.label = fileout.label, output.path = output.path) - new_order <- c('metric', 'region', 'sdate', 'time') + new_order <- c('var', 'region', 'sdate', 'time') data_sc_2 <- Reorder(Subset(data, c('system', 'reference'), list(sys, ref), drop = 'selected'), new_order) @@ -286,9 +286,9 @@ ScorecardsSingle <- function(data, sign, system, reference, var, var.units, sign = sign_sc_2, row_dim = 'time', subrow_dim = 'region', - col_dim = 'metric', + col_dim = 'var', subcol_dim = 'sdate', - legend_dim = 'metric', + legend_dim = 'var', row_names = forecast.months, subrow_names = region.names, col_names = metric.names, @@ -335,9 +335,9 @@ ScorecardsSingle <- function(data, sign, system, reference, var, var.units, sign = sign_sc_3, row_dim = 'region', subrow_dim = 'time', - col_dim = 'metric', + col_dim = 'var', subcol_dim = 'sdate', - legend_dim = 'metric', + legend_dim = 'var', row_names = region.names, subrow_names = forecast.months, col_names = metric.names, @@ -373,7 +373,7 @@ ScorecardsSingle <- function(data, sign, system, reference, var, var.units, start.year = start.year, end.year = end.year, scorecard.type = 4, fileout.label = fileout.label, output.path = output.path) - new_order <- c('metric', 'region', 'sdate', 'time') + new_order <- c('var', 'region', 'sdate', 'time') data_sc_4 <- Reorder(Subset(transformed_data, c('system', 'reference'), list(sys, ref), drop = 'selected'), new_order) @@ -387,9 +387,9 @@ ScorecardsSingle <- function(data, sign, system, reference, var, var.units, sign = sign_sc_4, row_dim = 'time', subrow_dim = 'region', - col_dim = 'metric', + col_dim = 'var', subcol_dim = 'sdate', - legend_dim = 'metric', + legend_dim = 'var', row_names = forecast.months, subrow_names = region.names, col_names = metric.names, diff --git a/modules/Scorecards/R/tmp/ScorecardsSystemDiff.R b/modules/Scorecards/R/tmp/ScorecardsSystemDiff.R index 90ac44f6..156e24c7 100644 --- a/modules/Scorecards/R/tmp/ScorecardsSystemDiff.R +++ b/modules/Scorecards/R/tmp/ScorecardsSystemDiff.R @@ -65,14 +65,14 @@ ScorecardsSystemDiff <- function(data, system, reference, var, var.units, } ## Make sure input_data is in correct order for using in functions: - data_order <- c('system','reference','metric','time','sdate','region') + data_order <- c('system','reference','var','time','sdate','region') data <- Reorder(data, data_order) ## Identify metrics loaded metrics_loaded <- attributes(data)$metrics ## Select only the metrics to visualize from data - input_data <- Subset(data, along = 'metric', indices = match(metrics, metrics_loaded)) + input_data <- Subset(data, along = 'var', indices = match(metrics, metrics_loaded)) attributes(input_data)$metrics <- metrics ## Calculate difference between two systems/references @@ -191,9 +191,9 @@ ScorecardsSystemDiff <- function(data, system, reference, var, var.units, SCPlotScorecard(data = diff_data, row.dim = 'region', subrow.dim = 'time', - col.dim = 'metric', + col.dim = 'var', subcol.dim = 'sdate', - legend.dim = 'metric', + legend.dim = 'var', row.names = region.names, subrow.names = forecast.months, col.names = metric.names, @@ -227,14 +227,14 @@ ScorecardsSystemDiff <- function(data, system, reference, var, var.units, fileout <- .Filename(system = paste0("diff_",comparison[1],"_",comparison[2]), reference = eval.filename, var = var, start.year = start.year, end.year = end.year, scorecard.type = 2, fileout.label = fileout.label, output.path = output.path) - new_order <- c('metric', 'region', 'sdate', 'time') + new_order <- c('var', 'region', 'sdate', 'time') data_sc_2 <- Reorder(diff_data, new_order) SCPlotScorecard(data = data_sc_2, row.dim = 'time', subrow.dim = 'region', - col.dim = 'metric', + col.dim = 'var', subcol.dim = 'sdate', - legend.dim = 'metric', + legend.dim = 'var', row.names = forecast.months, subrow.names = region.names, col.names = metric.names, @@ -269,9 +269,9 @@ ScorecardsSystemDiff <- function(data, system, reference, var, var.units, SCPlotScorecard(data = transformed_data, row.dim = 'region', subrow.dim = 'time', - col.dim = 'metric', + col.dim = 'var', subcol.dim = 'sdate', - legend.dim = 'metric', + legend.dim = 'var', row.names = region.names, subrow.names = forecast.months, col.names = metric.names, @@ -305,14 +305,14 @@ ScorecardsSystemDiff <- function(data, system, reference, var, var.units, fileout <- .Filename(system = paste0("diff_",comparison[1],"_",comparison[2]), reference = eval.filename, var = var, start.year = start.year, end.year = end.year, scorecard.type = 4, fileout.label = fileout.label, output.path = output.path) - new_order <- c('metric', 'region', 'sdate', 'time') + new_order <- c('var', 'region', 'sdate', 'time') data_sc_4 <- Reorder(transformed_data, new_order) SCPlotScorecard(data = data_sc_4, row.dim = 'time', subrow.dim = 'region', - col.dim = 'metric', + col.dim = 'var', subcol.dim = 'sdate', - legend.dim = 'metric', + legend.dim = 'var', row.names = forecast.months, subrow.names = region.names, col.names = metric.names, diff --git a/modules/Scorecards/Scorecards_calculations.R b/modules/Scorecards/Scorecards_calculations.R index f87b849b..bc8e39c0 100644 --- a/modules/Scorecards/Scorecards_calculations.R +++ b/modules/Scorecards/Scorecards_calculations.R @@ -69,21 +69,23 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, ", | |,")) ## Define array to filled with data - aggr_significance <- array(data = NA, - dim = c(var = length(var), - time = length(forecast.months), - region = length(regions), - metric = length(metrics))) + aggr_significance <- list() ## For data that is already aggregated by region if ("region" %in% names(dim(skill_metrics[[1]]))) { aggr_metrics <- NULL - for (met in metrics) { - skill_metrics[[met]] <- Reorder(skill_metrics[[met]], - c("time", "region", "var")) - aggr_metrics <- abind(aggr_metrics, skill_metrics[[met]], along=3) + for (met in (names(skill_metrics))) { + if (strsplit(met, split = "-", fixed = TRUE)[[1]][1] %in% metrics) { + aggr_metrics[[met]] <- Reorder(skill_metrics[[met]], + c("time", "region", "var")) + aggr_significance[[paste0(met, "_significance")]] <- + array(data = NA, + dim = c(var = length(var), + time = length(forecast.months), + region = length(regions))) + + } } - names(dim(aggr_metrics)) <- c("time", "region", "metric") } else { ## Define arrays to filled with data aggr_metrics <- list() @@ -115,20 +117,19 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, if (met == 'crpss' && inf.to.na == TRUE) { result[result == -Inf] <- NA } - aggr_metrics <- append(aggr_metrics, - list(Reorder(data = result, - order = c('var', 'time', 'region')))) - aggr_metrics_names <- c(aggr_metrics_names, met) - names(aggr_metrics) <- aggr_metrics_names + aggr_metrics[[met]] <- Reorder(data = result, + order = c('var', 'time', 'region')) + aggr_significance[[paste0(met, "_significance")]] <- + array(data = NA, + dim = c(var = length(var), + time = length(forecast.months), + region = length(regions))) + } } ## close on met - ## TODO - # aggr_metrics <- abind(aggr_metrics, along = 0) - # names(dim(aggr_metrics)) <- c("metric", "var", "time", "region") - # metrics <- aggr_metrics_names } ## close if on skill - ## TODO: Review and modify + ## TODO: Review and modify score aggregation ## Score Aggregation if (metric.aggregation == 'score') { ## Spatially aggregate data for each metric @@ -446,30 +447,28 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, } ## close if on region ## Set NAs to False - aggr_significance[is.na(aggr_significance)] <- FALSE + aggr_significance <- lapply(aggr_significance, + function(x) { + x[is.na(x)] <- 0 + if (is.logical(x)) { + dims <- dim(x) + res <- as.numeric(x) + dim(res) <- dims + } else { + res <- x + } + return(res)}) - ## Include attributes - attributes(aggr_metrics)$metrics <- metrics - attributes(aggr_metrics)$forecast.months <- forecast.months - attributes(aggr_metrics)$regions <- regions - attributes(aggr_metrics)$system.name <- system - attributes(aggr_metrics)$reference.name <- reference - - # aggr_scorecards <- list(aggr_metrics = aggr_metrics, - # aggr_significance = aggr_significance) # Metadata for saving ## TODO: Significance data$hcst$attrs$Variable$metadata$region <- names(regions) attributes(data$hcst$attrs$Variable$metadata$region)$variables <- list(region = list(names = paste(names(regions), collapse = ", "))) - # data$hcst$attrs$Variable$metadata$metric <- metrics - # attributes(data$hcst$attrs$Variable$metadata$metric)$variables <- - # list(metric = list(names = paste(metrics, collapse = ", "))) ## Save metric data arrays recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", "Scorecards/") - save_metrics(recipe = recipe, metrics = aggr_metrics, + save_metrics(recipe = recipe, metrics = c(aggr_metrics, aggr_significance), data_cube = data$hcst, agg = 'region', module = "scorecards") } diff --git a/modules/Scorecards/Scorecards_plotting.R b/modules/Scorecards/Scorecards_plotting.R index 730a6cef..a9364e03 100644 --- a/modules/Scorecards/Scorecards_plotting.R +++ b/modules/Scorecards/Scorecards_plotting.R @@ -128,37 +128,27 @@ Scorecards_plotting <- function(recipe) { } ####### Load data files ####### + + source("modules/Scorecards/R/tmp/LoadMetrics2.R") aggregated_metrics <- LoadMetrics(input_path = input.path, system = system, reference = reference, var = var, - data_type = "aggr_metrics", + metric_names = metrics, period = period, start_months = as.numeric(start.months), calib_method = calib.method) + attributes(aggregated_metrics)$metrics <- attr(aggregated_metrics, "FileSelectors")$dat1$var[[1]] - ## source("modules/Scorecards/R/tmp/LoadMetrics2.R") - ## aggregated_metrics2 <- LoadMetrics2(input_path = input.path, - ## system = system, - ## reference = reference, - ## var = var, - ## data_type = "aggr_metrics", - ## period = period, - ## start_months = as.numeric(start.months), - ## calib_method = calib.method) - ## attributes(aggregated_metrics2)$metrics <- attr(aggregated_metrics2, "FileSelectors")$dat1$var[[1]] - - attributes(aggregated_metrics)$metrics <- metrics - aggregated_significance <- LoadMetrics(input_path = input.path, system = system, reference = reference, var = var, - data_type = "aggr_significance", + metric_names = paste0(metrics, "_significance"), period = period, start_months = as.numeric(start.months), calib_method = calib.method) - + aggregated_significance <- aggregated_significance == 1 -- GitLab From 3a4478e3e3dda110c6acd98bca51b340dacf545b Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 20 Jun 2025 17:20:04 +0200 Subject: [PATCH 4/8] Solve psl conflict --- modules/Scorecards/R/tmp/ScorecardsMulti.R | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/modules/Scorecards/R/tmp/ScorecardsMulti.R b/modules/Scorecards/R/tmp/ScorecardsMulti.R index 3c3a721b..0a65f5a0 100644 --- a/modules/Scorecards/R/tmp/ScorecardsMulti.R +++ b/modules/Scorecards/R/tmp/ScorecardsMulti.R @@ -206,12 +206,8 @@ ScorecardsMulti <- function(data, sign, system, reference, var, var.units, ## Find position of mean bias metric to calculate breaks if ('mean_bias' %in% metrics) { - pos_bias <- which(metrics == 'mean_bias') - if(var == 'psl'){ - data[,,pos_bias,,,] <- data[,,pos_bias,,,]/100 ## temporary - } - breaks_bias <- .SCBiasBreaks(Subset(data, along = c('var','region'), - indices = list(pos_bias,reg))) + breaks_bias <- .SCBiasBreaks(Subset(data, along = c('var', 'region'), + indices = list(pos_bias ,reg))) } ## Define breaks for each metric based of metric position: -- GitLab From 4c70a17bc103dae83e005734945bde964dede608 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 25 Jun 2025 14:42:54 +0200 Subject: [PATCH 5/8] Remove non-existent path and assign class 'array' to scorecards metrics --- modules/Scorecards/R/tmp/LoadMetrics.R | 1 + modules/Scorecards/Scorecards_plotting.R | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/Scorecards/R/tmp/LoadMetrics.R b/modules/Scorecards/R/tmp/LoadMetrics.R index cad2bb51..e9004634 100644 --- a/modules/Scorecards/R/tmp/LoadMetrics.R +++ b/modules/Scorecards/R/tmp/LoadMetrics.R @@ -106,6 +106,7 @@ LoadMetrics <- function(input_path, system, reference, var, period, metric_names metrics <- Subset(metrics, along = "dat", indices = 1, drop = "selected") + class(metrics) <- "array" return(metrics) } diff --git a/modules/Scorecards/Scorecards_plotting.R b/modules/Scorecards/Scorecards_plotting.R index a9364e03..d53400e2 100644 --- a/modules/Scorecards/Scorecards_plotting.R +++ b/modules/Scorecards/Scorecards_plotting.R @@ -129,7 +129,6 @@ Scorecards_plotting <- function(recipe) { ####### Load data files ####### - source("modules/Scorecards/R/tmp/LoadMetrics2.R") aggregated_metrics <- LoadMetrics(input_path = input.path, system = system, reference = reference, -- GitLab From 270c6ba68903f05ae23f751b739fa499906d4abf Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 25 Jun 2025 14:56:07 +0200 Subject: [PATCH 6/8] Add stop() if metrics not found --- modules/Scorecards/R/tmp/LoadMetrics.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/modules/Scorecards/R/tmp/LoadMetrics.R b/modules/Scorecards/R/tmp/LoadMetrics.R index e9004634..3470f5b6 100644 --- a/modules/Scorecards/R/tmp/LoadMetrics.R +++ b/modules/Scorecards/R/tmp/LoadMetrics.R @@ -104,6 +104,10 @@ LoadMetrics <- function(input_path, system, reference, var, period, metric_names pattern_dim = 'dat', retrieve = TRUE) + if (!is.null(attr(metrics, "NotFoundFiles"))) { + stop("Some of the requested files were not found.") + } + metrics <- Subset(metrics, along = "dat", indices = 1, drop = "selected") class(metrics) <- "array" -- GitLab From 996b69f9fc464e11798eea7091eb9dcff86c2c77 Mon Sep 17 00:00:00 2001 From: vagudets Date: Thu, 26 Jun 2025 12:18:27 +0200 Subject: [PATCH 7/8] Fix 'pattern_dims' --- modules/Scorecards/R/tmp/LoadMetrics.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/modules/Scorecards/R/tmp/LoadMetrics.R b/modules/Scorecards/R/tmp/LoadMetrics.R index 3470f5b6..bde6d612 100644 --- a/modules/Scorecards/R/tmp/LoadMetrics.R +++ b/modules/Scorecards/R/tmp/LoadMetrics.R @@ -13,7 +13,7 @@ #' variable-dictionary.yml from verification suite (TO DO: multiple variables). #' The accepted names are: 'psl', 'tas', 'sfcWind', 'prlr'. #'@param period A character string indicating the start and end years of the -#' reference period (e.g. '1993-203') +#' reference period (e.g. '1993-2016') #'@param start_months A vector indicating the numbers of the start months #'@param input_path A character string indicating the path where metrics output #' files from verification suite are saved (or any other compatible files) @@ -33,7 +33,8 @@ #' calib_method = 'raw', #' input_path = './scorecards_data/input_data') #'} -#'@import startR +#'@importFrom startR Start +#'@importFrom ClimProjDiags Subset #'@export LoadMetrics <- function(input_path, system, reference, var, period, metric_names, @@ -101,7 +102,7 @@ LoadMetrics <- function(input_path, system, reference, var, period, metric_names metadata_dims = "var", return_vars = list(time = NULL, region = NULL), - pattern_dim = 'dat', + pattern_dims = 'dat', retrieve = TRUE) if (!is.null(attr(metrics, "NotFoundFiles"))) { -- GitLab From 9c4699a7152511f11088103f7d843746212da02c Mon Sep 17 00:00:00 2001 From: vagudets Date: Thu, 26 Jun 2025 14:13:30 +0200 Subject: [PATCH 8/8] Get metric names from startR_array --- modules/Scorecards/Scorecards_plotting.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/Scorecards/Scorecards_plotting.R b/modules/Scorecards/Scorecards_plotting.R index d53400e2..f8261862 100644 --- a/modules/Scorecards/Scorecards_plotting.R +++ b/modules/Scorecards/Scorecards_plotting.R @@ -167,8 +167,8 @@ Scorecards_plotting <- function(recipe) { end.year = end.year, start.months = start.months, forecast.months = forecast.months, - region.names = names(regions), ## TODO: Change this - metrics = metrics, + region.names = names(regions), + metrics = attributes(aggregated_metrics)$metrics, table.label = table.label, fileout.label = fileout.label, plot.legend = plot.legend, @@ -198,8 +198,8 @@ Scorecards_plotting <- function(recipe) { end.year = end.year, start.months = start.months, forecast.months = forecast.months, - region.names = names(regions), ## TODO: Change this - metrics = metrics, + region.names = names(regions), + metrics = attributes(aggregated_metrics)$metrics, table.label = table.label, fileout.label = fileout.label, plot.legend = plot.legend, @@ -227,8 +227,8 @@ Scorecards_plotting <- function(recipe) { end.year = end.year, start.months = start.months, forecast.months = forecast.months, - region.names = attributes(regions)$names, ## TODO: Change this - metrics = metrics, + region.names = names(regions), + metrics = attributes(aggregated_metrics)$metrics, table.label = table.label, fileout.label = fileout.label, legend.white.space = legend.white.space, -- GitLab