From 647c91f255bd79b46e378510de5e859a88c04e45 Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 18 Oct 2023 17:03:16 +0200 Subject: [PATCH 01/91] Testings analysis --- full_ecvs_scorecards.R | 129 ++++++++++++++++++++++++++++ recipe_ecvs_scorecards_seasonal.yml | 99 +++++++++++++++++++++ recipe_tas_scorecards_seasonal.yml | 91 ++++++++++++++++++++ 3 files changed, 319 insertions(+) create mode 100644 full_ecvs_scorecards.R create mode 100644 recipe_ecvs_scorecards_seasonal.yml create mode 100644 recipe_tas_scorecards_seasonal.yml diff --git a/full_ecvs_scorecards.R b/full_ecvs_scorecards.R new file mode 100644 index 00000000..a96f2e07 --- /dev/null +++ b/full_ecvs_scorecards.R @@ -0,0 +1,129 @@ + +source("modules/Loading/Loading.R") +#source("modules/Units/Units.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") +args = commandArgs(trailingOnly = TRUE) +recipe_file <- args[1] +#recipe_file <- "recipe_tas_scorecards_seasonal.yml" +recipe <- read_atomic_recipe(recipe_file) +#recipe <- prepare_outputs(recipe_file) +# Load datasets +data <- Loading(recipe) +#data <- Units(recipe, data) +# Full-cross-val workflow +sdate_dim <- dim(data$hcst$data)['syear'] +cross <- CSTools:::.make.eval.train.dexes('leave-one-out', sdate_dim, NULL) +# Paralelized: +loops <- array(1:length(cross), c(loop = length(cross))) + +res <- Apply(list(loops), target = NULL, + fun = function(t) { + # subset years: + # training + obs_tr <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$train.dexes) + hcst_tr <- Subset(data$hcst$data, along = 'syear', + indices = cross[[t]]$train.dexes) + # eval years + hcst_ev <- Subset(data$hcst$data, along = 'syear', + indices = cross[[t]]$eval.dexes) + obs_ev <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$eval.dexes) + # compute climatology: + clim_obs_tr <- MeanDims(obs_tr, 'syear') + clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble')) + # compute anomalies: + ano_obs_tr <- 
s2dv::Ano(obs_tr, clim_obs_tr) + ano_hcst_tr <- s2dv::Ano(hcst_tr, clim_hcst_tr) + ano_hcst_ev <- s2dv::Ano(hcst_ev, clim_hcst_tr) + ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr) + #Category limits + lims_ano_hcst_tr <- Apply(ano_hcst_tr, target_dims = c('syear', 'ensemble'), + fun = function(x) {quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)}, + output_dims = 'probs')$output1 + lims_ano_obs_tr <- Apply(ano_obs_tr, target_dims = c('syear', 'ensemble'), + fun = function(x) {quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)}, + output_dims = 'probs')$output1 + + return(list(#ano_obs_tr = ano_obs_tr, ano_hcst_tr = ano_hcst_tr, + ano_hcst_ev = ano_hcst_ev, ano_obs_ev = ano_obs_ev, + #clim_obs_tr = clim_obs_tr, clim_hcst_tr = clim_hcst_tr, + lims_ano_hcst_tr = lims_ano_hcst_tr, lims_ano_obs_tr = lims_ano_obs_tr)) + }, ncores = recipe$Analysis$ncores) +# RPS +source("/esarchive/scratch/nperez/git2/s2dv/R/GetProbs.R") +ano_hcst_probs_ev <- GetProbs(res$ano_hcst_ev, time_dim = 'loop', + prob_thresholds = NULL, + prob_dim = 'probs', indices_for_quantiles = NULL, + memb_dim = 'ensemble', abs_thresholds = res$lims_ano_hcst_tr, + ncores = recipe$Analysis$ncores) +ano_obs_probs_ev <- GetProbs(res$ano_obs_ev, time_dim = 'loop', + prob_thresholds = NULL, + prob_dim = 'probs', indices_for_quantiles = NULL, + memb_dim = 'ensemble', abs_thresholds = res$lims_ano_obs_tr, + ncores = recipe$Analysis$ncores) +rps <- RPS(exp = ano_hcst_probs_ev, obs = ano_obs_probs_ev, memb_dim = NULL, + cat_dim = 'probs', cross.val = FALSE, time_dim = 'loop', + ncores = recipe$Analysis$ncores) +# RPSS +rpss <- RPSS(exp = ano_hcst_probs_ev, obs = ano_obs_probs_ev, + time_dim = 'loop', memb_dim = NULL, + cat_dim = 'probs', + # We should use a line like this + #abs_threshold = res$lims_ano_hcst_tr, + #prob_threshold = c(1/3, 2/3), + cross.val = FALSE, + ncores = recipe$Analysis$ncores) +# CRPS +crps <- CRPS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, + time_dim = 'loop', memb_dim = 'ensemble', + ncores 
= recipe$Analysis$ncores) +# CRPSS +crpss <- CRPSS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, + memb_dim = 'ensemble', + time_dim = 'loop', clim.cross.val = TRUE, + ncores = recipe$Analysis$ncores) + +# Corr +enscorr <- s2dv::Corr(res$ano_hcst_ev, res$ano_obs_ev, + dat_dim = 'dat', + time_dim = 'loop', + method = 'pearson', + memb_dim = 'ensemble', + memb = F, + conf = F, + pval = F, + sign = T, + alpha = 0.05, + ncores = recipe$Analysis$ncores) + +# Mean Bias +mean_bias <- Bias(res$ano_hcst_ev, res$ano_obs_ev, + time_dim = 'loop', + memb_dim = 'ensemble', + ncores = recipe$Analysis$ncores) +# Spread error ratio +obs_noensdim <- ClimProjDiags::Subset(res$ano_obs_ev, "ensemble", 1, + drop = "selected") +enssprerr <- easyVerification::veriApply(verifun = 'EnsSprErr', + fcst = res$ano_hcst_ev, + obs = obs_noensdim, + tdim = which(names(dim(res$ano_hcst_ev))=='loop'), + ensdim = which(names(dim(res$ano_hcst_ev))=='ensemble'), + na.rm = TRUE, + ncpus = recipe$Analysis$ncores) + +skill_metrics <- list(mean_bias = mean_bias, enscorr = enscorr$corr, + enscorr_significance = enscorr$sign, enssprerr = enssprerr, + #rps = rps, + rpss = rpss$rpss, rpss_significance = rpss$sign, #crps = crps, + crpss = crpss$crpss, crpss_significance = crpss$sign) +skill_metrics <- lapply(skill_metrics, function(x) { + InsertDim(drop(x), len = 1, pos = 1, name = 'var')}) +original <- recipe$Run$output_dir +recipe$Run$output_dir <- paste0(original, "/outputs/Skill/") +# Compute save metrics +save_metrics <- Saving(recipe = recipe, data = data, skill = skill_metrics) +recipe$Run$output_dir <- original +Visualization(recipe, data, skill_metrics, significance = TRUE) diff --git a/recipe_ecvs_scorecards_seasonal.yml b/recipe_ecvs_scorecards_seasonal.yml new file mode 100644 index 00000000..eb14fc8c --- /dev/null +++ b/recipe_ecvs_scorecards_seasonal.yml @@ -0,0 +1,99 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + 
Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + - {name: tas, freq: monthly_mean, units: K} + - {name: prlr, freq: monthly_mean, units: ms-1, flux: yes} + Datasets: + System: + - {name: ECMWF-SEAS5} # system21_m1 system35c3s + Multimodel: no # Mandatory, bool: Either yes/true or no/false + Reference: + - {name: ERA5} # Mandatory, str: Reference codename. See docu. + Time: + sdate: + - '0101' + - '0201' + - '0301' + - '0401' + - '0501' + - '0601' + - '0701' + - '0801' + - '0901' + - '1001' + - '1101' + - '1201' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 5 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + - {name: "EU", latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.5} + Regrid: + method: bilinear # Mandatory, str: Interpolation method. See docu. + type: "to_system" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: yes + cross_validation: no + save: none + Calibration: + method: raw # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss EnsSprErr + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. 
+ save: 'all' + Indicators: + index: no + Visualization: + plots: skill_metrics #forecast_ensemble_mean most_likely_terciles + multi_panel: no + dots: both + Scorecards: + execute: yes # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: 'all' + metric: mean_bias enscorr rpss crpss EnsSprErr + metric_aggregation: 'skill' + inf_to_na: yes + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 14 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: esarchive + output_dir: /esarchive/scratch/nperez/cs_oper/ # replace with the directory where you want to save the outputs + code_dir: /esarchive/scratch/nperez/git3/sunset/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. 
+ email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails diff --git a/recipe_tas_scorecards_seasonal.yml b/recipe_tas_scorecards_seasonal.yml new file mode 100644 index 00000000..86060194 --- /dev/null +++ b/recipe_tas_scorecards_seasonal.yml @@ -0,0 +1,91 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: ECMWF-SEAS5 # system21_m1 system35c3s + Multimodel: no # Mandatory, bool: Either yes/true or no/false + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0101' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 2 # Mandatory, int: Last leadtime time step in months + Region: + latmin: -90 + latmax: 90 + lonmin: 0 + lonmax: 359.5 + Regrid: + method: bilinear # Mandatory, str: Interpolation method. See docu. + type: "to_system" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: yes + cross_validation: no + save: none + Calibration: + method: raw # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss EnsSprErr + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. 
+ save: none + Indicators: + index: no + Visualization: + plots: skill_metrics #forecast_ensemble_mean most_likely_terciles + multi_panel: no + dots: both + #projection: robinson + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: NULL + metric: mean_bias enscorr rpss crpss EnsSprErr + metric_aggregation: 'skill' + #inf_to_na: yes + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: esarchive + output_dir: /esarchive/scratch/nperez/cs_oper/ # replace with the directory where you want to save the outputs + code_dir: /esarchive/scratch/nperez/git3/sunset/ # replace with the directory where your code is + autosubmit: yes + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. 
+ email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails -- GitLab From 2da467d12ec087568fb9330040148691dda66124 Mon Sep 17 00:00:00 2001 From: nperez Date: Mon, 23 Oct 2023 13:59:09 +0200 Subject: [PATCH 02/91] full cross --- full_NAO_scorecards.R | 194 ++++++++++++++++++++++++++++ full_ecvs_scorecards.R | 30 +++-- recipe_NAO_scorecards.yml | 60 +++++++++ recipe_ecvs_scorecards_seasonal.yml | 4 +- recipe_ecvs_seasonal_oper.yml | 73 ----------- recipe_tas_scorecards_seasonal.yml | 2 +- 6 files changed, 279 insertions(+), 84 deletions(-) create mode 100644 full_NAO_scorecards.R create mode 100644 recipe_NAO_scorecards.yml delete mode 100644 recipe_ecvs_seasonal_oper.yml diff --git a/full_NAO_scorecards.R b/full_NAO_scorecards.R new file mode 100644 index 00000000..3e24c6ad --- /dev/null +++ b/full_NAO_scorecards.R @@ -0,0 +1,194 @@ + +source("modules/Loading/Loading.R") +#source("modules/Units/Units.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") +args = commandArgs(trailingOnly = TRUE) +recipe_file <- args[1] +#recipe_file <- "recipe_NAO_scorecards.yml" +recipe <- read_atomic_recipe(recipe_file) +#recipe <- prepare_outputs(recipe_file) +# Load datasets +data <- Loading(recipe) +#data <- Units(recipe, data) +# Full-cross-val workflow +sdate_dim <- dim(data$hcst$data)['syear'] +cross <- CSTools:::.make.eval.train.dexes('leave-one-out', sdate_dim, NULL) +# Paralelized: +loops <- array(1:length(cross), c(loop = length(cross))) + + source("/esarchive/scratch/nperez/git/s2dv/R/NAO.R") + source("/esarchive/scratch/nperez/git/s2dv/R/Utils.R") + source("/esarchive/scratch/nperez/git/s2dv/R/EOF.R") + source("/esarchive/scratch/nperez/git/s2dv/R/ProjectField.R") + + +res <- Apply(list(loops), target = NULL, + fun = function(t) { + # subset years: + # training + obs_tr <- Subset(data$obs$data, along = 
'syear', + indices = cross[[t]]$train.dexes) + hcst_tr <- Subset(data$hcst$data, along = 'syear', + indices = cross[[t]]$train.dexes) + # eval years + hcst_ev <- Subset(data$hcst$data, along = 'syear', + indices = cross[[t]]$eval.dexes) + obs_ev <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$eval.dexes) + # compute climatology: + clim_obs_tr <- MeanDims(obs_tr, 'syear') + clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble')) + # compute anomalies: + ano_obs_tr <- s2dv::Ano(obs_tr, clim_obs_tr) + ano_hcst_tr <- s2dv::Ano(hcst_tr, clim_hcst_tr) + ano_hcst_ev <- s2dv::Ano(hcst_ev, clim_hcst_tr) + ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr) + # compute NAO: + nao <- NAO(exp = ano_hcst_tr, obs = ano_obs_tr, exp_cor = ano_hcst_ev, + ftime_avg = NULL, time_dim = 'syear', + memb_dim = 'ensemble', + space_dim = c('latitude', 'longitude'), + ftime_dim = 'time', + lat = data$obs$attrs$Variable$metadata$lat, + lon = data$obs$attrs$Variable$metadata$lon) + + nao_obs_ev <- NAO(exp = ano_hcst_tr, obs = ano_obs_tr, exp_cor = ano_obs_ev, + ftime_avg = NULL, time_dim = 'syear', + memb_dim = 'ensemble', + space_dim = c('latitude', 'longitude'), + ftime_dim = 'time', + lat = data$obs$attrs$Variable$metadata$lat, + lon = data$obs$attrs$Variable$metadata$lon)$exp_cor + #Standarisation: + # Need the nao_hcst (for the train.dexes) to standarize the eval.dexes? 
+ nao_hcst_ev <- Apply(list(nao$exp, nao$exp_cor), + target_dims = c('syear', 'ensemble'), + fun = function(x, y) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(y, c(1,2), function(z) {(z-means)/sd})}, + ncores = 1)$output1 + nao_obs_ev <- Apply(list(nao$obs, nao_obs_ev), + target_dims = list('syear', c('syear','ensemble')), + fun = function(x, y) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(y, c(1,2), + function(z) {(z-means)/sd})}, + ncores = 1)$output1 + nao_obs_tr <- Apply(list(nao$obs), target_dims = 'syear', + fun = function(x) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(x, 1, + function(z) {(z-means)/sd})}, + ncores = 1, output_dims = 'syear')$output1 + nao_hcst_tr <- Apply(list(nao$exp), target_dims = c('syear', 'ensemble'), + fun = function(x) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(x, c(1,2), function (z) {(z-means)/sd})}, ncores = 1)$output1 + #Category limits + lims_nao_hcst_tr <- Apply(nao_hcst_tr, target_dims = c('syear', 'ensemble'), + fun = function(x) {quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)}, + output_dims = 'probs')$output1 + lims_nao_obs_tr <- Apply(nao_obs_tr, target_dims = 'syear', + fun = function(x) {quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)}, + output_dims = 'probs')$output1 + + return(list(#ano_obs_tr = ano_obs_tr, ano_hcst_tr = ano_hcst_tr, + #ano_hcst_ev = ano_hcst_ev, ano_obs_ev = ano_obs_ev, + #clim_obs_tr = clim_obs_tr, clim_hcst_tr = clim_hcst_tr, + nao_hcst_tr = nao_hcst_tr, nao_hcst_ev = nao_hcst_ev, + nao_obs_ev = nao_obs_ev, nao_obs_tr = nao_obs_tr, + lims_nao_hcst_tr = lims_nao_hcst_tr, lims_nao_obs_tr = lims_nao_obs_tr)) + }, ncores = recipe$Analysis$ncores) +# RPS +source("/esarchive/scratch/nperez/git2/s2dv/R/GetProbs.R") +nao_hcst_probs_ev <- 
GetProbs(res$nao_hcst_ev, time_dim = 'loop', + prob_thresholds = NULL, + prob_dim = 'probs', indices_for_quantiles = NULL, + memb_dim = 'ensemble', abs_thresholds = res$lims_ano_hcst_tr, + ncores = recipe$Analysis$ncores) +nao_obs_probs_ev <- GetProbs(res$nao_obs_ev, time_dim = 'loop', + prob_thresholds = NULL, + prob_dim = 'probs', indices_for_quantiles = NULL, + memb_dim = 'ensemble', abs_thresholds = res$lims_ano_obs_tr, + ncores = recipe$Analysis$ncores) +rps <- RPS(exp = nao_hcst_probs_ev, obs = nao_obs_probs_ev, memb_dim = NULL, + cat_dim = 'probs', cross.val = FALSE, time_dim = 'loop', + ncores = recipe$Analysis$ncores) +# RPSS +rpss <- RPSS(exp = nao_hcst_probs_ev, obs = nao_obs_probs_ev, + time_dim = 'loop', memb_dim = NULL, + cat_dim = 'probs', + # We should use a line like this + #abs_threshold = res$lims_ano_hcst_tr, + #prob_threshold = c(1/3, 2/3), + cross.val = FALSE, + ncores = recipe$Analysis$ncores) +# CRPS +crps <- CRPS(exp = res$nao_hcst_ev, obs = res$nao_obs_ev, + time_dim = 'loop', memb_dim = 'ensemble', + ncores = recipe$Analysis$ncores) +# CRPSS +crpss <- CRPSS(exp = res$nao_hcst_ev, obs = res$nao_obs_ev, + memb_dim = 'ensemble', + time_dim = 'loop', clim.cross.val = TRUE, + ncores = recipe$Analysis$ncores) + +# Corr +enscorr <- s2dv::Corr(res$nao_hcst_ev, res$nao_obs_ev, + dat_dim = 'dat', + time_dim = 'loop', + method = 'pearson', + memb_dim = 'ensemble', + memb = F, + conf = F, + pval = F, + sign = T, + alpha = 0.05, + ncores = recipe$Analysis$ncores) + +# Mean Bias +#mean_bias <- Bias(res$ano_hcst_ev, res$ano_obs_ev, +mean_bias <- Bias(data$hcst$data, data$obs$data, + time_dim = 'syear', + memb_dim = 'ensemble', + ncores = recipe$Analysis$ncores) +# Spread error ratio +obs_noensdim <- ClimProjDiags::Subset(res$ano_obs_ev, "ensemble", 1, + drop = "selected") +enssprerr <- easyVerification::veriApply(verifun = 'EnsSprErr', + fcst = res$ano_hcst_ev, + obs = obs_noensdim, + tdim = which(names(dim(res$ano_hcst_ev))=='loop'), + ensdim = 
which(names(dim(res$ano_hcst_ev))=='ensemble'), + na.rm = TRUE, + ncpus = recipe$Analysis$ncores) + +skill_metrics <- list(mean_bias = mean_bias, enscorr = enscorr$corr, + enscorr_significance = enscorr$sign, enssprerr = enssprerr, + #rps = rps, + rpss = rpss$rpss, rpss_significance = rpss$sign, #crps = crps, + crpss = crpss$crpss, crpss_significance = crpss$sign) +skill_metrics <- lapply(skill_metrics, function(x) { + InsertDim(drop(x), len = 1, pos = 1, name = 'var')}) +original <- recipe$Run$output_dir +recipe$Run$output_dir <- paste0(original, "/outputs/Skill/") +# Compute save metrics +source("modules/Saving/Saving.R") +Saving <- Saving(recipe = recipe, data = data, skill = skill_metrics) +recipe$Run$output_dir <- original + +source("modules/Visualization/Visualization.R") +#PlotEquiMap(mean_bias[1,1,1,1,1,,c(182:360, 1:181)], +# lon = -179:180, +# lat = data$hcst$attrs$Variable$metadata$latitude, filled.co = F, +# fileout = "/esarchive/scratch/nperez/test.png") +skill_metrics <- lapply(skill_metrics, function(x) { + Subset(x, along = 'longitude', indices = c(182:360, 1:181)) + }) +data$hcst$coords$longitude <- -179:180 +Visualization(recipe, data, skill_metrics, significance = TRUE) diff --git a/full_ecvs_scorecards.R b/full_ecvs_scorecards.R index a96f2e07..31cf1a3a 100644 --- a/full_ecvs_scorecards.R +++ b/full_ecvs_scorecards.R @@ -16,7 +16,6 @@ sdate_dim <- dim(data$hcst$data)['syear'] cross <- CSTools:::.make.eval.train.dexes('leave-one-out', sdate_dim, NULL) # Paralelized: loops <- array(1:length(cross), c(loop = length(cross))) - res <- Apply(list(loops), target = NULL, fun = function(t) { # subset years: @@ -38,6 +37,8 @@ res <- Apply(list(loops), target = NULL, ano_hcst_tr <- s2dv::Ano(hcst_tr, clim_hcst_tr) ano_hcst_ev <- s2dv::Ano(hcst_ev, clim_hcst_tr) ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr) + rm("clim_obs_tr", "clim_hcst_tr", "obs_tr", "hcst_tr", "obs_ev", + "hcst_ev") #Category limits lims_ano_hcst_tr <- Apply(ano_hcst_tr, target_dims = 
c('syear', 'ensemble'), fun = function(x) {quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)}, @@ -45,12 +46,14 @@ res <- Apply(list(loops), target = NULL, lims_ano_obs_tr <- Apply(ano_obs_tr, target_dims = c('syear', 'ensemble'), fun = function(x) {quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)}, output_dims = 'probs')$output1 - +#mem <- mem_used() + gc() return(list(#ano_obs_tr = ano_obs_tr, ano_hcst_tr = ano_hcst_tr, ano_hcst_ev = ano_hcst_ev, ano_obs_ev = ano_obs_ev, #clim_obs_tr = clim_obs_tr, clim_hcst_tr = clim_hcst_tr, - lims_ano_hcst_tr = lims_ano_hcst_tr, lims_ano_obs_tr = lims_ano_obs_tr)) - }, ncores = recipe$Analysis$ncores) + lims_ano_hcst_tr = lims_ano_hcst_tr, + lims_ano_obs_tr = lims_ano_obs_tr)) + }, ncores =recipe$Analysis$ncores) # RPS source("/esarchive/scratch/nperez/git2/s2dv/R/GetProbs.R") ano_hcst_probs_ev <- GetProbs(res$ano_hcst_ev, time_dim = 'loop', @@ -99,8 +102,9 @@ enscorr <- s2dv::Corr(res$ano_hcst_ev, res$ano_obs_ev, ncores = recipe$Analysis$ncores) # Mean Bias -mean_bias <- Bias(res$ano_hcst_ev, res$ano_obs_ev, - time_dim = 'loop', +#mean_bias <- Bias(res$ano_hcst_ev, res$ano_obs_ev, +mean_bias <- Bias(data$hcst$data, data$obs$data, + time_dim = 'syear', memb_dim = 'ensemble', ncores = recipe$Analysis$ncores) # Spread error ratio @@ -113,7 +117,6 @@ enssprerr <- easyVerification::veriApply(verifun = 'EnsSprErr', ensdim = which(names(dim(res$ano_hcst_ev))=='ensemble'), na.rm = TRUE, ncpus = recipe$Analysis$ncores) - skill_metrics <- list(mean_bias = mean_bias, enscorr = enscorr$corr, enscorr_significance = enscorr$sign, enssprerr = enssprerr, #rps = rps, @@ -124,6 +127,17 @@ skill_metrics <- lapply(skill_metrics, function(x) { original <- recipe$Run$output_dir recipe$Run$output_dir <- paste0(original, "/outputs/Skill/") # Compute save metrics -save_metrics <- Saving(recipe = recipe, data = data, skill = skill_metrics) +source("modules/Saving/Saving.R") +Saving <- Saving(recipe = recipe, data = data, skill = skill_metrics) 
recipe$Run$output_dir <- original + +source("modules/Visualization/Visualization.R") +#PlotEquiMap(mean_bias[1,1,1,1,1,,c(182:360, 1:181)], +# lon = -179:180, +# lat = data$hcst$attrs$Variable$metadata$latitude, filled.co = F, +# fileout = "/esarchive/scratch/nperez/test.png") +skill_metrics <- lapply(skill_metrics, function(x) { + Subset(x, along = 'longitude', indices = c(182:360, 1:181)) + }) +data$hcst$coords$longitude <- -179:180 Visualization(recipe, data, skill_metrics, significance = TRUE) diff --git a/recipe_NAO_scorecards.yml b/recipe_NAO_scorecards.yml new file mode 100644 index 00000000..bd3b0587 --- /dev/null +++ b/recipe_NAO_scorecards.yml @@ -0,0 +1,60 @@ +Description: + Author: nperez + Info: ECMWF SEAS5 Seasonal Forecast Example recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: psl + freq: monthly_mean + Datasets: + System: + name: ECMWF-SEAS5 # Mandatory, str: system5c3s system21_m1 system35c3s + Multimodel: no # Mandatory, bool: Either yes/true or no/false + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0301' ## MMDD + # fcst_year: # Optional, int: Forecast year 'YYYY' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 2 # Mandatory, int: First leadtime time step in months + ftime_max: 2 # Mandatory, int: Last leadtime time step in months + Region: + latmin: 20 # Mandatory, int: minimum latitude + latmax: 80 # Mandatory, int: maximum latitude + lonmin: -80 # Mandatory, int: minimum longitude + lonmax: 40 # Mandatory, int: maximum longitude + Regrid: + method: bilinear # Mandatory, str: Interpolation method. See docu. + type: "to_system" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. 
+ Workflow: + Anomalies: + compute: yes + cross_validation: no + save: none + Indices: + NAO: {obsproj: TRUE, save: 'all', plot_ts: TRUE, plot_sp: yes} + Calibration: + method: raw # Mandatory, str: Calibration method. See docu. + save: none + Skill: + metric: mean_bias EnsCorr rps rpss crps crpss EnsSprErr + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. + save: none + Indicators: + index: no + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/nperez/cs_oper/ + code_dir: /esarchive/scratch/nperez/git3/sunset/ + + diff --git a/recipe_ecvs_scorecards_seasonal.yml b/recipe_ecvs_scorecards_seasonal.yml index eb14fc8c..b0f88f69 100644 --- a/recipe_ecvs_scorecards_seasonal.yml +++ b/recipe_ecvs_scorecards_seasonal.yml @@ -29,8 +29,8 @@ Analysis: - '1201' hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' - ftime_min: 5 # Mandatory, int: First leadtime time step in months - ftime_max: 6 # Mandatory, int: Last leadtime time step in months + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 3 # Mandatory, int: Last leadtime time step in months Region: - {name: "EU", latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.5} Regrid: diff --git a/recipe_ecvs_seasonal_oper.yml b/recipe_ecvs_seasonal_oper.yml deleted file mode 100644 index 832f36d5..00000000 --- a/recipe_ecvs_seasonal_oper.yml +++ /dev/null @@ -1,73 +0,0 @@ -Description: - Author: nperez - Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) - -Analysis: - Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal - Variables: - - {name: tas, freq: monthly_mean, units: C} - - {name: prlr, freq: monthly_mean, units: mm, flux: no} - 
Datasets: - System: - - {name: ECMWF-SEAS5.1} # system21_m1 system35c3s - Multimodel: no # Mandatory, bool: Either yes/true or no/false - Reference: - - {name: ERA5} # Mandatory, str: Reference codename. See docu. - Time: - sdate: '0801' ## MMDD - fcst_year: '2023' # Optional, int: Forecast year 'YYYY' - hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' - hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' - ftime_min: 1 # Mandatory, int: First leadtime time step in months - ftime_max: 6 # Mandatory, int: Last leadtime time step in months - Region: - - {name: "EU", latmin: 20, latmax: 80, lonmin: -20, lonmax: 40} - Regrid: - method: bilinear # Mandatory, str: Interpolation method. See docu. - type: "to_system" - #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. - Workflow: - Anomalies: - compute: no - cross_validation: no - save: none - Calibration: - method: evmos # Mandatory, str: Calibration method. See docu. - cross_validation: yes - save: none - Skill: - metric: mean_bias EnsCorr rpss crpss bss10 bss90 - save: 'all' - cross_validation: yes - Probabilities: - percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. 
- save: 'all' - Indicators: - index: no - Visualization: - plots: skill_metrics forecast_ensemble_mean most_likely_terciles - multi_panel: no - dots: both - ncores: 4 # Optional, int: number of cores, defaults to 1 - remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE - Output_format: scorecards - logo: yes -Run: - Loglevel: INFO - Terminal: yes - filesystem: esarchive - output_dir: /esarchive/scratch/nperez/cs_oper/seasonal/ # replace with the directory where you want to save the outputs - code_dir: /esarchive/scratch/nperez/git/auto-s2s/ # replace with the directory where your code is - autosubmit: yes - # fill only if using autosubmit - auto_conf: - script: /esarchive/scratch/nperez/git/auto-s2s/exec_ecvs_seasonal_oper.R # replace with the path to your script - expid: a68v # replace with your EXPID - hpc_user: bsc32339 # replace with your hpc username - wallclock: 02:00 # hh:mm - processors_per_job: 4 - platform: nord3v2 - email_notifications: yes # enable/disable email notifications. Change it if you want to. 
- email_address: nuria.perez@bsc.es # replace with your email address - notify_completed: yes # notify me by email when a job finishes - notify_failed: yes # notify me by email when a job fails diff --git a/recipe_tas_scorecards_seasonal.yml b/recipe_tas_scorecards_seasonal.yml index 86060194..542996b0 100644 --- a/recipe_tas_scorecards_seasonal.yml +++ b/recipe_tas_scorecards_seasonal.yml @@ -76,7 +76,7 @@ Run: filesystem: esarchive output_dir: /esarchive/scratch/nperez/cs_oper/ # replace with the directory where you want to save the outputs code_dir: /esarchive/scratch/nperez/git3/sunset/ # replace with the directory where your code is - autosubmit: yes + autosubmit: no # fill only if using autosubmit auto_conf: script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script -- GitLab From 70158b0c11f819d17f35b96488fc2333d0ce1305 Mon Sep 17 00:00:00 2001 From: nperez Date: Mon, 23 Oct 2023 14:57:37 +0200 Subject: [PATCH 03/91] ecmwf configuration --- conf/archive.yml | 42 +++++++++++++++++++++++++++++ recipe_ecvs_scorecards_seasonal.yml | 8 +++--- recipe_tas_scorecards_seasonal.yml | 2 +- 3 files changed, 47 insertions(+), 5 deletions(-) diff --git a/conf/archive.yml b/conf/archive.yml index 0b643ae9..4d986ed5 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -1,3 +1,45 @@ +cerise: + src: "/ec/res4/scratch/cyce/cerise/data/esarchive/" + System: + CMCC-SPS3.5: + name: "CMCC-SPS3.5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/cmcc/system35/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f6h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "tasmin":"monthly_mean/tasmin_f24h/", "tasmax":"monthly_mean/tasmax_f24h/", + "psl":"monthly_mean/psl_f6h", "tdps":"monthly_mean/tdps_f6h"} + nmember: + hcst: 40 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_system35c3s.txt" + Meteo-France-System8: + name: 
"Meteo-France System 8" + institution: "Meteo-France" + src: "exp/meteofrance/system8/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_f6h/", "sfcWind": "monthly_mean/sfcWind_f6h/", + "tasmax":"monthly_mean/tasmax_f6h/", "tasmin": "monthly_mean/tasmin_f6h/", + "psl":"psl_f6h", "tdps":"tdps_f6h"} + nmember: + hcst: 25 + time_stamp_lag: "+1" + calendar: "proleptic_gregorian" + reference_grid: "conf/grid_description/griddes_system8.txt" + UKMO-System602: + name: "UKMO-S602" + institution: "UK MetOffice" + src: "exp/ukmo/system602/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_f6h/", "sfcWind": "monthly_mean/sfcWind_f6h/", + "tasmax":"monthly_mean/tasmax_f6h/", "tasmin": "monthly_mean/tasmin_f6h/", + "psl":"psl_f6h", "tdps":"tdps_f6h"} + nmember: + hcst: 28 + time_stamp_lag: "+1" + calendar: "proleptic_gregorian" + reference_grid: "conf/grid_description/griddes_system602.txt" esarchive: src: "/esarchive/" System: diff --git a/recipe_ecvs_scorecards_seasonal.yml b/recipe_ecvs_scorecards_seasonal.yml index b0f88f69..27fc5f18 100644 --- a/recipe_ecvs_scorecards_seasonal.yml +++ b/recipe_ecvs_scorecards_seasonal.yml @@ -9,7 +9,7 @@ Analysis: - {name: prlr, freq: monthly_mean, units: ms-1, flux: yes} Datasets: System: - - {name: ECMWF-SEAS5} # system21_m1 system35c3s + - {name: Meteo-France-System8} Multimodel: no # Mandatory, bool: Either yes/true or no/false Reference: - {name: ERA5} # Mandatory, str: Reference codename. See docu. 
@@ -81,9 +81,9 @@ Analysis: Run: Loglevel: INFO Terminal: yes - filesystem: esarchive - output_dir: /esarchive/scratch/nperez/cs_oper/ # replace with the directory where you want to save the outputs - code_dir: /esarchive/scratch/nperez/git3/sunset/ # replace with the directory where your code is + filesystem: cerise + output_dir: /perm/cyce/phase0/ # replace with the directory where you want to save the outputs + code_dir: /ec/res4/scratch/cyce/cerise/sunset/ # replace with the directory where your code is autosubmit: no # fill only if using autosubmit auto_conf: diff --git a/recipe_tas_scorecards_seasonal.yml b/recipe_tas_scorecards_seasonal.yml index 542996b0..f92ee25f 100644 --- a/recipe_tas_scorecards_seasonal.yml +++ b/recipe_tas_scorecards_seasonal.yml @@ -9,7 +9,7 @@ Analysis: freq: monthly_mean Datasets: System: - name: ECMWF-SEAS5 # system21_m1 system35c3s + name: Meteo-France-System8 Multimodel: no # Mandatory, bool: Either yes/true or no/false Reference: name: ERA5 # Mandatory, str: Reference codename. See docu. 
-- GitLab From b60a584a2e859ce116d2ee65323e592140d16551 Mon Sep 17 00:00:00 2001 From: BSC CERISE WP5 Date: Mon, 23 Oct 2023 13:36:19 +0000 Subject: [PATCH 04/91] conf grid new systems --- conf/grid_description/griddes_system602.txt | 18 ++++++++++++++++++ conf/grid_description/griddes_system8.txt | 18 ++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 conf/grid_description/griddes_system602.txt create mode 100644 conf/grid_description/griddes_system8.txt diff --git a/conf/grid_description/griddes_system602.txt b/conf/grid_description/griddes_system602.txt new file mode 100644 index 00000000..a024e8d5 --- /dev/null +++ b/conf/grid_description/griddes_system602.txt @@ -0,0 +1,18 @@ +# +# gridID 1 +# +gridtype = lonlat +gridsize = 64800 +xsize = 360 +ysize = 180 +xname = lon +xlongname = "longitude" +xunits = "degrees_east" +yname = lat +ylongname = "latitude" +yunits = "degrees_north" +xfirst = 0.5 +xinc = 1 +yfirst = 89.5 +yinc = -1 + diff --git a/conf/grid_description/griddes_system8.txt b/conf/grid_description/griddes_system8.txt new file mode 100644 index 00000000..a024e8d5 --- /dev/null +++ b/conf/grid_description/griddes_system8.txt @@ -0,0 +1,18 @@ +# +# gridID 1 +# +gridtype = lonlat +gridsize = 64800 +xsize = 360 +ysize = 180 +xname = lon +xlongname = "longitude" +xunits = "degrees_east" +yname = lat +ylongname = "latitude" +yunits = "degrees_north" +xfirst = 0.5 +xinc = 1 +yfirst = 89.5 +yinc = -1 + -- GitLab From 9441275160b74ca3aab2140c738cd0876baf85ff Mon Sep 17 00:00:00 2001 From: BSC CERISE WP5 Date: Mon, 23 Oct 2023 13:59:23 +0000 Subject: [PATCH 05/91] first attemp on ecmwf --- conf/archive.yml | 9 +++++++++ full_ecvs_scorecards.R | 3 +++ launch_SUNSET.sh | 2 +- recipe_ecvs_scorecards_seasonal.yml | 22 +++++++++++----------- 4 files changed, 24 insertions(+), 12 deletions(-) diff --git a/conf/archive.yml b/conf/archive.yml index 4d986ed5..a47c05f0 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -40,6 +40,15 @@ cerise: 
time_stamp_lag: "+1" calendar: "proleptic_gregorian" reference_grid: "conf/grid_description/griddes_system602.txt" + Reference: + ERA5: + name: "ERA5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "recon/recon/ecmwf/era5/monthly_mean/" + monthly_mean: {"tas":"tas_f1h", "prlr":"prlr_f1h", "psl":"psl_f1h", "tos":"tos_f1h", + "tasmax":"tasmax_f1h", "tasmin":"tasmin_f1h", "tdps":"tdps_f1h"} + calendar: "standard" + reference_grid: "conf/grid_description/griddes_GRIB_system5_m1.txt" esarchive: src: "/esarchive/" System: diff --git a/full_ecvs_scorecards.R b/full_ecvs_scorecards.R index 31cf1a3a..2f33f5ef 100644 --- a/full_ecvs_scorecards.R +++ b/full_ecvs_scorecards.R @@ -141,3 +141,6 @@ skill_metrics <- lapply(skill_metrics, function(x) { }) data$hcst$coords$longitude <- -179:180 Visualization(recipe, data, skill_metrics, significance = TRUE) + +source("tools/add_logo.R") +add_logo(recipe, "tools/BSC_logo_95.jpg") diff --git a/launch_SUNSET.sh b/launch_SUNSET.sh index eb8fcf46..37ce4b34 100644 --- a/launch_SUNSET.sh +++ b/launch_SUNSET.sh @@ -103,7 +103,7 @@ fi tmpfile=$(mktemp ${TMPDIR-/tmp}/SUNSET.XXXXXX) # Create outdir and split recipes -source MODULES +#source MODULES Rscript split.R ${recipe} $disable_unique_ID --tmpfile $tmpfile # Run with Autosubmit or directly with Slurm's sbatch? 
diff --git a/recipe_ecvs_scorecards_seasonal.yml b/recipe_ecvs_scorecards_seasonal.yml index 27fc5f18..70907539 100644 --- a/recipe_ecvs_scorecards_seasonal.yml +++ b/recipe_ecvs_scorecards_seasonal.yml @@ -16,17 +16,17 @@ Analysis: Time: sdate: - '0101' - - '0201' - - '0301' - - '0401' - - '0501' - - '0601' - - '0701' - - '0801' - - '0901' - - '1001' - - '1101' - - '1201' + # - '0201' + # - '0301' + #- '0401' + #- '0501' + #- '0601' + #- '0701' + #- '0801' + #- '0901' + #- '1001' + #- '1101' + #- '1201' hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' ftime_min: 1 # Mandatory, int: First leadtime time step in months -- GitLab From b601a59c097dd7a44ca46a7868a4deb5eeaffb41 Mon Sep 17 00:00:00 2001 From: nperez Date: Mon, 23 Oct 2023 15:14:48 +0000 Subject: [PATCH 06/91] fixes at ecmwf --- .gitignore | 1 + conf/slurm_templates/run_parallel_workflow.sh | 4 +- conf/slurm_templates/run_scorecards.sh | 5 +- launch_SUNSET.sh | 2 +- modules/Loading/Dev_Loading.R | 2 +- modules/Loading/R/get_regrid_params.R | 73 +++++++++++++++++++ modules/Loading/R/load_decadal.R | 2 +- modules/Loading/R/load_seasonal.R | 2 +- 8 files changed, 85 insertions(+), 6 deletions(-) create mode 100644 modules/Loading/R/get_regrid_params.R diff --git a/.gitignore b/.gitignore index e11ba7d3..263c4e64 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ out-logs/ *.swp *.swo +ecsbatch.log* modules/Loading/testing_recipes/recipe_decadal_calendartest.yml modules/Loading/testing_recipes/recipe_decadal_daily_calendartest.yml conf/vitigeoss-vars-dict.yml diff --git a/conf/slurm_templates/run_parallel_workflow.sh b/conf/slurm_templates/run_parallel_workflow.sh index 461ee7e2..024c0d43 100644 --- a/conf/slurm_templates/run_parallel_workflow.sh +++ b/conf/slurm_templates/run_parallel_workflow.sh @@ -10,6 +10,8 @@ set -vx script=$1 atomic_recipe=$2 -source MODULES +#source MODULES +module load conda/22.11.1-2 +conda activate 
condaCerise Rscript ${script} ${atomic_recipe} diff --git a/conf/slurm_templates/run_scorecards.sh b/conf/slurm_templates/run_scorecards.sh index 5ebf6528..aea38e48 100644 --- a/conf/slurm_templates/run_scorecards.sh +++ b/conf/slurm_templates/run_scorecards.sh @@ -14,7 +14,10 @@ set -vx recipe=$1 outdir=$2 -source MODULES +#source MODULES +module load conda/22.11.1-2 +conda activate condaCerise + # Execute scorecards Rscript modules/Scorecards/execute_scorecards.R ${recipe} ${outdir} diff --git a/launch_SUNSET.sh b/launch_SUNSET.sh index 37ce4b34..6bd933cd 100644 --- a/launch_SUNSET.sh +++ b/launch_SUNSET.sh @@ -109,7 +109,7 @@ Rscript split.R ${recipe} $disable_unique_ID --tmpfile $tmpfile # Run with Autosubmit or directly with Slurm's sbatch? run_method=$( head -1 $tmpfile | tail -1 ) # If run method is 'sbatch', launch jobs with dependencies -if [ $run_method == "sbatch" ]; then +if [[ $run_method == "sbatch" ]]; then # Retrieve working directory codedir=$( head -2 $tmpfile | tail -1 ) # Retrieve output directory diff --git a/modules/Loading/Dev_Loading.R b/modules/Loading/Dev_Loading.R index fb456eb3..8060b8a8 100644 --- a/modules/Loading/Dev_Loading.R +++ b/modules/Loading/Dev_Loading.R @@ -1,5 +1,5 @@ ## TODO: remove paths to personal scratchs -source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R") +source("modules/Loading/R/get_regrid_params.R") # Load required libraries/funs source("modules/Loading/R/dates2load.R") source("modules/Loading/R/get_timeidx.R") diff --git a/modules/Loading/R/get_regrid_params.R b/modules/Loading/R/get_regrid_params.R new file mode 100644 index 00000000..c95372cd --- /dev/null +++ b/modules/Loading/R/get_regrid_params.R @@ -0,0 +1,73 @@ +#'Read regrid parameters from recipe and returns a list for use with Start() +#' +#'The purpose of this function is to read the recipe and archive configuration +#'data for Auto-S2S workflows, retrieve the regridding parameters for hcst and +#'obs, and return an object 
that can be the input for 'transform' and +#''transform_params' when the data is loaded using Start(). +#'Requires CDORemapper. +#' +#'@param recipe Auto-S2S configuration recipe as returned by read_yaml() +#'@param archive Auto-S2S exp and obs archive as returned by read_yaml() +#' +#'@return A list containing regridding parameters for fcst and obs +#' +#'@import startR +#'@examples +#'setwd("/esarchive/scratch/vagudets/repos/auto-s2s/") +#'library(yaml) +#'library(startR) +#'recipe <- read_yaml("modules/data_load/recipe_1.yml") +#'archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive +#'regrid_params <- get_regrid_params(recipe, archive) +#' +#'@export +get_regrid_params <- function(recipe, archive) { + + ## TODO: Multi-model case + ## TODO: 'NULL' entries had to be removed due to bug in Start(). Rewrite when + ## the bug is fixed. + exp.name <- recipe$Analysis$Datasets$System$name + ref.name <- recipe$Analysis$Datasets$Reference$name + exp_descrip <- archive$System[[exp.name]] + reference_descrip <- archive$Reference[[ref.name]] + + if (tolower(recipe$Analysis$Regrid$type) == 'to_reference') { + + regrid_params <- list(fcst.gridtype = reference_descrip$reference_grid, + fcst.gridmethod = recipe$Analysis$Regrid$method, + fcst.transform = CDORemapper, + obs.gridtype = reference_descrip$reference_grid, + obs.gridmethod = recipe$Analysis$Regrid$method, + obs.transform = CDORemapper) + + } else if (tolower(recipe$Analysis$Regrid$type) == 'to_system') { + + regrid_params <- list(fcst.gridtype = exp_descrip$reference_grid, + fcst.gridmethod = recipe$Analysis$Regrid$method, + fcst.transform = CDORemapper, + obs.gridtype = exp_descrip$reference_grid, + obs.gridmethod = recipe$Analysis$Regrid$method, + obs.transform = CDORemapper) + + } else if (tolower(recipe$Analysis$Regrid$type) == 'none') { + + regrid_params <- list(fcst.gridtype = exp_descrip$reference_grid, + fcst.gridmethod = recipe$Analysis$Regrid$method, + fcst.transform = CDORemapper, + 
obs.gridtype = reference_descrip$reference_grid, + obs.gridmethod = recipe$Analysis$Regrid$method, + obs.transform = CDORemapper) + + } else { + regrid_params <- list(fcst.gridtype = recipe$Analysis$Regrid$type, + fcst.gridmethod = recipe$Analysis$Regrid$method, + fcst.transform = CDORemapper, + obs.gridtype = recipe$Analysis$Regrid$type, + obs.gridmethod = recipe$Analysis$Regrid$method, + obs.transform = CDORemapper) + } + + return(regrid_params) +} + + diff --git a/modules/Loading/R/load_decadal.R b/modules/Loading/R/load_decadal.R index 15e5bbc8..2a38a242 100644 --- a/modules/Loading/R/load_decadal.R +++ b/modules/Loading/R/load_decadal.R @@ -5,7 +5,7 @@ #setwd('/esarchive/scratch/aho/git/auto-s2s/') ## TODO: remove paths to personal scratchs -source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R") +source("modules/Loading/R/get_regrid_params.R") # Load required libraries/funs source("modules/Loading/R/helper_loading_decadal.R") source("modules/Loading/R/dates2load.R") diff --git a/modules/Loading/R/load_seasonal.R b/modules/Loading/R/load_seasonal.R index f2534ac7..325d6d28 100644 --- a/modules/Loading/R/load_seasonal.R +++ b/modules/Loading/R/load_seasonal.R @@ -1,5 +1,5 @@ ## TODO: remove paths to personal scratchs -source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R") +source("modules/Loading/R/get_regrid_params.R") # Load required libraries/funs source("modules/Loading/R/dates2load.R") source("modules/Loading/R/get_timeidx.R") -- GitLab From ad7e84d63a04789d8e13e6ace5782813dd89de05 Mon Sep 17 00:00:00 2001 From: nperez Date: Mon, 23 Oct 2023 15:51:27 +0000 Subject: [PATCH 07/91] grib files path fix --- modules/Loading/Loading.R | 2 +- modules/Loading/R/GRIB/{GRIB => }/GrbLoad.R | 0 modules/Loading/R/GRIB/{GRIB => }/s2dv_cube.R | 0 3 files changed, 1 insertion(+), 1 deletion(-) rename modules/Loading/R/GRIB/{GRIB => }/GrbLoad.R (100%) rename modules/Loading/R/GRIB/{GRIB => }/s2dv_cube.R (100%) diff --git 
a/modules/Loading/Loading.R b/modules/Loading/Loading.R index d2919594..f90f1df2 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -5,7 +5,7 @@ source("modules/Loading/load_datasets.R") Loading <- function(recipe) { # Source correct function depending on filesystem and time horizon # Case: CERISE (Mars) - if (tolower(recipe$Run$filesystem) == "mars") { + if (tolower(recipe$Run$filesystem) == "cerise") { source("modules/Loading/R/load_GRIB.R") data <- load_GRIB(recipe) } else { diff --git a/modules/Loading/R/GRIB/GRIB/GrbLoad.R b/modules/Loading/R/GRIB/GrbLoad.R similarity index 100% rename from modules/Loading/R/GRIB/GRIB/GrbLoad.R rename to modules/Loading/R/GRIB/GrbLoad.R diff --git a/modules/Loading/R/GRIB/GRIB/s2dv_cube.R b/modules/Loading/R/GRIB/s2dv_cube.R similarity index 100% rename from modules/Loading/R/GRIB/GRIB/s2dv_cube.R rename to modules/Loading/R/GRIB/s2dv_cube.R -- GitLab From be5de041c26fb33dfefaec6ea29d7915334caf04 Mon Sep 17 00:00:00 2001 From: nperez Date: Tue, 24 Oct 2023 10:18:18 +0000 Subject: [PATCH 08/91] fix cdo type parameter --- conf/archive.yml | 2 +- launch_SUNSET.sh | 2 ++ modules/Loading/Loading.R | 2 +- modules/Loading/R/load_GRIB.R | 14 ++++++++------ 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/conf/archive.yml b/conf/archive.yml index a47c05f0..92168424 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -17,7 +17,7 @@ cerise: Meteo-France-System8: name: "Meteo-France System 8" institution: "Meteo-France" - src: "exp/meteofrance/system8/" + src: "exp/meteo_france/system8/" monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f6h/", "sfcWind": "monthly_mean/sfcWind_f6h/", "tasmax":"monthly_mean/tasmax_f6h/", "tasmin": "monthly_mean/tasmin_f6h/", diff --git a/launch_SUNSET.sh b/launch_SUNSET.sh index 6bd933cd..f635635a 100644 --- a/launch_SUNSET.sh +++ b/launch_SUNSET.sh @@ -104,6 +104,8 @@ tmpfile=$(mktemp ${TMPDIR-/tmp}/SUNSET.XXXXXX) # Create outdir and split 
recipes #source MODULES +module load conda/22.11.1-2 +conda activate condaCerise Rscript split.R ${recipe} $disable_unique_ID --tmpfile $tmpfile # Run with Autosubmit or directly with Slurm's sbatch? diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index f90f1df2..a77323a1 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -5,7 +5,7 @@ source("modules/Loading/load_datasets.R") Loading <- function(recipe) { # Source correct function depending on filesystem and time horizon # Case: CERISE (Mars) - if (tolower(recipe$Run$filesystem) == "cerise") { + if (tolower(recipe$Run$filesystem) %in% c("cerise", "mars")) { source("modules/Loading/R/load_GRIB.R") data <- load_GRIB(recipe) } else { diff --git a/modules/Loading/R/load_GRIB.R b/modules/Loading/R/load_GRIB.R index 8ae2a74d..796327cb 100644 --- a/modules/Loading/R/load_GRIB.R +++ b/modules/Loading/R/load_GRIB.R @@ -22,7 +22,7 @@ load_GRIB <- function(recipe) { lons.max <- recipe$Analysis$Region$lonmax # can only be 360 ref.name <- recipe$Analysis$Datasets$Reference$name exp.name <- recipe$Analysis$Datasets$System$name - variable <- recipe$Analysis$Variables$name #'tas' + variable <- recipe$Analysis$Variables$name store.freq <- recipe$Analysis$Variables$freq regrid.method <- recipe$Analysis$Regrid$method @@ -54,7 +54,7 @@ load_GRIB <- function(recipe) { # The correct files #exp_path <- "/esarchive/scratch/aho/tmp/GRIB/GRIB_system5_tas_CORRECTED/" - hcst.path <- paste0(archive$src, hcst.dir) + hcst.path <- paste0(archive$src, hcst.dir, freq.hcst) hcst.year <- paste0(as.numeric(hcst.inityear):as.numeric(hcst.endyear)) hcst.files <- paste0(hcst.path, variable, '_', hcst.year, hcst.sdate, '.grb') @@ -94,16 +94,18 @@ load_GRIB <- function(recipe) { # Load reference #------------------------------------------------------------------- #obs_path <- "/esarchive/scratch/aho/tmp/GRIB/GRIB_era5_tas/" - obs.path <- paste0(archive$src, obs.dir) + obs.path <- paste0(archive$src, obs.dir, 
freq.obs, "/") # Use hcst time attr to load obs hcst_times <- attr(hcst, 'time') hcst_times_strings <- format(hcst_times, '%Y%m') obs.files <- paste0(obs.path, variable, '_', hcst_times_strings, '.grb') - if (!regrid.type %in% c('none', 'to_reference')) { - if (regrid.type == 'to_system') { - regrid_list <- c(method = regrid.method, type = exp_descrip$reference_grid) + if (!regrid.type %in% c('none')) { + if (regrid.type == 'to_reference') { + regrid_list <- list(method = regrid.method, type = reference_descrip$reference_grid) + } else if (regrid.type == 'to_system') { + regrid_list <- list(method = regrid.method, type = exp_descrip$reference_grid) } else { # e.g., "r360x181" regrid_list <- list(method = regrid.method, type = regrid.type) } -- GitLab From 8e008425a51e1b112ac56215c769fe22a3dd299d Mon Sep 17 00:00:00 2001 From: nperez Date: Tue, 24 Oct 2023 10:57:09 +0000 Subject: [PATCH 09/91] recon folder corrected for cerise config --- conf/archive.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/archive.yml b/conf/archive.yml index 92168424..1aa3c52e 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -44,7 +44,7 @@ cerise: ERA5: name: "ERA5" institution: "European Centre for Medium-Range Weather Forecasts" - src: "recon/recon/ecmwf/era5/monthly_mean/" + src: "recon/ecmwf/era5/monthly_mean/" monthly_mean: {"tas":"tas_f1h", "prlr":"prlr_f1h", "psl":"psl_f1h", "tos":"tos_f1h", "tasmax":"tasmax_f1h", "tasmin":"tasmin_f1h", "tdps":"tdps_f1h"} calendar: "standard" -- GitLab From a94818c390901e095f5c75da9038f7a7b54d4d2e Mon Sep 17 00:00:00 2001 From: nperez Date: Tue, 24 Oct 2023 14:10:22 +0000 Subject: [PATCH 10/91] temporal GetProbs version --- GetProbs.R | 343 +++++++++++++++++++++++++++++++++++++++++ full_ecvs_scorecards.R | 2 +- 2 files changed, 344 insertions(+), 1 deletion(-) create mode 100644 GetProbs.R diff --git a/GetProbs.R b/GetProbs.R new file mode 100644 index 00000000..b510f30f --- /dev/null +++ b/GetProbs.R @@ -0,0 +1,343 
@@ +#'Compute probabilistic forecasts or the corresponding observations +#' +#'Compute probabilistic forecasts from an ensemble based on the relative +#'thresholds, or the probabilistic observations (i.e., which probabilistic +#'category was observed). A reference period can be specified to calculate the +#'absolute thresholds between each probabilistic category. The absolute +#'thresholds can be computed in cross-validation mode. If data is an ensemble, +#'the probabilities are calculated as the percentage of members that fall into +#'each category. For observations (or forecast without member dimension), 1 +#'means that the event happened, while 0 indicates that the event did not +#'happen. Weighted probabilities can be computed if the weights are provided for +#'each ensemble member and time step. The absolute thresholds can also be +#'provided directly for probabilities calculation. +#' +#'@param data A named numerical array of the forecasts or observations with, at +#' least, time dimension. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the probabilities of the forecast, or NULL if there is no member +#' dimension (e.g., for observations, or for forecast with only one ensemble +#' member). The default value is 'member'. +#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to +#' 1) between the categories. The default value is c(1/3, 2/3), which +#' corresponds to tercile equiprobable categories. +#'@param abs_thresholds A numeric array or vector of the absolute thresholds in +#' the same units as \code{data}. If an array is provided, it should have at +#' least 'bin_dim_abs' dimension. If it has more dimensions (e.g. different +#' thresholds for different locations, i.e. 
lon and lat dimensions), they +#' should match the dimensions of \code{data}, except the member dimension +#' which should not be included. The default value is NULL and, in this case, +#' 'prob_thresholds' is used for calculating the probabilities. +#'@param bin_dim_abs A character string of the dimension name of +#' 'abs_thresholds' array in which category limits are stored. The default +#' value is 'bin'. +#'@param indices_for_quantiles A vector of the indices to be taken along +#' 'time_dim' for computing the absolute thresholds between the probabilistic +#' categories. If NULL (default), the whole period is used. It is only used +#' when 'prob_thresholds' is provided. +#'@param weights A named numerical array of the weights for 'data' with +#' dimensions 'time_dim' and 'memb_dim' (if 'data' has them). The default value +#' is NULL. The ensemble should have at least 70 members or span at least 10 +#' time steps and have more than 45 members if consistency between the weighted +#' and unweighted methodologies is desired. +#'@param cross.val A logical indicating whether to compute the thresholds +#' between probabilistic categories in cross-validation mode. The default value +#' is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A numerical array of probabilities with dimensions c(bin, the rest dimensions +#'of 'data' except 'memb_dim'). 'bin' dimension has the length of probabilistic +#'categories, i.e., \code{length(prob_thresholds) + 1}. 
+#' +#'@examples +#'data <- array(rnorm(2000), dim = c(ensemble = 25, sdate = 20, time = 4)) +#'res <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', +#' indices_for_quantiles = 4:17) +#' +#'# abs_thresholds is provided +#'abs_thr1 <- c(-0.2, 0.3) +#'abs_thr2 <- array(c(-0.2, 0.3) + rnorm(40) * 0.1, dim = c(cat = 2, sdate = 20)) +#'res1 <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', +#' prob_thresholds = NULL, abs_thresholds = abs_thr1) +#'res2 <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', +#' prob_thresholds = NULL, abs_thresholds = abs_thr2, bin_dim_abs = 'cat') +#' +#'@import multiApply +#'@importFrom easyVerification convert2prob +#'@export +GetProbs <- function(data, time_dim = 'sdate', memb_dim = 'member', + indices_for_quantiles = NULL, + prob_thresholds = c(1/3, 2/3), abs_thresholds = NULL, + bin_dim_abs = 'bin', weights = NULL, cross.val = FALSE, ncores = NULL) { + + # Check inputs + ## data + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!is.numeric(data)) { + stop("Parameter 'data' must be a numeric array.") + } + if (any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) { + stop("Parameter 'data' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop('Parameter "time_dim" must be a character string.') + if (!time_dim %in% names(dim(data))) { + stop("Parameter 'time_dim' is not found in 'data' dimensions.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(data))) { + stop("Parameter 'memb_dim' is not found in 'data' dimensions. 
If no member ", + "dimension exists, set it as NULL.") + } + } + ## prob_thresholds, abs_thresholds + if (!is.null(abs_thresholds) & !is.null(prob_thresholds)) { + .warning(paste0("Parameters 'prob_thresholds' and 'abs_thresholds' are both provided. ", + "Only the first one is used.")) + abs_thresholds <- NULL + } else if (is.null(abs_thresholds) & is.null(prob_thresholds)) { + stop("One of the parameters 'prob_thresholds' and 'abs_thresholds' must be provided.") + } + if (!is.null(prob_thresholds)) { + if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | + any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { + stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.") + } + ## indices_for_quantiles + if (is.null(indices_for_quantiles)) { + indices_for_quantiles <- 1:dim(data)[time_dim] + } else { + if (!is.numeric(indices_for_quantiles) | !is.vector(indices_for_quantiles)) { + stop("Parameter 'indices_for_quantiles' must be NULL or a numeric vector.") + } else if (length(indices_for_quantiles) > dim(data)[time_dim] | + max(indices_for_quantiles) > dim(data)[time_dim] | + any(indices_for_quantiles < 1)) { + stop("Parameter 'indices_for_quantiles' should be the indices of 'time_dim'.") + } + } + + } else { # abs_thresholds + + if (is.null(dim(abs_thresholds))) { # a vector + dim(abs_thresholds) <- length(abs_thresholds) + names(dim(abs_thresholds)) <- bin_dim_abs + } + # bin_dim_abs + if (!is.character(bin_dim_abs) | length(bin_dim_abs) != 1) { + stop('Parameter "bin_dim_abs" must be a character string.') + } + if (!(bin_dim_abs %in% names(dim(abs_thresholds)))) { + stop("Parameter abs_thresholds' can be a vector or array with 'bin_dim_abs' dimension.") + } + if (!is.null(memb_dim) && memb_dim %in% names(dim(abs_thresholds))) { + stop("Parameter abs_thresholds' cannot have member dimension.") + } + dim_name_abs <- names(dim(abs_thresholds))[which(names(dim(abs_thresholds)) != bin_dim_abs)] + if (any(!dim_name_abs %in% 
names(dim(data)))) { + stop("Parameter 'abs_thresholds' dimensions except 'bin_dim_abs' must be in 'data' as well.") + } else { + if (any(dim(abs_thresholds)[dim_name_abs] != dim(data)[dim_name_abs])) { + stop("Parameter 'abs_thresholds' dimensions must have the same length as 'data'.") + } + } + if (!is.null(indices_for_quantiles)) { + warning("Parameter 'indices_for_quantiles' is not used when 'abs_thresholds' are provided.") + } + abs_target_dims <- bin_dim_abs + if (time_dim %in% names(dim(abs_thresholds))) { + abs_target_dims <- c(bin_dim_abs, time_dim) + } + + } + + ## weights + if (!is.null(weights)) { + if (!is.array(weights) | !is.numeric(weights)) + stop("Parameter 'weights' must be a named numeric array.") + +# if (is.null(dat_dim)) { + if (!is.null(memb_dim)) { + lendim_weights <- 2 + namesdim_weights <- c(time_dim, memb_dim) + } else { + lendim_weights <- 1 + namesdim_weights <- c(time_dim) + } + if (length(dim(weights)) != lendim_weights | + any(!names(dim(weights)) %in% namesdim_weights)) { + stop(paste0("Parameter 'weights' must have dimension ", + paste0(namesdim_weights, collapse = ' and '), ".")) + } + if (any(dim(weights)[namesdim_weights] != dim(data)[namesdim_weights])) { + stop(paste0("Parameter 'weights' must have the same dimension length as ", + paste0(namesdim_weights, collapse = ' and '), " dimension in 'data'.")) + } + weights <- Reorder(weights, namesdim_weights) + +# } else { +# if (length(dim(weights)) != 3 | any(!names(dim(weights)) %in% c(memb_dim, time_dim, dat_dim))) +# stop("Parameter 'weights' must have three dimensions with the names of 'memb_dim', 'time_dim' and 'dat_dim'.") +# if (dim(weights)[memb_dim] != dim(exp)[memb_dim] | +# dim(weights)[time_dim] != dim(exp)[time_dim] | +# dim(weights)[dat_dim] != dim(exp)[dat_dim]) { +# stop(paste0("Parameter 'weights' must have the same dimension lengths ", +# "as 'memb_dim', 'time_dim' and 'dat_dim' in 'exp'.")) +# } +# weights <- Reorder(weights, c(time_dim, memb_dim, dat_dim)) +# 
} + } + ## cross.val + if (!is.logical(cross.val) | length(cross.val) > 1) { + stop("Parameter 'cross.val' must be either TRUE or FALSE.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + if (is.null(abs_thresholds)) { + res <- Apply(data = list(data = data), + target_dims = c(time_dim, memb_dim), + output_dims = c("bin", time_dim), + fun = .GetProbs, + prob_thresholds = prob_thresholds, + indices_for_quantiles = indices_for_quantiles, + weights = weights, cross.val = cross.val, ncores = ncores)$output1 + } else { + res <- Apply(data = list(data = data, abs_thresholds = abs_thresholds), + target_dims = list(c(time_dim, memb_dim), abs_target_dims), + output_dims = c("bin", time_dim), + fun = .GetProbs, + prob_thresholds = NULL, + indices_for_quantiles = NULL, + weights = NULL, cross.val = FALSE, ncores = ncores)$output1 + } + + return(res) +} + +.GetProbs <- function(data, indices_for_quantiles, + prob_thresholds = c(1/3, 2/3), abs_thresholds = NULL, + weights = NULL, cross.val = FALSE) { + # .GetProbs() is used in RPS, RPSS, ROCSS + # data + ## if data is exp: [sdate, memb] + ## if data is obs: [sdate, (memb)] + # weights: [sdate, (memb)], same as data + # if abs_thresholds is not NULL: [bin, (sdate)] + + # Add dim [memb = 1] to data if it doesn't have memb_dim + if (length(dim(data)) == 1) { + dim(data) <- c(dim(data), 1) + if (!is.null(weights)) dim(weights) <- c(dim(weights), 1) + } + + # Calculate absolute thresholds + if (is.null(abs_thresholds)) { + if (cross.val) { + quantiles <- array(NA, dim = c(bin = length(prob_thresholds), sdate = dim(data)[1])) + for (i_time in 1:dim(data)[1]) { + if (is.null(weights)) { + quantiles[, i_time] <- quantile(x = as.vector(data[indices_for_quantiles[which(indices_for_quantiles != i_time)], ]), + probs = prob_thresholds, type = 8, 
na.rm = TRUE) + } else { + # weights: [sdate, memb] + sorted_arrays <- .sorted_distributions(data[indices_for_quantiles[which(indices_for_quantiles != i_time)], ], + weights[indices_for_quantiles[which(indices_for_quantiles != i_time)], ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + quantiles[, i_time] <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y + } + } + + } else { + if (is.null(weights)) { + quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), + probs = prob_thresholds, type = 8, na.rm = TRUE) + } else { + # weights: [sdate, memb] + sorted_arrays <- .sorted_distributions(data[indices_for_quantiles, ], + weights[indices_for_quantiles, ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + quantiles <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y + } + quantiles <- array(rep(quantiles, dim(data)[1]), + dim = c(bin = length(quantiles), dim(data)[1])) + } + + } else { # abs_thresholds provided + quantiles <- abs_thresholds + if (length(dim(quantiles)) == 1) { + quantiles <- InsertDim(quantiles, len = dim(data)[1], + pos = 2, name = names(dim(data))[1]) + } + } + # quantiles: [bin-1, sdate] + + # Probabilities + probs <- array(dim = c(dim(quantiles)[1] + 1, dim(data)[1])) # [bin, sdate] + for (i_time in 1:dim(data)[1]) { + if (anyNA(data[i_time, ])) { + probs[, i_time] <- rep(NA, dim = dim(quantiles)[1] + 1) + } else { + if (is.null(weights)) { + probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], + threshold = quantiles[, i_time])) + } else { + sorted_arrays <- .sorted_distributions(data[i_time, ], weights[i_time, ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + # find any quantiles that are outside the data range + integrated_probs <- array(dim = dim(quantiles)) + for (i_quant in 1:dim(quantiles)[1]) { + # for thresholds falling under 
the distribution + if (quantiles[i_quant, i_time] < min(sorted_data)) { + integrated_probs[i_quant, i_time] <- 0 + # for thresholds falling over the distribution + } else if (max(sorted_data) < quantiles[i_quant, i_time]) { + integrated_probs[i_quant, i_time] <- 1 + } else { + integrated_probs[i_quant, i_time] <- approx(sorted_data, cumulative_weights, + quantiles[i_quant, i_time], "linear")$y + } + } + probs[, i_time] <- append(integrated_probs[, i_time], 1) - append(0, integrated_probs[, i_time]) + if (min(probs[, i_time]) < 0 | max(probs[, i_time]) > 1) { + stop(paste0("Probability in i_time = ", i_time, " is out of [0, 1].")) + } + } + } + } + + return(probs) +} + +.sorted_distributions <- function(data_vector, weights_vector) { + weights_vector <- as.vector(weights_vector) + data_vector <- as.vector(data_vector) + weights_vector <- weights_vector / sum(weights_vector) # normalize to 1 + sorter <- order(data_vector) + sorted_weights <- weights_vector[sorter] + cumulative_weights <- cumsum(sorted_weights) - 0.5 * sorted_weights + cumulative_weights <- cumulative_weights - cumulative_weights[1] # fix the 0 + cumulative_weights <- cumulative_weights / cumulative_weights[length(cumulative_weights)] # fix the 1 + return(list("data" = data_vector[sorter], "cumulative_weights" = cumulative_weights)) +} + + diff --git a/full_ecvs_scorecards.R b/full_ecvs_scorecards.R index 2f33f5ef..414a1511 100644 --- a/full_ecvs_scorecards.R +++ b/full_ecvs_scorecards.R @@ -55,7 +55,7 @@ res <- Apply(list(loops), target = NULL, lims_ano_obs_tr = lims_ano_obs_tr)) }, ncores =recipe$Analysis$ncores) # RPS -source("/esarchive/scratch/nperez/git2/s2dv/R/GetProbs.R") +source("GetProbs.R") ano_hcst_probs_ev <- GetProbs(res$ano_hcst_ev, time_dim = 'loop', prob_thresholds = NULL, prob_dim = 'probs', indices_for_quantiles = NULL, -- GitLab From 98fd54a4df19c5fc067d6ffc4fb717b3f753460c Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 27 Oct 2023 17:16:47 +0200 Subject: [PATCH 11/91] Use cdo to 
get time attr instead of GRIB message --- modules/Loading/R/GRIB/GrbLoad.R | 46 ++++++++++++++++---------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/modules/Loading/R/GRIB/GrbLoad.R b/modules/Loading/R/GRIB/GrbLoad.R index ef1df0cb..f91a4825 100644 --- a/modules/Loading/R/GRIB/GrbLoad.R +++ b/modules/Loading/R/GRIB/GrbLoad.R @@ -22,29 +22,29 @@ GrbLoad <- function (dat, time_step = 1, has.memb = NULL, syear_time_dim = NULL, #---------------------------------------- #NOTE: ValidityTime is not considered now. So if the time frequency is less than daily, it has problem. - # METHOD 1: Get first message to figure out the validityDate/Time of each message - #NOTE: gm1$validityDate should be "s", "m", "h", etc. according to document. But our files have "1". - gm1 <- grib_get_message(file_to_load, 1) - first_ftime <- as.character(gm1$validityDate) - first_ftime_hour <- gm1$validityTime - # For monthly data - #NOTE: may not be correct because it is calculated by the first message - cdo_time_attr <- clock::add_months(as.POSIXct(paste0(first_ftime, ' ', first_ftime_hour), - format = "%Y%m%d %H", tz = 'UTC'), time_step - 1) - cdo_time <- format(cdo_time_attr, "%Y%m%d") - -# # METHOD 2: Use cdo showtimestamp (DEPENDENCY!) 
-# #TODO: Change to method 1 because can't predict what cdo will produce -# cdo_time <- system(paste0("cdo showtimestamp ", dat[[dat_i]]), intern = T) -# cdo_time <- strsplit(cdo_time, " ")[[length(cdo_time)]] -# cdo_time <- cdo_time[which(cdo_time != "")] -## # Check if there is member dim or not -## has_memb <- ifelse((length(unique(cdo_time)) == length(cdo_time)), FALSE, TRUE) -# if (has.memb) memb_dim_length <- length(cdo_time)/length(unique(cdo_time)) -# cdo_time <- unique(cdo_time)[time_step] #"2000-12-01T00:00:00" -# cdo_time_attr <- as.POSIXct(gsub('T', ' ', cdo_time), tz = 'UTC') -# cdo_time <- sapply(sapply(cdo_time, strsplit, "T"), '[[', 1) -# cdo_time <- gsub('-', '', cdo_time) +# # METHOD 1: Get first message to figure out the validityDate/Time of each message +# #NOTE: gm1$validityDate should be "s", "m", "h", etc. according to document. But our files have "1". +# gm1 <- grib_get_message(file_to_load, 1) +# first_ftime <- as.character(gm1$validityDate) +# first_ftime_hour <- gm1$validityTime +# # For monthly data +# #NOTE: may not be correct because it is calculated by the first message +# cdo_time_attr <- clock::add_months(as.POSIXct(first_ftime, +# format = "%Y%m%d", tz = 'UTC'), time_step - 1) +# cdo_time <- format(cdo_time_attr, "%Y%m%d") + + # METHOD 2: Use cdo showtimestamp (DEPENDENCY!) 
+ #TODO: Change to method 1 because can't predict what cdo will produce + cdo_time <- system(paste0("cdo showtimestamp ", dat[[dat_i]]), intern = T) + cdo_time <- strsplit(cdo_time, " ")[[length(cdo_time)]] + cdo_time <- cdo_time[which(cdo_time != "")] + # Check if there is member dim or not + has_memb <- ifelse((length(unique(cdo_time)) == length(cdo_time)), FALSE, TRUE) + if (has.memb) memb_dim_length <- length(cdo_time)/length(unique(cdo_time)) + cdo_time <- unique(cdo_time)[time_step] #"2000-12-01T00:00:00" + cdo_time_attr <- as.POSIXct(gsub('T', ' ', cdo_time), tz = 'UTC') + cdo_time <- sapply(sapply(cdo_time, strsplit, "T"), '[[', 1) + cdo_time <- gsub('-', '', cdo_time) #---------------------------------------- -- GitLab From f2ca221f0aa1731c9bcb8a6669278c83168b0d87 Mon Sep 17 00:00:00 2001 From: nperez Date: Mon, 30 Oct 2023 17:34:59 +0100 Subject: [PATCH 12/91] cross.val option for clim scores and bin_dim_abs --- modules/Skill/R/CRPS_clim.R | 31 +++++++++++++++++++++++-------- modules/Skill/R/RPS_clim.R | 21 ++++++++++++++++----- 2 files changed, 39 insertions(+), 13 deletions(-) diff --git a/modules/Skill/R/CRPS_clim.R b/modules/Skill/R/CRPS_clim.R index 36db4e94..50d63642 100644 --- a/modules/Skill/R/CRPS_clim.R +++ b/modules/Skill/R/CRPS_clim.R @@ -1,13 +1,28 @@ -CRPS_clim <- function(obs, memb_dim ='ensemble'){ +# CRPS version for climatology +CRPS_clim <- function(obs, memb_dim ='ensemble', return_mean = TRUE, clim.cross.val= TRUE){ time_dim <- names(dim(obs)) obs_time_len <- dim(obs)[time_dim] - + + if (isFALSE(clim.cross.val)) { ## Without cross-validation ref <- array(data = rep(obs, each = obs_time_len), dim = c(obs_time_len, obs_time_len)) - names(dim(ref)) <- c(time_dim, memb_dim) - # ref: [sdate, memb] - # obs: [sdate] + } else if (isTRUE(clim.cross.val)) { ## With cross-validation (excluding the value of that year to create ref for that year) + ref <- array(data = NA, dim = c(obs_time_len, obs_time_len - 1)) + for (i in 1:obs_time_len) { + 
ref[i, ] <- obs[-i] + } + } + + names(dim(ref)) <- c(time_dim, memb_dim) + # ref: [sdate, memb] + # obs: [sdate] crps_ref <- s2dv:::.CRPS(exp = ref, obs = obs, time_dim = time_dim, memb_dim = memb_dim, - dat_dim = NULL, Fair = FALSE) - # crps_ref should be [sdate] - return(mean(crps_ref)) + dat_dim = NULL, Fair = FALSE) + + # crps_ref should be [sdate] + if (return_mean == TRUE) { + return(mean(crps_ref)) + } else { + return(crps_ref) + } } + diff --git a/modules/Skill/R/RPS_clim.R b/modules/Skill/R/RPS_clim.R index e8b6452d..ae9e6cc2 100644 --- a/modules/Skill/R/RPS_clim.R +++ b/modules/Skill/R/RPS_clim.R @@ -1,12 +1,18 @@ # RPS version for climatology -RPS_clim <- function(obs, indices_for_clim = NULL, prob_thresholds = c(1/3, 2/3)) { +RPS_clim <- function(obs, indices_for_clim = NULL, + prob_thresholds = c(1/3, 2/3), cross.val = T, + bin_dim_abs = NULL, return_mean = TRUE) { if (is.null(indices_for_clim)){ indices_for_clim <- 1:length(obs) } - - obs_probs <- .GetProbs(data = obs, indices_for_quantiles = indices_for_clim, ## temporarily removed s2dv::: - prob_thresholds = prob_thresholds, weights = NULL, cross.val = T) ## here! 
+ if (is.null(bin_dim_abs)) { + obs_probs <- .GetProbs(data = obs, indices_for_quantiles = indices_for_clim, ## temporarily removed s2dv::: + prob_thresholds = prob_thresholds, weights = NULL, + cross.val = cross.val) + } else { + obs_probs <- obs + } # clim_probs: [bin, sdate] clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), 1 - prob_thresholds[length(prob_thresholds)]) clim_probs <- array(clim_probs, dim = dim(obs_probs)) @@ -16,5 +22,10 @@ RPS_clim <- function(obs, indices_for_clim = NULL, prob_thresholds = c(1/3, 2/3) probs_obs_cumsum <- apply(obs_probs, 2, cumsum) rps_ref <- apply((probs_clim_cumsum - probs_obs_cumsum)^2, 2, sum) - return(mean(rps_ref)) + if (return_mean == TRUE) { + return(mean(rps_ref)) + } else { + return(rps_ref) + } } + -- GitLab From 6980ec74df3d358269a169d186015229cfbba61a Mon Sep 17 00:00:00 2001 From: nperez Date: Tue, 31 Oct 2023 18:41:36 +0100 Subject: [PATCH 13/91] fixes to grib code --- modules/Loading/R/GRIB/GrbLoad.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/modules/Loading/R/GRIB/GrbLoad.R b/modules/Loading/R/GRIB/GrbLoad.R index f91a4825..249fb85c 100644 --- a/modules/Loading/R/GRIB/GrbLoad.R +++ b/modules/Loading/R/GRIB/GrbLoad.R @@ -24,7 +24,7 @@ GrbLoad <- function (dat, time_step = 1, has.memb = NULL, syear_time_dim = NULL, # # METHOD 1: Get first message to figure out the validityDate/Time of each message # #NOTE: gm1$validityDate should be "s", "m", "h", etc. according to document. But our files have "1". 
-# gm1 <- grib_get_message(file_to_load, 1) + gm1 <- grib_get_message(file_to_load, 1) # first_ftime <- as.character(gm1$validityDate) # first_ftime_hour <- gm1$validityTime # # For monthly data @@ -39,7 +39,7 @@ GrbLoad <- function (dat, time_step = 1, has.memb = NULL, syear_time_dim = NULL, cdo_time <- strsplit(cdo_time, " ")[[length(cdo_time)]] cdo_time <- cdo_time[which(cdo_time != "")] # Check if there is member dim or not - has_memb <- ifelse((length(unique(cdo_time)) == length(cdo_time)), FALSE, TRUE) + has.memb <- ifelse((length(unique(cdo_time)) == length(cdo_time)), FALSE, TRUE) if (has.memb) memb_dim_length <- length(cdo_time)/length(unique(cdo_time)) cdo_time <- unique(cdo_time)[time_step] #"2000-12-01T00:00:00" cdo_time_attr <- as.POSIXct(gsub('T', ' ', cdo_time), tz = 'UTC') @@ -178,7 +178,8 @@ GrbLoad <- function (dat, time_step = 1, has.memb = NULL, syear_time_dim = NULL, syear_time_dim, dim(result)[3:4], ensemble = 1)) } else { - result <- array(result, dim = c(dim(result)[1:2], ensemble = has.memb, + nmemb <- as.numeric(dim(result)[3])/length(time_step) + result <- array(result, dim = c(dim(result)[1:2], ensemble = nmemb, time = length(time_step), dim(result)[4])) result <- s2dv::Reorder(result, c("syear", "time", "latitude", "longitude", "ensemble")) dim(result) <- c(dat = 1, var = 1, sday = 1, sweek = 1, dim(result)) -- GitLab From 7253ffa18b20e43bf65cd7681153c343a729e0bf Mon Sep 17 00:00:00 2001 From: nperez Date: Tue, 31 Oct 2023 18:57:48 +0100 Subject: [PATCH 14/91] fix grib to load observations --- modules/Loading/R/GRIB/GrbLoad.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/Loading/R/GRIB/GrbLoad.R b/modules/Loading/R/GRIB/GrbLoad.R index 249fb85c..739ff6b3 100644 --- a/modules/Loading/R/GRIB/GrbLoad.R +++ b/modules/Loading/R/GRIB/GrbLoad.R @@ -172,7 +172,7 @@ GrbLoad <- function (dat, time_step = 1, has.memb = NULL, syear_time_dim = NULL, times <- as.POSIXct(times, origin = '1970-01-01', tz = 'UTC') # Reshape 
and reorder array - if (is.null(has.memb)) { # obs doesn't have memb; reshape syear/time dim + if (is.null(has.memb) | has.memb == FALSE) { # obs doesn't have memb; reshape syear/time dim result <- s2dv::Reorder(result, c("syear", "time", "latitude", "longitude")) result <- array(result, dim = c(dat = 1, var = 1, syear_time_dim, dim(result)[3:4], -- GitLab From 4e9921255544a16fca4e188ac660b0229228e8e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre-Antoine=20Bretonni=C3=A8re?= Date: Sun, 5 Nov 2023 17:50:17 +0100 Subject: [PATCH 15/91] Update conda with cdo and autosubmit --- .../environment-cerise-localgribR-ecmwf.yml | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/conda_installation/environment-cerise-localgribR-ecmwf.yml b/conda_installation/environment-cerise-localgribR-ecmwf.yml index 983347f0..c70811e8 100644 --- a/conda_installation/environment-cerise-localgribR-ecmwf.yml +++ b/conda_installation/environment-cerise-localgribR-ecmwf.yml @@ -208,6 +208,7 @@ dependencies: - r-lattice=0.21_8=r42h57805ef_1 - r-lifecycle=1.0.3=r42hc72bb7e_2 - r-listenv=0.9.0=r42hc72bb7e_1 + - r-lobstr=1.1.2=r42ha503ecb_3 - r-log4r=0.4.3=r42h57805ef_1 - r-lubridate=1.9.2=r42h57805ef_2 - r-magick=2.7.3=r42h7525677_1 @@ -242,6 +243,7 @@ dependencies: - r-proj4=1.0_12=r42h4db2be8_0 - r-promises=1.2.1=r42ha503ecb_0 - r-proxy=0.4_27=r42h57805ef_2 + - r-pryr=0.1.6=r42ha503ecb_1 - r-ps=1.7.5=r42h57805ef_1 - r-r6=2.5.1=r42hc72bb7e_2 - r-rappdirs=0.3.3=r42h57805ef_2 @@ -328,4 +330,46 @@ dependencies: - xz=5.2.6=h166bdaf_0 - zlib=1.2.13=hd590300_5 - zstd=1.5.5=hfc55251_0 + - pip: + - argparse==1.4.0 + - autosubmit==4.0.98 + - autosubmitconfigparser==1.0.49 + - bcrypt==4.0.1 + - bscearth-utils==0.5.2 + - cdo==1.6.0 + - certifi==2023.7.22 + - cffi==1.16.0 + - charset-normalizer==3.3.1 + - configobj==5.0.8 + - coverage==7.3.2 + - cryptography==41.0.5 + - cycler==0.12.1 + - cython==3.0.4 + - fonttools==4.43.1 + - idna==3.4 + - iniconfig==2.0.0 + - kiwisolver==1.4.5 + - 
matplotlib==3.5.3 + - mock==5.1.0 + - networkx==2.6.3 + - nose==1.3.7 + - packaging==23.2 + - paramiko==3.3.1 + - pillow==10.1.0 + - pluggy==1.3.0 + - portalocker==2.7.0 + - psutil==5.9.6 + - py3dotplus==1.1.0 + - pycparser==2.21 + - pygments==2.16.1 + - pynacl==1.5.0 + - pyparsing==3.1.1 + - pytest==7.4.3 + - python-dateutil==2.8.2 + - pythondialog==3.5.3 + - requests==2.31.0 + - ruamel-yaml==0.17.21 + - six==1.16.0 + - urllib3==2.0.7 + - xlib==0.21 prefix: /perm/cyce/conda/envs/condaCerise -- GitLab From df7ecc77301a5c078281d3e27d0c09d8b4de8699 Mon Sep 17 00:00:00 2001 From: nperez Date: Fri, 10 Nov 2023 12:17:02 +0000 Subject: [PATCH 16/91] Working version --- GetProbs.R | 22 +- .../environment-cerise-localgribR-ecmwf.yml | 44 ++++ conf/archive.yml | 13 +- conf/slurm_templates/run_parallel_workflow.sh | 1 + conf/slurm_templates/run_scorecards.sh | 1 + datadownloading/dl-era5.bash | 127 +++++++++++ datadownloading/dl-seasonal.bash | 126 +++++++++++ full_ecvs_scorecards.R | 200 ++++++++++++++---- launch_SUNSET.sh | 2 + modules/Loading/R/GRIB/GrbLoad.R | 14 +- modules/Scorecards/R/tmp/LoadMetrics.R | 2 +- modules/Scorecards/R/tmp/ScorecardsSingle.R | 4 +- recipe_NAO_scorecards.yml | 4 +- recipe_ecvs_scorecards_seasonal.yml | 42 ++-- recipe_tas_scorecards_seasonal.yml | 4 +- 15 files changed, 522 insertions(+), 84 deletions(-) create mode 100755 datadownloading/dl-era5.bash create mode 100755 datadownloading/dl-seasonal.bash diff --git a/GetProbs.R b/GetProbs.R index b510f30f..9509a601 100644 --- a/GetProbs.R +++ b/GetProbs.R @@ -31,8 +31,9 @@ #' which should not be included. The default value is NULL and, in this case, #' 'prob_thresholds' is used for calculating the probabilities. #'@param bin_dim_abs A character string of the dimension name of -#' 'abs_thresholds' array in which category limits are stored. The default -#' value is 'bin'. +#' 'abs_thresholds' array in which category limits are stored. 
It will also be +#' the probabilistic category dimension name in the output. The default value +#' is 'bin'. #'@param indices_for_quantiles A vector of the indices to be taken along #' 'time_dim' for computing the absolute thresholds between the probabilistic #' categories. If NULL (default), the whole period is used. It is only used @@ -49,9 +50,9 @@ #' computation. The default value is NULL. #' #'@return -#'A numerical array of probabilities with dimensions c(bin, the rest dimensions -#'of 'data' except 'memb_dim'). 'bin' dimension has the length of probabilistic -#'categories, i.e., \code{length(prob_thresholds) + 1}. +#'A numerical array of probabilities with dimensions c(bin_dim_abs, the rest +#'dimensions of 'data' except 'memb_dim'). 'bin' dimension has the length of +#'probabilistic categories, i.e., \code{length(prob_thresholds) + 1}. #' #'@examples #'data <- array(rnorm(2000), dim = c(ensemble = 25, sdate = 20, time = 4)) @@ -101,6 +102,10 @@ GetProbs <- function(data, time_dim = 'sdate', memb_dim = 'member', "dimension exists, set it as NULL.") } } + ## bin_dim_abs + if (!is.character(bin_dim_abs) | length(bin_dim_abs) != 1) { + stop('Parameter "bin_dim_abs" must be a character string.') + } ## prob_thresholds, abs_thresholds if (!is.null(abs_thresholds) & !is.null(prob_thresholds)) { .warning(paste0("Parameters 'prob_thresholds' and 'abs_thresholds' are both provided. 
", @@ -134,9 +139,6 @@ GetProbs <- function(data, time_dim = 'sdate', memb_dim = 'member', names(dim(abs_thresholds)) <- bin_dim_abs } # bin_dim_abs - if (!is.character(bin_dim_abs) | length(bin_dim_abs) != 1) { - stop('Parameter "bin_dim_abs" must be a character string.') - } if (!(bin_dim_abs %in% names(dim(abs_thresholds)))) { stop("Parameter abs_thresholds' can be a vector or array with 'bin_dim_abs' dimension.") } @@ -213,7 +215,7 @@ GetProbs <- function(data, time_dim = 'sdate', memb_dim = 'member', if (is.null(abs_thresholds)) { res <- Apply(data = list(data = data), target_dims = c(time_dim, memb_dim), - output_dims = c("bin", time_dim), + output_dims = c(bin_dim_abs, time_dim), fun = .GetProbs, prob_thresholds = prob_thresholds, indices_for_quantiles = indices_for_quantiles, @@ -221,7 +223,7 @@ GetProbs <- function(data, time_dim = 'sdate', memb_dim = 'member', } else { res <- Apply(data = list(data = data, abs_thresholds = abs_thresholds), target_dims = list(c(time_dim, memb_dim), abs_target_dims), - output_dims = c("bin", time_dim), + output_dims = c(bin_dim_abs, time_dim), fun = .GetProbs, prob_thresholds = NULL, indices_for_quantiles = NULL, diff --git a/conda_installation/environment-cerise-localgribR-ecmwf.yml b/conda_installation/environment-cerise-localgribR-ecmwf.yml index 983347f0..c70811e8 100644 --- a/conda_installation/environment-cerise-localgribR-ecmwf.yml +++ b/conda_installation/environment-cerise-localgribR-ecmwf.yml @@ -208,6 +208,7 @@ dependencies: - r-lattice=0.21_8=r42h57805ef_1 - r-lifecycle=1.0.3=r42hc72bb7e_2 - r-listenv=0.9.0=r42hc72bb7e_1 + - r-lobstr=1.1.2=r42ha503ecb_3 - r-log4r=0.4.3=r42h57805ef_1 - r-lubridate=1.9.2=r42h57805ef_2 - r-magick=2.7.3=r42h7525677_1 @@ -242,6 +243,7 @@ dependencies: - r-proj4=1.0_12=r42h4db2be8_0 - r-promises=1.2.1=r42ha503ecb_0 - r-proxy=0.4_27=r42h57805ef_2 + - r-pryr=0.1.6=r42ha503ecb_1 - r-ps=1.7.5=r42h57805ef_1 - r-r6=2.5.1=r42hc72bb7e_2 - r-rappdirs=0.3.3=r42h57805ef_2 @@ -328,4 +330,46 @@ 
dependencies: - xz=5.2.6=h166bdaf_0 - zlib=1.2.13=hd590300_5 - zstd=1.5.5=hfc55251_0 + - pip: + - argparse==1.4.0 + - autosubmit==4.0.98 + - autosubmitconfigparser==1.0.49 + - bcrypt==4.0.1 + - bscearth-utils==0.5.2 + - cdo==1.6.0 + - certifi==2023.7.22 + - cffi==1.16.0 + - charset-normalizer==3.3.1 + - configobj==5.0.8 + - coverage==7.3.2 + - cryptography==41.0.5 + - cycler==0.12.1 + - cython==3.0.4 + - fonttools==4.43.1 + - idna==3.4 + - iniconfig==2.0.0 + - kiwisolver==1.4.5 + - matplotlib==3.5.3 + - mock==5.1.0 + - networkx==2.6.3 + - nose==1.3.7 + - packaging==23.2 + - paramiko==3.3.1 + - pillow==10.1.0 + - pluggy==1.3.0 + - portalocker==2.7.0 + - psutil==5.9.6 + - py3dotplus==1.1.0 + - pycparser==2.21 + - pygments==2.16.1 + - pynacl==1.5.0 + - pyparsing==3.1.1 + - pytest==7.4.3 + - python-dateutil==2.8.2 + - pythondialog==3.5.3 + - requests==2.31.0 + - ruamel-yaml==0.17.21 + - six==1.16.0 + - urllib3==2.0.7 + - xlib==0.21 prefix: /perm/cyce/conda/envs/condaCerise diff --git a/conf/archive.yml b/conf/archive.yml index 1aa3c52e..39d1357c 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -8,7 +8,7 @@ cerise: monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f6h/", "sfcWind":"monthly_mean/sfcWind_f6h/", "tasmin":"monthly_mean/tasmin_f24h/", "tasmax":"monthly_mean/tasmax_f24h/", - "psl":"monthly_mean/psl_f6h", "tdps":"monthly_mean/tdps_f6h"} + "psl":"monthly_mean/psl_f6h/", "tdps":"monthly_mean/tdps_f6h/"} nmember: hcst: 40 calendar: "proleptic_gregorian" @@ -21,7 +21,7 @@ cerise: monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f6h/", "sfcWind": "monthly_mean/sfcWind_f6h/", "tasmax":"monthly_mean/tasmax_f6h/", "tasmin": "monthly_mean/tasmin_f6h/", - "psl":"psl_f6h", "tdps":"tdps_f6h"} + "psl":"monthly_mean/psl_f6h/", "tdps":"monthly_mean/tdps_f6h/"} nmember: hcst: 25 time_stamp_lag: "+1" @@ -34,7 +34,7 @@ cerise: monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f6h/", "sfcWind": 
"monthly_mean/sfcWind_f6h/", "tasmax":"monthly_mean/tasmax_f6h/", "tasmin": "monthly_mean/tasmin_f6h/", - "psl":"psl_f6h", "tdps":"tdps_f6h"} + "psl":"monthly_mean/psl_f6h/", "tdps":"monthly_mean/tdps_f6h/"} nmember: hcst: 28 time_stamp_lag: "+1" @@ -45,8 +45,11 @@ cerise: name: "ERA5" institution: "European Centre for Medium-Range Weather Forecasts" src: "recon/ecmwf/era5/monthly_mean/" - monthly_mean: {"tas":"tas_f1h", "prlr":"prlr_f1h", "psl":"psl_f1h", "tos":"tos_f1h", - "tasmax":"tasmax_f1h", "tasmin":"tasmin_f1h", "tdps":"tdps_f1h"} + monthly_mean: {"tas":"monthly_mean/tas_f1h/", "monthly_mean/prlr":"prlr_f1h/", + "psl":"monthly_mean/psl_f1h/", + "tos":"monthly_mean/tos_f1h/", + "tasmax":"monthly_mean/tasmax_f1h/", + "tasmin":"monthly_mean/tasmin_f1h/", "tdps":"monthly_mean/tdps_f1h/"} calendar: "standard" reference_grid: "conf/grid_description/griddes_GRIB_system5_m1.txt" esarchive: diff --git a/conf/slurm_templates/run_parallel_workflow.sh b/conf/slurm_templates/run_parallel_workflow.sh index 024c0d43..789fe362 100644 --- a/conf/slurm_templates/run_parallel_workflow.sh +++ b/conf/slurm_templates/run_parallel_workflow.sh @@ -13,5 +13,6 @@ atomic_recipe=$2 #source MODULES module load conda/22.11.1-2 conda activate condaCerise +export LD_LIBRARY_PATH=/perm/cyce/conda/envs/condaCerise/lib Rscript ${script} ${atomic_recipe} diff --git a/conf/slurm_templates/run_scorecards.sh b/conf/slurm_templates/run_scorecards.sh index aea38e48..9a2b5c82 100644 --- a/conf/slurm_templates/run_scorecards.sh +++ b/conf/slurm_templates/run_scorecards.sh @@ -17,6 +17,7 @@ outdir=$2 #source MODULES module load conda/22.11.1-2 conda activate condaCerise +export LD_LIBRARY_PATH=/perm/cyce/conda/envs/condaCerise/lib # Execute scorecards Rscript modules/Scorecards/execute_scorecards.R ${recipe} ${outdir} diff --git a/datadownloading/dl-era5.bash b/datadownloading/dl-era5.bash new file mode 100755 index 00000000..f6b33f86 --- /dev/null +++ b/datadownloading/dl-era5.bash @@ -0,0 +1,127 @@ 
+#!/bin/bash + + + +#module load ecmwfapi +module load conda/22.11.1-2 +conda activate cdsapi +#export LD_LIBRARY_PATH=/perm/cyce/conda/envs/condaCerise/lib + +set -v + + +#workdir=$1 +#model=$2 +# freq=$3 +freq="mon" +workdir=$SCRATCH/cerise/data/ + + if [[ $freq == "mon" ]]; then + freq_dir="monthly_mean" + suffix="_f1h" + else + freq_dir="1hourly" + suffix="" + fi + + mkdir -p $workdir/esarchive/recon/ecmwf/era5/$freq_dir + cd $workdir/esarchive/recon/ecmwf/era5/$freq_dir + + for year in $( seq 1979 2023); do + for mon in $( seq -w 1 12); do + + if [[ $freq == "mon" ]]; then + + for var in sfcWind tas tos psl prlr tasmin tasmax tdps; do + case $var in + "sfcWind") code="10m_wind_speed";; + "tas") code="2m_temperature";; + "tos") code="sea_surface_temperature";; + "psl") code="mean_sea_level_pressure";; + "prlr") code="total_precipitation";; + "tdps") code="2m_dewpoint_temperature";; + esac + + mkdir -p ${var}${suffix} + + if [[ ! -f ${var}${suffix}/${var}_${year}${mon}01.grb ]] ; then +python << EOF +import cdsapi + +c = cdsapi.Client() + +c.retrieve( + 'reanalysis-era5-single-levels-monthly-means', + { + 'format': 'grib', + 'product_type': 'monthly_averaged_reanalysis', + 'variable': '$code', + 'year': '$year', + 'month': '$mon', + 'time': '00:00', + }, + '${var}${suffix}/${var}_${year}${mon}.grib') +EOF + fi #file exists + done #var + else #freq 6h + for var in uas vas tas tos psl prlr tasmin tasmax tdps; do + case $var in + "sfcWind") code="10m_wind_speed";; + "tas") code="2m_temperature";; + "uas") code="10m_u_component_of_wind";; + "vas") code="10m_v_component_of_wind";; + "tos") code="sea_surface_temperature";; + "psl") code="mean_sea_level_pressure";; + "prlr") code="total_precipitation";; + "tdps") code="2m_dewpoint_temperature";; + esac + + mkdir -p ${var}${suffix} + if [[ ! 
-f ${var}${suffix}/${var}_${year}${mon}01.grb ]] ; then +python << EOF + +import cdsapi + +c = cdsapi.Client() + +c.retrieve( + 'reanalysis-era5-single-levels', + { + 'product_type': 'reanalysis', + 'format': 'grib', + 'variable': $code, + 'time': [ + '00:00', '01:00', '02:00', + '03:00', '04:00', '05:00', + '06:00', '07:00', '08:00', + '09:00', '10:00', '11:00', + '12:00', '13:00', '14:00', + '15:00', '16:00', '17:00', + '18:00', '19:00', '20:00', + '21:00', '22:00', '23:00', + ], + 'day': [ + '01', '02', '03', + '04', '05', '06', + '07', '08', '09', + '10', '11', '12', + '13', '14', '15', + '16', '17', '18', + '19', '20', '21', + '22', '23', '24', + '25', '26', '27', + '28', '29', '30', + ], + 'year': '$year', + 'month': '$mon', + }, + '${var}${suffix}/${var}_${year}${mon}.grb') + +EOF + + fi #file exists + done #var + fi #freq + done #year + done #mon diff --git a/datadownloading/dl-seasonal.bash b/datadownloading/dl-seasonal.bash new file mode 100755 index 00000000..3efea020 --- /dev/null +++ b/datadownloading/dl-seasonal.bash @@ -0,0 +1,126 @@ +#!/bin/bash + + + +#module load ecmwfapi +module load conda/22.11.1-2 +conda activate condaCerise +export LD_LIBRARY_PATH=/perm/cyce/conda/envs/condaCerise/lib + +set -v + + +#workdir=$1 +#model=$2 +# freq=$3 +freq="mon" +workdir=$SCRATCH/cerise/data/ + + +for model in ecmwf cmcc meteo_france dwd ukmo; do +#model="ecmwf" #cmcc, meteo_france, dwd, cmcc, ukmo + + case $model in + + "ecmwf") origin="ecmf" ; system="51";; + "meteo_france") origin="lfpw" ; system="8";; + "dwd") origin="edzw" ; system="21";; + "cmcc") origin="cmcc" ; system="35" ;; + "ukmo") origin="egrr" ; system="602" ;; + esac + + + if [[ $freq == "mon" ]]; then + freq_dir="monthly_mean" + suffix="_f6h" + else + freq_dir="6hourly" + suffix="" + fi + + mkdir -p $workdir/esarchive/exp/$model/system$system/$freq_dir + cd $workdir/esarchive/exp/$model/system$system/$freq_dir + + for year in $( seq 1979 2023); do + for mon in $( seq -w 1 12); do + + if [[ $freq 
== "mon" ]]; then + + for var in sfcWind tas tos psl prlr tasmin tasmax tdps; do + case $var in + "sfcWind") code="207.128";; + "tas") code="167.128";; + "tos") code="34.128";; + "psl") code="151.128";; + "prlr") code="228.172";; + "tasmin") code="52.128";; + "tasmax") code="51.128";; + "tdps") code="168.128";; + esac + + mkdir -p ${var}${suffix} + + if [[ ! -f ${var}${suffix}/${var}_${year}${mon}01.grb ]] ; then +mars << EOF +retrieve, +class=c3, +date=$year-$mon-01, +expver=1, +fcmonth=1/2/3/4/5/6, +levtype=sfc, +method=1, +number=0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30/31/32/33/34/35/36/37/38/39/40/41/42/43/44/45/46/47/48/49/50, +origin=$origin, +param=$code, +stream=msmm, +system=$system, +time=00:00:00, +type=fcmean, +expect=any, +target='${var}${suffix}/${var}_${year}${mon}01.grb' + +EOF + cdo shifttime,-15days ${var}${suffix}/${var}_${year}${mon}01.grb ${var}${suffix}/${var}_${year}${mon}01.grb2 + mv ${var}${suffix}/${var}_${year}${mon}01.grb2 ${var}${suffix}/${var}_${year}${mon}01.grb + fi #file exists + done #var + else #freq 6h + for var in sfcWind tas tos psl prlr tasmin tasmax tdps; do + case $var in + "sfcWind") code="207.128";; + "tas") code="167.128";; + "tos") code="34.128";; + "psl") code="151.128";; + "prlr") code="228.172";; + "tasmin") code="52.128";; + "tasmax") code="51.128";; + "tdps") code="168.128";; + esac + + mkdir -p ${var}${suffix} + if [[ ! 
-f ${var}${suffix}/${var}_${year}${mon}01.grb ]] ; then +mars << EOF +retrieve, +class=c3, +date=$year-$mon-01 +expver=1, +step=1/to/5160/by/6, +levtype=sfc, +method=1, +number=0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30/31/32/33/34/35/36/37/38/39/40/41/42/43/44/45/46/47/48/49/50, +origin=$origin, +param=$code, +stream=mmsf, +system=$system, +time=00:00:00, +type=fc, +expect=any, +target="${var}${suffix}/${var}_${year}${mon}01.grb" +EOF + + fi #file exists + done #var + fi #freq + done #year + done #mon +done #model diff --git a/full_ecvs_scorecards.R b/full_ecvs_scorecards.R index 414a1511..d3d75436 100644 --- a/full_ecvs_scorecards.R +++ b/full_ecvs_scorecards.R @@ -2,6 +2,7 @@ source("modules/Loading/Loading.R") #source("modules/Units/Units.R") source("modules/Saving/Saving.R") +source("modules/Units/Units.R") source("modules/Visualization/Visualization.R") args = commandArgs(trailingOnly = TRUE) recipe_file <- args[1] @@ -10,68 +11,161 @@ recipe <- read_atomic_recipe(recipe_file) #recipe <- prepare_outputs(recipe_file) # Load datasets data <- Loading(recipe) -#data <- Units(recipe, data) +data <- Units(recipe, data) # Full-cross-val workflow sdate_dim <- dim(data$hcst$data)['syear'] +nmemb <- dim(data$hcst$data)['ensemble'] +nftime <- dim(data$hcst$data)['time'] +nlats <- dim(data$hcst$data)['latitude'] +nlons <- dim(data$hcst$data)['longitude'] + +info(recipe$Run$logger, + paste("ftime", nftime)) +info(recipe$Run$logger, + paste(dim(data$obs$data))) + + cross <- CSTools:::.make.eval.train.dexes('leave-one-out', sdate_dim, NULL) # Paralelized: -loops <- array(1:length(cross), c(loop = length(cross))) -res <- Apply(list(loops), target = NULL, - fun = function(t) { - # subset years: +#loops <- array(1:length(cross), c(loop = length(cross))) +#res <- Apply(list(loops), target = NULL, +# fun = function(t) { +ano_hcst_ev_res <- array(NA, c(nftime, nlats, + nlons, nmemb, + sdate_dim)) +ano_obs_ev_res <- array(NA, c(nftime, 
nlats, + nlons, ensemble = 1, sdate_dim)) +ano_obs_tr_res <- array(NA, c(sample = sdate_dim - 1, nftime, + nlats, nlons, ensemble = 1, sdate_dim)) +lims_ano_hcst_tr_res <- array(NA, c(probs = 2, nftime, nlats, + nlons, sdate_dim)) +lims_ano_obs_tr_res <- array(NA, c(probs = 2, nftime, nlats, + nlons, sdate_dim)) +info(recipe$Run$logger, + paste(dim(data$hcst$data))) +info(recipe$Run$logger, + paste(names(dim(data$hcst$data)))) + + + +#crps_clim_res <- array(NA, c(nftime, nlats, nlons, ensemble = 1, sdate_dim)) +for (t in 1:sdate_dim) { + info(recipe$Run$logger, + paste("crossval:", t)) + + # subset years: Subset works at BSC not at Athos # training - obs_tr <- Subset(data$obs$data, along = 'syear', - indices = cross[[t]]$train.dexes) - hcst_tr <- Subset(data$hcst$data, along = 'syear', - indices = cross[[t]]$train.dexes) + obs_tr <- data$obs$data[1,1,1,1,cross[[t]]$train.dexes,,,,] + #Subset(data$obs$data, along = 'syear', + # indices = cross[[t]]$train.dexes) + hcst_tr <- data$hcst$data[1,1,1,1,cross[[t]]$train.dexes,,,,] + #Subset(data$hcst$data, along = 'syear', + # indices = cross[[t]]$train.dexes) # eval years - hcst_ev <- Subset(data$hcst$data, along = 'syear', - indices = cross[[t]]$eval.dexes) - obs_ev <- Subset(data$obs$data, along = 'syear', - indices = cross[[t]]$eval.dexes) - # compute climatology: + hcst_ev <- data$hcst$data[1,1,1,1,cross[[t]]$eval.dexes,,,,] + #Subset(data$hcst$data, along = 'syear', + # indices = cross[[t]]$eval.dexes) + obs_ev <- data$obs$data[1,1,1,1,cross[[t]]$eval.dexes,,,,] + #Subset(data$obs$data, along = 'syear', + # indices = cross[[t]]$eval.dexes) + info(recipe$Run$logger, + paste(names(dim(hcst_ev)))) + info(recipe$Run$logger, + paste(dim(hcst_ev))) + + info(recipe$Run$logger, + paste(names(dim(obs_tr)))) + info(recipe$Run$logger, + paste(dim(obs_tr))) + + dim(obs_tr) <- c(syear = (as.numeric(sdate_dim) - 1), nftime, nlats, nlons) + dim(obs_ev) <- c(syear = 1, nftime, nlats, nlons) + dim(hcst_tr) <- c(syear = 
(as.numeric(sdate_dim) - 1), nftime, nlats, nlons, nmemb) + dim(hcst_ev) <- c(syear = 1, nftime, nlats, nlons, nmemb) + # compute climatology: clim_obs_tr <- MeanDims(obs_tr, 'syear') clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble')) # compute anomalies: - ano_obs_tr <- s2dv::Ano(obs_tr, clim_obs_tr) - ano_hcst_tr <- s2dv::Ano(hcst_tr, clim_hcst_tr) - ano_hcst_ev <- s2dv::Ano(hcst_ev, clim_hcst_tr) - ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr) + info(recipe$Run$logger, + paste("dims:", dim(clim_hcst_tr))) + + ano_obs_tr <- s2dv::Ano(obs_tr, clim_obs_tr, + ncores = recipe$Analysis$ncores) + ano_hcst_tr <- s2dv::Ano(hcst_tr, clim_hcst_tr, + ncores = recipe$Analysis$ncores) + ano_hcst_ev <- s2dv::Ano(hcst_ev, clim_hcst_tr, + ncores = recipe$Analysis$ncores) + ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr, + ncores = recipe$Analysis$ncores) + + # CRPS_clim: + # build the reference forecast usnig the observational trainig sample + #ref <- array(data = rep(ano_obs_tr[1,1,1,1,,1,1,1,1], each = 1), + # dim = c(sdate = 1, member = sdate_dim)) + #crps_clim <- SpecsVerification::enscrps_cpp(ens = ref, + # obs = ano_obs_ev[1,1,1,1,,1,1,1,1], R_new = NA) +# crps_clim <- Apply(list(ano_obs_ev, ano_obs_tr), target_dims = 'syear', +# fun = function(x,y) { +# ref <- array(y, c(syear = 1, ensemble = length(y))) +# res <- SpecsVerification::enscrps_cpp(ens = ref, +# obs = x, R_new = NA) #No fair +# }, ncores = recipe$Analysis$ncores)$output1 + rm("clim_obs_tr", "clim_hcst_tr", "obs_tr", "hcst_tr", "obs_ev", "hcst_ev") #Category limits lims_ano_hcst_tr <- Apply(ano_hcst_tr, target_dims = c('syear', 'ensemble'), fun = function(x) {quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)}, - output_dims = 'probs')$output1 - lims_ano_obs_tr <- Apply(ano_obs_tr, target_dims = c('syear', 'ensemble'), + output_dims = 'probs', ncores = recipe$Analysis$ncores)$output1 + lims_ano_obs_tr <- Apply(ano_obs_tr, target_dims = c('syear'),#, 'ensemble'), fun = function(x) {quantile(as.vector(x), 
c(1/3, 2/3), na.rm = TRUE)}, - output_dims = 'probs')$output1 + output_dims = 'probs', ncores = recipe$Analysis$ncores)$output1 #mem <- mem_used() gc() - return(list(#ano_obs_tr = ano_obs_tr, ano_hcst_tr = ano_hcst_tr, - ano_hcst_ev = ano_hcst_ev, ano_obs_ev = ano_obs_ev, - #clim_obs_tr = clim_obs_tr, clim_hcst_tr = clim_hcst_tr, - lims_ano_hcst_tr = lims_ano_hcst_tr, - lims_ano_obs_tr = lims_ano_obs_tr)) - }, ncores =recipe$Analysis$ncores) + ano_hcst_ev_res[,,,,t] <- ano_hcst_ev + ano_obs_ev_res[,,,,t] <- ano_obs_ev + ano_obs_tr_res[,,,,,t] <- ano_obs_tr + lims_ano_hcst_tr_res[,,,,t] <- lims_ano_hcst_tr + lims_ano_obs_tr_res[,,,,t] <- lims_ano_obs_tr +# crps_clim_res[,,,,t] <- crps_clim + res <- list(ano_hcst_ev = ano_hcst_ev_res, + ano_obs_ev = ano_obs_ev_res, + ano_obs_tr = ano_obs_tr_res, #required as reference forecast for the CRPSS + lims_ano_hcst_tr = lims_ano_hcst_tr_res, + lims_ano_obs_tr = lims_ano_obs_tr_res)#, +# crps_clim = crps_clim_res) +} +# return(list(#ano_obs_tr = ano_obs_tr, ano_hcst_tr = ano_hcst_tr, +# ano_hcst_ev = ano_hcst_ev, ano_obs_ev = ano_obs_ev, +# #clim_obs_tr = clim_obs_tr, clim_hcst_tr = clim_hcst_tr, +# lims_ano_hcst_tr = lims_ano_hcst_tr, +# lims_ano_obs_tr = lims_ano_obs_tr)) +# }, ncores =recipe$Analysis$ncores) # RPS source("GetProbs.R") -ano_hcst_probs_ev <- GetProbs(res$ano_hcst_ev, time_dim = 'loop', +ano_hcst_probs_ev <- GetProbs(res$ano_hcst_ev, time_dim = 'syear', prob_thresholds = NULL, - prob_dim = 'probs', indices_for_quantiles = NULL, + bin_dim_abs = 'probs', + indices_for_quantiles = NULL, memb_dim = 'ensemble', abs_thresholds = res$lims_ano_hcst_tr, ncores = recipe$Analysis$ncores) -ano_obs_probs_ev <- GetProbs(res$ano_obs_ev, time_dim = 'loop', +ano_obs_probs_ev <- GetProbs(res$ano_obs_ev, time_dim = 'syear', prob_thresholds = NULL, - prob_dim = 'probs', indices_for_quantiles = NULL, - memb_dim = 'ensemble', abs_thresholds = res$lims_ano_obs_tr, + bin_dim_abs = 'probs', + indices_for_quantiles = NULL, + memb_dim = 
'ensemble', + abs_thresholds = res$lims_ano_obs_tr, ncores = recipe$Analysis$ncores) rps <- RPS(exp = ano_hcst_probs_ev, obs = ano_obs_probs_ev, memb_dim = NULL, - cat_dim = 'probs', cross.val = FALSE, time_dim = 'loop', + cat_dim = 'probs', cross.val = FALSE, time_dim = 'syear', ncores = recipe$Analysis$ncores) +source("modules/Skill/R/RPS_clim.R") +rps_clim <- Apply(list(ano_obs_probs_ev), + target_dims = c('probs', 'syear'), + RPS_clim, bin_dim_abs = 'probs', cross.val = FALSE)$output1 # RPSS rpss <- RPSS(exp = ano_hcst_probs_ev, obs = ano_obs_probs_ev, - time_dim = 'loop', memb_dim = NULL, + time_dim = 'syear', memb_dim = NULL, cat_dim = 'probs', # We should use a line like this #abs_threshold = res$lims_ano_hcst_tr, @@ -80,18 +174,32 @@ rpss <- RPSS(exp = ano_hcst_probs_ev, obs = ano_obs_probs_ev, ncores = recipe$Analysis$ncores) # CRPS crps <- CRPS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, - time_dim = 'loop', memb_dim = 'ensemble', + time_dim = 'syear', memb_dim = 'ensemble', ncores = recipe$Analysis$ncores) +# Este no sé como se calcula????: +# Aquí no se puede porque estaría incluyendo información de los otros años +#source("modules/Skill/R/CRPS_clim.R") +# Pero si lo hago con el ano_obs_tr si puedo hacerlo aquí +# el resultado es igual a dentro del bucle. 
+crps_clim <- CRPS(exp = res$ano_obs_tr, obs = res$ano_obs_ev, + time_dim = 'syear', memb_dim = 'sample.syear', + ncores = recipe$Analysis$ncores) + + # CRPSS -crpss <- CRPSS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, +ref <- res$ano_obs_tr +dim(ref) <- c(ensemble = as.numeric(sdate_dim) -1, + nftime, nlats, nlons, sdate_dim) +crpss <- CRPSS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, ref = ref, memb_dim = 'ensemble', - time_dim = 'loop', clim.cross.val = TRUE, + time_dim = 'syear', clim.cross.val = FALSE, ncores = recipe$Analysis$ncores) + # Corr enscorr <- s2dv::Corr(res$ano_hcst_ev, res$ano_obs_ev, - dat_dim = 'dat', - time_dim = 'loop', + dat_dim = NULL, + time_dim = 'syear', method = 'pearson', memb_dim = 'ensemble', memb = F, @@ -113,13 +221,13 @@ obs_noensdim <- ClimProjDiags::Subset(res$ano_obs_ev, "ensemble", 1, enssprerr <- easyVerification::veriApply(verifun = 'EnsSprErr', fcst = res$ano_hcst_ev, obs = obs_noensdim, - tdim = which(names(dim(res$ano_hcst_ev))=='loop'), + tdim = which(names(dim(res$ano_hcst_ev))=='syear'), ensdim = which(names(dim(res$ano_hcst_ev))=='ensemble'), - na.rm = TRUE, + na.rm = FALSE, ncpus = recipe$Analysis$ncores) skill_metrics <- list(mean_bias = mean_bias, enscorr = enscorr$corr, enscorr_significance = enscorr$sign, enssprerr = enssprerr, - #rps = rps, + rps = rps, rps_clim = rps_clim, crps = crps, crps_clim = crps_clim, rpss = rpss$rpss, rpss_significance = rpss$sign, #crps = crps, crpss = crpss$crpss, crpss_significance = crpss$sign) skill_metrics <- lapply(skill_metrics, function(x) { @@ -128,7 +236,12 @@ original <- recipe$Run$output_dir recipe$Run$output_dir <- paste0(original, "/outputs/Skill/") # Compute save metrics source("modules/Saving/Saving.R") -Saving <- Saving(recipe = recipe, data = data, skill = skill_metrics) +#Saving <- Saving(recipe = recipe, data = data, skill = skill_metrics) + save_metrics(recipe = recipe, + skill = skill_metrics, + data_cube = data$hcst, agg = 'global', + outdir = 
recipe$Run$output_dir) + recipe$Run$output_dir <- original source("modules/Visualization/Visualization.R") @@ -144,3 +257,4 @@ Visualization(recipe, data, skill_metrics, significance = TRUE) source("tools/add_logo.R") add_logo(recipe, "tools/BSC_logo_95.jpg") + diff --git a/launch_SUNSET.sh b/launch_SUNSET.sh index f635635a..64374e8d 100644 --- a/launch_SUNSET.sh +++ b/launch_SUNSET.sh @@ -106,6 +106,8 @@ tmpfile=$(mktemp ${TMPDIR-/tmp}/SUNSET.XXXXXX) #source MODULES module load conda/22.11.1-2 conda activate condaCerise +export LD_LIBRARY_PATH=/perm/cyce/conda/envs/condaCerise/lib + Rscript split.R ${recipe} $disable_unique_ID --tmpfile $tmpfile # Run with Autosubmit or directly with Slurm's sbatch? diff --git a/modules/Loading/R/GRIB/GrbLoad.R b/modules/Loading/R/GRIB/GrbLoad.R index 739ff6b3..827d275d 100644 --- a/modules/Loading/R/GRIB/GrbLoad.R +++ b/modules/Loading/R/GRIB/GrbLoad.R @@ -45,7 +45,7 @@ GrbLoad <- function (dat, time_step = 1, has.memb = NULL, syear_time_dim = NULL, cdo_time_attr <- as.POSIXct(gsub('T', ' ', cdo_time), tz = 'UTC') cdo_time <- sapply(sapply(cdo_time, strsplit, "T"), '[[', 1) cdo_time <- gsub('-', '', cdo_time) - + #---------------------------------------- # all members + ftimes: length should be memb*ftime (e.g., 51*7) @@ -92,7 +92,7 @@ GrbLoad <- function (dat, time_step = 1, has.memb = NULL, syear_time_dim = NULL, attr(result, 'edition') <- gm1$edition attr(result, 'shortName') <- gm1$shortName #NOTE: Tune varaible name!! 
- if (gm1$shortName == '2t') attr(result, 'shortName') <- 'tas' + attr(result, 'shortName') <- tune_var_name(gm1$shortName) attr(result, 'name') <- gm1$name attr(result, 'units') <- gm1$units # attr(result, 'validityDate') <- gm1$validityDate @@ -247,3 +247,13 @@ GrbLoad <- function (dat, time_step = 1, has.memb = NULL, syear_time_dim = NULL, values } + +tune_var_name <- function(x) { + if (x == '2t') { + res <- "tas" + } else if (x == 'tprate') { + res <- "prlr" + } else { + warning("Conversion name", x, "needed?") + } +} diff --git a/modules/Scorecards/R/tmp/LoadMetrics.R b/modules/Scorecards/R/tmp/LoadMetrics.R index e5e15421..c2afed29 100644 --- a/modules/Scorecards/R/tmp/LoadMetrics.R +++ b/modules/Scorecards/R/tmp/LoadMetrics.R @@ -157,7 +157,7 @@ LoadMetrics <- function(system, reference, var, start.year, end.year, var, "-skill_", period, "_s", m, # mod.pressure, ".nc")}) allfiles_exist <- sapply(allfiles, file.exists) - + warning(paste("Exist", allfiles_exist)) # Check dims files_exist_by_month <- seq(1:length(allfiles))[allfiles_exist] allfiledims <- sapply(allfiles[allfiles_exist], easyNCDF::NcReadDims) diff --git a/modules/Scorecards/R/tmp/ScorecardsSingle.R b/modules/Scorecards/R/tmp/ScorecardsSingle.R index 56f08204..1c010b1c 100644 --- a/modules/Scorecards/R/tmp/ScorecardsSingle.R +++ b/modules/Scorecards/R/tmp/ScorecardsSingle.R @@ -94,8 +94,8 @@ ScorecardsSingle <- function(data, system, reference, var, start.year, end.year, ftime_dim = 'time') ## Load configuration files - sys_dict <- read_yaml("/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/conf/archive.yml")$esarchive - var_dict <- read_yaml("/esarchive/scratch/nmilders/gitlab/git_clones/csscorecards/inst/config/variable-dictionary.yml")$vars + sys_dict <- read_yaml("conf/archive.yml")$esarchive + var_dict <- read_yaml("conf/variable-dictionary.yml")$vars ## Get scorecards table display names from configuration files var.name <- var_dict[[var]]$long_name diff --git 
a/recipe_NAO_scorecards.yml b/recipe_NAO_scorecards.yml index bd3b0587..88929bc7 100644 --- a/recipe_NAO_scorecards.yml +++ b/recipe_NAO_scorecards.yml @@ -1,6 +1,6 @@ Description: - Author: nperez - Info: ECMWF SEAS5 Seasonal Forecast Example recipe (monthly mean, tas) + Author: Nuria Perez-Zanon + Info: Cerise phase 0 assessment NAO index Analysis: Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal diff --git a/recipe_ecvs_scorecards_seasonal.yml b/recipe_ecvs_scorecards_seasonal.yml index 70907539..dd42c8c1 100644 --- a/recipe_ecvs_scorecards_seasonal.yml +++ b/recipe_ecvs_scorecards_seasonal.yml @@ -1,38 +1,46 @@ Description: - Author: nperez - Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + Author: Nuria Perez-Zanon + Info: Cerise phase 0 assessment for ECVs Analysis: Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal Variables: - {name: tas, freq: monthly_mean, units: K} - - {name: prlr, freq: monthly_mean, units: ms-1, flux: yes} + - {name: tasmin, freq: monthly_mean, units: K} + - {name: tasmin, freq: monthly_mean, units: K} + - {name: tos, freq: monthly_mean, units: K} + - {name: sfcWind, freq: monthly_mean, units: ms-1} + - {name: tdps, freq: monthly_mean, units: K} + - {name: psl, freq: monthly_mean, units: hPa} + #- {name: prlr, freq: monthly_mean, units: ms-1, flux: yes} Datasets: System: - {name: Meteo-France-System8} + - {name: CMCC-SPS3.5} + - {name: UKMO-System602} Multimodel: no # Mandatory, bool: Either yes/true or no/false Reference: - {name: ERA5} # Mandatory, str: Reference codename. See docu. 
Time: sdate: - '0101' - # - '0201' - # - '0301' - #- '0401' - #- '0501' - #- '0601' - #- '0701' - #- '0801' - #- '0901' - #- '1001' - #- '1101' - #- '1201' + - '0201' + - '0301' + - '0401' + - '0501' + - '0601' + - '0701' + - '0801' + - '0901' + - '1001' + - '1101' + - '1201' hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' ftime_min: 1 # Mandatory, int: First leadtime time step in months - ftime_max: 3 # Mandatory, int: Last leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months Region: - - {name: "EU", latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.5} + - {name: "EU", latmin: 0, latmax: 20, lonmin: 0, lonmax: 45} Regrid: method: bilinear # Mandatory, str: Interpolation method. See docu. type: "to_system" @@ -74,7 +82,7 @@ Analysis: col1_width: NULL col2_width: NULL calculate_diff: FALSE - ncores: 14 # Optional, int: number of cores, defaults to 1 + ncores: 4 # Optional, int: number of cores, defaults to 1 remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE Output_format: scorecards logo: yes diff --git a/recipe_tas_scorecards_seasonal.yml b/recipe_tas_scorecards_seasonal.yml index f92ee25f..aa6559c1 100644 --- a/recipe_tas_scorecards_seasonal.yml +++ b/recipe_tas_scorecards_seasonal.yml @@ -73,8 +73,8 @@ Analysis: Run: Loglevel: INFO Terminal: yes - filesystem: esarchive - output_dir: /esarchive/scratch/nperez/cs_oper/ # replace with the directory where you want to save the outputs + filesystem: cerise + output_dir: /perm/cyce/phase0/ # replace with the directory where you want to save the outputs code_dir: /esarchive/scratch/nperez/git3/sunset/ # replace with the directory where your code is autosubmit: no # fill only if using autosubmit -- GitLab From ae2de39ccc8df42b3504b2834a1ed2d6ea197493 Mon Sep 17 00:00:00 2001 From: nperez Date: Mon, 13 Nov 2023 12:10:35 +0000 Subject: [PATCH 17/91] fix era5 conf for mars --- 
conf/archive.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/archive.yml b/conf/archive.yml index 39d1357c..c7db21a2 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -44,7 +44,7 @@ cerise: ERA5: name: "ERA5" institution: "European Centre for Medium-Range Weather Forecasts" - src: "recon/ecmwf/era5/monthly_mean/" + src: "recon/ecmwf/era5/" monthly_mean: {"tas":"monthly_mean/tas_f1h/", "monthly_mean/prlr":"prlr_f1h/", "psl":"monthly_mean/psl_f1h/", "tos":"monthly_mean/tos_f1h/", -- GitLab From 8d07ec75caa7733c2d72cdf33e4401168482379d Mon Sep 17 00:00:00 2001 From: nperez Date: Wed, 15 Nov 2023 11:40:23 +0000 Subject: [PATCH 18/91] =?UTF-8?q?fixes=20variable=20names=C3=A7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- conf/archive.yml | 10 ++++---- full_ecvs_scorecards.R | 14 ++++++++--- modules/Loading/R/GRIB/GrbLoad.R | 26 ++++++++++++++++++-- modules/Visualization/R/plot_skill_metrics.R | 2 ++ recipe_ecvs_scorecards_seasonal.yml | 14 +++++------ recipe_tas_scorecards_seasonal.yml | 8 +++--- 6 files changed, 53 insertions(+), 21 deletions(-) diff --git a/conf/archive.yml b/conf/archive.yml index c7db21a2..fbd2e9e7 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -6,7 +6,7 @@ cerise: institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/cmcc/system35/" monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f6h/", - "sfcWind":"monthly_mean/sfcWind_f6h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", "tos":"monthly_mean/tos_f6h", "tasmin":"monthly_mean/tasmin_f24h/", "tasmax":"monthly_mean/tasmax_f24h/", "psl":"monthly_mean/psl_f6h/", "tdps":"monthly_mean/tdps_f6h/"} nmember: @@ -18,7 +18,7 @@ cerise: name: "Meteo-France System 8" institution: "Meteo-France" src: "exp/meteo_france/system8/" - monthly_mean: {"tas":"monthly_mean/tas_f6h/", + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "tos":"monthly_mean/tos_f6h", "prlr":"monthly_mean/prlr_f6h/", 
"sfcWind": "monthly_mean/sfcWind_f6h/", "tasmax":"monthly_mean/tasmax_f6h/", "tasmin": "monthly_mean/tasmin_f6h/", "psl":"monthly_mean/psl_f6h/", "tdps":"monthly_mean/tdps_f6h/"} @@ -31,7 +31,7 @@ cerise: name: "UKMO-S602" institution: "UK MetOffice" src: "exp/ukmo/system602/" - monthly_mean: {"tas":"monthly_mean/tas_f6h/", + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "tos":"monthly_mean/tos_f6h", "prlr":"monthly_mean/prlr_f6h/", "sfcWind": "monthly_mean/sfcWind_f6h/", "tasmax":"monthly_mean/tasmax_f6h/", "tasmin": "monthly_mean/tasmin_f6h/", "psl":"monthly_mean/psl_f6h/", "tdps":"monthly_mean/tdps_f6h/"} @@ -45,8 +45,8 @@ cerise: name: "ERA5" institution: "European Centre for Medium-Range Weather Forecasts" src: "recon/ecmwf/era5/" - monthly_mean: {"tas":"monthly_mean/tas_f1h/", "monthly_mean/prlr":"prlr_f1h/", - "psl":"monthly_mean/psl_f1h/", + monthly_mean: {"tas":"monthly_mean/tas_f1h/", "prlr":"monthly_mean/prlr_f1h/", + "psl":"monthly_mean/psl_f1h/", "sfcWind":"monthly_mean/sfcWind_f1h/", "tos":"monthly_mean/tos_f1h/", "tasmax":"monthly_mean/tasmax_f1h/", "tasmin":"monthly_mean/tasmin_f1h/", "tdps":"monthly_mean/tdps_f1h/"} diff --git a/full_ecvs_scorecards.R b/full_ecvs_scorecards.R index d3d75436..fec1a53d 100644 --- a/full_ecvs_scorecards.R +++ b/full_ecvs_scorecards.R @@ -249,9 +249,17 @@ source("modules/Visualization/Visualization.R") # lon = -179:180, # lat = data$hcst$attrs$Variable$metadata$latitude, filled.co = F, # fileout = "/esarchive/scratch/nperez/test.png") -skill_metrics <- lapply(skill_metrics, function(x) { - Subset(x, along = 'longitude', indices = c(182:360, 1:181)) - }) +if (data$hcst$coords$longitude[1] != 0) { + skill_metrics <- lapply(skill_metrics, function(x) { + Subset(x, along = 'longitude', indices = c(182:360, 1:181)) + }) +} + info(recipe$Run$logger, + paste("lons:", data$hcst$coords$longitude)) + info(recipe$Run$logger, + paste("lons:", data$obs$coords$longitude)) + + data$hcst$coords$longitude <- -179:180 
Visualization(recipe, data, skill_metrics, significance = TRUE) diff --git a/modules/Loading/R/GRIB/GrbLoad.R b/modules/Loading/R/GRIB/GrbLoad.R index 827d275d..fda8ea84 100644 --- a/modules/Loading/R/GRIB/GrbLoad.R +++ b/modules/Loading/R/GRIB/GrbLoad.R @@ -92,7 +92,7 @@ GrbLoad <- function (dat, time_step = 1, has.memb = NULL, syear_time_dim = NULL, attr(result, 'edition') <- gm1$edition attr(result, 'shortName') <- gm1$shortName #NOTE: Tune varaible name!! - attr(result, 'shortName') <- tune_var_name(gm1$shortName) + attr(result, 'shortName') <- tune_var_name(gm1$shortName, dat[[dat_i]]) attr(result, 'name') <- gm1$name attr(result, 'units') <- gm1$units # attr(result, 'validityDate') <- gm1$validityDate @@ -248,12 +248,34 @@ GrbLoad <- function (dat, time_step = 1, has.memb = NULL, syear_time_dim = NULL, } -tune_var_name <- function(x) { +tune_var_name <- function(x, y) { if (x == '2t') { res <- "tas" } else if (x == 'tprate') { res <- "prlr" + } else if (x == "10si") { + res <- "sfcWind" + } else if (x == "2d") { + res <- "tdps" + } else if (x == "msl") { + res <- "psl" + } else if (x == "sst") { + res <- "tos" + } else if (x == "tp") { + if (grepl("prlr", y)) { + res <- "prlr" + } else if (grepl("tasmax", y)) { + res <- "tasmax" + } else if (grepl("tasmin", y)) { + res <- "tasmin" + } + } else if (x == "mx2t24") { + res <- "tasmax" + } else if (x == "mn2t24") { + res <- "tasmin" } else { + res <- x warning("Conversion name", x, "needed?") } + return(res) } diff --git a/modules/Visualization/R/plot_skill_metrics.R b/modules/Visualization/R/plot_skill_metrics.R index 2698d499..76febf6b 100644 --- a/modules/Visualization/R/plot_skill_metrics.R +++ b/modules/Visualization/R/plot_skill_metrics.R @@ -242,6 +242,8 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, } fileout <- paste0(outfile, "_ft", forecast_time, ".png") # Plot + info(recipe$Run$logger, + paste("Plotting", display_name)) do.call(fun, args = c(base_args, diff --git 
a/recipe_ecvs_scorecards_seasonal.yml b/recipe_ecvs_scorecards_seasonal.yml index dd42c8c1..47c91f6c 100644 --- a/recipe_ecvs_scorecards_seasonal.yml +++ b/recipe_ecvs_scorecards_seasonal.yml @@ -6,12 +6,12 @@ Analysis: Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal Variables: - {name: tas, freq: monthly_mean, units: K} - - {name: tasmin, freq: monthly_mean, units: K} - - {name: tasmin, freq: monthly_mean, units: K} - - {name: tos, freq: monthly_mean, units: K} - - {name: sfcWind, freq: monthly_mean, units: ms-1} - - {name: tdps, freq: monthly_mean, units: K} - - {name: psl, freq: monthly_mean, units: hPa} + - {name: tasmin, freq: monthly_mean} + - {name: tasmax, freq: monthly_mean} + - {name: tos, freq: monthly_mean} + - {name: sfcWind, freq: monthly_mean} + - {name: tdps, freq: monthly_mean} + - {name: psl, freq: monthly_mean} #- {name: prlr, freq: monthly_mean, units: ms-1, flux: yes} Datasets: System: @@ -66,7 +66,6 @@ Analysis: Visualization: plots: skill_metrics #forecast_ensemble_mean most_likely_terciles multi_panel: no - dots: both Scorecards: execute: yes # yes/no regions: @@ -105,3 +104,4 @@ Run: email_address: nuria.perez@bsc.es # replace with your email address notify_completed: yes # notify me by email when a job finishes notify_failed: yes # notify me by email when a job fails + diff --git a/recipe_tas_scorecards_seasonal.yml b/recipe_tas_scorecards_seasonal.yml index aa6559c1..9a5d5580 100644 --- a/recipe_tas_scorecards_seasonal.yml +++ b/recipe_tas_scorecards_seasonal.yml @@ -5,7 +5,7 @@ Description: Analysis: Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal Variables: - name: tas + name: tasmin freq: monthly_mean Datasets: System: @@ -16,14 +16,14 @@ Analysis: Time: sdate: '0101' hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' - hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + hcst_end: '1996' # Mandatory, int: Hindcast end year 'YYYY' ftime_min: 1 # Mandatory, 
int: First leadtime time step in months ftime_max: 2 # Mandatory, int: Last leadtime time step in months Region: - latmin: -90 + latmin: 0 latmax: 90 lonmin: 0 - lonmax: 359.5 + lonmax: 15.5 Regrid: method: bilinear # Mandatory, str: Interpolation method. See docu. type: "to_system" -- GitLab From 74653a7335e32d0e65b684d3c1a6f05c5c038678 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 24 Nov 2023 12:40:00 +0100 Subject: [PATCH 19/91] completed information for MIROC6 --- conf/archive_decadal.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/archive_decadal.yml b/conf/archive_decadal.yml index 2e0a1b29..2d4ff679 100644 --- a/conf/archive_decadal.yml +++ b/conf/archive_decadal.yml @@ -228,8 +228,8 @@ esarchive: # ---- MIROC6: - name: - institution: + name: "MIROC6" + institution: "MIROC" src: hcst: "exp/CMIP6/dcppA-hindcast/MIROC6/DCPP/MIROC/MIROC6/dcppA-hindcast/" fcst: -- GitLab From f123014bfabc55cdb8ecc24a000278cf9daa087b Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 24 Nov 2023 12:40:56 +0100 Subject: [PATCH 20/91] create multimodel recipe when splitting --- tools/divide_recipe.R | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/tools/divide_recipe.R b/tools/divide_recipe.R index b22274cc..85f80005 100644 --- a/tools/divide_recipe.R +++ b/tools/divide_recipe.R @@ -21,7 +21,7 @@ divide_recipe <- function(recipe) { Run = recipe$Run[c("Loglevel", "output_dir", "Terminal", "code_dir", "logfile", "filesystem")]) - # duplicate recipe by independent variables:ç + # duplicate recipe by independent variables: # If a single variable is not given inside a list, rebuild structure if (any(c("name", "freq", "units") %in% names(recipe$Analysis$Variables))) { variables <- recipe$Analysis$Variables @@ -48,31 +48,33 @@ divide_recipe <- function(recipe) { recipe$Analysis$Datasets$System <- NULL recipe$Analysis$Datasets$System[[1]] <- system } - - if 
(recipe$Analysis$Datasets$Multimodel %in% c(TRUE, 'both')) { + n_models <- ifelse(test = recipe$Analysis$Datasets$Multimodel$execute %in% c(TRUE, 'both'), + yes = length(recipe$Analysis$Datasets$System)+1, + no = length(recipe$Analysis$Datasets$System)) + for (sys in 1:n_models) { for (reci in 1:length(all_recipes)) { - all_recipes[[reci]]$Analysis$Datasets <- - list(System = recipe$Analysis$Datasets$System, - Multimodel = recipe$Analysis$Datasets$Multimodel, - Reference = NULL) - } - } else { - for (sys in 1:length(recipe$Analysis$Datasets$System)) { - for (reci in 1:length(all_recipes)) { + if (sys == length(recipe$Analysis$Datasets$System)+1){ all_recipes[[reci]]$Analysis$Datasets <- - list(System = recipe$Analysis$Datasets$System[[sys]], + list(System = list(name = 'Multimodel', + models = unlist(recipe$Analysis$Datasets$System)), Multimodel = recipe$Analysis$Datasets$Multimodel, Reference = NULL) - } - if (sys == 1) { - recipes <- all_recipes } else { - recipes <- append(recipes, all_recipes) + all_recipes[[reci]]$Analysis$Datasets <- + list(System = recipe$Analysis$Datasets$System[[sys]], + Multimodel = 'no', + Reference = NULL) } - } # Rest of horizons - all_recipes <- recipes - rm(list = 'recipes') + } + if (sys == 1) { + recipes <- all_recipes + } else { + recipes <- append(recipes, all_recipes) + } } + all_recipes <- recipes + rm(list = 'recipes') + # Check references # If a single reference is not given inside a list, rebuild structure if (c("name") %in% names(recipe$Analysis$Datasets$Reference)) { -- GitLab From 87ff7c4a3fee02cbef76504b1b627d7f5175433d Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 24 Nov 2023 12:55:13 +0100 Subject: [PATCH 21/91] added checks for the multimodel --- tools/check_recipe.R | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 01b790b8..9b979de4 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -94,6 +94,23 @@ check_recipe <- 
function(recipe) { error_status <- T } } + # Check multimodel + if (!is.null(recipe$Analysis$Datasets$Multimodel)){ + if (!tolower(recipe$Analysis$Datasets$Multimodel$execute) %in% c('yes','true','no','false','both')){ + error(recipe$Run$logger, + paste("The specified execution for the multimodel is not valid.", + "Please specify yes/true, no/false or both")) + error_status <- T + } + if (!tolower(recipe$Analysis$Datasets$Multimodel$approach) %in% c('mean','pooled')){ + error(recipe$Run$logger, + paste("The specified approach for the multimodel is not valid.", + "Please specify mean or pooled")) + error_status <- T + } + } else { + recipe$Analysis$Datasets$Multimodel <- 'no' + } # Check ftime_min and ftime_max if ((!(recipe$Analysis$Time$ftime_min > 0)) || (!is.integer(recipe$Analysis$Time$ftime_min))) { -- GitLab From af7689d28aac3692deb81e0ae1de88d13f212d14 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 24 Nov 2023 15:32:41 +0100 Subject: [PATCH 22/91] changing the atomic recipes of the individual models to save all outputs --- tools/divide_recipe.R | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/tools/divide_recipe.R b/tools/divide_recipe.R index 85f80005..5ae10f52 100644 --- a/tools/divide_recipe.R +++ b/tools/divide_recipe.R @@ -48,9 +48,14 @@ divide_recipe <- function(recipe) { recipe$Analysis$Datasets$System <- NULL recipe$Analysis$Datasets$System[[1]] <- system } - n_models <- ifelse(test = recipe$Analysis$Datasets$Multimodel$execute %in% c(TRUE, 'both'), - yes = length(recipe$Analysis$Datasets$System)+1, - no = length(recipe$Analysis$Datasets$System)) + # Modify the saving of the individual models in case multimodel is yes or both + if (recipe$Analysis$Datasets$Multimodel$execute %in% c(TRUE, 'both')){ + n_models <- length(recipe$Analysis$Datasets$System)+1 + mm <- TRUE + } else { + n_models <- length(recipe$Analysis$Datasets$System) + mm <- FALSE + } for (sys in 1:n_models) { for (reci in 
1:length(all_recipes)) { if (sys == length(recipe$Analysis$Datasets$System)+1){ @@ -74,6 +79,15 @@ divide_recipe <- function(recipe) { } all_recipes <- recipes rm(list = 'recipes') + for (reci in 1:length(all_recipes)){ + if (isTRUE(mm) && all_recipes[[reci]]$Analysis$Datasets$System$name != 'Multimodel'){ + all_recipes[[reci]]$Analysis$Workflow$Anomalies$save <- 'yes' + all_recipes[[reci]]$Analysis$Workflow$Calibration$save <- 'yes' + all_recipes[[reci]]$Analysis$Workflow$Skill$save <- 'yes' + all_recipes[[reci]]$Analysis$Workflow$Probabilities$save <- 'yes' + all_recipes[[reci]]$Analysis$Workflow$Indicators$save <- 'yes' + } + } # Check references # If a single reference is not given inside a list, rebuild structure -- GitLab From 19004a7393866ea9aa5489ad6c2ecb03ad52f7dd Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 24 Nov 2023 16:38:10 +0100 Subject: [PATCH 23/91] adapted checks for the multimode --- tools/check_recipe.R | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 9b979de4..8c6dd93b 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -81,7 +81,8 @@ check_recipe <- function(recipe) { } # Check system names if (!is.null(archive)) { - if (!all(recipe$Analysis$Datasets$System$name %in% names(archive$System))) { + if (!all(recipe$Analysis$Datasets$System$name %in% + c(names(archive$System),'Multimodel'))) { error(recipe$Run$logger, "The specified System name was not found in the archive.") error_status <- T @@ -95,17 +96,20 @@ check_recipe <- function(recipe) { } } # Check multimodel - if (!is.null(recipe$Analysis$Datasets$Multimodel)){ - if (!tolower(recipe$Analysis$Datasets$Multimodel$execute) %in% c('yes','true','no','false','both')){ + if (!is.null(recipe$Analysis$Datasets$Multimodel) && + !recipe$Analysis$Datasets$Multimodel %in% c('no','false')){ + if (!tolower(recipe$Analysis$Datasets$Multimodel$execute) %in% + c('yes','true','no','false','both')){ 
error(recipe$Run$logger, paste("The specified execution for the multimodel is not valid.", - "Please specify yes/true, no/false or both")) + "Please specify yes/true, no/false or both.")) error_status <- T } - if (!tolower(recipe$Analysis$Datasets$Multimodel$approach) %in% c('mean','pooled')){ + if (!tolower(recipe$Analysis$Datasets$Multimodel$approach) %in% + c('mean','pooled')){ error(recipe$Run$logger, paste("The specified approach for the multimodel is not valid.", - "Please specify mean or pooled")) + "Please specify mean or pooled.")) error_status <- T } } else { -- GitLab From 152b04d29970734272497c3f8f51bc5d7eff7953 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 24 Nov 2023 16:38:23 +0100 Subject: [PATCH 24/91] . --- tools/divide_recipe.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/divide_recipe.R b/tools/divide_recipe.R index 5ae10f52..716d45aa 100644 --- a/tools/divide_recipe.R +++ b/tools/divide_recipe.R @@ -81,11 +81,11 @@ divide_recipe <- function(recipe) { rm(list = 'recipes') for (reci in 1:length(all_recipes)){ if (isTRUE(mm) && all_recipes[[reci]]$Analysis$Datasets$System$name != 'Multimodel'){ - all_recipes[[reci]]$Analysis$Workflow$Anomalies$save <- 'yes' - all_recipes[[reci]]$Analysis$Workflow$Calibration$save <- 'yes' - all_recipes[[reci]]$Analysis$Workflow$Skill$save <- 'yes' - all_recipes[[reci]]$Analysis$Workflow$Probabilities$save <- 'yes' - all_recipes[[reci]]$Analysis$Workflow$Indicators$save <- 'yes' + all_recipes[[reci]]$Analysis$Workflow$Anomalies$save <- 'all' + all_recipes[[reci]]$Analysis$Workflow$Calibration$save <- 'all' + all_recipes[[reci]]$Analysis$Workflow$Skill$save <- 'all' + all_recipes[[reci]]$Analysis$Workflow$Probabilities$save <- 'all' + all_recipes[[reci]]$Analysis$Workflow$Indicators$save <- 'all' } } -- GitLab From 047b010612e81e21e7b5584ac8128456fd6d5ce9 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 24 Nov 2023 16:38:37 +0100 Subject: [PATCH 25/91] . 
--- modules/Calibration/Calibration.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 989d9b94..16d7b96b 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -28,7 +28,8 @@ Calibration <- function(recipe, data) { } else { ## TODO: Calibrate full fields when present # Calibration function params - mm <- recipe$Analysis$Datasets$Multimodel + mm <- !is.null(recipe$Analysis$Datasets$Multimodel) && + !tolower(recipe$Analysis$Datasets$Multimodel) %in% c('no','false') if (is.null(recipe$Analysis$ncores)) { ncores <- 1 } else { -- GitLab From d728413a25c3816051e614c9d5dd1a4032d22a3a Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 24 Nov 2023 17:33:00 +0100 Subject: [PATCH 26/91] only forcing to save the outputs needed for the multimodel --- tools/divide_recipe.R | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tools/divide_recipe.R b/tools/divide_recipe.R index 716d45aa..1ad1bba4 100644 --- a/tools/divide_recipe.R +++ b/tools/divide_recipe.R @@ -81,11 +81,7 @@ divide_recipe <- function(recipe) { rm(list = 'recipes') for (reci in 1:length(all_recipes)){ if (isTRUE(mm) && all_recipes[[reci]]$Analysis$Datasets$System$name != 'Multimodel'){ - all_recipes[[reci]]$Analysis$Workflow$Anomalies$save <- 'all' - all_recipes[[reci]]$Analysis$Workflow$Calibration$save <- 'all' - all_recipes[[reci]]$Analysis$Workflow$Skill$save <- 'all' - all_recipes[[reci]]$Analysis$Workflow$Probabilities$save <- 'all' - all_recipes[[reci]]$Analysis$Workflow$Indicators$save <- 'all' + all_recipes[[reci]]$Analysis$Workflow[[recipe$Analysis$Datasets$Multimodel$readFrom]]$save <- 'all' } } -- GitLab From e6e8190c1af00648b8b3c50ccdea5433f7232d58 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 24 Nov 2023 17:34:19 +0100 Subject: [PATCH 27/91] first version of the recipe and script for the multimodel --- 

################################
### SEASONAL MULTIMODEL TEST ###
################################

# Example driver script: splits a full multimodel recipe into atomic
# recipes (one per system plus one for the multimodel itself) and runs
# the verification workflow for each of them.

# Load modules
source("modules/Loading/Loading.R")
source("modules/Units/Units.R")
source("modules/Calibration/Calibration.R")
source("modules/Anomalies/Anomalies.R")
source("modules/Skill/Skill.R")
source("modules/Saving/Saving.R")
source("modules/Visualization/Visualization.R")
# Bug fix: Multimodel() is called below but its module was never sourced
source("modules/Multimodel/Multimodel.R")

# Read recipe and split it into atomic recipes
recipe_file <- "recipe_multimodel_seasonal.yml"
system(paste0('Rscript split.R ', recipe_file))

for (recipe_file in c("/esarchive/scratch/cdelgado/sunset_outputs/recipe_multimodel_seasonal_20231124155552/logs/recipes/atomic_recipe_01.yml",
                      "/esarchive/scratch/cdelgado/sunset_outputs/recipe_multimodel_seasonal_20231124155552/logs/recipes/atomic_recipe_02.yml",
                      "/esarchive/scratch/cdelgado/sunset_outputs/recipe_multimodel_seasonal_20231124155552/logs/recipes/atomic_recipe_03.yml")) {

  # Read the atomic recipe and prepare its output directories
  recipe <- prepare_outputs(recipe_file)

  if (recipe$Analysis$Datasets$System$name == 'Multimodel') {

    # Load the saved single-model outputs and create the multimodel.
    # Multimodel() returns both the data and its probability categories.
    # Bug fix: 'probabilities' was previously undefined in this branch,
    # so Visualization() below would fail (or silently reuse a stale value
    # from a previous loop iteration).
    mm <- Multimodel(recipe)
    probabilities <- mm$prob
    data <- mm$data

  } else {

    # Load datasets
    data <- Loading(recipe)
    # Change units
    data <- Units(recipe, data)
    # Calibrate datasets
    data <- Calibration(recipe, data)
    # Compute anomalies
    data <- Anomalies(recipe, data)
    # Compute percentiles and probability bins
    probabilities <- Probabilities(recipe, data)

  }

  # Compute skill metrics
  skill_metrics <- Skill(recipe, data)
  # Plot data (TRUE spelled out instead of the reassignable shorthand T)
  Visualization(recipe, data, skill_metrics, probabilities, significance = TRUE)

}
+ save: 'all' + Indicators: + index: no + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles + multi_panel: no + dots: both + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: esarchive + output_dir: /esarchive/scratch/cdelgado/sunset_outputs/ # replace with the directory where you want to save the outputs + code_dir: /esarchive/scratch/nperez/gitlat/SUNSET/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/cdelgado/gitlat/SUNSET/main_multimodel_seasonal.R # replace with the path to your script + expid: XXXX # replace with your EXPID + hpc_user: bsc32924 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. 
+ email_address: carlos.delgado@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails -- GitLab From 6440cf688cf925309a85be6f0184b8bc4de5550b Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Wed, 29 Nov 2023 11:30:15 +0100 Subject: [PATCH 28/91] error in case grid is to_system and multimodel is asked --- tools/check_recipe.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 8c6dd93b..92cc59c9 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -196,6 +196,14 @@ check_recipe <- function(recipe) { "The 'Regrid' element must specify the 'method' and 'type'.") error_status <- T } + if (recipe$Analysis$Regrid$type == 'to_system' && + tolower(recipe$Analysis$Datasets$Multimodel$execute) + %in% c('both','yes','true')) { + error(recipe$Run$logger, + paste0("The 'Regrid$type' cannot be 'to_system' if ", + "'Multimodel$execute' is yes/true or both.")) + error_status <- T + } # TODO: Add Workflow checks? # ... # calculate number of workflows to create for each variable and -- GitLab From 8ba69161685bb8ae3c6d88150112b25b375f1536 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Wed, 29 Nov 2023 11:30:40 +0100 Subject: [PATCH 29/91] . 
--- example_scripts/example_multimodel.R | 19 ++++++++++++++----- recipes/recipe_multimodel_seasonal.yml | 10 +++++----- tools/divide_recipe.R | 2 +- 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/example_scripts/example_multimodel.R b/example_scripts/example_multimodel.R index 28f867b4..6b84dee5 100644 --- a/example_scripts/example_multimodel.R +++ b/example_scripts/example_multimodel.R @@ -11,21 +11,30 @@ source("modules/Anomalies/Anomalies.R") source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") +source("modules/Multimodel/Multimodel.R") -# Read recipe -recipe_file <- "recipe_multimodel_seasonal.yml" +## Complete recipe to be splitted +sunset_outputs_folder <- '/esarchive/scratch/cdelgado/sunset_outputs/' +recipe_file <- "recipes/recipe_multimodel_seasonal.yml" +system(paste0('rm -r ',sunset_outputs_folder,'recipe_multimodel_seasonal_*')) system(paste0('Rscript split.R ',recipe_file)) -for (recipe_file in c("/esarchive/scratch/cdelgado/sunset_outputs/recipe_multimodel_seasonal_20231124155552/logs/recipes/atomic_recipe_01.yml", - "/esarchive/scratch/cdelgado/sunset_outputs/recipe_multimodel_seasonal_20231124155552/logs/recipes/atomic_recipe_02.yml", - "/esarchive/scratch/cdelgado/sunset_outputs/recipe_multimodel_seasonal_20231124155552/logs/recipes/atomic_recipe_03.yml")){ +atomic_recipe_folder <- paste0(sunset_outputs_folder,list.files(sunset_outputs_folder),'/logs/recipes/') +atomic_recipe_files <- list.files(atomic_recipe_folder) +atomic_recipe_files <- atomic_recipe_files[-length(atomic_recipe_files)] +atomic_recipe_files <- paste0(atomic_recipe_folder,atomic_recipe_files) + +for (recipe_file in atomic_recipe_files){ + # Read recipe recipe <- prepare_outputs(recipe_file) if (recipe$Analysis$Datasets$System$name == 'Multimodel'){ # Load datasets and create multimodel data <- Multimodel(recipe) + probabilities <- data$probs + data <- data$data } else { diff --git 
a/recipes/recipe_multimodel_seasonal.yml b/recipes/recipe_multimodel_seasonal.yml index 92196e04..9cf1eb01 100644 --- a/recipes/recipe_multimodel_seasonal.yml +++ b/recipes/recipe_multimodel_seasonal.yml @@ -12,8 +12,8 @@ Analysis: - {name: ECMWF-SEAS5.1} - {name: DWD-GCFS2.1} Multimodel: - execute: both # Mandatory, bool: Either yes/true or no/false - approach: pooled + execute: both # Mandatory: Either both, yes/true or no/false + approach: pooled #mean, median createFrom: Calibration Reference: - {name: ERA5} # Mandatory, str: Reference codename. See docu. @@ -27,7 +27,7 @@ Analysis: ftime_min: 1 # Mandatory, int: First leadtime time step in months ftime_max: 2 # Mandatory, int: Last leadtime time step in months Region: - - {name: "region1", latmin: 42, latmax: 44, lonmin: 4, lonmax: 6} + - {name: "region1", latmin: 34, latmax: 44, lonmin: -10, lonmax: 6} Regrid: method: bilinear # Mandatory, str: Interpolation method. See docu. type: "to_system" @@ -55,14 +55,14 @@ Analysis: dots: both ncores: 4 # Optional, int: number of cores, defaults to 1 remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE - Output_format: scorecards + Output_format: s2s4e # scorecards logo: yes Run: Loglevel: INFO Terminal: yes filesystem: esarchive output_dir: /esarchive/scratch/cdelgado/sunset_outputs/ # replace with the directory where you want to save the outputs - code_dir: /esarchive/scratch/nperez/gitlat/SUNSET/ # replace with the directory where your code is + code_dir: /esarchive/scratch/cdelgado/gitlat/SUNSET/ # replace with the directory where your code is autosubmit: no # fill only if using autosubmit auto_conf: diff --git a/tools/divide_recipe.R b/tools/divide_recipe.R index 1ad1bba4..728f8dc5 100644 --- a/tools/divide_recipe.R +++ b/tools/divide_recipe.R @@ -81,7 +81,7 @@ divide_recipe <- function(recipe) { rm(list = 'recipes') for (reci in 1:length(all_recipes)){ if (isTRUE(mm) && all_recipes[[reci]]$Analysis$Datasets$System$name != 'Multimodel'){ - 
all_recipes[[reci]]$Analysis$Workflow[[recipe$Analysis$Datasets$Multimodel$readFrom]]$save <- 'all' + all_recipes[[reci]]$Analysis$Workflow[[recipe$Analysis$Datasets$Multimodel$createFrom]]$save <- 'all' } } -- GitLab From 6577df7765417ad4bb46492117985358a0931961 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Wed, 29 Nov 2023 11:33:23 +0100 Subject: [PATCH 30/91] setting the calendar of the first model for the multimodel outputs --- modules/Saving/R/save_corr.R | 6 +++++- modules/Saving/R/save_forecast.R | 6 +++++- modules/Saving/R/save_metrics.R | 6 +++++- modules/Saving/R/save_percentiles.R | 7 ++++++- modules/Saving/R/save_probabilities.R | 6 +++++- 5 files changed, 26 insertions(+), 5 deletions(-) diff --git a/modules/Saving/R/save_corr.R b/modules/Saving/R/save_corr.R index 050fe68d..1eb58b93 100644 --- a/modules/Saving/R/save_corr.R +++ b/modules/Saving/R/save_corr.R @@ -25,7 +25,11 @@ save_corr <- function(recipe, # Time indices and metadata fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq - calendar <- archive$System[[global_attributes$system]]$calendar + if (global_attributes$system == 'Multimodel'){ + calendar <- archive$System[[recipe$Analysis$Datasets$System$models[[1]]]]$calendar + } else { + calendar <- archive$System[[global_attributes$system]]$calendar + } # Generate vector containing leadtimes dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), diff --git a/modules/Saving/R/save_forecast.R b/modules/Saving/R/save_forecast.R index 00a22850..4451ce54 100644 --- a/modules/Saving/R/save_forecast.R +++ b/modules/Saving/R/save_forecast.R @@ -16,7 +16,11 @@ save_forecast <- function(recipe, global_attributes <- .get_global_attributes(recipe, archive) fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq - calendar <- archive$System[[global_attributes$system]]$calendar + if (global_attributes$system == 'Multimodel'){ + calendar <- 
archive$System[[recipe$Analysis$Datasets$System$models[[1]]]]$calendar + } else { + calendar <- archive$System[[global_attributes$system]]$calendar + } # Generate vector containing leadtimes dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), diff --git a/modules/Saving/R/save_metrics.R b/modules/Saving/R/save_metrics.R index cd4252ab..e4f6096c 100644 --- a/modules/Saving/R/save_metrics.R +++ b/modules/Saving/R/save_metrics.R @@ -24,7 +24,11 @@ save_metrics <- function(recipe, # Time indices and metadata fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq - calendar <- archive$System[[global_attributes$system]]$calendar + if (global_attributes$system == 'Multimodel'){ + calendar <- archive$System[[recipe$Analysis$Datasets$System$models[[1]]]]$calendar + } else { + calendar <- archive$System[[global_attributes$system]]$calendar + } # Generate vector containing leadtimes dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), diff --git a/modules/Saving/R/save_percentiles.R b/modules/Saving/R/save_percentiles.R index 862ed5ff..976045bf 100644 --- a/modules/Saving/R/save_percentiles.R +++ b/modules/Saving/R/save_percentiles.R @@ -23,7 +23,12 @@ save_percentiles <- function(recipe, # Time indices and metadata fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq - calendar <- archive$System[[global_attributes$system]]$calendar + if (global_attributes$system == 'Multimodel'){ + calendar <- archive$System[[recipe$Analysis$Datasets$System$models[[1]]]]$calendar + } else { + calendar <- archive$System[[global_attributes$system]]$calendar + } + # Generate vector containing leadtimes dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), cal = calendar) diff --git a/modules/Saving/R/save_probabilities.R b/modules/Saving/R/save_probabilities.R index b7da0449..850a6d06 100644 --- a/modules/Saving/R/save_probabilities.R +++ 
# This module loads the outputs saved for each individual forecast system
# and creates the multimodel ensemble.

source("modules/Loading/R/dates2load.R")
source("modules/Loading/R/get_timeidx.R")
source("modules/Loading/R/check_latlon.R")
source('modules/Multimodel/R/load_multimodel_seasonal.R')

Multimodel <- function(recipe) {
  # Build the multimodel ensemble from the outputs previously saved by the
  # single-system atomic jobs.
  #
  # recipe: atomic recipe as provided by read_yaml, with System name
  #         'Multimodel'.
  #
  # Returns a list with:
  #   data: list(hcst, fcst, obs) s2dv_cube objects holding the multimodel
  #   prob: probability categories (NULL for the 'mean'/'median' approaches,
  #         whose probabilistic output is still under development)

  # Loading data saved in the jobs for individual models
  if (tolower(recipe$Analysis$Horizon) == 'seasonal') {
    data <- load_multimodel_seasonal(recipe = recipe)
  } else if (tolower(recipe$Analysis$Horizon) == 'decadal') {
    stop('Decadal multimodel not implemented yet')
  } else {
    stop(paste0('Multimodel not implemented for ', recipe$Analysis$Horizon))
  }

  approach <- tolower(recipe$Analysis$Datasets$Multimodel$approach)

  # Creating the multi-model
  if (approach == 'pooled') {

    # Deterministic hindcast: pool every member of every model into a single
    # 'ensemble' dimension. na.rm = TRUE drops the NA padding introduced when
    # systems have different ensemble sizes (data was loaded with
    # largest_dims_length = TRUE).
    data$hcst$data <- CSTools::MergeDims(data = data$hcst$data,
                                         merge_dims = c('model', 'ensemble'),
                                         rename_dim = 'ensemble', na.rm = TRUE)
    data$hcst$dims <- dim(data$hcst$data)

    # Deterministic forecast
    if (!is.null(recipe$Analysis$Time$fcst_year)) {
      data$fcst$data <- CSTools::MergeDims(data = data$fcst$data,
                                           merge_dims = c('model', 'ensemble'),
                                           rename_dim = 'ensemble',
                                           na.rm = TRUE)
      data$fcst$dims <- dim(data$fcst$data)
    }

    # Probabilistic hindcast and forecast
    prob <- Probabilities(recipe, data)

  } else if (approach %in% c('mean', 'median')) {

    # Probabilistic hindcast and forecast.
    # (Message fixed: 'probabilitic' typo and the leading-whitespace junk
    # that the old multi-line string literal embedded into the warning.)
    warning(paste('Probabilities for multi-model mean are still under',
                  'development. If Skill is used, the results are not correct',
                  'for the probabilistic metrics. Probabilities cannot be',
                  'computed because data has an extra dimension (model).',
                  'Also, the function should return the observed',
                  'probabilities. Maybe it is better to use GetProbs.'))
    prob <- NULL  # Probabilities(recipe, data)

    # Deterministic hindcast: collapse members, then combine across models,
    # and restore a singleton 'ensemble' dimension so downstream modules
    # (Skill, Saving, Visualization) keep finding it.
    # Bug fix: the original applied s2dv::InsertDim to the *dims vector*
    # and immediately overwrote the result with dim(data), so the returned
    # arrays silently lost their 'ensemble' dimension.
    data$hcst$data <- .multimodel_aggregate(data$hcst$data, approach,
                                            recipe$Analysis$ncores)
    data$hcst$dims <- dim(data$hcst$data)

    # Deterministic forecast
    if (!is.null(recipe$Analysis$Time$fcst_year)) {
      data$fcst$data <- .multimodel_aggregate(data$fcst$data, approach,
                                              recipe$Analysis$ncores)
      data$fcst$dims <- dim(data$fcst$data)
    }

  } else {
    stop('Incorrect multi-model approach')
  }

  # TODO: Cleaning the individual model outputs
  # if (tolower(recipe$Analysis$Datasets$Multimodel$execute) %in% c('true','yes')) {
  #   system(paste0('rm -r '))
  # }

  return(list(data = data, prob = prob))
}

# Helper: collapse the 'ensemble' dimension (NA padding from differing
# ensemble sizes removed) and then the 'model' dimension of x with the given
# statistic ('mean' or 'median'), and append a singleton 'ensemble' dimension
# so the result keeps the dimension layout expected downstream.
.multimodel_aggregate <- function(x, stat, ncores) {
  x <- multiApply::Apply(data = x,
                         target_dims = 'ensemble',
                         fun = stat,
                         na.rm = TRUE,
                         ncores = ncores)$output1
  x <- multiApply::Apply(data = x,
                         target_dims = 'model',
                         fun = stat,
                         na.rm = FALSE,
                         ncores = ncores)$output1
  s2dv::InsertDim(data = x, posdim = length(dim(x)) + 1,
                  lendim = 1, name = 'ensemble')
}

source("modules/Loading/R/dates2load.R")
source("modules/Loading/R/get_timeidx.R")
source("modules/Loading/R/check_latlon.R")

load_multimodel_seasonal <- function(recipe) {
  # Load the hindcast, forecast and observations previously saved to disk by
  # the individual seasonal-system jobs, stacking the systems along a 'model'
  # dimension so that Multimodel() can combine them afterwards.
  #
  # recipe: atomic recipe (System name 'Multimodel') as provided by read_yaml;
  #         recipe$Analysis$Datasets$System$models lists the systems to stack.
  # Returns: list(hcst, fcst, obs) of s2dv_cube objects; fcst is NULL when no
  #          fcst_year is requested in the recipe.

  archive <- read_yaml("conf/archive.yml")$esarchive
  ref.name <- recipe$Analysis$Datasets$Reference$name
  exp.name <- recipe$Analysis$Datasets$System$models
  store.freq <- recipe$Analysis$Variables$freq
  variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]]
  # NOTE(review): exp.name is a *vector* of model names here; `[[` with a
  # vector does recursive indexing, so this presumably only works as intended
  # (descriptor of the first model) by accident -- confirm.
  exp_descrip <- archive$System[[exp.name]]
  reference_descrip <- archive$Reference[[ref.name]]
  sdates <- dates2load(recipe, recipe$Run$logger)

  lats.min <- recipe$Analysis$Region$latmin
  lats.max <- recipe$Analysis$Region$latmax
  lons.min <- recipe$Analysis$Region$lonmin
  lons.max <- recipe$Analysis$Region$lonmax
  circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max)

  if (recipe$Analysis$Variables$freq == "monthly_mean") {
    split_multiselected_dims = TRUE
  } else {
    split_multiselected_dims = FALSE
  }

  # Find the saved data directory: the outputs were written under
  # outputs/<createFrom> by the single-model jobs. The variable name, the
  # 'Multimodel' system folder and the atomic-recipe timestamp are replaced
  # by Start() pattern placeholders ($var$, $model$, $aux$).
  recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs",
                                     recipe$Analysis$Datasets$Multimodel$createFrom)
  hcst.path <- file.path(get_dir(recipe = recipe, variable = variable[1]),
                         "$var$_$file_date$.nc")
  hcst.path <- gsub(variable[1], "$var$", hcst.path)
  hcst.path <- gsub('Multimodel', "$model$", hcst.path)
  hcst.path <- gsub('atomic_recipe_\\d{2}_\\d{14}', "$aux$", hcst.path)
  fcst.path <- obs.path <- hcst.path
  # Observations were saved once per system with an "-obs" suffix; read them
  # from the first model's folder (dots stripped from the system name to
  # match the folder naming).
  obs.path <- gsub("_$file_date$", "-obs_$file_date$", obs.path, fixed = T)
  obs.path <- gsub("$model$", gsub('\\.','',exp.name[1]), obs.path, fixed = T)
  obs.path <- gsub('\\$aux\\$','*',obs.path)

  # Load hindcast
  #-------------------------------------------------------------------
  # 'aux' (the atomic-recipe timestamp folder) depends on the model and is
  # only needed to locate the files; it is dropped right after loading.
  hcst <- Start(dat = hcst.path,
                var = variable,
                file_date = sdates$hcst,
                model = 'all',
                aux = 'all',
                aux_depends = 'model',
                time = 'all',
                latitude = 'all',
                latitude_reorder = Sort(),
                longitude = 'all',
                longitude_reorder = circularsort,
                synonims = list(latitude = c('lat', 'latitude'),
                                longitude = c('lon', 'longitude'),
                                ensemble = c('member', 'ensemble')),
                ensemble = 'all',
                metadata_dims = 'var',
                # Pad to the largest ensemble size across systems (smaller
                # ensembles are NA-filled)
                largest_dims_length = TRUE,
                return_vars = list(latitude = 'dat',
                                   longitude = 'dat',
                                   time = 'file_date'),
                split_multiselected_dims = split_multiselected_dims,
                retrieve = TRUE)
  ## Removing "aux" dimension (needed to load the data)
  hcst <- Subset(x = hcst, along = 'aux', indices = 1, drop = 'selected')

  #############################
  #NOTE: NOT TESTED YET
  if (store.freq %in% c("daily_mean", "daily")) {
    # Adjusts dims for daily case, could be removed if startR allows
    # multidim split
    names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear"
    default_dims <- c(dat = 1, var = 1, sday = 1,
                      sweek = 1, syear = 1, time = 1,
                      latitude = 1, longitude = 1, ensemble = 1)
    default_dims[names(dim(hcst))] <- dim(hcst)
    dim(hcst) <- default_dims
    # Change time attribute dimensions
    default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1)
    names(dim(attr(hcst, "Variables")$common$time))[which(names(
      dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear"
    default_time_dims[names(dim(attr(hcst, "Variables")$common$time))] <-
      dim(attr(hcst, "Variables")$common$time)
    dim(attr(hcst, "Variables")$common$time) <- default_time_dims
  }
  ###############################

  # Convert hcst to s2dv_cube object
  hcst <- as.s2dv_cube(hcst)
  # Adjust dates for models where the time stamp goes into the next month
  # (assumes exp_descrip carries a valid time_stamp_lag -- see the
  # NOTE(review) above about exp_descrip being indexed with a vector).
  if (recipe$Analysis$Variables$freq == "monthly_mean") {
    hcst$attrs$Dates[] <- hcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag)
  }

  # Load forecast
  #-------------------------------------------------------------------
  if (!is.null(recipe$Analysis$Time$fcst_year)) {
    # Same call shape as the hindcast, but over the forecast start dates.
    fcst <- Start(dat = fcst.path,
                  var = variable,
                  file_date = sdates$fcst,
                  model = 'all',
                  aux = 'all',
                  aux_depends = 'model',
                  time = 'all',
                  latitude = 'all',
                  latitude_reorder = Sort(),
                  longitude = 'all',
                  longitude_reorder = circularsort,
                  synonims = list(latitude = c('lat', 'latitude'),
                                  longitude = c('lon', 'longitude'),
                                  ensemble = c('member', 'ensemble')),
                  ensemble = 'all',
                  metadata_dims = 'var',
                  largest_dims_length = TRUE,
                  return_vars = list(latitude = 'dat',
                                     longitude = 'dat',
                                     time = 'file_date'),
                  split_multiselected_dims = split_multiselected_dims,
                  retrieve = TRUE)
    ## Removing "aux" dimension (needed to load the data)
    fcst <- Subset(x = fcst, along = 'aux', indices = 1, drop = 'selected')

    #############################
    #NOTE: NOT TESTED YET
    if (store.freq %in% c("daily_mean", "daily")) {
      # Adjusts dims for daily case, could be removed if startR allows
      # multidim split
      names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear"
      default_dims <- c(dat = 1, var = 1, sday = 1,
                        sweek = 1, syear = 1, time = 1,
                        latitude = 1, longitude = 1, ensemble = 1)
      default_dims[names(dim(fcst))] <- dim(fcst)
      dim(fcst) <- default_dims
      # Change time attribute dimensions
      default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1)
      names(dim(attr(fcst, "Variables")$common$time))[which(names(
        dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear"
      default_time_dims[names(dim(attr(fcst, "Variables")$common$time))] <-
        dim(attr(fcst, "Variables")$common$time)
      dim(attr(fcst, "Variables")$common$time) <- default_time_dims
    }
    #############################

    # Convert fcst to s2dv_cube
    fcst <- as.s2dv_cube(fcst)
    # Adjust dates for models where the time stamp goes into the next month
    if (recipe$Analysis$Variables$freq == "monthly_mean") {
      fcst$attrs$Dates[] <-
        fcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag)
    }

  } else {
    fcst <- NULL
  }

  # Load reference
  #-------------------------------------------------------------------

  if (store.freq == "monthly_mean") {

    obs <- Start(dat = obs.path,
                 var = variable,
                 file_date = sdates$hcst,
                 time = 'all',
                 latitude = 'all',
                 latitude_reorder = Sort(),
                 longitude = 'all',
                 longitude_reorder = circularsort,
                 synonims = list(latitude = c('lat','latitude'),
                                 longitude = c('lon','longitude')),
                 metadata_dims = 'var',
                 return_vars = list(latitude = 'dat',
                                    longitude = 'dat',
                                    time = 'file_date'),
                 split_multiselected_dims = TRUE,
                 retrieve = TRUE)

  } else if (store.freq %in% c("daily_mean", "daily")) {

    #############################
    #NOTE: NOT TESTED YET

    # Obtain dates and date dimensions from the loaded hcst data to make sure
    # the corresponding observations are loaded correctly.
    dates <- hcst$attrs$Dates
    dim(dates) <- hcst$dims[c("sday", "sweek", "syear", "time")]

    # Get year and month for file_date
    dates_file <- sapply(dates, format, '%Y%m')
    dim(dates_file) <- dim(dates)
    # Set hour to 12:00 to ensure correct date retrieval for daily data
    lubridate::hour(dates) <- 12
    lubridate::minute(dates) <- 00
    # Restore correct dimensions
    dim(dates) <- dim(dates_file)

    obs <- Start(dat = obs.path,
                 var = variable,
                 file_date = sort(unique(dates_file)),
                 time = dates,
                 time_var = 'time',
                 time_across = 'file_date',
                 merge_across_dims = TRUE,
                 merge_across_dims_narm = TRUE,
                 latitude = 'all',
                 latitude_reorder = Sort(),
                 longitude = 'all',
                 longitude_reorder = circularsort,
                 synonims = list(latitude = c('lat','latitude'),
                                 longitude = c('lon','longitude')),
                 metadata_dims = 'var',
                 return_vars = list(latitude = 'dat',
                                    longitude = 'dat',
                                    time = 'file_date'),
                 split_multiselected_dims = TRUE,
                 retrieve = TRUE)
    #############################

  }

  # Adds ensemble dim to obs (for consistency with hcst/fcst)
  default_dims <- c(dat = 1, var = 1, sday = 1,
                    sweek = 1, syear = 1, time = 1,
                    latitude = 1, longitude = 1, ensemble = 1)
  default_dims[names(dim(obs))] <- dim(obs)
  dim(obs) <- default_dims

  # Convert obs to s2dv_cube
  obs <- as.s2dv_cube(obs)

  # Check for consistency between hcst and obs grid: log the mismatch and
  # abort, since verification against a different grid would be meaningless.
  if (!isTRUE(all.equal(as.vector(hcst$coords$latitude), as.vector(obs$coords$latitude)))) {
    lat_error_msg <- paste("Latitude mismatch between hcst and obs.",
                           "Please check the original grids and the",
                           "regrid parameters in your recipe.")
    error(recipe$Run$logger, lat_error_msg)
    hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1],
                           "; Last hcst lat: ", hcst$lat[length(hcst$lat)])
    info(recipe$Run$logger, hcst_lat_msg)
    obs_lat_msg <- paste0("First obs lat: ", obs$lat[1],
                          "; Last obs lat: ", obs$lat[length(obs$lat)])
    info(recipe$Run$logger, obs_lat_msg)
    stop("hcst and obs don't share the same latitudes.")
  }
  if (!isTRUE(all.equal(as.vector(hcst$coords$longitude), as.vector(obs$coords$longitude)))) {
    lon_error_msg <- paste("Longitude mismatch between hcst and obs.",
                           "Please check the original grids and the",
                           "regrid parameters in your recipe.")
    error(recipe$Run$logger, lon_error_msg)
    hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1],
                           "; Last hcst lon: ", hcst$lon[length(hcst$lon)])
    info(recipe$Run$logger, hcst_lon_msg)
    obs_lon_msg <- paste0("First obs lon: ", obs$lon[1],
                          "; Last obs lon: ", obs$lon[length(obs$lon)])
    info(recipe$Run$logger, obs_lon_msg)
    stop("hcst and obs don't share the same longitudes.")
  }

  # Print a summary of the loaded data for the user, for each object
  if (recipe$Run$logger$threshold <= 2) {
    data_summary(hcst, recipe)
    data_summary(obs, recipe)
    if (!is.null(fcst)) {
      data_summary(fcst, recipe)
    }
  }

  info(recipe$Run$logger,
       "##### DATA LOADING COMPLETED SUCCESSFULLY #####")

  return(list(hcst = hcst, fcst = fcst, obs = obs))
}
check_recipe <- function(recipe) { error_status <- T } if (!tolower(recipe$Analysis$Datasets$Multimodel$approach) %in% - c('mean','pooled')){ + c('mean','median','pooled')){ error(recipe$Run$logger, paste("The specified approach for the multimodel is not valid.", - "Please specify mean or pooled.")) + "Please specify mean, median or pooled.")) + error_status <- T + } + if (!tolower(recipe$Analysis$Datasets$Multimodel$createFrom) %in% + c('Calibration','Anomalies','Indicators')){ + error(recipe$Run$logger, + paste("The specified approach for the multimodel is not valid.", + "Please specify Calibration, Anomalies, Indicators.")) error_status <- T } } else { -- GitLab From 7085cbbd38caec99f7a4db8311363ec7d1265775 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Wed, 29 Nov 2023 12:40:36 +0100 Subject: [PATCH 33/91] . --- recipes/recipe_multimodel_seasonal.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/recipe_multimodel_seasonal.yml b/recipes/recipe_multimodel_seasonal.yml index 9cf1eb01..1f629af8 100644 --- a/recipes/recipe_multimodel_seasonal.yml +++ b/recipes/recipe_multimodel_seasonal.yml @@ -30,7 +30,7 @@ Analysis: - {name: "region1", latmin: 34, latmax: 44, lonmin: -10, lonmax: 6} Regrid: method: bilinear # Mandatory, str: Interpolation method. See docu. 
- type: "to_system" + type: "to_reference" Workflow: Anomalies: compute: no -- GitLab From c233a64fec9bee7bd00c5a68f828261864f4f176 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Wed, 29 Nov 2023 17:29:24 +0100 Subject: [PATCH 34/91] renamed --- .../{example_multimodel.R => example_multimodel_seasonal.R} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename example_scripts/{example_multimodel.R => example_multimodel_seasonal.R} (100%) diff --git a/example_scripts/example_multimodel.R b/example_scripts/example_multimodel_seasonal.R similarity index 100% rename from example_scripts/example_multimodel.R rename to example_scripts/example_multimodel_seasonal.R -- GitLab From d07a3982338301507bc5d37a4f007871e4676152 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Wed, 29 Nov 2023 18:38:25 +0100 Subject: [PATCH 35/91] corrected check --- tools/check_recipe.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 60658629..a5ab2894 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -113,9 +113,9 @@ check_recipe <- function(recipe) { error_status <- T } if (!tolower(recipe$Analysis$Datasets$Multimodel$createFrom) %in% - c('Calibration','Anomalies','Indicators')){ + c('calibration','anomalies','indicators')){ error(recipe$Run$logger, - paste("The specified approach for the multimodel is not valid.", + paste("The specified 'createFrom' for the multimodel is not valid.", "Please specify Calibration, Anomalies, Indicators.")) error_status <- T } -- GitLab From d7934199f37e3220e1e58f13e8d2930910afe134 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Wed, 29 Nov 2023 18:38:35 +0100 Subject: [PATCH 36/91] . 
--- example_scripts/example_multimodel_seasonal.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example_scripts/example_multimodel_seasonal.R b/example_scripts/example_multimodel_seasonal.R index 6b84dee5..1ba9a64f 100644 --- a/example_scripts/example_multimodel_seasonal.R +++ b/example_scripts/example_multimodel_seasonal.R @@ -33,7 +33,7 @@ for (recipe_file in atomic_recipe_files){ # Load datasets and create multimodel data <- Multimodel(recipe) - probabilities <- data$probs + probabilities <- data$prob data <- data$data } else { -- GitLab From 67c691f0ad44bdb79a8fa21c4b4825d1c6b3a9d5 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Wed, 29 Nov 2023 18:39:51 +0100 Subject: [PATCH 37/91] included the multimodel name and approach in the figures --- modules/Visualization/R/plot_ensemble_mean.R | 7 ++++++- modules/Visualization/R/plot_most_likely_terciles_map.R | 7 ++++++- modules/Visualization/R/plot_skill_metrics.R | 7 ++++++- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R index 3d00742d..c72a4335 100644 --- a/modules/Visualization/R/plot_ensemble_mean.R +++ b/modules/Visualization/R/plot_ensemble_mean.R @@ -7,7 +7,12 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o latitude <- fcst$coords$lat longitude <- fcst$coords$lon archive <- get_archive(recipe) - system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name + if (recipe$Analysis$Datasets$System$name == 'Multimodel'){ + system_name <- paste0('Multimodel-', + recipe$Analysis$Datasets$Multimodel$approach) + } else { + system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name + } start_date <- paste0(recipe$Analysis$Time$fcst_year, recipe$Analysis$Time$sdate) if (tolower(recipe$Analysis$Horizon) == "seasonal") { diff --git a/modules/Visualization/R/plot_most_likely_terciles_map.R 
b/modules/Visualization/R/plot_most_likely_terciles_map.R index 5d60f8c1..65b587fc 100644 --- a/modules/Visualization/R/plot_most_likely_terciles_map.R +++ b/modules/Visualization/R/plot_most_likely_terciles_map.R @@ -27,7 +27,12 @@ plot_most_likely_terciles <- function(recipe, latitude <- fcst$coords$lat longitude <- fcst$coords$lon archive <- get_archive(recipe) - system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name + if (recipe$Analysis$Datasets$System$name == 'Multimodel'){ + system_name <- paste0('Multimodel-', + recipe$Analysis$Datasets$Multimodel$approach) + } else { + system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name + } start_date <- paste0(recipe$Analysis$Time$fcst_year, recipe$Analysis$Time$sdate) if (tolower(recipe$Analysis$Horizon) == "seasonal") { diff --git a/modules/Visualization/R/plot_skill_metrics.R b/modules/Visualization/R/plot_skill_metrics.R index b4c2b273..98730e4b 100644 --- a/modules/Visualization/R/plot_skill_metrics.R +++ b/modules/Visualization/R/plot_skill_metrics.R @@ -23,7 +23,12 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, latitude <- data_cube$coords$lat longitude <- data_cube$coords$lon archive <- get_archive(recipe) - system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name + if (recipe$Analysis$Datasets$System$name == 'Multimodel'){ + system_name <- paste0('Multimodel-', + recipe$Analysis$Datasets$Multimodel$approach) + } else { + system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name + } hcst_period <- paste0(recipe$Analysis$Time$hcst_start, "-", recipe$Analysis$Time$hcst_end) if (tolower(recipe$Analysis$Horizon) == "seasonal") { -- GitLab From 7f3ad596861392df7ac465d3cd1eb77e62548058 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Thu, 30 Nov 2023 15:19:29 +0100 Subject: [PATCH 38/91] filename of atomic recipes include the splitted dimensions --- tools/divide_recipe.R | 15 ++++++++++----- 1 file changed, 10 
insertions(+), 5 deletions(-) diff --git a/tools/divide_recipe.R b/tools/divide_recipe.R index 728f8dc5..46cc14ab 100644 --- a/tools/divide_recipe.R +++ b/tools/divide_recipe.R @@ -160,11 +160,16 @@ divide_recipe <- function(recipe) { } # Rest of horizons # Save all recipes in separate YAML files for (reci in 1:length(all_recipes)) { - if (reci < 10) { - recipe_number <- paste0("0", reci) - } else { - recipe_number <- reci - } + # if (reci < 10) { + # recipe_number <- paste0("0", reci) + # } else { + # recipe_number <- reci + # } + recipe_number <- paste0('var-',all_recipes[[reci]]$Analysis$Variables$name, + '_sys-',all_recipes[[reci]]$Analysis$Datasets$System$name, + '_ref-',all_recipes[[reci]]$Analysis$Datasets$Reference$name, + '_reg-',all_recipes[[reci]]$Analysis$Region$name, + '_sdate-',all_recipes[[reci]]$Analysis$Time$sdate) write_yaml(all_recipes[[reci]], paste0(recipe$Run$output_dir, "/logs/recipes/atomic_recipe_", recipe_number, ".yml")) -- GitLab From be5e141687a31ee24fe4f598c6c5dd8fedd587de Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Thu, 30 Nov 2023 16:59:54 +0100 Subject: [PATCH 39/91] removing dots in folders with model names --- tools/divide_recipe.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/divide_recipe.R b/tools/divide_recipe.R index 46cc14ab..4b46d113 100644 --- a/tools/divide_recipe.R +++ b/tools/divide_recipe.R @@ -166,7 +166,7 @@ divide_recipe <- function(recipe) { # recipe_number <- reci # } recipe_number <- paste0('var-',all_recipes[[reci]]$Analysis$Variables$name, - '_sys-',all_recipes[[reci]]$Analysis$Datasets$System$name, + '_sys-',gsub('\\.','',all_recipes[[reci]]$Analysis$Datasets$System$name), '_ref-',all_recipes[[reci]]$Analysis$Datasets$Reference$name, '_reg-',all_recipes[[reci]]$Analysis$Region$name, '_sdate-',all_recipes[[reci]]$Analysis$Time$sdate) -- GitLab From 139d41dba500c3a5a16f85aecf08ddf30594995a Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Thu, 30 Nov 2023 17:00:23 +0100 
Subject: [PATCH 40/91] fixed the path for loading individual model outputs --- modules/Multimodel/R/load_multimodel_seasonal.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/Multimodel/R/load_multimodel_seasonal.R b/modules/Multimodel/R/load_multimodel_seasonal.R index 6b9aa788..9dad9871 100644 --- a/modules/Multimodel/R/load_multimodel_seasonal.R +++ b/modules/Multimodel/R/load_multimodel_seasonal.R @@ -10,7 +10,7 @@ load_multimodel_seasonal <- function(recipe) { exp.name <- recipe$Analysis$Datasets$System$models store.freq <- recipe$Analysis$Variables$freq variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] - exp_descrip <- archive$System[[exp.name]] + exp_descrip <- archive$System[[exp.name[1]]] reference_descrip <- archive$Reference[[ref.name]] sdates <- dates2load(recipe, recipe$Run$logger) @@ -33,7 +33,7 @@ load_multimodel_seasonal <- function(recipe) { "$var$_$file_date$.nc") hcst.path <- gsub(variable[1], "$var$", hcst.path) hcst.path <- gsub('Multimodel', "$model$", hcst.path) - hcst.path <- gsub('atomic_recipe_\\d{2}_\\d{14}', "$aux$", hcst.path) + hcst.path <- gsub('_\\d{14}/outputs', "_$aux$/outputs", hcst.path) fcst.path <- obs.path <- hcst.path obs.path <- gsub("_$file_date$", "-obs_$file_date$", obs.path, fixed = T) obs.path <- gsub("$model$", gsub('\\.','',exp.name[1]), obs.path, fixed = T) -- GitLab From bf234f52c7d7fca230f2fd512c8b5cad90d5e27a Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Thu, 30 Nov 2023 17:37:41 +0100 Subject: [PATCH 41/91] improved script --- example_scripts/example_multimodel_seasonal.R | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/example_scripts/example_multimodel_seasonal.R b/example_scripts/example_multimodel_seasonal.R index 1ba9a64f..68b16cbf 100644 --- a/example_scripts/example_multimodel_seasonal.R +++ b/example_scripts/example_multimodel_seasonal.R @@ -13,18 +13,32 @@ source("modules/Saving/Saving.R") 
source("modules/Visualization/Visualization.R") source("modules/Multimodel/Multimodel.R") -## Complete recipe to be splitted +## Cleaning output directory and splitting recipe sunset_outputs_folder <- '/esarchive/scratch/cdelgado/sunset_outputs/' recipe_file <- "recipes/recipe_multimodel_seasonal.yml" -system(paste0('rm -r ',sunset_outputs_folder,'recipe_multimodel_seasonal_*')) +# system(paste0('rm -r ',sunset_outputs_folder,'recipe_multimodel_seasonal_*')) system(paste0('Rscript split.R ',recipe_file)) +## Finding atomic recipes atomic_recipe_folder <- paste0(sunset_outputs_folder,list.files(sunset_outputs_folder),'/logs/recipes/') +atomic_recipe_folder <- atomic_recipe_folder[length(atomic_recipe_folder)] atomic_recipe_files <- list.files(atomic_recipe_folder) atomic_recipe_files <- atomic_recipe_files[-length(atomic_recipe_files)] atomic_recipe_files <- paste0(atomic_recipe_folder,atomic_recipe_files) -for (recipe_file in atomic_recipe_files){ +## Splitting recipes to run multimodel after individual models +atomic_recipes_models <- c() +atomic_recipes_multimodel <- c() +for (r in atomic_recipe_files){ + if (grepl('sys-Multimodel',r)){ + atomic_recipes_multimodel <- c(atomic_recipes_multimodel,r) + } else { + atomic_recipes_models <- c(atomic_recipes_models,r) + } +} + +## Running atomic recipes +for (recipe_file in c(atomic_recipes_models,atomic_recipes_multimodel)){ # Read recipe recipe <- prepare_outputs(recipe_file) @@ -32,9 +46,9 @@ for (recipe_file in atomic_recipe_files){ if (recipe$Analysis$Datasets$System$name == 'Multimodel'){ # Load datasets and create multimodel - data <- Multimodel(recipe) - probabilities <- data$prob - data <- data$data + mm <- Multimodel(recipe) + data <- mm$data + probabilities <- mm$prob } else { @@ -42,8 +56,6 @@ for (recipe_file in atomic_recipe_files){ data <- Loading(recipe) # Change units data <- Units(recipe, data) - # Calibrate datasets - data <- Calibration(recipe, data) # Compute anomalies data <- Anomalies(recipe, 
data) # Compute percentiles and probability bins -- GitLab From 16a885c5a80084f8b1f9c59dd4eadbe5b44d11eb Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Thu, 30 Nov 2023 17:38:12 +0100 Subject: [PATCH 42/91] added more variables, models, etc to be splitted --- recipes/recipe_multimodel_seasonal.yml | 34 ++++++++++++++++++++------ 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/recipes/recipe_multimodel_seasonal.yml b/recipes/recipe_multimodel_seasonal.yml index 1f629af8..f8b59a6a 100644 --- a/recipes/recipe_multimodel_seasonal.yml +++ b/recipes/recipe_multimodel_seasonal.yml @@ -6,34 +6,52 @@ Analysis: Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal Variables: - {name: tas, freq: monthly_mean, units: C} - # - {name: prlr, freq: monthly_mean, units: mm, flux: no} + - {name: prlr, freq: monthly_mean, units: mm, flux: no} + - {name: psl, freq: monthly_mean, units: hPa} + - {name: sfcWind, freq: monthly_mean, units: ms-1} Datasets: System: - {name: ECMWF-SEAS5.1} + - {name: CMCC-SPS3.5} - {name: DWD-GCFS2.1} + - {name: Meteo-France-System8} + - {name: UK-MetOffice-Glosea601} + - {name: ECCC-GEM5-NEMO} + - {name: NCEP-CFSv2} Multimodel: execute: both # Mandatory: Either both, yes/true or no/false approach: pooled #mean, median - createFrom: Calibration + createFrom: Anomalies Reference: - {name: ERA5} # Mandatory, str: Reference codename. See docu. 
Time: sdate: - - '0801' ## MMDD - # - '0901' ## MMDD + - '0101' ## MMDD + - '0201' + - '0301' + - '0401' + - '0501' + - '0601' + - '0701' + - '0801' + - '0901' + - '1001' + - '1101' + - '1201' fcst_year: '2023' # Optional, int: Forecast year 'YYYY' hcst_start: '2007' # Mandatory, int: Hindcast start year 'YYYY' hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' ftime_min: 1 # Mandatory, int: First leadtime time step in months ftime_max: 2 # Mandatory, int: Last leadtime time step in months Region: - - {name: "region1", latmin: 34, latmax: 44, lonmin: -10, lonmax: 6} + - {name: "Spain", latmin: 34, latmax: 44, lonmin: -10, lonmax: 6} + - {name: "Germany", latmin: 45, latmax: 56, lonmin: 4, lonmax: 17} Regrid: - method: bilinear # Mandatory, str: Interpolation method. See docu. - type: "to_reference" + method: conservative # Mandatory, str: Interpolation method. See docu. + type: "r360x180" Workflow: Anomalies: - compute: no + compute: yes cross_validation: yes save: none Calibration: -- GitLab From c1ea81b38f503d9400dc5cbbb25c9926e011f58f Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Thu, 30 Nov 2023 17:57:56 +0100 Subject: [PATCH 43/91] fixed check for multimodel --- tools/check_recipe.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index a5ab2894..3ec33e5a 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -97,7 +97,7 @@ check_recipe <- function(recipe) { } # Check multimodel if (!is.null(recipe$Analysis$Datasets$Multimodel) && - !recipe$Analysis$Datasets$Multimodel %in% c('no','false')){ + !tolower(recipe$Analysis$Datasets$Multimodel) %in% c('no','false')){ if (!tolower(recipe$Analysis$Datasets$Multimodel$execute) %in% c('yes','true','no','false','both')){ error(recipe$Run$logger, -- GitLab From 8b6d8da33fcc70071f1c4bda649bea1bec63fce7 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 1 Dec 2023 11:40:35 +0100 Subject: [PATCH 44/91] error for multimodel mean and 
median (until Skill accepts probabilities) --- tools/check_recipe.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 3ec33e5a..79867086 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -106,10 +106,10 @@ check_recipe <- function(recipe) { error_status <- T } if (!tolower(recipe$Analysis$Datasets$Multimodel$approach) %in% - c('mean','median','pooled')){ + c('pooled')){ #,'mean','median')){ error(recipe$Run$logger, paste("The specified approach for the multimodel is not valid.", - "Please specify mean, median or pooled.")) + "Please specify pooled.")) #, mean or median.")) error_status <- T } if (!tolower(recipe$Analysis$Datasets$Multimodel$createFrom) %in% -- GitLab From ba8bce3a0e1fffef70e7f6bb797e3bdfd5e99c3b Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 1 Dec 2023 11:41:29 +0100 Subject: [PATCH 45/91] using read_atomic_recipe instead of prepare_outputs --- example_scripts/example_multimodel_seasonal.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example_scripts/example_multimodel_seasonal.R b/example_scripts/example_multimodel_seasonal.R index 68b16cbf..61e5cd49 100644 --- a/example_scripts/example_multimodel_seasonal.R +++ b/example_scripts/example_multimodel_seasonal.R @@ -41,7 +41,7 @@ for (r in atomic_recipe_files){ for (recipe_file in c(atomic_recipes_models,atomic_recipes_multimodel)){ # Read recipe - recipe <- prepare_outputs(recipe_file) + recipe <- read_atomic_recipe(recipe_file) if (recipe$Analysis$Datasets$System$name == 'Multimodel'){ -- GitLab From aa1f125c622185e7734523afeafc7839db9ed06c Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 1 Dec 2023 12:05:38 +0100 Subject: [PATCH 46/91] fixed checks for multimodel grid --- tools/check_recipe.R | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 79867086..5061f146 100644 --- a/tools/check_recipe.R 
+++ b/tools/check_recipe.R @@ -203,13 +203,16 @@ check_recipe <- function(recipe) { "The 'Regrid' element must specify the 'method' and 'type'.") error_status <- T } - if (recipe$Analysis$Regrid$type == 'to_system' && - tolower(recipe$Analysis$Datasets$Multimodel$execute) - %in% c('both','yes','true')) { - error(recipe$Run$logger, - paste0("The 'Regrid$type' cannot be 'to_system' if ", - "'Multimodel$execute' is yes/true or both.")) - error_status <- T + if (!is.null(recipe$Analysis$Datasets$Multimodel) && + !tolower(recipe$Analysis$Datasets$Multimodel) %in% c('no','false')){ + if (recipe$Analysis$Regrid$type == 'to_system' && + tolower(recipe$Analysis$Datasets$Multimodel$execute) + %in% c('both','yes','true')) { + error(recipe$Run$logger, + paste0("The 'Regrid$type' cannot be 'to_system' if ", + "'Multimodel$execute' is yes/true or both.")) + error_status <- T + } } # TODO: Add Workflow checks? # ... -- GitLab From d48720b38ce6e365bbfc933620d508a58db4a39f Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 1 Dec 2023 12:43:30 +0100 Subject: [PATCH 47/91] included institutions --- conf/archive_decadal.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/conf/archive_decadal.yml b/conf/archive_decadal.yml index 9a0453ea..cf871842 100644 --- a/conf/archive_decadal.yml +++ b/conf/archive_decadal.yml @@ -84,7 +84,7 @@ esarchive: # ---- HadGEM3-GC31-MM: name: "HadGEM3-GC31-MM" - institution: + institution: "Met Office Hadley Centre" src: hcst: "exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/" fcst: "exp/CMIP6/dcppB-forecast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppB-forecast/" @@ -148,7 +148,7 @@ esarchive: #NOTE: no data there CESM1-1-CAM5-CMIP5: name: "CESM1-1-CAM5-CMIP5" - institution: + institution: "National Center for Atmospheric Research" src: hcst: "exp/ncar/cesm-dple-dcppA-hindcast/cmorfiles/DCPP/NCAR/CESM1-1-CAM5-CMIP5/dcppA-hindcast" fcst: @@ -169,7 +169,7 @@ esarchive: #NOTE: in 
tapes CMCC-CM2-SR5: name: "CMCC-CM2-SR5" - institution: + institution: "Euro-Mediterranean Center on Climate Change" src: hcst: "exp/CMIP6/dcppA-hindcast/CMCC-CM2-SR5/DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/" fcst: "exp/CMIP6/dcppB-forecast/CMCC-CM2-SR5/DCPP/CMCC/CMCC-CM2-SR5/dcppB-forecast/" @@ -210,7 +210,7 @@ esarchive: # ---- IPSL-CM6A-LR: name: "IPSL-CM6A-LR" - institution: "IPSL" + institution: "Institut Pierre-Simon Laplace" src: hcst: "exp/CMIP6/dcppA-hindcast/IPSL-CM6A-LR/DCPP/IPSL/IPSL-CM6A-LR/dcppA-hindcast/" fcst: @@ -229,7 +229,7 @@ esarchive: # ---- MIROC6: name: "MIROC6" - institution: "MIROC" + institution: "Model for Interdisciplinary Research on Climate" src: hcst: "exp/CMIP6/dcppA-hindcast/MIROC6/DCPP/MIROC/MIROC6/dcppA-hindcast/" fcst: @@ -248,7 +248,7 @@ esarchive: # ---- MPI-ESM1.2-HR: name: "MPI-ESM1.2-HR" - institution: "MIROC" + institution: "Max-Planck-Institute for Meteorology" src: hcst: "exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-HR/DCPP/MPI-M/MPI-ESM1-2-HR/dcppA-hindcast/" fcst: @@ -306,7 +306,7 @@ esarchive: #NOTE: NorCPM1-i1 and i2 are under the same directory NorCPM1-i1: name: "NorCPM1-i1" - institution: "NCC" + institution: "NorESM Climate modeling Consortium" src: hcst: "exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/" fcst: @@ -325,7 +325,7 @@ esarchive: # ---- NorCPM1-i2: name: "NorCPM1-i2" - institution: "NCC" + institution: "NorESM Climate modeling Consortium" src: hcst: "exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/" fcst: -- GitLab From 7e3299b5fc668d13fa5c0d427b5b61d333165a08 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 1 Dec 2023 12:45:58 +0100 Subject: [PATCH 48/91] same script for seasonal and decadal multimodel --- .../{example_multimodel_seasonal.R => example_multimodel.R} | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) rename example_scripts/{example_multimodel_seasonal.R => example_multimodel.R} (95%) diff --git a/example_scripts/example_multimodel_seasonal.R 
b/example_scripts/example_multimodel.R similarity index 95% rename from example_scripts/example_multimodel_seasonal.R rename to example_scripts/example_multimodel.R index 61e5cd49..3ea86c31 100644 --- a/example_scripts/example_multimodel_seasonal.R +++ b/example_scripts/example_multimodel.R @@ -13,9 +13,11 @@ source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") source("modules/Multimodel/Multimodel.R") +horizon <- 'decadal' # seasonal + ## Cleaning output directory and splitting recipe sunset_outputs_folder <- '/esarchive/scratch/cdelgado/sunset_outputs/' -recipe_file <- "recipes/recipe_multimodel_seasonal.yml" +recipe_file <- paste0("recipes/recipe_multimodel_",horizon,".yml") # system(paste0('rm -r ',sunset_outputs_folder,'recipe_multimodel_seasonal_*')) system(paste0('Rscript split.R ',recipe_file)) -- GitLab From 405b5ae2bac616ea132130cd8213f7a11cf2ef75 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 1 Dec 2023 13:28:22 +0100 Subject: [PATCH 49/91] adapted for saving the members for each model for decadal --- tools/divide_recipe.R | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/tools/divide_recipe.R b/tools/divide_recipe.R index 4b46d113..ba987f03 100644 --- a/tools/divide_recipe.R +++ b/tools/divide_recipe.R @@ -49,9 +49,9 @@ divide_recipe <- function(recipe) { recipe$Analysis$Datasets$System[[1]] <- system } # Modify the saving of the individual models in case multimodel is yes or both - if (recipe$Analysis$Datasets$Multimodel$execute %in% c(TRUE, 'both')){ + if (tolower(recipe$Analysis$Datasets$Multimodel$execute) %in% c('true','yes','both')){ n_models <- length(recipe$Analysis$Datasets$System)+1 - mm <- TRUE + mm <- tolower(recipe$Analysis$Datasets$Multimodel$approach) } else { n_models <- length(recipe$Analysis$Datasets$System) mm <- FALSE @@ -59,9 +59,15 @@ divide_recipe <- function(recipe) { for (sys in 1:n_models) { for (reci in 1:length(all_recipes)) { if (sys == 
length(recipe$Analysis$Datasets$System)+1){ + # seasonal only needs the model name; decadal also needs the members + if (tolower(recipe$Analysis$Horizon) == 'seasonal') { + m <- unlist(recipe$Analysis$Datasets$System) + } else if (tolower(recipe$Analysis$Horizon) == 'decadal') { + m <- recipe$Analysis$Datasets$System + } all_recipes[[reci]]$Analysis$Datasets <- list(System = list(name = 'Multimodel', - models = unlist(recipe$Analysis$Datasets$System)), + models = m), Multimodel = recipe$Analysis$Datasets$Multimodel, Reference = NULL) } else { @@ -79,9 +85,12 @@ divide_recipe <- function(recipe) { } all_recipes <- recipes rm(list = 'recipes') - for (reci in 1:length(all_recipes)){ - if (isTRUE(mm) && all_recipes[[reci]]$Analysis$Datasets$System$name != 'Multimodel'){ + for (reci in 1:length(all_recipes)) { + if (!isFALSE(mm) && tolower(all_recipes[[reci]]$Analysis$Datasets$System$name) != 'multimodel') { all_recipes[[reci]]$Analysis$Workflow[[recipe$Analysis$Datasets$Multimodel$createFrom]]$save <- 'all' + if (mm %in% c('mean','median')) { + all_recipes[[reci]]$Analysis$Workflow$Probabilities$save <- 'all' + } } } -- GitLab From 66c6f2131cdff9b697765056da3d6d608a30db35 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 1 Dec 2023 13:28:41 +0100 Subject: [PATCH 50/91] decadal multimodel recipe --- recipes/recipe_multimodel_decadal.yml | 76 +++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 recipes/recipe_multimodel_decadal.yml diff --git a/recipes/recipe_multimodel_decadal.yml b/recipes/recipe_multimodel_decadal.yml new file mode 100644 index 00000000..1901ce94 --- /dev/null +++ b/recipes/recipe_multimodel_decadal.yml @@ -0,0 +1,76 @@ +Description: + Author: Carlos Delgado Torres + Info: Test for decadal multi-model + +Analysis: + Horizon: decadal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + - {name: tas, freq: monthly_mean, units: C} + - {name: pr, freq: monthly_mean, units: mm, flux: no} + Datasets: + 
System: + - {name: EC-Earth3-i4, member: r1i4p1f1 r2i4p1f1} + - {name: HadGEM3-GC31-MM, member: r1i1p1f2 r2i1p1f2 r3i1p1f2} + Multimodel: + execute: both # Mandatory: Either both, yes/true or no/false + approach: pooled #mean, median + createFrom: Anomalies + Reference: + - {name: ERA5} # Mandatory, str: Reference codename. See docu. + Time: + fcst_year: '2021' # Optional, int: Forecast year 'YYYY' + hcst_start: '2007' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 2 # Mandatory, int: Last leadtime time step in months + Region: + - {name: "Spain", latmin: 34, latmax: 44, lonmin: -10, lonmax: 6} + - {name: "Germany", latmin: 45, latmax: 56, lonmin: 4, lonmax: 17} + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. + type: "r360x180" + Workflow: + Anomalies: + compute: yes + cross_validation: yes + save: none + Calibration: + method: evmos # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: EnsCorr rpss + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. 
+ save: 'all' + Indicators: + index: no + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles + multi_panel: no + dots: both + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: s2s4e # scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: esarchive + output_dir: /esarchive/scratch/cdelgado/sunset_outputs/ # replace with the directory where you want to save the outputs + code_dir: /esarchive/scratch/cdelgado/gitlat/SUNSET/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/cdelgado/gitlat/SUNSET/main_multimodel_seasonal.R # replace with the path to your script + expid: XXXX # replace with your EXPID + hpc_user: bsc32924 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: carlos.delgado@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails -- GitLab From 3d4adbf6b73521970d8e8f6b1e30cfd1368b6591 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 1 Dec 2023 13:29:08 +0100 Subject: [PATCH 51/91] . 
--- example_scripts/example_multimodel.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example_scripts/example_multimodel.R b/example_scripts/example_multimodel.R index 3ea86c31..42e9eddc 100644 --- a/example_scripts/example_multimodel.R +++ b/example_scripts/example_multimodel.R @@ -18,7 +18,7 @@ horizon <- 'decadal' # seasonal ## Cleaning output directory and splitting recipe sunset_outputs_folder <- '/esarchive/scratch/cdelgado/sunset_outputs/' recipe_file <- paste0("recipes/recipe_multimodel_",horizon,".yml") -# system(paste0('rm -r ',sunset_outputs_folder,'recipe_multimodel_seasonal_*')) +# system(paste0('rm -r ',sunset_outputs_folder,'recipe_multimodel_',horizon,'_*')) system(paste0('Rscript split.R ',recipe_file)) ## Finding atomic recipes -- GitLab From 44d44bb0b7f662e79a120f8f2ce1a6cb10714c3a Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 1 Dec 2023 14:02:28 +0100 Subject: [PATCH 52/91] . --- example_scripts/example_multimodel.R | 1 + 1 file changed, 1 insertion(+) diff --git a/example_scripts/example_multimodel.R b/example_scripts/example_multimodel.R index 42e9eddc..e81c0b56 100644 --- a/example_scripts/example_multimodel.R +++ b/example_scripts/example_multimodel.R @@ -23,6 +23,7 @@ system(paste0('Rscript split.R ',recipe_file)) ## Finding atomic recipes atomic_recipe_folder <- paste0(sunset_outputs_folder,list.files(sunset_outputs_folder),'/logs/recipes/') +atomic_recipe_folder <- atomic_recipe_folder[grepl(horizon,atomic_recipe_folder)] atomic_recipe_folder <- atomic_recipe_folder[length(atomic_recipe_folder)] atomic_recipe_files <- list.files(atomic_recipe_folder) atomic_recipe_files <- atomic_recipe_files[-length(atomic_recipe_files)] -- GitLab From ae3c185d2c01b0cf543ac8ed788677765c6a0248 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 1 Dec 2023 14:03:48 +0100 Subject: [PATCH 53/91] prlr instead of pr for observations (in case prlr folder does not exist) --- modules/Loading/R/load_decadal.R | 8 ++++++-- 1 
file changed, 6 insertions(+), 2 deletions(-) diff --git a/modules/Loading/R/load_decadal.R b/modules/Loading/R/load_decadal.R index 48ffe8c5..83822a44 100644 --- a/modules/Loading/R/load_decadal.R +++ b/modules/Loading/R/load_decadal.R @@ -99,14 +99,14 @@ load_decadal <- function(recipe) { if (multi_path & length(variable) > 1) { stop("The recipe requests multiple variables and start dates from both dpccA-hindcast and dcppB-forecast. This case is not available for now.") } - + Start_default_arg_list <- list( dat = path_list, var = variable, syear = paste0(sdates_hcst), chunk = 'all', chunk_depends = 'syear', - time = indices(time_ind), + time = startR::indices(time_ind), time_across = 'chunk', merge_across_dims = TRUE, largest_dims_length = need_largest_dims_length, @@ -279,6 +279,10 @@ load_decadal <- function(recipe) { obs.path <- file.path(archive$src, archive$Reference[[ref.name]]$src, store.freq, "$var$$var_dir$", "$var$_$file_date$.nc") var_dir_obs <- archive$Reference[[ref.name]][[store.freq]][variable] # list(tas = "_f1h-r1440x721cds", tos = "_f1h-r1440x721cds") + if (is.null(unlist(var_dir_obs)) && identical(variable,'pr')){ + var_dir_obs <- archive$Reference[[ref.name]][[store.freq]]['prlr'] + names(var_dir_obs) <- 'pr' + } # obs.path <- file.path(archive$src, archive$Reference[[ref.name]]$src, store.freq, # paste0(variable, archive$Reference[[ref.name]][[store.freq]][[variable]])) -- GitLab From 9a802d7a10e46fcc58529a315040d6c49bccf043 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 1 Dec 2023 16:36:54 +0100 Subject: [PATCH 54/91] . 
--- recipes/recipe_multimodel_decadal.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/recipes/recipe_multimodel_decadal.yml b/recipes/recipe_multimodel_decadal.yml index 1901ce94..558b8104 100644 --- a/recipes/recipe_multimodel_decadal.yml +++ b/recipes/recipe_multimodel_decadal.yml @@ -9,8 +9,8 @@ Analysis: - {name: pr, freq: monthly_mean, units: mm, flux: no} Datasets: System: - - {name: EC-Earth3-i4, member: r1i4p1f1 r2i4p1f1} - - {name: HadGEM3-GC31-MM, member: r1i1p1f2 r2i1p1f2 r3i1p1f2} + - {name: CanESM5, member: r1i1p2f1 r2i1p2f1} + - {name: EC-Earth3-i4, member: r1i4p1f1 r2i4p1f1 r3i4p1f1} Multimodel: execute: both # Mandatory: Either both, yes/true or no/false approach: pooled #mean, median -- GitLab From 44e900bef446cb1fde86a2b54094b0ada8e5a5ed Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 1 Dec 2023 16:37:16 +0100 Subject: [PATCH 55/91] fixed version for pr HadGEM3 --- conf/archive_decadal.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/archive_decadal.yml b/conf/archive_decadal.yml index cf871842..fd18be99 100644 --- a/conf/archive_decadal.yml +++ b/conf/archive_decadal.yml @@ -91,9 +91,9 @@ esarchive: first_dcppB_syear: 2019 monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "ts":"Amon", "tos":"Omon"} - grid: {"tas":"gn", "psl":"gr", "pr":"gr", "ts":"gr", "tos":"gr"} + grid: {"tas":"gn", "psl":"gr", "pr":"gn", "ts":"gr", "tos":"gr"} #version depends on member and variable - version: {"tas":"v20200417", "psl":"v20200316", "pr":"v20200316", "ts":"v20200316", "tos":"v20200417"} + version: {"tas":"v20200417", "psl":"v20200316", "pr":"v20200417", "ts":"v20200316", "tos":"v20200417"} daily_mean: grid: {"tasmin":"gn", "tasmax":"gn", "pr":"gn"} version: {"tasmin":"v20200417", "tasmax":"v20200417", "pr":"v20200417"} -- GitLab From 17c8f8262a339a37810660c6c55092aede2e4049 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 1 Dec 2023 16:50:20 +0100 Subject: [PATCH 
56/91] Add Multimodel to launch_SUNSET.sh (no autosubmit) --- conf/slurm_templates/run_parallel_workflow.sh | 10 ++--- example_scripts/multimodel_seasonal.R | 39 +++++++++++++++++++ launch_SUNSET.sh | 35 +++++++++++++---- modules/Loading/R/get_regrid_params.R | 26 ++++++------- split.R | 7 ++++ tools/divide_recipe.R | 20 +++++++--- 6 files changed, 104 insertions(+), 33 deletions(-) create mode 100644 example_scripts/multimodel_seasonal.R diff --git a/conf/slurm_templates/run_parallel_workflow.sh b/conf/slurm_templates/run_parallel_workflow.sh index 789fe362..e9ef6964 100644 --- a/conf/slurm_templates/run_parallel_workflow.sh +++ b/conf/slurm_templates/run_parallel_workflow.sh @@ -1,7 +1,5 @@ #!/bin/bash -#SBATCH -J SUNSET_verification - # Slurm directive description: # -J: job name @@ -10,9 +8,9 @@ set -vx script=$1 atomic_recipe=$2 -#source MODULES -module load conda/22.11.1-2 -conda activate condaCerise -export LD_LIBRARY_PATH=/perm/cyce/conda/envs/condaCerise/lib +source MODULES +# module load conda/22.11.1-2 +# conda activate condaCerise +# export LD_LIBRARY_PATH=/perm/cyce/conda/envs/condaCerise/lib Rscript ${script} ${atomic_recipe} diff --git a/example_scripts/multimodel_seasonal.R b/example_scripts/multimodel_seasonal.R new file mode 100644 index 00000000..d4e20e44 --- /dev/null +++ b/example_scripts/multimodel_seasonal.R @@ -0,0 +1,39 @@ + +########################################### +### SEASONAL MULTIMODEL TEST - LAUNCHER ### +########################################### + +# Load modules +source("modules/Loading/Loading.R") +source("modules/Units/Units.R") +source("modules/Calibration/Calibration.R") +source("modules/Anomalies/Anomalies.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") +source("modules/Multimodel/Multimodel.R") + +# Read recipe +args = commandArgs(trailingOnly = TRUE) +recipe_file <- args[1] +recipe <- read_atomic_recipe(recipe_file) + +if 
(recipe$Analysis$Datasets$System$name == 'Multimodel') { + # Load datasets and create multimodel + mm <- Multimodel(recipe) + data <- mm$data + probabilities <- mm$prob +} else { + # Load datasets + data <- Loading(recipe) + # Change units + data <- Units(recipe, data) + # Compute anomalies + data <- Anomalies(recipe, data) + # Compute percentiles and probability bins + probabilities <- Probabilities(recipe, data) +} +# Compute skill metrics +skill_metrics <- Skill(recipe, data) +# Plot data +Visualization(recipe, data, skill_metrics, probabilities, significance = T) diff --git a/launch_SUNSET.sh b/launch_SUNSET.sh index 64374e8d..6ba4fe97 100644 --- a/launch_SUNSET.sh +++ b/launch_SUNSET.sh @@ -103,10 +103,10 @@ fi tmpfile=$(mktemp ${TMPDIR-/tmp}/SUNSET.XXXXXX) # Create outdir and split recipes -#source MODULES -module load conda/22.11.1-2 -conda activate condaCerise -export LD_LIBRARY_PATH=/perm/cyce/conda/envs/condaCerise/lib +source MODULES +# module load conda/22.11.1-2 +# conda activate condaCerise +# export LD_LIBRARY_PATH=/perm/cyce/conda/envs/condaCerise/lib Rscript split.R ${recipe} $disable_unique_ID --tmpfile $tmpfile @@ -118,8 +118,10 @@ if [[ $run_method == "sbatch" ]]; then codedir=$( head -2 $tmpfile | tail -1 ) # Retrieve output directory outdir=$( head -3 $tmpfile | tail -1 ) + # Multimodel TRUE/FALSE + multimodel=$( head -4 $tmpfile | tail -1) # Scorecards TRUE/FALSE - scorecards=$( head -4 $tmpfile | tail -1) + scorecards=$( head -5 $tmpfile | tail -1) # Create directory for slurm output logdir=${codedir}/out-logs/slurm_logs/ @@ -133,17 +135,34 @@ if [[ $run_method == "sbatch" ]]; then verification_job_list=() echo "Submitting verification jobs..." 
# Loop over atomic recipes - for atomic_recipe in ${outdir}/logs/recipes/atomic_recipe_??.yml; do + for atomic_recipe in ${outdir}/logs/recipes/atomic_recipe_*.yml; do job_number=$(($job_number + 1)) job_name=$(basename $outdir)_$(printf %02d $job_number) outfile=${logdir}/run-${job_name}.out errfile=${logdir}/run-${job_name}.err # Send batch job and capture job ID - job_ID=$(sbatch --parsable --output=$outfile --error=$errfile --time=$wallclock --cpus-per-task=$cpus $custom_directives conf/slurm_templates/run_parallel_workflow.sh ${script} ${atomic_recipe}) + job_ID=$(sbatch --parsable --job-name="SUNSET_verification" --output=$outfile --error=$errfile --time=$wallclock --cpus-per-task=$cpus $custom_directives conf/slurm_templates/run_parallel_workflow.sh ${script} ${atomic_recipe}) # Add job ID to array verification_job_list+=($job_ID) echo "Submitted batch job $job_ID" done + + multimodel_job_list=() + job_number=0 + echo $multimodel + if [[ $multimodel == "TRUE" ]]; then + for atomic_recipe in ${outdir}/logs/recipes/multimodel/atomic_recipe_*.yml; do + job_number=$(($job_number + 1)) + job_name=$(basename $outdir)_$(printf %02d $job_number) + outfile=${logdir}/run-${job_name}.out + errfile=${logdir}/run-${job_name}.err + # Send batch job and capture job ID + job_ID=$(sbatch --parsable --dependency=afterok:$(IFS=,; echo "${verification_job_list[*]}") --job-name="SUNSET_multimodel" --output=$outfile --error=$errfile --time=$wallclock --cpus-per-task=$cpus $custom_directives conf/slurm_templates/run_parallel_workflow.sh ${script} ${atomic_recipe}) + # Add job ID to array + multimodel_job_list+=($job_ID) + echo "Submitted batch job $job_ID" + done + fi # Submit scorecards job with dependency on verification jobs, passed as a # comma-separated string. The scorecards job will not run until all the @@ -153,7 +172,7 @@ if [[ $run_method == "sbatch" ]]; then echo "Submitting scorecards jobs..." 
outfile=${logdir}/run-scorecards.out errfile=${logdir}/run-scorecards.err - sbatch --dependency=afterok:$(IFS=,; echo "${verification_job_list[*]}") --output=$outfile --error=$errfile --time=01:00:00 conf/slurm_templates/run_scorecards.sh ${recipe} ${outdir} + sbatch --dependency=afterok:$(IFS=,; echo "${verification_job_list[*]} ${multimodel_job_list[*]}") --output=$outfile --error=$errfile --time=01:00:00 conf/slurm_templates/run_scorecards.sh ${recipe} ${outdir} fi fi diff --git a/modules/Loading/R/get_regrid_params.R b/modules/Loading/R/get_regrid_params.R index c95372cd..ef08adcd 100644 --- a/modules/Loading/R/get_regrid_params.R +++ b/modules/Loading/R/get_regrid_params.R @@ -24,6 +24,7 @@ get_regrid_params <- function(recipe, archive) { ## TODO: Multi-model case + ## If multi-model, use the first system grid? ## TODO: 'NULL' entries had to be removed due to bug in Start(). Rewrite when ## the bug is fixed. exp.name <- recipe$Analysis$Datasets$System$name @@ -36,27 +37,27 @@ get_regrid_params <- function(recipe, archive) { regrid_params <- list(fcst.gridtype = reference_descrip$reference_grid, fcst.gridmethod = recipe$Analysis$Regrid$method, fcst.transform = CDORemapper, - obs.gridtype = reference_descrip$reference_grid, - obs.gridmethod = recipe$Analysis$Regrid$method, - obs.transform = CDORemapper) + obs.gridtype = NULL, + obs.gridmethod = NULL, + obs.transform = NULL) } else if (tolower(recipe$Analysis$Regrid$type) == 'to_system') { - regrid_params <- list(fcst.gridtype = exp_descrip$reference_grid, - fcst.gridmethod = recipe$Analysis$Regrid$method, - fcst.transform = CDORemapper, + regrid_params <- list(fcst.gridtype = NULL, + fcst.gridmethod = NULL, + fcst.transform = NULL, obs.gridtype = exp_descrip$reference_grid, obs.gridmethod = recipe$Analysis$Regrid$method, obs.transform = CDORemapper) } else if (tolower(recipe$Analysis$Regrid$type) == 'none') { - regrid_params <- list(fcst.gridtype = exp_descrip$reference_grid, - fcst.gridmethod = 
recipe$Analysis$Regrid$method, - fcst.transform = CDORemapper, - obs.gridtype = reference_descrip$reference_grid, - obs.gridmethod = recipe$Analysis$Regrid$method, - obs.transform = CDORemapper) + regrid_params <- list(fcst.gridtype = NULL, + fcst.gridmethod = NULL, + fcst.transform = NULL, + obs.gridtype = NULL, + obs.gridmethod = NULL, + obs.transform = NULL) } else { regrid_params <- list(fcst.gridtype = recipe$Analysis$Regrid$type, @@ -70,4 +71,3 @@ get_regrid_params <- function(recipe, archive) { return(regrid_params) } - diff --git a/split.R b/split.R index 9bd95e86..e3cbaf80 100755 --- a/split.R +++ b/split.R @@ -48,6 +48,13 @@ if (!is.null(recipe$Run$autosubmit) && (recipe$Run$autosubmit)) { cat(paste0(recipe$Run$code_dir, "\n")) # Output directory cat(paste0(run_parameters$outdir, "\n")) + # Multimodel + if (!is.null(recipe$Analysis$Datasets$Multimodel$execute) && + (recipe$Analysis$Datasets$Multimodel$execute %in% c(TRUE, 'both'))) { + cat("TRUE\n") + } else { + cat("FALSE\n") + } # Scorecards if (!("Scorecards" %in% names(recipe$Analysis$Workflow)) || (!recipe$Analysis$Workflow$Scorecards$execute)) { diff --git a/tools/divide_recipe.R b/tools/divide_recipe.R index 4b46d113..4110b0c6 100644 --- a/tools/divide_recipe.R +++ b/tools/divide_recipe.R @@ -49,8 +49,11 @@ divide_recipe <- function(recipe) { recipe$Analysis$Datasets$System[[1]] <- system } # Modify the saving of the individual models in case multimodel is yes or both - if (recipe$Analysis$Datasets$Multimodel$execute %in% c(TRUE, 'both')){ - n_models <- length(recipe$Analysis$Datasets$System)+1 + if (recipe$Analysis$Datasets$Multimodel$execute %in% c(TRUE, 'both')) { + # Create directory for multimodel recipes + dir.create(paste0(recipe$Run$output_dir, "/logs/recipes/multimodel/"), + recursive = TRUE) + n_models <- length(recipe$Analysis$Datasets$System) + 1 mm <- TRUE } else { n_models <- length(recipe$Analysis$Datasets$System) @@ -58,7 +61,7 @@ divide_recipe <- function(recipe) { } for (sys in 
1:n_models) { for (reci in 1:length(all_recipes)) { - if (sys == length(recipe$Analysis$Datasets$System)+1){ + if (sys == length(recipe$Analysis$Datasets$System) + 1) { all_recipes[[reci]]$Analysis$Datasets <- list(System = list(name = 'Multimodel', models = unlist(recipe$Analysis$Datasets$System)), @@ -160,19 +163,24 @@ divide_recipe <- function(recipe) { } # Rest of horizons # Save all recipes in separate YAML files for (reci in 1:length(all_recipes)) { + ## TODO: Sort dependencies # if (reci < 10) { # recipe_number <- paste0("0", reci) # } else { # recipe_number <- reci # } recipe_number <- paste0('var-',all_recipes[[reci]]$Analysis$Variables$name, - '_sys-',gsub('\\.','',all_recipes[[reci]]$Analysis$Datasets$System$name), + '_sys-',gsub('\\.', '', all_recipes[[reci]]$Analysis$Datasets$System$name), '_ref-',all_recipes[[reci]]$Analysis$Datasets$Reference$name, '_reg-',all_recipes[[reci]]$Analysis$Region$name, '_sdate-',all_recipes[[reci]]$Analysis$Time$sdate) + if (all_recipes[[reci]]$Analysis$Datasets$System$name == 'Multimodel') { + recipe_dir <- paste0(recipe$Run$output_dir, "/logs/recipes/multimodel/") + } else { + recipe_dir <- paste0(recipe$Run$output_dir, "/logs/recipes/") + } write_yaml(all_recipes[[reci]], - paste0(recipe$Run$output_dir, "/logs/recipes/atomic_recipe_", - recipe_number, ".yml")) + paste0(recipe_dir, "atomic_recipe_", recipe_number, ".yml")) } info(recipe$Run$logger, paste("The main recipe has been divided into", length(all_recipes), -- GitLab From e5a48cca09a74c9657bde65efa0d97c41d84f97b Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 1 Dec 2023 17:07:44 +0100 Subject: [PATCH 57/91] remove unneeded echo --- launch_SUNSET.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/launch_SUNSET.sh b/launch_SUNSET.sh index 6ba4fe97..12f9c26d 100644 --- a/launch_SUNSET.sh +++ b/launch_SUNSET.sh @@ -149,7 +149,6 @@ if [[ $run_method == "sbatch" ]]; then multimodel_job_list=() job_number=0 - echo $multimodel if [[ $multimodel == 
"TRUE" ]]; then for atomic_recipe in ${outdir}/logs/recipes/multimodel/atomic_recipe_*.yml; do job_number=$(($job_number + 1)) -- GitLab From f3cf15e854bd7a6b509c7a46e53aa199a9040552 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 1 Dec 2023 17:23:59 +0100 Subject: [PATCH 58/91] same script for seasonal and decadal --- modules/Multimodel/Multimodel.R | 8 ++---- ...ultimodel_seasonal.R => load_multimodel.R} | 28 ++++++++++++------- 2 files changed, 20 insertions(+), 16 deletions(-) rename modules/Multimodel/{R/load_multimodel_seasonal.R => load_multimodel.R} (92%) diff --git a/modules/Multimodel/Multimodel.R b/modules/Multimodel/Multimodel.R index 6d01ebac..b333c526 100644 --- a/modules/Multimodel/Multimodel.R +++ b/modules/Multimodel/Multimodel.R @@ -4,18 +4,14 @@ source("modules/Loading/R/dates2load.R") source("modules/Loading/R/get_timeidx.R") source("modules/Loading/R/check_latlon.R") -source('modules/Multimodel/R/load_multimodel_seasonal.R') +source('modules/Multimodel/load_multimodel.R') Multimodel <- function(recipe) { # recipe: auto-s2s recipe as provided by read_yaml # Loading data saved in the jobs for individual models - if (tolower(recipe$Analysis$Horizon) == 'seasonal') { - data <- load_multimodel_seasonal(recipe = recipe) - } else if (tolower(recipe$Analysis$Horizon) == 'decadal'){ - stop('Decadal multimodel not implemented yet') - } else {stop(paste0('Multimodel not implemented for ',recipe$Analysis$Horizon))} + data <- load_multimodel(recipe) # Creating the multi-model if (tolower(recipe$Analysis$Datasets$Multimodel$approach) == 'pooled') { diff --git a/modules/Multimodel/R/load_multimodel_seasonal.R b/modules/Multimodel/load_multimodel.R similarity index 92% rename from modules/Multimodel/R/load_multimodel_seasonal.R rename to modules/Multimodel/load_multimodel.R index 9dad9871..a3f56997 100644 --- a/modules/Multimodel/R/load_multimodel_seasonal.R +++ b/modules/Multimodel/load_multimodel.R @@ -3,16 +3,30 @@ 
source("modules/Loading/R/dates2load.R") source("modules/Loading/R/get_timeidx.R") source("modules/Loading/R/check_latlon.R") -load_multimodel_seasonal <- function(recipe) { +load_multimodel <- function(recipe) { archive <- read_yaml("conf/archive.yml")$esarchive ref.name <- recipe$Analysis$Datasets$Reference$name - exp.name <- recipe$Analysis$Datasets$System$models + if (tolower(recipe$Analysis$Horizon) == 'seasonal'){ + exp.name <- recipe$Analysis$Datasets$System$models + } else if (tolower(recipe$Analysis$Horizon) == 'decadal') { + exp.name <- sapply(recipe$Analysis$Datasets$System$models, '[[', 'name') + } else {stop('Multimodel not implemented for this horizon.')} store.freq <- recipe$Analysis$Variables$freq variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] exp_descrip <- archive$System[[exp.name[1]]] reference_descrip <- archive$Reference[[ref.name]] sdates <- dates2load(recipe, recipe$Run$logger) + if (tolower(recipe$Analysis$Horizon) == 'decadal') { + browser() + cat('LAS FECHAS EN LOS ARCHIVOS CAMBIAN DEPENDIENDO DEL MODELO. + SEGURAMENTE SEA POR EL MES DE INICIALIZACION. 
+ TAMBIEN HAY QUE ARREGLAR EL NUMERO MAXIMO DE MIEMBROS.') + sdates$hcst <- paste0(as.numeric(sdates$hcst)+1,'0101') + if (!is.null(recipe$Analysis$Time$fcst_year)) { + sdates$fcst <- paste0(as.numeric(sdates$fcst)+1,'0101') + } + } lats.min <- recipe$Analysis$Region$latmin lats.max <- recipe$Analysis$Region$latmax @@ -33,20 +47,16 @@ load_multimodel_seasonal <- function(recipe) { "$var$_$file_date$.nc") hcst.path <- gsub(variable[1], "$var$", hcst.path) hcst.path <- gsub('Multimodel', "$model$", hcst.path) - hcst.path <- gsub('_\\d{14}/outputs', "_$aux$/outputs", hcst.path) fcst.path <- obs.path <- hcst.path obs.path <- gsub("_$file_date$", "-obs_$file_date$", obs.path, fixed = T) obs.path <- gsub("$model$", gsub('\\.','',exp.name[1]), obs.path, fixed = T) - obs.path <- gsub('\\$aux\\$','*',obs.path) # Load hindcast #------------------------------------------------------------------- hcst <- Start(dat = hcst.path, var = variable, file_date = sdates$hcst, - model = 'all', - aux = 'all', - aux_depends = 'model', + model = exp.name, time = 'all', latitude = 'all', latitude_reorder = Sort(), @@ -100,9 +110,7 @@ load_multimodel_seasonal <- function(recipe) { fcst <- Start(dat = fcst.path, var = variable, file_date = sdates$fcst, - model = 'all', - aux = 'all', - aux_depends = 'model', + model = exp.name, time = 'all', latitude = 'all', latitude_reorder = Sort(), -- GitLab From 741fb692298182c7ed9d22c8bfb20ecd48016d73 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 1 Dec 2023 17:27:35 +0100 Subject: [PATCH 59/91] . 
--- example_scripts/example_multimodel.R | 8 -------- 1 file changed, 8 deletions(-) diff --git a/example_scripts/example_multimodel.R b/example_scripts/example_multimodel.R index e81c0b56..0a9e626b 100644 --- a/example_scripts/example_multimodel.R +++ b/example_scripts/example_multimodel.R @@ -42,19 +42,14 @@ for (r in atomic_recipe_files){ ## Running atomic recipes for (recipe_file in c(atomic_recipes_models,atomic_recipes_multimodel)){ - # Read recipe recipe <- read_atomic_recipe(recipe_file) - if (recipe$Analysis$Datasets$System$name == 'Multimodel'){ - # Load datasets and create multimodel mm <- Multimodel(recipe) data <- mm$data probabilities <- mm$prob - } else { - # Load datasets data <- Loading(recipe) # Change units @@ -63,12 +58,9 @@ for (recipe_file in c(atomic_recipes_models,atomic_recipes_multimodel)){ data <- Anomalies(recipe, data) # Compute percentiles and probability bins probabilities <- Probabilities(recipe, data) - } - # Compute skill metrics skill_metrics <- Skill(recipe, data) # Plot data Visualization(recipe, data, skill_metrics, probabilities, significance = T) - } -- GitLab From 3a7c40f84929a15e02bf137fae1319dc835b8649 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Mon, 4 Dec 2023 10:13:19 +0100 Subject: [PATCH 60/91] fixed archive for MIROC6 --- conf/archive_decadal.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/archive_decadal.yml b/conf/archive_decadal.yml index fd18be99..8cf38387 100644 --- a/conf/archive_decadal.yml +++ b/conf/archive_decadal.yml @@ -232,11 +232,11 @@ esarchive: institution: "Model for Interdisciplinary Research on Climate" src: hcst: "exp/CMIP6/dcppA-hindcast/MIROC6/DCPP/MIROC/MIROC6/dcppA-hindcast/" - fcst: + fcst: "exp/CMIP6/dcppA-hindcast/MIROC6/DCPP/MIROC/MIROC6/dcppA-hindcast/" monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "tasmin":"Amon", "tasmax":"Amon"} grid: {"tas":"gn", "pr":"gn", "psl":"gn", "tasmin":"gn", "tasmax":"gn"} - version: {"tas":"v20200417", 
"pr":["v20200416","v20200504"], "psl":"v20200504", "tasmin":"v20200417", "tasmax":"v20200504"} + version: {"tas":"v20200417", "pr":"v20200504", "psl":"v20200504", "tasmin":"v20200417", "tasmax":"v20200504"} daily_mean: grid: {"pr":"gn", "tas":"gn", "tasmax":"gn", "tasmin":"gn"} version: {"pr":"v20191217", "tas":"v20200416", "tasmax":"v20200416", "tasmin":"v20200416"} -- GitLab From 9f4d1c61fea12d1b0030929a34269b233143fe5b Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Mon, 4 Dec 2023 10:14:07 +0100 Subject: [PATCH 61/91] . --- example_scripts/example_multimodel.R | 24 +++++++----------------- recipes/recipe_multimodel_decadal.yml | 2 +- 2 files changed, 8 insertions(+), 18 deletions(-) diff --git a/example_scripts/example_multimodel.R b/example_scripts/example_multimodel.R index 0a9e626b..c07ee551 100644 --- a/example_scripts/example_multimodel.R +++ b/example_scripts/example_multimodel.R @@ -22,26 +22,16 @@ recipe_file <- paste0("recipes/recipe_multimodel_",horizon,".yml") system(paste0('Rscript split.R ',recipe_file)) ## Finding atomic recipes -atomic_recipe_folder <- paste0(sunset_outputs_folder,list.files(sunset_outputs_folder),'/logs/recipes/') -atomic_recipe_folder <- atomic_recipe_folder[grepl(horizon,atomic_recipe_folder)] +atomic_recipe_folder <- paste0(sunset_outputs_folder,list.files(sunset_outputs_folder, pattern = horizon),'/logs/recipes/') atomic_recipe_folder <- atomic_recipe_folder[length(atomic_recipe_folder)] -atomic_recipe_files <- list.files(atomic_recipe_folder) -atomic_recipe_files <- atomic_recipe_files[-length(atomic_recipe_files)] -atomic_recipe_files <- paste0(atomic_recipe_folder,atomic_recipe_files) - -## Splitting recipes to run multimodel after individual models -atomic_recipes_models <- c() -atomic_recipes_multimodel <- c() -for (r in atomic_recipe_files){ - if (grepl('sys-Multimodel',r)){ - atomic_recipes_multimodel <- c(atomic_recipes_multimodel,r) - } else { - atomic_recipes_models <- c(atomic_recipes_models,r) - } -} 
+atomic_recipes <- list.files(paste0(atomic_recipe_folder), pattern = '.yml') +atomic_recipes <- atomic_recipes[-length(atomic_recipes)] +atomic_recipes_multimodel <- list.files(paste0(atomic_recipe_folder,'multimodel')) +atomic_recipes <- paste0(atomic_recipe_folder,atomic_recipes) +atomic_recipes_multimodel <- paste0(atomic_recipe_folder,'multimodel/',atomic_recipes_multimodel) ## Running atomic recipes -for (recipe_file in c(atomic_recipes_models,atomic_recipes_multimodel)){ +for (recipe_file in c(atomic_recipes,atomic_recipes_multimodel)){ # Read recipe recipe <- read_atomic_recipe(recipe_file) if (recipe$Analysis$Datasets$System$name == 'Multimodel'){ diff --git a/recipes/recipe_multimodel_decadal.yml b/recipes/recipe_multimodel_decadal.yml index 558b8104..87d4d93f 100644 --- a/recipes/recipe_multimodel_decadal.yml +++ b/recipes/recipe_multimodel_decadal.yml @@ -9,7 +9,7 @@ Analysis: - {name: pr, freq: monthly_mean, units: mm, flux: no} Datasets: System: - - {name: CanESM5, member: r1i1p2f1 r2i1p2f1} + - {name: MIROC6, member: r1i1p1f1 r2i1p1f1} - {name: EC-Earth3-i4, member: r1i4p1f1 r2i4p1f1 r3i4p1f1} Multimodel: execute: both # Mandatory: Either both, yes/true or no/false -- GitLab From 01fe7c8bcbd0418fe4ebc1faa63f110b427fadda Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Mon, 4 Dec 2023 10:41:25 +0100 Subject: [PATCH 62/91] fixed problem with start month (1101) - this should be changed to account for the different initialisation month of the decadal models --- modules/Multimodel/load_multimodel.R | 17 +++-------------- modules/Saving/R/save_percentiles.R | 14 +++++++++++--- modules/Saving/R/save_probabilities.R | 12 ++++++++++-- 3 files changed, 24 insertions(+), 19 deletions(-) diff --git a/modules/Multimodel/load_multimodel.R b/modules/Multimodel/load_multimodel.R index a3f56997..791ab4fb 100644 --- a/modules/Multimodel/load_multimodel.R +++ b/modules/Multimodel/load_multimodel.R @@ -17,16 +17,6 @@ load_multimodel <- function(recipe) { exp_descrip 
<- archive$System[[exp.name[1]]] reference_descrip <- archive$Reference[[ref.name]] sdates <- dates2load(recipe, recipe$Run$logger) - if (tolower(recipe$Analysis$Horizon) == 'decadal') { - browser() - cat('LAS FECHAS EN LOS ARCHIVOS CAMBIAN DEPENDIENDO DEL MODELO. - SEGURAMENTE SEA POR EL MES DE INICIALIZACION. - TAMBIEN HAY QUE ARREGLAR EL NUMERO MAXIMO DE MIEMBROS.') - sdates$hcst <- paste0(as.numeric(sdates$hcst)+1,'0101') - if (!is.null(recipe$Analysis$Time$fcst_year)) { - sdates$fcst <- paste0(as.numeric(sdates$fcst)+1,'0101') - } - } lats.min <- recipe$Analysis$Region$latmin lats.max <- recipe$Analysis$Region$latmax @@ -47,6 +37,9 @@ load_multimodel <- function(recipe) { "$var$_$file_date$.nc") hcst.path <- gsub(variable[1], "$var$", hcst.path) hcst.path <- gsub('Multimodel', "$model$", hcst.path) + if (tolower(recipe$Analysis$Horizon) == 'decadal') { + hcst.path <- gsub('.nc', "1101.nc", hcst.path) + } fcst.path <- obs.path <- hcst.path obs.path <- gsub("_$file_date$", "-obs_$file_date$", obs.path, fixed = T) obs.path <- gsub("$model$", gsub('\\.','',exp.name[1]), obs.path, fixed = T) @@ -73,8 +66,6 @@ load_multimodel <- function(recipe) { time = 'file_date'), split_multiselected_dims = split_multiselected_dims, retrieve = TRUE) - ## Removing "aux" dimension (needed to load the data) - hcst <- Subset(x = hcst, along = 'aux', indices = 1, drop = 'selected') ############################# #NOTE: NOT TESTED YET @@ -127,8 +118,6 @@ load_multimodel <- function(recipe) { time = 'file_date'), split_multiselected_dims = split_multiselected_dims, retrieve = TRUE) - ## Removing "aux" dimension (needed to load the data) - fcst <- Subset(x = fcst, along = 'aux', indices = 1, drop = 'selected') ############################# #NOTE: NOT TESTED YET diff --git a/modules/Saving/R/save_percentiles.R b/modules/Saving/R/save_percentiles.R index 976045bf..1abe84cc 100644 --- a/modules/Saving/R/save_percentiles.R +++ b/modules/Saving/R/save_percentiles.R @@ -23,8 +23,12 @@ 
save_percentiles <- function(recipe, # Time indices and metadata fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq - if (global_attributes$system == 'Multimodel'){ - calendar <- archive$System[[recipe$Analysis$Datasets$System$models[[1]]]]$calendar + if (global_attributes$system == 'Multimodel') { + if (fcst.horizon == 'decadal') { + calendar <- archive$System[[recipe$Analysis$Datasets$System$models[[1]]$name]]$calendar + } else { + calendar <- archive$System[[recipe$Analysis$Datasets$System$models[[1]]]]$calendar + } } else { calendar <- archive$System[[global_attributes$system]]$calendar } @@ -33,7 +37,11 @@ save_percentiles <- function(recipe, dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), cal = calendar) if (fcst.horizon == 'decadal') { - init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + if (global_attributes$system == 'Multimodel') { + init_month <- 11 #TODO: put as if init_month is January + } else { + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + } init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', sprintf('%02d', init_month), '-01'), cal = calendar) diff --git a/modules/Saving/R/save_probabilities.R b/modules/Saving/R/save_probabilities.R index 850a6d06..5b46f236 100644 --- a/modules/Saving/R/save_probabilities.R +++ b/modules/Saving/R/save_probabilities.R @@ -30,7 +30,11 @@ save_probabilities <- function(recipe, fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq if (global_attributes$system == 'Multimodel'){ - calendar <- archive$System[[recipe$Analysis$Datasets$System$models[[1]]]]$calendar + if (fcst.horizon == 'decadal') { + calendar <- archive$System[[recipe$Analysis$Datasets$System$models[[1]]$name]]$calendar + } else { + calendar <- archive$System[[recipe$Analysis$Datasets$System$models[[1]]]]$calendar + } } else { calendar <- 
archive$System[[global_attributes$system]]$calendar } @@ -40,7 +44,11 @@ save_probabilities <- function(recipe, dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), cal = calendar) if (fcst.horizon == 'decadal') { - init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + if (global_attributes$system == 'Multimodel') { + init_month <- 11 #TODO: put as if init_month is January + } else { + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + } init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', sprintf('%02d', init_month), '-01'), cal = calendar) -- GitLab From d0720babbd984efceb6cd85da2c61fffb578b1af Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 13 Dec 2023 09:44:00 +0100 Subject: [PATCH 63/91] Update README --- README.md | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 538fa029..5758fbf3 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,29 @@ -ESS Verification Suite +SUNSET: SUbseasoNal to decadal climate forecast post-processing and asSEmenT suite ====================== -This is the Git project for the ESS Verification Suite, which will serve as a tool for research projects and operational workflows involving subseasonal to seasonal to decadal forecast verification. +This is the Git project for the SUNSET, a collaborative in-house tool developed at BSC-ES for research projects and operational workflows involving subseasonal to seasonal to decadal forecast verification. -The main developers of the tool are Victòria Agudetse (@vagudets), An-Chi Ho (@aho), Lluís Palma (@lpalma) and Núria Pérez-Zanón (@nperez). +This is the Git project for SUNSET, an collaborative R-based tool developed in-house at BSC-ES that aims to provide climate services for sub-seasonal, seasonal and decadal climate forecast time scales. 
The tool post-processes climate forecast outputs by applying state-of-the-art methodologies to tailor climate products for each application and sector (e.g.: agriculture, energy, water management, or health). + +Its modular design allows flexibility in defining the required post-processing steps, as well as the products definition by deciding on the forecast system and reference datasets, variables, and forecast horizon among others. The tool also allows for the creation and visualization of climate forecast products, such as maps for the most likely terciles, and performs the verification of the products, which can be visualized on maps and scorecards. The inclusion of a launcher script provides a user-friendly way to parallelize the computation on HPC machines. + +Victòria Agudetse (@vagudets) is the maintainer of the SUNSET repository. Resources --------- -You can access the documentation of the Verification Suite through the wiki: -[Auto-s2s Wiki](https://earth.bsc.es/gitlab/es/auto-s2s/-/wikis/home?target=_blank) +You can access the documentation through the wiki: +[SUNSET Wiki](https://earth.bsc.es/gitlab/es/sunset/-/wikis/home?target=_blank) + +Use cases and hands-on tutorials are available in the repository for you to follow: + +[SUNSET Use Cases](https://earth.bsc.es/gitlab/es/sunset/-/tree/master/use_cases/) You may also find useful information in the slides from past user meetings: +[R-tools Climate Forecast Analysis Training session 2023](https://earth.bsc.es/wiki/lib/exe/fetch.php?media=tools:day2_04_handson_r_tools.pdf) + [User meeting March 2023](https://docs.google.com/presentation/d/18VoqgJCzcZTmqNyXL3op_KecsPxsWRkf/edit#slide=id.p1?target=_blank) [User meeting September 2022](https://docs.google.com/presentation/d/14-qq__fblMt7xvJDaqS5UqfQMXWCf3Ju/edit#slide=id.p1?target=_blank) @@ -25,9 +35,9 @@ Branching strategy Branches containing developments that are to be merged into the tool must contain "dev-" at the beginning of the name, followed by a 
short, meaningful description of the development in question. E.g. "dev-loading-subseasonal" for the branch containing developments related to the loading of subseasonal datasets. -Users that wish to incorporate their own developments into the core of the tool are encouraged to create a personal fork of the Auto-S2S repository to work on their projects. Please contact Victòria Agudetse at victoria.agudetse@bsc.es to discuss the first steps. +Users that wish to incorporate their own developments into the core of the tool are encouraged to create a local copy repository to work on their projects and push their changes to a new branch. Please contact Victòria Agudetse (@vagudets) or Núria Pérez-Zanón (@nperez) to discuss the first steps. Mailing list ------------ -User meetings, internal releases and news are announced through the mailing list. You can send an email to victoria.agudetse@bsc.es or an.ho@bsc.es to request subscription. +User meetings, internal releases and news are announced through the mailing list. You can send an email to victoria.agudetse@bsc.es to request subscription. 
-- GitLab From 356529d4a4ca17615fb18c3d6cc738f2dec244a2 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 15 Dec 2023 14:55:45 +0100 Subject: [PATCH 64/91] Set default value for na.rm in Anomalies() --- modules/Anomalies/Anomalies.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R index edb6a8be..2d54365a 100644 --- a/modules/Anomalies/Anomalies.R +++ b/modules/Anomalies/Anomalies.R @@ -20,6 +20,11 @@ Anomalies <- function(recipe, data) { cross <- FALSE cross_msg <- "without" } + if (is.null(recipe$Analysis$remove_NAs)) { + na.rm <- FALSE + } else { + na.rm <- recipe$Analysis$remove_NAs + } original_dims <- data$hcst$dim # Save full fields @@ -57,12 +62,12 @@ Anomalies <- function(recipe, data) { clim_hcst <- Apply(data$hcst$data, target_dims = c('syear', 'ensemble'), mean, - na.rm = recipe$Analysis$remove_NAs, + na.rm = na.rm, ncores = recipe$Analysis$ncores)$output1 clim_obs <- Apply(data$obs$data, target_dims = c('syear', 'ensemble'), mean, - na.rm = recipe$Analysis$remove_NAs, + na.rm = na.rm, ncores = recipe$Anaysis$ncores)$output1 data$hcst$data <- Ano(data = data$hcst$data, clim = clim_hcst) data$obs$data <- Ano(data = data$obs$data, clim = clim_obs) -- GitLab From 6b48dca91b46cfe4b4738bcc5705c650ceae7a2f Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Fri, 15 Dec 2023 17:07:01 +0100 Subject: [PATCH 65/91] removing the individual models output in case multimodel=true/yes --- modules/Multimodel/Multimodel.R | 16 ++++++++++++---- modules/Multimodel/load_multimodel.R | 6 +++--- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/modules/Multimodel/Multimodel.R b/modules/Multimodel/Multimodel.R index b333c526..24bc4b51 100644 --- a/modules/Multimodel/Multimodel.R +++ b/modules/Multimodel/Multimodel.R @@ -75,10 +75,18 @@ Multimodel <- function(recipe) { } else {stop('Incorrect multi-model approach')} - # TODO: Cleaning the individual model 
outputs - # if (tolower(recipe$Analysis$Datasets$Multimodel$execute) %in% c('true','yes')){ - # system(paste0('rm -r ')) - # } + # Removing the temporary data of models in case user only requests multimodel + if (tolower(recipe$Analysis$Datasets$Multimodel$execute) %in% c('true','yes')) { + if (tolower(recipe$Analysis$Horizon) == 'seasonal') { + exp.name <- recipe$Analysis$Datasets$System$models + } else if (tolower(recipe$Analysis$Horizon) == 'decadal') { + exp.name <- sapply(recipe$Analysis$Datasets$System$models, '[[', 'name') + } else {stop('Multimodel not implemented for this horizon.')} + unlink(file.path(recipe$Run$output_dir,"outputs/*/*",gsub('\\.','',exp.name)), + recursive = T) + unlink(file.path(recipe$Run$output_dir,"plots/*",gsub('\\.','',exp.name)), + recursive = T) + } return(list(data = data, prob = prob)) } diff --git a/modules/Multimodel/load_multimodel.R b/modules/Multimodel/load_multimodel.R index 791ab4fb..ab68b889 100644 --- a/modules/Multimodel/load_multimodel.R +++ b/modules/Multimodel/load_multimodel.R @@ -7,7 +7,7 @@ load_multimodel <- function(recipe) { archive <- read_yaml("conf/archive.yml")$esarchive ref.name <- recipe$Analysis$Datasets$Reference$name - if (tolower(recipe$Analysis$Horizon) == 'seasonal'){ + if (tolower(recipe$Analysis$Horizon) == 'seasonal') { exp.name <- recipe$Analysis$Datasets$System$models } else if (tolower(recipe$Analysis$Horizon) == 'decadal') { exp.name <- sapply(recipe$Analysis$Datasets$System$models, '[[', 'name') @@ -49,7 +49,7 @@ load_multimodel <- function(recipe) { hcst <- Start(dat = hcst.path, var = variable, file_date = sdates$hcst, - model = exp.name, + model = gsub('\\.','',exp.name), time = 'all', latitude = 'all', latitude_reorder = Sort(), @@ -101,7 +101,7 @@ load_multimodel <- function(recipe) { fcst <- Start(dat = fcst.path, var = variable, file_date = sdates$fcst, - model = exp.name, + model = gsub('\\.','',exp.name), time = 'all', latitude = 'all', latitude_reorder = Sort(), -- GitLab From 
ea0aa345bfb4b309d8036456cd873a1a36364074 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 18 Dec 2023 10:45:51 +0100 Subject: [PATCH 66/91] Correct seasonal grid consistency check --- modules/Loading/R/load_seasonal.R | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/modules/Loading/R/load_seasonal.R b/modules/Loading/R/load_seasonal.R index 38d4c96f..8760e01e 100644 --- a/modules/Loading/R/load_seasonal.R +++ b/modules/Loading/R/load_seasonal.R @@ -313,35 +313,40 @@ load_seasonal <- function(recipe) { # Convert obs to s2dv_cube obs <- as.s2dv_cube(obs) - + # Check for consistency between hcst and obs grid if (!(recipe$Analysis$Regrid$type == 'none')) { - if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) { + if (!isTRUE(all.equal(as.vector(hcst$coords$latitude), + as.vector(obs$coords$latitude)))) { lat_error_msg <- paste("Latitude mismatch between hcst and obs.", "Please check the original grids and the", "regrid parameters in your recipe.") error(recipe$Run$logger, lat_error_msg) - hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], - "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) + hcst_lat_msg <- paste0("First hcst lat: ", hcst$coords$latitude[1], + "; Last hcst lat: ", + hcst$coords$latitude[length(hcst$coords$latitude)]) info(recipe$Run$logger, hcst_lat_msg) - obs_lat_msg <- paste0("First obs lat: ", obs$lat[1], - "; Last obs lat: ", obs$lat[length(obs$lat)]) + obs_lat_msg <- paste0("First obs lat: ", obs$coords$latitude[1], + "; Last obs lat: ", + obs$coords$latitude[length(obs$coords$latitude)]) info(recipe$Run$logger, obs_lat_msg) stop("hcst and obs don't share the same latitudes.") } - if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) { + if (!isTRUE(all.equal(as.vector(hcst$coords$longitude), + as.vector(obs$coords$longitude)))) { lon_error_msg <- paste("Longitude mismatch between hcst and obs.", "Please check the original grids and the", "regrid 
parameters in your recipe.") error(recipe$Run$logger, lon_error_msg) - hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], - "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) + hcst_lon_msg <- paste0("First hcst lon: ", hcst$coords$longitude[1], + "; Last hcst lon: ", + hcst$coords$longitude[length(hcst$coords$longitude)]) info(recipe$Run$logger, hcst_lon_msg) - obs_lon_msg <- paste0("First obs lon: ", obs$lon[1], - "; Last obs lon: ", obs$lon[length(obs$lon)]) + obs_lon_msg <- paste0("First obs lon: ", obs$coords$longitude[1], + "; Last obs lon: ", + obs$coords$longitude[length(obs$coords$longitude)]) info(recipe$Run$logger, obs_lon_msg) stop("hcst and obs don't share the same longitudes.") - } } -- GitLab From fbe871c90ae1239872fab1d358c7d737b3472f3f Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 19 Dec 2023 14:36:30 +0100 Subject: [PATCH 67/91] add na.rm to max() in plot_ensemble_mean() --- modules/Visualization/R/plot_ensemble_mean.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R index c72a4335..10561d10 100644 --- a/modules/Visualization/R/plot_ensemble_mean.R +++ b/modules/Visualization/R/plot_ensemble_mean.R @@ -56,7 +56,7 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o # Define brks, centered around zero in the case of anomalies if (grepl("anomaly", var_long_name)) { variable <- paste(variable, "anomaly") - max_value <- max(abs(var_ens_mean)) + max_value <- max(abs(var_ens_mean), na.rm = TRUE) ugly_intervals <- seq(-max_value, max_value, max_value/20) brks <- pretty(ugly_intervals, n = 12, min.n = 8) cols <- grDevices::hcl.colors(length(brks) - 1, palette, rev = rev) -- GitLab From daeed9e4d326a71a684c0ae9e8f375eedcb03f71 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 19 Dec 2023 14:36:51 +0100 Subject: [PATCH 68/91] Make multimodel compatible with Scorecards output 
format (WIP) --- modules/Multimodel/load_multimodel.R | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/modules/Multimodel/load_multimodel.R b/modules/Multimodel/load_multimodel.R index ab68b889..1afae8f5 100644 --- a/modules/Multimodel/load_multimodel.R +++ b/modules/Multimodel/load_multimodel.R @@ -33,8 +33,18 @@ load_multimodel <- function(recipe) { # Find the saved data directory recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", recipe$Analysis$Datasets$Multimodel$createFrom) - hcst.path <- file.path(get_dir(recipe = recipe, variable = variable[1]), - "$var$_$file_date$.nc") + if (tolower(recipe$Analysis$Output_format) == "scorecards") { + hcst_start <- recipe$Analysis$Time$hcst_start + hcst_end <- recipe$Analysis$Time$hcst_end + shortdate <- "01" + ## TODO: Fix shortdate + filename <- paste0("scorecards_$model$_", ref.name, "_$var$_$file_date$_", + hcst_start, "-", hcst_end, "_s", shortdate, ".nc") + } else { + filename <- "$var$_$file_date$.nc" + } + + hcst.path <- file.path(get_dir(recipe = recipe, variable = variable[1]), filename) hcst.path <- gsub(variable[1], "$var$", hcst.path) hcst.path <- gsub('Multimodel', "$model$", hcst.path) if (tolower(recipe$Analysis$Horizon) == 'decadal') { -- GitLab From 5f57b5cbffc147714ee3e38692eaf81c71923bec Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 19 Dec 2023 15:35:57 +0100 Subject: [PATCH 69/91] Make multimodel compatible with Scorecards output format --- modules/Multimodel/load_multimodel.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/Multimodel/load_multimodel.R b/modules/Multimodel/load_multimodel.R index 1afae8f5..5e7c1517 100644 --- a/modules/Multimodel/load_multimodel.R +++ b/modules/Multimodel/load_multimodel.R @@ -36,8 +36,7 @@ load_multimodel <- function(recipe) { if (tolower(recipe$Analysis$Output_format) == "scorecards") { hcst_start <- recipe$Analysis$Time$hcst_start hcst_end <- 
recipe$Analysis$Time$hcst_end - shortdate <- "01" - ## TODO: Fix shortdate + shortdate <- substr(recipe$Analysis$Time$sdate, start = 1, stop = 2) filename <- paste0("scorecards_$model$_", ref.name, "_$var$_$file_date$_", hcst_start, "-", hcst_end, "_s", shortdate, ".nc") } else { @@ -47,7 +46,8 @@ load_multimodel <- function(recipe) { hcst.path <- file.path(get_dir(recipe = recipe, variable = variable[1]), filename) hcst.path <- gsub(variable[1], "$var$", hcst.path) hcst.path <- gsub('Multimodel', "$model$", hcst.path) - if (tolower(recipe$Analysis$Horizon) == 'decadal') { + if ((tolower(recipe$Analysis$Horizon) == 'decadal') && + (tolower(recipe$Analysis$Output_format) != "scorecards")) { hcst.path <- gsub('.nc', "1101.nc", hcst.path) } fcst.path <- obs.path <- hcst.path -- GitLab From a6d482e9d1e8860a468173c13fc23a6fd3cba18d Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 20 Dec 2023 11:12:54 +0100 Subject: [PATCH 70/91] Recipe changes --- recipes/recipe_multimodel_seasonal.yml | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/recipes/recipe_multimodel_seasonal.yml b/recipes/recipe_multimodel_seasonal.yml index f8b59a6a..4addb764 100644 --- a/recipes/recipe_multimodel_seasonal.yml +++ b/recipes/recipe_multimodel_seasonal.yml @@ -7,17 +7,11 @@ Analysis: Variables: - {name: tas, freq: monthly_mean, units: C} - {name: prlr, freq: monthly_mean, units: mm, flux: no} - - {name: psl, freq: monthly_mean, units: hPa} - - {name: sfcWind, freq: monthly_mean, units: ms-1} Datasets: System: - {name: ECMWF-SEAS5.1} - {name: CMCC-SPS3.5} - {name: DWD-GCFS2.1} - - {name: Meteo-France-System8} - - {name: UK-MetOffice-Glosea601} - - {name: ECCC-GEM5-NEMO} - - {name: NCEP-CFSv2} Multimodel: execute: both # Mandatory: Either both, yes/true or no/false approach: pooled #mean, median @@ -27,17 +21,7 @@ Analysis: Time: sdate: - '0101' ## MMDD - - '0201' - - '0301' - - '0401' - - '0501' - '0601' - - '0701' - - '0801' - - '0901' 
- - '1001' - - '1101' - - '1201' fcst_year: '2023' # Optional, int: Forecast year 'YYYY' hcst_start: '2007' # Mandatory, int: Hindcast start year 'YYYY' hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' @@ -45,7 +29,6 @@ Analysis: ftime_max: 2 # Mandatory, int: Last leadtime time step in months Region: - {name: "Spain", latmin: 34, latmax: 44, lonmin: -10, lonmax: 6} - - {name: "Germany", latmin: 45, latmax: 56, lonmin: 4, lonmax: 17} Regrid: method: conservative # Mandatory, str: Interpolation method. See docu. type: "r360x180" @@ -73,14 +56,14 @@ Analysis: dots: both ncores: 4 # Optional, int: number of cores, defaults to 1 remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE - Output_format: s2s4e # scorecards + Output_format: scorecards # scorecards logo: yes Run: Loglevel: INFO Terminal: yes filesystem: esarchive - output_dir: /esarchive/scratch/cdelgado/sunset_outputs/ # replace with the directory where you want to save the outputs - code_dir: /esarchive/scratch/cdelgado/gitlat/SUNSET/ # replace with the directory where your code is + output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ # replace with the directory where you want to save the outputs + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ # replace with the directory where your code is autosubmit: no # fill only if using autosubmit auto_conf: -- GitLab From 663bb699267ffa9382d6f01c5a67ea3376c027cb Mon Sep 17 00:00:00 2001 From: VICTORIA AGUDETSE ROURES Date: Fri, 29 Dec 2023 12:25:55 +0100 Subject: [PATCH 71/91] Add 'multimodel' to multimodel slurm job namesC --- launch_SUNSET.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/launch_SUNSET.sh b/launch_SUNSET.sh index 0c76cdce..70654d79 100644 --- a/launch_SUNSET.sh +++ b/launch_SUNSET.sh @@ -153,8 +153,8 @@ if [[ $run_method == "sbatch" ]]; then for atomic_recipe in ${outdir}/logs/recipes/multimodel/atomic_recipe_*.yml; do job_number=$(($job_number + 1)) job_name=$(basename 
$outdir)_$(printf %02d $job_number) - outfile=${logdir}/run-${job_name}.out - errfile=${logdir}/run-${job_name}.err + outfile=${logdir}/run-multimodel-${job_name}.out + errfile=${logdir}/run-multimodel-${job_name}.err # Send batch job and capture job ID job_ID=$(sbatch --parsable --dependency=afterok:$(IFS=,; echo "${verification_job_list[*]}") --job-name="SUNSET_multimodel" --output=$outfile --error=$errfile --time=$wallclock --cpus-per-task=$cpus $custom_directives conf/slurm_templates/run_parallel_workflow.sh ${script} ${atomic_recipe}) # Add job ID to array -- GitLab From 562ccc4899209d670949e2bb05e795dd74eef016 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Wed, 3 Jan 2024 16:44:33 +0100 Subject: [PATCH 72/91] for decadal, initialisation month is not written in the filenames --- modules/Multimodel/load_multimodel.R | 4 ---- modules/Saving/R/get_filename.R | 13 ++++++++++++- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/modules/Multimodel/load_multimodel.R b/modules/Multimodel/load_multimodel.R index 5e7c1517..c6bd4585 100644 --- a/modules/Multimodel/load_multimodel.R +++ b/modules/Multimodel/load_multimodel.R @@ -46,10 +46,6 @@ load_multimodel <- function(recipe) { hcst.path <- file.path(get_dir(recipe = recipe, variable = variable[1]), filename) hcst.path <- gsub(variable[1], "$var$", hcst.path) hcst.path <- gsub('Multimodel', "$model$", hcst.path) - if ((tolower(recipe$Analysis$Horizon) == 'decadal') && - (tolower(recipe$Analysis$Output_format) != "scorecards")) { - hcst.path <- gsub('.nc', "1101.nc", hcst.path) - } fcst.path <- obs.path <- hcst.path obs.path <- gsub("_$file_date$", "-obs_$file_date$", obs.path, fixed = T) obs.path <- gsub("$model$", gsub('\\.','',exp.name[1]), obs.path, fixed = T) diff --git a/modules/Saving/R/get_filename.R b/modules/Saving/R/get_filename.R index 54bea81c..5225a1de 100644 --- a/modules/Saving/R/get_filename.R +++ b/modules/Saving/R/get_filename.R @@ -7,7 +7,7 @@ get_filename <- function(dir, recipe, 
var, date, agg, file.type) { # variable, forecast date, startdate, aggregation, forecast horizon and # type of metric/forecast/probability. - if (recipe$Analysis$Horizon == "subseasonal") { + if (tolower(recipe$Analysis$Horizon) == "subseasonal") { shortdate <- format(as.Date(as.character(date), "%Y%m%d"), "%V") dd <- "week" } else { @@ -15,6 +15,17 @@ get_filename <- function(dir, recipe, var, date, agg, file.type) { dd <- "month" } + if (tolower(recipe$Analysis$Horizon) == "decadal") { + # to not save the month and day in the filename (needed for the multimodel) + date <- substr(date,1,4) + # for the models initialised in January - it may be better to do this in save_* functions + archive <- read_yaml(paste0("conf/archive_decadal.yml"))$esarchive + exp.name <- recipe$Analysis$Datasets$System$name + if (exp.name != 'Multimodel' && archive$System[[exp.name]]$initial_month == 1){ + date <- as.character(as.numeric(date)-1) + } + } + switch(tolower(agg), "region" = {gg <- "-region"}, "global" = {gg <- ""}) -- GitLab From fb88e8fe1d1ad3cb3af024efb82c520cc5a0177a Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Wed, 3 Jan 2024 16:46:19 +0100 Subject: [PATCH 73/91] for decadal, ftime=1 corresponds to January for all models; sdate_sum for consistency of BCC sdates --- conf/archive_decadal.yml | 16 ++++++++++++++++ modules/Loading/R/load_decadal.R | 10 +++++++--- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/conf/archive_decadal.yml b/conf/archive_decadal.yml index 8cf38387..c7c6f3a4 100644 --- a/conf/archive_decadal.yml +++ b/conf/archive_decadal.yml @@ -19,6 +19,7 @@ esarchive: calendar: "proleptic_gregorian" member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1,r9i1p1f1,r10i1p1f1 initial_month: 11 + sdate_add: 0 reference_grid: "/esarchive/exp/ecearth/a1ua/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/r1i1p1f1/Amon/tas/gr/v20190713/tas_Amon_EC-Earth3_dcppA-hindcast_s1960-r1i1p1f1_gr_196011-196110.nc" 
#'r512x256' # ---- @@ -41,6 +42,7 @@ esarchive: #NOTE:There are many members but not all of them are available on ESGF (only r6-10 available). Then, we might have some variables for the rest of the members (r1-5 and r11-15), but not for all the variables. That's why i'm only using r6-10 member: r6i2p1f1,r7i2p1f1,r8i2p1f1,r9i2p1f1,r10i2p1f1 initial_month: 11 + sdate_add: 0 reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/ec-earth3/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/r6i2p1f1/Amon/tas/gr/v20200730/tas_Amon_EC-Earth3_dcppA-hindcast_s1960-r6i2p1f1_gr_196011-196012.nc" #'r512x256' # ---- @@ -79,6 +81,7 @@ esarchive: calendar: "proleptic_gregorian" member: r1i4p1f1,r2i4p1f1,r3i4p1f1,r4i4p1f1,r5i4p1f1,r6i4p1f1,r7i4p1f1,r8i4p1f1,r9i4p1f1,r10i4p1f1 initial_month: 11 + sdate_add: 0 reference_grid: "/esarchive/exp/ecearth/a3w5/original_files/cmorfiles/DCPP/EC-Earth-Consortium/EC-Earth3/dcppA-hindcast/r1i4p1f1/Amon/tas/gr/v20210910/tas_Amon_EC-Earth3_dcppA-hindcast_s1960-r1i4p1f1_gr_196011-196110.nc" # ---- @@ -100,6 +103,7 @@ esarchive: calendar: "360-day" member: r1i1p1f2,r2i1p1f2,r3i1p1f2,r4i1p1f2,r5i1p1f2,r6i1p1f2,r7i1p1f2,r8i1p1f2,r9i1p1f2,r10i1p1f2 initial_month: 11 + sdate_add: 0 reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/HadGEM3-GC31-MM/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/r1i1p1f2/Amon/tas/gn/v20200417/tas_Amon_HadGEM3-GC31-MM_dcppA-hindcast_s1960-r1i1p1f2_gn_196011-196012.nc" #'r432x324' # ---- @@ -121,6 +125,7 @@ esarchive: calendar: "365_day" member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1 initial_month: 1 + sdate_add: 1 reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/BCC-CSM2-MR/DCPP/BCC/BCC-CSM2-MR/dcppA-hindcast/r8i1p1f1/Amon/tas/gn/v20200101/tas_Amon_BCC-CSM2-MR_dcppA-hindcast_s2008-r8i1p1f1_gn_200801-201712.nc" # ---- @@ -142,6 +147,7 @@ esarchive: calendar: "365_day" member: r1i1p2f1,r2i1p2f1,r3i1p2f1,r4i1p2f1,r5i1p2f1,r6i1p2f1,r7i1p2f1,r8i1p2f1, r9i1p2f1, r10i1p2f1, 
r11i1p2f1,r12i1p2f1,r13i1p2f1,r14i1p2f1,r15i1p2f1,r16i1p2f1,r17i1p2f1,r18i1p2f1, r19i1p2f1, r20i1p2f1,r21i1p2f1,r22i1p2f1,r23i1p2f1,r24i1p2f1,r25i1p2f1,r26i1p2f1,r27i1p2f1,r28i1p2f1, r29i1p2f1, r30i1p2f1, r31i1p2f1,r32i1p2f1,r33i1p2f1,r34i1p2f1,r35i1p2f1,r36i1p2f1,r37i1p2f1,r38i1p2f1, r39i1p2f1, r40i1p2f1 initial_month: 1 #next year Jan + sdate_add: 0 reference_grid: "/esarchive/exp/canesm5/cmip6-dcppA-hindcast/original_files/cmorfiles/DCPP/CCCma/CanESM5/dcppA-hindcast/r1i1p2f1/Amon/tas/gn/v20190429/tas_Amon_CanESM5_dcppA-hindcast_s2008-r1i1p2f1_gn_200901-201812.nc" # ---- @@ -163,6 +169,7 @@ esarchive: calendar: "365_day" member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1, r9i1p1f1, r10i1p1f1, r11i1p1f1,r12i1p1f1,r13i1p1f1,r14i1p1f1,r15i1p1f1,r16i1p1f1,r17i1p1f1,r18i1p1f1, r19i1p1f1, r20i1p1f1,r21i1p1f1,r22i1p1f1,r23i1p1f1,r24i1p1f1,r25i1p1f1,r26i1p1f1,r27i1p1f1,r28i1p1f1, r29i1p1f1, r30i1p1f1, r31i1p1f1,r32i1p1f1,r33i1p1f1,r34i1p1f1,r35i1p1f1,r36i1p1f1,r37i1p1f1,r38i1p1f1, r39i1p1f1, r40i1p1f1 initial_month: 11 + sdate_add: 0 reference_grid: "/esarchive/exp/ncar/cesm-dple-dcppA-hindcast/cmorfiles/DCPP/NCAR/CESM1-1-CAM5-CMIP5/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20200101/tas_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s2008-r1i1p1f1_gn_200811-201812.nc" # ---- @@ -184,6 +191,7 @@ esarchive: calendar: "365_day" member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1,r9i1p1f1,r10i1p1f1 initial_month: 11 + sdate_add: 0 reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/CMCC-CM2-SR5/DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20210312/tas_Amon_CMCC-CM2-SR5_dcppA-hindcast_s2008-r1i1p1f1_gn_200811-201812.nc" # ---- @@ -205,6 +213,7 @@ esarchive: calendar: "365_day" member: r1i1p1f1,r2i1p1f1,r3i1p1f1 initial_month: 11 + sdate_add: 0 reference_grid: 
"/esarchive/exp/CMIP6/dcppA-hindcast/FGOALS-f3-L/DCPP/CAS/FGOALS-f3-L/dcppA-hindcast/r1i1p1f1/Amon/tas/gr/v20220212/tas_Amon_FGOALS-f3-L_dcppA-hindcast_s1960-r1i1p1f1_gr_196011-197012.nc" # ---- @@ -224,6 +233,7 @@ esarchive: calendar: "gregorian" member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1,r9i1p1f1,r10i1p1f1 initial_month: 1 + sdate_add: 0 reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/IPSL-CM6A-LR/DCPP/IPSL/IPSL-CM6A-LR/dcppA-hindcast/r1i1p1f1/Amon/tas/gr/v20200504/tas_Amon_IPSL-CM6A-LR_dcppA-hindcast_s2008-r1i1p1f1_gr_200901-201812.nc" # ---- @@ -243,6 +253,7 @@ esarchive: calendar: "standard" member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1,r9i1p1f1,r10i1p1f1 initial_month: 11 + sdate_add: 0 reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/MIROC6/DCPP/MIROC/MIROC6/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20200417/tas_Amon_MIROC6_dcppA-hindcast_s2008-r1i1p1f1_gn_200811-201812.nc" # ---- @@ -262,6 +273,7 @@ esarchive: calendar: "standard" member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1,r9i1p1f1,r10i1p1f1 initial_month: 11 + sdate_add: 0 reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-HR/DCPP/MPI-M/MPI-ESM1-2-HR/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20200320/tas_Amon_MPI-ESM1-2-HR_dcppA-hindcast_s2008-r1i1p1f1_gn_200811-201812.nc" # ---- @@ -281,6 +293,7 @@ esarchive: calendar: "proleptic_gregorian" member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1,r9i1p1f1,r10i1p1f1,r11i1p1f1,r12i1p1f1,r13i1p1f1,r14i1p1f1,r15i1p1f1,r16i1p1f1 initial_month: 11 + sdate_add: 0 reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/MPI-ESM1-2-LR/DCPP/MPI-M/MPI-ESM1-2-LR/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20200101/tas_Amon_MPI-ESM1-2-LR_dcppA-hindcast_s2008-r1i1p1f1_gn_200811-201812.nc" # ---- @@ -300,6 +313,7 @@ esarchive: calendar: "proleptic_gregorian" member: 
r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1,r9i1p1f1,r10i1p1f1 initial_month: 11 + sdate_add: 0 reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/MRI-ESM2-0/DCPP/MRI/MRI-ESM2-0/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20200101/tas_Amon_MRI-ESM2-0_dcppA-hindcast_s2008-r1i1p1f1_gn_200811-201312.nc" # ---- @@ -320,6 +334,7 @@ esarchive: calendar: "noleap" member: r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r6i1p1f1,r7i1p1f1,r8i1p1f1,r9i1p1f1,r10i1p1f1 initial_month: 10 + sdate_add: 0 reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20191029/tas_Amon_NorCPM1_dcppA-hindcast_s2008-r1i1p1f1_gn_200810-201812.nc" # ---- @@ -339,6 +354,7 @@ esarchive: calendar: "noleap" member: r1i2p1f1,r2i2p1f1,r3i2p1f1,r4i2p1f1,r5i2p1f1,r6i2p1f1,r7i2p1f1,r8i2p1f1,r9i2p1f1,r10i2p1f1 initial_month: 10 + sdate_add: 0 reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/NorCPM1/DCPP/NCC/NorCPM1/dcppA-hindcast/r1i2p1f1/Amon/pr/gn/v20200101/pr_Amon_NorCPM1_dcppA-hindcast_s2008-r1i2p1f1_gn_200810-201812.nc" diff --git a/modules/Loading/R/load_decadal.R b/modules/Loading/R/load_decadal.R index 66338a50..cf4b98c0 100644 --- a/modules/Loading/R/load_decadal.R +++ b/modules/Loading/R/load_decadal.R @@ -43,12 +43,16 @@ load_decadal <- function(recipe) { lons.max <- as.numeric(recipe$Analysis$Region$lonmax) #10 # change to: sdates <- dates2load(recipe, logger) - sdates_hcst <- as.numeric(recipe$Analysis$Time$hcst_start):as.numeric(recipe$Analysis$Time$hcst_end) #1960:2015 - sdates_fcst <- recipe$Analysis$Time$fcst + # sdate_add for BCC model (the correspondence between sdate and ftime is different for this model) + sdates_hcst <- as.numeric(recipe$Analysis$Time$hcst_start):as.numeric(recipe$Analysis$Time$hcst_end) + + archive$System[[exp.name]]$sdate_add + sdates_fcst <- as.numeric(recipe$Analysis$Time$fcst) + archive$System[[exp.name]]$sdate_add if (store.freq == "monthly_mean") { + # ftime = 1 corresponds to 
the first January for all models + time_add <- list('1' = 0, '10' = 3, '11' = 2) time_ind <- (as.numeric(recipe$Analysis$Time$ftime_min):as.numeric(recipe$Analysis$Time$ftime_max)) - + + time_add[[as.character(archive$System[[exp.name]]$initial_month)]] } else if (store.freq == "daily_mean") { time_ind <- get_daily_time_ind(ftimemin = as.numeric(recipe$Analysis$Time$ftime_min), ftimemax = as.numeric(recipe$Analysis$Time$ftime_max), -- GitLab From 93d453126fa0a3ef9596bfc79d439a591de67614 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Torres Date: Thu, 4 Jan 2024 13:35:46 +0100 Subject: [PATCH 74/91] initialisation month is not saved for decdal --- modules/Saving/R/get_filename.R | 14 ++++++++++---- modules/Saving/R/save_forecast.R | 16 ++++++++++------ modules/Saving/R/save_metrics.R | 21 ++++++++++++++------- modules/Saving/R/save_observations.R | 11 ++++++----- modules/Saving/R/save_percentiles.R | 23 +++++++++++++---------- modules/Saving/R/save_probabilities.R | 18 ++++++++++-------- 6 files changed, 63 insertions(+), 40 deletions(-) diff --git a/modules/Saving/R/get_filename.R b/modules/Saving/R/get_filename.R index 5225a1de..a1793799 100644 --- a/modules/Saving/R/get_filename.R +++ b/modules/Saving/R/get_filename.R @@ -53,13 +53,19 @@ get_filename <- function(dir, recipe, var, date, agg, file.type) { file <- paste0("scorecards_", system, "_", reference, "_", var, type_info, hcst_start, "-", hcst_end, "_s", shortdate) } else { + + if (tolower(recipe$Analysis$Horizon) == "decadal") { + shortdate_aux <- '' + } else { + shortdate_aux <- paste0("_", dd, shortdate) + } + switch(file.type, - "skill" = {file <- paste0(var, gg, "-skill_", dd, shortdate)}, - "corr" = {file <- paste0(var, gg, "-corr_", dd, shortdate)}, + "skill" = {file <- paste0(var, gg, "-skill", shortdate_aux)}, + "corr" = {file <- paste0(var, gg, "-corr", shortdate_aux)}, "exp" = {file <- paste0(var, gg, "_", date)}, "obs" = {file <- paste0(var, gg, "-obs_", date)}, - "percentiles" = {file <- 
paste0(var, gg, "-percentiles_", dd, - shortdate)}, + "percentiles" = {file <- paste0(var, gg, "-percentiles",shortdate_aux)}, "probs" = {file <- paste0(var, gg, "-probs_", date)}, "bias" = {file <- paste0(var, gg, "-bias_", date)}) } diff --git a/modules/Saving/R/save_forecast.R b/modules/Saving/R/save_forecast.R index 4451ce54..1c050160 100644 --- a/modules/Saving/R/save_forecast.R +++ b/modules/Saving/R/save_forecast.R @@ -32,13 +32,17 @@ save_forecast <- function(recipe, ## Method 2: use initial month init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month if (type == 'hcst') { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) + # init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + # sprintf('%02d', init_month), '-01'), + # cal = calendar) + init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$hcst_start)+1, + '-01-01'), cal = calendar) } else if (type == 'fcst') { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$fcst_year[1], '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) + # init_date <- as.PCICt(paste0(recipe$Analysis$Time$fcst_year[1], '-', + # sprintf('%02d', init_month), '-01'), + # cal = calendar) + init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$fcst_year)+1, + '-01-01'), cal = calendar) } } else { if (type == 'hcst') { diff --git a/modules/Saving/R/save_metrics.R b/modules/Saving/R/save_metrics.R index e4f6096c..2a59f2c2 100644 --- a/modules/Saving/R/save_metrics.R +++ b/modules/Saving/R/save_metrics.R @@ -25,7 +25,11 @@ save_metrics <- function(recipe, fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq if (global_attributes$system == 'Multimodel'){ - calendar <- archive$System[[recipe$Analysis$Datasets$System$models[[1]]]]$calendar + if (fcst.horizon == 'decadal'){ + calendar <- archive$System[[recipe$Analysis$Datasets$System$models[[1]]$name]]$calendar 
+ } else { + calendar <- archive$System[[recipe$Analysis$Datasets$System$models[[1]]]]$calendar + } } else { calendar <- archive$System[[global_attributes$system]]$calendar } @@ -35,10 +39,12 @@ save_metrics <- function(recipe, cal = calendar) if (fcst.horizon == 'decadal') { - init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) + # init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + # init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + # sprintf('%02d', init_month), '-01'), + # cal = calendar) + init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$hcst_start)+1, + '-01-01'), cal = calendar) } else { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), @@ -53,8 +59,9 @@ save_metrics <- function(recipe, if (fcst.horizon == 'decadal') { if (!is.null(recipe$Analysis$Time$fcst_year)) { #PROBLEM: May be more than one fcst_year - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], - sprintf('%02d', init_month), '01') + # fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], + # sprintf('%02d', init_month), '01') + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1]) } else { fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') } diff --git a/modules/Saving/R/save_observations.R b/modules/Saving/R/save_observations.R index 127e9890..0f4d70a1 100644 --- a/modules/Saving/R/save_observations.R +++ b/modules/Saving/R/save_observations.R @@ -26,11 +26,12 @@ save_observations <- function(recipe, ## the real initialized date (ask users) # init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') ## Method 2: use initial month - init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - sprintf('%02d', 
init_month), '-01'), - cal = calendar) - + # init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + # init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + # sprintf('%02d', init_month), '-01'), + # cal = calendar) + init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$hcst_start)+1, + '-01-01'), cal = calendar) } else { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), diff --git a/modules/Saving/R/save_percentiles.R b/modules/Saving/R/save_percentiles.R index 1abe84cc..7b68b2c2 100644 --- a/modules/Saving/R/save_percentiles.R +++ b/modules/Saving/R/save_percentiles.R @@ -37,14 +37,16 @@ save_percentiles <- function(recipe, dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), cal = calendar) if (fcst.horizon == 'decadal') { - if (global_attributes$system == 'Multimodel') { - init_month <- 11 #TODO: put as if init_month is January - } else { - init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - } - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) + # if (global_attributes$system == 'Multimodel') { + # init_month <- 11 #TODO: put as if init_month is January + # } else { + # init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + # } + # init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + # sprintf('%02d', init_month), '-01'), + # cal = calendar) + init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$hcst_start)+1, + '-01-01'), cal = calendar) } else { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), @@ -59,8 +61,9 @@ save_percentiles <- function(recipe, if (fcst.horizon == 'decadal') { if (!is.null(recipe$Analysis$Time$fcst_year)) { #PROBLEM: May be more than one fcst_year - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], - sprintf('%02d', 
init_month), '01') + # fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], + # sprintf('%02d', init_month), '01') + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1]) } else { fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') } diff --git a/modules/Saving/R/save_probabilities.R b/modules/Saving/R/save_probabilities.R index 5b46f236..3253e7b7 100644 --- a/modules/Saving/R/save_probabilities.R +++ b/modules/Saving/R/save_probabilities.R @@ -44,14 +44,16 @@ save_probabilities <- function(recipe, dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), cal = calendar) if (fcst.horizon == 'decadal') { - if (global_attributes$system == 'Multimodel') { - init_month <- 11 #TODO: put as if init_month is January - } else { - init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - } - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) + # if (global_attributes$system == 'Multimodel') { + # init_month <- 11 #TODO: put as if init_month is January + # } else { + # init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + # } + # init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + # sprintf('%02d', init_month), '-01'), + # cal = calendar) + init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$hcst_start)+1, + '-01-01'), cal = calendar) } else { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), -- GitLab From f7f58094081234e48a111d07b80625e865361054 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Thu, 4 Jan 2024 17:02:44 +0100 Subject: [PATCH 75/91] saving the multimodel forecast --- modules/Multimodel/Multimodel.R | 32 +++++++++++++++++++++++++++ modules/Saving/R/save_forecast.R | 8 +++++-- recipes/recipe_multimodel_decadal.yml | 6 ++--- 3 files changed, 40 insertions(+), 6 deletions(-) diff --git a/modules/Multimodel/Multimodel.R 
b/modules/Multimodel/Multimodel.R index 24bc4b51..030261ae 100644 --- a/modules/Multimodel/Multimodel.R +++ b/modules/Multimodel/Multimodel.R @@ -30,6 +30,23 @@ Multimodel <- function(recipe) { data$fcst$dims <- dim(data$fcst$data) } + # Saving multimodel + if (recipe$Analysis$Workflow[[recipe$Analysis$Datasets$Multimodel$createFrom]]$save != 'none') { + save_forecast(recipe = recipe, + data_cube = data$hcst, + outdir = outdir[var], + type = 'hcst') + if (!is.null(data$fcst)) { + save_forecast(recipe = recipe, + data_cube = data$fcst, + outdir = outdir[var], + type = 'fcst') + } + save_observations(recipe = recipe, + data_cube = data$obs, + outdir = outdir[var]) + } + # Probabilistic hindcast and forecast prob <- Probabilities(recipe, data) @@ -77,6 +94,7 @@ Multimodel <- function(recipe) { # Removing the temporary data of models in case user only requests multimodel if (tolower(recipe$Analysis$Datasets$Multimodel$execute) %in% c('true','yes')) { + if (tolower(recipe$Analysis$Horizon) == 'seasonal') { exp.name <- recipe$Analysis$Datasets$System$models } else if (tolower(recipe$Analysis$Horizon) == 'decadal') { @@ -86,6 +104,20 @@ Multimodel <- function(recipe) { recursive = T) unlink(file.path(recipe$Run$output_dir,"plots/*",gsub('\\.','',exp.name)), recursive = T) + + } else if (tolower(recipe$Analysis$Datasets$Multimodel$execute) == 'both' && + recipe$Analysis$Workflow[[recipe$Analysis$Datasets$Multimodel$createFrom]]$save == 'none') { + + if (tolower(recipe$Analysis$Horizon) == 'seasonal') { + exp.name <- recipe$Analysis$Datasets$System$models + } else if (tolower(recipe$Analysis$Horizon) == 'decadal') { + exp.name <- sapply(recipe$Analysis$Datasets$System$models, '[[', 'name') + } else {stop('Multimodel not implemented for this horizon.')} + exp.name <- c(exp.name,'Multimodel') + unlink(file.path(recipe$Run$output_dir,"outputs/", + recipe$Analysis$Datasets$Multimodel$createFrom), + recursive = T) + } return(list(data = data, prob = prob)) diff --git 
a/modules/Saving/R/save_forecast.R b/modules/Saving/R/save_forecast.R index 1c050160..f7afa73c 100644 --- a/modules/Saving/R/save_forecast.R +++ b/modules/Saving/R/save_forecast.R @@ -17,7 +17,11 @@ save_forecast <- function(recipe, fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq if (global_attributes$system == 'Multimodel'){ - calendar <- archive$System[[recipe$Analysis$Datasets$System$models[[1]]]]$calendar + if (fcst.horizon == 'decadal'){ + calendar <- archive$System[[recipe$Analysis$Datasets$System$models[[1]]$name]]$calendar + } else { + calendar <- archive$System[[recipe$Analysis$Datasets$System$models[[1]]]]$calendar + } } else { calendar <- archive$System[[global_attributes$system]]$calendar } @@ -130,7 +134,7 @@ save_forecast <- function(recipe, # Get time dimension values and metadata times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) time <- times$time - + # Generate name of output file outfile <- get_filename(outdir, recipe, variable, fcst.sdate, agg, "exp") diff --git a/recipes/recipe_multimodel_decadal.yml b/recipes/recipe_multimodel_decadal.yml index 87d4d93f..ce2530e8 100644 --- a/recipes/recipe_multimodel_decadal.yml +++ b/recipes/recipe_multimodel_decadal.yml @@ -6,11 +6,10 @@ Analysis: Horizon: decadal # Mandatory, str: either subseasonal, seasonal, or decadal Variables: - {name: tas, freq: monthly_mean, units: C} - - {name: pr, freq: monthly_mean, units: mm, flux: no} Datasets: System: - - {name: MIROC6, member: r1i1p1f1 r2i1p1f1} - {name: EC-Earth3-i4, member: r1i4p1f1 r2i4p1f1 r3i4p1f1} + - {name: CanESM5, member: r1i1p2f1 r2i1p2f1} Multimodel: execute: both # Mandatory: Either both, yes/true or no/false approach: pooled #mean, median @@ -18,14 +17,13 @@ Analysis: Reference: - {name: ERA5} # Mandatory, str: Reference codename. See docu. 
Time: - fcst_year: '2021' # Optional, int: Forecast year 'YYYY' + fcst_year: '2020' # Optional, int: Forecast year 'YYYY' hcst_start: '2007' # Mandatory, int: Hindcast start year 'YYYY' hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' ftime_min: 1 # Mandatory, int: First leadtime time step in months ftime_max: 2 # Mandatory, int: Last leadtime time step in months Region: - {name: "Spain", latmin: 34, latmax: 44, lonmin: -10, lonmax: 6} - - {name: "Germany", latmin: 45, latmax: 56, lonmin: 4, lonmax: 17} Regrid: method: conservative # Mandatory, str: Interpolation method. See docu. type: "r360x180" -- GitLab From 7cc9b9efd3adb1574b393d18749c6cbcc3c1451b Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 17 Jan 2024 13:02:17 +0100 Subject: [PATCH 76/91] Kill multimodel job on failed dependency --- launch_SUNSET.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/launch_SUNSET.sh b/launch_SUNSET.sh index 70654d79..6149a963 100644 --- a/launch_SUNSET.sh +++ b/launch_SUNSET.sh @@ -156,7 +156,7 @@ if [[ $run_method == "sbatch" ]]; then outfile=${logdir}/run-multimodel-${job_name}.out errfile=${logdir}/run-multimodel-${job_name}.err # Send batch job and capture job ID - job_ID=$(sbatch --parsable --dependency=afterok:$(IFS=,; echo "${verification_job_list[*]}") --job-name="SUNSET_multimodel" --output=$outfile --error=$errfile --time=$wallclock --cpus-per-task=$cpus $custom_directives conf/slurm_templates/run_parallel_workflow.sh ${script} ${atomic_recipe}) + job_ID=$(sbatch --parsable --dependency=afterok:$(IFS=,; echo "${verification_job_list[*]}") --kill-on-invalid-dep=yes --job-name="SUNSET_multimodel" --output=$outfile --error=$errfile --time=$wallclock --cpus-per-task=$cpus $custom_directives conf/slurm_templates/run_parallel_workflow.sh ${script} ${atomic_recipe}) # Add job ID to array multimodel_job_list+=($job_ID) echo "Submitted batch job $job_ID" -- GitLab From 4fe90f4366354a5f15484022b0c6d02b4e2518da Mon Sep 17 00:00:00 2001 From: 
Nadia Milders Date: Fri, 19 Jan 2024 12:41:23 +0100 Subject: [PATCH 77/91] working progress --- example_scripts/example_multimodel_nadia.R | 57 +++++++ modules/Multimodel/Dev_Multimodel.R | 178 +++++++++++++++++++++ modules/Multimodel/load_multimodel_split.R | 107 +++++++++++++ 3 files changed, 342 insertions(+) create mode 100644 example_scripts/example_multimodel_nadia.R create mode 100644 modules/Multimodel/Dev_Multimodel.R create mode 100644 modules/Multimodel/load_multimodel_split.R diff --git a/example_scripts/example_multimodel_nadia.R b/example_scripts/example_multimodel_nadia.R new file mode 100644 index 00000000..6ae135fc --- /dev/null +++ b/example_scripts/example_multimodel_nadia.R @@ -0,0 +1,57 @@ + +################################ +### SEASONAL MULTIMODEL TEST ### +################################ + +# Load modules +source("modules/Loading/Loading.R") +source("modules/Units/Units.R") +source("modules/Calibration/Calibration.R") +source("modules/Anomalies/Anomalies.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") +source("modules/Multimodel/Dev_Multimodel.R") + +horizon <- 'seasonal' # decadal + +## Cleaning output directory and splitting recipe +sunset_outputs_folder <- '/esarchive/scratch/nmilders/scorecards_data/multimodel/' +# recipe_file <- paste0("recipes/recipe_multimodel_",horizon,".yml") +recipe_file <- "/esarchive/scratch/nmilders/multimodel/recipe_multimodel_seasonal_nadia.yml" # Temporary for testing +# system(paste0('rm -r ',sunset_outputs_folder,'recipe_multimodel_',horizon,'_*')) +system(paste0('Rscript split.R ',recipe_file)) + +## Finding atomic recipes +atomic_recipe_folder <- paste0(sunset_outputs_folder,list.files(sunset_outputs_folder, pattern = horizon),'/logs/recipes/') +atomic_recipe_folder <- atomic_recipe_folder[length(atomic_recipe_folder)] +atomic_recipes <- list.files(paste0(atomic_recipe_folder), pattern = '.yml') +atomic_recipes <- 
atomic_recipes[-length(atomic_recipes)] +atomic_recipes_multimodel <- list.files(paste0(atomic_recipe_folder,'multimodel')) +atomic_recipes <- paste0(atomic_recipe_folder,atomic_recipes) +atomic_recipes_multimodel <- paste0(atomic_recipe_folder,'multimodel/',atomic_recipes_multimodel) + +## Running atomic recipes +for (recipe_file in c(atomic_recipes,atomic_recipes_multimodel)){ + # Read recipe + recipe <- read_atomic_recipe(recipe_file) + if (recipe$Analysis$Datasets$System$name == 'Multimodel'){ + # Load datasets and create multimodel + mm <- Multimodel(recipe) + data <- mm$data + probabilities <- mm$prob + } else { + # Load datasets + data <- Loading(recipe) + # Change units + data <- Units(recipe, data) + # Compute anomalies + data <- Anomalies(recipe, data) + # Compute percentiles and probability bins + probabilities <- Probabilities(recipe, data) + } + # Compute skill metrics + skill_metrics <- Skill(recipe, data) + # Plot data + # Visualization(recipe, data, skill_metrics, probabilities, significance = T) +} diff --git a/modules/Multimodel/Dev_Multimodel.R b/modules/Multimodel/Dev_Multimodel.R new file mode 100644 index 00000000..01d630a4 --- /dev/null +++ b/modules/Multimodel/Dev_Multimodel.R @@ -0,0 +1,178 @@ +# This module load the outputs saved for each individual forecast system +# and creates the multimodel ensemble + +source("modules/Loading/R/dates2load.R") +source("modules/Loading/R/get_timeidx.R") +source("modules/Loading/R/check_latlon.R") +source('modules/Multimodel/load_multimodel.R') + +Multimodel <- function(recipe) { + + # recipe: auto-s2s recipe as provided by read_yaml + + # Loading data saved in the jobs for individual models + if (tolower(recipe$Analysis$Datasets$Multimodel$split_loading) %in% c('true','yes')){ + + ## TO DO: + #Create new function load_multimodel.R, until fcst, with startR retrieve false + #Load data with retrieve = F to check ensemble dimensions of each system + #Create array of NA with dimension of maximum ensemble 
members + #Loop over systems adding data into created array with abind + #Add dimension names since lost with abind + #Add filled array into data object + + # Retrieve data dimension only without loading data + source('modules/Multimodel/load_multimodel_split.R') + dims <- load_multimodel_split(recipe) + + # Create empty array with desired dimensions + hcst_aux <- array(data = NA, dim = dims$dim.hcst) + + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst_aux <- array(data = NA, dim = dims$dim.fcst) + } else { + fcst_aux <- NULL + } + + data_order <- names(dims$dim.hcst) + + # ## Loop over model to load data + for (sys in 1:length(recipe$Analysis$Datasets$System$models)){ + + system_load <- recipe$Analysis$Datasets$System$models[sys] + + recipe_aux <- recipe + recipe_aux$Analysis$Datasets$System$models <- system_load + + data_aux <- load_multimodel(recipe_aux) + + hcst_aux[,,,,,which(recipe$Analysis$Datasets$System$models == system_load),,,,] <- s2dv::Reorder(data = data_aux$hcst$data, order = data_order) + + ## Error: Not able to combine arrays with different 'ensemble' dimensions + + } + + ## Necessary to do separately for hcst and fcst since can different ensemble dimensions + + # data$hcst$data <- hcst_aux + + # if(!is.null(recipe$Analysis$Time$fcst_year)){ + # data$fcst$data <- fcst_aux + # } else { + # data$fcst$data <- NULL + + } ## close if on split_loading + + ## Load data without splitting + data <- load_multimodel(recipe) + } + + # Creating the multi-model + if (tolower(recipe$Analysis$Datasets$Multimodel$approach) == 'pooled') { + + # Deterministic hindcast + data$hcst$data <- CSTools::MergeDims(data = data$hcst$data, + merge_dims = c('model','ensemble'), + rename_dim = 'ensemble', na.rm = TRUE) + data$hcst$dims <- dim(data$hcst$data) + + # Deterministic forecast + if (!is.null(recipe$Analysis$Time$fcst_year)) { + data$fcst$data <- CSTools::MergeDims(data = data$fcst$data, + merge_dims = c('model','ensemble'), + rename_dim = 'ensemble', na.rm = 
TRUE) + data$fcst$dims <- dim(data$fcst$data) + } + + # Saving multimodel + if (recipe$Analysis$Workflow[[recipe$Analysis$Datasets$Multimodel$createFrom]]$save != 'none') { + save_forecast(recipe = recipe, + data_cube = data$hcst, + outdir = outdir[var], + type = 'hcst') + if (!is.null(data$fcst)) { + save_forecast(recipe = recipe, + data_cube = data$fcst, + outdir = outdir[var], + type = 'fcst') + } + save_observations(recipe = recipe, + data_cube = data$obs, + outdir = outdir[var]) + } + + # Probabilistic hindcast and forecast + prob <- Probabilities(recipe, data) + + } else if (tolower(recipe$Analysis$Datasets$Multimodel$approach) %in% c('mean','median')) { + + # Probabilistic hindcast and forecast + warning('Probabilities for multi-model mean is still under development. + If Skill is used, the results are not correct for the probabilitic metrics. + Probabilities cannot be computed because data has an extra dimension (model). + Also, the function should return the observed probabilities. 
+ Maybe it is better to use GetProbs.') + prob <- NULL #Probabilities(recipe, data) + + # Deterministic hindcast + data$hcst$data <- multiApply::Apply(data = data$hcst$data, + target_dims = 'ensemble', + fun = tolower(recipe$Analysis$Datasets$Multimodel$approach), + na.rm = TRUE, + ncores = recipe$Analysis$ncores)$output1 + data$hcst$data <- multiApply::Apply(data = data$hcst$data, + target_dims = 'model', + fun = tolower(recipe$Analysis$Datasets$Multimodel$approach), + na.rm = FALSE, + ncores = recipe$Analysis$ncores)$output1 + data$hcst$dims <- s2dv::InsertDim(data = data$hcst$dims, posdim = 6, lendim = 1, name = 'ensemble') + data$hcst$dims <- dim(data$hcst$data) + + # Deterministic forecast + if (!is.null(recipe$Analysis$Time$fcst_year)) { + data$fcst$data <- multiApply::Apply(data = data$fcst$data, + target_dims = 'ensemble', + fun = tolower(recipe$Analysis$Datasets$Multimodel$approach), + na.rm = TRUE, + ncores = recipe$Analysis$ncores)$output1 + data$fcst$data <- multiApply::Apply(data = data$fcst$data, + target_dims = 'model', + fun = tolower(recipe$Analysis$Datasets$Multimodel$approach), + na.rm = FALSE, + ncores = recipe$Analysis$ncores)$output1 + data$fcst$dims <- s2dv::InsertDim(data = data$fcst$dims, posdim = 6, lendim = 1, name = 'ensemble') + data$fcst$dims <- dim(data$fcst$data) + } + + } else {stop('Incorrect multi-model approach')} + + # Removing the temporary data of models in case user only requests multimodel + if (tolower(recipe$Analysis$Datasets$Multimodel$execute) %in% c('true','yes')) { + + if (tolower(recipe$Analysis$Horizon) == 'seasonal') { + exp.name <- recipe$Analysis$Datasets$System$models + } else if (tolower(recipe$Analysis$Horizon) == 'decadal') { + exp.name <- sapply(recipe$Analysis$Datasets$System$models, '[[', 'name') + } else {stop('Multimodel not implemented for this horizon.')} + unlink(file.path(recipe$Run$output_dir,"outputs/*/*",gsub('\\.','',exp.name)), + recursive = T) + 
unlink(file.path(recipe$Run$output_dir,"plots/*",gsub('\\.','',exp.name)), + recursive = T) + + } else if (tolower(recipe$Analysis$Datasets$Multimodel$execute) == 'both' && + recipe$Analysis$Workflow[[recipe$Analysis$Datasets$Multimodel$createFrom]]$save == 'none') { + + if (tolower(recipe$Analysis$Horizon) == 'seasonal') { + exp.name <- recipe$Analysis$Datasets$System$models + } else if (tolower(recipe$Analysis$Horizon) == 'decadal') { + exp.name <- sapply(recipe$Analysis$Datasets$System$models, '[[', 'name') + } else {stop('Multimodel not implemented for this horizon.')} + exp.name <- c(exp.name,'Multimodel') + unlink(file.path(recipe$Run$output_dir,"outputs/", + recipe$Analysis$Datasets$Multimodel$createFrom), + recursive = T) + + } + + return(list(data = data, prob = prob)) +} diff --git a/modules/Multimodel/load_multimodel_split.R b/modules/Multimodel/load_multimodel_split.R new file mode 100644 index 00000000..c1d060b8 --- /dev/null +++ b/modules/Multimodel/load_multimodel_split.R @@ -0,0 +1,107 @@ + +source("modules/Loading/R/dates2load.R") +source("modules/Loading/R/get_timeidx.R") +source("modules/Loading/R/check_latlon.R") + +load_multimodel_split <- function(recipe) { + + archive <- read_yaml("conf/archive.yml")$esarchive + ref.name <- recipe$Analysis$Datasets$Reference$name + if (tolower(recipe$Analysis$Horizon) == 'seasonal') { + exp.name <- recipe$Analysis$Datasets$System$models + } else if (tolower(recipe$Analysis$Horizon) == 'decadal') { + exp.name <- sapply(recipe$Analysis$Datasets$System$models, '[[', 'name') + } else {stop('Multimodel not implemented for this horizon.')} + store.freq <- recipe$Analysis$Variables$freq + variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] + exp_descrip <- archive$System[[exp.name[1]]] + reference_descrip <- archive$Reference[[ref.name]] + sdates <- dates2load(recipe, recipe$Run$logger) + + lats.min <- recipe$Analysis$Region$latmin + lats.max <- recipe$Analysis$Region$latmax + lons.min <- 
recipe$Analysis$Region$lonmin + lons.max <- recipe$Analysis$Region$lonmax + circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) + + if (recipe$Analysis$Variables$freq == "monthly_mean") { + split_multiselected_dims = TRUE + } else { + split_multiselected_dims = FALSE + } + + # Find the saved data directory + recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", + recipe$Analysis$Datasets$Multimodel$createFrom) + if (tolower(recipe$Analysis$Output_format) == "scorecards") { + hcst_start <- recipe$Analysis$Time$hcst_start + hcst_end <- recipe$Analysis$Time$hcst_end + shortdate <- substr(recipe$Analysis$Time$sdate, start = 1, stop = 2) + filename <- paste0("scorecards_$model$_", ref.name, "_$var$_$file_date$_", + hcst_start, "-", hcst_end, "_s", shortdate, ".nc") + } else { + filename <- "$var$_$file_date$.nc" + } + + hcst.path <- file.path(get_dir(recipe = recipe, variable = variable[1]), filename) + hcst.path <- gsub(variable[1], "$var$", hcst.path) + hcst.path <- gsub('Multimodel', "$model$", hcst.path) + fcst.path <- obs.path <- hcst.path + + # Load hindcast + #------------------------------------------------------------------- + hcst <- Start(dat = hcst.path, + var = variable, + file_date = sdates$hcst, + model = gsub('\\.','',exp.name), + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = 'all', + metadata_dims = 'var', + largest_dims_length = TRUE, + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = FALSE) + + dim.hcst <- attr(hcst,'Dimensions') + + # Load forecast + #------------------------------------------------------------------- + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst <- Start(dat = fcst.path, + var = 
variable, + file_date = sdates$fcst, + model = gsub('\\.','',exp.name), + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = 'all', + metadata_dims = 'var', + largest_dims_length = TRUE, + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = FALSE) + + dim.fcst <- attr(fcst,'Dimensions') + + } else { + dim.fcst <- NULL + } + + return(list(dim.hcst = dim.hcst, dim.fcst = dim.fcst)) +} -- GitLab From 40b3f22cd24a2909fadbcc420e4658ba9eb442f8 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Mon, 22 Jan 2024 16:43:49 +0100 Subject: [PATCH 78/91] split model loading --- example_scripts/example_multimodel_nadia.R | 15 +- modules/Multimodel/Dev_Multimodel.R | 178 --------------------- modules/Multimodel/Multimodel.R | 49 +++++- 3 files changed, 59 insertions(+), 183 deletions(-) delete mode 100644 modules/Multimodel/Dev_Multimodel.R diff --git a/example_scripts/example_multimodel_nadia.R b/example_scripts/example_multimodel_nadia.R index 6ae135fc..2d4ccf56 100644 --- a/example_scripts/example_multimodel_nadia.R +++ b/example_scripts/example_multimodel_nadia.R @@ -11,14 +11,14 @@ source("modules/Anomalies/Anomalies.R") source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") -source("modules/Multimodel/Dev_Multimodel.R") +source("modules/Multimodel/Multimodel.R") horizon <- 'seasonal' # decadal ## Cleaning output directory and splitting recipe -sunset_outputs_folder <- '/esarchive/scratch/nmilders/scorecards_data/multimodel/' +sunset_outputs_folder <- '/esarchive/scratch/nmilders/scorecards_data/multimodel/test/' # recipe_file <- paste0("recipes/recipe_multimodel_",horizon,".yml") -recipe_file <- 
"/esarchive/scratch/nmilders/multimodel/recipe_multimodel_seasonal_nadia.yml" # Temporary for testing +recipe_file <- "/esarchive/scratch/nmilders/multimodel/recipe_multimodel_seasonal_nadia_test.yml" # Temporary for testing # system(paste0('rm -r ',sunset_outputs_folder,'recipe_multimodel_',horizon,'_*')) system(paste0('Rscript split.R ',recipe_file)) @@ -41,6 +41,12 @@ for (recipe_file in c(atomic_recipes,atomic_recipes_multimodel)){ data <- mm$data probabilities <- mm$prob } else { + + ## Set condition for missing start date in UK-MetOffice-Glosea601 + if (recipe$Analysis$Datasets$System$name == 'UK-MetOffice-Glosea600' && recipe$Analysis$Time$sdate == "0101"){ + recipe$Analysis$Time$hcst_start <- "1994" + } + # Load datasets data <- Loading(recipe) # Change units @@ -48,10 +54,11 @@ for (recipe_file in c(atomic_recipes,atomic_recipes_multimodel)){ # Compute anomalies data <- Anomalies(recipe, data) # Compute percentiles and probability bins - probabilities <- Probabilities(recipe, data) + # probabilities <- Probabilities(recipe, data) } # Compute skill metrics skill_metrics <- Skill(recipe, data) # Plot data # Visualization(recipe, data, skill_metrics, probabilities, significance = T) } + diff --git a/modules/Multimodel/Dev_Multimodel.R b/modules/Multimodel/Dev_Multimodel.R deleted file mode 100644 index 01d630a4..00000000 --- a/modules/Multimodel/Dev_Multimodel.R +++ /dev/null @@ -1,178 +0,0 @@ -# This module load the outputs saved for each individual forecast system -# and creates the multimodel ensemble - -source("modules/Loading/R/dates2load.R") -source("modules/Loading/R/get_timeidx.R") -source("modules/Loading/R/check_latlon.R") -source('modules/Multimodel/load_multimodel.R') - -Multimodel <- function(recipe) { - - # recipe: auto-s2s recipe as provided by read_yaml - - # Loading data saved in the jobs for individual models - if (tolower(recipe$Analysis$Datasets$Multimodel$split_loading) %in% c('true','yes')){ - - ## TO DO: - #Create new function 
load_multimodel.R, until fcst, with startR retrieve false - #Load data with retrieve = F to check ensemble dimensions of each system - #Create array of NA with dimension of maximum ensemble members - #Loop over systems adding data into created array with abind - #Add dimension names since lost with abind - #Add filled array into data object - - # Retrieve data dimension only without loading data - source('modules/Multimodel/load_multimodel_split.R') - dims <- load_multimodel_split(recipe) - - # Create empty array with desired dimensions - hcst_aux <- array(data = NA, dim = dims$dim.hcst) - - if (!is.null(recipe$Analysis$Time$fcst_year)) { - fcst_aux <- array(data = NA, dim = dims$dim.fcst) - } else { - fcst_aux <- NULL - } - - data_order <- names(dims$dim.hcst) - - # ## Loop over model to load data - for (sys in 1:length(recipe$Analysis$Datasets$System$models)){ - - system_load <- recipe$Analysis$Datasets$System$models[sys] - - recipe_aux <- recipe - recipe_aux$Analysis$Datasets$System$models <- system_load - - data_aux <- load_multimodel(recipe_aux) - - hcst_aux[,,,,,which(recipe$Analysis$Datasets$System$models == system_load),,,,] <- s2dv::Reorder(data = data_aux$hcst$data, order = data_order) - - ## Error: Not able to combine arrays with different 'ensemble' dimensions - - } - - ## Necessary to do separately for hcst and fcst since can different ensemble dimensions - - # data$hcst$data <- hcst_aux - - # if(!is.null(recipe$Analysis$Time$fcst_year)){ - # data$fcst$data <- fcst_aux - # } else { - # data$fcst$data <- NULL - - } ## close if on split_loading - - ## Load data without splitting - data <- load_multimodel(recipe) - } - - # Creating the multi-model - if (tolower(recipe$Analysis$Datasets$Multimodel$approach) == 'pooled') { - - # Deterministic hindcast - data$hcst$data <- CSTools::MergeDims(data = data$hcst$data, - merge_dims = c('model','ensemble'), - rename_dim = 'ensemble', na.rm = TRUE) - data$hcst$dims <- dim(data$hcst$data) - - # Deterministic forecast 
- if (!is.null(recipe$Analysis$Time$fcst_year)) { - data$fcst$data <- CSTools::MergeDims(data = data$fcst$data, - merge_dims = c('model','ensemble'), - rename_dim = 'ensemble', na.rm = TRUE) - data$fcst$dims <- dim(data$fcst$data) - } - - # Saving multimodel - if (recipe$Analysis$Workflow[[recipe$Analysis$Datasets$Multimodel$createFrom]]$save != 'none') { - save_forecast(recipe = recipe, - data_cube = data$hcst, - outdir = outdir[var], - type = 'hcst') - if (!is.null(data$fcst)) { - save_forecast(recipe = recipe, - data_cube = data$fcst, - outdir = outdir[var], - type = 'fcst') - } - save_observations(recipe = recipe, - data_cube = data$obs, - outdir = outdir[var]) - } - - # Probabilistic hindcast and forecast - prob <- Probabilities(recipe, data) - - } else if (tolower(recipe$Analysis$Datasets$Multimodel$approach) %in% c('mean','median')) { - - # Probabilistic hindcast and forecast - warning('Probabilities for multi-model mean is still under development. - If Skill is used, the results are not correct for the probabilitic metrics. - Probabilities cannot be computed because data has an extra dimension (model). - Also, the function should return the observed probabilities. 
- Maybe it is better to use GetProbs.') - prob <- NULL #Probabilities(recipe, data) - - # Deterministic hindcast - data$hcst$data <- multiApply::Apply(data = data$hcst$data, - target_dims = 'ensemble', - fun = tolower(recipe$Analysis$Datasets$Multimodel$approach), - na.rm = TRUE, - ncores = recipe$Analysis$ncores)$output1 - data$hcst$data <- multiApply::Apply(data = data$hcst$data, - target_dims = 'model', - fun = tolower(recipe$Analysis$Datasets$Multimodel$approach), - na.rm = FALSE, - ncores = recipe$Analysis$ncores)$output1 - data$hcst$dims <- s2dv::InsertDim(data = data$hcst$dims, posdim = 6, lendim = 1, name = 'ensemble') - data$hcst$dims <- dim(data$hcst$data) - - # Deterministic forecast - if (!is.null(recipe$Analysis$Time$fcst_year)) { - data$fcst$data <- multiApply::Apply(data = data$fcst$data, - target_dims = 'ensemble', - fun = tolower(recipe$Analysis$Datasets$Multimodel$approach), - na.rm = TRUE, - ncores = recipe$Analysis$ncores)$output1 - data$fcst$data <- multiApply::Apply(data = data$fcst$data, - target_dims = 'model', - fun = tolower(recipe$Analysis$Datasets$Multimodel$approach), - na.rm = FALSE, - ncores = recipe$Analysis$ncores)$output1 - data$fcst$dims <- s2dv::InsertDim(data = data$fcst$dims, posdim = 6, lendim = 1, name = 'ensemble') - data$fcst$dims <- dim(data$fcst$data) - } - - } else {stop('Incorrect multi-model approach')} - - # Removing the temporary data of models in case user only requests multimodel - if (tolower(recipe$Analysis$Datasets$Multimodel$execute) %in% c('true','yes')) { - - if (tolower(recipe$Analysis$Horizon) == 'seasonal') { - exp.name <- recipe$Analysis$Datasets$System$models - } else if (tolower(recipe$Analysis$Horizon) == 'decadal') { - exp.name <- sapply(recipe$Analysis$Datasets$System$models, '[[', 'name') - } else {stop('Multimodel not implemented for this horizon.')} - unlink(file.path(recipe$Run$output_dir,"outputs/*/*",gsub('\\.','',exp.name)), - recursive = T) - 
unlink(file.path(recipe$Run$output_dir,"plots/*",gsub('\\.','',exp.name)), - recursive = T) - - } else if (tolower(recipe$Analysis$Datasets$Multimodel$execute) == 'both' && - recipe$Analysis$Workflow[[recipe$Analysis$Datasets$Multimodel$createFrom]]$save == 'none') { - - if (tolower(recipe$Analysis$Horizon) == 'seasonal') { - exp.name <- recipe$Analysis$Datasets$System$models - } else if (tolower(recipe$Analysis$Horizon) == 'decadal') { - exp.name <- sapply(recipe$Analysis$Datasets$System$models, '[[', 'name') - } else {stop('Multimodel not implemented for this horizon.')} - exp.name <- c(exp.name,'Multimodel') - unlink(file.path(recipe$Run$output_dir,"outputs/", - recipe$Analysis$Datasets$Multimodel$createFrom), - recursive = T) - - } - - return(list(data = data, prob = prob)) -} diff --git a/modules/Multimodel/Multimodel.R b/modules/Multimodel/Multimodel.R index 030261ae..0a6ccada 100644 --- a/modules/Multimodel/Multimodel.R +++ b/modules/Multimodel/Multimodel.R @@ -11,7 +11,54 @@ Multimodel <- function(recipe) { # recipe: auto-s2s recipe as provided by read_yaml # Loading data saved in the jobs for individual models - data <- load_multimodel(recipe) + if (tolower(recipe$Analysis$Datasets$Multimodel$split_loading) %in% c('true','yes')){ + + # Retrieve data dimension only without loading data + source('modules/Multimodel/load_multimodel_split.R') + dims <- load_multimodel_split(recipe) + + # Create empty array with desired dimensions + hcst_aux <- array(data = NA, dim = dims$dim.hcst) + + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst_aux <- array(data = NA, dim = dims$dim.fcst) + } else { + fcst_aux <- NULL + } + + data_order <- names(dims$dim.hcst) + + ## Loop over model to load hindcast data + for (sys in 1:length(recipe$Analysis$Datasets$System$models)){ + + system_load <- recipe$Analysis$Datasets$System$models[sys] + + recipe_aux <- recipe + recipe_aux$Analysis$Datasets$System$models <- system_load + + data_aux <- load_multimodel(recipe_aux) + + 
hcst_aux[,,,,,which(recipe$Analysis$Datasets$System$models == system_load),,,,1:dim(data_aux$hcst$data)['ensemble']] <- s2dv::Reorder(data = data_aux$hcst$data, order = data_order) + + } ## close loop on sys + + data$hcst$data <- hcst_aux + + ## Load forecast data + + if(!is.null(recipe$Analysis$Time$fcst_year)){ + + fcst_aux[,,,,,which(recipe$Analysis$Datasets$System$models == system_load),,,,1:dim(data_aux$fcst$data)['ensemble']] <- s2dv::Reorder(data = data_aux$fcst$data, order = data_order) + + } else { + data$fcst$data <- NULL + } ## close if on fcst_year + + } else { + ## Load data without splitting + data <- load_multimodel(recipe) + + } ## close if on split_loading # Creating the multi-model if (tolower(recipe$Analysis$Datasets$Multimodel$approach) == 'pooled') { -- GitLab From e6b5bb55a59010069676fb180df740b244902272 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Wed, 24 Jan 2024 17:16:25 +0100 Subject: [PATCH 79/91] included data attributes --- example_scripts/example_multimodel_nadia.R | 3 +- example_scripts/multimodel_seasonal_nadia.R | 39 ++++++++++++ modules/Multimodel/Multimodel.R | 61 ++++++++++--------- ...timodel_split.R => load_multimodel_dims.R} | 2 +- 4 files changed, 75 insertions(+), 30 deletions(-) create mode 100644 example_scripts/multimodel_seasonal_nadia.R rename modules/Multimodel/{load_multimodel_split.R => load_multimodel_dims.R} (99%) diff --git a/example_scripts/example_multimodel_nadia.R b/example_scripts/example_multimodel_nadia.R index 2d4ccf56..ffacfa9d 100644 --- a/example_scripts/example_multimodel_nadia.R +++ b/example_scripts/example_multimodel_nadia.R @@ -39,7 +39,7 @@ for (recipe_file in c(atomic_recipes,atomic_recipes_multimodel)){ # Load datasets and create multimodel mm <- Multimodel(recipe) data <- mm$data - probabilities <- mm$prob + # probabilities <- mm$prob } else { ## Set condition for missing start date in UK-MetOffice-Glosea601 @@ -56,6 +56,7 @@ for (recipe_file in 
c(atomic_recipes,atomic_recipes_multimodel)){ # Compute percentiles and probability bins # probabilities <- Probabilities(recipe, data) } + # Compute skill metrics skill_metrics <- Skill(recipe, data) # Plot data diff --git a/example_scripts/multimodel_seasonal_nadia.R b/example_scripts/multimodel_seasonal_nadia.R new file mode 100644 index 00000000..e6aa85c3 --- /dev/null +++ b/example_scripts/multimodel_seasonal_nadia.R @@ -0,0 +1,39 @@ + +########################################### +### SEASONAL MULTIMODEL TEST - LAUNCHER ### +########################################### + +# Load modules +source("modules/Loading/Loading.R") +source("modules/Units/Units.R") +source("modules/Calibration/Calibration.R") +source("modules/Anomalies/Anomalies.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") +source("modules/Multimodel/Multimodel.R") + +# Read recipe +args = commandArgs(trailingOnly = TRUE) +recipe_file <- args[1] +recipe <- read_atomic_recipe(recipe_file) + +if (recipe$Analysis$Datasets$System$name == 'Multimodel') { + # Load datasets and create multimodel + mm <- Multimodel(recipe) + data <- mm$data + # probabilities <- mm$prob +} else { + # Load datasets + data <- Loading(recipe) + # Change units + data <- Units(recipe, data) + # Compute anomalies + data <- Anomalies(recipe, data) + # Compute percentiles and probability bins + # probabilities <- Probabilities(recipe, data) +} +# Compute skill metrics +skill_metrics <- Skill(recipe, data) +# Plot data +# Visualization(recipe, data, skill_metrics, probabilities, significance = T) diff --git a/modules/Multimodel/Multimodel.R b/modules/Multimodel/Multimodel.R index 0a6ccada..4a7c4a12 100644 --- a/modules/Multimodel/Multimodel.R +++ b/modules/Multimodel/Multimodel.R @@ -9,56 +9,61 @@ source('modules/Multimodel/load_multimodel.R') Multimodel <- function(recipe) { # recipe: auto-s2s recipe as provided by read_yaml - + # Loading data saved in the jobs 
for individual models if (tolower(recipe$Analysis$Datasets$Multimodel$split_loading) %in% c('true','yes')){ # Retrieve data dimension only without loading data - source('modules/Multimodel/load_multimodel_split.R') - dims <- load_multimodel_split(recipe) + source('modules/Multimodel/load_multimodel_dims.R') + dims <- load_multimodel_dims(recipe) + data_order <- names(dims$dim.hcst) # Create empty array with desired dimensions - hcst_aux <- array(data = NA, dim = dims$dim.hcst) - + data <- list(hcst = NULL, fcst = NULL, obs = NULL) + data$hcst$data <- array(data = NA, dim = dims$dim.hcst) if (!is.null(recipe$Analysis$Time$fcst_year)) { - fcst_aux <- array(data = NA, dim = dims$dim.fcst) - } else { - fcst_aux <- NULL + data$fcst$data <- array(data = NA, dim = dims$dim.fcst) } - data_order <- names(dims$dim.hcst) - - ## Loop over model to load hindcast data + # Loop over system to load hindcast and forecast data for (sys in 1:length(recipe$Analysis$Datasets$System$models)){ - system_load <- recipe$Analysis$Datasets$System$models[sys] - recipe_aux <- recipe + system_load <- recipe$Analysis$Datasets$System$models[sys] recipe_aux$Analysis$Datasets$System$models <- system_load data_aux <- load_multimodel(recipe_aux) - hcst_aux[,,,,,which(recipe$Analysis$Datasets$System$models == system_load),,,,1:dim(data_aux$hcst$data)['ensemble']] <- s2dv::Reorder(data = data_aux$hcst$data, order = data_order) - - } ## close loop on sys + data$hcst$data[,,,,,which(recipe$Analysis$Datasets$System$models == system_load),,,,1:dim(data_aux$hcst$data)['ensemble']] <- s2dv::Reorder(data = data_aux$hcst$data, order = data_order) + + if(!is.null(recipe$Analysis$Time$fcst_year)){ + data$fcst$data[,,,,,which(recipe$Analysis$Datasets$System$models == system_load),,,,1:dim(data_aux$fcst$data)['ensemble']] <- s2dv::Reorder(data = data_aux$fcst$data, order = data_order) + } + + } # close loop on sys - data$hcst$data <- hcst_aux + # Define obs data + data$obs$data <- data_aux$obs$data - ## Load 
forecast data + # Include data attributes + data$hcst$attrs <- data_aux$hcst$attrs + data$fcst$attrs <- data_aux$fcst$attrs + data$obs$attrs <- data_aux$obs$attrs + + data$hcst$coords <- data_aux$hcst$coords + data$fcst$coords <- data_aux$fcst$coords + data$obs$coords <- data_aux$obs$coords + + # browser() + + # Remove temporary data_aux + rm(data_aux) - if(!is.null(recipe$Analysis$Time$fcst_year)){ - - fcst_aux[,,,,,which(recipe$Analysis$Datasets$System$models == system_load),,,,1:dim(data_aux$fcst$data)['ensemble']] <- s2dv::Reorder(data = data_aux$fcst$data, order = data_order) - - } else { - data$fcst$data <- NULL - } ## close if on fcst_year - } else { - ## Load data without splitting + # Load data without splitting data <- load_multimodel(recipe) - } ## close if on split_loading + } # close if on split_loading # Creating the multi-model if (tolower(recipe$Analysis$Datasets$Multimodel$approach) == 'pooled') { diff --git a/modules/Multimodel/load_multimodel_split.R b/modules/Multimodel/load_multimodel_dims.R similarity index 99% rename from modules/Multimodel/load_multimodel_split.R rename to modules/Multimodel/load_multimodel_dims.R index c1d060b8..a197e16c 100644 --- a/modules/Multimodel/load_multimodel_split.R +++ b/modules/Multimodel/load_multimodel_dims.R @@ -3,7 +3,7 @@ source("modules/Loading/R/dates2load.R") source("modules/Loading/R/get_timeidx.R") source("modules/Loading/R/check_latlon.R") -load_multimodel_split <- function(recipe) { +load_multimodel_dims <- function(recipe) { archive <- read_yaml("conf/archive.yml")$esarchive ref.name <- recipe$Analysis$Datasets$Reference$name -- GitLab From 912a0ef9b3a1765b54af6e1cd95c14499ce3056d Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Mon, 29 Jan 2024 11:49:33 +0100 Subject: [PATCH 80/91] Fixed file removal --- modules/Multimodel/Multimodel.R | 36 ++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/modules/Multimodel/Multimodel.R 
b/modules/Multimodel/Multimodel.R index 4a7c4a12..2a1a407a 100644 --- a/modules/Multimodel/Multimodel.R +++ b/modules/Multimodel/Multimodel.R @@ -49,22 +49,25 @@ Multimodel <- function(recipe) { data$hcst$attrs <- data_aux$hcst$attrs data$fcst$attrs <- data_aux$fcst$attrs data$obs$attrs <- data_aux$obs$attrs - data$hcst$coords <- data_aux$hcst$coords data$fcst$coords <- data_aux$fcst$coords data$obs$coords <- data_aux$obs$coords - # browser() - # Remove temporary data_aux rm(data_aux) } else { + # Load data without splitting data <- load_multimodel(recipe) } # close if on split_loading + # Get file names to remove + files_hcst <- data$hcst$attrs$source_files + files_fcst <- data$fcst$attrs$source_files + files_obs <- data$obs$attrs$source_files + # Creating the multi-model if (tolower(recipe$Analysis$Datasets$Multimodel$approach) == 'pooled') { @@ -144,31 +147,36 @@ Multimodel <- function(recipe) { } else {stop('Incorrect multi-model approach')} + # Removing the temporary data of models in case user only requests multimodel + + # Keep only multimodel data if (tolower(recipe$Analysis$Datasets$Multimodel$execute) %in% c('true','yes')) { + # Remove createFrom files + unlink(files_hcst) + unlink(files_fcst) + unlink(files_obs) + + # Remove Skill and plots file of systems if (tolower(recipe$Analysis$Horizon) == 'seasonal') { exp.name <- recipe$Analysis$Datasets$System$models } else if (tolower(recipe$Analysis$Horizon) == 'decadal') { exp.name <- sapply(recipe$Analysis$Datasets$System$models, '[[', 'name') } else {stop('Multimodel not implemented for this horizon.')} - unlink(file.path(recipe$Run$output_dir,"outputs/*/*",gsub('\\.','',exp.name)), + unlink(file.path(recipe$Run$output_dir,"outputs/Skill/*",gsub('\\.','',exp.name)), recursive = T) unlink(file.path(recipe$Run$output_dir,"plots/*",gsub('\\.','',exp.name)), recursive = T) + # Keep mutlimodel and systems data } else if (tolower(recipe$Analysis$Datasets$Multimodel$execute) == 'both' && 
recipe$Analysis$Workflow[[recipe$Analysis$Datasets$Multimodel$createFrom]]$save == 'none') { - - if (tolower(recipe$Analysis$Horizon) == 'seasonal') { - exp.name <- recipe$Analysis$Datasets$System$models - } else if (tolower(recipe$Analysis$Horizon) == 'decadal') { - exp.name <- sapply(recipe$Analysis$Datasets$System$models, '[[', 'name') - } else {stop('Multimodel not implemented for this horizon.')} - exp.name <- c(exp.name,'Multimodel') - unlink(file.path(recipe$Run$output_dir,"outputs/", - recipe$Analysis$Datasets$Multimodel$createFrom), - recursive = T) + + # Remove createFrom files only + unlink(files_hcst) + unlink(files_fcst) + unlink(files_obs) } -- GitLab From e117f52026155a54ade345da2bb582ea832b3935 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Mon, 5 Feb 2024 13:05:46 +0100 Subject: [PATCH 81/91] cleaning files fixed --- example_scripts/example_multimodel_nadia.R | 65 -- example_scripts/multimodel_seasonal_nadia.R | 39 -- modules/Multimodel/Multimodel.R | 202 ++---- modules/Multimodel/build_multimodel.R | 69 ++ modules/Multimodel/clean_multimodel.R | 42 ++ ...ad_multimodel_dims.R => dims_multimodel.R} | 2 +- modules/Multimodel/load_multimodel_splitted.R | 58 ++ modules/Scorecards/Scorecards.R | 615 +++++++++++++++--- 8 files changed, 757 insertions(+), 335 deletions(-) delete mode 100644 example_scripts/example_multimodel_nadia.R delete mode 100644 example_scripts/multimodel_seasonal_nadia.R create mode 100644 modules/Multimodel/build_multimodel.R create mode 100644 modules/Multimodel/clean_multimodel.R rename modules/Multimodel/{load_multimodel_dims.R => dims_multimodel.R} (99%) create mode 100644 modules/Multimodel/load_multimodel_splitted.R diff --git a/example_scripts/example_multimodel_nadia.R b/example_scripts/example_multimodel_nadia.R deleted file mode 100644 index ffacfa9d..00000000 --- a/example_scripts/example_multimodel_nadia.R +++ /dev/null @@ -1,65 +0,0 @@ - -################################ -### SEASONAL MULTIMODEL TEST ### 
-################################ - -# Load modules -source("modules/Loading/Loading.R") -source("modules/Units/Units.R") -source("modules/Calibration/Calibration.R") -source("modules/Anomalies/Anomalies.R") -source("modules/Skill/Skill.R") -source("modules/Saving/Saving.R") -source("modules/Visualization/Visualization.R") -source("modules/Multimodel/Multimodel.R") - -horizon <- 'seasonal' # decadal - -## Cleaning output directory and splitting recipe -sunset_outputs_folder <- '/esarchive/scratch/nmilders/scorecards_data/multimodel/test/' -# recipe_file <- paste0("recipes/recipe_multimodel_",horizon,".yml") -recipe_file <- "/esarchive/scratch/nmilders/multimodel/recipe_multimodel_seasonal_nadia_test.yml" # Temporary for testing -# system(paste0('rm -r ',sunset_outputs_folder,'recipe_multimodel_',horizon,'_*')) -system(paste0('Rscript split.R ',recipe_file)) - -## Finding atomic recipes -atomic_recipe_folder <- paste0(sunset_outputs_folder,list.files(sunset_outputs_folder, pattern = horizon),'/logs/recipes/') -atomic_recipe_folder <- atomic_recipe_folder[length(atomic_recipe_folder)] -atomic_recipes <- list.files(paste0(atomic_recipe_folder), pattern = '.yml') -atomic_recipes <- atomic_recipes[-length(atomic_recipes)] -atomic_recipes_multimodel <- list.files(paste0(atomic_recipe_folder,'multimodel')) -atomic_recipes <- paste0(atomic_recipe_folder,atomic_recipes) -atomic_recipes_multimodel <- paste0(atomic_recipe_folder,'multimodel/',atomic_recipes_multimodel) - -## Running atomic recipes -for (recipe_file in c(atomic_recipes,atomic_recipes_multimodel)){ - # Read recipe - recipe <- read_atomic_recipe(recipe_file) - if (recipe$Analysis$Datasets$System$name == 'Multimodel'){ - # Load datasets and create multimodel - mm <- Multimodel(recipe) - data <- mm$data - # probabilities <- mm$prob - } else { - - ## Set condition for missing start date in UK-MetOffice-Glosea601 - if (recipe$Analysis$Datasets$System$name == 'UK-MetOffice-Glosea600' && recipe$Analysis$Time$sdate == 
"0101"){ - recipe$Analysis$Time$hcst_start <- "1994" - } - - # Load datasets - data <- Loading(recipe) - # Change units - data <- Units(recipe, data) - # Compute anomalies - data <- Anomalies(recipe, data) - # Compute percentiles and probability bins - # probabilities <- Probabilities(recipe, data) - } - - # Compute skill metrics - skill_metrics <- Skill(recipe, data) - # Plot data - # Visualization(recipe, data, skill_metrics, probabilities, significance = T) -} - diff --git a/example_scripts/multimodel_seasonal_nadia.R b/example_scripts/multimodel_seasonal_nadia.R deleted file mode 100644 index e6aa85c3..00000000 --- a/example_scripts/multimodel_seasonal_nadia.R +++ /dev/null @@ -1,39 +0,0 @@ - -########################################### -### SEASONAL MULTIMODEL TEST - LAUNCHER ### -########################################### - -# Load modules -source("modules/Loading/Loading.R") -source("modules/Units/Units.R") -source("modules/Calibration/Calibration.R") -source("modules/Anomalies/Anomalies.R") -source("modules/Skill/Skill.R") -source("modules/Saving/Saving.R") -source("modules/Visualization/Visualization.R") -source("modules/Multimodel/Multimodel.R") - -# Read recipe -args = commandArgs(trailingOnly = TRUE) -recipe_file <- args[1] -recipe <- read_atomic_recipe(recipe_file) - -if (recipe$Analysis$Datasets$System$name == 'Multimodel') { - # Load datasets and create multimodel - mm <- Multimodel(recipe) - data <- mm$data - # probabilities <- mm$prob -} else { - # Load datasets - data <- Loading(recipe) - # Change units - data <- Units(recipe, data) - # Compute anomalies - data <- Anomalies(recipe, data) - # Compute percentiles and probability bins - # probabilities <- Probabilities(recipe, data) -} -# Compute skill metrics -skill_metrics <- Skill(recipe, data) -# Plot data -# Visualization(recipe, data, skill_metrics, probabilities, significance = T) diff --git a/modules/Multimodel/Multimodel.R b/modules/Multimodel/Multimodel.R index 2a1a407a..192e9292 100644 
--- a/modules/Multimodel/Multimodel.R +++ b/modules/Multimodel/Multimodel.R @@ -1,10 +1,18 @@ # This module load the outputs saved for each individual forecast system # and creates the multimodel ensemble +## TO DO: +## Remove empty folders after cleaning files +## Return probabilities + source("modules/Loading/R/dates2load.R") source("modules/Loading/R/get_timeidx.R") source("modules/Loading/R/check_latlon.R") source('modules/Multimodel/load_multimodel.R') +source('modules/Multimodel/load_multimodel_splitted.R') +source('modules/Multimodel/dims_multimodel.R') +source('modules/Multimodel/build_multimodel.R') +source('modules/Multimodel/clean_multimodel.R') Multimodel <- function(recipe) { @@ -12,173 +20,55 @@ Multimodel <- function(recipe) { # Loading data saved in the jobs for individual models if (tolower(recipe$Analysis$Datasets$Multimodel$split_loading) %in% c('true','yes')){ - - # Retrieve data dimension only without loading data - source('modules/Multimodel/load_multimodel_dims.R') - dims <- load_multimodel_dims(recipe) - data_order <- names(dims$dim.hcst) + # Load data splitting by system + data_aux <- load_multimodel_splitted(recipe) - # Create empty array with desired dimensions - data <- list(hcst = NULL, fcst = NULL, obs = NULL) - data$hcst$data <- array(data = NA, dim = dims$dim.hcst) - if (!is.null(recipe$Analysis$Time$fcst_year)) { - data$fcst$data <- array(data = NA, dim = dims$dim.fcst) - } + data <- data_aux$data - # Loop over system to load hindcast and forecast data - for (sys in 1:length(recipe$Analysis$Datasets$System$models)){ - - recipe_aux <- recipe - system_load <- recipe$Analysis$Datasets$System$models[sys] - recipe_aux$Analysis$Datasets$System$models <- system_load - - data_aux <- load_multimodel(recipe_aux) - - data$hcst$data[,,,,,which(recipe$Analysis$Datasets$System$models == system_load),,,,1:dim(data_aux$hcst$data)['ensemble']] <- s2dv::Reorder(data = data_aux$hcst$data, order = data_order) - - 
if(!is.null(recipe$Analysis$Time$fcst_year)){ - data$fcst$data[,,,,,which(recipe$Analysis$Datasets$System$models == system_load),,,,1:dim(data_aux$fcst$data)['ensemble']] <- s2dv::Reorder(data = data_aux$fcst$data, order = data_order) - } - - } # close loop on sys + files_hcst <- data_aux$files_hcst + files_fcst <- data_aux$files_fcst + files_obs <- data_aux$files_obs - # Define obs data - data$obs$data <- data_aux$obs$data - - # Include data attributes - data$hcst$attrs <- data_aux$hcst$attrs - data$fcst$attrs <- data_aux$fcst$attrs - data$obs$attrs <- data_aux$obs$attrs - data$hcst$coords <- data_aux$hcst$coords - data$fcst$coords <- data_aux$fcst$coords - data$obs$coords <- data_aux$obs$coords - - # Remove temporary data_aux rm(data_aux) } else { - # Load data without splitting - data <- load_multimodel(recipe) - - } # close if on split_loading + data <- load_multimodel(recipe) + files_hcst <- data$hcst$attrs$source_files + files_fcst <- data$fcst$attrs$source_files + files_obs <- data$obs$attrs$source_files + } - # Get file names to remove - files_hcst <- data$hcst$attrs$source_files - files_fcst <- data$fcst$attrs$source_files - files_obs <- data$obs$attrs$source_files - - # Creating the multi-model - if (tolower(recipe$Analysis$Datasets$Multimodel$approach) == 'pooled') { - - # Deterministic hindcast - data$hcst$data <- CSTools::MergeDims(data = data$hcst$data, - merge_dims = c('model','ensemble'), - rename_dim = 'ensemble', na.rm = TRUE) - data$hcst$dims <- dim(data$hcst$data) - - # Deterministic forecast - if (!is.null(recipe$Analysis$Time$fcst_year)) { - data$fcst$data <- CSTools::MergeDims(data = data$fcst$data, - merge_dims = c('model','ensemble'), - rename_dim = 'ensemble', na.rm = TRUE) - data$fcst$dims <- dim(data$fcst$data) - } - - # Saving multimodel - if (recipe$Analysis$Workflow[[recipe$Analysis$Datasets$Multimodel$createFrom]]$save != 'none') { - save_forecast(recipe = recipe, - data_cube = data$hcst, - outdir = outdir[var], - type = 'hcst') - if 
(!is.null(data$fcst)) { - save_forecast(recipe = recipe, - data_cube = data$fcst, - outdir = outdir[var], - type = 'fcst') - } - save_observations(recipe = recipe, - data_cube = data$obs, - outdir = outdir[var]) - } - - # Probabilistic hindcast and forecast - prob <- Probabilities(recipe, data) - - } else if (tolower(recipe$Analysis$Datasets$Multimodel$approach) %in% c('mean','median')) { - - # Probabilistic hindcast and forecast - warning('Probabilities for multi-model mean is still under development. - If Skill is used, the results are not correct for the probabilitic metrics. - Probabilities cannot be computed because data has an extra dimension (model). - Also, the function should return the observed probabilities. - Maybe it is better to use GetProbs.') - prob <- NULL #Probabilities(recipe, data) - - # Deterministic hindcast - data$hcst$data <- multiApply::Apply(data = data$hcst$data, - target_dims = 'ensemble', - fun = tolower(recipe$Analysis$Datasets$Multimodel$approach), - na.rm = TRUE, - ncores = recipe$Analysis$ncores)$output1 - data$hcst$data <- multiApply::Apply(data = data$hcst$data, - target_dims = 'model', - fun = tolower(recipe$Analysis$Datasets$Multimodel$approach), - na.rm = FALSE, - ncores = recipe$Analysis$ncores)$output1 - data$hcst$dims <- s2dv::InsertDim(data = data$hcst$dims, posdim = 6, lendim = 1, name = 'ensemble') - data$hcst$dims <- dim(data$hcst$data) - - # Deterministic forecast - if (!is.null(recipe$Analysis$Time$fcst_year)) { - data$fcst$data <- multiApply::Apply(data = data$fcst$data, - target_dims = 'ensemble', - fun = tolower(recipe$Analysis$Datasets$Multimodel$approach), - na.rm = TRUE, - ncores = recipe$Analysis$ncores)$output1 - data$fcst$data <- multiApply::Apply(data = data$fcst$data, - target_dims = 'model', - fun = tolower(recipe$Analysis$Datasets$Multimodel$approach), - na.rm = FALSE, - ncores = recipe$Analysis$ncores)$output1 - data$fcst$dims <- s2dv::InsertDim(data = data$fcst$dims, posdim = 6, lendim = 1, name = 
'ensemble') - data$fcst$dims <- dim(data$fcst$data) + # Building the multi-model + multimodel_aux <- build_multimodel(data, recipe) + data <- multimodel_aux$data + prob <- multimodel_aux$prob + rm(multimodel_aux) + + # Saving multimodel + if (recipe$Analysis$Workflow[[recipe$Analysis$Datasets$Multimodel$createFrom]]$save != 'none') { + save_forecast(recipe = recipe, + data_cube = data$hcst, + outdir = NULL, + type = 'hcst') + if (!is.null(data$fcst)) { + save_forecast(recipe = recipe, + data_cube = data$fcst, + outdir = NULL, + type = 'fcst') } - - } else {stop('Incorrect multi-model approach')} - + save_observations(recipe = recipe, + data_cube = data$obs, + outdir = NULL) + } # Removing the temporary data of models in case user only requests multimodel + clean_multimodel(recipe, + files_hcst, + files_fcst, + files_obs) - # Keep only multimodel data - if (tolower(recipe$Analysis$Datasets$Multimodel$execute) %in% c('true','yes')) { - - # Remove createFrom files - unlink(files_hcst) - unlink(files_fcst) - unlink(files_obs) - - # Remove Skill and plots file of systems - if (tolower(recipe$Analysis$Horizon) == 'seasonal') { - exp.name <- recipe$Analysis$Datasets$System$models - } else if (tolower(recipe$Analysis$Horizon) == 'decadal') { - exp.name <- sapply(recipe$Analysis$Datasets$System$models, '[[', 'name') - } else {stop('Multimodel not implemented for this horizon.')} - unlink(file.path(recipe$Run$output_dir,"outputs/Skill/*",gsub('\\.','',exp.name)), - recursive = T) - unlink(file.path(recipe$Run$output_dir,"plots/*",gsub('\\.','',exp.name)), - recursive = T) - - # Keep mutlimodel and systems data - } else if (tolower(recipe$Analysis$Datasets$Multimodel$execute) == 'both' && - recipe$Analysis$Workflow[[recipe$Analysis$Datasets$Multimodel$createFrom]]$save == 'none') { - - # Remove createFrom files only - unlink(files_hcst) - unlink(files_fcst) - unlink(files_obs) - - } + return(list(data = data, + prob = prob)) - return(list(data = data, prob = prob)) } diff 
--git a/modules/Multimodel/build_multimodel.R b/modules/Multimodel/build_multimodel.R new file mode 100644 index 00000000..ae0eff56 --- /dev/null +++ b/modules/Multimodel/build_multimodel.R @@ -0,0 +1,69 @@ + +#TODO: return also the probabilities + +build_multimodel <- function(data, recipe) { + + if (tolower(recipe$Analysis$Datasets$Multimodel$approach) == 'pooled') { + + # Deterministic hindcast + data$hcst$data <- CSTools::MergeDims(data = data$hcst$data, + merge_dims = c('model','ensemble'), + rename_dim = 'ensemble', na.rm = TRUE) + data$hcst$dims <- dim(data$hcst$data) + + # Deterministic forecast + if (!is.null(recipe$Analysis$Time$fcst_year)) { + data$fcst$data <- CSTools::MergeDims(data = data$fcst$data, + merge_dims = c('model','ensemble'), + rename_dim = 'ensemble', na.rm = TRUE) + data$fcst$dims <- dim(data$fcst$data) + } + + # Probabilistic hindcast and forecast + prob <- Probabilities(recipe, data) + + } else if (tolower(recipe$Analysis$Datasets$Multimodel$approach) %in% c('mean','median')) { + + # Probabilistic hindcast and forecast + stop('Probabilities for multi-model mean is still under development. + If Skill is used, the results are not correct for the probabilitic metrics. + Probabilities cannot be computed because data has an extra dimension (model). + Also, the function should return the observed probabilities. 
+ Maybe it is better to use GetProbs.') + prob <- NULL #Probabilities(recipe, data) + + # Deterministic hindcast + data$hcst$data <- multiApply::Apply(data = data$hcst$data, + target_dims = 'ensemble', + fun = tolower(recipe$Analysis$Datasets$Multimodel$approach), + na.rm = TRUE, + ncores = recipe$Analysis$ncores)$output1 + data$hcst$data <- multiApply::Apply(data = data$hcst$data, + target_dims = 'model', + fun = tolower(recipe$Analysis$Datasets$Multimodel$approach), + na.rm = FALSE, + ncores = recipe$Analysis$ncores)$output1 + data$hcst$dims <- s2dv::InsertDim(data = data$hcst$dims, posdim = 6, lendim = 1, name = 'ensemble') + data$hcst$dims <- dim(data$hcst$data) + + # Deterministic forecast + if (!is.null(recipe$Analysis$Time$fcst_year)) { + data$fcst$data <- multiApply::Apply(data = data$fcst$data, + target_dims = 'ensemble', + fun = tolower(recipe$Analysis$Datasets$Multimodel$approach), + na.rm = TRUE, + ncores = recipe$Analysis$ncores)$output1 + data$fcst$data <- multiApply::Apply(data = data$fcst$data, + target_dims = 'model', + fun = tolower(recipe$Analysis$Datasets$Multimodel$approach), + na.rm = FALSE, + ncores = recipe$Analysis$ncores)$output1 + data$fcst$dims <- s2dv::InsertDim(data = data$fcst$dims, posdim = 6, lendim = 1, name = 'ensemble') + data$fcst$dims <- dim(data$fcst$data) + } + + } else {stop('Incorrect multi-model approach')} + + return(list(data = data, + prob = prob)) +} diff --git a/modules/Multimodel/clean_multimodel.R b/modules/Multimodel/clean_multimodel.R new file mode 100644 index 00000000..816ef0c2 --- /dev/null +++ b/modules/Multimodel/clean_multimodel.R @@ -0,0 +1,42 @@ + +clean_multimodel <- function(recipe, files_hcst, files_fcst, files_obs) { + + # Names of the models + if (tolower(recipe$Analysis$Horizon) == 'seasonal') { + exp.name <- recipe$Analysis$Datasets$System$models + } else if (tolower(recipe$Analysis$Horizon) == 'decadal') { + exp.name <- sapply(recipe$Analysis$Datasets$System$models, '[[', 'name') + } else 
{stop('Multimodel not implemented for this horizon.')} + exp.name <- gsub('\\.','',exp.name) + + # To remove the observations from all models' folders + for (m in exp.name[-1]){ + files_obs <- c(files_obs, gsub(exp.name[1],m,files_obs)) + } + + if (tolower(recipe$Analysis$Datasets$Multimodel$execute) %in% c('true','yes')) { + + # Keep only multimodel data + + # Remove createFrom files + unlink(files_hcst) + unlink(files_fcst) + unlink(files_obs) + + # Remove Skill and plots file of systems + unlink(file.path(recipe$Run$output_dir,"outputs/*",exp.name), + recursive = T) + unlink(file.path(recipe$Run$output_dir,"plots/*",exp.name), + recursive = T) + + } else if (tolower(recipe$Analysis$Datasets$Multimodel$execute) == 'both' && + recipe$Analysis$Workflow[[recipe$Analysis$Datasets$Multimodel$createFrom]]$save == 'none') { + + # Remove createFrom files only + unlink(files_hcst) + unlink(files_fcst) + unlink(files_obs) + + } + +} diff --git a/modules/Multimodel/load_multimodel_dims.R b/modules/Multimodel/dims_multimodel.R similarity index 99% rename from modules/Multimodel/load_multimodel_dims.R rename to modules/Multimodel/dims_multimodel.R index a197e16c..f9076153 100644 --- a/modules/Multimodel/load_multimodel_dims.R +++ b/modules/Multimodel/dims_multimodel.R @@ -3,7 +3,7 @@ source("modules/Loading/R/dates2load.R") source("modules/Loading/R/get_timeidx.R") source("modules/Loading/R/check_latlon.R") -load_multimodel_dims <- function(recipe) { +dims_multimodel <- function(recipe) { archive <- read_yaml("conf/archive.yml")$esarchive ref.name <- recipe$Analysis$Datasets$Reference$name diff --git a/modules/Multimodel/load_multimodel_splitted.R b/modules/Multimodel/load_multimodel_splitted.R new file mode 100644 index 00000000..5b9a642f --- /dev/null +++ b/modules/Multimodel/load_multimodel_splitted.R @@ -0,0 +1,58 @@ + +load_multimodel_splitted <- function(recipe){ + + # Retrieve data dimension only without loading data + dims <- dims_multimodel(recipe) + data_order <- 
names(dims$dim.hcst) + + # Create empty array with desired dimensions + data <- list(hcst = NULL, fcst = NULL, obs = NULL) + data$hcst$data <- array(data = NA, dim = dims$dim.hcst) + if (!is.null(recipe$Analysis$Time$fcst_year)) { + data$fcst$data <- array(data = NA, dim = dims$dim.fcst) + } + files_hcst <- list() + files_fcst <- list() + files_obs <- list() + + + # Loop over system to load hindcast and forecast data + for (sys in 1:length(recipe$Analysis$Datasets$System$models)){ + + recipe_aux <- recipe + system_load <- recipe$Analysis$Datasets$System$models[sys] + recipe_aux$Analysis$Datasets$System$models <- system_load + + data_aux <- load_multimodel(recipe_aux) + + data$hcst$data[,,,,,which(recipe$Analysis$Datasets$System$models == system_load),,,,1:dim(data_aux$hcst$data)['ensemble']] <- s2dv::Reorder(data = data_aux$hcst$data, order = data_order) + + if(!is.null(recipe$Analysis$Time$fcst_year)){ + data$fcst$data[,,,,,which(recipe$Analysis$Datasets$System$models == system_load),,,,1:dim(data_aux$fcst$data)['ensemble']] <- s2dv::Reorder(data = data_aux$fcst$data, order = data_order) + } + + files_hcst <- append(files_hcst, data_aux$hcst$attrs$source_files, after = length(files_hcst)) + files_fcst <- append(files_fcst, data_aux$fcst$attrs$source_files, after = length(files_fcst)) + files_obs <- append(files_obs, data_aux$obs$attrs$source_files, after = length(files_obs)) + + } # close loop on sys + + # Define obs data + data$obs$data <- data_aux$obs$data + + # Include data attributes + data$hcst$attrs <- data_aux$hcst$attrs + data$fcst$attrs <- data_aux$fcst$attrs + data$obs$attrs <- data_aux$obs$attrs + data$hcst$coords <- data_aux$hcst$coords + data$fcst$coords <- data_aux$fcst$coords + data$obs$coords <- data_aux$obs$coords + + # Remove temporary data_aux + rm(data_aux) + + return(list(data = data, + files_hcst = files_hcst, + files_fcst = files_fcst, + files_obs = files_obs)) +} diff --git a/modules/Scorecards/Scorecards.R b/modules/Scorecards/Scorecards.R 
index 0dbcd921..527fe387 100644 --- a/modules/Scorecards/Scorecards.R +++ b/modules/Scorecards/Scorecards.R @@ -10,127 +10,574 @@ source('modules/Scorecards/R/tmp/SCTransform.R') source('modules/Scorecards/R/tmp/ScorecardsSingle.R') source('modules/Scorecards/R/tmp/ScorecardsMulti.R') source('modules/Scorecards/R/tmp/ScorecardsSystemDiff.R') -source('modules/Scorecards/R/tmp/SCPlotScorecard.R') +source('modules/Scorecards/R/tmp/VizScorecard.R') +## Temporary for new ESviz function +source('modules/Scorecards/R/tmp/ColorBarContinuous.R') +source('modules/Scorecards/R/tmp/ClimPalette.R') +.IsColor <- s2dv:::.IsColor +.FilterUserGraphicArgs <- s2dv:::.FilterUserGraphicArgs -## TODO: Change function name to 'Scorecards'? ## Define function Scorecards <- function(recipe) { - ## set parameters - input.path <- paste0(recipe$Run$output_dir, "/outputs/Skill/") + ## Parameters for loading data files + + # input.path <- "/esarchive/scratch/nmilders/scorecards_data/syear/testing/" #temp + input.path <- '/esarchive/scratch/nmilders/scorecards_data/multimodel/recipe_multimodel_seasonal_nadia_20240130182422/outputs/' + + skill.input.path <- paste0(input.path, "Skill/") #paste0(recipe$Run$output_dir, "/outputs/Skill/") + # stats.input.path <- paste0(input.path, "Statistics/") #paste0(recipe$Run$output_dir, "/outputs/Statistics/") + skill.input.path <- paste0(recipe$Run$output_dir, "/outputs/Skill/") + stats.input.path <- paste0(recipe$Run$output_dir, "/outputs/Statistics/") output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/") dir.create(output.path, recursive = T, showWarnings = F) - system <- recipe$Analysis$Datasets$System$name reference <- recipe$Analysis$Datasets$Reference$name var <- recipe$Analysis$Variables$name start.year <- as.numeric(recipe$Analysis$Time$hcst_start) end.year <- as.numeric(recipe$Analysis$Time$hcst_end) forecast.months <- recipe$Analysis$Time$ftime_min : recipe$Analysis$Time$ftime_max - - if (recipe$Analysis$Workflow$Scorecards$start_months 
== 'all') { - start.months <- 1:12 + calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method) + + if (recipe$Analysis$Workflow$Scorecards$start_months == 'all' || is.null(recipe$Analysis$Workflow$Scorecards$start_months)) { + start.months <- as.numeric(substr(recipe$Analysis$Time$sdate, 1,2)) } else { start.months <- as.numeric(strsplit(recipe$Analysis$Workflow$Scorecards$start_months, split = ", | |,")[[1]]) + if(!any(as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))) %in% start.months){ + error(recipe$Run$logger,"Requested start dates for scorecards must be loaded") + } } + start.months <- sprintf("%02d", start.months) + period <- paste0(start.year, "-", end.year) + + ## Parameters for data aggregation regions <- recipe$Analysis$Workflow$Scorecards$regions for (i in names(regions)){regions[[i]] <- unlist(regions[[i]])} metric.aggregation <- recipe$Analysis$Workflow$Scorecards$metric_aggregation metrics.load <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Skill$metric), ", | |,")) + metrics.visualize <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric), ", | |,")) + ncores <- 1 # recipe$Analysis$ncores - ## Define skill scores in score aggregation has been requested - - if(metric.aggregation == 'score'){ - if('rps' %in% metrics.load){ - metrics.load <- c(metrics.load, 'rps_clim') - } - if('crps' %in% metrics.load){ - metrics.load <- c(metrics.load, 'crps_clim') - } + if(is.null(recipe$Analysis$Workflow$Scorecards$signif_alpha)){ + alpha <- 0.05 + } else { + alpha <- recipe$Analysis$Workflow$Scorecards$signif_alpha } - metrics.visualize <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric), ", | |,")) - - ## Define skill scores in score aggregation has been requested + if (is.null(recipe$Analysis$Workflow$Scorecards$inf_to_na)){ + inf.to.na <- FALSE + } else { + inf.to.na <- recipe$Analysis$Workflow$Scorecards$inf_to_na + } - if(metric.aggregation == 'score'){ - if('rpss' %in% metrics.visualize){ - 
metrics.visualize[metrics.visualize == 'rpss'] <- 'rpss_score_aggr' - } - if('crpss' %in% metrics.visualize){ - metrics.visualize[metrics.visualize == 'crpss'] <- 'crpss_score_aggr' - } + if(is.null(recipe$Analysis$remove_NAs)){ + na.rm <- FALSE + } else { + na.rm <- recipe$Analysis$remove_NAs } - inf.to.na <- recipe$Analysis$Workflow$Scorecards$inf_to_na + ## Parameters for scorecard layout table.label <- recipe$Analysis$Workflow$Scorecards$table_label fileout.label <- recipe$Analysis$Workflow$Scorecards$fileout_label - legend.white.space <- recipe$Analysis$Workflow$Scorecards$legend_white_space col1.width <- recipe$Analysis$Workflow$Scorecards$col1_width col2.width <- recipe$Analysis$Workflow$Scorecards$col2_width - calculate.diff <- recipe$Analysis$Workflow$Scorecards$calculate_diff - ncores <- 1 # recipe$Analysis$ncores + legend.breaks <- recipe$Analysis$Workflow$Scorecards$legend_breaks + legend.width <- recipe$Analysis$Workflow$Scorecards$legend_width - ## Load data files - loaded_metrics <- LoadMetrics(system = system, - reference = reference, - var = var, - start.year = start.year, - end.year = end.year, - metrics = metrics.load, - start.months = start.months, - forecast.months = forecast.months, - inf_to_na = inf.to.na, - input.path = input.path) + if (is.null(recipe$Analysis$Workflow$Scorecards$plot_legend)){ + plot.legend <- TRUE + } else { + plot.legend <- recipe$Analysis$Workflow$Scorecards$plot_legend + } + + if(is.null(recipe$Analysis$Workflow$Scorecards$columns_width)){ + columns.width <- 1.2 + } else { + columns.width <- recipe$Analysis$Workflow$Scorecards$columns_width + } + if(is.null(recipe$Analysis$Workflow$Scorecards$legend_white_space)){ + legend.white.space <- 6 + } else { + legend.white.space <- recipe$Analysis$Workflow$Scorecards$legend_white_space + } + + if(is.null(recipe$Analysis$Workflow$Scorecards$legend_height)){ + legend.height <- 50 + } else { + legend.height <- recipe$Analysis$Workflow$Scorecards$legend_height + } - if('region' 
%in% names(dim(loaded_metrics[[1]][[1]]))){ + if(is.null(recipe$Analysis$Workflow$Scorecards$label_scale)){ + label.scale <- 1.4 + } else { + label.scale <- recipe$Analysis$Workflow$Scorecards$label_scale + } + + if(is.null(recipe$Analysis$Workflow$Scorecards$round_decimal)){ + round.decimal <- 2 + } else { + round.decimal <- recipe$Analysis$Workflow$Scorecards$round_decimal + } + + if(is.null(recipe$Analysis$Workflow$Scorecards$font_size)){ + font.size <- 1.1 + } else { + font.size <- recipe$Analysis$Workflow$Scorecards$font_size + } + + ## Define if difference scorecard is to be plotted + if (is.null(recipe$Analysis$Workflow$Scorecards$calculate_diff)){ + calculate.diff <- FALSE + } else { + calculate.diff <- recipe$Analysis$Workflow$Scorecards$calculate_diff + } + + ####### SKILL AGGREGATION ####### + if(metric.aggregation == 'skill'){ + + ## Load data files + loaded_metrics <- LoadMetrics(input_path = skill.input.path, + system = system, + reference = reference, + var = var, + metrics = metrics.visualize, ## metrics.load + period = period, + start_months = start.months, + forecast_months = forecast.months, + calib_method = calib.method, + syear = NULL, + inf_to_na = inf.to.na + ) + + ## Spatial Aggregation of metrics + if('region' %in% names(dim(loaded_metrics[[1]][[1]]))){ + + ### Convert loaded metrics to array for already aggregated data + metrics.dim <- attributes(loaded_metrics[[1]][[1]])$metrics + forecast.months.dim <- attributes(loaded_metrics[[1]][[1]])$forecast.months + start.months.dim <- attributes(loaded_metrics[[1]][[1]])$start.months + regions.dim <- regions #list('NAO' = c(lon.min = -80, lon.max = 40, lat.min = 20, lat.max = 80)) + + aggregated_metrics <- array(dim = c(system = length(loaded_metrics), + reference = length(loaded_metrics[[1]]), + metric = length(metrics.dim), + time = length(forecast.months.dim), + sdate = length(start.months.dim), + region = length(regions.dim))) + + + for (sys in 1:length(names(loaded_metrics))){ + for (ref in 
1:length(names(loaded_metrics[[sys]]))){ + aggregated_metrics[sys, ref, , , , ] <- s2dv::Reorder(data = loaded_metrics[[sys]][[ref]], order = c('metric','time','sdate','region')) + } + } + + ## Add attributes + attributes(aggregated_metrics)$metrics <- metrics.load + attributes(aggregated_metrics)$start.months <- attributes(loaded_metrics[[1]][[1]])$start.months + attributes(aggregated_metrics)$forecast.months <- attributes(loaded_metrics[[1]][[1]])$forecast.months + attributes(aggregated_metrics)$regions <- regions + attributes(aggregated_metrics)$system.name <- names(loaded_metrics) + attributes(aggregated_metrics)$reference.name <- names(loaded_metrics[[1]]) + + + } else { + ## Calculate weighted mean of spatial aggregation + aggregated_metrics <- WeightedMetrics(loaded_metrics, + regions = regions, + metric.aggregation = metric.aggregation, + ncores = ncores) + } ## close if on region + metrics_significance <- NULL - ### Convert loaded metrics to array for allready aggregated data - metrics.dim <- attributes(loaded_metrics[[1]][[1]])$metrics - forecast.months.dim <- attributes(loaded_metrics[[1]][[1]])$forecast.months - start.months.dim <- attributes(loaded_metrics[[1]][[1]])$start.months - regions.dim <- regions #list('NAO' = c(lon.min = -80, lon.max = 40, lat.min = 20, lat.max = 80)) + } ## close if on skill + + ###### SCORE AGGREGATION ###### + if(metric.aggregation == 'score'){ + + lon_dim <- 'longitude' + lat_dim <- 'latitude' + time_dim <- 'syear' + memb_dim <- 'ensemble' + + ## Define arrays to filled with data + aggregated_metrics <- array(data = NA, + dim = c(system = length(system), + reference = length(reference), + time = length(forecast.months), + sdate = length(start.months), + region = length(regions), + metric = length(metrics.visualize))) - aggregated_metrics <- array(dim = c(system = length(loaded_metrics), - reference = length(loaded_metrics[[1]]), - metric = length(metrics.dim), - time = length(forecast.months.dim), - sdate = 
length(start.months.dim), - region = length(regions.dim))) + metrics_significance <- array(data = NA, + dim = c(system = length(system), + reference = length(reference), + time = length(forecast.months), + sdate = length(start.months), + region = length(regions), + metric = length(metrics.visualize))) - for (sys in 1:length(names(loaded_metrics))){ - for (ref in 1:length(names(loaded_metrics[[sys]]))){ - aggregated_metrics[sys, ref, , , , ] <- s2dv::Reorder(data = loaded_metrics[[sys]][[ref]], order = c('metric','time','sdate','region')) - } - } + for (sys in 1:length(system)){ + # sys_num <- which(system == sys) + for (ref in 1:length(reference)){ + # ref_num <- which(refence == ref) + for (met in metrics.visualize) { + + if(met == 'rpss'){ + ## Load data from saved files + rps_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'rps_syear', + calib_method = calib.method, syear = TRUE) + + rps_clim_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'rps_clim_syear', + calib_method = calib.method, syear = TRUE) + + ## Remove dat and var dimensions + rps_syear <- Subset(rps_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') + rps_clim_syear <- Subset(rps_clim_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') + + ## Spatially aggregate data + rps_syear_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = rps_syear, + region = regions[[X]], + lon = as.vector(attributes(rps_syear)$Variables$dat1$longitude), + lat = as.vector(attributes(rps_syear)$Variables$dat1$latitude), + londim = lon_dim, + latdim = lat_dim, + na.rm = F) + }, simplify = 'array') + + 
rps_clim_syear_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = rps_clim_syear, + region = regions[[X]], + lon = as.vector(attributes(rps_clim_syear)$Variables$dat1$longitude), + lat = as.vector(attributes(rps_clim_syear)$Variables$dat1$latitude), + londim = lon_dim, + latdim = lat_dim, + na.rm = F) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(rps_syear_spatial_aggr))[length(dim(rps_syear_spatial_aggr))] <- 'region' + names(dim(rps_clim_syear_spatial_aggr))[length(dim(rps_clim_syear_spatial_aggr))] <- 'region' + + ## Temporally aggregate data + rps_temp_aggr <- Apply(data = rps_syear_spatial_aggr, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + rps_clim_temp_aggr <- Apply(data = rps_clim_syear_spatial_aggr, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + ## Calculate RPSS from aggregated RPS and RPS_clim + rpss <- 1 - rps_temp_aggr / rps_clim_temp_aggr + + ## Calculate significance + sign_rpss <- RandomWalkTest(rps_syear_spatial_aggr, rps_clim_syear_spatial_aggr, + time_dim = time_dim, test.type = 'two.sided', + alpha = alpha, pval = FALSE, sign = TRUE, + ncores = NULL)$sign + + ## Save metric result in arrays + aggregated_metrics[sys, ref, , , ,which(metrics.visualize == met)] <- s2dv::Reorder(data = rpss, order = c('time', 'smonths','region')) + metrics_significance[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_rpss, order = c('time', 'smonths','region')) + + } ## close if on rpss + + if(met == 'crpss'){ + + ## Load data from saved files + crps_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'crps_syear', + calib_method = calib.method, syear = TRUE) + + crps_clim_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys], + reference = 
reference[ref], var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'crps_clim_syear', + calib_method = calib.method, syear = TRUE) + + ## Remove dat and var dimensions + crps_syear <- Subset(crps_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') + crps_clim_syear <- Subset(crps_clim_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') + + ## Spatially aggregate data + crps_syear_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = crps_syear, + region = regions[[X]], + lon = as.vector(attributes(crps_syear)$Variables$dat1$longitude), + lat = as.vector(attributes(crps_syear)$Variables$dat1$latitude), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + crps_clim_syear_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = crps_clim_syear, + region = regions[[X]], + lon = as.vector(attributes(crps_clim_syear)$Variables$dat1$longitude), + lat = as.vector(attributes(crps_clim_syear)$Variables$dat1$latitude), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(crps_syear_spatial_aggr))[length(dim(crps_syear_spatial_aggr))] <- 'region' + names(dim(crps_clim_syear_spatial_aggr))[length(dim(crps_clim_syear_spatial_aggr))] <- 'region' + + ## Temporally aggregate data + crps_temp_aggr <- Apply(data = crps_syear_spatial_aggr, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + crps_clim_temp_aggr <- Apply(data = crps_clim_syear_spatial_aggr, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + ## Calculate CRPSS from aggregated CRPS and CRPS_clim + crpss <- 1 - crps_temp_aggr / crps_clim_temp_aggr + + ## Calculate significance + sign_crpss <- RandomWalkTest(crps_syear_spatial_aggr, crps_clim_syear_spatial_aggr, + time_dim = time_dim, test.type = 'two.sided', 
+ alpha = alpha, pval = FALSE, sign = TRUE, + ncores = NULL)$sign + + ## Save metric result in arrays + aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = crpss, order = c('time', 'smonths','region')) + metrics_significance[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_crpss, order = c('time', 'smonths','region')) + + } ## close if on crpss + + if(met == 'enscorr'){ + ## Load data from saved files + cov <- .loadmetrics(input_path = stats.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'cov', + calib_method = calib.method, syear = NULL) + + std_hcst <- .loadmetrics(input_path = stats.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'std_hcst', + calib_method = calib.method, syear = NULL) + + std_obs <- .loadmetrics(input_path = stats.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'std_obs', + calib_method = calib.method, syear = NULL) + + + n_eff <- .loadmetrics(input_path = stats.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'n_eff', + calib_method = calib.method, syear = NULL) + + ## Calculate spatial aggregation + cov_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = cov, + region = regions[[X]], + lon = as.vector(attributes(cov)$Variables$dat1$longitude), + lat = as.vector(attributes(cov)$Variables$dat1$latitude), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + ## Include name of region dimension + 
names(dim(cov_spatial_aggr))[length(dim(cov_spatial_aggr))] <- 'region' + - ## Add attributes - attributes(aggregated_metrics)$metrics <- metrics.load - attributes(aggregated_metrics)$start.months <- attributes(loaded_metrics[[1]][[1]])$start.months - attributes(aggregated_metrics)$forecast.months <- attributes(loaded_metrics[[1]][[1]])$forecast.months - attributes(aggregated_metrics)$regions <- regions - attributes(aggregated_metrics)$system.name <- names(loaded_metrics) - attributes(aggregated_metrics)$reference.name <- names(loaded_metrics[[1]]) + std_hcst_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = std_hcst, + region = regions[[X]], + lon = as.vector(attributes(std_hcst)$Variables$dat1$longitude), + lat = as.vector(attributes(std_hcst)$Variables$dat1$latitude), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + names(dim(std_hcst_spatial_aggr))[length(dim(std_hcst_spatial_aggr))] <- 'region' + + std_obs_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = std_obs, + region = regions[[X]], + lon = as.vector(attributes(std_obs)$Variables$dat1$longitude), + lat = as.vector(attributes(std_obs)$Variables$dat1$latitude), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + names(dim(std_obs_spatial_aggr))[length(dim(std_obs_spatial_aggr))] <- 'region' + + n_eff_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = n_eff, + region = regions[[X]], + lon = as.vector(attributes(std_obs)$Variables$dat1$longitude), + lat = as.vector(attributes(std_obs)$Variables$dat1$latitude), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + names(dim(n_eff_spatial_aggr))[length(dim(n_eff_spatial_aggr))] <- 'region' + n_eff_spatial_aggr <- Subset(n_eff_spatial_aggr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') + + ## Calculate correlation + enscorr 
<- cov_spatial_aggr / (std_hcst_spatial_aggr * std_obs_spatial_aggr) + + ## Drop unwanted dimensions + enscorr <- Subset(enscorr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') + ## Calculate significance of corr + t_alpha2_n2 <- qt(p = alpha/2, df = n_eff_spatial_aggr-2, lower.tail = FALSE) + t <- abs(enscorr) * sqrt(n_eff_spatial_aggr-2) / sqrt(1-enscorr^2) + + sign_corr<- array(data = NA, + dim = c(time = length(forecast.months), + smonths = length(start.months), + region = length(regions))) + + + for (time in 1:dim(sign_corr)[['time']]){ + for (mon in 1:dim(sign_corr)[['smonths']]){ + for (reg in 1:dim(sign_corr)[['region']]){ + + if (anyNA(c(t[time, mon, reg], t_alpha2_n2[time, mon, reg])) == FALSE + && t[time, mon, reg] >= t_alpha2_n2[time, mon, reg]){ + sign_corr[time, mon, reg] <- TRUE + } else { + sign_corr[time, mon, reg] <- FALSE + } + + } + } + } - } else { - ## Calculate weighted mean of spatial aggregation - aggregated_metrics <- WeightedMetrics(loaded_metrics, - regions = regions, - metric.aggregation = metric.aggregation, - ncores = ncores) - }## close if + ## Save metric result in arrays + aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = enscorr, order = c('time', 'smonths','region')) + metrics_significance[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_corr, order = c('time', 'smonths','region')) + + } ## close if on enscorr + + if(met == 'mean_bias'){ + + mean_bias <- .loadmetrics(input_path = skill.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'mean_bias', + calib_method = calib.method, syear = NULL) + + ## Calculate spatial aggregation + mean_bias_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = mean_bias, + region = regions[[X]], + lon = 
as.vector(attributes(mean_bias)$Variables$dat1$longitude), + lat = as.vector(attributes(mean_bias)$Variables$dat1$latitude), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + names(dim(mean_bias_spatial_aggr))[length(dim(mean_bias_spatial_aggr))] <- 'region' + + ## Drop unwanted dimensions + mean_bias_spatial_aggr <- Subset(mean_bias_spatial_aggr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') + + ## Save metric result in array + aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = mean_bias_spatial_aggr, order = c('time', 'smonths','region')) + + } ## close on mean_bias + + if(met == 'enssprerr'){ + + enssprerr <- .loadmetrics(input_path = skill.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + forecast_months = forecast.months, + metrics = 'enssprerr', + calib_method = calib.method, syear = NULL) + + ## Calculate spatial aggregation + enssprerr_spatial_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = enssprerr, + region = regions[[X]], + lon = as.vector(attributes(enssprerr)$Variables$dat1$longitude), + lat = as.vector(attributes(enssprerr)$Variables$dat1$latitude), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + names(dim(enssprerr_spatial_aggr))[length(dim(enssprerr_spatial_aggr))] <- 'region' + + ## Drop unwanted dimensions + enssprerr_spatial_aggr <- Subset(enssprerr_spatial_aggr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') + + ## Save metric result in array + aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = enssprerr_spatial_aggr, order = c('time', 'smonths','region')) + + } ## close on enssprerr + + } ## close loop on metric + } ## close if on reference + } ## close if on system + + #Include metric attributes + attributes(aggregated_metrics)$metrics <- 
metrics.visualize + + ## set NAs to False + metrics_significance[is.na(metrics_significance)] <- FALSE + + } ## close if on score + + + ####### PLOT SCORECARDS ########## ## Create simple scorecard tables ## (one system only) ## Metrics input must be in the same order as function SC_spatial_aggregation scorecard_single <- ScorecardsSingle(data = aggregated_metrics, + sign = metrics_significance, system = system, reference = reference, var = var, @@ -142,9 +589,17 @@ Scorecards <- function(recipe) { metrics = metrics.visualize, table.label = table.label, fileout.label = fileout.label, + plot.legend = plot.legend, + legend.breaks = legend.breaks, legend.white.space = legend.white.space, + legend.width = legend.width, + legend.height = legend.height, + label.scale = label.scale, col1.width = col1.width, col2.width = col2.width, + columns.width = columns.width, + font.size = font.size, + round.decimal = round.decimal, output.path = output.path) ## Create multi system/reference scorecard tables @@ -152,6 +607,7 @@ Scorecards <- function(recipe) { ## Metrics input must be in the same order as function SC_spatial_aggregation if(length(system) > 1 || length(reference) > 1){ scorecard_multi <- ScorecardsMulti(data = aggregated_metrics, + sign = metrics_significance, system = system, reference = reference, var = var, @@ -159,10 +615,21 @@ Scorecards <- function(recipe) { end.year = end.year, start.months = start.months, forecast.months = forecast.months, - region.names = attributes(regions)$names, + region.names = names(regions), metrics = metrics.visualize, table.label = table.label, fileout.label = fileout.label, + plot.legend = plot.legend, + legend.breaks = legend.breaks, + legend.white.space = legend.white.space, + legend.width = legend.width, + legend.height = legend.height, + label.scale = label.scale, + col1.width = col1.width, + col2.width = col2.width, + columns.width = columns.width, + font.size = font.size, + round.decimal = round.decimal, output.path = 
output.path) } ## close if -- GitLab From 87a286f161fcadc3d627f357048a421e93ed7daf Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 9 Feb 2024 10:29:55 +0100 Subject: [PATCH 82/91] Fix pipeline (change filenames of decadal saved files) --- tests/testthat/test-decadal_monthly_1.R | 8 ++++---- tests/testthat/test-decadal_monthly_2.R | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/testthat/test-decadal_monthly_1.R b/tests/testthat/test-decadal_monthly_1.R index 3346e529..7586d3d6 100644 --- a/tests/testthat/test-decadal_monthly_1.R +++ b/tests/testthat/test-decadal_monthly_1.R @@ -251,10 +251,10 @@ test_that("4. Saving", { outputs <- paste0(recipe$Run$output_dir, "/outputs/") expect_equal( all(basename(list.files(outputs, recursive = T)) %in% -c("tas_19911101.nc", "tas_19921101.nc", "tas_19931101.nc", "tas_19941101.nc", "tas_20211101.nc", - "tas-obs_19911101.nc", "tas-obs_19921101.nc", "tas-obs_19931101.nc", "tas-obs_19941101.nc", - "tas-percentiles_month11.nc", "tas-probs_19911101.nc", "tas-probs_19921101.nc", - "tas-probs_19931101.nc", "tas-probs_19941101.nc", "tas-probs_20211101.nc", "tas-skill_month11.nc")), +c("tas_1991.nc", "tas_1992.nc", "tas_1993.nc", "tas_1994.nc", "tas_2021.nc", + "tas-obs_1991.nc", "tas-obs_1992.nc", "tas-obs_1993.nc", "tas-obs_1994.nc", + "tas-percentiles.nc", "tas-probs_1991.nc", "tas-probs_1992.nc", + "tas-probs_1993.nc", "tas-probs_1994.nc", "tas-probs_2021.nc", "tas-skill.nc")), TRUE ) # open the files and check values/attributes? diff --git a/tests/testthat/test-decadal_monthly_2.R b/tests/testthat/test-decadal_monthly_2.R index 9adc16b6..23821dbb 100644 --- a/tests/testthat/test-decadal_monthly_2.R +++ b/tests/testthat/test-decadal_monthly_2.R @@ -248,10 +248,10 @@ test_that("4. 
Saving", { outputs <- paste0(recipe$Run$output_dir, "/outputs/") expect_equal( all(basename(list.files(outputs, recursive = T)) %in% -c("tas_19901101.nc", "tas_19911101.nc", "tas_19921101.nc", "tas_20201101.nc", "tas_20211101.nc", - "tas-obs_19901101.nc", "tas-obs_19911101.nc", "tas-obs_19921101.nc", - "tas-percentiles_month11.nc", "tas-probs_19901101.nc", "tas-probs_19911101.nc", - "tas-probs_19921101.nc", "tas-probs_20201101.nc", "tas-probs_20211101.nc", "tas-skill_month11.nc")), +c("tas_1990.nc", "tas_1991.nc", "tas_1992.nc", "tas_2020.nc", "tas_2021.nc", + "tas-obs_1990.nc", "tas-obs_1991.nc", "tas-obs_1992.nc", + "tas-percentiles.nc", "tas-probs_1990.nc", "tas-probs_1991.nc", + "tas-probs_1992.nc", "tas-probs_2020.nc", "tas-probs_2021.nc", "tas-skill.nc")), TRUE ) expect_equal( -- GitLab From 1ed7a5b68806c057bc1e69fe1098317224ccd634 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 14 Feb 2024 10:16:59 +0100 Subject: [PATCH 83/91] Integrate multimodel dependencies in Autosubmit --- autosubmit/auto-multimodel.sh | 18 +++ autosubmit/auto-verification-CERISE.sh | 6 +- autosubmit/auto-verification.sh | 6 +- autosubmit/conf_esarchive/jobs.yml | 15 +- recipes/recipe_multimodel_seasonal.yml | 11 +- split.R | 5 +- tools/check_recipe.R | 203 +++++++++++++------------ tools/divide_recipe.R | 47 ++++-- tools/write_autosubmit_conf.R | 73 ++++++++- 9 files changed, 258 insertions(+), 126 deletions(-) create mode 100644 autosubmit/auto-multimodel.sh diff --git a/autosubmit/auto-multimodel.sh b/autosubmit/auto-multimodel.sh new file mode 100644 index 00000000..a9912666 --- /dev/null +++ b/autosubmit/auto-multimodel.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +############ AUTOSUBMIT INPUTS ############ +proj_dir=%PROJDIR% +outdir=%common.OUTDIR% +script=%common.SCRIPT% +SPLIT=%SPLIT% +############################### + +cd $proj_dir + +source split_to_recipe +# atomic_recipe_number=$(printf "%02d" $CHUNK) 
+atomic_recipe=${outdir}/logs/recipes/multimodel/atomic_recipe_sys-Multimodel${recipe}.yml + +source MODULES + +Rscript ${script} ${atomic_recipe} diff --git a/autosubmit/auto-verification-CERISE.sh b/autosubmit/auto-verification-CERISE.sh index caf2dd0e..675b41d4 100644 --- a/autosubmit/auto-verification-CERISE.sh +++ b/autosubmit/auto-verification-CERISE.sh @@ -9,8 +9,10 @@ CHUNK=%CHUNK% cd $proj_dir -atomic_recipe_number=$(printf "%02d" $CHUNK) -atomic_recipe=${outdir}/logs/recipes/atomic_recipe_${atomic_recipe_number}.yml +source chunk_to_recipe + +# atomic_recipe_number=$(printf "%02d" $CHUNK) +atomic_recipe=${outdir}/logs/recipes/atomic_recipe_${recipe}.yml ## Workaround to avoid bug in conda activate/source activate when running ## inside bash script diff --git a/autosubmit/auto-verification.sh b/autosubmit/auto-verification.sh index 0089e322..e909dbfb 100644 --- a/autosubmit/auto-verification.sh +++ b/autosubmit/auto-verification.sh @@ -9,8 +9,10 @@ CHUNK=%CHUNK% cd $proj_dir -atomic_recipe_number=$(printf "%02d" $CHUNK) -atomic_recipe=${outdir}/logs/recipes/atomic_recipe_${atomic_recipe_number}.yml +source chunk_to_recipe + +# atomic_recipe_number=$(printf "%02d" $CHUNK) +atomic_recipe=${outdir}/logs/recipes/atomic_recipe_${recipe}.yml source MODULES diff --git a/autosubmit/conf_esarchive/jobs.yml b/autosubmit/conf_esarchive/jobs.yml index a3c8934b..7e2a1948 100644 --- a/autosubmit/conf_esarchive/jobs.yml +++ b/autosubmit/conf_esarchive/jobs.yml @@ -5,7 +5,19 @@ JOBS: WALLCLOCK: NOTIFY_ON: PLATFORM: nord3v2 - PROCESSORS: + PROCESSORS: + # SPLITS: # n_atomic_recipes, number of atomic recipes + multimodel: + FILE: autosubmit/auto-multimodel.sh + RUNNING: once + WALLCLOCK: + NOTIFY_ON: + PLATFORM: nord3v2 + PROCESSORS: + DEPENDENCIES: + verification: + SPLITS_FROM: + SPLITS: # n_atomic_recipes/n_models = n_multimodels scorecards: FILE: autosubmit/auto-scorecards.sh WALLCLOCK: 00:10 @@ -13,4 +25,5 @@ JOBS: NOTIFY_ON: PROCESSORS: 1 DEPENDENCIES: verification + 
## TODO: Add scorecards-multimodel with multimodel dependency? diff --git a/recipes/recipe_multimodel_seasonal.yml b/recipes/recipe_multimodel_seasonal.yml index 4addb764..b2442d1f 100644 --- a/recipes/recipe_multimodel_seasonal.yml +++ b/recipes/recipe_multimodel_seasonal.yml @@ -64,16 +64,17 @@ Run: filesystem: esarchive output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ # replace with the directory where you want to save the outputs code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ # replace with the directory where your code is - autosubmit: no + script: + autosubmit: yes # fill only if using autosubmit auto_conf: - script: /esarchive/scratch/cdelgado/gitlat/SUNSET/main_multimodel_seasonal.R # replace with the path to your script - expid: XXXX # replace with your EXPID - hpc_user: bsc32924 # replace with your hpc username + script: ./example_scripts/multimodel_seasonal.R # replace with the path to your script + expid: a6wq # replace with your EXPID + hpc_user: bsc32762 # replace with your hpc username wallclock: 02:00 # hh:mm processors_per_job: 4 platform: nord3v2 email_notifications: yes # enable/disable email notifications. Change it if you want to. 
- email_address: carlos.delgado@bsc.es # replace with your email address + email_address: victoria.agudetse@bsc.es # replace with your email address notify_completed: yes # notify me by email when a job finishes notify_failed: yes # notify me by email when a job fails diff --git a/split.R b/split.R index 23607aad..faadafd6 100755 --- a/split.R +++ b/split.R @@ -34,7 +34,10 @@ recipe <- prepare_outputs(recipe_file = arguments$recipe, run_parameters <- divide_recipe(recipe) if (!is.null(recipe$Run$autosubmit) && (recipe$Run$autosubmit)) { - write_autosubmit_conf(recipe, run_parameters$n_atomic_recipes) + write_autosubmit_conf(recipe = recipe, + nchunks = run_parameters$n_atomic_recipes, + chunk_to_recipe = run_parameters$chunk_to_recipe, + split_to_recipe = run_parameters$split_to_recipe) sink(arguments$tmpfile, append = FALSE) # Run with... cat("autosubmit") diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 5061f146..4bea7dab 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -23,21 +23,21 @@ check_recipe <- function(recipe) { if (!("Analysis" %in% names(recipe))) { error(recipe$Run$logger, "The recipe must contain an element called 'Analysis'.") - error_status <- T + error_status <- TRUE } if (!all(PARAMS %in% names(recipe$Analysis))) { error(recipe$Run$logger, paste0("The element 'Analysis' in the recipe must contain all of ", "the following: ", paste(PARAMS, collapse = ", "), ".")) - error_status <- T + error_status <- TRUE } if (!any(HORIZONS %in% tolower(recipe$Analysis$Horizon))) { error(recipe$Run$logger, paste0("The element 'Horizon' in the recipe must be one of the ", "following: ", paste(HORIZONS, collapse = ", "), ".")) - error_status <- T + error_status <- TRUE } # Check time settings if (tolower(recipe$Analysis$Horizon) == "seasonal") { @@ -48,7 +48,7 @@ check_recipe <- function(recipe) { paste0("The element 'Time' in the recipe must contain all of the ", "following: ", paste(TIME_SETTINGS_SEASONAL, collapse = ", "), ".")) - 
error_status <- T + error_status <- TRUE } } else if (tolower(recipe$Analysis$Horizon) == "decadal") { archive <- read_yaml(ARCHIVE_DECADAL)[[recipe$Run$filesystem]] @@ -57,7 +57,7 @@ check_recipe <- function(recipe) { paste0("The element 'Time' in the recipe must contain all of the ", "following: ", paste(TIME_SETTINGS_DECADAL, collapse = ", "), ".")) - error_status <- T + error_status <- TRUE } } else { archive <- NULL @@ -82,81 +82,94 @@ check_recipe <- function(recipe) { # Check system names if (!is.null(archive)) { if (!all(recipe$Analysis$Datasets$System$name %in% - c(names(archive$System),'Multimodel'))) { + c(names(archive$System), 'Multimodel'))) { error(recipe$Run$logger, "The specified System name was not found in the archive.") - error_status <- T + error_status <- TRUE } # Check reference names if (!all(recipe$Analysis$Datasets$Reference$name %in% names(archive$Reference))) { error(recipe$Run$logger, "The specified Reference name was not found in the archive.") - error_status <- T + error_status <- TRUE } } # Check multimodel - if (!is.null(recipe$Analysis$Datasets$Multimodel) && - !tolower(recipe$Analysis$Datasets$Multimodel) %in% c('no','false')){ - if (!tolower(recipe$Analysis$Datasets$Multimodel$execute) %in% - c('yes','true','no','false','both')){ - error(recipe$Run$logger, - paste("The specified execution for the multimodel is not valid.", - "Please specify yes/true, no/false or both.")) - error_status <- T - } - if (!tolower(recipe$Analysis$Datasets$Multimodel$approach) %in% - c('pooled')){ #,'mean','median')){ - error(recipe$Run$logger, - paste("The specified approach for the multimodel is not valid.", - "Please specify pooled.")) #, mean or median.")) - error_status <- T - } - if (!tolower(recipe$Analysis$Datasets$Multimodel$createFrom) %in% - c('calibration','anomalies','indicators')){ - error(recipe$Run$logger, - paste("The specified 'createFrom' for the multimodel is not valid.", - "Please specify Calibration, Anomalies, Indicators.")) - 
error_status <- T + if (is.null(recipe$Analysis$Datasets$Multimodel) || + (is.logical(recipe$Analysis$Datasets$Multimodel) && + !(recipe$Analysis$Datasets$Multimodel))) { + recipe$Analysis$Datasets$Multimodel <- list(execute = FALSE) + } + if (tolower(recipe$Analysis$Datasets$Multimodel$execute) == 'false') { + multimodel <- FALSE + } else { + multimodel <- TRUE + } + MULTIMODEL_METHODS <- c("pooled") ## to be added: mean, median... + MULTIMODEL_CREATEFROM <- c("calibration", "anomalies", "indicators") + if (multimodel) { + if (!is.null(recipe$Analysis$Datasets$Multimodel)) { + if (!tolower(recipe$Analysis$Datasets$Multimodel$execute) %in% + c('true', 'false', 'both')) { + error(recipe$Run$logger, + paste("The specified execution for the multimodel is not valid.", + "Please specify yes/true, no/false or 'both'.")) + error_status <- TRUE + } + if (!tolower(recipe$Analysis$Datasets$Multimodel$approach) %in% + MULTIMODEL_METHODS) { + error(recipe$Run$logger, + paste("The specified approach for the multimodel is not valid.", + "Please specify pooled.")) #, mean or median.")) + error_status <- TRUE + } + if (!tolower(recipe$Analysis$Datasets$Multimodel$createFrom) %in% + MULTIMODEL_CREATEFROM) { + error(recipe$Run$logger, + paste("The specified 'createFrom' for the multimodel is not valid.", + "Please specify Calibration, Anomalies, Indicators.")) + error_status <- TRUE + } } } else { - recipe$Analysis$Datasets$Multimodel <- 'no' + recipe$Analysis$Datasets$Multimodel <- FALSE } # Check ftime_min and ftime_max if ((!(recipe$Analysis$Time$ftime_min > 0)) || (!is.integer(recipe$Analysis$Time$ftime_min))) { error(recipe$Run$logger, "The element 'ftime_min' must be an integer larger than 0.") - error_status <- T + error_status <- TRUE } if ((!(recipe$Analysis$Time$ftime_max > 0)) || (!is.integer(recipe$Analysis$Time$ftime_max))) { error(recipe$Run$logger, "The element 'ftime_max' must be an integer larger than 0.") - error_status <- T + error_status <- TRUE } if 
(recipe$Analysis$Time$ftime_max < recipe$Analysis$Time$ftime_min) { error(recipe$Run$logger, "'ftime_max' cannot be smaller than 'ftime_min'.") - error_status <- T + error_status <- TRUE } # Check consistency of hindcast years if (!(as.numeric(recipe$Analysis$Time$hcst_start) %% 1 == 0) || (!(recipe$Analysis$Time$hcst_start > 0))) { error(recipe$Run$logger, "The element 'hcst_start' must be a valid year.") - error_status <- T + error_status <- TRUE } if (!(as.numeric(recipe$Analysis$Time$hcst_end) %% 1 == 0) || (!(recipe$Analysis$Time$hcst_end > 0))) { error(recipe$Run$logger, "The element 'hcst_end' must be a valid year.") - error_status <- T + error_status <- TRUE } if (recipe$Analysis$Time$hcst_end < recipe$Analysis$Time$hcst_start) { error(recipe$Run$logger, "'hcst_end' cannot be smaller than 'hcst_start'.") - error_status <- T + error_status <- TRUE } ## TODO: Is this needed? if (is.null(recipe$Analysis$Time$fcst_year) || @@ -201,18 +214,14 @@ check_recipe <- function(recipe) { if (length(recipe$Analysis$Regrid) != 2) { error(recipe$Run$logger, "The 'Regrid' element must specify the 'method' and 'type'.") - error_status <- T + error_status <- TRUE } - if (!is.null(recipe$Analysis$Datasets$Multimodel) && - !tolower(recipe$Analysis$Datasets$Multimodel) %in% c('no','false')){ - if (recipe$Analysis$Regrid$type == 'to_system' && - tolower(recipe$Analysis$Datasets$Multimodel$execute) - %in% c('both','yes','true')) { + + if (recipe$Analysis$Regrid$type == 'to_system' && multimodel) { error(recipe$Run$logger, paste0("The 'Regrid$type' cannot be 'to_system' if ", "'Multimodel$execute' is yes/true or both.")) - error_status <- T - } + error_status <- TRUE } # TODO: Add Workflow checks? # ... 
@@ -220,7 +229,7 @@ check_recipe <- function(recipe) { if (length(recipe$Analysis$Horizon) > 1) { error(recipe$Run$logger, "Only one single Horizon can be specified in the recipe") - error_status <- T + error_status <- TRUE } ## TODO: Refine this @@ -252,7 +261,7 @@ check_recipe <- function(recipe) { error(recipe$Run$logger, paste0("There must be 4 elements in 'Region': ", paste(LIMITS, collapse = ", "), ".")) - error_status <- T + error_status <- TRUE } } if (length(recipe$Analysis$Region) > 1) { @@ -269,7 +278,7 @@ check_recipe <- function(recipe) { error(recipe$Run$logger, paste0("There must be 4 elements in 'Region': ", paste(LIMITS, collapse = ", "), ".")) - error_status <- T + error_status <- TRUE } ## TODO: Implement multiple regions # nregions <- length(recipe$Analysis$Region) @@ -280,7 +289,7 @@ check_recipe <- function(recipe) { # paste0("Each region defined in element 'Region' ", # "should have 4 elements: ", # paste(limits, collapse = ", "), ".")) - # error_status <- T + # error_status <- TRUE # } # if (length(recipe$Analysis$Region) > 1) { # if (!("name" %in% names(recipe$Analysis$Region[[i]]))) { @@ -307,7 +316,7 @@ check_recipe <- function(recipe) { if (is.null(recipe$Analysis$Workflow$Calibration$method)) { error(recipe$Run$logger, "The 'Calibration' element 'method' must be specified.") - error_status <- T + error_status <- TRUE } SAVING_OPTIONS_CALIB <- c("all", "none", "exp_only", "fcst_only") if ((is.null(recipe$Analysis$Workflow$Calibration$save)) || @@ -316,7 +325,7 @@ check_recipe <- function(recipe) { paste0("Please specify which Calibration module outputs you want ", "to save with the 'save' parameter. 
The options are: ", paste(SAVING_OPTIONS_CALIB, collapse = ", "), ".")) - error_status <- T + error_status <- TRUE } } # Anomalies @@ -325,12 +334,12 @@ check_recipe <- function(recipe) { if (is.null(recipe$Analysis$Workflow$Anomalies$compute)) { error(recipe$Run$logger, "Parameter 'compute' must be defined under 'Anomalies'.") - error_status <- T + error_status <- TRUE } else if (!(is.logical(recipe$Analysis$Workflow$Anomalies$compute))) { error(recipe$Run$logger, paste("Parameter 'Anomalies:compute' must be a logical value", "(True/False or yes/no).")) - error_status <- T + error_status <- TRUE } else if ((recipe$Analysis$Workflow$Anomalies$compute)) { # Cross-validation check if (!is.logical(recipe$Analysis$Workflow$Anomalies$cross_validation)) { @@ -338,7 +347,7 @@ check_recipe <- function(recipe) { paste("If anomaly computation is requested, parameter", "'cross_validation' must be defined under 'Anomalies', and it must be a logical value (True/False or yes/no).")) - error_status <- T + error_status <- TRUE } # Saving checks SAVING_OPTIONS_ANOM <- c("all", "none", "exp_only", "fcst_only") @@ -348,7 +357,7 @@ check_recipe <- function(recipe) { paste0("Please specify which Anomalies module outputs you want ", "to save with the 'save' parameter. The options are: ", paste(SAVING_OPTIONS_ANOM, collapse = ", "), ".")) - error_status <- T + error_status <- TRUE } } } @@ -375,21 +384,21 @@ check_recipe <- function(recipe) { paste0("The type of Downscaling request in the recipe is not ", "available. 
It must be one of the following: ", paste(DOWNSCAL_TYPES, collapse = ", "), ".")) - error_status <- T + error_status <- TRUE } if ((downscal_params$type %in% c("int", "intbc", "intlr", "logreg")) && (is.null(downscal_params$target_grid))) { error(recipe$Run$logger, paste("A target grid is required for the downscaling method", "requested in the recipe.")) - error_status <- T + error_status <- TRUE } if (downscal_params$type == "int") { if (is.null(downscal_params$int_method)) { error(recipe$Run$logger, paste("Downscaling type 'int' was requested, but no", "interpolation method is provided in the recipe.")) - error_status <- T + error_status <- TRUE } } else if (downscal_params$type %in% c("int", "intbc", "intlr", "logreg")) { @@ -398,32 +407,32 @@ check_recipe <- function(recipe) { paste("Downscaling type", downscal_params$type, "was requested in the recipe, but no", "interpolation method is provided.")) - error_status <- T + error_status <- TRUE } } else if (downscal_params$type == "intbc") { if (is.null(downscal_params$bc_method)) { error(recipe$Run$logger, paste("Downscaling type 'intbc' was requested in the recipe, but", "no bias correction method is provided.")) - error_status <- T + error_status <- TRUE } else if (!(downscal_params$bc_method %in% BC_METHODS)) { error(recipe$Run$logger, paste0("The accepted Bias Correction methods for the downscaling", " module are: ", paste(BC_METHODS, collapse = ", "), ".")) - error_status <- T + error_status <- TRUE } } else if (downscal_params$type == "intlr") { if (length(downscal_params$lr_method) == 0) { error(recipe$Run$logger, paste("Downscaling type 'intlr' was requested in the recipe, but", "no linear regression method was provided.")) - error_status <- T + error_status <- TRUE } else if (!(downscal_params$lr_method %in% LR_METHODS)) { error(recipe$Run$logger, paste0("The accepted linear regression methods for the", " downscaling module are: ", paste(LR_METHODS, collapse = ", "), ".")) - error_status <- T + 
error_status <- TRUE } } else if (downscal_params$type == "analogs") { if (is.null(downscal_params$nanalogs)) { @@ -436,19 +445,19 @@ check_recipe <- function(recipe) { error(recipe$Run$logger, paste("Downscaling type 'logreg' was requested in the recipe, but", "no interpolation method was provided.")) - error_status <- T + error_status <- TRUE } if (is.null(downscal_params$log_reg_method)) { error(recipe$Run$logger, paste("Downscaling type 'logreg' was requested in the recipe,", "but no logistic regression method is provided.")) - error_status <- T + error_status <- TRUE } else if (!(downscal_params$log_reg_method %in% LOGREG_METHODS)) { error(recipe$Run$logger, paste0("The accepted logistic regression methods for the ", "downscaling module are: ", paste(LOGREG_METHODS, collapse = ", "), ".")) - error_status <- T + error_status <- TRUE } } } @@ -462,12 +471,12 @@ check_recipe <- function(recipe) { error(recipe$Run$logger, paste0("Indices uses Anomalies as input, but Anomalies are missing", "in the recipe.")) - error_status <- T + error_status <- TRUE } else if (!(recipe$Analysis$Workflow$Anomalies$compute)) { error(recipe$Run$logger, paste0("Indices uses Anomalies as input, but the parameter", "'Anomalies:compute' is set as no/False.")) - error_status <- T + error_status <- TRUE } recipe_indices <- tolower(names(recipe$Analysis$Workflow$Indices)) if (!all(recipe_indices %in% indices)) { @@ -475,7 +484,7 @@ check_recipe <- function(recipe) { paste0("Some of the indices under 'Indices' are not available.", "The available Indices are: 'NAO', 'Nino1+2', 'Nino3', ", "'Nino3.4' and 'Nino4'.")) - error_status <- T + error_status <- TRUE } # Check that variables correspond with indices requested if (("nao" %in% recipe_indices) && @@ -484,7 +493,7 @@ check_recipe <- function(recipe) { paste0("It is not possible to compute the NAO with some of the ", "variables requested. 
To compute the NAO, please make sure", "your recipe requests only psl and/or z500.")) - error_status <- T + error_status <- TRUE } if ((any(nino_indices %in% recipe_indices)) && (!all(recipe_variables %in% c("tos", "sst")))) { @@ -492,7 +501,7 @@ check_recipe <- function(recipe) { paste0("It is not possible to compute El Nino indices with some ", "of the variables requested. To compute El Nino, please ", "make sure your recipe requests only tos.")) - error_status <- T + error_status <- TRUE } } @@ -506,7 +515,7 @@ check_recipe <- function(recipe) { if (is.null(recipe$Analysis$Workflow$Skill$metric)) { error(recipe$Run$logger, "Parameter 'metric' must be defined under 'Skill'.") - error_status <- T + error_status <- TRUE } else { requested_metrics <- strsplit(recipe$Analysis$Workflow$Skill$metric, ", | |,")[[1]] @@ -515,7 +524,7 @@ check_recipe <- function(recipe) { paste0("Some of the metrics requested under 'Skill' are not ", "available in SUNSET. Check the documentation to see the ", "full list of accepted skill metrics.")) - error_status <- T + error_status <- TRUE } } # Saving checks @@ -526,7 +535,7 @@ check_recipe <- function(recipe) { paste0("Please specify whether you want to save the Skill metrics ", "with the 'save' parameter. 
The options are: ", paste(SAVING_OPTIONS_SKILL, collapse = ", "), ".")) - error_status <- T + error_status <- TRUE } } @@ -535,12 +544,12 @@ check_recipe <- function(recipe) { if (is.null(recipe$Analysis$Workflow$Probabilities$percentiles)) { error(recipe$Run$logger, "Parameter 'percentiles' must be defined under 'Probabilities'.") - error_status <- T + error_status <- TRUE } else if (!is.list(recipe$Analysis$Workflow$Probabilities$percentiles)) { error(recipe$Run$logger, paste("Parameter 'Probabilities:percentiles' expects a list.", "See documentation in the wiki for examples.")) - error_status <- T + error_status <- TRUE } # Saving checks SAVING_OPTIONS_PROBS <- c("all", "none", "bins_only", "percentiles_only") @@ -551,7 +560,7 @@ check_recipe <- function(recipe) { "and probability bins with the 'save' parameter. The ", "options are: ", paste(SAVING_OPTIONS_PROBS, collapse = ", "), ".")) - error_status <- T + error_status <- TRUE } } @@ -563,7 +572,7 @@ check_recipe <- function(recipe) { if (is.null(recipe$Analysis$Workflow$Visualization$plots)) { error(recipe$Run$logger, "The 'plots' element must be defined under 'Visualization'.") - error_status <- T + error_status <- TRUE } else { plots <- strsplit(recipe$Analysis$Workflow$Visualization$plots, ", | |,")[[1]] @@ -571,7 +580,7 @@ check_recipe <- function(recipe) { error(recipe$Run$logger, paste0("The options available for the plots are: ", paste(PLOT_OPTIONS, collapse = ", "), ".")) - error_status <- T + error_status <- TRUE } } # Check multi_panel option @@ -584,7 +593,7 @@ check_recipe <- function(recipe) { error(recipe$Run$logger, paste0("Parameter 'Visualization:multi_panel' must be a logical ", "value: either 'yes/True' or 'no/False'")) - error_status <- T + error_status <- TRUE } # Check projection if (is.null(recipe$Analysis$Workflow$Visualization$projection)) { @@ -603,7 +612,7 @@ check_recipe <- function(recipe) { error(recipe$Run$logger, paste0("Parameter Visualization:mask_terciles must be one of: ", 
"yes/True, no/False, 'both'")) - error_status <- T + error_status <- TRUE } if (is.null(recipe$Analysis$Workflow$Visualization$dots)) { warn(recipe$Run$logger, @@ -622,7 +631,7 @@ check_recipe <- function(recipe) { if (is.null(recipe$Analysis$Workflow$Scorecards$metric)) { error(recipe$Run$logger, "Parameter 'metric' must be defined under 'Scorecards'.") - error_status <- T + error_status <- TRUE } else { sc_metrics <- strsplit(recipe$Analysis$Workflow$Scorecards$metric, ", | |,")[[1]] @@ -630,7 +639,7 @@ check_recipe <- function(recipe) { error(recipe$Run$logger, paste0("All of the metrics requested under 'Scorecards' must ", "be requested in the 'Skill' section.")) - error_status <- T + error_status <- TRUE } } } @@ -649,28 +658,28 @@ check_recipe <- function(recipe) { error(recipe$Run$logger, paste("Recipe element 'Run' must contain", "all of the following fields:", paste(RUN_FIELDS, collapse=", "), ".")) - error_status <- T + error_status <- TRUE } if (!is.character(recipe$Run$output_dir)) { error(recipe$Run$logger, paste("The Run element 'output_dir' in", recipe$name, "file", "should be a character string indicating the path where", "the outputs should be saved.")) - error_status <- T + error_status <- TRUE } if (!is.character(recipe$Run$code_dir)) { error(recipe$Run$logger, paste("The Run element 'code_dir' in", recipe$name, "file ", "should be a character string indicating the path", "where the code is.")) - error_status <- T + error_status <- TRUE } if (!is.logical(recipe$Run$Terminal)) { error(recipe$Run$logger, paste("The Run element 'Terminal' in", recipe$name, "file ", "should be a boolean value indicating whether or not to", "print the logs in the terminal.")) - error_status <- T + error_status <- TRUE } ## TODO: Review this case, since default value is allowed if (!is.character(recipe$Run$Loglevel) || @@ -679,7 +688,7 @@ check_recipe <- function(recipe) { paste("The Run element 'Loglevel' in", recipe$name, "file", "should be a character string 
specifying one of the levels available:", paste0(LOG_LEVELS, collapse='/'))) - error_status <- T + error_status <- TRUE } # --------------------------------------------------------------------- @@ -701,22 +710,22 @@ check_recipe <- function(recipe) { if (!("auto_conf" %in% names(recipe$Run))) { error(recipe$Run$logger, "The 'auto_conf' is missing from the 'Run' section of the recipe.") - error_status <- T + error_status <- TRUE } else if (!all(AUTO_PARAMS %in% names(recipe$Run$auto_conf))) { error(recipe$Run$logger, paste0("The element 'Run:auto_conf' must contain all of the ", "following: ", paste(AUTO_PARAMS, collapse = ", "), ".")) - error_status <- T + error_status <- TRUE } # Check that the script is not NULL and exists if (is.null(recipe$Run$auto_conf$script)) { error(recipe$Run$logger, "A script must be provided to run the recipe with autosubmit.") - error_status <- T + error_status <- TRUE } else if (!file.exists(recipe$Run$auto_conf$script)) { error(recipe$Run$logger, "Could not find the file for the script in 'auto_conf'.") - error_status <- T + error_status <- TRUE } # Check that the experiment ID exists if (is.null(recipe$Run$auto_conf$expid)) { @@ -728,30 +737,30 @@ check_recipe <- function(recipe) { error(recipe$Run$logger, paste("autosubmit expid -H", auto_specs$platform, "-d ")) - error_status <- T + error_status <- TRUE } else if (!dir.exists(paste0(auto_specs$experiment_dir, recipe$Run$auto_conf$expid))) { error(recipe$Run$logger, paste0("No folder in ", auto_specs$experiment_dir, " for the EXPID", recipe$Run$auto_conf$expid, ". 
Please make sure it is correct.")) - error_status <- T + error_status <- TRUE } if ((recipe$Run$auto_conf$email_notifications) && (is.null(recipe$Run$auto_conf$email_address))) { error(recipe$Run$logger, "Autosubmit notifications are enabled but email address is empty!") - error_status <- T + error_status <- TRUE } if (is.null(recipe$Run$auto_conf$hpc_user)) { error(recipe$Run$logger, "The 'Run:auto_conf:hpc_user' field can not be empty.") - error_status <- T + error_status <- TRUE } else if ((recipe$Run$filesystem == "esarchive") && (!substr(recipe$Run$auto_conf$hpc_user, 1, 5) == "bsc32")) { error(recipe$Run$logger, "Please check your hpc_user ID. It should look like: 'bsc32xxx'") - error_status <- T + error_status <- TRUE } } diff --git a/tools/divide_recipe.R b/tools/divide_recipe.R index 214362d2..3b2e6eee 100644 --- a/tools/divide_recipe.R +++ b/tools/divide_recipe.R @@ -171,33 +171,48 @@ divide_recipe <- function(recipe) { } } # Rest of horizons # Save all recipes in separate YAML files + chunk <- 1 + split <- 1 + chunk_to_recipe <- list() + split_to_recipe <- list() + total_models <- length(recipe$Analysis$Datasets$System) for (reci in 1:length(all_recipes)) { - ## TODO: Sort dependencies - # if (reci < 10) { - # recipe_number <- paste0("0", reci) - # } else { - # recipe_number <- reci - # } - recipe_number <- paste0('var-',all_recipes[[reci]]$Analysis$Variables$name, - '_sys-',gsub('\\.', '', all_recipes[[reci]]$Analysis$Datasets$System$name), - '_ref-',all_recipes[[reci]]$Analysis$Datasets$Reference$name, - '_reg-',all_recipes[[reci]]$Analysis$Region$name, - '_sdate-',all_recipes[[reci]]$Analysis$Time$sdate) + ## TODO: Document + recipe_model <- paste0("sys-", + gsub('\\.', '', all_recipes[[reci]]$Analysis$Datasets$System$name)) + # + recipe_split <- paste0("_ref-", all_recipes[[reci]]$Analysis$Datasets$Reference$name, + "_var-", all_recipes[[reci]]$Analysis$Variables$name, + "_reg-", all_recipes[[reci]]$Analysis$Region$name, + "_sdate-", 
all_recipes[[reci]]$Analysis$Time$sdate) + recipe_name <- paste0(recipe_model, recipe_split) + + if (all_recipes[[reci]]$Analysis$Datasets$System$name == 'Multimodel') { recipe_dir <- paste0(recipe$Run$output_dir, "/logs/recipes/multimodel/") + split_to_recipe[split] <- recipe_split + split <- split + 1 } else { recipe_dir <- paste0(recipe$Run$output_dir, "/logs/recipes/") + chunk_to_recipe[chunk] <- recipe_name + chunk <- chunk + 1 } write_yaml(all_recipes[[reci]], - paste0(recipe_dir, "atomic_recipe_", recipe_number, ".yml")) + paste0(recipe_dir, "atomic_recipe_", recipe_name, ".yml")) } + + # Print information for user info(recipe$Run$logger, - paste("The main recipe has been divided into", length(all_recipes), - "atomic recipes.")) + paste("The main recipe has been divided into", length(chunk_to_recipe), + "single model atomic recipes, plus", length(split_to_recipe), + "multi-model atomic recipes.")) text <- paste0("Check output directory ", recipe$Run$output_dir, "/logs/recipes/ to see all the individual atomic recipes.") info(recipe$Run$logger, text) ## TODO: Change returns? - return(list(n_atomic_recipes = length(all_recipes), - outdir = recipe$Run$output_dir)) + return(list(n_atomic_recipes = length(chunk_to_recipe), # length(all_recipes) + outdir = recipe$Run$output_dir, + chunk_to_recipe = chunk_to_recipe, + split_to_recipe = split_to_recipe)) } + diff --git a/tools/write_autosubmit_conf.R b/tools/write_autosubmit_conf.R index 95ca93f0..0bffaa43 100644 --- a/tools/write_autosubmit_conf.R +++ b/tools/write_autosubmit_conf.R @@ -1,5 +1,19 @@ -# Function to write autosubmit configuration from an Auto-S2S recipe -write_autosubmit_conf <- function(recipe, nchunks) { +# Function to write autosubmit configuration from an SUNSET recipe. The function +# reads the corresponding AS configuration file templates and fills them with +# the information needed to run the experiment. 
The modified configuration +# files are saved in the `conf/` folder of the Autosubmit experiment. +# +# recipe: the SUNSET recipe +# nchunks: the number of 'chunks' to be processed by Autosubmit, as returned +# by divide_recipe(). +# chunk_to_recipe: list with the correspondence between the chunk number and +# the name of the atomic recipe, as returned by divide_recipe(). +# split_to_recipe: list with the correspondence between the split number and +# the name of the multi-model atomic recipe, as returned by divide_recipe(). + +write_autosubmit_conf <- function(recipe, nchunks, + chunk_to_recipe, + split_to_recipe) { # Experiment ID expid <- recipe$Run$auto_conf$expid # Directory with the experiment templates @@ -8,6 +22,7 @@ write_autosubmit_conf <- function(recipe, nchunks) { auto_specs <- read_yaml("conf/autosubmit.yml")[[recipe$Run$filesystem]] # Output directory dest_dir <- paste0(auto_specs$experiment_dir, expid, "/conf/") + proj_dir <- paste0(auto_specs$experiment_dir, expid, "/proj/auto-s2s/") # Modify the configuration files according to the info in the recipe for (file in list.files(template_dir)) { conf_type <- strsplit(file, split = "[.]")[[1]][1] @@ -35,6 +50,12 @@ write_autosubmit_conf <- function(recipe, nchunks) { } else if (conf_type == "jobs") { # Section 3: jobs ## wallclock, notify_on, platform?, processors, + # Create bash file to associate chunk number to recipe name + chunk_file <- paste0(proj_dir, "chunk_to_recipe") + .create_bash_file(fileout = chunk_file, + dictionary = chunk_to_recipe, + variable = "CHUNK") + # Define job parameters conf$JOBS$verification$WALLCLOCK <- recipe$Run$auto_conf$wallclock if (recipe$Run$auto_conf$notify_completed) { conf$JOBS$verification$NOTIFY_ON <- paste(conf$JOBS$verification$NOTIFY_ON, @@ -52,6 +73,7 @@ write_autosubmit_conf <- function(recipe, nchunks) { (!recipe$Analysis$Workflow$Scorecards$execute)) { conf$JOBS$scorecards <- NULL } else { + ## TODO: Add multimodel dependency if 
(recipe$Run$auto_conf$notify_completed) { conf$JOBS$scorecards$NOTIFY_ON <- paste(conf$JOBS$scorecards$NOTIFY_ON, "COMPLETED") @@ -61,6 +83,41 @@ write_autosubmit_conf <- function(recipe, nchunks) { "FAILED") } } + # Only include Multimodel job if the section exists in the recipe + # and is set to execute = 'True' or 'both' + if (!is.null(recipe$Analysis$Datasets$Multimodel) && + tolower(recipe$Analysis$Datasets$Multimodel$execute) == "false") { + conf$JOBS$multimodel <- NULL + } else { + # Create bash file to associate split number to recipe name + split_file <- paste0(proj_dir, "split_to_recipe") + .create_bash_file(fileout = split_file, + dictionary = split_to_recipe, + variable = "SPLIT") + # Define multimodel dependencies in the format required by AS config + mm_dependencies <- lapply(split_to_recipe, grep, chunk_to_recipe) + mm_dependencies <- lapply(mm_dependencies, paste, collapse = ",") + names(mm_dependencies) <- paste(1:length(mm_dependencies)) + for (split in names(mm_dependencies)) { + conf$JOBS$multimodel$DEPENDENCIES$verification$SPLITS_FROM[[split]]$CHUNKS_TO <- + mm_dependencies[[split]] + } + # 'Splits' parameter should be the number of multimodel jobs + conf$JOBS$multimodel$SPLITS <- length(mm_dependencies) + # Define the rest of the parameters + if (recipe$Run$auto_conf$notify_completed) { + conf$JOBS$multimodel$NOTIFY_ON <- paste(conf$JOBS$multimodel$NOTIFY_ON, + "COMPLETED") + } + if (recipe$Run$auto_conf$notify_failed) { + conf$JOBS$multimodel$NOTIFY_ON <- paste(conf$JOBS$multimodel$NOTIFY_ON, + "FAILED") + } + + conf$JOBS$multimodel$PROCESSORS <- recipe$Run$auto_conf$processors_per_job + conf$JOBS$multimodel$CUSTOM_DIRECTIVES <- recipe$Run$auto_conf$custom_directives + conf$JOBS$multimodel$WALLCLOCK <- recipe$Run$auto_conf$wallclock + } } else if (conf_type == "platforms") { # Section 4: platform configuration ## nord3v2 configuration... platform name? 
user, processors_per_node @@ -107,3 +164,15 @@ write_autosubmit_conf <- function(recipe, nchunks) { print(paste("nohup autosubmit run", expid, "& disown")) } } + +.create_bash_file <- function(fileout, dictionary, variable) { + file_connection <- file(fileout) + script_lines <- paste0("case $", variable, " in") + for (item in 1:length(dictionary)) { + script_command <- paste0(" ", item, ") recipe='", dictionary[[item]], "' ;;") + script_lines <- c(script_lines, script_command) + } + script_lines <- c(script_lines, "esac") + writeLines(script_lines, file_connection) + close(file_connection) +} -- GitLab From f09d51cc79d15d55eb56d2895591f9d6d4f8af51 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 14 Feb 2024 16:11:39 +0100 Subject: [PATCH 84/91] Update AS version --- conf/autosubmit.yml | 4 ++-- tools/write_autosubmit_conf.R | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/conf/autosubmit.yml b/conf/autosubmit.yml index 4ff15ffd..3e3f1220 100644 --- a/conf/autosubmit.yml +++ b/conf/autosubmit.yml @@ -1,7 +1,7 @@ esarchive: platform: nord3v2 - module_version: autosubmit/4.0.0b-foss-2015a-Python-3.7.3 - auto_version: 4.0.0 + module_version: autosubmit/4.0.98-foss-2015a-Python-3.7.3 + auto_version: 4.0.98 conf_format: yaml experiment_dir: /esarchive/autosubmit/ userID: bsc32 diff --git a/tools/write_autosubmit_conf.R b/tools/write_autosubmit_conf.R index 0bffaa43..a31df62b 100644 --- a/tools/write_autosubmit_conf.R +++ b/tools/write_autosubmit_conf.R @@ -34,6 +34,7 @@ write_autosubmit_conf <- function(recipe, nchunks, # Section 1: autosubmit.conf ## expid, email notifications and address conf$config$EXPID <- expid + conf$config$AUTOSUBMIT_VERSION <- auto_specs$auto_version if (recipe$Run$auto_conf$email_notifications) { conf$mail$NOTIFICATIONS <- "True" } else { -- GitLab From 2c5839429f16b2a3009c24624ad589db206f5f0f Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Tue, 20 Feb 2024 12:10:51 +0100 Subject: [PATCH 85/91] including 
multimodel split loading example recipe --- .../examples/recipe_multimodel_splitting.yml | 67 +++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 recipes/examples/recipe_multimodel_splitting.yml diff --git a/recipes/examples/recipe_multimodel_splitting.yml b/recipes/examples/recipe_multimodel_splitting.yml new file mode 100644 index 00000000..091da3e3 --- /dev/null +++ b/recipes/examples/recipe_multimodel_splitting.yml @@ -0,0 +1,67 @@ +Description: + Author: Carlos Delgado Torres + Info: Test for seasonal multi-model + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + - {name: tas, freq: monthly_mean, units: C} + Datasets: + System: + - {name: ECMWF-SEAS5} + - {name: CMCC-SPS3.5} + Multimodel: + execute: both # Mandatory: Either both, yes/true or no/false + approach: pooled #mean, median + createFrom: Anomalies + split_loading: TRUE + Reference: + - {name: ERA5} # Mandatory, str: Reference codename. See docu. + Time: + sdate: + - '0101' ## MMDD + - '0201' + # fcst_year: '2023' # Optional, int: Forecast year 'YYYY' + hcst_start: '2014' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 2 # Mandatory, int: Last leadtime time step in months + Region: + - {name: "Spain", latmin: 34, latmax: 44, lonmin: -10, lonmax: 6} + # - {name: "Germany", latmin: 45, latmax: 56, lonmin: 4, lonmax: 17} + Regrid: + method: conservative + type: "/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20180501.nc" + Workflow: + Anomalies: + compute: yes + cross_validation: no + save: none + Calibration: + method: raw + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss enssprerr + save: 'all' + cross_validation: no + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. 
+ save: none + Indicators: + index: no + Visualization: + plots: skill_metrics + multi_panel: no + dots: both + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: TRUE # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: esarchive + output_dir: /esarchive/scratch/nmilders/scorecards_data/multimodel/test/ + code_dir: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/ + autosubmit: no -- GitLab From 068407a32f6df4a527df61290b69582d331fc1ff Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Tue, 20 Feb 2024 16:25:41 +0100 Subject: [PATCH 86/91] Cleaning unnecessary files from cerise and scorecard development branches --- GetProbs.R | 345 ----------- conf/archive.yml | 54 -- conf/grid_description/griddes_system602.txt | 18 - conf/grid_description/griddes_system8.txt | 18 - conf/slurm_templates/run_scorecards.sh | 5 +- datadownloading/dl-era5.bash | 127 ---- datadownloading/dl-seasonal.bash | 126 ---- full_NAO_scorecards.R | 194 ------ full_ecvs_scorecards.R | 268 --------- modules/Loading/Loading.R | 6 +- modules/Loading/R/GRIB/GrbLoad.R | 347 +++++------ modules/Loading/R/load_GRIB.R | 228 ++++---- modules/Scorecards/R/tmp/LoadMetrics.R | 3 +- modules/Scorecards/Scorecards.R | 615 +++----------------- modules/Scorecards/execute_scorecards.R | 20 +- modules/Skill/R/CRPS_clim.R | 1 - modules/Skill/R/RPS_clim.R | 24 +- recipe_NAO_scorecards.yml | 60 -- recipe_ecvs_scorecards_seasonal.yml | 107 ---- recipe_tas_scorecards_seasonal.yml | 91 --- 20 files changed, 369 insertions(+), 2288 deletions(-) delete mode 100644 GetProbs.R delete mode 100644 conf/grid_description/griddes_system602.txt delete mode 100644 conf/grid_description/griddes_system8.txt delete mode 100755 datadownloading/dl-era5.bash delete mode 100755 datadownloading/dl-seasonal.bash delete mode 100644 full_NAO_scorecards.R delete mode 100644 full_ecvs_scorecards.R delete mode 100644 
recipe_NAO_scorecards.yml delete mode 100644 recipe_ecvs_scorecards_seasonal.yml delete mode 100644 recipe_tas_scorecards_seasonal.yml diff --git a/GetProbs.R b/GetProbs.R deleted file mode 100644 index 9509a601..00000000 --- a/GetProbs.R +++ /dev/null @@ -1,345 +0,0 @@ -#'Compute probabilistic forecasts or the corresponding observations -#' -#'Compute probabilistic forecasts from an ensemble based on the relative -#'thresholds, or the probabilistic observations (i.e., which probabilistic -#'category was observed). A reference period can be specified to calculate the -#'absolute thresholds between each probabilistic category. The absolute -#'thresholds can be computed in cross-validation mode. If data is an ensemble, -#'the probabilities are calculated as the percentage of members that fall into -#'each category. For observations (or forecast without member dimension), 1 -#'means that the event happened, while 0 indicates that the event did not -#'happen. Weighted probabilities can be computed if the weights are provided for -#'each ensemble member and time step. The absolute thresholds can also be -#'provided directly for probabilities calculation. -#' -#'@param data A named numerical array of the forecasts or observations with, at -#' least, time dimension. -#'@param time_dim A character string indicating the name of the time dimension. -#' The default value is 'sdate'. -#'@param memb_dim A character string indicating the name of the member dimension -#' to compute the probabilities of the forecast, or NULL if there is no member -#' dimension (e.g., for observations, or for forecast with only one ensemble -#' member). The default value is 'member'. -#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to -#' 1) between the categories. The default value is c(1/3, 2/3), which -#' corresponds to tercile equiprobable categories. -#'@param abs_thresholds A numeric array or vector of the absolute thresholds in -#' the same units as \code{data}. 
If an array is provided, it should have at -#' least 'bin_dim_abs' dimension. If it has more dimensions (e.g. different -#' thresholds for different locations, i.e. lon and lat dimensions), they -#' should match the dimensions of \code{data}, except the member dimension -#' which should not be included. The default value is NULL and, in this case, -#' 'prob_thresholds' is used for calculating the probabilities. -#'@param bin_dim_abs A character string of the dimension name of -#' 'abs_thresholds' array in which category limits are stored. It will also be -#' the probabilistic category dimension name in the output. The default value -#' is 'bin'. -#'@param indices_for_quantiles A vector of the indices to be taken along -#' 'time_dim' for computing the absolute thresholds between the probabilistic -#' categories. If NULL (default), the whole period is used. It is only used -#' when 'prob_thresholds' is provided. -#'@param weights A named numerical array of the weights for 'data' with -#' dimensions 'time_dim' and 'memb_dim' (if 'data' has them). The default value -#' is NULL. The ensemble should have at least 70 members or span at least 10 -#' time steps and have more than 45 members if consistency between the weighted -#' and unweighted methodologies is desired. -#'@param cross.val A logical indicating whether to compute the thresholds -#' between probabilistic categories in cross-validation mode. The default value -#' is FALSE. -#'@param ncores An integer indicating the number of cores to use for parallel -#' computation. The default value is NULL. -#' -#'@return -#'A numerical array of probabilities with dimensions c(bin_dim_abs, the rest -#'dimensions of 'data' except 'memb_dim'). 'bin' dimension has the length of -#'probabilistic categories, i.e., \code{length(prob_thresholds) + 1}. 
-#' -#'@examples -#'data <- array(rnorm(2000), dim = c(ensemble = 25, sdate = 20, time = 4)) -#'res <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', -#' indices_for_quantiles = 4:17) -#' -#'# abs_thresholds is provided -#'abs_thr1 <- c(-0.2, 0.3) -#'abs_thr2 <- array(c(-0.2, 0.3) + rnorm(40) * 0.1, dim = c(cat = 2, sdate = 20)) -#'res1 <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', -#' prob_thresholds = NULL, abs_thresholds = abs_thr1) -#'res2 <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', -#' prob_thresholds = NULL, abs_thresholds = abs_thr2, bin_dim_abs = 'cat') -#' -#'@import multiApply -#'@importFrom easyVerification convert2prob -#'@export -GetProbs <- function(data, time_dim = 'sdate', memb_dim = 'member', - indices_for_quantiles = NULL, - prob_thresholds = c(1/3, 2/3), abs_thresholds = NULL, - bin_dim_abs = 'bin', weights = NULL, cross.val = FALSE, ncores = NULL) { - - # Check inputs - ## data - if (is.null(data)) { - stop("Parameter 'data' cannot be NULL.") - } - if (!is.numeric(data)) { - stop("Parameter 'data' must be a numeric array.") - } - if (any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) { - stop("Parameter 'data' must have dimension names.") - } - ## time_dim - if (!is.character(time_dim) | length(time_dim) != 1) - stop('Parameter "time_dim" must be a character string.') - if (!time_dim %in% names(dim(data))) { - stop("Parameter 'time_dim' is not found in 'data' dimensions.") - } - ## memb_dim - if (!is.null(memb_dim)) { - if (!is.character(memb_dim) | length(memb_dim) > 1) { - stop("Parameter 'memb_dim' must be a character string.") - } - if (!memb_dim %in% names(dim(data))) { - stop("Parameter 'memb_dim' is not found in 'data' dimensions. 
If no member ", - "dimension exists, set it as NULL.") - } - } - ## bin_dim_abs - if (!is.character(bin_dim_abs) | length(bin_dim_abs) != 1) { - stop('Parameter "bin_dim_abs" must be a character string.') - } - ## prob_thresholds, abs_thresholds - if (!is.null(abs_thresholds) & !is.null(prob_thresholds)) { - .warning(paste0("Parameters 'prob_thresholds' and 'abs_thresholds' are both provided. ", - "Only the first one is used.")) - abs_thresholds <- NULL - } else if (is.null(abs_thresholds) & is.null(prob_thresholds)) { - stop("One of the parameters 'prob_thresholds' and 'abs_thresholds' must be provided.") - } - if (!is.null(prob_thresholds)) { - if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | - any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { - stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.") - } - ## indices_for_quantiles - if (is.null(indices_for_quantiles)) { - indices_for_quantiles <- 1:dim(data)[time_dim] - } else { - if (!is.numeric(indices_for_quantiles) | !is.vector(indices_for_quantiles)) { - stop("Parameter 'indices_for_quantiles' must be NULL or a numeric vector.") - } else if (length(indices_for_quantiles) > dim(data)[time_dim] | - max(indices_for_quantiles) > dim(data)[time_dim] | - any(indices_for_quantiles < 1)) { - stop("Parameter 'indices_for_quantiles' should be the indices of 'time_dim'.") - } - } - - } else { # abs_thresholds - - if (is.null(dim(abs_thresholds))) { # a vector - dim(abs_thresholds) <- length(abs_thresholds) - names(dim(abs_thresholds)) <- bin_dim_abs - } - # bin_dim_abs - if (!(bin_dim_abs %in% names(dim(abs_thresholds)))) { - stop("Parameter abs_thresholds' can be a vector or array with 'bin_dim_abs' dimension.") - } - if (!is.null(memb_dim) && memb_dim %in% names(dim(abs_thresholds))) { - stop("Parameter abs_thresholds' cannot have member dimension.") - } - dim_name_abs <- names(dim(abs_thresholds))[which(names(dim(abs_thresholds)) != bin_dim_abs)] - if (any(!dim_name_abs 
%in% names(dim(data)))) { - stop("Parameter 'abs_thresholds' dimensions except 'bin_dim_abs' must be in 'data' as well.") - } else { - if (any(dim(abs_thresholds)[dim_name_abs] != dim(data)[dim_name_abs])) { - stop("Parameter 'abs_thresholds' dimensions must have the same length as 'data'.") - } - } - if (!is.null(indices_for_quantiles)) { - warning("Parameter 'indices_for_quantiles' is not used when 'abs_thresholds' are provided.") - } - abs_target_dims <- bin_dim_abs - if (time_dim %in% names(dim(abs_thresholds))) { - abs_target_dims <- c(bin_dim_abs, time_dim) - } - - } - - ## weights - if (!is.null(weights)) { - if (!is.array(weights) | !is.numeric(weights)) - stop("Parameter 'weights' must be a named numeric array.") - -# if (is.null(dat_dim)) { - if (!is.null(memb_dim)) { - lendim_weights <- 2 - namesdim_weights <- c(time_dim, memb_dim) - } else { - lendim_weights <- 1 - namesdim_weights <- c(time_dim) - } - if (length(dim(weights)) != lendim_weights | - any(!names(dim(weights)) %in% namesdim_weights)) { - stop(paste0("Parameter 'weights' must have dimension ", - paste0(namesdim_weights, collapse = ' and '), ".")) - } - if (any(dim(weights)[namesdim_weights] != dim(data)[namesdim_weights])) { - stop(paste0("Parameter 'weights' must have the same dimension length as ", - paste0(namesdim_weights, collapse = ' and '), " dimension in 'data'.")) - } - weights <- Reorder(weights, namesdim_weights) - -# } else { -# if (length(dim(weights)) != 3 | any(!names(dim(weights)) %in% c(memb_dim, time_dim, dat_dim))) -# stop("Parameter 'weights' must have three dimensions with the names of 'memb_dim', 'time_dim' and 'dat_dim'.") -# if (dim(weights)[memb_dim] != dim(exp)[memb_dim] | -# dim(weights)[time_dim] != dim(exp)[time_dim] | -# dim(weights)[dat_dim] != dim(exp)[dat_dim]) { -# stop(paste0("Parameter 'weights' must have the same dimension lengths ", -# "as 'memb_dim', 'time_dim' and 'dat_dim' in 'exp'.")) -# } -# weights <- Reorder(weights, c(time_dim, memb_dim, 
dat_dim)) -# } - } - ## cross.val - if (!is.logical(cross.val) | length(cross.val) > 1) { - stop("Parameter 'cross.val' must be either TRUE or FALSE.") - } - ## ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { - stop("Parameter 'ncores' must be either NULL or a positive integer.") - } - } - - ############################### - if (is.null(abs_thresholds)) { - res <- Apply(data = list(data = data), - target_dims = c(time_dim, memb_dim), - output_dims = c(bin_dim_abs, time_dim), - fun = .GetProbs, - prob_thresholds = prob_thresholds, - indices_for_quantiles = indices_for_quantiles, - weights = weights, cross.val = cross.val, ncores = ncores)$output1 - } else { - res <- Apply(data = list(data = data, abs_thresholds = abs_thresholds), - target_dims = list(c(time_dim, memb_dim), abs_target_dims), - output_dims = c(bin_dim_abs, time_dim), - fun = .GetProbs, - prob_thresholds = NULL, - indices_for_quantiles = NULL, - weights = NULL, cross.val = FALSE, ncores = ncores)$output1 - } - - return(res) -} - -.GetProbs <- function(data, indices_for_quantiles, - prob_thresholds = c(1/3, 2/3), abs_thresholds = NULL, - weights = NULL, cross.val = FALSE) { - # .GetProbs() is used in RPS, RPSS, ROCSS - # data - ## if data is exp: [sdate, memb] - ## if data is obs: [sdate, (memb)] - # weights: [sdate, (memb)], same as data - # if abs_thresholds is not NULL: [bin, (sdate)] - - # Add dim [memb = 1] to data if it doesn't have memb_dim - if (length(dim(data)) == 1) { - dim(data) <- c(dim(data), 1) - if (!is.null(weights)) dim(weights) <- c(dim(weights), 1) - } - - # Calculate absolute thresholds - if (is.null(abs_thresholds)) { - if (cross.val) { - quantiles <- array(NA, dim = c(bin = length(prob_thresholds), sdate = dim(data)[1])) - for (i_time in 1:dim(data)[1]) { - if (is.null(weights)) { - quantiles[, i_time] <- quantile(x = as.vector(data[indices_for_quantiles[which(indices_for_quantiles != i_time)], ]), - probs = 
prob_thresholds, type = 8, na.rm = TRUE) - } else { - # weights: [sdate, memb] - sorted_arrays <- .sorted_distributions(data[indices_for_quantiles[which(indices_for_quantiles != i_time)], ], - weights[indices_for_quantiles[which(indices_for_quantiles != i_time)], ]) - sorted_data <- sorted_arrays$data - cumulative_weights <- sorted_arrays$cumulative_weights - quantiles[, i_time] <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y - } - } - - } else { - if (is.null(weights)) { - quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), - probs = prob_thresholds, type = 8, na.rm = TRUE) - } else { - # weights: [sdate, memb] - sorted_arrays <- .sorted_distributions(data[indices_for_quantiles, ], - weights[indices_for_quantiles, ]) - sorted_data <- sorted_arrays$data - cumulative_weights <- sorted_arrays$cumulative_weights - quantiles <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y - } - quantiles <- array(rep(quantiles, dim(data)[1]), - dim = c(bin = length(quantiles), dim(data)[1])) - } - - } else { # abs_thresholds provided - quantiles <- abs_thresholds - if (length(dim(quantiles)) == 1) { - quantiles <- InsertDim(quantiles, len = dim(data)[1], - pos = 2, name = names(dim(data))[1]) - } - } - # quantiles: [bin-1, sdate] - - # Probabilities - probs <- array(dim = c(dim(quantiles)[1] + 1, dim(data)[1])) # [bin, sdate] - for (i_time in 1:dim(data)[1]) { - if (anyNA(data[i_time, ])) { - probs[, i_time] <- rep(NA, dim = dim(quantiles)[1] + 1) - } else { - if (is.null(weights)) { - probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], - threshold = quantiles[, i_time])) - } else { - sorted_arrays <- .sorted_distributions(data[i_time, ], weights[i_time, ]) - sorted_data <- sorted_arrays$data - cumulative_weights <- sorted_arrays$cumulative_weights - # find any quantiles that are outside the data range - integrated_probs <- array(dim = dim(quantiles)) - for (i_quant in 1:dim(quantiles)[1]) { - # for 
thresholds falling under the distribution - if (quantiles[i_quant, i_time] < min(sorted_data)) { - integrated_probs[i_quant, i_time] <- 0 - # for thresholds falling over the distribution - } else if (max(sorted_data) < quantiles[i_quant, i_time]) { - integrated_probs[i_quant, i_time] <- 1 - } else { - integrated_probs[i_quant, i_time] <- approx(sorted_data, cumulative_weights, - quantiles[i_quant, i_time], "linear")$y - } - } - probs[, i_time] <- append(integrated_probs[, i_time], 1) - append(0, integrated_probs[, i_time]) - if (min(probs[, i_time]) < 0 | max(probs[, i_time]) > 1) { - stop(paste0("Probability in i_time = ", i_time, " is out of [0, 1].")) - } - } - } - } - - return(probs) -} - -.sorted_distributions <- function(data_vector, weights_vector) { - weights_vector <- as.vector(weights_vector) - data_vector <- as.vector(data_vector) - weights_vector <- weights_vector / sum(weights_vector) # normalize to 1 - sorter <- order(data_vector) - sorted_weights <- weights_vector[sorter] - cumulative_weights <- cumsum(sorted_weights) - 0.5 * sorted_weights - cumulative_weights <- cumulative_weights - cumulative_weights[1] # fix the 0 - cumulative_weights <- cumulative_weights / cumulative_weights[length(cumulative_weights)] # fix the 1 - return(list("data" = data_vector[sorter], "cumulative_weights" = cumulative_weights)) -} - - diff --git a/conf/archive.yml b/conf/archive.yml index cdb57e9a..61f62be2 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -1,57 +1,3 @@ -cerise: - src: "/ec/res4/scratch/cyce/cerise/data/esarchive/" - System: - CMCC-SPS3.5: - name: "CMCC-SPS3.5" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "exp/cmcc/system35/" - monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f6h/", - "sfcWind":"monthly_mean/sfcWind_f6h/", "tos":"monthly_mean/tos_f6h", - "tasmin":"monthly_mean/tasmin_f24h/", "tasmax":"monthly_mean/tasmax_f24h/", - "psl":"monthly_mean/psl_f6h/", "tdps":"monthly_mean/tdps_f6h/"} - 
nmember: - hcst: 40 - calendar: "proleptic_gregorian" - time_stamp_lag: "+1" - reference_grid: "conf/grid_description/griddes_system35c3s.txt" - Meteo-France-System8: - name: "Meteo-France System 8" - institution: "Meteo-France" - src: "exp/meteo_france/system8/" - monthly_mean: {"tas":"monthly_mean/tas_f6h/", "tos":"monthly_mean/tos_f6h", - "prlr":"monthly_mean/prlr_f6h/", "sfcWind": "monthly_mean/sfcWind_f6h/", - "tasmax":"monthly_mean/tasmax_f6h/", "tasmin": "monthly_mean/tasmin_f6h/", - "psl":"monthly_mean/psl_f6h/", "tdps":"monthly_mean/tdps_f6h/"} - nmember: - hcst: 25 - time_stamp_lag: "+1" - calendar: "proleptic_gregorian" - reference_grid: "conf/grid_description/griddes_system8.txt" - UKMO-System602: - name: "UKMO-S602" - institution: "UK MetOffice" - src: "exp/ukmo/system602/" - monthly_mean: {"tas":"monthly_mean/tas_f6h/", "tos":"monthly_mean/tos_f6h", - "prlr":"monthly_mean/prlr_f6h/", "sfcWind": "monthly_mean/sfcWind_f6h/", - "tasmax":"monthly_mean/tasmax_f6h/", "tasmin": "monthly_mean/tasmin_f6h/", - "psl":"monthly_mean/psl_f6h/", "tdps":"monthly_mean/tdps_f6h/"} - nmember: - hcst: 28 - time_stamp_lag: "+1" - calendar: "proleptic_gregorian" - reference_grid: "conf/grid_description/griddes_system602.txt" - Reference: - ERA5: - name: "ERA5" - institution: "European Centre for Medium-Range Weather Forecasts" - src: "recon/ecmwf/era5/" - monthly_mean: {"tas":"monthly_mean/tas_f1h/", "prlr":"monthly_mean/prlr_f1h/", - "psl":"monthly_mean/psl_f1h/", "sfcWind":"monthly_mean/sfcWind_f1h/", - "tos":"monthly_mean/tos_f1h/", - "tasmax":"monthly_mean/tasmax_f1h/", - "tasmin":"monthly_mean/tasmin_f1h/", "tdps":"monthly_mean/tdps_f1h/"} - calendar: "standard" - reference_grid: "conf/grid_description/griddes_GRIB_system5_m1.txt" esarchive: src: "/esarchive/" System: diff --git a/conf/grid_description/griddes_system602.txt b/conf/grid_description/griddes_system602.txt deleted file mode 100644 index a024e8d5..00000000 --- a/conf/grid_description/griddes_system602.txt 
+++ /dev/null @@ -1,18 +0,0 @@ -# -# gridID 1 -# -gridtype = lonlat -gridsize = 64800 -xsize = 360 -ysize = 180 -xname = lon -xlongname = "longitude" -xunits = "degrees_east" -yname = lat -ylongname = "latitude" -yunits = "degrees_north" -xfirst = 0.5 -xinc = 1 -yfirst = 89.5 -yinc = -1 - diff --git a/conf/grid_description/griddes_system8.txt b/conf/grid_description/griddes_system8.txt deleted file mode 100644 index a024e8d5..00000000 --- a/conf/grid_description/griddes_system8.txt +++ /dev/null @@ -1,18 +0,0 @@ -# -# gridID 1 -# -gridtype = lonlat -gridsize = 64800 -xsize = 360 -ysize = 180 -xname = lon -xlongname = "longitude" -xunits = "degrees_east" -yname = lat -ylongname = "latitude" -yunits = "degrees_north" -xfirst = 0.5 -xinc = 1 -yfirst = 89.5 -yinc = -1 - diff --git a/conf/slurm_templates/run_scorecards.sh b/conf/slurm_templates/run_scorecards.sh index 9a2b5c82..9abcac17 100644 --- a/conf/slurm_templates/run_scorecards.sh +++ b/conf/slurm_templates/run_scorecards.sh @@ -14,10 +14,7 @@ set -vx recipe=$1 outdir=$2 -#source MODULES -module load conda/22.11.1-2 -conda activate condaCerise -export LD_LIBRARY_PATH=/perm/cyce/conda/envs/condaCerise/lib +source MODULES # Execute scorecards Rscript modules/Scorecards/execute_scorecards.R ${recipe} ${outdir} diff --git a/datadownloading/dl-era5.bash b/datadownloading/dl-era5.bash deleted file mode 100755 index f6b33f86..00000000 --- a/datadownloading/dl-era5.bash +++ /dev/null @@ -1,127 +0,0 @@ -#!/bin/bash - - - -#module load ecmwfapi -module load conda/22.11.1-2 -conda activate cdsapi -#export LD_LIBRARY_PATH=/perm/cyce/conda/envs/condaCerise/lib - -set -v - - -#workdir=$1 -#model=$2 -# freq=$3 -freq="mon" -workdir=$SCRATCH/cerise/data/ - - if [[ $freq == "mon" ]]; then - freq_dir="monthly_mean" - suffix="_f1h" - else - freq_dir="1hourly" - suffix="" - fi - - mkdir -p $workdir/esarchive/recon/ecmwf/era5/$freq_dir - cd $workdir/esarchive/recon/ecmwf/era5/$freq_dir - - for year in $( seq 1979 2023); do - for mon 
in $( seq -w 1 12); do - - if [[ $freq == "mon" ]]; then - - for var in sfcWind tas tos psl prlr tasmin tasmax tdps; do - case $var in - "sfcWind") code="10m_wind_speed";; - "tas") code="2m_temperature";; - "tos") code="sea_surface_temperature";; - "psl") code="mean_sea_level_pressure";; - "prlr") code="total_precipitation";; - "tdps") code="2m_dewpoint_temperature";; - esac - - mkdir -p ${var}${suffix} - - if [[ ! -f ${var}${suffix}/${var}_${year}${mon}01.grb ]] ; then -python << EOF -import cdsapi - -c = cdsapi.Client() - -c.retrieve( - 'reanalysis-era5-single-levels-monthly-means', - { - 'format': 'grib', - 'product_type': 'monthly_averaged_reanalysis', - 'variable': '$code', - 'year': '$year', - 'month': '$mon', - 'time': '00:00', - }, - '${var}${suffix}/${var}_${year}${mon}.grib') -EOF - fi #file exists - done #var - else #freq 6h - for var in uas vas tas tos psl prlr tasmin tasmax tdps; do - case $var in - "sfcWind") code="10m_wind_speed";; - "tas") code="2m_temperature";; - "uas") code="10m_u_component_of_wind";; - "vas") code="10m_v_component_of_wind";; - "tos") code="sea_surface_temperature";; - "psl") code="mean_sea_level_pressure";; - "prlr") code="total_precipitation";; - "tdps") code="2m_dewpoint_temperature";; - esac - - mkdir -p ${var}${suffix} - if [[ ! 
-f ${var}${suffix}/${var}_${year}${mon}01.grb ]] ; then -python << EOF - -import cdsapi - -c = cdsapi.Client() - -c.retrieve( - 'reanalysis-era5-single-levels', - { - 'product_type': 'reanalysis', - 'format': 'grib', - 'variable': $code, - 'time': [ - '00:00', '01:00', '02:00', - '03:00', '04:00', '05:00', - '06:00', '07:00', '08:00', - '09:00', '10:00', '11:00', - '12:00', '13:00', '14:00', - '15:00', '16:00', '17:00', - '18:00', '19:00', '20:00', - '21:00', '22:00', '23:00', - ], - 'day': [ - '01', '02', '03', - '04', '05', '06', - '07', '08', '09', - '10', '11', '12', - '13', '14', '15', - '16', '17', '18', - '19', '20', '21', - '22', '23', '24', - '25', '26', '27', - '28', '29', '30', - ], - 'year': '$year', - 'month': '$mon', - }, - '${var}${suffix}/${var}_${year}${mon}.grb') - -EOF - - fi #file exists - done #var - fi #freq - done #year - done #mon diff --git a/datadownloading/dl-seasonal.bash b/datadownloading/dl-seasonal.bash deleted file mode 100755 index 3efea020..00000000 --- a/datadownloading/dl-seasonal.bash +++ /dev/null @@ -1,126 +0,0 @@ -#!/bin/bash - - - -#module load ecmwfapi -module load conda/22.11.1-2 -conda activate condaCerise -export LD_LIBRARY_PATH=/perm/cyce/conda/envs/condaCerise/lib - -set -v - - -#workdir=$1 -#model=$2 -# freq=$3 -freq="mon" -workdir=$SCRATCH/cerise/data/ - - -for model in ecmwf cmcc meteo_france dwd ukmo; do -#model="ecmwf" #cmcc, meteo_france, dwd, cmcc, ukmo - - case $model in - - "ecmwf") origin="ecmf" ; system="51";; - "meteo_france") origin="lfpw" ; system="8";; - "dwd") origin="edzw" ; system="21";; - "cmcc") origin="cmcc" ; system="35" ;; - "ukmo") origin="egrr" ; system="602" ;; - esac - - - if [[ $freq == "mon" ]]; then - freq_dir="monthly_mean" - suffix="_f6h" - else - freq_dir="6hourly" - suffix="" - fi - - mkdir -p $workdir/esarchive/exp/$model/system$system/$freq_dir - cd $workdir/esarchive/exp/$model/system$system/$freq_dir - - for year in $( seq 1979 2023); do - for mon in $( seq -w 1 12); do - - if [[ 
$freq == "mon" ]]; then - - for var in sfcWind tas tos psl prlr tasmin tasmax tdps; do - case $var in - "sfcWind") code="207.128";; - "tas") code="167.128";; - "tos") code="34.128";; - "psl") code="151.128";; - "prlr") code="228.172";; - "tasmin") code="52.128";; - "tasmax") code="51.128";; - "tdps") code="168.128";; - esac - - mkdir -p ${var}${suffix} - - if [[ ! -f ${var}${suffix}/${var}_${year}${mon}01.grb ]] ; then -mars << EOF -retrieve, -class=c3, -date=$year-$mon-01, -expver=1, -fcmonth=1/2/3/4/5/6, -levtype=sfc, -method=1, -number=0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30/31/32/33/34/35/36/37/38/39/40/41/42/43/44/45/46/47/48/49/50, -origin=$origin, -param=$code, -stream=msmm, -system=$system, -time=00:00:00, -type=fcmean, -expect=any, -target='${var}${suffix}/${var}_${year}${mon}01.grb' - -EOF - cdo shifttime,-15days ${var}${suffix}/${var}_${year}${mon}01.grb ${var}${suffix}/${var}_${year}${mon}01.grb2 - mv ${var}${suffix}/${var}_${year}${mon}01.grb2 ${var}${suffix}/${var}_${year}${mon}01.grb - fi #file exists - done #var - else #freq 6h - for var in sfcWind tas tos psl prlr tasmin tasmax tdps; do - case $var in - "sfcWind") code="207.128";; - "tas") code="167.128";; - "tos") code="34.128";; - "psl") code="151.128";; - "prlr") code="228.172";; - "tasmin") code="52.128";; - "tasmax") code="51.128";; - "tdps") code="168.128";; - esac - - mkdir -p ${var}${suffix} - if [[ ! 
-f ${var}${suffix}/${var}_${year}${mon}01.grb ]] ; then -mars << EOF -retrieve, -class=c3, -date=$year-$mon-01 -expver=1, -step=1/to/5160/by/6, -levtype=sfc, -method=1, -number=0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30/31/32/33/34/35/36/37/38/39/40/41/42/43/44/45/46/47/48/49/50, -origin=$origin, -param=$code, -stream=mmsf, -system=$system, -time=00:00:00, -type=fc, -expect=any, -target="${var}${suffix}/${var}_${year}${mon}01.grb" -EOF - - fi #file exists - done #var - fi #freq - done #year - done #mon -done #model diff --git a/full_NAO_scorecards.R b/full_NAO_scorecards.R deleted file mode 100644 index 3e24c6ad..00000000 --- a/full_NAO_scorecards.R +++ /dev/null @@ -1,194 +0,0 @@ - -source("modules/Loading/Loading.R") -#source("modules/Units/Units.R") -source("modules/Saving/Saving.R") -source("modules/Visualization/Visualization.R") -args = commandArgs(trailingOnly = TRUE) -recipe_file <- args[1] -#recipe_file <- "recipe_NAO_scorecards.yml" -recipe <- read_atomic_recipe(recipe_file) -#recipe <- prepare_outputs(recipe_file) -# Load datasets -data <- Loading(recipe) -#data <- Units(recipe, data) -# Full-cross-val workflow -sdate_dim <- dim(data$hcst$data)['syear'] -cross <- CSTools:::.make.eval.train.dexes('leave-one-out', sdate_dim, NULL) -# Paralelized: -loops <- array(1:length(cross), c(loop = length(cross))) - - source("/esarchive/scratch/nperez/git/s2dv/R/NAO.R") - source("/esarchive/scratch/nperez/git/s2dv/R/Utils.R") - source("/esarchive/scratch/nperez/git/s2dv/R/EOF.R") - source("/esarchive/scratch/nperez/git/s2dv/R/ProjectField.R") - - -res <- Apply(list(loops), target = NULL, - fun = function(t) { - # subset years: - # training - obs_tr <- Subset(data$obs$data, along = 'syear', - indices = cross[[t]]$train.dexes) - hcst_tr <- Subset(data$hcst$data, along = 'syear', - indices = cross[[t]]$train.dexes) - # eval years - hcst_ev <- Subset(data$hcst$data, along = 'syear', - indices = cross[[t]]$eval.dexes) - obs_ev <- 
Subset(data$obs$data, along = 'syear', - indices = cross[[t]]$eval.dexes) - # compute climatology: - clim_obs_tr <- MeanDims(obs_tr, 'syear') - clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble')) - # compute anomalies: - ano_obs_tr <- s2dv::Ano(obs_tr, clim_obs_tr) - ano_hcst_tr <- s2dv::Ano(hcst_tr, clim_hcst_tr) - ano_hcst_ev <- s2dv::Ano(hcst_ev, clim_hcst_tr) - ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr) - # compute NAO: - nao <- NAO(exp = ano_hcst_tr, obs = ano_obs_tr, exp_cor = ano_hcst_ev, - ftime_avg = NULL, time_dim = 'syear', - memb_dim = 'ensemble', - space_dim = c('latitude', 'longitude'), - ftime_dim = 'time', - lat = data$obs$attrs$Variable$metadata$lat, - lon = data$obs$attrs$Variable$metadata$lon) - - nao_obs_ev <- NAO(exp = ano_hcst_tr, obs = ano_obs_tr, exp_cor = ano_obs_ev, - ftime_avg = NULL, time_dim = 'syear', - memb_dim = 'ensemble', - space_dim = c('latitude', 'longitude'), - ftime_dim = 'time', - lat = data$obs$attrs$Variable$metadata$lat, - lon = data$obs$attrs$Variable$metadata$lon)$exp_cor - #Standarisation: - # Need the nao_hcst (for the train.dexes) to standarize the eval.dexes? 
- nao_hcst_ev <- Apply(list(nao$exp, nao$exp_cor), - target_dims = c('syear', 'ensemble'), - fun = function(x, y) { - sd <- sqrt(var(as.vector(x), na.rm = TRUE)) - means <- mean(as.vector(x), na.rm = TRUE) - res <- apply(y, c(1,2), function(z) {(z-means)/sd})}, - ncores = 1)$output1 - nao_obs_ev <- Apply(list(nao$obs, nao_obs_ev), - target_dims = list('syear', c('syear','ensemble')), - fun = function(x, y) { - sd <- sqrt(var(as.vector(x), na.rm = TRUE)) - means <- mean(as.vector(x), na.rm = TRUE) - res <- apply(y, c(1,2), - function(z) {(z-means)/sd})}, - ncores = 1)$output1 - nao_obs_tr <- Apply(list(nao$obs), target_dims = 'syear', - fun = function(x) { - sd <- sqrt(var(as.vector(x), na.rm = TRUE)) - means <- mean(as.vector(x), na.rm = TRUE) - res <- apply(x, 1, - function(z) {(z-means)/sd})}, - ncores = 1, output_dims = 'syear')$output1 - nao_hcst_tr <- Apply(list(nao$exp), target_dims = c('syear', 'ensemble'), - fun = function(x) { - sd <- sqrt(var(as.vector(x), na.rm = TRUE)) - means <- mean(as.vector(x), na.rm = TRUE) - res <- apply(x, c(1,2), function (z) {(z-means)/sd})}, ncores = 1)$output1 - #Category limits - lims_nao_hcst_tr <- Apply(nao_hcst_tr, target_dims = c('syear', 'ensemble'), - fun = function(x) {quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)}, - output_dims = 'probs')$output1 - lims_nao_obs_tr <- Apply(nao_obs_tr, target_dims = 'syear', - fun = function(x) {quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)}, - output_dims = 'probs')$output1 - - return(list(#ano_obs_tr = ano_obs_tr, ano_hcst_tr = ano_hcst_tr, - #ano_hcst_ev = ano_hcst_ev, ano_obs_ev = ano_obs_ev, - #clim_obs_tr = clim_obs_tr, clim_hcst_tr = clim_hcst_tr, - nao_hcst_tr = nao_hcst_tr, nao_hcst_ev = nao_hcst_ev, - nao_obs_ev = nao_obs_ev, nao_obs_tr = nao_obs_tr, - lims_nao_hcst_tr = lims_nao_hcst_tr, lims_nao_obs_tr = lims_nao_obs_tr)) - }, ncores = recipe$Analysis$ncores) -# RPS -source("/esarchive/scratch/nperez/git2/s2dv/R/GetProbs.R") -nao_hcst_probs_ev <- 
GetProbs(res$nao_hcst_ev, time_dim = 'loop', - prob_thresholds = NULL, - prob_dim = 'probs', indices_for_quantiles = NULL, - memb_dim = 'ensemble', abs_thresholds = res$lims_ano_hcst_tr, - ncores = recipe$Analysis$ncores) -nao_obs_probs_ev <- GetProbs(res$nao_obs_ev, time_dim = 'loop', - prob_thresholds = NULL, - prob_dim = 'probs', indices_for_quantiles = NULL, - memb_dim = 'ensemble', abs_thresholds = res$lims_ano_obs_tr, - ncores = recipe$Analysis$ncores) -rps <- RPS(exp = nao_hcst_probs_ev, obs = nao_obs_probs_ev, memb_dim = NULL, - cat_dim = 'probs', cross.val = FALSE, time_dim = 'loop', - ncores = recipe$Analysis$ncores) -# RPSS -rpss <- RPSS(exp = nao_hcst_probs_ev, obs = nao_obs_probs_ev, - time_dim = 'loop', memb_dim = NULL, - cat_dim = 'probs', - # We should use a line like this - #abs_threshold = res$lims_ano_hcst_tr, - #prob_threshold = c(1/3, 2/3), - cross.val = FALSE, - ncores = recipe$Analysis$ncores) -# CRPS -crps <- CRPS(exp = res$nao_hcst_ev, obs = res$nao_obs_ev, - time_dim = 'loop', memb_dim = 'ensemble', - ncores = recipe$Analysis$ncores) -# CRPSS -crpss <- CRPSS(exp = res$nao_hcst_ev, obs = res$nao_obs_ev, - memb_dim = 'ensemble', - time_dim = 'loop', clim.cross.val = TRUE, - ncores = recipe$Analysis$ncores) - -# Corr -enscorr <- s2dv::Corr(res$nao_hcst_ev, res$nao_obs_ev, - dat_dim = 'dat', - time_dim = 'loop', - method = 'pearson', - memb_dim = 'ensemble', - memb = F, - conf = F, - pval = F, - sign = T, - alpha = 0.05, - ncores = recipe$Analysis$ncores) - -# Mean Bias -#mean_bias <- Bias(res$ano_hcst_ev, res$ano_obs_ev, -mean_bias <- Bias(data$hcst$data, data$obs$data, - time_dim = 'syear', - memb_dim = 'ensemble', - ncores = recipe$Analysis$ncores) -# Spread error ratio -obs_noensdim <- ClimProjDiags::Subset(res$ano_obs_ev, "ensemble", 1, - drop = "selected") -enssprerr <- easyVerification::veriApply(verifun = 'EnsSprErr', - fcst = res$ano_hcst_ev, - obs = obs_noensdim, - tdim = which(names(dim(res$ano_hcst_ev))=='loop'), - ensdim = 
which(names(dim(res$ano_hcst_ev))=='ensemble'), - na.rm = TRUE, - ncpus = recipe$Analysis$ncores) - -skill_metrics <- list(mean_bias = mean_bias, enscorr = enscorr$corr, - enscorr_significance = enscorr$sign, enssprerr = enssprerr, - #rps = rps, - rpss = rpss$rpss, rpss_significance = rpss$sign, #crps = crps, - crpss = crpss$crpss, crpss_significance = crpss$sign) -skill_metrics <- lapply(skill_metrics, function(x) { - InsertDim(drop(x), len = 1, pos = 1, name = 'var')}) -original <- recipe$Run$output_dir -recipe$Run$output_dir <- paste0(original, "/outputs/Skill/") -# Compute save metrics -source("modules/Saving/Saving.R") -Saving <- Saving(recipe = recipe, data = data, skill = skill_metrics) -recipe$Run$output_dir <- original - -source("modules/Visualization/Visualization.R") -#PlotEquiMap(mean_bias[1,1,1,1,1,,c(182:360, 1:181)], -# lon = -179:180, -# lat = data$hcst$attrs$Variable$metadata$latitude, filled.co = F, -# fileout = "/esarchive/scratch/nperez/test.png") -skill_metrics <- lapply(skill_metrics, function(x) { - Subset(x, along = 'longitude', indices = c(182:360, 1:181)) - }) -data$hcst$coords$longitude <- -179:180 -Visualization(recipe, data, skill_metrics, significance = TRUE) diff --git a/full_ecvs_scorecards.R b/full_ecvs_scorecards.R deleted file mode 100644 index fec1a53d..00000000 --- a/full_ecvs_scorecards.R +++ /dev/null @@ -1,268 +0,0 @@ - -source("modules/Loading/Loading.R") -#source("modules/Units/Units.R") -source("modules/Saving/Saving.R") -source("modules/Units/Units.R") -source("modules/Visualization/Visualization.R") -args = commandArgs(trailingOnly = TRUE) -recipe_file <- args[1] -#recipe_file <- "recipe_tas_scorecards_seasonal.yml" -recipe <- read_atomic_recipe(recipe_file) -#recipe <- prepare_outputs(recipe_file) -# Load datasets -data <- Loading(recipe) -data <- Units(recipe, data) -# Full-cross-val workflow -sdate_dim <- dim(data$hcst$data)['syear'] -nmemb <- dim(data$hcst$data)['ensemble'] -nftime <- dim(data$hcst$data)['time'] 
-nlats <- dim(data$hcst$data)['latitude'] -nlons <- dim(data$hcst$data)['longitude'] - -info(recipe$Run$logger, - paste("ftime", nftime)) -info(recipe$Run$logger, - paste(dim(data$obs$data))) - - -cross <- CSTools:::.make.eval.train.dexes('leave-one-out', sdate_dim, NULL) -# Paralelized: -#loops <- array(1:length(cross), c(loop = length(cross))) -#res <- Apply(list(loops), target = NULL, -# fun = function(t) { -ano_hcst_ev_res <- array(NA, c(nftime, nlats, - nlons, nmemb, - sdate_dim)) -ano_obs_ev_res <- array(NA, c(nftime, nlats, - nlons, ensemble = 1, sdate_dim)) -ano_obs_tr_res <- array(NA, c(sample = sdate_dim - 1, nftime, - nlats, nlons, ensemble = 1, sdate_dim)) -lims_ano_hcst_tr_res <- array(NA, c(probs = 2, nftime, nlats, - nlons, sdate_dim)) -lims_ano_obs_tr_res <- array(NA, c(probs = 2, nftime, nlats, - nlons, sdate_dim)) -info(recipe$Run$logger, - paste(dim(data$hcst$data))) -info(recipe$Run$logger, - paste(names(dim(data$hcst$data)))) - - - -#crps_clim_res <- array(NA, c(nftime, nlats, nlons, ensemble = 1, sdate_dim)) -for (t in 1:sdate_dim) { - info(recipe$Run$logger, - paste("crossval:", t)) - - # subset years: Subset works at BSC not at Athos - # training - obs_tr <- data$obs$data[1,1,1,1,cross[[t]]$train.dexes,,,,] - #Subset(data$obs$data, along = 'syear', - # indices = cross[[t]]$train.dexes) - hcst_tr <- data$hcst$data[1,1,1,1,cross[[t]]$train.dexes,,,,] - #Subset(data$hcst$data, along = 'syear', - # indices = cross[[t]]$train.dexes) - # eval years - hcst_ev <- data$hcst$data[1,1,1,1,cross[[t]]$eval.dexes,,,,] - #Subset(data$hcst$data, along = 'syear', - # indices = cross[[t]]$eval.dexes) - obs_ev <- data$obs$data[1,1,1,1,cross[[t]]$eval.dexes,,,,] - #Subset(data$obs$data, along = 'syear', - # indices = cross[[t]]$eval.dexes) - info(recipe$Run$logger, - paste(names(dim(hcst_ev)))) - info(recipe$Run$logger, - paste(dim(hcst_ev))) - - info(recipe$Run$logger, - paste(names(dim(obs_tr)))) - info(recipe$Run$logger, - paste(dim(obs_tr))) - - dim(obs_tr) 
<- c(syear = (as.numeric(sdate_dim) - 1), nftime, nlats, nlons) - dim(obs_ev) <- c(syear = 1, nftime, nlats, nlons) - dim(hcst_tr) <- c(syear = (as.numeric(sdate_dim) - 1), nftime, nlats, nlons, nmemb) - dim(hcst_ev) <- c(syear = 1, nftime, nlats, nlons, nmemb) - # compute climatology: - clim_obs_tr <- MeanDims(obs_tr, 'syear') - clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble')) - # compute anomalies: - info(recipe$Run$logger, - paste("dims:", dim(clim_hcst_tr))) - - ano_obs_tr <- s2dv::Ano(obs_tr, clim_obs_tr, - ncores = recipe$Analysis$ncores) - ano_hcst_tr <- s2dv::Ano(hcst_tr, clim_hcst_tr, - ncores = recipe$Analysis$ncores) - ano_hcst_ev <- s2dv::Ano(hcst_ev, clim_hcst_tr, - ncores = recipe$Analysis$ncores) - ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr, - ncores = recipe$Analysis$ncores) - - # CRPS_clim: - # build the reference forecast usnig the observational trainig sample - #ref <- array(data = rep(ano_obs_tr[1,1,1,1,,1,1,1,1], each = 1), - # dim = c(sdate = 1, member = sdate_dim)) - #crps_clim <- SpecsVerification::enscrps_cpp(ens = ref, - # obs = ano_obs_ev[1,1,1,1,,1,1,1,1], R_new = NA) -# crps_clim <- Apply(list(ano_obs_ev, ano_obs_tr), target_dims = 'syear', -# fun = function(x,y) { -# ref <- array(y, c(syear = 1, ensemble = length(y))) -# res <- SpecsVerification::enscrps_cpp(ens = ref, -# obs = x, R_new = NA) #No fair -# }, ncores = recipe$Analysis$ncores)$output1 - - rm("clim_obs_tr", "clim_hcst_tr", "obs_tr", "hcst_tr", "obs_ev", - "hcst_ev") - #Category limits - lims_ano_hcst_tr <- Apply(ano_hcst_tr, target_dims = c('syear', 'ensemble'), - fun = function(x) {quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)}, - output_dims = 'probs', ncores = recipe$Analysis$ncores)$output1 - lims_ano_obs_tr <- Apply(ano_obs_tr, target_dims = c('syear'),#, 'ensemble'), - fun = function(x) {quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)}, - output_dims = 'probs', ncores = recipe$Analysis$ncores)$output1 -#mem <- mem_used() - gc() - ano_hcst_ev_res[,,,,t] 
<- ano_hcst_ev - ano_obs_ev_res[,,,,t] <- ano_obs_ev - ano_obs_tr_res[,,,,,t] <- ano_obs_tr - lims_ano_hcst_tr_res[,,,,t] <- lims_ano_hcst_tr - lims_ano_obs_tr_res[,,,,t] <- lims_ano_obs_tr -# crps_clim_res[,,,,t] <- crps_clim - res <- list(ano_hcst_ev = ano_hcst_ev_res, - ano_obs_ev = ano_obs_ev_res, - ano_obs_tr = ano_obs_tr_res, #required as reference forecast for the CRPSS - lims_ano_hcst_tr = lims_ano_hcst_tr_res, - lims_ano_obs_tr = lims_ano_obs_tr_res)#, -# crps_clim = crps_clim_res) -} -# return(list(#ano_obs_tr = ano_obs_tr, ano_hcst_tr = ano_hcst_tr, -# ano_hcst_ev = ano_hcst_ev, ano_obs_ev = ano_obs_ev, -# #clim_obs_tr = clim_obs_tr, clim_hcst_tr = clim_hcst_tr, -# lims_ano_hcst_tr = lims_ano_hcst_tr, -# lims_ano_obs_tr = lims_ano_obs_tr)) -# }, ncores =recipe$Analysis$ncores) -# RPS -source("GetProbs.R") -ano_hcst_probs_ev <- GetProbs(res$ano_hcst_ev, time_dim = 'syear', - prob_thresholds = NULL, - bin_dim_abs = 'probs', - indices_for_quantiles = NULL, - memb_dim = 'ensemble', abs_thresholds = res$lims_ano_hcst_tr, - ncores = recipe$Analysis$ncores) -ano_obs_probs_ev <- GetProbs(res$ano_obs_ev, time_dim = 'syear', - prob_thresholds = NULL, - bin_dim_abs = 'probs', - indices_for_quantiles = NULL, - memb_dim = 'ensemble', - abs_thresholds = res$lims_ano_obs_tr, - ncores = recipe$Analysis$ncores) -rps <- RPS(exp = ano_hcst_probs_ev, obs = ano_obs_probs_ev, memb_dim = NULL, - cat_dim = 'probs', cross.val = FALSE, time_dim = 'syear', - ncores = recipe$Analysis$ncores) -source("modules/Skill/R/RPS_clim.R") -rps_clim <- Apply(list(ano_obs_probs_ev), - target_dims = c('probs', 'syear'), - RPS_clim, bin_dim_abs = 'probs', cross.val = FALSE)$output1 -# RPSS -rpss <- RPSS(exp = ano_hcst_probs_ev, obs = ano_obs_probs_ev, - time_dim = 'syear', memb_dim = NULL, - cat_dim = 'probs', - # We should use a line like this - #abs_threshold = res$lims_ano_hcst_tr, - #prob_threshold = c(1/3, 2/3), - cross.val = FALSE, - ncores = recipe$Analysis$ncores) -# CRPS -crps <- 
CRPS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, - time_dim = 'syear', memb_dim = 'ensemble', - ncores = recipe$Analysis$ncores) -# Este no sé como se calcula????: -# Aquí no se puede porque estaría incluyendo información de los otros años -#source("modules/Skill/R/CRPS_clim.R") -# Pero si lo hago con el ano_obs_tr si puedo hacerlo aquí -# el resultado es igual a dentro del bucle. -crps_clim <- CRPS(exp = res$ano_obs_tr, obs = res$ano_obs_ev, - time_dim = 'syear', memb_dim = 'sample.syear', - ncores = recipe$Analysis$ncores) - - -# CRPSS -ref <- res$ano_obs_tr -dim(ref) <- c(ensemble = as.numeric(sdate_dim) -1, - nftime, nlats, nlons, sdate_dim) -crpss <- CRPSS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, ref = ref, - memb_dim = 'ensemble', - time_dim = 'syear', clim.cross.val = FALSE, - ncores = recipe$Analysis$ncores) - - -# Corr -enscorr <- s2dv::Corr(res$ano_hcst_ev, res$ano_obs_ev, - dat_dim = NULL, - time_dim = 'syear', - method = 'pearson', - memb_dim = 'ensemble', - memb = F, - conf = F, - pval = F, - sign = T, - alpha = 0.05, - ncores = recipe$Analysis$ncores) - -# Mean Bias -#mean_bias <- Bias(res$ano_hcst_ev, res$ano_obs_ev, -mean_bias <- Bias(data$hcst$data, data$obs$data, - time_dim = 'syear', - memb_dim = 'ensemble', - ncores = recipe$Analysis$ncores) -# Spread error ratio -obs_noensdim <- ClimProjDiags::Subset(res$ano_obs_ev, "ensemble", 1, - drop = "selected") -enssprerr <- easyVerification::veriApply(verifun = 'EnsSprErr', - fcst = res$ano_hcst_ev, - obs = obs_noensdim, - tdim = which(names(dim(res$ano_hcst_ev))=='syear'), - ensdim = which(names(dim(res$ano_hcst_ev))=='ensemble'), - na.rm = FALSE, - ncpus = recipe$Analysis$ncores) -skill_metrics <- list(mean_bias = mean_bias, enscorr = enscorr$corr, - enscorr_significance = enscorr$sign, enssprerr = enssprerr, - rps = rps, rps_clim = rps_clim, crps = crps, crps_clim = crps_clim, - rpss = rpss$rpss, rpss_significance = rpss$sign, #crps = crps, - crpss = crpss$crpss, crpss_significance = crpss$sign) 
-skill_metrics <- lapply(skill_metrics, function(x) { - InsertDim(drop(x), len = 1, pos = 1, name = 'var')}) -original <- recipe$Run$output_dir -recipe$Run$output_dir <- paste0(original, "/outputs/Skill/") -# Compute save metrics -source("modules/Saving/Saving.R") -#Saving <- Saving(recipe = recipe, data = data, skill = skill_metrics) - save_metrics(recipe = recipe, - skill = skill_metrics, - data_cube = data$hcst, agg = 'global', - outdir = recipe$Run$output_dir) - -recipe$Run$output_dir <- original - -source("modules/Visualization/Visualization.R") -#PlotEquiMap(mean_bias[1,1,1,1,1,,c(182:360, 1:181)], -# lon = -179:180, -# lat = data$hcst$attrs$Variable$metadata$latitude, filled.co = F, -# fileout = "/esarchive/scratch/nperez/test.png") -if (data$hcst$coords$longitude[1] != 0) { - skill_metrics <- lapply(skill_metrics, function(x) { - Subset(x, along = 'longitude', indices = c(182:360, 1:181)) - }) -} - info(recipe$Run$logger, - paste("lons:", data$hcst$coords$longitude)) - info(recipe$Run$logger, - paste("lons:", data$obs$coords$longitude)) - - -data$hcst$coords$longitude <- -179:180 -Visualization(recipe, data, skill_metrics, significance = TRUE) - -source("tools/add_logo.R") -add_logo(recipe, "tools/BSC_logo_95.jpg") - diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 22677811..6c4002ee 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -5,7 +5,7 @@ source("modules/Loading/load_datasets.R") Loading <- function(recipe) { # Source correct function depending on filesystem and time horizon # Case: CERISE (Mars) - if (tolower(recipe$Run$filesystem) %in% c("cerise", "mars")) { + if (tolower(recipe$Run$filesystem) == "mars") { source("modules/Loading/R/load_GRIB.R") data <- load_GRIB(recipe) } else if (tolower(recipe$Run$filesystem) == "sample") { @@ -18,10 +18,10 @@ Loading <- function(recipe) { if(recipe$Analysis$Variables$name == 'tas-tos') { source("modules/Loading/R/load_tas_tos.R") data <- load_tas_tos(recipe) - } 
else { + } else { source("modules/Loading/R/load_seasonal.R") data <- load_seasonal(recipe) - } + } } else if (time_horizon == "decadal") { source("modules/Loading/R/load_decadal.R") data <- load_decadal(recipe) diff --git a/modules/Loading/R/GRIB/GrbLoad.R b/modules/Loading/R/GRIB/GrbLoad.R index fda8ea84..7a3f4414 100644 --- a/modules/Loading/R/GRIB/GrbLoad.R +++ b/modules/Loading/R/GRIB/GrbLoad.R @@ -7,192 +7,191 @@ GrbLoad <- function (dat, time_step = 1, has.memb = NULL, syear_time_dim = NULL, regrid = NULL) { library(gribr) - + result <- vector('list', length = length(dat)) times <- vector('list', length = length(dat)) times <- lapply(times, '[<-', rep(NA, length(time_step))) #NOTE: length is 0 (slower in loop?) -# times <- lapply(times, '[<-', .POSIXct(rep(NA, length(time_step)), tz = 'UTC')) - + # times <- lapply(times, '[<-', .POSIXct(rep(NA, length(time_step)), tz = 'UTC')) + for (dat_i in 1:length(dat)) { - + file_to_load <- grib_open(dat[[dat_i]]) - + #---------------------------------------- # HOW TO FIND THE VALUE OF EACH FTIME STEP? #---------------------------------------- #NOTE: ValidityTime is not considered now. So if the time frequency is less than daily, it has problem. - -# # METHOD 1: Get first message to figure out the validityDate/Time of each message -# #NOTE: gm1$validityDate should be "s", "m", "h", etc. according to document. But our files have "1". + + # METHOD 1: Get first message to figure out the validityDate/Time of each message + #NOTE: gm1$validityDate should be "s", "m", "h", etc. according to document. But our files have "1". 
gm1 <- grib_get_message(file_to_load, 1) -# first_ftime <- as.character(gm1$validityDate) -# first_ftime_hour <- gm1$validityTime -# # For monthly data -# #NOTE: may not be correct because it is calculated by the first message -# cdo_time_attr <- clock::add_months(as.POSIXct(first_ftime, -# format = "%Y%m%d", tz = 'UTC'), time_step - 1) -# cdo_time <- format(cdo_time_attr, "%Y%m%d") - - # METHOD 2: Use cdo showtimestamp (DEPENDENCY!) - #TODO: Change to method 1 because can't predict what cdo will produce - cdo_time <- system(paste0("cdo showtimestamp ", dat[[dat_i]]), intern = T) - cdo_time <- strsplit(cdo_time, " ")[[length(cdo_time)]] - cdo_time <- cdo_time[which(cdo_time != "")] - # Check if there is member dim or not - has.memb <- ifelse((length(unique(cdo_time)) == length(cdo_time)), FALSE, TRUE) - if (has.memb) memb_dim_length <- length(cdo_time)/length(unique(cdo_time)) - cdo_time <- unique(cdo_time)[time_step] #"2000-12-01T00:00:00" - cdo_time_attr <- as.POSIXct(gsub('T', ' ', cdo_time), tz = 'UTC') - cdo_time <- sapply(sapply(cdo_time, strsplit, "T"), '[[', 1) - cdo_time <- gsub('-', '', cdo_time) + first_ftime <- as.character(gm1$validityDate) + first_ftime_hour <- gm1$validityTime + # For monthly data + #NOTE: may not be correct because it is calculated by the first message + cdo_time_attr <- clock::add_months(as.POSIXct(paste0(first_ftime, ' ', first_ftime_hour), + format = "%Y%m%d %H", tz = 'UTC'), time_step - 1) + cdo_time <- format(cdo_time_attr, "%Y%m%d") + + # # METHOD 2: Use cdo showtimestamp (DEPENDENCY!) 
+ # #TODO: Change to method 1 because can't predict what cdo will produce + # cdo_time <- system(paste0("cdo showtimestamp ", dat[[dat_i]]), intern = T) + # cdo_time <- strsplit(cdo_time, " ")[[length(cdo_time)]] + # cdo_time <- cdo_time[which(cdo_time != "")] + ## # Check if there is member dim or not + ## has_memb <- ifelse((length(unique(cdo_time)) == length(cdo_time)), FALSE, TRUE) + # if (has.memb) memb_dim_length <- length(cdo_time)/length(unique(cdo_time)) + # cdo_time <- unique(cdo_time)[time_step] #"2000-12-01T00:00:00" + # cdo_time_attr <- as.POSIXct(gsub('T', ' ', cdo_time), tz = 'UTC') + # cdo_time <- sapply(sapply(cdo_time, strsplit, "T"), '[[', 1) + # cdo_time <- gsub('-', '', cdo_time) #---------------------------------------- - + # all members + ftimes: length should be memb*ftime (e.g., 51*7) ## Method 1: use grib_select and real values to filter memb_ftime <- grib_select(file_to_load, list(validityDate = cdo_time)) if (inherits(memb_ftime, 'gribMessage')) memb_ftime <- list(memb_ftime) - -# ## Method 2: Calculate which messages are the desired ones -# gm <- grib_get_message(file_to_load, time_step) -# if (length(time_step) == 1) { -# gm <- list(gm) -# } - - ################################################################## - # Get data as an array [longitude, latitude, (memb*)time] - ################################################################## - if (grepl("reduced", gm1$gridType)) { - #NOTE: Need to call gribr::grib_expand_grids because I don't know how to make .Call("gribr_redtoreg") work outside that function - # https://github.com/nawendt/gribr/blob/main/src/redtoreg.c - values_l <- vector('list', length = length(memb_ftime)) - for (gm_i in 1:length(memb_ftime)) { - values_l[[gm_i]] <- grib_expand_grids(memb_ftime[[gm_i]]) + + # ## Method 2: Calculate which messages are the desired ones + # gm <- grib_get_message(file_to_load, time_step) + # if (length(time_step) == 1) { + # gm <- list(gm) + # } + + 
################################################################## + # Get data as an array [longitude, latitude, (memb*)time] + ################################################################## + if (grepl("reduced", gm1$gridType)) { + #NOTE: Need to call gribr::grib_expand_grids because I don't know how to make .Call("gribr_redtoreg") work outside that function + # https://github.com/nawendt/gribr/blob/main/src/redtoreg.c + values_l <- vector('list', length = length(memb_ftime)) + for (gm_i in 1:length(memb_ftime)) { + values_l[[gm_i]] <- grib_expand_grids(memb_ftime[[gm_i]]) + } + result[[dat_i]] <- array(unlist(values_l), dim = c(longitude = gm1$Nj * 2, latitude = gm1$Nj, time = length(values_l))) + # Save memory + rm(values_l); gc() + + } else { + result[[dat_i]] <- .grib_expand_grids(memb_ftime) } - result[[dat_i]] <- array(unlist(values_l), dim = c(longitude = gm1$Nj * 2, latitude = gm1$Nj, time = length(values_l))) - # Save memory - rm(values_l); gc() - - } else { - result[[dat_i]] <- .grib_expand_grids(memb_ftime) - } - - ################################################################## - # Get metadata - ################################################################## - ## (1-1) Everything from the first message of first file - if (dat_i == 1) { + + ################################################################## + # Get metadata + ################################################################## ## (1-1) Everything from the first message of first file -# dims <- dim(result[[dat_i]]) -# attributes(result) <- gm1 -# # turn result into array again -# dim(result[[dat_i]]) <- dims - - ## (1-2) Only save the necessary attributes - attr(result, 'edition') <- gm1$edition - attr(result, 'shortName') <- gm1$shortName - #NOTE: Tune varaible name!! 
- attr(result, 'shortName') <- tune_var_name(gm1$shortName, dat[[dat_i]]) - attr(result, 'name') <- gm1$name - attr(result, 'units') <- gm1$units -# attr(result, 'validityDate') <- gm1$validityDate -# attr(result, 'validityTime') <- gm1$validityTime - - ## (2) Lat and lon - latlon <- grib_latlons(gm1, expand = TRUE) - attr(result, 'latitude') <- unique(as.vector(c(latlon$lats))) - attr(result, 'longitude') <- unique(as.vector(c(latlon$lons))) - # Save memory (though it's small) - rm(latlon); gc() - - #NOTE: Find another way to check regular grid; Ni/Nj not always exist -# if (has.key(gm1, "Nx") && has.key(gm1, "Ny")) { -# nx <- gm1$Nx -# ny <- gm1$Ny -# } else { -# nx <- gm1$Ni -# ny <- gm1$Nj -# } -# if (length(lats) != ny | length(lons) != nx) { -# stop("Latitude and Longitude seem to be non-regular grid.") -# } - - } - -#-------------------------------- -#NOTE: Just use cdo_time -# ## (3) Date and time: Need to get from each massage -# for (time_i in 1:length(time_step)) { -# gm1 <- gm[[time_i]] -# #NOTE: What's the correct time? 
-## dates <- gm1$validityDate #Date of validity of the forecast -## times <- gm1$validityTime -## dates <- gm1$dataDate # Reference date -# times[[dat_i]][time_i] <- as.POSIXct( -# lubridate::ymd_hms(paste0(paste(gm1$year,gm1$month,gm1$day, '-'), ' ', -# paste(gm1$hour, gm1$minute, gm1$second, ':'))) -# ) -# } - times[[dat_i]] <- cdo_time_attr -#-------------------------------- - - ################################################################## - # regrid - ################################################################## - if (!is.null(regrid)) { - # result[[dat_i]]: [longitude, latitude, time] - res_data <- s2dv::CDORemap(result[[dat_i]], lons = attr(result, 'longitude'), lats = attr(result, 'latitude'), - grid = regrid$type, method = regrid$method, force_remap = TRUE) - if (dat_i == length(dat)) { - attr(result, 'longitude') <- res_data$lons - attr(result, 'latitude') <- res_data$lats + if (dat_i == 1) { + ## (1-1) Everything from the first message of first file + # dims <- dim(result[[dat_i]]) + # attributes(result) <- gm1 + # # turn result into array again + # dim(result[[dat_i]]) <- dims + + ## (1-2) Only save the necessary attributes + attr(result, 'edition') <- gm1$edition + attr(result, 'shortName') <- gm1$shortName + #NOTE: Tune varaible name!! 
+ if (gm1$shortName == '2t') attr(result, 'shortName') <- 'tas' + attr(result, 'name') <- gm1$name + attr(result, 'units') <- gm1$units + # attr(result, 'validityDate') <- gm1$validityDate + # attr(result, 'validityTime') <- gm1$validityTime + + ## (2) Lat and lon + latlon <- grib_latlons(gm1, expand = TRUE) + attr(result, 'latitude') <- unique(as.vector(c(latlon$lats))) + attr(result, 'longitude') <- unique(as.vector(c(latlon$lons))) + # Save memory (though it's small) + rm(latlon); gc() + + #NOTE: Find another way to check regular grid; Ni/Nj not always exist + # if (has.key(gm1, "Nx") && has.key(gm1, "Ny")) { + # nx <- gm1$Nx + # ny <- gm1$Ny + # } else { + # nx <- gm1$Ni + # ny <- gm1$Nj + # } + # if (length(lats) != ny | length(lons) != nx) { + # stop("Latitude and Longitude seem to be non-regular grid.") + # } + } - result[[dat_i]] <- res_data$data_array - } - - - ################################################################## - # Save memory - rm(memb_ftime); rm(gm1); gc() - grib_close(file_to_load) # Doesn't impact memory - ################################################################## -} #for loop dat - + + #-------------------------------- + #NOTE: Just use cdo_time + # ## (3) Date and time: Need to get from each massage + # for (time_i in 1:length(time_step)) { + # gm1 <- gm[[time_i]] + # #NOTE: What's the correct time? 
+ ## dates <- gm1$validityDate #Date of validity of the forecast + ## times <- gm1$validityTime + ## dates <- gm1$dataDate # Reference date + # times[[dat_i]][time_i] <- as.POSIXct( + # lubridate::ymd_hms(paste0(paste(gm1$year,gm1$month,gm1$day, '-'), ' ', + # paste(gm1$hour, gm1$minute, gm1$second, ':'))) + # ) + # } + times[[dat_i]] <- cdo_time_attr + #-------------------------------- + + ################################################################## + # regrid + ################################################################## + if (!is.null(regrid)) { + # result[[dat_i]]: [longitude, latitude, time] + res_data <- s2dv::CDORemap(result[[dat_i]], lons = attr(result, 'longitude'), lats = attr(result, 'latitude'), + grid = regrid$type, method = regrid$method, force_remap = TRUE) + if (dat_i == length(dat)) { + attr(result, 'longitude') <- res_data$lons + attr(result, 'latitude') <- res_data$lats + } + result[[dat_i]] <- res_data$data_array + } + + + ################################################################## + # Save memory + rm(memb_ftime); rm(gm1); gc() + grib_close(file_to_load) # Doesn't impact memory + ################################################################## + } #for loop dat + # Turn result list into array attr <- attributes(result) res_dim <- c(dim(result[[1]]), syear = length(result)) #[longitude, latitude, (memb*)time, syear] result <- unlist(result) dim(result) <- res_dim - + # Generate date/time attributes times <- array(unlist(times), dim = c(time = length(time_step), syear = length(dat), sday = 1, sweek = 1)) times <- s2dv::Reorder(times, c('sday', 'sweek', 'syear', 'time')) if (!is.null(syear_time_dim)) dim(times) <- syear_time_dim times <- as.POSIXct(times, origin = '1970-01-01', tz = 'UTC') - + # Reshape and reorder array - if (is.null(has.memb) | has.memb == FALSE) { # obs doesn't have memb; reshape syear/time dim + if (is.null(has.memb)) { # obs doesn't have memb; reshape syear/time dim result <- s2dv::Reorder(result, 
c("syear", "time", "latitude", "longitude")) result <- array(result, dim = c(dat = 1, var = 1, syear_time_dim, dim(result)[3:4], - ensemble = 1)) + ensemble = 1)) } else { - nmemb <- as.numeric(dim(result)[3])/length(time_step) - result <- array(result, dim = c(dim(result)[1:2], ensemble = nmemb, + result <- array(result, dim = c(dim(result)[1:2], ensemble = has.memb, time = length(time_step), dim(result)[4])) result <- s2dv::Reorder(result, c("syear", "time", "latitude", "longitude", "ensemble")) dim(result) <- c(dat = 1, var = 1, sday = 1, sweek = 1, dim(result)) } - + # Add attributes back attr$dim <- dim(result) attributes(result) <- attr attr(result, 'time') <- times - + # Save memory rm(times); rm(attr); gc() - + return(result) } @@ -202,7 +201,7 @@ GrbLoad <- function (dat, time_step = 1, has.memb = NULL, syear_time_dim = NULL, .grib_expand_grids <- function(gribMessages, vector = FALSE) { # gribMessages is a list of multiple messages gribMessage <- gribMessages[[1]] - + if (gribr::has.key(gribMessage, "Nx") && gribr::has.key(gribMessage, "Ny")) { nx <- gribMessage$Nx ny <- gribMessage$Ny @@ -210,11 +209,11 @@ GrbLoad <- function (dat, time_step = 1, has.memb = NULL, syear_time_dim = NULL, nx <- gribMessage$Ni ny <- gribMessage$Nj } - + if (is.null(nx) || is.null(ny)) { stop("Unsupported grid type: ", gribMessage$gridType) } - + if (grepl("reduced", gribMessage$gridType)) { #TODO: This part is not used now. 
nx <- ny * 2 @@ -222,60 +221,28 @@ GrbLoad <- function (dat, time_step = 1, has.memb = NULL, syear_time_dim = NULL, gribMessage$values) values <- matrix(values, nx, ny, byrow = gribMessage$jPointsAreConsecutive) - -# values_l <- vector('list', length = length(gribMessages)) -# for (gm_i in 1:length(gribMessages)) { -# values <- .Call("gribr_redtoreg", nx, gribMessages[[gm_i]]$pl, -# gribMessages[[gm_i]]$values) -# values <- matrix(values, nx, ny, -# byrow = gribMessage$jPointsAreConsecutive) -# values_l[[gm_i]] <- values -# } - + + # values_l <- vector('list', length = length(gribMessages)) + # for (gm_i in 1:length(gribMessages)) { + # values <- .Call("gribr_redtoreg", nx, gribMessages[[gm_i]]$pl, + # gribMessages[[gm_i]]$values) + # values <- matrix(values, nx, ny, + # byrow = gribMessage$jPointsAreConsecutive) + # values_l[[gm_i]] <- values + # } + } else { -# values <- matrix(gribMessage$values, nx, ny, -# byrow = gribMessage$jPointsAreConsecutive) + # values <- matrix(gribMessage$values, nx, ny, + # byrow = gribMessage$jPointsAreConsecutive) values_l <- lapply(gribMessages, '[[', 'values') values_l <- lapply(values_l, matrix, nx, ny, byrow = gribMessage$jPointsAreConsecutive) values <- array(unlist(values_l), dim = c(longitude = nx, latitude = ny, time = length(values_l))) } - + if (vector) { values <- as.numeric(values) } - + values } - -tune_var_name <- function(x, y) { - if (x == '2t') { - res <- "tas" - } else if (x == 'tprate') { - res <- "prlr" - } else if (x == "10si") { - res <- "sfcWind" - } else if (x == "2d") { - res <- "tdps" - } else if (x == "msl") { - res <- "psl" - } else if (x == "sst") { - res <- "tos" - } else if (x == "tp") { - if (grepl("prlr", y)) { - res <- "prlr" - } else if (grepl("tasmax", y)) { - res <- "tasmax" - } else if (grepl("tasmin", y)) { - res <- "tasmin" - } - } else if (x == "mx2t24") { - res <- "tasmax" - } else if (x == "mn2t24") { - res <- "tasmin" - } else { - res <- x - warning("Conversion name", x, "needed?") - } - 
return(res) -} diff --git a/modules/Loading/R/load_GRIB.R b/modules/Loading/R/load_GRIB.R index 796327cb..0dd5f919 100644 --- a/modules/Loading/R/load_GRIB.R +++ b/modules/Loading/R/load_GRIB.R @@ -6,10 +6,10 @@ source('modules/Loading/R/GRIB/GrbLoad.R') source('tools/libs.R') load_GRIB <- function(recipe) { - + # Set params #------------------------------------------------------------------- - + # get recipe info hcst.inityear <- recipe$Analysis$Time$hcst_start hcst.endyear <- recipe$Analysis$Time$hcst_end @@ -22,12 +22,12 @@ load_GRIB <- function(recipe) { lons.max <- recipe$Analysis$Region$lonmax # can only be 360 ref.name <- recipe$Analysis$Datasets$Reference$name exp.name <- recipe$Analysis$Datasets$System$name - variable <- recipe$Analysis$Variables$name + variable <- recipe$Analysis$Variables$name #'tas' store.freq <- recipe$Analysis$Variables$freq - + regrid.method <- recipe$Analysis$Regrid$method regrid.type <- recipe$Analysis$Regrid$type - + # get MARS datasets dict: archive <- read_yaml("conf/archive.yml")[[recipe$Run$filesystem]] exp_descrip <- archive$System[[exp.name]] @@ -40,24 +40,24 @@ load_GRIB <- function(recipe) { #NOTE: We can use this info in GrbLoad() to substitute param 'has.memb' fcst.nmember <- exp_descrip$nmember$fcst hcst.nmember <- exp_descrip$nmember$hcst - + info(recipe$Run$logger, "========== PARAMETERS RETRIEVED. 
==========") - + # Load hindcast #------------------------------------------------------------------- - -## original file dir -#exp_path <- "/esarchive/exp/ecmwf/system5_m1/original_files/fcmean_od_sfc_msmm_ecmf/" -## soft link to original file dir -#exp_path <- "/esarchive/scratch/aho/tmp/GRIB/GRIB_system5_tas/" #files are not correct -# The correct files -#exp_path <- "/esarchive/scratch/aho/tmp/GRIB/GRIB_system5_tas_CORRECTED/" - - hcst.path <- paste0(archive$src, hcst.dir, freq.hcst) + + ## original file dir + #exp_path <- "/esarchive/exp/ecmwf/system5_m1/original_files/fcmean_od_sfc_msmm_ecmf/" + ## soft link to original file dir + #exp_path <- "/esarchive/scratch/aho/tmp/GRIB/GRIB_system5_tas/" #files are not correct + # The correct files + #exp_path <- "/esarchive/scratch/aho/tmp/GRIB/GRIB_system5_tas_CORRECTED/" + + hcst.path <- paste0(archive$src, hcst.dir) hcst.year <- paste0(as.numeric(hcst.inityear):as.numeric(hcst.endyear)) hcst.files <- paste0(hcst.path, variable, '_', hcst.year, hcst.sdate, '.grb') - + if (!regrid.type %in% c('none', 'to_system')) { if (regrid.type == 'to_reference') { regrid_list <- c(method = regrid.method, type = reference_descrip$reference_grid) @@ -67,15 +67,15 @@ load_GRIB <- function(recipe) { } else { regrid_list <- NULL } - + .log_memory_usage(recipe$Run$logger, when = "Before loading the data") hcst <- GrbLoad(dat = as.list(hcst.files), time_step = hcst.ftime, has.memb = hcst.nmember, - syear_time_dim = NULL, regrid = regrid_list) + syear_time_dim = NULL, regrid = regrid_list) gc() - + info(recipe$Run$logger, "========== HCST LOADED. ==========") - + # Load forecast #------------------------------------------------------------------- if (!is.null(fcst.year)) { @@ -87,76 +87,74 @@ load_GRIB <- function(recipe) { } else { fcst <- NULL } - + info(recipe$Run$logger, "========== FCST LOADED. 
==========") - + # Load reference #------------------------------------------------------------------- -#obs_path <- "/esarchive/scratch/aho/tmp/GRIB/GRIB_era5_tas/" - obs.path <- paste0(archive$src, obs.dir, freq.obs, "/") + #obs_path <- "/esarchive/scratch/aho/tmp/GRIB/GRIB_era5_tas/" + obs.path <- paste0(archive$src, obs.dir) # Use hcst time attr to load obs hcst_times <- attr(hcst, 'time') hcst_times_strings <- format(hcst_times, '%Y%m') - + obs.files <- paste0(obs.path, variable, '_', hcst_times_strings, '.grb') - - if (!regrid.type %in% c('none')) { - if (regrid.type == 'to_reference') { - regrid_list <- list(method = regrid.method, type = reference_descrip$reference_grid) - } else if (regrid.type == 'to_system') { - regrid_list <- list(method = regrid.method, type = exp_descrip$reference_grid) + + if (!regrid.type %in% c('none', 'to_reference')) { + if (regrid.type == 'to_system') { + regrid_list <- c(method = regrid.method, type = exp_descrip$reference_grid) } else { # e.g., "r360x181" regrid_list <- list(method = regrid.method, type = regrid.type) } } else { regrid_list <- NULL } - + #NOTE: only 1 time step in each obs file obs <- GrbLoad(dat = as.list(obs.files), time_step = 1, has.memb = NULL, syear_time_dim = dim(hcst_times), regrid = regrid_list) gc() - + .log_memory_usage(recipe$Run$logger, when = "After loading the data") info(recipe$Run$logger, "========== OBS LOADED. ==========") - - -################################################################################# - -#dim(hcst) -# syear time latitude longitude ensemble -# 4 3 640 1280 51 - -##BEFORE TRANSFER TO S2DV_CUBE -#str(hcst) -# num [1:4, 1:3, 1:640, 1:1280, 1:51] 252 252 252 252 251 ... -# - attr(*, "edition")= num 1 -# - attr(*, "shortName")= chr "2t" -# - attr(*, "longitude")= num [1:1280] 0 0.281 0.563 0.844 1.125 ... -# - attr(*, "latitude")= num [1:640] 89.8 89.5 89.2 88.9 88.7 ... -# - attr(*, "time")= POSIXct[1:12], format: "2000-12-01" "2001-12-01" ... 
- -#dim(attr(hcst, 'time')) -#syear time -# 4 3 - -##BEFORE TRANSFER TO S2DV_CUBE -#str(obs) -# num [1:4, 1:3, 1:640, 1:1280] 251 251 251 251 251 ... -# - attr(*, "edition")= num 1 -# - attr(*, "shortName")= chr "2t" -# - attr(*, "longitude")= num [1:1280] 0 0.281 0.562 0.844 1.125 ... -# - attr(*, "latitude")= num [1:640] 89.8 89.5 89.2 88.9 88.7 ... -# - attr(*, "time")= POSIXct[1:12], format: "2000-12-01" "2001-12-01" ... - -################################################################################# - + + + ################################################################################# + + #dim(hcst) + # syear time latitude longitude ensemble + # 4 3 640 1280 51 + + ##BEFORE TRANSFER TO S2DV_CUBE + #str(hcst) + # num [1:4, 1:3, 1:640, 1:1280, 1:51] 252 252 252 252 251 ... + # - attr(*, "edition")= num 1 + # - attr(*, "shortName")= chr "2t" + # - attr(*, "longitude")= num [1:1280] 0 0.281 0.563 0.844 1.125 ... + # - attr(*, "latitude")= num [1:640] 89.8 89.5 89.2 88.9 88.7 ... + # - attr(*, "time")= POSIXct[1:12], format: "2000-12-01" "2001-12-01" ... + + #dim(attr(hcst, 'time')) + #syear time + # 4 3 + + ##BEFORE TRANSFER TO S2DV_CUBE + #str(obs) + # num [1:4, 1:3, 1:640, 1:1280] 251 251 251 251 251 ... + # - attr(*, "edition")= num 1 + # - attr(*, "shortName")= chr "2t" + # - attr(*, "longitude")= num [1:1280] 0 0.281 0.562 0.844 1.125 ... + # - attr(*, "latitude")= num [1:640] 89.8 89.5 89.2 88.9 88.7 ... + # - attr(*, "time")= POSIXct[1:12], format: "2000-12-01" "2001-12-01" ... + + ################################################################################# + info(recipe$Run$logger, "========== REGRID DONE. 
==========") - - + + # Turn into s2dv_cube #------------------------------------------------------------------- # hcst @@ -165,7 +163,7 @@ load_GRIB <- function(recipe) { metadata_list[[variable]] <- list(long_name = attr(hcst, 'name'), units = attr(hcst, 'units')) load_parameters_list <- list(dat1 = list(file_date = list(paste0(hcst.year, hcst.sdate)))) - + hcst <- s2dv_cube(data = array(hcst, dim = dim(hcst)), coords = list(dat = 'dat1', var = variable, @@ -183,15 +181,15 @@ load_GRIB <- function(recipe) { load_parameters = load_parameters_list, # extra attrs gribEdition = attr(hcst, 'edition')) - + # fcst if (!is.null(fcst)) { metadata_list <- vector("list", length = 1) names(metadata_list) <- variable metadata_list[[variable]] <- list(long_name = attr(fcst, 'name'), - units = attr(fcst, 'units')) + units = attr(fcst, 'units')) load_parameters_list <- list(dat1 = list(file_date = list(paste0(fcst.year, hcst.sdate)))) - + fcst <- s2dv_cube(data = array(fcst, dim = dim(fcst)), coords = list(dat = 'dat1', var = variable, @@ -209,14 +207,14 @@ load_GRIB <- function(recipe) { load_parameters = load_parameters_list, gribEdition = attr(fcst, 'edition')) } - + # obs metadata_list <- vector("list", length = 1) names(metadata_list) <- variable metadata_list[[variable]] <- list(long_name = attr(obs, 'name'), units = attr(obs, 'units')) load_parameters_list <- list(dat1 = list(file_date = list(hcst_times_strings))) - + obs <- s2dv_cube(data = array(obs, dim = dim(obs)), coords = list(dat = 'dat1', var = variable, @@ -227,55 +225,55 @@ load_GRIB <- function(recipe) { time = hcst.ftime, latitude = attr(obs, 'latitude'), longitude = attr(obs, 'longitude'), - ensemble = 1), + ensemble = 1), varName = attr(obs, 'shortName'), metadata = metadata_list, Dates = attributes(obs)$time, source_files = obs.files, load_parameters = load_parameters_list, gribEdition = attr(obs, 'edition')) - - -#str(hcst) -#List of 4 -# $ data : num [1, 1, 1, 1, 1:2, 1:2, 1:640, 1:1280, 1:51] 252 253 248 
251 251 ... -# ..- attr(*, "edition")= num 1 -# ..- attr(*, "shortName")= chr "2t" -# ..- attr(*, "longitude")= num [1:1280] 0 0.281 0.563 0.844 1.125 ... -# ..- attr(*, "latitude")= num [1:640] 89.8 89.5 89.2 88.9 88.7 ... -# ..- attr(*, "time")= POSIXct[1:4], format: "2000-12-01" "2001-12-01" ... -# $ dims : Named int [1:9] 1 1 1 1 2 2 640 1280 51 -# ..- attr(*, "names")= chr [1:9] "dat" "var" "sday" "sweek" ... -# $ coords:List of 9 -# ..$ dat : chr "dat1" -# .. ..- attr(*, "indices")= logi FALSE -# ..$ var : chr "tas" -# .. ..- attr(*, "indices")= logi FALSE -# ..$ sday : num 1 -# .. ..- attr(*, "indices")= logi FALSE -# ..$ sweek : num 1 -# .. ..- attr(*, "indices")= logi FALSE -# ..$ syear : chr [1:2] "2000" "2001" -# .. ..- attr(*, "indices")= logi FALSE -# ..$ time : int [1:2] 1 2 -# .. ..- attr(*, "indices")= logi FALSE -# ..$ latitude : num [1:640] 89.8 89.5 89.2 88.9 88.7 ... -# .. ..- attr(*, "indices")= logi FALSE -# ..$ longitude: num [1:1280] 0 0.281 0.563 0.844 1.125 ... -# .. ..- attr(*, "indices")= logi FALSE -# ..$ ensemble : int [1:51] 1 2 3 4 5 6 7 8 9 10 ... -# .. ..- attr(*, "indices")= logi FALSE -# $ attrs :List of 4 -# ..$ Dates : POSIXct[1:4], format: "2000-12-01" "2001-12-01" ... -# ..$ Variable :List of 1 -# .. ..$ varName: chr "2t" -# ..$ source_files: chr [1:2] "/esarchive/scratch/aho/tmp/GRIB/GRIB_system5_tas_CORRECTED/tas_20001101.grb" "/esarchive/scratch/aho/tmp/GRIB/GRIB_system5_tas_CORRECTED/tas_20011101.grb" -# ..$ gribEdition : num 1 -# - attr(*, "class")= chr "s2dv_cube" + + + #str(hcst) + #List of 4 + # $ data : num [1, 1, 1, 1, 1:2, 1:2, 1:640, 1:1280, 1:51] 252 253 248 251 251 ... + # ..- attr(*, "edition")= num 1 + # ..- attr(*, "shortName")= chr "2t" + # ..- attr(*, "longitude")= num [1:1280] 0 0.281 0.563 0.844 1.125 ... + # ..- attr(*, "latitude")= num [1:640] 89.8 89.5 89.2 88.9 88.7 ... + # ..- attr(*, "time")= POSIXct[1:4], format: "2000-12-01" "2001-12-01" ... 
+ # $ dims : Named int [1:9] 1 1 1 1 2 2 640 1280 51 + # ..- attr(*, "names")= chr [1:9] "dat" "var" "sday" "sweek" ... + # $ coords:List of 9 + # ..$ dat : chr "dat1" + # .. ..- attr(*, "indices")= logi FALSE + # ..$ var : chr "tas" + # .. ..- attr(*, "indices")= logi FALSE + # ..$ sday : num 1 + # .. ..- attr(*, "indices")= logi FALSE + # ..$ sweek : num 1 + # .. ..- attr(*, "indices")= logi FALSE + # ..$ syear : chr [1:2] "2000" "2001" + # .. ..- attr(*, "indices")= logi FALSE + # ..$ time : int [1:2] 1 2 + # .. ..- attr(*, "indices")= logi FALSE + # ..$ latitude : num [1:640] 89.8 89.5 89.2 88.9 88.7 ... + # .. ..- attr(*, "indices")= logi FALSE + # ..$ longitude: num [1:1280] 0 0.281 0.563 0.844 1.125 ... + # .. ..- attr(*, "indices")= logi FALSE + # ..$ ensemble : int [1:51] 1 2 3 4 5 6 7 8 9 10 ... + # .. ..- attr(*, "indices")= logi FALSE + # $ attrs :List of 4 + # ..$ Dates : POSIXct[1:4], format: "2000-12-01" "2001-12-01" ... + # ..$ Variable :List of 1 + # .. ..$ varName: chr "2t" + # ..$ source_files: chr [1:2] "/esarchive/scratch/aho/tmp/GRIB/GRIB_system5_tas_CORRECTED/tas_20001101.grb" "/esarchive/scratch/aho/tmp/GRIB/GRIB_system5_tas_CORRECTED/tas_20011101.grb" + # ..$ gribEdition : num 1 + # - attr(*, "class")= chr "s2dv_cube" .log_memory_usage(recipe$Run$logger, when = "After regridding") info(recipe$Run$logger, "##### GRIB DATA LOADED SUCCESSFULLY #####") - + return(list(hcst = hcst, fcst = fcst, obs = obs)) - + } diff --git a/modules/Scorecards/R/tmp/LoadMetrics.R b/modules/Scorecards/R/tmp/LoadMetrics.R index c2afed29..fa12d610 100644 --- a/modules/Scorecards/R/tmp/LoadMetrics.R +++ b/modules/Scorecards/R/tmp/LoadMetrics.R @@ -115,10 +115,12 @@ LoadMetrics <- function(system, reference, var, start.year, end.year, ## Define empty list to saved data all_metrics <- sapply(system, function(x) NULL) + names(all_metrics) <- system ## Load data for each system for (sys in 1:length(system)) { ## Define empty list to saved data by_reference <- 
sapply(reference, function(x) NULL) + names(by_reference) <- reference ## Load data for each reference for (ref in 1:length(reference)) { ## Call function to load metrics data @@ -157,7 +159,6 @@ LoadMetrics <- function(system, reference, var, start.year, end.year, var, "-skill_", period, "_s", m, # mod.pressure, ".nc")}) allfiles_exist <- sapply(allfiles, file.exists) - warning(paste("Exist", allfiles_exist)) # Check dims files_exist_by_month <- seq(1:length(allfiles))[allfiles_exist] allfiledims <- sapply(allfiles[allfiles_exist], easyNCDF::NcReadDims) diff --git a/modules/Scorecards/Scorecards.R b/modules/Scorecards/Scorecards.R index 527fe387..3206c1b1 100644 --- a/modules/Scorecards/Scorecards.R +++ b/modules/Scorecards/Scorecards.R @@ -10,574 +10,127 @@ source('modules/Scorecards/R/tmp/SCTransform.R') source('modules/Scorecards/R/tmp/ScorecardsSingle.R') source('modules/Scorecards/R/tmp/ScorecardsMulti.R') source('modules/Scorecards/R/tmp/ScorecardsSystemDiff.R') -source('modules/Scorecards/R/tmp/VizScorecard.R') +source('modules/Scorecards/R/tmp/SCPlotScorecard.R') -## Temporary for new ESviz function -source('modules/Scorecards/R/tmp/ColorBarContinuous.R') -source('modules/Scorecards/R/tmp/ClimPalette.R') -.IsColor <- s2dv:::.IsColor -.FilterUserGraphicArgs <- s2dv:::.FilterUserGraphicArgs +## TODO: Change function name to 'Scorecards'? 
## Define function Scorecards <- function(recipe) { - ## Parameters for loading data files - - # input.path <- "/esarchive/scratch/nmilders/scorecards_data/syear/testing/" #temp - input.path <- '/esarchive/scratch/nmilders/scorecards_data/multimodel/recipe_multimodel_seasonal_nadia_20240130182422/outputs/' - - skill.input.path <- paste0(input.path, "Skill/") #paste0(recipe$Run$output_dir, "/outputs/Skill/") - # stats.input.path <- paste0(input.path, "Statistics/") #paste0(recipe$Run$output_dir, "/outputs/Statistics/") - skill.input.path <- paste0(recipe$Run$output_dir, "/outputs/Skill/") - stats.input.path <- paste0(recipe$Run$output_dir, "/outputs/Statistics/") + ## set parameters + input.path <- paste0(recipe$Run$output_dir, "/outputs/Skill/") output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/") dir.create(output.path, recursive = T, showWarnings = F) + system <- recipe$Analysis$Datasets$System$name reference <- recipe$Analysis$Datasets$Reference$name var <- recipe$Analysis$Variables$name start.year <- as.numeric(recipe$Analysis$Time$hcst_start) end.year <- as.numeric(recipe$Analysis$Time$hcst_end) forecast.months <- recipe$Analysis$Time$ftime_min : recipe$Analysis$Time$ftime_max - calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method) - - if (recipe$Analysis$Workflow$Scorecards$start_months == 'all' || is.null(recipe$Analysis$Workflow$Scorecards$start_months)) { - start.months <- as.numeric(substr(recipe$Analysis$Time$sdate, 1,2)) + + if (recipe$Analysis$Workflow$Scorecards$start_months == 'all') { + start.months <- 1:12 } else { start.months <- as.numeric(strsplit(recipe$Analysis$Workflow$Scorecards$start_months, split = ", | |,")[[1]]) - if(!any(as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))) %in% start.months){ - error(recipe$Run$logger,"Requested start dates for scorecards must be loaded") - } } - start.months <- sprintf("%02d", start.months) - period <- paste0(start.year, "-", end.year) - - ## Parameters for data aggregation 
regions <- recipe$Analysis$Workflow$Scorecards$regions for (i in names(regions)){regions[[i]] <- unlist(regions[[i]])} metric.aggregation <- recipe$Analysis$Workflow$Scorecards$metric_aggregation metrics.load <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Skill$metric), ", | |,")) - metrics.visualize <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric), ", | |,")) - ncores <- 1 # recipe$Analysis$ncores - if(is.null(recipe$Analysis$Workflow$Scorecards$signif_alpha)){ - alpha <- 0.05 - } else { - alpha <- recipe$Analysis$Workflow$Scorecards$signif_alpha - } + ## Define skill scores in score aggregation has been requested - if (is.null(recipe$Analysis$Workflow$Scorecards$inf_to_na)){ - inf.to.na <- FALSE - } else { - inf.to.na <- recipe$Analysis$Workflow$Scorecards$inf_to_na + if(metric.aggregation == 'score'){ + if('rps' %in% metrics.load){ + metrics.load <- c(metrics.load, 'rps_clim') + } + if('crps' %in% metrics.load){ + metrics.load <- c(metrics.load, 'crps_clim') + } } - if(is.null(recipe$Analysis$remove_NAs)){ - na.rm <- FALSE - } else { - na.rm <- recipe$Analysis$remove_NAs + metrics.visualize <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric), ", | |,")) + + ## Define skill scores in score aggregation has been requested + + if(metric.aggregation == 'score'){ + if('rpss' %in% metrics.visualize){ + metrics.visualize[metrics.visualize == 'rpss'] <- 'rpss_score_aggr' + } + if('crpss' %in% metrics.visualize){ + metrics.visualize[metrics.visualize == 'crpss'] <- 'crpss_score_aggr' + } } - ## Parameters for scorecard layout + inf.to.na <- recipe$Analysis$Workflow$Scorecards$inf_to_na table.label <- recipe$Analysis$Workflow$Scorecards$table_label fileout.label <- recipe$Analysis$Workflow$Scorecards$fileout_label + legend.white.space <- recipe$Analysis$Workflow$Scorecards$legend_white_space col1.width <- recipe$Analysis$Workflow$Scorecards$col1_width col2.width <- recipe$Analysis$Workflow$Scorecards$col2_width - legend.breaks 
<- recipe$Analysis$Workflow$Scorecards$legend_breaks - legend.width <- recipe$Analysis$Workflow$Scorecards$legend_width - - if (is.null(recipe$Analysis$Workflow$Scorecards$plot_legend)){ - plot.legend <- TRUE - } else { - plot.legend <- recipe$Analysis$Workflow$Scorecards$plot_legend - } - - if(is.null(recipe$Analysis$Workflow$Scorecards$columns_width)){ - columns.width <- 1.2 - } else { - columns.width <- recipe$Analysis$Workflow$Scorecards$columns_width - } - - if(is.null(recipe$Analysis$Workflow$Scorecards$legend_white_space)){ - legend.white.space <- 6 - } else { - legend.white.space <- recipe$Analysis$Workflow$Scorecards$legend_white_space - } - - if(is.null(recipe$Analysis$Workflow$Scorecards$legend_height)){ - legend.height <- 50 - } else { - legend.height <- recipe$Analysis$Workflow$Scorecards$legend_height - } - - if(is.null(recipe$Analysis$Workflow$Scorecards$label_scale)){ - label.scale <- 1.4 - } else { - label.scale <- recipe$Analysis$Workflow$Scorecards$label_scale - } - - if(is.null(recipe$Analysis$Workflow$Scorecards$round_decimal)){ - round.decimal <- 2 - } else { - round.decimal <- recipe$Analysis$Workflow$Scorecards$round_decimal - } - - if(is.null(recipe$Analysis$Workflow$Scorecards$font_size)){ - font.size <- 1.1 - } else { - font.size <- recipe$Analysis$Workflow$Scorecards$font_size - } + calculate.diff <- recipe$Analysis$Workflow$Scorecards$calculate_diff + ncores <- 1 # recipe$Analysis$ncores - ## Define if difference scorecard is to be plotted - if (is.null(recipe$Analysis$Workflow$Scorecards$calculate_diff)){ - calculate.diff <- FALSE - } else { - calculate.diff <- recipe$Analysis$Workflow$Scorecards$calculate_diff - } + ## Load data files + loaded_metrics <- LoadMetrics(system = system, + reference = reference, + var = var, + start.year = start.year, + end.year = end.year, + metrics = metrics.load, + start.months = start.months, + forecast.months = forecast.months, + inf_to_na = inf.to.na, + input.path = input.path) - ####### SKILL 
AGGREGATION ####### - if(metric.aggregation == 'skill'){ - ## Load data files - loaded_metrics <- LoadMetrics(input_path = skill.input.path, - system = system, - reference = reference, - var = var, - metrics = metrics.visualize, ## metrics.load - period = period, - start_months = start.months, - forecast_months = forecast.months, - calib_method = calib.method, - syear = NULL, - inf_to_na = inf.to.na - ) - - ## Spatial Aggregation of metrics - if('region' %in% names(dim(loaded_metrics[[1]][[1]]))){ - - ### Convert loaded metrics to array for already aggregated data - metrics.dim <- attributes(loaded_metrics[[1]][[1]])$metrics - forecast.months.dim <- attributes(loaded_metrics[[1]][[1]])$forecast.months - start.months.dim <- attributes(loaded_metrics[[1]][[1]])$start.months - regions.dim <- regions #list('NAO' = c(lon.min = -80, lon.max = 40, lat.min = 20, lat.max = 80)) - - aggregated_metrics <- array(dim = c(system = length(loaded_metrics), - reference = length(loaded_metrics[[1]]), - metric = length(metrics.dim), - time = length(forecast.months.dim), - sdate = length(start.months.dim), - region = length(regions.dim))) - - - for (sys in 1:length(names(loaded_metrics))){ - for (ref in 1:length(names(loaded_metrics[[sys]]))){ - aggregated_metrics[sys, ref, , , , ] <- s2dv::Reorder(data = loaded_metrics[[sys]][[ref]], order = c('metric','time','sdate','region')) - } - } - - ## Add attributes - attributes(aggregated_metrics)$metrics <- metrics.load - attributes(aggregated_metrics)$start.months <- attributes(loaded_metrics[[1]][[1]])$start.months - attributes(aggregated_metrics)$forecast.months <- attributes(loaded_metrics[[1]][[1]])$forecast.months - attributes(aggregated_metrics)$regions <- regions - attributes(aggregated_metrics)$system.name <- names(loaded_metrics) - attributes(aggregated_metrics)$reference.name <- names(loaded_metrics[[1]]) - - - } else { - ## Calculate weighted mean of spatial aggregation - aggregated_metrics <- WeightedMetrics(loaded_metrics, - 
regions = regions, - metric.aggregation = metric.aggregation, - ncores = ncores) - } ## close if on region - metrics_significance <- NULL + if('region' %in% names(dim(loaded_metrics[[1]][[1]]))){ - } ## close if on skill - - ###### SCORE AGGREGATION ###### - if(metric.aggregation == 'score'){ - - lon_dim <- 'longitude' - lat_dim <- 'latitude' - time_dim <- 'syear' - memb_dim <- 'ensemble' - - ## Define arrays to filled with data - aggregated_metrics <- array(data = NA, - dim = c(system = length(system), - reference = length(reference), - time = length(forecast.months), - sdate = length(start.months), - region = length(regions), - metric = length(metrics.visualize))) + ### Convert loaded metrics to array for allready aggregated data + metrics.dim <- attributes(loaded_metrics[[1]][[1]])$metrics + forecast.months.dim <- attributes(loaded_metrics[[1]][[1]])$forecast.months + start.months.dim <- attributes(loaded_metrics[[1]][[1]])$start.months + regions.dim <- regions #list('NAO' = c(lon.min = -80, lon.max = 40, lat.min = 20, lat.max = 80)) - metrics_significance <- array(data = NA, - dim = c(system = length(system), - reference = length(reference), - time = length(forecast.months), - sdate = length(start.months), - region = length(regions), - metric = length(metrics.visualize))) + aggregated_metrics <- array(dim = c(system = length(loaded_metrics), + reference = length(loaded_metrics[[1]]), + metric = length(metrics.dim), + time = length(forecast.months.dim), + sdate = length(start.months.dim), + region = length(regions.dim))) - for (sys in 1:length(system)){ - # sys_num <- which(system == sys) - for (ref in 1:length(reference)){ - # ref_num <- which(refence == ref) - for (met in metrics.visualize) { - - if(met == 'rpss'){ - ## Load data from saved files - rps_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics 
= 'rps_syear', - calib_method = calib.method, syear = TRUE) - - rps_clim_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'rps_clim_syear', - calib_method = calib.method, syear = TRUE) - - ## Remove dat and var dimensions - rps_syear <- Subset(rps_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') - rps_clim_syear <- Subset(rps_clim_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') - - ## Spatially aggregate data - rps_syear_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = rps_syear, - region = regions[[X]], - lon = as.vector(attributes(rps_syear)$Variables$dat1$longitude), - lat = as.vector(attributes(rps_syear)$Variables$dat1$latitude), - londim = lon_dim, - latdim = lat_dim, - na.rm = F) - }, simplify = 'array') - - rps_clim_syear_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = rps_clim_syear, - region = regions[[X]], - lon = as.vector(attributes(rps_clim_syear)$Variables$dat1$longitude), - lat = as.vector(attributes(rps_clim_syear)$Variables$dat1$latitude), - londim = lon_dim, - latdim = lat_dim, - na.rm = F) - }, simplify = 'array') - - ## Include name of region dimension - names(dim(rps_syear_spatial_aggr))[length(dim(rps_syear_spatial_aggr))] <- 'region' - names(dim(rps_clim_syear_spatial_aggr))[length(dim(rps_clim_syear_spatial_aggr))] <- 'region' - - ## Temporally aggregate data - rps_temp_aggr <- Apply(data = rps_syear_spatial_aggr, - target_dims = time_dim, - fun = 'mean', ncores = ncores)$output1 - - rps_clim_temp_aggr <- Apply(data = rps_clim_syear_spatial_aggr, - target_dims = time_dim, - fun = 'mean', ncores = ncores)$output1 - - ## Calculate RPSS from aggregated RPS and RPS_clim - rpss <- 1 - rps_temp_aggr / rps_clim_temp_aggr - - ## Calculate significance - 
sign_rpss <- RandomWalkTest(rps_syear_spatial_aggr, rps_clim_syear_spatial_aggr, - time_dim = time_dim, test.type = 'two.sided', - alpha = alpha, pval = FALSE, sign = TRUE, - ncores = NULL)$sign - - ## Save metric result in arrays - aggregated_metrics[sys, ref, , , ,which(metrics.visualize == met)] <- s2dv::Reorder(data = rpss, order = c('time', 'smonths','region')) - metrics_significance[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_rpss, order = c('time', 'smonths','region')) - - } ## close if on rpss - - if(met == 'crpss'){ - - ## Load data from saved files - crps_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'crps_syear', - calib_method = calib.method, syear = TRUE) - - crps_clim_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'crps_clim_syear', - calib_method = calib.method, syear = TRUE) - - ## Remove dat and var dimensions - crps_syear <- Subset(crps_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') - crps_clim_syear <- Subset(crps_clim_syear, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') - - ## Spatially aggregate data - crps_syear_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = crps_syear, - region = regions[[X]], - lon = as.vector(attributes(crps_syear)$Variables$dat1$longitude), - lat = as.vector(attributes(crps_syear)$Variables$dat1$latitude), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - crps_clim_syear_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = crps_clim_syear, - region = regions[[X]], - lon = 
as.vector(attributes(crps_clim_syear)$Variables$dat1$longitude), - lat = as.vector(attributes(crps_clim_syear)$Variables$dat1$latitude), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - ## Include name of region dimension - names(dim(crps_syear_spatial_aggr))[length(dim(crps_syear_spatial_aggr))] <- 'region' - names(dim(crps_clim_syear_spatial_aggr))[length(dim(crps_clim_syear_spatial_aggr))] <- 'region' - - ## Temporally aggregate data - crps_temp_aggr <- Apply(data = crps_syear_spatial_aggr, - target_dims = time_dim, - fun = 'mean', ncores = ncores)$output1 - - crps_clim_temp_aggr <- Apply(data = crps_clim_syear_spatial_aggr, - target_dims = time_dim, - fun = 'mean', ncores = ncores)$output1 - - ## Calculate CRPSS from aggregated CRPS and CRPS_clim - crpss <- 1 - crps_temp_aggr / crps_clim_temp_aggr - - ## Calculate significance - sign_crpss <- RandomWalkTest(crps_syear_spatial_aggr, crps_clim_syear_spatial_aggr, - time_dim = time_dim, test.type = 'two.sided', - alpha = alpha, pval = FALSE, sign = TRUE, - ncores = NULL)$sign - - ## Save metric result in arrays - aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = crpss, order = c('time', 'smonths','region')) - metrics_significance[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_crpss, order = c('time', 'smonths','region')) - - } ## close if on crpss - - if(met == 'enscorr'){ - ## Load data from saved files - cov <- .loadmetrics(input_path = stats.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'cov', - calib_method = calib.method, syear = NULL) - - std_hcst <- .loadmetrics(input_path = stats.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'std_hcst', - calib_method 
= calib.method, syear = NULL) - - std_obs <- .loadmetrics(input_path = stats.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'std_obs', - calib_method = calib.method, syear = NULL) - - - n_eff <- .loadmetrics(input_path = stats.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'n_eff', - calib_method = calib.method, syear = NULL) - - ## Calculate spatial aggregation - cov_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = cov, - region = regions[[X]], - lon = as.vector(attributes(cov)$Variables$dat1$longitude), - lat = as.vector(attributes(cov)$Variables$dat1$latitude), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - ## Include name of region dimension - names(dim(cov_spatial_aggr))[length(dim(cov_spatial_aggr))] <- 'region' - + for (sys in 1:length(names(loaded_metrics))){ + for (ref in 1:length(names(loaded_metrics[[sys]]))){ + aggregated_metrics[sys, ref, , , , ] <- s2dv::Reorder(data = loaded_metrics[[sys]][[ref]], order = c('metric','time','sdate','region')) + } + } - std_hcst_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = std_hcst, - region = regions[[X]], - lon = as.vector(attributes(std_hcst)$Variables$dat1$longitude), - lat = as.vector(attributes(std_hcst)$Variables$dat1$latitude), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - names(dim(std_hcst_spatial_aggr))[length(dim(std_hcst_spatial_aggr))] <- 'region' - - std_obs_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = std_obs, - region = regions[[X]], - lon = as.vector(attributes(std_obs)$Variables$dat1$longitude), - lat = 
as.vector(attributes(std_obs)$Variables$dat1$latitude), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - names(dim(std_obs_spatial_aggr))[length(dim(std_obs_spatial_aggr))] <- 'region' - - n_eff_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = n_eff, - region = regions[[X]], - lon = as.vector(attributes(std_obs)$Variables$dat1$longitude), - lat = as.vector(attributes(std_obs)$Variables$dat1$latitude), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - names(dim(n_eff_spatial_aggr))[length(dim(n_eff_spatial_aggr))] <- 'region' - n_eff_spatial_aggr <- Subset(n_eff_spatial_aggr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') - - ## Calculate correlation - enscorr <- cov_spatial_aggr / (std_hcst_spatial_aggr * std_obs_spatial_aggr) - - ## Drop unwanted dimensions - enscorr <- Subset(enscorr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') + ## Add attributes + attributes(aggregated_metrics)$metrics <- metrics.load + attributes(aggregated_metrics)$start.months <- attributes(loaded_metrics[[1]][[1]])$start.months + attributes(aggregated_metrics)$forecast.months <- attributes(loaded_metrics[[1]][[1]])$forecast.months + attributes(aggregated_metrics)$regions <- regions + attributes(aggregated_metrics)$system.name <- names(loaded_metrics) + attributes(aggregated_metrics)$reference.name <- names(loaded_metrics[[1]]) - ## Calculate significance of corr - t_alpha2_n2 <- qt(p = alpha/2, df = n_eff_spatial_aggr-2, lower.tail = FALSE) - t <- abs(enscorr) * sqrt(n_eff_spatial_aggr-2) / sqrt(1-enscorr^2) - - sign_corr<- array(data = NA, - dim = c(time = length(forecast.months), - smonths = length(start.months), - region = length(regions))) - - - for (time in 1:dim(sign_corr)[['time']]){ - for (mon in 1:dim(sign_corr)[['smonths']]){ - for (reg in 1:dim(sign_corr)[['region']]){ - - if (anyNA(c(t[time, mon, reg], t_alpha2_n2[time, mon, 
reg])) == FALSE - && t[time, mon, reg] >= t_alpha2_n2[time, mon, reg]){ - sign_corr[time, mon, reg] <- TRUE - } else { - sign_corr[time, mon, reg] <- FALSE - } - - } - } - } - ## Save metric result in arrays - aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = enscorr, order = c('time', 'smonths','region')) - metrics_significance[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_corr, order = c('time', 'smonths','region')) - - } ## close if on enscorr - - if(met == 'mean_bias'){ - - mean_bias <- .loadmetrics(input_path = skill.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'mean_bias', - calib_method = calib.method, syear = NULL) - - ## Calculate spatial aggregation - mean_bias_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = mean_bias, - region = regions[[X]], - lon = as.vector(attributes(mean_bias)$Variables$dat1$longitude), - lat = as.vector(attributes(mean_bias)$Variables$dat1$latitude), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - names(dim(mean_bias_spatial_aggr))[length(dim(mean_bias_spatial_aggr))] <- 'region' - - ## Drop unwanted dimensions - mean_bias_spatial_aggr <- Subset(mean_bias_spatial_aggr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') - - ## Save metric result in array - aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = mean_bias_spatial_aggr, order = c('time', 'smonths','region')) - - } ## close on mean_bias - - if(met == 'enssprerr'){ - - enssprerr <- .loadmetrics(input_path = skill.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - forecast_months = forecast.months, - metrics = 'enssprerr', - calib_method = calib.method, syear = NULL) - - 
## Calculate spatial aggregation - enssprerr_spatial_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = enssprerr, - region = regions[[X]], - lon = as.vector(attributes(enssprerr)$Variables$dat1$longitude), - lat = as.vector(attributes(enssprerr)$Variables$dat1$latitude), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - names(dim(enssprerr_spatial_aggr))[length(dim(enssprerr_spatial_aggr))] <- 'region' - - ## Drop unwanted dimensions - enssprerr_spatial_aggr <- Subset(enssprerr_spatial_aggr, along = c('dat', 'var'), indices = list(1,1) , drop = 'selected') - - ## Save metric result in array - aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = enssprerr_spatial_aggr, order = c('time', 'smonths','region')) - - } ## close on enssprerr - - } ## close loop on metric - } ## close if on reference - } ## close if on system - - #Include metric attributes - attributes(aggregated_metrics)$metrics <- metrics.visualize - - ## set NAs to False - metrics_significance[is.na(metrics_significance)] <- FALSE - - } ## close if on score - + } else { + ## Calculate weighted mean of spatial aggregation + aggregated_metrics <- WeightedMetrics(loaded_metrics, + regions = regions, + metric.aggregation = metric.aggregation, + ncores = ncores) + }## close if - ####### PLOT SCORECARDS ########## - ## Create simple scorecard tables ## (one system only) ## Metrics input must be in the same order as function SC_spatial_aggregation scorecard_single <- ScorecardsSingle(data = aggregated_metrics, - sign = metrics_significance, system = system, reference = reference, var = var, @@ -589,17 +142,9 @@ Scorecards <- function(recipe) { metrics = metrics.visualize, table.label = table.label, fileout.label = fileout.label, - plot.legend = plot.legend, - legend.breaks = legend.breaks, legend.white.space = legend.white.space, - legend.width = legend.width, - legend.height = legend.height, - label.scale 
= label.scale, col1.width = col1.width, col2.width = col2.width, - columns.width = columns.width, - font.size = font.size, - round.decimal = round.decimal, output.path = output.path) ## Create multi system/reference scorecard tables @@ -607,7 +152,6 @@ Scorecards <- function(recipe) { ## Metrics input must be in the same order as function SC_spatial_aggregation if(length(system) > 1 || length(reference) > 1){ scorecard_multi <- ScorecardsMulti(data = aggregated_metrics, - sign = metrics_significance, system = system, reference = reference, var = var, @@ -615,21 +159,10 @@ Scorecards <- function(recipe) { end.year = end.year, start.months = start.months, forecast.months = forecast.months, - region.names = names(regions), + region.names = attributes(regions)$names, metrics = metrics.visualize, table.label = table.label, fileout.label = fileout.label, - plot.legend = plot.legend, - legend.breaks = legend.breaks, - legend.white.space = legend.white.space, - legend.width = legend.width, - legend.height = legend.height, - label.scale = label.scale, - col1.width = col1.width, - col2.width = col2.width, - columns.width = columns.width, - font.size = font.size, - round.decimal = round.decimal, output.path = output.path) } ## close if diff --git a/modules/Scorecards/execute_scorecards.R b/modules/Scorecards/execute_scorecards.R index 2c54c48f..24731e9b 100644 --- a/modules/Scorecards/execute_scorecards.R +++ b/modules/Scorecards/execute_scorecards.R @@ -13,19 +13,25 @@ recipe$Run$output_dir <- output_dir ## Loop over variables datasets <- recipe$Analysis$Datasets ## TODO: Improve dependency system? 
-for (system in 1:length(datasets$System)) { - for (reference in 1:length(datasets$Reference)) { - for (variable in 1:length(recipe$Analysis$Variables)) { +for (variable in 1:length(recipe$Analysis$Variables)) { scorecard_recipe <- recipe + scorecard_recipe$Analysis$Datasets$System <- - recipe$Analysis$Datasets$System[[system]] + unlist(recipe$Analysis$Datasets$System) + + ## Include multimodel in systems + if(isTRUE(scorecard_recipe$Datasets$Multimodel$execute) || + scorecard_recipe$Datasets$Multimodel$execute == 'both' || + scorecard_recipe$Datasets$Multimodel$execute == 'yes'){ + scorecard_recipe$Analysis$Datasets$System <- + paste0(scorecard_recipe$Analysis$Datasets$System, 'Multimodel') + } + scorecard_recipe$Analysis$Datasets$Reference <- - recipe$Analysis$Datasets$Reference[[reference]] + unlist(recipe$Analysis$Datasets$Reference) scorecard_recipe$Analysis$Variables <- recipe$Analysis$Variables[[variable]] # Plot Scorecards Scorecards(scorecard_recipe) - } - } } print("##### SCORECARDS SAVED TO THE OUTPUT DIRECTORY #####") diff --git a/modules/Skill/R/CRPS_clim.R b/modules/Skill/R/CRPS_clim.R index 50d63642..b66cab78 100644 --- a/modules/Skill/R/CRPS_clim.R +++ b/modules/Skill/R/CRPS_clim.R @@ -25,4 +25,3 @@ CRPS_clim <- function(obs, memb_dim ='ensemble', return_mean = TRUE, clim.cross. 
return(crps_ref) } } - diff --git a/modules/Skill/R/RPS_clim.R b/modules/Skill/R/RPS_clim.R index 9390ed13..601a10c3 100644 --- a/modules/Skill/R/RPS_clim.R +++ b/modules/Skill/R/RPS_clim.R @@ -1,19 +1,12 @@ # RPS version for climatology - -RPS_clim <- function(obs, indices_for_clim = NULL, - prob_thresholds = c(1/3, 2/3), cross.val = T, - bin_dim_abs = NULL, return_mean = TRUE) { +RPS_clim <- function(obs, indices_for_clim = NULL, prob_thresholds = c(1/3, 2/3), cross.val = TRUE) { if (is.null(indices_for_clim)){ indices_for_clim <- 1:length(obs) } - if (is.null(bin_dim_abs)) { - obs_probs <- .GetProbs(data = obs, indices_for_quantiles = indices_for_clim, ## temporarily removed s2dv::: - prob_thresholds = prob_thresholds, weights = NULL, - cross.val = cross.val) - } else { - obs_probs <- obs - } + + obs_probs <- .GetProbs(data = obs, indices_for_quantiles = indices_for_clim, ## temporarily removed s2dv::: + prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val) # clim_probs: [bin, sdate] clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), 1 - prob_thresholds[length(prob_thresholds)]) clim_probs <- array(clim_probs, dim = dim(obs_probs)) @@ -22,11 +15,6 @@ RPS_clim <- function(obs, indices_for_clim = NULL, probs_clim_cumsum <- apply(clim_probs, 2, cumsum) probs_obs_cumsum <- apply(obs_probs, 2, cumsum) rps_ref <- apply((probs_clim_cumsum - probs_obs_cumsum)^2, 2, sum) - - if (return_mean == TRUE) { - return(mean(rps_ref)) - } else { - return(rps_ref) - } + + return(mean(rps_ref)) } - diff --git a/recipe_NAO_scorecards.yml b/recipe_NAO_scorecards.yml deleted file mode 100644 index 88929bc7..00000000 --- a/recipe_NAO_scorecards.yml +++ /dev/null @@ -1,60 +0,0 @@ -Description: - Author: Nuria Perez-Zanon - Info: Cerise phase 0 assessment NAO index - -Analysis: - Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal - Variables: - name: psl - freq: monthly_mean - Datasets: - System: - name: ECMWF-SEAS5 # Mandatory, str: 
system5c3s system21_m1 system35c3s - Multimodel: no # Mandatory, bool: Either yes/true or no/false - Reference: - name: ERA5 # Mandatory, str: Reference codename. See docu. - Time: - sdate: '0301' ## MMDD - # fcst_year: # Optional, int: Forecast year 'YYYY' - hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' - hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' - ftime_min: 2 # Mandatory, int: First leadtime time step in months - ftime_max: 2 # Mandatory, int: Last leadtime time step in months - Region: - latmin: 20 # Mandatory, int: minimum latitude - latmax: 80 # Mandatory, int: maximum latitude - lonmin: -80 # Mandatory, int: minimum longitude - lonmax: 40 # Mandatory, int: maximum longitude - Regrid: - method: bilinear # Mandatory, str: Interpolation method. See docu. - type: "to_system" - #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. - Workflow: - Anomalies: - compute: yes - cross_validation: no - save: none - Indices: - NAO: {obsproj: TRUE, save: 'all', plot_ts: TRUE, plot_sp: yes} - Calibration: - method: raw # Mandatory, str: Calibration method. See docu. - save: none - Skill: - metric: mean_bias EnsCorr rps rpss crps crpss EnsSprErr - save: 'all' - Probabilities: - percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. 
- save: none - Indicators: - index: no - ncores: 4 # Optional, int: number of cores, defaults to 1 - remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE - Output_format: scorecards - logo: yes -Run: - Loglevel: INFO - Terminal: yes - output_dir: /esarchive/scratch/nperez/cs_oper/ - code_dir: /esarchive/scratch/nperez/git3/sunset/ - - diff --git a/recipe_ecvs_scorecards_seasonal.yml b/recipe_ecvs_scorecards_seasonal.yml deleted file mode 100644 index 47c91f6c..00000000 --- a/recipe_ecvs_scorecards_seasonal.yml +++ /dev/null @@ -1,107 +0,0 @@ -Description: - Author: Nuria Perez-Zanon - Info: Cerise phase 0 assessment for ECVs - -Analysis: - Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal - Variables: - - {name: tas, freq: monthly_mean, units: K} - - {name: tasmin, freq: monthly_mean} - - {name: tasmax, freq: monthly_mean} - - {name: tos, freq: monthly_mean} - - {name: sfcWind, freq: monthly_mean} - - {name: tdps, freq: monthly_mean} - - {name: psl, freq: monthly_mean} - #- {name: prlr, freq: monthly_mean, units: ms-1, flux: yes} - Datasets: - System: - - {name: Meteo-France-System8} - - {name: CMCC-SPS3.5} - - {name: UKMO-System602} - Multimodel: no # Mandatory, bool: Either yes/true or no/false - Reference: - - {name: ERA5} # Mandatory, str: Reference codename. See docu. - Time: - sdate: - - '0101' - - '0201' - - '0301' - - '0401' - - '0501' - - '0601' - - '0701' - - '0801' - - '0901' - - '1001' - - '1101' - - '1201' - hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' - hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' - ftime_min: 1 # Mandatory, int: First leadtime time step in months - ftime_max: 6 # Mandatory, int: Last leadtime time step in months - Region: - - {name: "EU", latmin: 0, latmax: 20, lonmin: 0, lonmax: 45} - Regrid: - method: bilinear # Mandatory, str: Interpolation method. See docu. 
- type: "to_system" - #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. - Workflow: - Anomalies: - compute: yes - cross_validation: no - save: none - Calibration: - method: raw # Mandatory, str: Calibration method. See docu. - cross_validation: yes - save: none - Skill: - metric: mean_bias EnsCorr rpss crpss EnsSprErr - save: 'all' - cross_validation: yes - Probabilities: - percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. - save: 'all' - Indicators: - index: no - Visualization: - plots: skill_metrics #forecast_ensemble_mean most_likely_terciles - multi_panel: no - Scorecards: - execute: yes # yes/no - regions: - Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} - Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} - Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} - start_months: 'all' - metric: mean_bias enscorr rpss crpss EnsSprErr - metric_aggregation: 'skill' - inf_to_na: yes - table_label: NULL - fileout_label: NULL - col1_width: NULL - col2_width: NULL - calculate_diff: FALSE - ncores: 4 # Optional, int: number of cores, defaults to 1 - remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE - Output_format: scorecards - logo: yes -Run: - Loglevel: INFO - Terminal: yes - filesystem: cerise - output_dir: /perm/cyce/phase0/ # replace with the directory where you want to save the outputs - code_dir: /ec/res4/scratch/cyce/cerise/sunset/ # replace with the directory where your code is - autosubmit: no - # fill only if using autosubmit - auto_conf: - script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script - expid: a68v # replace with your EXPID - hpc_user: bsc32339 # replace with your hpc username - wallclock: 02:00 # hh:mm - processors_per_job: 4 - platform: nord3v2 - email_notifications: yes # 
enable/disable email notifications. Change it if you want to. - email_address: nuria.perez@bsc.es # replace with your email address - notify_completed: yes # notify me by email when a job finishes - notify_failed: yes # notify me by email when a job fails - diff --git a/recipe_tas_scorecards_seasonal.yml b/recipe_tas_scorecards_seasonal.yml deleted file mode 100644 index 9a5d5580..00000000 --- a/recipe_tas_scorecards_seasonal.yml +++ /dev/null @@ -1,91 +0,0 @@ -Description: - Author: nperez - Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) - -Analysis: - Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal - Variables: - name: tasmin - freq: monthly_mean - Datasets: - System: - name: Meteo-France-System8 - Multimodel: no # Mandatory, bool: Either yes/true or no/false - Reference: - name: ERA5 # Mandatory, str: Reference codename. See docu. - Time: - sdate: '0101' - hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' - hcst_end: '1996' # Mandatory, int: Hindcast end year 'YYYY' - ftime_min: 1 # Mandatory, int: First leadtime time step in months - ftime_max: 2 # Mandatory, int: Last leadtime time step in months - Region: - latmin: 0 - latmax: 90 - lonmin: 0 - lonmax: 15.5 - Regrid: - method: bilinear # Mandatory, str: Interpolation method. See docu. - type: "to_system" - #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. - Workflow: - Anomalies: - compute: yes - cross_validation: no - save: none - Calibration: - method: raw # Mandatory, str: Calibration method. See docu. - cross_validation: yes - save: none - Skill: - metric: mean_bias EnsCorr rpss crpss EnsSprErr - save: 'all' - cross_validation: yes - Probabilities: - percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. 
- save: none - Indicators: - index: no - Visualization: - plots: skill_metrics #forecast_ensemble_mean most_likely_terciles - multi_panel: no - dots: both - #projection: robinson - Scorecards: - execute: no # yes/no - regions: - Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} - Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} - Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} - start_months: NULL - metric: mean_bias enscorr rpss crpss EnsSprErr - metric_aggregation: 'skill' - #inf_to_na: yes - table_label: NULL - fileout_label: NULL - col1_width: NULL - col2_width: NULL - calculate_diff: FALSE - ncores: 4 # Optional, int: number of cores, defaults to 1 - remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE - Output_format: scorecards - logo: yes -Run: - Loglevel: INFO - Terminal: yes - filesystem: cerise - output_dir: /perm/cyce/phase0/ # replace with the directory where you want to save the outputs - code_dir: /esarchive/scratch/nperez/git3/sunset/ # replace with the directory where your code is - autosubmit: no - # fill only if using autosubmit - auto_conf: - script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script - expid: a68v # replace with your EXPID - hpc_user: bsc32339 # replace with your hpc username - wallclock: 02:00 # hh:mm - processors_per_job: 4 - platform: nord3v2 - email_notifications: yes # enable/disable email notifications. Change it if you want to. 
- email_address: nuria.perez@bsc.es # replace with your email address - notify_completed: yes # notify me by email when a job finishes - notify_failed: yes # notify me by email when a job fails -- GitLab From 8a974deed58c49497bbd8f16ac916e1af37cc6e3 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Wed, 21 Feb 2024 11:34:48 +0100 Subject: [PATCH 87/91] adjustments for general recipe input --- modules/Scorecards/Scorecards.R | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/modules/Scorecards/Scorecards.R b/modules/Scorecards/Scorecards.R index 3206c1b1..e0b55641 100644 --- a/modules/Scorecards/Scorecards.R +++ b/modules/Scorecards/Scorecards.R @@ -22,20 +22,23 @@ Scorecards <- function(recipe) { output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/") dir.create(output.path, recursive = T, showWarnings = F) - system <- recipe$Analysis$Datasets$System$name - reference <- recipe$Analysis$Datasets$Reference$name + system <- recipe$Analysis$Datasets$System + reference <- recipe$Analysis$Datasets$Reference var <- recipe$Analysis$Variables$name start.year <- as.numeric(recipe$Analysis$Time$hcst_start) end.year <- as.numeric(recipe$Analysis$Time$hcst_end) forecast.months <- recipe$Analysis$Time$ftime_min : recipe$Analysis$Time$ftime_max - - if (recipe$Analysis$Workflow$Scorecards$start_months == 'all') { - start.months <- 1:12 + + if (recipe$Analysis$Workflow$Scorecards$start_months == 'all' || is.null(recipe$Analysis$Workflow$Scorecards$start_months)) { + start.months <- as.numeric(substr(recipe$Analysis$Time$sdate, 1,2)) } else { start.months <- as.numeric(strsplit(recipe$Analysis$Workflow$Scorecards$start_months, split = ", | |,")[[1]]) + if(!any(as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))) %in% start.months){ + error(recipe$Run$logger,"Requested start dates for scorecards must be loaded") + } } - + regions <- recipe$Analysis$Workflow$Scorecards$regions for (i in names(regions)){regions[[i]] <- unlist(regions[[i]])} 
@@ -74,7 +77,7 @@ Scorecards <- function(recipe) { col2.width <- recipe$Analysis$Workflow$Scorecards$col2_width calculate.diff <- recipe$Analysis$Workflow$Scorecards$calculate_diff ncores <- 1 # recipe$Analysis$ncores - + ## Load data files loaded_metrics <- LoadMetrics(system = system, reference = reference, -- GitLab From bf93485b0704680cfeae4668533c3f804a51b8bb Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Fri, 23 Feb 2024 12:19:57 +0100 Subject: [PATCH 88/91] adjustetments for multimodel scorecard --- modules/Scorecards/R/tmp/ScorecardsMulti.R | 168 +++++++++++--------- modules/Scorecards/R/tmp/ScorecardsSingle.R | 78 ++++----- modules/Scorecards/execute_scorecards.R | 18 ++- 3 files changed, 142 insertions(+), 122 deletions(-) diff --git a/modules/Scorecards/R/tmp/ScorecardsMulti.R b/modules/Scorecards/R/tmp/ScorecardsMulti.R index 89f1df44..99909f09 100644 --- a/modules/Scorecards/R/tmp/ScorecardsMulti.R +++ b/modules/Scorecards/R/tmp/ScorecardsMulti.R @@ -82,9 +82,11 @@ ScorecardsMulti <- function(data, attributes(input_data)$metrics <- metrics ## Transform data for scorecards by forecast month (types 11 & 12) - transformed_data <- SCTransform(data = input_data, - sdate_dim = 'sdate', - ftime_dim = 'time') + if(length(start.months) >= length(forecast.months)){ + transformed_data <- SCTransform(data = input_data, + sdate_dim = 'sdate', + ftime_dim = 'time') + } ## Load configuration files sys_dict <- read_yaml("/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/conf/archive.yml")$esarchive @@ -106,6 +108,10 @@ ScorecardsMulti <- function(data, reference.name <- c(reference.name, reference.name1) } + if("Multimodel" %in% system ){ + system.name <- c(system.name, "Multimodel") + } + ## Get metric long names metric.names.list <- .met_names(metrics, var.units) @@ -159,7 +165,7 @@ ScorecardsMulti <- function(data, font.size = 1.1 legend.white.space <- col1.width <- col2.width <- NULL ## Use default values of function - + ## Loop over region for(reg in 
1:length(region.names)){ @@ -270,87 +276,91 @@ ScorecardsMulti <- function(data, #### Scorecard_type 11 #### ## (transformation only) - fileout <- .Filename(model = model, eval.name = eval.filename, var = var, - start.year = start.year, end.year = end.year, scorecard.type = 11, - region = sub(" ", "-", region.names[reg]), - fileout.label = fileout.label, output.path = output.path) - if(model == 'system'){ - data_sc_11 <- Subset(transformed_data, c('reference','region'), list(1, reg), drop = 'selected') - } else if(model == 'reference'){ - data_sc_11 <- Subset(transformed_data, c('system','region'), list(1, reg), drop = 'selected') + if(length(start.months) >= length(forecast.months)){ + fileout <- .Filename(model = model, eval.name = eval.filename, var = var, + start.year = start.year, end.year = end.year, scorecard.type = 11, + region = sub(" ", "-", region.names[reg]), + fileout.label = fileout.label, output.path = output.path) + if(model == 'system'){ + data_sc_11 <- Subset(transformed_data, c('reference','region'), list(1, reg), drop = 'selected') + } else if(model == 'reference'){ + data_sc_11 <- Subset(transformed_data, c('system','region'), list(1, reg), drop = 'selected') + } + SCPlotScorecard(data = data_sc_11, + row.dim = model, + subrow.dim = 'time', + col.dim = 'metric', + subcol.dim = 'sdate', + legend.dim = 'metric', + row.names = model.name, + subrow.names = forecast.months, + col.names = metric.names, + subcol.names = month.abb[as.numeric(start.months)], + table.title = table.title, + table.subtitle = table.subtitle, + row.title = table.model.name, + subrow.title = 'Forecast Month', + col.title = 'Target month', + legend.breaks = legend.breaks, + plot.legend = plot.legend, + label.scale = label.scale, + legend.width = legend.width, + legend.height = legend.height, + palette = palette, + colorunder = legend.col.inf, + colorsup = legend.col.sup, + round.decimal = round.decimal, + font.size = font.size, + legend.white.space = legend.white.space, + 
col1.width = 4, + col2.width = col2.width, + fileout = fileout) } - SCPlotScorecard(data = data_sc_11, - row.dim = model, - subrow.dim = 'time', - col.dim = 'metric', - subcol.dim = 'sdate', - legend.dim = 'metric', - row.names = model.name, - subrow.names = forecast.months, - col.names = metric.names, - subcol.names = month.abb[as.numeric(start.months)], - table.title = table.title, - table.subtitle = table.subtitle, - row.title = table.model.name, - subrow.title = 'Forecast Month', - col.title = 'Target month', - legend.breaks = legend.breaks, - plot.legend = plot.legend, - label.scale = label.scale, - legend.width = legend.width, - legend.height = legend.height, - palette = palette, - colorunder = legend.col.inf, - colorsup = legend.col.sup, - round.decimal = round.decimal, - font.size = font.size, - legend.white.space = legend.white.space, - col1.width = 4, - col2.width = col2.width, - fileout = fileout) #### Scorecard_type 12 #### ## (transformation and reorder) - fileout <- .Filename(model = model, eval.name = eval.filename, var = var, - start.year = start.year, end.year = end.year, scorecard.type = 12, - region = sub(" ", "-", region.names[reg]), - fileout.label = fileout.label, output.path = output.path) - new_order <- c('system', 'reference', 'metric', 'region','sdate', 'time') - if(model == 'system'){ - data_sc_12 <- Subset(Reorder(transformed_data, new_order), c('reference','region'), list(1, reg), drop = 'selected') - } else if(model == 'reference'){ - data_sc_12 <- Subset(Reorder(transformed_data, new_order), c('system','region'), list(1, reg), drop = 'selected') + if(length(start.months) >= length(forecast.months)){ + fileout <- .Filename(model = model, eval.name = eval.filename, var = var, + start.year = start.year, end.year = end.year, scorecard.type = 12, + region = sub(" ", "-", region.names[reg]), + fileout.label = fileout.label, output.path = output.path) + new_order <- c('system', 'reference', 'metric', 'region','sdate', 'time') + if(model == 
'system'){ + data_sc_12 <- Subset(Reorder(transformed_data, new_order), c('reference','region'), list(1, reg), drop = 'selected') + } else if(model == 'reference'){ + data_sc_12 <- Subset(Reorder(transformed_data, new_order), c('system','region'), list(1, reg), drop = 'selected') + } + SCPlotScorecard(data = data_sc_12, + row.dim = 'time', + subrow.dim = model, + col.dim = 'metric', + subcol.dim = 'sdate', + legend.dim = 'metric', + row.names = forecast.months, + subrow.names = model.name, + col.names = metric.names, + subcol.names = month.abb[as.numeric(start.months)], + table.title = table.title, + table.subtitle = table.subtitle, + row.title = 'Forecast Month', + subrow.title = table.model.name, + col.title = 'Target month', + legend.breaks = legend.breaks, + plot.legend = plot.legend, + label.scale = label.scale, + legend.width = legend.width, + legend.height = legend.height, + palette = palette, + colorunder = legend.col.inf, + colorsup = legend.col.sup, + round.decimal = round.decimal, + font.size = font.size, + legend.white.space = legend.white.space, + col1.width = col1.width, + col2.width = 4, + fileout = fileout) } - SCPlotScorecard(data = data_sc_12, - row.dim = 'time', - subrow.dim = model, - col.dim = 'metric', - subcol.dim = 'sdate', - legend.dim = 'metric', - row.names = forecast.months, - subrow.names = model.name, - col.names = metric.names, - subcol.names = month.abb[as.numeric(start.months)], - table.title = table.title, - table.subtitle = table.subtitle, - row.title = 'Forecast Month', - subrow.title = table.model.name, - col.title = 'Target month', - legend.breaks = legend.breaks, - plot.legend = plot.legend, - label.scale = label.scale, - legend.width = legend.width, - legend.height = legend.height, - palette = palette, - colorunder = legend.col.inf, - colorsup = legend.col.sup, - round.decimal = round.decimal, - font.size = font.size, - legend.white.space = legend.white.space, - col1.width = col1.width, - col2.width = 4, - fileout = fileout) 
} ## close loop on region diff --git a/modules/Scorecards/R/tmp/ScorecardsSingle.R b/modules/Scorecards/R/tmp/ScorecardsSingle.R index 1c010b1c..0e1d7cbc 100644 --- a/modules/Scorecards/R/tmp/ScorecardsSingle.R +++ b/modules/Scorecards/R/tmp/ScorecardsSingle.R @@ -89,9 +89,11 @@ ScorecardsSingle <- function(data, system, reference, var, start.year, end.year, attributes(input_data)$metrics <- metrics ## Transform data for scorecards by forecast month (types 3 & 4) - transformed_data <- SCTransform(data = input_data, - sdate_dim = 'sdate', - ftime_dim = 'time') + if(length(start.months) >= length(forecast.months)){ + transformed_data <- SCTransform(data = input_data, + sdate_dim = 'sdate', + ftime_dim = 'time') + } ## Load configuration files sys_dict <- read_yaml("conf/archive.yml")$esarchive @@ -256,46 +258,48 @@ ScorecardsSingle <- function(data, system, reference, var, start.year, end.year, #### Scorecard_type 3 #### ## (transformation only) - fileout <- .Filename(system = system[sys], reference = reference[ref], var = var, - start.year = start.year, end.year = end.year, scorecard.type = 3, - fileout.label = fileout.label, output.path = output.path) - data_sc_3 <- Subset(transformed_data, c('system', 'reference'), list(sys, ref), drop = 'selected') - SCPlotScorecard(data = data_sc_3, - row.dim = 'region', - subrow.dim = 'time', - col.dim = 'metric', - subcol.dim = 'sdate', - legend.dim = 'metric', - row.names = region.names, - subrow.names = forecast.months, - col.names = metric.names, - subcol.names = month.abb[as.numeric(start.months)], - table.title = table.title, - table.subtitle = table.subtitle, - row.title = 'Region', - subrow.title = 'Forecast Month', - col.title = 'Target month', - legend.breaks = legend.breaks, - plot.legend = plot.legend, - label.scale = label.scale, - legend.width = legend.width, - legend.height = legend.height, - palette = palette, - colorunder = legend.col.inf, - colorsup = legend.col.sup, - round.decimal = round.decimal, - 
font.size = font.size, - legend.white.space = legend.white.space, - col1.width = col1.width, - col2.width = col2.width, - fileout = fileout) + if(length(start.months) >= length(forecast.months)){ + fileout <- .Filename(system = system[sys], reference = reference[ref], var = var, + start.year = start.year, end.year = end.year, scorecard.type = 3, + fileout.label = fileout.label, output.path = output.path) + data_sc_3 <- Subset(transformed_data, c('system', 'reference'), list(sys, ref), drop = 'selected') + SCPlotScorecard(data = data_sc_3, + row.dim = 'region', + subrow.dim = 'time', + col.dim = 'metric', + subcol.dim = 'sdate', + legend.dim = 'metric', + row.names = region.names, + subrow.names = forecast.months, + col.names = metric.names, + subcol.names = month.abb[as.numeric(start.months)], + table.title = table.title, + table.subtitle = table.subtitle, + row.title = 'Region', + subrow.title = 'Forecast Month', + col.title = 'Target month', + legend.breaks = legend.breaks, + plot.legend = plot.legend, + label.scale = label.scale, + legend.width = legend.width, + legend.height = legend.height, + palette = palette, + colorunder = legend.col.inf, + colorsup = legend.col.sup, + round.decimal = round.decimal, + font.size = font.size, + legend.white.space = legend.white.space, + col1.width = col1.width, + col2.width = col2.width, + fileout = fileout) + } #### Scorecard_type 4 #### ## (transformation and reorder) ## Scorecard type 4 is same as type 3 for only one region, therefore is ## only plotted if more that one region is requested - if(dim(input_data)['region'] > 1) { + if(dim(data)['region'] > 1 & length(start.months) >= length(forecast.months)){ fileout <- .Filename(system = system[sys], reference = reference[ref], var = var, start.year = start.year, end.year = end.year, scorecard.type = 4, fileout.label = fileout.label, output.path = output.path) diff --git a/modules/Scorecards/execute_scorecards.R b/modules/Scorecards/execute_scorecards.R index 
24731e9b..6c2daa5c 100644 --- a/modules/Scorecards/execute_scorecards.R +++ b/modules/Scorecards/execute_scorecards.R @@ -5,6 +5,12 @@ args = commandArgs(trailingOnly = TRUE) recipe_file <- args[1] output_dir <- args[2] +## for testing + +recipe_file <- '/esarchive/scratch/nmilders/multimodel/recipe_multimodel_seasonal_nadia_test.yml' +output_dir <- '/esarchive/scratch/nmilders/scorecards_data/multimodel/test/recipe_multimodel_seasonal_nadia_test_20240223114714' + + ## TODO: Replace with function # Read recipe and set outdir recipe <- read_yaml(recipe_file) @@ -17,18 +23,18 @@ for (variable in 1:length(recipe$Analysis$Variables)) { scorecard_recipe <- recipe scorecard_recipe$Analysis$Datasets$System <- - unlist(recipe$Analysis$Datasets$System) + as.vector(unlist(recipe$Analysis$Datasets$System)) ## Include multimodel in systems - if(isTRUE(scorecard_recipe$Datasets$Multimodel$execute) || - scorecard_recipe$Datasets$Multimodel$execute == 'both' || - scorecard_recipe$Datasets$Multimodel$execute == 'yes'){ + if(isTRUE(scorecard_recipe$Analysis$Datasets$Multimodel$execute) || + scorecard_recipe$Analysis$Datasets$Multimodel$execute == 'both' || + scorecard_recipe$Analysis$Datasets$Multimodel$execute == 'yes'){ scorecard_recipe$Analysis$Datasets$System <- - paste0(scorecard_recipe$Analysis$Datasets$System, 'Multimodel') + c(scorecard_recipe$Analysis$Datasets$System, 'Multimodel') } scorecard_recipe$Analysis$Datasets$Reference <- - unlist(recipe$Analysis$Datasets$Reference) + as.vector(unlist(recipe$Analysis$Datasets$Reference)) scorecard_recipe$Analysis$Variables <- recipe$Analysis$Variables[[variable]] # Plot Scorecards -- GitLab From a16d2fb13f99fa81b809bdd2424853728fddb737 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 28 Feb 2024 09:46:43 +0100 Subject: [PATCH 89/91] Add Scorecards AS dependencies and create proj directory if it does not exist --- autosubmit/conf_esarchive/jobs.yml | 3 +-- tools/write_autosubmit_conf.R | 38 +++++++++++++++++------------- 2 files 
changed, 23 insertions(+), 18 deletions(-) diff --git a/autosubmit/conf_esarchive/jobs.yml b/autosubmit/conf_esarchive/jobs.yml index 7e2a1948..04d23ba0 100644 --- a/autosubmit/conf_esarchive/jobs.yml +++ b/autosubmit/conf_esarchive/jobs.yml @@ -24,6 +24,5 @@ JOBS: PLATFORM: nord3v2 NOTIFY_ON: PROCESSORS: 1 - DEPENDENCIES: verification - ## TODO: Add scorecards-multimodel with multimodel dependency? + DEPENDENCIES: diff --git a/tools/write_autosubmit_conf.R b/tools/write_autosubmit_conf.R index a31df62b..d80519bc 100644 --- a/tools/write_autosubmit_conf.R +++ b/tools/write_autosubmit_conf.R @@ -23,6 +23,11 @@ write_autosubmit_conf <- function(recipe, nchunks, # Output directory dest_dir <- paste0(auto_specs$experiment_dir, expid, "/conf/") proj_dir <- paste0(auto_specs$experiment_dir, expid, "/proj/auto-s2s/") + # Create project directory if it does not exist yet so that chunk_to_recipe + # and split_to_recipe files can be created + if (!dir.exists(proj_dir)) { + dir.create(proj_dir, recursive = TRUE) + } # Modify the configuration files according to the info in the recipe for (file in list.files(template_dir)) { conf_type <- strsplit(file, split = "[.]")[[1]][1] @@ -68,28 +73,14 @@ write_autosubmit_conf <- function(recipe, nchunks, } conf$JOBS$verification$PROCESSORS <- recipe$Run$auto_conf$processors_per_job # ncores? 
conf$JOBS$verification$CUSTOM_DIRECTIVES <- recipe$Run$auto_conf$custom_directives - # Only include Scorecards job if section exists in the recipe and - # is set to 'execute: True' - if (!("Scorecards" %in% names(recipe$Analysis$Workflow)) || - (!recipe$Analysis$Workflow$Scorecards$execute)) { - conf$JOBS$scorecards <- NULL - } else { - ## TODO: Add multimodel dependency - if (recipe$Run$auto_conf$notify_completed) { - conf$JOBS$scorecards$NOTIFY_ON <- paste(conf$JOBS$scorecards$NOTIFY_ON, - "COMPLETED") - } - if (recipe$Run$auto_conf$notify_failed) { - conf$JOBS$scorecards$NOTIFY_ON <- paste(conf$JOBS$scorecards$NOTIFY_ON, - "FAILED") - } - } # Only include Multimodel job if sections exists in the recipe # is set to execute = 'True' or 'both' if (!is.null(recipe$Analysis$Datasets$Multimodel) && tolower(recipe$Analysis$Datasets$Multimodel$execute) == "false") { conf$JOBS$multimodel <- NULL + conf$JOBS$scorecards$DEPENDENCIES <- "verification" } else { + conf$JOBS$scorecards$DEPENDENCIES <- "multimodel" # Create bash file to associate split number to recipe name split_file <- paste0(proj_dir, "split_to_recipe") .create_bash_file(fileout = split_file, @@ -119,6 +110,21 @@ write_autosubmit_conf <- function(recipe, nchunks, conf$JOBS$multimodel$CUSTOM_DIRECTIVES <- recipe$Run$auto_conf$custom_directives conf$JOBS$multimodel$WALLCLOCK <- recipe$Run$auto_conf$wallclock } + # Only include Scorecards job if section exists in the recipe and + # is set to 'execute: True' + if (!("Scorecards" %in% names(recipe$Analysis$Workflow)) || + (!recipe$Analysis$Workflow$Scorecards$execute)) { + conf$JOBS$scorecards <- NULL + } else { + if (recipe$Run$auto_conf$notify_completed) { + conf$JOBS$scorecards$NOTIFY_ON <- paste(conf$JOBS$scorecards$NOTIFY_ON, + "COMPLETED") + } + if (recipe$Run$auto_conf$notify_failed) { + conf$JOBS$scorecards$NOTIFY_ON <- paste(conf$JOBS$scorecards$NOTIFY_ON, + "FAILED") + } + } } else if (conf_type == "platforms") { # Section 4: platform configuration ## 
nord3v2 configuration... platform name? user, processors_per_node -- GitLab From 691b5bc561eb08598b1e4f1dce238b18162b197d Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 28 Feb 2024 16:03:01 +0100 Subject: [PATCH 90/91] Changes to choose slurm directives for multimodel (WIP) --- recipes/recipe_multimodel_seasonal.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/recipes/recipe_multimodel_seasonal.yml b/recipes/recipe_multimodel_seasonal.yml index b2442d1f..d2cf1fc7 100644 --- a/recipes/recipe_multimodel_seasonal.yml +++ b/recipes/recipe_multimodel_seasonal.yml @@ -72,7 +72,11 @@ Run: expid: a6wq # replace with your EXPID hpc_user: bsc32762 # replace with your hpc username wallclock: 02:00 # hh:mm - processors_per_job: 4 + wallclock_multimodel: + processors_verification: 4 + processors_multimodel: 16 + custom_directives_verification: ['#SBATCH --exclusive'] + custom_directives_multimodel: ['#SBATCH --exclusive', '#SBATCH --constraint=highmem'] platform: nord3v2 email_notifications: yes # enable/disable email notifications. Change it if you want to. 
email_address: victoria.agudetse@bsc.es # replace with your email address -- GitLab From fa4a1e0cc6fe2dacafae4b015b36116381c8a8f3 Mon Sep 17 00:00:00 2001 From: vagudets Date: Thu, 7 Mar 2024 11:51:52 +0100 Subject: [PATCH 91/91] Enable user to choose specific HPC cluster parameters for multi-model jobs --- recipes/recipe_multimodel_seasonal.yml | 14 ++++++------- tools/check_recipe.R | 27 +++++++++++++++++++++++++- tools/write_autosubmit_conf.R | 7 +++---- 3 files changed, 35 insertions(+), 13 deletions(-) diff --git a/recipes/recipe_multimodel_seasonal.yml b/recipes/recipe_multimodel_seasonal.yml index d2cf1fc7..f974fc48 100644 --- a/recipes/recipe_multimodel_seasonal.yml +++ b/recipes/recipe_multimodel_seasonal.yml @@ -6,7 +6,6 @@ Analysis: Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal Variables: - {name: tas, freq: monthly_mean, units: C} - - {name: prlr, freq: monthly_mean, units: mm, flux: no} Datasets: System: - {name: ECMWF-SEAS5.1} @@ -21,9 +20,8 @@ Analysis: Time: sdate: - '0101' ## MMDD - - '0601' fcst_year: '2023' # Optional, int: Forecast year 'YYYY' - hcst_start: '2007' # Mandatory, int: Hindcast start year 'YYYY' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' ftime_min: 1 # Mandatory, int: First leadtime time step in months ftime_max: 2 # Mandatory, int: Last leadtime time step in months @@ -54,7 +52,7 @@ Analysis: plots: skill_metrics forecast_ensemble_mean most_likely_terciles multi_panel: no dots: both - ncores: 4 # Optional, int: number of cores, defaults to 1 + ncores: 16 # Optional, int: number of cores, defaults to 1 remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE Output_format: scorecards # scorecards logo: yes @@ -71,11 +69,11 @@ Run: script: ./example_scripts/multimodel_seasonal.R # replace with the path to your script expid: a6wq # replace with your EXPID hpc_user: bsc32762 # replace with your hpc username 
- wallclock: 02:00 # hh:mm - wallclock_multimodel: - processors_verification: 4 + wallclock: 01:00 # hh:mm + wallclock_multimodel: 02:00 + processors_per_job: 4 processors_multimodel: 16 - custom_directives_verification: ['#SBATCH --exclusive'] + custom_directives: ['#SBATCH --exclusive'] custom_directives_multimodel: ['#SBATCH --exclusive', '#SBATCH --constraint=highmem'] platform: nord3v2 email_notifications: yes # enable/disable email notifications. Change it if you want to. diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 4bea7dab..1f1c7dc8 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -700,7 +700,7 @@ check_recipe <- function(recipe) { "email_address", "notify_completed", "notify_failed") # Autosubmit false by default if (is.null(recipe$Run$autosubmit)) { - recipe$Run$autosubmit <- F + recipe$Run$autosubmit <- FALSE } # Autosubmit configuration checks if (recipe$Run$autosubmit) { @@ -762,6 +762,31 @@ check_recipe <- function(recipe) { "Please check your hpc_user ID. 
It should look like: 'bsc32xxx'") error_status <- TRUE } + # Multimodel-specific parameters + if (multimodel) { + if (is.null(recipe$Run$auto_conf$wallclock_multimodel)) { + warn(recipe$Run$logger, + paste("No parameter 'wallclock_multimodel' specified, 'wallclock'", + "will be used.")) + recipe$Run$auto_conf$wallclock_multimodel <- recipe$Run$auto_conf$wallclock + } + if (is.null(recipe$Run$auto_conf$custom_directives_multimodel) && + !is.null(recipe$Run$auto_conf$custom_directives_multimodel)) { + warn(recipe$Run$logger, + paste("No 'custom_directives_multimodel' specified, the", + "single-model verification custom directives will be used.")) + recipe$Run$auto_conf$custom_directives_multimodel <- + recipe$Run$auto_conf$custom_directives + } + if (is.null(recipe$Run$auto_conf$processors_multimodel) && + !is.null(recipe$Run$auto_conf$processors_per_job)) { + warn(recipe$Run$logger, + paste("No 'processors_multimodel' specified, the", + "'processors_per_job' parameters will be used.")) + recipe$Run$auto_conf$custom_directives_multimodel <- + recipe$Run$auto_conf$custom_directives + } + } } # --------------------------------------------------------------------- diff --git a/tools/write_autosubmit_conf.R b/tools/write_autosubmit_conf.R index d80519bc..5f398197 100644 --- a/tools/write_autosubmit_conf.R +++ b/tools/write_autosubmit_conf.R @@ -106,9 +106,9 @@ write_autosubmit_conf <- function(recipe, nchunks, "FAILED") } - conf$JOBS$multimodel$PROCESSORS <- recipe$Run$auto_conf$processors_per_job - conf$JOBS$multimodel$CUSTOM_DIRECTIVES <- recipe$Run$auto_conf$custom_directives - conf$JOBS$multimodel$WALLCLOCK <- recipe$Run$auto_conf$wallclock + conf$JOBS$multimodel$PROCESSORS <- recipe$Run$auto_conf$processors_multimodel + conf$JOBS$multimodel$CUSTOM_DIRECTIVES <- recipe$Run$auto_conf$custom_directives_multimodel + conf$JOBS$multimodel$WALLCLOCK <- recipe$Run$auto_conf$wallclock_multimodel } # Only include Scorecards job if section exists in the recipe and # is set to 
'execute: True' @@ -138,7 +138,6 @@ write_autosubmit_conf <- function(recipe, nchunks, conf$common$RECIPE <- paste0(recipe$name, ".yml") } # Write config file inside autosubmit dir - ## TODO: Change write.type depending on autosubmit version write.config(conf, paste0(dest_dir, dest_file), write.type = auto_specs$conf_format) Sys.chmod(paste0(dest_dir, dest_file), mode = "755", use_umask = F) -- GitLab