From a31aefe51ebffc99a20987daea81d599cadcf4da Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 25 Aug 2022 11:46:55 +0200 Subject: [PATCH 1/5] Add -9.e+33 as missing value for skill metrics to handle NAs --- .../testing_recipes/recipe_era5land.yml | 46 +++++++++++++++++++ modules/Saving/Saving.R | 5 +- modules/Skill/Skill.R | 2 +- modules/test_victoria.R | 2 +- 4 files changed, 52 insertions(+), 3 deletions(-) create mode 100644 modules/Loading/testing_recipes/recipe_era5land.yml diff --git a/modules/Loading/testing_recipes/recipe_era5land.yml b/modules/Loading/testing_recipes/recipe_era5land.yml new file mode 100644 index 00000000..55da61e5 --- /dev/null +++ b/modules/Loading/testing_recipes/recipe_era5land.yml @@ -0,0 +1,46 @@ +Description: + Author: V. Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: system7c3s + Multimodel: False + Reference: + name: era5land + Time: + sdate: '1101' + fcst_year: '2020' + hcst_start: '1993' + hcst_end: '2016' + ftime_min: 1 + ftime_max: 3 + Region: + latmin: -10 + latmax: 10 + lonmin: 0 + lonmax: 20 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: mse_min + Skill: + metric: RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] #, [1/4, 2/4, 3/4]] + Indicators: + index: no + ncores: 1 + remove_NAs: yes + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 2673eeb6..e0722e9a 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -412,6 +412,8 @@ save_metrics <- function(skill, for (i in 1:length(skill)) { metric <- names(skill[i]) long_name <- dictionary$metrics[[metric]]$long_name + missing_val <- -9.e+33 + skill[[i]][is.na(skill[[i]])] <- missing_val if (tolower(agg) == "country") { sdname <- paste0(metric, " region-aggregated metric") dims <- c('Country', 'time') @@ -421,7 +423,8 @@ save_metrics <- function(skill, } metadata <- list(metric = list(name = metric, standard_name = sdname, - long_name = long_name)) + long_name = long_name, + missing_value = missing_val)) attr(skill[[i]], 'variables') <- metadata names(dim(skill[[i]])) <- dims } diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 7c6820da..23c4ac52 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -182,7 +182,7 @@ compute_probabilities <- function(data, recipe) { ncores <- recipe$Analysis$ncores } if (is.null(recipe$Analysis$remove_NAs)) { - na.rm = F + na.rm = T } else { na.rm = recipe$Analysis$remove_NAs } diff --git a/modules/test_victoria.R b/modules/test_victoria.R index e1a1531d..df425659 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -4,7 +4,7 @@ source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") -recipe_file <- "modules/Loading/testing_recipes/recipe_4.yml" +recipe_file <- "modules/Loading/testing_recipes/recipe_era5land.yml" recipe <- read_yaml(recipe_file) # Load datasets -- GitLab From bc3c0487b13cd4b2918de848d4db1e4dab03d04f Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 25 Aug 2022 12:44:53 +0200 Subject: [PATCH 2/5] Change subsetting to Subset() for safer handling of dimensions --- modules/Skill/Skill.R | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 23c4ac52..db0365ad 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -195,12 +195,13 @@ compute_probabilities <- function(data, recipe) { } else { for (element in recipe$Analysis$Workflow$Probabilities$percentiles) { thresholds <- sapply(element, function (x) eval(parse(text = x))) - probs <- Compute_probs(data$data, thresholds, + probs <- Compute_probs(data1$data, thresholds, ncores = ncores, na.rm = na.rm) for (i in seq(1:dim(probs$quantiles)['bin'][[1]])) { named_quantiles <- append(named_quantiles, - list(probs$quantiles[i, , , , , , ,])) + list(ClimProjDiags::Subset(probs$quantiles, + 'bin', i))) names(named_quantiles)[length(named_quantiles)] <- paste0("percentile_", as.integer(thresholds[i]*100)) } @@ -213,12 +214,17 @@ compute_probabilities <- function(data, recipe) { name_i <- paste0("prob_", as.integer(thresholds[i-1]*100), "_to_", as.integer(thresholds[i]*100)) } - named_probs <- append(named_probs, list(probs$probs[i, , , , , , , ,])) + named_probs <- append(named_probs, + list(ClimProjDiags::Subset(probs$probs, + 'bin', i))) names(named_probs)[length(named_probs)] <- name_i } # remove(probs) } } + named_quantiles <- lapply(named_quantiles, function(x) {.drop_dims(x)}) + named_probs <- lapply(named_probs, function(x) {.drop_dims(x)}) + print("##### PERCENTILES AND PROBABILITY CATEGORIES COMPUTED #####") return(list(probs=named_probs, percentiles=named_quantiles)) } -- GitLab From 5d23347051762dccf0ec4fd43b63ed8fd0491bd3 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 25 Aug 2022 12:51:45 +0200 Subject: [PATCH 3/5] fix typo! --- modules/Skill/Skill.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index db0365ad..d6b959c8 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -195,7 +195,7 @@ compute_probabilities <- function(data, recipe) { } else { for (element in recipe$Analysis$Workflow$Probabilities$percentiles) { thresholds <- sapply(element, function (x) eval(parse(text = x))) - probs <- Compute_probs(data1$data, thresholds, + probs <- Compute_probs(data$data, thresholds, ncores = ncores, na.rm = na.rm) for (i in seq(1:dim(probs$quantiles)['bin'][[1]])) { -- GitLab From 500367a27553ce4bc135705abe375279618ead46 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 25 Aug 2022 14:38:22 +0200 Subject: [PATCH 4/5] In save_probabilities(), drop selected dimension instead of all dims --- modules/Saving/Saving.R | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index e0722e9a..bda3bc27 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -735,13 +735,7 @@ save_probabilities <- function(probs, syears_val <- lubridate::year(data_cube$Dates$start[1, 1, , 1]) # expect dim = [sday = 1, sweek = 1, syear, time] for (i in syears) { # Select year from array and rearrange dimensions - probs_syear <- lapply(probs, ClimProjDiags::Subset,'syear', i, drop = T) - - # Restore time dimension if the arrays are missing it - if (!("time" %in% names(dim(probs_syear[[1]])))) { - probs_syear <- lapply(probs_syear, function(x) { - dim(x) <- c("time" = 1, dim(x))}) - } + probs_syear <- lapply(probs, ClimProjDiags::Subset, 'syear', i, drop = 'selected') if (tolower(agg) == "global") { probs_syear <- lapply(probs_syear, function(x) { Reorder(x, c(lalo, 'time'))}) -- GitLab From 37d84cc34734ac0ad673f58096c1221aff79ce93 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 25 Aug 2022 15:38:05 +0200 Subject: [PATCH 5/5] Handle missing values in save_corr(), fix bug where script failed if corr was the only metric --- modules/Saving/Saving.R | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index bda3bc27..03288a8e 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -29,12 +29,25 @@ save_data <- function(recipe, data, save_observations(data$obs, recipe, dict, outdir, archive = archive) } + # Separate ensemble correlation from the rest of the metrics, as it has one + # extra dimension "ensemble" and must be saved to a different file + if ("corr" %in% names(skill_metrics)) { + corr_metric_names <- grep("^corr", names(skill_metrics)) + corr_metrics <- skill_metrics[corr_metric_names] + skill_metrics <- skill_metrics[-corr_metric_names] + if (length(skill_metrics) == 0) { + skill_metrics <- NULL + } + } else { + corr_metrics <- NULL + } + # Export skill metrics onto outfile if (!is.null(skill_metrics)) { save_metrics(skill_metrics, recipe, dict, data$hcst, outdir, archive = archive) - if ("corr" %in% names(skill_metrics)) { - save_corr(skill_metrics, recipe, dict, data$hcst, outdir, archive = archive) - } + } + if (!is.null(corr_metrics)) { + save_corr(corr_metrics, recipe, dict, data$hcst, outdir, archive = archive) } # Export probabilities onto outfile @@ -390,13 +403,6 @@ save_metrics <- function(skill, # This function adds metadata to the skill metrics in 'skill' # and exports them to a netCDF file inside 'outdir'. - # Remove ensemble correlation from the list since it should be saved in - # a separate file, as it has 'ensemble' dim. - if ("corr" %in% names(skill)) { - corr_metrics <- grep("^corr", names(skill)) - skill <- skill[-corr_metrics] - } - # Define grid dimensions and names lalo <- c('longitude', 'latitude') # Remove singleton dimensions and rearrange lon, lat and time dims @@ -500,12 +506,6 @@ save_corr <- function(skill, # This function adds metadata to the ensemble correlation in 'skill' # and exports it to a netCDF file inside 'outdir'. - # Select ensemble correlation from the list of metrics - if ("corr" %in% names(skill)) { - corr_metrics <- grep("^corr", names(skill)) - skill <- skill[corr_metrics] - } - # Define grid dimensions and names lalo <- c('longitude', 'latitude') # Remove singleton dimensions and rearrange lon, lat and time dims @@ -521,6 +521,8 @@ save_corr <- function(skill, for (i in 1:length(skill)) { metric <- names(skill[i]) long_name <- dictionary$metrics[[metric]]$long_name + missing_val <- -9.e+33 + skill[[i]][is.na(skill[[i]])] <- missing_val if (tolower(agg) == "country") { sdname <- paste0(metric, " region-aggregated metric") dims <- c('Country', 'ensemble', 'time') @@ -530,7 +532,8 @@ save_corr <- function(skill, } metadata <- list(metric = list(name = metric, standard_name = sdname, - long_name = long_name)) + long_name = long_name, + missing_value = missing_val)) attr(skill[[i]], 'variables') <- metadata names(dim(skill[[i]])) <- dims } -- GitLab