From ba8094fac03813854aee38354428c8ea345b3f20 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 4 Nov 2022 11:49:45 +0100 Subject: [PATCH 01/80] Add parameters to compute anomalies and climatology upon loading --- modules/Loading/Loading.R | 68 ++++++++++++++++++- .../testing_recipes/recipe_system7c3s-tas.yml | 6 +- 2 files changed, 70 insertions(+), 4 deletions(-) diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 8d54d63d..f6403881 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -4,7 +4,7 @@ source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R") source("modules/Loading/dates2load.R") source("modules/Loading/check_latlon.R") source("tools/libs.R") - +source("https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-CST_Anomaly/R/CST_Anomaly.R") load_datasets <- function(recipe) { @@ -343,6 +343,69 @@ load_datasets <- function(recipe) { } } + # Compute anomalies if requested + if (recipe$Analysis$Variables$anomaly) { + if (recipe$Analysis$Variables$anomaly_cross_validation) { + cross <- TRUE + cross_msg <- "with" + } else { + cross <- FALSE + cross_msg <- "without" + } + anom <- CST_Anomaly(hcst, obs, + cross = cross, + memb = TRUE, + memb_dim = 'ensemble', + dim_anom = 'syear', + dat_dim = c('dat', 'ensemble'), + ftime_dim = 'time') + + anom$exp$data <- Reorder(anom$exp$data, names(dim(hcst$data))) + anom$obs$data <- Reorder(anom$obs$data, names(dim(hcst$data))) + + hcst_clim <- hcst + obs_clim <- obs + ## Maybe just use s2dv::Clim() here? + clim <- s2dv::Clim(hcst$data, + obs$data, + time_dim = "syear", + dat_dim = c("dat", "ensemble"), + memb = TRUE, + memb_dim = "ensemble", + ftime_dim = "time") + hcst_clim$data <- InsertDim(clim$clim_exp, + posdim = 1, lendim = 1, name = "syear") + hcst_clim$data <- Reorder(hcst_clim$data, names(dim(hcst$data))) + obs_clim$data <- InsertDim(clim$clim_obs, + posdim = 1, lendim = 1, name = "syear") + obs_clim$data <- Reorder(obs_clim$data, names(dim(hcst$data))) + + # Replace + hcst <- anom$exp + obs <- anom$obs + remove(anom, clim) + + ## TODO: Compute forecast anomaly field + if (!is.null(fcst)) { + warn(recipe$Run$logger, + "fcst anomalies are a work in progress...") + mean_hcst_clim <- MeanDims(hcst_clim$data, dims = "ensemble", drop = T) + dims <- dim(mean_hcst_clim) + mean_hcst_clim <- rep(mean_hcst_clim, fcst.nmember) + dim(mean_hcst_clim) <- c(dims, + ensemble = fcst.nmember) + + fcst$data <- fcst$data - mean_hcst_clim + } + info(recipe$Run$logger, + "The anomalies have been computed, ", cross_msg, " cross-validation. + The climatologies are returned in $hcst_clim and $obs_clim.") + + } else { + hcst_clim <- NULL + obs_clim <- NULL + } + # Print a summary of the loaded data for the user, for each object if (recipe$Run$logger$threshold <= 2) { data_summary(hcst, recipe) @@ -403,6 +466,7 @@ load_datasets <- function(recipe) { ############################################################################ ############################################################################ - return(list(hcst = hcst, fcst = fcst, obs = obs)) + return(list(hcst = hcst, fcst = fcst, obs = obs, + hcst_clim = hcst_clim, obs_clim = obs_clim)) } diff --git a/modules/Loading/testing_recipes/recipe_system7c3s-tas.yml b/modules/Loading/testing_recipes/recipe_system7c3s-tas.yml index f83f91bb..0a54682b 100644 --- a/modules/Loading/testing_recipes/recipe_system7c3s-tas.yml +++ b/modules/Loading/testing_recipes/recipe_system7c3s-tas.yml @@ -6,6 +6,8 @@ Analysis: Variables: name: tas freq: monthly_mean + anomaly: no # yes/no, default yes + anomaly_cross_validation: # yes/no, default yes Datasets: System: name: system7c3s @@ -16,9 +18,9 @@ Analysis: sdate: '1101' fcst_year: '2020' hcst_start: '1993' - hcst_end: '2016' + hcst_end: '1996' ftime_min: 1 - ftime_max: 6 + ftime_max: 2 Region: latmin: -10 latmax: 10 -- GitLab From ad4481b9d9010844db7359b4a86ce906b0c0af7d Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 4 Nov 2022 15:19:33 +0100 Subject: [PATCH 02/80] Fix conflict with logger package --- tools/prepare_outputs.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/prepare_outputs.R b/tools/prepare_outputs.R index 8a683178..da5d2562 100644 --- a/tools/prepare_outputs.R +++ b/tools/prepare_outputs.R @@ -60,14 +60,14 @@ prepare_outputs <- function(recipe_file) { recipe$Run$Terminal <- TRUE } if (recipe$Run$Terminal) { - logger <- logger(threshold = recipe$Run$Loglevel, - appenders = list(console_appender(layout = default_log_layout()), - file_appender(logfile, append = TRUE, - layout = default_log_layout()))) + logger <- log4r::logger(threshold = recipe$Run$Loglevel, + appenders = list(console_appender(layout = default_log_layout()), + file_appender(logfile, append = TRUE, + layout = default_log_layout()))) } else { - logger <- logger(threshold = recipe$Run$Loglevel, - appenders = list(file_appende(logfile, append = TRUE, - layout = default_log_layout()))) + logger <- log4r::logger(threshold = recipe$Run$Loglevel, + appenders = list(file_appende(logfile, append = TRUE, + layout = default_log_layout()))) } recipe$Run$output_dir <- file.path(output_dir, folder_name) -- GitLab From 79291de17262288f7f7f263d413734707ef971f8 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 7 Nov 2022 15:35:48 +0100 Subject: [PATCH 03/80] Move anomaly computation to Anomalies module --- modules/Anomalies/Anomalies.R | 81 +++++++++++++++++++++++++++++++++++ modules/Loading/Loading.R | 65 +--------------------------- 2 files changed, 82 insertions(+), 64 deletions(-) create mode 100644 modules/Anomalies/Anomalies.R diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R new file mode 100644 index 00000000..2b603390 --- /dev/null +++ b/modules/Anomalies/Anomalies.R @@ -0,0 +1,81 @@ +# Compute the hcst, obs and fcst anomalies with or without cross-validation +# and return them, along with the hcst and obs climatologies. + +compute_anomalies <- function(recipe, data) { + + if (recipe$Analysis$Workflow$Anomalies$compute) { + if (recipe$Analysis$Workflow$Anomalies$cross_validation) { + cross <- TRUE + cross_msg <- "with" + } else { + cross <- FALSE + cross_msg <- "without" + } + original_dims <- dim(data$hcst$data) + anom <- CST_Anomaly(data$hcst, data$obs, + cross = cross, + memb = TRUE, + memb_dim = 'ensemble', + dim_anom = 'syear', + dat_dim = c('dat', 'ensemble'), + ftime_dim = 'time') + + anom$exp$data <- Reorder(anom$exp$data, names(original_dims)) + anom$obs$data <- Reorder(anom$obs$data, names(original_dims)) + + clim_hcst <- hcst + clim_obs <- obs + ## Maybe just use s2dv::Clim() here? + clim <- s2dv::Clim(data$hcst$data, data$obs$data, + time_dim = "syear", + dat_dim = c("dat", "ensemble"), + memb = TRUE, + memb_dim = "ensemble", + ftime_dim = "time") + clim_hcst$data <- InsertDim(clim$clim_exp, + posdim = 1, lendim = 1, name = "syear") + clim_hcst$data <- Reorder(clim_hcst$data, names(original_dims)) + clim_obs$data <- InsertDim(clim$clim_obs, + posdim = 1, lendim = 1, name = "syear") + clim_obs$data <- Reorder(clim_obs$data, names(original_dims)) + + hcst <- anom$exp + obs <- anom$obs + remove(anom, clim) + + ## TODO: Compute forecast anomaly field + if (!is.null(data$fcst)) { + warn(recipe$Run$logger, + "fcst anomalies are a work in progress...") + mean_clim_hcst <- MeanDims(clim_hcst$data, dims = "ensemble", drop = T) + dims <- dim(mean_clim_hcst) + mean_clim_hcst <- rep(mean_clim_hcst, dim(data$fcst)[['ensemble']]) + dim(mean_clim_hcst) <- c(dims, + ensemble = dim(data$fcst)[['ensemble']]) + fcst$data <- fcst$data - mean_clim_hcst + info(recipe$Run$logger, + "The anomalies have been computed, ", cross_msg, " cross-validation. + The climatologies are returned in $clim_hcst and $clim_obs.") + } + + info(recipe$Run$logger, "##### ANOMALIES COMPUTED SUCCESSFULLY #####") + + } else { + warn(recipe$Run$logger, "The Anomalies module has been called, but + recipe parameter Analysis:Variables:anomaly is set to FALSE. + The full field will be returned.") + clim_hcst <- NULL + clim_obs <- NULL + info(recipe$Run$logger, "##### ANOMALIES NOT COMPUTED #####") + } + + return(list(hcst = data$hcst, obs = data$obs, fcst = data$fcst, + clim_hcst = clim_hcst, clim_obs = clim_obs)) + +} + + + + + + diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index f6403881..d0c17944 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -344,68 +344,6 @@ load_datasets <- function(recipe) { } # Compute anomalies if requested - if (recipe$Analysis$Variables$anomaly) { - if (recipe$Analysis$Variables$anomaly_cross_validation) { - cross <- TRUE - cross_msg <- "with" - } else { - cross <- FALSE - cross_msg <- "without" - } - anom <- CST_Anomaly(hcst, obs, - cross = cross, - memb = TRUE, - memb_dim = 'ensemble', - dim_anom = 'syear', - dat_dim = c('dat', 'ensemble'), - ftime_dim = 'time') - - anom$exp$data <- Reorder(anom$exp$data, names(dim(hcst$data))) - anom$obs$data <- Reorder(anom$obs$data, names(dim(hcst$data))) - - hcst_clim <- hcst - obs_clim <- obs - ## Maybe just use s2dv::Clim() here? - clim <- s2dv::Clim(hcst$data, - obs$data, - time_dim = "syear", - dat_dim = c("dat", "ensemble"), - memb = TRUE, - memb_dim = "ensemble", - ftime_dim = "time") - hcst_clim$data <- InsertDim(clim$clim_exp, - posdim = 1, lendim = 1, name = "syear") - hcst_clim$data <- Reorder(hcst_clim$data, names(dim(hcst$data))) - obs_clim$data <- InsertDim(clim$clim_obs, - posdim = 1, lendim = 1, name = "syear") - obs_clim$data <- Reorder(obs_clim$data, names(dim(hcst$data))) - - # Replace - hcst <- anom$exp - obs <- anom$obs - remove(anom, clim) - - ## TODO: Compute forecast anomaly field - if (!is.null(fcst)) { - warn(recipe$Run$logger, - "fcst anomalies are a work in progress...") - mean_hcst_clim <- MeanDims(hcst_clim$data, dims = "ensemble", drop = T) - dims <- dim(mean_hcst_clim) - mean_hcst_clim <- rep(mean_hcst_clim, fcst.nmember) - dim(mean_hcst_clim) <- c(dims, - ensemble = fcst.nmember) - - fcst$data <- fcst$data - mean_hcst_clim - } - info(recipe$Run$logger, - "The anomalies have been computed, ", cross_msg, " cross-validation. - The climatologies are returned in $hcst_clim and $obs_clim.") - - } else { - hcst_clim <- NULL - obs_clim <- NULL - } - # Print a summary of the loaded data for the user, for each object if (recipe$Run$logger$threshold <= 2) { data_summary(hcst, recipe) @@ -466,7 +404,6 @@ load_datasets <- function(recipe) { ############################################################################ ############################################################################ - return(list(hcst = hcst, fcst = fcst, obs = obs, - hcst_clim = hcst_clim, obs_clim = obs_clim)) + return(list(hcst = hcst, fcst = fcst, obs = obs)) } -- GitLab From 98192e55f079bdc15f0a77d9b7debcc849d95125 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 8 Nov 2022 11:26:58 +0100 Subject: [PATCH 04/80] Fix inconsistent object names in compute_anomalies(), adapt sample recipe and add TODOs --- modules/Anomalies/Anomalies.R | 16 +- modules/Anomalies/tmp/CST_Anomaly.R | 241 ++++++++++++++++++ modules/Loading/Loading.R | 2 +- .../testing_recipes/recipe_system7c3s-tas.yml | 7 +- modules/test_seasonal.R | 4 + 5 files changed, 259 insertions(+), 11 deletions(-) create mode 100644 modules/Anomalies/tmp/CST_Anomaly.R diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R index 2b603390..aadeca10 100644 --- a/modules/Anomalies/Anomalies.R +++ b/modules/Anomalies/Anomalies.R @@ -1,3 +1,5 @@ +source("modules/Anomalies/tmp/CST_Anomaly.R") + # Compute the hcst, obs and fcst anomalies with or without cross-validation # and return them, along with the hcst and obs climatologies. @@ -23,8 +25,8 @@ compute_anomalies <- function(recipe, data) { anom$exp$data <- Reorder(anom$exp$data, names(original_dims)) anom$obs$data <- Reorder(anom$obs$data, names(original_dims)) - clim_hcst <- hcst - clim_obs <- obs + clim_hcst <- data$hcst + clim_obs <- data$obs ## Maybe just use s2dv::Clim() here? clim <- s2dv::Clim(data$hcst$data, data$obs$data, time_dim = "syear", @@ -39,8 +41,8 @@ compute_anomalies <- function(recipe, data) { posdim = 1, lendim = 1, name = "syear") clim_obs$data <- Reorder(clim_obs$data, names(original_dims)) - hcst <- anom$exp - obs <- anom$obs + data$hcst <- anom$exp + data$obs <- anom$obs remove(anom, clim) ## TODO: Compute forecast anomaly field @@ -49,10 +51,10 @@ compute_anomalies <- function(recipe, data) { "fcst anomalies are a work in progress...") mean_clim_hcst <- MeanDims(clim_hcst$data, dims = "ensemble", drop = T) dims <- dim(mean_clim_hcst) - mean_clim_hcst <- rep(mean_clim_hcst, dim(data$fcst)[['ensemble']]) + mean_clim_hcst <- rep(mean_clim_hcst, dim(data$fcst$data)[['ensemble']]) dim(mean_clim_hcst) <- c(dims, - ensemble = dim(data$fcst)[['ensemble']]) - fcst$data <- fcst$data - mean_clim_hcst + ensemble = dim(data$fcst$data)[['ensemble']]) + data$fcst$data <- data$fcst$data - mean_clim_hcst info(recipe$Run$logger, "The anomalies have been computed, ", cross_msg, " cross-validation. The climatologies are returned in $clim_hcst and $clim_obs.") diff --git a/modules/Anomalies/tmp/CST_Anomaly.R b/modules/Anomalies/tmp/CST_Anomaly.R new file mode 100644 index 00000000..a84b6fc8 --- /dev/null +++ b/modules/Anomalies/tmp/CST_Anomaly.R @@ -0,0 +1,241 @@ +#'Anomalies relative to a climatology along selected dimension with or without cross-validation +#' +#'@author Perez-Zanon Nuria, \email{nuria.perez@bsc.es} +#'@author Pena Jesus, \email{jesus.pena@bsc.es} +#'@description This function computes the anomalies relative to a climatology +#'computed along the selected dimension (usually starting dates or forecast +#'time) allowing the application or not of crossvalidated climatologies. The +#'computation is carried out independently for experimental and observational +#'data products. +#' +#'@param exp An object of class \code{s2dv_cube} as returned by \code{CST_Load} +#' function, containing the seasonal forecast experiment data in the element +#' named \code{$data}. +#'@param obs An object of class \code{s2dv_cube} as returned by \code{CST_Load} +#' function, containing the observed data in the element named \code{$data}. +#'@param dim_anom A character string indicating the name of the dimension +#' along which the climatology will be computed. The default value is 'sdate'. +#'@param cross A logical value indicating whether cross-validation should be +#' applied or not. Default = FALSE. +#'@param memb_dim A character string indicating the name of the member +#' dimension. It must be one dimension in 'exp' and 'obs'. If there is no +#' member dimension, set NULL. The default value is 'member'. +#'@param memb A logical value indicating whether to subtract the climatology +#' based on the individual members (TRUE) or the ensemble mean over all +#' members (FALSE) when calculating the anomalies. The default value is TRUE. +#'@param dat_dim A character vector indicating the name of the dataset and +#' member dimensions. If there is no dataset dimension, it can be NULL. +#' The default value is "c('dataset', 'member')". +#'@param filter_span A numeric value indicating the degree of smoothing. This +#' option is only available if parameter \code{cross} is set to FALSE. +#'@param ftime_dim A character string indicating the name of the temporal +#' dimension where the smoothing with 'filter_span' will be applied. It cannot +#' be NULL if 'filter_span' is provided. The default value is 'ftime'. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. It will be used only when +#' 'filter_span' is not NULL. +#' +#'@return A list with two S3 objects, 'exp' and 'obs', of the class +#''s2dv_cube', containing experimental and date-corresponding observational +#'anomalies, respectively. These 's2dv_cube's can be ingested by other functions +#'in CSTools. +#' +#'@examples +#'# Example 1: +#'mod <- 1 : (2 * 3 * 4 * 5 * 6 * 7) +#'dim(mod) <- c(dataset = 2, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7) +#'obs <- 1 : (1 * 1 * 4 * 5 * 6 * 7) +#'dim(obs) <- c(dataset = 1, member = 1, sdate = 4, ftime = 5, lat = 6, lon = 7) +#'lon <- seq(0, 30, 5) +#'lat <- seq(0, 25, 5) +#'exp <- list(data = mod, lat = lat, lon = lon) +#'obs <- list(data = obs, lat = lat, lon = lon) +#'attr(exp, 'class') <- 's2dv_cube' +#'attr(obs, 'class') <- 's2dv_cube' +#' +#'anom1 <- CST_Anomaly(exp = exp, obs = obs, cross = FALSE, memb = TRUE) +#'anom2 <- CST_Anomaly(exp = exp, obs = obs, cross = TRUE, memb = TRUE) +#'anom3 <- CST_Anomaly(exp = exp, obs = obs, cross = TRUE, memb = FALSE) +#'anom4 <- CST_Anomaly(exp = exp, obs = obs, cross = FALSE, memb = FALSE) +#'anom5 <- CST_Anomaly(lonlat_temp$exp) +#'anom6 <- CST_Anomaly(obs = lonlat_temp$obs) +#' +#'@seealso \code{\link[s2dv]{Ano_CrossValid}}, \code{\link[s2dv]{Clim}} and \code{\link{CST_Load}} +#' +#'@import multiApply +#'@importFrom s2dv InsertDim Clim Ano_CrossValid Reorder +#'@export +CST_Anomaly <- function(exp = NULL, obs = NULL, dim_anom = 'sdate', cross = FALSE, + memb_dim = 'member', memb = TRUE, dat_dim = c('dataset', 'member'), + filter_span = NULL, ftime_dim = 'ftime', ncores = NULL) { + # s2dv_cube + if (!inherits(exp, 's2dv_cube') & !is.null(exp) || + !inherits(obs, 's2dv_cube') & !is.null(obs)) { + stop("Parameter 'exp' and 'obs' must be of the class 's2dv_cube', ", + "as output by CSTools::CST_Load.") + } + # exp and obs + if (is.null(exp$data) & is.null(obs$data)) { + stop("One of the parameter 'exp' or 'obs' cannot be NULL.") + } + case_exp = case_obs = 0 + if (is.null(exp)) { + exp <- obs + case_obs = 1 + warning("Parameter 'exp' is not provided and 'obs' will be used instead.") + } + if (is.null(obs)) { + obs <- exp + case_exp = 1 + warning("Parameter 'obs' is not provided and 'exp' will be used instead.") + } + if(any(is.null(names(dim(exp$data))))| any(nchar(names(dim(exp$data))) == 0) | + any(is.null(names(dim(obs$data))))| any(nchar(names(dim(obs$data))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names in element 'data'.") + } + if(!all(names(dim(exp$data)) %in% names(dim(obs$data))) | + !all(names(dim(obs$data)) %in% names(dim(exp$data)))) { + stop("Parameter 'exp' and 'obs' must have same dimension names in element 'data'.") + } + dim_exp <- dim(exp$data) + dim_obs <- dim(obs$data) + dimnames_data <- names(dim_exp) + # dim_anom + if (is.numeric(dim_anom) & length(dim_anom) == 1) { + warning("Parameter 'dim_anom' must be a character string and a numeric value will not be ", + "accepted in the next release. The corresponding dimension name is assigned.") + dim_anom <- dimnames_data[dim_anom] + } + if (!is.character(dim_anom)) { + stop("Parameter 'dim_anom' must be a character string.") + } + if (!dim_anom %in% names(dim_exp) | !dim_anom %in% names(dim_obs)) { + stop("Parameter 'dim_anom' is not found in 'exp' or in 'obs' dimension in element 'data'.") + } + if (dim_exp[dim_anom] <= 1 | dim_obs[dim_anom] <= 1) { + stop("The length of dimension 'dim_anom' in label 'data' of the parameter ", + "'exp' and 'obs' must be greater than 1.") + } + # cross + if (!is.logical(cross) | !is.logical(memb) ) { + stop("Parameters 'cross' and 'memb' must be logical.") + } + if (length(cross) > 1 | length(memb) > 1 ) { + cross <- cross[1] + warning("Parameter 'cross' has length greater than 1 and only the first element", + "will be used.") + } + # memb + if (length(memb) > 1) { + memb <- memb[1] + warning("Parameter 'memb' has length greater than 1 and only the first element", + "will be used.") + } + # memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim_exp) | !memb_dim %in% names(dim_obs)) { + stop("Parameter 'memb_dim' is not found in 'exp' or in 'obs' dimension.") + } + } + # dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim)) { + stop("Parameter 'dat_dim' must be a character vector.") + } + if (!all(dat_dim %in% names(dim_exp)) | !all(dat_dim %in% names(dim_obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension in element 'data'.", + " Set it as NULL if there is no dataset dimension.") + } + } + # filter_span + if (!is.null(filter_span)) { + if (!is.numeric(filter_span)) { + warning("Paramater 'filter_span' is not numeric and any filter", + " is being applied.") + filter_span <- NULL + } + # ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + # ftime_dim + if (!is.character(ftime_dim)) { + stop("Parameter 'ftime_dim' must be a character string.") + } + if (!ftime_dim %in% names(dim_exp) | !memb_dim %in% names(dim_obs)) { + stop("Parameter 'ftime_dim' is not found in 'exp' or in 'obs' dimension in element 'data'.") + } + } + + # Computating anomalies + #---------------------- + + # With cross-validation + if (cross) { + ano <- Ano_CrossValid(exp = exp$data, obs = obs$data, time_dim = dim_anom, memb_dim = memb_dim, memb = memb, dat_dim = dat_dim) + + # Without cross-validation + } else { + tmp <- Clim(exp = exp$data, obs = obs$data, time_dim = dim_anom, memb_dim = memb_dim, memb = memb, dat_dim = dat_dim) + if (!is.null(filter_span)) { + tmp$clim_exp <- Apply(tmp$clim_exp, + target_dims = c(ftime_dim), + output_dims = c(ftime_dim), + fun = .Loess, + loess_span = filter_span, + ncores = ncores)$output1 + tmp$clim_obs <- Apply(tmp$clim_obs, + target_dims = c(ftime_dim), + output_dims = c(ftime_dim), + fun = .Loess, + loess_span = filter_span, + ncores = ncores)$output1 + } + if (memb) { + clim_exp <- tmp$clim_exp + clim_obs <- tmp$clim_obs + } else { + clim_exp <- InsertDim(tmp$clim_exp, 1, dim_exp[memb_dim]) + clim_obs <- InsertDim(tmp$clim_obs, 1, dim_obs[memb_dim]) + } + clim_exp <- InsertDim(clim_exp, 1, dim_exp[dim_anom]) + clim_obs <- InsertDim(clim_obs, 1, dim_obs[dim_anom]) + ano <- NULL + + # Permuting back dimensions to original order + clim_exp <- Reorder(clim_exp, dimnames_data) + clim_obs <- Reorder(clim_obs, dimnames_data) + + ano$exp <- exp$data - clim_exp + ano$obs <- obs$data - clim_obs + } + + exp$data <- ano$exp + obs$data <- ano$obs + + # Outputs + # ~~~~~~~~~ + if (case_obs == 1) { + return(obs) + } + else if (case_exp == 1) { + return(exp) + } + else { + return(list(exp = exp, obs = obs)) + } +} + +.Loess <- function(clim, loess_span) { + data <- data.frame(ensmean = clim, day = 1 : length(clim)) + loess_filt <- loess(ensmean ~ day, data, span = loess_span) + output <- predict(loess_filt) + return(output) +} + diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index d0c17944..66a53451 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -3,8 +3,8 @@ source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R") # Load required libraries/funs source("modules/Loading/dates2load.R") source("modules/Loading/check_latlon.R") +## TODO: Move to prepare_outputs.R source("tools/libs.R") -source("https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-CST_Anomaly/R/CST_Anomaly.R") load_datasets <- function(recipe) { diff --git a/modules/Loading/testing_recipes/recipe_system7c3s-tas.yml b/modules/Loading/testing_recipes/recipe_system7c3s-tas.yml index 0a54682b..df82c349 100644 --- a/modules/Loading/testing_recipes/recipe_system7c3s-tas.yml +++ b/modules/Loading/testing_recipes/recipe_system7c3s-tas.yml @@ -6,8 +6,6 @@ Analysis: Variables: name: tas freq: monthly_mean - anomaly: no # yes/no, default yes - anomaly_cross_validation: # yes/no, default yes Datasets: System: name: system7c3s @@ -18,7 +16,7 @@ Analysis: sdate: '1101' fcst_year: '2020' hcst_start: '1993' - hcst_end: '1996' + hcst_end: '2010' ftime_min: 1 ftime_max: 2 Region: @@ -30,6 +28,9 @@ Analysis: method: bilinear type: to_system Workflow: + Anomalies: + compute: yes # yes/no, default yes + cross_validation: yes # yes/no, default yes Calibration: method: mse_min Skill: diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R index d8eb5c4e..f42e2900 100644 --- a/modules/test_seasonal.R +++ b/modules/test_seasonal.R @@ -1,4 +1,5 @@ source("modules/Loading/Loading.R") +source("modules/Anomalies/Anomalies.R") source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") @@ -10,9 +11,12 @@ recipe <- prepare_outputs(recipe_file) # Load datasets data <- load_datasets(recipe) +# Compute anomalies +data <- compute_anomalies(recipe, data) # Calibrate datasets calibrated_data <- calibrate_datasets(recipe, data) # Compute skill metrics +## TODO: Turn arguments into (recipe, data)? skill_metrics <- compute_skill_metrics(recipe, calibrated_data$hcst, data$obs) # Compute percentiles and probability bins probabilities <- compute_probabilities(recipe, calibrated_data$hcst) -- GitLab From ff21306cc439fdaa04a80557b022417634bc8d15 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 9 Nov 2022 16:25:23 +0100 Subject: [PATCH 05/80] Adapt Skill script to anomalies (WIP) --- modules/Skill/Skill.R | 74 ++++++++++++++++++++++++++++++++----------- 1 file changed, 56 insertions(+), 18 deletions(-) diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 99f12346..43095102 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -51,12 +51,29 @@ source("modules/Skill/tmp/AbsBiasSS.R") # " running Skill module ", "\n", # " it can call ", metric_fun )) -compute_skill_metrics <- function(recipe, exp, obs) { +compute_skill_metrics <- function(recipe, exp, obs, + clim_exp = NULL, + clim_obs = NULL) { # exp: s2dv_cube containing the hindcast # obs: s2dv_cube containing the observations # recipe: auto-s2s recipe as provided by read_yaml ## TODO: Adapt time_dims to subseasonal case + ## TODO: Add dat_dim + ## TODO: Refine error messages + + if (recipe$Analysis$Workflow$Anomalies$compute) { + if (is.null(clim_exp) || is.null(clim_obs)) { + warn(recipe$Run$logger, "Anomalies have been requested in the recipe, + but the climatologies have not been provided in the + compute_skill_metrics call. Be aware that some metrics like the + Mean Bias may not be correct.") + } + } else { + warn(recipe$Run$logger, "Anomaly computation was not requested in the + recipe. Be aware that some metrics like the CRPSS may not be + correct.") + } time_dim <- 'syear' memb_dim <- 'ensemble' metrics <- tolower(recipe$Analysis$Workflow$Skill$metric) @@ -87,22 +104,31 @@ compute_skill_metrics <- function(recipe, exp, obs) { } # Ranked Probability Score and Fair version if (metric %in% c('rps', 'frps')) { - skill <- RPS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, - Fair = Fair, ncores = ncores) + skill <- RPS(exp$data, obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + Fair = Fair, + ncores = ncores) skill <- .drop_dims(skill) skill_metrics[[ metric ]] <- skill # Ranked Probability Skill Score and Fair version } else if (metric %in% c('rpss', 'frpss')) { - skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, - Fair = Fair, ncores = ncores) + skill <- RPSS(exp$data, obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + Fair = Fair, + ncores = ncores) skill <- lapply(skill, function(x) { .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$rpss skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # Brier Skill Score - 10th percentile } else if (metric == 'bss10') { - skill <- RPSS(exp$data, obs$data, time_dim = time_dim, - memb_dim = memb_dim, prob_thresholds = 0.1, Fair = Fair, + skill <- RPSS(exp$data, obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + prob_thresholds = 0.1, + Fair = Fair, ncores = ncores) skill <- lapply(skill, function(x) { .drop_dims(x)}) @@ -110,8 +136,11 @@ compute_skill_metrics <- function(recipe, exp, obs) { skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # Brier Skill Score - 90th percentile } else if (metric == 'bss90') { - skill <- RPSS(exp$data, obs$data, time_dim = time_dim, - memb_dim = memb_dim, prob_thresholds = 0.9, Fair = Fair, + skill <- RPSS(exp$data, obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + prob_thresholds = 0.9, + Fair = Fair, ncores = ncores) skill <- lapply(skill, function(x) { .drop_dims(x)}) @@ -119,28 +148,38 @@ compute_skill_metrics <- function(recipe, exp, obs) { skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # CRPS and FCRPS } else if (metric %in% c('crps', 'fcrps')) { - skill <- CRPS(exp$data, obs$data, time_dim = time_dim, - memb_dim = memb_dim, Fair = Fair, ncores = ncores) + skill <- CRPS(exp$data, obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + Fair = Fair, + ncores = ncores) skill <- .drop_dims(skill) skill_metrics[[ metric ]] <- skill # CRPSS and FCRPSS } else if (metric %in% c('crpss', 'fcrpss')) { - skill <- CRPSS(exp$data, obs$data, time_dim = time_dim, - memb_dim = memb_dim, Fair = Fair, ncores = ncores) + skill <- CRPSS(exp$data, obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + Fair = Fair, + ncores = ncores) skill <- lapply(skill, function(x) { .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$crpss skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # Mean bias (climatology) } else if (metric == 'mean_bias') { - skill <- Bias(exp$data, obs$data, time_dim = time_dim, - memb_dim = memb_dim, ncores = ncores) + skill <- Bias(exp$data, obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + ncores = ncores) skill <- .drop_dims(skill) skill_metrics[[ metric ]] <- skill # Mean bias skill score } else if (metric == 'mean_bias_ss') { - skill <- AbsBiasSS(exp$data, obs$data, time_dim = time_dim, - memb_dim = memb_dim, ncores = ncores) + skill <- AbsBiasSS(exp$data, obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + ncores = ncores) skill <- lapply(skill, function(x) { .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$biasSS @@ -184,7 +223,6 @@ compute_skill_metrics <- function(recipe, exp, obs) { metric_name <- (strsplit(metric, "_"))[[1]][1] # Get metric name if (!(metric_name %in% c('frpss', 'frps', 'bss10', 'bss90', 'enscorr', 'rpss'))) { - ## TODO: Test this scenario warn(recipe$Run$logger, "Some of the requested metrics are not available.") } -- GitLab From 2be03d8ed5b7a241ac8303555881022b7991f4b9 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 15 Nov 2022 09:41:18 +0100 Subject: [PATCH 06/80] Improve SpecsVerification metric not available warning --- modules/Skill/Skill.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 43095102..3185bbf6 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -224,7 +224,7 @@ compute_skill_metrics <- function(recipe, exp, obs, if (!(metric_name %in% c('frpss', 'frps', 'bss10', 'bss90', 'enscorr', 'rpss'))) { warn(recipe$Run$logger, - "Some of the requested metrics are not available.") + "Some of the requested SpecsVerification metrics are not available.") } capture.output( skill <- Compute_verif_metrics(exp$data, obs$data, -- GitLab From 025d5b94ff4fb8166caba631bf34df61daea3b22 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 17 Nov 2022 14:34:30 +0100 Subject: [PATCH 07/80] Rethink anomaly module returns, adapt parameters in other modules --- modules/Anomalies/Anomalies.R | 40 +++---- modules/Calibration/Calibration.R | 2 +- .../testing_recipes/recipe_test_anomalies.yml | 49 +++++++++ modules/Saving/Saving.R | 14 +-- modules/Skill/Skill.R | 100 +++++++++++------- modules/Visualization/Visualization.R | 12 ++- modules/test_seasonal.R | 11 +- 7 files changed, 153 insertions(+), 75 deletions(-) create mode 100644 modules/Loading/testing_recipes/recipe_test_anomalies.yml diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R index aadeca10..3e24f3fe 100644 --- a/modules/Anomalies/Anomalies.R +++ b/modules/Anomalies/Anomalies.R @@ -25,21 +25,22 @@ compute_anomalies <- function(recipe, data) { anom$exp$data <- Reorder(anom$exp$data, names(original_dims)) anom$obs$data <- Reorder(anom$obs$data, names(original_dims)) - clim_hcst <- data$hcst - clim_obs <- data$obs + hcst_fullvalue <- data$hcst + obs_fullvalue <- data$obs + ## Maybe just use s2dv::Clim() here? clim <- s2dv::Clim(data$hcst$data, data$obs$data, - time_dim = "syear", - dat_dim = c("dat", "ensemble"), - memb = TRUE, - memb_dim = "ensemble", - ftime_dim = "time") - clim_hcst$data <- InsertDim(clim$clim_exp, - posdim = 1, lendim = 1, name = "syear") - clim_hcst$data <- Reorder(clim_hcst$data, names(original_dims)) - clim_obs$data <- InsertDim(clim$clim_obs, - posdim = 1, lendim = 1, name = "syear") - clim_obs$data <- Reorder(clim_obs$data, names(original_dims)) + time_dim = "syear", + dat_dim = c("dat", "ensemble"), + memb = TRUE, + memb_dim = "ensemble", + ftime_dim = "time") + clim_hcst <- InsertDim(clim$clim_exp, posdim = 1, lendim = 1, + name = "syear") + clim_hcst <- Reorder(clim_hcst, names(original_dims)) + # clim_obs$data <- InsertDim(clim$clim_obs, + # posdim = 1, lendim = 1, name = "syear") + # clim_obs$data <- Reorder(clim_obs$data, names(original_dims)) data$hcst <- anom$exp data$obs <- anom$obs @@ -49,7 +50,7 @@ compute_anomalies <- function(recipe, data) { if (!is.null(data$fcst)) { warn(recipe$Run$logger, "fcst anomalies are a work in progress...") - mean_clim_hcst <- MeanDims(clim_hcst$data, dims = "ensemble", drop = T) + mean_clim_hcst <- MeanDims(clim_hcst, dims = "ensemble", drop = T) dims <- dim(mean_clim_hcst) mean_clim_hcst <- rep(mean_clim_hcst, dim(data$fcst$data)[['ensemble']]) dim(mean_clim_hcst) <- c(dims, @@ -57,7 +58,8 @@ compute_anomalies <- function(recipe, data) { data$fcst$data <- data$fcst$data - mean_clim_hcst info(recipe$Run$logger, "The anomalies have been computed, ", cross_msg, " cross-validation. - The climatologies are returned in $clim_hcst and $clim_obs.") + The original full fields are returned in $hcst_full.val and + $obs_full.val.") } info(recipe$Run$logger, "##### ANOMALIES COMPUTED SUCCESSFULLY #####") @@ -65,14 +67,14 @@ compute_anomalies <- function(recipe, data) { } else { warn(recipe$Run$logger, "The Anomalies module has been called, but recipe parameter Analysis:Variables:anomaly is set to FALSE. - The full field will be returned.") - clim_hcst <- NULL - clim_obs <- NULL + The full fields will be returned.") + hcst_fullvalue <- NULL + obs_fullvalue <- NULL info(recipe$Run$logger, "##### ANOMALIES NOT COMPUTED #####") } return(list(hcst = data$hcst, obs = data$obs, fcst = data$fcst, - clim_hcst = clim_hcst, clim_obs = clim_obs)) + hcst_full.val = hcst_fullvalue, obs_full.val = obs_fullvalue)) } diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 59e5451a..0099983b 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -124,5 +124,5 @@ calibrate_datasets <- function(recipe, data) { } } info(recipe$Run$logger, CALIB_MSG) - return(list(hcst = hcst_calibrated, fcst = fcst_calibrated)) + return(list(hcst = hcst_calibrated, obs = data$obs, fcst = fcst_calibrated)) } diff --git a/modules/Loading/testing_recipes/recipe_test_anomalies.yml b/modules/Loading/testing_recipes/recipe_test_anomalies.yml new file mode 100644 index 00000000..cdf5e3ca --- /dev/null +++ b/modules/Loading/testing_recipes/recipe_test_anomalies.yml @@ -0,0 +1,49 @@ +Description: + Author: V. Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: system5c3s + Multimodel: False + Reference: + name: era5 + Time: + sdate: '1101' + fcst_year: '2020' + hcst_start: '1999' + hcst_end: '2010' + ftime_min: 1 + ftime_max: 2 + Region: + latmin: -10 + latmax: 10 + lonmin: 0 + lonmax: 20 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: raw + Anomalies: + compute: yes + cross_validation: yes + Skill: + metric: RPS RPSS CRPS CRPSS BSS10 BSS90 EnsCorr mean_bias mean_bias_SS + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] + Indicators: + index: no + ncores: 7 + remove_NAs: yes + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index ed0933f2..97af1dca 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -106,7 +106,6 @@ get_global_attributes <- function(recipe, archive) { get_times <- function(store.freq, fcst.horizon, leadtimes, sdate, calendar) { # Generates time dimensions and the corresponding metadata. - ## TODO: Add calendar ## TODO: Subseasonal switch(fcst.horizon, @@ -176,6 +175,7 @@ save_forecast <- function(data_cube, fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq calendar <- archive$System[[global_attributes$system]]$calendar + # if (fcst.horizon == "seasonal") { # calendar <- attr(data_cube$Variable, "variable")$dim$time$calendar # } else { @@ -186,12 +186,13 @@ save_forecast <- function(data_cube, dates <- as.PCICt(ClimProjDiags::Subset(data_cube$Dates$start, 'syear', 1), cal = calendar) if (fcst.horizon == 'decadal') { - # Method 1: Use the first date as init_date. But it may be better to use the real initialized date (ask users) + ## Method 1: Use the first date as init_date. But it may be better to use + ## the real initialized date (ask users) # init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') - # Method 2: use initial month + ## Method 2: use initial month init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month if (type == 'hcst') { -#PROBLEM for fcst!!!!!!!!!!!! + ## PROBLEM for fcst!!!!!!!!!!!! init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', sprintf('%02d', init_month), '-01'), cal = calendar) @@ -322,9 +323,10 @@ save_observations <- function(data_cube, dates <- as.PCICt(ClimProjDiags::Subset(data_cube$Dates$start, 'syear', 1), cal = calendar) if (fcst.horizon == 'decadal') { - # Method 1: Use the first date as init_date. But it may be better to use the real initialized date (ask users) + ## Method 1: Use the first date as init_date. But it may be better to use + ## the real initialized date (ask users) # init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') - # Method 2: use initial month + ## Method 2: use initial month init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', sprintf('%02d', init_month), '-01'), diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 3185bbf6..3503078a 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -51,29 +51,32 @@ source("modules/Skill/tmp/AbsBiasSS.R") # " running Skill module ", "\n", # " it can call ", metric_fun )) -compute_skill_metrics <- function(recipe, exp, obs, - clim_exp = NULL, - clim_obs = NULL) { - # exp: s2dv_cube containing the hindcast +# compute_skill_metrics <- function(recipe, data$hcst, obs, +# clim_data$hcst = NULL, +# clim_obs = NULL) { +compute_skill_metrics <- function(recipe, data) { + + # data$hcst: s2dv_cube containing the hindcast # obs: s2dv_cube containing the observations # recipe: auto-s2s recipe as provided by read_yaml ## TODO: Adapt time_dims to subseasonal case ## TODO: Add dat_dim ## TODO: Refine error messages + ## TODO: Add check to see if anomalies are provided (info inside s2dv_cube) - if (recipe$Analysis$Workflow$Anomalies$compute) { - if (is.null(clim_exp) || is.null(clim_obs)) { - warn(recipe$Run$logger, "Anomalies have been requested in the recipe, - but the climatologies have not been provided in the - compute_skill_metrics call. Be aware that some metrics like the - Mean Bias may not be correct.") - } - } else { - warn(recipe$Run$logger, "Anomaly computation was not requested in the - recipe. Be aware that some metrics like the CRPSS may not be - correct.") - } +# if (recipe$Analysis$Workflow$Anomalies$compute) { +# if (is.null(clim_data$hcst) || is.null(clim_obs)) { +# warn(recipe$Run$logger, "Anomalies have been requested in the recipe, +# but the climatologies have not been provided in the +# compute_skill_metrics call. Be aware that some metrics like the +# Mean Bias may not be correct.") +# } +# } else { +# warn(recipe$Run$logger, "Anomaly computation was not requested in the +# recipe. Be aware that some metrics, such as the CRPSS may not be +# correct.") +# } time_dim <- 'syear' memb_dim <- 'ensemble' metrics <- tolower(recipe$Analysis$Workflow$Skill$metric) @@ -104,7 +107,7 @@ compute_skill_metrics <- function(recipe, exp, obs, } # Ranked Probability Score and Fair version if (metric %in% c('rps', 'frps')) { - skill <- RPS(exp$data, obs$data, + skill <- RPS(data$hcst$data, data$obs$data, time_dim = time_dim, memb_dim = memb_dim, Fair = Fair, @@ -113,7 +116,7 @@ compute_skill_metrics <- function(recipe, exp, obs, skill_metrics[[ metric ]] <- skill # Ranked Probability Skill Score and Fair version } else if (metric %in% c('rpss', 'frpss')) { - skill <- RPSS(exp$data, obs$data, + skill <- RPSS(data$hcst$data, data$obs$data, time_dim = time_dim, memb_dim = memb_dim, Fair = Fair, @@ -124,7 +127,7 @@ compute_skill_metrics <- function(recipe, exp, obs, skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # Brier Skill Score - 10th percentile } else if (metric == 'bss10') { - skill <- RPSS(exp$data, obs$data, + skill <- RPSS(data$hcst$data, data$obs$data, time_dim = time_dim, memb_dim = memb_dim, prob_thresholds = 0.1, @@ -136,7 +139,7 @@ compute_skill_metrics <- function(recipe, exp, obs, skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # Brier Skill Score - 90th percentile } else if (metric == 'bss90') { - skill <- RPSS(exp$data, obs$data, + skill <- RPSS(data$hcst$data, data$obs$data, time_dim = time_dim, memb_dim = memb_dim, prob_thresholds = 0.9, @@ -148,7 +151,7 @@ compute_skill_metrics <- function(recipe, exp, obs, skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # CRPS and FCRPS } else if (metric %in% c('crps', 'fcrps')) { - skill <- CRPS(exp$data, obs$data, + skill <- CRPS(data$hcst$data, data$obs$data, time_dim = time_dim, memb_dim = memb_dim, Fair = Fair, @@ -157,7 +160,7 @@ compute_skill_metrics <- function(recipe, exp, obs, skill_metrics[[ metric ]] <- skill # CRPSS and FCRPSS } else if (metric %in% c('crpss', 'fcrpss')) { - skill <- CRPSS(exp$data, obs$data, + skill <- CRPSS(data$hcst$data, data$obs$data, time_dim = time_dim, memb_dim = memb_dim, Fair = Fair, @@ -166,20 +169,38 @@ compute_skill_metrics <- function(recipe, exp, obs, .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$crpss skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign - # Mean bias (climatology) - } else if (metric == 'mean_bias') { - skill <- Bias(exp$data, obs$data, - time_dim = time_dim, - memb_dim = memb_dim, - ncores = ncores) + # Mean bias (climatology) + } else if (metric == 'mean_bias') { + ## TODO: Eliminate option to compute from anomalies + # Compute from full field + if ((!is.null(data$hcst_full.val)) && (!is.null(data$obs_full.val)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { + skill <- Bias(data$hcst_full.val$data, data$obs_full.val$data, + time_dim = time_dim, + memb_dim = memb_dim, + ncores = ncores) + } else { + skill <- Bias(data$hcst$data, data$obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + ncores = ncores) + } skill <- .drop_dims(skill) skill_metrics[[ metric ]] <- skill # Mean bias skill score } else if (metric == 'mean_bias_ss') { - skill <- AbsBiasSS(exp$data, obs$data, - time_dim = time_dim, - memb_dim = memb_dim, - ncores = ncores) + if ((!is.null(data$hcst_full.val)) && (!is.null(data$obs_full.val)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { + skill <- AbsBiasSS(data$hcst_full.val$data, data$obs_full.val$data, + time_dim = time_dim, + memb_dim = memb_dim, + ncores = ncores) + } else { + skill <- AbsBiasSS(data$hcst$data, data$obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + ncores = ncores) + } skill <- lapply(skill, function(x) { .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$biasSS @@ -187,7 +208,7 @@ compute_skill_metrics <- function(recipe, exp, obs, # Ensemble mean correlation } else if (metric %in% c('enscorr', 'corr')) { ## TODO: Implement option for Kendall and Spearman methods? - skill <- s2dv::Corr(exp$data, obs$data, + skill <- s2dv::Corr(data$hcst$data, data$obs$data, dat_dim = 'dat', time_dim = time_dim, method = 'pearson', @@ -202,14 +223,14 @@ compute_skill_metrics <- function(recipe, exp, obs, skill_metrics[[ paste0(metric, "_conf.up") ]] <- skill$conf.upper } else if (metric == 'enssprerr') { # Remove ensemble dim from obs to avoid veriApply warning - obs_noensdim <- ClimProjDiags::Subset(obs$data, "ensemble", 1, + obs_noensdim <- ClimProjDiags::Subset(data$obs$data, "ensemble", 1, drop = "selected") capture.output( skill <- easyVerification::veriApply(verifun = 'EnsSprErr', - fcst = exp$data, + fcst = data$hcst$data, obs = obs_noensdim, - tdim = which(names(dim(exp$data))==time_dim), - ensdim = which(names(dim(exp$data))==memb_dim), + tdim = which(names(dim(data$hcst$data))==time_dim), + ensdim = which(names(dim(data$hcst$data))==memb_dim), na.rm = na.rm, ncpus = ncores) ) @@ -227,7 +248,7 @@ compute_skill_metrics <- function(recipe, exp, obs, "Some of the requested SpecsVerification metrics are not available.") } capture.output( - skill <- Compute_verif_metrics(exp$data, obs$data, + skill <- Compute_verif_metrics(data$hcst$data, data$obs$data, skill_metrics = metric_name, verif.dims=c("syear", "sday", "sweek"), na.rm = na.rm, @@ -246,6 +267,7 @@ compute_skill_metrics <- function(recipe, exp, obs, } compute_probabilities <- function(recipe, data) { + ## TODO: Do hcst and fcst at the same time if (is.null(recipe$Analysis$ncores)) { ncores <- 1 @@ -313,7 +335,7 @@ compute_probabilities <- function(recipe, data) { if (!("time" %in% names(dim(metric_array)))) { dim(metric_array) <- c("time" = 1, dim(metric_array)) } - # If array has memb_exp (Corr case), change name to 'ensemble' + # If array has memb dim (Corr case), change name to 'ensemble' if ("exp_memb" %in% names(dim(metric_array))) { names(dim(metric_array))[which(names(dim(metric_array)) == "exp_memb")] <- "ensemble" diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index ff0e9fd4..2ed61890 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -28,8 +28,10 @@ plot_data <- function(recipe, if ((is.null(skill_metrics)) && (is.null(calibrated_data)) && (is.null(data$fcst))) { - stop("The Visualization module has been called, but there is no data ", - "that can be plotted.") + error(recipe$Run$logger, "The Visualization module has been called, + but args 'data', 'calibrated_data' and 'skill_metrics', are all NULL + so there is no data that can be plotted.") + stop() } if (is.null(archive)) { @@ -38,7 +40,7 @@ plot_data <- function(recipe, "conf/archive.yml"))$archive } else if (tolower(recipe$Analysis$Horizon) == "decadal") { archive <- read_yaml(paste0(recipe$Run$code_dir, - "conf/archive_decadal.yml"))$archive + "conf/archive_decadal.yml"))$archive } } @@ -74,7 +76,9 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, # Abort if frequency is daily if (recipe$Analysis$Variables$freq == "daily_mean") { - stop("Visualization functions not yet implemented for daily data.") + error(recipe$Run$logger, "Visualization functions not yet implemented + for daily data.") + stop() } # Abort if skill_metrics is not list if (!is.list(skill_metrics) || is.null(names(skill_metrics))) { diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R index f42e2900..ca9d34ee 100644 --- a/modules/test_seasonal.R +++ b/modules/test_seasonal.R @@ -1,23 +1,22 @@ source("modules/Loading/Loading.R") -source("modules/Anomalies/Anomalies.R") source("modules/Calibration/Calibration.R") +source("modules/Anomalies/Anomalies.R") source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") -recipe_file <- "modules/Loading/testing_recipes/recipe_system7c3s-tas.yml" +recipe_file <- "modules/Loading/testing_recipes/recipe_test_anomalies.yml" recipe <- prepare_outputs(recipe_file) # archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive # Load datasets data <- load_datasets(recipe) -# Compute anomalies -data <- compute_anomalies(recipe, data) # Calibrate datasets calibrated_data <- calibrate_datasets(recipe, data) -# Compute skill metrics +# Compute anomalies +calibrated_data <- compute_anomalies(recipe, calibrated_data) ## TODO: Turn arguments into (recipe, data)? -skill_metrics <- compute_skill_metrics(recipe, calibrated_data$hcst, data$obs) +skill_metrics <- compute_skill_metrics(recipe, calibrated_data) # Compute percentiles and probability bins probabilities <- compute_probabilities(recipe, calibrated_data$hcst) # Export all data to netCDF -- GitLab From 126edb974d6925a40a5cdaab21b70f5510e8208f Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 18 Nov 2022 16:24:00 +0100 Subject: [PATCH 08/80] Change data_summary() to use logger without sink() --- tools/data_summary.R | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/tools/data_summary.R b/tools/data_summary.R index 34b6bd6e..4ff30875 100644 --- a/tools/data_summary.R +++ b/tools/data_summary.R @@ -19,14 +19,22 @@ data_summary <- function(data_cube, recipe) { # Create log instance and sink output to logfile and terminal info(recipe$Run$logger, "DATA SUMMARY:") - sink(recipe$Run$logfile, append = TRUE, split = TRUE) - print(paste0(object_name, " months: ", months)) - print(paste0(object_name, " range: ", sdate_min, " to ", sdate_max)) - print(paste0(object_name, " dimensions: ")) - print(dim(data_cube$data)) - print(paste0("Statistical summary of the data in ", object_name, ":")) - print(summary(data_cube$data)) - print("---------------------------------------------") - sink() + # sink(recipe$Run$logfile, append = TRUE, split = TRUE) + info(recipe$Run$logger, paste(object_name, "months:", months)) + info(recipe$Run$logger, paste(object_name, "range:", sdate_min, "to", + sdate_max)) + info(recipe$Run$logger, paste(object_name, "dimensions:")) + output_string <- capture.output(dim(data_cube$data)) + for (i in output_string) { + info(recipe$Run$logger, i) + } + info(recipe$Run$logger, paste0("Statistical summary of the data in ", + object_name, ":")) + output_string <- capture.output(summary(data_cube$data)) + for (i in output_string) { + info(recipe$Run$logger, i) + } + info(recipe$Run$logger, "---------------------------------------------") + # sink() } -- GitLab From 1c9b24ff781a5c173afe8ff2fa54ba26470a1876 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 21 Nov 2022 16:16:39 +0100 Subject: [PATCH 09/80] List TODOs and new variables --- modules/Saving/paths2save.R | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/modules/Saving/paths2save.R b/modules/Saving/paths2save.R index f48ebe7b..3e80614e 100644 --- a/modules/Saving/paths2save.R +++ b/modules/Saving/paths2save.R @@ -1,10 +1,19 @@ ## TODO: Separate by time aggregation get_filename <- function(dir, var, date, fcst.sdate, agg, horizon, file.type) { + ## TODO: Add 'recipe' as argument # This function builds the path of the output file based on directory, # variable, forecast date, startdate, aggregation, forecast horizon and # type of metric/forecast/probability. - + + ## if scorecards: + ## scorecards_systemshortname_referenceshortname_variable_hcstsyear-hcstendyear_smm.nc + ## mm == shortdate + ## systemshortname <- get from dictionary or archive + ## referenceshortname <- get from dictionary or archive + ## hcstsyear <- get from recipe + ## hcstendyear <- get from recipe + ## variable <- hmmmmmmmmm get from cube? if (horizon == "subseasonal") { shortdate <- format(as.Date(as.character(fcst.sdate), "%Y%m%d"), "%V") dd <- "week" @@ -37,10 +46,19 @@ get_dir <- function(recipe, agg = "global") { # startdate, and aggregation. ## TODO: Get aggregation from recipe - ## TODO: Add time frequency outdir <- paste0(recipe$Run$output_dir, "/outputs/") + ## TODO: mutlivar case variable <- recipe$Analysis$Variables$name + + if (tolower(recipe$Analysis$Output_format) == 'scorecards') { + ## Needed: - scorecards system shortname (e.g. "ecmwfs5") + ## - variable name + system_name <- recipe$Analysis$Datasets$system$name + + } + + if (!is.null(recipe$Analysis$Time$fcst_year)) { if (tolower(recipe$Analysis$Horizon) == 'decadal') { #PROBLEM: decadal doesn't have sdate -- GitLab From 501cc9380f1f63e28dc67c6bdc759e2aebd185a1 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 21 Nov 2022 17:14:57 +0100 Subject: [PATCH 10/80] Fix pipeline --- tests/recipes/recipe-decadal_daily_1.yml | 3 +++ tests/recipes/recipe-decadal_monthly_1.yml | 3 +++ tests/recipes/recipe-decadal_monthly_2.yml | 3 +++ tests/recipes/recipe-decadal_monthly_3.yml | 3 +++ tests/recipes/recipe-seasonal_daily_1.yml | 3 +++ tests/recipes/recipe-seasonal_monthly_1.yml | 3 +++ tests/testthat/test-decadal_monthly_1.R | 4 ++-- tests/testthat/test-decadal_monthly_2.R | 4 ++-- tests/testthat/test-decadal_monthly_3.R | 4 ++-- tests/testthat/test-seasonal_daily.R | 4 ++-- tests/testthat/test-seasonal_monthly.R | 4 ++-- 11 files changed, 28 insertions(+), 10 deletions(-) diff --git a/tests/recipes/recipe-decadal_daily_1.yml b/tests/recipes/recipe-decadal_daily_1.yml index 90048009..ab0fb9a6 100644 --- a/tests/recipes/recipe-decadal_daily_1.yml +++ b/tests/recipes/recipe-decadal_daily_1.yml @@ -29,6 +29,9 @@ Analysis: method: bilinear type: to_system #to_reference Workflow: + Anomalies: + compute: no + cross-validation: Calibration: method: qmap Skill: diff --git a/tests/recipes/recipe-decadal_monthly_1.yml b/tests/recipes/recipe-decadal_monthly_1.yml index cedb07f0..35b55b1a 100644 --- a/tests/recipes/recipe-decadal_monthly_1.yml +++ b/tests/recipes/recipe-decadal_monthly_1.yml @@ -29,6 +29,9 @@ Analysis: method: bilinear type: to_system #to_reference Workflow: + Anomalies: + compute: no + cross-validation: Calibration: method: bias Skill: diff --git a/tests/recipes/recipe-decadal_monthly_2.yml b/tests/recipes/recipe-decadal_monthly_2.yml index 6c33d302..49824f42 100644 --- a/tests/recipes/recipe-decadal_monthly_2.yml +++ b/tests/recipes/recipe-decadal_monthly_2.yml @@ -29,6 +29,9 @@ Analysis: method: bilinear type: to_system Workflow: + Anomalies: + compute: no + cross-validation: Calibration: method: raw Skill: diff --git a/tests/recipes/recipe-decadal_monthly_3.yml b/tests/recipes/recipe-decadal_monthly_3.yml index 87197af6..1e2daa70 100644 --- a/tests/recipes/recipe-decadal_monthly_3.yml +++ b/tests/recipes/recipe-decadal_monthly_3.yml @@ -29,6 +29,9 @@ Analysis: method: bilinear type: to_system Workflow: + Anomalies: + compute: no + cross-validation: Calibration: method: 'evmos' Skill: diff --git a/tests/recipes/recipe-seasonal_daily_1.yml b/tests/recipes/recipe-seasonal_daily_1.yml index fc1bc58c..637b5371 100644 --- a/tests/recipes/recipe-seasonal_daily_1.yml +++ b/tests/recipes/recipe-seasonal_daily_1.yml @@ -28,6 +28,9 @@ Analysis: method: conservative type: to_system Workflow: + Anomalies: + compute: no + cross-validation: Calibration: method: qmap Skill: diff --git a/tests/recipes/recipe-seasonal_monthly_1.yml b/tests/recipes/recipe-seasonal_monthly_1.yml index 0e1b6f4b..e75ccad5 100644 --- a/tests/recipes/recipe-seasonal_monthly_1.yml +++ b/tests/recipes/recipe-seasonal_monthly_1.yml @@ -28,6 +28,9 @@ Analysis: method: bilinear type: to_system Workflow: + Anomalies: + compute: no + cross-validation: Calibration: method: mse_min Skill: diff --git a/tests/testthat/test-decadal_monthly_1.R b/tests/testthat/test-decadal_monthly_1.R index 5cf1922e..d78bb322 100644 --- a/tests/testthat/test-decadal_monthly_1.R +++ b/tests/testthat/test-decadal_monthly_1.R @@ -24,7 +24,7 @@ suppressWarnings({invisible(capture.output( # Compute skill metrics suppressWarnings({invisible(capture.output( -skill_metrics <- compute_skill_metrics(recipe, calibrated_data$hcst, data$obs) +skill_metrics <- compute_skill_metrics(recipe, calibrated_data) ))}) suppressWarnings({invisible(capture.output( probs <- compute_probabilities(recipe, calibrated_data$hcst) @@ -137,7 +137,7 @@ TRUE ) expect_equal( names(calibrated_data), -c("hcst", "fcst") +c("hcst", "obs", "fcst") ) expect_equal( class(calibrated_data$hcst), diff --git a/tests/testthat/test-decadal_monthly_2.R b/tests/testthat/test-decadal_monthly_2.R index 4dd72ebf..f3f91ec3 100644 --- a/tests/testthat/test-decadal_monthly_2.R +++ b/tests/testthat/test-decadal_monthly_2.R @@ -22,7 +22,7 @@ suppressWarnings({invisible(capture.output( # Compute skill metrics suppressMessages({invisible(capture.output( -skill_metrics <- compute_skill_metrics(recipe, calibrated_data$hcst, data$obs) +skill_metrics <- compute_skill_metrics(recipe, calibrated_data) ))}) suppressWarnings({invisible(capture.output( probs <- compute_probabilities(recipe, calibrated_data$hcst) @@ -132,7 +132,7 @@ test_that("2. Calibration", { expect_equal( names(calibrated_data), -c("hcst", "fcst") +c("hcst", "obs", "fcst") ) expect_equal( calibrated_data, diff --git a/tests/testthat/test-decadal_monthly_3.R b/tests/testthat/test-decadal_monthly_3.R index 7535e8dc..70e98160 100644 --- a/tests/testthat/test-decadal_monthly_3.R +++ b/tests/testthat/test-decadal_monthly_3.R @@ -23,7 +23,7 @@ suppressWarnings({invisible(capture.output( # Compute skill metrics suppressWarnings({invisible(capture.output( -skill_metrics <- compute_skill_metrics(recipe, calibrated_data$hcst, data$obs) +skill_metrics <- compute_skill_metrics(recipe, calibrated_data) ))}) suppressWarnings({invisible(capture.output( probs <- compute_probabilities(recipe, calibrated_data$hcst) @@ -110,7 +110,7 @@ test_that("2. Calibration", { expect_equal( names(calibrated_data), -c("hcst", "fcst") +c("hcst", "obs", "fcst") ) expect_equal( as.vector(aperm(drop(calibrated_data$hcst$data), c(5, 1:4))[3, , 2, 2, 2]), diff --git a/tests/testthat/test-seasonal_daily.R b/tests/testthat/test-seasonal_daily.R index 5b771d77..94955a7b 100644 --- a/tests/testthat/test-seasonal_daily.R +++ b/tests/testthat/test-seasonal_daily.R @@ -19,7 +19,7 @@ calibrated_data <- calibrate_datasets(recipe, data) # Compute skill metrics suppressWarnings({invisible(capture.output( -skill_metrics <- compute_skill_metrics(recipe, calibrated_data$hcst, data$obs) +skill_metrics <- compute_skill_metrics(recipe, calibrated_data) ))}) test_that("1. Loading", { @@ -106,7 +106,7 @@ TRUE ) expect_equal( names(calibrated_data), -c("hcst", "fcst") +c("hcst", "obs", "fcst") ) expect_equal( class(calibrated_data$hcst), diff --git a/tests/testthat/test-seasonal_monthly.R b/tests/testthat/test-seasonal_monthly.R index 86feedfb..b53c7291 100644 --- a/tests/testthat/test-seasonal_monthly.R +++ b/tests/testthat/test-seasonal_monthly.R @@ -22,7 +22,7 @@ calibrated_data <- calibrate_datasets(recipe, data) # Compute skill metrics suppressWarnings({invisible(capture.output( -skill_metrics <- compute_skill_metrics(recipe, calibrated_data$hcst, data$obs) +skill_metrics <- compute_skill_metrics(recipe, calibrated_data) ))}) suppressWarnings({invisible(capture.output( @@ -133,7 +133,7 @@ TRUE ) expect_equal( names(calibrated_data), -c("hcst", "fcst") +c("hcst", "obs", "fcst") ) expect_equal( class(calibrated_data$hcst), -- GitLab From 3fd7736b61472192f7d7b09316a02d23224fa738 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 22 Nov 2022 09:04:58 +0100 Subject: [PATCH 11/80] Provisionally adapt decadal unit test --- tests/testthat/test-decadal_monthly_2.R | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-decadal_monthly_2.R b/tests/testthat/test-decadal_monthly_2.R index f3f91ec3..6549ce0e 100644 --- a/tests/testthat/test-decadal_monthly_2.R +++ b/tests/testthat/test-decadal_monthly_2.R @@ -134,10 +134,11 @@ expect_equal( names(calibrated_data), c("hcst", "obs", "fcst") ) -expect_equal( -calibrated_data, -data[1:2] -) +## TODO: Ask An-Chi about this test +# expect_equal( +# calibrated_data, +# data[1:2] +# ) }) -- GitLab From 98acf1a76d41fb74faf661f5b2c94aca52d08bda Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 23 Nov 2022 12:12:01 +0100 Subject: [PATCH 12/80] Add 'Scorecards' output format option to Saving module --- conf/output_dictionaries/scorecards.yml | 37 ++++++ .../testing_recipes/recipe_seasonal-tests.yml | 46 +++++++ modules/Saving/Saving.R | 30 ++--- modules/Saving/paths2save.R | 124 ++++++++++-------- modules/test_seasonal.R | 2 +- 5 files changed, 166 insertions(+), 73 deletions(-) create mode 100644 conf/output_dictionaries/scorecards.yml create mode 100644 modules/Loading/testing_recipes/recipe_seasonal-tests.yml diff --git a/conf/output_dictionaries/scorecards.yml b/conf/output_dictionaries/scorecards.yml new file mode 100644 index 00000000..4503af0f --- /dev/null +++ b/conf/output_dictionaries/scorecards.yml @@ -0,0 +1,37 @@ +System: + system5c3s: + short_name: "ecmwfs5" + display_name: "ECMWF System 5" + system7c3s: + short_name: "meteofrances7" + display_name: "Meteo-France System 7" + system21_m1: + short_name: "dwds21" + display_name: "DWD System 21" + system35c3s: + short_name: "cmccs35" + display_name: "CMM System 35" + system2c3s: + short_name: "jmas2" + display_name: "JMA System 2" + eccc1: + short_name: "ecccs1" + display_name: "ECCC System 1" + glosea6_system600-c3s: + short_name: "ukmos600" + display_name: "UK Met Office System 600" + ncep-cfsv2: + short_name: "nceps2" + system_name: "NCEP System 2" +Reference: + era5: + short_name: "era5" + display_name: "ERA5" + era5land: + short_name: "era5land" + display_name: "ERA5-Land" + uerra: + short_name: "uerra_mescan" + display_name: "UERRA MESCAN" + + diff --git a/modules/Loading/testing_recipes/recipe_seasonal-tests.yml b/modules/Loading/testing_recipes/recipe_seasonal-tests.yml new file mode 100644 index 00000000..e1c3b56d --- /dev/null +++ b/modules/Loading/testing_recipes/recipe_seasonal-tests.yml @@ -0,0 +1,46 @@ +Description: + Author: V. Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: system7c3s + Multimodel: False + Reference: + name: era5 + Time: + sdate: '1101' + fcst_year: '2020' + hcst_start: '2000' + hcst_end: '2015' + ftime_min: 1 + ftime_max: 2 + Region: + latmin: -10 + latmax: 10 + lonmin: 0 + lonmax: 20 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: mse_min + Skill: + metric: RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr mean_bias mean_bias_SS + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] + Indicators: + index: no + ncores: 7 + remove_NAs: yes + Output_format: Scorecards +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index ed0933f2..961bacee 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -273,9 +273,8 @@ save_forecast <- function(data_cube, time <- times$time # Generate name of output file - outfile <- get_filename(outdir, data_cube$Variable$varName, - fcst.sdate, fcst.sdate, - agg, fcst.horizon, "exp") + outfile <- get_filename(outdir, recipe, data_cube$Variable$varName, + fcst.sdate, agg, "exp") # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { @@ -409,9 +408,8 @@ save_observations <- function(data_cube, time <- times$time # Generate name of output file - outfile <- get_filename(outdir, data_cube$Variable$varName, - fcst.sdate, fcst.sdate, - agg, fcst.horizon, "obs") + outfile <- get_filename(outdir, recipe, data_cube$Variable$varName, + fcst.sdate, agg, "obs") # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { @@ -527,9 +525,8 @@ save_metrics <- function(skill, time <- times$time # Generate name of output file - outfile <- get_filename(outdir, data_cube$Variable$varName, - fcst.sdate, fcst.sdate, - agg, fcst.horizon, "skill") + outfile <- get_filename(outdir, recipe, data_cube$Variable$varName, + fcst.sdate, agg, "skill") # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { @@ -634,9 +631,8 @@ save_corr <- function(skill, time <- times$time # Generate name of output file - outfile <- get_filename(outdir, data_cube$Variable$varName, - fcst.sdate, fcst.sdate, - agg, fcst.horizon, "corr") + outfile <- get_filename(outdir, recipe, data_cube$Variable$varName, + fcst.sdate, agg, "corr") # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { @@ -734,9 +730,8 @@ save_percentiles <- function(percentiles, time <- times$time # Generate name of output file - outfile <- get_filename(outdir, data_cube$Variable$varName, - fcst.sdate, fcst.sdate, - agg, fcst.horizon, "percentiles") + outfile <- get_filename(outdir, recipe, data_cube$Variable$varName, + fcst.sdate, agg, "percentiles") # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { @@ -842,9 +837,8 @@ save_probabilities <- function(probs, time <- times$time # Generate name of output file - outfile <- get_filename(outdir, data_cube$Variable$varName, - fcst.sdate, fcst.sdate, - agg, fcst.horizon, "probs") + outfile <- get_filename(outdir, recipe, data_cube$Variable$varName, + fcst.sdate, agg, "probs") # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { diff --git a/modules/Saving/paths2save.R b/modules/Saving/paths2save.R index 3e80614e..2d6353fe 100644 --- a/modules/Saving/paths2save.R +++ b/modules/Saving/paths2save.R @@ -1,40 +1,54 @@ ## TODO: Separate by time aggregation -get_filename <- function(dir, var, date, fcst.sdate, agg, horizon, file.type) { - ## TODO: Add 'recipe' as argument +get_filename <- function(dir, recipe, var, date, agg, file.type) { # This function builds the path of the output file based on directory, # variable, forecast date, startdate, aggregation, forecast horizon and # type of metric/forecast/probability. - ## if scorecards: - ## scorecards_systemshortname_referenceshortname_variable_hcstsyear-hcstendyear_smm.nc - ## mm == shortdate - ## systemshortname <- get from dictionary or archive - ## referenceshortname <- get from dictionary or archive - ## hcstsyear <- get from recipe - ## hcstendyear <- get from recipe - ## variable <- hmmmmmmmmm get from cube? - if (horizon == "subseasonal") { - shortdate <- format(as.Date(as.character(fcst.sdate), "%Y%m%d"), "%V") + if (recipe$Analysis$Horizon == "subseasonal") { + shortdate <- format(as.Date(as.character(date), "%Y%m%d"), "%V") dd <- "week" } else { - shortdate <- format(as.Date(as.character(fcst.sdate), "%Y%m%d"), "%m") + shortdate <- format(as.Date(as.character(date), "%Y%m%d"), "%m") dd <- "month" } - + switch(tolower(agg), "country" = {gg <- "-country"}, "global" = {gg <- ""}) - switch(file.type, - "skill" = {file <- paste0(var, gg, "-skill_", dd, shortdate)}, - "corr" = {file <- paste0(var, gg, "-corr_", dd, shortdate)}, - "exp" = {file <- paste0(var, gg, "_", date)}, - "obs" = {file <- paste0(var, gg, "-obs_", date)}, - "percentiles" = {file <- paste0(var, gg, "-percentiles_", dd, - shortdate)}, - "probs" = {file <- paste0(var, gg, "-probs_", date)}, - "bias" = {file <- paste0(var, gg, "-bias_", date)}) + if (tolower(recipe$Analysis$Output_format) == 'scorecards') { + # Define output dir name accordint to Scorecards format + dict <- read_yaml("conf/output_dictionaries/scorecards.yml") + # Get necessary names + system <- dict$System[[recipe$Analysis$Datasets$System$name]]$short_name + reference <- dict$Reference[[recipe$Analysis$Datasets$Reference$name]]$short_name + hcst_start <- recipe$Analysis$Time$hcst_start + hcst_end <- recipe$Analysis$Time$hcst_end + + switch(file.type, + "skill" = {type_info <- "-skill_"}, + "corr" = {type_info <- "-corr_"}, + "exp" = {type_info <- paste0("_", date, "_")}, + "obs" = {type_info <- paste0("-obs_", date, "_")}, + "percentiles" = {type_info <- "-percentiles_"}, + "probs" = {type_info <- paste0("-probs_", date, "_")}, + "bias" = {type_info <- paste0("-bias_", date, "_")}) + + # Build file name + file <- paste0("scorecards_", system, "_", reference, "_", + var, type_info, hcst_start, "-", hcst_end, "_s", shortdate) + } else { + switch(file.type, + "skill" = {file <- paste0(var, gg, "-skill_", dd, shortdate)}, + "corr" = {file <- paste0(var, gg, "-corr_", dd, shortdate)}, + "exp" = {file <- paste0(var, gg, "_", date)}, + "obs" = {file <- paste0(var, gg, "-obs_", date)}, + "percentiles" = {file <- paste0(var, gg, "-percentiles_", dd, + shortdate)}, + "probs" = {file <- paste0(var, gg, "-probs_", date)}, + "bias" = {file <- paste0(var, gg, "-bias_", date)}) + } return(paste0(dir, file, ".nc")) @@ -48,45 +62,47 @@ get_dir <- function(recipe, agg = "global") { ## TODO: Get aggregation from recipe outdir <- paste0(recipe$Run$output_dir, "/outputs/") - ## TODO: mutlivar case + ## TODO: multivar case variable <- recipe$Analysis$Variables$name if (tolower(recipe$Analysis$Output_format) == 'scorecards') { - ## Needed: - scorecards system shortname (e.g. "ecmwfs5") - ## - variable name - system_name <- recipe$Analysis$Datasets$system$name - - } - + # Define output dir name accordint to Scorecards format + dict <- read_yaml("conf/output_dictionaries/scorecards.yml") + system <- dict$System[[recipe$Analysis$Datasets$System$name]]$short_name + dir <- paste0(outdir, "/", system, "/", variable, "/") - if (!is.null(recipe$Analysis$Time$fcst_year)) { - if (tolower(recipe$Analysis$Horizon) == 'decadal') { - #PROBLEM: decadal doesn't have sdate - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, collapse = '_') - } else { - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) - } } else { - if (tolower(recipe$Analysis$Horizon) == 'decadal') { - #PROBLEM: decadal doesn't have sdate - fcst.sdate <- paste0("hcst-", paste(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$hcst_end, sep = '_')) + # Default generic output format based on FOCUS + if (!is.null(recipe$Analysis$Time$fcst_year)) { + if (tolower(recipe$Analysis$Horizon) == 'decadal') { + #PROBLEM: decadal doesn't have sdate + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, collapse = '_') + } else { + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + } } else { - fcst.sdate <- paste0("hcst-", recipe$Analysis$Time$sdate) + if (tolower(recipe$Analysis$Horizon) == 'decadal') { + #PROBLEM: decadal doesn't have sdate + fcst.sdate <- paste0("hcst-", paste(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$hcst_end, + sep = '_')) + } else { + fcst.sdate <- paste0("hcst-", recipe$Analysis$Time$sdate) + } } - } - - calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method) - store.freq <- recipe$Analysis$Variables$freq - switch(tolower(agg), - "country" = {dir <- paste0(outdir, "/", calib.method, "-", - store.freq, "/", variable, - "_country/", fcst.sdate, "/")}, - "global" = {dir <- paste0(outdir, "/", calib.method, "-", - store.freq, "/", variable, "/", - fcst.sdate, "/")}) - + calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method) + store.freq <- recipe$Analysis$Variables$freq + + switch(tolower(agg), + "country" = {dir <- paste0(outdir, "/", calib.method, "-", + store.freq, "/", variable, + "_country/", fcst.sdate, "/")}, + "global" = {dir <- paste0(outdir, "/", calib.method, "-", + store.freq, "/", variable, "/", + fcst.sdate, "/")}) + } return(dir) } diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R index d8eb5c4e..fba75bfe 100644 --- a/modules/test_seasonal.R +++ b/modules/test_seasonal.R @@ -4,7 +4,7 @@ source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") -recipe_file <- "modules/Loading/testing_recipes/recipe_system7c3s-tas.yml" +recipe_file <- "modules/Loading/testing_recipes/recipe_seasonal-tests.yml" recipe <- prepare_outputs(recipe_file) # archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive -- GitLab From fa67c1d9688855f62945aa75bd5a4319f93b7d95 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 23 Nov 2022 12:23:11 +0100 Subject: [PATCH 13/80] Correct typos --- conf/output_dictionaries/scorecards.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/output_dictionaries/scorecards.yml b/conf/output_dictionaries/scorecards.yml index 4503af0f..c5071987 100644 --- a/conf/output_dictionaries/scorecards.yml +++ b/conf/output_dictionaries/scorecards.yml @@ -10,7 +10,7 @@ System: display_name: "DWD System 21" system35c3s: short_name: "cmccs35" - display_name: "CMM System 35" + display_name: "CMCC System 35" system2c3s: short_name: "jmas2" display_name: "JMA System 2" @@ -22,7 +22,7 @@ System: display_name: "UK Met Office System 600" ncep-cfsv2: short_name: "nceps2" - system_name: "NCEP System 2" + display_name: "NCEP System 2" Reference: era5: short_name: "era5" -- GitLab From ab8813b8dbbefe3401341295caf5988dcf8591c1 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 24 Nov 2022 12:42:40 +0100 Subject: [PATCH 14/80] Add 'anomaly' to metadata long name --- modules/Anomalies/Anomalies.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R index 3e24f3fe..ff86c9e3 100644 --- a/modules/Anomalies/Anomalies.R +++ b/modules/Anomalies/Anomalies.R @@ -45,6 +45,9 @@ compute_anomalies <- function(recipe, data) { data$hcst <- anom$exp data$obs <- anom$obs remove(anom, clim) + # Change variable names + data$hcst$Variable$varName <- paste0(data$hcst$Variable$varName, "anomaly") + data$obs$Variable$varName <- paste0(data$obs$Variable$varName, "anomaly") ## TODO: Compute forecast anomaly field if (!is.null(data$fcst)) { @@ -56,6 +59,7 @@ compute_anomalies <- function(recipe, data) { dim(mean_clim_hcst) <- c(dims, ensemble = dim(data$fcst$data)[['ensemble']]) data$fcst$data <- data$fcst$data - mean_clim_hcst + data$fcst$Variable$varName <- paste0(data$fcst$Variable$varName, "anomaly") info(recipe$Run$logger, "The anomalies have been computed, ", cross_msg, " cross-validation. The original full fields are returned in $hcst_full.val and @@ -73,6 +77,8 @@ compute_anomalies <- function(recipe, data) { info(recipe$Run$logger, "##### ANOMALIES NOT COMPUTED #####") } + ## TODO: Return fcst full value? + return(list(hcst = data$hcst, obs = data$obs, fcst = data$fcst, hcst_full.val = hcst_fullvalue, obs_full.val = obs_fullvalue)) -- GitLab From f4059b95c40a63be6afa77cbccba681aa12c19a6 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 24 Nov 2022 12:44:28 +0100 Subject: [PATCH 15/80] Add 'anomaly' to metadata and include commented code to change variable name --- modules/Anomalies/Anomalies.R | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R index ff86c9e3..a1d3e0ca 100644 --- a/modules/Anomalies/Anomalies.R +++ b/modules/Anomalies/Anomalies.R @@ -45,9 +45,13 @@ compute_anomalies <- function(recipe, data) { data$hcst <- anom$exp data$obs <- anom$obs remove(anom, clim) - # Change variable names - data$hcst$Variable$varName <- paste0(data$hcst$Variable$varName, "anomaly") - data$obs$Variable$varName <- paste0(data$obs$Variable$varName, "anomaly") + # Change variable long names + # data$hcst$Variable$varName <- paste0(data$hcst$Variable$varName, "anomaly") + attr(data$hcst$Variable, "variable")$long_name <- + paste(attr(data$hcst$Variable, "variable")$long_name, "anomaly") + # data$obs$Variable$varName <- paste0(data$obs$Variable$varName, "anomaly") + attr(data$obs$Variable, "variable")$long_name <- + paste(attr(data$obs$Variable, "variable")$long_name, "anomaly") ## TODO: Compute forecast anomaly field if (!is.null(data$fcst)) { @@ -59,7 +63,9 @@ compute_anomalies <- function(recipe, data) { dim(mean_clim_hcst) <- c(dims, ensemble = dim(data$fcst$data)[['ensemble']]) data$fcst$data <- data$fcst$data - mean_clim_hcst - data$fcst$Variable$varName <- paste0(data$fcst$Variable$varName, "anomaly") + # data$fcst$Variable$varName <- paste0(data$fcst$Variable$varName, "anomaly") + attr(data$hcst$Variable, "variable")$long_name <- + paste(attr(data$hcst$Variable, "variable")$long_name, "anomaly") info(recipe$Run$logger, "The anomalies have been computed, ", cross_msg, " cross-validation. The original full fields are returned in $hcst_full.val and -- GitLab From c6c865b6b11a2133968f576e08230e1f7a536ba4 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 25 Nov 2022 11:44:09 +0100 Subject: [PATCH 16/80] Add TODOs to deprecated calibrated_data param --- modules/Saving/Saving.R | 2 ++ modules/Visualization/Visualization.R | 2 ++ 2 files changed, 4 insertions(+) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 97af1dca..88ab44cd 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -8,6 +8,8 @@ save_data <- function(recipe, data, probabilities = NULL, archive = NULL) { + ## TODO: Deprecate calibrated_data? + # Wrapper for the saving functions. # recipe: The auto-s2s recipe # archive: The auto-s2s archive diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 2ed61890..60042f4b 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -14,6 +14,8 @@ plot_data <- function(recipe, archive = NULL, significance = F) { + ## TODO: Depreate calibrated_data + # Try to produce and save several basic plots. # recipe: the auto-s2s recipe as read by read_yaml() # archive: the auto-s2s archive as read by read_yaml() -- GitLab From 4a2eea3c46352be047f9a6bbe2e21be39efce9fd Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 25 Nov 2022 15:43:24 +0100 Subject: [PATCH 17/80] Modify compute_probabilities() to return fcst probabilities as well --- modules/Saving/Saving.R | 2 ++ modules/Skill/Skill.R | 52 ++++++++++++++++++++++++--- modules/Visualization/Visualization.R | 36 +++++++++---------- modules/test_seasonal.R | 2 +- 4 files changed, 67 insertions(+), 25 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 88ab44cd..58908209 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -82,6 +82,8 @@ save_data <- function(recipe, data, archive = archive) save_probabilities(probabilities$probs, recipe, data$hcst, outdir, archive = archive) + save_probabilities(probabilities$probs_fcst, recipe, data$fcst, outdir, + archive = archive) } } diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 3503078a..8930bd17 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -282,7 +282,9 @@ compute_probabilities <- function(recipe, data) { } named_probs <- list() + named_probs_fcst <- list() named_quantiles <- list() + if (is.null(recipe$Analysis$Workflow$Probabilities$percentiles)) { error(recipe$Run$logger, "Quantiles and probability bins have been requested, but no thresholds are provided in the recipe.") @@ -291,12 +293,13 @@ compute_probabilities <- function(recipe, data) { for (element in recipe$Analysis$Workflow$Probabilities$percentiles) { # Parse thresholds in recipe thresholds <- sapply(element, function (x) eval(parse(text = x))) - quants <- compute_quants(data$data, thresholds, + quants <- compute_quants(data$hcst$data, thresholds, ncores = ncores, na.rm = na.rm) - probs <- compute_probs(data$data, quants, + probs <- compute_probs(data$hcst$data, quants, ncores = ncores, na.rm = na.rm) + for (i in seq(1:dim(quants)['bin'][[1]])) { named_quantiles <- append(named_quantiles, list(ClimProjDiags::Subset(quants, @@ -318,13 +321,52 @@ compute_probabilities <- function(recipe, data) { 'bin', i))) names(named_probs)[length(named_probs)] <- name_i } + + # Compute fcst probability bins + if (!is.null(data$fcst)) { + probs_fcst <- compute_probs(data$fcst$data, quants, + ncores = ncores, + na.rm = na.rm) + + for (i in seq(1:dim(probs_fcst)['bin'][[1]])) { + if (i == 1) { + name_i <- paste0("prob_b", as.integer(thresholds[1]*100)) + } else if (i == dim(probs_fcst)['bin'][[1]]) { + name_i <- paste0("prob_a", as.integer(thresholds[i-1]*100)) + } else { + name_i <- paste0("prob_", as.integer(thresholds[i-1]*100), "_to_", + as.integer(thresholds[i]*100)) + } + named_probs_fcst <- append(named_probs_fcst, + list(ClimProjDiags::Subset(probs_fcst, + 'bin', i))) + names(named_probs_fcst)[length(named_probs_fcst)] <- name_i + } + } } + + # Rearrange dimensions and return probabilities named_probs <- lapply(named_probs, function(x) {.drop_dims(x)}) named_quantiles <- lapply(named_quantiles, function(x) {.drop_dims(x)}) + if (!is.null(data$fcst)) { + fcst_years <- dim(data$fcst$data)[['syear']] + named_probs_fcst <- lapply(named_probs_fcst, + function(x) {Subset(x, + along = 'syear', + indices = 1:fcst_years, + drop = 'non-selected')}) + results <- list(probs = named_probs, + probs_fcst = named_probs_fcst, + percentiles = named_quantiles) + } else { + results <- list(probs = named_probs, + percentiles = named_quantiles) + } + + info(recipe$Run$logger, + "##### PERCENTILES AND PROBABILITY CATEGORIES COMPUTED #####") + return(results) } - info(recipe$Run$logger, - "##### PERCENTILES AND PROBABILITY CATEGORIES COMPUTED #####") - return(list(probs=named_probs, percentiles=named_quantiles)) } ## TODO: Replace with ClimProjDiags::Subset diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 60042f4b..1da08531 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -64,7 +64,7 @@ plot_data <- function(recipe, # Plot Most Likely Terciles if ((!is.null(probabilities)) && (!is.null(calibrated_data$fcst))) { plot_most_likely_terciles(recipe, archive, calibrated_data$fcst, - probabilities$percentiles, outdir) + probabilities, outdir) } else if ((!is.null(probabilities)) && (!is.null(data$fcst))) { warn(recipe$Run$logger, "Only the uncalibrated forecast was provided. Using this data to plot the most likely terciles.") @@ -275,7 +275,7 @@ plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { plot_most_likely_terciles <- function(recipe, archive, fcst, - percentiles, + probabilities, outdir) { # Abort if frequency is daily @@ -289,26 +289,23 @@ plot_most_likely_terciles <- function(recipe, archive, variable <- recipe$Analysis$Variables$name start_date <- paste0(recipe$Analysis$Time$fcst_year, recipe$Analysis$Time$sdate) - if (is.null(recipe$Analysis$remove_NAs)) { - recipe$Analysis$remove_NAs <- FALSE - } - if (is.null(recipe$Analysis$ncores)) { - recipe$Analysis$ncores <- 1 - } - # Compute probability bins for the forecast - if (is.null(percentiles$percentile_33) | is.null(percentiles$percentile_33)) { - stop("The quantile array does not contain the 33rd and 66th percentiles,", - " the most likely tercile map cannot be plotted.") + # Retrieve and rearrange probability bins for the forecast + if (is.null(probabilities$probs_fcst$prob_b33) || + is.null(probabilities$probs_fcst$prob_33_to_66) || + is.null(probabilities$probs_fcst$prob_a66)) { + stop("The forecast tercile probability bins are not present inside ", + "'probabilities', the most likely tercile map cannot be plotted.") } - terciles <- abind(percentiles$percentile_33, percentiles$percentile_66, - along = 0) - names(dim(terciles)) <- c("bin", names(dim(percentiles$percentile_33))) - probs_fcst <- compute_probs(fcst$data, terciles, - ncores = recipe$Analysis$ncores, - na.rm = recipe$Analysis$remove_NAs) + probs_fcst <- abind(probabilities$probs_fcst$prob_b33, + probabilities$probs_fcst$prob_33_to_66, + probabilities$probs_fcst$prob_a66, + along = 0) + names(dim(probs_fcst)) <- c("bin", + names(dim(probabilities$probs_fcst$prob_b33))) + ## TODO: Improve this section # Drop extra dims, add time dim if missing: probs_fcst <- drop(probs_fcst) if (!("time" %in% names(dim(probs_fcst)))) { @@ -317,7 +314,8 @@ plot_most_likely_terciles <- function(recipe, archive, if (!'syear' %in% names(dim(probs_fcst))) { probs_fcst <- Reorder(probs_fcst, c("time", "bin", "longitude", "latitude")) } else { - probs_fcst <- Reorder(probs_fcst, c("syear", "time", "bin", "longitude", "latitude")) + probs_fcst <- Reorder(probs_fcst, + c("syear", "time", "bin", "longitude", "latitude")) } for (i_syear in start_date) { diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R index ca9d34ee..2e2429d7 100644 --- a/modules/test_seasonal.R +++ b/modules/test_seasonal.R @@ -18,7 +18,7 @@ calibrated_data <- compute_anomalies(recipe, calibrated_data) ## TODO: Turn arguments into (recipe, data)? skill_metrics <- compute_skill_metrics(recipe, calibrated_data) # Compute percentiles and probability bins -probabilities <- compute_probabilities(recipe, calibrated_data$hcst) +probabilities <- compute_probabilities(recipe, calibrated_data) # Export all data to netCDF save_data(recipe, data, calibrated_data, skill_metrics, probabilities) # Plot data -- GitLab From e6ebb28b900ba90080524b860557456272519176 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 28 Nov 2022 09:23:26 +0100 Subject: [PATCH 18/80] Adapt seasonal monthly unit test --- tests/testthat/test-seasonal_monthly.R | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test-seasonal_monthly.R b/tests/testthat/test-seasonal_monthly.R index b53c7291..39a1cebb 100644 --- a/tests/testthat/test-seasonal_monthly.R +++ b/tests/testthat/test-seasonal_monthly.R @@ -26,7 +26,7 @@ skill_metrics <- compute_skill_metrics(recipe, calibrated_data) ))}) suppressWarnings({invisible(capture.output( -probs <- compute_probabilities(recipe, calibrated_data$hcst) +probs <- compute_probabilities(recipe, calibrated_data) ))}) # Saving @@ -216,10 +216,12 @@ test_that("4. Saving", { expect_equal( list.files(outdir), -c("plots", "tas_19931101.nc", "tas_19941101.nc", "tas_19951101.nc", "tas_19961101.nc", "tas_20201101.nc", - "tas-corr_month11.nc", "tas-obs_19931101.nc", "tas-obs_19941101.nc", "tas-obs_19951101.nc", - "tas-obs_19961101.nc", "tas-percentiles_month11.nc", "tas-probs_19931101.nc", - "tas-probs_19941101.nc", "tas-probs_19951101.nc", "tas-probs_19961101.nc", "tas-skill_month11.nc") +c("plots", "tas_19931101.nc", "tas_19941101.nc", "tas_19951101.nc", + "tas_19961101.nc", "tas_20201101.nc", "tas-corr_month11.nc", + "tas-obs_19931101.nc", "tas-obs_19941101.nc", "tas-obs_19951101.nc", + "tas-obs_19961101.nc", "tas-percentiles_month11.nc", "tas-probs_19931101.nc", + "tas-probs_19941101.nc", "tas-probs_19951101.nc", "tas-probs_19961101.nc", + "tas-probs_20201101.nc", "tas-skill_month11.nc") ) }) -- GitLab From ab87e1ec267849eda09bd9f2b17159e2968c269c Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 29 Nov 2022 09:03:25 +0100 Subject: [PATCH 19/80] Improve log message when saving probabilities --- modules/Saving/Saving.R | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 58908209..b4040ea4 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -81,9 +81,11 @@ save_data <- function(recipe, data, save_percentiles(probabilities$percentiles, recipe, data$hcst, outdir, archive = archive) save_probabilities(probabilities$probs, recipe, data$hcst, outdir, - archive = archive) - save_probabilities(probabilities$probs_fcst, recipe, data$fcst, outdir, - archive = archive) + archive = archive, type = "hcst") + if (!is.null(probabilities$probs_fcst)) { + save_probabilities(probabilities$probs_fcst, recipe, data$fcst, outdir, + archive = archive, type = "fcst") + } } } @@ -765,7 +767,8 @@ save_probabilities <- function(probs, data_cube, outdir, agg = "global", - archive = NULL) { + archive = NULL, + type = "hcst") { # Loops over the years in the s2dv_cube containing a hindcast or forecast # and exports the corresponding category probabilities to a netCDF file. # probs: array containing the probability data @@ -774,6 +777,7 @@ save_probabilities <- function(probs, # outdir: directory where the files should be saved # type: 'exp' (hcst and fcst) or 'obs' # agg: aggregation, "global" or "country" + # type: 'hcst' or 'fcst' lalo <- c('longitude', 'latitude') @@ -866,5 +870,8 @@ save_probabilities <- function(probs, ArrayToNc(vars, outfile) } } - info(recipe$Run$logger, "##### PROBABILITIES SAVED TO NETCDF FILE #####") + + info(recipe$Run$logger, + paste("#####", toupper(type), + "PROBABILITIES SAVED TO NETCDF FILE #####")) } -- GitLab From 853ae40aef41898dc91a1812ddde77a072a6afae Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 29 Nov 2022 16:23:01 +0100 Subject: [PATCH 20/80] Clean code and fix fcst anomaly bug --- modules/Anomalies/Anomalies.R | 67 +++++++++++++++++------------------ 1 file changed, 33 insertions(+), 34 deletions(-) diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R index a1d3e0ca..e980961a 100644 --- a/modules/Anomalies/Anomalies.R +++ b/modules/Anomalies/Anomalies.R @@ -14,6 +14,8 @@ compute_anomalies <- function(recipe, data) { cross_msg <- "without" } original_dims <- dim(data$hcst$data) + + # Compute anomalies anom <- CST_Anomaly(data$hcst, data$obs, cross = cross, memb = TRUE, @@ -21,31 +23,20 @@ compute_anomalies <- function(recipe, data) { dim_anom = 'syear', dat_dim = c('dat', 'ensemble'), ftime_dim = 'time') - + # Reorder dims anom$exp$data <- Reorder(anom$exp$data, names(original_dims)) anom$obs$data <- Reorder(anom$obs$data, names(original_dims)) + # Save full fields hcst_fullvalue <- data$hcst obs_fullvalue <- data$obs - ## Maybe just use s2dv::Clim() here? - clim <- s2dv::Clim(data$hcst$data, data$obs$data, - time_dim = "syear", - dat_dim = c("dat", "ensemble"), - memb = TRUE, - memb_dim = "ensemble", - ftime_dim = "time") - clim_hcst <- InsertDim(clim$clim_exp, posdim = 1, lendim = 1, - name = "syear") - clim_hcst <- Reorder(clim_hcst, names(original_dims)) - # clim_obs$data <- InsertDim(clim$clim_obs, - # posdim = 1, lendim = 1, name = "syear") - # clim_obs$data <- Reorder(clim_obs$data, names(original_dims)) + # Hindcast climatology data$hcst <- anom$exp data$obs <- anom$obs - remove(anom, clim) - # Change variable long names + remove(anom) + # Change variable metadata # data$hcst$Variable$varName <- paste0(data$hcst$Variable$varName, "anomaly") attr(data$hcst$Variable, "variable")$long_name <- paste(attr(data$hcst$Variable, "variable")$long_name, "anomaly") @@ -53,31 +44,39 @@ compute_anomalies <- function(recipe, data) { attr(data$obs$Variable, "variable")$long_name <- paste(attr(data$obs$Variable, "variable")$long_name, "anomaly") - ## TODO: Compute forecast anomaly field + # Compute forecast anomaly field if (!is.null(data$fcst)) { - warn(recipe$Run$logger, - "fcst anomalies are a work in progress...") - mean_clim_hcst <- MeanDims(clim_hcst, dims = "ensemble", drop = T) - dims <- dim(mean_clim_hcst) - mean_clim_hcst <- rep(mean_clim_hcst, dim(data$fcst$data)[['ensemble']]) - dim(mean_clim_hcst) <- c(dims, - ensemble = dim(data$fcst$data)[['ensemble']]) - data$fcst$data <- data$fcst$data - mean_clim_hcst + # Compute hindcast climatology ensemble mean + clim <- s2dv::Clim(hcst_fullvalue$data, obs_fullvalue$data, + time_dim = "syear", + dat_dim = c("dat", "ensemble"), + memb = FALSE, + memb_dim = "ensemble", + ftime_dim = "time") + clim_hcst <- InsertDim(clim$clim_exp, posdim = 1, lendim = 1, + name = "syear") + dims <- dim(clim_hcst) + clim_hcst <- rep(clim_hcst, dim(data$fcst$data)[['ensemble']]) + dim(clim_hcst) <- c(dims, ensemble = dim(data$fcst$data)[['ensemble']]) + # Get fcst anomalies + data$fcst$data <- data$fcst$data - clim_hcst + # Change metadata # data$fcst$Variable$varName <- paste0(data$fcst$Variable$varName, "anomaly") - attr(data$hcst$Variable, "variable")$long_name <- - paste(attr(data$hcst$Variable, "variable")$long_name, "anomaly") - info(recipe$Run$logger, - "The anomalies have been computed, ", cross_msg, " cross-validation. - The original full fields are returned in $hcst_full.val and - $obs_full.val.") + attr(data$fcst$Variable, "variable")$long_name <- + paste(attr(data$fcst$Variable, "variable")$long_name, "anomaly") } + info(recipe$Run$logger, + paste("The anomalies have been computed,", cross_msg, + "cross-validation. The original full fields are returned as", + "$hcst_full.val and $obs_full.val.")) + info(recipe$Run$logger, "##### ANOMALIES COMPUTED SUCCESSFULLY #####") } else { - warn(recipe$Run$logger, "The Anomalies module has been called, but - recipe parameter Analysis:Variables:anomaly is set to FALSE. - The full fields will be returned.") + warn(recipe$Run$logger, paste("The Anomalies module has been called, but", + "recipe parameter Analysis:Variables:anomaly is set to FALSE.", + "The full fields will be returned.")) hcst_fullvalue <- NULL obs_fullvalue <- NULL info(recipe$Run$logger, "##### ANOMALIES NOT COMPUTED #####") -- GitLab From 442b71774cf4de2c20d35e4b8671da18902edff2 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 30 Nov 2022 10:41:49 +0100 Subject: [PATCH 21/80] Adapt calibration module and change names of full fields --- modules/Anomalies/Anomalies.R | 4 +- modules/Calibration/Calibration.R | 74 +++++++++++++++++++++++++------ modules/Skill/Skill.R | 8 ++-- 3 files changed, 67 insertions(+), 19 deletions(-) diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R index e980961a..2fe1da0e 100644 --- a/modules/Anomalies/Anomalies.R +++ b/modules/Anomalies/Anomalies.R @@ -69,7 +69,7 @@ compute_anomalies <- function(recipe, data) { info(recipe$Run$logger, paste("The anomalies have been computed,", cross_msg, "cross-validation. The original full fields are returned as", - "$hcst_full.val and $obs_full.val.")) + "$hcst.full_val and $obs.full_val.")) info(recipe$Run$logger, "##### ANOMALIES COMPUTED SUCCESSFULLY #####") @@ -85,7 +85,7 @@ compute_anomalies <- function(recipe, data) { ## TODO: Return fcst full value? return(list(hcst = data$hcst, obs = data$obs, fcst = data$fcst, - hcst_full.val = hcst_fullvalue, obs_full.val = obs_fullvalue)) + hcst.full_val = hcst_fullvalue, obs.full_val = obs_fullvalue)) } diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 0099983b..899b1291 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -4,19 +4,27 @@ calibrate_datasets <- function(recipe, data) { # recipe. If the forecast is not null, it calibrates it as well. # # data: list of s2dv_cube objects containing the hcst, obs and fcst. + # Optionally, it may also have hcst.full_val and obs.full_val. # recipe: object obtained when passing the .yml recipe file to read_yaml() method <- tolower(recipe$Analysis$Workflow$Calibration$method) if (method == "raw") { - warn(recipe$Run$logger, "The Calibration module has been called, - but the calibration method in the recipe is 'raw'. - The hcst and fcst will not be calibrated.") + warn(recipe$Run$logger, + paste("The Calibration module has been called, but the calibration", + "method in the recipe is 'raw'. The hcst and fcst will not be", + "calibrated.")) fcst_calibrated <- data$fcst hcst_calibrated <- data$hcst + if (!is.null(data$hcst.full_val)) { + hcst_full_calibrated <- data$hcst.full_val + } else { + hcst_full_calibrated <- NULL + } CALIB_MSG <- "##### NO CALIBRATION PERFORMED #####" } else { + ## TODO: Calibrate full fields when present # Calibration function params mm <- recipe$Analysis$Datasets$Multimodel if (is.null(recipe$Analysis$ncores)) { @@ -32,6 +40,7 @@ calibrate_datasets <- function(recipe, data) { CALIB_MSG <- "##### CALIBRATION COMPLETE #####" # Replicate observation array for the multi-model case + ## TODO: Implement for obs.full_val if (mm) { obs.mm <- obs$data for(dat in 1:(dim(data$hcst$data)['dat'][[1]]-1)) { @@ -47,13 +56,12 @@ calibrate_datasets <- function(recipe, data) { CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based") ## TODO: implement other calibration methods - ## TODO: Restructure the code? if (!(method %in% CST_CALIB_METHODS)) { - error(recipe$Run$logger, "Calibration method in the recipe is not - available for monthly data.") + error(recipe$Run$logger, + paste("Calibration method in the recipe is not available for", + "monthly data.")) stop() } else { - ## Alba's version of CST_Calibration (pending merge) is being used # Calibrate the hindcast hcst_calibrated <- CST_Calibration(data$hcst, data$obs, cal.method = method, @@ -66,8 +74,25 @@ calibrate_datasets <- function(recipe, data) { memb_dim = "ensemble", sdate_dim = "syear", ncores = ncores) + # In the case where anomalies have been computed, calibrate full values + if (!is.null(data$hcst.full_val)) { + hcst_full_calibrated <- CST_Calibration(data$hcst.full_val, + data$obs.full_val, + cal.method = method, + eval.method = "leave-one-out", + multi.model = mm, + na.fill = TRUE, + na.rm = na.rm, + apply_to = NULL, + memb_dim = "ensemble", + sdate_dim = "syear", + ncores = ncores) + } else { + hcst_full_calibrated <- NULL + } + + # Calibrate the forecast if (!is.null(data$fcst)) { - # Calibrate the forecast fcst_calibrated <- CST_Calibration(data$hcst, data$obs, data$fcst, cal.method = method, eval.method = "leave-one-out", @@ -86,9 +111,9 @@ calibrate_datasets <- function(recipe, data) { } else if (recipe$Analysis$Variables$freq == "daily_mean") { # Daily data calibration using Quantile Mapping if (!(method %in% c("qmap"))) { - error(recipe$Run$logger, "Calibration method in the recipe is not - available for daily data. Only quantile mapping 'qmap is - implemented.") + error(recipe$Run$logger, + paste("Calibration method in the recipe is not available for", + "daily data. Only quantile mapping 'qmap is implemented.")) stop() } # Calibrate the hindcast @@ -104,7 +129,21 @@ calibrate_datasets <- function(recipe, data) { wet.day = F) # Restore dimension order hcst_calibrated$data <- Reorder(hcst_calibrated$data, dim_order) - + # In the case where anomalies have been computed, calibrate full values + if (!is.null(data$hcst.full_val)) { + hcst_full_calibrated <- CST_QuantileMapping(data$hcst.full_val, + data$obs.full_val, + exp_cor = NULL, + sdate_dim = "syear", + memb_dim = "ensemble", + method = "QUANT", + ncores = ncores, + na.rm = na.rm, + wet.day = F) + } else { + hcst_full_calibrated <- NULL + } + if (!is.null(data$fcst)) { # Calibrate the forecast fcst_calibrated <- CST_QuantileMapping(data$hcst, data$obs, @@ -124,5 +163,14 @@ calibrate_datasets <- function(recipe, data) { } } info(recipe$Run$logger, CALIB_MSG) - return(list(hcst = hcst_calibrated, obs = data$obs, fcst = fcst_calibrated)) + ## TODO: Sort out returns + return_list <- list(hcst = hcst_calibrated, + obs = data$obs, + fcst = fcst_calibrated) + if (!is.null(hcst_full_calibrated)) { + return_list <- append(return_list, + list(hcst.full_val = hcst_full_calibrated, + obs.full_val = data$obs.full_val)) + } + return(return_list) } diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 3503078a..6b069c7b 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -173,9 +173,9 @@ compute_skill_metrics <- function(recipe, data) { } else if (metric == 'mean_bias') { ## TODO: Eliminate option to compute from anomalies # Compute from full field - if ((!is.null(data$hcst_full.val)) && (!is.null(data$obs_full.val)) && + if ((!is.null(data$hcst.full_val)) && (!is.null(data$obs.full_val)) && (recipe$Analysis$Workflow$Anomalies$compute)) { - skill <- Bias(data$hcst_full.val$data, data$obs_full.val$data, + skill <- Bias(data$hcst.full_val$data, data$obs.full_val$data, time_dim = time_dim, memb_dim = memb_dim, ncores = ncores) @@ -189,9 +189,9 @@ compute_skill_metrics <- function(recipe, data) { skill_metrics[[ metric ]] <- skill # Mean bias skill score } else if (metric == 'mean_bias_ss') { - if ((!is.null(data$hcst_full.val)) && (!is.null(data$obs_full.val)) && + if ((!is.null(data$hcst.full_val)) && (!is.null(data$obs.full_val)) && (recipe$Analysis$Workflow$Anomalies$compute)) { - skill <- AbsBiasSS(data$hcst_full.val$data, data$obs_full.val$data, + skill <- AbsBiasSS(data$hcst.full_val$data, data$obs.full_val$data, time_dim = time_dim, memb_dim = memb_dim, ncores = ncores) -- GitLab From d6798c0de622acd94c08b5b8896efb045d974812 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 30 Nov 2022 11:16:02 +0100 Subject: [PATCH 22/80] Change color of triangle ends --- modules/Visualization/Visualization.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index ff0e9fd4..76d297b1 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -183,6 +183,8 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, filled.continents=F, brks = brks, cols = col2, + col_inf = col2[1], + col_sup = col2[length(col2)], fileout = outfile, bar_label_digits = 3) ) -- GitLab From b57406f76bbb9c757eddfafd63c32351ef55fb38 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 30 Nov 2022 11:37:55 +0100 Subject: [PATCH 23/80] Change parameter in recipe --- modules/Loading/testing_recipes/recipe_seasonal-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/Loading/testing_recipes/recipe_seasonal-tests.yml b/modules/Loading/testing_recipes/recipe_seasonal-tests.yml index e1c3b56d..e1857ac0 100644 --- a/modules/Loading/testing_recipes/recipe_seasonal-tests.yml +++ b/modules/Loading/testing_recipes/recipe_seasonal-tests.yml @@ -38,7 +38,7 @@ Analysis: index: no ncores: 7 remove_NAs: yes - Output_format: Scorecards + Output_format: S2S4E Run: Loglevel: INFO Terminal: yes -- GitLab From 96488b07ddb8ba6b2f949da3dac3ce951ffdc811 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 30 Nov 2022 16:03:39 +0100 Subject: [PATCH 24/80] Add some pressure level variables to the archive for ECMWF SEAS5 and ERA5 (ta and g) --- conf/archive.yml | 40 ++++++++++++++++++------------------ conf/variable-dictionary.yml | 27 +++++++++++++++++++++++- 2 files changed, 46 insertions(+), 21 deletions(-) diff --git a/conf/archive.yml b/conf/archive.yml index c50909c3..04d69c5a 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -7,12 +7,14 @@ archive: name: "ECMWF SEAS5" institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/ecmwf/system5c3s/" - daily_mean: {"tas":"_f6h/", "rsds":"_s0-24h/", - "prlr":"_s0-24h/", "sfcWind":"_f6h/", - "tasmin":"_f24h/", "tasmax":"_f24h/"} - monthly_mean: {"tas":"_f6h/", "rsds":"_s0-24h/", - "prlr":"_s0-24h/", "sfcWind":"_f6h/", - "tasmin":"_f24h/", "tasmax":"_f24h/"} + daily_mean: {"tas":"_f6h/", "rsds":"_s0-24h/", "prlr":"_s0-24h/", + "sfcWind":"_f6h/", "tasmin":"_f24h/", "tasmax":"_f24h/", + "ta300":"_f12h/", "ta500":"_f12h/", "ta850":"_f12h/", + "g300":"_f12h/", "g500":"_f12h/", "g850":"_f12h/"} + monthly_mean: {"tas":"_f6h/", "rsds":"_s0-24h/", "prlr":"_s0-24h/", + "sfcWind":"_f6h/", "tasmin":"_f24h/", "tasmax":"_f24h/", + "ta300":"_f12h/", "ta500":"_f12h/", "ta850":"_f12h/", + "g300":"_f12h/", "g500":"_f12h/", "g850":"_f12h/"} nmember: fcst: 51 hcst: 25 @@ -103,20 +105,18 @@ archive: name: "ERA5" institution: "European Centre for Medium-Range Weather Forecasts" src: "recon/ecmwf/era5/" - daily_mean: {"tas":"_f1h-r1440x721cds/", - "rsds":"_f1h-r1440x721cds/", - "prlr":"_f1h-r1440x721cds/", - "g500":"_f1h-r1440x721cds/", - "sfcWind":"_f1h-r1440x721cds/", - "tasmax":"_f1h-r1440x721cds/", - "tasmin":"_f1h-r1440x721cds/"} - monthly_mean: {"tas":"_f1h-r1440x721cds/", - "prlr":"_f1h-r1440x721cds/", - "rsds":"_f1h-r1440x721cds/", - "g500":"_f1h-r1440x721cds/", - "sfcWind":"_f1h-r1440x721cds/", - "tasmax":"_f1h-r1440x721cds/", - "tasmin":"_f1h-r1440x721cds/"} + daily_mean: {"tas":"_f1h-r1440x721cds/", "rsds":"_f1h-r1440x721cds/", + "prlr":"_f1h-r1440x721cds/", "g300":"_f1h-r1440x721cds/", + "g500":"_f1h-r1440x721cds/", "g850":"_f1h-r1440x721cds/", + "sfcWind":"_f1h-r1440x721cds/", "tasmax":"_f1h-r1440x721cds/", + "tasmin":"_f1h-r1440x721cds/", "ta300":"_f1h-r1440x721cds/", + "ta500":"_f1h-r1440x721cds/", "ta850":"_f1h-r1440x721cds/"} + monthly_mean: {"tas":"_f1h-r1440x721cds/", "rsds":"_f1h-r1440x721cds/", + "prlr":"_f1h-r1440x721cds/", "g300":"_f1h-r1440x721cds/", + "g500":"_f1h-r1440x721cds/", "g850":"_f1h-r1440x721cds/", + "sfcWind":"_f1h-r1440x721cds/", "tasmax":"_f1h-r1440x721cds/", + "tasmin":"_f1h-r1440x721cds/", "ta300":"_f1h-r1440x721cds/", + "ta500":"_f1h-r1440x721cds/", "ta850":"_f1h-r1440x721cds/"} calendar: "standard" reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" era5land: diff --git a/conf/variable-dictionary.yml b/conf/variable-dictionary.yml index 51252154..f0dfcb56 100644 --- a/conf/variable-dictionary.yml +++ b/conf/variable-dictionary.yml @@ -4,7 +4,22 @@ vars: ## Some variables in esarchive may have different units than stated here. ## Use with caution. -# ECVs +# ECVs + ta300: + units: "K" + long_name: "Air Temperature at 300 hPa" + standard_name: "air_temperature" + accum: no + ta500: + units: "K" + long_name: "Air Temperature at 500 hPa" + standard_name: "air_temperature" + accum: no + ta850: + units: "K" + long_name: "Air Temperature at 850 hPa" + standard_name: "air_temperature" + accum: no tas: units: "K" long_name: "Near-Surface Air Temperature" @@ -55,11 +70,21 @@ vars: standard_name: "total_precipitation_flux" #? Not in CF accum: yes # outname: "acprec" + g300: + units: "m2 s-2" + long_name: "Geopotential" + standard_name: "geopotential" + accum: no g500: units: "m2 s-2" long_name: "Geopotential" standard_name: "geopotential" accum: no + g850: + units: "m2 s-2" + long_name: "Geopotential" + standard_name: "geopotential" + accum: no pr: units: "kg m-2 s-1" long_name: "Precipitation" -- GitLab From 42f39bbc32999b4b969bc4be5c4518138c16581d Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 30 Nov 2022 17:10:40 +0100 Subject: [PATCH 25/80] Add note to deprecate calibrated_data --- modules/Saving/Saving.R | 2 +- modules/Visualization/Visualization.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 88ab44cd..77674d73 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -8,7 +8,7 @@ save_data <- function(recipe, data, probabilities = NULL, archive = NULL) { - ## TODO: Deprecate calibrated_data? + ## TODO: Deprecate calibrated_data # Wrapper for the saving functions. # recipe: The auto-s2s recipe diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 60042f4b..55eed805 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -14,7 +14,7 @@ plot_data <- function(recipe, archive = NULL, significance = F) { - ## TODO: Depreate calibrated_data + ## TODO: Deprecate calibrated_data # Try to produce and save several basic plots. # recipe: the auto-s2s recipe as read by read_yaml() -- GitLab From 77ee661f654b84e9c18860f171cb3fee76ebae71 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 1 Dec 2022 10:13:28 +0100 Subject: [PATCH 26/80] Deprecate 'calibrated_data' arg from save_data() and plot_data() --- modules/Saving/Saving.R | 31 +++++++++++++------------ modules/Visualization/Visualization.R | 19 ++++----------- modules/test_decadal.R | 4 ++-- modules/test_seasonal.R | 4 ++-- tests/testthat/test-decadal_monthly_1.R | 7 +++--- tests/testthat/test-seasonal_daily.R | 2 ++ tests/testthat/test-seasonal_monthly.R | 4 ++-- 7 files changed, 31 insertions(+), 40 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 77674d73..3447fc6a 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -3,29 +3,31 @@ source("modules/Saving/paths2save.R") save_data <- function(recipe, data, - calibrated_data = NULL, skill_metrics = NULL, probabilities = NULL, archive = NULL) { - ## TODO: Deprecate calibrated_data + ## TODO: Deprecate data # Wrapper for the saving functions. # recipe: The auto-s2s recipe # archive: The auto-s2s archive # data: output of load_datasets() - # calibrated_data: output of calibrate_datasets() + # data: output of calibrate_datasets() # skill_metrics: output of compute_skill_metrics() # probabilities: output of compute_probabilities() # mean_bias: output of compute_mean_bias() if (is.null(recipe)) { - stop("The 'recipe' parameter is mandatory.") + error(recipe$Run$logger, "The 'recipe' parameter is mandatory.") + stop() } if (is.null(data)) { - stop("The 'data' parameter is mandatory. It should be the output of", - "load_datasets().") + error(recupe$Run$logger, + paste("The 'data' parameter is mandatory. It should be a list", + "of at least two s2dv_cubes containing the hcst and obs.")) + stop() } if (is.null(archive)) { if (tolower(recipe$Analysis$Horizon) == "seasonal") { @@ -43,15 +45,13 @@ save_data <- function(recipe, data, dir.create(outdir, showWarnings = FALSE, recursive = TRUE) # Export hindcast, forecast and observations onto outfile - if (!is.null(calibrated_data)) { - save_forecast(calibrated_data$hcst, recipe, dict, outdir, - archive = archive, type = 'hcst') - if (!is.null(calibrated_data$fcst)) { - save_forecast(calibrated_data$fcst, recipe, dict, outdir, - archive = archive, type = 'fcst') - } - save_observations(data$obs, recipe, dict, outdir, archive = archive) + save_forecast(data$hcst, recipe, dict, outdir, archive = archive, + type = 'hcst') + if (!is.null(data$fcst)) { + save_forecast(data$fcst, recipe, dict, outdir, + archive = archive, type = 'fcst') } + save_observations(data$obs, recipe, dict, outdir, archive = archive) # Separate ensemble correlation from the rest of the metrics, as it has one # extra dimension "ensemble" and must be saved to a different file @@ -294,7 +294,8 @@ save_forecast <- function(data_cube, ArrayToNc(vars, outfile) } } - info(recipe$Run$logger, "##### FCST SAVED TO NETCDF FILE #####") + info(recipe$Run$logger, paste("#####", toupper(type), + "SAVED TO NETCDF FILE #####")) } diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 55eed805..9d0cf1e4 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -8,7 +8,6 @@ source("modules/Visualization/tmp/PlotCombinedMap.R") plot_data <- function(recipe, data, - calibrated_data = NULL, skill_metrics = NULL, probabilities = NULL, archive = NULL, @@ -28,10 +27,9 @@ plot_data <- function(recipe, outdir <- paste0(get_dir(recipe), "/plots/") dir.create(outdir, showWarnings = FALSE, recursive = TRUE) - if ((is.null(skill_metrics)) && (is.null(calibrated_data)) && - (is.null(data$fcst))) { + if ((is.null(skill_metrics)) && (is.null(data$fcst))) { error(recipe$Run$logger, "The Visualization module has been called, - but args 'data', 'calibrated_data' and 'skill_metrics', are all NULL + but there is no fcst in 'data', and 'skill_metrics' is NULL so there is no data that can be plotted.") stop() } @@ -53,21 +51,12 @@ plot_data <- function(recipe, } # Plot forecast ensemble mean - if (!is.null(calibrated_data$fcst)) { - plot_ensemble_mean(recipe, archive, calibrated_data$fcst, outdir) - } else if (!is.null(data$fcst)) { - warn(recipe$Run$logger, "Only the uncalibrated forecast was provided. - Using this data to plot the forecast ensemble mean.") + if (!is.null(data$fcst)) { plot_ensemble_mean(recipe, archive, data$fcst, outdir) } # Plot Most Likely Terciles - if ((!is.null(probabilities)) && (!is.null(calibrated_data$fcst))) { - plot_most_likely_terciles(recipe, archive, calibrated_data$fcst, - probabilities$percentiles, outdir) - } else if ((!is.null(probabilities)) && (!is.null(data$fcst))) { - warn(recipe$Run$logger, "Only the uncalibrated forecast was provided. - Using this data to plot the most likely terciles.") + if ((!is.null(probabilities)) && (!is.null(data$fcst))) { plot_most_likely_terciles(recipe, archive, data$fcst, probabilities$percentiles, outdir) } diff --git a/modules/test_decadal.R b/modules/test_decadal.R index 80304f97..c32f0bba 100644 --- a/modules/test_decadal.R +++ b/modules/test_decadal.R @@ -22,9 +22,9 @@ skill_metrics <- compute_skill_metrics(recipe, calibrated_data$hcst, data$obs) probabilities <- compute_probabilities(recipe, calibrated_data$hcst) # Export all data to netCDF -save_data(recipe, data, calibrated_data, skill_metrics, probabilities) +save_data(recipe, calibrated_data, skill_metrics, probabilities) # Plot data -plot_data(recipe, data, calibrated_data, skill_metrics, probabilities, +plot_data(recipe, calibrated_data, skill_metrics, probabilities, significance = T) diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R index ca9d34ee..6a60903c 100644 --- a/modules/test_seasonal.R +++ b/modules/test_seasonal.R @@ -20,7 +20,7 @@ skill_metrics <- compute_skill_metrics(recipe, calibrated_data) # Compute percentiles and probability bins probabilities <- compute_probabilities(recipe, calibrated_data$hcst) # Export all data to netCDF -save_data(recipe, data, calibrated_data, skill_metrics, probabilities) +save_data(recipe, calibrated_data, skill_metrics, probabilities) # Plot data -plot_data(recipe, data, calibrated_data, skill_metrics, probabilities, +plot_data(recipe, calibrated_data, skill_metrics, probabilities, significance = T) diff --git a/tests/testthat/test-decadal_monthly_1.R b/tests/testthat/test-decadal_monthly_1.R index d78bb322..184f6f43 100644 --- a/tests/testthat/test-decadal_monthly_1.R +++ b/tests/testthat/test-decadal_monthly_1.R @@ -32,15 +32,14 @@ probs <- compute_probabilities(recipe, calibrated_data$hcst) # Saving suppressWarnings({invisible(capture.output( -save_data(recipe = recipe, data = data, calibrated_data = calibrated_data, +save_data(recipe = recipe, data = calibrated_data, skill_metrics = skill_metrics, probabilities = probs, archive = archive) ))}) # Plotting suppressWarnings({invisible(capture.output( -plot_data(recipe = recipe, archive = archive, data = data, - calibrated_data = calibrated_data, skill_metrics = skill_metrics, - probabilities = probs, significance = T) +plot_data(recipe = recipe, archive = archive, data = calibrated_data, + skill_metrics = skill_metrics, probabilities = probs, significance = T) ))}) diff --git a/tests/testthat/test-seasonal_daily.R b/tests/testthat/test-seasonal_daily.R index 94955a7b..ddcca22f 100644 --- a/tests/testthat/test-seasonal_daily.R +++ b/tests/testthat/test-seasonal_daily.R @@ -163,3 +163,5 @@ c(0.7509920, 0.6514916, 0.5118371), tolerance=0.0001 ) }) + +unlink(recipe$Run$output_dir) diff --git a/tests/testthat/test-seasonal_monthly.R b/tests/testthat/test-seasonal_monthly.R index b53c7291..d46d20dc 100644 --- a/tests/testthat/test-seasonal_monthly.R +++ b/tests/testthat/test-seasonal_monthly.R @@ -31,13 +31,13 @@ probs <- compute_probabilities(recipe, calibrated_data$hcst) # Saving suppressWarnings({invisible(capture.output( -save_data(recipe = recipe, data = data, calibrated_data = calibrated_data, +save_data(recipe = recipe, data = calibrated_data, skill_metrics = skill_metrics, probabilities = probs) ))}) # Plotting suppressWarnings({invisible(capture.output( -plot_data(recipe = recipe, data = data, calibrated_data = calibrated_data, +plot_data(recipe = recipe, data = calibrated_data, skill_metrics = skill_metrics, probabilities = probs, significance = T) ))}) -- GitLab From 600a301c7d56bce89efc5bbefc05cbdf1d792129 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 1 Dec 2022 11:29:27 +0100 Subject: [PATCH 27/80] Add info about anomaly computation to global attributes --- modules/Saving/Saving.R | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 3447fc6a..bb9c7e95 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -464,6 +464,13 @@ save_metrics <- function(skill, # Add global and variable attributes global_attributes <- get_global_attributes(recipe, archive) + if (recipe$Analysis$Workflow$Anomalies$compute) { + global_attributes <- c(list(from_anomalies = "Yes"), + global_attributes) + } else { + global_attributes <- c(list(from_anomalies = "No"), + global_attributes) + } attr(skill[[1]], 'global_attrs') <- global_attributes for (i in 1:length(skill)) { @@ -475,7 +482,7 @@ save_metrics <- function(skill, sdname <- paste0(metric, " region-aggregated metric") dims <- c('Country', 'time') } else { - sdname <- paste0(metric, " grid point metric") + sdname <- paste0(metric) #, " grid point metric") dims <- c(lalo, 'time') } metadata <- list(metric = list(name = metric, @@ -572,6 +579,13 @@ save_corr <- function(skill, # Add global and variable attributes global_attributes <- get_global_attributes(recipe, archive) + if (recipe$Analysis$Workflow$Anomalies$compute) { + global_attributes <- c(global_attributes, + list(from_anomalies = "Yes")) + } else { + global_attributes <- c(global_attributes, + list(from_anomalies = "No")) + } attr(skill[[1]], 'global_attrs') <- global_attributes for (i in 1:length(skill)) { @@ -583,7 +597,7 @@ save_corr <- function(skill, sdname <- paste0(metric, " region-aggregated metric") dims <- c('Country', 'ensemble', 'time') } else { - sdname <- paste0(metric, " grid point metric") # formerly names(metric) + sdname <- paste0(metric) #, " grid point metric") # formerly names(metric) dims <- c(lalo, 'ensemble', 'time') } metadata <- list(metric = list(name = metric, @@ -679,6 +693,13 @@ save_percentiles <- function(percentiles, # Add global and variable attributes global_attributes <- get_global_attributes(recipe, archive) + if (recipe$Analysis$Workflow$Anomalies$compute) { + global_attributes <- c(list(from_anomalies = "Yes"), + global_attributes) + } else { + global_attributes <- c(list(from_anomalies = "No"), + global_attributes) + } attr(percentiles[[1]], 'global_attrs') <- global_attributes for (i in 1:length(percentiles)) { @@ -779,6 +800,14 @@ save_probabilities <- function(probs, variable <- data_cube$Variable$varName var.longname <- attr(data_cube$Variable, 'variable')$long_name global_attributes <- get_global_attributes(recipe, archive) + # Add anomaly computation to global attributes + if (recipe$Analysis$Workflow$Anomalies$compute) { + global_attributes <- c(list(from_anomalies = "Yes"), + global_attributes) + } else { + global_attributes <- c(list(from_anomalies = "No"), + global_attributes) + } fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq calendar <- archive$System[[global_attributes$system]]$calendar -- GitLab From 626b1af0d0926cfff57218c1d982e681bcc6145d Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 1 Dec 2022 11:53:29 +0100 Subject: [PATCH 28/80] Add TODOs --- modules/Saving/Saving.R | 3 --- modules/Visualization/Visualization.R | 5 ++--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index bb9c7e95..80c6c87d 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -6,9 +6,6 @@ save_data <- function(recipe, data, skill_metrics = NULL, probabilities = NULL, archive = NULL) { - - ## TODO: Deprecate data - # Wrapper for the saving functions. # recipe: The auto-s2s recipe # archive: The auto-s2s archive diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 9d0cf1e4..22c9abb6 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -12,9 +12,6 @@ plot_data <- function(recipe, probabilities = NULL, archive = NULL, significance = F) { - - ## TODO: Deprecate calibrated_data - # Try to produce and save several basic plots. # recipe: the auto-s2s recipe as read by read_yaml() # archive: the auto-s2s archive as read by read_yaml() @@ -190,6 +187,7 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { + ## TODO: Add 'anomaly' to plot title # Abort if frequency is daily if (recipe$Analysis$Variables$freq == "daily_mean") { stop("Visualization functions not yet implemented for daily data.") @@ -267,6 +265,7 @@ plot_most_likely_terciles <- function(recipe, archive, percentiles, outdir) { + ## TODO: Add 'anomaly' to plot title # Abort if frequency is daily if (recipe$Analysis$Variables$freq == "daily_mean") { stop("Visualization functions not yet implemented for daily data.") -- GitLab From 90fb8d80f3dec543831349b335b945f31201c258 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 1 Dec 2022 13:29:47 +0100 Subject: [PATCH 29/80] Update script --- modules/test_decadal.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/test_decadal.R b/modules/test_decadal.R index c32f0bba..270e3800 100644 --- a/modules/test_decadal.R +++ b/modules/test_decadal.R @@ -16,7 +16,7 @@ data <- load_datasets(recipe) calibrated_data <- calibrate_datasets(recipe, data) # Compute skill metrics -skill_metrics <- compute_skill_metrics(recipe, calibrated_data$hcst, data$obs) +skill_metrics <- compute_skill_metrics(recipe, calibrated_data) # Compute percentiles and probability bins probabilities <- compute_probabilities(recipe, calibrated_data$hcst) -- GitLab From 0f6c8f9ca845b7025c74ac561ee4c7309f675e61 Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 2 Dec 2022 09:56:23 +0100 Subject: [PATCH 30/80] Add new metric RMSSS --- conf/variable-dictionary.yml | 2 + .../recipe_test-new-metrics.yml | 46 +++ modules/Skill/Skill.R | 18 ++ modules/Skill/tmp/RMSSS.R | 270 ++++++++++++++++++ modules/Visualization/Visualization.R | 5 +- 5 files changed, 339 insertions(+), 2 deletions(-) create mode 100644 modules/Loading/testing_recipes/recipe_test-new-metrics.yml create mode 100644 modules/Skill/tmp/RMSSS.R diff --git a/conf/variable-dictionary.yml b/conf/variable-dictionary.yml index f0dfcb56..994256ce 100644 --- a/conf/variable-dictionary.yml +++ b/conf/variable-dictionary.yml @@ -264,3 +264,5 @@ metrics: long_name: "Mean Bias Skill Score Statistical Significance" enssprerr: long_name: "Ensemble Spread-To-Error Ratio" + rmsss: + long_name: "Root Mean Square Skill Score" diff --git a/modules/Loading/testing_recipes/recipe_test-new-metrics.yml b/modules/Loading/testing_recipes/recipe_test-new-metrics.yml new file mode 100644 index 00000000..df84138d --- /dev/null +++ b/modules/Loading/testing_recipes/recipe_test-new-metrics.yml @@ -0,0 +1,46 @@ +Description: + Author: V. Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: system7c3s + Multimodel: False + Reference: + name: era5 + Time: + sdate: '1101' + fcst_year: '2020' + hcst_start: '1998' + hcst_end: '2010' + ftime_min: 1 + ftime_max: 2 + Region: + latmin: -10 + latmax: 10 + lonmin: 0 + lonmax: 20 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: mse_min + Skill: + metric: RMSSS + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] + Indicators: + index: no + ncores: 7 + remove_NAs: yes + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 99f12346..76b08942 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -14,6 +14,7 @@ source("modules/Skill/s2s.metrics.R") source("modules/Skill/tmp/RandomWalkTest.R") source("modules/Skill/tmp/Bias.R") source("modules/Skill/tmp/AbsBiasSS.R") +source("modules/Skill/tmp/RMSSS.R") ## TODO: Implement this in the future ## Which parameter are required? @@ -147,6 +148,7 @@ compute_skill_metrics <- function(recipe, exp, obs) { skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # Ensemble mean correlation } else if (metric %in% c('enscorr', 'corr')) { + ## TODO: Return significance ## TODO: Implement option for Kendall and Spearman methods? skill <- s2dv::Corr(exp$data, obs$data, dat_dim = 'dat', @@ -161,6 +163,22 @@ compute_skill_metrics <- function(recipe, exp, obs) { skill_metrics[[ paste0(metric, "_p.value") ]] <- skill$p.val skill_metrics[[ paste0(metric, "_conf.low") ]] <- skill$conf.lower skill_metrics[[ paste0(metric, "_conf.up") ]] <- skill$conf.upper + } else if (metric == 'rmsss') { + # Compute hcst ensemble mean + exp_ensmean <- MeanDims(exp$data, dims = memb_dim, drop = FALSE) + # Compute RMSS + skill <- RMSSS(exp_ensmean, obs$data, + dat_dim = 'dat', + time_dim = time_dim, + pval = FALSE, + sign = TRUE, + ncores = ncores) + rm(exp_ensmean) + # Compute ensemble mean and modify dimensions + skill <- lapply(skill, function(x) { + .drop_dims(x)}) + skill_metrics[[ metric ]] <- skill$rmsss + skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign } else if (metric == 'enssprerr') { # Remove ensemble dim from obs to avoid veriApply warning obs_noensdim <- ClimProjDiags::Subset(obs$data, "ensemble", 1, diff --git a/modules/Skill/tmp/RMSSS.R b/modules/Skill/tmp/RMSSS.R new file mode 100644 index 00000000..9c47da45 --- /dev/null +++ b/modules/Skill/tmp/RMSSS.R @@ -0,0 +1,270 @@ +#'Compute root mean square error skill score +#' +#'Compute the root mean square error skill score (RMSSS) between an array of +#'forecast 'exp' and an array of observation 'obs'. The two arrays should +#'have the same dimensions except along dat_dim, where the length can be +#'different, with the number of experiments/models (nexp) and the number of +#'observational datasets (nobs).\cr +#'RMSSS computes the root mean square error skill score of each jexp in 1:nexp +#'against each job in 1:nobs which gives nexp * nobs RMSSS for each grid point +#'of the array.\cr +#'The RMSSS are computed along the time_dim dimension which should correspond +#'to the start date dimension.\cr +#'The p-value and significance test are optionally provided by an one-sided +#'Fisher test.\cr +#' +#'@param exp A named numeric array of experimental data which contains at least +#' two dimensions for dat_dim and time_dim. It can also be a vector with the +#' same length as 'obs', then the vector will automatically be 'time_dim' and +#' 'dat_dim' will be 1. +#'@param obs A named numeric array of observational data which contains at least +#' two dimensions for dat_dim and time_dim. The dimensions should be the same +#' as paramter 'exp' except the length of 'dat_dim' dimension. The order of +#' dimension can be different. It can also be a vector with the same length as +#' 'exp', then the vector will automatically be 'time_dim' and 'dat_dim' will +#' be 1. +#'@param dat_dim A character string indicating the name of dataset (nobs/nexp) +#' dimension. The default value is 'dataset'. +#'@param time_dim A character string indicating the name of dimension along +#' which the RMSSS are computed. The default value is 'sdate'. +#'@param pval A logical value indicating whether to compute or not the p-value +#' of the test Ho: RMSSS = 0. The default value is TRUE. +#'@param sign A logical value indicating whether to compute or not the +#' statistical significance of the test Ho: RMSSS = 0. The default value is +#' FALSE. +#'@param alpha A numeric of the significance level to be used in the +#' statistical significance test. The default value is 0.05. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list containing the numeric arrays with dimension:\cr +#' c(nexp, nobs, all other dimensions of exp except time_dim).\cr +#'nexp is the number of experiment (i.e., dat_dim in exp), and nobs is the +#'number of observation (i.e., dat_dim in obs). If dat_dim is NULL, nexp and +#'nobs are omitted.\cr +#'\item{$rmsss}{ +#' A numerical array of the root mean square error skill score. +#'} +#'\item{$p.val}{ +#' A numerical array of the p-value with the same dimensions as $rmsss. +#' Only present if \code{pval = TRUE}. +#'} +#'\item{sign}{ +#' A logical array of the statistical significance of the RMSSS with the same +#' dimensions as $rmsss. Only present if \code{sign = TRUE}. +#'} +#' +#'@examples +#' set.seed(1) +#' exp <- array(rnorm(30), dim = c(dataset = 2, time = 3, memb = 5)) +#' set.seed(2) +#' obs <- array(rnorm(15), dim = c(time = 3, memb = 5, dataset = 1)) +#' res <- RMSSS(exp, obs, time_dim = 'time', dat_dim = 'dataset') +#' +#'@rdname RMSSS +#'@import multiApply +#'@importFrom stats pf +#'@export +RMSSS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', + pval = TRUE, sign = FALSE, alpha = 0.05, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (is.null(exp) | is.null(obs)) { + stop("Parameter 'exp' and 'obs' cannot be NULL.") + } + if (!is.numeric(exp) | !is.numeric(obs)) { + stop("Parameter 'exp' and 'obs' must be a numeric array.") + } + if (is.null(dim(exp)) & is.null(dim(obs))) { #is vector + if (length(exp) == length(obs)) { + exp <- array(exp, dim = c(length(exp), 1)) + names(dim(exp)) <- c(time_dim, dat_dim) + obs <- array(obs, dim = c(length(obs), 1)) + names(dim(obs)) <- c(time_dim, dat_dim) + } else { + stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", + "dimensions time_dim and dat_dim, or vector of same length.")) + } + } else if (is.null(dim(exp)) | is.null(dim(obs))) { + stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", + "dimensions time_dim and dat_dim, or vector of same length.")) + } + if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + if(!all(names(dim(exp)) %in% names(dim(obs))) | + !all(names(dim(obs)) %in% names(dim(exp)))) { + stop("Parameter 'exp' and 'obs' must have same dimension name.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string or NULL.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## pval + if (!is.logical(pval) | length(pval) > 1) { + stop("Parameter 'pval' must be one logical value.") + } + ## sign + if (!is.logical(sign) | length(sign) > 1) { + stop("Parameter 'sign' must be one logical value.") + } + ## alpha + if (!is.numeric(alpha) | length(alpha) > 1) { + stop("Parameter 'alpha' must be one numeric value.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimension except 'dat_dim'.")) + } + if (dim(exp)[time_dim] <= 2) { + stop("The length of time_dim must be more than 2 to compute RMSSS.") + } + + + ############################### + # Sort dimension + name_exp <- names(dim(exp)) + name_obs <- names(dim(obs)) + order_obs <- match(name_exp, name_obs) + obs <- Reorder(obs, order_obs) + + + ############################### + # Calculate RMSSS + + res <- Apply(list(exp, obs), + target_dims = list(c(time_dim, dat_dim), + c(time_dim, dat_dim)), + fun = .RMSSS, + time_dim = time_dim, dat_dim = dat_dim, + pval = pval, sign = sign, alpha = alpha, + ncores = ncores) + + return(res) +} + +.RMSSS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', pval = TRUE, + sign = FALSE, alpha = 0.05) { + # exp: [sdate, (dat)] + # obs: [sdate, (dat)] + + if (is.null(dat_dim)) { + # exp: [sdate] + # obs: [sdate] + nexp <- 1 + nobs <- 1 + dim(exp) <- c(dim(exp), nexp = nexp) + dim(obs) <- c(dim(obs), nobs = nobs) + } else { + # exp: [sdate, dat_exp] + # obs: [sdate, dat_obs] + nexp <- as.numeric(dim(exp)[2]) + nobs <- as.numeric(dim(obs)[2]) + } + + nsdate <- as.numeric(dim(exp)[1]) + + p_val <- array(dim = c(nexp = nexp, nobs = nobs)) + dif1 <- array(dim = c(nsdate, nexp, nobs)) + names(dim(dif1)) <- c(time_dim, 'nexp', 'nobs') + +# if (conf) { +# conflow <- (1 - conf.lev) / 2 +# confhigh <- 1 - conflow +# conf_low <- array(dim = c(nexp = nexp, nobs = nobs)) +# conf_high <- array(dim = c(nexp = nexp, nobs = nobs)) +# } + + # dif1 + for (i in 1:nobs) { + dif1[, , i] <- sapply(1:nexp, function(x) {exp[, x] - obs[, i]}) + } + + rms1 <- apply(dif1^2, c(2, 3), mean, na.rm = TRUE)^0.5 #array(dim = c(nexp, nobs)) + rms2 <- array(colMeans(obs^2, na.rm = TRUE)^0.5, dim = c(nobs = nobs)) + rms2[which(abs(rms2) <= (max(abs(rms2), na.rm = TRUE) / 1000))] <- max(abs( + rms2), na.rm = TRUE) / 1000 + #rms2 above: [nobs] + rms2 <- array(rms2, dim = c(nobs = nobs, nexp = nexp)) + #rms2 above: [nobs, nexp] + rms2 <- Reorder(rms2, c(2, 1)) + #rms2 above: [nexp, nobs] + + # use rms1 and rms2 to calculate rmsss + rmsss <- 1 - rms1/rms2 + + ## pval and sign + if (pval || sign) { + eno1 <- Eno(dif1, time_dim) + eno2 <- Eno(obs, time_dim) + eno2 <- array(eno2, dim = c(nobs = nobs, nexp = nexp)) + eno2 <- Reorder(eno2, c(2, 1)) + } + + # pval + if (pval || sign) { + + F.stat <- (eno2 * rms2^2 / (eno2- 1)) / ((eno1 * rms1^2 / (eno1- 1))) + tmp <- !is.na(eno1) & !is.na(eno2) & eno1 > 2 & eno2 > 2 + p_val <- 1 - pf(F.stat, eno1 - 1, eno2 - 1) + if (sign) signif <- p_val <= alpha + # If there isn't enough valid data, return NA + p_val[which(!tmp)] <- NA + if (sign) signif[which(!tmp)] <- NA + + # change not significant rmsss to NA + rmsss[which(!tmp)] <- NA + } + + ################################### + # Remove extra dimensions if dat_dim = NULL + if (is.null(dat_dim)) { + dim(rmsss) <- NULL + dim(p_val) <- NULL + if (sign) dim(signif) <- NULL + } + ################################### + + # output + res <- list(rmsss = rmsss) + if (pval) { + p.val <- list(p.val = p_val) + res <- c(res, p.val) + } + if (sign) { + signif <- list(sign = signif) + res <- c(res, signif) + } + + return(res) +} diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index ff0e9fd4..2d50a1a0 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -93,7 +93,7 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, # Group different metrics by type skill_scores <- c("rpss", "bss90", "bss10", "frpss", "crpss", "mean_bias_ss", "enscorr", "rpss_specs", "bss90_specs", "bss10_specs", - "enscorr_specs") + "enscorr_specs", "rmsss") scores <- c("rps", "frps", "crps", "frps_specs") for (name in c(skill_scores, scores, "mean_bias", "enssprerr")) { @@ -101,7 +101,8 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, if (name %in% names(skill_metrics)) { # Define plot characteristics and metric name to display in plot if (name %in% c("rpss", "bss90", "bss10", "frpss", "crpss", - "rpss_specs", "bss90_specs", "bss10_specs")) { + "rpss_specs", "bss90_specs", "bss10_specs", + "rmsss")) { display_name <- toupper(strsplit(name, "_")[[1]][1]) skill <- skill_metrics[[name]] brks <- seq(-1, 1, by = 0.1) -- GitLab From ce35f7438ec6af48fbfb7ac87acbf9a2168c6d67 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 2 Dec 2022 17:29:50 +0100 Subject: [PATCH 31/80] Correct unit tests and add comparison between 1 and 2 sdates in forecast --- tests/recipes/recipe-decadal_monthly_1b.yml | 51 ++++++++++++++++++ tests/recipes/recipe-decadal_monthly_2.yml | 2 +- tests/testthat/test-decadal_monthly_1.R | 57 ++++++++++++++++++++- tests/testthat/test-decadal_monthly_2.R | 55 +++++++++++++++++--- tests/testthat/test-decadal_monthly_3.R | 2 +- 5 files changed, 156 insertions(+), 11 deletions(-) create mode 100644 tests/recipes/recipe-decadal_monthly_1b.yml diff --git a/tests/recipes/recipe-decadal_monthly_1b.yml b/tests/recipes/recipe-decadal_monthly_1b.yml new file mode 100644 index 00000000..a09f2234 --- /dev/null +++ b/tests/recipes/recipe-decadal_monthly_1b.yml @@ -0,0 +1,51 @@ +Description: + Author: An-Chi Ho + '': split version +Analysis: + Horizon: Decadal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: EC-Earth3-i4 + member: r1i4p1f1,r2i4p1f1 + Multimodel: no + Reference: + name: ERA5 #JRA-55 + Time: + fcst_year: [2020,2021] + hcst_start: 1991 + hcst_end: 1994 +# season: 'Annual' + ftime_min: 1 + ftime_max: 3 + Region: + latmin: 17 + latmax: 20 + lonmin: 12 + lonmax: 15 + Regrid: + method: bilinear + type: to_system #to_reference + Workflow: + Anomalies: + compute: no + cross-validation: + Calibration: + method: bias + Skill: + metric: RPSS + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] + Indicators: + index: FALSE + ncores: # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: ./tests/out-logs/ + code_dir: /esarchive/scratch/aho/git/auto-s2s/ + diff --git a/tests/recipes/recipe-decadal_monthly_2.yml b/tests/recipes/recipe-decadal_monthly_2.yml index 49824f42..fc230b5c 100644 --- a/tests/recipes/recipe-decadal_monthly_2.yml +++ b/tests/recipes/recipe-decadal_monthly_2.yml @@ -35,7 +35,7 @@ Analysis: Calibration: method: raw Skill: - metric: RPSS_specs BSS90_specs EnsCorr_specs FRPS_specs FRPSS_specs BSS10_specs FRPS + metric: RPSS_specs EnsCorr_specs FRPS_specs FRPSS_specs BSS10_specs FRPS Probabilities: percentiles: [[1/3, 2/3]] Indicators: diff --git a/tests/testthat/test-decadal_monthly_1.R b/tests/testthat/test-decadal_monthly_1.R index d78bb322..3e4f1528 100644 --- a/tests/testthat/test-decadal_monthly_1.R +++ b/tests/testthat/test-decadal_monthly_1.R @@ -27,7 +27,7 @@ suppressWarnings({invisible(capture.output( skill_metrics <- compute_skill_metrics(recipe, calibrated_data) ))}) suppressWarnings({invisible(capture.output( -probs <- compute_probabilities(recipe, calibrated_data$hcst) +probs <- compute_probabilities(recipe, calibrated_data) ))}) # Saving @@ -263,7 +263,7 @@ list.files(outdir), c("plots", "tas_19911101.nc", "tas_19921101.nc", "tas_19931101.nc", "tas_19941101.nc", "tas_20211101.nc", "tas-obs_19911101.nc", "tas-obs_19921101.nc", "tas-obs_19931101.nc", "tas-obs_19941101.nc", "tas-percentiles_month11.nc", "tas-probs_19911101.nc", "tas-probs_19921101.nc", - "tas-probs_19931101.nc", "tas-probs_19941101.nc", "tas-skill_month11.nc") + "tas-probs_19931101.nc", "tas-probs_19941101.nc", "tas-probs_20211101.nc", "tas-skill_month11.nc") ) # open the files and check values/attributes? #expect_equal( @@ -284,3 +284,56 @@ c("forecast_ensemble_mean.png", "forecast_most_likely_tercile.png", # Delete files unlink(paste0(outdir, list.files(outdir, recursive = TRUE))) + + +#============================================================== + +# Compare with 2 forecast + +recipe_file <- "tests/recipes/recipe-decadal_monthly_1b.yml" +recipe <- prepare_outputs(recipe_file) +archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive + +# Load datasets +suppressWarnings({invisible(capture.output( +data_b <- load_datasets(recipe) +))}) + +# Calibrate datasets +suppressWarnings({invisible(capture.output( + calibrated_data_b <- calibrate_datasets(recipe, data_b) +))}) + +# Compute skill metrics +suppressWarnings({invisible(capture.output( +skill_metrics_b <- compute_skill_metrics(recipe, calibrated_data_b) +))}) +suppressWarnings({invisible(capture.output( +probs_b <- compute_probabilities(recipe, calibrated_data_b) +))}) + + +test_that("6. Compare with two sdates in forecast", { + + +expect_equal( +c(ClimProjDiags::Subset(data_b$fcst$data, 'syear', 2, drop = F)), +c(data$fcst$data) +) + +expect_equal( +c(ClimProjDiags::Subset(calibrated_data_b$fcst$data, 'syear', 2, drop = F)), +c(calibrated_data$fcst$data) +) + +expect_equal( +skill_metrics_b, +skill_metrics +) + +expect_equal( +lapply(probs_b$probs_fcst, ClimProjDiags::Subset, 'syear', 2), +probs$probs_fcst +) + +}) diff --git a/tests/testthat/test-decadal_monthly_2.R b/tests/testthat/test-decadal_monthly_2.R index 6549ce0e..5a407b7e 100644 --- a/tests/testthat/test-decadal_monthly_2.R +++ b/tests/testthat/test-decadal_monthly_2.R @@ -6,6 +6,7 @@ source("modules/Loading/Loading_decadal.R") source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") recipe_file <- "tests/recipes/recipe-decadal_monthly_2.yml" recipe <- prepare_outputs(recipe_file) @@ -25,9 +26,25 @@ suppressMessages({invisible(capture.output( skill_metrics <- compute_skill_metrics(recipe, calibrated_data) ))}) suppressWarnings({invisible(capture.output( -probs <- compute_probabilities(recipe, calibrated_data$hcst) +probs <- compute_probabilities(recipe, calibrated_data) ))}) +# Saving +suppressWarnings({invisible(capture.output( +save_data(recipe, data, calibrated_data, skill_metrics, probs) +))}) + +# Plotting +suppressWarnings({invisible(capture.output( +plot_data(recipe = recipe, data = data, + calibrated_data = calibrated_data, skill_metrics = skill_metrics, + probabilities = probs, significance = T) +))}) + + +outdir <- get_dir(recipe) + + #====================================== test_that("1. Loading", { @@ -152,7 +169,7 @@ TRUE ) expect_equal( names(skill_metrics), -c("rpss_specs", "bss90_specs", "enscorr_specs", "frps_specs", "frpss_specs", "bss10_specs", "frps") +c("rpss_specs", "enscorr_specs", "frps_specs", "frpss_specs", "bss10_specs", "frps") ) expect_equal( class(skill_metrics$rpss_specs), @@ -167,10 +184,10 @@ as.vector(skill_metrics$rpss_specs[6:8, 1, 2]), c(-0.3333333, 0.1666667, -0.3333333), tolerance = 0.0001 ) -expect_equal( -all(is.na(skill_metrics$bss90_specs)), -TRUE -) +#expect_equal( +#all(is.na(skill_metrics$bss90_specs)), +#TRUE +#) expect_equal( as.vector(skill_metrics$enscorr_specs[6:8, 1, 2]), c(0.4474382, 0.1026333, 0.4042823), @@ -199,7 +216,7 @@ tolerance = 0.0001 # Probs expect_equal( names(probs), -c('probs', 'percentiles') +c('probs', 'probs_fcst', 'percentiles') ) expect_equal( names(probs$probs), @@ -230,4 +247,28 @@ tolerance = 0.0001 }) +#====================================== + +test_that("4. Saving", { + +expect_equal( +list.files(outdir), +c("plots", "tas_19901101.nc", "tas_19911101.nc", "tas_19921101.nc", "tas_20201101.nc", "tas_20211101.nc", + "tas-obs_19901101.nc", "tas-obs_19911101.nc", "tas-obs_19921101.nc", + "tas-percentiles_month11.nc", "tas-probs_19901101.nc", "tas-probs_19911101.nc", + "tas-probs_19921101.nc", "tas-probs_20201101.nc", "tas-probs_20211101.nc", "tas-skill_month11.nc") +) + +#====================================== + +test_that("5. Visualization", { +expect_equal( +list.files(paste0(outdir, "/plots/")), +c("bss10_specs.png", "enscorr_specs.png", "forecast_ensemble_mean_2020.png", "forecast_ensemble_mean_2021.png", "forecast_most_likely_tercile_2020.png", "forecast_most_likely_tercile_2021.png", "frps_specs.png", "frps.png", "rpss_specs.png") +) + +}) + +# Delete files +unlink(paste0(outdir, list.files(outdir, recursive = TRUE))) diff --git a/tests/testthat/test-decadal_monthly_3.R b/tests/testthat/test-decadal_monthly_3.R index 70e98160..102f2f53 100644 --- a/tests/testthat/test-decadal_monthly_3.R +++ b/tests/testthat/test-decadal_monthly_3.R @@ -26,7 +26,7 @@ suppressWarnings({invisible(capture.output( skill_metrics <- compute_skill_metrics(recipe, calibrated_data) ))}) suppressWarnings({invisible(capture.output( -probs <- compute_probabilities(recipe, calibrated_data$hcst) +probs <- compute_probabilities(recipe, calibrated_data) ))}) #====================================== -- GitLab From 4ec075a36982d1ba3731ed346bdea8d061a7e476 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 2 Dec 2022 17:38:14 +0100 Subject: [PATCH 32/80] Fix unit test; one list item was missing --- tests/testthat/test-decadal_monthly_1.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-decadal_monthly_1.R b/tests/testthat/test-decadal_monthly_1.R index 3e4f1528..4d58315d 100644 --- a/tests/testthat/test-decadal_monthly_1.R +++ b/tests/testthat/test-decadal_monthly_1.R @@ -215,7 +215,7 @@ rep(FALSE, 3) # Probs expect_equal( names(probs), -c('probs', 'percentiles') +c('probs', 'probs_fcst', 'percentiles') ) expect_equal( names(probs$probs), -- GitLab From efe755fb46f6b7a73f10731a4f85437c02243764 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 2 Dec 2022 17:47:42 +0100 Subject: [PATCH 33/80] Fix bracelet --- tests/testthat/test-decadal_monthly_2.R | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/testthat/test-decadal_monthly_2.R b/tests/testthat/test-decadal_monthly_2.R index 5a407b7e..d7e786c0 100644 --- a/tests/testthat/test-decadal_monthly_2.R +++ b/tests/testthat/test-decadal_monthly_2.R @@ -258,6 +258,7 @@ c("plots", "tas_19901101.nc", "tas_19911101.nc", "tas_19921101.nc", "tas_2020110 "tas-percentiles_month11.nc", "tas-probs_19901101.nc", "tas-probs_19911101.nc", "tas-probs_19921101.nc", "tas-probs_20201101.nc", "tas-probs_20211101.nc", "tas-skill_month11.nc") ) +}) #====================================== -- GitLab From a59e1f9ada6d00d0e22765b4984879f7327de5c9 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 16 Dec 2022 09:04:19 +0100 Subject: [PATCH 34/80] Modify old recipes --- .../recipe_circular-sort-test.yml | 43 ----------------- .../testing_recipes/recipe_decadal.yml | 3 ++ .../testing_recipes/recipe_decadal_daily.yml | 3 ++ .../recipe_decadal_monthly_2.yml | 3 ++ .../testing_recipes/recipe_era5land.yml | 46 ------------------- .../recipe_system2c3s-prlr-nofcst.yml | 42 ----------------- .../recipe_system5c3s-rsds.yml | 3 ++ .../testing_recipes/recipe_system5c3s-tas.yml | 3 ++ .../recipe_system7c3s-prlr.yml | 3 ++ .../recipe_system7c3s-tas-specs.yml | 44 ------------------ .../recipe_tas-daily-regrid-to-reference.yml | 3 ++ .../recipe_tas-daily-regrid-to-system.yml | 3 ++ 12 files changed, 24 insertions(+), 175 deletions(-) delete mode 100644 modules/Loading/testing_recipes/recipe_circular-sort-test.yml delete mode 100644 modules/Loading/testing_recipes/recipe_era5land.yml delete mode 100644 modules/Loading/testing_recipes/recipe_system2c3s-prlr-nofcst.yml delete mode 100644 modules/Loading/testing_recipes/recipe_system7c3s-tas-specs.yml diff --git a/modules/Loading/testing_recipes/recipe_circular-sort-test.yml b/modules/Loading/testing_recipes/recipe_circular-sort-test.yml deleted file mode 100644 index 700fd3b2..00000000 --- a/modules/Loading/testing_recipes/recipe_circular-sort-test.yml +++ /dev/null @@ -1,43 +0,0 @@ -Description: - Author: V. Agudetse - Info: For testing the behavior of the loading module when loading data - that crosses the date line or the Greenwich meridian. -Analysis: - Horizon: Seasonal - Variables: - name: tas - freq: monthly_mean - Datasets: - System: - name: system7c3s - Multimodel: False - Reference: - name: era5 - Time: - sdate: '1101' - fcst_year: - hcst_start: '1993' - hcst_end: '2003' - leadtimemin: 2 - leadtimemax: 2 - Region: - latmin: -10 - latmax: 10 - lonmin: 320 - lonmax: 350 - Regrid: - method: bilinear - type: to_system - Workflow: - Calibration: - method: mse_min - Skill: - metric: BSS90 - Indicators: - index: no - Output_format: S2S4E -Run: - Loglevel: INFO - Terminal: yes - output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Loading/testing_recipes/recipe_decadal.yml b/modules/Loading/testing_recipes/recipe_decadal.yml index de4f9591..986578f7 100644 --- a/modules/Loading/testing_recipes/recipe_decadal.yml +++ b/modules/Loading/testing_recipes/recipe_decadal.yml @@ -29,6 +29,9 @@ Analysis: method: bilinear type: to_system #to_reference Workflow: + Anomalies: + compute: no + cross_validation: Calibration: method: bias Skill: diff --git a/modules/Loading/testing_recipes/recipe_decadal_daily.yml b/modules/Loading/testing_recipes/recipe_decadal_daily.yml index f362329e..9d404bfa 100644 --- a/modules/Loading/testing_recipes/recipe_decadal_daily.yml +++ b/modules/Loading/testing_recipes/recipe_decadal_daily.yml @@ -29,6 +29,9 @@ Analysis: method: bilinear type: to_system #to_reference Workflow: + Anomalies: + compute: no + cross_validation: Calibration: method: qmap Skill: diff --git a/modules/Loading/testing_recipes/recipe_decadal_monthly_2.yml b/modules/Loading/testing_recipes/recipe_decadal_monthly_2.yml index f9130e16..38b25d42 100644 --- a/modules/Loading/testing_recipes/recipe_decadal_monthly_2.yml +++ b/modules/Loading/testing_recipes/recipe_decadal_monthly_2.yml @@ -29,6 +29,9 @@ Analysis: method: bilinear type: to_system #to_reference Workflow: + Anomalies: + compute: no + cross_validation: Calibration: method: bias Skill: diff --git a/modules/Loading/testing_recipes/recipe_era5land.yml b/modules/Loading/testing_recipes/recipe_era5land.yml deleted file mode 100644 index c99906b6..00000000 --- a/modules/Loading/testing_recipes/recipe_era5land.yml +++ /dev/null @@ -1,46 +0,0 @@ -Description: - Author: V. Agudetse - -Analysis: - Horizon: Seasonal - Variables: - name: tas - freq: monthly_mean - Datasets: - System: - name: system7c3s - Multimodel: False - Reference: - name: era5land - Time: - sdate: '1101' - fcst_year: '2020' - hcst_start: '1993' - hcst_end: '2016' - ftime_min: 1 - ftime_max: 3 - Region: - latmin: -10 - latmax: 10 - lonmin: 0 - lonmax: 20 - Regrid: - method: bilinear - type: to_system - Workflow: - Calibration: - method: mse_min - Skill: - metric: RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr - Probabilities: - percentiles: [[1/4, 2/4, 3/4]] - Indicators: - index: no - ncores: 1 - remove_NAs: yes - Output_format: S2S4E -Run: - Loglevel: INFO - Terminal: yes - output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Loading/testing_recipes/recipe_system2c3s-prlr-nofcst.yml b/modules/Loading/testing_recipes/recipe_system2c3s-prlr-nofcst.yml deleted file mode 100644 index 3bfad08e..00000000 --- a/modules/Loading/testing_recipes/recipe_system2c3s-prlr-nofcst.yml +++ /dev/null @@ -1,42 +0,0 @@ -Description: - Author: V. Agudetse - -Analysis: - Horizon: Seasonal - Variables: - name: prlr - freq: monthly_mean - Datasets: - System: - name: system2c3s - Multimodel: False - Reference: - name: era5 - Time: - sdate: '0301' - fcst_year: # - hcst_start: '1993' - hcst_end: '2016' - ftime_min: 1 - ftime_max: 1 - Region: - latmin: -10 - latmax: 10 - lonmin: 0 - lonmax: 20 - Regrid: - method: bilinear - type: to_system - Workflow: - Calibration: - method: evmos - Skill: - metric: FRPS - Indicators: - index: no - Output_format: S2S4E -Run: - Loglevel: INFO - Terminal: yes - output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Loading/testing_recipes/recipe_system5c3s-rsds.yml b/modules/Loading/testing_recipes/recipe_system5c3s-rsds.yml index ca52e8cc..94fc716c 100644 --- a/modules/Loading/testing_recipes/recipe_system5c3s-rsds.yml +++ b/modules/Loading/testing_recipes/recipe_system5c3s-rsds.yml @@ -28,6 +28,9 @@ Analysis: method: bilinear type: to_system Workflow: + Anomalies: + compute: no + cross_validation: Calibration: method: mse_min Skill: diff --git a/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml b/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml index 27cdccdc..3a2bc72e 100644 --- a/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml +++ b/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml @@ -28,6 +28,9 @@ Analysis: method: bilinear type: to_system Workflow: + Anomalies: + compute: no + cross_validation: Calibration: method: raw Skill: diff --git a/modules/Loading/testing_recipes/recipe_system7c3s-prlr.yml b/modules/Loading/testing_recipes/recipe_system7c3s-prlr.yml index 197c109c..23b630b5 100644 --- a/modules/Loading/testing_recipes/recipe_system7c3s-prlr.yml +++ b/modules/Loading/testing_recipes/recipe_system7c3s-prlr.yml @@ -28,6 +28,9 @@ Analysis: method: bilinear type: to_system Workflow: + Anomalies: + compute: no + cross_validation: Calibration: method: mse_min Skill: diff --git a/modules/Loading/testing_recipes/recipe_system7c3s-tas-specs.yml b/modules/Loading/testing_recipes/recipe_system7c3s-tas-specs.yml deleted file mode 100644 index b1829d22..00000000 --- a/modules/Loading/testing_recipes/recipe_system7c3s-tas-specs.yml +++ /dev/null @@ -1,44 +0,0 @@ -Description: - Author: V. Agudetse - -Analysis: - Horizon: Seasonal - Variables: - name: tas - freq: monthly_mean - Datasets: - System: - name: system7c3s - Multimodel: False - Reference: - name: era5 - Time: - sdate: '1101' - fcst_year: '2020' - hcst_start: '1993' - hcst_end: '2016' - ftime_min: 1 - ftime_max: 3 - Region: - latmin: -10 - latmax: 10 - lonmin: 0 - lonmax: 20 - Regrid: - method: bilinear - type: to_system - Workflow: - Calibration: - method: mse_min - Skill: - metric: CRPS CRPSS FCRPS FCRPSS FRPS_Specs - Probabilities: - percentiles: [[1/3, 2/3], [1/10, 9/10]] - Indicators: - index: no - Output_format: S2S4E -Run: - Loglevel: INFO - Terminal: yes - output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-reference.yml b/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-reference.yml index 71e386fa..364d3dd6 100644 --- a/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-reference.yml +++ b/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-reference.yml @@ -29,6 +29,9 @@ Analysis: method: bilinear # Mandatory, str: Interpolation method. See docu. type: to_reference # Mandatory, str: to_system, to_reference, or CDO-accepted grid. Workflow: + Anomalies: + compute: no # Whether to compute the anomalies and use them for skill metrics + cross_validation: # whether they should be computed in cross-validation Calibration: method: qmap # Mandatory, str: Calibration method. See docu. Skill: diff --git a/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-system.yml b/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-system.yml index 233c14eb..244a5654 100644 --- a/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-system.yml +++ b/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-system.yml @@ -28,6 +28,9 @@ Analysis: method: bilinear type: to_system Workflow: + Anomalies: + compute: no + cross_validation: Calibration: method: qmap Skill: -- GitLab From 2593f44780d5b6e5ea579b8ef46ba794ffecc845 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 16 Dec 2022 09:39:03 +0100 Subject: [PATCH 35/80] Fix pipeline --- modules/Visualization/Visualization.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 81e16ce3..711df8bb 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -55,7 +55,7 @@ plot_data <- function(recipe, # Plot Most Likely Terciles if ((!is.null(probabilities)) && (!is.null(data$fcst))) { plot_most_likely_terciles(recipe, archive, data$fcst, - probabilities$percentiles, outdir) + probabilities, outdir) } } -- GitLab From 382fcf6031896203b3055fc22f0c5bfa13c7813d Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 16 Dec 2022 09:45:25 +0100 Subject: [PATCH 36/80] Modify decadal test script --- modules/test_decadal.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/test_decadal.R b/modules/test_decadal.R index 270e3800..8998cfbe 100644 --- a/modules/test_decadal.R +++ b/modules/test_decadal.R @@ -19,7 +19,7 @@ calibrated_data <- calibrate_datasets(recipe, data) skill_metrics <- compute_skill_metrics(recipe, calibrated_data) # Compute percentiles and probability bins -probabilities <- compute_probabilities(recipe, calibrated_data$hcst) +probabilities <- compute_probabilities(recipe, calibrated_data) # Export all data to netCDF save_data(recipe, calibrated_data, skill_metrics, probabilities) -- GitLab From 1b08578d0f8e0b1b251c32b90be330b8e2d57d8e Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 16 Dec 2022 09:57:31 +0100 Subject: [PATCH 37/80] Fix decadal pipeline --- tests/testthat/test-decadal_monthly_1.R | 2 +- tests/testthat/test-decadal_monthly_2.R | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-decadal_monthly_1.R b/tests/testthat/test-decadal_monthly_1.R index 114dc453..b76a216c 100644 --- a/tests/testthat/test-decadal_monthly_1.R +++ b/tests/testthat/test-decadal_monthly_1.R @@ -32,7 +32,7 @@ probs <- compute_probabilities(recipe, calibrated_data) # Saving suppressWarnings({invisible(capture.output( -save_data(recipe = recipe, data = calibrated_data, +save_data(recipe = recipe, data = calibrated_data, skill_metrics = skill_metrics, probabilities = probs, archive = archive) ))}) diff --git a/tests/testthat/test-decadal_monthly_2.R b/tests/testthat/test-decadal_monthly_2.R index d7e786c0..cdab57f3 100644 --- a/tests/testthat/test-decadal_monthly_2.R +++ b/tests/testthat/test-decadal_monthly_2.R @@ -31,13 +31,13 @@ probs <- compute_probabilities(recipe, calibrated_data) # Saving suppressWarnings({invisible(capture.output( -save_data(recipe, data, calibrated_data, skill_metrics, probs) +save_data(recipe, calibrated_data, skill_metrics, probs) ))}) # Plotting suppressWarnings({invisible(capture.output( -plot_data(recipe = recipe, data = data, - calibrated_data = calibrated_data, skill_metrics = skill_metrics, +plot_data(recipe = recipe, data = calibrated_data, + skill_metrics = skill_metrics, probabilities = probs, significance = T) ))}) -- GitLab From 27eb1069d606b7776bc7620fea6019baec5b7bb6 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 16 Dec 2022 10:07:19 +0100 Subject: [PATCH 38/80] Fix cross-validation to cross_validation in pipeline recipes --- tests/recipes/recipe-decadal_daily_1.yml | 2 +- tests/recipes/recipe-decadal_monthly_1b.yml | 2 +- tests/recipes/recipe-decadal_monthly_2.yml | 2 +- tests/recipes/recipe-decadal_monthly_3.yml | 2 +- tests/recipes/recipe-seasonal_daily_1.yml | 2 +- tests/recipes/recipe-seasonal_monthly_1.yml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/recipes/recipe-decadal_daily_1.yml b/tests/recipes/recipe-decadal_daily_1.yml index ab0fb9a6..7a2a575b 100644 --- a/tests/recipes/recipe-decadal_daily_1.yml +++ b/tests/recipes/recipe-decadal_daily_1.yml @@ -31,7 +31,7 @@ Analysis: Workflow: Anomalies: compute: no - cross-validation: + cross_validation: Calibration: method: qmap Skill: diff --git a/tests/recipes/recipe-decadal_monthly_1b.yml b/tests/recipes/recipe-decadal_monthly_1b.yml index a09f2234..5551d9c7 100644 --- a/tests/recipes/recipe-decadal_monthly_1b.yml +++ b/tests/recipes/recipe-decadal_monthly_1b.yml @@ -31,7 +31,7 @@ Analysis: Workflow: Anomalies: compute: no - cross-validation: + cross_validation: Calibration: method: bias Skill: diff --git a/tests/recipes/recipe-decadal_monthly_2.yml b/tests/recipes/recipe-decadal_monthly_2.yml index fc230b5c..45eb01dd 100644 --- a/tests/recipes/recipe-decadal_monthly_2.yml +++ b/tests/recipes/recipe-decadal_monthly_2.yml @@ -31,7 +31,7 @@ Analysis: Workflow: Anomalies: compute: no - cross-validation: + cross_validation: Calibration: method: raw Skill: diff --git a/tests/recipes/recipe-decadal_monthly_3.yml b/tests/recipes/recipe-decadal_monthly_3.yml index 1e2daa70..94bdfebc 100644 --- a/tests/recipes/recipe-decadal_monthly_3.yml +++ b/tests/recipes/recipe-decadal_monthly_3.yml @@ -31,7 +31,7 @@ Analysis: Workflow: Anomalies: compute: no - cross-validation: + cross_validation: Calibration: method: 'evmos' Skill: diff --git a/tests/recipes/recipe-seasonal_daily_1.yml b/tests/recipes/recipe-seasonal_daily_1.yml index 637b5371..52c7c0b8 100644 --- a/tests/recipes/recipe-seasonal_daily_1.yml +++ b/tests/recipes/recipe-seasonal_daily_1.yml @@ -30,7 +30,7 @@ Analysis: Workflow: Anomalies: compute: no - cross-validation: + cross_validation: Calibration: method: qmap Skill: diff --git a/tests/recipes/recipe-seasonal_monthly_1.yml b/tests/recipes/recipe-seasonal_monthly_1.yml index e75ccad5..00331332 100644 --- a/tests/recipes/recipe-seasonal_monthly_1.yml +++ b/tests/recipes/recipe-seasonal_monthly_1.yml @@ -30,7 +30,7 @@ Analysis: Workflow: Anomalies: compute: no - cross-validation: + cross_validation: Calibration: method: mse_min Skill: -- GitLab From 8f4d6dbd932f3a2c933c2254c01cd03c359e35d3 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 16 Dec 2022 15:02:27 +0100 Subject: [PATCH 39/80] Fix leap year bug and clean code --- modules/Loading/dates2load.R | 44 ++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/modules/Loading/dates2load.R b/modules/Loading/dates2load.R index e1e8f89e..ca8ecaa3 100644 --- a/modules/Loading/dates2load.R +++ b/modules/Loading/dates2load.R @@ -15,46 +15,41 @@ library(lubridate) -dates2load <- function(recipe, logger){ +dates2load <- function(recipe, logger) { temp_freq <- recipe$Analysis$Variables$freq recipe <- recipe$Analysis$Time # hcst dates - file_dates <- paste0(strtoi(recipe$hcst_start):strtoi(recipe$hcst_end), recipe$sdate) - - if (temp_freq == "monthly_mean"){ - file_dates <- .add_dims(file_dates, "hcst") + + if (temp_freq == "monthly_mean") { + file_dates <- .add_dims(file_dates) } # fcst dates (if fcst_year empty it creates an empty object) - if (! is.null(recipe$fcst_year)){ + if (! is.null(recipe$fcst_year)) { file_dates.fcst <- paste0(recipe$fcst_year, recipe$sdate) - if (temp_freq == "monthly_mean"){ - file_dates.fcst <- .add_dims(file_dates.fcst, "fcst") + if (temp_freq == "monthly_mean") { + file_dates.fcst <- .add_dims(file_dates.fcst) } } else { file_dates.fcst <- NULL info(logger, paste("fcst_year empty in the recipe, creating empty fcst object...")) } - return(list(hcst = file_dates, fcst = file_dates.fcst)) ## TODO: document header of fun - } # adds the correspondent dims to each sdate array -.add_dims <- function(data, type){ +.add_dims <- function(data) { default_dims <- c(sday = 1, sweek = 1, syear = length(data)) - default_dims[names(dim(data))] <- dim(data) dim(data) <- default_dims return(data) - } # Gets the corresponding dates or indices according @@ -65,30 +60,35 @@ dates2load <- function(recipe, logger){ # the forecasting times covering December to March get_timeidx <- function(sdates, ltmin, ltmax, - time_freq="monthly_mean"){ + time_freq="monthly_mean") { - if (time_freq == "daily_mean"){ + if (time_freq == "daily_mean") { sdates <- ymd(sdates) idx_min <- sdates + months(ltmin - 1) idx_max <- sdates + months(ltmax) - days(1) - indxs <- array(numeric(), c(file_date=length(sdates), - time = (as.integer(idx_max[1]-idx_min[1]+1)) + day_seq <- seq(idx_min[1], idx_max[1], by = 'days') + if (any("0229" %in% (format(day_seq, "%m%d")))) { + time_length <- as.integer(idx_max[1]-idx_min[1]) + } else { + time_length <- as.integer(idx_max[1]-idx_min[1]+1) + } + indxs <- array(numeric(), c(file_date = length(sdates), + time = time_length)) #syear = length(sdates), #sday = 1, sweek = 1, - )) for (sdate in 1:length(sdates)) { - days <- seq(idx_min[sdate], idx_max[sdate], by='days') - indxs[sdate,] <- days[!(format(days, "%m%d") == "0229")] + day_seq <- seq(idx_min[sdate], idx_max[sdate], by='days') + indxs[sdate,] <- day_seq[!(format(day_seq, "%m%d") == "0229")] } indxs <- as.POSIXct(indxs*86400, tz = 'UTC', origin = '1970-01-01') lubridate::hour(indxs) <- 12 lubridate::minute(indxs) <- 00 - dim(indxs) <- c(file_date=length(sdates), - time=(as.integer(idx_max[1]-idx_min[1])+1)) + dim(indxs) <- c(file_date = length(sdates), + time = time_length) } else if (time_freq == "monthly_mean") { -- GitLab From 52e0e777871d3c3a18083534e29dfa9eef6a18bc Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 16 Dec 2022 16:26:12 +0100 Subject: [PATCH 40/80] Change Corr and Enscorr to return significance instead of conf. intervals and p-value --- .../testing_recipes/recipe_seasonal-tests.yml | 3 + modules/Skill/Skill.R | 23 +- modules/Skill/tmp/Corr.R | 463 ++++++++++++++++++ 3 files changed, 479 insertions(+), 10 deletions(-) create mode 100644 modules/Skill/tmp/Corr.R diff --git a/modules/Loading/testing_recipes/recipe_seasonal-tests.yml b/modules/Loading/testing_recipes/recipe_seasonal-tests.yml index e1857ac0..61177b71 100644 --- a/modules/Loading/testing_recipes/recipe_seasonal-tests.yml +++ b/modules/Loading/testing_recipes/recipe_seasonal-tests.yml @@ -30,6 +30,9 @@ Analysis: Workflow: Calibration: method: mse_min + Anomalies: + compute: yes + cross_validation: yes Skill: metric: RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr mean_bias mean_bias_SS Probabilities: diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 9a59be40..1095ed10 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -15,6 +15,7 @@ source("modules/Skill/tmp/RandomWalkTest.R") source("modules/Skill/tmp/Bias.R") source("modules/Skill/tmp/AbsBiasSS.R") source("modules/Skill/tmp/RMSSS.R") +source("modules/Skill/tmp/Corr.R") ## TODO: Implement this in the future ## Which parameter are required? @@ -210,19 +211,21 @@ compute_skill_metrics <- function(recipe, data) { } else if (metric %in% c('enscorr', 'corr')) { ## TODO: Return significance ## TODO: Implement option for Kendall and Spearman methods? - skill <- s2dv::Corr(data$hcst$data, data$obs$data, - dat_dim = 'dat', - time_dim = time_dim, - method = 'pearson', - memb_dim = memb_dim, - memb = memb, - ncores = ncores) + skill <- Corr(data$hcst$data, data$obs$data, + dat_dim = 'dat', + time_dim = time_dim, + method = 'pearson', + memb_dim = memb_dim, + memb = memb, + conf = F, + pval = F, + sign = T, + alpha = 0.05, + ncores = ncores) skill <- lapply(skill, function(x) { .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$corr - skill_metrics[[ paste0(metric, "_p.value") ]] <- skill$p.val - skill_metrics[[ paste0(metric, "_conf.low") ]] <- skill$conf.lower - skill_metrics[[ paste0(metric, "_conf.up") ]] <- skill$conf.upper + skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign } else if (metric == 'rmsss') { # Compute hcst ensemble mean exp_ensmean <- MeanDims(exp$data, dims = memb_dim, drop = FALSE) diff --git a/modules/Skill/tmp/Corr.R b/modules/Skill/tmp/Corr.R new file mode 100644 index 00000000..c95b1034 --- /dev/null +++ b/modules/Skill/tmp/Corr.R @@ -0,0 +1,463 @@ +#'Compute the correlation coefficient between an array of forecast and their corresponding observation +#' +#'Calculate the correlation coefficient (Pearson, Kendall or Spearman) for +#'an array of forecast and an array of observation. The correlations are +#'computed along 'time_dim' that usually refers to the start date dimension. If +#''comp_dim' is given, the correlations are computed only if obs along comp_dim +#'dimension are complete between limits[1] and limits[2], i.e., there is no NA +#'between limits[1] and limits[2]. This option can be activated if the user +#'wants to account only for the forecasts which the corresponding observations +#'are available at all leadtimes.\cr +#'The confidence interval is computed by the Fisher transformation and the +#'significance level relies on an one-sided student-T distribution.\cr +#'The function can calculate ensemble mean before correlation by 'memb_dim' +#'specified and 'memb = F'. If ensemble mean is not calculated, correlation will +#'be calculated for each member. +#'If there is only one dataset for exp and obs, you can simply use cor() to +#'compute the correlation. +#' +#'@param exp A named numeric array of experimental data, with at least dimension +#' 'time_dim'. +#'@param obs A named numeric array of observational data, same dimensions as +#' parameter 'exp' except along 'dat_dim' and 'memb_dim'. +#'@param time_dim A character string indicating the name of dimension along +#' which the correlations are computed. The default value is 'sdate'. +#'@param dat_dim A character string indicating the name of dataset (nobs/nexp) +#' dimension. The default value is 'dataset'. If there is no dataset +#' dimension, set NULL. +#'@param comp_dim A character string indicating the name of dimension along which +#' obs is taken into account only if it is complete. The default value +#' is NULL. +#'@param limits A vector of two integers indicating the range along comp_dim to +#' be completed. The default is c(1, length(comp_dim dimension)). +#'@param method A character string indicating the type of correlation: +#' 'pearson', 'spearman', or 'kendall'. The default value is 'pearson'. +#'@param memb_dim A character string indicating the name of the member +#' dimension. It must be one dimension in 'exp' and 'obs'. If there is no +#' member dimension, set NULL. The default value is NULL. +#'@param memb A logical value indicating whether to remain 'memb_dim' dimension +#' (TRUE) or do ensemble mean over 'memb_dim' (FALSE). Only functional when +#' 'memb_dim' is not NULL. The default value is TRUE. +#'@param pval A logical value indicating whether to return or not the p-value +#' of the test Ho: Corr = 0. The default value is TRUE. +#'@param conf A logical value indicating whether to return or not the confidence +#' intervals. The default value is TRUE. +#'@param sign A logical value indicating whether to retrieve the statistical +#' significance of the test Ho: Corr = 0 based on 'alpha'. The default value is +#' FALSE. +#'@param alpha A numeric indicating the significance level for the statistical +#' significance test. The default value is 0.05. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list containing the numeric arrays with dimension:\cr +#' c(nexp, nobs, exp_memb, obs_memb, all other dimensions of exp except +#' time_dim and memb_dim).\cr +#'nexp is the number of experiment (i.e., 'dat_dim' in exp), and nobs is the +#'number of observation (i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and +#'nobs are omitted. exp_memb is the number of member in experiment (i.e., +#''memb_dim' in exp) and obs_memb is the number of member in observation (i.e., +#''memb_dim' in obs). If memb = F, exp_memb and obs_memb are omitted.\cr\cr +#'\item{$corr}{ +#' The correlation coefficient. +#'} +#'\item{$p.val}{ +#' The p-value. Only present if \code{pval = TRUE}. +#'} +#'\item{$conf.lower}{ +#' The lower confidence interval. Only present if \code{conf = TRUE}. +#'} +#'\item{$conf.upper}{ +#' The upper confidence interval. Only present if \code{conf = TRUE}. +#'} +#'\item{$sign}{ +#' The statistical significance. Only present if \code{sign = TRUE}. +#'} +#' +#'@examples +#'# Case 1: Load sample data as in Load() example: +#'example(Load) +#'clim <- Clim(sampleData$mod, sampleData$obs) +#'ano_exp <- Ano(sampleData$mod, clim$clim_exp) +#'ano_obs <- Ano(sampleData$obs, clim$clim_obs) +#'runmean_months <- 12 +#' +#'# Smooth along lead-times +#'smooth_ano_exp <- Smoothing(ano_exp, runmeanlen = runmean_months) +#'smooth_ano_obs <- Smoothing(ano_obs, runmeanlen = runmean_months) +#'required_complete_row <- 3 # Discard start dates which contain any NA lead-times +#'leadtimes_per_startdate <- 60 +#'corr <- Corr(MeanDims(smooth_ano_exp, 'member'), +#' MeanDims(smooth_ano_obs, 'member'), +#' comp_dim = 'ftime', +#' limits = c(ceiling((runmean_months + 1) / 2), +#' leadtimes_per_startdate - floor(runmean_months / 2))) +#' +#'# Case 2: Keep member dimension +#'corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member') +#'# ensemble mean +#'corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member', memb = FALSE) +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@importFrom stats cor pt qnorm +#'@export +Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', + comp_dim = NULL, limits = NULL, method = 'pearson', + memb_dim = NULL, memb = TRUE, + pval = TRUE, conf = TRUE, sign = FALSE, + alpha = 0.05, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (is.null(exp) | is.null(obs)) { + stop("Parameter 'exp' and 'obs' cannot be NULL.") + } + if (!is.numeric(exp) | !is.numeric(obs)) { + stop("Parameter 'exp' and 'obs' must be a numeric array.") + } + if (is.null(dim(exp)) | is.null(dim(obs))) { + stop(paste0("Parameter 'exp' and 'obs' must be at least two dimensions ", + "containing time_dim and dat_dim.")) + } + if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + if(!all(names(dim(exp)) %in% names(dim(obs))) | + !all(names(dim(obs)) %in% names(dim(exp)))) { + stop("Parameter 'exp' and 'obs' must have same dimension name") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string or NULL.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## comp_dim + if (!is.null(comp_dim)) { + if (!is.character(comp_dim) | length(comp_dim) > 1) { + stop("Parameter 'comp_dim' must be a character string.") + } + if (!comp_dim %in% names(dim(exp)) | !comp_dim %in% names(dim(obs))) { + stop("Parameter 'comp_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## limits + if (!is.null(limits)) { + if (is.null(comp_dim)) { + stop("Paramter 'comp_dim' cannot be NULL if 'limits' is assigned.") + } + if (!is.numeric(limits) | any(limits %% 1 != 0) | any(limits < 0) | + length(limits) != 2 | any(limits > dim(exp)[comp_dim])) { + stop(paste0("Parameter 'limits' must be a vector of two positive ", + "integers smaller than the length of paramter 'comp_dim'.")) + } + } + ## method + if (!(method %in% c("kendall", "spearman", "pearson"))) { + stop("Parameter 'method' must be one of 'kendall', 'spearman' or 'pearson'.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp)) | !memb_dim %in% names(dim(obs))) { + stop("Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## memb + if (!is.logical(memb) | length(memb) > 1) { + stop("Parameter 'memb' must be one logical value.") + } + ## pval + if (!is.logical(pval) | length(pval) > 1) { + stop("Parameter 'pval' must be one logical value.") + } + ## conf + if (!is.logical(conf) | length(conf) > 1) { + stop("Parameter 'conf' must be one logical value.") + } + ## sign + if (!is.logical(sign) | length(sign) > 1) { + stop("Parameter 'sign' must be one logical value.") + } + ## alpha + if (!is.numeric(alpha) | alpha < 0 | alpha > 1 | length(alpha) > 1) { + stop("Parameter 'alpha' must be a numeric number between 0 and 1.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + name_obs <- name_obs[-which(name_obs == memb_dim)] + } + if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimension except 'dat_dim' and 'memb_dim'.")) + } + if (dim(exp)[time_dim] < 3) { + stop("The length of time_dim must be at least 3 to compute correlation.") + } + + + ############################### + # Sort dimension + name_exp <- names(dim(exp)) + name_obs <- names(dim(obs)) + order_obs <- match(name_exp, name_obs) + obs <- Reorder(obs, order_obs) + + + ############################### + # Calculate Corr + + # Remove data along comp_dim dim if there is at least one NA between limits + if (!is.null(comp_dim)) { + pos <- which(names(dim(obs)) == comp_dim) + if (is.null(limits)) { + obs_sub <- obs + } else { + obs_sub <- ClimProjDiags::Subset(obs, pos, list(limits[1]:limits[2])) + } + outrows <- is.na(MeanDims(obs_sub, pos, na.rm = FALSE)) + outrows <- InsertDim(outrows, pos, dim(obs)[comp_dim]) + obs[which(outrows)] <- NA + rm(obs_sub, outrows) + } + + if (!is.null(memb_dim)) { + if (!memb) { #ensemble mean + exp <- MeanDims(exp, memb_dim, na.rm = TRUE) + obs <- MeanDims(obs, memb_dim, na.rm = TRUE) +# name_exp <- names(dim(exp)) +# margin_dims_ind <- c(1:length(name_exp))[-which(name_exp == memb_dim)] +# exp <- apply(exp, margin_dims_ind, mean, na.rm = TRUE) #NOTE: remove NAs here +# obs <- apply(obs, margin_dims_ind, mean, na.rm = TRUE) + memb_dim <- NULL + } + } + + res <- Apply(list(exp, obs), + target_dims = list(c(time_dim, dat_dim, memb_dim), + c(time_dim, dat_dim, memb_dim)), + fun = .Corr, + dat_dim = dat_dim, memb_dim = memb_dim, + time_dim = time_dim, method = method, + pval = pval, conf = conf, sign = sign, alpha = alpha, + ncores = ncores) + + return(res) +} + +.Corr <- function(exp, obs, dat_dim = 'dataset', memb_dim = 'member', + time_dim = 'sdate', method = 'pearson', + conf = TRUE, pval = TRUE, sign = FALSE, alpha = 0.05) { + if (is.null(memb_dim)) { + if (is.null(dat_dim)) { + # exp: [sdate] + # obs: [sdate] + nexp <- 1 + nobs <- 1 + CORR <- array(dim = c(nexp = nexp, nobs = nobs)) + if (any(!is.na(exp)) && sum(!is.na(obs)) > 2) { + CORR <- cor(exp, obs, use = "pairwise.complete.obs", method = method) + } + } else { + # exp: [sdate, dat_exp] + # obs: [sdate, dat_obs] + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + CORR <- array(dim = c(nexp = nexp, nobs = nobs)) + for (j in 1:nobs) { + for (y in 1:nexp) { + if (any(!is.na(exp[, y])) && sum(!is.na(obs[, j])) > 2) { + CORR[y, j] <- cor(exp[, y], obs[, j], + use = "pairwise.complete.obs", + method = method) + } + } + } +#---------------------------------------- +# Same as above calculation. +#TODO: Compare which is faster. +# CORR <- sapply(1:nobs, function(i) { +# sapply(1:nexp, function (x) { +# if (any(!is.na(exp[, x])) && sum(!is.na(obs[, i])) > 2) { +# cor(exp[, x], obs[, i], +# use = "pairwise.complete.obs", +# method = method) +# } else { +# NA +# } +# }) +# }) +#----------------------------------------- + } + + } else { # memb_dim != NULL + exp_memb <- as.numeric(dim(exp)[memb_dim]) # memb_dim + obs_memb <- as.numeric(dim(obs)[memb_dim]) + + if (is.null(dat_dim)) { + # exp: [sdate, memb_exp] + # obs: [sdate, memb_obs] + nexp <- 1 + nobs <- 1 + CORR <- array(dim = c(nexp = nexp, nobs = nobs, exp_memb = exp_memb, obs_memb = obs_memb)) + + for (j in 1:obs_memb) { + for (y in 1:exp_memb) { + + if (any(!is.na(exp[,y])) && sum(!is.na(obs[, j])) > 2) { + CORR[, , y, j] <- cor(exp[, y], obs[, j], + use = "pairwise.complete.obs", + method = method) + } + + } + } + } else { + # exp: [sdate, dat_exp, memb_exp] + # obs: [sdate, dat_obs, memb_obs] + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + + CORR <- array(dim = c(nexp = nexp, nobs = nobs, exp_memb = exp_memb, obs_memb = obs_memb)) + + for (j in 1:obs_memb) { + for (y in 1:exp_memb) { + CORR[, , y, j] <- sapply(1:nobs, function(i) { + sapply(1:nexp, function (x) { + if (any(!is.na(exp[, x, y])) && sum(!is.na(obs[, i, j])) > 2) { + cor(exp[, x, y], obs[, i, j], + use = "pairwise.complete.obs", + method = method) + } else { + NA + } + }) + }) + + } + } + } + + } + + +# if (pval) { +# for (i in 1:nobs) { +# p.val[, i] <- try(sapply(1:nexp, +# function(x) {(cor.test(exp[, x], obs[, i], +# use = "pairwise.complete.obs", +# method = method)$p.value)/2}), silent = TRUE) +# if (class(p.val[, i]) == 'character') { +# p.val[, i] <- NA +# } +# } +# } + + if (pval || conf || sign) { + if (method == "kendall" | method == "spearman") { + if (!is.null(dat_dim) | !is.null(memb_dim)) { + tmp <- apply(obs, c(1:length(dim(obs)))[-1], rank) # for memb_dim = NULL, 2; for memb_dim, c(2, 3) + names(dim(tmp))[1] <- time_dim + eno <- Eno(tmp, time_dim) + } else { + tmp <- rank(obs) + tmp <- array(tmp) + names(dim(tmp)) <- time_dim + eno <- Eno(tmp, time_dim) + } + } else if (method == "pearson") { + eno <- Eno(obs, time_dim) + } + + if (is.null(memb_dim)) { + eno_expand <- array(dim = c(nexp = nexp, nobs = nobs)) + for (i in 1:nexp) { + eno_expand[i, ] <- eno + } + } else { #member + eno_expand <- array(dim = c(nexp = nexp, nobs = nobs, exp_memb = exp_memb, obs_memb = obs_memb)) + for (i in 1:nexp) { + for (j in 1:exp_memb) { + eno_expand[i, , j, ] <- eno + } + } + } + + } + +#############old################# +#This doesn't return error but it's diff from cor.test() when method is spearman and kendall + if (pval || sign) { + t <- sqrt(CORR * CORR * (eno_expand - 2) / (1 - (CORR ^ 2))) + p.val <- pt(t, eno_expand - 2, lower.tail = FALSE) + if (sign) signif <- !is.na(p.val) & p.val <= alpha + } +################################### + if (conf) { + conf.lower <- alpha / 2 + conf.upper <- 1 - conf.lower + suppressWarnings({ + conflow <- tanh(atanh(CORR) + qnorm(conf.lower) / sqrt(eno_expand - 3)) + confhigh <- tanh(atanh(CORR) + qnorm(conf.upper) / sqrt(eno_expand - 3)) + }) + } + +################################### + # Remove nexp and nobs if dat_dim = NULL + if (is.null(dat_dim) & !is.null(memb_dim)) { + dim(CORR) <- dim(CORR)[3:length(dim(CORR))] + if (pval) { + dim(p.val) <- dim(p.val)[3:length(dim(p.val))] + } + if (conf) { + dim(conflow) <- dim(conflow)[3:length(dim(conflow))] + dim(confhigh) <- dim(confhigh)[3:length(dim(confhigh))] + } + } + +################################### + + res <- list(corr = CORR) + if (pval) { + res <- c(res, list(p.val = p.val)) + } + if (conf) { + res <- c(res, list(conf.lower = conflow, conf.upper = confhigh)) + } + if (sign) { + res <- c(res, list(sign = signif)) + } + + return(res) + +} -- GitLab From 8a49117a68b40b72e6d36af5bcb6a6dc94f1d81e Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 16 Dec 2022 16:35:36 +0100 Subject: [PATCH 41/80] Update unit tests --- tests/testthat/test-decadal_monthly_3.R | 2 +- tests/testthat/test-seasonal_monthly.R | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-decadal_monthly_3.R b/tests/testthat/test-decadal_monthly_3.R index 102f2f53..9f46a1fc 100644 --- a/tests/testthat/test-decadal_monthly_3.R +++ b/tests/testthat/test-decadal_monthly_3.R @@ -133,7 +133,7 @@ TRUE ) expect_equal( names(skill_metrics), -c("bss10", "bss10_significance", "corr", "corr_p.value", "corr_conf.low", "corr_conf.up") +c("bss10", "bss10_significance", "corr", "corr_significance") ) expect_equal( class(skill_metrics[[1]]), diff --git a/tests/testthat/test-seasonal_monthly.R b/tests/testthat/test-seasonal_monthly.R index 8caa6c62..e9792df0 100644 --- a/tests/testthat/test-seasonal_monthly.R +++ b/tests/testthat/test-seasonal_monthly.R @@ -185,8 +185,7 @@ TRUE expect_equal( names(skill_metrics), c("rpss", "rpss_significance", "crpss", "crpss_significance", "enscorr", - "enscorr_p.value", "enscorr_conf.low", "enscorr_conf.up", "corr", - "corr_p.value", "corr_conf.low", "corr_conf.up", "enscorr_specs") + "enscorr_significance", "corr", "corr_significance", "enscorr_specs") ) expect_equal( class(skill_metrics$rpss), -- GitLab From c7ca2ca2cd56f566a78b3cb6d661e75ee9b476b8 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 19 Dec 2022 16:46:39 +0100 Subject: [PATCH 42/80] Change comments in test_seasonal --- modules/test_seasonal.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R index a2eca7ee..854e293f 100644 --- a/modules/test_seasonal.R +++ b/modules/test_seasonal.R @@ -7,7 +7,7 @@ source("modules/Visualization/Visualization.R") recipe_file <- "modules/Loading/testing_recipes/recipe_seasonal-tests.yml" recipe <- prepare_outputs(recipe_file) -# archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive +## archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive # Load datasets data <- load_datasets(recipe) @@ -15,7 +15,7 @@ data <- load_datasets(recipe) calibrated_data <- calibrate_datasets(recipe, data) # Compute anomalies calibrated_data <- compute_anomalies(recipe, calibrated_data) -## TODO: Turn arguments into (recipe, data)? +# Compute skill metrics skill_metrics <- compute_skill_metrics(recipe, calibrated_data) # Compute percentiles and probability bins probabilities <- compute_probabilities(recipe, calibrated_data) -- GitLab From 639880f99c4543c2453376673bce514696e193a4 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 20 Dec 2022 12:27:40 +0100 Subject: [PATCH 43/80] Add parameters to decrease colorbar size and increase label size --- modules/Visualization/Visualization.R | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 351031c0..7de55fb3 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -5,6 +5,8 @@ source("modules/Visualization/tmp/PlotCombinedMap.R") ## TODO: Add the possibility to read the data directly from netCDF ## TODO: Adapt to multi-model case ## TODO: Add param 'raw'? +## TODO: Reduce colorbar size and increase colorbar label size +## Param: bar_label_scale and ???? plot_data <- function(recipe, data, @@ -179,7 +181,10 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, col_inf = col2[1], col_sup = col2[length(col2)], fileout = outfile, - bar_label_digits = 3) + bar_label_digits = 3, + bar_extra_margin = rep(0.9, 4), + bar_label_scale = 1.5, + axes_label_scale = 1.3) ) } } @@ -256,7 +261,10 @@ plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { cols = col2, brks = brks, fileout = outfile, - bar_label_digits = 4) + bar_label_digits = 4, + bar_extra_margin = rep(0.7, 4), + bar_label_scale = 1.5, + axes_label_scale = 1.3) } info(recipe$Run$logger, @@ -339,6 +347,9 @@ plot_most_likely_terciles <- function(recipe, archive, titles = titles, fileout = outfile, bar_label_digits = 2, + bar_scale = rep(0.7, 4), + bar_label_scale = 1.2, + axes_label_scale = 1.3, triangle_ends = c(F, F), width = 11, height = 8) ) } -- GitLab From 24ac68353f14619af2972be105c29cc515ef3642 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 21 Dec 2022 09:04:44 +0100 Subject: [PATCH 44/80] Fix decadal pipeline --- tests/testthat/test-decadal_monthly_3.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-decadal_monthly_3.R b/tests/testthat/test-decadal_monthly_3.R index 9f46a1fc..22fd4353 100644 --- a/tests/testthat/test-decadal_monthly_3.R +++ b/tests/testthat/test-decadal_monthly_3.R @@ -144,7 +144,7 @@ all(unlist(lapply(lapply(skill_metrics, dim)[1:2], all.equal, c(time = 3, latitu TRUE ) expect_equal( -all(unlist(lapply(lapply(skill_metrics, dim)[3:6], all.equal, c(ensemble = 3, time = 3, latitude = 25, longitude = 16)))), +all(unlist(lapply(lapply(skill_metrics, dim)[3:4], all.equal, c(ensemble = 3, time = 3, latitude = 25, longitude = 16)))), TRUE ) expect_equal( -- GitLab From bb69ee75cba4a5dc747ae76bddae74cc0415ddc9 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 30 Dec 2022 12:48:22 +0100 Subject: [PATCH 45/80] Add checks for anomalies --- modules/Anomalies/Anomalies.R | 7 +++++++ modules/Saving/Saving.R | 4 +++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R index 2fe1da0e..f8b49da8 100644 --- a/modules/Anomalies/Anomalies.R +++ b/modules/Anomalies/Anomalies.R @@ -5,6 +5,13 @@ source("modules/Anomalies/tmp/CST_Anomaly.R") compute_anomalies <- function(recipe, data) { + if (is.null(recipe$Analysis$Workflow$Anomalies$compute)) { + error(recipe$Run$logger, + paste("The anomaly module has been called, but the element", + "'Workflow:Anomalies:compute' is missing from the recipe.")) + stop() + } + if (recipe$Analysis$Workflow$Anomalies$compute) { if (recipe$Analysis$Workflow$Anomalies$cross_validation) { cross <- TRUE diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 124f0468..1fc035e9 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -463,7 +463,9 @@ save_metrics <- function(skill, # Add global and variable attributes global_attributes <- get_global_attributes(recipe, archive) - if (recipe$Analysis$Workflow$Anomalies$compute) { + ## TODO: Sort out the logic once default behavior is decided + if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { global_attributes <- c(list(from_anomalies = "Yes"), global_attributes) } else { -- GitLab From ef4221c71e8a3a581f3905f66b0278cfad7fdf05 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 30 Dec 2022 13:24:07 +0100 Subject: [PATCH 46/80] Add anomalies in recipe check to all functions --- modules/Saving/Saving.R | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 1fc035e9..28b5e552 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -579,7 +579,9 @@ save_corr <- function(skill, # Add global and variable attributes global_attributes <- get_global_attributes(recipe, archive) - if (recipe$Analysis$Workflow$Anomalies$compute) { + ## TODO: Sort out the logic once default behavior is decided + if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { global_attributes <- c(global_attributes, list(from_anomalies = "Yes")) } else { @@ -692,7 +694,9 @@ save_percentiles <- function(percentiles, # Add global and variable attributes global_attributes <- get_global_attributes(recipe, archive) - if (recipe$Analysis$Workflow$Anomalies$compute) { + ## TODO: Sort out the logic once default behavior is decided + if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { global_attributes <- c(list(from_anomalies = "Yes"), global_attributes) } else { @@ -801,7 +805,9 @@ save_probabilities <- function(probs, var.longname <- attr(data_cube$Variable, 'variable')$long_name global_attributes <- get_global_attributes(recipe, archive) # Add anomaly computation to global attributes - if (recipe$Analysis$Workflow$Anomalies$compute) { + ## TODO: Sort out the logic once default behavior is decided + if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { global_attributes <- c(list(from_anomalies = "Yes"), global_attributes) } else { -- GitLab From 621ccda5a6c090526a5961d731a2a2c366d75f1b Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 30 Dec 2022 13:24:26 +0100 Subject: [PATCH 47/80] Update call to RMSSS --- modules/Skill/Skill.R | 7 +- modules/Skill/tmp/RMSSS.R | 290 ++++++++++++++++++++++++++++++-------- 2 files changed, 237 insertions(+), 60 deletions(-) diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 9a59be40..9892ba52 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -224,16 +224,15 @@ compute_skill_metrics <- function(recipe, data) { skill_metrics[[ paste0(metric, "_conf.low") ]] <- skill$conf.lower skill_metrics[[ paste0(metric, "_conf.up") ]] <- skill$conf.upper } else if (metric == 'rmsss') { - # Compute hcst ensemble mean - exp_ensmean <- MeanDims(exp$data, dims = memb_dim, drop = FALSE) # Compute RMSS - skill <- RMSSS(exp_ensmean, obs$data, + skill <- RMSSS(data$hcst$data, data$obs$data, dat_dim = 'dat', time_dim = time_dim, + memb_dim = memb_dim, pval = FALSE, sign = TRUE, + sig_method = 'Random Walk', ncores = ncores) - rm(exp_ensmean) # Compute ensemble mean and modify dimensions skill <- lapply(skill, function(x) { .drop_dims(x)}) diff --git a/modules/Skill/tmp/RMSSS.R b/modules/Skill/tmp/RMSSS.R index 9c47da45..d2ff4861 100644 --- a/modules/Skill/tmp/RMSSS.R +++ b/modules/Skill/tmp/RMSSS.R @@ -11,7 +11,7 @@ #'The RMSSS are computed along the time_dim dimension which should correspond #'to the start date dimension.\cr #'The p-value and significance test are optionally provided by an one-sided -#'Fisher test.\cr +#'Fisher test or Random Walk test.\cr #' #'@param exp A named numeric array of experimental data which contains at least #' two dimensions for dat_dim and time_dim. It can also be a vector with the @@ -23,10 +23,22 @@ #' dimension can be different. It can also be a vector with the same length as #' 'exp', then the vector will automatically be 'time_dim' and 'dat_dim' will #' be 1. +#'@param ref A named numerical array of the reference forecast data with at +#' least time dimension, or 0 (typical climatological forecast) or 1 +#' (normalized climatological forecast). If it is an array, the dimensions must +#' be the same as 'exp' except 'memb_dim' and 'dat_dim'. If there is only one +#' reference dataset, it should not have dataset dimension. If there is +#' corresponding reference for each experiment, the dataset dimension must +#' have the same length as in 'exp'. If 'ref' is NULL, the typical +#' climatological forecast is used as reference forecast (equivelant to 0.) +#' The default value is NULL. #'@param dat_dim A character string indicating the name of dataset (nobs/nexp) #' dimension. The default value is 'dataset'. #'@param time_dim A character string indicating the name of dimension along #' which the RMSSS are computed. The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the ensemble mean; it should be set to NULL if the parameter 'exp' +#' and 'ref' are already the ensemble mean. The default value is NULL. #'@param pval A logical value indicating whether to compute or not the p-value #' of the test Ho: RMSSS = 0. The default value is TRUE. #'@param sign A logical value indicating whether to compute or not the @@ -34,6 +46,8 @@ #' FALSE. #'@param alpha A numeric of the significance level to be used in the #' statistical significance test. The default value is 0.05. +#'@param sig_method A character string indicating the significance method. The +#' options are "one-sided Fisher" (default) and "Random Walk". #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -66,11 +80,12 @@ #'@import multiApply #'@importFrom stats pf #'@export -RMSSS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', - pval = TRUE, sign = FALSE, alpha = 0.05, ncores = NULL) { +RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = 'dataset', + memb_dim = NULL, pval = TRUE, sign = FALSE, alpha = 0.05, + sig_method = 'one-sided Fisher', ncores = NULL) { # Check inputs - ## exp and obs (1) + ## exp, obs, and ref (1) if (is.null(exp) | is.null(obs)) { stop("Parameter 'exp' and 'obs' cannot be NULL.") } @@ -99,6 +114,19 @@ RMSSS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', !all(names(dim(obs)) %in% names(dim(exp)))) { stop("Parameter 'exp' and 'obs' must have same dimension name.") } + if (!is.null(ref)) { + if (!is.numeric(ref)) { + stop("Parameter 'ref' must be numeric.") + } + if (is.array(ref)) { + if (any(is.null(names(dim(ref))))| any(nchar(names(dim(ref))) == 0)) { + stop("Parameter 'ref' must have dimension names.") + } + } else if (length(ref) != 1 | any(!ref %in% c(0, 1))) { + stop("Parameter 'ref' must be a numeric array or number 0 or 1.") + } + } + ## time_dim if (!is.character(time_dim) | length(time_dim) > 1) { stop("Parameter 'time_dim' must be a character string.") @@ -116,6 +144,23 @@ RMSSS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', " Set it as NULL if there is no dataset dimension.") } } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (memb_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { + obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') + } else { + stop("Not implemented for observations with members ('obs' can have 'memb_dim', ", + "but it should be of length = 1).") + } + } + } ## pval if (!is.logical(pval) | length(pval) > 1) { stop("Parameter 'pval' must be one logical value.") @@ -128,6 +173,14 @@ RMSSS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', if (!is.numeric(alpha) | length(alpha) > 1) { stop("Parameter 'alpha' must be one numeric value.") } + ## sig_method + if (length(sig_method) != 1 | !any(sig_method %in% c('one-sided Fisher', 'Random Walk'))) { + stop("Parameter 'sig_method' must be one of 'one-sided Fisher' or 'Random Walk'.") + } + if (sig_method == "Random Walk" & pval == T) { + warning("p-value cannot be calculated by significance method 'Random Walk'.") + pval <- FALSE + } ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | @@ -138,66 +191,185 @@ RMSSS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', ## exp and obs (2) name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + } if (!is.null(dat_dim)) { name_exp <- name_exp[-which(name_exp == dat_dim)] name_obs <- name_obs[-which(name_obs == dat_dim)] } if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimension except 'dat_dim'.")) + "all dimension except 'memb_dim' and 'dat_dim'.")) + } + if (!is.null(ref)) { + name_ref <- sort(names(dim(ref))) + if (!is.null(memb_dim) && memb_dim %in% name_ref) { + name_ref <- name_ref[-which(name_ref == memb_dim)] + } + if (!is.null(dat_dim)) { + if (dat_dim %in% name_ref) { + if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { + stop(paste0("If parameter 'ref' has dataset dimension, it must be ", + "equal to dataset dimension of 'exp'.")) + } + name_ref <- name_ref[-which(name_ref == dat_dim)] + } + } + if (!identical(length(name_exp), length(name_ref)) | + !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { + stop(paste0("Parameter 'exp' and 'ref' must have the same length of ", + "all dimensions except 'memb_dim' and 'dat_dim' if there is ", + "only one reference dataset.")) + } } + if (dim(exp)[time_dim] <= 2) { stop("The length of time_dim must be more than 2 to compute RMSSS.") } ############################### - # Sort dimension - name_exp <- names(dim(exp)) - name_obs <- names(dim(obs)) - order_obs <- match(name_exp, name_obs) - obs <- Reorder(obs, order_obs) +# # Sort dimension +# name_exp <- names(dim(exp)) +# name_obs <- names(dim(obs)) +# order_obs <- match(name_exp, name_obs) +# obs <- Reorder(obs, order_obs) + + + ############################### + # Create ref array if needed + if (is.null(ref)) ref <- 0 + if (!is.array(ref)) { + ref <- array(data = ref, dim = dim(exp)) + } + ############################### + ## Ensemble mean + if (!is.null(memb_dim)) { + exp <- MeanDims(exp, memb_dim, na.rm = T) + if (!is.null(ref) & memb_dim %in% names(dim(ref))) { + ref <- MeanDims(ref, memb_dim, na.rm = T) + } + } ############################### # Calculate RMSSS - - res <- Apply(list(exp, obs), - target_dims = list(c(time_dim, dat_dim), - c(time_dim, dat_dim)), + +# if (!is.null(ref)) { # use "ref" as reference forecast +# if (!is.null(dat_dim) && dat_dim %in% names(dim(ref))) { +# target_dims_ref <- c(time_dim, dat_dim) +# } else { +# target_dims_ref <- c(time_dim) +# } +# data <- list(exp = exp, obs = obs, ref = ref) +# target_dims = list(exp = c(time_dim, dat_dim), +# obs = c(time_dim, dat_dim), +# ref = target_dims_ref) +# } else { +# data <- list(exp = exp, obs = obs) +# target_dims = list(exp = c(time_dim, dat_dim), +# obs = c(time_dim, dat_dim)) +# } + data <- list(exp = exp, obs = obs, ref = ref) + if (!is.null(dat_dim)) { + if (dat_dim %in% names(dim(ref))) { + target_dims <- list(exp = c(time_dim, dat_dim), + obs = c(time_dim, dat_dim), + ref = c(time_dim, dat_dim)) + } else { + target_dims <- list(exp = c(time_dim, dat_dim), + obs = c(time_dim, dat_dim), + ref = c(time_dim)) + } + } else { + target_dims <- list(exp = time_dim, obs = time_dim, ref = time_dim) + } + + res <- Apply(data, + target_dims = target_dims, fun = .RMSSS, time_dim = time_dim, dat_dim = dat_dim, pval = pval, sign = sign, alpha = alpha, + sig_method = sig_method, ncores = ncores) return(res) } -.RMSSS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', pval = TRUE, - sign = FALSE, alpha = 0.05) { +.RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = 'dataset', pval = TRUE, + sign = FALSE, alpha = 0.05, sig_method = 'one-sided Fisher') { # exp: [sdate, (dat)] # obs: [sdate, (dat)] + # ref: [sdate, (dat)] or NULL + + if (is.null(ref)) { + ref <- array(data = 0, dim = dim(obs)) + } else if (identical(ref, 0) | identical(ref, 1)) { + ref <- array(ref, dim = dim(exp)) + } if (is.null(dat_dim)) { # exp: [sdate] # obs: [sdate] nexp <- 1 nobs <- 1 - dim(exp) <- c(dim(exp), nexp = nexp) - dim(obs) <- c(dim(obs), nobs = nobs) + nref <- 1 + # Add dat dim back temporarily + dim(exp) <- c(dim(exp), dat = 1) + dim(obs) <- c(dim(obs), dat = 1) + dim(ref) <- c(dim(ref), dat = 1) + } else { # exp: [sdate, dat_exp] # obs: [sdate, dat_obs] nexp <- as.numeric(dim(exp)[2]) nobs <- as.numeric(dim(obs)[2]) + if (dat_dim %in% names(dim(ref))) { + nref <- as.numeric(dim(ref)[2]) + } else { + dim(ref) <- c(dim(ref), dat = 1) + nref <- 1 + } } nsdate <- as.numeric(dim(exp)[1]) - - p_val <- array(dim = c(nexp = nexp, nobs = nobs)) + + # RMS of forecast dif1 <- array(dim = c(nsdate, nexp, nobs)) names(dim(dif1)) <- c(time_dim, 'nexp', 'nobs') + for (i in 1:nobs) { + dif1[, , i] <- sapply(1:nexp, function(x) {exp[, x] - obs[, i]}) + } + + rms_exp <- apply(dif1^2, c(2, 3), mean, na.rm = TRUE)^0.5 #array(dim = c(nexp, nobs)) + + # RMS of reference +# if (!is.null(ref)) { + dif2 <- array(dim = c(nsdate, nref, nobs)) + names(dim(dif2)) <- c(time_dim, 'nexp', 'nobs') + for (i in 1:nobs) { + dif2[, , i] <- sapply(1:nref, function(x) {ref[, x] - obs[, i]}) + } + rms_ref <- apply(dif2^2, c(2, 3), mean, na.rm = TRUE)^0.5 #array(dim = c(nref, nobs)) + if (nexp != nref) { + # expand rms_ref to nexp (nref is 1) + rms_ref <- array(rms_ref, dim = c(nobs = nobs, nexp = nexp)) + rms_ref <- Reorder(rms_ref, c(2, 1)) + } +# } else { +# rms_ref <- array(colMeans(obs^2, na.rm = TRUE)^0.5, dim = c(nobs = nobs, nexp = nexp)) +## rms_ref[which(abs(rms_ref) <= (max(abs(rms_ref), na.rm = TRUE) / 1000))] <- max(abs( +## rms_ref), na.rm = TRUE) / 1000 +# rms_ref <- Reorder(rms_ref, c(2, 1)) +# #rms_ref above: [nexp, nobs] +# } + + rmsss <- 1 - rms_exp / rms_ref + +################################################# + # if (conf) { # conflow <- (1 - conf.lev) / 2 # confhigh <- 1 - conflow @@ -205,45 +377,51 @@ RMSSS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', # conf_high <- array(dim = c(nexp = nexp, nobs = nobs)) # } - # dif1 - for (i in 1:nobs) { - dif1[, , i] <- sapply(1:nexp, function(x) {exp[, x] - obs[, i]}) - } + if (sig_method == 'one-sided Fisher') { + p_val <- array(dim = c(nexp = nexp, nobs = nobs)) + ## pval and sign + if (pval || sign) { + eno1 <- Eno(dif1, time_dim) + if (is.null(ref)) { + eno2 <- Eno(obs, time_dim) + eno2 <- array(eno2, dim = c(nobs = nobs, nexp = nexp)) + eno2 <- Reorder(eno2, c(2, 1)) + } else { + eno2 <- Eno(dif2, time_dim) + if (nref != nexp) { + eno2 <- array(eno2, dim = c(nobs = nobs, nexp = nexp)) + eno2 <- Reorder(eno2, c(2, 1)) + } + } - rms1 <- apply(dif1^2, c(2, 3), mean, na.rm = TRUE)^0.5 #array(dim = c(nexp, nobs)) - rms2 <- array(colMeans(obs^2, na.rm = TRUE)^0.5, dim = c(nobs = nobs)) - rms2[which(abs(rms2) <= (max(abs(rms2), na.rm = TRUE) / 1000))] <- max(abs( - rms2), na.rm = TRUE) / 1000 - #rms2 above: [nobs] - rms2 <- array(rms2, dim = c(nobs = nobs, nexp = nexp)) - #rms2 above: [nobs, nexp] - rms2 <- Reorder(rms2, c(2, 1)) - #rms2 above: [nexp, nobs] - - # use rms1 and rms2 to calculate rmsss - rmsss <- 1 - rms1/rms2 - - ## pval and sign - if (pval || sign) { - eno1 <- Eno(dif1, time_dim) - eno2 <- Eno(obs, time_dim) - eno2 <- array(eno2, dim = c(nobs = nobs, nexp = nexp)) - eno2 <- Reorder(eno2, c(2, 1)) - } + F.stat <- (eno2 * rms_ref^2 / (eno2 - 1)) / ((eno1 * rms_exp^2 / (eno1- 1))) + tmp <- !is.na(eno1) & !is.na(eno2) & eno1 > 2 & eno2 > 2 + p_val <- 1 - pf(F.stat, eno1 - 1, eno2 - 1) + if (sign) signif <- p_val <= alpha + # If there isn't enough valid data, return NA + p_val[which(!tmp)] <- NA + if (sign) signif[which(!tmp)] <- NA + + # change not enough valid data rmsss to NA + rmsss[which(!tmp)] <- NA + } - # pval - if (pval || sign) { + } else if (sig_method == "Random Walk") { + signif <- array(dim = c(nexp = nexp, nobs = nobs)) + for (i in 1:nexp) { + for (j in 1:nobs) { - F.stat <- (eno2 * rms2^2 / (eno2- 1)) / ((eno1 * rms1^2 / (eno1- 1))) - tmp <- !is.na(eno1) & !is.na(eno2) & eno1 > 2 & eno2 > 2 - p_val <- 1 - pf(F.stat, eno1 - 1, eno2 - 1) - if (sign) signif <- p_val <= alpha - # If there isn't enough valid data, return NA - p_val[which(!tmp)] <- NA - if (sign) signif[which(!tmp)] <- NA - - # change not significant rmsss to NA - rmsss[which(!tmp)] <- NA + # Error + error_exp <- array(data = abs(exp[, i] - obs[, j]), dim = c(time = nsdate)) + if (nref == nexp) { + error_ref <- array(data = abs(ref[, i] - obs[, j]), dim = c(time = nsdate)) + } else { + # nref = 1 + error_ref <- array(data = abs(ref - obs[, j]), dim = c(time = nsdate)) + } + signif[i, j] <- .RandomWalkTest(skill_A = error_exp, skill_B = error_ref)$signif + } + } } ################################### -- GitLab From 64f9da6f99bc978e49e85a02dfea8d1ad6d5ea23 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 3 Jan 2023 09:20:33 +0100 Subject: [PATCH 48/80] Formatting --- modules/Loading/dates2load.R | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/modules/Loading/dates2load.R b/modules/Loading/dates2load.R index ca8ecaa3..0e3613f3 100644 --- a/modules/Loading/dates2load.R +++ b/modules/Loading/dates2load.R @@ -19,7 +19,6 @@ dates2load <- function(recipe, logger) { temp_freq <- recipe$Analysis$Variables$freq recipe <- recipe$Analysis$Time - # hcst dates file_dates <- paste0(strtoi(recipe$hcst_start):strtoi(recipe$hcst_end), recipe$sdate) @@ -27,8 +26,7 @@ dates2load <- function(recipe, logger) { if (temp_freq == "monthly_mean") { file_dates <- .add_dims(file_dates) } - - # fcst dates (if fcst_year empty it creates an empty object) + # fcst dates (if fcst_year empty it creates an empty object) if (! is.null(recipe$fcst_year)) { file_dates.fcst <- paste0(recipe$fcst_year, recipe$sdate) if (temp_freq == "monthly_mean") { -- GitLab From 21740a255a3643d2b6eb93b287ed70e8211fabca Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 3 Jan 2023 09:54:33 +0100 Subject: [PATCH 49/80] Fix handling of leadtimes in systems where the time stamp belongs to the following month --- conf/archive.yml | 8 ++++++++ modules/Loading/Loading.R | 13 +++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/conf/archive.yml b/conf/archive.yml index 04d69c5a..fb0379a0 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -19,6 +19,7 @@ archive: fcst: 51 hcst: 25 calendar: "proleptic_gregorian" + time_stamp_lag: "0" reference_grid: "/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20180501.nc" system7c3s: name: "Meteo-France System 7" @@ -30,6 +31,7 @@ archive: nmember: fcst: 51 hcst: 25 + time_stamp_lag: "+1" calendar: "proleptic_gregorian" reference_grid: "conf/grid_description/griddes_system7c3s.txt" system21_m1: @@ -43,6 +45,7 @@ archive: fcst: 50 hcst: 30 calendar: "proleptic_gregorian" + time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_system21_m1.txt" system35c3s: name: "CMCC-SPS3.5" @@ -55,6 +58,7 @@ archive: fcst: 50 hcst: 40 calendar: "proleptic_gregorian" + time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_system35c3s.txt" system2c3s: name: "JMA System 2" @@ -66,6 +70,7 @@ archive: fcst: 10 hcst: 10 calendar: "proleptic_gregorian" + time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_system2c3s.txt" eccc1: name: "ECCC CanCM4i" @@ -77,6 +82,7 @@ archive: fcst: 10 hcst: 10 calendar: "proleptic_gregorian" + time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_eccc1.txt" glosea6_system600-c3s: name: "UKMO GloSea 6 6.0" @@ -88,6 +94,7 @@ archive: fcst: 62 hcst: 28 calendar: "proleptic_gregorian" + time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_ukmo600.txt" ncep-cfsv2: name: "NCEP CFSv2" @@ -99,6 +106,7 @@ archive: fcst: 20 hcst: 20 calendar: "gregorian" + time_stamp_lag: "0" reference_grid: "conf/grid_description/griddes_ncep-cfsv2.txt" Reference: era5: diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 66a53451..1c8ccad7 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -143,7 +143,11 @@ load_datasets <- function(recipe) { ## TODO: Give correct dimensions to $Dates$start ## (sday, sweek, syear instead of file_date) hcst <- as.s2dv_cube(hcst) - + # Adjust dates for models where the time stamp goes into the next month + if (recipe$Analysis$Variables$freq == "monthly_mean") { + hcst$Dates$start[] <- hcst$Dates$start - seconds(exp_descrip$time_stamp_lag) + } + # Load forecast #------------------------------------------------------------------- if (!is.null(recipe$Analysis$Time$fcst_year)) { @@ -151,7 +155,7 @@ load_datasets <- function(recipe) { # with the daily case and the current version of startR not allowing # multiple dims split - fcst <- Start(dat = fcst.path, + fcst <- Start(dat = fcst.path, var = variable, file_date = sdates$fcst, time = idxs$fcst, @@ -193,6 +197,11 @@ load_datasets <- function(recipe) { # Convert fcst to s2dv_cube fcst <- as.s2dv_cube(fcst) + # Adjust dates for models where the time stamp goes into the next month + if (recipe$Analysis$Variables$freq == "monthly_mean") { + fcst$Dates$start[] <- + fcst$Dates$start - seconds(exp_descrip$time_stamp_lag) + } } else { fcst <- NULL -- GitLab From 832702be2ad20814cc221e5c879f0fc424967a8e Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 3 Jan 2023 10:38:46 +0100 Subject: [PATCH 50/80] Adapt seasonal test to fix pipeline --- tests/testthat/test-seasonal_monthly.R | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/testthat/test-seasonal_monthly.R b/tests/testthat/test-seasonal_monthly.R index 8caa6c62..b29ccb95 100644 --- a/tests/testthat/test-seasonal_monthly.R +++ b/tests/testthat/test-seasonal_monthly.R @@ -108,19 +108,19 @@ tolerance = 0.0001 ) expect_equal( (data$hcst$Dates$start)[1], -as.POSIXct("1993-12-01", tz = 'UTC') +as.POSIXct("1993-11-30 23:59:59", tz = 'UTC') ) expect_equal( (data$hcst$Dates$start)[2], -as.POSIXct("1994-12-01", tz = 'UTC') +as.POSIXct("1994-11-30 23:59:59", tz = 'UTC') ) expect_equal( (data$hcst$Dates$start)[5], -as.POSIXct("1994-01-01", tz = 'UTC') +as.POSIXct("1993-12-31 23:59:59", tz = 'UTC') ) expect_equal( (data$obs$Dates$start)[10], -as.POSIXct("1995-02-14", tz = 'UTC') +as.POSIXct("1995-01-15 12:00:00", tz = 'UTC') ) }) @@ -153,22 +153,22 @@ c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 3, long ) expect_equal( mean(calibrated_data$fcst$data), -291.1218, +291.6433, tolerance = 0.0001 ) expect_equal( mean(calibrated_data$hcst$data), -289.8596, +290.9006, tolerance = 0.0001 ) expect_equal( as.vector(drop(calibrated_data$hcst$data)[1, , 2, 3, 4]), -c(287.7982, 287.0422, 290.4297), +c(291.8887, 287.0233, 289.8808), tolerance = 0.0001 ) expect_equal( range(calibrated_data$fcst$data), -c(283.5374, 306.2353), +c(283.8926, 299.0644), tolerance = 0.0001 ) @@ -202,7 +202,7 @@ dim(skill_metrics$rpss) ) expect_equal( as.vector(skill_metrics$rpss[, 2, 3]), -c(-1.153829, -1.114743, -1.392457), +c(-0.2918857, -1.4809143, -1.3842286), tolerance = 0.0001 ) expect_equal( -- GitLab From 4cdd0233dc78cb94d04e3d7022a7cc52d2a35b68 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 4 Jan 2023 11:48:08 +0100 Subject: [PATCH 51/80] Add new color scheme for CERISE/Scorecards (WIP) --- modules/Visualization/Visualization.R | 145 ++++++++++++++------------ 1 file changed, 81 insertions(+), 64 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 7de55fb3..0f604f51 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -63,7 +63,14 @@ plot_data <- function(recipe, plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, outdir, significance = F) { - + # recipe: Auto-S2S recipe + # archive: Auto-S2S archive + # data_cube: s2dv_cube object with the corresponding hindcast data + # skill_metrics: list of named skill metrics arrays + # outdir: output directory + # significance: T/F, whether to display the significance dots in the plots + + ## TODO: OPTION for CERISE: Using PuOr # Abort if frequency is daily if (recipe$Analysis$Variables$freq == "daily_mean") { error(recipe$Run$logger, "Visualization functions not yet implemented @@ -83,15 +90,21 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, init_month <- lubridate::month(as.numeric(substr(recipe$Analysis$Time$sdate, start = 1, stop = 2)), label = T, abb = T) + # Define color palette and number of breaks according to output format + if (tolower(recipe$Analysis$Output_format) %in% c("scorecards", "cerise")) { + color_palette <- "PuOr" + n_brks <- 10 + } else { + color_palette <- "RdBu" + } # Group different metrics by type skill_scores <- c("rpss", "bss90", "bss10", "frpss", "crpss", "mean_bias_ss", "enscorr", "rpss_specs", "bss90_specs", "bss10_specs", "enscorr_specs", "rmsss") scores <- c("rps", "frps", "crps", "frps_specs") - + # Assign colorbar to each metric type for (name in c(skill_scores, scores, "mean_bias", "enssprerr")) { - if (name %in% names(skill_metrics)) { # Define plot characteristics and metric name to display in plot if (name %in% c("rpss", "bss90", "bss10", "frpss", "crpss", @@ -99,22 +112,26 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, "rmsss")) { display_name <- toupper(strsplit(name, "_")[[1]][1]) skill <- skill_metrics[[name]] - brks <- seq(-1, 1, by = 0.1) - col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) - + brks <- seq(-1, 1, by = 0.2) + col2 <- grDevices::hcl.colors(length(brks) - 1, + color_palette, + rev = TRUE) } else if (name == "mean_bias_ss") { display_name <- "Mean Bias Skill Score" skill <- skill_metrics[[name]] brks <- seq(-1, 1, by = 0.1) - col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) - + col2 <- grDevices::hcl.colors(length(brks) - 1, + color_palette, + rev = TRUE) } else if (name %in% c("enscorr", "enscorr_specs")) { display_name <- "Ensemble Mean Correlation" skill <- skill_metrics[[name]] brks <- seq(-1, 1, by = 0.1) - col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) - + col2 <- grDevices::hcl.colors(length(brks) - 1, + color_palette, + rev = TRUE) } else if (name %in% scores) { + ## TODO: CERISE color palette skill <- skill_metrics[[name]] display_name <- toupper(strsplit(name, "_")[[1]][1]) brks <- seq(0, 1, by = 0.1) @@ -125,70 +142,70 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, skill <- skill_metrics[[name]] display_name <- "Spread-to-Error Ratio" brks <- pretty(0:max(skill, na.rm = T), n = 20, min.n = 10) - col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) - + col2 <- grDevices::hcl.colors(length(brks) - 1, + color_palette, + rev = TRUE) } else if (name == "mean_bias") { skill <- skill_metrics[[name]] display_name <- "Mean Bias" max_value <- max(abs(skill)) ugly_intervals <- seq(-max_value, max_value, (max_value*2)/10) brks <- pretty(ugly_intervals, n = 20, min.n = 10) - col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) + col2 <- grDevices::hcl.colors(length(brks) - 1, + color_palette, + rev = TRUE) } - - options(bitmapType = "cairo") - - # Reorder dimensions - skill <- Reorder(skill, c("time", "longitude", "latitude")) - # If the significance has been requested and the variable has it, - # retrieve it and reorder its dimensions. - significance_name <- paste0(name, "_significance") - if ((significance) && (significance_name %in% names(skill_metrics))) { + options(bitmapType = "cairo") + # Reorder dimensions + skill <- Reorder(skill, c("time", "longitude", "latitude")) + # If the significance has been requested and the variable has it, + # retrieve it and reorder its dimensions. significance_name <- paste0(name, "_significance") - skill_significance <- skill_metrics[[significance_name]] - skill_significance <- Reorder(skill_significance, c("time", - "longitude", - "latitude")) - # Split skill significance into list of lists, along the time dimension - # This allows for plotting the significance dots correctly. - skill_significance <- ClimProjDiags::ArrayToList(skill_significance, - dim = 'time', - level = "sublist", - names = "dots") - } else { - skill_significance <- NULL - } - # Define output file name and titles - outfile <- paste0(outdir, name, ".png") - toptitle <- paste(display_name, "-", data_cube$Variable$varName, - "-", system_name, "-", init_month, hcst_period) - months <- unique(lubridate::month(data_cube$Dates$start, - label = T, abb = F)) - titles <- as.vector(months) - # Plot - suppressWarnings( - PlotLayout(PlotEquiMap, c('longitude', 'latitude'), - asplit(skill, MARGIN=1), # Splitting array into a list - longitude, latitude, - special_args = skill_significance, - dot_symbol = 20, - toptitle = toptitle, - title_scale = 0.6, - titles = titles, - filled.continents=F, - brks = brks, - cols = col2, - col_inf = col2[1], - col_sup = col2[length(col2)], - fileout = outfile, - bar_label_digits = 3, - bar_extra_margin = rep(0.9, 4), - bar_label_scale = 1.5, - axes_label_scale = 1.3) - ) + if ((significance) && (significance_name %in% names(skill_metrics))) { + significance_name <- paste0(name, "_significance") + skill_significance <- skill_metrics[[significance_name]] + skill_significance <- Reorder(skill_significance, c("time", + "longitude", + "latitude")) + # Split skill significance into list of lists, along the time dimension + # to avoid overlapping of significance dots. + skill_significance <- ClimProjDiags::ArrayToList(skill_significance, + dim = 'time', + level = "sublist", + names = "dots") + } else { + skill_significance <- NULL + } + # Define output file name and titles + outfile <- paste0(outdir, name, ".png") + toptitle <- paste(display_name, "-", data_cube$Variable$varName, + "-", system_name, "-", init_month, hcst_period) + months <- unique(lubridate::month(data_cube$Dates$start, + label = T, abb = F)) + titles <- as.vector(months) + # Plot + suppressWarnings( + PlotLayout(PlotEquiMap, c('longitude', 'latitude'), + asplit(skill, MARGIN=1), # Splitting array into a list + longitude, latitude, + special_args = skill_significance, + dot_symbol = 20, + toptitle = toptitle, + title_scale = 0.6, + titles = titles, + filled.continents=F, + brks = brks, + cols = col2, + col_inf = col2[1], + col_sup = col2[length(col2)], + fileout = outfile, + bar_label_digits = 3, + bar_extra_margin = rep(0.9, 4), + bar_label_scale = 1.5, + axes_label_scale = 1.3) + ) } } - info(recipe$Run$logger, "##### SKILL METRIC PLOTS SAVED TO OUTPUT DIRECTORY #####") } -- GitLab From 964529ee5956faadfeb1fb693641ff18b096ad3a Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 5 Jan 2023 10:57:43 +0100 Subject: [PATCH 52/80] Add ncores to anomaly computation --- modules/Anomalies/Anomalies.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R index f8b49da8..f41c50b1 100644 --- a/modules/Anomalies/Anomalies.R +++ b/modules/Anomalies/Anomalies.R @@ -29,7 +29,8 @@ compute_anomalies <- function(recipe, data) { memb_dim = 'ensemble', dim_anom = 'syear', dat_dim = c('dat', 'ensemble'), - ftime_dim = 'time') + ftime_dim = 'time', + ncores = recipe$Analysis$ncores) # Reorder dims anom$exp$data <- Reorder(anom$exp$data, names(original_dims)) anom$obs$data <- Reorder(anom$obs$data, names(original_dims)) @@ -59,7 +60,8 @@ compute_anomalies <- function(recipe, data) { dat_dim = c("dat", "ensemble"), memb = FALSE, memb_dim = "ensemble", - ftime_dim = "time") + ftime_dim = "time", + ncores = recipe$Analysis$ncores) clim_hcst <- InsertDim(clim$clim_exp, posdim = 1, lendim = 1, name = "syear") dims <- dim(clim_hcst) -- GitLab From 19541b5954a475ff1728f68a7de695af0db660ba Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 5 Jan 2023 10:58:18 +0100 Subject: [PATCH 53/80] Make separate functions to generate colorbar (WIP) --- modules/Visualization/Visualization.R | 62 ++++++++++++++------------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 0f604f51..26f78144 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -91,11 +91,24 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, start = 1, stop = 2)), label = T, abb = T) # Define color palette and number of breaks according to output format + ## TODO: Make separate function if (tolower(recipe$Analysis$Output_format) %in% c("scorecards", "cerise")) { - color_palette <- "PuOr" - n_brks <- 10 + reverse <- FALSE + .make_color_palette <- function(n_brks, reverse = F) { + color_scheme <- c('#2D004B', '#542789', '#8073AC', '#B2ABD2', '#D8DAEB', + '#FEE0B6', '#FDB863', '#E08214', '#B35806', '#7F3B08') + if (reverse) color_scheme <- rev(color_scheme) + palette <- colorRampPalette(colors = color_scheme)(n_brks) + return(palette) + } } else { - color_palette <- "RdBu" + reverse <- TRUE + .make_color_palette <- function(n_brks, reverse = F) { + color_scheme <- "RdBu" + palette <- grDevices::hcl.colors(n_brks, + color_scheme, + rev = reverse) + } } # Group different metrics by type @@ -112,48 +125,37 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, "rmsss")) { display_name <- toupper(strsplit(name, "_")[[1]][1]) skill <- skill_metrics[[name]] - brks <- seq(-1, 1, by = 0.2) - col2 <- grDevices::hcl.colors(length(brks) - 1, - color_palette, - rev = TRUE) + brks <- seq(-0.5, 0.5, by = 0.1) + cols <- .make_color_palette(length(brks) - 1, reverse) } else if (name == "mean_bias_ss") { display_name <- "Mean Bias Skill Score" skill <- skill_metrics[[name]] - brks <- seq(-1, 1, by = 0.1) - col2 <- grDevices::hcl.colors(length(brks) - 1, - color_palette, - rev = TRUE) + brks <- seq(-1, 1, by = 0.2) + cols <- .make_color_palette(length(brks) - 1, reverse) } else if (name %in% c("enscorr", "enscorr_specs")) { display_name <- "Ensemble Mean Correlation" skill <- skill_metrics[[name]] - brks <- seq(-1, 1, by = 0.1) - col2 <- grDevices::hcl.colors(length(brks) - 1, - color_palette, - rev = TRUE) + brks <- seq(-1, 1, by = 0.2) + cols <- .make_color_palette(length(brks) - 1, reverse) } else if (name %in% scores) { ## TODO: CERISE color palette skill <- skill_metrics[[name]] display_name <- toupper(strsplit(name, "_")[[1]][1]) brks <- seq(0, 1, by = 0.1) - col2 <- grDevices::hcl.colors(length(brks) - 1, "Reds") - + cols <- grDevices::hcl.colors(length(brks) - 1, "Reds") } else if (name == "enssprerr") { ## TODO: Adjust colorbar parameters skill <- skill_metrics[[name]] display_name <- "Spread-to-Error Ratio" - brks <- pretty(0:max(skill, na.rm = T), n = 20, min.n = 10) - col2 <- grDevices::hcl.colors(length(brks) - 1, - color_palette, - rev = TRUE) + brks <- pretty(0:max(skill, na.rm = T), n = 12, min.n = 10) + cols <- .make_color_palette(length(brks) - 1, reverse) } else if (name == "mean_bias") { skill <- skill_metrics[[name]] display_name <- "Mean Bias" max_value <- max(abs(skill)) ugly_intervals <- seq(-max_value, max_value, (max_value*2)/10) - brks <- pretty(ugly_intervals, n = 20, min.n = 10) - col2 <- grDevices::hcl.colors(length(brks) - 1, - color_palette, - rev = TRUE) + brks <- pretty(ugly_intervals, n = 12, min.n = 10) + cols <- .make_color_palette(length(brks) - 1, reverse) } options(bitmapType = "cairo") # Reorder dimensions @@ -195,9 +197,9 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, titles = titles, filled.continents=F, brks = brks, - cols = col2, - col_inf = col2[1], - col_sup = col2[length(col2)], + cols = cols, + col_inf = cols[1], + col_sup = cols[length(cols)], fileout = outfile, bar_label_digits = 3, bar_extra_margin = rep(0.9, 4), @@ -249,7 +251,7 @@ plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { } brks <- pretty(range(ensemble_mean, na.rm = T), n = 15, min.n = 8) - col2 <- grDevices::hcl.colors(length(brks) - 1, palette, rev = rev) + cols <- grDevices::hcl.colors(length(brks) - 1, palette, rev = rev) # color <- colorRampPalette(col2)(length(brks) - 1) options(bitmapType = "cairo") @@ -275,7 +277,7 @@ plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { title_scale = 0.6, titles = titles, units = units, - cols = col2, + cols = cols, brks = brks, fileout = outfile, bar_label_digits = 4, -- GitLab From 0a9ad8adfaff60ba324e8e0435376556f71b0b5b Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 5 Jan 2023 11:27:10 +0100 Subject: [PATCH 54/80] Add ncores to Anomalies module and include new ncores fix from CSTools --- modules/Anomalies/Anomalies.R | 12 ++++-------- modules/Anomalies/tmp/CST_Anomaly.R | 23 ++++++++++++++--------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R index f8b49da8..552f895a 100644 --- a/modules/Anomalies/Anomalies.R +++ b/modules/Anomalies/Anomalies.R @@ -29,7 +29,8 @@ compute_anomalies <- function(recipe, data) { memb_dim = 'ensemble', dim_anom = 'syear', dat_dim = c('dat', 'ensemble'), - ftime_dim = 'time') + ftime_dim = 'time', + ncores = recipe$Analysis$ncores) # Reorder dims anom$exp$data <- Reorder(anom$exp$data, names(original_dims)) anom$obs$data <- Reorder(anom$obs$data, names(original_dims)) @@ -59,7 +60,8 @@ compute_anomalies <- function(recipe, data) { dat_dim = c("dat", "ensemble"), memb = FALSE, memb_dim = "ensemble", - ftime_dim = "time") + ftime_dim = "time", + ncores = recipe$Analysis$ncores) clim_hcst <- InsertDim(clim$clim_exp, posdim = 1, lendim = 1, name = "syear") dims <- dim(clim_hcst) @@ -95,9 +97,3 @@ compute_anomalies <- function(recipe, data) { hcst.full_val = hcst_fullvalue, obs.full_val = obs_fullvalue)) } - - - - - - diff --git a/modules/Anomalies/tmp/CST_Anomaly.R b/modules/Anomalies/tmp/CST_Anomaly.R index a84b6fc8..f38e39b0 100644 --- a/modules/Anomalies/tmp/CST_Anomaly.R +++ b/modules/Anomalies/tmp/CST_Anomaly.R @@ -53,12 +53,7 @@ #'attr(exp, 'class') <- 's2dv_cube' #'attr(obs, 'class') <- 's2dv_cube' #' -#'anom1 <- CST_Anomaly(exp = exp, obs = obs, cross = FALSE, memb = TRUE) -#'anom2 <- CST_Anomaly(exp = exp, obs = obs, cross = TRUE, memb = TRUE) -#'anom3 <- CST_Anomaly(exp = exp, obs = obs, cross = TRUE, memb = FALSE) -#'anom4 <- CST_Anomaly(exp = exp, obs = obs, cross = FALSE, memb = FALSE) -#'anom5 <- CST_Anomaly(lonlat_temp$exp) -#'anom6 <- CST_Anomaly(obs = lonlat_temp$obs) +#'anom <- CST_Anomaly(exp = exp, obs = obs, cross = FALSE, memb = TRUE) #' #'@seealso \code{\link[s2dv]{Ano_CrossValid}}, \code{\link[s2dv]{Clim}} and \code{\link{CST_Load}} #' @@ -178,11 +173,21 @@ CST_Anomaly <- function(exp = NULL, obs = NULL, dim_anom = 'sdate', cross = FALS # With cross-validation if (cross) { - ano <- Ano_CrossValid(exp = exp$data, obs = obs$data, time_dim = dim_anom, memb_dim = memb_dim, memb = memb, dat_dim = dat_dim) + ano <- Ano_CrossValid(exp = exp$data, obs = obs$data, + time_dim = dim_anom, + memb_dim = memb_dim, + memb = memb, + dat_dim = dat_dim, + ncores = ncores) - # Without cross-validation + # Without cross-validation } else { - tmp <- Clim(exp = exp$data, obs = obs$data, time_dim = dim_anom, memb_dim = memb_dim, memb = memb, dat_dim = dat_dim) + tmp <- Clim(exp = exp$data, obs = obs$data, + time_dim = dim_anom, + memb_dim = memb_dim, + memb = memb, + dat_dim = dat_dim, + ncores = ncores) if (!is.null(filter_span)) { tmp$clim_exp <- Apply(tmp$clim_exp, target_dims = c(ftime_dim), -- GitLab From 691f1075e42e78ebb212b97d1bf8bf2a49c4d9de Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 5 Jan 2023 11:28:27 +0100 Subject: [PATCH 55/80] Update recipe --- modules/Loading/testing_recipes/recipe_seasonal-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/Loading/testing_recipes/recipe_seasonal-tests.yml b/modules/Loading/testing_recipes/recipe_seasonal-tests.yml index e1857ac0..61177b71 100644 --- a/modules/Loading/testing_recipes/recipe_seasonal-tests.yml +++ b/modules/Loading/testing_recipes/recipe_seasonal-tests.yml @@ -30,6 +30,9 @@ Analysis: Workflow: Calibration: method: mse_min + Anomalies: + compute: yes + cross_validation: yes Skill: metric: RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr mean_bias mean_bias_SS Probabilities: -- GitLab From c77deb7495c32a4b30acc7a1980aae35692a7500 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 10 Jan 2023 12:11:30 +0100 Subject: [PATCH 56/80] Add INFO messate to prepare_outputs --- tools/prepare_outputs.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/prepare_outputs.R b/tools/prepare_outputs.R index da5d2562..e12ecc88 100644 --- a/tools/prepare_outputs.R +++ b/tools/prepare_outputs.R @@ -74,5 +74,8 @@ prepare_outputs <- function(recipe_file) { recipe$Run$logger <- logger recipe$Run$logfile <- logfile + info(recipe$Run$logger, + "##### LOGGER SET UP AND OUTPUT DIRECTORY PREPARED #####") + return(recipe) } -- GitLab From 66ec2d886e66937707191ca574b241bb7d746a6d Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 10 Jan 2023 17:08:12 +0100 Subject: [PATCH 57/80] Improve comments --- tools/data_summary.R | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/data_summary.R b/tools/data_summary.R index 4ff30875..8dcc9910 100644 --- a/tools/data_summary.R +++ b/tools/data_summary.R @@ -1,6 +1,5 @@ # Print a summary of the loaded data for the user, for each object. # object: hindcast, forecast or reference data in s2dv_cube format. -## TODO: Incorporate into logger ## TODO: Adapt to daily/subseasonal cases ## TODO: Add check for missing files/NAs by dimension @@ -19,11 +18,11 @@ data_summary <- function(data_cube, recipe) { # Create log instance and sink output to logfile and terminal info(recipe$Run$logger, "DATA SUMMARY:") - # sink(recipe$Run$logfile, append = TRUE, split = TRUE) info(recipe$Run$logger, paste(object_name, "months:", months)) info(recipe$Run$logger, paste(object_name, "range:", sdate_min, "to", sdate_max)) info(recipe$Run$logger, paste(object_name, "dimensions:")) + # Use capture.output() and for loop to display results neatly output_string <- capture.output(dim(data_cube$data)) for (i in output_string) { info(recipe$Run$logger, i) @@ -35,6 +34,5 @@ data_summary <- function(data_cube, recipe) { info(recipe$Run$logger, i) } info(recipe$Run$logger, "---------------------------------------------") - # sink() } -- GitLab From ea98ab7b8d2dda68161c5798db116fc703da4c9c Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 11 Jan 2023 12:28:27 +0100 Subject: [PATCH 58/80] Fix small bugs: File appender and 'prlr' in system21_m1 --- conf/archive.yml | 2 +- tools/prepare_outputs.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/archive.yml b/conf/archive.yml index 04d69c5a..e7c18bdf 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -36,7 +36,7 @@ archive: name: "DWD GCFS 2.1" institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/dwd/system21_m1/" - monthly_mean: {"tas":"_f6h/", "prlr":"_f24h", + monthly_mean: {"tas":"_f6h/", "prlr":"_f24h/", "g500":"_f12h/", "sfcWind":"_f6h/", "tasmin":"_f24h/", "tasmax":"_f24h/"} nmember: diff --git a/tools/prepare_outputs.R b/tools/prepare_outputs.R index e12ecc88..a89e5e7b 100644 --- a/tools/prepare_outputs.R +++ b/tools/prepare_outputs.R @@ -66,7 +66,7 @@ prepare_outputs <- function(recipe_file) { layout = default_log_layout()))) } else { logger <- log4r::logger(threshold = recipe$Run$Loglevel, - appenders = list(file_appende(logfile, append = TRUE, + appenders = list(file_appender(logfile, append = TRUE, layout = default_log_layout()))) } -- GitLab From e83a2c693a50cad4e3ab6f9a80c64fdc4612dc49 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 13 Jan 2023 16:57:41 +0100 Subject: [PATCH 59/80] Add clim.palette() function, create alternative brks for anomalies --- modules/Visualization/Visualization.R | 65 +++++++++++----------- modules/Visualization/tmp/clim.palette.R | 69 ++++++++++++++++++++++++ 2 files changed, 101 insertions(+), 33 deletions(-) create mode 100644 modules/Visualization/tmp/clim.palette.R diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 26f78144..9648184a 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -1,12 +1,11 @@ #G# TODO: Remove once released in s2dv/CSTools source("modules/Visualization/tmp/PlotMostLikelyQuantileMap.R") source("modules/Visualization/tmp/PlotCombinedMap.R") +source("modules/Visualization/tmp/clim.palette.R") ## TODO: Add the possibility to read the data directly from netCDF ## TODO: Adapt to multi-model case ## TODO: Add param 'raw'? -## TODO: Reduce colorbar size and increase colorbar label size -## Param: bar_label_scale and ???? plot_data <- function(recipe, data, @@ -93,22 +92,11 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, # Define color palette and number of breaks according to output format ## TODO: Make separate function if (tolower(recipe$Analysis$Output_format) %in% c("scorecards", "cerise")) { - reverse <- FALSE - .make_color_palette <- function(n_brks, reverse = F) { - color_scheme <- c('#2D004B', '#542789', '#8073AC', '#B2ABD2', '#D8DAEB', - '#FEE0B6', '#FDB863', '#E08214', '#B35806', '#7F3B08') - if (reverse) color_scheme <- rev(color_scheme) - palette <- colorRampPalette(colors = color_scheme)(n_brks) - return(palette) - } + diverging_palette <- "purpleorange" + sequential_palette <- "Oranges" } else { - reverse <- TRUE - .make_color_palette <- function(n_brks, reverse = F) { - color_scheme <- "RdBu" - palette <- grDevices::hcl.colors(n_brks, - color_scheme, - rev = reverse) - } + diverging_palette <- "bluered" + sequential_palette <- "Reds" } # Group different metrics by type @@ -117,6 +105,7 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, "enscorr_specs", "rmsss") scores <- c("rps", "frps", "crps", "frps_specs") # Assign colorbar to each metric type + ## TODO: Triangle ends for (name in c(skill_scores, scores, "mean_bias", "enssprerr")) { if (name %in% names(skill_metrics)) { # Define plot characteristics and metric name to display in plot @@ -125,37 +114,36 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, "rmsss")) { display_name <- toupper(strsplit(name, "_")[[1]][1]) skill <- skill_metrics[[name]] - brks <- seq(-0.5, 0.5, by = 0.1) - cols <- .make_color_palette(length(brks) - 1, reverse) + brks <- seq(-1, 1, by = 0.2) + cols <- clim.colors(length(brks) - 1, diverging_palette) } else if (name == "mean_bias_ss") { display_name <- "Mean Bias Skill Score" skill <- skill_metrics[[name]] brks <- seq(-1, 1, by = 0.2) - cols <- .make_color_palette(length(brks) - 1, reverse) + cols <- clim.colors(length(brks) - 1, diverging_palette) } else if (name %in% c("enscorr", "enscorr_specs")) { display_name <- "Ensemble Mean Correlation" skill <- skill_metrics[[name]] brks <- seq(-1, 1, by = 0.2) - cols <- .make_color_palette(length(brks) - 1, reverse) + cols <- clim.colors(length(brks) - 1, diverging_palette) } else if (name %in% scores) { - ## TODO: CERISE color palette skill <- skill_metrics[[name]] display_name <- toupper(strsplit(name, "_")[[1]][1]) brks <- seq(0, 1, by = 0.1) - cols <- grDevices::hcl.colors(length(brks) - 1, "Reds") + cols <- grDevices::hcl.colors(length(brks) - 1, sequential_palette) } else if (name == "enssprerr") { ## TODO: Adjust colorbar parameters skill <- skill_metrics[[name]] display_name <- "Spread-to-Error Ratio" brks <- pretty(0:max(skill, na.rm = T), n = 12, min.n = 10) - cols <- .make_color_palette(length(brks) - 1, reverse) + cols <- clim.colors(length(brks) - 1, diverging_palette) } else if (name == "mean_bias") { skill <- skill_metrics[[name]] display_name <- "Mean Bias" max_value <- max(abs(skill)) ugly_intervals <- seq(-max_value, max_value, (max_value*2)/10) brks <- pretty(ugly_intervals, n = 12, min.n = 10) - cols <- .make_color_palette(length(brks) - 1, reverse) + cols <- clim.colors(length(brks) - 1, diverging_palette) } options(bitmapType = "cairo") # Reorder dimensions @@ -206,7 +194,7 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, bar_label_scale = 1.5, axes_label_scale = 1.3) ) - } + } } info(recipe$Run$logger, "##### SKILL METRIC PLOTS SAVED TO OUTPUT DIRECTORY #####") @@ -227,7 +215,6 @@ plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { units <- attr(fcst$Variable, "variable")$units start_date <- paste0(recipe$Analysis$Time$fcst_year, recipe$Analysis$Time$sdate) - # Compute ensemble mean ensemble_mean <- s2dv::MeanDims(fcst$data, 'ensemble') # Drop extra dims, add time dim if missing: @@ -237,9 +224,14 @@ plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { dim(ensemble_mean) <- c("time" = 1, dim(ensemble_mean)) } if (!'syear' %in% names(dim(ensemble_mean))) { - ensemble_mean <- Reorder(ensemble_mean, c("time", "longitude", "latitude")) + ensemble_mean <- Reorder(ensemble_mean, c("time", + "longitude", + "latitude")) } else { - ensemble_mean <- Reorder(ensemble_mean, c("syear", "time", "longitude", "latitude")) + ensemble_mean <- Reorder(ensemble_mean, c("syear", + "time", + "longitude", + "latitude")) } ## TODO: Redefine column colors, possibly depending on variable if (variable == 'prlr') { @@ -249,10 +241,18 @@ plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { palette = "RdBu" rev = T } - - brks <- pretty(range(ensemble_mean, na.rm = T), n = 15, min.n = 8) + # Define brks, centered on in the case of anomalies + ## + if (grepl("anomaly", + attr(fcst$Variable, "variable")$long_name)) { + variable <- paste(variable, "anomaly") + max_value <- max(abs(ensemble_mean)) + ugly_intervals <- seq(-max_value, max_value, max_value/20) + brks <- pretty(ugly_intervals, n = 12, min.n = 8) + } else { + brks <- pretty(range(ensemble_mean, na.rm = T), n = 15, min.n = 8) + } cols <- grDevices::hcl.colors(length(brks) - 1, palette, rev = rev) - # color <- colorRampPalette(col2)(length(brks) - 1) options(bitmapType = "cairo") for (i_syear in start_date) { @@ -285,7 +285,6 @@ plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { bar_label_scale = 1.5, axes_label_scale = 1.3) } - info(recipe$Run$logger, "##### FCST ENSEMBLE MEAN PLOT SAVED TO OUTPUT DIRECTORY #####") } diff --git a/modules/Visualization/tmp/clim.palette.R b/modules/Visualization/tmp/clim.palette.R new file mode 100644 index 00000000..b23ff842 --- /dev/null +++ b/modules/Visualization/tmp/clim.palette.R @@ -0,0 +1,69 @@ +#'Generate Climate Color Palettes +#' +#'Generates a colorblind friendly color palette with color ranges useful in +#'climate temperature variable plotting. +#' +#'@param palette Which type of palette to generate: from blue through white +#' to red ('bluered'), from red through white to blue ('redblue'), from +#' yellow through orange to red ('yellowred'), from red through orange to +#' red ('redyellow'), from purple through white to orange ('purpleorange'), +#' and from orange through white to purple ('orangepurple'). +#'@param n Number of colors to generate. +#' +#'@examples +#'lims <- seq(-1, 1, length.out = 21) +#' +#'ColorBar(lims, color_fun = clim.palette('redyellow')) +#' +#'cols <- clim.colors(20) +#'ColorBar(lims, cols) +#' +#'@rdname clim.palette +#'@importFrom grDevices colorRampPalette +#'@export +clim.palette <- function(palette = "bluered") { + if (palette == "bluered") { + colorbar <- colorRampPalette(rev(c("#67001f", "#b2182b", "#d6604d", + "#f4a582", "#fddbc7", "#f7f7f7", + "#d1e5f0", "#92c5de", "#4393c3", + "#2166ac", "#053061"))) + attr(colorbar, 'na_color') <- 'pink' + } else if (palette == "redblue") { + colorbar <- colorRampPalette(c("#67001f", "#b2182b", "#d6604d", + "#f4a582", "#fddbc7", "#f7f7f7", + "#d1e5f0", "#92c5de", "#4393c3", + "#2166ac", "#053061")) + attr(colorbar, 'na_color') <- 'pink' + } else if (palette == "yellowred") { + colorbar <- colorRampPalette(c("#ffffcc", "#ffeda0", "#fed976", + "#feb24c", "#fd8d3c", "#fc4e2a", + "#e31a1c", "#bd0026", "#800026")) + attr(colorbar, 'na_color') <- 'pink' + } else if (palette == "redyellow") { + colorbar <- colorRampPalette(rev(c("#ffffcc", "#ffeda0", "#fed976", + "#feb24c", "#fd8d3c", "#fc4e2a", + "#e31a1c", "#bd0026", "#800026"))) + attr(colorbar, 'na_color') <- 'pink' + } else if (palette == "purpleorange") { + colorbar <- colorRampPalette(c("#2d004b", "#542789", "#8073ac", + "#b2abd2", "#d8daeb", "#f7f7f7", + "#fee0b6", "#fdb863", "#e08214", + "#b35806", "#7f3b08")) + attr(colorbar, 'na_color') <- 'pink' + } else if (palette == "orangepurple") { + colorbar <- colorRampPalette(rev(c("#2d004b", "#542789", "#8073ac", + "#b2abd2", "#d8daeb", "#f7f7f7", + "#fee0b6", "#fdb863", "#e08214", + "#b35806", "#7f3b08"))) + attr(colorbar, 'na_color') <- 'pink' + } else { + stop("Parameter 'palette' must be one of 'bluered', 'redblue', 'yellowred'", + "'redyellow', 'purpleorange' or 'orangepurple'.") + } + colorbar +} +#'@rdname clim.palette +#'@export +clim.colors <- function(n, palette = "bluered") { + clim.palette(palette)(n) +} -- GitLab From 63f4bd383b44c21e31bf91821f6dd6c4c9f9f86b Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 13 Jan 2023 17:21:00 +0100 Subject: [PATCH 60/80] Adjust triangle ends depending on metric --- modules/Visualization/Visualization.R | 31 ++++++++++++++++++++------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 9648184a..59157853 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -115,35 +115,50 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, display_name <- toupper(strsplit(name, "_")[[1]][1]) skill <- skill_metrics[[name]] brks <- seq(-1, 1, by = 0.2) - cols <- clim.colors(length(brks) - 1, diverging_palette) + cols <- clim.colors(length(brks) + 1, diverging_palette) + colorbar <- cols[2:(length(cols) - 1)] + col_inf <- cols[1] + col_sup <- NULL } else if (name == "mean_bias_ss") { display_name <- "Mean Bias Skill Score" skill <- skill_metrics[[name]] brks <- seq(-1, 1, by = 0.2) - cols <- clim.colors(length(brks) - 1, diverging_palette) + cols <- clim.colors(length(brks) + 1, diverging_palette) + colorbar <- cols[2:(length(cols) - 1)] + col_inf <- cols[1] + col_sup <- NULL } else if (name %in% c("enscorr", "enscorr_specs")) { display_name <- "Ensemble Mean Correlation" skill <- skill_metrics[[name]] brks <- seq(-1, 1, by = 0.2) cols <- clim.colors(length(brks) - 1, diverging_palette) + col_inf <- NULL + col_sup <- NULL } else if (name %in% scores) { skill <- skill_metrics[[name]] display_name <- toupper(strsplit(name, "_")[[1]][1]) brks <- seq(0, 1, by = 0.1) - cols <- grDevices::hcl.colors(length(brks) - 1, sequential_palette) + cols <- grDevices::hcl.colors(length(brks), sequential_palette) + colorbar <- cols[1:(length(cols) - 1)] + col_inf <- NULL + col_sup <- cols[length(cols)] } else if (name == "enssprerr") { ## TODO: Adjust colorbar parameters skill <- skill_metrics[[name]] display_name <- "Spread-to-Error Ratio" brks <- pretty(0:max(skill, na.rm = T), n = 12, min.n = 10) - cols <- clim.colors(length(brks) - 1, diverging_palette) + colorbar <- clim.colors(length(brks) - 1, diverging_palette) + col_inf <- NULL + col_sup <- NULL } else if (name == "mean_bias") { skill <- skill_metrics[[name]] display_name <- "Mean Bias" max_value <- max(abs(skill)) ugly_intervals <- seq(-max_value, max_value, (max_value*2)/10) brks <- pretty(ugly_intervals, n = 12, min.n = 10) - cols <- clim.colors(length(brks) - 1, diverging_palette) + colorbar <- clim.colors(length(brks) - 1, diverging_palette) + col_inf <- NULL + col_sup <- NULL } options(bitmapType = "cairo") # Reorder dimensions @@ -185,9 +200,9 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, titles = titles, filled.continents=F, brks = brks, - cols = cols, - col_inf = cols[1], - col_sup = cols[length(cols)], + cols = colorbar, + col_inf = col_inf, + col_sup = col_sup, fileout = outfile, bar_label_digits = 3, bar_extra_margin = rep(0.9, 4), -- GitLab From 21150613acad9ea4ef0233d73582f94125058f9b Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 13 Jan 2023 17:49:11 +0100 Subject: [PATCH 61/80] Revert "Merge branch 'dev-loading-system_dates' into 'master'" This reverts merge request !43 --- conf/archive.yml | 8 -------- modules/Loading/Loading.R | 13 ++----------- modules/Loading/dates2load.R | 4 +++- tests/testthat/test-seasonal_monthly.R | 18 +++++++++--------- 4 files changed, 14 insertions(+), 29 deletions(-) diff --git a/conf/archive.yml b/conf/archive.yml index 0251beaf..e7c18bdf 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -19,7 +19,6 @@ archive: fcst: 51 hcst: 25 calendar: "proleptic_gregorian" - time_stamp_lag: "0" reference_grid: "/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20180501.nc" system7c3s: name: "Meteo-France System 7" @@ -31,7 +30,6 @@ archive: nmember: fcst: 51 hcst: 25 - time_stamp_lag: "+1" calendar: "proleptic_gregorian" reference_grid: "conf/grid_description/griddes_system7c3s.txt" system21_m1: @@ -45,7 +43,6 @@ archive: fcst: 50 hcst: 30 calendar: "proleptic_gregorian" - time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_system21_m1.txt" system35c3s: name: "CMCC-SPS3.5" @@ -58,7 +55,6 @@ archive: fcst: 50 hcst: 40 calendar: "proleptic_gregorian" - time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_system35c3s.txt" system2c3s: name: "JMA System 2" @@ -70,7 +66,6 @@ archive: fcst: 10 hcst: 10 calendar: "proleptic_gregorian" - time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_system2c3s.txt" eccc1: name: "ECCC CanCM4i" @@ -82,7 +77,6 @@ archive: fcst: 10 hcst: 10 calendar: "proleptic_gregorian" - time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_eccc1.txt" glosea6_system600-c3s: name: "UKMO GloSea 6 6.0" @@ -94,7 +88,6 @@ archive: fcst: 62 hcst: 28 calendar: "proleptic_gregorian" - time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_ukmo600.txt" ncep-cfsv2: name: "NCEP CFSv2" @@ -106,7 +99,6 @@ archive: fcst: 20 hcst: 20 calendar: "gregorian" - time_stamp_lag: "0" reference_grid: "conf/grid_description/griddes_ncep-cfsv2.txt" Reference: era5: diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 1c8ccad7..66a53451 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -143,11 +143,7 @@ load_datasets <- function(recipe) { ## TODO: Give correct dimensions to $Dates$start ## (sday, sweek, syear instead of file_date) hcst <- as.s2dv_cube(hcst) - # Adjust dates for models where the time stamp goes into the next month - if (recipe$Analysis$Variables$freq == "monthly_mean") { - hcst$Dates$start[] <- hcst$Dates$start - seconds(exp_descrip$time_stamp_lag) - } - + # Load forecast #------------------------------------------------------------------- if (!is.null(recipe$Analysis$Time$fcst_year)) { @@ -155,7 +151,7 @@ load_datasets <- function(recipe) { # with the daily case and the current version of startR not allowing # multiple dims split - fcst <- Start(dat = fcst.path, + fcst <- Start(dat = fcst.path, var = variable, file_date = sdates$fcst, time = idxs$fcst, @@ -197,11 +193,6 @@ load_datasets <- function(recipe) { # Convert fcst to s2dv_cube fcst <- as.s2dv_cube(fcst) - # Adjust dates for models where the time stamp goes into the next month - if (recipe$Analysis$Variables$freq == "monthly_mean") { - fcst$Dates$start[] <- - fcst$Dates$start - seconds(exp_descrip$time_stamp_lag) - } } else { fcst <- NULL diff --git a/modules/Loading/dates2load.R b/modules/Loading/dates2load.R index 0e3613f3..ca8ecaa3 100644 --- a/modules/Loading/dates2load.R +++ b/modules/Loading/dates2load.R @@ -19,6 +19,7 @@ dates2load <- function(recipe, logger) { temp_freq <- recipe$Analysis$Variables$freq recipe <- recipe$Analysis$Time + # hcst dates file_dates <- paste0(strtoi(recipe$hcst_start):strtoi(recipe$hcst_end), recipe$sdate) @@ -26,7 +27,8 @@ dates2load <- function(recipe, logger) { if (temp_freq == "monthly_mean") { file_dates <- .add_dims(file_dates) } - # fcst dates (if fcst_year empty it creates an empty object) + + # fcst dates (if fcst_year empty it creates an empty object) if (! is.null(recipe$fcst_year)) { file_dates.fcst <- paste0(recipe$fcst_year, recipe$sdate) if (temp_freq == "monthly_mean") { diff --git a/tests/testthat/test-seasonal_monthly.R b/tests/testthat/test-seasonal_monthly.R index de03bf73..e9792df0 100644 --- a/tests/testthat/test-seasonal_monthly.R +++ b/tests/testthat/test-seasonal_monthly.R @@ -108,19 +108,19 @@ tolerance = 0.0001 ) expect_equal( (data$hcst$Dates$start)[1], -as.POSIXct("1993-11-30 23:59:59", tz = 'UTC') +as.POSIXct("1993-12-01", tz = 'UTC') ) expect_equal( (data$hcst$Dates$start)[2], -as.POSIXct("1994-11-30 23:59:59", tz = 'UTC') +as.POSIXct("1994-12-01", tz = 'UTC') ) expect_equal( (data$hcst$Dates$start)[5], -as.POSIXct("1993-12-31 23:59:59", tz = 'UTC') +as.POSIXct("1994-01-01", tz = 'UTC') ) expect_equal( (data$obs$Dates$start)[10], -as.POSIXct("1995-01-15 12:00:00", tz = 'UTC') +as.POSIXct("1995-02-14", tz = 'UTC') ) }) @@ -153,22 +153,22 @@ c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 3, long ) expect_equal( mean(calibrated_data$fcst$data), -291.6433, +291.1218, tolerance = 0.0001 ) expect_equal( mean(calibrated_data$hcst$data), -290.9006, +289.8596, tolerance = 0.0001 ) expect_equal( as.vector(drop(calibrated_data$hcst$data)[1, , 2, 3, 4]), -c(291.8887, 287.0233, 289.8808), +c(287.7982, 287.0422, 290.4297), tolerance = 0.0001 ) expect_equal( range(calibrated_data$fcst$data), -c(283.8926, 299.0644), +c(283.5374, 306.2353), tolerance = 0.0001 ) @@ -201,7 +201,7 @@ dim(skill_metrics$rpss) ) expect_equal( as.vector(skill_metrics$rpss[, 2, 3]), -c(-0.2918857, -1.4809143, -1.3842286), +c(-1.153829, -1.114743, -1.392457), tolerance = 0.0001 ) expect_equal( -- GitLab From ce23e6fd972777ea6088d8a854d53b1cb8f42332 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 16 Jan 2023 12:46:20 +0100 Subject: [PATCH 62/80] Revert "Revert "Merge branch 'dev-loading-system_dates' into 'master'"" This reverts commit 21150613acad9ea4ef0233d73582f94125058f9b. --- conf/archive.yml | 8 ++++++++ modules/Loading/Loading.R | 13 +++++++++++-- modules/Loading/dates2load.R | 4 +--- tests/testthat/test-seasonal_monthly.R | 18 +++++++++--------- 4 files changed, 29 insertions(+), 14 deletions(-) diff --git a/conf/archive.yml b/conf/archive.yml index e7c18bdf..0251beaf 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -19,6 +19,7 @@ archive: fcst: 51 hcst: 25 calendar: "proleptic_gregorian" + time_stamp_lag: "0" reference_grid: "/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20180501.nc" system7c3s: name: "Meteo-France System 7" @@ -30,6 +31,7 @@ archive: nmember: fcst: 51 hcst: 25 + time_stamp_lag: "+1" calendar: "proleptic_gregorian" reference_grid: "conf/grid_description/griddes_system7c3s.txt" system21_m1: @@ -43,6 +45,7 @@ archive: fcst: 50 hcst: 30 calendar: "proleptic_gregorian" + time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_system21_m1.txt" system35c3s: name: "CMCC-SPS3.5" @@ -55,6 +58,7 @@ archive: fcst: 50 hcst: 40 calendar: "proleptic_gregorian" + time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_system35c3s.txt" system2c3s: name: "JMA System 2" @@ -66,6 +70,7 @@ archive: fcst: 10 hcst: 10 calendar: "proleptic_gregorian" + time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_system2c3s.txt" eccc1: name: "ECCC CanCM4i" @@ -77,6 +82,7 @@ archive: fcst: 10 hcst: 10 calendar: "proleptic_gregorian" + time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_eccc1.txt" glosea6_system600-c3s: name: "UKMO GloSea 6 6.0" @@ -88,6 +94,7 @@ archive: fcst: 62 hcst: 28 calendar: "proleptic_gregorian" + time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_ukmo600.txt" ncep-cfsv2: name: "NCEP CFSv2" @@ -99,6 +106,7 @@ archive: fcst: 20 hcst: 20 calendar: "gregorian" + time_stamp_lag: "0" reference_grid: "conf/grid_description/griddes_ncep-cfsv2.txt" Reference: era5: diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 66a53451..1c8ccad7 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -143,7 +143,11 @@ load_datasets <- function(recipe) { ## TODO: Give correct dimensions to $Dates$start ## (sday, sweek, syear instead of file_date) hcst <- as.s2dv_cube(hcst) - + # Adjust dates for models where the time stamp goes into the next month + if (recipe$Analysis$Variables$freq == "monthly_mean") { + hcst$Dates$start[] <- hcst$Dates$start - seconds(exp_descrip$time_stamp_lag) + } + # Load forecast #------------------------------------------------------------------- if (!is.null(recipe$Analysis$Time$fcst_year)) { @@ -151,7 +155,7 @@ load_datasets <- function(recipe) { # with the daily case and the current version of startR not allowing # multiple dims split - fcst <- Start(dat = fcst.path, + fcst <- Start(dat = fcst.path, var = variable, file_date = sdates$fcst, time = idxs$fcst, @@ -193,6 +197,11 @@ load_datasets <- function(recipe) { # Convert fcst to s2dv_cube fcst <- as.s2dv_cube(fcst) + # Adjust dates for models where the time stamp goes into the next month + if (recipe$Analysis$Variables$freq == "monthly_mean") { + fcst$Dates$start[] <- + fcst$Dates$start - seconds(exp_descrip$time_stamp_lag) + } } else { fcst <- NULL diff --git a/modules/Loading/dates2load.R b/modules/Loading/dates2load.R index ca8ecaa3..0e3613f3 100644 --- a/modules/Loading/dates2load.R +++ b/modules/Loading/dates2load.R @@ -19,7 +19,6 @@ dates2load <- function(recipe, logger) { temp_freq <- recipe$Analysis$Variables$freq recipe <- recipe$Analysis$Time - # hcst dates file_dates <- paste0(strtoi(recipe$hcst_start):strtoi(recipe$hcst_end), recipe$sdate) @@ -27,8 +26,7 @@ dates2load <- function(recipe, logger) { if (temp_freq == "monthly_mean") { file_dates <- .add_dims(file_dates) } - - # fcst dates (if fcst_year empty it creates an empty object) + # fcst dates (if fcst_year empty it creates an empty object) if (! is.null(recipe$fcst_year)) { file_dates.fcst <- paste0(recipe$fcst_year, recipe$sdate) if (temp_freq == "monthly_mean") { diff --git a/tests/testthat/test-seasonal_monthly.R b/tests/testthat/test-seasonal_monthly.R index e9792df0..de03bf73 100644 --- a/tests/testthat/test-seasonal_monthly.R +++ b/tests/testthat/test-seasonal_monthly.R @@ -108,19 +108,19 @@ tolerance = 0.0001 ) expect_equal( (data$hcst$Dates$start)[1], -as.POSIXct("1993-12-01", tz = 'UTC') +as.POSIXct("1993-11-30 23:59:59", tz = 'UTC') ) expect_equal( (data$hcst$Dates$start)[2], -as.POSIXct("1994-12-01", tz = 'UTC') +as.POSIXct("1994-11-30 23:59:59", tz = 'UTC') ) expect_equal( (data$hcst$Dates$start)[5], -as.POSIXct("1994-01-01", tz = 'UTC') +as.POSIXct("1993-12-31 23:59:59", tz = 'UTC') ) expect_equal( (data$obs$Dates$start)[10], -as.POSIXct("1995-02-14", tz = 'UTC') +as.POSIXct("1995-01-15 12:00:00", tz = 'UTC') ) }) @@ -153,22 +153,22 @@ c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 3, long ) expect_equal( mean(calibrated_data$fcst$data), -291.1218, +291.6433, tolerance = 0.0001 ) expect_equal( mean(calibrated_data$hcst$data), -289.8596, +290.9006, tolerance = 0.0001 ) expect_equal( as.vector(drop(calibrated_data$hcst$data)[1, , 2, 3, 4]), -c(287.7982, 287.0422, 290.4297), +c(291.8887, 287.0233, 289.8808), tolerance = 0.0001 ) expect_equal( range(calibrated_data$fcst$data), -c(283.5374, 306.2353), +c(283.8926, 299.0644), tolerance = 0.0001 ) @@ -201,7 +201,7 @@ dim(skill_metrics$rpss) ) expect_equal( as.vector(skill_metrics$rpss[, 2, 3]), -c(-1.153829, -1.114743, -1.392457), +c(-0.2918857, -1.4809143, -1.3842286), tolerance = 0.0001 ) expect_equal( -- GitLab From 8b06e9f83d8b2711c9cc5338f2736098fd53374c Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 16 Jan 2023 13:16:40 +0100 Subject: [PATCH 63/80] Add Nadia's recipe --- .../testing_recipes/recipe_testing_nadia.yml | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 modules/Loading/testing_recipes/recipe_testing_nadia.yml diff --git a/modules/Loading/testing_recipes/recipe_testing_nadia.yml b/modules/Loading/testing_recipes/recipe_testing_nadia.yml new file mode 100644 index 00000000..60711981 --- /dev/null +++ b/modules/Loading/testing_recipes/recipe_testing_nadia.yml @@ -0,0 +1,49 @@ +Description: + Author: V. Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: system5c3s + Multimodel: False + Reference: + name: era5 + Time: + sdate: '1101' + fcst_year: + hcst_start: '2010' + hcst_end: '2015' + ftime_min: 1 + ftime_max: 6 + Region: + latmin: 30 + latmax: 50 + lonmin: -10 + lonmax: 30 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: raw + Anomalies: + compute: yes + cross_validation: yes + Skill: + metric: mean_bias EnsCorr RPSS CRPSS EnsSprErr + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] + Indicators: + index: no + ncores: 7 + remove_NAs: yes + Output_format: scorecards +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ -- GitLab From 7add579b7b76e4fadf267e7d5a795618de74a368 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 16 Jan 2023 14:37:32 +0100 Subject: [PATCH 64/80] Remove archive --- modules/test_seasonal.R | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R index 854e293f..b8541488 100644 --- a/modules/test_seasonal.R +++ b/modules/test_seasonal.R @@ -7,7 +7,6 @@ source("modules/Visualization/Visualization.R") recipe_file <- "modules/Loading/testing_recipes/recipe_seasonal-tests.yml" recipe <- prepare_outputs(recipe_file) -## archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive # Load datasets data <- load_datasets(recipe) -- GitLab From bf43b6a6046265dafd77970d38e8a84fdec37724 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 16 Jan 2023 16:53:56 +0100 Subject: [PATCH 65/80] Testing for Nadia --- .../testing_recipes/recipe_seasonal-tests.yml | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/modules/Loading/testing_recipes/recipe_seasonal-tests.yml b/modules/Loading/testing_recipes/recipe_seasonal-tests.yml index 61177b71..4e8128e7 100644 --- a/modules/Loading/testing_recipes/recipe_seasonal-tests.yml +++ b/modules/Loading/testing_recipes/recipe_seasonal-tests.yml @@ -8,40 +8,40 @@ Analysis: freq: monthly_mean Datasets: System: - name: system7c3s + name: system21_m1 Multimodel: False Reference: name: era5 Time: sdate: '1101' - fcst_year: '2020' + fcst_year: hcst_start: '2000' hcst_end: '2015' ftime_min: 1 - ftime_max: 2 + ftime_max: 6 Region: - latmin: -10 - latmax: 10 - lonmin: 0 - lonmax: 20 + latmin: 30 + latmax: 50 + lonmin: -10 + lonmax: 30 Regrid: method: bilinear type: to_system Workflow: Calibration: - method: mse_min + method: raw Anomalies: compute: yes cross_validation: yes Skill: - metric: RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr mean_bias mean_bias_SS + metric: mean_bias EnsCorr RPSS CRPSS EnsSprErr # RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr mean_bias mean_bias_SS Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] Indicators: index: no - ncores: 7 + ncores: 14 remove_NAs: yes - Output_format: S2S4E + Output_format: Scorecards Run: Loglevel: INFO Terminal: yes -- GitLab From 2f1b169b569e7209db032eab7b0477ea1c2878a8 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 17 Jan 2023 08:44:07 +0100 Subject: [PATCH 66/80] Correct precipitation unit conversion factor to 86400 seconds per day --- modules/Loading/Loading.R | 6 +++--- modules/Loading/Loading_decadal.R | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 1c8ccad7..aff93f87 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -341,12 +341,12 @@ load_datasets <- function(recipe) { (attr(obs$Variable, "variable")$units == "m s-1")) { info(recipe$Run$logger, "Converting precipitation from m/s to mm/day.") - obs$data <- obs$data*84000*1000 + obs$data <- obs$data*86400*1000 attr(obs$Variable, "variable")$units <- "mm/day" - hcst$data <- hcst$data*84000*1000 + hcst$data <- hcst$data*86400*1000 attr(hcst$Variable, "variable")$units <- "mm/day" if (!is.null(fcst)) { - fcst$data <- fcst$data*84000*1000 + fcst$data <- fcst$data*86400*1000 attr(fcst$Variable, "variable")$units <- "mm/day" } } diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R index 8046344b..59e73a04 100644 --- a/modules/Loading/Loading_decadal.R +++ b/modules/Loading/Loading_decadal.R @@ -491,12 +491,12 @@ load_datasets <- function(recipe) { info(recipe$Run$logger, "Converting precipitation from m/s to mm/day.") - obs$data <- obs$data*84000*1000 + obs$data <- obs$data*86400*1000 attr(obs$Variable, "variable")$units <- "mm/day" - hcst$data <- hcst$data*84000*1000 + hcst$data <- hcst$data*86400*1000 attr(hcst$Variable, "variable")$units <- "mm/day" if (!is.null(fcst)) { - fcst$data <- fcst$data*84000*1000 + fcst$data <- fcst$data*86400*1000 attr(fcst$Variable, "variable")$units <- "mm/day" } } -- GitLab From eacfe83b308f2038e5573665fe7f1c874a07ee47 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 18 Jan 2023 12:59:28 +0100 Subject: [PATCH 67/80] Implement Nadia's suggestions to improve EnsSprErr and Mean Bias plots --- modules/Visualization/Visualization.R | 41 ++++++++++++++------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 59157853..a8664569 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -115,17 +115,17 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, display_name <- toupper(strsplit(name, "_")[[1]][1]) skill <- skill_metrics[[name]] brks <- seq(-1, 1, by = 0.2) - cols <- clim.colors(length(brks) + 1, diverging_palette) - colorbar <- cols[2:(length(cols) - 1)] - col_inf <- cols[1] + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- colorbar[1] col_sup <- NULL } else if (name == "mean_bias_ss") { display_name <- "Mean Bias Skill Score" skill <- skill_metrics[[name]] brks <- seq(-1, 1, by = 0.2) - cols <- clim.colors(length(brks) + 1, diverging_palette) - colorbar <- cols[2:(length(cols) - 1)] - col_inf <- cols[1] + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- colorbar[1] col_sup <- NULL } else if (name %in% c("enscorr", "enscorr_specs")) { display_name <- "Ensemble Mean Correlation" @@ -138,27 +138,28 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, skill <- skill_metrics[[name]] display_name <- toupper(strsplit(name, "_")[[1]][1]) brks <- seq(0, 1, by = 0.1) - cols <- grDevices::hcl.colors(length(brks), sequential_palette) - colorbar <- cols[1:(length(cols) - 1)] + colorbar <- grDevices::hcl.colors(length(brks), sequential_palette) + cols <- colorbar[1:(length(colorbar) - 1)] col_inf <- NULL - col_sup <- cols[length(cols)] + col_sup <- colorbar[length(colorbar)] } else if (name == "enssprerr") { - ## TODO: Adjust colorbar parameters skill <- skill_metrics[[name]] display_name <- "Spread-to-Error Ratio" - brks <- pretty(0:max(skill, na.rm = T), n = 12, min.n = 10) - colorbar <- clim.colors(length(brks) - 1, diverging_palette) + brks <- c(0, 0.6, 0.7, 0.8, 0.9, 1, 1.2, 1.4, 1.6, 1.8, 2) + colorbar <- clim.colors(length(brks), diverging_palette) + cols <- colorbar[1:length(colorbar) - 1] col_inf <- NULL - col_sup <- NULL + col_sup <- colorbar[length(colorbar)] } else if (name == "mean_bias") { skill <- skill_metrics[[name]] display_name <- "Mean Bias" - max_value <- max(abs(skill)) - ugly_intervals <- seq(-max_value, max_value, (max_value*2)/10) - brks <- pretty(ugly_intervals, n = 12, min.n = 10) - colorbar <- clim.colors(length(brks) - 1, diverging_palette) - col_inf <- NULL - col_sup <- NULL + max_value <- max(abs(quantile(skill, 0.02, na.rm = T)), + abs(quantile(skill, 0.98, na.rm = T))) + brks <- max_value * seq(-1, 1, by = 0.2) + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- colorbar[1] + col_sup <- colorbar[length(colorbar)] } options(bitmapType = "cairo") # Reorder dimensions @@ -200,7 +201,7 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, titles = titles, filled.continents=F, brks = brks, - cols = colorbar, + cols = cols, col_inf = col_inf, col_sup = col_sup, fileout = outfile, -- GitLab From 17e682c3ce5fd50478bee4a91944790e819505cd Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 19 Jan 2023 09:23:13 +0100 Subject: [PATCH 68/80] Fix condition for prlr unit conversion --- modules/Loading/Loading.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index aff93f87..53d41cc2 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -336,9 +336,10 @@ load_datasets <- function(recipe) { ## TODO: Make a unit conversion function? if (variable == "prlr") { # Verify that the units are m/s and the same in obs and hcst - if ((attr(obs$Variable, "variable")$units != - attr(hcst$Variable, "variable")$units) && - (attr(obs$Variable, "variable")$units == "m s-1")) { + if (((attr(obs$Variable, "variable")$units == "m s-1") || + (attr(obs$Variable, "variable")$units == "m s**-1")) && + ((attr(hcst$Variable, "variable")$units == "m s-1") || + (attr(hcst$Variable, "variable")$units == "m s**-1"))) { info(recipe$Run$logger, "Converting precipitation from m/s to mm/day.") obs$data <- obs$data*86400*1000 -- GitLab From 28505b94426c23b61783553bc797ddc9d51a3026 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 19 Jan 2023 09:54:57 +0100 Subject: [PATCH 69/80] Change prlr units condition in Loading decadal module --- modules/Loading/Loading_decadal.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R index 59e73a04..0a95d9a4 100644 --- a/modules/Loading/Loading_decadal.R +++ b/modules/Loading/Loading_decadal.R @@ -485,9 +485,10 @@ load_datasets <- function(recipe) { ## TODO: Make a function? if (variable == "prlr") { # Verify that the units are m/s and the same in obs and hcst - if ((attr(obs$Variable, "variable")$units != - attr(hcst$Variable, "variable")$units) && - (attr(obs$Variable, "variable")$units == "m s-1")) { + if (((attr(obs$Variable, "variable")$units == "m s-1") || + (attr(obs$Variable, "variable")$units == "m s**-1")) && + ((attr(hcst$Variable, "variable")$units == "m s-1") || + (attr(hcst$Variable, "variable")$units == "m s**-1"))) { info(recipe$Run$logger, "Converting precipitation from m/s to mm/day.") -- GitLab From f09ac40c5c6b07090d7741180b2d1327ed1ef3b4 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 31 Jan 2023 10:07:20 +0100 Subject: [PATCH 70/80] Reorder dims in anomalies module --- modules/Anomalies/Anomalies.R | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R index 552f895a..859e97bb 100644 --- a/modules/Anomalies/Anomalies.R +++ b/modules/Anomalies/Anomalies.R @@ -67,6 +67,7 @@ compute_anomalies <- function(recipe, data) { dims <- dim(clim_hcst) clim_hcst <- rep(clim_hcst, dim(data$fcst$data)[['ensemble']]) dim(clim_hcst) <- c(dims, ensemble = dim(data$fcst$data)[['ensemble']]) + clim_hcst <- Reorder(clim_hcst, order = names(dim(data$fcst$data))) # Get fcst anomalies data$fcst$data <- data$fcst$data - clim_hcst # Change metadata -- GitLab From e4d5723d2ec59182962133a23856746833308b9f Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 31 Jan 2023 12:09:57 +0100 Subject: [PATCH 71/80] Add atomic recipe checker to prepare_outputs() --- .gitignore | 1 - OperationalCS.R | 5 +- conf/vars-dict.yml-OLD | 114 ++++++ .../testing_recipes/wrong_recipe_example.yml | 44 ++ modules/Saving/paths2save.R | 2 + modules/test_seasonal.R | 39 +- recipes/recipe_splitting_example.yml | 60 +++ recipes/tests/{ => old_tests}/execute_tests.R | 0 .../seasonal_testWorkflow1.yml | 0 .../seasonal_testWorkflow2.yml | 0 .../seasonal_testWorkflow3.yml | 0 .../seasonal_testWorkflow4.yml | 0 .../seasonal_testWorkflow5.yml | 0 .../seasonal_testWorkflow6.yml | 0 .../seasonal_testWorkflow7.yml | 0 .../seasonal_testWorkflow8.yml | 0 .../tests/recipe_seasonal_two-variables.yml | 60 +++ tools/check_recipe.R | 380 ++++++++++++------ tools/data_summary.R | 11 +- tools/divide_recipe.R | 107 ++--- tools/prepare_outputs.R | 33 +- 21 files changed, 647 insertions(+), 209 deletions(-) create mode 100644 conf/vars-dict.yml-OLD create mode 100644 modules/Loading/testing_recipes/wrong_recipe_example.yml create mode 100644 recipes/recipe_splitting_example.yml rename recipes/tests/{ => old_tests}/execute_tests.R (100%) rename recipes/tests/{ => old_tests}/seasonal_testWorkflow1.yml (100%) rename recipes/tests/{ => old_tests}/seasonal_testWorkflow2.yml (100%) rename recipes/tests/{ => old_tests}/seasonal_testWorkflow3.yml (100%) rename recipes/tests/{ => old_tests}/seasonal_testWorkflow4.yml (100%) rename recipes/tests/{ => old_tests}/seasonal_testWorkflow5.yml (100%) rename recipes/tests/{ => old_tests}/seasonal_testWorkflow6.yml (100%) rename recipes/tests/{ => old_tests}/seasonal_testWorkflow7.yml (100%) rename recipes/tests/{ => old_tests}/seasonal_testWorkflow8.yml (100%) create mode 100644 recipes/tests/recipe_seasonal_two-variables.yml diff --git a/.gitignore b/.gitignore index d17d7634..e11ba7d3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,6 @@ out-logs/ *.swp *.swo -/modules/Calibration/test_victoria.R modules/Loading/testing_recipes/recipe_decadal_calendartest.yml modules/Loading/testing_recipes/recipe_decadal_daily_calendartest.yml conf/vitigeoss-vars-dict.yml diff --git a/OperationalCS.R b/OperationalCS.R index 1e662d1b..ec01a30e 100644 --- a/OperationalCS.R +++ b/OperationalCS.R @@ -22,7 +22,10 @@ log_file <- logger$logname logger <- logger$logger # Checks: -verifications <- check_recipe(recipe, logger) +verifications <- check_recipe(recipe, file = args[2], conf, logger) +# Divide recipe into single verifications recipes: +total_recipes <- divide_recipe(recipe, verifications, folder, logger) + # Divide recipe into single verifications recipes: total_recipes <- divide_recipe(recipe, verifications, folder, logger) # Go to verification code: diff --git a/conf/vars-dict.yml-OLD b/conf/vars-dict.yml-OLD new file mode 100644 index 00000000..04549d36 --- /dev/null +++ b/conf/vars-dict.yml-OLD @@ -0,0 +1,114 @@ + +vars: +# ECVs + tas: + units: "°C" + longname: "Daily mean temperature at surface" + outname: ~ + tasmin: + units: "°C" + longname: "Minimum daily temperature at surface" + outname: ~ + tasmax: + units: "°C" + longname: "Maximum daily temperature at surface" + outname: ~ + sfcwind: + units: "m/s" + longname: "Surface wind speed module" + outname: ~ + rsds: + units: "W/m2" + longname: "Surface solar radiation downwards" + outname: ~ + psl: + units: "hPa" + longname: "Mean sea level pressure" + outname: ~ + prlr: + units: "mm" + longname: "Total precipitation" + outname: ~ +# CFs + cfwnd1: + units: "%" + longname: "Wind Capacity factor IEC1" + outname: ~ + cfwnd2: + units: "%" + longname: "Wind Capacity factor IEC2" + outname: ~ + cfwnd3: + units: "%" + longname: "Wind Capacity factor IEC3" + outname: ~ + cfslr: + units: "%" + longname: "Solar Capacity factor" + outname: ~ +# Energy + edmnd: + units: "GW" + longname: "Electricity Demmand" + outname: ~ + wndpwo: + units: "GW" + longname: "Wind Power" + outname: ~ + dmndnetwnd: + units: "GW" + longname: "Demmand-net-Wind" + outname: ~ +# Indices + Spr32: + units: "days" + longname: > + Total count of days when daily maximum temp exceeded 32°C + from April 21st to June 21st + outname: ~ + SU35: + units: "days" + longname: > + Total count of days when daily maximum temp exceeded 35°C + from June 21st to September 21st + outname: ~ + SU36: + units: "days" + longname: > + Total count of days when daily maximum temp exceeded 36°C + from June 21st to September 21st + outname: ~ + SU40: + units: "days" + longname: > + Total count of days when daily maximum temp exceeded 40°C + from June 21st to September 21st + outname: ~ + GDD: + units: "days" + longname: > + The sum of the daily differences between daily mean + temperature and 10°C from April 1st to October 31st + outname: ~ + GST: + units: "°C" + longname: "The average temperature from April 1st to October 31st" + outname: ~ + SprTX: + units: "°C" + longname: "The average daily maximum temperature from April 1st to October 31st" + outname: ~ + WSDI: + units: "" + longname: > + The total count of days with at least 6 consecutives days + when the daily temperature maximum exceeds its 90th percentile + outname: ~ + SprR: + units: "mm" + longname: 'Total precipitation from April 21st to June 21st' + outname: ~ + HarR: + units: "mm" + longname: 'Total precipitation from August 21st to September 21st' + outname: ~ diff --git a/modules/Loading/testing_recipes/wrong_recipe_example.yml b/modules/Loading/testing_recipes/wrong_recipe_example.yml new file mode 100644 index 00000000..12e2fc06 --- /dev/null +++ b/modules/Loading/testing_recipes/wrong_recipe_example.yml @@ -0,0 +1,44 @@ +Description: + Author: V. Agudetse + Info: Incomplete recipe with incorrect fields to test the recipe checker. + +Analysis: + Horizon: Seasoning + Variables: + name: tas + freq: monthly_mean + Petaflops: + System: + name: system7c3s + Multimodel: False + Reference: + name: era5 + Time: + sdate: '1101' + fcst_syear: '2020' + hcst_start: '1993' + hcst_end: '2016' + ftime_max: 6 + Region: + latmax: 10 + lonmin: 0 + lonmax: 20 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: + Skill: + metric: RPS RPSS + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] + Indicators: + index: no + ncores: 7 + remove_NAs: yes + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ diff --git a/modules/Saving/paths2save.R b/modules/Saving/paths2save.R index 2d6353fe..2d5a0a4e 100644 --- a/modules/Saving/paths2save.R +++ b/modules/Saving/paths2save.R @@ -1,4 +1,6 @@ ## TODO: Separate by time aggregation +## TODO: Build a default path that accounts for: +## variable, system, reference, start date and region name get_filename <- function(dir, recipe, var, date, agg, file.type) { # This function builds the path of the output file based on directory, diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R index b8541488..b22eb070 100644 --- a/modules/test_seasonal.R +++ b/modules/test_seasonal.R @@ -5,21 +5,30 @@ source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") +<<<<<<< HEAD recipe_file <- "modules/Loading/testing_recipes/recipe_seasonal-tests.yml" recipe <- prepare_outputs(recipe_file) +======= +recipe_file <- "recipes/tests/recipe_seasonal_two-variables.yml" +recipe <- prepare_outputs(recipe_file, disable_checks = T) +atomic_recipes <- divide_recipe(recipe) +## archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive +>>>>>>> e9b1f4f4a5c3de69fa938c01fe827a5789613247 -# Load datasets -data <- load_datasets(recipe) -# Calibrate datasets -calibrated_data <- calibrate_datasets(recipe, data) -# Compute anomalies -calibrated_data <- compute_anomalies(recipe, calibrated_data) -# Compute skill metrics -skill_metrics <- compute_skill_metrics(recipe, calibrated_data) -# Compute percentiles and probability bins -probabilities <- compute_probabilities(recipe, calibrated_data) -# Export all data to netCDF -save_data(recipe, calibrated_data, skill_metrics, probabilities) -# Plot data -plot_data(recipe, calibrated_data, skill_metrics, probabilities, - significance = T) +for (atomic_recipe in atomic_recipes) { + # Load datasets + data <- load_datasets(atomic_recipe) + # Calibrate datasets + calibrated_data <- calibrate_datasets(atomic_recipe, data) + # Compute anomalies + calibrated_data <- compute_anomalies(atomic_recipe, calibrated_data) + # Compute skill metrics + skill_metrics <- compute_skill_metrics(atomic_recipe, calibrated_data) + # Compute percentiles and probability bins + probabilities <- compute_probabilities(atomic_recipe, calibrated_data) + # Export all data to netCDF + save_data(atomic_recipe, calibrated_data, skill_metrics, probabilities) + # Plot data + plot_data(atomic_recipe, calibrated_data, skill_metrics, probabilities, + significance = T) +} diff --git a/recipes/recipe_splitting_example.yml b/recipes/recipe_splitting_example.yml new file mode 100644 index 00000000..e62611ab --- /dev/null +++ b/recipes/recipe_splitting_example.yml @@ -0,0 +1,60 @@ +################################################################################ +## RECIPE DESCRIPTION +################################################################################ + +Description: + Author: V. Agudetse + Info: Test for recipe splitting + +################################################################################ +## ANALYSIS CONFIGURATION +################################################################################ + +Analysis: + Horizon: Seasonal + Variables: # ECVs and Indicators? + - {name: tas, freq: monthly_mean} + - {name: prlr, freq: monthly_mean} + Datasets: + System: # multiple systems for single model, split if Multimodel = F + - {name: system7c3s} + - {name: system5c3s} + Multimodel: False # single option + Reference: + - {name: era5} # multiple references for single model? + Time: + sdate: # list, split + - '1101' + - '1201' + fcst_year: '2020' # list, don't split, handled internally + hcst_start: '1993' # single option + hcst_end: '2016' # single option + ftime_min: 1 # single option + ftime_max: 6 # single option + Region: # multiple lists, split? Add region name if length(Region) > 1 + - {name: "global", latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9} + - {name: "nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 60} + Regrid: + method: bilinear ## TODO: allow multiple methods? + type: to_system + Workflow: + Calibration: + method: mse_min ## TODO: list, split? + Skill: + metric: RPS, RPSS, CRPS, CRPSS, FRPSS, BSS10, BSS90, mean_bias, mean_bias_SS # list, don't split + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # list, don't split + Indicators: + index: no # ? + ncores: 7 + remove_NAs: yes # bool, don't split + Output_format: S2S4E # string, don't split + +################################################################################ +## Run CONFIGURATION +################################################################################ +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/recipes/tests/execute_tests.R b/recipes/tests/old_tests/execute_tests.R similarity index 100% rename from recipes/tests/execute_tests.R rename to recipes/tests/old_tests/execute_tests.R diff --git a/recipes/tests/seasonal_testWorkflow1.yml b/recipes/tests/old_tests/seasonal_testWorkflow1.yml similarity index 100% rename from recipes/tests/seasonal_testWorkflow1.yml rename to recipes/tests/old_tests/seasonal_testWorkflow1.yml diff --git a/recipes/tests/seasonal_testWorkflow2.yml b/recipes/tests/old_tests/seasonal_testWorkflow2.yml similarity index 100% rename from recipes/tests/seasonal_testWorkflow2.yml rename to recipes/tests/old_tests/seasonal_testWorkflow2.yml diff --git a/recipes/tests/seasonal_testWorkflow3.yml b/recipes/tests/old_tests/seasonal_testWorkflow3.yml similarity index 100% rename from recipes/tests/seasonal_testWorkflow3.yml rename to recipes/tests/old_tests/seasonal_testWorkflow3.yml diff --git a/recipes/tests/seasonal_testWorkflow4.yml b/recipes/tests/old_tests/seasonal_testWorkflow4.yml similarity index 100% rename from recipes/tests/seasonal_testWorkflow4.yml rename to recipes/tests/old_tests/seasonal_testWorkflow4.yml diff --git a/recipes/tests/seasonal_testWorkflow5.yml b/recipes/tests/old_tests/seasonal_testWorkflow5.yml similarity index 100% rename from recipes/tests/seasonal_testWorkflow5.yml rename to recipes/tests/old_tests/seasonal_testWorkflow5.yml diff --git a/recipes/tests/seasonal_testWorkflow6.yml b/recipes/tests/old_tests/seasonal_testWorkflow6.yml similarity index 100% rename from recipes/tests/seasonal_testWorkflow6.yml rename to recipes/tests/old_tests/seasonal_testWorkflow6.yml diff --git a/recipes/tests/seasonal_testWorkflow7.yml b/recipes/tests/old_tests/seasonal_testWorkflow7.yml similarity index 100% rename from recipes/tests/seasonal_testWorkflow7.yml rename to recipes/tests/old_tests/seasonal_testWorkflow7.yml diff --git a/recipes/tests/seasonal_testWorkflow8.yml b/recipes/tests/old_tests/seasonal_testWorkflow8.yml similarity index 100% rename from recipes/tests/seasonal_testWorkflow8.yml rename to recipes/tests/old_tests/seasonal_testWorkflow8.yml diff --git a/recipes/tests/recipe_seasonal_two-variables.yml b/recipes/tests/recipe_seasonal_two-variables.yml new file mode 100644 index 00000000..89406ece --- /dev/null +++ b/recipes/tests/recipe_seasonal_two-variables.yml @@ -0,0 +1,60 @@ +################################################################################ +## RECIPE DESCRIPTION +################################################################################ + +Description: + Author: V. Agudetse + Info: Test Independent verification of two variables + +################################################################################ +## ANALYSIS CONFIGURATION +################################################################################ + +Analysis: + Horizon: Seasonal + Variables: # ECVs and Indicators? + - {name: tas, freq: monthly_mean} + - {name: prlr, freq: monthly_mean} + Datasets: + System: # multiple systems for single model, split if Multimodel = F + - {name: system5c3s} + Multimodel: False # single option + Reference: + - {name: era5} # multiple references for single model? + Time: + sdate: # list, split + - '0101' + fcst_year: '2020' # list, don't split, handled internally + hcst_start: '2000' # single option + hcst_end: '2016' # single option + ftime_min: 1 # single option + ftime_max: 3 # single option + Region: # multiple lists, split? Add region name if length(Region) > 1 + - {name: "nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 60} + Regrid: + method: bilinear ## TODO: allow multiple methods? + type: to_system + Workflow: + Anomalies: + compute: yes + cross_validation: yes + Calibration: + method: mse_min ## TODO: list, split? + Skill: + metric: RPS, RPSS, CRPS, CRPSS, FRPSS, BSS10, BSS90, mean_bias, mean_bias_SS # list, don't split + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # list, don't split + Indicators: + index: no # ? + ncores: 7 + remove_NAs: yes # bool, don't split + Output_format: S2S4E # string, don't split + +################################################################################ +## Run CONFIGURATION +################################################################################ +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 25536335..541319f5 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -1,93 +1,162 @@ -check_recipe <- function(recipe, logger) { +check_recipe <- function(recipe) { # recipe: yaml recipe already read it - # output: errors or the total number of workflow (vars x regions) to compute + ## TODO: Adapt to decadal case - info(logger, paste("Checking recipe", recipe$filename)) + info(recipe$Run$logger, paste("Checking recipe:", recipe$recipe_path)) # --------------------------------------------------------------------- # ANALYSIS CHECKS # --------------------------------------------------------------------- - TIME_SETTINGS = c('sdate','leadtimemin','leadtimemax','hcst_start','hcst_end') - PARAMS = c('Horizon','Time','Variables','Region','Regrid','Workflow','Datasets') - HORIZONS <- c('Subseasonal','Seasonal','Decadal') + TIME_SETTINGS_SEASONAL <- c("sdate", "ftime_min", "ftime_max", "hcst_start", + "hcst_end") + TIME_SETTINGS_DECADAL <- c("ftime_min", "ftime_max", "hcst_start", "hcst_end") + PARAMS <- c("Horizon", "Time", "Variables", "Region", "Regrid", "Workflow", + "Datasets") + HORIZONS <- c("subseasonal", "seasonal", "decadal") + # Define error status variable + error_status <- F - # create output dirs: - if (!any(names(recipe) %in% "Analysis")) { - error(logger, "The recipe should contain an element called 'Analysis'.") + # Check basic elements in recipe:Analysis: + if (!("Analysis" %in% names(recipe))) { + error(recipe$Run$logger, + "The recipe must contain an element called 'Analysis'.") + error_status <- T } if (!all(PARAMS %in% names(recipe$Analysis))) { - error(logger, - paste("The element 'Analysis' in the recipe should contain these", - "elements:", paste(PARAMS, collapse = " "))) + error(recipe$Run$logger, + paste0("The element 'Analysis' in the recipe must contain all of ", + "the following: ", paste(PARAMS, collapse = ", "), ".")) + error_status <- T } - if (!any(HORIZONS %in% recipe$Analysis$Horizon)) { - error(logger, - "The element 'Horizon' in the recipe should be one of the followings:", - paste(HORIZONS, collapse = " ")) - } - # Check temporal settings and - # count the number of verifications - if (!all(TIME_SETTINGS %in% names(recipe$Analysis$Time))) { - error(logger, - paste("The element 'Time' in the recipe should contain these elements:", - paste(TIME_SETTINGS, collapse = " "))) - } - if (is.null(recipe$Analysis$Time$sdate$fcst_year) || - recipe$Analysis$Time$sdate$fcst_year == 'None') { + if (!any(HORIZONS %in% tolower(recipe$Analysis$Horizon))) { + error(recipe$Run$logger, + paste0("The element 'Horizon' in the recipe must be one of the ", + "following: ", paste(HORIZONS, collapse = ", "), ".")) + error_status <- T + } + # Check time settings + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + if (!all(TIME_SETTINGS_SEASONAL %in% names(recipe$Analysis$Time))) { + error(recipe$Run$logger, + paste0("The element 'Time' in the recipe must contain all of the ", + "following: ", paste(TIME_SETTINGS_SEASONAL, + collapse = ", "), ".")) + error_status <- T + } + } else if (tolower(recipe$Analysis$Horizon) == "decadal") { + if (!all(TIME_SETTINGS_DECADAL %in% names(recipe$Analysis$Time))) { + error(recipe$Run$logger, + paste0("The element 'Time' in the recipe must contain all of the ", + "following: ", paste(TIME_SETTINGS_DECADAL, + collapse = ", "), ".")) + error_status <- T + } + } + # Check ftime_min and ftime_max + if ((!(recipe$Analysis$Time$ftime_min > 0)) || + (!is.integer(recipe$Analysis$Time$ftime_min))) { + error(recipe$Run$logger, + "The element 'ftime_min' must be an integer larger than 0.") + error_status <- T + } + if ((!(recipe$Analysis$Time$ftime_max > 0)) || + (!is.integer(recipe$Analysis$Time$ftime_max))) { + error(recipe$Run$logger, + "The element 'ftime_max' must be an integer larger than 0.") + error_status <- T + } + if ((is.numeric(recipe$Analysis$Time$ftime_max)) && + (is.numeric(recipe$Analysis$Time$ftime_min))) { + if (recipe$Analysis$Time$ftime_max < recipe$Analysis$Time$ftime_min) { + error(recipe$Run$logger, + "'ftime_max' cannot be smaller than 'ftime_min'.") + error_status <- T + } + } + # Check consistency of hindcast years + if (!(as.numeric(recipe$Analysis$Time$hcst_start) %% 1 == 0) || + (!(recipe$Analysis$Time$hcst_start > 0))) { + error(recipe$Run$logger, + "The element 'hcst_start' must be a valid year.") + error_status <- T + } + if (!(as.numeric(recipe$Analysis$Time$hcst_end) %% 1 == 0) || + (!(recipe$Analysis$Time$hcst_end > 0))) { + error(recipe$Run$logger, + "The element 'hcst_end' must be a valid year.") + error_status <- T + } + if (recipe$Analysis$Time$hcst_end < recipe$Analysis$Time$hcst_start) { + error(recipe$Run$logger, + "'hcst_end' cannot be smaller than 'hcst_start'.") + error_status <- T + } + ## TODO: Is this needed? + if (is.null(recipe$Analysis$Time$fcst_year) || + tolower(recipe$Analysis$Time$fcst_year) == 'none') { stream <- "hindcast" - recipe$Analysis$Time$sdate$fcst_year <- 'YYYY' + # recipe$Analysis$Time$fcst_year <- 'YYYY' } else { stream <- "fcst" } - if (length(recipe$Analysis$Time$sdate$fcst_day) > 1 && - tolower(recipe$Analysis$Horizon) != "subseasonal") { - warn(logger, - paste("Only subseasonal verification allows multiple forecast days."), - "Element fcst_day in recipe set as 1.") - recipe$Analysis$Time$sdate$fcst_day <- '01' - } - if (is.null(recipe$Analysis$Time$sdate$fcst_sday)) { - error(logger, - paste("The element 'fcst_sday' in the recipe should be defined.")) - } - if (is.null(recipe$Analysis$Time$sdate$fcst_syear)) { - error(logger, - paste("The element 'fcst_syear' in the recipe should be defined.")) + + ## TODO: To be implemented in the future + # if (length(recipe$Analysis$Time$sdate$fcst_day) > 1 && + # tolower(recipe$Analysis$Horizon) != "subseasonal") { + # warn(recipe$Run$logger, + # paste("Only subseasonal verification allows multiple forecast days."), + # "Element fcst_day in recipe set as 1.") + # recipe$Analysis$Time$sdate$fcst_day <- '01' + # } + ## TODO: Delete, this parameter was deprecated + # if (is.null(recipe$Analysis$Time$sdate$fcst_sday)) { + # error(recipe$Run$logger, + # paste("The element 'fcst_sday' in the recipe should be defined.")) + # } + + if (is.null(recipe$Analysis$Time$fcst_year)) { + warn(recipe$Run$logger, + paste("The element 'fcst_year' is not defined in the recipe.", + "No forecast year will be used.")) } + ## TODO: Adapt and move this inside 'if'? + # fcst.sdate <- NULL + # for (syear in recipe$Analysis$Time$fcst_year) { + # for (sday in recipe$Analysis$Time$sdate) { + # fcst.sdate <- c(fcst.sdate, + # paste0(syear, + # sprintf("%04d", as.numeric(sday)))) + # } + # } + # fcst.sdate <- list(stream = stream, fcst.sdate = fcst.sdate) - - fcst.sdate <- NULL - for (syear in recipe$Analysis$Time$sdate$fcst_syear) { - for (sday in recipe$Analysis$Time$sdate$fcst_sday) { - fcst.sdate <- c(fcst.sdate, - paste0(syear, - sprintf("%04d", as.numeric(sday)))) - } - } - fcst.sdate <- list(stream = stream, fcst.sdate = fcst.sdate) # Regrid checks: if (length(recipe$Analysis$Regrid) != 2) { - error(logger, - "The 'Regrid' element should specified the 'method' and 'type'.") - stop("EXECUTION FAILED") + error(recipe$Run$logger, + "The 'Regrid' element must specify the 'method' and 'type'.") + error_status <- T } -# more checks + # TODO: Add Workflow checks? # ... - # calculate number of workflows to create for each variable and + # calculate number of workflows to create for each variable and if (length(recipe$Analysis$Horizon) > 1) { - error(logger, "Only 1 Horizon can be specified in the recipe") - stop("EXECUTION FAILED") - } - nvar <- length(recipe$Analysis$Variables) - if (nvar > 2) { - error(logger, - "Only two type of Variables can be listed: ECVs and Indicators.") - stop("EXECUTION FAILED") + error(recipe$Run$logger, + "Only one single Horizon can be specified in the recipe") + error_status <- T } + + ## TODO: Refine this + # nvar <- length(recipe$Analysis$Variables) + # if (nvar > 2) { + # error(recipe$Run$logger, + # "Only two type of Variables can be listed: ECVs and Indicators.") + # stop("EXECUTION FAILED") + # } + # remove NULL or None Indicators or ECVs from the recipe: if (!is.null(recipe$Analysis$Variables$Indicators) && !is.list(recipe$Analysis$Variables$Indicators)) { @@ -99,82 +168,138 @@ check_recipe <- function(recipe, logger) { recipe$Analysis$Variables <- recipe$Analysis$Variables[ -which(names(recipe$Analysis$Variables) == 'ECVs')] } + + # Region checks: + LIMITS <- c('latmin', 'latmax', 'lonmin', 'lonmax') + if (!all(LIMITS %in% names(recipe$Analysis$Region))) { + error(recipe$Run$logger, + paste0("There must be 4 elements in 'Region': ", + paste(LIMITS, collapse = ", "), ".")) + error_status <- T + } + ## TODO: Implement multiple regions + # nregions <- length(recipe$Analysis$Region) + # for (i in 1:length(recipe$Analysis$Region)) { + # if (!all(limits %in% names(recipe$Analysis$Region[[i]]))) { + # limits <- paste(limits, collapse = " ") + # error(recipe$Run$logger, + # paste0("Each region defined in element 'Region' ", + # "should have 4 elements: ", + # paste(limits, collapse = ", "), ".")) + # error_status <- T + # } + # if (length(recipe$Analysis$Region) > 1) { + # if (!("name" %in% names(recipe$Analysis$Region[[i]]))) { + # error(recipe$Run$logger, + # paste("If multiple regions are requested, each region must", + # "have a 'name'".) + # # are numeric? class list mode list + # } + # --------------------------------------------------------------------- + # WORKFLOW CHECKS + # --------------------------------------------------------------------- + # Only one Calibration method allowed: - if ((is.logical(recipe$Analysis$Workflow$Calibration[[1]]) && - recipe$Analysis$Workflow$Calibration[[1]] == FALSE) || - recipe$Analysis$Workflow$Calibration[[1]] == 'None' || - is.null(recipe$Analysis$Workflow$Calibration[[1]])) { - warn(logger, - "There is no Calibration method selected, raw data verification.") - recipe$Analysis$Workflow$Calibration[[1]] <- FALSE + if ((is.logical(recipe$Analysis$Workflow$Calibration$method) && + recipe$Analysis$Workflow$Calibration$method == FALSE) || + tolower(recipe$Analysis$Workflow$Calibration$method) == 'none' || + is.null(recipe$Analysis$Workflow$Calibration$method)) { + warn(recipe$Run$logger, + "No Calibration method was specified, raw data verification.") + recipe$Analysis$Workflow$Calibration$method <- 'raw' } else { - # remove multiple calibration methods - if (is.null(names(recipe$Analysis$Workflow$Calibration))) { - error(logger, - "The 'Calibration' element should specified at least the 'method'.") - stop("EXECUTION FAILED") + if (is.null(recipe$Analysis$Workflow$Calibration$method)) { + error(recipe$Run$logger, + "The 'Calibration' element 'method' must be specified.") + error_status <- T } - } - - if ("Region" %in% names(recipe$Analysis)) { - nregions <- length(recipe$Analysis$Region$Regional) - limits <- c('latmin', 'latmax', 'lonmin', 'lonmax') - for (i in 1:length(recipe$Analysis$Region)) { - if (!all(limits %in% names(recipe$Analysis$Region[[i]]))) { - limits <- paste(limits, collapse = " ") - error(logger, - paste("Each region defined in element 'Regional'", - "should have 4 elements:", - limits)) - stop("EXECUTION FAILED") - } - # are numeric? class list mode list + } + # Anomalies + if ("Anomalies" %in% names(recipe$Analysis$Workflow)) { + if (is.null(recipe$Analysis$Workflow$Anomalies$compute)) { + error(recipe$Run$logger, + "Parameter 'compute' must be defined under 'Anomalies'.") + error_status <- T + } else if (!(is.logical(recipe$Analysis$Workflow$Anomalies$compute))) { + error(recipe$Run$logger, + paste("Parameter 'Anomalies:compute' must be a logical value", + "(True/False or yes/no).")) + error_status <- T + } else if ((recipe$Analysis$Workflow$Anomalies$compute) && + (!is.logical(recipe$Analysis$Workflow$Anomalies$cross_validation))) { + error(recipe$Run$logger, + paste("If anomaly computation is requested, parameter", + "'cross_validation' must be defined under 'Anomalies', + and it must be a logical value (True/False or yes/no).")) + error_status <- T } - } else { - error(logger, - paste("'Region'", - "should be defined", - limits)) - stop("EXECUTION FAILED") } - + # Skill + if (("Skill" %in% names(recipe$Analysis$Workflow)) && + (is.null(recipe$Analysis$Workflow$Skill$metric))) { + error(recipe$Run$logger, + "Parameter 'metric' must be defined under 'Skill'.") + error_status <- T + } + # Probabilities + if ("Probabilities" %in% names(recipe$Analysis$Workflow)) { + if (is.null(recipe$Analysis$Workflow$Probabilities$percentiles)) { + error(recipe$Run$logger, + "Parameter 'percentiles' must be defined under 'Probabilities'.") + error_status <- T + } else if (!is.list(recipe$Analysis$Workflow$Probabilities$percentiles)) { + error(recipe$Run$logger, + paste("Parameter 'Probabilities:percentiles' expects a list.", + "See documentation in the wiki for examples.")) + error_status <- T + } + } + # --------------------------------------------------------------------- # RUN CHECKS # --------------------------------------------------------------------- - RUN_FIELDS = c("Loglevel","Terminal","output_dir","code_dir") - LOG_LEVELS = c("INFO","DEBUG","WARNING","ERROR") + RUN_FIELDS = c("Loglevel", "Terminal", "output_dir", "code_dir") + LOG_LEVELS = c("INFO", "DEBUG", "WARN", "ERROR", "FATAL") - if (!any(names(recipe) %in% "Run")) { - error(logger, "The recipe should contain an element called 'Run'.") + if (!("Run" %in% names(recipe))) { + stop("The recipe must contain an element named 'Run'.") } if (!all(RUN_FIELDS %in% names(recipe$Run))) { - error(logger, paste0("Run should contain the fields: ", - paste(RUN_FIELDS,collapse=", "), ".")) + error(recipe$Run$logger, paste("Recipe element 'Run' must contain", + "all of the following fields:", + paste(RUN_FIELDS, collapse=", "), ".")) + error_status <- T } if (!is.character(recipe$Run$output_dir)) { - error(logger, - paste("The Run element 'output_dir' in", recipe$filename,"file ", - "should be a character string indicating the path ", - "where to save the outputs.")) + error(recipe$Run$logger, + paste("The Run element 'output_dir' in", recipe$name, "file", + "should be a character string indicating the path where", + "the outputs should be saved.")) + error_status <- T } if (!is.character(recipe$Run$code_dir)) { - error(logger, - paste("The Run element 'code_dir' in", recipe$filename,"file ", - "should be a character string indicating the path ", + error(recipe$Run$logger, + paste("The Run element 'code_dir' in", recipe$name, "file ", + "should be a character string indicating the path", "where the code is.")) + error_status <- T } if (!is.logical(recipe$Run$Terminal)) { - error(logger, - paste("The Run element 'Terminal' in", recipe$filename,"file ", - "should be a boolean value indicating wether to print or not the log", - "in the terminal.")) + error(recipe$Run$logger, + paste("The Run element 'Terminal' in", recipe$name, "file ", + "should be a boolean value indicating whether or not to", + "print the logs in the terminal.")) + error_status <- T } - if (!is.character(recipe$Run$Loglevel) || !any(recipe$Run$Loglevel %in% LOG_LEVELS)) { + ## TODO: Review this case, since default value is allowed + if (!is.character(recipe$Run$Loglevel) || + !any(recipe$Run$Loglevel %in% LOG_LEVELS)) { error(logger, - paste("The Run element 'Loglevel' in", recipe$filename,"file ", - "should be a character string indicating one of the levels available: ", - paste0(LOG_LEVELS,collapse='/'))) + paste("The Run element 'Loglevel' in", recipe$name, "file", + "should be a character string specifying one of the levels available:", + paste0(LOG_LEVELS, collapse='/'))) + error_status <- T } # --------------------------------------------------------------------- @@ -182,10 +307,19 @@ check_recipe <- function(recipe, logger) { # --------------------------------------------------------------------- # Check workflow: need to define restrictions? # e.g. only one calibration method - nverifications <- check_number_of_dependent_verifications(recipe) - info(logger, paste("Start Dates", paste(fcst.sdate, collapse = " "))) - info(logger, "Recipe checked succsessfully.") - return(append(nverifications, fcst.sdate)) + ## TODO: Implement number of dependent verifications + #nverifications <- check_number_of_dependent_verifications(recipe) + # info(recipe$Run$logger, paste("Start Dates:", + # paste(fcst.sdate, collapse = " "))) + + # Return error if any check has failed + if (error_status) { + error(recipe$Run$logger, "RECIPE CHECK FAILED.") + stop("The recipe contains some errors. The full list is in the logs.") + } else { + info(recipe$Run$logger, "##### RECIPE CHECK SUCCESSFULL #####") + # return(append(nverifications, fcst.sdate)) + } } check_number_of_dependent_verifications <- function(recipe) { diff --git a/tools/data_summary.R b/tools/data_summary.R index 8dcc9910..f437d431 100644 --- a/tools/data_summary.R +++ b/tools/data_summary.R @@ -7,15 +7,15 @@ data_summary <- function(data_cube, recipe) { # Get name, leadtime months and date range object_name <- deparse(substitute(data_cube)) if (recipe$Analysis$Variables$freq == "monthly_mean") { - date_format <- '%b %Y' + date_format <- "%b %Y" } else if (recipe$Analysis$Variables$freq == "daily_mean") { - date_format <- '%b %d %Y' + date_format <- "%b %d %Y" } - months <- unique(format(as.Date(data_cube$Dates[[1]]), format = '%B')) - months <- paste(as.character(months), collapse=", ") + months <- unique(format(as.Date(data_cube$Dates[[1]]), format = "%B")) + months <- paste(as.character(months), collapse = ", ") sdate_min <- format(min(as.Date(data_cube$Dates[[1]])), format = date_format) sdate_max <- format(max(as.Date(data_cube$Dates[[1]])), format = date_format) - + # Create log instance and sink output to logfile and terminal info(recipe$Run$logger, "DATA SUMMARY:") info(recipe$Run$logger, paste(object_name, "months:", months)) @@ -35,4 +35,3 @@ data_summary <- function(data_cube, recipe) { } info(recipe$Run$logger, "---------------------------------------------") } - diff --git a/tools/divide_recipe.R b/tools/divide_recipe.R index dafc8704..18962f93 100644 --- a/tools/divide_recipe.R +++ b/tools/divide_recipe.R @@ -1,11 +1,12 @@ -# recipe: the content of the recipe -# verifications: the output from check_recipe -# folder: the name of the output folder for this run -# logger: the log file obtain from prepare_outputs -divide_recipe <- function(recipe, verifications, folder, logger) { - info(logger, "Spliting recipe in single verifications.") +# recipe: the recipe as returned by prepare_outputs() +divide_recipe <- function(recipe) { + + ## TODO: Implement dependent vs independent verifications? + info(recipe$Run$logger, "Spliting recipe in single verifications.") beta_recipe <- list(Description = append(recipe$Description, - "split version"), + list(Origin = paste("Atomic recipe,", + "split from:", + recipe$name))), Analysis = list(Horizon = recipe$Analysis$Horizon, Variables = NULL, Datasets = NULL, @@ -14,38 +15,38 @@ divide_recipe <- function(recipe, verifications, folder, logger) { Regrid = recipe$Analysis$Regrid, Workflow = recipe$Analysis$Workflow, Output_format = - recipe$Analysis$Output_format), - Run = recipe$Run) - # duplicate recipe by Variables considering dep and indep: - all_recipes <- list(beta_recipe) - i <- 1 # to get track of the recipe number - for (indep in verifications$independent) { - all_recipes[[i]]$Analysis$Variables <- indep - i = i + 1 - all_recipes <- append(all_recipes, list(beta_recipe)) + recipe$Analysis$Output_format), + Run = recipe$Run[c("Loglevel", "output_dir", "Terminal", + "code_dir", "logfile")]) + + # duplicate recipe by independent variables: + all_recipes <- rep(list(beta_recipe), length(recipe$Analysis$Variables)) + for (var in 1:length(recipe$Analysis$Variables)) { + all_recipes[[var]]$Analysis$Variables <- recipe$Analysis$Variables[[var]] } - for (dep in verifications$dependent) { - all_recipes[[i]]$Analysis$Variables <- dep - i = i + 1 - all_recipes <- append(all_recipes, list(beta_recipe)) - } - all_recipes <- all_recipes[-length(all_recipes)] + # for (dep in verifications$dependent) { + # all_recipes[[i]]$Analysis$Variables <- dep + # i = i + 1 + # all_recipes <- append(all_recipes, list(beta_recipe)) + # } + # all_recipes <- all_recipes[-length(all_recipes)] # wth does this do + # duplicate recipe by Datasets: # check Systems if (recipe$Analysis$Datasets$Multimodel) { for (reci in 1:length(all_recipes)) { - all_recipes[[reci]]$Analysis$Datasets <- list( - System = recipe$Analysis$Datasets$System, - Multimodel = recipe$Analysis$Datasets$Multimodel, - Reference = NULL) + all_recipes[[reci]]$Analysis$Datasets <- + list(System = recipe$Analysis$Datasets$System, + Multimodel = recipe$Analysis$Datasets$Multimodel, + Reference = NULL) } } else { for (sys in 1:length(recipe$Analysis$Datasets$System)) { for (reci in 1:length(all_recipes)) { - all_recipes[[reci]]$Analysis$Datasets <- list( - System = recipe$Analysis$Datasets$System[[sys]], - Multimodel = recipe$Analysis$Datasets$Multimodel, - Reference = NULL) + all_recipes[[reci]]$Analysis$Datasets <- + list(System = recipe$Analysis$Datasets$System[[sys]], + Multimodel = recipe$Analysis$Datasets$Multimodel, + Reference = NULL) } if (sys == 1) { recipes <- all_recipes @@ -72,28 +73,28 @@ divide_recipe <- function(recipe, verifications, folder, logger) { # Duplicate recipe by Region recipes <- list() for (reg in 1:length(recipe$Analysis$Region)) { - if (length(recipe$Analysis$Region[[reg]]) == 4) { ##TODO: THIS SHOULD BE ONLY CHECK IN THE RECIPE CHECKER? + # if (length(recipe$Analysis$Region[[reg]]) == 4) { ##TODO: THIS SHOULD BE ONLY CHECK IN THE RECIPE CHECKER? for (reci in 1:length(all_recipes)) { - all_recipes[[reci]]$Analysis$Region <- - recipe$Analysis$Region[[reg]] + all_recipes[[reci]]$Analysis$Region <- recipe$Analysis$Region[[reg]] } recipes <- append(recipes, all_recipes) - } + # } } all_recipes <- recipes rm(list = 'recipes') + # Duplicate recipe by start date if (tolower(recipe$Analysis$Horizon) == 'seasonal') { - for (sday in 1:length(recipe$Analysis$Time$sdate$fcst_sday)) { + for (sdate in 1:length(recipe$Analysis$Time$sdate)) { for (reci in 1:length(all_recipes)) { - all_recipes[[reci]]$Analysis$Time <- list(sdate = list( - fcst_syear = recipe$Analysis$Time$sdate$fcst_syear, - fcst_sday = recipe$Analysis$Time$sdate$fcst_sday[[sday]]), - hcst_start = recipe$Analysis$Time$hcst_start, - hcst_end = recipe$Analysis$Time$hcst_end, - leadtimemin = recipe$Analysis$Time$leadtimemin, - leadtimemax = recipe$Analysis$Time$leadtimemax) + all_recipes[[reci]]$Analysis$Time <- + list(sdate = recipe$Analysis$Time$sdate[[sdate]], + fcst_year = recipe$Analysis$Time$fcst_year, + hcst_start = recipe$Analysis$Time$hcst_start, + hcst_end = recipe$Analysis$Time$hcst_end, + ftime_min = recipe$Analysis$Time$ftime_min, + ftime_max = recipe$Analysis$Time$ftime_max) } - if (sday == 1) { + if (sdate == 1) { recipes <- all_recipes } else { recipes <- append(recipes, all_recipes) @@ -102,12 +103,24 @@ divide_recipe <- function(recipe, verifications, folder, logger) { all_recipes <- recipes rm(list = 'recipes') } # Rest of horizons - # Finally, save all recipes in saparated yaml files + # Save all recipes in separate YAML files + ## TODO: Re-add recipe$Run$logger for (reci in 1:length(all_recipes)) { + if (reci < 10) { + recipe_number <- paste0("0", reci) + } else { + recipe_number <- reci + } write_yaml(all_recipes[[reci]], - paste0(folder, "/logs/recipes/recipe_", reci, ".yml")) + paste0(recipe$Run$output_dir, "/logs/recipes/recipe_", + recipe_number, ".yml")) + all_recipes[[reci]]$Run$logger <- recipe$Run$logger } - text <- paste0("See folder ",folder,"/logs/recipes/ to see the individual recipes.") - info(logger, text) + info(recipe$Run$logger, + paste("The main recipe has been divided into", length(all_recipes), + "atomic recipes.")) + text <- paste0("See output directory ", recipe$Run$output_dir, + "/logs/recipes/ to see all the individual atomic recipes.") + info(recipe$Run$logger, text) return(all_recipes) } diff --git a/tools/prepare_outputs.R b/tools/prepare_outputs.R index a89e5e7b..1972aef0 100644 --- a/tools/prepare_outputs.R +++ b/tools/prepare_outputs.R @@ -1,12 +1,12 @@ #'Read recipe YAML file and create and store logfile info #' #'The purpose of this function is to read the recipe configuration for Auto-S2S -#'workflows and create logfiles stores in an the output directory specified in +#'workflows and create logfiles stored in an the output directory specified in #'the recipe. It returns an object of class logger that stores information on #'the recipe configuration and errors. #' #'@param recipe_file path to a YAML file with Auto-S2S configuration recipe -#' +#'@param disable_checks whether to disable the recipe checks #'@return list contaning recipe with logger, log file name and log dir name #' #'@import log4r @@ -20,10 +20,11 @@ #' #'@export -prepare_outputs <- function(recipe_file) { +prepare_outputs <- function(recipe_file, + disable_checks = FALSE) { -# recipe: the content of the readed recipe -# file: the recipe file name +# recipe_file: path to recipe YAML file +# disable_checks: If TRUE, does not perform checks on recipe recipe <- read_yaml(recipe_file) recipe$recipe_path <- recipe_file @@ -33,32 +34,30 @@ prepare_outputs <- function(recipe_file) { # Create output folders: folder_name <- paste0(gsub(".yml", "", gsub("/", "_", recipe$name)), "_", gsub(" ", "", gsub(":", "", gsub("-", "", Sys.time())))) - print("Saving all outputs to:") print(output_dir) print(folder_name) - dir.create(file.path(output_dir, folder_name, 'outputs'), recursive = TRUE) dir.create(file.path(output_dir, folder_name, 'logs')) dir.create(file.path(output_dir, folder_name, 'logs', 'recipes')) - + # Copy recipe to output folder file.copy(recipe$recipe_path, file.path(output_dir, folder_name, 'logs', 'recipes')) - + # Create log output file logfile <- file.path(output_dir, folder_name, 'logs', 'log.txt') file.create(logfile) - # Set default behaviour of log output file: + # Set default behaviour of logger if (is.null(recipe$Run)) { recipe$Run <- list(Loglevel = 'INFO', Terminal = TRUE) } if (is.null(recipe$Run$Loglevel)) { recipe$Run$Loglevel <- 'INFO' } - if (!is.logical(recipe$Run$Terminal)) { recipe$Run$Terminal <- TRUE } + # logger set-up if (recipe$Run$Terminal) { logger <- log4r::logger(threshold = recipe$Run$Loglevel, appenders = list(console_appender(layout = default_log_layout()), @@ -69,13 +68,15 @@ prepare_outputs <- function(recipe_file) { appenders = list(file_appender(logfile, append = TRUE, layout = default_log_layout()))) } - recipe$Run$output_dir <- file.path(output_dir, folder_name) recipe$Run$logger <- logger recipe$Run$logfile <- logfile - - info(recipe$Run$logger, - "##### LOGGER SET UP AND OUTPUT DIRECTORY PREPARED #####") - + # Run recipe checker + if (disable_checks) { + warn(recipe$Run$logger, + "Recipe checks disabled. The recipe will not be checked for errors.") + } else { + check_recipe(recipe) + } return(recipe) } -- GitLab From c5e1839a5e200b6161d8a8ca295f60dd1fed4dbe Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 31 Jan 2023 15:12:40 +0100 Subject: [PATCH 72/80] Correct seasonal test --- modules/test_seasonal.R | 39 +++++++++++++++------------------------ 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R index b22eb070..b8541488 100644 --- a/modules/test_seasonal.R +++ b/modules/test_seasonal.R @@ -5,30 +5,21 @@ source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") -<<<<<<< HEAD recipe_file <- "modules/Loading/testing_recipes/recipe_seasonal-tests.yml" recipe <- prepare_outputs(recipe_file) -======= -recipe_file <- "recipes/tests/recipe_seasonal_two-variables.yml" -recipe <- prepare_outputs(recipe_file, disable_checks = T) -atomic_recipes <- divide_recipe(recipe) -## archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive ->>>>>>> e9b1f4f4a5c3de69fa938c01fe827a5789613247 -for (atomic_recipe in atomic_recipes) { - # Load datasets - data <- load_datasets(atomic_recipe) - # Calibrate datasets - calibrated_data <- calibrate_datasets(atomic_recipe, data) - # Compute anomalies - calibrated_data <- compute_anomalies(atomic_recipe, calibrated_data) - # Compute skill metrics - skill_metrics <- compute_skill_metrics(atomic_recipe, calibrated_data) - # Compute percentiles and probability bins - probabilities <- compute_probabilities(atomic_recipe, calibrated_data) - # Export all data to netCDF - save_data(atomic_recipe, calibrated_data, skill_metrics, probabilities) - # Plot data - plot_data(atomic_recipe, calibrated_data, skill_metrics, probabilities, - significance = T) -} +# Load datasets +data <- load_datasets(recipe) +# Calibrate datasets +calibrated_data <- calibrate_datasets(recipe, data) +# Compute anomalies +calibrated_data <- compute_anomalies(recipe, calibrated_data) +# Compute skill metrics +skill_metrics <- compute_skill_metrics(recipe, calibrated_data) +# Compute percentiles and probability bins +probabilities <- compute_probabilities(recipe, calibrated_data) +# Export all data to netCDF +save_data(recipe, calibrated_data, skill_metrics, probabilities) +# Plot data +plot_data(recipe, calibrated_data, skill_metrics, probabilities, + significance = T) -- GitLab From b597dcd4f742e136fab2684bb14020011ed27549 Mon Sep 17 00:00:00 2001 From: allabres Date: Fri, 3 Feb 2023 12:41:02 +0100 Subject: [PATCH 73/80] Update archive.yml (variables, like tdps, added) --- conf/archive.yml | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/conf/archive.yml b/conf/archive.yml index 9d994ee5..e6567416 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -1,5 +1,3 @@ - - archive: src: "/esarchive/" System: @@ -9,17 +7,23 @@ archive: src: "exp/ecmwf/system5c3s/" daily_mean: {"tas":"_f6h/", "rsds":"_s0-24h/", "prlr":"_s0-24h/", "sfcWind":"_f6h/", - "tasmin":"_f24h/", "tasmax":"_f24h/"} + "tasmin":"_f24h/", "tasmax":"_f24h/", + "ta300":"_f12h/", "ta500":"_f12h/", "ta850":"_f12h/", + "g300":"_f12h/", "g500":"_f12h/", "g850":"_f12h/", + "tdps":"_f6h/", "hurs":"_f6h/"} monthly_mean: {"tas":"_f6h/", "rsds":"_s0-24h/", "prlr":"_s0-24h/", "sfcWind":"_f6h/", - "tasmin":"_f24h/", "tasmax":"_f24h/"} + "tasmin":"_f24h/", "tasmax":"_f24h/", + "ta300":"_f12h/", "ta500":"_f12h/", "ta850":"_f12h/", + "g300":"_f12h/", "g500":"_f12h/", "g850":"_f12h/", + "tdps":"_f6h/"} nmember: fcst: 51 hcst: 25 calendar: "proleptic_gregorian" reference_grid: "/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20180501.nc" system7c3s: - name: "Méteo-France System 7" + name: "Meteo-France System 7" institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/meteofrance/system7c3s/" monthly_mean: {"tas":"_f6h/", "g500":"_f12h/", @@ -106,17 +110,28 @@ archive: daily_mean: {"tas":"_f1h-r1440x721cds/", "rsds":"_f1h-r1440x721cds/", "prlr":"_f1h-r1440x721cds/", + "g300":"_f1h-r1440x721cds/", "g500":"_f1h-r1440x721cds/", + "g850":"_f1h-r1440x721cds/", "sfcWind":"_f1h-r1440x721cds/", "tasmax":"_f1h-r1440x721cds/", - "tasmin":"_f1h-r1440x721cds/"} + "tasmin":"_f1h-r1440x721cds/", + "ta300":"_f1h-r1440x721cds/", + "ta500":"_f1h-r1440x721cds/", + "ta850":"_f1h-r1440x721cds/", + "hurs":"_f1h-r1440x721cds/"} monthly_mean: {"tas":"_f1h-r1440x721cds/", "prlr":"_f1h-r1440x721cds/", "rsds":"_f1h-r1440x721cds/", + "g300":"_f1h-r1440x721cds/", "g500":"_f1h-r1440x721cds/", + "g850":"_f1h-r1440x721cds/", "sfcWind":"_f1h-r1440x721cds/", "tasmax":"_f1h-r1440x721cds/", - "tasmin":"_f1h-r1440x721cds/"} + "tasmin":"_f1h-r1440x721cds/", + "ta300":"_f1h-r1440x721cds/", + "ta500":"_f1h-r1440x721cds/", + "ta850":"_f1h-r1440x721cds/"} calendar: "standard" reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" era5land: @@ -127,7 +142,8 @@ archive: "prlr":"_f1h/", "sfcWind":"_f1h/"} monthly_mean: {"tas":"_f1h/","tasmin":"_f24h/", "tasmax":"_f24h/", "prlr":"_f1h/", - "sfcWind":"_f1h/", "rsds":"_f1h/"} + "sfcWind":"_f1h/", "rsds":"_f1h/", + "tdps":"_f1h/"} calendar: "proleptic_gregorian" reference_grid: "/esarchive/recon/ecmwf/era5land/daily_mean/tas_f1h/tas_201805.nc" uerra: -- GitLab From 7bcb4a65a5ba612ab049a40998921580fe2d93da Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 8 Feb 2023 13:22:40 +0100 Subject: [PATCH 74/80] Add system and reference name checks --- tools/check_recipe.R | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 541319f5..0f268733 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -15,6 +15,9 @@ check_recipe <- function(recipe) { PARAMS <- c("Horizon", "Time", "Variables", "Region", "Regrid", "Workflow", "Datasets") HORIZONS <- c("subseasonal", "seasonal", "decadal") + ARCHIVE_SEASONAL <- "conf/archive.yml" + ARCHIVE_DECADAL <- "conf/archive_decadal.yml" + # Define error status variable error_status <- F @@ -40,6 +43,7 @@ check_recipe <- function(recipe) { } # Check time settings if (tolower(recipe$Analysis$Horizon) == "seasonal") { + archive <- read_yaml(ARCHIVE_SEASONAL)$archive if (!all(TIME_SETTINGS_SEASONAL %in% names(recipe$Analysis$Time))) { error(recipe$Run$logger, paste0("The element 'Time' in the recipe must contain all of the ", @@ -48,6 +52,7 @@ check_recipe <- function(recipe) { error_status <- T } } else if (tolower(recipe$Analysis$Horizon) == "decadal") { + archive <- read_yaml(ARCHIVE_DECADAL)$archive if (!all(TIME_SETTINGS_DECADAL %in% names(recipe$Analysis$Time))) { error(recipe$Run$logger, paste0("The element 'Time' in the recipe must contain all of the ", @@ -56,6 +61,19 @@ check_recipe <- function(recipe) { error_status <- T } } + # Check system names + if (!all(recipe$Analysis$Datasets$System$name %in% names(archive$System))) { + error(recipe$Run$logger, + "The specified System name was not found in the archive.") + error_status <- T + } + # Check reference names + if (!all(recipe$Analysis$Datasets$Reference$name %in% + names(archive$Reference))) { + error(recipe$Run$logger, + "The specified Reference name was not found in the archive.") + error_status <- T + } # Check ftime_min and ftime_max if ((!(recipe$Analysis$Time$ftime_min > 0)) || (!is.integer(recipe$Analysis$Time$ftime_min))) { -- GitLab From e9261d8cb11c445f5fa5c5f96ae9987fb38b8361 Mon Sep 17 00:00:00 2001 From: allabres Date: Wed, 15 Feb 2023 10:26:42 +0100 Subject: [PATCH 75/80] Update variable-dictionary.yml --- conf/variable-dictionary.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/conf/variable-dictionary.yml b/conf/variable-dictionary.yml index 51252154..8631a318 100644 --- a/conf/variable-dictionary.yml +++ b/conf/variable-dictionary.yml @@ -1,4 +1,3 @@ - vars: ## NOTE: The units field in this file corresponds to CMOR standards. ## Some variables in esarchive may have different units than stated here. @@ -31,6 +30,11 @@ vars: long_name: "Surface Temperature" standard_name: "surface_temperature" accum: no + tdps: + units: "K" + long_name: "2 metre dewpoint temperature" + standard_name: + accum: no sfcWind: units: "m s-1" long_name: "Near-Surface Wind Speed" -- GitLab From 684d3ba205c0eccc37a5fab3381c3a8300b07fa8 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 16 Feb 2023 10:31:25 +0100 Subject: [PATCH 76/80] Add garbage collection to data_summary --- tools/data_summary.R | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/data_summary.R b/tools/data_summary.R index f437d431..62956877 100644 --- a/tools/data_summary.R +++ b/tools/data_summary.R @@ -34,4 +34,5 @@ data_summary <- function(data_cube, recipe) { info(recipe$Run$logger, i) } info(recipe$Run$logger, "---------------------------------------------") + gc() } -- GitLab From 7a3b33e07902229ff562537eefef6b01d107c4ff Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 17 Feb 2023 09:29:31 +0100 Subject: [PATCH 77/80] Add 'system' to S2S4E output format --- modules/Saving/paths2save.R | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/modules/Saving/paths2save.R b/modules/Saving/paths2save.R index 2d5a0a4e..5cb3326f 100644 --- a/modules/Saving/paths2save.R +++ b/modules/Saving/paths2save.R @@ -75,17 +75,19 @@ get_dir <- function(recipe, agg = "global") { } else { # Default generic output format based on FOCUS + system <- gsub('.','', recipe$Analysis$Datasets$System$name, fixed = T) + # Get startdate or hindcast period if (!is.null(recipe$Analysis$Time$fcst_year)) { if (tolower(recipe$Analysis$Horizon) == 'decadal') { - #PROBLEM: decadal doesn't have sdate + # decadal doesn't have sdate fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, collapse = '_') } else { fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) + recipe$Analysis$Time$sdate) } } else { if (tolower(recipe$Analysis$Horizon) == 'decadal') { - #PROBLEM: decadal doesn't have sdate + # decadal doesn't have sdate fcst.sdate <- paste0("hcst-", paste(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$hcst_end, sep = '_')) @@ -96,15 +98,14 @@ get_dir <- function(recipe, agg = "global") { calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method) store.freq <- recipe$Analysis$Variables$freq - + ## TODO: Change "_country" switch(tolower(agg), - "country" = {dir <- paste0(outdir, "/", calib.method, "-", - store.freq, "/", variable, + "country" = {dir <- paste0(outdir, "/", system, "/", calib.method, + "-", store.freq, "/", variable, "_country/", fcst.sdate, "/")}, - "global" = {dir <- paste0(outdir, "/", calib.method, "-", - store.freq, "/", variable, "/", - fcst.sdate, "/")}) + "global" = {dir <- paste0(outdir, "/", system, "/", calib.method, + "-", store.freq, "/", variable, "/", + fcst.sdate, "/")}) } return(dir) - } -- GitLab From b7a1cf77abf9241ca7c12a61a9bcc6f4e776830a Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 17 Feb 2023 11:25:46 +0100 Subject: [PATCH 78/80] Make garbage collection invisible --- tools/data_summary.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/data_summary.R b/tools/data_summary.R index 62956877..d8f2b1b6 100644 --- a/tools/data_summary.R +++ b/tools/data_summary.R @@ -34,5 +34,5 @@ data_summary <- function(data_cube, recipe) { info(recipe$Run$logger, i) } info(recipe$Run$logger, "---------------------------------------------") - gc() + invisible(gc()) } -- GitLab From 5df801fadaadca22e1a4e8273d3b2a6caa8be662 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 22 Feb 2023 17:00:26 +0100 Subject: [PATCH 79/80] Allow tolerance in grid consistency check --- modules/Loading/Loading.R | 4 ++-- modules/Loading/Loading_decadal.R | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 53d41cc2..a6b185d2 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -291,7 +291,7 @@ load_datasets <- function(recipe) { # Check for consistency between hcst and obs grid if (!(recipe$Analysis$Regrid$type == 'none')) { - if (!identical(as.vector(hcst$lat), as.vector(obs$lat))) { + if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) { lat_error_msg <- paste("Latitude mismatch between hcst and obs.", "Please check the original grids and the", "regrid parameters in your recipe.") @@ -304,7 +304,7 @@ load_datasets <- function(recipe) { info(recipe$Run$logger, obs_lat_msg) stop("hcst and obs don't share the same latitudes.") } - if (!identical(as.vector(hcst$lon), as.vector(obs$lon))) { + if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) { lon_error_msg <- paste("Longitude mismatch between hcst and obs.", "Please check the original grids and the", "regrid parameters in your recipe.") diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R index 0a95d9a4..e3677e1d 100644 --- a/modules/Loading/Loading_decadal.R +++ b/modules/Loading/Loading_decadal.R @@ -387,7 +387,7 @@ load_datasets <- function(recipe) { # lat and lon attributes if (!(recipe$Analysis$Regrid$type == 'none')) { - if (!identical(as.vector(hcst$lat), as.vector(obs$lat))) { + if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) { lat_error_msg <- paste("Latitude mismatch between hcst and obs.", "Please check the original grids and the", "regrid parameters in your recipe.") @@ -401,7 +401,7 @@ load_datasets <- function(recipe) { stop("hcst and obs don't share the same latitudes.") } - if (!identical(as.vector(hcst$lon), as.vector(obs$lon))) { + if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) { lon_error_msg <- paste("Longitude mismatch between hcst and obs.", "Please check the original grids and the", "regrid parameters in your recipe.") -- GitLab From b248fb1cb1742d96856723a374b7260d334ad5b9 Mon Sep 17 00:00:00 2001 From: vagudets Date: Thu, 23 Feb 2023 15:57:42 +0100 Subject: [PATCH 80/80] Change seasonal system ID names from /esarchive conventions to official names --- conf/archive.yml | 41 +++++++++++++------ conf/grid_description/griddes_system51c3s.txt | 17 ++++++++ conf/output_dictionaries/scorecards.yml | 21 +++++----- modules/Loading/Loading.R | 3 +- .../testing_recipes/recipe_seasonal-tests.yml | 8 ++-- .../recipe_system5c3s-rsds.yml | 4 +- .../testing_recipes/recipe_system5c3s-tas.yml | 4 +- .../recipe_system7c3s-prlr.yml | 4 +- .../testing_recipes/recipe_system7c3s-tas.yml | 4 +- .../recipe_tas-daily-regrid-to-reference.yml | 4 +- .../recipe_tas-daily-regrid-to-system.yml | 4 +- .../recipe_test-new-metrics.yml | 4 +- .../testing_recipes/recipe_test_anomalies.yml | 4 +- .../testing_recipes/recipe_testing_nadia.yml | 4 +- modules/Saving/paths2save.R | 13 +++--- recipes/recipe_splitting_example.yml | 6 +-- recipes/seasonal_oper.yml | 4 +- .../tests/recipe_seasonal_two-variables.yml | 4 +- tests/recipes/recipe-seasonal_daily_1.yml | 4 +- tests/recipes/recipe-seasonal_monthly_1.yml | 4 +- tools/check_recipe.R | 1 + 21 files changed, 97 insertions(+), 65 deletions(-) create mode 100644 conf/grid_description/griddes_system51c3s.txt diff --git a/conf/archive.yml b/conf/archive.yml index 0251beaf..decc387c 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -3,7 +3,7 @@ archive: src: "/esarchive/" System: - system5c3s: + ECMWF-SEAS5: name: "ECMWF SEAS5" institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/ecmwf/system5c3s/" @@ -21,7 +21,24 @@ archive: calendar: "proleptic_gregorian" time_stamp_lag: "0" reference_grid: "/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20180501.nc" - system7c3s: + ECMWF-SEAS5.1: + name: "ECMWF SEAS5 (v5.1)" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/ecmwf/system51c3s/" + daily_mean: {"tas":"_f6h/", "prlr":"_s0-24h/", "sfcWind":"_f6h/", + "uas":"_f6h/", "vas":"_f6h/", "psl":"_f6h/", + "tdps":"_f6h/"} + monthly_mean: {"tas":"_f6h/", "rsds":"_s0-24h/", "prlr":"_s0-24h/", + "sfcWind":"_f6h/", "tasmin":"_f24h/", "tasmax":"_f24h/", + "uas":"_f6h/", "vas":"_f6h/", "psl":"_f6h/", + "tdps":"_f6h/"} + nmember: + fcst: 51 + hcst: 25 + calendar: "proleptic_gregorian" + time_stamp_lag: "0" + reference_grid: "conf/grid_description/griddes_system51c3s.txt" + Meteo-France-System7: name: "Meteo-France System 7" institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/meteofrance/system7c3s/" @@ -34,7 +51,7 @@ archive: time_stamp_lag: "+1" calendar: "proleptic_gregorian" reference_grid: "conf/grid_description/griddes_system7c3s.txt" - system21_m1: + DWD-GCFS2.1: name: "DWD GCFS 2.1" institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/dwd/system21_m1/" @@ -47,7 +64,7 @@ archive: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_system21_m1.txt" - system35c3s: + CMCC-SPS3.5: name: "CMCC-SPS3.5" institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/cmcc/system35c3s/" @@ -60,7 +77,7 @@ archive: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_system35c3s.txt" - system2c3s: + JMA-CPS2: name: "JMA System 2" institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/jma/system2c3s/" @@ -72,7 +89,7 @@ archive: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_system2c3s.txt" - eccc1: + ECCC-CanCM4i: name: "ECCC CanCM4i" institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/eccc/eccc1/" @@ -84,8 +101,8 @@ archive: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_eccc1.txt" - glosea6_system600-c3s: - name: "UKMO GloSea 6 6.0" + UK-MetOffice-Glosea600: + name: "UK MetOffice GloSea 6 (v6.0)" institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/ukmo/glosea6_system600-c3s/" monthly_mean: {"tas":"_f6h/", "tasmin":"_f24h/", @@ -96,7 +113,7 @@ archive: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_ukmo600.txt" - ncep-cfsv2: + NCEP-CFSv2: name: "NCEP CFSv2" institution: "NOAA NCEP" #? src: "exp/ncep/cfs-v2/" @@ -109,7 +126,7 @@ archive: time_stamp_lag: "0" reference_grid: "conf/grid_description/griddes_ncep-cfsv2.txt" Reference: - era5: + ERA5: name: "ERA5" institution: "European Centre for Medium-Range Weather Forecasts" src: "recon/ecmwf/era5/" @@ -127,7 +144,7 @@ archive: "ta500":"_f1h-r1440x721cds/", "ta850":"_f1h-r1440x721cds/"} calendar: "standard" reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" - era5land: + ERA5-Land: name: "ERA5-Land" institution: "European Centre for Medium-Range Weather Forecasts" src: "recon/ecmwf/era5land/" @@ -138,7 +155,7 @@ archive: "sfcWind":"_f1h/", "rsds":"_f1h/"} calendar: "proleptic_gregorian" reference_grid: "/esarchive/recon/ecmwf/era5land/daily_mean/tas_f1h/tas_201805.nc" - uerra: + UERRA: name: "ECMWF UERRA" institution: "European Centre for Medium-Range Weather Forecasts" src: "recon/ecmwf/uerra_mescan/" diff --git a/conf/grid_description/griddes_system51c3s.txt b/conf/grid_description/griddes_system51c3s.txt new file mode 100644 index 00000000..9610e9ef --- /dev/null +++ b/conf/grid_description/griddes_system51c3s.txt @@ -0,0 +1,17 @@ +# +# Grid description file for ECMWF SEAS5 v5.1 +# +gridtype = lonlat +gridsize = 64800 +xname = lon +xlongname = longitude +xunits = degrees_east +yname = lat +ylongname = latitude +yunits = degrees_north +xsize = 360 +ysize = 180 +xfirst = 0.5 +xinc = 1 +yfirst = 89.5 +yinc = -1 diff --git a/conf/output_dictionaries/scorecards.yml b/conf/output_dictionaries/scorecards.yml index c5071987..fa92042c 100644 --- a/conf/output_dictionaries/scorecards.yml +++ b/conf/output_dictionaries/scorecards.yml @@ -1,26 +1,29 @@ System: - system5c3s: + ECMWF-SEAS5: short_name: "ecmwfs5" display_name: "ECMWF System 5" - system7c3s: + ECMWF-SEAS5.1: + short_name: "ecmwfs51" + display_name: "ECMWF System 5.1" + Meteo-France-System7: short_name: "meteofrances7" display_name: "Meteo-France System 7" - system21_m1: + DWD-GCFS2.1: short_name: "dwds21" display_name: "DWD System 21" - system35c3s: + CMCC-SPS3.5: short_name: "cmccs35" display_name: "CMCC System 35" - system2c3s: + JMA-CPS2: short_name: "jmas2" display_name: "JMA System 2" - eccc1: + ECCC-CanCM4i: short_name: "ecccs1" display_name: "ECCC System 1" - glosea6_system600-c3s: + UK-MetOffice-Glosea600: short_name: "ukmos600" display_name: "UK Met Office System 600" - ncep-cfsv2: + NCEP-CFSv2: short_name: "nceps2" display_name: "NCEP System 2" Reference: @@ -33,5 +36,3 @@ Reference: uerra: short_name: "uerra_mescan" display_name: "UERRA MESCAN" - - diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index a6b185d2..598ddd0e 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -19,7 +19,6 @@ load_datasets <- function(recipe) { lons.max <- recipe$Analysis$Region$lonmax ref.name <- recipe$Analysis$Datasets$Reference$name exp.name <- recipe$Analysis$Datasets$System$name - variable <- recipe$Analysis$Variables$name store.freq <- recipe$Analysis$Variables$freq @@ -55,7 +54,7 @@ load_datasets <- function(recipe) { reference_descrip <- archive$Reference[[ref.name]] freq.obs <- unlist(reference_descrip[[store.freq]][variable]) obs.dir <- reference_descrip$src - fcst.dir <- exp_descrip$src + fcst.dir <- exp_descrip$src hcst.dir <- exp_descrip$src fcst.nmember <- exp_descrip$nmember$fcst hcst.nmember <- exp_descrip$nmember$hcst diff --git a/modules/Loading/testing_recipes/recipe_seasonal-tests.yml b/modules/Loading/testing_recipes/recipe_seasonal-tests.yml index 4e8128e7..cda98c91 100644 --- a/modules/Loading/testing_recipes/recipe_seasonal-tests.yml +++ b/modules/Loading/testing_recipes/recipe_seasonal-tests.yml @@ -8,17 +8,17 @@ Analysis: freq: monthly_mean Datasets: System: - name: system21_m1 + name: ECMWF-SEAS5.1 Multimodel: False Reference: - name: era5 + name: ERA5 Time: - sdate: '1101' + sdate: '0101' fcst_year: hcst_start: '2000' hcst_end: '2015' ftime_min: 1 - ftime_max: 6 + ftime_max: 2 Region: latmin: 30 latmax: 50 diff --git a/modules/Loading/testing_recipes/recipe_system5c3s-rsds.yml b/modules/Loading/testing_recipes/recipe_system5c3s-rsds.yml index 94fc716c..0cb2d29f 100644 --- a/modules/Loading/testing_recipes/recipe_system5c3s-rsds.yml +++ b/modules/Loading/testing_recipes/recipe_system5c3s-rsds.yml @@ -8,10 +8,10 @@ Analysis: freq: monthly_mean Datasets: System: - name: system5c3s + name: ECMWF-SEAS5 Multimodel: False Reference: - name: era5 + name: ERA5 Time: sdate: '1101' fcst_year: '2020' diff --git a/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml b/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml index 3a2bc72e..31ae079d 100644 --- a/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml +++ b/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml @@ -8,10 +8,10 @@ Analysis: freq: monthly_mean Datasets: System: - name: system5c3s + name: ECMWF-SEAS5 Multimodel: no Reference: - name: era5 + name: ERA5 Time: sdate: '0601' fcst_year: '2020' diff --git a/modules/Loading/testing_recipes/recipe_system7c3s-prlr.yml b/modules/Loading/testing_recipes/recipe_system7c3s-prlr.yml index 23b630b5..58030bf3 100644 --- a/modules/Loading/testing_recipes/recipe_system7c3s-prlr.yml +++ b/modules/Loading/testing_recipes/recipe_system7c3s-prlr.yml @@ -8,10 +8,10 @@ Analysis: freq: monthly_mean Datasets: System: - name: system7c3s + name: Meteo-France-System7 Multimodel: False Reference: - name: era5 + name: ERA5 Time: sdate: '1101' fcst_year: '2020' diff --git a/modules/Loading/testing_recipes/recipe_system7c3s-tas.yml b/modules/Loading/testing_recipes/recipe_system7c3s-tas.yml index df82c349..c8d3b5e8 100644 --- a/modules/Loading/testing_recipes/recipe_system7c3s-tas.yml +++ b/modules/Loading/testing_recipes/recipe_system7c3s-tas.yml @@ -8,10 +8,10 @@ Analysis: freq: monthly_mean Datasets: System: - name: system7c3s + name: Meteo-France-System7 Multimodel: False Reference: - name: era5 + name: ERA5 Time: sdate: '1101' fcst_year: '2020' diff --git a/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-reference.yml b/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-reference.yml index 364d3dd6..b14c90e1 100644 --- a/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-reference.yml +++ b/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-reference.yml @@ -9,10 +9,10 @@ Analysis: freq: daily_mean # Mandatory, str: either monthly_mean or daily_mean Datasets: System: - name: system5c3s # Mandatory, str: System codename. See docu. + name: ECMWF-SEAS5 # Mandatory, str: System codename. See docu. Multimodel: no # Mandatory, bool: Either yes/true or no/false Reference: - name: era5 # Mandatory, str: Reference codename. See docu. + name: ERA5 # Mandatory, str: Reference codename. See docu. Time: sdate: '1101' fcst_year: '2020' # Optional, int: Forecast year 'YYYY' diff --git a/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-system.yml b/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-system.yml index 244a5654..5c899f97 100644 --- a/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-system.yml +++ b/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-system.yml @@ -8,10 +8,10 @@ Analysis: freq: daily_mean Datasets: System: - name: system5c3s + name: ECMWF-SEAS5 Multimodel: no Reference: - name: era5 + name: ERA5 Time: sdate: '1101' fcst_year: '2020' diff --git a/modules/Loading/testing_recipes/recipe_test-new-metrics.yml b/modules/Loading/testing_recipes/recipe_test-new-metrics.yml index df84138d..b5745292 100644 --- a/modules/Loading/testing_recipes/recipe_test-new-metrics.yml +++ b/modules/Loading/testing_recipes/recipe_test-new-metrics.yml @@ -8,10 +8,10 @@ Analysis: freq: monthly_mean Datasets: System: - name: system7c3s + name: Meteo-France-System7 Multimodel: False Reference: - name: era5 + name: ERA5 Time: sdate: '1101' fcst_year: '2020' diff --git a/modules/Loading/testing_recipes/recipe_test_anomalies.yml b/modules/Loading/testing_recipes/recipe_test_anomalies.yml index cdf5e3ca..287f9a98 100644 --- a/modules/Loading/testing_recipes/recipe_test_anomalies.yml +++ b/modules/Loading/testing_recipes/recipe_test_anomalies.yml @@ -8,10 +8,10 @@ Analysis: freq: monthly_mean Datasets: System: - name: system5c3s + name: ECMWF-SEAS5 Multimodel: False Reference: - name: era5 + name: ERA5 Time: sdate: '1101' fcst_year: '2020' diff --git a/modules/Loading/testing_recipes/recipe_testing_nadia.yml b/modules/Loading/testing_recipes/recipe_testing_nadia.yml index 60711981..e6b2bc02 100644 --- a/modules/Loading/testing_recipes/recipe_testing_nadia.yml +++ b/modules/Loading/testing_recipes/recipe_testing_nadia.yml @@ -8,10 +8,10 @@ Analysis: freq: monthly_mean Datasets: System: - name: system5c3s + name: ECMWF-SEAS5 Multimodel: False Reference: - name: era5 + name: ERA5 Time: sdate: '1101' fcst_year: diff --git a/modules/Saving/paths2save.R b/modules/Saving/paths2save.R index 5cb3326f..93196b86 100644 --- a/modules/Saving/paths2save.R +++ b/modules/Saving/paths2save.R @@ -19,12 +19,13 @@ get_filename <- function(dir, recipe, var, date, agg, file.type) { "country" = {gg <- "-country"}, "global" = {gg <- ""}) + system <- gsub('.','', recipe$Analysis$Datasets$System$name, fixed = T) + reference <- gsub('.','', recipe$Analysis$Datasets$Reference$name, fixed = T) + if (tolower(recipe$Analysis$Output_format) == 'scorecards') { # Define output dir name accordint to Scorecards format dict <- read_yaml("conf/output_dictionaries/scorecards.yml") # Get necessary names - system <- dict$System[[recipe$Analysis$Datasets$System$name]]$short_name - reference <- dict$Reference[[recipe$Analysis$Datasets$Reference$name]]$short_name hcst_start <- recipe$Analysis$Time$hcst_start hcst_end <- recipe$Analysis$Time$hcst_end @@ -51,9 +52,7 @@ get_filename <- function(dir, recipe, var, date, agg, file.type) { "probs" = {file <- paste0(var, gg, "-probs_", date)}, "bias" = {file <- paste0(var, gg, "-bias_", date)}) } - return(paste0(dir, file, ".nc")) - } get_dir <- function(recipe, agg = "global") { @@ -62,20 +61,18 @@ get_dir <- function(recipe, agg = "global") { # startdate, and aggregation. ## TODO: Get aggregation from recipe - outdir <- paste0(recipe$Run$output_dir, "/outputs/") ## TODO: multivar case variable <- recipe$Analysis$Variables$name + system <- gsub('.','', recipe$Analysis$Datasets$System$name, fixed = T) if (tolower(recipe$Analysis$Output_format) == 'scorecards') { # Define output dir name accordint to Scorecards format dict <- read_yaml("conf/output_dictionaries/scorecards.yml") - system <- dict$System[[recipe$Analysis$Datasets$System$name]]$short_name + # system <- dict$System[[recipe$Analysis$Datasets$System$name]]$short_name dir <- paste0(outdir, "/", system, "/", variable, "/") - } else { # Default generic output format based on FOCUS - system <- gsub('.','', recipe$Analysis$Datasets$System$name, fixed = T) # Get startdate or hindcast period if (!is.null(recipe$Analysis$Time$fcst_year)) { if (tolower(recipe$Analysis$Horizon) == 'decadal') { diff --git a/recipes/recipe_splitting_example.yml b/recipes/recipe_splitting_example.yml index e62611ab..78a4d18c 100644 --- a/recipes/recipe_splitting_example.yml +++ b/recipes/recipe_splitting_example.yml @@ -17,11 +17,11 @@ Analysis: - {name: prlr, freq: monthly_mean} Datasets: System: # multiple systems for single model, split if Multimodel = F - - {name: system7c3s} - - {name: system5c3s} + - {name: Meteo-France-System7} + - {name: ECMWF-SEAS5} Multimodel: False # single option Reference: - - {name: era5} # multiple references for single model? + - {name: ERA5} # multiple references for single model? Time: sdate: # list, split - '1101' diff --git a/recipes/seasonal_oper.yml b/recipes/seasonal_oper.yml index 5e5f61fc..a1351e37 100644 --- a/recipes/seasonal_oper.yml +++ b/recipes/seasonal_oper.yml @@ -26,10 +26,10 @@ Analysis: - None Datasets: System: - - name: system5c3s # list of strs + - name: ECMWF-SEAS5 # list of strs Multimodel: False # boolean, if true system above are aggregated into single multi-model Reference: # single dict? in the future multiple ref can be an asset - - {name: era5} # str + - {name: ERA5} # str Time: sdate: fcst_syear: ["2017"] # list of ints or None (case where only hcst is verfied) diff --git a/recipes/tests/recipe_seasonal_two-variables.yml b/recipes/tests/recipe_seasonal_two-variables.yml index 89406ece..0cef06b3 100644 --- a/recipes/tests/recipe_seasonal_two-variables.yml +++ b/recipes/tests/recipe_seasonal_two-variables.yml @@ -17,10 +17,10 @@ Analysis: - {name: prlr, freq: monthly_mean} Datasets: System: # multiple systems for single model, split if Multimodel = F - - {name: system5c3s} + - {name: ECMWF-SEAS5} Multimodel: False # single option Reference: - - {name: era5} # multiple references for single model? + - {name: ERA55} # multiple references for single model? Time: sdate: # list, split - '0101' diff --git a/tests/recipes/recipe-seasonal_daily_1.yml b/tests/recipes/recipe-seasonal_daily_1.yml index 52c7c0b8..afa0f496 100644 --- a/tests/recipes/recipe-seasonal_daily_1.yml +++ b/tests/recipes/recipe-seasonal_daily_1.yml @@ -8,10 +8,10 @@ Analysis: freq: daily_mean Datasets: System: - name: system5c3s + name: ECMWF-SEAS5 Multimodel: False Reference: - name: era5 + name: ERA5 Time: sdate: '1201' fcst_year: diff --git a/tests/recipes/recipe-seasonal_monthly_1.yml b/tests/recipes/recipe-seasonal_monthly_1.yml index 00331332..68c58f83 100644 --- a/tests/recipes/recipe-seasonal_monthly_1.yml +++ b/tests/recipes/recipe-seasonal_monthly_1.yml @@ -8,10 +8,10 @@ Analysis: freq: monthly_mean Datasets: System: - name: system7c3s + name: Meteo-France-System7 Multimodel: False Reference: - name: era5 + name: ERA5 Time: sdate: '1101' fcst_year: '2020' diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 0f268733..559d0492 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -43,6 +43,7 @@ check_recipe <- function(recipe) { } # Check time settings if (tolower(recipe$Analysis$Horizon) == "seasonal") { + ## TODO: Specify filesystem archive <- read_yaml(ARCHIVE_SEASONAL)$archive if (!all(TIME_SETTINGS_SEASONAL %in% names(recipe$Analysis$Time))) { error(recipe$Run$logger, -- GitLab