From e78cfbe2c79df84bc9d38c08d9b17d43c6d2161f Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 22 Jun 2022 11:36:49 +0200 Subject: [PATCH 01/68] Remove singleton dimensions after metric computation, Rename Save_data.R --- modules/Loading/testing_recipes/recipe_4.yml | 2 +- modules/Saving/{Save_data.R => Saving.R} | 0 modules/Skill/Skill.R | 24 ++++++++++++++++++++ modules/test_victoria.R | 2 +- 4 files changed, 26 insertions(+), 2 deletions(-) rename modules/Saving/{Save_data.R => Saving.R} (100%) diff --git a/modules/Loading/testing_recipes/recipe_4.yml b/modules/Loading/testing_recipes/recipe_4.yml index e787a9fd..6786bd3d 100644 --- a/modules/Loading/testing_recipes/recipe_4.yml +++ b/modules/Loading/testing_recipes/recipe_4.yml @@ -32,7 +32,7 @@ Analysis: Calibration: method: mse_min Skill: - metric: RPS FRPS RPSS FRPSS BSS10 BSS90 + metric: RPS RPSS FRPSS BSS10 BSS90 Indicators: index: no Output_format: S2S4E diff --git a/modules/Saving/Save_data.R b/modules/Saving/Saving.R similarity index 100% rename from modules/Saving/Save_data.R rename to modules/Saving/Saving.R diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 5fb66b2c..6da63816 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -66,12 +66,15 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { if (metric %in% c('rps', 'frps')) { skill <- RPS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, Fair = Fair, ncores = ncores) + skill <- .drop_dims(skill) skill_metrics[[ metric ]] <- list(skill) # Ranked Probability Skill Score and Fair version } else if (metric %in% c('rpss', 'frpss')) { skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, Fair = Fair, ncores = ncores) + skill$rpss <- .drop_dims(skill$rpss) + skill$sign <- .drop_dims(skill$sign) skill_metrics[[ metric ]] <- list(skill$rpss) skill_metrics[[ paste0(metric, "_significance") ]] <- list(skill$sign) @@ -79,6 +82,8 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { } else if (metric == 'bss10') { skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, prob_thresholds = 0.1, Fair = Fair, ncores = ncores) + skill$rpss <- .drop_dims(skill$rpss) + skill$sign <- .drop_dims(skill$sign) skill_metrics[[ metric ]] <- list(skill$rpss) skill_metrics[[ paste0(metric, "_significance") ]] <- list(skill$sign) @@ -86,6 +91,8 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { } else if (metric == 'bss90') { skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, prob_thresholds = 0.9, Fair = Fair, ncores = ncores) + skill$rpss <- .drop_dims(skill$rpss) + skill$sign <- .drop_dims(skill$sign) skill_metrics[[ metric ]] <- list(skill$rpss) skill_metrics[[ paste0(metric, "_significance") ]] <- list(skill$sign) @@ -98,6 +105,8 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { method = 'pearson', memb_dim = memb_dim, ncores = ncores) + skill <- lapply(skill, function(x) { + .drop_dims(x)}) skill_metrics[[ metric ]] <- list(skill$corr) skill_metrics[[ paste0(metric, "_p.value") ]] <- list(skill$p.val) skill_metrics[[ paste0(metric, "_conf.low") ]] <- list(skill$conf.lower) @@ -123,3 +132,18 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { return(skill_metrics) } + +.drop_dims <- function(metric_array) { + # Drop all singleton dimensions except for 'ensemble' + metric_array <- drop(metric_array) + # If array has memb_exp (EnsCorr case), change name to 'ensemble' + # Otherwise, add ensemble dim + if ("exp_memb" %in% names(dim(metric_array))) { + names(dim(metric_array))[which(names(dim(metric_array)) == + "exp_memb")] <- "ensemble" + } else { + dim(metric_array) <- c(dim(metric_array), "ensemble" = 1) + } + + return(metric_array) +} diff --git a/modules/test_victoria.R b/modules/test_victoria.R index a8125b16..512fac3a 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -5,7 +5,7 @@ recipe_file <- "modules/Loading/testing_recipes/recipe_4.yml" source("modules/Loading/Loading.R") source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") -source("modules/Saving/Save_data.R") +source("modules/Saving/Saving.R") # Load datasets data <- load_datasets(recipe_file) -- GitLab From 7dbd77579879477b124df91ada968fc3cbb99fb7 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 22 Jun 2022 14:31:54 +0200 Subject: [PATCH 02/68] Export daily mean skill metrics to netCDF --- modules/Loading/testing_recipes/recipe_3.yml | 6 +++--- modules/test_victoria.R | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/Loading/testing_recipes/recipe_3.yml b/modules/Loading/testing_recipes/recipe_3.yml index 4488d43d..1f47545f 100644 --- a/modules/Loading/testing_recipes/recipe_3.yml +++ b/modules/Loading/testing_recipes/recipe_3.yml @@ -17,9 +17,9 @@ Analysis: fcst_syear: '2020' fcst_sday: '1101' hcst_start: '1993' - hcst_end: '2016' + hcst_end: '2003' leadtimemin: 0 - leadtimemax: 0 + leadtimemax: 1 Region: latmin: -10 latmax: 10 @@ -32,7 +32,7 @@ Analysis: Calibration: method: qmap Skill: - metric: RPSS + metric: FRPS RPSS Indicators: index: no Output_format: S2S4E diff --git a/modules/test_victoria.R b/modules/test_victoria.R index 512fac3a..c547c3fc 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -1,6 +1,6 @@ -recipe_file <- "modules/Loading/testing_recipes/recipe_4.yml" +recipe_file <- "modules/Loading/testing_recipes/recipe_3.yml" source("modules/Loading/Loading.R") source("modules/Calibration/Calibration.R") -- GitLab From f775e6f6fd128889776a5a728570f69e2fdd1be5 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 22 Jun 2022 14:32:59 +0200 Subject: [PATCH 03/68] Include option for daily data in saving module --- modules/Saving/Saving.R | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 66f25baa..315e83fc 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -1,19 +1,25 @@ ## TODO: Implement wrapper to get grid and time info? -get_times <- function(fcst.horizon, leadtimes, sdate) { +get_times <- function(store.freq, fcst.horizon, leadtimes, sdate) { # Generates time dimensions and the corresponding metadata. - - switch(fcst.horizon, + + if (store.freq == "monthly_mean") { + switch(fcst.horizon, ## TODO: Remove "sub_obs"? ## TODO: implement daily case - "seasonal" = {len <- length(leadtimes); ref <- 'months since '; - stdname <- paste(strtoi(leadtimes), collapse=", ")}, - "sub_obs" = {len <- 52; ref <- 'week of the year '; - stdname <- paste(strtoi(leadtimes), collapse=", ")}, - "subseasonal" = {len <- 4; ref <- 'weeks since '; - stdname <- ''} - ) - + "seasonal" = {len <- length(leadtimes); ref <- 'months since '; + stdname <- paste(strtoi(leadtimes), collapse=", ")}, + "sub_obs" = {len <- 52; ref <- 'week of the year '; + stdname <- paste(strtoi(leadtimes), collapse=", ")}, + "subseasonal" = {len <- 4; ref <- 'weeks since '; + stdname <- ''}) + + } else if (store.freq == "daily_mean") { + switch(fcst.horizon, + "seasonal" = {len <- length(leadtimes); ref <- 'days since '; + stdname <- paste(strtoi(leadtimes), collapse=", ")}) + } + ## TODO: Get correct date for each time step time <- 1:len ## Is this correct? I think it needs to be changed dim(time) <- length(time) @@ -74,7 +80,6 @@ save_metrics <- function(skill, recipe, data_cube, outfile, - leadtimes, agg="global") { # Define grid dimensions and names -- GitLab From 8574336fabdc8f2e634cf8a541aa0009b35ef7fb Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 27 Jun 2022 10:55:08 +0200 Subject: [PATCH 04/68] Correct time indices for daily case, change time unit format --- modules/Saving/Saving.R | 42 +++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 315e83fc..d78a6b90 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -2,35 +2,33 @@ get_times <- function(store.freq, fcst.horizon, leadtimes, sdate) { # Generates time dimensions and the corresponding metadata. + ## TODO: Get correct indices according to the start date for monthly case. + ## TODO: Fotmat units according to standards. if (store.freq == "monthly_mean") { switch(fcst.horizon, - ## TODO: Remove "sub_obs"? - ## TODO: implement daily case - "seasonal" = {len <- length(leadtimes); ref <- 'months since '; + "seasonal" = {time <- leadtimes; ref <- 'months since '; stdname <- paste(strtoi(leadtimes), collapse=", ")}, - "sub_obs" = {len <- 52; ref <- 'week of the year '; - stdname <- paste(strtoi(leadtimes), collapse=", ")}, "subseasonal" = {len <- 4; ref <- 'weeks since '; stdname <- ''}) + # time <- 1:len ## Is this correct? I think it needs to be changed } else if (store.freq == "daily_mean") { switch(fcst.horizon, - "seasonal" = {len <- length(leadtimes); ref <- 'days since '; + "seasonal" = {time <- leadtimes; ref <- 'days since '; stdname <- paste(strtoi(leadtimes), collapse=", ")}) } - ## TODO: Get correct date for each time step - time <- 1:len ## Is this correct? I think it needs to be changed dim(time) <- length(time) # metadata <- list(time = list(standard_name = stdname, - metadata <- list(time = list(units = paste0(ref, sdate, ' 00:00:00'))) + sdate <- as.Date(sdate, format = '%Y%m%d') # reformatting + metadata <- list(time = list(units = paste0(ref, sdate, 'T00:00:00'))) attr(time, 'variables') <- metadata names(dim(time)) <- 'time' time_step <- 1 dim(time_step) <- length(time_step) - metadata <- list(time_step = list(units = paste0(ref, sdate, ' 00:00:00'))) + metadata <- list(time_step = list(units = paste0(ref, sdate, 'T00:00:00'))) attr(time_step, 'variables') <- metadata names(dim(time_step)) <- 'time_step' @@ -112,8 +110,24 @@ save_metrics <- function(skill, # Time data and metadata fcst.horizon <- tolower(recipe$Analysis$Horizon) - leadtimes <- seq(from = recipe$Analysis$Time$leadtimemin, - to = recipe$Analysis$Time$leadtimemax) + store.freq <- recipe$Analysis$Variables$freq + + # Generate vector containing leadtimes + if (store.freq == "monthly_mean") { + leadtimes <- seq(from = recipe$Analysis$Time$leadtimemin, + to = recipe$Analysis$Time$leadtimemax) + } else { + # leadtimes <- seq(1:dim(data$hcst$data)[['time']]) + dates <- data$hcst$Dates$start + lubridate::year(dates) <- as.numeric(recipe$Analysis$Time$hcst_start) + dates <- unique(as.Date(dates, format = '%Y-%m-%d')) + init_date <- paste0(as.numeric(recipe$Analysis$Time$hcst_start), + as.numeric(recipe$Analysis$Time$sdate$fcst_sday)) + init_date <- as.Date(init_date, format = '%Y%m%d') + + leadtimes <- as.numeric(dates - init_date) + } + # If a fcst is provided, use that as the ref. year. Otherwise use 1970. if (!is.null(recipe$Analysis$Time$sdate$fcst_syear)) { fcst.sdate <- paste0(recipe$Analysis$Time$sdate$fcst_syear, @@ -122,11 +136,11 @@ save_metrics <- function(skill, fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate$fcst_sday) } - times <- get_times(fcst.horizon, leadtimes, fcst.sdate) + times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate) time <- times$time time_step <- times$time_step - # Grid data and metadata + # Grid data and metadata # if (tolower(agg) == "country") { # country <- get_countries(grid) # ArrayToNc(append(country, time, skill, time_step), outfile) -- GitLab From a130f3794912d6e22dba38ef1461c09ddcab6af2 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 27 Jun 2022 11:22:23 +0200 Subject: [PATCH 05/68] Add 'raw' option to Calibration module' --- modules/Calibration/Calibration.R | 163 ++++++++++++++++-------------- 1 file changed, 89 insertions(+), 74 deletions(-) diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 1478ecd5..0346c897 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -13,49 +13,46 @@ calibrate_datasets <- function(data, recipe) { obs <- data$obs fcst <- data$fcst - # Calibration function params - method <- recipe$Analysis$Workflow$Calibration$method - mm <- recipe$Analysis$Datasets$Multimodel - ncores <- 4 - na.rm <- T + method <- tolower(recipe$Analysis$Workflow$Calibration$method) - # Replicate observation array for the multi-model case - if (mm) { - obs.mm <- obs$data - for(dat in 1:(dim(hcst$data)['dat'][[1]]-1)) { - obs.mm <- abind(obs.mm, obs$data, - along=which(names(dim(obs$data)) == 'dat')) - } - names(dim(obs.mm)) <- names(dim(obs$data)) - obs$data <- obs.mm - remove(obs.mm) - } + if (method == "raw") { + warning("The Calibration module has been called, but the calibration ", + "method in the recipe is 'raw'. The hcst and fcst will not be ", + "calibrated.") + fcst_calibrated <- fcst + hcst_calibrated <- hcst + CALIB_MSG <- "##### NO CALIBRATION PERFORMED #####" - if (recipe$Analysis$Variables$freq == "monthly_mean") { + } else { + # Calibration function params + mm <- recipe$Analysis$Datasets$Multimodel + ncores <- 4 + na.rm <- T - CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based") - ## TODO: implement other calibration methods - ## TODO: Restructure the code? - if (!(method %in% CST_CALIB_METHODS)) { - stop("Calibration method in the recipe is not available for monthly", - " data.") - } else { - ## Alba's version of CST_Calibration (pending merge) is being used - # Calibrate the hindcast - hcst_calibrated <- CST_Calibration(hcst, obs, - cal.method = method, - eval.method = "leave-one-out", - multi.model = mm, - na.fill = TRUE, - na.rm = na.rm, - apply_to = NULL, - alpha = NULL, - memb_dim = "ensemble", - sdate_dim = "syear", - ncores = ncores) - if (!is.null(fcst)) { - # Calibrate the forecast - fcst_calibrated <- CST_Calibration(hcst, obs, fcst, + # Replicate observation array for the multi-model case + if (mm) { + obs.mm <- obs$data + for(dat in 1:(dim(hcst$data)['dat'][[1]]-1)) { + obs.mm <- abind(obs.mm, obs$data, + along=which(names(dim(obs$data)) == 'dat')) + } + names(dim(obs.mm)) <- names(dim(obs$data)) + obs$data <- obs.mm + remove(obs.mm) + } + + if (recipe$Analysis$Variables$freq == "monthly_mean") { + + CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based") + ## TODO: implement other calibration methods + ## TODO: Restructure the code? + if (!(method %in% CST_CALIB_METHODS)) { + stop("Calibration method in the recipe is not available for monthly", + " data.") + } else { + ## Alba's version of CST_Calibration (pending merge) is being used + # Calibrate the hindcast + hcst_calibrated <- CST_Calibration(hcst, obs, cal.method = method, eval.method = "leave-one-out", multi.model = mm, @@ -66,47 +63,65 @@ calibrate_datasets <- function(data, recipe) { memb_dim = "ensemble", sdate_dim = "syear", ncores = ncores) + if (!is.null(fcst)) { + # Calibrate the forecast + fcst_calibrated <- CST_Calibration(hcst, obs, fcst, + cal.method = method, + eval.method = "leave-one-out", + multi.model = mm, + na.fill = TRUE, + na.rm = na.rm, + apply_to = NULL, + alpha = NULL, + memb_dim = "ensemble", + sdate_dim = "syear", + ncores = ncores) + } else { + fcst_calibrated <- NULL + } + + } + + } else if (recipe$Analysis$Variables$freq == "daily_mean") { + # Daily data calibration using Quantile Mapping + if (!(method %in% c("qmap"))) { + stop("Calibration method in the recipe is not available at daily ", + "frequency. Only quantile mapping 'qmap' is implemented.") + } + # Calibrate the hindcast + hcst_calibrated <- CST_QuantileMapping(hcst, obs, + exp_cor = NULL, + sample_dims = c("syear", + "time", + "ensemble"), + sample_length = NULL, + method = "QUANT", + ncores = ncores, + na.rm = na.rm) + + if (!is.null(fcst)) { + # Calibrate the forecast + fcst_calibrated <- CST_QuantileMapping(hcst, obs, + exp_cor = fcst, + sample_dims = c("syear", + "time", + "ensemble"), + sample_length = NULL, + method = "QUANT", + ncores = ncores, + na.rm = na.rm) } else { fcst_calibrated <- NULL } - - } - - } else if (recipe$Analysis$Variables$freq == "daily_mean") { - # Daily data calibration using Quantile Mapping - if (!(method %in% c("qmap"))) { - stop("Calibration method in the recipe is not available at daily ", - "frequency. Only quantile mapping 'qmap' is implemented.") + + CALIB_MSG <- "##### CALIBRATION COMPLETE #####" } - # Calibrate the hindcast - hcst_calibrated <- CST_QuantileMapping(hcst, obs, - exp_cor = NULL, - sample_dims = c("syear", - "time", - "ensemble"), - sample_length = NULL, - method = "QUANT", - ncores = ncores, - na.rm = na.rm) - if (!is.null(fcst)) { - # Calibrate the forecast - fcst_calibrated <- CST_QuantileMapping(hcst, obs, - exp_cor = fcst, - sample_dims = c("syear", - "time", - "ensemble"), - sample_length = NULL, - method = "QUANT", - ncores = ncores, - na.rm = na.rm) - } else { - fcst_calibrated <- NULL - } } - - print("##### CALIBRATION COMPLETE #####") + + print(CALIB_MSG) ## TODO: Return observations too? ## TODO: Change naming convention? return(list(hcst = hcst_calibrated, fcst = fcst_calibrated)) + } -- GitLab From 14af366ce242fddfc70fd488bb53090c320ef71f Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 27 Jun 2022 11:32:29 +0200 Subject: [PATCH 06/68] Fix minor bug --- modules/Calibration/Calibration.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 0346c897..5178b10b 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -28,7 +28,7 @@ calibrate_datasets <- function(data, recipe) { mm <- recipe$Analysis$Datasets$Multimodel ncores <- 4 na.rm <- T - + CALIB_MSG <- "##### CALIBRATION COMPLETE #####" # Replicate observation array for the multi-model case if (mm) { obs.mm <- obs$data @@ -114,7 +114,6 @@ calibrate_datasets <- function(data, recipe) { fcst_calibrated <- NULL } - CALIB_MSG <- "##### CALIBRATION COMPLETE #####" } } -- GitLab From b8fdc46036332ebc5caff1095eaa97bf44b174e1 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 27 Jun 2022 15:31:46 +0200 Subject: [PATCH 07/68] Remove nord3 from MODULES file --- MODULES | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/MODULES b/MODULES index abb6e01d..0a01a979 100644 --- a/MODULES +++ b/MODULES @@ -21,17 +21,6 @@ elif [ $BSC_MACHINE == "nord3v2" ]; then module load R/4.1.2-foss-2019b module load OpenMPI/4.0.5-GCC-8.3.0-nord3-v2 -elif [ $BSC_MACHINE == "nord3" ]; then - - module use /gpfs/projects/bsc32/software/suselinux/11/modules/all - module unuse /apps/modules/modulefiles/applications /apps/modules/modulefiles/applications_bis - module unuse /apps/modules/modulefiles/compilers /apps/modules/modulefiles/tools - module unuse /apps/modules/modulefiles/libraries /apps/modules/modulefiles/environment - module unuse /apps/modules/PRACE - - module load CDO/1.9.8-foss-2019b - module load R/3.6.2-foss-2019b - else module load CDO/1.9.8-foss-2015a -- GitLab From f932377da5deaf619b17f43a0f0dda0ef7240b85 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 27 Jun 2022 15:56:57 +0200 Subject: [PATCH 08/68] Add check for consistency between hcst and obs grids --- modules/Loading/Loading.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 9f85c439..71e3e198 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -272,6 +272,14 @@ load_datasets <- function(recipe_file) { # Convert obs to s2dv_cube obs <- as.s2dv_cube(obs) + # Check for consistency between hcst and obs + if (!identical(as.vector(hcst$lat), as.vector(obs$lat))) { + stop("hcst and obs don't share the same latitude.") + } + if (!identical(as.vector(hcst$lon), as.vector(obs$lon))) { + stop("hcst and obs don't share the same longitude.") + } + # Print a summary of the loaded data for the user, for each object data_summary(hcst, store.freq) data_summary(obs, store.freq) -- GitLab From fa98826278b525ec9c81679f4e8d5d18305ae849 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 28 Jun 2022 11:43:06 +0200 Subject: [PATCH 09/68] Remove enscorr from skill_metrics before saving; include functions to build output paths --- modules/Saving/Saving.R | 13 ++++++++- modules/Saving/export_2_nc-s2s4e.R | 4 ++- modules/Saving/paths2save.R | 38 ++++++++++++++++++++++++++ modules/Saving/vitigeoss-vars-dict.yml | 20 ++++++++++++++ 4 files changed, 73 insertions(+), 2 deletions(-) create mode 100644 modules/Saving/paths2save.R create mode 100644 modules/Saving/vitigeoss-vars-dict.yml diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index d78a6b90..4b321bf4 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -1,9 +1,11 @@ ## TODO: Implement wrapper to get grid and time info? +source("modules/Saving/paths2save.R") + get_times <- function(store.freq, fcst.horizon, leadtimes, sdate) { # Generates time dimensions and the corresponding metadata. ## TODO: Get correct indices according to the start date for monthly case. - ## TODO: Fotmat units according to standards. + ## TODO: Format units according to standards. if (store.freq == "monthly_mean") { switch(fcst.horizon, @@ -79,6 +81,15 @@ save_metrics <- function(skill, data_cube, outfile, agg="global") { + + ## TODO: Generate output file + + # Remove ensemble correlation from the list since it should be saved in + # a separate file, as it has 'ensemble' dim. + if ("enscorr" %in% names(skill)) { + enscorr_metrics <- grep("enscorr", names(skill)) + skill <- skill[-enscorr_metrics] + } # Define grid dimensions and names lalo <- c('longitude', 'latitude') diff --git a/modules/Saving/export_2_nc-s2s4e.R b/modules/Saving/export_2_nc-s2s4e.R index 9adb6d80..abf526e5 100644 --- a/modules/Saving/export_2_nc-s2s4e.R +++ b/modules/Saving/export_2_nc-s2s4e.R @@ -22,6 +22,7 @@ save_bias <- function(variable, obs <- data; units <- "ºC"; var.longname <- "Temperature bias" } else { + # Unit conversion data.conv <- convert_data(list(fcst=data,test=data),variable,leadtimes,fcst.type,"forecast") obs <- data.conv$data$fcst; units <- data.conv$units; var.longname <- data.conv$var.longname @@ -199,7 +200,8 @@ save_forecast <- function(variable, } else { fcst <- Reorder(fcst, c('country','member', 'time')) } - + + # Unit conversion fcst.conv <- convert_data(list(fcst=fcst,test=fcst),variable,leadtimes,fcst.type,"forecast") fcst <- fcst.conv$data$fcst; units <- fcst.conv$units; var.longname <- fcst.conv$var.longname diff --git a/modules/Saving/paths2save.R b/modules/Saving/paths2save.R new file mode 100644 index 00000000..af80b833 --- /dev/null +++ b/modules/Saving/paths2save.R @@ -0,0 +1,38 @@ +get_filename <- function(dir, var, date, fcst.sdate, agg, horizon, file.type) { + # This function builds the path of the output file based on directory, + # variable, forecast date, startdate, aggregation, forecast horizon and + # type of metric/forecast/probability. + + if (horizon == "seasonal") { + shortdate <- format(as.Date(as.character(sdate),"%Y%m%d"), "%m") + dd <- "month" + } else { + shortdate <- format(as.Date(as.character(sdate),"%Y%m%d"), "%V") + dd <- "week" + } + + switch (tolower(agg), + "country" = {gg <- "-country"}, + "global" = {gg <- ""}) + + switch (file.type, + "skill" = {file <- paste0(var, gg, "-skill_", dd, shortdate)}) + + return(paste0(dir, file, ".nc")) + +} + +get_dir <- function(outdir, variable, fcst.sdate, agg) { + # This function builds the path for the output directory. The output + # directories will be subdirectories within outdir, organized by variable, + # startdate, and aggregation. + + switch (tolower(agg), + "country" = {dir <- paste0(outdir, "/", variable, + "_country/", fcst.sdate, "/")}, + "global" = {dir <- paste0(outdir, "/", variable, "/", + fcst.sdate, "/")}) + + return(dir) + +} diff --git a/modules/Saving/vitigeoss-vars-dict.yml b/modules/Saving/vitigeoss-vars-dict.yml new file mode 100644 index 00000000..759a59ee --- /dev/null +++ b/modules/Saving/vitigeoss-vars-dict.yml @@ -0,0 +1,20 @@ + +vars: + +# ECVs + tas: + units: "°C" + longname: "2m temperature" + outname: "t2" + sfcWind: + units: "m/s" + longname: "Surface wind speed module" + outname: "wind" + rsds: + units: "W/m2" + longname: "shortwave radiation at ground" + outname: "rswin" + prlr: + units: "mm" + longname: "accumulated precipitation" + outname: "acprec" \ No newline at end of file -- GitLab From 128edf7a4c70e79ff87d5d52c05d5f51dfd00c8b Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 28 Jun 2022 12:56:44 +0200 Subject: [PATCH 10/68] Save skill metrics in generated directory and file path --- modules/Loading/testing_recipes/recipe_4.yml | 2 +- modules/Saving/Saving.R | 46 ++++++++++---------- modules/Saving/paths2save.R | 29 ++++++------ modules/test_victoria.R | 9 ++-- 4 files changed, 47 insertions(+), 39 deletions(-) diff --git a/modules/Loading/testing_recipes/recipe_4.yml b/modules/Loading/testing_recipes/recipe_4.yml index 6786bd3d..5a4f2880 100644 --- a/modules/Loading/testing_recipes/recipe_4.yml +++ b/modules/Loading/testing_recipes/recipe_4.yml @@ -32,7 +32,7 @@ Analysis: Calibration: method: mse_min Skill: - metric: RPS RPSS FRPSS BSS10 BSS90 + metric: RPS RPSS FRPSS BSS10 BSS90 EnsCorr Indicators: index: no Output_format: S2S4E diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 4b321bf4..ec25d67b 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -79,10 +79,10 @@ get_latlon <- function(latitude, longitude) { save_metrics <- function(skill, recipe, data_cube, - outfile, + outdir, agg="global") { - - ## TODO: Generate output file + # This function adds metadata to the skill metrics in 'skill' + # and exports them to a netCDF file inside 'outdir'. # Remove ensemble correlation from the list since it should be saved in # a separate file, as it has 'ensemble' dim. @@ -94,8 +94,7 @@ save_metrics <- function(skill, # Define grid dimensions and names lalo <- c('longitude', 'latitude') - # Remove singleton dimensions from each metric array and rearrange the - # longitude, latitude and time dimensions to the correct order. + # Remove singleton dimensions and rearrange lon, lat and time dims if (tolower(agg) == "global") { ## TODO: Implement metrics with additional non-singleton dimensions ## e.g. 'ensemble' in ensemble correlation. @@ -118,24 +117,23 @@ save_metrics <- function(skill, attr(skill[[i]], 'variables') <- metadata names(dim(skill[[i]])) <- dims } - - # Time data and metadata + + # Time indices and metadata fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq + var <- recipe$Analysis$Variables$name # Generate vector containing leadtimes if (store.freq == "monthly_mean") { leadtimes <- seq(from = recipe$Analysis$Time$leadtimemin, to = recipe$Analysis$Time$leadtimemax) } else { - # leadtimes <- seq(1:dim(data$hcst$data)[['time']]) dates <- data$hcst$Dates$start lubridate::year(dates) <- as.numeric(recipe$Analysis$Time$hcst_start) dates <- unique(as.Date(dates, format = '%Y-%m-%d')) init_date <- paste0(as.numeric(recipe$Analysis$Time$hcst_start), as.numeric(recipe$Analysis$Time$sdate$fcst_sday)) init_date <- as.Date(init_date, format = '%Y%m%d') - leadtimes <- as.numeric(dates - init_date) } @@ -151,20 +149,24 @@ save_metrics <- function(skill, time <- times$time time_step <- times$time_step + ## TODO: Generate output filename + outfile <- get_filename(outdir, var, fcst.sdate, fcst.sdate, + agg, fcst.horizon, "skill") + # Grid data and metadata -# if (tolower(agg) == "country") { -# country <- get_countries(grid) -# ArrayToNc(append(country, time, skill, time_step), outfile) -# } else { - - latitude <- data_cube$lat[1:length(data_cube$lat)] - longitude <- data_cube$lon[1:length(data_cube$lon)] - latlon <- get_latlon(latitude, longitude) - - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, skill, list(time_step)) - ArrayToNc(vars, outfile) + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, skill, time_step), outfile) + } else { + latitude <- data_cube$lat[1:length(data_cube$lat)] + longitude <- data_cube$lon[1:length(data_cube$lon)] + latlon <- get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, skill, list(time_step)) + ArrayToNc(vars, outfile) + } + print("##### SKILL METRICS SAVED TO NETCDF FILE #####") } diff --git a/modules/Saving/paths2save.R b/modules/Saving/paths2save.R index af80b833..08f1342e 100644 --- a/modules/Saving/paths2save.R +++ b/modules/Saving/paths2save.R @@ -1,37 +1,40 @@ +## TODO: Separate by time aggregation + get_filename <- function(dir, var, date, fcst.sdate, agg, horizon, file.type) { # This function builds the path of the output file based on directory, # variable, forecast date, startdate, aggregation, forecast horizon and # type of metric/forecast/probability. if (horizon == "seasonal") { - shortdate <- format(as.Date(as.character(sdate),"%Y%m%d"), "%m") + shortdate <- format(as.Date(as.character(fcst.sdate),"%Y%m%d"), "%m") dd <- "month" } else { - shortdate <- format(as.Date(as.character(sdate),"%Y%m%d"), "%V") + shortdate <- format(as.Date(as.character(fcst.sdate),"%Y%m%d"), "%V") dd <- "week" } - switch (tolower(agg), - "country" = {gg <- "-country"}, - "global" = {gg <- ""}) + switch(tolower(agg), + "country" = {gg <- "-country"}, + "global" = {gg <- ""}) - switch (file.type, - "skill" = {file <- paste0(var, gg, "-skill_", dd, shortdate)}) + switch(file.type, + "skill" = {file <- paste0(var, gg, "-skill_", dd, shortdate)}, + "corr" = {file <- paste0(var, gg,"-corr_",dd,shortdate)}) return(paste0(dir, file, ".nc")) } -get_dir <- function(outdir, variable, fcst.sdate, agg) { +get_dir <- function(outdir, variable, fcst.sdate, agg = "global") { # This function builds the path for the output directory. The output # directories will be subdirectories within outdir, organized by variable, # startdate, and aggregation. - switch (tolower(agg), - "country" = {dir <- paste0(outdir, "/", variable, - "_country/", fcst.sdate, "/")}, - "global" = {dir <- paste0(outdir, "/", variable, "/", - fcst.sdate, "/")}) + switch(tolower(agg), + "country" = {dir <- paste0(outdir, "/", variable, + "_country/", fcst.sdate, "/")}, + "global" = {dir <- paste0(outdir, "/", variable, "/", + fcst.sdate, "/")}) return(dir) diff --git a/modules/test_victoria.R b/modules/test_victoria.R index c547c3fc..14477802 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -1,6 +1,6 @@ -recipe_file <- "modules/Loading/testing_recipes/recipe_3.yml" +recipe_file <- "modules/Loading/testing_recipes/recipe_4.yml" source("modules/Loading/Loading.R") source("modules/Calibration/Calibration.R") @@ -18,5 +18,8 @@ skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe, na.rm = T, ncores = 4) # Export skill metrics onto outfile -outfile <- "/esarchive/scratch/vagudets/auto-s2s-tests/files/skill/test-metrics.nc" -save_metrics(skill_metrics, recipe, data$hcst, outfile, agg = "global") +outdir <- get_dir(recipe$Run$output_dir, + recipe$Analysis$Variables$name, + fcst.sdate) +dir.create(outdir, showWarnings = FALSE, recursive = TRUE) +save_metrics(skill_metrics, recipe, data$hcst, outdir, agg = "global") -- GitLab From 7e236cf3b8b3336d9c6c273aeee19c898d3a1df8 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 28 Jun 2022 15:14:51 +0200 Subject: [PATCH 11/68] Modify call to get_dir() --- modules/Saving/Saving.R | 6 +++--- modules/Saving/paths2save.R | 13 ++++++++++++- modules/test_victoria.R | 4 +--- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index ec25d67b..273d8763 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -117,11 +117,10 @@ save_metrics <- function(skill, attr(skill[[i]], 'variables') <- metadata names(dim(skill[[i]])) <- dims } - + # Time indices and metadata fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq - var <- recipe$Analysis$Variables$name # Generate vector containing leadtimes if (store.freq == "monthly_mean") { @@ -150,7 +149,8 @@ save_metrics <- function(skill, time_step <- times$time_step ## TODO: Generate output filename - outfile <- get_filename(outdir, var, fcst.sdate, fcst.sdate, + outfile <- get_filename(outdir, data_cube$Variable$varName, + fcst.sdate, fcst.sdate, agg, fcst.horizon, "skill") # Grid data and metadata diff --git a/modules/Saving/paths2save.R b/modules/Saving/paths2save.R index 08f1342e..44497841 100644 --- a/modules/Saving/paths2save.R +++ b/modules/Saving/paths2save.R @@ -25,11 +25,22 @@ get_filename <- function(dir, var, date, fcst.sdate, agg, horizon, file.type) { } -get_dir <- function(outdir, variable, fcst.sdate, agg = "global") { +get_dir <- function(recipe, agg = "global") { # This function builds the path for the output directory. The output # directories will be subdirectories within outdir, organized by variable, # startdate, and aggregation. + ## TODO: Get aggregation from recipe + + outdir <- recipe$Run$output_dir + variable <- recipe$Analysis$Variables$name + if (!is.null(recipe$Analysis$Time$sdate$fcst_syear)) { + fcst.sdate <- paste0(recipe$Analysis$Time$sdate$fcst_syear, + recipe$Analysis$Time$sdate$fcst_sday) + } else { + fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate$fcst_sday) + } + switch(tolower(agg), "country" = {dir <- paste0(outdir, "/", variable, "_country/", fcst.sdate, "/")}, diff --git a/modules/test_victoria.R b/modules/test_victoria.R index 14477802..7f210156 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -18,8 +18,6 @@ skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe, na.rm = T, ncores = 4) # Export skill metrics onto outfile -outdir <- get_dir(recipe$Run$output_dir, - recipe$Analysis$Variables$name, - fcst.sdate) +outdir <- get_dir(recipe) dir.create(outdir, showWarnings = FALSE, recursive = TRUE) save_metrics(skill_metrics, recipe, data$hcst, outdir, agg = "global") -- GitLab From ee56cd79e03a62deae9b5b177db149c24ae434fb Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 28 Jun 2022 16:36:59 +0200 Subject: [PATCH 12/68] Add function to generate global attributes --- modules/Saving/Saving.R | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 273d8763..c14cbd9e 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -2,6 +2,24 @@ source("modules/Saving/paths2save.R") +get_global_attributes <- function(recipe) { + # Generates metadata of interest to add to the global attributes of the + # netCDF files. + + hcst_period <- paste0(recipe$Analysis$Time$hcst_start, " to ", + recipe$Analysis$Time$hcst_end) + system <- recipe$Analysis$Datasets$System$name + reference <- recipe$Analysis$Datasets$Reference$name + + attrs <- list(reference_period = hcst_period, + institution = "BSC-CNS", + system = system, + reference = reference) + + return(attrs) + +} + get_times <- function(store.freq, fcst.horizon, leadtimes, sdate) { # Generates time dimensions and the corresponding metadata. ## TODO: Get correct indices according to the start date for monthly case. @@ -83,7 +101,7 @@ save_metrics <- function(skill, agg="global") { # This function adds metadata to the skill metrics in 'skill' # and exports them to a netCDF file inside 'outdir'. - + # Remove ensemble correlation from the list since it should be saved in # a separate file, as it has 'ensemble' dim. if ("enscorr" %in% names(skill)) { @@ -102,6 +120,10 @@ save_metrics <- function(skill, Reorder(drop(x[[1]]), c(lalo, 'time'))}) } + # Add global and variable attributes + global_attributes <- get_global_attributes(recipe) + attr(skill[[1]], 'global_attrs') <- global_attributes + for (i in 1:length(skill)) { ## TODO: create dictionary with proper metadata metric <- names(skill[i]) @@ -114,7 +136,7 @@ save_metrics <- function(skill, } metadata <- list(metric = list(name = metric, standard_name = sdname)) - attr(skill[[i]], 'variables') <- metadata + attr(skill[[i]], 'variables') <- metadata names(dim(skill[[i]])) <- dims } -- GitLab From fd3f8e1b404089800fa82bfaab7c3ca547432402 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 28 Jun 2022 16:54:09 +0200 Subject: [PATCH 13/68] Add recipe for unit testing --- .../recipe_unit-test-daily.yml | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 modules/Loading/testing_recipes/recipe_unit-test-daily.yml diff --git a/modules/Loading/testing_recipes/recipe_unit-test-daily.yml b/modules/Loading/testing_recipes/recipe_unit-test-daily.yml new file mode 100644 index 00000000..1ce56fad --- /dev/null +++ b/modules/Loading/testing_recipes/recipe_unit-test-daily.yml @@ -0,0 +1,44 @@ +Description: + Author: V. Agudetse + Info: Unit test for daily data, regridding to system + +Analysis: + Horizon: seasonal + Variables: + name: tas + freq: daily_mean + Datasets: + System: + name: system5c3s + Multimodel: no + Reference: + name: era5 + Time: + sdate: + fcst_syear: '2020' + fcst_sday: '1101' + hcst_start: '1993' + hcst_end: '1996' + leadtimemin: 0 + leadtimemax: 0 + Region: + latmin: -2 + latmax: 2 + lonmin: 0 + lonmax: 4 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: qmap + Skill: + metric: RPSS + Indicators: + index: no + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ -- GitLab From 853f68560a2a2ca929fe77d11a80d3bd7d2c2647 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 28 Jun 2022 16:54:43 +0200 Subject: [PATCH 14/68] Add ensemble mean correlation 'corr' to Skill module --- modules/Loading/testing_recipes/recipe_4.yml | 2 +- modules/Skill/Skill.R | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/modules/Loading/testing_recipes/recipe_4.yml b/modules/Loading/testing_recipes/recipe_4.yml index 5a4f2880..88c3cdc7 100644 --- a/modules/Loading/testing_recipes/recipe_4.yml +++ b/modules/Loading/testing_recipes/recipe_4.yml @@ -32,7 +32,7 @@ Analysis: Calibration: method: mse_min Skill: - metric: RPS RPSS FRPSS BSS10 BSS90 EnsCorr + metric: RPS RPSS FRPSS BSS10 BSS90 EnsCorr Corr Indicators: index: no Output_format: S2S4E diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 6da63816..aa79c983 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -62,6 +62,12 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { } else { Fair <- F } + # Whether to compute correlation for the ensemble mean or for each member + if (metric == 'corr') { + memb <- F + } else if (metric == 'enscorr') { + memb <- T + } # Ranked Probability Score and Fair version if (metric %in% c('rps', 'frps')) { skill <- RPS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, @@ -97,13 +103,14 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { skill_metrics[[ paste0(metric, "_significance") ]] <- list(skill$sign) # Ensemble mean correlation - } else if (metric == 'enscorr') { + } else if (metric %in% c('enscorr', 'corr')) { ## TODO: Implement option for Kendall and Spearman methods? skill <- s2dv::Corr(exp$data, obs$data, dat_dim = 'dat', time_dim = time_dim, method = 'pearson', memb_dim = memb_dim, + memb = memb, ncores = ncores) skill <- lapply(skill, function(x) { .drop_dims(x)}) -- GitLab From 4da7a8c0e65f707cd6c6e2e04532d36a95fdc2d2 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 29 Jun 2022 16:25:29 +0200 Subject: [PATCH 15/68] Create function to save hcst/fcst, add more global attributes --- modules/Saving/Saving.R | 116 ++++++++++++++++++++++++++++++++---- modules/Saving/paths2save.R | 15 +++-- 2 files changed, 114 insertions(+), 17 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index c14cbd9e..54670992 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -5,16 +5,16 @@ source("modules/Saving/paths2save.R") get_global_attributes <- function(recipe) { # Generates metadata of interest to add to the global attributes of the # netCDF files. - - hcst_period <- paste0(recipe$Analysis$Time$hcst_start, " to ", - recipe$Analysis$Time$hcst_end) - system <- recipe$Analysis$Datasets$System$name - reference <- recipe$Analysis$Datasets$Reference$name + + parameters <- recipe$Analysis + hcst_period <- paste0(parameters$Time$hcst_start, " to ", + parameters$Time$hcst_end) attrs <- list(reference_period = hcst_period, institution = "BSC-CNS", - system = system, - reference = reference) + system = parameters$Datasets$System$name, + reference = parameters$Datasets$Reference$name + calibration_method = parameters$Workflow$Calibration$method) return(attrs) @@ -85,6 +85,100 @@ get_latlon <- function(latitude, longitude) { } + +save_forecast <- function(data_cube, + recipe, + outdir, + agg="global") { + + ## Loop by year to save the hcst? Will need to select correct year + lalo <- c('longitude','latitude') + variable <- data_cube$Variable$varName + var.longname <- attr(data_cube$Variable, 'variable')$long_name + + fcst <- data_cube$data + if (tolower(agg) == "global") { + fcst <- Reorder(fcst, c(lalo, 'ensemble', 'time')) + } else { + fcst <- Reorder(fcst, c('country', 'ensemble', 'time')) + } + + # Add metadata + if (tolower(agg) == "country") { + dims <- c('Country', 'ensemble', 'time') + var.expname <- paste0(variable, '_country') + var.sdname <- paste0("Country-Aggregated ", var.longname) + var.units <- attr(data_cube$Variable, 'variable')$units + } else { + dims <- c(lalo,'ensemble', 'time') + var.expname <- variable + var.sdname <- var.longname + var.units <- attr(data_cube$Variable, 'variable')$units + } + + metadata <- list(fcst = list(name = var.expname, + standard_name = var.sdname, + long_name = var.longname, + units = var.units)) + attr(fcst, 'variables') <- metadata + names(dim(fcst)) <- dims + # Add global attributes + global_attributes <- get_global_attributes(recipe) + attr(fcst, 'global_attrs') <- global_attributes + + # Time indices and metadata + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + + # Generate vector containing leadtimes + ## TODO: Move to a separate function? + if (store.freq == "monthly_mean") { + leadtimes <- seq(from = recipe$Analysis$Time$leadtimemin, + to = recipe$Analysis$Time$leadtimemax) + } else { + dates <- data$hcst$Dates$start + lubridate::year(dates) <- as.numeric(recipe$Analysis$Time$hcst_start) + dates <- unique(as.Date(dates, format = '%Y-%m-%d')) + init_date <- paste0(as.numeric(recipe$Analysis$Time$hcst_start), + as.numeric(recipe$Analysis$Time$sdate$fcst_sday)) + init_date <- as.Date(init_date, format = '%Y%m%d') + leadtimes <- as.numeric(dates - init_date) + } + # If a fcst is provided, use that as the ref. year. Otherwise use 1970. + if (!is.null(recipe$Analysis$Time$sdate$fcst_syear)) { + fcst.sdate <- paste0(recipe$Analysis$Time$sdate$fcst_syear, + recipe$Analysis$Time$sdate$fcst_sday) + } else { + fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate$fcst_sday) + } + + times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate) + time <- times$time + time_step <- times$time_step + + # Generate name of output file + outfile <- get_filename(outdir, data_cube$Variable$varName, + fcst.sdate, fcst.sdate, + agg, fcst.horizon, "ensemble") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, fcst, time_step), outfile) + } else { + latitude <- data_cube$lat[1:length(data_cube$lat)] + longitude <- data_cube$lon[1:length(data_cube$lon)] + latlon <- get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, fcst, list(time_step)) + ArrayToNc(vars, outfile) + } + + print("##### FCST SAVED TO NETCDF FILE #####") + +} + ## TODO: Place inside a function somewhere # if (tolower(agg) == "country"){ # load(mask.path) @@ -111,7 +205,6 @@ save_metrics <- function(skill, # Define grid dimensions and names lalo <- c('longitude', 'latitude') - # Remove singleton dimensions and rearrange lon, lat and time dims if (tolower(agg) == "global") { ## TODO: Implement metrics with additional non-singleton dimensions @@ -135,7 +228,6 @@ save_metrics <- function(skill, dims <- c(lalo, 'time') } metadata <- list(metric = list(name = metric, standard_name = sdname)) - attr(skill[[i]], 'variables') <- metadata names(dim(skill[[i]])) <- dims } @@ -143,7 +235,6 @@ save_metrics <- function(skill, # Time indices and metadata fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq - # Generate vector containing leadtimes if (store.freq == "monthly_mean") { leadtimes <- seq(from = recipe$Analysis$Time$leadtimemin, @@ -157,7 +248,6 @@ save_metrics <- function(skill, init_date <- as.Date(init_date, format = '%Y%m%d') leadtimes <- as.numeric(dates - init_date) } - # If a fcst is provided, use that as the ref. year. Otherwise use 1970. if (!is.null(recipe$Analysis$Time$sdate$fcst_syear)) { fcst.sdate <- paste0(recipe$Analysis$Time$sdate$fcst_syear, @@ -170,12 +260,12 @@ save_metrics <- function(skill, time <- times$time time_step <- times$time_step - ## TODO: Generate output filename + # Generate name of output file outfile <- get_filename(outdir, data_cube$Variable$varName, fcst.sdate, fcst.sdate, agg, fcst.horizon, "skill") - # Grid data and metadata + # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { country <- get_countries(grid) ArrayToNc(append(country, time, skill, time_step), outfile) diff --git a/modules/Saving/paths2save.R b/modules/Saving/paths2save.R index 44497841..fd3fb6a8 100644 --- a/modules/Saving/paths2save.R +++ b/modules/Saving/paths2save.R @@ -19,7 +19,8 @@ get_filename <- function(dir, var, date, fcst.sdate, agg, horizon, file.type) { switch(file.type, "skill" = {file <- paste0(var, gg, "-skill_", dd, shortdate)}, - "corr" = {file <- paste0(var, gg,"-corr_",dd,shortdate)}) + "corr" = {file <- paste0(var, gg, "-corr_", dd, shortdate)}, + "ensemble" = {file <- paste0(var, gg, "_", date, "_", shortdate)}) return(paste0(dir, file, ".nc")) @@ -31,6 +32,7 @@ get_dir <- function(recipe, agg = "global") { # startdate, and aggregation. ## TODO: Get aggregation from recipe + ## TODO: Add time frequency outdir <- recipe$Run$output_dir variable <- recipe$Analysis$Variables$name @@ -38,13 +40,18 @@ get_dir <- function(recipe, agg = "global") { fcst.sdate <- paste0(recipe$Analysis$Time$sdate$fcst_syear, recipe$Analysis$Time$sdate$fcst_sday) } else { - fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate$fcst_sday) + fcst.sdate <- paste0("hcst-", recipe$Analysis$Time$sdate$fcst_sday) } + + calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method) + store.freq <- recipe$Analysis$Variables$freq switch(tolower(agg), - "country" = {dir <- paste0(outdir, "/", variable, + "country" = {dir <- paste0(outdir, "/", calib.method, "-", + store.freq, "/", variable, "_country/", fcst.sdate, "/")}, - "global" = {dir <- paste0(outdir, "/", variable, "/", + "global" = {dir <- paste0(outdir, "/", calib.method, "-", + store.freq, "/", variable, "/", fcst.sdate, "/")}) return(dir) -- GitLab From a57789a58cf310bf13683f2796c4b9826235492a Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 29 Jun 2022 16:26:01 +0200 Subject: [PATCH 16/68] Change output directory --- modules/Loading/testing_recipes/recipe_3.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/Loading/testing_recipes/recipe_3.yml b/modules/Loading/testing_recipes/recipe_3.yml index 1f47545f..1db811db 100644 --- a/modules/Loading/testing_recipes/recipe_3.yml +++ b/modules/Loading/testing_recipes/recipe_3.yml @@ -39,5 +39,5 @@ Analysis: Run: Loglevel: INFO Terminal: yes - output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ -- GitLab From ffb9dbb31cb7c164c56ed1af5c5acb0a2289f946 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 30 Jun 2022 12:37:56 +0200 Subject: [PATCH 17/68] Change 'corr' and 'enscorr' naming convention --- modules/Saving/Saving.R | 7 +++---- modules/Skill/Skill.R | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 54670992..0516c044 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -13,7 +13,7 @@ get_global_attributes <- function(recipe) { attrs <- list(reference_period = hcst_period, institution = "BSC-CNS", system = parameters$Datasets$System$name, - reference = parameters$Datasets$Reference$name + reference = parameters$Datasets$Reference$name, calibration_method = parameters$Workflow$Calibration$method) return(attrs) @@ -85,7 +85,6 @@ get_latlon <- function(latitude, longitude) { } - save_forecast <- function(data_cube, recipe, outdir, @@ -198,8 +197,8 @@ save_metrics <- function(skill, # Remove ensemble correlation from the list since it should be saved in # a separate file, as it has 'ensemble' dim. - if ("enscorr" %in% names(skill)) { - enscorr_metrics <- grep("enscorr", names(skill)) + if ("corr" %in% names(skill)) { + enscorr_metrics <- grep("^corr", names(skill)) skill <- skill[-enscorr_metrics] } diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index aa79c983..97d3dbb8 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -64,9 +64,9 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { } # Whether to compute correlation for the ensemble mean or for each member if (metric == 'corr') { - memb <- F - } else if (metric == 'enscorr') { memb <- T + } else if (metric == 'enscorr') { + memb <- F } # Ranked Probability Score and Fair version if (metric %in% c('rps', 'frps')) { -- GitLab From 0123f7fd1c853c6b1b86f66aafa2e22eb9b78fab Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 30 Jun 2022 16:30:50 +0200 Subject: [PATCH 18/68] Return metrics as a list of arrays instead of list of lists --- modules/Saving/Saving.R | 14 ++++++-------- modules/Skill/Skill.R | 26 +++++++++++++------------- modules/Skill/s2s.metrics.R | 3 ++- 3 files changed, 21 insertions(+), 22 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 0516c044..81a0c0b9 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -94,8 +94,10 @@ save_forecast <- function(data_cube, lalo <- c('longitude','latitude') variable <- data_cube$Variable$varName var.longname <- attr(data_cube$Variable, 'variable')$long_name + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq - fcst <- data_cube$data + fcst <- data_cube$data ## TODO: Select year if (tolower(agg) == "global") { fcst <- Reorder(fcst, c(lalo, 'ensemble', 'time')) } else { @@ -125,10 +127,6 @@ save_forecast <- function(data_cube, global_attributes <- get_global_attributes(recipe) attr(fcst, 'global_attrs') <- global_attributes - # Time indices and metadata - fcst.horizon <- tolower(recipe$Analysis$Horizon) - store.freq <- recipe$Analysis$Variables$freq - # Generate vector containing leadtimes ## TODO: Move to a separate function? if (store.freq == "monthly_mean") { @@ -136,9 +134,9 @@ save_forecast <- function(data_cube, to = recipe$Analysis$Time$leadtimemax) } else { dates <- data$hcst$Dates$start - lubridate::year(dates) <- as.numeric(recipe$Analysis$Time$hcst_start) + lubridate::year(dates) <- as.numeric(recipe$Analysis$Time$hcst_start) # dates <- unique(as.Date(dates, format = '%Y-%m-%d')) - init_date <- paste0(as.numeric(recipe$Analysis$Time$hcst_start), + init_date <- paste0(as.numeric(recipe$Analysis$Time$hcst_start), # as.numeric(recipe$Analysis$Time$sdate$fcst_sday)) init_date <- as.Date(init_date, format = '%Y%m%d') leadtimes <- as.numeric(dates - init_date) @@ -209,7 +207,7 @@ save_metrics <- function(skill, ## TODO: Implement metrics with additional non-singleton dimensions ## e.g. 'ensemble' in ensemble correlation. skill <- lapply(skill, function(x) { - Reorder(drop(x[[1]]), c(lalo, 'time'))}) + Reorder(drop(x), c(lalo, 'time'))}) } # Add global and variable attributes diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 97d3dbb8..19619a71 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -73,7 +73,7 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { skill <- RPS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, Fair = Fair, ncores = ncores) skill <- .drop_dims(skill) - skill_metrics[[ metric ]] <- list(skill) + skill_metrics[[ metric ]] <- skill # Ranked Probability Skill Score and Fair version } else if (metric %in% c('rpss', 'frpss')) { @@ -81,8 +81,8 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { Fair = Fair, ncores = ncores) skill$rpss <- .drop_dims(skill$rpss) skill$sign <- .drop_dims(skill$sign) - skill_metrics[[ metric ]] <- list(skill$rpss) - skill_metrics[[ paste0(metric, "_significance") ]] <- list(skill$sign) + skill_metrics[[ metric ]] <- skill$rpss + skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # Brier Skill Score - 10th percentile } else if (metric == 'bss10') { @@ -90,8 +90,8 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { prob_thresholds = 0.1, Fair = Fair, ncores = ncores) skill$rpss <- .drop_dims(skill$rpss) skill$sign <- .drop_dims(skill$sign) - skill_metrics[[ metric ]] <- list(skill$rpss) - skill_metrics[[ paste0(metric, "_significance") ]] <- list(skill$sign) + skill_metrics[[ metric ]] <- skill$rpss + skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # Brier Skill Score - 90th percentile } else if (metric == 'bss90') { @@ -99,8 +99,8 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { prob_thresholds = 0.9, Fair = Fair, ncores = ncores) skill$rpss <- .drop_dims(skill$rpss) skill$sign <- .drop_dims(skill$sign) - skill_metrics[[ metric ]] <- list(skill$rpss) - skill_metrics[[ paste0(metric, "_significance") ]] <- list(skill$sign) + skill_metrics[[ metric ]] <- skill$rpss + skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # Ensemble mean correlation } else if (metric %in% c('enscorr', 'corr')) { @@ -114,17 +114,17 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { ncores = ncores) skill <- lapply(skill, function(x) { .drop_dims(x)}) - skill_metrics[[ metric ]] <- list(skill$corr) - skill_metrics[[ paste0(metric, "_p.value") ]] <- list(skill$p.val) - skill_metrics[[ paste0(metric, "_conf.low") ]] <- list(skill$conf.lower) - skill_metrics[[ paste0(metric, "_conf.up") ]] <- list(skill$conf.upper) + skill_metrics[[ metric ]] <- skill$corr + skill_metrics[[ paste0(metric, "_p.value") ]] <- skill$p.val + skill_metrics[[ paste0(metric, "_conf.low") ]] <- skill$conf.lower + skill_metrics[[ paste0(metric, "_conf.up") ]] <- skill$conf.upper # SpecsVerification metrics } else if (grepl("specs", metric, fixed = TRUE)) { # Compute SpecsVerification version of the metrics ## Retain _specs in metric name for clarity? - metric <- (strsplit(metric, "_"))[[1]][1] # Get metric name + metric_name <- (strsplit(metric, "_"))[[1]][1] # Get metric name skill <- Compute_verif_metrics(exp$data, obs$data, - skill_metrics = metric, + skill_metrics = metric_name, verif.dims=c("syear", "sday", "sweek"), na.rm = na.rm, ncores = ncores) diff --git a/modules/Skill/s2s.metrics.R b/modules/Skill/s2s.metrics.R index 7246dd09..7c0aa30e 100644 --- a/modules/Skill/s2s.metrics.R +++ b/modules/Skill/s2s.metrics.R @@ -101,7 +101,8 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics, data <- Subset(data, c('ensemble'), list(1), drop='selected') data[!is.finite(data)] <- NaN metric <- paste0(metric, "_specs") - metrics_data[[ metric ]] <- data ## previously: list(data) + metrics_data <- data + # metrics_data[[ metric ]] <- data ## previously: list(data) } else if (metric == "corr_eno") { # computes ensemble mean -- GitLab From 2895e22b9dc865995479e29e58ba03f2a72c3e69 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 4 Jul 2022 12:23:23 +0200 Subject: [PATCH 19/68] Add function to save the forecast and hindcast to netCDF files --- modules/Saving/Saving.R | 121 ++++++++++++++++++++-------------------- modules/test_victoria.R | 2 + 2 files changed, 64 insertions(+), 59 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 81a0c0b9..039bf91a 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -92,41 +92,13 @@ save_forecast <- function(data_cube, ## Loop by year to save the hcst? Will need to select correct year lalo <- c('longitude','latitude') + variable <- data_cube$Variable$varName var.longname <- attr(data_cube$Variable, 'variable')$long_name + global_attributes <- get_global_attributes(recipe) fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq - fcst <- data_cube$data ## TODO: Select year - if (tolower(agg) == "global") { - fcst <- Reorder(fcst, c(lalo, 'ensemble', 'time')) - } else { - fcst <- Reorder(fcst, c('country', 'ensemble', 'time')) - } - - # Add metadata - if (tolower(agg) == "country") { - dims <- c('Country', 'ensemble', 'time') - var.expname <- paste0(variable, '_country') - var.sdname <- paste0("Country-Aggregated ", var.longname) - var.units <- attr(data_cube$Variable, 'variable')$units - } else { - dims <- c(lalo,'ensemble', 'time') - var.expname <- variable - var.sdname <- var.longname - var.units <- attr(data_cube$Variable, 'variable')$units - } - - metadata <- list(fcst = list(name = var.expname, - standard_name = var.sdname, - long_name = var.longname, - units = var.units)) - attr(fcst, 'variables') <- metadata - names(dim(fcst)) <- dims - # Add global attributes - global_attributes <- get_global_attributes(recipe) - attr(fcst, 'global_attrs') <- global_attributes - # Generate vector containing leadtimes ## TODO: Move to a separate function? if (store.freq == "monthly_mean") { @@ -134,42 +106,73 @@ save_forecast <- function(data_cube, to = recipe$Analysis$Time$leadtimemax) } else { dates <- data$hcst$Dates$start - lubridate::year(dates) <- as.numeric(recipe$Analysis$Time$hcst_start) # + lubridate::year(dates) <- as.numeric(recipe$Analysis$Time$hcst_start) dates <- unique(as.Date(dates, format = '%Y-%m-%d')) - init_date <- paste0(as.numeric(recipe$Analysis$Time$hcst_start), # + init_date <- paste0(as.numeric(recipe$Analysis$Time$hcst_start), as.numeric(recipe$Analysis$Time$sdate$fcst_sday)) init_date <- as.Date(init_date, format = '%Y%m%d') leadtimes <- as.numeric(dates - init_date) } - # If a fcst is provided, use that as the ref. year. Otherwise use 1970. - if (!is.null(recipe$Analysis$Time$sdate$fcst_syear)) { - fcst.sdate <- paste0(recipe$Analysis$Time$sdate$fcst_syear, - recipe$Analysis$Time$sdate$fcst_sday) - } else { - fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate$fcst_sday) - } - - times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate) - time <- times$time - time_step <- times$time_step - # Generate name of output file - outfile <- get_filename(outdir, data_cube$Variable$varName, - fcst.sdate, fcst.sdate, - agg, fcst.horizon, "ensemble") + ## TODO: Select year + sdates <- seq(1:length(data_cube$load_parameters$dat1$file_date[[1]])) + for (i in sdates) { + # Select year from array and rearrange dimensions + fcst <- data_cube$data[ , , , , i, , , , ] # Select year + if (tolower(agg) == "global") { + fcst <- list(Reorder(fcst, c(lalo, 'ensemble', 'time'))) + } else { + fcst <- list(Reorder(fcst, c('country', 'ensemble', 'time'))) + } - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, fcst, time_step), outfile) - } else { - latitude <- data_cube$lat[1:length(data_cube$lat)] - longitude <- data_cube$lon[1:length(data_cube$lon)] - latlon <- get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, fcst, list(time_step)) - ArrayToNc(vars, outfile) + # Add metadata + if (tolower(agg) == "country") { + dims <- c('Country', 'ensemble', 'time') + var.expname <- paste0(variable, '_country') + var.sdname <- paste0("Country-Aggregated ", var.longname) + var.units <- attr(data_cube$Variable, 'variable')$units + } else { + dims <- c(lalo, 'ensemble', 'time') + var.expname <- variable + var.sdname <- var.longname + var.units <- attr(data_cube$Variable, 'variable')$units + } + + metadata <- list(fcst = list(name = var.expname, + standard_name = var.sdname, + long_name = var.longname, + units = var.units)) + attr(fcst[[1]], 'variables') <- metadata + names(dim(fcst[[1]])) <- dims + # Add global attributes + global_attributes <- get_global_attributes(recipe) + attr(fcst[[1]], 'global_attrs') <- global_attributes + + # Select start date + fcst.sdate <- data_cube$load_parameters$dat1$file_date[[1]][i] + + times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate) + time <- times$time + time_step <- times$time_step + + # Generate name of output file + outfile <- get_filename(outdir, data_cube$Variable$varName, + fcst.sdate, fcst.sdate, + agg, fcst.horizon, "ensemble") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, fcst, time_step), outfile) + } else { + latitude <- data_cube$lat[1:length(data_cube$lat)] + longitude <- data_cube$lon[1:length(data_cube$lon)] + latlon <- get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, fcst, list(time_step)) + ArrayToNc(vars, outfile) + } } print("##### FCST SAVED TO NETCDF FILE #####") diff --git a/modules/test_victoria.R b/modules/test_victoria.R index 7f210156..9ad269f3 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -21,3 +21,5 @@ skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, outdir <- get_dir(recipe) dir.create(outdir, showWarnings = FALSE, recursive = TRUE) save_metrics(skill_metrics, recipe, data$hcst, outdir, agg = "global") +save_forecast(calibrated_data$hcst, recipe, outdir, agg = "global") +save_forecast(calibrated_data$fcst, recipe, outdir, agg = "global") -- GitLab From 8afce8c88e0446bece371911e5e7ff38c2acf735 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 5 Jul 2022 11:44:42 +0200 Subject: [PATCH 20/68] If no regridding is performed upon loading, don't check whether hcst and obs have the same grid --- modules/Loading/Loading.R | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 71e3e198..c7b3ed26 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -271,13 +271,14 @@ load_datasets <- function(recipe_file) { # Convert obs to s2dv_cube obs <- as.s2dv_cube(obs) - - # Check for consistency between hcst and obs - if (!identical(as.vector(hcst$lat), as.vector(obs$lat))) { - stop("hcst and obs don't share the same latitude.") - } - if (!identical(as.vector(hcst$lon), as.vector(obs$lon))) { - stop("hcst and obs don't share the same longitude.") + if (!(recipe$Analysis$Regrid$type == 'none')) { + # Check for consistency between hcst and obs + if (!identical(as.vector(hcst$lat), as.vector(obs$lat))) { + stop("hcst and obs don't share the same latitude.") + } + if (!identical(as.vector(hcst$lon), as.vector(obs$lon))) { + stop("hcst and obs don't share the same longitude.") + } } # Print a summary of the loaded data for the user, for each object -- GitLab From 810b97238d751dbaddfdf8a107b40704545ba950 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 5 Jul 2022 15:05:13 +0200 Subject: [PATCH 21/68] Handle dropping of singleton dimensions when time = 1 --- modules/Saving/Saving.R | 6 +++++- modules/Skill/Skill.R | 11 +++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 039bf91a..506705c6 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -119,6 +119,10 @@ save_forecast <- function(data_cube, for (i in sdates) { # Select year from array and rearrange dimensions fcst <- data_cube$data[ , , , , i, , , , ] # Select year + + if (!("time" %in% dim(fcst))) { + dim(fcst) <- c("time" = 1, dim(fcst)) + } if (tolower(agg) == "global") { fcst <- list(Reorder(fcst, c(lalo, 'ensemble', 'time'))) } else { @@ -210,7 +214,7 @@ save_metrics <- function(skill, ## TODO: Implement metrics with additional non-singleton dimensions ## e.g. 'ensemble' in ensemble correlation. skill <- lapply(skill, function(x) { - Reorder(drop(x), c(lalo, 'time'))}) + Reorder(x, c(lalo, 'time'))}) } # Add global and variable attributes diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 19619a71..e4318875 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -141,15 +141,18 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { } .drop_dims <- function(metric_array) { - # Drop all singleton dimensions except for 'ensemble' + # Drop all singleton dimensions metric_array <- drop(metric_array) + # If time happened to be a singleton dimension, add it back in the array + if (!("time" %in% names(dim(metric_array)))) { + dim(metric_array) <- c("time" = 1, dim(metric_array)) + } # If array has memb_exp (EnsCorr case), change name to 'ensemble' - # Otherwise, add ensemble dim if ("exp_memb" %in% names(dim(metric_array))) { names(dim(metric_array))[which(names(dim(metric_array)) == "exp_memb")] <- "ensemble" - } else { - dim(metric_array) <- c(dim(metric_array), "ensemble" = 1) + # } else { + # dim(metric_array) <- c(dim(metric_array), "ensemble" = 1) } return(metric_array) -- GitLab From e1c2a954df7be776f82cb3957aceb64d803416cd Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 7 Jul 2022 14:55:36 +0200 Subject: [PATCH 22/68] Save correct time units when exporting to netCDF for daily and monthly cases --- modules/Loading/Loading.R | 3 ++- modules/Saving/Saving.R | 37 ++++++++++++++++++++----------------- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index c7b3ed26..8171a5a3 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -270,9 +270,10 @@ load_datasets <- function(recipe_file) { dim(obs) <- default_dims # Convert obs to s2dv_cube + + # Check for consistency between hcst and obs grid obs <- as.s2dv_cube(obs) if (!(recipe$Analysis$Regrid$type == 'none')) { - # Check for consistency between hcst and obs if (!identical(as.vector(hcst$lat), as.vector(obs$lat))) { stop("hcst and obs don't share the same latitude.") } diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 506705c6..710541cb 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -101,16 +101,18 @@ save_forecast <- function(data_cube, # Generate vector containing leadtimes ## TODO: Move to a separate function? + + dates <- sort(as.Date(data$hcst$Dates$start)) + n_steps <- dim(data$hcst$data)['time'][[1]] # number of time steps + dates <- dates[1:n_steps] + init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate$fcst_sday), + format = '%Y%m%d') if (store.freq == "monthly_mean") { - leadtimes <- seq(from = recipe$Analysis$Time$leadtimemin, - to = recipe$Analysis$Time$leadtimemax) + # Get time difference in months + leadtimes <- interval(init_date, dates) %/% months(1) } else { - dates <- data$hcst$Dates$start - lubridate::year(dates) <- as.numeric(recipe$Analysis$Time$hcst_start) - dates <- unique(as.Date(dates, format = '%Y-%m-%d')) - init_date <- paste0(as.numeric(recipe$Analysis$Time$hcst_start), - as.numeric(recipe$Analysis$Time$sdate$fcst_sday)) - init_date <- as.Date(init_date, format = '%Y%m%d') + # Get time difference in days leadtimes <- as.numeric(dates - init_date) } @@ -120,7 +122,7 @@ save_forecast <- function(data_cube, # Select year from array and rearrange dimensions fcst <- data_cube$data[ , , , , i, , , , ] # Select year - if (!("time" %in% dim(fcst))) { + if (!("time" %in% names(dim(fcst)))) { dim(fcst) <- c("time" = 1, dim(fcst)) } if (tolower(agg) == "global") { @@ -240,16 +242,17 @@ save_metrics <- function(skill, fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq # Generate vector containing leadtimes + dates <- sort(as.Date(data$hcst$Dates$start)) + n_steps <- dim(data$hcst$data)['time'][[1]] # number of time steps + dates <- dates[1:n_steps] + init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate$fcst_sday), + format = '%Y%m%d') if (store.freq == "monthly_mean") { - leadtimes <- seq(from = recipe$Analysis$Time$leadtimemin, - to = recipe$Analysis$Time$leadtimemax) + # Get time difference in months + leadtimes <- interval(init_date, dates) %/% months(1) } else { - dates <- data$hcst$Dates$start - lubridate::year(dates) <- as.numeric(recipe$Analysis$Time$hcst_start) - dates <- unique(as.Date(dates, format = '%Y-%m-%d')) - init_date <- paste0(as.numeric(recipe$Analysis$Time$hcst_start), - as.numeric(recipe$Analysis$Time$sdate$fcst_sday)) - init_date <- as.Date(init_date, format = '%Y%m%d') + # Get time difference in days leadtimes <- as.numeric(dates - init_date) } # If a fcst is provided, use that as the ref. year. Otherwise use 1970. -- GitLab From 280109e6f0da6e8fcb4356c47d07e197f5da3cd7 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 8 Jul 2022 13:19:37 +0200 Subject: [PATCH 23/68] Remove TODOs; apply .drop_dims to specsVerification metrics --- modules/Loading/testing_recipes/recipe_2.yml | 12 ++++++------ modules/Saving/Saving.R | 5 ++--- modules/Skill/Skill.R | 15 ++++++++++----- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/modules/Loading/testing_recipes/recipe_2.yml b/modules/Loading/testing_recipes/recipe_2.yml index b8f2d13a..a8d671ec 100644 --- a/modules/Loading/testing_recipes/recipe_2.yml +++ b/modules/Loading/testing_recipes/recipe_2.yml @@ -15,11 +15,11 @@ Analysis: Time: sdate: fcst_syear: '2020' - fcst_sday: '1101' + fcst_sday: '0601' hcst_start: '1993' - hcst_end: '2016' - leadtimemin: 2 - leadtimemax: 4 + hcst_end: '2006' + leadtimemin: 0 + leadtimemax: 2 Region: latmin: -10 latmax: 10 @@ -30,9 +30,9 @@ Analysis: type: to_system Workflow: Calibration: - method: SBC + method: raw Skill: - metric: RPSS + metric: RPSS_specs BSS90_specs EnsCorr_specs FRPS_specs FRPSS_specs BSS10_specs FRPS Indicators: index: no Output_format: S2S4E diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 710541cb..ca735d47 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -22,8 +22,8 @@ get_global_attributes <- function(recipe) { get_times <- function(store.freq, fcst.horizon, leadtimes, sdate) { # Generates time dimensions and the corresponding metadata. - ## TODO: Get correct indices according to the start date for monthly case. - ## TODO: Format units according to standards. + ## TODO: Add calendar + ## TODO: Subseasonal and decadal if (store.freq == "monthly_mean") { switch(fcst.horizon, @@ -40,7 +40,6 @@ get_times <- function(store.freq, fcst.horizon, leadtimes, sdate) { } dim(time) <- length(time) - # metadata <- list(time = list(standard_name = stdname, sdate <- as.Date(sdate, format = '%Y%m%d') # reformatting metadata <- list(time = list(units = paste0(ref, sdate, 'T00:00:00'))) attr(time, 'variables') <- metadata diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index e4318875..d1335e0d 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -123,11 +123,16 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { # Compute SpecsVerification version of the metrics ## Retain _specs in metric name for clarity? metric_name <- (strsplit(metric, "_"))[[1]][1] # Get metric name - skill <- Compute_verif_metrics(exp$data, obs$data, - skill_metrics = metric_name, - verif.dims=c("syear", "sday", "sweek"), - na.rm = na.rm, - ncores = ncores) + suppressWarnings({ + invisible(capture.output( + skill <- Compute_verif_metrics(exp$data, obs$data, + skill_metrics = metric_name, + verif.dims=c("syear", "sday", "sweek"), + na.rm = na.rm, + ncores = ncores) + )) + }) + skill <- .drop_dims(skill) skill_metrics[[ metric ]] <- skill } -- GitLab From ef85b20f7552bd2b0b3d4f7f5036cb12e481c24a Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 11 Jul 2022 16:50:15 +0200 Subject: [PATCH 24/68] Add function to save observations; changes to testing recipes --- modules/Loading/testing_recipes/recipe_1.yml | 3 +- modules/Loading/testing_recipes/recipe_2.yml | 4 +- modules/Loading/testing_recipes/recipe_4.yml | 2 +- modules/Saving/Saving.R | 144 +++++++++++++++++-- modules/Saving/paths2save.R | 7 +- modules/test_victoria.R | 6 +- 6 files changed, 147 insertions(+), 19 deletions(-) diff --git a/modules/Loading/testing_recipes/recipe_1.yml b/modules/Loading/testing_recipes/recipe_1.yml index 041c44af..b1133cf0 100644 --- a/modules/Loading/testing_recipes/recipe_1.yml +++ b/modules/Loading/testing_recipes/recipe_1.yml @@ -33,7 +33,8 @@ Analysis: Calibration: method: qmap # Mandatory, str: Calibration method. See docu. Skill: - metric: RPSS #Mandatory, str: Skill metric or list of skill metrics. See docu. + metric: RPSS FRPSS # str: Skill metric or list of skill metrics. See docu. + prob: [[1/3, 2/3], [1/10]] # frac: Probability bins Indicators: index: no Output_format: S2S4E diff --git a/modules/Loading/testing_recipes/recipe_2.yml b/modules/Loading/testing_recipes/recipe_2.yml index a8d671ec..920d4940 100644 --- a/modules/Loading/testing_recipes/recipe_2.yml +++ b/modules/Loading/testing_recipes/recipe_2.yml @@ -39,5 +39,5 @@ Analysis: Run: Loglevel: INFO Terminal: yes - output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Loading/testing_recipes/recipe_4.yml b/modules/Loading/testing_recipes/recipe_4.yml index 88c3cdc7..15a2f9cf 100644 --- a/modules/Loading/testing_recipes/recipe_4.yml +++ b/modules/Loading/testing_recipes/recipe_4.yml @@ -18,7 +18,7 @@ Analysis: fcst_sday: '1101' hcst_start: '1993' hcst_end: '2016' - leadtimemin: 2 + leadtimemin: 0 leadtimemax: 3 Region: latmin: -10 diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index ca735d47..5caae2b3 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -5,7 +5,6 @@ source("modules/Saving/paths2save.R") get_global_attributes <- function(recipe) { # Generates metadata of interest to add to the global attributes of the # netCDF files. - parameters <- recipe$Analysis hcst_period <- paste0(parameters$Time$hcst_start, " to ", parameters$Time$hcst_end) @@ -67,15 +66,14 @@ get_latlon <- function(latitude, longitude) { # Adds dimensions and metadata to lat and lon # latitude: array containing the latitude values # longitude: array containing the longitude values + ## TODO: Extract metadata from s2dv_cube dim(longitude) <- length(longitude) - ## TODO: Extract metadata from s2dv_cube metadata <- list(longitude = list(units = 'degrees_east')) attr(longitude, 'variables') <- metadata names(dim(longitude)) <- 'longitude' dim(latitude) <- length(latitude) - ## TODO: Extract metadata from s2dv_cube metadata <- list(latitude = list(units = 'degrees_north')) attr(latitude, 'variables') <- metadata names(dim(latitude)) <- 'latitude' @@ -88,9 +86,15 @@ save_forecast <- function(data_cube, recipe, outdir, agg="global") { + # Loops over the years in the s2dv_cube containing a hindcast or forecast + # and exports each year to a netCDF file. + # data_cube: s2dv_cube containing the data and metadata + # recipe: the auto-s2s recipe + # outdir: directory where the files should be saved + # type: 'exp' (hcst and fcst) or 'obs' + # agg: aggregation, "global" or "country" - ## Loop by year to save the hcst? Will need to select correct year - lalo <- c('longitude','latitude') + lalo <- c('longitude', 'latitude') variable <- data_cube$Variable$varName var.longname <- attr(data_cube$Variable, 'variable')$long_name @@ -100,7 +104,6 @@ save_forecast <- function(data_cube, # Generate vector containing leadtimes ## TODO: Move to a separate function? - dates <- sort(as.Date(data$hcst$Dates$start)) n_steps <- dim(data$hcst$data)['time'][[1]] # number of time steps dates <- dates[1:n_steps] @@ -115,8 +118,7 @@ save_forecast <- function(data_cube, leadtimes <- as.numeric(dates - init_date) } - ## TODO: Select year - sdates <- seq(1:length(data_cube$load_parameters$dat1$file_date[[1]])) + sdates <- seq(1:dim(data_cube$data)['syear'][[1]]) for (i in sdates) { # Select year from array and rearrange dimensions fcst <- data_cube$data[ , , , , i, , , , ] # Select year @@ -132,7 +134,7 @@ save_forecast <- function(data_cube, # Add metadata if (tolower(agg) == "country") { - dims <- c('Country', 'ensemble', 'time') + dims <- c('Country', 'time') var.expname <- paste0(variable, '_country') var.sdname <- paste0("Country-Aggregated ", var.longname) var.units <- attr(data_cube$Variable, 'variable')$units @@ -156,14 +158,134 @@ save_forecast <- function(data_cube, # Select start date fcst.sdate <- data_cube$load_parameters$dat1$file_date[[1]][i] + # Get time dimension values and metadata times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate) time <- times$time time_step <- times$time_step + + # Generate name of output file + outfile <- get_filename(outdir, data_cube$Variable$varName, + fcst.sdate, fcst.sdate, + agg, fcst.horizon, "exp") + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, fcst, time_step), outfile) + } else { + latitude <- data_cube$lat[1:length(data_cube$lat)] + longitude <- data_cube$lon[1:length(data_cube$lon)] + latlon <- get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, fcst, list(time_step)) + ArrayToNc(vars, outfile) + } + } + print("##### FCST SAVED TO NETCDF FILE #####") +} + + +save_observations <- function(data_cube, + recipe, + outdir, + agg="global") { + # Loops over the years in the s2dv_cube containing the observations and + # exports each year to a netCDF file. + # data_cube: s2dv_cube containing the data and metadata + # recipe: the auto-s2s recipe + # outdir: directory where the files should be saved + # agg: aggregation, "global" or "country" + + lalo <- c('longitude', 'latitude') + + variable <- data_cube$Variable$varName + var.longname <- attr(data_cube$Variable, 'variable')$long_name + global_attributes <- get_global_attributes(recipe) + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + + # Generate vector containing leadtimes + ## TODO: Move to a separate function? + dates <- sort(as.Date(data$hcst$Dates$start)) + n_steps <- dim(data$hcst$data)['time'][[1]] # number of time steps + dates <- dates[1:n_steps] + init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate$fcst_sday), + format = '%Y%m%d') + if (store.freq == "monthly_mean") { + # Get time difference in months + leadtimes <- interval(init_date, dates) %/% months(1) + } else { + # Get time difference in days + leadtimes <- as.numeric(dates - init_date) + } + + sdates <- seq(1:dim(data_cube$data)['syear'][[1]]) + for (i in sdates) { + # Select year from array and rearrange dimensions + fcst <- data_cube$data[ , , , , i, , , , ] # Select year + + if (!("time" %in% names(dim(fcst)))) { + dim(fcst) <- c("time" = 1, dim(fcst)) + } + if (tolower(agg) == "global") { + fcst <- list(Reorder(fcst, c(lalo, 'time'))) + } else { + fcst <- list(Reorder(fcst, c('country', 'time'))) + } + + # Add metadata + if (tolower(agg) == "country") { + dims <- c('Country', 'time') + var.expname <- paste0(variable, '_country') + var.sdname <- paste0("Country-Aggregated ", var.longname) + var.units <- attr(data_cube$Variable, 'variable')$units + } else { + dims <- c(lalo, 'time') + var.expname <- variable + var.sdname <- var.longname + var.units <- attr(data_cube$Variable, 'variable')$units + } + + metadata <- list(fcst = list(name = var.expname, + standard_name = var.sdname, + long_name = var.longname, + units = var.units)) + attr(fcst[[1]], 'variables') <- metadata + names(dim(fcst[[1]])) <- dims + # Add global attributes + global_attributes <- get_global_attributes(recipe) + attr(fcst[[1]], 'global_attrs') <- global_attributes + + # Select start date. The date is computed for each year, and adapted for + # consistency with the hcst/fcst dates, so that both sets of files have + # the same name pattern. + ## Because observations are loaded differently in the daily vs. monthly + ## cases, different approaches are necessary. + if (store.freq == "monthly_mean") { + fcst.sdate <- data_cube$load_parameters$dat1$file_date[[1]][i] + fcst.sdate <- as.Date(paste0(fcst.sdate, "01"), '%Y%m%d') + } else { + fcst.sdate <- as.Date(data_cube$Dates$start[i]) + } + # Ensure the year is correct if the first leadtime goes to the next year + if (lubridate::month(fcst.sdate) < lubridate::month(init_date)) { + lubridate::year(fcst.sdate) <- lubridate::year(fcst.sdate) + 1 + } + # Ensure that the initialization month is consistent with the hindcast + lubridate::month(fcst.sdate) <- lubridate::month(init_date) + fcst.sdate <- format(fcst.sdate, format = '%Y%m%d') + + # Get time dimension values and metadata + times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate) + time <- times$time + time_step <- times$time_step + # Generate name of output file outfile <- get_filename(outdir, data_cube$Variable$varName, fcst.sdate, fcst.sdate, - agg, fcst.horizon, "ensemble") + agg, fcst.horizon, "obs") # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { @@ -180,7 +302,7 @@ save_forecast <- function(data_cube, } } - print("##### FCST SAVED TO NETCDF FILE #####") + print("##### OBS SAVED TO NETCDF FILE #####") } diff --git a/modules/Saving/paths2save.R b/modules/Saving/paths2save.R index fd3fb6a8..50157c10 100644 --- a/modules/Saving/paths2save.R +++ b/modules/Saving/paths2save.R @@ -6,10 +6,10 @@ get_filename <- function(dir, var, date, fcst.sdate, agg, horizon, file.type) { # type of metric/forecast/probability. if (horizon == "seasonal") { - shortdate <- format(as.Date(as.character(fcst.sdate),"%Y%m%d"), "%m") + shortdate <- format(as.Date(as.character(fcst.sdate), "%Y%m%d"), "%m") dd <- "month" } else { - shortdate <- format(as.Date(as.character(fcst.sdate),"%Y%m%d"), "%V") + shortdate <- format(as.Date(as.character(fcst.sdate), "%Y%m%d"), "%V") dd <- "week" } @@ -20,7 +20,8 @@ get_filename <- function(dir, var, date, fcst.sdate, agg, horizon, file.type) { switch(file.type, "skill" = {file <- paste0(var, gg, "-skill_", dd, shortdate)}, "corr" = {file <- paste0(var, gg, "-corr_", dd, shortdate)}, - "ensemble" = {file <- paste0(var, gg, "_", date, "_", shortdate)}) + "exp" = {file <- paste0(var, gg, "_", date, "_", shortdate)}, + "obs" = {file <- paste0(var, gg, "-obs_", date, "_", shortdate)}) return(paste0(dir, file, ".nc")) diff --git a/modules/test_victoria.R b/modules/test_victoria.R index 9ad269f3..5859660c 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -21,5 +21,9 @@ skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, outdir <- get_dir(recipe) dir.create(outdir, showWarnings = FALSE, recursive = TRUE) save_metrics(skill_metrics, recipe, data$hcst, outdir, agg = "global") +# Export hindcast, forecast and observations onto outfile save_forecast(calibrated_data$hcst, recipe, outdir, agg = "global") -save_forecast(calibrated_data$fcst, recipe, outdir, agg = "global") +if (!is.null(calibrated_data$fcst)) { + save_forecast(calibrated_data$fcst, recipe, outdir, agg = "global") +} +save_observations(data$obs, recipe, outdir, agg = "global") -- GitLab From 4a99f80b925db0eca593857573b6abd9b3ac3adc Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 11 Jul 2022 16:50:56 +0200 Subject: [PATCH 25/68] Apply .drop_dims() to SpecsVerification metrics --- modules/Skill/Skill.R | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index d1335e0d..4c061866 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -121,18 +121,18 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { # SpecsVerification metrics } else if (grepl("specs", metric, fixed = TRUE)) { # Compute SpecsVerification version of the metrics - ## Retain _specs in metric name for clarity? + ## Retain _specs in metric name for clarity metric_name <- (strsplit(metric, "_"))[[1]][1] # Get metric name - suppressWarnings({ - invisible(capture.output( - skill <- Compute_verif_metrics(exp$data, obs$data, - skill_metrics = metric_name, - verif.dims=c("syear", "sday", "sweek"), - na.rm = na.rm, - ncores = ncores) - )) - }) + skill <- Compute_verif_metrics(exp$data, obs$data, + skill_metrics = metric_name, + verif.dims=c("syear", "sday", "sweek"), + na.rm = na.rm, + ncores = ncores) skill <- .drop_dims(skill) + if (metric_name == "frps") { + # Compute yearly mean for FRPS + skill <- colMeans(skill, dims = 1) + } skill_metrics[[ metric ]] <- skill } -- GitLab From 980357346663a8b49669701b7e9adb71c0a1d6c2 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 13 Jul 2022 11:32:34 +0200 Subject: [PATCH 26/68] Add Sys.time() to global attributes when exporting files --- modules/Saving/Saving.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 5caae2b3..acfcbe19 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -13,7 +13,8 @@ get_global_attributes <- function(recipe) { institution = "BSC-CNS", system = parameters$Datasets$System$name, reference = parameters$Datasets$Reference$name, - calibration_method = parameters$Workflow$Calibration$method) + calibration_method = parameters$Workflow$Calibration$method, + computed_on = Sys.time()) return(attrs) -- GitLab From 3756f0836e5fb3039d8bfa437d0049a94651f6fe Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 13 Jul 2022 16:28:42 +0200 Subject: [PATCH 27/68] Add probability bins and quantiles (WIP) --- modules/Loading/testing_recipes/recipe_4.yml | 3 +- modules/Skill/Skill.R | 38 ++++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/modules/Loading/testing_recipes/recipe_4.yml b/modules/Loading/testing_recipes/recipe_4.yml index 15a2f9cf..c5478c42 100644 --- a/modules/Loading/testing_recipes/recipe_4.yml +++ b/modules/Loading/testing_recipes/recipe_4.yml @@ -19,7 +19,7 @@ Analysis: hcst_start: '1993' hcst_end: '2016' leadtimemin: 0 - leadtimemax: 3 + leadtimemax: 2 Region: latmin: -10 latmax: 10 @@ -33,6 +33,7 @@ Analysis: method: mse_min Skill: metric: RPS RPSS FRPSS BSS10 BSS90 EnsCorr Corr + prob: [[1/3, 2/3], [1/10, 9/10]] Indicators: index: no Output_format: S2S4E diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 4c061866..cc89dadb 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -145,6 +145,44 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { } +compute_probabilities <- function(data, recipe, na.rm = T, ncores = 1) { + + named_probs <- list() + named_quantiles <- list() + if (is.null(recipe$Analysis$Workflow$Skill$prob)) { + stop("Quantiles and probability bins have been requested, but no ", + "thresholds are provided in the recipe.") + } else { + for (element in recipe$Analysis$Workflow$Skill$prob) { + thresholds <- sapply(element, function (x) eval(parse(text = x))) + probs <- Compute_probs(data$data, thresholds, + ncores = ncores, + na.rm = na.rm) + for (i in seq(1:dim(probs$quantiles)['bin'][[1]])) { + named_quantiles <- append(named_quantiles, + list(probs$quantiles[i, , , , , , ,])) + names(named_quantiles)[length(named_quantiles)] <- paste0("quantile_", + as.integer(thresholds[i]*100)) + } + for (i in seq(1:dim(probs$probs)['bin'][[1]])) { + if (i == 1) { + name_i <- paste0("prob_b", as.integer(thresholds[1]*100)) + } else if (i == dim(probs$probs)['bin'][[1]]) { + name_i <- paste0("prob_a", as.integer(thresholds[i-1]*100)) + } else { + name_i <- paste0("prob_", as.integer(thresholds[i-1]*100), "_to_", + as.integer(thresholds[i]*100)) + } + named_probs <- append(named_probs, list(probs$probs[i, , , , , , , ,])) + names(named_probs)[length(named_probs)] <- name_i + } + # remove(probs) + } + return(list(probs=named_probs, quantiles=named_quantiles)) + } +} + + .drop_dims <- function(metric_array) { # Drop all singleton dimensions metric_array <- drop(metric_array) -- GitLab From a28534fe041a695453921d3d6d7525ea0d42cb9a Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 14 Jul 2022 13:15:33 +0200 Subject: [PATCH 28/68] Add functions to save percentiles and probability category bins --- modules/Saving/Saving.R | 206 ++++++++++++++++++++++++++++++++++-- modules/Saving/paths2save.R | 6 +- modules/test_victoria.R | 9 ++ 3 files changed, 213 insertions(+), 8 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index acfcbe19..63cdf7fe 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -8,13 +8,14 @@ get_global_attributes <- function(recipe) { parameters <- recipe$Analysis hcst_period <- paste0(parameters$Time$hcst_start, " to ", parameters$Time$hcst_end) + current_time <- paste0(as.character(Sys.time()), " ", Sys.timezone()) attrs <- list(reference_period = hcst_period, institution = "BSC-CNS", system = parameters$Datasets$System$name, reference = parameters$Datasets$Reference$name, calibration_method = parameters$Workflow$Calibration$method, - computed_on = Sys.time()) + computed_on = current_time) return(attrs) @@ -92,7 +93,6 @@ save_forecast <- function(data_cube, # data_cube: s2dv_cube containing the data and metadata # recipe: the auto-s2s recipe # outdir: directory where the files should be saved - # type: 'exp' (hcst and fcst) or 'obs' # agg: aggregation, "global" or "country" lalo <- c('longitude', 'latitude') @@ -119,8 +119,8 @@ save_forecast <- function(data_cube, leadtimes <- as.numeric(dates - init_date) } - sdates <- seq(1:dim(data_cube$data)['syear'][[1]]) - for (i in sdates) { + syears <- seq(1:dim(data_cube$data)['syear'][[1]]) + for (i in syears) { # Select year from array and rearrange dimensions fcst <- data_cube$data[ , , , , i, , , , ] # Select year @@ -222,8 +222,8 @@ save_observations <- function(data_cube, leadtimes <- as.numeric(dates - init_date) } - sdates <- seq(1:dim(data_cube$data)['syear'][[1]]) - for (i in sdates) { + syears <- seq(1:dim(data_cube$data)['syear'][[1]]) + for (i in syears) { # Select year from array and rearrange dimensions fcst <- data_cube$data[ , , , , i, , , , ] # Select year @@ -412,3 +412,197 @@ save_metrics <- function(skill, } +save_quantiles <- function(quantiles, + recipe, + data_cube, + outdir, + agg="global") { + # This function adds metadata to the quantiles + # and exports them to a netCDF file inside 'outdir'. + + # Define grid dimensions and names + lalo <- c('longitude', 'latitude') + # Remove singleton dimensions and rearrange lon, lat and time dims + if (tolower(agg) == "global") { + quantiles <- lapply(quantiles, function(x) { + Reorder(x, c(lalo, 'time'))}) + } + + # Add global and variable attributes + global_attributes <- get_global_attributes(recipe) + attr(quantiles[[1]], 'global_attrs') <- global_attributes + + for (i in 1:length(quantiles)) { + ## TODO: create dictionary with proper metadata + ## TODO: replace with proper standard names + quantile <- names(quantiles[i]) + if (tolower(agg) == "country") { + sdname <- paste0(quantile, " percentile") + dims <- c('Country', 'time') + } else { + sdname <- paste0(quantile, " percentile") + dims <- c(lalo, 'time') + } + metadata <- list(metric = list(name = quantile, standard_name = sdname)) + attr(quantiles[[i]], 'variables') <- metadata + names(dim(quantiles[[i]])) <- dims + } + + # Time indices and metadata + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + # Generate vector containing leadtimes + dates <- sort(as.Date(data$hcst$Dates$start)) + n_steps <- dim(data$hcst$data)['time'][[1]] # number of time steps + dates <- dates[1:n_steps] + init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate$fcst_sday), + format = '%Y%m%d') + if (store.freq == "monthly_mean") { + # Get time difference in months + leadtimes <- interval(init_date, dates) %/% months(1) + } else { + # Get time difference in days + leadtimes <- as.numeric(dates - init_date) + } + # If a fcst is provided, use that as the ref. year. Otherwise use 1970. + if (!is.null(recipe$Analysis$Time$sdate$fcst_syear)) { + fcst.sdate <- paste0(recipe$Analysis$Time$sdate$fcst_syear, + recipe$Analysis$Time$sdate$fcst_sday) + } else { + fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate$fcst_sday) + } + + times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate) + time <- times$time + time_step <- times$time_step + + # Generate name of output file + outfile <- get_filename(outdir, data_cube$Variable$varName, + fcst.sdate, fcst.sdate, + agg, fcst.horizon, "quantiles") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, quantiles, time_step), outfile) + } else { + latitude <- data_cube$lat[1:length(data_cube$lat)] + longitude <- data_cube$lon[1:length(data_cube$lon)] + latlon <- get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, quantiles, list(time_step)) + ArrayToNc(vars, outfile) + } + + print("##### PERCENTILES SAVED TO NETCDF FILE #####") + +} + +save_probabilities <- function(probs, + recipe, + data_cube, + outdir, + agg="global") { + # Loops over the years in the s2dv_cube containing a hindcast or forecast + # and exports the corresponding category probabilities to a netCDF file. + # probs: array containing the probability data + # recipe: the auto-s2s recipe + # data_cube: s2dv_cube containing the data and metadata + # outdir: directory where the files should be saved + # type: 'exp' (hcst and fcst) or 'obs' + # agg: aggregation, "global" or "country" + + lalo <- c('longitude', 'latitude') + + variable <- data_cube$Variable$varName + var.longname <- attr(data_cube$Variable, 'variable')$long_name + global_attributes <- get_global_attributes(recipe) + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + + # Generate vector containing leadtimes + ## TODO: Move to a separate function? + dates <- sort(as.Date(data$hcst$Dates$start)) + n_steps <- dim(data$hcst$data)['time'][[1]] # number of time steps + dates <- dates[1:n_steps] + init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate$fcst_sday), + format = '%Y%m%d') + if (store.freq == "monthly_mean") { + # Get time difference in months + leadtimes <- interval(init_date, dates) %/% months(1) + } else { + # Get time difference in days + leadtimes <- as.numeric(dates - init_date) + } + + syears <- seq(1:dim(data_cube$data)['syear'][[1]]) + for (i in syears) { + # Select year from array and rearrange dimensions + probs_syear <- lapply(probs, function(x) { + x[i, , , ]}) + # Restore time dimension if the arrays are missing it + if (!("time" %in% names(dim(probs_syear[[1]])))) { + probs_syear <- lapply(probs_syear, function(x) { + dim(x) <- c("time" = 1, dim(x))}) + } + if (tolower(agg) == "global") { + probs_syear <- lapply(probs_syear, function(x) { + Reorder(x, c(lalo, 'time'))}) + } else { + probs_syear <- lapply(probs_syear, function(x) { + Reorder(x, c('country', 'time'))}) + } + + ## TODO: Replace for loop with something more efficient? + for (bin in 1:length(probs_syear)) { + ## TODO: create dictionary with proper metadata + ## TODO: replace with proper standard names + prob_bin <- names(probs_syear[bin]) + if (tolower(agg) == "country") { + sdname <- paste0(prob_bin, " probability category") + dims <- c('Country', 'time') + } else { + sdname <- paste0(prob_bin, " probability category") + dims <- c(lalo, 'time') + } + metadata <- list(metric = list(name = prob_bin, standard_name = sdname)) + attr(probs_syear[[bin]], 'variables') <- metadata + names(dim(probs_syear[[bin]])) <- dims # is this necessary? + } + + # Add global attributes + global_attributes <- get_global_attributes(recipe) + attr(probs_syear[[1]], 'global_attrs') <- global_attributes + + # Select start date + fcst.sdate <- data_cube$load_parameters$dat1$file_date[[1]][i] + + # Get time dimension values and metadata + times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate) + time <- times$time + time_step <- times$time_step + + # Generate name of output file + outfile <- get_filename(outdir, data_cube$Variable$varName, + fcst.sdate, fcst.sdate, + agg, fcst.horizon, "probs") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, probs_syear, time_step), outfile) + } else { + latitude <- data_cube$lat[1:length(data_cube$lat)] + longitude <- data_cube$lon[1:length(data_cube$lon)] + latlon <- get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, probs_syear, list(time_step)) + ArrayToNc(vars, outfile) + } + } + print("##### PROBABILITIES SAVED TO NETCDF FILE #####") +} diff --git a/modules/Saving/paths2save.R b/modules/Saving/paths2save.R index 50157c10..2024ce4e 100644 --- a/modules/Saving/paths2save.R +++ b/modules/Saving/paths2save.R @@ -20,8 +20,10 @@ get_filename <- function(dir, var, date, fcst.sdate, agg, horizon, file.type) { switch(file.type, "skill" = {file <- paste0(var, gg, "-skill_", dd, shortdate)}, "corr" = {file <- paste0(var, gg, "-corr_", dd, shortdate)}, - "exp" = {file <- paste0(var, gg, "_", date, "_", shortdate)}, - "obs" = {file <- paste0(var, gg, "-obs_", date, "_", shortdate)}) + "exp" = {file <- paste0(var, gg, "_", date)}, + "obs" = {file <- paste0(var, gg, "-obs_", date)}, + "quantiles" = {file <- paste0(var, gg, "-percentiles_", dd, shortdate)}, + "probs" = {file <- paste0(var, gg, "-probs_", date)}) return(paste0(dir, file, ".nc")) diff --git a/modules/test_victoria.R b/modules/test_victoria.R index 5859660c..1a749064 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -17,10 +17,19 @@ calibrated_data <- calibrate_datasets(data, recipe) skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe, na.rm = T, ncores = 4) +probabilities <- compute_probabilities(calibrated_data$hcst, recipe, + na.rm = T, ncores = 4) + + # Export skill metrics onto outfile outdir <- get_dir(recipe) dir.create(outdir, showWarnings = FALSE, recursive = TRUE) save_metrics(skill_metrics, recipe, data$hcst, outdir, agg = "global") + +# Export quantiles onto outfile +save_quantiles(probabilities$quantiles, recipe, data$hcst, outdir, + agg = "global") + # Export hindcast, forecast and observations onto outfile save_forecast(calibrated_data$hcst, recipe, outdir, agg = "global") if (!is.null(calibrated_data$fcst)) { -- GitLab From 88345e0ad532280f7f96830e0a846bb42c2f256b Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 15 Jul 2022 10:46:55 +0200 Subject: [PATCH 29/68] Change 'quantiles' to 'percentiles'; change netCDF time units to 'hours since'; minor format fixes --- modules/Saving/Saving.R | 103 +++++++++++------------------------- modules/Saving/paths2save.R | 3 +- modules/Skill/Skill.R | 5 +- modules/test_victoria.R | 22 ++++---- 4 files changed, 45 insertions(+), 88 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 63cdf7fe..1bd7adcc 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -16,29 +16,21 @@ get_global_attributes <- function(recipe) { reference = parameters$Datasets$Reference$name, calibration_method = parameters$Workflow$Calibration$method, computed_on = current_time) - + return(attrs) - } get_times <- function(store.freq, fcst.horizon, leadtimes, sdate) { # Generates time dimensions and the corresponding metadata. ## TODO: Add calendar ## TODO: Subseasonal and decadal + ## TODO: Remove time_step var? - if (store.freq == "monthly_mean") { - switch(fcst.horizon, - "seasonal" = {time <- leadtimes; ref <- 'months since '; - stdname <- paste(strtoi(leadtimes), collapse=", ")}, - "subseasonal" = {len <- 4; ref <- 'weeks since '; - stdname <- ''}) - # time <- 1:len ## Is this correct? I think it needs to be changed - - } else if (store.freq == "daily_mean") { - switch(fcst.horizon, - "seasonal" = {time <- leadtimes; ref <- 'days since '; - stdname <- paste(strtoi(leadtimes), collapse=", ")}) - } + switch(fcst.horizon, + "seasonal" = {time <- leadtimes; ref <- 'hours since '; + stdname <- paste(strtoi(leadtimes), collapse=", ")}, + "subseasonal" = {len <- 4; ref <- 'hours since '; + stdname <- ''}) dim(time) <- length(time) sdate <- as.Date(sdate, format = '%Y%m%d') # reformatting @@ -61,7 +53,6 @@ get_times <- function(store.freq, fcst.horizon, leadtimes, sdate) { names(dim(sdate)) <- 'sdate' return(list(time_step=time_step, time=time, sdate=sdate)) - } get_latlon <- function(latitude, longitude) { @@ -111,13 +102,8 @@ save_forecast <- function(data_cube, init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate$fcst_sday), format = '%Y%m%d') - if (store.freq == "monthly_mean") { - # Get time difference in months - leadtimes <- interval(init_date, dates) %/% months(1) - } else { - # Get time difference in days - leadtimes <- as.numeric(dates - init_date) - } + # Get time difference in months + leadtimes <- interval(init_date, dates) %/% hours(1) syears <- seq(1:dim(data_cube$data)['syear'][[1]]) for (i in syears) { @@ -214,13 +200,8 @@ save_observations <- function(data_cube, init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate$fcst_sday), format = '%Y%m%d') - if (store.freq == "monthly_mean") { - # Get time difference in months - leadtimes <- interval(init_date, dates) %/% months(1) - } else { - # Get time difference in days - leadtimes <- as.numeric(dates - init_date) - } + # Get time difference in months + leadtimes <- interval(init_date, dates) %/% hours(1) syears <- seq(1:dim(data_cube$data)['syear'][[1]]) for (i in syears) { @@ -302,9 +283,7 @@ save_observations <- function(data_cube, ArrayToNc(vars, outfile) } } - print("##### OBS SAVED TO NETCDF FILE #####") - } ## TODO: Place inside a function somewhere @@ -370,13 +349,8 @@ save_metrics <- function(skill, init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate$fcst_sday), format = '%Y%m%d') - if (store.freq == "monthly_mean") { - # Get time difference in months - leadtimes <- interval(init_date, dates) %/% months(1) - } else { - # Get time difference in days - leadtimes <- as.numeric(dates - init_date) - } + # Get time difference in months + leadtimes <- interval(init_date, dates) %/% hours(1) # If a fcst is provided, use that as the ref. year. Otherwise use 1970. if (!is.null(recipe$Analysis$Time$sdate$fcst_syear)) { fcst.sdate <- paste0(recipe$Analysis$Time$sdate$fcst_syear, @@ -407,45 +381,43 @@ save_metrics <- function(skill, vars <- c(vars, skill, list(time_step)) ArrayToNc(vars, outfile) } - print("##### SKILL METRICS SAVED TO NETCDF FILE #####") - } -save_quantiles <- function(quantiles, +save_percentiles <- function(percentiles, recipe, data_cube, outdir, agg="global") { - # This function adds metadata to the quantiles + # This function adds metadata to the percentiles # and exports them to a netCDF file inside 'outdir'. # Define grid dimensions and names lalo <- c('longitude', 'latitude') # Remove singleton dimensions and rearrange lon, lat and time dims if (tolower(agg) == "global") { - quantiles <- lapply(quantiles, function(x) { + percentiles <- lapply(percentiles, function(x) { Reorder(x, c(lalo, 'time'))}) } # Add global and variable attributes global_attributes <- get_global_attributes(recipe) - attr(quantiles[[1]], 'global_attrs') <- global_attributes + attr(percentiles[[1]], 'global_attrs') <- global_attributes - for (i in 1:length(quantiles)) { + for (i in 1:length(percentiles)) { ## TODO: create dictionary with proper metadata ## TODO: replace with proper standard names - quantile <- names(quantiles[i]) + percentile <- names(percentiles[i]) if (tolower(agg) == "country") { - sdname <- paste0(quantile, " percentile") + sdname <- paste0(gsub("^.*_", "", percentile), "th percentile") dims <- c('Country', 'time') } else { - sdname <- paste0(quantile, " percentile") + sdname <- paste0(gsub("^.*_", "", percentile), "th percentile") dims <- c(lalo, 'time') } - metadata <- list(metric = list(name = quantile, standard_name = sdname)) - attr(quantiles[[i]], 'variables') <- metadata - names(dim(quantiles[[i]])) <- dims + metadata <- list(metric = list(name = percentile, standard_name = sdname)) + attr(percentiles[[i]], 'variables') <- metadata + names(dim(percentiles[[i]])) <- dims } # Time indices and metadata @@ -458,13 +430,9 @@ save_quantiles <- function(quantiles, init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate$fcst_sday), format = '%Y%m%d') - if (store.freq == "monthly_mean") { - # Get time difference in months - leadtimes <- interval(init_date, dates) %/% months(1) - } else { - # Get time difference in days - leadtimes <- as.numeric(dates - init_date) - } + # Get time difference in hours + leadtimes <- interval(init_date, dates) %/% hours(1) + # If a fcst is provided, use that as the ref. year. Otherwise use 1970. if (!is.null(recipe$Analysis$Time$sdate$fcst_syear)) { fcst.sdate <- paste0(recipe$Analysis$Time$sdate$fcst_syear, @@ -480,24 +448,22 @@ save_quantiles <- function(quantiles, # Generate name of output file outfile <- get_filename(outdir, data_cube$Variable$varName, fcst.sdate, fcst.sdate, - agg, fcst.horizon, "quantiles") + agg, fcst.horizon, "percentiles") # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { country <- get_countries(grid) - ArrayToNc(append(country, time, quantiles, time_step), outfile) + ArrayToNc(append(country, time, percentiles, time_step), outfile) } else { latitude <- data_cube$lat[1:length(data_cube$lat)] longitude <- data_cube$lon[1:length(data_cube$lon)] latlon <- get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, quantiles, list(time_step)) + vars <- c(vars, percentiles, list(time_step)) ArrayToNc(vars, outfile) } - print("##### PERCENTILES SAVED TO NETCDF FILE #####") - } save_probabilities <- function(probs, @@ -530,13 +496,8 @@ save_probabilities <- function(probs, init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate$fcst_sday), format = '%Y%m%d') - if (store.freq == "monthly_mean") { - # Get time difference in months - leadtimes <- interval(init_date, dates) %/% months(1) - } else { - # Get time difference in days - leadtimes <- as.numeric(dates - init_date) - } + # Get time difference in hours + leadtimes <- interval(init_date, dates) %/% hours(1) syears <- seq(1:dim(data_cube$data)['syear'][[1]]) for (i in syears) { diff --git a/modules/Saving/paths2save.R b/modules/Saving/paths2save.R index 2024ce4e..cabcccc0 100644 --- a/modules/Saving/paths2save.R +++ b/modules/Saving/paths2save.R @@ -22,7 +22,8 @@ get_filename <- function(dir, var, date, fcst.sdate, agg, horizon, file.type) { "corr" = {file <- paste0(var, gg, "-corr_", dd, shortdate)}, "exp" = {file <- paste0(var, gg, "_", date)}, "obs" = {file <- paste0(var, gg, "-obs_", date)}, - "quantiles" = {file <- paste0(var, gg, "-percentiles_", dd, shortdate)}, + "percentiles" = {file <- paste0(var, gg, "-percentiles_", dd, + shortdate)}, "probs" = {file <- paste0(var, gg, "-probs_", date)}) return(paste0(dir, file, ".nc")) diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index cc89dadb..c337e8a9 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -161,7 +161,7 @@ compute_probabilities <- function(data, recipe, na.rm = T, ncores = 1) { for (i in seq(1:dim(probs$quantiles)['bin'][[1]])) { named_quantiles <- append(named_quantiles, list(probs$quantiles[i, , , , , , ,])) - names(named_quantiles)[length(named_quantiles)] <- paste0("quantile_", + names(named_quantiles)[length(named_quantiles)] <- paste0("percentile_", as.integer(thresholds[i]*100)) } for (i in seq(1:dim(probs$probs)['bin'][[1]])) { @@ -178,7 +178,7 @@ compute_probabilities <- function(data, recipe, na.rm = T, ncores = 1) { } # remove(probs) } - return(list(probs=named_probs, quantiles=named_quantiles)) + return(list(probs=named_probs, percentiles=named_quantiles)) } } @@ -197,6 +197,5 @@ compute_probabilities <- function(data, recipe, na.rm = T, ncores = 1) { # } else { # dim(metric_array) <- c(dim(metric_array), "ensemble" = 1) } - return(metric_array) } diff --git a/modules/test_victoria.R b/modules/test_victoria.R index 1a749064..0a5834b8 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -1,6 +1,5 @@ - -recipe_file <- "modules/Loading/testing_recipes/recipe_4.yml" +recipe_file <- "modules/Loading/testing_recipes/recipe_3.yml" source("modules/Loading/Loading.R") source("modules/Calibration/Calibration.R") @@ -17,22 +16,19 @@ calibrated_data <- calibrate_datasets(data, recipe) skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe, na.rm = T, ncores = 4) -probabilities <- compute_probabilities(calibrated_data$hcst, recipe, +probs <- compute_probabilities(calibrated_data$hcst, recipe, na.rm = T, ncores = 4) - # Export skill metrics onto outfile outdir <- get_dir(recipe) dir.create(outdir, showWarnings = FALSE, recursive = TRUE) -save_metrics(skill_metrics, recipe, data$hcst, outdir, agg = "global") - -# Export quantiles onto outfile -save_quantiles(probabilities$quantiles, recipe, data$hcst, outdir, - agg = "global") - +save_metrics(skill_metrics, recipe, data$hcst, outdir) +# Export percentiles and probability bins onto outfile +save_percentiles(probs$percentiles, recipe, data$hcst, outdir) +save_probabilities(probs$probs, recipe, data$hcst, outdir) # Export hindcast, forecast and observations onto outfile -save_forecast(calibrated_data$hcst, recipe, outdir, agg = "global") +save_forecast(calibrated_data$hcst, recipe, outdir) if (!is.null(calibrated_data$fcst)) { - save_forecast(calibrated_data$fcst, recipe, outdir, agg = "global") + save_forecast(calibrated_data$fcst, recipe, outdir) } -save_observations(data$obs, recipe, outdir, agg = "global") +save_observations(data$obs, recipe, outdir) -- GitLab From 6c084a120dfcf44eb37f41af1ffad821ec7e02cc Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 15 Jul 2022 10:47:23 +0200 Subject: [PATCH 30/68] Add prob thresholds to recipes for testing --- modules/Loading/testing_recipes/recipe_1.yml | 2 +- modules/Loading/testing_recipes/recipe_3.yml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/Loading/testing_recipes/recipe_1.yml b/modules/Loading/testing_recipes/recipe_1.yml index b1133cf0..624f1b38 100644 --- a/modules/Loading/testing_recipes/recipe_1.yml +++ b/modules/Loading/testing_recipes/recipe_1.yml @@ -34,7 +34,7 @@ Analysis: method: qmap # Mandatory, str: Calibration method. See docu. Skill: metric: RPSS FRPSS # str: Skill metric or list of skill metrics. See docu. - prob: [[1/3, 2/3], [1/10]] # frac: Probability bins + prob: [[1/3, 2/3], [1/10, 9/10]] # frac: Probability bins Indicators: index: no Output_format: S2S4E diff --git a/modules/Loading/testing_recipes/recipe_3.yml b/modules/Loading/testing_recipes/recipe_3.yml index 1db811db..53fe0719 100644 --- a/modules/Loading/testing_recipes/recipe_3.yml +++ b/modules/Loading/testing_recipes/recipe_3.yml @@ -33,6 +33,7 @@ Analysis: method: qmap Skill: metric: FRPS RPSS + prob: [[1/3, 2/3], [1/10, 9/10]] Indicators: index: no Output_format: S2S4E -- GitLab From f73a80749d1f23b876a368e52e9a69e26b9fa1e7 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 15 Jul 2022 15:55:59 +0200 Subject: [PATCH 31/68] Change leadtimemin and leadtimemax to ftime_min and ftime_max --- modules/Loading/Loading.R | 8 ++++---- modules/Loading/testing_recipes/recipe_1.yml | 4 ++-- modules/Loading/testing_recipes/recipe_2.yml | 5 +++-- modules/Loading/testing_recipes/recipe_3.yml | 4 ++-- modules/Loading/testing_recipes/recipe_4.yml | 4 ++-- modules/Loading/testing_recipes/recipe_5.yml | 4 ++-- 6 files changed, 15 insertions(+), 14 deletions(-) diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 8171a5a3..455ed483 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -42,13 +42,13 @@ load_datasets <- function(recipe_file) { idxs <- NULL idxs$hcst <- get_timeidx(sdates$hcst, - recipe$Analysis$Time$leadtimemin, - recipe$Analysis$Time$leadtimemax, + recipe$Analysis$Time$ftime_min, + recipe$Analysis$Time$ftime_max, time_freq=store.freq) idxs$fcst <- get_timeidx(sdates$fcst, - recipe$Analysis$Time$leadtimemin, - recipe$Analysis$Time$leadtimemax, + recipe$Analysis$Time$ftime_min, + recipe$Analysis$Time$ftime_max, time_freq=store.freq) ## TODO: Examine this verifications part, verify if it's necessary diff --git a/modules/Loading/testing_recipes/recipe_1.yml b/modules/Loading/testing_recipes/recipe_1.yml index 624f1b38..02a7a1df 100644 --- a/modules/Loading/testing_recipes/recipe_1.yml +++ b/modules/Loading/testing_recipes/recipe_1.yml @@ -19,8 +19,8 @@ Analysis: fcst_sday: '1101' # Mandatory, int: Start date, 'MMDD' hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' hcst_end: '1996' # Mandatory, int: Hindcast end year 'YYYY' - leadtimemin: 0 # Mandatory, int: First leadtime time step in months - leadtimemax: 1 # Mandatory, int: Last leadtime time step in months + ftime_min: 0 # Mandatory, int: First leadtime time step in months + ftime_max: 1 # Mandatory, int: Last leadtime time step in months Region: latmin: -10 # Mandatory, int: minimum latitude latmax: 10 # Mandatory, int: maximum latitude diff --git a/modules/Loading/testing_recipes/recipe_2.yml b/modules/Loading/testing_recipes/recipe_2.yml index 920d4940..7c1980c4 100644 --- a/modules/Loading/testing_recipes/recipe_2.yml +++ b/modules/Loading/testing_recipes/recipe_2.yml @@ -18,8 +18,8 @@ Analysis: fcst_sday: '0601' hcst_start: '1993' hcst_end: '2006' - leadtimemin: 0 - leadtimemax: 2 + ftime_min: 0 + ftime_max: 2 Region: latmin: -10 latmax: 10 @@ -33,6 +33,7 @@ Analysis: method: raw Skill: metric: RPSS_specs BSS90_specs EnsCorr_specs FRPS_specs FRPSS_specs BSS10_specs FRPS + prob: [[1/3, 2/3]] Indicators: index: no Output_format: S2S4E diff --git a/modules/Loading/testing_recipes/recipe_3.yml b/modules/Loading/testing_recipes/recipe_3.yml index 53fe0719..418e50e7 100644 --- a/modules/Loading/testing_recipes/recipe_3.yml +++ b/modules/Loading/testing_recipes/recipe_3.yml @@ -18,8 +18,8 @@ Analysis: fcst_sday: '1101' hcst_start: '1993' hcst_end: '2003' - leadtimemin: 0 - leadtimemax: 1 + ftime_min: 0 + ftime_max: 1 Region: latmin: -10 latmax: 10 diff --git a/modules/Loading/testing_recipes/recipe_4.yml b/modules/Loading/testing_recipes/recipe_4.yml index c5478c42..3a936123 100644 --- a/modules/Loading/testing_recipes/recipe_4.yml +++ b/modules/Loading/testing_recipes/recipe_4.yml @@ -18,8 +18,8 @@ Analysis: fcst_sday: '1101' hcst_start: '1993' hcst_end: '2016' - leadtimemin: 0 - leadtimemax: 2 + ftime_min: 0 + ftime_max: 2 Region: latmin: -10 latmax: 10 diff --git a/modules/Loading/testing_recipes/recipe_5.yml b/modules/Loading/testing_recipes/recipe_5.yml index a3da66a5..e05e890a 100644 --- a/modules/Loading/testing_recipes/recipe_5.yml +++ b/modules/Loading/testing_recipes/recipe_5.yml @@ -18,8 +18,8 @@ Analysis: fcst_sday: '0301' hcst_start: '1993' hcst_end: '2016' - leadtimemin: 0 - leadtimemax: 0 + ftime_min: 0 + ftime_max: 0 Region: latmin: -10 latmax: 10 -- GitLab From f87f6cbf199f95d4c26aec594815e851a8387e13 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 15 Jul 2022 15:57:52 +0200 Subject: [PATCH 32/68] Add variable dictionary --- conf/variable-dictionary.yml | 33 ++++++++++++++++++++++++++ modules/Saving/vitigeoss-vars-dict.yml | 20 ---------------- 2 files changed, 33 insertions(+), 20 deletions(-) create mode 100644 conf/variable-dictionary.yml delete mode 100644 modules/Saving/vitigeoss-vars-dict.yml diff --git a/conf/variable-dictionary.yml b/conf/variable-dictionary.yml new file mode 100644 index 00000000..e1a0e702 --- /dev/null +++ b/conf/variable-dictionary.yml @@ -0,0 +1,33 @@ + +vars: + +# ECVs + tas: + units: "K" + long_name: "Near-Surface Air Temperature" + standard_name: "air_temperature" + outname: "t2" + tasmax: + units: "K" + long_name: "Maximum Near-Surface Air Temperature" + standard_name: "air_temperature" + tasmin: + units: "K" + long_name: "Minimum Near-Surface Air Temperature" + standard_name: "air_temperature" + sfcWind: + units: "m s-1" + long_name: "Near-Surface Wind Speed" + standard_name: "wind_speed" + outname: "wind" + rsds: + units: "W m-2" + long_name: "Surface Downwelling Shortwave Radiation" + standard_name: "surface_downwelling_shortwave_flux_in_air" + positive: "down" + outname: "rswin" + prlr: + units: "mm" + long_name: "Total precipitation" + standard_name: "total_precipitation_flux" #? Not in CF + outname: "acprec" diff --git a/modules/Saving/vitigeoss-vars-dict.yml b/modules/Saving/vitigeoss-vars-dict.yml deleted file mode 100644 index 759a59ee..00000000 --- a/modules/Saving/vitigeoss-vars-dict.yml +++ /dev/null @@ -1,20 +0,0 @@ - -vars: - -# ECVs - tas: - units: "°C" - longname: "2m temperature" - outname: "t2" - sfcWind: - units: "m/s" - longname: "Surface wind speed module" - outname: "wind" - rsds: - units: "W/m2" - longname: "shortwave radiation at ground" - outname: "rswin" - prlr: - units: "mm" - longname: "accumulated precipitation" - outname: "acprec" \ No newline at end of file -- GitLab From 55c914ee8dddcf8dd4945b01fd703f4bbe41b21e Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 18 Jul 2022 14:24:35 +0200 Subject: [PATCH 33/68] Add a TODO comment --- modules/Loading/Loading.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 455ed483..a97f0fdb 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -138,9 +138,15 @@ load_datasets <- function(recipe_file) { latitude = 1, longitude = 1, ensemble = 1) default_dims[names(dim(hcst))] <- dim(hcst) dim(hcst) <- default_dims +# default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) +# names(dim(attr(hcst, "Variables")$common$time))[which(names(dim( +# attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear" +# dim(attr(hcst, "Variables")$common$time) <- default_time_dims } # Convert hcst to s2dv_cube object + ## TODO: Give correct dimensions to $Dates$start + ## (sday, sweek, syear instead of file_date) hcst <- as.s2dv_cube(hcst) # Load forecast -- GitLab From 5d3b04c9de4c8281f09ff87d28443eb66b4a0254 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 19 Jul 2022 16:44:20 +0200 Subject: [PATCH 34/68] Change 'enscorr' to 'corr' --- modules/Saving/Saving.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 1bd7adcc..9bd0d90e 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -1,4 +1,5 @@ -## TODO: Implement wrapper to get grid and time info? +## TODO: Add function to save corr +## TODO: Save obs percentiles source("modules/Saving/paths2save.R") @@ -306,8 +307,8 @@ save_metrics <- function(skill, # Remove ensemble correlation from the list since it should be saved in # a separate file, as it has 'ensemble' dim. if ("corr" %in% names(skill)) { - enscorr_metrics <- grep("^corr", names(skill)) - skill <- skill[-enscorr_metrics] + corr_metrics <- grep("^corr", names(skill)) + skill <- skill[-corr_metrics] } # Define grid dimensions and names -- GitLab From 5407c63b0d9437129af96d4d99c1294ae5a68b55 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 25 Jul 2022 15:58:36 +0200 Subject: [PATCH 35/68] Set parameter wet.day to FALSE in CST_QuantileMapping() to avoid undesired zeros --- modules/Calibration/Calibration.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 5178b10b..6a7eb1dd 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -96,6 +96,7 @@ calibrate_datasets <- function(data, recipe) { "ensemble"), sample_length = NULL, method = "QUANT", + wet.day = FALSE, ncores = ncores, na.rm = na.rm) @@ -108,6 +109,7 @@ calibrate_datasets <- function(data, recipe) { "ensemble"), sample_length = NULL, method = "QUANT", + wet.day = FALSE, ncores = ncores, na.rm = na.rm) } else { -- GitLab From 6a7303dd0638ae7abd456109e37bda3f2391fc42 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 26 Jul 2022 09:18:16 +0200 Subject: [PATCH 36/68] Add 'complete' message for quantiles' --- modules/Skill/Skill.R | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index c337e8a9..c099124c 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -71,14 +71,14 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { # Ranked Probability Score and Fair version if (metric %in% c('rps', 'frps')) { skill <- RPS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, - Fair = Fair, ncores = ncores) + Fair = Fair, ncores = ncores) skill <- .drop_dims(skill) skill_metrics[[ metric ]] <- skill # Ranked Probability Skill Score and Fair version } else if (metric %in% c('rpss', 'frpss')) { skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, - Fair = Fair, ncores = ncores) + Fair = Fair, ncores = ncores) skill$rpss <- .drop_dims(skill$rpss) skill$sign <- .drop_dims(skill$sign) skill_metrics[[ metric ]] <- skill$rpss @@ -134,15 +134,10 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { skill <- colMeans(skill, dims = 1) } skill_metrics[[ metric ]] <- skill - } - } - print("##### SKILL METRIC COMPUTATION COMPLETE #####") - return(skill_metrics) - } compute_probabilities <- function(data, recipe, na.rm = T, ncores = 1) { @@ -178,8 +173,9 @@ compute_probabilities <- function(data, recipe, na.rm = T, ncores = 1) { } # remove(probs) } - return(list(probs=named_probs, percentiles=named_quantiles)) } + print("##### PERCENTILES AND PROBABILITY CATEGORIES COMPUTED #####") + return(list(probs=named_probs, percentiles=named_quantiles)) } -- GitLab From 2335fb4148b29ebee1352feb5141e9e7b75c7970 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 26 Jul 2022 10:59:36 +0200 Subject: [PATCH 37/68] Include unit tests in this branch, modify seasonal unit test --- tests/recipes/recipe-decadal_daily_1.yml | 44 ++++ tests/recipes/recipe-decadal_monthly_1.yml | 45 ++++ tests/recipes/recipe-decadal_monthly_2.yml | 44 ++++ tests/recipes/recipe-seasonal_monthly_1.yml | 43 ++++ tests/test_decadal.R | 8 + tests/test_seasonal.R | 9 + tests/testthat/test-decadal_daily_1.R | 221 ++++++++++++++++++++ tests/testthat/test-decadal_monthly_1.R | 197 +++++++++++++++++ tests/testthat/test-decadal_monthly_2.R | 214 +++++++++++++++++++ tests/testthat/test-seasonal_monthly.R | 190 +++++++++++++++++ 10 files changed, 1015 insertions(+) create mode 100644 tests/recipes/recipe-decadal_daily_1.yml create mode 100644 tests/recipes/recipe-decadal_monthly_1.yml create mode 100644 tests/recipes/recipe-decadal_monthly_2.yml create mode 100644 tests/recipes/recipe-seasonal_monthly_1.yml create mode 100644 tests/test_decadal.R create mode 100644 tests/test_seasonal.R create mode 100644 tests/testthat/test-decadal_daily_1.R create mode 100644 tests/testthat/test-decadal_monthly_1.R create mode 100644 tests/testthat/test-decadal_monthly_2.R create mode 100644 tests/testthat/test-seasonal_monthly.R diff --git a/tests/recipes/recipe-decadal_daily_1.yml b/tests/recipes/recipe-decadal_daily_1.yml new file mode 100644 index 00000000..ed81146f --- /dev/null +++ b/tests/recipes/recipe-decadal_daily_1.yml @@ -0,0 +1,44 @@ +Description: + Author: An-Chi Ho + '': split version +Analysis: + Horizon: Decadal + Variables: + name: tas + freq: daily_mean + Datasets: + System: + name: MIROC6 #EC-Earth3-i4 #BCC-CSM2-MR #CanESM5 + member: r1i1p1f1,r2i1p1f1,r3i1p1f1 #'all' + Multimodel: no + Reference: + name: ERA5 + Time: + fcst: [2017,2018] + hcst_start: 1990 + hcst_end: 1992 + season: 'Annual' + ftime_min: 2 + ftime_max: 4 + Region: + latmin: 10 #-90 + latmax: 20 #90 + lonmin: 0 + lonmax: 15 #359.9 + Regrid: + method: bilinear + type: to_system #to_reference + Workflow: + Calibration: + method: qmap + Skill: + metric: RPSS + Indicators: + index: FALSE + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/aho/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/aho/git/auto-s2s/ + diff --git a/tests/recipes/recipe-decadal_monthly_1.yml b/tests/recipes/recipe-decadal_monthly_1.yml new file mode 100644 index 00000000..8312f81a --- /dev/null +++ b/tests/recipes/recipe-decadal_monthly_1.yml @@ -0,0 +1,45 @@ +Description: + Author: An-Chi Ho + '': split version +Analysis: + Horizon: Decadal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: EC-Earth3-i4 + member: r1i4p1f1,r2i4p1f1 + Multimodel: no + Reference: + name: ERA5 #JRA-55 + Time: + sdate: + fcst: 2021 + hcst_start: 1991 + hcst_end: 1994 +# season: 'Annual' + ftime_min: 0 + ftime_max: 2 + Region: + latmin: 17 + latmax: 20 + lonmin: 12 + lonmax: 15 + Regrid: + method: bilinear + type: to_system #to_reference + Workflow: + Calibration: + method: bias + Skill: + metric: RPSS + Indicators: + index: FALSE + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/aho/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/aho/git/auto-s2s/ + diff --git a/tests/recipes/recipe-decadal_monthly_2.yml b/tests/recipes/recipe-decadal_monthly_2.yml new file mode 100644 index 00000000..040bbe3e --- /dev/null +++ b/tests/recipes/recipe-decadal_monthly_2.yml @@ -0,0 +1,44 @@ +Description: + Author: An-Chi Ho + '': split version +Analysis: + Horizon: Decadal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: EC-Earth3-i4 #CanESM5 + member: r1i4p1f1,r2i4p1f1,r3i4p1f1 #'all' + Multimodel: no + Reference: + name: ERA5 #JRA-55 + Time: + fcst: [2020,2021] + hcst_start: 1990 + hcst_end: 1992 +# season: 'Annual' + ftime_min: 0 + ftime_max: 13 + Region: + latmin: -60 #-90 + latmax: -55 #90 + lonmin: -2 + lonmax: 2 #359.9 + Regrid: + method: bilinear + type: to_system #to_reference + Workflow: + Calibration: + method: bias + Skill: + metric: RPSS + Indicators: + index: FALSE + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/aho/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/aho/git/auto-s2s/ + diff --git a/tests/recipes/recipe-seasonal_monthly_1.yml b/tests/recipes/recipe-seasonal_monthly_1.yml new file mode 100644 index 00000000..d8b32a61 --- /dev/null +++ b/tests/recipes/recipe-seasonal_monthly_1.yml @@ -0,0 +1,43 @@ +Description: + Author: V. Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: system7c3s + Multimodel: False + Reference: + name: era5 + Time: + sdate: + fcst_syear: '2020' + fcst_sday: '1101' + hcst_start: '1993' + hcst_end: '1996' + ftime_min: 0 + ftime_max: 2 + Region: + latmin: 17 + latmax: 20 + lonmin: 12 + lonmax: 15 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: mse_min + Skill: + metric: RPSS + Indicators: + index: no + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/tests/test_decadal.R b/tests/test_decadal.R new file mode 100644 index 00000000..84a23b62 --- /dev/null +++ b/tests/test_decadal.R @@ -0,0 +1,8 @@ +library(testthat) + +path_testthat <- file.path('./tests/testthat/') +files_testthat <- list.files('./tests/testthat/', pattern = 'decadal') + +for (i_file in 1:length(files_testthat)) { + source(paste0('./tests/testthat/', files_testthat[i_file])) +} diff --git a/tests/test_seasonal.R b/tests/test_seasonal.R new file mode 100644 index 00000000..4718e3d4 --- /dev/null +++ b/tests/test_seasonal.R @@ -0,0 +1,9 @@ +library(testthat) + +path_testthat <- file.path('./tests/testthat/') +files_testthat <- list.files('./tests/testthat/', pattern = 'seasonal') + +for (i_file in 1:length(files_testthat)) { + source(paste0('./tests/testthat/', files_testthat[i_file])) +} + diff --git a/tests/testthat/test-decadal_daily_1.R b/tests/testthat/test-decadal_daily_1.R new file mode 100644 index 00000000..15020591 --- /dev/null +++ b/tests/testthat/test-decadal_daily_1.R @@ -0,0 +1,221 @@ +context("Decadal daily data - 1") + +########################################### + +source("modules/Loading/Loading_decadal.R") +source("modules/Calibration/Calibration.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Save_data.R") + +recipe_file <- "tests/recipes/recipe-decadal_daily_1.yml" + +# Load datasets +suppressWarnings({invisible(capture.output( +data <- load_datasets(recipe_file) +))}) + +#recipe <- read_yaml(recipe_file) +## Calibrate datasets +#suppressWarnings({invisible(capture.output( +# calibrated_data <- calibrate_datasets(data, recipe) +#))}) +# +## Compute skill metrics +#suppressWarnings({invisible(capture.output( +#skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, +# recipe, na.rm = T, ncores = 4) +#))}) + +#====================================== + +test_that("1. Loading", { + +expect_equal( +is.list(data), +TRUE +) +expect_equal( +names(data), +c("hcst", "fcst", "obs") +) +expect_equal( +class(data$hcst), +"s2dv_cube" +) +expect_equal( +class(data$fcst), +"s2dv_cube" +) +expect_equal( +class(data$obs), +"s2dv_cube" +) +expect_equal( +names(data$hcst), +c("data", "Variable", "Datasets", "Dates", "when", "source_files", "load_parameters") +) +expect_equal( +names(data$hcst), +names(data$fcst) +) +expect_equal( +names(data$hcst), +names(data$obs) +) +expect_equal( +dim(data$hcst$data), +c(dat = 1, var = 1, ensemble = 3, sday = 1, sweek = 1, syear = 3, time = 90, latitude = 7, longitude = 11) +) +expect_equal( +dim(data$fcst$data), +c(dat = 1, var = 1, ensemble = 3, sday = 1, sweek = 1, syear = 2, time = 90, latitude = 7, longitude = 11) +) +expect_equal( +dim(data$hcst$Dates$start), +c(sday = 1, sweek = 1, syear = 3, time = 90) +) +# hcst data +expect_equal( +as.vector(drop(data$hcst$data)[1, 2:3, 1:3, 1, 1]), +c(298.5787, 293.6479, 298.5042, 293.7802, 297.8072, 293.0764), +tolerance = 0.0001 +) +expect_equal( +as.vector(drop(data$hcst$data)[2, , 89:90, 1, 1]), +c(301.6978, 308.9792, 308.4501, 302.1620, 307.6034, 307.6388), +tolerance = 0.0001 +) +expect_equal( +mean(data$hcst$data), +301.2666, +tolerance = 0.0001 +) +expect_equal( +range(data$hcst$data), +c(285.9326, 314.9579), +tolerance = 0.0001 +) + +# fcst data +expect_equal( +as.vector(drop(data$fcst$data)[1, , 1:3, 1, 1]), +c(295.0745, 291.1006, 296.2279, 291.6309, 295.3123, 290.8995), +tolerance = 0.0001 +) +expect_equal( +as.vector(drop(data$fcst$data)[2, , 89:90, 1, 1]), +c(305.3428, 305.0657, 305.5445, 305.5681), +tolerance = 0.0001 +) + +# time value +expect_equal( +(data$hcst$Dates$start)[1], +as.POSIXct("1991-01-01 12:00:00", tz = 'UTC') +) +expect_equal( +(data$hcst$Dates$start)[2], +as.POSIXct("1992-01-01 12:00:00", tz = 'UTC') +) +expect_equal( +(data$hcst$Dates$start)[5], +as.POSIXct("1992-01-02 12:00:00", tz = 'UTC') +) +expect_equal( +(data$hcst$Dates$start)[1, 1, 3, 90], +as.POSIXct("1993-03-31 12:00:00", tz = 'UTC') +) +expect_equal( +(data$hcst$Dates$start)[1, 1, 2, 90], +as.POSIXct("1992-03-30 12:00:00", tz = 'UTC') +) + +}) + +##====================================== +#test_that("2. Calibration", { +# +#expect_equal( +#is.list(calibrated_data), +#TRUE +#) +#expect_equal( +#names(calibrated_data), +#c("hcst", "fcst") +#) +#expect_equal( +#class(calibrated_data$hcst), +#"s2dv_cube" +#) +#expect_equal( +#class(calibrated_data$fcst), +#"s2dv_cube" +#) +#expect_equal( +#dim(calibrated_data$hcst$data), +#c(dat = 1, var = 1, ensemble = 2, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 5, longitude = 4) +#) +#expect_equal( +#dim(calibrated_data$fcst$data), +#c(dat = 1, var = 1, ensemble = 2, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 5, longitude = 4) +#) +#expect_equal( +#mean(calibrated_data$fcst$data), +#291.8375, +#tolerance = 0.0001 +#) +#expect_equal( +#mean(calibrated_data$hcst$data), +#289.6679, +#tolerance = 0.0001 +#) +#expect_equal( +#as.vector(drop(calibrated_data$hcst$data)[1, , 2, 3, 4]), +#c(286.3895, 286.6408, 290.6652, 288.3759), +#tolerance = 0.0001 +#) +#expect_equal( +#range(calibrated_data$fcst$data), +#c(287.2173, 297.4578), +#tolerance = 0.0001 +#) +# +#}) +# +# +##====================================== +#test_that("3. Metrics", { +# +#expect_equal( +#is.list(skill_metrics), +#TRUE +#) +#expect_equal( +#names(skill_metrics), +#c("rpss", "rpss_significance") +#) +#expect_equal( +#class(skill_metrics$rpss[[1]]), +#"array" +#) +#expect_equal( +#dim(skill_metrics$rpss[[1]]), +#c(dat = 1, var = 1, sday = 1, sweek = 1, time = 3, latitude = 5, longitude = 4) +#) +#expect_equal( +#dim(skill_metrics$rpss_significance[[1]]), +#dim(skill_metrics$rpss[[1]]) +#) +#expect_equal( +#as.vector(drop(skill_metrics$rpss[[1]])[, 2, 3]), +#c(-0.2857143, -1.2500000, -1.8928571), +#tolerance = 0.0001 +#) +#expect_equal( +#as.vector(drop(skill_metrics$rpss_significance[[1]])[, 2, 3]), +#rep(FALSE, 3) +#) +# +#}) + + diff --git a/tests/testthat/test-decadal_monthly_1.R b/tests/testthat/test-decadal_monthly_1.R new file mode 100644 index 00000000..19560921 --- /dev/null +++ b/tests/testthat/test-decadal_monthly_1.R @@ -0,0 +1,197 @@ +context("Decadal monthly data - 1") + +########################################### + +source("modules/Loading/Loading_decadal.R") +source("modules/Calibration/Calibration.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Save_data.R") + +recipe_file <- "tests/recipes/recipe-decadal_monthly_1.yml" + +# Load datasets +suppressWarnings({invisible(capture.output( +data <- load_datasets(recipe_file) +))}) + +recipe <- read_yaml(recipe_file) +# Calibrate datasets +suppressWarnings({invisible(capture.output( + calibrated_data <- calibrate_datasets(data, recipe) +))}) + +# Compute skill metrics +suppressWarnings({invisible(capture.output( +skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, + recipe, na.rm = T, ncores = 4) +))}) + +#====================================== + +test_that("1. Loading", { + +expect_equal( +is.list(data), +TRUE +) +expect_equal( +names(data), +c("hcst", "fcst", "obs") +) +expect_equal( +class(data$hcst), +"s2dv_cube" +) +expect_equal( +class(data$fcst), +"s2dv_cube" +) +expect_equal( +class(data$obs), +"s2dv_cube" +) +expect_equal( +names(data$hcst), +c("data", "Variable", "Datasets", "Dates", "when", "source_files", "load_parameters") +) +expect_equal( +names(data$hcst), +names(data$fcst) +) +expect_equal( +names(data$hcst), +names(data$obs) +) +expect_equal( +dim(data$hcst$data), +c(dat = 1, var = 1, ensemble = 2, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 5, longitude = 4) +) +expect_equal( +dim(data$fcst$data), +c(dat = 1, var = 1, ensemble = 2, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 5, longitude = 4) +) +expect_equal( +dim(data$hcst$Dates$start), +c(sday = 1, sweek = 1, syear = 4, time = 3) +) +expect_equal( +as.vector(drop(data$hcst$data)[,1:2,1,2,3]), +c(291.3831, 291.6227, 292.3012, 290.9779), +tolerance = 0.0001 +) +expect_equal( +mean(data$hcst$data), +287.3804, +tolerance = 0.0001 +) +expect_equal( +range(data$hcst$data), +c(281.7395, 294.2467), +tolerance = 0.0001 +) +expect_equal( +(data$hcst$Dates$start)[1], +as.POSIXct("1991-11-16", tz = 'UTC') +) +expect_equal( +(data$hcst$Dates$start)[2], +as.POSIXct("1992-11-16", tz = 'UTC') +) +expect_equal( +(data$hcst$Dates$start)[5], +as.POSIXct("1991-12-16 12:00:00", tz = 'UTC') +) +expect_equal( +(data$hcst$Dates$start)[10], +as.POSIXct("1993-01-16 12:00:00", tz = 'UTC') +) + +}) + +#====================================== +test_that("2. Calibration", { + +expect_equal( +is.list(calibrated_data), +TRUE +) +expect_equal( +names(calibrated_data), +c("hcst", "fcst") +) +expect_equal( +class(calibrated_data$hcst), +"s2dv_cube" +) +expect_equal( +class(calibrated_data$fcst), +"s2dv_cube" +) +expect_equal( +dim(calibrated_data$hcst$data), +c(dat = 1, var = 1, ensemble = 2, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 5, longitude = 4) +) +expect_equal( +dim(calibrated_data$fcst$data), +c(dat = 1, var = 1, ensemble = 2, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 5, longitude = 4) +) +expect_equal( +mean(calibrated_data$fcst$data), +291.8375, +tolerance = 0.0001 +) +expect_equal( +mean(calibrated_data$hcst$data), +289.6679, +tolerance = 0.0001 +) +expect_equal( +as.vector(drop(calibrated_data$hcst$data)[1, , 2, 3, 4]), +c(286.3895, 286.6408, 290.6652, 288.3759), +tolerance = 0.0001 +) +expect_equal( +range(calibrated_data$fcst$data), +c(287.2173, 297.4578), +tolerance = 0.0001 +) + +}) + + +#====================================== +test_that("3. Metrics", { + +expect_equal( +is.list(skill_metrics), +TRUE +) +expect_equal( +names(skill_metrics), +c("rpss", "rpss_significance") +) +expect_equal( +class(skill_metrics$rpss[[1]]), +"array" +) +expect_equal( +dim(skill_metrics$rpss[[1]]), +c(dat = 1, var = 1, sday = 1, sweek = 1, time = 3, latitude = 5, longitude = 4) +) +expect_equal( +dim(skill_metrics$rpss_significance[[1]]), +dim(skill_metrics$rpss[[1]]) +) +expect_equal( +as.vector(drop(skill_metrics$rpss[[1]])[, 2, 3]), +c(-0.2857143, -1.2500000, -1.8928571), +tolerance = 0.0001 +) +expect_equal( +as.vector(drop(skill_metrics$rpss_significance[[1]])[, 2, 3]), +rep(FALSE, 3) +) + +}) + + diff --git a/tests/testthat/test-decadal_monthly_2.R b/tests/testthat/test-decadal_monthly_2.R new file mode 100644 index 00000000..ac80fe77 --- /dev/null +++ b/tests/testthat/test-decadal_monthly_2.R @@ -0,0 +1,214 @@ +context("Decadal monthly data - 2") + +########################################### + +source("modules/Loading/Loading_decadal.R") +source("modules/Calibration/Calibration.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Save_data.R") + +recipe_file <- "tests/recipes/recipe-decadal_monthly_2.yml" + +# Load datasets +suppressWarnings({invisible(capture.output( +data <- load_datasets(recipe_file) +))}) + +#recipe <- read_yaml(recipe_file) +## Calibrate datasets +#suppressWarnings({invisible(capture.output( +# calibrated_data <- calibrate_datasets(data, recipe) +#))}) +# +## Compute skill metrics +#suppressWarnings({invisible(capture.output( +#skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, +# recipe, na.rm = T, ncores = 4) +#))}) + +#====================================== + +test_that("1. Loading", { + +expect_equal( +is.list(data), +TRUE +) +expect_equal( +names(data), +c("hcst", "fcst", "obs") +) +expect_equal( +class(data$hcst), +"s2dv_cube" +) +expect_equal( +class(data$fcst), +"s2dv_cube" +) +expect_equal( +class(data$obs), +"s2dv_cube" +) +expect_equal( +names(data$hcst), +c("data", "Variable", "Datasets", "Dates", "when", "source_files", "load_parameters") +) +expect_equal( +names(data$hcst), +names(data$fcst) +) +expect_equal( +names(data$hcst), +names(data$obs) +) +expect_equal( +dim(data$hcst$data), +c(dat = 1, var = 1, ensemble = 3, sday = 1, sweek = 1, syear = 3, time = 14, latitude = 8, longitude = 5) +) +expect_equal( +dim(data$fcst$data), +c(dat = 1, var = 1, ensemble = 3, sday = 1, sweek = 1, syear = 2, time = 14, latitude = 8, longitude = 5) +) +expect_equal( +dim(data$hcst$Dates$start), +c(sday = 1, sweek = 1, syear = 3, time = 14) +) +#expect_equal( +#dim(data$fcst$Dates$start), +#c(time = 14) +#) +# hcst data +expect_equal( +as.vector(drop(data$hcst$data)[1, , 1:2, 2, 2]), +c(272.8613, 271.0689, 270.8007, 273.5594, 272.1561, 272.8729), +tolerance = 0.0001 +) +expect_equal( +mean(data$hcst$data), +269.8822, +tolerance = 0.0001 +) +expect_equal( +range(data$hcst$data), +c(253.8541, 276.6805), +tolerance = 0.0001 +) +# fcst data +expect_equal( +as.vector(drop(data$fcst$data)[1, , 1:2, 2, 2]), +c(271.7708, 271.8424, 272.4980, 273.5842), +tolerance = 0.0001 +) +expect_equal( +mean(data$fcst$data), +271.2158, +tolerance = 0.0001 +) + +expect_equal( +(data$hcst$Dates$start)[1], +as.POSIXct("1990-11-16", tz = 'UTC') +) +expect_equal( +(data$hcst$Dates$start)[2], +as.POSIXct("1991-11-16", tz = 'UTC') +) +expect_equal( +(data$hcst$Dates$start)[5], +as.POSIXct("1991-12-16 12:00:00", tz = 'UTC') +) +expect_equal( +(data$hcst$Dates$start)[10], +as.POSIXct("1991-02-15", tz = 'UTC') +) + +}) + +#====================================== +#test_that("2. Calibration", { +# +#expect_equal( +#is.list(calibrated_data), +#TRUE +#) +#expect_equal( +#names(calibrated_data), +#c("hcst", "fcst") +#) +#expect_equal( +#class(calibrated_data$hcst), +#"s2dv_cube" +#) +#expect_equal( +#class(calibrated_data$fcst), +#"s2dv_cube" +#) +#expect_equal( +#dim(calibrated_data$hcst$data), +#c(dat = 1, var = 1, ensemble = 2, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 5, longitude = 4) +#) +#expect_equal( +#dim(calibrated_data$fcst$data), +#c(dat = 1, var = 1, ensemble = 2, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 5, longitude = 4) +#) +#expect_equal( +#mean(calibrated_data$fcst$data), +#291.8375, +#tolerance = 0.0001 +#) +#expect_equal( +#mean(calibrated_data$hcst$data), +#289.6679, +#tolerance = 0.0001 +#) +#expect_equal( +#as.vector(drop(calibrated_data$hcst$data)[1, , 2, 3, 4]), +#c(286.3895, 286.6408, 290.6652, 288.3759), +#tolerance = 0.0001 +#) +#expect_equal( +#range(calibrated_data$fcst$data), +#c(287.2173, 297.4578), +#tolerance = 0.0001 +#) +# +#}) +# +# +##====================================== +#test_that("3. Metrics", { +# +#expect_equal( +#is.list(skill_metrics), +#TRUE +#) +#expect_equal( +#names(skill_metrics), +#c("rpss", "rpss_significance") +#) +#expect_equal( +#class(skill_metrics$rpss[[1]]), +#"array" +#) +#expect_equal( +#dim(skill_metrics$rpss[[1]]), +#c(dat = 1, var = 1, sday = 1, sweek = 1, time = 3, latitude = 5, longitude = 4) +#) +#expect_equal( +#dim(skill_metrics$rpss_significance[[1]]), +#dim(skill_metrics$rpss[[1]]) +#) +#expect_equal( +#as.vector(drop(skill_metrics$rpss[[1]])[, 2, 3]), +#c(-0.2857143, -1.2500000, -1.8928571), +#tolerance = 0.0001 +#) +#expect_equal( +#as.vector(drop(skill_metrics$rpss_significance[[1]])[, 2, 3]), +#rep(FALSE, 3) +#) +# +#}) +# +# diff --git a/tests/testthat/test-seasonal_monthly.R b/tests/testthat/test-seasonal_monthly.R new file mode 100644 index 00000000..dbd1ee12 --- /dev/null +++ b/tests/testthat/test-seasonal_monthly.R @@ -0,0 +1,190 @@ +context("Seasonal monthly data") + +source("modules/Loading/Loading.R") +source("modules/Calibration/Calibration.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") + +recipe_file <- "tests/recipes/recipe-seasonal_monthly_1.yml" + +# Load datasets +suppressWarnings({invisible(capture.output( +data <- load_datasets(recipe_file) +))}) + +recipe <- read_yaml(recipe_file) + +suppressWarnings({invisible(capture.output( +calibrated_data <- calibrate_datasets(data, recipe) +))}) + +# Compute skill metrics +suppressWarnings({invisible(capture.output( +skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, + recipe, na.rm = T, ncores = 4) +))}) + +test_that("1. Loading", { + +expect_equal( +is.list(data), +TRUE +) +expect_equal( +names(data), +c("hcst", "fcst", "obs") +) +expect_equal( +class(data$hcst), +"s2dv_cube" +) +expect_equal( +class(data$fcst), +"s2dv_cube" +) +expect_equal( +class(data$obs), +"s2dv_cube" +) +expect_equal( +names(data$hcst), +c("data", "lon", "lat", "Variable", "Datasets", "Dates", "when", "source_files", "load_parameters") +) +expect_equal( +names(data$hcst), +names(data$fcst) +) +expect_equal( +names(data$hcst), +names(data$obs) +) +expect_equal( +dim(data$hcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 3, longitude = 3, ensemble = 25) +) +expect_equal( +dim(data$fcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 3, longitude = 3, ensemble = 51) +) +expect_equal( +dim(data$hcst$Dates$start), +c(sday = 1, sweek = 1, syear = 4, time = 3) +) +expect_equal( +as.vector(drop(data$hcst$data)[1:2,1:2,1,2,3]), +c(293.9651, 295.9690, 290.6771, 290.7957), +tolerance = 0.0001 +) +expect_equal( +mean(data$hcst$data), +290.8758, +tolerance = 0.0001 +) +expect_equal( +range(data$hcst$data), +c(284.7413, 299.6219), +tolerance = 0.0001 +) +expect_equal( +(data$hcst$Dates$start)[1], +as.POSIXct("1993-12-01", tz = 'UTC') +) +expect_equal( +(data$hcst$Dates$start)[2], +as.POSIXct("1994-12-01", tz = 'UTC') +) +expect_equal( +(data$hcst$Dates$start)[5], +as.POSIXct("1994-01-01", tz = 'UTC') +) +expect_equal( +(data$obs$Dates$start)[10], +as.POSIXct("1995-02-14", tz = 'UTC') +) + +}) + +test_that("2. Calibration", { + +expect_equal( +is.list(calibrated_data), +TRUE +) +expect_equal( +names(calibrated_data), +c("hcst", "fcst") +) +expect_equal( +class(calibrated_data$hcst), +"s2dv_cube" +) +expect_equal( +class(calibrated_data$fcst), +"s2dv_cube" +) +expect_equal( +dim(calibrated_data$hcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 3, longitude = 3, ensemble = 25) +) +expect_equal( +dim(calibrated_data$fcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 3, longitude = 3, ensemble = 51) +) +expect_equal( +mean(calibrated_data$fcst$data), +291.1218, +tolerance = 0.0001 +) +expect_equal( +mean(calibrated_data$hcst$data), +289.8596, +tolerance = 0.0001 +) +expect_equal( +as.vector(drop(calibrated_data$hcst$data)[1, , 2, 3, 4]), +c(287.7982, 287.0422, 290.4297), +tolerance = 0.0001 +) +expect_equal( +range(calibrated_data$fcst$data), +c(283.5374, 306.2353), +tolerance = 0.0001 +) + +}) + + +#====================================== +test_that("3. Metrics", { + +expect_equal( +is.list(skill_metrics), +TRUE +) +expect_equal( +names(skill_metrics), +c("rpss", "rpss_significance") +) +expect_equal( +class(skill_metrics$rpss), +"array" +) +expect_equal( +dim(skill_metrics$rpss), +c(dat = 1, var = 1, sday = 1, sweek = 1, time = 3, latitude = 3, longitude = 3) +) +expect_equal( +dim(skill_metrics$rpss_significance), +dim(skill_metrics$rpss) +) +expect_equal( +as.vector(skill_metrics$rpss)[, 2, 3], +c(-1.153829, -1.114743, -1.392457), +tolerance = 0.0001 +) +expect_equal( +as.vector(skill_metrics$rpss_significance)[, 2, 3], +rep(FALSE, 3) +) + +}) -- GitLab From 790137a6af0b321d0e93b31234358972fdf66380 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 26 Jul 2022 11:09:48 +0200 Subject: [PATCH 38/68] Change skill metric array dimensions in unit test --- tests/testthat/test-seasonal_monthly.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-seasonal_monthly.R b/tests/testthat/test-seasonal_monthly.R index dbd1ee12..60d9a1a5 100644 --- a/tests/testthat/test-seasonal_monthly.R +++ b/tests/testthat/test-seasonal_monthly.R @@ -171,7 +171,7 @@ class(skill_metrics$rpss), ) expect_equal( dim(skill_metrics$rpss), -c(dat = 1, var = 1, sday = 1, sweek = 1, time = 3, latitude = 3, longitude = 3) +c(time = 3, latitude = 3, longitude = 3) ) expect_equal( dim(skill_metrics$rpss_significance), -- GitLab From 3867f1ce873399432150b67431cf47354a98dfd6 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 26 Jul 2022 11:19:07 +0200 Subject: [PATCH 39/68] Fix seasonal monthly unit test --- tests/testthat/test-seasonal_monthly.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-seasonal_monthly.R b/tests/testthat/test-seasonal_monthly.R index 60d9a1a5..1cb88618 100644 --- a/tests/testthat/test-seasonal_monthly.R +++ b/tests/testthat/test-seasonal_monthly.R @@ -178,12 +178,12 @@ dim(skill_metrics$rpss_significance), dim(skill_metrics$rpss) ) expect_equal( -as.vector(skill_metrics$rpss)[, 2, 3], +as.vector(skill_metrics$rpss[, 2, 3]), c(-1.153829, -1.114743, -1.392457), tolerance = 0.0001 ) expect_equal( -as.vector(skill_metrics$rpss_significance)[, 2, 3], +as.vector(skill_metrics$rpss_significance[, 2, 3]), rep(FALSE, 3) ) -- GitLab From 31f2511b579d6e6da9f3fc859dd07f6dbd2fc835 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 26 Jul 2022 13:35:25 +0200 Subject: [PATCH 40/68] Remove duplicated hcst, fcst and obs --- modules/Calibration/Calibration.R | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 6a7eb1dd..efe1ce2d 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -9,9 +9,6 @@ calibrate_datasets <- function(data, recipe) { # # data: list of s2dv_cube objects containing the hcst, obs and fcst. # recipe: object obtained when passing the .yml recipe file to read_yaml() - hcst <- data$hcst - obs <- data$obs - fcst <- data$fcst method <- tolower(recipe$Analysis$Workflow$Calibration$method) @@ -19,8 +16,8 @@ calibrate_datasets <- function(data, recipe) { warning("The Calibration module has been called, but the calibration ", "method in the recipe is 'raw'. The hcst and fcst will not be ", "calibrated.") - fcst_calibrated <- fcst - hcst_calibrated <- hcst + fcst_calibrated <- data$fcst + hcst_calibrated <- data$hcst CALIB_MSG <- "##### NO CALIBRATION PERFORMED #####" } else { @@ -32,12 +29,12 @@ calibrate_datasets <- function(data, recipe) { # Replicate observation array for the multi-model case if (mm) { obs.mm <- obs$data - for(dat in 1:(dim(hcst$data)['dat'][[1]]-1)) { - obs.mm <- abind(obs.mm, obs$data, - along=which(names(dim(obs$data)) == 'dat')) + for(dat in 1:(dim(data$hcst$data)['dat'][[1]]-1)) { + obs.mm <- abind(obs.mm, data$obs$data, + along=which(names(dim(data$obs$data)) == 'dat')) } names(dim(obs.mm)) <- names(dim(obs$data)) - obs$data <- obs.mm + data$obs$data <- obs.mm remove(obs.mm) } @@ -52,7 +49,7 @@ calibrate_datasets <- function(data, recipe) { } else { ## Alba's version of CST_Calibration (pending merge) is being used # Calibrate the hindcast - hcst_calibrated <- CST_Calibration(hcst, obs, + hcst_calibrated <- CST_Calibration(data$hcst, data$obs, cal.method = method, eval.method = "leave-one-out", multi.model = mm, @@ -65,7 +62,7 @@ calibrate_datasets <- function(data, recipe) { ncores = ncores) if (!is.null(fcst)) { # Calibrate the forecast - fcst_calibrated <- CST_Calibration(hcst, obs, fcst, + fcst_calibrated <- CST_Calibration(data$hcst, data$obs, data$fcst, cal.method = method, eval.method = "leave-one-out", multi.model = mm, @@ -89,7 +86,7 @@ calibrate_datasets <- function(data, recipe) { "frequency. Only quantile mapping 'qmap' is implemented.") } # Calibrate the hindcast - hcst_calibrated <- CST_QuantileMapping(hcst, obs, + hcst_calibrated <- CST_QuantileMapping(data$hcst, data$obs, exp_cor = NULL, sample_dims = c("syear", "time", @@ -102,8 +99,8 @@ calibrate_datasets <- function(data, recipe) { if (!is.null(fcst)) { # Calibrate the forecast - fcst_calibrated <- CST_QuantileMapping(hcst, obs, - exp_cor = fcst, + fcst_calibrated <- CST_QuantileMapping(data$hcst, data$obs, + exp_cor = data$fcst, sample_dims = c("syear", "time", "ensemble"), -- GitLab From 9348f3a90557e4ba283661b403ad16c7c402708c Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 26 Jul 2022 13:39:25 +0200 Subject: [PATCH 41/68] Fix bug related to last commit --- modules/Calibration/Calibration.R | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index efe1ce2d..273f5a08 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -60,7 +60,7 @@ calibrate_datasets <- function(data, recipe) { memb_dim = "ensemble", sdate_dim = "syear", ncores = ncores) - if (!is.null(fcst)) { + if (!is.null(data$fcst)) { # Calibrate the forecast fcst_calibrated <- CST_Calibration(data$hcst, data$obs, data$fcst, cal.method = method, @@ -76,9 +76,7 @@ calibrate_datasets <- function(data, recipe) { } else { fcst_calibrated <- NULL } - } - } else if (recipe$Analysis$Variables$freq == "daily_mean") { # Daily data calibration using Quantile Mapping if (!(method %in% c("qmap"))) { @@ -97,7 +95,7 @@ calibrate_datasets <- function(data, recipe) { ncores = ncores, na.rm = na.rm) - if (!is.null(fcst)) { + if (!is.null(data$fcst)) { # Calibrate the forecast fcst_calibrated <- CST_QuantileMapping(data$hcst, data$obs, exp_cor = data$fcst, @@ -112,14 +110,9 @@ calibrate_datasets <- function(data, recipe) { } else { fcst_calibrated <- NULL } - } - } - - print(CALIB_MSG) +print(CALIB_MSG) ## TODO: Return observations too? - ## TODO: Change naming convention? return(list(hcst = hcst_calibrated, fcst = fcst_calibrated)) - } -- GitLab From 4fbeabe6e76cb888594d1788225f46e777491159 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 27 Jul 2022 13:08:34 +0200 Subject: [PATCH 42/68] Revise unit test for new dev; add calibration and skill test to monthly_2 test --- .../testing_recipes/recipe_decadal.yml | 3 +- .../testing_recipes/recipe_decadal_daily.yml | 3 +- tests/recipes/recipe-decadal_daily_1.yml | 1 + tests/recipes/recipe-decadal_monthly_1.yml | 2 +- tests/recipes/recipe-decadal_monthly_2.yml | 7 +- tests/testthat/test-decadal_daily_1.R | 2 +- tests/testthat/test-decadal_monthly_1.R | 60 ++++- tests/testthat/test-decadal_monthly_2.R | 215 ++++++++++-------- 8 files changed, 181 insertions(+), 112 deletions(-) diff --git a/modules/Loading/testing_recipes/recipe_decadal.yml b/modules/Loading/testing_recipes/recipe_decadal.yml index d4e568d1..0e9e1851 100644 --- a/modules/Loading/testing_recipes/recipe_decadal.yml +++ b/modules/Loading/testing_recipes/recipe_decadal.yml @@ -32,7 +32,8 @@ Analysis: Calibration: method: bias Skill: - metric: RPSS + metric: RPSS Corr + prob: [[1/3, 2/3]] Indicators: index: FALSE Output_format: S2S4E diff --git a/modules/Loading/testing_recipes/recipe_decadal_daily.yml b/modules/Loading/testing_recipes/recipe_decadal_daily.yml index 91a957df..4f7608cf 100644 --- a/modules/Loading/testing_recipes/recipe_decadal_daily.yml +++ b/modules/Loading/testing_recipes/recipe_decadal_daily.yml @@ -32,7 +32,8 @@ Analysis: Calibration: method: qmap Skill: - metric: RPSS + metric: RPSS FRPSS + prob: [[1/3, 2/3], [1/10, 9/10]] Indicators: index: FALSE Output_format: S2S4E diff --git a/tests/recipes/recipe-decadal_daily_1.yml b/tests/recipes/recipe-decadal_daily_1.yml index ed81146f..f6b4921b 100644 --- a/tests/recipes/recipe-decadal_daily_1.yml +++ b/tests/recipes/recipe-decadal_daily_1.yml @@ -33,6 +33,7 @@ Analysis: method: qmap Skill: metric: RPSS + prob: [[[1/10, 9/10]] Indicators: index: FALSE Output_format: S2S4E diff --git a/tests/recipes/recipe-decadal_monthly_1.yml b/tests/recipes/recipe-decadal_monthly_1.yml index 8312f81a..02e3e349 100644 --- a/tests/recipes/recipe-decadal_monthly_1.yml +++ b/tests/recipes/recipe-decadal_monthly_1.yml @@ -14,7 +14,6 @@ Analysis: Reference: name: ERA5 #JRA-55 Time: - sdate: fcst: 2021 hcst_start: 1991 hcst_end: 1994 @@ -34,6 +33,7 @@ Analysis: method: bias Skill: metric: RPSS + prob: [[1/3, 2/3], [1/10, 9/10]] Indicators: index: FALSE Output_format: S2S4E diff --git a/tests/recipes/recipe-decadal_monthly_2.yml b/tests/recipes/recipe-decadal_monthly_2.yml index 040bbe3e..6a5fc617 100644 --- a/tests/recipes/recipe-decadal_monthly_2.yml +++ b/tests/recipes/recipe-decadal_monthly_2.yml @@ -27,12 +27,13 @@ Analysis: lonmax: 2 #359.9 Regrid: method: bilinear - type: to_system #to_reference + type: to_system Workflow: Calibration: - method: bias + method: raw Skill: - metric: RPSS + metric: RPSS_specs BSS90_specs EnsCorr_specs FRPS_specs FRPSS_specs BSS10_specs FRPS + prob: [[1/3, 2/3]] Indicators: index: FALSE Output_format: S2S4E diff --git a/tests/testthat/test-decadal_daily_1.R b/tests/testthat/test-decadal_daily_1.R index 15020591..84a528cd 100644 --- a/tests/testthat/test-decadal_daily_1.R +++ b/tests/testthat/test-decadal_daily_1.R @@ -5,7 +5,7 @@ context("Decadal daily data - 1") source("modules/Loading/Loading_decadal.R") source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") -source("modules/Saving/Save_data.R") +source("modules/Saving/Saving.R") recipe_file <- "tests/recipes/recipe-decadal_daily_1.yml" diff --git a/tests/testthat/test-decadal_monthly_1.R b/tests/testthat/test-decadal_monthly_1.R index 19560921..e771cf29 100644 --- a/tests/testthat/test-decadal_monthly_1.R +++ b/tests/testthat/test-decadal_monthly_1.R @@ -5,7 +5,7 @@ context("Decadal monthly data - 1") source("modules/Loading/Loading_decadal.R") source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") -source("modules/Saving/Save_data.R") +source("modules/Saving/Saving.R") recipe_file <- "tests/recipes/recipe-decadal_monthly_1.yml" @@ -25,6 +25,10 @@ suppressWarnings({invisible(capture.output( skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe, na.rm = T, ncores = 4) ))}) +suppressWarnings({invisible(capture.output( +probs <- compute_probabilities(calibrated_data$hcst, recipe, + na.rm = T, ncores = 4) +))}) #====================================== @@ -171,27 +175,67 @@ names(skill_metrics), c("rpss", "rpss_significance") ) expect_equal( -class(skill_metrics$rpss[[1]]), +class(skill_metrics$rpss), "array" ) expect_equal( -dim(skill_metrics$rpss[[1]]), -c(dat = 1, var = 1, sday = 1, sweek = 1, time = 3, latitude = 5, longitude = 4) +dim(skill_metrics$rpss), +c(time = 3, latitude = 5, longitude = 4) ) expect_equal( -dim(skill_metrics$rpss_significance[[1]]), -dim(skill_metrics$rpss[[1]]) +dim(skill_metrics$rpss_significance), +dim(skill_metrics$rpss) ) expect_equal( -as.vector(drop(skill_metrics$rpss[[1]])[, 2, 3]), +as.vector(drop(skill_metrics$rpss)[, 2, 3]), c(-0.2857143, -1.2500000, -1.8928571), tolerance = 0.0001 ) expect_equal( -as.vector(drop(skill_metrics$rpss_significance[[1]])[, 2, 3]), +as.vector(drop(skill_metrics$rpss_significance)[, 2, 3]), rep(FALSE, 3) ) +# Probs +expect_equal( +names(probs), +c('probs', 'percentiles') +) +expect_equal( +names(probs$probs), +c('prob_b33', 'prob_33_to_66', 'prob_a66', 'prob_b10', 'prob_10_to_90', 'prob_a90') +) +expect_equal( +names(probs$percentiles), +c('percentile_33', 'percentile_66', 'percentile_10', 'percentile_90') +) +expect_equal( +dim(probs$probs$prob_b33), +c(syear = 4, time = 3, latitude = 5, longitude = 4) +) +expect_equal( +dim(probs$percentiles$percentile_33), +c(time = 3, latitude = 5, longitude = 4) +) +expect_equal( +as.vector(probs$probs$prob_b33[, 1, 2, 2]), +c(0.0, 0.5, 0.0, 1.0) +) +expect_equal( +as.vector(probs$probs$prob_10_to_90[, 1, 2, 2]), +c(1.0, 1.0, 0.5, 0.5) +) +expect_equal( +as.vector(probs$percentiles$percentile_33[, 1, 2]), +c(293.7496, 287.4263, 285.8295), +tolerance = 0.0001 +) +expect_equal( +as.vector(probs$percentiles$percentile_10[, 1, 2]), +c(293.1772, 286.9533, 284.7887), +tolerance = 0.0001 +) + }) diff --git a/tests/testthat/test-decadal_monthly_2.R b/tests/testthat/test-decadal_monthly_2.R index ac80fe77..a3325a7f 100644 --- a/tests/testthat/test-decadal_monthly_2.R +++ b/tests/testthat/test-decadal_monthly_2.R @@ -5,7 +5,7 @@ context("Decadal monthly data - 2") source("modules/Loading/Loading_decadal.R") source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") -source("modules/Saving/Save_data.R") +source("modules/Saving/Saving.R") recipe_file <- "tests/recipes/recipe-decadal_monthly_2.yml" @@ -14,17 +14,22 @@ suppressWarnings({invisible(capture.output( data <- load_datasets(recipe_file) ))}) -#recipe <- read_yaml(recipe_file) -## Calibrate datasets -#suppressWarnings({invisible(capture.output( -# calibrated_data <- calibrate_datasets(data, recipe) -#))}) -# -## Compute skill metrics -#suppressWarnings({invisible(capture.output( -#skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, -# recipe, na.rm = T, ncores = 4) -#))}) +recipe <- read_yaml(recipe_file) + +# Calibrate datasets +suppressWarnings({invisible(capture.output( + calibrated_data <- calibrate_datasets(data, recipe) +))}) + +# Compute skill metrics +suppressWarnings({invisible(capture.output( +skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, + recipe, na.rm = T, ncores = 4) +))}) +suppressWarnings({invisible(capture.output( +probs <- compute_probabilities(calibrated_data$hcst, recipe, + na.rm = T, ncores = 4) +))}) #====================================== @@ -126,89 +131,105 @@ as.POSIXct("1991-02-15", tz = 'UTC') }) #====================================== -#test_that("2. Calibration", { -# -#expect_equal( -#is.list(calibrated_data), -#TRUE -#) -#expect_equal( -#names(calibrated_data), -#c("hcst", "fcst") -#) -#expect_equal( -#class(calibrated_data$hcst), -#"s2dv_cube" -#) -#expect_equal( -#class(calibrated_data$fcst), -#"s2dv_cube" -#) -#expect_equal( -#dim(calibrated_data$hcst$data), -#c(dat = 1, var = 1, ensemble = 2, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 5, longitude = 4) -#) -#expect_equal( -#dim(calibrated_data$fcst$data), -#c(dat = 1, var = 1, ensemble = 2, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 5, longitude = 4) -#) -#expect_equal( -#mean(calibrated_data$fcst$data), -#291.8375, -#tolerance = 0.0001 -#) -#expect_equal( -#mean(calibrated_data$hcst$data), -#289.6679, -#tolerance = 0.0001 -#) -#expect_equal( -#as.vector(drop(calibrated_data$hcst$data)[1, , 2, 3, 4]), -#c(286.3895, 286.6408, 290.6652, 288.3759), -#tolerance = 0.0001 -#) -#expect_equal( -#range(calibrated_data$fcst$data), -#c(287.2173, 297.4578), -#tolerance = 0.0001 -#) -# -#}) -# -# +test_that("2. Calibration", { + +expect_equal( +names(calibrated_data), +c("hcst", "fcst") +) +expect_equal( +calibrated_data, +data[1:2] +) + +}) + + ##====================================== -#test_that("3. Metrics", { -# -#expect_equal( -#is.list(skill_metrics), -#TRUE -#) -#expect_equal( -#names(skill_metrics), -#c("rpss", "rpss_significance") -#) -#expect_equal( -#class(skill_metrics$rpss[[1]]), -#"array" -#) -#expect_equal( -#dim(skill_metrics$rpss[[1]]), -#c(dat = 1, var = 1, sday = 1, sweek = 1, time = 3, latitude = 5, longitude = 4) -#) -#expect_equal( -#dim(skill_metrics$rpss_significance[[1]]), -#dim(skill_metrics$rpss[[1]]) -#) -#expect_equal( -#as.vector(drop(skill_metrics$rpss[[1]])[, 2, 3]), -#c(-0.2857143, -1.2500000, -1.8928571), -#tolerance = 0.0001 -#) -#expect_equal( -#as.vector(drop(skill_metrics$rpss_significance[[1]])[, 2, 3]), -#rep(FALSE, 3) -#) -# -#}) -# -# +test_that("3. Metrics", { + +expect_equal( +is.list(skill_metrics), +TRUE +) +expect_equal( +names(skill_metrics), +c("rpss_specs", "bss90_specs", "enscorr_specs", "frps_specs", "frpss_specs", "bss10_specs", "frps") +) +expect_equal( +class(skill_metrics$rpss_specs), +"array" +) +expect_equal( +all(unlist(lapply(lapply(skill_metrics, dim), all.equal, c(time = 14, latitude = 8, longitude = 5)))), +TRUE +) +expect_equal( +as.vector(skill_metrics$rpss_specs[6:8, 1, 2]), +c(-0.3333333, 0.1666667, -0.3333333), +tolerance = 0.0001 +) +expect_equal( +all(is.na(skill_metrics$bss90_specs)), +TRUE +) +expect_equal( +as.vector(skill_metrics$enscorr_specs[6:8, 1, 2]), +c(0.4474382, 0.1026333, 0.4042823), +tolerance = 0.0001 +) +expect_equal( +as.vector(skill_metrics$frps_specs[6:8, 1, 2]), +c(0.4444444, 0.2222222, 0.4444444), +tolerance = 0.0001 +) +expect_equal( +as.vector(skill_metrics$frpss_specs[4:7, 1, 5]), +c( 1.0, -0.5, -0.5, 0.5), +tolerance = 0.0001 +) +expect_equal( +as.vector(skill_metrics$bss10_specs[6:8, 1, 2]), +c(0.5, -0.5, -0.5), +) +expect_equal( +as.vector(skill_metrics$frps[6:8, 1, 2]), +c(0.4444444, 0.2222222, 0.4444444), +tolerance = 0.0001 +) + +# Probs +expect_equal( +names(probs), +c('probs', 'percentiles') +) +expect_equal( +names(probs$probs), +c('prob_b33', 'prob_33_to_66', 'prob_a66') +) +expect_equal( +names(probs$percentiles), +c('percentile_33', 'percentile_66') +) +expect_equal( +dim(probs$probs$prob_b33), +c(syear = 3, time = 14, latitude = 8, longitude = 5) +) +expect_equal( +dim(probs$percentiles$percentile_33), +c(time = 14, latitude = 8, longitude = 5) +) +expect_equal( +as.vector(probs$probs$prob_b33[, 1, 2, 2]), +c(0.0, 0.3333333, 0.6666667), +tolerance = 0.0001 +) +expect_equal( +as.vector(probs$percentiles$percentile_33[1:3, 1, 2]), +c(271.7508, 273.1682, 274.1937), +tolerance = 0.0001 +) + +}) + + -- GitLab From 96072f3ab9912b278a4fde892db4639de206ad9b Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 27 Jul 2022 13:13:34 +0200 Subject: [PATCH 43/68] Fix typo --- tests/recipes/recipe-decadal_daily_1.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/recipes/recipe-decadal_daily_1.yml b/tests/recipes/recipe-decadal_daily_1.yml index f6b4921b..119facb9 100644 --- a/tests/recipes/recipe-decadal_daily_1.yml +++ b/tests/recipes/recipe-decadal_daily_1.yml @@ -33,7 +33,7 @@ Analysis: method: qmap Skill: metric: RPSS - prob: [[[1/10, 9/10]] + prob: [[1/10, 9/10]] Indicators: index: FALSE Output_format: S2S4E -- GitLab From 57d03994439534262b6cabaa7126844224e4dd1f Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 28 Jul 2022 10:37:30 +0200 Subject: [PATCH 44/68] Add daily seasonal unit tests, change sdate and fcst_year params, fix bug to load daily data w/o fcst --- modules/Loading/Loading.R | 18 +- modules/Loading/dates2load.R | 16 +- modules/Loading/testing_recipes/recipe_4.yml | 5 +- modules/Saving/Saving.R | 26 +-- modules/Saving/paths2save.R | 8 +- tests/recipes/recipe-seasonal_daily_1.yml | 42 +++++ tests/testthat/test-seasonal_daily.R | 175 +++++++++++++++++++ 7 files changed, 252 insertions(+), 38 deletions(-) create mode 100644 tests/recipes/recipe-seasonal_daily_1.yml create mode 100644 tests/testthat/test-seasonal_daily.R diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 543bb55f..c017e0f4 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -46,11 +46,13 @@ load_datasets <- function(recipe_file) { recipe$Analysis$Time$ftime_min, recipe$Analysis$Time$ftime_max, time_freq=store.freq) - - idxs$fcst <- get_timeidx(sdates$fcst, - recipe$Analysis$Time$ftime_min, - recipe$Analysis$Time$ftime_max, - time_freq=store.freq) + + if (!(is.null(sdates$fcst))) { + idxs$fcst <- get_timeidx(sdates$fcst, + recipe$Analysis$Time$ftime_min, + recipe$Analysis$Time$ftime_max, + time_freq=store.freq) + } ## TODO: Examine this verifications part, verify if it's necessary # stream <- verifications$stream @@ -139,10 +141,6 @@ load_datasets <- function(recipe_file) { latitude = 1, longitude = 1, ensemble = 1) default_dims[names(dim(hcst))] <- dim(hcst) dim(hcst) <- default_dims -# default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) -# names(dim(attr(hcst, "Variables")$common$time))[which(names(dim( -# attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear" -# dim(attr(hcst, "Variables")$common$time) <- default_time_dims } # Convert hcst to s2dv_cube object @@ -152,7 +150,7 @@ load_datasets <- function(recipe_file) { # Load forecast #------------------------------------------------------------------- - if (!is.null(recipe$Analysis$Time$sdate$fcst_syear)){ + if (!is.null(recipe$Analysis$Time$fcst_year)) { # the call uses file_date instead of fcst_syear so that it can work # with the daily case and the current version of startR not allowing # multiple dims split diff --git a/modules/Loading/dates2load.R b/modules/Loading/dates2load.R index fe4228c0..4f8f814a 100644 --- a/modules/Loading/dates2load.R +++ b/modules/Loading/dates2load.R @@ -23,15 +23,15 @@ dates2load <- function(recipe, logger){ # hcst dates file_dates <- paste0(strtoi(recipe$hcst_start):strtoi(recipe$hcst_end), - recipe$sdate$fcst_sday) + recipe$sdate) if (temp_freq == "monthly_mean"){ file_dates <- .add_dims(file_dates, "hcst") } # fcst dates (if fcst_year empty it creates an empty object) - if (! is.null(recipe$sdate$fcst_syear)){ - file_dates.fcst <- paste0(recipe$sdate$fcst_syear, recipe$sdate$fcst_sday) + if (! is.null(recipe$fcst_year)){ + file_dates.fcst <- paste0(recipe$fcst_year, recipe$sdate) if (temp_freq == "monthly_mean"){ file_dates.fcst <- .add_dims(file_dates.fcst, "fcst") } @@ -74,13 +74,13 @@ get_timeidx <- function(sdates, ltmin, ltmax, idx_max <- sdates + months(ltmax+1) - days(1) indxs <- array(numeric(), c(file_date=length(sdates), - time=(as.integer(idx_max[1]-idx_min[1])+1) - #syear=length(sdates) - #sday=1,sweek=1)) + time = (as.integer(idx_max[1]-idx_min[1])+1) + #syear = length(sdates), + #sday = 1, sweek = 1, )) for (sdate in 1:length(sdates)) { - days <- seq(idx_min[sdate],idx_max[sdate], by='days') + days <- seq(idx_min[sdate], idx_max[sdate], by='days') indxs[sdate,] <- days[!(format(days, "%m%d") == "0229")] } indxs <- as.POSIXct(indxs*86400, @@ -88,7 +88,7 @@ get_timeidx <- function(sdates, ltmin, ltmax, lubridate::hour(indxs) <- 12 lubridate::minute(indxs) <- 00 dim(indxs) <- c(file_date=length(sdates), - time=(as.integer(idx_max[1] - idx_min[1]) + 1)) + time=(as.integer(idx_max[1] - idx_min[1]) + 1)) } else if (time_freq == "monthly_mean") { diff --git a/modules/Loading/testing_recipes/recipe_4.yml b/modules/Loading/testing_recipes/recipe_4.yml index 3a936123..1416e515 100644 --- a/modules/Loading/testing_recipes/recipe_4.yml +++ b/modules/Loading/testing_recipes/recipe_4.yml @@ -13,9 +13,8 @@ Analysis: Reference: name: era5 Time: - sdate: - fcst_syear: '2020' - fcst_sday: '1101' + sdate: '1101' + fcst_year: '2020' hcst_start: '1993' hcst_end: '2016' ftime_min: 0 diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 9bd0d90e..dca4cba5 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -101,7 +101,7 @@ save_forecast <- function(data_cube, n_steps <- dim(data$hcst$data)['time'][[1]] # number of time steps dates <- dates[1:n_steps] init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate$fcst_sday), + recipe$Analysis$Time$sdate), format = '%Y%m%d') # Get time difference in months leadtimes <- interval(init_date, dates) %/% hours(1) @@ -199,7 +199,7 @@ save_observations <- function(data_cube, n_steps <- dim(data$hcst$data)['time'][[1]] # number of time steps dates <- dates[1:n_steps] init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate$fcst_sday), + recipe$Analysis$Time$sdate), format = '%Y%m%d') # Get time difference in months leadtimes <- interval(init_date, dates) %/% hours(1) @@ -348,16 +348,16 @@ save_metrics <- function(skill, n_steps <- dim(data$hcst$data)['time'][[1]] # number of time steps dates <- dates[1:n_steps] init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate$fcst_sday), + recipe$Analysis$Time$sdate), format = '%Y%m%d') # Get time difference in months leadtimes <- interval(init_date, dates) %/% hours(1) # If a fcst is provided, use that as the ref. year. Otherwise use 1970. - if (!is.null(recipe$Analysis$Time$sdate$fcst_syear)) { - fcst.sdate <- paste0(recipe$Analysis$Time$sdate$fcst_syear, - recipe$Analysis$Time$sdate$fcst_sday) + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) } else { - fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate$fcst_sday) + fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) } times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate) @@ -429,17 +429,17 @@ save_percentiles <- function(percentiles, n_steps <- dim(data$hcst$data)['time'][[1]] # number of time steps dates <- dates[1:n_steps] init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate$fcst_sday), + recipe$Analysis$Time$sdate), format = '%Y%m%d') # Get time difference in hours leadtimes <- interval(init_date, dates) %/% hours(1) # If a fcst is provided, use that as the ref. year. Otherwise use 1970. - if (!is.null(recipe$Analysis$Time$sdate$fcst_syear)) { - fcst.sdate <- paste0(recipe$Analysis$Time$sdate$fcst_syear, - recipe$Analysis$Time$sdate$fcst_sday) + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) } else { - fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate$fcst_sday) + fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) } times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate) @@ -495,7 +495,7 @@ save_probabilities <- function(probs, n_steps <- dim(data$hcst$data)['time'][[1]] # number of time steps dates <- dates[1:n_steps] init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate$fcst_sday), + recipe$Analysis$Time$sdate), format = '%Y%m%d') # Get time difference in hours leadtimes <- interval(init_date, dates) %/% hours(1) diff --git a/modules/Saving/paths2save.R b/modules/Saving/paths2save.R index cabcccc0..c243fb39 100644 --- a/modules/Saving/paths2save.R +++ b/modules/Saving/paths2save.R @@ -40,11 +40,11 @@ get_dir <- function(recipe, agg = "global") { outdir <- recipe$Run$output_dir variable <- recipe$Analysis$Variables$name - if (!is.null(recipe$Analysis$Time$sdate$fcst_syear)) { - fcst.sdate <- paste0(recipe$Analysis$Time$sdate$fcst_syear, - recipe$Analysis$Time$sdate$fcst_sday) + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) } else { - fcst.sdate <- paste0("hcst-", recipe$Analysis$Time$sdate$fcst_sday) + fcst.sdate <- paste0("hcst-", recipe$Analysis$Time$sdate) } calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method) diff --git a/tests/recipes/recipe-seasonal_daily_1.yml b/tests/recipes/recipe-seasonal_daily_1.yml new file mode 100644 index 00000000..724d2c13 --- /dev/null +++ b/tests/recipes/recipe-seasonal_daily_1.yml @@ -0,0 +1,42 @@ +Description: + Author: V. Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: daily_mean + Datasets: + System: + name: system5c3s + Multimodel: False + Reference: + name: era5 + Time: + sdate: '1201' + fcst_year: + hcst_start: '1993' + hcst_end: '1996' + ftime_min: 0 + ftime_max: 0 + Region: + latmin: 17 + latmax: 20 + lonmin: 12 + lonmax: 15 + Regrid: + method: conservative + type: to_system + Workflow: + Calibration: + method: qmap + Skill: + metric: EnsCorr_specs + Indicators: + index: no + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/tests/testthat/test-seasonal_daily.R b/tests/testthat/test-seasonal_daily.R new file mode 100644 index 00000000..157d8006 --- /dev/null +++ b/tests/testthat/test-seasonal_daily.R @@ -0,0 +1,175 @@ +#context("Seasonal daily data") + +source("modules/Loading/Loading.R") +source("modules/Calibration/Calibration.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") + +recipe_file <- "tests/recipes/recipe-seasonal_daily_1.yml" + +# Load datasets +suppressWarnings({invisible(capture.output( +data <- load_datasets(recipe_file) +))}) + +recipe <- read_yaml(recipe_file) + +suppressWarnings({invisible(capture.output( +calibrated_data <- calibrate_datasets(data, recipe) +))}) + +# Compute skill metrics +suppressWarnings({invisible(capture.output( +skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, + recipe, na.rm = T, ncores = 4) +))}) + +test_that("1. Loading", { + +expect_equal( +is.list(data), +TRUE +) +expect_equal( +names(data), +c("hcst", "fcst", "obs") +) +expect_equal( +class(data$hcst), +"s2dv_cube" +) +expect_equal( +class(data$fcst), +"s2dv_cube" +) +expect_equal( +class(data$obs), +"s2dv_cube" +) +expect_equal( +names(data$hcst), +c("data", "lon", "lat", "Variable", "Datasets", "Dates", "when", "source_files", "load_parameters") +) +expect_equal( +names(data$hcst), +names(data$fcst) +) +expect_equal( +names(data$hcst), +names(data$obs) +) +expect_equal( +dim(data$hcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 31, latitude = 4, longitude = 4, ensemble = 25) +) +expect_equal( +dim(data$obs$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 31, latitude = 4, longitude = 4, ensemble = 1) +) +expect_equal( +dim(data$obs$Dates$start), +c(sday = 1, sweek = 1, syear = 4, time = 31) +) +expect_equal( +as.vector(drop(data$hcst$data)[1:2,1:2,1,2,3]), +c(295.5691, 291.7752, 294.0874, 290.1173), +tolerance = 0.0001 +) +expect_equal( +mean(data$hcst$data), +288.3723, +tolerance = 0.0001 +) +expect_equal( +range(data$hcst$data), +c(280.1490, 298.2324), +tolerance = 0.0001 +) +expect_equal( +(data$hcst$Dates$start)[1], +as.POSIXct("1993-12-01 18:00:00 UTC", tz = 'UTC') +) +expect_equal( +(data$hcst$Dates$start)[2], +as.POSIXct("1994-12-01 18:00:00 UTC", tz = 'UTC') +) +expect_equal( +(data$hcst$Dates$start)[5], +as.POSIXct("1993-12-02 18:00:00 UTC", tz = 'UTC') +) +expect_equal( +(data$obs$Dates$start)[10], +as.POSIXct("1994-12-03 11:30:00 UTC", tz = 'UTC') +) + +}) + +test_that("2. Calibration", { + +expect_equal( +is.list(calibrated_data), +TRUE +) +expect_equal( +names(calibrated_data), +c("hcst", "fcst") +) +expect_equal( +class(calibrated_data$hcst), +"s2dv_cube" +) +expect_equal( +class(calibrated_data$fcst), +"s2dv_cube" +) +expect_equal( +dim(calibrated_data$hcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 31, latitude = 4, longitude = 4, ensemble = 25) +) +expect_equal( +NULL, +calibrated_data$fcst +) +expect_equal( +mean(calibrated_data$hcst$data), +289.6468, +tolerance = 0.0001 +) +expect_equal( +as.vector(drop(calibrated_data$hcst$data)[1, 1:4, 2, 3, 4]), +c(295.1077, 294.2161, 294.5801, 292.6326), +tolerance = 0.0001 +) +expect_equal( +range(calibrated_data$hcst$data), +c(283.9447, 297.7496), +tolerance = 0.0001 +) + +}) + + +#====================================== +test_that("3. Metrics", { + +expect_equal( +is.list(skill_metrics), +TRUE +) +expect_equal( +names(skill_metrics), +c("enscorr_specs") +) +expect_equal( +class(skill_metrics$enscorr_specs), +"array" +) +expect_equal( +dim(skill_metrics$enscorr_specs), +c(time = 31, latitude = 4, longitude = 4) +) +expect_equal( +skill_metrics$enscorr_specs[1:3, 1, 1], +c(0.8159317, 0.8956195, 0.8355627) +) +}) -- GitLab From 8f23e81408274a3ad09a004991edb21af65558ec Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 28 Jul 2022 17:06:33 +0200 Subject: [PATCH 45/68] Change first forecast time from 0 to 1 --- modules/Loading/testing_recipes/recipe_1.yml | 9 ++++----- modules/Loading/testing_recipes/recipe_2.yml | 9 ++++----- modules/Loading/testing_recipes/recipe_3.yml | 9 ++++----- modules/Loading/testing_recipes/recipe_4.yml | 4 ++-- modules/Loading/testing_recipes/recipe_5.yml | 9 ++++----- .../testing_recipes/recipe_circular-sort-test.yml | 9 ++++----- 6 files changed, 22 insertions(+), 27 deletions(-) diff --git a/modules/Loading/testing_recipes/recipe_1.yml b/modules/Loading/testing_recipes/recipe_1.yml index 02a7a1df..cc142aa4 100644 --- a/modules/Loading/testing_recipes/recipe_1.yml +++ b/modules/Loading/testing_recipes/recipe_1.yml @@ -14,13 +14,12 @@ Analysis: Reference: name: era5 # Mandatory, str: Reference codename. See docu. Time: - sdate: - fcst_syear: '2020' # Optional, int: Forecast year 'YYYY' - fcst_sday: '1101' # Mandatory, int: Start date, 'MMDD' + sdate: '1101' + fcst_year: '2020' # Optional, int: Forecast year 'YYYY' hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' hcst_end: '1996' # Mandatory, int: Hindcast end year 'YYYY' - ftime_min: 0 # Mandatory, int: First leadtime time step in months - ftime_max: 1 # Mandatory, int: Last leadtime time step in months + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 2 # Mandatory, int: Last leadtime time step in months Region: latmin: -10 # Mandatory, int: minimum latitude latmax: 10 # Mandatory, int: maximum latitude diff --git a/modules/Loading/testing_recipes/recipe_2.yml b/modules/Loading/testing_recipes/recipe_2.yml index 7c1980c4..9bb2f8a7 100644 --- a/modules/Loading/testing_recipes/recipe_2.yml +++ b/modules/Loading/testing_recipes/recipe_2.yml @@ -13,13 +13,12 @@ Analysis: Reference: name: era5 Time: - sdate: - fcst_syear: '2020' - fcst_sday: '0601' + sdate: '0601' + fcst_year: '2020' hcst_start: '1993' hcst_end: '2006' - ftime_min: 0 - ftime_max: 2 + ftime_min: 1 + ftime_max: 3 Region: latmin: -10 latmax: 10 diff --git a/modules/Loading/testing_recipes/recipe_3.yml b/modules/Loading/testing_recipes/recipe_3.yml index 418e50e7..014abe9e 100644 --- a/modules/Loading/testing_recipes/recipe_3.yml +++ b/modules/Loading/testing_recipes/recipe_3.yml @@ -13,13 +13,12 @@ Analysis: Reference: name: era5 Time: - sdate: - fcst_syear: '2020' - fcst_sday: '1101' + sdate: '1101' + fcst_year: '2020' hcst_start: '1993' hcst_end: '2003' - ftime_min: 0 - ftime_max: 1 + ftime_min: 1 + ftime_max: 2 Region: latmin: -10 latmax: 10 diff --git a/modules/Loading/testing_recipes/recipe_4.yml b/modules/Loading/testing_recipes/recipe_4.yml index 1416e515..e22fb9ee 100644 --- a/modules/Loading/testing_recipes/recipe_4.yml +++ b/modules/Loading/testing_recipes/recipe_4.yml @@ -17,8 +17,8 @@ Analysis: fcst_year: '2020' hcst_start: '1993' hcst_end: '2016' - ftime_min: 0 - ftime_max: 2 + ftime_min: 1 + ftime_max: 3 Region: latmin: -10 latmax: 10 diff --git a/modules/Loading/testing_recipes/recipe_5.yml b/modules/Loading/testing_recipes/recipe_5.yml index e05e890a..3bfad08e 100644 --- a/modules/Loading/testing_recipes/recipe_5.yml +++ b/modules/Loading/testing_recipes/recipe_5.yml @@ -13,13 +13,12 @@ Analysis: Reference: name: era5 Time: - sdate: - fcst_syear: # - fcst_sday: '0301' + sdate: '0301' + fcst_year: # hcst_start: '1993' hcst_end: '2016' - ftime_min: 0 - ftime_max: 0 + ftime_min: 1 + ftime_max: 1 Region: latmin: -10 latmax: 10 diff --git a/modules/Loading/testing_recipes/recipe_circular-sort-test.yml b/modules/Loading/testing_recipes/recipe_circular-sort-test.yml index 3c49f807..700fd3b2 100644 --- a/modules/Loading/testing_recipes/recipe_circular-sort-test.yml +++ b/modules/Loading/testing_recipes/recipe_circular-sort-test.yml @@ -14,13 +14,12 @@ Analysis: Reference: name: era5 Time: - sdate: - fcst_syear: - fcst_sday: '1101' + sdate: '1101' + fcst_year: hcst_start: '1993' hcst_end: '2003' - leadtimemin: 1 - leadtimemax: 1 + leadtimemin: 2 + leadtimemax: 2 Region: latmin: -10 latmax: 10 -- GitLab From d7cbc87de1c907ddb2122e6bdc6969a286b9701c Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 28 Jul 2022 17:07:07 +0200 Subject: [PATCH 46/68] Change first forecast time from 0 to 1, change unit tests --- modules/Loading/dates2load.R | 12 ++++++------ tests/recipes/recipe-seasonal_daily_1.yml | 4 ++-- tests/recipes/recipe-seasonal_monthly_1.yml | 9 ++++----- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/modules/Loading/dates2load.R b/modules/Loading/dates2load.R index 4f8f814a..bff4b3eb 100644 --- a/modules/Loading/dates2load.R +++ b/modules/Loading/dates2load.R @@ -70,11 +70,11 @@ get_timeidx <- function(sdates, ltmin, ltmax, if (time_freq == "daily_mean"){ sdates <- ymd(sdates) - idx_min <- sdates + months(ltmin) - idx_max <- sdates + months(ltmax+1) - days(1) + idx_min <- sdates + months(ltmin - 1) + idx_max <- sdates + months(ltmax) - days(1) indxs <- array(numeric(), c(file_date=length(sdates), - time = (as.integer(idx_max[1]-idx_min[1])+1) + time = (as.integer(idx_max[1]-idx_min[1]+1)) #syear = length(sdates), #sday = 1, sweek = 1, )) @@ -88,12 +88,12 @@ get_timeidx <- function(sdates, ltmin, ltmax, lubridate::hour(indxs) <- 12 lubridate::minute(indxs) <- 00 dim(indxs) <- c(file_date=length(sdates), - time=(as.integer(idx_max[1] - idx_min[1]) + 1)) + time=(as.integer(idx_max[1]-idx_min[1]+1))) } else if (time_freq == "monthly_mean") { - idx_min <- ltmin + 1 - idx_max <- ltmax + 1 + idx_min <- ltmin + idx_max <- ltmax indxs <- indices(idx_min:idx_max) } diff --git a/tests/recipes/recipe-seasonal_daily_1.yml b/tests/recipes/recipe-seasonal_daily_1.yml index 724d2c13..7e31e03e 100644 --- a/tests/recipes/recipe-seasonal_daily_1.yml +++ b/tests/recipes/recipe-seasonal_daily_1.yml @@ -17,8 +17,8 @@ Analysis: fcst_year: hcst_start: '1993' hcst_end: '1996' - ftime_min: 0 - ftime_max: 0 + ftime_min: 1 + ftime_max: 1 Region: latmin: 17 latmax: 20 diff --git a/tests/recipes/recipe-seasonal_monthly_1.yml b/tests/recipes/recipe-seasonal_monthly_1.yml index d8b32a61..78c7718b 100644 --- a/tests/recipes/recipe-seasonal_monthly_1.yml +++ b/tests/recipes/recipe-seasonal_monthly_1.yml @@ -13,13 +13,12 @@ Analysis: Reference: name: era5 Time: - sdate: - fcst_syear: '2020' - fcst_sday: '1101' + sdate: '1101' + fcst_year: '2020' hcst_start: '1993' hcst_end: '1996' - ftime_min: 0 - ftime_max: 2 + ftime_min: 1 + ftime_max: 3 Region: latmin: 17 latmax: 20 -- GitLab From cfae9b2eb9a5b97681bf0b1b33de9f559561e541 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 29 Jul 2022 08:50:43 +0200 Subject: [PATCH 47/68] fix typo; add 'context()' --- modules/Loading/dates2load.R | 2 +- tests/testthat/test-seasonal_daily.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/Loading/dates2load.R b/modules/Loading/dates2load.R index bff4b3eb..e1e8f89e 100644 --- a/modules/Loading/dates2load.R +++ b/modules/Loading/dates2load.R @@ -88,7 +88,7 @@ get_timeidx <- function(sdates, ltmin, ltmax, lubridate::hour(indxs) <- 12 lubridate::minute(indxs) <- 00 dim(indxs) <- c(file_date=length(sdates), - time=(as.integer(idx_max[1]-idx_min[1]+1))) + time=(as.integer(idx_max[1]-idx_min[1])+1)) } else if (time_freq == "monthly_mean") { diff --git a/tests/testthat/test-seasonal_daily.R b/tests/testthat/test-seasonal_daily.R index 157d8006..071a81f6 100644 --- a/tests/testthat/test-seasonal_daily.R +++ b/tests/testthat/test-seasonal_daily.R @@ -1,4 +1,4 @@ -#context("Seasonal daily data") +context("Seasonal daily data") source("modules/Loading/Loading.R") source("modules/Calibration/Calibration.R") -- GitLab From 0f52b6f4e5d59b8621422a13e4296f157e5ec71a Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 29 Jul 2022 09:08:23 +0200 Subject: [PATCH 48/68] Remove seasonal daily from unit testing --- tests/test_seasonal.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_seasonal.R b/tests/test_seasonal.R index 4718e3d4..07937875 100644 --- a/tests/test_seasonal.R +++ b/tests/test_seasonal.R @@ -1,7 +1,7 @@ library(testthat) path_testthat <- file.path('./tests/testthat/') -files_testthat <- list.files('./tests/testthat/', pattern = 'seasonal') +files_testthat <- list.files('./tests/testthat/', pattern = 'seasonal_monthly') for (i_file in 1:length(files_testthat)) { source(paste0('./tests/testthat/', files_testthat[i_file])) -- GitLab From 25ea0628af52ce88b672ed925bdda0006066f4eb Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 29 Jul 2022 10:11:39 +0200 Subject: [PATCH 49/68] Add CRPS and CRPSS to the Skill module --- modules/Loading/testing_recipes/recipe_6.yml | 43 ++++++++++++++++++++ modules/Skill/Skill.R | 29 ++++++++++--- 2 files changed, 67 insertions(+), 5 deletions(-) create mode 100644 modules/Loading/testing_recipes/recipe_6.yml diff --git a/modules/Loading/testing_recipes/recipe_6.yml b/modules/Loading/testing_recipes/recipe_6.yml new file mode 100644 index 00000000..c1e7478a --- /dev/null +++ b/modules/Loading/testing_recipes/recipe_6.yml @@ -0,0 +1,43 @@ +Description: + Author: V. Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: system7c3s + Multimodel: False + Reference: + name: era5 + Time: + sdate: '1101' + fcst_year: '2020' + hcst_start: '1993' + hcst_end: '2016' + ftime_min: 1 + ftime_max: 3 + Region: + latmin: -10 + latmax: 10 + lonmin: 0 + lonmax: 20 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: mse_min + Skill: + metric: RPS RPSS CRPS CRPSS FCRPS FCRPSS + prob: [[1/3, 2/3], [1/10, 9/10]] + Indicators: + index: no + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index c099124c..3d09ab7a 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -8,6 +8,8 @@ # - ask Carlos which decadal metrics he is currently using source("modules/Skill/s2s.metrics.R") +source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/develop-CRPSS/R/CRPS.R") +source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/develop-CRPSS/R/CRPSS.R") ## TODO: Implement this in the future ## Which parameter are required? @@ -57,7 +59,8 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { skill_metrics <- list() for (metric in strsplit(metrics, " ")[[1]]) { # Whether the fair version of the metric is to be computed - if (metric %in% c('frps', 'frpss', 'bss10', 'bss90')) { + if (metric %in% c('frps', 'frpss', 'bss10', 'bss90', + 'fcrps', 'fcrpss')) { Fair <- T } else { Fair <- F @@ -86,8 +89,9 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { # Brier Skill Score - 10th percentile } else if (metric == 'bss10') { - skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, - prob_thresholds = 0.1, Fair = Fair, ncores = ncores) + skill <- RPSS(exp$data, obs$data, time_dim = time_dim, + memb_dim = memb_dim, prob_thresholds = 0.1, Fair = Fair, + ncores = ncores) skill$rpss <- .drop_dims(skill$rpss) skill$sign <- .drop_dims(skill$sign) skill_metrics[[ metric ]] <- skill$rpss @@ -95,12 +99,27 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { # Brier Skill Score - 90th percentile } else if (metric == 'bss90') { - skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, - prob_thresholds = 0.9, Fair = Fair, ncores = ncores) + skill <- RPSS(exp$data, obs$data, time_dim = time_dim, + memb_dim = memb_dim, prob_thresholds = 0.9, Fair = Fair, + ncores = ncores) skill$rpss <- .drop_dims(skill$rpss) skill$sign <- .drop_dims(skill$sign) skill_metrics[[ metric ]] <- skill$rpss skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign + # CRPS and FCRPS + } else if (metric %in% c('crps', 'fcrps')) { + skill <- CRPS(exp$data, obs$data, time_dim = time_dim, + memb_dim = memb_dim, Fair = Fair, ncores = ncores) + skill <- .drop_dims(skill) + skill_metrics[[ metric ]] <- skill + # CRPSS and FCRPSS + } else if (metric %in% c('crpss', 'fcrpss')) { + skill <- CRPSS(exp$data, obs$data, time_dim = time_dim, + memb_dim = memb_dim, Fair = Fair, ncores = ncores) + skill$crpss <- .drop_dims(skill$crpss) + skill$sign <- .drop_dims(skill$sign) + skill_metrics[[ metric ]] <- skill$crpss + skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # Ensemble mean correlation } else if (metric %in% c('enscorr', 'corr')) { -- GitLab From eeeb89677e667358fa95cc56a2171f71f9cab7bf Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 29 Jul 2022 11:28:36 +0200 Subject: [PATCH 50/68] Modify recipe 6, add check for _specs metrics --- modules/Loading/testing_recipes/recipe_6.yml | 2 +- modules/Skill/Skill.R | 24 ++++++++++++-------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/modules/Loading/testing_recipes/recipe_6.yml b/modules/Loading/testing_recipes/recipe_6.yml index c1e7478a..d57f832b 100644 --- a/modules/Loading/testing_recipes/recipe_6.yml +++ b/modules/Loading/testing_recipes/recipe_6.yml @@ -31,7 +31,7 @@ Analysis: Calibration: method: mse_min Skill: - metric: RPS RPSS CRPS CRPSS FCRPS FCRPSS + metric: CRPS CRPSS FCRPS FCRPSS prob: [[1/3, 2/3], [1/10, 9/10]] Indicators: index: no diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 3d09ab7a..4e16142e 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -10,6 +10,7 @@ source("modules/Skill/s2s.metrics.R") source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/develop-CRPSS/R/CRPS.R") source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/develop-CRPSS/R/CRPSS.R") +source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/master/R/RandomWalkTest.R") ## TODO: Implement this in the future ## Which parameter are required? @@ -82,8 +83,8 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { } else if (metric %in% c('rpss', 'frpss')) { skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, Fair = Fair, ncores = ncores) - skill$rpss <- .drop_dims(skill$rpss) - skill$sign <- .drop_dims(skill$sign) + skill <- lapply(skill, function(x) { + .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$rpss skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign @@ -92,8 +93,8 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, prob_thresholds = 0.1, Fair = Fair, ncores = ncores) - skill$rpss <- .drop_dims(skill$rpss) - skill$sign <- .drop_dims(skill$sign) + skill <- lapply(skill, function(x) { + .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$rpss skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign @@ -102,8 +103,8 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, prob_thresholds = 0.9, Fair = Fair, ncores = ncores) - skill$rpss <- .drop_dims(skill$rpss) - skill$sign <- .drop_dims(skill$sign) + skill <- lapply(skill, function(x) { + .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$rpss skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # CRPS and FCRPS @@ -116,8 +117,8 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { } else if (metric %in% c('crpss', 'fcrpss')) { skill <- CRPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, Fair = Fair, ncores = ncores) - skill$crpss <- .drop_dims(skill$crpss) - skill$sign <- .drop_dims(skill$sign) + skill <- lapply(skill, function(x) { + .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$crpss skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign @@ -142,12 +143,17 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { # Compute SpecsVerification version of the metrics ## Retain _specs in metric name for clarity metric_name <- (strsplit(metric, "_"))[[1]][1] # Get metric name + if (!(metric_name %in% c('frpss', 'frps', 'bss10', 'bss90', 'enscorr', + 'rpss'))) { + stop("Some of the requested metrics are not available.") + } skill <- Compute_verif_metrics(exp$data, obs$data, skill_metrics = metric_name, verif.dims=c("syear", "sday", "sweek"), na.rm = na.rm, ncores = ncores) - skill <- .drop_dims(skill) + skill <- lapply(skill, function(x) { + .drop_dims(x)}) if (metric_name == "frps") { # Compute yearly mean for FRPS skill <- colMeans(skill, dims = 1) -- GitLab From ad850b2e067abf414e782f2ace6438b0b86393d3 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 29 Jul 2022 11:58:14 +0200 Subject: [PATCH 51/68] Fix bug with lapply in skill module, add metrics to monthly unit test --- modules/Loading/testing_recipes/recipe_6.yml | 2 +- modules/Skill/Skill.R | 3 +-- tests/recipes/recipe-seasonal_monthly_1.yml | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/modules/Loading/testing_recipes/recipe_6.yml b/modules/Loading/testing_recipes/recipe_6.yml index d57f832b..bc8a9d27 100644 --- a/modules/Loading/testing_recipes/recipe_6.yml +++ b/modules/Loading/testing_recipes/recipe_6.yml @@ -31,7 +31,7 @@ Analysis: Calibration: method: mse_min Skill: - metric: CRPS CRPSS FCRPS FCRPSS + metric: CRPS CRPSS FCRPS FCRPSS FRPS_Specs prob: [[1/3, 2/3], [1/10, 9/10]] Indicators: index: no diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 4e16142e..b7ab115e 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -152,8 +152,7 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { verif.dims=c("syear", "sday", "sweek"), na.rm = na.rm, ncores = ncores) - skill <- lapply(skill, function(x) { - .drop_dims(x)}) + skill <- .drop_dims(skill) if (metric_name == "frps") { # Compute yearly mean for FRPS skill <- colMeans(skill, dims = 1) diff --git a/tests/recipes/recipe-seasonal_monthly_1.yml b/tests/recipes/recipe-seasonal_monthly_1.yml index 78c7718b..766b7e04 100644 --- a/tests/recipes/recipe-seasonal_monthly_1.yml +++ b/tests/recipes/recipe-seasonal_monthly_1.yml @@ -31,7 +31,7 @@ Analysis: Calibration: method: mse_min Skill: - metric: RPSS + metric: RPSS CRPSS EnsCorr Corr Enscorr_specs Indicators: index: no Output_format: S2S4E -- GitLab From 7433e44e12b23c1c959b46934fbc7556d4ab9f62 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 29 Jul 2022 12:10:00 +0200 Subject: [PATCH 52/68] Fix seasonal monthly unit test --- tests/testthat/test-seasonal_monthly.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-seasonal_monthly.R b/tests/testthat/test-seasonal_monthly.R index 1cb88618..0aa33b22 100644 --- a/tests/testthat/test-seasonal_monthly.R +++ b/tests/testthat/test-seasonal_monthly.R @@ -163,7 +163,9 @@ TRUE ) expect_equal( names(skill_metrics), -c("rpss", "rpss_significance") +c("rpss", "rpss_significance", "crpss", "crpss_significance", "enscorr", + "enscorr_p.value", "enscorr_conf.low", "enscorr_conf.up", "corr", + "corr_p.value", "corr_conf.low", "corr_conf.up", "enscorr_specs") ) expect_equal( class(skill_metrics$rpss), -- GitLab From 6a14c6c56372a38c4fe5658a64138a58e01812c8 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 29 Jul 2022 16:49:46 +0200 Subject: [PATCH 53/68] Temporarily source CRPS and CRPSS locally (no internet on nord4) --- modules/Skill/CRPS.R | 119 +++++++++++++++++++++++ modules/Skill/CRPSS.R | 172 +++++++++++++++++++++++++++++++++ modules/Skill/RandomWalkTest.R | 82 ++++++++++++++++ modules/Skill/Skill.R | 6 +- 4 files changed, 376 insertions(+), 3 deletions(-) create mode 100644 modules/Skill/CRPS.R create mode 100644 modules/Skill/CRPSS.R create mode 100644 modules/Skill/RandomWalkTest.R diff --git a/modules/Skill/CRPS.R b/modules/Skill/CRPS.R new file mode 100644 index 00000000..942ec9e4 --- /dev/null +++ b/modules/Skill/CRPS.R @@ -0,0 +1,119 @@ +#'Compute the Continuous Ranked Probability Score +#' +#'The Continuous Ranked Probability Score (CRPS; Wilks, 2011) is the continuous +#'version of the Ranked Probability Score (RPS; Wilks, 2011). It is a skill metric +#'to evaluate the full distribution of probabilistic forecasts. It has a negative +#'orientation (i.e., the higher-quality forecast the smaller CRPS) and it rewards +#'the forecast that has probability concentration around the observed value. In case +#'of a deterministic forecast, the CRPS is reduced to the mean absolute error. It has +#'the same units as the data. The function is based on enscrps_cpp from SpecsVerification. +#' +#'@param exp A named numerical array of the forecast with at least time +#' dimension. +#'@param obs A named numerical array of the observation with at least time +#' dimension. The dimensions must be the same as 'exp' except 'memb_dim'. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the probabilities of the forecast. The default value is 'member'. +#'@param Fair A logical indicating whether to compute the FairCRPS (the +#' potential RPSS that the forecast would have with an infinite ensemble size). +#' The default value is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A numerical array of CRPS with the same dimensions as "exp" except the +#''time_dim' and 'memb_dim' dimensions. +#' +#'@references +#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +#' +#'@examples +#'exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#'obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +#'res <- CRPS(exp = exp, obs = obs) +#' +#'@import multiApply +#'@importFrom SpecsVerification enscrps_cpp +#'@importFrom ClimProjDiags Subset +#'@export +CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', + Fair = FALSE, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (!is.array(exp) | !is.numeric(exp)) + stop('Parameter "exp" must be a numeric array.') + if (!is.array(obs) | !is.numeric(obs)) + stop('Parameter "obs" must be a numeric array.') + if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop('Parameter "time_dim" must be a character string.') + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + ## exp and obs (2) + if (memb_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[memb_dim]),1)){ + obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') + } else {stop("Not implemented for observations with members ('obs' can have 'memb_dim', but it should be of length=1).")} + } + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'memb_dim'.")) + } + ## Fair + if (!is.logical(Fair) | length(Fair) > 1) { + stop("Parameter 'Fair' must be either TRUE or FALSE.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + + crps <- Apply(data = list(exp = exp, obs = obs), + target_dims = list(exp = c(time_dim, memb_dim), + obs = time_dim), + output_dims = time_dim, + fun = .CRPS, Fair = Fair, + ncores = ncores)$output1 + + # Return only the mean RPS + crps <- MeanDims(crps, time_dim, na.rm = FALSE) + + return(crps) +} + +.CRPS <- function(exp, obs, Fair = FALSE) { + # exp: [sdate, memb] + # obs: [sdate] + + if (Fair) { # FairCRPS + R_new <- Inf + } else {R_new <- NA} + + crps <- SpecsVerification::enscrps_cpp(ens = exp, obs = obs, R_new = R_new) + + return(crps) +} diff --git a/modules/Skill/CRPSS.R b/modules/Skill/CRPSS.R new file mode 100644 index 00000000..9f5edbd5 --- /dev/null +++ b/modules/Skill/CRPSS.R @@ -0,0 +1,172 @@ +#'Compute the Continuous Ranked Probability Skill Score +#' +#'The Continuous Ranked Probability Skill Score (CRPSS; Wilks, 2011) is the skill score +#'based on the Continuous Ranked Probability Score (CRPS; Wilks, 2011). It can be used to +#'assess whether a forecast presents an improvement or worsening with respect to +#'a reference forecast. The CRPSS ranges between minus infinite and 1. If the +#'CRPSS is positive, it indicates that the forecast has higher skill than the +#'reference forecast, while a negative value means that it has a lower skill. +#'Examples of reference forecasts are the climatological forecast (same +#'probabilities for all categories for all time steps), persistence, a previous +#'model version, and another model. It is computed as CRPSS = 1 - CRPS_exp / CRPS_ref. +#'The statistical significance is obtained based on a Random Walk test at the +#'95% confidence level (DelSole and Tippett, 2016). +#' +#'@param exp A named numerical array of the forecast with at least time +#' dimension. +#'@param obs A named numerical array of the observation with at least time +#' dimension. The dimensions must be the same as 'exp' except 'memb_dim'. +#'@param ref A named numerical array of the reference forecast data with at +#' least time dimension. The dimensions must be the same as 'exp' except +#' 'memb_dim'. If it is NULL, the climatological forecast is used as reference +#' forecast. The default value is NULL. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the probabilities of the forecast and the reference forecast. The +#' default value is 'member'. +#'@param Fair A logical indicating whether to compute the FairCRPSS (the +#' potential CRPSS that the forecast would have with an infinite ensemble size). +#' The default value is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'\item{$crpss}{ +#' A numerical array of the CRPSS with the same dimensions as "exp" except the +#' 'time_dim' and 'memb_dim' dimensions. +#'} +#'\item{$sign}{ +#' A logical array of the statistical significance of the CRPSS with the same +#' dimensions as 'exp' except the 'time_dim' and 'memb_dim' dimensions. +#'} +#' +#'@references +#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +#'DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1 +#' +#'@examples +#'exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#'obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +#'ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#'res <- CRPSS(exp = exp, obs = obs) ## climatology as reference forecast +#'res <- CRPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@export +CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', + Fair = FALSE, ncores = NULL) { + + # Check inputs + ## exp, obs, and ref (1) + if (!is.array(exp) | !is.numeric(exp)) + stop('Parameter "exp" must be a numeric array.') + if (!is.array(obs) | !is.numeric(obs)) + stop('Parameter "obs" must be a numeric array.') + if (!is.null(ref)) { + if (!is.array(ref) | !is.numeric(ref)) + stop('Parameter "ref" must be a numeric array.') + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop('Parameter "time_dim" must be a character string.') + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + if (!is.null(ref) & !time_dim %in% names(dim(ref))) { + stop("Parameter 'time_dim' is not found in 'ref' dimension.") + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (!is.null(ref) & !memb_dim %in% names(dim(ref))) { + stop("Parameter 'memb_dim' is not found in 'ref' dimension.") + } + ## exp and obs (2) + if (memb_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[memb_dim]),1)){ + obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') + } else {stop("Not implemented for observations with members ('obs' can have 'memb_dim', but it should be of length=1).")} + } + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions expect 'memb_dim'.")) + } + if (!is.null(ref)) { + name_ref <- sort(names(dim(ref))) + name_ref <- name_ref[-which(name_ref == memb_dim)] + if (!identical(length(name_exp), length(name_ref)) | + !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions expect 'memb_dim'.")) + } + } + ## Fair + if (!is.logical(Fair) | length(Fair) > 1) { + stop("Parameter 'Fair' must be either TRUE or FALSE.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + + # Compute CRPSS + if (!is.null(ref)) { # use "ref" as reference forecast + data <- list(exp = exp, obs = obs, ref = ref) + target_dims = list(exp = c(time_dim, memb_dim), + obs = time_dim, + ref = c(time_dim, memb_dim)) + } else { + data <- list(exp = exp, obs = obs) + target_dims = list(exp = c(time_dim, memb_dim), + obs = time_dim) + } + output <- Apply(data, + target_dims = target_dims, + fun = .CRPSS, + Fair = Fair, + ncores = ncores) + + return(output) +} + +.CRPSS <- function(exp, obs, ref = NULL, Fair = FALSE) { + # exp: [sdate, memb] + # obs: [sdate] + # ref: [sdate, memb] or NULL + + # CRPS of the forecast + crps_exp <- .CRPS(exp = exp, obs = obs, Fair = Fair) + + # CRPS of the reference forecast + if (is.null(ref)){ + ## using climatology as reference forecast + ## all the time steps are used as if they were members + ## then, ref dimensions are [sdate, memb], both with length(sdate) + ref <- array(data = obs, dim = c(member = length(obs))) + ref <- InsertDim(data = ref, posdim = 1, lendim = length(obs), name = 'sdate') + } + crps_ref <- .CRPS(exp = ref, obs = obs, Fair = Fair) + + # CRPSS + crpss <- 1 - mean(crps_exp) / mean(crps_ref) + + # Significance + sign <- .RandomWalkTest(skill_A = crps_exp, skill_B = crps_ref)$signif + + return(list(crpss = crpss, sign = sign)) +} diff --git a/modules/Skill/RandomWalkTest.R b/modules/Skill/RandomWalkTest.R new file mode 100644 index 00000000..adeadc1e --- /dev/null +++ b/modules/Skill/RandomWalkTest.R @@ -0,0 +1,82 @@ +#'Random walk test for skill differences +#' +#'Forecast comparison of the skill obtained with 2 forecasts (with respect to a +#'common reference) based on Random Walks, with significance estimate at the 95% +#'confidence level, as in DelSole and Tippett (2016). +#' +#'@param skill_A A numerical array of the time series of the skill with the +#' forecaster A's. +#'@param skill_B A numerical array of the time series of the skill with the +#' forecaster B's. The dimensions should be identical as parameter 'skill_A'. +#'@param time_dim A character string indicating the name of the dimension along +#' which the tests are computed. The default value is 'sdate'. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return A list of 2: +#'\item{$score}{ +#' A numerical array with the same dimensions as the input arrays except +#' 'time_dim'. The number of times that forecaster A has been better than +#' forecaster B minus the number of times that forecaster B has been better +#' than forecaster A (for skill positively oriented). If $score is positive +#' forecaster A is better than forecaster B, and if $score is negative +#' forecaster B is better than forecaster B. +#'} +#'\item{$signif}{ +#' A logical array with the same dimensions as the input arrays except +#' 'time_dim'. Whether the difference is significant or not at the 5% +#' significance level. +#'} +#' +#'@examples +#' fcst_A <- array(c(11:50), dim = c(sdate = 10, lat = 2, lon = 2)) +#' fcst_B <- array(c(21:60), dim = c(sdate = 10, lat = 2, lon = 2)) +#' reference <- array(1:40, dim = c(sdate = 10, lat = 2, lon = 2)) +#' skill_A <- abs(fcst_A - reference) +#' skill_B <- abs(fcst_B - reference) +#' RandomWalkTest(skill_A = skill_A, skill_B = skill_B, time_dim = 'sdate', ncores = 1) +#' +#'@import multiApply +#'@export +RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', ncores = NULL){ + + ## Check inputs + if (is.null(skill_A) | is.null(skill_B)){ + stop("Parameters 'skill_A' and 'skill_B' cannot be NULL.") + } + if(!is.numeric(skill_A) | !is.numeric(skill_B)){ + stop("Parameters 'skill_A' and 'skill_B' must be a numerical array.") + } + if (!identical(dim(skill_A),dim(skill_B))){ + stop("Parameters 'skill_A' and 'skill_B' must have the same dimensions.") + } + if(!is.character(time_dim)){ + stop("Parameter 'time_dim' must be a character string.") + } + if(!time_dim %in% names(dim(skill_A)) | !time_dim %in% names(dim(skill_B))){ + stop("Parameter 'time_dim' is not found in 'skill_A' or 'skill_B' dimensions.") + } + if (!is.null(ncores)){ + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1){ + stop("Parameter 'ncores' must be a positive integer.") + } + } + + ## Compute the Random Walk Test + res <- multiApply::Apply(data = list(skill_A, skill_B), + target_dims = time_dim, + fun = .RandomWalkTest, + ncores = ncores) + return(res) +} + +.RandomWalkTest <- function(skill_A, skill_B){ + + score <- cumsum(skill_A > skill_B) - cumsum(skill_A < skill_B) + + ## TRUE if significant (if last value is above or below 2*sqrt(N)) + signif<- ifelse(test = (score[length(skill_A)] < (-2)*sqrt(length(skill_A))) | (score[length(skill_A)] > 2*sqrt(length(skill_A))), + yes = TRUE, no = FALSE) + + return(list("score"=score[length(skill_A)],"signif"=signif)) +} diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index b7ab115e..7aa6714f 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -8,9 +8,9 @@ # - ask Carlos which decadal metrics he is currently using source("modules/Skill/s2s.metrics.R") -source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/develop-CRPSS/R/CRPS.R") -source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/develop-CRPSS/R/CRPSS.R") -source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/master/R/RandomWalkTest.R") +source("modules/Skill/CRPS.R") +source("modules/Skill/CRPSS.R") +source("modules/Skill/RandomWalkTest.R") ## TODO: Implement this in the future ## Which parameter are required? -- GitLab From c985808b31ac353d141fd000858ba604e9d3ba23 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 29 Jul 2022 16:50:11 +0200 Subject: [PATCH 54/68] fix daily seasonal unit test --- tests/test_seasonal.R | 2 +- tests/testthat/test-seasonal_daily.R | 22 +++++++--------------- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/tests/test_seasonal.R b/tests/test_seasonal.R index 07937875..4718e3d4 100644 --- a/tests/test_seasonal.R +++ b/tests/test_seasonal.R @@ -1,7 +1,7 @@ library(testthat) path_testthat <- file.path('./tests/testthat/') -files_testthat <- list.files('./tests/testthat/', pattern = 'seasonal_monthly') +files_testthat <- list.files('./tests/testthat/', pattern = 'seasonal') for (i_file in 1:length(files_testthat)) { source(paste0('./tests/testthat/', files_testthat[i_file])) diff --git a/tests/testthat/test-seasonal_daily.R b/tests/testthat/test-seasonal_daily.R index 071a81f6..1ff81f5c 100644 --- a/tests/testthat/test-seasonal_daily.R +++ b/tests/testthat/test-seasonal_daily.R @@ -39,8 +39,8 @@ class(data$hcst), "s2dv_cube" ) expect_equal( -class(data$fcst), -"s2dv_cube" +data$fcst, +NULL ) expect_equal( class(data$obs), @@ -52,10 +52,6 @@ c("data", "lon", "lat", "Variable", "Datasets", "Dates", "when", "source_files", ) expect_equal( names(data$hcst), -names(data$fcst) -) -expect_equal( -names(data$hcst), names(data$obs) ) expect_equal( @@ -64,7 +60,7 @@ c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 31, latitude = 4, lon ) expect_equal( dim(data$obs$data), -c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 31, latitude = 4, longitude = 4, ensemble = 1) +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 31, latitude = 4, longitude = 4, ensemble = 1) ) expect_equal( dim(data$obs$Dates$start), @@ -119,18 +115,14 @@ class(calibrated_data$hcst), "s2dv_cube" ) expect_equal( -class(calibrated_data$fcst), -"s2dv_cube" +calibrated_data$fcst, +NULL ) expect_equal( dim(calibrated_data$hcst$data), c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 31, latitude = 4, longitude = 4, ensemble = 25) ) expect_equal( -NULL, -calibrated_data$fcst -) -expect_equal( mean(calibrated_data$hcst$data), 289.6468, tolerance = 0.0001 @@ -145,7 +137,6 @@ range(calibrated_data$hcst$data), c(283.9447, 297.7496), tolerance = 0.0001 ) - }) @@ -170,6 +161,7 @@ c(time = 31, latitude = 4, longitude = 4) ) expect_equal( skill_metrics$enscorr_specs[1:3, 1, 1], -c(0.8159317, 0.8956195, 0.8355627) +c(0.8159317, 0.8956195, 0.8355627), +tolerance=0.0001 ) }) -- GitLab From f8b2e3b78a5a93d17e5ef726c343756fac3dc8f0 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 4 Aug 2022 15:02:16 +0200 Subject: [PATCH 55/68] Remove time_step variable from netCDFs, fix bug (calling object in global env inside function) --- modules/Saving/Saving.R | 54 ++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 33 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index dca4cba5..375b9248 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -25,7 +25,6 @@ get_times <- function(store.freq, fcst.horizon, leadtimes, sdate) { # Generates time dimensions and the corresponding metadata. ## TODO: Add calendar ## TODO: Subseasonal and decadal - ## TODO: Remove time_step var? switch(fcst.horizon, "seasonal" = {time <- leadtimes; ref <- 'hours since '; @@ -39,12 +38,6 @@ get_times <- function(store.freq, fcst.horizon, leadtimes, sdate) { attr(time, 'variables') <- metadata names(dim(time)) <- 'time' - time_step <- 1 - dim(time_step) <- length(time_step) - metadata <- list(time_step = list(units = paste0(ref, sdate, 'T00:00:00'))) - attr(time_step, 'variables') <- metadata - names(dim(time_step)) <- 'time_step' - sdate <- 1:length(sdate) dim(sdate) <- length(sdate) metadata <- list(sdate = list(standard_name = paste(strtoi(sdate), @@ -53,7 +46,7 @@ get_times <- function(store.freq, fcst.horizon, leadtimes, sdate) { attr(sdate, 'variables') <- metadata names(dim(sdate)) <- 'sdate' - return(list(time_step=time_step, time=time, sdate=sdate)) + return(list(time=time, sdate=sdate)) } get_latlon <- function(latitude, longitude) { @@ -97,8 +90,8 @@ save_forecast <- function(data_cube, # Generate vector containing leadtimes ## TODO: Move to a separate function? - dates <- sort(as.Date(data$hcst$Dates$start)) - n_steps <- dim(data$hcst$data)['time'][[1]] # number of time steps + dates <- sort(as.Date(data_cube$Dates$start)) + n_steps <- dim(data_cube$data)['time'][[1]] # number of time steps dates <- dates[1:n_steps] init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), @@ -149,7 +142,6 @@ save_forecast <- function(data_cube, # Get time dimension values and metadata times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate) time <- times$time - time_step <- times$time_step # Generate name of output file outfile <- get_filename(outdir, data_cube$Variable$varName, @@ -159,14 +151,14 @@ save_forecast <- function(data_cube, # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { country <- get_countries(grid) - ArrayToNc(append(country, time, fcst, time_step), outfile) + ArrayToNc(append(country, time, fcst), outfile) } else { latitude <- data_cube$lat[1:length(data_cube$lat)] longitude <- data_cube$lon[1:length(data_cube$lon)] latlon <- get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, fcst, list(time_step)) + vars <- c(vars, fcst) ArrayToNc(vars, outfile) } } @@ -195,8 +187,8 @@ save_observations <- function(data_cube, # Generate vector containing leadtimes ## TODO: Move to a separate function? - dates <- sort(as.Date(data$hcst$Dates$start)) - n_steps <- dim(data$hcst$data)['time'][[1]] # number of time steps + dates <- sort(as.Date(data_cube$Dates$start)) + n_steps <- dim(data_cube$data)['time'][[1]] # number of time steps dates <- dates[1:n_steps] init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), @@ -263,7 +255,6 @@ save_observations <- function(data_cube, # Get time dimension values and metadata times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate) time <- times$time - time_step <- times$time_step # Generate name of output file outfile <- get_filename(outdir, data_cube$Variable$varName, @@ -273,14 +264,14 @@ save_observations <- function(data_cube, # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { country <- get_countries(grid) - ArrayToNc(append(country, time, fcst, time_step), outfile) + ArrayToNc(append(country, time, fcst), outfile) } else { latitude <- data_cube$lat[1:length(data_cube$lat)] longitude <- data_cube$lon[1:length(data_cube$lon)] latlon <- get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, fcst, list(time_step)) + vars <- c(vars, fcst) ArrayToNc(vars, outfile) } } @@ -344,8 +335,8 @@ save_metrics <- function(skill, fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq # Generate vector containing leadtimes - dates <- sort(as.Date(data$hcst$Dates$start)) - n_steps <- dim(data$hcst$data)['time'][[1]] # number of time steps + dates <- sort(as.Date(data_cube$Dates$start)) + n_steps <- dim(data_cube$data)['time'][[1]] # number of time steps dates <- dates[1:n_steps] init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), @@ -362,7 +353,6 @@ save_metrics <- function(skill, times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate) time <- times$time - time_step <- times$time_step # Generate name of output file outfile <- get_filename(outdir, data_cube$Variable$varName, @@ -372,14 +362,14 @@ save_metrics <- function(skill, # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { country <- get_countries(grid) - ArrayToNc(append(country, time, skill, time_step), outfile) + ArrayToNc(append(country, time, skill), outfile) } else { latitude <- data_cube$lat[1:length(data_cube$lat)] longitude <- data_cube$lon[1:length(data_cube$lon)] latlon <- get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, skill, list(time_step)) + vars <- c(vars, skill) ArrayToNc(vars, outfile) } print("##### SKILL METRICS SAVED TO NETCDF FILE #####") @@ -425,8 +415,8 @@ save_percentiles <- function(percentiles, fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq # Generate vector containing leadtimes - dates <- sort(as.Date(data$hcst$Dates$start)) - n_steps <- dim(data$hcst$data)['time'][[1]] # number of time steps + dates <- sort(as.Date(data_cube$Dates$start)) + n_steps <- dim(data_cube$data)['time'][[1]] # number of time steps dates <- dates[1:n_steps] init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), @@ -444,7 +434,6 @@ save_percentiles <- function(percentiles, times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate) time <- times$time - time_step <- times$time_step # Generate name of output file outfile <- get_filename(outdir, data_cube$Variable$varName, @@ -454,14 +443,14 @@ save_percentiles <- function(percentiles, # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { country <- get_countries(grid) - ArrayToNc(append(country, time, percentiles, time_step), outfile) + ArrayToNc(append(country, time, percentiles), outfile) } else { latitude <- data_cube$lat[1:length(data_cube$lat)] longitude <- data_cube$lon[1:length(data_cube$lon)] latlon <- get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, percentiles, list(time_step)) + vars <- c(vars, percentiles) ArrayToNc(vars, outfile) } print("##### PERCENTILES SAVED TO NETCDF FILE #####") @@ -491,8 +480,8 @@ save_probabilities <- function(probs, # Generate vector containing leadtimes ## TODO: Move to a separate function? - dates <- sort(as.Date(data$hcst$Dates$start)) - n_steps <- dim(data$hcst$data)['time'][[1]] # number of time steps + dates <- sort(as.Date(data_cube$Dates$start)) + n_steps <- dim(data_cube$data)['time'][[1]] # number of time steps dates <- dates[1:n_steps] init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), @@ -545,7 +534,6 @@ save_probabilities <- function(probs, # Get time dimension values and metadata times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate) time <- times$time - time_step <- times$time_step # Generate name of output file outfile <- get_filename(outdir, data_cube$Variable$varName, @@ -555,14 +543,14 @@ save_probabilities <- function(probs, # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { country <- get_countries(grid) - ArrayToNc(append(country, time, probs_syear, time_step), outfile) + ArrayToNc(append(country, time, probs_syear), outfile) } else { latitude <- data_cube$lat[1:length(data_cube$lat)] longitude <- data_cube$lon[1:length(data_cube$lon)] latlon <- get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, probs_syear, list(time_step)) + vars <- c(vars, probs_syear) ArrayToNc(vars, outfile) } } -- GitLab From e717bc97f4eb1af1ffc883eab0de61cedae8975b Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 4 Aug 2022 15:13:44 +0200 Subject: [PATCH 56/68] Remove ncores and na.rm from main script and include them in recipe --- modules/Calibration/Calibration.R | 4 ++-- modules/Loading/testing_recipes/recipe_4.yml | 4 +++- modules/Skill/Skill.R | 8 ++++++-- modules/test_victoria.R | 8 +++----- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 273f5a08..9553b5da 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -23,8 +23,8 @@ calibrate_datasets <- function(data, recipe) { } else { # Calibration function params mm <- recipe$Analysis$Datasets$Multimodel - ncores <- 4 - na.rm <- T + ncores <- recipe$Analysis$ncores + na.rm <- recipe$Analysis$remove_NAs CALIB_MSG <- "##### CALIBRATION COMPLETE #####" # Replicate observation array for the multi-model case if (mm) { diff --git a/modules/Loading/testing_recipes/recipe_4.yml b/modules/Loading/testing_recipes/recipe_4.yml index e22fb9ee..daefa06d 100644 --- a/modules/Loading/testing_recipes/recipe_4.yml +++ b/modules/Loading/testing_recipes/recipe_4.yml @@ -31,10 +31,12 @@ Analysis: Calibration: method: mse_min Skill: - metric: RPS RPSS FRPSS BSS10 BSS90 EnsCorr Corr + metric: RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr prob: [[1/3, 2/3], [1/10, 9/10]] Indicators: index: no + ncores: 1 + remove_NAs: yes Output_format: S2S4E Run: Loglevel: INFO diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 7aa6714f..636f0271 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -48,7 +48,7 @@ source("modules/Skill/RandomWalkTest.R") # " running Skill module ", "\n", # " it can call ", metric_fun )) -compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { +compute_skill_metrics <- function(exp, obs, recipe) { # exp: s2dv_cube containing the hindcast # obs: s2dv_cube containing the observations # recipe: auto-s2s recipe as provided by read_yaml @@ -57,6 +57,8 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { time_dim <- 'syear' memb_dim <- 'ensemble' metrics <- tolower(recipe$Analysis$Workflow$Skill$metric) + ncores <- recipe$Analysis$ncores + na.rm = recipe$Analysis$remove_NAs skill_metrics <- list() for (metric in strsplit(metrics, " ")[[1]]) { # Whether the fair version of the metric is to be computed @@ -164,8 +166,10 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { return(skill_metrics) } -compute_probabilities <- function(data, recipe, na.rm = T, ncores = 1) { +compute_probabilities <- function(data, recipe) { + ncores <- recipe$Analysis$ncores + na.rm = recipe$Analysis$remove_NAs named_probs <- list() named_quantiles <- list() if (is.null(recipe$Analysis$Workflow$Skill$prob)) { diff --git a/modules/test_victoria.R b/modules/test_victoria.R index 0a5834b8..3218ded1 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -1,5 +1,5 @@ -recipe_file <- "modules/Loading/testing_recipes/recipe_3.yml" +recipe_file <- "modules/Loading/testing_recipes/recipe_4.yml" source("modules/Loading/Loading.R") source("modules/Calibration/Calibration.R") @@ -13,11 +13,9 @@ recipe <- read_yaml(recipe_file) # Calibrate datasets calibrated_data <- calibrate_datasets(data, recipe) # Compute skill metrics -skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, - recipe, na.rm = T, ncores = 4) +skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe) -probs <- compute_probabilities(calibrated_data$hcst, recipe, - na.rm = T, ncores = 4) +probs <- compute_probabilities(calibrated_data$hcst, recipe) # Export skill metrics onto outfile outdir <- get_dir(recipe) -- GitLab From 068c65d9ab167c5642da7bed85ad656415eb2e71 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 4 Aug 2022 15:26:40 +0200 Subject: [PATCH 57/68] Change probs to pProbabilities:percentiles in recipe --- modules/Loading/testing_recipes/recipe_4.yml | 3 ++- modules/Skill/Skill.R | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/modules/Loading/testing_recipes/recipe_4.yml b/modules/Loading/testing_recipes/recipe_4.yml index daefa06d..717ef692 100644 --- a/modules/Loading/testing_recipes/recipe_4.yml +++ b/modules/Loading/testing_recipes/recipe_4.yml @@ -32,7 +32,8 @@ Analysis: method: mse_min Skill: metric: RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr - prob: [[1/3, 2/3], [1/10, 9/10]] + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] Indicators: index: no ncores: 1 diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 636f0271..870ba4df 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -172,11 +172,11 @@ compute_probabilities <- function(data, recipe) { na.rm = recipe$Analysis$remove_NAs named_probs <- list() named_quantiles <- list() - if (is.null(recipe$Analysis$Workflow$Skill$prob)) { + if (is.null(recipe$Analysis$Workflow$Probabilities$percentiles)) { stop("Quantiles and probability bins have been requested, but no ", "thresholds are provided in the recipe.") } else { - for (element in recipe$Analysis$Workflow$Skill$prob) { + for (element in recipe$Analysis$Workflow$Probabilities$percentiles) { thresholds <- sapply(element, function (x) eval(parse(text = x))) probs <- Compute_probs(data$data, thresholds, ncores = ncores, -- GitLab From e97c2955ddedafe5b425e90992d8c7a785ba5ec9 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 4 Aug 2022 16:28:58 +0200 Subject: [PATCH 58/68] Add default option for ncores and na.rm --- modules/Calibration/Calibration.R | 13 +++++++++++-- modules/Skill/Skill.R | 27 ++++++++++++++++++++++----- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 9553b5da..85b1b007 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -23,8 +23,17 @@ calibrate_datasets <- function(data, recipe) { } else { # Calibration function params mm <- recipe$Analysis$Datasets$Multimodel - ncores <- recipe$Analysis$ncores - na.rm <- recipe$Analysis$remove_NAs + if (is.null(recipe$Analysis$ncores)) { + ncores <- 1 + } else { + ncores <- recipe$Analysis$ncores + } + if (is.null(recipe$Analysis$remove_NAs)) { + na.rm = F + } else { + na.rm = recipe$Analysis$remove_NAs + } + CALIB_MSG <- "##### CALIBRATION COMPLETE #####" # Replicate observation array for the multi-model case if (mm) { diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 870ba4df..7c6820da 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -57,10 +57,18 @@ compute_skill_metrics <- function(exp, obs, recipe) { time_dim <- 'syear' memb_dim <- 'ensemble' metrics <- tolower(recipe$Analysis$Workflow$Skill$metric) - ncores <- recipe$Analysis$ncores - na.rm = recipe$Analysis$remove_NAs + if (is.null(recipe$Analysis$ncores)) { + ncores <- 1 + } else { + ncores <- recipe$Analysis$ncores + } + if (is.null(recipe$Analysis$remove_NAs)) { + na.rm = F + } else { + na.rm = recipe$Analysis$remove_NAs + } skill_metrics <- list() - for (metric in strsplit(metrics, " ")[[1]]) { + for (metric in strsplit(metrics, ", | |,")[[1]]) { # Whether the fair version of the metric is to be computed if (metric %in% c('frps', 'frpss', 'bss10', 'bss90', 'fcrps', 'fcrpss')) { @@ -168,8 +176,17 @@ compute_skill_metrics <- function(exp, obs, recipe) { compute_probabilities <- function(data, recipe) { - ncores <- recipe$Analysis$ncores - na.rm = recipe$Analysis$remove_NAs + if (is.null(recipe$Analysis$ncores)) { + ncores <- 1 + } else { + ncores <- recipe$Analysis$ncores + } + if (is.null(recipe$Analysis$remove_NAs)) { + na.rm = F + } else { + na.rm = recipe$Analysis$remove_NAs + } + named_probs <- list() named_quantiles <- list() if (is.null(recipe$Analysis$Workflow$Probabilities$percentiles)) { -- GitLab From 8a27b9baf10e14b656a8b7b10cc60e5d9451e462 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 4 Aug 2022 17:36:24 +0200 Subject: [PATCH 59/68] Add function to save ensemble correlation to separate file --- modules/Saving/Saving.R | 87 +++++++++++++++++++++++++++++++++++++++++ modules/test_victoria.R | 4 ++ 2 files changed, 91 insertions(+) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 375b9248..5334ec77 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -375,6 +375,93 @@ save_metrics <- function(skill, print("##### SKILL METRICS SAVED TO NETCDF FILE #####") } +save_corr <- function(skill, + recipe, + data_cube, + outdir, + agg="global") { + # This function adds metadata to the ensemble correlation in 'skill' + # and exports it to a netCDF file inside 'outdir'. + + # Select ensemble correlation from the list of metrics + ## TODO: Move condition to wrapper + if ("corr" %in% names(skill)) { + corr_metrics <- grep("^corr", names(skill)) + skill <- skill[corr_metrics] + } + + # Define grid dimensions and names + lalo <- c('longitude', 'latitude') + # Remove singleton dimensions and rearrange lon, lat and time dims + if (tolower(agg) == "global") { + ## TODO: Implement metrics with additional non-singleton dimensions + ## e.g. 'ensemble' in ensemble correlation. + skill <- lapply(skill, function(x) { + Reorder(x, c(lalo, 'ensemble', 'time'))}) + } + + # Add global and variable attributes + global_attributes <- get_global_attributes(recipe) + attr(skill[[1]], 'global_attrs') <- global_attributes + + for (i in 1:length(skill)) { + ## TODO: create dictionary with proper metadata + metric <- names(skill[i]) + if (tolower(agg) == "country") { + sdname <- paste0(metric, " region-aggregated metric") + dims <- c('Country', 'ensemble', 'time') + } else { + sdname <- paste0(metric, " grid point metric") # formerly names(metric) + dims <- c(lalo, 'ensemble', 'time') + } + metadata <- list(metric = list(name = metric, standard_name = sdname)) + attr(skill[[i]], 'variables') <- metadata + names(dim(skill[[i]])) <- dims + } + + # Time indices and metadata + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + # Generate vector containing leadtimes + dates <- sort(as.Date(data_cube$Dates$start)) + n_steps <- dim(data_cube$data)['time'][[1]] # number of time steps + dates <- dates[1:n_steps] + init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d') + # Get time difference in months + leadtimes <- interval(init_date, dates) %/% hours(1) + # If a fcst is provided, use that as the ref. year. Otherwise use 1970. + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + } else { + fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) + } + + times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate) + time <- times$time + + # Generate name of output file + outfile <- get_filename(outdir, data_cube$Variable$varName, + fcst.sdate, fcst.sdate, + agg, fcst.horizon, "corr") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, skill), outfile) + } else { + latitude <- data_cube$lat[1:length(data_cube$lat)] + longitude <- data_cube$lon[1:length(data_cube$lon)] + latlon <- get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, skill) + ArrayToNc(vars, outfile) + } + print("##### ENSEMBLE CORRELATION SAVED TO NETCDF FILE #####") +} save_percentiles <- function(percentiles, recipe, data_cube, diff --git a/modules/test_victoria.R b/modules/test_victoria.R index 3218ded1..a6ed196b 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -6,10 +6,13 @@ source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") +recipe_file <- "modules/Loading/testing_recipes/recipe_4.yml" + # Load datasets data <- load_datasets(recipe_file) ## TODO: Instead of reading the recipe at each module, do it at the beginning? recipe <- read_yaml(recipe_file) +recipe$ID <- tools::file_path_sans_ext(basename(recipe_file)) # Calibrate datasets calibrated_data <- calibrate_datasets(data, recipe) # Compute skill metrics @@ -21,6 +24,7 @@ probs <- compute_probabilities(calibrated_data$hcst, recipe) outdir <- get_dir(recipe) dir.create(outdir, showWarnings = FALSE, recursive = TRUE) save_metrics(skill_metrics, recipe, data$hcst, outdir) +save_corr(skill_metrics, recipe, data$hcst, outdir) # Export percentiles and probability bins onto outfile save_percentiles(probs$percentiles, recipe, data$hcst, outdir) save_probabilities(probs$probs, recipe, data$hcst, outdir) -- GitLab From 7349175c0a49619dbacd9ba2cea0ed7ee3aa42ce Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 5 Aug 2022 09:06:42 +0200 Subject: [PATCH 60/68] Remove seasonal daily from operative unit tests --- tests/test_seasonal.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_seasonal.R b/tests/test_seasonal.R index 4718e3d4..07937875 100644 --- a/tests/test_seasonal.R +++ b/tests/test_seasonal.R @@ -1,7 +1,7 @@ library(testthat) path_testthat <- file.path('./tests/testthat/') -files_testthat <- list.files('./tests/testthat/', pattern = 'seasonal') +files_testthat <- list.files('./tests/testthat/', pattern = 'seasonal_monthly') for (i_file in 1:length(files_testthat)) { source(paste0('./tests/testthat/', files_testthat[i_file])) -- GitLab From 773134d66bfd9b12d27fb7de093568480061e24d Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 5 Aug 2022 10:48:22 +0200 Subject: [PATCH 61/68] Add wrapper for functions in saving module --- modules/Saving/Saving.R | 37 +++++++++++++++++++++++++++++++++++++ modules/test_victoria.R | 25 +++++-------------------- 2 files changed, 42 insertions(+), 20 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 5334ec77..e9b7ae7e 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -3,6 +3,42 @@ source("modules/Saving/paths2save.R") +save_data <- function(recipe, data, calibrated_data, + skill_metrics = NULL, + probabilities = NULL) { + # Wrapper for the saving functions. + # recipe: The auto-s2s recipe + # data: output of load_datasets() + # calibrated_data: output of calibrate_datasets() + # skill_metrics: output of compute_skill_metrics() + # probabilities: output of compute_probabilities() + + # Create output directory + outdir <- get_dir(recipe) + dir.create(outdir, showWarnings = FALSE, recursive = TRUE) + + # Export hindcast, forecast and observations onto outfile + save_forecast(calibrated_data$hcst, recipe, outdir) + if (!is.null(calibrated_data$fcst)) { + save_forecast(calibrated_data$fcst, recipe, outdir) + } + save_observations(data$obs, recipe, outdir) + + # Export skill metrics onto outfile + if (!is.null(skill_metrics)) { + save_metrics(skill_metrics, recipe, data$hcst, outdir) + if ("corr" %in% names(skill_metrics)) { + save_corr(skill_metrics, recipe, data$hcst, outdir) + } + } + + # Export probabilities onto outfile + if (!is.null(probabilities)) { + save_percentiles(probabilities$percentiles, recipe, data$hcst, outdir) + save_probabilities(probabilities$probs, recipe, data$hcst, outdir) + } +} + get_global_attributes <- function(recipe) { # Generates metadata of interest to add to the global attributes of the # netCDF files. @@ -462,6 +498,7 @@ save_corr <- function(skill, } print("##### ENSEMBLE CORRELATION SAVED TO NETCDF FILE #####") } + save_percentiles <- function(percentiles, recipe, data_cube, diff --git a/modules/test_victoria.R b/modules/test_victoria.R index a6ed196b..920ca12f 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -1,36 +1,21 @@ -recipe_file <- "modules/Loading/testing_recipes/recipe_4.yml" - source("modules/Loading/Loading.R") source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") recipe_file <- "modules/Loading/testing_recipes/recipe_4.yml" +recipe <- read_yaml(recipe_file) # Load datasets data <- load_datasets(recipe_file) ## TODO: Instead of reading the recipe at each module, do it at the beginning? recipe <- read_yaml(recipe_file) -recipe$ID <- tools::file_path_sans_ext(basename(recipe_file)) # Calibrate datasets calibrated_data <- calibrate_datasets(data, recipe) # Compute skill metrics skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe) - -probs <- compute_probabilities(calibrated_data$hcst, recipe) - -# Export skill metrics onto outfile -outdir <- get_dir(recipe) -dir.create(outdir, showWarnings = FALSE, recursive = TRUE) -save_metrics(skill_metrics, recipe, data$hcst, outdir) -save_corr(skill_metrics, recipe, data$hcst, outdir) -# Export percentiles and probability bins onto outfile -save_percentiles(probs$percentiles, recipe, data$hcst, outdir) -save_probabilities(probs$probs, recipe, data$hcst, outdir) -# Export hindcast, forecast and observations onto outfile -save_forecast(calibrated_data$hcst, recipe, outdir) -if (!is.null(calibrated_data$fcst)) { - save_forecast(calibrated_data$fcst, recipe, outdir) -} -save_observations(data$obs, recipe, outdir) +# Compute percentiles and probability bins +probabilities <- compute_probabilities(calibrated_data$hcst, recipe) +# Export all data to netCDF +save_data(recipe, data, calibrated_data, skill_metrics, probabilities) -- GitLab From f8ce169f0f8082ebd8b222513a3737df39eccb0f Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 5 Aug 2022 12:03:14 +0200 Subject: [PATCH 62/68] Add variable dictionary --- conf/variable-dictionary.yml | 78 ++++++++++++++++++++++++++++++++++-- modules/Saving/Saving.R | 56 +++++++++++++++----------- 2 files changed, 107 insertions(+), 27 deletions(-) diff --git a/conf/variable-dictionary.yml b/conf/variable-dictionary.yml index e1a0e702..5127ae14 100644 --- a/conf/variable-dictionary.yml +++ b/conf/variable-dictionary.yml @@ -6,7 +6,7 @@ vars: units: "K" long_name: "Near-Surface Air Temperature" standard_name: "air_temperature" - outname: "t2" +# outname: "t2" tasmax: units: "K" long_name: "Maximum Near-Surface Air Temperature" @@ -19,15 +19,85 @@ vars: units: "m s-1" long_name: "Near-Surface Wind Speed" standard_name: "wind_speed" - outname: "wind" +# outname: "wind" rsds: units: "W m-2" long_name: "Surface Downwelling Shortwave Radiation" standard_name: "surface_downwelling_shortwave_flux_in_air" positive: "down" - outname: "rswin" +# outname: "rswin" prlr: units: "mm" long_name: "Total precipitation" standard_name: "total_precipitation_flux" #? Not in CF - outname: "acprec" +# outname: "acprec" + +# Coordinates +coords: + longitude: + units: "degrees_east" + standard_name: "longitude" + long_name: "Longitude" + axis: "X" + latitude: + units: "degrees_north" + standard_name: "latitude" + long_name: "Latitude" + axis: "Y" + +# Skill metrics +metrics: + enscorr: + long_name: "Ensemble Mean Correlation Coefficient" + enscorr_specs: + long_name: "Ensemble Mean Correlation Coefficient" + enscorr_p.value: + long_name: "Ensemble Mean Correlation p-value" + enscorr_conf.low: + long_name: "Ensemble Mean Correlation Lower Confidence Interval" + enscorr_conf.up: + long_name: "Ensemble Mean Correlation Upper Confidence Interval" + enscorr_significance: + long_name: "Ensemble Mean Correlation Statistical Significance" + corr: + long_name: "Ensemble Correlation Coefficient" + corr_specs: + long_name: "Ensemble Correlation Coefficient" + corr_p.value: + long_name: "Ensemble Correlation p-value" + corr_conf.low: + long_name: "Ensemble Correlation Lower Confidence Interval" + corr_conf.up: + long_name: "Ensemble Correlation Upper Confidence Interval" + corr_significance: + long_name: "Ensemble Correlation Statistical Significance" + rps: + long_name: "Ranked Probability Score" + frps: + long_name: "Fair Ranked Probability Score" + rpss: + long_name: "Ranked Probability Skill Score" + rpss_significance: + long_name: "Ranked Probability Skill Score Statistical Significance" + rpss_specs: + long_name: "Ranked Probability Skill Score" + frpss: + long_name: "Fair Ranked Probability Skill Score" + frpss_significance: + long_name: "Fair Ranked Probability Skill Score Statistical Significance" + frpss_specs: + long_name: "Fair Ranked Probability Skill Score" + bss10: + long_name: "Brier Skill Score Lower Extreme" + bss10_specs: + long_name: "Brier Skill Score Lower Extreme" + bss10_significance: + long_name: "Brier Score Lower Extreme Statistical Significance" + bss90: + long_name: "Brier Skill Score Upper Extreme" + bss90_significance: + long_name: "Brier Skill Score Upper Extreme Statistical Significance" + crps: + long_name: "Continuous Ranked Probability Score" + crpss: + long_name: "Continuous Ranked Probability Skill Score" diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index e9b7ae7e..e1fc1828 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -1,4 +1,3 @@ -## TODO: Add function to save corr ## TODO: Save obs percentiles source("modules/Saving/paths2save.R") @@ -13,22 +12,24 @@ save_data <- function(recipe, data, calibrated_data, # skill_metrics: output of compute_skill_metrics() # probabilities: output of compute_probabilities() + dict <- read_yaml("conf/variable-dictionary.yml") + # Create output directory outdir <- get_dir(recipe) dir.create(outdir, showWarnings = FALSE, recursive = TRUE) # Export hindcast, forecast and observations onto outfile - save_forecast(calibrated_data$hcst, recipe, outdir) + save_forecast(calibrated_data$hcst, recipe, dict, outdir) if (!is.null(calibrated_data$fcst)) { - save_forecast(calibrated_data$fcst, recipe, outdir) + save_forecast(calibrated_data$fcst, recipe, dict, outdir) } - save_observations(data$obs, recipe, outdir) + save_observations(data$obs, recipe, dict, outdir) # Export skill metrics onto outfile if (!is.null(skill_metrics)) { - save_metrics(skill_metrics, recipe, data$hcst, outdir) + save_metrics(skill_metrics, recipe, dict, data$hcst, outdir) if ("corr" %in% names(skill_metrics)) { - save_corr(skill_metrics, recipe, data$hcst, outdir) + save_corr(skill_metrics, recipe, dict, data$hcst, outdir) } } @@ -82,7 +83,7 @@ get_times <- function(store.freq, fcst.horizon, leadtimes, sdate) { attr(sdate, 'variables') <- metadata names(dim(sdate)) <- 'sdate' - return(list(time=time, sdate=sdate)) + return(list(time=time)) } get_latlon <- function(latitude, longitude) { @@ -107,6 +108,7 @@ get_latlon <- function(latitude, longitude) { save_forecast <- function(data_cube, recipe, + dictionary, outdir, agg="global") { # Loops over the years in the s2dv_cube containing a hindcast or forecast @@ -150,10 +152,11 @@ save_forecast <- function(data_cube, } # Add metadata + var.sdname <- dictionary$vars[[variable]]$standard_name if (tolower(agg) == "country") { dims <- c('Country', 'time') var.expname <- paste0(variable, '_country') - var.sdname <- paste0("Country-Aggregated ", var.longname) + var.longname <- paste0("Country-Aggregated ", var.longname) var.units <- attr(data_cube$Variable, 'variable')$units } else { dims <- c(lalo, 'ensemble', 'time') @@ -204,6 +207,7 @@ save_forecast <- function(data_cube, save_observations <- function(data_cube, recipe, + dictionary, outdir, agg="global") { # Loops over the years in the s2dv_cube containing the observations and @@ -247,15 +251,15 @@ save_observations <- function(data_cube, } # Add metadata + var.sdname <- dictionary$vars[[variable]]$standard_name if (tolower(agg) == "country") { dims <- c('Country', 'time') var.expname <- paste0(variable, '_country') - var.sdname <- paste0("Country-Aggregated ", var.longname) + var.longname <- paste0("Country-Aggregated ", var.longname) var.units <- attr(data_cube$Variable, 'variable')$units } else { dims <- c(lalo, 'time') var.expname <- variable - var.sdname <- var.longname var.units <- attr(data_cube$Variable, 'variable')$units } @@ -325,6 +329,7 @@ save_observations <- function(data_cube, save_metrics <- function(skill, recipe, + dictionary, data_cube, outdir, agg="global") { @@ -355,14 +360,17 @@ save_metrics <- function(skill, for (i in 1:length(skill)) { ## TODO: create dictionary with proper metadata metric <- names(skill[i]) + long_name <- dictionary$metrics[[metric]]$long_name if (tolower(agg) == "country") { sdname <- paste0(metric, " region-aggregated metric") dims <- c('Country', 'time') } else { - sdname <- paste0(metric, " grid point metric") # formerly names(metric) + sdname <- paste0(metric, " grid point metric") dims <- c(lalo, 'time') } - metadata <- list(metric = list(name = metric, standard_name = sdname)) + metadata <- list(metric = list(name = metric, + standard_name = sdname, + long_name = long_name)) attr(skill[[i]], 'variables') <- metadata names(dim(skill[[i]])) <- dims } @@ -413,6 +421,7 @@ save_metrics <- function(skill, save_corr <- function(skill, recipe, + dictionary, data_cube, outdir, agg="global") { @@ -443,6 +452,7 @@ save_corr <- function(skill, for (i in 1:length(skill)) { ## TODO: create dictionary with proper metadata metric <- names(skill[i]) + long_name <- dictionary$metrics[[metric]]$long_name if (tolower(agg) == "country") { sdname <- paste0(metric, " region-aggregated metric") dims <- c('Country', 'ensemble', 'time') @@ -450,7 +460,9 @@ save_corr <- function(skill, sdname <- paste0(metric, " grid point metric") # formerly names(metric) dims <- c(lalo, 'ensemble', 'time') } - metadata <- list(metric = list(name = metric, standard_name = sdname)) + metadata <- list(metric = list(name = metric, + standard_name = sdname, + long_name = long_name)) attr(skill[[i]], 'variables') <- metadata names(dim(skill[[i]])) <- dims } @@ -500,10 +512,10 @@ save_corr <- function(skill, } save_percentiles <- function(percentiles, - recipe, - data_cube, - outdir, - agg="global") { + recipe, + data_cube, + outdir, + agg="global") { # This function adds metadata to the percentiles # and exports them to a netCDF file inside 'outdir'. @@ -523,14 +535,13 @@ save_percentiles <- function(percentiles, ## TODO: create dictionary with proper metadata ## TODO: replace with proper standard names percentile <- names(percentiles[i]) + long_name <- paste0(gsub("^.*_", "", percentile), "th percentile") if (tolower(agg) == "country") { - sdname <- paste0(gsub("^.*_", "", percentile), "th percentile") dims <- c('Country', 'time') } else { - sdname <- paste0(gsub("^.*_", "", percentile), "th percentile") dims <- c(lalo, 'time') } - metadata <- list(metric = list(name = percentile, standard_name = sdname)) + metadata <- list(metric = list(name = percentile, long_name = long_name)) attr(percentiles[[i]], 'variables') <- metadata names(dim(percentiles[[i]])) <- dims } @@ -636,14 +647,13 @@ save_probabilities <- function(probs, ## TODO: create dictionary with proper metadata ## TODO: replace with proper standard names prob_bin <- names(probs_syear[bin]) + long_name <- paste0(prob_bin, " probability category") if (tolower(agg) == "country") { - sdname <- paste0(prob_bin, " probability category") dims <- c('Country', 'time') } else { - sdname <- paste0(prob_bin, " probability category") dims <- c(lalo, 'time') } - metadata <- list(metric = list(name = prob_bin, standard_name = sdname)) + metadata <- list(metric = list(name = prob_bin, long_name = long_name)) attr(probs_syear[[bin]], 'variables') <- metadata names(dim(probs_syear[[bin]])) <- dims # is this necessary? } -- GitLab From df713cbe3a94fd232a3c1a5fa7fa7f4029100570 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 5 Aug 2022 12:04:06 +0200 Subject: [PATCH 63/68] update recipes --- modules/Loading/testing_recipes/recipe_1.yml | 5 ++- modules/Loading/testing_recipes/recipe_2.yml | 3 +- modules/Loading/testing_recipes/recipe_3.yml | 3 +- modules/Loading/testing_recipes/recipe_6.yml | 3 +- .../testing_recipes/recipe_decadal.yml | 3 +- .../testing_recipes/recipe_decadal_daily.yml | 3 +- .../recipe_unit-test-daily.yml | 44 ------------------- 7 files changed, 14 insertions(+), 50 deletions(-) delete mode 100644 modules/Loading/testing_recipes/recipe_unit-test-daily.yml diff --git a/modules/Loading/testing_recipes/recipe_1.yml b/modules/Loading/testing_recipes/recipe_1.yml index cc142aa4..71e386fa 100644 --- a/modules/Loading/testing_recipes/recipe_1.yml +++ b/modules/Loading/testing_recipes/recipe_1.yml @@ -33,9 +33,12 @@ Analysis: method: qmap # Mandatory, str: Calibration method. See docu. Skill: metric: RPSS FRPSS # str: Skill metric or list of skill metrics. See docu. - prob: [[1/3, 2/3], [1/10, 9/10]] # frac: Probability bins + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. Indicators: index: no + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE Output_format: S2S4E Run: Loglevel: INFO diff --git a/modules/Loading/testing_recipes/recipe_2.yml b/modules/Loading/testing_recipes/recipe_2.yml index 9bb2f8a7..27cdccdc 100644 --- a/modules/Loading/testing_recipes/recipe_2.yml +++ b/modules/Loading/testing_recipes/recipe_2.yml @@ -32,7 +32,8 @@ Analysis: method: raw Skill: metric: RPSS_specs BSS90_specs EnsCorr_specs FRPS_specs FRPSS_specs BSS10_specs FRPS - prob: [[1/3, 2/3]] + Probabilities: + percentiles: [[1/3, 2/3]] Indicators: index: no Output_format: S2S4E diff --git a/modules/Loading/testing_recipes/recipe_3.yml b/modules/Loading/testing_recipes/recipe_3.yml index 014abe9e..233c14eb 100644 --- a/modules/Loading/testing_recipes/recipe_3.yml +++ b/modules/Loading/testing_recipes/recipe_3.yml @@ -32,7 +32,8 @@ Analysis: method: qmap Skill: metric: FRPS RPSS - prob: [[1/3, 2/3], [1/10, 9/10]] + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] Indicators: index: no Output_format: S2S4E diff --git a/modules/Loading/testing_recipes/recipe_6.yml b/modules/Loading/testing_recipes/recipe_6.yml index bc8a9d27..b1829d22 100644 --- a/modules/Loading/testing_recipes/recipe_6.yml +++ b/modules/Loading/testing_recipes/recipe_6.yml @@ -32,7 +32,8 @@ Analysis: method: mse_min Skill: metric: CRPS CRPSS FCRPS FCRPSS FRPS_Specs - prob: [[1/3, 2/3], [1/10, 9/10]] + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] Indicators: index: no Output_format: S2S4E diff --git a/modules/Loading/testing_recipes/recipe_decadal.yml b/modules/Loading/testing_recipes/recipe_decadal.yml index 0e9e1851..bcbcc736 100644 --- a/modules/Loading/testing_recipes/recipe_decadal.yml +++ b/modules/Loading/testing_recipes/recipe_decadal.yml @@ -33,7 +33,8 @@ Analysis: method: bias Skill: metric: RPSS Corr - prob: [[1/3, 2/3]] + Probabilities: + percentiles: [[1/3, 2/3]] Indicators: index: FALSE Output_format: S2S4E diff --git a/modules/Loading/testing_recipes/recipe_decadal_daily.yml b/modules/Loading/testing_recipes/recipe_decadal_daily.yml index 4f7608cf..457dccf6 100644 --- a/modules/Loading/testing_recipes/recipe_decadal_daily.yml +++ b/modules/Loading/testing_recipes/recipe_decadal_daily.yml @@ -33,7 +33,8 @@ Analysis: method: qmap Skill: metric: RPSS FRPSS - prob: [[1/3, 2/3], [1/10, 9/10]] + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] Indicators: index: FALSE Output_format: S2S4E diff --git a/modules/Loading/testing_recipes/recipe_unit-test-daily.yml b/modules/Loading/testing_recipes/recipe_unit-test-daily.yml deleted file mode 100644 index 1ce56fad..00000000 --- a/modules/Loading/testing_recipes/recipe_unit-test-daily.yml +++ /dev/null @@ -1,44 +0,0 @@ -Description: - Author: V. Agudetse - Info: Unit test for daily data, regridding to system - -Analysis: - Horizon: seasonal - Variables: - name: tas - freq: daily_mean - Datasets: - System: - name: system5c3s - Multimodel: no - Reference: - name: era5 - Time: - sdate: - fcst_syear: '2020' - fcst_sday: '1101' - hcst_start: '1993' - hcst_end: '1996' - leadtimemin: 0 - leadtimemax: 0 - Region: - latmin: -2 - latmax: 2 - lonmin: 0 - lonmax: 4 - Regrid: - method: bilinear - type: to_system - Workflow: - Calibration: - method: qmap - Skill: - metric: RPSS - Indicators: - index: no - Output_format: S2S4E -Run: - Loglevel: INFO - Terminal: yes - output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ -- GitLab From 2f40b5266431e385b056dc1bd96e6b972a584e68 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 5 Aug 2022 12:04:56 +0200 Subject: [PATCH 64/68] Update unit tests --- tests/recipes/recipe-decadal_daily_1.yml | 3 ++- tests/recipes/recipe-decadal_monthly_1.yml | 3 ++- tests/recipes/recipe-decadal_monthly_2.yml | 3 ++- tests/testthat/test-decadal_monthly_1.R | 6 ++---- tests/testthat/test-decadal_monthly_2.R | 6 ++---- tests/testthat/test-seasonal_daily.R | 3 +-- tests/testthat/test-seasonal_monthly.R | 3 +-- 7 files changed, 12 insertions(+), 15 deletions(-) diff --git a/tests/recipes/recipe-decadal_daily_1.yml b/tests/recipes/recipe-decadal_daily_1.yml index 119facb9..de2c9288 100644 --- a/tests/recipes/recipe-decadal_daily_1.yml +++ b/tests/recipes/recipe-decadal_daily_1.yml @@ -33,7 +33,8 @@ Analysis: method: qmap Skill: metric: RPSS - prob: [[1/10, 9/10]] + Probabilities: + percentiles: [[1/10, 9/10]] Indicators: index: FALSE Output_format: S2S4E diff --git a/tests/recipes/recipe-decadal_monthly_1.yml b/tests/recipes/recipe-decadal_monthly_1.yml index 02e3e349..577fdb89 100644 --- a/tests/recipes/recipe-decadal_monthly_1.yml +++ b/tests/recipes/recipe-decadal_monthly_1.yml @@ -33,7 +33,8 @@ Analysis: method: bias Skill: metric: RPSS - prob: [[1/3, 2/3], [1/10, 9/10]] + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] Indicators: index: FALSE Output_format: S2S4E diff --git a/tests/recipes/recipe-decadal_monthly_2.yml b/tests/recipes/recipe-decadal_monthly_2.yml index 6a5fc617..92f1553d 100644 --- a/tests/recipes/recipe-decadal_monthly_2.yml +++ b/tests/recipes/recipe-decadal_monthly_2.yml @@ -33,7 +33,8 @@ Analysis: method: raw Skill: metric: RPSS_specs BSS90_specs EnsCorr_specs FRPS_specs FRPSS_specs BSS10_specs FRPS - prob: [[1/3, 2/3]] + Probabilities: + percentiles: [[1/3, 2/3]] Indicators: index: FALSE Output_format: S2S4E diff --git a/tests/testthat/test-decadal_monthly_1.R b/tests/testthat/test-decadal_monthly_1.R index e771cf29..51d10f1b 100644 --- a/tests/testthat/test-decadal_monthly_1.R +++ b/tests/testthat/test-decadal_monthly_1.R @@ -22,12 +22,10 @@ suppressWarnings({invisible(capture.output( # Compute skill metrics suppressWarnings({invisible(capture.output( -skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, - recipe, na.rm = T, ncores = 4) +skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe) ))}) suppressWarnings({invisible(capture.output( -probs <- compute_probabilities(calibrated_data$hcst, recipe, - na.rm = T, ncores = 4) +probs <- compute_probabilities(calibrated_data$hcst, recipe) ))}) #====================================== diff --git a/tests/testthat/test-decadal_monthly_2.R b/tests/testthat/test-decadal_monthly_2.R index a3325a7f..02aabdf1 100644 --- a/tests/testthat/test-decadal_monthly_2.R +++ b/tests/testthat/test-decadal_monthly_2.R @@ -23,12 +23,10 @@ suppressWarnings({invisible(capture.output( # Compute skill metrics suppressWarnings({invisible(capture.output( -skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, - recipe, na.rm = T, ncores = 4) +skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe) ))}) suppressWarnings({invisible(capture.output( -probs <- compute_probabilities(calibrated_data$hcst, recipe, - na.rm = T, ncores = 4) +probs <- compute_probabilities(calibrated_data$hcst, recipe) ))}) #====================================== diff --git a/tests/testthat/test-seasonal_daily.R b/tests/testthat/test-seasonal_daily.R index 1ff81f5c..c37b5514 100644 --- a/tests/testthat/test-seasonal_daily.R +++ b/tests/testthat/test-seasonal_daily.R @@ -20,8 +20,7 @@ calibrated_data <- calibrate_datasets(data, recipe) # Compute skill metrics suppressWarnings({invisible(capture.output( -skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, - recipe, na.rm = T, ncores = 4) +skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe) ))}) test_that("1. Loading", { diff --git a/tests/testthat/test-seasonal_monthly.R b/tests/testthat/test-seasonal_monthly.R index 0aa33b22..05c1c411 100644 --- a/tests/testthat/test-seasonal_monthly.R +++ b/tests/testthat/test-seasonal_monthly.R @@ -20,8 +20,7 @@ calibrated_data <- calibrate_datasets(data, recipe) # Compute skill metrics suppressWarnings({invisible(capture.output( -skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, - recipe, na.rm = T, ncores = 4) +skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe) ))}) test_that("1. Loading", { -- GitLab From 0b632bd53211e6e8f95d4683ba2cd64832e89f5a Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 5 Aug 2022 12:16:01 +0200 Subject: [PATCH 65/68] Add daily seasonal unit test --- tests/test_seasonal.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_seasonal.R b/tests/test_seasonal.R index 07937875..4718e3d4 100644 --- a/tests/test_seasonal.R +++ b/tests/test_seasonal.R @@ -1,7 +1,7 @@ library(testthat) path_testthat <- file.path('./tests/testthat/') -files_testthat <- list.files('./tests/testthat/', pattern = 'seasonal_monthly') +files_testthat <- list.files('./tests/testthat/', pattern = 'seasonal') for (i_file in 1:length(files_testthat)) { source(paste0('./tests/testthat/', files_testthat[i_file])) -- GitLab From d474de782bc8ece38213bd7fe57f98fe7cd4f75d Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 5 Aug 2022 14:39:22 +0200 Subject: [PATCH 66/68] Change dimensions of time attributete for daily hcst and fcst --- modules/Loading/Loading.R | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index c017e0f4..8f0c4626 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -132,7 +132,7 @@ load_datasets <- function(recipe_file) { split_multiselected_dims = split_multiselected_dims, retrieve = TRUE) - if (recipe$Analysis$Variables$freq == "daily_mean"){ + if (recipe$Analysis$Variables$freq == "daily_mean") { # Adjusts dims for daily case, could be removed if startR allows # multidim split names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear" @@ -141,6 +141,13 @@ load_datasets <- function(recipe_file) { latitude = 1, longitude = 1, ensemble = 1) default_dims[names(dim(hcst))] <- dim(hcst) dim(hcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(hcst, "Variables")$common$time))[which(names( + dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(hcst, "Variables")$common$time))] <- + dim(attr(hcst, "Variables")$common$time) + dim(attr(hcst, "Variables")$common$time) <- default_time_dims } # Convert hcst to s2dv_cube object @@ -186,6 +193,13 @@ load_datasets <- function(recipe_file) { latitude = 1, longitude = 1, ensemble = 1) default_dims[names(dim(fcst))] <- dim(fcst) dim(fcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(fcst, "Variables")$common$time))[which(names( + dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(fcst, "Variables")$common$time))] <- + dim(attr(fcst, "Variables")$common$time) + dim(attr(fcst, "Variables")$common$time) <- default_time_dims } # Convert fcst to s2dv_cube -- GitLab From 701b7c5b56885f84889adb7212596f30b0f15e69 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 5 Aug 2022 14:41:47 +0200 Subject: [PATCH 67/68] Change output directory in seasonal daily unit test --- tests/recipes/recipe-seasonal_daily_1.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/recipes/recipe-seasonal_daily_1.yml b/tests/recipes/recipe-seasonal_daily_1.yml index 7e31e03e..fc1bc58c 100644 --- a/tests/recipes/recipe-seasonal_daily_1.yml +++ b/tests/recipes/recipe-seasonal_daily_1.yml @@ -38,5 +38,5 @@ Analysis: Run: Loglevel: INFO Terminal: yes - output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + output_dir: ./out-logs/ code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ -- GitLab From a3b1d477f9d958fe1385746af71c60581f2ca79a Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 5 Aug 2022 15:41:38 +0200 Subject: [PATCH 68/68] Remove some TODOs, reorganize condition in filename creation --- modules/Saving/Saving.R | 11 ----------- modules/Saving/paths2save.R | 8 ++++---- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index e1fc1828..1c56e459 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -90,7 +90,6 @@ get_latlon <- function(latitude, longitude) { # Adds dimensions and metadata to lat and lon # latitude: array containing the latitude values # longitude: array containing the longitude values - ## TODO: Extract metadata from s2dv_cube dim(longitude) <- length(longitude) metadata <- list(longitude = list(units = 'degrees_east')) @@ -347,8 +346,6 @@ save_metrics <- function(skill, lalo <- c('longitude', 'latitude') # Remove singleton dimensions and rearrange lon, lat and time dims if (tolower(agg) == "global") { - ## TODO: Implement metrics with additional non-singleton dimensions - ## e.g. 'ensemble' in ensemble correlation. skill <- lapply(skill, function(x) { Reorder(x, c(lalo, 'time'))}) } @@ -358,7 +355,6 @@ save_metrics <- function(skill, attr(skill[[1]], 'global_attrs') <- global_attributes for (i in 1:length(skill)) { - ## TODO: create dictionary with proper metadata metric <- names(skill[i]) long_name <- dictionary$metrics[[metric]]$long_name if (tolower(agg) == "country") { @@ -429,7 +425,6 @@ save_corr <- function(skill, # and exports it to a netCDF file inside 'outdir'. # Select ensemble correlation from the list of metrics - ## TODO: Move condition to wrapper if ("corr" %in% names(skill)) { corr_metrics <- grep("^corr", names(skill)) skill <- skill[corr_metrics] @@ -439,8 +434,6 @@ save_corr <- function(skill, lalo <- c('longitude', 'latitude') # Remove singleton dimensions and rearrange lon, lat and time dims if (tolower(agg) == "global") { - ## TODO: Implement metrics with additional non-singleton dimensions - ## e.g. 'ensemble' in ensemble correlation. skill <- lapply(skill, function(x) { Reorder(x, c(lalo, 'ensemble', 'time'))}) } @@ -450,7 +443,6 @@ save_corr <- function(skill, attr(skill[[1]], 'global_attrs') <- global_attributes for (i in 1:length(skill)) { - ## TODO: create dictionary with proper metadata metric <- names(skill[i]) long_name <- dictionary$metrics[[metric]]$long_name if (tolower(agg) == "country") { @@ -532,7 +524,6 @@ save_percentiles <- function(percentiles, attr(percentiles[[1]], 'global_attrs') <- global_attributes for (i in 1:length(percentiles)) { - ## TODO: create dictionary with proper metadata ## TODO: replace with proper standard names percentile <- names(percentiles[i]) long_name <- paste0(gsub("^.*_", "", percentile), "th percentile") @@ -644,8 +635,6 @@ save_probabilities <- function(probs, ## TODO: Replace for loop with something more efficient? for (bin in 1:length(probs_syear)) { - ## TODO: create dictionary with proper metadata - ## TODO: replace with proper standard names prob_bin <- names(probs_syear[bin]) long_name <- paste0(prob_bin, " probability category") if (tolower(agg) == "country") { diff --git a/modules/Saving/paths2save.R b/modules/Saving/paths2save.R index c243fb39..4974cbd8 100644 --- a/modules/Saving/paths2save.R +++ b/modules/Saving/paths2save.R @@ -5,12 +5,12 @@ get_filename <- function(dir, var, date, fcst.sdate, agg, horizon, file.type) { # variable, forecast date, startdate, aggregation, forecast horizon and # type of metric/forecast/probability. - if (horizon == "seasonal") { - shortdate <- format(as.Date(as.character(fcst.sdate), "%Y%m%d"), "%m") - dd <- "month" - } else { + if (horizon == "subseasonal") { shortdate <- format(as.Date(as.character(fcst.sdate), "%Y%m%d"), "%V") dd <- "week" + } else { + shortdate <- format(as.Date(as.character(fcst.sdate), "%Y%m%d"), "%m") + dd <- "month" } switch(tolower(agg), -- GitLab