From 70f94cdf152bc9a8b04d39b795b4ec16eaf16096 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 3 Mar 2023 10:36:40 +0100 Subject: [PATCH 01/52] Change one CSDownscale function for testing, fix Viz module --- modules/Downscaling/Downscaling.R | 14 +++++++------- .../recipe_system5c3s-tas_downscaling.yml | 18 +++++++++--------- modules/Visualization/Visualization.R | 5 ----- modules/test_seasonal.R | 10 ++++++---- 4 files changed, 22 insertions(+), 25 deletions(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index 1f814b74..4c32d09a 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -9,12 +9,12 @@ #░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ #░ ░ ░ ## TODO: Remove once CSDownscale is on CRAN -source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Interpolation.R') -source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Intbc.R') -source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Intlr.R') -source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Analogs.R') -source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/LogisticReg.R') -source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Utils.R') +source('/esarchive/scratch/vagudets/repos/csdownscale/R/Interpolation.R') +source('/esarchive/scratch/vagudets/repos/csdownscale/R/Intbc.R') +source('/esarchive/scratch/vagudets/repos/csdownscale/R/Intlr.R') +source('/esarchive/scratch/vagudets/repos/csdownscale/R/Analogs.R') +source('/esarchive/scratch/vagudets/repos/csdownscale/R/LogisticReg.R') +source('/esarchive/scratch/vagudets/repos/csdownscale/R/Utils.R') #source("https://earth.bsc.es/gitlab/external/cstools/-/raw/master/R/CST_BiasCorrection.R") @@ -257,6 +257,6 @@ downscale_datasets <- function(recipe, data) { } } print(DOWNSCAL_MSG) - return(list(hcst = hcst_downscal, fcst = NULL)) + return(list(hcst = hcst_downscal$exp, obs = hcst_downscal$obs, fcst = NULL)) } diff --git a/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml b/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml index 0cad69ec..b5d57c6f 100644 --- a/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml +++ b/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml @@ -19,10 +19,10 @@ Analysis: freq: monthly_mean Datasets: System: - name: system5c3s + name: ECMWF-SEAS5 Multimodel: no Reference: - name: era5 + name: ERA5 Time: sdate: '0501' hcst_start: '2000' @@ -47,16 +47,16 @@ Analysis: # Assumption 1: leave-one-out cross-validation is always applied # Assumption 2: for analogs, we select the best analog (minimum distance) # TO DO: add downscaling to point locations - type: # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg' - int_method: # optional, regridding method accepted by CDO - bc_method: # optional, 'simple_bias', 'calibration', 'quantile_mapping' - lr_method: # optional, 'basic', 'large_scale', '4nn' - log_reg_method: # optional, 'ens_mean', 'ens_mean_sd', 'sorted_members' + type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg' + int_method: conservative # optional, regridding method accepted by CDO + bc_method: simple_bias # optional, 'simple_bias', 'calibration', 'quantile_mapping' + lr_method: basic # optional, 'basic', 'large_scale', '4nn' + log_reg_method: ens_mean # optional, 'ens_mean', 'ens_mean_sd', 'sorted_members' target_grid: 
/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # optional, nc file or grid accepted by CDO nanalogs: 4 # optional, number of analogs to be searched Output_format: S2S4E Run: Loglevel: INFO Terminal: yes - output_dir: /esarchive/scratch/jramon/GitLab_jramon/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/jramon/GitLab_jramon/auto-s2s/ + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index e7236da3..9e453bdc 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -48,11 +48,6 @@ plot_data <- function(recipe, significance) } - # Plot downscaled data - if (!is.null(downscaled_data)) { - plot_ensemble_mean(recipe, archive, downscaled_data$hcst$exp, outdir) - } - # Plot forecast ensemble mean if (!is.null(data$fcst)) { plot_ensemble_mean(recipe, archive, data$fcst, outdir) diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R index 44394cbd..bf784c1f 100644 --- a/modules/test_seasonal.R +++ b/modules/test_seasonal.R @@ -1,20 +1,22 @@ -setwd("/esarchive/scratch/jramon/GitLab_jramon/auto-s2s/") source("modules/Loading/Loading.R") source("modules/Calibration/Calibration.R") +source("modules/Downscaling/Downscaling.R") source("modules/Anomalies/Anomalies.R") source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") -recipe_file <- "modules/Loading/testing_recipes/recipe_seasonal-tests.yml" +recipe_file <- "modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml" recipe <- prepare_outputs(recipe_file) # Load datasets data <- load_datasets(recipe) +# Downscale datasets +data <- downscale_datasets(recipe, data) # Calibrate datasets -calibrated_data <- calibrate_datasets(recipe, data) +# data <- calibrate_datasets(recipe, data) # Compute anomalies -calibrated_data <- compute_anomalies(recipe, calibrated_data) +# calibrated_data <- compute_anomalies(recipe, calibrated_data) # Compute skill metrics skill_metrics <- compute_skill_metrics(recipe, calibrated_data) # Compute percentiles and probability bins -- GitLab From c524fbe95c959abe226c145810b8640231cc6b2a Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 3 Mar 2023 16:14:08 +0100 Subject: [PATCH 02/52] Restructure downscaling checks (WIP) --- modules/Downscaling/Downscaling.R | 186 +++++++++++++++++------------- 1 file changed, 103 insertions(+), 83 deletions(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index 4c32d09a..1e8fb5d9 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -9,6 +9,7 @@ #░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ #░ ░ ░ ## TODO: Remove once CSDownscale is on CRAN +## TODO: Move recipe checks to check_recipe() source('/esarchive/scratch/vagudets/repos/csdownscale/R/Interpolation.R') source('/esarchive/scratch/vagudets/repos/csdownscale/R/Intbc.R') source('/esarchive/scratch/vagudets/repos/csdownscale/R/Intlr.R') @@ -29,56 +30,60 @@ downscale_datasets <- function(recipe, data) { type <- tolower(recipe$Analysis$Workflow$Downscaling$type) if (!is.null(data$fcst)) { - warning("The downscaling will be only performed to the hindcast data") + warn(recipe$Run$logger, + "The downscaling will be only performed to the hindcast data") data$fcst <- NULL } if (type == "none") { - hcst_downscal <- data$hcst DOWNSCAL_MSG <- "##### NO DOWNSCALING PERFORMED #####" - 
} else { # Downscaling function params int_method <- tolower(recipe$Analysis$Workflow$Downscaling$int_method) bc_method <- tolower(recipe$Analysis$Workflow$Downscaling$bc_method) lr_method <- tolower(recipe$Analysis$Workflow$Downscaling$lr_method) - log_reg_method <- tolower(recipe$Analysis$Workflow$Downscaling$log_reg_method) + logreg_method <- tolower(recipe$Analysis$Workflow$Downscaling$logreg_method) target_grid <- tolower(recipe$Analysis$Workflow$Downscaling$target_grid) nanalogs <- as.numeric(recipe$Analysis$Workflow$Downscaling$nanalogs) - + ## TODO: Compute number of cores if (is.null(recipe$Analysis$ncores)) { ncores <- 1 } else { ncores <- recipe$Analysis$ncores } - - #TO DO: add the parametre loocv where it corresponds + ## TODO: add the parametre loocv where it corresponds if (is.null(recipe$Analysis$loocv)) { loocv <- TRUE } else { loocv <- recipe$Analysis$loocv } - + # Define downscaling options DOWNSCAL_TYPES <- c("none", "int", "intbc", "intlr", "analogs", "logreg") BC_METHODS <- c("simple_bias", "calibration", "quantile_mapping") LR_METHODS <- c("basic", "large_scale", "4nn") - LOG_REG_METHODS <- c("ens_mean", "ens_mean_sd", "sorted_members") + LOGREG_METHODS <- c("ens_mean", "ens_mean_sd", "sorted_members") if (!(type %in% DOWNSCAL_TYPES)) { - stop("Downscaling type in the recipe is not available. Accepted types ", - "are 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'.") - } - + error(recipe$Run$logger, + paste("Downscaling type in the recipe is not available.", + "Accepted entries are:", DOWNSCAL_TYPES)) + } + # Interpolation if (type == "int") { - if (is.null(int_method)) { - stop("Please provide one interpolation method in the recipe.") + # Check method + if (length(int_method) == 0) { + error(recipe$Run$logger, + paste("Downscaling type 'int' was requested, but no", + "interpolation method is provided in the recipe.")) } - + # Check target grid if (is.null(target_grid)) { - stop("Please provide the target grid in the recipe.") + error(recipe$Run$logger, + paste("Downscaling type 'int' was requested, but no", + "target grid is provided in the recipe.")) } - + ## TODO: Move this check elsewhere # Ensure that observations are in the same grid as experiments # Only needed for this method because the others already return the # observations @@ -86,6 +91,7 @@ downscale_datasets <- function(recipe, data) { lonmin <- data$hcst$lon[1] latmax <- data$hcst$lat[length(data$hcst$lat)] lonmax <- data$hcst$lon[length(data$hcst$lon)] + # Interpolate hcst hcst_downscal <- CST_Interpolation(data$hcst, points = NULL, method_remap = int_method, @@ -94,7 +100,7 @@ downscale_datasets <- function(recipe, data) { lon_dim = "longitude", region = c(lonmin, lonmax, latmin, latmax), method_point_interp = NULL) - + # Interpolate obs obs_downscal <- CST_Interpolation(data$obs, points = NULL, method_remap = int_method, @@ -103,29 +109,34 @@ downscale_datasets <- function(recipe, data) { lon_dim = "longitude", region = c(lonmin, lonmax, latmin, latmax), method_point_interp = NULL) - hcst_downscal$obs <- obs_downscal$exp - DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } else if (type == "intbc") { if (length(int_method) == 0) { - stop("Please provide one (and only one) interpolation method in the recipe.") + error(recipe$Run$logger, + paste("Downscaling type 'intbc' was requested in the recipe, but", + "no interpolation method is provided.")) + stop() } - - if (is.null(bc_method)) { - stop("Please provide one bias-correction method in the recipe. 
Accepted ", - "methods are 'simple_bias', 'calibration', 'quantile_mapping'.") + if (length(bc_method) == 0) { + error(recipe$Run$logger, + paste("Downscaling type 'intbc' was requested in the recipe, but", + "no bias correction method is provided.")) + stop() ## "methods are 'simple_bias', 'calibration', 'quantile_mapping'.") } - - if (is.null(target_grid)) { - stop("Please provide the target grid in the recipe.") + if (length(target_grid) == 0) { + error(recipe$Run$logger, + paste("Downscaling type 'intbc' was requested in the recipe, but", + "no target grid is provided.")) + stop() } - if (!(bc_method %in% BC_METHODS)) { - stop(paste0(bc_method, " method in the recipe is not available. Accepted methods ", - "are 'simple_bias', 'calibration', 'quantile_mapping'.")) + error(recipe$Run$logger, + paste0(bc_method, " method in the recipe is not available.", + "The available methods are:", BC_METHODS)) + stop() } - + # Interpolate hcst and obs with bias correction hcst_downscal <- CST_Intbc(data$hcst, data$obs, target_grid = target_grid, bc_method = bc_method, @@ -138,28 +149,32 @@ downscale_datasets <- function(recipe, data) { member_dim = "ensemble", region = NULL, ncores = ncores) - - DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } else if (type == "intlr") { + ## TODO: Logger if (length(int_method) == 0) { - stop("Please provide one (and only one) interpolation method in the recipe.") + error(recipe$Run$logger, + paste("Downscaling type 'intlr' was requested in the recipe, but", + "no interpolation method is provided.")) + stop() } - - if (is.null(lr_method)) { - stop("Please provide one linear regression method in the recipe. Accepted ", - "methods are 'basic', 'large_scale', '4nn'.") + if (length(lr_method) == 0) { + error(recipe$Run$logger, + paste("Downscaling type 'intlr' was requested in the recipe, but", + "no linear regression method is provided.")) + stop() } - - if (is.null(target_grid)) { - stop("Please provide the target grid in the recipe.") + if (length(target_grid) == 0) { + error(recipe$Run$logger, + paste("Downscaling type 'intlr' was requested in the recipe, but", + "no target grid is provided.")) + stop() } - if (!(lr_method %in% LR_METHODS)) { - stop(paste0(lr_method, " method in the recipe is not available. 
Accepted methods ", - "are 'basic', 'large_scale', '4nn'.")) + error(recipe$Run$logger, + paste0("The accepted linear regression methods are:", LR_METHODS)) + stop() } - - # TO DO: add the possibility to have the element 'pred' in 'data' + ## TODO: add the possibility to have the element 'pred' in 'data' if (lr_method == "large_scale") { if (is.null(data$pred$data)) { stop("Please provide the large scale predictors in the element 'data$pred$data'.") @@ -167,7 +182,7 @@ downscale_datasets <- function(recipe, data) { } else { data$pred$data <- NULL } - + # Interpolate hcst and obs with linear regression hcst_downscal <- CST_Intlr(data$hcst, data$obs, lr_method = lr_method, target_grid = target_grid, @@ -175,25 +190,23 @@ downscale_datasets <- function(recipe, data) { int_method = int_method, method_point_interp = NULL, predictors = data$pred$data, - lat_dim = "latitude", + lat_dim = "latitude", lon_dim = "longitude", - sdate_dim = "syear", - time_dim = "time", + sdate_dim = "syear", + time_dim = "time", member_dim = "ensemble", large_scale_predictor_dimname = 'vars', loocv = loocv, - region = NULL, + region = NULL, ncores = ncores) - - DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } else if (type == "analogs") { - - if (is.null(nanalogs)) { - warning("The number of analogs for searching has not been provided in the ", - "recipe. Setting it to 3.") + if (length(nanalogs) == 0) { + warn(recipe$Run$logger, + paste("The number of analogs for searching has not been provided", + "in the recipe. Setting it to 3.")) nanalogs <- 3 } - + # Apply analogs method to hcst and obs hcst_downscal <- CST_Analogs(data$hcst, data$obs, grid_exp = data$hcst$source_files[1], nanalogs = nanalogs, @@ -207,36 +220,43 @@ downscale_datasets <- function(recipe, data) { return_indices = FALSE, loocv_window = loocv, ncores = ncores) - - DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } else if (type == "logreg") { - if (length(int_method) == 0) { - stop("Please provide one (and only one) interpolation method in the recipe.") + error(recipe$Run$logger, + paste("Downscaling type 'logreg' was requested in the recipe, but", + "no interpolation method is provided.")) + stop() } - - if (is.null(log_reg_method)) { - stop("Please provide one logistic regression method in the recipe. Accepted ", - "methods are 'ens_mean', 'ens_mean_sd', 'sorted_members'.") + if (length(logreg_method) == 0) { + error(recipe$Run$logger, + paste("Downscaling type 'logreg' was requested in the recipe, but", + "no logistic regression method is provided. The accepted", + "methods are:", LOG_REG_METHODS)) + stop() } - - if (is.null(target_grid)) { - stop("Please provide the target grid in the recipe.") + if (length(target_grid) == 0) { + error(recipe$Run$logger, + paste("Downscaling type 'logreg' was requested in the recipe, but", + "no target grid is provided.")) + stop() } - + ## Keep this check here # Since we are forcing to create three categories, and applying cross-validation, # we need at least six years of data for the logistic regression function to not # crash - if (dim(data$hcst$data)[names(dim(data$hcst$data)) == "syear"] <= 5) { - stop("The number of start dates is insufficient for the logisitic regression method. ", - "Please provide six or more.") + if (dim(data$hcst$data)[["syear"]] < 6) { + error(recipe$Run$logger, + paste("The number of start dates is insufficient for the", + "logistic regression method. 
Please provide six or more.")) + stop() } - - if (!(log_reg_method %in% LOG_REG_METHODS)) { - stop(paste0(log_reg_method, " method in the recipe is not available. Accepted methods ", - "are 'ens_mean', 'ens_mean_sd', 'sorted_members'.")) + if (!(logreg_method %in% LOGREG_METHODS)) { + error(recipe$Run$logger, + paste("The accepted methos for logistic regression are:", + LOGREG_METHODS)) + stop() } - + # Apply logistic regression to hcst and obs hcst_downscal <- CST_LogisticReg(data$hcst, data$obs, target_grid = target_grid, int_method = int_method, @@ -253,10 +273,10 @@ downscale_datasets <- function(recipe, data) { loocv = loocv, ncores = ncores) - DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } - } - print(DOWNSCAL_MSG) + DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" + } + info(recipe$Run$logger, DOWNSCAL_MSG) return(list(hcst = hcst_downscal$exp, obs = hcst_downscal$obs, fcst = NULL)) } -- GitLab From a78ef83f6cb30dc7f567e8ab3e1172cc7f840f73 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 8 Mar 2023 10:46:49 +0100 Subject: [PATCH 03/52] Rewrite downscaling checks --- modules/Downscaling/Downscaling.R | 12 ++++++------ .../recipe_system5c3s-tas_downscaling.yml | 6 +++--- modules/test_seasonal.R | 12 ++++++------ 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index 1e8fb5d9..f2a51b48 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -10,12 +10,12 @@ #░ ░ ░ ## TODO: Remove once CSDownscale is on CRAN ## TODO: Move recipe checks to check_recipe() -source('/esarchive/scratch/vagudets/repos/csdownscale/R/Interpolation.R') -source('/esarchive/scratch/vagudets/repos/csdownscale/R/Intbc.R') -source('/esarchive/scratch/vagudets/repos/csdownscale/R/Intlr.R') -source('/esarchive/scratch/vagudets/repos/csdownscale/R/Analogs.R') -source('/esarchive/scratch/vagudets/repos/csdownscale/R/LogisticReg.R') -source('/esarchive/scratch/vagudets/repos/csdownscale/R/Utils.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Interpolation.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Intbc.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Intlr.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Analogs.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/LogisticReg.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Utils.R') #source("https://earth.bsc.es/gitlab/external/cstools/-/raw/master/R/CST_BiasCorrection.R") diff --git a/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml b/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml index b5d57c6f..99e02c7d 100644 --- a/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml +++ b/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml @@ -50,10 +50,10 @@ Analysis: type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg' int_method: conservative # optional, regridding method accepted by CDO bc_method: simple_bias # optional, 'simple_bias', 'calibration', 'quantile_mapping' - lr_method: basic # optional, 'basic', 'large_scale', '4nn' - log_reg_method: ens_mean # optional, 'ens_mean', 'ens_mean_sd', 'sorted_members' + lr_method: # optional, 'basic', 'large_scale', '4nn' + log_reg_method: # optional, 'ens_mean', 'ens_mean_sd', 'sorted_members' target_grid: 
/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # optional, nc file or grid accepted by CDO - nanalogs: 4 # optional, number of analogs to be searched + nanalogs: # optional, number of analogs to be searched Output_format: S2S4E Run: Loglevel: INFO diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R index bf784c1f..1f3b8b9b 100644 --- a/modules/test_seasonal.R +++ b/modules/test_seasonal.R @@ -14,15 +14,15 @@ data <- load_datasets(recipe) # Downscale datasets data <- downscale_datasets(recipe, data) # Calibrate datasets -# data <- calibrate_datasets(recipe, data) +data <- calibrate_datasets(recipe, data) # Compute anomalies -# calibrated_data <- compute_anomalies(recipe, calibrated_data) +# data <- compute_anomalies(recipe, data) # Compute skill metrics -skill_metrics <- compute_skill_metrics(recipe, calibrated_data) +skill_metrics <- compute_skill_metrics(recipe, data) # Compute percentiles and probability bins -probabilities <- compute_probabilities(recipe, calibrated_data) +# probabilities <- compute_probabilities(recipe, data) # Export all data to netCDF -save_data(recipe, calibrated_data, skill_metrics, probabilities) +save_data(recipe, data, skill_metrics, probabilities) # Plot data -plot_data(recipe, calibrated_data, skill_metrics, probabilities, +plot_data(recipe, data, skill_metrics, probabilities, significance = T) -- GitLab From 3a788ac65c3d2dca10e6a9356f95e8084b4583b7 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 8 Mar 2023 16:12:15 +0100 Subject: [PATCH 04/52] Move checks from Downscaling.R to recipe checker --- modules/Downscaling/Downscaling.R | 97 +---------------------------- tools/check_recipe.R | 100 +++++++++++++++++++++++++++++- 2 files changed, 102 insertions(+), 95 deletions(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index f2a51b48..d8522b65 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -64,25 +64,8 @@ downscale_datasets <- function(recipe, data) { LR_METHODS <- c("basic", "large_scale", "4nn") LOGREG_METHODS <- c("ens_mean", "ens_mean_sd", "sorted_members") - if (!(type %in% DOWNSCAL_TYPES)) { - error(recipe$Run$logger, - paste("Downscaling type in the recipe is not available.", - "Accepted entries are:", DOWNSCAL_TYPES)) - } # Interpolation if (type == "int") { - # Check method - if (length(int_method) == 0) { - error(recipe$Run$logger, - paste("Downscaling type 'int' was requested, but no", - "interpolation method is provided in the recipe.")) - } - # Check target grid - if (is.null(target_grid)) { - error(recipe$Run$logger, - paste("Downscaling type 'int' was requested, but no", - "target grid is provided in the recipe.")) - } ## TODO: Move this check elsewhere # Ensure that observations are in the same grid as experiments # Only needed for this method because the others already return the @@ -112,31 +95,7 @@ downscale_datasets <- function(recipe, data) { hcst_downscal$obs <- obs_downscal$exp } else if (type == "intbc") { - if (length(int_method) == 0) { - error(recipe$Run$logger, - paste("Downscaling type 'intbc' was requested in the recipe, but", - "no interpolation method is provided.")) - stop() - } - if (length(bc_method) == 0) { - error(recipe$Run$logger, - paste("Downscaling type 'intbc' was requested in the recipe, but", - "no bias correction method is provided.")) - stop() ## "methods are 'simple_bias', 'calibration', 'quantile_mapping'.") - } - if (length(target_grid) == 0) { - error(recipe$Run$logger, - 
paste("Downscaling type 'intbc' was requested in the recipe, but", - "no target grid is provided.")) - stop() - } - if (!(bc_method %in% BC_METHODS)) { - error(recipe$Run$logger, - paste0(bc_method, " method in the recipe is not available.", - "The available methods are:", BC_METHODS)) - stop() - } - # Interpolate hcst and obs with bias correction + # Interpolate hcst and obs with bias correction hcst_downscal <- CST_Intbc(data$hcst, data$obs, target_grid = target_grid, bc_method = bc_method, @@ -150,30 +109,6 @@ downscale_datasets <- function(recipe, data) { region = NULL, ncores = ncores) } else if (type == "intlr") { - ## TODO: Logger - if (length(int_method) == 0) { - error(recipe$Run$logger, - paste("Downscaling type 'intlr' was requested in the recipe, but", - "no interpolation method is provided.")) - stop() - } - if (length(lr_method) == 0) { - error(recipe$Run$logger, - paste("Downscaling type 'intlr' was requested in the recipe, but", - "no linear regression method is provided.")) - stop() - } - if (length(target_grid) == 0) { - error(recipe$Run$logger, - paste("Downscaling type 'intlr' was requested in the recipe, but", - "no target grid is provided.")) - stop() - } - if (!(lr_method %in% LR_METHODS)) { - error(recipe$Run$logger, - paste0("The accepted linear regression methods are:", LR_METHODS)) - stop() - } ## TODO: add the possibility to have the element 'pred' in 'data' if (lr_method == "large_scale") { if (is.null(data$pred$data)) { @@ -201,7 +136,7 @@ downscale_datasets <- function(recipe, data) { ncores = ncores) } else if (type == "analogs") { if (length(nanalogs) == 0) { - warn(recipe$Run$logger, + info(recipe$Run$logger, paste("The number of analogs for searching has not been provided", "in the recipe. Setting it to 3.")) nanalogs <- 3 @@ -221,41 +156,15 @@ downscale_datasets <- function(recipe, data) { loocv_window = loocv, ncores = ncores) } else if (type == "logreg") { - if (length(int_method) == 0) { - error(recipe$Run$logger, - paste("Downscaling type 'logreg' was requested in the recipe, but", - "no interpolation method is provided.")) - stop() - } - if (length(logreg_method) == 0) { - error(recipe$Run$logger, - paste("Downscaling type 'logreg' was requested in the recipe, but", - "no logistic regression method is provided. The accepted", - "methods are:", LOG_REG_METHODS)) - stop() - } - if (length(target_grid) == 0) { - error(recipe$Run$logger, - paste("Downscaling type 'logreg' was requested in the recipe, but", - "no target grid is provided.")) - stop() - } - ## Keep this check here # Since we are forcing to create three categories, and applying cross-validation, # we need at least six years of data for the logistic regression function to not # crash if (dim(data$hcst$data)[["syear"]] < 6) { error(recipe$Run$logger, - paste("The number of start dates is insufficient for the", + paste("The number of years of data is insufficient for the", "logistic regression method. 
Please provide six or more.")) stop() } - if (!(logreg_method %in% LOGREG_METHODS)) { - error(recipe$Run$logger, - paste("The accepted methos for logistic regression are:", - LOGREG_METHODS)) - stop() - } # Apply logistic regression to hcst and obs hcst_downscal <- CST_LogisticReg(data$hcst, data$obs, target_grid = target_grid, diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 964c4446..617bd169 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -222,7 +222,9 @@ check_recipe <- function(recipe) { # WORKFLOW CHECKS # --------------------------------------------------------------------- - # Only one Calibration method allowed: + # Calibration + # If 'method' is FALSE/no/'none' or NULL, set to 'raw' + ## TODO: Review this check if ((is.logical(recipe$Analysis$Workflow$Calibration$method) && recipe$Analysis$Workflow$Calibration$method == FALSE) || tolower(recipe$Analysis$Workflow$Calibration$method) == 'none' || @@ -257,6 +259,102 @@ check_recipe <- function(recipe) { error_status <- T } } + # Downscaling + ## TODO: Simplify checks (reduce number of lines) + downscal_params <- lapply(recipe$Analysis$Workflow$Downscaling, tolower) + # Define accepted entries + DOWNSCAL_TYPES <- c("none", "int", "intbc", "intlr", "analogs", "logreg") + BC_METHODS <- c("simple_bias", "calibration", "quantile_mapping") + LR_METHODS <- c("basic", "large_scale", "4nn") + LOGREG_METHODS <- c("ens_mean", "ens_mean_sd", "sorted_members") + # Check downscaling type + ## TODO: Consider 'NULL' case + if (!((downscal_params$type) %in% DOWNSCAL_TYPES)) { + error(recipe$Run$logger, + paste0("The type of Downscaling request in the recipe is not ", + "available. It must be one of the following: ", + paste(DOWNSCAL_TYPES, collapse = ", "), ".")) + error_status <- T + } + if ((downscal_params$type %in% c("int", "intbc", "intlr", "logreg")) && + (is.null(downscal_params$target_grid))) { + error(recipe$Run$logger, + paste("A target grid is required for the downscaling method", + "requested in the recipe.")) + error_status <- T + } + if (downscal_params$type == "int") { + if (is.null(downscal_params$int_method)) { + error(recipe$Run$logger, + paste("Downscaling type 'int' was requested, but no", + "interpolation method is provided in the recipe.")) + error_status <- T + } + } else if (downscal_params$type == "intbc") { + if (is.null(downscal_params$int_method)) { + error(recipe$Run$logger, + paste("Downscaling type 'int' was requested in the recipe, but no", + "interpolation method is provided.")) + error_status <- T + } + if (is.null(downscal_params$bc_method)) { + error(recipe$Run$logger, + paste0("Downscaling type 'intbc' was requested in the recipe, but", + "no bias correction method is provided.")) + error_status <- T + } else if (!(downscal_params$bc_method %in% BC_METHODS)) { + error(recipe$Run$logger, + paste0("The accepted Bias Correction methods for the downscaling", + "module are: ", paste(BC_METHODS, collapse = ", "), ".")) + error_status <- T + } + } else if (downscal_params$type == "intlr") { + if (is.null(downscal_params$int_method)) { + error(recipe$Run$logger, + paste("Downscaling type 'intlr' was requested in the recipe, but", + "no interpolation method was provided.")) + error_status <- T + } + if (is.null(downscal_params$lr_method)) { + error(recipe$Run$logger, + paste0("Downscaling type 'intlr' was requested in the recipe, but", + "no linear regression method was provided.")) + error_status <- T + } else if (!(downscal_params$lr_method %in% LR_METHODS)) { + error(recipe$Run$logger, + 
paste0("The accepted linear regression methods for the", + "downscaling module are: ", + paste(LR_METHODS, collapse = ", "), ".")) + error_status <- T + } + } else if (downscal_params$type == "analogs") { + if (length(nanalogs) == 0) { + warn(recipe$Run$logger, + paste("Downscaling type is 'analogs, but the number of analogs", + "has not been provided in the recipe.")) + } + } else if (downscal_params$type == "logreg") { + if (is.null(downscal_params$int_method)) { + error(recipe$Run$logger, + paste("Downscaling type 'logreg' was requested in the recipe, but", + "no interpolation method was provided.")) + error_status <- T + } + if (is.null(downscal_params$log_reg_method)) { + error(recipe$Run$logger, + paste0("Downscaling type 'logreg' was requested in the recipe, but", + "no logistic regression method is provided. The accepted", + "methods are:", + paste(LOG_REG_METHODS, collapse = ", "), ".")) + error_status <- T + } else if (!(downscal_params$log_reg_method %in% LOGREG_METHODS)) { + error(recipe$Run$logger, + paste0("The accepted logistic regression methods for the ", + "downscaling module are: ", + paste(LOGREG_METHODS, collapse = ", "), ".")) + error_status <- T + } + } # Skill if (("Skill" %in% names(recipe$Analysis$Workflow)) && (is.null(recipe$Analysis$Workflow$Skill$metric))) { -- GitLab From bdbe693e7dbd9f6f90288caaac8ddda0b9087971 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 9 Mar 2023 11:50:22 +0100 Subject: [PATCH 05/52] Refine downscaling checks --- tools/check_recipe.R | 166 ++++++++++++++++++++++--------------------- 1 file changed, 85 insertions(+), 81 deletions(-) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 617bd169..b2c62e44 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -1,8 +1,6 @@ check_recipe <- function(recipe) { - # recipe: yaml recipe already read it - ## TODO: Adapt to decadal case - + ## TODO: set up logger-less case info(recipe$Run$logger, paste("Checking recipe:", recipe$recipe_path)) # --------------------------------------------------------------------- @@ -268,92 +266,97 @@ check_recipe <- function(recipe) { LR_METHODS <- c("basic", "large_scale", "4nn") LOGREG_METHODS <- c("ens_mean", "ens_mean_sd", "sorted_members") # Check downscaling type - ## TODO: Consider 'NULL' case - if (!((downscal_params$type) %in% DOWNSCAL_TYPES)) { - error(recipe$Run$logger, - paste0("The type of Downscaling request in the recipe is not ", - "available. 
It must be one of the following: ", - paste(DOWNSCAL_TYPES, collapse = ", "), ".")) - error_status <- T - } - if ((downscal_params$type %in% c("int", "intbc", "intlr", "logreg")) && - (is.null(downscal_params$target_grid))) { - error(recipe$Run$logger, - paste("A target grid is required for the downscaling method", - "requested in the recipe.")) - error_status <- T - } - if (downscal_params$type == "int") { - if (is.null(downscal_params$int_method)) { - error(recipe$Run$logger, - paste("Downscaling type 'int' was requested, but no", - "interpolation method is provided in the recipe.")) - error_status <- T - } - } else if (downscal_params$type == "intbc") { - if (is.null(downscal_params$int_method)) { - error(recipe$Run$logger, - paste("Downscaling type 'int' was requested in the recipe, but no", - "interpolation method is provided.")) - error_status <- T - } - if (is.null(downscal_params$bc_method)) { - error(recipe$Run$logger, - paste0("Downscaling type 'intbc' was requested in the recipe, but", - "no bias correction method is provided.")) - error_status <- T - } else if (!(downscal_params$bc_method %in% BC_METHODS)) { - error(recipe$Run$logger, - paste0("The accepted Bias Correction methods for the downscaling", - "module are: ", paste(BC_METHODS, collapse = ", "), ".")) - error_status <- T - } - } else if (downscal_params$type == "intlr") { - if (is.null(downscal_params$int_method)) { - error(recipe$Run$logger, - paste("Downscaling type 'intlr' was requested in the recipe, but", - "no interpolation method was provided.")) - error_status <- T - } - if (is.null(downscal_params$lr_method)) { - error(recipe$Run$logger, - paste0("Downscaling type 'intlr' was requested in the recipe, but", - "no linear regression method was provided.")) - error_status <- T - } else if (!(downscal_params$lr_method %in% LR_METHODS)) { - error(recipe$Run$logger, - paste0("The accepted linear regression methods for the", - "downscaling module are: ", - paste(LR_METHODS, collapse = ", "), ".")) - error_status <- T - } - } else if (downscal_params$type == "analogs") { - if (length(nanalogs) == 0) { + if ("type" %in% names(downscal_params)) { + if (length(downscal_params$type) == 0) { + downscal_params$type <- "none" warn(recipe$Run$logger, - paste("Downscaling type is 'analogs, but the number of analogs", - "has not been provided in the recipe.")) + paste("Downscaling 'type' is empty in the recipe, setting it to", + "'none'.")) } - } else if (downscal_params$type == "logreg") { - if (is.null(downscal_params$int_method)) { + if (!(downscal_params$type %in% DOWNSCAL_TYPES)) { error(recipe$Run$logger, - paste("Downscaling type 'logreg' was requested in the recipe, but", - "no interpolation method was provided.")) + paste0("The type of Downscaling request in the recipe is not ", + "available. It must be one of the following: ", + paste(DOWNSCAL_TYPES, collapse = ", "), ".")) error_status <- T } - if (is.null(downscal_params$log_reg_method)) { + if ((downscal_params$type %in% c("int", "intbc", "intlr", "logreg")) && + (length(downscal_params$target_grid) == 0)) { error(recipe$Run$logger, - paste0("Downscaling type 'logreg' was requested in the recipe, but", - "no logistic regression method is provided. 
The accepted", - "methods are:", - paste(LOG_REG_METHODS, collapse = ", "), ".")) - error_status <- T - } else if (!(downscal_params$log_reg_method %in% LOGREG_METHODS)) { - error(recipe$Run$logger, - paste0("The accepted logistic regression methods for the ", - "downscaling module are: ", - paste(LOGREG_METHODS, collapse = ", "), ".")) + paste("A target grid is required for the downscaling method", + "requested in the recipe.")) error_status <- T } + if (downscal_params$type == "int") { + if (length(downscal_params$int_method) == 0) { + error(recipe$Run$logger, + paste("Downscaling type 'int' was requested, but no", + "interpolation method is provided in the recipe.")) + error_status <- T + } + } else if (downscal_params$type == "intbc") { + if (length(downscal_params$int_method) == 0) { + error(recipe$Run$logger, + paste("Downscaling type 'int' was requested in the recipe, but no", + "interpolation method is provided.")) + error_status <- T + } + if (length(downscal_params$bc_method)== 0) { + error(recipe$Run$logger, + paste("Downscaling type 'intbc' was requested in the recipe, but", + "no bias correction method is provided.")) + error_status <- T + } else if (!(downscal_params$bc_method %in% BC_METHODS)) { + error(recipe$Run$logger, + paste0("The accepted Bias Correction methods for the downscaling", + " module are: ", paste(BC_METHODS, collapse = ", "), ".")) + error_status <- T + } + } else if (downscal_params$type == "intlr") { + if (length(downscal_params$int_method) == 0) { + error(recipe$Run$logger, + paste("Downscaling type 'intlr' was requested in the recipe, but", + "no interpolation method was provided.")) + error_status <- T + } + if (length(downscal_params$lr_method) == 0) { + error(recipe$Run$logger, + paste("Downscaling type 'intlr' was requested in the recipe, but", + "no linear regression method was provided.")) + error_status <- T + } else if (!(downscal_params$lr_method %in% LR_METHODS)) { + error(recipe$Run$logger, + paste0("The accepted linear regression methods for the", + " downscaling module are: ", + paste(LR_METHODS, collapse = ", "), ".")) + error_status <- T + } + } else if (downscal_params$type == "analogs") { + if (length(nanalogs) == 0) { + warn(recipe$Run$logger, + paste("Downscaling type is 'analogs, but the number of analogs", + "has not been provided in the recipe.")) + } + } else if (downscal_params$type == "logreg") { + if (length(downscal_params$int_method) == 0) { + error(recipe$Run$logger, + paste("Downscaling type 'logreg' was requested in the recipe, but", + "no interpolation method was provided.")) + error_status <- T + } + if (length(downscal_params$log_reg_method) == 0) { + error(recipe$Run$logger, + paste("Downscaling type 'logreg' was requested in the recipe,", + "but no logistic regression method is provided.")) + error_status <- T + } else if (!(downscal_params$log_reg_method %in% LOGREG_METHODS)) { + error(recipe$Run$logger, + paste0("The accepted logistic regression methods for the ", + "downscaling module are: ", + paste(LOGREG_METHODS, collapse = ", "), ".")) + error_status <- T + } + } } # Skill if (("Skill" %in% names(recipe$Analysis$Workflow)) && @@ -380,6 +383,7 @@ check_recipe <- function(recipe) { # RUN CHECKS # --------------------------------------------------------------------- + ## TODO: These checks should probably go first RUN_FIELDS = c("Loglevel", "Terminal", "output_dir", "code_dir") LOG_LEVELS = c("INFO", "DEBUG", "WARN", "ERROR", "FATAL") -- GitLab From e2a8a33bf99c67901e6e16bd982c02f70fd48f48 Mon Sep 17 00:00:00 2001 
From: Victoria Agudetse Roures Date: Thu, 9 Mar 2023 13:27:47 +0100 Subject: [PATCH 06/52] Add CSDownscale functions --- modules/Downscaling/tmp/Analogs.R | 537 +++++++++++++++++ modules/Downscaling/tmp/Intbc.R | 325 +++++++++++ modules/Downscaling/tmp/Interpolation.R | 739 ++++++++++++++++++++++++ modules/Downscaling/tmp/Intlr.R | 525 +++++++++++++++++ modules/Downscaling/tmp/LogisticReg.R | 497 ++++++++++++++++ modules/Downscaling/tmp/Utils.R | 31 + 6 files changed, 2654 insertions(+) create mode 100644 modules/Downscaling/tmp/Analogs.R create mode 100644 modules/Downscaling/tmp/Intbc.R create mode 100644 modules/Downscaling/tmp/Interpolation.R create mode 100644 modules/Downscaling/tmp/Intlr.R create mode 100644 modules/Downscaling/tmp/LogisticReg.R create mode 100644 modules/Downscaling/tmp/Utils.R diff --git a/modules/Downscaling/tmp/Analogs.R b/modules/Downscaling/tmp/Analogs.R new file mode 100644 index 00000000..fd6e2f97 --- /dev/null +++ b/modules/Downscaling/tmp/Analogs.R @@ -0,0 +1,537 @@ +#'@rdname CST_Analogs +#'@title Downscaling using Analogs based on large scale fields. +#' +#'@author J. Ramon, \email{jaume.ramon@bsc.es} +#' +#'@description This function performs a downscaling using Analogs. To compute +#'the analogs given a coarse-scale field, the function looks for days with +#'similar conditions in the historical observations. The coarse scale and +#'observation data can be either global or regional. In the latter case, the +#'region is defined by the user. In principle, the coarse and observation data +#'should be of the same variable, although different variables can also be admitted. +#'The analogs function will find the N best analogs based in Minimum Euclidean +#'distance. +#' +#'The search of analogs must be done in the longest dataset posible, but might +#'require high-memory computational resources. This is important since it is +#'necessary to have a good representation of the possible states of the field in +#'the past, and therefore, to get better analogs. The function can also look for +#'analogs within a window of D days, but is the user who has to define that window. +#'Otherwise, the function will look for analogs in the whole dataset. This function +#'is intended to downscale climate prediction data (i.e., sub-seasonal, seasonal +#'and decadal predictions) but can admit climate projections or reanalyses. It does +#'not have constrains of specific region or variables to downscale. +#'@param exp an 's2dv' object with named dimensions containing the experimental field on +#'the coarse scale for which the downscaling is aimed. The object must have, at least, +#'the dimensions latitude, longitude, start date and time. The object is expected to be +#'already subset for the desired region. Data can be in one or two integrated regions, e.g., +#'crossing the Greenwich meridian. To get the correct results in the latter case, +#'the borders of the region should be specified in the parameter 'region'. See parameter +#''region'. +#'@param obs an 's2dv' object with named dimensions containing the observational field. +#'The object must have, at least, the dimensions latitude, longitude and start date. +#'The object is expected to be already subset for the desired region. +#'@param obs2 an 's2dv' object with named dimensions containing a different observational +#'field to that in 'obs'. If provided, these observations will be used in the training, +#'i.e. the searching for analogs, so that they should be in a coarser grid to those in +#''obs'. 
Training with observations on a grid with a spatial resolution closer to that +#'in 'exp', will in principle ensure better results. The object must have, at least, the +#'dimensions latitude, longitude and start date. The object is expected to be already +#'subset for the desired region. +#'@param grid_exp a character vector with a path to an example file of the exp data. +#'It can be either a path to another NetCDF file which to read the target grid from +#'(a single grid must be defined in such file) or a character vector indicating the +#'coarse grid to be passed to CDO, and it must be a grid recognised by CDO. +#'@param nanalogs an integer indicating the number of analogs to be searched +#'@param fun_analog a function to be applied over the found analogs. Only these options +#'are valid: "mean", "wmean", "max", "min", "median" or NULL. If set to NULL (default), +#'the function returns the found analogs. +#'@param lat_dim a character vector indicating the latitude dimension name in the element +#''data' in exp and obs. Default set to "lat". +#'@param lon_dim a character vector indicating the longitude dimension name in the element +#''data' in exp and obs. Default set to "lon". +#'@param sdate_dim a character vector indicating the start date dimension name in the +#'element 'data' in exp and obs. Default set to "sdate". +#'@param time_dim a character vector indicating the time dimension name in the element +#''data' in exp and obs. Default set to "time". +#'@param member_dim a character vector indicating the member dimension name in the element +#''data' in exp and obs. Default set to "member". +#'@param region a numeric vector indicating the borders of the region defined in exp. +#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers +#'to the left border, while lonmax refers to the right border. latmin indicates the lower +#'border, whereas latmax indicates the upper border. If set to NULL (default), the function +#'takes the first and last elements of the latitudes and longitudes. +#'@param return_indices a logical vector indicating whether to return the indices of the +#'analogs together with the downscaled fields. Default to FALSE. +#'@param loocv_window a logical vector only to be used if 'obs' does not have the dimension +#''window'. It indicates whether to apply leave-one-out cross-validation in the creation +#'of the window. It is recommended to be set to TRUE. Default to TRUE. +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#' +#'@return An 's2dv' object. The element 'data' contains the dowscaled field, 'lat' the +#'downscaled latitudes, and 'lon' the downscaled longitudes. If fun_analog is set to NULL +#'(default), the output array in 'data' also contains the dimension 'analog' with the best +#'analog days. 
+#'@examples +#'exp <- rnorm(15000) +#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 30) +#'exp_lons <- 1:5 +#'exp_lats <- 1:4 +#'obs <- rnorm(27000) +#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5, time = 30) +#'obs_lons <- seq(0,6, 6/14) +#'obs_lats <- seq(0,6, 6/11) +#'exp <- s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons) +#'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons) +#'downscaled_field <- CST_Analogs(exp = exp, obs = obs, grid_exp = 'r360x180') +#'@export +CST_Analogs <- function(exp, obs, grid_exp, obs2 = NULL, nanalogs = 3, fun_analog = NULL, lat_dim = "lat", + lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", member_dim = "member", + region = NULL, return_indices = FALSE, loocv_window = TRUE, ncores = 1) { + + # input exp and obs must be s2dv_cube objects + if (!inherits(exp,'s2dv_cube')) { + stop("Parameter 'exp' must be of the class 's2dv_cube'") + } + + # input exp and obs must be s2dv_cube objects + if (!inherits(obs,'s2dv_cube')) { + stop("Parameter 'obs' must be of the class 's2dv_cube'") + } + + res <- Analogs(exp = exp$data, obs = obs$data, exp_lats = exp$lat, exp_lons = exp$lon, + obs_lats = obs$lat, obs_lons = obs$lon, grid_exp = grid_exp, + nanalogs = nanalogs, fun_analog = fun_analog, lat_dim = lat_dim, lon_dim = lon_dim, + sdate_dim = sdate_dim, time_dim = time_dim, member_dim = member_dim, + region = region, return_indices = return_indices, loocv_window = loocv_window, + ncores = ncores) + + # Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data + exp$data <- res$data + exp$lon <- res$lon + exp$lat <- res$lat + + obs$data <- res$obs + obs$lat <- res$lat + obs$lon <- res$lon + + res_s2dv <- list(exp = exp, obs = obs) + return(res_s2dv) +} + +#'@rdname Analogs +#'@title Downscaling using Analogs based on large scale fields. +#' +#'@author J. Ramon, \email{jaume.ramon@bsc.es} +#'@author Ll. Lledó, \email{llorenc.lledo@ecmwf.int} +#' +#'@description This function performs a downscaling using Analogs. To compute +#'the analogs given a coarse-scale field, the function looks for days with +#'similar conditions in the historical observations. The coarse scale and +#'observation data can be either global or regional. In the latter case, the +#'region is defined by the user. In principle, the coarse and observation data +#'should be of the same variable, although different variables can also be admitted. +#'The analogs function will find the N best analogs based in Minimum Euclidean +#'distance. +#' +#'The search of analogs must be done in the longest dataset posible, but might +#'require high-memory computational resources. This is important since it is +#'necessary to have a good representation of the possible states of the field in +#'the past, and therefore, to get better analogs. The function can also look for +#'analogs within a window of D days, but is the user who has to define that window. +#'Otherwise, the function will look for analogs in the whole dataset. This function +#'is intended to downscale climate prediction data (i.e., sub-seasonal, seasonal +#'and decadal predictions) but can admit climate projections or reanalyses. It does +#'not have constrains of specific region or variables to downscale. +#'@param exp an array with named dimensions containing the experimental field on the +#'coarse scale for which the downscaling is aimed. The object must have, at least, +#'the dimensions latitude, longitude, start date and time. The object is expected to be +#'already subset for the desired region. 
Data can be in one or two integrated regions, e.g., +#'crossing the Greenwich meridian. To get the correct results in the latter case, +#'the borders of the region should be specified in the parameter 'region'. See parameter +#''region'. +#'@param obs an array with named dimensions containing the observational field. The object +#'must have, at least, the dimensions latitude, longitude, start date and time. The object +#'is expected to be already subset for the desired region. Optionally, 'obs' can have the +#'dimension 'window', containing the sampled fields into which the function will look for +#'the analogs. See function 'generate_window()'. Otherwise, the function will look for +#'analogs using all the possible fields contained in obs. +#'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must +#'range from -90 to 90. +#'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes +#'can range from -180 to 180 or from 0 to 360. +#'@param obs_lats a numeric vector containing the latitude values in 'obs'. Latitudes must +#'range from -90 to 90. +#'@param obs_lons a numeric vector containing the longitude values in 'obs'. Longitudes +#'can range from -180 to 180 or from 0 to 360. +#'@param grid_exp a character vector with a path to an example file of the exp data. +#'It can be either a path to another NetCDF file which to read the target grid from +#'(a single grid must be defined in such file) or a character vector indicating the +#'coarse grid to be passed to CDO, and it must be a grid recognised by CDO. +#'@param obs2 an 's2dv' object with named dimensions containing a different observational +#'field to that in 'obs'. If provided, these observations will be used in the training, +#'i.e. the searching for analogs, so that they should be in a coarser grid to those in +#''obs'. Training with observations on a grid with a spatial resolution closer to that +#'in 'exp', will in principle ensure better results. The object must have, at least, the +#'dimensions latitude, longitude and start date. The object is expected to be already +#'subset for the desired region. +#'@param nanalogs an integer indicating the number of analogs to be searched. +#'@param fun_analog a function to be applied over the found analogs. Only these options +#'are valid: "mean", "wmean", "max", "min", "median" or NULL. If set to NULL (default), +#'the function returns the found analogs. +#'@param lat_dim a character vector indicating the latitude dimension name in the element +#''data' in exp and obs. Default set to "lat". +#'@param lon_dim a character vector indicating the longitude dimension name in the element +#''data' in exp and obs. Default set to "lon". +#'@param sdate_dim a character vector indicating the start date dimension name in the +#'element 'data' in exp and obs. Default set to "sdate". +#'@param time_dim a character vector indicating the time dimension name in the element +#''data' in exp and obs. Default set to "time". +#'@param member_dim a character vector indicating the member dimension name in the element +#''data' in exp and obs. Default set to "member". +#'@param region a numeric vector indicating the borders of the region defined in exp. +#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers +#'to the left border, while lonmax refers to the right border. latmin indicates the lower +#'border, whereas latmax indicates the upper border. 
If set to NULL (default), the function +#'takes the first and last elements of the latitudes and longitudes. +#'@param return_indices a logical vector indicating whether to return the indices of the +#'analogs together with the downscaled fields. The indices refer to the position of the +#'element in the vector time * start_date. If 'obs' contain the dimension 'window', it will +#'refer to the position of the element in the dimension 'window'. Default to FALSE. +#'@param loocv_window a logical vector only to be used if 'obs' does not have the dimension +#''window'. It indicates whether to apply leave-one-out cross-validation in the creation +#'of the window. It is recommended to be set to TRUE. Default to TRUE. +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#'@import multiApply +#'@import CSTools +#'@importFrom s2dv InsertDim CDORemap +#'@importFrom FNN get.knnx +#' +#'@seealso \code{\link[s2dverification]{CDORemap}} +#' +#'@return A list of three elements. 'data' contains the dowscaled field, 'lat' the +#'downscaled latitudes, and 'lon' the downscaled longitudes. If fun_analog is set to NULL +#'(default), the output array in 'data' also contains the dimension 'analog' with the best +#'analog days. +#'@examples +#'exp <- rnorm(15000) +#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 30) +#'exp_lons <- 1:5 +#'exp_lats <- 1:4 +#'obs <- rnorm(27000) +#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5, time = 30) +#'obs_lons <- seq(0,6, 6/14) +#'obs_lats <- seq(0,6, 6/11) +#'downscaled_field <- Analogs(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons, +#'obs_lats = obs_lats, obs_lons = obs_lons, grid_exp = 'r360x180') +#'@export +Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, obs2 = NULL, + obs2_lats = NULL, obs2_lons = NULL, nanalogs = 3, fun_analog = NULL, + lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", + member_dim = "member", region = NULL, return_indices = FALSE, + loocv_window = TRUE, ncores = 1) { + #----------------------------------- + # Checkings + #----------------------------------- + if (!inherits(grid_exp, 'character')) { + stop("Parameter 'grid_exp' must be of class 'character'. 
It can be either a path ", + "to another NetCDF file which to read the target grid from (a single grid must be ", + "defined in such file) or a character vector indicating the coarse grid to ", + "be passed to CDO, and it must be a grid recognised by CDO or a NetCDF file.") + } + + if (!inherits(nanalogs, 'numeric')) { + stop("Parameter 'nanalogs' must be of the class 'numeric'") + } + + if (!inherits(lat_dim, 'character')) { + stop("Parameter 'lat_dim' must be of the class 'character'") + } + + if (!inherits(lon_dim, 'character')) { + stop("Parameter 'lon_dim' must be of the class 'character'") + } + + if (!inherits(sdate_dim, 'character')) { + stop("Parameter 'sdate_dim' must be of the class 'character'") + } + + if (!inherits(time_dim, 'character')) { + stop("Parameter 'time_dim' must be of the class 'character'") + } + + # Do not allow synonims for lat (latitude), lon (longitude) and time (sdate) dimension names + if (is.na(match(lon_dim, names(dim(exp)))) | is.na(match(lon_dim, names(dim(obs))))) { + stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'lon_dim'") + } + + if (is.na(match(lat_dim, names(dim(exp)))) | is.na(match(lat_dim, names(dim(obs))))) { + stop("Missing latitude dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'lat_dim'") + } + + if (is.na(match(sdate_dim, names(dim(exp)))) | is.na(match(sdate_dim, names(dim(obs))))) { + stop("Missing start date dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'sdate_dim'") + } + + if (is.na(match(time_dim, names(dim(exp)))) | is.na(match(time_dim, names(dim(obs))))) { + stop("Missing time dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'time_dim'") + } + + # Ensure we have enough data to interpolate from high-res to coarse grid + #if ((obs_lats[1] > exp_lats[1]) | (obs_lats[length(obs_lats)] < exp_lats[length(exp_lats)]) | + # (obs_lons[1] > exp_lons[1]) | (obs_lons[length(obs_lons)] < exp_lons[length(exp_lons)])) { + + # stop("There are not enough data in 'obs'. Please to add more latitudes or ", + # "longitudes.") + #} + + # the code is not yet prepared to handle members in the observations + restore_ens <- FALSE + if (member_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[member_dim]), 1)) { + restore_ens <- TRUE + obs <- ClimProjDiags::Subset(x = obs, along = member_dim, indices = 1, drop = 'selected') + } else { + stop("Not implemented for observations with members ('obs' can have 'member_dim', ", + "but it should be of length = 1).") + } + } + + if (!is.null(obs2)) { + # the code is not yet prepared to handle members in the observations + if (member_dim %in% names(dim(obs2))) { + if (identical(as.numeric(dim(obs2)[member_dim]), 1)) { + obs2 <- ClimProjDiags::Subset(x = obs2, along = member_dim, indices = 1, drop = 'selected') + } else { + stop("Not implemented for observations with members ('obs2' can have 'member_dim', ", + "but it should be of length = 1).") + } + } + + if (is.null(obs2_lats) | is.null(obs2_lons)) { + stop("Missing latitudes and/or longitudes for the provided training observations. 
Please ", + "provide them with the parametres 'obs2_lats' and 'obs2_lons'") + } + + if (is.na(match(lon_dim, names(dim(obs2))))) { + stop("Missing longitude dimension in 'obs2', or does not match the parameter 'lon_dim'") + } + + if (is.na(match(lat_dim, names(dim(obs2))))) { + stop("Missing latitude dimension in 'obs2', or does not match the parameter 'lat_dim'") + } + + if (is.na(match(sdate_dim, names(dim(obs2))))) { + stop("Missing start date dimension in 'obs2', or does not match the parameter 'sdate_dim'") + } + + if (is.na(match(time_dim, names(dim(obs2))))) { + stop("Missing time dimension in 'obs2', or does not match the parameter 'time_dim'") + } + } + + # Select a function to apply to the analogs selected for a given observation + if (!is.null(fun_analog)) { + stopifnot(fun_analog %in% c("mean", "wmean", "max", "min", "median")) + } + + if (!is.null(obs2)) { + obs_train <- obs2 + obs_train_lats <- obs2_lats + obs_train_lons <- obs2_lons + } else { + obs_train <- obs + obs_train_lats <- obs_lats + obs_train_lons <- obs_lons + } + + # Correct indices later if cross-validation + loocv_correction <- FALSE + if ( !("window" %in% names(dim(obs_train))) & loocv_window) { + loocv_correction <- TRUE + } + + #----------------------------------- + # Interpolate high-res observations to the coarse grid + #----------------------------------- + if (is.null(region)) { + warning("The borders of the downscaling region have not been provided. Assuming the four borders of the ", + "downscaling region are defined by the first and last elements of the parametres 'exp_lats' and ", + "'exp_lons'.") + region <- c(exp_lons[1], exp_lons[length(exp_lons)], exp_lats[1], exp_lats[length(exp_lats)]) + } + + obs_interpolated <- Interpolation(exp = obs_train, lats = obs_train_lats, lons = obs_train_lons, + target_grid = grid_exp, lat_dim = lat_dim, lon_dim = lon_dim, + method_remap = "conservative", region = region) + # If after interpolating 'obs' data the coordinates do not match, the exp data is interpolated to + # the same grid to force the matching + if (!.check_coords(lat1 = obs_interpolated$lat, lat2 = exp_lats, lon1 = obs_interpolated$lon, lon2 = exp_lons)) { + exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = grid_exp, + lat_dim = lat_dim, lon_dim = lon_dim, method_remap = "conservative", + region = region)$data + } else { + exp_interpolated <- exp + } + + # Create window if user does not have it in the training observations + if ( !("window" %in% names(dim(obs_interpolated$data))) ) { + obs_train_interpolated <- generate_window(obj = obs_interpolated$data, sdate_dim = sdate_dim, + time_dim = time_dim, loocv = loocv_window) + obs_hres <- generate_window(obj = obs, sdate_dim = sdate_dim, time_dim = time_dim, loocv = loocv_window) + } + + #----------------------------------- + # Reshape train and test + #----------------------------------- + res.data <- Apply(list(obs_train_interpolated, exp_interpolated, obs_hres), + target_dims = list(c("window", lat_dim, lon_dim), c(lat_dim, lon_dim), + c("window", lat_dim, lon_dim)), + fun = function(tr, te, ob) .analogs(train = tr, test = te, obs_hres = ob, k = nanalogs, + fun_analog = fun_analog), ncores = ncores)$output1 + + # Return the indices of the best analogs + if (return_indices) { + res.ind <- Apply(list(obs_train_interpolated, exp_interpolated, obs_hres), + target_dims = list(c("window", lat_dim, lon_dim), c(lat_dim, lon_dim), + c("window", lat_dim, lon_dim)), + fun = function(tr, te, ob) .analogs(train = tr, 
test = te, obs_hres = ob, k = nanalogs, + fun_analog = fun_analog, return_indices = TRUE), ncores = ncores, output_dims = 'ind')$output1 + + # If cross-validation has been applied, correct the indices + if (loocv_correction) { + nsdates <- dim(res.ind)[names(dim(res.ind)) == sdate_dim] + ntimes <- dim(res.ind)[names(dim(res.ind)) == time_dim] + res.ind <- Apply(res.ind, target_dims = c("ind", sdate_dim), function(x) + sapply(1:nsdates, function(s) seq(ntimes * nsdates)[ - (ntimes * (s - 1) + 1:ntimes)][x[, s]]), + output_dims = c("ind", sdate_dim))$output1 + } + + # restore ensemble dimension in observations if it existed originally + if (restore_ens) { + obs <- s2dv::InsertDim(obs, posdim = 1, lendim = 1, name = member_dim) + } + + res <- list(data = res.data, ind = res.ind, obs = obs, lon = obs_lons, lat = obs_lats) + } + else { + # restore ensemble dimension in observations if it existed originally + if (restore_ens) { + obs <- s2dv::InsertDim(obs, posdim = 1, lendim = 1, name = member_dim) + } + + res <- list(data = res.data, obs = obs, lon = obs_lons, lat = obs_lats) + } + + return(res) +} + +# For each element in test, find the indices of the k nearest neigbhors in train +.analogs <- function(train, test, obs_hres, k, fun_analog, return_indices = FALSE) { + + require(FNN) + + # train and obs_hres dim: 3 dimensions window, lat and lon (in this order) + # test dim: 2 dimensions lat and lon (in this order) + # Number of lats/lons of the high-resolution data + space_dims_hres <- dim(obs_hres)[c(2,3)] + + # Reformat train and test as an array with (time, points) + train <- apply(train, 1, as.vector); names(dim(train))[1] <- "space" + test <- as.vector(test) + obs_hres <- apply(obs_hres, 1, as.vector); names(dim(obs_hres))[1] <- "space" + + # Identify and remove NA's + idx_na_tr <- is.na(train[ , 1]) + idx_na_te <- is.na(test) + idx_na <- idx_na_tr | idx_na_te + tr_wo_na <- t(train[!idx_na , ]) + te_wo_na <- test[!idx_na] + te_wo_na <- InsertDim(data = te_wo_na, posdim = 1, lendim = 1, name = "time") + + knn.ind <- get.knnx(tr_wo_na, te_wo_na, k) + + dist <- knn.ind$nn.dist + idx <- knn.ind$nn.index + + # Either return only the indices or the analogs + if (return_indices) { + res <- as.numeric(idx) + } else { + res <- obs_hres[ , idx] + dim(res) <- c(space_dims_hres, analogs = k) + + if (!is.null(fun_analog)) { + if (fun_analog == "wmean") { + weight <- 1 / dist + res <- apply(res, c(1,2), function(x) weighted.mean(x, weight)) + } else { + res <- apply(res, c(1,2), fun_analog) + } + } + } + + return(res) +} + +# Add the dimension window to an array that contains, at least, the start date and time +# dimensions +# object has at least dimensions sdate and time +generate_window <- function(obj, sdate_dim, time_dim, loocv, size = NULL) { + + rsdates <- 1:dim(obj)[names(dim(obj)) == sdate_dim] + ntimes <- dim(obj)[names(dim(obj)) == time_dim] + rtimes <- 1:dim(obj)[names(dim(obj)) == time_dim] + + # Generate a window containing all the data + if (is.null(size)) { + + # Generate window removing one start date each time (leave-one-out cross-validation) + if (loocv) { + obj_window <- Apply(obj, target_dims = c(time_dim, sdate_dim), + fun = function(x) sapply(rsdates, function(s) as.vector(x[ rtimes, -s])), + output_dims = c('window', sdate_dim))$output1 + # Generate window without cross-validation + } else { + obj_window <- Apply(obj, target_dims = c(time_dim, sdate_dim), + fun = as.vector, output_dims = 'window')$output1 + } + } + # Generate window of the size specified by the user. 
Only applied with CV + else { + # For an accurate generation of the window, it is mandatory to add some "extra" data. + if (!("smonth" %in% names(dim(obj)))) { + stop("Missing 'smonth' dimension") + } + + # Concatenate data from previous, target and posterior months + obj_new <- Apply(obj, target_dims = list(c("time", "smonth")), + fun = as.vector, output_dims = "time")$output1 + + if (loocv) { + obj_window <- Apply(list(obj_new, rtimes, rsdates), target_dims = list(c(time_dim, sdate_dim), NULL, NULL), + fun = function(x, t, s) as.vector(x[(ntimes + t - size):(ntimes + t + size), -s]), + output_dims = 'window')$output1 + names(dim(obj_window))[(length(names(dim(obj_window))) - 1):length(names(dim(obj_window)))] <- c(time_dim, sdate_dim) + } else { + obj_window <- Apply(obj_new, target_dims = c(time_dim, sdate_dim), + fun = function(x) sapply(rtimes, function(t) as.vector(x[(ntimes + t - size):(ntimes + t + size), ])), + output_dims = c('window', time_dim))$output1 + + } + } + + return(obj_window) +} + + + + + + + + diff --git a/modules/Downscaling/tmp/Intbc.R b/modules/Downscaling/tmp/Intbc.R new file mode 100644 index 00000000..86bb5a9c --- /dev/null +++ b/modules/Downscaling/tmp/Intbc.R @@ -0,0 +1,325 @@ +#'@rdname CST_Intbc +#'@title Downscaling using interpolation and bias adjustment. +#' +#'@author J. Ramon, \email{jaume.ramon@bsc.es} +#' +#'@description This function performs a downscaling using an interpolation and a later bias +#'adjustment. It is recommended that the observations are passed already in the target grid. +#'Otherwise, the function will also perform an interpolation of the observed field into the +#'target grid. The coarse scale and observation data can be either global or regional. In the +#'latter case, the region is defined by the user. In principle, the coarse and observation data +#'are intended to be of the same variable, although different variables can also be admitted. +#' +#'@param exp an 's2dv object' containing the experimental field on the +#'coarse scale for which the downscaling is aimed. The object must have, at least, +#'the dimensions latitude, longitude, start date and member. The object is expected to be +#'already subset for the desired region. Data can be in one or two integrated regions, e.g., +#'crossing the Greenwich meridian. To get the correct results in the latter case, +#'the borders of the region should be specified in the parameter 'region'. See parameter +#''region'. +#'@param obs an 's2dv object' containing the observational field. The object +#'must have, at least, the dimensions latitude, longitude and start date. The object is +#'expected to be already subset for the desired region. +#'@param target_grid a character vector indicating the target grid to be passed to CDO. +#'It must be a grid recognised by CDO or a NetCDF file. +#'@param bc_method a character vector indicating the bias adjustment method to be applied after +#'the interpolation. Accepted methods are 'simple_bias', 'calibration', 'quantile_mapping'. The +#'abbreviations 'sbc', 'cal', 'qm' can also be used. +#'@param int_method a character vector indicating the regridding method to be passed to CDORemap. +#'Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is to be used, CDO_1.9.8 +#'or newer version is required. +#'@param points a list of two elements containing the point latitudes and longitudes +#'of the locations to downscale the model data. The list must contain the two elements +#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. 
If the downscaling is +#'to a point location, only regular grids are allowed for exp and obs. Only needed if the +#'downscaling is to a point location. +#'@param method_point_interp a character vector indicating the interpolation method to interpolate +#'model gridded data into the point locations. Accepted methods are "nearest", "bilinear", "9point", +#'"invdist4nn", "NE", "NW", "SE", "SW". Only needed if the downscaling is to a point location. +#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data' +#'in exp and obs. Default set to "lat". +#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data' +#'in exp and obs. Default set to "lon". +#'@param sdate_dim a character vector indicating the start date dimension name in the element +#''data' in exp and obs. Default set to "sdate". +#'@param member_dim a character vector indicating the member dimension name in the element +#''data' in exp and obs. Default set to "member". +#'@param region a numeric vector indicating the borders of the region defined in obs. +#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers +#'to the left border, while lonmax refers to the right border. latmin indicates the lower +#'border, whereas latmax indicates the upper border. If set to NULL (default), the function +#'takes the first and last elements of the latitudes and longitudes. +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#'@return An 's2dv' object. The element 'data' contains the dowscaled field, 'lat' the +#'downscaled latitudes, and 'lon' the downscaled longitudes. +#'@examples +#'exp <- rnorm(500) +#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5) +#'exp_lons <- 1:5 +#'exp_lats <- 1:4 +#'obs <- rnorm(900) +#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5) +#'obs_lons <- seq(1,5, 4/14) +#'obs_lats <- seq(1,4, 3/11) +#'exp <- s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons) +#'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons) +#'res <- CST_Intbc(exp = exp, obs = obs, target_grid = 'r1280x640', bc_method = 'simple_bias', int_method = 'conservative') +#'@export + +CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, points = NULL, + method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", + sdate_dim = "sdate", member_dim = "member", region = NULL, ncores = 1) +{ + if (!inherits(exp,'s2dv_cube')) { + stop("Parameter 'exp' must be of the class 's2dv_cube'") + } + + if (!inherits(obs,'s2dv_cube')) { + stop("Parameter 'obs' must be of the class 's2dv_cube'") + } + + res <- Intbc(exp = exp$data, obs = obs$data, exp_lats = exp$lat, exp_lons = exp$lon, + obs_lats = obs$lat, obs_lons = obs$lon, target_grid = target_grid, + int_method = int_method, bc_method = bc_method, points = points, source_file = exp$source_files[1], + method_point_interp = method_point_interp, lat_dim = lat_dim, lon_dim = lon_dim, + sdate_dim = sdate_dim, member_dim = member_dim, region = region, ncores = ncores) + + # Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data + exp$data <- res$data + exp$lon <- res$lon + exp$lat <- res$lat + + obs$data <- res$obs + obs$lat <- res$lat + obs$lon <- res$lon + + res_s2dv <- list(exp = exp, obs = obs) + return(res_s2dv) + +} + +#'@rdname Intbc +#'@title Downscaling using interpolation and bias adjustment. +#' +#'@author J. 
Ramon, \email{jaume.ramon@bsc.es} +#' +#'@description This function performs a downscaling using an interpolation and a later bias +#'adjustment. It is recommended that the observations are passed already in the target grid. +#'Otherwise, the function will also perform an interpolation of the observed field into the +#'target grid. The coarse scale and observation data can be either global or regional. In the +#'latter case, the region is defined by the user. In principle, the coarse and observation data +#'are intended to be of the same variable, although different variables can also be admitted. +#' +#'@param exp an array with named dimensions containing the experimental field on the +#'coarse scale for which the downscaling is aimed. The object must have, at least, +#'the dimensions latitude, longitude, start date and member. The object is expected to be +#'already subset for the desired region. Data can be in one or two integrated regions, e.g., +#'crossing the Greenwich meridian. To get the correct results in the latter case, +#'the borders of the region should be specified in the parameter 'region'. See parameter +#''region'. +#'@param obs an array with named dimensions containing the observational field. The object +#'must have, at least, the dimensions latitude, longitude and start date. The object is +#'expected to be already subset for the desired region. +#'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must +#'range from -90 to 90. +#'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes +#'can range from -180 to 180 or from 0 to 360. +#'@param obs_lats a numeric vector containing the latitude values in 'obs'. Latitudes must +#'range from -90 to 90. +#'@param obs_lons a numeric vector containing the longitude values in 'obs'. Longitudes +#'can range from -180 to 180 or from 0 to 360. +#'@param target_grid a character vector indicating the target grid to be passed to CDO. +#'It must be a grid recognised by CDO or a NetCDF file. +#'@param bc_method a character vector indicating the bias adjustment method to be applied after +#'the interpolation. Accepted methods are 'simple_bias', 'calibration', 'quantile_mapping'. The +#'abbreviations 'sbc', 'cal', 'qm' can also be used. +#'@param int_method a character vector indicating the regridding method to be passed to CDORemap. +#'Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is to be used, CDO_1.9.8 +#'or newer version is required. +#'@param points a list of two elements containing the point latitudes and longitudes +#'of the locations to downscale the model data. The list must contain the two elements +#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is +#'to a point location, only regular grids are allowed for exp and obs. Only needed if the +#'downscaling is to a point location. +#'@param method_point_interp a character vector indicating the interpolation method to interpolate +#'model gridded data into the point locations. Accepted methods are "nearest", "bilinear", "9point", +#'"invdist4nn", "NE", "NW", "SE", "SW". Only needed if the downscaling is to a point location. +#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data' +#'in exp and obs. Default set to "lat". +#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data' +#'in exp and obs. Default set to "lon". 
+#'@param sdate_dim a character vector indicating the start date dimension name in the element +#''data' in exp and obs. Default set to "sdate". +#'@param member_dim a character vector indicating the member dimension name in the element +#''data' in exp and obs. Default set to "member". +#'@param source_file a character vector with a path to an example file of the exp data. +#'Only needed if the downscaling is to a point location. +#'@param region a numeric vector indicating the borders of the region defined in obs. +#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers +#'to the left border, while lonmax refers to the right border. latmin indicates the lower +#'border, whereas latmax indicates the upper border. If set to NULL (default), the function +#'takes the first and last elements of the latitudes and longitudes. +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#' +#'@import CSTools +#' +#'@seealso \code{\link[CSTools]{BiasCorrection}} +#'@seealso \code{\link[CSTools]{Calibration}} +#'@seealso \code{\link[CSTools]{QuantileMapping}} +#' +#'@return An list of three elements. 'data' contains the dowscaled field, 'lat' the +#'downscaled latitudes, and 'lon' the downscaled longitudes. +#'@examples +#'exp <- rnorm(500) +#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5) +#'exp_lons <- 1:5 +#'exp_lats <- 1:4 +#'obs <- rnorm(900) +#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5) +#'obs_lons <- seq(1,5, 4/14) +#'obs_lats <- seq(1,4, 3/11) +#'res <- Intbc(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons, obs_lats = obs_lats, +#'obs_lons = obs_lons, target_grid = 'r1280x640', bc_method = 'simple_bias', int_method = 'conservative') +#'@export +Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, bc_method, int_method = NULL, + points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", + time_dim = "time", member_dim = "member", source_file = NULL, region = NULL, ncores = 1, ...) 
{ + + if (!inherits(bc_method, 'character')) { + stop("Parameter 'bc_method' must be of the class 'character'") + } + + if (!inherits(lat_dim, 'character')) { + stop("Parameter 'lat_dim' must be of the class 'character'") + } + + if (!inherits(lon_dim, 'character')) { + stop("Parameter 'lon_dim' must be of the class 'character'") + } + + if (!inherits(sdate_dim, 'character')) { + stop("Parameter 'sdate_dim' must be of the class 'character'") + } + + if (!inherits(member_dim, 'character')) { + stop("Parameter 'member_dim' must be of the class 'character'") + } + + # Do not allow synonims for lat (latitude), lon (longitude) and time (sdate) dimension names + if (is.na(match(lon_dim, names(dim(exp)))) | is.na(match(lon_dim, names(dim(obs))))) { + stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'lon_dim'") + } + + if (is.na(match(lat_dim, names(dim(exp)))) | is.na(match(lat_dim, names(dim(obs))))) { + stop("Missing latitude dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'lat_dim'") + } + + if (is.na(match(sdate_dim, names(dim(exp)))) | is.na(match(sdate_dim, names(dim(obs))))) { + stop("Missing start date dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'sdate_dim'") + } + + if (is.na(match(member_dim, names(dim(exp))))) { + stop("Missing member dimension in 'exp', or does not match the parameter 'member_dim'") + } + + if (!(bc_method %in% c('sbc', 'cal', 'qm', 'dbc', 'simple_bias', 'calibration', + 'quantile_mapping', 'dynamical_bias'))) { + stop("Parameter 'bc_method' must be a character vector indicating the bias adjustment method. ", + "Accepted methods are 'simple_bias', 'calibration', 'quantile_mapping'. The abbreviations ", + "'sbc', 'cal', 'qm' can also be used.") + } + + if (!is.null(points) & is.null(source_file)) { + stop("No source file found. Source file must be provided in the parameter 'source_file'.") + } + + if (!is.null(points) & is.null(method_point_interp)) { + stop("Please provide the interpolation method to interpolate gridded data to point locations ", + "through the parameter 'method_point_interp'.") + } + + if (is.null(region)) { + warning("The borders of the downscaling region have not been provided. 
Assuming the four borders ", + "of the downscaling region are defined by the first and last elements of the parametres ", + "'obs_lats' and 'obs_lons'.") + region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)]) + } + + exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid, + method_remap = int_method, points = points, source_file = source_file, + lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp, + region = region) + + # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to + # the same grid to force the matching + if ((!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats, + lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !is.null(points)) { + obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid, + method_remap = int_method, points = points, source_file = source_file, + lat_dim = lat_dim, lon_dim = lon_dim, + method_point_interp = method_point_interp, region = region) + obs_ref <- obs_interpolated$data + } else { + obs_ref <- obs + } + + # Some functions only accept the dimension names "member" and "sdate" for exp and + # "sdate" for obs + #if (member_dim != 'member') { + # names(dim(exp_interpolated$data)) <- replace(names(dim(exp_interpolated$data)), + # which(names(dim(exp_interpolated$data)) == member_dim), 'member') + #} + + #if (sdate_dim != 'sdate') { + # names(dim(exp_interpolated$data)) <- replace(names(dim(exp_interpolated$data)), + # which(names(dim(exp_interpolated$data)) == sdate_dim), 'sdate') + # names(dim(obs_ref)) <- replace(names(dim(obs_ref)), + # which(names(dim(obs_ref)) == sdate_dim), 'sdate') + #} + + if (bc_method == 'sbc' | bc_method == 'simple_bias') { + if (dim(obs_ref)[sdate_dim] == 1) { + warning('Simple Bias Correction should not be used with only one observation. Returning NA.') + } + + res <- BiasCorrection(exp = exp_interpolated$data, obs = obs_ref, memb_dim = member_dim, + sdate_dim = sdate_dim, ...) + } + else if (bc_method == 'cal' | bc_method == 'calibration') { + if (dim(exp_interpolated$data)[member_dim] == 1) { + stop('Calibration must not be used with only one ensemble member.') + } + res <- Calibration(exp = exp_interpolated$data, obs = obs_ref, memb_dim = member_dim, + sdate_dim = sdate_dim, ...) + } + else if (bc_method == 'qm' | bc_method == 'quantile_mapping') { + + res <- QuantileMapping(exp = exp_interpolated$data, obs = obs_ref, na.rm = TRUE, + memb_dim = member_dim, sdate_dim = sdate_dim, ...) + } + else if (bc_method == 'dbc' | bc_method == 'dynamical_bias') { + # the temporal dimension must be only one dimension called "time" + if (all(c(time_dim, sdate_dim) %in% names(dim(exp_interpolated$data)))) { + exp_interpolated$data <- Apply(exp_interpolated$data, target_dims = c(time_dim, sdate_dim), + fun = as.vector, output_dims = "time")$output1 + } + if (all(c(time_dim, sdate_dim) %in% names(dim(obs_ref)))) { + obs_ref <- Apply(obs_ref, target_dims = c(time_dim, sdate_dim), fun = as.vector, + output_dims = "time")$output1 + } + # REMEMBER to add na.rm = T in colMeans in .proxiesattractor + res <- DynBiasCorrection(exp = exp_interpolated$data, obs = obs_ref, ...) 
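+    # Note: any additional arguments passed to Intbc through '...' are forwarded to the
+    # bias-adjustment function selected above (BiasCorrection, Calibration, QuantileMapping
+    # or DynBiasCorrection). As a rough, not-run illustration with the toy arrays from the
+    # roxygen example above:
+    #   res <- Intbc(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons,
+    #                obs_lats = obs_lats, obs_lons = obs_lons, target_grid = 'r1280x640',
+    #                bc_method = 'qm', int_method = 'bil')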
+ } + + # Return a list of three elements + res <- list(data = res, obs = obs_ref, lon = exp_interpolated$lon, lat = exp_interpolated$lat) + + return(res) +} + + + + diff --git a/modules/Downscaling/tmp/Interpolation.R b/modules/Downscaling/tmp/Interpolation.R new file mode 100644 index 00000000..e594691f --- /dev/null +++ b/modules/Downscaling/tmp/Interpolation.R @@ -0,0 +1,739 @@ +#'@rdname CST_Interpolation +#'@title Regrid or interpolate gridded data to a point location. +#' +#'@author J. Ramon, \email{jaume.ramon@bsc.es} +#' +#'@description This function interpolates gridded model data from one grid to +#'another (regrid) or interpolates gridded model data to a set of point locations. +#'The gridded model data can be either global or regional. In the latter case, the +#'region is defined by the user. It does not have constrains of specific region or +#'variables to downscale. +#'@param exp s2dv object containing the experimental field on the +#'coarse scale for which the downscaling is aimed. The object must have, at least, +#'the dimensions latitude and longitude. The field data is expected to be already subset +#'for the desired region. Data can be in one or two integrated regions, e.g., +#'crossing the Greenwich meridian. To get the correct results in the latter case, +#'the borders of the region should be specified in the parameter 'region'. See parameter +#''region'. +#'@param points a list of two elements containing the point latitudes and longitudes +#'of the locations to downscale the model data. The list must contain the two elements +#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is +#'to a point location, only regular grids are allowed for exp and obs. Only needed if the +#'downscaling is to a point location. +#'@param method_remap a character vector indicating the regridding method to be passed +#'to CDORemap. Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is +#'to be used, CDO_1.9.8 or newer version is required. +#'@param target_grid a character vector indicating the target grid to be passed to CDO. +#'It must be a grid recognised by CDO or a NetCDF file. +#'@param lat_dim a character vector indicating the latitude dimension name in the element +#''exp' and/or 'points'. Default set to "lat". +#'@param lon_dim a character vector indicating the longitude dimension name in the element +#''exp' and/or 'points'. Default set to "lon". +#'@param region a numeric vector indicating the borders of the region defined in exp. +#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers +#'to the left border, while lonmax refers to the right border. latmin indicates the lower +#'border, whereas latmax indicates the upper border. If set to NULL (default), the function +#'takes the first and last elements of the latitudes and longitudes. +#'@param method_point_interp a character vector indicating the interpolation method to +#'interpolate model gridded data into the point locations. Accepted methods are "nearest", +#'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW". +#' +#'@seealso \code{\link[s2dverification]{CDORemap}} +#' +#'@return An s2dv object containing the dowscaled field. 
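+#'For instance, after a call such as
+#'res <- CST_Interpolation(exp = exp, method_remap = 'bil', target_grid = 'r360x180')
+#'(illustrative method and grid), the regridded field and its coordinates can be accessed
+#'as res$exp$data, res$exp$lat and res$exp$lon.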
+#' +#'@examples +#'exp <- rnorm(500) +#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 1) +#'lons <- 1:5 +#'lats <- 1:4 +#'exp <- s2dv_cube(data = exp, lat = lats, lon = lons) +#'res <- CST_Interpolation(exp = exp, method_remap = 'conservative', target_grid = 'r1280x640') +#'@export +CST_Interpolation <- function(exp, points = NULL, method_remap = NULL, target_grid = NULL, + lat_dim = "lat", lon_dim = "lon", region = NULL, + method_point_interp = NULL) +{ + if (!inherits(exp,'s2dv_cube')) { + stop("Parameter 'exp' must be of the class 's2dv_cube'") + } + + #if (is.null(exp[[lat_dim]]) | is.null(exp[[lon_dim]])) { + # stop("The name of the latitude/longitude elements in 'exp' must match the parametres ", + # "'lat_dim' and 'lon_dim'") + #} + + if ((length(which(names(dim(exp$data)) == lat_dim)) == 0) | (length(which(names(dim(exp$data)) == lon_dim)) == 0)) { + stop("The name of the latitude/longitude dimensions in 'exp$data' must match the parametres 'lat_dim' and 'lon_dim'") + } + + res <- Interpolation(exp = exp$data, lats = exp$lat, lons = exp$lon, + source_file = exp$source_files[1], points = points, + method_remap = method_remap, target_grid = target_grid, lat_dim = lat_dim, + lon_dim = lon_dim, region = region, method_point_interp = method_point_interp) + + # Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data + exp$data <- res$data + exp$lon <- res$lon + exp$lat <- res$lat + + res_s2dv <- list(exp = exp, obs = NULL) + return(res_s2dv) +} + +#'@rdname Interpolation +#'@title Regrid or interpolate gridded data to a point location. +#' +#'@author J. Ramon, \email{jaume.ramon@bsc.es} +#'@author Ll. Lledó, \email{llorenc.lledo@ecmwf.int} +#' +#'@description This function interpolates gridded model data from one grid to +#'another (regrid) or interpolates gridded model data to a set of point locations. +#'The gridded model data can be either global or regional. In the latter case, the +#'region is defined by the user. It does not have constrains of specific region or +#'variables to downscale. +#'@param exp an array with named dimensions containing the experimental field on the +#'coarse scale for which the downscaling is aimed. The object must have, at least, +#'the dimensions latitude and longitude. The object is expected to be already subset +#'for the desired region. Data can be in one or two integrated regions, e.g., +#'crossing the Greenwich meridian. To get the correct results in the latter case, +#'the borders of the region should be specified in the parameter 'region'. See parameter +#''region'. +#'@param lats a numeric vector containing the latitude values. Latitudes must range from +#'-90 to 90. +#'@param lons a numeric vector containing the longitude values. Longitudes can range from +#'-180 to 180 or from 0 to 360. +#'@param points a list of two elements containing the point latitudes and longitudes +#'of the locations to downscale the model data. The list must contain the two elements +#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is +#'to a point location, only regular grids are allowed for exp and obs. Only needed if the +#'downscaling is to a point location. +#'@param source_file a character vector with a path to an example file of the exp data. +#'Only needed if the downscaling is to a point location. +#'@param method_remap a character vector indicating the regridding method to be passed +#'to CDORemap. Accepted methods are "con", "bil", "bic", "nn", "con2". 
If "nn" method is +#'to be used, CDO_1.9.8 or newer version is required. +#'@param target_grid a character vector indicating the target grid to be passed to CDO. +#'It must be a grid recognised by CDO or a NetCDF file. +#'@param lat_dim a character vector indicating the latitude dimension name in the element +#''exp' and/or 'points'. Default set to "lat". +#'@param lon_dim a character vector indicating the longitude dimension name in the element +#''exp' and/or 'points'. Default set to "lon". +#'@param region a numeric vector indicating the borders of the region defined in exp. +#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers +#'to the left border, while lonmax refers to the right border. latmin indicates the lower +#'border, whereas latmax indicates the upper border. If set to NULL (default), the function +#'takes the first and last elements of the latitudes and longitudes. +#'@param method_point_interp a character vector indicating the interpolation method to +#'interpolate model gridded data into the point locations. Accepted methods are "nearest", +#'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW". Only needed if the downscaling +#'is to a point location. +#'@import multiApply +#'@import plyr +#'@importFrom s2dv CDORemap +#' +#'@seealso \code{\link[s2dverification]{CDORemap}} +#' +#'@return An list of three elements. 'data' contains the dowscaled field, 'lat' the +#'downscaled latitudes, and 'lon' the downscaled longitudes. +#' +#'@examples +#'exp <- rnorm(500) +#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 1) +#'lons <- 1:5 +#'lats <- 1:4 +#'res <- Interpolation(exp = exp, lats = lats, lons = lons, method_remap = 'conservative', target_grid = 'r1280x640') +#'@export +Interpolation <- function(exp, lats, lons, points = NULL, source_file = NULL, method_remap = NULL, + target_grid = NULL, lat_dim = "lat", lon_dim = "lon", region = NULL, + method_point_interp = NULL) +{ + if (!is.null(method_remap)) { + if (!inherits(method_remap, 'character')) { + stop("Parameter 'method_remap' must be of the class 'character'") + } + } + + if (!is.null(method_point_interp)) { + if (!inherits(method_point_interp, 'character')) { + stop("Parameter 'method_point_interp' must be of the class 'character'") + } + } + + if (is.na(match(lon_dim, names(dim(exp))))) { + stop("Missing longitude dimension in 'exp', or does not match the parameter 'lon_dim'") + } + + if (is.na(match(lat_dim, names(dim(exp))))) { + stop("Missing latitude dimension in 'exp', or does not match the parameter 'lat_dim'") + } + + # Check for negative latitudes in the exp data + if (any(lats < -90 | lats > 90) ) { + stop("Out-of-range latitudes have been found. Latitudes must range from -90 to 90") + } + + # checkings for the case of interpolation to point locations + if (!is.null(points)) { + if (!inherits(points, 'list')) { + stop("Parameter 'points' must be a list of two elements containing the point ", + "latitudes and longitudes.") + } + + if (is.null(method_point_interp)) { + stop("Parameter 'method_point_interp' must be a character vector indicating the ", + "interpolation method. Accepted methods are nearest, bilinear, 9point, ", + "invdist4nn, NE, NW, SE, SW") + } + + if (!(method_point_interp %in% c('nearest', 'bilinear', '9point', 'invdist4nn', 'NE', 'NW', 'SE', 'SW'))) { + stop("Parameter 'method_point_interp' must be a character vector indicating the ", + "interpolation method. 
Accepted methods are nearest, bilinear, 9point, ", + "invdist4nn, NE, NW, SE, SW") + } + + # Points must be a list of two elements + if (length(points) != 2) { + stop("'points' must be a lis of two elements containing the point ", + "latitudes and longitudes in the form 'points$lat', 'points$lon'") + } + + # The names of the two elements must be 'lat' and 'lon' + if (any(!(c(lat_dim, lon_dim) %in% names(points)))) { + stop("The names of the elements in the list 'points' must coincide with the parametres ", + "'lat_dim' and 'lon_dim'") + } + + # Check that the number of latitudes and longitudes match + if (length(unique(lengths(points))) != 1L) { + stop("The number of latitudes and longitudes must match") + } + + # Check for negative latitudes in the point coordinates + if (any(points[[lat_dim]] < -90 | points[[lat_dim]] > 90) ) { + stop("Out-of-range latitudes have been found in 'points'. Latitudes must range from ", + "-90 to 90") + } + + if (is.null(source_file)) { + stop("No source file found. Source file must be provided in the parameter 'source_file'.") + } + } else { + if (is.null(method_remap)) { + stop("Parameter 'method_remap' must be a character vector indicating the ", + "interpolation method. Accepted methods are con, bil, bic, nn, con2") + } + + if (is.null(target_grid)) { + stop("Parameter 'target_grid' can be either a path ", + "to another NetCDF file which to read the target grid from (a single grid must be ", + "defined in such file) or a character vector indicating the coarse grid to ", + "be passed to CDO, and it must be a grid recognised by CDO or a NetCDF file.") + } + } + + #---------------------------------- + # Limits of the region defined by the model data + #---------------------------------- + # for the case when region limits are not passed by the user + # regions contains the following elements in order: lonmin, lonmax, latmin, latmax + if (is.null(region)) { + warning("The borders of the downscaling region have not been provided. 
Assuming the four borders of the ", + "downscaling region are defined by the first and last elements of the parametres 'lats' and 'lons'.") + region <- c(lons[1], lons[length(lons)], lats[1], lats[length(lats)]) + } + + # Ensure points to be within the region limits + if (!is.null(points)) { + if (any(points[[lat_dim]] > region[4]) | any(points[[lat_dim]] < region[3]) | + any(points[[lon_dim]] > region[2]) | any(points[[lon_dim]] < region[1])) { + stop("At least one of the points lies outside the model region") + } + } + + #---------------------------------- + # Map regrid with CDO + #---------------------------------- + if (is.null(points)) { + res <- CDORemap(data_array = exp, + lats = lats, + lons = lons, + grid = target_grid, + method = method_remap, + crop = region) + + # Return a list + res <- list(data = res$data_array, obs = NULL, lon = res$lons, lat = res$lats) + + #---------------------------------- + # Interpolate to point locations + #---------------------------------- + } else { + # First create interpolation weights, depending on the chosen method + weights <- create_interp_weights(ncfile = source_file, locids = 1:unique(lengths(points)), + lats = points[[lat_dim]], lons = points[[lon_dim]], + method = method_point_interp, region = list(lat_min = region[3], + lat_max = region[4], lon_min = region[1], lon_max = region[2])) + + # Select coarse-scale data to be interpolated + model_data_gridpoints <- get_model_data(weights.df = weights, mdata = exp) + + # Interpolate model data to point locations + res <- interpolate_data(model_data_gridpoints, weights) + + # Return a list + res <- list(data = res, obs = NULL, lon = points[[lon_dim]], lat = points[[lat_dim]]) + } + + return(res) +} + +#====================== +# Compute weights for interpolation at several (lat,lon) positions +# We assume that grid boxes are centered in the grid point. +#====================== +create_interp_weights <- function(ncfile, locids, lats, lons, region = NULL, + method = c("nearest", "bilinear", "9point", "invdist4nn", "NE", + "NW", "SE", "SW")) +{ + # crop the region to get the correct weights - save temporary file + nc_cropped1 <- paste0('tmp_cropped_', format(Sys.time(), "%Y%m%d%H%M"),'.nc') + nc_cropped2 <- paste0('tmp_cropped2_', format(Sys.time(), "%Y%m%d%H%M"),'.nc') + + system(paste0('cdo sellonlatbox,', region$lon_min, ',', region$lon_max, ',', region$lat_min, + ',', region$lat_max, ' ', ncfile, ' ', nc_cropped1)) + + #---------------- + # Read grid description and compute (i,j) of requested locations (including decimals) + #---------------- + griddes <- get_griddes(nc_cropped1) + + if (is.null(griddes$yinc)) { + system(paste0('rm ', nc_cropped1)) + stop("'griddes$yinc' not found in NetCDF file. Remember that only regular grids are accepted when ", + "downscaling to point locations.") + } + + # If latitudes are decreasingly ordered, revert them + if (griddes$yinc < 0) { + system(paste0('cdo invertlat ', nc_cropped1, ' ', nc_cropped2)) + griddes <- get_griddes(nc_cropped2) + } + # remove temporary files + system(paste0('rm ', nc_cropped1)) + system(paste0('rm ', nc_cropped2)) + + if (is.null(griddes)) { + stop("'griddes' not found in the NetCDF source files") + } + + gridpoints <- latlon2ij(griddes, lats, lons) + + #---------------- + # Compute the weights according to the selected method + #---------------- + if(method == "nearest") { + #---------------- + # Round i and j to closest integer. Weight is always 1. 
+ #---------------- + + # | | | + # -+-----+-----+- + # | x| | + # | a | | + # | | | + # -+-----+-----+- + # | | | + + centeri <- round(gridpoints$i,0) + centeri[centeri == griddes$xsize+1] <- 1 # close longitudes + + weights.df <- data.frame(locid = locids, + lat = lats, + lon = lons, + rawi = gridpoints$i, + rawj = gridpoints$j, + i = centeri, + j = round(gridpoints$j, 0), + weight = 1) + } else if (method %in% c("bilinear","invdist4nn")) { + #---------------- + # Get the (i,j) coordinates of the 4 points (a,b,c,d) around x. + # This plot shows i increasing to the right and + # j increasing to the top, but the computations are generic. + #---------------- + # | | | + #- +-----+-----+- + # | | | + # | b | c | + # | | | + #- +-----+-----+- + # | x| | + # | a | d | + # | | | + #- +-----+-----+- + # | | | + + lowi <- floor(gridpoints$i) + highi <- ceiling(gridpoints$i) + highi[highi == griddes$xsize+1] <- 1 # close the longitudes + lowj <- floor(gridpoints$j) + highj <- ceiling(gridpoints$j) + # Note: highi and lowi are the same if i is integer + # Note: highj and lowj are the same if j is integer + + #---------------- + # Get x position wrt ad and ab axes (from 0 to 1) + #---------------- + pcti <- gridpoints$i - lowi + pctj <- gridpoints$j - lowj + + #---------------- + # Compute weights for a,b,c,d grid points + #---------------- + if(method == "bilinear") { + wa = (1 - pcti) * (1 - pctj) + wb = (1 - pcti) * pctj + wc = pcti * pctj + wd = pcti * (1 - pctj) + } else if(method == "invdist4nn") { + #---------------- + # Note: the distance is computed in the (i,j) space. + # Note2: this method does not guarantees a continuous interpolation. + # Use bilinear if that's desirable. + # When x is on the ab line, c and d would be used. In the limit of x + # being just left of ab other points would be used. + # Here we just dropped c and d coeffs when over ab. Same for ad line, + # b and c coeffs dropped. This prevents repeated nodes. + #---------------- + ida = 1 / sqrt(pcti^2 + pctj^2) + idb = 1 / sqrt(pcti^2 + (1 - pctj)^2) + idc = 1 / sqrt((1-pcti)^2 + (1-pctj)^2) + idd = 1 / sqrt((1-pcti)^2 + pctj^2) + idb[pctj == 0] <- 0; + idc[pctj == 0] <- 0; + idc[pcti == 0] <- 0; + idd[pcti == 0] <- 0; + + #---------------- + # Normalize vector of inverse distances + #---------------- + invdist <- cbind(ida, idb, idc, idd) + print(invdist) + w <- t(apply(invdist, 1, function(x) { print(x); if(any(is.infinite(x))) { + x <- is.infinite(x) * 1 } ; x <- x/sum(x) })) + print(w) + + wa = w[ , 1] + wb = w[ , 2] + wc = w[ , 3] + wd = w[ , 4] + } + + #---------------- + # Put info in dataframes and rbind them + #---------------- + weightsa.df <- data.frame(locid = locids, lat = lats,lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = lowi, j = lowj, weight = wa) + weightsb.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = lowi, j = highj, weight = wb) + weightsc.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = highi, j = highj, weight = wc) + weightsd.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = highi, j = lowj, weight = wd) + weights.df <- rbind(weightsa.df, weightsb.df, weightsc.df, weightsd.df) + } else if(method == "9point") { + #---------------- + # Get the (i,j) coordinates of the 9 points (a,b,...,i) around x + # This plot shows i increasing to the right and + # j increasing to the top, but the computations are generic. 
+ #---------------- + # | | | | + #-+-----+-----+-----+- + # | | | | + # | c | f | i | + # | | | | + #-+-----+-----+-----+- + # | | x| | + # | b | e | h | + # | | | | + #-+-----+-----+-----+- + # | | | | + # | a | d | g | + # | | | | + #-+-----+-----+-----+- + # | | | | + + centeri <- round(gridpoints$i, 0) + centeri[centeri == griddes$xsize + 1] <- 1 + centerj <- round(gridpoints$j, 0) + lowi <- centeri - 1 + highi <- centeri + 1 + lowi[lowi == 0] <- griddes$xsize # close the longitudes + highi[highi == griddes$xsize+1] <- 1 # close the longitudes + lowj <- centerj - 1 + highj <- centerj + 1 + + #---------------- + # For the north and south pole do a 6-point average + #---------------- + w_highj <- ifelse(centerj == 1,1/6,ifelse(centerj == griddes$ysize,0 ,1/9)) + w_centerj <- ifelse(centerj == 1,1/6,ifelse(centerj == griddes$ysize,1/6,1/9)) + w_lowj <- ifelse(centerj == 1,0 ,ifelse(centerj == griddes$ysize,1/6,1/9)) + + #---------------- + # Put info in dataframes and rbind them + #---------------- + weightsa.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = lowi, j = lowj, weight = w_lowj) + weightsb.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = lowi, j = centerj, weight = w_centerj) + weightsc.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = lowi, j = highj, weight = w_highj) + weightsd.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = centeri, j = lowj, weight = w_lowj) + weightse.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = centeri, j = centerj, weight = w_centerj) + weightsf.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = centeri, j = highj, weight = w_highj) + weightsg.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = highi, j = lowj, weight = w_lowj) + weightsh.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = highi, j = centerj, weight = w_centerj) + weightsi.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = highi, j = highj, weight = w_highj) + weights.df <- rbind(weightsa.df, weightsb.df, weightsc.df, weightsd.df, weightse.df, + weightsf.df, weightsg.df, weightsh.df, weightsi.df) + } else if(method %in% c("NE", "NW", "SW", "SE")) { + #---------------- + # Find if increasing i and j increases or decreases lat and lon + #---------------- + westtoeast <- (griddes$xinc > 0) + southtonorth <- T + if(griddes$gridtype == "gaussian") { + # We assume gaussian grid latitudes are ordered north to south + southtonorth <- F + } else { #lonlat + if(griddes$yinc < 0) {southtonorth <- F} + } + + #---------------- + # Get the (i,j) coordinates of the desired point (a,b,c or d) around x + #---------------- + # | | | + #- +-----+-----+- + # | | | + # | b | c | + # | | | + #- +-----+-----+- + # | x| | + # | a | d | + # | | | + #- +-----+-----+- + # | | | + + if(substr(method,1,1) == "N" & southtonorth == T) { selj <- ceiling(gridpoints$j) } + if(substr(method,1,1) == "S" & southtonorth == T) { selj <- floor(gridpoints$j) } + if(substr(method,1,1) == "N" & southtonorth == F) { selj <- floor(gridpoints$j) } + if(substr(method,1,1) == "S" & southtonorth == F) { selj <- ceiling(gridpoints$j) } + + 
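+    # Rough worked example, assuming a west-to-east and south-to-north grid: for a point
+    # falling at fractional indices i = 3.4, j = 7.2, method "NE" picks the grid point
+    # (i, j) = (4, 8) via ceiling(), whereas "SW" picks (3, 7) via floor(); as in the
+    # nearest-neighbour case, the weight is always 1.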
if(substr(method,2,2) == "E" & westtoeast == T) {seli <- ceiling(gridpoints$i) } + if(substr(method,2,2) == "W" & westtoeast == T) {seli <- floor(gridpoints$i) } + if(substr(method,2,2) == "E" & westtoeast == F) {seli <- floor(gridpoints$i) } + if(substr(method,2,2) == "W" & westtoeast == F) {seli <- ceiling(gridpoints$i) } + + seli[seli == griddes$xsize + 1] <- 1 # close the longitudes + + weights.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = seli, j = selj, weight = 1) + } else { + stop(paste0("Method " ,method, " not implemented")) + } + + #---------------- + # Order by locid and remove lines with 0 weight + # This also removes some duplicates in the bilinear/invdist4nn methods when i + # or j is a whole number, or in the 9-point method when at the poles. + #---------------- + weights.df <- weights.df[order(weights.df$locid), ] + weights.df <- weights.df[weights.df$weight != 0, ] + + #---------------- + # Add as attributes the method and the nc file used to compute the weights + #---------------- + attributes(weights.df)$nc_file <- normalizePath(ncfile) + attributes(weights.df)$method <- method + + return(weights.df) +} + +#====================== +# Compute (i,j) from (lat,lon). +# Works only for 'lonlat' and 'gaussian' grids. +# Grids are supposed to cover whole globe. +#====================== +latlon2ij <- function(griddes, lats, lons) { + #------------ + # Check input params + #------------ + if(length(lons) != length(lats)) {stop("Input lat and lon have different lengths.")} + if(any(lats < -90) | any(lats > 90)) {stop("Latitude out of valid range")} + if((griddes$xfirst > 180) & (any(lons < 0))) { + stop("Please use the same convention for the longitudes in the source file and the ", + "longitude values in 'points'.") + } + #if(round(griddes$xinc*griddes$xsize) != 360) {stop("Grid is not global")} + # no need to resize lons to [0,360) + + #------------ + # Compute i (with decimals) + # i lies in [1,xsize+1) + # %% gives the remainder + #------------ + gridpoints <- list() + gridpoints$i <- 1 + (((lons - griddes$xfirst) / griddes$xinc) %% griddes$xsize) + + #------------ + # Compute j (with decimals) + #------------ + if(griddes$gridtype=='lonlat') { + gridpoints$j <- 1 + (lats - griddes$yfirst) / griddes$yinc + } else if(griddes$gridtype == 'gaussian') { + # We assume gaussian grid latitudes are ordered north to south + # findInterval can only work with monotonic ascending values so we revert twice + northj <- griddes$ysize-findInterval(lats, -griddes$yvals) + southj <- northj + 1 + + # Special case: We are north of the first lat + gridpoints$j[northj == 0] <- 1 + + # Special case: We are south of the last lat + gridpoints$j[southj == griddes$ysize + 1] <- griddes$ysize + + # Generic case + ok_idx <- !(northj == 0 | southj == griddes$ysize+1) + gridpoints$j[ok_idx] <- northj[ok_idx] + (griddes$yvals[northj[ok_idx]] - + lats[ok_idx])/(griddes$yvals[northj[ok_idx]] - griddes$yvals[southj[ok_idx]]) + } else { stop("Unsupported grid") } + + return(gridpoints) +} + +#====================== +# Use cdo griddes to obtain grid information +#====================== +get_griddes <- function(ncfile) { + tmp <- system(paste0("cdo griddes ", ncfile, + " 2>/dev/null | egrep 'gridtype|xsize|ysize|xfirst|xinc|yfirst|yinc'"), intern = T) + arr <- do.call(rbind, strsplit(tmp,"\\s+= ", perl = T)) + griddes <- as.list(arr[,2]) + names(griddes) <- arr[,1] + + if(griddes$gridtype == "gaussian") { + griddes$yvals <- get_lats(ncfile) + } + + # 
Convert some fields to numeric. Ensures all fields are present. + for(nm in c("xsize", "ysize", "xfirst", "yfirst", "xinc", "yinc")) { + griddes[[nm]] <- ifelse(is.null(griddes[[nm]]), NA, as.numeric(griddes[[nm]])) + } + + return(griddes) +} + +#====================== +# Use nco to obtain latitudes. Latitudes shall be named "lat" or "latitude". +#====================== +get_lats <- function(ncfile) { + + tmp <- system(paste0('ncks -H -s "%f " -v latitude ',ncfile),intern=T) + + if(!is.null(attributes(tmp)$status)) { + tmp <- system(paste0('ncks -H -s "%f " -v lat ',ncfile),intern=T) + } + + lats <- as.numeric(strsplit(tmp[1],"\\s+",perl=T)[[1]]) + + return(lats) +} + +#====================== +# Load model data at all (i,j) pairs listed in the weights dataframe. +# Uses StartR. All ... parameters go to Start (i.e. specify dat, var, +# sdate, time, ensemble, num_procs, etc) +#====================== +get_model_data <- function(weights.df, mdata) { + + #----------------- + # Get data for all combinations of i and j. + # (inefficient, getting many unneded pairs). + # Avoid retrieving duplicates with unique() + # These are the indices of the global grid + #----------------- + is <- weights.df$i + js <- weights.df$j + + #----------------- + # Get indices of original is and js in unique(is),unique(js) that were requested + #----------------- + idxi <- match(is, unique(is)) + idxj <- match(js, unique(js)) + + #----------------- + # Subsample mdata to keep only the needed (i,j) pairs. + #----------------- + if (is.na(match("longitude", names(dim(mdata))))) { + londim <- match("lon", names(dim(mdata))) + } else { + londim <- match("longitude", names(dim(mdata))) + } + if (is.na(match("latitude", names(dim(mdata))))) { + latdim <- match("lat", names(dim(mdata))) + } else { + latdim <- match("latitude", names(dim(mdata))) + } + + # trick: exchange idxi and idxj + #if(londim > latdim) { idx.tmp <- idxi; idxi <- idxj; idxj <- idx.tmp } + #keepdims <- (1:length(dim(mdata)))[-c(londim,latdim)] + + #sub_mdata <- apply(mdata, keepdims, function(x) { + # laply(1:length(is),function(k) { x[idxi[k],idxj[k]] }) }) + #names(dim(sub_mdata))[1] <- "gridpoint" + + #----------------- + # Retrieve with multiApply + #----------------- + sub_mdata <- Apply(mdata, target_dims = list(c(latdim, londim)), fun = function(x) {laply(1:length(is),function(k) { x[js[k],is[k]] }) })$output1 + names(dim(sub_mdata))[1] <- "gridpoint" + + #----------------- + # Return an array that contains as many gridpoints as (i,j) pairs were requested + #----------------- + return(sub_mdata) +} + +#====================== +# Multiply the grid-point series by the weights, +# to obtain the desired interpolations +#====================== +interpolate_data <- function(model_data, weights.df) { + #----------------- + # Multiply each gridpoint matrix by its corresponding weight + #----------------- + gpdim <- match("gridpoint", names(dim(model_data))) + weighted_data <- sweep(model_data, gpdim, weights.df$weight, "*") + + #----------------- + # Sum all series that belong to same interpolation point + # Return an array that contains the requested locations and interpolation type + #----------------- + #interp_data <- apply(weighted_data, -gpdim, function(x) { rowsum(x, weights.df$locid) }) + #names(dim(interp_data))[1] <- "location" + interp_data <- Apply(weighted_data, target_dims = gpdim, fun = function(x) { + rowsum(x, weights.df$locid)}, output_dims = c("location", "aux"))$output1 + + return(interp_data) +} + + diff --git 
a/modules/Downscaling/tmp/Intlr.R b/modules/Downscaling/tmp/Intlr.R new file mode 100644 index 00000000..565e3046 --- /dev/null +++ b/modules/Downscaling/tmp/Intlr.R @@ -0,0 +1,525 @@ +#'@rdname CST_Intlr +#'@title Downscaling using interpolation and linear regression. +#' +#'@author J. Ramon, \email{jaume.ramon@bsc.es} +#' +#'@description This function performs a downscaling using an interpolation and a linear +#'regression. Different methodologies that employ linear regressions are available. See +#'parameter 'lr_method' for more information. It is recommended that the observations +#'are passed already in the target grid. Otherwise, the function will also perform an +#'interpolation of the observed field into the target grid. The coarse scale and +#'observation data can be either global or regional. In the latter case, the region is +#'defined by the user. In principle, the coarse and observation data are intended to +#'be of the same variable, although different variables can also be admitted. +#' +#'@param exp an 's2dv object' containing the experimental field on the +#'coarse scale for which the downscaling is aimed. The object must have, at least, +#'the dimensions latitude, longitude, start date and member. The object is expected to be +#'already subset for the desired region. Data can be in one or two integrated regions, e.g., +#'crossing the Greenwich meridian. To get the correct results in the latter case, +#'the borders of the region should be specified in the parameter 'region'. See parameter +#''region'. +#'@param obs an 's2dv object' containing the observational field. The object +#'must have, at least, the dimensions latitude, longitude and start date. The object is +#'expected to be already subset for the desired region. +#'@param lr_method a character vector indicating the linear regression method to be applied +#'after the interpolation. Accepted methods are 'basic', 'large-scale' and '4nn'. The 'basic' +#'method fits a linear regression using high resolution observations as predictands and the +#'interpolated model data as predictor. Then, the regression equation is to the interpolated +#'model data to correct the interpolated values. The 'large-scale' method fits a linear +#'regression with large-scale predictors from the same model (e.g. teleconnection indices) +#'as predictors and high-resolution observations as predictands. This equation is then +#'applied to the interpolated model values. Finally, the '4nn' method uses a linear +#'regression with the four nearest neighbours as predictors and high-resolution observations +#'as predictands. It is then applied to model data to correct the interpolated values. +#'@param target_grid a character vector indicating the target grid to be passed to CDO. +#'It must be a grid recognised by CDO or a NetCDF file. +#'@param points a list of two elements containing the point latitudes and longitudes +#'of the locations to downscale the model data. The list must contain the two elements +#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is +#'to a point location, only regular grids are allowed for exp and obs. Only needed if the +#'downscaling is to a point location. +#'@param int_method a character vector indicating the regridding method to be passed +#'to CDORemap. Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is +#'to be used, CDO_1.9.8 or newer version is required. 
+#'@param method_point_interp a character vector indicating the interpolation method to
+#'interpolate model gridded data into the point locations. Accepted methods are "nearest",
+#'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW".
+#'@param predictors an array with large-scale data to be used in the 'large-scale' method.
+#'Only needed if the linear regression method is set to 'large-scale'. It must have, at
+#'least the dimension start date and another dimension whose name has to be specified in
+#'the parameter 'large_scale_predictor_dimname'. It should contain as many elements as the
+#'number of large-scale predictors.
+#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data'
+#'in exp and obs. Default set to "lat".
+#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data'
+#'in exp and obs. Default set to "lon".
+#'@param sdate_dim a character vector indicating the start date dimension name in the element
+#''data' in exp and obs. Default set to "sdate".
+#'@param time_dim a character vector indicating the time dimension name in the element
+#''data' in exp and obs. Default set to "time".
+#'@param large_scale_predictor_dimname a character vector indicating the name of the
+#'dimension in 'predictors' that contains the predictor variables. See parameter 'predictors'.
+#'@param loocv a logical indicating whether to apply leave-one-out cross-validation when
+#'generating the linear regressions. Default to FALSE.
+#'@param region a numeric vector indicating the borders of the region defined in exp.
+#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers
+#'to the left border, while lonmax refers to the right border. latmin indicates the lower
+#'border, whereas latmax indicates the upper border. If set to NULL (default), the function
+#'takes the first and last elements of the latitudes and longitudes.
+#'@param ncores an integer indicating the number of cores to use in parallel computation.
+#'
+#'@import multiApply
+#'
+#'@return A list of three elements. 'data' contains the downscaled field, 'lat' the
+#'downscaled latitudes, and 'lon' the downscaled longitudes. 
+#'@examples +#'exp <- rnorm(500) +#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5) +#'exp_lons <- 1:5 +#'exp_lats <- 1:4 +#'obs <- rnorm(900) +#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5) +#'obs_lons <- seq(1,5, 4/14) +#'obs_lats <- seq(1,4, 3/11) +#'exp <- s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons) +#'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons) +#'res <- CST_Intlr(exp = exp, obs = obs, target_grid = 'r1280x640', lr_method = 'basic', int_method = 'conservative') +#'@export +CST_Intlr <- function(exp, obs, lr_method, target_grid = NULL, points = NULL, int_method = NULL, + method_point_interp = NULL, predictors = NULL, lat_dim = "lat", lon_dim = "lon", + sdate_dim = "sdate", time_dim = "time", member_dim = "member", + large_scale_predictor_dimname = 'vars', loocv = FALSE, region = NULL, ncores = 1) { + + if (!inherits(exp,'s2dv_cube')) { + stop("Parameter 'exp' must be of the class 's2dv_cube'") + } + + if (!inherits(obs,'s2dv_cube')) { + stop("Parameter 'obs' must be of the class 's2dv_cube'") + } + + res <- Intlr(exp = exp$data, obs = obs$data, exp_lats = exp$lat, exp_lons = exp$lon, + obs_lats = obs$lat, obs_lons = obs$lon, points = points, source_file_exp = exp$source_files[1], + source_file_obs = obs$source_files[1], target_grid = target_grid, lr_method = lr_method, + int_method = int_method, method_point_interp = method_point_interp, predictors = predictors, + lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim, time_dim = time_dim, + member_dim = member_dim, large_scale_predictor_dimname = large_scale_predictor_dimname, + loocv = loocv, region = region, ncores = ncores) + + # Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data + exp$data <- res$data + exp$lon <- res$lon + exp$lat <- res$lat + + obs$data <- res$obs + obs$lat <- res$lat + obs$lon <- res$lon + + res_s2dv <- list(exp = exp, obs = obs) + return(res_s2dv) +} + +#'@rdname Intlr +#'@title Downscaling using interpolation and linear regression. +#' +#'@author J. Ramon, \email{jaume.ramon@bsc.es} +#' +#'@description This function performs a downscaling using an interpolation and a linear +#'regression. Different methodologies that employ linear regressions are available. See +#'parameter 'lr_method' for more information. It is recommended that the observations +#'are passed already in the target grid. Otherwise, the function will also perform an +#'interpolation of the observed field into the target grid. The coarse scale and +#'observation data can be either global or regional. In the latter case, the region is +#'defined by the user. In principle, the coarse and observation data are intended to +#'be of the same variable, although different variables can also be admitted. +#' +#'@param exp an array with named dimensions containing the experimental field on the +#'coarse scale for which the downscaling is aimed. The object must have, at least, +#'the dimensions latitude, longitude and start date. The object is expected to be +#'already subset for the desired region. Data can be in one or two integrated regions, e.g., +#'crossing the Greenwich meridian. To get the correct results in the latter case, +#'the borders of the region should be specified in the parameter 'region'. See parameter +#''region'. +#'@param obs an array with named dimensions containing the observational field. The object +#'must have, at least, the dimensions latitude, longitude and start date. The object is +#'expected to be already subset for the desired region. 
+#'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must +#'range from -90 to 90. +#'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes +#'can range from -180 to 180 or from 0 to 360. +#'@param obs_lats a numeric vector containing the latitude values in 'obs'. Latitudes must +#'range from -90 to 90. +#'@param obs_lons a numeric vector containing the longitude values in 'obs'. Longitudes +#'can range from -180 to 180 or from 0 to 360. +#'@param lr_method a character vector indicating the linear regression method to be applied +#'after the interpolation. Accepted methods are 'basic', 'large-scale' and '4nn'. The 'basic' +#'method fits a linear regression using high resolution observations as predictands and the +#'interpolated model data as predictor. Then, the regression equation is to the interpolated +#'model data to correct the interpolated values. The 'large-scale' method fits a linear +#'regression with large-scale predictors from the same model (e.g. teleconnection indices) +#'as predictors and high-resolution observations as predictands. This equation is then +#'applied to the interpolated model values. Finally, the '4nn' method uses a linear +#'regression with the four nearest neighbours as predictors and high-resolution observations +#'as predictands. It is then applied to model data to correct the interpolated values. +#'@param target_grid a character vector indicating the target grid to be passed to CDO. +#'It must be a grid recognised by CDO or a NetCDF file. +#'@param points a list of two elements containing the point latitudes and longitudes +#'of the locations to downscale the model data. The list must contain the two elements +#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is +#'to a point location, only regular grids are allowed for exp and obs. Only needed if the +#'downscaling is to a point location. +#'@param int_method a character vector indicating the regridding method to be passed +#'to CDORemap. Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is +#'to be used, CDO_1.9.8 or newer version is required. +#'@param method_point_interp a character vector indicating the interpolation method to +#'interpolate model gridded data into the point locations. Accepted methods are "nearest", +#'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW". +#'@param source_file_exp a character vector with a path to an example file of the exp data. +#'Only needed if the downscaling is to a point location. +#'@param source_file_obs a character vector with a path to an example file of the obs data. +#'Only needed if the downscaling is to a point location. +#'@param predictors an array with large-scale data to be used in the 'large-scale' method. +#'Only needed if the linear regression method is set to 'large-scale'. It must have, at +#'least the dimension start date and another dimension whose name has to be specified in +#'the parameter 'large_scale_predictor_dimname'. It should contain as many elements as the +#'number of large-scale predictors. +#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data' +#'in exp and obs. Default set to "lat". +#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data' +#'in exp and obs. Default set to "lon". +#'@param sdate_dim a character vector indicating the start date dimension name in the element +#''data' in exp and obs. 
Default set to "sdate". +#'@param time_dim a character vector indicating the time dimension name in the element +#''data' in exp and obs. Default set to "time". +#'@param region a numeric vector indicating the borders of the region defined in exp. +#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers +#'to the left border, while lonmax refers to the right border. latmin indicates the lower +#'border, whereas latmax indicates the upper border. If set to NULL (default), the function +#'takes the first and last elements of the latitudes and longitudes. +#'@param large_scale_predictor_dimname a character vector indicating the name of the +#'dimension in 'predictors' that contain the predictor variables. See parameter 'predictors'. +#'@param loocv a logical indicating whether to apply leave-one-out cross-validation when +#'generating the linear regressions. Default to FALSE. +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#' +#'@import multiApply +#' +#'@return A list of three elements. 'data' contains the dowscaled field, 'lat' the +#'downscaled latitudes, and 'lon' the downscaled longitudes. +#'@examples +#'exp <- rnorm(500) +#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5) +#'exp_lons <- 1:5 +#'exp_lats <- 1:4 +#'obs <- rnorm(900) +#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5) +#'obs_lons <- seq(1,5, 4/14) +#'obs_lats <- seq(1,4, 3/11) +#'res <- Intlr(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons, obs_lats = obs_lats, +#'obs_lons = obs_lons, target_grid = 'r1280x640', lr_method = 'basic', int_method = 'conservative') +#'@export +Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, target_grid = NULL, points = NULL, + int_method = NULL, method_point_interp = NULL, source_file_exp = NULL, source_file_obs = NULL, + predictors = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", + member_dim = "member", region = NULL, large_scale_predictor_dimname = 'vars', loocv = FALSE, + ncores = 1) { + + #----------------------------------- + # Checkings + #----------------------------------- + if (!inherits(lr_method, 'character')) { + stop("Parameter 'lr_method' must be of the class 'character'") + } + + if (!inherits(large_scale_predictor_dimname, 'character')) { + stop("Parameter 'large_scale_predictor_dimname' must be of the class 'character'") + } + + if (!inherits(loocv, 'logical')) { + stop("Parameter 'loocv' must be set to TRUE or FALSE") + } + + if (!inherits(lat_dim, 'character')) { + stop("Parameter 'lat_dim' must be of the class 'character'") + } + + if (!inherits(lon_dim, 'character')) { + stop("Parameter 'lon_dim' must be of the class 'character'") + } + + if (!inherits(sdate_dim, 'character')) { + stop("Parameter 'sdate_dim' must be of the class 'character'") + } + + if (!inherits(large_scale_predictor_dimname, 'character')) { + stop("Parameter 'large_scale_predictor_dimname' must be of the class 'character'") + } + + if (is.na(match(lon_dim, names(dim(exp)))) | is.na(match(lon_dim, names(dim(obs))))) { + stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'lon_dim'") + } + + if (is.na(match(lat_dim, names(dim(exp)))) | is.na(match(lat_dim, names(dim(obs))))) { + stop("Missing latitude dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'lat_dim'") + } + + if (is.na(match(sdate_dim, names(dim(exp)))) | is.na(match(sdate_dim, names(dim(obs))))) { + stop("Missing start 
date dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'sdate_dim'") + } + + if (!is.null(points) & (is.null(source_file_exp) | is.null(source_file_obs))) { + stop("No source files found. Source files for exp and obs must be provided in the parameters ", + "'source_file_exp' and 'source_file_obs', respectively.") + } + + if (!is.null(points) & is.null(method_point_interp)) { + stop("Please provide the interpolation method to interpolate gridded data to point locations ", + "through the parameter 'method_point_interp'.") + } + + # sdate must be the time dimension in the input data + stopifnot(sdate_dim %in% names(dim(exp))) + stopifnot(sdate_dim %in% names(dim(obs))) + + # the code is not yet prepared to handle members in the observations + restore_ens <- FALSE + if (member_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[member_dim]), 1)) { + obs <- ClimProjDiags::Subset(x = obs, along = member_dim, indices = 1, drop = 'selected') + restore_ens <- TRUE + } else { + stop("Not implemented for observations with members ('obs' can have 'member_dim', ", + "but it should be of length = 1).") + } + } + + # checkings for the parametre 'predictors' + if (!is.null(predictors)) { + if (!is.array(predictors)) { + stop("Parameter 'predictors' must be of the class 'array'") + } else { + # ensure the predictor variable name matches the parametre large_scale_predictor_dimname + stopifnot(large_scale_predictor_dimname %in% names(dim(predictors))) + stopifnot(sdate_dim %in% names(dim(predictors))) + stopifnot(dim(predictors)[sdate_dim] == dim(exp)[sdate_dim]) + } + } + + #----------------------------------- + # Interpolation + #----------------------------------- + if (lr_method != '4nn') { + + if (is.null(int_method)) { + stop("Parameter 'int_method' must be a character vector indicating the interpolation method. ", + "Accepted methods are con, bil, bic, nn, con2") + } + + if (is.null(region)) { + warning("The borders of the downscaling region have not been provided. 
Assuming the ", + "four borders of the downscaling region are defined by the first and last ", + "elements of the parametres 'obs_lats' and 'obs_lons'.") + region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)]) + } + + exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid, + points = points, method_point_interp = method_point_interp, + source_file = source_file_exp, lat_dim = lat_dim, lon_dim = lon_dim, + method_remap = int_method, region = region) + + # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to + # the same grid to force the matching + if ((!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats, + lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !is.null(points)) { + obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid, + points = points, method_point_interp = method_point_interp, + source_file = source_file_obs, lat_dim = lat_dim, lon_dim = lon_dim, + method_remap = int_method, region = region) + + lats <- obs_interpolated$lat + lons <- obs_interpolated$lon + obs_interpolated <- obs_interpolated$data + } else { + obs_interpolated <- obs + lats <- obs_lats + lons <- obs_lons + } + } + + #----------------------------------- + # Linear regressions + #----------------------------------- + # Pointwise linear regression + # Predictor: model data + # Predictand: observations + if (lr_method == 'basic') { + predictor <- exp_interpolated$data + predictand <- obs_interpolated + + target_dims_predictor <- sdate_dim + target_dims_predictand <- sdate_dim + } + + # (Multi) linear regression with large-scale predictors + # Predictor: passed through the parameter 'predictors' by the user. 
Can be model or observations
+  # Predictand: model data
+  else if (lr_method == 'large-scale') {
+    if (is.null(predictors)) {
+      stop("The large-scale predictors must be passed through the parameter 'predictors'")
+    }
+
+    predictand <- obs_interpolated
+    predictor <- predictors
+
+    target_dims_predictor <- c(sdate_dim, large_scale_predictor_dimname)
+    target_dims_predictand <- sdate_dim
+  }
+
+  # Multi-linear regression with the four nearest neighbours
+  # Predictors: model data
+  # Predictand: observations
+  else if (lr_method == '4nn') {
+    predictor <- find_nn(coar = exp, lats_hres = obs_lats, lons_hres = obs_lons, lats_coar = exp_lats,
+                         lons_coar = exp_lons, lat_dim = lat_dim, lon_dim = lon_dim, nn = 4)
+
+    if (is.null(points)) {
+      if (!is.null(target_grid)) {
+        warning("Interpolating to the 'obs' grid")
+      }
+      predictand <- obs
+
+      lats <- obs_lats
+      lons <- obs_lons
+    }
+    # If the downscaling is to point locations: once the 4 nearest neighbours have been found,
+    # interpolate to point locations
+    else {
+      predictor <- Interpolation(exp = predictor, lats = obs_lats, lons = obs_lons, target_grid = NULL,
+                                 points = points, method_point_interp = method_point_interp,
+                                 source_file = source_file_obs, method_remap = NULL, region = region)
+
+      predictand <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = NULL,
+                                  points = points, method_point_interp = method_point_interp,
+                                  source_file = source_file_obs, method_remap = NULL, region = region)
+
+      lats <- predictor$lat
+      lons <- predictor$lon
+      predictor <- predictor$data
+      predictand <- predictand$data
+    }
+
+    target_dims_predictor <- c(sdate_dim,'nn')
+    target_dims_predictand <- sdate_dim
+  }
+
+  else {
+    stop(paste0(lr_method, " method is not implemented yet"))
+  }
+
+  # Apply the linear regressions
+  res <- Apply(list(predictor, predictand), target_dims = list(target_dims_predictor, target_dims_predictand),
+               fun = .intlr, loocv = loocv, ncores = ncores)$output1
+
+  names(dim(res))[1] <- sdate_dim
+
+  # restore ensemble dimension in observations if it existed originally
+  if (restore_ens) {
+    predictand <- s2dv::InsertDim(predictand, posdim = 1, lendim = 1, name = member_dim)
+  }
+
+  # Return a list of three elements
+  res <- list(data = res, obs = predictand, lon = lons, lat = lats)
+
+  return(res)
+}
+
+#-----------------------------------
+# Atomic function to generate and apply the linear regressions
+#-----------------------------------
+.intlr <- function(x, y, loocv) {
+
+  tmp_df <- data.frame(x = x, y = y)
+
+  # if the data is all NA, force return NA
+  if (all(is.na(tmp_df)) | (sum(apply(tmp_df, 2, function(x) !all(is.na(x)))) == 1)) {
+
+    n <- nrow(tmp_df)
+    res <- rep(NA, n)
+
+  } else {
+    # training
+    lm1 <- train_lm(df = tmp_df, loocv = loocv)
+
+    # prediction
+    res <- pred_lm(lm1 = lm1, df = tmp_df, loocv = loocv)
+  }
+
+  return(res)
+
+}
+
+#-----------------------------------
+# Function to generate the linear regressions.
+# Returns a list
+#-----------------------------------
+train_lm <- function(df, loocv) {
+
+  # Remove columns containing only NA's
+  df <- df[ , apply(df, 2, function(x) !all(is.na(x)))]
+
+  if (loocv) {
+
+    lm1 <- lapply(1:nrow(df), function(j) lm(df[-j,], formula = y ~ .))
+
+  } else {
+
+    lm1 <- list(lm(data = df, formula = y ~ .))
+  }
+
+  return(lm1)
+}
+
+#-----------------------------------
+# Function to apply the linear regressions. 
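A minimal illustration (not from the CSDownscale sources) of how train_lm() and pred_lm() combine under leave-one-out cross-validation; the toy data frame below, with one predictor column 'x' and predictand 'y', is hypothetical:
  toy <- data.frame(x = rnorm(10), y = rnorm(10))
  fits <- train_lm(df = toy, loocv = TRUE)               # one lm() fit per left-out row
  preds <- pred_lm(df = toy, lm1 = fits, loocv = TRUE)   # each fit predicts its held-out row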
+#----------------------------------- +pred_lm <- function(df, lm1, loocv) { + + if (loocv) { + + pred_vals <- sapply(1:nrow(df), function(j) predict(lm1[[j]], df[j,])) + + } else { + + pred_vals_ls <- lapply(lm1, predict, data = df) + pred_vals <- unlist(pred_vals_ls) + } + + return(pred_vals) +} + +#----------------------------------- +# Function to find N nearest neighbours. +# 'coar' is an array with named dimensions +#----------------------------------- +find_nn <- function(coar, lats_hres, lons_hres, lats_coar, lons_coar, lat_dim, lon_dim, nn = 4) { + + # Sort the distances from closest to furthest + idx_lat <- as.array(sapply(lats_hres, function(x) order(abs(lats_coar - x))[1:nn])) + idx_lon <- as.array(sapply(lons_hres, function(x) order(abs(lons_coar - x))[1:nn])) + + names(dim(idx_lat)) <- c('nn', lat_dim) + names(dim(idx_lon)) <- c('nn', lon_dim) + + # obtain the values of the nearest neighbours + nearest <- Apply(list(coar, idx_lat, idx_lon), + target_dims = list(c(lat_dim, lon_dim), lat_dim, lon_dim), + fun = function(x, y, z) x[y, z])$output1 + + return(nearest) +} + + diff --git a/modules/Downscaling/tmp/LogisticReg.R b/modules/Downscaling/tmp/LogisticReg.R new file mode 100644 index 00000000..24be6936 --- /dev/null +++ b/modules/Downscaling/tmp/LogisticReg.R @@ -0,0 +1,497 @@ +#'@rdname CST_LogisticReg +#'@title Downscaling using interpolation and logistic regression. +#' +#'@author J. Ramon, \email{jaume.ramon@bsc.es} +#' +#'@description This function performs a downscaling using an interpolation and a logistic +#'regression. See \code{\link[nnet]{multinom}} for further details. It is recommended that +#'the observations are passed already in the target grid. Otherwise, the function will also +#'perform an interpolation of the observed field into the target grid. The coarse scale and +#'observation data can be either global or regional. In the latter case, the region is +#'defined by the user. In principle, the coarse and observation data are intended to be of +#'the same variable, although different variables can also be admitted. +#' +#'@param exp an 's2dv object' with named dimensions containing the experimental field on +#'the coarse scale for which the downscaling is aimed. The object must have, at least, +#'the dimensions latitude, longitude, start date and member. The object is expected to be +#'already subset for the desired region. Data can be in one or two integrated regions, e.g., +#'crossing the Greenwich meridian. To get the correct results in the latter case, +#'the borders of the region should be specified in the parameter 'region'. See parameter +#''region'. +#'@param obs an 's2dv object' with named dimensions containing the observational field. +#'The object must have, at least, the dimensions latitude, longitude and start date. The +#'object is expected to be already subset for the desired region. +#'@param target_grid a character vector indicating the target grid to be passed to CDO. +#'It must be a grid recognised by CDO or a NetCDF file. +#'@param int_method a character vector indicating the regridding method to be passed to CDORemap. +#'Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is to be used, CDO_1.9.8 +#'or newer version is required. +#'@param log_reg_method a character vector indicating the logistic regression method to be used. +#'Accepted methods are "ens_mean", "ens_mean_sd", "sorted_members". 
"ens_mean" uses the ensemble +#'mean anomalies as predictors in the logistic regression, "ens_mean_sd" uses the ensemble +#'mean anomalies and the ensemble spread (computed as the standard deviation of all the members) +#'as predictors in the logistic regression, and "sorted_members" considers all the members +#'ordered decreasingly as predictors in the logistic regression. Default method is "ens_mean". +#'@param probs_cat a numeric vector indicating the percentile thresholds separating the +#'climatological distribution into different classes (categories). Default to c(1/3, 2/3). See +#'\code{\link[easyVerification]{convert2prob}}. +#'@param return_most_likely_cat if TRUE, the function returns the most likely category. If +#'FALSE, the function returns the probabilities for each category. Default to FALSE. +#'@param points a list of two elements containing the point latitudes and longitudes +#'of the locations to downscale the model data. The list must contain the two elements +#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is +#'to a point location, only regular grids are allowed for exp and obs. Only needed if the +#'downscaling is to a point location. +#'@param method_point_interp a character vector indicating the interpolation method to interpolate +#'model gridded data into the point locations. Accepted methods are "nearest", "bilinear", "9point", +#'"invdist4nn", "NE", "NW", "SE", "SW". Only needed if the downscaling is to a point location. +#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data' +#'in exp and obs. Default set to "lat". +#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data' +#'in exp and obs. Default set to "lon". +#'@param sdate_dim a character vector indicating the start date dimension name in the element +#''data' in exp and obs. Default set to "sdate". +#'@param member_dim a character vector indicating the member dimension name in the element +#''data' in exp and obs. Default set to "member". +#'@param source_file a character vector with a path to an example file of the exp data. +#'Only needed if the downscaling is to a point location. +#'@param region a numeric vector indicating the borders of the region defined in obs. +#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers +#'to the left border, while lonmax refers to the right border. latmin indicates the lower +#'border, whereas latmax indicates the upper border. If set to NULL (default), the function +#'takes the first and last elements of the latitudes and longitudes. +#'@param loocv a logical vector indicating whether to perform leave-one-out cross-validation +#'in the fitting of the logistic regression. Default to FALSE. +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#' +#'@import multiApply +#'@import nnet +#'@importFrom laply plyr +#' +#'@seealso \code{\link[nnet]{multinom}} +#' +#'@return An list of three elements. 'data' contains the dowscaled data, that could be either +#'in the form of probabilities for each category or the most likely category. 'lat' contains the +#'downscaled latitudes, and 'lon' the downscaled longitudes. 
+#' +#'@examples +#'exp <- rnorm(1500) +#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 15) +#'exp_lons <- 1:5 +#'exp_lats <- 1:4 +#'obs <- rnorm(2700) +#'dim(obs) <- c(lat = 12, lon = 15, sdate = 15) +#'obs_lons <- seq(1,5, 4/14) +#'obs_lats <- seq(1,4, 3/11) +#'exp <- s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons) +#'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons) +#'res <- CST_LogisticReg(exp = exp, obs = obs, int_method = 'bil', target_grid = 'r1280x640', +#'probs_cat = c(1/3, 2/3)) +#'@export +CST_LogisticReg <- function(exp, obs, target_grid, int_method = NULL, log_reg_method = "ens_mean", + probs_cat = c(1/3,2/3), return_most_likely_cat = FALSE, points = NULL, + method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", + member_dim = "member", region = NULL, loocv = FALSE, ncores = 1) { + + if (!inherits(exp,'s2dv_cube')) { + stop("Parameter 'exp' must be of the class 's2dv_cube'") + } + + if (!inherits(obs,'s2dv_cube')) { + stop("Parameter 'obs' must be of the class 's2dv_cube'") + } + + res <- LogisticReg(exp = exp$data, obs = obs$data, exp_lats = exp$lat, exp_lons = exp$lon, + obs_lats = obs$lat, obs_lons = obs$lon, target_grid = target_grid, + probs_cat = probs_cat, return_most_likely_cat = return_most_likely_cat, + int_method = int_method, log_reg_method = log_reg_method, points = points, + method_point_interp = method_point_interp, lat_dim = lat_dim, + lon_dim = lon_dim, sdate_dim = sdate_dim, member_dim = member_dim, + source_file = exp$source_files[1], region = region, loocv = loocv, + ncores = ncores) + + # Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data + exp$data <- res$data + exp$lon <- res$lon + exp$lat <- res$lat + + obs$data <- res$obs + obs$lat <- res$lat + obs$lon <- res$lon + + res_s2dv <- list(exp = exp, obs = obs) + return(res_s2dv) +} + +#'@rdname LogisticReg +#'@title Downscaling using interpolation and logistic regression. +#' +#'@author J. Ramon, \email{jaume.ramon@bsc.es} +#' +#'@description This function performs a downscaling using an interpolation and a logistic +#'regression. See \code{\link[nnet]{multinom}} for further details. It is recommended that +#'the observations are passed already in the target grid. Otherwise, the function will also +#'perform an interpolation of the observed field into the target grid. The coarse scale and +#'observation data can be either global or regional. In the latter case, the region is +#'defined by the user. In principle, the coarse and observation data are intended to be of +#'the same variable, although different variables can also be admitted. +#' +#'@param exp an array with named dimensions containing the experimental field on the +#'coarse scale for which the downscaling is aimed. The object must have, at least, +#'the dimensions latitude, longitude, start date and member. The object is expected to be +#'already subset for the desired region. Data can be in one or two integrated regions, e.g., +#'crossing the Greenwich meridian. To get the correct results in the latter case, +#'the borders of the region should be specified in the parameter 'region'. See parameter +#''region'. +#'@param obs an array with named dimensions containing the observational field. The object +#'must have, at least, the dimensions latitude, longitude and start date. The object is +#'expected to be already subset for the desired region. +#'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must +#'range from -90 to 90. 
+#'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes +#'can range from -180 to 180 or from 0 to 360. +#'@param obs_lats a numeric vector containing the latitude values in 'obs'. Latitudes must +#'range from -90 to 90. +#'@param obs_lons a numeric vector containing the longitude values in 'obs'. Longitudes +#'can range from -180 to 180 or from 0 to 360. +#'@param target_grid a character vector indicating the target grid to be passed to CDO. +#'It must be a grid recognised by CDO or a NetCDF file. +#'@param int_method a character vector indicating the regridding method to be passed to CDORemap. +#'Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is to be used, CDO_1.9.8 +#'or newer version is required. +#'@param log_reg_method a character vector indicating the logistic regression method to be used. +#'Accepted methods are "ens_mean", "ens_mean_sd", "sorted_members". "ens_mean" uses the ensemble +#'mean anomalies as predictors in the logistic regression, "ens_mean_sd" uses the ensemble +#'mean anomalies and the ensemble spread (computed as the standard deviation of all the members) +#'as predictors in the logistic regression, and "sorted_members" considers all the members +#'ordered decreasingly as predictors in the logistic regression. Default method is "ens_mean". +#'@param probs_cat a numeric vector indicating the percentile thresholds separating the +#'climatological distribution into different classes (categories). Default to c(1/3, 2/3). See +#'\code{\link[easyVerification]{convert2prob}}. +#'@param return_most_likely_cat if TRUE, the function returns the most likely category. If +#'FALSE, the function returns the probabilities for each category. Default to FALSE. +#'@param points a list of two elements containing the point latitudes and longitudes +#'of the locations to downscale the model data. The list must contain the two elements +#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is +#'to a point location, only regular grids are allowed for exp and obs. Only needed if the +#'downscaling is to a point location. +#'@param method_point_interp a character vector indicating the interpolation method to interpolate +#'model gridded data into the point locations. Accepted methods are "nearest", "bilinear", "9point", +#'"invdist4nn", "NE", "NW", "SE", "SW". Only needed if the downscaling is to a point location. +#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data' +#'in exp and obs. Default set to "lat". +#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data' +#'in exp and obs. Default set to "lon". +#'@param sdate_dim a character vector indicating the start date dimension name in the element +#''data' in exp and obs. Default set to "sdate". +#'@param member_dim a character vector indicating the member dimension name in the element +#''data' in exp and obs. Default set to "member". +#'@param source_file a character vector with a path to an example file of the exp data. +#'Only needed if the downscaling is to a point location. +#'@param region a numeric vector indicating the borders of the region defined in obs. +#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers +#'to the left border, while lonmax refers to the right border. latmin indicates the lower +#'border, whereas latmax indicates the upper border. 
If set to NULL (default), the function
+#'takes the first and last elements of the latitudes and longitudes.
+#'@param loocv a logical vector indicating whether to perform leave-one-out cross-validation
+#'in the fitting of the logistic regression. Default to FALSE.
+#'@param ncores an integer indicating the number of cores to use in parallel computation.
+#'
+#'@import multiApply
+#'@import nnet
+#'@importFrom plyr laply
+#'
+#'@seealso \code{\link[nnet]{multinom}}
+#'
+#'@return A list of three elements. 'data' contains the downscaled data, which can be either
+#'in the form of probabilities for each category or the most likely category. 'lat' contains the
+#'downscaled latitudes, and 'lon' the downscaled longitudes.
+#'
+#'@examples
+#'exp <- rnorm(1500)
+#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 15)
+#'exp_lons <- 1:5
+#'exp_lats <- 1:4
+#'obs <- rnorm(2700)
+#'dim(obs) <- c(lat = 12, lon = 15, sdate = 15)
+#'obs_lons <- seq(1,5, 4/14)
+#'obs_lats <- seq(1,4, 3/11)
+#'res <- LogisticReg(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons,
+#'obs_lats = obs_lats, obs_lons = obs_lons, int_method = 'bil', target_grid = 'r1280x640',
+#'probs_cat = c(1/3, 2/3))
+#'@export
+LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid,
+                        int_method = NULL, log_reg_method = "ens_mean", probs_cat = c(1/3,2/3),
+                        return_most_likely_cat = FALSE, points = NULL, method_point_interp = NULL,
+                        lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", member_dim = "member",
+                        source_file = NULL, region = NULL, loocv = FALSE, ncores = 1) {
+
+  #-----------------------------------
+  # Checkings
+  #-----------------------------------
+  if (!inherits(target_grid, 'character')) {
+    stop("Parameter 'target_grid' must be of the class 'character'")
+  }
+
+  if (!is.null(int_method) & !inherits(int_method, 'character')) {
+    stop("Parameter 'int_method' must be of the class 'character'")
+  }
+
+  if (!is.null(method_point_interp) & !inherits(method_point_interp, 'character')) {
+    stop("Parameter 'method_point_interp' must be of the class 'character'")
+  }
+
+  if (!inherits(lat_dim, 'character')) {
+    stop("Parameter 'lat_dim' must be of the class 'character'")
+  }
+
+  if (!inherits(lon_dim, 'character')) {
+    stop("Parameter 'lon_dim' must be of the class 'character'")
+  }
+
+  if (!inherits(sdate_dim, 'character')) {
+    stop("Parameter 'sdate_dim' must be of the class 'character'")
+  }
+
+  if (!inherits(member_dim, 'character')) {
+    stop("Parameter 'member_dim' must be of the class 'character'")
+  }
+
+  if (!is.null(source_file) & !inherits(source_file, 'character')) {
+    stop("Parameter 'source_file' must be of the class 'character'")
+  }
+
+  if (!inherits(loocv, 'logical')) {
+    stop("Parameter 'loocv' must be set to TRUE or FALSE")
+  }
+
+  if (is.na(match(lon_dim, names(dim(exp)))) | is.na(match(lon_dim, names(dim(obs))))) {
+    stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter ",
+         "'lon_dim'")
+  }
+
+  if (is.na(match(lat_dim, names(dim(exp)))) | is.na(match(lat_dim, names(dim(obs))))) {
+    stop("Missing latitude dimension in 'exp' and/or 'obs', or does not match the parameter ",
+         "'lat_dim'")
+  }
+
+  if (is.na(match(sdate_dim, names(dim(exp)))) | is.na(match(sdate_dim, names(dim(obs))))) {
+    stop("Missing start date dimension in 'exp' and/or 'obs', or does not match the parameter ",
+         "'sdate_dim'")
+  }
+
+  if (is.na(match(member_dim, names(dim(exp))))) {
+    stop("Missing member dimension in 'exp' and/or 'obs', or does not match the 
parameter ", + "'member_dim'") + } + + if (!is.null(points) & (is.null(source_file))) { + stop("No source files found. One source file for exp must be provided in the parameter 'source_file'.") + } + + if (!is.null(points) & is.null(method_point_interp)) { + stop("Please provide the interpolation method to interpolate gridded data to point locations ", + "through the parameter 'method_point_interp'.") + } + + if (is.null(region)) { + warning("The borders of the downscaling region have not been provided. Assuming the four borders ", + "of the downscaling region are defined by the first and last elements of the parametres ", + "'obs_lats' and 'obs_lons'.") + region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)]) + } + + # the code is not yet prepared to handle members in the observations + restore_ens <- FALSE + if (member_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[member_dim]), 1)) { + restore_ens <- TRUE + obs <- ClimProjDiags::Subset(x = obs, along = member_dim, indices = 1, drop = 'selected') + } else { + stop("Not implemented for observations with members ('obs' can have 'member_dim', ", + "but it should be of length = 1).") + } + } + + exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid, + method_remap = int_method, points = points, source_file = source_file, + lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp, + region = region) + + # compute ensemble mean anomalies + if (log_reg_method == "ens_mean") { + predictor <- get_ens_mean_anom(obj_ens = exp_interpolated$data, member_dim = member_dim, sdate_dim = sdate_dim) + + target_dims_predictor <- sdate_dim + } + else if (log_reg_method == "ens_mean_sd") { + ens_mean_anom <- get_ens_mean_anom(obj_ens = exp_interpolated$data, member_dim = member_dim, + sdate_dim = sdate_dim) + ens_sd <- get_ens_sd(obj_ens = exp_interpolated$data, member_dim = member_dim) + + #merge two arrays into one array of predictors + predictor <- abind(ens_mean_anom, ens_sd, along = 1/2) + names(dim(predictor)) <- c("pred", names(dim(ens_mean_anom))) + + target_dims_predictor <- c(sdate_dim, "pred") + } else if (log_reg_method == "sorted_members") { + predictor <- sort_members(obj_ens = exp_interpolated$data, member_dim = member_dim) + + target_dims_predictor <- c(sdate_dim, member_dim) + } else { + stop(paste0(log_reg_method, " not recognised or not implemented.")) + } + + # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to + # the same grid to force the matching + if ((!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats, + lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !is.null(points)) { + obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid, + method_remap = int_method, points = points, source_file = source_file, + lat_dim = lat_dim, lon_dim = lon_dim, + method_point_interp = method_point_interp, region = region) + obs_ref <- obs_interpolated$data + } else { + obs_ref <- obs + } + + # convert observations to categorical predictands + obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) { + terc <- convert2prob(as.vector(x), prob = probs_cat) + apply(terc, 1, function(r) which (r == 1))}, + output_dims = sdate_dim)$output1 + + res <- Apply(list(predictor, obs_cat), target_dims = list(target_dims_predictor, sdate_dim), fun = function(x, y) + .log_reg(x = x, y = y, loocv = loocv), output_dims = c(sdate_dim, 
"category"))$output1 + + if (return_most_likely_cat) { + res <- Apply(res, target_dims = c(sdate_dim, "category"), most_likely_category, + output_dims = sdate_dim)$output1 + } + + # restore ensemble dimension in observations if it existed originally + if (restore_ens) { + obs_ref <- s2dv::InsertDim(obs_ref, posdim = 1, lendim = 1, name = member_dim) + } + + res <- list(data = res, obs = obs_ref, lon = exp_interpolated$lon, lat = exp_interpolated$lat) + + return(res) +} + +most_likely_category <- function(data) { +# data, expected dims: start date, category (in this order) + + if (all(is.na(data))) { + mlc <- rep(NA, nrow(data)) + } else { + mlc <- apply(data, 1, which.max) + } + return(mlc) +} + +sort_members <- function(obj_ens, member_dim) { + + sorted <- Apply(obj_ens, target_dims = member_dim, sort, decreasing = TRUE, na.last = TRUE)$output1 + + return(sorted) +} + +get_ens_sd <- function(obj_ens, member_dim) { + + # compute ensemble spread + ens_sd <- Apply(obj_ens, target_dims = member_dim, sd, na.rm = TRUE)$output1 + + return(ens_sd) +} + +get_ens_mean_anom <- function(obj_ens, member_dim, sdate_dim) { + + require(s2dv) + + # compute climatology + clim <- Apply(obj_ens, target_dims = c(member_dim, sdate_dim), mean)$output1 + + # compute ensemble mean + ens_mean <- Apply(obj_ens, target_dims = member_dim, mean, na.rm = TRUE)$output1 + + # compute ensemble mean anomalies + anom <- Ano(ens_mean, clim) + + return(anom) +} + +# atomic functions for logistic regressions +.log_reg <- function(x, y, loocv) { + + tmp_df <- data.frame(x = x, y = y) + + # if the data is all NA, force return return NA + if (all(is.na(tmp_df)) | (sum(apply(tmp_df, 2, function(x) !all(is.na(x)))) == 1)) { + + n <- nrow(tmp_df) + res <- matrix(NA, nrow = n, ncol = length(unique(tmp_df$y))) + + } else { + # training + lm1 <- train_lr(df = tmp_df, loocv = loocv) + + # prediction + res <- pred_lr(lm1 = lm1, df = tmp_df, loocv = loocv) + } + return(res) +} + +#----------------------------------- +# Function to train the logistic regressions. +#----------------------------------- +train_lr <- function(df, loocv) { + + require(nnet) + + # Remove columns containing only NA's + df <- df[ , apply(df, 2, function(x) !all(is.na(x)))] + + if (loocv) { + + lm1 <- lapply(1:nrow(df), function(j) multinom(y ~ ., data = df[ -j, ])) + + } else { + + lm1 <- list(multinom(y ~ ., data = df)) + + } + + return(lm1) +} + +#----------------------------------- +# Function to apply the logistic regressions. +#----------------------------------- +pred_lr <- function(df, lm1, loocv) { + + require(plyr) + + if (loocv) { + + # The error: "Error: Results must have the same dimensions." 
can + # appear when the number of sdates is insufficient + pred_vals_ls <- list() + for (j in 1:nrow(df)) { + pred_vals_ls[[j]] <- predict(lm1[[j]], df[j,], type = "probs") + } + + pred_vals <- laply(pred_vals_ls, .fun = as.array) + + } else { + + # type = class, probs + #pred_vals_ls <- lapply(lm1, predict, data = df, type = "probs") + #pred_vals <- unlist(pred_vals_ls) + pred_vals <- predict(lm1[[1]], df, type = "probs") + } + + return(pred_vals) +} + + diff --git a/modules/Downscaling/tmp/Utils.R b/modules/Downscaling/tmp/Utils.R new file mode 100644 index 00000000..4c727465 --- /dev/null +++ b/modules/Downscaling/tmp/Utils.R @@ -0,0 +1,31 @@ +.check_coords <- function(lat1, lon1, lat2, lon2) { + match <- TRUE + if (!((length(lat1) == length(lat2)) & (length(lon1) == length(lon2)))) { + match <- FALSE + } + return(match) +} + +# reorder dims to a reference array. If they do not exist, they are created +# example +#arr_ref <- array(NA, c(dataset = 1, sdate = 8, member = 3, ftime = 1, lon = 269, lat = 181)) +#arr_to_reorder <- array(NA, c(dataset = 1, member = 3, sdate = 8, lat = 181, lon = 269, pp = 1)) + +.reorder_dims <- function(arr_ref, arr_to_reorder) { + + miss_dims <- names(dim(arr_ref))[!(names(dim(arr_ref)) %in% names(dim(arr_to_reorder)))] + + if (length(miss_dims) != 0) { + for (m in seq(miss_dims)) { + arr_to_reorder <- InsertDim(data = arr_to_reorder, posdim = length(dim(arr_to_reorder)) + 1, lendim = 1, + name = miss_dims[m]) + } + } + + # TODO: add code to reorder dimensions and put the non-common dimensions at the end + + orddim <- match(names(dim(arr_ref)),names(dim(arr_to_reorder))) + return(Reorder(data = arr_to_reorder, order = orddim)) +} + + -- GitLab From 64036d79a200d0b28782376bb35460b1bb83cda0 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 9 Mar 2023 13:29:44 +0100 Subject: [PATCH 07/52] Source functions from temporary folder --- modules/Downscaling/Downscaling.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index d8522b65..97732c40 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -10,12 +10,12 @@ #░ ░ ░ ## TODO: Remove once CSDownscale is on CRAN ## TODO: Move recipe checks to check_recipe() -source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Interpolation.R') -source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Intbc.R') -source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Intlr.R') -source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Analogs.R') -source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/LogisticReg.R') -source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Utils.R') +source('modules/Downscaling/tmp/Interpolation.R') +source('modules/Downscaling/tmp/Intbc') +source('modules/Downscaling/tmp/Intlr.R') +source('modules/Downscaling/tmp/Analogs.R') +source('modules/Downscaling/tmp/LogisticReg.R') +source('modules/Downscaling/tmp/Utils.R') #source("https://earth.bsc.es/gitlab/external/cstools/-/raw/master/R/CST_BiasCorrection.R") -- GitLab From 575fe297b608820cd129b2cd04a2e308ca81d61f Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 14 Mar 2023 11:02:29 +0100 Subject: [PATCH 08/52] Fix bug (error when sourcing function) and modify path names for downscaling --- modules/Downscaling/Downscaling.R | 2 +- modules/Saving/paths2save.R | 11 ++++++++++- 2 
files changed, 11 insertions(+), 2 deletions(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index 97732c40..23586788 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -11,7 +11,7 @@ ## TODO: Remove once CSDownscale is on CRAN ## TODO: Move recipe checks to check_recipe() source('modules/Downscaling/tmp/Interpolation.R') -source('modules/Downscaling/tmp/Intbc') +source('modules/Downscaling/tmp/Intbc.R') source('modules/Downscaling/tmp/Intlr.R') source('modules/Downscaling/tmp/Analogs.R') source('modules/Downscaling/tmp/LogisticReg.R') diff --git a/modules/Saving/paths2save.R b/modules/Saving/paths2save.R index 93196b86..b2023ae6 100644 --- a/modules/Saving/paths2save.R +++ b/modules/Saving/paths2save.R @@ -92,8 +92,17 @@ get_dir <- function(recipe, agg = "global") { fcst.sdate <- paste0("hcst-", recipe$Analysis$Time$sdate) } } - + # Calibration or downscaling method calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method) + if ((calib.method == "raw") && + (!is.null(recipe$Analysis$Workflow$Downscaling))) { + if (recipe$Analysis$Workflow$Downscaling$type == "none") { + calib.method <- "raw" + } else { + calib.method <- recipe$Analysis$Workflow$Downscaling$type + } + } + # Frequency store.freq <- recipe$Analysis$Variables$freq ## TODO: Change "_country" switch(tolower(agg), -- GitLab From 23f6813af84acd5a9efd11efa39d1be89ff593b9 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 17 Mar 2023 10:43:49 +0100 Subject: [PATCH 09/52] Add saving options to recipe (provisional) --- .../testing_recipes/recipe_system7c3s-tas.yml | 4 ++++ modules/Saving/Saving.R | 13 +++++++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/modules/Loading/testing_recipes/recipe_system7c3s-tas.yml b/modules/Loading/testing_recipes/recipe_system7c3s-tas.yml index c8d3b5e8..4e67943b 100644 --- a/modules/Loading/testing_recipes/recipe_system7c3s-tas.yml +++ b/modules/Loading/testing_recipes/recipe_system7c3s-tas.yml @@ -39,6 +39,10 @@ Analysis: percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] Indicators: index: no + Saving: + save_hindcast: no + save_forecast: no + save_observations: no ncores: 1 remove_NAs: yes Output_format: S2S4E diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index f481be70..daf099c3 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -42,13 +42,18 @@ save_data <- function(recipe, data, dir.create(outdir, showWarnings = FALSE, recursive = TRUE) # Export hindcast, forecast and observations onto outfile - save_forecast(data$hcst, recipe, dict, outdir, archive = archive, - type = 'hcst') - if (!is.null(data$fcst)) { + if (recipe$Analysis$Workflow$Saving$save_hindcast) { + save_forecast(data$hcst, recipe, dict, outdir, archive = archive, + type = 'hcst') + } + if ((!is.null(data$fcst)) && + recipe$Analysis$Workflow$Saving$save_forecast) { save_forecast(data$fcst, recipe, dict, outdir, archive = archive, type = 'fcst') } - save_observations(data$obs, recipe, dict, outdir, archive = archive) + if (recipe$Analysis$Workflow$Saving$save_observations) { + save_observations(data$obs, recipe, dict, outdir, archive = archive) + } # Separate ensemble correlation from the rest of the metrics, as it has one # extra dimension "ensemble" and must be saved to a different file -- GitLab From 823ebfbc83335a572a6073b1df4c281e33aa9b29 Mon Sep 17 00:00:00 2001 From: eduzenli Date: Wed, 22 Mar 2023 15:46:39 +0100 Subject: [PATCH 10/52] can retrieve hurs variable 
from CERRA dataset --- conf/archive.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/archive.yml b/conf/archive.yml index eb8e86a5..6da69a65 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -186,7 +186,7 @@ archive: daily_mean: {"hur":"_f3h-r2631x1113/", "ps":"_f3h-r2631x1113/", "sfcWind":"_f3h-r2631x1113/", "tas":"_f3h-r2631x1113/", "winddir":"_f3h-r2631x1113/"} monthly_mean: {"hur":"_f3h-r2631x1113/", "ps":"_f3h-r2631x1113/", "sfcWind":"_f3h-r2631x1113/", - "tas":"_f3h-r2631x1113/", "winddir":"_f3h-r2631x1113/","tasmin":"_f24h-r2631x1113/","tasmax":"_f24h-r2631x1113/"} + "tas":"_f3h-r2631x1113/", "winddir":"_f3h-r2631x1113/","tasmin":"_f24h-r2631x1113/","tasmax":"_f24h-r2631x1113/","hurs":"_f3h-r2631x1113/"} calendar: "proleptic_gregorian" reference_grid: "/esarchive/recon/ecmwf/cerra/monthly_mean/tas_f3h-r2631x1113/tas_200506.nc" CERRA-Land: -- GitLab From 65981563db5824cd1d2b97bd588176d544998631 Mon Sep 17 00:00:00 2001 From: eduzenli Date: Wed, 22 Mar 2023 15:49:39 +0100 Subject: [PATCH 11/52] applies the updates in the Intbc function --- modules/Downscaling/Downscaling.R | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index 23586788..58b9c22f 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -42,11 +42,13 @@ downscale_datasets <- function(recipe, data) { # Downscaling function params int_method <- tolower(recipe$Analysis$Workflow$Downscaling$int_method) bc_method <- tolower(recipe$Analysis$Workflow$Downscaling$bc_method) + cal_method <- tolower(recipe$Analysis$Workflow$Downscaling$cal_method) lr_method <- tolower(recipe$Analysis$Workflow$Downscaling$lr_method) logreg_method <- tolower(recipe$Analysis$Workflow$Downscaling$logreg_method) target_grid <- tolower(recipe$Analysis$Workflow$Downscaling$target_grid) nanalogs <- as.numeric(recipe$Analysis$Workflow$Downscaling$nanalogs) - ## TODO: Compute number of cores + + ## TODO: Compute number of cores if (is.null(recipe$Analysis$ncores)) { ncores <- 1 } else { @@ -94,12 +96,13 @@ downscale_datasets <- function(recipe, data) { method_point_interp = NULL) hcst_downscal$obs <- obs_downscal$exp - } else if (type == "intbc") { - # Interpolate hcst and obs with bias correction + } else if (type == "intbc" & (bc_method=="cal" | bc_method=="calibration")) { + # Interpolate hcst and obs with bias correction (via calibration) hcst_downscal <- CST_Intbc(data$hcst, data$obs, target_grid = target_grid, bc_method = bc_method, int_method = int_method, + cal.method=cal_method, points = NULL, method_point_interp = NULL, lat_dim = "latitude", @@ -108,7 +111,23 @@ downscale_datasets <- function(recipe, data) { member_dim = "ensemble", region = NULL, ncores = ncores) - } else if (type == "intlr") { + + } else if (type == "intbc" & !(bc_method=="cal" | bc_method=="calibration")) { + # Interpolate hcst and obs with bias correction (any other method than calibration) + hcst_downscal <- CST_Intbc(data$hcst, data$obs, + target_grid = target_grid, + bc_method = bc_method, + int_method = int_method, + points = NULL, + method_point_interp = NULL, + lat_dim = "latitude", + lon_dim = "longitude", + sdate_dim = "syear", + member_dim = "ensemble", + region = NULL, + ncores = ncores) + + } else if (type == "intlr") { ## TODO: add the possibility to have the element 'pred' in 'data' if (lr_method == "large_scale") { if (is.null(data$pred$data)) { -- GitLab From 
b771b442e0acae4f193b0d80f6ca4d0b63e37b0c Mon Sep 17 00:00:00 2001 From: eduzenli Date: Wed, 22 Mar 2023 15:50:56 +0100 Subject: [PATCH 12/52] different 'int + calibration' methods can be tested --- modules/Downscaling/tmp/Intbc.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/Downscaling/tmp/Intbc.R b/modules/Downscaling/tmp/Intbc.R index 86bb5a9c..5a5275bf 100644 --- a/modules/Downscaling/tmp/Intbc.R +++ b/modules/Downscaling/tmp/Intbc.R @@ -68,7 +68,7 @@ CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", - sdate_dim = "sdate", member_dim = "member", region = NULL, ncores = 1) + sdate_dim = "sdate", member_dim = "member", region = NULL, ncores = 1,...) { if (!inherits(exp,'s2dv_cube')) { stop("Parameter 'exp' must be of the class 's2dv_cube'") @@ -82,7 +82,7 @@ CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, point obs_lats = obs$lat, obs_lons = obs$lon, target_grid = target_grid, int_method = int_method, bc_method = bc_method, points = points, source_file = exp$source_files[1], method_point_interp = method_point_interp, lat_dim = lat_dim, lon_dim = lon_dim, - sdate_dim = sdate_dim, member_dim = member_dim, region = region, ncores = ncores) + sdate_dim = sdate_dim, member_dim = member_dim, region = region, ncores = ncores,...) # Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data exp$data <- res$data -- GitLab From 7c3282e8264c10a7ca4881ee21eb8592a0236cc9 Mon Sep 17 00:00:00 2001 From: eduzenli Date: Fri, 24 Mar 2023 14:33:28 +0100 Subject: [PATCH 13/52] checks are included --- modules/Downscaling/Downscaling.R | 178 ++++++++++++++++++++---------- 1 file changed, 122 insertions(+), 56 deletions(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index 58b9c22f..3e574e73 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -1,23 +1,10 @@ -# ▄████▄ ██████ ▓█████▄ ▒█████ █ █░███▄ █ ██████ ▄████▄ ▄▄▄ ██▓ ▓█████ -#▒██▀ ▀█ ▒██ ▒ ▒██▀ ██▌▒██▒ ██▒▓█░ █ ░█░██ ▀█ █ ▒██ ▒ ▒██▀ ▀█ ▒████▄ ▓██▒ ▓█ ▀ -#▒▓█ ▄ ░ ▓██▄ ░██ █▌▒██░ ██▒▒█░ █ ░█▓██ ▀█ ██▒░ ▓██▄ ▒▓█ ▄ ▒██ ▀█▄ ▒██░ ▒███ -#▒▓▓▄ ▄██▒ ▒ ██▒░▓█▄ ▌▒██ ██░░█░ █ ░█▓██▒ ▐▌██▒ ▒ ██▒▒▓▓▄ ▄██▒░██▄▄▄▄██ ▒██░ ▒▓█ ▄ -#▒ ▓███▀ ░▒██████▒▒░▒████▓ ░ ████▓▒░░░██▒██▓▒██░ ▓██░▒██████▒▒▒ ▓███▀ ░ ▓█ ▓██▒░██████▒░▒████▒ -#░ ░▒ ▒ ░▒ ▒▓▒ ▒ ░ ▒▒▓ ▒ ░ ▒░▒░▒░ ░ ▓░▒ ▒ ░ ▒░ ▒ ▒ ▒ ▒▓▒ ▒ ░░ ░▒ ▒ ░ ▒▒ ▓▒█░░ ▒░▓ ░░░ ▒░ ░ -# ░ ▒ ░ ░▒ ░ ░ ░ ▒ ▒ ░ ▒ ▒░ ▒ ░ ░ ░ ░░ ░ ▒░░ ░▒ ░ ░ ░ ▒ ▒ ▒▒ ░░ ░ ▒ ░ ░ ░ ░ -#░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ▒ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ▒ ░ ░ ░ -#░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ -#░ ░ ░ -## TODO: Remove once CSDownscale is on CRAN -## TODO: Move recipe checks to check_recipe() +### Downscaling Module source('modules/Downscaling/tmp/Interpolation.R') source('modules/Downscaling/tmp/Intbc.R') source('modules/Downscaling/tmp/Intlr.R') source('modules/Downscaling/tmp/Analogs.R') source('modules/Downscaling/tmp/LogisticReg.R') source('modules/Downscaling/tmp/Utils.R') -#source("https://earth.bsc.es/gitlab/external/cstools/-/raw/master/R/CST_BiasCorrection.R") - ## Entry params data and recipe? 
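An illustrative usage sketch of the module (not part of this patch): it assumes a recipe prepared with prepare_outputs() and data returned by load_datasets(), following the pattern of the repository's test scripts; the recipe path shown is one of the existing testing recipes.
  source("modules/Loading/Loading.R")
  source("modules/Downscaling/Downscaling.R")
  recipe <- prepare_outputs("modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml")
  data <- load_datasets(recipe)
  data <- downscale_datasets(recipe, data)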
downscale_datasets <- function(recipe, data) { @@ -30,45 +17,57 @@ downscale_datasets <- function(recipe, data) { type <- tolower(recipe$Analysis$Workflow$Downscaling$type) if (!is.null(data$fcst)) { - warn(recipe$Run$logger, - "The downscaling will be only performed to the hindcast data") + warning("The downscaling will be only performed to the hindcast data") data$fcst <- NULL } if (type == "none") { + hcst_downscal <- data$hcst DOWNSCAL_MSG <- "##### NO DOWNSCALING PERFORMED #####" + } else { # Downscaling function params int_method <- tolower(recipe$Analysis$Workflow$Downscaling$int_method) bc_method <- tolower(recipe$Analysis$Workflow$Downscaling$bc_method) - cal_method <- tolower(recipe$Analysis$Workflow$Downscaling$cal_method) + cal_method <- tolower(recipe$Analysis$Workflow$Downscaling$cal_method) lr_method <- tolower(recipe$Analysis$Workflow$Downscaling$lr_method) - logreg_method <- tolower(recipe$Analysis$Workflow$Downscaling$logreg_method) + log_reg_method <- tolower(recipe$Analysis$Workflow$Downscaling$log_reg_method) target_grid <- tolower(recipe$Analysis$Workflow$Downscaling$target_grid) nanalogs <- as.numeric(recipe$Analysis$Workflow$Downscaling$nanalogs) - - ## TODO: Compute number of cores + if (is.null(recipe$Analysis$ncores)) { ncores <- 1 } else { ncores <- recipe$Analysis$ncores } - ## TODO: add the parametre loocv where it corresponds + + #TO DO: add the parametre loocv where it corresponds if (is.null(recipe$Analysis$loocv)) { loocv <- TRUE } else { loocv <- recipe$Analysis$loocv } - # Define downscaling options + DOWNSCAL_TYPES <- c("none", "int", "intbc", "intlr", "analogs", "logreg") - BC_METHODS <- c("simple_bias", "calibration", "quantile_mapping") + BC_METHODS <- c("simple_bias", "calibration", "quantile_mapping", "sbc", "cal", "qm") LR_METHODS <- c("basic", "large_scale", "4nn") - LOGREG_METHODS <- c("ens_mean", "ens_mean_sd", "sorted_members") + LOG_REG_METHODS <- c("ens_mean", "ens_mean_sd", "sorted_members") + + if (!(type %in% DOWNSCAL_TYPES)) { + stop("Downscaling type in the recipe is not available. 
Accepted types ", + "are 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'.") + } - # Interpolation if (type == "int") { - ## TODO: Move this check elsewhere + if (is.null(int_method)) { + stop("Please provide one interpolation method in the recipe.") + } + + if (is.null(target_grid)) { + stop("Please provide the target grid in the recipe.") + } + # Ensure that observations are in the same grid as experiments # Only needed for this method because the others already return the # observations @@ -76,7 +75,6 @@ downscale_datasets <- function(recipe, data) { lonmin <- data$hcst$lon[1] latmax <- data$hcst$lat[length(data$hcst$lat)] lonmax <- data$hcst$lon[length(data$hcst$lon)] - # Interpolate hcst hcst_downscal <- CST_Interpolation(data$hcst, points = NULL, method_remap = int_method, @@ -85,7 +83,7 @@ downscale_datasets <- function(recipe, data) { lon_dim = "longitude", region = c(lonmin, lonmax, latmin, latmax), method_point_interp = NULL) - # Interpolate obs + obs_downscal <- CST_Interpolation(data$obs, points = NULL, method_remap = int_method, @@ -94,15 +92,41 @@ downscale_datasets <- function(recipe, data) { lon_dim = "longitude", region = c(lonmin, lonmax, latmin, latmax), method_point_interp = NULL) + hcst_downscal$obs <- obs_downscal$exp - } else if (type == "intbc" & (bc_method=="cal" | bc_method=="calibration")) { - # Interpolate hcst and obs with bias correction (via calibration) + DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" + } else if (type == "intbc") { + if (length(int_method) == 0) { + stop("Please provide one (and only one) interpolation method in the recipe.") + } + + if (is.null(bc_method)) { + stop("Please provide one bias-correction method in the recipe. Accepted ", + "methods are 'simple_bias', 'calibration', 'quantile_mapping', 'sbc', 'cal', ", + "'qm'.") + } + + if (is.null(target_grid)) { + stop("Please provide the target grid in the recipe.") + } + + if (!(bc_method %in% BC_METHODS)) { + stop(paste0(bc_method, " method in the recipe is not available. 
Accepted methods ", + "are 'simple_bias', 'calibration', 'quantile_mapping', 'sbc', 'cal', 'qm'.")) + } + + if (bc_method=="cal" | bc_method=="calibration") + { + if (length(cal_method)==0) { + stop("Please provide one (and only one) calibration method in the recipe.") + } + hcst_downscal <- CST_Intbc(data$hcst, data$obs, target_grid = target_grid, bc_method = bc_method, int_method = int_method, - cal.method=cal_method, + cal.method=cal_method, points = NULL, method_point_interp = NULL, lat_dim = "latitude", @@ -112,9 +136,10 @@ downscale_datasets <- function(recipe, data) { region = NULL, ncores = ncores) - } else if (type == "intbc" & !(bc_method=="cal" | bc_method=="calibration")) { - # Interpolate hcst and obs with bias correction (any other method than calibration) - hcst_downscal <- CST_Intbc(data$hcst, data$obs, + DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" + } else + { + hcst_downscal <- CST_Intbc(data$hcst, data$obs, target_grid = target_grid, bc_method = bc_method, int_method = int_method, @@ -126,9 +151,28 @@ downscale_datasets <- function(recipe, data) { member_dim = "ensemble", region = NULL, ncores = ncores) + DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" + } + } else if (type == "intlr") { + if (length(int_method) == 0) { + stop("Please provide one (and only one) interpolation method in the recipe.") + } - } else if (type == "intlr") { - ## TODO: add the possibility to have the element 'pred' in 'data' + if (is.null(lr_method)) { + stop("Please provide one linear regression method in the recipe. Accepted ", + "methods are 'basic', 'large_scale', '4nn'.") + } + + if (is.null(target_grid)) { + stop("Please provide the target grid in the recipe.") + } + + if (!(lr_method %in% LR_METHODS)) { + stop(paste0(lr_method, " method in the recipe is not available. Accepted methods ", + "are 'basic', 'large_scale', '4nn'.")) + } + + # TO DO: add the possibility to have the element 'pred' in 'data' if (lr_method == "large_scale") { if (is.null(data$pred$data)) { stop("Please provide the large scale predictors in the element 'data$pred$data'.") @@ -136,7 +180,7 @@ downscale_datasets <- function(recipe, data) { } else { data$pred$data <- NULL } - # Interpolate hcst and obs with linear regression + hcst_downscal <- CST_Intlr(data$hcst, data$obs, lr_method = lr_method, target_grid = target_grid, @@ -144,23 +188,25 @@ downscale_datasets <- function(recipe, data) { int_method = int_method, method_point_interp = NULL, predictors = data$pred$data, - lat_dim = "latitude", + lat_dim = "latitude", lon_dim = "longitude", - sdate_dim = "syear", - time_dim = "time", + sdate_dim = "syear", + time_dim = "time", member_dim = "ensemble", large_scale_predictor_dimname = 'vars', loocv = loocv, - region = NULL, + region = NULL, ncores = ncores) + + DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } else if (type == "analogs") { - if (length(nanalogs) == 0) { - info(recipe$Run$logger, - paste("The number of analogs for searching has not been provided", - "in the recipe. Setting it to 3.")) + + if (is.null(nanalogs)) { + warning("The number of analogs for searching has not been provided in the ", + "recipe. 
Setting it to 3.") nanalogs <- 3 } - # Apply analogs method to hcst and obs + hcst_downscal <- CST_Analogs(data$hcst, data$obs, grid_exp = data$hcst$source_files[1], nanalogs = nanalogs, @@ -174,17 +220,36 @@ downscale_datasets <- function(recipe, data) { return_indices = FALSE, loocv_window = loocv, ncores = ncores) + + DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } else if (type == "logreg") { + + if (length(int_method) == 0) { + stop("Please provide one (and only one) interpolation method in the recipe.") + } + + if (is.null(log_reg_method)) { + stop("Please provide one logistic regression method in the recipe. Accepted ", + "methods are 'ens_mean', 'ens_mean_sd', 'sorted_members'.") + } + + if (is.null(target_grid)) { + stop("Please provide the target grid in the recipe.") + } + # Since we are forcing to create three categories, and applying cross-validation, # we need at least six years of data for the logistic regression function to not # crash - if (dim(data$hcst$data)[["syear"]] < 6) { - error(recipe$Run$logger, - paste("The number of years of data is insufficient for the", - "logistic regression method. Please provide six or more.")) - stop() + if (dim(data$hcst$data)[names(dim(data$hcst$data)) == "syear"] <= 5) { + stop("The number of start dates is insufficient for the logisitic regression method. ", + "Please provide six or more.") + } + + if (!(log_reg_method %in% LOG_REG_METHODS)) { + stop(paste0(log_reg_method, " method in the recipe is not available. Accepted methods ", + "are 'ens_mean', 'ens_mean_sd', 'sorted_members'.")) } - # Apply logistic regression to hcst and obs + hcst_downscal <- CST_LogisticReg(data$hcst, data$obs, target_grid = target_grid, int_method = int_method, @@ -201,10 +266,11 @@ downscale_datasets <- function(recipe, data) { loocv = loocv, ncores = ncores) + DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } - DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" - } - info(recipe$Run$logger, DOWNSCAL_MSG) - return(list(hcst = hcst_downscal$exp, obs = hcst_downscal$obs, fcst = NULL)) + } + print(DOWNSCAL_MSG) + return(list(hcst = hcst_downscal$exp, obs = hcst_downscal$obs, fcst = NULL)) } + -- GitLab From d397a625cd841bdc991802fcd2ec699e8a710a15 Mon Sep 17 00:00:00 2001 From: eduzenli Date: Wed, 29 Mar 2023 10:47:38 +0200 Subject: [PATCH 14/52] hurs variable can be downloaded from ECMWF-SEAS5 and CERRA archives --- conf/archive.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/conf/archive.yml b/conf/archive.yml index 6da69a65..87e9cd65 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -16,7 +16,7 @@ archive: "tasmin":"_f24h/", "tasmax":"_f24h/", "ta300":"_f12h/", "ta500":"_f12h/", "ta850":"_f12h/", "g300":"_f12h/", "g500":"_f12h/", "g850":"_f12h/", - "tdps":"_f6h/"} + "tdps":"_f6h/","hurs":"_f6h/"} nmember: fcst: 51 hcst: 25 @@ -183,10 +183,10 @@ archive: name: "ECMWF CERRA" institution: "European Centre for Medium-Range Weather Forecasts" src: "recon/ecmwf/cerra/" - daily_mean: {"hur":"_f3h-r2631x1113/", "ps":"_f3h-r2631x1113/", "sfcWind":"_f3h-r2631x1113/", + daily_mean: {"hurs":"_f3h-r2631x1113/", "ps":"_f3h-r2631x1113/", "sfcWind":"_f3h-r2631x1113/", "tas":"_f3h-r2631x1113/", "winddir":"_f3h-r2631x1113/"} - monthly_mean: {"hur":"_f3h-r2631x1113/", "ps":"_f3h-r2631x1113/", "sfcWind":"_f3h-r2631x1113/", - "tas":"_f3h-r2631x1113/", "winddir":"_f3h-r2631x1113/","tasmin":"_f24h-r2631x1113/","tasmax":"_f24h-r2631x1113/","hurs":"_f3h-r2631x1113/"} + monthly_mean: {"hurs":"_f3h-r2631x1113/", "ps":"_f3h-r2631x1113/", 
"sfcWind":"_f3h-r2631x1113/", + "tas":"_f3h-r2631x1113/", "winddir":"_f3h-r2631x1113/","tasmin":"_f24h-r2631x1113/","tasmax":"_f24h-r2631x1113/"} calendar: "proleptic_gregorian" reference_grid: "/esarchive/recon/ecmwf/cerra/monthly_mean/tas_f3h-r2631x1113/tas_200506.nc" CERRA-Land: -- GitLab From c7ef402670562ba1717f49315a317f6bc99b0a85 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 30 Mar 2023 08:44:52 +0200 Subject: [PATCH 15/52] Save Anomalies --- modules/Anomalies/Anomalies.R | 16 +++++++++++++--- modules/Saving/paths2save.R | 2 ++ modules/test_seasonal.R | 2 +- recipes/atomic_recipes/recipe_system7c3s-tas.yml | 4 ++++ 4 files changed, 20 insertions(+), 4 deletions(-) diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R index 859e97bb..b54188d1 100644 --- a/modules/Anomalies/Anomalies.R +++ b/modules/Anomalies/Anomalies.R @@ -82,7 +82,19 @@ compute_anomalies <- function(recipe, data) { "$hcst.full_val and $obs.full_val.")) info(recipe$Run$logger, "##### ANOMALIES COMPUTED SUCCESSFULLY #####") - + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Anomalies/") + if (recipe$Analysis$Workflow$Anomalies$save_outputs %in% + c('all', 'exp_only', 'fcst_only')) { + save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') + } + if (recipe$Analysis$Workflow$Anomalies$save_outputs %in% + c('all', 'exp_only')) { + save_forecast(recipe = recipe, data_cube = data$hcst, type = 'hcst') + } + if (recipe$Analysis$Workflow$Anomalies$save_outputs == 'all') { + save_observations(recipe = recipe, data_cube = data$obs) + } } else { warn(recipe$Run$logger, paste("The Anomalies module has been called, but", "recipe parameter Analysis:Variables:anomaly is set to FALSE.", @@ -92,8 +104,6 @@ compute_anomalies <- function(recipe, data) { info(recipe$Run$logger, "##### ANOMALIES NOT COMPUTED #####") } - ## TODO: Return fcst full value? 
- return(list(hcst = data$hcst, obs = data$obs, fcst = data$fcst, hcst.full_val = hcst_fullvalue, obs.full_val = obs_fullvalue)) diff --git a/modules/Saving/paths2save.R b/modules/Saving/paths2save.R index 93196b86..46152207 100644 --- a/modules/Saving/paths2save.R +++ b/modules/Saving/paths2save.R @@ -104,5 +104,7 @@ get_dir <- function(recipe, agg = "global") { "-", store.freq, "/", variable, "/", fcst.sdate, "/")}) } + ## TODO: Multivar case + dir.create(dir, showWarnings = FALSE, recursive = TRUE) return(dir) } diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R index cddc1b37..4dd34b61 100644 --- a/modules/test_seasonal.R +++ b/modules/test_seasonal.R @@ -5,7 +5,7 @@ source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") -recipe_file <- "recipes/tests/recipe_seasonal_two-variables.yml" +recipe_file <- "recipes/atomic_recipes/recipe_system7c3s-tas.yml" recipe <- prepare_outputs(recipe_file) # Load datasets diff --git a/recipes/atomic_recipes/recipe_system7c3s-tas.yml b/recipes/atomic_recipes/recipe_system7c3s-tas.yml index 47cfc31b..e4e0a087 100644 --- a/recipes/atomic_recipes/recipe_system7c3s-tas.yml +++ b/recipes/atomic_recipes/recipe_system7c3s-tas.yml @@ -31,12 +31,16 @@ Analysis: Anomalies: compute: yes # yes/no, default yes cross_validation: yes # yes/no, default yes + save_outputs: 'all' # 'all'/'none'/'exp_only'/'fcst_only' Calibration: method: mse_min + save_outputs: 'none' # 'all'/'none'/'exp_only'/'fcst_only' Skill: metric: RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr mean_bias mean_bias_SS + save_outputs: 'all' # 'all'/'none'/list of metrics Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] + save_outputs: 'percentiles_only' # 'all'/'none'/'bins_only'/'percentiles_only' Indicators: index: no ncores: 10 -- GitLab From 564c0af5f35db0e3d5634a1e6ce4d5d3ae32aba7 Mon Sep 17 00:00:00 2001 From: eduzenli Date: Thu, 30 Mar 2023 14:34:41 +0200 Subject: [PATCH 16/52] edited based on new release of CSTools --- modules/Downscaling/tmp/Intbc.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/Downscaling/tmp/Intbc.R b/modules/Downscaling/tmp/Intbc.R index 5a5275bf..3cfea06e 100644 --- a/modules/Downscaling/tmp/Intbc.R +++ b/modules/Downscaling/tmp/Intbc.R @@ -78,20 +78,20 @@ CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, point stop("Parameter 'obs' must be of the class 's2dv_cube'") } - res <- Intbc(exp = exp$data, obs = obs$data, exp_lats = exp$lat, exp_lons = exp$lon, - obs_lats = obs$lat, obs_lons = obs$lon, target_grid = target_grid, - int_method = int_method, bc_method = bc_method, points = points, source_file = exp$source_files[1], + res <- Intbc(exp = exp$data, obs = obs$data, exp_lats = exp$coords$lat, exp_lons = exp$coords$lon, + obs_lats = obs$coords$lat, obs_lons = obs$coords$lon, target_grid = target_grid, + int_method = int_method, bc_method = bc_method, points = points, source_file = exp$attrs$source_files[1], method_point_interp = method_point_interp, lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim, member_dim = member_dim, region = region, ncores = ncores,...) 
# Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data exp$data <- res$data - exp$lon <- res$lon - exp$lat <- res$lat + exp$coords$lon <- res$lon + exp$coords$lat <- res$lat obs$data <- res$obs - obs$lat <- res$lat - obs$lon <- res$lon + obs$coords$lat <- res$lat + obs$coords$lon <- res$lon res_s2dv <- list(exp = exp, obs = obs) return(res_s2dv) -- GitLab From 79b85eb570a541dbe5b1ae458ff463f686f08fa1 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 31 Mar 2023 17:34:05 +0200 Subject: [PATCH 17/52] try to resolve conflicts --- modules/Visualization/Visualization.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 61d6ec93..22bbf325 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -1,4 +1,4 @@ -#G# TODO: Remove once released in s2dv/CSTools +## TODO: Remove once released in s2dv/CSTools source("modules/Visualization/tmp/PlotMostLikelyQuantileMap.R") source("modules/Visualization/tmp/PlotCombinedMap.R") source("modules/Visualization/tmp/clim.palette.R") @@ -47,7 +47,6 @@ plot_data <- function(recipe, plot_skill_metrics(recipe, archive, data$hcst, skill_metrics, outdir, significance) } - # Plot forecast ensemble mean if (!is.null(data$fcst)) { plot_ensemble_mean(recipe, archive, data$fcst, outdir) -- GitLab From 276aefb5c947c1c89c135c2256309b90157d69d9 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 3 Apr 2023 10:33:45 +0200 Subject: [PATCH 18/52] Update CSDownscale functions and Downscaling module to work with new s2dv_cube --- modules/Downscaling/Downscaling.R | 12 ++++++------ modules/Downscaling/tmp/Analogs.R | 17 ++++++++++------- modules/Downscaling/tmp/Intbc.R | 23 +++++++++++++---------- modules/Downscaling/tmp/Interpolation.R | 9 +++++---- modules/Downscaling/tmp/Intlr.R | 19 +++++++++++-------- modules/Downscaling/tmp/LogisticReg.R | 17 ++++++++++------- 6 files changed, 55 insertions(+), 42 deletions(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index 3e574e73..f642f673 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -71,10 +71,10 @@ downscale_datasets <- function(recipe, data) { # Ensure that observations are in the same grid as experiments # Only needed for this method because the others already return the # observations - latmin <- data$hcst$lat[1] - lonmin <- data$hcst$lon[1] - latmax <- data$hcst$lat[length(data$hcst$lat)] - lonmax <- data$hcst$lon[length(data$hcst$lon)] + latmin <- data$hcst$coords$latitude[1] + lonmin <- data$hcst$coords$longitude[1] + latmax <- data$hcst$coords$latitude[length(data$hcst$lat)] + lonmax <- data$hcst$coords$longitude[length(data$hcst$lon)] hcst_downscal <- CST_Interpolation(data$hcst, points = NULL, method_remap = int_method, @@ -126,7 +126,7 @@ downscale_datasets <- function(recipe, data) { target_grid = target_grid, bc_method = bc_method, int_method = int_method, - cal.method=cal_method, + cal.method = cal_method, points = NULL, method_point_interp = NULL, lat_dim = "latitude", @@ -208,7 +208,7 @@ downscale_datasets <- function(recipe, data) { } hcst_downscal <- CST_Analogs(data$hcst, data$obs, - grid_exp = data$hcst$source_files[1], + grid_exp = data$hcst$attrs$source_files[1], nanalogs = nanalogs, fun_analog = "min", lat_dim = "latitude", diff --git a/modules/Downscaling/tmp/Analogs.R b/modules/Downscaling/tmp/Analogs.R index fd6e2f97..a69a66df 100644 
--- a/modules/Downscaling/tmp/Analogs.R +++ b/modules/Downscaling/tmp/Analogs.R @@ -99,21 +99,24 @@ CST_Analogs <- function(exp, obs, grid_exp, obs2 = NULL, nanalogs = 3, fun_analo stop("Parameter 'obs' must be of the class 's2dv_cube'") } - res <- Analogs(exp = exp$data, obs = obs$data, exp_lats = exp$lat, exp_lons = exp$lon, - obs_lats = obs$lat, obs_lons = obs$lon, grid_exp = grid_exp, - nanalogs = nanalogs, fun_analog = fun_analog, lat_dim = lat_dim, lon_dim = lon_dim, + res <- Analogs(exp = exp$data, obs = obs$data, exp_lats = exp$coords[[lat_dim]], + exp_lons = exp$coords[[lon_dim]], obs_lats = obs$coords[[lat_dim]], + obs_lons = obs$coords[[lon_dim]], grid_exp = grid_exp, nanalogs = nanalogs, + fun_analog = fun_analog, lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim, time_dim = time_dim, member_dim = member_dim, region = region, return_indices = return_indices, loocv_window = loocv_window, ncores = ncores) # Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data exp$data <- res$data - exp$lon <- res$lon - exp$lat <- res$lat + exp$dims <- dim(exp$data) + exp$coords[[lon_dim]] <- res$lon + exp$coords[[lat_dim]] <- res$lat obs$data <- res$obs - obs$lat <- res$lat - obs$lon <- res$lon + obs$dims <- dim(obs$data) + obs$coords[[lon_dim]] <- res$lon + obs$coords[[lat_dim]] <- res$lat res_s2dv <- list(exp = exp, obs = obs) return(res_s2dv) diff --git a/modules/Downscaling/tmp/Intbc.R b/modules/Downscaling/tmp/Intbc.R index 3cfea06e..27de3735 100644 --- a/modules/Downscaling/tmp/Intbc.R +++ b/modules/Downscaling/tmp/Intbc.R @@ -78,20 +78,23 @@ CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, point stop("Parameter 'obs' must be of the class 's2dv_cube'") } - res <- Intbc(exp = exp$data, obs = obs$data, exp_lats = exp$coords$lat, exp_lons = exp$coords$lon, - obs_lats = obs$coords$lat, obs_lons = obs$coords$lon, target_grid = target_grid, - int_method = int_method, bc_method = bc_method, points = points, source_file = exp$attrs$source_files[1], - method_point_interp = method_point_interp, lat_dim = lat_dim, lon_dim = lon_dim, - sdate_dim = sdate_dim, member_dim = member_dim, region = region, ncores = ncores,...) + res <- Intbc(exp = exp$data, obs = obs$data, exp_lats = exp$coords[[lat_dim]], exp_lons = exp$coords[[lon_dim]], + obs_lats = obs$coords[[lat_dim]], obs_lons = obs$coords[[lon_dim]], target_grid = target_grid, + int_method = int_method, bc_method = bc_method, points = points, + source_file = exp$attrs$source_files[1], method_point_interp = method_point_interp, + lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim, member_dim = member_dim, + region = region, ncores = ncores,...) 
# Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data exp$data <- res$data - exp$coords$lon <- res$lon - exp$coords$lat <- res$lat - + exp$dims <- dim(exp$data) + exp$coords[[lon_dim]] <- res$lon + exp$coords[[lat_dim]] <- res$lat + obs$data <- res$obs - obs$coords$lat <- res$lat - obs$coords$lon <- res$lon + obs$dims <- dim(obs$data) + obs$coords[[lon_dim]] <- res$lon + obs$coords[[lat_dim]] <- res$lat res_s2dv <- list(exp = exp, obs = obs) return(res_s2dv) diff --git a/modules/Downscaling/tmp/Interpolation.R b/modules/Downscaling/tmp/Interpolation.R index e594691f..1599bf3b 100644 --- a/modules/Downscaling/tmp/Interpolation.R +++ b/modules/Downscaling/tmp/Interpolation.R @@ -67,15 +67,16 @@ CST_Interpolation <- function(exp, points = NULL, method_remap = NULL, target_gr stop("The name of the latitude/longitude dimensions in 'exp$data' must match the parametres 'lat_dim' and 'lon_dim'") } - res <- Interpolation(exp = exp$data, lats = exp$lat, lons = exp$lon, - source_file = exp$source_files[1], points = points, + res <- Interpolation(exp = exp$data, lats = exp$coords[[lat_dim]], lons = exp$coords[[lon_dim]], + source_file = exp$attrs$source_files[1], points = points, method_remap = method_remap, target_grid = target_grid, lat_dim = lat_dim, lon_dim = lon_dim, region = region, method_point_interp = method_point_interp) # Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data exp$data <- res$data - exp$lon <- res$lon - exp$lat <- res$lat + exp$dims <- dim(exp$data) + exp$coords[[lon_dim]] <- res$lon + exp$coords[[lat_dim]] <- res$lat res_s2dv <- list(exp = exp, obs = NULL) return(res_s2dv) diff --git a/modules/Downscaling/tmp/Intlr.R b/modules/Downscaling/tmp/Intlr.R index 565e3046..24c909f3 100644 --- a/modules/Downscaling/tmp/Intlr.R +++ b/modules/Downscaling/tmp/Intlr.R @@ -99,22 +99,25 @@ CST_Intlr <- function(exp, obs, lr_method, target_grid = NULL, points = NULL, in stop("Parameter 'obs' must be of the class 's2dv_cube'") } - res <- Intlr(exp = exp$data, obs = obs$data, exp_lats = exp$lat, exp_lons = exp$lon, - obs_lats = obs$lat, obs_lons = obs$lon, points = points, source_file_exp = exp$source_files[1], - source_file_obs = obs$source_files[1], target_grid = target_grid, lr_method = lr_method, - int_method = int_method, method_point_interp = method_point_interp, predictors = predictors, + res <- Intlr(exp = exp$data, obs = obs$data, exp_lats = exp$coords[[lat_dim]], exp_lons = exp$coords[[lon_dim]], + obs_lats = obs$coords[[lat_dim]], obs_lons = obs$coords[[lon_dim]], points = points, + source_file_exp = exp$attrs$source_files[1], source_file_obs = obs$attrs$source_files[1], + target_grid = target_grid, lr_method = lr_method, int_method = int_method, + method_point_interp = method_point_interp, predictors = predictors, lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim, time_dim = time_dim, member_dim = member_dim, large_scale_predictor_dimname = large_scale_predictor_dimname, loocv = loocv, region = region, ncores = ncores) # Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data exp$data <- res$data - exp$lon <- res$lon - exp$lat <- res$lat + exp$dims <- dim(exp$data) + exp$coords[[lon_dim]] <- res$lon + exp$coords[[lat_dim]] <- res$lat obs$data <- res$obs - obs$lat <- res$lat - obs$lon <- res$lon + obs$dims <- dim(obs$data) + obs$coords[[lon_dim]] <- res$lon + obs$coords[[lat_dim]] <- res$lat res_s2dv <- list(exp = exp, obs = obs) return(res_s2dv) diff --git a/modules/Downscaling/tmp/LogisticReg.R 
b/modules/Downscaling/tmp/LogisticReg.R index 24be6936..c514d254 100644 --- a/modules/Downscaling/tmp/LogisticReg.R +++ b/modules/Downscaling/tmp/LogisticReg.R @@ -101,23 +101,26 @@ CST_LogisticReg <- function(exp, obs, target_grid, int_method = NULL, log_reg_me stop("Parameter 'obs' must be of the class 's2dv_cube'") } - res <- LogisticReg(exp = exp$data, obs = obs$data, exp_lats = exp$lat, exp_lons = exp$lon, - obs_lats = obs$lat, obs_lons = obs$lon, target_grid = target_grid, + res <- LogisticReg(exp = exp$data, obs = obs$data, exp_lats = exp$coords[[lat_dim]], + exp_lons = exp$coords[[lon_dim]], obs_lats = obs$coords[[lat_dim]], + obs_lons = obs$coords[[lon_dim]], target_grid = target_grid, probs_cat = probs_cat, return_most_likely_cat = return_most_likely_cat, int_method = int_method, log_reg_method = log_reg_method, points = points, method_point_interp = method_point_interp, lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim, member_dim = member_dim, - source_file = exp$source_files[1], region = region, loocv = loocv, + source_file = exp$attrs$source_files[1], region = region, loocv = loocv, ncores = ncores) # Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data exp$data <- res$data - exp$lon <- res$lon - exp$lat <- res$lat + exp$dims <- dim(exp$data) + exp$coords[[lon_dim]] <- res$lon + exp$coords[[lat_dim]] <- res$lat obs$data <- res$obs - obs$lat <- res$lat - obs$lon <- res$lon + obs$dims <- dim(obs$data) + obs$coords[[lon_dim]] <- res$lon + obs$coords[[lat_dim]] <- res$lat res_s2dv <- list(exp = exp, obs = obs) return(res_s2dv) -- GitLab From f682561676aa85d43aa3804ca26a9cef58876864 Mon Sep 17 00:00:00 2001 From: eduzenli Date: Tue, 4 Apr 2023 10:18:25 +0200 Subject: [PATCH 19/52] updated based on Release of CSTools 5.0.0 --- modules/Downscaling/Downscaling.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index 3e574e73..993702b3 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -30,7 +30,7 @@ downscale_datasets <- function(recipe, data) { # Downscaling function params int_method <- tolower(recipe$Analysis$Workflow$Downscaling$int_method) bc_method <- tolower(recipe$Analysis$Workflow$Downscaling$bc_method) - cal_method <- tolower(recipe$Analysis$Workflow$Downscaling$cal_method) + cal_method <- tolower(recipe$Analysis$Workflow$Downscaling$cal_method) lr_method <- tolower(recipe$Analysis$Workflow$Downscaling$lr_method) log_reg_method <- tolower(recipe$Analysis$Workflow$Downscaling$log_reg_method) target_grid <- tolower(recipe$Analysis$Workflow$Downscaling$target_grid) @@ -71,10 +71,10 @@ downscale_datasets <- function(recipe, data) { # Ensure that observations are in the same grid as experiments # Only needed for this method because the others already return the # observations - latmin <- data$hcst$lat[1] - lonmin <- data$hcst$lon[1] - latmax <- data$hcst$lat[length(data$hcst$lat)] - lonmax <- data$hcst$lon[length(data$hcst$lon)] + latmin <- data$hcst$coords$latitude[1] + lonmin <- data$hcst$coords$longitude[1] + latmax <- data$hcst$coords$latitude[length(data$hcst$coords$latitude)] + lonmax <- data$hcst$coords$longitude[length(data$hcst$coords$longitude)] hcst_downscal <- CST_Interpolation(data$hcst, points = NULL, method_remap = int_method, @@ -126,7 +126,7 @@ downscale_datasets <- function(recipe, data) { target_grid = target_grid, bc_method = bc_method, int_method = int_method, - cal.method=cal_method, + 
cal.method=cal_method, points = NULL, method_point_interp = NULL, lat_dim = "latitude", -- GitLab From d33738a6a66802db03974086741b6097edf9873d Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 11 Apr 2023 15:40:16 +0200 Subject: [PATCH 20/52] Add Visualization plot options to recipe --- modules/Visualization/Visualization.R | 76 ++++++++++++------- .../atomic_recipes/recipe_system7c3s-tas.yml | 4 +- 2 files changed, 51 insertions(+), 29 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 84e0a139..c3c14e06 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -22,7 +22,9 @@ plot_data <- function(recipe, # skill_metrics: list of arrays containing the computed skill metrics # significance: Bool. Whether to include significance dots where applicable - outdir <- paste0(get_dir(recipe), "/plots/") + plots <- strsplit(recipe$Analysis$Workflow$Visualization$plots, ", | |,")[[1]] + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "/plots/") + outdir <- paste0(get_dir(recipe)) dir.create(outdir, showWarnings = FALSE, recursive = TRUE) if ((is.null(skill_metrics)) && (is.null(data$fcst))) { @@ -43,20 +45,38 @@ plot_data <- function(recipe, } # Plot skill metrics - if (!is.null(skill_metrics)) { - plot_skill_metrics(recipe, archive, data$hcst, skill_metrics, outdir, - significance) + if ("skill_metrics" %in% plots) { + if (!is.null(skill_metrics)) { + plot_skill_metrics(recipe, archive, data$hcst, skill_metrics, outdir, + significance) + } else { + error(recipe$Run$logger, + paste0("The skill metric plots have been requested, but the ", + "parameter 'skill_metrics' is NULL")) + } } # Plot forecast ensemble mean - if (!is.null(data$fcst)) { - plot_ensemble_mean(recipe, archive, data$fcst, outdir) + if ("forecast_ensemble_mean" %in% plots) { + if (!is.null(data$fcst)) { + plot_ensemble_mean(recipe, archive, data$fcst, outdir) + } else { + error(recipe$Run$logger, + paste0("The forecast ensemble mean plot has been requested, but ", + "there is no fcst element in 'data'")) + } } # Plot Most Likely Terciles - if ((!is.null(probabilities)) && (!is.null(data$fcst))) { - plot_most_likely_terciles(recipe, archive, data$fcst, - probabilities, outdir) + if ("most_likely_terciles" %in% plots) { + if ((!is.null(probabilities)) && (!is.null(data$fcst))) { + plot_most_likely_terciles(recipe, archive, data$fcst, + probabilities, outdir) + } else { + error(recipe$Run$logger, + paste0("For the most likely terciles plot, both the fsct and the ", + "probabilities must be provided.")) + } } } @@ -87,7 +107,7 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, hcst_period <- paste0(recipe$Analysis$Time$hcst_start, "-", recipe$Analysis$Time$hcst_end) init_month <- lubridate::month(as.numeric(substr(recipe$Analysis$Time$sdate, - start = 1, stop = 2)), + start = 1, stop = 2)), label = T, abb = T) # Define color palette and number of breaks according to output format ## TODO: Make separate function @@ -170,23 +190,23 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, if ((significance) && (significance_name %in% names(skill_metrics))) { skill_significance <- skill_metrics[[significance_name]] skill_significance <- Reorder(skill_significance, c("time", - "longitude", - "latitude")) + "longitude", + "latitude")) # Split skill significance into list of lists, along the time dimension # This allows for plotting the significance dots correctly. 
skill_significance <- ClimProjDiags::ArrayToList(skill_significance, - dim = 'time', - level = "sublist", - names = "dots") + dim = 'time', + level = "sublist", + names = "dots") } else { skill_significance <- NULL } # Define output file name and titles outfile <- paste0(outdir, name, ".png") toptitle <- paste(display_name, "-", data_cube$attrs$Variable$varName, - "-", system_name, "-", init_month, hcst_period) + "-", system_name, "-", init_month, hcst_period) months <- unique(lubridate::month(data_cube$attrs$Dates, - label = T, abb = F)) + label = T, abb = F)) titles <- as.vector(months) # Plot suppressWarnings( @@ -194,11 +214,11 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, asplit(skill, MARGIN=1), # Splitting array into a list longitude, latitude, special_args = skill_significance, - dot_symbol = 20, - toptitle = toptitle, - title_scale = 0.6, - titles = titles, - filled.continents=F, + dot_symbol = 20, + toptitle = toptitle, + title_scale = 0.6, + titles = titles, + filled.continents=F, brks = brks, cols = cols, col_inf = col_inf, @@ -280,13 +300,13 @@ plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { outfile <- paste0(outdir, "forecast_ensemble_mean_", i_syear, ".png") } toptitle <- paste("Forecast Ensemble Mean -", variable, "-", system_name, - "- Initialization:", i_syear) + "- Initialization:", i_syear) months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], label = T, abb = F) titles <- as.vector(months) # Plots PlotLayout(PlotEquiMap, c('longitude', 'latitude'), - i_ensemble_mean, longitude, latitude, + i_ensemble_mean, longitude, latitude, filled.continents = F, toptitle = toptitle, title_scale = 0.6, @@ -360,9 +380,9 @@ plot_most_likely_terciles <- function(recipe, archive, outfile <- paste0(outdir, "forecast_most_likely_tercile_", i_syear, ".png") } toptitle <- paste("Most Likely Tercile -", variable, "-", system_name, "-", - "Initialization:", i_syear) + "Initialization:", i_syear) months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], - label = T, abb = F) + label = T, abb = F) ## TODO: Ensure this works for daily and sub-daily cases titles <- as.vector(months) @@ -371,8 +391,8 @@ plot_most_likely_terciles <- function(recipe, archive, ## on. 
suppressWarnings( PlotLayout(PlotMostLikelyQuantileMap, c('bin', 'longitude', 'latitude'), - cat_dim = 'bin', - i_probs_fcst, longitude, latitude, + cat_dim = 'bin', + i_probs_fcst, longitude, latitude, coast_width = 1.5, title_scale = 0.6, legend_scale = 0.8, #cex_bar_titles = 0.6, diff --git a/recipes/atomic_recipes/recipe_system7c3s-tas.yml b/recipes/atomic_recipes/recipe_system7c3s-tas.yml index e4e0a087..b01fdd20 100644 --- a/recipes/atomic_recipes/recipe_system7c3s-tas.yml +++ b/recipes/atomic_recipes/recipe_system7c3s-tas.yml @@ -37,10 +37,12 @@ Analysis: save_outputs: 'none' # 'all'/'none'/'exp_only'/'fcst_only' Skill: metric: RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr mean_bias mean_bias_SS - save_outputs: 'all' # 'all'/'none'/list of metrics + save_outputs: 'all' # 'all'/'none' Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] save_outputs: 'percentiles_only' # 'all'/'none'/'bins_only'/'percentiles_only' + Visualization: + plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles Indicators: index: no ncores: 10 -- GitLab From 32a51f4bc2b6239d1551e89647f4849634e1e066 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 12 Apr 2023 10:58:53 +0200 Subject: [PATCH 21/52] Separate check_recipe() and check_number_of_dependent_verification() --- .../check_number_of_dependent_verifications.R | 134 +++++++++++++++++ tools/check_recipe.R | 135 ------------------ 2 files changed, 134 insertions(+), 135 deletions(-) create mode 100644 tools/check_number_of_dependent_verifications.R diff --git a/tools/check_number_of_dependent_verifications.R b/tools/check_number_of_dependent_verifications.R new file mode 100644 index 00000000..0c85d09f --- /dev/null +++ b/tools/check_number_of_dependent_verifications.R @@ -0,0 +1,134 @@ +check_number_of_dependent_verifications <- function(recipe) { + # Number of verifications depends on the variables and indicators requested + # and the order of the workflow: + # workflow: correction + indicator --> only 1 variable is calibrated + # workflow: indicator + correction --> the indicator and the ecv are calibrated + independent_verifications <- NULL + dependent_verifications <- NULL + dep <- 1 + # check workflow order: + if (all(c('Calibration', 'Indicators') %in% names(recipe$Analysis$Workflow))) { + cal_pos <- which(names(recipe$Analysis$Workflow) == 'Calibration') + ind_pos <- which(names(recipe$Analysis$Workflow) == 'Indicators') + if (cal_pos < ind_pos) { + workflow_independent <- FALSE + } else { + workflow_independent <- TRUE + } + } + if (workflow_independent) { + independent_verifications <- append(recipe$Analysis$Variables$ECVs, + recipe$Analysis$Variables$Indicators) + } else { + if (is.null(recipe$Analysis$Variables$Indicators) || + (length(recipe$Analysis$Variables$Indicators) == 1 && + is.null(recipe$Analysis$Variables$ECVs))) { + independent_verifications <- append(recipe$Analysis$Variables$ECVs, + recipe$Analysis$Variables$Indicators) + } else { + ecvs <- recipe$Analysi$Variables$ECVs + inds <- recipe$Analysi$Variables$Indicators + ind_table <- read_yaml(paste0(recipe$Run$code_dir, + "conf/indicators_table.yml")) + # first, loop on ecvs if any and compare to indicators + done <- NULL # to gather the indicators reviewed + if (!is.null(ecvs)) { + for (i in 1:length(ecvs)) { + dependent <- list(ecvs[[i]]) + for (j in 1:length(inds)) { + if (ind_table[inds[[j]]$name][[1]]$ECVs == ecvs[[i]]$name) { + if (ind_table[inds[[j]]$name][[1]]$freq == ecvs[[i]]$freq) { + # they are dependent + dependent <- 
append(dependent, inds[[j]]) + done <- append(done, inds[[j]]) + } + } + } + if (length(dependent) == 1) { + dependent <- NULL + independent_verifications <- append(independent_verifications, + list(ecvs[[i]])) + } else { + dependent_verifications <- append(dependent_verifications, + list(dependent)) + } + } + # There are indicators not reviewed yet? + if (length(done) < length(inds)) { + if (length(inds) == 1) { + independent_verifications <- append(independent_verifications, + inds) + } else { + done <- NULL + for (i in 1:(length(inds) - 1)) { + dependent <- list(inds[[i]]$name) + if (is.na(match(unlist(dependent), unlist(done)))) { + for (j in (i+1):length(inds)) { + if (ind_table[inds[[i]]$name][[1]]$ECVs == + ind_table[inds[[j]]$name][[1]]$ECVs) { + if (ind_table[inds[[i]]$name][[1]]$freq == + ind_table[inds[[j]]$name][[1]]$freq) { + dependent <- append(dependent, inds[[j]]$name) + done <- dependent + } + } + } + } + if (length(dependent) == 1) { + independent_verifications <- dependent + dependent <- NULL + } else { + dependent_verifications <- dependent + } + } + } + } + } else { # there are only Indicators: + done <- NULL + for (i in 1:(length(inds) - 1)) { + dependent <- list(inds[[i]]$name) + if (is.na(match(unlist(dependent), unlist(done)))) { + for (j in (i+1):length(inds)) { + if (ind_table[inds[[i]]$name][[1]]$ECVs == + ind_table[inds[[j]]$name][[1]]$ECVs) { + if (ind_table[inds[[i]]$name][[1]]$freq == + ind_table[inds[[j]]$name][[1]]$freq) { + dependent <- append(dependent, inds[[j]]$name) + done <- dependent + } + } + } + } + if (length(dependent) == 1) { + independent_verifications <- dependent + dependent <- NULL + } else { + dependent_verifications <- dependent + } + } + } + } + } + if (!is.null(independent_verifications)) { + info(logger, paste("The variables for independent verification are ", + paste(independent_verifications, collapse = " "))) + } + if (!is.null(dependent_verifications)) { + info(logger, paste("The variables for dependent verification are: ", + paste(dependent_verifications, collapse = " "))) + } + # remove unnecessary names in objects to be removed + return(list(independent = independent_verifications, + dependent = dependent_verifications)) +} +#workflow <- list(Calibration = list(method = 'SBC'), +# Skill = list(metric = 'RPSS')) +#ApplyWorkflow <- function(workflow) { + +#res <- do.call('CST_BiasCorrection', +# args = list(exp = lonlat_data$exp, +# obs = lonlat_data$obs)) + + + + diff --git a/tools/check_recipe.R b/tools/check_recipe.R index eaec8f9a..d8acc8cd 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -430,138 +430,3 @@ check_recipe <- function(recipe) { # return(append(nverifications, fcst.sdate)) } } - -check_number_of_dependent_verifications <- function(recipe) { - # Number of verifications depends on the variables and indicators requested - # and the order of the workflow: - # workflow: correction + indicator --> only 1 variable is calibrated - # workflow: indicator + correction --> the indicator and the ecv are calibrated - independent_verifications <- NULL - dependent_verifications <- NULL - dep <- 1 - # check workflow order: - if (all(c('Calibration', 'Indicators') %in% names(recipe$Analysis$Workflow))) { - cal_pos <- which(names(recipe$Analysis$Workflow) == 'Calibration') - ind_pos <- which(names(recipe$Analysis$Workflow) == 'Indicators') - if (cal_pos < ind_pos) { - workflow_independent <- FALSE - } else { - workflow_independent <- TRUE - } - } - if (workflow_independent) { - independent_verifications <- 
append(recipe$Analysis$Variables$ECVs, - recipe$Analysis$Variables$Indicators) - } else { - if (is.null(recipe$Analysis$Variables$Indicators) || - (length(recipe$Analysis$Variables$Indicators) == 1 && - is.null(recipe$Analysis$Variables$ECVs))) { - independent_verifications <- append(recipe$Analysis$Variables$ECVs, - recipe$Analysis$Variables$Indicators) - } else { - ecvs <- recipe$Analysi$Variables$ECVs - inds <- recipe$Analysi$Variables$Indicators - ind_table <- read_yaml(paste0(recipe$Run$code_dir, - "conf/indicators_table.yml")) - # first, loop on ecvs if any and compare to indicators - done <- NULL # to gather the indicators reviewed - if (!is.null(ecvs)) { - for (i in 1:length(ecvs)) { - dependent <- list(ecvs[[i]]) - for (j in 1:length(inds)) { - if (ind_table[inds[[j]]$name][[1]]$ECVs == ecvs[[i]]$name) { - if (ind_table[inds[[j]]$name][[1]]$freq == ecvs[[i]]$freq) { - # they are dependent - dependent <- append(dependent, inds[[j]]) - done <- append(done, inds[[j]]) - } - } - } - if (length(dependent) == 1) { - dependent <- NULL - independent_verifications <- append(independent_verifications, - list(ecvs[[i]])) - } else { - dependent_verifications <- append(dependent_verifications, - list(dependent)) - } - } - # There are indicators not reviewed yet? - if (length(done) < length(inds)) { - if (length(inds) == 1) { - independent_verifications <- append(independent_verifications, - inds) - } else { - done <- NULL - for (i in 1:(length(inds) - 1)) { - dependent <- list(inds[[i]]$name) - if (is.na(match(unlist(dependent), unlist(done)))) { - for (j in (i+1):length(inds)) { - if (ind_table[inds[[i]]$name][[1]]$ECVs == - ind_table[inds[[j]]$name][[1]]$ECVs) { - if (ind_table[inds[[i]]$name][[1]]$freq == - ind_table[inds[[j]]$name][[1]]$freq) { - dependent <- append(dependent, inds[[j]]$name) - done <- dependent - } - } - } - } - if (length(dependent) == 1) { - independent_verifications <- dependent - dependent <- NULL - } else { - dependent_verifications <- dependent - } - } - } - } - } else { # there are only Indicators: - done <- NULL - for (i in 1:(length(inds) - 1)) { - dependent <- list(inds[[i]]$name) - if (is.na(match(unlist(dependent), unlist(done)))) { - for (j in (i+1):length(inds)) { - if (ind_table[inds[[i]]$name][[1]]$ECVs == - ind_table[inds[[j]]$name][[1]]$ECVs) { - if (ind_table[inds[[i]]$name][[1]]$freq == - ind_table[inds[[j]]$name][[1]]$freq) { - dependent <- append(dependent, inds[[j]]$name) - done <- dependent - } - } - } - } - if (length(dependent) == 1) { - independent_verifications <- dependent - dependent <- NULL - } else { - dependent_verifications <- dependent - } - } - } - } - } - if (!is.null(independent_verifications)) { - info(logger, paste("The variables for independent verification are ", - paste(independent_verifications, collapse = " "))) - } - if (!is.null(dependent_verifications)) { - info(logger, paste("The variables for dependent verification are: ", - paste(dependent_verifications, collapse = " "))) - } - # remove unnecessary names in objects to be removed - return(list(independent = independent_verifications, - dependent = dependent_verifications)) -} -#workflow <- list(Calibration = list(method = 'SBC'), -# Skill = list(metric = 'RPSS')) -#ApplyWorkflow <- function(workflow) { - -#res <- do.call('CST_BiasCorrection', -# args = list(exp = lonlat_data$exp, -# obs = lonlat_data$obs)) - - - - -- GitLab From 6e7f94b6e9a81e8330ad7ece3b09aba0f597e145 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 12 Apr 2023 12:07:35 +0200 Subject: 
[PATCH 22/52] Remove CST_Anomaly --- modules/Anomalies/Anomalies.R | 2 - modules/Anomalies/tmp/CST_Anomaly.R | 246 ---------------------------- 2 files changed, 248 deletions(-) delete mode 100644 modules/Anomalies/tmp/CST_Anomaly.R diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R index b32e8849..5eb737b3 100644 --- a/modules/Anomalies/Anomalies.R +++ b/modules/Anomalies/Anomalies.R @@ -1,5 +1,3 @@ -source("modules/Anomalies/tmp/CST_Anomaly.R") - # Compute the hcst, obs and fcst anomalies with or without cross-validation # and return them, along with the hcst and obs climatologies. diff --git a/modules/Anomalies/tmp/CST_Anomaly.R b/modules/Anomalies/tmp/CST_Anomaly.R deleted file mode 100644 index f38e39b0..00000000 --- a/modules/Anomalies/tmp/CST_Anomaly.R +++ /dev/null @@ -1,246 +0,0 @@ -#'Anomalies relative to a climatology along selected dimension with or without cross-validation -#' -#'@author Perez-Zanon Nuria, \email{nuria.perez@bsc.es} -#'@author Pena Jesus, \email{jesus.pena@bsc.es} -#'@description This function computes the anomalies relative to a climatology -#'computed along the selected dimension (usually starting dates or forecast -#'time) allowing the application or not of crossvalidated climatologies. The -#'computation is carried out independently for experimental and observational -#'data products. -#' -#'@param exp An object of class \code{s2dv_cube} as returned by \code{CST_Load} -#' function, containing the seasonal forecast experiment data in the element -#' named \code{$data}. -#'@param obs An object of class \code{s2dv_cube} as returned by \code{CST_Load} -#' function, containing the observed data in the element named \code{$data}. -#'@param dim_anom A character string indicating the name of the dimension -#' along which the climatology will be computed. The default value is 'sdate'. -#'@param cross A logical value indicating whether cross-validation should be -#' applied or not. Default = FALSE. -#'@param memb_dim A character string indicating the name of the member -#' dimension. It must be one dimension in 'exp' and 'obs'. If there is no -#' member dimension, set NULL. The default value is 'member'. -#'@param memb A logical value indicating whether to subtract the climatology -#' based on the individual members (TRUE) or the ensemble mean over all -#' members (FALSE) when calculating the anomalies. The default value is TRUE. -#'@param dat_dim A character vector indicating the name of the dataset and -#' member dimensions. If there is no dataset dimension, it can be NULL. -#' The default value is "c('dataset', 'member')". -#'@param filter_span A numeric value indicating the degree of smoothing. This -#' option is only available if parameter \code{cross} is set to FALSE. -#'@param ftime_dim A character string indicating the name of the temporal -#' dimension where the smoothing with 'filter_span' will be applied. It cannot -#' be NULL if 'filter_span' is provided. The default value is 'ftime'. -#'@param ncores An integer indicating the number of cores to use for parallel -#' computation. The default value is NULL. It will be used only when -#' 'filter_span' is not NULL. -#' -#'@return A list with two S3 objects, 'exp' and 'obs', of the class -#''s2dv_cube', containing experimental and date-corresponding observational -#'anomalies, respectively. These 's2dv_cube's can be ingested by other functions -#'in CSTools. 
-#' -#'@examples -#'# Example 1: -#'mod <- 1 : (2 * 3 * 4 * 5 * 6 * 7) -#'dim(mod) <- c(dataset = 2, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7) -#'obs <- 1 : (1 * 1 * 4 * 5 * 6 * 7) -#'dim(obs) <- c(dataset = 1, member = 1, sdate = 4, ftime = 5, lat = 6, lon = 7) -#'lon <- seq(0, 30, 5) -#'lat <- seq(0, 25, 5) -#'exp <- list(data = mod, lat = lat, lon = lon) -#'obs <- list(data = obs, lat = lat, lon = lon) -#'attr(exp, 'class') <- 's2dv_cube' -#'attr(obs, 'class') <- 's2dv_cube' -#' -#'anom <- CST_Anomaly(exp = exp, obs = obs, cross = FALSE, memb = TRUE) -#' -#'@seealso \code{\link[s2dv]{Ano_CrossValid}}, \code{\link[s2dv]{Clim}} and \code{\link{CST_Load}} -#' -#'@import multiApply -#'@importFrom s2dv InsertDim Clim Ano_CrossValid Reorder -#'@export -CST_Anomaly <- function(exp = NULL, obs = NULL, dim_anom = 'sdate', cross = FALSE, - memb_dim = 'member', memb = TRUE, dat_dim = c('dataset', 'member'), - filter_span = NULL, ftime_dim = 'ftime', ncores = NULL) { - # s2dv_cube - if (!inherits(exp, 's2dv_cube') & !is.null(exp) || - !inherits(obs, 's2dv_cube') & !is.null(obs)) { - stop("Parameter 'exp' and 'obs' must be of the class 's2dv_cube', ", - "as output by CSTools::CST_Load.") - } - # exp and obs - if (is.null(exp$data) & is.null(obs$data)) { - stop("One of the parameter 'exp' or 'obs' cannot be NULL.") - } - case_exp = case_obs = 0 - if (is.null(exp)) { - exp <- obs - case_obs = 1 - warning("Parameter 'exp' is not provided and 'obs' will be used instead.") - } - if (is.null(obs)) { - obs <- exp - case_exp = 1 - warning("Parameter 'obs' is not provided and 'exp' will be used instead.") - } - if(any(is.null(names(dim(exp$data))))| any(nchar(names(dim(exp$data))) == 0) | - any(is.null(names(dim(obs$data))))| any(nchar(names(dim(obs$data))) == 0)) { - stop("Parameter 'exp' and 'obs' must have dimension names in element 'data'.") - } - if(!all(names(dim(exp$data)) %in% names(dim(obs$data))) | - !all(names(dim(obs$data)) %in% names(dim(exp$data)))) { - stop("Parameter 'exp' and 'obs' must have same dimension names in element 'data'.") - } - dim_exp <- dim(exp$data) - dim_obs <- dim(obs$data) - dimnames_data <- names(dim_exp) - # dim_anom - if (is.numeric(dim_anom) & length(dim_anom) == 1) { - warning("Parameter 'dim_anom' must be a character string and a numeric value will not be ", - "accepted in the next release. 
The corresponding dimension name is assigned.") - dim_anom <- dimnames_data[dim_anom] - } - if (!is.character(dim_anom)) { - stop("Parameter 'dim_anom' must be a character string.") - } - if (!dim_anom %in% names(dim_exp) | !dim_anom %in% names(dim_obs)) { - stop("Parameter 'dim_anom' is not found in 'exp' or in 'obs' dimension in element 'data'.") - } - if (dim_exp[dim_anom] <= 1 | dim_obs[dim_anom] <= 1) { - stop("The length of dimension 'dim_anom' in label 'data' of the parameter ", - "'exp' and 'obs' must be greater than 1.") - } - # cross - if (!is.logical(cross) | !is.logical(memb) ) { - stop("Parameters 'cross' and 'memb' must be logical.") - } - if (length(cross) > 1 | length(memb) > 1 ) { - cross <- cross[1] - warning("Parameter 'cross' has length greater than 1 and only the first element", - "will be used.") - } - # memb - if (length(memb) > 1) { - memb <- memb[1] - warning("Parameter 'memb' has length greater than 1 and only the first element", - "will be used.") - } - # memb_dim - if (!is.null(memb_dim)) { - if (!is.character(memb_dim) | length(memb_dim) > 1) { - stop("Parameter 'memb_dim' must be a character string.") - } - if (!memb_dim %in% names(dim_exp) | !memb_dim %in% names(dim_obs)) { - stop("Parameter 'memb_dim' is not found in 'exp' or in 'obs' dimension.") - } - } - # dat_dim - if (!is.null(dat_dim)) { - if (!is.character(dat_dim)) { - stop("Parameter 'dat_dim' must be a character vector.") - } - if (!all(dat_dim %in% names(dim_exp)) | !all(dat_dim %in% names(dim_obs))) { - stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension in element 'data'.", - " Set it as NULL if there is no dataset dimension.") - } - } - # filter_span - if (!is.null(filter_span)) { - if (!is.numeric(filter_span)) { - warning("Paramater 'filter_span' is not numeric and any filter", - " is being applied.") - filter_span <- NULL - } - # ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { - stop("Parameter 'ncores' must be a positive integer.") - } - } - # ftime_dim - if (!is.character(ftime_dim)) { - stop("Parameter 'ftime_dim' must be a character string.") - } - if (!ftime_dim %in% names(dim_exp) | !memb_dim %in% names(dim_obs)) { - stop("Parameter 'ftime_dim' is not found in 'exp' or in 'obs' dimension in element 'data'.") - } - } - - # Computating anomalies - #---------------------- - - # With cross-validation - if (cross) { - ano <- Ano_CrossValid(exp = exp$data, obs = obs$data, - time_dim = dim_anom, - memb_dim = memb_dim, - memb = memb, - dat_dim = dat_dim, - ncores = ncores) - - # Without cross-validation - } else { - tmp <- Clim(exp = exp$data, obs = obs$data, - time_dim = dim_anom, - memb_dim = memb_dim, - memb = memb, - dat_dim = dat_dim, - ncores = ncores) - if (!is.null(filter_span)) { - tmp$clim_exp <- Apply(tmp$clim_exp, - target_dims = c(ftime_dim), - output_dims = c(ftime_dim), - fun = .Loess, - loess_span = filter_span, - ncores = ncores)$output1 - tmp$clim_obs <- Apply(tmp$clim_obs, - target_dims = c(ftime_dim), - output_dims = c(ftime_dim), - fun = .Loess, - loess_span = filter_span, - ncores = ncores)$output1 - } - if (memb) { - clim_exp <- tmp$clim_exp - clim_obs <- tmp$clim_obs - } else { - clim_exp <- InsertDim(tmp$clim_exp, 1, dim_exp[memb_dim]) - clim_obs <- InsertDim(tmp$clim_obs, 1, dim_obs[memb_dim]) - } - clim_exp <- InsertDim(clim_exp, 1, dim_exp[dim_anom]) - clim_obs <- InsertDim(clim_obs, 1, dim_obs[dim_anom]) - ano <- NULL - - # Permuting back dimensions to original order - clim_exp <- 
Reorder(clim_exp, dimnames_data) - clim_obs <- Reorder(clim_obs, dimnames_data) - - ano$exp <- exp$data - clim_exp - ano$obs <- obs$data - clim_obs - } - - exp$data <- ano$exp - obs$data <- ano$obs - - # Outputs - # ~~~~~~~~~ - if (case_obs == 1) { - return(obs) - } - else if (case_exp == 1) { - return(exp) - } - else { - return(list(exp = exp, obs = obs)) - } -} - -.Loess <- function(clim, loess_span) { - data <- data.frame(ensmean = clim, day = 1 : length(clim)) - loess_filt <- loess(ensmean ~ day, data, span = loess_span) - output <- predict(loess_filt) - return(output) -} - -- GitLab From 6ae017f259cee5182e9ea347347f46c19c60197e Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 12 Apr 2023 12:12:21 +0200 Subject: [PATCH 23/52] Source get_dir and get_filename --- modules/Saving/Saving.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 85c3a3a0..cf74e90d 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -1,6 +1,7 @@ ## TODO: Save obs percentiles -source("modules/Saving/paths2save.R") +source("modules/Saving/R/get_dir.R") +source("modules/Saving/R/get_filename.R") save_data <- function(recipe, data, skill_metrics = NULL, -- GitLab From e14a92df6bd55331cfce3e4953974da16f8960ba Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 12 Apr 2023 12:17:28 +0200 Subject: [PATCH 24/52] Remove save_data() from test --- modules/test_seasonal.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R index 4dd34b61..4535d41b 100644 --- a/modules/test_seasonal.R +++ b/modules/test_seasonal.R @@ -19,7 +19,7 @@ skill_metrics <- compute_skill_metrics(recipe, data) # Compute percentiles and probability bins probabilities <- compute_probabilities(recipe, data) # Export all data to netCDF -save_data(recipe, data, skill_metrics, probabilities) +# save_data(recipe, data, skill_metrics, probabilities) # Plot data plot_data(recipe, data, skill_metrics, probabilities, significance = T) -- GitLab From dc8bc8049e0df8c98154658b5fbbda3cd309986c Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 12 Apr 2023 17:02:04 +0200 Subject: [PATCH 25/52] replace all tabs with spaces --- modules/Saving/Saving.R | 180 +++++++++---------- modules/Visualization/Visualization.R | 248 +++++++++++++------------- 2 files changed, 214 insertions(+), 214 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index cf74e90d..fb7910a2 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -4,8 +4,8 @@ source("modules/Saving/R/get_dir.R") source("modules/Saving/R/get_filename.R") save_data <- function(recipe, data, - skill_metrics = NULL, - probabilities = NULL) { + skill_metrics = NULL, + probabilities = NULL) { # Wrapper for the saving functions. # recipe: The auto-s2s recipe # archive: The auto-s2s archive @@ -23,8 +23,8 @@ save_data <- function(recipe, data, if (is.null(data)) { error(recipe$Run$logger, - paste("The 'data' parameter is mandatory. It should be a list", - "of at least two s2dv_cubes containing the hcst and obs.")) + paste("The 'data' parameter is mandatory. 
It should be a list", + "of at least two s2dv_cubes containing the hcst and obs.")) stop() } # Create output directory @@ -33,15 +33,15 @@ save_data <- function(recipe, data, # Export hindcast, forecast and observations onto outfile save_forecast(recipe = recipe, data_cube = data$hcst, - type = 'hcst', - outdir = outdir) + type = 'hcst', + outdir = outdir) if (!is.null(data$fcst)) { save_forecast(recipe = recipe, data_cube = data$fcst, - type = 'fcst', - outdir = outdir) + type = 'fcst', + outdir = outdir) } save_observations(recipe = recipe, data_cube = data$obs, - outdir = outdir) + outdir = outdir) # Separate ensemble correlation from the rest of the metrics, as it has one # extra dimension "ensemble" and must be saved to a different file @@ -59,29 +59,29 @@ save_data <- function(recipe, data, # Export skill metrics onto outfile if (!is.null(skill_metrics)) { save_metrics(recipe = recipe, skill = skill_metrics, - data_cube = data$hcst, - outdir = outdir) + data_cube = data$hcst, + outdir = outdir) } if (!is.null(corr_metrics)) { save_corr(recipe = recipe, skill = corr_metrics, - data_cube = data$hcst, - outdir = outdir) + data_cube = data$hcst, + outdir = outdir) } # Export probabilities onto outfile if (!is.null(probabilities)) { save_percentiles(recipe = recipe, percentiles = probabilities$percentiles, - data_cube = data$hcst, - outdir = outdir) + data_cube = data$hcst, + outdir = outdir) save_probabilities(recipe = recipe, probs = probabilities$probs, - data_cube = data$hcst, - type = "hcst", - outdir = outdir) + data_cube = data$hcst, + type = "hcst", + outdir = outdir) if (!is.null(probabilities$probs_fcst)) { save_probabilities(recipe = recipe, probs = probabilities$probs_fcst, - data_cube = data$fcst, - type = "fcst", - outdir = outdir) + data_cube = data$fcst, + type = "fcst", + outdir = outdir) } } } @@ -91,18 +91,18 @@ get_global_attributes <- function(recipe, archive) { # netCDF files. 
parameters <- recipe$Analysis hcst_period <- paste0(parameters$Time$hcst_start, " to ", - parameters$Time$hcst_end) + parameters$Time$hcst_end) current_time <- paste0(as.character(Sys.time()), " ", Sys.timezone()) system_name <- parameters$Datasets$System$name reference_name <- parameters$Datasets$Reference$name attrs <- list(reference_period = hcst_period, - institution_system = archive$System[[system_name]]$institution, - institution_reference = archive$Reference[[reference_name]]$institution, - system = system_name, - reference = reference_name, - calibration_method = parameters$Workflow$Calibration$method, - computed_on = current_time) + institution_system = archive$System[[system_name]]$institution, + institution_reference = archive$Reference[[reference_name]]$institution, + system = system_name, + reference = reference_name, + calibration_method = parameters$Workflow$Calibration$method, + computed_on = current_time) return(attrs) } @@ -122,14 +122,14 @@ get_times <- function(store.freq, fcst.horizon, leadtimes, sdate, calendar) { dim(time) <- length(time) sdate <- as.Date(sdate, format = '%Y%m%d') # reformatting metadata <- list(time = list(units = paste0(ref, sdate, 'T00:00:00'), - calendar = calendar)) + calendar = calendar)) attr(time, 'variables') <- metadata names(dim(time)) <- 'time' sdate <- 1:length(sdate) dim(sdate) <- length(sdate) metadata <- list(sdate = list(standard_name = paste(strtoi(sdate), - collapse=", "), + collapse=", "), units = paste0('Init date'))) attr(sdate, 'variables') <- metadata names(dim(sdate)) <- 'sdate' @@ -157,9 +157,9 @@ get_latlon <- function(latitude, longitude) { } save_forecast <- function(recipe, - data_cube, - type = "hcst", - agg = "global", + data_cube, + type = "hcst", + agg = "global", outdir = NULL) { # Loops over the years in the s2dv_cube containing a hindcast or forecast # and exports each year to a netCDF file. @@ -184,7 +184,7 @@ save_forecast <- function(recipe, # Generate vector containing leadtimes dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) + cal = calendar) if (fcst.horizon == 'decadal') { ## Method 1: Use the first date as init_date. 
But it may be better to use ## the real initialized date (ask users) @@ -194,21 +194,21 @@ save_forecast <- function(recipe, if (type == 'hcst') { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', sprintf('%02d', init_month), '-01'), - cal = calendar) + cal = calendar) } else if (type == 'fcst') { init_date <- as.PCICt(paste0(recipe$Analysis$Time$fcst_year[1], '-', sprintf('%02d', init_month), '-01'), - cal = calendar) + cal = calendar) } } else { if (type == 'hcst') { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) + format = '%Y%m%d', cal = calendar) } else if (type == 'fcst') { init_date <- as.PCICt(paste0(recipe$Analysis$Time$fcst_year, recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) + format = '%Y%m%d', cal = calendar) } } # Get time difference in hours @@ -245,8 +245,8 @@ save_forecast <- function(recipe, } metadata <- list(fcst = list(name = var.expname, - standard_name = var.sdname, - long_name = var.longname, + standard_name = var.sdname, + long_name = var.longname, units = var.units)) attr(fcst[[1]], 'variables') <- metadata names(dim(fcst[[1]])) <- dims @@ -274,7 +274,7 @@ save_forecast <- function(recipe, # Generate name of output file outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "exp") + fcst.sdate, agg, "exp") # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { @@ -291,14 +291,14 @@ save_forecast <- function(recipe, } } info(recipe$Run$logger, paste("#####", toupper(type), - "SAVED TO NETCDF FILE #####")) + "SAVED TO NETCDF FILE #####")) } save_observations <- function(recipe, - data_cube, + data_cube, agg = "global", - outdir = NULL) { + outdir = NULL) { # Loops over the years in the s2dv_cube containing the observations and # exports each year to a netCDF file. # data_cube: s2dv_cube containing the data and metadata @@ -323,7 +323,7 @@ save_observations <- function(recipe, # Generate vector containing leadtimes ## TODO: Move to a separate function? dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) + cal = calendar) if (fcst.horizon == 'decadal') { ## Method 1: Use the first date as init_date. 
But it may be better to use ## the real initialized date (ask users) @@ -332,12 +332,12 @@ save_observations <- function(recipe, init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', sprintf('%02d', init_month), '-01'), - cal = calendar) + cal = calendar) } else { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) + format = '%Y%m%d', cal = calendar) } # Get time difference in hours leadtimes <- as.numeric(dates - init_date)/3600 @@ -372,8 +372,8 @@ save_observations <- function(recipe, } metadata <- list(fcst = list(name = var.expname, - standard_name = var.sdname, - long_name = var.longname, + standard_name = var.sdname, + long_name = var.longname, units = var.units)) attr(fcst[[1]], 'variables') <- metadata names(dim(fcst[[1]])) <- dims @@ -414,7 +414,7 @@ save_observations <- function(recipe, # Generate name of output file outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "obs") + fcst.sdate, agg, "obs") # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { @@ -443,10 +443,10 @@ save_observations <- function(recipe, # } save_metrics <- function(recipe, - skill, + skill, data_cube, agg = "global", - outdir = NULL) { + outdir = NULL) { # This function adds metadata to the skill metrics in 'skill' # and exports them to a netCDF file inside 'outdir'. @@ -467,10 +467,10 @@ save_metrics <- function(recipe, if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && (recipe$Analysis$Workflow$Anomalies$compute)) { global_attributes <- c(list(from_anomalies = "Yes"), - global_attributes) + global_attributes) } else { global_attributes <- c(list(from_anomalies = "No"), - global_attributes) + global_attributes) } attr(skill[[1]], 'global_attrs') <- global_attributes @@ -487,9 +487,9 @@ save_metrics <- function(recipe, dims <- c(lalo, 'time') } metadata <- list(metric = list(name = metric, - standard_name = sdname, - long_name = long_name, - missing_value = missing_val)) + standard_name = sdname, + long_name = long_name, + missing_value = missing_val)) attr(skill[[i]], 'variables') <- metadata names(dim(skill[[i]])) <- dims } @@ -501,13 +501,13 @@ save_metrics <- function(recipe, # Generate vector containing leadtimes dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) + cal = calendar) if (fcst.horizon == 'decadal') { init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', sprintf('%02d', init_month), '-01'), - cal = calendar) + cal = calendar) } else { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), @@ -530,7 +530,7 @@ save_metrics <- function(recipe, } else { if (!is.null(recipe$Analysis$Time$fcst_year)) { fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) + recipe$Analysis$Time$sdate) } else { fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) } @@ -544,7 +544,7 @@ save_metrics <- function(recipe, outdir <- get_dir(recipe) } outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "skill") + fcst.sdate, agg, "skill") # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { @@ -563,10 +563,10 @@ save_metrics <- function(recipe, } save_corr <- function(recipe, - skill, + skill, 
data_cube, agg = "global", - outdir = NULL) { + outdir = NULL) { # This function adds metadata to the ensemble correlation in 'skill' # and exports it to a netCDF file inside 'outdir'. @@ -585,10 +585,10 @@ save_corr <- function(recipe, if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && (recipe$Analysis$Workflow$Anomalies$compute)) { global_attributes <- c(global_attributes, - list(from_anomalies = "Yes")) + list(from_anomalies = "Yes")) } else { global_attributes <- c(global_attributes, - list(from_anomalies = "No")) + list(from_anomalies = "No")) } attr(skill[[1]], 'global_attrs') <- global_attributes @@ -605,9 +605,9 @@ save_corr <- function(recipe, dims <- c(lalo, 'ensemble', 'time') } metadata <- list(metric = list(name = metric, - standard_name = sdname, - long_name = long_name, - missing_value = missing_val)) + standard_name = sdname, + long_name = long_name, + missing_value = missing_val)) attr(skill[[i]], 'variables') <- metadata names(dim(skill[[i]])) <- dims } @@ -619,12 +619,12 @@ save_corr <- function(recipe, # Generate vector containing leadtimes dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) + cal = calendar) if (fcst.horizon == 'decadal') { init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', sprintf('%02d', init_month), '-01'), - cal = calendar) + cal = calendar) } else { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), @@ -661,7 +661,7 @@ save_corr <- function(recipe, outdir <- get_dir(recipe) } outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "corr") + fcst.sdate, agg, "corr") # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { @@ -681,10 +681,10 @@ save_corr <- function(recipe, } save_percentiles <- function(recipe, - percentiles, + percentiles, data_cube, agg = "global", - outdir = NULL) { + outdir = NULL) { # This function adds metadata to the percentiles # and exports them to a netCDF file inside 'outdir'. 
archive <- get_archive(recipe) @@ -703,10 +703,10 @@ save_percentiles <- function(recipe, if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && (recipe$Analysis$Workflow$Anomalies$compute)) { global_attributes <- c(list(from_anomalies = "Yes"), - global_attributes) + global_attributes) } else { global_attributes <- c(list(from_anomalies = "No"), - global_attributes) + global_attributes) } attr(percentiles[[1]], 'global_attrs') <- global_attributes @@ -730,12 +730,12 @@ save_percentiles <- function(recipe, calendar <- archive$System[[global_attributes$system]]$calendar # Generate vector containing leadtimes dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) + cal = calendar) if (fcst.horizon == 'decadal') { init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', sprintf('%02d', init_month), '-01'), - cal = calendar) + cal = calendar) } else { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), @@ -771,7 +771,7 @@ save_percentiles <- function(recipe, outdir <- get_dir(recipe) } outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "percentiles") + fcst.sdate, agg, "percentiles") # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { country <- get_countries(grid) @@ -789,11 +789,11 @@ save_percentiles <- function(recipe, } save_probabilities <- function(recipe, - probs, - data_cube, + probs, + data_cube, agg = "global", - type = "hcst", - outdir = NULL) { + type = "hcst", + outdir = NULL) { # Loops over the years in the s2dv_cube containing a hindcast or forecast # and exports the corresponding category probabilities to a netCDF file. # probs: array containing the probability data @@ -817,10 +817,10 @@ save_probabilities <- function(recipe, if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && (recipe$Analysis$Workflow$Anomalies$compute)) { global_attributes <- c(list(from_anomalies = "Yes"), - global_attributes) + global_attributes) } else { global_attributes <- c(list(from_anomalies = "No"), - global_attributes) + global_attributes) } fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq @@ -829,12 +829,12 @@ save_probabilities <- function(recipe, # Generate vector containing leadtimes ## TODO: Move to a separate function? dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) + cal = calendar) if (fcst.horizon == 'decadal') { init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', sprintf('%02d', init_month), '-01'), - cal = calendar) + cal = calendar) } else { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), @@ -852,10 +852,10 @@ save_probabilities <- function(recipe, probs_syear <- lapply(probs, ClimProjDiags::Subset, 'syear', i, drop = 'selected') if (tolower(agg) == "global") { probs_syear <- lapply(probs_syear, function(x) { - Reorder(x, c(lalo, 'time'))}) + Reorder(x, c(lalo, 'time'))}) } else { probs_syear <- lapply(probs_syear, function(x) { - Reorder(x, c('country', 'time'))}) + Reorder(x, c('country', 'time'))}) } ## TODO: Replace for loop with something more efficient? 
@@ -891,7 +891,7 @@ save_probabilities <- function(recipe, # Generate name of output file outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "probs") + fcst.sdate, agg, "probs") # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { @@ -910,5 +910,5 @@ save_probabilities <- function(recipe, info(recipe$Run$logger, paste("#####", toupper(type), - "PROBABILITIES SAVED TO NETCDF FILE #####")) + "PROBABILITIES SAVED TO NETCDF FILE #####")) } diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index ce5d37e8..bd389fc8 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -4,11 +4,11 @@ ## TODO: Decadal plot names plot_data <- function(recipe, - data, - skill_metrics = NULL, - probabilities = NULL, - archive = NULL, - significance = F) { + data, + skill_metrics = NULL, + probabilities = NULL, + archive = NULL, + significance = F) { # Try to produce and save several basic plots. # recipe: the auto-s2s recipe as read by read_yaml() # archive: the auto-s2s archive as read by read_yaml() @@ -25,18 +25,18 @@ plot_data <- function(recipe, if ((is.null(skill_metrics)) && (is.null(data$fcst))) { error(recipe$Run$logger, "The Visualization module has been called, - but there is no fcst in 'data', and 'skill_metrics' is NULL - so there is no data that can be plotted.") + but there is no fcst in 'data', and 'skill_metrics' is NULL + so there is no data that can be plotted.") stop() } if (is.null(archive)) { if (tolower(recipe$Analysis$Horizon) == "seasonal") { archive <- - read_yaml(paste0("conf/archive.yml"))[[recipe$Run$filesystem]] + read_yaml(paste0("conf/archive.yml"))[[recipe$Run$filesystem]] } else if (tolower(recipe$Analysis$Horizon) == "decadal") { archive <- - read_yaml(paste0("conf/archive_decadal.yml"))[[recipe$Run$filesystem]] + read_yaml(paste0("conf/archive_decadal.yml"))[[recipe$Run$filesystem]] } } @@ -44,11 +44,11 @@ plot_data <- function(recipe, if ("skill_metrics" %in% plots) { if (!is.null(skill_metrics)) { plot_skill_metrics(recipe, archive, data$hcst, skill_metrics, outdir, - significance) + significance) } else { error(recipe$Run$logger, paste0("The skill metric plots have been requested, but the ", - "parameter 'skill_metrics' is NULL")) + "parameter 'skill_metrics' is NULL")) } } @@ -59,7 +59,7 @@ plot_data <- function(recipe, } else { error(recipe$Run$logger, paste0("The forecast ensemble mean plot has been requested, but ", - "there is no fcst element in 'data'")) + "there is no fcst element in 'data'")) } } @@ -67,17 +67,17 @@ plot_data <- function(recipe, if ("most_likely_terciles" %in% plots) { if ((!is.null(probabilities)) && (!is.null(data$fcst))) { plot_most_likely_terciles(recipe, archive, data$fcst, - probabilities, outdir) + probabilities, outdir) } else { error(recipe$Run$logger, - paste0("For the most likely terciles plot, both the fsct and the ", - "probabilities must be provided.")) + paste0("For the most likely terciles plot, both the fsct and the ", + "probabilities must be provided.")) } } } plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, - outdir, significance = F) { + outdir, significance = F) { # recipe: Auto-S2S recipe # archive: Auto-S2S archive # data_cube: s2dv_cube object with the corresponding hindcast data @@ -89,7 +89,7 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, # Abort if frequency is daily if (recipe$Analysis$Variables$freq == "daily_mean") 
{ error(recipe$Run$logger, "Visualization functions not yet implemented - for daily data.") + for daily data.") stop() } # Abort if skill_metrics is not list @@ -101,9 +101,9 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, longitude <- data_cube$coords$lon system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name hcst_period <- paste0(recipe$Analysis$Time$hcst_start, "-", - recipe$Analysis$Time$hcst_end) + recipe$Analysis$Time$hcst_end) init_month <- as.numeric(substr(recipe$Analysis$Time$sdate, - start = 1, stop = 2)) + start = 1, stop = 2)) month_label <- tolower(month.name[init_month]) month_abbreviation <- month.abb[init_month] @@ -119,8 +119,8 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, # Group different metrics by type skill_scores <- c("rpss", "bss90", "bss10", "frpss", "crpss", "mean_bias_ss", - "enscorr", "rpss_specs", "bss90_specs", "bss10_specs", - "enscorr_specs", "rmsss") + "enscorr", "rpss_specs", "bss90_specs", "bss10_specs", + "enscorr_specs", "rmsss") scores <- c("rps", "frps", "crps", "frps_specs") # Assign colorbar to each metric type ## TODO: Triangle ends @@ -128,56 +128,56 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, if (name %in% names(skill_metrics)) { # Define plot characteristics and metric name to display in plot if (name %in% c("rpss", "bss90", "bss10", "frpss", "crpss", - "rpss_specs", "bss90_specs", "bss10_specs", - "rmsss")) { - display_name <- toupper(strsplit(name, "_")[[1]][1]) + "rpss_specs", "bss90_specs", "bss10_specs", + "rmsss")) { + display_name <- toupper(strsplit(name, "_")[[1]][1]) skill <- skill_metrics[[name]] - brks <- seq(-1, 1, by = 0.2) - colorbar <- clim.colors(length(brks) + 1, diverging_palette) - cols <- colorbar[2:(length(colorbar) - 1)] - col_inf <- colorbar[1] - col_sup <- NULL + brks <- seq(-1, 1, by = 0.2) + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- colorbar[1] + col_sup <- NULL } else if (name == "mean_bias_ss") { - display_name <- "Mean Bias Skill Score" - skill <- skill_metrics[[name]] - brks <- seq(-1, 1, by = 0.2) - colorbar <- clim.colors(length(brks) + 1, diverging_palette) - cols <- colorbar[2:(length(colorbar) - 1)] - col_inf <- colorbar[1] - col_sup <- NULL + display_name <- "Mean Bias Skill Score" + skill <- skill_metrics[[name]] + brks <- seq(-1, 1, by = 0.2) + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- colorbar[1] + col_sup <- NULL } else if (name %in% c("enscorr", "enscorr_specs")) { - display_name <- "Ensemble Mean Correlation" - skill <- skill_metrics[[name]] - brks <- seq(-1, 1, by = 0.2) - cols <- clim.colors(length(brks) - 1, diverging_palette) - col_inf <- NULL - col_sup <- NULL + display_name <- "Ensemble Mean Correlation" + skill <- skill_metrics[[name]] + brks <- seq(-1, 1, by = 0.2) + cols <- clim.colors(length(brks) - 1, diverging_palette) + col_inf <- NULL + col_sup <- NULL } else if (name %in% scores) { - skill <- skill_metrics[[name]] - display_name <- toupper(strsplit(name, "_")[[1]][1]) + skill <- skill_metrics[[name]] + display_name <- toupper(strsplit(name, "_")[[1]][1]) brks <- seq(0, 1, by = 0.1) colorbar <- grDevices::hcl.colors(length(brks), sequential_palette) - cols <- colorbar[1:(length(colorbar) - 1)] - col_inf <- NULL - col_sup <- colorbar[length(colorbar)] + cols <- colorbar[1:(length(colorbar) - 1)] + col_inf <- NULL + col_sup <- 
colorbar[length(colorbar)] } else if (name == "enssprerr") { - skill <- skill_metrics[[name]] - display_name <- "Spread-to-Error Ratio" - brks <- c(0, 0.6, 0.7, 0.8, 0.9, 1, 1.2, 1.4, 1.6, 1.8, 2) - colorbar <- clim.colors(length(brks), diverging_palette) - cols <- colorbar[1:length(colorbar) - 1] - col_inf <- NULL - col_sup <- colorbar[length(colorbar)] + skill <- skill_metrics[[name]] + display_name <- "Spread-to-Error Ratio" + brks <- c(0, 0.6, 0.7, 0.8, 0.9, 1, 1.2, 1.4, 1.6, 1.8, 2) + colorbar <- clim.colors(length(brks), diverging_palette) + cols <- colorbar[1:length(colorbar) - 1] + col_inf <- NULL + col_sup <- colorbar[length(colorbar)] } else if (name == "mean_bias") { - skill <- skill_metrics[[name]] - display_name <- "Mean Bias" - max_value <- max(abs(quantile(skill, 0.02, na.rm = T)), - abs(quantile(skill, 0.98, na.rm = T))) - brks <- max_value * seq(-1, 1, by = 0.2) - colorbar <- clim.colors(length(brks) + 1, diverging_palette) - cols <- colorbar[2:(length(colorbar) - 1)] - col_inf <- colorbar[1] - col_sup <- colorbar[length(colorbar)] + skill <- skill_metrics[[name]] + display_name <- "Mean Bias" + max_value <- max(abs(quantile(skill, 0.02, na.rm = T)), + abs(quantile(skill, 0.98, na.rm = T))) + brks <- max_value * seq(-1, 1, by = 0.2) + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- colorbar[1] + col_sup <- colorbar[length(colorbar)] } options(bitmapType = "cairo") # Reorder dimensions @@ -188,28 +188,28 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, if ((significance) && (significance_name %in% names(skill_metrics))) { skill_significance <- skill_metrics[[significance_name]] skill_significance <- Reorder(skill_significance, c("time", - "longitude", - "latitude")) + "longitude", + "latitude")) # Split skill significance into list of lists, along the time dimension # This allows for plotting the significance dots correctly. 
skill_significance <- ClimProjDiags::ArrayToList(skill_significance, - dim = 'time', - level = "sublist", - names = "dots") + dim = 'time', + level = "sublist", + names = "dots") } else { skill_significance <- NULL } # Define output file name and titles if (tolower(recipe$Analysis$Horizon) == "seasonal") { - outfile <- paste0(outdir, name, "-", month_label, ".png") + outfile <- paste0(outdir, name, "-", month_label, ".png") } else { - outfile <- paste0(outdir, name, ".png") + outfile <- paste0(outdir, name, ".png") } toptitle <- paste(display_name, "-", data_cube$attrs$Variable$varName, - "-", system_name, "-", month_abbreviation, - hcst_period) + "-", system_name, "-", month_abbreviation, + hcst_period) months <- unique(lubridate::month(data_cube$attrs$Dates, - label = T, abb = F)) + label = T, abb = F)) titles <- as.vector(months) # Plot suppressWarnings( @@ -217,11 +217,11 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, asplit(skill, MARGIN=1), # Splitting array into a list longitude, latitude, special_args = skill_significance, - dot_symbol = 20, - toptitle = toptitle, - title_scale = 0.6, - titles = titles, - filled.continents=F, + dot_symbol = 20, + toptitle = toptitle, + title_scale = 0.6, + titles = titles, + filled.continents=F, brks = brks, cols = cols, col_inf = col_inf, @@ -252,7 +252,7 @@ plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { variable <- recipe$Analysis$Variables$name units <- attr(fcst$Variable, "variable")$units start_date <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) + recipe$Analysis$Time$sdate) # Compute ensemble mean ensemble_mean <- s2dv::MeanDims(fcst$data, 'ensemble') # Drop extra dims, add time dim if missing: @@ -263,13 +263,13 @@ plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { } if (!'syear' %in% names(dim(ensemble_mean))) { ensemble_mean <- Reorder(ensemble_mean, c("time", - "longitude", - "latitude")) + "longitude", + "latitude")) } else { ensemble_mean <- Reorder(ensemble_mean, c("syear", - "time", - "longitude", - "latitude")) + "time", + "longitude", + "latitude")) } ## TODO: Redefine column colors, possibly depending on variable if (variable == 'prlr') { @@ -282,7 +282,7 @@ plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { # Define brks, centered on in the case of anomalies ## if (grepl("anomaly", - fcst$attrs$Variable$metadata[[variable]]$long_name)) { + fcst$attrs$Variable$metadata[[variable]]$long_name)) { variable <- paste(variable, "anomaly") max_value <- max(abs(ensemble_mean)) ugly_intervals <- seq(-max_value, max_value, max_value/20) @@ -303,34 +303,34 @@ plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { outfile <- paste0(outdir, "forecast_ensemble_mean-", i_syear, ".png") } toptitle <- paste("Forecast Ensemble Mean -", variable, "-", system_name, - "- Initialization:", i_syear) + "- Initialization:", i_syear) months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], label = T, abb = F) titles <- as.vector(months) # Plots PlotLayout(PlotEquiMap, c('longitude', 'latitude'), - i_ensemble_mean, longitude, latitude, + i_ensemble_mean, longitude, latitude, filled.continents = F, - toptitle = toptitle, - title_scale = 0.6, - titles = titles, - units = units, - cols = cols, - brks = brks, - fileout = outfile, - bar_label_digits = 4, - bar_extra_margin = rep(0.7, 4), - bar_label_scale = 1.5, - axes_label_scale = 1.3) + toptitle = toptitle, + title_scale = 0.6, + titles = titles, + units = units, + cols = cols, + 
brks = brks, + fileout = outfile, + bar_label_digits = 4, + bar_extra_margin = rep(0.7, 4), + bar_label_scale = 1.5, + axes_label_scale = 1.3) } info(recipe$Run$logger, "##### FCST ENSEMBLE MEAN PLOT SAVED TO OUTPUT DIRECTORY #####") } plot_most_likely_terciles <- function(recipe, archive, - fcst, - probabilities, - outdir) { + fcst, + probabilities, + outdir) { ## TODO: Add 'anomaly' to plot title # Abort if frequency is daily @@ -343,22 +343,22 @@ plot_most_likely_terciles <- function(recipe, archive, system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name variable <- recipe$Analysis$Variables$name start_date <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) + recipe$Analysis$Time$sdate) # Retrieve and rearrange probability bins for the forecast if (is.null(probabilities$probs_fcst$prob_b33) || is.null(probabilities$probs_fcst$prob_33_to_66) || is.null(probabilities$probs_fcst$prob_a66)) { stop("The forecast tercile probability bins are not present inside ", - "'probabilities', the most likely tercile map cannot be plotted.") + "'probabilities', the most likely tercile map cannot be plotted.") } probs_fcst <- abind(probabilities$probs_fcst$prob_b33, - probabilities$probs_fcst$prob_33_to_66, - probabilities$probs_fcst$prob_a66, - along = 0) + probabilities$probs_fcst$prob_33_to_66, + probabilities$probs_fcst$prob_a66, + along = 0) names(dim(probs_fcst)) <- c("bin", - names(dim(probabilities$probs_fcst$prob_b33))) + names(dim(probabilities$probs_fcst$prob_b33))) ## TODO: Improve this section # Drop extra dims, add time dim if missing: @@ -370,7 +370,7 @@ plot_most_likely_terciles <- function(recipe, archive, probs_fcst <- Reorder(probs_fcst, c("time", "bin", "longitude", "latitude")) } else { probs_fcst <- Reorder(probs_fcst, - c("syear", "time", "bin", "longitude", "latitude")) + c("syear", "time", "bin", "longitude", "latitude")) } for (i_syear in start_date) { @@ -378,15 +378,15 @@ plot_most_likely_terciles <- function(recipe, archive, if (length(start_date) == 1) { i_probs_fcst <- probs_fcst outfile <- paste0(outdir, "forecast_most_likely_tercile-", start_date, - ".png") + ".png") } else { i_probs_fcst <- probs_fcst[which(start_date == i_syear), , , , ] outfile <- paste0(outdir, "forecast_most_likely_tercile-", i_syear, ".png") } toptitle <- paste("Most Likely Tercile -", variable, "-", system_name, "-", - "Initialization:", i_syear) + "Initialization:", i_syear) months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], - label = T, abb = F) + label = T, abb = F) ## TODO: Ensure this works for daily and sub-daily cases titles <- as.vector(months) @@ -395,19 +395,19 @@ plot_most_likely_terciles <- function(recipe, archive, ## on. 
suppressWarnings( PlotLayout(PlotMostLikelyQuantileMap, c('bin', 'longitude', 'latitude'), - cat_dim = 'bin', - i_probs_fcst, longitude, latitude, - coast_width = 1.5, - title_scale = 0.6, - legend_scale = 0.8, #cex_bar_titles = 0.6, - toptitle = toptitle, - titles = titles, - fileout = outfile, - bar_label_digits = 2, - bar_scale = rep(0.7, 4), - bar_label_scale = 1.2, - axes_label_scale = 1.3, - triangle_ends = c(F, F), width = 11, height = 8) + cat_dim = 'bin', + i_probs_fcst, longitude, latitude, + coast_width = 1.5, + title_scale = 0.6, + legend_scale = 0.8, #cex_bar_titles = 0.6, + toptitle = toptitle, + titles = titles, + fileout = outfile, + bar_label_digits = 2, + bar_scale = rep(0.7, 4), + bar_label_scale = 1.2, + axes_label_scale = 1.3, + triangle_ends = c(F, F), width = 11, height = 8) ) } -- GitLab From e17e7b5368f29d248bb5ece4e5ef1c1eecc25b0e Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 13 Apr 2023 09:37:58 +0200 Subject: [PATCH 26/52] Replace tabs with spaces --- modules/Anomalies/Anomalies.R | 54 +++--- modules/Calibration/Calibration.R | 118 +++++++------- modules/Loading/Loading.R | 156 +++++++++--------- modules/Saving/Saving.R | 110 ++++++------- modules/Skill/Skill.R | 262 +++++++++++++++--------------- tools/check_recipe.R | 120 +++++++------- tools/data_summary.R | 4 +- tools/prepare_outputs.R | 12 +- tools/read_atomic_recipe.R | 2 +- tools/write_autosubmit_conf.R | 42 ++--- 10 files changed, 440 insertions(+), 440 deletions(-) diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R index 5eb737b3..2e98b265 100644 --- a/modules/Anomalies/Anomalies.R +++ b/modules/Anomalies/Anomalies.R @@ -5,8 +5,8 @@ compute_anomalies <- function(recipe, data) { if (is.null(recipe$Analysis$Workflow$Anomalies$compute)) { error(recipe$Run$logger, - paste("The anomaly module has been called, but the element", - "'Workflow:Anomalies:compute' is missing from the recipe.")) + paste("The anomaly module has been called, but the element", + "'Workflow:Anomalies:compute' is missing from the recipe.")) stop() } @@ -22,13 +22,13 @@ compute_anomalies <- function(recipe, data) { # Compute anomalies anom <- CST_Anomaly(data$hcst, data$obs, - cross = cross, - memb = TRUE, - memb_dim = 'ensemble', - dim_anom = 'syear', - dat_dim = c('dat', 'ensemble'), - ftime_dim = 'time', - ncores = recipe$Analysis$ncores) + cross = cross, + memb = TRUE, + memb_dim = 'ensemble', + dim_anom = 'syear', + dat_dim = c('dat', 'ensemble'), + ftime_dim = 'time', + ncores = recipe$Analysis$ncores) # Reorder dims anom$exp$data <- Reorder(anom$exp$data, names(original_dims)) anom$obs$data <- Reorder(anom$obs$data, names(original_dims)) @@ -49,18 +49,18 @@ compute_anomalies <- function(recipe, data) { paste(data$hcst$attrs$Variable$variables[[var]]$long_name, "anomaly") # Change obs longname data$obs$attrs$Variable$variables[[var]]$long_name <- - paste(data$obs$attrs$Variable$variables[[var]]$long_name, "anomaly") + paste(data$obs$attrs$Variable$variables[[var]]$long_name, "anomaly") } # Compute forecast anomaly field if (!is.null(data$fcst)) { # Compute hindcast climatology ensemble mean clim <- s2dv::Clim(hcst_fullvalue$data, obs_fullvalue$data, - time_dim = "syear", - dat_dim = c("dat", "ensemble"), - memb = FALSE, - memb_dim = "ensemble", - ftime_dim = "time", - ncores = recipe$Analysis$ncores) + time_dim = "syear", + dat_dim = c("dat", "ensemble"), + memb = FALSE, + memb_dim = "ensemble", + ftime_dim = "time", + ncores = recipe$Analysis$ncores) clim_hcst <- 
InsertDim(clim$clim_exp, posdim = 1, lendim = 1, name = "syear") dims <- dim(clim_hcst) @@ -71,28 +71,28 @@ compute_anomalies <- function(recipe, data) { data$fcst$data <- data$fcst$data - clim_hcst # Change metadata for (var in data$fcst$attrs$Variable$varName) { - data$fcst$attrs$Variable$variables[[var]]$long_name <- - paste(data$fcst$attrs$Variable$variables[[var]]$long_name, "anomaly") + data$fcst$attrs$Variable$variables[[var]]$long_name <- + paste(data$fcst$attrs$Variable$variables[[var]]$long_name, "anomaly") } } info(recipe$Run$logger, - paste("The anomalies have been computed,", cross_msg, - "cross-validation. The original full fields are returned as", - "$hcst.full_val and $obs.full_val.")) + paste("The anomalies have been computed,", cross_msg, + "cross-validation. The original full fields are returned as", + "$hcst.full_val and $obs.full_val.")) info(recipe$Run$logger, "##### ANOMALIES COMPUTED SUCCESSFULLY #####") # Save outputs recipe$Run$output_dir <- paste0(recipe$Run$output_dir, - "/outputs/Anomalies/") + "/outputs/Anomalies/") # Save forecast if (recipe$Analysis$Workflow$Anomalies$save_outputs %in% - c('all', 'exp_only', 'fcst_only')) { + c('all', 'exp_only', 'fcst_only')) { save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') } # Save hindcast if (recipe$Analysis$Workflow$Anomalies$save_outputs %in% - c('all', 'exp_only')) { + c('all', 'exp_only')) { save_forecast(recipe = recipe, data_cube = data$hcst, type = 'hcst') } # Save observation @@ -101,14 +101,14 @@ compute_anomalies <- function(recipe, data) { } } else { warn(recipe$Run$logger, paste("The Anomalies module has been called, but", - "recipe parameter Analysis:Variables:anomaly is set to FALSE.", - "The full fields will be returned.")) + "recipe parameter Analysis:Variables:anomaly is set to FALSE.", + "The full fields will be returned.")) hcst_fullvalue <- NULL obs_fullvalue <- NULL info(recipe$Run$logger, "##### ANOMALIES NOT COMPUTED #####") } return(list(hcst = data$hcst, obs = data$obs, fcst = data$fcst, - hcst.full_val = hcst_fullvalue, obs.full_val = obs_fullvalue)) + hcst.full_val = hcst_fullvalue, obs.full_val = obs_fullvalue)) } diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 7efdfa73..9d1db55f 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -11,9 +11,9 @@ calibrate_datasets <- function(recipe, data) { if (method == "raw") { warn(recipe$Run$logger, - paste("The Calibration module has been called, but the calibration", - "method in the recipe is 'raw'. The hcst and fcst will not be", - "calibrated.")) + paste("The Calibration module has been called, but the calibration", + "method in the recipe is 'raw'. 
The hcst and fcst will not be", + "calibrated.")) fcst_calibrated <- data$fcst hcst_calibrated <- data$hcst if (!is.null(data$hcst.full_val)) { @@ -45,7 +45,7 @@ calibrate_datasets <- function(recipe, data) { obs.mm <- obs$data for(dat in 1:(dim(data$hcst$data)['dat'][[1]]-1)) { obs.mm <- abind(obs.mm, data$obs$data, - along=which(names(dim(data$obs$data)) == 'dat')) + along=which(names(dim(data$obs$data)) == 'dat')) } names(dim(obs.mm)) <- names(dim(obs$data)) data$obs$data <- obs.mm @@ -57,9 +57,9 @@ calibrate_datasets <- function(recipe, data) { CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based") ## TODO: implement other calibration methods if (!(method %in% CST_CALIB_METHODS)) { - error(recipe$Run$logger, - paste("Calibration method in the recipe is not available for", - "monthly data.")) + error(recipe$Run$logger, + paste("Calibration method in the recipe is not available for", + "monthly data.")) stop() } else { # Calibrate the hindcast @@ -74,24 +74,24 @@ calibrate_datasets <- function(recipe, data) { memb_dim = "ensemble", sdate_dim = "syear", ncores = ncores) - # In the case where anomalies have been computed, calibrate full values - if (!is.null(data$hcst.full_val)) { - hcst_full_calibrated <- CST_Calibration(data$hcst.full_val, - data$obs.full_val, - cal.method = method, - eval.method = "leave-one-out", - multi.model = mm, - na.fill = TRUE, - na.rm = na.rm, - apply_to = NULL, - memb_dim = "ensemble", - sdate_dim = "syear", - ncores = ncores) - } else { - hcst_full_calibrated <- NULL - } + # In the case where anomalies have been computed, calibrate full values + if (!is.null(data$hcst.full_val)) { + hcst_full_calibrated <- CST_Calibration(data$hcst.full_val, + data$obs.full_val, + cal.method = method, + eval.method = "leave-one-out", + multi.model = mm, + na.fill = TRUE, + na.rm = na.rm, + apply_to = NULL, + memb_dim = "ensemble", + sdate_dim = "syear", + ncores = ncores) + } else { + hcst_full_calibrated <- NULL + } - # Calibrate the forecast + # Calibrate the forecast if (!is.null(data$fcst)) { fcst_calibrated <- CST_Calibration(data$hcst, data$obs, data$fcst, cal.method = method, @@ -111,52 +111,52 @@ calibrate_datasets <- function(recipe, data) { } else if (recipe$Analysis$Variables$freq == "daily_mean") { # Daily data calibration using Quantile Mapping if (!(method %in% c("qmap"))) { - error(recipe$Run$logger, - paste("Calibration method in the recipe is not available for", - "daily data. Only quantile mapping 'qmap is implemented.")) + error(recipe$Run$logger, + paste("Calibration method in the recipe is not available for", + "daily data. 
Only quantile mapping 'qmap is implemented.")) stop() } # Calibrate the hindcast dim_order <- names(dim(data$hcst$data)) hcst_calibrated <- CST_QuantileMapping(data$hcst, data$obs, - exp_cor = NULL, - sdate_dim = "syear", - memb_dim = "ensemble", - # window_dim = "time", - method = "QUANT", - ncores = ncores, - na.rm = na.rm, - wet.day = F) + exp_cor = NULL, + sdate_dim = "syear", + memb_dim = "ensemble", + # window_dim = "time", + method = "QUANT", + ncores = ncores, + na.rm = na.rm, + wet.day = F) # Restore dimension order hcst_calibrated$data <- Reorder(hcst_calibrated$data, dim_order) # In the case where anomalies have been computed, calibrate full values if (!is.null(data$hcst.full_val)) { - hcst_full_calibrated <- CST_QuantileMapping(data$hcst.full_val, - data$obs.full_val, - exp_cor = NULL, - sdate_dim = "syear", - memb_dim = "ensemble", - method = "QUANT", - ncores = ncores, - na.rm = na.rm, - wet.day = F) + hcst_full_calibrated <- CST_QuantileMapping(data$hcst.full_val, + data$obs.full_val, + exp_cor = NULL, + sdate_dim = "syear", + memb_dim = "ensemble", + method = "QUANT", + ncores = ncores, + na.rm = na.rm, + wet.day = F) } else { - hcst_full_calibrated <- NULL + hcst_full_calibrated <- NULL } if (!is.null(data$fcst)) { # Calibrate the forecast fcst_calibrated <- CST_QuantileMapping(data$hcst, data$obs, - exp_cor = data$fcst, - sdate_dim = "syear", - memb_dim = "ensemble", - # window_dim = "time", - method = "QUANT", - ncores = ncores, - na.rm = na.rm, - wet.day = F) + exp_cor = data$fcst, + sdate_dim = "syear", + memb_dim = "ensemble", + # window_dim = "time", + method = "QUANT", + ncores = ncores, + na.rm = na.rm, + wet.day = F) # Restore dimension order - fcst_calibrated$data <- Reorder(fcst_calibrated$data, dim_order) + fcst_calibrated$data <- Reorder(fcst_calibrated$data, dim_order) } else { fcst_calibrated <- NULL } @@ -165,7 +165,7 @@ calibrate_datasets <- function(recipe, data) { info(recipe$Run$logger, CALIB_MSG) ## TODO: What do we do with the full values? 
recipe$Run$output_dir <- paste0(recipe$Run$output_dir, - "/outputs/Calibration/") + "/outputs/Calibration/") if (recipe$Analysis$Workflow$Calibration$save_outputs %in% c('all', 'exp_only', 'fcst_only')) { save_forecast(recipe = recipe, data_cube = fcst_calibrated, type = 'fcst') @@ -180,12 +180,12 @@ calibrate_datasets <- function(recipe, data) { ## TODO: Sort out returns return_list <- list(hcst = hcst_calibrated, - obs = data$obs, - fcst = fcst_calibrated) + obs = data$obs, + fcst = fcst_calibrated) if (!is.null(hcst_full_calibrated)) { return_list <- append(return_list, - list(hcst.full_val = hcst_full_calibrated, - obs.full_val = data$obs.full_val)) + list(hcst.full_val = hcst_full_calibrated, + obs.full_val = data$obs.full_val)) } return(return_list) } diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 55b13451..c5eb41e6 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -72,17 +72,17 @@ load_datasets <- function(recipe) { obs.path <- paste0(archive$src, obs.dir, store.freq, "/$var$", reference_descrip[[store.freq]][[variable]], - "$var$_$file_date$.nc") + "$var$_$file_date$.nc") hcst.path <- paste0(archive$src, hcst.dir, store.freq, "/$var$", exp_descrip[[store.freq]][[variable]], - "$var$_$file_date$.nc") + "$var$_$file_date$.nc") fcst.path <- paste0(archive$src, hcst.dir, store.freq, "/$var$", - exp_descrip[[store.freq]][[variable]], - "$var$_$file_date$.nc") + exp_descrip[[store.freq]][[variable]], + "$var$_$file_date$.nc") # Define regrid parameters: #------------------------------------------------------------------- @@ -114,7 +114,7 @@ load_datasets <- function(recipe) { transform_vars = c('latitude', 'longitude'), synonims = list(latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude'), - ensemble = c('member', 'ensemble')), + ensemble = c('member', 'ensemble')), ensemble = indices(1:hcst.nmember), return_vars = list(latitude = 'dat', longitude = 'dat', @@ -134,7 +134,7 @@ load_datasets <- function(recipe) { # Change time attribute dimensions default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) names(dim(attr(hcst, "Variables")$common$time))[which(names( - dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear" + dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear" default_time_dims[names(dim(attr(hcst, "Variables")$common$time))] <- dim(attr(hcst, "Variables")$common$time) dim(attr(hcst, "Variables")$common$time) <- default_time_dims @@ -157,26 +157,26 @@ load_datasets <- function(recipe) { # multiple dims split fcst <- Start(dat = fcst.path, - var = variable, - file_date = sdates$fcst, + var = variable, + file_date = sdates$fcst, time = idxs$fcst, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$fcst.transform, - transform_params = list(grid = regrid_params$fcst.gridtype, - method = regrid_params$fcst.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat', 'latitude'), - longitude = c('lon', 'longitude'), - ensemble = c('member', 'ensemble')), - ensemble = indices(1:fcst.nmember), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$fcst.transform, + transform_params = list(grid = 
regrid_params$fcst.gridtype, + method = regrid_params$fcst.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = indices(1:fcst.nmember), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), split_multiselected_dims = split_multiselected_dims, - retrieve = TRUE) + retrieve = TRUE) if (recipe$Analysis$Variables$freq == "daily_mean") { # Adjusts dims for daily case, could be removed if startR allows @@ -201,7 +201,7 @@ load_datasets <- function(recipe) { # Adjust dates for models where the time stamp goes into the next month if (recipe$Analysis$Variables$freq == "monthly_mean") { fcst$attrs$Dates[] <- - fcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) + fcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) } } else { @@ -215,9 +215,9 @@ load_datasets <- function(recipe) { # the corresponding observations are loaded correctly. dates <- hcst$attrs$Dates dim(dates) <- dim(Subset(hcst$data, - along=c('dat', 'var', - 'latitude', 'longitude', 'ensemble'), - list(1,1,1,1,1), drop="selected")) + along=c('dat', 'var', + 'latitude', 'longitude', 'ensemble'), + list(1,1,1,1,1), drop="selected")) # Separate Start() call for monthly vs daily data if (store.freq == "monthly_mean") { @@ -227,22 +227,22 @@ load_datasets <- function(recipe) { obs <- Start(dat = obs.path, var = variable, - file_date = dates_file, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$obs.transform, - transform_params = list(grid = regrid_params$obs.gridtype, - method = regrid_params$obs.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat', 'y', 'latitude'), - longitude = c('lon', 'x', 'longitude')), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) + file_date = dates_file, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_params = list(grid = regrid_params$obs.gridtype, + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat', 'y', 'latitude'), + longitude = c('lon', 'x', 'longitude')), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) } else if (store.freq == "daily_mean") { @@ -256,28 +256,28 @@ load_datasets <- function(recipe) { dim(dates) <- dim(dates_file) obs <- Start(dat = obs.path, - var = variable, - file_date = sort(unique(dates_file)), - time = dates, - time_var = 'time', - time_across = 'file_date', - merge_across_dims = TRUE, - merge_across_dims_narm = TRUE, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$obs.transform, - transform_params = list(grid = regrid_params$obs.gridtype, - method = regrid_params$obs.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude')), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 
'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) + var = variable, + file_date = sort(unique(dates_file)), + time = dates, + time_var = 'time', + time_across = 'file_date', + merge_across_dims = TRUE, + merge_across_dims_narm = TRUE, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_params = list(grid = regrid_params$obs.gridtype, + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) } # Adds ensemble dim to obs (for consistency with hcst/fcst) @@ -294,27 +294,27 @@ load_datasets <- function(recipe) { if (!(recipe$Analysis$Regrid$type == 'none')) { if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) { lat_error_msg <- paste("Latitude mismatch between hcst and obs.", - "Please check the original grids and the", - "regrid parameters in your recipe.") + "Please check the original grids and the", + "regrid parameters in your recipe.") error(recipe$Run$logger, lat_error_msg) hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], - "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) + "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) info(recipe$Run$logger, hcst_lat_msg) obs_lat_msg <- paste0("First obs lat: ", obs$lat[1], - "; Last obs lat: ", obs$lat[length(obs$lat)]) + "; Last obs lat: ", obs$lat[length(obs$lat)]) info(recipe$Run$logger, obs_lat_msg) stop("hcst and obs don't share the same latitudes.") } if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) { lon_error_msg <- paste("Longitude mismatch between hcst and obs.", - "Please check the original grids and the", - "regrid parameters in your recipe.") + "Please check the original grids and the", + "regrid parameters in your recipe.") error(recipe$Run$logger, lon_error_msg) hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], - "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) + "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) info(recipe$Run$logger, hcst_lon_msg) obs_lon_msg <- paste0("First obs lon: ", obs$lon[1], - "; Last obs lon: ", obs$lon[length(obs$lon)]) + "; Last obs lon: ", obs$lon[length(obs$lon)]) info(recipe$Run$logger, obs_lon_msg) stop("hcst and obs don't share the same longitudes.") @@ -325,7 +325,7 @@ load_datasets <- function(recipe) { dictionary <- read_yaml("conf/variable-dictionary.yml") if (dictionary$vars[[variable]]$accum) { info(recipe$Run$logger, - "Accumulated variable: setting negative values to zero.") + "Accumulated variable: setting negative values to zero.") obs$data[obs$data < 0] <- 0 hcst$data[hcst$data < 0] <- 0 if (!is.null(fcst)) { @@ -339,9 +339,9 @@ load_datasets <- function(recipe) { if (variable == "prlr") { # Verify that the units are m/s and the same in obs and hcst if (((obs$attrs$Variable$metadata[[variable]]$units == "m s-1") || - (obs$attrs$Variable$metadata[[variable]]$units == "m s**-1")) && - ((hcst$attrs$Variable$metadata[[variable]]$units == "m s-1") || - (hcst$attrs$Variable$metadata[[variable]]$units == "m s**-1"))) { + (obs$attrs$Variable$metadata[[variable]]$units == "m s**-1")) && + ((hcst$attrs$Variable$metadata[[variable]]$units == "m s-1") || + (hcst$attrs$Variable$metadata[[variable]]$units == "m s**-1"))) { 
info(recipe$Run$logger, "Converting precipitation from m/s to mm/day.") obs$data <- obs$data*86400*1000 diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index fb7910a2..c900fffe 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -33,12 +33,12 @@ save_data <- function(recipe, data, # Export hindcast, forecast and observations onto outfile save_forecast(recipe = recipe, data_cube = data$hcst, - type = 'hcst', - outdir = outdir) + type = 'hcst', + outdir = outdir) if (!is.null(data$fcst)) { save_forecast(recipe = recipe, data_cube = data$fcst, - type = 'fcst', - outdir = outdir) + type = 'fcst', + outdir = outdir) } save_observations(recipe = recipe, data_cube = data$obs, outdir = outdir) @@ -79,9 +79,9 @@ save_data <- function(recipe, data, outdir = outdir) if (!is.null(probabilities$probs_fcst)) { save_probabilities(recipe = recipe, probs = probabilities$probs_fcst, - data_cube = data$fcst, - type = "fcst", - outdir = outdir) + data_cube = data$fcst, + type = "fcst", + outdir = outdir) } } } @@ -91,7 +91,7 @@ get_global_attributes <- function(recipe, archive) { # netCDF files. parameters <- recipe$Analysis hcst_period <- paste0(parameters$Time$hcst_start, " to ", - parameters$Time$hcst_end) + parameters$Time$hcst_end) current_time <- paste0(as.character(Sys.time()), " ", Sys.timezone()) system_name <- parameters$Datasets$System$name reference_name <- parameters$Datasets$Reference$name @@ -122,7 +122,7 @@ get_times <- function(store.freq, fcst.horizon, leadtimes, sdate, calendar) { dim(time) <- length(time) sdate <- as.Date(sdate, format = '%Y%m%d') # reformatting metadata <- list(time = list(units = paste0(ref, sdate, 'T00:00:00'), - calendar = calendar)) + calendar = calendar)) attr(time, 'variables') <- metadata names(dim(time)) <- 'time' @@ -157,9 +157,9 @@ get_latlon <- function(latitude, longitude) { } save_forecast <- function(recipe, - data_cube, - type = "hcst", - agg = "global", + data_cube, + type = "hcst", + agg = "global", outdir = NULL) { # Loops over the years in the s2dv_cube containing a hindcast or forecast # and exports each year to a netCDF file. 
@@ -194,11 +194,11 @@ save_forecast <- function(recipe, if (type == 'hcst') { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', sprintf('%02d', init_month), '-01'), - cal = calendar) + cal = calendar) } else if (type == 'fcst') { init_date <- as.PCICt(paste0(recipe$Analysis$Time$fcst_year[1], '-', sprintf('%02d', init_month), '-01'), - cal = calendar) + cal = calendar) } } else { if (type == 'hcst') { @@ -208,7 +208,7 @@ save_forecast <- function(recipe, } else if (type == 'fcst') { init_date <- as.PCICt(paste0(recipe$Analysis$Time$fcst_year, recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) + format = '%Y%m%d', cal = calendar) } } # Get time difference in hours @@ -245,8 +245,8 @@ save_forecast <- function(recipe, } metadata <- list(fcst = list(name = var.expname, - standard_name = var.sdname, - long_name = var.longname, + standard_name = var.sdname, + long_name = var.longname, units = var.units)) attr(fcst[[1]], 'variables') <- metadata names(dim(fcst[[1]])) <- dims @@ -274,7 +274,7 @@ save_forecast <- function(recipe, # Generate name of output file outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "exp") + fcst.sdate, agg, "exp") # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { @@ -291,14 +291,14 @@ save_forecast <- function(recipe, } } info(recipe$Run$logger, paste("#####", toupper(type), - "SAVED TO NETCDF FILE #####")) + "SAVED TO NETCDF FILE #####")) } save_observations <- function(recipe, - data_cube, + data_cube, agg = "global", - outdir = NULL) { + outdir = NULL) { # Loops over the years in the s2dv_cube containing the observations and # exports each year to a netCDF file. # data_cube: s2dv_cube containing the data and metadata @@ -332,7 +332,7 @@ save_observations <- function(recipe, init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', sprintf('%02d', init_month), '-01'), - cal = calendar) + cal = calendar) } else { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, @@ -372,8 +372,8 @@ save_observations <- function(recipe, } metadata <- list(fcst = list(name = var.expname, - standard_name = var.sdname, - long_name = var.longname, + standard_name = var.sdname, + long_name = var.longname, units = var.units)) attr(fcst[[1]], 'variables') <- metadata names(dim(fcst[[1]])) <- dims @@ -414,7 +414,7 @@ save_observations <- function(recipe, # Generate name of output file outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "obs") + fcst.sdate, agg, "obs") # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { @@ -443,10 +443,10 @@ save_observations <- function(recipe, # } save_metrics <- function(recipe, - skill, + skill, data_cube, agg = "global", - outdir = NULL) { + outdir = NULL) { # This function adds metadata to the skill metrics in 'skill' # and exports them to a netCDF file inside 'outdir'. 
@@ -467,10 +467,10 @@ save_metrics <- function(recipe, if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && (recipe$Analysis$Workflow$Anomalies$compute)) { global_attributes <- c(list(from_anomalies = "Yes"), - global_attributes) + global_attributes) } else { global_attributes <- c(list(from_anomalies = "No"), - global_attributes) + global_attributes) } attr(skill[[1]], 'global_attrs') <- global_attributes @@ -487,9 +487,9 @@ save_metrics <- function(recipe, dims <- c(lalo, 'time') } metadata <- list(metric = list(name = metric, - standard_name = sdname, - long_name = long_name, - missing_value = missing_val)) + standard_name = sdname, + long_name = long_name, + missing_value = missing_val)) attr(skill[[i]], 'variables') <- metadata names(dim(skill[[i]])) <- dims } @@ -507,7 +507,7 @@ save_metrics <- function(recipe, init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', sprintf('%02d', init_month), '-01'), - cal = calendar) + cal = calendar) } else { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), @@ -544,7 +544,7 @@ save_metrics <- function(recipe, outdir <- get_dir(recipe) } outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "skill") + fcst.sdate, agg, "skill") # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { @@ -585,10 +585,10 @@ save_corr <- function(recipe, if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && (recipe$Analysis$Workflow$Anomalies$compute)) { global_attributes <- c(global_attributes, - list(from_anomalies = "Yes")) + list(from_anomalies = "Yes")) } else { global_attributes <- c(global_attributes, - list(from_anomalies = "No")) + list(from_anomalies = "No")) } attr(skill[[1]], 'global_attrs') <- global_attributes @@ -605,9 +605,9 @@ save_corr <- function(recipe, dims <- c(lalo, 'ensemble', 'time') } metadata <- list(metric = list(name = metric, - standard_name = sdname, - long_name = long_name, - missing_value = missing_val)) + standard_name = sdname, + long_name = long_name, + missing_value = missing_val)) attr(skill[[i]], 'variables') <- metadata names(dim(skill[[i]])) <- dims } @@ -624,7 +624,7 @@ save_corr <- function(recipe, init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', sprintf('%02d', init_month), '-01'), - cal = calendar) + cal = calendar) } else { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), @@ -661,7 +661,7 @@ save_corr <- function(recipe, outdir <- get_dir(recipe) } outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "corr") + fcst.sdate, agg, "corr") # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { @@ -681,10 +681,10 @@ save_corr <- function(recipe, } save_percentiles <- function(recipe, - percentiles, + percentiles, data_cube, agg = "global", - outdir = NULL) { + outdir = NULL) { # This function adds metadata to the percentiles # and exports them to a netCDF file inside 'outdir'. 
archive <- get_archive(recipe) @@ -703,10 +703,10 @@ save_percentiles <- function(recipe, if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && (recipe$Analysis$Workflow$Anomalies$compute)) { global_attributes <- c(list(from_anomalies = "Yes"), - global_attributes) + global_attributes) } else { global_attributes <- c(list(from_anomalies = "No"), - global_attributes) + global_attributes) } attr(percentiles[[1]], 'global_attrs') <- global_attributes @@ -735,7 +735,7 @@ save_percentiles <- function(recipe, init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', sprintf('%02d', init_month), '-01'), - cal = calendar) + cal = calendar) } else { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), @@ -771,7 +771,7 @@ save_percentiles <- function(recipe, outdir <- get_dir(recipe) } outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "percentiles") + fcst.sdate, agg, "percentiles") # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { country <- get_countries(grid) @@ -789,11 +789,11 @@ save_percentiles <- function(recipe, } save_probabilities <- function(recipe, - probs, - data_cube, + probs, + data_cube, agg = "global", - type = "hcst", - outdir = NULL) { + type = "hcst", + outdir = NULL) { # Loops over the years in the s2dv_cube containing a hindcast or forecast # and exports the corresponding category probabilities to a netCDF file. # probs: array containing the probability data @@ -817,10 +817,10 @@ save_probabilities <- function(recipe, if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && (recipe$Analysis$Workflow$Anomalies$compute)) { global_attributes <- c(list(from_anomalies = "Yes"), - global_attributes) + global_attributes) } else { global_attributes <- c(list(from_anomalies = "No"), - global_attributes) + global_attributes) } fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq @@ -834,7 +834,7 @@ save_probabilities <- function(recipe, init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', sprintf('%02d', init_month), '-01'), - cal = calendar) + cal = calendar) } else { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), @@ -891,7 +891,7 @@ save_probabilities <- function(recipe, # Generate name of output file outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "probs") + fcst.sdate, agg, "probs") # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index e0962405..2600e5eb 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -52,8 +52,8 @@ source("modules/Skill/R/tmp/RandomWalkTest.R") # " it can call ", metric_fun )) # compute_skill_metrics <- function(recipe, data$hcst, obs, -# clim_data$hcst = NULL, -# clim_obs = NULL) { +# clim_data$hcst = NULL, +# clim_obs = NULL) { compute_skill_metrics <- function(recipe, data) { # data$hcst: s2dv_cube containing the hindcast @@ -68,14 +68,14 @@ compute_skill_metrics <- function(recipe, data) { # if (recipe$Analysis$Workflow$Anomalies$compute) { # if (is.null(clim_data$hcst) || is.null(clim_obs)) { # warn(recipe$Run$logger, "Anomalies have been requested in the recipe, -# but the climatologies have not been provided in 
the -# compute_skill_metrics call. Be aware that some metrics like the -# Mean Bias may not be correct.") +# but the climatologies have not been provided in the +# compute_skill_metrics call. Be aware that some metrics like the +# Mean Bias may not be correct.") # } # } else { # warn(recipe$Run$logger, "Anomaly computation was not requested in the -# recipe. Be aware that some metrics, such as the CRPSS may not be -# correct.") +# recipe. Be aware that some metrics, such as the CRPSS may not be +# correct.") # } time_dim <- 'syear' memb_dim <- 'ensemble' @@ -94,7 +94,7 @@ compute_skill_metrics <- function(recipe, data) { for (metric in strsplit(metrics, ", | |,")[[1]]) { # Whether the fair version of the metric is to be computed if (metric %in% c('frps', 'frpss', 'bss10', 'bss90', - 'fcrps', 'fcrpss')) { + 'fcrps', 'fcrpss')) { Fair <- T } else { Fair <- F @@ -108,65 +108,65 @@ compute_skill_metrics <- function(recipe, data) { # Ranked Probability Score and Fair version if (metric %in% c('rps', 'frps')) { skill <- RPS(data$hcst$data, data$obs$data, - time_dim = time_dim, - memb_dim = memb_dim, - Fair = Fair, - ncores = ncores) + time_dim = time_dim, + memb_dim = memb_dim, + Fair = Fair, + ncores = ncores) skill <- .drop_dims(skill) skill_metrics[[ metric ]] <- skill # Ranked Probability Skill Score and Fair version } else if (metric %in% c('rpss', 'frpss')) { skill <- RPSS(data$hcst$data, data$obs$data, - time_dim = time_dim, - memb_dim = memb_dim, - Fair = Fair, - ncores = ncores) + time_dim = time_dim, + memb_dim = memb_dim, + Fair = Fair, + ncores = ncores) skill <- lapply(skill, function(x) { - .drop_dims(x)}) + .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$rpss skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # Brier Skill Score - 10th percentile } else if (metric == 'bss10') { skill <- RPSS(data$hcst$data, data$obs$data, - time_dim = time_dim, - memb_dim = memb_dim, - prob_thresholds = 0.1, - Fair = Fair, - ncores = ncores) + time_dim = time_dim, + memb_dim = memb_dim, + prob_thresholds = 0.1, + Fair = Fair, + ncores = ncores) skill <- lapply(skill, function(x) { - .drop_dims(x)}) + .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$rpss skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # Brier Skill Score - 90th percentile } else if (metric == 'bss90') { skill <- RPSS(data$hcst$data, data$obs$data, - time_dim = time_dim, - memb_dim = memb_dim, - prob_thresholds = 0.9, - Fair = Fair, - ncores = ncores) + time_dim = time_dim, + memb_dim = memb_dim, + prob_thresholds = 0.9, + Fair = Fair, + ncores = ncores) skill <- lapply(skill, function(x) { - .drop_dims(x)}) + .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$rpss skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # CRPS and FCRPS } else if (metric %in% c('crps', 'fcrps')) { skill <- CRPS(data$hcst$data, data$obs$data, - time_dim = time_dim, - memb_dim = memb_dim, - Fair = Fair, - ncores = ncores) + time_dim = time_dim, + memb_dim = memb_dim, + Fair = Fair, + ncores = ncores) skill <- .drop_dims(skill) skill_metrics[[ metric ]] <- skill # CRPSS and FCRPSS } else if (metric %in% c('crpss', 'fcrpss')) { skill <- CRPSS(data$hcst$data, data$obs$data, - time_dim = time_dim, - memb_dim = memb_dim, - Fair = Fair, - ncores = ncores) + time_dim = time_dim, + memb_dim = memb_dim, + Fair = Fair, + ncores = ncores) skill <- lapply(skill, function(x) { - .drop_dims(x)}) + .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$crpss skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # 
Mean bias (climatology) @@ -174,35 +174,35 @@ compute_skill_metrics <- function(recipe, data) { ## TODO: Eliminate option to compute from anomalies # Compute from full field if ((!is.null(data$hcst.full_val)) && (!is.null(data$obs.full_val)) && - (recipe$Analysis$Workflow$Anomalies$compute)) { - skill <- Bias(data$hcst.full_val$data, data$obs.full_val$data, - time_dim = time_dim, - memb_dim = memb_dim, - ncores = ncores) + (recipe$Analysis$Workflow$Anomalies$compute)) { + skill <- Bias(data$hcst.full_val$data, data$obs.full_val$data, + time_dim = time_dim, + memb_dim = memb_dim, + ncores = ncores) } else { skill <- Bias(data$hcst$data, data$obs$data, time_dim = time_dim, - memb_dim = memb_dim, - ncores = ncores) + memb_dim = memb_dim, + ncores = ncores) } skill <- .drop_dims(skill) skill_metrics[[ metric ]] <- skill # Mean bias skill score } else if (metric == 'mean_bias_ss') { if ((!is.null(data$hcst.full_val)) && (!is.null(data$obs.full_val)) && - (recipe$Analysis$Workflow$Anomalies$compute)) { - skill <- AbsBiasSS(data$hcst.full_val$data, data$obs.full_val$data, - time_dim = time_dim, - memb_dim = memb_dim, - ncores = ncores) + (recipe$Analysis$Workflow$Anomalies$compute)) { + skill <- AbsBiasSS(data$hcst.full_val$data, data$obs.full_val$data, + time_dim = time_dim, + memb_dim = memb_dim, + ncores = ncores) } else { skill <- AbsBiasSS(data$hcst$data, data$obs$data, - time_dim = time_dim, - memb_dim = memb_dim, - ncores = ncores) + time_dim = time_dim, + memb_dim = memb_dim, + ncores = ncores) } skill <- lapply(skill, function(x) { - .drop_dims(x)}) + .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$biasSS skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign # Ensemble mean correlation @@ -214,43 +214,43 @@ compute_skill_metrics <- function(recipe, data) { time_dim = time_dim, method = 'pearson', memb_dim = memb_dim, - memb = memb, - conf = F, - pval = F, - sign = T, - alpha = 0.05, + memb = memb, + conf = F, + pval = F, + sign = T, + alpha = 0.05, ncores = ncores) skill <- lapply(skill, function(x) { - .drop_dims(x)}) + .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$corr skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign } else if (metric == 'rmsss') { # Compute RMSSS skill <- RMSSS(data$hcst$data, data$obs$data, - dat_dim = 'dat', - time_dim = time_dim, - memb_dim = memb_dim, - pval = FALSE, - sign = TRUE, - sig_method = 'Random Walk', - ncores = ncores) + dat_dim = 'dat', + time_dim = time_dim, + memb_dim = memb_dim, + pval = FALSE, + sign = TRUE, + sig_method = 'Random Walk', + ncores = ncores) # Compute ensemble mean and modify dimensions skill <- lapply(skill, function(x) { - .drop_dims(x)}) + .drop_dims(x)}) skill_metrics[[ metric ]] <- skill$rmsss skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign } else if (metric == 'enssprerr') { # Remove ensemble dim from obs to avoid veriApply warning obs_noensdim <- ClimProjDiags::Subset(data$obs$data, "ensemble", 1, - drop = "selected") + drop = "selected") capture.output( skill <- easyVerification::veriApply(verifun = 'EnsSprErr', - fcst = data$hcst$data, - obs = obs_noensdim, - tdim = which(names(dim(data$hcst$data))==time_dim), - ensdim = which(names(dim(data$hcst$data))==memb_dim), - na.rm = na.rm, - ncpus = ncores) + fcst = data$hcst$data, + obs = obs_noensdim, + tdim = which(names(dim(data$hcst$data))==time_dim), + ensdim = which(names(dim(data$hcst$data))==memb_dim), + na.rm = na.rm, + ncpus = ncores) ) remove(obs_noensdim) skill <- .drop_dims(skill) @@ -261,21 +261,21 @@ 
compute_skill_metrics <- function(recipe, data) { ## Retain _specs in metric name for clarity metric_name <- (strsplit(metric, "_"))[[1]][1] # Get metric name if (!(metric_name %in% c('frpss', 'frps', 'bss10', 'bss90', 'enscorr', - 'rpss'))) { - warn(recipe$Run$logger, - "Some of the requested SpecsVerification metrics are not available.") + 'rpss'))) { + warn(recipe$Run$logger, + "Some of the requested SpecsVerification metrics are not available.") } capture.output( skill <- Compute_verif_metrics(data$hcst$data, data$obs$data, - skill_metrics = metric_name, - verif.dims=c("syear", "sday", "sweek"), - na.rm = na.rm, - ncores = ncores) + skill_metrics = metric_name, + verif.dims=c("syear", "sday", "sweek"), + na.rm = na.rm, + ncores = ncores) ) skill <- .drop_dims(skill) if (metric_name == "frps") { - # Compute yearly mean for FRPS - skill <- colMeans(skill, dims = 1) + # Compute yearly mean for FRPS + skill <- colMeans(skill, dims = 1) } skill_metrics[[ metric ]] <- skill } @@ -283,19 +283,19 @@ compute_skill_metrics <- function(recipe, data) { info(recipe$Run$logger, "##### SKILL METRIC COMPUTATION COMPLETE #####") # Save outputs recipe$Run$output_dir <- paste0(recipe$Run$output_dir, - "/outputs/Skill/") + "/outputs/Skill/") # Separate 'corr' from the rest of the metrics because of extra 'ensemble' dim if (recipe$Analysis$Workflow$Skill$save_outputs == 'all') { corr_metric_names <- grep("^corr", names(skill_metrics)) # Save corr if (length(skill_metrics[corr_metric_names]) > 0) { save_corr(recipe = recipe, skill = skill_metrics[corr_metric_names], - data_cube = data$hcst) + data_cube = data$hcst) } # Save other skill metrics if (length(skill_metrics[-corr_metric_names]) > 0) { save_metrics(recipe = recipe, skill = skill_metrics[-corr_metric_names], - data_cube = data$hcst) + data_cube = data$hcst) } } # Return results @@ -323,46 +323,46 @@ compute_probabilities <- function(recipe, data) { if (is.null(recipe$Analysis$Workflow$Probabilities$percentiles)) { error(recipe$Run$logger, "Quantiles and probability bins have been - requested, but no thresholds are provided in the recipe.") + requested, but no thresholds are provided in the recipe.") stop() } else { for (element in recipe$Analysis$Workflow$Probabilities$percentiles) { # Parse thresholds in recipe thresholds <- sapply(element, function (x) eval(parse(text = x))) quants <- compute_quants(data$hcst$data, thresholds, - ncores = ncores, - na.rm = na.rm) + ncores = ncores, + na.rm = na.rm) probs <- compute_probs(data$hcst$data, quants, - ncores = ncores, - na.rm = na.rm) + ncores = ncores, + na.rm = na.rm) for (i in seq(1:dim(quants)['bin'][[1]])) { - named_quantiles <- append(named_quantiles, - list(ClimProjDiags::Subset(quants, - 'bin', i))) - names(named_quantiles)[length(named_quantiles)] <- paste0("percentile_", - as.integer(thresholds[i]*100)) + named_quantiles <- append(named_quantiles, + list(ClimProjDiags::Subset(quants, + 'bin', i))) + names(named_quantiles)[length(named_quantiles)] <- paste0("percentile_", + as.integer(thresholds[i]*100)) } for (i in seq(1:dim(probs)['bin'][[1]])) { - if (i == 1) { - name_i <- paste0("prob_b", as.integer(thresholds[1]*100)) - } else if (i == dim(probs)['bin'][[1]]) { - name_i <- paste0("prob_a", as.integer(thresholds[i-1]*100)) - } else { - name_i <- paste0("prob_", as.integer(thresholds[i-1]*100), "_to_", - as.integer(thresholds[i]*100)) - } - named_probs <- append(named_probs, - list(ClimProjDiags::Subset(probs, - 'bin', i))) - names(named_probs)[length(named_probs)] <- name_i + if (i == 1) { 
+ name_i <- paste0("prob_b", as.integer(thresholds[1]*100)) + } else if (i == dim(probs)['bin'][[1]]) { + name_i <- paste0("prob_a", as.integer(thresholds[i-1]*100)) + } else { + name_i <- paste0("prob_", as.integer(thresholds[i-1]*100), "_to_", + as.integer(thresholds[i]*100)) + } + named_probs <- append(named_probs, + list(ClimProjDiags::Subset(probs, + 'bin', i))) + names(named_probs)[length(named_probs)] <- name_i } # Compute fcst probability bins if (!is.null(data$fcst)) { - probs_fcst <- compute_probs(data$fcst$data, quants, - ncores = ncores, - na.rm = na.rm) + probs_fcst <- compute_probs(data$fcst$data, quants, + ncores = ncores, + na.rm = na.rm) for (i in seq(1:dim(probs_fcst)['bin'][[1]])) { if (i == 1) { @@ -371,11 +371,11 @@ compute_probabilities <- function(recipe, data) { name_i <- paste0("prob_a", as.integer(thresholds[i-1]*100)) } else { name_i <- paste0("prob_", as.integer(thresholds[i-1]*100), "_to_", - as.integer(thresholds[i]*100)) + as.integer(thresholds[i]*100)) } named_probs_fcst <- append(named_probs_fcst, - list(ClimProjDiags::Subset(probs_fcst, - 'bin', i))) + list(ClimProjDiags::Subset(probs_fcst, + 'bin', i))) names(named_probs_fcst)[length(named_probs_fcst)] <- name_i } } @@ -387,37 +387,37 @@ compute_probabilities <- function(recipe, data) { if (!is.null(data$fcst)) { fcst_years <- dim(data$fcst$data)[['syear']] named_probs_fcst <- lapply(named_probs_fcst, - function(x) {Subset(x, - along = 'syear', - indices = 1:fcst_years, - drop = 'non-selected')}) + function(x) {Subset(x, + along = 'syear', + indices = 1:fcst_years, + drop = 'non-selected')}) results <- list(probs = named_probs, - probs_fcst = named_probs_fcst, - percentiles = named_quantiles) + probs_fcst = named_probs_fcst, + percentiles = named_quantiles) } else { results <- list(probs = named_probs, - percentiles = named_quantiles) + percentiles = named_quantiles) } info(recipe$Run$logger, - "##### PERCENTILES AND PROBABILITY CATEGORIES COMPUTED #####") + "##### PERCENTILES AND PROBABILITY CATEGORIES COMPUTED #####") # Save outputs recipe$Run$output_dir <- paste0(recipe$Run$output_dir, - "/outputs/Skill/") + "/outputs/Skill/") # Save percentiles if (recipe$Analysis$Workflow$Probabilities$save_outputs %in% - c('all', 'percentiles_only')) { + c('all', 'percentiles_only')) { save_percentiles(recipe = recipe, percentiles = results$percentiles, - data_cube = data$hcst) + data_cube = data$hcst) } # Save probability bins if (recipe$Analysis$Workflow$Probabilities$save_outputs %in% - c('all', 'bins_only')) { + c('all', 'bins_only')) { save_probabilities(recipe = recipe, probs = results$probs, - data_cube = data$hcst, type = "hcst") + data_cube = data$hcst, type = "hcst") if (!is.null(results$probs_fcst)) { - save_probabilities(recipe = recipe, probs = results$probs_fcst, - data_cbue = data$fcst, type = "fcst") + save_probabilities(recipe = recipe, probs = results$probs_fcst, + data_cbue = data$fcst, type = "fcst") } } # Return results @@ -436,7 +436,7 @@ compute_probabilities <- function(recipe, data) { # If array has memb dim (Corr case), change name to 'ensemble' if ("exp_memb" %in% names(dim(metric_array))) { names(dim(metric_array))[which(names(dim(metric_array)) == - "exp_memb")] <- "ensemble" + "exp_memb")] <- "ensemble" # } else { # dim(metric_array) <- c(dim(metric_array), "ensemble" = 1) } diff --git a/tools/check_recipe.R b/tools/check_recipe.R index d8acc8cd..64d1ef42 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -10,10 +10,10 @@ check_recipe <- function(recipe) { # 
--------------------------------------------------------------------- TIME_SETTINGS_SEASONAL <- c("sdate", "ftime_min", "ftime_max", "hcst_start", - "hcst_end") + "hcst_end") TIME_SETTINGS_DECADAL <- c("ftime_min", "ftime_max", "hcst_start", "hcst_end") PARAMS <- c("Horizon", "Time", "Variables", "Region", "Regrid", "Workflow", - "Datasets") + "Datasets") HORIZONS <- c("subseasonal", "seasonal", "decadal") ARCHIVE_SEASONAL <- "conf/archive.yml" ARCHIVE_DECADAL <- "conf/archive_decadal.yml" @@ -24,21 +24,21 @@ check_recipe <- function(recipe) { # Check basic elements in recipe:Analysis: if (!("Analysis" %in% names(recipe))) { error(recipe$Run$logger, - "The recipe must contain an element called 'Analysis'.") + "The recipe must contain an element called 'Analysis'.") error_status <- T } if (!all(PARAMS %in% names(recipe$Analysis))) { error(recipe$Run$logger, paste0("The element 'Analysis' in the recipe must contain all of ", - "the following: ", paste(PARAMS, collapse = ", "), ".")) + "the following: ", paste(PARAMS, collapse = ", "), ".")) error_status <- T } if (!any(HORIZONS %in% tolower(recipe$Analysis$Horizon))) { error(recipe$Run$logger, paste0("The element 'Horizon' in the recipe must be one of the ", - "following: ", paste(HORIZONS, collapse = ", "), ".")) + "following: ", paste(HORIZONS, collapse = ", "), ".")) error_status <- T } # Check time settings @@ -47,18 +47,18 @@ check_recipe <- function(recipe) { archive <- read_yaml(ARCHIVE_SEASONAL)[[recipe$Run$filesystem]] if (!all(TIME_SETTINGS_SEASONAL %in% names(recipe$Analysis$Time))) { error(recipe$Run$logger, - paste0("The element 'Time' in the recipe must contain all of the ", - "following: ", paste(TIME_SETTINGS_SEASONAL, - collapse = ", "), ".")) + paste0("The element 'Time' in the recipe must contain all of the ", + "following: ", paste(TIME_SETTINGS_SEASONAL, + collapse = ", "), ".")) error_status <- T } } else if (tolower(recipe$Analysis$Horizon) == "decadal") { archive <- read_yaml(ARCHIVE_DECADAL)[[recipe$Run$filesystem]] if (!all(TIME_SETTINGS_DECADAL %in% names(recipe$Analysis$Time))) { error(recipe$Run$logger, - paste0("The element 'Time' in the recipe must contain all of the ", - "following: ", paste(TIME_SETTINGS_DECADAL, - collapse = ", "), ".")) + paste0("The element 'Time' in the recipe must contain all of the ", + "following: ", paste(TIME_SETTINGS_DECADAL, + collapse = ", "), ".")) error_status <- T } } else { @@ -68,14 +68,14 @@ check_recipe <- function(recipe) { if (!is.null(archive)) { if (!all(recipe$Analysis$Datasets$System$name %in% names(archive$System))) { error(recipe$Run$logger, - "The specified System name was not found in the archive.") + "The specified System name was not found in the archive.") error_status <- T } # Check reference names if (!all(recipe$Analysis$Datasets$Reference$name %in% names(archive$Reference))) { error(recipe$Run$logger, - "The specified Reference name was not found in the archive.") + "The specified Reference name was not found in the archive.") error_status <- T } } @@ -83,36 +83,36 @@ check_recipe <- function(recipe) { if ((!(recipe$Analysis$Time$ftime_min > 0)) || (!is.integer(recipe$Analysis$Time$ftime_min))) { error(recipe$Run$logger, - "The element 'ftime_min' must be an integer larger than 0.") + "The element 'ftime_min' must be an integer larger than 0.") error_status <- T } if ((!(recipe$Analysis$Time$ftime_max > 0)) || (!is.integer(recipe$Analysis$Time$ftime_max))) { error(recipe$Run$logger, - "The element 'ftime_max' must be an integer larger than 0.") + "The 
element 'ftime_max' must be an integer larger than 0.") error_status <- T } if (recipe$Analysis$Time$ftime_max < recipe$Analysis$Time$ftime_min) { error(recipe$Run$logger, - "'ftime_max' cannot be smaller than 'ftime_min'.") + "'ftime_max' cannot be smaller than 'ftime_min'.") error_status <- T } # Check consistency of hindcast years if (!(as.numeric(recipe$Analysis$Time$hcst_start) %% 1 == 0) || (!(recipe$Analysis$Time$hcst_start > 0))) { error(recipe$Run$logger, - "The element 'hcst_start' must be a valid year.") + "The element 'hcst_start' must be a valid year.") error_status <- T } if (!(as.numeric(recipe$Analysis$Time$hcst_end) %% 1 == 0) || (!(recipe$Analysis$Time$hcst_end > 0))) { error(recipe$Run$logger, - "The element 'hcst_end' must be a valid year.") + "The element 'hcst_end' must be a valid year.") error_status <- T } if (recipe$Analysis$Time$hcst_end < recipe$Analysis$Time$hcst_start) { error(recipe$Run$logger, - "'hcst_end' cannot be smaller than 'hcst_start'.") + "'hcst_end' cannot be smaller than 'hcst_start'.") error_status <- T } ## TODO: Is this needed? @@ -141,7 +141,7 @@ check_recipe <- function(recipe) { if (is.null(recipe$Analysis$Time$fcst_year)) { warn(recipe$Run$logger, paste("The element 'fcst_year' is not defined in the recipe.", - "No forecast year will be used.")) + "No forecast year will be used.")) } ## TODO: Adapt and move this inside 'if'? # fcst.sdate <- NULL @@ -165,7 +165,7 @@ check_recipe <- function(recipe) { # calculate number of workflows to create for each variable and if (length(recipe$Analysis$Horizon) > 1) { error(recipe$Run$logger, - "Only one single Horizon can be specified in the recipe") + "Only one single Horizon can be specified in the recipe") error_status <- T } @@ -197,7 +197,7 @@ check_recipe <- function(recipe) { if (!all(LIMITS %in% names(region))) { error(recipe$Run$logger, paste0("There must be 4 elements in 'Region': ", - paste(LIMITS, collapse = ", "), ".")) + paste(LIMITS, collapse = ", "), ".")) error_status <- T } } @@ -206,15 +206,15 @@ check_recipe <- function(recipe) { if (!("name" %in% names(region)) || (is.null(region$name))) { error(recipe$Run$logger, paste("If more than one region has been defined, every region", - "must have a unique name.")) + "must have a unique name.")) } } } # Atomic recipe } else if (!all(LIMITS %in% names(recipe$Analysis$Region))) { error(recipe$Run$logger, - paste0("There must be 4 elements in 'Region': ", - paste(LIMITS, collapse = ", "), ".")) + paste0("There must be 4 elements in 'Region': ", + paste(LIMITS, collapse = ", "), ".")) error_status <- T } ## TODO: Implement multiple regions @@ -258,19 +258,19 @@ check_recipe <- function(recipe) { if ("Anomalies" %in% names(recipe$Analysis$Workflow)) { if (is.null(recipe$Analysis$Workflow$Anomalies$compute)) { error(recipe$Run$logger, - "Parameter 'compute' must be defined under 'Anomalies'.") + "Parameter 'compute' must be defined under 'Anomalies'.") error_status <- T } else if (!(is.logical(recipe$Analysis$Workflow$Anomalies$compute))) { error(recipe$Run$logger, - paste("Parameter 'Anomalies:compute' must be a logical value", - "(True/False or yes/no).")) + paste("Parameter 'Anomalies:compute' must be a logical value", + "(True/False or yes/no).")) error_status <- T } else if ((recipe$Analysis$Workflow$Anomalies$compute) && - (!is.logical(recipe$Analysis$Workflow$Anomalies$cross_validation))) { + (!is.logical(recipe$Analysis$Workflow$Anomalies$cross_validation))) { error(recipe$Run$logger, - paste("If anomaly computation is requested, 
parameter", - "'cross_validation' must be defined under 'Anomalies', - and it must be a logical value (True/False or yes/no).")) + paste("If anomaly computation is requested, parameter", + "'cross_validation' must be defined under 'Anomalies', + and it must be a logical value (True/False or yes/no).")) error_status <- T } } @@ -278,19 +278,19 @@ check_recipe <- function(recipe) { if (("Skill" %in% names(recipe$Analysis$Workflow)) && (is.null(recipe$Analysis$Workflow$Skill$metric))) { error(recipe$Run$logger, - "Parameter 'metric' must be defined under 'Skill'.") + "Parameter 'metric' must be defined under 'Skill'.") error_status <- T } # Probabilities if ("Probabilities" %in% names(recipe$Analysis$Workflow)) { if (is.null(recipe$Analysis$Workflow$Probabilities$percentiles)) { error(recipe$Run$logger, - "Parameter 'percentiles' must be defined under 'Probabilities'.") + "Parameter 'percentiles' must be defined under 'Probabilities'.") error_status <- T } else if (!is.list(recipe$Analysis$Workflow$Probabilities$percentiles)) { error(recipe$Run$logger, - paste("Parameter 'Probabilities:percentiles' expects a list.", - "See documentation in the wiki for examples.")) + paste("Parameter 'Probabilities:percentiles' expects a list.", + "See documentation in the wiki for examples.")) error_status <- T } } @@ -307,7 +307,7 @@ check_recipe <- function(recipe) { } if (!all(RUN_FIELDS %in% names(recipe$Run))) { error(recipe$Run$logger, paste("Recipe element 'Run' must contain", - "all of the following fields:", + "all of the following fields:", paste(RUN_FIELDS, collapse=", "), ".")) error_status <- T } @@ -347,8 +347,8 @@ check_recipe <- function(recipe) { # --------------------------------------------------------------------- AUTO_PARAMS <- c("script", "expid", "hpc_user", "wallclock", - "processors_per_job", "platform", "email_notifications", - "email_address", "notify_completed", "notify_failed") + "processors_per_job", "platform", "email_notifications", + "email_address", "notify_completed", "notify_failed") # Autosubmit false by default if (is.null(recipe$Run$autosubmit)) { recipe$Run$autosubmit <- F @@ -360,53 +360,53 @@ check_recipe <- function(recipe) { # Check that the autosubmit configuration parameters are present if (!("auto_conf" %in% names(recipe$Run))) { error(recipe$Run$logger, - "The 'auto_conf' is missing from the 'Run' section of the recipe.") + "The 'auto_conf' is missing from the 'Run' section of the recipe.") error_status <- T } else if (!all(AUTO_PARAMS %in% names(recipe$Run$auto_conf))) { error(recipe$Run$logger, - paste0("The element 'Run:auto_conf' must contain all of the ", - "following: ", paste(AUTO_PARAMS, collapse = ", "), ".")) + paste0("The element 'Run:auto_conf' must contain all of the ", + "following: ", paste(AUTO_PARAMS, collapse = ", "), ".")) error_status <- T } # Check that the script is not NULL and exists if (is.null(recipe$Run$auto_conf$script)) { error(recipe$Run$logger, - "A script must be provided to run the recipe with autosubmit.") + "A script must be provided to run the recipe with autosubmit.") error_status <- T } else if (!file.exists(recipe$Run$auto_conf$script)) { error(recipe$Run$logger, - "Could not find the file for the script in 'auto_conf'.") + "Could not find the file for the script in 'auto_conf'.") error_status <- T } # Check that the experiment ID exists if (is.null(recipe$Run$auto_conf$expid)) { error(recipe$Run$logger, - paste("The Autosubmit EXPID is missing. 
You can create one by", - "running the following commands on the autosubmit machine:")) + paste("The Autosubmit EXPID is missing. You can create one by", + "running the following commands on the autosubmit machine:")) error(recipe$Run$logger, - paste("module load", auto_specs$module_version)) + paste("module load", auto_specs$module_version)) error(recipe$Run$logger, - paste("autosubmit expid -H", auto_specs$platform, - "-d ")) + paste("autosubmit expid -H", auto_specs$platform, + "-d ")) } else if (!dir.exists(paste0(auto_specs$experiment_dir, - recipe$Run$auto_conf$expid))) { + recipe$Run$auto_conf$expid))) { error(recipe$Run$logger, - paste0("No folder in ", auto_specs$experiment_dir, - " for the EXPID", recipe$Run$auto_conf$expid, - ". Please make sure it is correct.")) + paste0("No folder in ", auto_specs$experiment_dir, + " for the EXPID", recipe$Run$auto_conf$expid, + ". Please make sure it is correct.")) } if ((recipe$Run$auto_conf$email_notifications) && - (is.null(recipe$Run$auto_conf$email_address))) { + (is.null(recipe$Run$auto_conf$email_address))) { error(recipe$Run$logger, - "Autosubmit notifications are enabled but email address is empty!") + "Autosubmit notifications are enabled but email address is empty!") } if (is.null(recipe$Run$auto_conf$hpc_user)) { error(recipe$Run$logger, - "The 'Run:auto_conf:hpc_user' field can not be empty.") + "The 'Run:auto_conf:hpc_user' field can not be empty.") } else if ((recipe$Run$filesystem == "esarchive") && - (!substr(recipe$Run$auto_conf$hpc_user, 1, 5) == "bsc32")) { + (!substr(recipe$Run$auto_conf$hpc_user, 1, 5) == "bsc32")) { error(recipe$Run$logger, - "Please check your hpc_user ID. It should look like: 'bsc32xxx'") + "Please check your hpc_user ID. It should look like: 'bsc32xxx'") } } @@ -418,13 +418,13 @@ check_recipe <- function(recipe) { ## TODO: Implement number of dependent verifications #nverifications <- check_number_of_dependent_verifications(recipe) # info(recipe$Run$logger, paste("Start Dates:", - # paste(fcst.sdate, collapse = " "))) + # paste(fcst.sdate, collapse = " "))) # Return error if any check has failed if (error_status) { error(recipe$Run$logger, "RECIPE CHECK FAILED.") stop("The recipe contains some errors. 
Find the full list in the", - "startup.log file.") + "startup.log file.") } else { info(recipe$Run$logger, "##### RECIPE CHECK SUCCESSFULL #####") # return(append(nverifications, fcst.sdate)) diff --git a/tools/data_summary.R b/tools/data_summary.R index 5f532dcf..92dcb353 100644 --- a/tools/data_summary.R +++ b/tools/data_summary.R @@ -19,7 +19,7 @@ data_summary <- function(data_cube, recipe) { info(recipe$Run$logger, "DATA SUMMARY:") info(recipe$Run$logger, paste(object_name, "months:", months)) info(recipe$Run$logger, paste(object_name, "range:", sdate_min, "to", - sdate_max)) + sdate_max)) info(recipe$Run$logger, paste(object_name, "dimensions:")) # Use capture.output() and for loop to display results neatly output_string <- capture.output(dim(data_cube$data)) @@ -27,7 +27,7 @@ data_summary <- function(data_cube, recipe) { info(recipe$Run$logger, i) } info(recipe$Run$logger, paste0("Statistical summary of the data in ", - object_name, ":")) + object_name, ":")) output_string <- capture.output(summary(data_cube$data)) for (i in output_string) { info(recipe$Run$logger, i) diff --git a/tools/prepare_outputs.R b/tools/prepare_outputs.R index 61825738..d0857730 100644 --- a/tools/prepare_outputs.R +++ b/tools/prepare_outputs.R @@ -22,8 +22,8 @@ #'@export prepare_outputs <- function(recipe_file, - disable_checks = FALSE, - uniqueID = TRUE) { + disable_checks = FALSE, + uniqueID = TRUE) { # recipe_file: path to recipe YAML file # disable_checks: If TRUE, does not perform checks on recipe # disable_uniqueID: If TRUE, does not add a unique ID to output dir @@ -39,7 +39,7 @@ prepare_outputs <- function(recipe_file, } else { folder_name <- paste0(gsub(".yml", "", gsub("/", "_", recipe$name)), "_", gsub(" ", "", gsub(":", "", gsub("-", "", - Sys.time())))) + Sys.time())))) } print("Saving all outputs to:") print(paste0(output_dir, folder_name)) @@ -49,7 +49,7 @@ prepare_outputs <- function(recipe_file, ## TODO: Move this part to main recipe # Copy recipe to output folder file.copy(recipe$recipe_path, file.path(output_dir, folder_name, 'logs', - 'recipes')) + 'recipes')) # Create log output file logfile <- file.path(output_dir, folder_name, 'logs', 'main.log') file.create(logfile) @@ -84,12 +84,12 @@ prepare_outputs <- function(recipe_file, if (is.null(recipe$Run$filesystem)) { recipe$Run$filesystem <- "esarchive" warn(recipe$Run$logger, - "Filesystem not specified in the recipe. Setting it to 'esarchive'.") + "Filesystem not specified in the recipe. Setting it to 'esarchive'.") } # Run recipe checker if (disable_checks) { warn(recipe$Run$logger, - "Recipe checks disabled. The recipe will not be checked for errors.") + "Recipe checks disabled. 
The recipe will not be checked for errors.") } else { check_recipe(recipe) } diff --git a/tools/read_atomic_recipe.R b/tools/read_atomic_recipe.R index 1eadb707..de2ad5b5 100644 --- a/tools/read_atomic_recipe.R +++ b/tools/read_atomic_recipe.R @@ -28,7 +28,7 @@ read_atomic_recipe <- function(recipe_file) { recipe$name <- tools::file_path_sans_ext(basename(recipe_file)) # Create log file for atomic recipe logfile <- file.path(recipe$Run$output_dir, 'logs', - paste0(recipe$name, '.log')) + paste0(recipe$name, '.log')) file.create(logfile) # Set default behaviour of logger if (is.null(recipe$Run)) { diff --git a/tools/write_autosubmit_conf.R b/tools/write_autosubmit_conf.R index a0208a9e..a425566d 100644 --- a/tools/write_autosubmit_conf.R +++ b/tools/write_autosubmit_conf.R @@ -20,9 +20,9 @@ write_autosubmit_conf <- function(recipe, nchunks) { ## expid, email notifications and address conf$config$EXPID <- expid if (recipe$Run$auto_conf$email_notifications) { - conf$mail$NOTIFICATIONS <- "True" + conf$mail$NOTIFICATIONS <- "True" } else { - conf$mail$NOTIFICATIONS <- "False" + conf$mail$NOTIFICATIONS <- "False" } conf$mail$TO <- recipe$Run$auto_conf$email_address } else if (conf_type == "expdef") { @@ -37,34 +37,34 @@ write_autosubmit_conf <- function(recipe, nchunks) { ## wallclock, notify_on, platform?, processors # Different file structure depending on autosubmit version if (auto_specs$auto_version == "4.0.0") { - jobs <- conf$JOBS + jobs <- conf$JOBS } else { - jobs <- conf + jobs <- conf } jobs$verification$WALLCLOCK <- recipe$Run$auto_conf$wallclock if (recipe$Run$auto_conf$notify_completed) { jobs$verification$NOTIFY_ON <- paste(jobs$verification$NOTIFY_ON, - "COMPLETED") + "COMPLETED") } if (recipe$Run$auto_conf$notify_failed) { - jobs$verification$NOTIFY_ON <- paste(jobs$verification$NOTIFY_ON, - "FAILED") + jobs$verification$NOTIFY_ON <- paste(jobs$verification$NOTIFY_ON, + "FAILED") } jobs$verification$PROCESSORS <- recipe$Run$auto_conf$processors_per_job # ncores? # Return to original list if (auto_specs$auto_version == "4.0.0") { - conf$JOBS <- jobs + conf$JOBS <- jobs } else { - conf <- jobs + conf <- jobs } } else if (conf_type == "platforms") { # Section 4: platform configuration ## nord3v2 configuration... platform name? 
user, processors_per_node if (auto_specs$auto_version == "4.0.0") { - conf$Platforms[[auto_specs$platform]]$USER <- - recipe$Run$auto_conf$hpc_user + conf$Platforms[[auto_specs$platform]]$USER <- + recipe$Run$auto_conf$hpc_user } else { - conf[[auto_specs$platform]]$USER <- recipe$Run$auto_conf$hpc_user + conf[[auto_specs$platform]]$USER <- recipe$Run$auto_conf$hpc_user } } else if (conf_type == "proj") { # Section 5: proj @@ -75,31 +75,31 @@ write_autosubmit_conf <- function(recipe, nchunks) { # Write config file inside autosubmit dir ## TODO: Change write.type depending on autosubmit version write.config(conf, paste0(dest_dir, dest_file), - write.type = auto_specs$conf_format) + write.type = auto_specs$conf_format) Sys.chmod(paste0(dest_dir, dest_file), mode = "755", use_umask = F) } info(recipe$Run$logger, paste("##### AUTOSUBMIT CONFIGURATION WRITTEN FOR", expid, "#####")) info(recipe$Run$logger, paste0("You can check your experiment configuration at: ", - "/esarchive/autosubmit/", expid, "/conf/")) + "/esarchive/autosubmit/", expid, "/conf/")) # Print instructions/commands for user if (recipe$Run$Terminal) { ## TODO: Change SSH message for other environments (outside BSC) info(recipe$Run$logger, - paste("Please SSH into bscesautosubmit01 or bscesautosubmit02 and run", - "the following commands:")) + paste("Please SSH into bscesautosubmit01 or bscesautosubmit02 and run", + "the following commands:")) info(recipe$Run$logger, - paste("module load", auto_specs$module_version)) + paste("module load", auto_specs$module_version)) info(recipe$Run$logger, - paste("autosubmit create", expid)) + paste("autosubmit create", expid)) info(recipe$Run$logger, - paste("autosubmit refresh", expid)) + paste("autosubmit refresh", expid)) info(recipe$Run$logger, - paste("nohup autosubmit run", expid, "& disown")) + paste("nohup autosubmit run", expid, "& disown")) } else { print(paste("Please SSH into bscesautosubmit01 or bscesautosubmit02 and run", - "the following commands:")) + "the following commands:")) print(paste("module load", auto_specs$module_version)) print(paste("autosubmit create", expid)) print(paste("autosubmit refresh", expid)) -- GitLab From b2cb9fd78eddc6cf4694b820f9cc81db6dabaa64 Mon Sep 17 00:00:00 2001 From: eduzenli Date: Thu, 13 Apr 2023 17:15:47 +0200 Subject: [PATCH 27/52] parallelization updates --- modules/Downscaling/tmp/Intlr.R | 37 ++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/modules/Downscaling/tmp/Intlr.R b/modules/Downscaling/tmp/Intlr.R index 24c909f3..b8bbf0fd 100644 --- a/modules/Downscaling/tmp/Intlr.R +++ b/modules/Downscaling/tmp/Intlr.R @@ -446,7 +446,6 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t .intlr <- function(x, y, loocv) { tmp_df <- data.frame(x = x, y = y) - # if the data is all NA, force return return NA if (all(is.na(tmp_df)) | (sum(apply(tmp_df, 2, function(x) !all(is.na(x)))) == 1)) { @@ -456,13 +455,12 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t } else { # training lm1 <- train_lm(df = tmp_df, loocv = loocv) - + # prediction res <- pred_lm(lm1 = lm1, df = tmp_df, loocv = loocv) } return(res) - } #----------------------------------- @@ -471,16 +469,20 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t #----------------------------------- train_lm <- function(df, loocv) { - # Remove columns containing only NA's - df <- df[ , apply(df, 2, function(x) !all(is.na(x)))] + # Remove predictor columns 
containing only NA's + df <- df[ , apply(df[,colnames(df) != 'y'], 2, function(x) !all(is.na(x)))] if (loocv) { - lm1 <- lapply(1:nrow(df), function(j) lm(df[-j,], formula = y ~ .)) - + lm1 <- lapply(1:nrow(df), function(j) { + if (all(is.na(df[-j,]$y))) { + return(NA) + } else { + return(lm(df[-j,], formula = y ~ .)) + }}) } else { - lm1 <- list(lm(data = df, formula = y ~ .)) + lm1 <- ifelse(all(is.na(df$y)), NA, list(lm(data = df, formula = y ~ .))) } return(lm1) @@ -492,15 +494,20 @@ train_lm <- function(df, loocv) { pred_lm <- function(df, lm1, loocv) { if (loocv) { - - pred_vals <- sapply(1:nrow(df), function(j) predict(lm1[[j]], df[j,])) - + pred_vals <- sapply(1:nrow(df), function(j) { + if (all(is.na(lm1[[j]]))) { + return(NA) + } else { + return(predict(lm1[[j]], df[j,])) + }}) } else { - - pred_vals_ls <- lapply(lm1, predict, data = df) - pred_vals <- unlist(pred_vals_ls) + if (!is.na(lm1)) { + pred_vals_ls <- lapply(lm1, predict, data = df) + pred_vals <- unlist(pred_vals_ls) + } else { + pred_vals <- rep(NA, nrow(df)) + } } - return(pred_vals) } -- GitLab From b4e54c0bdf1357ab1d12a17bb17dd19139d59184 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 17 Apr 2023 09:25:46 +0200 Subject: [PATCH 28/52] Change 'save_outputs' to 'save' --- modules/Anomalies/Anomalies.R | 6 +- modules/Calibration/Calibration.R | 6 +- modules/Saving/Saving.R | 64 +++++++++---------- modules/Skill/Skill.R | 6 +- modules/Visualization/Visualization.R | 2 +- .../atomic_recipes/recipe_system7c3s-tas.yml | 8 +-- 6 files changed, 46 insertions(+), 46 deletions(-) diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R index 2e98b265..8e26c680 100644 --- a/modules/Anomalies/Anomalies.R +++ b/modules/Anomalies/Anomalies.R @@ -86,17 +86,17 @@ compute_anomalies <- function(recipe, data) { recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "/outputs/Anomalies/") # Save forecast - if (recipe$Analysis$Workflow$Anomalies$save_outputs %in% + if (recipe$Analysis$Workflow$Anomalies$save %in% c('all', 'exp_only', 'fcst_only')) { save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') } # Save hindcast - if (recipe$Analysis$Workflow$Anomalies$save_outputs %in% + if (recipe$Analysis$Workflow$Anomalies$save %in% c('all', 'exp_only')) { save_forecast(recipe = recipe, data_cube = data$hcst, type = 'hcst') } # Save observation - if (recipe$Analysis$Workflow$Anomalies$save_outputs == 'all') { + if (recipe$Analysis$Workflow$Anomalies$save == 'all') { save_observations(recipe = recipe, data_cube = data$obs) } } else { diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 9d1db55f..006fecd2 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -166,15 +166,15 @@ calibrate_datasets <- function(recipe, data) { ## TODO: What do we do with the full values? 
recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "/outputs/Calibration/") - if (recipe$Analysis$Workflow$Calibration$save_outputs %in% + if (recipe$Analysis$Workflow$Calibration$save %in% c('all', 'exp_only', 'fcst_only')) { save_forecast(recipe = recipe, data_cube = fcst_calibrated, type = 'fcst') } - if (recipe$Analysis$Workflow$Calibration$save_outputs %in% + if (recipe$Analysis$Workflow$Calibration$save %in% c('all', 'exp_only')) { save_forecast(recipe = recipe, data_cube = hcst_calibrated, type = 'hcst') } - if (recipe$Analysis$Workflow$Calibration$save_outputs == 'all') { + if (recipe$Analysis$Workflow$Calibration$save == 'all') { save_observations(recipe = recipe, data_cube = data$obs) } diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index c900fffe..e872e832 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -4,8 +4,8 @@ source("modules/Saving/R/get_dir.R") source("modules/Saving/R/get_filename.R") save_data <- function(recipe, data, - skill_metrics = NULL, - probabilities = NULL) { + skill_metrics = NULL, + probabilities = NULL) { # Wrapper for the saving functions. # recipe: The auto-s2s recipe # archive: The auto-s2s archive @@ -184,7 +184,7 @@ save_forecast <- function(recipe, # Generate vector containing leadtimes dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) + cal = calendar) if (fcst.horizon == 'decadal') { ## Method 1: Use the first date as init_date. But it may be better to use ## the real initialized date (ask users) @@ -194,11 +194,11 @@ save_forecast <- function(recipe, if (type == 'hcst') { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', sprintf('%02d', init_month), '-01'), - cal = calendar) + cal = calendar) } else if (type == 'fcst') { init_date <- as.PCICt(paste0(recipe$Analysis$Time$fcst_year[1], '-', sprintf('%02d', init_month), '-01'), - cal = calendar) + cal = calendar) } } else { if (type == 'hcst') { @@ -208,7 +208,7 @@ save_forecast <- function(recipe, } else if (type == 'fcst') { init_date <- as.PCICt(paste0(recipe$Analysis$Time$fcst_year, recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) + format = '%Y%m%d', cal = calendar) } } # Get time difference in hours @@ -291,7 +291,7 @@ save_forecast <- function(recipe, } } info(recipe$Run$logger, paste("#####", toupper(type), - "SAVED TO NETCDF FILE #####")) + "SAVED TO NETCDF FILE #####")) } @@ -467,10 +467,10 @@ save_metrics <- function(recipe, if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && (recipe$Analysis$Workflow$Anomalies$compute)) { global_attributes <- c(list(from_anomalies = "Yes"), - global_attributes) + global_attributes) } else { global_attributes <- c(list(from_anomalies = "No"), - global_attributes) + global_attributes) } attr(skill[[1]], 'global_attrs') <- global_attributes @@ -487,9 +487,9 @@ save_metrics <- function(recipe, dims <- c(lalo, 'time') } metadata <- list(metric = list(name = metric, - standard_name = sdname, - long_name = long_name, - missing_value = missing_val)) + standard_name = sdname, + long_name = long_name, + missing_value = missing_val)) attr(skill[[i]], 'variables') <- metadata names(dim(skill[[i]])) <- dims } @@ -530,7 +530,7 @@ save_metrics <- function(recipe, } else { if (!is.null(recipe$Analysis$Time$fcst_year)) { fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) + recipe$Analysis$Time$sdate) } else { fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) } @@ -544,7 +544,7 @@ save_metrics <- 
function(recipe, outdir <- get_dir(recipe) } outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "skill") + fcst.sdate, agg, "skill") # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { @@ -585,10 +585,10 @@ save_corr <- function(recipe, if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && (recipe$Analysis$Workflow$Anomalies$compute)) { global_attributes <- c(global_attributes, - list(from_anomalies = "Yes")) + list(from_anomalies = "Yes")) } else { global_attributes <- c(global_attributes, - list(from_anomalies = "No")) + list(from_anomalies = "No")) } attr(skill[[1]], 'global_attrs') <- global_attributes @@ -605,9 +605,9 @@ save_corr <- function(recipe, dims <- c(lalo, 'ensemble', 'time') } metadata <- list(metric = list(name = metric, - standard_name = sdname, - long_name = long_name, - missing_value = missing_val)) + standard_name = sdname, + long_name = long_name, + missing_value = missing_val)) attr(skill[[i]], 'variables') <- metadata names(dim(skill[[i]])) <- dims } @@ -624,7 +624,7 @@ save_corr <- function(recipe, init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', sprintf('%02d', init_month), '-01'), - cal = calendar) + cal = calendar) } else { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), @@ -661,7 +661,7 @@ save_corr <- function(recipe, outdir <- get_dir(recipe) } outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "corr") + fcst.sdate, agg, "corr") # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { @@ -694,7 +694,7 @@ save_percentiles <- function(recipe, # Remove singleton dimensions and rearrange lon, lat and time dims if (tolower(agg) == "global") { percentiles <- lapply(percentiles, function(x) { - Reorder(x, c(lalo, 'time'))}) + Reorder(x, c(lalo, 'time'))}) } # Add global and variable attributes @@ -703,10 +703,10 @@ save_percentiles <- function(recipe, if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && (recipe$Analysis$Workflow$Anomalies$compute)) { global_attributes <- c(list(from_anomalies = "Yes"), - global_attributes) + global_attributes) } else { global_attributes <- c(list(from_anomalies = "No"), - global_attributes) + global_attributes) } attr(percentiles[[1]], 'global_attrs') <- global_attributes @@ -730,12 +730,12 @@ save_percentiles <- function(recipe, calendar <- archive$System[[global_attributes$system]]$calendar # Generate vector containing leadtimes dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) + cal = calendar) if (fcst.horizon == 'decadal') { init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', sprintf('%02d', init_month), '-01'), - cal = calendar) + cal = calendar) } else { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), @@ -771,7 +771,7 @@ save_percentiles <- function(recipe, outdir <- get_dir(recipe) } outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "percentiles") + fcst.sdate, agg, "percentiles") # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { country <- get_countries(grid) @@ -817,10 +817,10 @@ save_probabilities <- function(recipe, if 
((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && (recipe$Analysis$Workflow$Anomalies$compute)) { global_attributes <- c(list(from_anomalies = "Yes"), - global_attributes) + global_attributes) } else { global_attributes <- c(list(from_anomalies = "No"), - global_attributes) + global_attributes) } fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq @@ -834,7 +834,7 @@ save_probabilities <- function(recipe, init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', sprintf('%02d', init_month), '-01'), - cal = calendar) + cal = calendar) } else { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), @@ -852,10 +852,10 @@ save_probabilities <- function(recipe, probs_syear <- lapply(probs, ClimProjDiags::Subset, 'syear', i, drop = 'selected') if (tolower(agg) == "global") { probs_syear <- lapply(probs_syear, function(x) { - Reorder(x, c(lalo, 'time'))}) + Reorder(x, c(lalo, 'time'))}) } else { probs_syear <- lapply(probs_syear, function(x) { - Reorder(x, c('country', 'time'))}) + Reorder(x, c('country', 'time'))}) } ## TODO: Replace for loop with something more efficient? diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 2600e5eb..7028af57 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -285,7 +285,7 @@ compute_skill_metrics <- function(recipe, data) { recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "/outputs/Skill/") # Separate 'corr' from the rest of the metrics because of extra 'ensemble' dim - if (recipe$Analysis$Workflow$Skill$save_outputs == 'all') { + if (recipe$Analysis$Workflow$Skill$save == 'all') { corr_metric_names <- grep("^corr", names(skill_metrics)) # Save corr if (length(skill_metrics[corr_metric_names]) > 0) { @@ -405,13 +405,13 @@ compute_probabilities <- function(recipe, data) { recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "/outputs/Skill/") # Save percentiles - if (recipe$Analysis$Workflow$Probabilities$save_outputs %in% + if (recipe$Analysis$Workflow$Probabilities$save %in% c('all', 'percentiles_only')) { save_percentiles(recipe = recipe, percentiles = results$percentiles, data_cube = data$hcst) } # Save probability bins - if (recipe$Analysis$Workflow$Probabilities$save_outputs %in% + if (recipe$Analysis$Workflow$Probabilities$save %in% c('all', 'bins_only')) { save_probabilities(recipe = recipe, probs = results$probs, data_cube = data$hcst, type = "hcst") diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index bd389fc8..5a018b02 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -70,7 +70,7 @@ plot_data <- function(recipe, probabilities, outdir) } else { error(recipe$Run$logger, - paste0("For the most likely terciles plot, both the fsct and the ", + paste0("For the most likely terciles plot, both the fcst and the ", "probabilities must be provided.")) } } diff --git a/recipes/atomic_recipes/recipe_system7c3s-tas.yml b/recipes/atomic_recipes/recipe_system7c3s-tas.yml index 5b05e3c1..e5cd2aba 100644 --- a/recipes/atomic_recipes/recipe_system7c3s-tas.yml +++ b/recipes/atomic_recipes/recipe_system7c3s-tas.yml @@ -31,16 +31,16 @@ Analysis: Anomalies: compute: yes # yes/no, default yes cross_validation: yes # yes/no, default yes - save_outputs: 'all' # 'all'/'none'/'exp_only'/'fcst_only' + save: 'all' # 'all'/'none'/'exp_only'/'fcst_only' Calibration: method: mse_min 
- save_outputs: 'none' # 'all'/'none'/'exp_only'/'fcst_only' + save: 'none' # 'all'/'none'/'exp_only'/'fcst_only' Skill: metric: RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr mean_bias mean_bias_SS - save_outputs: 'all' # 'all'/'none' + save: 'all' # 'all'/'none' Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] - save_outputs: 'percentiles_only' # 'all'/'none'/'bins_only'/'percentiles_only' + save: 'percentiles_only' # 'all'/'none'/'bins_only'/'percentiles_only' Visualization: plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles Indicators: -- GitLab From 5ac5fff68525e49d867062ce9e18b972f9180ced Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 17 Apr 2023 16:35:48 +0200 Subject: [PATCH 29/52] Fix bugs and update unit tests --- modules/Skill/Skill.R | 23 ++++++++++------- tests/recipes/recipe-decadal_daily_1.yml | 6 ++++- tests/recipes/recipe-decadal_monthly_1.yml | 10 ++++++-- tests/recipes/recipe-decadal_monthly_1b.yml | 8 ++++-- tests/recipes/recipe-decadal_monthly_2.yml | 6 +++++ tests/recipes/recipe-decadal_monthly_3.yml | 6 ++++- tests/recipes/recipe-seasonal_daily_1.yml | 3 +++ tests/recipes/recipe-seasonal_monthly_1.yml | 6 +++++ tests/testthat/test-decadal_daily_1.R | 2 +- tests/testthat/test-decadal_monthly_1.R | 28 ++++++++------------- tests/testthat/test-decadal_monthly_2.R | 22 +++++++--------- tests/testthat/test-decadal_monthly_3.R | 2 +- tests/testthat/test-seasonal_daily.R | 2 +- tests/testthat/test-seasonal_monthly.R | 17 +++++++------ 14 files changed, 84 insertions(+), 57 deletions(-) diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 7028af57..d3f74b13 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -287,15 +287,20 @@ compute_skill_metrics <- function(recipe, data) { # Separate 'corr' from the rest of the metrics because of extra 'ensemble' dim if (recipe$Analysis$Workflow$Skill$save == 'all') { corr_metric_names <- grep("^corr", names(skill_metrics)) - # Save corr - if (length(skill_metrics[corr_metric_names]) > 0) { - save_corr(recipe = recipe, skill = skill_metrics[corr_metric_names], - data_cube = data$hcst) - } - # Save other skill metrics - if (length(skill_metrics[-corr_metric_names]) > 0) { - save_metrics(recipe = recipe, skill = skill_metrics[-corr_metric_names], + if (length(corr_metric_names) == 0) { + save_metrics(recipe = recipe, skill = skill_metrics, + data_cube = data$hcst) + } else { + # Save corr + if (length(skill_metrics[corr_metric_names]) > 0) { + save_corr(recipe = recipe, skill = skill_metrics[corr_metric_names], data_cube = data$hcst) + } + # Save other skill metrics + if (length(skill_metrics[-corr_metric_names]) > 0) { + save_metrics(recipe = recipe, skill = skill_metrics[-corr_metric_names], + data_cube = data$hcst) + } } } # Return results @@ -417,7 +422,7 @@ compute_probabilities <- function(recipe, data) { data_cube = data$hcst, type = "hcst") if (!is.null(results$probs_fcst)) { save_probabilities(recipe = recipe, probs = results$probs_fcst, - data_cbue = data$fcst, type = "fcst") + data_cube = data$fcst, type = "fcst") } } # Return results diff --git a/tests/recipes/recipe-decadal_daily_1.yml b/tests/recipes/recipe-decadal_daily_1.yml index 7a2a575b..88b87622 100644 --- a/tests/recipes/recipe-decadal_daily_1.yml +++ b/tests/recipes/recipe-decadal_daily_1.yml @@ -31,13 +31,17 @@ Analysis: Workflow: Anomalies: compute: no - cross_validation: + cross_validation: + save: 'none' Calibration: method: qmap + save: 'none' Skill: metric: RPSS + save: 
'none' Probabilities: percentiles: [[1/10, 9/10]] + save: 'none' Indicators: index: FALSE ncores: # Optional, int: number of cores, defaults to 1 diff --git a/tests/recipes/recipe-decadal_monthly_1.yml b/tests/recipes/recipe-decadal_monthly_1.yml index 35b55b1a..a2849c27 100644 --- a/tests/recipes/recipe-decadal_monthly_1.yml +++ b/tests/recipes/recipe-decadal_monthly_1.yml @@ -31,15 +31,21 @@ Analysis: Workflow: Anomalies: compute: no - cross-validation: + cross-validation: + save: Calibration: method: bias + save: 'all' Skill: metric: RPSS + save: 'all' Probabilities: - percentiles: [[1/3, 2/3], [1/10, 9/10]] + percentiles: [[1/3, 2/3], [1/10, 9/10]] + save: 'all' Indicators: index: FALSE + Visualization: + plots: skill_metrics most_likely_terciles forecast_ensemble_mean ncores: # Optional, int: number of cores, defaults to 1 remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE Output_format: S2S4E diff --git a/tests/recipes/recipe-decadal_monthly_1b.yml b/tests/recipes/recipe-decadal_monthly_1b.yml index 5551d9c7..5a1ce4fd 100644 --- a/tests/recipes/recipe-decadal_monthly_1b.yml +++ b/tests/recipes/recipe-decadal_monthly_1b.yml @@ -31,13 +31,17 @@ Analysis: Workflow: Anomalies: compute: no - cross_validation: + cross_validation: + save: 'none' Calibration: method: bias + save: 'none' Skill: metric: RPSS + save: 'none' Probabilities: - percentiles: [[1/3, 2/3], [1/10, 9/10]] + percentiles: [[1/3, 2/3], [1/10, 9/10]] + save: 'none' Indicators: index: FALSE ncores: # Optional, int: number of cores, defaults to 1 diff --git a/tests/recipes/recipe-decadal_monthly_2.yml b/tests/recipes/recipe-decadal_monthly_2.yml index 45eb01dd..b0892a0c 100644 --- a/tests/recipes/recipe-decadal_monthly_2.yml +++ b/tests/recipes/recipe-decadal_monthly_2.yml @@ -32,14 +32,20 @@ Analysis: Anomalies: compute: no cross_validation: + save: 'all' Calibration: method: raw + save: 'all' Skill: metric: RPSS_specs EnsCorr_specs FRPS_specs FRPSS_specs BSS10_specs FRPS + save: 'all' Probabilities: percentiles: [[1/3, 2/3]] + save: 'all' Indicators: index: FALSE + Visualization: + plots: most_likely_terciles skill_metrics forecast_ensemble_mean ncores: # Optional, int: number of cores, defaults to 1 remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE Output_format: S2S4E diff --git a/tests/recipes/recipe-decadal_monthly_3.yml b/tests/recipes/recipe-decadal_monthly_3.yml index 94bdfebc..fc42cd2d 100644 --- a/tests/recipes/recipe-decadal_monthly_3.yml +++ b/tests/recipes/recipe-decadal_monthly_3.yml @@ -31,13 +31,17 @@ Analysis: Workflow: Anomalies: compute: no - cross_validation: + cross_validation: + save: 'none' Calibration: method: 'evmos' + save: 'none' Skill: metric: BSS10 Corr + save: 'none' Probabilities: percentiles: [[1/3, 2/3]] + save: 'none' Indicators: index: FALSE ncores: # Optional, int: number of cores, defaults to 1 diff --git a/tests/recipes/recipe-seasonal_daily_1.yml b/tests/recipes/recipe-seasonal_daily_1.yml index afa0f496..f70f0c03 100644 --- a/tests/recipes/recipe-seasonal_daily_1.yml +++ b/tests/recipes/recipe-seasonal_daily_1.yml @@ -31,10 +31,13 @@ Analysis: Anomalies: compute: no cross_validation: + save: 'none' Calibration: method: qmap + save: 'none' Skill: metric: EnsCorr_specs + save: 'none' Indicators: index: no Output_format: S2S4E diff --git a/tests/recipes/recipe-seasonal_monthly_1.yml b/tests/recipes/recipe-seasonal_monthly_1.yml index 68c58f83..5a2f5c48 100644 --- a/tests/recipes/recipe-seasonal_monthly_1.yml +++ 
b/tests/recipes/recipe-seasonal_monthly_1.yml @@ -31,14 +31,20 @@ Analysis: Anomalies: compute: no cross_validation: + save: Calibration: method: mse_min + save: 'all' Skill: metric: RPSS CRPSS EnsCorr Corr Enscorr_specs + save: 'all' Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10]] + save: 'all' Indicators: index: no + Visualization: + plots: skill_metrics most_likely_terciles forecast_ensemble_mean Output_format: S2S4E Run: Loglevel: INFO diff --git a/tests/testthat/test-decadal_daily_1.R b/tests/testthat/test-decadal_daily_1.R index 400b864d..c26b8978 100644 --- a/tests/testthat/test-decadal_daily_1.R +++ b/tests/testthat/test-decadal_daily_1.R @@ -219,4 +219,4 @@ as.POSIXct("1992-03-30 12:00:00", tz = 'UTC') # #}) - +unlink(recipe$Run$output_dir, recursive = TRUE) diff --git a/tests/testthat/test-decadal_monthly_1.R b/tests/testthat/test-decadal_monthly_1.R index 4fc92b3d..ee1520ad 100644 --- a/tests/testthat/test-decadal_monthly_1.R +++ b/tests/testthat/test-decadal_monthly_1.R @@ -30,21 +30,12 @@ suppressWarnings({invisible(capture.output( probs <- compute_probabilities(recipe, calibrated_data) ))}) -# Saving -suppressWarnings({invisible(capture.output( -save_data(recipe = recipe, data = calibrated_data, - skill_metrics = skill_metrics, probabilities = probs) -))}) - # Plotting suppressWarnings({invisible(capture.output( plot_data(recipe = recipe, archive = archive, data = calibrated_data, skill_metrics = skill_metrics, probabilities = probs, significance = T) ))}) - -outdir <- get_dir(recipe) - #====================================== test_that("1. Loading", { @@ -256,10 +247,10 @@ tolerance = 0.0001 #====================================== test_that("4. Saving", { - +outputs <- paste0(recipe$Run$output_dir, "/outputs/") expect_equal( -all(list.files(outdir) %in% -c("plots", "tas_19911101.nc", "tas_19921101.nc", "tas_19931101.nc", "tas_19941101.nc", "tas_20211101.nc", +all(basename(list.files(outputs, recursive = T)) %in% +c("tas_19911101.nc", "tas_19921101.nc", "tas_19931101.nc", "tas_19941101.nc", "tas_20211101.nc", "tas-obs_19911101.nc", "tas-obs_19921101.nc", "tas-obs_19931101.nc", "tas-obs_19941101.nc", "tas-percentiles_month11.nc", "tas-probs_19911101.nc", "tas-probs_19921101.nc", "tas-probs_19931101.nc", "tas-probs_19941101.nc", "tas-probs_20211101.nc", "tas-skill_month11.nc")), @@ -270,30 +261,30 @@ TRUE #) expect_equal( -length(list.files(outdir)), -17 +length(list.files(outputs, recursive = T)), +16 ) }) test_that("5. 
Visualization", { +plots <- paste0(recipe$Run$output_dir, "/plots/") expect_equal( -all(list.files(paste0(outdir, "/plots/")) %in% +all(basename(list.files(plots, recursive = T)) %in% c("forecast_ensemble_mean-2021.png", "forecast_most_likely_tercile-2021.png", "rpss.png")), TRUE ) expect_equal( -length(list.files(paste0(outdir, "/plots/"))), +length(list.files(plots, recursive = T)), 3 ) }) # Delete files -unlink(paste0(outdir, list.files(outdir, recursive = TRUE))) - +unlink(recipe$Run$output_dir, recursive = TRUE) #============================================================== @@ -345,4 +336,5 @@ lapply(probs_b$probs_fcst, ClimProjDiags::Subset, 'syear', 2), probs$probs_fcst ) +unlink(recipe$Run$output_dir, recursive = TRUE) }) diff --git a/tests/testthat/test-decadal_monthly_2.R b/tests/testthat/test-decadal_monthly_2.R index 40a56abf..a6ca9254 100644 --- a/tests/testthat/test-decadal_monthly_2.R +++ b/tests/testthat/test-decadal_monthly_2.R @@ -29,11 +29,6 @@ suppressWarnings({invisible(capture.output( probs <- compute_probabilities(recipe, calibrated_data) ))}) -# Saving -suppressWarnings({invisible(capture.output( -save_data(recipe, calibrated_data, skill_metrics, probs) -))}) - # Plotting suppressWarnings({invisible(capture.output( plot_data(recipe = recipe, data = calibrated_data, @@ -250,18 +245,18 @@ tolerance = 0.0001 #====================================== test_that("4. Saving", { - +outputs <- paste0(recipe$Run$output_dir, "/outputs/") expect_equal( -all(list.files(outdir) %in% -c("plots", "tas_19901101.nc", "tas_19911101.nc", "tas_19921101.nc", "tas_20201101.nc", "tas_20211101.nc", +all(basename(list.files(outputs, recursive = T)) %in% +c("tas_19901101.nc", "tas_19911101.nc", "tas_19921101.nc", "tas_20201101.nc", "tas_20211101.nc", "tas-obs_19901101.nc", "tas-obs_19911101.nc", "tas-obs_19921101.nc", "tas-percentiles_month11.nc", "tas-probs_19901101.nc", "tas-probs_19911101.nc", "tas-probs_19921101.nc", "tas-probs_20201101.nc", "tas-probs_20211101.nc", "tas-skill_month11.nc")), TRUE ) expect_equal( -length(list.files(outdir)), -16 +length(list.files(outputs, recursive = T)), +15 ) }) @@ -269,19 +264,20 @@ length(list.files(outdir)), #====================================== test_that("5. 
Visualization", { +plots <- paste0(recipe$Run$output_dir, "/plots/") expect_equal( -all(list.files(paste0(outdir, "/plots/")) %in% +all(basename(list.files(plots, recursive = T)) %in% c("bss10_specs.png", "enscorr_specs.png", "forecast_ensemble_mean-2020.png", "forecast_ensemble_mean-2021.png", "forecast_most_likely_tercile-2020.png", "forecast_most_likely_tercile-2021.png", "frps_specs.png", "frps.png", "rpss_specs.png") ), TRUE ) expect_equal( -length(list.files(paste0(outdir, "/plots/"))), +length(list.files(plots, recursive = T)), 9 ) }) # Delete files -unlink(paste0(outdir, list.files(outdir, recursive = TRUE))) +unlink(recipe$Run$output_dir, recursive = TRUE) diff --git a/tests/testthat/test-decadal_monthly_3.R b/tests/testthat/test-decadal_monthly_3.R index 85c15c88..988172c6 100644 --- a/tests/testthat/test-decadal_monthly_3.R +++ b/tests/testthat/test-decadal_monthly_3.R @@ -196,4 +196,4 @@ tolerance = 0.0001 }) - +unlink(recipe$Run$output_dir, recursive = TRUE) diff --git a/tests/testthat/test-seasonal_daily.R b/tests/testthat/test-seasonal_daily.R index 10cba33e..da0e789b 100644 --- a/tests/testthat/test-seasonal_daily.R +++ b/tests/testthat/test-seasonal_daily.R @@ -164,4 +164,4 @@ tolerance=0.0001 ) }) -unlink(recipe$Run$output_dir) +unlink(recipe$Run$output_dir, recursive = TRUE) diff --git a/tests/testthat/test-seasonal_monthly.R b/tests/testthat/test-seasonal_monthly.R index 6a166503..83b5ceab 100644 --- a/tests/testthat/test-seasonal_monthly.R +++ b/tests/testthat/test-seasonal_monthly.R @@ -212,10 +212,10 @@ rep(FALSE, 3) }) test_that("4. Saving", { - +outputs <- paste0(recipe$Run$output_dir, "/outputs/") expect_equal( -all(list.files(outdir) %in% -c("plots", "tas_19931101.nc", "tas_19941101.nc", "tas_19951101.nc", +all(basename(list.files(outputs, recursive = T)) %in% +c("tas_19931101.nc", "tas_19941101.nc", "tas_19951101.nc", "tas_19961101.nc", "tas_20201101.nc", "tas-corr_month11.nc", "tas-obs_19931101.nc", "tas-obs_19941101.nc", "tas-obs_19951101.nc", "tas-obs_19961101.nc", "tas-percentiles_month11.nc", "tas-probs_19931101.nc", @@ -224,26 +224,27 @@ c("plots", "tas_19931101.nc", "tas_19941101.nc", "tas_19951101.nc", TRUE ) expect_equal( -length(list.files(outdir)), -18 +length(list.files(outputs, recursive = T)), +17 ) }) test_that("5. 
Visualization", { +plots <- paste0(recipe$Run$output_dir, "/plots/") expect_equal( -all(list.files(paste0(outdir, "/plots/")) %in% +all(basename(list.files(plots, recursive = T)) %in% c("crpss-november.png", "enscorr_specs-november.png", "enscorr-november.png", "forecast_ensemble_mean-20201101.png", "forecast_most_likely_tercile-20201101.png", "rpss-november.png")), TRUE ) expect_equal( -length(list.files(paste0(outdir, "/plots/"))), +length(list.files(plots, recursive = T)), 6 ) }) # Delete files -unlink(paste0(outdir, list.files(outdir, recursive = TRUE))) +unlink(recipe$Run$output_dir, recursive = T) -- GitLab From ccd9b89b283b41a4e89d58afa5af9c65f406ad1e Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 18 Apr 2023 16:23:26 +0200 Subject: [PATCH 30/52] Adapt recipe checks to new saving options, make files for saving and plotting functions --- modules/Saving/R/Utils.R | 69 ++++++++ modules/Saving/R/save_corr.R | 117 +++++++++++++ modules/Saving/R/save_forecast.R | 137 +++++++++++++++ modules/Saving/R/save_metrics.R | 119 +++++++++++++ modules/Saving/R/save_observations.R | 137 +++++++++++++++ modules/Saving/R/save_percentiles.R | 107 ++++++++++++ modules/Saving/R/save_probabilities.R | 124 ++++++++++++++ modules/Saving/Saving.R | 7 + modules/Visualization/R/plot_ensemble_mean.R | 88 ++++++++++ .../R/plot_most_likely_terciles_map.R | 87 ++++++++++ modules/Visualization/R/plot_skill_metrics.R | 161 ++++++++++++++++++ modules/Visualization/Visualization.R | 4 + tests/recipes/recipe-seasonal_monthly_1.yml | 2 +- tools/check_recipe.R | 63 ++++++- 14 files changed, 1220 insertions(+), 2 deletions(-) create mode 100644 modules/Saving/R/Utils.R create mode 100644 modules/Saving/R/save_corr.R create mode 100644 modules/Saving/R/save_forecast.R create mode 100644 modules/Saving/R/save_metrics.R create mode 100644 modules/Saving/R/save_observations.R create mode 100644 modules/Saving/R/save_percentiles.R create mode 100644 modules/Saving/R/save_probabilities.R create mode 100644 modules/Visualization/R/plot_ensemble_mean.R create mode 100644 modules/Visualization/R/plot_most_likely_terciles_map.R create mode 100644 modules/Visualization/R/plot_skill_metrics.R diff --git a/modules/Saving/R/Utils.R b/modules/Saving/R/Utils.R new file mode 100644 index 00000000..9ff695a6 --- /dev/null +++ b/modules/Saving/R/Utils.R @@ -0,0 +1,69 @@ +.get_global_attributes <- function(recipe, archive) { + # Generates metadata of interest to add to the global attributes of the + # netCDF files. + parameters <- recipe$Analysis + hcst_period <- paste0(parameters$Time$hcst_start, " to ", + parameters$Time$hcst_end) + current_time <- paste0(as.character(Sys.time()), " ", Sys.timezone()) + system_name <- parameters$Datasets$System$name + reference_name <- parameters$Datasets$Reference$name + + attrs <- list(reference_period = hcst_period, + institution_system = archive$System[[system_name]]$institution, + institution_reference = archive$Reference[[reference_name]]$institution, + system = system_name, + reference = reference_name, + calibration_method = parameters$Workflow$Calibration$method, + computed_on = current_time) + + return(attrs) +} + +.get_times <- function(store.freq, fcst.horizon, leadtimes, sdate, calendar) { + # Generates time dimensions and the corresponding metadata. 
+ ## TODO: Subseasonal + + switch(fcst.horizon, + "seasonal" = {time <- leadtimes; ref <- 'hours since '; + stdname <- paste(strtoi(leadtimes), collapse=", ")}, + "subseasonal" = {len <- 4; ref <- 'hours since '; + stdname <- ''}, + "decadal" = {time <- leadtimes; ref <- 'hours since '; + stdname <- paste(strtoi(leadtimes), collapse=", ")}) + + dim(time) <- length(time) + sdate <- as.Date(sdate, format = '%Y%m%d') # reformatting + metadata <- list(time = list(units = paste0(ref, sdate, 'T00:00:00'), + calendar = calendar)) + attr(time, 'variables') <- metadata + names(dim(time)) <- 'time' + + sdate <- 1:length(sdate) + dim(sdate) <- length(sdate) + metadata <- list(sdate = list(standard_name = paste(strtoi(sdate), + collapse=", "), + units = paste0('Init date'))) + attr(sdate, 'variables') <- metadata + names(dim(sdate)) <- 'sdate' + + return(list(time=time)) +} + +.get_latlon <- function(latitude, longitude) { + # Adds dimensions and metadata to lat and lon + # latitude: array containing the latitude values + # longitude: array containing the longitude values + + dim(longitude) <- length(longitude) + metadata <- list(longitude = list(units = 'degrees_east')) + attr(longitude, 'variables') <- metadata + names(dim(longitude)) <- 'longitude' + + dim(latitude) <- length(latitude) + metadata <- list(latitude = list(units = 'degrees_north')) + attr(latitude, 'variables') <- metadata + names(dim(latitude)) <- 'latitude' + + return(list(lat=latitude, lon=longitude)) + +} diff --git a/modules/Saving/R/save_corr.R b/modules/Saving/R/save_corr.R new file mode 100644 index 00000000..8b945318 --- /dev/null +++ b/modules/Saving/R/save_corr.R @@ -0,0 +1,117 @@ +save_corr <- function(recipe, + skill, + data_cube, + agg = "global", + outdir = NULL) { + # This function adds metadata to the ensemble correlation in 'skill' + # and exports it to a netCDF file inside 'outdir'. 
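## 'skill' is expected to be a named list of correlation metrics, each with
## longitude, latitude, ensemble and time dimensions, and 'data_cube' the
## corresponding hindcast s2dv_cube. A typical call, as made from
## compute_skill_metrics():
# save_corr(recipe = recipe, skill = skill_metrics[corr_metric_names],
#           data_cube = data$hcst)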
+ + archive <- get_archive(recipe) + dictionary <- read_yaml("conf/variable-dictionary.yml") + # Define grid dimensions and names + lalo <- c('longitude', 'latitude') + # Remove singleton dimensions and rearrange lon, lat and time dims + if (tolower(agg) == "global") { + skill <- lapply(skill, function(x) { + Reorder(x, c(lalo, 'ensemble', 'time'))}) + } + # Add global and variable attributes + global_attributes <- .get_global_attributes(recipe, archive) + ## TODO: Sort out the logic once default behavior is decided + if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { + global_attributes <- c(global_attributes, + list(from_anomalies = "Yes")) + } else { + global_attributes <- c(global_attributes, + list(from_anomalies = "No")) + } + attr(skill[[1]], 'global_attrs') <- global_attributes + + for (i in 1:length(skill)) { + metric <- names(skill[i]) + long_name <- dictionary$metrics[[metric]]$long_name + missing_val <- -9.e+33 + skill[[i]][is.na(skill[[i]])] <- missing_val + if (tolower(agg) == "country") { + sdname <- paste0(metric, " region-aggregated metric") + dims <- c('Country', 'ensemble', 'time') + } else { + sdname <- paste0(metric) #, " grid point metric") # formerly names(metric) + dims <- c(lalo, 'ensemble', 'time') + } + metadata <- list(metric = list(name = metric, + standard_name = sdname, + long_name = long_name, + missing_value = missing_val)) + attr(skill[[i]], 'variables') <- metadata + names(dim(skill[[i]])) <- dims + } + + # Time indices and metadata + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + calendar <- archive$System[[global_attributes$system]]$calendar + + # Generate vector containing leadtimes + dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), + cal = calendar) + if (fcst.horizon == 'decadal') { + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + } else { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } + + # Get time difference in hours + leadtimes <- as.numeric(dates - init_date)/3600 + + # Select start date + # If a fcst is provided, use that as the ref. year. Otherwise use 1970. 
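## In hindcast-only runs there is no forecast start date, so a placeholder
## year is used instead; e.g. with sdate '1101' and no fcst_year, fcst.sdate
## becomes "19701101", which only affects the reference date written to the
## file name and the time metadata.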
+ if (fcst.horizon == 'decadal') { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + #PROBLEM: May be more than one fcst_year + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], + sprintf('%02d', init_month), '01') + } else { + fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') + } + } else { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + } else { + fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) + } + } + + times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time + + # Generate name of output file + if (is.null(outdir)) { + outdir <- get_dir(recipe) + } + outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, + fcst.sdate, agg, "corr") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- .get_countries(grid) + ArrayToNc(append(country, time, skill), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, skill) + ArrayToNc(vars, outfile) + } + info(recipe$Run$logger, + "##### ENSEMBLE CORRELATION SAVED TO NETCDF FILE #####") +} diff --git a/modules/Saving/R/save_forecast.R b/modules/Saving/R/save_forecast.R new file mode 100644 index 00000000..9ec8499c --- /dev/null +++ b/modules/Saving/R/save_forecast.R @@ -0,0 +1,137 @@ +save_forecast <- function(recipe, + data_cube, + type = "hcst", + agg = "global", + outdir = NULL) { + # Loops over the years in the s2dv_cube containing a hindcast or forecast + # and exports each year to a netCDF file. + # data_cube: s2dv_cube containing the data and metadata + # recipe: the auto-s2s recipe + # outdir: directory where the files should be saved + # agg: aggregation, "global" or "country" + + lalo <- c('longitude', 'latitude') + archive <- get_archive(recipe) + dictionary <- read_yaml("conf/variable-dictionary.yml") + variable <- data_cube$attrs$Variable$varName + var.longname <- data_cube$attrs$Variable$metadata[[variable]]$long_name + global_attributes <- .get_global_attributes(recipe, archive) + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + calendar <- archive$System[[global_attributes$system]]$calendar + + if (is.null(outdir)) { + outdir <- get_dir(recipe) + } + + # Generate vector containing leadtimes + dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), + cal = calendar) + if (fcst.horizon == 'decadal') { + ## Method 1: Use the first date as init_date. 
But it may be better to use + ## the real initialized date (ask users) + # init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') + ## Method 2: use initial month + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + if (type == 'hcst') { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + } else if (type == 'fcst') { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$fcst_year[1], '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + } + } else { + if (type == 'hcst') { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } else if (type == 'fcst') { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } + } + # Get time difference in hours + leadtimes <- as.numeric(dates - init_date)/3600 + + syears <- seq(1:dim(data_cube$data)['syear'][[1]]) + # expect dim = [sday = 1, sweek = 1, syear, time] + syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) + for (i in syears) { + # Select year from array and rearrange dimensions + fcst <- ClimProjDiags::Subset(data_cube$data, 'syear', i, drop = T) + + if (!("time" %in% names(dim(fcst)))) { + dim(fcst) <- c("time" = 1, dim(fcst)) + } + if (tolower(agg) == "global") { + fcst <- list(Reorder(fcst, c(lalo, 'ensemble', 'time'))) + } else { + fcst <- list(Reorder(fcst, c('country', 'ensemble', 'time'))) + } + + # Add metadata + var.sdname <- dictionary$vars[[variable]]$standard_name + if (tolower(agg) == "country") { + dims <- c('Country', 'ensemble', 'time') + var.expname <- paste0(variable, '_country') + var.longname <- paste0("Country-Aggregated ", var.longname) + var.units <- attr(data_cube$Variable, 'variable')$units + } else { + dims <- c(lalo, 'ensemble', 'time') + var.expname <- variable + var.sdname <- var.sdname + var.units <- data_cube$attrs$Variable$metadata[[variable]]$units + } + + metadata <- list(fcst = list(name = var.expname, + standard_name = var.sdname, + long_name = var.longname, + units = var.units)) + attr(fcst[[1]], 'variables') <- metadata + names(dim(fcst[[1]])) <- dims + # Add global attributes + attr(fcst[[1]], 'global_attrs') <- global_attributes + + # Select start date + if (fcst.horizon == 'decadal') { + ## NOTE: Not good to use data_cube$load_parameters$dat1 because decadal + ## data has been reshaped + # fcst.sdate <- format(as.Date(data_cube$Dates$start[i]), '%Y%m%d') + + # init_date is like "1990-11-01" + init_date <- as.POSIXct(init_date) + fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) + fcst.sdate <- format(fcst.sdate, '%Y%m%d') + + } else { + fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] + } + + # Get time dimension values and metadata + times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time + + # Generate name of output file + outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, + fcst.sdate, agg, "exp") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, fcst), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a 
list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, fcst) + ArrayToNc(vars, outfile) + } + } + info(recipe$Run$logger, paste("#####", toupper(type), + "SAVED TO NETCDF FILE #####")) +} diff --git a/modules/Saving/R/save_metrics.R b/modules/Saving/R/save_metrics.R new file mode 100644 index 00000000..d48d9d8d --- /dev/null +++ b/modules/Saving/R/save_metrics.R @@ -0,0 +1,119 @@ +save_metrics <- function(recipe, + skill, + data_cube, + agg = "global", + outdir = NULL) { + # This function adds metadata to the skill metrics in 'skill' + # and exports them to a netCDF file inside 'outdir'. + + # Define grid dimensions and names + lalo <- c('longitude', 'latitude') + archive <- get_archive(recipe) + dictionary <- read_yaml("conf/variable-dictionary.yml") + + + # Remove singleton dimensions and rearrange lon, lat and time dims + if (tolower(agg) == "global") { + skill <- lapply(skill, function(x) { + Reorder(x, c(lalo, 'time'))}) + } + # Add global and variable attributes + global_attributes <- .get_global_attributes(recipe, archive) + ## TODO: Sort out the logic once default behavior is decided + if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { + global_attributes <- c(list(from_anomalies = "Yes"), + global_attributes) + } else { + global_attributes <- c(list(from_anomalies = "No"), + global_attributes) + } + attr(skill[[1]], 'global_attrs') <- global_attributes + + for (i in 1:length(skill)) { + metric <- names(skill[i]) + long_name <- dictionary$metrics[[metric]]$long_name + missing_val <- -9.e+33 + skill[[i]][is.na(skill[[i]])] <- missing_val + if (tolower(agg) == "country") { + sdname <- paste0(metric, " region-aggregated metric") + dims <- c('Country', 'time') + } else { + sdname <- paste0(metric) #, " grid point metric") + dims <- c(lalo, 'time') + } + metadata <- list(metric = list(name = metric, + standard_name = sdname, + long_name = long_name, + missing_value = missing_val)) + attr(skill[[i]], 'variables') <- metadata + names(dim(skill[[i]])) <- dims + } + + # Time indices and metadata + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + calendar <- archive$System[[global_attributes$system]]$calendar + + # Generate vector containing leadtimes + dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), + cal = calendar) + + if (fcst.horizon == 'decadal') { + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + } else { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } + + # Get time difference in hours + leadtimes <- as.numeric(dates - init_date)/3600 + + # Select start date + # If a fcst is provided, use that as the ref. year. Otherwise use 1970. 
+ if (fcst.horizon == 'decadal') { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + #PROBLEM: May be more than one fcst_year + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], + sprintf('%02d', init_month), '01') + } else { + fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') + } + } else { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + } else { + fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) + } + } + + times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time + + # Generate name of output file + if (is.null(outdir)) { + outdir <- get_dir(recipe) + } + outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, + fcst.sdate, agg, "skill") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, skill), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, skill) + ArrayToNc(vars, outfile) + } + info(recipe$Run$logger, "##### SKILL METRICS SAVED TO NETCDF FILE #####") +} diff --git a/modules/Saving/R/save_observations.R b/modules/Saving/R/save_observations.R new file mode 100644 index 00000000..dcaf0765 --- /dev/null +++ b/modules/Saving/R/save_observations.R @@ -0,0 +1,137 @@ +save_observations <- function(recipe, + data_cube, + agg = "global", + outdir = NULL) { + # Loops over the years in the s2dv_cube containing the observations and + # exports each year to a netCDF file. + # data_cube: s2dv_cube containing the data and metadata + # recipe: the auto-s2s recipe + # outdir: directory where the files should be saved + # agg: aggregation, "global" or "country" + + lalo <- c('longitude', 'latitude') + archive <- get_archive(recipe) + dictionary <- read_yaml("conf/variable-dictionary.yml") + variable <- data_cube$attrs$Variable$varName + var.longname <- data_cube$attrs$Variable$metadata[[variable]]$long_name + global_attributes <- .get_global_attributes(recipe, archive) + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + calendar <- archive$Reference[[global_attributes$reference]]$calendar + + if (is.null(outdir)) { + outdir <- get_dir(recipe) + } + + # Generate vector containing leadtimes + ## TODO: Move to a separate function? + dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), + cal = calendar) + if (fcst.horizon == 'decadal') { + ## Method 1: Use the first date as init_date. 
But it may be better to use + ## the real initialized date (ask users) +# init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') + ## Method 2: use initial month + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + + } else { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } + # Get time difference in hours + leadtimes <- as.numeric(dates - init_date)/3600 + + syears <- seq(1:dim(data_cube$data)['syear'][[1]]) + ## expect dim = [sday = 1, sweek = 1, syear, time] + syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) + for (i in syears) { + # Select year from array and rearrange dimensions + fcst <- ClimProjDiags::Subset(data_cube$data, 'syear', i, drop = T) + + if (!("time" %in% names(dim(fcst)))) { + dim(fcst) <- c("time" = 1, dim(fcst)) + } + if (tolower(agg) == "global") { + fcst <- list(Reorder(fcst, c(lalo, 'time'))) + } else { + fcst <- list(Reorder(fcst, c('country', 'time'))) + } + + # Add metadata + var.sdname <- dictionary$vars[[variable]]$standard_name + if (tolower(agg) == "country") { + dims <- c('Country', 'time') + var.expname <- paste0(variable, '_country') + var.longname <- paste0("Country-Aggregated ", var.longname) + var.units <- data_cube$attrs$Variable$metadata[[variable]]$units + } else { + dims <- c(lalo, 'time') + var.expname <- variable + var.units <- data_cube$attrs$Variable$metadata[[variable]]$units + } + + metadata <- list(fcst = list(name = var.expname, + standard_name = var.sdname, + long_name = var.longname, + units = var.units)) + attr(fcst[[1]], 'variables') <- metadata + names(dim(fcst[[1]])) <- dims + # Add global attributes + attr(fcst[[1]], 'global_attrs') <- global_attributes + + # Select start date. The date is computed for each year, and adapted for + # consistency with the hcst/fcst dates, so that both sets of files have + # the same name pattern. + ## Because observations are loaded differently in the daily vs. monthly + ## cases, different approaches are necessary. 
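## In short: for monthly means, "01" is appended to the file_date string and
## parsed as a date, while for daily data the corresponding entry of the
## Dates array is used directly; further below, the year and month are nudged
## to match the hindcast initialisation so that the observation files
## (e.g. tas-obs_19931101.nc) pair up with the hindcast file names.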
+ if (fcst.horizon == 'decadal') { + # init_date is like "1990-11-01" + init_date <- as.POSIXct(init_date) + fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) + } else { + + if (store.freq == "monthly_mean") { + fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] + fcst.sdate <- as.Date(paste0(fcst.sdate, "01"), '%Y%m%d') + } else { + fcst.sdate <- as.Date(data_cube$attrs$Dates[i]) + } + } + + # Ensure the year is correct if the first leadtime goes to the next year + init_date <- as.POSIXct(init_date) + if (lubridate::month(fcst.sdate) < lubridate::month(init_date)) { + lubridate::year(fcst.sdate) <- lubridate::year(fcst.sdate) + 1 + } + # Ensure that the initialization month is consistent with the hindcast + lubridate::month(fcst.sdate) <- lubridate::month(init_date) + fcst.sdate <- format(fcst.sdate, format = '%Y%m%d') + + # Get time dimension values and metadata + times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time + + # Generate name of output file + outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, + fcst.sdate, agg, "obs") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, fcst), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, fcst) + ArrayToNc(vars, outfile) + } + } + info(recipe$Run$logger, "##### OBS SAVED TO NETCDF FILE #####") +} diff --git a/modules/Saving/R/save_percentiles.R b/modules/Saving/R/save_percentiles.R new file mode 100644 index 00000000..b4aab605 --- /dev/null +++ b/modules/Saving/R/save_percentiles.R @@ -0,0 +1,107 @@ +save_percentiles <- function(recipe, + percentiles, + data_cube, + agg = "global", + outdir = NULL) { + # This function adds metadata to the percentiles + # and exports them to a netCDF file inside 'outdir'. 
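## 'percentiles' is expected to be a named list of arrays with longitude,
## latitude and time dimensions, one per requested percentile. A typical
## call, as made from compute_probabilities():
# save_percentiles(recipe = recipe, percentiles = results$percentiles,
#                  data_cube = data$hcst)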
+ archive <- get_archive(recipe) + + # Define grid dimensions and names + lalo <- c('longitude', 'latitude') + # Remove singleton dimensions and rearrange lon, lat and time dims + if (tolower(agg) == "global") { + percentiles <- lapply(percentiles, function(x) { + Reorder(x, c(lalo, 'time'))}) + } + + # Add global and variable attributes + global_attributes <- .get_global_attributes(recipe, archive) + ## TODO: Sort out the logic once default behavior is decided + if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { + global_attributes <- c(list(from_anomalies = "Yes"), + global_attributes) + } else { + global_attributes <- c(list(from_anomalies = "No"), + global_attributes) + } + attr(percentiles[[1]], 'global_attrs') <- global_attributes + + for (i in 1:length(percentiles)) { + ## TODO: replace with proper standard names + percentile <- names(percentiles[i]) + long_name <- paste0(gsub("^.*_", "", percentile), "th percentile") + if (tolower(agg) == "country") { + dims <- c('Country', 'time') + } else { + dims <- c(lalo, 'time') + } + metadata <- list(metric = list(name = percentile, long_name = long_name)) + attr(percentiles[[i]], 'variables') <- metadata + names(dim(percentiles[[i]])) <- dims + } + + # Time indices and metadata + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + calendar <- archive$System[[global_attributes$system]]$calendar + # Generate vector containing leadtimes + dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), + cal = calendar) + if (fcst.horizon == 'decadal') { + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + } else { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } + + # Get time difference in hours + leadtimes <- as.numeric(dates - init_date)/3600 + + # Select start date + # If a fcst is provided, use that as the ref. year. Otherwise use 1970. 
+ if (fcst.horizon == 'decadal') { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + #PROBLEM: May be more than one fcst_year + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], + sprintf('%02d', init_month), '01') + } else { + fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') + } + } else { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + } else { + fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) + } + } + times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time + + # Generate name of output file + if (is.null(outdir)) { + outdir <- get_dir(recipe) + } + outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, + fcst.sdate, agg, "percentiles") + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, percentiles), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, percentiles) + ArrayToNc(vars, outfile) + } + info(recipe$Run$logger, "##### PERCENTILES SAVED TO NETCDF FILE #####") +} diff --git a/modules/Saving/R/save_probabilities.R b/modules/Saving/R/save_probabilities.R new file mode 100644 index 00000000..20af9be7 --- /dev/null +++ b/modules/Saving/R/save_probabilities.R @@ -0,0 +1,124 @@ +save_probabilities <- function(recipe, + probs, + data_cube, + agg = "global", + type = "hcst", + outdir = NULL) { + # Loops over the years in the s2dv_cube containing a hindcast or forecast + # and exports the corresponding category probabilities to a netCDF file. + # probs: array containing the probability data + # recipe: the auto-s2s recipe + # data_cube: s2dv_cube containing the data and metadata + # outdir: directory where the files should be saved + # type: 'exp' (hcst and fcst) or 'obs' + # agg: aggregation, "global" or "country" + # type: 'hcst' or 'fcst' + + lalo <- c('longitude', 'latitude') + archive <- get_archive(recipe) + variable <- data_cube$attrs$Variable$varName + var.longname <- data_cube$attrs$Variable$metadata[[variable]]$long_name + global_attributes <- .get_global_attributes(recipe, archive) + if (is.null(outdir)) { + outdir <- get_dir(recipe) + } + # Add anomaly computation to global attributes + ## TODO: Sort out the logic once default behavior is decided + if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { + global_attributes <- c(list(from_anomalies = "Yes"), + global_attributes) + } else { + global_attributes <- c(list(from_anomalies = "No"), + global_attributes) + } + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + calendar <- archive$System[[global_attributes$system]]$calendar + + # Generate vector containing leadtimes + ## TODO: Move to a separate function? 
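## If this block is ever factored out, a minimal sketch of such a helper,
## using the same PCICt-based arithmetic as the code below (dates and
## calendar are illustrative, not taken from a real run):
# get_leadtimes <- function(dates, init_date) {
#   # time difference between each date and the initialisation date, in hours
#   as.numeric(dates - init_date) / 3600
# }
# init <- as.PCICt("19931101", format = "%Y%m%d", cal = "gregorian")
# get_leadtimes(as.PCICt(c("19931130", "19931231"),
#                        format = "%Y%m%d", cal = "gregorian"), init)
# # approx. 696 and 1440 hours for this example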
+ dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), + cal = calendar) + if (fcst.horizon == 'decadal') { + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + } else { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } + + # Get time difference in hours + leadtimes <- as.numeric(dates - init_date)/3600 + + syears <- seq(1:dim(data_cube$data)['syear'][[1]]) + ## expect dim = [sday = 1, sweek = 1, syear, time] + syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) + for (i in syears) { + # Select year from array and rearrange dimensions + probs_syear <- lapply(probs, ClimProjDiags::Subset, 'syear', i, drop = 'selected') + if (tolower(agg) == "global") { + probs_syear <- lapply(probs_syear, function(x) { + Reorder(x, c(lalo, 'time'))}) + } else { + probs_syear <- lapply(probs_syear, function(x) { + Reorder(x, c('country', 'time'))}) + } + + ## TODO: Replace for loop with something more efficient? + for (bin in 1:length(probs_syear)) { + prob_bin <- names(probs_syear[bin]) + long_name <- paste0(prob_bin, " probability category") + if (tolower(agg) == "country") { + dims <- c('Country', 'time') + } else { + dims <- c(lalo, 'time') + } + metadata <- list(metric = list(name = prob_bin, long_name = long_name)) + attr(probs_syear[[bin]], 'variables') <- metadata + names(dim(probs_syear[[bin]])) <- dims # is this necessary? + } + + # Add global attributes + attr(probs_syear[[1]], 'global_attrs') <- global_attributes + + # Select start date + if (fcst.horizon == 'decadal') { + # init_date is like "1990-11-01" + init_date <- as.POSIXct(init_date) + fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) + fcst.sdate <- format(fcst.sdate, '%Y%m%d') + } else { + fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] + } + + # Get time dimension values and metadata + times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time + + # Generate name of output file + outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, + fcst.sdate, agg, "probs") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, probs_syear), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, probs_syear) + ArrayToNc(vars, outfile) + } + } + + info(recipe$Run$logger, + paste("#####", toupper(type), + "PROBABILITIES SAVED TO NETCDF FILE #####")) +} diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index e872e832..3cb751a2 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -2,6 +2,13 @@ source("modules/Saving/R/get_dir.R") source("modules/Saving/R/get_filename.R") +source("modules/Saving/R/Utils.R") +source("modules/Saving/R/save_forecast.R") +source("modules/Saving/R/save_observations.R") +source("modules/Saving/R/save_metrics.R") +source("modules/Saving/R/save_corr.R") +source("modules/Saving/R/save_probabilities.R") +source("modules/Saving/R/save_percentiles.R") 
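## Once this module is sourced, the individual writers defined under
## modules/Saving/R/ can also be called directly; a sketch, with argument
## names following the function definitions added above and 'data' coming
## from the Loading module:
# save_forecast(recipe = recipe, data_cube = data$hcst, type = "hcst")
# save_observations(recipe = recipe, data_cube = data$obs)
# save_metrics(recipe = recipe, skill = skill_metrics, data_cube = data$hcst)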
save_data <- function(recipe, data, skill_metrics = NULL, diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R new file mode 100644 index 00000000..c104c892 --- /dev/null +++ b/modules/Visualization/R/plot_ensemble_mean.R @@ -0,0 +1,88 @@ +plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { + + ## TODO: Add 'anomaly' to plot title + # Abort if frequency is daily + if (recipe$Analysis$Variables$freq == "daily_mean") { + stop("Visualization functions not yet implemented for daily data.") + } + + latitude <- fcst$coords$lat + longitude <- fcst$coords$lon + system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name + variable <- recipe$Analysis$Variables$name + units <- attr(fcst$Variable, "variable")$units + start_date <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + # Compute ensemble mean + ensemble_mean <- s2dv::MeanDims(fcst$data, 'ensemble') + # Drop extra dims, add time dim if missing: + ensemble_mean <- drop(ensemble_mean) + + if (!("time" %in% names(dim(ensemble_mean)))) { + dim(ensemble_mean) <- c("time" = 1, dim(ensemble_mean)) + } + if (!'syear' %in% names(dim(ensemble_mean))) { + ensemble_mean <- Reorder(ensemble_mean, c("time", + "longitude", + "latitude")) + } else { + ensemble_mean <- Reorder(ensemble_mean, c("syear", + "time", + "longitude", + "latitude")) + } + ## TODO: Redefine column colors, possibly depending on variable + if (variable == 'prlr') { + palette = "BrBG" + rev = F + } else { + palette = "RdBu" + rev = T + } + # Define brks, centered on in the case of anomalies + ## + if (grepl("anomaly", + fcst$attrs$Variable$metadata[[variable]]$long_name)) { + variable <- paste(variable, "anomaly") + max_value <- max(abs(ensemble_mean)) + ugly_intervals <- seq(-max_value, max_value, max_value/20) + brks <- pretty(ugly_intervals, n = 12, min.n = 8) + } else { + brks <- pretty(range(ensemble_mean, na.rm = T), n = 15, min.n = 8) + } + cols <- grDevices::hcl.colors(length(brks) - 1, palette, rev = rev) + options(bitmapType = "cairo") + + for (i_syear in start_date) { + # Define name of output file and titles + if (length(start_date) == 1) { + i_ensemble_mean <- ensemble_mean + outfile <- paste0(outdir, "forecast_ensemble_mean-", start_date, ".png") + } else { + i_ensemble_mean <- ensemble_mean[which(start_date == i_syear), , , ] + outfile <- paste0(outdir, "forecast_ensemble_mean-", i_syear, ".png") + } + toptitle <- paste("Forecast Ensemble Mean -", variable, "-", system_name, + "- Initialization:", i_syear) + months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], + label = T, abb = F) + titles <- as.vector(months) + # Plots + PlotLayout(PlotEquiMap, c('longitude', 'latitude'), + i_ensemble_mean, longitude, latitude, + filled.continents = F, + toptitle = toptitle, + title_scale = 0.6, + titles = titles, + units = units, + cols = cols, + brks = brks, + fileout = outfile, + bar_label_digits = 4, + bar_extra_margin = rep(0.7, 4), + bar_label_scale = 1.5, + axes_label_scale = 1.3) + } + info(recipe$Run$logger, + "##### FCST ENSEMBLE MEAN PLOT SAVED TO OUTPUT DIRECTORY #####") +} diff --git a/modules/Visualization/R/plot_most_likely_terciles_map.R b/modules/Visualization/R/plot_most_likely_terciles_map.R new file mode 100644 index 00000000..fbdca980 --- /dev/null +++ b/modules/Visualization/R/plot_most_likely_terciles_map.R @@ -0,0 +1,87 @@ +plot_most_likely_terciles <- function(recipe, archive, + fcst, + probabilities, + outdir) { + + ## TODO: Add 
'anomaly' to plot title + # Abort if frequency is daily + if (recipe$Analysis$Variables$freq == "daily_mean") { + stop("Visualization functions not yet implemented for daily data.") + } + + latitude <- fcst$coords$lat + longitude <- fcst$coords$lon + system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name + variable <- recipe$Analysis$Variables$name + start_date <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + + # Retrieve and rearrange probability bins for the forecast + if (is.null(probabilities$probs_fcst$prob_b33) || + is.null(probabilities$probs_fcst$prob_33_to_66) || + is.null(probabilities$probs_fcst$prob_a66)) { + stop("The forecast tercile probability bins are not present inside ", + "'probabilities', the most likely tercile map cannot be plotted.") + } + + probs_fcst <- abind(probabilities$probs_fcst$prob_b33, + probabilities$probs_fcst$prob_33_to_66, + probabilities$probs_fcst$prob_a66, + along = 0) + names(dim(probs_fcst)) <- c("bin", + names(dim(probabilities$probs_fcst$prob_b33))) + + ## TODO: Improve this section + # Drop extra dims, add time dim if missing: + probs_fcst <- drop(probs_fcst) + if (!("time" %in% names(dim(probs_fcst)))) { + dim(probs_fcst) <- c("time" = 1, dim(probs_fcst)) + } + if (!'syear' %in% names(dim(probs_fcst))) { + probs_fcst <- Reorder(probs_fcst, c("time", "bin", "longitude", "latitude")) + } else { + probs_fcst <- Reorder(probs_fcst, + c("syear", "time", "bin", "longitude", "latitude")) + } + + for (i_syear in start_date) { + # Define name of output file and titles + if (length(start_date) == 1) { + i_probs_fcst <- probs_fcst + outfile <- paste0(outdir, "forecast_most_likely_tercile-", start_date, + ".png") + } else { + i_probs_fcst <- probs_fcst[which(start_date == i_syear), , , , ] + outfile <- paste0(outdir, "forecast_most_likely_tercile-", i_syear, ".png") + } + toptitle <- paste("Most Likely Tercile -", variable, "-", system_name, "-", + "Initialization:", i_syear) + months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], + label = T, abb = F) + ## TODO: Ensure this works for daily and sub-daily cases + titles <- as.vector(months) + + # Plots + ## NOTE: PlotLayout() and PlotMostLikelyQuantileMap() are still being worked + ## on. 
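## The call below draws one panel per forecast month: PlotLayout() splits
## 'i_probs_fcst' along its leading time dimension and passes each
## (bin, longitude, latitude) slice to PlotMostLikelyQuantileMap(), with
## 'bin' as the category dimension holding the three tercile probabilities.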
+ suppressWarnings( + PlotLayout(PlotMostLikelyQuantileMap, c('bin', 'longitude', 'latitude'), + cat_dim = 'bin', + i_probs_fcst, longitude, latitude, + coast_width = 1.5, + title_scale = 0.6, + legend_scale = 0.8, #cex_bar_titles = 0.6, + toptitle = toptitle, + titles = titles, + fileout = outfile, + bar_label_digits = 2, + bar_scale = rep(0.7, 4), + bar_label_scale = 1.2, + axes_label_scale = 1.3, + triangle_ends = c(F, F), width = 11, height = 8) + ) + } + + info(recipe$Run$logger, + "##### MOST LIKELY TERCILE PLOT SAVED TO OUTPUT DIRECTORY #####") +} diff --git a/modules/Visualization/R/plot_skill_metrics.R b/modules/Visualization/R/plot_skill_metrics.R new file mode 100644 index 00000000..8bc8ebc4 --- /dev/null +++ b/modules/Visualization/R/plot_skill_metrics.R @@ -0,0 +1,161 @@ +plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, + outdir, significance = F) { + # recipe: Auto-S2S recipe + # archive: Auto-S2S archive + # data_cube: s2dv_cube object with the corresponding hindcast data + # skill_metrics: list of named skill metrics arrays + # outdir: output directory + # significance: T/F, whether to display the significance dots in the plots + + ## TODO: OPTION for CERISE: Using PuOr + # Abort if frequency is daily + if (recipe$Analysis$Variables$freq == "daily_mean") { + error(recipe$Run$logger, "Visualization functions not yet implemented + for daily data.") + stop() + } + # Abort if skill_metrics is not list + if (!is.list(skill_metrics) || is.null(names(skill_metrics))) { + stop("The element 'skill_metrics' must be a list of named arrays.") + } + + latitude <- data_cube$coords$lat + longitude <- data_cube$coords$lon + system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name + hcst_period <- paste0(recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + init_month <- as.numeric(substr(recipe$Analysis$Time$sdate, + start = 1, stop = 2)) + month_label <- tolower(month.name[init_month]) + month_abbreviation <- month.abb[init_month] + + # Define color palette and number of breaks according to output format + ## TODO: Make separate function + if (tolower(recipe$Analysis$Output_format) %in% c("scorecards", "cerise")) { + diverging_palette <- "purpleorange" + sequential_palette <- "Oranges" + } else { + diverging_palette <- "bluered" + sequential_palette <- "Reds" + } + + # Group different metrics by type + skill_scores <- c("rpss", "bss90", "bss10", "frpss", "crpss", "mean_bias_ss", + "enscorr", "rpss_specs", "bss90_specs", "bss10_specs", + "enscorr_specs", "rmsss") + scores <- c("rps", "frps", "crps", "frps_specs") + # Assign colorbar to each metric type + ## TODO: Triangle ends + for (name in c(skill_scores, scores, "mean_bias", "enssprerr")) { + if (name %in% names(skill_metrics)) { + # Define plot characteristics and metric name to display in plot + if (name %in% c("rpss", "bss90", "bss10", "frpss", "crpss", + "rpss_specs", "bss90_specs", "bss10_specs", + "rmsss")) { + display_name <- toupper(strsplit(name, "_")[[1]][1]) + skill <- skill_metrics[[name]] + brks <- seq(-1, 1, by = 0.2) + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- colorbar[1] + col_sup <- NULL + } else if (name == "mean_bias_ss") { + display_name <- "Mean Bias Skill Score" + skill <- skill_metrics[[name]] + brks <- seq(-1, 1, by = 0.2) + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- colorbar[1] + col_sup <- NULL + } 
else if (name %in% c("enscorr", "enscorr_specs")) { + display_name <- "Ensemble Mean Correlation" + skill <- skill_metrics[[name]] + brks <- seq(-1, 1, by = 0.2) + cols <- clim.colors(length(brks) - 1, diverging_palette) + col_inf <- NULL + col_sup <- NULL + } else if (name %in% scores) { + skill <- skill_metrics[[name]] + display_name <- toupper(strsplit(name, "_")[[1]][1]) + brks <- seq(0, 1, by = 0.1) + colorbar <- grDevices::hcl.colors(length(brks), sequential_palette) + cols <- colorbar[1:(length(colorbar) - 1)] + col_inf <- NULL + col_sup <- colorbar[length(colorbar)] + } else if (name == "enssprerr") { + skill <- skill_metrics[[name]] + display_name <- "Spread-to-Error Ratio" + brks <- c(0, 0.6, 0.7, 0.8, 0.9, 1, 1.2, 1.4, 1.6, 1.8, 2) + colorbar <- clim.colors(length(brks), diverging_palette) + cols <- colorbar[1:length(colorbar) - 1] + col_inf <- NULL + col_sup <- colorbar[length(colorbar)] + } else if (name == "mean_bias") { + skill <- skill_metrics[[name]] + display_name <- "Mean Bias" + max_value <- max(abs(quantile(skill, 0.02, na.rm = T)), + abs(quantile(skill, 0.98, na.rm = T))) + brks <- max_value * seq(-1, 1, by = 0.2) + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- colorbar[1] + col_sup <- colorbar[length(colorbar)] + } + options(bitmapType = "cairo") + # Reorder dimensions + skill <- Reorder(skill, c("time", "longitude", "latitude")) + # If the significance has been requested and the variable has it, + # retrieve it and reorder its dimensions. + significance_name <- paste0(name, "_significance") + if ((significance) && (significance_name %in% names(skill_metrics))) { + skill_significance <- skill_metrics[[significance_name]] + skill_significance <- Reorder(skill_significance, c("time", + "longitude", + "latitude")) + # Split skill significance into list of lists, along the time dimension + # This allows for plotting the significance dots correctly. + skill_significance <- ClimProjDiags::ArrayToList(skill_significance, + dim = 'time', + level = "sublist", + names = "dots") + } else { + skill_significance <- NULL + } + # Define output file name and titles + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + outfile <- paste0(outdir, name, "-", month_label, ".png") + } else { + outfile <- paste0(outdir, name, ".png") + } + toptitle <- paste(display_name, "-", data_cube$attrs$Variable$varName, + "-", system_name, "-", month_abbreviation, + hcst_period) + months <- unique(lubridate::month(data_cube$attrs$Dates, + label = T, abb = F)) + titles <- as.vector(months) + # Plot + suppressWarnings( + PlotLayout(PlotEquiMap, c('longitude', 'latitude'), + asplit(skill, MARGIN=1), # Splitting array into a list + longitude, latitude, + special_args = skill_significance, + dot_symbol = 20, + toptitle = toptitle, + title_scale = 0.6, + titles = titles, + filled.continents=F, + brks = brks, + cols = cols, + col_inf = col_inf, + col_sup = col_sup, + fileout = outfile, + bar_label_digits = 3, + bar_extra_margin = rep(0.9, 4), + bar_label_scale = 1.5, + axes_label_scale = 1.3) + ) + } + } + info(recipe$Run$logger, + "##### SKILL METRIC PLOTS SAVED TO OUTPUT DIRECTORY #####") +} diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 5a018b02..fa82a68d 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -3,6 +3,10 @@ ## TODO: Add param 'raw'? 
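## The recipe's Visualization section now takes a 'plots' entry such as
## "skill_metrics, forecast_ensemble_mean, most_likely_terciles"; a sketch of
## how that string can be split into individual plot names (same pattern as
## the check added in tools/check_recipe.R):
# plots <- strsplit(recipe$Analysis$Workflow$Visualization$plots,
#                   ", | |,")[[1]]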
## TODO: Decadal plot names +source("modules/Visualization/R/plot_ensemble_mean.R") +source("modules/Visualization/R/plot_most_likely_terciles_map.R") +source("modules/Visualization/R/plot_skill_metrics.R") + plot_data <- function(recipe, data, skill_metrics = NULL, diff --git a/tests/recipes/recipe-seasonal_monthly_1.yml b/tests/recipes/recipe-seasonal_monthly_1.yml index 5a2f5c48..59b4845a 100644 --- a/tests/recipes/recipe-seasonal_monthly_1.yml +++ b/tests/recipes/recipe-seasonal_monthly_1.yml @@ -31,7 +31,7 @@ Analysis: Anomalies: compute: no cross_validation: - save: + save: 'none' Calibration: method: mse_min save: 'all' diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 64d1ef42..64bc9464 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -253,9 +253,19 @@ check_recipe <- function(recipe) { "The 'Calibration' element 'method' must be specified.") error_status <- T } + SAVING_OPTIONS_CALIB <- c("all", "none", "exp_only", "fcst_only") + if ((is.null(recipe$Analysis$Workflow$Calibration$save)) || + (!(recipe$Analysis$Workflow$Calibration$save %in% SAVING_OPTIONS_CALIB))) { + error(recipe$Run$logger, + paste0("Please specify which Calibration module outputs you want ", + "to save with the 'save' parameter. The options are: ", + paste(SAVING_OPTIONS_CALIB, collapse = ", "), ".")) + error_status <- T + } } # Anomalies if ("Anomalies" %in% names(recipe$Analysis$Workflow)) { + # Computation and cross-validation checks if (is.null(recipe$Analysis$Workflow$Anomalies$compute)) { error(recipe$Run$logger, "Parameter 'compute' must be defined under 'Anomalies'.") @@ -273,6 +283,16 @@ check_recipe <- function(recipe) { and it must be a logical value (True/False or yes/no).")) error_status <- T } + # Saving checks + SAVING_OPTIONS_ANOM <- c("all", "none", "exp_only", "fcst_only") + if ((is.null(recipe$Analysis$Workflow$Anomalies$save)) || + (!(recipe$Analysis$Workflow$Anomalies$save %in% SAVING_OPTIONS_ANOM))) { + error(recipe$Run$logger, + paste0("Please specify which Anomalies module outputs you want ", + "to save with the 'save' parameter. The options are: ", + paste(SAVING_OPTIONS_ANOM, collapse = ", "), ".")) + error_status <- T + } } # Skill if (("Skill" %in% names(recipe$Analysis$Workflow)) && @@ -280,6 +300,16 @@ check_recipe <- function(recipe) { error(recipe$Run$logger, "Parameter 'metric' must be defined under 'Skill'.") error_status <- T + # Saving checks + SAVING_OPTIONS_SKILL <- c("all", "none") + if ((is.null(recipe$Analysis$Workflow$Skill$save)) || + (!(recipe$Analysis$Workflow$Skill$save %in% SAVING_OPTIONS_SKILL))) { + error(recipe$Run$logger, + paste0("Please specify whether you want to save the Skill metrics ", + "with the 'save' parameter. The options are: ", + paste(SAVING_OPTIONS_SKILL, collapse = ", "), ".")) + error_status <- T + } } # Probabilities if ("Probabilities" %in% names(recipe$Analysis$Workflow)) { @@ -293,6 +323,37 @@ check_recipe <- function(recipe) { "See documentation in the wiki for examples.")) error_status <- T } + # Saving checks + SAVING_OPTIONS_PROBS <- c("all", "none", "bins_only", "percentiles_only") + if ((is.null(recipe$Analysis$Workflow$Probabilities$save)) || + (!(recipe$Analysis$Workflow$Probabilities$save %in% SAVING_OPTIONS_PROBS))) { + error(recipe$Run$logger, + paste0("Please specify whether you want to save the percentiles ", + "and probability bins with the 'save' parameter. 
The ", + "options are: ", + paste(SAVING_OPTIONS_PROBS, collapse = ", "), ".")) + error_status <- T + } + } + # Visualization + if ("Visualization" %in% names(recipe$Analysis$Workflow)) { + PLOT_OPTIONS <- c("skill_metrics", "forecast_ensemble_mean", + "most_likely_terciles") + ## Separate plots parameter and check if all elements are in PLOT_OPTIONS + if (is.null(recipe$Analysis$Workflow$Visualization$plots)) { + error(recipe$Run$logger, + "The 'plots' element must be defined under 'Visualization'.") + error_status <- T + } else { + plots <- strsplit(recipe$Analysis$Workflow$Visualization$plots, + ", | |,")[[1]] + if (!all(plots %in% PLOT_OPTIONS)) { + error(recipe$Run$logger, + paste0("The options available for the plots are: ", + paste(PLOT_OPTIONS, collapse = ", "), ".")) + error_status <- T + } + } } # --------------------------------------------------------------------- @@ -424,7 +485,7 @@ check_recipe <- function(recipe) { if (error_status) { error(recipe$Run$logger, "RECIPE CHECK FAILED.") stop("The recipe contains some errors. Find the full list in the", - "startup.log file.") + " startup.log file.") } else { info(recipe$Run$logger, "##### RECIPE CHECK SUCCESSFULL #####") # return(append(nverifications, fcst.sdate)) -- GitLab From a6870d72e87484a270021c41f93ee05effc67b27 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 19 Apr 2023 10:16:33 +0200 Subject: [PATCH 31/52] Remove 'archive' param from plotting functions, eliminate duplicated funs --- modules/Saving/Saving.R | 757 ------------------ modules/Visualization/R/plot_ensemble_mean.R | 3 +- .../R/plot_most_likely_terciles_map.R | 3 +- modules/Visualization/R/plot_skill_metrics.R | 3 +- modules/Visualization/Visualization.R | 356 +------- tests/recipes/recipe-seasonal_monthly_1.yml | 8 +- 6 files changed, 13 insertions(+), 1117 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 3cb751a2..75c7908c 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -162,760 +162,3 @@ get_latlon <- function(latitude, longitude) { return(list(lat=latitude, lon=longitude)) } - -save_forecast <- function(recipe, - data_cube, - type = "hcst", - agg = "global", - outdir = NULL) { - # Loops over the years in the s2dv_cube containing a hindcast or forecast - # and exports each year to a netCDF file. - # data_cube: s2dv_cube containing the data and metadata - # recipe: the auto-s2s recipe - # outdir: directory where the files should be saved - # agg: aggregation, "global" or "country" - - lalo <- c('longitude', 'latitude') - archive <- get_archive(recipe) - dictionary <- read_yaml("conf/variable-dictionary.yml") - variable <- data_cube$attrs$Variable$varName - var.longname <- data_cube$attrs$Variable$metadata[[variable]]$long_name - global_attributes <- get_global_attributes(recipe, archive) - fcst.horizon <- tolower(recipe$Analysis$Horizon) - store.freq <- recipe$Analysis$Variables$freq - calendar <- archive$System[[global_attributes$system]]$calendar - - if (is.null(outdir)) { - outdir <- get_dir(recipe) - } - - # Generate vector containing leadtimes - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) - if (fcst.horizon == 'decadal') { - ## Method 1: Use the first date as init_date. 
But it may be better to use - ## the real initialized date (ask users) - # init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') - ## Method 2: use initial month - init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - if (type == 'hcst') { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) - } else if (type == 'fcst') { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$fcst_year[1], '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) - } - } else { - if (type == 'hcst') { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) - } else if (type == 'fcst') { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) - } - } - # Get time difference in hours - leadtimes <- as.numeric(dates - init_date)/3600 - - syears <- seq(1:dim(data_cube$data)['syear'][[1]]) - # expect dim = [sday = 1, sweek = 1, syear, time] - syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) - for (i in syears) { - # Select year from array and rearrange dimensions - fcst <- ClimProjDiags::Subset(data_cube$data, 'syear', i, drop = T) - - if (!("time" %in% names(dim(fcst)))) { - dim(fcst) <- c("time" = 1, dim(fcst)) - } - if (tolower(agg) == "global") { - fcst <- list(Reorder(fcst, c(lalo, 'ensemble', 'time'))) - } else { - fcst <- list(Reorder(fcst, c('country', 'ensemble', 'time'))) - } - - # Add metadata - var.sdname <- dictionary$vars[[variable]]$standard_name - if (tolower(agg) == "country") { - dims <- c('Country', 'ensemble', 'time') - var.expname <- paste0(variable, '_country') - var.longname <- paste0("Country-Aggregated ", var.longname) - var.units <- attr(data_cube$Variable, 'variable')$units - } else { - dims <- c(lalo, 'ensemble', 'time') - var.expname <- variable - var.sdname <- var.sdname - var.units <- data_cube$attrs$Variable$metadata[[variable]]$units - } - - metadata <- list(fcst = list(name = var.expname, - standard_name = var.sdname, - long_name = var.longname, - units = var.units)) - attr(fcst[[1]], 'variables') <- metadata - names(dim(fcst[[1]])) <- dims - # Add global attributes - attr(fcst[[1]], 'global_attrs') <- global_attributes - - # Select start date - if (fcst.horizon == 'decadal') { - ## NOTE: Not good to use data_cube$load_parameters$dat1 because decadal - ## data has been reshaped - # fcst.sdate <- format(as.Date(data_cube$Dates$start[i]), '%Y%m%d') - - # init_date is like "1990-11-01" - init_date <- as.POSIXct(init_date) - fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) - fcst.sdate <- format(fcst.sdate, '%Y%m%d') - - } else { - fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] - } - - # Get time dimension values and metadata - times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time - - # Generate name of output file - outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "exp") - - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, fcst), outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- get_latlon(latitude, longitude) - # Compile variables into a 
list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, fcst) - ArrayToNc(vars, outfile) - } - } - info(recipe$Run$logger, paste("#####", toupper(type), - "SAVED TO NETCDF FILE #####")) -} - - -save_observations <- function(recipe, - data_cube, - agg = "global", - outdir = NULL) { - # Loops over the years in the s2dv_cube containing the observations and - # exports each year to a netCDF file. - # data_cube: s2dv_cube containing the data and metadata - # recipe: the auto-s2s recipe - # outdir: directory where the files should be saved - # agg: aggregation, "global" or "country" - - lalo <- c('longitude', 'latitude') - archive <- get_archive(recipe) - dictionary <- read_yaml("conf/variable-dictionary.yml") - variable <- data_cube$attrs$Variable$varName - var.longname <- data_cube$attrs$Variable$metadata[[variable]]$long_name - global_attributes <- get_global_attributes(recipe, archive) - fcst.horizon <- tolower(recipe$Analysis$Horizon) - store.freq <- recipe$Analysis$Variables$freq - calendar <- archive$Reference[[global_attributes$reference]]$calendar - - if (is.null(outdir)) { - outdir <- get_dir(recipe) - } - - # Generate vector containing leadtimes - ## TODO: Move to a separate function? - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) - if (fcst.horizon == 'decadal') { - ## Method 1: Use the first date as init_date. But it may be better to use - ## the real initialized date (ask users) -# init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') - ## Method 2: use initial month - init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) - - } else { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) - } - # Get time difference in hours - leadtimes <- as.numeric(dates - init_date)/3600 - - syears <- seq(1:dim(data_cube$data)['syear'][[1]]) - ## expect dim = [sday = 1, sweek = 1, syear, time] - syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) - for (i in syears) { - # Select year from array and rearrange dimensions - fcst <- ClimProjDiags::Subset(data_cube$data, 'syear', i, drop = T) - - if (!("time" %in% names(dim(fcst)))) { - dim(fcst) <- c("time" = 1, dim(fcst)) - } - if (tolower(agg) == "global") { - fcst <- list(Reorder(fcst, c(lalo, 'time'))) - } else { - fcst <- list(Reorder(fcst, c('country', 'time'))) - } - - # Add metadata - var.sdname <- dictionary$vars[[variable]]$standard_name - if (tolower(agg) == "country") { - dims <- c('Country', 'time') - var.expname <- paste0(variable, '_country') - var.longname <- paste0("Country-Aggregated ", var.longname) - var.units <- data_cube$attrs$Variable$metadata[[variable]]$units - } else { - dims <- c(lalo, 'time') - var.expname <- variable - var.units <- data_cube$attrs$Variable$metadata[[variable]]$units - } - - metadata <- list(fcst = list(name = var.expname, - standard_name = var.sdname, - long_name = var.longname, - units = var.units)) - attr(fcst[[1]], 'variables') <- metadata - names(dim(fcst[[1]])) <- dims - # Add global attributes - attr(fcst[[1]], 'global_attrs') <- global_attributes - - # Select start date. The date is computed for each year, and adapted for - # consistency with the hcst/fcst dates, so that both sets of files have - # the same name pattern. 
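  # (For instance, a monthly observation verifying in June 2000 for a hindcast
  #  initialised on 1 May 2000 would be saved under the start date 20000501,
  #  matching the corresponding hindcast file name.)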
- ## Because observations are loaded differently in the daily vs. monthly - ## cases, different approaches are necessary. - if (fcst.horizon == 'decadal') { - # init_date is like "1990-11-01" - init_date <- as.POSIXct(init_date) - fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) - } else { - - if (store.freq == "monthly_mean") { - fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] - fcst.sdate <- as.Date(paste0(fcst.sdate, "01"), '%Y%m%d') - } else { - fcst.sdate <- as.Date(data_cube$attrs$Dates[i]) - } - } - - # Ensure the year is correct if the first leadtime goes to the next year - init_date <- as.POSIXct(init_date) - if (lubridate::month(fcst.sdate) < lubridate::month(init_date)) { - lubridate::year(fcst.sdate) <- lubridate::year(fcst.sdate) + 1 - } - # Ensure that the initialization month is consistent with the hindcast - lubridate::month(fcst.sdate) <- lubridate::month(init_date) - fcst.sdate <- format(fcst.sdate, format = '%Y%m%d') - - # Get time dimension values and metadata - times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time - - # Generate name of output file - outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "obs") - - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, fcst), outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, fcst) - ArrayToNc(vars, outfile) - } - } - info(recipe$Run$logger, "##### OBS SAVED TO NETCDF FILE #####") -} - -## TODO: Place inside a function somewhere -# if (tolower(agg) == "country") { -# load(mask.path) -# grid <- europe.countries.iso -# } else { -# grid <- list(lon=attr(var.obs, 'Variables')$dat1$longitude, -# lat=attr(var.obs, 'Variables')$dat1$latitude) -# } - -save_metrics <- function(recipe, - skill, - data_cube, - agg = "global", - outdir = NULL) { - # This function adds metadata to the skill metrics in 'skill' - # and exports them to a netCDF file inside 'outdir'. 
- - # Define grid dimensions and names - lalo <- c('longitude', 'latitude') - archive <- get_archive(recipe) - dictionary <- read_yaml("conf/variable-dictionary.yml") - - - # Remove singleton dimensions and rearrange lon, lat and time dims - if (tolower(agg) == "global") { - skill <- lapply(skill, function(x) { - Reorder(x, c(lalo, 'time'))}) - } - # Add global and variable attributes - global_attributes <- get_global_attributes(recipe, archive) - ## TODO: Sort out the logic once default behavior is decided - if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && - (recipe$Analysis$Workflow$Anomalies$compute)) { - global_attributes <- c(list(from_anomalies = "Yes"), - global_attributes) - } else { - global_attributes <- c(list(from_anomalies = "No"), - global_attributes) - } - attr(skill[[1]], 'global_attrs') <- global_attributes - - for (i in 1:length(skill)) { - metric <- names(skill[i]) - long_name <- dictionary$metrics[[metric]]$long_name - missing_val <- -9.e+33 - skill[[i]][is.na(skill[[i]])] <- missing_val - if (tolower(agg) == "country") { - sdname <- paste0(metric, " region-aggregated metric") - dims <- c('Country', 'time') - } else { - sdname <- paste0(metric) #, " grid point metric") - dims <- c(lalo, 'time') - } - metadata <- list(metric = list(name = metric, - standard_name = sdname, - long_name = long_name, - missing_value = missing_val)) - attr(skill[[i]], 'variables') <- metadata - names(dim(skill[[i]])) <- dims - } - - # Time indices and metadata - fcst.horizon <- tolower(recipe$Analysis$Horizon) - store.freq <- recipe$Analysis$Variables$freq - calendar <- archive$System[[global_attributes$system]]$calendar - - # Generate vector containing leadtimes - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) - - if (fcst.horizon == 'decadal') { - init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) - } else { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) - } - - # Get time difference in hours - leadtimes <- as.numeric(dates - init_date)/3600 - - # Select start date - # If a fcst is provided, use that as the ref. year. Otherwise use 1970. 
- if (fcst.horizon == 'decadal') { - if (!is.null(recipe$Analysis$Time$fcst_year)) { - #PROBLEM: May be more than one fcst_year - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], - sprintf('%02d', init_month), '01') - } else { - fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') - } - } else { - if (!is.null(recipe$Analysis$Time$fcst_year)) { - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) - } else { - fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) - } - } - - times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time - - # Generate name of output file - if (is.null(outdir)) { - outdir <- get_dir(recipe) - } - outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "skill") - - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, skill), outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, skill) - ArrayToNc(vars, outfile) - } - info(recipe$Run$logger, "##### SKILL METRICS SAVED TO NETCDF FILE #####") -} - -save_corr <- function(recipe, - skill, - data_cube, - agg = "global", - outdir = NULL) { - # This function adds metadata to the ensemble correlation in 'skill' - # and exports it to a netCDF file inside 'outdir'. - - archive <- get_archive(recipe) - dictionary <- read_yaml("conf/variable-dictionary.yml") - # Define grid dimensions and names - lalo <- c('longitude', 'latitude') - # Remove singleton dimensions and rearrange lon, lat and time dims - if (tolower(agg) == "global") { - skill <- lapply(skill, function(x) { - Reorder(x, c(lalo, 'ensemble', 'time'))}) - } - # Add global and variable attributes - global_attributes <- get_global_attributes(recipe, archive) - ## TODO: Sort out the logic once default behavior is decided - if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && - (recipe$Analysis$Workflow$Anomalies$compute)) { - global_attributes <- c(global_attributes, - list(from_anomalies = "Yes")) - } else { - global_attributes <- c(global_attributes, - list(from_anomalies = "No")) - } - attr(skill[[1]], 'global_attrs') <- global_attributes - - for (i in 1:length(skill)) { - metric <- names(skill[i]) - long_name <- dictionary$metrics[[metric]]$long_name - missing_val <- -9.e+33 - skill[[i]][is.na(skill[[i]])] <- missing_val - if (tolower(agg) == "country") { - sdname <- paste0(metric, " region-aggregated metric") - dims <- c('Country', 'ensemble', 'time') - } else { - sdname <- paste0(metric) #, " grid point metric") # formerly names(metric) - dims <- c(lalo, 'ensemble', 'time') - } - metadata <- list(metric = list(name = metric, - standard_name = sdname, - long_name = long_name, - missing_value = missing_val)) - attr(skill[[i]], 'variables') <- metadata - names(dim(skill[[i]])) <- dims - } - - # Time indices and metadata - fcst.horizon <- tolower(recipe$Analysis$Horizon) - store.freq <- recipe$Analysis$Variables$freq - calendar <- archive$System[[global_attributes$system]]$calendar - - # Generate vector containing leadtimes - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) - if (fcst.horizon == 'decadal') { - init_month <- 
archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) - } else { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) - } - - # Get time difference in hours - leadtimes <- as.numeric(dates - init_date)/3600 - - # Select start date - # If a fcst is provided, use that as the ref. year. Otherwise use 1970. - if (fcst.horizon == 'decadal') { - if (!is.null(recipe$Analysis$Time$fcst_year)) { - #PROBLEM: May be more than one fcst_year - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], - sprintf('%02d', init_month), '01') - } else { - fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') - } - } else { - if (!is.null(recipe$Analysis$Time$fcst_year)) { - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) - } else { - fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) - } - } - - times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time - - # Generate name of output file - if (is.null(outdir)) { - outdir <- get_dir(recipe) - } - outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "corr") - - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, skill), outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, skill) - ArrayToNc(vars, outfile) - } - info(recipe$Run$logger, - "##### ENSEMBLE CORRELATION SAVED TO NETCDF FILE #####") -} - -save_percentiles <- function(recipe, - percentiles, - data_cube, - agg = "global", - outdir = NULL) { - # This function adds metadata to the percentiles - # and exports them to a netCDF file inside 'outdir'. 
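  # ('percentiles' is expected to be a named list of arrays whose names end in
  #  the threshold, e.g. something like list(percentile_33 = ..., percentile_66 = ...);
  #  the long_name written below is built from the part after the underscore.)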
- archive <- get_archive(recipe) - - # Define grid dimensions and names - lalo <- c('longitude', 'latitude') - # Remove singleton dimensions and rearrange lon, lat and time dims - if (tolower(agg) == "global") { - percentiles <- lapply(percentiles, function(x) { - Reorder(x, c(lalo, 'time'))}) - } - - # Add global and variable attributes - global_attributes <- get_global_attributes(recipe, archive) - ## TODO: Sort out the logic once default behavior is decided - if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && - (recipe$Analysis$Workflow$Anomalies$compute)) { - global_attributes <- c(list(from_anomalies = "Yes"), - global_attributes) - } else { - global_attributes <- c(list(from_anomalies = "No"), - global_attributes) - } - attr(percentiles[[1]], 'global_attrs') <- global_attributes - - for (i in 1:length(percentiles)) { - ## TODO: replace with proper standard names - percentile <- names(percentiles[i]) - long_name <- paste0(gsub("^.*_", "", percentile), "th percentile") - if (tolower(agg) == "country") { - dims <- c('Country', 'time') - } else { - dims <- c(lalo, 'time') - } - metadata <- list(metric = list(name = percentile, long_name = long_name)) - attr(percentiles[[i]], 'variables') <- metadata - names(dim(percentiles[[i]])) <- dims - } - - # Time indices and metadata - fcst.horizon <- tolower(recipe$Analysis$Horizon) - store.freq <- recipe$Analysis$Variables$freq - calendar <- archive$System[[global_attributes$system]]$calendar - # Generate vector containing leadtimes - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) - if (fcst.horizon == 'decadal') { - init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) - } else { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) - } - - # Get time difference in hours - leadtimes <- as.numeric(dates - init_date)/3600 - - # Select start date - # If a fcst is provided, use that as the ref. year. Otherwise use 1970. 
- if (fcst.horizon == 'decadal') { - if (!is.null(recipe$Analysis$Time$fcst_year)) { - #PROBLEM: May be more than one fcst_year - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], - sprintf('%02d', init_month), '01') - } else { - fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') - } - } else { - if (!is.null(recipe$Analysis$Time$fcst_year)) { - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) - } else { - fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) - } - } - times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time - - # Generate name of output file - if (is.null(outdir)) { - outdir <- get_dir(recipe) - } - outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "percentiles") - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, percentiles), outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, percentiles) - ArrayToNc(vars, outfile) - } - info(recipe$Run$logger, "##### PERCENTILES SAVED TO NETCDF FILE #####") -} - -save_probabilities <- function(recipe, - probs, - data_cube, - agg = "global", - type = "hcst", - outdir = NULL) { - # Loops over the years in the s2dv_cube containing a hindcast or forecast - # and exports the corresponding category probabilities to a netCDF file. - # probs: array containing the probability data - # recipe: the auto-s2s recipe - # data_cube: s2dv_cube containing the data and metadata - # outdir: directory where the files should be saved - # type: 'exp' (hcst and fcst) or 'obs' - # agg: aggregation, "global" or "country" - # type: 'hcst' or 'fcst' - - lalo <- c('longitude', 'latitude') - archive <- get_archive(recipe) - variable <- data_cube$attrs$Variable$varName - var.longname <- data_cube$attrs$Variable$metadata[[variable]]$long_name - global_attributes <- get_global_attributes(recipe, archive) - if (is.null(outdir)) { - outdir <- get_dir(recipe) - } - # Add anomaly computation to global attributes - ## TODO: Sort out the logic once default behavior is decided - if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && - (recipe$Analysis$Workflow$Anomalies$compute)) { - global_attributes <- c(list(from_anomalies = "Yes"), - global_attributes) - } else { - global_attributes <- c(list(from_anomalies = "No"), - global_attributes) - } - fcst.horizon <- tolower(recipe$Analysis$Horizon) - store.freq <- recipe$Analysis$Variables$freq - calendar <- archive$System[[global_attributes$system]]$calendar - - # Generate vector containing leadtimes - ## TODO: Move to a separate function? 
- dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) - if (fcst.horizon == 'decadal') { - init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) - } else { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) - } - - # Get time difference in hours - leadtimes <- as.numeric(dates - init_date)/3600 - - syears <- seq(1:dim(data_cube$data)['syear'][[1]]) - ## expect dim = [sday = 1, sweek = 1, syear, time] - syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) - for (i in syears) { - # Select year from array and rearrange dimensions - probs_syear <- lapply(probs, ClimProjDiags::Subset, 'syear', i, drop = 'selected') - if (tolower(agg) == "global") { - probs_syear <- lapply(probs_syear, function(x) { - Reorder(x, c(lalo, 'time'))}) - } else { - probs_syear <- lapply(probs_syear, function(x) { - Reorder(x, c('country', 'time'))}) - } - - ## TODO: Replace for loop with something more efficient? - for (bin in 1:length(probs_syear)) { - prob_bin <- names(probs_syear[bin]) - long_name <- paste0(prob_bin, " probability category") - if (tolower(agg) == "country") { - dims <- c('Country', 'time') - } else { - dims <- c(lalo, 'time') - } - metadata <- list(metric = list(name = prob_bin, long_name = long_name)) - attr(probs_syear[[bin]], 'variables') <- metadata - names(dim(probs_syear[[bin]])) <- dims # is this necessary? - } - - # Add global attributes - attr(probs_syear[[1]], 'global_attrs') <- global_attributes - - # Select start date - if (fcst.horizon == 'decadal') { - # init_date is like "1990-11-01" - init_date <- as.POSIXct(init_date) - fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) - fcst.sdate <- format(fcst.sdate, '%Y%m%d') - } else { - fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] - } - - # Get time dimension values and metadata - times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time - - # Generate name of output file - outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "probs") - - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, probs_syear), outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, probs_syear) - ArrayToNc(vars, outfile) - } - } - - info(recipe$Run$logger, - paste("#####", toupper(type), - "PROBABILITIES SAVED TO NETCDF FILE #####")) -} diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R index c104c892..e0fa8b84 100644 --- a/modules/Visualization/R/plot_ensemble_mean.R +++ b/modules/Visualization/R/plot_ensemble_mean.R @@ -1,4 +1,4 @@ -plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { +plot_ensemble_mean <- function(recipe, fcst, outdir) { ## TODO: Add 'anomaly' to plot title # Abort if frequency is daily @@ -8,6 +8,7 @@ plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { latitude <- fcst$coords$lat longitude 
<- fcst$coords$lon + archive <- get_archive(recipe) system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name variable <- recipe$Analysis$Variables$name units <- attr(fcst$Variable, "variable")$units diff --git a/modules/Visualization/R/plot_most_likely_terciles_map.R b/modules/Visualization/R/plot_most_likely_terciles_map.R index fbdca980..9ab0199e 100644 --- a/modules/Visualization/R/plot_most_likely_terciles_map.R +++ b/modules/Visualization/R/plot_most_likely_terciles_map.R @@ -1,4 +1,4 @@ -plot_most_likely_terciles <- function(recipe, archive, +plot_most_likely_terciles <- function(recipe, fcst, probabilities, outdir) { @@ -11,6 +11,7 @@ plot_most_likely_terciles <- function(recipe, archive, latitude <- fcst$coords$lat longitude <- fcst$coords$lon + archive <- get_archive(recipe) system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name variable <- recipe$Analysis$Variables$name start_date <- paste0(recipe$Analysis$Time$fcst_year, diff --git a/modules/Visualization/R/plot_skill_metrics.R b/modules/Visualization/R/plot_skill_metrics.R index 8bc8ebc4..f8be19d9 100644 --- a/modules/Visualization/R/plot_skill_metrics.R +++ b/modules/Visualization/R/plot_skill_metrics.R @@ -1,4 +1,4 @@ -plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, +plot_skill_metrics <- function(recipe, data_cube, skill_metrics, outdir, significance = F) { # recipe: Auto-S2S recipe # archive: Auto-S2S archive @@ -21,6 +21,7 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, latitude <- data_cube$coords$lat longitude <- data_cube$coords$lon + archive <- get_archive(recipe) system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name hcst_period <- paste0(recipe$Analysis$Time$hcst_start, "-", recipe$Analysis$Time$hcst_end) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index fa82a68d..1ae48109 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -11,11 +11,9 @@ plot_data <- function(recipe, data, skill_metrics = NULL, probabilities = NULL, - archive = NULL, significance = F) { # Try to produce and save several basic plots. 
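  # A minimal call, with hypothetical objects returned by the other modules
  # (Loading, Skill, Probabilities), might look like:
  #   plot_data(recipe, data, skill_metrics = skill_metrics,
  #             probabilities = probabilities, significance = TRUE)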
# recipe: the auto-s2s recipe as read by read_yaml() - # archive: the auto-s2s archive as read by read_yaml() # data: list containing the hcst, obs and (optional) fcst s2dv_cube objects # calibrated_data: list containing the calibrated hcst and (optional) fcst # s2dv_cube objects @@ -34,20 +32,10 @@ plot_data <- function(recipe, stop() } - if (is.null(archive)) { - if (tolower(recipe$Analysis$Horizon) == "seasonal") { - archive <- - read_yaml(paste0("conf/archive.yml"))[[recipe$Run$filesystem]] - } else if (tolower(recipe$Analysis$Horizon) == "decadal") { - archive <- - read_yaml(paste0("conf/archive_decadal.yml"))[[recipe$Run$filesystem]] - } - } - # Plot skill metrics if ("skill_metrics" %in% plots) { if (!is.null(skill_metrics)) { - plot_skill_metrics(recipe, archive, data$hcst, skill_metrics, outdir, + plot_skill_metrics(recipe, data$hcst, skill_metrics, outdir, significance) } else { error(recipe$Run$logger, @@ -59,7 +47,7 @@ plot_data <- function(recipe, # Plot forecast ensemble mean if ("forecast_ensemble_mean" %in% plots) { if (!is.null(data$fcst)) { - plot_ensemble_mean(recipe, archive, data$fcst, outdir) + plot_ensemble_mean(recipe, data$fcst, outdir) } else { error(recipe$Run$logger, paste0("The forecast ensemble mean plot has been requested, but ", @@ -70,7 +58,7 @@ plot_data <- function(recipe, # Plot Most Likely Terciles if ("most_likely_terciles" %in% plots) { if ((!is.null(probabilities)) && (!is.null(data$fcst))) { - plot_most_likely_terciles(recipe, archive, data$fcst, + plot_most_likely_terciles(recipe, data$fcst, probabilities, outdir) } else { error(recipe$Run$logger, @@ -80,341 +68,3 @@ plot_data <- function(recipe, } } -plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, - outdir, significance = F) { - # recipe: Auto-S2S recipe - # archive: Auto-S2S archive - # data_cube: s2dv_cube object with the corresponding hindcast data - # skill_metrics: list of named skill metrics arrays - # outdir: output directory - # significance: T/F, whether to display the significance dots in the plots - - ## TODO: OPTION for CERISE: Using PuOr - # Abort if frequency is daily - if (recipe$Analysis$Variables$freq == "daily_mean") { - error(recipe$Run$logger, "Visualization functions not yet implemented - for daily data.") - stop() - } - # Abort if skill_metrics is not list - if (!is.list(skill_metrics) || is.null(names(skill_metrics))) { - stop("The element 'skill_metrics' must be a list of named arrays.") - } - - latitude <- data_cube$coords$lat - longitude <- data_cube$coords$lon - system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name - hcst_period <- paste0(recipe$Analysis$Time$hcst_start, "-", - recipe$Analysis$Time$hcst_end) - init_month <- as.numeric(substr(recipe$Analysis$Time$sdate, - start = 1, stop = 2)) - month_label <- tolower(month.name[init_month]) - month_abbreviation <- month.abb[init_month] - - # Define color palette and number of breaks according to output format - ## TODO: Make separate function - if (tolower(recipe$Analysis$Output_format) %in% c("scorecards", "cerise")) { - diverging_palette <- "purpleorange" - sequential_palette <- "Oranges" - } else { - diverging_palette <- "bluered" - sequential_palette <- "Reds" - } - - # Group different metrics by type - skill_scores <- c("rpss", "bss90", "bss10", "frpss", "crpss", "mean_bias_ss", - "enscorr", "rpss_specs", "bss90_specs", "bss10_specs", - "enscorr_specs", "rmsss") - scores <- c("rps", "frps", "crps", "frps_specs") - # Assign colorbar to each metric type - ## TODO: 
Triangle ends - for (name in c(skill_scores, scores, "mean_bias", "enssprerr")) { - if (name %in% names(skill_metrics)) { - # Define plot characteristics and metric name to display in plot - if (name %in% c("rpss", "bss90", "bss10", "frpss", "crpss", - "rpss_specs", "bss90_specs", "bss10_specs", - "rmsss")) { - display_name <- toupper(strsplit(name, "_")[[1]][1]) - skill <- skill_metrics[[name]] - brks <- seq(-1, 1, by = 0.2) - colorbar <- clim.colors(length(brks) + 1, diverging_palette) - cols <- colorbar[2:(length(colorbar) - 1)] - col_inf <- colorbar[1] - col_sup <- NULL - } else if (name == "mean_bias_ss") { - display_name <- "Mean Bias Skill Score" - skill <- skill_metrics[[name]] - brks <- seq(-1, 1, by = 0.2) - colorbar <- clim.colors(length(brks) + 1, diverging_palette) - cols <- colorbar[2:(length(colorbar) - 1)] - col_inf <- colorbar[1] - col_sup <- NULL - } else if (name %in% c("enscorr", "enscorr_specs")) { - display_name <- "Ensemble Mean Correlation" - skill <- skill_metrics[[name]] - brks <- seq(-1, 1, by = 0.2) - cols <- clim.colors(length(brks) - 1, diverging_palette) - col_inf <- NULL - col_sup <- NULL - } else if (name %in% scores) { - skill <- skill_metrics[[name]] - display_name <- toupper(strsplit(name, "_")[[1]][1]) - brks <- seq(0, 1, by = 0.1) - colorbar <- grDevices::hcl.colors(length(brks), sequential_palette) - cols <- colorbar[1:(length(colorbar) - 1)] - col_inf <- NULL - col_sup <- colorbar[length(colorbar)] - } else if (name == "enssprerr") { - skill <- skill_metrics[[name]] - display_name <- "Spread-to-Error Ratio" - brks <- c(0, 0.6, 0.7, 0.8, 0.9, 1, 1.2, 1.4, 1.6, 1.8, 2) - colorbar <- clim.colors(length(brks), diverging_palette) - cols <- colorbar[1:length(colorbar) - 1] - col_inf <- NULL - col_sup <- colorbar[length(colorbar)] - } else if (name == "mean_bias") { - skill <- skill_metrics[[name]] - display_name <- "Mean Bias" - max_value <- max(abs(quantile(skill, 0.02, na.rm = T)), - abs(quantile(skill, 0.98, na.rm = T))) - brks <- max_value * seq(-1, 1, by = 0.2) - colorbar <- clim.colors(length(brks) + 1, diverging_palette) - cols <- colorbar[2:(length(colorbar) - 1)] - col_inf <- colorbar[1] - col_sup <- colorbar[length(colorbar)] - } - options(bitmapType = "cairo") - # Reorder dimensions - skill <- Reorder(skill, c("time", "longitude", "latitude")) - # If the significance has been requested and the variable has it, - # retrieve it and reorder its dimensions. - significance_name <- paste0(name, "_significance") - if ((significance) && (significance_name %in% names(skill_metrics))) { - skill_significance <- skill_metrics[[significance_name]] - skill_significance <- Reorder(skill_significance, c("time", - "longitude", - "latitude")) - # Split skill significance into list of lists, along the time dimension - # This allows for plotting the significance dots correctly. 
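  # (The call below returns one sub-list per time step, roughly of the form
  #  list(list(dots = <longitude x latitude array>), ...), which is the
  #  structure PlotLayout() expects in 'special_args'.)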
- skill_significance <- ClimProjDiags::ArrayToList(skill_significance, - dim = 'time', - level = "sublist", - names = "dots") - } else { - skill_significance <- NULL - } - # Define output file name and titles - if (tolower(recipe$Analysis$Horizon) == "seasonal") { - outfile <- paste0(outdir, name, "-", month_label, ".png") - } else { - outfile <- paste0(outdir, name, ".png") - } - toptitle <- paste(display_name, "-", data_cube$attrs$Variable$varName, - "-", system_name, "-", month_abbreviation, - hcst_period) - months <- unique(lubridate::month(data_cube$attrs$Dates, - label = T, abb = F)) - titles <- as.vector(months) - # Plot - suppressWarnings( - PlotLayout(PlotEquiMap, c('longitude', 'latitude'), - asplit(skill, MARGIN=1), # Splitting array into a list - longitude, latitude, - special_args = skill_significance, - dot_symbol = 20, - toptitle = toptitle, - title_scale = 0.6, - titles = titles, - filled.continents=F, - brks = brks, - cols = cols, - col_inf = col_inf, - col_sup = col_sup, - fileout = outfile, - bar_label_digits = 3, - bar_extra_margin = rep(0.9, 4), - bar_label_scale = 1.5, - axes_label_scale = 1.3) - ) - } - } - info(recipe$Run$logger, - "##### SKILL METRIC PLOTS SAVED TO OUTPUT DIRECTORY #####") -} - -plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { - - ## TODO: Add 'anomaly' to plot title - # Abort if frequency is daily - if (recipe$Analysis$Variables$freq == "daily_mean") { - stop("Visualization functions not yet implemented for daily data.") - } - - latitude <- fcst$coords$lat - longitude <- fcst$coords$lon - system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name - variable <- recipe$Analysis$Variables$name - units <- attr(fcst$Variable, "variable")$units - start_date <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) - # Compute ensemble mean - ensemble_mean <- s2dv::MeanDims(fcst$data, 'ensemble') - # Drop extra dims, add time dim if missing: - ensemble_mean <- drop(ensemble_mean) - - if (!("time" %in% names(dim(ensemble_mean)))) { - dim(ensemble_mean) <- c("time" = 1, dim(ensemble_mean)) - } - if (!'syear' %in% names(dim(ensemble_mean))) { - ensemble_mean <- Reorder(ensemble_mean, c("time", - "longitude", - "latitude")) - } else { - ensemble_mean <- Reorder(ensemble_mean, c("syear", - "time", - "longitude", - "latitude")) - } - ## TODO: Redefine column colors, possibly depending on variable - if (variable == 'prlr') { - palette = "BrBG" - rev = F - } else { - palette = "RdBu" - rev = T - } - # Define brks, centered on in the case of anomalies - ## - if (grepl("anomaly", - fcst$attrs$Variable$metadata[[variable]]$long_name)) { - variable <- paste(variable, "anomaly") - max_value <- max(abs(ensemble_mean)) - ugly_intervals <- seq(-max_value, max_value, max_value/20) - brks <- pretty(ugly_intervals, n = 12, min.n = 8) - } else { - brks <- pretty(range(ensemble_mean, na.rm = T), n = 15, min.n = 8) - } - cols <- grDevices::hcl.colors(length(brks) - 1, palette, rev = rev) - options(bitmapType = "cairo") - - for (i_syear in start_date) { - # Define name of output file and titles - if (length(start_date) == 1) { - i_ensemble_mean <- ensemble_mean - outfile <- paste0(outdir, "forecast_ensemble_mean-", start_date, ".png") - } else { - i_ensemble_mean <- ensemble_mean[which(start_date == i_syear), , , ] - outfile <- paste0(outdir, "forecast_ensemble_mean-", i_syear, ".png") - } - toptitle <- paste("Forecast Ensemble Mean -", variable, "-", system_name, - "- Initialization:", i_syear) - months <- 
lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], - label = T, abb = F) - titles <- as.vector(months) - # Plots - PlotLayout(PlotEquiMap, c('longitude', 'latitude'), - i_ensemble_mean, longitude, latitude, - filled.continents = F, - toptitle = toptitle, - title_scale = 0.6, - titles = titles, - units = units, - cols = cols, - brks = brks, - fileout = outfile, - bar_label_digits = 4, - bar_extra_margin = rep(0.7, 4), - bar_label_scale = 1.5, - axes_label_scale = 1.3) - } - info(recipe$Run$logger, - "##### FCST ENSEMBLE MEAN PLOT SAVED TO OUTPUT DIRECTORY #####") -} - -plot_most_likely_terciles <- function(recipe, archive, - fcst, - probabilities, - outdir) { - - ## TODO: Add 'anomaly' to plot title - # Abort if frequency is daily - if (recipe$Analysis$Variables$freq == "daily_mean") { - stop("Visualization functions not yet implemented for daily data.") - } - - latitude <- fcst$coords$lat - longitude <- fcst$coords$lon - system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name - variable <- recipe$Analysis$Variables$name - start_date <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) - - # Retrieve and rearrange probability bins for the forecast - if (is.null(probabilities$probs_fcst$prob_b33) || - is.null(probabilities$probs_fcst$prob_33_to_66) || - is.null(probabilities$probs_fcst$prob_a66)) { - stop("The forecast tercile probability bins are not present inside ", - "'probabilities', the most likely tercile map cannot be plotted.") - } - - probs_fcst <- abind(probabilities$probs_fcst$prob_b33, - probabilities$probs_fcst$prob_33_to_66, - probabilities$probs_fcst$prob_a66, - along = 0) - names(dim(probs_fcst)) <- c("bin", - names(dim(probabilities$probs_fcst$prob_b33))) - - ## TODO: Improve this section - # Drop extra dims, add time dim if missing: - probs_fcst <- drop(probs_fcst) - if (!("time" %in% names(dim(probs_fcst)))) { - dim(probs_fcst) <- c("time" = 1, dim(probs_fcst)) - } - if (!'syear' %in% names(dim(probs_fcst))) { - probs_fcst <- Reorder(probs_fcst, c("time", "bin", "longitude", "latitude")) - } else { - probs_fcst <- Reorder(probs_fcst, - c("syear", "time", "bin", "longitude", "latitude")) - } - - for (i_syear in start_date) { - # Define name of output file and titles - if (length(start_date) == 1) { - i_probs_fcst <- probs_fcst - outfile <- paste0(outdir, "forecast_most_likely_tercile-", start_date, - ".png") - } else { - i_probs_fcst <- probs_fcst[which(start_date == i_syear), , , , ] - outfile <- paste0(outdir, "forecast_most_likely_tercile-", i_syear, ".png") - } - toptitle <- paste("Most Likely Tercile -", variable, "-", system_name, "-", - "Initialization:", i_syear) - months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], - label = T, abb = F) - ## TODO: Ensure this works for daily and sub-daily cases - titles <- as.vector(months) - - # Plots - ## NOTE: PlotLayout() and PlotMostLikelyQuantileMap() are still being worked - ## on. 
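  # (The 'bin' dimension of i_probs_fcst has length 3, ordered below / normal /
  #  above the terciles, and PlotMostLikelyQuantileMap() reads it through 'cat_dim'.)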
- suppressWarnings( - PlotLayout(PlotMostLikelyQuantileMap, c('bin', 'longitude', 'latitude'), - cat_dim = 'bin', - i_probs_fcst, longitude, latitude, - coast_width = 1.5, - title_scale = 0.6, - legend_scale = 0.8, #cex_bar_titles = 0.6, - toptitle = toptitle, - titles = titles, - fileout = outfile, - bar_label_digits = 2, - bar_scale = rep(0.7, 4), - bar_label_scale = 1.2, - axes_label_scale = 1.3, - triangle_ends = c(F, F), width = 11, height = 8) - ) - } - - info(recipe$Run$logger, - "##### MOST LIKELY TERCILE PLOT SAVED TO OUTPUT DIRECTORY #####") -} diff --git a/tests/recipes/recipe-seasonal_monthly_1.yml b/tests/recipes/recipe-seasonal_monthly_1.yml index 59b4845a..21321fab 100644 --- a/tests/recipes/recipe-seasonal_monthly_1.yml +++ b/tests/recipes/recipe-seasonal_monthly_1.yml @@ -28,10 +28,10 @@ Analysis: method: bilinear type: to_system Workflow: - Anomalies: - compute: no - cross_validation: - save: 'none' + # Anomalies: + # compute: no + # cross_validation: + # save: 'none' Calibration: method: mse_min save: 'all' -- GitLab From b687f5f564067e8ec8130cc427a016b0de959d72 Mon Sep 17 00:00:00 2001 From: eduzenli Date: Wed, 19 Apr 2023 13:28:38 +0200 Subject: [PATCH 32/52] large-scale name was corrected for Intlr.R --- modules/Downscaling/Downscaling.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index 8e919430..c6456da4 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -51,7 +51,7 @@ downscale_datasets <- function(recipe, data) { DOWNSCAL_TYPES <- c("none", "int", "intbc", "intlr", "analogs", "logreg") BC_METHODS <- c("simple_bias", "calibration", "quantile_mapping", "sbc", "cal", "qm") - LR_METHODS <- c("basic", "large_scale", "4nn") + LR_METHODS <- c("basic", "large-scale", "4nn") LOG_REG_METHODS <- c("ens_mean", "ens_mean_sd", "sorted_members") if (!(type %in% DOWNSCAL_TYPES)) { @@ -160,7 +160,7 @@ downscale_datasets <- function(recipe, data) { if (is.null(lr_method)) { stop("Please provide one linear regression method in the recipe. Accepted ", - "methods are 'basic', 'large_scale', '4nn'.") + "methods are 'basic', 'large-scale', '4nn'.") } if (is.null(target_grid)) { @@ -169,11 +169,11 @@ downscale_datasets <- function(recipe, data) { if (!(lr_method %in% LR_METHODS)) { stop(paste0(lr_method, " method in the recipe is not available. 
Accepted methods ", - "are 'basic', 'large_scale', '4nn'.")) + "are 'basic', 'large-scale', '4nn'.")) } # TO DO: add the possibility to have the element 'pred' in 'data' - if (lr_method == "large_scale") { + if (lr_method == "large-scale") { if (is.null(data$pred$data)) { stop("Please provide the large scale predictors in the element 'data$pred$data'.") } -- GitLab From bdfa59c1e8aa4d3ec2f670e5efac2983995d2fc7 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 19 Apr 2023 15:31:07 +0200 Subject: [PATCH 33/52] Adjust anomaly savcing check --- tools/check_recipe.R | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 64bc9464..a9170707 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -275,23 +275,25 @@ check_recipe <- function(recipe) { paste("Parameter 'Anomalies:compute' must be a logical value", "(True/False or yes/no).")) error_status <- T - } else if ((recipe$Analysis$Workflow$Anomalies$compute) && - (!is.logical(recipe$Analysis$Workflow$Anomalies$cross_validation))) { - error(recipe$Run$logger, - paste("If anomaly computation is requested, parameter", - "'cross_validation' must be defined under 'Anomalies', - and it must be a logical value (True/False or yes/no).")) - error_status <- T - } - # Saving checks - SAVING_OPTIONS_ANOM <- c("all", "none", "exp_only", "fcst_only") - if ((is.null(recipe$Analysis$Workflow$Anomalies$save)) || - (!(recipe$Analysis$Workflow$Anomalies$save %in% SAVING_OPTIONS_ANOM))) { - error(recipe$Run$logger, - paste0("Please specify which Anomalies module outputs you want ", - "to save with the 'save' parameter. The options are: ", - paste(SAVING_OPTIONS_ANOM, collapse = ", "), ".")) - error_status <- T + } else if ((recipe$Analysis$Workflow$Anomalies$compute)) { + # Cross-validation check + if (!is.logical(recipe$Analysis$Workflow$Anomalies$cross_validation)) { + error(recipe$Run$logger, + paste("If anomaly computation is requested, parameter", + "'cross_validation' must be defined under 'Anomalies', + and it must be a logical value (True/False or yes/no).")) + error_status <- T + } + # Saving checks + SAVING_OPTIONS_ANOM <- c("all", "none", "exp_only", "fcst_only") + if ((is.null(recipe$Analysis$Workflow$Anomalies$save)) || + (!(recipe$Analysis$Workflow$Anomalies$save %in% SAVING_OPTIONS_ANOM))) { + error(recipe$Run$logger, + paste0("Please specify which Anomalies module outputs you want ", + "to save with the 'save' parameter. 
The options are: ", + paste(SAVING_OPTIONS_ANOM, collapse = ", "), ".")) + error_status <- T + } } } # Skill -- GitLab From 875ccfc3e8d358c96ff8e3eee8423e3677c6cf92 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 20 Apr 2023 09:41:14 +0200 Subject: [PATCH 34/52] Update a few recipes, delete paths2save file --- modules/Saving/paths2save.R | 113 ------------------ .../atomic_recipes/recipe_system5c3s-tas.yml | 7 ++ .../atomic_recipes/recipe_system7c3s-prlr.yml | 8 +- recipes/recipe_decadal_split.yml | 8 +- recipes/recipe_splitting_example.yml | 7 ++ recipes/tests/recipe_autosubmit_marstest.yml | 6 + recipes/tests/recipe_multiregion.yml | 6 + recipes/tests/recipe_seasonal_example.yml | 6 + .../tests/recipe_seasonal_two-variables.yml | 6 + 9 files changed, 52 insertions(+), 115 deletions(-) delete mode 100644 modules/Saving/paths2save.R diff --git a/modules/Saving/paths2save.R b/modules/Saving/paths2save.R deleted file mode 100644 index 24e1fce7..00000000 --- a/modules/Saving/paths2save.R +++ /dev/null @@ -1,113 +0,0 @@ -## TODO: Separate by time aggregation -## TODO: Build a default path that accounts for: -## variable, system, reference, start date and region name - -get_filename <- function(dir, recipe, var, date, agg, file.type) { - # This function builds the path of the output file based on directory, - # variable, forecast date, startdate, aggregation, forecast horizon and - # type of metric/forecast/probability. - - if (recipe$Analysis$Horizon == "subseasonal") { - shortdate <- format(as.Date(as.character(date), "%Y%m%d"), "%V") - dd <- "week" - } else { - shortdate <- format(as.Date(as.character(date), "%Y%m%d"), "%m") - dd <- "month" - } - - switch(tolower(agg), - "country" = {gg <- "-country"}, - "global" = {gg <- ""}) - - system <- gsub('.','', recipe$Analysis$Datasets$System$name, fixed = T) - reference <- gsub('.','', recipe$Analysis$Datasets$Reference$name, fixed = T) - - if (tolower(recipe$Analysis$Output_format) == 'scorecards') { - # Define output dir name accordint to Scorecards format - dict <- read_yaml("conf/output_dictionaries/scorecards.yml") - # Get necessary names - hcst_start <- recipe$Analysis$Time$hcst_start - hcst_end <- recipe$Analysis$Time$hcst_end - - switch(file.type, - "skill" = {type_info <- "-skill_"}, - "corr" = {type_info <- "-corr_"}, - "exp" = {type_info <- paste0("_", date, "_")}, - "obs" = {type_info <- paste0("-obs_", date, "_")}, - "percentiles" = {type_info <- "-percentiles_"}, - "probs" = {type_info <- paste0("-probs_", date, "_")}, - "bias" = {type_info <- paste0("-bias_", date, "_")}) - - # Build file name - file <- paste0("scorecards_", system, "_", reference, "_", - var, type_info, hcst_start, "-", hcst_end, "_s", shortdate) - } else { - switch(file.type, - "skill" = {file <- paste0(var, gg, "-skill_", dd, shortdate)}, - "corr" = {file <- paste0(var, gg, "-corr_", dd, shortdate)}, - "exp" = {file <- paste0(var, gg, "_", date)}, - "obs" = {file <- paste0(var, gg, "-obs_", date)}, - "percentiles" = {file <- paste0(var, gg, "-percentiles_", dd, - shortdate)}, - "probs" = {file <- paste0(var, gg, "-probs_", date)}, - "bias" = {file <- paste0(var, gg, "-bias_", date)}) - } - return(paste0(dir, file, ".nc")) -} - -get_dir <- function(recipe, agg = "global") { - # This function builds the path for the output directory. The output - # directories will be subdirectories within outdir, organized by variable, - # startdate, and aggregation. 
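# (For a seasonal monthly run this might resolve to something like
#  <output_dir>/ECMWF-SEAS5/mse_min-monthly_mean/tas/20201101/, depending on
#  the recipe settings.)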
- - ## TODO: Get aggregation from recipe - outdir <- recipe$Run$output_dir - ## TODO: multivar case - variable <- recipe$Analysis$Variables$name - system <- gsub('.','', recipe$Analysis$Datasets$System$name, fixed = T) - - if (tolower(recipe$Analysis$Output_format) == 'scorecards') { - # Define output dir name accordint to Scorecards format - dict <- read_yaml("conf/output_dictionaries/scorecards.yml") - # system <- dict$System[[recipe$Analysis$Datasets$System$name]]$short_name - dir <- paste0(outdir, "/", system, "/", variable, "/") - } else { - # Default generic output format based on FOCUS - # Get startdate or hindcast period - if (!is.null(recipe$Analysis$Time$fcst_year)) { - if (tolower(recipe$Analysis$Horizon) == 'decadal') { - # decadal doesn't have sdate - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, collapse = '_') - } else { - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) - } - } else { - if (tolower(recipe$Analysis$Horizon) == 'decadal') { - # decadal doesn't have sdate - fcst.sdate <- paste0("hcst-", paste(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$hcst_end, - sep = '_')) - } else { - fcst.sdate <- paste0("hcst-", recipe$Analysis$Time$sdate) - } - } - - calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method) - store.freq <- recipe$Analysis$Variables$freq - ## TODO: Change "_country" - if (!is.null(recipe$Analysis$Region$name)) { - outdir <- paste0(outdir, "/", recipe$Analysis$Region$name) - } - switch(tolower(agg), - "country" = {dir <- paste0(outdir, "/", system, "/", calib.method, - "-", store.freq, "/", variable, - "_country/", fcst.sdate, "/")}, - "global" = {dir <- paste0(outdir, "/", system, "/", calib.method, - "-", store.freq, "/", variable, "/", - fcst.sdate, "/")}) - } - ## TODO: Multivar case - dir.create(dir, showWarnings = FALSE, recursive = TRUE) - return(dir) -} diff --git a/recipes/atomic_recipes/recipe_system5c3s-tas.yml b/recipes/atomic_recipes/recipe_system5c3s-tas.yml index 31ae079d..c4606d59 100644 --- a/recipes/atomic_recipes/recipe_system5c3s-tas.yml +++ b/recipes/atomic_recipes/recipe_system5c3s-tas.yml @@ -31,12 +31,19 @@ Analysis: Anomalies: compute: no cross_validation: + save: Calibration: method: raw + save: fcst_only Skill: metric: RPSS_specs BSS90_specs EnsCorr_specs FRPS_specs FRPSS_specs BSS10_specs FRPS + save: all Probabilities: percentiles: [[1/3, 2/3]] + save: all + Visualization: + plots: skill_metrics forecast_ensemble_mean + Indicators: index: no Output_format: S2S4E diff --git a/recipes/atomic_recipes/recipe_system7c3s-prlr.yml b/recipes/atomic_recipes/recipe_system7c3s-prlr.yml index 58030bf3..fa7bee7f 100644 --- a/recipes/atomic_recipes/recipe_system7c3s-prlr.yml +++ b/recipes/atomic_recipes/recipe_system7c3s-prlr.yml @@ -31,15 +31,21 @@ Analysis: Anomalies: compute: no cross_validation: + save: Calibration: method: mse_min + save: 'all' Skill: metric: RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr + save: 'all' Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] + save: 'all' + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles Indicators: index: no - ncores: 1 + ncores: 12 remove_NAs: no Output_format: S2S4E Run: diff --git a/recipes/recipe_decadal_split.yml b/recipes/recipe_decadal_split.yml index 708037f7..a94ffd03 100644 --- a/recipes/recipe_decadal_split.yml +++ b/recipes/recipe_decadal_split.yml @@ -28,13 +28,19 @@ Analysis: Workflow: Anomalies: compute: no - cross_validation: + cross_validation: + save: 
Calibration: method: 'bias' + save: 'all' Skill: metric: EnsCorr RPSS + save: 'all' Probabilities: percentiles: [[1/3, 2/3]] + save: 'all' + Visualization: + plots: skill_metrics Indicators: index: FALSE ncores: 8 # Optional, int: number of cores, defaults to 1 diff --git a/recipes/recipe_splitting_example.yml b/recipes/recipe_splitting_example.yml index 94a94468..93e5994e 100644 --- a/recipes/recipe_splitting_example.yml +++ b/recipes/recipe_splitting_example.yml @@ -38,12 +38,19 @@ Analysis: method: bilinear ## TODO: allow multiple methods? type: to_system Workflow: + Anomalies: + compute: no Calibration: method: mse_min ## TODO: list, split? + save: 'none' Skill: metric: RPS, RPSS, CRPS, CRPSS, FRPSS, BSS10, BSS90, mean_bias, mean_bias_SS # list, don't split + save: 'all' Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # list, don't split + save: 'all' + Visualization: + plots: skill_metrics, most_likely_terciles, forecast_ensemble_mean Indicators: index: no # ? ncores: 7 diff --git a/recipes/tests/recipe_autosubmit_marstest.yml b/recipes/tests/recipe_autosubmit_marstest.yml index dfd2159f..5bf62fb9 100644 --- a/recipes/tests/recipe_autosubmit_marstest.yml +++ b/recipes/tests/recipe_autosubmit_marstest.yml @@ -40,12 +40,18 @@ Analysis: Anomalies: compute: yes cross_validation: yes + save: 'all' Calibration: method: raw ## TODO: list, split? + save: 'none' Skill: metric: RPS, RPSS, CRPS, CRPSS, FRPSS, BSS10, BSS90, mean_bias, mean_bias_SS # list, don't split + save: 'all' Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # list, don't split + save: 'all' + Visualization: + plots: skill_metrics Indicators: index: no # ? ncores: 8 diff --git a/recipes/tests/recipe_multiregion.yml b/recipes/tests/recipe_multiregion.yml index bcb4d126..69d8621d 100644 --- a/recipes/tests/recipe_multiregion.yml +++ b/recipes/tests/recipe_multiregion.yml @@ -40,12 +40,18 @@ Analysis: Anomalies: compute: yes cross_validation: yes + save: none Calibration: method: raw + save: none Skill: metric: RPS, RPSS, CRPS, CRPSS, FRPSS, BSS10, BSS90, mean_bias, mean_bias_SS + save: all Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] + save: all + Visualization: + plots: skill_metrics Indicators: index: no ncores: 8 diff --git a/recipes/tests/recipe_seasonal_example.yml b/recipes/tests/recipe_seasonal_example.yml index cb941f84..f4f3a8f5 100644 --- a/recipes/tests/recipe_seasonal_example.yml +++ b/recipes/tests/recipe_seasonal_example.yml @@ -40,12 +40,18 @@ Analysis: Anomalies: compute: yes cross_validation: yes + save: 'all' Calibration: method: raw + save: 'none' Skill: metric: RPS, RPSS, CRPS, CRPSS, FRPSS, BSS10, BSS90, mean_bias, mean_bias_SS + save: 'all' Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] + save: 'all' + Visualization: + plots: skill_metrics Indicators: index: no ncores: 8 diff --git a/recipes/tests/recipe_seasonal_two-variables.yml b/recipes/tests/recipe_seasonal_two-variables.yml index 5dbd892f..1cb5c3b2 100644 --- a/recipes/tests/recipe_seasonal_two-variables.yml +++ b/recipes/tests/recipe_seasonal_two-variables.yml @@ -38,12 +38,18 @@ Analysis: Anomalies: compute: yes cross_validation: yes + save: 'all' Calibration: method: raw ## TODO: list, split? 
+ save: 'none' Skill: metric: RPS, RPSS, CRPS, CRPSS, FRPSS, BSS10, BSS90, mean_bias, mean_bias_SS # list, don't split + save: 'all' Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # list, don't split + save: 'all' + Visualization: + plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles Indicators: index: no # ? ncores: 7 -- GitLab From 5d1bdd953d1ce4d4d400643746cb002b46d14250 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 20 Apr 2023 12:09:37 +0200 Subject: [PATCH 35/52] Fix pipeline, update decadal recipe --- recipes/atomic_recipes/recipe_decadal.yml | 6 ++++++ tests/testthat/test-decadal_monthly_1.R | 5 +++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/recipes/atomic_recipes/recipe_decadal.yml b/recipes/atomic_recipes/recipe_decadal.yml index 986578f7..2dca96c0 100644 --- a/recipes/atomic_recipes/recipe_decadal.yml +++ b/recipes/atomic_recipes/recipe_decadal.yml @@ -32,14 +32,20 @@ Analysis: Anomalies: compute: no cross_validation: + save: Calibration: method: bias + save: 'all' Skill: metric: RPSS Corr + save: 'all' Probabilities: percentiles: [[1/3, 2/3]] + save: 'all' Indicators: index: FALSE + Visualization: + plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles ncores: # Optional, int: number of cores, defaults to 1 remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE Output_format: S2S4E diff --git a/tests/testthat/test-decadal_monthly_1.R b/tests/testthat/test-decadal_monthly_1.R index ee1520ad..9b46cce8 100644 --- a/tests/testthat/test-decadal_monthly_1.R +++ b/tests/testthat/test-decadal_monthly_1.R @@ -32,8 +32,9 @@ probs <- compute_probabilities(recipe, calibrated_data) # Plotting suppressWarnings({invisible(capture.output( -plot_data(recipe = recipe, archive = archive, data = calibrated_data, - skill_metrics = skill_metrics, probabilities = probs, significance = T) +plot_data(recipe = recipe, data = calibrated_data, + skill_metrics = skill_metrics, probabilities = probs, + significance = T) ))}) #====================================== -- GitLab From debf8b42cb3f003b1f593d2bfd06c5dc89d91972 Mon Sep 17 00:00:00 2001 From: eduzenli Date: Fri, 21 Apr 2023 14:31:03 +0200 Subject: [PATCH 36/52] parallelisation is corrected --- modules/Downscaling/tmp/Analogs.R | 52 +++---- modules/Downscaling/tmp/Intbc.R | 42 +++--- modules/Downscaling/tmp/Interpolation.R | 72 ++++++---- modules/Downscaling/tmp/Intlr.R | 173 ++++++++++++------------ modules/Downscaling/tmp/LogisticReg.R | 127 +++++++++++------ modules/Downscaling/tmp/Utils.R | 14 +- 6 files changed, 278 insertions(+), 202 deletions(-) diff --git a/modules/Downscaling/tmp/Analogs.R b/modules/Downscaling/tmp/Analogs.R index a69a66df..f0ebd610 100644 --- a/modules/Downscaling/tmp/Analogs.R +++ b/modules/Downscaling/tmp/Analogs.R @@ -66,8 +66,8 @@ #'@param loocv_window a logical vector only to be used if 'obs' does not have the dimension #''window'. It indicates whether to apply leave-one-out cross-validation in the creation #'of the window. It is recommended to be set to TRUE. Default to TRUE. -#'@param ncores an integer indicating the number of cores to use in parallel computation. -#' +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#'The default value is NULL. #'@return An 's2dv' object. The element 'data' contains the dowscaled field, 'lat' the #'downscaled latitudes, and 'lon' the downscaled longitudes. 
If fun_analog is set to NULL #'(default), the output array in 'data' also contains the dimension 'analog' with the best @@ -87,7 +87,7 @@ #'@export CST_Analogs <- function(exp, obs, grid_exp, obs2 = NULL, nanalogs = 3, fun_analog = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", member_dim = "member", - region = NULL, return_indices = FALSE, loocv_window = TRUE, ncores = 1) { + region = NULL, return_indices = FALSE, loocv_window = TRUE, ncores = NULL) { # input exp and obs must be s2dv_cube objects if (!inherits(exp,'s2dv_cube')) { @@ -204,7 +204,8 @@ CST_Analogs <- function(exp, obs, grid_exp, obs2 = NULL, nanalogs = 3, fun_analo #'@param loocv_window a logical vector only to be used if 'obs' does not have the dimension #''window'. It indicates whether to apply leave-one-out cross-validation in the creation #'of the window. It is recommended to be set to TRUE. Default to TRUE. -#'@param ncores an integer indicating the number of cores to use in parallel computation. +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#'The default value is NULL. #'@import multiApply #'@import CSTools #'@importFrom s2dv InsertDim CDORemap @@ -232,7 +233,7 @@ Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, obs2_lats = NULL, obs2_lons = NULL, nanalogs = 3, fun_analog = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", member_dim = "member", region = NULL, return_indices = FALSE, - loocv_window = TRUE, ncores = 1) { + loocv_window = TRUE, ncores = NULL) { #----------------------------------- # Checkings #----------------------------------- @@ -336,7 +337,14 @@ Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, stop("Missing time dimension in 'obs2', or does not match the parameter 'time_dim'") } } - + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + # Select a function to apply to the analogs selected for a given observation if (!is.null(fun_analog)) { stopifnot(fun_analog %in% c("mean", "wmean", "max", "min", "median")) @@ -370,22 +378,22 @@ Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, obs_interpolated <- Interpolation(exp = obs_train, lats = obs_train_lats, lons = obs_train_lons, target_grid = grid_exp, lat_dim = lat_dim, lon_dim = lon_dim, - method_remap = "conservative", region = region) + method_remap = "conservative", region = region, ncores = ncores) # If after interpolating 'obs' data the coordinates do not match, the exp data is interpolated to # the same grid to force the matching if (!.check_coords(lat1 = obs_interpolated$lat, lat2 = exp_lats, lon1 = obs_interpolated$lon, lon2 = exp_lons)) { exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = grid_exp, lat_dim = lat_dim, lon_dim = lon_dim, method_remap = "conservative", - region = region)$data + region = region, ncores = ncores)$data } else { exp_interpolated <- exp } # Create window if user does not have it in the training observations if ( !("window" %in% names(dim(obs_interpolated$data))) ) { - obs_train_interpolated <- generate_window(obj = obs_interpolated$data, sdate_dim = sdate_dim, - time_dim = time_dim, loocv = loocv_window) - obs_hres <- generate_window(obj = obs, sdate_dim = sdate_dim, time_dim = time_dim, loocv = loocv_window) + obs_train_interpolated <- 
.generate_window(obj = obs_interpolated$data, sdate_dim = sdate_dim, + time_dim = time_dim, loocv = loocv_window, ncores = ncores) + obs_hres <- .generate_window(obj = obs, sdate_dim = sdate_dim, time_dim = time_dim, loocv = loocv_window, ncores = ncores) } #----------------------------------- @@ -411,7 +419,7 @@ Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, ntimes <- dim(res.ind)[names(dim(res.ind)) == time_dim] res.ind <- Apply(res.ind, target_dims = c("ind", sdate_dim), function(x) sapply(1:nsdates, function(s) seq(ntimes * nsdates)[ - (ntimes * (s - 1) + 1:ntimes)][x[, s]]), - output_dims = c("ind", sdate_dim))$output1 + output_dims = c("ind", sdate_dim), ncores = ncores)$output1 } # restore ensemble dimension in observations if it existed originally @@ -484,7 +492,7 @@ Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, # Add the dimension window to an array that contains, at least, the start date and time # dimensions # object has at least dimensions sdate and time -generate_window <- function(obj, sdate_dim, time_dim, loocv, size = NULL) { +.generate_window <- function(obj, sdate_dim, time_dim, loocv, size = NULL, ncores = NULL) { rsdates <- 1:dim(obj)[names(dim(obj)) == sdate_dim] ntimes <- dim(obj)[names(dim(obj)) == time_dim] @@ -497,11 +505,11 @@ generate_window <- function(obj, sdate_dim, time_dim, loocv, size = NULL) { if (loocv) { obj_window <- Apply(obj, target_dims = c(time_dim, sdate_dim), fun = function(x) sapply(rsdates, function(s) as.vector(x[ rtimes, -s])), - output_dims = c('window', sdate_dim))$output1 + output_dims = c('window', sdate_dim), ncores = ncores)$output1 # Generate window without cross-validation } else { obj_window <- Apply(obj, target_dims = c(time_dim, sdate_dim), - fun = as.vector, output_dims = 'window')$output1 + fun = as.vector, output_dims = 'window', ncores = ncores)$output1 } } # Generate window of the size specified by the user. Only applied with CV @@ -513,28 +521,20 @@ generate_window <- function(obj, sdate_dim, time_dim, loocv, size = NULL) { # Concatenate data from previous, target and posterior months obj_new <- Apply(obj, target_dims = list(c("time", "smonth")), - fun = as.vector, output_dims = "time")$output1 + fun = as.vector, output_dims = "time", ncores = ncores )$output1 if (loocv) { obj_window <- Apply(list(obj_new, rtimes, rsdates), target_dims = list(c(time_dim, sdate_dim), NULL, NULL), fun = function(x, t, s) as.vector(x[(ntimes + t - size):(ntimes + t + size), -s]), - output_dims = 'window')$output1 + output_dims = 'window', ncores = ncores)$output1 names(dim(obj_window))[(length(names(dim(obj_window))) - 1):length(names(dim(obj_window)))] <- c(time_dim, sdate_dim) } else { obj_window <- Apply(obj_new, target_dims = c(time_dim, sdate_dim), fun = function(x) sapply(rtimes, function(t) as.vector(x[(ntimes + t - size):(ntimes + t + size), ])), - output_dims = c('window', time_dim))$output1 + output_dims = c('window', time_dim), ncores = ncores)$output1 } } return(obj_window) } - - - - - - - - diff --git a/modules/Downscaling/tmp/Intbc.R b/modules/Downscaling/tmp/Intbc.R index 27de3735..1cb558d5 100644 --- a/modules/Downscaling/tmp/Intbc.R +++ b/modules/Downscaling/tmp/Intbc.R @@ -49,7 +49,8 @@ #'to the left border, while lonmax refers to the right border. latmin indicates the lower #'border, whereas latmax indicates the upper border. If set to NULL (default), the function #'takes the first and last elements of the latitudes and longitudes. 
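#'For orientation, a minimal call could look as follows (a sketch only: the grid string,
#'method names and core count are illustrative values, and 'exp' and 'obs' are assumed
#'to be 's2dv_cube' objects already available in the session):
#'  down <- CST_Intbc(exp = exp, obs = obs, target_grid = "r360x180",
#'                    bc_method = "simple_bias", int_method = "conservative",
#'                    ncores = 4)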
-#'@param ncores an integer indicating the number of cores to use in parallel computation. +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#'The default value is NULL. #'@return An 's2dv' object. The element 'data' contains the dowscaled field, 'lat' the #'downscaled latitudes, and 'lon' the downscaled longitudes. #'@examples @@ -68,7 +69,7 @@ CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", - sdate_dim = "sdate", member_dim = "member", region = NULL, ncores = 1,...) + sdate_dim = "sdate", member_dim = "member", region = NULL, ncores = NULL, ...) { if (!inherits(exp,'s2dv_cube')) { stop("Parameter 'exp' must be of the class 's2dv_cube'") @@ -83,7 +84,7 @@ CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, point int_method = int_method, bc_method = bc_method, points = points, source_file = exp$attrs$source_files[1], method_point_interp = method_point_interp, lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim, member_dim = member_dim, - region = region, ncores = ncores,...) + region = region, ncores = ncores, ...) # Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data exp$data <- res$data @@ -162,8 +163,8 @@ CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, point #'to the left border, while lonmax refers to the right border. latmin indicates the lower #'border, whereas latmax indicates the upper border. If set to NULL (default), the function #'takes the first and last elements of the latitudes and longitudes. -#'@param ncores an integer indicating the number of cores to use in parallel computation. -#' +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#'The default value is NULL. #'@import CSTools #' #'@seealso \code{\link[CSTools]{BiasCorrection}} @@ -186,7 +187,7 @@ CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, point #'@export Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, bc_method, int_method = NULL, points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", - time_dim = "time", member_dim = "member", source_file = NULL, region = NULL, ncores = 1, ...) { + time_dim = "time", member_dim = "member", source_file = NULL, region = NULL, ncores = NULL, ...) 
{ if (!inherits(bc_method, 'character')) { stop("Parameter 'bc_method' must be of the class 'character'") @@ -250,11 +251,18 @@ Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, "'obs_lats' and 'obs_lons'.") region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)]) } - + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid, method_remap = int_method, points = points, source_file = source_file, lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp, - region = region) + region = region, ncores = ncores) # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to # the same grid to force the matching @@ -263,7 +271,7 @@ Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid, method_remap = int_method, points = points, source_file = source_file, lat_dim = lat_dim, lon_dim = lon_dim, - method_point_interp = method_point_interp, region = region) + method_point_interp = method_point_interp, region = region, ncores = ncores) obs_ref <- obs_interpolated$data } else { obs_ref <- obs @@ -289,32 +297,32 @@ Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, } res <- BiasCorrection(exp = exp_interpolated$data, obs = obs_ref, memb_dim = member_dim, - sdate_dim = sdate_dim, ...) + sdate_dim = sdate_dim, ncores = ncores, ...) } else if (bc_method == 'cal' | bc_method == 'calibration') { if (dim(exp_interpolated$data)[member_dim] == 1) { stop('Calibration must not be used with only one ensemble member.') } res <- Calibration(exp = exp_interpolated$data, obs = obs_ref, memb_dim = member_dim, - sdate_dim = sdate_dim, ...) + sdate_dim = sdate_dim, ncores = ncores, ...) } else if (bc_method == 'qm' | bc_method == 'quantile_mapping') { res <- QuantileMapping(exp = exp_interpolated$data, obs = obs_ref, na.rm = TRUE, - memb_dim = member_dim, sdate_dim = sdate_dim, ...) + memb_dim = member_dim, sdate_dim = sdate_dim, ncores = ncores, ...) } else if (bc_method == 'dbc' | bc_method == 'dynamical_bias') { # the temporal dimension must be only one dimension called "time" if (all(c(time_dim, sdate_dim) %in% names(dim(exp_interpolated$data)))) { exp_interpolated$data <- Apply(exp_interpolated$data, target_dims = c(time_dim, sdate_dim), - fun = as.vector, output_dims = "time")$output1 + fun = as.vector, output_dims = "time", ncores = ncores)$output1 } if (all(c(time_dim, sdate_dim) %in% names(dim(obs_ref)))) { obs_ref <- Apply(obs_ref, target_dims = c(time_dim, sdate_dim), fun = as.vector, - output_dims = "time")$output1 + output_dims = "time", ncores = ncores)$output1 } # REMEMBER to add na.rm = T in colMeans in .proxiesattractor - res <- DynBiasCorrection(exp = exp_interpolated$data, obs = obs_ref, ...) + res <- DynBiasCorrection(exp = exp_interpolated$data, obs = obs_ref, ncores = ncores, ...) 
} # Return a list of three elements @@ -322,7 +330,3 @@ Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, return(res) } - - - - diff --git a/modules/Downscaling/tmp/Interpolation.R b/modules/Downscaling/tmp/Interpolation.R index 1599bf3b..e939e0c3 100644 --- a/modules/Downscaling/tmp/Interpolation.R +++ b/modules/Downscaling/tmp/Interpolation.R @@ -37,7 +37,8 @@ #'@param method_point_interp a character vector indicating the interpolation method to #'interpolate model gridded data into the point locations. Accepted methods are "nearest", #'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW". -#' +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#'The default value is NULL. #'@seealso \code{\link[s2dverification]{CDORemap}} #' #'@return An s2dv object containing the dowscaled field. @@ -52,7 +53,7 @@ #'@export CST_Interpolation <- function(exp, points = NULL, method_remap = NULL, target_grid = NULL, lat_dim = "lat", lon_dim = "lon", region = NULL, - method_point_interp = NULL) + method_point_interp = NULL, ncores = NULL) { if (!inherits(exp,'s2dv_cube')) { stop("Parameter 'exp' must be of the class 's2dv_cube'") @@ -70,7 +71,7 @@ CST_Interpolation <- function(exp, points = NULL, method_remap = NULL, target_gr res <- Interpolation(exp = exp$data, lats = exp$coords[[lat_dim]], lons = exp$coords[[lon_dim]], source_file = exp$attrs$source_files[1], points = points, method_remap = method_remap, target_grid = target_grid, lat_dim = lat_dim, - lon_dim = lon_dim, region = region, method_point_interp = method_point_interp) + lon_dim = lon_dim, region = region, method_point_interp = method_point_interp, ncores = ncores) # Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data exp$data <- res$data @@ -129,6 +130,8 @@ CST_Interpolation <- function(exp, points = NULL, method_remap = NULL, target_gr #'interpolate model gridded data into the point locations. Accepted methods are "nearest", #'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW". Only needed if the downscaling #'is to a point location. +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#'The default value is NULL. 
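#'As a minimal sketch (synthetic values: the grid string, sizes and method are
#'illustrative, and CDO is assumed to be available on the system), a small field with
#'named 'lat' and 'lon' dimensions could be regridded with:
#'  exp <- array(rnorm(6 * 12), dim = c(lat = 6, lon = 12))
#'  res <- Interpolation(exp = exp, lats = 40:45, lons = 0:11,
#'                       target_grid = "r360x180", method_remap = "bilinear",
#'                       region = c(0, 11, 40, 45))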
#'@import multiApply #'@import plyr #'@importFrom s2dv CDORemap @@ -147,7 +150,7 @@ CST_Interpolation <- function(exp, points = NULL, method_remap = NULL, target_gr #'@export Interpolation <- function(exp, lats, lons, points = NULL, source_file = NULL, method_remap = NULL, target_grid = NULL, lat_dim = "lat", lon_dim = "lon", region = NULL, - method_point_interp = NULL) + method_point_interp = NULL, ncores = NULL) { if (!is.null(method_remap)) { if (!inherits(method_remap, 'character')) { @@ -232,7 +235,14 @@ Interpolation <- function(exp, lats, lons, points = NULL, source_file = NULL, me "be passed to CDO, and it must be a grid recognised by CDO or a NetCDF file.") } } - + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + #---------------------------------- # Limits of the region defined by the model data #---------------------------------- @@ -256,12 +266,17 @@ Interpolation <- function(exp, lats, lons, points = NULL, source_file = NULL, me # Map regrid with CDO #---------------------------------- if (is.null(points)) { - res <- CDORemap(data_array = exp, - lats = lats, - lons = lons, - grid = target_grid, - method = method_remap, - crop = region) + + .KnownLonNames <- s2dv:::.KnownLonNames + .KnownLatNames <- s2dv:::.KnownLatNames + .warning <- s2dv:::.warning + + res <- CDORemap(data_array = exp, + lats = lats, + lons = lons, + grid = target_grid, + method = method_remap, + crop = region) # Return a list res <- list(data = res$data_array, obs = NULL, lon = res$lons, lat = res$lats) @@ -271,16 +286,16 @@ Interpolation <- function(exp, lats, lons, points = NULL, source_file = NULL, me #---------------------------------- } else { # First create interpolation weights, depending on the chosen method - weights <- create_interp_weights(ncfile = source_file, locids = 1:unique(lengths(points)), + weights <- .create_interp_weights(ncfile = source_file, locids = 1:unique(lengths(points)), lats = points[[lat_dim]], lons = points[[lon_dim]], method = method_point_interp, region = list(lat_min = region[3], lat_max = region[4], lon_min = region[1], lon_max = region[2])) # Select coarse-scale data to be interpolated - model_data_gridpoints <- get_model_data(weights.df = weights, mdata = exp) + model_data_gridpoints <- .get_model_data(weights.df = weights, mdata = exp, ncores = ncores) # Interpolate model data to point locations - res <- interpolate_data(model_data_gridpoints, weights) + res <- .interpolate_data(model_data_gridpoints, weights, ncores = ncores) # Return a list res <- list(data = res, obs = NULL, lon = points[[lon_dim]], lat = points[[lat_dim]]) @@ -293,7 +308,7 @@ Interpolation <- function(exp, lats, lons, points = NULL, source_file = NULL, me # Compute weights for interpolation at several (lat,lon) positions # We assume that grid boxes are centered in the grid point. 
#====================== -create_interp_weights <- function(ncfile, locids, lats, lons, region = NULL, +.create_interp_weights <- function(ncfile, locids, lats, lons, region = NULL, method = c("nearest", "bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW")) { @@ -307,7 +322,7 @@ create_interp_weights <- function(ncfile, locids, lats, lons, region = NULL, #---------------- # Read grid description and compute (i,j) of requested locations (including decimals) #---------------- - griddes <- get_griddes(nc_cropped1) + griddes <- .get_griddes(nc_cropped1) if (is.null(griddes$yinc)) { system(paste0('rm ', nc_cropped1)) @@ -318,7 +333,7 @@ create_interp_weights <- function(ncfile, locids, lats, lons, region = NULL, # If latitudes are decreasingly ordered, revert them if (griddes$yinc < 0) { system(paste0('cdo invertlat ', nc_cropped1, ' ', nc_cropped2)) - griddes <- get_griddes(nc_cropped2) + griddes <- .get_griddes(nc_cropped2) } # remove temporary files system(paste0('rm ', nc_cropped1)) @@ -328,7 +343,7 @@ create_interp_weights <- function(ncfile, locids, lats, lons, region = NULL, stop("'griddes' not found in the NetCDF source files") } - gridpoints <- latlon2ij(griddes, lats, lons) + gridpoints <- .latlon2ij(griddes, lats, lons) #---------------- # Compute the weights according to the selected method @@ -573,7 +588,7 @@ create_interp_weights <- function(ncfile, locids, lats, lons, region = NULL, # Works only for 'lonlat' and 'gaussian' grids. # Grids are supposed to cover whole globe. #====================== -latlon2ij <- function(griddes, lats, lons) { +.latlon2ij <- function(griddes, lats, lons) { #------------ # Check input params #------------ @@ -623,7 +638,7 @@ latlon2ij <- function(griddes, lats, lons) { #====================== # Use cdo griddes to obtain grid information #====================== -get_griddes <- function(ncfile) { +.get_griddes <- function(ncfile) { tmp <- system(paste0("cdo griddes ", ncfile, " 2>/dev/null | egrep 'gridtype|xsize|ysize|xfirst|xinc|yfirst|yinc'"), intern = T) arr <- do.call(rbind, strsplit(tmp,"\\s+= ", perl = T)) @@ -631,7 +646,7 @@ get_griddes <- function(ncfile) { names(griddes) <- arr[,1] if(griddes$gridtype == "gaussian") { - griddes$yvals <- get_lats(ncfile) + griddes$yvals <- .get_lats(ncfile) } # Convert some fields to numeric. Ensures all fields are present. @@ -645,7 +660,7 @@ get_griddes <- function(ncfile) { #====================== # Use nco to obtain latitudes. Latitudes shall be named "lat" or "latitude". #====================== -get_lats <- function(ncfile) { +.get_lats <- function(ncfile) { tmp <- system(paste0('ncks -H -s "%f " -v latitude ',ncfile),intern=T) @@ -663,7 +678,7 @@ get_lats <- function(ncfile) { # Uses StartR. All ... parameters go to Start (i.e. specify dat, var, # sdate, time, ensemble, num_procs, etc) #====================== -get_model_data <- function(weights.df, mdata) { +.get_model_data <- function(weights.df, mdata, ncores = NULL) { #----------------- # Get data for all combinations of i and j. 
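# A minimal illustration of this extraction step, with hypothetical 'field', 'is' and
# 'js' standing in for the model array and the grid-point indices (a sketch only):
#   field <- matrix(seq_len(12), nrow = 3, ncol = 4)          # dims: [lat, lon]
#   is <- c(2, 4); js <- c(1, 3)                              # one (j, i) pair per point
#   sapply(seq_along(is), function(k) field[js[k], is[k]])    # values at those points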
@@ -705,7 +720,9 @@ get_model_data <- function(weights.df, mdata) { #----------------- # Retrieve with multiApply #----------------- - sub_mdata <- Apply(mdata, target_dims = list(c(latdim, londim)), fun = function(x) {laply(1:length(is),function(k) { x[js[k],is[k]] }) })$output1 + sub_mdata <- Apply(mdata, target_dims = list(c(latdim, londim)), + fun = function(x) {laply(1:length(is),function(k) { x[js[k],is[k]] }) }, + ncores = ncores)$output1 names(dim(sub_mdata))[1] <- "gridpoint" #----------------- @@ -718,7 +735,7 @@ get_model_data <- function(weights.df, mdata) { # Multiply the grid-point series by the weights, # to obtain the desired interpolations #====================== -interpolate_data <- function(model_data, weights.df) { +.interpolate_data <- function(model_data, weights.df, ncores) { #----------------- # Multiply each gridpoint matrix by its corresponding weight #----------------- @@ -732,9 +749,8 @@ interpolate_data <- function(model_data, weights.df) { #interp_data <- apply(weighted_data, -gpdim, function(x) { rowsum(x, weights.df$locid) }) #names(dim(interp_data))[1] <- "location" interp_data <- Apply(weighted_data, target_dims = gpdim, fun = function(x) { - rowsum(x, weights.df$locid)}, output_dims = c("location", "aux"))$output1 + rowsum(x, weights.df$locid)}, output_dims = c("location", "aux"), + ncores = ncores)$output1 return(interp_data) } - - diff --git a/modules/Downscaling/tmp/Intlr.R b/modules/Downscaling/tmp/Intlr.R index b8bbf0fd..b4b8a75e 100644 --- a/modules/Downscaling/tmp/Intlr.R +++ b/modules/Downscaling/tmp/Intlr.R @@ -68,7 +68,7 @@ #'border, whereas latmax indicates the upper border. If set to NULL (default), the function #'takes the first and last elements of the latitudes and longitudes. #'@param ncores an integer indicating the number of cores to use in parallel computation. -#' +#'The default value is NULL. #'@import multiApply #' #'@return A list of three elements. 
'data' contains the dowscaled field, 'lat' the @@ -89,16 +89,16 @@ CST_Intlr <- function(exp, obs, lr_method, target_grid = NULL, points = NULL, int_method = NULL, method_point_interp = NULL, predictors = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", member_dim = "member", - large_scale_predictor_dimname = 'vars', loocv = FALSE, region = NULL, ncores = 1) { - + large_scale_predictor_dimname = 'vars', loocv = FALSE, region = NULL, ncores = NULL) { + if (!inherits(exp,'s2dv_cube')) { stop("Parameter 'exp' must be of the class 's2dv_cube'") } - + if (!inherits(obs,'s2dv_cube')) { stop("Parameter 'obs' must be of the class 's2dv_cube'") } - + res <- Intlr(exp = exp$data, obs = obs$data, exp_lats = exp$coords[[lat_dim]], exp_lons = exp$coords[[lon_dim]], obs_lats = obs$coords[[lat_dim]], obs_lons = obs$coords[[lon_dim]], points = points, source_file_exp = exp$attrs$source_files[1], source_file_obs = obs$attrs$source_files[1], @@ -107,18 +107,18 @@ CST_Intlr <- function(exp, obs, lr_method, target_grid = NULL, points = NULL, in lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim, time_dim = time_dim, member_dim = member_dim, large_scale_predictor_dimname = large_scale_predictor_dimname, loocv = loocv, region = region, ncores = ncores) - + # Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data exp$data <- res$data exp$dims <- dim(exp$data) exp$coords[[lon_dim]] <- res$lon exp$coords[[lat_dim]] <- res$lat - + obs$data <- res$obs obs$dims <- dim(obs$data) obs$coords[[lon_dim]] <- res$lon obs$coords[[lat_dim]] <- res$lat - + res_s2dv <- list(exp = exp, obs = obs) return(res_s2dv) } @@ -205,7 +205,7 @@ CST_Intlr <- function(exp, obs, lr_method, target_grid = NULL, points = NULL, in #'@param loocv a logical indicating whether to apply leave-one-out cross-validation when #'generating the linear regressions. Default to FALSE. #'@param ncores an integer indicating the number of cores to use in parallel computation. -#' +#'The default value is NULL. #'@import multiApply #' #'@return A list of three elements. 
'data' contains the dowscaled field, 'lat' the @@ -226,68 +226,68 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t int_method = NULL, method_point_interp = NULL, source_file_exp = NULL, source_file_obs = NULL, predictors = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", member_dim = "member", region = NULL, large_scale_predictor_dimname = 'vars', loocv = FALSE, - ncores = 1) { - + ncores = NULL) { + #----------------------------------- # Checkings #----------------------------------- if (!inherits(lr_method, 'character')) { stop("Parameter 'lr_method' must be of the class 'character'") } - + if (!inherits(large_scale_predictor_dimname, 'character')) { stop("Parameter 'large_scale_predictor_dimname' must be of the class 'character'") } - + if (!inherits(loocv, 'logical')) { stop("Parameter 'loocv' must be set to TRUE or FALSE") } - + if (!inherits(lat_dim, 'character')) { stop("Parameter 'lat_dim' must be of the class 'character'") } - + if (!inherits(lon_dim, 'character')) { stop("Parameter 'lon_dim' must be of the class 'character'") } - + if (!inherits(sdate_dim, 'character')) { stop("Parameter 'sdate_dim' must be of the class 'character'") } - + if (!inherits(large_scale_predictor_dimname, 'character')) { stop("Parameter 'large_scale_predictor_dimname' must be of the class 'character'") } - + if (is.na(match(lon_dim, names(dim(exp)))) | is.na(match(lon_dim, names(dim(obs))))) { stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter ", "'lon_dim'") } - + if (is.na(match(lat_dim, names(dim(exp)))) | is.na(match(lat_dim, names(dim(obs))))) { stop("Missing latitude dimension in 'exp' and/or 'obs', or does not match the parameter ", "'lat_dim'") } - + if (is.na(match(sdate_dim, names(dim(exp)))) | is.na(match(sdate_dim, names(dim(obs))))) { stop("Missing start date dimension in 'exp' and/or 'obs', or does not match the parameter ", "'sdate_dim'") } - + if (!is.null(points) & (is.null(source_file_exp) | is.null(source_file_obs))) { stop("No source files found. Source files for exp and obs must be provided in the parameters ", "'source_file_exp' and 'source_file_obs', respectively.") } - + if (!is.null(points) & is.null(method_point_interp)) { stop("Please provide the interpolation method to interpolate gridded data to point locations ", "through the parameter 'method_point_interp'.") } - + # sdate must be the time dimension in the input data stopifnot(sdate_dim %in% names(dim(exp))) stopifnot(sdate_dim %in% names(dim(obs))) - + # the code is not yet prepared to handle members in the observations restore_ens <- FALSE if (member_dim %in% names(dim(obs))) { @@ -299,7 +299,7 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t "but it should be of length = 1).") } } - + # checkings for the parametre 'predictors' if (!is.null(predictors)) { if (!is.array(predictors)) { @@ -311,29 +311,36 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t stopifnot(dim(predictors)[sdate_dim] == dim(exp)[sdate_dim]) } } - + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + #----------------------------------- # Interpolation #----------------------------------- if (lr_method != '4nn') { - + if (is.null(int_method)) { stop("Parameter 'int_method' must be a character vector indicating the interpolation method. 
", "Accepted methods are con, bil, bic, nn, con2") } - + if (is.null(region)) { warning("The borders of the downscaling region have not been provided. Assuming the ", "four borders of the downscaling region are defined by the first and last ", "elements of the parametres 'obs_lats' and 'obs_lons'.") region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)]) } - + exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid, points = points, method_point_interp = method_point_interp, source_file = source_file_exp, lat_dim = lat_dim, lon_dim = lon_dim, - method_remap = int_method, region = region) - + method_remap = int_method, region = region, ncores = ncores) + # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to # the same grid to force the matching if ((!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats, @@ -341,8 +348,8 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid, points = points, method_point_interp = method_point_interp, source_file = source_file_obs, lat_dim = lat_dim, lon_dim = lon_dim, - method_remap = int_method, region = region) - + method_remap = int_method, region = region, ncores = ncores) + lats <- obs_interpolated$lat lons <- obs_interpolated$lon obs_interpolated <- obs_interpolated$data @@ -352,7 +359,7 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t lons <- obs_lons } } - + #----------------------------------- # Linear regressions #----------------------------------- @@ -366,7 +373,7 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t target_dims_predictor <- sdate_dim target_dims_predictand <- sdate_dim } - + # (Multi) linear regression with large-scale predictors # Predictor: passed through the parameter 'predictors' by the user. 
Can be model or observations # Predictand: model data @@ -374,27 +381,27 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t if (is.null(predictors)) { stop("The large-scale predictors must be passed through the parametre 'predictors'") } - + predictand <- obs_interpolated predictor <- predictors target_dims_predictor <- c(sdate_dim, large_scale_predictor_dimname) target_dims_predictand <- sdate_dim } - + # Multi-linear regression with the four nearest neighbours # Predictors: model data # Predictand: observations else if (lr_method == '4nn') { - predictor <- find_nn(coar = exp, lats_hres = obs_lats, lons_hres = obs_lons, lats_coar = exp_lats, - lons_coar = exp_lons, lat_dim = lat_dim, lon_dim = lon_dim, nn = 4) - + predictor <- .find_nn(coar = exp, lats_hres = obs_lats, lons_hres = obs_lons, lats_coar = exp_lats, + lons_coar = exp_lons, lat_dim = lat_dim, lon_dim = lon_dim, nn = 4, ncores = ncores) + if (is.null(points)) { if (!is.null(target_grid)) { warning("Interpolating to the 'obs' grid") } predictand <- obs - + lats <- obs_lats lons <- obs_lons } @@ -403,40 +410,40 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t else { predictor <- Interpolation(exp = predictor, lats = obs_lats, lons = obs_lons, target_grid = NULL, points = points, method_point_interp = method_point_interp, - source_file = source_file_obs, method_remap = NULL, region = region) - + source_file = source_file_obs, method_remap = NULL, region = region, ncores = ncores) + predictand <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = NULL, points = points, method_point_interp = method_point_interp, - source_file = source_file_obs, method_remap = NULL, region = region) - + source_file = source_file_obs, method_remap = NULL, region = region, ncores = ncores) + lats <- predictor$lat lons <- predictor$lon predictor <- predictor$data predictand <- predictand$data } - + target_dims_predictor <- c(sdate_dim,'nn') target_dims_predictand <- sdate_dim } - + else { stop(paste0(lr_method, " method is not implemented yet")) } - + # Apply the linear regressions res <- Apply(list(predictor, predictand), target_dims = list(target_dims_predictor, target_dims_predictand), fun = .intlr, loocv = loocv, ncores = ncores)$output1 - + names(dim(res))[1] <- sdate_dim - + # restore ensemble dimension in observations if it existed originally if (restore_ens) { predictand <- s2dv::InsertDim(predictand, posdim = 1, lendim = 1, name = member_dim) } - + # Return a list of three elements res <- list(data = res, obs = predictand, lon = lons, lat = lats) - + return(res) } @@ -444,20 +451,20 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t # Atomic function to generate and apply the linear regressions #----------------------------------- .intlr <- function(x, y, loocv) { - + tmp_df <- data.frame(x = x, y = y) # if the data is all NA, force return return NA if (all(is.na(tmp_df)) | (sum(apply(tmp_df, 2, function(x) !all(is.na(x)))) == 1)) { - + n <- nrow(tmp_df) res <- rep(NA, n) - + } else { # training - lm1 <- train_lm(df = tmp_df, loocv = loocv) + lm1 <- .train_lm(df = tmp_df, loocv = loocv) # prediction - res <- pred_lm(lm1 = lm1, df = tmp_df, loocv = loocv) + res <- .pred_lm(lm1 = lm1, df = tmp_df, loocv = loocv) } return(res) @@ -467,39 +474,39 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t # Function to generate the linear regressions. 
# Returns a list #----------------------------------- -train_lm <- function(df, loocv) { - +.train_lm <- function(df, loocv) { + # Remove predictor columns containing only NA's - df <- df[ , apply(df[,colnames(df) != 'y'], 2, function(x) !all(is.na(x)))] - + df <- df[ ,apply(as.matrix(df[,colnames(df) != 'y'],nrow(df),ncol(df)-1), 2, function(x) !all(is.na(x)))] + if (loocv) { - + lm1 <- lapply(1:nrow(df), function(j) { - if (all(is.na(df[-j,]$y))) { - return(NA) - } else { - return(lm(df[-j,], formula = y ~ .)) - }}) + if (all(is.na(df[-j,]$y))) { + return(NA) + } else { + return(lm(df[-j,], formula = y ~ .)) + }}) } else { - + lm1 <- ifelse(all(is.na(df$y)), NA, list(lm(data = df, formula = y ~ .))) } - + return(lm1) } #----------------------------------- # Function to apply the linear regressions. #----------------------------------- -pred_lm <- function(df, lm1, loocv) { - +.pred_lm <- function(df, lm1, loocv) { + if (loocv) { pred_vals <- sapply(1:nrow(df), function(j) { - if (all(is.na(lm1[[j]]))) { - return(NA) - } else { - return(predict(lm1[[j]], df[j,])) - }}) + if (all(is.na(lm1[[j]]))) { + return(NA) + } else { + return(predict(lm1[[j]], df[j,])) + }}) } else { if (!is.na(lm1)) { pred_vals_ls <- lapply(lm1, predict, data = df) @@ -515,21 +522,19 @@ pred_lm <- function(df, lm1, loocv) { # Function to find N nearest neighbours. # 'coar' is an array with named dimensions #----------------------------------- -find_nn <- function(coar, lats_hres, lons_hres, lats_coar, lons_coar, lat_dim, lon_dim, nn = 4) { - +.find_nn <- function(coar, lats_hres, lons_hres, lats_coar, lons_coar, lat_dim, lon_dim, nn = 4, ncores = NULL) { + # Sort the distances from closest to furthest idx_lat <- as.array(sapply(lats_hres, function(x) order(abs(lats_coar - x))[1:nn])) idx_lon <- as.array(sapply(lons_hres, function(x) order(abs(lons_coar - x))[1:nn])) - + names(dim(idx_lat)) <- c('nn', lat_dim) names(dim(idx_lon)) <- c('nn', lon_dim) - + # obtain the values of the nearest neighbours nearest <- Apply(list(coar, idx_lat, idx_lon), target_dims = list(c(lat_dim, lon_dim), lat_dim, lon_dim), - fun = function(x, y, z) x[y, z])$output1 - + fun = function(x, y, z) x[y, z], ncores = ncores)$output1 + return(nearest) } - - diff --git a/modules/Downscaling/tmp/LogisticReg.R b/modules/Downscaling/tmp/LogisticReg.R index c514d254..f569610f 100644 --- a/modules/Downscaling/tmp/LogisticReg.R +++ b/modules/Downscaling/tmp/LogisticReg.R @@ -62,8 +62,8 @@ #'takes the first and last elements of the latitudes and longitudes. #'@param loocv a logical vector indicating whether to perform leave-one-out cross-validation #'in the fitting of the logistic regression. Default to FALSE. -#'@param ncores an integer indicating the number of cores to use in parallel computation. -#' +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#'The default value is NULL. 
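#'For orientation, a minimal call could look as follows (a sketch only: the grid string,
#'interpolation method and core count are illustrative values, and 'exp' and 'obs' are
#'assumed to be 's2dv_cube' objects):
#'  res <- CST_LogisticReg(exp = exp, obs = obs, target_grid = "r360x180",
#'                         int_method = "bilinear", log_reg_method = "ens_mean",
#'                         probs_cat = c(1/3, 2/3), loocv = TRUE, ncores = 4)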
#'@import multiApply #'@import nnet #'@importFrom laply plyr @@ -91,7 +91,7 @@ CST_LogisticReg <- function(exp, obs, target_grid, int_method = NULL, log_reg_method = "ens_mean", probs_cat = c(1/3,2/3), return_most_likely_cat = FALSE, points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", - member_dim = "member", region = NULL, loocv = FALSE, ncores = 1) { + member_dim = "member", region = NULL, loocv = FALSE, ncores = NULL) { if (!inherits(exp,'s2dv_cube')) { stop("Parameter 'exp' must be of the class 's2dv_cube'") @@ -198,8 +198,8 @@ CST_LogisticReg <- function(exp, obs, target_grid, int_method = NULL, log_reg_me #'takes the first and last elements of the latitudes and longitudes. #'@param loocv a logical vector indicating whether to perform leave-one-out cross-validation #'in the fitting of the logistic regression. Default to FALSE. -#'@param ncores an integer indicating the number of cores to use in parallel computation. -#' +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#'The default value is NULL. #'@import multiApply #'@import nnet #'@importFrom laply plyr @@ -227,7 +227,7 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target int_method = NULL, log_reg_method = "ens_mean", probs_cat = c(1/3,2/3), return_most_likely_cat = FALSE, points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", member_dim = "member", - source_file = NULL, region = NULL, loocv = FALSE, ncores = 1) { + source_file = NULL, region = NULL, loocv = FALSE, ncores = NULL) { #----------------------------------- # Checkings @@ -303,7 +303,14 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target "'obs_lats' and 'obs_lons'.") region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)]) } - + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + # the code is not yet prepared to handle members in the observations restore_ens <- FALSE if (member_dim %in% names(dim(obs))) { @@ -319,18 +326,18 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid, method_remap = int_method, points = points, source_file = source_file, lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp, - region = region) + region = region, ncores = ncores) # compute ensemble mean anomalies if (log_reg_method == "ens_mean") { - predictor <- get_ens_mean_anom(obj_ens = exp_interpolated$data, member_dim = member_dim, sdate_dim = sdate_dim) + predictor <- .get_ens_mean_anom(obj_ens = exp_interpolated$data, member_dim = member_dim, sdate_dim = sdate_dim, ncores = ncores) target_dims_predictor <- sdate_dim } else if (log_reg_method == "ens_mean_sd") { - ens_mean_anom <- get_ens_mean_anom(obj_ens = exp_interpolated$data, member_dim = member_dim, - sdate_dim = sdate_dim) - ens_sd <- get_ens_sd(obj_ens = exp_interpolated$data, member_dim = member_dim) + ens_mean_anom <- .get_ens_mean_anom(obj_ens = exp_interpolated$data, member_dim = member_dim, + sdate_dim = sdate_dim, ncores = ncores) + ens_sd <- .get_ens_sd(obj_ens = exp_interpolated$data, member_dim = member_dim, ncores = ncores) #merge two arrays into one array of predictors predictor <- 
abind(ens_mean_anom, ens_sd, along = 1/2) @@ -338,7 +345,7 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target target_dims_predictor <- c(sdate_dim, "pred") } else if (log_reg_method == "sorted_members") { - predictor <- sort_members(obj_ens = exp_interpolated$data, member_dim = member_dim) + predictor <- .sort_members(obj_ens = exp_interpolated$data, member_dim = member_dim, ncores = ncores) target_dims_predictor <- c(sdate_dim, member_dim) } else { @@ -352,7 +359,7 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid, method_remap = int_method, points = points, source_file = source_file, lat_dim = lat_dim, lon_dim = lon_dim, - method_point_interp = method_point_interp, region = region) + method_point_interp = method_point_interp, region = region, ncores = ncores) obs_ref <- obs_interpolated$data } else { obs_ref <- obs @@ -362,19 +369,19 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) { terc <- convert2prob(as.vector(x), prob = probs_cat) apply(terc, 1, function(r) which (r == 1))}, - output_dims = sdate_dim)$output1 + output_dims = sdate_dim, ncores = ncores)$output1 res <- Apply(list(predictor, obs_cat), target_dims = list(target_dims_predictor, sdate_dim), fun = function(x, y) - .log_reg(x = x, y = y, loocv = loocv), output_dims = c(sdate_dim, "category"))$output1 + .log_reg(x = x, y = y, loocv = loocv), output_dims = c(sdate_dim, "category"), ncores = ncores)$output1 if (return_most_likely_cat) { - res <- Apply(res, target_dims = c(sdate_dim, "category"), most_likely_category, - output_dims = sdate_dim)$output1 + res <- Apply(res, target_dims = c(sdate_dim, "category"), .most_likely_category, + output_dims = sdate_dim, ncores = ncores)$output1 } # restore ensemble dimension in observations if it existed originally if (restore_ens) { - obs_ref <- s2dv::InsertDim(obs_ref, posdim = 1, lendim = 1, name = member_dim) + obs_ref <- s2dv::InsertDim(obs_ref, posdim = 1, lendim = 1, name = member_dim, ncores = ncores) } res <- list(data = res, obs = obs_ref, lon = exp_interpolated$lon, lat = exp_interpolated$lat) @@ -382,7 +389,7 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target return(res) } -most_likely_category <- function(data) { +.most_likely_category <- function(data) { # data, expected dims: start date, category (in this order) if (all(is.na(data))) { @@ -393,39 +400,39 @@ most_likely_category <- function(data) { return(mlc) } -sort_members <- function(obj_ens, member_dim) { +.sort_members <- function(obj_ens, member_dim, ncores = NULL) { - sorted <- Apply(obj_ens, target_dims = member_dim, sort, decreasing = TRUE, na.last = TRUE)$output1 + sorted <- Apply(obj_ens, target_dims = member_dim, sort, decreasing = TRUE, na.last = TRUE, ncores = ncores)$output1 return(sorted) } -get_ens_sd <- function(obj_ens, member_dim) { +.get_ens_sd <- function(obj_ens, member_dim, ncores = NULL) { # compute ensemble spread - ens_sd <- Apply(obj_ens, target_dims = member_dim, sd, na.rm = TRUE)$output1 + ens_sd <- Apply(obj_ens, target_dims = member_dim, sd, na.rm = TRUE, ncores = ncores)$output1 return(ens_sd) } -get_ens_mean_anom <- function(obj_ens, member_dim, sdate_dim) { +.get_ens_mean_anom <- function(obj_ens, member_dim, sdate_dim, ncores = NULL) { require(s2dv) # compute climatology - clim <- 
Apply(obj_ens, target_dims = c(member_dim, sdate_dim), mean)$output1 + clim <- Apply(obj_ens, target_dims = c(member_dim, sdate_dim), mean, ncores = ncores)$output1 # compute ensemble mean - ens_mean <- Apply(obj_ens, target_dims = member_dim, mean, na.rm = TRUE)$output1 + ens_mean <- Apply(obj_ens, target_dims = member_dim, mean, na.rm = TRUE, ncores = ncores)$output1 # compute ensemble mean anomalies - anom <- Ano(ens_mean, clim) + anom <- Ano(ens_mean, clim, ncores = ncores) return(anom) } # atomic functions for logistic regressions -.log_reg <- function(x, y, loocv) { +.log_reg <- function(x, y, loocv,probs_cat) { tmp_df <- data.frame(x = x, y = y) @@ -437,10 +444,10 @@ get_ens_mean_anom <- function(obj_ens, member_dim, sdate_dim) { } else { # training - lm1 <- train_lr(df = tmp_df, loocv = loocv) + lm1 <- .train_lr(df = tmp_df, loocv = loocv) # prediction - res <- pred_lr(lm1 = lm1, df = tmp_df, loocv = loocv) + res <- pred_lr(lm1 = lm1, df = tmp_df, loocv = loocv,probs_cat=probs_cat) } return(res) } @@ -448,7 +455,7 @@ get_ens_mean_anom <- function(obj_ens, member_dim, sdate_dim) { #----------------------------------- # Function to train the logistic regressions. #----------------------------------- -train_lr <- function(df, loocv) { +.train_lr <- function(df, loocv) { require(nnet) @@ -457,11 +464,11 @@ train_lr <- function(df, loocv) { if (loocv) { - lm1 <- lapply(1:nrow(df), function(j) multinom(y ~ ., data = df[ -j, ])) + lm1 <- lapply(1:nrow(df), function(j) ifelse(length(unique(df[-j,]$y))==1,NA,return(multinom(y ~ ., data = df[ -j, ])))) ## if all the observed categories are the same for the corresponding loocv step, assign NA to the relevant lm1 element. } else { - lm1 <- list(multinom(y ~ ., data = df)) + lm1 <- ifelse(length(unique(df$y))==1,list(NA),list(multinom(y ~ ., data = df))) } @@ -471,7 +478,7 @@ train_lr <- function(df, loocv) { #----------------------------------- # Function to apply the logistic regressions. #----------------------------------- -pred_lr <- function(df, lm1, loocv) { +pred_lr <- function(df, lm1, loocv,probs_cat) { require(plyr) @@ -479,22 +486,58 @@ pred_lr <- function(df, lm1, loocv) { # The error: "Error: Results must have the same dimensions." can # appear when the number of sdates is insufficient - pred_vals_ls <- list() - for (j in 1:nrow(df)) { - pred_vals_ls[[j]] <- predict(lm1[[j]], df[j,], type = "probs") + + pred_vals_ls <-list() + for (j in 1:nrow(df)) { + if(length(unique(df[-j,]$y))==1) + { + pred_vals_ls[[j]] <-NA ## if all the observed categories are the same for the corresponding loocv step, assign NA as predicted class (we need this step for the two-class cases. predict.multinom function provides the probability for the second category for two-class examples.we can obtain the prob of the first categort by 1-prob of second category) + } else{ + pred_vals_ls[[j]] <- predict(lm1[[j]], df[j,], type = "probs") + } } - + pred_vals <- laply(pred_vals_ls, .fun = as.array) + + if( length(probs_cat)+1==2) + { + + if (any(is.na(pred_vals))) ### if all the observed categories are the same for the corresponding loocv step, assign 100% probability to the observed categories for the relevant prediction. 
+ { + ifelse(names(which.max(table(df$y)))==1,pred_vals[is.na(pred_vals)]<-0, + pred_vals[is.na(pred_vals)]<-1) + } + pred_vals_dum<-array(NA,dim=c(nrow(df),2)) + pred_vals_dum[,2]<-pred_vals + pred_vals_dum[,1]<-1-pred_vals + pred_vals<-pred_vals_dum + colnames(pred_vals)<-c(1,2) + } - } else { + } else { # type = class, probs #pred_vals_ls <- lapply(lm1, predict, data = df, type = "probs") #pred_vals <- unlist(pred_vals_ls) pred_vals <- predict(lm1[[1]], df, type = "probs") + + if( length(probs_cat)+1==2) + { + + if (any(is.na(pred_vals))) ### if all the observed categories are the same for the corresponding loocv step, assign 100% probability to the observed categories for the relevant prediction. + { + ifelse(names(which.max(table(df$y)))==1,pred_vals[is.na(pred_vals)]<-0, + pred_vals[is.na(pred_vals)]<-1) + } + + pred_vals_dum<-array(NA,dim=c(nrow(df),2)) + pred_vals_dum[,2]<-pred_vals + pred_vals_dum[,1]<-1-pred_vals + pred_vals<-pred_vals_dum + colnames(pred_vals)<-c(1,2) + } + } return(pred_vals) } - - diff --git a/modules/Downscaling/tmp/Utils.R b/modules/Downscaling/tmp/Utils.R index 4c727465..d332aa3c 100644 --- a/modules/Downscaling/tmp/Utils.R +++ b/modules/Downscaling/tmp/Utils.R @@ -1,8 +1,9 @@ .check_coords <- function(lat1, lon1, lat2, lon2) { - match <- TRUE - if (!((length(lat1) == length(lat2)) & (length(lon1) == length(lon2)))) { + if (all(lat1 == lat2) & all(lon1 == lon2)) { + match <- TRUE + } else { match <- FALSE - } + } return(match) } @@ -28,4 +29,11 @@ return(Reorder(data = arr_to_reorder, order = orddim)) } +#.check_coords <- function(lat1, lon1, lat2, lon2) { +# match <- TRUE +# if (!((length(lat1) == length(lat2)) & (length(lon1) == length(lon2)))) { +# match <- FALSE +# } +# return(match) +#} -- GitLab From b937892c5a2c4d27971402d18b3060c568788537 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 11 May 2023 11:26:02 +0200 Subject: [PATCH 37/52] Add 'daily' option --- modules/Calibration/Calibration.R | 3 ++- modules/Loading/Loading.R | 9 ++++++--- modules/Loading/dates2load.R | 3 ++- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 899b1291..318af90b 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -108,7 +108,8 @@ calibrate_datasets <- function(recipe, data) { fcst_calibrated <- NULL } } - } else if (recipe$Analysis$Variables$freq == "daily_mean") { + } else if ((recipe$Analysis$Variables$freq == "daily_mean") || + (recipe$Analysis$Variables$freq == "daily")) { # Daily data calibration using Quantile Mapping if (!(method %in% c("qmap"))) { error(recipe$Run$logger, diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index a63f031c..e5e1d83e 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -120,7 +120,8 @@ load_datasets <- function(recipe) { split_multiselected_dims = split_multiselected_dims, retrieve = TRUE) - if (recipe$Analysis$Variables$freq == "daily_mean") { + if ((recipe$Analysis$Variables$freq == "daily_mean") || + (recipe$Analysis$Variables$freq == "daily")) { # Adjusts dims for daily case, could be removed if startR allows # multidim split names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear" @@ -176,7 +177,8 @@ load_datasets <- function(recipe) { split_multiselected_dims = split_multiselected_dims, retrieve = TRUE) - if (recipe$Analysis$Variables$freq == "daily_mean") { + if ((recipe$Analysis$Variables$freq == "daily_mean") || + 
(recipe$Analysis$Variables$freq == "daily")) { # Adjusts dims for daily case, could be removed if startR allows # multidim split names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear" @@ -242,7 +244,8 @@ load_datasets <- function(recipe) { split_multiselected_dims = TRUE, retrieve = TRUE) - } else if (store.freq == "daily_mean") { + } else if ((store.freq == "daily_mean") || + (store.freq == "daily")) { # Get year and month for file_date dates_file <- sapply(dates, format, '%Y%m') diff --git a/modules/Loading/dates2load.R b/modules/Loading/dates2load.R index 0e3613f3..03563268 100644 --- a/modules/Loading/dates2load.R +++ b/modules/Loading/dates2load.R @@ -60,7 +60,8 @@ dates2load <- function(recipe, logger) { get_timeidx <- function(sdates, ltmin, ltmax, time_freq="monthly_mean") { - if (time_freq == "daily_mean") { + if ((time_freq == "daily_mean") || + (time_freq == "daily")) { sdates <- ymd(sdates) idx_min <- sdates + months(ltmin - 1) -- GitLab From 8acaa3386c2d674dc56de5a897fd7ad83c066207 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 11 May 2023 13:13:17 +0200 Subject: [PATCH 38/52] Add 'daily' to data_summary() --- tools/data_summary.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/data_summary.R b/tools/data_summary.R index 5f532dcf..712bd825 100644 --- a/tools/data_summary.R +++ b/tools/data_summary.R @@ -8,7 +8,8 @@ data_summary <- function(data_cube, recipe) { object_name <- deparse(substitute(data_cube)) if (recipe$Analysis$Variables$freq == "monthly_mean") { date_format <- "%b %Y" - } else if (recipe$Analysis$Variables$freq == "daily_mean") { + } else if ((recipe$Analysis$Variables$freq == "daily_mean") || + (recipe$Analysis$Variables$freq == "daily")) { date_format <- "%b %d %Y" } months <- unique(format(as.Date(data_cube$attrs$Dates), format = '%B')) -- GitLab From c3c3710ffee765610a2fd92ceffe27647bca3016 Mon Sep 17 00:00:00 2001 From: eduzenli Date: Thu, 11 May 2023 16:00:02 +0200 Subject: [PATCH 39/52] edited to retrieve daily tasmax(min) data from the folder named daily, instead of daily_mean. 
For CERRA and system5c3s datasets --- conf/archive.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/conf/archive.yml b/conf/archive.yml index 87e9cd65..1b013c5f 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -5,9 +5,9 @@ archive: name: "ECMWF SEAS5" institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/ecmwf/system5c3s/" + daily: {"tasmin":"/", "tasmax":"/"} daily_mean: {"tas":"_f6h/", "rsds":"_s0-24h/", "prlr":"_s0-24h/", "sfcWind":"_f6h/", - "tasmin":"_f24h/", "tasmax":"_f24h/", "ta300":"_f12h/", "ta500":"_f12h/", "ta850":"_f12h/", "g300":"_f12h/", "g500":"_f12h/", "g850":"_f12h/", "tdps":"_f6h/", "hurs":"_f6h/"} @@ -183,6 +183,7 @@ archive: name: "ECMWF CERRA" institution: "European Centre for Medium-Range Weather Forecasts" src: "recon/ecmwf/cerra/" + daily: {"tasmax":"-r2631x1113/", "tasmin":"-r2631x1113/"} daily_mean: {"hurs":"_f3h-r2631x1113/", "ps":"_f3h-r2631x1113/", "sfcWind":"_f3h-r2631x1113/", "tas":"_f3h-r2631x1113/", "winddir":"_f3h-r2631x1113/"} monthly_mean: {"hurs":"_f3h-r2631x1113/", "ps":"_f3h-r2631x1113/", "sfcWind":"_f3h-r2631x1113/", -- GitLab From fcc1362f51dc020bc0ca281bcf6b698b982f22d7 Mon Sep 17 00:00:00 2001 From: eduzenli Date: Fri, 12 May 2023 15:31:05 +0200 Subject: [PATCH 40/52] dev-point-obs updates are included --- modules/Downscaling/tmp/Intbc.R | 47 ++++++++++++++++--------- modules/Downscaling/tmp/Interpolation.R | 13 ++++++- modules/Downscaling/tmp/Intlr.R | 27 +++++++++----- 3 files changed, 61 insertions(+), 26 deletions(-) diff --git a/modules/Downscaling/tmp/Intbc.R b/modules/Downscaling/tmp/Intbc.R index 1cb558d5..a436f90e 100644 --- a/modules/Downscaling/tmp/Intbc.R +++ b/modules/Downscaling/tmp/Intbc.R @@ -82,9 +82,9 @@ CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, point res <- Intbc(exp = exp$data, obs = obs$data, exp_lats = exp$coords[[lat_dim]], exp_lons = exp$coords[[lon_dim]], obs_lats = obs$coords[[lat_dim]], obs_lons = obs$coords[[lon_dim]], target_grid = target_grid, int_method = int_method, bc_method = bc_method, points = points, - source_file = exp$attrs$source_files[1], method_point_interp = method_point_interp, - lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim, member_dim = member_dim, - region = region, ncores = ncores, ...) + source_file_exp = exp$attrs$source_files[1], source_file_obs = obs$attrs$source_files[1], + method_point_interp = method_point_interp, lat_dim = lat_dim, lon_dim = lon_dim, + sdate_dim = sdate_dim, member_dim = member_dim, region = region, ncores = ncores, ...) # Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data exp$data <- res$data @@ -156,7 +156,9 @@ CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, point #''data' in exp and obs. Default set to "sdate". #'@param member_dim a character vector indicating the member dimension name in the element #''data' in exp and obs. Default set to "member". -#'@param source_file a character vector with a path to an example file of the exp data. +#'@param source_file_exp a character vector with a path to an example file of the exp data. +#'Only needed if the downscaling is to a point location. +#'@param source_file_obs a character vector with a path to an example file of the obs data. #'Only needed if the downscaling is to a point location. #'@param region a numeric vector indicating the borders of the region defined in obs. #'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. 
lonmin refers @@ -187,7 +189,8 @@ CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, point #'@export Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, bc_method, int_method = NULL, points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", - time_dim = "time", member_dim = "member", source_file = NULL, region = NULL, ncores = NULL, ...) { + time_dim = "time", member_dim = "member", source_file_exp = NULL, source_file_obs = NULL, + region = NULL, ncores = NULL, ...) { if (!inherits(bc_method, 'character')) { stop("Parameter 'bc_method' must be of the class 'character'") @@ -209,14 +212,13 @@ Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, stop("Parameter 'member_dim' must be of the class 'character'") } - # Do not allow synonims for lat (latitude), lon (longitude) and time (sdate) dimension names - if (is.na(match(lon_dim, names(dim(exp)))) | is.na(match(lon_dim, names(dim(obs))))) { - stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter ", + if (is.na(match(lon_dim, names(dim(exp))))) { + stop("Missing longitude dimension in 'exp', or does not match the parameter ", "'lon_dim'") } - if (is.na(match(lat_dim, names(dim(exp)))) | is.na(match(lat_dim, names(dim(obs))))) { - stop("Missing latitude dimension in 'exp' and/or 'obs', or does not match the parameter ", + if (is.na(match(lat_dim, names(dim(exp))))) { + stop("Missing latitude dimension in 'exp', or does not match the parameter ", "'lat_dim'") } @@ -235,9 +237,20 @@ Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, "Accepted methods are 'simple_bias', 'calibration', 'quantile_mapping'. The abbreviations ", "'sbc', 'cal', 'qm' can also be used.") } + + # When observations are pointwise + if (!is.null(points) & !is.na(match("location", names(dim(obs))))) { + point_obs <- T + # dimension aux in obs is needed + if (is.na(match("aux", names(dim(obs))))) { + obs <- InsertDim(obs, posdim = 1, lendim = 1, name = "aux") + } + } else { + point_obs <- F + } - if (!is.null(points) & is.null(source_file)) { - stop("No source file found. Source file must be provided in the parameter 'source_file'.") + if (!is.null(points) & (is.null(source_file_exp))) { + stop("No source file found. 
Source file must be provided in the parameter 'source_file_exp'.") } if (!is.null(points) & is.null(method_point_interp)) { @@ -260,18 +273,18 @@ Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, } exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid, - method_remap = int_method, points = points, source_file = source_file, + method_remap = int_method, points = points, source_file = source_file_exp, lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp, region = region, ncores = ncores) # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to # the same grid to force the matching if ((!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats, - lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !is.null(points)) { + lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !(point_obs)) { obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid, - method_remap = int_method, points = points, source_file = source_file, - lat_dim = lat_dim, lon_dim = lon_dim, - method_point_interp = method_point_interp, region = region, ncores = ncores) + method_remap = int_method, points = points, source_file = source_file_obs, + lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp, + region = region, ncores = ncores) obs_ref <- obs_interpolated$data } else { obs_ref <- obs diff --git a/modules/Downscaling/tmp/Interpolation.R b/modules/Downscaling/tmp/Interpolation.R index e939e0c3..4cf0c1ed 100644 --- a/modules/Downscaling/tmp/Interpolation.R +++ b/modules/Downscaling/tmp/Interpolation.R @@ -688,7 +688,18 @@ Interpolation <- function(exp, lats, lons, points = NULL, source_file = NULL, me #----------------- is <- weights.df$i js <- weights.df$j - + + #----------------- + # If any of the indices happens to be 0, + # change it by 1 but give a warning + #----------------- + if (any(is == 0) | any(js == 0)) { + warning("Is the point location in the border of the region? The code can run but ", + "results will be less accurate than those obtained with a larger region." 
) + is[is == 0] <- 1 + js[js == 0] <- 1 + } + #----------------- # Get indices of original is and js in unique(is),unique(js) that were requested #----------------- diff --git a/modules/Downscaling/tmp/Intlr.R b/modules/Downscaling/tmp/Intlr.R index b4b8a75e..f31b7d3b 100644 --- a/modules/Downscaling/tmp/Intlr.R +++ b/modules/Downscaling/tmp/Intlr.R @@ -259,13 +259,13 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t stop("Parameter 'large_scale_predictor_dimname' must be of the class 'character'") } - if (is.na(match(lon_dim, names(dim(exp)))) | is.na(match(lon_dim, names(dim(obs))))) { - stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter ", + if (is.na(match(lon_dim, names(dim(exp))))) { + stop("Missing longitude dimension in 'exp', or does not match the parameter ", "'lon_dim'") } - if (is.na(match(lat_dim, names(dim(exp)))) | is.na(match(lat_dim, names(dim(obs))))) { - stop("Missing latitude dimension in 'exp' and/or 'obs', or does not match the parameter ", + if (is.na(match(lat_dim, names(dim(exp))))) { + stop("Missing latitude dimension in 'exp', or does not match the parameter ", "'lat_dim'") } @@ -273,10 +273,21 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t stop("Missing start date dimension in 'exp' and/or 'obs', or does not match the parameter ", "'sdate_dim'") } + + # When observations are pointwise + if (!is.null(points) & !is.na(match("location", names(dim(obs))))) { + point_obs <- T + # dimension aux in obs is needed + if (is.na(match("aux", names(dim(obs))))) { + obs <- InsertDim(obs, posdim = 1, lendim = 1, name = "aux") + } + } else { + point_obs <- F + } - if (!is.null(points) & (is.null(source_file_exp) | is.null(source_file_obs))) { - stop("No source files found. Source files for exp and obs must be provided in the parameters ", - "'source_file_exp' and 'source_file_obs', respectively.") + if (!is.null(points) & is.null(source_file_exp)) { + stop("No source file found. Source file for exp must be provided in the parameter ", + "'source_file_exp'.") } if (!is.null(points) & is.null(method_point_interp)) { @@ -344,7 +355,7 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to # the same grid to force the matching if ((!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats, - lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !is.null(points)) { + lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !(point_obs)) { obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid, points = points, method_point_interp = method_point_interp, source_file = source_file_obs, lat_dim = lat_dim, lon_dim = lon_dim, -- GitLab From 407561deb7994e6a2b2a36f8909bde9f56820219 Mon Sep 17 00:00:00 2001 From: eduzenli Date: Fri, 12 May 2023 15:31:27 +0200 Subject: [PATCH 41/52] dev-point-obs and dev-logreg updates are included --- modules/Downscaling/tmp/LogisticReg.R | 98 ++++++++++++++------------- 1 file changed, 51 insertions(+), 47 deletions(-) diff --git a/modules/Downscaling/tmp/LogisticReg.R b/modules/Downscaling/tmp/LogisticReg.R index f569610f..60681b97 100644 --- a/modules/Downscaling/tmp/LogisticReg.R +++ b/modules/Downscaling/tmp/LogisticReg.R @@ -53,8 +53,6 @@ #''data' in exp and obs. Default set to "sdate". 
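As a toy illustration of the pointwise-observation handling introduced in the hunk above: when 'obs' carries a 'location' dimension, a length-one 'aux' dimension is prepended so downstream operations that expect a spatial dimension still find one. The snippet assumes InsertDim() comes from s2dv, as used elsewhere in these modules; the array dimensions are made up.

library(s2dv)
# Toy pointwise observations: dims (location, sdate)
obs <- array(rnorm(3 * 10), dim = c(location = 3, sdate = 10))
if (is.na(match("aux", names(dim(obs))))) {
  obs <- InsertDim(obs, posdim = 1, lendim = 1, name = "aux")
}
dim(obs)  # aux location sdate -> 1 3 10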
#'@param member_dim a character vector indicating the member dimension name in the element #''data' in exp and obs. Default set to "member". -#'@param source_file a character vector with a path to an example file of the exp data. -#'Only needed if the downscaling is to a point location. #'@param region a numeric vector indicating the borders of the region defined in obs. #'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers #'to the left border, while lonmax refers to the right border. latmin indicates the lower @@ -108,8 +106,8 @@ CST_LogisticReg <- function(exp, obs, target_grid, int_method = NULL, log_reg_me int_method = int_method, log_reg_method = log_reg_method, points = points, method_point_interp = method_point_interp, lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim, member_dim = member_dim, - source_file = exp$attrs$source_files[1], region = region, loocv = loocv, - ncores = ncores) + source_file_exp = exp$attrs$source_files[1], source_file_obs = obs$attrs$source_files[1], + region = region, loocv = loocv, ncores = ncores) # Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data exp$data <- res$data @@ -189,7 +187,9 @@ CST_LogisticReg <- function(exp, obs, target_grid, int_method = NULL, log_reg_me #''data' in exp and obs. Default set to "sdate". #'@param member_dim a character vector indicating the member dimension name in the element #''data' in exp and obs. Default set to "member". -#'@param source_file a character vector with a path to an example file of the exp data. +#'@param source_file_exp a character vector with a path to an example file of the exp data. +#'Only needed if the downscaling is to a point location. +#'@param source_file_obs a character vector with a path to an example file of the obs data. #'Only needed if the downscaling is to a point location. #'@param region a numeric vector indicating the borders of the region defined in obs. #'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. 
lonmin refers @@ -224,10 +224,10 @@ CST_LogisticReg <- function(exp, obs, target_grid, int_method = NULL, log_reg_me #'probs_cat = c(1/3, 2/3)) #'@export LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, - int_method = NULL, log_reg_method = "ens_mean", probs_cat = c(1/3,2/3), + int_method = NULL, log_reg_method = "ens_mean", probs_cat = c(1/3,2/3), return_most_likely_cat = FALSE, points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", member_dim = "member", - source_file = NULL, region = NULL, loocv = FALSE, ncores = NULL) { + source_file_exp = NULL, source_file_obs = NULL, region = NULL, loocv = FALSE, ncores = NULL) { #----------------------------------- # Checkings @@ -260,8 +260,12 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target stop("Parameter 'member_dim' must be of the class 'character'") } - if (!is.null(source_file) & !inherits(source_file, 'character')) { - stop("Parameter 'source_file' must be of the class 'character'") + if (!is.null(source_file_exp) & !inherits(source_file_exp, 'character')) { + stop("Parameter 'source_file_exp' must be of the class 'character'") + } + + if (!is.null(source_file_obs) & !inherits(source_file_obs, 'character')) { + stop("Parameter 'source_file_obs' must be of the class 'character'") } if (!inherits(loocv, 'logical')) { @@ -288,8 +292,19 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target "'member_dim'") } - if (!is.null(points) & (is.null(source_file))) { - stop("No source files found. One source file for exp must be provided in the parameter 'source_file'.") + # When observations are pointwise + if (!is.null(points) & !is.na(match("location", names(dim(obs))))) { + point_obs <- T + # dimension aux in obs is needed + if (is.na(match("aux", names(dim(obs))))) { + obs <- InsertDim(obs, posdim = 1, lendim = 1, name = "aux") + } + } else { + point_obs <- F + } + + if (!is.null(points) & (is.null(source_file_exp))) { + stop("No source file found. 
Source file must be provided in the parameter 'source_file_exp'.") } if (!is.null(points) & is.null(method_point_interp)) { @@ -324,13 +339,14 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target } exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid, - method_remap = int_method, points = points, source_file = source_file, + method_remap = int_method, points = points, source_file = source_file_exp, lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp, region = region, ncores = ncores) # compute ensemble mean anomalies if (log_reg_method == "ens_mean") { - predictor <- .get_ens_mean_anom(obj_ens = exp_interpolated$data, member_dim = member_dim, sdate_dim = sdate_dim, ncores = ncores) + predictor <- .get_ens_mean_anom(obj_ens = exp_interpolated$data, member_dim = member_dim, sdate_dim = sdate_dim, + ncores = ncores) target_dims_predictor <- sdate_dim } @@ -355,24 +371,29 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to # the same grid to force the matching if ((!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats, - lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !is.null(points)) { + lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !(point_obs)) { obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid, - method_remap = int_method, points = points, source_file = source_file, - lat_dim = lat_dim, lon_dim = lon_dim, - method_point_interp = method_point_interp, region = region, ncores = ncores) + method_remap = int_method, points = points, source_file = source_file_obs, + lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp, + region = region, ncores = ncores) obs_ref <- obs_interpolated$data } else { obs_ref <- obs } # convert observations to categorical predictands - obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) { - terc <- convert2prob(as.vector(x), prob = probs_cat) - apply(terc, 1, function(r) which (r == 1))}, - output_dims = sdate_dim, ncores = ncores)$output1 + +obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) { + if (!any(!is.na(x))) { + rep(NA,length(x)) + } else { + terc <- convert2prob(as.vector(x), prob = probs_cat) + apply(terc, 1, function(r) which (r == 1))}}, + output_dims = sdate_dim, ncores = ncores)$output1 + res <- Apply(list(predictor, obs_cat), target_dims = list(target_dims_predictor, sdate_dim), fun = function(x, y) - .log_reg(x = x, y = y, loocv = loocv), output_dims = c(sdate_dim, "category"), ncores = ncores)$output1 + .log_reg(x = x, y = y, loocv = loocv,probs_cat=probs_cat), output_dims = c(sdate_dim, "category"), ncores = ncores)$output1 if (return_most_likely_cat) { res <- Apply(res, target_dims = c(sdate_dim, "category"), .most_likely_category, @@ -437,10 +458,11 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target tmp_df <- data.frame(x = x, y = y) # if the data is all NA, force return return NA - if (all(is.na(tmp_df)) | (sum(apply(tmp_df, 2, function(x) !all(is.na(x)))) == 1)) { + if (all(is.na(tmp_df)) | (sum(apply(tmp_df, 2, function(x) !all(is.na(x)))) == 1) | all(is.na(tmp_df$y))) { - n <- nrow(tmp_df) - res <- matrix(NA, nrow = n, ncol = length(unique(tmp_df$y))) + n1 <- nrow(tmp_df) + n2<- length(probs_cat)+1 + res <- matrix(NA, nrow = n1, ncol = n2) } else { # training 
@@ -464,11 +486,11 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target if (loocv) { - lm1 <- lapply(1:nrow(df), function(j) ifelse(length(unique(df[-j,]$y))==1,NA,return(multinom(y ~ ., data = df[ -j, ])))) ## if all the observed categories are the same for the corresponding loocv step, assign NA to the relevant lm1 element. + lm1 <- lapply(1:nrow(df), function(j) multinom(y ~ ., data = df[ -j, ])) } else { - lm1 <- ifelse(length(unique(df$y))==1,list(NA),list(multinom(y ~ ., data = df))) + lm1 <- list(multinom(y ~ ., data = df)) } @@ -489,31 +511,20 @@ pred_lr <- function(df, lm1, loocv,probs_cat) { pred_vals_ls <-list() for (j in 1:nrow(df)) { - if(length(unique(df[-j,]$y))==1) - { - pred_vals_ls[[j]] <-NA ## if all the observed categories are the same for the corresponding loocv step, assign NA as predicted class (we need this step for the two-class cases. predict.multinom function provides the probability for the second category for two-class examples.we can obtain the prob of the first categort by 1-prob of second category) - } else{ pred_vals_ls[[j]] <- predict(lm1[[j]], df[j,], type = "probs") } - } pred_vals <- laply(pred_vals_ls, .fun = as.array) if( length(probs_cat)+1==2) { - - if (any(is.na(pred_vals))) ### if all the observed categories are the same for the corresponding loocv step, assign 100% probability to the observed categories for the relevant prediction. - { - ifelse(names(which.max(table(df$y)))==1,pred_vals[is.na(pred_vals)]<-0, - pred_vals[is.na(pred_vals)]<-1) - } pred_vals_dum<-array(NA,dim=c(nrow(df),2)) pred_vals_dum[,2]<-pred_vals pred_vals_dum[,1]<-1-pred_vals pred_vals<-pred_vals_dum colnames(pred_vals)<-c(1,2) - } - + } + } else { # type = class, probs @@ -523,13 +534,6 @@ pred_lr <- function(df, lm1, loocv,probs_cat) { if( length(probs_cat)+1==2) { - - if (any(is.na(pred_vals))) ### if all the observed categories are the same for the corresponding loocv step, assign 100% probability to the observed categories for the relevant prediction. 
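A toy sketch of the simplified leave-one-out fit above: one multinom() model is trained per held-out start date, then class probabilities are predicted for that date. The data frame and dimensions here are invented; in the module the response comes from convert2prob() applied to the observations.

library(nnet)
set.seed(1)
df <- data.frame(x = rnorm(30), y = factor(sample(1:3, 30, replace = TRUE)))
# One fit per left-out row (loocv = TRUE branch)
fits <- lapply(seq_len(nrow(df)),
               function(j) multinom(y ~ ., data = df[-j, ], trace = FALSE))
probs <- t(sapply(seq_len(nrow(df)),
                  function(j) predict(fits[[j]], df[j, ], type = "probs")))
dim(probs)  # 30 rows x 3 category probabilities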
- { - ifelse(names(which.max(table(df$y)))==1,pred_vals[is.na(pred_vals)]<-0, - pred_vals[is.na(pred_vals)]<-1) - } - pred_vals_dum<-array(NA,dim=c(nrow(df),2)) pred_vals_dum[,2]<-pred_vals pred_vals_dum[,1]<-1-pred_vals -- GitLab From 97a835540c4601155defab67a1096d608c9343c0 Mon Sep 17 00:00:00 2001 From: eduzenli Date: Fri, 15 Sep 2023 15:25:12 +0200 Subject: [PATCH 42/52] all updates of CSDownscale are included --- modules/Downscaling/Downscaling.R | 92 +++++++++++++------------ modules/Downscaling/tmp/Analogs.R | 22 ++++-- modules/Downscaling/tmp/Intbc.R | 42 +++++------ modules/Downscaling/tmp/Interpolation.R | 10 +-- modules/Downscaling/tmp/Intlr.R | 20 ++++-- modules/Downscaling/tmp/LogisticReg.R | 23 ++++--- modules/Downscaling/tmp/Utils.R | 2 +- 7 files changed, 114 insertions(+), 97 deletions(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index c6456da4..306c6dce 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -30,11 +30,11 @@ downscale_datasets <- function(recipe, data) { # Downscaling function params int_method <- tolower(recipe$Analysis$Workflow$Downscaling$int_method) bc_method <- tolower(recipe$Analysis$Workflow$Downscaling$bc_method) - cal_method <- tolower(recipe$Analysis$Workflow$Downscaling$cal_method) lr_method <- tolower(recipe$Analysis$Workflow$Downscaling$lr_method) log_reg_method <- tolower(recipe$Analysis$Workflow$Downscaling$log_reg_method) target_grid <- tolower(recipe$Analysis$Workflow$Downscaling$target_grid) nanalogs <- as.numeric(recipe$Analysis$Workflow$Downscaling$nanalogs) + size <- recipe$Analysis$Workflow$Downscaling$size if (is.null(recipe$Analysis$ncores)) { ncores <- 1 @@ -50,7 +50,7 @@ downscale_datasets <- function(recipe, data) { } DOWNSCAL_TYPES <- c("none", "int", "intbc", "intlr", "analogs", "logreg") - BC_METHODS <- c("simple_bias", "calibration", "quantile_mapping", "sbc", "cal", "qm") + BC_METHODS <- c("quantile_mapping", "bias", "evmos", "mse_min", "crps_min", "rpc-based", "qm") LR_METHODS <- c("basic", "large-scale", "4nn") LOG_REG_METHODS <- c("ens_mean", "ens_mean_sd", "sorted_members") @@ -103,8 +103,8 @@ downscale_datasets <- function(recipe, data) { if (is.null(bc_method)) { stop("Please provide one bias-correction method in the recipe. Accepted ", - "methods are 'simple_bias', 'calibration', 'quantile_mapping', 'sbc', 'cal', ", - "'qm'.") + "methods are 'quantile_mapping', 'bias', 'evmos', 'mse_min', 'crps_min' ", + "'rpc-based', 'qm'. ") } if (is.null(target_grid)) { @@ -113,46 +113,23 @@ downscale_datasets <- function(recipe, data) { if (!(bc_method %in% BC_METHODS)) { stop(paste0(bc_method, " method in the recipe is not available. 
Accepted methods ", - "are 'simple_bias', 'calibration', 'quantile_mapping', 'sbc', 'cal', 'qm'.")) + "are 'quantile_mapping', 'bias', 'evmos', 'mse_min', 'crps_min' ", + "'rpc-based', 'qm'.")) } - if (bc_method=="cal" | bc_method=="calibration") - { - if (length(cal_method)==0) { - stop("Please provide one (and only one) calibration method in the recipe.") - } - - hcst_downscal <- CST_Intbc(data$hcst, data$obs, - target_grid = target_grid, - bc_method = bc_method, - int_method = int_method, - cal.method = cal_method, - points = NULL, - method_point_interp = NULL, - lat_dim = "latitude", - lon_dim = "longitude", - sdate_dim = "syear", - member_dim = "ensemble", - region = NULL, - ncores = ncores) - - DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" - } else - { - hcst_downscal <- CST_Intbc(data$hcst, data$obs, - target_grid = target_grid, - bc_method = bc_method, - int_method = int_method, - points = NULL, - method_point_interp = NULL, - lat_dim = "latitude", - lon_dim = "longitude", - sdate_dim = "syear", - member_dim = "ensemble", - region = NULL, - ncores = ncores) + hcst_downscal <- CST_Intbc(data$hcst, data$obs, + target_grid = target_grid, + bc_method = bc_method, + int_method = int_method, + points = NULL, + method_point_interp = NULL, + lat_dim = "latitude", + lon_dim = "longitude", + sdate_dim = "syear", + member_dim = "ensemble", + region = NULL, + ncores = ncores) DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" - } } else if (type == "intlr") { if (length(int_method) == 0) { stop("Please provide one (and only one) interpolation method in the recipe.") @@ -206,11 +183,24 @@ downscale_datasets <- function(recipe, data) { "recipe. Setting it to 3.") nanalogs <- 3 } - + + if (!is.null(size) & recipe$Analysis$Variables$freq == "monthly_mean") { + size <- NULL + warning("Size is set to NULL. 
", + "It must be NULL for the monthly input data.") + } + + if (!is.null(size)) { + dum <- data$obs$data ## keep obs data before the window process to provide it later as the output + data$obs$data <- .generate_window(data$obs$data,sdate_dim = 'syear',time_dim = 'time',loocv = TRUE,size=size) + data$obs$data <- Apply(data$obs$data,target_dims="window",fun=function (x) x[!is.na(x)])$output1 + } + hcst_downscal <- CST_Analogs(data$hcst, data$obs, - grid_exp = data$hcst$attrs$source_files[1], + grid_exp = data$hcst$attrs$source_files[ + which(!is.na(data$hcst$attrs$source_files))[1]], nanalogs = nanalogs, - fun_analog = "min", + fun_analog = "wmean", lat_dim = "latitude", lon_dim = "longitude", sdate_dim = "syear", @@ -220,6 +210,10 @@ downscale_datasets <- function(recipe, data) { return_indices = FALSE, loocv_window = loocv, ncores = ncores) + + if (!is.null(size)) { + hcst_downscal$obs$data <- Apply(dum,target_dims=c("time","smonth"),function (x) x[1:(dim(data$hcst$data)["time"]),2],ncores=ncores,output_dims="time")$output1 ## 2nd month is the target month + } DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } else if (type == "logreg") { @@ -268,7 +262,15 @@ downscale_datasets <- function(recipe, data) { DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } - } + } + + if (recipe$Analysis$Horizon == "seasonal" & (recipe$Analysis$Variables$freq == "daily_mean" | recipe$Analysis$Variables$freq == "daily")) { + hcst_downscal$exp$data <- Apply(hcst_downscal$exp$data,target_dims="time",mean)$output1 + hcst_downscal$obs$data <- Apply(hcst_downscal$obs$data,target_dims="time",mean)$output1 + hcst_downscal$exp$data <- InsertDim(hcst_downscal$exp$data,length(names(dim(hcst_downscal$exp$data))),1,name="time") + hcst_downscal$obs$data <- InsertDim(hcst_downscal$obs$data,length(names(dim(hcst_downscal$obs$data))),1,name="time") + } + print(DOWNSCAL_MSG) return(list(hcst = hcst_downscal$exp, obs = hcst_downscal$obs, fcst = NULL)) } diff --git a/modules/Downscaling/tmp/Analogs.R b/modules/Downscaling/tmp/Analogs.R index f0ebd610..99fc45e7 100644 --- a/modules/Downscaling/tmp/Analogs.R +++ b/modules/Downscaling/tmp/Analogs.R @@ -381,7 +381,7 @@ Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, method_remap = "conservative", region = region, ncores = ncores) # If after interpolating 'obs' data the coordinates do not match, the exp data is interpolated to # the same grid to force the matching - if (!.check_coords(lat1 = obs_interpolated$lat, lat2 = exp_lats, lon1 = obs_interpolated$lon, lon2 = exp_lons)) { + if (!.check_coords(lat1 = as.numeric(obs_interpolated$lat), lat2 = exp_lats, lon1 = as.numeric(obs_interpolated$lon), lon2 = exp_lons)) { exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = grid_exp, lat_dim = lat_dim, lon_dim = lon_dim, method_remap = "conservative", region = region, ncores = ncores)$data @@ -394,6 +394,11 @@ Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, obs_train_interpolated <- .generate_window(obj = obs_interpolated$data, sdate_dim = sdate_dim, time_dim = time_dim, loocv = loocv_window, ncores = ncores) obs_hres <- .generate_window(obj = obs, sdate_dim = sdate_dim, time_dim = time_dim, loocv = loocv_window, ncores = ncores) + } else { + obs_train_interpolated <- obs_interpolated$data + dim(obs_train_interpolated) <- dim(obs_train_interpolated)[-which(names(dim(obs_train_interpolated))=="time")] + obs_hres <- obs + dim(obs_hres) <- 
dim(obs_hres)[-which(names(dim(obs_hres))=="time")] } #----------------------------------- @@ -443,9 +448,9 @@ Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, # For each element in test, find the indices of the k nearest neigbhors in train .analogs <- function(train, test, obs_hres, k, fun_analog, return_indices = FALSE) { - + require(FNN) - + # train and obs_hres dim: 3 dimensions window, lat and lon (in this order) # test dim: 2 dimensions lat and lon (in this order) # Number of lats/lons of the high-resolution data @@ -480,6 +485,10 @@ Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, if (fun_analog == "wmean") { weight <- 1 / dist res <- apply(res, c(1,2), function(x) weighted.mean(x, weight)) + } else if (fun_analog == "min") { + res<-res[,,which.min(dist)] + } else if (fun_analog == "max") { + res<-res[,,which.max(dist)] } else { res <- apply(res, c(1,2), fun_analog) } @@ -489,6 +498,7 @@ Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, return(res) } + # Add the dimension window to an array that contains, at least, the start date and time # dimensions # object has at least dimensions sdate and time @@ -524,13 +534,13 @@ Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, fun = as.vector, output_dims = "time", ncores = ncores )$output1 if (loocv) { - obj_window <- Apply(list(obj_new, rtimes, rsdates), target_dims = list(c(time_dim, sdate_dim), NULL, NULL), - fun = function(x, t, s) as.vector(x[(ntimes + t - size):(ntimes + t + size), -s]), + obj_window <- Apply(list(obj_new, rsdates), target_dims = list(c(time_dim, sdate_dim), NULL), + fun = function(x, s) as.vector(x[(ntimes + min(rtimes) - size):(ntimes + max(rtimes) + size), -s]), output_dims = 'window', ncores = ncores)$output1 names(dim(obj_window))[(length(names(dim(obj_window))) - 1):length(names(dim(obj_window)))] <- c(time_dim, sdate_dim) } else { obj_window <- Apply(obj_new, target_dims = c(time_dim, sdate_dim), - fun = function(x) sapply(rtimes, function(t) as.vector(x[(ntimes + t - size):(ntimes + t + size), ])), + fun = function(x) sapply(rtimes, function(t) as.vector(x[(ntimes + min(rtimes) - size):(ntimes + max(rtimes) + size), ])), output_dims = c('window', time_dim), ncores = ncores)$output1 } diff --git a/modules/Downscaling/tmp/Intbc.R b/modules/Downscaling/tmp/Intbc.R index a436f90e..dc5d050b 100644 --- a/modules/Downscaling/tmp/Intbc.R +++ b/modules/Downscaling/tmp/Intbc.R @@ -23,8 +23,8 @@ #'@param target_grid a character vector indicating the target grid to be passed to CDO. #'It must be a grid recognised by CDO or a NetCDF file. #'@param bc_method a character vector indicating the bias adjustment method to be applied after -#'the interpolation. Accepted methods are 'simple_bias', 'calibration', 'quantile_mapping'. The -#'abbreviations 'sbc', 'cal', 'qm' can also be used. +#'the interpolation. Accepted methods are 'quantile_mapping', 'dynamical_bias', 'bias', 'evmos', +#''mse_min', 'crps_min', 'rpc-based'. The abbreviations 'dbc','qm' can also be used. #'@param int_method a character vector indicating the regridding method to be passed to CDORemap. #'Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is to be used, CDO_1.9.8 #'or newer version is required. @@ -135,8 +135,8 @@ CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, point #'@param target_grid a character vector indicating the target grid to be passed to CDO. 
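To make the 'wmean' analog combination above concrete, a toy example with invented fields and distances: the k analog fields are averaged with weights 1/distance, whereas 'min' simply picks the closest analog.

set.seed(1)
k <- 4
res  <- array(rnorm(5 * 6 * k), dim = c(5, 6, k))  # lat x lon x analog
dist <- c(0.8, 1.2, 2.0, 3.5)                      # distance of each analog
weight <- 1 / dist
wmean   <- apply(res, c(1, 2), function(x) weighted.mean(x, weight))  # fun_analog = "wmean"
closest <- res[, , which.min(dist)]                                   # fun_analog = "min"
dim(wmean)  # 5 6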
#'It must be a grid recognised by CDO or a NetCDF file. #'@param bc_method a character vector indicating the bias adjustment method to be applied after -#'the interpolation. Accepted methods are 'simple_bias', 'calibration', 'quantile_mapping'. The -#'abbreviations 'sbc', 'cal', 'qm' can also be used. +#'the interpolation. Accepted methods are 'quantile_mapping', 'dynamical_bias', 'bias', 'evmos', +#''mse_min', 'crps_min', 'rpc-based'. The abbreviations 'dbc','qm' can also be used. #'@param int_method a character vector indicating the regridding method to be passed to CDORemap. #'Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is to be used, CDO_1.9.8 #'or newer version is required. @@ -231,11 +231,11 @@ Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, stop("Missing member dimension in 'exp', or does not match the parameter 'member_dim'") } - if (!(bc_method %in% c('sbc', 'cal', 'qm', 'dbc', 'simple_bias', 'calibration', - 'quantile_mapping', 'dynamical_bias'))) { + if (!(bc_method %in% c('qm', 'dbc', 'quantile_mapping', 'dynamical_bias', 'bias', 'evmos', 'mse_min', + 'crps_min', 'rpc-based'))) { stop("Parameter 'bc_method' must be a character vector indicating the bias adjustment method. ", - "Accepted methods are 'simple_bias', 'calibration', 'quantile_mapping'. The abbreviations ", - "'sbc', 'cal', 'qm' can also be used.") + "Accepted methods are 'quantile_mapping', 'dynamical_bias', 'bias', 'evmos', 'mse_min', ", + "'crps_min', 'rpc-based'. The abbreviations 'dbc','qm' can also be used.") } # When observations are pointwise @@ -304,22 +304,7 @@ Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, # which(names(dim(obs_ref)) == sdate_dim), 'sdate') #} - if (bc_method == 'sbc' | bc_method == 'simple_bias') { - if (dim(obs_ref)[sdate_dim] == 1) { - warning('Simple Bias Correction should not be used with only one observation. Returning NA.') - } - - res <- BiasCorrection(exp = exp_interpolated$data, obs = obs_ref, memb_dim = member_dim, - sdate_dim = sdate_dim, ncores = ncores, ...) - } - else if (bc_method == 'cal' | bc_method == 'calibration') { - if (dim(exp_interpolated$data)[member_dim] == 1) { - stop('Calibration must not be used with only one ensemble member.') - } - res <- Calibration(exp = exp_interpolated$data, obs = obs_ref, memb_dim = member_dim, - sdate_dim = sdate_dim, ncores = ncores, ...) - } - else if (bc_method == 'qm' | bc_method == 'quantile_mapping') { + if (bc_method == 'qm' | bc_method == 'quantile_mapping') { res <- QuantileMapping(exp = exp_interpolated$data, obs = obs_ref, na.rm = TRUE, memb_dim = member_dim, sdate_dim = sdate_dim, ncores = ncores, ...) @@ -336,6 +321,15 @@ Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, } # REMEMBER to add na.rm = T in colMeans in .proxiesattractor res <- DynBiasCorrection(exp = exp_interpolated$data, obs = obs_ref, ncores = ncores, ...) + } else { + if (dim(exp_interpolated$data)[member_dim] == 1) { + stop('Calibration must not be used with only one ensemble member.') + } + if (dim(obs_ref)[sdate_dim] == 1) { + warning('Simple Bias Correction should not be used with only one observation. 
Returning NA.') + } + res <- Calibration(exp = exp_interpolated$data, obs = obs_ref, memb_dim = member_dim, + sdate_dim = sdate_dim, ncores = ncores, cal.method = bc_method) } # Return a list of three elements diff --git a/modules/Downscaling/tmp/Interpolation.R b/modules/Downscaling/tmp/Interpolation.R index 4cf0c1ed..ed79f4fd 100644 --- a/modules/Downscaling/tmp/Interpolation.R +++ b/modules/Downscaling/tmp/Interpolation.R @@ -272,11 +272,11 @@ Interpolation <- function(exp, lats, lons, points = NULL, source_file = NULL, me .warning <- s2dv:::.warning res <- CDORemap(data_array = exp, - lats = lats, - lons = lons, - grid = target_grid, - method = method_remap, - crop = region) + lats = lats, + lons = lons, + grid = target_grid, + method = method_remap, + crop = region) # Return a list res <- list(data = res$data_array, obs = NULL, lon = res$lons, lat = res$lats) diff --git a/modules/Downscaling/tmp/Intlr.R b/modules/Downscaling/tmp/Intlr.R index f31b7d3b..36a7f11b 100644 --- a/modules/Downscaling/tmp/Intlr.R +++ b/modules/Downscaling/tmp/Intlr.R @@ -61,7 +61,7 @@ #'@param large_scale_predictor_dimname a character vector indicating the name of the #'dimension in 'predictors' that contain the predictor variables. See parameter 'predictors'. #'@param loocv a logical indicating whether to apply leave-one-out cross-validation when -#'generating the linear regressions. Default to FALSE. +#'generating the linear regressions. Default to TRUE. #'@param region a numeric vector indicating the borders of the region defined in exp. #'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers #'to the left border, while lonmax refers to the right border. latmin indicates the lower @@ -89,7 +89,7 @@ CST_Intlr <- function(exp, obs, lr_method, target_grid = NULL, points = NULL, int_method = NULL, method_point_interp = NULL, predictors = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", member_dim = "member", - large_scale_predictor_dimname = 'vars', loocv = FALSE, region = NULL, ncores = NULL) { + large_scale_predictor_dimname = 'vars', loocv = TRUE, region = NULL, ncores = NULL) { if (!inherits(exp,'s2dv_cube')) { stop("Parameter 'exp' must be of the class 's2dv_cube'") @@ -203,7 +203,7 @@ CST_Intlr <- function(exp, obs, lr_method, target_grid = NULL, points = NULL, in #'@param large_scale_predictor_dimname a character vector indicating the name of the #'dimension in 'predictors' that contain the predictor variables. See parameter 'predictors'. #'@param loocv a logical indicating whether to apply leave-one-out cross-validation when -#'generating the linear regressions. Default to FALSE. +#'generating the linear regressions. Default to TRUE. #'@param ncores an integer indicating the number of cores to use in parallel computation. #'The default value is NULL. 
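The bias-adjustment branch above now reduces to three cases. Below is a condensed sketch only: the CSTools calls are copied from the patch, while 'exp', 'obs' and the dimension names are placeholders, not a runnable configuration.

library(CSTools)
apply_bc <- function(exp, obs, bc_method, member_dim, sdate_dim, ncores = NULL, ...) {
  if (bc_method %in% c("qm", "quantile_mapping")) {
    QuantileMapping(exp = exp, obs = obs, na.rm = TRUE, memb_dim = member_dim,
                    sdate_dim = sdate_dim, ncores = ncores, ...)
  } else if (bc_method %in% c("dbc", "dynamical_bias")) {
    DynBiasCorrection(exp = exp, obs = obs, ncores = ncores, ...)
  } else {
    # 'bias', 'evmos', 'mse_min', 'crps_min', 'rpc-based' go through Calibration()
    Calibration(exp = exp, obs = obs, memb_dim = member_dim, sdate_dim = sdate_dim,
                ncores = ncores, cal.method = bc_method)
  }
}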
#'@import multiApply @@ -225,7 +225,7 @@ CST_Intlr <- function(exp, obs, lr_method, target_grid = NULL, points = NULL, in Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, target_grid = NULL, points = NULL, int_method = NULL, method_point_interp = NULL, source_file_exp = NULL, source_file_obs = NULL, predictors = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", - member_dim = "member", region = NULL, large_scale_predictor_dimname = 'vars', loocv = FALSE, + member_dim = "member", region = NULL, large_scale_predictor_dimname = 'vars', loocv = TRUE, ncores = NULL) { #----------------------------------- @@ -404,10 +404,11 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t # Predictors: model data # Predictand: observations else if (lr_method == '4nn') { + predictor <- .find_nn(coar = exp, lats_hres = obs_lats, lons_hres = obs_lons, lats_coar = exp_lats, lons_coar = exp_lons, lat_dim = lat_dim, lon_dim = lon_dim, nn = 4, ncores = ncores) - if (is.null(points)) { + if (is.null(points) | ("location" %in% names(dim(obs)))) { if (!is.null(target_grid)) { warning("Interpolating to the 'obs' grid") } @@ -440,12 +441,19 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t else { stop(paste0(lr_method, " method is not implemented yet")) } - + + print(paste0('dim predictor',dim(predictor))) + print(paste0('dim predictand',dim(predictand))) + print(dim(list(predictor[1]))) # Apply the linear regressions + + + res <- Apply(list(predictor, predictand), target_dims = list(target_dims_predictor, target_dims_predictand), fun = .intlr, loocv = loocv, ncores = ncores)$output1 names(dim(res))[1] <- sdate_dim + # names(dim(res))[which(names(dim(res)) == '')] # restore ensemble dimension in observations if it existed originally if (restore_ens) { diff --git a/modules/Downscaling/tmp/LogisticReg.R b/modules/Downscaling/tmp/LogisticReg.R index 60681b97..a85a1b3f 100644 --- a/modules/Downscaling/tmp/LogisticReg.R +++ b/modules/Downscaling/tmp/LogisticReg.R @@ -59,7 +59,7 @@ #'border, whereas latmax indicates the upper border. If set to NULL (default), the function #'takes the first and last elements of the latitudes and longitudes. #'@param loocv a logical vector indicating whether to perform leave-one-out cross-validation -#'in the fitting of the logistic regression. Default to FALSE. +#'in the fitting of the logistic regression. Default to TRUE. #'@param ncores an integer indicating the number of cores to use in parallel computation. #'The default value is NULL. #'@import multiApply @@ -89,7 +89,7 @@ CST_LogisticReg <- function(exp, obs, target_grid, int_method = NULL, log_reg_method = "ens_mean", probs_cat = c(1/3,2/3), return_most_likely_cat = FALSE, points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", - member_dim = "member", region = NULL, loocv = FALSE, ncores = NULL) { + member_dim = "member", region = NULL, loocv = TRUE, ncores = NULL) { if (!inherits(exp,'s2dv_cube')) { stop("Parameter 'exp' must be of the class 's2dv_cube'") @@ -197,7 +197,7 @@ CST_LogisticReg <- function(exp, obs, target_grid, int_method = NULL, log_reg_me #'border, whereas latmax indicates the upper border. If set to NULL (default), the function #'takes the first and last elements of the latitudes and longitudes. #'@param loocv a logical vector indicating whether to perform leave-one-out cross-validation -#'in the fitting of the logistic regression. Default to FALSE. 
+#'in the fitting of the logistic regression. Default to TRUE. #'@param ncores an integer indicating the number of cores to use in parallel computation. #'The default value is NULL. #'@import multiApply @@ -227,7 +227,7 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target int_method = NULL, log_reg_method = "ens_mean", probs_cat = c(1/3,2/3), return_most_likely_cat = FALSE, points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", member_dim = "member", - source_file_exp = NULL, source_file_obs = NULL, region = NULL, loocv = FALSE, ncores = NULL) { + source_file_exp = NULL, source_file_obs = NULL, region = NULL, loocv = TRUE, ncores = NULL) { #----------------------------------- # Checkings @@ -272,13 +272,13 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target stop("Parameter 'loocv' must be set to TRUE or FALSE") } - if (is.na(match(lon_dim, names(dim(exp)))) | is.na(match(lon_dim, names(dim(obs))))) { - stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter ", + if (is.na(match(lon_dim, names(dim(exp))))) { + stop("Missing longitude dimension in 'exp', or does not match the parameter ", "'lon_dim'") } - if (is.na(match(lat_dim, names(dim(exp)))) | is.na(match(lat_dim, names(dim(obs))))) { - stop("Missing latitude dimension in 'exp' and/or 'obs', or does not match the parameter ", + if (is.na(match(lat_dim, names(dim(exp))))) { + stop("Missing latitude dimension in 'exp', or does not match the parameter ", "'lat_dim'") } @@ -351,6 +351,9 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target target_dims_predictor <- sdate_dim } else if (log_reg_method == "ens_mean_sd") { + + require(abind) + ens_mean_anom <- .get_ens_mean_anom(obj_ens = exp_interpolated$data, member_dim = member_dim, sdate_dim = sdate_dim, ncores = ncores) ens_sd <- .get_ens_sd(obj_ens = exp_interpolated$data, member_dim = member_dim, ncores = ncores) @@ -370,8 +373,8 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to # the same grid to force the matching - if ((!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats, - lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !(point_obs)) { + if ((!.check_coords(lat1 = as.numeric(exp_interpolated$lat), lat2 = obs_lats, + lon1 = as.numeric(exp_interpolated$lon), lon2 = obs_lons)) | !(point_obs)) { obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid, method_remap = int_method, points = points, source_file = source_file_obs, lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp, diff --git a/modules/Downscaling/tmp/Utils.R b/modules/Downscaling/tmp/Utils.R index d332aa3c..3cd65852 100644 --- a/modules/Downscaling/tmp/Utils.R +++ b/modules/Downscaling/tmp/Utils.R @@ -1,5 +1,5 @@ .check_coords <- function(lat1, lon1, lat2, lon2) { - if (all(lat1 == lat2) & all(lon1 == lon2)) { + if (all(as.numeric(lat1) == as.numeric(lat2)) & all(as.numeric(lon1) == as.numeric(lon2))) { match <- TRUE } else { match <- FALSE -- GitLab From 56efa48bc6d2a30f72fdd695cc331df3c96e7964 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 27 Sep 2023 11:28:15 +0200 Subject: [PATCH 43/52] Add saving to Downscaling module --- modules/Downscaling/Downscaling.R | 43 +++++++++++++++++++++---------- 1 file changed, 30 
insertions(+), 13 deletions(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index 306c6dce..57887f1d 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -6,8 +6,7 @@ source('modules/Downscaling/tmp/Analogs.R') source('modules/Downscaling/tmp/LogisticReg.R') source('modules/Downscaling/tmp/Utils.R') -## Entry params data and recipe? -downscale_datasets <- function(recipe, data) { +Downscaling <- function(recipe, data) { # Function that downscale the hindcast using the method stated in the # recipe. For the moment, forecast must be null. # @@ -15,18 +14,18 @@ downscale_datasets <- function(recipe, data) { # recipe: object obtained when passing the .yml recipe file to read_yaml() type <- tolower(recipe$Analysis$Workflow$Downscaling$type) - - if (!is.null(data$fcst)) { - warning("The downscaling will be only performed to the hindcast data") - data$fcst <- NULL - } - + if (type == "none") { - hcst_downscal <- data$hcst DOWNSCAL_MSG <- "##### NO DOWNSCALING PERFORMED #####" } else { + + if (!is.null(data$fcst)) { + warn(recipe$Run$logger, + "The downscaling will be only performed to the hindcast data") + data$fcst <- NULL + } # Downscaling function params int_method <- tolower(recipe$Analysis$Workflow$Downscaling$int_method) bc_method <- tolower(recipe$Analysis$Workflow$Downscaling$bc_method) @@ -264,7 +263,8 @@ downscale_datasets <- function(recipe, data) { } } - if (recipe$Analysis$Horizon == "seasonal" & (recipe$Analysis$Variables$freq == "daily_mean" | recipe$Analysis$Variables$freq == "daily")) { + if (recipe$Analysis$Horizon == "seasonal" & + (recipe$Analysis$Variables$freq %in% c("daily_mean", "daily"))) { hcst_downscal$exp$data <- Apply(hcst_downscal$exp$data,target_dims="time",mean)$output1 hcst_downscal$obs$data <- Apply(hcst_downscal$obs$data,target_dims="time",mean)$output1 hcst_downscal$exp$data <- InsertDim(hcst_downscal$exp$data,length(names(dim(hcst_downscal$exp$data))),1,name="time") @@ -272,7 +272,24 @@ downscale_datasets <- function(recipe, data) { } print(DOWNSCAL_MSG) - return(list(hcst = hcst_downscal$exp, obs = hcst_downscal$obs, fcst = NULL)) -} - + # Saving + if (recipe$Analysis$Workflow$Downscaling$save != 'none') { + info(recipe$Run$logger, "##### START SAVING CALIBRATED DATA #####") + } + ## TODO: What do we do with the full values? 
+ recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Downscaling/") + # if ((recipe$Analysis$Workflow$Downscaling$save %in% + # c('all', 'exp_only', 'fcst_only')) && (!is.null(data$fcst))) { + # save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') + # } + if (recipe$Analysis$Workflow$Downscaling$save %in% c('all', 'exp_only')) { + save_forecast(recipe = recipe, data_cube = hcst_downscal$exp, type = 'hcst') + } + if (recipe$Analysis$Workflow$Downscaling$save == 'all') { + save_observations(recipe = recipe, data_cube = hcst_downscal$obs) + } + + return(list(hcst = hcst_downscal$exp, obs = hcst_downscal$obs, fcst = NULL)) +} -- GitLab From d5945e08db6ef8b6280cfcd70f46782f125faade Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 27 Sep 2023 12:03:18 +0200 Subject: [PATCH 44/52] Remove computation of mean; formatting changes --- modules/Downscaling/Downscaling.R | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index 57887f1d..15f5ea9b 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -191,8 +191,14 @@ Downscaling <- function(recipe, data) { if (!is.null(size)) { dum <- data$obs$data ## keep obs data before the window process to provide it later as the output - data$obs$data <- .generate_window(data$obs$data,sdate_dim = 'syear',time_dim = 'time',loocv = TRUE,size=size) - data$obs$data <- Apply(data$obs$data,target_dims="window",fun=function (x) x[!is.na(x)])$output1 + data$obs$data <- .generate_window(data$obs$data, + sdate_dim = 'syear', + time_dim = 'time', + loocv = TRUE, + size = size) + data$obs$data <- Apply(data$obs$data, + target_dims="window", + fun=function (x) x[!is.na(x)])$output1 } hcst_downscal <- CST_Analogs(data$hcst, data$obs, @@ -211,7 +217,10 @@ Downscaling <- function(recipe, data) { ncores = ncores) if (!is.null(size)) { - hcst_downscal$obs$data <- Apply(dum,target_dims=c("time","smonth"),function (x) x[1:(dim(data$hcst$data)["time"]),2],ncores=ncores,output_dims="time")$output1 ## 2nd month is the target month + hcst_downscal$obs$data <- Apply(dum, target_dims=c("time", "smonth"), + function (x) {x[1:(dim(data$hcst$data)["time"]), 2]}, + ncores = ncores, + output_dims = "time")$output1 ## 2nd month is the target month } DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" @@ -262,15 +271,6 @@ Downscaling <- function(recipe, data) { DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } } - - if (recipe$Analysis$Horizon == "seasonal" & - (recipe$Analysis$Variables$freq %in% c("daily_mean", "daily"))) { - hcst_downscal$exp$data <- Apply(hcst_downscal$exp$data,target_dims="time",mean)$output1 - hcst_downscal$obs$data <- Apply(hcst_downscal$obs$data,target_dims="time",mean)$output1 - hcst_downscal$exp$data <- InsertDim(hcst_downscal$exp$data,length(names(dim(hcst_downscal$exp$data))),1,name="time") - hcst_downscal$obs$data <- InsertDim(hcst_downscal$obs$data,length(names(dim(hcst_downscal$obs$data))),1,name="time") - } - print(DOWNSCAL_MSG) # Saving -- GitLab From 200c9294af4c7137d0a7bc02be0012a06df844f2 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 28 Sep 2023 11:17:28 +0200 Subject: [PATCH 45/52] Update Downscaling checks --- tools/check_recipe.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 5b81420b..9baa0e52 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -302,8 +302,8 
@@ check_recipe <- function(recipe) { downscal_params <- lapply(recipe$Analysis$Workflow$Downscaling, tolower) # Define accepted entries DOWNSCAL_TYPES <- c("none", "int", "intbc", "intlr", "analogs", "logreg") - BC_METHODS <- c("simple_bias", "calibration", "quantile_mapping") - LR_METHODS <- c("basic", "large_scale", "4nn") + BC_METHODS <- c("quantile_mapping", "bias", "evmos", "mse_min", "crps_min", "rpc-based", "qm") + LR_METHODS <- c("basic", "large-scale", "4nn") LOGREG_METHODS <- c("ens_mean", "ens_mean_sd", "sorted_members") # Check downscaling type if ("type" %in% names(downscal_params)) { -- GitLab From d4780aed769e2680cb2334f9fed36baa235ef1f1 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 28 Sep 2023 11:17:47 +0200 Subject: [PATCH 46/52] Fix typo --- modules/Downscaling/Downscaling.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index 15f5ea9b..59233dc2 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -275,7 +275,7 @@ Downscaling <- function(recipe, data) { # Saving if (recipe$Analysis$Workflow$Downscaling$save != 'none') { - info(recipe$Run$logger, "##### START SAVING CALIBRATED DATA #####") + info(recipe$Run$logger, "##### START SAVING DOWNSCALED DATA #####") } ## TODO: What do we do with the full values? recipe$Run$output_dir <- paste0(recipe$Run$output_dir, -- GitLab From 283ec851edb8f38d4535602d5becc408ae21b46e Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 28 Sep 2023 11:18:16 +0200 Subject: [PATCH 47/52] Add example downscaling script and recipe --- example_scripts/example_downscaling.R | 29 ++++++++ .../recipe_system5c3s-tas_downscaling.yml | 62 ----------------- .../recipe_seasonal_downscaling.yml | 68 +++++++++++++++++++ 3 files changed, 97 insertions(+), 62 deletions(-) create mode 100644 example_scripts/example_downscaling.R delete mode 100644 modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml create mode 100644 recipes/atomic_recipes/recipe_seasonal_downscaling.yml diff --git a/example_scripts/example_downscaling.R b/example_scripts/example_downscaling.R new file mode 100644 index 00000000..4e7e6294 --- /dev/null +++ b/example_scripts/example_downscaling.R @@ -0,0 +1,29 @@ +############################################################################### +## Author: V. Agudetse +## Description: Example script for seasonal downscaling. 
+############################################################################### + +# Load modules +source("modules/Loading/Loading.R") +source("modules/Units/Units.R") +source("modules/Downscaling/Downscaling.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") + +# Read recipe +recipe_file <- "recipes/atomic_recipes/recipe_seasonal_downscaling.yml" +recipe <- prepare_outputs(recipe_file) + +# Load datasets +data <- Loading(recipe) +# Change units +data <- Units(recipe, data) +# Downscale datasets +data <- Downscaling(recipe, data) +# Compute skill metrics +skill_metrics <- Skill(recipe, data) +# Compute percentiles and probability bins +probabilities <- Probabilities(recipe, data) +# Plot data +Visualization(recipe, data, skill_metrics, probabilities, significance = T) diff --git a/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml b/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml deleted file mode 100644 index 99e02c7d..00000000 --- a/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml +++ /dev/null @@ -1,62 +0,0 @@ -# ▄████▄ ██████ ▓█████▄ ▒█████ █ █░███▄ █ ██████ ▄████▄ ▄▄▄ ██▓ ▓█████ -#▒██▀ ▀█ ▒██ ▒ ▒██▀ ██▌▒██▒ ██▒▓█░ █ ░█░██ ▀█ █ ▒██ ▒ ▒██▀ ▀█ ▒████▄ ▓██▒ ▓█ ▀ -#▒▓█ ▄ ░ ▓██▄ ░██ █▌▒██░ ██▒▒█░ █ ░█▓██ ▀█ ██▒░ ▓██▄ ▒▓█ ▄ ▒██ ▀█▄ ▒██░ ▒███ -#▒▓▓▄ ▄██▒ ▒ ██▒░▓█▄ ▌▒██ ██░░█░ █ ░█▓██▒ ▐▌██▒ ▒ ██▒▒▓▓▄ ▄██▒░██▄▄▄▄██ ▒██░ ▒▓█ ▄ -#▒ ▓███▀ ░▒██████▒▒░▒████▓ ░ ████▓▒░░░██▒██▓▒██░ ▓██░▒██████▒▒▒ ▓███▀ ░ ▓█ ▓██▒░██████▒░▒████▒ -#░ ░▒ ▒ ░▒ ▒▓▒ ▒ ░ ▒▒▓ ▒ ░ ▒░▒░▒░ ░ ▓░▒ ▒ ░ ▒░ ▒ ▒ ▒ ▒▓▒ ▒ ░░ ░▒ ▒ ░ ▒▒ ▓▒█░░ ▒░▓ ░░░ ▒░ ░ -# ░ ▒ ░ ░▒ ░ ░ ░ ▒ ▒ ░ ▒ ▒░ ▒ ░ ░ ░ ░░ ░ ▒░░ ░▒ ░ ░ ░ ▒ ▒ ▒▒ ░░ ░ ▒ ░ ░ ░ ░ -#░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ▒ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ▒ ░ ░ ░ -#░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ -#░ ░ ░ -Description: - Author: J. 
Ramon -Info: downscaling of seasonal predictions from coarse to fine grids - -Analysis: - Horizon: Seasonal - Variables: - name: tas - freq: monthly_mean - Datasets: - System: - name: ECMWF-SEAS5 - Multimodel: no - Reference: - name: ERA5 - Time: - sdate: '0501' - hcst_start: '2000' - hcst_end: '2005' - ftime_min: 1 - ftime_max: 1 -#sometimes we want the region for the obs to be bigger than for hcst - Region: - latmin: 34.1 - latmax: 45.1 - lonmin: -12.5 - lonmax: 6.35 - Regrid: - method: bilinear - type: none - Workflow: - Calibration: - method: raw - Skill: - metric: BSS10 BSS90 Mean_Bias_SS RPSS ROCSS - Downscaling: - # Assumption 1: leave-one-out cross-validation is always applied - # Assumption 2: for analogs, we select the best analog (minimum distance) - # TO DO: add downscaling to point locations - type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg' - int_method: conservative # optional, regridding method accepted by CDO - bc_method: simple_bias # optional, 'simple_bias', 'calibration', 'quantile_mapping' - lr_method: # optional, 'basic', 'large_scale', '4nn' - log_reg_method: # optional, 'ens_mean', 'ens_mean_sd', 'sorted_members' - target_grid: /esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # optional, nc file or grid accepted by CDO - nanalogs: # optional, number of analogs to be searched - Output_format: S2S4E -Run: - Loglevel: INFO - Terminal: yes - output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/recipes/atomic_recipes/recipe_seasonal_downscaling.yml b/recipes/atomic_recipes/recipe_seasonal_downscaling.yml new file mode 100644 index 00000000..509bd467 --- /dev/null +++ b/recipes/atomic_recipes/recipe_seasonal_downscaling.yml @@ -0,0 +1,68 @@ +Description: + Author: V. 
Agudetse + Description: Analysis of MF System 7 with temperature +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: ECMWF-SEAS5 + Multimodel: False + Reference: + name: ERA5 + Time: + sdate: '0501' + fcst_year: + hcst_start: '2000' + hcst_end: '2005' + ftime_min: 1 + ftime_max: 1 + Region: + latmin: 34.1 + latmax: 45.1 + lonmin: -12.5 + lonmax: 6.35 + Regrid: + method: + type: none + Workflow: + Anomalies: + compute: no # yes/no, default yes + cross_validation: no # yes/no, default yes + save: 'none' # 'all'/'none'/'exp_only'/'fcst_only' + Calibration: + method: mse_min + save: 'none' # 'all'/'none'/'exp_only'/'fcst_only' + Downscaling: + # Assumption 1: leave-one-out cross-validation is always applied + # Assumption 2: for analogs, we select the best analog (minimum distance) + type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg' + int_method: conservative # optional, regridding method accepted by CDO + bc_method: bias # optional, 'simple_bias', 'calibration', 'quantile_mapping' + lr_method: # optional, 'basic', 'large_scale', '4nn' + log_reg_method: # optional, 'ens_mean', 'ens_mean_sd', 'sorted_members' + target_grid: /esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # optional, nc file or grid accepted by CDO + nanalogs: # optional, number of analogs to be searched + save: 'all' + Skill: + metric: BSS10 BSS90 + save: 'all' # 'all'/'none' + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] + save: 'percentiles_only' # 'all'/'none'/'bins_only'/'percentiles_only' + Visualization: + plots: skill_metrics + multi_panel: no + projection: cylindrical_equidistant + Indicators: + index: no + ncores: 10 + remove_NAs: yes + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ -- GitLab From a1576cf84db9660a246611e75673d8a908a4580e Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 28 Sep 2023 11:42:58 +0200 Subject: [PATCH 48/52] Update recipe parameter description --- .../recipe_seasonal_downscaling.yml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/recipes/atomic_recipes/recipe_seasonal_downscaling.yml b/recipes/atomic_recipes/recipe_seasonal_downscaling.yml index 509bd467..219c5d61 100644 --- a/recipes/atomic_recipes/recipe_seasonal_downscaling.yml +++ b/recipes/atomic_recipes/recipe_seasonal_downscaling.yml @@ -1,6 +1,6 @@ Description: Author: V. Agudetse - Description: Analysis of MF System 7 with temperature + Description: ECMWF-SEAS5 Downscaling Analysis: Horizon: Seasonal Variables: @@ -38,14 +38,14 @@ Analysis: Downscaling: # Assumption 1: leave-one-out cross-validation is always applied # Assumption 2: for analogs, we select the best analog (minimum distance) - type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg' - int_method: conservative # optional, regridding method accepted by CDO - bc_method: bias # optional, 'simple_bias', 'calibration', 'quantile_mapping' - lr_method: # optional, 'basic', 'large_scale', '4nn' - log_reg_method: # optional, 'ens_mean', 'ens_mean_sd', 'sorted_members' - target_grid: /esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # optional, nc file or grid accepted by CDO - nanalogs: # optional, number of analogs to be searched - save: 'all' + type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'. 
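For orientation, a minimal sketch of how the Downscaling block of this recipe is consumed by the module (field names as in the recipe above; in practice the recipe is read through prepare_outputs(), as in the example script, rather than read_yaml() directly):

library(yaml)
recipe <- read_yaml("recipes/atomic_recipes/recipe_seasonal_downscaling.yml")
type       <- tolower(recipe$Analysis$Workflow$Downscaling$type)        # "intbc"
int_method <- tolower(recipe$Analysis$Workflow$Downscaling$int_method)  # "conservative"
bc_method  <- tolower(recipe$Analysis$Workflow$Downscaling$bc_method)   # "bias"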
+ int_method: conservative # regridding method accepted by CDO. + bc_method: bias # If type intbc. Options: 'bias', 'calibration', 'quantile_mapping', 'qm', 'evmos', 'mse_min', 'crps_min', 'rpc-based'. + lr_method: # If type intlr. Options: 'basic', 'large_scale', '4nn' + log_reg_method: # If type logreg. Options: 'ens_mean', 'ens_mean_sd', 'sorted_members' + target_grid: /esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # nc file or grid accepted by CDO + nanalogs: # If type analgs. Number of analogs to be searched + save: 'all' # 'all'/'none'/'exp_only' Skill: metric: BSS10 BSS90 save: 'all' # 'all'/'none' -- GitLab From df2f33c3f3a5c935d18cab494f0c518af9e3e025 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 29 Sep 2023 12:56:04 +0200 Subject: [PATCH 49/52] Fix bug in analogs check --- tools/check_recipe.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 9baa0e52..74d16acf 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -372,10 +372,11 @@ check_recipe <- function(recipe) { error_status <- T } } else if (downscal_params$type == "analogs") { - if (length(nanalogs) == 0) { + + if (is.null(recipe$Analysis$Workflow$Downscaling$nanalogs)) { warn(recipe$Run$logger, paste("Downscaling type is 'analogs, but the number of analogs", - "has not been provided in the recipe.")) + "has not been provided in the recipe. The default is 3.")) } } else if (downscal_params$type == "logreg") { if (length(downscal_params$int_method) == 0) { -- GitLab From a0c612ae222876fe42648b302338c14f3db34e60 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 29 Sep 2023 14:27:44 +0200 Subject: [PATCH 50/52] Fix downscaling recipe checks --- tools/check_recipe.R | 182 +++++++++++++++++++++---------------------- 1 file changed, 90 insertions(+), 92 deletions(-) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 74d16acf..3de1f63b 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -296,107 +296,105 @@ check_recipe <- function(recipe) { } } } + # Downscaling - ## TODO: Simplify checks (reduce number of lines) - ## TODO: Add saving checks - downscal_params <- lapply(recipe$Analysis$Workflow$Downscaling, tolower) - # Define accepted entries - DOWNSCAL_TYPES <- c("none", "int", "intbc", "intlr", "analogs", "logreg") - BC_METHODS <- c("quantile_mapping", "bias", "evmos", "mse_min", "crps_min", "rpc-based", "qm") - LR_METHODS <- c("basic", "large-scale", "4nn") - LOGREG_METHODS <- c("ens_mean", "ens_mean_sd", "sorted_members") - # Check downscaling type - if ("type" %in% names(downscal_params)) { - if (length(downscal_params$type) == 0) { - downscal_params$type <- "none" - warn(recipe$Run$logger, - paste("Downscaling 'type' is empty in the recipe, setting it to", - "'none'.")) - } - if (!(downscal_params$type %in% DOWNSCAL_TYPES)) { - error(recipe$Run$logger, - paste0("The type of Downscaling request in the recipe is not ", - "available. 
It must be one of the following: ", - paste(DOWNSCAL_TYPES, collapse = ", "), ".")) - error_status <- T - } - if ((downscal_params$type %in% c("int", "intbc", "intlr", "logreg")) && - (length(downscal_params$target_grid) == 0)) { - error(recipe$Run$logger, - paste("A target grid is required for the downscaling method", - "requested in the recipe.")) - error_status <- T - } - if (downscal_params$type == "int") { - if (length(downscal_params$int_method) == 0) { - error(recipe$Run$logger, - paste("Downscaling type 'int' was requested, but no", - "interpolation method is provided in the recipe.")) - error_status <- T - } - } else if (downscal_params$type == "intbc") { - if (length(downscal_params$int_method) == 0) { - error(recipe$Run$logger, - paste("Downscaling type 'int' was requested in the recipe, but no", - "interpolation method is provided.")) - error_status <- T - } - if (length(downscal_params$bc_method)== 0) { - error(recipe$Run$logger, - paste("Downscaling type 'intbc' was requested in the recipe, but", - "no bias correction method is provided.")) - error_status <- T - } else if (!(downscal_params$bc_method %in% BC_METHODS)) { - error(recipe$Run$logger, - paste0("The accepted Bias Correction methods for the downscaling", - " module are: ", paste(BC_METHODS, collapse = ", "), ".")) - error_status <- T - } - } else if (downscal_params$type == "intlr") { - if (length(downscal_params$int_method) == 0) { - error(recipe$Run$logger, - paste("Downscaling type 'intlr' was requested in the recipe, but", - "no interpolation method was provided.")) - error_status <- T - } - if (length(downscal_params$lr_method) == 0) { - error(recipe$Run$logger, - paste("Downscaling type 'intlr' was requested in the recipe, but", - "no linear regression method was provided.")) - error_status <- T - } else if (!(downscal_params$lr_method %in% LR_METHODS)) { - error(recipe$Run$logger, - paste0("The accepted linear regression methods for the", - " downscaling module are: ", - paste(LR_METHODS, collapse = ", "), ".")) - error_status <- T - } - } else if (downscal_params$type == "analogs") { - - if (is.null(recipe$Analysis$Workflow$Downscaling$nanalogs)) { + if ("Downscaling" %in% names(recipe$Analysis$Workflow)) { + downscal_params <- lapply(recipe$Analysis$Workflow$Downscaling, tolower) + # Define accepted entries + DOWNSCAL_TYPES <- c("none", "int", "intbc", "intlr", "analogs", "logreg") + BC_METHODS <- c("quantile_mapping", "bias", "evmos", "mse_min", "crps_min", + "rpc-based", "qm") + LR_METHODS <- c("basic", "large-scale", "4nn") + LOGREG_METHODS <- c("ens_mean", "ens_mean_sd", "sorted_members") + # Check downscaling type + if ("type" %in% names(downscal_params)) { + if (length(downscal_params$type) == 0) { + downscal_params$type <- "none" warn(recipe$Run$logger, - paste("Downscaling type is 'analogs, but the number of analogs", - "has not been provided in the recipe. The default is 3.")) + paste("Downscaling 'type' is empty in the recipe, setting it to", + "'none'.")) } - } else if (downscal_params$type == "logreg") { - if (length(downscal_params$int_method) == 0) { + if (!(downscal_params$type %in% DOWNSCAL_TYPES)) { error(recipe$Run$logger, - paste("Downscaling type 'logreg' was requested in the recipe, but", - "no interpolation method was provided.")) + paste0("The type of Downscaling request in the recipe is not ", + "available. 
It must be one of the following: ", + paste(DOWNSCAL_TYPES, collapse = ", "), ".")) error_status <- T } - if (length(downscal_params$log_reg_method) == 0) { - error(recipe$Run$logger, - paste("Downscaling type 'logreg' was requested in the recipe,", - "but no logistic regression method is provided.")) - error_status <- T - } else if (!(downscal_params$log_reg_method %in% LOGREG_METHODS)) { + if ((downscal_params$type %in% c("int", "intbc", "intlr", "logreg")) && + (is.null(downscal_params$target_grid))) { error(recipe$Run$logger, - paste0("The accepted logistic regression methods for the ", - "downscaling module are: ", - paste(LOGREG_METHODS, collapse = ", "), ".")) + paste("A target grid is required for the downscaling method", + "requested in the recipe.")) error_status <- T } + if (downscal_params$type == "int") { + if (is.null(downscal_params$int_method)) { + error(recipe$Run$logger, + paste("Downscaling type 'int' was requested, but no", + "interpolation method is provided in the recipe.")) + error_status <- T + } + } else if (downscal_params$type %in% + c("int", "intbc", "intlr", "logreg")) { + if (is.null(downscal_params$int_method)) { + error(recipe$Run$logger, + paste("Downscaling type", downscal_params$type, + "was requested in the recipe, but no", + "interpolation method is provided.")) + error_status <- T + } + } else if (downscal_parans$type == "intbc") { + if (is.null(downscal_params$bc_method)) { + error(recipe$Run$logger, + paste("Downscaling type 'intbc' was requested in the recipe, but", + "no bias correction method is provided.")) + error_status <- T + } else if (!(downscal_params$bc_method %in% BC_METHODS)) { + error(recipe$Run$logger, + paste0("The accepted Bias Correction methods for the downscaling", + " module are: ", paste(BC_METHODS, collapse = ", "), ".")) + error_status <- T + } + } else if (downscal_params$type == "intlr") { + if (length(downscal_params$lr_method) == 0) { + error(recipe$Run$logger, + paste("Downscaling type 'intlr' was requested in the recipe, but", + "no linear regression method was provided.")) + error_status <- T + } else if (!(downscal_params$lr_method %in% LR_METHODS)) { + error(recipe$Run$logger, + paste0("The accepted linear regression methods for the", + " downscaling module are: ", + paste(LR_METHODS, collapse = ", "), ".")) + error_status <- T + } + } else if (downscal_params$type == "analogs") { + if (is.null(downscal_params$nanalogs)) { + warn(recipe$Run$logger, + paste("Downscaling type is 'analogs, but the number of analogs", + "has not been provided in the recipe. 
The default is 3.")) + } + } else if (downscal_params$type == "logreg") { + if (is.null(downscal_params$int_method)) { + error(recipe$Run$logger, + paste("Downscaling type 'logreg' was requested in the recipe, but", + "no interpolation method was provided.")) + error_status <- T + } + if (is.null(downscal_params$log_reg_method)) { + error(recipe$Run$logger, + paste("Downscaling type 'logreg' was requested in the recipe,", + "but no logistic regression method is provided.")) + error_status <- T + } else if (!(downscal_params$log_reg_method %in% LOGREG_METHODS)) { + error(recipe$Run$logger, + paste0("The accepted logistic regression methods for the ", + "downscaling module are: ", + paste(LOGREG_METHODS, collapse = ", "), ".")) + error_status <- T + } + } } } -- GitLab From 184a10f198c7fe98c3ada970482ddd13848b5ea3 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 29 Sep 2023 16:47:26 +0200 Subject: [PATCH 51/52] Fix bug caused by typo --- tools/check_recipe.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 3de1f63b..9f52ced3 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -344,7 +344,7 @@ check_recipe <- function(recipe) { "interpolation method is provided.")) error_status <- T } - } else if (downscal_parans$type == "intbc") { + } else if (downscal_params$type == "intbc") { if (is.null(downscal_params$bc_method)) { error(recipe$Run$logger, paste("Downscaling type 'intbc' was requested in the recipe, but", -- GitLab From b340282dc4a709f6ff09a19bb11337350fcf4635 Mon Sep 17 00:00:00 2001 From: eduzenli Date: Tue, 3 Oct 2023 10:16:47 +0200 Subject: [PATCH 52/52] unit test for the Downscaling module --- tests/recipes/recipe-seasonal_downscaling.yml | 53 +++++ tests/testthat/test-seasonal_downscaling.R | 195 ++++++++++++++++++ 2 files changed, 248 insertions(+) create mode 100644 tests/recipes/recipe-seasonal_downscaling.yml create mode 100644 tests/testthat/test-seasonal_downscaling.R diff --git a/tests/recipes/recipe-seasonal_downscaling.yml b/tests/recipes/recipe-seasonal_downscaling.yml new file mode 100644 index 00000000..1a2a3110 --- /dev/null +++ b/tests/recipes/recipe-seasonal_downscaling.yml @@ -0,0 +1,53 @@ +Description: + Author: E. 
Duzenli + +Analysis: + Horizon: Seasonal + Variables: + - {name: tas, freq: daily_mean} + Datasets: + System: + - name: ECMWF-SEAS5 + Multimodel: False + Reference: + - name: ERA5 + Time: + sdate: '1201' + fcst_year: + hcst_start: '1993' + hcst_end: '1996' + ftime_min: 1 + ftime_max: 1 + Region: + - {latmin: 17, latmax: 20, lonmin: 12, lonmax: 15} + Regrid: + method: 'none' + type: 'none' + Workflow: + Anomalies: + compute: no + cross_validation: + save: 'none' + Calibration: + method: 'none' + save: 'none' + Skill: + metric: BSS10 CRPSS RPSS mean_bias + save: 'none' + Indicators: + index: no + Downscaling: + type: analogs + int_method: + bc_method: + lr_method: + log_reg_method: + nanalogs: 3 + target_grid: /esarchive/recon/ecmwf/era5/daily_mean/tas_f1h/tas_199301.nc + save: 'none' + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: ./out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/tests/testthat/test-seasonal_downscaling.R b/tests/testthat/test-seasonal_downscaling.R new file mode 100644 index 00000000..0353a04e --- /dev/null +++ b/tests/testthat/test-seasonal_downscaling.R @@ -0,0 +1,195 @@ +library(testthat) +context("Seasonal daily data") + +source("modules/Loading/Loading.R") +source("modules/Skill/Skill.R") +source("modules/Downscaling/Downscaling.R") + + +recipe_file <- "tests/recipes/recipe-seasonal_downscaling.yml" +recipe <- prepare_outputs(recipe_file, disable_checks = F) + +# Load datasets +suppressWarnings({invisible(capture.output( +data <- Loading(recipe) +))}) + +# Downscale the data +suppressWarnings({invisible(capture.output( +downscaled_data <- Downscaling(recipe, data) +))}) + +# Compute skill metrics +suppressWarnings({invisible(capture.output( +skill_metrics <- Skill(recipe, downscaled_data) +))}) + +test_that("1. Loading", { + +expect_equal( +is.list(data), +TRUE +) +expect_equal( +names(data), +c("hcst", "fcst", "obs") +) +expect_equal( +class(data$hcst), +"s2dv_cube" +) +expect_equal( +data$fcst, +NULL +) +expect_equal( +class(data$obs), +"s2dv_cube" +) +expect_equal( +names(data$hcst), +c("data", "dims", "coords", "attrs") +) +expect_equal( +names(data$hcst), +names(data$obs) +) +expect_equal( +dim(data$hcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 31, latitude = 4, longitude = 4, ensemble = 25) +) +expect_equal( +dim(data$obs$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 31, latitude = 13, longitude = 13, ensemble = 1) +) +expect_equal( +dim(data$obs$attrs$Dates), +c(sday = 1, sweek = 1, syear = 4, time = 31) +) +expect_equal( +as.vector(drop(data$hcst$data)[1:2,1:2,1,2,3]), +c(295.5691, 291.7752, 294.0874, 290.1173), +tolerance = 0.0001 +) +expect_equal( +mean(data$hcst$data), +288.3723, +tolerance = 0.0001 +) +expect_equal( +range(data$hcst$data), +c(280.1490, 298.2324), +tolerance = 0.0001 +) +expect_equal( +(data$hcst$attrs$Dates)[1], +as.POSIXct("1993-12-01 18:00:00 UTC", tz = 'UTC') +) +expect_equal( +(data$hcst$attrs$Dates)[2], +as.POSIXct("1994-12-01 18:00:00 UTC", tz = 'UTC') +) +expect_equal( +(data$hcst$attrs$Dates)[5], +as.POSIXct("1993-12-02 18:00:00 UTC", tz = 'UTC') +) +expect_equal( +(data$obs$attrs$Dates)[10], +as.POSIXct("1994-12-03 11:30:00 UTC", tz = 'UTC') +) + +}) + +#====================================== +test_that("2. 
Downscaling", { + +expect_equal( +is.list(downscaled_data), +TRUE +) +expect_equal( +names(downscaled_data), +c("hcst", "obs", "fcst") +) +expect_equal( +class(downscaled_data$hcst), +"s2dv_cube" +) +expect_equal( +class(downscaled_data$obs), +"s2dv_cube" +) +expect_equal( +downscaled_data$fcst, +NULL +) +expect_equal( +names(downscaled_data$hcst), +c("data", "dims", "coords", "attrs") +) +expect_equal( +names(downscaled_data$hcst), +names(downscaled_data$obs) +) +expect_equal( +dim(downscaled_data$hcst$data), +c(latitude = 13, longitude = 13, syear = 4, dat = 1, var = 1, sday = 1, sweek = 1, time = 31, ensemble = 25) +) +expect_equal( +dim(downscaled_data$obs$data), +c(ensemble = 1, dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 31, latitude = 13, longitude = 13) +) +expect_equal( +as.vector(drop(downscaled_data$hcst$data)[1:2,1:2,1,2,3]), +c(293.8468, 293.5830, 294.0713, 293.7104), +tolerance = 0.0001 +) +expect_equal( +mean(downscaled_data$hcst$data), +288.5359, +tolerance = 0.0001 +) +expect_equal( +range(downscaled_data$hcst$data), +c(284.0950, 297.4405), +tolerance = 0.0001 +) + +}) + +#====================================== +test_that("3. Metrics", { + +expect_equal( +is.list(skill_metrics), +TRUE +) +expect_equal( +names(skill_metrics), +c("bss10", "bss10_significance", "crpss", "crpss_significance","rpss", "rpss_significance", "mean_bias") +) +expect_equal( +class(skill_metrics$rpss), +"array" +) +expect_equal( +dim(skill_metrics$rpss), +c(latitude = 13, longitude = 13, var = 1, time = 31) +) +expect_equal( +dim(skill_metrics$rpss_significance), +dim(skill_metrics$rpss) +) +expect_equal( +skill_metrics$rpss[1], +-0.05325714, +tolerance = 0.0001 +) +expect_equal( +skill_metrics$rpss_significance[1], +FALSE +) + +}) + +unlink(recipe$Run$output_dir, recursive = TRUE) -- GitLab