diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R new file mode 100644 index 0000000000000000000000000000000000000000..1f814b7466a9e66d5efa981075cb94522e75cd03 --- /dev/null +++ b/modules/Downscaling/Downscaling.R @@ -0,0 +1,262 @@ +# ▄████▄ ██████ ▓█████▄ ▒█████ █ █░███▄ █ ██████ ▄████▄ ▄▄▄ ██▓ ▓█████ +#▒██▀ ▀█ ▒██ ▒ ▒██▀ ██▌▒██▒ ██▒▓█░ █ ░█░██ ▀█ █ ▒██ ▒ ▒██▀ ▀█ ▒████▄ ▓██▒ ▓█ ▀ +#▒▓█ ▄ ░ ▓██▄ ░██ █▌▒██░ ██▒▒█░ █ ░█▓██ ▀█ ██▒░ ▓██▄ ▒▓█ ▄ ▒██ ▀█▄ ▒██░ ▒███ +#▒▓▓▄ ▄██▒ ▒ ██▒░▓█▄ ▌▒██ ██░░█░ █ ░█▓██▒ ▐▌██▒ ▒ ██▒▒▓▓▄ ▄██▒░██▄▄▄▄██ ▒██░ ▒▓█ ▄ +#▒ ▓███▀ ░▒██████▒▒░▒████▓ ░ ████▓▒░░░██▒██▓▒██░ ▓██░▒██████▒▒▒ ▓███▀ ░ ▓█ ▓██▒░██████▒░▒████▒ +#░ ░▒ ▒ ░▒ ▒▓▒ ▒ ░ ▒▒▓ ▒ ░ ▒░▒░▒░ ░ ▓░▒ ▒ ░ ▒░ ▒ ▒ ▒ ▒▓▒ ▒ ░░ ░▒ ▒ ░ ▒▒ ▓▒█░░ ▒░▓ ░░░ ▒░ ░ +# ░ ▒ ░ ░▒ ░ ░ ░ ▒ ▒ ░ ▒ ▒░ ▒ ░ ░ ░ ░░ ░ ▒░░ ░▒ ░ ░ ░ ▒ ▒ ▒▒ ░░ ░ ▒ ░ ░ ░ ░ +#░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ▒ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ▒ ░ ░ ░ +#░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ +#░ ░ ░ +## TODO: Remove once CSDownscale is on CRAN +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Interpolation.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Intbc.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Intlr.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Analogs.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/LogisticReg.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Utils.R') +#source("https://earth.bsc.es/gitlab/external/cstools/-/raw/master/R/CST_BiasCorrection.R") + + +## Entry params data and recipe? +downscale_datasets <- function(recipe, data) { + # Function that downscale the hindcast using the method stated in the + # recipe. For the moment, forecast must be null. + # + # data: list of s2dv_cube objects containing the hcst, obs and fcst. + # recipe: object obtained when passing the .yml recipe file to read_yaml() + + type <- tolower(recipe$Analysis$Workflow$Downscaling$type) + + if (!is.null(data$fcst)) { + warning("The downscaling will be only performed to the hindcast data") + data$fcst <- NULL + } + + if (type == "none") { + + hcst_downscal <- data$hcst + DOWNSCAL_MSG <- "##### NO DOWNSCALING PERFORMED #####" + + } else { + # Downscaling function params + int_method <- tolower(recipe$Analysis$Workflow$Downscaling$int_method) + bc_method <- tolower(recipe$Analysis$Workflow$Downscaling$bc_method) + lr_method <- tolower(recipe$Analysis$Workflow$Downscaling$lr_method) + log_reg_method <- tolower(recipe$Analysis$Workflow$Downscaling$log_reg_method) + target_grid <- tolower(recipe$Analysis$Workflow$Downscaling$target_grid) + nanalogs <- as.numeric(recipe$Analysis$Workflow$Downscaling$nanalogs) + + if (is.null(recipe$Analysis$ncores)) { + ncores <- 1 + } else { + ncores <- recipe$Analysis$ncores + } + + #TO DO: add the parametre loocv where it corresponds + if (is.null(recipe$Analysis$loocv)) { + loocv <- TRUE + } else { + loocv <- recipe$Analysis$loocv + } + + DOWNSCAL_TYPES <- c("none", "int", "intbc", "intlr", "analogs", "logreg") + BC_METHODS <- c("simple_bias", "calibration", "quantile_mapping") + LR_METHODS <- c("basic", "large_scale", "4nn") + LOG_REG_METHODS <- c("ens_mean", "ens_mean_sd", "sorted_members") + + if (!(type %in% DOWNSCAL_TYPES)) { + stop("Downscaling type in the recipe is not available. Accepted types ", + "are 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'.") + } + + if (type == "int") { + if (is.null(int_method)) { + stop("Please provide one interpolation method in the recipe.") + } + + if (is.null(target_grid)) { + stop("Please provide the target grid in the recipe.") + } + + # Ensure that observations are in the same grid as experiments + # Only needed for this method because the others already return the + # observations + latmin <- data$hcst$lat[1] + lonmin <- data$hcst$lon[1] + latmax <- data$hcst$lat[length(data$hcst$lat)] + lonmax <- data$hcst$lon[length(data$hcst$lon)] + hcst_downscal <- CST_Interpolation(data$hcst, + points = NULL, + method_remap = int_method, + target_grid = target_grid, + lat_dim = "latitude", + lon_dim = "longitude", + region = c(lonmin, lonmax, latmin, latmax), + method_point_interp = NULL) + + obs_downscal <- CST_Interpolation(data$obs, + points = NULL, + method_remap = int_method, + target_grid = target_grid, + lat_dim = "latitude", + lon_dim = "longitude", + region = c(lonmin, lonmax, latmin, latmax), + method_point_interp = NULL) + + hcst_downscal$obs <- obs_downscal$exp + + DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" + } else if (type == "intbc") { + if (length(int_method) == 0) { + stop("Please provide one (and only one) interpolation method in the recipe.") + } + + if (is.null(bc_method)) { + stop("Please provide one bias-correction method in the recipe. Accepted ", + "methods are 'simple_bias', 'calibration', 'quantile_mapping'.") + } + + if (is.null(target_grid)) { + stop("Please provide the target grid in the recipe.") + } + + if (!(bc_method %in% BC_METHODS)) { + stop(paste0(bc_method, " method in the recipe is not available. Accepted methods ", + "are 'simple_bias', 'calibration', 'quantile_mapping'.")) + } + + hcst_downscal <- CST_Intbc(data$hcst, data$obs, + target_grid = target_grid, + bc_method = bc_method, + int_method = int_method, + points = NULL, + method_point_interp = NULL, + lat_dim = "latitude", + lon_dim = "longitude", + sdate_dim = "syear", + member_dim = "ensemble", + region = NULL, + ncores = ncores) + + DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" + } else if (type == "intlr") { + if (length(int_method) == 0) { + stop("Please provide one (and only one) interpolation method in the recipe.") + } + + if (is.null(lr_method)) { + stop("Please provide one linear regression method in the recipe. Accepted ", + "methods are 'basic', 'large_scale', '4nn'.") + } + + if (is.null(target_grid)) { + stop("Please provide the target grid in the recipe.") + } + + if (!(lr_method %in% LR_METHODS)) { + stop(paste0(lr_method, " method in the recipe is not available. Accepted methods ", + "are 'basic', 'large_scale', '4nn'.")) + } + + # TO DO: add the possibility to have the element 'pred' in 'data' + if (lr_method == "large_scale") { + if (is.null(data$pred$data)) { + stop("Please provide the large scale predictors in the element 'data$pred$data'.") + } + } else { + data$pred$data <- NULL + } + + hcst_downscal <- CST_Intlr(data$hcst, data$obs, + lr_method = lr_method, + target_grid = target_grid, + points = NULL, + int_method = int_method, + method_point_interp = NULL, + predictors = data$pred$data, + lat_dim = "latitude", + lon_dim = "longitude", + sdate_dim = "syear", + time_dim = "time", + member_dim = "ensemble", + large_scale_predictor_dimname = 'vars', + loocv = loocv, + region = NULL, + ncores = ncores) + + DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" + } else if (type == "analogs") { + + if (is.null(nanalogs)) { + warning("The number of analogs for searching has not been provided in the ", + "recipe. Setting it to 3.") + nanalogs <- 3 + } + + hcst_downscal <- CST_Analogs(data$hcst, data$obs, + grid_exp = data$hcst$source_files[1], + nanalogs = nanalogs, + fun_analog = "min", + lat_dim = "latitude", + lon_dim = "longitude", + sdate_dim = "syear", + time_dim = "time", + member_dim = "ensemble", + region = NULL, + return_indices = FALSE, + loocv_window = loocv, + ncores = ncores) + + DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" + } else if (type == "logreg") { + + if (length(int_method) == 0) { + stop("Please provide one (and only one) interpolation method in the recipe.") + } + + if (is.null(log_reg_method)) { + stop("Please provide one logistic regression method in the recipe. Accepted ", + "methods are 'ens_mean', 'ens_mean_sd', 'sorted_members'.") + } + + if (is.null(target_grid)) { + stop("Please provide the target grid in the recipe.") + } + + # Since we are forcing to create three categories, and applying cross-validation, + # we need at least six years of data for the logistic regression function to not + # crash + if (dim(data$hcst$data)[names(dim(data$hcst$data)) == "syear"] <= 5) { + stop("The number of start dates is insufficient for the logisitic regression method. ", + "Please provide six or more.") + } + + if (!(log_reg_method %in% LOG_REG_METHODS)) { + stop(paste0(log_reg_method, " method in the recipe is not available. Accepted methods ", + "are 'ens_mean', 'ens_mean_sd', 'sorted_members'.")) + } + + hcst_downscal <- CST_LogisticReg(data$hcst, data$obs, + target_grid = target_grid, + int_method = int_method, + log_reg_method = log_reg_method, + probs_cat = c(1/3,2/3), + return_most_likely_cat = FALSE, + points = NULL, + method_point_interp = NULL, + lat_dim = "latitude", + lon_dim = "longitude", + sdate_dim = "syear", + member_dim = "ensemble", + region = NULL, + loocv = loocv, + ncores = ncores) + + DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" + } + } + print(DOWNSCAL_MSG) + return(list(hcst = hcst_downscal, fcst = NULL)) +} + diff --git a/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml b/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml index 31ae079d2dfe76c600ecef3083db5943e5f2ae20..fbb1ffe4fadc47744097674884b88e6a80fe3289 100644 --- a/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml +++ b/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml @@ -43,5 +43,6 @@ Analysis: Run: Loglevel: INFO Terminal: yes - output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + output_dir: /esarchive/scratch/jramon/GitLab_jramon/auto-s2s/out-logs/ code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ + diff --git a/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml b/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml new file mode 100644 index 0000000000000000000000000000000000000000..0cad69ec0fd5e8d4b87cf22e3d5abd9417ad1622 --- /dev/null +++ b/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml @@ -0,0 +1,62 @@ +# ▄████▄ ██████ ▓█████▄ ▒█████ █ █░███▄ █ ██████ ▄████▄ ▄▄▄ ██▓ ▓█████ +#▒██▀ ▀█ ▒██ ▒ ▒██▀ ██▌▒██▒ ██▒▓█░ █ ░█░██ ▀█ █ ▒██ ▒ ▒██▀ ▀█ ▒████▄ ▓██▒ ▓█ ▀ +#▒▓█ ▄ ░ ▓██▄ ░██ █▌▒██░ ██▒▒█░ █ ░█▓██ ▀█ ██▒░ ▓██▄ ▒▓█ ▄ ▒██ ▀█▄ ▒██░ ▒███ +#▒▓▓▄ ▄██▒ ▒ ██▒░▓█▄ ▌▒██ ██░░█░ █ ░█▓██▒ ▐▌██▒ ▒ ██▒▒▓▓▄ ▄██▒░██▄▄▄▄██ ▒██░ ▒▓█ ▄ +#▒ ▓███▀ ░▒██████▒▒░▒████▓ ░ ████▓▒░░░██▒██▓▒██░ ▓██░▒██████▒▒▒ ▓███▀ ░ ▓█ ▓██▒░██████▒░▒████▒ +#░ ░▒ ▒ ░▒ ▒▓▒ ▒ ░ ▒▒▓ ▒ ░ ▒░▒░▒░ ░ ▓░▒ ▒ ░ ▒░ ▒ ▒ ▒ ▒▓▒ ▒ ░░ ░▒ ▒ ░ ▒▒ ▓▒█░░ ▒░▓ ░░░ ▒░ ░ +# ░ ▒ ░ ░▒ ░ ░ ░ ▒ ▒ ░ ▒ ▒░ ▒ ░ ░ ░ ░░ ░ ▒░░ ░▒ ░ ░ ░ ▒ ▒ ▒▒ ░░ ░ ▒ ░ ░ ░ ░ +#░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ▒ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ▒ ░ ░ ░ +#░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ +#░ ░ ░ +Description: + Author: J. Ramon +Info: downscaling of seasonal predictions from coarse to fine grids + +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: system5c3s + Multimodel: no + Reference: + name: era5 + Time: + sdate: '0501' + hcst_start: '2000' + hcst_end: '2005' + ftime_min: 1 + ftime_max: 1 +#sometimes we want the region for the obs to be bigger than for hcst + Region: + latmin: 34.1 + latmax: 45.1 + lonmin: -12.5 + lonmax: 6.35 + Regrid: + method: bilinear + type: none + Workflow: + Calibration: + method: raw + Skill: + metric: BSS10 BSS90 Mean_Bias_SS RPSS ROCSS + Downscaling: + # Assumption 1: leave-one-out cross-validation is always applied + # Assumption 2: for analogs, we select the best analog (minimum distance) + # TO DO: add downscaling to point locations + type: # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg' + int_method: # optional, regridding method accepted by CDO + bc_method: # optional, 'simple_bias', 'calibration', 'quantile_mapping' + lr_method: # optional, 'basic', 'large_scale', '4nn' + log_reg_method: # optional, 'ens_mean', 'ens_mean_sd', 'sorted_members' + target_grid: /esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # optional, nc file or grid accepted by CDO + nanalogs: 4 # optional, number of analogs to be searched + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/jramon/GitLab_jramon/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/jramon/GitLab_jramon/auto-s2s/ diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index a8664569047bb60185733e27a1250ec985cd481d..e7236da334b492c7ef2511ac316bb8d85016e535 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -22,7 +22,7 @@ plot_data <- function(recipe, # skill_metrics: list of arrays containing the computed skill metrics # significance: Bool. Whether to include significance dots where applicable - outdir <- paste0(get_dir(recipe), "/plots/") + outdir <- paste0(get_dir(recipe), "plots/") dir.create(outdir, showWarnings = FALSE, recursive = TRUE) if ((is.null(skill_metrics)) && (is.null(data$fcst))) { @@ -47,6 +47,11 @@ plot_data <- function(recipe, plot_skill_metrics(recipe, archive, data$hcst, skill_metrics, outdir, significance) } + + # Plot downscaled data + if (!is.null(downscaled_data)) { + plot_ensemble_mean(recipe, archive, downscaled_data$hcst$exp, outdir) + } # Plot forecast ensemble mean if (!is.null(data$fcst)) { @@ -80,9 +85,10 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, if (!is.list(skill_metrics) || is.null(names(skill_metrics))) { stop("The element 'skill_metrics' must be a list of named arrays.") } - + latitude <- data_cube$lat longitude <- data_cube$lon + system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name hcst_period <- paste0(recipe$Analysis$Time$hcst_start, "-", recipe$Analysis$Time$hcst_end) diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R index b8541488c8540e61c694c42a0f36be60595699a9..44394cbdfb7dcb5db5dc0a57dc320cde36eba968 100644 --- a/modules/test_seasonal.R +++ b/modules/test_seasonal.R @@ -1,3 +1,4 @@ +setwd("/esarchive/scratch/jramon/GitLab_jramon/auto-s2s/") source("modules/Loading/Loading.R") source("modules/Calibration/Calibration.R") source("modules/Anomalies/Anomalies.R") diff --git a/modules/test_seasonal_downscaling.R b/modules/test_seasonal_downscaling.R new file mode 100644 index 0000000000000000000000000000000000000000..5f375abe5a700ec70e9879dd6ffe84e30202a957 --- /dev/null +++ b/modules/test_seasonal_downscaling.R @@ -0,0 +1,111 @@ + +# DO source /esarchive/scratch/jramon/GitLab_jramon/auto-s2s/MODULES before opening the R session + +setwd("/esarchive/scratch/jramon/GitLab_jramon/auto-s2s/") +source("modules/Loading/Loading.R") +source("modules/Downscaling/Downscaling.R") +source("modules/Calibration/Calibration.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") + +# use this development to run the analogs function. Also change the Interpolation.R script to +# not load CDORemap from s2dv. +library(easyNCDF) +library(ncdf4) +library(ClimProjDiags) +source('https://earth.bsc.es/gitlab/es/s2dv/-/raw/master/R/Utils.R') +source('https://earth.bsc.es/gitlab/es/s2dv/-/raw/master/R/CDORemap.R') + +# Loop all downscaling methods and types +#types <- c('int', 'intbc', 'intlr', 'analogs', 'logreg') +#int_methods <- c('con', 'bil', 'bic', 'nn', 'con2') +#bc_methods <- c('simple_bias', 'calibration', 'quantile_mapping') +#lr_methods <- c('basic', 'large_scale', '4nn') +#logreg_methods <- c('ens_mean', 'ens_mean_sd', 'sorted_members') + +# Read recipe +recipe_file <- "modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml" +recipe <- prepare_outputs(recipe_file) + +# Load datasets +data <- load_datasets(recipe) + +# Modify the recipe manually +recipe$Analysis$Workflow$Downscaling$type <- 'analogs' +recipe$Analysis$Workflow$Downscaling$int_method <- NULL # con, bil, bic, nn, con2 +recipe$Analysis$Workflow$Downscaling$bc_method <- NULL +recipe$Analysis$Workflow$Downscaling$lr_method <- NULL +recipe$Analysis$Workflow$Downscaling$log_reg_method <- NULL + +# Downscale datasets +downscaled_data <- downscale_datasets(recipe, data) + +# Compute skill +# NOTE: LogReg method does not have member dimension because it provides directly the +# predicted categories +# The following line should work only for Interpolation, Intbc, Intlr, Analogs +#stopifnot(recipe$Analysis$Workflow$Downscaling$type %in% c('int', 'intbc', 'intlr', 'analogs')) +#skill_metrics <- compute_skill_metrics(recipe, downscaled_data$hcst$exp, downscaled_data$hcst$obs) + +# Save downscaled data to later intercompare them +saveRDS(downscaled_data, file = paste0('/esarchive/scratch/jramon/downscaling/data/FOCUS/', + recipe$Analysis$Workflow$Downscaling$type, '-', + recipe$Analysis$Workflow$Downscaling$int_method, '-', + recipe$Analysis$Workflow$Downscaling$bc_method, '-', + recipe$Analysis$Workflow$Downscaling$lr_method, '-', + recipe$Analysis$Workflow$Downscaling$log_reg_method, '.RDS')) + +# Plot downscaled data +plot_data(recipe, data = NULL, downscaled_data = downscaled_data) + +# Plot skill metrics +my_data <- list() +my_data$hcst <- downscaled_data$hcst$exp +plot_data(recipe, data = my_data, skill_metrics = skill_metrics) + +# Plot Taylor diagram intercomparison +source('/esarchive/scratch/jramon/GitLab_jramon/nc2ts/taylor_diagram.R') +rds_files <- list.files(path = '/esarchive/scratch/jramon/downscaling/data/FOCUS', full.names = T) +down_data <- list() +methods <- c(list.files(path = '/esarchive/scratch/jramon/downscaling/data/FOCUS')) +for (nfile in seq(rds_files)) { + + # to plot all points in the same Taylor diagram + flag <- ifelse(nfile == 1, F, T) + + down_data[[nfile]] <- readRDS(rds_files[[nfile]]) + + #compute model ensemble mean + ens_mean <- Apply(down_data[[nfile]]$hcst$exp$data, 'ensemble', mean)$output1 + + # remove ensemble dimension in obs + obs <- Subset(down_data[[nfile]]$hcst$obs$data, along = 'ensemble', indices = 1, drop = 'selected') + + ens_mean <- .reorder_dims(arr_ref = obs, arr_to_reorder = ens_mean) + + plot <- taylor.diagram(ref = as.vector(obs), model = as.vector(ens_mean), normalize = TRUE, + add = flag, pch = nfile, sd.arcs = TRUE, maxsd = 1.5) + +} + +legend(1.3, 1.5, cex = 0.8, pt.cex = 1, legend = methods, pch = 1:length(rds_files), ncol = 1) + +################################## +# EXAMPLE FOR SEASONAL CALIBRATION +################################## +#data <- load_datasets(recipe) +# Calibrate datasets +#calibrated_data <- calibrate_datasets(recipe, data) +# Compute skill metrics +#skill_metrics <- compute_skill_metrics(recipe, calibrated_data$hcst, data$obs) +# Compute percentiles and probability bins +#probabilities <- compute_probabilities(recipe, calibrated_data$hcst) +# Export all data to netCDF +#save_data(recipe, data, calibrated_data, skill_metrics, probabilities) +# Plot data +#plot_data(recipe, data, calibrated_data, skill_metrics, probabilities, +# significance = T) + + +