From ba62ffc23c2ea862bc4ef8e70c82b6b27b6982e3 Mon Sep 17 00:00:00 2001 From: Jaume Ramon Date: Wed, 26 Oct 2022 14:45:01 +0200 Subject: [PATCH 01/21] Downscaling recipie --- modules/Downscaling/Downscaling.R | 147 ++++++++++++++++++ .../testing_recipes/recipe_system5c3s-tas.yml | 60 ++++--- modules/test_seasonal.R | 3 +- modules/test_seasonal_downscaling.R | 27 ++++ 4 files changed, 212 insertions(+), 25 deletions(-) create mode 100644 modules/Downscaling/Downscaling.R create mode 100644 modules/test_seasonal_downscaling.R diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R new file mode 100644 index 00000000..11c57a2a --- /dev/null +++ b/modules/Downscaling/Downscaling.R @@ -0,0 +1,147 @@ + +## TODO: Remove once CSDownscale is on CRAN +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Interpolation.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Intbc.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Intlr.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Analogs.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Utils.R') +source("https://earth.bsc.es/gitlab/external/cstools/-/raw/master/R/CST_BiasCorrection.R") + + +## Entry params data and recipe? +downscale_datasets <- function(data, recipe) { + # Function that downscale the hindcast using the method stated in the + # recipe. For the moment, forecast must be null. + # + # data: list of s2dv_cube objects containing the hcst, obs and fcst. 
+ # recipe: object obtained when passing the .yml recipe file to read_yaml() + + type <- tolower(recipe$Analysis$Workflow$Downscaling[["type"]]) + + if (!is.null(data$fcst)) { + warning("The downscaling will be only performed to the hindcast data") + data$fcst <- NULL + } + + if (type == "none") { + + hcst_calibrated <- data$hcst + DOWNSCAL_MSG <- "##### NO DOWNSCALING PERFORMED #####" + + } else { + # Downscaling function params + int_method <- tolower(recipe$Analysis$Workflow$Downscaling[["int_method"]]) + bc_method <- tolower(recipe$Analysis$Workflow$Downscaling[["bc_method"]]) + lr_method <- tolower(recipe$Analysis$Workflow$Downscaling[["lr_method"]]) + target_grid <- tolower(recipe$Analysis$Workflow$Downscaling[["target_grid"]]) + nanalogs <- tolower(recipe$Analysis$Workflow$Downscaling[["nanalogs"]]) + + if (is.null(recipe$Analysis$ncores)) { + ncores <- 1 + } else { + ncores <- recipe$Analysis$ncores + } + + if (is.null(recipe$Analysis$loocv)) { + loocv <- TRUE + } else { + loocv <- recipe$Analysis$loocv + } + + if (is.null(recipe$Analysis$remove_NAs)) { + na.rm = F + } else { + na.rm = recipe$Analysis$remove_NAs + } + + CALIB_MSG <- "##### CALIBRATION COMPLETE #####" + # Replicate observation array for the multi-model case + if (mm) { + obs.mm <- obs$data + for(dat in 1:(dim(data$hcst$data)['dat'][[1]]-1)) { + obs.mm <- abind(obs.mm, data$obs$data, + along=which(names(dim(data$obs$data)) == 'dat')) + } + names(dim(obs.mm)) <- names(dim(obs$data)) + data$obs$data <- obs.mm + remove(obs.mm) + } + + if (recipe$Analysis$Variables$freq == "monthly_mean") { + + CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based") + ## TODO: implement other calibration methods + ## TODO: Restructure the code? 
+ if (!(method %in% CST_CALIB_METHODS)) { + stop("Calibration method in the recipe is not available for monthly", + " data.") + } else { + ## Alba's version of CST_Calibration (pending merge) is being used + # Calibrate the hindcast + hcst_calibrated <- CST_Calibration(data$hcst, data$obs, + cal.method = method, + eval.method = "leave-one-out", + multi.model = mm, + na.fill = TRUE, + na.rm = na.rm, + apply_to = NULL, + alpha = NULL, + memb_dim = "ensemble", + sdate_dim = "syear", + ncores = ncores) + if (!is.null(data$fcst)) { + # Calibrate the forecast + fcst_calibrated <- CST_Calibration(data$hcst, data$obs, data$fcst, + cal.method = method, + eval.method = "leave-one-out", + multi.model = mm, + na.fill = TRUE, + na.rm = na.rm, + apply_to = NULL, + alpha = NULL, + memb_dim = "ensemble", + sdate_dim = "syear", + ncores = ncores) + } else { + fcst_calibrated <- NULL + } + } + } else if (recipe$Analysis$Variables$freq == "daily_mean") { + # Daily data calibration using Quantile Mapping + if (!(method %in% c("qmap"))) { + stop("Calibration method in the recipe is not available at daily ", + "frequency. Only quantile mapping 'qmap' is implemented.") + } + # Calibrate the hindcast + hcst_calibrated <- CST_QuantileMapping(data$hcst, data$obs, + exp_cor = NULL, + sample_dims = c("syear", + "time", + "ensemble"), + sample_length = NULL, + method = "QUANT", + wet.day = FALSE, + ncores = ncores, + na.rm = na.rm) + + if (!is.null(data$fcst)) { + # Calibrate the forecast + fcst_calibrated <- CST_QuantileMapping(data$hcst, data$obs, + exp_cor = data$fcst, + sample_dims = c("syear", + "time", + "ensemble"), + sample_length = NULL, + method = "QUANT", + wet.day = FALSE, + ncores = ncores, + na.rm = na.rm) + } else { + fcst_calibrated <- NULL + } + } + } +print(CALIB_MSG) + ## TODO: Return observations too? 
+ return(list(hcst = hcst_calibrated, fcst = fcst_calibrated)) +} diff --git a/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml b/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml index 27cdccdc..15159553 100644 --- a/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml +++ b/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml @@ -1,11 +1,22 @@ +# ▄████▄ ██████ ▓█████▄ ▒█████ █ █░███▄ █ ██████ ▄████▄ ▄▄▄ ██▓ ▓█████ +#▒██▀ ▀█ ▒██ ▒ ▒██▀ ██▌▒██▒ ██▒▓█░ █ ░█░██ ▀█ █ ▒██ ▒ ▒██▀ ▀█ ▒████▄ ▓██▒ ▓█ ▀ +#▒▓█ ▄ ░ ▓██▄ ░██ █▌▒██░ ██▒▒█░ █ ░█▓██ ▀█ ██▒░ ▓██▄ ▒▓█ ▄ ▒██ ▀█▄ ▒██░ ▒███ +#▒▓▓▄ ▄██▒ ▒ ██▒░▓█▄ ▌▒██ ██░░█░ █ ░█▓██▒ ▐▌██▒ ▒ ██▒▒▓▓▄ ▄██▒░██▄▄▄▄██ ▒██░ ▒▓█ ▄ +#▒ ▓███▀ ░▒██████▒▒░▒████▓ ░ ████▓▒░░░██▒██▓▒██░ ▓██░▒██████▒▒▒ ▓███▀ ░ ▓█ ▓██▒░██████▒░▒████▒ +#░ ░▒ ▒ ░▒ ▒▓▒ ▒ ░ ▒▒▓ ▒ ░ ▒░▒░▒░ ░ ▓░▒ ▒ ░ ▒░ ▒ ▒ ▒ ▒▓▒ ▒ ░░ ░▒ ▒ ░ ▒▒ ▓▒█░░ ▒░▓ ░░░ ▒░ ░ +# ░ ▒ ░ ░▒ ░ ░ ░ ▒ ▒ ░ ▒ ▒░ ▒ ░ ░ ░ ░░ ░ ▒░░ ░▒ ░ ░ ░ ▒ ▒ ▒▒ ░░ ░ ▒ ░ ░ ░ ░ +#░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ▒ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ▒ ░ ░ ░ +#░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ +#░ ░ ░ Description: - Author: V. Agudetse + Author: J. 
Ramon +Info: downscaling of seasonal predictions from coarse to fine grids Analysis: Horizon: Seasonal Variables: name: tas - freq: monthly_mean + freq: daily_mean Datasets: System: name: system5c3s @@ -13,32 +24,33 @@ Analysis: Reference: name: era5 Time: - sdate: '0601' - fcst_year: '2020' - hcst_start: '1993' - hcst_end: '2006' + sdate: '0501' + hcst_start: '2000' + hcst_end: '2004' ftime_min: 1 ftime_max: 3 +#sometimes we want the region for the obs to be bigger than for hcst Region: - latmin: -10 - latmax: 10 - lonmin: 0 - lonmax: 20 - Regrid: - method: bilinear - type: to_system - Workflow: - Calibration: - method: raw - Skill: - metric: RPSS_specs BSS90_specs EnsCorr_specs FRPS_specs FRPSS_specs BSS10_specs FRPS - Probabilities: - percentiles: [[1/3, 2/3]] - Indicators: - index: no + latmin: 34.1 + latmax: 45.1 + lonmin: -12.5 + lonmax: 6.35 + Regrid: + type: none + Workflow: + Downscaling: + # Assumption 1: leave-one-out cross-validation is always applied + # TO DO: add downscaling to point locations + # TO DO: how can we pass the parameter 'predictors'? 
+ type: # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logres' + int_method: # optional, regridding method accepted by CDO + bc_method: # optional, 'simple_bias', 'calibration', 'quantile_mapping' + lr_method: # optional, 'basic', 'large_scale', '4nn' + target_grid: # optional, nc file or grid accepted by CDO + nanalogs: # optional, number of analogs to be searched Output_format: S2S4E Run: Loglevel: INFO Terminal: yes - output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ + output_dir: /esarchive/scratch/jramon/GitLab_jramon/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/jramon/GitLab_jramon/auto-s2s/ diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R index 5f59794f..b9f94b13 100644 --- a/modules/test_seasonal.R +++ b/modules/test_seasonal.R @@ -1,11 +1,12 @@ +setwd("/esarchive/scratch/jramon/GitLab_jramon/auto-s2s/") source("modules/Loading/Loading.R") source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") -recipe_file <- "modules/Loading/testing_recipes/recipe_system7c3s-tas.yml" +recipe_file <- "modules/Loading/testing_recipes/recipe_system5c3s-tas.yml" recipe <- read_yaml(recipe_file) archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive diff --git a/modules/test_seasonal_downscaling.R b/modules/test_seasonal_downscaling.R new file mode 100644 index 00000000..ec1d2f2e --- /dev/null +++ b/modules/test_seasonal_downscaling.R @@ -0,0 +1,27 @@ + +# DO source ../MODULES before opening the R session + +setwd("/esarchive/scratch/jramon/GitLab_jramon/auto-s2s/") +source("modules/Loading/Loading.R") +source("modules/Calibration/Calibration.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") + +recipe_file <- "modules/Loading/testing_recipes/recipe_system5c3s-tas.yml" +recipe <- 
read_yaml(recipe_file) +archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive + +# Load datasets +data <- load_datasets(recipe_file) +# Calibrate datasets +calibrated_data <- calibrate_datasets(data, recipe) +# Compute skill metrics +skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe) +# Compute percentiles and probability bins +probabilities <- compute_probabilities(calibrated_data$hcst, recipe) +# Export all data to netCDF +save_data(recipe, archive, data, calibrated_data, skill_metrics, probabilities) +# Plot data +plot_data(recipe, archive, data, calibrated_data, skill_metrics, + probabilities, significance = T) -- GitLab From 0052980a4b978b76823d93afe0b716419dbed043 Mon Sep 17 00:00:00 2001 From: Jaume Ramon Date: Fri, 28 Oct 2022 15:09:28 +0200 Subject: [PATCH 02/21] Downscaling module v0 --- modules/Downscaling/Downscaling.R | 299 ++++++++++++++++++++---------- 1 file changed, 198 insertions(+), 101 deletions(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index 11c57a2a..8ff8dee9 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -1,4 +1,13 @@ - +# ▄████▄ ██████ ▓█████▄ ▒█████ █ █░███▄ █ ██████ ▄████▄ ▄▄▄ ██▓ ▓█████ +#▒██▀ ▀█ ▒██ ▒ ▒██▀ ██▌▒██▒ ██▒▓█░ █ ░█░██ ▀█ █ ▒██ ▒ ▒██▀ ▀█ ▒████▄ ▓██▒ ▓█ ▀ +#▒▓█ ▄ ░ ▓██▄ ░██ █▌▒██░ ██▒▒█░ █ ░█▓██ ▀█ ██▒░ ▓██▄ ▒▓█ ▄ ▒██ ▀█▄ ▒██░ ▒███ +#▒▓▓▄ ▄██▒ ▒ ██▒░▓█▄ ▌▒██ ██░░█░ █ ░█▓██▒ ▐▌██▒ ▒ ██▒▒▓▓▄ ▄██▒░██▄▄▄▄██ ▒██░ ▒▓█ ▄ +#▒ ▓███▀ ░▒██████▒▒░▒████▓ ░ ████▓▒░░░██▒██▓▒██░ ▓██░▒██████▒▒▒ ▓███▀ ░ ▓█ ▓██▒░██████▒░▒████▒ +#░ ░▒ ▒ ░▒ ▒▓▒ ▒ ░ ▒▒▓ ▒ ░ ▒░▒░▒░ ░ ▓░▒ ▒ ░ ▒░ ▒ ▒ ▒ ▒▓▒ ▒ ░░ ░▒ ▒ ░ ▒▒ ▓▒█░░ ▒░▓ ░░░ ▒░ ░ +# ░ ▒ ░ ░▒ ░ ░ ░ ▒ ▒ ░ ▒ ▒░ ▒ ░ ░ ░ ░░ ░ ▒░░ ░▒ ░ ░ ░ ▒ ▒ ▒▒ ░░ ░ ▒ ░ ░ ░ ░ +#░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ▒ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ▒ ░ ░ ░ +#░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ +#░ ░ ░ ## TODO: Remove once CSDownscale is on CRAN 
source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Interpolation.R') source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Intbc.R') @@ -16,7 +25,7 @@ downscale_datasets <- function(data, recipe) { # data: list of s2dv_cube objects containing the hcst, obs and fcst. # recipe: object obtained when passing the .yml recipe file to read_yaml() - type <- tolower(recipe$Analysis$Workflow$Downscaling[["type"]]) + type <- tolower(recipe$Analysis$Workflow$Downscaling$type) if (!is.null(data$fcst)) { warning("The downscaling will be only performed to the hindcast data") @@ -30,118 +39,206 @@ downscale_datasets <- function(data, recipe) { } else { # Downscaling function params - int_method <- tolower(recipe$Analysis$Workflow$Downscaling[["int_method"]]) - bc_method <- tolower(recipe$Analysis$Workflow$Downscaling[["bc_method"]]) - lr_method <- tolower(recipe$Analysis$Workflow$Downscaling[["lr_method"]]) - target_grid <- tolower(recipe$Analysis$Workflow$Downscaling[["target_grid"]]) - nanalogs <- tolower(recipe$Analysis$Workflow$Downscaling[["nanalogs"]]) - + int_method <- tolower(recipe$Analysis$Workflow$Downscaling$int_method) + bc_method <- tolower(recipe$Analysis$Workflow$Downscaling$bc_method) + lr_method <- tolower(recipe$Analysis$Workflow$Downscaling$lr_method) + log_reg_method <- tolower(recipe$Analysis$Workflow$Downscaling$log_reg_method) + target_grid <- tolower(recipe$Analysis$Workflow$Downscaling$target_grid) + nanalogs <- tolower(recipe$Analysis$Workflow$Downscaling$nanalog) + if (is.null(recipe$Analysis$ncores)) { ncores <- 1 } else { ncores <- recipe$Analysis$ncores } - + + #TO DO: add the parametre loocv where it corresponds if (is.null(recipe$Analysis$loocv)) { loocv <- TRUE } else { loocv <- recipe$Analysis$loocv } - if (is.null(recipe$Analysis$remove_NAs)) { - na.rm = F - } else { - na.rm = recipe$Analysis$remove_NAs - } - - CALIB_MSG <- "##### CALIBRATION COMPLETE #####" - # Replicate observation array for the 
multi-model case - if (mm) { - obs.mm <- obs$data - for(dat in 1:(dim(data$hcst$data)['dat'][[1]]-1)) { - obs.mm <- abind(obs.mm, data$obs$data, - along=which(names(dim(data$obs$data)) == 'dat')) - } - names(dim(obs.mm)) <- names(dim(obs$data)) - data$obs$data <- obs.mm - remove(obs.mm) - } - - if (recipe$Analysis$Variables$freq == "monthly_mean") { + DOWNSCAL_TYPES <- c("none", "int", "intbc", "intlr", "analogs", "logres") + BC_METHODS <- c("simple_bias", "calibration", "quantile_mapping") + LR_METHODS <- c("basic", "large_scale", "4nn") + LOG_REG_METHODS <- c("ens_mean", "ens_mean_sd", "sorted_members") + + if (!(type %in% DOWNSCAL_TYPES)) { + stop("Downscaling type in the recipe is not available. Accepted types ", + "are 'none', 'int', 'intbc', 'intlr', 'analogs', 'logres'.") + } + + if (type == "int") { + if (is.null(int_method)) { + stop("Please provide one interpolation method in the recipe.") + } + + if (is.null(target_grid)) { + stop("Please provide the target grid in the recipe.") + } + + hcst_downscal <- CST_Interpolation(data$hcst$data, + points = NULL, + method_remap = int_method, + target_grid = target_grid, + lat_dim = "latitude", + lon_dim = "longitude", + region = NULL, + method_point_interp = NULL) + + + DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" + } else if (type == "intbc") { + if (is.null(int_method)) { + stop("Please provide one interpolation method in the recipe.") + } + + + if (is.null(bc_method)) { + stop("Please provide one bias-correction method in the recipe. Accepted ", + "methods are 'simple_bias', 'calibration', 'quantile_mapping'.") + } + + if (is.null(target_grid)) { + stop("Please provide the target grid in the recipe.") + } + + if (!(bc_method %in% BC_METHODS)) { + stop("Bias-correction method in the recipe is not available. 
Accepted methods ", + "are 'simple_bias', 'calibration', 'quantile_mapping'.") + } + + hcst_downscal <- CST_Intbc(data$hcst$data, data$obs$data, + target_grid = target_grid, + bc_method = bc_method, + int_method = int_method, + points = NULL, + method_point_interp = NULL, + lat_dim = "latitude", + lon_dim = "longitude", + sdate_dim = "syear", + member_dim = "ensemble", + region = NULL, + ncores = ncores) - CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based") - ## TODO: implement other calibration methods - ## TODO: Restructure the code? - if (!(method %in% CST_CALIB_METHODS)) { - stop("Calibration method in the recipe is not available for monthly", - " data.") - } else { - ## Alba's version of CST_Calibration (pending merge) is being used - # Calibrate the hindcast - hcst_calibrated <- CST_Calibration(data$hcst, data$obs, - cal.method = method, - eval.method = "leave-one-out", - multi.model = mm, - na.fill = TRUE, - na.rm = na.rm, - apply_to = NULL, - alpha = NULL, - memb_dim = "ensemble", - sdate_dim = "syear", - ncores = ncores) - if (!is.null(data$fcst)) { - # Calibrate the forecast - fcst_calibrated <- CST_Calibration(data$hcst, data$obs, data$fcst, - cal.method = method, - eval.method = "leave-one-out", - multi.model = mm, - na.fill = TRUE, - na.rm = na.rm, - apply_to = NULL, - alpha = NULL, - memb_dim = "ensemble", - sdate_dim = "syear", - ncores = ncores) - } else { - fcst_calibrated <- NULL + DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" + } else if (type == "intlr") { + + if (is.null(int_method)) { + stop("Please provide one interpolation method in the recipe.") + } + + if (is.null(lr_method)) { + stop("Please provide one linear regression method in the recipe. Accepted ", + "methods are 'basic', 'large_scale', '4nn'.") + } + + if (is.null(target_grid)) { + stop("Please provide the target grid in the recipe.") + } + + if (!(lr_method %in% LR_METHODS)) { + stop("Linear regression method in the recipe is not available. 
Accepted methods ", + "are 'basic', 'large_scale', '4nn'.") + } + + # TO DO: add the possibility to have the element 'pred' in 'data' + if (lr_method == "large_scale") { + if (is.null(data$pred$data)) { + stop("Please provide the large scale predictors in the element 'data$pred$data'.") } - } - } else if (recipe$Analysis$Variables$freq == "daily_mean") { - # Daily data calibration using Quantile Mapping - if (!(method %in% c("qmap"))) { - stop("Calibration method in the recipe is not available at daily ", - "frequency. Only quantile mapping 'qmap' is implemented.") - } - # Calibrate the hindcast - hcst_calibrated <- CST_QuantileMapping(data$hcst, data$obs, - exp_cor = NULL, - sample_dims = c("syear", - "time", - "ensemble"), - sample_length = NULL, - method = "QUANT", - wet.day = FALSE, - ncores = ncores, - na.rm = na.rm) - - if (!is.null(data$fcst)) { - # Calibrate the forecast - fcst_calibrated <- CST_QuantileMapping(data$hcst, data$obs, - exp_cor = data$fcst, - sample_dims = c("syear", - "time", - "ensemble"), - sample_length = NULL, - method = "QUANT", - wet.day = FALSE, - ncores = ncores, - na.rm = na.rm) } else { - fcst_calibrated <- NULL + data$pred$data <- NULL + } + + if (is.null(target_grid)) { + stop("Please provide the target grid in the recipe.") + } + if (is.null(target_grid)) { + stop("Please provide the target grid in the recipe.") } + + + hcst_downscal <- CST_Intlr(data$hcst$data, data$obs$data, + lr_method = lr_method, + target_grid = target_grid, + points = NULL, + int_method = int_method, + method_point_interp = NULL, + predictors = data$pred$data, + lat_dim = "latitude", + lon_dim = "longitude", + sdate_dim = "syear", + time_dim = "time", + large_scale_predictor_dimname = 'vars', + loocv = loocv, + region = NULL, + ncores = ncores) + + DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" + } else if (type == "analogs") { + + if (is.null(nanalogs)) { + warning("The number of analogs for searching has not been provided in the ", + "recipe. 
Setting it to 3.") + nanalogs <- 3 + } + + hcst_downscal <- CST_Analogs(data$hcst$data, data$obs$data, + grid_exp = data$hcst$source_files[1], + nanalogs = nanalogs, + fun_analog = NULL, + lat_dim = "latitude", + lon_dim = "longitude", + sdate_dim = "syear", + time_dim = "time", + region = NULL, + return_indices = FALSE, + loocv_window = loocv, + ncores = ncores) + + DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" + } else if (type == "logres") { + + if (is.null(int_method)) { + stop("Please provide one interpolation method in the recipe.") + } + + if (is.null(log_reg_method)) { + stop("Please provide one logistic regression method in the recipe. Accepted ", + "methods are 'ens_mean', 'ens_mean_sd', 'sorted_members'.") + } + + if (is.null(target_grid)) { + stop("Please provide the target grid in the recipe.") + } + + if (!(log_reg_method %in% LOG_REG_METHODS)) { + stop("Logistic regression method in the recipe is not available. Accepted methods ", + "are 'ens_mean', 'ens_mean_sd', 'sorted_members'.") + } + + hcst_downscal <- CST_LogisticReg(data$hcst$data, data$obs$data, + target_grid = target_grid, + int_method = int_method, + log_reg_method = log_reg_method, + probs_cat = c(1/3,2/3), + return_most_likely_cat = FALSE, + points = NULL, + method_point_interp = NULL, + lat_dim = "latitude", + lon_dim = "longitude", + sdate_dim = "syear", + member_dim = "ensemble", + region = NULL, + loocv = loocv, + ncores = ncores) + + DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } - } -print(CALIB_MSG) - ## TODO: Return observations too? - return(list(hcst = hcst_calibrated, fcst = fcst_calibrated)) -} + + print(DOWNSCAL_MSG) + return(list(hcst = hcst_downscal, fcst = NULL)) +} + -- GitLab From 4a59cf73766e0d92544c069cd89fe6914330ecb2 Mon Sep 17 00:00:00 2001 From: Jaume Ramon Date: Wed, 2 Nov 2022 11:01:29 +0100 Subject: [PATCH 03/21] . 
--- .../testing_recipes/recipe_system5c3s-tas.yml | 61 ++++++++----------- .../recipe_system5c3s-tas_downscaling.yml | 57 +++++++++++++++++ modules/test_seasonal.R | 2 +- 3 files changed, 83 insertions(+), 37 deletions(-) create mode 100644 modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml diff --git a/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml b/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml index 15159553..38e23c8b 100644 --- a/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml +++ b/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml @@ -1,22 +1,11 @@ -# ▄████▄ ██████ ▓█████▄ ▒█████ █ █░███▄ █ ██████ ▄████▄ ▄▄▄ ██▓ ▓█████ -#▒██▀ ▀█ ▒██ ▒ ▒██▀ ██▌▒██▒ ██▒▓█░ █ ░█░██ ▀█ █ ▒██ ▒ ▒██▀ ▀█ ▒████▄ ▓██▒ ▓█ ▀ -#▒▓█ ▄ ░ ▓██▄ ░██ █▌▒██░ ██▒▒█░ █ ░█▓██ ▀█ ██▒░ ▓██▄ ▒▓█ ▄ ▒██ ▀█▄ ▒██░ ▒███ -#▒▓▓▄ ▄██▒ ▒ ██▒░▓█▄ ▌▒██ ██░░█░ █ ░█▓██▒ ▐▌██▒ ▒ ██▒▒▓▓▄ ▄██▒░██▄▄▄▄██ ▒██░ ▒▓█ ▄ -#▒ ▓███▀ ░▒██████▒▒░▒████▓ ░ ████▓▒░░░██▒██▓▒██░ ▓██░▒██████▒▒▒ ▓███▀ ░ ▓█ ▓██▒░██████▒░▒████▒ -#░ ░▒ ▒ ░▒ ▒▓▒ ▒ ░ ▒▒▓ ▒ ░ ▒░▒░▒░ ░ ▓░▒ ▒ ░ ▒░ ▒ ▒ ▒ ▒▓▒ ▒ ░░ ░▒ ▒ ░ ▒▒ ▓▒█░░ ▒░▓ ░░░ ▒░ ░ -# ░ ▒ ░ ░▒ ░ ░ ░ ▒ ▒ ░ ▒ ▒░ ▒ ░ ░ ░ ░░ ░ ▒░░ ░▒ ░ ░ ░ ▒ ▒ ▒▒ ░░ ░ ▒ ░ ░ ░ ░ -#░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ▒ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ▒ ░ ░ ░ -#░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ -#░ ░ ░ Description: - Author: J. Ramon -Info: downscaling of seasonal predictions from coarse to fine grids + Author: V. 
Agudetse Analysis: Horizon: Seasonal Variables: name: tas - freq: daily_mean + freq: monthly_mean Datasets: System: name: system5c3s @@ -24,33 +13,33 @@ Analysis: Reference: name: era5 Time: - sdate: '0501' - hcst_start: '2000' - hcst_end: '2004' + sdate: '0601' + fcst_year: '2020' + hcst_start: '1993' + hcst_end: '2006' ftime_min: 1 ftime_max: 3 -#sometimes we want the region for the obs to be bigger than for hcst Region: - latmin: 34.1 - latmax: 45.1 - lonmin: -12.5 - lonmax: 6.35 - Regrid: - type: none - Workflow: - Downscaling: - # Assumption 1: leave-one-out cross-validation is always applied - # TO DO: add downscaling to point locations - # TO DO: how can we pass the parameter 'predictors'? - type: # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logres' - int_method: # optional, regridding method accepted by CDO - bc_method: # optional, 'simple_bias', 'calibration', 'quantile_mapping' - lr_method: # optional, 'basic', 'large_scale', '4nn' - target_grid: # optional, nc file or grid accepted by CDO - nanalogs: # optional, number of analogs to be searched + latmin: -10 + latmax: 10 + lonmin: 0 + lonmax: 20 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: raw + Skill: + metric: RPSS_specs BSS90_specs EnsCorr_specs FRPS_specs FRPSS_specs BSS10_specs FRPS + Probabilities: + percentiles: [[1/3, 2/3]] + Indicators: + index: no Output_format: S2S4E Run: Loglevel: INFO Terminal: yes - output_dir: /esarchive/scratch/jramon/GitLab_jramon/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/jramon/GitLab_jramon/auto-s2s/ + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ + diff --git a/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml b/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml new file mode 100644 index 00000000..13f4e156 --- /dev/null +++ b/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml 
@@ -0,0 +1,57 @@ +# ▄████▄ ██████ ▓█████▄ ▒█████ █ █░███▄ █ ██████ ▄████▄ ▄▄▄ ██▓ ▓█████ +#▒██▀ ▀█ ▒██ ▒ ▒██▀ ██▌▒██▒ ██▒▓█░ █ ░█░██ ▀█ █ ▒██ ▒ ▒██▀ ▀█ ▒████▄ ▓██▒ ▓█ ▀ +#▒▓█ ▄ ░ ▓██▄ ░██ █▌▒██░ ██▒▒█░ █ ░█▓██ ▀█ ██▒░ ▓██▄ ▒▓█ ▄ ▒██ ▀█▄ ▒██░ ▒███ +#▒▓▓▄ ▄██▒ ▒ ██▒░▓█▄ ▌▒██ ██░░█░ █ ░█▓██▒ ▐▌██▒ ▒ ██▒▒▓▓▄ ▄██▒░██▄▄▄▄██ ▒██░ ▒▓█ ▄ +#▒ ▓███▀ ░▒██████▒▒░▒████▓ ░ ████▓▒░░░██▒██▓▒██░ ▓██░▒██████▒▒▒ ▓███▀ ░ ▓█ ▓██▒░██████▒░▒████▒ +#░ ░▒ ▒ ░▒ ▒▓▒ ▒ ░ ▒▒▓ ▒ ░ ▒░▒░▒░ ░ ▓░▒ ▒ ░ ▒░ ▒ ▒ ▒ ▒▓▒ ▒ ░░ ░▒ ▒ ░ ▒▒ ▓▒█░░ ▒░▓ ░░░ ▒░ ░ +# ░ ▒ ░ ░▒ ░ ░ ░ ▒ ▒ ░ ▒ ▒░ ▒ ░ ░ ░ ░░ ░ ▒░░ ░▒ ░ ░ ░ ▒ ▒ ▒▒ ░░ ░ ▒ ░ ░ ░ ░ +#░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ▒ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ▒ ░ ░ ░ +#░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ +#░ ░ ░ +Description: + Author: J. Ramon +Info: downscaling of seasonal predictions from coarse to fine grids + +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: daily_mean + Datasets: + System: + name: system5c3s + Multimodel: no + Reference: + name: era5 + Time: + sdate: '0501' + hcst_start: '2000' + hcst_end: '2004' + ftime_min: 1 + ftime_max: 3 +#sometimes we want the region for the obs to be bigger than for hcst + Region: + latmin: 34.1 + latmax: 45.1 + lonmin: -12.5 + lonmax: 6.35 + Regrid: + type: none + Workflow: + Downscaling: + # Assumption 1: leave-one-out cross-validation is always applied + # TO DO: add downscaling to point locations + # TO DO: how can we pass the parameter 'predictors'? 
+ type: # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg' + int_method: # optional, regridding method accepted by CDO + bc_method: # optional, 'simple_bias', 'calibration', 'quantile_mapping' + lr_method: # optional, 'basic', 'large_scale', '4nn' + log_reg_method: # optional, 'ens_mean', 'ens_mean_sd', 'sorted_members' + target_grid: # optional, nc file or grid accepted by CDO + nanalogs: # optional, number of analogs to be searched + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/jramon/GitLab_jramon/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/jramon/GitLab_jramon/auto-s2s/ diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R index b9f94b13..58adece6 100644 --- a/modules/test_seasonal.R +++ b/modules/test_seasonal.R @@ -6,7 +6,7 @@ source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") -recipe_file <- "modules/Loading/testing_recipes/recipe_system5c3s-tas.yml" +recipe_file <- "modules/Loading/testing_recipes/recipe_system7c3s-tas.yml" recipe <- read_yaml(recipe_file) archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive -- GitLab From fac2975020bc4efc5504a8fd0ed70bb1f5f56cc3 Mon Sep 17 00:00:00 2001 From: Jaume Ramon Date: Wed, 2 Nov 2022 17:30:21 +0100 Subject: [PATCH 04/21] Downscaling recipe with the downscaling module --- .../testing_recipes/recipe_system5c3s-tas_downscaling.yml | 8 +++++++- modules/test_seasonal_downscaling.R | 7 +++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml b/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml index 13f4e156..8b56f3aa 100644 --- a/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml +++ b/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml @@ -16,7 +16,7 @@ Analysis: Horizon: Seasonal Variables: name: tas - freq: 
daily_mean + freq: monthly_mean Datasets: System: name: system5c3s @@ -25,6 +25,7 @@ Analysis: name: era5 Time: sdate: '0501' + fcst_year: '2020' hcst_start: '2000' hcst_end: '2004' ftime_min: 1 @@ -36,8 +37,13 @@ Analysis: lonmin: -12.5 lonmax: 6.35 Regrid: + method: bilinear type: none Workflow: + Calibration: + method: raw + Skill: + metric: RPSS Downscaling: # Assumption 1: leave-one-out cross-validation is always applied # TO DO: add downscaling to point locations diff --git a/modules/test_seasonal_downscaling.R b/modules/test_seasonal_downscaling.R index ec1d2f2e..9043c2a9 100644 --- a/modules/test_seasonal_downscaling.R +++ b/modules/test_seasonal_downscaling.R @@ -3,17 +3,20 @@ setwd("/esarchive/scratch/jramon/GitLab_jramon/auto-s2s/") source("modules/Loading/Loading.R") +source("modules/Loading/Downscaling.R") source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") -recipe_file <- "modules/Loading/testing_recipes/recipe_system5c3s-tas.yml" +recipe_file <- "modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml" recipe <- read_yaml(recipe_file) archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive # Load datasets -data <- load_datasets(recipe_file) +data <- load_datasets(recipe) +# Downscale datasets +downscaled_data <- downscale_datasets(data, recipe) # Calibrate datasets calibrated_data <- calibrate_datasets(data, recipe) # Compute skill metrics -- GitLab From 521489d25a4a1c4b72b73fd112b254b08b2c2b65 Mon Sep 17 00:00:00 2001 From: Jaume Ramon Date: Thu, 10 Nov 2022 12:53:13 +0100 Subject: [PATCH 05/21] module working for Interpolation and Intbc --- modules/Downscaling/Downscaling.R | 223 ++++++++++-------- .../recipe_system5c3s-tas_downscaling.yml | 14 +- modules/test_seasonal_downscaling.R | 24 +- 3 files changed, 134 insertions(+), 127 deletions(-) diff --git a/modules/Downscaling/Downscaling.R 
b/modules/Downscaling/Downscaling.R index 8ff8dee9..4d3edd2e 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -18,7 +18,7 @@ source("https://earth.bsc.es/gitlab/external/cstools/-/raw/master/R/CST_BiasCorr ## Entry params data and recipe? -downscale_datasets <- function(data, recipe) { +downscale_datasets <- function(recipe, data) { # Function that downscale the hindcast using the method stated in the # recipe. For the moment, forecast must be null. # @@ -34,17 +34,17 @@ downscale_datasets <- function(data, recipe) { if (type == "none") { - hcst_calibrated <- data$hcst + hcst_downscal <- data$hcst DOWNSCAL_MSG <- "##### NO DOWNSCALING PERFORMED #####" } else { # Downscaling function params - int_method <- tolower(recipe$Analysis$Workflow$Downscaling$int_method) - bc_method <- tolower(recipe$Analysis$Workflow$Downscaling$bc_method) - lr_method <- tolower(recipe$Analysis$Workflow$Downscaling$lr_method) - log_reg_method <- tolower(recipe$Analysis$Workflow$Downscaling$log_reg_method) - target_grid <- tolower(recipe$Analysis$Workflow$Downscaling$target_grid) - nanalogs <- tolower(recipe$Analysis$Workflow$Downscaling$nanalog) + int_methods <- tolower(recipe$Analysis$Workflow$Downscaling$int_method) + bc_methods <- tolower(recipe$Analysis$Workflow$Downscaling$bc_method) + lr_methods <- tolower(recipe$Analysis$Workflow$Downscaling$lr_method) + log_reg_methods <- tolower(recipe$Analysis$Workflow$Downscaling$log_reg_method) + target_grid <- tolower(recipe$Analysis$Workflow$Downscaling$target_grid) + nanalogs <- as.numeric(recipe$Analysis$Workflow$Downscaling$nanalogs) if (is.null(recipe$Analysis$ncores)) { ncores <- 1 @@ -70,32 +70,36 @@ downscale_datasets <- function(data, recipe) { } if (type == "int") { - if (is.null(int_method)) { - stop("Please provide one interpolation method in the recipe.") + if (is.null(int_methods)) { + stop("Please provide one or more interpolation methods in the recipe.") } if (is.null(target_grid)) { 
stop("Please provide the target grid in the recipe.") } - hcst_downscal <- CST_Interpolation(data$hcst$data, - points = NULL, - method_remap = int_method, - target_grid = target_grid, - lat_dim = "latitude", - lon_dim = "longitude", - region = NULL, - method_point_interp = NULL) - + hcst_downscal <- list() + for (int_method in strsplit(int_methods, ", | |,")[[1]]) { + + hcst_d <- CST_Interpolation(data$hcst, + points = NULL, + method_remap = int_method, + target_grid = target_grid, + lat_dim = "latitude", + lon_dim = "longitude", + region = NULL, + method_point_interp = NULL) + + hcst_downscal[[ int_method ]] <- hcst_d + } DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } else if (type == "intbc") { - if (is.null(int_method)) { - stop("Please provide one interpolation method in the recipe.") + if (is.null(int_methods) | (length(strsplit(int_methods, ", | |,")[[1]]) != 1)) { + stop("Please provide one (and only one) interpolation method in the recipe.") } - - if (is.null(bc_method)) { + if (is.null(bc_methods)) { stop("Please provide one bias-correction method in the recipe. Accepted ", "methods are 'simple_bias', 'calibration', 'quantile_mapping'.") } @@ -104,32 +108,38 @@ downscale_datasets <- function(data, recipe) { stop("Please provide the target grid in the recipe.") } - if (!(bc_method %in% BC_METHODS)) { - stop("Bias-correction method in the recipe is not available. Accepted methods ", - "are 'simple_bias', 'calibration', 'quantile_mapping'.") - } - - hcst_downscal <- CST_Intbc(data$hcst$data, data$obs$data, - target_grid = target_grid, - bc_method = bc_method, - int_method = int_method, - points = NULL, - method_point_interp = NULL, - lat_dim = "latitude", - lon_dim = "longitude", - sdate_dim = "syear", - member_dim = "ensemble", - region = NULL, - ncores = ncores) - + hcst_downscal <- list() + for (bc_method in strsplit(bc_methods, ", | |,")[[1]]) { + + if (!(bc_method %in% BC_METHODS)) { + stop(paste0(bc_method, " method in the recipe is not available. 
Accepted methods ", + "are 'simple_bias', 'calibration', 'quantile_mapping'.")) + } + + hcst_d <- CST_Intbc(data$hcst, data$obs, + target_grid = target_grid, + bc_method = bc_method, + int_method = int_methods, + points = NULL, + method_point_interp = NULL, + lat_dim = "latitude", + lon_dim = "longitude", + sdate_dim = "syear", + member_dim = "ensemble", + region = NULL, + ncores = ncores) + + hcst_downscal[[ bc_method ]] <- hcst_d + } + DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } else if (type == "intlr") { - if (is.null(int_method)) { - stop("Please provide one interpolation method in the recipe.") + if (is.null(int_methods) | (length(strsplit(int_methods, ", | |,")[[1]]) != 1)) { + stop("Please provide one (and only one) interpolation method in the recipe.") } - if (is.null(lr_method)) { + if (is.null(lr_methods)) { stop("Please provide one linear regression method in the recipe. Accepted ", "methods are 'basic', 'large_scale', '4nn'.") } @@ -138,44 +148,42 @@ downscale_datasets <- function(data, recipe) { stop("Please provide the target grid in the recipe.") } - if (!(lr_method %in% LR_METHODS)) { - stop("Linear regression method in the recipe is not available. Accepted methods ", - "are 'basic', 'large_scale', '4nn'.") - } + hcst_downscal <- list() + for (lr_method in strsplit(lr_methods, ", | |,")[[1]]) { - # TO DO: add the possibility to have the element 'pred' in 'data' - if (lr_method == "large_scale") { - if (is.null(data$pred$data)) { - stop("Please provide the large scale predictors in the element 'data$pred$data'.") + if (!(lr_method %in% LR_METHODS)) { + stop(paste0(lr_method, " method in the recipe is not available. 
Accepted methods ", + "are 'basic', 'large_scale', '4nn'.")) } - } else { - data$pred$data <- NULL - } - if (is.null(target_grid)) { - stop("Please provide the target grid in the recipe.") - } - if (is.null(target_grid)) { - stop("Please provide the target grid in the recipe.") - } + # TO DO: add the possibility to have the element 'pred' in 'data' + if (lr_method == "large_scale") { + if (is.null(data$pred$data)) { + stop("Please provide the large scale predictors in the element 'data$pred$data'.") + } + } else { + data$pred$data <- NULL + } + hcst_d <- CST_Intlr(data$hcst, data$obs, + lr_method = lr_method, + target_grid = target_grid, + points = NULL, + int_method = int_methods, + method_point_interp = NULL, + predictors = data$pred$data, + lat_dim = "latitude", + lon_dim = "longitude", + sdate_dim = "syear", + time_dim = "time", + large_scale_predictor_dimname = 'vars', + loocv = loocv, + region = NULL, + ncores = ncores) - hcst_downscal <- CST_Intlr(data$hcst$data, data$obs$data, - lr_method = lr_method, - target_grid = target_grid, - points = NULL, - int_method = int_method, - method_point_interp = NULL, - predictors = data$pred$data, - lat_dim = "latitude", - lon_dim = "longitude", - sdate_dim = "syear", - time_dim = "time", - large_scale_predictor_dimname = 'vars', - loocv = loocv, - region = NULL, - ncores = ncores) - + hcst_downscal[[ lr_method ]] <- hcst_d + } + DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } else if (type == "analogs") { @@ -184,8 +192,9 @@ downscale_datasets <- function(data, recipe) { "recipe. 
Setting it to 3.") nanalogs <- 3 } - - hcst_downscal <- CST_Analogs(data$hcst$data, data$obs$data, + + + hcst_downscal <- CST_Analogs(data$hcst, data$obs, grid_exp = data$hcst$source_files[1], nanalogs = nanalogs, fun_analog = NULL, @@ -201,11 +210,11 @@ downscale_datasets <- function(data, recipe) { DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } else if (type == "logres") { - if (is.null(int_method)) { - stop("Please provide one interpolation method in the recipe.") + if (is.null(int_methods) | (length(strsplit(int_methods, ", | |,")[[1]]) != 1)) { + stop("Please provide one (and only one) interpolation method in the recipe.") } - if (is.null(log_reg_method)) { + if (is.null(log_reg_methods)) { stop("Please provide one logistic regression method in the recipe. Accepted ", "methods are 'ens_mean', 'ens_mean_sd', 'sorted_members'.") } @@ -214,31 +223,37 @@ downscale_datasets <- function(data, recipe) { stop("Please provide the target grid in the recipe.") } - if (!(log_reg_method %in% LOG_REG_METHODS)) { - stop("Logistic regression method in the recipe is not available. Accepted methods ", - "are 'ens_mean', 'ens_mean_sd', 'sorted_members'.") - } + hcst_downscal <- list() + for (log_reg_method in strsplit(log_reg_methods, ", | |,")[[1]]) { + + if (!(log_reg_method %in% LOG_REG_METHODS)) { + stop(paste0(log_reg_method, " method in the recipe is not available. 
Accepted methods ", + "are 'ens_mean', 'ens_mean_sd', 'sorted_members'.")) + } - hcst_downscal <- CST_LogisticReg(data$hcst$data, data$obs$data, - target_grid = target_grid, - int_method = int_method, - log_reg_method = log_reg_method, - probs_cat = c(1/3,2/3), - return_most_likely_cat = FALSE, - points = NULL, - method_point_interp = NULL, - lat_dim = "latitude", - lon_dim = "longitude", - sdate_dim = "syear", - member_dim = "ensemble", - region = NULL, - loocv = loocv, - ncores = ncores) + hcst_d <- CST_LogisticReg(data$hcst, data$obs, + target_grid = target_grid, + int_method = int_methods, + log_reg_method = log_reg_method, + probs_cat = c(1/3,2/3), + return_most_likely_cat = FALSE, + points = NULL, + method_point_interp = NULL, + lat_dim = "latitude", + lon_dim = "longitude", + sdate_dim = "syear", + member_dim = "ensemble", + region = NULL, + loocv = loocv, + ncores = ncores) + + hcst_downscal[[ log_reg_method ]] <- hcst_d + } DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } - - print(DOWNSCAL_MSG) - return(list(hcst = hcst_downscal, fcst = NULL)) + } + print(DOWNSCAL_MSG) + return(list(hcst = hcst_downscal, fcst = NULL)) } diff --git a/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml b/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml index 8b56f3aa..a6549863 100644 --- a/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml +++ b/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml @@ -25,11 +25,10 @@ Analysis: name: era5 Time: sdate: '0501' - fcst_year: '2020' hcst_start: '2000' hcst_end: '2004' ftime_min: 1 - ftime_max: 3 + ftime_max: 1 #sometimes we want the region for the obs to be bigger than for hcst Region: latmin: 34.1 @@ -43,17 +42,16 @@ Analysis: Calibration: method: raw Skill: - metric: RPSS + metric: RPSS EnsCorr BSS90 Mean_Bias_SS Downscaling: # Assumption 1: leave-one-out cross-validation is always applied # TO DO: add downscaling to point locations - # TO DO: how 
can we pass the parameter 'predictors'? - type: # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg' - int_method: # optional, regridding method accepted by CDO - bc_method: # optional, 'simple_bias', 'calibration', 'quantile_mapping' + type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg' + int_method: bilinear # optional, regridding method accepted by CDO + bc_method: simple_bias # optional, 'simple_bias', 'calibration', 'quantile_mapping' lr_method: # optional, 'basic', 'large_scale', '4nn' log_reg_method: # optional, 'ens_mean', 'ens_mean_sd', 'sorted_members' - target_grid: # optional, nc file or grid accepted by CDO + target_grid: /esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # optional, nc file or grid accepted by CDO nanalogs: # optional, number of analogs to be searched Output_format: S2S4E Run: diff --git a/modules/test_seasonal_downscaling.R b/modules/test_seasonal_downscaling.R index 9043c2a9..2f9ffbd0 100644 --- a/modules/test_seasonal_downscaling.R +++ b/modules/test_seasonal_downscaling.R @@ -1,30 +1,24 @@ -# DO source ../MODULES before opening the R session +# DO source /esarchive/scratch/jramon/GitLab_jramon/auto-s2s/MODULES before opening the R session setwd("/esarchive/scratch/jramon/GitLab_jramon/auto-s2s/") source("modules/Loading/Loading.R") -source("modules/Loading/Downscaling.R") +source("modules/Downscaling/Downscaling.R") source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") recipe_file <- "modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml" -recipe <- read_yaml(recipe_file) -archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive +recipe <- prepare_outputs(recipe_file) +#archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive # Load datasets data <- load_datasets(recipe) # Downscale datasets -downscaled_data <- 
downscale_datasets(data, recipe) -# Calibrate datasets -calibrated_data <- calibrate_datasets(data, recipe) -# Compute skill metrics -skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe) -# Compute percentiles and probability bins -probabilities <- compute_probabilities(calibrated_data$hcst, recipe) -# Export all data to netCDF -save_data(recipe, archive, data, calibrated_data, skill_metrics, probabilities) +downscaled_data <- downscale_datasets(recipe, data) +# Compute skill +skill_metrics <- compute_skill_metrics(recipe, downscaled_data$hcst$simple_bias, data$obs) # Plot data -plot_data(recipe, archive, data, calibrated_data, skill_metrics, - probabilities, significance = T) +plot_data(recipe, calibrated_data = NULL, skill_metrics = NULL, + probabilities = NULL, significance = F) -- GitLab From 99bf98713140707e445a4c29182d17cd70497671 Mon Sep 17 00:00:00 2001 From: Jaume Ramon Date: Tue, 29 Nov 2022 11:57:10 +0100 Subject: [PATCH 06/21] Workflow work arounded --- modules/test_seasonal_downscaling.R | 45 ++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/modules/test_seasonal_downscaling.R b/modules/test_seasonal_downscaling.R index 2f9ffbd0..898af7eb 100644 --- a/modules/test_seasonal_downscaling.R +++ b/modules/test_seasonal_downscaling.R @@ -9,16 +9,53 @@ source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") +# use this development to run the analogs function. Also change the Interpolation.R script to +# not load CDORemap from s2dv. 
+library(easyNCDF) +library(ncdf4) +library(ClimProjDiags) +source('https://earth.bsc.es/gitlab/es/s2dv/-/raw/master/R/Utils.R') +source('https://earth.bsc.es/gitlab/es/s2dv/-/raw/master/R/CDORemap.R') + recipe_file <- "modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml" recipe <- prepare_outputs(recipe_file) -#archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive # Load datasets data <- load_datasets(recipe) + # Downscale datasets downscaled_data <- downscale_datasets(recipe, data) + # Compute skill -skill_metrics <- compute_skill_metrics(recipe, downscaled_data$hcst$simple_bias, data$obs) +# NOTE: LogReg method does not have member dimension because it provides directly the +# predicted categories +# The following line should work only for Interpolation, Intbc, Intlr, Analogs +stopifnot(recipe$Analysis$Workflow$Downscaling$type %in% c('int', 'intbc', 'intlr', 'analogs')) +skill_metrics <- lapply(downscaled_data$hcst, function(x) compute_skill_metrics(recipe, x$exp, x$obs)) + +# Plot downscaled data +lapply(downscaled_data$hcst, function(x) plot_data(recipe, downscaled_data = x, data = NULL)) + +# Plot skill data +my_data <- list() +my_data$hcst <- downscaled_data$hcst$bilinear$exp +plot_data(recipe, skill_metrics = skill_metrics$bilinear, data = my_data) #Plot skill metrics + +################################## +# EXAMPLE FOR SEASONAL CALIBRATION +################################## +#data <- load_datasets(recipe) +# Calibrate datasets +#calibrated_data <- calibrate_datasets(recipe, data) +# Compute skill metrics +#skill_metrics <- compute_skill_metrics(recipe, calibrated_data$hcst, data$obs) +# Compute percentiles and probability bins +#probabilities <- compute_probabilities(recipe, calibrated_data$hcst) +# Export all data to netCDF +#save_data(recipe, data, calibrated_data, skill_metrics, probabilities) # Plot data -plot_data(recipe, calibrated_data = NULL, skill_metrics = NULL, - probabilities = NULL, significance = F) 
+#plot_data(recipe, data, calibrated_data, skill_metrics, probabilities, +# significance = T) + + + -- GitLab From 35476e7d5dd22e3f0f03136bf183ac5d938a18f9 Mon Sep 17 00:00:00 2001 From: Jaume Ramon Date: Tue, 29 Nov 2022 17:18:49 +0100 Subject: [PATCH 07/21] Scripts adapted to work with several downscaling methods --- modules/Downscaling/Downscaling.R | 80 ++++++++++++++----- .../recipe_system5c3s-tas_downscaling.yml | 11 +-- modules/Saving/paths2save.R | 24 ++++-- modules/Visualization/Visualization.R | 29 +++++-- modules/test_seasonal_downscaling.R | 7 +- 5 files changed, 115 insertions(+), 36 deletions(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index 4d3edd2e..c3d063b4 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -13,6 +13,7 @@ source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Interp source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Intbc.R') source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Intlr.R') source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Analogs.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/LogisticReg.R') source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Utils.R') source("https://earth.bsc.es/gitlab/external/cstools/-/raw/master/R/CST_BiasCorrection.R") @@ -59,14 +60,14 @@ downscale_datasets <- function(recipe, data) { loocv <- recipe$Analysis$loocv } - DOWNSCAL_TYPES <- c("none", "int", "intbc", "intlr", "analogs", "logres") + DOWNSCAL_TYPES <- c("none", "int", "intbc", "intlr", "analogs", "logreg") BC_METHODS <- c("simple_bias", "calibration", "quantile_mapping") LR_METHODS <- c("basic", "large_scale", "4nn") LOG_REG_METHODS <- c("ens_mean", "ens_mean_sd", "sorted_members") if (!(type %in% DOWNSCAL_TYPES)) { stop("Downscaling type in the recipe is not available. 
Accepted types ", - "are 'none', 'int', 'intbc', 'intlr', 'analogs', 'logres'.") + "are 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'.") } if (type == "int") { @@ -81,21 +82,43 @@ downscale_datasets <- function(recipe, data) { hcst_downscal <- list() for (int_method in strsplit(int_methods, ", | |,")[[1]]) { + # Ensure that observations are in the same grid as experiments + # Only needed for this method because the others already return the + # observations + latmin <- data$hcst$lat[1] + lonmin <- data$hcst$lon[1] + latmax <- data$hcst$lat[length(data$hcst$lat)] + lonmax <- data$hcst$lon[length(data$hcst$lon)] hcst_d <- CST_Interpolation(data$hcst, points = NULL, method_remap = int_method, target_grid = target_grid, lat_dim = "latitude", lon_dim = "longitude", - region = NULL, + region = c(lonmin, lonmax, latmin, latmax), method_point_interp = NULL) + obs_d <- CST_Interpolation(data$obs, + points = NULL, + method_remap = int_method, + target_grid = target_grid, + lat_dim = "latitude", + lon_dim = "longitude", + region = c(lonmin, lonmax, latmin, latmax), + method_point_interp = NULL) + + + hcst_d$obs <- obs_d$exp hcst_downscal[[ int_method ]] <- hcst_d } DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } else if (type == "intbc") { - if (is.null(int_methods) | (length(strsplit(int_methods, ", | |,")[[1]]) != 1)) { + if (length(int_methods) == 0) { + stop("Please provide one (and only one) interpolation method in the recipe.") + } + + if (length(strsplit(int_methods, ", | |,")[[1]]) != 1) { stop("Please provide one (and only one) interpolation method in the recipe.") } @@ -134,8 +157,11 @@ downscale_datasets <- function(recipe, data) { DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } else if (type == "intlr") { + if (length(int_methods) == 0) { + stop("Please provide one (and only one) interpolation method in the recipe.") + } - if (is.null(int_methods) | (length(strsplit(int_methods, ", | |,")[[1]]) != 1)) { + if (length(strsplit(int_methods, ", | 
|,")[[1]]) != 1) { stop("Please provide one (and only one) interpolation method in the recipe.") } @@ -176,6 +202,7 @@ downscale_datasets <- function(recipe, data) { lon_dim = "longitude", sdate_dim = "syear", time_dim = "time", + member_dim = "ensemble", large_scale_predictor_dimname = 'vars', loocv = loocv, region = NULL, @@ -193,24 +220,29 @@ downscale_datasets <- function(recipe, data) { nanalogs <- 3 } - - hcst_downscal <- CST_Analogs(data$hcst, data$obs, - grid_exp = data$hcst$source_files[1], - nanalogs = nanalogs, - fun_analog = NULL, - lat_dim = "latitude", - lon_dim = "longitude", - sdate_dim = "syear", - time_dim = "time", - region = NULL, - return_indices = FALSE, - loocv_window = loocv, - ncores = ncores) + hcst_downscal <- list() + hcst_downscal[["analogs"]] <- CST_Analogs(data$hcst, data$obs, + grid_exp = data$hcst$source_files[1], + nanalogs = nanalogs, + fun_analog = "min", + lat_dim = "latitude", + lon_dim = "longitude", + sdate_dim = "syear", + time_dim = "time", + member_dim = "ensemble", + region = NULL, + return_indices = FALSE, + loocv_window = loocv, + ncores = ncores) DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" - } else if (type == "logres") { + } else if (type == "logreg") { - if (is.null(int_methods) | (length(strsplit(int_methods, ", | |,")[[1]]) != 1)) { + if (length(int_methods) == 0) { + stop("Please provide one (and only one) interpolation method in the recipe.") + } + + if (length(strsplit(int_methods, ", | |,")[[1]]) != 1) { stop("Please provide one (and only one) interpolation method in the recipe.") } @@ -222,6 +254,14 @@ downscale_datasets <- function(recipe, data) { if (is.null(target_grid)) { stop("Please provide the target grid in the recipe.") } + + # Since we are forcing to create three categories, and applying cross-validation, + # we need at least six years of data for the logistic regression function to not + # crash + if (dim(data$hcst$data)[names(dim(data$hcst$data)) == "syear"] <= 5) { + stop("The number of 
start dates is insufficient for the logisitic regression method. ", + "Please provide six or more.") + } hcst_downscal <- list() for (log_reg_method in strsplit(log_reg_methods, ", | |,")[[1]]) { diff --git a/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml b/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml index a6549863..400448b7 100644 --- a/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml +++ b/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml @@ -26,7 +26,7 @@ Analysis: Time: sdate: '0501' hcst_start: '2000' - hcst_end: '2004' + hcst_end: '2005' ftime_min: 1 ftime_max: 1 #sometimes we want the region for the obs to be bigger than for hcst @@ -45,14 +45,15 @@ Analysis: metric: RPSS EnsCorr BSS90 Mean_Bias_SS Downscaling: # Assumption 1: leave-one-out cross-validation is always applied + # Assumption 2: for analogs, we select the best analog (minimum distance) # TO DO: add downscaling to point locations - type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg' - int_method: bilinear # optional, regridding method accepted by CDO - bc_method: simple_bias # optional, 'simple_bias', 'calibration', 'quantile_mapping' + type: int # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg' + int_method: bilinear nn conservative # optional, regridding method accepted by CDO + bc_method: # optional, 'simple_bias', 'calibration', 'quantile_mapping' lr_method: # optional, 'basic', 'large_scale', '4nn' log_reg_method: # optional, 'ens_mean', 'ens_mean_sd', 'sorted_members' target_grid: /esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # optional, nc file or grid accepted by CDO - nanalogs: # optional, number of analogs to be searched + nanalogs: 1 # optional, number of analogs to be searched Output_format: S2S4E Run: Loglevel: INFO diff --git a/modules/Saving/paths2save.R b/modules/Saving/paths2save.R index f48ebe7b..e34a6cf8 100644 --- 
a/modules/Saving/paths2save.R +++ b/modules/Saving/paths2save.R @@ -60,13 +60,27 @@ get_dir <- function(recipe, agg = "global") { calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method) store.freq <- recipe$Analysis$Variables$freq - + downsc.type <- tolower(recipe$Analysis$Workflow$Downscaling$type) + if (downsc.type == "int") { + downsc.method <- strsplit(tolower(recipe$Analysis$Workflow$Downscaling$int_method), ", | |,")[[1]] + } else if (downsc.type == "intbc") { + downsc.method <- strsplit(tolower(recipe$Analysis$Workflow$Downscaling$bc_method), ", | |,")[[1]] + } else if (downsc.type == "intlr") { + downsc.method <- strsplit(tolower(recipe$Analysis$Workflow$Downscaling$lr_method), ", | |,")[[1]] + } else if (downsc.type == "analogs") { + downsc.method <- "analogs" + } else if (downsc.type == "logreg") { + downsc.method <- strsplit(tolower(recipe$Analysis$Workflow$Downscaling$log_reg_method), ", | |,")[[1]] + } + switch(tolower(agg), - "country" = {dir <- paste0(outdir, "/", calib.method, "-", - store.freq, "/", variable, + "country" = {dir <- paste0(outdir, calib.method, "-", + store.freq, "/", downsc.type, "/", + downsc.method, "/", variable, "_country/", fcst.sdate, "/")}, - "global" = {dir <- paste0(outdir, "/", calib.method, "-", - store.freq, "/", variable, "/", + "global" = {dir <- paste0(outdir, calib.method, "-", + store.freq, "/", downsc.type, "/", + downsc.method, "/", variable, "/", fcst.sdate, "/")}) return(dir) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index ff0e9fd4..17f58196 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -8,7 +8,8 @@ source("modules/Visualization/tmp/PlotCombinedMap.R") plot_data <- function(recipe, data, - calibrated_data = NULL, + downscaled_data = NULL, + calibrated_data = NULL, skill_metrics = NULL, probabilities = NULL, archive = NULL, @@ -23,11 +24,12 @@ plot_data <- function(recipe, # skill_metrics: list of 
arrays containing the computed skill metrics # significance: Bool. Whether to include significance dots where applicable - outdir <- paste0(get_dir(recipe), "/plots/") - dir.create(outdir, showWarnings = FALSE, recursive = TRUE) + outdir <- paste0(get_dir(recipe), "plots/") + sapply(outdir, dir.create, showWarnings = FALSE, recursive = TRUE) + #dir.create(outdir, showWarnings = FALSE, recursive = TRUE) if ((is.null(skill_metrics)) && (is.null(calibrated_data)) && - (is.null(data$fcst))) { + (is.null(data$fcst)) && (is.null(downscaled_data))) { stop("The Visualization module has been called, but there is no data ", "that can be plotted.") } @@ -47,6 +49,12 @@ plot_data <- function(recipe, plot_skill_metrics(recipe, archive, data$hcst, skill_metrics, outdir, significance) } + + # Plot forecast ensemble mean + if (!is.null(downscaled_data)) { + plot_downscaled_data(recipe, archive, downscaled_data, outdir) + #plot_ensemble_mean(recipe, archive, downscaled_data$exp, outdir) + } # Plot forecast ensemble mean if (!is.null(calibrated_data$fcst)) { @@ -69,6 +77,16 @@ plot_data <- function(recipe, } } +plot_downscaled_data <- function(recipe, archive, data_ls, outdir) { + + methods <- names(data_ls) + for (method in methods) { + outdir_method <- grep(pattern = method, x = outdir, value = TRUE) + print(outdir_method) + plot_ensemble_mean(recipe, archive, fcst = data_ls[[method]]$exp, outdir = outdir_method) + } +} + plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, outdir, significance = F) { @@ -80,9 +98,10 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, if (!is.list(skill_metrics) || is.null(names(skill_metrics))) { stop("The element 'skill_metrics' must be a list of named arrays.") } - + latitude <- data_cube$lat longitude <- data_cube$lon + system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name hcst_period <- paste0(recipe$Analysis$Time$hcst_start, "-", recipe$Analysis$Time$hcst_end) diff --git 
a/modules/test_seasonal_downscaling.R b/modules/test_seasonal_downscaling.R index 898af7eb..12026bba 100644 --- a/modules/test_seasonal_downscaling.R +++ b/modules/test_seasonal_downscaling.R @@ -34,7 +34,12 @@ stopifnot(recipe$Analysis$Workflow$Downscaling$type %in% c('int', 'intbc', 'intl skill_metrics <- lapply(downscaled_data$hcst, function(x) compute_skill_metrics(recipe, x$exp, x$obs)) # Plot downscaled data -lapply(downscaled_data$hcst, function(x) plot_data(recipe, downscaled_data = x, data = NULL)) +plot_data(recipe, downscaled_data = downscaled_data$hcst, data = NULL) + +# Plot raw data +my_data <- list() +my_data$fcst <- data$hcst +plot_data(recipe, data = my_data) # Plot skill data my_data <- list() -- GitLab From 07d3f53501fd29cd11364ae6a3dcd3bd03ea0eb3 Mon Sep 17 00:00:00 2001 From: Jaume Ramon Date: Thu, 16 Feb 2023 15:06:04 +0100 Subject: [PATCH 08/21] last changes before git pull --- modules/Downscaling/Downscaling.R | 269 ++++++++---------- .../testing_recipes/recipe_system5c3s-tas.yml | 2 +- .../recipe_system5c3s-tas_downscaling.yml | 8 +- modules/Visualization/Visualization.R | 24 +- modules/test_seasonal_downscaling.R | 65 ++++- 5 files changed, 182 insertions(+), 186 deletions(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index c3d063b4..1f814b74 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -15,7 +15,7 @@ source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Intlr. source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Analogs.R') source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/LogisticReg.R') source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Utils.R') -source("https://earth.bsc.es/gitlab/external/cstools/-/raw/master/R/CST_BiasCorrection.R") +#source("https://earth.bsc.es/gitlab/external/cstools/-/raw/master/R/CST_BiasCorrection.R") ## Entry params data and recipe? 
@@ -40,12 +40,12 @@ downscale_datasets <- function(recipe, data) { } else { # Downscaling function params - int_methods <- tolower(recipe$Analysis$Workflow$Downscaling$int_method) - bc_methods <- tolower(recipe$Analysis$Workflow$Downscaling$bc_method) - lr_methods <- tolower(recipe$Analysis$Workflow$Downscaling$lr_method) - log_reg_methods <- tolower(recipe$Analysis$Workflow$Downscaling$log_reg_method) - target_grid <- tolower(recipe$Analysis$Workflow$Downscaling$target_grid) - nanalogs <- as.numeric(recipe$Analysis$Workflow$Downscaling$nanalogs) + int_method <- tolower(recipe$Analysis$Workflow$Downscaling$int_method) + bc_method <- tolower(recipe$Analysis$Workflow$Downscaling$bc_method) + lr_method <- tolower(recipe$Analysis$Workflow$Downscaling$lr_method) + log_reg_method <- tolower(recipe$Analysis$Workflow$Downscaling$log_reg_method) + target_grid <- tolower(recipe$Analysis$Workflow$Downscaling$target_grid) + nanalogs <- as.numeric(recipe$Analysis$Workflow$Downscaling$nanalogs) if (is.null(recipe$Analysis$ncores)) { ncores <- 1 @@ -71,58 +71,48 @@ downscale_datasets <- function(recipe, data) { } if (type == "int") { - if (is.null(int_methods)) { - stop("Please provide one or more interpolation methods in the recipe.") + if (is.null(int_method)) { + stop("Please provide one interpolation method in the recipe.") } if (is.null(target_grid)) { stop("Please provide the target grid in the recipe.") } - hcst_downscal <- list() - for (int_method in strsplit(int_methods, ", | |,")[[1]]) { - - # Ensure that observations are in the same grid as experiments - # Only needed for this method because the others already return the - # observations - latmin <- data$hcst$lat[1] - lonmin <- data$hcst$lon[1] - latmax <- data$hcst$lat[length(data$hcst$lat)] - lonmax <- data$hcst$lon[length(data$hcst$lon)] - hcst_d <- CST_Interpolation(data$hcst, - points = NULL, - method_remap = int_method, - target_grid = target_grid, - lat_dim = "latitude", - lon_dim = "longitude", - region = 
c(lonmin, lonmax, latmin, latmax), - method_point_interp = NULL) - - obs_d <- CST_Interpolation(data$obs, - points = NULL, - method_remap = int_method, - target_grid = target_grid, - lat_dim = "latitude", - lon_dim = "longitude", - region = c(lonmin, lonmax, latmin, latmax), - method_point_interp = NULL) - - - hcst_d$obs <- obs_d$exp - hcst_downscal[[ int_method ]] <- hcst_d - } + # Ensure that observations are in the same grid as experiments + # Only needed for this method because the others already return the + # observations + latmin <- data$hcst$lat[1] + lonmin <- data$hcst$lon[1] + latmax <- data$hcst$lat[length(data$hcst$lat)] + lonmax <- data$hcst$lon[length(data$hcst$lon)] + hcst_downscal <- CST_Interpolation(data$hcst, + points = NULL, + method_remap = int_method, + target_grid = target_grid, + lat_dim = "latitude", + lon_dim = "longitude", + region = c(lonmin, lonmax, latmin, latmax), + method_point_interp = NULL) + + obs_downscal <- CST_Interpolation(data$obs, + points = NULL, + method_remap = int_method, + target_grid = target_grid, + lat_dim = "latitude", + lon_dim = "longitude", + region = c(lonmin, lonmax, latmin, latmax), + method_point_interp = NULL) + + hcst_downscal$obs <- obs_downscal$exp DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } else if (type == "intbc") { - if (length(int_methods) == 0) { - stop("Please provide one (and only one) interpolation method in the recipe.") - } - - if (length(strsplit(int_methods, ", | |,")[[1]]) != 1) { + if (length(int_method) == 0) { stop("Please provide one (and only one) interpolation method in the recipe.") } - if (is.null(bc_methods)) { + if (is.null(bc_method)) { stop("Please provide one bias-correction method in the recipe. 
Accepted ", "methods are 'simple_bias', 'calibration', 'quantile_mapping'.") } @@ -131,41 +121,31 @@ downscale_datasets <- function(recipe, data) { stop("Please provide the target grid in the recipe.") } - hcst_downscal <- list() - for (bc_method in strsplit(bc_methods, ", | |,")[[1]]) { - - if (!(bc_method %in% BC_METHODS)) { - stop(paste0(bc_method, " method in the recipe is not available. Accepted methods ", - "are 'simple_bias', 'calibration', 'quantile_mapping'.")) - } + if (!(bc_method %in% BC_METHODS)) { + stop(paste0(bc_method, " method in the recipe is not available. Accepted methods ", + "are 'simple_bias', 'calibration', 'quantile_mapping'.")) + } - hcst_d <- CST_Intbc(data$hcst, data$obs, - target_grid = target_grid, - bc_method = bc_method, - int_method = int_methods, - points = NULL, - method_point_interp = NULL, - lat_dim = "latitude", - lon_dim = "longitude", - sdate_dim = "syear", - member_dim = "ensemble", - region = NULL, - ncores = ncores) + hcst_downscal <- CST_Intbc(data$hcst, data$obs, + target_grid = target_grid, + bc_method = bc_method, + int_method = int_method, + points = NULL, + method_point_interp = NULL, + lat_dim = "latitude", + lon_dim = "longitude", + sdate_dim = "syear", + member_dim = "ensemble", + region = NULL, + ncores = ncores) - hcst_downscal[[ bc_method ]] <- hcst_d - } - DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } else if (type == "intlr") { - if (length(int_methods) == 0) { - stop("Please provide one (and only one) interpolation method in the recipe.") - } - - if (length(strsplit(int_methods, ", | |,")[[1]]) != 1) { + if (length(int_method) == 0) { stop("Please provide one (and only one) interpolation method in the recipe.") } - if (is.null(lr_methods)) { + if (is.null(lr_method)) { stop("Please provide one linear regression method in the recipe. 
Accepted ", "methods are 'basic', 'large_scale', '4nn'.") } @@ -174,43 +154,37 @@ downscale_datasets <- function(recipe, data) { stop("Please provide the target grid in the recipe.") } - hcst_downscal <- list() - for (lr_method in strsplit(lr_methods, ", | |,")[[1]]) { - - if (!(lr_method %in% LR_METHODS)) { - stop(paste0(lr_method, " method in the recipe is not available. Accepted methods ", - "are 'basic', 'large_scale', '4nn'.")) - } + if (!(lr_method %in% LR_METHODS)) { + stop(paste0(lr_method, " method in the recipe is not available. Accepted methods ", + "are 'basic', 'large_scale', '4nn'.")) + } - # TO DO: add the possibility to have the element 'pred' in 'data' - if (lr_method == "large_scale") { - if (is.null(data$pred$data)) { - stop("Please provide the large scale predictors in the element 'data$pred$data'.") - } - } else { - data$pred$data <- NULL + # TO DO: add the possibility to have the element 'pred' in 'data' + if (lr_method == "large_scale") { + if (is.null(data$pred$data)) { + stop("Please provide the large scale predictors in the element 'data$pred$data'.") } + } else { + data$pred$data <- NULL + } - hcst_d <- CST_Intlr(data$hcst, data$obs, - lr_method = lr_method, - target_grid = target_grid, - points = NULL, - int_method = int_methods, - method_point_interp = NULL, - predictors = data$pred$data, - lat_dim = "latitude", - lon_dim = "longitude", - sdate_dim = "syear", - time_dim = "time", - member_dim = "ensemble", - large_scale_predictor_dimname = 'vars', - loocv = loocv, - region = NULL, - ncores = ncores) + hcst_downscal <- CST_Intlr(data$hcst, data$obs, + lr_method = lr_method, + target_grid = target_grid, + points = NULL, + int_method = int_method, + method_point_interp = NULL, + predictors = data$pred$data, + lat_dim = "latitude", + lon_dim = "longitude", + sdate_dim = "syear", + time_dim = "time", + member_dim = "ensemble", + large_scale_predictor_dimname = 'vars', + loocv = loocv, + region = NULL, + ncores = ncores) - hcst_downscal[[ 
lr_method ]] <- hcst_d - } - DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } else if (type == "analogs") { @@ -220,33 +194,28 @@ downscale_datasets <- function(recipe, data) { nanalogs <- 3 } - hcst_downscal <- list() - hcst_downscal[["analogs"]] <- CST_Analogs(data$hcst, data$obs, - grid_exp = data$hcst$source_files[1], - nanalogs = nanalogs, - fun_analog = "min", - lat_dim = "latitude", - lon_dim = "longitude", - sdate_dim = "syear", - time_dim = "time", - member_dim = "ensemble", - region = NULL, - return_indices = FALSE, - loocv_window = loocv, - ncores = ncores) + hcst_downscal <- CST_Analogs(data$hcst, data$obs, + grid_exp = data$hcst$source_files[1], + nanalogs = nanalogs, + fun_analog = "min", + lat_dim = "latitude", + lon_dim = "longitude", + sdate_dim = "syear", + time_dim = "time", + member_dim = "ensemble", + region = NULL, + return_indices = FALSE, + loocv_window = loocv, + ncores = ncores) DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } else if (type == "logreg") { - if (length(int_methods) == 0) { + if (length(int_method) == 0) { stop("Please provide one (and only one) interpolation method in the recipe.") } - if (length(strsplit(int_methods, ", | |,")[[1]]) != 1) { - stop("Please provide one (and only one) interpolation method in the recipe.") - } - - if (is.null(log_reg_methods)) { + if (is.null(log_reg_method)) { stop("Please provide one logistic regression method in the recipe. Accepted ", "methods are 'ens_mean', 'ens_mean_sd', 'sorted_members'.") } @@ -263,33 +232,27 @@ downscale_datasets <- function(recipe, data) { "Please provide six or more.") } - hcst_downscal <- list() - for (log_reg_method in strsplit(log_reg_methods, ", | |,")[[1]]) { - - if (!(log_reg_method %in% LOG_REG_METHODS)) { - stop(paste0(log_reg_method, " method in the recipe is not available. 
Accepted methods ", - "are 'ens_mean', 'ens_mean_sd', 'sorted_members'.")) - } - - hcst_d <- CST_LogisticReg(data$hcst, data$obs, - target_grid = target_grid, - int_method = int_methods, - log_reg_method = log_reg_method, - probs_cat = c(1/3,2/3), - return_most_likely_cat = FALSE, - points = NULL, - method_point_interp = NULL, - lat_dim = "latitude", - lon_dim = "longitude", - sdate_dim = "syear", - member_dim = "ensemble", - region = NULL, - loocv = loocv, - ncores = ncores) - - hcst_downscal[[ log_reg_method ]] <- hcst_d + if (!(log_reg_method %in% LOG_REG_METHODS)) { + stop(paste0(log_reg_method, " method in the recipe is not available. Accepted methods ", + "are 'ens_mean', 'ens_mean_sd', 'sorted_members'.")) } + hcst_downscal <- CST_LogisticReg(data$hcst, data$obs, + target_grid = target_grid, + int_method = int_method, + log_reg_method = log_reg_method, + probs_cat = c(1/3,2/3), + return_most_likely_cat = FALSE, + points = NULL, + method_point_interp = NULL, + lat_dim = "latitude", + lon_dim = "longitude", + sdate_dim = "syear", + member_dim = "ensemble", + region = NULL, + loocv = loocv, + ncores = ncores) + DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } } diff --git a/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml b/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml index 38e23c8b..9ed0bf80 100644 --- a/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml +++ b/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml @@ -40,6 +40,6 @@ Analysis: Run: Loglevel: INFO Terminal: yes - output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + output_dir: /esarchive/scratch/jramon/GitLab_jramon/auto-s2s/out-logs/ code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml b/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml index 400448b7..0cad69ec 100644 --- a/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml 
+++ b/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml @@ -42,18 +42,18 @@ Analysis: Calibration: method: raw Skill: - metric: RPSS EnsCorr BSS90 Mean_Bias_SS + metric: BSS10 BSS90 Mean_Bias_SS RPSS ROCSS Downscaling: # Assumption 1: leave-one-out cross-validation is always applied # Assumption 2: for analogs, we select the best analog (minimum distance) # TO DO: add downscaling to point locations - type: int # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg' - int_method: bilinear nn conservative # optional, regridding method accepted by CDO + type: # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg' + int_method: # optional, regridding method accepted by CDO bc_method: # optional, 'simple_bias', 'calibration', 'quantile_mapping' lr_method: # optional, 'basic', 'large_scale', '4nn' log_reg_method: # optional, 'ens_mean', 'ens_mean_sd', 'sorted_members' target_grid: /esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # optional, nc file or grid accepted by CDO - nanalogs: 1 # optional, number of analogs to be searched + nanalogs: 4 # optional, number of analogs to be searched Output_format: S2S4E Run: Loglevel: INFO diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 17f58196..61f5de9a 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -8,8 +8,8 @@ source("modules/Visualization/tmp/PlotCombinedMap.R") plot_data <- function(recipe, data, - downscaled_data = NULL, calibrated_data = NULL, + downscaled_data = NULL, skill_metrics = NULL, probabilities = NULL, archive = NULL, @@ -25,8 +25,7 @@ plot_data <- function(recipe, # significance: Bool. 
Whether to include significance dots where applicable outdir <- paste0(get_dir(recipe), "plots/") - sapply(outdir, dir.create, showWarnings = FALSE, recursive = TRUE) - #dir.create(outdir, showWarnings = FALSE, recursive = TRUE) + dir.create(outdir, showWarnings = FALSE, recursive = TRUE) if ((is.null(skill_metrics)) && (is.null(calibrated_data)) && (is.null(data$fcst)) && (is.null(downscaled_data))) { @@ -49,12 +48,11 @@ plot_data <- function(recipe, plot_skill_metrics(recipe, archive, data$hcst, skill_metrics, outdir, significance) } - - # Plot forecast ensemble mean + + # Plot downscaled data if (!is.null(downscaled_data)) { - plot_downscaled_data(recipe, archive, downscaled_data, outdir) - #plot_ensemble_mean(recipe, archive, downscaled_data$exp, outdir) - } + plot_ensemble_mean(recipe, archive, downscaled_data$hcst$exp, outdir) + } # Plot forecast ensemble mean if (!is.null(calibrated_data$fcst)) { @@ -77,16 +75,6 @@ plot_data <- function(recipe, } } -plot_downscaled_data <- function(recipe, archive, data_ls, outdir) { - - methods <- names(data_ls) - for (method in methods) { - outdir_method <- grep(pattern = method, x = outdir, value = TRUE) - print(outdir_method) - plot_ensemble_mean(recipe, archive, fcst = data_ls[[method]]$exp, outdir = outdir_method) - } -} - plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, outdir, significance = F) { diff --git a/modules/test_seasonal_downscaling.R b/modules/test_seasonal_downscaling.R index 12026bba..5f375abe 100644 --- a/modules/test_seasonal_downscaling.R +++ b/modules/test_seasonal_downscaling.R @@ -17,12 +17,27 @@ library(ClimProjDiags) source('https://earth.bsc.es/gitlab/es/s2dv/-/raw/master/R/Utils.R') source('https://earth.bsc.es/gitlab/es/s2dv/-/raw/master/R/CDORemap.R') +# Loop all downscaling methods and types +#types <- c('int', 'intbc', 'intlr', 'analogs', 'logreg') +#int_methods <- c('con', 'bil', 'bic', 'nn', 'con2') +#bc_methods <- c('simple_bias', 'calibration', 
'quantile_mapping') +#lr_methods <- c('basic', 'large_scale', '4nn') +#logreg_methods <- c('ens_mean', 'ens_mean_sd', 'sorted_members') + +# Read recipe recipe_file <- "modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml" recipe <- prepare_outputs(recipe_file) # Load datasets data <- load_datasets(recipe) +# Modify the recipe manually +recipe$Analysis$Workflow$Downscaling$type <- 'analogs' +recipe$Analysis$Workflow$Downscaling$int_method <- NULL # con, bil, bic, nn, con2 +recipe$Analysis$Workflow$Downscaling$bc_method <- NULL +recipe$Analysis$Workflow$Downscaling$lr_method <- NULL +recipe$Analysis$Workflow$Downscaling$log_reg_method <- NULL + # Downscale datasets downscaled_data <- downscale_datasets(recipe, data) @@ -30,21 +45,51 @@ downscaled_data <- downscale_datasets(recipe, data) # NOTE: LogReg method does not have member dimension because it provides directly the # predicted categories # The following line should work only for Interpolation, Intbc, Intlr, Analogs -stopifnot(recipe$Analysis$Workflow$Downscaling$type %in% c('int', 'intbc', 'intlr', 'analogs')) -skill_metrics <- lapply(downscaled_data$hcst, function(x) compute_skill_metrics(recipe, x$exp, x$obs)) +#stopifnot(recipe$Analysis$Workflow$Downscaling$type %in% c('int', 'intbc', 'intlr', 'analogs')) +#skill_metrics <- compute_skill_metrics(recipe, downscaled_data$hcst$exp, downscaled_data$hcst$obs) + +# Save downscaled data to later intercompare them +saveRDS(downscaled_data, file = paste0('/esarchive/scratch/jramon/downscaling/data/FOCUS/', + recipe$Analysis$Workflow$Downscaling$type, '-', + recipe$Analysis$Workflow$Downscaling$int_method, '-', + recipe$Analysis$Workflow$Downscaling$bc_method, '-', + recipe$Analysis$Workflow$Downscaling$lr_method, '-', + recipe$Analysis$Workflow$Downscaling$log_reg_method, '.RDS')) # Plot downscaled data -plot_data(recipe, downscaled_data = downscaled_data$hcst, data = NULL) +plot_data(recipe, data = NULL, downscaled_data = downscaled_data) -# Plot 
raw data +# Plot skill metrics my_data <- list() -my_data$fcst <- data$hcst -plot_data(recipe, data = my_data) +my_data$hcst <- downscaled_data$hcst$exp +plot_data(recipe, data = my_data, skill_metrics = skill_metrics) -# Plot skill data -my_data <- list() -my_data$hcst <- downscaled_data$hcst$bilinear$exp -plot_data(recipe, skill_metrics = skill_metrics$bilinear, data = my_data) #Plot skill metrics +# Plot Taylor diagram intercomparison +source('/esarchive/scratch/jramon/GitLab_jramon/nc2ts/taylor_diagram.R') +rds_files <- list.files(path = '/esarchive/scratch/jramon/downscaling/data/FOCUS', full.names = T) +down_data <- list() +methods <- c(list.files(path = '/esarchive/scratch/jramon/downscaling/data/FOCUS')) +for (nfile in seq(rds_files)) { + + # to plot all points in the same Taylor diagram + flag <- ifelse(nfile == 1, F, T) + + down_data[[nfile]] <- readRDS(rds_files[[nfile]]) + + #compute model ensemble mean + ens_mean <- Apply(down_data[[nfile]]$hcst$exp$data, 'ensemble', mean)$output1 + + # remove ensemble dimension in obs + obs <- Subset(down_data[[nfile]]$hcst$obs$data, along = 'ensemble', indices = 1, drop = 'selected') + + ens_mean <- .reorder_dims(arr_ref = obs, arr_to_reorder = ens_mean) + + plot <- taylor.diagram(ref = as.vector(obs), model = as.vector(ens_mean), normalize = TRUE, + add = flag, pch = nfile, sd.arcs = TRUE, maxsd = 1.5) + +} + +legend(1.3, 1.5, cex = 0.8, pt.cex = 1, legend = methods, pch = 1:length(rds_files), ncol = 1) ################################## # EXAMPLE FOR SEASONAL CALIBRATION -- GitLab From 70f94cdf152bc9a8b04d39b795b4ec16eaf16096 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 3 Mar 2023 10:36:40 +0100 Subject: [PATCH 09/21] Change one CSDownscale function for testing, fix Viz module --- modules/Downscaling/Downscaling.R | 14 +++++++------- .../recipe_system5c3s-tas_downscaling.yml | 18 +++++++++--------- modules/Visualization/Visualization.R | 5 ----- modules/test_seasonal.R | 10 ++++++---- 4 files 
changed, 22 insertions(+), 25 deletions(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index 1f814b74..4c32d09a 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -9,12 +9,12 @@ #░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ #░ ░ ░ ## TODO: Remove once CSDownscale is on CRAN -source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Interpolation.R') -source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Intbc.R') -source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Intlr.R') -source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Analogs.R') -source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/LogisticReg.R') -source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Utils.R') +source('/esarchive/scratch/vagudets/repos/csdownscale/R/Interpolation.R') +source('/esarchive/scratch/vagudets/repos/csdownscale/R/Intbc.R') +source('/esarchive/scratch/vagudets/repos/csdownscale/R/Intlr.R') +source('/esarchive/scratch/vagudets/repos/csdownscale/R/Analogs.R') +source('/esarchive/scratch/vagudets/repos/csdownscale/R/LogisticReg.R') +source('/esarchive/scratch/vagudets/repos/csdownscale/R/Utils.R') #source("https://earth.bsc.es/gitlab/external/cstools/-/raw/master/R/CST_BiasCorrection.R") @@ -257,6 +257,6 @@ downscale_datasets <- function(recipe, data) { } } print(DOWNSCAL_MSG) - return(list(hcst = hcst_downscal, fcst = NULL)) + return(list(hcst = hcst_downscal$exp, obs = hcst_downscal$obs, fcst = NULL)) } diff --git a/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml b/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml index 0cad69ec..b5d57c6f 100644 --- a/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml +++ b/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml @@ -19,10 +19,10 @@ Analysis: freq: monthly_mean Datasets: 
System: - name: system5c3s + name: ECMWF-SEAS5 Multimodel: no Reference: - name: era5 + name: ERA5 Time: sdate: '0501' hcst_start: '2000' @@ -47,16 +47,16 @@ Analysis: # Assumption 1: leave-one-out cross-validation is always applied # Assumption 2: for analogs, we select the best analog (minimum distance) # TO DO: add downscaling to point locations - type: # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg' - int_method: # optional, regridding method accepted by CDO - bc_method: # optional, 'simple_bias', 'calibration', 'quantile_mapping' - lr_method: # optional, 'basic', 'large_scale', '4nn' - log_reg_method: # optional, 'ens_mean', 'ens_mean_sd', 'sorted_members' + type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg' + int_method: conservative # optional, regridding method accepted by CDO + bc_method: simple_bias # optional, 'simple_bias', 'calibration', 'quantile_mapping' + lr_method: basic # optional, 'basic', 'large_scale', '4nn' + log_reg_method: ens_mean # optional, 'ens_mean', 'ens_mean_sd', 'sorted_members' target_grid: /esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # optional, nc file or grid accepted by CDO nanalogs: 4 # optional, number of analogs to be searched Output_format: S2S4E Run: Loglevel: INFO Terminal: yes - output_dir: /esarchive/scratch/jramon/GitLab_jramon/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/jramon/GitLab_jramon/auto-s2s/ + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index e7236da3..9e453bdc 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -48,11 +48,6 @@ plot_data <- function(recipe, significance) } - # Plot downscaled data - if (!is.null(downscaled_data)) { - plot_ensemble_mean(recipe, archive, downscaled_data$hcst$exp, outdir) - } - # Plot forecast ensemble 
mean if (!is.null(data$fcst)) { plot_ensemble_mean(recipe, archive, data$fcst, outdir) diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R index 44394cbd..bf784c1f 100644 --- a/modules/test_seasonal.R +++ b/modules/test_seasonal.R @@ -1,20 +1,22 @@ -setwd("/esarchive/scratch/jramon/GitLab_jramon/auto-s2s/") source("modules/Loading/Loading.R") source("modules/Calibration/Calibration.R") +source("modules/Downscaling/Downscaling.R") source("modules/Anomalies/Anomalies.R") source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") -recipe_file <- "modules/Loading/testing_recipes/recipe_seasonal-tests.yml" +recipe_file <- "modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml" recipe <- prepare_outputs(recipe_file) # Load datasets data <- load_datasets(recipe) +# Downscale datasets +data <- downscale_datasets(recipe, data) # Calibrate datasets -calibrated_data <- calibrate_datasets(recipe, data) +# data <- calibrate_datasets(recipe, data) # Compute anomalies -calibrated_data <- compute_anomalies(recipe, calibrated_data) +# calibrated_data <- compute_anomalies(recipe, calibrated_data) # Compute skill metrics skill_metrics <- compute_skill_metrics(recipe, calibrated_data) # Compute percentiles and probability bins -- GitLab From c524fbe95c959abe226c145810b8640231cc6b2a Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 3 Mar 2023 16:14:08 +0100 Subject: [PATCH 10/21] Restructure downscaling checks (WIP) --- modules/Downscaling/Downscaling.R | 186 +++++++++++++++++------------- 1 file changed, 103 insertions(+), 83 deletions(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index 4c32d09a..1e8fb5d9 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -9,6 +9,7 @@ #░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ #░ ░ ░ ## TODO: Remove once CSDownscale is on CRAN +## TODO: Move recipe checks to check_recipe() 
source('/esarchive/scratch/vagudets/repos/csdownscale/R/Interpolation.R') source('/esarchive/scratch/vagudets/repos/csdownscale/R/Intbc.R') source('/esarchive/scratch/vagudets/repos/csdownscale/R/Intlr.R') @@ -29,56 +30,60 @@ downscale_datasets <- function(recipe, data) { type <- tolower(recipe$Analysis$Workflow$Downscaling$type) if (!is.null(data$fcst)) { - warning("The downscaling will be only performed to the hindcast data") + warn(recipe$Run$logger, + "The downscaling will be only performed to the hindcast data") data$fcst <- NULL } if (type == "none") { - hcst_downscal <- data$hcst DOWNSCAL_MSG <- "##### NO DOWNSCALING PERFORMED #####" - } else { # Downscaling function params int_method <- tolower(recipe$Analysis$Workflow$Downscaling$int_method) bc_method <- tolower(recipe$Analysis$Workflow$Downscaling$bc_method) lr_method <- tolower(recipe$Analysis$Workflow$Downscaling$lr_method) - log_reg_method <- tolower(recipe$Analysis$Workflow$Downscaling$log_reg_method) + logreg_method <- tolower(recipe$Analysis$Workflow$Downscaling$logreg_method) target_grid <- tolower(recipe$Analysis$Workflow$Downscaling$target_grid) nanalogs <- as.numeric(recipe$Analysis$Workflow$Downscaling$nanalogs) - + ## TODO: Compute number of cores if (is.null(recipe$Analysis$ncores)) { ncores <- 1 } else { ncores <- recipe$Analysis$ncores } - - #TO DO: add the parametre loocv where it corresponds + ## TODO: add the parametre loocv where it corresponds if (is.null(recipe$Analysis$loocv)) { loocv <- TRUE } else { loocv <- recipe$Analysis$loocv } - + # Define downscaling options DOWNSCAL_TYPES <- c("none", "int", "intbc", "intlr", "analogs", "logreg") BC_METHODS <- c("simple_bias", "calibration", "quantile_mapping") LR_METHODS <- c("basic", "large_scale", "4nn") - LOG_REG_METHODS <- c("ens_mean", "ens_mean_sd", "sorted_members") + LOGREG_METHODS <- c("ens_mean", "ens_mean_sd", "sorted_members") if (!(type %in% DOWNSCAL_TYPES)) { - stop("Downscaling type in the recipe is not available. 
Accepted types ", - "are 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'.") - } - + error(recipe$Run$logger, + paste("Downscaling type in the recipe is not available.", + "Accepted entries are:", DOWNSCAL_TYPES)) + } + # Interpolation if (type == "int") { - if (is.null(int_method)) { - stop("Please provide one interpolation method in the recipe.") + # Check method + if (length(int_method) == 0) { + error(recipe$Run$logger, + paste("Downscaling type 'int' was requested, but no", + "interpolation method is provided in the recipe.")) } - + # Check target grid if (is.null(target_grid)) { - stop("Please provide the target grid in the recipe.") + error(recipe$Run$logger, + paste("Downscaling type 'int' was requested, but no", + "target grid is provided in the recipe.")) } - + ## TODO: Move this check elsewhere # Ensure that observations are in the same grid as experiments # Only needed for this method because the others already return the # observations @@ -86,6 +91,7 @@ downscale_datasets <- function(recipe, data) { lonmin <- data$hcst$lon[1] latmax <- data$hcst$lat[length(data$hcst$lat)] lonmax <- data$hcst$lon[length(data$hcst$lon)] + # Interpolate hcst hcst_downscal <- CST_Interpolation(data$hcst, points = NULL, method_remap = int_method, @@ -94,7 +100,7 @@ downscale_datasets <- function(recipe, data) { lon_dim = "longitude", region = c(lonmin, lonmax, latmin, latmax), method_point_interp = NULL) - + # Interpolate obs obs_downscal <- CST_Interpolation(data$obs, points = NULL, method_remap = int_method, @@ -103,29 +109,34 @@ downscale_datasets <- function(recipe, data) { lon_dim = "longitude", region = c(lonmin, lonmax, latmin, latmax), method_point_interp = NULL) - hcst_downscal$obs <- obs_downscal$exp - DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } else if (type == "intbc") { if (length(int_method) == 0) { - stop("Please provide one (and only one) interpolation method in the recipe.") + error(recipe$Run$logger, + paste("Downscaling type 'intbc' was 
requested in the recipe, but", + "no interpolation method is provided.")) + stop() } - - if (is.null(bc_method)) { - stop("Please provide one bias-correction method in the recipe. Accepted ", - "methods are 'simple_bias', 'calibration', 'quantile_mapping'.") + if (length(bc_method) == 0) { + error(recipe$Run$logger, + paste("Downscaling type 'intbc' was requested in the recipe, but", + "no bias correction method is provided.")) + stop() ## "methods are 'simple_bias', 'calibration', 'quantile_mapping'.") } - - if (is.null(target_grid)) { - stop("Please provide the target grid in the recipe.") + if (length(target_grid) == 0) { + error(recipe$Run$logger, + paste("Downscaling type 'intbc' was requested in the recipe, but", + "no target grid is provided.")) + stop() } - if (!(bc_method %in% BC_METHODS)) { - stop(paste0(bc_method, " method in the recipe is not available. Accepted methods ", - "are 'simple_bias', 'calibration', 'quantile_mapping'.")) + error(recipe$Run$logger, + paste0(bc_method, " method in the recipe is not available.", + "The available methods are:", BC_METHODS)) + stop() } - + # Interpolate hcst and obs with bias correction hcst_downscal <- CST_Intbc(data$hcst, data$obs, target_grid = target_grid, bc_method = bc_method, @@ -138,28 +149,32 @@ downscale_datasets <- function(recipe, data) { member_dim = "ensemble", region = NULL, ncores = ncores) - - DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } else if (type == "intlr") { + ## TODO: Logger if (length(int_method) == 0) { - stop("Please provide one (and only one) interpolation method in the recipe.") + error(recipe$Run$logger, + paste("Downscaling type 'intlr' was requested in the recipe, but", + "no interpolation method is provided.")) + stop() } - - if (is.null(lr_method)) { - stop("Please provide one linear regression method in the recipe. 
Accepted ", - "methods are 'basic', 'large_scale', '4nn'.") + if (length(lr_method) == 0) { + error(recipe$Run$logger, + paste("Downscaling type 'intlr' was requested in the recipe, but", + "no linear regression method is provided.")) + stop() } - - if (is.null(target_grid)) { - stop("Please provide the target grid in the recipe.") + if (length(target_grid) == 0) { + error(recipe$Run$logger, + paste("Downscaling type 'intlr' was requested in the recipe, but", + "no target grid is provided.")) + stop() } - if (!(lr_method %in% LR_METHODS)) { - stop(paste0(lr_method, " method in the recipe is not available. Accepted methods ", - "are 'basic', 'large_scale', '4nn'.")) + error(recipe$Run$logger, + paste0("The accepted linear regression methods are:", LR_METHODS)) + stop() } - - # TO DO: add the possibility to have the element 'pred' in 'data' + ## TODO: add the possibility to have the element 'pred' in 'data' if (lr_method == "large_scale") { if (is.null(data$pred$data)) { stop("Please provide the large scale predictors in the element 'data$pred$data'.") @@ -167,7 +182,7 @@ downscale_datasets <- function(recipe, data) { } else { data$pred$data <- NULL } - + # Interpolate hcst and obs with linear regression hcst_downscal <- CST_Intlr(data$hcst, data$obs, lr_method = lr_method, target_grid = target_grid, @@ -175,25 +190,23 @@ downscale_datasets <- function(recipe, data) { int_method = int_method, method_point_interp = NULL, predictors = data$pred$data, - lat_dim = "latitude", + lat_dim = "latitude", lon_dim = "longitude", - sdate_dim = "syear", - time_dim = "time", + sdate_dim = "syear", + time_dim = "time", member_dim = "ensemble", large_scale_predictor_dimname = 'vars', loocv = loocv, - region = NULL, + region = NULL, ncores = ncores) - - DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } else if (type == "analogs") { - - if (is.null(nanalogs)) { - warning("The number of analogs for searching has not been provided in the ", - "recipe. 
Setting it to 3.") + if (length(nanalogs) == 0) { + warn(recipe$Run$logger, + paste("The number of analogs for searching has not been provided", + "in the recipe. Setting it to 3.")) nanalogs <- 3 } - + # Apply analogs method to hcst and obs hcst_downscal <- CST_Analogs(data$hcst, data$obs, grid_exp = data$hcst$source_files[1], nanalogs = nanalogs, @@ -207,36 +220,43 @@ downscale_datasets <- function(recipe, data) { return_indices = FALSE, loocv_window = loocv, ncores = ncores) - - DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } else if (type == "logreg") { - if (length(int_method) == 0) { - stop("Please provide one (and only one) interpolation method in the recipe.") + error(recipe$Run$logger, + paste("Downscaling type 'logreg' was requested in the recipe, but", + "no interpolation method is provided.")) + stop() } - - if (is.null(log_reg_method)) { - stop("Please provide one logistic regression method in the recipe. Accepted ", - "methods are 'ens_mean', 'ens_mean_sd', 'sorted_members'.") + if (length(logreg_method) == 0) { + error(recipe$Run$logger, + paste("Downscaling type 'logreg' was requested in the recipe, but", + "no logistic regression method is provided. The accepted", + "methods are:", LOG_REG_METHODS)) + stop() } - - if (is.null(target_grid)) { - stop("Please provide the target grid in the recipe.") + if (length(target_grid) == 0) { + error(recipe$Run$logger, + paste("Downscaling type 'logreg' was requested in the recipe, but", + "no target grid is provided.")) + stop() } - + ## Keep this check here # Since we are forcing to create three categories, and applying cross-validation, # we need at least six years of data for the logistic regression function to not # crash - if (dim(data$hcst$data)[names(dim(data$hcst$data)) == "syear"] <= 5) { - stop("The number of start dates is insufficient for the logisitic regression method. 
", - "Please provide six or more.") + if (dim(data$hcst$data)[["syear"]] < 6) { + error(recipe$Run$logger, + paste("The number of start dates is insufficient for the", + "logistic regression method. Please provide six or more.")) + stop() } - - if (!(log_reg_method %in% LOG_REG_METHODS)) { - stop(paste0(log_reg_method, " method in the recipe is not available. Accepted methods ", - "are 'ens_mean', 'ens_mean_sd', 'sorted_members'.")) + if (!(logreg_method %in% LOGREG_METHODS)) { + error(recipe$Run$logger, + paste("The accepted methos for logistic regression are:", + LOGREG_METHODS)) + stop() } - + # Apply logistic regression to hcst and obs hcst_downscal <- CST_LogisticReg(data$hcst, data$obs, target_grid = target_grid, int_method = int_method, @@ -253,10 +273,10 @@ downscale_datasets <- function(recipe, data) { loocv = loocv, ncores = ncores) - DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } - } - print(DOWNSCAL_MSG) + DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" + } + info(recipe$Run$logger, DOWNSCAL_MSG) return(list(hcst = hcst_downscal$exp, obs = hcst_downscal$obs, fcst = NULL)) } -- GitLab From a78ef83f6cb30dc7f567e8ab3e1172cc7f840f73 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 8 Mar 2023 10:46:49 +0100 Subject: [PATCH 11/21] Rewrite downscaling checks --- modules/Downscaling/Downscaling.R | 12 ++++++------ .../recipe_system5c3s-tas_downscaling.yml | 6 +++--- modules/test_seasonal.R | 12 ++++++------ 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index 1e8fb5d9..f2a51b48 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -10,12 +10,12 @@ #░ ░ ░ ## TODO: Remove once CSDownscale is on CRAN ## TODO: Move recipe checks to check_recipe() -source('/esarchive/scratch/vagudets/repos/csdownscale/R/Interpolation.R') -source('/esarchive/scratch/vagudets/repos/csdownscale/R/Intbc.R') 
-source('/esarchive/scratch/vagudets/repos/csdownscale/R/Intlr.R') -source('/esarchive/scratch/vagudets/repos/csdownscale/R/Analogs.R') -source('/esarchive/scratch/vagudets/repos/csdownscale/R/LogisticReg.R') -source('/esarchive/scratch/vagudets/repos/csdownscale/R/Utils.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Interpolation.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Intbc.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Intlr.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Analogs.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/LogisticReg.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Utils.R') #source("https://earth.bsc.es/gitlab/external/cstools/-/raw/master/R/CST_BiasCorrection.R") diff --git a/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml b/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml index b5d57c6f..99e02c7d 100644 --- a/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml +++ b/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml @@ -50,10 +50,10 @@ Analysis: type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg' int_method: conservative # optional, regridding method accepted by CDO bc_method: simple_bias # optional, 'simple_bias', 'calibration', 'quantile_mapping' - lr_method: basic # optional, 'basic', 'large_scale', '4nn' - log_reg_method: ens_mean # optional, 'ens_mean', 'ens_mean_sd', 'sorted_members' + lr_method: # optional, 'basic', 'large_scale', '4nn' + log_reg_method: # optional, 'ens_mean', 'ens_mean_sd', 'sorted_members' target_grid: /esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # optional, nc file or grid accepted by CDO - nanalogs: 4 # optional, number of analogs to be searched + nanalogs: # optional, number of analogs to be 
searched Output_format: S2S4E Run: Loglevel: INFO diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R index bf784c1f..1f3b8b9b 100644 --- a/modules/test_seasonal.R +++ b/modules/test_seasonal.R @@ -14,15 +14,15 @@ data <- load_datasets(recipe) # Downscale datasets data <- downscale_datasets(recipe, data) # Calibrate datasets -# data <- calibrate_datasets(recipe, data) +data <- calibrate_datasets(recipe, data) # Compute anomalies -# calibrated_data <- compute_anomalies(recipe, calibrated_data) +# data <- compute_anomalies(recipe, data) # Compute skill metrics -skill_metrics <- compute_skill_metrics(recipe, calibrated_data) +skill_metrics <- compute_skill_metrics(recipe, data) # Compute percentiles and probability bins -probabilities <- compute_probabilities(recipe, calibrated_data) +# probabilities <- compute_probabilities(recipe, data) # Export all data to netCDF -save_data(recipe, calibrated_data, skill_metrics, probabilities) +save_data(recipe, data, skill_metrics, probabilities) # Plot data -plot_data(recipe, calibrated_data, skill_metrics, probabilities, +plot_data(recipe, data, skill_metrics, probabilities, significance = T) -- GitLab From 3a788ac65c3d2dca10e6a9356f95e8084b4583b7 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 8 Mar 2023 16:12:15 +0100 Subject: [PATCH 12/21] Move checks from Downscaling.R to recipe checker --- modules/Downscaling/Downscaling.R | 97 +---------------------------- tools/check_recipe.R | 100 +++++++++++++++++++++++++++++- 2 files changed, 102 insertions(+), 95 deletions(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index f2a51b48..d8522b65 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -64,25 +64,8 @@ downscale_datasets <- function(recipe, data) { LR_METHODS <- c("basic", "large_scale", "4nn") LOGREG_METHODS <- c("ens_mean", "ens_mean_sd", "sorted_members") - if (!(type %in% DOWNSCAL_TYPES)) { - error(recipe$Run$logger, 
- paste("Downscaling type in the recipe is not available.", - "Accepted entries are:", DOWNSCAL_TYPES)) - } # Interpolation if (type == "int") { - # Check method - if (length(int_method) == 0) { - error(recipe$Run$logger, - paste("Downscaling type 'int' was requested, but no", - "interpolation method is provided in the recipe.")) - } - # Check target grid - if (is.null(target_grid)) { - error(recipe$Run$logger, - paste("Downscaling type 'int' was requested, but no", - "target grid is provided in the recipe.")) - } ## TODO: Move this check elsewhere # Ensure that observations are in the same grid as experiments # Only needed for this method because the others already return the @@ -112,31 +95,7 @@ downscale_datasets <- function(recipe, data) { hcst_downscal$obs <- obs_downscal$exp } else if (type == "intbc") { - if (length(int_method) == 0) { - error(recipe$Run$logger, - paste("Downscaling type 'intbc' was requested in the recipe, but", - "no interpolation method is provided.")) - stop() - } - if (length(bc_method) == 0) { - error(recipe$Run$logger, - paste("Downscaling type 'intbc' was requested in the recipe, but", - "no bias correction method is provided.")) - stop() ## "methods are 'simple_bias', 'calibration', 'quantile_mapping'.") - } - if (length(target_grid) == 0) { - error(recipe$Run$logger, - paste("Downscaling type 'intbc' was requested in the recipe, but", - "no target grid is provided.")) - stop() - } - if (!(bc_method %in% BC_METHODS)) { - error(recipe$Run$logger, - paste0(bc_method, " method in the recipe is not available.", - "The available methods are:", BC_METHODS)) - stop() - } - # Interpolate hcst and obs with bias correction + # Interpolate hcst and obs with bias correction hcst_downscal <- CST_Intbc(data$hcst, data$obs, target_grid = target_grid, bc_method = bc_method, @@ -150,30 +109,6 @@ downscale_datasets <- function(recipe, data) { region = NULL, ncores = ncores) } else if (type == "intlr") { - ## TODO: Logger - if (length(int_method) == 0) 
{ - error(recipe$Run$logger, - paste("Downscaling type 'intlr' was requested in the recipe, but", - "no interpolation method is provided.")) - stop() - } - if (length(lr_method) == 0) { - error(recipe$Run$logger, - paste("Downscaling type 'intlr' was requested in the recipe, but", - "no linear regression method is provided.")) - stop() - } - if (length(target_grid) == 0) { - error(recipe$Run$logger, - paste("Downscaling type 'intlr' was requested in the recipe, but", - "no target grid is provided.")) - stop() - } - if (!(lr_method %in% LR_METHODS)) { - error(recipe$Run$logger, - paste0("The accepted linear regression methods are:", LR_METHODS)) - stop() - } ## TODO: add the possibility to have the element 'pred' in 'data' if (lr_method == "large_scale") { if (is.null(data$pred$data)) { @@ -201,7 +136,7 @@ downscale_datasets <- function(recipe, data) { ncores = ncores) } else if (type == "analogs") { if (length(nanalogs) == 0) { - warn(recipe$Run$logger, + info(recipe$Run$logger, paste("The number of analogs for searching has not been provided", "in the recipe. Setting it to 3.")) nanalogs <- 3 @@ -221,41 +156,15 @@ downscale_datasets <- function(recipe, data) { loocv_window = loocv, ncores = ncores) } else if (type == "logreg") { - if (length(int_method) == 0) { - error(recipe$Run$logger, - paste("Downscaling type 'logreg' was requested in the recipe, but", - "no interpolation method is provided.")) - stop() - } - if (length(logreg_method) == 0) { - error(recipe$Run$logger, - paste("Downscaling type 'logreg' was requested in the recipe, but", - "no logistic regression method is provided. 
The accepted", - "methods are:", LOG_REG_METHODS)) - stop() - } - if (length(target_grid) == 0) { - error(recipe$Run$logger, - paste("Downscaling type 'logreg' was requested in the recipe, but", - "no target grid is provided.")) - stop() - } - ## Keep this check here # Since we are forcing to create three categories, and applying cross-validation, # we need at least six years of data for the logistic regression function to not # crash if (dim(data$hcst$data)[["syear"]] < 6) { error(recipe$Run$logger, - paste("The number of start dates is insufficient for the", + paste("The number of years of data is insufficient for the", "logistic regression method. Please provide six or more.")) stop() } - if (!(logreg_method %in% LOGREG_METHODS)) { - error(recipe$Run$logger, - paste("The accepted methos for logistic regression are:", - LOGREG_METHODS)) - stop() - } # Apply logistic regression to hcst and obs hcst_downscal <- CST_LogisticReg(data$hcst, data$obs, target_grid = target_grid, diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 964c4446..617bd169 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -222,7 +222,9 @@ check_recipe <- function(recipe) { # WORKFLOW CHECKS # --------------------------------------------------------------------- - # Only one Calibration method allowed: + # Calibration + # If 'method' is FALSE/no/'none' or NULL, set to 'raw' + ## TODO: Review this check if ((is.logical(recipe$Analysis$Workflow$Calibration$method) && recipe$Analysis$Workflow$Calibration$method == FALSE) || tolower(recipe$Analysis$Workflow$Calibration$method) == 'none' || @@ -257,6 +259,102 @@ check_recipe <- function(recipe) { error_status <- T } } + # Downscaling + ## TODO: Simplify checks (reduce number of lines) + downscal_params <- lapply(recipe$Analysis$Workflow$Downscaling, tolower) + # Define accepted entries + DOWNSCAL_TYPES <- c("none", "int", "intbc", "intlr", "analogs", "logreg") + BC_METHODS <- c("simple_bias", "calibration", 
"quantile_mapping") + LR_METHODS <- c("basic", "large_scale", "4nn") + LOGREG_METHODS <- c("ens_mean", "ens_mean_sd", "sorted_members") + # Check downscaling type + ## TODO: Consider 'NULL' case + if (!((downscal_params$type) %in% DOWNSCAL_TYPES)) { + error(recipe$Run$logger, + paste0("The type of Downscaling request in the recipe is not ", + "available. It must be one of the following: ", + paste(DOWNSCAL_TYPES, collapse = ", "), ".")) + error_status <- T + } + if ((downscal_params$type %in% c("int", "intbc", "intlr", "logreg")) && + (is.null(downscal_params$target_grid))) { + error(recipe$Run$logger, + paste("A target grid is required for the downscaling method", + "requested in the recipe.")) + error_status <- T + } + if (downscal_params$type == "int") { + if (is.null(downscal_params$int_method)) { + error(recipe$Run$logger, + paste("Downscaling type 'int' was requested, but no", + "interpolation method is provided in the recipe.")) + error_status <- T + } + } else if (downscal_params$type == "intbc") { + if (is.null(downscal_params$int_method)) { + error(recipe$Run$logger, + paste("Downscaling type 'int' was requested in the recipe, but no", + "interpolation method is provided.")) + error_status <- T + } + if (is.null(downscal_params$bc_method)) { + error(recipe$Run$logger, + paste0("Downscaling type 'intbc' was requested in the recipe, but", + "no bias correction method is provided.")) + error_status <- T + } else if (!(downscal_params$bc_method %in% BC_METHODS)) { + error(recipe$Run$logger, + paste0("The accepted Bias Correction methods for the downscaling", + "module are: ", paste(BC_METHODS, collapse = ", "), ".")) + error_status <- T + } + } else if (downscal_params$type == "intlr") { + if (is.null(downscal_params$int_method)) { + error(recipe$Run$logger, + paste("Downscaling type 'intlr' was requested in the recipe, but", + "no interpolation method was provided.")) + error_status <- T + } + if (is.null(downscal_params$lr_method)) { + 
error(recipe$Run$logger, + paste0("Downscaling type 'intlr' was requested in the recipe, but", + "no linear regression method was provided.")) + error_status <- T + } else if (!(downscal_params$lr_method %in% LR_METHODS)) { + error(recipe$Run$logger, + paste0("The accepted linear regression methods for the", + "downscaling module are: ", + paste(LR_METHODS, collapse = ", "), ".")) + error_status <- T + } + } else if (downscal_params$type == "analogs") { + if (length(nanalogs) == 0) { + warn(recipe$Run$logger, + paste("Downscaling type is 'analogs, but the number of analogs", + "has not been provided in the recipe.")) + } + } else if (downscal_params$type == "logreg") { + if (is.null(downscal_params$int_method)) { + error(recipe$Run$logger, + paste("Downscaling type 'logreg' was requested in the recipe, but", + "no interpolation method was provided.")) + error_status <- T + } + if (is.null(downscal_params$log_reg_method)) { + error(recipe$Run$logger, + paste0("Downscaling type 'logreg' was requested in the recipe, but", + "no logistic regression method is provided. 
The accepted", + "methods are:", + paste(LOG_REG_METHODS, collapse = ", "), ".")) + error_status <- T + } else if (!(downscal_params$log_reg_method %in% LOGREG_METHODS)) { + error(recipe$Run$logger, + paste0("The accepted logistic regression methods for the ", + "downscaling module are: ", + paste(LOGREG_METHODS, collapse = ", "), ".")) + error_status <- T + } + } # Skill if (("Skill" %in% names(recipe$Analysis$Workflow)) && (is.null(recipe$Analysis$Workflow$Skill$metric))) { -- GitLab From bdbe693e7dbd9f6f90288caaac8ddda0b9087971 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 9 Mar 2023 11:50:22 +0100 Subject: [PATCH 13/21] Refine downscaling checks --- tools/check_recipe.R | 166 ++++++++++++++++++++++--------------------- 1 file changed, 85 insertions(+), 81 deletions(-) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 617bd169..b2c62e44 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -1,8 +1,6 @@ check_recipe <- function(recipe) { - # recipe: yaml recipe already read it - ## TODO: Adapt to decadal case - + ## TODO: set up logger-less case info(recipe$Run$logger, paste("Checking recipe:", recipe$recipe_path)) # --------------------------------------------------------------------- @@ -268,92 +266,97 @@ check_recipe <- function(recipe) { LR_METHODS <- c("basic", "large_scale", "4nn") LOGREG_METHODS <- c("ens_mean", "ens_mean_sd", "sorted_members") # Check downscaling type - ## TODO: Consider 'NULL' case - if (!((downscal_params$type) %in% DOWNSCAL_TYPES)) { - error(recipe$Run$logger, - paste0("The type of Downscaling request in the recipe is not ", - "available. 
It must be one of the following: ", - paste(DOWNSCAL_TYPES, collapse = ", "), ".")) - error_status <- T - } - if ((downscal_params$type %in% c("int", "intbc", "intlr", "logreg")) && - (is.null(downscal_params$target_grid))) { - error(recipe$Run$logger, - paste("A target grid is required for the downscaling method", - "requested in the recipe.")) - error_status <- T - } - if (downscal_params$type == "int") { - if (is.null(downscal_params$int_method)) { - error(recipe$Run$logger, - paste("Downscaling type 'int' was requested, but no", - "interpolation method is provided in the recipe.")) - error_status <- T - } - } else if (downscal_params$type == "intbc") { - if (is.null(downscal_params$int_method)) { - error(recipe$Run$logger, - paste("Downscaling type 'int' was requested in the recipe, but no", - "interpolation method is provided.")) - error_status <- T - } - if (is.null(downscal_params$bc_method)) { - error(recipe$Run$logger, - paste0("Downscaling type 'intbc' was requested in the recipe, but", - "no bias correction method is provided.")) - error_status <- T - } else if (!(downscal_params$bc_method %in% BC_METHODS)) { - error(recipe$Run$logger, - paste0("The accepted Bias Correction methods for the downscaling", - "module are: ", paste(BC_METHODS, collapse = ", "), ".")) - error_status <- T - } - } else if (downscal_params$type == "intlr") { - if (is.null(downscal_params$int_method)) { - error(recipe$Run$logger, - paste("Downscaling type 'intlr' was requested in the recipe, but", - "no interpolation method was provided.")) - error_status <- T - } - if (is.null(downscal_params$lr_method)) { - error(recipe$Run$logger, - paste0("Downscaling type 'intlr' was requested in the recipe, but", - "no linear regression method was provided.")) - error_status <- T - } else if (!(downscal_params$lr_method %in% LR_METHODS)) { - error(recipe$Run$logger, - paste0("The accepted linear regression methods for the", - "downscaling module are: ", - paste(LR_METHODS, collapse = ", "), 
".")) - error_status <- T - } - } else if (downscal_params$type == "analogs") { - if (length(nanalogs) == 0) { + if ("type" %in% names(downscal_params)) { + if (length(downscal_params$type) == 0) { + downscal_params$type <- "none" warn(recipe$Run$logger, - paste("Downscaling type is 'analogs, but the number of analogs", - "has not been provided in the recipe.")) + paste("Downscaling 'type' is empty in the recipe, setting it to", + "'none'.")) } - } else if (downscal_params$type == "logreg") { - if (is.null(downscal_params$int_method)) { + if (!(downscal_params$type %in% DOWNSCAL_TYPES)) { error(recipe$Run$logger, - paste("Downscaling type 'logreg' was requested in the recipe, but", - "no interpolation method was provided.")) + paste0("The type of Downscaling request in the recipe is not ", + "available. It must be one of the following: ", + paste(DOWNSCAL_TYPES, collapse = ", "), ".")) error_status <- T } - if (is.null(downscal_params$log_reg_method)) { + if ((downscal_params$type %in% c("int", "intbc", "intlr", "logreg")) && + (length(downscal_params$target_grid) == 0)) { error(recipe$Run$logger, - paste0("Downscaling type 'logreg' was requested in the recipe, but", - "no logistic regression method is provided. 
The accepted", - "methods are:", - paste(LOG_REG_METHODS, collapse = ", "), ".")) - error_status <- T - } else if (!(downscal_params$log_reg_method %in% LOGREG_METHODS)) { - error(recipe$Run$logger, - paste0("The accepted logistic regression methods for the ", - "downscaling module are: ", - paste(LOGREG_METHODS, collapse = ", "), ".")) + paste("A target grid is required for the downscaling method", + "requested in the recipe.")) error_status <- T } + if (downscal_params$type == "int") { + if (length(downscal_params$int_method) == 0) { + error(recipe$Run$logger, + paste("Downscaling type 'int' was requested, but no", + "interpolation method is provided in the recipe.")) + error_status <- T + } + } else if (downscal_params$type == "intbc") { + if (length(downscal_params$int_method) == 0) { + error(recipe$Run$logger, + paste("Downscaling type 'int' was requested in the recipe, but no", + "interpolation method is provided.")) + error_status <- T + } + if (length(downscal_params$bc_method)== 0) { + error(recipe$Run$logger, + paste("Downscaling type 'intbc' was requested in the recipe, but", + "no bias correction method is provided.")) + error_status <- T + } else if (!(downscal_params$bc_method %in% BC_METHODS)) { + error(recipe$Run$logger, + paste0("The accepted Bias Correction methods for the downscaling", + " module are: ", paste(BC_METHODS, collapse = ", "), ".")) + error_status <- T + } + } else if (downscal_params$type == "intlr") { + if (length(downscal_params$int_method) == 0) { + error(recipe$Run$logger, + paste("Downscaling type 'intlr' was requested in the recipe, but", + "no interpolation method was provided.")) + error_status <- T + } + if (length(downscal_params$lr_method) == 0) { + error(recipe$Run$logger, + paste("Downscaling type 'intlr' was requested in the recipe, but", + "no linear regression method was provided.")) + error_status <- T + } else if (!(downscal_params$lr_method %in% LR_METHODS)) { + error(recipe$Run$logger, + paste0("The accepted 
linear regression methods for the", + " downscaling module are: ", + paste(LR_METHODS, collapse = ", "), ".")) + error_status <- T + } + } else if (downscal_params$type == "analogs") { + if (length(nanalogs) == 0) { + warn(recipe$Run$logger, + paste("Downscaling type is 'analogs, but the number of analogs", + "has not been provided in the recipe.")) + } + } else if (downscal_params$type == "logreg") { + if (length(downscal_params$int_method) == 0) { + error(recipe$Run$logger, + paste("Downscaling type 'logreg' was requested in the recipe, but", + "no interpolation method was provided.")) + error_status <- T + } + if (length(downscal_params$log_reg_method) == 0) { + error(recipe$Run$logger, + paste("Downscaling type 'logreg' was requested in the recipe,", + "but no logistic regression method is provided.")) + error_status <- T + } else if (!(downscal_params$log_reg_method %in% LOGREG_METHODS)) { + error(recipe$Run$logger, + paste0("The accepted logistic regression methods for the ", + "downscaling module are: ", + paste(LOGREG_METHODS, collapse = ", "), ".")) + error_status <- T + } + } } # Skill if (("Skill" %in% names(recipe$Analysis$Workflow)) && @@ -380,6 +383,7 @@ check_recipe <- function(recipe) { # RUN CHECKS # --------------------------------------------------------------------- + ## TODO: These checks should probably go first RUN_FIELDS = c("Loglevel", "Terminal", "output_dir", "code_dir") LOG_LEVELS = c("INFO", "DEBUG", "WARN", "ERROR", "FATAL") -- GitLab From e2a8a33bf99c67901e6e16bd982c02f70fd48f48 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 9 Mar 2023 13:27:47 +0100 Subject: [PATCH 14/21] Add CSDownscale functions --- modules/Downscaling/tmp/Analogs.R | 537 +++++++++++++++++ modules/Downscaling/tmp/Intbc.R | 325 +++++++++++ modules/Downscaling/tmp/Interpolation.R | 739 ++++++++++++++++++++++++ modules/Downscaling/tmp/Intlr.R | 525 +++++++++++++++++ modules/Downscaling/tmp/LogisticReg.R | 497 ++++++++++++++++ 
modules/Downscaling/tmp/Utils.R | 31 + 6 files changed, 2654 insertions(+) create mode 100644 modules/Downscaling/tmp/Analogs.R create mode 100644 modules/Downscaling/tmp/Intbc.R create mode 100644 modules/Downscaling/tmp/Interpolation.R create mode 100644 modules/Downscaling/tmp/Intlr.R create mode 100644 modules/Downscaling/tmp/LogisticReg.R create mode 100644 modules/Downscaling/tmp/Utils.R diff --git a/modules/Downscaling/tmp/Analogs.R b/modules/Downscaling/tmp/Analogs.R new file mode 100644 index 00000000..fd6e2f97 --- /dev/null +++ b/modules/Downscaling/tmp/Analogs.R @@ -0,0 +1,537 @@ +#'@rdname CST_Analogs +#'@title Downscaling using Analogs based on large scale fields. +#' +#'@author J. Ramon, \email{jaume.ramon@bsc.es} +#' +#'@description This function performs a downscaling using Analogs. To compute +#'the analogs given a coarse-scale field, the function looks for days with +#'similar conditions in the historical observations. The coarse scale and +#'observation data can be either global or regional. In the latter case, the +#'region is defined by the user. In principle, the coarse and observation data +#'should be of the same variable, although different variables can also be admitted. +#'The analogs function will find the N best analogs based in Minimum Euclidean +#'distance. +#' +#'The search of analogs must be done in the longest dataset posible, but might +#'require high-memory computational resources. This is important since it is +#'necessary to have a good representation of the possible states of the field in +#'the past, and therefore, to get better analogs. The function can also look for +#'analogs within a window of D days, but is the user who has to define that window. +#'Otherwise, the function will look for analogs in the whole dataset. This function +#'is intended to downscale climate prediction data (i.e., sub-seasonal, seasonal +#'and decadal predictions) but can admit climate projections or reanalyses. 
It does +#'not have constrains of specific region or variables to downscale. +#'@param exp an 's2dv' object with named dimensions containing the experimental field on +#'the coarse scale for which the downscaling is aimed. The object must have, at least, +#'the dimensions latitude, longitude, start date and time. The object is expected to be +#'already subset for the desired region. Data can be in one or two integrated regions, e.g., +#'crossing the Greenwich meridian. To get the correct results in the latter case, +#'the borders of the region should be specified in the parameter 'region'. See parameter +#''region'. +#'@param obs an 's2dv' object with named dimensions containing the observational field. +#'The object must have, at least, the dimensions latitude, longitude and start date. +#'The object is expected to be already subset for the desired region. +#'@param obs2 an 's2dv' object with named dimensions containing a different observational +#'field to that in 'obs'. If provided, these observations will be used in the training, +#'i.e. the searching for analogs, so that they should be in a coarser grid to those in +#''obs'. Training with observations on a grid with a spatial resolution closer to that +#'in 'exp', will in principle ensure better results. The object must have, at least, the +#'dimensions latitude, longitude and start date. The object is expected to be already +#'subset for the desired region. +#'@param grid_exp a character vector with a path to an example file of the exp data. +#'It can be either a path to another NetCDF file which to read the target grid from +#'(a single grid must be defined in such file) or a character vector indicating the +#'coarse grid to be passed to CDO, and it must be a grid recognised by CDO. +#'@param nanalogs an integer indicating the number of analogs to be searched +#'@param fun_analog a function to be applied over the found analogs. Only these options +#'are valid: "mean", "wmean", "max", "min", "median" or NULL. 
If set to NULL (default), +#'the function returns the found analogs. +#'@param lat_dim a character vector indicating the latitude dimension name in the element +#''data' in exp and obs. Default set to "lat". +#'@param lon_dim a character vector indicating the longitude dimension name in the element +#''data' in exp and obs. Default set to "lon". +#'@param sdate_dim a character vector indicating the start date dimension name in the +#'element 'data' in exp and obs. Default set to "sdate". +#'@param time_dim a character vector indicating the time dimension name in the element +#''data' in exp and obs. Default set to "time". +#'@param member_dim a character vector indicating the member dimension name in the element +#''data' in exp and obs. Default set to "member". +#'@param region a numeric vector indicating the borders of the region defined in exp. +#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers +#'to the left border, while lonmax refers to the right border. latmin indicates the lower +#'border, whereas latmax indicates the upper border. If set to NULL (default), the function +#'takes the first and last elements of the latitudes and longitudes. +#'@param return_indices a logical vector indicating whether to return the indices of the +#'analogs together with the downscaled fields. Default to FALSE. +#'@param loocv_window a logical vector only to be used if 'obs' does not have the dimension +#''window'. It indicates whether to apply leave-one-out cross-validation in the creation +#'of the window. It is recommended to be set to TRUE. Default to TRUE. +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#' +#'@return An 's2dv' object. The element 'data' contains the dowscaled field, 'lat' the +#'downscaled latitudes, and 'lon' the downscaled longitudes. If fun_analog is set to NULL +#'(default), the output array in 'data' also contains the dimension 'analog' with the best +#'analog days. 
+#'@examples +#'exp <- rnorm(15000) +#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 30) +#'exp_lons <- 1:5 +#'exp_lats <- 1:4 +#'obs <- rnorm(27000) +#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5, time = 30) +#'obs_lons <- seq(0,6, 6/14) +#'obs_lats <- seq(0,6, 6/11) +#'exp <- s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons) +#'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons) +#'downscaled_field <- CST_Analogs(exp = exp, obs = obs, grid_exp = 'r360x180') +#'@export +CST_Analogs <- function(exp, obs, grid_exp, obs2 = NULL, nanalogs = 3, fun_analog = NULL, lat_dim = "lat", + lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", member_dim = "member", + region = NULL, return_indices = FALSE, loocv_window = TRUE, ncores = 1) { + + # input exp and obs must be s2dv_cube objects + if (!inherits(exp,'s2dv_cube')) { + stop("Parameter 'exp' must be of the class 's2dv_cube'") + } + + # input exp and obs must be s2dv_cube objects + if (!inherits(obs,'s2dv_cube')) { + stop("Parameter 'obs' must be of the class 's2dv_cube'") + } + + res <- Analogs(exp = exp$data, obs = obs$data, exp_lats = exp$lat, exp_lons = exp$lon, + obs_lats = obs$lat, obs_lons = obs$lon, grid_exp = grid_exp, + nanalogs = nanalogs, fun_analog = fun_analog, lat_dim = lat_dim, lon_dim = lon_dim, + sdate_dim = sdate_dim, time_dim = time_dim, member_dim = member_dim, + region = region, return_indices = return_indices, loocv_window = loocv_window, + ncores = ncores) + + # Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data + exp$data <- res$data + exp$lon <- res$lon + exp$lat <- res$lat + + obs$data <- res$obs + obs$lat <- res$lat + obs$lon <- res$lon + + res_s2dv <- list(exp = exp, obs = obs) + return(res_s2dv) +} + +#'@rdname Analogs +#'@title Downscaling using Analogs based on large scale fields. +#' +#'@author J. Ramon, \email{jaume.ramon@bsc.es} +#'@author Ll. 
Lledó, \email{llorenc.lledo@ecmwf.int} +#' +#'@description This function performs a downscaling using Analogs. To compute +#'the analogs given a coarse-scale field, the function looks for days with +#'similar conditions in the historical observations. The coarse scale and +#'observation data can be either global or regional. In the latter case, the +#'region is defined by the user. In principle, the coarse and observation data +#'should be of the same variable, although different variables can also be admitted. +#'The analogs function will find the N best analogs based in Minimum Euclidean +#'distance. +#' +#'The search of analogs must be done in the longest dataset posible, but might +#'require high-memory computational resources. This is important since it is +#'necessary to have a good representation of the possible states of the field in +#'the past, and therefore, to get better analogs. The function can also look for +#'analogs within a window of D days, but is the user who has to define that window. +#'Otherwise, the function will look for analogs in the whole dataset. This function +#'is intended to downscale climate prediction data (i.e., sub-seasonal, seasonal +#'and decadal predictions) but can admit climate projections or reanalyses. It does +#'not have constrains of specific region or variables to downscale. +#'@param exp an array with named dimensions containing the experimental field on the +#'coarse scale for which the downscaling is aimed. The object must have, at least, +#'the dimensions latitude, longitude, start date and time. The object is expected to be +#'already subset for the desired region. Data can be in one or two integrated regions, e.g., +#'crossing the Greenwich meridian. To get the correct results in the latter case, +#'the borders of the region should be specified in the parameter 'region'. See parameter +#''region'. +#'@param obs an array with named dimensions containing the observational field. 
The object +#'must have, at least, the dimensions latitude, longitude, start date and time. The object +#'is expected to be already subset for the desired region. Optionally, 'obs' can have the +#'dimension 'window', containing the sampled fields into which the function will look for +#'the analogs. See function 'generate_window()'. Otherwise, the function will look for +#'analogs using all the possible fields contained in obs. +#'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must +#'range from -90 to 90. +#'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes +#'can range from -180 to 180 or from 0 to 360. +#'@param obs_lats a numeric vector containing the latitude values in 'obs'. Latitudes must +#'range from -90 to 90. +#'@param obs_lons a numeric vector containing the longitude values in 'obs'. Longitudes +#'can range from -180 to 180 or from 0 to 360. +#'@param grid_exp a character vector with a path to an example file of the exp data. +#'It can be either a path to another NetCDF file which to read the target grid from +#'(a single grid must be defined in such file) or a character vector indicating the +#'coarse grid to be passed to CDO, and it must be a grid recognised by CDO. +#'@param obs2 an 's2dv' object with named dimensions containing a different observational +#'field to that in 'obs'. If provided, these observations will be used in the training, +#'i.e. the searching for analogs, so that they should be in a coarser grid to those in +#''obs'. Training with observations on a grid with a spatial resolution closer to that +#'in 'exp', will in principle ensure better results. The object must have, at least, the +#'dimensions latitude, longitude and start date. The object is expected to be already +#'subset for the desired region. +#'@param nanalogs an integer indicating the number of analogs to be searched. +#'@param fun_analog a function to be applied over the found analogs. 
Only these options +#'are valid: "mean", "wmean", "max", "min", "median" or NULL. If set to NULL (default), +#'the function returns the found analogs. +#'@param lat_dim a character vector indicating the latitude dimension name in the element +#''data' in exp and obs. Default set to "lat". +#'@param lon_dim a character vector indicating the longitude dimension name in the element +#''data' in exp and obs. Default set to "lon". +#'@param sdate_dim a character vector indicating the start date dimension name in the +#'element 'data' in exp and obs. Default set to "sdate". +#'@param time_dim a character vector indicating the time dimension name in the element +#''data' in exp and obs. Default set to "time". +#'@param member_dim a character vector indicating the member dimension name in the element +#''data' in exp and obs. Default set to "member". +#'@param region a numeric vector indicating the borders of the region defined in exp. +#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers +#'to the left border, while lonmax refers to the right border. latmin indicates the lower +#'border, whereas latmax indicates the upper border. If set to NULL (default), the function +#'takes the first and last elements of the latitudes and longitudes. +#'@param return_indices a logical vector indicating whether to return the indices of the +#'analogs together with the downscaled fields. The indices refer to the position of the +#'element in the vector time * start_date. If 'obs' contain the dimension 'window', it will +#'refer to the position of the element in the dimension 'window'. Default to FALSE. +#'@param loocv_window a logical vector only to be used if 'obs' does not have the dimension +#''window'. It indicates whether to apply leave-one-out cross-validation in the creation +#'of the window. It is recommended to be set to TRUE. Default to TRUE. +#'@param ncores an integer indicating the number of cores to use in parallel computation. 
+#'@import multiApply +#'@import CSTools +#'@importFrom s2dv InsertDim CDORemap +#'@importFrom FNN get.knnx +#' +#'@seealso \code{\link[s2dverification]{CDORemap}} +#' +#'@return A list of three elements. 'data' contains the dowscaled field, 'lat' the +#'downscaled latitudes, and 'lon' the downscaled longitudes. If fun_analog is set to NULL +#'(default), the output array in 'data' also contains the dimension 'analog' with the best +#'analog days. +#'@examples +#'exp <- rnorm(15000) +#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 30) +#'exp_lons <- 1:5 +#'exp_lats <- 1:4 +#'obs <- rnorm(27000) +#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5, time = 30) +#'obs_lons <- seq(0,6, 6/14) +#'obs_lats <- seq(0,6, 6/11) +#'downscaled_field <- Analogs(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons, +#'obs_lats = obs_lats, obs_lons = obs_lons, grid_exp = 'r360x180') +#'@export +Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, obs2 = NULL, + obs2_lats = NULL, obs2_lons = NULL, nanalogs = 3, fun_analog = NULL, + lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", + member_dim = "member", region = NULL, return_indices = FALSE, + loocv_window = TRUE, ncores = 1) { + #----------------------------------- + # Checkings + #----------------------------------- + if (!inherits(grid_exp, 'character')) { + stop("Parameter 'grid_exp' must be of class 'character'. 
It can be either a path ", + "to another NetCDF file which to read the target grid from (a single grid must be ", + "defined in such file) or a character vector indicating the coarse grid to ", + "be passed to CDO, and it must be a grid recognised by CDO or a NetCDF file.") + } + + if (!inherits(nanalogs, 'numeric')) { + stop("Parameter 'nanalogs' must be of the class 'numeric'") + } + + if (!inherits(lat_dim, 'character')) { + stop("Parameter 'lat_dim' must be of the class 'character'") + } + + if (!inherits(lon_dim, 'character')) { + stop("Parameter 'lon_dim' must be of the class 'character'") + } + + if (!inherits(sdate_dim, 'character')) { + stop("Parameter 'sdate_dim' must be of the class 'character'") + } + + if (!inherits(time_dim, 'character')) { + stop("Parameter 'time_dim' must be of the class 'character'") + } + + # Do not allow synonims for lat (latitude), lon (longitude) and time (sdate) dimension names + if (is.na(match(lon_dim, names(dim(exp)))) | is.na(match(lon_dim, names(dim(obs))))) { + stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'lon_dim'") + } + + if (is.na(match(lat_dim, names(dim(exp)))) | is.na(match(lat_dim, names(dim(obs))))) { + stop("Missing latitude dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'lat_dim'") + } + + if (is.na(match(sdate_dim, names(dim(exp)))) | is.na(match(sdate_dim, names(dim(obs))))) { + stop("Missing start date dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'sdate_dim'") + } + + if (is.na(match(time_dim, names(dim(exp)))) | is.na(match(time_dim, names(dim(obs))))) { + stop("Missing time dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'time_dim'") + } + + # Ensure we have enough data to interpolate from high-res to coarse grid + #if ((obs_lats[1] > exp_lats[1]) | (obs_lats[length(obs_lats)] < exp_lats[length(exp_lats)]) | + # (obs_lons[1] > exp_lons[1]) | (obs_lons[length(obs_lons)] < 
exp_lons[length(exp_lons)])) { + + # stop("There are not enough data in 'obs'. Please to add more latitudes or ", + # "longitudes.") + #} + + # the code is not yet prepared to handle members in the observations + restore_ens <- FALSE + if (member_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[member_dim]), 1)) { + restore_ens <- TRUE + obs <- ClimProjDiags::Subset(x = obs, along = member_dim, indices = 1, drop = 'selected') + } else { + stop("Not implemented for observations with members ('obs' can have 'member_dim', ", + "but it should be of length = 1).") + } + } + + if (!is.null(obs2)) { + # the code is not yet prepared to handle members in the observations + if (member_dim %in% names(dim(obs2))) { + if (identical(as.numeric(dim(obs2)[member_dim]), 1)) { + obs2 <- ClimProjDiags::Subset(x = obs2, along = member_dim, indices = 1, drop = 'selected') + } else { + stop("Not implemented for observations with members ('obs2' can have 'member_dim', ", + "but it should be of length = 1).") + } + } + + if (is.null(obs2_lats) | is.null(obs2_lons)) { + stop("Missing latitudes and/or longitudes for the provided training observations. 
Please ", + "provide them with the parametres 'obs2_lats' and 'obs2_lons'") + } + + if (is.na(match(lon_dim, names(dim(obs2))))) { + stop("Missing longitude dimension in 'obs2', or does not match the parameter 'lon_dim'") + } + + if (is.na(match(lat_dim, names(dim(obs2))))) { + stop("Missing latitude dimension in 'obs2', or does not match the parameter 'lat_dim'") + } + + if (is.na(match(sdate_dim, names(dim(obs2))))) { + stop("Missing start date dimension in 'obs2', or does not match the parameter 'sdate_dim'") + } + + if (is.na(match(time_dim, names(dim(obs2))))) { + stop("Missing time dimension in 'obs2', or does not match the parameter 'time_dim'") + } + } + + # Select a function to apply to the analogs selected for a given observation + if (!is.null(fun_analog)) { + stopifnot(fun_analog %in% c("mean", "wmean", "max", "min", "median")) + } + + if (!is.null(obs2)) { + obs_train <- obs2 + obs_train_lats <- obs2_lats + obs_train_lons <- obs2_lons + } else { + obs_train <- obs + obs_train_lats <- obs_lats + obs_train_lons <- obs_lons + } + + # Correct indices later if cross-validation + loocv_correction <- FALSE + if ( !("window" %in% names(dim(obs_train))) & loocv_window) { + loocv_correction <- TRUE + } + + #----------------------------------- + # Interpolate high-res observations to the coarse grid + #----------------------------------- + if (is.null(region)) { + warning("The borders of the downscaling region have not been provided. 
Assuming the four borders of the ", + "downscaling region are defined by the first and last elements of the parametres 'exp_lats' and ", + "'exp_lons'.") + region <- c(exp_lons[1], exp_lons[length(exp_lons)], exp_lats[1], exp_lats[length(exp_lats)]) + } + + obs_interpolated <- Interpolation(exp = obs_train, lats = obs_train_lats, lons = obs_train_lons, + target_grid = grid_exp, lat_dim = lat_dim, lon_dim = lon_dim, + method_remap = "conservative", region = region) + # If after interpolating 'obs' data the coordinates do not match, the exp data is interpolated to + # the same grid to force the matching + if (!.check_coords(lat1 = obs_interpolated$lat, lat2 = exp_lats, lon1 = obs_interpolated$lon, lon2 = exp_lons)) { + exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = grid_exp, + lat_dim = lat_dim, lon_dim = lon_dim, method_remap = "conservative", + region = region)$data + } else { + exp_interpolated <- exp + } + + # Create window if user does not have it in the training observations + if ( !("window" %in% names(dim(obs_interpolated$data))) ) { + obs_train_interpolated <- generate_window(obj = obs_interpolated$data, sdate_dim = sdate_dim, + time_dim = time_dim, loocv = loocv_window) + obs_hres <- generate_window(obj = obs, sdate_dim = sdate_dim, time_dim = time_dim, loocv = loocv_window) + } + + #----------------------------------- + # Reshape train and test + #----------------------------------- + res.data <- Apply(list(obs_train_interpolated, exp_interpolated, obs_hres), + target_dims = list(c("window", lat_dim, lon_dim), c(lat_dim, lon_dim), + c("window", lat_dim, lon_dim)), + fun = function(tr, te, ob) .analogs(train = tr, test = te, obs_hres = ob, k = nanalogs, + fun_analog = fun_analog), ncores = ncores)$output1 + + # Return the indices of the best analogs + if (return_indices) { + res.ind <- Apply(list(obs_train_interpolated, exp_interpolated, obs_hres), + target_dims = list(c("window", lat_dim, lon_dim), c(lat_dim, 
lon_dim), + c("window", lat_dim, lon_dim)), + fun = function(tr, te, ob) .analogs(train = tr, test = te, obs_hres = ob, k = nanalogs, + fun_analog = fun_analog, return_indices = TRUE), ncores = ncores, output_dims = 'ind')$output1 + + # If cross-validation has been applied, correct the indices + if (loocv_correction) { + nsdates <- dim(res.ind)[names(dim(res.ind)) == sdate_dim] + ntimes <- dim(res.ind)[names(dim(res.ind)) == time_dim] + res.ind <- Apply(res.ind, target_dims = c("ind", sdate_dim), function(x) + sapply(1:nsdates, function(s) seq(ntimes * nsdates)[ - (ntimes * (s - 1) + 1:ntimes)][x[, s]]), + output_dims = c("ind", sdate_dim))$output1 + } + + # restore ensemble dimension in observations if it existed originally + if (restore_ens) { + obs <- s2dv::InsertDim(obs, posdim = 1, lendim = 1, name = member_dim) + } + + res <- list(data = res.data, ind = res.ind, obs = obs, lon = obs_lons, lat = obs_lats) + } + else { + # restore ensemble dimension in observations if it existed originally + if (restore_ens) { + obs <- s2dv::InsertDim(obs, posdim = 1, lendim = 1, name = member_dim) + } + + res <- list(data = res.data, obs = obs, lon = obs_lons, lat = obs_lats) + } + + return(res) +} + +# For each element in test, find the indices of the k nearest neigbhors in train +.analogs <- function(train, test, obs_hres, k, fun_analog, return_indices = FALSE) { + + require(FNN) + + # train and obs_hres dim: 3 dimensions window, lat and lon (in this order) + # test dim: 2 dimensions lat and lon (in this order) + # Number of lats/lons of the high-resolution data + space_dims_hres <- dim(obs_hres)[c(2,3)] + + # Reformat train and test as an array with (time, points) + train <- apply(train, 1, as.vector); names(dim(train))[1] <- "space" + test <- as.vector(test) + obs_hres <- apply(obs_hres, 1, as.vector); names(dim(obs_hres))[1] <- "space" + + # Identify and remove NA's + idx_na_tr <- is.na(train[ , 1]) + idx_na_te <- is.na(test) + idx_na <- idx_na_tr | idx_na_te + tr_wo_na 
<- t(train[!idx_na , ]) + te_wo_na <- test[!idx_na] + te_wo_na <- InsertDim(data = te_wo_na, posdim = 1, lendim = 1, name = "time") + + knn.ind <- get.knnx(tr_wo_na, te_wo_na, k) + + dist <- knn.ind$nn.dist + idx <- knn.ind$nn.index + + # Either return only the indices or the analogs + if (return_indices) { + res <- as.numeric(idx) + } else { + res <- obs_hres[ , idx] + dim(res) <- c(space_dims_hres, analogs = k) + + if (!is.null(fun_analog)) { + if (fun_analog == "wmean") { + weight <- 1 / dist + res <- apply(res, c(1,2), function(x) weighted.mean(x, weight)) + } else { + res <- apply(res, c(1,2), fun_analog) + } + } + } + + return(res) +} + +# Add the dimension window to an array that contains, at least, the start date and time +# dimensions +# object has at least dimensions sdate and time +generate_window <- function(obj, sdate_dim, time_dim, loocv, size = NULL) { + + rsdates <- 1:dim(obj)[names(dim(obj)) == sdate_dim] + ntimes <- dim(obj)[names(dim(obj)) == time_dim] + rtimes <- 1:dim(obj)[names(dim(obj)) == time_dim] + + # Generate a window containing all the data + if (is.null(size)) { + + # Generate window removing one start date each time (leave-one-out cross-validation) + if (loocv) { + obj_window <- Apply(obj, target_dims = c(time_dim, sdate_dim), + fun = function(x) sapply(rsdates, function(s) as.vector(x[ rtimes, -s])), + output_dims = c('window', sdate_dim))$output1 + # Generate window without cross-validation + } else { + obj_window <- Apply(obj, target_dims = c(time_dim, sdate_dim), + fun = as.vector, output_dims = 'window')$output1 + } + } + # Generate window of the size specified by the user. Only applied with CV + else { + # For an accurate generation of the window, it is mandatory to add some "extra" data. 
+ if (!("smonth" %in% names(dim(obj)))) { + stop("Missing 'smonth' dimension") + } + + # Concatenate data from previous, target and posterior months + obj_new <- Apply(obj, target_dims = list(c("time", "smonth")), + fun = as.vector, output_dims = "time")$output1 + + if (loocv) { + obj_window <- Apply(list(obj_new, rtimes, rsdates), target_dims = list(c(time_dim, sdate_dim), NULL, NULL), + fun = function(x, t, s) as.vector(x[(ntimes + t - size):(ntimes + t + size), -s]), + output_dims = 'window')$output1 + names(dim(obj_window))[(length(names(dim(obj_window))) - 1):length(names(dim(obj_window)))] <- c(time_dim, sdate_dim) + } else { + obj_window <- Apply(obj_new, target_dims = c(time_dim, sdate_dim), + fun = function(x) sapply(rtimes, function(t) as.vector(x[(ntimes + t - size):(ntimes + t + size), ])), + output_dims = c('window', time_dim))$output1 + + } + } + + return(obj_window) +} + + + + + + + + diff --git a/modules/Downscaling/tmp/Intbc.R b/modules/Downscaling/tmp/Intbc.R new file mode 100644 index 00000000..86bb5a9c --- /dev/null +++ b/modules/Downscaling/tmp/Intbc.R @@ -0,0 +1,325 @@ +#'@rdname CST_Intbc +#'@title Downscaling using interpolation and bias adjustment. +#' +#'@author J. Ramon, \email{jaume.ramon@bsc.es} +#' +#'@description This function performs a downscaling using an interpolation and a later bias +#'adjustment. It is recommended that the observations are passed already in the target grid. +#'Otherwise, the function will also perform an interpolation of the observed field into the +#'target grid. The coarse scale and observation data can be either global or regional. In the +#'latter case, the region is defined by the user. In principle, the coarse and observation data +#'are intended to be of the same variable, although different variables can also be admitted. +#' +#'@param exp an 's2dv object' containing the experimental field on the +#'coarse scale for which the downscaling is aimed. 
The object must have, at least, +#'the dimensions latitude, longitude, start date and member. The object is expected to be +#'already subset for the desired region. Data can be in one or two integrated regions, e.g., +#'crossing the Greenwich meridian. To get the correct results in the latter case, +#'the borders of the region should be specified in the parameter 'region'. See parameter +#''region'. +#'@param obs an 's2dv object' containing the observational field. The object +#'must have, at least, the dimensions latitude, longitude and start date. The object is +#'expected to be already subset for the desired region. +#'@param target_grid a character vector indicating the target grid to be passed to CDO. +#'It must be a grid recognised by CDO or a NetCDF file. +#'@param bc_method a character vector indicating the bias adjustment method to be applied after +#'the interpolation. Accepted methods are 'simple_bias', 'calibration', 'quantile_mapping'. The +#'abbreviations 'sbc', 'cal', 'qm' can also be used. +#'@param int_method a character vector indicating the regridding method to be passed to CDORemap. +#'Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is to be used, CDO_1.9.8 +#'or newer version is required. +#'@param points a list of two elements containing the point latitudes and longitudes +#'of the locations to downscale the model data. The list must contain the two elements +#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is +#'to a point location, only regular grids are allowed for exp and obs. Only needed if the +#'downscaling is to a point location. +#'@param method_point_interp a character vector indicating the interpolation method to interpolate +#'model gridded data into the point locations. Accepted methods are "nearest", "bilinear", "9point", +#'"invdist4nn", "NE", "NW", "SE", "SW". Only needed if the downscaling is to a point location. 
+#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data'
+#'in exp and obs. Default set to "lat".
+#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data'
+#'in exp and obs. Default set to "lon".
+#'@param sdate_dim a character vector indicating the start date dimension name in the element
+#''data' in exp and obs. Default set to "sdate".
+#'@param member_dim a character vector indicating the member dimension name in the element
+#''data' in exp and obs. Default set to "member".
+#'@param region a numeric vector indicating the borders of the region defined in obs.
+#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers
+#'to the left border, while lonmax refers to the right border. latmin indicates the lower
+#'border, whereas latmax indicates the upper border. If set to NULL (default), the function
+#'takes the first and last elements of the latitudes and longitudes.
+#'@param ncores an integer indicating the number of cores to use in parallel computation.
+#'@return An 's2dv' object. The element 'data' contains the downscaled field, 'lat' the
+#'downscaled latitudes, and 'lon' the downscaled longitudes. 
+#'@examples +#'exp <- rnorm(500) +#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5) +#'exp_lons <- 1:5 +#'exp_lats <- 1:4 +#'obs <- rnorm(900) +#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5) +#'obs_lons <- seq(1,5, 4/14) +#'obs_lats <- seq(1,4, 3/11) +#'exp <- s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons) +#'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons) +#'res <- CST_Intbc(exp = exp, obs = obs, target_grid = 'r1280x640', bc_method = 'simple_bias', int_method = 'conservative') +#'@export + +CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, points = NULL, + method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", + sdate_dim = "sdate", member_dim = "member", region = NULL, ncores = 1) +{ + if (!inherits(exp,'s2dv_cube')) { + stop("Parameter 'exp' must be of the class 's2dv_cube'") + } + + if (!inherits(obs,'s2dv_cube')) { + stop("Parameter 'obs' must be of the class 's2dv_cube'") + } + + res <- Intbc(exp = exp$data, obs = obs$data, exp_lats = exp$lat, exp_lons = exp$lon, + obs_lats = obs$lat, obs_lons = obs$lon, target_grid = target_grid, + int_method = int_method, bc_method = bc_method, points = points, source_file = exp$source_files[1], + method_point_interp = method_point_interp, lat_dim = lat_dim, lon_dim = lon_dim, + sdate_dim = sdate_dim, member_dim = member_dim, region = region, ncores = ncores) + + # Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data + exp$data <- res$data + exp$lon <- res$lon + exp$lat <- res$lat + + obs$data <- res$obs + obs$lat <- res$lat + obs$lon <- res$lon + + res_s2dv <- list(exp = exp, obs = obs) + return(res_s2dv) + +} + +#'@rdname Intbc +#'@title Downscaling using interpolation and bias adjustment. +#' +#'@author J. Ramon, \email{jaume.ramon@bsc.es} +#' +#'@description This function performs a downscaling using an interpolation and a later bias +#'adjustment. It is recommended that the observations are passed already in the target grid. 
+#'Otherwise, the function will also perform an interpolation of the observed field into the +#'target grid. The coarse scale and observation data can be either global or regional. In the +#'latter case, the region is defined by the user. In principle, the coarse and observation data +#'are intended to be of the same variable, although different variables can also be admitted. +#' +#'@param exp an array with named dimensions containing the experimental field on the +#'coarse scale for which the downscaling is aimed. The object must have, at least, +#'the dimensions latitude, longitude, start date and member. The object is expected to be +#'already subset for the desired region. Data can be in one or two integrated regions, e.g., +#'crossing the Greenwich meridian. To get the correct results in the latter case, +#'the borders of the region should be specified in the parameter 'region'. See parameter +#''region'. +#'@param obs an array with named dimensions containing the observational field. The object +#'must have, at least, the dimensions latitude, longitude and start date. The object is +#'expected to be already subset for the desired region. +#'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must +#'range from -90 to 90. +#'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes +#'can range from -180 to 180 or from 0 to 360. +#'@param obs_lats a numeric vector containing the latitude values in 'obs'. Latitudes must +#'range from -90 to 90. +#'@param obs_lons a numeric vector containing the longitude values in 'obs'. Longitudes +#'can range from -180 to 180 or from 0 to 360. +#'@param target_grid a character vector indicating the target grid to be passed to CDO. +#'It must be a grid recognised by CDO or a NetCDF file. +#'@param bc_method a character vector indicating the bias adjustment method to be applied after +#'the interpolation. 
Accepted methods are 'simple_bias', 'calibration', 'quantile_mapping'. The +#'abbreviations 'sbc', 'cal', 'qm' can also be used. +#'@param int_method a character vector indicating the regridding method to be passed to CDORemap. +#'Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is to be used, CDO_1.9.8 +#'or newer version is required. +#'@param points a list of two elements containing the point latitudes and longitudes +#'of the locations to downscale the model data. The list must contain the two elements +#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is +#'to a point location, only regular grids are allowed for exp and obs. Only needed if the +#'downscaling is to a point location. +#'@param method_point_interp a character vector indicating the interpolation method to interpolate +#'model gridded data into the point locations. Accepted methods are "nearest", "bilinear", "9point", +#'"invdist4nn", "NE", "NW", "SE", "SW". Only needed if the downscaling is to a point location. +#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data' +#'in exp and obs. Default set to "lat". +#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data' +#'in exp and obs. Default set to "lon". +#'@param sdate_dim a character vector indicating the start date dimension name in the element +#''data' in exp and obs. Default set to "sdate". +#'@param member_dim a character vector indicating the member dimension name in the element +#''data' in exp and obs. Default set to "member". +#'@param source_file a character vector with a path to an example file of the exp data. +#'Only needed if the downscaling is to a point location. +#'@param region a numeric vector indicating the borders of the region defined in obs. +#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. 
lonmin refers +#'to the left border, while lonmax refers to the right border. latmin indicates the lower +#'border, whereas latmax indicates the upper border. If set to NULL (default), the function +#'takes the first and last elements of the latitudes and longitudes. +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#' +#'@import CSTools +#' +#'@seealso \code{\link[CSTools]{BiasCorrection}} +#'@seealso \code{\link[CSTools]{Calibration}} +#'@seealso \code{\link[CSTools]{QuantileMapping}} +#' +#'@return An list of three elements. 'data' contains the dowscaled field, 'lat' the +#'downscaled latitudes, and 'lon' the downscaled longitudes. +#'@examples +#'exp <- rnorm(500) +#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5) +#'exp_lons <- 1:5 +#'exp_lats <- 1:4 +#'obs <- rnorm(900) +#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5) +#'obs_lons <- seq(1,5, 4/14) +#'obs_lats <- seq(1,4, 3/11) +#'res <- Intbc(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons, obs_lats = obs_lats, +#'obs_lons = obs_lons, target_grid = 'r1280x640', bc_method = 'simple_bias', int_method = 'conservative') +#'@export +Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, bc_method, int_method = NULL, + points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", + time_dim = "time", member_dim = "member", source_file = NULL, region = NULL, ncores = 1, ...) 
{ + + if (!inherits(bc_method, 'character')) { + stop("Parameter 'bc_method' must be of the class 'character'") + } + + if (!inherits(lat_dim, 'character')) { + stop("Parameter 'lat_dim' must be of the class 'character'") + } + + if (!inherits(lon_dim, 'character')) { + stop("Parameter 'lon_dim' must be of the class 'character'") + } + + if (!inherits(sdate_dim, 'character')) { + stop("Parameter 'sdate_dim' must be of the class 'character'") + } + + if (!inherits(member_dim, 'character')) { + stop("Parameter 'member_dim' must be of the class 'character'") + } + + # Do not allow synonims for lat (latitude), lon (longitude) and time (sdate) dimension names + if (is.na(match(lon_dim, names(dim(exp)))) | is.na(match(lon_dim, names(dim(obs))))) { + stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'lon_dim'") + } + + if (is.na(match(lat_dim, names(dim(exp)))) | is.na(match(lat_dim, names(dim(obs))))) { + stop("Missing latitude dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'lat_dim'") + } + + if (is.na(match(sdate_dim, names(dim(exp)))) | is.na(match(sdate_dim, names(dim(obs))))) { + stop("Missing start date dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'sdate_dim'") + } + + if (is.na(match(member_dim, names(dim(exp))))) { + stop("Missing member dimension in 'exp', or does not match the parameter 'member_dim'") + } + + if (!(bc_method %in% c('sbc', 'cal', 'qm', 'dbc', 'simple_bias', 'calibration', + 'quantile_mapping', 'dynamical_bias'))) { + stop("Parameter 'bc_method' must be a character vector indicating the bias adjustment method. ", + "Accepted methods are 'simple_bias', 'calibration', 'quantile_mapping'. The abbreviations ", + "'sbc', 'cal', 'qm' can also be used.") + } + + if (!is.null(points) & is.null(source_file)) { + stop("No source file found. 
Source file must be provided in the parameter 'source_file'.") + } + + if (!is.null(points) & is.null(method_point_interp)) { + stop("Please provide the interpolation method to interpolate gridded data to point locations ", + "through the parameter 'method_point_interp'.") + } + + if (is.null(region)) { + warning("The borders of the downscaling region have not been provided. Assuming the four borders ", + "of the downscaling region are defined by the first and last elements of the parametres ", + "'obs_lats' and 'obs_lons'.") + region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)]) + } + + exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid, + method_remap = int_method, points = points, source_file = source_file, + lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp, + region = region) + + # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to + # the same grid to force the matching + if ((!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats, + lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !is.null(points)) { + obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid, + method_remap = int_method, points = points, source_file = source_file, + lat_dim = lat_dim, lon_dim = lon_dim, + method_point_interp = method_point_interp, region = region) + obs_ref <- obs_interpolated$data + } else { + obs_ref <- obs + } + + # Some functions only accept the dimension names "member" and "sdate" for exp and + # "sdate" for obs + #if (member_dim != 'member') { + # names(dim(exp_interpolated$data)) <- replace(names(dim(exp_interpolated$data)), + # which(names(dim(exp_interpolated$data)) == member_dim), 'member') + #} + + #if (sdate_dim != 'sdate') { + # names(dim(exp_interpolated$data)) <- replace(names(dim(exp_interpolated$data)), + # 
which(names(dim(exp_interpolated$data)) == sdate_dim), 'sdate') + # names(dim(obs_ref)) <- replace(names(dim(obs_ref)), + # which(names(dim(obs_ref)) == sdate_dim), 'sdate') + #} + + if (bc_method == 'sbc' | bc_method == 'simple_bias') { + if (dim(obs_ref)[sdate_dim] == 1) { + warning('Simple Bias Correction should not be used with only one observation. Returning NA.') + } + + res <- BiasCorrection(exp = exp_interpolated$data, obs = obs_ref, memb_dim = member_dim, + sdate_dim = sdate_dim, ...) + } + else if (bc_method == 'cal' | bc_method == 'calibration') { + if (dim(exp_interpolated$data)[member_dim] == 1) { + stop('Calibration must not be used with only one ensemble member.') + } + res <- Calibration(exp = exp_interpolated$data, obs = obs_ref, memb_dim = member_dim, + sdate_dim = sdate_dim, ...) + } + else if (bc_method == 'qm' | bc_method == 'quantile_mapping') { + + res <- QuantileMapping(exp = exp_interpolated$data, obs = obs_ref, na.rm = TRUE, + memb_dim = member_dim, sdate_dim = sdate_dim, ...) + } + else if (bc_method == 'dbc' | bc_method == 'dynamical_bias') { + # the temporal dimension must be only one dimension called "time" + if (all(c(time_dim, sdate_dim) %in% names(dim(exp_interpolated$data)))) { + exp_interpolated$data <- Apply(exp_interpolated$data, target_dims = c(time_dim, sdate_dim), + fun = as.vector, output_dims = "time")$output1 + } + if (all(c(time_dim, sdate_dim) %in% names(dim(obs_ref)))) { + obs_ref <- Apply(obs_ref, target_dims = c(time_dim, sdate_dim), fun = as.vector, + output_dims = "time")$output1 + } + # REMEMBER to add na.rm = T in colMeans in .proxiesattractor + res <- DynBiasCorrection(exp = exp_interpolated$data, obs = obs_ref, ...) 
+ } + + # Return a list of three elements + res <- list(data = res, obs = obs_ref, lon = exp_interpolated$lon, lat = exp_interpolated$lat) + + return(res) +} + + + + diff --git a/modules/Downscaling/tmp/Interpolation.R b/modules/Downscaling/tmp/Interpolation.R new file mode 100644 index 00000000..e594691f --- /dev/null +++ b/modules/Downscaling/tmp/Interpolation.R @@ -0,0 +1,739 @@ +#'@rdname CST_Interpolation +#'@title Regrid or interpolate gridded data to a point location. +#' +#'@author J. Ramon, \email{jaume.ramon@bsc.es} +#' +#'@description This function interpolates gridded model data from one grid to +#'another (regrid) or interpolates gridded model data to a set of point locations. +#'The gridded model data can be either global or regional. In the latter case, the +#'region is defined by the user. It does not have constrains of specific region or +#'variables to downscale. +#'@param exp s2dv object containing the experimental field on the +#'coarse scale for which the downscaling is aimed. The object must have, at least, +#'the dimensions latitude and longitude. The field data is expected to be already subset +#'for the desired region. Data can be in one or two integrated regions, e.g., +#'crossing the Greenwich meridian. To get the correct results in the latter case, +#'the borders of the region should be specified in the parameter 'region'. See parameter +#''region'. +#'@param points a list of two elements containing the point latitudes and longitudes +#'of the locations to downscale the model data. The list must contain the two elements +#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is +#'to a point location, only regular grids are allowed for exp and obs. Only needed if the +#'downscaling is to a point location. +#'@param method_remap a character vector indicating the regridding method to be passed +#'to CDORemap. Accepted methods are "con", "bil", "bic", "nn", "con2". 
If "nn" method is +#'to be used, CDO_1.9.8 or newer version is required. +#'@param target_grid a character vector indicating the target grid to be passed to CDO. +#'It must be a grid recognised by CDO or a NetCDF file. +#'@param lat_dim a character vector indicating the latitude dimension name in the element +#''exp' and/or 'points'. Default set to "lat". +#'@param lon_dim a character vector indicating the longitude dimension name in the element +#''exp' and/or 'points'. Default set to "lon". +#'@param region a numeric vector indicating the borders of the region defined in exp. +#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers +#'to the left border, while lonmax refers to the right border. latmin indicates the lower +#'border, whereas latmax indicates the upper border. If set to NULL (default), the function +#'takes the first and last elements of the latitudes and longitudes. +#'@param method_point_interp a character vector indicating the interpolation method to +#'interpolate model gridded data into the point locations. Accepted methods are "nearest", +#'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW". +#' +#'@seealso \code{\link[s2dverification]{CDORemap}} +#' +#'@return An s2dv object containing the dowscaled field. 
+#' +#'@examples +#'exp <- rnorm(500) +#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 1) +#'lons <- 1:5 +#'lats <- 1:4 +#'exp <- s2dv_cube(data = exp, lat = lats, lon = lons) +#'res <- CST_Interpolation(exp = exp, method_remap = 'conservative', target_grid = 'r1280x640') +#'@export +CST_Interpolation <- function(exp, points = NULL, method_remap = NULL, target_grid = NULL, + lat_dim = "lat", lon_dim = "lon", region = NULL, + method_point_interp = NULL) +{ + if (!inherits(exp,'s2dv_cube')) { + stop("Parameter 'exp' must be of the class 's2dv_cube'") + } + + #if (is.null(exp[[lat_dim]]) | is.null(exp[[lon_dim]])) { + # stop("The name of the latitude/longitude elements in 'exp' must match the parametres ", + # "'lat_dim' and 'lon_dim'") + #} + + if ((length(which(names(dim(exp$data)) == lat_dim)) == 0) | (length(which(names(dim(exp$data)) == lon_dim)) == 0)) { + stop("The name of the latitude/longitude dimensions in 'exp$data' must match the parametres 'lat_dim' and 'lon_dim'") + } + + res <- Interpolation(exp = exp$data, lats = exp$lat, lons = exp$lon, + source_file = exp$source_files[1], points = points, + method_remap = method_remap, target_grid = target_grid, lat_dim = lat_dim, + lon_dim = lon_dim, region = region, method_point_interp = method_point_interp) + + # Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data + exp$data <- res$data + exp$lon <- res$lon + exp$lat <- res$lat + + res_s2dv <- list(exp = exp, obs = NULL) + return(res_s2dv) +} + +#'@rdname Interpolation +#'@title Regrid or interpolate gridded data to a point location. +#' +#'@author J. Ramon, \email{jaume.ramon@bsc.es} +#'@author Ll. Lledó, \email{llorenc.lledo@ecmwf.int} +#' +#'@description This function interpolates gridded model data from one grid to +#'another (regrid) or interpolates gridded model data to a set of point locations. +#'The gridded model data can be either global or regional. In the latter case, the +#'region is defined by the user. 
It does not have constraints of specific region or
#'variables to downscale.
#'@param exp an array with named dimensions containing the experimental field on the
#'coarse scale for which the downscaling is aimed. The object must have, at least,
#'the dimensions latitude and longitude. The object is expected to be already subset
#'for the desired region. Data can be in one or two integrated regions, e.g.,
#'crossing the Greenwich meridian. To get the correct results in the latter case,
#'the borders of the region should be specified in the parameter 'region'. See parameter
#''region'.
#'@param lats a numeric vector containing the latitude values. Latitudes must range from
#'-90 to 90.
#'@param lons a numeric vector containing the longitude values. Longitudes can range from
#'-180 to 180 or from 0 to 360.
#'@param points a list of two elements containing the point latitudes and longitudes
#'of the locations to downscale the model data. The list must contain the two elements
#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is
#'to a point location, only regular grids are allowed for exp and obs. Only needed if the
#'downscaling is to a point location.
#'@param source_file a character vector with a path to an example file of the exp data.
#'Only needed if the downscaling is to a point location.
#'@param method_remap a character vector indicating the regridding method to be passed
#'to CDORemap. Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is
#'to be used, CDO_1.9.8 or newer version is required.
#'@param target_grid a character vector indicating the target grid to be passed to CDO.
#'It must be a grid recognised by CDO or a NetCDF file.
#'@param lat_dim a character vector indicating the latitude dimension name in the element
#''exp' and/or 'points'. Default set to "lat".
#'@param lon_dim a character vector indicating the longitude dimension name in the element
#''exp' and/or 'points'. Default set to "lon".
#'@param region a numeric vector indicating the borders of the region defined in exp.
#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers
#'to the left border, while lonmax refers to the right border. latmin indicates the lower
#'border, whereas latmax indicates the upper border. If set to NULL (default), the function
#'takes the first and last elements of the latitudes and longitudes.
#'@param method_point_interp a character vector indicating the interpolation method to
#'interpolate model gridded data into the point locations. Accepted methods are "nearest",
#'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW". Only needed if the downscaling
#'is to a point location.
#'@import multiApply
#'@import plyr
#'@importFrom s2dv CDORemap
#'
#'@seealso \code{\link[s2dverification]{CDORemap}}
#'
#'@return A list of three elements. 'data' contains the downscaled field, 'lat' the
#'downscaled latitudes, and 'lon' the downscaled longitudes.
#'
#'@examples
#'exp <- rnorm(500)
#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 1)
#'lons <- 1:5
#'lats <- 1:4
#'res <- Interpolation(exp = exp, lats = lats, lons = lons, method_remap = 'conservative', target_grid = 'r1280x640')
#'@export
Interpolation <- function(exp, lats, lons, points = NULL, source_file = NULL, method_remap = NULL,
                          target_grid = NULL, lat_dim = "lat", lon_dim = "lon", region = NULL,
                          method_point_interp = NULL)
{
  # --- Argument checks common to both interpolation modes ---
  if (!is.null(method_remap) && !inherits(method_remap, 'character')) {
    stop("Parameter 'method_remap' must be of the class 'character'")
  }

  if (!is.null(method_point_interp) && !inherits(method_point_interp, 'character')) {
    stop("Parameter 'method_point_interp' must be of the class 'character'")
  }

  if (is.na(match(lon_dim, names(dim(exp))))) {
    stop("Missing longitude dimension in 'exp', or does not match the parameter 'lon_dim'")
  }

  if (is.na(match(lat_dim, names(dim(exp))))) {
    stop("Missing latitude dimension in 'exp', or does not match the parameter 'lat_dim'")
  }

  # Check for out-of-range latitudes in the exp data
  if (any(lats < -90 | lats > 90)) {
    stop("Out-of-range latitudes have been found. Latitudes must range from -90 to 90")
  }

  # --- Checks specific to the interpolation to point locations ---
  if (!is.null(points)) {
    if (!inherits(points, 'list')) {
      stop("Parameter 'points' must be a list of two elements containing the point ",
           "latitudes and longitudes.")
    }

    # A valid point-interpolation method is mandatory in this mode
    if (is.null(method_point_interp) ||
        !(method_point_interp %in% c('nearest', 'bilinear', '9point', 'invdist4nn',
                                     'NE', 'NW', 'SE', 'SW'))) {
      stop("Parameter 'method_point_interp' must be a character vector indicating the ",
           "interpolation method. Accepted methods are nearest, bilinear, 9point, ",
           "invdist4nn, NE, NW, SE, SW")
    }

    # Points must be a list of two elements
    if (length(points) != 2) {
      stop("'points' must be a list of two elements containing the point ",
           "latitudes and longitudes in the form 'points$lat', 'points$lon'")
    }

    # The names of the two elements must match 'lat_dim' and 'lon_dim'
    if (any(!(c(lat_dim, lon_dim) %in% names(points)))) {
      stop("The names of the elements in the list 'points' must coincide with the parameters ",
           "'lat_dim' and 'lon_dim'")
    }

    # Check that the number of latitudes and longitudes match
    if (length(unique(lengths(points))) != 1L) {
      stop("The number of latitudes and longitudes must match")
    }

    # Check for out-of-range latitudes in the point coordinates
    if (any(points[[lat_dim]] < -90 | points[[lat_dim]] > 90)) {
      stop("Out-of-range latitudes have been found in 'points'. Latitudes must range from ",
           "-90 to 90")
    }

    if (is.null(source_file)) {
      stop("No source file found. Source file must be provided in the parameter 'source_file'.")
    }
  } else {
    # --- Checks specific to the regridding mode ---
    if (is.null(method_remap)) {
      stop("Parameter 'method_remap' must be a character vector indicating the ",
           "interpolation method. Accepted methods are con, bil, bic, nn, con2")
    }

    if (is.null(target_grid)) {
      stop("Parameter 'target_grid' can be either a path ",
           "to another NetCDF file which to read the target grid from (a single grid must be ",
           "defined in such file) or a character vector indicating the coarse grid to ",
           "be passed to CDO, and it must be a grid recognised by CDO or a NetCDF file.")
    }
  }

  #----------------------------------
  # Limits of the region defined by the model data
  #----------------------------------
  # For the case when region limits are not passed by the user.
  # 'region' contains the following elements in order: lonmin, lonmax, latmin, latmax
  if (is.null(region)) {
    warning("The borders of the downscaling region have not been provided. Assuming the four borders of the ",
            "downscaling region are defined by the first and last elements of the parameters 'lats' and 'lons'.")
    region <- c(lons[1], lons[length(lons)], lats[1], lats[length(lats)])
  }

  # Ensure points to be within the region limits
  if (!is.null(points)) {
    if (any(points[[lat_dim]] > region[4]) | any(points[[lat_dim]] < region[3]) |
        any(points[[lon_dim]] > region[2]) | any(points[[lon_dim]] < region[1])) {
      stop("At least one of the points lies outside the model region")
    }
  }

  #----------------------------------
  # Map regrid with CDO
  #----------------------------------
  if (is.null(points)) {
    res <- CDORemap(data_array = exp,
                    lats = lats,
                    lons = lons,
                    grid = target_grid,
                    method = method_remap,
                    crop = region)

    # Return a list
    res <- list(data = res$data_array, obs = NULL, lon = res$lons, lat = res$lats)

  #----------------------------------
  # Interpolate to point locations
  #----------------------------------
  } else {
    # First create interpolation weights, depending on the chosen method
    weights <- create_interp_weights(ncfile = source_file, locids = 1:unique(lengths(points)),
                                     lats = points[[lat_dim]], lons = points[[lon_dim]],
                                     method = method_point_interp, region = list(lat_min = region[3],
                                     lat_max = region[4], lon_min = region[1], lon_max = region[2]))

    # Select coarse-scale data to be interpolated
    model_data_gridpoints <- get_model_data(weights.df = weights, mdata = exp)

    # Interpolate model data to point locations
    res <- interpolate_data(model_data_gridpoints, weights)

    # Return a list
    res <- list(data = res, obs = NULL, lon = points[[lon_dim]], lat = points[[lat_dim]])
  }

  return(res)
}

#======================
# Compute weights for interpolation at several (lat,lon) positions
# We assume that grid boxes are centered in the grid point.
#======================
# Compute, for each requested (lat, lon) location, the source grid cells and the
# weights needed to interpolate model data onto that location. Returns a
# data.frame with one row per contributing grid cell (locid/lat/lon/rawi/rawj/
# i/j/weight), with the method and the source NetCDF file kept as attributes.
create_interp_weights <- function(ncfile, locids, lats, lons, region = NULL,
                                  method = c("nearest", "bilinear", "9point", "invdist4nn", "NE",
                                             "NW", "SE", "SW"))
{
  # Resolve 'method' to a single value. Without this, the vector default would
  # reach 'if (method == "nearest")' with a length > 1 condition and fail.
  method <- match.arg(method)

  # Crop the region to get the correct weights - save temporary files
  nc_cropped1 <- paste0('tmp_cropped_', format(Sys.time(), "%Y%m%d%H%M"), '.nc')
  nc_cropped2 <- paste0('tmp_cropped2_', format(Sys.time(), "%Y%m%d%H%M"), '.nc')

  system(paste0('cdo sellonlatbox,', region$lon_min, ',', region$lon_max, ',', region$lat_min,
                ',', region$lat_max, ' ', ncfile, ' ', nc_cropped1))

  #----------------
  # Read grid description and compute (i,j) of requested locations (including decimals)
  #----------------
  griddes <- get_griddes(nc_cropped1)

  # get_griddes() coerces missing numeric fields to NA (not NULL), so both
  # conditions must be checked here.
  if (is.null(griddes$yinc) || is.na(griddes$yinc)) {
    file.remove(nc_cropped1)
    stop("'griddes$yinc' not found in NetCDF file. Remember that only regular grids are accepted when ",
         "downscaling to point locations.")
  }

  # If latitudes are decreasingly ordered, revert them
  inverted <- FALSE
  if (griddes$yinc < 0) {
    inverted <- TRUE
    system(paste0('cdo invertlat ', nc_cropped1, ' ', nc_cropped2))
    griddes <- get_griddes(nc_cropped2)
  }
  # Remove temporary files; nc_cropped2 only exists when latitudes were inverted
  file.remove(nc_cropped1)
  if (inverted) {
    file.remove(nc_cropped2)
  }

  if (is.null(griddes)) {
    stop("'griddes' not found in the NetCDF source files")
  }

  gridpoints <- latlon2ij(griddes, lats, lons)

  #----------------
  # Compute the weights according to the selected method
  #----------------
  if (method == "nearest") {
    #----------------
    # Round i and j to closest integer. Weight is always 1.
    #----------------

    # | | |
    # -+-----+-----+-
    # |    x|     |
    # |  a  |     |
    # |     |     |
    # -+-----+-----+-
    # |     |     |

    centeri <- round(gridpoints$i, 0)
    centeri[centeri == griddes$xsize + 1] <- 1 # close the longitudes

    weights.df <- data.frame(locid = locids,
                             lat = lats,
                             lon = lons,
                             rawi = gridpoints$i,
                             rawj = gridpoints$j,
                             i = centeri,
                             j = round(gridpoints$j, 0),
                             weight = 1)
  } else if (method %in% c("bilinear", "invdist4nn")) {
    #----------------
    # Get the (i,j) coordinates of the 4 points (a,b,c,d) around x.
    # This plot shows i increasing to the right and
    # j increasing to the top, but the computations are generic.
    #----------------
    # |     |     |
    #-+-----+-----+-
    # |     |     |
    # |  b  |  c  |
    # |     |     |
    #-+-----+-----+-
    # |    x|     |
    # |  a  |  d  |
    # |     |     |
    #-+-----+-----+-
    # |     |     |

    lowi <- floor(gridpoints$i)
    highi <- ceiling(gridpoints$i)
    highi[highi == griddes$xsize + 1] <- 1 # close the longitudes
    lowj <- floor(gridpoints$j)
    highj <- ceiling(gridpoints$j)
    # Note: highi and lowi are the same if i is integer
    # Note: highj and lowj are the same if j is integer

    #----------------
    # Get x position wrt ad and ab axes (from 0 to 1)
    #----------------
    pcti <- gridpoints$i - lowi
    pctj <- gridpoints$j - lowj

    #----------------
    # Compute weights for a,b,c,d grid points
    #----------------
    if (method == "bilinear") {
      wa <- (1 - pcti) * (1 - pctj)
      wb <- (1 - pcti) * pctj
      wc <- pcti * pctj
      wd <- pcti * (1 - pctj)
    } else if (method == "invdist4nn") {
      #----------------
      # Note: the distance is computed in the (i,j) space.
      # Note2: this method does not guarantee a continuous interpolation.
      # Use bilinear if that's desirable.
      # When x is on the ab line, c and d would be used. In the limit of x
      # being just left of ab other points would be used.
      # Here we just dropped c and d coeffs when over ab. Same for ad line,
      # b and c coeffs dropped. This prevents repeated nodes.
      #----------------
      ida <- 1 / sqrt(pcti^2 + pctj^2)
      idb <- 1 / sqrt(pcti^2 + (1 - pctj)^2)
      idc <- 1 / sqrt((1 - pcti)^2 + (1 - pctj)^2)
      idd <- 1 / sqrt((1 - pcti)^2 + pctj^2)
      idb[pctj == 0] <- 0
      idc[pctj == 0] <- 0
      idc[pcti == 0] <- 0
      idd[pcti == 0] <- 0

      #----------------
      # Normalize vector of inverse distances. An infinite inverse distance
      # (x exactly on a node) collapses all the weight onto that node.
      #----------------
      invdist <- cbind(ida, idb, idc, idd)
      w <- t(apply(invdist, 1, function(x) {
        if (any(is.infinite(x))) {
          x <- is.infinite(x) * 1
        }
        x / sum(x)
      }))

      wa <- w[, 1]
      wb <- w[, 2]
      wc <- w[, 3]
      wd <- w[, 4]
    }

    #----------------
    # Put info in dataframes and rbind them
    #----------------
    weightsa.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i,
                              rawj = gridpoints$j, i = lowi, j = lowj, weight = wa)
    weightsb.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i,
                              rawj = gridpoints$j, i = lowi, j = highj, weight = wb)
    weightsc.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i,
                              rawj = gridpoints$j, i = highi, j = highj, weight = wc)
    weightsd.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i,
                              rawj = gridpoints$j, i = highi, j = lowj, weight = wd)
    weights.df <- rbind(weightsa.df, weightsb.df, weightsc.df, weightsd.df)
  } else if (method == "9point") {
    #----------------
    # Get the (i,j) coordinates of the 9 points (a,b,...,i) around x
    # This plot shows i increasing to the right and
    # j increasing to the top, but the computations are generic.
    #----------------
    # |     |     |     |
    #-+-----+-----+-----+-
    # |     |     |     |
    # |  c  |  f  |  i  |
    # |     |     |     |
    #-+-----+-----+-----+-
    # |     |    x|     |
    # |  b  |  e  |  h  |
    # |     |     |     |
    #-+-----+-----+-----+-
    # |     |     |     |
    # |  a  |  d  |  g  |
    # |     |     |     |
    #-+-----+-----+-----+-
    # |     |     |     |

    centeri <- round(gridpoints$i, 0)
    centeri[centeri == griddes$xsize + 1] <- 1
    centerj <- round(gridpoints$j, 0)
    lowi <- centeri - 1
    highi <- centeri + 1
    lowi[lowi == 0] <- griddes$xsize # close the longitudes
    highi[highi == griddes$xsize + 1] <- 1 # close the longitudes
    lowj <- centerj - 1
    highj <- centerj + 1

    #----------------
    # For the north and south pole do a 6-point average
    #----------------
    w_highj <- ifelse(centerj == 1, 1/6, ifelse(centerj == griddes$ysize, 0, 1/9))
    w_centerj <- ifelse(centerj == 1, 1/6, ifelse(centerj == griddes$ysize, 1/6, 1/9))
    w_lowj <- ifelse(centerj == 1, 0, ifelse(centerj == griddes$ysize, 1/6, 1/9))

    #----------------
    # Put info in dataframes and rbind them
    #----------------
    weightsa.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i,
                              rawj = gridpoints$j, i = lowi, j = lowj, weight = w_lowj)
    weightsb.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i,
                              rawj = gridpoints$j, i = lowi, j = centerj, weight = w_centerj)
    weightsc.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i,
                              rawj = gridpoints$j, i = lowi, j = highj, weight = w_highj)
    weightsd.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i,
                              rawj = gridpoints$j, i = centeri, j = lowj, weight = w_lowj)
    weightse.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i,
                              rawj = gridpoints$j, i = centeri, j = centerj, weight = w_centerj)
    weightsf.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i,
                              rawj = gridpoints$j, i = centeri, j = highj, weight = w_highj)
    weightsg.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i,
                              rawj = gridpoints$j, i = highi, j = lowj, weight = w_lowj)
    weightsh.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i,
                              rawj = gridpoints$j, i = highi, j = centerj, weight = w_centerj)
    weightsi.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i,
                              rawj = gridpoints$j, i = highi, j = highj, weight = w_highj)
    weights.df <- rbind(weightsa.df, weightsb.df, weightsc.df, weightsd.df, weightse.df,
                        weightsf.df, weightsg.df, weightsh.df, weightsi.df)
  } else if (method %in% c("NE", "NW", "SW", "SE")) {
    #----------------
    # Find if increasing i and j increases or decreases lat and lon
    #----------------
    westtoeast <- (griddes$xinc > 0)
    southtonorth <- TRUE
    if (griddes$gridtype == "gaussian") {
      # We assume gaussian grid latitudes are ordered north to south
      southtonorth <- FALSE
    } else { # lonlat
      if (griddes$yinc < 0) {
        southtonorth <- FALSE
      }
    }

    #----------------
    # Get the (i,j) coordinates of the desired point (a,b,c or d) around x.
    # When the requested corner direction agrees with the axis direction we
    # take the ceiling, otherwise the floor (xor collapses the four cases).
    #----------------
    # |     |     |
    #- +-----+-----+-
    # |     |     |
    # |  b  |  c  |
    # |     |     |
    #- +-----+-----+-
    # |    x|     |
    # |  a  |  d  |
    # |     |     |
    #- +-----+-----+-
    # |     |     |

    selj <- if (xor(substr(method, 1, 1) == "N", southtonorth)) {
      floor(gridpoints$j)
    } else {
      ceiling(gridpoints$j)
    }
    seli <- if (xor(substr(method, 2, 2) == "E", westtoeast)) {
      floor(gridpoints$i)
    } else {
      ceiling(gridpoints$i)
    }

    seli[seli == griddes$xsize + 1] <- 1 # close the longitudes

    weights.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i,
                             rawj = gridpoints$j, i = seli, j = selj, weight = 1)
  } else {
    # Unreachable after match.arg(), kept as a safety net
    stop(paste0("Method ", method, " not implemented"))
  }

  #----------------
  # Order by locid and remove lines with 0 weight
  # This also removes some duplicates in the bilinear/invdist4nn methods when i
  # or j is a whole number, or in the 9-point method when at the poles.
  #----------------
  weights.df <- weights.df[order(weights.df$locid), ]
  weights.df <- weights.df[weights.df$weight != 0, ]

  #----------------
  # Add as attributes the method and the nc file used to compute the weights
  #----------------
  attributes(weights.df)$nc_file <- normalizePath(ncfile)
  attributes(weights.df)$method <- method

  return(weights.df)
}

#======================
# Compute (i,j) from (lat,lon).
# Works only for 'lonlat' and 'gaussian' grids.
# Grids are supposed to cover whole globe.
#======================
latlon2ij <- function(griddes, lats, lons) {
  #------------
  # Check input params
  #------------
  if (length(lons) != length(lats)) {
    stop("Input lat and lon have different lengths.")
  }
  if (any(lats < -90) | any(lats > 90)) {
    stop("Latitude out of valid range")
  }
  if ((griddes$xfirst > 180) & (any(lons < 0))) {
    stop("Please use the same convention for the longitudes in the source file and the ",
         "longitude values in 'points'.")
  }
  #if(round(griddes$xinc*griddes$xsize) != 360) {stop("Grid is not global")}
  # no need to resize lons to [0,360)

  #------------
  # Compute i (with decimals)
  # i lies in [1,xsize+1)
  # %% gives the remainder
  #------------
  gridpoints <- list()
  gridpoints$i <- 1 + (((lons - griddes$xfirst) / griddes$xinc) %% griddes$xsize)

  #------------
  # Compute j (with decimals)
  #------------
  if (griddes$gridtype == 'lonlat') {
    gridpoints$j <- 1 + (lats - griddes$yfirst) / griddes$yinc
  } else if (griddes$gridtype == 'gaussian') {
    # We assume gaussian grid latitudes are ordered north to south.
    # findInterval can only work with monotonic ascending values so we revert twice.
    northj <- griddes$ysize - findInterval(lats, -griddes$yvals)
    southj <- northj + 1

    # Preallocate so the special-case assignments below do not grow a NULL vector
    gridpoints$j <- numeric(length(lats))

    # Special case: we are north of the first lat
    gridpoints$j[northj == 0] <- 1

    # Special case: we are south of the last lat
    gridpoints$j[southj == griddes$ysize + 1] <- griddes$ysize

    # Generic case: linear interpolation between the two bracketing latitudes
    ok_idx <- !(northj == 0 | southj == griddes$ysize + 1)
    gridpoints$j[ok_idx] <- northj[ok_idx] + (griddes$yvals[northj[ok_idx]] -
      lats[ok_idx]) / (griddes$yvals[northj[ok_idx]] - griddes$yvals[southj[ok_idx]])
  } else {
    stop("Unsupported grid")
  }

  return(gridpoints)
}

#======================
# Use cdo griddes to obtain grid information
#======================
get_griddes <- function(ncfile) {
  tmp <- system(paste0("cdo griddes ", ncfile,
                       " 2>/dev/null | egrep 'gridtype|xsize|ysize|xfirst|xinc|yfirst|yinc'"),
                intern = TRUE)
  arr <- do.call(rbind, strsplit(tmp, "\\s+= ", perl = TRUE))
  griddes <- as.list(arr[, 2])
  names(griddes) <- arr[, 1]

  if (griddes$gridtype == "gaussian") {
    griddes$yvals <- get_lats(ncfile)
  }

  # Convert some fields to numeric. Ensures all fields are present (NA when missing).
  for (nm in c("xsize", "ysize", "xfirst", "yfirst", "xinc", "yinc")) {
    griddes[[nm]] <- ifelse(is.null(griddes[[nm]]), NA, as.numeric(griddes[[nm]]))
  }

  return(griddes)
}

#======================
# Use nco to obtain latitudes. Latitudes shall be named "lat" or "latitude".
#======================
get_lats <- function(ncfile) {

  tmp <- system(paste0('ncks -H -s "%f " -v latitude ', ncfile), intern = TRUE)

  # Fall back to the short name when 'latitude' is not present
  if (!is.null(attributes(tmp)$status)) {
    tmp <- system(paste0('ncks -H -s "%f " -v lat ', ncfile), intern = TRUE)
  }

  lats <- as.numeric(strsplit(tmp[1], "\\s+", perl = TRUE)[[1]])

  return(lats)
}

#======================
# Load model data at all (i,j) pairs listed in the weights dataframe.
# Uses StartR. All ... parameters go to Start (i.e.
 specify dat, var,
+# sdate, time, ensemble, num_procs, etc)
+#======================
+get_model_data <- function(weights.df, mdata) {
+  # Subsets 'mdata' to the (i, j) grid cells listed in 'weights.df' and returns
+  # an array whose first dimension, named 'gridpoint', holds one series per row
+  # of 'weights.df'.
+
+  #-----------------
+  # Get data for all combinations of i and j.
+  # (inefficient, getting many unneded pairs).
+  # Avoid retrieving duplicates with unique()
+  # These are the indices of the global grid
+  #-----------------
+  is <- weights.df$i
+  js <- weights.df$j
+
+  #-----------------
+  # Get indices of original is and js in unique(is),unique(js) that were requested
+  #-----------------
+  # NOTE(review): idxi/idxj are only consumed by the commented-out 'apply'
+  # retrieval path below; the active multiApply path indexes with is/js directly.
+  idxi <- match(is, unique(is))
+  idxj <- match(js, unique(js))
+
+  #-----------------
+  # Subsample mdata to keep only the needed (i,j) pairs.
+  #-----------------
+  if (is.na(match("longitude", names(dim(mdata))))) {
+    londim <- match("lon", names(dim(mdata)))
+  } else {
+    londim <- match("longitude", names(dim(mdata)))
+  }
+  if (is.na(match("latitude", names(dim(mdata))))) {
+    latdim <- match("lat", names(dim(mdata)))
+  } else {
+    latdim <- match("latitude", names(dim(mdata)))
+  }
+
+  # trick: exchange idxi and idxj
+  #if(londim > latdim) { idx.tmp <- idxi; idxi <- idxj; idxj <- idx.tmp }
+  #keepdims <- (1:length(dim(mdata)))[-c(londim,latdim)]
+
+  #sub_mdata <- apply(mdata, keepdims, function(x) {
+  #  laply(1:length(is),function(k) { x[idxi[k],idxj[k]] }) })
+  #names(dim(sub_mdata))[1] <- "gridpoint"
+
+  #-----------------
+  # Retrieve with multiApply
+  #-----------------
+  sub_mdata <- Apply(mdata, target_dims = list(c(latdim, londim)), fun = function(x) {laply(1:length(is),function(k) { x[js[k],is[k]] }) })$output1
+  names(dim(sub_mdata))[1] <- "gridpoint"
+
+  #-----------------
+  # Return an array that contains as many gridpoints as (i,j) pairs were requested
+  #-----------------
+  return(sub_mdata)
+}
+
+#======================
+# Multiply the grid-point series by the weights,
+# to obtain the desired interpolations
+#======================
+interpolate_data <- function(model_data, weights.df) {
+  # Weighted sum of the 'gridpoint' series: multiplies each series by its weight
+  # and sums the series that share a 'locid', yielding one series per location.
+  #-----------------
+  # Multiply each gridpoint matrix by its corresponding weight
+  #-----------------
+  gpdim <- match("gridpoint", names(dim(model_data)))
+  weighted_data <- sweep(model_data, gpdim, weights.df$weight, "*")
+
+  #-----------------
+  # Sum all series that belong to same interpolation point
+  # Return an array that contains the requested locations and interpolation type
+  #-----------------
+  #interp_data <- apply(weighted_data, -gpdim, function(x) { rowsum(x, weights.df$locid) })
+  #names(dim(interp_data))[1] <- "location"
+  interp_data <- Apply(weighted_data, target_dims = gpdim, fun = function(x) {
+    rowsum(x, weights.df$locid)}, output_dims = c("location", "aux"))$output1
+
+  return(interp_data)
+}
+
+
diff --git a/modules/Downscaling/tmp/Intlr.R b/modules/Downscaling/tmp/Intlr.R
new file mode 100644
index 00000000..565e3046
--- /dev/null
+++ b/modules/Downscaling/tmp/Intlr.R
@@ -0,0 +1,525 @@
+#'@rdname CST_Intlr
+#'@title Downscaling using interpolation and linear regression.
+#'
+#'@author J. Ramon, \email{jaume.ramon@bsc.es}
+#'
+#'@description This function performs a downscaling using an interpolation and a linear
+#'regression. Different methodologies that employ linear regressions are available. See
+#'parameter 'lr_method' for more information. It is recommended that the observations
+#'are passed already in the target grid. Otherwise, the function will also perform an
+#'interpolation of the observed field into the target grid. The coarse scale and
+#'observation data can be either global or regional. In the latter case, the region is
+#'defined by the user. In principle, the coarse and observation data are intended to
+#'be of the same variable, although different variables can also be admitted.
+#'
+#'@param exp an 's2dv object' containing the experimental field on the
+#'coarse scale for which the downscaling is aimed. The object must have, at least,
+#'the dimensions latitude, longitude, start date and member.
The object is expected to be +#'already subset for the desired region. Data can be in one or two integrated regions, e.g., +#'crossing the Greenwich meridian. To get the correct results in the latter case, +#'the borders of the region should be specified in the parameter 'region'. See parameter +#''region'. +#'@param obs an 's2dv object' containing the observational field. The object +#'must have, at least, the dimensions latitude, longitude and start date. The object is +#'expected to be already subset for the desired region. +#'@param lr_method a character vector indicating the linear regression method to be applied +#'after the interpolation. Accepted methods are 'basic', 'large-scale' and '4nn'. The 'basic' +#'method fits a linear regression using high resolution observations as predictands and the +#'interpolated model data as predictor. Then, the regression equation is to the interpolated +#'model data to correct the interpolated values. The 'large-scale' method fits a linear +#'regression with large-scale predictors from the same model (e.g. teleconnection indices) +#'as predictors and high-resolution observations as predictands. This equation is then +#'applied to the interpolated model values. Finally, the '4nn' method uses a linear +#'regression with the four nearest neighbours as predictors and high-resolution observations +#'as predictands. It is then applied to model data to correct the interpolated values. +#'@param target_grid a character vector indicating the target grid to be passed to CDO. +#'It must be a grid recognised by CDO or a NetCDF file. +#'@param points a list of two elements containing the point latitudes and longitudes +#'of the locations to downscale the model data. The list must contain the two elements +#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is +#'to a point location, only regular grids are allowed for exp and obs. Only needed if the +#'downscaling is to a point location. 
+#'@param int_method a character vector indicating the regridding method to be passed +#'to CDORemap. Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is +#'to be used, CDO_1.9.8 or newer version is required. +#'@param method_point_interp a character vector indicating the interpolation method to +#'interpolate model gridded data into the point locations. Accepted methods are "nearest", +#'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW". +#'@param predictors an array with large-scale data to be used in the 'large-scale' method. +#'Only needed if the linear regression method is set to 'large-scale'. It must have, at +#'least the dimension start date and another dimension whose name has to be specified in +#'the parameter 'large_scale_predictor_dimname'. It should contain as many elements as the +#'number of large-scale predictors. +#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data' +#'in exp and obs. Default set to "lat". +#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data' +#'in exp and obs. Default set to "lon". +#'@param sdate_dim a character vector indicating the start date dimension name in the element +#''data' in exp and obs. Default set to "sdate". +#'@param time_dim a character vector indicating the time dimension name in the element +#''data' in exp and obs. Default set to "time". +#'@param large_scale_predictor_dimname a character vector indicating the name of the +#'dimension in 'predictors' that contain the predictor variables. See parameter 'predictors'. +#'@param loocv a logical indicating whether to apply leave-one-out cross-validation when +#'generating the linear regressions. Default to FALSE. +#'@param region a numeric vector indicating the borders of the region defined in exp. +#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. 
lonmin refers
#'to the left border, while lonmax refers to the right border. latmin indicates the lower
#'border, whereas latmax indicates the upper border. If set to NULL (default), the function
#'takes the first and last elements of the latitudes and longitudes.
#'@param ncores an integer indicating the number of cores to use in parallel computation.
#'
#'@import multiApply
#'
#'@return A list of three elements. 'data' contains the downscaled field, 'lat' the
#'downscaled latitudes, and 'lon' the downscaled longitudes.
#'@examples
#'exp <- rnorm(500)
#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5)
#'exp_lons <- 1:5
#'exp_lats <- 1:4
#'obs <- rnorm(900)
#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5)
#'obs_lons <- seq(1,5, 4/14)
#'obs_lats <- seq(1,4, 3/11)
#'exp <- s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons)
#'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons)
#'res <- CST_Intlr(exp = exp, obs = obs, target_grid = 'r1280x640', lr_method = 'basic', int_method = 'conservative')
#'@export
CST_Intlr <- function(exp, obs, lr_method, target_grid = NULL, points = NULL, int_method = NULL,
                      method_point_interp = NULL, predictors = NULL, lat_dim = "lat", lon_dim = "lon",
                      sdate_dim = "sdate", time_dim = "time", member_dim = "member",
                      large_scale_predictor_dimname = 'vars', loocv = FALSE, region = NULL, ncores = 1) {

  # Both inputs must be s2dv_cube objects; fail fast otherwise
  if (!inherits(exp, 's2dv_cube')) {
    stop("Parameter 'exp' must be of the class 's2dv_cube'")
  }
  if (!inherits(obs, 's2dv_cube')) {
    stop("Parameter 'obs' must be of the class 's2dv_cube'")
  }

  # Delegate the actual downscaling to Intlr(), unwrapping the cubes
  downscaled <- Intlr(exp = exp$data, obs = obs$data,
                      exp_lats = exp$lat, exp_lons = exp$lon,
                      obs_lats = obs$lat, obs_lons = obs$lon,
                      points = points,
                      source_file_exp = exp$source_files[1],
                      source_file_obs = obs$source_files[1],
                      target_grid = target_grid, lr_method = lr_method,
                      int_method = int_method, method_point_interp = method_point_interp,
                      predictors = predictors,
                      lat_dim = lat_dim, lon_dim = lon_dim,
                      sdate_dim = sdate_dim, time_dim = time_dim,
                      member_dim = member_dim,
                      large_scale_predictor_dimname = large_scale_predictor_dimname,
                      loocv = loocv, region = region, ncores = ncores)

  # Repack the downscaled field and coordinates into the original s2dv_cubes
  exp$data <- downscaled$data
  exp$lon <- downscaled$lon
  exp$lat <- downscaled$lat

  obs$data <- downscaled$obs
  obs$lat <- downscaled$lat
  obs$lon <- downscaled$lon

  return(list(exp = exp, obs = obs))
}

#'@rdname Intlr
#'@title Downscaling using interpolation and linear regression.
#'
#'@author J. Ramon, \email{jaume.ramon@bsc.es}
#'
#'@description This function performs a downscaling using an interpolation and a linear
#'regression. Different methodologies that employ linear regressions are available. See
#'parameter 'lr_method' for more information. It is recommended that the observations
#'are passed already in the target grid. Otherwise, the function will also perform an
#'interpolation of the observed field into the target grid. The coarse scale and
#'observation data can be either global or regional. In the latter case, the region is
#'defined by the user. In principle, the coarse and observation data are intended to
#'be of the same variable, although different variables can also be admitted.
#'
#'@param exp an array with named dimensions containing the experimental field on the
#'coarse scale for which the downscaling is aimed. The object must have, at least,
#'the dimensions latitude, longitude and start date. The object is expected to be
#'already subset for the desired region. Data can be in one or two integrated regions, e.g.,
#'crossing the Greenwich meridian. To get the correct results in the latter case,
#'the borders of the region should be specified in the parameter 'region'. See parameter
#''region'.
#'@param obs an array with named dimensions containing the observational field. The object
#'must have, at least, the dimensions latitude, longitude and start date.
The object is
+#'expected to be already subset for the desired region.
+#'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must
+#'range from -90 to 90.
+#'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes
+#'can range from -180 to 180 or from 0 to 360.
+#'@param obs_lats a numeric vector containing the latitude values in 'obs'. Latitudes must
+#'range from -90 to 90.
+#'@param obs_lons a numeric vector containing the longitude values in 'obs'. Longitudes
+#'can range from -180 to 180 or from 0 to 360.
+#'@param lr_method a character vector indicating the linear regression method to be applied
+#'after the interpolation. Accepted methods are 'basic', 'large-scale' and '4nn'. The 'basic'
+#'method fits a linear regression using high resolution observations as predictands and the
+#'interpolated model data as predictor. Then, the regression equation is applied to the interpolated
+#'model data to correct the interpolated values. The 'large-scale' method fits a linear
+#'regression with large-scale predictors from the same model (e.g. teleconnection indices)
+#'as predictors and high-resolution observations as predictands. This equation is then
+#'applied to the interpolated model values. Finally, the '4nn' method uses a linear
+#'regression with the four nearest neighbours as predictors and high-resolution observations
+#'as predictands. It is then applied to model data to correct the interpolated values.
+#'@param target_grid a character vector indicating the target grid to be passed to CDO.
+#'It must be a grid recognised by CDO or a NetCDF file.
+#'@param points a list of two elements containing the point latitudes and longitudes
+#'of the locations to downscale the model data. The list must contain the two elements
+#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is
+#'to a point location, only regular grids are allowed for exp and obs. 
Only needed if the +#'downscaling is to a point location. +#'@param int_method a character vector indicating the regridding method to be passed +#'to CDORemap. Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is +#'to be used, CDO_1.9.8 or newer version is required. +#'@param method_point_interp a character vector indicating the interpolation method to +#'interpolate model gridded data into the point locations. Accepted methods are "nearest", +#'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW". +#'@param source_file_exp a character vector with a path to an example file of the exp data. +#'Only needed if the downscaling is to a point location. +#'@param source_file_obs a character vector with a path to an example file of the obs data. +#'Only needed if the downscaling is to a point location. +#'@param predictors an array with large-scale data to be used in the 'large-scale' method. +#'Only needed if the linear regression method is set to 'large-scale'. It must have, at +#'least the dimension start date and another dimension whose name has to be specified in +#'the parameter 'large_scale_predictor_dimname'. It should contain as many elements as the +#'number of large-scale predictors. +#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data' +#'in exp and obs. Default set to "lat". +#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data' +#'in exp and obs. Default set to "lon". +#'@param sdate_dim a character vector indicating the start date dimension name in the element +#''data' in exp and obs. Default set to "sdate". +#'@param time_dim a character vector indicating the time dimension name in the element +#''data' in exp and obs. Default set to "time". +#'@param region a numeric vector indicating the borders of the region defined in exp. +#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. 
lonmin refers +#'to the left border, while lonmax refers to the right border. latmin indicates the lower +#'border, whereas latmax indicates the upper border. If set to NULL (default), the function +#'takes the first and last elements of the latitudes and longitudes. +#'@param large_scale_predictor_dimname a character vector indicating the name of the +#'dimension in 'predictors' that contain the predictor variables. See parameter 'predictors'. +#'@param loocv a logical indicating whether to apply leave-one-out cross-validation when +#'generating the linear regressions. Default to FALSE. +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#' +#'@import multiApply +#' +#'@return A list of three elements. 'data' contains the dowscaled field, 'lat' the +#'downscaled latitudes, and 'lon' the downscaled longitudes. +#'@examples +#'exp <- rnorm(500) +#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5) +#'exp_lons <- 1:5 +#'exp_lats <- 1:4 +#'obs <- rnorm(900) +#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5) +#'obs_lons <- seq(1,5, 4/14) +#'obs_lats <- seq(1,4, 3/11) +#'res <- Intlr(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons, obs_lats = obs_lats, +#'obs_lons = obs_lons, target_grid = 'r1280x640', lr_method = 'basic', int_method = 'conservative') +#'@export +Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, target_grid = NULL, points = NULL, + int_method = NULL, method_point_interp = NULL, source_file_exp = NULL, source_file_obs = NULL, + predictors = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", + member_dim = "member", region = NULL, large_scale_predictor_dimname = 'vars', loocv = FALSE, + ncores = 1) { + + #----------------------------------- + # Checkings + #----------------------------------- + if (!inherits(lr_method, 'character')) { + stop("Parameter 'lr_method' must be of the class 'character'") + } + + if 
(!inherits(large_scale_predictor_dimname, 'character')) { + stop("Parameter 'large_scale_predictor_dimname' must be of the class 'character'") + } + + if (!inherits(loocv, 'logical')) { + stop("Parameter 'loocv' must be set to TRUE or FALSE") + } + + if (!inherits(lat_dim, 'character')) { + stop("Parameter 'lat_dim' must be of the class 'character'") + } + + if (!inherits(lon_dim, 'character')) { + stop("Parameter 'lon_dim' must be of the class 'character'") + } + + if (!inherits(sdate_dim, 'character')) { + stop("Parameter 'sdate_dim' must be of the class 'character'") + } + + if (!inherits(large_scale_predictor_dimname, 'character')) { + stop("Parameter 'large_scale_predictor_dimname' must be of the class 'character'") + } + + if (is.na(match(lon_dim, names(dim(exp)))) | is.na(match(lon_dim, names(dim(obs))))) { + stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'lon_dim'") + } + + if (is.na(match(lat_dim, names(dim(exp)))) | is.na(match(lat_dim, names(dim(obs))))) { + stop("Missing latitude dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'lat_dim'") + } + + if (is.na(match(sdate_dim, names(dim(exp)))) | is.na(match(sdate_dim, names(dim(obs))))) { + stop("Missing start date dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'sdate_dim'") + } + + if (!is.null(points) & (is.null(source_file_exp) | is.null(source_file_obs))) { + stop("No source files found. 
Source files for exp and obs must be provided in the parameters ", + "'source_file_exp' and 'source_file_obs', respectively.") + } + + if (!is.null(points) & is.null(method_point_interp)) { + stop("Please provide the interpolation method to interpolate gridded data to point locations ", + "through the parameter 'method_point_interp'.") + } + + # sdate must be the time dimension in the input data + stopifnot(sdate_dim %in% names(dim(exp))) + stopifnot(sdate_dim %in% names(dim(obs))) + + # the code is not yet prepared to handle members in the observations + restore_ens <- FALSE + if (member_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[member_dim]), 1)) { + obs <- ClimProjDiags::Subset(x = obs, along = member_dim, indices = 1, drop = 'selected') + restore_ens <- TRUE + } else { + stop("Not implemented for observations with members ('obs' can have 'member_dim', ", + "but it should be of length = 1).") + } + } + + # checkings for the parametre 'predictors' + if (!is.null(predictors)) { + if (!is.array(predictors)) { + stop("Parameter 'predictors' must be of the class 'array'") + } else { + # ensure the predictor variable name matches the parametre large_scale_predictor_dimname + stopifnot(large_scale_predictor_dimname %in% names(dim(predictors))) + stopifnot(sdate_dim %in% names(dim(predictors))) + stopifnot(dim(predictors)[sdate_dim] == dim(exp)[sdate_dim]) + } + } + + #----------------------------------- + # Interpolation + #----------------------------------- + if (lr_method != '4nn') { + + if (is.null(int_method)) { + stop("Parameter 'int_method' must be a character vector indicating the interpolation method. ", + "Accepted methods are con, bil, bic, nn, con2") + } + + if (is.null(region)) { + warning("The borders of the downscaling region have not been provided. 
Assuming the ", + "four borders of the downscaling region are defined by the first and last ", + "elements of the parametres 'obs_lats' and 'obs_lons'.") + region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)]) + } + + exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid, + points = points, method_point_interp = method_point_interp, + source_file = source_file_exp, lat_dim = lat_dim, lon_dim = lon_dim, + method_remap = int_method, region = region) + + # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to + # the same grid to force the matching + if ((!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats, + lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !is.null(points)) { + obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid, + points = points, method_point_interp = method_point_interp, + source_file = source_file_obs, lat_dim = lat_dim, lon_dim = lon_dim, + method_remap = int_method, region = region) + + lats <- obs_interpolated$lat + lons <- obs_interpolated$lon + obs_interpolated <- obs_interpolated$data + } else { + obs_interpolated <- obs + lats <- obs_lats + lons <- obs_lons + } + } + + #----------------------------------- + # Linear regressions + #----------------------------------- + # Pointwise linear regression + # Predictor: model data + # Predictand: observations + if (lr_method == 'basic') { + predictor <- exp_interpolated$data + predictand <- obs_interpolated + + target_dims_predictor <- sdate_dim + target_dims_predictand <- sdate_dim + } + + # (Multi) linear regression with large-scale predictors + # Predictor: passed through the parameter 'predictors' by the user. 
Can be model or observations + # Predictand: model data + else if (lr_method == 'large-scale') { + if (is.null(predictors)) { + stop("The large-scale predictors must be passed through the parametre 'predictors'") + } + + predictand <- obs_interpolated + predictor <- predictors + + target_dims_predictor <- c(sdate_dim, large_scale_predictor_dimname) + target_dims_predictand <- sdate_dim + } + + # Multi-linear regression with the four nearest neighbours + # Predictors: model data + # Predictand: observations + else if (lr_method == '4nn') { + predictor <- find_nn(coar = exp, lats_hres = obs_lats, lons_hres = obs_lons, lats_coar = exp_lats, + lons_coar = exp_lons, lat_dim = lat_dim, lon_dim = lon_dim, nn = 4) + + if (is.null(points)) { + if (!is.null(target_grid)) { + warning("Interpolating to the 'obs' grid") + } + predictand <- obs + + lats <- obs_lats + lons <- obs_lons + } + # If the downscaling is to point locations: Once the 4 nearest neighbours have been found, + # interpolate to point locations + else { + predictor <- Interpolation(exp = predictor, lats = obs_lats, lons = obs_lons, target_grid = NULL, + points = points, method_point_interp = method_point_interp, + source_file = source_file_obs, method_remap = NULL, region = region) + + predictand <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = NULL, + points = points, method_point_interp = method_point_interp, + source_file = source_file_obs, method_remap = NULL, region = region) + + lats <- predictor$lat + lons <- predictor$lon + predictor <- predictor$data + predictand <- predictand$data + } + + target_dims_predictor <- c(sdate_dim,'nn') + target_dims_predictand <- sdate_dim + } + + else { + stop(paste0(lr_method, " method is not implemented yet")) + } + + # Apply the linear regressions + res <- Apply(list(predictor, predictand), target_dims = list(target_dims_predictor, target_dims_predictand), + fun = .intlr, loocv = loocv, ncores = ncores)$output1 + + names(dim(res))[1] <- 
sdate_dim
+
+  # restore ensemble dimension in observations if it existed originally
+  if (restore_ens) {
+    predictand <- s2dv::InsertDim(predictand, posdim = 1, lendim = 1, name = member_dim)
+  }
+
+  # Return a list of three elements: downscaled data, observations and coordinates
+  res <- list(data = res, obs = predictand, lon = lons, lat = lats)
+
+  return(res)
+}
+
+#-----------------------------------
+# Atomic function to generate and apply the linear regressions.
+# x: predictor values for one point (a vector, or a matrix with one column per
+#    predictor for the 'large-scale' and '4nn' methods)
+# y: predictand (observation) values for the same point
+# Returns the cross-validated (loocv = TRUE) or in-sample predictions.
+#-----------------------------------
+.intlr <- function(x, y, loocv) {
+
+  tmp_df <- data.frame(x = x, y = y)
+
+  # if the data is all NA, or only one column holds valid values, force return of NA
+  if (all(is.na(tmp_df)) | (sum(apply(tmp_df, 2, function(x) !all(is.na(x)))) == 1)) {
+
+    n <- nrow(tmp_df)
+    res <- rep(NA, n)
+
+  } else {
+    # training
+    lm1 <- train_lm(df = tmp_df, loocv = loocv)
+
+    # prediction
+    res <- pred_lm(lm1 = lm1, df = tmp_df, loocv = loocv)
+  }
+
+  return(res)
+
+}
+
+#-----------------------------------
+# Function to generate the linear regressions.
+# Returns a list of lm objects: one model per left-out sample when loocv = TRUE,
+# otherwise a single model fitted on the whole data frame.
+#-----------------------------------
+train_lm <- function(df, loocv) {
+
+  # Remove columns containing only NA's
+  df <- df[ , apply(df, 2, function(x) !all(is.na(x)))]
+
+  if (loocv) {
+
+    lm1 <- lapply(1:nrow(df), function(j) lm(df[-j,], formula = y ~ .))
+
+  } else {
+
+    lm1 <- list(lm(data = df, formula = y ~ .))
+  }
+
+  return(lm1)
+}
+
+#-----------------------------------
+# Function to apply the linear regressions.
+#-----------------------------------
+pred_lm <- function(df, lm1, loocv) {
+
+  if (loocv) {
+
+    pred_vals <- sapply(1:nrow(df), function(j) predict(lm1[[j]], df[j,]))
+
+  } else {
+
+    # predict.lm takes 'newdata', not 'data': with 'data = df' the argument was
+    # silently swallowed by '...' and the in-sample fitted values were returned
+    # instead, which misaligns/shortens the output whenever lm() dropped NA rows.
+    pred_vals_ls <- lapply(lm1, predict, newdata = df)
+    pred_vals <- unlist(pred_vals_ls)
+  }
+
+  return(pred_vals)
+}
+
+#-----------------------------------
+# Function to find N nearest neighbours. 
+# 'coar' is an array with named dimensions
+#-----------------------------------
+find_nn <- function(coar, lats_hres, lons_hres, lats_coar, lons_coar, lat_dim, lon_dim, nn = 4) {
+
+  # For each high-res coordinate, indices of the 'nn' closest coarse lats/lons,
+  # sorted from closest to furthest. NOTE(review): closeness is measured
+  # independently along each axis with absolute differences in degrees, not
+  # with great-circle distances — confirm this is intended near the poles.
+  idx_lat <- as.array(sapply(lats_hres, function(x) order(abs(lats_coar - x))[1:nn]))
+  idx_lon <- as.array(sapply(lons_hres, function(x) order(abs(lons_coar - x))[1:nn]))
+
+  names(dim(idx_lat)) <- c('nn', lat_dim)
+  names(dim(idx_lon)) <- c('nn', lon_dim)
+
+  # Obtain the values of the nearest neighbours: for each neighbour rank (the
+  # 'nn' dimension, looped over by Apply) subset the coarse (lat, lon) map at
+  # the neighbour indices, producing one high-res-sized map per rank.
+  nearest <- Apply(list(coar, idx_lat, idx_lon),
+                   target_dims = list(c(lat_dim, lon_dim), lat_dim, lon_dim),
+                   fun = function(x, y, z) x[y, z])$output1
+
+  return(nearest)
+}
+
+
diff --git a/modules/Downscaling/tmp/LogisticReg.R b/modules/Downscaling/tmp/LogisticReg.R
new file mode 100644
index 00000000..24be6936
--- /dev/null
+++ b/modules/Downscaling/tmp/LogisticReg.R
@@ -0,0 +1,497 @@
+#'@rdname CST_LogisticReg
+#'@title Downscaling using interpolation and logistic regression.
+#'
+#'@author J. Ramon, \email{jaume.ramon@bsc.es}
+#'
+#'@description This function performs a downscaling using an interpolation and a logistic
+#'regression. See \code{\link[nnet]{multinom}} for further details. It is recommended that
+#'the observations are passed already in the target grid. Otherwise, the function will also
+#'perform an interpolation of the observed field into the target grid. The coarse scale and
+#'observation data can be either global or regional. In the latter case, the region is
+#'defined by the user. In principle, the coarse and observation data are intended to be of
+#'the same variable, although different variables can also be admitted.
+#'
+#'@param exp an 's2dv object' with named dimensions containing the experimental field on
+#'the coarse scale for which the downscaling is aimed. The object must have, at least,
+#'the dimensions latitude, longitude, start date and member. 
The object is expected to be +#'already subset for the desired region. Data can be in one or two integrated regions, e.g., +#'crossing the Greenwich meridian. To get the correct results in the latter case, +#'the borders of the region should be specified in the parameter 'region'. See parameter +#''region'. +#'@param obs an 's2dv object' with named dimensions containing the observational field. +#'The object must have, at least, the dimensions latitude, longitude and start date. The +#'object is expected to be already subset for the desired region. +#'@param target_grid a character vector indicating the target grid to be passed to CDO. +#'It must be a grid recognised by CDO or a NetCDF file. +#'@param int_method a character vector indicating the regridding method to be passed to CDORemap. +#'Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is to be used, CDO_1.9.8 +#'or newer version is required. +#'@param log_reg_method a character vector indicating the logistic regression method to be used. +#'Accepted methods are "ens_mean", "ens_mean_sd", "sorted_members". "ens_mean" uses the ensemble +#'mean anomalies as predictors in the logistic regression, "ens_mean_sd" uses the ensemble +#'mean anomalies and the ensemble spread (computed as the standard deviation of all the members) +#'as predictors in the logistic regression, and "sorted_members" considers all the members +#'ordered decreasingly as predictors in the logistic regression. Default method is "ens_mean". +#'@param probs_cat a numeric vector indicating the percentile thresholds separating the +#'climatological distribution into different classes (categories). Default to c(1/3, 2/3). See +#'\code{\link[easyVerification]{convert2prob}}. +#'@param return_most_likely_cat if TRUE, the function returns the most likely category. If +#'FALSE, the function returns the probabilities for each category. Default to FALSE. 
+#'@param points a list of two elements containing the point latitudes and longitudes +#'of the locations to downscale the model data. The list must contain the two elements +#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is +#'to a point location, only regular grids are allowed for exp and obs. Only needed if the +#'downscaling is to a point location. +#'@param method_point_interp a character vector indicating the interpolation method to interpolate +#'model gridded data into the point locations. Accepted methods are "nearest", "bilinear", "9point", +#'"invdist4nn", "NE", "NW", "SE", "SW". Only needed if the downscaling is to a point location. +#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data' +#'in exp and obs. Default set to "lat". +#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data' +#'in exp and obs. Default set to "lon". +#'@param sdate_dim a character vector indicating the start date dimension name in the element +#''data' in exp and obs. Default set to "sdate". +#'@param member_dim a character vector indicating the member dimension name in the element +#''data' in exp and obs. Default set to "member". +#'@param source_file a character vector with a path to an example file of the exp data. +#'Only needed if the downscaling is to a point location. +#'@param region a numeric vector indicating the borders of the region defined in obs. +#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers +#'to the left border, while lonmax refers to the right border. latmin indicates the lower +#'border, whereas latmax indicates the upper border. If set to NULL (default), the function +#'takes the first and last elements of the latitudes and longitudes. +#'@param loocv a logical vector indicating whether to perform leave-one-out cross-validation +#'in the fitting of the logistic regression. Default to FALSE. 
+#'@param ncores an integer indicating the number of cores to use in parallel computation.
+#'
+#'@import multiApply
+#'@import nnet
+#'@importFrom plyr laply
+#'
+#'@seealso \code{\link[nnet]{multinom}}
+#'
+#'@return A list of three elements. 'data' contains the downscaled data, that could be either
+#'in the form of probabilities for each category or the most likely category. 'lat' contains the
+#'downscaled latitudes, and 'lon' the downscaled longitudes.
+#'
+#'@examples
+#'exp <- rnorm(1500)
+#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 15)
+#'exp_lons <- 1:5
+#'exp_lats <- 1:4
+#'obs <- rnorm(2700)
+#'dim(obs) <- c(lat = 12, lon = 15, sdate = 15)
+#'obs_lons <- seq(1,5, 4/14)
+#'obs_lats <- seq(1,4, 3/11)
+#'exp <- s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons)
+#'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons)
+#'res <- CST_LogisticReg(exp = exp, obs = obs, int_method = 'bil', target_grid = 'r1280x640',
+#'probs_cat = c(1/3, 2/3))
+#'@export
+CST_LogisticReg <- function(exp, obs, target_grid, int_method = NULL, log_reg_method = "ens_mean",
+                            probs_cat = c(1/3,2/3), return_most_likely_cat = FALSE, points = NULL,
+                            method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate",
+                            member_dim = "member", region = NULL, loocv = FALSE, ncores = 1) {
+
+  if (!inherits(exp,'s2dv_cube')) {
+    stop("Parameter 'exp' must be of the class 's2dv_cube'")
+  }
+
+  if (!inherits(obs,'s2dv_cube')) {
+    stop("Parameter 'obs' must be of the class 's2dv_cube'")
+  }
+
+  res <- LogisticReg(exp = exp$data, obs = obs$data, exp_lats = exp$lat, exp_lons = exp$lon,
+                     obs_lats = obs$lat, obs_lons = obs$lon, target_grid = target_grid,
+                     probs_cat = probs_cat, return_most_likely_cat = return_most_likely_cat,
+                     int_method = int_method, log_reg_method = log_reg_method, points = points,
+                     method_point_interp = method_point_interp, lat_dim = lat_dim,
+                     lon_dim = lon_dim, sdate_dim = sdate_dim, member_dim = member_dim,
+                     source_file = 
exp$source_files[1], region = region, loocv = loocv, + ncores = ncores) + + # Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data + exp$data <- res$data + exp$lon <- res$lon + exp$lat <- res$lat + + obs$data <- res$obs + obs$lat <- res$lat + obs$lon <- res$lon + + res_s2dv <- list(exp = exp, obs = obs) + return(res_s2dv) +} + +#'@rdname LogisticReg +#'@title Downscaling using interpolation and logistic regression. +#' +#'@author J. Ramon, \email{jaume.ramon@bsc.es} +#' +#'@description This function performs a downscaling using an interpolation and a logistic +#'regression. See \code{\link[nnet]{multinom}} for further details. It is recommended that +#'the observations are passed already in the target grid. Otherwise, the function will also +#'perform an interpolation of the observed field into the target grid. The coarse scale and +#'observation data can be either global or regional. In the latter case, the region is +#'defined by the user. In principle, the coarse and observation data are intended to be of +#'the same variable, although different variables can also be admitted. +#' +#'@param exp an array with named dimensions containing the experimental field on the +#'coarse scale for which the downscaling is aimed. The object must have, at least, +#'the dimensions latitude, longitude, start date and member. The object is expected to be +#'already subset for the desired region. Data can be in one or two integrated regions, e.g., +#'crossing the Greenwich meridian. To get the correct results in the latter case, +#'the borders of the region should be specified in the parameter 'region'. See parameter +#''region'. +#'@param obs an array with named dimensions containing the observational field. The object +#'must have, at least, the dimensions latitude, longitude and start date. The object is +#'expected to be already subset for the desired region. +#'@param exp_lats a numeric vector containing the latitude values in 'exp'. 
Latitudes must +#'range from -90 to 90. +#'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes +#'can range from -180 to 180 or from 0 to 360. +#'@param obs_lats a numeric vector containing the latitude values in 'obs'. Latitudes must +#'range from -90 to 90. +#'@param obs_lons a numeric vector containing the longitude values in 'obs'. Longitudes +#'can range from -180 to 180 or from 0 to 360. +#'@param target_grid a character vector indicating the target grid to be passed to CDO. +#'It must be a grid recognised by CDO or a NetCDF file. +#'@param int_method a character vector indicating the regridding method to be passed to CDORemap. +#'Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is to be used, CDO_1.9.8 +#'or newer version is required. +#'@param log_reg_method a character vector indicating the logistic regression method to be used. +#'Accepted methods are "ens_mean", "ens_mean_sd", "sorted_members". "ens_mean" uses the ensemble +#'mean anomalies as predictors in the logistic regression, "ens_mean_sd" uses the ensemble +#'mean anomalies and the ensemble spread (computed as the standard deviation of all the members) +#'as predictors in the logistic regression, and "sorted_members" considers all the members +#'ordered decreasingly as predictors in the logistic regression. Default method is "ens_mean". +#'@param probs_cat a numeric vector indicating the percentile thresholds separating the +#'climatological distribution into different classes (categories). Default to c(1/3, 2/3). See +#'\code{\link[easyVerification]{convert2prob}}. +#'@param return_most_likely_cat if TRUE, the function returns the most likely category. If +#'FALSE, the function returns the probabilities for each category. Default to FALSE. +#'@param points a list of two elements containing the point latitudes and longitudes +#'of the locations to downscale the model data. 
The list must contain the two elements +#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is +#'to a point location, only regular grids are allowed for exp and obs. Only needed if the +#'downscaling is to a point location. +#'@param method_point_interp a character vector indicating the interpolation method to interpolate +#'model gridded data into the point locations. Accepted methods are "nearest", "bilinear", "9point", +#'"invdist4nn", "NE", "NW", "SE", "SW". Only needed if the downscaling is to a point location. +#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data' +#'in exp and obs. Default set to "lat". +#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data' +#'in exp and obs. Default set to "lon". +#'@param sdate_dim a character vector indicating the start date dimension name in the element +#''data' in exp and obs. Default set to "sdate". +#'@param member_dim a character vector indicating the member dimension name in the element +#''data' in exp and obs. Default set to "member". +#'@param source_file a character vector with a path to an example file of the exp data. +#'Only needed if the downscaling is to a point location. +#'@param region a numeric vector indicating the borders of the region defined in obs. +#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers +#'to the left border, while lonmax refers to the right border. latmin indicates the lower +#'border, whereas latmax indicates the upper border. If set to NULL (default), the function +#'takes the first and last elements of the latitudes and longitudes. +#'@param loocv a logical vector indicating whether to perform leave-one-out cross-validation +#'in the fitting of the logistic regression. Default to FALSE. +#'@param ncores an integer indicating the number of cores to use in parallel computation. 
+#'
+#'@import multiApply
+#'@import nnet
+#'@importFrom plyr laply
+#'
+#'@seealso \code{\link[nnet]{multinom}}
+#'
+#'@return A list of three elements. 'data' contains the downscaled data, that could be either
+#'in the form of probabilities for each category or the most likely category. 'lat' contains the
+#'downscaled latitudes, and 'lon' the downscaled longitudes.
+#'
+#'@examples
+#'exp <- rnorm(1500)
+#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 15)
+#'exp_lons <- 1:5
+#'exp_lats <- 1:4
+#'obs <- rnorm(2700)
+#'dim(obs) <- c(lat = 12, lon = 15, sdate = 15)
+#'obs_lons <- seq(1,5, 4/14)
+#'obs_lats <- seq(1,4, 3/11)
+#'res <- LogisticReg(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons,
+#'obs_lats = obs_lats, obs_lons = obs_lons, int_method = 'bil', target_grid = 'r1280x640',
+#'probs_cat = c(1/3, 2/3))
+#'@export
+LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid,
+                        int_method = NULL, log_reg_method = "ens_mean", probs_cat = c(1/3,2/3),
+                        return_most_likely_cat = FALSE, points = NULL, method_point_interp = NULL,
+                        lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", member_dim = "member",
+                        source_file = NULL, region = NULL, loocv = FALSE, ncores = 1) {
+
+  #-----------------------------------
+  # Checkings
+  #-----------------------------------
+  if (!inherits(target_grid, 'character')) {
+    stop("Parameter 'target_grid' must be of the class 'character'")
+  }
+
+  if (!is.null(int_method) & !inherits(int_method, 'character')) {
+    stop("Parameter 'int_method' must be of the class 'character'")
+  }
+
+  if (!is.null(method_point_interp) & !inherits(method_point_interp, 'character')) {
+    stop("Parameter 'method_point_interp' must be of the class 'character'")
+  }
+
+  if (!inherits(lat_dim, 'character')) {
+    stop("Parameter 'lat_dim' must be of the class 'character'")
+  }
+
+  if (!inherits(lon_dim, 'character')) {
+    stop("Parameter 'lon_dim' must be of the class 'character'")
+  }
+
+  if 
(!inherits(sdate_dim, 'character')) { + stop("Parameter 'sdate_dim' must be of the class 'character'") + } + + if (!inherits(member_dim, 'character')) { + stop("Parameter 'member_dim' must be of the class 'character'") + } + + if (!is.null(source_file) & !inherits(source_file, 'character')) { + stop("Parameter 'source_file' must be of the class 'character'") + } + + if (!inherits(loocv, 'logical')) { + stop("Parameter 'loocv' must be set to TRUE or FALSE") + } + + if (is.na(match(lon_dim, names(dim(exp)))) | is.na(match(lon_dim, names(dim(obs))))) { + stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'lon_dim'") + } + + if (is.na(match(lat_dim, names(dim(exp)))) | is.na(match(lat_dim, names(dim(obs))))) { + stop("Missing latitude dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'lat_dim'") + } + + if (is.na(match(sdate_dim, names(dim(exp)))) | is.na(match(sdate_dim, names(dim(obs))))) { + stop("Missing start date dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'sdate_dim'") + } + + if (is.na(match(member_dim, names(dim(exp))))) { + stop("Missing member dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'member_dim'") + } + + if (!is.null(points) & (is.null(source_file))) { + stop("No source files found. One source file for exp must be provided in the parameter 'source_file'.") + } + + if (!is.null(points) & is.null(method_point_interp)) { + stop("Please provide the interpolation method to interpolate gridded data to point locations ", + "through the parameter 'method_point_interp'.") + } + + if (is.null(region)) { + warning("The borders of the downscaling region have not been provided. 
Assuming the four borders ", + "of the downscaling region are defined by the first and last elements of the parametres ", + "'obs_lats' and 'obs_lons'.") + region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)]) + } + + # the code is not yet prepared to handle members in the observations + restore_ens <- FALSE + if (member_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[member_dim]), 1)) { + restore_ens <- TRUE + obs <- ClimProjDiags::Subset(x = obs, along = member_dim, indices = 1, drop = 'selected') + } else { + stop("Not implemented for observations with members ('obs' can have 'member_dim', ", + "but it should be of length = 1).") + } + } + + exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid, + method_remap = int_method, points = points, source_file = source_file, + lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp, + region = region) + + # compute ensemble mean anomalies + if (log_reg_method == "ens_mean") { + predictor <- get_ens_mean_anom(obj_ens = exp_interpolated$data, member_dim = member_dim, sdate_dim = sdate_dim) + + target_dims_predictor <- sdate_dim + } + else if (log_reg_method == "ens_mean_sd") { + ens_mean_anom <- get_ens_mean_anom(obj_ens = exp_interpolated$data, member_dim = member_dim, + sdate_dim = sdate_dim) + ens_sd <- get_ens_sd(obj_ens = exp_interpolated$data, member_dim = member_dim) + + #merge two arrays into one array of predictors + predictor <- abind(ens_mean_anom, ens_sd, along = 1/2) + names(dim(predictor)) <- c("pred", names(dim(ens_mean_anom))) + + target_dims_predictor <- c(sdate_dim, "pred") + } else if (log_reg_method == "sorted_members") { + predictor <- sort_members(obj_ens = exp_interpolated$data, member_dim = member_dim) + + target_dims_predictor <- c(sdate_dim, member_dim) + } else { + stop(paste0(log_reg_method, " not recognised or not implemented.")) + } + + # If after interpolating 
'exp' data the coordinates do not match, the obs data is interpolated to + # the same grid to force the matching + if ((!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats, + lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !is.null(points)) { + obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid, + method_remap = int_method, points = points, source_file = source_file, + lat_dim = lat_dim, lon_dim = lon_dim, + method_point_interp = method_point_interp, region = region) + obs_ref <- obs_interpolated$data + } else { + obs_ref <- obs + } + + # convert observations to categorical predictands + obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) { + terc <- convert2prob(as.vector(x), prob = probs_cat) + apply(terc, 1, function(r) which (r == 1))}, + output_dims = sdate_dim)$output1 + + res <- Apply(list(predictor, obs_cat), target_dims = list(target_dims_predictor, sdate_dim), fun = function(x, y) + .log_reg(x = x, y = y, loocv = loocv), output_dims = c(sdate_dim, "category"))$output1 + + if (return_most_likely_cat) { + res <- Apply(res, target_dims = c(sdate_dim, "category"), most_likely_category, + output_dims = sdate_dim)$output1 + } + + # restore ensemble dimension in observations if it existed originally + if (restore_ens) { + obs_ref <- s2dv::InsertDim(obs_ref, posdim = 1, lendim = 1, name = member_dim) + } + + res <- list(data = res, obs = obs_ref, lon = exp_interpolated$lon, lat = exp_interpolated$lat) + + return(res) +} + +most_likely_category <- function(data) { +# data, expected dims: start date, category (in this order) + + if (all(is.na(data))) { + mlc <- rep(NA, nrow(data)) + } else { + mlc <- apply(data, 1, which.max) + } + return(mlc) +} + +sort_members <- function(obj_ens, member_dim) { + + sorted <- Apply(obj_ens, target_dims = member_dim, sort, decreasing = TRUE, na.last = TRUE)$output1 + + return(sorted) +} + +get_ens_sd <- function(obj_ens, member_dim) { + + # compute ensemble 
spread
+  ens_sd <- Apply(obj_ens, target_dims = member_dim, sd, na.rm = TRUE)$output1
+
+  return(ens_sd)
+}
+
+get_ens_mean_anom <- function(obj_ens, member_dim, sdate_dim) {
+
+  require(s2dv)
+
+  # compute climatology
+  clim <- Apply(obj_ens, target_dims = c(member_dim, sdate_dim), mean)$output1
+
+  # compute ensemble mean
+  ens_mean <- Apply(obj_ens, target_dims = member_dim, mean, na.rm = TRUE)$output1
+
+  # compute ensemble mean anomalies
+  anom <- Ano(ens_mean, clim)
+
+  return(anom)
+}
+
+# atomic functions for logistic regressions
+.log_reg <- function(x, y, loocv) {
+
+  tmp_df <- data.frame(x = x, y = y)
+
+  # if the data is all NA, force return NA
+  if (all(is.na(tmp_df)) | (sum(apply(tmp_df, 2, function(x) !all(is.na(x)))) == 1)) {
+
+    n <- nrow(tmp_df)
+    res <- matrix(NA, nrow = n, ncol = length(unique(tmp_df$y)))
+
+  } else {
+    # training
+    lm1 <- train_lr(df = tmp_df, loocv = loocv)
+
+    # prediction
+    res <- pred_lr(lm1 = lm1, df = tmp_df, loocv = loocv)
+  }
+  return(res)
+}
+
+#-----------------------------------
+# Function to train the logistic regressions.
+#-----------------------------------
+train_lr <- function(df, loocv) {
+
+  require(nnet)
+
+  # Remove columns containing only NA's
+  df <- df[ , apply(df, 2, function(x) !all(is.na(x)))]
+
+  if (loocv) {
+
+    lm1 <- lapply(1:nrow(df), function(j) multinom(y ~ ., data = df[ -j, ]))
+
+  } else {
+
+    lm1 <- list(multinom(y ~ ., data = df))
+
+  }
+
+  return(lm1)
+}
+
+#-----------------------------------
+# Function to apply the logistic regressions.
+#-----------------------------------
+pred_lr <- function(df, lm1, loocv) {
+
+  require(plyr)
+
+  if (loocv) {
+
+    # The error: "Error: Results must have the same dimensions." 
can + # appear when the number of sdates is insufficient + pred_vals_ls <- list() + for (j in 1:nrow(df)) { + pred_vals_ls[[j]] <- predict(lm1[[j]], df[j,], type = "probs") + } + + pred_vals <- laply(pred_vals_ls, .fun = as.array) + + } else { + + # type = class, probs + #pred_vals_ls <- lapply(lm1, predict, data = df, type = "probs") + #pred_vals <- unlist(pred_vals_ls) + pred_vals <- predict(lm1[[1]], df, type = "probs") + } + + return(pred_vals) +} + + diff --git a/modules/Downscaling/tmp/Utils.R b/modules/Downscaling/tmp/Utils.R new file mode 100644 index 00000000..4c727465 --- /dev/null +++ b/modules/Downscaling/tmp/Utils.R @@ -0,0 +1,31 @@ +.check_coords <- function(lat1, lon1, lat2, lon2) { + match <- TRUE + if (!((length(lat1) == length(lat2)) & (length(lon1) == length(lon2)))) { + match <- FALSE + } + return(match) +} + +# reorder dims to a reference array. If they do not exist, they are created +# example +#arr_ref <- array(NA, c(dataset = 1, sdate = 8, member = 3, ftime = 1, lon = 269, lat = 181)) +#arr_to_reorder <- array(NA, c(dataset = 1, member = 3, sdate = 8, lat = 181, lon = 269, pp = 1)) + +.reorder_dims <- function(arr_ref, arr_to_reorder) { + + miss_dims <- names(dim(arr_ref))[!(names(dim(arr_ref)) %in% names(dim(arr_to_reorder)))] + + if (length(miss_dims) != 0) { + for (m in seq(miss_dims)) { + arr_to_reorder <- InsertDim(data = arr_to_reorder, posdim = length(dim(arr_to_reorder)) + 1, lendim = 1, + name = miss_dims[m]) + } + } + + # TODO: add code to reorder dimensions and put the non-common dimensions at the end + + orddim <- match(names(dim(arr_ref)),names(dim(arr_to_reorder))) + return(Reorder(data = arr_to_reorder, order = orddim)) +} + + -- GitLab From 64036d79a200d0b28782376bb35460b1bb83cda0 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 9 Mar 2023 13:29:44 +0100 Subject: [PATCH 15/21] Source functions from temporary folder --- modules/Downscaling/Downscaling.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 
deletions(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index d8522b65..97732c40 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -10,12 +10,12 @@ #░ ░ ░ ## TODO: Remove once CSDownscale is on CRAN ## TODO: Move recipe checks to check_recipe() -source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Interpolation.R') -source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Intbc.R') -source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Intlr.R') -source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Analogs.R') -source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/LogisticReg.R') -source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Utils.R') +source('modules/Downscaling/tmp/Interpolation.R') +source('modules/Downscaling/tmp/Intbc') +source('modules/Downscaling/tmp/Intlr.R') +source('modules/Downscaling/tmp/Analogs.R') +source('modules/Downscaling/tmp/LogisticReg.R') +source('modules/Downscaling/tmp/Utils.R') #source("https://earth.bsc.es/gitlab/external/cstools/-/raw/master/R/CST_BiasCorrection.R") -- GitLab From 575fe297b608820cd129b2cd04a2e308ca81d61f Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 14 Mar 2023 11:02:29 +0100 Subject: [PATCH 16/21] Fix bug (error when sourcing function) and modify path names for downscaling --- modules/Downscaling/Downscaling.R | 2 +- modules/Saving/paths2save.R | 11 ++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index 97732c40..23586788 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -11,7 +11,7 @@ ## TODO: Remove once CSDownscale is on CRAN ## TODO: Move recipe checks to check_recipe() source('modules/Downscaling/tmp/Interpolation.R') -source('modules/Downscaling/tmp/Intbc') 
+source('modules/Downscaling/tmp/Intbc.R') source('modules/Downscaling/tmp/Intlr.R') source('modules/Downscaling/tmp/Analogs.R') source('modules/Downscaling/tmp/LogisticReg.R') diff --git a/modules/Saving/paths2save.R b/modules/Saving/paths2save.R index 93196b86..b2023ae6 100644 --- a/modules/Saving/paths2save.R +++ b/modules/Saving/paths2save.R @@ -92,8 +92,17 @@ get_dir <- function(recipe, agg = "global") { fcst.sdate <- paste0("hcst-", recipe$Analysis$Time$sdate) } } - + # Calibration or downscaling method calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method) + if ((calib.method == "raw") && + (!is.null(recipe$Analysis$Workflow$Downscaling))) { + if (recipe$Analysis$Workflow$Downscaling$type == "none") { + calib.method <- "raw" + } else { + calib.method <- recipe$Analysis$Workflow$Downscaling$type + } + } + # Frequency store.freq <- recipe$Analysis$Variables$freq ## TODO: Change "_country" switch(tolower(agg), -- GitLab From 823ebfbc83335a572a6073b1df4c281e33aa9b29 Mon Sep 17 00:00:00 2001 From: eduzenli Date: Wed, 22 Mar 2023 15:46:39 +0100 Subject: [PATCH 17/21] can retrieve hurs variable from CERRA dataset --- conf/archive.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/archive.yml b/conf/archive.yml index eb8e86a5..6da69a65 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -186,7 +186,7 @@ archive: daily_mean: {"hur":"_f3h-r2631x1113/", "ps":"_f3h-r2631x1113/", "sfcWind":"_f3h-r2631x1113/", "tas":"_f3h-r2631x1113/", "winddir":"_f3h-r2631x1113/"} monthly_mean: {"hur":"_f3h-r2631x1113/", "ps":"_f3h-r2631x1113/", "sfcWind":"_f3h-r2631x1113/", - "tas":"_f3h-r2631x1113/", "winddir":"_f3h-r2631x1113/","tasmin":"_f24h-r2631x1113/","tasmax":"_f24h-r2631x1113/"} + "tas":"_f3h-r2631x1113/", "winddir":"_f3h-r2631x1113/","tasmin":"_f24h-r2631x1113/","tasmax":"_f24h-r2631x1113/","hurs":"_f3h-r2631x1113/"} calendar: "proleptic_gregorian" reference_grid: 
"/esarchive/recon/ecmwf/cerra/monthly_mean/tas_f3h-r2631x1113/tas_200506.nc" CERRA-Land: -- GitLab From 65981563db5824cd1d2b97bd588176d544998631 Mon Sep 17 00:00:00 2001 From: eduzenli Date: Wed, 22 Mar 2023 15:49:39 +0100 Subject: [PATCH 18/21] applies the updates in the Intbc function --- modules/Downscaling/Downscaling.R | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index 23586788..58b9c22f 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -42,11 +42,13 @@ downscale_datasets <- function(recipe, data) { # Downscaling function params int_method <- tolower(recipe$Analysis$Workflow$Downscaling$int_method) bc_method <- tolower(recipe$Analysis$Workflow$Downscaling$bc_method) + cal_method <- tolower(recipe$Analysis$Workflow$Downscaling$cal_method) lr_method <- tolower(recipe$Analysis$Workflow$Downscaling$lr_method) logreg_method <- tolower(recipe$Analysis$Workflow$Downscaling$logreg_method) target_grid <- tolower(recipe$Analysis$Workflow$Downscaling$target_grid) nanalogs <- as.numeric(recipe$Analysis$Workflow$Downscaling$nanalogs) - ## TODO: Compute number of cores + + ## TODO: Compute number of cores if (is.null(recipe$Analysis$ncores)) { ncores <- 1 } else { @@ -94,12 +96,13 @@ downscale_datasets <- function(recipe, data) { method_point_interp = NULL) hcst_downscal$obs <- obs_downscal$exp - } else if (type == "intbc") { - # Interpolate hcst and obs with bias correction + } else if (type == "intbc" & (bc_method=="cal" | bc_method=="calibration")) { + # Interpolate hcst and obs with bias correction (via calibration) hcst_downscal <- CST_Intbc(data$hcst, data$obs, target_grid = target_grid, bc_method = bc_method, int_method = int_method, + cal.method=cal_method, points = NULL, method_point_interp = NULL, lat_dim = "latitude", @@ -108,7 +111,23 @@ downscale_datasets <- function(recipe, data) { member_dim = 
"ensemble", region = NULL, ncores = ncores) - } else if (type == "intlr") { + + } else if (type == "intbc" & !(bc_method=="cal" | bc_method=="calibration")) { + # Interpolate hcst and obs with bias correction (any other method than calibration) + hcst_downscal <- CST_Intbc(data$hcst, data$obs, + target_grid = target_grid, + bc_method = bc_method, + int_method = int_method, + points = NULL, + method_point_interp = NULL, + lat_dim = "latitude", + lon_dim = "longitude", + sdate_dim = "syear", + member_dim = "ensemble", + region = NULL, + ncores = ncores) + + } else if (type == "intlr") { ## TODO: add the possibility to have the element 'pred' in 'data' if (lr_method == "large_scale") { if (is.null(data$pred$data)) { -- GitLab From b771b442e0acae4f193b0d80f6ca4d0b63e37b0c Mon Sep 17 00:00:00 2001 From: eduzenli Date: Wed, 22 Mar 2023 15:50:56 +0100 Subject: [PATCH 19/21] different 'int + calibration' methods can be tested --- modules/Downscaling/tmp/Intbc.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/Downscaling/tmp/Intbc.R b/modules/Downscaling/tmp/Intbc.R index 86bb5a9c..5a5275bf 100644 --- a/modules/Downscaling/tmp/Intbc.R +++ b/modules/Downscaling/tmp/Intbc.R @@ -68,7 +68,7 @@ CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", - sdate_dim = "sdate", member_dim = "member", region = NULL, ncores = 1) + sdate_dim = "sdate", member_dim = "member", region = NULL, ncores = 1,...) 
{ if (!inherits(exp,'s2dv_cube')) { stop("Parameter 'exp' must be of the class 's2dv_cube'") @@ -82,7 +82,7 @@ CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, point obs_lats = obs$lat, obs_lons = obs$lon, target_grid = target_grid, int_method = int_method, bc_method = bc_method, points = points, source_file = exp$source_files[1], method_point_interp = method_point_interp, lat_dim = lat_dim, lon_dim = lon_dim, - sdate_dim = sdate_dim, member_dim = member_dim, region = region, ncores = ncores) + sdate_dim = sdate_dim, member_dim = member_dim, region = region, ncores = ncores,...) # Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data exp$data <- res$data -- GitLab From 7c3282e8264c10a7ca4881ee21eb8592a0236cc9 Mon Sep 17 00:00:00 2001 From: eduzenli Date: Fri, 24 Mar 2023 14:33:28 +0100 Subject: [PATCH 20/21] checks are included --- modules/Downscaling/Downscaling.R | 178 ++++++++++++++++++++---------- 1 file changed, 122 insertions(+), 56 deletions(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index 58b9c22f..3e574e73 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -1,23 +1,10 @@ -# ▄████▄ ██████ ▓█████▄ ▒█████ █ █░███▄ █ ██████ ▄████▄ ▄▄▄ ██▓ ▓█████ -#▒██▀ ▀█ ▒██ ▒ ▒██▀ ██▌▒██▒ ██▒▓█░ █ ░█░██ ▀█ █ ▒██ ▒ ▒██▀ ▀█ ▒████▄ ▓██▒ ▓█ ▀ -#▒▓█ ▄ ░ ▓██▄ ░██ █▌▒██░ ██▒▒█░ █ ░█▓██ ▀█ ██▒░ ▓██▄ ▒▓█ ▄ ▒██ ▀█▄ ▒██░ ▒███ -#▒▓▓▄ ▄██▒ ▒ ██▒░▓█▄ ▌▒██ ██░░█░ █ ░█▓██▒ ▐▌██▒ ▒ ██▒▒▓▓▄ ▄██▒░██▄▄▄▄██ ▒██░ ▒▓█ ▄ -#▒ ▓███▀ ░▒██████▒▒░▒████▓ ░ ████▓▒░░░██▒██▓▒██░ ▓██░▒██████▒▒▒ ▓███▀ ░ ▓█ ▓██▒░██████▒░▒████▒ -#░ ░▒ ▒ ░▒ ▒▓▒ ▒ ░ ▒▒▓ ▒ ░ ▒░▒░▒░ ░ ▓░▒ ▒ ░ ▒░ ▒ ▒ ▒ ▒▓▒ ▒ ░░ ░▒ ▒ ░ ▒▒ ▓▒█░░ ▒░▓ ░░░ ▒░ ░ -# ░ ▒ ░ ░▒ ░ ░ ░ ▒ ▒ ░ ▒ ▒░ ▒ ░ ░ ░ ░░ ░ ▒░░ ░▒ ░ ░ ░ ▒ ▒ ▒▒ ░░ ░ ▒ ░ ░ ░ ░ -#░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ▒ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ▒ ░ ░ ░ -#░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ -#░ ░ ░ -## TODO: Remove once CSDownscale is on CRAN -## TODO: Move recipe checks to check_recipe() 
+### Downscaling Module source('modules/Downscaling/tmp/Interpolation.R') source('modules/Downscaling/tmp/Intbc.R') source('modules/Downscaling/tmp/Intlr.R') source('modules/Downscaling/tmp/Analogs.R') source('modules/Downscaling/tmp/LogisticReg.R') source('modules/Downscaling/tmp/Utils.R') -#source("https://earth.bsc.es/gitlab/external/cstools/-/raw/master/R/CST_BiasCorrection.R") - ## Entry params data and recipe? downscale_datasets <- function(recipe, data) { @@ -30,45 +17,57 @@ downscale_datasets <- function(recipe, data) { type <- tolower(recipe$Analysis$Workflow$Downscaling$type) if (!is.null(data$fcst)) { - warn(recipe$Run$logger, - "The downscaling will be only performed to the hindcast data") + warning("The downscaling will be only performed to the hindcast data") data$fcst <- NULL } if (type == "none") { + hcst_downscal <- data$hcst DOWNSCAL_MSG <- "##### NO DOWNSCALING PERFORMED #####" + } else { # Downscaling function params int_method <- tolower(recipe$Analysis$Workflow$Downscaling$int_method) bc_method <- tolower(recipe$Analysis$Workflow$Downscaling$bc_method) - cal_method <- tolower(recipe$Analysis$Workflow$Downscaling$cal_method) + cal_method <- tolower(recipe$Analysis$Workflow$Downscaling$cal_method) lr_method <- tolower(recipe$Analysis$Workflow$Downscaling$lr_method) - logreg_method <- tolower(recipe$Analysis$Workflow$Downscaling$logreg_method) + log_reg_method <- tolower(recipe$Analysis$Workflow$Downscaling$log_reg_method) target_grid <- tolower(recipe$Analysis$Workflow$Downscaling$target_grid) nanalogs <- as.numeric(recipe$Analysis$Workflow$Downscaling$nanalogs) - - ## TODO: Compute number of cores + if (is.null(recipe$Analysis$ncores)) { ncores <- 1 } else { ncores <- recipe$Analysis$ncores } - ## TODO: add the parametre loocv where it corresponds + + #TO DO: add the parametre loocv where it corresponds if (is.null(recipe$Analysis$loocv)) { loocv <- TRUE } else { loocv <- recipe$Analysis$loocv } - # Define downscaling options + DOWNSCAL_TYPES 
<- c("none", "int", "intbc", "intlr", "analogs", "logreg") - BC_METHODS <- c("simple_bias", "calibration", "quantile_mapping") + BC_METHODS <- c("simple_bias", "calibration", "quantile_mapping", "sbc", "cal", "qm") LR_METHODS <- c("basic", "large_scale", "4nn") - LOGREG_METHODS <- c("ens_mean", "ens_mean_sd", "sorted_members") + LOG_REG_METHODS <- c("ens_mean", "ens_mean_sd", "sorted_members") + + if (!(type %in% DOWNSCAL_TYPES)) { + stop("Downscaling type in the recipe is not available. Accepted types ", + "are 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'.") + } - # Interpolation if (type == "int") { - ## TODO: Move this check elsewhere + if (is.null(int_method)) { + stop("Please provide one interpolation method in the recipe.") + } + + if (is.null(target_grid)) { + stop("Please provide the target grid in the recipe.") + } + # Ensure that observations are in the same grid as experiments # Only needed for this method because the others already return the # observations @@ -76,7 +75,6 @@ downscale_datasets <- function(recipe, data) { lonmin <- data$hcst$lon[1] latmax <- data$hcst$lat[length(data$hcst$lat)] lonmax <- data$hcst$lon[length(data$hcst$lon)] - # Interpolate hcst hcst_downscal <- CST_Interpolation(data$hcst, points = NULL, method_remap = int_method, @@ -85,7 +83,7 @@ downscale_datasets <- function(recipe, data) { lon_dim = "longitude", region = c(lonmin, lonmax, latmin, latmax), method_point_interp = NULL) - # Interpolate obs + obs_downscal <- CST_Interpolation(data$obs, points = NULL, method_remap = int_method, @@ -94,15 +92,41 @@ downscale_datasets <- function(recipe, data) { lon_dim = "longitude", region = c(lonmin, lonmax, latmin, latmax), method_point_interp = NULL) + hcst_downscal$obs <- obs_downscal$exp - } else if (type == "intbc" & (bc_method=="cal" | bc_method=="calibration")) { - # Interpolate hcst and obs with bias correction (via calibration) + DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" + } else if (type == "intbc") { + if 
(length(int_method) == 0) { + stop("Please provide one (and only one) interpolation method in the recipe.") + } + + if (is.null(bc_method)) { + stop("Please provide one bias-correction method in the recipe. Accepted ", + "methods are 'simple_bias', 'calibration', 'quantile_mapping', 'sbc', 'cal', ", + "'qm'.") + } + + if (is.null(target_grid)) { + stop("Please provide the target grid in the recipe.") + } + + if (!(bc_method %in% BC_METHODS)) { + stop(paste0(bc_method, " method in the recipe is not available. Accepted methods ", + "are 'simple_bias', 'calibration', 'quantile_mapping', 'sbc', 'cal', 'qm'.")) + } + + if (bc_method=="cal" | bc_method=="calibration") + { + if (length(cal_method)==0) { + stop("Please provide one (and only one) calibration method in the recipe.") + } + hcst_downscal <- CST_Intbc(data$hcst, data$obs, target_grid = target_grid, bc_method = bc_method, int_method = int_method, - cal.method=cal_method, + cal.method=cal_method, points = NULL, method_point_interp = NULL, lat_dim = "latitude", @@ -112,9 +136,10 @@ downscale_datasets <- function(recipe, data) { region = NULL, ncores = ncores) - } else if (type == "intbc" & !(bc_method=="cal" | bc_method=="calibration")) { - # Interpolate hcst and obs with bias correction (any other method than calibration) - hcst_downscal <- CST_Intbc(data$hcst, data$obs, + DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" + } else + { + hcst_downscal <- CST_Intbc(data$hcst, data$obs, target_grid = target_grid, bc_method = bc_method, int_method = int_method, @@ -126,9 +151,28 @@ downscale_datasets <- function(recipe, data) { member_dim = "ensemble", region = NULL, ncores = ncores) + DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" + } + } else if (type == "intlr") { + if (length(int_method) == 0) { + stop("Please provide one (and only one) interpolation method in the recipe.") + } - } else if (type == "intlr") { - ## TODO: add the possibility to have the element 'pred' in 'data' + if (is.null(lr_method)) { + 
stop("Please provide one linear regression method in the recipe. Accepted ", + "methods are 'basic', 'large_scale', '4nn'.") + } + + if (is.null(target_grid)) { + stop("Please provide the target grid in the recipe.") + } + + if (!(lr_method %in% LR_METHODS)) { + stop(paste0(lr_method, " method in the recipe is not available. Accepted methods ", + "are 'basic', 'large_scale', '4nn'.")) + } + + # TO DO: add the possibility to have the element 'pred' in 'data' if (lr_method == "large_scale") { if (is.null(data$pred$data)) { stop("Please provide the large scale predictors in the element 'data$pred$data'.") @@ -136,7 +180,7 @@ downscale_datasets <- function(recipe, data) { } else { data$pred$data <- NULL } - # Interpolate hcst and obs with linear regression + hcst_downscal <- CST_Intlr(data$hcst, data$obs, lr_method = lr_method, target_grid = target_grid, @@ -144,23 +188,25 @@ downscale_datasets <- function(recipe, data) { int_method = int_method, method_point_interp = NULL, predictors = data$pred$data, - lat_dim = "latitude", + lat_dim = "latitude", lon_dim = "longitude", - sdate_dim = "syear", - time_dim = "time", + sdate_dim = "syear", + time_dim = "time", member_dim = "ensemble", large_scale_predictor_dimname = 'vars', loocv = loocv, - region = NULL, + region = NULL, ncores = ncores) + + DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } else if (type == "analogs") { - if (length(nanalogs) == 0) { - info(recipe$Run$logger, - paste("The number of analogs for searching has not been provided", - "in the recipe. Setting it to 3.")) + + if (is.null(nanalogs)) { + warning("The number of analogs for searching has not been provided in the ", + "recipe. 
Setting it to 3.") nanalogs <- 3 } - # Apply analogs method to hcst and obs + hcst_downscal <- CST_Analogs(data$hcst, data$obs, grid_exp = data$hcst$source_files[1], nanalogs = nanalogs, @@ -174,17 +220,36 @@ downscale_datasets <- function(recipe, data) { return_indices = FALSE, loocv_window = loocv, ncores = ncores) + + DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } else if (type == "logreg") { + + if (length(int_method) == 0) { + stop("Please provide one (and only one) interpolation method in the recipe.") + } + + if (is.null(log_reg_method)) { + stop("Please provide one logistic regression method in the recipe. Accepted ", + "methods are 'ens_mean', 'ens_mean_sd', 'sorted_members'.") + } + + if (is.null(target_grid)) { + stop("Please provide the target grid in the recipe.") + } + # Since we are forcing to create three categories, and applying cross-validation, # we need at least six years of data for the logistic regression function to not # crash - if (dim(data$hcst$data)[["syear"]] < 6) { - error(recipe$Run$logger, - paste("The number of years of data is insufficient for the", - "logistic regression method. Please provide six or more.")) - stop() + if (dim(data$hcst$data)[names(dim(data$hcst$data)) == "syear"] <= 5) { + stop("The number of start dates is insufficient for the logisitic regression method. ", + "Please provide six or more.") + } + + if (!(log_reg_method %in% LOG_REG_METHODS)) { + stop(paste0(log_reg_method, " method in the recipe is not available. 
Accepted methods ", + "are 'ens_mean', 'ens_mean_sd', 'sorted_members'.")) } - # Apply logistic regression to hcst and obs + hcst_downscal <- CST_LogisticReg(data$hcst, data$obs, target_grid = target_grid, int_method = int_method, @@ -201,10 +266,11 @@ downscale_datasets <- function(recipe, data) { loocv = loocv, ncores = ncores) + DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } - DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" - } - info(recipe$Run$logger, DOWNSCAL_MSG) - return(list(hcst = hcst_downscal$exp, obs = hcst_downscal$obs, fcst = NULL)) + } + print(DOWNSCAL_MSG) + return(list(hcst = hcst_downscal$exp, obs = hcst_downscal$obs, fcst = NULL)) } + -- GitLab From d397a625cd841bdc991802fcd2ec699e8a710a15 Mon Sep 17 00:00:00 2001 From: eduzenli Date: Wed, 29 Mar 2023 10:47:38 +0200 Subject: [PATCH 21/21] hurs variable can be downloaded from ECMWF-SEAS5 and CERRA archives --- conf/archive.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/conf/archive.yml b/conf/archive.yml index 6da69a65..87e9cd65 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -16,7 +16,7 @@ archive: "tasmin":"_f24h/", "tasmax":"_f24h/", "ta300":"_f12h/", "ta500":"_f12h/", "ta850":"_f12h/", "g300":"_f12h/", "g500":"_f12h/", "g850":"_f12h/", - "tdps":"_f6h/"} + "tdps":"_f6h/","hurs":"_f6h/"} nmember: fcst: 51 hcst: 25 @@ -183,10 +183,10 @@ archive: name: "ECMWF CERRA" institution: "European Centre for Medium-Range Weather Forecasts" src: "recon/ecmwf/cerra/" - daily_mean: {"hur":"_f3h-r2631x1113/", "ps":"_f3h-r2631x1113/", "sfcWind":"_f3h-r2631x1113/", + daily_mean: {"hurs":"_f3h-r2631x1113/", "ps":"_f3h-r2631x1113/", "sfcWind":"_f3h-r2631x1113/", "tas":"_f3h-r2631x1113/", "winddir":"_f3h-r2631x1113/"} - monthly_mean: {"hur":"_f3h-r2631x1113/", "ps":"_f3h-r2631x1113/", "sfcWind":"_f3h-r2631x1113/", - "tas":"_f3h-r2631x1113/", "winddir":"_f3h-r2631x1113/","tasmin":"_f24h-r2631x1113/","tasmax":"_f24h-r2631x1113/","hurs":"_f3h-r2631x1113/"} + 
monthly_mean: {"hurs":"_f3h-r2631x1113/", "ps":"_f3h-r2631x1113/", "sfcWind":"_f3h-r2631x1113/", + "tas":"_f3h-r2631x1113/", "winddir":"_f3h-r2631x1113/","tasmin":"_f24h-r2631x1113/","tasmax":"_f24h-r2631x1113/"} calendar: "proleptic_gregorian" reference_grid: "/esarchive/recon/ecmwf/cerra/monthly_mean/tas_f3h-r2631x1113/tas_200506.nc" CERRA-Land: -- GitLab