From 91b5b7b1c27124831360887a395b3524be31e9db Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 27 Sep 2023 16:49:09 +0200 Subject: [PATCH] Revert "Merge branch 'dev-Downscaling' into 'master'" This reverts merge request !65 --- conf/archive.yml | 8 +- modules/Calibration/Calibration.R | 3 +- modules/Downscaling/Downscaling.R | 295 ------- modules/Downscaling/tmp/Analogs.R | 550 ------------- modules/Downscaling/tmp/Intbc.R | 339 -------- modules/Downscaling/tmp/Interpolation.R | 767 ------------------ modules/Downscaling/tmp/Intlr.R | 559 ------------- modules/Downscaling/tmp/LogisticReg.R | 550 ------------- modules/Downscaling/tmp/Utils.R | 39 - modules/Loading/Loading.R | 1 - .../recipe_system5c3s-tas_downscaling.yml | 62 -- modules/Saving/R/get_filename.R | 3 +- modules/Skill/Skill.R | 2 +- modules/Visualization/Visualization.R | 1 - tools/check_recipe.R | 114 +-- tools/data_summary.R | 3 +- 16 files changed, 12 insertions(+), 3284 deletions(-) delete mode 100644 modules/Downscaling/Downscaling.R delete mode 100644 modules/Downscaling/tmp/Analogs.R delete mode 100644 modules/Downscaling/tmp/Intbc.R delete mode 100644 modules/Downscaling/tmp/Interpolation.R delete mode 100644 modules/Downscaling/tmp/Intlr.R delete mode 100644 modules/Downscaling/tmp/LogisticReg.R delete mode 100644 modules/Downscaling/tmp/Utils.R delete mode 100644 modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml diff --git a/conf/archive.yml b/conf/archive.yml index 29489a97..a347dc40 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -5,9 +5,9 @@ esarchive: name: "ECMWF SEAS5" institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/ecmwf/system5c3s/" - daily: {"tasmin":"/", "tasmax":"/"} daily_mean: {"tas":"_f6h/", "rsds":"_s0-24h/", "prlr":"_s0-24h/", "sfcWind":"_f6h/", + "tasmin":"_f24h/", "tasmax":"_f24h/", "ta300":"_f12h/", "ta500":"_f12h/", "ta850":"_f12h/", "g300":"_f12h/", "g500":"_f12h/", "g850":"_f12h/", "tdps":"_f6h/", "hurs":"_f6h/"} @@ -16,8 +16,7 @@ esarchive: "tasmin":"_f24h/", "tasmax":"_f24h/", "ta300":"_f12h/", "ta500":"_f12h/", "ta850":"_f12h/", "g300":"_f12h/", "g500":"_f12h/", "g850":"_f12h/", - "tdps":"_f6h/", "psl":"_f6h/", "tos":"_f6h/", - "hurs":"_f6h/"} + "tdps":"_f6h/", "psl":"_f6h/", "tos":"_f6h/"} nmember: fcst: 51 hcst: 25 @@ -186,8 +185,7 @@ esarchive: name: "ECMWF CERRA" institution: "European Centre for Medium-Range Weather Forecasts" src: "recon/ecmwf/cerra/" - daily: {"tasmax":"-r2631x1113/", "tasmin":"-r2631x1113/"} - daily_mean: {"hurs":"_f3h-r2631x1113/", "ps":"_f3h-r2631x1113/", "sfcWind":"_f3h-r2631x1113/", + daily_mean: {"hur":"_f3h-r2631x1113/", "ps":"_f3h-r2631x1113/", "sfcWind":"_f3h-r2631x1113/", "tas":"_f3h-r2631x1113/", "winddir":"_f3h-r2631x1113/"} monthly_mean: {"hurs":"_f3h-r2631x1113/", "ps":"_f3h-r2631x1113/", "sfcWind":"_f3h-r2631x1113/", "tas":"_f3h-r2631x1113/", "winddir":"_f3h-r2631x1113/","tasmin":"_f24h-r2631x1113/","tasmax":"_f24h-r2631x1113/"} diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index e41565a9..171b22cf 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -110,8 +110,7 @@ Calibration <- function(recipe, data) { fcst_calibrated <- NULL } } - } else if ((recipe$Analysis$Variables$freq == "daily_mean") || - (recipe$Analysis$Variables$freq == "daily")) { + } else if (recipe$Analysis$Variables$freq == "daily_mean") { # Daily data calibration using Quantile Mapping if (!(method %in% c("qmap"))) { error(recipe$Run$logger, diff --git 
a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R deleted file mode 100644 index 15f5ea9b..00000000 --- a/modules/Downscaling/Downscaling.R +++ /dev/null @@ -1,295 +0,0 @@ -### Downscaling Module -source('modules/Downscaling/tmp/Interpolation.R') -source('modules/Downscaling/tmp/Intbc.R') -source('modules/Downscaling/tmp/Intlr.R') -source('modules/Downscaling/tmp/Analogs.R') -source('modules/Downscaling/tmp/LogisticReg.R') -source('modules/Downscaling/tmp/Utils.R') - -Downscaling <- function(recipe, data) { - # Function that downscales the hindcast using the method stated in the - # recipe. For the moment, the forecast must be NULL. - # - # data: list of s2dv_cube objects containing the hcst, obs and fcst. - # recipe: object obtained when passing the .yml recipe file to read_yaml() - - type <- tolower(recipe$Analysis$Workflow$Downscaling$type) - - if (type == "none") { - hcst_downscal <- data$hcst - DOWNSCAL_MSG <- "##### NO DOWNSCALING PERFORMED #####" - - } else { - - if (!is.null(data$fcst)) { - warn(recipe$Run$logger, - "The downscaling will only be performed on the hindcast data") - data$fcst <- NULL - } - # Downscaling function params - int_method <- tolower(recipe$Analysis$Workflow$Downscaling$int_method) - bc_method <- tolower(recipe$Analysis$Workflow$Downscaling$bc_method) - lr_method <- tolower(recipe$Analysis$Workflow$Downscaling$lr_method) - log_reg_method <- tolower(recipe$Analysis$Workflow$Downscaling$log_reg_method) - target_grid <- tolower(recipe$Analysis$Workflow$Downscaling$target_grid) - nanalogs <- as.numeric(recipe$Analysis$Workflow$Downscaling$nanalogs) - size <- recipe$Analysis$Workflow$Downscaling$size - - if (is.null(recipe$Analysis$ncores)) { - ncores <- 1 - } else { - ncores <- recipe$Analysis$ncores - } - - #TO DO: add the parameter loocv where it corresponds - if (is.null(recipe$Analysis$loocv)) { - loocv <- TRUE - } else { - loocv <- recipe$Analysis$loocv - } - - DOWNSCAL_TYPES <- c("none", "int", "intbc", "intlr", "analogs", "logreg") - BC_METHODS <- c("quantile_mapping", "bias", "evmos", "mse_min", "crps_min", "rpc-based", "qm") - LR_METHODS <- c("basic", "large-scale", "4nn") - LOG_REG_METHODS <- c("ens_mean", "ens_mean_sd", "sorted_members") - - if (!(type %in% DOWNSCAL_TYPES)) { - stop("Downscaling type in the recipe is not available.
Accepted types ", - "are 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'.") - } - - if (type == "int") { - if (is.null(int_method)) { - stop("Please provide one interpolation method in the recipe.") - } - - if (is.null(target_grid)) { - stop("Please provide the target grid in the recipe.") - } - - # Ensure that observations are in the same grid as experiments - # Only needed for this method because the others already return the - # observations - latmin <- data$hcst$coords$latitude[1] - lonmin <- data$hcst$coords$longitude[1] - latmax <- data$hcst$coords$latitude[length(data$hcst$coords$latitude)] - lonmax <- data$hcst$coords$longitude[length(data$hcst$coords$longitude)] - hcst_downscal <- CST_Interpolation(data$hcst, - points = NULL, - method_remap = int_method, - target_grid = target_grid, - lat_dim = "latitude", - lon_dim = "longitude", - region = c(lonmin, lonmax, latmin, latmax), - method_point_interp = NULL) - - obs_downscal <- CST_Interpolation(data$obs, - points = NULL, - method_remap = int_method, - target_grid = target_grid, - lat_dim = "latitude", - lon_dim = "longitude", - region = c(lonmin, lonmax, latmin, latmax), - method_point_interp = NULL) - - hcst_downscal$obs <- obs_downscal$exp - - DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" - } else if (type == "intbc") { - if (length(int_method) == 0) { - stop("Please provide one (and only one) interpolation method in the recipe.") - } - - if (is.null(bc_method)) { - stop("Please provide one bias-correction method in the recipe. Accepted ", - "methods are 'quantile_mapping', 'bias', 'evmos', 'mse_min', 'crps_min' ", - "'rpc-based', 'qm'. ") - } - - if (is.null(target_grid)) { - stop("Please provide the target grid in the recipe.") - } - - if (!(bc_method %in% BC_METHODS)) { - stop(paste0(bc_method, " method in the recipe is not available. Accepted methods ", - "are 'quantile_mapping', 'bias', 'evmos', 'mse_min', 'crps_min' ", - "'rpc-based', 'qm'.")) - } - - hcst_downscal <- CST_Intbc(data$hcst, data$obs, - target_grid = target_grid, - bc_method = bc_method, - int_method = int_method, - points = NULL, - method_point_interp = NULL, - lat_dim = "latitude", - lon_dim = "longitude", - sdate_dim = "syear", - member_dim = "ensemble", - region = NULL, - ncores = ncores) - DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" - } else if (type == "intlr") { - if (length(int_method) == 0) { - stop("Please provide one (and only one) interpolation method in the recipe.") - } - - if (is.null(lr_method)) { - stop("Please provide one linear regression method in the recipe. Accepted ", - "methods are 'basic', 'large-scale', '4nn'.") - } - - if (is.null(target_grid)) { - stop("Please provide the target grid in the recipe.") - } - - if (!(lr_method %in% LR_METHODS)) { - stop(paste0(lr_method, " method in the recipe is not available. 
Accepted methods ", - "are 'basic', 'large-scale', '4nn'.")) - } - - # TO DO: add the possibility to have the element 'pred' in 'data' - if (lr_method == "large-scale") { - if (is.null(data$pred$data)) { - stop("Please provide the large scale predictors in the element 'data$pred$data'.") - } - } else { - data$pred$data <- NULL - } - - hcst_downscal <- CST_Intlr(data$hcst, data$obs, - lr_method = lr_method, - target_grid = target_grid, - points = NULL, - int_method = int_method, - method_point_interp = NULL, - predictors = data$pred$data, - lat_dim = "latitude", - lon_dim = "longitude", - sdate_dim = "syear", - time_dim = "time", - member_dim = "ensemble", - large_scale_predictor_dimname = 'vars', - loocv = loocv, - region = NULL, - ncores = ncores) - - DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" - } else if (type == "analogs") { - - if (is.null(nanalogs)) { - warning("The number of analogs for searching has not been provided in the ", - "recipe. Setting it to 3.") - nanalogs <- 3 - } - - if (!is.null(size) & recipe$Analysis$Variables$freq == "monthly_mean") { - size <- NULL - warning("Size is set to NULL. ", - "It must be NULL for the monthly input data.") - } - - if (!is.null(size)) { - dum <- data$obs$data ## keep obs data before the window process to provide it later as the output - data$obs$data <- .generate_window(data$obs$data, - sdate_dim = 'syear', - time_dim = 'time', - loocv = TRUE, - size = size) - data$obs$data <- Apply(data$obs$data, - target_dims="window", - fun=function (x) x[!is.na(x)])$output1 - } - - hcst_downscal <- CST_Analogs(data$hcst, data$obs, - grid_exp = data$hcst$attrs$source_files[ - which(!is.na(data$hcst$attrs$source_files))[1]], - nanalogs = nanalogs, - fun_analog = "wmean", - lat_dim = "latitude", - lon_dim = "longitude", - sdate_dim = "syear", - time_dim = "time", - member_dim = "ensemble", - region = NULL, - return_indices = FALSE, - loocv_window = loocv, - ncores = ncores) - - if (!is.null(size)) { - hcst_downscal$obs$data <- Apply(dum, target_dims=c("time", "smonth"), - function (x) {x[1:(dim(data$hcst$data)["time"]), 2]}, - ncores = ncores, - output_dims = "time")$output1 ## 2nd month is the target month - } - - DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" - } else if (type == "logreg") { - - if (length(int_method) == 0) { - stop("Please provide one (and only one) interpolation method in the recipe.") - } - - if (is.null(log_reg_method)) { - stop("Please provide one logistic regression method in the recipe. Accepted ", - "methods are 'ens_mean', 'ens_mean_sd', 'sorted_members'.") - } - - if (is.null(target_grid)) { - stop("Please provide the target grid in the recipe.") - } - - # Since we are forcing to create three categories, and applying cross-validation, - # we need at least six years of data for the logistic regression function to not - # crash - if (dim(data$hcst$data)[names(dim(data$hcst$data)) == "syear"] <= 5) { - stop("The number of start dates is insufficient for the logisitic regression method. ", - "Please provide six or more.") - } - - if (!(log_reg_method %in% LOG_REG_METHODS)) { - stop(paste0(log_reg_method, " method in the recipe is not available. 
Accepted methods ", - "are 'ens_mean', 'ens_mean_sd', 'sorted_members'.")) - } - - hcst_downscal <- CST_LogisticReg(data$hcst, data$obs, - target_grid = target_grid, - int_method = int_method, - log_reg_method = log_reg_method, - probs_cat = c(1/3,2/3), - return_most_likely_cat = FALSE, - points = NULL, - method_point_interp = NULL, - lat_dim = "latitude", - lon_dim = "longitude", - sdate_dim = "syear", - member_dim = "ensemble", - region = NULL, - loocv = loocv, - ncores = ncores) - - DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" - } - } - print(DOWNSCAL_MSG) - - # Saving - if (recipe$Analysis$Workflow$Downscaling$save != 'none') { - info(recipe$Run$logger, "##### START SAVING CALIBRATED DATA #####") - } - ## TODO: What do we do with the full values? - recipe$Run$output_dir <- paste0(recipe$Run$output_dir, - "/outputs/Downscaling/") - # if ((recipe$Analysis$Workflow$Downscaling$save %in% - # c('all', 'exp_only', 'fcst_only')) && (!is.null(data$fcst))) { - # save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') - # } - if (recipe$Analysis$Workflow$Downscaling$save %in% c('all', 'exp_only')) { - save_forecast(recipe = recipe, data_cube = hcst_downscal$exp, type = 'hcst') - } - if (recipe$Analysis$Workflow$Downscaling$save == 'all') { - save_observations(recipe = recipe, data_cube = hcst_downscal$obs) - } - - return(list(hcst = hcst_downscal$exp, obs = hcst_downscal$obs, fcst = NULL)) -} diff --git a/modules/Downscaling/tmp/Analogs.R b/modules/Downscaling/tmp/Analogs.R deleted file mode 100644 index 99fc45e7..00000000 --- a/modules/Downscaling/tmp/Analogs.R +++ /dev/null @@ -1,550 +0,0 @@ -#'@rdname CST_Analogs -#'@title Downscaling using Analogs based on large scale fields. -#' -#'@author J. Ramon, \email{jaume.ramon@bsc.es} -#' -#'@description This function performs a downscaling using Analogs. To compute -#'the analogs given a coarse-scale field, the function looks for days with -#'similar conditions in the historical observations. The coarse scale and -#'observation data can be either global or regional. In the latter case, the -#'region is defined by the user. In principle, the coarse and observation data -#'should be of the same variable, although different variables can also be admitted. -#'The analogs function will find the N best analogs based in Minimum Euclidean -#'distance. -#' -#'The search of analogs must be done in the longest dataset posible, but might -#'require high-memory computational resources. This is important since it is -#'necessary to have a good representation of the possible states of the field in -#'the past, and therefore, to get better analogs. The function can also look for -#'analogs within a window of D days, but is the user who has to define that window. -#'Otherwise, the function will look for analogs in the whole dataset. This function -#'is intended to downscale climate prediction data (i.e., sub-seasonal, seasonal -#'and decadal predictions) but can admit climate projections or reanalyses. It does -#'not have constrains of specific region or variables to downscale. -#'@param exp an 's2dv' object with named dimensions containing the experimental field on -#'the coarse scale for which the downscaling is aimed. The object must have, at least, -#'the dimensions latitude, longitude, start date and time. The object is expected to be -#'already subset for the desired region. Data can be in one or two integrated regions, e.g., -#'crossing the Greenwich meridian. 
To get the correct results in the latter case, -#'the borders of the region should be specified in the parameter 'region'. See parameter -#''region'. -#'@param obs an 's2dv' object with named dimensions containing the observational field. -#'The object must have, at least, the dimensions latitude, longitude and start date. -#'The object is expected to be already subset for the desired region. -#'@param obs2 an 's2dv' object with named dimensions containing a different observational -#'field to that in 'obs'. If provided, these observations will be used in the training, -#'i.e. the search for analogs, so they should be on a coarser grid than that of -#''obs'. Training with observations on a grid with a spatial resolution closer to that -#'of 'exp' will, in principle, ensure better results. The object must have, at least, the -#'dimensions latitude, longitude and start date. The object is expected to be already -#'subset for the desired region. -#'@param grid_exp a character vector with a path to an example file of the exp data. -#'It can be either a path to another NetCDF file from which to read the target grid -#'(a single grid must be defined in such file) or a character vector indicating the -#'coarse grid to be passed to CDO, and it must be a grid recognised by CDO. -#'@param nanalogs an integer indicating the number of analogs to be searched. -#'@param fun_analog a function to be applied over the found analogs. Only these options -#'are valid: "mean", "wmean", "max", "min", "median" or NULL. If set to NULL (default), -#'the function returns the found analogs. -#'@param lat_dim a character vector indicating the latitude dimension name in the element -#''data' in exp and obs. Default set to "lat". -#'@param lon_dim a character vector indicating the longitude dimension name in the element -#''data' in exp and obs. Default set to "lon". -#'@param sdate_dim a character vector indicating the start date dimension name in the -#'element 'data' in exp and obs. Default set to "sdate". -#'@param time_dim a character vector indicating the time dimension name in the element -#''data' in exp and obs. Default set to "time". -#'@param member_dim a character vector indicating the member dimension name in the element -#''data' in exp and obs. Default set to "member". -#'@param region a numeric vector indicating the borders of the region defined in exp. -#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers -#'to the left border, while lonmax refers to the right border. latmin indicates the lower -#'border, whereas latmax indicates the upper border. If set to NULL (default), the function -#'takes the first and last elements of the latitudes and longitudes. -#'@param return_indices a logical vector indicating whether to return the indices of the -#'analogs together with the downscaled fields. Default to FALSE. -#'@param loocv_window a logical vector only to be used if 'obs' does not have the dimension -#''window'. It indicates whether to apply leave-one-out cross-validation in the creation -#'of the window. It is recommended to be set to TRUE. Default to TRUE. -#'@param ncores an integer indicating the number of cores to use in parallel computation. -#'The default value is NULL. -#'@return An 's2dv' object. The element 'data' contains the downscaled field, 'lat' the -#'downscaled latitudes, and 'lon' the downscaled longitudes. If fun_analog is set to NULL -#'(default), the output array in 'data' also contains the dimension 'analog' with the best -#'analog days.
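As the @return text above describes, calling CST_Analogs() with fun_analog = NULL keeps one slice per retained analog in the output, so the reduction can be applied afterwards by the caller. A minimal sketch of that pattern (toy objects as in the @examples below; the dimension name follows this documentation, although the internal .analogs() helper further down labels it 'analogs', so it may need adjusting):

library(multiApply)
# fun_analog = NULL (the default): 'data' keeps the analog dimension
res <- CST_Analogs(exp = exp, obs = obs, grid_exp = 'r360x180')
# Collapse the retained analogs afterwards, e.g. with a median
# instead of the built-in "mean"/"wmean"/"max"/"min"/"median" options
med <- Apply(res$exp$data, target_dims = 'analog',
             fun = median, na.rm = TRUE)$output1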
-#'@examples -#'exp <- rnorm(15000) -#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 30) -#'exp_lons <- 1:5 -#'exp_lats <- 1:4 -#'obs <- rnorm(27000) -#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5, time = 30) -#'obs_lons <- seq(0,6, 6/14) -#'obs_lats <- seq(0,6, 6/11) -#'exp <- s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons) -#'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons) -#'downscaled_field <- CST_Analogs(exp = exp, obs = obs, grid_exp = 'r360x180') -#'@export -CST_Analogs <- function(exp, obs, grid_exp, obs2 = NULL, nanalogs = 3, fun_analog = NULL, lat_dim = "lat", - lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", member_dim = "member", - region = NULL, return_indices = FALSE, loocv_window = TRUE, ncores = NULL) { - - # input exp and obs must be s2dv_cube objects - if (!inherits(exp,'s2dv_cube')) { - stop("Parameter 'exp' must be of the class 's2dv_cube'") - } - - # input exp and obs must be s2dv_cube objects - if (!inherits(obs,'s2dv_cube')) { - stop("Parameter 'obs' must be of the class 's2dv_cube'") - } - - res <- Analogs(exp = exp$data, obs = obs$data, exp_lats = exp$coords[[lat_dim]], - exp_lons = exp$coords[[lon_dim]], obs_lats = obs$coords[[lat_dim]], - obs_lons = obs$coords[[lon_dim]], grid_exp = grid_exp, nanalogs = nanalogs, - fun_analog = fun_analog, lat_dim = lat_dim, lon_dim = lon_dim, - sdate_dim = sdate_dim, time_dim = time_dim, member_dim = member_dim, - region = region, return_indices = return_indices, loocv_window = loocv_window, - ncores = ncores) - - # Modify data, lat and lon in the original s2dv_cube, adding the downscaled data - exp$data <- res$data - exp$dims <- dim(exp$data) - exp$coords[[lon_dim]] <- res$lon - exp$coords[[lat_dim]] <- res$lat - - obs$data <- res$obs - obs$dims <- dim(obs$data) - obs$coords[[lon_dim]] <- res$lon - obs$coords[[lat_dim]] <- res$lat - - res_s2dv <- list(exp = exp, obs = obs) - return(res_s2dv) -} - -#'@rdname Analogs -#'@title Downscaling using Analogs based on large scale fields. -#' -#'@author J. Ramon, \email{jaume.ramon@bsc.es} -#'@author Ll. Lledó, \email{llorenc.lledo@ecmwf.int} -#' -#'@description This function performs a downscaling using Analogs. To compute -#'the analogs given a coarse-scale field, the function looks for days with -#'similar conditions in the historical observations. The coarse scale and -#'observation data can be either global or regional. In the latter case, the -#'region is defined by the user. In principle, the coarse and observation data -#'should be of the same variable, although different variables can also be admitted. -#'The analogs function will find the N best analogs based on minimum Euclidean -#'distance. -#' -#'The search for analogs must be done in the longest dataset possible, but might -#'require high-memory computational resources. This is important since it is -#'necessary to have a good representation of the possible states of the field in -#'the past, and therefore, to get better analogs. The function can also look for -#'analogs within a window of D days, but it is the user who has to define that window. -#'Otherwise, the function will look for analogs in the whole dataset. This function -#'is intended to downscale climate prediction data (i.e., sub-seasonal, seasonal -#'and decadal predictions) but can admit climate projections or reanalyses. It does -#'not have constraints on the specific region or variables to downscale.
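At its core, the analog search described above is a k-nearest-neighbour lookup on flattened fields under Euclidean distance; the internal .analogs() helper further down implements it with FNN::get.knnx(). A self-contained toy sketch of the principle (synthetic data, not the module's API):

library(FNN)
set.seed(1)
train <- matrix(rnorm(100 * 10), nrow = 100)  # 100 candidate coarse fields, 10 grid points each
test  <- matrix(rnorm(10), nrow = 1)          # the field whose analogs are sought
knn <- get.knnx(train, test, k = 3)           # 3 best analogs by Euclidean distance
knn$nn.index                                  # positions of the analogs in 'train'
knn$nn.dist                                   # their distances; 1/dist are the weights used by "wmean"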
-#'@param exp an array with named dimensions containing the experimental field on the -#'coarse scale for which the downscaling is aimed. The object must have, at least, -#'the dimensions latitude, longitude, start date and time. The object is expected to be -#'already subset for the desired region. Data can be in one or two integrated regions, e.g., -#'crossing the Greenwich meridian. To get the correct results in the latter case, -#'the borders of the region should be specified in the parameter 'region'. See parameter -#''region'. -#'@param obs an array with named dimensions containing the observational field. The object -#'must have, at least, the dimensions latitude, longitude, start date and time. The object -#'is expected to be already subset for the desired region. Optionally, 'obs' can have the -#'dimension 'window', containing the sampled fields into which the function will look for -#'the analogs. See function 'generate_window()'. Otherwise, the function will look for -#'analogs using all the possible fields contained in obs. -#'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must -#'range from -90 to 90. -#'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes -#'can range from -180 to 180 or from 0 to 360. -#'@param obs_lats a numeric vector containing the latitude values in 'obs'. Latitudes must -#'range from -90 to 90. -#'@param obs_lons a numeric vector containing the longitude values in 'obs'. Longitudes -#'can range from -180 to 180 or from 0 to 360. -#'@param grid_exp a character vector with a path to an example file of the exp data. -#'It can be either a path to another NetCDF file from which to read the target grid -#'(a single grid must be defined in such file) or a character vector indicating the -#'coarse grid to be passed to CDO, and it must be a grid recognised by CDO. -#'@param obs2 an 's2dv' object with named dimensions containing a different observational -#'field to that in 'obs'. If provided, these observations will be used in the training, -#'i.e. the search for analogs, so they should be on a coarser grid than that of -#''obs'. Training with observations on a grid with a spatial resolution closer to that -#'of 'exp' will, in principle, ensure better results. The object must have, at least, the -#'dimensions latitude, longitude and start date. The object is expected to be already -#'subset for the desired region. -#'@param nanalogs an integer indicating the number of analogs to be searched. -#'@param fun_analog a function to be applied over the found analogs. Only these options -#'are valid: "mean", "wmean", "max", "min", "median" or NULL. If set to NULL (default), -#'the function returns the found analogs. -#'@param lat_dim a character vector indicating the latitude dimension name in the element -#''data' in exp and obs. Default set to "lat". -#'@param lon_dim a character vector indicating the longitude dimension name in the element -#''data' in exp and obs. Default set to "lon". -#'@param sdate_dim a character vector indicating the start date dimension name in the -#'element 'data' in exp and obs. Default set to "sdate". -#'@param time_dim a character vector indicating the time dimension name in the element -#''data' in exp and obs. Default set to "time". -#'@param member_dim a character vector indicating the member dimension name in the element -#''data' in exp and obs. Default set to "member". -#'@param region a numeric vector indicating the borders of the region defined in exp.
-#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers -#'to the left border, while lonmax refers to the right border. latmin indicates the lower -#'border, whereas latmax indicates the upper border. If set to NULL (default), the function -#'takes the first and last elements of the latitudes and longitudes. -#'@param return_indices a logical vector indicating whether to return the indices of the -#'analogs together with the downscaled fields. The indices refer to the position of the -#'element in the vector time * start_date. If 'obs' contains the dimension 'window', it will -#'refer to the position of the element in the dimension 'window'. Default to FALSE. -#'@param loocv_window a logical vector only to be used if 'obs' does not have the dimension -#''window'. It indicates whether to apply leave-one-out cross-validation in the creation -#'of the window. It is recommended to be set to TRUE. Default to TRUE. -#'@param ncores an integer indicating the number of cores to use in parallel computation. -#'The default value is NULL. -#'@import multiApply -#'@import CSTools -#'@importFrom s2dv InsertDim CDORemap -#'@importFrom FNN get.knnx -#' -#'@seealso \code{\link[s2dverification]{CDORemap}} -#' -#'@return A list of three elements. 'data' contains the downscaled field, 'lat' the -#'downscaled latitudes, and 'lon' the downscaled longitudes. If fun_analog is set to NULL -#'(default), the output array in 'data' also contains the dimension 'analog' with the best -#'analog days. -#'@examples -#'exp <- rnorm(15000) -#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 30) -#'exp_lons <- 1:5 -#'exp_lats <- 1:4 -#'obs <- rnorm(27000) -#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5, time = 30) -#'obs_lons <- seq(0,6, 6/14) -#'obs_lats <- seq(0,6, 6/11) -#'downscaled_field <- Analogs(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons, -#'obs_lats = obs_lats, obs_lons = obs_lons, grid_exp = 'r360x180') -#'@export -Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, obs2 = NULL, - obs2_lats = NULL, obs2_lons = NULL, nanalogs = 3, fun_analog = NULL, - lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", - member_dim = "member", region = NULL, return_indices = FALSE, - loocv_window = TRUE, ncores = NULL) { - #----------------------------------- - # Checks - #----------------------------------- - if (!inherits(grid_exp, 'character')) { - stop("Parameter 'grid_exp' must be of class 'character'.
It can be either a path ", - "to another NetCDF file which to read the target grid from (a single grid must be ", - "defined in such file) or a character vector indicating the coarse grid to ", - "be passed to CDO, and it must be a grid recognised by CDO or a NetCDF file.") - } - - if (!inherits(nanalogs, 'numeric')) { - stop("Parameter 'nanalogs' must be of the class 'numeric'") - } - - if (!inherits(lat_dim, 'character')) { - stop("Parameter 'lat_dim' must be of the class 'character'") - } - - if (!inherits(lon_dim, 'character')) { - stop("Parameter 'lon_dim' must be of the class 'character'") - } - - if (!inherits(sdate_dim, 'character')) { - stop("Parameter 'sdate_dim' must be of the class 'character'") - } - - if (!inherits(time_dim, 'character')) { - stop("Parameter 'time_dim' must be of the class 'character'") - } - - # Do not allow synonims for lat (latitude), lon (longitude) and time (sdate) dimension names - if (is.na(match(lon_dim, names(dim(exp)))) | is.na(match(lon_dim, names(dim(obs))))) { - stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter ", - "'lon_dim'") - } - - if (is.na(match(lat_dim, names(dim(exp)))) | is.na(match(lat_dim, names(dim(obs))))) { - stop("Missing latitude dimension in 'exp' and/or 'obs', or does not match the parameter ", - "'lat_dim'") - } - - if (is.na(match(sdate_dim, names(dim(exp)))) | is.na(match(sdate_dim, names(dim(obs))))) { - stop("Missing start date dimension in 'exp' and/or 'obs', or does not match the parameter ", - "'sdate_dim'") - } - - if (is.na(match(time_dim, names(dim(exp)))) | is.na(match(time_dim, names(dim(obs))))) { - stop("Missing time dimension in 'exp' and/or 'obs', or does not match the parameter ", - "'time_dim'") - } - - # Ensure we have enough data to interpolate from high-res to coarse grid - #if ((obs_lats[1] > exp_lats[1]) | (obs_lats[length(obs_lats)] < exp_lats[length(exp_lats)]) | - # (obs_lons[1] > exp_lons[1]) | (obs_lons[length(obs_lons)] < exp_lons[length(exp_lons)])) { - - # stop("There are not enough data in 'obs'. Please to add more latitudes or ", - # "longitudes.") - #} - - # the code is not yet prepared to handle members in the observations - restore_ens <- FALSE - if (member_dim %in% names(dim(obs))) { - if (identical(as.numeric(dim(obs)[member_dim]), 1)) { - restore_ens <- TRUE - obs <- ClimProjDiags::Subset(x = obs, along = member_dim, indices = 1, drop = 'selected') - } else { - stop("Not implemented for observations with members ('obs' can have 'member_dim', ", - "but it should be of length = 1).") - } - } - - if (!is.null(obs2)) { - # the code is not yet prepared to handle members in the observations - if (member_dim %in% names(dim(obs2))) { - if (identical(as.numeric(dim(obs2)[member_dim]), 1)) { - obs2 <- ClimProjDiags::Subset(x = obs2, along = member_dim, indices = 1, drop = 'selected') - } else { - stop("Not implemented for observations with members ('obs2' can have 'member_dim', ", - "but it should be of length = 1).") - } - } - - if (is.null(obs2_lats) | is.null(obs2_lons)) { - stop("Missing latitudes and/or longitudes for the provided training observations. 
Please ", - "provide them with the parametres 'obs2_lats' and 'obs2_lons'") - } - - if (is.na(match(lon_dim, names(dim(obs2))))) { - stop("Missing longitude dimension in 'obs2', or does not match the parameter 'lon_dim'") - } - - if (is.na(match(lat_dim, names(dim(obs2))))) { - stop("Missing latitude dimension in 'obs2', or does not match the parameter 'lat_dim'") - } - - if (is.na(match(sdate_dim, names(dim(obs2))))) { - stop("Missing start date dimension in 'obs2', or does not match the parameter 'sdate_dim'") - } - - if (is.na(match(time_dim, names(dim(obs2))))) { - stop("Missing time dimension in 'obs2', or does not match the parameter 'time_dim'") - } - } - ## ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | - length(ncores) > 1) { - stop("Parameter 'ncores' must be a positive integer.") - } - } - - # Select a function to apply to the analogs selected for a given observation - if (!is.null(fun_analog)) { - stopifnot(fun_analog %in% c("mean", "wmean", "max", "min", "median")) - } - - if (!is.null(obs2)) { - obs_train <- obs2 - obs_train_lats <- obs2_lats - obs_train_lons <- obs2_lons - } else { - obs_train <- obs - obs_train_lats <- obs_lats - obs_train_lons <- obs_lons - } - - # Correct indices later if cross-validation - loocv_correction <- FALSE - if ( !("window" %in% names(dim(obs_train))) & loocv_window) { - loocv_correction <- TRUE - } - - #----------------------------------- - # Interpolate high-res observations to the coarse grid - #----------------------------------- - if (is.null(region)) { - warning("The borders of the downscaling region have not been provided. Assuming the four borders of the ", - "downscaling region are defined by the first and last elements of the parametres 'exp_lats' and ", - "'exp_lons'.") - region <- c(exp_lons[1], exp_lons[length(exp_lons)], exp_lats[1], exp_lats[length(exp_lats)]) - } - - obs_interpolated <- Interpolation(exp = obs_train, lats = obs_train_lats, lons = obs_train_lons, - target_grid = grid_exp, lat_dim = lat_dim, lon_dim = lon_dim, - method_remap = "conservative", region = region, ncores = ncores) - # If after interpolating 'obs' data the coordinates do not match, the exp data is interpolated to - # the same grid to force the matching - if (!.check_coords(lat1 = as.numeric(obs_interpolated$lat), lat2 = exp_lats, lon1 = as.numeric(obs_interpolated$lon), lon2 = exp_lons)) { - exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = grid_exp, - lat_dim = lat_dim, lon_dim = lon_dim, method_remap = "conservative", - region = region, ncores = ncores)$data - } else { - exp_interpolated <- exp - } - - # Create window if user does not have it in the training observations - if ( !("window" %in% names(dim(obs_interpolated$data))) ) { - obs_train_interpolated <- .generate_window(obj = obs_interpolated$data, sdate_dim = sdate_dim, - time_dim = time_dim, loocv = loocv_window, ncores = ncores) - obs_hres <- .generate_window(obj = obs, sdate_dim = sdate_dim, time_dim = time_dim, loocv = loocv_window, ncores = ncores) - } else { - obs_train_interpolated <- obs_interpolated$data - dim(obs_train_interpolated) <- dim(obs_train_interpolated)[-which(names(dim(obs_train_interpolated))=="time")] - obs_hres <- obs - dim(obs_hres) <- dim(obs_hres)[-which(names(dim(obs_hres))=="time")] - } - - #----------------------------------- - # Reshape train and test - #----------------------------------- - res.data <- Apply(list(obs_train_interpolated, exp_interpolated, obs_hres), - 
target_dims = list(c("window", lat_dim, lon_dim), c(lat_dim, lon_dim), - c("window", lat_dim, lon_dim)), - fun = function(tr, te, ob) .analogs(train = tr, test = te, obs_hres = ob, k = nanalogs, - fun_analog = fun_analog), ncores = ncores)$output1 - - # Return the indices of the best analogs - if (return_indices) { - res.ind <- Apply(list(obs_train_interpolated, exp_interpolated, obs_hres), - target_dims = list(c("window", lat_dim, lon_dim), c(lat_dim, lon_dim), - c("window", lat_dim, lon_dim)), - fun = function(tr, te, ob) .analogs(train = tr, test = te, obs_hres = ob, k = nanalogs, - fun_analog = fun_analog, return_indices = TRUE), ncores = ncores, output_dims = 'ind')$output1 - - # If cross-validation has been applied, correct the indices - if (loocv_correction) { - nsdates <- dim(res.ind)[names(dim(res.ind)) == sdate_dim] - ntimes <- dim(res.ind)[names(dim(res.ind)) == time_dim] - res.ind <- Apply(res.ind, target_dims = c("ind", sdate_dim), function(x) - sapply(1:nsdates, function(s) seq(ntimes * nsdates)[ - (ntimes * (s - 1) + 1:ntimes)][x[, s]]), - output_dims = c("ind", sdate_dim), ncores = ncores)$output1 - } - - # restore ensemble dimension in observations if it existed originally - if (restore_ens) { - obs <- s2dv::InsertDim(obs, posdim = 1, lendim = 1, name = member_dim) - } - - res <- list(data = res.data, ind = res.ind, obs = obs, lon = obs_lons, lat = obs_lats) - } - else { - # restore ensemble dimension in observations if it existed originally - if (restore_ens) { - obs <- s2dv::InsertDim(obs, posdim = 1, lendim = 1, name = member_dim) - } - - res <- list(data = res.data, obs = obs, lon = obs_lons, lat = obs_lats) - } - - return(res) -} - -# For each element in test, find the indices of the k nearest neigbhors in train -.analogs <- function(train, test, obs_hres, k, fun_analog, return_indices = FALSE) { - - require(FNN) - - # train and obs_hres dim: 3 dimensions window, lat and lon (in this order) - # test dim: 2 dimensions lat and lon (in this order) - # Number of lats/lons of the high-resolution data - space_dims_hres <- dim(obs_hres)[c(2,3)] - - # Reformat train and test as an array with (time, points) - train <- apply(train, 1, as.vector); names(dim(train))[1] <- "space" - test <- as.vector(test) - obs_hres <- apply(obs_hres, 1, as.vector); names(dim(obs_hres))[1] <- "space" - - # Identify and remove NA's - idx_na_tr <- is.na(train[ , 1]) - idx_na_te <- is.na(test) - idx_na <- idx_na_tr | idx_na_te - tr_wo_na <- t(train[!idx_na , ]) - te_wo_na <- test[!idx_na] - te_wo_na <- InsertDim(data = te_wo_na, posdim = 1, lendim = 1, name = "time") - - knn.ind <- get.knnx(tr_wo_na, te_wo_na, k) - - dist <- knn.ind$nn.dist - idx <- knn.ind$nn.index - - # Either return only the indices or the analogs - if (return_indices) { - res <- as.numeric(idx) - } else { - res <- obs_hres[ , idx] - dim(res) <- c(space_dims_hres, analogs = k) - - if (!is.null(fun_analog)) { - if (fun_analog == "wmean") { - weight <- 1 / dist - res <- apply(res, c(1,2), function(x) weighted.mean(x, weight)) - } else if (fun_analog == "min") { - res<-res[,,which.min(dist)] - } else if (fun_analog == "max") { - res<-res[,,which.max(dist)] - } else { - res <- apply(res, c(1,2), fun_analog) - } - } - } - - return(res) -} - - -# Add the dimension window to an array that contains, at least, the start date and time -# dimensions -# object has at least dimensions sdate and time -.generate_window <- function(obj, sdate_dim, time_dim, loocv, size = NULL, ncores = NULL) { - - rsdates <- 1:dim(obj)[names(dim(obj)) == 
sdate_dim] - ntimes <- dim(obj)[names(dim(obj)) == time_dim] - rtimes <- 1:dim(obj)[names(dim(obj)) == time_dim] - - # Generate a window containing all the data - if (is.null(size)) { - - # Generate window removing one start date each time (leave-one-out cross-validation) - if (loocv) { - obj_window <- Apply(obj, target_dims = c(time_dim, sdate_dim), - fun = function(x) sapply(rsdates, function(s) as.vector(x[ rtimes, -s])), - output_dims = c('window', sdate_dim), ncores = ncores)$output1 - # Generate window without cross-validation - } else { - obj_window <- Apply(obj, target_dims = c(time_dim, sdate_dim), - fun = as.vector, output_dims = 'window', ncores = ncores)$output1 - } - } - # Generate window of the size specified by the user. Only applied with CV - else { - # For an accurate generation of the window, it is mandatory to add some "extra" data. - if (!("smonth" %in% names(dim(obj)))) { - stop("Missing 'smonth' dimension") - } - - # Concatenate data from previous, target and posterior months - obj_new <- Apply(obj, target_dims = list(c("time", "smonth")), - fun = as.vector, output_dims = "time", ncores = ncores )$output1 - - if (loocv) { - obj_window <- Apply(list(obj_new, rsdates), target_dims = list(c(time_dim, sdate_dim), NULL), - fun = function(x, s) as.vector(x[(ntimes + min(rtimes) - size):(ntimes + max(rtimes) + size), -s]), - output_dims = 'window', ncores = ncores)$output1 - names(dim(obj_window))[(length(names(dim(obj_window))) - 1):length(names(dim(obj_window)))] <- c(time_dim, sdate_dim) - } else { - obj_window <- Apply(obj_new, target_dims = c(time_dim, sdate_dim), - fun = function(x) sapply(rtimes, function(t) as.vector(x[(ntimes + min(rtimes) - size):(ntimes + max(rtimes) + size), ])), - output_dims = c('window', time_dim), ncores = ncores)$output1 - - } - } - - return(obj_window) -} diff --git a/modules/Downscaling/tmp/Intbc.R b/modules/Downscaling/tmp/Intbc.R deleted file mode 100644 index dc5d050b..00000000 --- a/modules/Downscaling/tmp/Intbc.R +++ /dev/null @@ -1,339 +0,0 @@ -#'@rdname CST_Intbc -#'@title Downscaling using interpolation and bias adjustment. -#' -#'@author J. Ramon, \email{jaume.ramon@bsc.es} -#' -#'@description This function performs a downscaling using an interpolation and a later bias -#'adjustment. It is recommended that the observations are passed already in the target grid. -#'Otherwise, the function will also perform an interpolation of the observed field into the -#'target grid. The coarse scale and observation data can be either global or regional. In the -#'latter case, the region is defined by the user. In principle, the coarse and observation data -#'are intended to be of the same variable, although different variables can also be admitted. -#' -#'@param exp an 's2dv object' containing the experimental field on the -#'coarse scale for which the downscaling is aimed. The object must have, at least, -#'the dimensions latitude, longitude, start date and member. The object is expected to be -#'already subset for the desired region. Data can be in one or two integrated regions, e.g., -#'crossing the Greenwich meridian. To get the correct results in the latter case, -#'the borders of the region should be specified in the parameter 'region'. See parameter -#''region'. -#'@param obs an 's2dv object' containing the observational field. The object -#'must have, at least, the dimensions latitude, longitude and start date. The object is -#'expected to be already subset for the desired region. 
-#'@param target_grid a character vector indicating the target grid to be passed to CDO. -#'It must be a grid recognised by CDO or a NetCDF file. -#'@param bc_method a character vector indicating the bias adjustment method to be applied after -#'the interpolation. Accepted methods are 'quantile_mapping', 'dynamical_bias', 'bias', 'evmos', -#''mse_min', 'crps_min', 'rpc-based'. The abbreviations 'dbc', 'qm' can also be used. -#'@param int_method a character vector indicating the regridding method to be passed to CDORemap. -#'Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is to be used, CDO_1.9.8 -#'or newer version is required. -#'@param points a list of two elements containing the point latitudes and longitudes -#'of the locations to downscale the model data. The list must contain the two elements -#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is -#'to a point location, only regular grids are allowed for exp and obs. Only needed if the -#'downscaling is to a point location. -#'@param method_point_interp a character vector indicating the interpolation method to interpolate -#'model gridded data into the point locations. Accepted methods are "nearest", "bilinear", "9point", -#'"invdist4nn", "NE", "NW", "SE", "SW". Only needed if the downscaling is to a point location. -#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data' -#'in exp and obs. Default set to "lat". -#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data' -#'in exp and obs. Default set to "lon". -#'@param sdate_dim a character vector indicating the start date dimension name in the element -#''data' in exp and obs. Default set to "sdate". -#'@param member_dim a character vector indicating the member dimension name in the element -#''data' in exp and obs. Default set to "member". -#'@param region a numeric vector indicating the borders of the region defined in obs. -#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers -#'to the left border, while lonmax refers to the right border. latmin indicates the lower -#'border, whereas latmax indicates the upper border. If set to NULL (default), the function -#'takes the first and last elements of the latitudes and longitudes. -#'@param ncores an integer indicating the number of cores to use in parallel computation. -#'The default value is NULL. -#'@return An 's2dv' object. The element 'data' contains the downscaled field, 'lat' the -#'downscaled latitudes, and 'lon' the downscaled longitudes. -#'@examples -#'exp <- rnorm(500) -#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5) -#'exp_lons <- 1:5 -#'exp_lats <- 1:4 -#'obs <- rnorm(900) -#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5) -#'obs_lons <- seq(1,5, 4/14) -#'obs_lats <- seq(1,4, 3/11) -#'exp <- s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons) -#'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons) -#'res <- CST_Intbc(exp = exp, obs = obs, target_grid = 'r1280x640', bc_method = 'simple_bias', int_method = 'conservative') -#'@export - -CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, points = NULL, - method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", - sdate_dim = "sdate", member_dim = "member", region = NULL, ncores = NULL, ...)
-{ - if (!inherits(exp,'s2dv_cube')) { - stop("Parameter 'exp' must be of the class 's2dv_cube'") - } - - if (!inherits(obs,'s2dv_cube')) { - stop("Parameter 'obs' must be of the class 's2dv_cube'") - } - - res <- Intbc(exp = exp$data, obs = obs$data, exp_lats = exp$coords[[lat_dim]], exp_lons = exp$coords[[lon_dim]], - obs_lats = obs$coords[[lat_dim]], obs_lons = obs$coords[[lon_dim]], target_grid = target_grid, - int_method = int_method, bc_method = bc_method, points = points, - source_file_exp = exp$attrs$source_files[1], source_file_obs = obs$attrs$source_files[1], - method_point_interp = method_point_interp, lat_dim = lat_dim, lon_dim = lon_dim, - sdate_dim = sdate_dim, member_dim = member_dim, region = region, ncores = ncores, ...) - - # Modify data, lat and lon in the original s2dv_cube, adding the downscaled data - exp$data <- res$data - exp$dims <- dim(exp$data) - exp$coords[[lon_dim]] <- res$lon - exp$coords[[lat_dim]] <- res$lat - - obs$data <- res$obs - obs$dims <- dim(obs$data) - obs$coords[[lon_dim]] <- res$lon - obs$coords[[lat_dim]] <- res$lat - - res_s2dv <- list(exp = exp, obs = obs) - return(res_s2dv) - -} - -#'@rdname Intbc -#'@title Downscaling using interpolation and bias adjustment. -#' -#'@author J. Ramon, \email{jaume.ramon@bsc.es} -#' -#'@description This function performs a downscaling using an interpolation and a later bias -#'adjustment. It is recommended that the observations are passed already in the target grid. -#'Otherwise, the function will also perform an interpolation of the observed field into the -#'target grid. The coarse scale and observation data can be either global or regional. In the -#'latter case, the region is defined by the user. In principle, the coarse and observation data -#'are intended to be of the same variable, although different variables can also be admitted. -#' -#'@param exp an array with named dimensions containing the experimental field on the -#'coarse scale for which the downscaling is aimed. The object must have, at least, -#'the dimensions latitude, longitude, start date and member. The object is expected to be -#'already subset for the desired region. Data can be in one or two integrated regions, e.g., -#'crossing the Greenwich meridian. To get the correct results in the latter case, -#'the borders of the region should be specified in the parameter 'region'. See parameter -#''region'. -#'@param obs an array with named dimensions containing the observational field. The object -#'must have, at least, the dimensions latitude, longitude and start date. The object is -#'expected to be already subset for the desired region. -#'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must -#'range from -90 to 90. -#'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes -#'can range from -180 to 180 or from 0 to 360. -#'@param obs_lats a numeric vector containing the latitude values in 'obs'. Latitudes must -#'range from -90 to 90. -#'@param obs_lons a numeric vector containing the longitude values in 'obs'. Longitudes -#'can range from -180 to 180 or from 0 to 360. -#'@param target_grid a character vector indicating the target grid to be passed to CDO. -#'It must be a grid recognised by CDO or a NetCDF file. -#'@param bc_method a character vector indicating the bias adjustment method to be applied after -#'the interpolation. Accepted methods are 'quantile_mapping', 'dynamical_bias', 'bias', 'evmos', -#''mse_min', 'crps_min', 'rpc-based'.
The abbreviations 'dbc', 'qm' can also be used. -#'@param int_method a character vector indicating the regridding method to be passed to CDORemap. -#'Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is to be used, CDO_1.9.8 -#'or newer version is required. -#'@param points a list of two elements containing the point latitudes and longitudes -#'of the locations to downscale the model data. The list must contain the two elements -#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is -#'to a point location, only regular grids are allowed for exp and obs. Only needed if the -#'downscaling is to a point location. -#'@param method_point_interp a character vector indicating the interpolation method to interpolate -#'model gridded data into the point locations. Accepted methods are "nearest", "bilinear", "9point", -#'"invdist4nn", "NE", "NW", "SE", "SW". Only needed if the downscaling is to a point location. -#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data' -#'in exp and obs. Default set to "lat". -#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data' -#'in exp and obs. Default set to "lon". -#'@param sdate_dim a character vector indicating the start date dimension name in the element -#''data' in exp and obs. Default set to "sdate". -#'@param member_dim a character vector indicating the member dimension name in the element -#''data' in exp and obs. Default set to "member". -#'@param source_file_exp a character vector with a path to an example file of the exp data. -#'Only needed if the downscaling is to a point location. -#'@param source_file_obs a character vector with a path to an example file of the obs data. -#'Only needed if the downscaling is to a point location. -#'@param region a numeric vector indicating the borders of the region defined in obs. -#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers -#'to the left border, while lonmax refers to the right border. latmin indicates the lower -#'border, whereas latmax indicates the upper border. If set to NULL (default), the function -#'takes the first and last elements of the latitudes and longitudes. -#'@param ncores an integer indicating the number of cores to use in parallel computation. -#'The default value is NULL. -#'@import CSTools -#' -#'@seealso \code{\link[CSTools]{BiasCorrection}} -#'@seealso \code{\link[CSTools]{Calibration}} -#'@seealso \code{\link[CSTools]{QuantileMapping}} -#' -#'@return A list of three elements. 'data' contains the downscaled field, 'lat' the -#'downscaled latitudes, and 'lon' the downscaled longitudes. -#'@examples -#'exp <- rnorm(500) -#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5) -#'exp_lons <- 1:5 -#'exp_lats <- 1:4 -#'obs <- rnorm(900) -#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5) -#'obs_lons <- seq(1,5, 4/14) -#'obs_lats <- seq(1,4, 3/11) -#'res <- Intbc(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons, obs_lats = obs_lats, -#'obs_lons = obs_lons, target_grid = 'r1280x640', bc_method = 'simple_bias', int_method = 'conservative') -#'@export -Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, bc_method, int_method = NULL, - points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", - time_dim = "time", member_dim = "member", source_file_exp = NULL, source_file_obs = NULL, - region = NULL, ncores = NULL, ...)
{ - - if (!inherits(bc_method, 'character')) { - stop("Parameter 'bc_method' must be of the class 'character'") - } - - if (!inherits(lat_dim, 'character')) { - stop("Parameter 'lat_dim' must be of the class 'character'") - } - - if (!inherits(lon_dim, 'character')) { - stop("Parameter 'lon_dim' must be of the class 'character'") - } - - if (!inherits(sdate_dim, 'character')) { - stop("Parameter 'sdate_dim' must be of the class 'character'") - } - - if (!inherits(member_dim, 'character')) { - stop("Parameter 'member_dim' must be of the class 'character'") - } - - if (is.na(match(lon_dim, names(dim(exp))))) { - stop("Missing longitude dimension in 'exp', or does not match the parameter ", - "'lon_dim'") - } - - if (is.na(match(lat_dim, names(dim(exp))))) { - stop("Missing latitude dimension in 'exp', or does not match the parameter ", - "'lat_dim'") - } - - if (is.na(match(sdate_dim, names(dim(exp)))) | is.na(match(sdate_dim, names(dim(obs))))) { - stop("Missing start date dimension in 'exp' and/or 'obs', or does not match the parameter ", - "'sdate_dim'") - } - - if (is.na(match(member_dim, names(dim(exp))))) { - stop("Missing member dimension in 'exp', or does not match the parameter 'member_dim'") - } - - if (!(bc_method %in% c('qm', 'dbc', 'quantile_mapping', 'dynamical_bias', 'bias', 'evmos', 'mse_min', - 'crps_min', 'rpc-based'))) { - stop("Parameter 'bc_method' must be a character vector indicating the bias adjustment method. ", - "Accepted methods are 'quantile_mapping', 'dynamical_bias', 'bias', 'evmos', 'mse_min', ", - "'crps_min', 'rpc-based'. The abbreviations 'dbc','qm' can also be used.") - } - - # When observations are pointwise - if (!is.null(points) & !is.na(match("location", names(dim(obs))))) { - point_obs <- T - # dimension aux in obs is needed - if (is.na(match("aux", names(dim(obs))))) { - obs <- InsertDim(obs, posdim = 1, lendim = 1, name = "aux") - } - } else { - point_obs <- F - } - - if (!is.null(points) & (is.null(source_file_exp))) { - stop("No source file found. Source file must be provided in the parameter 'source_file_exp'.") - } - - if (!is.null(points) & is.null(method_point_interp)) { - stop("Please provide the interpolation method to interpolate gridded data to point locations ", - "through the parameter 'method_point_interp'.") - } - - if (is.null(region)) { - warning("The borders of the downscaling region have not been provided. 
Assuming the four borders ", - "of the downscaling region are defined by the first and last elements of the parametres ", - "'obs_lats' and 'obs_lons'.") - region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)]) - } - ## ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | - length(ncores) > 1) { - stop("Parameter 'ncores' must be a positive integer.") - } - } - - exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid, - method_remap = int_method, points = points, source_file = source_file_exp, - lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp, - region = region, ncores = ncores) - - # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to - # the same grid to force the matching - if ((!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats, - lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !(point_obs)) { - obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid, - method_remap = int_method, points = points, source_file = source_file_obs, - lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp, - region = region, ncores = ncores) - obs_ref <- obs_interpolated$data - } else { - obs_ref <- obs - } - - # Some functions only accept the dimension names "member" and "sdate" for exp and - # "sdate" for obs - #if (member_dim != 'member') { - # names(dim(exp_interpolated$data)) <- replace(names(dim(exp_interpolated$data)), - # which(names(dim(exp_interpolated$data)) == member_dim), 'member') - #} - - #if (sdate_dim != 'sdate') { - # names(dim(exp_interpolated$data)) <- replace(names(dim(exp_interpolated$data)), - # which(names(dim(exp_interpolated$data)) == sdate_dim), 'sdate') - # names(dim(obs_ref)) <- replace(names(dim(obs_ref)), - # which(names(dim(obs_ref)) == sdate_dim), 'sdate') - #} - - if (bc_method == 'qm' | bc_method == 'quantile_mapping') { - - res <- QuantileMapping(exp = exp_interpolated$data, obs = obs_ref, na.rm = TRUE, - memb_dim = member_dim, sdate_dim = sdate_dim, ncores = ncores, ...) - } - else if (bc_method == 'dbc' | bc_method == 'dynamical_bias') { - # the temporal dimension must be only one dimension called "time" - if (all(c(time_dim, sdate_dim) %in% names(dim(exp_interpolated$data)))) { - exp_interpolated$data <- Apply(exp_interpolated$data, target_dims = c(time_dim, sdate_dim), - fun = as.vector, output_dims = "time", ncores = ncores)$output1 - } - if (all(c(time_dim, sdate_dim) %in% names(dim(obs_ref)))) { - obs_ref <- Apply(obs_ref, target_dims = c(time_dim, sdate_dim), fun = as.vector, - output_dims = "time", ncores = ncores)$output1 - } - # REMEMBER to add na.rm = T in colMeans in .proxiesattractor - res <- DynBiasCorrection(exp = exp_interpolated$data, obs = obs_ref, ncores = ncores, ...) - } else { - if (dim(exp_interpolated$data)[member_dim] == 1) { - stop('Calibration must not be used with only one ensemble member.') - } - if (dim(obs_ref)[sdate_dim] == 1) { - warning('Simple Bias Correction should not be used with only one observation. 
Returning NA.') - } - res <- Calibration(exp = exp_interpolated$data, obs = obs_ref, memb_dim = member_dim, - sdate_dim = sdate_dim, ncores = ncores, cal.method = bc_method) - } - - # Return a list of three elements - res <- list(data = res, obs = obs_ref, lon = exp_interpolated$lon, lat = exp_interpolated$lat) - - return(res) -} diff --git a/modules/Downscaling/tmp/Interpolation.R b/modules/Downscaling/tmp/Interpolation.R deleted file mode 100644 index ed79f4fd..00000000 --- a/modules/Downscaling/tmp/Interpolation.R +++ /dev/null @@ -1,767 +0,0 @@ -#'@rdname CST_Interpolation -#'@title Regrid or interpolate gridded data to a point location. -#' -#'@author J. Ramon, \email{jaume.ramon@bsc.es} -#' -#'@description This function interpolates gridded model data from one grid to -#'another (regrid) or interpolates gridded model data to a set of point locations. -#'The gridded model data can be either global or regional. In the latter case, the -#'region is defined by the user. It does not have constraints on the specific region or -#'variables to downscale. -#'@param exp an s2dv object containing the experimental field on the -#'coarse scale for which the downscaling is aimed. The object must have, at least, -#'the dimensions latitude and longitude. The field data is expected to be already subset -#'for the desired region. Data can be in one or two integrated regions, e.g., -#'crossing the Greenwich meridian. To get the correct results in the latter case, -#'the borders of the region should be specified in the parameter 'region'. See parameter -#''region'. -#'@param points a list of two elements containing the point latitudes and longitudes -#'of the locations to downscale the model data. The list must contain the two elements -#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is -#'to a point location, only regular grids are allowed for exp and obs. Only needed if the -#'downscaling is to a point location. -#'@param method_remap a character vector indicating the regridding method to be passed -#'to CDORemap. Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is -#'to be used, CDO_1.9.8 or newer version is required. -#'@param target_grid a character vector indicating the target grid to be passed to CDO. -#'It must be a grid recognised by CDO or a NetCDF file. -#'@param lat_dim a character vector indicating the latitude dimension name in the element -#''exp' and/or 'points'. Default set to "lat". -#'@param lon_dim a character vector indicating the longitude dimension name in the element -#''exp' and/or 'points'. Default set to "lon". -#'@param region a numeric vector indicating the borders of the region defined in exp. -#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers -#'to the left border, while lonmax refers to the right border. latmin indicates the lower -#'border, whereas latmax indicates the upper border. If set to NULL (default), the function -#'takes the first and last elements of the latitudes and longitudes. -#'@param method_point_interp a character vector indicating the interpolation method to -#'interpolate model gridded data into the point locations. Accepted methods are "nearest", -#'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW". -#'@param ncores an integer indicating the number of cores to use in parallel computation. -#'The default value is NULL. -#'@seealso \code{\link[s2dverification]{CDORemap}} -#' -#'@return An s2dv object containing the downscaled field. 
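For the point-location path described in the parameters above, a minimal sketch of the call (the coordinates below are assumed purely for illustration; 'exp' must carry a source file in exp$attrs$source_files, since the grid description is read from it):

points <- list(lat = c(40.5, 41.2), lon = c(1.9, 2.3))
res <- CST_Interpolation(exp = exp, points = points,
                         method_point_interp = 'bilinear')

The regridding path is covered by the roxygen example that follows.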
-#' -#'@examples -#'exp <- rnorm(500) -#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 1) -#'lons <- 1:5 -#'lats <- 1:4 -#'exp <- s2dv_cube(data = exp, lat = lats, lon = lons) -#'res <- CST_Interpolation(exp = exp, method_remap = 'conservative', target_grid = 'r1280x640') -#'@export -CST_Interpolation <- function(exp, points = NULL, method_remap = NULL, target_grid = NULL, - lat_dim = "lat", lon_dim = "lon", region = NULL, - method_point_interp = NULL, ncores = NULL) -{ - if (!inherits(exp,'s2dv_cube')) { - stop("Parameter 'exp' must be of the class 's2dv_cube'") - } - - #if (is.null(exp[[lat_dim]]) | is.null(exp[[lon_dim]])) { - # stop("The name of the latitude/longitude elements in 'exp' must match the parameters ", - # "'lat_dim' and 'lon_dim'") - #} - - if ((length(which(names(dim(exp$data)) == lat_dim)) == 0) | (length(which(names(dim(exp$data)) == lon_dim)) == 0)) { - stop("The name of the latitude/longitude dimensions in 'exp$data' must match the parameters 'lat_dim' and 'lon_dim'") - } - - res <- Interpolation(exp = exp$data, lats = exp$coords[[lat_dim]], lons = exp$coords[[lon_dim]], - source_file = exp$attrs$source_files[1], points = points, - method_remap = method_remap, target_grid = target_grid, lat_dim = lat_dim, - lon_dim = lon_dim, region = region, method_point_interp = method_point_interp, ncores = ncores) - - # Modify data, lat and lon in the original s2dv_cube, adding the downscaled data - exp$data <- res$data - exp$dims <- dim(exp$data) - exp$coords[[lon_dim]] <- res$lon - exp$coords[[lat_dim]] <- res$lat - - res_s2dv <- list(exp = exp, obs = NULL) - return(res_s2dv) -} - -#'@rdname Interpolation -#'@title Regrid or interpolate gridded data to a point location. -#' -#'@author J. Ramon, \email{jaume.ramon@bsc.es} -#'@author Ll. Lledó, \email{llorenc.lledo@ecmwf.int} -#' -#'@description This function interpolates gridded model data from one grid to -#'another (regrid) or interpolates gridded model data to a set of point locations. -#'The gridded model data can be either global or regional. In the latter case, the -#'region is defined by the user. It does not have constraints on the specific region or -#'variables to downscale. -#'@param exp an array with named dimensions containing the experimental field on the -#'coarse scale for which the downscaling is aimed. The object must have, at least, -#'the dimensions latitude and longitude. The object is expected to be already subset -#'for the desired region. Data can be in one or two integrated regions, e.g., -#'crossing the Greenwich meridian. To get the correct results in the latter case, -#'the borders of the region should be specified in the parameter 'region'. See parameter -#''region'. -#'@param lats a numeric vector containing the latitude values. Latitudes must range from -#'-90 to 90. -#'@param lons a numeric vector containing the longitude values. Longitudes can range from -#'-180 to 180 or from 0 to 360. -#'@param points a list of two elements containing the point latitudes and longitudes -#'of the locations to downscale the model data. The list must contain the two elements -#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is -#'to a point location, only regular grids are allowed for exp and obs. Only needed if the -#'downscaling is to a point location. -#'@param source_file a character vector with a path to an example file of the exp data. -#'Only needed if the downscaling is to a point location. 
-#'@param method_remap a character vector indicating the regridding method to be passed -#'to CDORemap. Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is -#'to be used, CDO_1.9.8 or newer version is required. -#'@param target_grid a character vector indicating the target grid to be passed to CDO. -#'It must be a grid recognised by CDO or a NetCDF file. -#'@param lat_dim a character vector indicating the latitude dimension name in the element -#''exp' and/or 'points'. Default set to "lat". -#'@param lon_dim a character vector indicating the longitude dimension name in the element -#''exp' and/or 'points'. Default set to "lon". -#'@param region a numeric vector indicating the borders of the region defined in exp. -#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers -#'to the left border, while lonmax refers to the right border. latmin indicates the lower -#'border, whereas latmax indicates the upper border. If set to NULL (default), the function -#'takes the first and last elements of the latitudes and longitudes. -#'@param method_point_interp a character vector indicating the interpolation method to -#'interpolate model gridded data into the point locations. Accepted methods are "nearest", -#'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW". Only needed if the downscaling -#'is to a point location. -#'@param ncores an integer indicating the number of cores to use in parallel computation. -#'The default value is NULL. -#'@import multiApply -#'@import plyr -#'@importFrom s2dv CDORemap -#' -#'@seealso \code{\link[s2dverification]{CDORemap}} -#' -#'@return A list of three elements. 'data' contains the downscaled field, 'lat' the -#'downscaled latitudes, and 'lon' the downscaled longitudes. -#' -#'@examples -#'exp <- rnorm(500) -#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 1) -#'lons <- 1:5 -#'lats <- 1:4 -#'res <- Interpolation(exp = exp, lats = lats, lons = lons, method_remap = 'conservative', target_grid = 'r1280x640') -#'@export -Interpolation <- function(exp, lats, lons, points = NULL, source_file = NULL, method_remap = NULL, - target_grid = NULL, lat_dim = "lat", lon_dim = "lon", region = NULL, - method_point_interp = NULL, ncores = NULL) -{ - if (!is.null(method_remap)) { - if (!inherits(method_remap, 'character')) { - stop("Parameter 'method_remap' must be of the class 'character'") - } - } - - if (!is.null(method_point_interp)) { - if (!inherits(method_point_interp, 'character')) { - stop("Parameter 'method_point_interp' must be of the class 'character'") - } - } - - if (is.na(match(lon_dim, names(dim(exp))))) { - stop("Missing longitude dimension in 'exp', or does not match the parameter 'lon_dim'") - } - - if (is.na(match(lat_dim, names(dim(exp))))) { - stop("Missing latitude dimension in 'exp', or does not match the parameter 'lat_dim'") - } - - # Check for negative latitudes in the exp data - if (any(lats < -90 | lats > 90) ) { - stop("Out-of-range latitudes have been found. Latitudes must range from -90 to 90") - } - - # checks for the case of interpolation to point locations - if (!is.null(points)) { - if (!inherits(points, 'list')) { - stop("Parameter 'points' must be a list of two elements containing the point ", - "latitudes and longitudes.") - } - - if (is.null(method_point_interp)) { - stop("Parameter 'method_point_interp' must be a character vector indicating the ", - "interpolation method. Accepted methods are nearest, bilinear, 9point, ", - "invdist4nn, NE, NW, SE, SW") - } - - if (!(method_point_interp %in% c('nearest', 'bilinear', '9point', 'invdist4nn', 'NE', 'NW', 'SE', 'SW'))) { - stop("Parameter 'method_point_interp' must be a character vector indicating the ", - "interpolation method. Accepted methods are nearest, bilinear, 9point, ", - "invdist4nn, NE, NW, SE, SW") - } - - # Points must be a list of two elements - if (length(points) != 2) { - stop("'points' must be a list of two elements containing the point ", - "latitudes and longitudes in the form 'points$lat', 'points$lon'") - } - - # The names of the two elements must be 'lat' and 'lon' - if (any(!(c(lat_dim, lon_dim) %in% names(points)))) { - stop("The names of the elements in the list 'points' must coincide with the parameters ", - "'lat_dim' and 'lon_dim'") - } - - # Check that the number of latitudes and longitudes match - if (length(unique(lengths(points))) != 1L) { - stop("The number of latitudes and longitudes must match") - } - - # Check for negative latitudes in the point coordinates - if (any(points[[lat_dim]] < -90 | points[[lat_dim]] > 90) ) { - stop("Out-of-range latitudes have been found in 'points'. Latitudes must range from ", - "-90 to 90") - } - - if (is.null(source_file)) { - stop("No source file found. Source file must be provided in the parameter 'source_file'.") - } - } else { - if (is.null(method_remap)) { - stop("Parameter 'method_remap' must be a character vector indicating the ", - "interpolation method. Accepted methods are con, bil, bic, nn, con2") - } - - if (is.null(target_grid)) { - stop("Parameter 'target_grid' can be either a path ", - "to another NetCDF file from which to read the target grid (a single grid must be ", - "defined in such file) or a character vector indicating the coarse grid to ", - "be passed to CDO, and it must be a grid recognised by CDO or a NetCDF file.") - } - } - ## ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 1) | - length(ncores) > 1) { - stop("Parameter 'ncores' must be a positive integer.") - } - } - - #---------------------------------- - # Limits of the region defined by the model data - #---------------------------------- - # for the case when region limits are not passed by the user - # regions contains the following elements in order: lonmin, lonmax, latmin, latmax - if (is.null(region)) { - warning("The borders of the downscaling region have not been provided. 
Assuming the four borders of the ", - "downscaling region are defined by the first and last elements of the parameters 'lats' and 'lons'.") - region <- c(lons[1], lons[length(lons)], lats[1], lats[length(lats)]) - } - - # Ensure points are within the region limits - if (!is.null(points)) { - if (any(points[[lat_dim]] > region[4]) | any(points[[lat_dim]] < region[3]) | - any(points[[lon_dim]] > region[2]) | any(points[[lon_dim]] < region[1])) { - stop("At least one of the points lies outside the model region") - } - } - - #---------------------------------- - # Map regrid with CDO - #---------------------------------- - if (is.null(points)) { - - .KnownLonNames <- s2dv:::.KnownLonNames - .KnownLatNames <- s2dv:::.KnownLatNames - .warning <- s2dv:::.warning - - res <- CDORemap(data_array = exp, - lats = lats, - lons = lons, - grid = target_grid, - method = method_remap, - crop = region) - - # Return a list - res <- list(data = res$data_array, obs = NULL, lon = res$lons, lat = res$lats) - - #---------------------------------- - # Interpolate to point locations - #---------------------------------- - } else { - # First create interpolation weights, depending on the chosen method - weights <- .create_interp_weights(ncfile = source_file, locids = 1:unique(lengths(points)), - lats = points[[lat_dim]], lons = points[[lon_dim]], - method = method_point_interp, region = list(lat_min = region[3], - lat_max = region[4], lon_min = region[1], lon_max = region[2])) - - # Select coarse-scale data to be interpolated - model_data_gridpoints <- .get_model_data(weights.df = weights, mdata = exp, ncores = ncores) - - # Interpolate model data to point locations - res <- .interpolate_data(model_data_gridpoints, weights, ncores = ncores) - - # Return a list - res <- list(data = res, obs = NULL, lon = points[[lon_dim]], lat = points[[lat_dim]]) - } - - return(res) -} - -#====================== -# Compute weights for interpolation at several (lat,lon) positions -# We assume that grid boxes are centered in the grid point. -#====================== -.create_interp_weights <- function(ncfile, locids, lats, lons, region = NULL, - method = c("nearest", "bilinear", "9point", "invdist4nn", "NE", - "NW", "SE", "SW")) -{ - # crop the region to get the correct weights - save temporary file - nc_cropped1 <- paste0('tmp_cropped_', format(Sys.time(), "%Y%m%d%H%M"),'.nc') - nc_cropped2 <- paste0('tmp_cropped2_', format(Sys.time(), "%Y%m%d%H%M"),'.nc') - - system(paste0('cdo sellonlatbox,', region$lon_min, ',', region$lon_max, ',', region$lat_min, - ',', region$lat_max, ' ', ncfile, ' ', nc_cropped1)) - - #---------------- - # Read grid description and compute (i,j) of requested locations (including decimals) - #---------------- - griddes <- .get_griddes(nc_cropped1) - - if (is.null(griddes$yinc)) { - system(paste0('rm ', nc_cropped1)) - stop("'griddes$yinc' not found in NetCDF file. Remember that only regular grids are accepted when ", - "downscaling to point locations.") - } - - # If latitudes are decreasingly ordered, revert them - if (griddes$yinc < 0) { - system(paste0('cdo invertlat ', nc_cropped1, ' ', nc_cropped2)) - griddes <- .get_griddes(nc_cropped2) - } - # remove temporary files - system(paste0('rm ', nc_cropped1)) - system(paste0('rm ', nc_cropped2)) - - if (is.null(griddes)) { - stop("'griddes' not found in the NetCDF source files") - } - - gridpoints <- .latlon2ij(griddes, lats, lons) - - #---------------- - # Compute the weights according to the selected method - #---------------- - if(method == "nearest") { - #---------------- - # Round i and j to closest integer. Weight is always 1. - #---------------- - - # | | | - # -+-----+-----+- - # | x| | - # | a | | - # | | | - # -+-----+-----+- - # | | | - - centeri <- round(gridpoints$i,0) - centeri[centeri == griddes$xsize+1] <- 1 # close longitudes - - weights.df <- data.frame(locid = locids, - lat = lats, - lon = lons, - rawi = gridpoints$i, - rawj = gridpoints$j, - i = centeri, - j = round(gridpoints$j, 0), - weight = 1) - } else if (method %in% c("bilinear","invdist4nn")) { - #---------------- - # Get the (i,j) coordinates of the 4 points (a,b,c,d) around x. - # This plot shows i increasing to the right and - # j increasing to the top, but the computations are generic. - #---------------- - # | | | - #- +-----+-----+- - # | | | - # | b | c | - # | | | - #- +-----+-----+- - # | x| | - # | a | d | - # | | | - #- +-----+-----+- - # | | | - - lowi <- floor(gridpoints$i) - highi <- ceiling(gridpoints$i) - highi[highi == griddes$xsize+1] <- 1 # close the longitudes - lowj <- floor(gridpoints$j) - highj <- ceiling(gridpoints$j) - # Note: highi and lowi are the same if i is integer - # Note: highj and lowj are the same if j is integer - - #---------------- - # Get x position wrt ad and ab axes (from 0 to 1) - #---------------- - pcti <- gridpoints$i - lowi - pctj <- gridpoints$j - lowj - - #---------------- - # Compute weights for a,b,c,d grid points - #---------------- - if(method == "bilinear") { - wa = (1 - pcti) * (1 - pctj) - wb = (1 - pcti) * pctj - wc = pcti * pctj - wd = pcti * (1 - pctj) - } else if(method == "invdist4nn") { - #---------------- - # Note: the distance is computed in the (i,j) space. - # Note2: this method does not guarantee a continuous interpolation. - # Use bilinear if that's desirable. - # When x is on the ab line, c and d would be used. In the limit of x - # being just left of ab other points would be used. - # Here we just dropped c and d coeffs when over ab. Same for ad line, - # b and c coeffs dropped. This prevents repeated nodes. 
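# Illustration (values assumed, not from the original file): for a point at
# pcti = 0.25, pctj = 0.75, the bilinear weights defined above are
#   wa = 0.75 * 0.25 = 0.1875    wb = 0.75 * 0.75 = 0.5625
#   wc = 0.25 * 0.75 = 0.1875    wd = 0.25 * 0.25 = 0.0625
# and they sum to 1, as required for a convex interpolation scheme.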
- #---------------- - ida = 1 / sqrt(pcti^2 + pctj^2) - idb = 1 / sqrt(pcti^2 + (1 - pctj)^2) - idc = 1 / sqrt((1-pcti)^2 + (1-pctj)^2) - idd = 1 / sqrt((1-pcti)^2 + pctj^2) - idb[pctj == 0] <- 0; - idc[pctj == 0] <- 0; - idc[pcti == 0] <- 0; - idd[pcti == 0] <- 0; - - #---------------- - # Normalize vector of inverse distances - #---------------- - invdist <- cbind(ida, idb, idc, idd) - w <- t(apply(invdist, 1, function(x) { if(any(is.infinite(x))) { - x <- is.infinite(x) * 1 } ; x <- x/sum(x) })) - - wa = w[ , 1] - wb = w[ , 2] - wc = w[ , 3] - wd = w[ , 4] - } - - #---------------- - # Put info in dataframes and rbind them - #---------------- - weightsa.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, - rawj = gridpoints$j, i = lowi, j = lowj, weight = wa) - weightsb.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, - rawj = gridpoints$j, i = lowi, j = highj, weight = wb) - weightsc.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, - rawj = gridpoints$j, i = highi, j = highj, weight = wc) - weightsd.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, - rawj = gridpoints$j, i = highi, j = lowj, weight = wd) - weights.df <- rbind(weightsa.df, weightsb.df, weightsc.df, weightsd.df) - } else if(method == "9point") { - #---------------- - # Get the (i,j) coordinates of the 9 points (a,b,...,i) around x - # This plot shows i increasing to the right and - # j increasing to the top, but the computations are generic. - #---------------- - # | | | | - #-+-----+-----+-----+- - # | | | | - # | c | f | i | - # | | | | - #-+-----+-----+-----+- - # | | x| | - # | b | e | h | - # | | | | - #-+-----+-----+-----+- - # | | | | - # | a | d | g | - # | | | | - #-+-----+-----+-----+- - # | | | | - - centeri <- round(gridpoints$i, 0) - centeri[centeri == griddes$xsize + 1] <- 1 - centerj <- round(gridpoints$j, 0) - lowi <- centeri - 1 - highi <- centeri + 1 - lowi[lowi == 0] <- griddes$xsize # close the longitudes - highi[highi == griddes$xsize+1] <- 1 # close the longitudes - lowj <- centerj - 1 - highj <- centerj + 1 - - #---------------- - # For the north and south pole do a 6-point average - #---------------- - w_highj <- ifelse(centerj == 1,1/6,ifelse(centerj == griddes$ysize,0 ,1/9)) - w_centerj <- ifelse(centerj == 1,1/6,ifelse(centerj == griddes$ysize,1/6,1/9)) - w_lowj <- ifelse(centerj == 1,0 ,ifelse(centerj == griddes$ysize,1/6,1/9)) - - #---------------- - # Put info in dataframes and rbind them - #---------------- - weightsa.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, - rawj = gridpoints$j, i = lowi, j = lowj, weight = w_lowj) - weightsb.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, - rawj = gridpoints$j, i = lowi, j = centerj, weight = w_centerj) - weightsc.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, - rawj = gridpoints$j, i = lowi, j = highj, weight = w_highj) - weightsd.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, - rawj = gridpoints$j, i = centeri, j = lowj, weight = w_lowj) - weightse.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, - rawj = gridpoints$j, i = centeri, j = centerj, weight = w_centerj) - weightsf.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, - rawj = gridpoints$j, i = centeri, j = highj, weight = w_highj) - weightsg.df <- data.frame(locid = 
locids, lat = lats, lon = lons, rawi = gridpoints$i, - rawj = gridpoints$j, i = highi, j = lowj, weight = w_lowj) - weightsh.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, - rawj = gridpoints$j, i = highi, j = centerj, weight = w_centerj) - weightsi.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, - rawj = gridpoints$j, i = highi, j = highj, weight = w_highj) - weights.df <- rbind(weightsa.df, weightsb.df, weightsc.df, weightsd.df, weightse.df, - weightsf.df, weightsg.df, weightsh.df, weightsi.df) - } else if(method %in% c("NE", "NW", "SW", "SE")) { - #---------------- - # Find if increasing i and j increases or decreases lat and lon - #---------------- - westtoeast <- (griddes$xinc > 0) - southtonorth <- T - if(griddes$gridtype == "gaussian") { - # We assume gaussian grid latitudes are ordered north to south - southtonorth <- F - } else { #lonlat - if(griddes$yinc < 0) {southtonorth <- F} - } - - #---------------- - # Get the (i,j) coordinates of the desired point (a,b,c or d) around x - #---------------- - # | | | - #- +-----+-----+- - # | | | - # | b | c | - # | | | - #- +-----+-----+- - # | x| | - # | a | d | - # | | | - #- +-----+-----+- - # | | | - - if(substr(method,1,1) == "N" & southtonorth == T) { selj <- ceiling(gridpoints$j) } - if(substr(method,1,1) == "S" & southtonorth == T) { selj <- floor(gridpoints$j) } - if(substr(method,1,1) == "N" & southtonorth == F) { selj <- floor(gridpoints$j) } - if(substr(method,1,1) == "S" & southtonorth == F) { selj <- ceiling(gridpoints$j) } - - if(substr(method,2,2) == "E" & westtoeast == T) {seli <- ceiling(gridpoints$i) } - if(substr(method,2,2) == "W" & westtoeast == T) {seli <- floor(gridpoints$i) } - if(substr(method,2,2) == "E" & westtoeast == F) {seli <- floor(gridpoints$i) } - if(substr(method,2,2) == "W" & westtoeast == F) {seli <- ceiling(gridpoints$i) } - - seli[seli == griddes$xsize + 1] <- 1 # close the longitudes - - weights.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, - rawj = gridpoints$j, i = seli, j = selj, weight = 1) - } else { - stop(paste0("Method " ,method, " not implemented")) - } - - #---------------- - # Order by locid and remove lines with 0 weight - # This also removes some duplicates in the bilinear/invdist4nn methods when i - # or j is a whole number, or in the 9-point method when at the poles. - #---------------- - weights.df <- weights.df[order(weights.df$locid), ] - weights.df <- weights.df[weights.df$weight != 0, ] - - #---------------- - # Add as attributes the method and the nc file used to compute the weights - #---------------- - attributes(weights.df)$nc_file <- normalizePath(ncfile) - attributes(weights.df)$method <- method - - return(weights.df) -} - -#====================== -# Compute (i,j) from (lat,lon). -# Works only for 'lonlat' and 'gaussian' grids. -# Grids are supposed to cover whole globe. 
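# Illustration (grid values assumed): on a 'lonlat' grid with xfirst = 0,
# xinc = 2 and xsize = 180, a point at lon = 5 gives
#   i = 1 + ((5 - 0) / 2) %% 180 = 3.5
# i.e. halfway between grid columns 3 and 4; that fractional part is what
# the interpolation weights above are built from.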
-#====================== -.latlon2ij <- function(griddes, lats, lons) { - #------------ - # Check input params - #------------ - if(length(lons) != length(lats)) {stop("Input lat and lon have different lengths.")} - if(any(lats < -90) | any(lats > 90)) {stop("Latitude out of valid range")} - if((griddes$xfirst > 180) & (any(lons < 0))) { - stop("Please use the same convention for the longitudes in the source file and the ", - "longitude values in 'points'.") - } - #if(round(griddes$xinc*griddes$xsize) != 360) {stop("Grid is not global")} - # no need to resize lons to [0,360) - - #------------ - # Compute i (with decimals) - # i lies in [1,xsize+1) - # %% gives the remainder - #------------ - gridpoints <- list() - gridpoints$i <- 1 + (((lons - griddes$xfirst) / griddes$xinc) %% griddes$xsize) - - #------------ - # Compute j (with decimals) - #------------ - if(griddes$gridtype=='lonlat') { - gridpoints$j <- 1 + (lats - griddes$yfirst) / griddes$yinc - } else if(griddes$gridtype == 'gaussian') { - # We assume gaussian grid latitudes are ordered north to south - # findInterval can only work with monotonic ascending values so we revert twice - northj <- griddes$ysize-findInterval(lats, -griddes$yvals) - southj <- northj + 1 - - # Special case: We are north of the first lat - gridpoints$j[northj == 0] <- 1 - - # Special case: We are south of the last lat - gridpoints$j[southj == griddes$ysize + 1] <- griddes$ysize - - # Generic case - ok_idx <- !(northj == 0 | southj == griddes$ysize+1) - gridpoints$j[ok_idx] <- northj[ok_idx] + (griddes$yvals[northj[ok_idx]] - - lats[ok_idx])/(griddes$yvals[northj[ok_idx]] - griddes$yvals[southj[ok_idx]]) - } else { stop("Unsupported grid") } - - return(gridpoints) -} - -#====================== -# Use cdo griddes to obtain grid information -#====================== -.get_griddes <- function(ncfile) { - tmp <- system(paste0("cdo griddes ", ncfile, - " 2>/dev/null | egrep 'gridtype|xsize|ysize|xfirst|xinc|yfirst|yinc'"), intern = T) - arr <- do.call(rbind, strsplit(tmp,"\\s+= ", perl = T)) - griddes <- as.list(arr[,2]) - names(griddes) <- arr[,1] - - if(griddes$gridtype == "gaussian") { - griddes$yvals <- .get_lats(ncfile) - } - - # Convert some fields to numeric. Ensures all fields are present. - for(nm in c("xsize", "ysize", "xfirst", "yfirst", "xinc", "yinc")) { - griddes[[nm]] <- ifelse(is.null(griddes[[nm]]), NA, as.numeric(griddes[[nm]])) - } - - return(griddes) -} - -#====================== -# Use nco to obtain latitudes. Latitudes shall be named "lat" or "latitude". -#====================== -.get_lats <- function(ncfile) { - - tmp <- system(paste0('ncks -H -s "%f " -v latitude ',ncfile),intern=T) - - if(!is.null(attributes(tmp)$status)) { - tmp <- system(paste0('ncks -H -s "%f " -v lat ',ncfile),intern=T) - } - - lats <- as.numeric(strsplit(tmp[1],"\\s+",perl=T)[[1]]) - - return(lats) -} - -#====================== -# Load model data at all (i,j) pairs listed in the weights dataframe. -# The needed (i,j) pairs are extracted from 'mdata' with multiApply. -#====================== -.get_model_data <- function(weights.df, mdata, ncores = NULL) { - - #----------------- - # Get data for all combinations of i and j. - # (inefficient, getting many unneeded pairs). 
- # Avoid retrieving duplicates with unique() - # These are the indices of the global grid - #----------------- - is <- weights.df$i - js <- weights.df$j - - #----------------- - # If any of the indices happens to be 0, - # change it by 1 but give a warning - #----------------- - if (any(is == 0) | any(js == 0)) { - warning("Is the point location in the border of the region? The code can run but ", - "results will be less accurate than those obtained with a larger region." ) - is[is == 0] <- 1 - js[js == 0] <- 1 - } - - #----------------- - # Get indices of original is and js in unique(is),unique(js) that were requested - #----------------- - idxi <- match(is, unique(is)) - idxj <- match(js, unique(js)) - - #----------------- - # Subsample mdata to keep only the needed (i,j) pairs. - #----------------- - if (is.na(match("longitude", names(dim(mdata))))) { - londim <- match("lon", names(dim(mdata))) - } else { - londim <- match("longitude", names(dim(mdata))) - } - if (is.na(match("latitude", names(dim(mdata))))) { - latdim <- match("lat", names(dim(mdata))) - } else { - latdim <- match("latitude", names(dim(mdata))) - } - - # trick: exchange idxi and idxj - #if(londim > latdim) { idx.tmp <- idxi; idxi <- idxj; idxj <- idx.tmp } - #keepdims <- (1:length(dim(mdata)))[-c(londim,latdim)] - - #sub_mdata <- apply(mdata, keepdims, function(x) { - # laply(1:length(is),function(k) { x[idxi[k],idxj[k]] }) }) - #names(dim(sub_mdata))[1] <- "gridpoint" - - #----------------- - # Retrieve with multiApply - #----------------- - sub_mdata <- Apply(mdata, target_dims = list(c(latdim, londim)), - fun = function(x) {laply(1:length(is),function(k) { x[js[k],is[k]] }) }, - ncores = ncores)$output1 - names(dim(sub_mdata))[1] <- "gridpoint" - - #----------------- - # Return an array that contains as many gridpoints as (i,j) pairs were requested - #----------------- - return(sub_mdata) -} - -#====================== -# Multiply the grid-point series by the weights, -# to obtain the desired interpolations -#====================== -.interpolate_data <- function(model_data, weights.df, ncores) { - #----------------- - # Multiply each gridpoint matrix by its corresponding weight - #----------------- - gpdim <- match("gridpoint", names(dim(model_data))) - weighted_data <- sweep(model_data, gpdim, weights.df$weight, "*") - - #----------------- - # Sum all series that belong to same interpolation point - # Return an array that contains the requested locations and interpolation type - #----------------- - #interp_data <- apply(weighted_data, -gpdim, function(x) { rowsum(x, weights.df$locid) }) - #names(dim(interp_data))[1] <- "location" - interp_data <- Apply(weighted_data, target_dims = gpdim, fun = function(x) { - rowsum(x, weights.df$locid)}, output_dims = c("location", "aux"), - ncores = ncores)$output1 - - return(interp_data) -} diff --git a/modules/Downscaling/tmp/Intlr.R b/modules/Downscaling/tmp/Intlr.R deleted file mode 100644 index 36a7f11b..00000000 --- a/modules/Downscaling/tmp/Intlr.R +++ /dev/null @@ -1,559 +0,0 @@ -#'@rdname CST_Intlr -#'@title Downscaling using interpolation and linear regression. -#' -#'@author J. Ramon, \email{jaume.ramon@bsc.es} -#' -#'@description This function performs a downscaling using an interpolation and a linear -#'regression. Different methodologies that employ linear regressions are available. See -#'parameter 'lr_method' for more information. It is recommended that the observations -#'are passed already in the target grid. 
Otherwise, the function will also perform an -#'interpolation of the observed field into the target grid. The coarse scale and -#'observation data can be either global or regional. In the latter case, the region is -#'defined by the user. In principle, the coarse and observation data are intended to -#'be of the same variable, although different variables can also be admitted. -#' -#'@param exp an 's2dv object' containing the experimental field on the -#'coarse scale for which the downscaling is aimed. The object must have, at least, -#'the dimensions latitude, longitude, start date and member. The object is expected to be -#'already subset for the desired region. Data can be in one or two integrated regions, e.g., -#'crossing the Greenwich meridian. To get the correct results in the latter case, -#'the borders of the region should be specified in the parameter 'region'. See parameter -#''region'. -#'@param obs an 's2dv object' containing the observational field. The object -#'must have, at least, the dimensions latitude, longitude and start date. The object is -#'expected to be already subset for the desired region. -#'@param lr_method a character vector indicating the linear regression method to be applied -#'after the interpolation. Accepted methods are 'basic', 'large-scale' and '4nn'. The 'basic' -#'method fits a linear regression using high resolution observations as predictands and the -#'interpolated model data as predictor. Then, the regression equation is applied to the interpolated -#'model data to correct the interpolated values. The 'large-scale' method fits a linear -#'regression with large-scale predictors from the same model (e.g. teleconnection indices) -#'as predictors and high-resolution observations as predictands. This equation is then -#'applied to the interpolated model values. Finally, the '4nn' method uses a linear -#'regression with the four nearest neighbours as predictors and high-resolution observations -#'as predictands. It is then applied to model data to correct the interpolated values. -#'@param target_grid a character vector indicating the target grid to be passed to CDO. -#'It must be a grid recognised by CDO or a NetCDF file. -#'@param points a list of two elements containing the point latitudes and longitudes -#'of the locations to downscale the model data. The list must contain the two elements -#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is -#'to a point location, only regular grids are allowed for exp and obs. Only needed if the -#'downscaling is to a point location. -#'@param int_method a character vector indicating the regridding method to be passed -#'to CDORemap. Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is -#'to be used, CDO_1.9.8 or newer version is required. -#'@param method_point_interp a character vector indicating the interpolation method to -#'interpolate model gridded data into the point locations. Accepted methods are "nearest", -#'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW". -#'@param predictors an array with large-scale data to be used in the 'large-scale' method. -#'Only needed if the linear regression method is set to 'large-scale'. It must have, at -#'least, the dimension start date and another dimension whose name has to be specified in -#'the parameter 'large_scale_predictor_dimname'. It should contain as many elements as the -#'number of large-scale predictors. 
-#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data' -#'in exp and obs. Default set to "lat". -#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data' -#'in exp and obs. Default set to "lon". -#'@param sdate_dim a character vector indicating the start date dimension name in the element -#''data' in exp and obs. Default set to "sdate". -#'@param time_dim a character vector indicating the time dimension name in the element -#''data' in exp and obs. Default set to "time". -#'@param large_scale_predictor_dimname a character vector indicating the name of the -#'dimension in 'predictors' that contains the predictor variables. See parameter 'predictors'. -#'@param loocv a logical indicating whether to apply leave-one-out cross-validation when -#'generating the linear regressions. Default to TRUE. -#'@param region a numeric vector indicating the borders of the region defined in exp. -#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers -#'to the left border, while lonmax refers to the right border. latmin indicates the lower -#'border, whereas latmax indicates the upper border. If set to NULL (default), the function -#'takes the first and last elements of the latitudes and longitudes. -#'@param ncores an integer indicating the number of cores to use in parallel computation. -#'The default value is NULL. -#'@import multiApply -#' -#'@return A list of three elements. 'data' contains the downscaled field, 'lat' the -#'downscaled latitudes, and 'lon' the downscaled longitudes. -#'@examples -#'exp <- rnorm(500) -#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5) -#'exp_lons <- 1:5 -#'exp_lats <- 1:4 -#'obs <- rnorm(900) -#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5) -#'obs_lons <- seq(1,5, 4/14) -#'obs_lats <- seq(1,4, 3/11) -#'exp <- s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons) -#'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons) -#'res <- CST_Intlr(exp = exp, obs = obs, target_grid = 'r1280x640', lr_method = 'basic', int_method = 'conservative') -#'@export -CST_Intlr <- function(exp, obs, lr_method, target_grid = NULL, points = NULL, int_method = NULL, - method_point_interp = NULL, predictors = NULL, lat_dim = "lat", lon_dim = "lon", - sdate_dim = "sdate", time_dim = "time", member_dim = "member", - large_scale_predictor_dimname = 'vars', loocv = TRUE, region = NULL, ncores = NULL) { - - if (!inherits(exp,'s2dv_cube')) { - stop("Parameter 'exp' must be of the class 's2dv_cube'") - } - - if (!inherits(obs,'s2dv_cube')) { - stop("Parameter 'obs' must be of the class 's2dv_cube'") - } - - res <- Intlr(exp = exp$data, obs = obs$data, exp_lats = exp$coords[[lat_dim]], exp_lons = exp$coords[[lon_dim]], - obs_lats = obs$coords[[lat_dim]], obs_lons = obs$coords[[lon_dim]], points = points, - source_file_exp = exp$attrs$source_files[1], source_file_obs = obs$attrs$source_files[1], - target_grid = target_grid, lr_method = lr_method, int_method = int_method, - method_point_interp = method_point_interp, predictors = predictors, - lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim, time_dim = time_dim, - member_dim = member_dim, large_scale_predictor_dimname = large_scale_predictor_dimname, - loocv = loocv, region = region, ncores = ncores) - - # Modify data, lat and lon in the original s2dv_cube, adding the downscaled data - exp$data <- res$data - exp$dims <- dim(exp$data) - exp$coords[[lon_dim]] <- res$lon - exp$coords[[lat_dim]] <- res$lat - - obs$data <- res$obs - obs$dims <- dim(obs$data) - obs$coords[[lon_dim]] <- res$lon - obs$coords[[lat_dim]] <- res$lat - - res_s2dv <- list(exp = exp, obs = obs) - return(res_s2dv) -} - -#'@rdname Intlr -#'@title Downscaling using interpolation and linear regression. -#' -#'@author J. Ramon, \email{jaume.ramon@bsc.es} -#' -#'@description This function performs a downscaling using an interpolation and a linear -#'regression. Different methodologies that employ linear regressions are available. See -#'parameter 'lr_method' for more information. It is recommended that the observations -#'are passed already in the target grid. Otherwise, the function will also perform an -#'interpolation of the observed field into the target grid. The coarse scale and -#'observation data can be either global or regional. In the latter case, the region is -#'defined by the user. In principle, the coarse and observation data are intended to -#'be of the same variable, although different variables can also be admitted. -#' -#'@param exp an array with named dimensions containing the experimental field on the -#'coarse scale for which the downscaling is aimed. The object must have, at least, -#'the dimensions latitude, longitude and start date. The object is expected to be -#'already subset for the desired region. Data can be in one or two integrated regions, e.g., -#'crossing the Greenwich meridian. To get the correct results in the latter case, -#'the borders of the region should be specified in the parameter 'region'. See parameter -#''region'. -#'@param obs an array with named dimensions containing the observational field. The object -#'must have, at least, the dimensions latitude, longitude and start date. The object is -#'expected to be already subset for the desired region. -#'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must -#'range from -90 to 90. -#'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes -#'can range from -180 to 180 or from 0 to 360. -#'@param obs_lats a numeric vector containing the latitude values in 'obs'. Latitudes must -#'range from -90 to 90. -#'@param obs_lons a numeric vector containing the longitude values in 'obs'. Longitudes -#'can range from -180 to 180 or from 0 to 360. -#'@param lr_method a character vector indicating the linear regression method to be applied -#'after the interpolation. Accepted methods are 'basic', 'large-scale' and '4nn'. The 'basic' -#'method fits a linear regression using high resolution observations as predictands and the -#'interpolated model data as predictor. Then, the regression equation is applied to the interpolated -#'model data to correct the interpolated values. The 'large-scale' method fits a linear -#'regression with large-scale predictors from the same model (e.g. teleconnection indices) -#'as predictors and high-resolution observations as predictands. This equation is then -#'applied to the interpolated model values. Finally, the '4nn' method uses a linear -#'regression with the four nearest neighbours as predictors and high-resolution observations -#'as predictands. It is then applied to model data to correct the interpolated values. -#'@param target_grid a character vector indicating the target grid to be passed to CDO. -#'It must be a grid recognised by CDO or a NetCDF file. -#'@param points a list of two elements containing the point latitudes and longitudes -#'of the locations to downscale the model data. The list must contain the two elements -#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is -#'to a point location, only regular grids are allowed for exp and obs. Only needed if the -#'downscaling is to a point location. -#'@param int_method a character vector indicating the regridding method to be passed -#'to CDORemap. Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is -#'to be used, CDO_1.9.8 or newer version is required. -#'@param method_point_interp a character vector indicating the interpolation method to -#'interpolate model gridded data into the point locations. Accepted methods are "nearest", -#'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW". -#'@param source_file_exp a character vector with a path to an example file of the exp data. -#'Only needed if the downscaling is to a point location. -#'@param source_file_obs a character vector with a path to an example file of the obs data. -#'Only needed if the downscaling is to a point location. -#'@param predictors an array with large-scale data to be used in the 'large-scale' method. -#'Only needed if the linear regression method is set to 'large-scale'. It must have, at -#'least, the dimension start date and another dimension whose name has to be specified in -#'the parameter 'large_scale_predictor_dimname'. It should contain as many elements as the -#'number of large-scale predictors. -#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data' -#'in exp and obs. Default set to "lat". -#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data' -#'in exp and obs. Default set to "lon". -#'@param sdate_dim a character vector indicating the start date dimension name in the element -#''data' in exp and obs. Default set to "sdate". -#'@param time_dim a character vector indicating the time dimension name in the element -#''data' in exp and obs. Default set to "time". -#'@param region a numeric vector indicating the borders of the region defined in exp. -#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers -#'to the left border, while lonmax refers to the right border. latmin indicates the lower -#'border, whereas latmax indicates the upper border. If set to NULL (default), the function -#'takes the first and last elements of the latitudes and longitudes. -#'@param large_scale_predictor_dimname a character vector indicating the name of the -#'dimension in 'predictors' that contains the predictor variables. See parameter 'predictors'. -#'@param loocv a logical indicating whether to apply leave-one-out cross-validation when -#'generating the linear regressions. Default to TRUE. -#'@param ncores an integer indicating the number of cores to use in parallel computation. -#'The default value is NULL. -#'@import multiApply -#' -#'@return A list of three elements. 'data' contains the downscaled field, 'lat' the -#'downscaled latitudes, and 'lon' the downscaled longitudes. 
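As a sketch of the input the 'large-scale' method expects (the sizes and the notion of two teleconnection indices are assumptions for illustration, not part of the original file):

# Two hypothetical large-scale indices over 5 start dates; the predictor
# dimension is named after 'large_scale_predictor_dimname' ('vars' by default)
predictors <- array(rnorm(10), dim = c(vars = 2, sdate = 5))
# The 'sdate' length must match dim(exp)[sdate_dim], as checked further below.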
-#'@examples -#'exp <- rnorm(500) -#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5) -#'exp_lons <- 1:5 -#'exp_lats <- 1:4 -#'obs <- rnorm(900) -#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5) -#'obs_lons <- seq(1,5, 4/14) -#'obs_lats <- seq(1,4, 3/11) -#'res <- Intlr(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons, obs_lats = obs_lats, -#'obs_lons = obs_lons, target_grid = 'r1280x640', lr_method = 'basic', int_method = 'conservative') -#'@export -Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, target_grid = NULL, points = NULL, - int_method = NULL, method_point_interp = NULL, source_file_exp = NULL, source_file_obs = NULL, - predictors = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", - member_dim = "member", region = NULL, large_scale_predictor_dimname = 'vars', loocv = TRUE, - ncores = NULL) { - - #----------------------------------- - # Checkings - #----------------------------------- - if (!inherits(lr_method, 'character')) { - stop("Parameter 'lr_method' must be of the class 'character'") - } - - if (!inherits(large_scale_predictor_dimname, 'character')) { - stop("Parameter 'large_scale_predictor_dimname' must be of the class 'character'") - } - - if (!inherits(loocv, 'logical')) { - stop("Parameter 'loocv' must be set to TRUE or FALSE") - } - - if (!inherits(lat_dim, 'character')) { - stop("Parameter 'lat_dim' must be of the class 'character'") - } - - if (!inherits(lon_dim, 'character')) { - stop("Parameter 'lon_dim' must be of the class 'character'") - } - - if (!inherits(sdate_dim, 'character')) { - stop("Parameter 'sdate_dim' must be of the class 'character'") - } - - if (!inherits(large_scale_predictor_dimname, 'character')) { - stop("Parameter 'large_scale_predictor_dimname' must be of the class 'character'") - } - - if (is.na(match(lon_dim, names(dim(exp))))) { - stop("Missing longitude dimension in 'exp', or does not match the parameter ", - "'lon_dim'") - } - - if (is.na(match(lat_dim, names(dim(exp))))) { - stop("Missing latitude dimension in 'exp', or does not match the parameter ", - "'lat_dim'") - } - - if (is.na(match(sdate_dim, names(dim(exp)))) | is.na(match(sdate_dim, names(dim(obs))))) { - stop("Missing start date dimension in 'exp' and/or 'obs', or does not match the parameter ", - "'sdate_dim'") - } - - # When observations are pointwise - if (!is.null(points) & !is.na(match("location", names(dim(obs))))) { - point_obs <- T - # dimension aux in obs is needed - if (is.na(match("aux", names(dim(obs))))) { - obs <- InsertDim(obs, posdim = 1, lendim = 1, name = "aux") - } - } else { - point_obs <- F - } - - if (!is.null(points) & is.null(source_file_exp)) { - stop("No source file found. 
Source file for exp must be provided in the parameter ", - "'source_file_exp'.") - } - - if (!is.null(points) & is.null(method_point_interp)) { - stop("Please provide the interpolation method to interpolate gridded data to point locations ", - "through the parameter 'method_point_interp'.") - } - - # sdate must be the time dimension in the input data - stopifnot(sdate_dim %in% names(dim(exp))) - stopifnot(sdate_dim %in% names(dim(obs))) - - # the code is not yet prepared to handle members in the observations - restore_ens <- FALSE - if (member_dim %in% names(dim(obs))) { - if (identical(as.numeric(dim(obs)[member_dim]), 1)) { - obs <- ClimProjDiags::Subset(x = obs, along = member_dim, indices = 1, drop = 'selected') - restore_ens <- TRUE - } else { - stop("Not implemented for observations with members ('obs' can have 'member_dim', ", - "but it should be of length = 1).") - } - } - - # checks for the parameter 'predictors' - if (!is.null(predictors)) { - if (!is.array(predictors)) { - stop("Parameter 'predictors' must be of the class 'array'") - } else { - # ensure the predictor variable name matches the parameter large_scale_predictor_dimname - stopifnot(large_scale_predictor_dimname %in% names(dim(predictors))) - stopifnot(sdate_dim %in% names(dim(predictors))) - stopifnot(dim(predictors)[sdate_dim] == dim(exp)[sdate_dim]) - } - } - ## ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 1) | - length(ncores) > 1) { - stop("Parameter 'ncores' must be a positive integer.") - } - } - - #----------------------------------- - # Interpolation - #----------------------------------- - if (lr_method != '4nn') { - - if (is.null(int_method)) { - stop("Parameter 'int_method' must be a character vector indicating the interpolation method. ", - "Accepted methods are con, bil, bic, nn, con2") - } - - if (is.null(region)) { - warning("The borders of the downscaling region have not been provided. Assuming the ", - "four borders of the downscaling region are defined by the first and last ", - "elements of the parameters 'obs_lats' and 'obs_lons'.") - region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)]) - } - - exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid, - points = points, method_point_interp = method_point_interp, - source_file = source_file_exp, lat_dim = lat_dim, lon_dim = lon_dim, - method_remap = int_method, region = region, ncores = ncores) - - # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to - # the same grid to force the matching - if ((!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats, - lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !(point_obs)) { - obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid, - points = points, method_point_interp = method_point_interp, - source_file = source_file_obs, lat_dim = lat_dim, lon_dim = lon_dim, - method_remap = int_method, region = region, ncores = ncores) - - lats <- obs_interpolated$lat - lons <- obs_interpolated$lon - obs_interpolated <- obs_interpolated$data - } else { - obs_interpolated <- obs - lats <- obs_lats - lons <- obs_lons - } - } - - #----------------------------------- - # Linear regressions - #----------------------------------- - # Pointwise linear regression - # Predictor: model data - # Predictand: observations - if (lr_method == 'basic') { - predictor <- exp_interpolated$data - predictand <- obs_interpolated - - target_dims_predictor <- sdate_dim - target_dims_predictand <- sdate_dim - } - - # (Multi) linear regression with large-scale predictors - # Predictor: passed through the parameter 'predictors' by the user. Can be model or observations - # Predictand: high-resolution observations - else if (lr_method == 'large-scale') { - if (is.null(predictors)) { - stop("The large-scale predictors must be passed through the parameter 'predictors'") - } - - predictand <- obs_interpolated - predictor <- predictors - - target_dims_predictor <- c(sdate_dim, large_scale_predictor_dimname) - target_dims_predictand <- sdate_dim - } - - # Multi-linear regression with the four nearest neighbours - # Predictors: model data - # Predictand: observations - else if (lr_method == '4nn') { - - predictor <- .find_nn(coar = exp, lats_hres = obs_lats, lons_hres = obs_lons, lats_coar = exp_lats, - lons_coar = exp_lons, lat_dim = lat_dim, lon_dim = lon_dim, nn = 4, ncores = ncores) - - if (is.null(points) | ("location" %in% names(dim(obs)))) { - if (!is.null(target_grid)) { - warning("Interpolating to the 'obs' grid") - } - predictand <- obs - - lats <- obs_lats - lons <- obs_lons - } - # If the downscaling is to point locations: Once the 4 nearest neighbours have been found, - # interpolate to point locations - else { - predictor <- Interpolation(exp = predictor, lats = obs_lats, lons = obs_lons, target_grid = NULL, - points = points, method_point_interp = method_point_interp, - source_file = source_file_obs, method_remap = NULL, region = region, ncores = ncores) - - predictand <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = NULL, - points = points, method_point_interp = method_point_interp, - source_file = source_file_obs, method_remap = NULL, region = region, ncores = ncores) - - lats <- predictor$lat - lons <- predictor$lon - predictor <- predictor$data - predictand <- predictand$data - } - - target_dims_predictor <- c(sdate_dim,'nn') - target_dims_predictand <- sdate_dim - } - - else { - stop(paste0(lr_method, " method is not implemented yet")) - } - - # Apply the linear regressions - res <- Apply(list(predictor, predictand), target_dims = list(target_dims_predictor, target_dims_predictand), - fun = .intlr, loocv = loocv, ncores = ncores)$output1 - - names(dim(res))[1] <- sdate_dim - - # restore ensemble dimension in observations if it existed originally - if (restore_ens) { - predictand <- s2dv::InsertDim(predictand, posdim = 1, lendim = 1, name = member_dim) - } - - # Return a list of three elements - res <- list(data = res, obs = predictand, lon = lons, lat = lats) - - return(res) -} - -#----------------------------------- -# Atomic function to generate and apply the linear regressions -#----------------------------------- -.intlr <- function(x, y, loocv) { - - tmp_df <- data.frame(x = x, y = y) - # if the data is all NA, force return NA - if (all(is.na(tmp_df)) | (sum(apply(tmp_df, 2, function(x) !all(is.na(x)))) == 1)) { - - n <- nrow(tmp_df) - res <- rep(NA, n) - - } else { - # training - lm1 <- .train_lm(df = tmp_df, loocv = loocv) - - # prediction - res <- .pred_lm(lm1 = lm1, df = tmp_df, loocv = loocv) - } - - return(res) -} - -#----------------------------------- -# Function to generate the linear regressions. 
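# Sketch of the leave-one-out logic that the fitting and prediction helpers
# below implement for each grid point (toy data, assumed only for illustration):
#   df <- data.frame(x = rnorm(10), y = rnorm(10))
#   pred <- sapply(seq_len(nrow(df)), function(j) {
#     fit <- lm(y ~ ., data = df[-j, ])   # fit without sample j
#     predict(fit, newdata = df[j, ])     # predict the held-out sample
#   })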
-# Returns a list -#----------------------------------- -.train_lm <- function(df, loocv) { - - # Remove predictor columns containing only NA's - df <- df[ ,apply(as.matrix(df[,colnames(df) != 'y'],nrow(df),ncol(df)-1), 2, function(x) !all(is.na(x)))] - - if (loocv) { - - lm1 <- lapply(1:nrow(df), function(j) { - if (all(is.na(df[-j,]$y))) { - return(NA) - } else { - return(lm(df[-j,], formula = y ~ .)) - }}) - } else { - - lm1 <- ifelse(all(is.na(df$y)), NA, list(lm(data = df, formula = y ~ .))) - } - - return(lm1) -} - -#----------------------------------- -# Function to apply the linear regressions. -#----------------------------------- -.pred_lm <- function(df, lm1, loocv) { - - if (loocv) { - pred_vals <- sapply(1:nrow(df), function(j) { - if (all(is.na(lm1[[j]]))) { - return(NA) - } else { - return(predict(lm1[[j]], df[j,])) - }}) - } else { - if (!is.na(lm1)) { - pred_vals_ls <- lapply(lm1, predict, data = df) - pred_vals <- unlist(pred_vals_ls) - } else { - pred_vals <- rep(NA, nrow(df)) - } - } - return(pred_vals) -} - -#----------------------------------- -# Function to find N nearest neighbours. -# 'coar' is an array with named dimensions -#----------------------------------- -.find_nn <- function(coar, lats_hres, lons_hres, lats_coar, lons_coar, lat_dim, lon_dim, nn = 4, ncores = NULL) { - - # Sort the distances from closest to furthest - idx_lat <- as.array(sapply(lats_hres, function(x) order(abs(lats_coar - x))[1:nn])) - idx_lon <- as.array(sapply(lons_hres, function(x) order(abs(lons_coar - x))[1:nn])) - - names(dim(idx_lat)) <- c('nn', lat_dim) - names(dim(idx_lon)) <- c('nn', lon_dim) - - # obtain the values of the nearest neighbours - nearest <- Apply(list(coar, idx_lat, idx_lon), - target_dims = list(c(lat_dim, lon_dim), lat_dim, lon_dim), - fun = function(x, y, z) x[y, z], ncores = ncores)$output1 - - return(nearest) -} diff --git a/modules/Downscaling/tmp/LogisticReg.R b/modules/Downscaling/tmp/LogisticReg.R deleted file mode 100644 index a85a1b3f..00000000 --- a/modules/Downscaling/tmp/LogisticReg.R +++ /dev/null @@ -1,550 +0,0 @@ -#'@rdname CST_LogisticReg -#'@title Downscaling using interpolation and logistic regression. -#' -#'@author J. Ramon, \email{jaume.ramon@bsc.es} -#' -#'@description This function performs a downscaling using an interpolation and a logistic -#'regression. See \code{\link[nnet]{multinom}} for further details. It is recommended that -#'the observations are passed already in the target grid. Otherwise, the function will also -#'perform an interpolation of the observed field into the target grid. The coarse scale and -#'observation data can be either global or regional. In the latter case, the region is -#'defined by the user. In principle, the coarse and observation data are intended to be of -#'the same variable, although different variables can also be admitted. -#' -#'@param exp an 's2dv object' with named dimensions containing the experimental field on -#'the coarse scale for which the downscaling is aimed. The object must have, at least, -#'the dimensions latitude, longitude, start date and member. The object is expected to be -#'already subset for the desired region. Data can be in one or two integrated regions, e.g., -#'crossing the Greenwich meridian. To get the correct results in the latter case, -#'the borders of the region should be specified in the parameter 'region'. See parameter -#''region'. -#'@param obs an 's2dv object' with named dimensions containing the observational field. 
-#'The object must have, at least, the dimensions latitude, longitude and start date. The -#'object is expected to be already subset for the desired region. -#'@param target_grid a character vector indicating the target grid to be passed to CDO. -#'It must be a grid recognised by CDO or a NetCDF file. -#'@param int_method a character vector indicating the regridding method to be passed to CDORemap. -#'Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is to be used, CDO_1.9.8 -#'or newer version is required. -#'@param log_reg_method a character vector indicating the logistic regression method to be used. -#'Accepted methods are "ens_mean", "ens_mean_sd", "sorted_members". "ens_mean" uses the ensemble -#'mean anomalies as predictors in the logistic regression, "ens_mean_sd" uses the ensemble -#'mean anomalies and the ensemble spread (computed as the standard deviation of all the members) -#'as predictors in the logistic regression, and "sorted_members" considers all the members -#'ordered decreasingly as predictors in the logistic regression. Default method is "ens_mean". -#'@param probs_cat a numeric vector indicating the percentile thresholds separating the -#'climatological distribution into different classes (categories). Default to c(1/3, 2/3). See -#'\code{\link[easyVerification]{convert2prob}}. -#'@param return_most_likely_cat if TRUE, the function returns the most likely category. If -#'FALSE, the function returns the probabilities for each category. Default to FALSE. -#'@param points a list of two elements containing the point latitudes and longitudes -#'of the locations to downscale the model data. The list must contain the two elements -#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is -#'to a point location, only regular grids are allowed for exp and obs. Only needed if the -#'downscaling is to a point location. -#'@param method_point_interp a character vector indicating the interpolation method to interpolate -#'model gridded data into the point locations. Accepted methods are "nearest", "bilinear", "9point", -#'"invdist4nn", "NE", "NW", "SE", "SW". Only needed if the downscaling is to a point location. -#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data' -#'in exp and obs. Default set to "lat". -#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data' -#'in exp and obs. Default set to "lon". -#'@param sdate_dim a character vector indicating the start date dimension name in the element -#''data' in exp and obs. Default set to "sdate". -#'@param member_dim a character vector indicating the member dimension name in the element -#''data' in exp and obs. Default set to "member". -#'@param region a numeric vector indicating the borders of the region defined in obs. -#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers -#'to the left border, while lonmax refers to the right border. latmin indicates the lower -#'border, whereas latmax indicates the upper border. If set to NULL (default), the function -#'takes the first and last elements of the latitudes and longitudes. -#'@param loocv a logical vector indicating whether to perform leave-one-out cross-validation -#'in the fitting of the logistic regression. Default to TRUE. -#'@param ncores an integer indicating the number of cores to use in parallel computation. -#'The default value is NULL. 
-#'@import multiApply
-#'@import nnet
-#'@importFrom plyr laply
-#'
-#'@seealso \code{\link[nnet]{multinom}}
-#'
-#'@return A list of three elements. 'data' contains the downscaled data, which can be either
-#'in the form of probabilities for each category or the most likely category. 'lat' contains the
-#'downscaled latitudes, and 'lon' the downscaled longitudes.
-#'
-#'@examples
-#'exp <- rnorm(1500)
-#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 15)
-#'exp_lons <- 1:5
-#'exp_lats <- 1:4
-#'obs <- rnorm(2700)
-#'dim(obs) <- c(lat = 12, lon = 15, sdate = 15)
-#'obs_lons <- seq(1, 5, 4/14)
-#'obs_lats <- seq(1, 4, 3/11)
-#'exp <- s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons)
-#'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons)
-#'res <- CST_LogisticReg(exp = exp, obs = obs, int_method = 'bil', target_grid = 'r1280x640',
-#'probs_cat = c(1/3, 2/3))
-#'@export
-CST_LogisticReg <- function(exp, obs, target_grid, int_method = NULL, log_reg_method = "ens_mean",
-                            probs_cat = c(1/3, 2/3), return_most_likely_cat = FALSE, points = NULL,
-                            method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate",
-                            member_dim = "member", region = NULL, loocv = TRUE, ncores = NULL) {
-
-  if (!inherits(exp, 's2dv_cube')) {
-    stop("Parameter 'exp' must be of the class 's2dv_cube'")
-  }
-
-  if (!inherits(obs, 's2dv_cube')) {
-    stop("Parameter 'obs' must be of the class 's2dv_cube'")
-  }
-
-  res <- LogisticReg(exp = exp$data, obs = obs$data, exp_lats = exp$coords[[lat_dim]],
-                     exp_lons = exp$coords[[lon_dim]], obs_lats = obs$coords[[lat_dim]],
-                     obs_lons = obs$coords[[lon_dim]], target_grid = target_grid,
-                     probs_cat = probs_cat, return_most_likely_cat = return_most_likely_cat,
-                     int_method = int_method, log_reg_method = log_reg_method, points = points,
-                     method_point_interp = method_point_interp, lat_dim = lat_dim,
-                     lon_dim = lon_dim, sdate_dim = sdate_dim, member_dim = member_dim,
-                     source_file_exp = exp$attrs$source_files[1], source_file_obs = obs$attrs$source_files[1],
-                     region = region, loocv = loocv, ncores = ncores)
-
-  # Modify data, lat and lon in the original s2dv_cube, adding the downscaled data
-  exp$data <- res$data
-  exp$dims <- dim(exp$data)
-  exp$coords[[lon_dim]] <- res$lon
-  exp$coords[[lat_dim]] <- res$lat
-
-  obs$data <- res$obs
-  obs$dims <- dim(obs$data)
-  obs$coords[[lon_dim]] <- res$lon
-  obs$coords[[lat_dim]] <- res$lat
-
-  res_s2dv <- list(exp = exp, obs = obs)
-  return(res_s2dv)
-}
-
-#'@rdname LogisticReg
-#'@title Downscaling using interpolation and logistic regression.
-#'
-#'@author J. Ramon, \email{jaume.ramon@bsc.es}
-#'
-#'@description This function performs a downscaling using an interpolation and a logistic
-#'regression. See \code{\link[nnet]{multinom}} for further details. It is recommended that
-#'the observations are passed already in the target grid. Otherwise, the function will also
-#'perform an interpolation of the observed field into the target grid. The coarse scale and
-#'observation data can be either global or regional. In the latter case, the region is
-#'defined by the user. In principle, the coarse and observation data are intended to be of
-#'the same variable, although different variables can also be admitted.
-#'
-#'@param exp an array with named dimensions containing the experimental field on the
-#'coarse scale for which the downscaling is aimed. The object must have, at least,
-#'the dimensions latitude, longitude, start date and member. The object is expected to be
-#'already subset for the desired region.
Data can be in one or two integrated regions, e.g., -#'crossing the Greenwich meridian. To get the correct results in the latter case, -#'the borders of the region should be specified in the parameter 'region'. See parameter -#''region'. -#'@param obs an array with named dimensions containing the observational field. The object -#'must have, at least, the dimensions latitude, longitude and start date. The object is -#'expected to be already subset for the desired region. -#'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must -#'range from -90 to 90. -#'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes -#'can range from -180 to 180 or from 0 to 360. -#'@param obs_lats a numeric vector containing the latitude values in 'obs'. Latitudes must -#'range from -90 to 90. -#'@param obs_lons a numeric vector containing the longitude values in 'obs'. Longitudes -#'can range from -180 to 180 or from 0 to 360. -#'@param target_grid a character vector indicating the target grid to be passed to CDO. -#'It must be a grid recognised by CDO or a NetCDF file. -#'@param int_method a character vector indicating the regridding method to be passed to CDORemap. -#'Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is to be used, CDO_1.9.8 -#'or newer version is required. -#'@param log_reg_method a character vector indicating the logistic regression method to be used. -#'Accepted methods are "ens_mean", "ens_mean_sd", "sorted_members". "ens_mean" uses the ensemble -#'mean anomalies as predictors in the logistic regression, "ens_mean_sd" uses the ensemble -#'mean anomalies and the ensemble spread (computed as the standard deviation of all the members) -#'as predictors in the logistic regression, and "sorted_members" considers all the members -#'ordered decreasingly as predictors in the logistic regression. Default method is "ens_mean". -#'@param probs_cat a numeric vector indicating the percentile thresholds separating the -#'climatological distribution into different classes (categories). Default to c(1/3, 2/3). See -#'\code{\link[easyVerification]{convert2prob}}. -#'@param return_most_likely_cat if TRUE, the function returns the most likely category. If -#'FALSE, the function returns the probabilities for each category. Default to FALSE. -#'@param points a list of two elements containing the point latitudes and longitudes -#'of the locations to downscale the model data. The list must contain the two elements -#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is -#'to a point location, only regular grids are allowed for exp and obs. Only needed if the -#'downscaling is to a point location. -#'@param method_point_interp a character vector indicating the interpolation method to interpolate -#'model gridded data into the point locations. Accepted methods are "nearest", "bilinear", "9point", -#'"invdist4nn", "NE", "NW", "SE", "SW". Only needed if the downscaling is to a point location. -#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data' -#'in exp and obs. Default set to "lat". -#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data' -#'in exp and obs. Default set to "lon". -#'@param sdate_dim a character vector indicating the start date dimension name in the element -#''data' in exp and obs. Default set to "sdate". 
-#'@param member_dim a character vector indicating the member dimension name in the element
-#''data' in exp and obs. Default set to "member".
-#'@param source_file_exp a character vector with a path to an example file of the exp data.
-#'Only needed if the downscaling is to a point location.
-#'@param source_file_obs a character vector with a path to an example file of the obs data.
-#'Only needed if the downscaling is to a point location.
-#'@param region a numeric vector indicating the borders of the region defined in obs.
-#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers
-#'to the left border, while lonmax refers to the right border. latmin indicates the lower
-#'border, whereas latmax indicates the upper border. If set to NULL (default), the function
-#'takes the first and last elements of the latitudes and longitudes.
-#'@param loocv a logical vector indicating whether to perform leave-one-out cross-validation
-#'in the fitting of the logistic regression. Default to TRUE.
-#'@param ncores an integer indicating the number of cores to use in parallel computation.
-#'The default value is NULL.
-#'@import multiApply
-#'@import nnet
-#'@importFrom plyr laply
-#'
-#'@seealso \code{\link[nnet]{multinom}}
-#'
-#'@return A list of three elements. 'data' contains the downscaled data, which can be either
-#'in the form of probabilities for each category or the most likely category. 'lat' contains the
-#'downscaled latitudes, and 'lon' the downscaled longitudes.
-#'
-#'@examples
-#'exp <- rnorm(1500)
-#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 15)
-#'exp_lons <- 1:5
-#'exp_lats <- 1:4
-#'obs <- rnorm(2700)
-#'dim(obs) <- c(lat = 12, lon = 15, sdate = 15)
-#'obs_lons <- seq(1, 5, 4/14)
-#'obs_lats <- seq(1, 4, 3/11)
-#'res <- LogisticReg(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons,
-#'obs_lats = obs_lats, obs_lons = obs_lons, int_method = 'bil', target_grid = 'r1280x640',
-#'probs_cat = c(1/3, 2/3))
-#'@export
-LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid,
-                        int_method = NULL, log_reg_method = "ens_mean", probs_cat = c(1/3, 2/3),
-                        return_most_likely_cat = FALSE, points = NULL, method_point_interp = NULL,
-                        lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", member_dim = "member",
-                        source_file_exp = NULL, source_file_obs = NULL, region = NULL, loocv = TRUE, ncores = NULL) {
-
-  #-----------------------------------
-  # Checks
-  #-----------------------------------
-  if (!inherits(target_grid, 'character')) {
-    stop("Parameter 'target_grid' must be of the class 'character'")
-  }
-
-  if (!is.null(int_method) && !inherits(int_method, 'character')) {
-    stop("Parameter 'int_method' must be of the class 'character'")
-  }
-
-  if (!is.null(method_point_interp) && !inherits(method_point_interp, 'character')) {
-    stop("Parameter 'method_point_interp' must be of the class 'character'")
-  }
-
-  if (!inherits(lat_dim, 'character')) {
-    stop("Parameter 'lat_dim' must be of the class 'character'")
-  }
-
-  if (!inherits(lon_dim, 'character')) {
-    stop("Parameter 'lon_dim' must be of the class 'character'")
-  }
-
-  if (!inherits(sdate_dim, 'character')) {
-    stop("Parameter 'sdate_dim' must be of the class 'character'")
-  }
-
-  if (!inherits(member_dim, 'character')) {
-    stop("Parameter 'member_dim' must be of the class 'character'")
-  }
-
-  if (!is.null(source_file_exp) && !inherits(source_file_exp, 'character')) {
-    stop("Parameter 'source_file_exp' must be of the class 'character'")
-  }
-
-  if 
(!is.null(source_file_obs) & !inherits(source_file_obs, 'character')) { - stop("Parameter 'source_file_obs' must be of the class 'character'") - } - - if (!inherits(loocv, 'logical')) { - stop("Parameter 'loocv' must be set to TRUE or FALSE") - } - - if (is.na(match(lon_dim, names(dim(exp))))) { - stop("Missing longitude dimension in 'exp', or does not match the parameter ", - "'lon_dim'") - } - - if (is.na(match(lat_dim, names(dim(exp))))) { - stop("Missing latitude dimension in 'exp', or does not match the parameter ", - "'lat_dim'") - } - - if (is.na(match(sdate_dim, names(dim(exp)))) | is.na(match(sdate_dim, names(dim(obs))))) { - stop("Missing start date dimension in 'exp' and/or 'obs', or does not match the parameter ", - "'sdate_dim'") - } - - if (is.na(match(member_dim, names(dim(exp))))) { - stop("Missing member dimension in 'exp' and/or 'obs', or does not match the parameter ", - "'member_dim'") - } - - # When observations are pointwise - if (!is.null(points) & !is.na(match("location", names(dim(obs))))) { - point_obs <- T - # dimension aux in obs is needed - if (is.na(match("aux", names(dim(obs))))) { - obs <- InsertDim(obs, posdim = 1, lendim = 1, name = "aux") - } - } else { - point_obs <- F - } - - if (!is.null(points) & (is.null(source_file_exp))) { - stop("No source file found. Source file must be provided in the parameter 'source_file_exp'.") - } - - if (!is.null(points) & is.null(method_point_interp)) { - stop("Please provide the interpolation method to interpolate gridded data to point locations ", - "through the parameter 'method_point_interp'.") - } - - if (is.null(region)) { - warning("The borders of the downscaling region have not been provided. Assuming the four borders ", - "of the downscaling region are defined by the first and last elements of the parametres ", - "'obs_lats' and 'obs_lons'.") - region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)]) - } - ## ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | - length(ncores) > 1) { - stop("Parameter 'ncores' must be a positive integer.") - } - } - - # the code is not yet prepared to handle members in the observations - restore_ens <- FALSE - if (member_dim %in% names(dim(obs))) { - if (identical(as.numeric(dim(obs)[member_dim]), 1)) { - restore_ens <- TRUE - obs <- ClimProjDiags::Subset(x = obs, along = member_dim, indices = 1, drop = 'selected') - } else { - stop("Not implemented for observations with members ('obs' can have 'member_dim', ", - "but it should be of length = 1).") - } - } - - exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid, - method_remap = int_method, points = points, source_file = source_file_exp, - lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp, - region = region, ncores = ncores) - - # compute ensemble mean anomalies - if (log_reg_method == "ens_mean") { - predictor <- .get_ens_mean_anom(obj_ens = exp_interpolated$data, member_dim = member_dim, sdate_dim = sdate_dim, - ncores = ncores) - - target_dims_predictor <- sdate_dim - } - else if (log_reg_method == "ens_mean_sd") { - - require(abind) - - ens_mean_anom <- .get_ens_mean_anom(obj_ens = exp_interpolated$data, member_dim = member_dim, - sdate_dim = sdate_dim, ncores = ncores) - ens_sd <- .get_ens_sd(obj_ens = exp_interpolated$data, member_dim = member_dim, ncores = ncores) - - #merge two arrays into one array of predictors - predictor <- abind(ens_mean_anom, 
ens_sd, along = 1/2)  # along = 1/2 binds along a new leading dimension
-    names(dim(predictor)) <- c("pred", names(dim(ens_mean_anom)))
-
-    target_dims_predictor <- c(sdate_dim, "pred")
-  } else if (log_reg_method == "sorted_members") {
-    predictor <- .sort_members(obj_ens = exp_interpolated$data, member_dim = member_dim, ncores = ncores)
-
-    target_dims_predictor <- c(sdate_dim, member_dim)
-  } else {
-    stop(paste0(log_reg_method, " not recognised or not implemented."))
-  }
-
-  # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to
-  # the same grid to force the matching
-  if ((!.check_coords(lat1 = as.numeric(exp_interpolated$lat), lat2 = obs_lats,
-                      lon1 = as.numeric(exp_interpolated$lon), lon2 = obs_lons)) || !point_obs) {
-    obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid,
-                                      method_remap = int_method, points = points, source_file = source_file_obs,
-                                      lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp,
-                                      region = region, ncores = ncores)
-    obs_ref <- obs_interpolated$data
-  } else {
-    obs_ref <- obs
-  }
-
-  # convert observations to categorical predictands
-  obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) {
-    if (all(is.na(x))) {
-      rep(NA, length(x))
-    } else {
-      terc <- convert2prob(as.vector(x), prob = probs_cat)
-      apply(terc, 1, function(r) which(r == 1))
-    }},
-    output_dims = sdate_dim, ncores = ncores)$output1
-
-  res <- Apply(list(predictor, obs_cat), target_dims = list(target_dims_predictor, sdate_dim),
-               fun = function(x, y) .log_reg(x = x, y = y, loocv = loocv, probs_cat = probs_cat),
-               output_dims = c(sdate_dim, "category"), ncores = ncores)$output1
-
-  if (return_most_likely_cat) {
-    res <- Apply(res, target_dims = c(sdate_dim, "category"), .most_likely_category,
-                 output_dims = sdate_dim, ncores = ncores)$output1
-  }
-
-  # restore ensemble dimension in observations if it existed originally
-  if (restore_ens) {
-    obs_ref <- s2dv::InsertDim(obs_ref, posdim = 1, lendim = 1, name = member_dim, ncores = ncores)
-  }
-
-  res <- list(data = res, obs = obs_ref, lon = exp_interpolated$lon, lat = exp_interpolated$lat)
-
-  return(res)
-}
-
-.most_likely_category <- function(data) {
-  # data, expected dims: start date, category (in this order)
-
-  if (all(is.na(data))) {
-    mlc <- rep(NA, nrow(data))
-  } else {
-    mlc <- apply(data, 1, which.max)
-  }
-  return(mlc)
-}
-
-.sort_members <- function(obj_ens, member_dim, ncores = NULL) {
-
-  sorted <- Apply(obj_ens, target_dims = member_dim, sort, decreasing = TRUE, na.last = TRUE, ncores = ncores)$output1
-
-  return(sorted)
-}
-
-.get_ens_sd <- function(obj_ens, member_dim, ncores = NULL) {
-
-  # compute ensemble spread
-  ens_sd <- Apply(obj_ens, target_dims = member_dim, sd, na.rm = TRUE, ncores = ncores)$output1
-
-  return(ens_sd)
-}
-
-.get_ens_mean_anom <- function(obj_ens, member_dim, sdate_dim, ncores = NULL) {
-
-  require(s2dv)
-
-  # compute climatology
-  clim <- Apply(obj_ens, target_dims = c(member_dim, sdate_dim), mean, ncores = ncores)$output1
-
-  # compute ensemble mean
-  ens_mean <- Apply(obj_ens, target_dims = member_dim, mean, na.rm = TRUE, ncores = ncores)$output1
-
-  # compute ensemble mean anomalies
-  anom <- Ano(ens_mean, clim, ncores = ncores)
-
-  return(anom)
-}
-
-# atomic functions for logistic regressions
-.log_reg <- function(x, y, loocv, probs_cat) {
-
-  tmp_df <- data.frame(x = x, y = y)
-
-  # if the data is all NA, force return of NA
-  if (all(is.na(tmp_df)) || (sum(apply(tmp_df, 2, function(x) !all(is.na(x)))) == 1) ||
all(is.na(tmp_df$y))) {
-
-    n1 <- nrow(tmp_df)
-    n2 <- length(probs_cat) + 1
-    res <- matrix(NA, nrow = n1, ncol = n2)
-
-  } else {
-    # training
-    lm1 <- .train_lr(df = tmp_df, loocv = loocv)
-
-    # prediction
-    res <- .pred_lr(lm1 = lm1, df = tmp_df, loocv = loocv, probs_cat = probs_cat)
-  }
-  return(res)
-}
-
-#-----------------------------------
-# Function to train the logistic regressions.
-#-----------------------------------
-.train_lr <- function(df, loocv) {
-
-  require(nnet)
-
-  # Remove columns containing only NA's
-  df <- df[, apply(df, 2, function(x) !all(is.na(x))), drop = FALSE]
-
-  if (loocv) {
-
-    lm1 <- lapply(1:nrow(df), function(j) multinom(y ~ ., data = df[-j, ]))
-
-  } else {
-
-    lm1 <- list(multinom(y ~ ., data = df))
-
-  }
-
-  return(lm1)
-}
-
-#-----------------------------------
-# Function to apply the logistic regressions.
-#-----------------------------------
-.pred_lr <- function(df, lm1, loocv, probs_cat) {
-
-  require(plyr)
-
-  if (loocv) {
-
-    # The error "Error: Results must have the same dimensions." can
-    # appear when the number of sdates is insufficient
-
-    pred_vals_ls <- list()
-    for (j in 1:nrow(df)) {
-      pred_vals_ls[[j]] <- predict(lm1[[j]], df[j, ], type = "probs")
-    }
-
-    pred_vals <- laply(pred_vals_ls, .fun = as.array)
-
-    # With only two categories, multinom() returns the probability of the
-    # second category; rebuild the two-column matrix explicitly
-    if (length(probs_cat) + 1 == 2) {
-      pred_vals_dum <- array(NA, dim = c(nrow(df), 2))
-      pred_vals_dum[, 2] <- pred_vals
-      pred_vals_dum[, 1] <- 1 - pred_vals
-      pred_vals <- pred_vals_dum
-      colnames(pred_vals) <- c(1, 2)
-    }
-
-  } else {
-
-    pred_vals <- predict(lm1[[1]], df, type = "probs")
-
-    if (length(probs_cat) + 1 == 2) {
-      pred_vals_dum <- array(NA, dim = c(nrow(df), 2))
-      pred_vals_dum[, 2] <- pred_vals
-      pred_vals_dum[, 1] <- 1 - pred_vals
-      pred_vals <- pred_vals_dum
-      colnames(pred_vals) <- c(1, 2)
-    }
-
-  }
-
-  return(pred_vals)
-}
diff --git a/modules/Downscaling/tmp/Utils.R b/modules/Downscaling/tmp/Utils.R
deleted file mode 100644
index 3cd65852..00000000
--- a/modules/Downscaling/tmp/Utils.R
+++ /dev/null
@@ -1,39 +0,0 @@
-.check_coords <- function(lat1, lon1, lat2, lon2) {
-  if (all(as.numeric(lat1) == as.numeric(lat2)) && all(as.numeric(lon1) == as.numeric(lon2))) {
-    match <- TRUE
-  } else {
-    match <- FALSE
-  }
-  return(match)
-}
-
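For reference, the fit that .log_reg delegates to .train_lr and .pred_lr is an ordinary multinomial logistic regression of the observed category on the predictors, returning one probability per category. A minimal toy sketch, assuming tercile categories as with the default probs_cat = c(1/3, 2/3) (the names x, y, terc and fit are illustrative only, not part of the module):

library(nnet)

set.seed(1)
x <- rnorm(30)                             # e.g. ensemble-mean predictor for 30 start dates
terc <- quantile(x, probs = c(1/3, 2/3))   # tercile thresholds
y <- factor(findInterval(rnorm(30, mean = x), terc) + 1)  # observed category: 1, 2 or 3

fit <- multinom(y ~ x, trace = FALSE)
predict(fit, newdata = data.frame(x = 0.5), type = "probs")  # one probability per category

-# reorder dims to a reference array.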
If they do not exist, they are created -# example -#arr_ref <- array(NA, c(dataset = 1, sdate = 8, member = 3, ftime = 1, lon = 269, lat = 181)) -#arr_to_reorder <- array(NA, c(dataset = 1, member = 3, sdate = 8, lat = 181, lon = 269, pp = 1)) - -.reorder_dims <- function(arr_ref, arr_to_reorder) { - - miss_dims <- names(dim(arr_ref))[!(names(dim(arr_ref)) %in% names(dim(arr_to_reorder)))] - - if (length(miss_dims) != 0) { - for (m in seq(miss_dims)) { - arr_to_reorder <- InsertDim(data = arr_to_reorder, posdim = length(dim(arr_to_reorder)) + 1, lendim = 1, - name = miss_dims[m]) - } - } - - # TODO: add code to reorder dimensions and put the non-common dimensions at the end - - orddim <- match(names(dim(arr_ref)),names(dim(arr_to_reorder))) - return(Reorder(data = arr_to_reorder, order = orddim)) -} - -#.check_coords <- function(lat1, lon1, lat2, lon2) { -# match <- TRUE -# if (!((length(lat1) == length(lat2)) & (length(lon1) == length(lon2)))) { -# match <- FALSE -# } -# return(match) -#} - diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 2de0406c..cf97efc2 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -15,6 +15,5 @@ Loading <- function(recipe) { } else { stop("Incorrect time horizon.") } - return(data) } diff --git a/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml b/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml deleted file mode 100644 index 99e02c7d..00000000 --- a/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml +++ /dev/null @@ -1,62 +0,0 @@ -# ▄████▄ ██████ ▓█████▄ ▒█████ █ █░███▄ █ ██████ ▄████▄ ▄▄▄ ██▓ ▓█████ -#▒██▀ ▀█ ▒██ ▒ ▒██▀ ██▌▒██▒ ██▒▓█░ █ ░█░██ ▀█ █ ▒██ ▒ ▒██▀ ▀█ ▒████▄ ▓██▒ ▓█ ▀ -#▒▓█ ▄ ░ ▓██▄ ░██ █▌▒██░ ██▒▒█░ █ ░█▓██ ▀█ ██▒░ ▓██▄ ▒▓█ ▄ ▒██ ▀█▄ ▒██░ ▒███ -#▒▓▓▄ ▄██▒ ▒ ██▒░▓█▄ ▌▒██ ██░░█░ █ ░█▓██▒ ▐▌██▒ ▒ ██▒▒▓▓▄ ▄██▒░██▄▄▄▄██ ▒██░ ▒▓█ ▄ -#▒ ▓███▀ ░▒██████▒▒░▒████▓ ░ ████▓▒░░░██▒██▓▒██░ ▓██░▒██████▒▒▒ ▓███▀ ░ ▓█ ▓██▒░██████▒░▒████▒ -#░ ░▒ ▒ ░▒ ▒▓▒ ▒ ░ ▒▒▓ ▒ ░ ▒░▒░▒░ ░ ▓░▒ ▒ ░ ▒░ ▒ ▒ ▒ ▒▓▒ ▒ ░░ ░▒ ▒ ░ ▒▒ ▓▒█░░ ▒░▓ ░░░ ▒░ ░ -# ░ ▒ ░ ░▒ ░ ░ ░ ▒ ▒ ░ ▒ ▒░ ▒ ░ ░ ░ ░░ ░ ▒░░ ░▒ ░ ░ ░ ▒ ▒ ▒▒ ░░ ░ ▒ ░ ░ ░ ░ -#░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ▒ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ▒ ░ ░ ░ -#░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ -#░ ░ ░ -Description: - Author: J. 
Ramon -Info: downscaling of seasonal predictions from coarse to fine grids - -Analysis: - Horizon: Seasonal - Variables: - name: tas - freq: monthly_mean - Datasets: - System: - name: ECMWF-SEAS5 - Multimodel: no - Reference: - name: ERA5 - Time: - sdate: '0501' - hcst_start: '2000' - hcst_end: '2005' - ftime_min: 1 - ftime_max: 1 -#sometimes we want the region for the obs to be bigger than for hcst - Region: - latmin: 34.1 - latmax: 45.1 - lonmin: -12.5 - lonmax: 6.35 - Regrid: - method: bilinear - type: none - Workflow: - Calibration: - method: raw - Skill: - metric: BSS10 BSS90 Mean_Bias_SS RPSS ROCSS - Downscaling: - # Assumption 1: leave-one-out cross-validation is always applied - # Assumption 2: for analogs, we select the best analog (minimum distance) - # TO DO: add downscaling to point locations - type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg' - int_method: conservative # optional, regridding method accepted by CDO - bc_method: simple_bias # optional, 'simple_bias', 'calibration', 'quantile_mapping' - lr_method: # optional, 'basic', 'large_scale', '4nn' - log_reg_method: # optional, 'ens_mean', 'ens_mean_sd', 'sorted_members' - target_grid: /esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # optional, nc file or grid accepted by CDO - nanalogs: # optional, number of analogs to be searched - Output_format: S2S4E -Run: - Loglevel: INFO - Terminal: yes - output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Saving/R/get_filename.R b/modules/Saving/R/get_filename.R index 1c925651..f18a6fb4 100644 --- a/modules/Saving/R/get_filename.R +++ b/modules/Saving/R/get_filename.R @@ -17,7 +17,8 @@ get_filename <- function(dir, recipe, var, date, agg, file.type) { switch(tolower(agg), "country" = {gg <- "-country"}, - "global" = {gg <- ""}) + "global" = {gg <- ""}, + "region" = {gg <- "-region"}) system <- gsub('.','', recipe$Analysis$Datasets$System$name, fixed = T) reference <- gsub('.','', recipe$Analysis$Datasets$Reference$name, fixed = T) diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index fd27ba75..95b6856a 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -307,7 +307,7 @@ Skill <- function(recipe, data, agg = 'global') { } info(recipe$Run$logger, "##### SKILL METRIC COMPUTATION COMPLETE #####") # Save outputs - #NURIA: I think change the output_dir is a problem for future savings +#NURIA: I think change the output_dir is a problem for future savings if (recipe$Analysis$Workflow$Skill$save != 'none') { info(recipe$Run$logger, "##### START SAVING SKILL METRIC #####") } diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index a6b6bd75..1f6e836d 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -210,4 +210,3 @@ Visualization <- function(recipe, } } } - diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 5b81420b..57bece61 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -1,6 +1,8 @@ check_recipe <- function(recipe) { + # recipe: yaml recipe already read it - ## TODO: set up logger-less case + ## TODO: Adapt to decadal case + info(recipe$Run$logger, paste("Checking recipe:", recipe$recipe_path)) # --------------------------------------------------------------------- @@ -237,9 +239,7 @@ check_recipe <- function(recipe) { # WORKFLOW CHECKS # --------------------------------------------------------------------- - # 
Calibration - # If 'method' is FALSE/no/'none' or NULL, set to 'raw' - ## TODO: Review this check + # Only one Calibration method allowed: if ((is.logical(recipe$Analysis$Workflow$Calibration$method) && recipe$Analysis$Workflow$Calibration$method == FALSE) || tolower(recipe$Analysis$Workflow$Calibration$method) == 'none' || @@ -296,109 +296,6 @@ check_recipe <- function(recipe) { } } } - # Downscaling - ## TODO: Simplify checks (reduce number of lines) - ## TODO: Add saving checks - downscal_params <- lapply(recipe$Analysis$Workflow$Downscaling, tolower) - # Define accepted entries - DOWNSCAL_TYPES <- c("none", "int", "intbc", "intlr", "analogs", "logreg") - BC_METHODS <- c("simple_bias", "calibration", "quantile_mapping") - LR_METHODS <- c("basic", "large_scale", "4nn") - LOGREG_METHODS <- c("ens_mean", "ens_mean_sd", "sorted_members") - # Check downscaling type - if ("type" %in% names(downscal_params)) { - if (length(downscal_params$type) == 0) { - downscal_params$type <- "none" - warn(recipe$Run$logger, - paste("Downscaling 'type' is empty in the recipe, setting it to", - "'none'.")) - } - if (!(downscal_params$type %in% DOWNSCAL_TYPES)) { - error(recipe$Run$logger, - paste0("The type of Downscaling request in the recipe is not ", - "available. It must be one of the following: ", - paste(DOWNSCAL_TYPES, collapse = ", "), ".")) - error_status <- T - } - if ((downscal_params$type %in% c("int", "intbc", "intlr", "logreg")) && - (length(downscal_params$target_grid) == 0)) { - error(recipe$Run$logger, - paste("A target grid is required for the downscaling method", - "requested in the recipe.")) - error_status <- T - } - if (downscal_params$type == "int") { - if (length(downscal_params$int_method) == 0) { - error(recipe$Run$logger, - paste("Downscaling type 'int' was requested, but no", - "interpolation method is provided in the recipe.")) - error_status <- T - } - } else if (downscal_params$type == "intbc") { - if (length(downscal_params$int_method) == 0) { - error(recipe$Run$logger, - paste("Downscaling type 'int' was requested in the recipe, but no", - "interpolation method is provided.")) - error_status <- T - } - if (length(downscal_params$bc_method)== 0) { - error(recipe$Run$logger, - paste("Downscaling type 'intbc' was requested in the recipe, but", - "no bias correction method is provided.")) - error_status <- T - } else if (!(downscal_params$bc_method %in% BC_METHODS)) { - error(recipe$Run$logger, - paste0("The accepted Bias Correction methods for the downscaling", - " module are: ", paste(BC_METHODS, collapse = ", "), ".")) - error_status <- T - } - } else if (downscal_params$type == "intlr") { - if (length(downscal_params$int_method) == 0) { - error(recipe$Run$logger, - paste("Downscaling type 'intlr' was requested in the recipe, but", - "no interpolation method was provided.")) - error_status <- T - } - if (length(downscal_params$lr_method) == 0) { - error(recipe$Run$logger, - paste("Downscaling type 'intlr' was requested in the recipe, but", - "no linear regression method was provided.")) - error_status <- T - } else if (!(downscal_params$lr_method %in% LR_METHODS)) { - error(recipe$Run$logger, - paste0("The accepted linear regression methods for the", - " downscaling module are: ", - paste(LR_METHODS, collapse = ", "), ".")) - error_status <- T - } - } else if (downscal_params$type == "analogs") { - if (length(nanalogs) == 0) { - warn(recipe$Run$logger, - paste("Downscaling type is 'analogs, but the number of analogs", - "has not been provided in the recipe.")) - } - } else if 
(downscal_params$type == "logreg") { - if (length(downscal_params$int_method) == 0) { - error(recipe$Run$logger, - paste("Downscaling type 'logreg' was requested in the recipe, but", - "no interpolation method was provided.")) - error_status <- T - } - if (length(downscal_params$log_reg_method) == 0) { - error(recipe$Run$logger, - paste("Downscaling type 'logreg' was requested in the recipe,", - "but no logistic regression method is provided.")) - error_status <- T - } else if (!(downscal_params$log_reg_method %in% LOGREG_METHODS)) { - error(recipe$Run$logger, - paste0("The accepted logistic regression methods for the ", - "downscaling module are: ", - paste(LOGREG_METHODS, collapse = ", "), ".")) - error_status <- T - } - } - } - # Indices if ("Indices" %in% names(recipe$Analysis$Workflow)) { nino_indices <- paste0("nino", c("1+2", "3", "3.4", "4")) @@ -459,7 +356,6 @@ check_recipe <- function(recipe) { error_status <- T } } - # Probabilities if ("Probabilities" %in% names(recipe$Analysis$Workflow)) { if (is.null(recipe$Analysis$Workflow$Probabilities$percentiles)) { @@ -484,7 +380,6 @@ check_recipe <- function(recipe) { error_status <- T } } - # Visualization if ("Visualization" %in% names(recipe$Analysis$Workflow)) { PLOT_OPTIONS <- c("skill_metrics", "forecast_ensemble_mean", @@ -552,7 +447,6 @@ check_recipe <- function(recipe) { # RUN CHECKS # --------------------------------------------------------------------- - ## TODO: These checks should probably go first RUN_FIELDS = c("Loglevel", "Terminal", "output_dir", "code_dir") LOG_LEVELS = c("INFO", "DEBUG", "WARN", "ERROR", "FATAL") diff --git a/tools/data_summary.R b/tools/data_summary.R index e3de9711..11f365cd 100644 --- a/tools/data_summary.R +++ b/tools/data_summary.R @@ -10,8 +10,7 @@ data_summary <- function(data_cube, recipe) { object_name <- deparse(substitute(data_cube)) if (recipe$Analysis$Variables$freq == "monthly_mean") { date_format <- "%b %Y" - } else if ((recipe$Analysis$Variables$freq == "daily_mean") || - (recipe$Analysis$Variables$freq == "daily")) { + } else if (recipe$Analysis$Variables$freq == "daily_mean") { date_format <- "%b %d %Y" } months <- unique(format(as.Date(data_cube$attrs$Dates), format = '%B')) -- GitLab