diff --git a/conf/archive.yml b/conf/archive.yml
index eb8e86a574e680085c53ac4c115b426b75e3a559..87e9cd654b6db79b715f67cda07642e4df9ed4c1 100644
--- a/conf/archive.yml
+++ b/conf/archive.yml
@@ -16,7 +16,7 @@ archive:
       "tasmin":"_f24h/", "tasmax":"_f24h/",
       "ta300":"_f12h/", "ta500":"_f12h/", "ta850":"_f12h/",
       "g300":"_f12h/", "g500":"_f12h/", "g850":"_f12h/",
-      "tdps":"_f6h/"}
+      "tdps":"_f6h/","hurs":"_f6h/"}
     nmember:
       fcst: 51
       hcst: 25
@@ -183,9 +183,9 @@ archive:
     name: "ECMWF CERRA"
     institution: "European Centre for Medium-Range Weather Forecasts"
     src: "recon/ecmwf/cerra/"
-    daily_mean: {"hur":"_f3h-r2631x1113/", "ps":"_f3h-r2631x1113/", "sfcWind":"_f3h-r2631x1113/",
+    daily_mean: {"hurs":"_f3h-r2631x1113/", "ps":"_f3h-r2631x1113/", "sfcWind":"_f3h-r2631x1113/",
                  "tas":"_f3h-r2631x1113/", "winddir":"_f3h-r2631x1113/"}
-    monthly_mean: {"hur":"_f3h-r2631x1113/", "ps":"_f3h-r2631x1113/", "sfcWind":"_f3h-r2631x1113/",
+    monthly_mean: {"hurs":"_f3h-r2631x1113/", "ps":"_f3h-r2631x1113/", "sfcWind":"_f3h-r2631x1113/",
                    "tas":"_f3h-r2631x1113/", "winddir":"_f3h-r2631x1113/","tasmin":"_f24h-r2631x1113/","tasmax":"_f24h-r2631x1113/"}
     calendar: "proleptic_gregorian"
     reference_grid: "/esarchive/recon/ecmwf/cerra/monthly_mean/tas_f3h-r2631x1113/tas_200506.nc"
diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R
new file mode 100644
index 0000000000000000000000000000000000000000..3e574e73347b356c555b41999a01e8bc778d1356
--- /dev/null
+++ b/modules/Downscaling/Downscaling.R
@@ -0,0 +1,276 @@
+### Downscaling Module
+source('modules/Downscaling/tmp/Interpolation.R')
+source('modules/Downscaling/tmp/Intbc.R')
+source('modules/Downscaling/tmp/Intlr.R')
+source('modules/Downscaling/tmp/Analogs.R')
+source('modules/Downscaling/tmp/LogisticReg.R')
+source('modules/Downscaling/tmp/Utils.R')
+
+## Entry params: data and recipe
+downscale_datasets <- function(recipe, data) {
+  # Function that downscales the hindcast using the method stated in the
+  # recipe. For the moment, the forecast must be null.
+  #
+  # data: list of s2dv_cube objects containing the hcst, obs and fcst.
+  # recipe: object obtained when passing the .yml recipe file to read_yaml()
+
+  type <- tolower(recipe$Analysis$Workflow$Downscaling$type)
+
+  if (!is.null(data$fcst)) {
+    warning("Downscaling will only be performed on the hindcast data")
+    data$fcst <- NULL
+  }
+
+  if (type == "none") {
+
+    hcst_downscal <- data$hcst
+    DOWNSCAL_MSG <- "##### NO DOWNSCALING PERFORMED #####"
+
+  } else {
+    # Downscaling function params
+    int_method <- tolower(recipe$Analysis$Workflow$Downscaling$int_method)
+    bc_method <- tolower(recipe$Analysis$Workflow$Downscaling$bc_method)
+    cal_method <- tolower(recipe$Analysis$Workflow$Downscaling$cal_method)
+    lr_method <- tolower(recipe$Analysis$Workflow$Downscaling$lr_method)
+    log_reg_method <- tolower(recipe$Analysis$Workflow$Downscaling$log_reg_method)
+    target_grid <- tolower(recipe$Analysis$Workflow$Downscaling$target_grid)
+    nanalogs <- as.numeric(recipe$Analysis$Workflow$Downscaling$nanalogs)
+
+    if (is.null(recipe$Analysis$ncores)) {
+      ncores <- 1
+    } else {
+      ncores <- recipe$Analysis$ncores
+    }
+
+    # TO DO: add the parameter loocv where it corresponds
+    if (is.null(recipe$Analysis$loocv)) {
+      loocv <- TRUE
+    } else {
+      loocv <- recipe$Analysis$loocv
+    }
+
+    DOWNSCAL_TYPES <- c("none", "int", "intbc", "intlr", "analogs", "logreg")
+    BC_METHODS <- c("simple_bias", "calibration", "quantile_mapping", "sbc", "cal", "qm")
+    LR_METHODS <- c("basic", "large_scale", "4nn")
+    LOG_REG_METHODS <- c("ens_mean", "ens_mean_sd", "sorted_members")
+
+    if (!(type %in% DOWNSCAL_TYPES)) {
+      stop("Downscaling type in the recipe is not available. Accepted types ",
+           "are 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'.")
+    }
+
+    if (type == "int") {
+      if (is.null(int_method)) {
+        stop("Please provide one interpolation method in the recipe.")
+      }
+
+      if (is.null(target_grid)) {
+        stop("Please provide the target grid in the recipe.")
+      }
+
+      # Ensure that observations are on the same grid as the experiments.
+      # Only needed for this method because the other methods already return
+      # the observations.
+      latmin <- data$hcst$lat[1]
+      lonmin <- data$hcst$lon[1]
+      latmax <- data$hcst$lat[length(data$hcst$lat)]
+      lonmax <- data$hcst$lon[length(data$hcst$lon)]
+      hcst_downscal <- CST_Interpolation(data$hcst,
+                                         points = NULL,
+                                         method_remap = int_method,
+                                         target_grid = target_grid,
+                                         lat_dim = "latitude",
+                                         lon_dim = "longitude",
+                                         region = c(lonmin, lonmax, latmin, latmax),
+                                         method_point_interp = NULL)
+
+      obs_downscal <- CST_Interpolation(data$obs,
+                                        points = NULL,
+                                        method_remap = int_method,
+                                        target_grid = target_grid,
+                                        lat_dim = "latitude",
+                                        lon_dim = "longitude",
+                                        region = c(lonmin, lonmax, latmin, latmax),
+                                        method_point_interp = NULL)
+
+      hcst_downscal$obs <- obs_downscal$exp
+
+      DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####"
+    } else if (type == "intbc") {
+      if (length(int_method) == 0) {
+        stop("Please provide one (and only one) interpolation method in the recipe.")
+      }
+
+      if (is.null(bc_method)) {
+        stop("Please provide one bias-correction method in the recipe. Accepted ",
+             "methods are 'simple_bias', 'calibration', 'quantile_mapping', 'sbc', 'cal', ",
+             "'qm'.")
+      }
+
+      if (is.null(target_grid)) {
+        stop("Please provide the target grid in the recipe.")
+      }
+
+      if (!(bc_method %in% BC_METHODS)) {
+        stop(paste0(bc_method, " method in the recipe is not available. Accepted methods ",
+                    "are 'simple_bias', 'calibration', 'quantile_mapping', 'sbc', 'cal', 'qm'."))
+      }
+
+      if (bc_method == "cal" | bc_method == "calibration") {
+        if (length(cal_method) == 0) {
+          stop("Please provide one (and only one) calibration method in the recipe.")
+        }
+
+        hcst_downscal <- CST_Intbc(data$hcst, data$obs,
+                                   target_grid = target_grid,
+                                   bc_method = bc_method,
+                                   int_method = int_method,
+                                   cal.method = cal_method,
+                                   points = NULL,
+                                   method_point_interp = NULL,
+                                   lat_dim = "latitude",
+                                   lon_dim = "longitude",
+                                   sdate_dim = "syear",
+                                   member_dim = "ensemble",
+                                   region = NULL,
+                                   ncores = ncores)
+
+        DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####"
+      } else {
+        hcst_downscal <- CST_Intbc(data$hcst, data$obs,
+                                   target_grid = target_grid,
+                                   bc_method = bc_method,
+                                   int_method = int_method,
+                                   points = NULL,
+                                   method_point_interp = NULL,
+                                   lat_dim = "latitude",
+                                   lon_dim = "longitude",
+                                   sdate_dim = "syear",
+                                   member_dim = "ensemble",
+                                   region = NULL,
+                                   ncores = ncores)
+        DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####"
+      }
+    } else if (type == "intlr") {
+      if (length(int_method) == 0) {
+        stop("Please provide one (and only one) interpolation method in the recipe.")
+      }
+
+      if (is.null(lr_method)) {
+        stop("Please provide one linear regression method in the recipe. Accepted ",
+             "methods are 'basic', 'large_scale', '4nn'.")
+      }
+
+      if (is.null(target_grid)) {
+        stop("Please provide the target grid in the recipe.")
+      }
+
+      if (!(lr_method %in% LR_METHODS)) {
+        stop(paste0(lr_method, " method in the recipe is not available. Accepted methods ",
+                    "are 'basic', 'large_scale', '4nn'."))
+      }
+
+      # TO DO: add the possibility to have the element 'pred' in 'data'
+      if (lr_method == "large_scale") {
+        if (is.null(data$pred$data)) {
+          stop("Please provide the large-scale predictors in the element 'data$pred$data'.")
+        }
+      } else {
+        data$pred$data <- NULL
+      }
+
+      hcst_downscal <- CST_Intlr(data$hcst, data$obs,
+                                 lr_method = lr_method,
+                                 target_grid = target_grid,
+                                 points = NULL,
+                                 int_method = int_method,
+                                 method_point_interp = NULL,
+                                 predictors = data$pred$data,
+                                 lat_dim = "latitude",
+                                 lon_dim = "longitude",
+                                 sdate_dim = "syear",
+                                 time_dim = "time",
+                                 member_dim = "ensemble",
+                                 large_scale_predictor_dimname = 'vars',
+                                 loocv = loocv,
+                                 region = NULL,
+                                 ncores = ncores)
+
+      DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####"
+    } else if (type == "analogs") {
+
+      if (is.null(nanalogs)) {
+        warning("The number of analogs to search for has not been provided in the ",
+                "recipe. Setting it to 3.")
+        nanalogs <- 3
+      }
+
+      hcst_downscal <- CST_Analogs(data$hcst, data$obs,
+                                   grid_exp = data$hcst$source_files[1],
+                                   nanalogs = nanalogs,
+                                   fun_analog = "min",
+                                   lat_dim = "latitude",
+                                   lon_dim = "longitude",
+                                   sdate_dim = "syear",
+                                   time_dim = "time",
+                                   member_dim = "ensemble",
+                                   region = NULL,
+                                   return_indices = FALSE,
+                                   loocv_window = loocv,
+                                   ncores = ncores)
+
+      DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####"
+    } else if (type == "logreg") {
+
+      if (length(int_method) == 0) {
+        stop("Please provide one (and only one) interpolation method in the recipe.")
+      }
+
+      if (is.null(log_reg_method)) {
+        stop("Please provide one logistic regression method in the recipe. Accepted ",
+             "methods are 'ens_mean', 'ens_mean_sd', 'sorted_members'.")
+      }
+
+      if (is.null(target_grid)) {
+        stop("Please provide the target grid in the recipe.")
+      }
+
+      # Since we force the creation of three categories and apply cross-validation,
+      # we need at least six years of data for the logistic regression function
+      # not to crash.
+      if (dim(data$hcst$data)[names(dim(data$hcst$data)) == "syear"] <= 5) {
+        stop("The number of start dates is insufficient for the logistic regression method. ",
+             "Please provide six or more.")
+      }
+
+      if (!(log_reg_method %in% LOG_REG_METHODS)) {
+        stop(paste0(log_reg_method, " method in the recipe is not available. Accepted methods ",
+                    "are 'ens_mean', 'ens_mean_sd', 'sorted_members'."))
+      }
+
+      hcst_downscal <- CST_LogisticReg(data$hcst, data$obs,
+                                       target_grid = target_grid,
+                                       int_method = int_method,
+                                       log_reg_method = log_reg_method,
+                                       probs_cat = c(1/3, 2/3),
+                                       return_most_likely_cat = FALSE,
+                                       points = NULL,
+                                       method_point_interp = NULL,
+                                       lat_dim = "latitude",
+                                       lon_dim = "longitude",
+                                       sdate_dim = "syear",
+                                       member_dim = "ensemble",
+                                       region = NULL,
+                                       loocv = loocv,
+                                       ncores = ncores)
+
+      DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####"
+    }
+  }
+  print(DOWNSCAL_MSG)
+  return(list(hcst = hcst_downscal$exp, obs = hcst_downscal$obs, fcst = NULL))
+}
diff --git a/modules/Downscaling/tmp/Analogs.R b/modules/Downscaling/tmp/Analogs.R
new file mode 100644
index 0000000000000000000000000000000000000000..fd6e2f9776259d87745b7aeaaaf5554d0e3907cf
--- /dev/null
+++ b/modules/Downscaling/tmp/Analogs.R
@@ -0,0 +1,537 @@
+#'@rdname CST_Analogs
+#'@title Downscaling using Analogs based on large scale fields.
+#'
+#'@author J. Ramon, \email{jaume.ramon@bsc.es}
+#'
+#'@description This function performs a downscaling using Analogs. To compute
+#'the analogs given a coarse-scale field, the function looks for days with
+#'similar conditions in the historical observations. The coarse-scale and
+#'observation data can be either global or regional. In the latter case, the
+#'region is defined by the user. In principle, the coarse and observation data
+#'should be of the same variable, although different variables can also be admitted.
+#'The analogs function will find the N best analogs based on minimum Euclidean
+#'distance.
+#'
+#'The search for analogs must be done in the longest dataset possible, but might
+#'require high-memory computational resources. This is important since it is
+#'necessary to have a good representation of the possible states of the field in
+#'the past, and therefore, to get better analogs. The function can also look for
+#'analogs within a window of D days, but it is the user who has to define that window.
+#'Otherwise, the function will look for analogs in the whole dataset. This function
+#'is intended to downscale climate prediction data (i.e., sub-seasonal, seasonal
+#'and decadal predictions) but can admit climate projections or reanalyses. It has
+#'no constraints on the specific region or variables to downscale.
+#'@param exp an 's2dv' object with named dimensions containing the experimental field on
+#'the coarse scale for which the downscaling is aimed. The object must have, at least,
+#'the dimensions latitude, longitude, start date and time. The object is expected to be
+#'already subset for the desired region. Data can be in one or two integrated regions, e.g.,
+#'crossing the Greenwich meridian. To get the correct results in the latter case,
+#'the borders of the region should be specified in the parameter 'region'. See parameter
+#''region'.
+#'@param obs an 's2dv' object with named dimensions containing the observational field.
+#'The object must have, at least, the dimensions latitude, longitude and start date.
+#'The object is expected to be already subset for the desired region.
+#'@param obs2 an 's2dv' object with named dimensions containing a different observational
+#'field to that in 'obs'. If provided, these observations will be used in the training,
+#'i.e. the search for analogs, so they should be on a coarser grid than that of
+#''obs'. Training with observations on a grid with a spatial resolution closer to that
+#'of 'exp' will, in principle, ensure better results. The object must have, at least, the
+#'dimensions latitude, longitude and start date. The object is expected to be already
+#'subset for the desired region.
+#'@param grid_exp a character vector with a path to an example file of the exp data.
+#'It can be either a path to another NetCDF file from which to read the target grid
+#'(a single grid must be defined in such file) or a character vector indicating the
+#'coarse grid to be passed to CDO, and it must be a grid recognised by CDO.
+#'@param nanalogs an integer indicating the number of analogs to be searched.
+#'@param fun_analog a function to be applied over the found analogs. Only these options
+#'are valid: "mean", "wmean", "max", "min", "median" or NULL. If set to NULL (default),
+#'the function returns the found analogs.
+#'@param lat_dim a character vector indicating the latitude dimension name in the element
+#''data' in exp and obs. Default set to "lat".
+#'@param lon_dim a character vector indicating the longitude dimension name in the element
+#''data' in exp and obs. Default set to "lon".
+#'@param sdate_dim a character vector indicating the start date dimension name in the
+#'element 'data' in exp and obs. Default set to "sdate".
+#'@param time_dim a character vector indicating the time dimension name in the element
+#''data' in exp and obs. Default set to "time".
+#'@param member_dim a character vector indicating the member dimension name in the element
+#''data' in exp and obs. Default set to "member".
+#'@param region a numeric vector indicating the borders of the region defined in exp.
+#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers
+#'to the left border, while lonmax refers to the right border. latmin indicates the lower
+#'border, whereas latmax indicates the upper border. If set to NULL (default), the function
+#'takes the first and last elements of the latitudes and longitudes.
+#'@param return_indices a logical vector indicating whether to return the indices of the
+#'analogs together with the downscaled fields. Default to FALSE.
+#'@param loocv_window a logical vector only to be used if 'obs' does not have the dimension
+#''window'. It indicates whether to apply leave-one-out cross-validation in the creation
+#'of the window. It is recommended to be set to TRUE. Default to TRUE.
+#'@param ncores an integer indicating the number of cores to use in parallel computation.
+#'
+#'@return An 's2dv' object. The element 'data' contains the downscaled field, 'lat' the
+#'downscaled latitudes, and 'lon' the downscaled longitudes. If fun_analog is set to NULL
+#'(default), the output array in 'data' also contains the dimension 'analog' with the best
+#'analog days.
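+#'@details A minimal sketch of a call (reusing the objects built in the examples
+#'below; 'r360x180' is just an illustrative CDO grid name):
+#'\code{CST_Analogs(exp = exp, obs = obs, grid_exp = 'r360x180', nanalogs = 5,
+#'fun_analog = 'wmean')} returns the distance-weighted mean of the five closest
+#'analogs rather than the analogs themselves.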
+#'@examples
+#'exp <- rnorm(15000)
+#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 30)
+#'exp_lons <- 1:5
+#'exp_lats <- 1:4
+#'obs <- rnorm(27000)
+#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5, time = 30)
+#'obs_lons <- seq(0, 6, 6/14)
+#'obs_lats <- seq(0, 6, 6/11)
+#'exp <- s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons)
+#'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons)
+#'downscaled_field <- CST_Analogs(exp = exp, obs = obs, grid_exp = 'r360x180')
+#'@export
+CST_Analogs <- function(exp, obs, grid_exp, obs2 = NULL, nanalogs = 3, fun_analog = NULL, lat_dim = "lat",
+                        lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", member_dim = "member",
+                        region = NULL, return_indices = FALSE, loocv_window = TRUE, ncores = 1) {
+
+  # input exp and obs must be s2dv_cube objects
+  if (!inherits(exp, 's2dv_cube')) {
+    stop("Parameter 'exp' must be of the class 's2dv_cube'")
+  }
+
+  # input exp and obs must be s2dv_cube objects
+  if (!inherits(obs, 's2dv_cube')) {
+    stop("Parameter 'obs' must be of the class 's2dv_cube'")
+  }
+
+  res <- Analogs(exp = exp$data, obs = obs$data, exp_lats = exp$lat, exp_lons = exp$lon,
+                 obs_lats = obs$lat, obs_lons = obs$lon, grid_exp = grid_exp,
+                 nanalogs = nanalogs, fun_analog = fun_analog, lat_dim = lat_dim, lon_dim = lon_dim,
+                 sdate_dim = sdate_dim, time_dim = time_dim, member_dim = member_dim,
+                 region = region, return_indices = return_indices, loocv_window = loocv_window,
+                 ncores = ncores)
+
+  # Modify data, lat and lon in the original s2dv_cube, adding the downscaled data
+  exp$data <- res$data
+  exp$lon <- res$lon
+  exp$lat <- res$lat
+
+  obs$data <- res$obs
+  obs$lat <- res$lat
+  obs$lon <- res$lon
+
+  res_s2dv <- list(exp = exp, obs = obs)
+  return(res_s2dv)
+}
+
+#'@rdname Analogs
+#'@title Downscaling using Analogs based on large scale fields.
+#'
+#'@author J. Ramon, \email{jaume.ramon@bsc.es}
+#'@author Ll. Lledó, \email{llorenc.lledo@ecmwf.int}
+#'
+#'@description This function performs a downscaling using Analogs. To compute
+#'the analogs given a coarse-scale field, the function looks for days with
+#'similar conditions in the historical observations. The coarse-scale and
+#'observation data can be either global or regional. In the latter case, the
+#'region is defined by the user. In principle, the coarse and observation data
+#'should be of the same variable, although different variables can also be admitted.
+#'The analogs function will find the N best analogs based on minimum Euclidean
+#'distance.
+#'
+#'The search for analogs must be done in the longest dataset possible, but might
+#'require high-memory computational resources. This is important since it is
+#'necessary to have a good representation of the possible states of the field in
+#'the past, and therefore, to get better analogs. The function can also look for
+#'analogs within a window of D days, but it is the user who has to define that window.
+#'Otherwise, the function will look for analogs in the whole dataset. This function
+#'is intended to downscale climate prediction data (i.e., sub-seasonal, seasonal
+#'and decadal predictions) but can admit climate projections or reanalyses. It has
+#'no constraints on the specific region or variables to downscale.
+#'@param exp an array with named dimensions containing the experimental field on the
+#'coarse scale for which the downscaling is aimed. The object must have, at least,
+#'the dimensions latitude, longitude, start date and time. The object is expected to be
+#'already subset for the desired region.
+#'Data can be in one or two integrated regions, e.g.,
+#'crossing the Greenwich meridian. To get the correct results in the latter case,
+#'the borders of the region should be specified in the parameter 'region'. See parameter
+#''region'.
+#'@param obs an array with named dimensions containing the observational field. The object
+#'must have, at least, the dimensions latitude, longitude, start date and time. The object
+#'is expected to be already subset for the desired region. Optionally, 'obs' can have the
+#'dimension 'window', containing the sampled fields in which the function will look for
+#'the analogs. See function 'generate_window()'. Otherwise, the function will look for
+#'analogs using all the possible fields contained in obs.
+#'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must
+#'range from -90 to 90.
+#'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes
+#'can range from -180 to 180 or from 0 to 360.
+#'@param obs_lats a numeric vector containing the latitude values in 'obs'. Latitudes must
+#'range from -90 to 90.
+#'@param obs_lons a numeric vector containing the longitude values in 'obs'. Longitudes
+#'can range from -180 to 180 or from 0 to 360.
+#'@param grid_exp a character vector with a path to an example file of the exp data.
+#'It can be either a path to another NetCDF file from which to read the target grid
+#'(a single grid must be defined in such file) or a character vector indicating the
+#'coarse grid to be passed to CDO, and it must be a grid recognised by CDO.
+#'@param obs2 an 's2dv' object with named dimensions containing a different observational
+#'field to that in 'obs'. If provided, these observations will be used in the training,
+#'i.e. the search for analogs, so they should be on a coarser grid than that of
+#''obs'. Training with observations on a grid with a spatial resolution closer to that
+#'of 'exp' will, in principle, ensure better results. The object must have, at least, the
+#'dimensions latitude, longitude and start date. The object is expected to be already
+#'subset for the desired region.
+#'@param nanalogs an integer indicating the number of analogs to be searched.
+#'@param fun_analog a function to be applied over the found analogs. Only these options
+#'are valid: "mean", "wmean", "max", "min", "median" or NULL. If set to NULL (default),
+#'the function returns the found analogs.
+#'@param lat_dim a character vector indicating the latitude dimension name in the element
+#''data' in exp and obs. Default set to "lat".
+#'@param lon_dim a character vector indicating the longitude dimension name in the element
+#''data' in exp and obs. Default set to "lon".
+#'@param sdate_dim a character vector indicating the start date dimension name in the
+#'element 'data' in exp and obs. Default set to "sdate".
+#'@param time_dim a character vector indicating the time dimension name in the element
+#''data' in exp and obs. Default set to "time".
+#'@param member_dim a character vector indicating the member dimension name in the element
+#''data' in exp and obs. Default set to "member".
+#'@param region a numeric vector indicating the borders of the region defined in exp.
+#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers
+#'to the left border, while lonmax refers to the right border. latmin indicates the lower
+#'border, whereas latmax indicates the upper border. If set to NULL (default), the function
+#'takes the first and last elements of the latitudes and longitudes.
+#'@param return_indices a logical vector indicating whether to return the indices of the
+#'analogs together with the downscaled fields. The indices refer to the position of the
+#'element in the vector time * start_date. If 'obs' contains the dimension 'window', they
+#'refer to the position of the element in the dimension 'window'. Default to FALSE.
+#'@param loocv_window a logical vector only to be used if 'obs' does not have the dimension
+#''window'. It indicates whether to apply leave-one-out cross-validation in the creation
+#'of the window. It is recommended to be set to TRUE. Default to TRUE.
+#'@param ncores an integer indicating the number of cores to use in parallel computation.
+#'@import multiApply
+#'@import CSTools
+#'@importFrom s2dv InsertDim CDORemap
+#'@importFrom FNN get.knnx
+#'
+#'@seealso \code{\link[s2dverification]{CDORemap}}
+#'
+#'@return A list of three elements. 'data' contains the downscaled field, 'lat' the
+#'downscaled latitudes, and 'lon' the downscaled longitudes. If fun_analog is set to NULL
+#'(default), the output array in 'data' also contains the dimension 'analog' with the best
+#'analog days.
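+#'@details If 'obs' lacks the dimension 'window', the window is built internally via
+#''generate_window()' (defined further down in this file). A sketch of building it
+#'beforehand, assuming 'obs' has dimensions 'sdate' and 'time':
+#'\code{obs_win <- generate_window(obj = obs, sdate_dim = 'sdate', time_dim = 'time',
+#'loocv = TRUE)}.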
+#'@examples
+#'exp <- rnorm(15000)
+#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 30)
+#'exp_lons <- 1:5
+#'exp_lats <- 1:4
+#'obs <- rnorm(27000)
+#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5, time = 30)
+#'obs_lons <- seq(0, 6, 6/14)
+#'obs_lats <- seq(0, 6, 6/11)
+#'downscaled_field <- Analogs(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons,
+#'obs_lats = obs_lats, obs_lons = obs_lons, grid_exp = 'r360x180')
+#'@export
+Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, obs2 = NULL,
+                    obs2_lats = NULL, obs2_lons = NULL, nanalogs = 3, fun_analog = NULL,
+                    lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time",
+                    member_dim = "member", region = NULL, return_indices = FALSE,
+                    loocv_window = TRUE, ncores = 1) {
+  #-----------------------------------
+  # Checks
+  #-----------------------------------
+  if (!inherits(grid_exp, 'character')) {
+    stop("Parameter 'grid_exp' must be of class 'character'. It can be either a path ",
+         "to another NetCDF file from which to read the target grid (a single grid must be ",
+         "defined in such file) or a character vector indicating the coarse grid to ",
+         "be passed to CDO, and it must be a grid recognised by CDO or a NetCDF file.")
+  }
+
+  if (!inherits(nanalogs, 'numeric')) {
+    stop("Parameter 'nanalogs' must be of the class 'numeric'")
+  }
+
+  if (!inherits(lat_dim, 'character')) {
+    stop("Parameter 'lat_dim' must be of the class 'character'")
+  }
+
+  if (!inherits(lon_dim, 'character')) {
+    stop("Parameter 'lon_dim' must be of the class 'character'")
+  }
+
+  if (!inherits(sdate_dim, 'character')) {
+    stop("Parameter 'sdate_dim' must be of the class 'character'")
+  }
+
+  if (!inherits(time_dim, 'character')) {
+    stop("Parameter 'time_dim' must be of the class 'character'")
+  }
+
+  # Do not allow synonyms for lat (latitude), lon (longitude) and time (sdate) dimension names
+  if (is.na(match(lon_dim, names(dim(exp)))) | is.na(match(lon_dim, names(dim(obs))))) {
+    stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter ",
+         "'lon_dim'")
+  }
+
+  if (is.na(match(lat_dim, names(dim(exp)))) | is.na(match(lat_dim, names(dim(obs))))) {
+    stop("Missing latitude dimension in 'exp' and/or 'obs', or does not match the parameter ",
+         "'lat_dim'")
+  }
+
+  if (is.na(match(sdate_dim, names(dim(exp)))) | is.na(match(sdate_dim, names(dim(obs))))) {
+    stop("Missing start date dimension in 'exp' and/or 'obs', or does not match the parameter ",
+         "'sdate_dim'")
+  }
+
+  if (is.na(match(time_dim, names(dim(exp)))) | is.na(match(time_dim, names(dim(obs))))) {
+    stop("Missing time dimension in 'exp' and/or 'obs', or does not match the parameter ",
+         "'time_dim'")
+  }
+
+  # Ensure we have enough data to interpolate from high-res to coarse grid
+  #if ((obs_lats[1] > exp_lats[1]) | (obs_lats[length(obs_lats)] < exp_lats[length(exp_lats)]) |
+  #    (obs_lons[1] > exp_lons[1]) | (obs_lons[length(obs_lons)] < exp_lons[length(exp_lons)])) {
+
+  #  stop("There are not enough data in 'obs'. Please add more latitudes or ",
+  #       "longitudes.")
+  #}
+
+  # the code is not yet prepared to handle members in the observations
+  restore_ens <- FALSE
+  if (member_dim %in% names(dim(obs))) {
+    if (identical(as.numeric(dim(obs)[member_dim]), 1)) {
+      restore_ens <- TRUE
+      obs <- ClimProjDiags::Subset(x = obs, along = member_dim, indices = 1, drop = 'selected')
+    } else {
+      stop("Not implemented for observations with members ('obs' can have 'member_dim', ",
+           "but it should be of length = 1).")
+    }
+  }
+
+  if (!is.null(obs2)) {
+    # the code is not yet prepared to handle members in the observations
+    if (member_dim %in% names(dim(obs2))) {
+      if (identical(as.numeric(dim(obs2)[member_dim]), 1)) {
+        obs2 <- ClimProjDiags::Subset(x = obs2, along = member_dim, indices = 1, drop = 'selected')
+      } else {
+        stop("Not implemented for observations with members ('obs2' can have 'member_dim', ",
+             "but it should be of length = 1).")
+      }
+    }
+
+    if (is.null(obs2_lats) | is.null(obs2_lons)) {
+      stop("Missing latitudes and/or longitudes for the provided training observations. Please ",
+           "provide them with the parameters 'obs2_lats' and 'obs2_lons'")
+    }
+
+    if (is.na(match(lon_dim, names(dim(obs2))))) {
+      stop("Missing longitude dimension in 'obs2', or does not match the parameter 'lon_dim'")
+    }
+
+    if (is.na(match(lat_dim, names(dim(obs2))))) {
+      stop("Missing latitude dimension in 'obs2', or does not match the parameter 'lat_dim'")
+    }
+
+    if (is.na(match(sdate_dim, names(dim(obs2))))) {
+      stop("Missing start date dimension in 'obs2', or does not match the parameter 'sdate_dim'")
+    }
+
+    if (is.na(match(time_dim, names(dim(obs2))))) {
+      stop("Missing time dimension in 'obs2', or does not match the parameter 'time_dim'")
+    }
+  }
+
+  # Select a function to apply to the analogs selected for a given observation
+  if (!is.null(fun_analog)) {
+    stopifnot(fun_analog %in% c("mean", "wmean", "max", "min", "median"))
+  }
+
+  if (!is.null(obs2)) {
+    obs_train <- obs2
+    obs_train_lats <- obs2_lats
+    obs_train_lons <- obs2_lons
+  } else {
+    obs_train <- obs
+    obs_train_lats <- obs_lats
+    obs_train_lons <- obs_lons
+  }
+
+  # Correct indices later if cross-validation
+  loocv_correction <- FALSE
+  if ( !("window" %in% names(dim(obs_train))) & loocv_window) {
+    loocv_correction <- TRUE
+  }
+
+  #-----------------------------------
+  # Interpolate high-res observations to the coarse grid
+  #-----------------------------------
+  if (is.null(region)) {
+    warning("The borders of the downscaling region have not been provided. Assuming the four borders of the ",
+            "downscaling region are defined by the first and last elements of the parameters 'exp_lats' and ",
+            "'exp_lons'.")
+    region <- c(exp_lons[1], exp_lons[length(exp_lons)], exp_lats[1], exp_lats[length(exp_lats)])
+  }
+
+  obs_interpolated <- Interpolation(exp = obs_train, lats = obs_train_lats, lons = obs_train_lons,
+                                    target_grid = grid_exp, lat_dim = lat_dim, lon_dim = lon_dim,
+                                    method_remap = "conservative", region = region)
+  # If after interpolating 'obs' data the coordinates do not match, the exp data is interpolated to
+  # the same grid to force the matching
+  if (!.check_coords(lat1 = obs_interpolated$lat, lat2 = exp_lats, lon1 = obs_interpolated$lon, lon2 = exp_lons)) {
+    exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = grid_exp,
+                                      lat_dim = lat_dim, lon_dim = lon_dim, method_remap = "conservative",
+                                      region = region)$data
+  } else {
+    exp_interpolated <- exp
+  }
+
+  # Create window if user does not have it in the training observations
+  if ( !("window" %in% names(dim(obs_interpolated$data))) ) {
+    obs_train_interpolated <- generate_window(obj = obs_interpolated$data, sdate_dim = sdate_dim,
+                                              time_dim = time_dim, loocv = loocv_window)
+    obs_hres <- generate_window(obj = obs, sdate_dim = sdate_dim, time_dim = time_dim, loocv = loocv_window)
+  }
+
+  #-----------------------------------
+  # Reshape train and test
+  #-----------------------------------
+  res.data <- Apply(list(obs_train_interpolated, exp_interpolated, obs_hres),
+                    target_dims = list(c("window", lat_dim, lon_dim), c(lat_dim, lon_dim),
+                                       c("window", lat_dim, lon_dim)),
+                    fun = function(tr, te, ob) .analogs(train = tr, test = te, obs_hres = ob,
+                                                        k = nanalogs, fun_analog = fun_analog),
+                    ncores = ncores)$output1
+
+  # Return the indices of the best analogs
+  if (return_indices) {
+    res.ind <- Apply(list(obs_train_interpolated, exp_interpolated, obs_hres),
+                     target_dims = list(c("window", lat_dim, lon_dim), c(lat_dim, lon_dim),
+                                        c("window", lat_dim, lon_dim)),
+                     fun = function(tr, te, ob) .analogs(train = tr, test = te, obs_hres = ob,
+                                                         k = nanalogs, fun_analog = fun_analog,
+                                                         return_indices = TRUE),
+                     ncores = ncores, output_dims = 'ind')$output1
+
+    # If cross-validation has been applied, correct the indices
+    if (loocv_correction) {
+      nsdates <- dim(res.ind)[names(dim(res.ind)) == sdate_dim]
+      ntimes <- dim(res.ind)[names(dim(res.ind)) == time_dim]
+      res.ind <- Apply(res.ind, target_dims = c("ind", sdate_dim), function(x)
+                       sapply(1:nsdates, function(s) seq(ntimes * nsdates)[ - (ntimes * (s - 1) + 1:ntimes)][x[, s]]),
+                       output_dims = c("ind", sdate_dim))$output1
+    }
+
+    # restore ensemble dimension in observations if it existed originally
+    if (restore_ens) {
+      obs <- s2dv::InsertDim(obs, posdim = 1, lendim = 1, name = member_dim)
+    }
+
+    res <- list(data = res.data, ind = res.ind, obs = obs, lon = obs_lons, lat = obs_lats)
+  } else {
+    # restore ensemble dimension in observations if it existed originally
+    if (restore_ens) {
+      obs <- s2dv::InsertDim(obs, posdim = 1, lendim = 1, name = member_dim)
+    }
+
+    res <- list(data = res.data, obs = obs, lon = obs_lons, lat = obs_lats)
+  }
+
+  return(res)
+}
+
+# For each element in test, find the indices of the k nearest neighbors in train
+.analogs <- function(train, test, obs_hres, k, fun_analog, return_indices = FALSE) {
+
+  require(FNN)
+
+  # train and obs_hres dim: 3 dimensions window, lat and lon (in this order)
+  # test dim: 2 dimensions lat and lon (in this order)
+  # Number of lats/lons of the high-resolution data
+  space_dims_hres <- dim(obs_hres)[c(2, 3)]
+
+  # Reformat train and test as an array with (time, points)
+  train <- apply(train, 1, as.vector); names(dim(train))[1] <- "space"
+  test <- as.vector(test)
+  obs_hres <- apply(obs_hres, 1, as.vector); names(dim(obs_hres))[1] <- "space"
+
+  # Identify and remove NAs
+  idx_na_tr <- is.na(train[ , 1])
+  idx_na_te <- is.na(test)
+  idx_na <- idx_na_tr | idx_na_te
+  tr_wo_na <- t(train[!idx_na , ])
+  te_wo_na <- test[!idx_na]
+  te_wo_na <- InsertDim(data = te_wo_na, posdim = 1, lendim = 1, name = "time")
+
+  knn.ind <- get.knnx(tr_wo_na, te_wo_na, k)
+
+  dist <- knn.ind$nn.dist
+  idx <- knn.ind$nn.index
+
+  # Either return only the indices or the analogs
+  if (return_indices) {
+    res <- as.numeric(idx)
+  } else {
+    res <- obs_hres[ , idx]
+    dim(res) <- c(space_dims_hres, analogs = k)
+
+    if (!is.null(fun_analog)) {
+      if (fun_analog == "wmean") {
+        weight <- 1 / dist
+        res <- apply(res, c(1, 2), function(x) weighted.mean(x, weight))
+      } else {
+        res <- apply(res, c(1, 2), fun_analog)
+      }
+    }
+  }
+
+  return(res)
+}
+
+# Add the dimension window to an array that contains, at least, the start date and time
+# dimensions
+# object has at least dimensions sdate and time
+generate_window <- function(obj, sdate_dim, time_dim, loocv, size = NULL) {
+
+  rsdates <- 1:dim(obj)[names(dim(obj)) == sdate_dim]
+  ntimes <- dim(obj)[names(dim(obj)) == time_dim]
+  rtimes <- 1:dim(obj)[names(dim(obj)) == time_dim]
+
+  # Generate a window containing all the data
+  if (is.null(size)) {
+
+    # Generate window removing one start date each time (leave-one-out cross-validation)
+    if (loocv) {
+      obj_window <- Apply(obj, target_dims = c(time_dim, sdate_dim),
+                          fun = function(x) sapply(rsdates, function(s) as.vector(x[rtimes, -s])),
+                          output_dims = c('window', sdate_dim))$output1
+    } else {
+      # Generate window without cross-validation
+      obj_window <- Apply(obj, target_dims = c(time_dim, sdate_dim),
+                          fun = as.vector, output_dims = 'window')$output1
+    }
+  } else {
+    # Generate a window of the size specified by the user. Only applied with
+    # cross-validation.
+    # For an accurate generation of the window, it is mandatory to add some "extra" data.
+    if (!("smonth" %in% names(dim(obj)))) {
+      stop("Missing 'smonth' dimension")
+    }
+
+    # Concatenate data from previous, target and posterior months
+    obj_new <- Apply(obj, target_dims = list(c("time", "smonth")),
+                     fun = as.vector, output_dims = "time")$output1
+
+    if (loocv) {
+      obj_window <- Apply(list(obj_new, rtimes, rsdates), target_dims = list(c(time_dim, sdate_dim), NULL, NULL),
+                          fun = function(x, t, s) as.vector(x[(ntimes + t - size):(ntimes + t + size), -s]),
+                          output_dims = 'window')$output1
+      names(dim(obj_window))[(length(names(dim(obj_window))) - 1):length(names(dim(obj_window)))] <- c(time_dim, sdate_dim)
+    } else {
+      obj_window <- Apply(obj_new, target_dims = c(time_dim, sdate_dim),
+                          fun = function(x) sapply(rtimes, function(t) as.vector(x[(ntimes + t - size):(ntimes + t + size), ])),
+                          output_dims = c('window', time_dim))$output1
+    }
+  }
+
+  return(obj_window)
+}
diff --git a/modules/Downscaling/tmp/Intbc.R b/modules/Downscaling/tmp/Intbc.R
new file mode 100644
index 0000000000000000000000000000000000000000..5a5275bf5f2b4f0824343859dbfc6ecc6069fc92
--- /dev/null
+++ b/modules/Downscaling/tmp/Intbc.R
@@ -0,0 +1,325 @@
+#'@rdname CST_Intbc
+#'@title Downscaling using interpolation and bias adjustment.
+#'
+#'@author J. Ramon, \email{jaume.ramon@bsc.es}
+#'
+#'@description This function performs a downscaling using an interpolation and a subsequent
+#'bias adjustment. It is recommended that the observations are passed already in the target
+#'grid. Otherwise, the function will also perform an interpolation of the observed field into
+#'the target grid. The coarse-scale and observation data can be either global or regional. In
+#'the latter case, the region is defined by the user. In principle, the coarse and observation
+#'data are intended to be of the same variable, although different variables can also be
+#'admitted.
+#'
+#'@param exp an 's2dv' object containing the experimental field on the
+#'coarse scale for which the downscaling is aimed. The object must have, at least,
+#'the dimensions latitude, longitude, start date and member. The object is expected to be
+#'already subset for the desired region. Data can be in one or two integrated regions, e.g.,
+#'crossing the Greenwich meridian. To get the correct results in the latter case,
+#'the borders of the region should be specified in the parameter 'region'. See parameter
+#''region'.
+#'@param obs an 's2dv' object containing the observational field. The object
+#'must have, at least, the dimensions latitude, longitude and start date. The object is
+#'expected to be already subset for the desired region.
+#'@param target_grid a character vector indicating the target grid to be passed to CDO.
+#'It must be a grid recognised by CDO or a NetCDF file.
+#'@param bc_method a character vector indicating the bias adjustment method to be applied after
+#'the interpolation. Accepted methods are 'simple_bias', 'calibration', 'quantile_mapping'. The
+#'abbreviations 'sbc', 'cal', 'qm' can also be used.
+#'@param int_method a character vector indicating the regridding method to be passed to CDORemap.
+#'Accepted methods are "con", "bil", "bic", "nn", "con2". If the "nn" method is to be used,
+#'CDO_1.9.8 or a newer version is required.
+#'@param points a list of two elements containing the point latitudes and longitudes
+#'of the locations to downscale the model data. The list must contain the two elements
+#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is
+#'to a point location, only regular grids are allowed for exp and obs. Only needed if the
+#'downscaling is to a point location.
+#'@param method_point_interp a character vector indicating the interpolation method to interpolate
+#'model gridded data into the point locations. Accepted methods are "nearest", "bilinear", "9point",
+#'"invdist4nn", "NE", "NW", "SE", "SW". Only needed if the downscaling is to a point location.
+#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data'
+#'in exp and obs. Default set to "lat".
+#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data'
+#'in exp and obs. Default set to "lon".
+#'@param sdate_dim a character vector indicating the start date dimension name in the element
+#''data' in exp and obs. Default set to "sdate".
+#'@param member_dim a character vector indicating the member dimension name in the element
+#''data' in exp and obs. Default set to "member".
+#'@param region a numeric vector indicating the borders of the region defined in obs.
+#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers
+#'to the left border, while lonmax refers to the right border. latmin indicates the lower
+#'border, whereas latmax indicates the upper border. If set to NULL (default), the function
+#'takes the first and last elements of the latitudes and longitudes.
+#'@param ncores an integer indicating the number of cores to use in parallel computation.
+#'@return An 's2dv' object. The element 'data' contains the downscaled field, 'lat' the
+#'downscaled latitudes, and 'lon' the downscaled longitudes.
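+#'@details A minimal sketch of a call (reusing the 'exp' and 'obs' objects from the
+#'examples below; 'r1280x640' is an illustrative CDO grid): \code{CST_Intbc(exp, obs,
+#'target_grid = 'r1280x640', bc_method = 'qm', int_method = 'bil')} first remaps
+#'bilinearly and then applies quantile mapping.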
+#'@examples
+#'exp <- rnorm(500)
+#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5)
+#'exp_lons <- 1:5
+#'exp_lats <- 1:4
+#'obs <- rnorm(900)
+#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5)
+#'obs_lons <- seq(1, 5, 4/14)
+#'obs_lats <- seq(1, 4, 3/11)
+#'exp <- s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons)
+#'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons)
+#'res <- CST_Intbc(exp = exp, obs = obs, target_grid = 'r1280x640', bc_method = 'simple_bias', int_method = 'conservative')
+#'@export
+CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, points = NULL,
+                      method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon",
+                      sdate_dim = "sdate", member_dim = "member", region = NULL, ncores = 1, ...) {
+  if (!inherits(exp, 's2dv_cube')) {
+    stop("Parameter 'exp' must be of the class 's2dv_cube'")
+  }
+
+  if (!inherits(obs, 's2dv_cube')) {
+    stop("Parameter 'obs' must be of the class 's2dv_cube'")
+  }
+
+  res <- Intbc(exp = exp$data, obs = obs$data, exp_lats = exp$lat, exp_lons = exp$lon,
+               obs_lats = obs$lat, obs_lons = obs$lon, target_grid = target_grid,
+               int_method = int_method, bc_method = bc_method, points = points,
+               source_file = exp$source_files[1], method_point_interp = method_point_interp,
+               lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim,
+               member_dim = member_dim, region = region, ncores = ncores, ...)
+
+  # Modify data, lat and lon in the original s2dv_cube, adding the downscaled data
+  exp$data <- res$data
+  exp$lon <- res$lon
+  exp$lat <- res$lat
+
+  obs$data <- res$obs
+  obs$lat <- res$lat
+  obs$lon <- res$lon
+
+  res_s2dv <- list(exp = exp, obs = obs)
+  return(res_s2dv)
+}
+
+#'@rdname Intbc
+#'@title Downscaling using interpolation and bias adjustment.
+#'
+#'@author J. Ramon, \email{jaume.ramon@bsc.es}
+#'
+#'@description This function performs a downscaling using an interpolation and a subsequent
+#'bias adjustment. It is recommended that the observations are passed already in the target
+#'grid. Otherwise, the function will also perform an interpolation of the observed field into
+#'the target grid. The coarse-scale and observation data can be either global or regional. In
+#'the latter case, the region is defined by the user. In principle, the coarse and observation
+#'data are intended to be of the same variable, although different variables can also be
+#'admitted.
+#'
+#'@param exp an array with named dimensions containing the experimental field on the
+#'coarse scale for which the downscaling is aimed. The object must have, at least,
+#'the dimensions latitude, longitude, start date and member. The object is expected to be
+#'already subset for the desired region. Data can be in one or two integrated regions, e.g.,
+#'crossing the Greenwich meridian. To get the correct results in the latter case,
+#'the borders of the region should be specified in the parameter 'region'. See parameter
+#''region'.
+#'@param obs an array with named dimensions containing the observational field. The object
+#'must have, at least, the dimensions latitude, longitude and start date. The object is
+#'expected to be already subset for the desired region.
+#'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must
+#'range from -90 to 90.
+#'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes
+#'can range from -180 to 180 or from 0 to 360.
+#'@param obs_lats a numeric vector containing the latitude values in 'obs'. Latitudes must
+#'range from -90 to 90.
+#'@param obs_lons a numeric vector containing the longitude values in 'obs'. Longitudes
+#'can range from -180 to 180 or from 0 to 360.
+#'@param target_grid a character vector indicating the target grid to be passed to CDO.
+#'It must be a grid recognised by CDO or a NetCDF file.
+#'@param bc_method a character vector indicating the bias adjustment method to be applied after
+#'the interpolation. Accepted methods are 'simple_bias', 'calibration', 'quantile_mapping'. The
+#'abbreviations 'sbc', 'cal', 'qm' can also be used.
+#'@param int_method a character vector indicating the regridding method to be passed to CDORemap.
+#'Accepted methods are "con", "bil", "bic", "nn", "con2". If the "nn" method is to be used,
+#'CDO_1.9.8 or a newer version is required.
+#'@param points a list of two elements containing the point latitudes and longitudes
+#'of the locations to downscale the model data. The list must contain the two elements
+#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is
+#'to a point location, only regular grids are allowed for exp and obs. Only needed if the
+#'downscaling is to a point location.
+#'@param method_point_interp a character vector indicating the interpolation method to interpolate
+#'model gridded data into the point locations. Accepted methods are "nearest", "bilinear", "9point",
+#'"invdist4nn", "NE", "NW", "SE", "SW". Only needed if the downscaling is to a point location.
+#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data'
+#'in exp and obs. Default set to "lat".
+#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data'
+#'in exp and obs. Default set to "lon".
+#'@param sdate_dim a character vector indicating the start date dimension name in the element
+#''data' in exp and obs. Default set to "sdate".
+#'@param member_dim a character vector indicating the member dimension name in the element
+#''data' in exp and obs. Default set to "member".
+#'@param source_file a character vector with a path to an example file of the exp data.
+#'Only needed if the downscaling is to a point location.
+#'@param region a numeric vector indicating the borders of the region defined in obs.
+#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers
+#'to the left border, while lonmax refers to the right border. latmin indicates the lower
+#'border, whereas latmax indicates the upper border. If set to NULL (default), the function
+#'takes the first and last elements of the latitudes and longitudes.
+#'@param ncores an integer indicating the number of cores to use in parallel computation.
+#'
+#'@import CSTools
+#'
+#'@seealso \code{\link[CSTools]{BiasCorrection}}
+#'@seealso \code{\link[CSTools]{Calibration}}
+#'@seealso \code{\link[CSTools]{QuantileMapping}}
+#'
+#'@return A list of three elements. 'data' contains the downscaled field, 'lat' the
+#'downscaled latitudes, and 'lon' the downscaled longitudes.
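+#'@details Further arguments in '...' are forwarded to the underlying CSTools
+#'correction function (BiasCorrection, Calibration or QuantileMapping). For instance,
+#'a sketch selecting a calibration variant: \code{Intbc(..., bc_method = 'cal',
+#'cal.method = 'mse_min')}.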
+#'@examples
+#'exp <- rnorm(500)
+#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5)
+#'exp_lons <- 1:5
+#'exp_lats <- 1:4
+#'obs <- rnorm(900)
+#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5)
+#'obs_lons <- seq(1, 5, 4/14)
+#'obs_lats <- seq(1, 4, 3/11)
+#'res <- Intbc(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons, obs_lats = obs_lats,
+#'obs_lons = obs_lons, target_grid = 'r1280x640', bc_method = 'simple_bias', int_method = 'conservative')
+#'@export
+Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, bc_method, int_method = NULL,
+                  points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate",
+                  time_dim = "time", member_dim = "member", source_file = NULL, region = NULL, ncores = 1, ...) {
+  if (!inherits(bc_method, 'character')) {
+    stop("Parameter 'bc_method' must be of the class 'character'")
+  }
+
+  if (!inherits(lat_dim, 'character')) {
+    stop("Parameter 'lat_dim' must be of the class 'character'")
+  }
+
+  if (!inherits(lon_dim, 'character')) {
+    stop("Parameter 'lon_dim' must be of the class 'character'")
+  }
+
+  if (!inherits(sdate_dim, 'character')) {
+    stop("Parameter 'sdate_dim' must be of the class 'character'")
+  }
+
+  if (!inherits(member_dim, 'character')) {
+    stop("Parameter 'member_dim' must be of the class 'character'")
+  }
+
+  # Do not allow synonyms for lat (latitude), lon (longitude) and time (sdate) dimension names
+  if (is.na(match(lon_dim, names(dim(exp)))) | is.na(match(lon_dim, names(dim(obs))))) {
+    stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter ",
+         "'lon_dim'")
+  }
+
+  if (is.na(match(lat_dim, names(dim(exp)))) | is.na(match(lat_dim, names(dim(obs))))) {
+    stop("Missing latitude dimension in 'exp' and/or 'obs', or does not match the parameter ",
+         "'lat_dim'")
+  }
+
+  if (is.na(match(sdate_dim, names(dim(exp)))) | is.na(match(sdate_dim, names(dim(obs))))) {
+    stop("Missing start date dimension in 'exp' and/or 'obs', or does not match the parameter ",
+         "'sdate_dim'")
+  }
+
+  if (is.na(match(member_dim, names(dim(exp))))) {
+    stop("Missing member dimension in 'exp', or does not match the parameter 'member_dim'")
+  }
+
+  if (!(bc_method %in% c('sbc', 'cal', 'qm', 'dbc', 'simple_bias', 'calibration',
+                         'quantile_mapping', 'dynamical_bias'))) {
+    stop("Parameter 'bc_method' must be a character vector indicating the bias adjustment method. ",
+         "Accepted methods are 'simple_bias', 'calibration', 'quantile_mapping'. The abbreviations ",
+         "'sbc', 'cal', 'qm' can also be used.")
+  }
+
+  if (!is.null(points) & is.null(source_file)) {
+    stop("No source file found. Source file must be provided in the parameter 'source_file'.")
+  }
+
+  if (!is.null(points) & is.null(method_point_interp)) {
+    stop("Please provide the interpolation method to interpolate gridded data to point locations ",
+         "through the parameter 'method_point_interp'.")
+  }
+
+  if (is.null(region)) {
+    warning("The borders of the downscaling region have not been provided. Assuming the four borders ",
+            "of the downscaling region are defined by the first and last elements of the parameters ",
+            "'obs_lats' and 'obs_lons'.")
+    region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)])
+  }
+
+  exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid,
+                                    method_remap = int_method, points = points, source_file = source_file,
+                                    lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp,
+                                    region = region)
+
+  # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to
+  # the same grid to force the matching
+  if ((!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats,
+                      lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !is.null(points)) {
+    obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid,
+                                      method_remap = int_method, points = points, source_file = source_file,
+                                      lat_dim = lat_dim, lon_dim = lon_dim,
+                                      method_point_interp = method_point_interp, region = region)
+    obs_ref <- obs_interpolated$data
+  } else {
+    obs_ref <- obs
+  }
+
+  # Some functions only accept the dimension names "member" and "sdate" for exp and
+  # "sdate" for obs
+  #if (member_dim != 'member') {
+  #  names(dim(exp_interpolated$data)) <- replace(names(dim(exp_interpolated$data)),
+  #                                               which(names(dim(exp_interpolated$data)) == member_dim), 'member')
+  #}
+
+  #if (sdate_dim != 'sdate') {
+  #  names(dim(exp_interpolated$data)) <- replace(names(dim(exp_interpolated$data)),
+  #                                               which(names(dim(exp_interpolated$data)) == sdate_dim), 'sdate')
+  #  names(dim(obs_ref)) <- replace(names(dim(obs_ref)),
+  #                                 which(names(dim(obs_ref)) == sdate_dim), 'sdate')
+  #}
+
+  if (bc_method == 'sbc' | bc_method == 'simple_bias') {
+    if (dim(obs_ref)[sdate_dim] == 1) {
+      warning('Simple Bias Correction should not be used with only one observation. Returning NA.')
+    }
+
+    res <- BiasCorrection(exp = exp_interpolated$data, obs = obs_ref, memb_dim = member_dim,
+                          sdate_dim = sdate_dim, ...)
+  } else if (bc_method == 'cal' | bc_method == 'calibration') {
+    if (dim(exp_interpolated$data)[member_dim] == 1) {
+      stop('Calibration must not be used with only one ensemble member.')
+    }
+    res <- Calibration(exp = exp_interpolated$data, obs = obs_ref, memb_dim = member_dim,
+                       sdate_dim = sdate_dim, ...)
+  } else if (bc_method == 'qm' | bc_method == 'quantile_mapping') {
+    res <- QuantileMapping(exp = exp_interpolated$data, obs = obs_ref, na.rm = TRUE,
+                           memb_dim = member_dim, sdate_dim = sdate_dim, ...)
+  } else if (bc_method == 'dbc' | bc_method == 'dynamical_bias') {
+    # the temporal dimension must be only one dimension called "time"
+    if (all(c(time_dim, sdate_dim) %in% names(dim(exp_interpolated$data)))) {
+      exp_interpolated$data <- Apply(exp_interpolated$data, target_dims = c(time_dim, sdate_dim),
+                                     fun = as.vector, output_dims = "time")$output1
+    }
+    if (all(c(time_dim, sdate_dim) %in% names(dim(obs_ref)))) {
+      obs_ref <- Apply(obs_ref, target_dims = c(time_dim, sdate_dim), fun = as.vector,
+                       output_dims = "time")$output1
+    }
+    # REMEMBER to add na.rm = T in colMeans in .proxiesattractor
+    res <- DynBiasCorrection(exp = exp_interpolated$data, obs = obs_ref, ...)
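+    # DynBiasCorrection works on a single temporal dimension, hence the flattening
+    # of 'time' and 'sdate' into "time" above.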
+  }
+
+  # Return a list of three elements
+  res <- list(data = res, obs = obs_ref, lon = exp_interpolated$lon, lat = exp_interpolated$lat)
+
+  return(res)
+}
diff --git a/modules/Downscaling/tmp/Interpolation.R b/modules/Downscaling/tmp/Interpolation.R
new file mode 100644
index 0000000000000000000000000000000000000000..e594691fc148cefdcba35c92044d5539e698de2a
--- /dev/null
+++ b/modules/Downscaling/tmp/Interpolation.R
@@ -0,0 +1,739 @@
+#'@rdname CST_Interpolation
+#'@title Regrid or interpolate gridded data to a point location.
+#'
+#'@author J. Ramon, \email{jaume.ramon@bsc.es}
+#'
+#'@description This function interpolates gridded model data from one grid to
+#'another (regrid) or interpolates gridded model data to a set of point locations.
+#'The gridded model data can be either global or regional. In the latter case, the
+#'region is defined by the user. It has no constraints on the specific region or
+#'variables to downscale.
+#'@param exp an 's2dv' object containing the experimental field on the
+#'coarse scale for which the downscaling is aimed. The object must have, at least,
+#'the dimensions latitude and longitude. The field data is expected to be already subset
+#'for the desired region. Data can be in one or two integrated regions, e.g.,
+#'crossing the Greenwich meridian. To get the correct results in the latter case,
+#'the borders of the region should be specified in the parameter 'region'. See parameter
+#''region'.
+#'@param points a list of two elements containing the point latitudes and longitudes
+#'of the locations to downscale the model data. The list must contain the two elements
+#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is
+#'to a point location, only regular grids are allowed for exp and obs. Only needed if the
+#'downscaling is to a point location.
+#'@param method_remap a character vector indicating the regridding method to be passed
+#'to CDORemap. Accepted methods are "con", "bil", "bic", "nn", "con2". If the "nn" method is
+#'to be used, CDO_1.9.8 or a newer version is required.
+#'@param target_grid a character vector indicating the target grid to be passed to CDO.
+#'It must be a grid recognised by CDO or a NetCDF file.
+#'@param lat_dim a character vector indicating the latitude dimension name in the element
+#''exp' and/or 'points'. Default set to "lat".
+#'@param lon_dim a character vector indicating the longitude dimension name in the element
+#''exp' and/or 'points'. Default set to "lon".
+#'@param region a numeric vector indicating the borders of the region defined in exp.
+#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers
+#'to the left border, while lonmax refers to the right border. latmin indicates the lower
+#'border, whereas latmax indicates the upper border. If set to NULL (default), the function
+#'takes the first and last elements of the latitudes and longitudes.
+#'@param method_point_interp a character vector indicating the interpolation method to
+#'interpolate model gridded data into the point locations. Accepted methods are "nearest",
+#'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW".
+#'
+#'@seealso \code{\link[s2dverification]{CDORemap}}
+#'
+#'@return An 's2dv' object containing the downscaled field.
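+#'@details For point locations, a sketch of a call (the coordinates are illustrative
+#'and the element names must match 'lat_dim'/'lon_dim'):
+#'\code{CST_Interpolation(exp, points = list(lat = c(40.5, 41.2), lon = c(1.1, 2.3)),
+#'method_point_interp = 'nearest')}.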
+#'
+#'@examples
+#'exp <- rnorm(500)
+#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 1)
+#'lons <- 1:5
+#'lats <- 1:4
+#'exp <- s2dv_cube(data = exp, lat = lats, lon = lons)
+#'res <- CST_Interpolation(exp = exp, method_remap = 'conservative', target_grid = 'r1280x640')
+#'@export
+CST_Interpolation <- function(exp, points = NULL, method_remap = NULL, target_grid = NULL,
+                              lat_dim = "lat", lon_dim = "lon", region = NULL,
+                              method_point_interp = NULL) {
+  if (!inherits(exp, 's2dv_cube')) {
+    stop("Parameter 'exp' must be of the class 's2dv_cube'")
+  }
+
+  #if (is.null(exp[[lat_dim]]) | is.null(exp[[lon_dim]])) {
+  #  stop("The name of the latitude/longitude elements in 'exp' must match the parameters ",
+  #       "'lat_dim' and 'lon_dim'")
+  #}
+
+  if ((length(which(names(dim(exp$data)) == lat_dim)) == 0) | (length(which(names(dim(exp$data)) == lon_dim)) == 0)) {
+    stop("The name of the latitude/longitude dimensions in 'exp$data' must match the parameters 'lat_dim' and 'lon_dim'")
+  }
+
+  res <- Interpolation(exp = exp$data, lats = exp$lat, lons = exp$lon,
+                       source_file = exp$source_files[1], points = points,
+                       method_remap = method_remap, target_grid = target_grid, lat_dim = lat_dim,
+                       lon_dim = lon_dim, region = region, method_point_interp = method_point_interp)
+
+  # Modify data, lat and lon in the original s2dv_cube, adding the downscaled data
+  exp$data <- res$data
+  exp$lon <- res$lon
+  exp$lat <- res$lat
+
+  res_s2dv <- list(exp = exp, obs = NULL)
+  return(res_s2dv)
+}
+
+#'@rdname Interpolation
+#'@title Regrid or interpolate gridded data to a point location.
+#'
+#'@author J. Ramon, \email{jaume.ramon@bsc.es}
+#'@author Ll. Lledó, \email{llorenc.lledo@ecmwf.int}
+#'
+#'@description This function interpolates gridded model data from one grid to
+#'another (regrid) or interpolates gridded model data to a set of point locations.
+#'The gridded model data can be either global or regional. In the latter case, the
+#'region is defined by the user. It has no constraints on the specific region or
+#'variables to downscale.
+#'@param exp an array with named dimensions containing the experimental field on the
+#'coarse scale for which the downscaling is aimed. The object must have, at least,
+#'the dimensions latitude and longitude. The object is expected to be already subset
+#'for the desired region. Data can be in one or two integrated regions, e.g.,
+#'crossing the Greenwich meridian. To get the correct results in the latter case,
+#'the borders of the region should be specified in the parameter 'region'. See parameter
+#''region'.
+#'@param lats a numeric vector containing the latitude values. Latitudes must range from
+#'-90 to 90.
+#'@param lons a numeric vector containing the longitude values. Longitudes can range from
+#'-180 to 180 or from 0 to 360.
+#'@param points a list of two elements containing the point latitudes and longitudes
+#'of the locations to downscale the model data. The list must contain the two elements
+#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is
+#'to a point location, only regular grids are allowed for exp and obs. Only needed if the
+#'downscaling is to a point location.
+#'@param source_file a character vector with a path to an example file of the exp data.
+#'Only needed if the downscaling is to a point location.
+#'@param method_remap a character vector indicating the regridding method to be passed
+#'to CDORemap. Accepted methods are "con", "bil", "bic", "nn", "con2". If the "nn" method is
+#'to be used, CDO_1.9.8 or a newer version is required.
+#'@param target_grid a character vector indicating the target grid to be passed to CDO.
+#'It must be a grid recognised by CDO or a NetCDF file.
+#'@param lat_dim a character vector indicating the latitude dimension name in the element
+#''exp' and/or 'points'. Default set to "lat".
+#'@param lon_dim a character vector indicating the longitude dimension name in the element
+#''exp' and/or 'points'. Default set to "lon".
+#'@param region a numeric vector indicating the borders of the region defined in exp.
+#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers
+#'to the left border, while lonmax refers to the right border. latmin indicates the lower
+#'border, whereas latmax indicates the upper border. If set to NULL (default), the function
+#'takes the first and last elements of the latitudes and longitudes.
+#'@param method_point_interp a character vector indicating the interpolation method to
+#'interpolate model gridded data into the point locations. Accepted methods are "nearest",
+#'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW". Only needed if the downscaling
+#'is to a point location.
+#'@import multiApply
+#'@import plyr
+#'@importFrom s2dv CDORemap
+#'
+#'@seealso \code{\link[s2dverification]{CDORemap}}
+#'
+#'@return A list of three elements. 'data' contains the downscaled field, 'lat' the
+#'downscaled latitudes, and 'lon' the downscaled longitudes.
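+#'@details A point-location sketch (the coordinates and the example-file path are
+#'illustrative): \code{Interpolation(exp, lats, lons, points = list(lat = 40.5,
+#'lon = 1.1), method_point_interp = 'nearest', source_file = 'exp_sample.nc')}.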
If "nn" method is +#'to be used, CDO_1.9.8 or newer version is required. +#'@param target_grid a character vector indicating the target grid to be passed to CDO. +#'It must be a grid recognised by CDO or a NetCDF file. +#'@param lat_dim a character vector indicating the latitude dimension name in the element +#''exp' and/or 'points'. Default set to "lat". +#'@param lon_dim a character vector indicating the longitude dimension name in the element +#''exp' and/or 'points'. Default set to "lon". +#'@param region a numeric vector indicating the borders of the region defined in exp. +#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers +#'to the left border, while lonmax refers to the right border. latmin indicates the lower +#'border, whereas latmax indicates the upper border. If set to NULL (default), the function +#'takes the first and last elements of the latitudes and longitudes. +#'@param method_point_interp a character vector indicating the interpolation method to +#'interpolate model gridded data into the point locations. Accepted methods are "nearest", +#'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW". Only needed if the downscaling +#'is to a point location. +#'@import multiApply +#'@import plyr +#'@importFrom s2dv CDORemap +#' +#'@seealso \code{\link[s2dverification]{CDORemap}} +#' +#'@return An list of three elements. 'data' contains the dowscaled field, 'lat' the +#'downscaled latitudes, and 'lon' the downscaled longitudes. +#' +#'@examples +#'exp <- rnorm(500) +#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 1) +#'lons <- 1:5 +#'lats <- 1:4 +#'res <- Interpolation(exp = exp, lats = lats, lons = lons, method_remap = 'conservative', target_grid = 'r1280x640') +#'@export +Interpolation <- function(exp, lats, lons, points = NULL, source_file = NULL, method_remap = NULL, + target_grid = NULL, lat_dim = "lat", lon_dim = "lon", region = NULL, + method_point_interp = NULL) +{ + if (!is.null(method_remap)) { + if (!inherits(method_remap, 'character')) { + stop("Parameter 'method_remap' must be of the class 'character'") + } + } + + if (!is.null(method_point_interp)) { + if (!inherits(method_point_interp, 'character')) { + stop("Parameter 'method_point_interp' must be of the class 'character'") + } + } + + if (is.na(match(lon_dim, names(dim(exp))))) { + stop("Missing longitude dimension in 'exp', or does not match the parameter 'lon_dim'") + } + + if (is.na(match(lat_dim, names(dim(exp))))) { + stop("Missing latitude dimension in 'exp', or does not match the parameter 'lat_dim'") + } + + # Check for negative latitudes in the exp data + if (any(lats < -90 | lats > 90) ) { + stop("Out-of-range latitudes have been found. Latitudes must range from -90 to 90") + } + + # checkings for the case of interpolation to point locations + if (!is.null(points)) { + if (!inherits(points, 'list')) { + stop("Parameter 'points' must be a list of two elements containing the point ", + "latitudes and longitudes.") + } + + if (is.null(method_point_interp)) { + stop("Parameter 'method_point_interp' must be a character vector indicating the ", + "interpolation method. Accepted methods are nearest, bilinear, 9point, ", + "invdist4nn, NE, NW, SE, SW") + } + + if (!(method_point_interp %in% c('nearest', 'bilinear', '9point', 'invdist4nn', 'NE', 'NW', 'SE', 'SW'))) { + stop("Parameter 'method_point_interp' must be a character vector indicating the ", + "interpolation method. 
+           "invdist4nn, NE, NW, SE, SW")
+    }
+
+    if (!(method_point_interp %in% c('nearest', 'bilinear', '9point', 'invdist4nn', 'NE', 'NW', 'SE', 'SW'))) {
+      stop("Parameter 'method_point_interp' must be a character vector indicating the ",
+           "interpolation method. Accepted methods are nearest, bilinear, 9point, ",
+           "invdist4nn, NE, NW, SE, SW")
+    }
+
+    # Points must be a list of two elements
+    if (length(points) != 2) {
+      stop("'points' must be a list of two elements containing the point ",
+           "latitudes and longitudes in the form 'points$lat', 'points$lon'")
+    }
+
+    # The names of the two elements must be 'lat' and 'lon'
+    if (any(!(c(lat_dim, lon_dim) %in% names(points)))) {
+      stop("The names of the elements in the list 'points' must coincide with the parameters ",
+           "'lat_dim' and 'lon_dim'")
+    }
+
+    # Check that the number of latitudes and longitudes match
+    if (length(unique(lengths(points))) != 1L) {
+      stop("The number of latitudes and longitudes must match")
+    }
+
+    # Check for out-of-range latitudes in the point coordinates
+    if (any(points[[lat_dim]] < -90 | points[[lat_dim]] > 90) ) {
+      stop("Out-of-range latitudes have been found in 'points'. Latitudes must range from ",
+           "-90 to 90")
+    }
+
+    if (is.null(source_file)) {
+      stop("No source file found. Source file must be provided in the parameter 'source_file'.")
+    }
+  } else {
+    if (is.null(method_remap)) {
+      stop("Parameter 'method_remap' must be a character vector indicating the ",
+           "interpolation method. Accepted methods are con, bil, bic, nn, con2")
+    }
+
+    if (is.null(target_grid)) {
+      stop("Parameter 'target_grid' can be either a path ",
+           "to another NetCDF file which to read the target grid from (a single grid must be ",
+           "defined in such file) or a character vector indicating the coarse grid to ",
+           "be passed to CDO, and it must be a grid recognised by CDO or a NetCDF file.")
+    }
+  }
+
+  #----------------------------------
+  # Limits of the region defined by the model data
+  #----------------------------------
+  # for the case when region limits are not passed by the user
+  # region contains the following elements in order: lonmin, lonmax, latmin, latmax
+  if (is.null(region)) {
+    warning("The borders of the downscaling region have not been provided. Assuming the four borders of the ",
+            "downscaling region are defined by the first and last elements of the parameters 'lats' and 'lons'.")
+    region <- c(lons[1], lons[length(lons)], lats[1], lats[length(lats)])
+  }
+
+  # Ensure points to be within the region limits
+  if (!is.null(points)) {
+    if (any(points[[lat_dim]] > region[4]) | any(points[[lat_dim]] < region[3]) |
+        any(points[[lon_dim]] > region[2]) | any(points[[lon_dim]] < region[1])) {
+      stop("At least one of the points lies outside the model region")
+    }
+  }
+
+  #----------------------------------
+  # Map regrid with CDO
+  #----------------------------------
+  if (is.null(points)) {
+    res <- CDORemap(data_array = exp,
+                    lats = lats,
+                    lons = lons,
+                    grid = target_grid,
+                    method = method_remap,
+                    crop = region)
+
+    # Return a list
+    res <- list(data = res$data_array, obs = NULL, lon = res$lons, lat = res$lats)
+
+  #----------------------------------
+  # Interpolate to point locations
+  #----------------------------------
+  } else {
+    # First create interpolation weights, depending on the chosen method
+    weights <- create_interp_weights(ncfile = source_file, locids = 1:unique(lengths(points)),
+                                     lats = points[[lat_dim]], lons = points[[lon_dim]],
+                                     method = method_point_interp, region = list(lat_min = region[3],
+                                     lat_max = region[4], lon_min = region[1], lon_max = region[2]))
+
+    # Select coarse-scale data to be interpolated
+    model_data_gridpoints <- get_model_data(weights.df = weights, mdata = exp)
+
+    # Interpolate model data to point locations
+    res <- interpolate_data(model_data_gridpoints, weights)
+
+    # Return a list
+    res <- list(data = res, obs = NULL, lon = points[[lon_dim]], lat = points[[lat_dim]])
+  }
+
+  return(res)
+}
+
+#======================
+# Compute weights for interpolation at several (lat,lon) positions
+# We assume that grid boxes are centered in the grid point.
+#======================
+create_interp_weights <- function(ncfile, locids, lats, lons, region = NULL,
+                                  method = c("nearest", "bilinear", "9point", "invdist4nn", "NE",
+                                             "NW", "SE", "SW"))
+{
+  # crop the region to get the correct weights - save temporary file
+  nc_cropped1 <- paste0('tmp_cropped_', format(Sys.time(), "%Y%m%d%H%M"),'.nc')
+  nc_cropped2 <- paste0('tmp_cropped2_', format(Sys.time(), "%Y%m%d%H%M"),'.nc')
+
+  system(paste0('cdo sellonlatbox,', region$lon_min, ',', region$lon_max, ',', region$lat_min,
+                ',', region$lat_max, ' ', ncfile, ' ', nc_cropped1))
+
+  #----------------
+  # Read grid description and compute (i,j) of requested locations (including decimals)
+  #----------------
+  griddes <- get_griddes(nc_cropped1)
+
+  if (is.null(griddes$yinc)) {
+    system(paste0('rm ', nc_cropped1))
+    stop("'griddes$yinc' not found in NetCDF file. Remember that only regular grids are accepted when ",
+         "downscaling to point locations.")
+  }
+
+  # If latitudes are decreasingly ordered, revert them
+  if (griddes$yinc < 0) {
+    system(paste0('cdo invertlat ', nc_cropped1, ' ', nc_cropped2))
+    griddes <- get_griddes(nc_cropped2)
+  }
+  # remove temporary files
+  system(paste0('rm ', nc_cropped1))
+  system(paste0('rm ', nc_cropped2))
+
+  if (is.null(griddes)) {
+    stop("'griddes' not found in the NetCDF source files")
+  }
+
+  gridpoints <- latlon2ij(griddes, lats, lons)
+
+  #----------------
+  # Compute the weights according to the selected method
+  #----------------
+  if(method == "nearest") {
+    #----------------
+    # Round i and j to closest integer. Weight is always 1.
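+    # Illustration (hypothetical values): a point falling at fractional grid
+    # coordinates (i, j) = (3.4, 7.8) is assigned entirely to its nearest
+    # grid point, (i, j) = (3, 8).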
+    #----------------
+
+    #   |     |     |
+    # - +-----+-----+-
+    #   |    x|     |
+    #   |  a  |     |
+    #   |     |     |
+    # - +-----+-----+-
+    #   |     |     |
+
+    centeri <- round(gridpoints$i,0)
+    centeri[centeri == griddes$xsize+1] <- 1 # close longitudes
+
+    weights.df <- data.frame(locid = locids,
+                             lat = lats,
+                             lon = lons,
+                             rawi = gridpoints$i,
+                             rawj = gridpoints$j,
+                             i = centeri,
+                             j = round(gridpoints$j, 0),
+                             weight = 1)
+  } else if (method %in% c("bilinear","invdist4nn")) {
+    #----------------
+    # Get the (i,j) coordinates of the 4 points (a,b,c,d) around x.
+    # This plot shows i increasing to the right and
+    # j increasing to the top, but the computations are generic.
+    #----------------
+    #   |     |     |
+    # - +-----+-----+-
+    #   |     |     |
+    #   |  b  |  c  |
+    #   |     |     |
+    # - +-----+-----+-
+    #   |    x|     |
+    #   |  a  |  d  |
+    #   |     |     |
+    # - +-----+-----+-
+    #   |     |     |
+
+    lowi <- floor(gridpoints$i)
+    highi <- ceiling(gridpoints$i)
+    highi[highi == griddes$xsize+1] <- 1 # close the longitudes
+    lowj <- floor(gridpoints$j)
+    highj <- ceiling(gridpoints$j)
+    # Note: highi and lowi are the same if i is integer
+    # Note: highj and lowj are the same if j is integer
+
+    #----------------
+    # Get x position wrt ad and ab axes (from 0 to 1)
+    #----------------
+    pcti <- gridpoints$i - lowi
+    pctj <- gridpoints$j - lowj
+
+    #----------------
+    # Compute weights for a,b,c,d grid points
+    #----------------
+    if(method == "bilinear") {
+      wa = (1 - pcti) * (1 - pctj)
+      wb = (1 - pcti) * pctj
+      wc = pcti * pctj
+      wd = pcti * (1 - pctj)
+    } else if(method == "invdist4nn") {
+      #----------------
+      # Note: the distance is computed in the (i,j) space.
+      # Note2: this method does not guarantee a continuous interpolation.
+      # Use bilinear if that's desirable.
+      # When x is on the ab line, c and d would be used. In the limit of x
+      # being just left of ab other points would be used.
+      # Here we just dropped c and d coeffs when over ab. Same for ad line,
+      # b and c coeffs dropped. This prevents repeated nodes.
+      #----------------
+      ida = 1 / sqrt(pcti^2 + pctj^2)
+      idb = 1 / sqrt(pcti^2 + (1 - pctj)^2)
+      idc = 1 / sqrt((1-pcti)^2 + (1-pctj)^2)
+      idd = 1 / sqrt((1-pcti)^2 + pctj^2)
+      idb[pctj == 0] <- 0;
+      idc[pctj == 0] <- 0;
+      idc[pcti == 0] <- 0;
+      idd[pcti == 0] <- 0;
+
+      #----------------
+      # Normalize vector of inverse distances.
+      # When the point lies exactly on a node the inverse distance is
+      # infinite: in that case the node receives all the weight.
+      #----------------
+      invdist <- cbind(ida, idb, idc, idd)
+      w <- t(apply(invdist, 1, function(x) { if(any(is.infinite(x))) {
+                   x <- is.infinite(x) * 1 } ; x <- x/sum(x) }))
+
+      wa = w[ , 1]
+      wb = w[ , 2]
+      wc = w[ , 3]
+      wd = w[ , 4]
+    }
+
+    #----------------
+    # Put info in dataframes and rbind them
+    #----------------
+    weightsa.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i,
+                              rawj = gridpoints$j, i = lowi, j = lowj, weight = wa)
+    weightsb.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i,
+                              rawj = gridpoints$j, i = lowi, j = highj, weight = wb)
+    weightsc.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i,
+                              rawj = gridpoints$j, i = highi, j = highj, weight = wc)
+    weightsd.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i,
+                              rawj = gridpoints$j, i = highi, j = lowj, weight = wd)
+    weights.df <- rbind(weightsa.df, weightsb.df, weightsc.df, weightsd.df)
+  } else if(method == "9point") {
+    #----------------
+    # Get the (i,j) coordinates of the 9 points (a,b,...,i) around x.
+    # This plot shows i increasing to the right and
+    # j increasing to the top, but the computations are generic.
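+    # Each of the 9 neighbours receives weight 1/9, except at the first and
+    # last latitude rows, where a 6-point average is used instead (see the
+    # w_lowj/w_centerj/w_highj definitions below).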
+ #---------------- + # | | | | + #-+-----+-----+-----+- + # | | | | + # | c | f | i | + # | | | | + #-+-----+-----+-----+- + # | | x| | + # | b | e | h | + # | | | | + #-+-----+-----+-----+- + # | | | | + # | a | d | g | + # | | | | + #-+-----+-----+-----+- + # | | | | + + centeri <- round(gridpoints$i, 0) + centeri[centeri == griddes$xsize + 1] <- 1 + centerj <- round(gridpoints$j, 0) + lowi <- centeri - 1 + highi <- centeri + 1 + lowi[lowi == 0] <- griddes$xsize # close the longitudes + highi[highi == griddes$xsize+1] <- 1 # close the longitudes + lowj <- centerj - 1 + highj <- centerj + 1 + + #---------------- + # For the north and south pole do a 6-point average + #---------------- + w_highj <- ifelse(centerj == 1,1/6,ifelse(centerj == griddes$ysize,0 ,1/9)) + w_centerj <- ifelse(centerj == 1,1/6,ifelse(centerj == griddes$ysize,1/6,1/9)) + w_lowj <- ifelse(centerj == 1,0 ,ifelse(centerj == griddes$ysize,1/6,1/9)) + + #---------------- + # Put info in dataframes and rbind them + #---------------- + weightsa.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = lowi, j = lowj, weight = w_lowj) + weightsb.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = lowi, j = centerj, weight = w_centerj) + weightsc.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = lowi, j = highj, weight = w_highj) + weightsd.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = centeri, j = lowj, weight = w_lowj) + weightse.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = centeri, j = centerj, weight = w_centerj) + weightsf.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = centeri, j = highj, weight = w_highj) + weightsg.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = highi, j = lowj, weight = w_lowj) + weightsh.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = highi, j = centerj, weight = w_centerj) + weightsi.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = highi, j = highj, weight = w_highj) + weights.df <- rbind(weightsa.df, weightsb.df, weightsc.df, weightsd.df, weightse.df, + weightsf.df, weightsg.df, weightsh.df, weightsi.df) + } else if(method %in% c("NE", "NW", "SW", "SE")) { + #---------------- + # Find if increasing i and j increases or decreases lat and lon + #---------------- + westtoeast <- (griddes$xinc > 0) + southtonorth <- T + if(griddes$gridtype == "gaussian") { + # We assume gaussian grid latitudes are ordered north to south + southtonorth <- F + } else { #lonlat + if(griddes$yinc < 0) {southtonorth <- F} + } + + #---------------- + # Get the (i,j) coordinates of the desired point (a,b,c or d) around x + #---------------- + # | | | + #- +-----+-----+- + # | | | + # | b | c | + # | | | + #- +-----+-----+- + # | x| | + # | a | d | + # | | | + #- +-----+-----+- + # | | | + + if(substr(method,1,1) == "N" & southtonorth == T) { selj <- ceiling(gridpoints$j) } + if(substr(method,1,1) == "S" & southtonorth == T) { selj <- floor(gridpoints$j) } + if(substr(method,1,1) == "N" & southtonorth == F) { selj <- floor(gridpoints$j) } + if(substr(method,1,1) == "S" & southtonorth == F) { selj <- ceiling(gridpoints$j) } + + 
if(substr(method,2,2) == "E" & westtoeast == T) {seli <- ceiling(gridpoints$i) } + if(substr(method,2,2) == "W" & westtoeast == T) {seli <- floor(gridpoints$i) } + if(substr(method,2,2) == "E" & westtoeast == F) {seli <- floor(gridpoints$i) } + if(substr(method,2,2) == "W" & westtoeast == F) {seli <- ceiling(gridpoints$i) } + + seli[seli == griddes$xsize + 1] <- 1 # close the longitudes + + weights.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = seli, j = selj, weight = 1) + } else { + stop(paste0("Method " ,method, " not implemented")) + } + + #---------------- + # Order by locid and remove lines with 0 weight + # This also removes some duplicates in the bilinear/invdist4nn methods when i + # or j is a whole number, or in the 9-point method when at the poles. + #---------------- + weights.df <- weights.df[order(weights.df$locid), ] + weights.df <- weights.df[weights.df$weight != 0, ] + + #---------------- + # Add as attributes the method and the nc file used to compute the weights + #---------------- + attributes(weights.df)$nc_file <- normalizePath(ncfile) + attributes(weights.df)$method <- method + + return(weights.df) +} + +#====================== +# Compute (i,j) from (lat,lon). +# Works only for 'lonlat' and 'gaussian' grids. +# Grids are supposed to cover whole globe. +#====================== +latlon2ij <- function(griddes, lats, lons) { + #------------ + # Check input params + #------------ + if(length(lons) != length(lats)) {stop("Input lat and lon have different lengths.")} + if(any(lats < -90) | any(lats > 90)) {stop("Latitude out of valid range")} + if((griddes$xfirst > 180) & (any(lons < 0))) { + stop("Please use the same convention for the longitudes in the source file and the ", + "longitude values in 'points'.") + } + #if(round(griddes$xinc*griddes$xsize) != 360) {stop("Grid is not global")} + # no need to resize lons to [0,360) + + #------------ + # Compute i (with decimals) + # i lies in [1,xsize+1) + # %% gives the remainder + #------------ + gridpoints <- list() + gridpoints$i <- 1 + (((lons - griddes$xfirst) / griddes$xinc) %% griddes$xsize) + + #------------ + # Compute j (with decimals) + #------------ + if(griddes$gridtype=='lonlat') { + gridpoints$j <- 1 + (lats - griddes$yfirst) / griddes$yinc + } else if(griddes$gridtype == 'gaussian') { + # We assume gaussian grid latitudes are ordered north to south + # findInterval can only work with monotonic ascending values so we revert twice + northj <- griddes$ysize-findInterval(lats, -griddes$yvals) + southj <- northj + 1 + + # Special case: We are north of the first lat + gridpoints$j[northj == 0] <- 1 + + # Special case: We are south of the last lat + gridpoints$j[southj == griddes$ysize + 1] <- griddes$ysize + + # Generic case + ok_idx <- !(northj == 0 | southj == griddes$ysize+1) + gridpoints$j[ok_idx] <- northj[ok_idx] + (griddes$yvals[northj[ok_idx]] - + lats[ok_idx])/(griddes$yvals[northj[ok_idx]] - griddes$yvals[southj[ok_idx]]) + } else { stop("Unsupported grid") } + + return(gridpoints) +} + +#====================== +# Use cdo griddes to obtain grid information +#====================== +get_griddes <- function(ncfile) { + tmp <- system(paste0("cdo griddes ", ncfile, + " 2>/dev/null | egrep 'gridtype|xsize|ysize|xfirst|xinc|yfirst|yinc'"), intern = T) + arr <- do.call(rbind, strsplit(tmp,"\\s+= ", perl = T)) + griddes <- as.list(arr[,2]) + names(griddes) <- arr[,1] + + if(griddes$gridtype == "gaussian") { + griddes$yvals <- get_lats(ncfile) + } + + # 
Convert some fields to numeric. Ensures all fields are present.
+  for(nm in c("xsize", "ysize", "xfirst", "yfirst", "xinc", "yinc")) {
+    griddes[[nm]] <- ifelse(is.null(griddes[[nm]]), NA, as.numeric(griddes[[nm]]))
+  }
+
+  return(griddes)
+}
+
+#======================
+# Use nco to obtain latitudes. Latitudes shall be named "lat" or "latitude".
+#======================
+get_lats <- function(ncfile) {
+
+  tmp <- system(paste0('ncks -H -s "%f " -v latitude ',ncfile),intern=T)
+
+  if(!is.null(attributes(tmp)$status)) {
+    tmp <- system(paste0('ncks -H -s "%f " -v lat ',ncfile),intern=T)
+  }
+
+  lats <- as.numeric(strsplit(tmp[1],"\\s+",perl=T)[[1]])
+
+  return(lats)
+}
+
+#======================
+# Load model data at all (i,j) pairs listed in the weights dataframe.
+# The coarse data is subset in memory with multiApply at the requested pairs.
+#======================
+get_model_data <- function(weights.df, mdata) {
+
+  #-----------------
+  # Get data for all combinations of i and j.
+  # (inefficient, getting many unneeded pairs).
+  # Avoid retrieving duplicates with unique()
+  # These are the indices of the global grid
+  #-----------------
+  is <- weights.df$i
+  js <- weights.df$j
+
+  #-----------------
+  # Get indices of original is and js in unique(is),unique(js) that were requested
+  #-----------------
+  idxi <- match(is, unique(is))
+  idxj <- match(js, unique(js))
+
+  #-----------------
+  # Subsample mdata to keep only the needed (i,j) pairs.
+  #-----------------
+  if (is.na(match("longitude", names(dim(mdata))))) {
+    londim <- match("lon", names(dim(mdata)))
+  } else {
+    londim <- match("longitude", names(dim(mdata)))
+  }
+  if (is.na(match("latitude", names(dim(mdata))))) {
+    latdim <- match("lat", names(dim(mdata)))
+  } else {
+    latdim <- match("latitude", names(dim(mdata)))
+  }
+
+  # trick: exchange idxi and idxj
+  #if(londim > latdim) { idx.tmp <- idxi; idxi <- idxj; idxj <- idx.tmp }
+  #keepdims <- (1:length(dim(mdata)))[-c(londim,latdim)]
+
+  #sub_mdata <- apply(mdata, keepdims, function(x) {
+  #        laply(1:length(is),function(k) { x[idxi[k],idxj[k]] }) })
+  #names(dim(sub_mdata))[1] <- "gridpoint"
+
+  #-----------------
+  # Retrieve with multiApply
+  #-----------------
+  sub_mdata <- Apply(mdata, target_dims = list(c(latdim, londim)), fun = function(x) {laply(1:length(is),function(k) { x[js[k],is[k]] }) })$output1
+  names(dim(sub_mdata))[1] <- "gridpoint"
+
+  #-----------------
+  # Return an array that contains as many gridpoints as (i,j) pairs were requested
+  #-----------------
+  return(sub_mdata)
+}
+
+#======================
+# Multiply the grid-point series by the weights,
+# to obtain the desired interpolations
+#======================
+interpolate_data <- function(model_data, weights.df) {
+  #-----------------
+  # Multiply each gridpoint matrix by its corresponding weight
+  #-----------------
+  gpdim <- match("gridpoint", names(dim(model_data)))
+  weighted_data <- sweep(model_data, gpdim, weights.df$weight, "*")
+
+  #-----------------
+  # Sum all series that belong to same interpolation point
+  # Return an array that contains the requested locations and interpolation type
+  #-----------------
+  #interp_data <- apply(weighted_data, -gpdim, function(x) { rowsum(x, weights.df$locid) })
+  #names(dim(interp_data))[1] <- "location"
+  interp_data <- Apply(weighted_data, target_dims = gpdim, fun = function(x) {
+                       rowsum(x, weights.df$locid)}, output_dims = c("location", "aux"))$output1
+
+  return(interp_data)
+}
+
+
diff --git a/modules/Downscaling/tmp/Intlr.R b/modules/Downscaling/tmp/Intlr.R
new file mode 100644
index 0000000000000000000000000000000000000000..565e30468aeb0b5fcc7fe6eabc8c0d18bb7dff9a
--- /dev/null
+++ b/modules/Downscaling/tmp/Intlr.R
@@ -0,0 +1,525 @@
+#'@rdname CST_Intlr
+#'@title Downscaling using interpolation and linear regression.
+#'
+#'@author J. Ramon, \email{jaume.ramon@bsc.es}
+#'
+#'@description This function performs a downscaling using an interpolation and a linear
+#'regression. Different methodologies that employ linear regressions are available. See
+#'parameter 'lr_method' for more information. It is recommended that the observations
+#'are passed already in the target grid. Otherwise, the function will also perform an
+#'interpolation of the observed field into the target grid. The coarse scale and
+#'observation data can be either global or regional. In the latter case, the region is
+#'defined by the user. In principle, the coarse and observation data are intended to
+#'be of the same variable, although different variables can also be admitted.
+#'
+#'@param exp an 's2dv object' containing the experimental field on the
+#'coarse scale for which the downscaling is aimed. The object must have, at least,
+#'the dimensions latitude, longitude, start date and member. The object is expected to be
+#'already subset for the desired region. Data can be in one or two integrated regions, e.g.,
+#'crossing the Greenwich meridian. To get the correct results in the latter case,
+#'the borders of the region should be specified in the parameter 'region'. See parameter
+#''region'.
+#'@param obs an 's2dv object' containing the observational field. The object
+#'must have, at least, the dimensions latitude, longitude and start date. The object is
+#'expected to be already subset for the desired region.
+#'@param lr_method a character vector indicating the linear regression method to be applied
+#'after the interpolation. Accepted methods are 'basic', 'large-scale' and '4nn'. The 'basic'
+#'method fits a linear regression using high resolution observations as predictands and the
+#'interpolated model data as predictor. The regression equation is then applied to the
+#'interpolated model data to correct the interpolated values. The 'large-scale' method fits
+#'a linear regression with large-scale predictors from the same model (e.g. teleconnection
+#'indices) as predictors and high-resolution observations as predictands. This equation is
+#'then applied to the interpolated model values. Finally, the '4nn' method uses a linear
+#'regression with the four nearest neighbours as predictors and high-resolution observations
+#'as predictands. It is then applied to model data to correct the interpolated values.
+#'@param target_grid a character vector indicating the target grid to be passed to CDO.
+#'It must be a grid recognised by CDO or a NetCDF file.
+#'@param points a list of two elements containing the point latitudes and longitudes
+#'of the locations to downscale the model data. The list must contain the two elements
+#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is
+#'to a point location, only regular grids are allowed for exp and obs. Only needed if the
+#'downscaling is to a point location.
+#'@param int_method a character vector indicating the regridding method to be passed
+#'to CDORemap. Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is
+#'to be used, CDO_1.9.8 or newer version is required.
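+#'It is only used when 'lr_method' is not '4nn', since the '4nn' method takes
+#'the four nearest coarse neighbours directly as predictors.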
+#'@param method_point_interp a character vector indicating the interpolation method to
+#'interpolate model gridded data into the point locations. Accepted methods are "nearest",
+#'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW".
+#'@param predictors an array with large-scale data to be used in the 'large-scale' method.
+#'Only needed if the linear regression method is set to 'large-scale'. It must have, at
+#'least the dimension start date and another dimension whose name has to be specified in
+#'the parameter 'large_scale_predictor_dimname'. It should contain as many elements as the
+#'number of large-scale predictors.
+#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data'
+#'in exp and obs. Default set to "lat".
+#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data'
+#'in exp and obs. Default set to "lon".
+#'@param sdate_dim a character vector indicating the start date dimension name in the element
+#''data' in exp and obs. Default set to "sdate".
+#'@param time_dim a character vector indicating the time dimension name in the element
+#''data' in exp and obs. Default set to "time".
+#'@param large_scale_predictor_dimname a character vector indicating the name of the
+#'dimension in 'predictors' that contain the predictor variables. See parameter 'predictors'.
+#'@param loocv a logical indicating whether to apply leave-one-out cross-validation when
+#'generating the linear regressions. Default to FALSE.
+#'@param region a numeric vector indicating the borders of the region defined in exp.
+#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers
+#'to the left border, while lonmax refers to the right border. latmin indicates the lower
+#'border, whereas latmax indicates the upper border. If set to NULL (default), the function
+#'takes the first and last elements of the latitudes and longitudes.
+#'@param ncores an integer indicating the number of cores to use in parallel computation.
+#'
+#'@import multiApply
+#'
+#'@return A list of three elements. 'data' contains the downscaled field, 'lat' the
+#'downscaled latitudes, and 'lon' the downscaled longitudes.
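+#'@details A sketch of the 'large-scale' method, with made-up values standing
+#'in for two hypothetical teleconnection indices and objects like those in the
+#'example below:
+#'\preformatted{
+#'predictors <- array(rnorm(10), dim = c(vars = 2, sdate = 5))
+#'res <- CST_Intlr(exp = exp, obs = obs, lr_method = 'large-scale',
+#'                 int_method = 'bil', target_grid = 'r1280x640',
+#'                 predictors = predictors)
+#'}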
+#'@examples
+#'exp <- rnorm(500)
+#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5)
+#'exp_lons <- 1:5
+#'exp_lats <- 1:4
+#'obs <- rnorm(900)
+#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5)
+#'obs_lons <- seq(1,5, 4/14)
+#'obs_lats <- seq(1,4, 3/11)
+#'exp <- s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons)
+#'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons)
+#'res <- CST_Intlr(exp = exp, obs = obs, target_grid = 'r1280x640', lr_method = 'basic', int_method = 'conservative')
+#'@export
+CST_Intlr <- function(exp, obs, lr_method, target_grid = NULL, points = NULL, int_method = NULL,
+                      method_point_interp = NULL, predictors = NULL, lat_dim = "lat", lon_dim = "lon",
+                      sdate_dim = "sdate", time_dim = "time", member_dim = "member",
+                      large_scale_predictor_dimname = 'vars', loocv = FALSE, region = NULL, ncores = 1) {
+
+  if (!inherits(exp,'s2dv_cube')) {
+    stop("Parameter 'exp' must be of the class 's2dv_cube'")
+  }
+
+  if (!inherits(obs,'s2dv_cube')) {
+    stop("Parameter 'obs' must be of the class 's2dv_cube'")
+  }
+
+  res <- Intlr(exp = exp$data, obs = obs$data, exp_lats = exp$lat, exp_lons = exp$lon,
+               obs_lats = obs$lat, obs_lons = obs$lon, points = points, source_file_exp = exp$source_files[1],
+               source_file_obs = obs$source_files[1], target_grid = target_grid, lr_method = lr_method,
+               int_method = int_method, method_point_interp = method_point_interp, predictors = predictors,
+               lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim, time_dim = time_dim,
+               member_dim = member_dim, large_scale_predictor_dimname = large_scale_predictor_dimname,
+               loocv = loocv, region = region, ncores = ncores)
+
+  # Modify data, lat and lon in the original s2dv_cube, adding the downscaled data
+  exp$data <- res$data
+  exp$lon <- res$lon
+  exp$lat <- res$lat
+
+  obs$data <- res$obs
+  obs$lat <- res$lat
+  obs$lon <- res$lon
+
+  res_s2dv <- list(exp = exp, obs = obs)
+  return(res_s2dv)
+}
+
+#'@rdname Intlr
+#'@title Downscaling using interpolation and linear regression.
+#'
+#'@author J. Ramon, \email{jaume.ramon@bsc.es}
+#'
+#'@description This function performs a downscaling using an interpolation and a linear
+#'regression. Different methodologies that employ linear regressions are available. See
+#'parameter 'lr_method' for more information. It is recommended that the observations
+#'are passed already in the target grid. Otherwise, the function will also perform an
+#'interpolation of the observed field into the target grid. The coarse scale and
+#'observation data can be either global or regional. In the latter case, the region is
+#'defined by the user. In principle, the coarse and observation data are intended to
+#'be of the same variable, although different variables can also be admitted.
+#'
+#'@param exp an array with named dimensions containing the experimental field on the
+#'coarse scale for which the downscaling is aimed. The object must have, at least,
+#'the dimensions latitude, longitude and start date. The object is expected to be
+#'already subset for the desired region. Data can be in one or two integrated regions, e.g.,
+#'crossing the Greenwich meridian. To get the correct results in the latter case,
+#'the borders of the region should be specified in the parameter 'region'. See parameter
+#''region'.
+#'@param obs an array with named dimensions containing the observational field. The object
+#'must have, at least, the dimensions latitude, longitude and start date. The object is
+#'expected to be already subset for the desired region.
+#'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must
+#'range from -90 to 90.
+#'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes
+#'can range from -180 to 180 or from 0 to 360.
+#'@param obs_lats a numeric vector containing the latitude values in 'obs'. Latitudes must
+#'range from -90 to 90.
+#'@param obs_lons a numeric vector containing the longitude values in 'obs'. Longitudes
+#'can range from -180 to 180 or from 0 to 360.
+#'@param lr_method a character vector indicating the linear regression method to be applied
+#'after the interpolation. Accepted methods are 'basic', 'large-scale' and '4nn'. The 'basic'
+#'method fits a linear regression using high resolution observations as predictands and the
+#'interpolated model data as predictor. The regression equation is then applied to the
+#'interpolated model data to correct the interpolated values. The 'large-scale' method fits
+#'a linear regression with large-scale predictors from the same model (e.g. teleconnection
+#'indices) as predictors and high-resolution observations as predictands. This equation is
+#'then applied to the interpolated model values. Finally, the '4nn' method uses a linear
+#'regression with the four nearest neighbours as predictors and high-resolution observations
+#'as predictands. It is then applied to model data to correct the interpolated values.
+#'@param target_grid a character vector indicating the target grid to be passed to CDO.
+#'It must be a grid recognised by CDO or a NetCDF file.
+#'@param points a list of two elements containing the point latitudes and longitudes
+#'of the locations to downscale the model data. The list must contain the two elements
+#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is
+#'to a point location, only regular grids are allowed for exp and obs. Only needed if the
+#'downscaling is to a point location.
+#'@param int_method a character vector indicating the regridding method to be passed
+#'to CDORemap. Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is
+#'to be used, CDO_1.9.8 or newer version is required.
+#'@param method_point_interp a character vector indicating the interpolation method to
+#'interpolate model gridded data into the point locations. Accepted methods are "nearest",
+#'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW".
+#'@param source_file_exp a character vector with a path to an example file of the exp data.
+#'Only needed if the downscaling is to a point location.
+#'@param source_file_obs a character vector with a path to an example file of the obs data.
+#'Only needed if the downscaling is to a point location.
+#'@param predictors an array with large-scale data to be used in the 'large-scale' method.
+#'Only needed if the linear regression method is set to 'large-scale'. It must have, at
+#'least the dimension start date and another dimension whose name has to be specified in
+#'the parameter 'large_scale_predictor_dimname'. It should contain as many elements as the
+#'number of large-scale predictors.
+#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data'
+#'in exp and obs. Default set to "lat".
+#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data'
+#'in exp and obs. Default set to "lon".
+#'@param sdate_dim a character vector indicating the start date dimension name in the element
+#''data' in exp and obs. Default set to "sdate".
Default set to "sdate". +#'@param time_dim a character vector indicating the time dimension name in the element +#''data' in exp and obs. Default set to "time". +#'@param region a numeric vector indicating the borders of the region defined in exp. +#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers +#'to the left border, while lonmax refers to the right border. latmin indicates the lower +#'border, whereas latmax indicates the upper border. If set to NULL (default), the function +#'takes the first and last elements of the latitudes and longitudes. +#'@param large_scale_predictor_dimname a character vector indicating the name of the +#'dimension in 'predictors' that contain the predictor variables. See parameter 'predictors'. +#'@param loocv a logical indicating whether to apply leave-one-out cross-validation when +#'generating the linear regressions. Default to FALSE. +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#' +#'@import multiApply +#' +#'@return A list of three elements. 'data' contains the dowscaled field, 'lat' the +#'downscaled latitudes, and 'lon' the downscaled longitudes. +#'@examples +#'exp <- rnorm(500) +#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5) +#'exp_lons <- 1:5 +#'exp_lats <- 1:4 +#'obs <- rnorm(900) +#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5) +#'obs_lons <- seq(1,5, 4/14) +#'obs_lats <- seq(1,4, 3/11) +#'res <- Intlr(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons, obs_lats = obs_lats, +#'obs_lons = obs_lons, target_grid = 'r1280x640', lr_method = 'basic', int_method = 'conservative') +#'@export +Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, target_grid = NULL, points = NULL, + int_method = NULL, method_point_interp = NULL, source_file_exp = NULL, source_file_obs = NULL, + predictors = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", + member_dim = "member", region = NULL, large_scale_predictor_dimname = 'vars', loocv = FALSE, + ncores = 1) { + + #----------------------------------- + # Checkings + #----------------------------------- + if (!inherits(lr_method, 'character')) { + stop("Parameter 'lr_method' must be of the class 'character'") + } + + if (!inherits(large_scale_predictor_dimname, 'character')) { + stop("Parameter 'large_scale_predictor_dimname' must be of the class 'character'") + } + + if (!inherits(loocv, 'logical')) { + stop("Parameter 'loocv' must be set to TRUE or FALSE") + } + + if (!inherits(lat_dim, 'character')) { + stop("Parameter 'lat_dim' must be of the class 'character'") + } + + if (!inherits(lon_dim, 'character')) { + stop("Parameter 'lon_dim' must be of the class 'character'") + } + + if (!inherits(sdate_dim, 'character')) { + stop("Parameter 'sdate_dim' must be of the class 'character'") + } + + if (!inherits(large_scale_predictor_dimname, 'character')) { + stop("Parameter 'large_scale_predictor_dimname' must be of the class 'character'") + } + + if (is.na(match(lon_dim, names(dim(exp)))) | is.na(match(lon_dim, names(dim(obs))))) { + stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'lon_dim'") + } + + if (is.na(match(lat_dim, names(dim(exp)))) | is.na(match(lat_dim, names(dim(obs))))) { + stop("Missing latitude dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'lat_dim'") + } + + if (is.na(match(sdate_dim, names(dim(exp)))) | is.na(match(sdate_dim, names(dim(obs))))) { + stop("Missing start 
+         "'sdate_dim'")
+  }
+
+  if (!is.null(points) & (is.null(source_file_exp) | is.null(source_file_obs))) {
+    stop("No source files found. Source files for exp and obs must be provided in the parameters ",
+         "'source_file_exp' and 'source_file_obs', respectively.")
+  }
+
+  if (!is.null(points) & is.null(method_point_interp)) {
+    stop("Please provide the interpolation method to interpolate gridded data to point locations ",
+         "through the parameter 'method_point_interp'.")
+  }
+
+  # sdate must be the time dimension in the input data
+  stopifnot(sdate_dim %in% names(dim(exp)))
+  stopifnot(sdate_dim %in% names(dim(obs)))
+
+  # the code is not yet prepared to handle members in the observations
+  restore_ens <- FALSE
+  if (member_dim %in% names(dim(obs))) {
+    if (identical(as.numeric(dim(obs)[member_dim]), 1)) {
+      obs <- ClimProjDiags::Subset(x = obs, along = member_dim, indices = 1, drop = 'selected')
+      restore_ens <- TRUE
+    } else {
+      stop("Not implemented for observations with members ('obs' can have 'member_dim', ",
+           "but it should be of length = 1).")
+    }
+  }
+
+  # checks for the parameter 'predictors'
+  if (!is.null(predictors)) {
+    if (!is.array(predictors)) {
+      stop("Parameter 'predictors' must be of the class 'array'")
+    } else {
+      # ensure the predictor variable name matches the parameter large_scale_predictor_dimname
+      stopifnot(large_scale_predictor_dimname %in% names(dim(predictors)))
+      stopifnot(sdate_dim %in% names(dim(predictors)))
+      stopifnot(dim(predictors)[sdate_dim] == dim(exp)[sdate_dim])
+    }
+  }
+
+  #-----------------------------------
+  # Interpolation
+  #-----------------------------------
+  if (lr_method != '4nn') {
+
+    if (is.null(int_method)) {
+      stop("Parameter 'int_method' must be a character vector indicating the interpolation method. ",
+           "Accepted methods are con, bil, bic, nn, con2")
+    }
+
+    if (is.null(region)) {
+      warning("The borders of the downscaling region have not been provided. Assuming the ",
+              "four borders of the downscaling region are defined by the first and last ",
+              "elements of the parameters 'obs_lats' and 'obs_lons'.")
+      region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)])
+    }
+
+    exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid,
+                                      points = points, method_point_interp = method_point_interp,
+                                      source_file = source_file_exp, lat_dim = lat_dim, lon_dim = lon_dim,
+                                      method_remap = int_method, region = region)
+
+    # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to
+    # the same grid to force the matching
+    if ((!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats,
+                        lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !is.null(points)) {
+      obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid,
+                                        points = points, method_point_interp = method_point_interp,
+                                        source_file = source_file_obs, lat_dim = lat_dim, lon_dim = lon_dim,
+                                        method_remap = int_method, region = region)
+
+      lats <- obs_interpolated$lat
+      lons <- obs_interpolated$lon
+      obs_interpolated <- obs_interpolated$data
+    } else {
+      obs_interpolated <- obs
+      lats <- obs_lats
+      lons <- obs_lons
+    }
+  }
+
+  #-----------------------------------
+  # Linear regressions
+  #-----------------------------------
+  # Pointwise linear regression
+  # Predictor: model data
+  # Predictand: observations
+  if (lr_method == 'basic') {
+    predictor <- exp_interpolated$data
+    predictand <- obs_interpolated
+
+    target_dims_predictor <- sdate_dim
+    target_dims_predictand <- sdate_dim
+  }
+
+  # (Multi) linear regression with large-scale predictors
+  # Predictor: passed through the parameter 'predictors' by the user. Can be model or observations
+  # Predictand: high-resolution observations
+  else if (lr_method == 'large-scale') {
+    if (is.null(predictors)) {
+      stop("The large-scale predictors must be passed through the parameter 'predictors'")
+    }
+
+    predictand <- obs_interpolated
+    predictor <- predictors
+
+    target_dims_predictor <- c(sdate_dim, large_scale_predictor_dimname)
+    target_dims_predictand <- sdate_dim
+  }
+
+  # Multi-linear regression with the four nearest neighbours
+  # Predictors: model data
+  # Predictand: observations
+  else if (lr_method == '4nn') {
+    predictor <- find_nn(coar = exp, lats_hres = obs_lats, lons_hres = obs_lons, lats_coar = exp_lats,
+                         lons_coar = exp_lons, lat_dim = lat_dim, lon_dim = lon_dim, nn = 4)
+
+    if (is.null(points)) {
+      if (!is.null(target_grid)) {
+        warning("Interpolating to the 'obs' grid")
+      }
+      predictand <- obs
+
+      lats <- obs_lats
+      lons <- obs_lons
+    }
+    # If the downscaling is to point locations: Once the 4 nearest neighbours have been found,
+    # interpolate to point locations
+    else {
+      predictor <- Interpolation(exp = predictor, lats = obs_lats, lons = obs_lons, target_grid = NULL,
+                                 points = points, method_point_interp = method_point_interp,
+                                 source_file = source_file_obs, method_remap = NULL, region = region)
+
+      predictand <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = NULL,
+                                  points = points, method_point_interp = method_point_interp,
+                                  source_file = source_file_obs, method_remap = NULL, region = region)
+
+      lats <- predictor$lat
+      lons <- predictor$lon
+      predictor <- predictor$data
+      predictand <- predictand$data
+    }
+
+    target_dims_predictor <- c(sdate_dim,'nn')
+    target_dims_predictand <- sdate_dim
+  }
+
+  else {
+    stop(paste0(lr_method, " method is not implemented yet"))
+  }
+
+  # Apply the linear regressions
+  res <- Apply(list(predictor, predictand), target_dims = list(target_dims_predictor, target_dims_predictand),
+               fun = .intlr, loocv = loocv, ncores = ncores)$output1
+
+  names(dim(res))[1] <- sdate_dim
+
+  # restore ensemble dimension in observations if it existed originally
+  if (restore_ens) {
+    predictand <- s2dv::InsertDim(predictand, posdim = 1, lendim = 1, name = member_dim)
+  }
+
+  # Return a list of three elements
+  res <- list(data = res, obs = predictand, lon = lons, lat = lats)
+
+  return(res)
+}
+
+#-----------------------------------
+# Atomic function to generate and apply the linear regressions
+#-----------------------------------
+.intlr <- function(x, y, loocv) {
+
+  tmp_df <- data.frame(x = x, y = y)
+
+  # if the data is all NA, force the return of NA
+  if (all(is.na(tmp_df)) | (sum(apply(tmp_df, 2, function(x) !all(is.na(x)))) == 1)) {
+
+    n <- nrow(tmp_df)
+    res <- rep(NA, n)
+
+  } else {
+    # training
+    lm1 <- train_lm(df = tmp_df, loocv = loocv)
+
+    # prediction
+    res <- pred_lm(lm1 = lm1, df = tmp_df, loocv = loocv)
+  }
+
+  return(res)
+
+}
+
+#-----------------------------------
+# Function to generate the linear regressions.
+# Returns a list
+#-----------------------------------
+train_lm <- function(df, loocv) {
+
+  # Remove columns containing only NA's
+  df <- df[ , apply(df, 2, function(x) !all(is.na(x)))]
+
+  if (loocv) {
+
+    lm1 <- lapply(1:nrow(df), function(j) lm(df[-j,], formula = y ~ .))
+
+  } else {
+
+    lm1 <- list(lm(data = df, formula = y ~ .))
+  }
+
+  return(lm1)
+}
+
+#-----------------------------------
+# Function to apply the linear regressions.
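+# With loocv = TRUE, one regression is fitted per time step and each time
+# step is predicted with the model trained on the remaining ones.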
+#-----------------------------------
+pred_lm <- function(df, lm1, loocv) {
+
+  if (loocv) {
+
+    pred_vals <- sapply(1:nrow(df), function(j) predict(lm1[[j]], df[j,]))
+
+  } else {
+
+    # predict on the same data frame the model was trained on
+    pred_vals_ls <- lapply(lm1, predict, newdata = df)
+    pred_vals <- unlist(pred_vals_ls)
+  }
+
+  return(pred_vals)
+}
+
+#-----------------------------------
+# Function to find N nearest neighbours.
+# 'coar' is an array with named dimensions
+#-----------------------------------
+find_nn <- function(coar, lats_hres, lons_hres, lats_coar, lons_coar, lat_dim, lon_dim, nn = 4) {
+
+  # Sort the distances from closest to furthest
+  idx_lat <- as.array(sapply(lats_hres, function(x) order(abs(lats_coar - x))[1:nn]))
+  idx_lon <- as.array(sapply(lons_hres, function(x) order(abs(lons_coar - x))[1:nn]))
+
+  names(dim(idx_lat)) <- c('nn', lat_dim)
+  names(dim(idx_lon)) <- c('nn', lon_dim)
+
+  # obtain the values of the nearest neighbours
+  nearest <- Apply(list(coar, idx_lat, idx_lon),
+                   target_dims = list(c(lat_dim, lon_dim), lat_dim, lon_dim),
+                   fun = function(x, y, z) x[y, z])$output1
+
+  return(nearest)
+}
+
diff --git a/modules/Downscaling/tmp/LogisticReg.R b/modules/Downscaling/tmp/LogisticReg.R
new file mode 100644
index 0000000000000000000000000000000000000000..24be693603320058f1da85b60d9e1fc9c56d694b
--- /dev/null
+++ b/modules/Downscaling/tmp/LogisticReg.R
@@ -0,0 +1,497 @@
+#'@rdname CST_LogisticReg
+#'@title Downscaling using interpolation and logistic regression.
+#'
+#'@author J. Ramon, \email{jaume.ramon@bsc.es}
+#'
+#'@description This function performs a downscaling using an interpolation and a logistic
+#'regression. See \code{\link[nnet]{multinom}} for further details. It is recommended that
+#'the observations are passed already in the target grid. Otherwise, the function will also
+#'perform an interpolation of the observed field into the target grid. The coarse scale and
+#'observation data can be either global or regional. In the latter case, the region is
+#'defined by the user. In principle, the coarse and observation data are intended to be of
+#'the same variable, although different variables can also be admitted.
+#'
+#'@param exp an 's2dv object' with named dimensions containing the experimental field on
+#'the coarse scale for which the downscaling is aimed. The object must have, at least,
+#'the dimensions latitude, longitude, start date and member. The object is expected to be
+#'already subset for the desired region. Data can be in one or two integrated regions, e.g.,
+#'crossing the Greenwich meridian. To get the correct results in the latter case,
+#'the borders of the region should be specified in the parameter 'region'. See parameter
+#''region'.
+#'@param obs an 's2dv object' with named dimensions containing the observational field.
+#'The object must have, at least, the dimensions latitude, longitude and start date. The
+#'object is expected to be already subset for the desired region.
+#'@param target_grid a character vector indicating the target grid to be passed to CDO.
+#'It must be a grid recognised by CDO or a NetCDF file.
+#'@param int_method a character vector indicating the regridding method to be passed to CDORemap.
+#'Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is to be used, CDO_1.9.8
+#'or newer version is required.
+#'@param log_reg_method a character vector indicating the logistic regression method to be used.
+#'Accepted methods are "ens_mean", "ens_mean_sd", "sorted_members". "ens_mean" uses the ensemble
"ens_mean" uses the ensemble +#'mean anomalies as predictors in the logistic regression, "ens_mean_sd" uses the ensemble +#'mean anomalies and the ensemble spread (computed as the standard deviation of all the members) +#'as predictors in the logistic regression, and "sorted_members" considers all the members +#'ordered decreasingly as predictors in the logistic regression. Default method is "ens_mean". +#'@param probs_cat a numeric vector indicating the percentile thresholds separating the +#'climatological distribution into different classes (categories). Default to c(1/3, 2/3). See +#'\code{\link[easyVerification]{convert2prob}}. +#'@param return_most_likely_cat if TRUE, the function returns the most likely category. If +#'FALSE, the function returns the probabilities for each category. Default to FALSE. +#'@param points a list of two elements containing the point latitudes and longitudes +#'of the locations to downscale the model data. The list must contain the two elements +#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is +#'to a point location, only regular grids are allowed for exp and obs. Only needed if the +#'downscaling is to a point location. +#'@param method_point_interp a character vector indicating the interpolation method to interpolate +#'model gridded data into the point locations. Accepted methods are "nearest", "bilinear", "9point", +#'"invdist4nn", "NE", "NW", "SE", "SW". Only needed if the downscaling is to a point location. +#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data' +#'in exp and obs. Default set to "lat". +#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data' +#'in exp and obs. Default set to "lon". +#'@param sdate_dim a character vector indicating the start date dimension name in the element +#''data' in exp and obs. Default set to "sdate". +#'@param member_dim a character vector indicating the member dimension name in the element +#''data' in exp and obs. Default set to "member". +#'@param source_file a character vector with a path to an example file of the exp data. +#'Only needed if the downscaling is to a point location. +#'@param region a numeric vector indicating the borders of the region defined in obs. +#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers +#'to the left border, while lonmax refers to the right border. latmin indicates the lower +#'border, whereas latmax indicates the upper border. If set to NULL (default), the function +#'takes the first and last elements of the latitudes and longitudes. +#'@param loocv a logical vector indicating whether to perform leave-one-out cross-validation +#'in the fitting of the logistic regression. Default to FALSE. +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#' +#'@import multiApply +#'@import nnet +#'@importFrom laply plyr +#' +#'@seealso \code{\link[nnet]{multinom}} +#' +#'@return An list of three elements. 'data' contains the dowscaled data, that could be either +#'in the form of probabilities for each category or the most likely category. 'lat' contains the +#'downscaled latitudes, and 'lon' the downscaled longitudes. 
+#'
+#'@examples
+#'exp <- rnorm(1500)
+#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 15)
+#'exp_lons <- 1:5
+#'exp_lats <- 1:4
+#'obs <- rnorm(2700)
+#'dim(obs) <- c(lat = 12, lon = 15, sdate = 15)
+#'obs_lons <- seq(1,5, 4/14)
+#'obs_lats <- seq(1,4, 3/11)
+#'exp <- s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons)
+#'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons)
+#'res <- CST_LogisticReg(exp = exp, obs = obs, int_method = 'bil', target_grid = 'r1280x640',
+#'probs_cat = c(1/3, 2/3))
+#'@export
+CST_LogisticReg <- function(exp, obs, target_grid, int_method = NULL, log_reg_method = "ens_mean",
+                            probs_cat = c(1/3,2/3), return_most_likely_cat = FALSE, points = NULL,
+                            method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate",
+                            member_dim = "member", region = NULL, loocv = FALSE, ncores = 1) {
+
+  if (!inherits(exp,'s2dv_cube')) {
+    stop("Parameter 'exp' must be of the class 's2dv_cube'")
+  }
+
+  if (!inherits(obs,'s2dv_cube')) {
+    stop("Parameter 'obs' must be of the class 's2dv_cube'")
+  }
+
+  res <- LogisticReg(exp = exp$data, obs = obs$data, exp_lats = exp$lat, exp_lons = exp$lon,
+                     obs_lats = obs$lat, obs_lons = obs$lon, target_grid = target_grid,
+                     probs_cat = probs_cat, return_most_likely_cat = return_most_likely_cat,
+                     int_method = int_method, log_reg_method = log_reg_method, points = points,
+                     method_point_interp = method_point_interp, lat_dim = lat_dim,
+                     lon_dim = lon_dim, sdate_dim = sdate_dim, member_dim = member_dim,
+                     source_file = exp$source_files[1], region = region, loocv = loocv,
+                     ncores = ncores)
+
+  # Modify data, lat and lon in the original s2dv_cube, adding the downscaled data
+  exp$data <- res$data
+  exp$lon <- res$lon
+  exp$lat <- res$lat
+
+  obs$data <- res$obs
+  obs$lat <- res$lat
+  obs$lon <- res$lon
+
+  res_s2dv <- list(exp = exp, obs = obs)
+  return(res_s2dv)
+}
+
+#'@rdname LogisticReg
+#'@title Downscaling using interpolation and logistic regression.
+#'
+#'@author J. Ramon, \email{jaume.ramon@bsc.es}
+#'
+#'@description This function performs a downscaling using an interpolation and a logistic
+#'regression. See \code{\link[nnet]{multinom}} for further details. It is recommended that
+#'the observations are passed already in the target grid. Otherwise, the function will also
+#'perform an interpolation of the observed field into the target grid. The coarse scale and
+#'observation data can be either global or regional. In the latter case, the region is
+#'defined by the user. In principle, the coarse and observation data are intended to be of
+#'the same variable, although different variables can also be admitted.
+#'
+#'@param exp an array with named dimensions containing the experimental field on the
+#'coarse scale for which the downscaling is aimed. The object must have, at least,
+#'the dimensions latitude, longitude, start date and member. The object is expected to be
+#'already subset for the desired region. Data can be in one or two integrated regions, e.g.,
+#'crossing the Greenwich meridian. To get the correct results in the latter case,
+#'the borders of the region should be specified in the parameter 'region'. See parameter
+#''region'.
+#'@param obs an array with named dimensions containing the observational field. The object
+#'must have, at least, the dimensions latitude, longitude and start date. The object is
+#'expected to be already subset for the desired region.
+#'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must
+#'range from -90 to 90.
+#'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes +#'can range from -180 to 180 or from 0 to 360. +#'@param obs_lats a numeric vector containing the latitude values in 'obs'. Latitudes must +#'range from -90 to 90. +#'@param obs_lons a numeric vector containing the longitude values in 'obs'. Longitudes +#'can range from -180 to 180 or from 0 to 360. +#'@param target_grid a character vector indicating the target grid to be passed to CDO. +#'It must be a grid recognised by CDO or a NetCDF file. +#'@param int_method a character vector indicating the regridding method to be passed to CDORemap. +#'Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is to be used, CDO_1.9.8 +#'or newer version is required. +#'@param log_reg_method a character vector indicating the logistic regression method to be used. +#'Accepted methods are "ens_mean", "ens_mean_sd", "sorted_members". "ens_mean" uses the ensemble +#'mean anomalies as predictors in the logistic regression, "ens_mean_sd" uses the ensemble +#'mean anomalies and the ensemble spread (computed as the standard deviation of all the members) +#'as predictors in the logistic regression, and "sorted_members" considers all the members +#'ordered decreasingly as predictors in the logistic regression. Default method is "ens_mean". +#'@param probs_cat a numeric vector indicating the percentile thresholds separating the +#'climatological distribution into different classes (categories). Default to c(1/3, 2/3). See +#'\code{\link[easyVerification]{convert2prob}}. +#'@param return_most_likely_cat if TRUE, the function returns the most likely category. If +#'FALSE, the function returns the probabilities for each category. Default to FALSE. +#'@param points a list of two elements containing the point latitudes and longitudes +#'of the locations to downscale the model data. The list must contain the two elements +#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is +#'to a point location, only regular grids are allowed for exp and obs. Only needed if the +#'downscaling is to a point location. +#'@param method_point_interp a character vector indicating the interpolation method to interpolate +#'model gridded data into the point locations. Accepted methods are "nearest", "bilinear", "9point", +#'"invdist4nn", "NE", "NW", "SE", "SW". Only needed if the downscaling is to a point location. +#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data' +#'in exp and obs. Default set to "lat". +#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data' +#'in exp and obs. Default set to "lon". +#'@param sdate_dim a character vector indicating the start date dimension name in the element +#''data' in exp and obs. Default set to "sdate". +#'@param member_dim a character vector indicating the member dimension name in the element +#''data' in exp and obs. Default set to "member". +#'@param source_file a character vector with a path to an example file of the exp data. +#'Only needed if the downscaling is to a point location. +#'@param region a numeric vector indicating the borders of the region defined in obs. +#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers +#'to the left border, while lonmax refers to the right border. latmin indicates the lower +#'border, whereas latmax indicates the upper border. 
+#'If set to NULL (default), the function
+#'takes the first and last elements of the latitudes and longitudes.
+#'@param loocv a logical indicating whether to perform leave-one-out cross-validation
+#'in the fitting of the logistic regression. Default to FALSE.
+#'@param ncores an integer indicating the number of cores to use in parallel computation.
+#'
+#'@import multiApply
+#'@import nnet
+#'@importFrom plyr laply
+#'
+#'@seealso \code{\link[nnet]{multinom}}
+#'
+#'@return A list of three elements. 'data' contains the downscaled data, either in the form
+#'of probabilities for each category or the most likely category. 'lat' contains the
+#'downscaled latitudes, and 'lon' the downscaled longitudes.
+#'
+#'@examples
+#'exp <- rnorm(1500)
+#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 15)
+#'exp_lons <- 1:5
+#'exp_lats <- 1:4
+#'obs <- rnorm(2700)
+#'dim(obs) <- c(lat = 12, lon = 15, sdate = 15)
+#'obs_lons <- seq(1,5, 4/14)
+#'obs_lats <- seq(1,4, 3/11)
+#'res <- LogisticReg(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons,
+#'obs_lats = obs_lats, obs_lons = obs_lons, int_method = 'bil', target_grid = 'r1280x640',
+#'probs_cat = c(1/3, 2/3))
+#'@export
+LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid,
+                        int_method = NULL, log_reg_method = "ens_mean", probs_cat = c(1/3,2/3),
+                        return_most_likely_cat = FALSE, points = NULL, method_point_interp = NULL,
+                        lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", member_dim = "member",
+                        source_file = NULL, region = NULL, loocv = FALSE, ncores = 1) {
+
+  #-----------------------------------
+  # Checks
+  #-----------------------------------
+  if (!inherits(target_grid, 'character')) {
+    stop("Parameter 'target_grid' must be of the class 'character'")
+  }
+
+  if (!is.null(int_method) && !inherits(int_method, 'character')) {
+    stop("Parameter 'int_method' must be of the class 'character'")
+  }
+
+  if (!is.null(method_point_interp) && !inherits(method_point_interp, 'character')) {
+    stop("Parameter 'method_point_interp' must be of the class 'character'")
+  }
+
+  if (!inherits(lat_dim, 'character')) {
+    stop("Parameter 'lat_dim' must be of the class 'character'")
+  }
+
+  if (!inherits(lon_dim, 'character')) {
+    stop("Parameter 'lon_dim' must be of the class 'character'")
+  }
+
+  if (!inherits(sdate_dim, 'character')) {
+    stop("Parameter 'sdate_dim' must be of the class 'character'")
+  }
+
+  if (!inherits(member_dim, 'character')) {
+    stop("Parameter 'member_dim' must be of the class 'character'")
+  }
+
+  if (!is.null(source_file) && !inherits(source_file, 'character')) {
+    stop("Parameter 'source_file' must be of the class 'character'")
+  }
+
+  if (!inherits(loocv, 'logical')) {
+    stop("Parameter 'loocv' must be set to TRUE or FALSE")
+  }
+
+  if (is.na(match(lon_dim, names(dim(exp)))) || is.na(match(lon_dim, names(dim(obs))))) {
+    stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter ",
+         "'lon_dim'")
+  }
+
+  if (is.na(match(lat_dim, names(dim(exp)))) || is.na(match(lat_dim, names(dim(obs))))) {
+    stop("Missing latitude dimension in 'exp' and/or 'obs', or does not match the parameter ",
+         "'lat_dim'")
+  }
+
+  if (is.na(match(sdate_dim, names(dim(exp)))) || is.na(match(sdate_dim, names(dim(obs))))) {
+    stop("Missing start date dimension in 'exp' and/or 'obs', or does not match the parameter ",
+         "'sdate_dim'")
+  }
+
+  if (is.na(match(member_dim, names(dim(exp))))) {
+    stop("Missing member dimension in 'exp', or does not match the parameter ",
+         "'member_dim'")
+  }
parameter ", + "'member_dim'") + } + + if (!is.null(points) & (is.null(source_file))) { + stop("No source files found. One source file for exp must be provided in the parameter 'source_file'.") + } + + if (!is.null(points) & is.null(method_point_interp)) { + stop("Please provide the interpolation method to interpolate gridded data to point locations ", + "through the parameter 'method_point_interp'.") + } + + if (is.null(region)) { + warning("The borders of the downscaling region have not been provided. Assuming the four borders ", + "of the downscaling region are defined by the first and last elements of the parametres ", + "'obs_lats' and 'obs_lons'.") + region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)]) + } + + # the code is not yet prepared to handle members in the observations + restore_ens <- FALSE + if (member_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[member_dim]), 1)) { + restore_ens <- TRUE + obs <- ClimProjDiags::Subset(x = obs, along = member_dim, indices = 1, drop = 'selected') + } else { + stop("Not implemented for observations with members ('obs' can have 'member_dim', ", + "but it should be of length = 1).") + } + } + + exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid, + method_remap = int_method, points = points, source_file = source_file, + lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp, + region = region) + + # compute ensemble mean anomalies + if (log_reg_method == "ens_mean") { + predictor <- get_ens_mean_anom(obj_ens = exp_interpolated$data, member_dim = member_dim, sdate_dim = sdate_dim) + + target_dims_predictor <- sdate_dim + } + else if (log_reg_method == "ens_mean_sd") { + ens_mean_anom <- get_ens_mean_anom(obj_ens = exp_interpolated$data, member_dim = member_dim, + sdate_dim = sdate_dim) + ens_sd <- get_ens_sd(obj_ens = exp_interpolated$data, member_dim = member_dim) + + #merge two arrays into one array of predictors + predictor <- abind(ens_mean_anom, ens_sd, along = 1/2) + names(dim(predictor)) <- c("pred", names(dim(ens_mean_anom))) + + target_dims_predictor <- c(sdate_dim, "pred") + } else if (log_reg_method == "sorted_members") { + predictor <- sort_members(obj_ens = exp_interpolated$data, member_dim = member_dim) + + target_dims_predictor <- c(sdate_dim, member_dim) + } else { + stop(paste0(log_reg_method, " not recognised or not implemented.")) + } + + # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to + # the same grid to force the matching + if ((!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats, + lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !is.null(points)) { + obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid, + method_remap = int_method, points = points, source_file = source_file, + lat_dim = lat_dim, lon_dim = lon_dim, + method_point_interp = method_point_interp, region = region) + obs_ref <- obs_interpolated$data + } else { + obs_ref <- obs + } + + # convert observations to categorical predictands + obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) { + terc <- convert2prob(as.vector(x), prob = probs_cat) + apply(terc, 1, function(r) which (r == 1))}, + output_dims = sdate_dim)$output1 + + res <- Apply(list(predictor, obs_cat), target_dims = list(target_dims_predictor, sdate_dim), fun = function(x, y) + .log_reg(x = x, y = y, loocv = loocv), output_dims = c(sdate_dim, 
"category"))$output1 + + if (return_most_likely_cat) { + res <- Apply(res, target_dims = c(sdate_dim, "category"), most_likely_category, + output_dims = sdate_dim)$output1 + } + + # restore ensemble dimension in observations if it existed originally + if (restore_ens) { + obs_ref <- s2dv::InsertDim(obs_ref, posdim = 1, lendim = 1, name = member_dim) + } + + res <- list(data = res, obs = obs_ref, lon = exp_interpolated$lon, lat = exp_interpolated$lat) + + return(res) +} + +most_likely_category <- function(data) { +# data, expected dims: start date, category (in this order) + + if (all(is.na(data))) { + mlc <- rep(NA, nrow(data)) + } else { + mlc <- apply(data, 1, which.max) + } + return(mlc) +} + +sort_members <- function(obj_ens, member_dim) { + + sorted <- Apply(obj_ens, target_dims = member_dim, sort, decreasing = TRUE, na.last = TRUE)$output1 + + return(sorted) +} + +get_ens_sd <- function(obj_ens, member_dim) { + + # compute ensemble spread + ens_sd <- Apply(obj_ens, target_dims = member_dim, sd, na.rm = TRUE)$output1 + + return(ens_sd) +} + +get_ens_mean_anom <- function(obj_ens, member_dim, sdate_dim) { + + require(s2dv) + + # compute climatology + clim <- Apply(obj_ens, target_dims = c(member_dim, sdate_dim), mean)$output1 + + # compute ensemble mean + ens_mean <- Apply(obj_ens, target_dims = member_dim, mean, na.rm = TRUE)$output1 + + # compute ensemble mean anomalies + anom <- Ano(ens_mean, clim) + + return(anom) +} + +# atomic functions for logistic regressions +.log_reg <- function(x, y, loocv) { + + tmp_df <- data.frame(x = x, y = y) + + # if the data is all NA, force return return NA + if (all(is.na(tmp_df)) | (sum(apply(tmp_df, 2, function(x) !all(is.na(x)))) == 1)) { + + n <- nrow(tmp_df) + res <- matrix(NA, nrow = n, ncol = length(unique(tmp_df$y))) + + } else { + # training + lm1 <- train_lr(df = tmp_df, loocv = loocv) + + # prediction + res <- pred_lr(lm1 = lm1, df = tmp_df, loocv = loocv) + } + return(res) +} + +#----------------------------------- +# Function to train the logistic regressions. +#----------------------------------- +train_lr <- function(df, loocv) { + + require(nnet) + + # Remove columns containing only NA's + df <- df[ , apply(df, 2, function(x) !all(is.na(x)))] + + if (loocv) { + + lm1 <- lapply(1:nrow(df), function(j) multinom(y ~ ., data = df[ -j, ])) + + } else { + + lm1 <- list(multinom(y ~ ., data = df)) + + } + + return(lm1) +} + +#----------------------------------- +# Function to apply the logistic regressions. +#----------------------------------- +pred_lr <- function(df, lm1, loocv) { + + require(plyr) + + if (loocv) { + + # The error: "Error: Results must have the same dimensions." 
+
+#-----------------------------------
+# Function to apply the logistic regressions.
+#-----------------------------------
+pred_lr <- function(df, lm1, loocv) {
+
+  require(plyr)
+
+  if (loocv) {
+
+    # The error "Error: Results must have the same dimensions." can appear
+    # when the number of sdates is insufficient
+    pred_vals_ls <- list()
+    for (j in 1:nrow(df)) {
+      pred_vals_ls[[j]] <- predict(lm1[[j]], df[j,], type = "probs")
+    }
+
+    pred_vals <- laply(pred_vals_ls, .fun = as.array)
+
+  } else {
+
+    # type = class, probs
+    #pred_vals_ls <- lapply(lm1, predict, data = df, type = "probs")
+    #pred_vals <- unlist(pred_vals_ls)
+    pred_vals <- predict(lm1[[1]], df, type = "probs")
+  }
+
+  return(pred_vals)
+}
+
diff --git a/modules/Downscaling/tmp/Utils.R b/modules/Downscaling/tmp/Utils.R
new file mode 100644
index 0000000000000000000000000000000000000000..4c7274652f1876cc84777174f7736258895c6fa6
--- /dev/null
+++ b/modules/Downscaling/tmp/Utils.R
@@ -0,0 +1,31 @@
+.check_coords <- function(lat1, lon1, lat2, lon2) {
+  match <- TRUE
+  if (!((length(lat1) == length(lat2)) && (length(lon1) == length(lon2)))) {
+    match <- FALSE
+  }
+  return(match)
+}
+
+# reorder dims to match a reference array; dimensions missing from the array
+# to reorder are created with length 1
+# example:
+#arr_ref <- array(NA, c(dataset = 1, sdate = 8, member = 3, ftime = 1, lon = 269, lat = 181))
+#arr_to_reorder <- array(NA, c(dataset = 1, member = 3, sdate = 8, lat = 181, lon = 269, pp = 1))
+
+.reorder_dims <- function(arr_ref, arr_to_reorder) {
+
+  miss_dims <- names(dim(arr_ref))[!(names(dim(arr_ref)) %in% names(dim(arr_to_reorder)))]
+
+  if (length(miss_dims) != 0) {
+    for (m in seq_along(miss_dims)) {
+      arr_to_reorder <- InsertDim(data = arr_to_reorder, posdim = length(dim(arr_to_reorder)) + 1, lendim = 1,
+                                  name = miss_dims[m])
+    }
+  }
+
+  # TODO: add code to reorder dimensions and put the non-common dimensions at the end
+
+  orddim <- match(names(dim(arr_ref)), names(dim(arr_to_reorder)))
+  return(Reorder(data = arr_to_reorder, order = orddim))
+}
+
+
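For reference, a minimal sketch of what .reorder_dims returns for the example arrays in the
comments above (it assumes s2dv is attached, for InsertDim and Reorder; as the TODO notes,
a dimension present only in arr_to_reorder, such as 'pp', is not handled yet):

arr_ref <- array(NA, c(dataset = 1, sdate = 8, member = 3, ftime = 1, lon = 269, lat = 181))
arr_to_reorder <- array(NA, c(dataset = 1, member = 3, sdate = 8, lat = 181, lon = 269))
out <- .reorder_dims(arr_ref, arr_to_reorder)
names(dim(out))  # "dataset" "sdate" "member" "ftime" "lon" "lat"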
diff --git a/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml b/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml
index 31ae079d2dfe76c600ecef3083db5943e5f2ae20..fbb1ffe4fadc47744097674884b88e6a80fe3289 100644
--- a/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml
+++ b/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml
@@ -43,5 +43,6 @@ Analysis:
 Run:
   Loglevel: INFO
   Terminal: yes
-  output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/
+  output_dir: /esarchive/scratch/jramon/GitLab_jramon/auto-s2s/out-logs/
   code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/
+
diff --git a/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml b/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml
new file mode 100644
index 0000000000000000000000000000000000000000..99e02c7dc6c54cd0f804eb70d6f96aea926fb319
--- /dev/null
+++ b/modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml
@@ -0,0 +1,62 @@
+# ▄████▄ ██████ ▓█████▄ ▒█████ █ █░███▄ █ ██████ ▄████▄ ▄▄▄ ██▓ ▓█████
+#▒██▀ ▀█ ▒██ ▒ ▒██▀ ██▌▒██▒ ██▒▓█░ █ ░█░██ ▀█ █ ▒██ ▒ ▒██▀ ▀█ ▒████▄ ▓██▒ ▓█ ▀
+#▒▓█ ▄ ░ ▓██▄ ░██ █▌▒██░ ██▒▒█░ █ ░█▓██ ▀█ ██▒░ ▓██▄ ▒▓█ ▄ ▒██ ▀█▄ ▒██░ ▒███
+#▒▓▓▄ ▄██▒ ▒ ██▒░▓█▄ ▌▒██ ██░░█░ █ ░█▓██▒ ▐▌██▒ ▒ ██▒▒▓▓▄ ▄██▒░██▄▄▄▄██ ▒██░ ▒▓█ ▄
+#▒ ▓███▀ ░▒██████▒▒░▒████▓ ░ ████▓▒░░░██▒██▓▒██░ ▓██░▒██████▒▒▒ ▓███▀ ░ ▓█ ▓██▒░██████▒░▒████▒
+#░ ░▒ ▒ ░▒ ▒▓▒ ▒ ░ ▒▒▓ ▒ ░ ▒░▒░▒░ ░ ▓░▒ ▒ ░ ▒░ ▒ ▒ ▒ ▒▓▒ ▒ ░░ ░▒ ▒ ░ ▒▒ ▓▒█░░ ▒░▓ ░░░ ▒░ ░
+# ░ ▒ ░ ░▒ ░ ░ ░ ▒ ▒ ░ ▒ ▒░ ▒ ░ ░ ░ ░░ ░ ▒░░ ░▒ ░ ░ ░ ▒ ▒ ▒▒ ░░ ░ ▒ ░ ░ ░ ░
+#░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ▒ ░ ░ ░ ░ ░ ░ ░ ░ ░ ▒ ░ ░ ░
+#░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░
+#░ ░ ░
+Description:
+  Author: J. Ramon
+Info: downscaling of seasonal predictions from coarse to fine grids
+
+Analysis:
+  Horizon: Seasonal
+  Variables:
+    name: tas
+    freq: monthly_mean
+  Datasets:
+    System:
+      name: ECMWF-SEAS5
+    Multimodel: no
+    Reference:
+      name: ERA5
+  Time:
+    sdate: '0501'
+    hcst_start: '2000'
+    hcst_end: '2005'
+    ftime_min: 1
+    ftime_max: 1
+# sometimes we want the region for the obs to be bigger than for hcst
+  Region:
+    latmin: 34.1
+    latmax: 45.1
+    lonmin: -12.5
+    lonmax: 6.35
+  Regrid:
+    method: bilinear
+    type: none
+  Workflow:
+    Calibration:
+      method: raw
+    Skill:
+      metric: BSS10 BSS90 Mean_Bias_SS RPSS ROCSS
+    Downscaling:
+      # Assumption 1: leave-one-out cross-validation is always applied
+      # Assumption 2: for analogs, we select the best analog (minimum distance)
+      # TODO: add downscaling to point locations
+      type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'
+      int_method: conservative # optional, regridding method accepted by CDO
+      bc_method: simple_bias # optional, 'simple_bias', 'calibration', 'quantile_mapping'
+      lr_method: # optional, 'basic', 'large_scale', '4nn'
+      log_reg_method: # optional, 'ens_mean', 'ens_mean_sd', 'sorted_members'
+      target_grid: /esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # optional, nc file or grid accepted by CDO
+      nanalogs: # optional, number of analogs to be searched
+  Output_format: S2S4E
+Run:
+  Loglevel: INFO
+  Terminal: yes
+  output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/
+  code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/
diff --git a/modules/Saving/paths2save.R b/modules/Saving/paths2save.R
index 93196b86d194fa2dbed8913d6c675f84d038ec9b..b2023ae65178a2e2083ddb62c51960a8020006a7 100644
--- a/modules/Saving/paths2save.R
+++ b/modules/Saving/paths2save.R
@@ -92,8 +92,17 @@ get_dir <- function(recipe, agg = "global") {
       fcst.sdate <- paste0("hcst-", recipe$Analysis$Time$sdate)
     }
   }
-
+  # Calibration or downscaling method
   calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method)
+  if ((calib.method == "raw") &&
+      (!is.null(recipe$Analysis$Workflow$Downscaling))) {
+    if (recipe$Analysis$Workflow$Downscaling$type == "none") {
+      calib.method <- "raw"
+    } else {
+      calib.method <- recipe$Analysis$Workflow$Downscaling$type
+    }
+  }
+  # Frequency
   store.freq <- recipe$Analysis$Variables$freq
   ## TODO: Change "_country"
   switch(tolower(agg),
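The added branch only relabels runs whose calibration method is "raw"; a standalone sketch
of the selection logic (illustrative; resolve_method_label is a hypothetical helper, not
part of get_dir):

resolve_method_label <- function(recipe) {
  # a downscaling type other than "none" replaces the "raw" calibration label
  label <- tolower(recipe$Analysis$Workflow$Calibration$method)
  downscaling <- recipe$Analysis$Workflow$Downscaling
  if (label == "raw" && !is.null(downscaling) && downscaling$type != "none") {
    label <- downscaling$type
  }
  label
}

recipe <- list(Analysis = list(Workflow = list(Calibration = list(method = "raw"),
                                               Downscaling = list(type = "intbc"))))
resolve_method_label(recipe)  # "intbc"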
diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R
index a8664569047bb60185733e27a1250ec985cd481d..9e453bdcc8ee6337b2f821728b007cc43c8debcb 100644
--- a/modules/Visualization/Visualization.R
+++ b/modules/Visualization/Visualization.R
@@ -22,7 +22,7 @@ plot_data <- function(recipe,
   # skill_metrics: list of arrays containing the computed skill metrics
   # significance: Bool. Whether to include significance dots where applicable
 
-  outdir <- paste0(get_dir(recipe), "/plots/")
+  outdir <- paste0(get_dir(recipe), "plots/")
   dir.create(outdir, showWarnings = FALSE, recursive = TRUE)
 
   if ((is.null(skill_metrics)) && (is.null(data$fcst))) {
@@ -47,7 +47,7 @@ plot_data <- function(recipe,
     plot_skill_metrics(recipe, archive, data$hcst, skill_metrics, outdir,
                        significance)
   }
-
+  # Plot forecast ensemble mean
   if (!is.null(data$fcst)) {
     plot_ensemble_mean(recipe, archive, data$fcst, outdir)
@@ -80,9 +80,10 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics,
   if (!is.list(skill_metrics) || is.null(names(skill_metrics))) {
     stop("The element 'skill_metrics' must be a list of named arrays.")
   }
-
+
   latitude <- data_cube$lat
   longitude <- data_cube$lon
+  system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name
   hcst_period <- paste0(recipe$Analysis$Time$hcst_start, "-",
                         recipe$Analysis$Time$hcst_end)
diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R
index b8541488c8540e61c694c42a0f36be60595699a9..1f3b8b9b467e82af9215e8171212446e2d329cbb 100644
--- a/modules/test_seasonal.R
+++ b/modules/test_seasonal.R
@@ -1,25 +1,28 @@
 source("modules/Loading/Loading.R")
 source("modules/Calibration/Calibration.R")
+source("modules/Downscaling/Downscaling.R")
 source("modules/Anomalies/Anomalies.R")
 source("modules/Skill/Skill.R")
 source("modules/Saving/Saving.R")
 source("modules/Visualization/Visualization.R")
 
-recipe_file <- "modules/Loading/testing_recipes/recipe_seasonal-tests.yml"
+recipe_file <- "modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml"
 recipe <- prepare_outputs(recipe_file)
 
 # Load datasets
 data <- load_datasets(recipe)
+# Downscale datasets
+data <- downscale_datasets(recipe, data)
 # Calibrate datasets
-calibrated_data <- calibrate_datasets(recipe, data)
+data <- calibrate_datasets(recipe, data)
 # Compute anomalies
-calibrated_data <- compute_anomalies(recipe, calibrated_data)
+# data <- compute_anomalies(recipe, data)
 # Compute skill metrics
-skill_metrics <- compute_skill_metrics(recipe, calibrated_data)
+skill_metrics <- compute_skill_metrics(recipe, data)
 # Compute percentiles and probability bins
-probabilities <- compute_probabilities(recipe, calibrated_data)
+# probabilities <- compute_probabilities(recipe, data)
+probabilities <- NULL # compute_probabilities() is skipped here, so pass NULL downstream
 # Export all data to netCDF
-save_data(recipe, calibrated_data, skill_metrics, probabilities)
+save_data(recipe, data, skill_metrics, probabilities)
 # Plot data
-plot_data(recipe, calibrated_data, skill_metrics, probabilities,
+plot_data(recipe, data, skill_metrics, probabilities,
           significance = T)
diff --git a/modules/test_seasonal_downscaling.R b/modules/test_seasonal_downscaling.R
new file mode 100644
index 0000000000000000000000000000000000000000..5f375abe5a700ec70e9879dd6ffe84e30202a957
--- /dev/null
+++ b/modules/test_seasonal_downscaling.R
@@ -0,0 +1,111 @@
+
+# DO source /esarchive/scratch/jramon/GitLab_jramon/auto-s2s/MODULES before opening the R session
+
+setwd("/esarchive/scratch/jramon/GitLab_jramon/auto-s2s/")
+source("modules/Loading/Loading.R")
+source("modules/Downscaling/Downscaling.R")
+source("modules/Calibration/Calibration.R")
+source("modules/Skill/Skill.R")
+source("modules/Saving/Saving.R")
+source("modules/Visualization/Visualization.R")
+
+# use this development to run the analogs function. Also change the Interpolation.R script to
+# not load CDORemap from s2dv.
+library(easyNCDF)
+library(ncdf4)
+library(ClimProjDiags)
+source('https://earth.bsc.es/gitlab/es/s2dv/-/raw/master/R/Utils.R')
+source('https://earth.bsc.es/gitlab/es/s2dv/-/raw/master/R/CDORemap.R')
+
+# Loop all downscaling methods and types
+#types <- c('int', 'intbc', 'intlr', 'analogs', 'logreg')
+#int_methods <- c('con', 'bil', 'bic', 'nn', 'con2')
+#bc_methods <- c('simple_bias', 'calibration', 'quantile_mapping')
+#lr_methods <- c('basic', 'large_scale', '4nn')
+#logreg_methods <- c('ens_mean', 'ens_mean_sd', 'sorted_members')
+
+# Read recipe
+recipe_file <- "modules/Loading/testing_recipes/recipe_system5c3s-tas_downscaling.yml"
+recipe <- prepare_outputs(recipe_file)
+
+# Load datasets
+data <- load_datasets(recipe)
+
+# Modify the recipe manually
+recipe$Analysis$Workflow$Downscaling$type <- 'analogs'
+recipe$Analysis$Workflow$Downscaling$int_method <- NULL # con, bil, bic, nn, con2
+recipe$Analysis$Workflow$Downscaling$bc_method <- NULL
+recipe$Analysis$Workflow$Downscaling$lr_method <- NULL
+recipe$Analysis$Workflow$Downscaling$log_reg_method <- NULL
+
+# Downscale datasets
+downscaled_data <- downscale_datasets(recipe, data)
+
+# Compute skill
+# NOTE: the LogReg method has no member dimension because it directly provides
+# the predicted categories
+# The following lines should work only for Interpolation, Intbc, Intlr, Analogs
+#stopifnot(recipe$Analysis$Workflow$Downscaling$type %in% c('int', 'intbc', 'intlr', 'analogs'))
+#skill_metrics <- compute_skill_metrics(recipe, downscaled_data$hcst$exp, downscaled_data$hcst$obs)
+
+# Save downscaled data to intercompare them later
+saveRDS(downscaled_data, file = paste0('/esarchive/scratch/jramon/downscaling/data/FOCUS/',
+                                       recipe$Analysis$Workflow$Downscaling$type, '-',
+                                       recipe$Analysis$Workflow$Downscaling$int_method, '-',
+                                       recipe$Analysis$Workflow$Downscaling$bc_method, '-',
+                                       recipe$Analysis$Workflow$Downscaling$lr_method, '-',
+                                       recipe$Analysis$Workflow$Downscaling$log_reg_method, '.RDS'))
+
+# Plot downscaled data
+plot_data(recipe, data = NULL, downscaled_data = downscaled_data)
+
+# Plot skill metrics
+my_data <- list()
+my_data$hcst <- downscaled_data$hcst$exp
+plot_data(recipe, data = my_data, skill_metrics = skill_metrics)
+
+# Plot Taylor diagram intercomparison
+source('/esarchive/scratch/jramon/GitLab_jramon/nc2ts/taylor_diagram.R')
+rds_files <- list.files(path = '/esarchive/scratch/jramon/downscaling/data/FOCUS', full.names = TRUE)
+down_data <- list()
+methods <- list.files(path = '/esarchive/scratch/jramon/downscaling/data/FOCUS')
+for (nfile in seq_along(rds_files)) {
+
+  # add to the existing Taylor diagram after the first file
+  flag <- nfile > 1
+
+  down_data[[nfile]] <- readRDS(rds_files[[nfile]])
+
+  # compute model ensemble mean
+  ens_mean <- Apply(down_data[[nfile]]$hcst$exp$data, 'ensemble', mean)$output1
+
+  # remove ensemble dimension in obs
+  obs <- Subset(down_data[[nfile]]$hcst$obs$data, along = 'ensemble', indices = 1, drop = 'selected')
+
+  ens_mean <- .reorder_dims(arr_ref = obs, arr_to_reorder = ens_mean)
+
+  plot <- taylor.diagram(ref = as.vector(obs), model = as.vector(ens_mean), normalize = TRUE,
+                         add = flag, pch = nfile, sd.arcs = TRUE, maxsd = 1.5)
+
+}
+
+legend(1.3, 1.5, cex = 0.8, pt.cex = 1, legend = methods, pch = 1:length(rds_files), ncol = 1)
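+
+# A sketch of the brute-force loop over methods hinted at by the commented
+# vectors above (kept as a comment; illustrative only, since runtimes are long
+# and not every combination of type and method is valid):
+#for (type in c('int', 'intbc', 'intlr', 'analogs', 'logreg')) {
+#  recipe$Analysis$Workflow$Downscaling$type <- type
+#  recipe$Analysis$Workflow$Downscaling$int_method <- if (type == 'analogs') NULL else 'bil'
+#  recipe$Analysis$Workflow$Downscaling$bc_method <- if (type == 'intbc') 'simple_bias' else NULL
+#  recipe$Analysis$Workflow$Downscaling$lr_method <- if (type == 'intlr') 'basic' else NULL
+#  recipe$Analysis$Workflow$Downscaling$log_reg_method <- if (type == 'logreg') 'ens_mean' else NULL
+#  saveRDS(downscale_datasets(recipe, data),
+#          file = paste0('/esarchive/scratch/jramon/downscaling/data/FOCUS/', type, '.RDS'))
+#}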
+
+##################################
+# EXAMPLE FOR SEASONAL CALIBRATION
+##################################
+#data <- load_datasets(recipe)
+# Calibrate datasets
+#calibrated_data <- calibrate_datasets(recipe, data)
+# Compute skill metrics
+#skill_metrics <- compute_skill_metrics(recipe, calibrated_data$hcst, data$obs)
+# Compute percentiles and probability bins
+#probabilities <- compute_probabilities(recipe, calibrated_data$hcst)
+# Export all data to netCDF
+#save_data(recipe, data, calibrated_data, skill_metrics, probabilities)
+# Plot data
+#plot_data(recipe, data, calibrated_data, skill_metrics, probabilities,
+#          significance = T)
+
+
+
diff --git a/tools/check_recipe.R b/tools/check_recipe.R
index 964c44462ebd98295627315689ea9249903130b1..b2c62e44bd9f507dc29e773fd5b6944c0e8c54a3 100644
--- a/tools/check_recipe.R
+++ b/tools/check_recipe.R
@@ -1,8 +1,6 @@
 check_recipe <- function(recipe) {
 
-  # recipe: yaml recipe already read it
-  ## TODO: Adapt to decadal case
-
+  ## TODO: set up logger-less case
   info(recipe$Run$logger, paste("Checking recipe:", recipe$recipe_path))
 
   # ---------------------------------------------------------------------
@@ -222,7 +220,9 @@ check_recipe <- function(recipe) {
   # WORKFLOW CHECKS
   # ---------------------------------------------------------------------
 
-  # Only one Calibration method allowed:
+  # Calibration
+  # If 'method' is FALSE/no/'none' or NULL, set it to 'raw'
+  ## TODO: Review this check
   if ((is.logical(recipe$Analysis$Workflow$Calibration$method) &&
        recipe$Analysis$Workflow$Calibration$method == FALSE) ||
       tolower(recipe$Analysis$Workflow$Calibration$method) == 'none' ||
@@ -257,6 +257,107 @@ check_recipe <- function(recipe) {
       error_status <- T
     }
   }
+  # Downscaling
+  ## TODO: Simplify checks (reduce number of lines)
+  downscal_params <- lapply(recipe$Analysis$Workflow$Downscaling, tolower)
+  # Define accepted entries
+  DOWNSCAL_TYPES <- c("none", "int", "intbc", "intlr", "analogs", "logreg")
+  BC_METHODS <- c("simple_bias", "calibration", "quantile_mapping")
+  LR_METHODS <- c("basic", "large_scale", "4nn")
+  LOGREG_METHODS <- c("ens_mean", "ens_mean_sd", "sorted_members")
+  # Check downscaling type
+  if ("type" %in% names(downscal_params)) {
+    if (length(downscal_params$type) == 0) {
+      downscal_params$type <- "none"
+      warn(recipe$Run$logger,
+           paste("Downscaling 'type' is empty in the recipe, setting it to",
+                 "'none'."))
+    }
+    if (!(downscal_params$type %in% DOWNSCAL_TYPES)) {
+      error(recipe$Run$logger,
+            paste0("The downscaling type requested in the recipe is not available.",
+                   " It must be one of the following: ",
+                   paste(DOWNSCAL_TYPES, collapse = ", "), "."))
+      error_status <- T
+    }
+    if ((downscal_params$type %in% c("int", "intbc", "intlr", "logreg")) &&
+        (length(downscal_params$target_grid) == 0)) {
+      error(recipe$Run$logger,
+            paste("A target grid is required for the downscaling method",
+                  "requested in the recipe."))
+      error_status <- T
+    }
+    if (downscal_params$type == "int") {
+      if (length(downscal_params$int_method) == 0) {
+        error(recipe$Run$logger,
+              paste("Downscaling type 'int' was requested, but no",
+                    "interpolation method is provided in the recipe."))
+        error_status <- T
+      }
+    } else if (downscal_params$type == "intbc") {
+      if (length(downscal_params$int_method) == 0) {
+        error(recipe$Run$logger,
+              paste("Downscaling type 'intbc' was requested in the recipe, but no",
+                    "interpolation method is provided."))
+        error_status <- T
+      }
+      if (length(downscal_params$bc_method) == 0) {
+        error(recipe$Run$logger,
+              paste("Downscaling type 'intbc' was requested in the recipe, but",
+                    "no bias correction method is provided."))
+        error_status <- T
+      } else if (!(downscal_params$bc_method %in% BC_METHODS)) {
+        error(recipe$Run$logger,
+              paste0("The accepted Bias Correction methods for the downscaling",
+                     " module are: ", paste(BC_METHODS, collapse = ", "), "."))
+        error_status <- T
+      }
+    } else if (downscal_params$type == "intlr") {
+      if (length(downscal_params$int_method) == 0) {
+        error(recipe$Run$logger,
+              paste("Downscaling type 'intlr' was requested in the recipe, but",
+                    "no interpolation method was provided."))
+        error_status <- T
+      }
+      if (length(downscal_params$lr_method) == 0) {
+        error(recipe$Run$logger,
+              paste("Downscaling type 'intlr' was requested in the recipe, but",
+                    "no linear regression method was provided."))
+        error_status <- T
+      } else if (!(downscal_params$lr_method %in% LR_METHODS)) {
+        error(recipe$Run$logger,
+              paste0("The accepted linear regression methods for the",
+                     " downscaling module are: ",
+                     paste(LR_METHODS, collapse = ", "), "."))
+        error_status <- T
+      }
+    } else if (downscal_params$type == "analogs") {
+      if (length(downscal_params$nanalogs) == 0) {
+        warn(recipe$Run$logger,
+             paste("Downscaling type is 'analogs', but the number of analogs",
+                   "has not been provided in the recipe."))
+      }
+    } else if (downscal_params$type == "logreg") {
+      if (length(downscal_params$int_method) == 0) {
+        error(recipe$Run$logger,
+              paste("Downscaling type 'logreg' was requested in the recipe, but",
+                    "no interpolation method was provided."))
+        error_status <- T
+      }
+      if (length(downscal_params$log_reg_method) == 0) {
+        error(recipe$Run$logger,
+              paste("Downscaling type 'logreg' was requested in the recipe,",
+                    "but no logistic regression method is provided."))
+        error_status <- T
+      } else if (!(downscal_params$log_reg_method %in% LOGREG_METHODS)) {
+        error(recipe$Run$logger,
+              paste0("The accepted logistic regression methods for the ",
+                     "downscaling module are: ",
+                     paste(LOGREG_METHODS, collapse = ", "), "."))
+        error_status <- T
+      }
+    }
+  }
   # Skill
   if (("Skill" %in% names(recipe$Analysis$Workflow)) &&
       (is.null(recipe$Analysis$Workflow$Skill$metric))) {
@@ -282,6 +383,7 @@ check_recipe <- function(recipe) {
   # RUN CHECKS
   # ---------------------------------------------------------------------
 
+  ## TODO: These checks should probably go first
   RUN_FIELDS = c("Loglevel", "Terminal", "output_dir", "code_dir")
   LOG_LEVELS = c("INFO", "DEBUG", "WARN", "ERROR", "FATAL")
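For quick sanity-testing, part of the same decision logic can be exercised on a bare list,
outside the logger setup (a standalone sketch; downscal_check_msgs is a hypothetical
helper, not part of the patch, and it assumes 'type' is always present):

downscal_check_msgs <- function(downscaling) {
  # collect human-readable problems instead of calling error()/warn()
  msgs <- character(0)
  params <- lapply(downscaling, tolower)
  DOWNSCAL_TYPES <- c("none", "int", "intbc", "intlr", "analogs", "logreg")
  if (!(params$type %in% DOWNSCAL_TYPES)) {
    msgs <- c(msgs, "unknown downscaling type")
  }
  if (params$type %in% c("int", "intbc", "intlr", "logreg") &&
      length(params$target_grid) == 0) {
    msgs <- c(msgs, "missing target_grid")
  }
  if (params$type %in% c("int", "intbc", "intlr", "logreg") &&
      length(params$int_method) == 0) {
    msgs <- c(msgs, "missing int_method")
  }
  msgs
}

downscal_check_msgs(list(type = "intbc", target_grid = "r1280x640"))
# [1] "missing int_method"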