diff --git a/R/Analogs.R b/R/Analogs.R
new file mode 100644
index 0000000000000000000000000000000000000000..ccd3bf2368ba94553e55251648e42d7562cf8411
--- /dev/null
+++ b/R/Analogs.R
@@ -0,0 +1,455 @@
+#'@rdname CST_Analogs
+#'@title Downscaling using Analogs based on large scale fields.
+#'
+#'@author J. Ramon, \email{jaume.ramon@bsc.es}
+#'
+#'@description This function performs a downscaling using Analogs. To compute
+#'the analogs given a coarse-scale field, the function looks for days with
+#'similar conditions in the historical observations. The coarse scale and
+#'observation data can be either global or regional. In the latter case, the
+#'region is defined by the user. In principle, the coarse and observation data
+#'should be of the same variable, although different variables can also be admitted.
+#'The analogs function will find the N best analogs based on the minimum Euclidean
+#'distance.
+#'
+#'The search for analogs must be done in the longest dataset possible, but might
+#'require high-memory computational resources. This is important since it is
+#'necessary to have a good representation of the possible states of the field in
+#'the past, and therefore, to get better analogs. The function can also look for
+#'analogs within a window of D days, but it is the user who has to define that window.
+#'Otherwise, the function will look for analogs in the whole dataset. This function
+#'is intended to downscale climate prediction data (i.e., sub-seasonal, seasonal
+#'and decadal predictions) but can admit climate projections or reanalyses. It does
+#'not have constraints on the regions or variables to downscale.
+#'@param exp an 's2dv' object with named dimensions containing the experimental field on
+#'the coarse scale for which the downscaling is aimed. The object must have, at least,
+#'the dimensions latitude, longitude, start date and time. The object is expected to be
+#'already subset for the desired region. Data can be in one or two integrated regions, e.g.,
+#'crossing the Greenwich meridian. To get the correct results in the latter case,
+#'the borders of the region should be specified in the parameter 'region'. See parameter
+#''region'.
+#'@param obs an 's2dv' object with named dimensions containing the observational field.
+#'The object must have, at least, the dimensions latitude, longitude and start date.
+#'The object is expected to be already subset for the desired region.
+#'@param grid_exp a character vector with a path to an example file of the exp data.
+#'It can be either a path to a NetCDF file from which to read the target grid
+#'(a single grid must be defined in such file) or a character vector indicating the
+#'coarse grid to be passed to CDO, and it must be a grid recognised by CDO or a NetCDF file.
+#'@param nanalogs an integer indicating the number of analogs to be searched.
+#'@param fun_analog a function to be applied over the found analogs. Only these options
+#'are valid: "mean", "wmean", "max", "min", "median" or NULL. If set to NULL (default),
+#'the function returns the found analogs.
+#'@param lat_dim a character vector indicating the latitude dimension name in the element
+#''data' in exp and obs. Default set to "lat".
+#'@param lon_dim a character vector indicating the longitude dimension name in the element
+#''data' in exp and obs. Default set to "lon".
+#'@param sdate_dim a character vector indicating the start date dimension name in the
+#'element 'data' in exp and obs. Default set to "sdate".
+#'@param time_dim a character vector indicating the time dimension name in the element
+#''data' in exp and obs. Default set to "time".
+#'@param region a numeric vector indicating the borders of the region defined in exp.
+#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers
+#'to the left border, while lonmax refers to the right border. latmin indicates the lower
+#'border, whereas latmax indicates the upper border. If set to NULL (default), the function
+#'takes the first and last elements of the latitudes and longitudes.
+#'@param return_indices a logical vector indicating whether to return the indices of the
+#'analogs together with the downscaled fields. Default to FALSE.
+#'@param loocv_window a logical vector only to be used if 'obs' does not have the dimension
+#''window'. It indicates whether to apply leave-one-out cross-validation in the creation
+#'of the window. It is recommended to be set to TRUE. Default to TRUE.
+#'@param ncores an integer indicating the number of cores to use in parallel computation.
+#'
+#'@return An 's2dv' object. The element 'data' contains the downscaled field, 'lat' the
+#'downscaled latitudes, and 'lon' the downscaled longitudes. If fun_analog is set to NULL
+#'(default), the output array in 'data' also contains the dimension 'analog' with the best
+#'analog days.
+#'@examples
+#'exp <- rnorm(15000)
+#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 30)
+#'exp_lons <- 1:5
+#'exp_lats <- 1:4
+#'obs <- rnorm(27000)
+#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5, time = 30)
+#'obs_lons <- seq(0,6, 6/14)
+#'obs_lats <- seq(0,6, 6/11)
+#'exp <- s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons)
+#'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons)
+#'downscaled_field <- CST_Analogs(exp = exp, obs = obs, grid_exp = 'r360x180')
+#'@export
+CST_Analogs <- function(exp, obs, grid_exp, nanalogs = 3, fun_analog = NULL, lat_dim = "lat", lon_dim = "lon",
+                        sdate_dim = "sdate", time_dim = "time", region = NULL, return_indices = FALSE,
+                        loocv_window = TRUE, ncores = 1) {
+
+  # input exp and obs must be s2dv_cube objects
+  if (!inherits(exp, 's2dv_cube')) {
+    stop("Parameter 'exp' must be of the class 's2dv_cube'")
+  }
+
+  if (!inherits(obs, 's2dv_cube')) {
+    stop("Parameter 'obs' must be of the class 's2dv_cube'")
+  }
+
+  res <- Analogs(exp = exp$data, obs = obs$data, exp_lats = exp$lat, exp_lons = exp$lon,
+                 obs_lats = obs$lat, obs_lons = obs$lon, grid_exp = grid_exp,
+                 nanalogs = nanalogs, fun_analog = fun_analog, lat_dim = lat_dim, lon_dim = lon_dim,
+                 sdate_dim = sdate_dim, time_dim = time_dim, region = region, return_indices = return_indices,
+                 loocv_window = loocv_window, ncores = ncores)
+
+  res_s2dv <- suppressWarnings(s2dv_cube(data = res$data, lon = res$lon, lat = res$lat))
+
+  return(res_s2dv)
+}
+
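+# Usage sketch (illustrative, not run): downscale with the ensemble mean of the
+# analogs instead of returning the raw analog days. 'exp' and 'obs' are assumed
+# to be 's2dv_cube' objects built as in the example above; "mean" is one of the
+# accepted 'fun_analog' options.
+# down <- CST_Analogs(exp = exp, obs = obs, grid_exp = 'r360x180',
+#                     nanalogs = 5, fun_analog = "mean")
+# str(down$data)  # no 'analog' dimension, since the analogs were averaged
+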
+#'@rdname Analogs
+#'@title Downscaling using Analogs based on large scale fields.
+#'
+#'@author J. Ramon, \email{jaume.ramon@bsc.es}
+#'@author Ll. Lledó, \email{llorenc.lledo@ecmwf.int}
+#'
+#'@description This function performs a downscaling using Analogs. To compute
+#'the analogs given a coarse-scale field, the function looks for days with
+#'similar conditions in the historical observations. The coarse scale and
+#'observation data can be either global or regional. In the latter case, the
+#'region is defined by the user. In principle, the coarse and observation data
+#'should be of the same variable, although different variables can also be admitted.
+#'The analogs function will find the N best analogs based on the minimum Euclidean
+#'distance.
+#'
+#'The search for analogs must be done in the longest dataset possible, but might
+#'require high-memory computational resources. This is important since it is
+#'necessary to have a good representation of the possible states of the field in
+#'the past, and therefore, to get better analogs. The function can also look for
+#'analogs within a window of D days, but it is the user who has to define that window.
+#'Otherwise, the function will look for analogs in the whole dataset. This function
+#'is intended to downscale climate prediction data (i.e., sub-seasonal, seasonal
+#'and decadal predictions) but can admit climate projections or reanalyses. It does
+#'not have constraints on the regions or variables to downscale.
+#'@param exp an array with named dimensions containing the experimental field on the
+#'coarse scale for which the downscaling is aimed. The object must have, at least,
+#'the dimensions latitude, longitude, start date and time. The object is expected to be
+#'already subset for the desired region. Data can be in one or two integrated regions, e.g.,
+#'crossing the Greenwich meridian. To get the correct results in the latter case,
+#'the borders of the region should be specified in the parameter 'region'. See parameter
+#''region'.
+#'@param obs an array with named dimensions containing the observational field. The object
+#'must have, at least, the dimensions latitude, longitude, start date and time. The object
+#'is expected to be already subset for the desired region. Optionally, 'obs' can have the
+#'dimension 'window', containing the sampled fields into which the function will look for
+#'the analogs. See function 'generate_window()'. Otherwise, the function will look for
+#'analogs using all the possible fields contained in obs.
+#'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must
+#'range from -90 to 90.
+#'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes
+#'can range from -180 to 180 or from 0 to 360.
+#'@param obs_lats a numeric vector containing the latitude values in 'obs'. Latitudes must
+#'range from -90 to 90.
+#'@param obs_lons a numeric vector containing the longitude values in 'obs'. Longitudes
+#'can range from -180 to 180 or from 0 to 360.
+#'@param grid_exp a character vector with a path to an example file of the exp data.
+#'It can be either a path to a NetCDF file from which to read the target grid
+#'(a single grid must be defined in such file) or a character vector indicating the
+#'coarse grid to be passed to CDO, and it must be a grid recognised by CDO or a NetCDF file.
+#'@param nanalogs an integer indicating the number of analogs to be searched.
+#'@param fun_analog a function to be applied over the found analogs. Only these options
+#'are valid: "mean", "wmean", "max", "min", "median" or NULL. If set to NULL (default),
+#'the function returns the found analogs.
+#'@param lat_dim a character vector indicating the latitude dimension name in the element
+#''data' in exp and obs. Default set to "lat".
+#'@param lon_dim a character vector indicating the longitude dimension name in the element
+#''data' in exp and obs. Default set to "lon".
+#'@param sdate_dim a character vector indicating the start date dimension name in the
+#'element 'data' in exp and obs. Default set to "sdate".
+#'@param time_dim a character vector indicating the time dimension name in the element
+#''data' in exp and obs. Default set to "time".
+#'@param region a numeric vector indicating the borders of the region defined in exp.
+#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers
+#'to the left border, while lonmax refers to the right border. latmin indicates the lower
+#'border, whereas latmax indicates the upper border. If set to NULL (default), the function
+#'takes the first and last elements of the latitudes and longitudes.
+#'@param return_indices a logical vector indicating whether to return the indices of the
+#'analogs together with the downscaled fields. The indices refer to the position of the
+#'element in the vector time * start_date. If 'obs' contains the dimension 'window', it will
+#'refer to the position of the element in the dimension 'window'. Default to FALSE.
+#'@param loocv_window a logical vector only to be used if 'obs' does not have the dimension
+#''window'. It indicates whether to apply leave-one-out cross-validation in the creation
+#'of the window. It is recommended to be set to TRUE. Default to TRUE.
+#'@param ncores an integer indicating the number of cores to use in parallel computation.
+#'@import multiApply
+#'@import CSTools
+#'@importFrom s2dv InsertDim CDORemap
+#'@importFrom FNN get.knnx
+#'
+#'@seealso \code{\link[s2dv]{CDORemap}}
+#'
+#'@return A list of three elements. 'data' contains the downscaled field, 'lat' the
+#'downscaled latitudes, and 'lon' the downscaled longitudes. If fun_analog is set to NULL
+#'(default), the output array in 'data' also contains the dimension 'analog' with the best
+#'analog days.
+#'@examples
+#'exp <- rnorm(15000)
+#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 30)
+#'exp_lons <- 1:5
+#'exp_lats <- 1:4
+#'obs <- rnorm(27000)
+#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5, time = 30)
+#'obs_lons <- seq(0,6, 6/14)
+#'obs_lats <- seq(0,6, 6/11)
+#'downscaled_field <- Analogs(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons,
+#'obs_lats = obs_lats, obs_lons = obs_lons, grid_exp = 'r360x180')
+#'@export
+Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, nanalogs = 3,
+                    fun_analog = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate",
+                    time_dim = "time", region = NULL, return_indices = FALSE, loocv_window = TRUE,
+                    ncores = 1) {
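+  # Outline of the steps implemented below (for orientation):
+  #   1) check the input arguments and dimension names,
+  #   2) add a 'window' dimension to 'obs' if absent (see generate_window()),
+  #   3) interpolate 'obs' to the coarse grid of 'exp' with CDO,
+  #   4) for each forecast field, find the k observed fields at minimum
+  #      Euclidean distance and map them back to the high-resolution 'obs'.
+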
+  #-----------------------------------
+  # Checkings
+  #-----------------------------------
+  if (!inherits(grid_exp, 'character')) {
+    stop("Parameter 'grid_exp' must be of class 'character'. It can be either a path ",
+         "to a NetCDF file from which to read the target grid (a single grid must be ",
+         "defined in such file) or a character vector indicating the coarse grid to ",
+         "be passed to CDO, and it must be a grid recognised by CDO or a NetCDF file.")
+  }
+
+  if (!inherits(nanalogs, 'numeric')) {
+    stop("Parameter 'nanalogs' must be of the class 'numeric'")
+  }
+
+  if (!inherits(lat_dim, 'character')) {
+    stop("Parameter 'lat_dim' must be of the class 'character'")
+  }
+
+  if (!inherits(lon_dim, 'character')) {
+    stop("Parameter 'lon_dim' must be of the class 'character'")
+  }
+
+  if (!inherits(sdate_dim, 'character')) {
+    stop("Parameter 'sdate_dim' must be of the class 'character'")
+  }
+
+  if (!inherits(time_dim, 'character')) {
+    stop("Parameter 'time_dim' must be of the class 'character'")
+  }
+
+  # Do not allow synonyms for lat (latitude), lon (longitude) and time (sdate) dimension names
+  if (is.na(match(lon_dim, names(dim(exp)))) | is.na(match(lon_dim, names(dim(obs))))) {
+    stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter ",
+         "'lon_dim'")
+  }
+
+  if (is.na(match(lat_dim, names(dim(exp)))) | is.na(match(lat_dim, names(dim(obs))))) {
+    stop("Missing latitude dimension in 'exp' and/or 'obs', or does not match the parameter ",
+         "'lat_dim'")
+  }
+
+  if (is.na(match(sdate_dim, names(dim(exp)))) | is.na(match(sdate_dim, names(dim(obs))))) {
+    stop("Missing start date dimension in 'exp' and/or 'obs', or does not match the parameter ",
+         "'sdate_dim'")
+  }
+
+  if (is.na(match(time_dim, names(dim(exp)))) | is.na(match(time_dim, names(dim(obs))))) {
+    stop("Missing time dimension in 'exp' and/or 'obs', or does not match the parameter ",
+         "'time_dim'")
+  }
+
+  # Ensure we have enough data to interpolate from high-res to coarse grid
+  #if ((obs_lats[1] > exp_lats[1]) | (obs_lats[length(obs_lats)] < exp_lats[length(exp_lats)]) |
+  #    (obs_lons[1] > exp_lons[1]) | (obs_lons[length(obs_lons)] < exp_lons[length(exp_lons)])) {
+
+  #  stop("There are not enough data in 'obs'. Please add more latitudes or ",
+  #       "longitudes.")
+  #}
+
+  # Select a function to apply to the analogs selected for a given observation
+  if (!is.null(fun_analog)) {
+    stopifnot(fun_analog %in% c("mean", "wmean", "max", "min", "median"))
+  }
+
+  # Correct indices later if cross-validation
+  loocv_correction <- FALSE
+  if ( !("window" %in% names(dim(obs))) & loocv_window) {
+    loocv_correction <- TRUE
+  }
+
+  # Create window if user does not have it in obs
+  if ( !("window" %in% names(dim(obs))) ) {
+    obs <- generate_window(obj = obs, sdate_dim = sdate_dim, time_dim = time_dim, loocv = loocv_window)
+  }
+
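+  # At this point 'obs' carries a 'window' dimension with the candidate fields.
+  # For a hypothetical 'obs' with 5 start dates and 30 times, loocv_window = TRUE
+  # leaves 4 x 30 = 120 candidate fields per start date, whereas without
+  # cross-validation all 5 x 30 = 150 fields are candidates for every start date.
+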
+  #-----------------------------------
+  # Interpolate high-res observations to the coarse grid
+  #-----------------------------------
+  if (is.null(region)) {
+    warning("The borders of the downscaling region have not been provided. Assuming the four borders of the ",
+            "downscaling region are defined by the first and last elements of the parameters 'exp_lats' and ",
+            "'exp_lons'.")
+    region <- c(exp_lons[1], exp_lons[length(exp_lons)], exp_lats[1], exp_lats[length(exp_lats)])
+  }
+
+  obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = grid_exp,
+                                    lat_dim = lat_dim, lon_dim = lon_dim, method_remap = "conservative",
+                                    region = region)
+  # If after interpolating 'obs' data the coordinates do not match, the exp data is interpolated to
+  # the same grid to force the matching
+  if (!.check_coords(lat1 = obs_interpolated$lat, lat2 = exp_lats, lon1 = obs_interpolated$lon, lon2 = exp_lons)) {
+    exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = grid_exp,
+                                      lat_dim = lat_dim, lon_dim = lon_dim, method_remap = "conservative",
+                                      region = region)$data
+  } else {
+    exp_interpolated <- exp
+  }
+
+  #-----------------------------------
+  # Reshape train and test
+  #-----------------------------------
+  res.data <- Apply(list(obs_interpolated$data, exp_interpolated, obs),
+                    target_dims = list(c("window", lat_dim, lon_dim), c(lat_dim, lon_dim),
+                                       c("window", lat_dim, lon_dim)),
+                    fun = function(tr, te, ob) .analogs(train = tr, test = te, obs_hres = ob, k = nanalogs,
+                                                        fun_analog = fun_analog), ncores = ncores)$output1
+
+  # Return the indices of the best analogs
+  if (return_indices) {
+    res.ind <- Apply(list(obs_interpolated$data, exp_interpolated, obs),
+                     target_dims = list(c("window", lat_dim, lon_dim), c(lat_dim, lon_dim),
+                                        c("window", lat_dim, lon_dim)),
+                     fun = function(tr, te, ob) .analogs(train = tr, test = te, obs_hres = ob, k = nanalogs,
+                                                         fun_analog = fun_analog, return_indices = TRUE),
+                     ncores = ncores, output_dims = 'ind')$output1
+
+    # If cross-validation has been applied, correct the indices
+    if (loocv_correction) {
+      nsdates <- dim(res.ind)[names(dim(res.ind)) == sdate_dim]
+      ntimes <- dim(res.ind)[names(dim(res.ind)) == time_dim]
+      res.ind <- Apply(res.ind, target_dims = c("ind", sdate_dim), function(x)
+                       sapply(1:nsdates, function(s) seq(ntimes * nsdates)[ - (ntimes * (s - 1) + 1:ntimes)][x[, s]]),
+                       output_dims = c("ind", sdate_dim))$output1
+    }
+    res <- list(data = res.data, ind = res.ind, lon = obs_lons, lat = obs_lats)
+  } else {
+    res <- list(data = res.data, lon = obs_lons, lat = obs_lats)
+  }
+
+  return(res)
+}
+
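+# Minimal sketch (illustrative, toy data) of the nearest-neighbour search that
+# .analogs() below relies on: FNN::get.knnx() returns the indices and distances
+# of the k rows of the training matrix closest to each query row.
+# train <- matrix(rnorm(100 * 12), nrow = 100)  # 100 candidate fields, 12 grid points
+# test  <- matrix(rnorm(12), nrow = 1)          # one forecast field, same grid points
+# knn <- FNN::get.knnx(train, test, k = 3)
+# knn$nn.index  # positions of the 3 best analogs in 'train'
+# knn$nn.dist   # their Euclidean distances
+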
+# For each element in test, find the indices of the k nearest neighbours in train
+.analogs <- function(train, test, obs_hres, k, fun_analog, return_indices = FALSE) {
+  # train and obs_hres dim: 3 dimensions window, lat and lon (in this order)
+  # test dim: 2 dimensions lat and lon (in this order)
+  # Number of lats/lons of the high-resolution data
+  space_dims_hres <- dim(obs_hres)[c(2,3)]
+
+  # Reformat train and obs_hres as (space, window) matrices and test as a vector
+  train <- apply(train, 1, as.vector); names(dim(train))[1] <- "space"
+  test <- as.vector(test)
+  obs_hres <- apply(obs_hres, 1, as.vector); names(dim(obs_hres))[1] <- "space"
+
+  # Identify and remove NA's
+  idx_na_tr <- is.na(train[ , 1])
+  idx_na_te <- is.na(test)
+  idx_na <- idx_na_tr | idx_na_te
+  tr_wo_na <- t(train[!idx_na , ])
+  te_wo_na <- test[!idx_na]
+  te_wo_na <- InsertDim(data = te_wo_na, posdim = 1, lendim = 1, name = "time")
+
+  knn.ind <- get.knnx(tr_wo_na, te_wo_na, k)
+
+  dist <- knn.ind$nn.dist
+  idx <- knn.ind$nn.index
+
+  # Either return only the indices or the analogs
+  if (return_indices) {
+    res <- as.numeric(idx)
+  } else {
+    res <- obs_hres[ , idx]
+    dim(res) <- c(space_dims_hres, analogs = k)
+
+    if (!is.null(fun_analog)) {
+      if (fun_analog == "wmean") {
+        weight <- 1 / dist
+        res <- apply(res, c(1,2), function(x) weighted.mean(x, weight))
+      } else {
+        res <- apply(res, c(1,2), fun_analog)
+      }
+    }
+  }
+
+  return(res)
+}
+
+# Add the dimension window to an array that contains, at least, the start date and time
+# dimensions
+# object has at least dimensions sdate and time
+generate_window <- function(obj, sdate_dim, time_dim, loocv, size = NULL) {
+
+  rsdates <- 1:dim(obj)[names(dim(obj)) == sdate_dim]
+  ntimes <- dim(obj)[names(dim(obj)) == time_dim]
+  rtimes <- 1:dim(obj)[names(dim(obj)) == time_dim]
+
+  # Generate a window containing all the data
+  if (is.null(size)) {
+
+    # Generate window removing one start date each time (leave-one-out cross-validation)
+    if (loocv) {
+      obj_window <- Apply(obj, target_dims = c(time_dim, sdate_dim),
+                          fun = function(x) sapply(rsdates, function(s) as.vector(x[ rtimes, -s])),
+                          output_dims = c('window', 'sdate'))$output1
+    # Generate window without cross-validation
+    } else {
+      obj_window <- Apply(obj, target_dims = c(time_dim, sdate_dim),
+                          fun = as.vector, output_dims = 'window')$output1
+    }
+  # Generate window of the size specified by the user. Only applied with CV
+  } else {
+    # For an accurate generation of the window, it is mandatory to add some "extra" data.
+    if (!("smonth" %in% names(dim(obj)))) {
+      stop("Missing 'smonth' dimension")
+    }
+
+    # Concatenate data from previous, target and posterior months
+    obj_new <- Apply(obj, target_dims = list(c("time", "smonth")),
+                     fun = as.vector, output_dims = "time")$output1
+
+    if (loocv) {
+      obj_window <- Apply(list(obj_new, rtimes, rsdates), target_dims = list(c(time_dim, sdate_dim), NULL, NULL),
+                          fun = function(x, t, s) as.vector(x[(ntimes + t - size):(ntimes + t + size), -s]),
+                          output_dims = 'window')$output1
+      names(dim(obj_window))[(length(names(dim(obj_window))) - 1):length(names(dim(obj_window)))] <- c("time", "sdate")
+    } else {
+      obj_window <- Apply(obj_new, target_dims = c(time_dim, sdate_dim),
+                          fun = function(x) sapply(rtimes, function(t) as.vector(x[(ntimes + t - size):(ntimes + t + size), ])),
+                          output_dims = c('window', 'time'))$output1
+    }
+  }
+
+  return(obj_window)
+}
+
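+# Illustrative check of generate_window() (not run): with 5 start dates and 30
+# times, leave-one-out cross-validation yields 30 x 4 = 120 fields per start date.
+# x <- array(rnorm(5 * 30), dim = c(sdate = 5, time = 30))
+# w <- generate_window(obj = x, sdate_dim = "sdate", time_dim = "time", loocv = TRUE)
+# dim(w)  # window = 120, sdate = 5
+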
diff --git a/R/Intbc.R b/R/Intbc.R
new file mode 100644
index 0000000000000000000000000000000000000000..e198730177cba01185af11db220091e5dd4f93e8
--- /dev/null
+++ b/R/Intbc.R
@@ -0,0 +1,316 @@
+#'@rdname CST_Intbc
+#'@title Downscaling using interpolation and bias adjustment.
+#'
+#'@author J. Ramon, \email{jaume.ramon@bsc.es}
+#'
+#'@description This function performs a downscaling using an interpolation and a later bias
+#'adjustment. It is recommended that the observations are passed already in the target grid.
+#'Otherwise, the function will also perform an interpolation of the observed field into the
+#'target grid. The coarse scale and observation data can be either global or regional. In the
+#'latter case, the region is defined by the user. In principle, the coarse and observation data
+#'are intended to be of the same variable, although different variables can also be admitted.
+#'
+#'@param exp an 's2dv' object containing the experimental field on the
+#'coarse scale for which the downscaling is aimed. The object must have, at least,
+#'the dimensions latitude, longitude, start date and member. The object is expected to be
+#'already subset for the desired region. Data can be in one or two integrated regions, e.g.,
+#'crossing the Greenwich meridian. To get the correct results in the latter case,
+#'the borders of the region should be specified in the parameter 'region'. See parameter
+#''region'.
+#'@param obs an 's2dv' object containing the observational field. The object
+#'must have, at least, the dimensions latitude, longitude and start date. The object is
+#'expected to be already subset for the desired region.
+#'@param target_grid a character vector indicating the target grid to be passed to CDO.
+#'It must be a grid recognised by CDO or a NetCDF file.
+#'@param bc_method a character vector indicating the bias adjustment method to be applied after
+#'the interpolation. Accepted methods are 'simple_bias', 'calibration', 'quantile_mapping'. The
+#'abbreviations 'sbc', 'cal', 'qm' can also be used.
+#'@param int_method a character vector indicating the regridding method to be passed to CDORemap.
+#'Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is to be used, CDO_1.9.8
+#'or newer version is required.
+#'@param points a list of two elements containing the point latitudes and longitudes
+#'of the locations to downscale the model data. The list must contain the two elements
+#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is
+#'to a point location, only regular grids are allowed for exp and obs. Only needed if the
+#'downscaling is to a point location.
+#'@param method_point_interp a character vector indicating the interpolation method to interpolate
+#'model gridded data into the point locations. Accepted methods are "nearest", "bilinear", "9point",
+#'"invdist4nn", "NE", "NW", "SE", "SW". Only needed if the downscaling is to a point location.
+#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data'
+#'in exp and obs. Default set to "lat".
+#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data'
+#'in exp and obs. Default set to "lon".
+#'@param sdate_dim a character vector indicating the start date dimension name in the element
+#''data' in exp and obs. Default set to "sdate".
+#'@param member_dim a character vector indicating the member dimension name in the element
+#''data' in exp and obs. Default set to "member".
+#'@param region a numeric vector indicating the borders of the region defined in obs.
+#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers
+#'to the left border, while lonmax refers to the right border. latmin indicates the lower
+#'border, whereas latmax indicates the upper border. If set to NULL (default), the function
+#'takes the first and last elements of the latitudes and longitudes.
+#'@param ncores an integer indicating the number of cores to use in parallel computation.
+#'@return An 's2dv' object. The element 'data' contains the downscaled field, 'lat' the
+#'downscaled latitudes, and 'lon' the downscaled longitudes.
+#'@examples
+#'exp <- rnorm(500)
+#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5)
+#'exp_lons <- 1:5
+#'exp_lats <- 1:4
+#'obs <- rnorm(900)
+#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5)
+#'obs_lons <- seq(1,5, 4/14)
+#'obs_lats <- seq(1,4, 3/11)
+#'exp <- s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons)
+#'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons)
+#'res <- CST_Intbc(exp = exp, obs = obs, target_grid = 'r1280x640', bc_method = 'simple_bias', int_method = 'conservative')
+#'@export
+CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, points = NULL, method_point_interp = NULL,
+                      lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", member_dim = "member",
+                      region = NULL, ncores = 1)
+{
+  if (!inherits(exp, 's2dv_cube')) {
+    stop("Parameter 'exp' must be of the class 's2dv_cube'")
+  }
+
+  if (!inherits(obs, 's2dv_cube')) {
+    stop("Parameter 'obs' must be of the class 's2dv_cube'")
+  }
+
+  res <- Intbc(exp = exp$data, obs = obs$data, exp_lats = exp$lat, exp_lons = exp$lon,
+               obs_lats = obs$lat, obs_lons = obs$lon, target_grid = target_grid,
+               int_method = int_method, bc_method = bc_method, points = points, source_file = exp$source_files[1],
+               method_point_interp = method_point_interp, lat_dim = lat_dim, lon_dim = lon_dim,
+               sdate_dim = sdate_dim, member_dim = member_dim, region = region, ncores = ncores)
+
+  res_s2dv <- suppressWarnings(s2dv_cube(data = res$data, lon = res$lon, lat = res$lat))
+
+  return(res_s2dv)
+}
+
+#'@rdname Intbc
+#'@title Downscaling using interpolation and bias adjustment.
+#'
+#'@author J. Ramon, \email{jaume.ramon@bsc.es}
+#'
+#'@description This function performs a downscaling using an interpolation and a later bias
+#'adjustment. It is recommended that the observations are passed already in the target grid.
+#'Otherwise, the function will also perform an interpolation of the observed field into the
+#'target grid. The coarse scale and observation data can be either global or regional. In the
+#'latter case, the region is defined by the user. In principle, the coarse and observation data
+#'are intended to be of the same variable, although different variables can also be admitted.
+#'
+#'@param exp an array with named dimensions containing the experimental field on the
+#'coarse scale for which the downscaling is aimed. The object must have, at least,
+#'the dimensions latitude, longitude, start date and member. The object is expected to be
+#'already subset for the desired region. Data can be in one or two integrated regions, e.g.,
+#'crossing the Greenwich meridian. To get the correct results in the latter case,
+#'the borders of the region should be specified in the parameter 'region'. See parameter
+#''region'.
+#'@param obs an array with named dimensions containing the observational field. The object
+#'must have, at least, the dimensions latitude, longitude and start date. The object is
+#'expected to be already subset for the desired region.
+#'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must
+#'range from -90 to 90.
+#'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes
+#'can range from -180 to 180 or from 0 to 360.
+#'@param obs_lats a numeric vector containing the latitude values in 'obs'. Latitudes must
+#'range from -90 to 90.
+#'@param obs_lons a numeric vector containing the longitude values in 'obs'. Longitudes
+#'can range from -180 to 180 or from 0 to 360.
+#'@param target_grid a character vector indicating the target grid to be passed to CDO.
+#'It must be a grid recognised by CDO or a NetCDF file.
+#'@param bc_method a character vector indicating the bias adjustment method to be applied after
+#'the interpolation. Accepted methods are 'simple_bias', 'calibration', 'quantile_mapping'. The
+#'abbreviations 'sbc', 'cal', 'qm' can also be used.
+#'@param int_method a character vector indicating the regridding method to be passed to CDORemap.
+#'Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is to be used, CDO_1.9.8
+#'or newer version is required.
+#'@param points a list of two elements containing the point latitudes and longitudes
+#'of the locations to downscale the model data. The list must contain the two elements
+#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is
+#'to a point location, only regular grids are allowed for exp and obs. Only needed if the
+#'downscaling is to a point location.
+#'@param method_point_interp a character vector indicating the interpolation method to interpolate
+#'model gridded data into the point locations. Accepted methods are "nearest", "bilinear", "9point",
+#'"invdist4nn", "NE", "NW", "SE", "SW". Only needed if the downscaling is to a point location.
+#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data'
+#'in exp and obs. Default set to "lat".
+#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data'
+#'in exp and obs. Default set to "lon".
+#'@param sdate_dim a character vector indicating the start date dimension name in the element
+#''data' in exp and obs. Default set to "sdate".
+#'@param time_dim a character vector indicating the time dimension name in the element 'data'
+#'in exp and obs. Default set to "time". Only used when 'bc_method' is 'dbc' or 'dynamical_bias'.
+#'@param member_dim a character vector indicating the member dimension name in the element
+#''data' in exp and obs. Default set to "member".
+#'@param source_file a character vector with a path to an example file of the exp data.
+#'Only needed if the downscaling is to a point location.
+#'@param region a numeric vector indicating the borders of the region defined in obs.
+#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers
+#'to the left border, while lonmax refers to the right border. latmin indicates the lower
+#'border, whereas latmax indicates the upper border. If set to NULL (default), the function
+#'takes the first and last elements of the latitudes and longitudes.
+#'@param ncores an integer indicating the number of cores to use in parallel computation.
+#'
+#'@import CSTools
+#'
+#'@seealso \code{\link[CSTools]{BiasCorrection}}
+#'@seealso \code{\link[CSTools]{Calibration}}
+#'@seealso \code{\link[CSTools]{QuantileMapping}}
+#'
+#'@return A list of three elements. 'data' contains the downscaled field, 'lat' the
+#'downscaled latitudes, and 'lon' the downscaled longitudes.
+#'@examples
+#'exp <- rnorm(500)
+#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5)
+#'exp_lons <- 1:5
+#'exp_lats <- 1:4
+#'obs <- rnorm(900)
+#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5)
+#'obs_lons <- seq(1,5, 4/14)
+#'obs_lats <- seq(1,4, 3/11)
+#'res <- Intbc(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons, obs_lats = obs_lats,
+#'obs_lons = obs_lons, target_grid = 'r1280x640', bc_method = 'simple_bias', int_method = 'conservative')
+#'@export
+Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, bc_method, int_method = NULL,
+                  points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate",
+                  time_dim = "time", member_dim = "member", source_file = NULL, region = NULL, ncores = 1, ...) {
+
+  if (!inherits(bc_method, 'character')) {
+    stop("Parameter 'bc_method' must be of the class 'character'")
+  }
+
+  if (!inherits(lat_dim, 'character')) {
+    stop("Parameter 'lat_dim' must be of the class 'character'")
+  }
+
+  if (!inherits(lon_dim, 'character')) {
+    stop("Parameter 'lon_dim' must be of the class 'character'")
+  }
+
+  if (!inherits(sdate_dim, 'character')) {
+    stop("Parameter 'sdate_dim' must be of the class 'character'")
+  }
+
+  if (!inherits(member_dim, 'character')) {
+    stop("Parameter 'member_dim' must be of the class 'character'")
+  }
+
+  # Do not allow synonyms for lat (latitude), lon (longitude) and time (sdate) dimension names
+  if (is.na(match(lon_dim, names(dim(exp)))) | is.na(match(lon_dim, names(dim(obs))))) {
+    stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter ",
+         "'lon_dim'")
+  }
+
+  if (is.na(match(lat_dim, names(dim(exp)))) | is.na(match(lat_dim, names(dim(obs))))) {
+    stop("Missing latitude dimension in 'exp' and/or 'obs', or does not match the parameter ",
+         "'lat_dim'")
+  }
+
+  if (is.na(match(sdate_dim, names(dim(exp)))) | is.na(match(sdate_dim, names(dim(obs))))) {
+    stop("Missing start date dimension in 'exp' and/or 'obs', or does not match the parameter ",
+         "'sdate_dim'")
+  }
+
+  if (is.na(match(member_dim, names(dim(exp))))) {
+    stop("Missing member dimension in 'exp', or does not match the parameter 'member_dim'")
+  }
+
+  if (!(bc_method %in% c('sbc', 'cal', 'qm', 'dbc', 'simple_bias', 'calibration',
+                         'quantile_mapping', 'dynamical_bias'))) {
+    stop("Parameter 'bc_method' must be a character vector indicating the bias adjustment method. ",
+         "Accepted methods are 'simple_bias', 'calibration', 'quantile_mapping'. The abbreviations ",
+         "'sbc', 'cal', 'qm' can also be used.")
+  }
+
+  if (!is.null(points) & is.null(source_file)) {
+    stop("No source file found. Source file must be provided in the parameter 'source_file'.")
+  }
+
+  if (!is.null(points) & is.null(method_point_interp)) {
+    stop("Please provide the interpolation method to interpolate gridded data to point locations ",
+         "through the parameter 'method_point_interp'.")
+  }
+
+  if (is.null(region)) {
+    warning("The borders of the downscaling region have not been provided. Assuming the four borders ",
+            "of the downscaling region are defined by the first and last elements of the parameters ",
+            "'obs_lats' and 'obs_lons'.")
+    region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)])
+  }
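+  # For instance, region = c(-10, 30, 35, 70) would select 10W-30E and 35N-70N
+  # (lonmin, lonmax, latmin, latmax); an illustrative value, not a default.
+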
+
+  exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid,
+                                    method_remap = int_method, points = points, source_file = source_file,
+                                    lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp,
+                                    region = region)
+
+  # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to
+  # the same grid to force the matching
+  if ((!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats,
+                      lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !is.null(points)) {
+    obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid,
+                                      method_remap = int_method, points = points, source_file = source_file,
+                                      lat_dim = lat_dim, lon_dim = lon_dim,
+                                      method_point_interp = method_point_interp, region = region)
+    obs_ref <- obs_interpolated$data
+  } else {
+    obs_ref <- obs
+  }
+
+  # Some functions only accept the dimension names "member" and "sdate" for exp and
+  # "sdate" for obs
+  if (member_dim != 'member') {
+    names(dim(exp_interpolated$data)) <- replace(names(dim(exp_interpolated$data)),
+                                                 which(names(dim(exp_interpolated$data)) == member_dim), 'member')
+  }
+
+  if (sdate_dim != 'sdate') {
+    names(dim(exp_interpolated$data)) <- replace(names(dim(exp_interpolated$data)),
+                                                 which(names(dim(exp_interpolated$data)) == sdate_dim), 'sdate')
+    names(dim(obs_ref)) <- replace(names(dim(obs_ref)),
+                                   which(names(dim(obs_ref)) == sdate_dim), 'sdate')
+  }
+
+  if (bc_method == 'sbc' | bc_method == 'simple_bias') {
+    if (dim(obs_ref)[sdate_dim] == 1) {
+      warning('Simple Bias Correction should not be used with only one observation. Returning NA.')
+    }
+
+    res <- BiasCorrection(exp = exp_interpolated$data, obs = obs_ref, ...)
+  } else if (bc_method == 'cal' | bc_method == 'calibration') {
+    if (dim(exp_interpolated$data)[member_dim] == 1) {
+      stop('Calibration must not be used with only one ensemble member.')
+    }
+    res <- Calibration(exp = exp_interpolated$data, obs = obs_ref, memb_dim = 'member',
+                       sdate_dim = 'sdate', ...)
+  } else if (bc_method == 'qm' | bc_method == 'quantile_mapping') {
+    # TO REMOVE ONCE CSTools includes the new changes
+    source('/esarchive/scratch/jramon/GitLab_jramon/cstools/R/CST_QuantileMapping.R')
+    res <- QuantileMapping(exp = exp_interpolated$data, obs = obs_ref, sample_dims = sdate_dim,
+                           na.rm = TRUE, ...)
+  } else if (bc_method == 'dbc' | bc_method == 'dynamical_bias') {
+    # the temporal dimension must be only one dimension called "time"
+    if (all(c(time_dim, sdate_dim) %in% names(dim(exp_interpolated$data)))) {
+      exp_interpolated$data <- Apply(exp_interpolated$data, target_dims = c(time_dim, sdate_dim),
+                                     fun = as.vector, output_dims = "time")$output1
+    }
+    if (all(c(time_dim, sdate_dim) %in% names(dim(obs_ref)))) {
+      obs_ref <- Apply(obs_ref, target_dims = c(time_dim, sdate_dim), fun = as.vector,
+                       output_dims = "time")$output1
+    }
+    # REMEMBER to add na.rm = T in colMeans in .proxiesattractor
+    res <- DynBiasCorrection(exp = exp_interpolated$data, obs = obs_ref, ...)
+  }
+
+  # Return a list of three elements
+  res <- list(data = res, lon = exp_interpolated$lon, lat = exp_interpolated$lat)
+
+  return(res)
+}
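+
+# Usage sketch (illustrative, not run): the three standard bias-adjustment routes,
+# with the toy 'exp'/'obs' arrays from the example above; abbreviated and full
+# method names are interchangeable.
+# res_sbc <- Intbc(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons,
+#                  target_grid = 'r1280x640', bc_method = 'sbc', int_method = 'bil')
+# res_cal <- Intbc(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons,
+#                  target_grid = 'r1280x640', bc_method = 'calibration', int_method = 'bil')
+# res_qm  <- Intbc(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons,
+#                  target_grid = 'r1280x640', bc_method = 'qm', int_method = 'bil')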
+
+
diff --git a/R/Interpolation.R b/R/Interpolation.R
new file mode 100644
index 0000000000000000000000000000000000000000..d2d9373d51bde2df76d3464d51b6d9a042434f4b
--- /dev/null
+++ b/R/Interpolation.R
@@ -0,0 +1,735 @@
+#'@rdname CST_Interpolation
+#'@title Regrid or interpolate gridded data to a point location.
+#'
+#'@author J. Ramon, \email{jaume.ramon@bsc.es}
+#'
+#'@description This function interpolates gridded model data from one grid to
+#'another (regrid) or interpolates gridded model data to a set of point locations.
+#'The gridded model data can be either global or regional. In the latter case, the
+#'region is defined by the user. It does not have constraints on the regions or
+#'variables to downscale.
+#'@param exp s2dv object containing the experimental field on the
+#'coarse scale for which the downscaling is aimed. The object must have, at least,
+#'the dimensions latitude and longitude. The field data is expected to be already subset
+#'for the desired region. Data can be in one or two integrated regions, e.g.,
+#'crossing the Greenwich meridian. To get the correct results in the latter case,
+#'the borders of the region should be specified in the parameter 'region'. See parameter
+#''region'.
+#'@param points a list of two elements containing the point latitudes and longitudes
+#'of the locations to downscale the model data. The list must contain the two elements
+#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is
+#'to a point location, only regular grids are allowed for exp and obs. Only needed if the
+#'downscaling is to a point location.
+#'@param method_remap a character vector indicating the regridding method to be passed
+#'to CDORemap. Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is
+#'to be used, CDO_1.9.8 or newer version is required.
+#'@param target_grid a character vector indicating the target grid to be passed to CDO.
+#'It must be a grid recognised by CDO or a NetCDF file.
+#'@param lat_dim a character vector indicating the latitude dimension name in the element
+#''exp' and/or 'points'. Default set to "lat".
+#'@param lon_dim a character vector indicating the longitude dimension name in the element
+#''exp' and/or 'points'. Default set to "lon".
+#'@param region a numeric vector indicating the borders of the region defined in exp.
+#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers
+#'to the left border, while lonmax refers to the right border. latmin indicates the lower
+#'border, whereas latmax indicates the upper border. If set to NULL (default), the function
+#'takes the first and last elements of the latitudes and longitudes.
+#'@param method_point_interp a character vector indicating the interpolation method to
+#'interpolate model gridded data into the point locations. Accepted methods are "nearest",
+#'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW".
+#'
+#'@seealso \code{\link[s2dv]{CDORemap}}
+#'
+#'@return An s2dv object containing the downscaled field.
+#'
+#'@examples
+#'exp <- rnorm(500)
+#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 1)
+#'lons <- 1:5
+#'lats <- 1:4
+#'exp <- s2dv_cube(data = exp, lat = lats, lon = lons)
+#'res <- CST_Interpolation(exp = exp, method_remap = 'conservative', target_grid = 'r1280x640')
+#'@export
+CST_Interpolation <- function(exp, points = NULL, method_remap = NULL, target_grid = NULL,
+                              lat_dim = "lat", lon_dim = "lon", region = NULL,
+                              method_point_interp = NULL)
+{
+  if (!inherits(exp, 's2dv_cube')) {
+    stop("Parameter 'exp' must be of the class 's2dv_cube'")
+  }
+
+  #if (is.null(exp[[lat_dim]]) | is.null(exp[[lon_dim]])) {
+  #  stop("The name of the latitude/longitude elements in 'exp' must match the parameters ",
+  #       "'lat_dim' and 'lon_dim'")
+  #}
+
+  if ((length(which(names(dim(exp$data)) == lat_dim)) == 0) | (length(which(names(dim(exp$data)) == lon_dim)) == 0)) {
+    stop("The name of the latitude/longitude dimensions in 'exp$data' must match the parameters 'lat_dim' and 'lon_dim'")
+  }
+
+  res <- Interpolation(exp = exp$data, lats = exp$lat, lons = exp$lon,
+                       source_file = exp$source_files[1], points = points,
+                       method_remap = method_remap, target_grid = target_grid, lat_dim = lat_dim,
+                       lon_dim = lon_dim, region = region, method_point_interp = method_point_interp)
+
+  res_s2dv <- suppressWarnings(s2dv_cube(data = res$data, lon = res$lon, lat = res$lat))
+
+  return(res_s2dv)
+}
+
+#'@rdname Interpolation
+#'@title Regrid or interpolate gridded data to a point location.
+#'
+#'@author J. Ramon, \email{jaume.ramon@bsc.es}
+#'@author Ll. Lledó, \email{llorenc.lledo@ecmwf.int}
+#'
+#'@description This function interpolates gridded model data from one grid to
+#'another (regrid) or interpolates gridded model data to a set of point locations.
+#'The gridded model data can be either global or regional. In the latter case, the
+#'region is defined by the user. It does not have constraints on the regions or
+#'variables to downscale.
+#'@param exp an array with named dimensions containing the experimental field on the
+#'coarse scale for which the downscaling is aimed. The object must have, at least,
+#'the dimensions latitude and longitude. The object is expected to be already subset
+#'for the desired region. Data can be in one or two integrated regions, e.g.,
+#'crossing the Greenwich meridian. To get the correct results in the latter case,
+#'the borders of the region should be specified in the parameter 'region'. See parameter
+#''region'.
+#'@param lats a numeric vector containing the latitude values. Latitudes must range from
+#'-90 to 90.
+#'@param lons a numeric vector containing the longitude values. Longitudes can range from
+#'-180 to 180 or from 0 to 360.
+#'@param points a list of two elements containing the point latitudes and longitudes
+#'of the locations to downscale the model data. The list must contain the two elements
+#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is
+#'to a point location, only regular grids are allowed for exp and obs. Only needed if the
+#'downscaling is to a point location.
+#'@param source_file a character vector with a path to an example file of the exp data.
+#'Only needed if the downscaling is to a point location.
+#'@param method_remap a character vector indicating the regridding method to be passed
+#'to CDORemap. Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is
+#'to be used, CDO_1.9.8 or newer version is required.
+#'@param target_grid a character vector indicating the target grid to be passed to CDO.
+#'It must be a grid recognised by CDO or a NetCDF file.
+#'@param lat_dim a character vector indicating the latitude dimension name in the element
+#''exp' and/or 'points'. Default set to "lat".
+#'@param lon_dim a character vector indicating the longitude dimension name in the element
+#''exp' and/or 'points'. Default set to "lon".
+#'@param region a numeric vector indicating the borders of the region defined in exp.
+#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers
+#'to the left border, while lonmax refers to the right border. latmin indicates the lower
+#'border, whereas latmax indicates the upper border. If set to NULL (default), the function
+#'takes the first and last elements of the latitudes and longitudes.
+#'@param method_point_interp a character vector indicating the interpolation method to
+#'interpolate model gridded data into the point locations. Accepted methods are "nearest",
+#'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW". Only needed if the downscaling
+#'is to a point location.
+#'@import multiApply
+#'@import plyr
+#'@importFrom s2dv CDORemap
+#'
+#'@seealso \code{\link[s2dv]{CDORemap}}
+#'
+#'@return A list of three elements. 'data' contains the downscaled field, 'lat' the
+#'downscaled latitudes, and 'lon' the downscaled longitudes.
+#'
+#'@examples
+#'exp <- rnorm(500)
+#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 1)
+#'lons <- 1:5
+#'lats <- 1:4
+#'res <- Interpolation(exp = exp, lats = lats, lons = lons, method_remap = 'conservative', target_grid = 'r1280x640')
+#'@export
+Interpolation <- function(exp, lats, lons, points = NULL, source_file = NULL, method_remap = NULL,
+                          target_grid = NULL, lat_dim = "lat", lon_dim = "lon", region = NULL,
+                          method_point_interp = NULL)
+{
+  if (!is.null(method_remap)) {
+    if (!inherits(method_remap, 'character')) {
+      stop("Parameter 'method_remap' must be of the class 'character'")
+    }
+  }
+
+  if (!is.null(method_point_interp)) {
+    if (!inherits(method_point_interp, 'character')) {
+      stop("Parameter 'method_point_interp' must be of the class 'character'")
+    }
+  }
+
+  if (is.na(match(lon_dim, names(dim(exp))))) {
+    stop("Missing longitude dimension in 'exp', or does not match the parameter 'lon_dim'")
+  }
+
+  if (is.na(match(lat_dim, names(dim(exp))))) {
+    stop("Missing latitude dimension in 'exp', or does not match the parameter 'lat_dim'")
+  }
+
+  # Check for out-of-range latitudes in the exp data
+  if (any(lats < -90 | lats > 90)) {
+    stop("Out-of-range latitudes have been found. Latitudes must range from -90 to 90")
+  }
+
+  # checkings for the case of interpolation to point locations
+  if (!is.null(points)) {
+    if (!inherits(points, 'list')) {
+      stop("Parameter 'points' must be a list of two elements containing the point ",
+           "latitudes and longitudes.")
+    }
+
+    if (is.null(method_point_interp)) {
+      stop("Parameter 'method_point_interp' must be a character vector indicating the ",
+           "interpolation method. Accepted methods are nearest, bilinear, 9point, ",
+           "invdist4nn, NE, NW, SE, SW")
+    }
+
+    if (!(method_point_interp %in% c('nearest', 'bilinear', '9point', 'invdist4nn', 'NE', 'NW', 'SE', 'SW'))) {
+      stop("Parameter 'method_point_interp' must be a character vector indicating the ",
+           "interpolation method. Accepted methods are nearest, bilinear, 9point, ",
+           "invdist4nn, NE, NW, SE, SW")
+    }
+
+    # Points must be a list of two elements
+    if (length(points) != 2) {
+      stop("'points' must be a list of two elements containing the point ",
+           "latitudes and longitudes in the form 'points$lat', 'points$lon'")
+    }
+
+    # The names of the two elements must be 'lat' and 'lon'
+    if (any(!(c(lat_dim, lon_dim) %in% names(points)))) {
+      stop("The names of the elements in the list 'points' must coincide with the parameters ",
+           "'lat_dim' and 'lon_dim'")
+    }
+
+    # Check that the number of latitudes and longitudes match
+    if (length(unique(lengths(points))) != 1L) {
+      stop("The number of latitudes and longitudes must match")
+    }
+
+    # Check for out-of-range latitudes in the point coordinates
+    if (any(points[[lat_dim]] < -90 | points[[lat_dim]] > 90)) {
+      stop("Out-of-range latitudes have been found in 'points'. Latitudes must range from ",
+           "-90 to 90")
+    }
+
+    if (is.null(source_file)) {
+      stop("No source file found. Source file must be provided in the parameter 'source_file'.")
+    }
+  } else {
+    if (is.null(method_remap)) {
+      stop("Parameter 'method_remap' must be a character vector indicating the ",
+           "interpolation method. Accepted methods are con, bil, bic, nn, con2")
+    }
+
+    if (is.null(target_grid)) {
+      stop("Parameter 'target_grid' can be either a path ",
+           "to a NetCDF file from which to read the target grid (a single grid must be ",
+           "defined in such file) or a character vector indicating the coarse grid to ",
+           "be passed to CDO, and it must be a grid recognised by CDO or a NetCDF file.")
+    }
+  }
+
+  #----------------------------------
+  # Limits of the region defined by the model data
+  #----------------------------------
+  # for the case when region limits are not passed by the user
+  # regions contains the following elements in order: lonmin, lonmax, latmin, latmax
+  if (is.null(region)) {
+    warning("The borders of the downscaling region have not been provided. Assuming the four borders of the ",
+            "downscaling region are defined by the first and last elements of the parameters 'lats' and 'lons'.")
+    region <- c(lons[1], lons[length(lons)], lats[1], lats[length(lats)])
+  }
+
+  # Ensure points to be within the region limits
+  if (!is.null(points)) {
+    if (any(points[[lat_dim]] > region[4]) | any(points[[lat_dim]] < region[3]) |
+        any(points[[lon_dim]] > region[2]) | any(points[[lon_dim]] < region[1])) {
+      stop("At least one of the points lies outside the model region")
+    }
+  }
+
+  #----------------------------------
+  # Map regrid with CDO
+  #----------------------------------
+  if (is.null(points)) {
+    res <- s2dv::CDORemap(data_array = exp,
+                          lats = lats,
+                          lons = lons,
+                          grid = target_grid,
+                          method = method_remap,
+                          crop = region)
+
+    # Return a list
+    res <- list(data = res$data_array, lon = res$lons, lat = res$lats)
+
+  #----------------------------------
+  # Interpolate to point locations
+  #----------------------------------
+  } else {
+    # First create interpolation weights, depending on the chosen method
+    weights <- create_interp_weights(ncfile = source_file, locids = 1:unique(lengths(points)),
+                                     lats = points[[lat_dim]], lons = points[[lon_dim]],
+                                     method = method_point_interp, region = list(lat_min = region[3],
+                                     lat_max = region[4], lon_min = region[1], lon_max = region[2]))
+
+    # Select coarse-scale data to be interpolated
+    model_data_gridpoints <- get_model_data(weights.df = weights, mdata = exp)
+
+    # Interpolate model data to point locations
+    res <- interpolate_data(model_data_gridpoints, weights)
+
+    # Return a list
+    res <- list(data = res, lon = points[[lon_dim]], lat = points[[lat_dim]])
+  }
+
+  return(res)
+}
+
+#======================
+# Compute weights for interpolation at several (lat,lon) positions
+# We assume that grid boxes are centered in the grid point.
+#======================
+create_interp_weights <- function(ncfile, locids, lats, lons, region = NULL,
+                                  method = c("nearest", "bilinear", "9point", "invdist4nn", "NE",
+                                             "NW", "SE", "SW"))
+{
+  # crop the region to get the correct weights - save temporary file
+  nc_cropped1 <- paste0('tmp_cropped_', format(Sys.time(), "%Y%m%d%H%M"), '.nc')
+  nc_cropped2 <- paste0('tmp_cropped2_', format(Sys.time(), "%Y%m%d%H%M"), '.nc')
+
+  system(paste0('cdo sellonlatbox,', region$lon_min, ',', region$lon_max, ',', region$lat_min,
+                ',', region$lat_max, ' ', ncfile, ' ', nc_cropped1))
+
+  #----------------
+  # Read grid description and compute (i,j) of requested locations (including decimals)
+  #----------------
+  griddes <- get_griddes(nc_cropped1)
+
+  if (is.null(griddes$yinc)) {
+    system(paste0('rm ', nc_cropped1))
+    stop("'griddes$yinc' not found in NetCDF file. Remember that only regular grids are accepted when ",
+         "downscaling to point locations.")
+  }
+
+  # If latitudes are decreasingly ordered, revert them
+  if (griddes$yinc < 0) {
+    system(paste0('cdo invertlat ', nc_cropped1, ' ', nc_cropped2))
+    griddes <- get_griddes(nc_cropped2)
+  }
+  # remove temporary files
+  system(paste0('rm ', nc_cropped1))
+  system(paste0('rm ', nc_cropped2))
+
+  if (is.null(griddes)) {
+    stop("'griddes' not found in the NetCDF source files")
+  }
+
+  gridpoints <- latlon2ij(griddes, lats, lons)
+
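+  # 'gridpoints' holds fractional (i, j) grid coordinates: e.g., i = 3.4 means the
+  # point lies between the 3rd and 4th longitudes, 40% of the way towards the 4th.
+  # The weighting schemes below turn these decimals into interpolation weights.
+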
+  #----------------
+  # Compute the weights according to the selected method
+  #----------------
+  if(method == "nearest") {
+    #----------------
+    # Round i and j to closest integer. Weight is always 1.
+    #----------------
+
+    #     |     |     |
+    #    -+-----+-----+-
+    #     |    x|     |
+    #     |  a  |     |
+    #     |     |     |
+    #    -+-----+-----+-
+    #     |     |     |
+
+    centeri <- round(gridpoints$i, 0)
+    centeri[centeri == griddes$xsize + 1] <- 1 # close longitudes
+
+    weights.df <- data.frame(locid = locids,
+                             lat = lats,
+                             lon = lons,
+                             rawi = gridpoints$i,
+                             rawj = gridpoints$j,
+                             i = centeri,
+                             j = round(gridpoints$j, 0),
+                             weight = 1)
+  } else if (method %in% c("bilinear","invdist4nn")) {
+    #----------------
+    # Get the (i,j) coordinates of the 4 points (a,b,c,d) around x.
+    # This plot shows i increasing to the right and
+    # j increasing to the top, but the computations are generic.
+    #----------------
+    #       |     |     |
+    #     - +-----+-----+-
+    #       |     |     |
+    #       |  b  |  c  |
+    #       |     |     |
+    #     - +-----+-----+-
+    #       |    x|     |
+    #       |  a  |  d  |
+    #       |     |     |
+    #     - +-----+-----+-
+    #       |     |     |
+
+    lowi <- floor(gridpoints$i)
+    highi <- ceiling(gridpoints$i)
+    highi[highi == griddes$xsize + 1] <- 1 # close the longitudes
+    lowj <- floor(gridpoints$j)
+    highj <- ceiling(gridpoints$j)
+    # Note: highi and lowi are the same if i is integer
+    # Note: highj and lowj are the same if j is integer
+
+    #----------------
+    # Get x position wrt ad and ab axes (from 0 to 1)
+    #----------------
+    pcti <- gridpoints$i - lowi
+    pctj <- gridpoints$j - lowj
+
+    #----------------
+    # Compute weights for a,b,c,d grid points
+    #----------------
+    if(method == "bilinear") {
+      wa = (1 - pcti) * (1 - pctj)
+      wb = (1 - pcti) * pctj
+      wc = pcti * pctj
+      wd = pcti * (1 - pctj)
+    } else if(method == "invdist4nn") {
+      #----------------
+      # Note: the distance is computed in the (i,j) space.
+      # Note2: this method does not guarantee a continuous interpolation.
+      # Use bilinear if that's desirable.
+      # When x is on the ab line, c and d would be used. In the limit of x
+      # being just left of ab other points would be used.
+      # Here we just dropped c and d coeffs when over ab. Same for ad line,
+      # b and c coeffs dropped. This prevents repeated nodes.
+      #----------------
+      ida = 1 / sqrt(pcti^2 + pctj^2)
+      idb = 1 / sqrt(pcti^2 + (1 - pctj)^2)
+      idc = 1 / sqrt((1 - pcti)^2 + (1 - pctj)^2)
+      idd = 1 / sqrt((1 - pcti)^2 + pctj^2)
+      idb[pctj == 0] <- 0;
+      idc[pctj == 0] <- 0;
+      idc[pcti == 0] <- 0;
+      idd[pcti == 0] <- 0;
+
+      #----------------
+      # Normalize vector of inverse distances; an infinite inverse distance
+      # means the point falls exactly on a node, which then takes all the weight
+      #----------------
+      invdist <- cbind(ida, idb, idc, idd)
+      w <- t(apply(invdist, 1, function(x) { if(any(is.infinite(x))) {
+             x <- is.infinite(x) * 1 } ; x <- x/sum(x) }))
+
+      wa = w[ , 1]
+      wb = w[ , 2]
+      wc = w[ , 3]
+      wd = w[ , 4]
+    }
+
+    #----------------
+    # Put info in dataframes and rbind them
+    #----------------
+    weightsa.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i,
+                              rawj = gridpoints$j, i = lowi, j = lowj, weight = wa)
+    weightsb.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i,
+                              rawj = gridpoints$j, i = lowi, j = highj, weight = wb)
+    weightsc.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i,
+                              rawj = gridpoints$j, i = highi, j = highj, weight = wc)
+    weightsd.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i,
+                              rawj = gridpoints$j, i = highi, j = lowj, weight = wd)
+    weights.df <- rbind(weightsa.df, weightsb.df, weightsc.df, weightsd.df)
+  } else if(method == "9point") {
+    #----------------
+    # Get the (i,j) coordinates of the 9 points (a,b,...,i) around x
+    # This plot shows i increasing to the right and
+    # j increasing to the top, but the computations are generic.
+ #---------------- + # | | | | + #-+-----+-----+-----+- + # | | | | + # | c | f | i | + # | | | | + #-+-----+-----+-----+- + # | | x| | + # | b | e | h | + # | | | | + #-+-----+-----+-----+- + # | | | | + # | a | d | g | + # | | | | + #-+-----+-----+-----+- + # | | | | + + centeri <- round(gridpoints$i, 0) + centeri[centeri == griddes$xsize + 1] <- 1 + centerj <- round(gridpoints$j, 0) + lowi <- centeri - 1 + highi <- centeri + 1 + lowi[lowi == 0] <- griddes$xsize # close the longitudes + highi[highi == griddes$xsize+1] <- 1 # close the longitudes + lowj <- centerj - 1 + highj <- centerj + 1 + + #---------------- + # For the north and south pole do a 6-point average + #---------------- + w_highj <- ifelse(centerj == 1,1/6,ifelse(centerj == griddes$ysize,0 ,1/9)) + w_centerj <- ifelse(centerj == 1,1/6,ifelse(centerj == griddes$ysize,1/6,1/9)) + w_lowj <- ifelse(centerj == 1,0 ,ifelse(centerj == griddes$ysize,1/6,1/9)) + + #---------------- + # Put info in dataframes and rbind them + #---------------- + weightsa.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = lowi, j = lowj, weight = w_lowj) + weightsb.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = lowi, j = centerj, weight = w_centerj) + weightsc.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = lowi, j = highj, weight = w_highj) + weightsd.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = centeri, j = lowj, weight = w_lowj) + weightse.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = centeri, j = centerj, weight = w_centerj) + weightsf.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = centeri, j = highj, weight = w_highj) + weightsg.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = highi, j = lowj, weight = w_lowj) + weightsh.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = highi, j = centerj, weight = w_centerj) + weightsi.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = highi, j = highj, weight = w_highj) + weights.df <- rbind(weightsa.df, weightsb.df, weightsc.df, weightsd.df, weightse.df, + weightsf.df, weightsg.df, weightsh.df, weightsi.df) + } else if(method %in% c("NE", "NW", "SW", "SE")) { + #---------------- + # Find if increasing i and j increases or decreases lat and lon + #---------------- + westtoeast <- (griddes$xinc > 0) + southtonorth <- T + if(griddes$gridtype == "gaussian") { + # We assume gaussian grid latitudes are ordered north to south + southtonorth <- F + } else { #lonlat + if(griddes$yinc < 0) {southtonorth <- F} + } + + #---------------- + # Get the (i,j) coordinates of the desired point (a,b,c or d) around x + #---------------- + # | | | + #- +-----+-----+- + # | | | + # | b | c | + # | | | + #- +-----+-----+- + # | x| | + # | a | d | + # | | | + #- +-----+-----+- + # | | | + + if(substr(method,1,1) == "N" & southtonorth == T) { selj <- ceiling(gridpoints$j) } + if(substr(method,1,1) == "S" & southtonorth == T) { selj <- floor(gridpoints$j) } + if(substr(method,1,1) == "N" & southtonorth == F) { selj <- floor(gridpoints$j) } + if(substr(method,1,1) == "S" & southtonorth == F) { selj <- ceiling(gridpoints$j) } + + 
if(substr(method,2,2) == "E" & westtoeast == T) {seli <- ceiling(gridpoints$i) } + if(substr(method,2,2) == "W" & westtoeast == T) {seli <- floor(gridpoints$i) } + if(substr(method,2,2) == "E" & westtoeast == F) {seli <- floor(gridpoints$i) } + if(substr(method,2,2) == "W" & westtoeast == F) {seli <- ceil(gridpoints$i) } + + seli[seli == griddes$xsize + 1] <- 1 # close the longitudes + + weights.df <- data.frame(locid = locids, lat = lats, lon = lons, rawi = gridpoints$i, + rawj = gridpoints$j, i = seli, j = selj, weight = 1) + } else { + stop(paste0("Method " ,method, " not implemented")) + } + + #---------------- + # Order by locid and remove lines with 0 weight + # This also removes some duplicates in the bilinear/invdist4nn methods when i + # or j is a whole number, or in the 9-point method when at the poles. + #---------------- + weights.df <- weights.df[order(weights.df$locid), ] + weights.df <- weights.df[weights.df$weight != 0, ] + + #---------------- + # Add as attributes the method and the nc file used to compute the weights + #---------------- + attributes(weights.df)$nc_file <- normalizePath(ncfile) + attributes(weights.df)$method <- method + + return(weights.df) +} + +#====================== +# Compute (i,j) from (lat,lon). +# Works only for 'lonlat' and 'gaussian' grids. +# Grids are supposed to cover whole globe. +#====================== +latlon2ij <- function(griddes, lats, lons) { + #------------ + # Check input params + #------------ + if(length(lons) != length(lats)) {stop("Input lat and lon have different lengths.")} + if(any(lats < -90) | any(lats > 90)) {stop("Latitude out of valid range")} + if((griddes$xfirst > 180) & (any(lons < 0))) { + stop("Please use the same convention for the longitudes in the source file and the ", + "longitude values in 'points'.") + } + #if(round(griddes$xinc*griddes$xsize) != 360) {stop("Grid is not global")} + # no need to resize lons to [0,360) + + #------------ + # Compute i (with decimals) + # i lies in [1,xsize+1) + # %% gives the remainder + #------------ + gridpoints <- list() + gridpoints$i <- 1 + (((lons - griddes$xfirst) / griddes$xinc) %% griddes$xsize) + + #------------ + # Compute j (with decimals) + #------------ + if(griddes$gridtype=='lonlat') { + gridpoints$j <- 1 + (lats - griddes$yfirst) / griddes$yinc + } else if(griddes$gridtype == 'gaussian') { + # We assume gaussian grid latitudes are ordered north to south + # findInterval can only work with monotonic ascending values so we revert twice + northj <- griddes$ysize-findInterval(lats, -griddes$yvals) + southj <- northj + 1 + + # Special case: We are north of the first lat + gridpoints$j[northj == 0] <- 1 + + # Special case: We are south of the last lat + gridpoints$j[southj == griddes$ysize + 1] <- griddes$ysize + + # Generic case + ok_idx <- !(northj == 0 | southj == griddes$ysize+1) + gridpoints$j[ok_idx] <- northj[ok_idx] + (griddes$yvals[northj[ok_idx]] - + lats[ok_idx])/(griddes$yvals[northj[ok_idx]] - griddes$yvals[southj[ok_idx]]) + } else { stop("Unsupported grid") } + + return(gridpoints) +} + +#====================== +# Use cdo griddes to obtain grid information +#====================== +get_griddes <- function(ncfile) { + tmp <- system(paste0("cdo griddes ", ncfile, + " 2>/dev/null | egrep 'gridtype|xsize|ysize|xfirst|xinc|yfirst|yinc'"), intern = T) + arr <- do.call(rbind, strsplit(tmp,"\\s+= ", perl = T)) + griddes <- as.list(arr[,2]) + names(griddes) <- arr[,1] + + if(griddes$gridtype == "gaussian") { + griddes$yvals <- get_lats(ncfile) + } + + # Convert 
+  for(nm in c("xsize", "ysize", "xfirst", "yfirst", "xinc", "yinc")) {
+    griddes[[nm]] <- ifelse(is.null(griddes[[nm]]), NA, as.numeric(griddes[[nm]]))
+  }
+
+  return(griddes)
+}
+
+#======================
+# Use nco to obtain latitudes. Latitudes shall be named "lat" or "latitude".
+#======================
+get_lats <- function(ncfile) {
+
+  tmp <- system(paste0('ncks -H -s "%f " -v latitude ', ncfile), intern = T)
+
+  if(!is.null(attributes(tmp)$status)) {
+    tmp <- system(paste0('ncks -H -s "%f " -v lat ', ncfile), intern = T)
+  }
+
+  lats <- as.numeric(strsplit(tmp[1], "\\s+", perl = T)[[1]])
+
+  return(lats)
+}
+
+#======================
+# Subset the already-loaded model data at all (i,j) pairs listed in the
+# weights dataframe. The indices refer to the global grid of 'mdata'.
+#======================
+get_model_data <- function(weights.df, mdata) {
+
+  #-----------------
+  # Get data for all combinations of i and j.
+  # (inefficient, getting many unneeded pairs).
+  # Avoid retrieving duplicates with unique()
+  # These are the indices of the global grid
+  #-----------------
+  is <- weights.df$i
+  js <- weights.df$j
+
+  #-----------------
+  # Get indices of original is and js in unique(is),unique(js) that were requested
+  #-----------------
+  idxi <- match(is, unique(is))
+  idxj <- match(js, unique(js))
+
+  #-----------------
+  # Subsample mdata to keep only the needed (i,j) pairs.
+  #-----------------
+  if (is.na(match("longitude", names(dim(mdata))))) {
+    londim <- match("lon", names(dim(mdata)))
+  } else {
+    londim <- match("longitude", names(dim(mdata)))
+  }
+  if (is.na(match("latitude", names(dim(mdata))))) {
+    latdim <- match("lat", names(dim(mdata)))
+  } else {
+    latdim <- match("latitude", names(dim(mdata)))
+  }
+
+  # trick: exchange idxi and idxj
+  #if(londim > latdim) { idx.tmp <- idxi; idxi <- idxj; idxj <- idx.tmp }
+  #keepdims <- (1:length(dim(mdata)))[-c(londim,latdim)]
+
+  #sub_mdata <- apply(mdata, keepdims, function(x) {
+  #              laply(1:length(is),function(k) { x[idxi[k],idxj[k]] }) })
+  #names(dim(sub_mdata))[1] <- "gridpoint"
+
+  #-----------------
+  # Retrieve with multiApply, extracting the series at each requested (j,i) pair
+  #-----------------
+  sub_mdata <- Apply(mdata, target_dims = list(c(latdim, londim)),
+                     fun = function(x) { sapply(seq_along(is), function(k) x[js[k], is[k]]) })$output1
+  names(dim(sub_mdata))[1] <- "gridpoint"
+
+  #-----------------
+  # Return an array that contains as many gridpoints as (i,j) pairs were requested
+  #-----------------
+  return(sub_mdata)
+}
+
+#======================
+# Multiply the grid-point series by the weights,
+# to obtain the desired interpolations
+#======================
+interpolate_data <- function(model_data, weights.df) {
+  #-----------------
+  # Multiply each gridpoint matrix by its corresponding weight
+  #-----------------
+  gpdim <- match("gridpoint", names(dim(model_data)))
+  weighted_data <- sweep(model_data, gpdim, weights.df$weight, "*")
+
+  #-----------------
+  # Sum all series that belong to same interpolation point
+  # Return an array that contains the requested locations and interpolation type
+  #-----------------
+  #interp_data <- apply(weighted_data, -gpdim, function(x) { rowsum(x, weights.df$locid) })
+  #names(dim(interp_data))[1] <- "location"
+  interp_data <- Apply(weighted_data, target_dims = gpdim, fun = function(x) {
+                       rowsum(x, weights.df$locid)}, output_dims = c("location", "aux"))$output1
+
+  return(interp_data)
+}
+
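+# A minimal sketch of the point-interpolation pipeline defined above (for
+# illustration only; 'coarse.nc' and the array 'mdata' are hypothetical, and
+# CDO/NCO must be available on the system):
+# weights <- create_interp_weights(ncfile = 'coarse.nc', locids = 1:2,
+#                                  lats = c(36.1, 38.7), lons = c(-1.9, 0.8),
+#                                  method = "bilinear",
+#                                  region = list(lat_min = 35, lat_max = 44,
+#                                                lon_min = -11, lon_max = 5))
+# gridpoint_data <- get_model_data(weights.df = weights, mdata = mdata)
+# interp <- interpolate_data(gridpoint_data, weights)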
diff --git a/R/Intlr.R b/R/Intlr.R
new file mode 100644
index 0000000000000000000000000000000000000000..0da52229a56f71cf81f04d7572229c07468b9233
--- /dev/null
+++ b/R/Intlr.R
@@ -0,0 +1,502 @@
+#'@rdname CST_Intlr
+#'@title Downscaling using interpolation and linear regression.
+#'
+#'@author J. Ramon, \email{jaume.ramon@bsc.es}
+#'
+#'@description This function performs a downscaling using an interpolation and a linear
+#'regression. Different methodologies that employ linear regressions are available. See
+#'parameter 'lr_method' for more information. It is recommended that the observations
+#'are passed already in the target grid. Otherwise, the function will also perform an
+#'interpolation of the observed field into the target grid. The coarse scale and
+#'observation data can be either global or regional. In the latter case, the region is
+#'defined by the user. In principle, the coarse and observation data are intended to
+#'be of the same variable, although different variables can also be admitted.
+#'
+#'@param exp an 's2dv' object containing the experimental field on the
+#'coarse scale for which the downscaling is aimed. The object must have, at least,
+#'the dimensions latitude, longitude, start date and member. The object is expected to be
+#'already subset for the desired region. Data can be in one or two integrated regions, e.g.,
+#'crossing the Greenwich meridian. To get the correct results in the latter case,
+#'the borders of the region should be specified in the parameter 'region'. See parameter
+#''region'.
+#'@param obs an 's2dv' object containing the observational field. The object
+#'must have, at least, the dimensions latitude, longitude and start date. The object is
+#'expected to be already subset for the desired region.
+#'@param lr_method a character vector indicating the linear regression method to be applied
+#'after the interpolation. Accepted methods are 'basic', 'large-scale' and '4nn'. The 'basic'
+#'method fits a linear regression using high resolution observations as predictands and the
+#'interpolated model data as predictor. Then, the regression equation is applied to the
+#'interpolated model data to correct the interpolated values. The 'large-scale' method fits
+#'a linear regression with large-scale predictors from the same model (e.g. teleconnection indices)
+#'as predictors and high-resolution observations as predictands. This equation is then
+#'applied to the interpolated model values. Finally, the '4nn' method uses a linear
+#'regression with the four nearest neighbours as predictors and high-resolution observations
+#'as predictands. It is then applied to model data to correct the interpolated values.
+#'@param target_grid a character vector indicating the target grid to be passed to CDO.
+#'It must be a grid recognised by CDO or a NetCDF file.
+#'@param points a list of two elements containing the point latitudes and longitudes
+#'of the locations to downscale the model data. The list must contain the two elements
+#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is
+#'to a point location, only regular grids are allowed for exp and obs. Only needed if the
+#'downscaling is to a point location.
+#'@param int_method a character vector indicating the regridding method to be passed
+#'to CDORemap. Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is
+#'to be used, CDO_1.9.8 or newer version is required.
+#'@param method_point_interp a character vector indicating the interpolation method to
+#'interpolate model gridded data into the point locations. Accepted methods are "nearest",
+#'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW".
+#'@param predictors an array with large-scale data to be used in the 'large-scale' method.
+#'Only needed if the linear regression method is set to 'large-scale'. It must have, at
+#'least, the dimension start date and another dimension whose name has to be specified in
+#'the parameter 'large_scale_predictor_dimname'. It should contain as many elements as the
+#'number of large-scale predictors.
+#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data'
+#'in exp and obs. Default set to "lat".
+#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data'
+#'in exp and obs. Default set to "lon".
+#'@param sdate_dim a character vector indicating the start date dimension name in the element
+#''data' in exp and obs. Default set to "sdate".
+#'@param time_dim a character vector indicating the time dimension name in the element
+#''data' in exp and obs. Default set to "time".
+#'@param large_scale_predictor_dimname a character vector indicating the name of the
+#'dimension in 'predictors' that contains the predictor variables. See parameter 'predictors'.
+#'@param loocv a logical indicating whether to apply leave-one-out cross-validation when
+#'generating the linear regressions. Default to FALSE.
+#'@param region a numeric vector indicating the borders of the region defined in exp.
+#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers
+#'to the left border, while lonmax refers to the right border. latmin indicates the lower
+#'border, whereas latmax indicates the upper border. If set to NULL (default), the function
+#'takes the first and last elements of the latitudes and longitudes.
+#'@param ncores an integer indicating the number of cores to use in parallel computation.
+#'
+#'@import multiApply
+#'
+#'@return An 's2dv' object. The element 'data' contains the downscaled field, 'lat' the
+#'downscaled latitudes, and 'lon' the downscaled longitudes.
+#'@examples +#'exp <- rnorm(500) +#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5) +#'exp_lons <- 1:5 +#'exp_lats <- 1:4 +#'obs <- rnorm(900) +#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5) +#'obs_lons <- seq(1,5, 4/14) +#'obs_lats <- seq(1,4, 3/11) +#'exp <- s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons) +#'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons) +#'res <- CST_Intlr(exp = exp, obs = obs, target_grid = 'r1280x640', lr_method = 'basic', int_method = 'conservative') +#'@export +CST_Intlr <- function(exp, obs, lr_method, target_grid = NULL, points = NULL, int_method = NULL, + method_point_interp = NULL, predictors = NULL, lat_dim = "lat", lon_dim = "lon", + sdate_dim = "sdate", time_dim = "time", large_scale_predictor_dimname = 'vars', + loocv = FALSE, region = NULL, ncores = 1) { + + if (!inherits(exp,'s2dv_cube')) { + stop("Parameter 'exp' must be of the class 's2dv_cube'") + } + + if (!inherits(obs,'s2dv_cube')) { + stop("Parameter 'obs' must be of the class 's2dv_cube'") + } + + res <- Intlr(exp = exp$data, obs = obs$data, exp_lats = exp$lat, exp_lons = exp$lon, + obs_lats = obs$lat, obs_lons = obs$lon, points = points, source_file_exp = exp$source_files[1], + source_file_obs = obs$source_files[1], target_grid = target_grid, lr_method = lr_method, + int_method = int_method, method_point_interp = method_point_interp, predictors = predictors, + lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim, time_dim = time_dim, + region = region, large_scale_predictor_dimname = large_scale_predictor_dimname, loocv = loocv, + ncores = ncores) + + #----------------------------------- + # Create an s2dv_cube object + #----------------------------------- + res_s2dv <- suppressWarnings(s2dv_cube(data = res$data, lon = res$lon, lat = res$lat)) + + return(res_s2dv) +} + +#'@rdname Intlr +#'@title Downscaling using interpolation and linear regression. +#' +#'@author J. Ramon, \email{jaume.ramon@bsc.es} +#' +#'@description This function performs a downscaling using an interpolation and a linear +#'regression. Different methodologies that employ linear regressions are available. See +#'parameter 'lr_method' for more information. It is recommended that the observations +#'are passed already in the target grid. Otherwise, the function will also perform an +#'interpolation of the observed field into the target grid. The coarse scale and +#'observation data can be either global or regional. In the latter case, the region is +#'defined by the user. In principle, the coarse and observation data are intended to +#'be of the same variable, although different variables can also be admitted. +#' +#'@param exp an array with named dimensions containing the experimental field on the +#'coarse scale for which the downscaling is aimed. The object must have, at least, +#'the dimensions latitude, longitude and start date. The object is expected to be +#'already subset for the desired region. Data can be in one or two integrated regions, e.g., +#'crossing the Greenwich meridian. To get the correct results in the latter case, +#'the borders of the region should be specified in the parameter 'region'. See parameter +#''region'. +#'@param obs an array with named dimensions containing the observational field. The object +#'must have, at least, the dimensions latitude, longitude and start date. The object is +#'expected to be already subset for the desired region. +#'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must +#'range from -90 to 90. 
+#'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes
+#'can range from -180 to 180 or from 0 to 360.
+#'@param obs_lats a numeric vector containing the latitude values in 'obs'. Latitudes must
+#'range from -90 to 90.
+#'@param obs_lons a numeric vector containing the longitude values in 'obs'. Longitudes
+#'can range from -180 to 180 or from 0 to 360.
+#'@param lr_method a character vector indicating the linear regression method to be applied
+#'after the interpolation. Accepted methods are 'basic', 'large-scale' and '4nn'. The 'basic'
+#'method fits a linear regression using high resolution observations as predictands and the
+#'interpolated model data as predictor. Then, the regression equation is applied to the
+#'interpolated model data to correct the interpolated values. The 'large-scale' method fits
+#'a linear regression with large-scale predictors from the same model (e.g. teleconnection indices)
+#'as predictors and high-resolution observations as predictands. This equation is then
+#'applied to the interpolated model values. Finally, the '4nn' method uses a linear
+#'regression with the four nearest neighbours as predictors and high-resolution observations
+#'as predictands. It is then applied to model data to correct the interpolated values.
+#'@param target_grid a character vector indicating the target grid to be passed to CDO.
+#'It must be a grid recognised by CDO or a NetCDF file.
+#'@param points a list of two elements containing the point latitudes and longitudes
+#'of the locations to downscale the model data. The list must contain the two elements
+#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is
+#'to a point location, only regular grids are allowed for exp and obs. Only needed if the
+#'downscaling is to a point location.
+#'@param int_method a character vector indicating the regridding method to be passed
+#'to CDORemap. Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is
+#'to be used, CDO_1.9.8 or newer version is required.
+#'@param method_point_interp a character vector indicating the interpolation method to
+#'interpolate model gridded data into the point locations. Accepted methods are "nearest",
+#'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW".
+#'@param source_file_exp a character vector with a path to an example file of the exp data.
+#'Only needed if the downscaling is to a point location.
+#'@param source_file_obs a character vector with a path to an example file of the obs data.
+#'Only needed if the downscaling is to a point location.
+#'@param predictors an array with large-scale data to be used in the 'large-scale' method.
+#'Only needed if the linear regression method is set to 'large-scale'. It must have, at
+#'least, the dimension start date and another dimension whose name has to be specified in
+#'the parameter 'large_scale_predictor_dimname'. It should contain as many elements as the
+#'number of large-scale predictors.
+#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data'
+#'in exp and obs. Default set to "lat".
+#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data'
+#'in exp and obs. Default set to "lon".
+#'@param sdate_dim a character vector indicating the start date dimension name in the element
+#''data' in exp and obs. Default set to "sdate".
+#'@param time_dim a character vector indicating the time dimension name in the element
+#''data' in exp and obs. Default set to "time".
Default set to "time". +#'@param region a numeric vector indicating the borders of the region defined in exp. +#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers +#'to the left border, while lonmax refers to the right border. latmin indicates the lower +#'border, whereas latmax indicates the upper border. If set to NULL (default), the function +#'takes the first and last elements of the latitudes and longitudes. +#'@param large_scale_predictor_dimname a character vector indicating the name of the +#'dimension in 'predictors' that contain the predictor variables. See parameter 'predictors'. +#'@param loocv a logical indicating whether to apply leave-one-out cross-validation when +#'generating the linear regressions. Default to FALSE. +#'@param ncores an integer indicating the number of cores to use in parallel computation. +#' +#'@import multiApply +#' +#'@return A list of three elements. 'data' contains the dowscaled field, 'lat' the +#'downscaled latitudes, and 'lon' the downscaled longitudes. +#'@examples +#'exp <- rnorm(500) +#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5) +#'exp_lons <- 1:5 +#'exp_lats <- 1:4 +#'obs <- rnorm(900) +#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5) +#'obs_lons <- seq(1,5, 4/14) +#'obs_lats <- seq(1,4, 3/11) +#'res <- Intlr(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons, obs_lats = obs_lats, +#'obs_lons = obs_lons, target_grid = 'r1280x640', lr_method = 'basic', int_method = 'conservative') +#'@export +Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, target_grid = NULL, points = NULL, + int_method = NULL, method_point_interp = NULL, source_file_exp = NULL, source_file_obs = NULL, + predictors = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", + region = NULL, large_scale_predictor_dimname = 'vars', loocv = FALSE, ncores = 1) { + + #----------------------------------- + # Checkings + #----------------------------------- + if (!inherits(lr_method, 'character')) { + stop("Parameter 'lr_method' must be of the class 'character'") + } + + if (!inherits(large_scale_predictor_dimname, 'character')) { + stop("Parameter 'large_scale_predictor_dimname' must be of the class 'character'") + } + + if (!inherits(loocv, 'logical')) { + stop("Parameter 'loocv' must be set to TRUE or FALSE") + } + + if (!inherits(lat_dim, 'character')) { + stop("Parameter 'lat_dim' must be of the class 'character'") + } + + if (!inherits(lon_dim, 'character')) { + stop("Parameter 'lon_dim' must be of the class 'character'") + } + + if (!inherits(sdate_dim, 'character')) { + stop("Parameter 'sdate_dim' must be of the class 'character'") + } + + if (!inherits(large_scale_predictor_dimname, 'character')) { + stop("Parameter 'large_scale_predictor_dimname' must be of the class 'character'") + } + + if (is.na(match(lon_dim, names(dim(exp)))) | is.na(match(lon_dim, names(dim(obs))))) { + stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'lon_dim'") + } + + if (is.na(match(lat_dim, names(dim(exp)))) | is.na(match(lat_dim, names(dim(obs))))) { + stop("Missing latitude dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'lat_dim'") + } + + if (is.na(match(sdate_dim, names(dim(exp)))) | is.na(match(sdate_dim, names(dim(obs))))) { + stop("Missing start date dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'sdate_dim'") + } + + if (!is.null(points) & (is.null(source_file_exp) | 
+    stop("No source files found. Source files for exp and obs must be provided in the parameters ",
+         "'source_file_exp' and 'source_file_obs', respectively.")
+  }
+
+  if (!is.null(points) & is.null(method_point_interp)) {
+    stop("Please provide the interpolation method to interpolate gridded data to point locations ",
+         "through the parameter 'method_point_interp'.")
+  }
+
+  # the start date dimension must be present in the input data
+  stopifnot(sdate_dim %in% names(dim(exp)))
+  stopifnot(sdate_dim %in% names(dim(obs)))
+
+  # checks for the parameter 'predictors'
+  if (!is.null(predictors)) {
+    if (!is.array(predictors)) {
+      stop("Parameter 'predictors' must be of the class 'array'")
+    } else {
+      # ensure the predictor variable name matches the parameter large_scale_predictor_dimname
+      stopifnot(large_scale_predictor_dimname %in% names(dim(predictors)))
+      stopifnot(sdate_dim %in% names(dim(predictors)))
+      stopifnot(dim(predictors)[sdate_dim] == dim(exp)[sdate_dim])
+    }
+  }
+
+  #-----------------------------------
+  # Interpolation
+  #-----------------------------------
+  if (lr_method != '4nn') {
+
+    if (is.null(int_method)) {
+      stop("Parameter 'int_method' must be a character vector indicating the interpolation method. ",
+           "Accepted methods are con, bil, bic, nn, con2")
+    }
+
+    if (is.null(region)) {
+      warning("The borders of the downscaling region have not been provided. Assuming the ",
+              "four borders of the downscaling region are defined by the first and last ",
+              "elements of the parameters 'obs_lats' and 'obs_lons'.")
+      region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)])
+    }
+
+    exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid,
+                                      points = points, method_point_interp = method_point_interp,
+                                      source_file = source_file_exp, lat_dim = lat_dim, lon_dim = lon_dim,
+                                      method_remap = int_method, region = region)
+
+    # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to
+    # the same grid to force the matching
+    if ((!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats,
+                        lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !is.null(points)) {
+      obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid,
+                                        points = points, method_point_interp = method_point_interp,
+                                        source_file = source_file_obs, lat_dim = lat_dim, lon_dim = lon_dim,
+                                        method_remap = int_method, region = region)
+
+      lats <- obs_interpolated$lat
+      lons <- obs_interpolated$lon
+      obs_interpolated <- obs_interpolated$data
+    } else {
+      obs_interpolated <- obs
+      lats <- obs_lats
+      lons <- obs_lons
+    }
+  }
+
+  #-----------------------------------
+  # Linear regressions
+  #-----------------------------------
+  # Pointwise linear regression
+  # Predictor: model data
+  # Predictand: observations
+  if (lr_method == 'basic') {
+    predictor <- exp_interpolated$data
+    predictand <- obs_interpolated
+
+    target_dims_predictor <- sdate_dim
+    target_dims_predictand <- sdate_dim
+  }
+
+  # (Multi) linear regression with large-scale predictors
+  # Predictor: passed through the parameter 'predictors' by the user. Can be model or observations
+  # Predictand: high-resolution observations
+  else if (lr_method == 'large-scale') {
+    if (is.null(predictors)) {
+      stop("The large-scale predictors must be passed through the parameter 'predictors'")
+    }
+
+    predictand <- obs_interpolated
+    predictor <- predictors
+
+    target_dims_predictor <- c(sdate_dim, large_scale_predictor_dimname)
+    target_dims_predictand <- sdate_dim
+  }
+
+  # Multi-linear regression with the four nearest neighbours
+  # Predictors: model data
+  # Predictand: observations
+  else if (lr_method == '4nn') {
+    predictor <- find_nn(coar = exp, lats_hres = obs_lats, lons_hres = obs_lons, lats_coar = exp_lats,
+                         lons_coar = exp_lons, lat_dim = lat_dim, lon_dim = lon_dim, nn = 4)
+
+    if (is.null(points)) {
+      if (!is.null(target_grid)) {
+        warning("Parameter 'target_grid' is ignored; with lr_method = '4nn' the downscaling ",
+                "is done to the 'obs' grid")
+      }
+      predictand <- obs
+
+      lats <- obs_lats
+      lons <- obs_lons
+    }
+    # If the downscaling is to point locations: Once the 4 nearest neighbours have been found,
+    # interpolate to point locations
+    else {
+      predictor <- Interpolation(exp = predictor, lats = obs_lats, lons = obs_lons, target_grid = NULL,
+                                 points = points, method_point_interp = method_point_interp,
+                                 source_file = source_file_obs, method_remap = NULL, region = region)
+
+      predictand <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = NULL,
+                                  points = points, method_point_interp = method_point_interp,
+                                  source_file = source_file_obs, method_remap = NULL, region = region)
+
+      lats <- predictor$lat
+      lons <- predictor$lon
+      predictor <- predictor$data
+      predictand <- predictand$data
+    }
+
+    target_dims_predictor <- c(sdate_dim,'nn')
+    target_dims_predictand <- sdate_dim
+  }
+
+  else {
+    stop(paste0(lr_method, " method is not implemented yet"))
+  }
+
+  # Apply the linear regressions
+  res <- Apply(list(predictor, predictand), target_dims = list(target_dims_predictor, target_dims_predictand),
+               fun = .intlr, loocv = loocv, ncores = ncores)$output1
+
+  names(dim(res))[1] <- sdate_dim
+
+  # Return a list of three elements
+  res <- list(data = res, lon = lons, lat = lats)
+
+  return(res)
+}
+
+#-----------------------------------
+# Atomic function to generate and apply the linear regressions
+#-----------------------------------
+.intlr <- function(x, y, loocv) {
+
+  tmp_df <- data.frame(x = x, y = y)
+
+  # if the data is all NA, force return of NA
+  if (all(is.na(tmp_df)) | (sum(apply(tmp_df, 2, function(x) !all(is.na(x)))) == 1)) {
+
+    n <- nrow(tmp_df)
+    res <- rep(NA, n)
+
+  } else {
+    # training
+    lm1 <- train_lm(df = tmp_df, loocv = loocv)
+
+    # prediction
+    res <- pred_lm(lm1 = lm1, df = tmp_df, loocv = loocv)
+  }
+
+  return(res)
+
+}
+
+#-----------------------------------
+# Function to generate the linear regressions.
+# Returns a list
+#-----------------------------------
+train_lm <- function(df, loocv) {
+
+  # Remove columns containing only NA's
+  df <- df[ , apply(df, 2, function(x) !all(is.na(x)))]
+
+  if (loocv) {
+
+    lm1 <- lapply(1:nrow(df), function(j) lm(df[-j,], formula = y ~ .))
+
+  } else {
+
+    lm1 <- list(lm(data = df, formula = y ~ .))
+  }
+
+  return(lm1)
+}
+
+#-----------------------------------
+# Function to apply the linear regressions.
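+# With loocv = TRUE the j-th prediction comes from the regression fitted
+# without row j; with loocv = FALSE the single fitted model is applied to
+# its own training data frame.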
+#----------------------------------- +pred_lm <- function(df, lm1, loocv) { + + if (loocv) { + + pred_vals <- sapply(1:nrow(df), function(j) predict(lm1[[j]], df[j,])) + + } else { + + pred_vals_ls <- lapply(lm1, predict, data = df) + pred_vals <- unlist(pred_vals_ls) + } + + return(pred_vals) +} + +#----------------------------------- +# Function to find N nearest neighbours. +# 'coar' is an array with named dimensions +#----------------------------------- +find_nn <- function(coar, lats_hres, lons_hres, lats_coar, lons_coar, lat_dim, lon_dim, nn = 4) { + + # Sort the distances from closest to furthest + idx_lat <- as.array(sapply(lats_hres, function(x) order(abs(lats_coar - x))[1:nn])) + idx_lon <- as.array(sapply(lons_hres, function(x) order(abs(lons_coar - x))[1:nn])) + + names(dim(idx_lat)) <- c('nn', lat_dim) + names(dim(idx_lon)) <- c('nn', lon_dim) + + # obtain the values of the nearest neighbours + nearest <- Apply(list(coar, idx_lat, idx_lon), + target_dims = list(c(lat_dim, lon_dim), lat_dim, lon_dim), + fun = function(x, y, z) x[y, z])$output1 + + return(nearest) +} + + diff --git a/R/LogisticReg.R b/R/LogisticReg.R new file mode 100644 index 0000000000000000000000000000000000000000..8226b23e60738c610a5af438b1d2d43283eb9b85 --- /dev/null +++ b/R/LogisticReg.R @@ -0,0 +1,195 @@ + +LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, int_method = NULL, + points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", + sdate_dim = "sdate", member_dim = "member", source_file = NULL, region = NULL, + loocv = FALSE, ncores = 1) { + + #----------------------------------- + # Checkings + #----------------------------------- + if (!inherits(target_grid, 'character')) { + stop("Parameter 'target_grid' must be of the class 'character'") + } + + if (!is.null(int_method) & !inherits(int_method, 'character')) { + stop("Parameter 'int_method' must be of the class 'character'") + } + + if (!is.null(method_point_interp) & !inherits(method_point_interp, 'character')) { + stop("Parameter 'method_point_interp' must be of the class 'character'") + } + + if (!inherits(lat_dim, 'character')) { + stop("Parameter 'lat_dim' must be of the class 'character'") + } + + if (!inherits(lon_dim, 'character')) { + stop("Parameter 'lon_dim' must be of the class 'character'") + } + + if (!inherits(sdate_dim, 'character')) { + stop("Parameter 'sdate_dim' must be of the class 'character'") + } + + if (!inherits(member_dim, 'character')) { + stop("Parameter 'member_dim' must be of the class 'character'") + } + + if (!is.null(source_file) & !inherits(source_file, 'character')) { + stop("Parameter 'source_file' must be of the class 'character'") + } + + if (!inherits(loocv, 'logical')) { + stop("Parameter 'loocv' must be set to TRUE or FALSE") + } + + if (is.na(match(lon_dim, names(dim(exp)))) | is.na(match(lon_dim, names(dim(obs))))) { + stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'lon_dim'") + } + + if (is.na(match(lat_dim, names(dim(exp)))) | is.na(match(lat_dim, names(dim(obs))))) { + stop("Missing latitude dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'lat_dim'") + } + + if (is.na(match(sdate_dim, names(dim(exp)))) | is.na(match(sdate_dim, names(dim(obs))))) { + stop("Missing start date dimension in 'exp' and/or 'obs', or does not match the parameter ", + "'sdate_dim'") + } + + if (is.na(match(member_dim, names(dim(exp))))) { + stop("Missing member dimension in 'exp' and/or 'obs', or does 
not match the parameter ",
+         "'member_dim'")
+  }
+
+  if (!is.null(points) & (is.null(source_file))) {
+    stop("No source files found. One source file for exp must be provided in the parameter 'source_file'.")
+  }
+
+  if (!is.null(points) & is.null(method_point_interp)) {
+    stop("Please provide the interpolation method to interpolate gridded data to point locations ",
+         "through the parameter 'method_point_interp'.")
+  }
+
+  if (is.null(region)) {
+    warning("The borders of the downscaling region have not been provided. Assuming the four borders ",
+            "of the downscaling region are defined by the first and last elements of the parameters ",
+            "'obs_lats' and 'obs_lons'.")
+    region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)])
+  }
+
+  exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid,
+                                    method_remap = int_method, points = points, source_file = source_file,
+                                    lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp,
+                                    region = region)
+
+  # compute ensemble mean anomalies
+  exp_anom <- get_ens_mean_anom(obj_ens = exp_interpolated$data, member_dim = member_dim, sdate_dim = sdate_dim)
+
+  # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to
+  # the same grid to force the matching
+  if ((!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats,
+                      lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !is.null(points)) {
+    obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid,
+                                      method_remap = int_method, points = points, source_file = source_file,
+                                      lat_dim = lat_dim, lon_dim = lon_dim,
+                                      method_point_interp = method_point_interp, region = region)
+    obs_ref <- obs_interpolated$data
+  } else {
+    obs_ref <- obs
+  }
+
+  # convert observations to categorical predictands (tercile categories 1 to 3;
+  # convert2prob() is assumed to come from the easyVerification package)
+  obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) {
+                   terc <- convert2prob(as.vector(x), prob = c(1:2/3))
+                   apply(terc, 1, function(r) which (r == 1))},
+                   output_dims = sdate_dim)$output1
+
+  res <- Apply(list(exp_anom, obs_cat), target_dims = sdate_dim, fun = function(x, y)
+               .log_reg(x = x, y = y, loocv = loocv), output_dims = sdate_dim)$output1
+
+  res <- list(data = res, lon = exp_interpolated$lon, lat = exp_interpolated$lat)
+
+  return(res)
+}
+
+
+get_ens_mean_anom <- function(obj_ens, member_dim, sdate_dim) {
+
+  # compute climatology
+  clim <- Apply(obj_ens, target_dims = c(member_dim, sdate_dim), mean)$output1
+
+  # compute ensemble mean
+  ens_mean <- Apply(obj_ens, target_dims = member_dim, mean)$output1
+
+  # compute ensemble mean anomalies
+  anom <- Apply(list(ens_mean, clim), margins = list(sdate_dim, NULL), fun = function(x,y) x - y)$output1
+
+  return(anom)
+}
+
+# atomic functions for logistic regressions
+.log_reg <- function(x, y, loocv) {
+
+  tmp_df <- data.frame(x = x, y = y)
+
+  # if the data is all NA, force return of NA
+  if (all(is.na(tmp_df)) | (sum(apply(tmp_df, 2, function(x) !all(is.na(x)))) == 1)) {
+
+    n <- nrow(tmp_df)
+    res <- rep(NA, n)
+
+  } else {
+    # training
+    lm1 <- train_lr(df = tmp_df, loocv = loocv)
+
+    # prediction
+    res <- pred_lr(lm1 = lm1, df = tmp_df, loocv = loocv)
+  }
+
+  return(res)
+}
+
+#-----------------------------------
+# Function to train the logistic regressions.
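+# nnet::multinom() fits a multinomial logistic regression, so the tercile
+# categories built above can be treated as a three-class predictand; with
+# loocv = TRUE one model is fitted per left-out start date.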
+#----------------------------------- +train_lr <- function(df, loocv) { + + require(nnet) + + # Remove columns containing only NA's + df <- df[ , apply(df, 2, function(x) !all(is.na(x)))] + + if (loocv) { + + #lm1 <- lapply(1:nrow(df), function(j) glm(df[-j,], formula = y ~ ., family = "binomial")) + lm1 <- lapply(1:nrow(df), function(j) multinom(y ~ ., data = df[-j,])) + } else { + + #lm1 <- list(glm(data = df, formula = y ~ ., family = "binomial")) + lm1 <- list(multinom(y ~ ., data = df)) + } + + return(lm1) +} + +#----------------------------------- +# Function to apply the logistic regressions. +#----------------------------------- +pred_lr <- function(df, lm1, loocv) { + + if (loocv) { + + pred_vals <- sapply(1:nrow(df), function(j) predict(lm1[[j]], df[j,], type = "class")) + + } else { + + pred_vals_ls <- lapply(lm1, predict, data = df, type = "class") + pred_vals <- unlist(pred_vals_ls) + } + + return(pred_vals) +} + + diff --git a/R/Utils.R b/R/Utils.R new file mode 100644 index 0000000000000000000000000000000000000000..4c7274652f1876cc84777174f7736258895c6fa6 --- /dev/null +++ b/R/Utils.R @@ -0,0 +1,31 @@ +.check_coords <- function(lat1, lon1, lat2, lon2) { + match <- TRUE + if (!((length(lat1) == length(lat2)) & (length(lon1) == length(lon2)))) { + match <- FALSE + } + return(match) +} + +# reorder dims to a reference array. If they do not exist, they are created +# example +#arr_ref <- array(NA, c(dataset = 1, sdate = 8, member = 3, ftime = 1, lon = 269, lat = 181)) +#arr_to_reorder <- array(NA, c(dataset = 1, member = 3, sdate = 8, lat = 181, lon = 269, pp = 1)) + +.reorder_dims <- function(arr_ref, arr_to_reorder) { + + miss_dims <- names(dim(arr_ref))[!(names(dim(arr_ref)) %in% names(dim(arr_to_reorder)))] + + if (length(miss_dims) != 0) { + for (m in seq(miss_dims)) { + arr_to_reorder <- InsertDim(data = arr_to_reorder, posdim = length(dim(arr_to_reorder)) + 1, lendim = 1, + name = miss_dims[m]) + } + } + + # TODO: add code to reorder dimensions and put the non-common dimensions at the end + + orddim <- match(names(dim(arr_ref)),names(dim(arr_to_reorder))) + return(Reorder(data = arr_to_reorder, order = orddim)) +} + + diff --git a/examples/analogs.R b/examples/analogs.R new file mode 100644 index 0000000000000000000000000000000000000000..7886cffd23e3f17916616637f61ce9e3b1e31439 --- /dev/null +++ b/examples/analogs.R @@ -0,0 +1,172 @@ + +library(CSTools) +library(startR) +library(s2dv) +library(lubridate) +library(multiApply) +library(FNN) +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Interpolation.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Analogs.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Utils.R') + +plotpath <- '/esarchive/scratch/jramon/downscaling/plots/ip' +#target_grid <- '/esarchive/recon/ecmwf/era5/daily/tasmax-r1440x721cds/tasmax_201702.nc' + +#lonmin <- -22 +#lonmax <- 45 +#latmin <- 27 +#latmax <- 72 +lonmin <- -11.5 +lonmax <- 5.35 +latmin <- 35.1 +latmax <- 44.1 +extra <- 1 + +#sdates <- c('20000201','20010201','20020201','20030201','20040201','20050201','20060201','20070201') +#sdates <- format(seq(ymd("20000101"), ymd("20071201"), '1 month'), "%Y%m%d") +sdates_exp <- format(ymd("20000501") + rep(years(0:2), each=1),"%Y%m%d") +sdates_obs <- format(ymd("20000401") + months(0:2) + rep(years(0:2), each=3),"%Y%m") +#sdates_obs <- format(ymd("20000501") + rep(years(0:2), each=1),"%Y%m") + +#--------------------------- +# Observations 
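+# (ERA5 daily means are read for three consecutive months around each
+#  experiment start month, so that a temporal window of candidate analog
+#  days can be built later with generate_window().)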
+#--------------------------- +# dim(obs) <- c('sdate', 'smonth', ...) +obs1 <- startR::Start(dat = '/esarchive/recon/ecmwf/era5/daily_mean/$var$_f1h/$var$_$sdate$.nc', + var = 'tas', time = indices(1:28), latitude = values(list(latmin - extra, latmax + extra)), + sdate = sdates_obs, latitude_reorder = Sort(decreasing = FALSE), + longitude = values(list(lonmin - extra, lonmax + extra)), + longitude_reorder = CircularSort(-180, 180), + synonims = list(var = c('var','variable'), longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude')), return_vars = list(latitude = 'dat', longitude = 'dat'), + num_procs = 1, retrieve = TRUE) + +# Create 's2dv_cube' object +obs2 <- s2dv_cube(obs1, lat = attr(obs1, "Variables")$dat1$lat, lon = attr(obs1, "Variables")$dat1$lon, + source_files = attr(obs1, "Files")[1,1,]) + +# Create window to look for analogues +#dim(obs2$data) <- c(data = 1, var = 1, time = 28, lat = 39, smonth = 3, sdate = 5, lon = 67) # correct +#obs_new <- Apply(obs2$data, target_dims = list(c("time", "smonth")), fun = as.vector, output_dims = "time")$output1 +#wlen <- 7 + +#result <- array(NA, dim = c(window = 2*wlen + 1, time = 28, lat = 39, sdate = 7, lon = 67)) +#for (i in 1:28) { +# result[,i,,,] <- obs_new[(28 + i - wlen):(28 + i + wlen),1,1,,,] +#} +#obs2$data <- result + +# > dim(obs$data) +# data var time lat smonth sdate lon +# 1 1 28 39 3 7 67 +# > dim(result) +# window time lat sdate lon +# 15 28 39 7 67 + +#--------------------------- +# Model +#--------------------------- +exp1 <- startR::Start(dat = '/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h/$var$_$sdate$.nc', + var = 'tas', time = indices(1:28), member = indices(1:3), sdate = sdates_exp, + latitude = values(list(latmin, latmax)), latitude_reorder = Sort(decreasing = FALSE), + longitude = values(list(lonmin, lonmax)), longitude_reorder = CircularSort(-180, 180), + synonims = list(var = c('var','variable'), longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude'), member = c('member','ensemble')), + return_vars = list(latitude = 'dat', longitude = 'dat'), + num_procs = 1, retrieve = TRUE) + +# Create 's2dv_cube' object +exp2 <- s2dv_cube(exp1, lat = attr(exp1, "Variables")$dat1$lat, lon = attr(exp1, "Variables")$dat1$lon, + source_files = attr(exp1, "Files")[1,1,]) + +#--------------------------- +# Analogs with no 'window' dimension in the initial object and with cv +#--------------------------- +ana_1 <- Analogs(exp = exp1, obs = obs1, exp_lats = attr(exp1, "Variables")$dat1$lat, + exp_lons = attr(exp1, "Variables")$dat1$lon, obs_lats = attr(obs1, "Variables")$dat1$lat, + obs_lons = attr(obs1, "Variables")$dat1$lon, grid_exp = attr(exp1, "Files")[1,1,1], + fun_analog = NULL, lat_dim = 'latitude', lon_dim = 'longitude', loocv_window = TRUE, ncores = 4) + +#--------------------------- +# Analogs with no 'window' dimension in the initial object and without cv +#--------------------------- +ana_2 <- Analogs(exp = exp1, obs = obs1, exp_lats = attr(exp1, "Variables")$dat1$lat, + exp_lons = attr(exp1, "Variables")$dat1$lon, obs_lats = attr(obs1, "Variables")$dat1$lat, + obs_lons = attr(obs1, "Variables")$dat1$lon, grid_exp = attr(exp1, "Files")[1,1,1], + fun_analog = NULL, lat_dim = 'latitude', lon_dim = 'longitude', loocv_window = FALSE, ncores = 4) + +#--------------------------- +# Analogs with 'window' dimension in the initial object and with cv +#--------------------------- +dim(obs1) <- c(data = 1, var = 1, time = 28, latitude = 39, smonth = 3, sdate = 3, longitude = 67) +obs_window <- 
generate_window(obj = obs1, sdate_dim = 'sdate', time_dim = 'time', size = 7, loocv = TRUE) +ana_3 <- Analogs(exp = exp1, obs = obs_window, exp_lats = attr(exp1, "Variables")$dat1$lat, + exp_lons = attr(exp1, "Variables")$dat1$lon, obs_lats = attr(obs1, "Variables")$dat1$lat, + obs_lons = attr(obs1, "Variables")$dat1$lon, grid_exp = attr(exp1, "Files")[1,1,1], + fun_analog = NULL, lat_dim = 'latitude', lon_dim = 'longitude', ncores = 4) + +#--------------------------- +# Analogs with cv and returning the indices +#--------------------------- +ana_4 <- Analogs(exp = exp1, obs = obs1, exp_lats = attr(exp1, "Variables")$dat1$lat, + exp_lons = attr(exp1, "Variables")$dat1$lon, obs_lats = attr(obs1, "Variables")$dat1$lat, + obs_lons = attr(obs1, "Variables")$dat1$lon, grid_exp = attr(exp1, "Files")[1,1,1], + fun_analog = NULL, lat_dim = 'latitude', lon_dim = 'longitude', loocv_window = TRUE, + return_indices = TRUE, ncores = 4) + +#--------------------------- +# Analogs with 'window' dimension and returning the indices +#--------------------------- +ana_5 <- Analogs(exp = exp1, obs = obs_window, exp_lats = attr(exp1, "Variables")$dat1$lat, + exp_lons = attr(exp1, "Variables")$dat1$lon, obs_lats = attr(obs1, "Variables")$dat1$lat, + obs_lons = attr(obs1, "Variables")$dat1$lon, grid_exp = attr(exp1, "Files")[1,1,1], + fun_analog = NULL, lat_dim = 'latitude', lon_dim = 'longitude', return_indices = TRUE, ncores = 4) + +#--------------------------- +# Analogs with NA's in obs +#--------------------------- +obs1[,,,10:20,,] <- NA +ana_mean_2 <- Analogs(exp = exp1, obs = obs1, exp_lats = attr(exp1, "Variables")$dat1$lat, + exp_lons = attr(exp1, "Variables")$dat1$lon, obs_lats = attr(obs1, "Variables")$dat1$lat, + obs_lons = attr(obs1, "Variables")$dat1$lon, source_file_exp = attr(exp1, "Files")[1,1,1], + fun_analog = 'mean', ncores = 4,lat_dim='latitude',lon_dim='longitude') + + +#--------------------------- +# Analogs with CST_Analogs() +#--------------------------- +ana_mean <- CST_Analogs(exp = exp2, obs = obs2, fun_analog = "mean", ncores = 4) + +# test examples +ana_mean <- Analogs(exp = exp, obs = obs, fun_analog = "mean", ncores = 4) +ana_wmean <- Analogs(exp = exp, obs = obs, fun_analog = "wmean", ncores = 4) +ana_max <- Analogs(exp = exp, obs = obs, fun_analog = "max", ncores = 4) +ana_min <- Analogs(exp = exp, obs = obs, fun_analog = "min", ncores = 4) +ana_median <- Analogs(exp = exp, obs = obs, fun_analog = "median", ncores = 4) + +# plot examples +s2dv::PlotEquiMap(ana_mean$data[,,1,1,1,1,1], lat = ana_mean$lat, lon = ana_mean$lon, filled.continents = F, + toptitle = 'Analogs mean', brks = seq(275,288,1), height = 8, width = 10, + fileout = file.path(plotpath, "ip_ana_mean_2.png")) +s2dv::PlotEquiMap(ana_wmean$data[1,,,1,1,1,1], lat = ana_wmean$lat, lon = ana_wmean$lon, filled.continents = F, + toptitle = 'Analogs weighted mean', brks = seq(275,288,1), height = 8, width = 10, + fileout = file.path(plotpath, "ip_ana_wmean.png")) +s2dv::PlotEquiMap(ana_max$data[1,,,1,1,1,1], lat = ana_max$lat, lon = ana_max$lon, filled.continents = F, + toptitle = 'Analogs max', brks = seq(275,288,1), height = 8, width = 10, + fileout = file.path(plotpath, "ip_ana_max.png")) +s2dv::PlotEquiMap(ana_median$data[1,,,1,1,1,1], lat = ana_max$lat, lon = ana_max$lon, filled.continents = F, + toptitle = 'Analogs median', brks = seq(275,288,1), height = 8, width = 10, + fileout = file.path(plotpath, "ip_ana_median.png")) +s2dv::PlotEquiMap(ana_min$data[1,,,1,1,1,1], lat = ana_max$lat, lon = ana_max$lon, 
filled.continents = F, + toptitle = 'Analogs min', brks = seq(275,288,1), height = 8, width = 10, + fileout = file.path(plotpath, "ip_ana_min.png")) + + + + + + + + + + diff --git a/examples/interpolation-bc.R b/examples/interpolation-bc.R new file mode 100644 index 0000000000000000000000000000000000000000..ad074b64c92e7d84f7c2830b1f59e3111859edc0 --- /dev/null +++ b/examples/interpolation-bc.R @@ -0,0 +1,108 @@ + +library(startR) +library(s2dv) +library(CSTools) +library(lubridate) +library(multiApply) +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Interpolation.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Utils.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Intbc.R') + +plotpath <- '/esarchive/scratch/jramon/downscaling/plots/ip' +target_grid <- '/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc' + +#lonmin <- -22 +#lonmax <- 45 +#latmin <- 27 +#latmax <- 72 +lonmin <- -11.5 +lonmax <- 5.35 +latmin <- 35.1 +latmax <- 44.1 + +#sdates <- c('20000201','20010201','20020201','20030201','20040201','20050201','20060201','20070201') +sdates <- format(ymd("20000201") + rep(years(0:20), each=1),"%Y%m%d") +sdates <- format(seq(ymd("20000201"), ymd("20200201"), "1 month"), "%Y%m%d") + +obs <- startR::Start(dat = '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$sdate$.nc', + var = 'tas', time = indices(1), lat = values(list(latmin, latmax)), + lat_reorder = Sort(decreasing = FALSE), lon = values(list(lonmin, lonmax)), + lon_reorder = CircularSort(-180, 180), sdate = format(ymd(sdates), "%Y%m"), + synonims = list(var = c('var','variable'), lon = c('lon', 'longitude'), + lat = c('lat', 'latitude')), return_vars = list(lat = 'dat', lon = 'dat'), + num_procs = 1, retrieve = TRUE) + +exp <- startR::Start(dat = '/esarchive/exp/ecmwf/system5c3s/monthly_mean/$var$_f6h/$var$_$sdate$.nc', + var = 'tas', time = indices(1), member = indices(1:3), sdate = sdates, + lat = values(list(latmin, latmax)), lat_reorder = Sort(decreasing = FALSE), + lon = values(list(lonmin, lonmax)), lon_reorder = CircularSort(-180, 180), + synonims = list(var = c('var','variable'), lon = c('lon', 'longitude'), + lat = c('lat', 'latitude'), member = c('member','ensemble')), + return_vars = list(lat = 'dat', lon = 'dat'), + num_procs = 1, retrieve = TRUE) + +#------------------------------------ +# Downscaling with Intbc +#------------------------------------ +down_1_sbc <- Intbc(exp = exp, obs = obs, exp_lats = attr(exp, "Variables")$dat1$lat, + exp_lons = attr(exp, "Variables")$dat1$lon, obs_lats = attr(obs, "Variables")$dat1$lat, + obs_lons = attr(obs, "Variables")$dat1$lon, target_grid = target_grid, int_method = 'con', + bc_method = 'simple_bias', ncores = 4) + +down_1_cal <- Intbc(exp = exp, obs = obs, exp_lats = attr(exp, "Variables")$dat1$lat, + exp_lons = attr(exp, "Variables")$dat1$lon, obs_lats = attr(obs, "Variables")$dat1$lat, + obs_lons = attr(obs, "Variables")$dat1$lon, target_grid = target_grid, int_method = 'con', + bc_method = 'calibration', ncores = 4) + +down_1_qm <- Intbc(exp = exp, obs = obs, exp_lats = attr(exp, "Variables")$dat1$lat, + exp_lons = attr(exp, "Variables")$dat1$lon, obs_lats = attr(obs, "Variables")$dat1$lat, + obs_lons = attr(obs, "Variables")$dat1$lon, target_grid = target_grid, int_method = 'con', + bc_method = 'quantile_mapping', ncores = 4) + +down_1_dbc <- Intbc(exp = exp, obs = obs, exp_lats = attr(exp, "Variables")$dat1$lat, + exp_lons = attr(exp, 
"Variables")$dat1$lon, obs_lats = attr(obs, "Variables")$dat1$lat, + obs_lons = attr(obs, "Variables")$dat1$lon, target_grid = target_grid, int_method = 'con', + bc_method = 'dynamical_bias', quanti = 0.6, ncores = 4) +#Error in (function (logdista, quanti) : +# Parameter 'quanti' is too high for the length of the data provided. + +#------------------------------------ +# Transform exp and obs into s2dv_objects +#------------------------------------ +obs <- s2dv_cube(obs, lat = attr(obs, "Variables")$dat1$lat, lon = attr(obs, "Variables")$dat1$lon, + source_files = attr(obs, "Files")[1,1,]) +attr(obs$lon,"first_lon") <- obs$lon[1] +attr(obs$lon,"last_lon") <- obs$lon[length(obs$lon)] +attr(obs$lat,"first_lat") <- obs$lat[1] +attr(obs$lat,"last_lat") <- obs$lat[length(obs$lat)] + +exp <- s2dv_cube(exp, lat = attr(exp, "Variables")$dat1$lat, lon = attr(exp, "Variables")$dat1$lon, + source_files = attr(exp, "Files")[1,1,]) +attr(exp$lon,"first_lon") <- exp$lon[1] +attr(exp$lon,"last_lon") <- exp$lon[length(exp$lon)] +attr(exp$lat,"first_lat") <- exp$lat[1] +attr(exp$lat,"last_lat") <- exp$lat[length(exp$lat)] + +#------------------------------------ +# Downscaling with CST_Intbc +#------------------------------------ +down_2_sbc <- CST_Intbc(exp = exp, obs = obs, target_grid = target_grid, int_method = 'con', + bc_method = 'simple_bias', ncores = 4) + +down_2_cal <- CST_Intbc(exp = exp, obs = obs, target_grid = target_grid, int_method = 'con', + bc_method = 'calibration', ncores = 4) + +down_2_qm <- CST_Intbc(exp = exp, obs = obs, target_grid = target_grid, int_method = 'con', + bc_method = 'quantile_mapping', ncores = 4) + +#------------------------------------ +# Downscaling to point locations +#------------------------------------ +points <- list(lat = c(36.1, 38.7), lon = c(-1.9, 0.8)) +down_points_sbc <- CST_Intbc(exp = exp, obs = obs, points = points, method_point_interp = 'bilinear', + target_grid = target_grid, bc_method = 'simple_bias', ncores = 4) +down_points_cal <- CST_Intbc(exp = exp, obs = obs, points = points, method_point_interp = 'bilinear', + target_grid = target_grid, bc_method = 'calibration', ncores = 4) +down_points_qm <- CST_Intbc(exp = exp, obs = obs, points = points, method_point_interp = 'bilinear', + target_grid = target_grid, bc_method = 'quantile_mapping', ncores = 4) + diff --git a/examples/interpolation-lr.R b/examples/interpolation-lr.R new file mode 100644 index 0000000000000000000000000000000000000000..89dcea8939d6d443fde28cd6ae9997e5eaef204d --- /dev/null +++ b/examples/interpolation-lr.R @@ -0,0 +1,122 @@ + +library(CSTools) +library(startR) +library(s2dv) +library(lubridate) +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Interpolation.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Intlr.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Utils.R') + +plotpath <- '/esarchive/scratch/jramon/downscaling/plots/ip' +target_grid <- '/esarchive/recon/ecmwf/era5/daily/tasmax-r1440x721cds/tasmax_201702.nc' + +#lonmin <- -22 +#lonmax <- 45 +#latmin <- 27 +#latmax <- 72 +lonmin <- -11.5 +lonmax <- 5.35 +latmin <- 35.1 +latmax <- 44.1 + + +#sdates <- format(ymd("19811201") + rep(years(0:36),each=1),"%Y%m%d") +sdates <- c('20000201','20010201','20020201','20030201','20040201','20050201','20060201','20070201') + +obs1 <- startR::Start(dat = '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$sdate$.nc', + var = 'tas', time = indices(1), lat = values(list(latmin, 
+                      sdate = format(ymd(sdates), "%Y%m"), lat_reorder = Sort(decreasing = FALSE),
+                      lon = values(list(lonmin, lonmax)), lon_reorder = CircularSort(-180, 180),
+                      synonims = list(var = c('var','variable'), lon = c('lon', 'longitude'),
+                                      lat = c('lat', 'latitude')), return_vars = list(lat = 'dat', lon = 'dat'),
+                      num_procs = 1, retrieve = TRUE)
+
+exp1 <- startR::Start(dat = '/esarchive/exp/ecmwf/system5c3s/monthly_mean/$var$_f6h/$var$_$sdate$.nc',
+                      var = 'tas', time = indices(1), member = indices(1:3), sdate = sdates,
+                      lat = values(list(latmin, latmax)), lat_reorder = Sort(decreasing = FALSE),
+                      lon = values(list(lonmin, lonmax)), lon_reorder = CircularSort(-180, 180),
+                      synonims = list(var = c('var','variable'), lon = c('lon', 'longitude'),
+                                      lat = c('lat', 'latitude'), member = c('member','ensemble')),
+                      return_vars = list(lat = 'dat', lon = 'dat'),
+                      num_procs = 1, retrieve = TRUE)
+
+#----------------------------------
+# Downscaling with Intlr
+#----------------------------------
+ind_rdm <- array(NA, dim = c(sdate = length(sdates), member = 3, vars = 2))
+ind_rdm[,,1] <- rnorm(n = prod(length(sdates), 3), mean = 0, sd = 1)
+ind_rdm[,,2] <- rnorm(n = prod(length(sdates), 3), mean = 0, sd = 1)
+
+down_1_bas <- Intlr(exp = exp1, obs = obs1, exp_lats = attr(exp1, "Variables")$dat1$lat,
+                    exp_lons = attr(exp1, "Variables")$dat1$lon, obs_lats = attr(obs1, "Variables")$dat1$lat,
+                    obs_lons = attr(obs1, "Variables")$dat1$lon, target_grid = target_grid, lr_method = "basic",
+                    int_method = "bilinear", predictors = NULL, loocv = TRUE, ncores = 4)
+
+down_1_lsc <- Intlr(exp = exp1, obs = obs1, exp_lats = attr(exp1, "Variables")$dat1$lat,
+                    exp_lons = attr(exp1, "Variables")$dat1$lon, obs_lats = attr(obs1, "Variables")$dat1$lat,
+                    obs_lons = attr(obs1, "Variables")$dat1$lon, target_grid = target_grid, lr_method = "large-scale",
+                    int_method = "conservative", predictors = ind_rdm, loocv = TRUE, ncores = 4)
+
+down_1_nn <- Intlr(exp = exp1, obs = obs1, exp_lats = attr(exp1, "Variables")$dat1$lat,
+                   exp_lons = attr(exp1, "Variables")$dat1$lon, obs_lats = attr(obs1, "Variables")$dat1$lat,
+                   obs_lons = attr(obs1, "Variables")$dat1$lon, target_grid = target_grid, lr_method = "4nn",
+                   int_method = "conservative", predictors = ind_rdm, loocv = TRUE, ncores = 4)
+
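+#----------------------------------
+# Quick sanity check (an illustrative sketch added here, not part of the
+# original example). It assumes that Intlr, like the other downscaling
+# functions, returns a list with elements 'data', 'lat' and 'lon'; the
+# downscaled field should then lie on the observational grid, so the lat/lon
+# dimensions of 'data' are expected to match the returned coordinate vectors.
+#----------------------------------
+stopifnot(dim(down_1_bas$data)[['lat']] == length(down_1_bas$lat),
+          dim(down_1_bas$data)[['lon']] == length(down_1_bas$lon))
+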
+#----------------------------------
+# Downscaling to point locations with Intlr
+# Note: load the NCO module first
+#----------------------------------
+points <- list(lat = c(36.1, 38.7), lon = c(-1.9, 0.8))
+down_2_bas <- Intlr(exp = exp1, obs = obs1, exp_lats = attr(exp1, "Variables")$dat1$lat,
+                    exp_lons = attr(exp1, "Variables")$dat1$lon, obs_lats = attr(obs1, "Variables")$dat1$lat,
+                    obs_lons = attr(obs1, "Variables")$dat1$lon, target_grid = target_grid, lr_method = "basic",
+                    points = points, method_point_interp = 'bilinear', int_method = NULL, predictors = NULL,
+                    source_file_exp = attr(exp1, "Files")[1,1,1], source_file_obs = attr(obs1, "Files")[1,1,1],
+                    loocv = TRUE, ncores = 4)
+
+down_2_lsc <- Intlr(exp = exp1, obs = obs1, exp_lats = attr(exp1, "Variables")$dat1$lat,
+                    exp_lons = attr(exp1, "Variables")$dat1$lon, obs_lats = attr(obs1, "Variables")$dat1$lat,
+                    obs_lons = attr(obs1, "Variables")$dat1$lon, target_grid = target_grid, lr_method = "large-scale",
+                    points = points, method_point_interp = 'bilinear', int_method = NULL, predictors = ind_rdm,
+                    source_file_exp = attr(exp1, "Files")[1,1,1], source_file_obs = attr(obs1, "Files")[1,1,1],
+                    loocv = TRUE, ncores = 4)
+
+down_2_nn <- Intlr(exp = exp1, obs = obs1, exp_lats = attr(exp1, "Variables")$dat1$lat,
+                   exp_lons = attr(exp1, "Variables")$dat1$lon, obs_lats = attr(obs1, "Variables")$dat1$lat,
+                   obs_lons = attr(obs1, "Variables")$dat1$lon, target_grid = target_grid, lr_method = "4nn",
+                   points = points, method_point_interp = 'bilinear', int_method = NULL, predictors = NULL,
+                   source_file_exp = attr(exp1, "Files")[1,1,1], source_file_obs = attr(obs1, "Files")[1,1,1],
+                   loocv = TRUE, ncores = 4)
+
+#----------------------------------
+# Create s2dv_cube objects
+#----------------------------------
+obs1 <- s2dv_cube(obs1, lat = attr(obs1, "Variables")$dat1$lat, lon = attr(obs1, "Variables")$dat1$lon,
+                  source_files = attr(obs1, "Files")[1,1,])
+attr(obs1$lon,"first_lon") <- obs1$lon[1]
+attr(obs1$lon,"last_lon") <- obs1$lon[length(obs1$lon)]
+attr(obs1$lat,"first_lat") <- obs1$lat[1]
+attr(obs1$lat,"last_lat") <- obs1$lat[length(obs1$lat)]
+
+exp1 <- s2dv_cube(exp1, lat = attr(exp1, "Variables")$dat1$lat, lon = attr(exp1, "Variables")$dat1$lon,
+                  source_files = attr(exp1, "Files")[1,1,])
+attr(exp1$lon,"first_lon") <- exp1$lon[1]
+attr(exp1$lon,"last_lon") <- exp1$lon[length(exp1$lon)]
+attr(exp1$lat,"first_lat") <- exp1$lat[1]
+attr(exp1$lat,"last_lat") <- exp1$lat[length(exp1$lat)]
+
+#----------------------------------
+# Downscaling with CST_Intlr
+#----------------------------------
+down_3_bas <- CST_Intlr(exp = exp1, obs = obs1, target_grid = target_grid, lr_method = "basic",
+                        int_method = "bilinear", predictors = NULL, loocv = TRUE, ncores = 4)
+
+down_3_lsc <- CST_Intlr(exp = exp1, obs = obs1, target_grid = target_grid, lr_method = "large-scale",
+                        int_method = "bilinear", predictors = ind_rdm, loocv = TRUE, ncores = 4)
+
+down_3_nn <- CST_Intlr(exp = exp1, obs = obs1, target_grid = target_grid, lr_method = "4nn",
+                       int_method = "bilinear", predictors = NULL, loocv = TRUE, ncores = 4)
+
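+#----------------------------------
+# Illustrative cross-check (a sketch, not in the original script): CST_Intlr
+# wraps Intlr, so with the same inputs the downscaled fields are expected to
+# have matching dimensions. This assumes both interfaces expose the result
+# in an element named 'data'.
+#----------------------------------
+identical(dim(down_1_bas$data), dim(down_3_bas$data))
+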
diff --git a/examples/interpolation.R b/examples/interpolation.R
new file mode 100644
index 0000000000000000000000000000000000000000..96c446b04969e3503cea749e7a9bd498a4da35a7
--- /dev/null
+++ b/examples/interpolation.R
@@ -0,0 +1,85 @@
+library(CSTools)
+library(lubridate)
+library(startR)
+library(s2dv)
+source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Interpolation.R')
+
+plotpath <- '/esarchive/scratch/jramon/downscaling/plots/ip'
+target_grid <- '/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc'
+
+#lonmin <- -22
+#lonmax <- 45
+#latmin <- 27
+#latmax <- 72
+lonmin <- -11.5
+lonmax <- 5.35
+latmin <- 35.1
+latmax <- 44.1
+
+sdates <- c('20000201','20010201','20020201','20030201','20040201','20050201','20060201','20070201')
+
+exp1 <- startR::Start(dat = '/esarchive/exp/ecmwf/system5c3s/monthly_mean/$var$_f6h/$var$_$sdate$.nc',
+                      var = 'tas', time = indices(1), member = indices(1:3), sdate = sdates,
+                      lat = values(list(latmin, latmax)), lat_reorder = Sort(decreasing = FALSE),
+                      lon = values(list(lonmin, lonmax)),
+                      lon_reorder = CircularSort(-180, 180),
+                      synonims = list(var = c('var','variable'), lon = c('lon', 'longitude'),
+                                      lat = c('lat', 'latitude'), member = c('member','ensemble')),
+                      return_vars = list(lat = 'dat', lon = 'dat'),
+                      num_procs = 1, retrieve = TRUE)
+exp2 <- s2dv_cube(exp1, lat = attr(exp1, "Variables")$dat1$lat, lon = attr(exp1, "Variables")$dat1$lon,
+                  source_files = attr(exp1, "Files")[1,1,])
+attr(exp2$lon,"first_lon") <- exp2$lon[1]
+attr(exp2$lon,"last_lon") <- exp2$lon[length(exp2$lon)]
+attr(exp2$lat,"first_lat") <- exp2$lat[1]
+attr(exp2$lat,"last_lat") <- exp2$lat[length(exp2$lat)]
+
+#--------------------------------
+# Downscaling with Interpolation
+# If lon_reorder = CircularSort(0, 360) is used, also pass remap_region = c(lonmin, lonmax, latmin, latmax)
+#--------------------------------
+exp1_con <- Interpolation(exp = exp1, lats = attr(exp1, "Variables")$dat1$lat,
+                          lons = attr(exp1, "Variables")$dat1$lon, target_grid = target_grid, method_remap = 'con')
+exp1_bil <- Interpolation(exp = exp1, lats = attr(exp1, "Variables")$dat1$lat,
+                          lons = attr(exp1, "Variables")$dat1$lon, target_grid = target_grid, method_remap = 'bil')
+exp1_bic <- Interpolation(exp = exp1, lats = attr(exp1, "Variables")$dat1$lat,
+                          lons = attr(exp1, "Variables")$dat1$lon, target_grid = target_grid, method_remap = 'bic')
+exp1_nn <- Interpolation(exp = exp1, lats = attr(exp1, "Variables")$dat1$lat,
+                         lons = attr(exp1, "Variables")$dat1$lon, target_grid = target_grid, method_remap = 'nn')
+exp1_con2 <- Interpolation(exp = exp1, lats = attr(exp1, "Variables")$dat1$lat,
+                           lons = attr(exp1, "Variables")$dat1$lon, target_grid = target_grid, method_remap = 'con2')
+
+#--------------------------------
+# Downscaling with CST_Interpolation
+#--------------------------------
+exp2_con <- CST_Interpolation(exp = exp2, target_grid = target_grid, method_remap = 'con')
+exp2_bil <- CST_Interpolation(exp = exp2, target_grid = target_grid, method_remap = 'bil')
+exp2_bic <- CST_Interpolation(exp = exp2, target_grid = target_grid, method_remap = 'bic')
+exp2_nn <- CST_Interpolation(exp = exp2, target_grid = target_grid, method_remap = 'nn')
+exp2_con2 <- CST_Interpolation(exp = exp2, target_grid = target_grid, method_remap = 'con2')
+
+#--------------------------------
+# Downscaling to point locations
+#--------------------------------
+points <- list(lat = c(36.1, 38.7), lon = c(-1.9, 0.8))
+down_points <- Interpolation(exp = exp1, lats = attr(exp1, "Variables")$dat1$lat, lons = attr(exp1, "Variables")$dat1$lon,
+                             points = points, method_point_interp = 'bilinear', remap_region = c(lonmin,lonmax,latmin,latmax),
+                             source_file = attr(exp1, "Files")[1,1,1])
+
+#s2dv::PlotEquiMap(var = obs$data[1,1,1,,,1], lat = obs$lat, lon = obs$lon, filled.continents = FALSE, toptitle = 'Observations', brks = seq(275,288,1), height = 8, width = 10, fileout = file.path(plotpath, "ip_obs.png"))
+#s2dv::PlotEquiMap(var = exp$data[1,1,1,1,1,,], lat = exp$lat, lon = exp$lon, filled.continents = FALSE, toptitle = 'Predictions', brks = seq(275,288,1), height = 8, width = 10, fileout = file.path(plotpath, "ip_fcst.png"))
+#s2dv::PlotEquiMap(var = exp1_con$data[1,1,1,1,1,,], lat = exp1_con$lat, lon = exp1_con$lon, filled.continents = FALSE, toptitle = 'Conservative', brks = seq(275,288,1), height = 8, width = 10, fileout = file.path(plotpath, "ip_con.png"))
+#s2dv::PlotEquiMap(var = exp1_bil$data[1,1,1,1,1,,], lat = exp1_bil$lat, lon = exp1_bil$lon, filled.continents = FALSE, toptitle = 'Bilinear', brks = seq(275,288,1), height = 8, width = 10, fileout = file.path(plotpath, "ip_bil.png"))
+#s2dv::PlotEquiMap(var = exp1_bic$data[1,1,1,1,1,,], lat = exp1_bic$lat, lon = exp1_bic$lon, filled.continents = FALSE, toptitle = 'Bicubic', brks = seq(275,288,1), height = 8, width = 10, fileout = file.path(plotpath, "ip_bic.png"))
+#s2dv::PlotEquiMap(var = exp1_nn$data[1,1,1,1,1,,], lat = exp1_nn$lat, lon = exp1_nn$lon, filled.continents = FALSE, toptitle = 'Nearest neighbour', brks = seq(275,288,1), height = 8, width = 10, fileout = file.path(plotpath, "ip_nn.png"))
+#s2dv::PlotEquiMap(var = exp1_con2$data[1,1,1,1,1,,], lat = exp1_con2$lat, lon = exp1_con2$lon, filled.continents = FALSE, toptitle = '2nd order conservative', brks = seq(275,288,1), height = 8, width = 10, fileout = file.path(plotpath, "ip_con2.png"))
+
diff --git a/tests/QuantileMapping_new.R b/tests/QuantileMapping_new.R
new file mode 100644
index 0000000000000000000000000000000000000000..45a8becd20381a0e9b7a9c134ecf2421294e29a2
--- /dev/null
+++ b/tests/QuantileMapping_new.R
@@ -0,0 +1,159 @@
+#'Quantile Mapping for seasonal or decadal forecast data
+#'
+#'@description This function is a wrapper of fitQmap and doQmap from package
+#''qmap' to be applied to CSTools objects of class 's2dv_cube'.
+#'
+#'@author Nuria Perez-Zanon, \email{nuria.perez@bsc.es}
+#'@param exp an object of class \code{s2dv_cube}.
+#'@param obs an object of class \code{s2dv_cube}.
+#'@param exp_cor an object of class \code{s2dv_cube} to which the quantile mapping correction should be applied. If it is not specified, the correction is applied to object 'exp'.
+#'@param sdate_dim a character string indicating the name of the start date dimension. Default to 'sdate'.
+#'@param memb_dim a character string indicating the name of the member dimension. Default to 'member'.
+#'@param window_dim a character string indicating the name of the window dimension in 'obs', if any. Default to NULL.
+#'@param method a character string indicating the method to be used: 'PTF', 'DIST', 'RQUANT', 'QUANT' or 'SSPLIN'. By default, the empirical quantile mapping 'QUANT' is used.
+#'@param na.rm a logical indicating whether missing values should be removed before fitting. Default to FALSE.
+#'@param ncores an integer indicating the number of cores to use in parallel computation.
+#'@param ... additional arguments passed on to \code{fitQmap} and \code{doQmap}.
+#'
+#'@return an object of class \code{s2dv_cube} containing the experimental data after applying the quantile mapping correction.
+#'@import qmap
+#'@import multiApply
+#'
+#'@seealso \code{\link[qmap]{fitQmap}} and \code{\link[qmap]{doQmap}}
+#'@examples
+#'exp <- list(data = array(1 : (1 * 10 * 20 * 60 * 6 * 7),
+#'                         dim = c(dataset = 1, member = 10, sdate = 20,
+#'                                 ftime = 60, lat = 6, lon = 7)))
+#'class(exp) <- 's2dv_cube'
+#'obs <- list(data = array(101 : (100 + 1 * 1 * 20 * 60 * 6 * 7),
+#'                         dim = c(dataset = 1, member = 1, sdate = 20,
+#'                                 ftime = 60, lat = 6, lon = 7)))
+#'class(obs) <- 's2dv_cube'
+#'res <- CST_QuantileMapping(exp, obs)
+#'exp <- lonlat_data$exp
+#'obs <- lonlat_data$obs
+#'res <- CST_QuantileMapping(exp, obs)
+#'@export
+CST_QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate',
+                                memb_dim = 'member', window_dim = NULL,
+                                method = 'QUANT', na.rm = FALSE,
+                                ncores = NULL, ...) {
+  if (!inherits(exp, 's2dv_cube') || !inherits(obs, 's2dv_cube')) {
+    stop("Parameter 'exp' and 'obs' must be of the class 's2dv_cube', ",
+         "as output by CSTools::CST_Load.")
+  }
+  QMapped <- QuantileMapping(exp = exp$data, obs = obs$data,
+                             exp_cor = exp_cor$data,
+                             sdate_dim = sdate_dim, memb_dim = memb_dim,
+                             window_dim = window_dim, method = method,
+                             na.rm = na.rm, ncores = ncores, ...)
+  exp$data <- QMapped
+  exp$Datasets <- c(exp$Datasets, obs$Datasets)
+  exp$source_files <- c(exp$source_files, obs$source_files)
+  return(exp)
+}
+QuantileMapping <- function(exp, obs, exp_cor = NULL, sdate_dim = 'sdate',
+                            memb_dim = 'member', window_dim = NULL,
+                            method = 'QUANT',
+                            na.rm = FALSE, ncores = NULL, ...) {
+  obsdims <- names(dim(obs))
+  expdims <- names(dim(exp))
+  if (is.null(expdims)) {
+    stop("Parameter 'exp' must have dimension names.")
+  }
+  if (is.null(obsdims)) {
+    stop("Parameter 'obs' must have dimension names.")
+  }
+  if (!is.null(exp_cor)) {
+    exp_cordims <- names(dim(exp_cor))
+    if (is.null(exp_cordims)) {
+      stop("Parameter 'exp_cor' must have dimension names.")
+    }
+  }
+  if (!is.null(window_dim)) {
+    if (!(window_dim %in% obsdims)) {
+      stop("Dimension 'window_dim' not found in 'obs'.")
+    }
+    obs <- CSTools::MergeDims(obs, c(memb_dim, window_dim))
+    if (window_dim %in% expdims) {
+      exp <- CSTools::MergeDims(exp, c(memb_dim, window_dim))
+      warning("Dimension 'window_dim' found in 'exp' and merged to 'memb_dim'.")
+    }
+  }
+  sample_dims <- c(memb_dim, sdate_dim)
+  if (!all(memb_dim %in% obsdims)) {
+    obs <- InsertDim(obs, posdim = 1, lendim = 1, name = memb_dim[!(memb_dim %in% obsdims)])
+  }
+  if (!all(sample_dims %in% expdims)) {
+    stop("Parameter 'exp' requires 'sdate_dim' and 'memb_dim' dimensions.")
+  }
+  if (!is.character(method)) {
+    warning("Parameter 'method' must be a character string indicating ",
+            "one of the following methods: 'PTF', 'DIST', 'RQUANT', ",
+            "'QUANT', 'SSPLIN'. Method 'QUANT' is being used.")
+    method <- 'QUANT'
+  }
+  if (length(method) > 1) {
+    warning("Parameter 'method' has length > 1 and only the first element",
+            " is used.")
+    method <- method[1]
+  }
+  if (!is.null(exp_cor)) {
+    qmaped <- Apply(list(exp, obs, exp_cor), target_dims = sample_dims,
+                    fun = qmapcor, method = method, na.rm = na.rm, ...,
+                    ncores = ncores)$output1
+  } else {
+    qmaped <- Apply(list(exp, obs), target_dims = sample_dims,
+                    fun = qmapcor, exp_cor = NULL, method = method,
+                    na.rm = na.rm, ...,
+                    ncores = ncores)$output1
+  }
+  return(qmaped)
+}
+qmapcor <- function(exp, obs, exp_cor = NULL, method = 'QUANT', na.rm = FALSE,
+                    ...) {
+  # exp[memb, sdate]
+  # obs[window, sdate]
+  if (is.null(exp_cor)) {
+    applied <- exp * NA
+    for (sd in 1:dim(exp)['sdate']) {
+      # leave-one-out cross-validation: fit the correction on all start
+      # dates except 'sd' and apply it to start date 'sd'
+      if (na.rm) {
+        # select start date for cross-val
+        nas_pos <- which(!is.na(exp[, sd]))
+        obs2 <- as.vector(obs[, -sd])
+        exp2 <- as.vector(exp[, -sd])
+        exp_cor2 <- as.vector(exp[, sd])
+        # remove NAs
+        obs2 <- obs2[!is.na(obs2)]
+        exp2 <- exp2[!is.na(exp2)]
+        exp_cor2 <- exp_cor2[!is.na(exp_cor2)]
+        tryCatch({
+          adjust <- fitQmap(obs2, exp2, method = method, ...)
+          applied[nas_pos, sd] <- doQmap(exp_cor2, adjust, ...)
+        },
+        error = function(error_message) {
+          # if the fit fails, the values for this start date are left as NA
+          return(applied[, sd])
+        })
+      } else {
+        adjust <- fitQmap(as.vector(obs[, -sd]), as.vector(exp[, -sd]),
+                          method = method, ...)
+        applied[, sd] <- doQmap(as.vector(exp[, sd]), adjust, ...)
+      }
+    }
+  } else {
+    applied <- exp_cor * NA
+    if (na.rm) {
+      tryCatch({
+        adjust <- fitQmap(obs[!is.na(obs)], exp[!is.na(exp)],
+                          method = method, ...)
+        applied[!is.na(exp_cor)] <- doQmap(exp_cor[!is.na(exp_cor)],
+                                           adjust, ...)
+      },
+      error = function(error_message) {
+        # if the fit fails, the corrected array is left as NA
+        return(applied)
+      })
+    } else {
+      adjust <- fitQmap(as.vector(obs), as.vector(exp), method = method, ...)
+      applied <- doQmap(as.vector(exp_cor), adjust, ...)
+    }
+  }
+  return(applied)
+}
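+
+# ---------------------------------------------------------------------------
+# Minimal usage sketch (illustrative only, not part of the original file):
+# toy arrays with the default dimension names 'member' and 'sdate', assuming
+# the 'qmap' and 'multiApply' packages are available. Without 'exp_cor',
+# qmapcor() corrects each start date with a fit trained on the remaining
+# start dates (leave-one-out).
+# ---------------------------------------------------------------------------
+library(qmap)
+library(multiApply)
+exp_toy <- array(rnorm(3 * 10, mean = 1), dim = c(member = 3, sdate = 10))
+obs_toy <- array(rnorm(1 * 10), dim = c(member = 1, sdate = 10))
+res_toy <- QuantileMapping(exp = exp_toy, obs = obs_toy)
+dim(res_toy)  # c(member = 3, sdate = 10): one leave-one-out fit per start date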
diff --git a/tests/testthat/test_CST_Analogs.R b/tests/testthat/test_CST_Analogs.R
new file mode 100644
index 0000000000000000000000000000000000000000..48870a69b11abe34a5f5dfb01ca85fe9aab98d75
--- /dev/null
+++ b/tests/testthat/test_CST_Analogs.R
@@ -0,0 +1,44 @@
+set.seed(1)
+source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Interpolation.R')
+source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Utils.R')
+source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Analogs.R')
+require(testthat)
+require(CSTools)
+require(multiApply)
+require(s2dv)
+
+context("Generic tests")
+test_that("Sanity checks", {
+  expect_error(
+    CST_Analogs(exp = 1),
+    "Parameter 'exp' must be of the class 's2dv_cube'")
+
+  exp <- rnorm(500)
+  dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 1)
+  exp_lons <- 1:5
+  exp_lats <- 1:4
+  exp <- suppressWarnings(s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons))
+
+  expect_error(CST_Analogs(exp = exp), "argument \"obs\" is missing, with no default")
+
+  obs <- rnorm(900)
+  dim(obs) <- c(lat = 12, lon = 15, sdate = 5, time = 1)
+  obs_lons <- seq(1,5, 4/14)
+  obs_lats <- seq(1,4, 3/11)
+  obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons)
+
+  expect_error(
+    CST_Analogs(exp = exp, obs = obs, grid_exp = 1),
+    paste0("Parameter 'grid_exp' must be of class 'character'. It can be either a path ",
+           "to another NetCDF file which to read the target grid from (a single grid must ",
+           "be defined in such file) or a character vector indicating the coarse grid to be ",
+           "passed to CDO, and it must be a grid recognised by CDO or a NetCDF file."),
+    fixed = TRUE)  # fixed = TRUE because the message contains parentheses
+
+  exp$data[1,1,1,1,1] <- NA
+
+  # smoke test: CST_Analogs should still run with a missing value in 'exp'
+  l <- CST_Analogs(exp = exp, obs = obs, grid_exp = 'r360x180')
+  })
+
diff --git a/tests/testthat/test_CST_Intbc.R b/tests/testthat/test_CST_Intbc.R
new file mode 100644
index 0000000000000000000000000000000000000000..122c50b12c8e7eeca730907793399c5ba8927c06
--- /dev/null
+++ b/tests/testthat/test_CST_Intbc.R
@@ -0,0 +1,63 @@
+set.seed(1)
+source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Interpolation.R')
+source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Utils.R')
+source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Intbc.R')
+require(testthat)
+require(CSTools)
+require(multiApply)
+require(s2dv)
+
+context("Generic tests")
+test_that("Sanity checks", {
+  expect_error(
+    CST_Intbc(exp = 1),
+    "Parameter 'exp' must be of the class 's2dv_cube'")
+
+  exp <- rnorm(500)
+  dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 1)
+  exp_lons <- 1:5
+  exp_lats <- 1:4
+  exp <- suppressWarnings(s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons))
+
+  expect_error(CST_Intbc(exp = exp), "argument \"obs\" is missing, with no default")
+
+  obs <- rnorm(900)
+  dim(obs) <- c(lat = 12, lon = 15, sdate = 5, time = 1)
+  obs_lons <- seq(1,5, 4/14)
+  obs_lats <- seq(1,4, 3/11)
+  obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons)
+
+  expect_error(
+    CST_Intbc(exp = exp, obs = obs, bc_method = 'cal'),
+    paste0("Parameter 'method_remap' must be a character vector indicating the ",
+           "interpolation method. Accepted methods are con, bil, bic, nn, con2"))
+
+  obs2 <- rnorm(180)
+  dim(obs2) <- c(lat = 12, lon = 15, sdate = 1)
+  obs2 <- s2dv_cube(data = obs2, lat = obs_lats, lon = obs_lons)
+
+  expect_warning(
+    CST_Intbc(exp = exp, obs = obs2, int_method = 'bil', bc_method = 'sbc', target_grid = 'r1280x640'),
+    'Simple Bias Correction should not be used with only one observation. Returning NA.')
+
+  exp2 <- rnorm(100)
+  dim(exp2) <- c(member = 1, lat = 4, lon = 5, sdate = 5, time = 1)
+  exp2 <- suppressWarnings(s2dv_cube(data = exp2, lat = exp_lats, lon = exp_lons))
+
+  expect_error(
+    CST_Intbc(exp = exp2, obs = obs, int_method = 'bil', bc_method = 'cal', target_grid = 'r1280x640'),
+    'Calibration must not be used with only one ensemble member.')
+
+  d1 <- CST_Intbc(exp = exp, obs = obs, int_method = 'bil', bc_method = 'sbc', target_grid = 'r640x320')
+  expect_equal(round(d1$data[, 1, 1, 1, 1], 2), c(-0.40, 0.08, -0.23, 0.38, -0.03))
+
+  d2 <- CST_Intbc(exp = exp, obs = obs, int_method = 'bil', bc_method = 'cal', target_grid = 'r640x320')
+  expect_equal(round(d2$data[, 1, 1, 1, 1], 2), c(-0.47, -0.03, -0.31, 0.24, -0.13))
+
+  d3 <- CST_Intbc(exp = exp, obs = obs, int_method = 'bil', bc_method = 'qm', target_grid = 'r640x320')
+  expect_equal(round(d3$data[, 1, 1, 1, 1], 2), c(0.00, 0.00, 0.00, 0.33, 0.00))
+  })
+
diff --git a/tests/testthat/test_CST_Interpolation.R b/tests/testthat/test_CST_Interpolation.R
new file mode 100644
index 0000000000000000000000000000000000000000..ae9dcc5a9a887a92ffebc02b56136d23f25b5bc3
--- /dev/null
+++ b/tests/testthat/test_CST_Interpolation.R
@@ -0,0 +1,82 @@
+source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Interpolation.R')
+require(testthat)
+require(CSTools)
+
+context("Generic tests")
+test_that("Sanity checks", {
+  expect_error(
+    CST_Interpolation(exp = 1),
+    "Parameter 'exp' must be of the class 's2dv_cube'")
+
+  exp <- rnorm(500)
+  dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 1)
+  lons <- 1:5
+  lats <- 1:4
+  exp <- suppressWarnings(s2dv_cube(data = exp, lat = lats, lon = lons))
+
+  expect_error(
+    CST_Interpolation(exp = exp, lon_dim = 'longitude'),
+    paste0("The name of the latitude/longitude dimensions in 'exp$data' must ",
+           "match the parametres 'lat_dim' and 'lon_dim'"), fixed = TRUE)  # fixed = TRUE because the message contains '$'
+
+  expect_error(
+    CST_Interpolation(exp = exp),
+    paste0("Parameter 'method_remap' must be a character vector indicating the ",
+           "interpolation method. Accepted methods are con, bil, bic, nn, con2"))
+
+  expect_error(
+    CST_Interpolation(exp = exp, method_remap = 'bil'),
+    paste0("Parameter 'target_grid' can be either a path to another NetCDF file which to ",
+           "read the target grid from (a single grid must be defined in such file) or a character ",
+           "vector indicating the coarse grid to be passed to CDO, and it must be a grid recognised ",
+           "by CDO or a NetCDF file."), fixed = TRUE)
+
+  expect_warning(
+    CST_Interpolation(exp = exp, method_remap = 'bil', target_grid = 'r1280x640', region = NULL),
+    paste0("The borders of the downscaling region have not been provided. ",
Assuming the ", + "four borders of the downscaling region are defined by the first and last elements ", + "of the parametres 'lats' and 'lons'")) + + d <- CST_Interpolation(exp = exp, method_remap = 'bil', target_grid = 'r1280x640') + + expect_equal(length(d), 8) + expect_equal(class(d), "s2dv_cube") + expect_equal(as.numeric(dim(d$data)[names(dim(d$data)) == 'lat']), length(d$lat)) + expect_equal(as.numeric(dim(d$data)[names(dim(d$data)) == 'lon']), length(d$lon)) + + expect_error( + CST_Interpolation(exp = exp, points = 1), + paste0("Parameter 'points' must be a list of two elements containing the point latitudes ", + "and longitudes.")) + + expect_error( + CST_Interpolation(exp = exp, points = list(latitudes = c(1,0), longitudes = c(-1,1)), + method_point_interp = 'bilinear', lat_dim = 'lat', lon_dim = 'lon'), + paste0("The names of the elements in the list 'points' must coincide with the parametres ", + "'lat_dim' and 'lon_dim'")) + + exp$source_files[1] <- 'null.nc' + expect_error( + CST_Interpolation(exp = exp, points = list(lat = c(1,0), lon = c(-1,1)), + method_point_interp = 'bilinear'), + "At least one of the points lies outside the model region") + + exp <- rep(letters[1:5], 100) + dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 1) + lons <- 1:5 + lats <- 1:4 + exp <- suppressWarnings(s2dv_cube(data = exp, lat = lats, lon = lons)) + + expect_error( + CST_Interpolation(exp = exp, method_remap = 'bil', target_grid = 'r1280x640', region = c(1, 5, 1, 4)), + "Parameter 'data_array' must be a numeric array.") + + exp <- array(1:6, c(lat = 3, lon = 2)) + lons <- 1:2 + lats <- 1:3 + exp <- suppressWarnings(s2dv_cube(data = exp, lat = lats, lon = lons)) + + d2 <- CST_Interpolation(exp = exp, method_remap = 'bil', target_grid = 'r640x320')$data + expect_equal(d2, array(c(2, 2, 3, 3, 4, 5), dim = c(lat = 3, lon = 2))) + }) + diff --git a/tests/testthat/test_CST_Intlr.R b/tests/testthat/test_CST_Intlr.R new file mode 100644 index 0000000000000000000000000000000000000000..decdb085ab644dad0af1906b1d0f0d4b08434fa4 --- /dev/null +++ b/tests/testthat/test_CST_Intlr.R @@ -0,0 +1,68 @@ +set.seed(1) +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Interpolation.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Utils.R') +source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Intlr.R') +require(testthat) +require(CSTools) +require(multiApply) +require(s2dv) + +context("Generic tests") +test_that("Sanity checks", { + expect_error( + CST_Intlr(exp = 1), + "Parameter 'exp' must be of the class 's2dv_cube'") + + exp <- rnorm(500) + dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 1) + exp_lons <- 1:5 + exp_lats <- 1:4 + exp <- suppressWarnings(s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons)) + + expect_error(CST_Intlr(exp = exp), "argument \"obs\" is missing, with no default") + + obs <- rnorm(900) + dim(obs) <- c(lat = 12, lon = 15, sdate = 5, time = 1) + obs_lons <- seq(1,5, 4/14) + obs_lats <- seq(1,4, 3/11) + obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons) + + expect_error( + CST_Intlr(exp = exp, obs = obs, lr_method = 1), + "Parameter 'lr_method' must be of the class 'character'") + + expect_error( + CST_Intlr(exp = exp, obs = obs, lr_method = "basic"), + paste0("Parameter 'int_method' must be a character vector indicating the interpolation ", + "method. 
Accepted methods are con, bil, bic, nn, con2")) + + expect_error( + CST_Intlr(exp = exp, obs = obs, lr_method = "large-scale", int_method = "bil", target_grid = 'r1280x640', + region = c(1, 5, 1, 4)), + "The large-scale predictors must be passed through the parametre 'predictors'") + + expect_error( + CST_Intlr(exp = exp, obs = obs, lr_method = "large-scale", int_method = "bil", target_grid = 'r1280x640', + region = c(1, 5, 1, 4), predictors = 1), + "Parameter 'predictors' must be of the class 'array'") + + expect_error( + CST_Intlr(exp = exp, obs = obs, int_method = "bil", target_grid = 'r1280x640', region = c(1, 5, 1, 4), + lr_method = "a"), + "a method is not implemented yet") + + d1 <- CST_Intlr(exp = exp, obs = obs, lr_method = "basic", int_method = "bil", target_grid = 'r1280x640') + expect_equal(round(d1$data[, 1, 1, 1, 1], 2), c(-0.70, -0.20, 0.56, 0.87, 0.60)) + + d2 <- CST_Intlr(exp = exp, obs = obs, lr_method = "4nn") + expect_equal(round(d2$data[, 1, 1, 1, 1], 2), c(0.08, 1.08, 0.80, 0.47, -0.52)) + + ind_rdm <- array(rnorm(30), dim = c(sdate = 5, member = 3, vars = 2)) + d3 <- CST_Intlr(exp = exp, obs = obs, lr_method = "large-scale", int_method = "bil", target_grid = 'r1280x640', + region = c(1, 5, 1, 4), predictors = ind_rdm) + expect_equal(round(d3$data[, 1, 1, 1, 1], 2), c(-0.26, -0.92, 0.70, 0.74, 0.86)) + }) + + + +