diff --git a/R/Analogs.R b/R/Analogs.R index 9ed49efb3aa28e32f7dce6f0fd5ef21c6df8d178..4e85d22a86beb37cf7e66f98cfd5a453f4c220c9 100644 --- a/R/Analogs.R +++ b/R/Analogs.R @@ -31,10 +31,17 @@ #'@param obs an 's2dv' object with named dimensions containing the observational field. #'The object must have, at least, the dimensions latitude, longitude and start date. #'The object is expected to be already subset for the desired region. +#'@param obs2 an 's2dv' object with named dimensions containing a different observational +#'field to that in 'obs'. If provided, these observations will be used in the training, +#'i.e. the searching for analogs, so that they should be in a coarser grid to those in +#''obs'. Training with observations on a grid with a spatial resolution closer to that +#'in 'exp', will in principle ensure better results. The object must have, at least, the +#'dimensions latitude, longitude and start date. The object is expected to be already +#'subset for the desired region. #'@param grid_exp a character vector with a path to an example file of the exp data. #'It can be either a path to another NetCDF file which to read the target grid from #'(a single grid must be defined in such file) or a character vector indicating the -#'coarse grid to be passed to CDO, and it must be a grid recognised by CDO or a NetCDF file. +#'coarse grid to be passed to CDO, and it must be a grid recognised by CDO. #'@param nanalogs an integer indicating the number of analogs to be searched #'@param fun_analog a function to be applied over the found analogs. Only these options #'are valid: "mean", "wmean", "max", "min", "median" or NULL. If set to NULL (default), @@ -47,6 +54,8 @@ #'element 'data' in exp and obs. Default set to "sdate". #'@param time_dim a character vector indicating the time dimension name in the element #''data' in exp and obs. Default set to "time". +#'@param member_dim a character vector indicating the member dimension name in the element +#''data' in exp and obs. Default set to "member". #'@param region a numeric vector indicating the borders of the region defined in exp. #'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers #'to the left border, while lonmax refers to the right border. latmin indicates the lower @@ -76,9 +85,9 @@ #'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons) #'downscaled_field <- CST_Analogs(exp = exp, obs = obs, grid_exp = 'r360x180') #'@export -CST_Analogs <- function(exp, obs, grid_exp, nanalogs = 3, fun_analog = NULL, lat_dim = "lat", lon_dim = "lon", - sdate_dim = "sdate", time_dim = "time", region = NULL, return_indices = FALSE, - loocv_window = TRUE, ncores = 1) { +CST_Analogs <- function(exp, obs, grid_exp, obs2 = NULL, nanalogs = 3, fun_analog = NULL, lat_dim = "lat", + lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", member_dim = "member", + region = NULL, return_indices = FALSE, loocv_window = TRUE, ncores = 1) { # input exp and obs must be s2dv_cube objects if (!inherits(exp,'s2dv_cube')) { @@ -93,10 +102,13 @@ CST_Analogs <- function(exp, obs, grid_exp, nanalogs = 3, fun_analog = NULL, lat res <- Analogs(exp = exp$data, obs = obs$data, exp_lats = exp$lat, exp_lons = exp$lon, obs_lats = obs$lat, obs_lons = obs$lon, grid_exp = grid_exp, nanalogs = nanalogs, fun_analog = fun_analog, lat_dim = lat_dim, lon_dim = lon_dim, - sdate_dim = sdate_dim, time_dim = time_dim, region = region, return_indices = return_indices, - loocv_window = loocv_window, ncores = ncores) + sdate_dim = sdate_dim, time_dim = time_dim, member_dim = member_dim, + region = region, return_indices = return_indices, loocv_window = loocv_window, + ncores = ncores) + + res_s2dv <- list(exp = suppressWarnings(s2dv_cube(data = res$data, lon = res$lon, lat = res$lat)), + obs = suppressWarnings(s2dv_cube(data = res$obs, lon = res$lon, lat = res$lat))) - res_s2dv <- suppressWarnings(s2dv_cube(data = res$data, lon = res$lon, lat = res$lat)) return(res_s2dv) @@ -150,7 +162,14 @@ CST_Analogs <- function(exp, obs, grid_exp, nanalogs = 3, fun_analog = NULL, lat #'@param grid_exp a character vector with a path to an example file of the exp data. #'It can be either a path to another NetCDF file which to read the target grid from #'(a single grid must be defined in such file) or a character vector indicating the -#'coarse grid to be passed to CDO, and it must be a grid recognised by CDO or a NetCDF file. +#'coarse grid to be passed to CDO, and it must be a grid recognised by CDO. +#'@param obs2 an 's2dv' object with named dimensions containing a different observational +#'field to that in 'obs'. If provided, these observations will be used in the training, +#'i.e. the searching for analogs, so that they should be in a coarser grid to those in +#''obs'. Training with observations on a grid with a spatial resolution closer to that +#'in 'exp', will in principle ensure better results. The object must have, at least, the +#'dimensions latitude, longitude and start date. The object is expected to be already +#'subset for the desired region. #'@param nanalogs an integer indicating the number of analogs to be searched. #'@param fun_analog a function to be applied over the found analogs. Only these options #'are valid: "mean", "wmean", "max", "min", "median" or NULL. If set to NULL (default), @@ -163,6 +182,8 @@ CST_Analogs <- function(exp, obs, grid_exp, nanalogs = 3, fun_analog = NULL, lat #'element 'data' in exp and obs. Default set to "sdate". #'@param time_dim a character vector indicating the time dimension name in the element #''data' in exp and obs. Default set to "time". +#'@param member_dim a character vector indicating the member dimension name in the element +#''data' in exp and obs. Default set to "member". #'@param region a numeric vector indicating the borders of the region defined in exp. #'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers #'to the left border, while lonmax refers to the right border. latmin indicates the lower @@ -199,10 +220,11 @@ CST_Analogs <- function(exp, obs, grid_exp, nanalogs = 3, fun_analog = NULL, lat #'downscaled_field <- Analogs(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons, #'obs_lats = obs_lats, obs_lons = obs_lons, grid_exp = 'r360x180') #'@export -Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, nanalogs = 3, - fun_analog = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", - time_dim = "time", region = NULL, return_indices = FALSE, loocv_window = TRUE, - ncores = 1) { +Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, obs2 = NULL, + obs2_lats = NULL, obs2_lons = NULL, nanalogs = 3, fun_analog = NULL, + lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", + member_dim = "member", region = NULL, return_indices = FALSE, + loocv_window = TRUE, ncores = 1) { #----------------------------------- # Checkings #----------------------------------- @@ -214,7 +236,7 @@ Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, } if (!inherits(nanalogs, 'numeric')) { - stop("Parameter 'nanalogs' must be of the class 'character'") + stop("Parameter 'nanalogs' must be of the class 'numeric'") } if (!inherits(lat_dim, 'character')) { @@ -262,22 +284,72 @@ Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, # "longitudes.") #} + # the code is not yet prepared to handle members in the observations + restore_ens <- FALSE + if (member_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[member_dim]), 1)) { + restore_ens <- TRUE + obs <- ClimProjDiags::Subset(x = obs, along = member_dim, indices = 1, drop = 'selected') + } else { + stop("Not implemented for observations with members ('obs' can have 'member_dim', ", + "but it should be of length = 1).") + } + } + + if (!is.null(obs2)) { + # the code is not yet prepared to handle members in the observations + if (member_dim %in% names(dim(obs2))) { + if (identical(as.numeric(dim(obs2)[member_dim]), 1)) { + obs2 <- ClimProjDiags::Subset(x = obs2, along = member_dim, indices = 1, drop = 'selected') + } else { + stop("Not implemented for observations with members ('obs2' can have 'member_dim', ", + "but it should be of length = 1).") + } + } + + if (is.null(obs2_lats) | is.null(obs2_lons)) { + stop("Missing latitudes and/or longitudes for the provided training observations. Please ", + "provide them with the parametres 'obs2_lats' and 'obs2_lons'") + } + + if (is.na(match(lon_dim, names(dim(obs2))))) { + stop("Missing longitude dimension in 'obs2', or does not match the parameter 'lon_dim'") + } + + if (is.na(match(lat_dim, names(dim(obs2))))) { + stop("Missing latitude dimension in 'obs2', or does not match the parameter 'lat_dim'") + } + + if (is.na(match(sdate_dim, names(dim(obs2))))) { + stop("Missing start date dimension in 'obs2', or does not match the parameter 'sdate_dim'") + } + + if (is.na(match(time_dim, names(dim(obs2))))) { + stop("Missing time dimension in 'obs2', or does not match the parameter 'time_dim'") + } + } + # Select a function to apply to the analogs selected for a given observation if (!is.null(fun_analog)) { stopifnot(fun_analog %in% c("mean", "wmean", "max", "min", "median")) } + if (!is.null(obs2)) { + obs_train <- obs2 + obs_train_lats <- obs2_lats + obs_train_lons <- obs2_lons + } else { + obs_train <- obs + obs_train_lats <- obs_lats + obs_train_lons <- obs_lons + } + # Correct indices later if cross-validation loocv_correction <- FALSE - if ( !("window" %in% names(dim(obs))) & loocv_window) { + if ( !("window" %in% names(dim(obs_train))) & loocv_window) { loocv_correction <- TRUE } - # Create window if user does not have it in obs - if ( !("window" %in% names(dim(obs))) ) { - obs <- generate_window(obj = obs, sdate_dim = sdate_dim, time_dim = time_dim, loocv = loocv_window) - } - #----------------------------------- # Interpolate high-res observations to the coarse grid #----------------------------------- @@ -288,9 +360,9 @@ Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, region <- c(exp_lons[1], exp_lons[length(exp_lons)], exp_lats[1], exp_lats[length(exp_lats)]) } - obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = grid_exp, - lat_dim = lat_dim, lon_dim = lon_dim, method_remap = "conservative", - region = region) + obs_interpolated <- Interpolation(exp = obs_train, lats = obs_train_lats, lons = obs_train_lons, + target_grid = grid_exp, lat_dim = lat_dim, lon_dim = lon_dim, + method_remap = "conservative", region = region) # If after interpolating 'obs' data the coordinates do not match, the exp data is interpolated to # the same grid to force the matching if (!.check_coords(lat1 = obs_interpolated$lat, lat2 = exp_lats, lon1 = obs_interpolated$lon, lon2 = exp_lons)) { @@ -301,10 +373,17 @@ Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, exp_interpolated <- exp } + # Create window if user does not have it in the training observations + if ( !("window" %in% names(dim(obs_interpolated$data))) ) { + obs_train_interpolated <- generate_window(obj = obs_interpolated$data, sdate_dim = sdate_dim, + time_dim = time_dim, loocv = loocv_window) + obs_hres <- generate_window(obj = obs, sdate_dim = sdate_dim, time_dim = time_dim, loocv = loocv_window) + } + #----------------------------------- # Reshape train and test #----------------------------------- - res.data <- Apply(list(obs_interpolated$data, exp_interpolated, obs), + res.data <- Apply(list(obs_train_interpolated, exp_interpolated, obs_hres), target_dims = list(c("window", lat_dim, lon_dim), c(lat_dim, lon_dim), c("window", lat_dim, lon_dim)), fun = function(tr, te, ob) .analogs(train = tr, test = te, obs_hres = ob, k = nanalogs, @@ -312,7 +391,7 @@ Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, # Return the indices of the best analogs if (return_indices) { - res.ind <- Apply(list(obs_interpolated$data, exp_interpolated, obs), + res.ind <- Apply(list(obs_train_interpolated, exp_interpolated, obs_hres), target_dims = list(c("window", lat_dim, lon_dim), c(lat_dim, lon_dim), c("window", lat_dim, lon_dim)), fun = function(tr, te, ob) .analogs(train = tr, test = te, obs_hres = ob, k = nanalogs, @@ -326,10 +405,21 @@ Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, sapply(1:nsdates, function(s) seq(ntimes * nsdates)[ - (ntimes * (s - 1) + 1:ntimes)][x[, s]]), output_dims = c("ind", sdate_dim))$output1 } - res <- list(data = res.data, ind = res.ind, lon = obs_lons, lat = obs_lats) + + # restore ensemble dimension in observations if it existed originally + if (restore_ens) { + obs <- s2dv::InsertDim(obs, posdim = 1, lendim = 1, name = member_dim) + } + + res <- list(data = res.data, ind = res.ind, obs = obs, lon = obs_lons, lat = obs_lats) } else { - res <- list(data = res.data, lon = obs_lons, lat = obs_lats) + # restore ensemble dimension in observations if it existed originally + if (restore_ens) { + obs <- s2dv::InsertDim(obs, posdim = 1, lendim = 1, name = member_dim) + } + + res <- list(data = res.data, obs = obs, lon = obs_lons, lat = obs_lats) } return(res) @@ -337,6 +427,9 @@ Analogs <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, grid_exp, # For each element in test, find the indices of the k nearest neigbhors in train .analogs <- function(train, test, obs_hres, k, fun_analog, return_indices = FALSE) { + + require(FNN) + # train and obs_hres dim: 3 dimensions window, lat and lon (in this order) # test dim: 2 dimensions lat and lon (in this order) # Number of lats/lons of the high-resolution data @@ -396,7 +489,7 @@ generate_window <- function(obj, sdate_dim, time_dim, loocv, size = NULL) { if (loocv) { obj_window <- Apply(obj, target_dims = c(time_dim, sdate_dim), fun = function(x) sapply(rsdates, function(s) as.vector(x[ rtimes, -s])), - output_dims = c('window', 'sdate'))$output1 + output_dims = c('window', sdate_dim))$output1 # Generate window without cross-validation } else { obj_window <- Apply(obj, target_dims = c(time_dim, sdate_dim), @@ -418,11 +511,11 @@ generate_window <- function(obj, sdate_dim, time_dim, loocv, size = NULL) { obj_window <- Apply(list(obj_new, rtimes, rsdates), target_dims = list(c(time_dim, sdate_dim), NULL, NULL), fun = function(x, t, s) as.vector(x[(ntimes + t - size):(ntimes + t + size), -s]), output_dims = 'window')$output1 - names(dim(obj_window))[(length(names(dim(obj_window))) - 1):length(names(dim(obj_window)))] <- c("time", "sdate") + names(dim(obj_window))[(length(names(dim(obj_window))) - 1):length(names(dim(obj_window)))] <- c(time_dim, sdate_dim) } else { obj_window <- Apply(obj_new, target_dims = c(time_dim, sdate_dim), fun = function(x) sapply(rtimes, function(t) as.vector(x[(ntimes + t - size):(ntimes + t + size), ])), - output_dims = c('window', 'time'))$output1 + output_dims = c('window', time_dim))$output1 } } diff --git a/R/Intbc.R b/R/Intbc.R index 4fab26d64ac48cdd12c61e1a67eb01a7d9d8e19a..a819b76a729831001e3acb2275be02311232c6a0 100644 --- a/R/Intbc.R +++ b/R/Intbc.R @@ -66,9 +66,9 @@ #'res <- CST_Intbc(exp = exp, obs = obs, target_grid = 'r1280x640', bc_method = 'simple_bias', int_method = 'conservative') #'@export -CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, points = NULL, method_point_interp = NULL, - lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", member_dim = "member", - region = NULL, ncores = 1) +CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, points = NULL, + method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", + sdate_dim = "sdate", member_dim = "member", region = NULL, ncores = 1) { if (!inherits(exp,'s2dv_cube')) { stop("Parameter 'exp' must be of the class 's2dv_cube'") @@ -84,8 +84,9 @@ CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, point method_point_interp = method_point_interp, lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim, member_dim = member_dim, region = region, ncores = ncores) - res_s2dv <- suppressWarnings(s2dv_cube(data = res$data, lon = res$lon, lat = res$lat)) - + res_s2dv <- list(exp = suppressWarnings(s2dv_cube(data = res$data, lon = res$lon, lat = res$lat)), + obs = suppressWarnings(s2dv_cube(data = res$obs, lon = res$lon, lat = res$lat))) + return(res_s2dv) } @@ -287,19 +288,9 @@ Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, sdate_dim = sdate_dim, ...) } else if (bc_method == 'qm' | bc_method == 'quantile_mapping') { - # TO REMOVE ONCE CSTools includes the new changes - source('/esarchive/scratch/jramon/GitLab_jramon/cstools/R/CST_QuantileMapping.R') - - # Quantile mapping requires the time dimension to be named sdate, ftime or time - if (sdate_dim != 'sdate') { - names(dim(exp_interpolated$data)) <- replace(names(dim(exp_interpolated$data)), - which(names(dim(exp_interpolated$data)) == sdate_dim), 'sdate') - names(dim(obs_ref)) <- replace(names(dim(obs_ref)), - which(names(dim(obs_ref)) == sdate_dim), 'sdate') - } - - res <- QuantileMapping(exp = exp_interpolated$data, obs = obs_ref, sample_dims = 'sdate', - na.rm = TRUE, ...) + + res <- QuantileMapping(exp = exp_interpolated$data, obs = obs_ref, na.rm = TRUE, + memb_dim = member_dim, sdate_dim = sdate_dim, ...) } else if (bc_method == 'dbc' | bc_method == 'dynamical_bias') { # the temporal dimension must be only one dimension called "time" @@ -316,7 +307,7 @@ Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, } # Return a list of three elements - res <- list(data = res, lon = exp_interpolated$lon, lat = exp_interpolated$lat) + res <- list(data = res, obs = obs_ref, lon = exp_interpolated$lon, lat = exp_interpolated$lat) return(res) } diff --git a/R/Interpolation.R b/R/Interpolation.R index d2d9373d51bde2df76d3464d51b6d9a042434f4b..7a81d8b76ecebd98bcce8240f0641bf11bd81cfb 100644 --- a/R/Interpolation.R +++ b/R/Interpolation.R @@ -72,7 +72,8 @@ CST_Interpolation <- function(exp, points = NULL, method_remap = NULL, target_gr method_remap = method_remap, target_grid = target_grid, lat_dim = lat_dim, lon_dim = lon_dim, region = region, method_point_interp = method_point_interp) - res_s2dv <- suppressWarnings(s2dv_cube(data = res$data, lon = res$lon, lat = res$lat)) + res_s2dv <- list(exp = suppressWarnings(s2dv_cube(data = res$data, lon = res$lon, lat = res$lat)), + obs = NULL) return(res_s2dv) } @@ -251,15 +252,15 @@ Interpolation <- function(exp, lats, lons, points = NULL, source_file = NULL, me # Map regrid with CDO #---------------------------------- if (is.null(points)) { - res <- s2dv::CDORemap(data_array = exp, - lats = lats, - lons = lons, - grid = target_grid, - method = method_remap, - crop = region) + res <- CDORemap(data_array = exp, + lats = lats, + lons = lons, + grid = target_grid, + method = method_remap, + crop = region) # Return a list - res <- list(data = res$data_array, lon = res$lons, lat = res$lats) + res <- list(data = res$data_array, obs = NULL, lon = res$lons, lat = res$lats) #---------------------------------- # Interpolate to point locations @@ -278,7 +279,7 @@ Interpolation <- function(exp, lats, lons, points = NULL, source_file = NULL, me res <- interpolate_data(model_data_gridpoints, weights) # Return a list - res <- list(data = res, lon = points[[lon_dim]], lat = points[[lat_dim]]) + res <- list(data = res, obs = NULL, lon = points[[lon_dim]], lat = points[[lat_dim]]) } return(res) diff --git a/R/Intlr.R b/R/Intlr.R index 0da52229a56f71cf81f04d7572229c07468b9233..bf744b36bb8572011d706f9d850c64385119ab37 100644 --- a/R/Intlr.R +++ b/R/Intlr.R @@ -88,8 +88,8 @@ #'@export CST_Intlr <- function(exp, obs, lr_method, target_grid = NULL, points = NULL, int_method = NULL, method_point_interp = NULL, predictors = NULL, lat_dim = "lat", lon_dim = "lon", - sdate_dim = "sdate", time_dim = "time", large_scale_predictor_dimname = 'vars', - loocv = FALSE, region = NULL, ncores = 1) { + sdate_dim = "sdate", time_dim = "time", member_dim = "member", + large_scale_predictor_dimname = 'vars', loocv = FALSE, region = NULL, ncores = 1) { if (!inherits(exp,'s2dv_cube')) { stop("Parameter 'exp' must be of the class 's2dv_cube'") @@ -104,13 +104,16 @@ CST_Intlr <- function(exp, obs, lr_method, target_grid = NULL, points = NULL, in source_file_obs = obs$source_files[1], target_grid = target_grid, lr_method = lr_method, int_method = int_method, method_point_interp = method_point_interp, predictors = predictors, lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim, time_dim = time_dim, - region = region, large_scale_predictor_dimname = large_scale_predictor_dimname, loocv = loocv, - ncores = ncores) + member_dim = member_dim, large_scale_predictor_dimname = large_scale_predictor_dimname, + loocv = loocv, region = region, ncores = ncores) #----------------------------------- # Create an s2dv_cube object #----------------------------------- - res_s2dv <- suppressWarnings(s2dv_cube(data = res$data, lon = res$lon, lat = res$lat)) + res_s2dv <- list(exp = suppressWarnings(s2dv_cube(data = res$data, lon = res$lon, lat = res$lat)), + obs = suppressWarnings(s2dv_cube(data = res$obs, lon = res$lon, lat = res$lat))) + + return(res_s2dv) } @@ -217,7 +220,8 @@ CST_Intlr <- function(exp, obs, lr_method, target_grid = NULL, points = NULL, in Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, target_grid = NULL, points = NULL, int_method = NULL, method_point_interp = NULL, source_file_exp = NULL, source_file_obs = NULL, predictors = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", - region = NULL, large_scale_predictor_dimname = 'vars', loocv = FALSE, ncores = 1) { + member_dim = "member", region = NULL, large_scale_predictor_dimname = 'vars', loocv = FALSE, + ncores = 1) { #----------------------------------- # Checkings @@ -278,7 +282,19 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t # sdate must be the time dimension in the input data stopifnot(sdate_dim %in% names(dim(exp))) stopifnot(sdate_dim %in% names(dim(obs))) - + + # the code is not yet prepared to handle members in the observations + restore_ens <- FALSE + if (member_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[member_dim]), 1)) { + obs <- ClimProjDiags::Subset(x = obs, along = member_dim, indices = 1, drop = 'selected') + restore_ens <- TRUE + } else { + stop("Not implemented for observations with members ('obs' can have 'member_dim', ", + "but it should be of length = 1).") + } + } + # checkings for the parametre 'predictors' if (!is.null(predictors)) { if (!is.array(predictors)) { @@ -408,8 +424,13 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t names(dim(res))[1] <- sdate_dim + # restore ensemble dimension in observations if it existed originally + if (restore_ens) { + predictand <- s2dv::InsertDim(predictand, posdim = 1, lendim = 1, name = member_dim) + } + # Return a list of three elements - res <- list(data = res, lon = lons, lat = lats) + res <- list(data = res, obs = predictand, lon = lons, lat = lats) return(res) } diff --git a/R/LogisticReg.R b/R/LogisticReg.R index 5496e00a936b7346b95939dab37d8bdfd659c4d1..d46fb6a396f8578328b6ce18c91e68b822d2880e 100644 --- a/R/LogisticReg.R +++ b/R/LogisticReg.R @@ -113,7 +113,9 @@ CST_LogisticReg <- function(exp, obs, target_grid, int_method = NULL, log_reg_me #----------------------------------- # Create an s2dv_cube object #----------------------------------- - res_s2dv <- suppressWarnings(s2dv_cube(data = res$data, lon = res$lon, lat = res$lat)) + res_s2dv <- list(exp = suppressWarnings(s2dv_cube(data = res$data, lon = res$lon, lat = res$lat)), + obs = suppressWarnings(s2dv_cube(data = res$obs, lon = res$lon, lat = res$lat))) + return(res_s2dv) } @@ -296,6 +298,18 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)]) } + # the code is not yet prepared to handle members in the observations + restore_ens <- FALSE + if (member_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[member_dim]), 1)) { + restore_ens <- TRUE + obs <- ClimProjDiags::Subset(x = obs, along = member_dim, indices = 1, drop = 'selected') + } else { + stop("Not implemented for observations with members ('obs' can have 'member_dim', ", + "but it should be of length = 1).") + } + } + exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid, method_remap = int_method, points = points, source_file = source_file, lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp, @@ -352,7 +366,12 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target output_dims = sdate_dim)$output1 } - res <- list(data = res, lon = exp_interpolated$lon, lat = exp_interpolated$lat) + # restore ensemble dimension in observations if it existed originally + if (restore_ens) { + obs_ref <- s2dv::InsertDim(obs_ref, posdim = 1, lendim = 1, name = member_dim) + } + + res <- list(data = res, obs = obs_ref, lon = exp_interpolated$lon, lat = exp_interpolated$lat) return(res) } @@ -452,11 +471,8 @@ pred_lr <- function(df, lm1, loocv) { if (loocv) { - # type = class, probs - #if (return_type == "class") { - # pred_vals <- sapply(1:nrow(df), function(j) predict(lm1[[j]], df[j,], type = "class")) - #} - + # The error: "Error: Results must have the same dimensions." can + # appear when the number of sdates is insufficient pred_vals_ls <- list() for (j in 1:nrow(df)) { pred_vals_ls[[j]] <- predict(lm1[[j]], df[j,], type = "probs") diff --git a/inst/examples/analogs.R b/inst/examples/analogs.R index 7886cffd23e3f17916616637f61ce9e3b1e31439..3446647dd00a7c241a85275f03accb244f319fcc 100644 --- a/inst/examples/analogs.R +++ b/inst/examples/analogs.R @@ -24,15 +24,17 @@ extra <- 1 #sdates <- c('20000201','20010201','20020201','20030201','20040201','20050201','20060201','20070201') #sdates <- format(seq(ymd("20000101"), ymd("20071201"), '1 month'), "%Y%m%d") -sdates_exp <- format(ymd("20000501") + rep(years(0:2), each=1),"%Y%m%d") -sdates_obs <- format(ymd("20000401") + months(0:2) + rep(years(0:2), each=3),"%Y%m") -#sdates_obs <- format(ymd("20000501") + rep(years(0:2), each=1),"%Y%m") +sdates_exp <- format(ymd("20000501") + rep(years(0:8), each=1),"%Y%m%d") +#sdates_obs <- format(ymd("20000401") + months(0:2) + rep(years(0:2), each=3),"%Y%m") +sdates_obs <- format(ymd("20000501") + rep(years(0:8), each=1),"%Y%m") #--------------------------- # Observations #--------------------------- # dim(obs) <- c('sdate', 'smonth', ...) -obs1 <- startR::Start(dat = '/esarchive/recon/ecmwf/era5/daily_mean/$var$_f1h/$var$_$sdate$.nc', +# ERA5 observations: used in the training +# change to time = indices(1:28) +obs_era5 <- startR::Start(dat = '/esarchive/recon/ecmwf/era5/daily_mean/$var$_f1h/$var$_$sdate$.nc', var = 'tas', time = indices(1:28), latitude = values(list(latmin - extra, latmax + extra)), sdate = sdates_obs, latitude_reorder = Sort(decreasing = FALSE), longitude = values(list(lonmin - extra, lonmax + extra)), @@ -42,19 +44,31 @@ obs1 <- startR::Start(dat = '/esarchive/recon/ecmwf/era5/daily_mean/$var$_f1h/$v num_procs = 1, retrieve = TRUE) # Create 's2dv_cube' object -obs2 <- s2dv_cube(obs1, lat = attr(obs1, "Variables")$dat1$lat, lon = attr(obs1, "Variables")$dat1$lon, - source_files = attr(obs1, "Files")[1,1,]) +obs_s2dv <- s2dv_cube(obs_era5, lat = attr(obs_era5, "Variables")$dat1$lat, + lon = attr(obs_era5, "Variables")$dat1$lon, source_files = attr(obs_era5, "Files")[1,1,]) + +# ERA5-Land observations: high-resolution observations +# change to time = indices(1:28) +obs_hres <- startR::Start(dat = '/esarchive/recon/ecmwf/era5land/daily_mean/$var$_f1h/$var$_$sdate$.nc', + var = 'tas', time = indices(1:28), latitude = values(list(latmin - extra, latmax + extra)), + sdate = sdates_obs, latitude_reorder = Sort(decreasing = FALSE), + longitude = values(list(lonmin - extra, lonmax + extra)), + longitude_reorder = CircularSort(-180, 180), + synonims = list(var = c('var','variable'), longitude = c('lon', 'longitude'), + latitude = c('lat', 'latitude')), return_vars = list(latitude = 'dat', longitude = 'dat'), + num_procs = 1, retrieve = TRUE) + # Create window to look for analogues -#dim(obs2$data) <- c(data = 1, var = 1, time = 28, lat = 39, smonth = 3, sdate = 5, lon = 67) # correct -#obs_new <- Apply(obs2$data, target_dims = list(c("time", "smonth")), fun = as.vector, output_dims = "time")$output1 +#dim(obs_s2dv$data) <- c(data = 1, var = 1, time = 28, lat = 39, smonth = 3, sdate = 5, lon = 67) # correct +#obs_new <- Apply(obs_s2dv$data, target_dims = list(c("time", "smonth")), fun = as.vector, output_dims = "time")$output1 #wlen <- 7 -#result <- array(NA, dim = c(window = 2*wlen + 1, time = 28, lat = 39, sdate = 7, lon = 67)) +#result <- array(NA, dim = c(window = 2*wlen + 1, time = 28, lat = 39, sdate = 7, lon = 67)) #for (i in 1:28) { # result[,i,,,] <- obs_new[(28 + i - wlen):(28 + i + wlen),1,1,,,] #} -#obs2$data <- result +#obs_s2dv$data <- result # > dim(obs$data) # data var time lat smonth sdate lon @@ -66,7 +80,7 @@ obs2 <- s2dv_cube(obs1, lat = attr(obs1, "Variables")$dat1$lat, lon = attr(obs1, #--------------------------- # Model #--------------------------- -exp1 <- startR::Start(dat = '/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h/$var$_$sdate$.nc', +exp <- startR::Start(dat = '/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h/$var$_$sdate$.nc', var = 'tas', time = indices(1:28), member = indices(1:3), sdate = sdates_exp, latitude = values(list(latmin, latmax)), latitude_reorder = Sort(decreasing = FALSE), longitude = values(list(lonmin, lonmax)), longitude_reorder = CircularSort(-180, 180), @@ -76,66 +90,77 @@ exp1 <- startR::Start(dat = '/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6 num_procs = 1, retrieve = TRUE) # Create 's2dv_cube' object -exp2 <- s2dv_cube(exp1, lat = attr(exp1, "Variables")$dat1$lat, lon = attr(exp1, "Variables")$dat1$lon, - source_files = attr(exp1, "Files")[1,1,]) +exp_s2dv <- s2dv_cube(exp, lat = attr(exp, "Variables")$dat1$lat, lon = attr(exp, "Variables")$dat1$lon, + source_files = attr(exp, "Files")[1,1,]) #--------------------------- # Analogs with no 'window' dimension in the initial object and with cv #--------------------------- -ana_1 <- Analogs(exp = exp1, obs = obs1, exp_lats = attr(exp1, "Variables")$dat1$lat, - exp_lons = attr(exp1, "Variables")$dat1$lon, obs_lats = attr(obs1, "Variables")$dat1$lat, - obs_lons = attr(obs1, "Variables")$dat1$lon, grid_exp = attr(exp1, "Files")[1,1,1], - fun_analog = NULL, lat_dim = 'latitude', lon_dim = 'longitude', loocv_window = TRUE, ncores = 4) +ana_1 <- Analogs(exp = exp, obs = obs_hres, exp_lats = attr(exp, "Variables")$dat1$lat, + exp_lons = attr(exp, "Variables")$dat1$lon, obs_lats = attr(obs_hres, "Variables")$dat1$lat, + obs_lons = attr(obs_hres, "Variables")$dat1$lon, grid_exp = attr(exp, "Files")[1,1,1], + fun_analog = NULL, lat_dim = 'latitude', lon_dim = 'longitude', loocv_window = TRUE, + return_indices = TRUE, ncores = 4) #--------------------------- # Analogs with no 'window' dimension in the initial object and without cv #--------------------------- -ana_2 <- Analogs(exp = exp1, obs = obs1, exp_lats = attr(exp1, "Variables")$dat1$lat, - exp_lons = attr(exp1, "Variables")$dat1$lon, obs_lats = attr(obs1, "Variables")$dat1$lat, - obs_lons = attr(obs1, "Variables")$dat1$lon, grid_exp = attr(exp1, "Files")[1,1,1], +ana_2 <- Analogs(exp = exp, obs = obs_era5, exp_lats = attr(exp, "Variables")$dat1$lat, + exp_lons = attr(exp, "Variables")$dat1$lon, obs_lats = attr(obs_era5, "Variables")$dat1$lat, + obs_lons = attr(obs_era5, "Variables")$dat1$lon, grid_exp = attr(exp, "Files")[1,1,1], fun_analog = NULL, lat_dim = 'latitude', lon_dim = 'longitude', loocv_window = FALSE, ncores = 4) #--------------------------- # Analogs with 'window' dimension in the initial object and with cv #--------------------------- -dim(obs1) <- c(data = 1, var = 1, time = 28, latitude = 39, smonth = 3, sdate = 3, longitude = 67) -obs_window <- generate_window(obj = obs1, sdate_dim = 'sdate', time_dim = 'time', size = 7, loocv = TRUE) -ana_3 <- Analogs(exp = exp1, obs = obs_window, exp_lats = attr(exp1, "Variables")$dat1$lat, - exp_lons = attr(exp1, "Variables")$dat1$lon, obs_lats = attr(obs1, "Variables")$dat1$lat, - obs_lons = attr(obs1, "Variables")$dat1$lon, grid_exp = attr(exp1, "Files")[1,1,1], +dim(obs_era5) <- c(data = 1, var = 1, time = 28, latitude = 39, smonth = 3, sdate = 3, longitude = 67) +obs_window <- generate_window(obj = obs_era5, sdate_dim = 'sdate', time_dim = 'time', size = 7, loocv = TRUE) +ana_3 <- Analogs(exp = exp, obs = obs_window, exp_lats = attr(exp, "Variables")$dat1$lat, + exp_lons = attr(exp, "Variables")$dat1$lon, obs_lats = attr(obs_era5, "Variables")$dat1$lat, + obs_lons = attr(obs_era5, "Variables")$dat1$lon, grid_exp = attr(exp, "Files")[1,1,1], fun_analog = NULL, lat_dim = 'latitude', lon_dim = 'longitude', ncores = 4) #--------------------------- # Analogs with cv and returning the indices #--------------------------- -ana_4 <- Analogs(exp = exp1, obs = obs1, exp_lats = attr(exp1, "Variables")$dat1$lat, - exp_lons = attr(exp1, "Variables")$dat1$lon, obs_lats = attr(obs1, "Variables")$dat1$lat, - obs_lons = attr(obs1, "Variables")$dat1$lon, grid_exp = attr(exp1, "Files")[1,1,1], +ana_4 <- Analogs(exp = exp, obs = obs_era5, exp_lats = attr(exp, "Variables")$dat1$lat, + exp_lons = attr(exp, "Variables")$dat1$lon, obs_lats = attr(obs_era5, "Variables")$dat1$lat, + obs_lons = attr(obs_era5, "Variables")$dat1$lon, grid_exp = attr(exp, "Files")[1,1,1], fun_analog = NULL, lat_dim = 'latitude', lon_dim = 'longitude', loocv_window = TRUE, return_indices = TRUE, ncores = 4) #--------------------------- # Analogs with 'window' dimension and returning the indices #--------------------------- -ana_5 <- Analogs(exp = exp1, obs = obs_window, exp_lats = attr(exp1, "Variables")$dat1$lat, - exp_lons = attr(exp1, "Variables")$dat1$lon, obs_lats = attr(obs1, "Variables")$dat1$lat, - obs_lons = attr(obs1, "Variables")$dat1$lon, grid_exp = attr(exp1, "Files")[1,1,1], +ana_5 <- Analogs(exp = exp, obs = obs_window, exp_lats = attr(exp, "Variables")$dat1$lat, + exp_lons = attr(exp, "Variables")$dat1$lon, obs_lats = attr(obs_era5, "Variables")$dat1$lat, + obs_lons = attr(obs_era5, "Variables")$dat1$lon, grid_exp = attr(exp, "Files")[1,1,1], fun_analog = NULL, lat_dim = 'latitude', lon_dim = 'longitude', return_indices = TRUE, ncores = 4) #--------------------------- # Analogs with NA's in obs #--------------------------- -obs1[,,,10:20,,] <- NA -ana_mean_2 <- Analogs(exp = exp1, obs = obs1, exp_lats = attr(exp1, "Variables")$dat1$lat, - exp_lons = attr(exp1, "Variables")$dat1$lon, obs_lats = attr(obs1, "Variables")$dat1$lat, - obs_lons = attr(obs1, "Variables")$dat1$lon, source_file_exp = attr(exp1, "Files")[1,1,1], +obs_era5[,,,10:20,,] <- NA +ana_mean_2 <- Analogs(exp = exp, obs = obs_era5, exp_lats = attr(exp, "Variables")$dat1$lat, + exp_lons = attr(exp, "Variables")$dat1$lon, obs_lats = attr(obs_era5, "Variables")$dat1$lat, + obs_lons = attr(obs_era5, "Variables")$dat1$lon, source_file_exp = attr(exp, "Files")[1,1,1], fun_analog = 'mean', ncores = 4,lat_dim='latitude',lon_dim='longitude') +#--------------------------- +# Analogs with two types of observations +#--------------------------- +ana_6 <- Analogs(exp = exp, obs = obs_hres, obs2 = obs_era5, exp_lats = attr(exp, "Variables")$dat1$lat, + exp_lons = attr(exp, "Variables")$dat1$lon, obs_lats = attr(obs_hres, "Variables")$dat1$lat, + obs_lons = attr(obs_hres, "Variables")$dat1$lon, obs2_lats = attr(obs_era5, "Variables")$dat1$lat, + obs2_lons = attr(obs_era5, "Variables")$dat1$lon, grid_exp = attr(exp, "Files")[1,1,1], + fun_analog = NULL, lat_dim = 'latitude', lon_dim = 'longitude', loocv_window = TRUE, + return_indices = TRUE, ncores = 4) + #--------------------------- # Analogs with CST_Analogs() #--------------------------- -ana_mean <- CST_Analogs(exp = exp2, obs = obs2, fun_analog = "mean", ncores = 4) +ana_mean <- CST_Analogs(exp = exp2, obs = obs_s2dv, fun_analog = "mean", ncores = 4) # test examples ana_mean <- Analogs(exp = exp, obs = obs, fun_analog = "mean", ncores = 4)