diff --git a/R/Analogs.R b/R/Analogs.R index b342a91a2291ddf06f0dd1ee614942777eb9b473..d90883f674271cf204f79f1b9bd8f46b7d16a8d2 100644 --- a/R/Analogs.R +++ b/R/Analogs.R @@ -434,23 +434,33 @@ Analogs <- function(exp, obs, exp_lats = NULL, exp_lons = NULL, obs_lats, obs_lo exp_lats[length(exp_lats)]) } - obs_interpolated <- Interpolation(exp = obs_train, lats = obs_train_lats, lons = obs_train_lons, - target_grid = grid_exp, lat_dim = lat_dim, lon_dim = lon_dim, - method_remap = "conservative", region = region, - ncores = ncores) - - # If after interpolating 'obs' data the coordinates do not match, the exp data is interpolated to - # the same grid to force the matching - if (!.check_coords(lat1 = as.numeric(obs_interpolated$lat), - lat2 = exp_lats, - lon1 = as.numeric(obs_interpolated$lon), - lon2 = exp_lons)) { - exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, - target_grid = grid_exp, lat_dim = lat_dim, - lon_dim = lon_dim, method_remap = "conservative", - region = region, ncores = ncores)$data - } else { + if (.check_coords(lat1 = exp_lats, lat2 = obs_train_lats, + lon1 = exp_lons, lon2 = obs_train_lons)) { + + obs_interpolated <- NULL + obs_interpolated$data <- obs_train exp_interpolated <- exp + + } else { + + obs_interpolated <- Interpolation(exp = obs_train, lats = obs_train_lats, lons = obs_train_lons, + target_grid = grid_exp, lat_dim = lat_dim, lon_dim = lon_dim, + method_remap = "conservative", region = region, + ncores = ncores) + + # If after interpolating 'obs' data the coordinates do not match, the exp data is interpolated to + # the same grid to force the matching + if (!.check_coords(lat1 = as.numeric(obs_interpolated$lat), + lat2 = exp_lats, + lon1 = as.numeric(obs_interpolated$lon), + lon2 = exp_lons)) { + exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, + target_grid = grid_exp, lat_dim = lat_dim, + lon_dim = lon_dim, method_remap = "conservative", + region = region, ncores = ncores)$data + } else { + exp_interpolated <- exp + } } # Create window if user does not have it in the training observations @@ -539,9 +549,10 @@ Analogs <- function(exp, obs, exp_lats = NULL, exp_lons = NULL, obs_lats, obs_lo obs_hres <- apply(obs_hres, 1, as.vector); names(dim(obs_hres))[1] <- "space" # Identify and remove NA's - dum<-which(!apply(train,2,function (x) all(is.na(x))))[1] ## the column in which NA in space will be investigated. it shouldn't be all-NA time-step + dum <- which(!apply(train,2,function (x) all(is.na(x))))[1] ## the column in which NA in space will be investigated. it shouldn't be all-NA time-step + dum2 <- which(!apply(train,1,function (x) all(is.na(x))))[1] ## the row in which NA in time will be investigated. it shouldn't be all-NA time-step idx_na_tr <- is.na(train[ , dum]) # NA in space - idy_na_tr <- is.na(train[1, ]) # NA in time + idy_na_tr <- is.na(train[dum2, ]) # NA in time idx_na_te <- is.na(test) idx_na <- idx_na_tr | idx_na_te tr_wo_na <- t(train[!idx_na , !idy_na_tr ]) diff --git a/R/Intbc.R b/R/Intbc.R index d11b5e1ac9e6ccfc4f83cfc97e3befdcffd75d67..33f9b757a08a79c8c67506769bf4ba3e2113a428 100644 --- a/R/Intbc.R +++ b/R/Intbc.R @@ -55,6 +55,8 @@ #'takes the first and last elements of the latitudes and longitudes. #'@param ncores an integer indicating the number of cores to use in parallel computation. #'The default value is NULL. +#'@param loocv a logical indicating whether to apply leave-one-out cross-validation when +#'applying the bias correction. Default to TRUE. #'@return An 's2dv' object. The element 'data' contains the dowscaled field, 'lat' the #'downscaled latitudes, and 'lon' the downscaled longitudes. #'@examples @@ -74,7 +76,7 @@ CST_Intbc <- function(exp, obs, exp_cor = NULL, target_grid, bc_method, int_method = NULL, points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", member_dim = "member", time_dim = "time", - region = NULL, ncores = NULL, ...) + region = NULL, ncores = NULL, loocv = TRUE, ...) { if (!inherits(exp,'s2dv_cube')) { stop("Parameter 'exp' must be of the class 's2dv_cube'") @@ -93,7 +95,7 @@ CST_Intbc <- function(exp, obs, exp_cor = NULL, target_grid, bc_method, int_meth source_file_obs = obs$attrs$source_files[1], method_point_interp = method_point_interp, lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim, member_dim = member_dim, - time_dim = time_dim, region = region, ncores = ncores, ...) + time_dim = time_dim, region = region, ncores = ncores, loocv = loocv, ...) # Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data obs$data <- res$obs @@ -190,6 +192,8 @@ CST_Intbc <- function(exp, obs, exp_cor = NULL, target_grid, bc_method, int_meth #'takes the first and last elements of the latitudes and longitudes. #'@param ncores an integer indicating the number of cores to use in parallel computation. #'The default value is NULL. +#'@param loocv a logical indicating whether to apply leave-one-out cross-validation when +#'applying the bias correction. Default to TRUE. #'@import CSTools #' #'@seealso \code{\link[CSTools]{BiasCorrection}} @@ -214,7 +218,7 @@ Intbc <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo bc_method, int_method = NULL, points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", member_dim = "member", source_file_exp = NULL, - source_file_obs = NULL, region = NULL, ncores = NULL, ...) { + source_file_obs = NULL, region = NULL, ncores = NULL, loocv = TRUE, ...) { if (!inherits(bc_method, 'character')) { stop("Parameter 'bc_method' must be of the class 'character'") @@ -317,40 +321,61 @@ Intbc <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo } } - exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, - target_grid = target_grid, method_remap = int_method, - points = points, source_file = source_file_exp, - lat_dim = lat_dim, lon_dim = lon_dim, - method_point_interp = method_point_interp, - region = region, ncores = ncores) + if (!is.null(exp_cor) & loocv) { # loocv should be equal to false to train with + # the whole hindcast and predict with the forecast + loocv <- FALSE + warning("Forecast data will be downscaled. 'loocv' is set to FALSE ", + "to train the model with the whole hindcast and predict with the forecast.") + } - exp_cor_interpolated <- NULL + if (.check_coords(lat1 = exp_lats, lat2 = obs_lats, + lon1 = exp_lons, lon2 = obs_lons)) { - if (!is.null(exp_cor)) { - exp_cor_interpolated <- Interpolation(exp = exp_cor, lats = exp_lats, lons = exp_lons, - target_grid = target_grid, method_remap = int_method, - points = points, source_file = source_file_exp, - lat_dim = lat_dim, lon_dim = lon_dim, - method_point_interp = method_point_interp, - region = region, ncores = ncores) + exp_interpolated <- NULL + exp_interpolated$data <- exp + exp_interpolated$lat <- exp_lats + exp_interpolated$lon <- exp_lons + exp_cor_interpolated <- NULL + if (!is.null(exp_cor)) { + exp_cor_interpolated$data <- exp_cor + } + obs_ref <- obs + } else { - } - - # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to - # the same grid to force the matching - if ((!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats, - lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !(point_obs)) { - obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, + exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid, method_remap = int_method, - points = points, source_file = source_file_obs, + points = points, source_file = source_file_exp, lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp, region = region, ncores = ncores) - obs_ref <- obs_interpolated$data - } else { - obs_ref <- obs - } + exp_cor_interpolated <- NULL + + if (!is.null(exp_cor)) { + exp_cor_interpolated <- Interpolation(exp = exp_cor, lats = exp_lats, lons = exp_lons, + target_grid = target_grid, method_remap = int_method, + points = points, source_file = source_file_exp, + lat_dim = lat_dim, lon_dim = lon_dim, + method_point_interp = method_point_interp, + region = region, ncores = ncores) + + } + + # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to + # the same grid to force the matching + if ((!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats, + lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !(point_obs)) { + obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, + target_grid = target_grid, method_remap = int_method, + points = points, source_file = source_file_obs, + lat_dim = lat_dim, lon_dim = lon_dim, + method_point_interp = method_point_interp, + region = region, ncores = ncores) + obs_ref <- obs_interpolated$data + } else { + obs_ref <- obs + } + } # Some functions only accept the dimension names "member" and "sdate" for exp and # "sdate" for obs #if (member_dim != 'member') { @@ -368,19 +393,30 @@ Intbc <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo k_out <- loop <- 1 # the data is NOT hourly/daily/weekly, i.e., time = 1. Apply leave one data out. daily <- FALSE # for the case at which time > 1 or for the case of forecast dwn (exp_cor) - eval.method <- "leave-one-out" - # if time > 1, apply leave-one-year-out - if ( time_dim %in% names(dim(obs_ref)) & dim(obs_ref)[time_dim] > 1 & is.null(exp_cor)) { + ## cross val + if (loocv) { + eval.method <- "leave-one-out" + } else { + eval.method <- "in-sample" + } + # if time > 1, apply leave-one-year-out manually. + if ( time_dim %in% names(dim(obs_ref)) & dim(obs_ref)[time_dim] > 1 ) { daily <- TRUE eval.method <- "in-sample" - sdate_num <- as.numeric (dim(obs_ref)[sdate_dim]) k_out <- as.numeric (dim(obs_ref)[time_dim]) ## in case loocv = TRUE and the data is daily, leave one-year data out obs_ref <- MergeDims(obs_ref, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim) - exp_interpolated$data <- MergeDims(exp_interpolated$data, + exp_interpolated$data <- MergeDims(exp_interpolated$data, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim) - loop <- dim(obs_ref)[sdate_dim]/k_out - } else if (!is.null(exp_cor)) { - eval.method <- "in-sample" + if (is.null(exp_cor)) { + if (loocv) { + loop <- dim(obs_ref)[sdate_dim]/k_out + } + } else { + exp_cor_interpolated$data <- MergeDims(exp_cor_interpolated$data, + merge_dims = c(time_dim, sdate_dim), + rename_dim = sdate_dim) + sdate_num_fr <- as.numeric (dim(exp_cor_interpolated$data)[sdate_dim]) + } } sdate_num <- as.numeric (dim(obs_ref)[sdate_dim]) @@ -458,8 +494,13 @@ Intbc <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo names(dim(res)) <- dimnames if (daily) { - res <- SplitDim(res, split_dim = sdate_dim, new_dim_name = time_dim, - indices = rep(1:k_out, sdate_num/k_out)) + if (is.null(exp_cor)) { + res <- SplitDim(res, split_dim = sdate_dim, new_dim_name = time_dim, + indices = rep(1:k_out, sdate_num/k_out)) + } else { + res <- SplitDim(res, split_dim = sdate_dim, new_dim_name = time_dim, + indices = rep(1:k_out, sdate_num_fr/k_out)) + } obs_ref <- SplitDim(obs_ref, split_dim = sdate_dim, new_dim_name = time_dim, indices = rep(1:k_out, sdate_num/k_out)) } diff --git a/R/Intlr.R b/R/Intlr.R index f6d972171b14b152b5adfaaea049ef779175a6e0..50dff4514a4266decfe4efa7363f0461a38b19ca 100644 --- a/R/Intlr.R +++ b/R/Intlr.R @@ -450,44 +450,66 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo "elements of the parametres 'obs_lats' and 'obs_lons'.") region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)]) } - - exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, - target_grid = target_grid, points = points, - method_point_interp = method_point_interp, - source_file = source_file_exp, lat_dim = lat_dim, - lon_dim = lon_dim, method_remap = int_method, - region = region, ncores = ncores) - - if (!is.null(exp_cor) & is.null(predictors)) { - exp_cor_interpolated <- Interpolation(exp = exp_cor, lats = exp_lats, lons = exp_lons, - target_grid = target_grid, points = points, - method_point_interp = method_point_interp, - source_file = source_file_exp, lat_dim = lat_dim, - lon_dim = lon_dim, method_remap = int_method, - region = region, ncores = ncores) - } - - # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to - # the same grid to force the matching - if ((!suppressWarnings(.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats, - lon1 = exp_interpolated$lon, lon2 = obs_lons))) | !(point_obs)) { - obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, + + # Normally, the data to be downscaled is not expected to have the same spatial resolution + # as the observations. However, in the use of full cross-validation with SUNSET, + # interpolation is performed outside the loop using a separate "Interpolation" function + # to speed up the process and avoid repeating interpolation at each step. + # Therefore, in such cases, the observations and model data can share the same coordinates + # and the interpolation step is skipped through the following if condition to save time. + + if (.check_coords(lat1 = exp_lats, lat2 = obs_lats, + lon1 = exp_lons, lon2 = obs_lons)) { + exp_interpolated <- NULL + exp_interpolated$data <- exp + exp_interpolated$lat <- exp_lats + exp_interpolated$lon <- exp_lons + if (!is.null(exp_cor)) { + exp_cor_interpolated <- NULL + exp_cor_interpolated$data <- exp_cor + } + obs_interpolated <- obs + lats <- obs_lats + lons <- obs_lons + } else { + exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid, points = points, method_point_interp = method_point_interp, - source_file = source_file_obs, lat_dim = lat_dim, + source_file = source_file_exp, lat_dim = lat_dim, lon_dim = lon_dim, method_remap = int_method, region = region, ncores = ncores) + + if (!is.null(exp_cor) & is.null(predictors)) { + exp_cor_interpolated <- Interpolation(exp = exp_cor, lats = exp_lats, lons = exp_lons, + target_grid = target_grid, points = points, + method_point_interp = method_point_interp, + source_file = source_file_exp, lat_dim = lat_dim, + lon_dim = lon_dim, method_remap = int_method, + region = region, ncores = ncores) + } + + # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated + # to the same grid to force the matching + if ((!suppressWarnings(.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats, + lon1 = exp_interpolated$lon, lon2 = obs_lons))) | + !(point_obs)) { + obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, + target_grid = target_grid, points = points, + method_point_interp = method_point_interp, + source_file = source_file_obs, lat_dim = lat_dim, + lon_dim = lon_dim, method_remap = int_method, + region = region, ncores = ncores) - lats <- obs_interpolated$lat - lons <- obs_interpolated$lon - obs_interpolated <- obs_interpolated$data - } else { - obs_interpolated <- obs - lats <- obs_lats - lons <- obs_lons + lats <- obs_interpolated$lat + lons <- obs_interpolated$lon + obs_interpolated <- obs_interpolated$data + } else { + obs_interpolated <- obs + lats <- obs_lats + lons <- obs_lons + } } - } - + } #----------------------------------- # Linear regressions #----------------------------------- @@ -745,7 +767,7 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo .train_lm <- function(df, loocv, k_out = 1, pca = FALSE) { # Remove predictor columns containing only NA's - df_all<- df[ ,apply(as.matrix(df[,colnames(df) != 'y'],nrow(df),ncol(df)-1), 2, + df_all <- df[ ,apply(as.matrix(df[,colnames(df) != 'y'],nrow(df),ncol(df)-1), 2, function(x) !all(is.na(x)))] if (loocv) { @@ -930,7 +952,7 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo PR_mat <- PR$x[, 1:N_predictors, drop = FALSE] if (!is.null(PR$na.action)) { ## if there are NAs, relocate them to the correct order dum <- array(NA, dim = c(nrow(dx), ncol(PR_mat) )) - dum [as.numeric(rownames(PR_mat)), ] <- PR_mat + dum [which(!is.na(dx[, 1])), ] <- PR_mat PR_mat <- dum } return (list(PR_mat = PR_mat, N_predictors = N_predictors, diff --git a/R/LogisticReg.R b/R/LogisticReg.R index 2a6259bea18d023bafc773064b5dcae418de658b..e2ba697d320790b4d4186343eb549975e2fd7236 100644 --- a/R/LogisticReg.R +++ b/R/LogisticReg.R @@ -394,20 +394,48 @@ LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, } } - exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, - target_grid = target_grid, method_remap = int_method, - points = points, source_file = source_file_exp, - lat_dim = lat_dim, lon_dim = lon_dim, - method_point_interp = method_point_interp, - region = region, ncores = ncores) + if (.check_coords(lat1 = exp_lats, lat2 = obs_lats, + lon1 = exp_lons, lon2 = obs_lons)) { + exp_interpolated <- NULL + exp_interpolated$data <- exp + exp_interpolated$lat <- exp_lats + exp_interpolated$lon <- exp_lons + exp_cor_interpolated <- NULL + if (!is.null(exp_cor)) { + exp_cor_interpolated <- NULL + exp_cor_interpolated$data <- exp_cor + } + obs_ref <- obs + } else { + exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, + target_grid = target_grid, method_remap = int_method, + points = points, source_file = source_file_exp, + lat_dim = lat_dim, lon_dim = lon_dim, + method_point_interp = method_point_interp, + region = region, ncores = ncores) - if (!is.null(exp_cor)) { - exp_cor_interpolated <- Interpolation(exp = exp_cor, lats = exp_lats, lons = exp_lons, - target_grid = target_grid, points = points, - method_point_interp = method_point_interp, - source_file = source_file_exp, lat_dim = lat_dim, - lon_dim = lon_dim, method_remap = int_method, - region = region, ncores = ncores) + if (!is.null(exp_cor)) { + exp_cor_interpolated <- Interpolation(exp = exp_cor, lats = exp_lats, lons = exp_lons, + target_grid = target_grid, points = points, + method_point_interp = method_point_interp, + source_file = source_file_exp, lat_dim = lat_dim, + lon_dim = lon_dim, method_remap = int_method, + region = region, ncores = ncores) + } + # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to + # the same grid to force the matching + if ((!.check_coords(lat1 = as.numeric(exp_interpolated$lat), lat2 = obs_lats, + lon1 = as.numeric(exp_interpolated$lon), lon2 = obs_lons)) | !(point_obs)) { + obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, + target_grid = target_grid, method_remap = int_method, + points = points, source_file = source_file_obs, + lat_dim = lat_dim, lon_dim = lon_dim, + method_point_interp = method_point_interp, + region = region, ncores = ncores) + obs_ref <- obs_interpolated$data + } else { + obs_ref <- obs + } } # compute ensemble mean anomalies @@ -469,21 +497,6 @@ LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, stop(paste0(log_reg_method, " not recognised or not implemented.")) } - # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to - # the same grid to force the matching - if ((!.check_coords(lat1 = as.numeric(exp_interpolated$lat), lat2 = obs_lats, - lon1 = as.numeric(exp_interpolated$lon), lon2 = obs_lons)) | !(point_obs)) { - obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, - target_grid = target_grid, method_remap = int_method, - points = points, source_file = source_file_obs, - lat_dim = lat_dim, lon_dim = lon_dim, - method_point_interp = method_point_interp, - region = region, ncores = ncores) - obs_ref <- obs_interpolated$data - } else { - obs_ref <- obs - } - k_out <- 1 ## in case loocv = TRUE and the data is NOT daily, leave one data out. daily <- FALSE if ( time_dim %in% names(dim(obs_ref)) & dim(obs_ref)[time_dim] > 1) {