diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R
index 89158e8c819d477ce0745a399b3ae3daa92385d4..59233dc2ac02749e91b597e832c721a412915f55 100644
--- a/modules/Downscaling/Downscaling.R
+++ b/modules/Downscaling/Downscaling.R
@@ -14,15 +14,18 @@ Downscaling <- function(recipe, data) {
   # recipe: object obtained when passing the .yml recipe file to read_yaml()
 
   type <- tolower(recipe$Analysis$Workflow$Downscaling$type)
-
+
   if (type == "none") {
-    hcst_downscal$exp <- data$hcst
-    hcst_downscal$obs <- data$obs
-    fcst_downscal <- data$fcst
-    DOWNSCAL_MSG <- "##### METHOD SET TO 'none': NO DOWNSCALING PERFORMED #####"
+    hcst_downscal <- list(exp = data$hcst, obs = data$obs)
+    DOWNSCAL_MSG <- "##### NO DOWNSCALING PERFORMED #####"
   } else {
+    if (!is.null(data$fcst)) {
+      warn(recipe$Run$logger,
+           "Downscaling will only be performed on the hindcast data.")
+      data$fcst <- NULL
+    }
     # Downscaling function params
     int_method <- tolower(recipe$Analysis$Workflow$Downscaling$int_method)
     bc_method <- tolower(recipe$Analysis$Workflow$Downscaling$bc_method)
@@ -39,20 +42,15 @@ Downscaling <- function(recipe, data) {
     }
 
     #TO DO: add the parameter loocv where it corresponds
-    if (is.null(recipe$Analysis$loocv) & is.null(data$fcst)) {
+    if (is.null(recipe$Analysis$loocv)) {
       loocv <- TRUE
-    } else if (!is.null(data$fcst) & (isTRUE(recipe$Analysis$loocv) |
-               is.null(recipe$Analysis$loocv))) {
-      loocv <- FALSE
-      warning("'loocv' is set to FALSE to train the model with the whole hindcast ",
-              "and predict with the forecast.")
     } else {
      loocv <- recipe$Analysis$loocv
     }
 
     DOWNSCAL_TYPES <- c("none", "int", "intbc", "intlr", "analogs", "logreg")
     BC_METHODS <- c("quantile_mapping", "bias", "evmos", "mse_min", "crps_min",
                     "rpc-based", "qm")
-    LR_METHODS <- c("basic", "large-scale", "9nn")
+    LR_METHODS <- c("basic", "large-scale", "4nn")
     LOG_REG_METHODS <- c("ens_mean", "ens_mean_sd", "sorted_members")
 
     if (!(type %in% DOWNSCAL_TYPES)) {
@@ -118,7 +116,7 @@ Downscaling <- function(recipe, data) {
                     "'rpc-based', 'qm'."))
       }
 
-      hcst_downscal <- CST_Intbc(data$hcst, data$obs, data$fcst,
+      hcst_downscal <- CST_Intbc(data$hcst, data$obs,
                                  target_grid = target_grid,
                                  bc_method = bc_method,
                                  int_method = int_method,
@@ -138,7 +136,7 @@ Downscaling <- function(recipe, data) {
 
       if (is.null(lr_method)) {
         stop("Please provide one linear regression method in the recipe. Accepted ",
-             "methods are 'basic', 'large-scale', '9nn'.")
+             "methods are 'basic', 'large-scale', '4nn'.")
       }
 
       if (is.null(target_grid)) {
@@ -147,7 +145,7 @@ Downscaling <- function(recipe, data) {
 
       if (!(lr_method %in% LR_METHODS)) {
         stop(paste0(lr_method, " method in the recipe is not available. Accepted methods ",
-                    "are 'basic', 'large-scale', '9nn'."))
+                    "are 'basic', 'large-scale', '4nn'."))
       }
 
       # TO DO: add the possibility to have the element 'pred' in 'data'
@@ -159,7 +157,7 @@ Downscaling <- function(recipe, data) {
         data$pred$data <- NULL
       }
 
-      hcst_downscal <- CST_Intlr(data$hcst, data$obs, data$fcst,
+      hcst_downscal <- CST_Intlr(data$hcst, data$obs,
                                  lr_method = lr_method,
                                  target_grid = target_grid,
                                  points = NULL,
@@ -178,34 +176,17 @@ Downscaling <- function(recipe, data) {
 
       DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####"
     } else if (type == "analogs") {
-
-      if ((!is.null(data$hcstL) | !is.null(data$fcstL)) & is.null(data$obsL)) {
-        stop("Large scale model field (i.e., data$hcstL or data$fcstL) is provided. ",
-             "Observation must also be provided for the large scale variable.")
-      }
-      if (is.null(data$hcstL) & is.null(data$fcstL) & !is.null(data$obsL)) {
-        stop("Large scale observational field (i.e., data$obsL) is provided. ",
-             "Model data must also be provided for the large scale variable.")
-      }
-
-      if (!is.null(data$hcstL) & !is.null(data$obsL) & !is.null(data$fcst) & is.null(data$fcstL)) {
-        stop("Large scale observational field (i.e., data$obsL) is provided. Please also ",
-             "provide large scale forecast data (i.e., data$fcstL) ",
-             "or set data$fcst to NULL.")
-      }
-
       if (is.null(nanalogs)) {
         warning("The number of analogs for searching has not been provided in the ",
                 "recipe. Setting it to 3.")
         nanalogs <- 3
       }
 
-      if (!is.null(size) & (recipe$Analysis$Variables$freq == "monthly_mean" |
-                            recipe$Analysis$Variables$freq == "weekly_mean")) {
+      if (!is.null(size) & recipe$Analysis$Variables$freq == "monthly_mean") {
         size <- NULL
         warning("Size is set to NULL. ",
-                "It must be NULL for the weekly or monthly input data.")
+                "It must be NULL for the monthly input data.")
       }
 
       if (!is.null(size)) {
@@ -214,44 +195,15 @@ Downscaling <- function(recipe, data) {
                                            sdate_dim = 'syear',
                                            time_dim = 'time',
                                            loocv = TRUE,
-                                           size = size,
-                                           ncores = ncores)
-
-        if (!is.null(data$hcstL)) {
-          data$obsL$data <- .generate_window(data$obsL$data,
-                                             sdate_dim = 'syear',
-                                             time_dim = 'time',
-                                             loocv = TRUE,
-                                             size = size,
-                                             ncores = ncores)
-        }
+                                           size = size)
+        data$obs$data <- Apply(data$obs$data,
+                               target_dims = "window",
+                               fun = function(x) x[!is.na(x)])$output1
       }
 
-      ## Priority: fcstL, fcst, hcstL, hcst
-      exp <- data$hcst
-
-      if (!is.null(data$hcstL)) {
-        exp <- data$hcstL
-        if (is.null(data$fcst)) { ## if data$fcst exists, no need for the warning
-          print("Provided large scale variable is used as the predictor.")
-        }
-      }
-
-      if (!is.null(data$fcst)) {
-        exp <- data$fcst
-      }
-
-      if (!is.null(data$fcstL)) {
-        exp <- data$fcstL
-        if (is.null(data$hcstL)) { ## if data$hcstL exist, the warning have already been given.
-          print("Provided large scale variable is used as the predictor.")
-        }
-      }
-
-      hcst_downscal <- CST_Analogs(exp, data$obs,
-                                   grid_exp = exp$attrs$source_files[
-                                     which(!is.na(exp$attrs$source_files))[1]],
-                                   obsL = data$obsL,
+      hcst_downscal <- CST_Analogs(data$hcst, data$obs,
+                                   grid_exp = data$hcst$attrs$source_files[
+                                     which(!is.na(data$hcst$attrs$source_files))[1]],
                                    nanalogs = nanalogs,
                                    fun_analog = "wmean",
                                    lat_dim = "latitude",
@@ -261,8 +213,7 @@ Downscaling <- function(recipe, data) {
                                    member_dim = "ensemble",
                                    region = NULL,
                                    return_indices = FALSE,
-                                   loocv_window = loocv,
-                                   metric = "dist",
+                                   loocv_window = loocv,
                                    ncores = ncores)
 
       if (!is.null(size)) {
@@ -301,7 +252,7 @@ Downscaling <- function(recipe, data) {
                     "are 'ens_mean', 'ens_mean_sd', 'sorted_members'."))
       }
 
-      hcst_downscal <- CST_LogisticReg(data$hcst, data$obs, data$fcst,
+      hcst_downscal <- CST_LogisticReg(data$hcst, data$obs,
                                        target_grid = target_grid,
                                        int_method = int_method,
                                        log_reg_method = log_reg_method,
@@ -312,7 +263,6 @@ Downscaling <- function(recipe, data) {
                                        lat_dim = "latitude",
                                        lon_dim = "longitude",
                                        sdate_dim = "syear",
-                                       time_dim = "time",
                                        member_dim = "ensemble",
                                        region = NULL,
                                        loocv = loocv,
@@ -322,13 +272,6 @@ Downscaling <- function(recipe, data) {
     }
   }
   print(DOWNSCAL_MSG)
-
-  fcst_downscal <- NULL
-
-  if (!is.null(data$fcst) | (!is.null(data$fcstL) & type == "analogs")) {
-    fcst_downscal <- hcst_downscal
-    hcst_downscal$exp <- NULL
-  }
 
   # Saving
   if (recipe$Analysis$Workflow$Downscaling$save != 'none') {
@@ -348,5 +291,5 @@ Downscaling <- function(recipe, data) {
     save_observations(recipe = recipe, data_cube = hcst_downscal$obs)
   }
 
-  return(list(hcst = hcst_downscal$exp, obs = hcst_downscal$obs, fcst = fcst_downscal$exp))
+  return(list(hcst = hcst_downscal$exp, obs = hcst_downscal$obs, fcst = NULL))
 }
diff --git a/modules/Downscaling/tmp/Analogs.R b/modules/Downscaling/tmp/Analogs.R
index b342a91a2291ddf06f0dd1ee614942777eb9b473..73b76ae56cd114536a07ec410e172b09e33b3067 100644
--- a/modules/Downscaling/tmp/Analogs.R
+++ b/modules/Downscaling/tmp/Analogs.R
@@ -36,8 +36,7 @@
 #'The object is expected to be already subset for the desired region. Data can be in one
 #'or two integrated regions, e.g., crossing the Greenwich meridian. To get the correct
 #'results in the latter case, the borders of the region should be specified in the parameter
-#''region'. See parameter 'region'. Also, the object can be either hindcast or forecast data.
-#'However, if forecast data is provided, the loocv_window parameter should be selected as FALSE.
+#''region'. See parameter 'region'.
 #'@param obs an 's2dv_cube' object with named dimensions containing the observational field
 #'for the variable targeted for downscaling. The object must have, at least, the dimensions
 #'latitude, longitude and start date. The object is expected to be already subset for the
@@ -45,7 +44,8 @@
 #'@param obsL an 's2dv_cube' object with named dimensions containing the observational
 #'field of the large-scale variable. The object must have, at least, the dimensions latitude,
 #'longitude and start date. The object is expected to be already subset for the desired region.
-#'@param grid_exp a character vector with a path to an example file of the exp
+#'@param grid_exp a character vector with a path to an example file of the exp (if the
+#'predictor is the local scale variable) or expL (if the predictor is a large scale variable)
 #'data. It can be either a path to another NetCDF file from which to read the target grid
 #'(a single grid must be defined in such file) or a character vector indicating the
 #'coarse grid to be passed to CDO, and it must be a grid recognised by CDO.
@@ -159,7 +159,7 @@ CST_Analogs <- function(exp, obs, obsL = NULL, grid_exp, nanalogs = 3,
 #'the best analog, the function extracts the corresponding local-scale variable for that day
 #'from the observation of the local scale variable. The used local-scale and large-scale
 #'variables can be retrieved from independent regions. The input data for the first case must
-#'include 'exp' and 'obs,' while in the second case, 'exp', 'obs,' and 'obsL' are the
+#'include 'exp' and 'obs,' while in the second case, 'obs,' 'obsL,' and 'expL' are the
 #'required input fields. Users can perform the downscaling process over the subregions
 #'that can be identified through the 'region' and 'regionL' arguments, instead of focusing
 #'on the entire area of the loaded data.
@@ -180,8 +180,7 @@ CST_Analogs <- function(exp, obs, obsL = NULL, grid_exp, nanalogs = 3,
 #'The object is expected to be already subset for the desired region. Data can be in one
 #'or two integrated regions, e.g., crossing the Greenwich meridian. To get the correct
 #'results in the latter case, the borders of the region should be specified in the parameter
-#''region'. See parameter 'region'. Also, the object can be either hindcast or forecast data.
-#'However, if forecast data is provided, the loocv_window parameter should be selected as FALSE.
+#''region'. See parameter 'region'.
 #'@param obs an array with named dimensions containing the observational field for the variable
 #'targeted for downscaling. The object must have, at least, the dimensions latitude, longitude,
 #'start date and time. The object is expected to be already subset for the desired region.
@@ -206,7 +205,8 @@ CST_Analogs <- function(exp, obs, obsL = NULL, grid_exp, nanalogs = 3,
 #'range from -90 to 90.
 #'@param obsL_lons a numeric vector containing the longitude values in 'obsL'. Longitudes
 #'can range from -180 to 180 or from 0 to 360.
-#'@param grid_exp a character vector with a path to an example file of the exp data.
+#'@param grid_exp a character vector with a path to an example file of the exp (if the
+#'predictor is the local scale variable) or expL (if the predictor is a large scale variable) data.
 #'It can be either a path to another NetCDF file from which to read the target grid
 #'(a single grid must be defined in such file) or a character vector indicating the
 #'coarse grid to be passed to CDO, and it must be a grid recognised by CDO.
@@ -390,14 +390,9 @@ Analogs <- function(exp, obs, exp_lats = NULL, exp_lons = NULL, obs_lats, obs_lo
 
   # metric method to be used to specify the analogs
   stopifnot(metric %in% c("cor", "dcor", "dist"))
-
-  if ((dim(exp)[sdate_dim] != dim(obs)[sdate_dim]) & loocv_window) {
-    loocv_window <- FALSE
-    warning("The lengths of sdate_dim in the data provided through exp and obs are ",
-            "not the same. Thus, exp is considered as forecast data, and loocv_window ",
-            "is set to FALSE.")
-  }
 
   if (!is.null(obsL)) {
     obs_train <- obsL
     obs_train_lats <- obsL_lats
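For readers new to the analog approach the documentation above describes, a conceptual, self-contained sketch (synthetic data, not the package implementation): pick the nanalogs candidate days whose coarse fields are closest to the target day (Euclidean distance, i.e. metric = "dist") and combine the matching high-resolution observations with a distance-weighted mean (fun_analog = "wmean").

set.seed(1)
coarse <- matrix(rnorm(30 * 16), nrow = 30)   # 30 candidate days on a 4x4 coarse grid
target <- rnorm(16)                           # coarse field of the day to downscale
hres   <- matrix(rnorm(30 * 100), nrow = 30)  # co-occurring 10x10 high-res observations
nanalogs <- 3
d <- sqrt(rowSums(sweep(coarse, 2, target)^2))    # distance of each candidate day
best <- order(d)[seq_len(nanalogs)]               # indices of the best analog days
w <- 1 / d[best]                                  # inverse-distance weights
downscaled <- colSums(hres[best, ] * w) / sum(w)  # "wmean" over the analog days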
diff --git a/modules/Downscaling/tmp/Intbc.R b/modules/Downscaling/tmp/Intbc.R
index 70043c1fe9cd1ac8e06fcceb3df5f81849c7e1eb..4b9527323aa58ecd67daf71870c5755e7b31492b 100644
--- a/modules/Downscaling/tmp/Intbc.R
+++ b/modules/Downscaling/tmp/Intbc.R
@@ -20,10 +20,6 @@
 #'@param obs an 's2dv object' containing the observational field. The object
 #'must have, at least, the dimensions latitude, longitude and start date. The object is
 #'expected to be already subset for the desired region.
-#'@param exp_cor an optional 's2dv_cube' object with named dimensions containing the seasonal
-#'forecast experiment data. If the forecast is provided, it will be downscaled using the
-#'hindcast and observations; if not provided, the hindcast will be downscaled instead. The
-#'default value is NULL.
 #'@param target_grid a character vector indicating the target grid to be passed to CDO.
 #'It must be a grid recognised by CDO or a NetCDF file.
 #'@param bc_method a character vector indicating the bias adjustment method to be applied after
@@ -66,13 +62,13 @@
 #'dim(obs) <- c(lat = 12, lon = 15, sdate = 5)
 #'obs_lons <- seq(1,5, 4/14)
 #'obs_lats <- seq(1,4, 3/11)
-#'exp <- s2dv_cube(data = exp, coords = list(lat = exp_lats, lon = exp_lons))
-#'obs <- s2dv_cube(data = obs, coords = list(lat = obs_lats, lon = obs_lons))
-#'res <- CST_Intbc(exp = exp, obs = obs, target_grid = 'r1280x640', bc_method = 'bias', int_method = 'conservative')
+#'exp <- s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons)
+#'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons)
+#'res <- CST_Intbc(exp = exp, obs = obs, target_grid = 'r1280x640', bc_method = 'simple_bias', int_method = 'conservative')
 #'@export
-CST_Intbc <- function(exp, obs, exp_cor = NULL, target_grid, bc_method, int_method = NULL,
-                      points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon",
+CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, points = NULL,
+                      method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon",
                       sdate_dim = "sdate", member_dim = "member", region = NULL, ncores = NULL, ...) {
   if (!inherits(exp,'s2dv_cube')) {
@@ -83,38 +79,25 @@ CST_Intbc <- function(exp, obs, exp_cor = NULL, target_grid, bc_method, int_meth
     stop("Parameter 'obs' must be of the class 's2dv_cube'")
   }
 
-  res <- Intbc(exp = exp$data, obs = obs$data, exp_cor = exp_cor$data,
-               exp_lats = exp$coords[[lat_dim]], exp_lons = exp$coords[[lon_dim]],
-               obs_lats = obs$coords[[lat_dim]], obs_lons = obs$coords[[lon_dim]],
-               target_grid = target_grid, int_method = int_method,
-               bc_method = bc_method, points = points,
-               source_file_exp = exp$attrs$source_files[1],
-               source_file_obs = obs$attrs$source_files[1],
+  res <- Intbc(exp = exp$data, obs = obs$data, exp_lats = exp$coords[[lat_dim]], exp_lons = exp$coords[[lon_dim]],
+               obs_lats = obs$coords[[lat_dim]], obs_lons = obs$coords[[lon_dim]], target_grid = target_grid,
+               int_method = int_method, bc_method = bc_method, points = points,
+               source_file_exp = exp$coords$source_files[1], source_file_obs = obs$coords$source_files[1],
                method_point_interp = method_point_interp, lat_dim = lat_dim, lon_dim = lon_dim,
                sdate_dim = sdate_dim, member_dim = member_dim, region = region, ncores = ncores, ...)
 
   # Modify data, lat and lon in the original s2dv_cube, adding the downscaled data
+  exp$data <- res$data
+  exp$dims <- dim(exp$data)
+  exp$coords[[lon_dim]] <- res$lon
+  exp$coords[[lat_dim]] <- res$lat
+
   obs$data <- res$obs
   obs$dims <- dim(obs$data)
   obs$coords[[lon_dim]] <- res$lon
   obs$coords[[lat_dim]] <- res$lat
 
-  if (is.null(exp_cor)) {
-    exp$data <- res$data
-    exp$dims <- dim(exp$data)
-    exp$coords[[lon_dim]] <- res$lon
-    exp$coords[[lat_dim]] <- res$lat
-
-    res_s2dv <- list(exp = exp, obs = obs)
-  } else {
-    exp_cor$data <- res$data
-    exp_cor$dims <- dim(exp_cor$data)
-    exp_cor$coords[[lon_dim]] <- res$lon
-    exp_cor$coords[[lat_dim]] <- res$lat
-
-    res_s2dv <- list(exp = exp_cor, obs = obs)
-  }
-
+  res_s2dv <- list(exp = exp, obs = obs)
   return(res_s2dv)
 }
@@ -140,11 +123,7 @@ CST_Intbc <- function(exp, obs, exp_cor = NULL, target_grid, bc_method, int_meth
 #''region'.
 #'@param obs an array with named dimensions containing the observational field. The object
 #'must have, at least, the dimensions latitude, longitude and start date. The object is
-#'expected to be already subset for the desired region.
-#'@param exp_cor an optional array with named dimensions containing the seasonal forecast
-#'experiment data. If the forecast is provided, it will be downscaled using the hindcast and
-#'observations; if not provided, the hindcast will be downscaled instead. The default value
-#'is NULL.
+#'expected to be already subset for the desired region.
 #'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must
 #'range from -90 to 90.
 #'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes
@@ -208,11 +187,10 @@ CST_Intbc <- function(exp, obs, exp_cor = NULL, target_grid, bc_method, int_meth
 #'res <- Intbc(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons, obs_lats = obs_lats,
 #'obs_lons = obs_lons, target_grid = 'r1280x640', bc_method = 'simple_bias', int_method = 'conservative')
 #'@export
-Intbc <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lons, target_grid,
-                  bc_method, int_method = NULL, points = NULL, method_point_interp = NULL,
-                  lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate",
-                  time_dim = "time", member_dim = "member", source_file_exp = NULL,
-                  source_file_obs = NULL, region = NULL, ncores = NULL, ...) {
+Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, bc_method, int_method = NULL,
+                  points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate",
+                  time_dim = "time", member_dim = "member", source_file_exp = NULL, source_file_obs = NULL,
+                  region = NULL, ncores = NULL, ...) {
 
   if (!inherits(bc_method, 'character')) {
     stop("Parameter 'bc_method' must be of the class 'character'")
@@ -260,27 +238,6 @@ Intbc <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo
                "'crps_min', 'rpc-based'. The abbreviations 'dbc','qm' can also be used.")
   }
 
-  if (!is.null(exp_cor)) {
-    if (is.na(match(lon_dim, names(dim(exp_cor))))) {
-      stop("Missing longitude dimension in 'exp_cor', or does not match the parameter ",
-           "'lon_dim'")
-    }
-
-    if (is.na(match(lat_dim, names(dim(exp_cor))))) {
-      stop("Missing latitude dimension in 'exp_cor', or does not match the parameter ",
-           "'lat_dim'")
-    }
-
-    if (is.na(match(sdate_dim, names(dim(exp_cor))))) {
-      stop("Missing start date dimension in 'exp_cor', or does not match the parameter ",
-           "'sdate_dim'")
-    }
-
-    if (is.na(match(member_dim, names(dim(exp_cor))))) {
-      stop("Missing member dimension in 'exp_cor', or does not match the parameter 'member_dim'")
-    }
-  }
-
   # When observations are pointwise
   if (!is.null(points) & !is.na(match("location", names(dim(obs))))) {
     point_obs <- T
@@ -302,9 +259,9 @@ Intbc <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo
   }
 
   if (is.null(region)) {
-    warning("The borders of the downscaling region have not been provided. Assuming the ",
-            "four borders of the downscaling region are defined by the first and last ",
-            "elements of the parametres 'obs_lats' and 'obs_lons'.")
+    warning("The borders of the downscaling region have not been provided. Assuming the four borders ",
+            "of the downscaling region are defined by the first and last elements of the parameters ",
+            "'obs_lats' and 'obs_lons'.")
     region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)])
   }
   ## ncores
@@ -315,34 +272,18 @@ Intbc <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo
     }
   }
 
-  exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons,
-                                    target_grid = target_grid, method_remap = int_method,
-                                    points = points, source_file = source_file_exp,
-                                    lat_dim = lat_dim, lon_dim = lon_dim,
-                                    method_point_interp = method_point_interp,
+  exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid,
+                                    method_remap = int_method, points = points, source_file = source_file_exp,
+                                    lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp,
                                     region = region, ncores = ncores)
-
-  exp_cor_interpolated <- NULL
-
-  if (!is.null(exp_cor)) {
-    exp_cor_interpolated <- Interpolation(exp = exp_cor, lats = exp_lats, lons = exp_lons,
-                                          target_grid = target_grid, method_remap = int_method,
-                                          points = points, source_file = source_file_exp,
-                                          lat_dim = lat_dim, lon_dim = lon_dim,
-                                          method_point_interp = method_point_interp,
-                                          region = region, ncores = ncores)
-
-  }
 
   # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to
   # the same grid to force the matching
   if ((!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats,
                       lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !(point_obs)) {
-    obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons,
-                                      target_grid = target_grid, method_remap = int_method,
-                                      points = points, source_file = source_file_obs,
-                                      lat_dim = lat_dim, lon_dim = lon_dim,
-                                      method_point_interp = method_point_interp,
+    obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid,
+                                      method_remap = int_method, points = points, source_file = source_file_obs,
+                                      lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp,
                                       region = region, ncores = ncores)
     obs_ref <- obs_interpolated$data
   } else {
@@ -365,17 +306,10 @@ Intbc <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo
 
   if (bc_method == 'qm' | bc_method == 'quantile_mapping') {
 
-    res <- QuantileMapping(exp = exp_interpolated$data, obs = obs_ref,
-                           exp_cor = exp_cor_interpolated$data,
-                           na.rm = TRUE, memb_dim = member_dim, sdate_dim = sdate_dim,
-                           ncores = ncores, ...)
+    res <- QuantileMapping(exp = exp_interpolated$data, obs = obs_ref, na.rm = TRUE,
+                           memb_dim = member_dim, sdate_dim = sdate_dim, ncores = ncores, ...)
   } else if (bc_method == 'dbc' | bc_method == 'dynamical_bias') {
-    # Dynamical bias correction is not yet prepared to handle hindcast-forecast data
-    # It will return only the hindcast downscaled
-    if (!is.null(exp_cor)) {
-      warning("For the dynamical bias correction only the hindcast downscaled will be returned.")
-    }
     # the temporal dimension must be only one dimension called "time"
     if (all(c(time_dim, sdate_dim) %in% names(dim(exp_interpolated$data)))) {
       exp_interpolated$data <- Apply(exp_interpolated$data, target_dims = c(time_dim, sdate_dim),
@@ -394,10 +328,8 @@ Intbc <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo
     if (dim(obs_ref)[sdate_dim] == 1) {
       warning('Simple Bias Correction should not be used with only one observation. Returning NA.')
     }
-    res <- Calibration(exp = exp_interpolated$data, obs = obs_ref,
-                       exp_cor = exp_cor_interpolated$data,
-                       memb_dim = member_dim, sdate_dim = sdate_dim, ncores = ncores,
-                       cal.method = bc_method)
+    res <- Calibration(exp = exp_interpolated$data, obs = obs_ref, memb_dim = member_dim,
+                       sdate_dim = sdate_dim, ncores = ncores, cal.method = bc_method)
   }
 
   # Return a list of three elements
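With exp_cor removed, the 'qm' branch reduces to a single hindcast/observations call. A minimal sketch on synthetic arrays; it assumes the CSTools package is installed and exports QuantileMapping with the arguments used in the branch above.

library(CSTools)
set.seed(1)
exp_i <- array(rnorm(5 * 20 * 4 * 4, mean = 1),
               dim = c(member = 5, sdate = 20, lat = 4, lon = 4))
obs_i <- array(rnorm(20 * 4 * 4), dim = c(sdate = 20, lat = 4, lon = 4))
res <- QuantileMapping(exp = exp_i, obs = obs_i, na.rm = TRUE,
                       memb_dim = "member", sdate_dim = "sdate")  # mirrors the 'qm' call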
diff --git a/modules/Downscaling/tmp/Interpolation.R b/modules/Downscaling/tmp/Interpolation.R
index 51de9ec0748f9e1b2c575e6ecd3664c14cf03d48..5d5f70b8d074f50c4ef391c78bb32f51c75ed191 100644
--- a/modules/Downscaling/tmp/Interpolation.R
+++ b/modules/Downscaling/tmp/Interpolation.R
@@ -48,7 +48,7 @@
 #'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 1)
 #'lons <- 1:5
 #'lats <- 1:4
-#'exp <- s2dv_cube(data = exp, coords = list(lat = lats, lon = lons))
+#'exp <- s2dv_cube(data = exp, lat = lats, lon = lons)
 #'res <- CST_Interpolation(exp = exp, method_remap = 'conservative', target_grid = 'r1280x640')
 #'@export
 CST_Interpolation <- function(exp, points = NULL, method_remap = NULL, target_grid = NULL,
@@ -68,8 +68,8 @@ CST_Interpolation <- function(exp, points = NULL, method_remap = NULL, target_gr
     stop("The name of the latitude/longitude dimensions in 'exp$data' must match the parameters 'lat_dim' and 'lon_dim'")
   }
 
-  res <- Interpolation(exp = exp$data, lats = exp$coords[[lat_dim]], lons = exp$attrs[[lon_dim]],
-                       source_file = exp$attrs$source_files[1], points = points,
+  res <- Interpolation(exp = exp$data, lats = exp$coords[[lat_dim]], lons = exp$coords[[lon_dim]],
+                       source_file = exp$coords$source_files[1], points = points,
                        method_remap = method_remap, target_grid = target_grid, lat_dim = lat_dim,
                        lon_dim = lon_dim, region = region, method_point_interp = method_point_interp, ncores = ncores)
@@ -334,10 +334,10 @@ Interpolation <- function(exp, lats, lons, points = NULL, source_file = NULL, me
   if (griddes$yinc < 0) {
     system(paste0('cdo invertlat ', nc_cropped1, ' ', nc_cropped2))
     griddes <- .get_griddes(nc_cropped2)
-    system(paste0('rm ', nc_cropped2))
   }
   # remove temporary files
   system(paste0('rm ', nc_cropped1))
+  system(paste0('rm ', nc_cropped2))
 
   if (is.null(griddes)) {
     stop("'griddes' not found in the NetCDF source files")
@@ -679,9 +679,7 @@ Interpolation <- function(exp, lats, lons, points = NULL, source_file = NULL, me
 # sdate, time, ensemble, num_procs, etc)
 #======================
 .get_model_data <- function(weights.df, mdata, ncores = NULL) {
-
-  require(plyr)
-
+
   #-----------------
   # Get data for all combinations of i and j.
   # (inefficient, getting many unneeded pairs).
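For context, the call pattern used throughout the module when invoking Interpolation; not run here because it shells out to CDO. 'exp_array', 'exp_lats' and 'exp_lons' are hypothetical placeholders for a named-dimension array and its coordinate vectors.

res <- Interpolation(exp = exp_array, lats = exp_lats, lons = exp_lons,
                     target_grid = "r1280x640",      # CDO grid name, or a NetCDF file
                     method_remap = "conservative",  # the recipe's int_method
                     lat_dim = "lat", lon_dim = "lon",
                     region = c(0, 10, 35, 45))      # lonmin, lonmax, latmin, latmax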
diff --git a/modules/Downscaling/tmp/Intlr.R b/modules/Downscaling/tmp/Intlr.R
index 6f7e1249a54d829455b30474c630dae62ed396b6..f108877b7d55a6016b437a17a8144d6c68b28198 100644
--- a/modules/Downscaling/tmp/Intlr.R
+++ b/modules/Downscaling/tmp/Intlr.R
@@ -22,24 +22,16 @@
 #'@param obs an 's2dv object' containing the observational field. The object
 #'must have, at least, the dimensions latitude, longitude and start date. The object is
 #'expected to be already subset for the desired region.
-#'@param exp_cor an optional 's2dv_cube' object with named dimensions containing the seasonal
-#'forecast experiment data. If the forecast is provided, it will be downscaled using the
-#'hindcast and observations; if not provided, the hindcast will be downscaled instead. The
-#'default value is NULL.
-#'@param lr_method a character vector indicating the linear regression method to be applied.
-#'Accepted methods are 'basic', 'large-scale' and '9nn'. The 'basic' method fits a
-#'linear regression using high resolution observations as predictands and the
-#'interpolated model data as predictor. Then, the regression equation is applied to the
-#'interpolated model data to correct the interpolated values. The 'large-scale' method
-#'fits a linear regression with large-scale predictors (e.g. teleconnection indices) as
-#'predictors and high-resolution observations as predictands. This equation is then applied
-#'to the interpolated model values. Finally, the '9nn' method uses a linear regression
-#'with the nine nearest neighbours as predictors and high-resolution observations as
-#'predictands. Instead of constructing a regression model using all nine predictors,
-#'principal component analysis is applied to the data of neighboring grids to reduce the
-#'dimension of the predictors. The linear regression model is then built using the principal
-#'components that explain 95% of the variance. The '9nn' method does not require a
-#'pre-interpolation process.
+#'@param lr_method a character vector indicating the linear regression method to be applied
+#'after the interpolation. Accepted methods are 'basic', 'large-scale' and '4nn'. The 'basic'
+#'method fits a linear regression using high resolution observations as predictands and the
+#'interpolated model data as predictor. Then, the regression equation is applied to the
+#'interpolated model data to correct the interpolated values. The 'large-scale' method fits a linear
+#'regression with large-scale predictors from the same model (e.g. teleconnection indices)
+#'as predictors and high-resolution observations as predictands. This equation is then
+#'applied to the interpolated model values. Finally, the '4nn' method uses a linear
+#'regression with the four nearest neighbours as predictors and high-resolution observations
+#'as predictands. It is then applied to model data to correct the interpolated values.
 #'@param target_grid a character vector indicating the target grid to be passed to CDO.
 #'It must be a grid recognised by CDO or a NetCDF file.
 #'@param points a list of two elements containing the point latitudes and longitudes
@@ -90,87 +82,44 @@
 #'dim(obs) <- c(lat = 12, lon = 15, sdate = 5)
 #'obs_lons <- seq(1,5, 4/14)
 #'obs_lats <- seq(1,4, 3/11)
-#'exp <- s2dv_cube(data = exp, coords = list(lat = exp_lats, lon = exp_lons))
-#'obs <- s2dv_cube(data = obs, coords = list(lat = obs_lats, lon = obs_lons))
+#'exp <- s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons)
+#'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons)
 #'res <- CST_Intlr(exp = exp, obs = obs, target_grid = 'r1280x640', lr_method = 'basic', int_method = 'conservative')
 #'@export
-CST_Intlr <- function(exp, obs, exp_cor = NULL, lr_method, target_grid = NULL, points = NULL,
-                      int_method = NULL, method_point_interp = NULL, predictors = NULL,
-                      lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time",
-                      member_dim = "member", large_scale_predictor_dimname = 'vars',
-                      loocv = TRUE, region = NULL, ncores = NULL) {
+CST_Intlr <- function(exp, obs, lr_method, target_grid = NULL, points = NULL, int_method = NULL,
+                      method_point_interp = NULL, predictors = NULL, lat_dim = "lat", lon_dim = "lon",
+                      sdate_dim = "sdate", time_dim = "time", member_dim = "member",
+                      large_scale_predictor_dimname = 'vars', loocv = TRUE, region = NULL, ncores = NULL) {
 
-  if (!inherits(exp, 's2dv_cube')) {
+  if (!inherits(exp,'s2dv_cube')) {
     stop("Parameter 'exp' must be of the class 's2dv_cube'")
   }
 
-  if (!inherits(obs, 's2dv_cube')) {
+  if (!inherits(obs,'s2dv_cube')) {
     stop("Parameter 'obs' must be of the class 's2dv_cube'")
   }
-
-  exp_cor_aux <- NULL
-
-  if (!is.null(exp_cor)) {
-    if (identical(lr_method, 'basic') | identical(lr_method, '9nn')) {
-      if (!inherits(exp_cor, 's2dv_cube')) {
-        stop("Parameter 'exp_cor' must be of the class 's2dv_cube'")
-      }
-      exp_cor_aux <- exp_cor$data
-    # when large-scale is selected, the forecast object does not have to be an s2dv_cube object
-    } else if (identical(lr_method, 'large-scale')) {
-      if (!inherits(exp_cor, 's2dv_cube')) {
-        exp_cor_aux <- exp_cor
-      } else {
-        exp_cor_aux <- exp_cor$data
-      }
-    }
-  }
 
-  res <- Intlr(exp = exp$data, obs = obs$data, exp_cor = exp_cor_aux,
-               exp_lats = exp$coords[[lat_dim]], exp_lons = exp$coords[[lon_dim]],
-               obs_lats = obs$coords[[lat_dim]], obs_lons = obs$coords[[lon_dim]],
-               points = points, source_file_exp = exp$attrs$source_files[1],
-               source_file_obs = obs$attrs$source_files[1],
+  res <- Intlr(exp = exp$data, obs = obs$data, exp_lats = exp$coords[[lat_dim]], exp_lons = exp$coords[[lon_dim]],
+               obs_lats = obs$coords[[lat_dim]], obs_lons = obs$coords[[lon_dim]], points = points,
+               source_file_exp = exp$coords$source_files[1], source_file_obs = obs$coords$source_files[1],
                target_grid = target_grid, lr_method = lr_method, int_method = int_method,
               method_point_interp = method_point_interp, predictors = predictors,
               lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim, time_dim = time_dim,
-               member_dim = member_dim,
-               large_scale_predictor_dimname = large_scale_predictor_dimname,
+               member_dim = member_dim, large_scale_predictor_dimname = large_scale_predictor_dimname,
               loocv = loocv, region = region, ncores = ncores)
 
   # Modify data, lat and lon in the original s2dv_cube, adding the downscaled data
+  exp$data <- res$data
+  exp$dims <- dim(exp$data)
+  exp$coords[[lon_dim]] <- res$lon
+  exp$coords[[lat_dim]] <- res$lat
+
   obs$data <- res$obs
   obs$dims <- dim(obs$data)
   obs$coords[[lon_dim]] <- res$lon
   obs$coords[[lat_dim]] <- res$lat
 
-  if (is.null(exp_cor)) {
-    exp$data <- res$data
-    exp$dims <- dim(exp$data)
-    exp$coords[[lon_dim]] <- res$lon
-    exp$coords[[lat_dim]] <- res$lat
-
-    res_s2dv <- list(exp = exp, obs = obs)
-  } else {
-    if (identical(lr_method, 'basic') | identical(lr_method, '9nn')) {
-      exp_cor$data <- res$data
-      exp_cor$dims <- dim(exp_cor$data)
-      exp_cor$coords[[lon_dim]] <- res$lon
-      exp_cor$coords[[lat_dim]] <- res$lat
-    # when large-scale is selected, the forecast object does not have to be an s2dv_cube object
-    } else if (identical(lr_method, 'large-scale')) {
-      if (!inherits(exp_cor, 's2dv_cube')) {
-        exp_cor <- suppressWarnings(s2dv_cube(res$data, lat = res$lat, lon = res$lon))
-      } else {
-        exp_cor$data <- res$data
-        exp_cor$dims <- dim(exp_cor$data)
-        exp_cor$coords[[lon_dim]] <- res$lon
-        exp_cor$coords[[lat_dim]] <- res$lat
-      }
-    }
-
-    res_s2dv <- list(exp = exp_cor, obs = obs)
-  }
-
+  res_s2dv <- list(exp = exp, obs = obs)
   return(res_s2dv)
 }
@@ -198,10 +147,6 @@ CST_Intlr <- function(exp, obs, exp_cor = NULL, lr_method, target_grid = NULL, p
 #'@param obs an array with named dimensions containing the observational field. The object
 #'must have, at least, the dimensions latitude, longitude and start date. The object is
 #'expected to be already subset for the desired region.
-#'@param exp_cor an optional array with named dimensions containing the seasonal forecast
-#'experiment data. If the forecast is provided, it will be downscaled using the hindcast and
-#'observations; if not provided, the hindcast will be downscaled instead. The default value
-#'is NULL.
 #'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must
 #'range from -90 to 90.
 #'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes
@@ -210,20 +155,16 @@ CST_Intlr <- function(exp, obs, exp_cor = NULL, lr_method, target_grid = NULL, p
 #'range from -90 to 90.
 #'@param obs_lons a numeric vector containing the longitude values in 'obs'. Longitudes
 #'can range from -180 to 180 or from 0 to 360.
-#'@param lr_method a character vector indicating the linear regression method to be applied.
-#'Accepted methods are 'basic', 'large-scale' and '9nn'. The 'basic' method fits a
-#'linear regression using high resolution observations as predictands and the
-#'interpolated model data as predictor. Then, the regression equation is applied to the
-#'interpolated model data to correct the interpolated values. The 'large-scale' method
-#'fits a linear regression with large-scale predictors (e.g. teleconnection indices) as
-#'predictors and high-resolution observations as predictands. This equation is then applied
-#'to the interpolated model values. Finally, the '9nn' method uses a linear regression
-#'with the nine nearest neighbours as predictors and high-resolution observations as
-#'predictands. Instead of constructing a regression model using all nine predictors,
-#'principal component analysis is applied to the data of neighboring grids to reduce the
-#'dimension of the predictors. The linear regression model is then built using the principal
-#'components that explain 95% of the variance. The '9nn' method does not require a
-#'pre-interpolation process.
+#'@param lr_method a character vector indicating the linear regression method to be applied
+#'after the interpolation. Accepted methods are 'basic', 'large-scale' and '4nn'. The 'basic'
+#'method fits a linear regression using high resolution observations as predictands and the
+#'interpolated model data as predictor. Then, the regression equation is applied to the
+#'interpolated model data to correct the interpolated values. The 'large-scale' method fits a linear
+#'regression with large-scale predictors from the same model (e.g. teleconnection indices)
+#'as predictors and high-resolution observations as predictands. This equation is then
+#'applied to the interpolated model values. Finally, the '4nn' method uses a linear
+#'regression with the four nearest neighbours as predictors and high-resolution observations
+#'as predictands. It is then applied to model data to correct the interpolated values.
 #'@param target_grid a character vector indicating the target grid to be passed to CDO.
 #'It must be a grid recognised by CDO or a NetCDF file.
 #'@param points a list of two elements containing the point latitudes and longitudes
@@ -281,12 +222,11 @@ CST_Intlr <- function(exp, obs, exp_cor = NULL, lr_method, target_grid = NULL, p
 #'res <- Intlr(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons, obs_lats = obs_lats,
 #'obs_lons = obs_lons, target_grid = 'r1280x640', lr_method = 'basic', int_method = 'conservative')
 #'@export
-Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lons, lr_method,
-                  target_grid = NULL, points = NULL, int_method = NULL, method_point_interp = NULL,
-                  source_file_exp = NULL, source_file_obs = NULL, predictors = NULL,
-                  lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time",
-                  member_dim = "member", region = NULL, large_scale_predictor_dimname = 'vars',
-                  loocv = TRUE, ncores = NULL) {
+Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, target_grid = NULL, points = NULL,
+                  int_method = NULL, method_point_interp = NULL, source_file_exp = NULL, source_file_obs = NULL,
+                  predictors = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time",
+                  member_dim = "member", region = NULL, large_scale_predictor_dimname = 'vars', loocv = TRUE,
+                  ncores = NULL) {
 
   #-----------------------------------
   # Checkings
   #-----------------------------------
@@ -334,43 +274,6 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo
                "'sdate_dim'")
   }
 
-  if (!is.null(exp_cor)) {
-
-    if (is.na(match(sdate_dim, names(dim(exp_cor))))) {
-      stop("Missing start date dimension in 'exp_cor', or does not match the parameter ",
-           "'sdate_dim'")
-    }
-
-    if (is.na(match(member_dim, names(dim(exp_cor))))) {
-      stop("Missing member dimension in 'exp_cor', or does not match the parameter 'member_dim'")
-    }
-
-    if (loocv) { # loocv equal to false to train with the whole hindcast and predict with the forecast
-      loocv <- FALSE
-      warning("Forecast data will be downscaled. 'loocv' is set to FALSE ",
-              "to train the model with the whole hindcast and predict with the forecast.")
-    }
-
-    if (is.null(predictors)) {
-
-      if (is.na(match(lon_dim, names(dim(exp_cor))))) {
-        stop("Missing longitude dimension in 'exp_cor', or does not match the parameter ",
-             "'lon_dim'")
-      }
-
-      if (is.na(match(lat_dim, names(dim(exp_cor))))) {
-        stop("Missing latitude dimension in 'exp_cor', or does not match the parameter ",
-             "'lat_dim'")
-      }
-    }
-  }
-
-  if (lr_method == '9nn') {
-    warning("9nn method skips the interpolation step since the method itself inherently downscale ",
-            "the coarse resolution data to the reference data resolution without the need ",
-            "for interpolation.")
-  }
-
   # When observations are pointwise
   if (!is.null(points) & !is.na(match("location", names(dim(obs))))) {
     point_obs <- T
@@ -392,6 +295,10 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo
                "through the parameter 'method_point_interp'.")
   }
 
+  # sdate must be the time dimension in the input data
+  stopifnot(sdate_dim %in% names(dim(exp)))
+  stopifnot(sdate_dim %in% names(dim(obs)))
+
   # the code is not yet prepared to handle members in the observations
   restore_ens <- FALSE
   if (member_dim %in% names(dim(obs))) {
@@ -414,17 +321,6 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo
     stopifnot(sdate_dim %in% names(dim(predictors)))
     stopifnot(dim(predictors)[sdate_dim] == dim(exp)[sdate_dim])
   }
-  # forecasts for the large scale predictors
-  if (!is.null(exp_cor)) {
-    if (is.na(match(large_scale_predictor_dimname, names(dim(exp_cor))))) {
-      stop("Missing large scale predictor dimension in 'exp_cor', or does not match ",
-           "the parameter 'large_scale_predictor_dimname'")
-    }
-    if (!identical(dim(exp_cor)[names(dim(exp_cor)) == large_scale_predictor_dimname],
-                   dim(predictors)[names(dim(predictors)) == large_scale_predictor_dimname])) {
-      stop("Large scale predictor dimension in exp_cor and predictors must be identical.")
-    }
-  }
   }
   ## ncores
   if (!is.null(ncores)) {
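At a single grid point, the 'basic' method documented above amounts to an ordinary least-squares fit of the observations on the interpolated model values, applied back to those values. A self-contained sketch with synthetic data (not the package code):

set.seed(1)
x <- rnorm(30)            # interpolated model values at one grid point, over sdate
y <- 0.8 * x + rnorm(30)  # co-located high-resolution observations
fit <- lm(y ~ x)          # train the regression
corrected <- predict(fit, newdata = data.frame(x = x))  # correct the model values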
", @@ -451,32 +347,19 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)]) } - exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, - target_grid = target_grid, points = points, - method_point_interp = method_point_interp, - source_file = source_file_exp, lat_dim = lat_dim, - lon_dim = lon_dim, method_remap = int_method, - region = region, ncores = ncores) - - if (!is.null(exp_cor) & is.null(predictors)) { - exp_cor_interpolated <- Interpolation(exp = exp_cor, lats = exp_lats, lons = exp_lons, - target_grid = target_grid, points = points, - method_point_interp = method_point_interp, - source_file = source_file_exp, lat_dim = lat_dim, - lon_dim = lon_dim, method_remap = int_method, - region = region, ncores = ncores) - } + exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid, + points = points, method_point_interp = method_point_interp, + source_file = source_file_exp, lat_dim = lat_dim, lon_dim = lon_dim, + method_remap = int_method, region = region, ncores = ncores) # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to # the same grid to force the matching - if ((!suppressWarnings(.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats, - lon1 = exp_interpolated$lon, lon2 = obs_lons))) | !(point_obs)) { - obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, - target_grid = target_grid, points = points, - method_point_interp = method_point_interp, - source_file = source_file_obs, lat_dim = lat_dim, - lon_dim = lon_dim, method_remap = int_method, - region = region, ncores = ncores) + if ((!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats, + lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !(point_obs)) { + obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid, + points = points, method_point_interp = method_point_interp, + source_file = source_file_obs, lat_dim = lat_dim, lon_dim = lon_dim, + method_remap = int_method, region = region, ncores = ncores) lats <- obs_interpolated$lat lons <- obs_interpolated$lon @@ -500,17 +383,6 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo target_dims_predictor <- sdate_dim target_dims_predictand <- sdate_dim - - if (!is.null(exp_cor)) { - aux_dim <- NULL - forecast <- exp_cor_interpolated$data - target_dims_predictor <- c(sdate_dim, member_dim) - target_dims_forecast <- c(sdate_dim, member_dim) - } else { - forecast <- NULL - target_dims_forecast <- NULL - } - } # (Multi) linear regression with large-scale predictors @@ -526,37 +398,16 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo target_dims_predictor <- c(sdate_dim, large_scale_predictor_dimname) target_dims_predictand <- sdate_dim - - if (!is.null(exp_cor)) { - aux_dim <- large_scale_predictor_dimname - if (!inherits(exp_cor, 's2dv_cube')) { - forecast <- exp_cor - } else { - forecast <- exp_cor$data - } - target_dims_predictor <- c(sdate_dim, large_scale_predictor_dimname, member_dim) - target_dims_forecast <- c(sdate_dim, large_scale_predictor_dimname, member_dim) - } } # Multi-linear regression with the four nearest neighbours # Predictors: model data # Predictand: observations - else if (lr_method == '9nn') { + else if (lr_method == '4nn') { + + predictor <- .find_nn(coar = exp, lats_hres = 
obs_lats, lons_hres = obs_lons, lats_coar = exp_lats, + lons_coar = exp_lons, lat_dim = lat_dim, lon_dim = lon_dim, nn = 4, ncores = ncores) - predictor <- .find_nn(coar = exp, lats_hres = obs_lats, lons_hres = obs_lons, - lats_coar = exp_lats, lons_coar = exp_lons, lat_dim = lat_dim, - lon_dim = lon_dim, sdate_dim = sdate_dim, member_dim = member_dim, - nn = 9, ncores = ncores) - - if (!is.null(exp_cor)) { - aux_dim <- 'nn' - forecast <- .find_nn(coar = exp_cor, lats_hres = obs_lats, lons_hres = obs_lons, - lats_coar = exp_lats, lons_coar = exp_lons, lat_dim = lat_dim, - lon_dim = lon_dim, sdate_dim = sdate_dim, member_dim = member_dim, - nn = 9, ncores = ncores) - } - if (is.null(points) | ("location" %in% names(dim(obs)))) { if (!is.null(target_grid)) { warning("Interpolating to the 'obs' grid") @@ -565,86 +416,44 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo lats <- obs_lats lons <- obs_lons - } else { - # If the downscaling is to point locations: Once the 9 nearest neighbours have been found, + } + # If the downscaling is to point locations: Once the 4 nearest neighbours have been found, # interpolate to point locations - - predictor <- Interpolation(exp = predictor, lats = obs_lats, lons = obs_lons, - lon_dim = lon_dim, lat_dim = lat_dim, target_grid = NULL, + else { + predictor <- Interpolation(exp = predictor, lats = obs_lats, lons = obs_lons, target_grid = NULL, points = points, method_point_interp = method_point_interp, - source_file = source_file_obs, method_remap = NULL, - region = region, ncores = ncores) + source_file = source_file_obs, method_remap = NULL, region = region, ncores = ncores) - predictand <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, - lon_dim = lon_dim, lat_dim = lat_dim, target_grid = NULL, + predictand <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = NULL, points = points, method_point_interp = method_point_interp, - source_file = source_file_obs, method_remap = NULL, - region = region, ncores = ncores) - - if (!is.null(exp_cor)) { - forecast <- Interpolation(exp = forecast, lats = obs_lats, lons = obs_lons, - lon_dim = lon_dim, lat_dim = lat_dim, target_grid = NULL, - points = points, method_point_interp = method_point_interp, - source_file = source_file_obs, method_remap = NULL, - region = region, ncores = ncores) - forecast <- forecast$data - } - + source_file = source_file_obs, method_remap = NULL, region = region, ncores = ncores) + lats <- predictor$lat lons <- predictor$lon predictor <- predictor$data predictand <- predictand$data } - - target_dims_predictand <- sdate_dim - - if (!is.null(exp_cor)) { - target_dims_predictor <- c(sdate_dim,'nn', member_dim) - target_dims_forecast <- c(sdate_dim,'nn', member_dim) - } else { - target_dims_predictor <- c(sdate_dim,'nn') - } - } else { - - stop(paste0(lr_method, " method is not implemented yet")) + + target_dims_predictor <- c(sdate_dim,'nn') + target_dims_predictand <- sdate_dim } - - k_out <- 1 ## ## It is used in constructing window for loocv = TRUE case. k_out is also used to order the dimensions of pred.lm output for both loocv = TRUE and loocv = FALSE cases. In case the data is NOT daily, k_out is 1. 
- daily <- FALSE - if ( time_dim %in% names(dim(predictor)) ) { - daily <- TRUE - k_out <- as.numeric (dim(predictor)[time_dim]) ## in case loocv = TRUE and the data is daily, leave one-year data out - predictor <- MergeDims (predictor, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim) - predictand <- MergeDims (predictand, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim) - output_dims <- c(time_dim, sdate_dim) # for hindcast dwn - if (!is.null(exp_cor)) { - sdate_num_fr <- as.numeric (dim(forecast)[sdate_dim]) ## sdate_num of forecast - forecast <- MergeDims (forecast, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim) - } + + else { + stop(paste0(lr_method, " method is not implemented yet")) } + print(paste0('dim predictor',dim(predictor))) + print(paste0('dim predictand',dim(predictand))) + print(dim(list(predictor[1]))) # Apply the linear regressions - ## case hindcast - forecast - if (!is.null(exp_cor)) { - res <- Apply(list(predictor, predictand, forecast), - target_dims = list(target_dims_predictor, target_dims_predictand, - target_dims_forecast), - fun = .intlr, loocv = loocv, aux_dim = aux_dim, ncores = ncores, - sdate_dim = sdate_dim, k_out = k_out)$output1 - - if (daily) { - res <- SplitDim(data = res, split_dim = sdate_dim, new_dim_name = time_dim, - indices = rep (1:k_out, sdate_num_fr)) - } - } - ## case hindcast only - else { - res <- Apply(list(predictor, predictand), target_dims = list(target_dims_predictor, - target_dims_predictand), - fun = .intlr, loocv = loocv, ncores = ncores, sdate_dim = sdate_dim, - k_out = k_out, - output_dims = output_dims)$output1 - } + + + + res <- Apply(list(predictor, predictand), target_dims = list(target_dims_predictor, target_dims_predictand), + fun = .intlr, loocv = loocv, ncores = ncores)$output1 + + names(dim(res))[1] <- sdate_dim + # names(dim(res))[which(names(dim(res)) == '')] # restore ensemble dimension in observations if it existed originally if (restore_ens) { @@ -654,97 +463,48 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo # Return a list of three elements res <- list(data = res, obs = predictand, lon = lons, lat = lats) - # for testing 9nn - #x <- predictor[10,10,1,1,1,,,] - #x <- Reorder(x,c(2,3,1)) - #y <- predictand[1,1,1,10,,10] - #f <- forecast[10,10,1,1,1,,,] - #f <- Reorder(f,c(2,1)) - #f <- InsertDim(f,1,1,'sdate') - - # large-scale - #x <- Reorder(predictor,c(1,3,2)) - #y <- predictand[1,1,1,10,,10] - #f <- Reorder(forecast,c(1,3,2)) - return(res) } + #----------------------------------- # Atomic function to generate and apply the linear regressions #----------------------------------- -.intlr <- function(x, y, f = NULL, loocv, aux_dim = NULL, sdate_dim = "sdate", k_out = 1) { - - require (plyr) - - if (!is.null(f)) { - if (!is.null(aux_dim)) { - tmp_df <- data.frame(x = adply(x,.margins = 3, .id = NULL), y = y) - } else { - tmp_df <- data.frame(x = as.vector(x), y = y) - } - } else { - tmp_df <- data.frame(x = x, y = y) - } - +.intlr <- function(x, y, loocv) { + + tmp_df <- data.frame(x = x, y = y) # if the data is all NA, force return return NA if (all(is.na(tmp_df)) | (sum(apply(tmp_df, 2, function(x) !all(is.na(x)))) == 1)) { - if (is.null(f)) { - res <- array(rep(NA, nrow(tmp_df)), c(k_out, nrow(tmp_df)/k_out)) - } else { - if (!is.null(aux_dim)) { - res <- array(NA, dim(f)[names(dim(f)) != aux_dim]) - } else { - res <- array(NA, dim(f)) - } - } - } else { - ## apply principle components if method is 9nn - ## TODO 9NN MIGHT GIVE ERROR FOR 
FORECAST CASE. REVIEW AFTER DEVELOPING FORECAST STRATEGY!! - if (any(names(dim(x)) == "nn")) { - PR <- prcomp(~ x.1 + x.2 + x.3 + x.4 + x.5 + x.6 + x.7 + x.8 + x.9, - data = tmp_df[, -ncol(tmp_df)], - na.action = na.omit, scale = FALSE) ## prcomp between predictors - ## 95% of variablity should be explained by the principle components - N_predictors <- which (summary(PR)$importance[ "Cumulative Proportion",] > .95)[1] - PR_mat <- PR$x[, 1:N_predictors, drop = FALSE] - if (!is.null(PR$na.action)) { ## if there are NAs, relocate them to the correct order - dum <- array(NA, dim = c(nrow(tmp_df), ncol(PR_mat) )) - dum [as.numeric(rownames(PR_mat)), ] <- PR_mat - PR_mat <- dum - } - tmp_df <- data.frame(x = PR_mat, y = y) - colnames(tmp_df) <- c(paste0("x.",1:(ncol(tmp_df)-1)), "y") - } - + n <- nrow(tmp_df) + res <- rep(NA, n) + + } else { # training - lm1 <- .train_lm(df = tmp_df, loocv = loocv, k_out = k_out) - + lm1 <- .train_lm(df = tmp_df, loocv = loocv) + # prediction - res <- .pred_lm(lm1 = lm1, df = tmp_df, f = f, loocv = loocv, aux_dim = aux_dim, k_out = k_out) + res <- .pred_lm(lm1 = lm1, df = tmp_df, loocv = loocv) } - - return(res) + + return(res) } #----------------------------------- # Function to generate the linear regressions. # Returns a list #----------------------------------- -.train_lm <- function(df, loocv, k_out = 1) { +.train_lm <- function(df, loocv) { # Remove predictor columns containing only NA's - df <- df[ ,apply(as.matrix(df[,colnames(df) != 'y'],nrow(df),ncol(df)-1), 2, - function(x) !all(is.na(x)))] + df <- df[ ,apply(as.matrix(df[,colnames(df) != 'y'],nrow(df),ncol(df)-1), 2, function(x) !all(is.na(x)))] if (loocv) { - - lm1 <- lapply(1:(nrow(df)/k_out), function(j) { - window <- ((j-1)*k_out+1):((j-1)*k_out + k_out) # omit the data of the year including corresponding day - if (all(is.na(df[-window ,]$y))) { + + lm1 <- lapply(1:nrow(df), function(j) { + if (all(is.na(df[-j,]$y))) { return(NA) } else { - return(lm(df[-window,], formula = y ~ .)) + return(lm(df[-j,], formula = y ~ .)) }}) } else { @@ -757,110 +517,43 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo #----------------------------------- # Function to apply the linear regressions. 
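The simplified .train_lm/.pred_lm pair now implements plain leave-one-out cross-validation over start dates: for each date j, the model is trained on all other dates and then predicts date j. A stand-alone illustration on synthetic data:

set.seed(1)
df <- data.frame(x = rnorm(20))
df$y <- 1.2 * df$x + rnorm(20)
pred_loocv <- sapply(seq_len(nrow(df)), function(j) {
  fit <- lm(y ~ ., data = df[-j, ])              # train without date j
  predict(fit, newdata = df[j, , drop = FALSE])  # predict the held-out date
})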
@@ -757,110 +517,43 @@ Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lo
 #-----------------------------------
 # Function to apply the linear regressions.
 #-----------------------------------
-.pred_lm <- function(df, lm1, f, loocv, aux_dim, k_out = 1) {
-
-  require (plyr)
-
+.pred_lm <- function(df, lm1, loocv) {
+
   if (loocv) {
-    pred_vals <- sapply(1:length(lm1), function(j) {
+    pred_vals <- sapply(1:nrow(df), function(j) {
       if (all(is.na(lm1[[j]]))) {
-        return(array(rep(NA, k_out), c(k_out, nrow(df) / k_out)))
+        return(NA)
       } else {
-        window <- ((j-1)*k_out+1):((j-1)*k_out + k_out) # test the daily data of the corresponding year
-        return(predict(lm1[[j]], df[window,]))
+        return(predict(lm1[[j]], df[j,]))
       }})
-    pred_vals <- array (pred_vals, c(k_out, nrow(df) / k_out))
   } else {
-    if (!is.na(lm1)) {
-      # case to downscale hindcasts
-      if (is.null(f)) {
-        pred_vals_ls <- lapply(lm1, predict, newdata = df)
-        pred_vals <- array (unlist(pred_vals_ls), c(k_out, nrow(df) / k_out))
-      # case to downscale forecasts
-      } else {
-        if (!is.null(aux_dim)) {
-          # 9nn & large-scale
-          fcst_df <- adply(f,.margins = 3, .id = NULL)
-          colnames(fcst_df) <- paste0('x.',1:ncol(fcst_df))
-          pred_vals_ls <- lapply(lm1, predict, newdata = fcst_df)
-          pred_vals <- unlist(pred_vals_ls)
-          dim(pred_vals) <- dim(f)[names(dim(f)) != aux_dim]
-        } else {
-          # basic
-          pred_vals_ls <- lapply(lm1, predict, newdata = data.frame(x = as.vector(f)))
-          pred_vals <- unlist(pred_vals_ls)
-          dim(pred_vals) <- dim(f)
-        }
-      }
+    if (!is.na(lm1)) {
+      pred_vals_ls <- lapply(lm1, predict, newdata = df)
+      pred_vals <- unlist(pred_vals_ls)
     } else {
-      if (is.null(f)) {
-        pred_vals <- array(rep(NA, k_out), c(k_out, nrow(df) / k_out))
-      } else {
-        if (!is.null(aux_dim)) {
-          pred_vals <- array(NA, dim(f)[names(dim(f)) != aux_dim])
-        } else {
-          pred_vals <- array(NA, dim(f))
-        }
-      }
+      pred_vals <- rep(NA, nrow(df))
     }
   }
   return(pred_vals)
 }
 
 #-----------------------------------
-# Function to ind N nearest neighbours.
+# Function to find N nearest neighbours.
 # 'coar' is an array with named dimensions
 #-----------------------------------
-.find_nn <- function(coar, lats_hres, lons_hres, lats_coar, lons_coar,
-                     lat_dim, lon_dim, sdate_dim, member_dim, nn = 9, ncores = NULL) {
-
-  # order lats and lons of the high res and coarse res data in 2-dimension in order to calculate the distances to each other.
-  hres_mat <- expand.grid(as.numeric(lats_hres), as.numeric(lons_hres))
-  coar_mat <- expand.grid(as.numeric(lats_coar), as.numeric(lons_coar))
-
-  # calculate distances and obtain the closest 4 coarse res grid for each high res grid
-  dist_id <- apply (proxy::dist(hres_mat, coar_mat), 1, order)[1:nn,]
-
-  names(dim(dist_id)) <- c("nn", lon_dim)
-
-  idh <- SplitDim(dist_id, split_dim = lon_dim,
-                  indices = rep(1:(length(lats_hres)),length(lons_hres)),
-                  new_dim_name = lat_dim)
-
-  idh <- aperm (idh, c(1,3,2)) # dimension: nn, lat_dim, lon_dim
-
-  #coar
-  idc_lat_grid <- match( coar_mat[,1], as.numeric(lats_coar))
-  idc_lon_grid <- match( coar_mat[,2], as.numeric(lons_coar))
-  cgrid <- cbind(idc_lat_grid,idc_lon_grid) # For the coarse res data, include the lat and lon order each grids ordered in 2-dimension.
-
-  idh_dum <- array(NA, dim = c(nn, 2, length(lats_hres), length(lons_hres)))
-
-  for (i in 1:length(lats_hres)) {
-    for (j in 1:length(lons_hres)) {
-      idh_dum[,,i,j] <- cgrid[idh[,i,j],]
-    }
-  }
-
-  idh <- idh_dum
-  rm(idh_dum)
-  names(dim(idh)) <- c("nn", "grd", lat_dim, lon_dim)
-
-  # idh includes the lat and lon order of the nearest neighbours in the coarse res data specified for each lat lon of the high res data.
-
-  nearest <- Apply(list(coar, idh),
-                   target_dims = list(c(sdate_dim, lat_dim, lon_dim, member_dim),
-                                      "grd"),
-                   fun = function(x, y) {
-                     a <- x[, y[1], y[2], , drop = FALSE]
-                     a <- Subset (a, along = c(lat_dim, lon_dim),
                                  indices = list (1,1),
-                                  drop = "selected") # drop lat lon dim coming from coar data
-                     return(a)
-                   },
-                   ncores = ncores)$output1
-
+.find_nn <- function(coar, lats_hres, lons_hres, lats_coar, lons_coar, lat_dim, lon_dim, nn = 4, ncores = NULL) {
+
+  # Sort the distances from closest to furthest
+  idx_lat <- as.array(sapply(lats_hres, function(x) order(abs(lats_coar - x))[1:nn]))
+  idx_lon <- as.array(sapply(lons_hres, function(x) order(abs(lons_coar - x))[1:nn]))
+
+  names(dim(idx_lat)) <- c('nn', lat_dim)
+  names(dim(idx_lon)) <- c('nn', lon_dim)
+
+  # obtain the values of the nearest neighbours
+  nearest <- Apply(list(coar, idx_lat, idx_lon),
+                   target_dims = list(c(lat_dim, lon_dim), lat_dim, lon_dim),
+                   fun = function(x, y, z) x[y, z], ncores = ncores)$output1
+
   return(nearest)
 }
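Worth noting about the simplified .find_nn: latitudes and longitudes are ranked independently, so each high-resolution point effectively receives the Cartesian product of the nn closest latitudes and nn closest longitudes rather than the nn nearest cells by great-circle distance. A small illustration of the index selection:

lats_coar <- seq(0, 10, by = 2.5)
lons_coar <- seq(0, 10, by = 2.5)
lat_hres <- 3.1
lon_hres <- 7.4
idx_lat <- order(abs(lats_coar - lat_hres))[1:4]  # 4 closest coarse latitudes
idx_lon <- order(abs(lons_coar - lon_hres))[1:4]  # 4 closest coarse longitudes
# a coarse field is then indexed as field[idx_lat, idx_lon], a 4 x 4 block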
latmin indicates the lower
@@ -87,17 +81,16 @@
 #'dim(obs) <- c(lat = 12, lon = 15, sdate = 15)
 #'obs_lons <- seq(1,5, 4/14)
 #'obs_lats <- seq(1,4, 3/11)
 #'exp <- s2dv_cube(data = exp, coords = list(lat = exp_lats, lon = exp_lons))
 #'obs <- s2dv_cube(data = obs, coords = list(lat = obs_lats, lon = obs_lons))
 #'res <- CST_LogisticReg(exp = exp, obs = obs, int_method = 'bil', target_grid = 'r1280x640',
 #'probs_cat = c(1/3, 2/3))
 #'@export
-CST_LogisticReg <- function(exp, obs, exp_cor = NULL, target_grid, int_method = NULL,
-                            log_reg_method = "ens_mean", probs_cat = c(1/3,2/3),
-                            return_most_likely_cat = FALSE, points = NULL,
-                            method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon",
-                            sdate_dim = "sdate", member_dim = "member", time_dim = "time",
-                            region = NULL, loocv = TRUE, ncores = NULL) {
+CST_LogisticReg <- function(exp, obs, target_grid, int_method = NULL, log_reg_method = "ens_mean",
+                            probs_cat = c(1/3,2/3), return_most_likely_cat = FALSE, points = NULL,
+                            method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon",
+                            sdate_dim = "sdate", member_dim = "member", region = NULL,
+                            loocv = TRUE, ncores = NULL) {
 
   if (!inherits(exp,'s2dv_cube')) {
     stop("Parameter 'exp' must be of the class 's2dv_cube'")
@@ -107,40 +100,28 @@ CST_LogisticReg <- function(exp, obs, exp_cor = NULL, target_grid, int_method =
     stop("Parameter 'obs' must be of the class 's2dv_cube'")
   }
 
-  res <- LogisticReg(exp = exp$data, obs = obs$data, exp_cor = exp_cor$data,
-                     exp_lats = exp$coords[[lat_dim]], exp_lons = exp$coords[[lon_dim]],
-                     obs_lats = obs$coords[[lat_dim]], obs_lons = obs$coords[[lon_dim]],
-                     target_grid = target_grid, probs_cat = probs_cat,
-                     return_most_likely_cat = return_most_likely_cat,
+  res <- LogisticReg(exp = exp$data, obs = obs$data, exp_lats = exp$coords[[lat_dim]],
+                     exp_lons = exp$coords[[lon_dim]], obs_lats = obs$coords[[lat_dim]],
+                     obs_lons = obs$coords[[lon_dim]], target_grid = target_grid,
+                     probs_cat = probs_cat, return_most_likely_cat = return_most_likely_cat,
                      int_method = int_method, log_reg_method = log_reg_method, points = points,
                      method_point_interp = method_point_interp, lat_dim = lat_dim,
                      lon_dim = lon_dim, sdate_dim = sdate_dim, member_dim = member_dim,
-                     time_dim = time_dim, source_file_exp = exp$attrs$source_files[1],
-                     source_file_obs = obs$attrs$source_files[1],
+                     source_file_exp = exp$attrs$source_files[1],
+                     source_file_obs = obs$attrs$source_files[1],
                      region = region, loocv = loocv, ncores = ncores)
 
   # Modify data, lat and lon in the original s2dv_cube, adding the downscaled data
+  exp$data <- res$data
+  exp$dims <- dim(exp$data)
+  exp$coords[[lon_dim]] <- res$lon
+  exp$coords[[lat_dim]] <- res$lat
+
   obs$data <- res$obs
   obs$dims <- dim(obs$data)
   obs$coords[[lon_dim]] <- res$lon
   obs$coords[[lat_dim]] <- res$lat
 
-  if (is.null(exp_cor)) {
-    exp$data <- res$data
-    exp$dims <- dim(exp$data)
-    exp$coords[[lon_dim]] <- res$lon
-    exp$coords[[lat_dim]] <- res$lat
-
-    res_s2dv <- list(exp = exp, obs = obs)
-  } else {
-    exp_cor$data <- res$data
-    exp_cor$dims <- dim(exp_cor$data)
-    exp_cor$coords[[lon_dim]] <- res$lon
-    exp_cor$coords[[lat_dim]] <- res$lat
-
-    res_s2dv <- list(exp = exp_cor, obs = obs)
-  }
-
+  res_s2dv <- list(exp = exp, obs = obs)
   return(res_s2dv)
 }
 
@@ -167,10 +148,6 @@ CST_LogisticReg <- function(exp, obs, exp_cor = NULL, target_grid, int_method =
 #'@param obs an array with named dimensions containing the observational field.
The object #'must have, at least, the dimensions latitude, longitude and start date. The object is #'expected to be already subset for the desired region. -#'@param exp_cor an optional array with named dimensions containing the seasonal forecast -#'experiment data. If the forecast is provided, it will be downscaled using the hindcast and -#'observations; if not provided, the hindcast will be downscaled instead. The default value -#'is NULL. #'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must #'range from -90 to 90. #'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes @@ -210,9 +187,7 @@ CST_LogisticReg <- function(exp, obs, exp_cor = NULL, target_grid, int_method = #'@param sdate_dim a character vector indicating the start date dimension name in the element #''data' in exp and obs. Default set to "sdate". #'@param member_dim a character vector indicating the member dimension name in the element -#''data' in exp and obs. Default set to "member". -#'@param time_dim a character vector indicating the time dimension name in the element -#''data' in exp and obs. Default set to "time". +#''data' in exp and obs. Default set to "member". #'@param source_file_exp a character vector with a path to an example file of the exp data. #'Only needed if the downscaling is to a point location. #'@param source_file_obs a character vector with a path to an example file of the obs data. @@ -249,13 +224,11 @@ CST_LogisticReg <- function(exp, obs, exp_cor = NULL, target_grid, int_method = #'obs_lats = obs_lats, obs_lons = obs_lons, int_method = 'bil', target_grid = 'r1280x640', #'probs_cat = c(1/3, 2/3)) #'@export -LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lons, - target_grid, int_method = NULL, log_reg_method = "ens_mean", - probs_cat = c(1/3,2/3), return_most_likely_cat = FALSE, points = NULL, - method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", - sdate_dim = "sdate", member_dim = "member", time_dim = "time", - source_file_exp = NULL, source_file_obs = NULL, region = NULL, - loocv = TRUE, ncores = NULL) { +LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, + int_method = NULL, log_reg_method = "ens_mean", probs_cat = c(1/3,2/3), + return_most_likely_cat = FALSE, points = NULL, method_point_interp = NULL, + lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", member_dim = "member", + source_file_exp = NULL, source_file_obs = NULL, region = NULL, loocv = TRUE, ncores = NULL) { #----------------------------------- # Checkings @@ -320,34 +293,6 @@ LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, "'member_dim'") } - if (!is.null(exp_cor)) { - - if (is.na(match(sdate_dim, names(dim(exp_cor))))) { - stop("Missing start date dimension in 'exp_cor', or does not match the parameter ", - "'sdate_dim'") - } - - if (is.na(match(member_dim, names(dim(exp_cor))))) { - stop("Missing member dimension in 'exp_cor', or does not match the parameter 'member_dim'") - } - - if (is.na(match(lon_dim, names(dim(exp_cor))))) { - stop("Missing longitude dimension in 'exp_cor', or does not match the parameter ", - "'lon_dim'") - } - - if (is.na(match(lat_dim, names(dim(exp_cor))))) { - stop("Missing latitude dimension in 'exp_cor', or does not match the parameter ", - "'lat_dim'") - } - - if (loocv) { # loocv equal to false to train with the whole hindcast and predict with the forecast - loocv <- FALSE - warning("Forecast data will be downscaled. 
'loocv' is set to FALSE ",
-              "to train the model with the whole hindcast and predict with the forecast.")
-    }
-  }
-
   # When observations are pointwise
   if (!is.null(points) & !is.na(match("location", names(dim(obs))))) {
     point_obs <- T
@@ -369,9 +314,9 @@ LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats,
   }
 
   if (is.null(region)) {
-    warning("The borders of the downscaling region have not been provided. Assuming ",
-            "the four borders of the downscaling region are defined by the first and ",
-            "last elements of the parametres 'obs_lats' and 'obs_lons'.")
+    warning("The borders of the downscaling region have not been provided. Assuming the four borders ",
+            "of the downscaling region are defined by the first and last elements of the parameters ",
+            "'obs_lats' and 'obs_lons'.")
     region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)])
   }
   ## ncores
@@ -394,44 +339,24 @@
     }
   }
 
-  exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons,
-                                    target_grid = target_grid, method_remap = int_method,
-                                    points = points, source_file = source_file_exp,
-                                    lat_dim = lat_dim, lon_dim = lon_dim,
-                                    method_point_interp = method_point_interp,
+  exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid,
+                                    method_remap = int_method, points = points, source_file = source_file_exp,
+                                    lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp,
                                     region = region, ncores = ncores)
 
-  if (!is.null(exp_cor)) {
-    exp_cor_interpolated <- Interpolation(exp = exp_cor, lats = exp_lats, lons = exp_lons,
-                                          target_grid = target_grid, points = points,
-                                          method_point_interp = method_point_interp,
-                                          source_file = source_file_exp, lat_dim = lat_dim,
-                                          lon_dim = lon_dim, method_remap = int_method,
-                                          region = region, ncores = ncores)
-  }
-
   # compute ensemble mean anomalies
   if (log_reg_method == "ens_mean") {
-    predictor <- .get_ens_mean_anom(obj_ens = exp_interpolated$data, member_dim = member_dim,
-                                    sdate_dim = sdate_dim, ncores = ncores)
+    predictor <- .get_ens_mean_anom(obj_ens = exp_interpolated$data, member_dim = member_dim, sdate_dim = sdate_dim,
+                                    ncores = ncores)
 
     target_dims_predictor <- sdate_dim
-
-    if (!is.null(exp_cor)) {
-      clim_hcst <- Apply(exp_interpolated$data, target_dims = c(member_dim, sdate_dim),
-                         mean, ncores = ncores,
-                         na.rm = TRUE)$output1 ## climatology of hindcast ens mean
-      ens_mean_fcst <- Apply(exp_cor_interpolated$data, target_dims = member_dim,
-                             mean, na.rm = TRUE, ncores = ncores)$output1 ## ens mean of the forecast
-      forecast <- Ano(ens_mean_fcst, clim_hcst, ncores = ncores)
-      target_dims_forecast <- sdate_dim
-    }
 
   } else if (log_reg_method == "ens_mean_sd") {
 
     require(abind)
 
     ens_mean_anom <- .get_ens_mean_anom(obj_ens = exp_interpolated$data, member_dim = member_dim,
-                                        sdate_dim = sdate_dim, ncores = ncores)
+                                        sdate_dim = sdate_dim, ncores = ncores)
 
     ens_sd <- .get_ens_sd(obj_ens = exp_interpolated$data, member_dim = member_dim, ncores = ncores)
 
     # merge two arrays into one array of predictors
@@ -439,30 +364,8 @@
     names(dim(predictor)) <- c("pred", names(dim(ens_mean_anom)))
 
     target_dims_predictor <- c(sdate_dim, "pred")
-
-    if (!is.null(exp_cor)) {
-      clim_hcst <- Apply(exp_interpolated$data, target_dims = c(member_dim, sdate_dim),
-                         mean, ncores = ncores,
-                         na.rm = TRUE)$output1 ## climatology of hindcast
ens mean - ens_mean_fcst <- Apply(exp_cor_interpolated$data, target_dims = member_dim, - mean, na.rm = TRUE, ncores = ncores)$output1 ## ens mean of the forecast - forecast_mean_anom <- Ano(ens_mean_fcst, clim_hcst, ncores = ncores) - forecast_sd <- .get_ens_sd(obj_ens = exp_cor_interpolated$data, - member_dim = member_dim, ncores = ncores) - forecast <- abind(forecast_mean_anom, forecast_sd, along = 1/2) - names(dim(forecast)) <- c("pred", names(dim(forecast_mean_anom))) - - target_dims_forecast <- c(sdate_dim, "pred") - } - - } else if (log_reg_method == "sorted_members") { - - if (!is.null(exp_cor)) { - stop('sorted_members method cannot be used to downscale forecasts since ', - 'the ensemble members are generally exchangeable') - } - predictor <- .sort_members(obj_ens = exp_interpolated$data, - member_dim = member_dim, ncores = ncores) + } else if (log_reg_method == "sorted_members") { + predictor <- .sort_members(obj_ens = exp_interpolated$data, member_dim = member_dim, ncores = ncores) target_dims_predictor <- c(sdate_dim, member_dim) } else { @@ -473,11 +376,9 @@ LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, # the same grid to force the matching if ((!.check_coords(lat1 = as.numeric(exp_interpolated$lat), lat2 = obs_lats, lon1 = as.numeric(exp_interpolated$lon), lon2 = obs_lons)) | !(point_obs)) { - obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, - target_grid = target_grid, method_remap = int_method, - points = points, source_file = source_file_obs, - lat_dim = lat_dim, lon_dim = lon_dim, - method_point_interp = method_point_interp, + obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid, + method_remap = int_method, points = points, source_file = source_file_obs, + lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp, region = region, ncores = ncores) obs_ref <- obs_interpolated$data } else { @@ -486,78 +387,28 @@ LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, # convert observations to categorical predictands - k_out <- 1 ## in case loocv = TRUE and the data is NOT daily, leave one data out. 
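
# A minimal sketch of the categorisation step applied just below: each start
# date is assigned to one of length(probs_cat) + 1 climatological bins. The
# module does this with convert2prob(); this toy version uses quantile() and
# findInterval() to the same effect. All names below are illustrative only.
set.seed(1)
x_toy  <- rnorm(15)                          # one grid point, 15 start dates
breaks <- quantile(x_toy, probs = c(1/3, 2/3), na.rm = TRUE)
categ  <- findInterval(x_toy, breaks) + 1L   # 1 = lower, 2 = middle, 3 = upper tercile
table(categ)                                 # roughly five start dates per tercile
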
-  daily <- FALSE
-  if ( time_dim %in% names(dim(obs_ref)) & dim(obs_ref)[time_dim] > 1) {
-    daily <- TRUE
-    sdate_num <- as.numeric (dim(obs_ref)[sdate_dim])
-    k_out <- as.numeric (dim(obs_ref)[time_dim]) ## in case loocv = TRUE and the data is daily, leave one-year data out
-    obs_ref <- MergeDims (obs_ref, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim)
-    predictor <- MergeDims (predictor, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim)
-    if (!is.null(exp_cor)) {
-      sdate_num_fr <- as.numeric (dim(forecast)[sdate_dim]) ## sdate_num of forecast
-      forecast <- MergeDims (forecast, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim)
-    }
-  }
-
-  obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) {
-    if (!any(!is.na(x))) {
-      rep(NA,length(x))
-    } else {
-      terc <- convert2prob(as.vector(x), prob = probs_cat)
-      as.integer(apply(terc, 1, function(r) which (r == 1)))}},
-    output_dims = sdate_dim, ncores = ncores)$output1
-
-  target_dims_predictand <- sdate_dim
-
+  obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) {
+    if (!any(!is.na(x))) {
+      rep(NA, length(x))
+    } else {
+      terc <- convert2prob(as.vector(x), prob = probs_cat)
+      as.integer(apply(terc, 1, function(r) which(r == 1)))
+    }},
+    output_dims = sdate_dim, ncores = ncores)$output1
+
   # Apply the logistic regressions
-  ## case hindcast only
-  if (is.null(exp_cor)) {
-    res <- Apply(list(predictor, obs_cat),
-                 target_dims = list(target_dims_predictor, target_dims_predictand),
-                 fun = function(x, y) .log_reg(x = x, y = y, loocv = loocv, probs_cat = probs_cat,
-                                               sdate_dim = sdate_dim, k_out = k_out),
-                 output_dims = c(sdate_dim, "category"), ncores = ncores)$output1
-
-    if ( daily ) {
-      res <- SplitDim(res, split_dim = sdate_dim, new_dim_name = time_dim,
-                      indices = rep(1:k_out, sdate_num))
-      obs_ref <- SplitDim(obs_ref, split_dim = sdate_dim, new_dim_name = time_dim,
-                          indices = rep(1:k_out, sdate_num))
-    }
-
-    if (return_most_likely_cat) {
-      res <- Apply(res, target_dims = c(sdate_dim, "category"), .most_likely_category,
-                   output_dims = sdate_dim, ncores = ncores)$output1
-    }
-  }
-  ## case hindcast - forecast
-  else {
-    if ( time_dim %in% names(dim(predictor)) ) {
-      if (dim(predictor)[time_dim] > 1) {
-        target_dims_predictor <- c(target_dims_predictor, time_dim)
-        target_dims_predictand <- c(target_dims_predictand, time_dim)
-        target_dims_forecast <- c(target_dims_forecast, time_dim)
-      }
-    }
-    res <- Apply(list(predictor, obs_cat, forecast),
-                 target_dims = list(target_dims_predictor, target_dims_predictand,
-                                    target_dims_forecast),
-                 fun = function(x, y, f) .log_reg(x = x, y = y, f = f, loocv = loocv,
-                                                  probs_cat = probs_cat, sdate_dim = sdate_dim,
-                                                  k_out = k_out),
-                 output_dims = c(sdate_dim, "category"), ncores = ncores)$output1
-
-    if ( daily ) {
-      res <- SplitDim(res, split_dim = sdate_dim, new_dim_name = time_dim,
-                      indices = rep(1:k_out, sdate_num_fr))
-      obs_ref <- SplitDim(obs_ref, split_dim = sdate_dim, new_dim_name = time_dim,
-                          indices = rep(1:k_out, sdate_num))
-    }
+  res <- Apply(list(predictor, obs_cat), target_dims = list(target_dims_predictor, sdate_dim),
+               fun = function(x, y) .log_reg(x = x, y = y, loocv = loocv, probs_cat = probs_cat),
+               output_dims = c(sdate_dim, "category"), ncores = ncores)$output1
+
+  if (return_most_likely_cat) {
+    res <- Apply(res, target_dims = c(sdate_dim, "category"), .most_likely_category,
+                 output_dims = sdate_dim, ncores = ncores)$output1
   }
 
   # restore ensemble dimension in observations if it existed originally
   if (restore_ens) {
-    obs_ref <- s2dv::InsertDim(obs_ref, posdim = 1, lendim = 1, name = member_dim)
+    obs_ref <- s2dv::InsertDim(obs_ref, posdim = 1, lendim = 1, name = member_dim, ncores = ncores)
   }
 
   res <- list(data = res, obs = obs_ref, lon = exp_interpolated$lon, lat = exp_interpolated$lat)
@@ -578,8 +429,7 @@
 
 .sort_members <- function(obj_ens, member_dim, ncores = NULL) {
 
-  sorted <- Apply(obj_ens, target_dims = member_dim,
-                  sort, decreasing = TRUE, na.last = TRUE, ncores = ncores)$output1
+  sorted <- Apply(obj_ens, target_dims = member_dim, sort, decreasing = TRUE, na.last = TRUE,
+                  ncores = ncores)$output1
 
   return(sorted)
 }
@@ -597,8 +447,7 @@
   require(s2dv)
 
   # compute climatology
-  clim <- Apply(obj_ens, target_dims = c(member_dim, sdate_dim),
-                mean, ncores = ncores, na.rm = TRUE)$output1
+  clim <- Apply(obj_ens, target_dims = c(member_dim, sdate_dim), mean, ncores = ncores,
+                na.rm = TRUE)$output1
 
   # compute ensemble mean
   ens_mean <- Apply(obj_ens, target_dims = member_dim, mean, na.rm = TRUE, ncores = ncores)$output1
@@ -610,39 +459,23 @@
 }
 
 # atomic functions for logistic regressions
-.log_reg <- function(x, y, f = NULL, loocv, probs_cat, sdate_dim, k_out = 1) {
-
+.log_reg <- function(x, y, loocv, probs_cat) {
+
   tmp_df <- data.frame(x = x, y = y)
 
   # if the data is all NA, force return NA
   if (all(is.na(tmp_df)) | (sum(apply(tmp_df, 2, function(x) !all(is.na(x)))) == 1) | all(is.na(tmp_df$y))) {
 
-    if (is.null(f)) {
-      n1 <- nrow(tmp_df)
-    } else {
-      if (is.null(dim(f))) {
-        n1 <- length(f)
-      } else {
-        n1 <- dim(f)[1]
-      }
-    }
-    n2 <- length(probs_cat) + 1
+    n1 <- nrow(tmp_df)
+    n2 <- length(probs_cat) + 1
     res <- matrix(NA, nrow = n1, ncol = n2)
 
   } else {
     # training
-    lm1 <- .train_lr(df = tmp_df, loocv = loocv, k_out)
+    lm1 <- .train_lr(df = tmp_df, loocv = loocv)
 
     # prediction
-    res <- .pred_lr(lm1 = lm1, df = tmp_df, f = f, loocv = loocv,
-                    probs_cat = probs_cat, k_out = k_out)
-
-    if ( !(is.null(f)) ) { ## if forecast is provided, and syear of forecast is 1.
-      if ( nrow(f) == 1 ) {
-        res <- array(res, dim = c(1, length(probs_cat) + 1))
-      }
-    }
-
+    res <- .pred_lr(lm1 = lm1, df = tmp_df, loocv = loocv, probs_cat = probs_cat)
   }
   return(res)
 }
@@ -650,7 +483,7 @@
 #-----------------------------------
 # Function to train the logistic regressions.
 #-----------------------------------
-.train_lr <- function(df, loocv, k_out = 1) {
+.train_lr <- function(df, loocv) {
 
   require(nnet)
 
@@ -659,9 +492,7 @@
   if (loocv) {
 
-    lm1 <- lapply(1:(nrow(df)/k_out), function(j) {
-      window <- ((j-1)*k_out+1):((j-1)*k_out + k_out) # omit the data of the year including corresponding day
-      multinom(y ~ ., data = df[ -window, ], trace = FALSE)})
+    lm1 <- lapply(1:nrow(df), function(j) multinom(y ~ ., data = df[ -j, ], trace = FALSE))
 
   } else {
 
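# A minimal leave-one-out sketch of the .train_lr()/.pred_lr() pair used here:
# for each start date j, a multinomial model is fitted on all remaining dates
# and then evaluated on date j alone. Sizes and names are illustrative only.
library(nnet)
set.seed(1)
df_toy <- data.frame(x = rnorm(15, mean = rep(c(-1, 0, 1), each = 5)),
                     y = factor(rep(1:3, each = 5)))
models <- lapply(seq_len(nrow(df_toy)),
                 function(j) multinom(y ~ x, data = df_toy[-j, ], trace = FALSE))
probs  <- t(sapply(seq_len(nrow(df_toy)),
                   function(j) predict(models[[j]], df_toy[j, ], type = "probs")))
dim(probs)  # 15 x 3: one row of category probabilities per held-out start date

@@ -675,7 +506,7 @@
 #-----------------------------------
 # Function to apply the logistic regressions.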
#-----------------------------------
-.pred_lr <- function(df, lm1, f, loocv, probs_cat, k_out = 1) {
+.pred_lr <- function(df, lm1, loocv, probs_cat) {
 
   require(plyr)
 
@@ -684,25 +515,23 @@ LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats,
 
     # The error: "Error: Results must have the same dimensions." can
    # appear when the number of sdates is insufficient
 
-    pred_vals_ls <- list()
-    for (j in 1:length(lm1)) {
-      window <- ((j-1)*k_out+1):((j-1)*k_out + k_out) # test the daily data of the corresponding year
-      if (all(is.na(df[window, ]))) {
-        pred_vals_ls[[j]] <- array(rep(NA, length(window) * (length(probs_cat) + 1)),
-                                   dim = c(length(window), length(probs_cat) + 1))
+    pred_vals_ls <- list()
+    for (j in 1:nrow(df)) {
+      if (all(is.na(df[j, ]))) {
+        pred_vals_ls[[j]] <- rep(NA, length(probs_cat) + 1)
       } else {
-        pred_vals_ls[[j]] <- predict(lm1[[j]], df[window,], type = "probs")
+        pred_vals_ls[[j]] <- predict(lm1[[j]], df[j, ], type = "probs")
       }
     }
-    pred_vals <- do.call(rbind, pred_vals_ls)
+    pred_vals <- laply(pred_vals_ls, .fun = as.array)
 
-    if( length(probs_cat) + 1 == 2) {
-      pred_vals_dum<-array(NA, dim=c(nrow(df), 2))
-      pred_vals_dum[, 2]<-pred_vals
-      pred_vals_dum[, 1] <- 1-pred_vals
-      pred_vals <- pred_vals_dum
-      colnames(pred_vals) <- c(1, 2)
+    if (length(probs_cat) + 1 == 2) {
+      pred_vals_dum <- array(NA, dim = c(nrow(df), 2))
+      pred_vals_dum[, 2] <- pred_vals
+      pred_vals_dum[, 1] <- 1 - pred_vals
+      pred_vals <- pred_vals_dum
+      colnames(pred_vals) <- c(1, 2)
     }
 
   } else {
 
@@ -710,35 +539,16 @@ LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats,
     # type = class, probs
     #pred_vals_ls <- lapply(lm1, predict, data = df, type = "probs")
    #pred_vals <- unlist(pred_vals_ls)
-    if (is.null(f)) {
-      pred_vals <- predict(lm1[[1]], df, type = "probs")
+    pred_vals <- predict(lm1[[1]], df, type = "probs")
 
-      if( length(probs_cat) + 1 == 2) {
-        pred_vals_dum <- array(NA, dim = c(nrow(df), 2))
-        pred_vals_dum[, 2] <- pred_vals
-        pred_vals_dum[, 1] <- 1 - pred_vals
-        pred_vals<-pred_vals_dum
-        colnames(pred_vals) <- c(1,2)
-      }
-
-    } else {
-      if (is.null(dim(f))) {
-        pred_vals <- predict(lm1[[1]], newdata = data.frame(x = as.vector(f)), type = "probs")
-      } else {
-        pred_vals <- predict(lm1[[1]], newdata = data.frame(x = f), type = "probs")
-      }
-      if (length(probs_cat) + 1 == 2) {
-        if (is.null(dim(f))) {
-          pred_vals_dum <- matrix(NA, nrow = length(f), ncol = 2)
-        } else {
-          pred_vals_dum <- matrix(NA, nrow = dim(f)[1], ncol = 2)
-        }
-        pred_vals_dum[, 2] <- pred_vals
-        pred_vals_dum[, 1] <- 1 - pred_vals
-        pred_vals <- pred_vals_dum
-        colnames(pred_vals) <- c(1,2)
-      }
+    if (length(probs_cat) + 1 == 2) {
+      pred_vals_dum <- array(NA, dim = c(nrow(df), 2))
+      pred_vals_dum[, 2] <- pred_vals
+      pred_vals_dum[, 1] <- 1 - pred_vals
+      pred_vals <- pred_vals_dum
+      colnames(pred_vals) <- c(1, 2)
     }
 
   }
 
   return(pred_vals)
diff --git a/tools/check_recipe.R b/tools/check_recipe.R
index 9670c2f3ee5dc7d1493781e22cd3dbe084cbf183..42cb832c0fd5be6d2f64bc630308f5a7ec3ce705 100644
--- a/tools/check_recipe.R
+++ b/tools/check_recipe.R
@@ -370,7 +370,7 @@ check_recipe <- function(recipe) {
   DOWNSCAL_TYPES <- c("none", "int", "intbc", "intlr", "analogs", "logreg")
   BC_METHODS <- c("quantile_mapping", "bias", "evmos", "mse_min", "crps_min",
                   "rpc-based", "qm")
-  LR_METHODS <- c("basic", "large-scale", "9nn")
+  LR_METHODS <- c("basic", "large-scale", "4nn")
   LOGREG_METHODS <- c("ens_mean", "ens_mean_sd", "sorted_members")
   # Check downscaling type
   if ("type" %in% names(downscal_params)) {
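
# A minimal sketch of the two-category edge case handled in .pred_lr(): with
# only two classes, predict(..., type = "probs") returns a plain vector of
# P(class 2), so the two-column probability matrix has to be rebuilt by hand.
# Toy objects only.
library(nnet)
set.seed(1)
df2  <- data.frame(x = c(rnorm(8, -1), rnorm(8, 1)), y = factor(rep(1:2, each = 8)))
fit2 <- multinom(y ~ x, data = df2, trace = FALSE)
p2   <- predict(fit2, df2, type = "probs")  # numeric vector, one value per row
pmat <- cbind(`1` = 1 - p2, `2` = p2)       # rows sum to 1, as in .pred_lr()
head(pmat)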