diff --git a/R/Analogs.R b/R/Analogs.R
index 73b76ae56cd114536a07ec410e172b09e33b3067..b342a91a2291ddf06f0dd1ee614942777eb9b473 100644
--- a/R/Analogs.R
+++ b/R/Analogs.R
@@ -36,7 +36,8 @@
 #'The object is expected to be already subset for the desired region. Data can be in one
 #'or two integrated regions, e.g., crossing the Greenwich meridian. To get the correct
 #'results in the latter case, the borders of the region should be specified in the parameter
-#''region'. See parameter 'region'.
+#''region'. See parameter 'region'. Also, the object can be either hindcast or forecast data.
+#'However, if forecast data is provided, the 'loocv_window' parameter should be set to FALSE.
 #'@param obs an 's2dv_cube' object with named dimensions containing the observational field
 #'for the variable targeted for downscaling. The object must have, at least, the dimensions
 #'latitude, longitude and start date. The object is expected to be already subset for the
@@ -44,8 +45,7 @@
 #'@param obsL an 's2dv_cube' object with named dimensions containing the observational
 #'field of the large-scale variable. The object must have, at least, the dimensions latitude,
 #'longitude and start date. The object is expected to be already subset for the desired region.
-#'@param grid_exp a character vector with a path to an example file of the exp (if the
-#'predictor is the local scale variable) or expL (if the predictor is a large scale variable)
+#'@param grid_exp a character vector with a path to an example file of the exp
 #'data. It can be either a path to another NetCDF file which to read the target grid from
 #'(a single grid must be defined in such file) or a character vector indicating the
 #'coarse grid to be passed to CDO, and it must be a grid recognised by CDO.
@@ -159,7 +159,7 @@ CST_Analogs <- function(exp, obs, obsL = NULL, grid_exp, nanalogs = 3,
 #'the best analog, the function extracts the corresponding local-scale variable for that day
 #'from the observation of the local scale variable. The used local-scale and large-scale
 #'variables can be retrieved from independent regions. The input data for the first case must
-#'include 'exp' and 'obs,' while in the second case, 'obs,' 'obsL,' and 'expL' are the
+#'include 'exp' and 'obs', while in the second case, 'exp', 'obs', and 'obsL' are the
 #'required input fields. Users can perform the downscaling process over the subregions
 #'that can be identified through the 'region' and 'regionL' arguments, instead of focusing
 #'on the entire area of the loaded data.
@@ -180,7 +180,8 @@ CST_Analogs <- function(exp, obs, obsL = NULL, grid_exp, nanalogs = 3,
 #'The object is expected to be already subset for the desired region. Data can be in one
 #'or two integrated regions, e.g., crossing the Greenwich meridian. To get the correct
 #'results in the latter case, the borders of the region should be specified in the parameter
-#''region'. See parameter 'region'.
+#''region'. See parameter 'region'. Also, the object can be either hindcast or forecast data.
+#'However, if forecast data is provided, the 'loocv_window' parameter should be set to FALSE.
 #'@param obs an array with named dimensions containing the observational field for the variable
 #'targeted for downscaling. The object must have, at least, the dimensions latitude, longitude,
 #'start date and time. The object is expected to be already subset for the desired region.
@@ -205,8 +206,7 @@
 #'range from -90 to 90.
#'@param obsL_lons a numeric vector containing the longitude values in 'obsL'. Longitudes #'can range from -180 to 180 or from 0 to 360. -#'@param grid_exp a character vector with a path to an example file of the exp (if the -#'predictor is the local scale variable) or expL (if the predictor is a large scale variable) data. +#'@param grid_exp a character vector with a path to an example file of the exp data. #'It can be either a path to another NetCDF file which to read the target grid from #'(a single grid must be defined in such file) or a character vector indicating the #'coarse grid to be passed to CDO, and it must be a grid recognised by CDO. @@ -390,9 +390,14 @@ Analogs <- function(exp, obs, exp_lats = NULL, exp_lons = NULL, obs_lats, obs_lo # metric method to be used to specify the analogs stopifnot(metric %in% c("cor", "dcor", "dist")) - - + if ( (dim(exp)[sdate_dim] != dim(obs)[sdate_dim]) & loocv_window ) { + loocv_window <- FALSE + warning("The lengths of sdate_dim in the data provided through exp and obs are ", + "not the same. Thus, exp is considered as forecast data, and loocv_window ", + "is set to FALSE.") + } + if (!is.null(obsL)) { obs_train <- obsL obs_train_lats <- obsL_lats diff --git a/R/Intbc.R b/R/Intbc.R index 4b9527323aa58ecd67daf71870c5755e7b31492b..70043c1fe9cd1ac8e06fcceb3df5f81849c7e1eb 100644 --- a/R/Intbc.R +++ b/R/Intbc.R @@ -20,6 +20,10 @@ #'@param obs an 's2dv object' containing the observational field. The object #'must have, at least, the dimensions latitude, longitude and start date. The object is #'expected to be already subset for the desired region. +#'@param exp_cor an optional 's2dv_cube' object with named dimensions containing the seasonal +#'forecast experiment data. If the forecast is provided, it will be downscaled using the +#'hindcast and observations; if not provided, the hindcast will be downscaled instead. The +#'default value is NULL. #'@param target_grid a character vector indicating the target grid to be passed to CDO. #'It must be a grid recognised by CDO or a NetCDF file. #'@param bc_method a character vector indicating the bias adjustment method to be applied after @@ -62,13 +66,13 @@ #'dim(obs) <- c(lat = 12, lon = 15, sdate = 5) #'obs_lons <- seq(1,5, 4/14) #'obs_lats <- seq(1,4, 3/11) -#'exp <- s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons) -#'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons) -#'res <- CST_Intbc(exp = exp, obs = obs, target_grid = 'r1280x640', bc_method = 'simple_bias', int_method = 'conservative') +#'exp <- s2dv_cube(data = exp, coords = list(lat = exp_lats, lon = exp_lons)) +#'obs <- s2dv_cube(data = obs, coords = list(lat = obs_lats, lon = obs_lons)) +#'res <- CST_Intbc(exp = exp, obs = obs, target_grid = 'r1280x640', bc_method = 'bias', int_method = 'conservative') #'@export -CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, points = NULL, - method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", +CST_Intbc <- function(exp, obs, exp_cor = NULL, target_grid, bc_method, int_method = NULL, + points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", member_dim = "member", region = NULL, ncores = NULL, ...) 
{ if (!inherits(exp,'s2dv_cube')) { @@ -79,25 +83,38 @@ CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, point stop("Parameter 'obs' must be of the class 's2dv_cube'") } - res <- Intbc(exp = exp$data, obs = obs$data, exp_lats = exp$coords[[lat_dim]], exp_lons = exp$coords[[lon_dim]], - obs_lats = obs$coords[[lat_dim]], obs_lons = obs$coords[[lon_dim]], target_grid = target_grid, - int_method = int_method, bc_method = bc_method, points = points, - source_file_exp = exp$coords$source_files[1], source_file_obs = obs$coords$source_files[1], + res <- Intbc(exp = exp$data, obs = obs$data, exp_cor = exp_cor$data, + exp_lats = exp$coords[[lat_dim]], exp_lons = exp$coords[[lon_dim]], + obs_lats = obs$coords[[lat_dim]], obs_lons = obs$coords[[lon_dim]], + target_grid = target_grid, int_method = int_method, + bc_method = bc_method, points = points, + source_file_exp = exp$attrs$source_files[1], + source_file_obs = obs$attrs$source_files[1], method_point_interp = method_point_interp, lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim, member_dim = member_dim, region = region, ncores = ncores, ...) # Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data - exp$data <- res$data - exp$dims <- dim(exp$data) - exp$coords[[lon_dim]] <- res$lon - exp$coords[[lat_dim]] <- res$lat - obs$data <- res$obs obs$dims <- dim(obs$data) obs$coords[[lon_dim]] <- res$lon obs$coords[[lat_dim]] <- res$lat - res_s2dv <- list(exp = exp, obs = obs) + if (is.null(exp_cor)) { + exp$data <- res$data + exp$dims <- dim(exp$data) + exp$coords[[lon_dim]] <- res$lon + exp$coords[[lat_dim]] <- res$lat + + res_s2dv <- list(exp = exp, obs = obs) + } else { + exp_cor$data <- res$data + exp_cor$dims <- dim(exp_cor$data) + exp_cor$coords[[lon_dim]] <- res$lon + exp_cor$coords[[lat_dim]] <- res$lat + + res_s2dv <- list(exp = exp_cor, obs = obs) + } + return(res_s2dv) } @@ -123,7 +140,11 @@ CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, point #''region'. #'@param obs an array with named dimensions containing the observational field. The object #'must have, at least, the dimensions latitude, longitude and start date. The object is -#'expected to be already subset for the desired region. +#'expected to be already subset for the desired region. +#'@param exp_cor an optional array with named dimensions containing the seasonal forecast +#'experiment data. If the forecast is provided, it will be downscaled using the hindcast and +#'observations; if not provided, the hindcast will be downscaled instead. The default value +#'is NULL. #'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must #'range from -90 to 90. #'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes @@ -187,10 +208,11 @@ CST_Intbc <- function(exp, obs, target_grid, bc_method, int_method = NULL, point #'res <- Intbc(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons, obs_lats = obs_lats, #'obs_lons = obs_lons, target_grid = 'r1280x640', bc_method = 'simple_bias', int_method = 'conservative') #'@export -Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, bc_method, int_method = NULL, - points = NULL, method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", - time_dim = "time", member_dim = "member", source_file_exp = NULL, source_file_obs = NULL, - region = NULL, ncores = NULL, ...) 
{ +Intbc <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, + bc_method, int_method = NULL, points = NULL, method_point_interp = NULL, + lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", + time_dim = "time", member_dim = "member", source_file_exp = NULL, + source_file_obs = NULL, region = NULL, ncores = NULL, ...) { if (!inherits(bc_method, 'character')) { stop("Parameter 'bc_method' must be of the class 'character'") @@ -238,6 +260,27 @@ Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, "'crps_min', 'rpc-based'. The abbreviations 'dbc','qm' can also be used.") } + if (!is.null(exp_cor)) { + if (is.na(match(lon_dim, names(dim(exp_cor))))) { + stop("Missing longitude dimension in 'exp_cor', or does not match the parameter ", + "'lon_dim'") + } + + if (is.na(match(lat_dim, names(dim(exp_cor))))) { + stop("Missing latitude dimension in 'exp_cor', or does not match the parameter ", + "'lat_dim'") + } + + if (is.na(match(sdate_dim, names(dim(exp_cor))))) { + stop("Missing start date dimension in 'exp_cor', or does not match the parameter ", + "'sdate_dim'") + } + + if (is.na(match(member_dim, names(dim(exp_cor))))) { + stop("Missing member dimension in 'exp_cor', or does not match the parameter 'member_dim'") + } + } + # When observations are pointwise if (!is.null(points) & !is.na(match("location", names(dim(obs))))) { point_obs <- T @@ -259,9 +302,9 @@ Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, } if (is.null(region)) { - warning("The borders of the downscaling region have not been provided. Assuming the four borders ", - "of the downscaling region are defined by the first and last elements of the parametres ", - "'obs_lats' and 'obs_lons'.") + warning("The borders of the downscaling region have not been provided. 
Assuming the ",
+ "four borders of the downscaling region are defined by the first and last ",
+ "elements of the parameters 'obs_lats' and 'obs_lons'.")
 region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)])
 }
 ## ncores
 if (!is.null(ncores)) {
@@ -272,18 +315,34 @@
 }
 }
- exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid,
- method_remap = int_method, points = points, source_file = source_file_exp,
- lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp,
+ exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons,
+ target_grid = target_grid, method_remap = int_method,
+ points = points, source_file = source_file_exp,
+ lat_dim = lat_dim, lon_dim = lon_dim,
+ method_point_interp = method_point_interp,
 region = region, ncores = ncores)
+
+ exp_cor_interpolated <- NULL
+
+ if (!is.null(exp_cor)) {
+ exp_cor_interpolated <- Interpolation(exp = exp_cor, lats = exp_lats, lons = exp_lons,
+ target_grid = target_grid, method_remap = int_method,
+ points = points, source_file = source_file_exp,
+ lat_dim = lat_dim, lon_dim = lon_dim,
+ method_point_interp = method_point_interp,
+ region = region, ncores = ncores)
+
+ }
 # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to
 # the same grid to force the matching
 if ((!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats,
 lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !(point_obs)) {
- obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid,
- method_remap = int_method, points = points, source_file = source_file_obs,
- lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp,
+ obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons,
+ target_grid = target_grid, method_remap = int_method,
+ points = points, source_file = source_file_obs,
+ lat_dim = lat_dim, lon_dim = lon_dim,
+ method_point_interp = method_point_interp,
 region = region, ncores = ncores)
 obs_ref <- obs_interpolated$data
 } else {
@@ -306,10 +365,17 @@
 if (bc_method == 'qm' | bc_method == 'quantile_mapping') {
- res <- QuantileMapping(exp = exp_interpolated$data, obs = obs_ref, na.rm = TRUE,
- memb_dim = member_dim, sdate_dim = sdate_dim, ncores = ncores, ...)
+ res <- QuantileMapping(exp = exp_interpolated$data, obs = obs_ref,
+ exp_cor = exp_cor_interpolated$data,
+ na.rm = TRUE, memb_dim = member_dim, sdate_dim = sdate_dim,
+ ncores = ncores, ...)
 } else if (bc_method == 'dbc' | bc_method == 'dynamical_bias') {
+ # Dynamical bias correction is not yet prepared to handle hindcast-forecast data:
+ # only the downscaled hindcast will be returned
+ if (!is.null(exp_cor)) {
+ warning("For the dynamical bias correction, only the downscaled hindcast will be returned.")
+ }
 # the temporal dimension must be only one dimension called "time"
 if (all(c(time_dim, sdate_dim) %in% names(dim(exp_interpolated$data)))) {
 exp_interpolated$data <- Apply(exp_interpolated$data, target_dims = c(time_dim, sdate_dim),
@@ -328,8 +394,10 @@
 if (dim(obs_ref)[sdate_dim] == 1) {
 warning('Simple Bias Correction should not be used with only one observation. Returning NA.')
 }
- res <- Calibration(exp = exp_interpolated$data, obs = obs_ref, memb_dim = member_dim,
- sdate_dim = sdate_dim, ncores = ncores, cal.method = bc_method)
+ res <- Calibration(exp = exp_interpolated$data, obs = obs_ref,
+ exp_cor = exp_cor_interpolated$data,
+ memb_dim = member_dim, sdate_dim = sdate_dim, ncores = ncores,
+ cal.method = bc_method)
 }
 # Return a list of three elements
diff --git a/R/Interpolation.R b/R/Interpolation.R
index 5d5f70b8d074f50c4ef391c78bb32f51c75ed191..51de9ec0748f9e1b2c575e6ecd3664c14cf03d48 100644
--- a/R/Interpolation.R
+++ b/R/Interpolation.R
@@ -48,7 +48,7 @@
 #'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time = 1)
 #'lons <- 1:5
 #'lats <- 1:4
-#'exp <- s2dv_cube(data = exp, lat = lats, lon = lons)
+#'exp <- s2dv_cube(data = exp, coords = list(lat = lats, lon = lons))
 #'res <- CST_Interpolation(exp = exp, method_remap = 'conservative', target_grid = 'r1280x640')
 #'@export
 CST_Interpolation <- function(exp, points = NULL, method_remap = NULL, target_grid = NULL,
@@ -68,8 +68,8 @@ CST_Interpolation <- function(exp, points = NULL, method_remap = NULL, target_gr
 stop("The name of the latitude/longitude dimensions in 'exp$data' must match the parametres 'lat_dim' and 'lon_dim'")
 }
- res <- Interpolation(exp = exp$data, lats = exp$coords[[lat_dim]], lons = exp$coords[[lon_dim]],
- source_file = exp$coords$source_files[1], points = points,
+ res <- Interpolation(exp = exp$data, lats = exp$coords[[lat_dim]], lons = exp$coords[[lon_dim]],
+ source_file = exp$attrs$source_files[1], points = points,
 method_remap = method_remap, target_grid = target_grid, lat_dim = lat_dim, lon_dim = lon_dim,
 region = region, method_point_interp = method_point_interp, ncores = ncores)
@@ -334,10 +334,10 @@ Interpolation <- function(exp, lats, lons, points = NULL, source_file = NULL, me
 if (griddes$yinc < 0) {
 system(paste0('cdo invertlat ', nc_cropped1, ' ', nc_cropped2))
 griddes <- .get_griddes(nc_cropped2)
+ system(paste0('rm ', nc_cropped2))
 }
 # remove temporary files
 system(paste0('rm ', nc_cropped1))
- system(paste0('rm ', nc_cropped2))
 if (is.null(griddes)) {
 stop("'griddes' not found in the NetCDF source files")
@@ -679,7 +679,9 @@ Interpolation <- function(exp, lats, lons, points = NULL, source_file = NULL, me
 # sdate, time, ensemble, num_procs, etc)
 #======================
 .get_model_data <- function(weights.df, mdata, ncores = NULL) {
-
+
+ require(plyr)
+
 #-----------------
 # Get data for all combinations of i and j.
 # (inefficient, getting many unneded pairs).
diff --git a/R/Intlr.R b/R/Intlr.R
index f108877b7d55a6016b437a17a8144d6c68b28198..953031fe5dcbcfedc79ea3200f2c921ee92fe960 100644
--- a/R/Intlr.R
+++ b/R/Intlr.R
@@ -22,16 +22,24 @@
 #'@param obs an 's2dv object' containing the observational field. The object
 #'must have, at least, the dimensions latitude, longitude and start date. The object is
 #'expected to be already subset for the desired region.
-#'@param lr_method a character vector indicating the linear regression method to be applied
-#'after the interpolation. Accepted methods are 'basic', 'large-scale' and '4nn'. The 'basic'
-#'method fits a linear regression using high resolution observations as predictands and the
-#'interpolated model data as predictor. Then, the regression equation is to the interpolated
-#'model data to correct the interpolated values. The 'large-scale' method fits a linear
-#'regression with large-scale predictors from the same model (e.g.
teleconnection indices) -#'as predictors and high-resolution observations as predictands. This equation is then -#'applied to the interpolated model values. Finally, the '4nn' method uses a linear -#'regression with the four nearest neighbours as predictors and high-resolution observations -#'as predictands. It is then applied to model data to correct the interpolated values. +#'@param exp_cor an optional 's2dv_cube' object with named dimensions containing the seasonal +#'forecast experiment data. If the forecast is provided, it will be downscaled using the +#'hindcast and observations; if not provided, the hindcast will be downscaled instead. The +#'default value is NULL. +#'@param lr_method a character vector indicating the linear regression method to be applied. +#'Accepted methods are 'basic', 'large-scale' and '9nn'. The 'basic' method fits a +#'linear regression using high resolution observations as predictands and the +#'interpolated model data as predictor. Then, the regression equation is applied to the +#'interpolated model data to correct the interpolated values. The 'large-scale' method +#'fits a linear regression with large-scale predictors (e.g. teleconnection indices) as +#'predictors and high-resolution observations as predictands. This equation is then applied +#'to the interpolated model values. Finally, the '9nn' method uses a linear regression +#'with the nine nearest neighbours as predictors and high-resolution observations as +#'predictands. Instead of constructing a regression model using all nine predictors, +#'principal component analysis is applied to the data of neighboring grids to reduce the +#'dimension of the predictors. The linear regression model is then built using the principal +#'components that explain 95% of the variance. The '9nn' method does not require a +#'pre-interpolation process. #'@param target_grid a character vector indicating the target grid to be passed to CDO. #'It must be a grid recognised by CDO or a NetCDF file. 
#'@param points a list of two elements containing the point latitudes and longitudes @@ -82,44 +90,87 @@ #'dim(obs) <- c(lat = 12, lon = 15, sdate = 5) #'obs_lons <- seq(1,5, 4/14) #'obs_lats <- seq(1,4, 3/11) -#'exp <- s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons) -#'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons) +#'exp <- s2dv_cube(data = exp, coords = list(lat = exp_lats, lon = exp_lons)) +#'obs <- s2dv_cube(data = obs, coords = list(lat = obs_lats, lon = obs_lons)) #'res <- CST_Intlr(exp = exp, obs = obs, target_grid = 'r1280x640', lr_method = 'basic', int_method = 'conservative') #'@export -CST_Intlr <- function(exp, obs, lr_method, target_grid = NULL, points = NULL, int_method = NULL, - method_point_interp = NULL, predictors = NULL, lat_dim = "lat", lon_dim = "lon", - sdate_dim = "sdate", time_dim = "time", member_dim = "member", - large_scale_predictor_dimname = 'vars', loocv = TRUE, region = NULL, ncores = NULL) { +CST_Intlr <- function(exp, obs, exp_cor = NULL, lr_method, target_grid = NULL, points = NULL, + int_method = NULL, method_point_interp = NULL, predictors = NULL, + lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", + member_dim = "member", large_scale_predictor_dimname = 'vars', + loocv = TRUE, region = NULL, ncores = NULL) { - if (!inherits(exp,'s2dv_cube')) { + if (!inherits(exp, 's2dv_cube')) { stop("Parameter 'exp' must be of the class 's2dv_cube'") } - if (!inherits(obs,'s2dv_cube')) { + if (!inherits(obs, 's2dv_cube')) { stop("Parameter 'obs' must be of the class 's2dv_cube'") } - - res <- Intlr(exp = exp$data, obs = obs$data, exp_lats = coords[[lat_dim]], exp_lons = exp$coords[[lon_dim]], - obs_lats = obs$coords[[lat_dim]], obs_lons = obs$coords[[lon_dim]], points = points, - source_file_exp = exp$coords$source_files[1], source_file_obs = obs$coords$source_files[1], + + exp_cor_aux <- NULL + + if (!is.null(exp_cor)) { + if (identical(lr_method, 'basic') | identical(lr_method, '9nn')) { + if (!inherits(exp_cor, 's2dv_cube')) { + stop("Parameter 'exp_cor' must be of the class 's2dv_cube'") + } + exp_cor_aux <- exp_cor$data + # when large-scale is selected, the forecast object does not have to be an s2dv_cube object + } else if (identical(lr_method, 'large-scale')) { + if (!inherits(exp_cor, 's2dv_cube')) { + exp_cor_aux <- exp_cor + } else { + exp_cor_aux <- exp_cor$data + } + } + } + res <- Intlr(exp = exp$data, obs = obs$data, exp_cor = exp_cor_aux, + exp_lats = exp$coords[[lat_dim]], exp_lons = exp$coords[[lon_dim]], + obs_lats = obs$coords[[lat_dim]], obs_lons = obs$coords[[lon_dim]], + points = points, source_file_exp = exp$attrs$source_files[1], + source_file_obs = obs$attrs$source_files[1], target_grid = target_grid, lr_method = lr_method, int_method = int_method, method_point_interp = method_point_interp, predictors = predictors, lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim, time_dim = time_dim, - member_dim = member_dim, large_scale_predictor_dimname = large_scale_predictor_dimname, + member_dim = member_dim, + large_scale_predictor_dimname = large_scale_predictor_dimname, loocv = loocv, region = region, ncores = ncores) - + # Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data - exp$data <- res$data - exp$dims <- dim(exp$data) - exp$coords[[lon_dim]] <- res$lon - exp$coords[[lat_dim]] <- res$lat - obs$data <- res$obs obs$dims <- dim(obs$data) obs$coords[[lon_dim]] <- res$lon obs$coords[[lat_dim]] <- res$lat - - res_s2dv <- list(exp = exp, obs = obs) + + if 
(is.null(exp_cor)) {
+ exp$data <- res$data
+ exp$dims <- dim(exp$data)
+ exp$coords[[lon_dim]] <- res$lon
+ exp$coords[[lat_dim]] <- res$lat
+
+ res_s2dv <- list(exp = exp, obs = obs)
+ } else {
+ if (identical(lr_method, 'basic') | identical(lr_method, '9nn')) {
+ exp_cor$data <- res$data
+ exp_cor$dims <- dim(exp_cor$data)
+ exp_cor$coords[[lon_dim]] <- res$lon
+ exp_cor$coords[[lat_dim]] <- res$lat
+ # when large-scale is selected, the forecast object does not have to be an s2dv_cube object
+ } else if (identical(lr_method, 'large-scale')) {
+ if (!inherits(exp_cor, 's2dv_cube')) {
+ exp_cor <- suppressWarnings(s2dv_cube(res$data, coords = list(lat = res$lat, lon = res$lon)))
+ } else {
+ exp_cor$data <- res$data
+ exp_cor$dims <- dim(exp_cor$data)
+ exp_cor$coords[[lon_dim]] <- res$lon
+ exp_cor$coords[[lat_dim]] <- res$lat
+ }
+ }
+
+ res_s2dv <- list(exp = exp_cor, obs = obs)
+ }
+
 return(res_s2dv)
 }
@@ -147,6 +198,10 @@ CST_Intlr <- function(exp, obs, lr_method, target_grid = NULL, points = NULL, in
 #'@param obs an array with named dimensions containing the observational field. The object
 #'must have, at least, the dimensions latitude, longitude and start date. The object is
 #'expected to be already subset for the desired region.
+#'@param exp_cor an optional array with named dimensions containing the seasonal forecast
+#'experiment data. If the forecast is provided, it will be downscaled using the hindcast and
+#'observations; if not provided, the hindcast will be downscaled instead. The default value
+#'is NULL.
 #'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must
 #'range from -90 to 90.
 #'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes
@@ -155,16 +210,20 @@ CST_Intlr <- function(exp, obs, lr_method, target_grid = NULL, points = NULL, in
 #'range from -90 to 90.
 #'@param obs_lons a numeric vector containing the longitude values in 'obs'. Longitudes
 #'can range from -180 to 180 or from 0 to 360.
-#'@param lr_method a character vector indicating the linear regression method to be applied
-#'after the interpolation. Accepted methods are 'basic', 'large-scale' and '4nn'. The 'basic'
-#'method fits a linear regression using high resolution observations as predictands and the
-#'interpolated model data as predictor. Then, the regression equation is to the interpolated
-#'model data to correct the interpolated values. The 'large-scale' method fits a linear
-#'regression with large-scale predictors from the same model (e.g. teleconnection indices)
-#'as predictors and high-resolution observations as predictands. This equation is then
-#'applied to the interpolated model values. Finally, the '4nn' method uses a linear
-#'regression with the four nearest neighbours as predictors and high-resolution observations
-#'as predictands. It is then applied to model data to correct the interpolated values.
+#'@param lr_method a character vector indicating the linear regression method to be applied.
+#'Accepted methods are 'basic', 'large-scale' and '9nn'. The 'basic' method fits a
+#'linear regression using high resolution observations as predictands and the
+#'interpolated model data as predictor. Then, the regression equation is applied to the
+#'interpolated model data to correct the interpolated values. The 'large-scale' method
+#'fits a linear regression with large-scale predictors (e.g. teleconnection indices) as
+#'predictors and high-resolution observations as predictands. This equation is then applied
+#'to the interpolated model values. Finally, the '9nn' method uses a linear regression
+#'with the nine nearest neighbours as predictors and high-resolution observations as
+#'predictands. Instead of constructing a regression model using all nine predictors,
+#'principal component analysis is applied to the data of neighboring grids to reduce the
+#'dimension of the predictors. The linear regression model is then built using the principal
+#'components that explain 95% of the variance. The '9nn' method does not require a
+#'pre-interpolation process.
 #'@param target_grid a character vector indicating the target grid to be passed to CDO.
 #'It must be a grid recognised by CDO or a NetCDF file.
 #'@param points a list of two elements containing the point latitudes and longitudes
@@ -222,11 +281,12 @@ CST_Intlr <- function(exp, obs, lr_method, target_grid = NULL, points = NULL, in
 #'res <- Intlr(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons, obs_lats = obs_lats,
 #'obs_lons = obs_lons, target_grid = 'r1280x640', lr_method = 'basic', int_method = 'conservative')
 #'@export
-Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, target_grid = NULL, points = NULL,
- int_method = NULL, method_point_interp = NULL, source_file_exp = NULL, source_file_obs = NULL,
- predictors = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time",
- member_dim = "member", region = NULL, large_scale_predictor_dimname = 'vars', loocv = TRUE,
- ncores = NULL) {
+Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lons, lr_method,
+ target_grid = NULL, points = NULL, int_method = NULL, method_point_interp = NULL,
+ source_file_exp = NULL, source_file_obs = NULL, predictors = NULL,
+ lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time",
+ member_dim = "member", region = NULL, large_scale_predictor_dimname = 'vars',
+ loocv = TRUE, ncores = NULL) {
 #-----------------------------------
 # Checkings
 #-----------------------------------
@@ -274,6 +334,43 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t
 "'sdate_dim'")
 }
+ if (!is.null(exp_cor)) {
+
+ if (is.na(match(sdate_dim, names(dim(exp_cor))))) {
+ stop("Missing start date dimension in 'exp_cor', or does not match the parameter ",
+ "'sdate_dim'")
+ }
+
+ if (is.na(match(member_dim, names(dim(exp_cor))))) {
+ stop("Missing member dimension in 'exp_cor', or does not match the parameter 'member_dim'")
+ }
+
+ if (loocv) { # set loocv to FALSE to train with the whole hindcast and predict with the forecast
+ loocv <- FALSE
+ warning("Forecast data will be downscaled. 'loocv' is set to FALSE ",
+ "to train the model with the whole hindcast and predict with the forecast.")
+ }
+
+ if (is.null(predictors)) {
+
+ if (is.na(match(lon_dim, names(dim(exp_cor))))) {
+ stop("Missing longitude dimension in 'exp_cor', or does not match the parameter ",
+ "'lon_dim'")
+ }
+
+ if (is.na(match(lat_dim, names(dim(exp_cor))))) {
+ stop("Missing latitude dimension in 'exp_cor', or does not match the parameter ",
+ "'lat_dim'")
+ }
+ }
+ }
+
+ if (lr_method == '9nn') {
+ warning("The '9nn' method skips the interpolation step, since it inherently downscales ",
+ "the coarse resolution data to the reference data resolution without the need ",
+ "for interpolation.")
+ }
+
 # When observations are pointwise
 if (!is.null(points) & !is.na(match("location", names(dim(obs))))) {
 point_obs <- T
@@ -295,10 +392,6 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t
 "through the parameter 'method_point_interp'.")
 }
- # sdate must be the time dimension in the input data
- stopifnot(sdate_dim %in% names(dim(exp)))
- stopifnot(sdate_dim %in% names(dim(obs)))
-
 # the code is not yet prepared to handle members in the observations
 restore_ens <- FALSE
 if (member_dim %in% names(dim(obs))) {
@@ -321,6 +414,17 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t
 stopifnot(sdate_dim %in% names(dim(predictors)))
 stopifnot(dim(predictors)[sdate_dim] == dim(exp)[sdate_dim])
 }
+ # forecasts for the large scale predictors
+ if (!is.null(exp_cor)) {
+ if (is.na(match(large_scale_predictor_dimname, names(dim(exp_cor))))) {
+ stop("Missing large scale predictor dimension in 'exp_cor', or does not match ",
+ "the parameter 'large_scale_predictor_dimname'")
+ }
+ if (!identical(dim(exp_cor)[names(dim(exp_cor)) == large_scale_predictor_dimname],
+ dim(predictors)[names(dim(predictors)) == large_scale_predictor_dimname])) {
+ stop("Large scale predictor dimension in exp_cor and predictors must be identical.")
+ }
+ }
 }
 ## ncores
 if (!is.null(ncores)) {
@@ -333,7 +437,7 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t
 #-----------------------------------
 # Interpolation
 #-----------------------------------
- if (lr_method != '4nn') {
+ if (lr_method != '9nn') {
 if (is.null(int_method)) {
 stop("Parameter 'int_method' must be a character vector indicating the interpolation method.
", @@ -347,19 +451,32 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)]) } - exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid, - points = points, method_point_interp = method_point_interp, - source_file = source_file_exp, lat_dim = lat_dim, lon_dim = lon_dim, - method_remap = int_method, region = region, ncores = ncores) + exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, + target_grid = target_grid, points = points, + method_point_interp = method_point_interp, + source_file = source_file_exp, lat_dim = lat_dim, + lon_dim = lon_dim, method_remap = int_method, + region = region, ncores = ncores) + + if (!is.null(exp_cor) & is.null(predictors)) { + exp_cor_interpolated <- Interpolation(exp = exp_cor, lats = exp_lats, lons = exp_lons, + target_grid = target_grid, points = points, + method_point_interp = method_point_interp, + source_file = source_file_exp, lat_dim = lat_dim, + lon_dim = lon_dim, method_remap = int_method, + region = region, ncores = ncores) + } # If after interpolating 'exp' data the coordinates do not match, the obs data is interpolated to # the same grid to force the matching - if ((!.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats, - lon1 = exp_interpolated$lon, lon2 = obs_lons)) | !(point_obs)) { - obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid, - points = points, method_point_interp = method_point_interp, - source_file = source_file_obs, lat_dim = lat_dim, lon_dim = lon_dim, - method_remap = int_method, region = region, ncores = ncores) + if ((!suppressWarnings(.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats, + lon1 = exp_interpolated$lon, lon2 = obs_lons))) | !(point_obs)) { + obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, + target_grid = target_grid, points = points, + method_point_interp = method_point_interp, + source_file = source_file_obs, lat_dim = lat_dim, + lon_dim = lon_dim, method_remap = int_method, + region = region, ncores = ncores) lats <- obs_interpolated$lat lons <- obs_interpolated$lon @@ -383,6 +500,17 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t target_dims_predictor <- sdate_dim target_dims_predictand <- sdate_dim + + if (!is.null(exp_cor)) { + aux_dim <- NULL + forecast <- exp_cor_interpolated$data + target_dims_predictor <- c(sdate_dim, member_dim) + target_dims_forecast <- c(sdate_dim, member_dim) + } else { + forecast <- NULL + target_dims_forecast <- NULL + } + } # (Multi) linear regression with large-scale predictors @@ -398,16 +526,37 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t target_dims_predictor <- c(sdate_dim, large_scale_predictor_dimname) target_dims_predictand <- sdate_dim + + if (!is.null(exp_cor)) { + aux_dim <- large_scale_predictor_dimname + if (!inherits(exp_cor, 's2dv_cube')) { + forecast <- exp_cor + } else { + forecast <- exp_cor$data + } + target_dims_predictor <- c(sdate_dim, large_scale_predictor_dimname, member_dim) + target_dims_forecast <- c(sdate_dim, large_scale_predictor_dimname, member_dim) + } } # Multi-linear regression with the four nearest neighbours # Predictors: model data # Predictand: observations - else if (lr_method == '4nn') { - - predictor <- .find_nn(coar = exp, lats_hres = obs_lats, lons_hres = obs_lons, 
lats_coar = exp_lats, - lons_coar = exp_lons, lat_dim = lat_dim, lon_dim = lon_dim, nn = 4, ncores = ncores) + else if (lr_method == '9nn') { + predictor <- .find_nn(coar = exp, lats_hres = obs_lats, lons_hres = obs_lons, + lats_coar = exp_lats, lons_coar = exp_lons, lat_dim = lat_dim, + lon_dim = lon_dim, sdate_dim = sdate_dim, member_dim = member_dim, + nn = 9, ncores = ncores) + + if (!is.null(exp_cor)) { + aux_dim <- 'nn' + forecast <- .find_nn(coar = exp_cor, lats_hres = obs_lats, lons_hres = obs_lons, + lats_coar = exp_lats, lons_coar = exp_lons, lat_dim = lat_dim, + lon_dim = lon_dim, sdate_dim = sdate_dim, member_dim = member_dim, + nn = 9, ncores = ncores) + } + if (is.null(points) | ("location" %in% names(dim(obs)))) { if (!is.null(target_grid)) { warning("Interpolating to the 'obs' grid") @@ -416,44 +565,65 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t lats <- obs_lats lons <- obs_lons - } - # If the downscaling is to point locations: Once the 4 nearest neighbours have been found, + } else { + # If the downscaling is to point locations: Once the 9 nearest neighbours have been found, # interpolate to point locations - else { - predictor <- Interpolation(exp = predictor, lats = obs_lats, lons = obs_lons, target_grid = NULL, + + predictor <- Interpolation(exp = predictor, lats = obs_lats, lons = obs_lons, + lon_dim = lon_dim, lat_dim = lat_dim, target_grid = NULL, points = points, method_point_interp = method_point_interp, - source_file = source_file_obs, method_remap = NULL, region = region, ncores = ncores) + source_file = source_file_obs, method_remap = NULL, + region = region, ncores = ncores) - predictand <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = NULL, + predictand <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, + lon_dim = lon_dim, lat_dim = lat_dim, target_grid = NULL, points = points, method_point_interp = method_point_interp, - source_file = source_file_obs, method_remap = NULL, region = region, ncores = ncores) - + source_file = source_file_obs, method_remap = NULL, + region = region, ncores = ncores) + + if (!is.null(exp_cor)) { + forecast <- Interpolation(exp = forecast, lats = obs_lats, lons = obs_lons, + lon_dim = lon_dim, lat_dim = lat_dim, target_grid = NULL, + points = points, method_point_interp = method_point_interp, + source_file = source_file_obs, method_remap = NULL, + region = region, ncores = ncores) + forecast <- forecast$data + } + lats <- predictor$lat lons <- predictor$lon predictor <- predictor$data predictand <- predictand$data } - - target_dims_predictor <- c(sdate_dim,'nn') - target_dims_predictand <- sdate_dim + + target_dims_predictand <- sdate_dim + + if (!is.null(exp_cor)) { + target_dims_predictor <- c(sdate_dim,'nn', member_dim) + target_dims_forecast <- c(sdate_dim,'nn', member_dim) + } else { + target_dims_predictor <- c(sdate_dim,'nn') + } + } else { + + stop(paste0(lr_method, " method is not implemented yet")) } + # Apply the linear regressions + # case hindcast - forecast + if (!is.null(exp_cor)) { + res <- Apply(list(predictor, predictand, forecast), + target_dims = list(target_dims_predictor, target_dims_predictand, + target_dims_forecast), + fun = .intlr, loocv = loocv, aux_dim = aux_dim, ncores = ncores)$output1 + } + # case hindcast only else { - stop(paste0(lr_method, " method is not implemented yet")) + res <- Apply(list(predictor, predictand), target_dims = list(target_dims_predictor, + target_dims_predictand), + fun = .intlr, loocv = loocv, 
ncores = ncores)$output1
+ names(dim(res))[1] <- sdate_dim
 }
-
- print(paste0('dim predictor',dim(predictor)))
- print(paste0('dim predictand',dim(predictand)))
- print(dim(list(predictor[1])))
- # Apply the linear regressions
-
-
-
- res <- Apply(list(predictor, predictand), target_dims = list(target_dims_predictor, target_dims_predictand),
- fun = .intlr, loocv = loocv, ncores = ncores)$output1
-
- names(dim(res))[1] <- sdate_dim
- # names(dim(res))[which(names(dim(res)) == '')]
 # restore ensemble dimension in observations if it existed originally
 if (restore_ens) {
@@ -463,30 +633,76 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t
 # Return a list of three elements
 res <- list(data = res, obs = predictand, lon = lons, lat = lats)
+
 return(res)
 }
 #-----------------------------------
 # Atomic function to generate and apply the linear regressions
 #-----------------------------------
-.intlr <- function(x, y, loocv) {
-
- tmp_df <- data.frame(x = x, y = y)
+.intlr <- function(x, y, f = NULL, loocv, aux_dim = NULL) {
+
+ require(plyr)
+
+ if (!is.null(f)) {
+ if (!is.null(aux_dim)) {
+ tmp_df <- data.frame(x = adply(x, .margins = 3, .id = NULL), y = y)
+ } else {
+ tmp_df <- data.frame(x = as.vector(x), y = y)
+ }
+ } else {
+ tmp_df <- data.frame(x = x, y = y)
+ }
 # if the data is all NA, force return return NA
 if (all(is.na(tmp_df)) | (sum(apply(tmp_df, 2, function(x) !all(is.na(x)))) == 1)) {
-
- n <- nrow(tmp_df)
- res <- rep(NA, n)
-
+ if (is.null(f)) {
+ res <- rep(NA, nrow(tmp_df))
+ } else {
+ if (!is.null(aux_dim)) {
+ res <- array(NA, dim(f)[names(dim(f)) != aux_dim])
+ } else {
+ res <- array(NA, dim(f))
+ }
+ }
 } else {
 # training
- lm1 <- .train_lm(df = tmp_df, loocv = loocv)
+ ## apply principal components if method is 9nn
+ if (any(names(dim(x)) == "nn")) {
+ PR <- prcomp(~ x.1 + x.2 + x.3 + x.4 + x.5 + x.6 + x.7 + x.8 + x.9,
+ data = tmp_df[, -ncol(tmp_df)],
+ na.action = na.omit, scale = FALSE) ## prcomp between predictors
+ ## 95% of variability should be explained by the principal components
+ N_predictors <- which(summary(PR)$importance["Cumulative Proportion", ] > .95)[1]
+ PR_mat <- PR$x[, 1:N_predictors, drop = FALSE]
+ if (!is.null(PR$na.action)) { ## if there are NAs, relocate them to the correct order
+ dum <- array(NA, dim = c(nrow(tmp_df), ncol(PR_mat)))
+ dum[as.numeric(rownames(PR_mat)), ] <- PR_mat
+ PR_mat <- dum
+ }
+ tmp_df <- data.frame(x = PR_mat, y = y)
+ colnames(tmp_df) <- c(paste0("x.", 1:(ncol(tmp_df) - 1)), "y")
+ }
+
+ lm1 <- .train_lm(df = tmp_df, loocv = loocv)
+
 # prediction
- res <- .pred_lm(lm1 = lm1, df = tmp_df, loocv = loocv)
+ res <- .pred_lm(lm1 = lm1, df = tmp_df, f = f, loocv = loocv, aux_dim = aux_dim)
 }
-
- return(res)
+
+ return(res)
 }
 #-----------------------------------
@@ -517,7 +733,9 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t
 #-----------------------------------
 # Function to apply the linear regressions.
 #-----------------------------------
-.pred_lm <- function(df, lm1, loocv) {
+.pred_lm <- function(df, lm1, f, loocv, aux_dim) {
+
+ require(plyr)
 if (loocv) {
 pred_vals <- sapply(1:nrow(df), function(j) {
@@ -527,33 +745,95 @@ Intlr <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, t
 return(predict(lm1[[j]], df[j,]))
 }})
 } else {
- if (!is.na(lm1)) {
- pred_vals_ls <- lapply(lm1, predict, data = df)
- pred_vals <- unlist(pred_vals_ls)
+ if (!is.na(lm1)) {
+ # case to downscale hindcasts
+ if (is.null(f)) {
+ pred_vals_ls <- lapply(lm1, predict, data = df)
+ pred_vals <- unlist(pred_vals_ls)
+ # case to downscale forecasts
+ } else {
+ if (!is.null(aux_dim)) {
+ # 9nn & large-scale
+ fcst_df <- adply(f, .margins = 3, .id = NULL)
+ colnames(fcst_df) <- paste0('x.', 1:ncol(fcst_df))
+ pred_vals_ls <- lapply(lm1, predict, newdata = fcst_df)
+ pred_vals <- unlist(pred_vals_ls)
+ dim(pred_vals) <- dim(f)[names(dim(f)) != aux_dim]
+ } else {
+ # basic
+ pred_vals_ls <- lapply(lm1, predict, newdata = data.frame(x = as.vector(f)))
+ pred_vals <- unlist(pred_vals_ls)
+ dim(pred_vals) <- dim(f)
+ }
+ }
 } else {
- pred_vals <- rep(NA, nrow(df))
+ if (is.null(f)) {
+ pred_vals <- rep(NA, nrow(df))
+ } else {
+ if (!is.null(aux_dim)) {
+ pred_vals <- array(NA, dim(f)[names(dim(f)) != aux_dim])
+ } else {
+ pred_vals <- array(NA, dim(f))
+ }
+ }
 }
- }
+ }
+
 return(pred_vals)
 }
 #-----------------------------------
 # Function to find N nearest neighbours.
 # 'coar' is an array with named dimensions
 #-----------------------------------
-.find_nn <- function(coar, lats_hres, lons_hres, lats_coar, lons_coar, lat_dim, lon_dim, nn = 4, ncores = NULL) {
-
- # Sort the distances from closest to furthest
- idx_lat <- as.array(sapply(lats_hres, function(x) order(abs(lats_coar - x))[1:nn]))
- idx_lon <- as.array(sapply(lons_hres, function(x) order(abs(lons_coar - x))[1:nn]))
-
- names(dim(idx_lat)) <- c('nn', lat_dim)
- names(dim(idx_lon)) <- c('nn', lon_dim)
-
- # obtain the values of the nearest neighbours
- nearest <- Apply(list(coar, idx_lat, idx_lon),
- target_dims = list(c(lat_dim, lon_dim), lat_dim, lon_dim),
- fun = function(x, y, z) x[y, z], ncores = ncores)$output1
-
+.find_nn <- function(coar, lats_hres, lons_hres, lats_coar, lons_coar,
+ lat_dim, lon_dim, sdate_dim, member_dim, nn = 9, ncores = NULL) {
+
+ # arrange the lats and lons of the high-res and coarse-res grids as two-column matrices
+ # in order to calculate the distances between them
+ hres_mat <- expand.grid(as.numeric(lats_hres), as.numeric(lons_hres))
+ coar_mat <- expand.grid(as.numeric(lats_coar), as.numeric(lons_coar))
+
+ # calculate distances and obtain the 'nn' closest coarse-res grid points for each high-res grid point
+ dist_id <- apply(proxy::dist(hres_mat, coar_mat), 1, order)[1:nn, ]
+
+ names(dim(dist_id)) <- c("nn", lon_dim)
+
+ idh <- SplitDim(dist_id, split_dim = lon_dim,
+ indices = rep(1:(length(lons_hres)), length(lats_hres)),
+ new_dim_name = lat_dim)
+
+ idh <- aperm(idh, c(1, 3, 2)) # dimension: nn, lat_dim, lon_dim
+
+ # coar
+ idc_lat_grid <- match(coar_mat[, 1], as.numeric(lats_coar))
+ idc_lon_grid <- match(coar_mat[, 2], as.numeric(lons_coar))
+ cgrid <- cbind(idc_lat_grid, idc_lon_grid) # lat and lon indices of each coarse-res grid point in the 2-dimensional ordering
+
+ idh_dum <- array(NA, dim = c(nn, 2, length(lats_hres), length(lons_hres)))
+
+ for (i in 1:length(lats_hres)) {
+ for (j in 1:length(lons_hres)) {
+ idh_dum[, , i, j] <- cgrid[idh[, i, j], ]
+ }
+ }
+
+ idh <- idh_dum
+ rm(idh_dum)
+ names(dim(idh)) <- c("nn", "grd", lat_dim, lon_dim)
+
+ # idh stores, for each lat-lon of the high-res data, the lat and lon indices of its
+ # nearest neighbours in the coarse-res data
+
+ nearest <- Apply(list(coar, idh),
+ target_dims = list(c(sdate_dim, lat_dim, lon_dim, member_dim),
+ "grd"),
+ fun = function(x, y) {
+ a <- x[, y[1], y[2], , drop = FALSE]
+ a <- Subset(a, along = c(lat_dim, lon_dim),
+ indices = list(1, 1),
+ drop = "selected") # drop lat lon dim coming from coar data
+ return(a)
+ },
+ ncores = ncores)$output1
+
 return(nearest)
 }
diff --git a/R/LogisticReg.R b/R/LogisticReg.R
index 67fa1b28c9d8bf508d3f2b0d39c6a4940ade29c1..f74f2ee17c181e8ebf3b6d98ad3c4f12f254d3cf 100644
--- a/R/LogisticReg.R
+++ b/R/LogisticReg.R
@@ -21,6 +21,10 @@
 #'@param obs an 's2dv object' with named dimensions containing the observational field.
 #'The object must have, at least, the dimensions latitude, longitude and start date. The
 #'object is expected to be already subset for the desired region.
+#'@param exp_cor an optional 's2dv_cube' object with named dimensions containing the seasonal
+#'forecast experiment data. If the forecast is provided, it will be downscaled using the
+#'hindcast and observations; if not provided, the hindcast will be downscaled instead. The
+#'default value is NULL.
 #'@param target_grid a character vector indicating the target grid to be passed to CDO.
 #'It must be a grid recognised by CDO or a NetCDF file.
 #'@param int_method a character vector indicating the regridding method to be passed to CDORemap.
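For orientation, a minimal usage sketch of the new 'exp_cor' argument, patterned on the roxygen example further below (synthetic data; it assumes CDO is available, as in the package examples, and the single-start-date forecast is illustrative only):

exp <- s2dv_cube(data = array(rnorm(1500), dim = c(member = 5, lat = 4, lon = 5, sdate = 15)),
                 coords = list(lat = 1:4, lon = 1:5))
fcst <- s2dv_cube(data = array(rnorm(100), dim = c(member = 5, lat = 4, lon = 5, sdate = 1)),
                  coords = list(lat = 1:4, lon = 1:5))
obs <- s2dv_cube(data = array(rnorm(2700), dim = c(lat = 12, lon = 15, sdate = 15)),
                 coords = list(lat = seq(1, 4, 3/11), lon = seq(1, 5, 4/14)))
res <- CST_LogisticReg(exp = exp, obs = obs, exp_cor = fcst, int_method = 'bil',
                       target_grid = 'r1280x640', probs_cat = c(1/3, 2/3))
# res$exp then holds the downscaled forecast probabilities instead of the downscaled
# hindcast; 'loocv' is forced to FALSE internally when a forecast is provided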
@@ -81,16 +85,17 @@ #'dim(obs) <- c(lat = 12, lon = 15, sdate = 15) #'obs_lons <- seq(1,5, 4/14) #'obs_lats <- seq(1,4, 3/11) -#'exp <- s2dv_cube(data = exp, lat = exp_lats, lon = exp_lons) -#'obs <- s2dv_cube(data = obs, lat = obs_lats, lon = obs_lons) +#'exp <- s2dv_cube(data = exp, coords = list(lat = exp_lats, lon = exp_lons)) +#'obs <- s2dv_cube(data = obs, coords = list(lat = obs_lats, lon = obs_lons)) #'res <- CST_LogisticReg(exp = exp, obs = obs, int_method = 'bil', target_grid = 'r1280x640', #'probs_cat = c(1/3, 2/3)) #'@export - -CST_LogisticReg <- function(exp, obs, target_grid, int_method = NULL, log_reg_method = "ens_mean", - probs_cat = c(1/3,2/3), return_most_likely_cat = FALSE, points = NULL, - method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", - member_dim = "member", region = NULL, loocv = TRUE, ncores = NULL) { +CST_LogisticReg <- function(exp, obs, exp_cor = NULL, target_grid, int_method = NULL, + log_reg_method = "ens_mean", probs_cat = c(1/3,2/3), + return_most_likely_cat = FALSE, points = NULL, + method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon", + sdate_dim = "sdate", member_dim = "member", region = NULL, + loocv = TRUE, ncores = NULL) { if (!inherits(exp,'s2dv_cube')) { stop("Parameter 'exp' must be of the class 's2dv_cube'") @@ -100,28 +105,38 @@ CST_LogisticReg <- function(exp, obs, target_grid, int_method = NULL, log_reg_me stop("Parameter 'obs' must be of the class 's2dv_cube'") } - res <- LogisticReg(exp = exp$data, obs = obs$data, exp_lats = exp$coords[[lat_dim]], + res <- LogisticReg(exp = exp$data, obs = obs$data, exp_cor = exp_cor$data, exp_lats = exp$coords[[lat_dim]], exp_lons = exp$coords[[lon_dim]], obs_lats = obs$coords[[lat_dim]], obs_lons = obs$coords[[lon_dim]], target_grid = target_grid, probs_cat = probs_cat, return_most_likely_cat = return_most_likely_cat, int_method = int_method, log_reg_method = log_reg_method, points = points, method_point_interp = method_point_interp, lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim, member_dim = member_dim, - source_file_exp = exp$coords$source_files[1], source_file_obs = obs$coords$source_files[1], + source_file_exp = exp$attrs$source_files[1], source_file_obs = obs$attrs$source_files[1], region = region, loocv = loocv, ncores = ncores) # Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data - exp$data <- res$data - exp$dims <- dim(exp$data) - exp$coords[[lon_dim]] <- res$lon - exp$coords[[lat_dim]] <- res$lat - obs$data <- res$obs obs$dims <- dim(obs$data) obs$coords[[lon_dim]] <- res$lon obs$coords[[lat_dim]] <- res$lat - res_s2dv <- list(exp = exp, obs = obs) + if (is.null(exp_cor)) { + exp$data <- res$data + exp$dims <- dim(exp$data) + exp$coords[[lon_dim]] <- res$lon + exp$coords[[lat_dim]] <- res$lat + + res_s2dv <- list(exp = exp, obs = obs) + } else { + exp_cor$data <- res$data + exp_cor$dims <- dim(exp_cor$data) + exp_cor$coords[[lon_dim]] <- res$lon + exp_cor$coords[[lat_dim]] <- res$lat + + res_s2dv <- list(exp = exp_cor, obs = obs) + } + return(res_s2dv) } @@ -148,6 +163,10 @@ CST_LogisticReg <- function(exp, obs, target_grid, int_method = NULL, log_reg_me #'@param obs an array with named dimensions containing the observational field. The object #'must have, at least, the dimensions latitude, longitude and start date. The object is #'expected to be already subset for the desired region. +#'@param exp_cor an optional array with named dimensions containing the seasonal forecast +#'experiment data. 
If the forecast is provided, it will be downscaled using the hindcast and
+#'observations; if not provided, the hindcast will be downscaled instead. The default value
+#'is NULL.
 #'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must
 #'range from -90 to 90.
 #'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes
@@ -224,11 +243,13 @@ CST_LogisticReg <- function(exp, obs, target_grid, int_method = NULL, log_reg_me
 #'obs_lats = obs_lats, obs_lons = obs_lons, int_method = 'bil', target_grid = 'r1280x640',
 #'probs_cat = c(1/3, 2/3))
 #'@export
-LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid,
- int_method = NULL, log_reg_method = "ens_mean", probs_cat = c(1/3,2/3),
- return_most_likely_cat = FALSE, points = NULL, method_point_interp = NULL,
- lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", member_dim = "member",
- source_file_exp = NULL, source_file_obs = NULL, region = NULL, loocv = TRUE, ncores = NULL) {
+LogisticReg <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lons,
+ target_grid, int_method = NULL, log_reg_method = "ens_mean",
+ probs_cat = c(1/3,2/3), return_most_likely_cat = FALSE, points = NULL,
+ method_point_interp = NULL, lat_dim = "lat", lon_dim = "lon",
+ sdate_dim = "sdate", member_dim = "member",
+ source_file_exp = NULL, source_file_obs = NULL, region = NULL,
+ loocv = TRUE, ncores = NULL) {
 #-----------------------------------
 # Checkings
@@ -293,6 +314,34 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target
 "'member_dim'")
 }
+ if (!is.null(exp_cor)) {
+
+ if (is.na(match(sdate_dim, names(dim(exp_cor))))) {
+ stop("Missing start date dimension in 'exp_cor', or does not match the parameter ",
+ "'sdate_dim'")
+ }
+
+ if (is.na(match(member_dim, names(dim(exp_cor))))) {
+ stop("Missing member dimension in 'exp_cor', or does not match the parameter 'member_dim'")
+ }
+
+ if (is.na(match(lon_dim, names(dim(exp_cor))))) {
+ stop("Missing longitude dimension in 'exp_cor', or does not match the parameter ",
+ "'lon_dim'")
+ }
+
+ if (is.na(match(lat_dim, names(dim(exp_cor))))) {
+ stop("Missing latitude dimension in 'exp_cor', or does not match the parameter ",
+ "'lat_dim'")
+ }
+
+ if (loocv) { # set loocv to FALSE to train with the whole hindcast and predict with the forecast
+ loocv <- FALSE
+ warning("Forecast data will be downscaled. 'loocv' is set to FALSE ",
+ "to train the model with the whole hindcast and predict with the forecast.")
+ }
+ }
+
 # When observations are pointwise
 if (!is.null(points) & !is.na(match("location", names(dim(obs))))) {
 point_obs <- T
@@ -314,9 +363,9 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target
 }
 if (is.null(region)) {
- warning("The borders of the downscaling region have not been provided. Assuming the four borders ",
- "of the downscaling region are defined by the first and last elements of the parametres ",
- "'obs_lats' and 'obs_lons'.")
+ warning("The borders of the downscaling region have not been provided. Assuming ",
+ "the four borders of the downscaling region are defined by the first and ",
+ "last elements of the parameters 'obs_lats' and 'obs_lons'.")
 region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)])
 }
 ## ncores
@@ -339,24 +388,44 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target
 }
 }
- exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, target_grid = target_grid,
- method_remap = int_method, points = points, source_file = source_file_exp,
- lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp,
+ exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons,
+ target_grid = target_grid, method_remap = int_method,
+ points = points, source_file = source_file_exp,
+ lat_dim = lat_dim, lon_dim = lon_dim,
+ method_point_interp = method_point_interp,
 region = region, ncores = ncores)
+ if (!is.null(exp_cor)) {
+ exp_cor_interpolated <- Interpolation(exp = exp_cor, lats = exp_lats, lons = exp_lons,
+ target_grid = target_grid, points = points,
+ method_point_interp = method_point_interp,
+ source_file = source_file_exp, lat_dim = lat_dim,
+ lon_dim = lon_dim, method_remap = int_method,
+ region = region, ncores = ncores)
+ }
+
 # compute ensemble mean anomalies
 if (log_reg_method == "ens_mean") {
- predictor <- .get_ens_mean_anom(obj_ens = exp_interpolated$data, member_dim = member_dim, sdate_dim = sdate_dim,
- ncores = ncores)
+ predictor <- .get_ens_mean_anom(obj_ens = exp_interpolated$data, member_dim = member_dim,
+ sdate_dim = sdate_dim, ncores = ncores)
 target_dims_predictor <- sdate_dim
+ if (!is.null(exp_cor)) {
+ clim_hcst <- Apply(exp_interpolated$data, target_dims = c(member_dim, sdate_dim),
+ mean, ncores = ncores,
+ na.rm = TRUE)$output1 ## climatology of hindcast ens mean
+ ens_mean_fcst <- Apply(exp_cor_interpolated$data, target_dims = member_dim,
+ mean, na.rm = TRUE, ncores = ncores)$output1 ## ens mean of the forecast
+ forecast <- Ano(ens_mean_fcst, clim_hcst, ncores = ncores)
+ target_dims_forecast <- sdate_dim
+ }
 } else if (log_reg_method == "ens_mean_sd") {
 require(abind)
 ens_mean_anom <- .get_ens_mean_anom(obj_ens = exp_interpolated$data, member_dim = member_dim,
- sdate_dim = sdate_dim, ncores = ncores)
+ sdate_dim = sdate_dim, ncores = ncores)
 ens_sd <- .get_ens_sd(obj_ens = exp_interpolated$data, member_dim = member_dim, ncores = ncores)
 #merge two arrays into one array of predictors
@@ -364,8 +433,30 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target
 names(dim(predictor)) <- c("pred", names(dim(ens_mean_anom)))
 target_dims_predictor <- c(sdate_dim, "pred")
- } else if (log_reg_method == "sorted_members") {
- predictor <- .sort_members(obj_ens = exp_interpolated$data, member_dim = member_dim, ncores = ncores)
+
+ if (!is.null(exp_cor)) {
+ clim_hcst <- Apply(exp_interpolated$data, target_dims = c(member_dim, sdate_dim),
+ mean, ncores = ncores,
+ na.rm = TRUE)$output1 ## climatology of hindcast ens mean
+ ens_mean_fcst <- Apply(exp_cor_interpolated$data, target_dims = member_dim,
+ mean, na.rm = TRUE, ncores = ncores)$output1 ## ens mean of the forecast
+ forecast_mean_anom <- Ano(ens_mean_fcst, clim_hcst, ncores = ncores)
+ forecast_sd <- .get_ens_sd(obj_ens = exp_cor_interpolated$data,
+ member_dim = member_dim, ncores = ncores)
+ forecast <- abind(forecast_mean_anom, forecast_sd, along = 1/2)
+ names(dim(forecast)) <- c("pred", names(dim(forecast_mean_anom)))
+
+ target_dims_forecast <- 
+  } else if (log_reg_method == "sorted_members") {
+
+    if (!is.null(exp_cor)) {
+      stop('sorted_members method cannot be used to downscale forecasts since ',
+           'the ensemble members are generally exchangeable')
+    }
+    predictor <- .sort_members(obj_ens = exp_interpolated$data,
+                               member_dim = member_dim, ncores = ncores)

     target_dims_predictor <- c(sdate_dim, member_dim)
   } else {
@@ -376,9 +467,11 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target
   # the same grid to force the matching
   if ((!.check_coords(lat1 = as.numeric(exp_interpolated$lat), lat2 = obs_lats,
                       lon1 = as.numeric(exp_interpolated$lon), lon2 = obs_lons)) | !(point_obs)) {
-    obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, target_grid = target_grid,
-                                      method_remap = int_method, points = points, source_file = source_file_obs,
-                                      lat_dim = lat_dim, lon_dim = lon_dim, method_point_interp = method_point_interp,
+    obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons,
+                                      target_grid = target_grid, method_remap = int_method,
+                                      points = points, source_file = source_file_obs,
+                                      lat_dim = lat_dim, lon_dim = lon_dim,
+                                      method_point_interp = method_point_interp,
                                       region = region, ncores = ncores)
     obs_ref <- obs_interpolated$data
   } else {
@@ -387,28 +480,34 @@ LogisticReg <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target

   # convert observations to categorical predictands
-obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) {
-  if (!any(!is.na(x))) {
-    rep(NA,length(x))
-  } else {
-    terc <- convert2prob(as.vector(x), prob = probs_cat)
-    as.integer(apply(terc, 1, function(r) which (r == 1)))}},
-  output_dims = sdate_dim, ncores = ncores)$output1
-
-
-  res <- Apply(list(predictor, obs_cat), target_dims = list(target_dims_predictor, sdate_dim),
-               fun = function(x, y)
-               .log_reg(x = x, y = y, loocv = loocv,probs_cat=probs_cat),
-               output_dims = c(sdate_dim, "category"), ncores = ncores)$output1
-
-  if (return_most_likely_cat) {
-    res <- Apply(res, target_dims = c(sdate_dim, "category"), .most_likely_category,
-                 output_dims = sdate_dim, ncores = ncores)$output1
+  obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) {
+    if (!any(!is.na(x))) {
+      rep(NA, length(x))
+    } else {
+      terc <- convert2prob(as.vector(x), prob = probs_cat)
+      as.integer(apply(terc, 1, function(r) which(r == 1)))}},
+    output_dims = sdate_dim, ncores = ncores)$output1
+
+  if (is.null(exp_cor)) {
+    res <- Apply(list(predictor, obs_cat), target_dims = list(target_dims_predictor, sdate_dim),
+                 fun = function(x, y) .log_reg(x = x, y = y, loocv = loocv, probs_cat = probs_cat),
+                 output_dims = c(sdate_dim, "category"), ncores = ncores)$output1
+
+    if (return_most_likely_cat) {
+      res <- Apply(res, target_dims = c(sdate_dim, "category"), .most_likely_category,
+                   output_dims = sdate_dim, ncores = ncores)$output1
+    }
+  } else {
+    res <- Apply(list(predictor, obs_cat, forecast),
+                 target_dims = list(target_dims_predictor, sdate_dim, target_dims_forecast),
+                 fun = function(x, y, f) .log_reg(x = x, y = y, f = f, loocv = loocv,
+                                                  probs_cat = probs_cat),
+                 output_dims = c(sdate_dim, "category"), ncores = ncores)$output1
   }

   # restore ensemble dimension in observations if it existed originally
   if (restore_ens) {
-    obs_ref <- s2dv::InsertDim(obs_ref, posdim = 1, lendim = 1, name = member_dim, ncores = ncores)
+    obs_ref <- s2dv::InsertDim(obs_ref, posdim = 1, lendim = 1, name = member_dim)
   }

   res <- list(data = res, obs = obs_ref, lon = exp_interpolated$lon, lat = exp_interpolated$lat)
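[Editorial note] The obs_cat step converts each observed series into one category index per start date. Assuming convert2prob() here is easyVerification's (an assumption; the import is not shown in this diff, but the prob argument matches that signature), the conversion looks like this on a plain vector, with the 0/1 membership matrix collapsed to the index of the occupied category:

library(easyVerification)

x <- c(12, 15, 11, 18, 14, 19, 13, 16, 17)  # hypothetical observations, one per sdate

# one row per value, one column per category, a 1 marking the occupied tercile
terc <- convert2prob(x, prob = c(1/3, 2/3))

# category index per start date: 1 = lower, 2 = middle, 3 = upper tercile
as.integer(apply(terc, 1, function(r) which(r == 1)))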
@@ -429,7 +528,8 @@ obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) {

 .sort_members <- function(obj_ens, member_dim, ncores = NULL) {

-  sorted <- Apply(obj_ens, target_dims = member_dim, sort, decreasing = TRUE, na.last = TRUE, ncores = ncores)$output1
+  sorted <- Apply(obj_ens, target_dims = member_dim,
+                  sort, decreasing = TRUE, na.last = TRUE, ncores = ncores)$output1

   return(sorted)
 }
@@ -447,7 +547,8 @@ obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) {
   require(s2dv)

   # compute climatology
-  clim <- Apply(obj_ens, target_dims = c(member_dim, sdate_dim), mean, ncores = ncores, na.rm = TRUE)$output1
+  clim <- Apply(obj_ens, target_dims = c(member_dim, sdate_dim),
+                mean, ncores = ncores, na.rm = TRUE)$output1

   # compute ensemble mean
   ens_mean <- Apply(obj_ens, target_dims = member_dim, mean, na.rm = TRUE, ncores = ncores)$output1
@@ -459,15 +560,23 @@ obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) {
 }

 # atomic functions for logistic regressions
-.log_reg <- function(x, y, loocv,probs_cat) {
+.log_reg <- function(x, y, f = NULL, loocv, probs_cat) {

   tmp_df <- data.frame(x = x, y = y)

   # if the data is all NA, force return of NA
   if (all(is.na(tmp_df)) | (sum(apply(tmp_df, 2, function(x) !all(is.na(x)))) == 1) | all(is.na(tmp_df$y))) {

+    if (is.null(f)) {
+      n1 <- nrow(tmp_df)
+    } else {
+      if (is.null(dim(f))) {
+        n1 <- length(f)
+      } else {
+        n1 <- dim(f)[1]
+      }
+    }
-    n1 <- nrow(tmp_df)
-    n2<- length(probs_cat)+1
+    n2 <- length(probs_cat) + 1
     res <- matrix(NA, nrow = n1, ncol = n2)

   } else {
@@ -475,7 +584,14 @@ obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) {
     lm1 <- .train_lr(df = tmp_df, loocv = loocv)

     # prediction
-    res <- pred_lr(lm1 = lm1, df = tmp_df, loocv = loocv, probs_cat=probs_cat)
+    res <- .pred_lr(lm1 = lm1, df = tmp_df, f = f, loocv = loocv, probs_cat = probs_cat)
+
+    if (!is.null(f)) { ## if a forecast is provided and it contains a single start date
+      if (nrow(f) == 1) {
+        res <- array(res, dim = c(1, length(probs_cat) + 1))
+      }
+    }
+
   }
   return(res)
 }
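[Editorial note] For context, the fitted objects handled by .log_reg() behave like nnet::multinom() models; the diff does not show .train_lr(), so this is an assumption based on predict(..., type = "probs"). A self-contained sketch of the train/predict pattern, including the newdata branch used when a forecast f is supplied:

library(nnet)

set.seed(1)
df <- data.frame(x = rnorm(30),                                # hypothetical predictor
                 y = factor(sample(1:3, 30, replace = TRUE)))  # tercile category

fit <- multinom(y ~ x, data = df, trace = FALSE)

# hindcast-period probabilities: one row per start date, one column per category
head(predict(fit, df, type = "probs"))

# forecast probabilities, mirroring the newdata = data.frame(x = f) branch below
predict(fit, newdata = data.frame(x = c(-1, 0, 1)), type = "probs")

With only two categories, multinom()'s predict() returns a single vector of probabilities for the second class, which is why .pred_lr() below expands it into a two-column matrix.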
@@ -506,7 +622,7 @@ obs_cat <- Apply(obs_ref, target_dims = sdate_dim, function(x) {
 #-----------------------------------
 # Function to apply the logistic regressions.
 #-----------------------------------
-pred_lr <- function(df, lm1, loocv, probs_cat) {
+.pred_lr <- function(df, lm1, f, loocv, probs_cat) {

   require(plyr)

@@ -517,7 +633,7 @@ pred_lr <- function(df, lm1, loocv, probs_cat) {
     pred_vals_ls <-list()
     for (j in 1:nrow(df)) {
-      if (all(is.na(df[j,]))) {
+      if (all(is.na(df[j, ]))) {
        pred_vals_ls[[j]] <- rep(NA, length(probs_cat) + 1)
      } else {
        pred_vals_ls[[j]] <- predict(lm1[[j]], df[j,], type = "probs")
@@ -526,12 +642,12 @@ pred_lr <- function(df, lm1, loocv, probs_cat) {

     pred_vals <- laply(pred_vals_ls, .fun = as.array)

-    if( length(probs_cat)+1==2) {
-      pred_vals_dum<-array(NA,dim=c(nrow(df),2))
-      pred_vals_dum[,2]<-pred_vals
-      pred_vals_dum[,1]<-1-pred_vals
-      pred_vals<-pred_vals_dum
-      colnames(pred_vals)<-c(1,2)
+    if (length(probs_cat) + 1 == 2) {
+      pred_vals_dum <- array(NA, dim = c(nrow(df), 2))
+      pred_vals_dum[, 2] <- pred_vals
+      pred_vals_dum[, 1] <- 1 - pred_vals
+      pred_vals <- pred_vals_dum
+      colnames(pred_vals) <- c(1, 2)
     }

   } else {
@@ -539,16 +655,35 @@ pred_lr <- function(df, lm1, loocv, probs_cat) {
     # type = class, probs
     #pred_vals_ls <- lapply(lm1, predict, data = df, type = "probs")
     #pred_vals <- unlist(pred_vals_ls)
-    pred_vals <- predict(lm1[[1]], df, type = "probs")
+    if (is.null(f)) {
+      pred_vals <- predict(lm1[[1]], df, type = "probs")

-    if( length(probs_cat)+1==2) {
-      pred_vals_dum<-array(NA,dim=c(nrow(df),2))
-      pred_vals_dum[,2]<-pred_vals
-      pred_vals_dum[,1]<-1-pred_vals
-      pred_vals<-pred_vals_dum
-      colnames(pred_vals)<-c(1,2)
-    }
+      if (length(probs_cat) + 1 == 2) {
+        pred_vals_dum <- array(NA, dim = c(nrow(df), 2))
+        pred_vals_dum[, 2] <- pred_vals
+        pred_vals_dum[, 1] <- 1 - pred_vals
+        pred_vals <- pred_vals_dum
+        colnames(pred_vals) <- c(1, 2)
+      }
+    } else {
+      if (is.null(dim(f))) {
+        pred_vals <- predict(lm1[[1]], newdata = data.frame(x = as.vector(f)), type = "probs")
+      } else {
+        pred_vals <- predict(lm1[[1]], newdata = data.frame(x = f), type = "probs")
+      }
+      if (length(probs_cat) + 1 == 2) {
+        if (is.null(dim(f))) {
+          pred_vals_dum <- matrix(NA, nrow = length(f), ncol = 2)
+        } else {
+          pred_vals_dum <- matrix(NA, nrow = dim(f)[1], ncol = 2)
+        }
+        pred_vals_dum[, 2] <- pred_vals
+        pred_vals_dum[, 1] <- 1 - pred_vals
+        pred_vals <- pred_vals_dum
+        colnames(pred_vals) <- c(1, 2)
+      }
+    }
   }

   return(pred_vals)
diff --git a/inst/examples/analogs.R b/inst/examples/analogs.R
index 3446647dd00a7c241a85275f03accb244f319fcc..a4dcbc29682dc5b81880cad74649edcec084d128 100644
--- a/inst/examples/analogs.R
+++ b/inst/examples/analogs.R
@@ -44,8 +44,7 @@ obs_era5 <- startR::Start(dat = '/esarchive/recon/ecmwf/era5/daily_mean/$var$_f1
                           num_procs = 1, retrieve = TRUE)

 # Create 's2dv_cube' object
-obs_s2dv <- s2dv_cube(obs_era5, lat = attr(obs_era5, "Variables")$dat1$lat,
-                      lon = attr(obs_era5, "Variables")$dat1$lon, source_files = attr(obs_era5, "Files")[1,1,])
+obs_s2dv <- as.s2dv_cube(obs_era5)

 # ERA5-Land observations: high-resolution observations
 # change to time = indices(1:28)
@@ -90,8 +89,7 @@ exp <- startR::Start(dat = '/esarchive/exp/ecmwf/system5c3s/daily_mean/$var$_f6h
                      num_procs = 1, retrieve = TRUE)

 # Create 's2dv_cube' object
-exp_s2dv <- s2dv_cube(exp, lat = attr(exp, "Variables")$dat1$lat, lon = attr(exp, "Variables")$dat1$lon,
-                      source_files = attr(exp, "Files")[1,1,])
+exp_s2dv <- as.s2dv_cube(exp)

 #---------------------------
 # Analogs with no 'window' dimension in the initial object and with cv
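[Editorial note] Throughout the example scripts, manual s2dv_cube() construction (and the first_lon/last_lat attribute bookkeeping removed further below) is replaced by a single as.s2dv_cube() call, which reads coordinates and source files directly from the startR attributes. A sketch of the pattern; the retrieval itself is kept symbolic because it needs the data paths above:

library(CSTools)

# 'obs_era5' is assumed to come from a startR::Start(..., retrieve = TRUE)
# call such as the one above; as.s2dv_cube() then needs no extra arguments
if (exists("obs_era5")) {
  obs_s2dv <- as.s2dv_cube(obs_era5)
  str(obs_s2dv, max.level = 1)  # data plus the metadata pulled from the attributes
}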
diff --git a/inst/examples/interpolation-bc.R b/inst/examples/interpolation-bc.R
index 2d8ef6b47a053d6074e9fffae98b4138f24a44a1..0a68e8de7d458eddc5c6a034de0e89c983324547 100644
--- a/inst/examples/interpolation-bc.R
+++ b/inst/examples/interpolation-bc.R
@@ -22,7 +22,8 @@ latmax <- 44.1

 #sdates <- c('20000201','20010201','20020201','20030201','20040201','20050201','20060201','20070201')
 sdates <- format(ymd("20000201") + rep(years(0:20), each=1),"%Y%m%d")
-sdates <- format(seq(ymd("20000201"), ymd("20200201"), "1 month"), "%Y%m%d")
+#sdates <- format(seq(ymd("20000201"), ymd("20200201"), "1 month"), "%Y%m%d")
+sdates_fcst <- format(seq(ymd("20230201"), ymd("20230401"), "1 month"), "%Y%m%d")

 obs <- startR::Start(dat = '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$sdate$.nc',
                      var = 'tas', time = indices(1), lat = values(list(latmin, latmax)),
@@ -33,7 +34,7 @@ obs <- startR::Start(dat = '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r
                      num_procs = 1, retrieve = TRUE)

 exp <- startR::Start(dat = '/esarchive/exp/ecmwf/system5c3s/monthly_mean/$var$_f6h/$var$_$sdate$.nc',
-                     var = 'tas', time = indices(1), member = indices(1:3), sdate = sdates,
+                     var = 'tas', time = indices(1), member = 'all', sdate = sdates,
                      lat = values(list(latmin, latmax)), lat_reorder = Sort(decreasing = FALSE),
                      lon = values(list(lonmin, lonmax)), lon_reorder = CircularSort(-180, 180),
                      synonims = list(var = c('var','variable'), lon = c('lon', 'longitude'),
@@ -41,13 +42,22 @@ exp <- startR::Start(dat = '/esarchive/exp/ecmwf/system5c3s/monthly_mean/$var$_f
                      return_vars = list(lat = 'dat', lon = 'dat'),
                      num_procs = 1, retrieve = TRUE)

+fcst <- startR::Start(dat = '/esarchive/exp/ecmwf/system51c3s/monthly_mean/$var$_f6h/$var$_$sdate$.nc',
+                      var = 'tas', time = indices(1), member = 'all', sdate = sdates_fcst,
+                      lat = values(list(latmin, latmax)), lat_reorder = Sort(decreasing = FALSE),
+                      lon = values(list(lonmin, lonmax)), lon_reorder = CircularSort(-180, 180),
+                      synonims = list(var = c('var','variable'), lon = c('lon', 'longitude'),
+                                      lat = c('lat', 'latitude'), member = c('member','ensemble')),
+                      return_vars = list(lat = 'dat', lon = 'dat'),
+                      num_procs = 1, retrieve = TRUE)
+
 #------------------------------------
 # Downscaling with Intbc
 #------------------------------------
 down_1_sbc <- Intbc(exp = exp, obs = obs, exp_lats = attr(exp, "Variables")$dat1$lat,
                     exp_lons = attr(exp, "Variables")$dat1$lon, obs_lats = attr(obs, "Variables")$dat1$lat,
                     obs_lons = attr(obs, "Variables")$dat1$lon, target_grid = target_grid, int_method = 'con',
-                    bc_method = 'simple_bias', ncores = 4)
+                    bc_method = 'evmos', ncores = 4)

 down_1_cal <- Intbc(exp = exp, obs = obs, exp_lats = attr(exp, "Variables")$dat1$lat,
                     exp_lons = attr(exp, "Variables")$dat1$lon, obs_lats = attr(obs, "Variables")$dat1$lat,
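[Editorial note] The forecast start dates above are built with the same seq()/format() idiom the commented-out hindcast line used; note that the monthly sequence fills in the intermediate month, so three start dates are produced:

library(lubridate)

sdates_fcst <- format(seq(ymd("20230201"), ymd("20230401"), "1 month"), "%Y%m%d")
sdates_fcst  # "20230201" "20230301" "20230401"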
@@ -69,19 +79,9 @@ down_1_dbc <- Intbc(exp = exp, obs = obs, exp_lats = attr(exp, "Variables")$dat1

 #------------------------------------
 # Transform exp and obs into s2dv_objects
 #------------------------------------
-obs <- s2dv_cube(obs, lat = attr(obs, "Variables")$dat1$lat, lon = attr(obs, "Variables")$dat1$lon,
-                 source_files = attr(obs, "Files")[1,1,])
-attr(obs$lon,"first_lon") <- obs$lon[1]
-attr(obs$lon,"last_lon") <- obs$lon[length(obs$lon)]
-attr(obs$lat,"first_lat") <- obs$lat[1]
-attr(obs$lat,"last_lat") <- obs$lat[length(obs$lat)]
-
-exp <- s2dv_cube(exp, lat = attr(exp, "Variables")$dat1$lat, lon = attr(exp, "Variables")$dat1$lon,
-                 source_files = attr(exp, "Files")[1,1,])
-attr(exp$lon,"first_lon") <- exp$lon[1]
-attr(exp$lon,"last_lon") <- exp$lon[length(exp$lon)]
-attr(exp$lat,"first_lat") <- exp$lat[1]
-attr(exp$lat,"last_lat") <- exp$lat[length(exp$lat)]
+obs <- as.s2dv_cube(obs)
+exp <- as.s2dv_cube(exp)
+fcst <- as.s2dv_cube(fcst)

 #------------------------------------
 # Downscaling with CST_Intbc
@@ -128,6 +128,19 @@ down_points2_qm <- Intbc(exp = exp, obs = obs_point, exp_lats = attr(exp, "Vari
                          source_file_exp = attr(exp, "Files")[1,1,1], source_file_obs = NULL,
                          target_grid = target_grid, bc_method = 'quantile_mapping', ncores = 4)

+#------------------------------------
+# Downscaling forecasts using hindcasts to train
+#------------------------------------
+down_1_sbc <- Intbc(exp = exp, obs = obs, exp_cor = fcst, exp_lats = attr(exp, "Variables")$dat1$lat,
+                    exp_lons = attr(exp, "Variables")$dat1$lon, obs_lats = attr(obs, "Variables")$dat1$lat,
+                    obs_lons = attr(obs, "Variables")$dat1$lon, target_grid = target_grid, int_method = 'con',
+                    bc_method = 'evmos', ncores = 4)
+
+#------------------------------------
+# Downscaling forecasts using hindcasts to train with CST_Intbc
+#------------------------------------
+down_2_sbc <- CST_Intbc(exp = exp, obs = obs, exp_cor = fcst, target_grid = target_grid, int_method = 'con',
+                        bc_method = 'evmos', ncores = 4)
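[Editorial note] The exp_cor pathway added throughout these examples always follows the same logic: the hindcast/observation pairs fit the statistical model, and the fitted model is then applied to the forecast. The same idea in miniature, with a plain linear regression standing in for the bias-correction step (all data synthetic):

set.seed(1)
hcst <- rnorm(20, mean = 2, sd = 1.5)     # hindcast ensemble means (training period)
obs  <- 0.8 * hcst + rnorm(20, sd = 0.3)  # matching observations (training period)
fcst <- rnorm(2, mean = 2, sd = 1.5)      # forecast values (apply only, never trained on)

fit <- lm(obs ~ hcst)                                         # train on the hindcast
corrected <- predict(fit, newdata = data.frame(hcst = fcst))  # apply to the forecast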
diff --git a/inst/examples/interpolation-lr.R b/inst/examples/interpolation-lr.R
index 012dc3a8a687c0cade7f228e3962bd121359ec76..25446c85f2bc0a22d35c5c975252ec70d13b43f0 100644
--- a/inst/examples/interpolation-lr.R
+++ b/inst/examples/interpolation-lr.R
@@ -4,6 +4,7 @@ library(startR)
 library(s2dv)
 library(lubridate)
 library(multiApply)
+library(plyr)
 source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Interpolation.R')
 source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Intlr.R')
 source('/esarchive/scratch/jramon/GitLab_jramon/downscaling/csdownscale/R/Utils.R')
@@ -23,6 +24,7 @@ latmax <- 44.1

 #sdates <- format(ymd("19811201") + rep(years(0:36),each=1),"%Y%m%d")
 sdates <- c('20000201','20010201','20020201','20030201','20040201','20050201','20060201','20070201')
+sdates_fcst <- c('20230201', '20230401')

 obs1 <- startR::Start(dat = '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$sdate$.nc',
                       var = 'tas', time = indices(1), lat = values(list(latmin, latmax)),
@@ -33,7 +35,16 @@ obs1 <- startR::Start(dat = '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-
                       num_procs = 1, retrieve = TRUE)

 exp1 <- startR::Start(dat = '/esarchive/exp/ecmwf/system5c3s/monthly_mean/$var$_f6h/$var$_$sdate$.nc',
-                      var = 'tas', time = indices(1), member = indices(1:3), sdate = sdates,
+                      var = 'tas', time = indices(1), member = 'all', sdate = sdates,
+                      lat = values(list(latmin, latmax)), lat_reorder = Sort(decreasing = FALSE),
+                      lon = values(list(lonmin, lonmax)), lon_reorder = CircularSort(-180, 180),
+                      synonims = list(var = c('var','variable'), lon = c('lon', 'longitude'),
+                                      lat = c('lat', 'latitude'), member = c('member','ensemble')),
+                      return_vars = list(lat = 'dat', lon = 'dat'),
+                      num_procs = 1, retrieve = TRUE)
+
+fcst <- startR::Start(dat = '/esarchive/exp/ecmwf/system51c3s/monthly_mean/$var$_f6h/$var$_$sdate$.nc',
+                      var = 'tas', time = indices(1), member = 'all', sdate = sdates_fcst,
                       lat = values(list(latmin, latmax)), lat_reorder = Sort(decreasing = FALSE),
                       lon = values(list(lonmin, lonmax)), lon_reorder = CircularSort(-180, 180),
                       synonims = list(var = c('var','variable'), lon = c('lon', 'longitude'),
@@ -44,9 +55,9 @@ exp1 <- startR::Start(dat = '/esarchive/exp/ecmwf/system5c3s/monthly_mean/$var$_

 #----------------------------------
 # Downscaling with Intlr
 #----------------------------------
-ind_rdm <- array(NA, dim = c('sdate' = length(sdates), member = 3, 'vars' = 2))
-ind_rdm[,,1] <- rnorm(n = prod(length(sdates), 3), mean = 0, sd = 1)
-ind_rdm[,,2] <- rnorm(n = prod(length(sdates), 3), mean = 0, sd = 1)
+ind_rdm <- array(NA, dim = c('sdate' = length(sdates), member = 25, 'vars' = 2))
+ind_rdm[,,1] <- rnorm(n = prod(length(sdates), 25), mean = 0, sd = 1)
+ind_rdm[,,2] <- rnorm(n = prod(length(sdates), 25), mean = 0, sd = 1)

 down_1_bas <- Intlr(exp = exp1, obs = obs1, exp_lats = attr(exp1, "Variables")$dat1$lat,
                     exp_lons = attr(exp1, "Variables")$dat1$lon, obs_lats = attr(obs1, "Variables")$dat1$lat,
@@ -92,19 +103,9 @@ down_2_nn <- Intlr(exp = exp1, obs = obs1, exp_lats = attr(exp1, "Variables")$da

 #----------------------------------
 # Create s2dv objects
 #----------------------------------
-obs1 <- s2dv_cube(obs1, lat = attr(obs1, "Variables")$dat1$lat, lon = attr(obs1, "Variables")$dat1$lon,
-                  source_files = attr(obs, "Files")[1,1,])
-attr(obs1$lon,"first_lon") <- obs1$lon[1]
-attr(obs1$lon,"last_lon") <- obs1$lon[length(obs1$lon)]
-attr(obs1$lat,"first_lat") <- obs1$lat[1]
-attr(obs1$lat,"last_lat") <- obs1$lat[length(obs1$lat)]
-
-exp1 <- s2dv_cube(exp1, lat = attr(exp1, "Variables")$dat1$lat, lon = attr(exp1, "Variables")$dat1$lon,
-                  source_files = attr(exp1, "Files")[1,1,])
-attr(exp1$lon,"first_lon") <- exp1$lon[1]
-attr(exp1$lon,"last_lon") <- exp1$lon[length(exp1$lon)]
-attr(exp1$lat,"first_lat") <- exp1$lat[1]
-attr(exp1$lat,"last_lat") <- exp1$lat[length(exp1$lat)]
+obs1 <- as.s2dv_cube(obs1)
+exp1 <- as.s2dv_cube(exp1)
+fcst <- as.s2dv_cube(fcst)

 #----------------------------------
 # Downscaling with CST_Intlr
@@ -130,5 +131,40 @@ down_4_bas <- Intlr(exp = exp1, obs = obs_point, exp_lats = attr(exp1, "Variable
                     int_method = 'bilinear', predictors = NULL, source_file_exp = attr(exp1, "Files")[1,1,1],
                     source_file_obs = attr(obs1, "Files")[1,1,1], loocv = TRUE, ncores = 1)

+#------------------------------------
+# Downscaling forecasts using hindcasts to train
+#------------------------------------
+ind_rdm <- array(NA, dim = c('sdate' = length(sdates), member = 25, 'vars' = 2))
+ind_rdm[,,1] <- rnorm(n = prod(length(sdates), 25), mean = 0, sd = 1)
+ind_rdm[,,2] <- rnorm(n = prod(length(sdates), 25), mean = 0, sd = 1)
+ind_rdm_fcst <- array(NA, dim = c('sdate' = length(sdates_fcst), member = 51, 'vars' = 2))
+ind_rdm_fcst[,,1] <- rnorm(n = prod(length(sdates_fcst), 51), mean = 0, sd = 1)
+ind_rdm_fcst[,,2] <- rnorm(n = prod(length(sdates_fcst), 51), mean = 0, sd = 1)
+down_5_bas <- Intlr(exp = exp1, obs = obs1, exp_cor = fcst, exp_lats = attr(exp1, "Variables")$dat1$lat,
+                    exp_lons = attr(exp1, "Variables")$dat1$lon, obs_lats = attr(obs1, "Variables")$dat1$lat,
+                    obs_lons = attr(obs1, "Variables")$dat1$lon, target_grid = target_grid, lr_method = "basic",
+                    int_method = "bilinear", predictors = NULL, loocv = TRUE, ncores = 4)
+down_5_lsc <- Intlr(exp = exp1, obs = obs1, exp_cor = ind_rdm_fcst, exp_lats = attr(exp1, "Variables")$dat1$lat,
+                    exp_lons = attr(exp1, "Variables")$dat1$lon, obs_lats = attr(obs1, "Variables")$dat1$lat,
+                    obs_lons = attr(obs1, "Variables")$dat1$lon, target_grid = target_grid, lr_method = "large-scale",
+                    int_method = "bilinear", predictors = ind_rdm, loocv = TRUE, ncores = 4)
+down_5_nn <- Intlr(exp = exp1, obs = obs1, exp_cor = fcst, exp_lats = attr(exp1, "Variables")$dat1$lat,
+                   exp_lons = attr(exp1, "Variables")$dat1$lon, obs_lats = attr(obs1, "Variables")$dat1$lat,
+                   obs_lons = attr(obs1, "Variables")$dat1$lon, target_grid = target_grid, lr_method = "4nn",
+                   int_method = "bilinear", predictors = NULL, loocv = TRUE, ncores = 4)
"Variables")$dat1$lon, target_grid = target_grid, lr_method = "4nn", + int_method = "bilinear", predictors = NULL, loocv = TRUE, ncores = 4) + +#------------------------------------ +# Downscaling forecasts using hindcasts to train with CST_Intlr +#------------------------------------ +down_6_bas <- CST_Intlr(exp = exp1, obs = obs1, exp_cor = fcst, target_grid = target_grid, lr_method = "basic", + int_method = "bilinear", predictors = NULL, loocv = TRUE, ncores = 4) + +down_6_lsc <- CST_Intlr(exp = exp1, obs = obs1, exp_cor = ind_rdm_fcst, target_grid = target_grid, lr_method = "large-scale", + int_method = "bilinear", predictors = ind_rdm, loocv = TRUE, ncores = 4) + +down_6_nn <- CST_Intlr(exp = exp1, obs = obs1, exp_cor = fcst, target_grid = target_grid, lr_method = "4nn", + int_method = "bilinear", predictors = NULL, loocv = TRUE, ncores = 4) + + diff --git a/inst/examples/interpolation.R b/inst/examples/interpolation.R index 0784398b40c77e205f4bcf1714743fcaab3ef455..176192988e2491536add758baecf718176a04e79 100644 --- a/inst/examples/interpolation.R +++ b/inst/examples/interpolation.R @@ -34,13 +34,8 @@ exp1 <- startR::Start(dat = '/esarchive/exp/ecmwf/system5c3s/monthly_mean/$var$_ member = c('member','ensemble')), return_vars = list(lat = 'dat', lon = 'dat'), num_procs = 1, retrieve = TRUE) -exp2 <- s2dv_cube(exp1, lat = attr(exp1, "Variables")$dat1$lat, lon = attr(exp1, "Variables")$dat1$lon, - source_files = attr(exp1, "Files")[1,1,]) -#attr(exp2$lon,"first_lon") <- exp2$attrs$lon[1] -#attr(exp2$lon,"last_lon") <- exp2$attrs$lon[length(exp2$attrs$lon)] -#attr(exp2$lat,"first_lat") <- exp2$attrs$lat[1] -#attr(exp2$lat,"last_lat") <- exp2$attrs$lat[length(exp2$attrs$lat)] +exp2 <- as.s2dv_cube(exp1) #-------------------------------- # Downscaling with Interpolation diff --git a/inst/examples/logistic-reg.R b/inst/examples/logistic-reg.R index f2e4ba0e06fdc1a6eb5a00e377057b852915b6cc..6af00412a7f264cdd681e2452bf48c5649fb2762 100644 --- a/inst/examples/logistic-reg.R +++ b/inst/examples/logistic-reg.R @@ -23,6 +23,7 @@ latmax <- 44.1 #sdates <- c('20000201','20010201','20020201','20030201','20040201','20050201','20060201','20070201') sdates <- format(ymd("20000701") + rep(years(0:20), each=1),"%Y%m%d") +sdates_fcst <- c('20230201', '20230401') obs <- startR::Start(dat = '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$sdate$.nc', var = 'tas', time = indices(1), lat = values(list(latmin, latmax)), @@ -33,7 +34,7 @@ obs <- startR::Start(dat = '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r num_procs = 1, retrieve = TRUE) exp <- startR::Start(dat = '/esarchive/exp/ecmwf/system5c3s/monthly_mean/$var$_f6h/$var$_$sdate$.nc', - var = 'tas', time = indices(1), member = indices(1:3), sdate = sdates, + var = 'tas', time = indices(1), member = 'all', sdate = sdates, lat = values(list(latmin, latmax)), lat_reorder = Sort(decreasing = FALSE), lon = values(list(lonmin, lonmax)), lon_reorder = CircularSort(-180, 180), synonims = list(var = c('var','variable'), lon = c('lon', 'longitude'), @@ -41,6 +42,15 @@ exp <- startR::Start(dat = '/esarchive/exp/ecmwf/system5c3s/monthly_mean/$var$_f return_vars = list(lat= 'dat', lon = 'dat'), num_procs = 1, retrieve = TRUE) +fcst <- startR::Start(dat = '/esarchive/exp/ecmwf/system51c3s/monthly_mean/$var$_f6h/$var$_$sdate$.nc', + var = 'tas', time = indices(1), member = 'all', sdate = sdates_fcst, + lat = values(list(latmin, latmax)), lat_reorder = Sort(decreasing = FALSE), + lon = values(list(lonmin, lonmax)), lon_reorder = 
+                      synonims = list(var = c('var','variable'), lon = c('lon', 'longitude'),
+                                      lat = c('lat', 'latitude'), member = c('member','ensemble')),
+                      return_vars = list(lat = 'dat', lon = 'dat'),
+                      num_procs = 1, retrieve = TRUE)
+
 # downscaling to fields w LogisticReg()
 res <- LogisticReg(exp = exp,
                    obs = obs,
@@ -55,10 +65,8 @@ res <- LogisticReg(exp = exp,
                    return_most_likely_cat = TRUE)

 # downscaling to fields w CST_LogisticReg()
-obs <- s2dv_cube(obs, lat = attr(obs, "Variables")$dat1$lat, lon = attr(obs, "Variables")$dat1$lon,
-                 source_files = attr(obs, "Files")[1,1,])
-exp <- s2dv_cube(exp, lat = attr(exp, "Variables")$dat1$lat, lon = attr(exp, "Variables")$dat1$lon,
-                 source_files = attr(exp, "Files")[1,1,])
+obs <- as.s2dv_cube(obs)
+exp <- as.s2dv_cube(exp)

 res_cst1 <- CST_LogisticReg(exp = exp,
                             obs = obs,
@@ -87,8 +95,9 @@ res_cst3 <- CST_LogisticReg(exp = exp,
                             return_most_likely_cat = TRUE, loocv = TRUE)

-
+#----------------------------------
 # Plot Most Likely Tercile
+#----------------------------------
 colors <- c("#33BFD1", "#b5b5b5", "#FF764D")
 PlotEquiMap(res_cst1$data[21,1,1,1,1:29,2:58], lat = res_cst1$lat[1:29], lon = res_cst1$lon[2:58],
             filled.continents = F, brks = c(0,1,2,3), cols = colors,
@@ -108,8 +117,9 @@ PlotEquiMap(exp_anom[1,1,1,,,21], lat = attr(exp, "Variables")$dat1$lat, lon = a
             filled.continents = F,
             fileout = "/esarchive/scratch/jramon/downscaling/plots/exp_tas_202007.png",
             width = 7, height = 5)

-
+#----------------------------------
 # downscaling to point locations
+#----------------------------------
 points <- list(lat = c(36, 36.1), lon = c(0.6, 1))
 source_file <- '/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20000201.nc'
 method_point_interp <- 'bilinear'
@@ -118,3 +128,21 @@ res <- LogisticReg(exp = exp, obs = obs, exp_lats = attr(exp, "Variables")$dat1$
                    obs_lons = attr(obs, "Variables")$dat1$lon, target_grid = target_grid,
                    int_method = "bil", points = points, source_file = source_file,
                    method_point_interp = method_point_interp)
+
+#----------------------------------
+# downscaling training with hindcasts and providing forecasts
+#----------------------------------
+res_2_em <- LogisticReg(exp = exp, obs = obs, exp_cor = fcst, exp_lats = attr(exp, "Variables")$dat1$lat,
+                        exp_lons = attr(exp, "Variables")$dat1$lon, obs_lats = attr(obs, "Variables")$dat1$lat,
+                        obs_lons = attr(obs, "Variables")$dat1$lon, target_grid = target_grid, int_method = "bilinear",
+                        log_reg_method = "ens_mean", return_most_likely_cat = TRUE, loocv = TRUE, probs_cat = 1:2/3,
+                        ncores = 1)
+
+res_2_esd <- LogisticReg(exp = exp, obs = obs, exp_cor = fcst, exp_lats = attr(exp, "Variables")$dat1$lat,
+                         exp_lons = attr(exp, "Variables")$dat1$lon, obs_lats = attr(obs, "Variables")$dat1$lat,
+                         obs_lons = attr(obs, "Variables")$dat1$lon, target_grid = target_grid, int_method = "bilinear",
+                         log_reg_method = "ens_mean_sd", return_most_likely_cat = TRUE, loocv = TRUE, probs_cat = 1:2/3,
+                         ncores = 1)
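[Editorial note] A final remark on probs_cat = 1:2/3 in the calls above: it is simply the pair of quantile thresholds that split the climatology into terciles, so length(probs_cat) + 1 gives the three categories of the "category" output dimension:

probs_cat <- 1:2/3
probs_cat                       # 0.3333333 0.6666667

x <- rnorm(21)                  # hypothetical series of observations
quantile(x, probs = probs_cat)  # the two tercile boundaries for this series
length(probs_cat) + 1           # number of categories returned by LogisticReg()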